Code example #1
File: recognizer.py Project: ck624/dependency-parser
 def init_stats_computer(self, what, **kwargs):
     if what == 'grad':
         names, generator = self.generate_grad_stats(**kwargs)
     elif what == 'activation':
         names, generator = self.generate_activation_stats(**kwargs)
     else:
         raise Exception('Unknown stats computer {}'.format(what))
     cg = ComputationGraph(generator)
     self.stat_functs = cg.get_theano_function()
     self.stat_functs_inputs = [inp.name for inp in cg.inputs]
     self.stat_names = names
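
Every snippet collected on this page follows the same basic pattern: wrap one or more Theano variables in a ComputationGraph and call get_theano_function() to compile a callable that maps the graph's inputs to its outputs (keyword arguments, such as on_unused_input='ignore' in example #4 below, are passed through to theano.function). The following is a minimal, self-contained sketch of that pattern; the variable names are illustrative and not taken from any of the projects listed here.

# A minimal sketch of the shared pattern, assuming Blocks and Theano are installed.
import numpy
import theano
import theano.tensor as tensor
from blocks.graph import ComputationGraph

x = tensor.matrix('features')
y = (x ** 2).sum(axis=1)
y.name = 'sum_of_squares'

cg = ComputationGraph([y])      # collects the graph's inputs, outputs and updates
f = cg.get_theano_function()    # compiles a function from inputs to outputs

print([inp.name for inp in cg.inputs])                     # ['features']
print(f(numpy.ones((2, 3), dtype=theano.config.floatX)))   # [array([ 3.,  3.])]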
Code example #2
    def __init__(self, data_stream, variables, path=None, **kwargs):
        self.data_stream = data_stream
        self.variables = variables
        self.path = path
        self.prediction = None

        kwargs.setdefault('after_training', True)
        super(PredictDataStream, self).__init__(**kwargs)

        cg = ComputationGraph(variables)
        self.theano_function = cg.get_theano_function()
Code example #3
    def __init__(self, data_stream, variables, path=None, **kwargs):
        self.data_stream = data_stream
        self.variables = variables
        self.path = path
        self.prediction = None

        kwargs.setdefault("after_training", True)
        super(PredictDataStream, self).__init__(**kwargs)

        cg = ComputationGraph(variables)
        self.theano_function = cg.get_theano_function()
Code example #4
File: predictDataStream.py Project: o7s8r6/videoqa
    def __init__(self, data_stream, output_tensor, path, **kwargs):
        self.data_stream = data_stream
        self.output_tensor = output_tensor
        self.prediction = None
        self.path = path
        kwargs.setdefault('before_training', True)
        super(PredictDataStream, self).__init__(**kwargs)

        cg1 = ComputationGraph(output_tensor)
        self.theano_function = cg1.get_theano_function(
            on_unused_input='ignore')
        self.iter = 0
Code example #5
File: sample.py Project: anirudh9119/SpeechSyn
    def __init__(self, generator, steps=320, n_samples = 10, 
            mean_data = 0, std_data = 1, sample_rate = 8000,
            save_name = "sample_", **kwargs):
        super(Speak, self).__init__(**kwargs)
        steps = 300
        sample = ComputationGraph(generator.generate(n_steps=steps, 
            batch_size=n_samples, iterate=True))
        self.sample_fn = sample.get_theano_function()

        self.mean_data = mean_data
        self.std_data = std_data
        self.sample_rate = sample_rate
        self.save_name = save_name
Code example #6
File: recognizer.py Project: ck624/dependency-parser
    def init_beam_search(self, beam_size):
        """Compile beam search and set the beam size.

        See Blocks issue #500.

        """
        if hasattr(self, 'search_function'):
            # Only recompile if the user wants a different beam size
            return

        generated = self.get_generate_graph(use_mask=True)
        cg = ComputationGraph(nestedDictionaryValues(generated))
        cg = self.activate_masks(cg)
        self.search_function_inputs = [x.name for x in cg.inputs]
        self.search_function_pos_outputs = [x.name for x in cg.outputs[3:]]
        self.search_function = cg.get_theano_function()
Code example #7
File: predict.py Project: YuzhouWang/657-Project
    def __init__(self, data_stream, variables, path=None, **kwargs):
        self.data_stream = data_stream
        self.variables = variables
        # for zip(var, var1) in self.variables, variables
        #     var.name = var1.name
        #print (var.name for var in variables)
        #print "varnames ^"
        #self.variables.name = variables.name
        self.path = path
        self.prediction = None

        kwargs.setdefault('after_training', True)
        super(PredictDataStream, self).__init__(**kwargs)

        cg = ComputationGraph(variables)
        self.theano_function = cg.get_theano_function()
Code example #8
    def __init__(self, model_path='vgg.tar', synset_words='synset_words.txt'):
        self.vgg_net = VGGNet()
        x = theano.tensor.tensor4('x')
        y_hat = self.vgg_net.apply(x)
        cg = ComputationGraph(y_hat)
        self.model = Model(y_hat)
        with open(model_path, 'rb') as f:
            self.model.set_parameter_values(load_parameters(f))

        with open(synset_words) as f:
            self.classes = numpy.array(f.read().splitlines())

        self.predict = cg.get_theano_function()

        fc15 = VariableFilter(
            theano_name_regex='fc_15_apply_output')(cg.variables)[0]
        self.fe_extractor = ComputationGraph(fc15).get_theano_function()
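
Code example #8 above also shows the second recurring idea on this page: once a graph has been built, VariableFilter can pull an intermediate variable out of it by its Theano name, and a second function can be compiled for that variable without rebuilding the model. Below is a minimal sketch of the same idea; the brick name 'hidden' and the derived variable name 'hidden_apply_output' are assumptions based on how Blocks usually names application outputs, not names taken from the VGG example.

# Sketch: compile a predictor and a feature extractor from one graph (assumed names).
import numpy
import theano
import theano.tensor as tensor
from blocks.bricks import Linear, Softmax
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.initialization import Constant

x = tensor.matrix('x')
hidden = Linear(input_dim=4, output_dim=3, name='hidden',
                weights_init=Constant(0.1), biases_init=Constant(0))
hidden.initialize()
h = hidden.apply(x)
y_hat = Softmax().apply(h)

cg = ComputationGraph(y_hat)
predict = cg.get_theano_function()

# Pull the hidden activation out of the existing graph by its Theano name.
fc = VariableFilter(theano_name_regex='hidden_apply_output')(cg.variables)[0]
fe_extractor = ComputationGraph(fc).get_theano_function()

batch = numpy.ones((2, 4), dtype=theano.config.floatX)
print(predict(batch)[0].shape)        # (2, 3)
print(fe_extractor(batch)[0].shape)   # (2, 3)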
Code example #9
    def __init__(self,
                 generator,
                 steps=320,
                 n_samples=10,
                 mean_data=0,
                 std_data=1,
                 sample_rate=8000,
                 save_name="sample_",
                 **kwargs):
        super(Speak, self).__init__(**kwargs)
        steps = 300
        sample = ComputationGraph(
            generator.generate(n_steps=steps,
                               batch_size=n_samples,
                               iterate=True))
        self.sample_fn = sample.get_theano_function()

        self.mean_data = mean_data
        self.std_data = std_data
        self.sample_rate = sample_rate
        self.save_name = save_name
Code example #10
File: test_batch_norm.py Project: theprernab/cuboid
def test_batchnorm_rolling():
    layer = BatchNormalization(input_dim=5, rolling_accumulate=True)
    layer.initialize()
    x = T.matrix()

    x_val = np.ones((6, 5), dtype=theano.config.floatX)
    x_val[0, 0] = 10.0

    y = layer.apply(x)
    cg = ComputationGraph([y])

    _func = cg.get_theano_function()
    for i in range(100):
        ret = _func(x_val)
    u = layer.u.get_value()
    assert_allclose(u[0], 1.58491838)
    assert_allclose(u[1], 0.6339674)

    s = layer.s.get_value()
    assert_allclose(s[0], 7.13214684)
    assert_allclose(s[1], 0.)
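
The test above appears without its import preamble; a plausible set of imports for running it is sketched below. The location of BatchNormalization is an assumption inferred from the file attribution (the cuboid project), not something stated in the snippet; the remaining imports are standard NumPy, Theano and Blocks.

# Presumed imports for the rolling-statistics test above (a sketch, not verified).
import numpy as np
import theano
import theano.tensor as T
from numpy.testing import assert_allclose
from blocks.graph import ComputationGraph
from cuboid.bricks import BatchNormalization  # assumed import path for this brick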
Code example #11
def test_batchnorm_rolling():
    layer = BatchNormalization(
            input_dim = 5, rolling_accumulate=True)
    layer.initialize()
    x = T.matrix()

    x_val = np.ones((6, 5), dtype=theano.config.floatX)
    x_val[0,0] = 10.0

    y = layer.apply(x)
    cg = ComputationGraph([y])

    _func = cg.get_theano_function()
    for i in range(100):
        ret = _func(x_val)
    u = layer.u.get_value()
    assert_allclose(u[0], 1.58491838)
    assert_allclose(u[1], 0.6339674)

    s = layer.s.get_value()
    assert_allclose(s[0], 7.13214684)
    assert_allclose(s[1], 0.)
Code example #12
class GeneratePredictions(SimpleExtension):
    def __init__(self, extra_generation_steps, compute_targets, compute_policy,
                 force_generate_groundtruth, catching_up_coof,
                 catching_up_freq, **kwargs):
        self.extra_generation_steps = extra_generation_steps
        self.compute_targets = compute_targets
        self.compute_policy = compute_policy
        self.force_generate_groundtruth = force_generate_groundtruth
        self.catching_up_coof = catching_up_coof
        self.catching_up_freq = catching_up_freq
        kwargs.setdefault('before_training', True)
        kwargs.setdefault('before_batch', True)
        kwargs.setdefault('after_batch', True)
        super(GeneratePredictions, self).__init__(**kwargs)

    def do(self, which_callback, *args):
        if which_callback == 'before_training':
            logger.info("Compiling prediction generator...")
            recognizer, = self.main_loop.model.get_top_bricks()
            self.trained_recognizer = recognizer
            self.recognizer = copy.deepcopy(recognizer)

            # A bit of defensive programming, because why not :)
            assert self.recognizer.generator.readout.compute_targets
            assert self.recognizer.generator.readout.trpo_coef == 0.0
            assert self.recognizer.generator.readout.solve_bellman
            assert self.recognizer.generator.readout.epsilon == 0.0

            groundtruth = self.recognizer.labels
            groundtruth_mask = self.recognizer.labels_mask
            generated = self.recognizer.get_generate_graph(
                n_steps=self.recognizer.labels.shape[0] +
                self.extra_generation_steps,
                return_initial_states=True,
                use_softmax_t=True)
            generation_method = self.recognizer.generator.generate

            if not self.force_generate_groundtruth:
                prediction = generated.pop('samples')
                prediction_mask = self.recognizer.mask_for_prediction(
                    prediction, groundtruth_mask, self.extra_generation_steps)
            else:
                prediction = groundtruth.copy()
                prediction_mask = groundtruth_mask.copy()
            prediction.name = 'predicted_labels'
            prediction_mask.name = 'predicted_mask'

            cg = ComputationGraph(generated.values())
            attended, = VariableFilter(applications=[generation_method],
                                       name='attended')(cg)
            attended_mask, = VariableFilter(applications=[generation_method],
                                            name='attended_mask')(cg)
            generated = {key: value[:-1] for key, value in generated.items()}
            costs = self.recognizer.generator.readout.costs(
                prediction=prediction,
                prediction_mask=prediction_mask,
                groundtruth=groundtruth,
                groundtruth_mask=groundtruth_mask,
                attended=attended,
                attended_mask=attended_mask,
                **generated)
            cost_cg = ComputationGraph(costs)
            value_targets, = VariableFilter(name='value_targets')(cost_cg)
            value_targets.name = 'value_targets'
            probs, = VariableFilter(name='probs')(cost_cg)
            probs.name = 'probs'
            rewards, = VariableFilter(name='rewards')(cost_cg)

            variables_to_compute = [prediction, prediction_mask]
            if self.compute_targets:
                logger.debug("Also compute the targets")
                variables_to_compute += [value_targets]
            if self.compute_policy:
                variables_to_compute += [probs]
            self.extended_cg = ComputationGraph(variables_to_compute)
            self._generate = self.extended_cg.get_theano_function()
            logger.info("Prediction generator compiled")

            params = Selector(self.recognizer).get_parameters()
            trained_params = Selector(self.trained_recognizer).get_parameters()
            if self.catching_up_freq:

                def get_coof(name):
                    if isinstance(self.catching_up_coof, float):
                        return self.catching_up_coof
                    elif isinstance(self.catching_up_coof, list):
                        result = None
                        for pattern, coof in self.catching_up_coof:
                            if re.match(pattern, name):
                                result = coof
                        return result
                    else:
                        raise ValueError

                updates = []
                for name in params:
                    coof = get_coof(name)
                    logging.debug(
                        "Catching up coefficient for {} is {}".format(
                            name, coof))
                    updates.append((params[name], params[name] * (1 - coof) +
                                    trained_params[name] * coof))
                # This is needed when parameters are shared between brick
                # and occur more than once in the list of updates.
                updates = dict(updates).items()
                self._catch_up = theano.function([], [], updates=updates)
        elif which_callback == 'before_batch':
            batch, = args
            generated = self._generate(
                *
                [batch[variable.name] for variable in self.extended_cg.inputs])
            for variable, value in zip(self.extended_cg.outputs, generated):
                batch[variable.name] = value
        elif which_callback == 'after_batch':
            if (self.catching_up_freq
                    and self.main_loop.status['iterations_done'] %
                    self.catching_up_freq == 0):
                self._catch_up()
        else:
            raise ValueError("can't be called on " + which_callback)
Code example #13
def train_ladder(cli_params, dataset=None, save_to='results/ova_all_full'):
    cli_params['save_dir'] = prepare_dir(save_to)
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total,
        params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": {
            'T_C_class': ladder.costs.class_corr,
            'T_C_de': ladder.costs.denois.values(),
        },
        "valid_approx":
        OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_C_de', ladder.costs.denois.values()),
        ]),
        "valid_final":
        OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', ladder.costs.denois.values()),
        ]),
    }

    ovadataset = dataset['ovadataset']
    train_indexes = dataset['train_indexes']
    val_indexes = dataset['val_indexes']

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(ovadataset,
                        train_indexes,
                        p.batch_size,
                        scheme=ShuffledScheme),
        model=Model(ladder.costs.total),
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean] +
                ladder.costs.denois.values(),
                make_datastream(ovadataset, val_indexes, p.batch_size),
                prefix="valid_approx"),

            # This Monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean_mc] +
                ladder.costs.denois.values(),
                make_datastream(ovadataset, train_indexes, p.batch_size),
                make_datastream(ovadataset, val_indexes, p.batch_size),
                prefix="valid_final",
                after_n_epochs=p.num_epochs),
            TrainingDataMonitoring([
                ladder.costs.total, ladder.costs.class_corr,
                training_algorithm.total_gradient_norm
            ] + ladder.costs.denois.values(),
                                   prefix="train",
                                   after_epoch=True),
            ShortPrinting(short_prints),
            LRDecay(ladder.lr,
                    p.num_epochs * p.lrate_decay,
                    p.num_epochs,
                    after_epoch=True),
        ])
    main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
    col = 'valid_final_error_matrix_cost'
    logger.info('%s %g' % (col, df[col].iloc[-1]))

    ds = make_datastream(ovadataset, val_indexes, p.batch_size)
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]
    outputreplacer = TestMonitoring()
    _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {
        'features_labeled': [],
        'targets_labeled': [],
        'features_unlabeled': []
    }
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.iteritems()}

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return res[0], inputs
Code example #14
def dump_unlabeled_encoder(cli_params):
    """
    called when dumping
    :return: inputs, result
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                [
                    ladder.costs.class_clean, ladder.error.clean,
                    ladder.oos.clean
                ] + ladder.costs.denois.values(),
                make_datastream(
                    data.train,
                    data.train_ind,
                    # These need to match with the training
                    p.batch_size,
                    n_labeled=p.labeled_samples,
                    n_unlabeled=len(data.train_ind),
                    balanced_classes=p.balanced_classes,
                    cnorm=cnorm,
                    whiten=whiten,
                    scheme=ShuffledScheme),
                make_datastream(data.valid,
                                data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                balanced_classes=p.balanced_classes,
                                cnorm=cnorm,
                                whiten=whiten,
                                scheme=ShuffledScheme),
                prefix="valid_final",
                before_training=True),
            ShortPrinting(
                {
                    "valid_final":
                    OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_O', ladder.oos.clean),
                        ('VF_C_de', [
                            ladder.costs.denois.get(0),
                            ladder.costs.denois.get(1),
                            ladder.costs.denois.get(2),
                            ladder.costs.denois.get(3)
                        ]),
                    ]),
                },
                after_training=True,
                use_log=False),
        ])
        main_loop.run()

    all_ind = numpy.arange(dset.num_examples)
    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset,
                         all_ind,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(all_ind),
                         n_unlabeled=len(all_ind),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1 we want out the values after softmax
    if p.layer < 0:
        # ladder.act.clean.unlabeled.h is a dict not a list
        outputs = ladder.act.clean.labeled.h[len(ladder.layers) + p.layer]
    else:
        outputs = ladder.act.clean.labeled.h[p.layer]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []

    # Loop over one epoch
    for d in it:
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]

    return res[0]
Code example #15
def analyze(cli_params):
    """
    called when evaluating
    :return: inputs, result
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                [
                    ladder.costs.class_clean, ladder.error.clean,
                    ladder.oos.clean
                ] + ladder.costs.denois.values(),
                make_datastream(
                    data.train,
                    data.train_ind,
                    # These need to match with the training
                    p.batch_size,
                    n_labeled=p.labeled_samples,
                    n_unlabeled=len(data.train_ind),
                    cnorm=cnorm,
                    balanced_classes=p.balanced_classes,
                    whiten=whiten,
                    scheme=ShuffledScheme),
                make_datastream(data.valid,
                                data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                balanced_classes=p.balanced_classes,
                                cnorm=cnorm,
                                whiten=whiten,
                                scheme=ShuffledScheme),
                prefix="valid_final",
                before_training=True),
            ShortPrinting(
                {
                    "valid_final":
                    OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_O', ladder.oos.clean),
                        ('VF_C_de', [
                            ladder.costs.denois.get(0),
                            ladder.costs.denois.get(1),
                            ladder.costs.denois.get(2),
                            ladder.costs.denois.get(3)
                        ]),
                    ]),
                },
                after_training=True,
                use_log=False),
        ])
        main_loop.run()
        # df = DataFrame.from_dict(main_loop.log, orient='index')
        # col = 'valid_final_error_rate_clean'
        # logger.info('%s %g' % (col, df[col].iloc[-1]))

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset,
                         indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1 we want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {
        'features_labeled': [],
        'targets_labeled': [],
        'features_unlabeled': []
    }
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.concatenate(v) for k, v in inputs.iteritems()}

    return inputs['targets_labeled'], res[0]
Code example #16
File: run.py Project: fulldecent/LRE
def dump_unlabeled_encoder(cli_params):
    """
    called when dumping
    :return: inputs, result
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s"%p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean, ladder.oos.clean]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    balanced_classes=p.balanced_classes,
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    balanced_classes=p.balanced_classes,
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_O', ladder.oos.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    all_ind = numpy.arange(dset.num_examples)
    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, all_ind,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(all_ind),
                         n_unlabeled=len(all_ind),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1 we want out the values after softmax
    if p.layer < 0:
        # ladder.act.clean.unlabeled.h is a dict not a list
        outputs = ladder.act.clean.labeled.h[len(ladder.layers) + p.layer]
    else:
        outputs = ladder.act.clean.labeled.h[p.layer]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _,  outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []

    # Loop over one epoch
    for d in it:
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]

    return res[0]
Code example #17
File: __init__.py Project: anirudh9119/SpeechSyn
 def __init__(self, variable, **kwargs):
     super(SaveComputationGraph, self).__init__(**kwargs)
     variable_graph = ComputationGraph(variable)
     self.theano_function = variable_graph.get_theano_function()
Code example #18
File: run.py Project: lude-ma/ladder
def analyze(cli_params):
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=True)
    ladder = setup_model(p)

    # Analyze activations
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test':  (data.test, data.test_ind, True),
    }[p.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # We want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _,  outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.iteritems()}

    return inputs['targets_labeled'], res[0]
Code example #19
File: recognizer.py Project: DingKe/attention-lvcsr
 def init_generate(self):
     generated = self.get_generate_graph(use_mask=False)
     cg = ComputationGraph(generated['outputs'])
     self._do_generate = cg.get_theano_function()
Code example #20
def analyze(cli_params):
    p, _ = load_and_log_params(cli_params)
    _, data = setup_data(p, test_set=True)
    ladder = setup_model(p)

    # Analyze activations
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test': (data.test, data.test_ind, True),
    }[p.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean] +
                list(ladder.costs.denois.values()),
                make_datastream(
                    data.train,
                    data.train_ind,
                    # These need to match with the training
                    p.batch_size,
                    n_labeled=p.labeled_samples,
                    n_unlabeled=len(data.train_ind),
                    scheme=ShuffledScheme),
                make_datastream(data.valid,
                                data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                scheme=ShuffledScheme),
                prefix="valid_final",
                before_training=True),
            ShortPrinting(
                {
                    "valid_final":
                    OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_C_de', [
                            ladder.costs.denois.get(0),
                            ladder.costs.denois.get(1),
                            ladder.costs.denois.get(2),
                            ladder.costs.denois.get(3)
                        ]),
                    ]),
                },
                after_training=True,
                use_log=False),
        ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset,
                         indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         scheme=SequentialScheme)

    # We want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {
        'features_labeled': [],
        'targets_labeled': [],
        'features_unlabeled': []
    }
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.items():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.items()}

    return inputs['targets_labeled'], res[0]
Code example #21
    post_merge = Identity(),
    merged_dim = dimension,
    name="readout")

generator = SequenceGenerator(
    readout=readout,
    transition=transition,
    fork = Fork(['inputs'], prototype=Identity()),
    weights_init = initialization.Identity(1.),
    biases_init = initialization.Constant(0.),
    name="generator")

generator.push_initialization_config()
generator.transition.transition.weights_init = initialization.Identity(2.)
generator.initialize()

results = generator.generate(n_steps=n_steps, 
            batch_size=1, iterate=True,
            return_initial_states = True)

results_cg = ComputationGraph(results)
results_tf = results_cg.get_theano_function()

generated_sequence_t = results_tf()[1]
generated_sequence_t.shape=(n_steps+1, dimension)
print generated_sequence_t
print generated_sequence
Code example #22
File: sample_sp_only.py Project: anirudh9119/play
save_dir = os.environ['RESULTS_DIR']
save_dir = os.path.join(save_dir, 'blizzard/')

experiment_name = "sp_only_0"

main_loop = load(save_dir + "pkl/best_" + experiment_name + ".pkl")

generator = main_loop.model.get_top_bricks()[0]

steps = 2048
n_samples = 1

sample = ComputationGraph(
    generator.generate(n_steps=steps, batch_size=n_samples, iterate=True))
sample_fn = sample.get_theano_function()

outputs = sample_fn()[-2]

outputs = outputs * sp_std + sp_mean
outputs = outputs.swapaxes(0, 1)
outputs = outputs[0]

print outputs.max(), outputs.min()

pyplot.figure(figsize=(100, 15))
pyplot.imshow(outputs.T)
pyplot.colorbar()
pyplot.gca().invert_yaxis()
pyplot.savefig(save_dir + "samples/best_" + experiment_name + "9.png")
pyplot.close()
Code example #23
File: gan_train.py Project: Bjornwolf/language-model
                                    (discriminator_cg.outputs[0] > 0.5)[:m].sum().astype('float32')),
                                   (false_dataset, false_dataset + 
                                    (discriminator_cg.outputs[0] < 0.5)[m:].sum().astype('float32'))])
generator_descent.add_updates([(gen_errors, gen_errors + (ComputationGraph(discriminated_samples).outputs[0] > 0.5).sum().astype('float32'))])


extensions = []
extensions.append(Timing(after_batch=True))
extensions.append(Checkpoint('gan.thn', every_n_batches=10000, 
                             use_cpickle=True, save_separately=['log']))
extensions.append(Printing(every_n_batches=1))


main_loop = GANMainLoop(algorithm_g=generator_descent,
                        g_out=g_out,
                        algorithm_d=discriminator_descent,
                        d_out=d_out,
                        false_generated=false_generated,
                        false_dataset=false_dataset,
                        generator_errors=gen_errors,
                        data_stream=data_stream,
                        generator=generator_cg.get_theano_function(),
                        discriminator=discriminator_cg.get_theano_function(),
                        k=1,
                        noise_per_sample=100,
                        minibatches=m,
                        extensions=extensions,
                        observables=observables)

main_loop.run()
Code example #24
 def init_generate(self):
     generated = self.get_generate_graph(use_mask=False)
     cg = ComputationGraph(generated['samples'])
     self._do_generate = cg.get_theano_function()
Code example #25
    def _create_main_loop(self):
        # hyper parameters
        hp = self.params
        batch_size = hp['batch_size']
        biases_init = Constant(0)
        batch_normalize = hp['batch_normalize']

        ### Build fprop
        tensor5 = T.TensorType(config.floatX, (False, ) * 5)
        X = tensor5("images")
        #X = T.tensor4("images")
        y = T.lvector('targets')

        gnet_params = OrderedDict()
        #X_shuffled = X[:, :, :, :, [2, 1, 0]]
        #X_shuffled = gpu_contiguous(X.dimshuffle(0, 1, 4, 2, 3)) * 255

        X = X[:, :, :, :, [2, 1, 0]]
        X_shuffled = X.dimshuffle((0, 1, 4, 2, 3)) * 255
        X_r = X_shuffled.reshape(
            (X_shuffled.shape[0], X_shuffled.shape[1] * X_shuffled.shape[2],
             X_shuffled.shape[3], X_shuffled.shape[4]))
        X_r = X_r - (np.array([104, 117, 123])[None, :, None,
                                               None]).astype('float32')

        expressions, input_data, param = stream_layer_exp(inputs=('data', X_r),
                                                          mode='rgb')
        res = expressions['outloss']
        y_hat = res.flatten(ndim=2)

        import pdb
        pdb.set_trace()

        ### Build Cost
        cost = CategoricalCrossEntropy().apply(y, y_hat)
        cost = T.cast(cost, theano.config.floatX)
        cost.name = 'cross_entropy'

        y_pred = T.argmax(y_hat, axis=1)
        misclass = T.cast(T.mean(T.neq(y_pred, y)), theano.config.floatX)
        misclass.name = 'misclass'

        monitored_channels = []
        monitored_quantities = [cost, misclass, y_hat, y_pred]
        model = Model(cost)

        training_cg = ComputationGraph(monitored_quantities)
        inference_cg = ComputationGraph(monitored_quantities)

        ### Get evaluation function
        #training_eval = training_cg.get_theano_function(additional_updates=bn_updates)
        training_eval = training_cg.get_theano_function()
        #inference_eval = inference_cg.get_theano_function()

        # Dataset
        test = JpegHDF5Dataset(
            'test',
            #name='jpeg_data_flows.hdf5',
            load_in_memory=True)
        #mean = np.load(os.path.join(os.environ['UCF101'], 'mean.npy'))
        import pdb
        pdb.set_trace()

        ### Eval
        labels = np.zeros(test.num_video_examples)
        y_hat = np.zeros((test.num_video_examples, 101))
        labels_flip = np.zeros(test.num_video_examples)
        y_hat_flip = np.zeros((test.num_video_examples, 101))

        ### Important to shuffle list for batch normalization statistic
        #rng = np.random.RandomState()
        #examples_list = range(test.num_video_examples)
        #import pdb; pdb.set_trace()
        #rng.shuffle(examples_list)

        nb_frames = 1

        for i in xrange(24):
            scheme = HDF5SeqScheme(test.video_indexes,
                                   examples=test.num_video_examples,
                                   batch_size=batch_size,
                                   f_subsample=i,
                                   nb_subsample=25,
                                   frames_per_video=nb_frames)
            #for crop in ['upleft', 'upright', 'downleft', 'downright', 'center']:
            for crop in ['center']:
                stream = JpegHDF5Transformer(
                    input_size=(240, 320),
                    crop_size=(224, 224),
                    #input_size=(256, 342), crop_size=(224, 224),
                    crop_type=crop,
                    translate_labels=True,
                    flip='noflip',
                    nb_frames=nb_frames,
                    data_stream=ForceFloatX(
                        DataStream(dataset=test, iteration_scheme=scheme)))
                stream_flip = JpegHDF5Transformer(
                    input_size=(240, 320),
                    crop_size=(224, 224),
                    #input_size=(256, 342), crop_size=(224, 224),
                    crop_type=crop,
                    translate_labels=True,
                    flip='flip',
                    nb_frames=nb_frames,
                    data_stream=ForceFloatX(
                        DataStream(dataset=test, iteration_scheme=scheme)))

                ## Do the evaluation
                epoch = stream.get_epoch_iterator()
                for j, batch in enumerate(epoch):
                    output = training_eval(batch[0], batch[1])
                    # import cv2
                    # cv2.imshow('img', batch[0][0, 0, :, :, :])
                    # cv2.waitKey(160)
                    # cv2.destroyAllWindows()
                    #import pdb; pdb.set_trace()
                    labels_flip[batch_size * j:batch_size * (j + 1)] = batch[1]
                    y_hat_flip[batch_size * j:batch_size *
                               (j + 1), :] += output[2]
                preds = y_hat_flip.argmax(axis=1)
                misclass = np.sum(labels_flip != preds) / float(len(preds))
                print i, crop, "flip Misclass:", misclass

                epoch = stream_flip.get_epoch_iterator()
                for j, batch in enumerate(epoch):
                    output = training_eval(batch[0], batch[1])
                    labels[batch_size * j:batch_size * (j + 1)] = batch[1]
                    y_hat[batch_size * j:batch_size * (j + 1), :] += output[2]
                preds = y_hat.argmax(axis=1)
                misclass = np.sum(labels != preds) / float(len(preds))
                print i, crop, "noflip Misclass:", misclass

                y_merge = y_hat + y_hat_flip
                preds = y_merge.argmax(axis=1)
                misclass = np.sum(labels != preds) / float(len(preds))
                print i, crop, "avg Misclass:", misclass

        ### Compute misclass
        y_hat += y_hat_flip
        preds = y_hat.argmax(axis=1)
        misclass = np.sum(labels != preds) / float(len(preds))
        print "Misclass:", misclass
Code example #26
File: theanotest.py Project: YuzhouWang/657-Project
# test = tensor.eq(x, s)
# a = test.eval()
# print a
#xSub = theano.shared(x)
probsPadded = tensor.zeros_like(vocab, dtype=numpy.float32)
probsSubset = probsPadded[context]
b = tensor.set_subtensor(probsSubset, probs)

ans1probs = b[ans1]
ans1score = ans1probs.sum()
ans2probs = b[ans2]
ans2score = ans2probs.sum()
ans3probs = b[ans3]
ans3score = ans3probs.sum()
allans = tensor.stacklists([ans1score, ans2score, ans3score])
pred = tensor.argmax(allans)


cg = ComputationGraph([ans1probs, ans1score, ans2probs, ans2score, ans3probs, ans3score, allans, pred])
f = cg.get_theano_function()
out = f()
# a = probsPadded.eval()
# be = b.eval()
# a1p = ans1probs.eval()
# a1 = ans1score.eval()
# print a
# print be
# print a1p
# print a1
print out
Code example #27
save_dir = os.environ['RESULTS_DIR']
save_dir = os.path.join(save_dir,'blizzard/')

experiment_name = "sp_only_0"

main_loop = load(save_dir+"pkl/best_"+experiment_name+".pkl")

generator = main_loop.model.get_top_bricks()[0]

steps = 2048
n_samples = 1

sample = ComputationGraph(generator.generate(n_steps=steps, 
    batch_size=n_samples, iterate=True))
sample_fn = sample.get_theano_function()

outputs = sample_fn()[-2]

outputs = outputs*sp_std + sp_mean
outputs = outputs.swapaxes(0,1)
outputs = outputs[0]

print outputs.max(), outputs.min()

pyplot.figure(figsize=(100,15))
pyplot.imshow(outputs.T)
pyplot.colorbar()
pyplot.gca().invert_yaxis()
pyplot.savefig(save_dir+"samples/best_"+experiment_name+"9.png")
pyplot.close()
Code example #28
    def __init__(self, config, vocab_size):
        question = tensor.imatrix('question')

        # set up 32-bit integer matrices
        question_mask = tensor.imatrix('question_mask')
        context = tensor.imatrix('context')
        context_mask = tensor.imatrix('context_mask')
        answer = tensor.ivector('answer')
        candidates = tensor.imatrix('candidates')
        candidates_mask = tensor.imatrix('candidates_mask')

        # and the multiple choice answers:
        ans1 = tensor.ivector('ans1')
        ans1_mask = tensor.ivector('ans1_mask')
        ans2 = tensor.ivector('ans2')
        ans2_mask = tensor.ivector('ans2_mask')
        ans3 = tensor.ivector('ans3')
        ans3_mask = tensor.ivector('ans3_mask')
        ans4 = tensor.ivector('ans4')
        ans4_mask = tensor.ivector('ans4_mask')

        bricks = []

        # inverts 1st and 2nd dimensions of matrix
        question = question.dimshuffle(1, 0)
        question_mask = question_mask.dimshuffle(1, 0)
        context = context.dimshuffle(1, 0)
        context_mask = context_mask.dimshuffle(1, 0)

        # Embed questions and context
        embed = LookupTable(vocab_size, config.embed_size, name='question_embed')
        bricks.append(embed)

        qembed = embed.apply(question)
        cembed = embed.apply(context)
        a1embed = embed.apply(ans1)
        a2embed = embed.apply(ans2)
        a3embed = embed.apply(ans3)
        a4embed = embed.apply(ans4)

        qlstms, qhidden_list = make_bidir_lstm_stack(qembed, config.embed_size, question_mask.astype(theano.config.floatX),
                                                     config.question_lstm_size, config.question_skip_connections, 'q')
        clstms, chidden_list = make_bidir_lstm_stack(cembed, config.embed_size, context_mask.astype(theano.config.floatX),
                                                     config.ctx_lstm_size, config.ctx_skip_connections, 'ctx')
        bricks = bricks + qlstms + clstms

        # Calculate question encoding (concatenate layer1)
        if config.question_skip_connections:
            qenc_dim = 2*sum(config.question_lstm_size)
            qenc = tensor.concatenate([h[-1,:,:] for h in qhidden_list], axis=1)
        else:
            qenc_dim = 2*config.question_lstm_size[-1]
            qenc = tensor.concatenate([h[-1,:,:] for h in qhidden_list[-2:]], axis=1)
        qenc.name = 'qenc'

        # Calculate context encoding (concatenate layer1)
        if config.ctx_skip_connections:
            cenc_dim = 2*sum(config.ctx_lstm_size)
            cenc = tensor.concatenate(chidden_list, axis=2)
        else:
            cenc_dim = 2*config.ctx_lstm_size[-1]
            cenc = tensor.concatenate(chidden_list[-2:], axis=2)
        cenc.name = 'cenc'

        # Attention mechanism MLP
        attention_mlp = MLP(dims=config.attention_mlp_hidden + [1],
                            activations=config.attention_mlp_activations[1:] + [Identity()],
                            name='attention_mlp')
        attention_qlinear = Linear(input_dim=qenc_dim, output_dim=config.attention_mlp_hidden[0], name='attq')
        attention_clinear = Linear(input_dim=cenc_dim, output_dim=config.attention_mlp_hidden[0], use_bias=False, name='attc')
        bricks += [attention_mlp, attention_qlinear, attention_clinear]
        layer1 = Tanh().apply(attention_clinear.apply(cenc.reshape((cenc.shape[0]*cenc.shape[1], cenc.shape[2])))
                                        .reshape((cenc.shape[0],cenc.shape[1],config.attention_mlp_hidden[0]))
                             + attention_qlinear.apply(qenc)[None, :, :])
        layer1.name = 'layer1'
        att_weights = attention_mlp.apply(layer1.reshape((layer1.shape[0]*layer1.shape[1], layer1.shape[2])))
        att_weights.name = 'att_weights_0'
        att_weights = att_weights.reshape((layer1.shape[0], layer1.shape[1]))
        att_weights.name = 'att_weights'

        attended = tensor.sum(cenc * tensor.nnet.softmax(att_weights.T).T[:, :, None], axis=0)
        attended.name = 'attended'

        # Now we can calculate our output
        out_mlp = MLP(dims=[cenc_dim + qenc_dim] + config.out_mlp_hidden + [config.n_entities],
                      activations=config.out_mlp_activations + [Identity()],
                      name='out_mlp')
        bricks += [out_mlp]
        probs = out_mlp.apply(tensor.concatenate([attended, qenc], axis=1))
        probs.name = 'probs'

        # not needed anymore, since we're not only looking at entities
        # is_candidate = tensor.eq(tensor.arange(config.n_entities, dtype='int32')[None, None, :],
        #                          tensor.switch(candidates_mask, candidates, -tensor.ones_like(candidates))[:, :, None]).sum(axis=1)
        # probs = tensor.switch(is_candidate, probs, -1000 * tensor.ones_like(probs))

        # Calculate prediction, cost and error rate

        # vocab = tensor.arange(10)
        # probs = numpy.asarray([0, 0.8, 0, 0.2], dtype=numpy.float32)
        # context = numpy.asarray([3, 2, 8, 1], dtype=numpy.int32)
        # ans3 =  numpy.asarray([2, 8, 1], dtype=numpy.int32)
        # ans1 =  numpy.asarray([1, 3, 4], dtype=numpy.int32)
        # ans2 =  numpy.asarray([1, 1, 4], dtype=numpy.int32)

        # convert probs vector to one that's the same size as vocab, with all zeros except probs:
        # probs = tensor.switch(is_candidate, probs, -1000 * tensor.ones_like(probs))
        probsPadded = tensor.zeros_like(vocab_size, dtype=numpy.float32)
        probsSubset = probsPadded[cembed] #TODO this should be masked
        b = tensor.set_subtensor(probsSubset, probs)

        # get the similarity score of each (masked) answer with the context probs:
        ans1probs = b[a1enc]
        ans1score = tensor.switch(ans1_mask, ans1probs, tensor.zeros_like(ans1probs)).sum()
        ans2probs = b[a2enc]
        ans2score = ans2probs.sum()
        ans3probs = b[a3enc]
        ans3score = ans3probs.sum()
        ans4probs = b[a4enc]
        ans4score = ans4probs.sum()

        # and pick the best one:
        allans = tensor.stacklists([ans1score, ans2score, ans3score, ans4score])
        pred = tensor.argmax(allans)

        cg = ComputationGraph([ans1probs, ans1score, ans2probs, ans2score, ans3probs, ans3score, ans4probs, ans4score, allans, pred])
        f = cg.get_theano_function()
        out = f()

        #pred = probs.argmax(axis=1)
        #print "pred"
        #print pred TODO CHANGE THIS!
        cost = Softmax().categorical_cross_entropy(answer, probs).mean()
        error_rate = tensor.neq(answer, pred).mean()

        # Apply dropout
        cg = ComputationGraph([cost, error_rate])
        if config.w_noise > 0:
            noise_vars = VariableFilter(roles=[WEIGHT])(cg)
            cg = apply_noise(cg, noise_vars, config.w_noise)
        if config.dropout > 0:
            cg = apply_dropout(cg, qhidden_list + chidden_list, config.dropout)
        [cost_reg, error_rate_reg] = cg.outputs

        # Other stuff
        cost_reg.name = cost.name = 'cost'
        error_rate_reg.name = error_rate.name = 'error_rate'


        self.probs = probs
        self.probs.name = "probs"
        self.cost = cost
        self.cost.name = "cost"
        #
        self.sgd_cost = cost_reg
        self.monitor_vars = [[cost_reg], [error_rate_reg]]
        self.monitor_vars_valid = [[cost], [error_rate]]

        # Initialize bricks
        for brick in bricks:
            brick.weights_init = config.weights_init
            brick.biases_init = config.biases_init
            brick.initialize()
Code example #29
 def __init__(self, variable, **kwargs):
     super(SaveComputationGraph, self).__init__(**kwargs)
     variable_graph = ComputationGraph(variable)
     self.theano_function = variable_graph.get_theano_function()
Code example #30
    def _create_main_loop(self):
        # hyper parameters
        hp = self.params
        batch_size = hp['batch_size']
        biases_init = Constant(0)
        batch_normalize = hp['batch_normalize']

        ### Build fprop
        tensor5 = T.TensorType(config.floatX, (False,)*5)
        X = tensor5("images")
        #X = T.tensor4("images")
        y = T.lvector('targets')

        gnet_params = OrderedDict()
        #X_shuffled = X[:, :, :, :, [2, 1, 0]]
        #X_shuffled = gpu_contiguous(X.dimshuffle(0, 1, 4, 2, 3)) * 255

        X = X[:, :, :, :, [2, 1, 0]]
        X_shuffled = X.dimshuffle((0, 1, 4, 2, 3)) * 255
        X_r = X_shuffled.reshape((X_shuffled.shape[0],
                                  X_shuffled.shape[1]*X_shuffled.shape[2],
                                  X_shuffled.shape[3], X_shuffled.shape[4]))
        X_r = X_r - (np.array([104, 117, 123])[None, :, None, None]).astype('float32')


        expressions, input_data, param = stream_layer_exp(inputs = ('data', X_r),
                                                          mode='rgb')
        res = expressions['outloss']
        y_hat = res.flatten(ndim=2)

        import pdb; pdb.set_trace()

        ### Build Cost
        cost = CategoricalCrossEntropy().apply(y, y_hat)
        cost = T.cast(cost, theano.config.floatX)
        cost.name = 'cross_entropy'

        y_pred = T.argmax(y_hat, axis=1)
        misclass = T.cast(T.mean(T.neq(y_pred, y)), theano.config.floatX)
        misclass.name = 'misclass'

        monitored_channels = []
        monitored_quantities = [cost, misclass, y_hat, y_pred]
        model = Model(cost)

        training_cg = ComputationGraph(monitored_quantities)
        inference_cg = ComputationGraph(monitored_quantities)

        ### Get evaluation function
        #training_eval = training_cg.get_theano_function(additional_updates=bn_updates)
        training_eval = training_cg.get_theano_function()
        #inference_eval = inference_cg.get_theano_function()


        # Dataset
        test = JpegHDF5Dataset('test',
                               #name='jpeg_data_flows.hdf5',
                               load_in_memory=True)
        #mean = np.load(os.path.join(os.environ['UCF101'], 'mean.npy'))
        import pdb; pdb.set_trace()

        ### Eval
        labels = np.zeros(test.num_video_examples)
        y_hat = np.zeros((test.num_video_examples, 101))
        labels_flip = np.zeros(test.num_video_examples)
        y_hat_flip = np.zeros((test.num_video_examples, 101))

        ### Important to shuffle list for batch normalization statistic
        #rng = np.random.RandomState()
        #examples_list = range(test.num_video_examples)
        #import pdb; pdb.set_trace()
        #rng.shuffle(examples_list)

        nb_frames=1

        for i in xrange(24):
            scheme = HDF5SeqScheme(test.video_indexes,
                                   examples=test.num_video_examples,
                                   batch_size=batch_size,
                                   f_subsample=i,
                                   nb_subsample=25,
                                   frames_per_video=nb_frames)
           #for crop in ['upleft', 'upright', 'downleft', 'downright', 'center']:
            for crop in ['center']:
                stream = JpegHDF5Transformer(
                    input_size=(240, 320), crop_size=(224, 224),
                    #input_size=(256, 342), crop_size=(224, 224),
                    crop_type=crop,
                    translate_labels = True,
                    flip='noflip', nb_frames = nb_frames,
                    data_stream=ForceFloatX(DataStream(
                            dataset=test, iteration_scheme=scheme)))
                stream_flip = JpegHDF5Transformer(
                    input_size=(240, 320), crop_size=(224, 224),
                    #input_size=(256, 342), crop_size=(224, 224),
                    crop_type=crop,
                    translate_labels = True,
                    flip='flip', nb_frames = nb_frames,
                    data_stream=ForceFloatX(DataStream(
                            dataset=test, iteration_scheme=scheme)))

                ## Do the evaluation
                epoch = stream.get_epoch_iterator()
                for j, batch in enumerate(epoch):
                    output = training_eval(batch[0], batch[1])
                    # import cv2
                    # cv2.imshow('img', batch[0][0, 0, :, :, :])
                    # cv2.waitKey(160)
                    # cv2.destroyAllWindows()
                    #import pdb; pdb.set_trace()
                    labels_flip[batch_size*j:batch_size*(j+1)] = batch[1]
                    y_hat_flip[batch_size*j:batch_size*(j+1), :] += output[2]
                preds = y_hat_flip.argmax(axis=1)
                misclass =  np.sum(labels_flip != preds) / float(len(preds))
                print i, crop, "flip Misclass:", misclass

                epoch = stream_flip.get_epoch_iterator()
                for j, batch in enumerate(epoch):
                    output = training_eval(batch[0], batch[1])
                    labels[batch_size*j:batch_size*(j+1)] = batch[1]
                    y_hat[batch_size*j:batch_size*(j+1), :] += output[2]
                preds = y_hat.argmax(axis=1)
                misclass =  np.sum(labels != preds) / float(len(preds))
                print i, crop, "noflip Misclass:", misclass

                y_merge = y_hat + y_hat_flip
                preds = y_merge.argmax(axis=1)
                misclass =  np.sum(labels != preds) / float(len(preds))
                print i, crop, "avg Misclass:", misclass


        ### Compute misclass
        y_hat += y_hat_flip
        preds = y_hat.argmax(axis=1)
        misclass =  np.sum(labels != preds) / float(len(preds))
        print "Misclass:", misclass