Example #1
0
    def get_updates(self, learning_rate, grads, lr_scalers):
        """Delegate to the wrapped learning rule and add accumulator updates.

        The updates produced by ``self.learning_rule.get_updates`` are
        extended with one entry per monitored value: each accumulator is
        incremented by its value, cloned so that it reads the inputs of
        the gradient graph instead of its own inputs.

        The substitution is name-based and admittedly hacky. Gradient
        inputs are typically named `$ALGO[$SOURCE]` in PyLearn2 (`$ALGO`
        being the algorithm name and `$SOURCE` a source name from the
        data specification), while inputs of monitored values are
        expected to be named after the source. An input called ``name``
        is therefore matched with the gradient input whose name ends
        with ``[name]``.

        """
        updates = self.learning_rule.get_updates(learning_rate, grads,
                                                 lr_scalers)
        grad_inputs = ComputationGraph(list(grads.values())).dict_of_inputs()
        for value, accumulator in zip(self.values, self.accumulators):
            substitutions = {}
            monitored_inputs = ComputationGraph(value).dict_of_inputs()
            for name, input_ in monitored_inputs.items():
                # Suffix convention is explained in the docstring.
                suffix = '[{}]'.format(name)
                candidates = [n for n in grad_inputs if n.endswith(suffix)]
                grad_input = grad_inputs[unpack(candidates, singleton=True)]
                substitutions[input_] = tensor.unbroadcast(
                    grad_input, *range(grad_input.ndim))
            updates[accumulator] = (
                accumulator + theano.clone(value, substitutions))
        self._callback_called = True
        updates.update(self.updates)
        return updates
Example #2
0
    def get_cost_graph(self, batch=True,
                       prediction=None, prediction_mask=None):
        """Build the computation graph of the cost.

        Parameters
        ----------
        batch : bool
            When true, the batch variables ``self.inputs``,
            ``self.inputs_mask``, ``self.labels`` and ``self.labels_mask``
            are used. Otherwise the single-example variables are turned
            into a batch through ``self.bottom.single_to_batch_inputs``
            and the groundtruth has no mask.
        prediction : variable, optional
            Labels passed to ``self.cost``. Defaults to the groundtruth.
        prediction_mask : variable, optional
            Mask passed to ``self.cost`` together with `prediction`.
            Defaults to the groundtruth mask.

        Returns
        -------
        The ``ComputationGraph`` of the cost. When the criterion name
        starts with ``"mse"``, the variable named ``'groundtruth'`` in
        that graph is replaced by the actual groundtruth.

        """
        if batch:
            inputs = self.inputs
            inputs_mask = self.inputs_mask
            groundtruth = self.labels
            groundtruth_mask = self.labels_mask
        else:
            inputs, inputs_mask = self.bottom.single_to_batch_inputs(
                self.single_inputs)
            groundtruth = self.single_labels[:, None]
            groundtruth_mask = None

        # Compare against None explicitly: the arguments are symbolic
        # variables, whose truth value is not a reliable "was it given?"
        # test.
        if prediction is None:
            prediction = groundtruth
        if prediction_mask is None:
            prediction_mask = groundtruth_mask

        cost = self.cost(inputs_mask=inputs_mask,
                         labels=prediction,
                         labels_mask=prediction_mask,
                         **inputs)
        cost_cg = ComputationGraph(cost)
        if self.criterion['name'].startswith("mse"):
            # Exactly one placeholder is expected; the unpacking fails
            # otherwise.
            placeholder, = VariableFilter(theano_name='groundtruth')(cost_cg)
            cost_cg = cost_cg.replace({placeholder: groundtruth})
        return cost_cg
Example #3
0
def test_replace():
    """Check that ``replace`` works when the replaced variable is an output."""
    x = tensor.scalar()
    y = x + 1
    original_cg = ComputationGraph([y])
    replacements = [(y, 2 * y)]
    doubled_cg = original_cg.replace(replacements)
    doubled_output = doubled_cg.outputs[0]
    # With x = 2 the doubled output is 2 * (2 + 1).
    assert doubled_output.eval({x: 2}) == 6.0
Example #4
0
def test_snapshot():
    """Check the number of entries ``get_snapshot`` returns for a small MLP."""
    x = tensor.matrix('x')
    mlp = MLP([Identity(), Identity()], [10, 10, 10],
              weights_init=Constant(1), biases_init=Constant(2))
    mlp.initialize()
    cg = ComputationGraph(mlp.apply(x))
    batch = {'x': numpy.zeros((1, 10), dtype=floatX)}
    snapshot = cg.get_snapshot(batch)
    assert len(snapshot) == 14
def test_replace_variable_is_auxiliary():
    """Replacing a variable tagged AUXILIARY must emit a single warning."""
    with warnings.catch_warnings(record=True) as caught:
        x = tensor.scalar()
        y = x + 1
        add_role(y, AUXILIARY)
        graph = ComputationGraph([y])
        graph.replace([(y, 2 * y)])
        assert len(caught) == 1
        last_message = str(caught[-1].message)
        assert "auxiliary" in last_message
def test_replace_variable_not_in_graph():
    """Replacing a variable absent from the graph must emit a single warning."""
    with warnings.catch_warnings(record=True) as caught:
        x = tensor.scalar()
        y = x + 1
        # z is never used to build y, so it is not part of the graph.
        z = tensor.scalar()
        graph = ComputationGraph([y])
        replacements = [(y, 2 * y), (z, 2 * z)]
        graph.replace(replacements)
        assert len(caught) == 1
        last_message = str(caught[-1].message)
        assert "not a part of" in last_message
    def __init__(self, data_stream, variables, path=None, **kwargs):
        """Store the configuration and compile the prediction function.

        Parameters
        ----------
        data_stream
            Stored as ``self.data_stream``.
        variables
            Outputs of the computation graph that gets compiled into
            ``self.theano_function``.
        path : optional
            Stored as ``self.path``.
        **kwargs
            Forwarded to the parent constructor; ``after_training``
            defaults to ``True`` when not supplied.

        """
        self.data_stream, self.variables = data_stream, variables
        self.path = path
        self.prediction = None

        if "after_training" not in kwargs:
            kwargs["after_training"] = True
        super(PredictDataStream, self).__init__(**kwargs)

        self.theano_function = ComputationGraph(
            variables).get_theano_function()
Example #8
0
    def __init__(self, generator, steps=320, n_samples = 10, 
            mean_data = 0, std_data = 1, sample_rate = 8000,
            save_name = "sample_", **kwargs):
        """Compile the sampling function and store the output settings.

        Parameters
        ----------
        generator
            Object whose ``generate`` application builds the sampling
            graph.
        steps : int
            Passed as ``n_steps`` to ``generator.generate``. The value
            used to be overwritten with a hard-coded 300 inside the
            constructor, which made the argument a no-op; it is now
            honoured.
        n_samples : int
            Passed as ``batch_size`` to ``generator.generate``.
        mean_data, std_data, sample_rate, save_name
            Stored unchanged on the instance.
        **kwargs
            Forwarded to the parent constructor.

        """
        super(Speak, self).__init__(**kwargs)
        sample = ComputationGraph(generator.generate(n_steps=steps, 
            batch_size=n_samples, iterate=True))
        self.sample_fn = sample.get_theano_function()

        self.mean_data = mean_data
        self.std_data = std_data
        self.sample_rate = sample_rate
        self.save_name = save_name
Example #9
0
    def __init__(self, data_stream, variables, path=None, **kwargs):
        """Store the configuration and compile the prediction function.

        ``variables`` become the outputs of a computation graph that is
        compiled into ``self.theano_function``. Unless the caller says
        otherwise, ``after_training`` is set to ``True`` before the
        remaining keyword arguments go to the parent constructor.

        """
        self.data_stream, self.variables = data_stream, variables
        self.path = path
        self.prediction = None

        if 'after_training' not in kwargs:
            kwargs['after_training'] = True
        super(PredictDataStream, self).__init__(**kwargs)

        self.theano_function = ComputationGraph(
            variables).get_theano_function()
Example #10
0
def test_computation_graph():
    """Exercise inputs, outputs, variables, replace, shared vars and scans."""
    x = tensor.matrix('x')
    y = tensor.matrix('y')
    z = x + y
    z.name = 'z'
    a = z.copy()
    a.name = 'a'
    b = z.copy()
    b.name = 'b'
    r = tensor.matrix('r')

    # Basic structure of a graph with two outputs sharing one ancestor.
    cg = ComputationGraph([a, b])
    assert set(cg.inputs) == {x, y}
    assert set(cg.outputs) == {a, b}
    assert set(cg.variables) == {x, y, z, a, b}
    assert cg.variables[2] is z
    assert ComputationGraph(a).inputs == cg.inputs

    # Replacing the shared ancestor leaves only the replacement as input.
    replaced_cg = cg.replace({z: r})
    assert set(replaced_cg.inputs) == {r}
    output_names = set([v.name for v in replaced_cg.outputs])
    assert output_names == {'a', 'b'}

    # Shared variables.
    W = theano.shared(numpy.zeros((3, 3),
                                  dtype=theano.config.floatX))
    shared_cg = ComputationGraph([z + W])
    assert set(shared_cg.shared_variables) == {W}

    assert ComputationGraph([W]).variables == [W]

    w1 = W ** 2
    squared_cg = ComputationGraph([w1])
    for expected in (W, w1):
        assert expected in squared_cg.variables

    # Test scan
    s, _ = theano.scan(lambda inp, accum: accum + inp,
                       sequences=x,
                       outputs_info=tensor.zeros_like(x[0]))
    scan_op = s.owner.inputs[0].owner.op
    scan_cg = ComputationGraph(s)
    assert scan_cg.scans == [scan_op]
    scan_internals = scan_op.inputs + scan_op.outputs
    assert all(v in scan_cg.scan_variables for v in scan_internals)
Example #11
0
def test_computation_graph():
    """Check inputs, outputs, variables and ``replace`` on a small graph."""
    x = tensor.matrix('x')
    y = tensor.matrix('y')
    z = x + y
    a = z.copy()
    a.name = 'a'
    b = z.copy()
    b.name = 'b'
    r = tensor.matrix('r')

    # Two outputs that share the ancestor z.
    cg = ComputationGraph([a, b])
    assert set(cg.inputs) == {x, y}
    assert set(cg.outputs) == {a, b}
    assert set(cg.variables) == {x, y, z, a, b}
    assert ComputationGraph(a).inputs == cg.inputs

    # After swapping z for r, r is the only remaining input.
    replaced_cg = cg.replace({z: r})
    assert set(replaced_cg.inputs) == {r}
    output_names = set([v.name for v in replaced_cg.outputs])
    assert output_names == {'a', 'b'}
Example #12
0
File: nn.py Project: Nozdi/masters
    def _get_bn_params(self, output_vars):
        """Collect batch-normalization parameters and their shared variables.

        Parameters
        ----------
        output_vars
            Output variables whose computation graph is searched for
            variables carrying the ``BNPARAM`` role.

        Returns
        -------
        bn_ps : list
            The variables found with the ``BNPARAM`` role.
        bn_share : list
            For each entry of `bn_ps`, the key of ``cg.updates`` named
            ``'shared_<name>'``. Empty when no parameter was found.
        output_vars_replaced
            `output_vars` with every entry of `bn_ps` replaced by its
            counterpart from `bn_share`, or `output_vars` itself when no
            parameter was found.

        As a side effect ``self._bn``, ``self._counter`` and
        ``self._counter_max`` are set.

        """
        # Pick out the nodes with batch normalization vars
        cg = ComputationGraph(output_vars)
        var_filter = VariableFilter(roles=[BNPARAM])
        bn_ps = var_filter(cg.variables)

        if len(bn_ps) == 0:
            # `warning` replaces the deprecated `warn` alias.
            logger.warning('No batch normalization parameters found - is' +
                           ' batch normalization turned off?')
            self._bn = False
            self._counter = None
            self._counter_max = None
            bn_share = []
            output_vars_replaced = output_vars
        else:
            self._bn = True
            assert len(set([p.name for p in bn_ps])) == len(bn_ps), \
                'Some batch norm params have the same name'
            logger.info('Batch norm parameters: %s' %
                        ', '.join([p.name for p in bn_ps]))

            # Filter out the shared variables from the model updates
            def filter_share(par):
                lst = [
                    up for up in cg.updates
                    if up.name == 'shared_%s' % par.name
                ]
                assert len(lst) == 1
                return lst[0]

            # Build a real list: on Python 3 `map` yields a one-shot
            # iterator that the `zip` below would exhaust, leaving the
            # returned `bn_share` empty.
            bn_share = [filter_share(par) for par in bn_ps]

            # Replace the BN coefficients in the test data model - Replace the
            # theano variables in the test graph with the shareds
            output_vars_replaced = cg.replace(
                list(zip(bn_ps, bn_share))).outputs

            # Pick out the counter
            self._counter = self._param_from_updates(cg.updates, 'counter')
            self._counter_max = self._param_from_updates(
                cg.updates, 'counter_max')

        return bn_ps, bn_share, output_vars_replaced
Example #13
0
 def _create_model(with_dropout):
     """Build a ``Model`` of the losses, optionally with dropout applied.

     When `with_dropout` is true, dropout with probability 0.2 is applied
     to the INPUT-role variables of a selection of discriminator layers.
     """
     cg = ComputationGraph(ali.compute_losses(x, z))
     if not with_dropout:
         return Model(cg.outputs)

     discriminator = ali.discriminator
     dropout_bricks = (
         [discriminator.x_discriminator.layers[0]] +
         discriminator.x_discriminator.layers[2::3] +
         discriminator.z_discriminator.layers[::2] +
         discriminator.joint_discriminator.layers[::2])
     select_inputs = VariableFilter(bricks=dropout_bricks, roles=[INPUT])
     cg = apply_dropout(cg, select_inputs(cg.variables), 0.2)
     return Model(cg.outputs)
Example #14
0
def test_application_graph_auxiliary_vars():
    """A variable named 'test_val' must show up among the graph variables."""
    X = tensor.matrix('X')
    brick = TestBrick(0)
    Y = brick.access_application_call(X)
    graph = ComputationGraph(outputs=[Y])
    assert any(var.name == 'test_val' for var in graph.variables)
Example #15
0
def construct_graphs(task, hyperparameters, **kwargs):
    """Build the per-dataset graphs, outputs and updates for a task.

    Parameters
    ----------
    task
        Provides the input variables (``get_variables``), the emitter
        (``get_emitter``) and the names of additional outputs to monitor
        (``monitor_outputs``).
    hyperparameters : dict
        Forwarded as keyword arguments to ``construct_model``,
        ``task.get_emitter`` and ``prepare_mode``.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    graphs_by_set, outputs_by_set, updates_by_set : OrderedDict
        Each is keyed by ``"train"``, ``"valid"`` and ``"test"``. The
        train entries come from the ``"training"`` mode, the valid and
        test entries from the ``"inference"`` mode.

    """
    x, x_shape, y = task.get_variables()

    convnet = construct_model(task=task, **hyperparameters)
    convnet.initialize()

    h = convnet.apply(x)
    h = h.flatten(ndim=2)

    emitter = task.get_emitter(input_dim=np.prod(convnet.get_dim("output")),
                               **hyperparameters)
    emitter.initialize()

    emitter_outputs = emitter.emit(h, y)
    cost = emitter_outputs.cost.copy(name="cost")

    # gather all the outputs we could possibly care about for training
    # *and* monitoring; prepare_graphs will do graph transformations
    # after which we may *only* use these to access *any* variables.
    # The mapping is spelled out explicitly instead of going through
    # locals(), which silently breaks if a local is ever renamed.
    outputs_by_name = OrderedDict([("x", x),
                                   ("x_shape", x_shape),
                                   ("cost", cost)])
    for key in task.monitor_outputs():
        outputs_by_name[key] = emitter_outputs[key]
    outputs = list(outputs_by_name.values())

    # construct training and inference graphs
    mode_by_set = OrderedDict([("train", "training"), ("valid", "inference"),
                               ("test", "inference")])
    outputs_by_mode, updates_by_mode = OrderedDict(), OrderedDict()
    for mode in "training inference".split():
        (outputs_by_mode[mode],
         updates_by_mode[mode]) = prepare_mode(mode,
                                               outputs,
                                               convnet=convnet,
                                               emitter=emitter,
                                               **hyperparameters)
    # inference updates may make sense at some point but don't know
    # where to put them now
    assert not updates_by_mode["inference"]

    # assign by set for convenience
    graphs_by_set = OrderedDict()
    outputs_by_set = OrderedDict()
    updates_by_set = OrderedDict()
    for which_set, mode in mode_by_set.items():
        mode_outputs = outputs_by_mode[mode]
        graphs_by_set[which_set] = ComputationGraph(mode_outputs)
        outputs_by_set[which_set] = OrderedDict(
            util.equizip(outputs_by_name.keys(), mode_outputs))
        updates_by_set[which_set] = updates_by_mode[mode]

    return graphs_by_set, outputs_by_set, updates_by_set
    def __init__(self, quantities):
        """Collect what the quantities require and build the graph for it.

        ``self.requires`` holds the de-duplicated union of every
        quantity's ``requires``; a computation graph over it provides
        ``self.inputs``.
        """
        self.quantities = quantities
        all_requirements = [required
                            for quantity in quantities
                            for required in quantity.requires]
        self.requires = list(set(all_requirements))
        self._initialized = False

        self.quantity_names = [quantity.name for quantity in self.quantities]
        graph = ComputationGraph(self.requires)
        self._computation_graph = graph
        self.inputs = graph.inputs
Example #17
0
def test_apply_noise():
    """Noise applied to ``y`` must shift the output by the seeded sample."""
    x = tensor.scalar()
    y = tensor.scalar()
    z = x + y

    noised_cg = apply_noise(ComputationGraph([z]), [y], 1, 1)
    actual = noised_cg.outputs[0].eval({x: 1., y: 1.})
    # Same seed as the one handed to apply_noise above.
    expected = 2 + MRG_RandomStreams(1).normal(tuple()).eval()
    assert_allclose(actual, expected)
Example #18
0
 def _get_updates(self, bn_ps, bn_share):
     """Return the graph updates for the counter and the shared BN params.

     The result is an ``OrderedDict`` restricted to the update keys that
     are named ``'counter'`` or are contained in `bn_share`. The stored
     counter and counter maximum are checked against the graph.
     """
     cg = ComputationGraph(bn_ps)
     graph_updates = cg.updates
     # Only store updates that relate to params or the counter
     updates = OrderedDict()
     for up in graph_updates:
         if up.name == 'counter' or up in bn_share:
             updates[up] = graph_updates[up]
     assert self._counter == self._param_from_updates(cg.updates, 'counter')
     assert self._counter_max == self._param_from_updates(
         cg.updates, 'counter_max')
     # One update per parameter plus one for the counter.
     assert len(updates) == len(bn_ps) + 1, \
         'Counter or var missing from update'
     return updates
Example #19
0
def test_convolutional_sequence_use_bias():
    """With ``use_bias=False`` the only parameters are the three weights."""
    layers = []
    for _ in range(3):
        layers.extend([Convolutional(filter_size=(1, 1), num_filters=1),
                       Rectifier()])
    cnn = ConvolutionalSequence(
        layers,
        num_channels=1, image_size=(1, 1),
        use_bias=False)
    cnn.allocate()
    x = tensor.tensor4()
    params = ComputationGraph(cnn.apply(x)).parameters
    assert len(params) == 3
    assert all(param.name == 'W' for param in params)
Example #20
0
    def __init__(self, generator, N=8, steps=1200, path='samples', **kwargs):
        """Store the settings and compile the sampling function.

        ``generator.generate`` is called with ``n_steps=steps`` and a
        batch size of ``N * N``; the resulting graph is compiled into
        ``self.sample``. Remaining keyword arguments go to the parent
        constructor.
        """
        self.N = N
        self.path = path
        super(Sample, self).__init__(**kwargs)

        generated = generator.generate(n_steps=steps,
                                       batch_size=self.N * self.N,
                                       iterate=True)
        sampling_graph = ComputationGraph(generated)
        self.sample = sampling_graph.get_theano_function()
Example #21
0
    def __init__(self, variables, use_take_last=False):
        """Validate the variable names, build the graph and compile.

        The names are checked by ``_validate_variable_names`` before
        anything is stored. The inputs of the computation graph over
        `variables` are exposed as ``self.inputs``.
        """
        _validate_variable_names(variables)
        self.variables = variables
        self.variable_names = [variable.name for variable in self.variables]
        self.use_take_last = use_take_last
        graph = ComputationGraph(self.variables)
        self._computation_graph = graph
        self.inputs = graph.inputs

        self._initialized = False
        self._create_aggregators()
        self._compile()
Example #22
0
    def do(self, which_callback, *args):
        """Drop into the ipdb debugger, then look up model and stream objects.

        NOTE(review): this looks like leftover debugging code. The
        ``set_trace`` call halts execution every time the callback fires,
        ``num_batches`` is never used afterwards, and the value of the
        last expression is discarded. Confirm whether this method should
        be removed or replaced before running unattended.
        """
        import ipdb
        ipdb.set_trace()
        # Presumably the second registered extension carries the data
        # stream of interest; this depends on the extension order chosen
        # by the caller — TODO confirm.
        vds = self.main_loop.extensions[1].data_stream
        # NOTE(review): `/` is true division on Python 3, so this is a
        # float there; the value is unused in any case.
        num_batches = 1 + vds.data_stream.dataset.num_examples / vds.batch_size
        # for i in range(9):
        #     batch = vds.get_epoch_iterator().next()
        # import ipdb; ipdb.set_trace()

        mlp = self.main_loop.model.top_bricks[1]
        probs = mlp.apply_outputs
        # The result is not stored; only useful when stepping through in
        # the debugger.
        ComputationGraph(probs).inputs
Example #23
0
 def do(self, which_callback, *args, **kwargs):
     """Locate the learning rate before training and reset it after epochs.

     Before training, the variable named ``'learning_rate'`` is looked up
     in the graph of the algorithm's total step norm. After each epoch
     it is set to ``self._annealing_learning_rate``. Any other callback
     raises ``ValueError``.
     """
     if which_callback == 'before_training':
         step_norm_graph = ComputationGraph(
             self.main_loop.algorithm.total_step_norm)
         find_learning_rate = VariableFilter(theano_name='learning_rate')
         # Exactly one match is expected; the unpacking fails otherwise.
         self._learning_rate_var, = find_learning_rate(step_norm_graph)
         logger.debug("Annealing extension is initialized")
         return
     if which_callback == 'after_epoch':
         logger.debug("Annealing the learning rate to {}".format(
             self._annealing_learning_rate))
         self._learning_rate_var.set_value(self._annealing_learning_rate)
         return
     raise ValueError("don't know what to do")
Example #24
0
 def _create_model(with_dropout):
     """Build a ``Model`` of the losses, optionally with dropout applied.

     When `with_dropout` is true, dropout is applied in two passes: 0.5
     on the INPUT-role variables of the joint discriminator's children
     from the second one on, then 0.2 on the INPUT-role variables of the
     joint discriminator itself.
     """
     cg = ComputationGraph(ali.compute_losses(x, z))
     if not with_dropout:
         return Model(cg.outputs)

     child_inputs = VariableFilter(
         bricks=ali.discriminator.joint_discriminator.children[1:],
         roles=[INPUT])(cg.variables)
     cg = apply_dropout(cg, child_inputs, 0.5)
     # The second filter runs on the graph produced by the first pass.
     joint_inputs = VariableFilter(
         bricks=[ali.discriminator.joint_discriminator],
         roles=[INPUT])(cg.variables)
     cg = apply_dropout(cg, joint_inputs, 0.2)
     return Model(cg.outputs)
Example #25
0
def test_batchnorm_rolling():
    """Check ``layer.u`` and ``layer.s`` after 100 calls on a fixed batch."""
    layer = BatchNormalization(input_dim=5, rolling_accumulate=True)
    layer.initialize()
    x = T.matrix()

    # All ones except for a single outlier in the first column.
    x_val = np.ones((6, 5), dtype=theano.config.floatX)
    x_val[0, 0] = 10.0

    compiled = ComputationGraph([layer.apply(x)]).get_theano_function()
    for _ in range(100):
        compiled(x_val)

    u = layer.u.get_value()
    s = layer.s.get_value()
    assert_allclose(u[0], 1.58491838)
    assert_allclose(u[1], 0.6339674)
    assert_allclose(s[0], 7.13214684)
    assert_allclose(s[1], 0.)
Example #26
0
    def __init__(self,
                 generator,
                 steps=320,
                 n_samples=10,
                 mean_data=0,
                 std_data=1,
                 sample_rate=8000,
                 save_name="sample_",
                 **kwargs):
        """Compile the sampling function and store the output settings.

        Parameters
        ----------
        generator
            Object whose ``generate`` application builds the sampling
            graph.
        steps : int
            Passed as ``n_steps`` to ``generator.generate``. The value
            used to be overwritten with a hard-coded 300 inside the
            constructor, which made the argument a no-op; it is now
            honoured.
        n_samples : int
            Passed as ``batch_size`` to ``generator.generate``.
        mean_data, std_data, sample_rate, save_name
            Stored unchanged on the instance.
        **kwargs
            Forwarded to the parent constructor.

        """
        super(Speak, self).__init__(**kwargs)
        sample = ComputationGraph(
            generator.generate(n_steps=steps,
                               batch_size=n_samples,
                               iterate=True))
        self.sample_fn = sample.get_theano_function()

        self.mean_data = mean_data
        self.std_data = std_data
        self.sample_rate = sample_rate
        self.save_name = save_name
Example #27
0
    def __init__(self, worker, experiment, config):
        """Assemble the CIFAR10 training loop and store it as ``self.main_loop``.

        Parameters
        ----------
        worker
            Only ``worker.path`` is read: the log is written below
            ``<worker.path>/updates``.
        experiment
            Not used by the active code (only by commented-out
            extensions).
        config
            Passed to ``ModelHelper`` to build the model.

        """
        # Data
        dataset = CIFAR10('train', flatten=False)
        test_dataset = CIFAR10('test', flatten=False)
        batch_size = 128

        scheme = ShuffledScheme(dataset.num_examples, batch_size)
        datastream = DataStream(dataset, iteration_scheme=scheme)

        test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
        test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

        # Model
        m = ModelHelper(config)

        # Only referenced by the commented-out DistributeWhetlabFinish
        # extension below.
        def score_func(mainloop):
            scores = mainloop.log.to_dataframe()["test_accur"].values
            return np.mean(np.sort(scores)[-4:-1])

        # Algorithm
        cg = ComputationGraph([m.cost])
        algorithm = GradientDescent(cost=m.cost,
                                    params=cg.parameters,
                                    step_rule=AdaM())

        #job_name = os.path.basename(worker.running_job)
        job_name = os.path.basename(".")
        update_path = (os.path.join(os.path.join(worker.path, "updates"),
                                    job_name))
        # makedirs also creates the intermediate "updates" directory,
        # which a plain mkdir cannot do when it is missing.
        if not os.path.isdir(update_path):
            os.makedirs(update_path)

        self.main_loop = MainLoop(
            algorithm,
            datastream,
            model=Model(m.cost),
            extensions=[
                Timing(),
                TrainingDataMonitoring([m.cost, m.accur],
                                       prefix="train",
                                       after_epoch=True),
                DataStreamMonitoring([m.cost, m.accur],
                                     test_stream,
                                     prefix="test"),
                FinishAfter(after_n_epochs=1),
                LogToFile(os.path.join(update_path, "log.csv")),
                Printing(),
                EpochProgress(dataset.num_examples // batch_size + 1)
                #, DistributeUpdate(worker, every_n_epochs=1)
                #, DistributeWhetlabFinish(worker, experiment, score_func)
                #, Plot('cifar10',
                #channels=[['train_cost', 'test_cost'], ['train_accur', 'test_accur']])
            ])
    def __init__(self, samples):
        """Parse the sampling graph of a sequence generator.

        Raises ``ValueError`` when `samples` was not produced by the
        ``generate`` application of a ``BaseSequenceGenerator``.
        """
        # Extracting information from the sampling computation graph
        self.cg = ComputationGraph(samples)
        self.inputs = self.cg.inputs
        self.generator = get_brick(samples)
        if not isinstance(self.generator, BaseSequenceGenerator):
            raise ValueError
        self.generate_call = get_application_call(samples)
        if (not self.generate_call.application == self.generator.generate):
            raise ValueError
        self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

        # Fetching names from the sequence generator
        self.context_names = self.generator.generate.contexts
        self.state_names = self.generator.generate.states

        def find_generator_inputs(name):
            # INPUT-role variables of the generator with the given name,
            # searched in the inner graph of the sampling scan.
            name_filter = VariableFilter(bricks=[self.generator],
                                         name=name,
                                         roles=[INPUT])
            return name_filter(self.inner_cg)

        # Parsing the inner computation graph of sampling scan
        self.contexts = [find_generator_inputs(name)[0]
                         for name in self.context_names]
        self.input_states = []
        # Includes only those state names that were actually used
        # in 'generate'
        self.input_state_names = []
        for name in self.generator.generate.states:
            matches = find_generator_inputs(name)
            if matches:
                self.input_state_names.append(name)
                self.input_states.append(matches[0])

        self.tv_overlap_name = ['tw_vocab_overlap']
        self.tv_overlap = [
            find_generator_inputs(self.tv_overlap_name[0])[0]
        ]
Example #29
0
    def __init__(self,
                 gate_values,
                 updates,
                 dataset,
                 ploting_path=None,
                 **kwargs):
        """Compile one Theano function per LSTM gate.

        Parameters
        ----------
        gate_values : dict
            Must provide the keys ``"in_gates"``, ``"out_gates"`` and
            ``"forget_gates"``. Each graph must have exactly one input,
            named ``"features"``.
        updates
            Iterable of ``(variable, update)`` pairs. Every variable is
            substituted by a fresh shared variable, which also receives
            the update.
        dataset
            Stored as ``self.dataset``.
        ploting_path : optional
            Accepted but not used in this constructor.
        **kwargs
            Forwarded to the parent constructor; ``after_batch``
            defaults to 1.

        """
        kwargs.setdefault("after_batch", 1)
        self.text_length = 300
        self.dataset = dataset
        super(VisualizeGateLSTM, self).__init__(**kwargs)

        in_gates = gate_values["in_gates"]
        out_gates = gate_values["out_gates"]
        forget_gates = gate_values["forget_gates"]
        cg_in = ComputationGraph(in_gates)
        cg_out = ComputationGraph(out_gates)
        cg_forget = ComputationGraph(forget_gates)
        for cg in (cg_in, cg_forget, cg_out):
            assert (len(cg.inputs) == 1)
            assert (cg.inputs[0].name == "features")

        givens = []
        f_updates = []
        for state, new_state in updates:
            # Zero-filled shared stand-in built from the first row of
            # the original state.
            stand_in = theano.shared(state[0:1, :].zeros_like().eval(),
                                     state.name + '-gen')
            givens.append((state, stand_in))
            f_updates.append((stand_in, new_state))

        def compile_gate_function(cg, gates):
            return theano.function(inputs=cg.inputs,
                                   outputs=gates,
                                   givens=givens,
                                   updates=f_updates)

        self.generate_in = compile_gate_function(cg_in, in_gates)
        self.generate_out = compile_gate_function(cg_out, out_gates)
        self.generate_forget = compile_gate_function(cg_forget, forget_gates)
Example #30
0
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):
    """Build the gated car/no-car MLP model and its dropout-regularised cost.

    The prediction is the no-car MLP output, plus the car MLP output
    gated by `features_hascar`, plus the prediction of
    ``build_mlp_onlyloc``, all multiplied by a one-unit MLP applied to
    the first column of `features_nocar_int`.

    Returns
    -------
    prediction
        The symbolic prediction described above.
    cost_dropout1
        The MAPE cost with dropout (0.4) applied to two of the
        INPUT-role variables of the cost graph.
    parameters
        The parameters of the dropout graph.
    cost
        The MAPE cost without dropout.

    """
    mlp_car = MLP(activations=[Rectifier(), Rectifier(), None],
                  dims=[8 + 185, 200, 200, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_interval_car')
    mlp_car.initialize()
    mlp_nocar = MLP(activations=[Rectifier(), Rectifier(), None],
                    dims=[5 + 135, 200, 200, 1],
                    weights_init=IsotropicGaussian(.1),
                    biases_init=Constant(0),
                    name='mlp_interval_nocar')
    mlp_nocar.initialize()

    feature_car = tensor.concatenate((features_car_cat, features_car_int),
                                     axis=1)
    feature_nocar = tensor.concatenate(
        (features_nocar_cat, features_nocar_int), axis=1)
    prediction = mlp_nocar.apply(feature_nocar)
    # gating with the last feature : does the dude own a car
    prediction += tensor.addbroadcast(features_hascar,
                                      1) * mlp_car.apply(feature_car)

    prediction_loc, _, _, _, = \
            build_mlp_onlyloc(features_car_cat, features_car_int,
                              features_nocar_cat, features_nocar_int,
                              features_cp, features_hascar,
                              means, labels)
    prediction += prediction_loc

    # add crm
    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    # Call form works on both Python 2 and 3; the statement form is a
    # syntax error on Python 3.
    print(input_var)

    # NOTE(review): positions 6 and 7 depend on the order in which the
    # filter returns the inputs — confirm they select the intended ones.
    cg_dropout1 = apply_dropout(cg, [input_var[6], input_var[7]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
Example #31
0
def test_batchnorm_rolling():
    """Check ``layer.u`` and ``layer.s`` after 100 calls on a fixed batch."""
    layer = BatchNormalization(input_dim=5, rolling_accumulate=True)
    layer.initialize()
    x = T.matrix()

    # All ones except for a single outlier in the first column.
    x_val = np.ones((6, 5), dtype=theano.config.floatX)
    x_val[0, 0] = 10.0

    compiled = ComputationGraph([layer.apply(x)]).get_theano_function()
    for _ in range(100):
        compiled(x_val)

    u = layer.u.get_value()
    s = layer.s.get_value()
    assert_allclose(u[0], 1.58491838)
    assert_allclose(u[1], 0.6339674)
    assert_allclose(s[0], 7.13214684)
    assert_allclose(s[1], 0.)
Example #32
0
def test_apply_dropout_custom_divisor():
    """Dropout with ``custom_divisor`` must divide the kept values by it."""
    x = tensor.vector()
    y = tensor.vector()
    z = x - y
    scaled_dropped_cg = apply_dropout(ComputationGraph([z]), [y], 0.8,
                                      seed=2, custom_divisor=2.5)

    x_ = numpy.array([9., 8., 9.], dtype=theano.config.floatX)
    y_ = numpy.array([4., 5., 6.], dtype=theano.config.floatX)

    actual = scaled_dropped_cg.outputs[0].eval({x: x_, y: y_})
    # Same seed as above; keep probability is 1 - 0.8.
    mask = MRG_RandomStreams(2).binomial((3,), p=0.2).eval()
    assert_allclose(actual, x_ - (y_ * mask / 2.5))
Example #33
0
    def __init__(self, samples):
        """Parse the sampling graph of a sequence generator.

        Raises ``ValueError`` when `samples` was not produced by the
        ``generate`` application of a ``BaseSequenceGenerator``. The
        instance starts out with ``self.compiled`` set to ``False``.
        """
        # Extracting information from the sampling computation graph
        self.cg = ComputationGraph(samples)
        self.inputs = self.cg.inputs
        self.generator = get_brick(samples)
        if not isinstance(self.generator, BaseSequenceGenerator):
            raise ValueError
        self.generate_call = get_application_call(samples)
        if (not self.generate_call.application == self.generator.generate):
            raise ValueError
        self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

        # Fetching names from the sequence generator
        self.context_names = self.generator.generate.contexts
        self.state_names = self.generator.generate.states

        # WORKING: new function which returns all the outputs of the
        # generate function as auxiliary variables
        # WORKING: keep all the outputs of the generate function on the
        # beam, parse them at the end
        self.output_names = self.generator.generate.outputs

        def find_generator_inputs(name):
            # INPUT-role variables of the generator with the given name,
            # searched in the inner graph of the sampling scan.
            name_filter = VariableFilter(bricks=[self.generator],
                                         name=name,
                                         roles=[INPUT])
            return name_filter(self.inner_cg)

        # Parsing the inner computation graph of sampling scan
        self.contexts = [find_generator_inputs(name)[0]
                         for name in self.context_names]
        self.input_states = []
        # Includes only those state names that were actually used
        # in 'generate'
        self.input_state_names = []
        for name in self.generator.generate.states:
            matches = find_generator_inputs(name)
            if matches:
                self.input_state_names.append(name)
                self.input_states.append(matches[0])

        self.compiled = False
Example #34
0
    def __init__(self, outputs, return_vars, stream):
        """Store the arguments and compile a function computing `outputs`.

        `outputs` and `return_vars` are wrapped in a list when they are
        not lists already. The compiled function takes the inputs of the
        computation graph over `outputs`; their names are kept in
        ``self.input_names``.
        """
        self.outputs = outputs if isinstance(outputs, list) else [outputs]
        self.return_vars = (return_vars if isinstance(return_vars, list)
                            else [return_vars])
        self.stream = stream

        graph_inputs = ComputationGraph(self.outputs).inputs
        self.input_names = [variable.name for variable in graph_inputs]
        self.f = theano.function(inputs=graph_inputs, outputs=self.outputs)
def use_decoder_on_representations(decoder, training_representation,
                                   sampling_representation):
    """Build the training cost and the sampling graph for a decoder.

    Returns the cost, the sampled outputs, a ``Model`` of the generated
    variables, and the two freshly created target variables
    (``punctuation_marks`` and ``punctuation_marks_mask``).
    """
    punctuation_marks = tensor.lmatrix('punctuation_marks')
    punctuation_marks_mask = tensor.matrix('punctuation_marks_mask')
    # NOTE(review): the target mask is passed twice, once directly after
    # the representation and once after the targets — confirm this is
    # intended.
    cost = decoder.cost(training_representation, punctuation_marks_mask,
                        punctuation_marks, punctuation_marks_mask)

    generated = decoder.generate(sampling_representation)
    search_model = Model(generated)
    outputs_filter = VariableFilter(bricks=[decoder.sequence_generator],
                                    name="outputs")
    # Exactly two matches are expected; only the second one is kept.
    _, samples = outputs_filter(ComputationGraph(generated[1]))

    return cost, samples, search_model, punctuation_marks, punctuation_marks_mask
Example #36
0
def test_apply_dropout():
    """Dropout on ``x`` must mask it and rescale by the keep probability."""
    x = tensor.vector()
    y = tensor.vector()
    z = x * y
    dropped_cg = apply_dropout(ComputationGraph([z]), [x], 0.4, seed=1)

    x_ = numpy.array([5., 6., 7.], dtype=theano.config.floatX)
    y_ = numpy.array([1., 2., 3.], dtype=theano.config.floatX)

    actual = dropped_cg.outputs[0].eval({x: x_, y: y_})
    # Same seed as above; keep probability is 1 - 0.4.
    mask = MRG_RandomStreams(1).binomial((3,), p=0.6).eval()
    assert_allclose(actual, x_ * y_ * mask / 0.6)
    def __init__(self, variables, use_take_last=False):
        """Store the variables, check their names, build the graph, compile.

        Raises ``ValueError`` when two variables share a name. The inputs
        of the computation graph over `variables` are exposed as
        ``self.inputs``.
        """
        self.variables = variables
        self.use_take_last = use_take_last

        self.variable_names = [variable.name for variable in self.variables]
        distinct_names = set(self.variable_names)
        if len(distinct_names) < len(self.variables):
            raise ValueError("variables should have different names")
        graph = ComputationGraph(self.variables)
        self._computation_graph = graph
        self.inputs = graph.inputs

        self._initialized = False
        self._create_aggregators()
        self._compile()
Example #38
0
    def init_beam_search(self, beam_size):
        """Compile beam search and set the beam size.

        The variable named "outputs" that the generator's ``generate``
        application produced is looked up in the generation graph and
        handed to ``BeamSearch``, which is then compiled.

        See Blocks issue #500.

        """
        self.beam_size = beam_size
        generated = self.get_generate_graph()
        outputs_graph = ComputationGraph(generated['outputs'])
        outputs_filter = VariableFilter(
            applications=[self.generator.generate], name="outputs")
        # Exactly one match is expected; the unpacking fails otherwise.
        samples, = outputs_filter(outputs_graph)
        self._beam_search = BeamSearch(beam_size, samples)
        self._beam_search.compile()
Example #39
0
def main(save_to, num_epochs):
    """Train a 784-100-10 MLP on MNIST and monitor it on the test set.

    Parameters
    ----------
    save_to
        Destination handed to the ``Checkpoint`` extension.
    num_epochs
        Number of epochs after which ``FinishAfter`` stops training.

    """
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    features = tensor.matrix('features')
    targets = tensor.lmatrix('targets')
    probs = mlp.apply(features)
    data_cost = CategoricalCrossEntropy().apply(targets.flatten(), probs)
    error_rate = MisclassificationRate().apply(targets.flatten(), probs)

    # The graph is built on the unregularized cost; the two weight
    # matrices found in it are then penalized by their squared sums.
    cg = ComputationGraph([data_cost])
    first_weights, second_weights = VariableFilter(roles=[WEIGHT])(
        cg.variables)
    cost = (data_cost
            + .00005 * (first_weights**2).sum()
            + .00005 * (second_weights**2).sum())
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost,
                                params=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))

    train_stream = DataStream(
        mnist_train,
        iteration_scheme=SequentialScheme(mnist_train.num_examples, 50))
    model = Model(cost)

    extensions = [Timing(), FinishAfter(after_n_epochs=num_epochs)]
    extensions.append(DataStreamMonitoring(
        [cost, error_rate],
        DataStream(mnist_test,
                   iteration_scheme=SequentialScheme(
                       mnist_test.num_examples, 500)),
        prefix="test"))
    extensions.append(TrainingDataMonitoring(
        [cost, error_rate,
         aggregation.mean(algorithm.total_gradient_norm)],
        prefix="train",
        after_epoch=True))
    extensions.append(Checkpoint(save_to))
    extensions.append(Plot(
        'MNIST example',
        channels=[
            ['test_final_cost',
             'test_misclassificationrate_apply_error_rate'],
            ['train_total_gradient_norm'],
        ]))
    extensions.append(Printing())

    main_loop = MainLoop(algorithm, train_stream,
                         model=model, extensions=extensions)
    main_loop.run()
Example #40
0
    def create_act_table(self, save_to, act_table):
        """Compute, pickle and return the maximum-activation table.

        Builds the network returned by ``create_lenet_5()``, loads its
        trained parameters from ``save_to``, evaluates the error rate and
        the ``MaxActivationTable`` over ``self.mnist_test`` and pickles
        the resulting table to ``act_table``.

        Parameters
        ----------
        save_to : str
            Path of the file holding the trained parameters.
        act_table : str
            Path the pickled table is written to.

        Returns
        -------
        The ``'max_activation_table'`` entry of the evaluation results.

        """
        batch_size = 500
        convnet = create_lenet_5()

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe, keyed by the full brick path
        outmap = OrderedDict(
            (full_brick_name(get_brick(out)), out) for out in VariableFilter(
                roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables))
        # Bias parameters; each one identifies a brick to probe
        biases = VariableFilter(roles=[BIAS])(cg.parameters)

        # Generate parallel array, in the same order, for outputs
        outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

        # Quantities to evaluate over the test set
        error_rate = (MisclassificationRate().apply(
            y.flatten(), probs).copy(name='error_rate'))
        max_activation_table = (MaxActivationTable().apply(outs).copy(
            name='max_activation_table'))
        max_activation_table.tag.aggregation_scheme = (
            Concatenate(max_activation_table))

        model = Model([error_rate, max_activation_table])

        # Load it with trained parameters; the context manager closes the
        # file even when loading fails.
        with open(save_to, 'rb') as source:
            params = load_parameters(source)
        model.set_parameter_values(params)

        mnist_test_stream = DataStream.default_stream(
            self.mnist_test,
            iteration_scheme=SequentialScheme(self.mnist_test.num_examples,
                                              batch_size))

        evaluator = DatasetEvaluator([error_rate, max_activation_table])
        results = evaluator.evaluate(mnist_test_stream)
        table = results['max_activation_table']
        # Close the output file explicitly so the pickle is flushed to disk.
        with open(act_table, 'wb') as destination:
            pickle.dump(table, destination)
        return table
Example #41
0
def train_model(cost, train_stream, valid_stream, valid_freq, valid_rare,
                load_location=None, save_location=None):
    """Train a model on ``cost`` with plain gradient descent.

    Parameters
    ----------
    cost
        Theano variable to minimize; it is renamed to ``'nll'``.
    train_stream
        Data stream the main loop trains on.
    valid_stream, valid_freq, valid_rare
        Data streams monitored under the prefixes ``valid_all``,
        ``valid_frequent`` and ``valid_rare`` respectively.
    load_location : optional
        When given, parameter values are loaded from it before training.
    save_location : optional
        When given, the main loop is dumped there after training.

    """
    cost.name = 'nll'
    perplexity = 2**(cost / tensor.log(2))
    perplexity.name = 'ppl'

    # Define the model
    model = Model(cost)

    # Optionally restore parameters from a dumped model
    if load_location is not None:
        logger.info('Loading parameters...')
        model.set_param_values(load_parameter_values(load_location))

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(cost=cost,
                                step_rule=Scale(learning_rate=0.01),
                                params=cg.parameters)

    def validation_monitor(stream, prefix, period):
        """Monitor cost and perplexity on ``stream`` every ``period`` batches."""
        return DataStreamMonitoring([cost, perplexity],
                                    stream,
                                    prefix=prefix,
                                    every_n_batches=period)

    extensions = [
        validation_monitor(valid_stream, 'valid_all', 5000),
        # Overfitting of rare words occurs between 3000 and 4000 iterations
        validation_monitor(valid_rare, 'valid_rare', 500),
        validation_monitor(valid_freq, 'valid_frequent', 5000),
        Printing(every_n_batches=500),
    ]
    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

    # Nothing left to do unless a dump location was requested
    if save_location is None:
        return

    logger.info('Saving the main loop...')
    MainLoopDumpManager(save_location).dump(main_loop)
    logger.info('Saved')
Example #42
0
 def initialize(self):
     """Compile the Theano function used by the training algorithm.

     The function takes the inputs of the graph spanned by the update
     expressions, returns the ``theano_variable`` of every prunable
     layer and applies ``self.updates``.
     """
     logger.info("Initializing the training algorithm")
     new_values = [replacement for _target, replacement in self.updates]
     layer_outputs = [layer.theano_variable
                      for layer in self.prunable_layers]
     logger.debug("Inferring graph inputs...")
     self.inputs = ComputationGraph(new_values).inputs
     logger.debug("Compiling training function...")
     compiled = theano.function(self.inputs,
                                layer_outputs,
                                updates=self.updates,
                                **self.theano_func_kwargs)
     self._function = compiled
     logger.info("The training algorithm is initialized")
Example #43
0
 def get_cost_graph(self, batch=True,
                    prediction=None, prediction_mask=None):
     """Build the computation graph of the cost.

     Parameters
     ----------
     batch : bool, optional
         When true, the batch variables ``self.recordings``,
         ``self.labels`` and their masks are used. Otherwise the single
         example variables are expanded with a singleton axis at
         position 1 and an all-ones recordings mask is created.
     prediction : optional
         Sequence the cost is computed for; defaults to the groundtruth.
     prediction_mask : optional
         Mask of ``prediction``; defaults to the groundtruth mask
         (which is ``None`` in single-example mode).

     Returns
     -------
     The ``ComputationGraph`` of the cost. For criteria whose name starts
     with ``"mse"`` the variable named ``groundtruth`` in that graph is
     replaced by the actual groundtruth.

     """
     if batch:
         recordings = self.recordings
         recordings_mask = self.recordings_mask
         groundtruth = self.labels
         groundtruth_mask = self.labels_mask
     else:
         recordings = self.single_recording[:, None, :]
         recordings_mask = tensor.ones_like(recordings[:, :, 0])
         groundtruth = self.single_transcription[:, None]
         groundtruth_mask = None
     # Test for "not given" explicitly: the truth value of a tensor-like
     # argument is not a reliable way to tell it apart from None.
     if prediction is None:
         prediction = groundtruth
     if prediction_mask is None:
         prediction_mask = groundtruth_mask
     cost = self.cost(recordings, recordings_mask,
                      prediction, prediction_mask)
     cost_cg = ComputationGraph(cost)
     if self.criterion['name'].startswith("mse"):
         # Exactly one variable named 'groundtruth' is expected here.
         placeholder, = VariableFilter(theano_name='groundtruth')(cost_cg)
         cost_cg = cost_cg.replace({placeholder: groundtruth})
     return cost_cg
Example #44
0
 def __init__(self, graph, data_stream, n_batches, **kwargs):
     """Collect the batch-norm bricks of ``graph`` and compile.

     ``after_epoch`` and ``before_first_epoch`` default to ``True``
     unless the caller overrides them in ``kwargs``; the remaining
     keyword arguments go to the parent constructor unchanged.
     """
     for trigger in ("after_epoch", "before_first_epoch"):
         kwargs.setdefault(trigger, True)
     super(BatchNormExtension, self).__init__(**kwargs)
     self.n_batches = n_batches
     self.bricks = get_batch_norm_bricks(graph)
     self.data_stream = data_stream
     self.updates = self._get_updates()
     training_outputs = [brick.training_output for brick in self.bricks]
     self._computation_graph = ComputationGraph(training_outputs)
     # Going through a set drops repeated inputs; the resulting order is
     # whatever the set iteration yields.
     self.inputs = list(set(self._computation_graph.inputs))
     self.inputs_names = [variable.name for variable in self.inputs]
     self._compile()
Example #45
0
    def buildObjective(self):
        """Build the approximate objective (L_elbo in the GMVAE article).

        Sets ``self.L_elbo``, its counterpart ``self.L_elbo_modif`` built
        from the ``*_modif`` prior terms, and ``self.params``.
        """
        # self.z_prior might be the modified version
        elbo_terms = (self.reconst + self.conditional_prior +
                      self.w_prior + self.z_prior)
        self.L_elbo = T.mean(elbo_terms)

        elbo_terms_modif = (self.reconst + self.conditional_prior +
                            self.w_prior_modif + self.z_prior_modif)
        self.L_elbo_modif = T.mean(elbo_terms_modif)

        # Model parameters: every variable with the PARAMETER role found
        # in the graph of L_elbo.
        elbo_graph = ComputationGraph(self.L_elbo)
        parameter_filter = VariableFilter(roles=[PARAMETER])
        self.params = parameter_filter(elbo_graph.variables)
Example #46
0
    def _get_bn_params(self, output_vars):
        """Locate batch normalization parameters and their shared copies.

        Also sets ``self._bn``, ``self._counter`` and
        ``self._counter_max`` as a side effect.

        Parameters
        ----------
        output_vars
            Output variable(s) of the graph to inspect.

        Returns
        -------
        bn_ps : list
            Variables with the ``BNPARAM`` role found in the graph.
        bn_share : list
            For each entry of ``bn_ps``, the graph update target named
            ``'shared_<name>'``; empty when no parameters were found.
        output_vars_replaced
            Outputs of the graph with every ``bn_ps`` entry replaced by
            its shared counterpart, or ``output_vars`` itself when no
            parameters were found.

        """
        # Pick out the nodes with batch normalization vars
        cg = ComputationGraph(output_vars)
        var_filter = VariableFilter(roles=[BNPARAM])
        bn_ps = var_filter(cg.variables)

        if not bn_ps:
            logger.warning('No batch normalization parameters found - is' +
                           ' batch normalization turned off?')
            self._bn = False
            self._counter = None
            self._counter_max = None
            bn_share = []
            output_vars_replaced = output_vars
        else:
            self._bn = True
            assert len(set([p.name for p in bn_ps])) == len(bn_ps), \
                'Some batch norm params have the same name'
            logger.info('Batch norm parameters: %s' % ', '.join([p.name for p in bn_ps]))

            # Filter out the shared variables from the model updates
            def filter_share(par):
                lst = [up for up in cg.updates if up.name == 'shared_%s' % par.name]
                assert len(lst) == 1
                return lst[0]
            # A real list (not a lazy ``map`` object) so that the value is
            # still usable after being zipped below and when returned.
            bn_share = [filter_share(par) for par in bn_ps]

            # Replace the BN coefficients in the test data model - Replace the
            # theano variables in the test graph with the shareds
            output_vars_replaced = cg.replace(
                list(zip(bn_ps, bn_share))).outputs

            # Pick out the counter
            self._counter = self._param_from_updates(cg.updates, 'counter')
            self._counter_max = self._param_from_updates(cg.updates, 'counter_max')

        return bn_ps, bn_share, output_vars_replaced
Example #47
0
    def __init__(self, variables, use_take_last=False):
        """Store the variables, validate their names and compile.

        Parameters
        ----------
        variables : list
            Variables to handle; their ``name`` attributes must be
            pairwise distinct.
        use_take_last : bool, optional
            Stored as ``self.use_take_last``.

        Raises
        ------
        ValueError
            If a name occurs more than once; the message lists the
            offending names.

        """
        self.variables = variables
        self.use_take_last = use_take_last

        names = [variable.name for variable in self.variables]
        self.variable_names = names
        if len(set(names)) < len(self.variables):
            # Report every name that occurs more than once.
            duplicates = [name for name in set(names)
                          if names.count(name) > 1]
            raise ValueError("variables should have different names!"
                             " Duplicates: {}".format(', '.join(duplicates)))

        graph = ComputationGraph(self.variables)
        self._computation_graph = graph
        self.inputs = graph.inputs

        self._initialized = False
        self._create_aggregators()
        self._compile()
Example #48
0
class Pylearn2Cost(pylearn2.costs.cost.Cost):
    """Wraps a Theano cost to support the PyLearn2 Cost interface.

    Parameters
    ----------
    cost : Theano variable
        The Theano variable corresponding to the end of the cost
        computation graph.

    Notes
    -----
    The inputs of the computation graph must have names compatible with
    names of the data sources. This is necessary in order to replace
    them with the ones given by PyLearn2.

    """
    def __init__(self, cost):
        self.cost = cost
        self.inputs = ComputationGraph(self.cost).dict_of_inputs()

    def expr(self, model, data, **kwargs):
        """Return a clone of the cost with its inputs replaced by `data`.

        Only unsupervised models are accepted. The inputs are paired
        with `data` positionally, in the iteration order of
        ``self.inputs`` (NOTE(review): relies on that order matching the
        order of the data sources -- confirm).

        """
        assert not model.supervised
        data = pack(data)
        data = [tensor.unbroadcast(var, *range(var.ndim))
                for var in data]
        return theano.clone(
            self.cost, replace=dict(zip(self.inputs.values(), data)))

    def get_gradients(self, model, data, **kwargs):
        """Return the gradients of the cost w.r.t. the model parameters.

        The gradients are built on the first call and cached in
        ``self._grads``; later calls reuse them regardless of `data`.

        Returns
        -------
        A pair: an ordered mapping from parameters to gradients and an
        empty ordered mapping.

        """
        params = model.get_params()
        if not hasattr(self, "_grads"):
            # Clone the cost once and differentiate that single graph
            # instead of re-cloning it for every parameter.
            cost = self.expr(model, data)
            self._grads = [tensor.grad(cost, p) for p in params]
        return OrderedDict(zip(params, self._grads)), OrderedDict()

    def get_monitoring_channels(self, model, data, **kwargs):
        """Return no monitoring channels."""
        return OrderedDict()

    def get_data_specs(self, model):
        """Return the data specification of the model."""
        return model.data_specs
Example #49
0
def apply_adaptive_noise(computation_graph,
                         cost,
                         variables,
                         num_examples,
                         parameters=None,
                         init_sigma=1e-6,
                         model_cost_coefficient=1.0,
                         seed=None,
                         gradients=None,
                         ):
    """Add adaptive noise to parameters of a model.

    Each of the given variables will be replaced by a normal
    distribution with learned mean and standard deviation.

    A model cost is computed based on the precision of the distributions
    associated with each variable. It is added to the given cost used to
    train the model.

    See: A. Graves "Practical Variational Inference for Neural Networks",
         NIPS 2011

    Parameters
    ----------
    computation_graph : instance of :class:`ComputationGraph`
        The computation graph.
    cost : :class:`~tensor.TensorVariable`
        The cost without weight noise. It must be one of the outputs of
        the computation_graph.
    variables : :class:`~tensor.TensorVariable`
        Variables to add noise to.
    num_examples : int
        Number of training examples. The cost of the model is divided by
        the number of training examples, please see
        A. Graves "Practical Variational Inference for Neural Networks"
        for justification
    parameters : list of :class:`~tensor.TensorVariable`
        parameters of the model, if gradients are given the list will not
        be used. Otherwise, it will be used to compute the gradients
    init_sigma : float,
        initial standard deviation of noise variables
    model_cost_coefficient : float,
        the weight of the model cost
    seed : int, optional
        The seed with which
        :class:`~theano.sandbox.rng_mrg.MRG_RandomStreams` is initialized.
        Any falsy value (``None`` or ``0``) selects
        ``config.default_seed``.
    gradients : dict, optional
        Adaptive weight noise introduces new parameters for which new cost
        and gradients must be computed. Unless the gradients parameter is
        given, it will use theano.grad to get the gradients

    Returns
    -------
    cost : :class:`~tensor.TensorVariable`
        The new cost
    computation_graph : instance of :class:`ComputationGraph`
        new graph with added noise.
    gradients : dict
        a dictionary of gradients for all parameters: the original ones
        and the adaptive noise ones
    noise_brick : :class:~lvsr.graph.NoiseBrick
        the brick that holds all noise parameters and whose .apply method
        can be used to find variables added by adaptive noise

    Raises
    ------
    ValueError
        If `cost` is not an output of `computation_graph`, or if neither
        `gradients` nor `parameters` is given.

    """
    if not seed:
        seed = config.default_seed
    rng = MRG_RandomStreams(seed)

    try:
        cost_index = computation_graph.outputs.index(cost)
    except ValueError:
        raise ValueError("cost is not part of the computation_graph")

    if gradients is None:
        if parameters is None:
            raise ValueError("Either gradients or parameters must be given")
        logger.info("Taking the cost gradient")
        gradients = dict(equizip(parameters,
                                 tensor.grad(cost, parameters)))
    else:
        if parameters is not None:
            logger.warning("Both gradients and parameters given, will ignore "
                           "parameters")
        parameters = list(gradients.keys())

    gradients = OrderedDict(gradients)

    log_sigma_scale = 2048.0

    P_noisy = variables  # We will add noise to these
    Beta = []  # will hold means, log_stdev and stdevs
    P_with_noise = []  # will hold params with added noise

    # These don't change
    P_clean = list(set(parameters).difference(P_noisy))

    noise_brick = NoiseBrick()

    for p in P_noisy:
        p_u = p
        p_val = p.get_value(borrow=True)
        # Scaled log-variance, initialised so that exp(p_ls2 * scale)
        # equals init_sigma ** 2.
        p_ls2 = theano.shared((numpy.zeros_like(p_val) +
                               numpy.log(init_sigma) * 2. / log_sigma_scale
                               ).astype(dtype=numpy.float32))
        p_ls2.name = __get_name(p_u)
        noise_brick.parameters.append(p_ls2)
        p_s2 = tensor.exp(p_ls2 * log_sigma_scale)
        Beta.append((p_u, p_ls2, p_s2))

        p_noisy = p_u + rng.normal(size=p_val.shape) * tensor.sqrt(p_s2)
        p_noisy = tensor.patternbroadcast(p_noisy, p.type.broadcastable)
        P_with_noise.append(p_noisy)

    #  compute the prior mean and variation
    temp_sum = 0.0
    temp_param_count = 0.0
    for p_u, unused_p_ls2, unused_p_s2 in Beta:
        temp_sum = temp_sum + p_u.sum()
        temp_param_count = temp_param_count + p_u.shape.prod()

    prior_u = tensor.cast(temp_sum / temp_param_count, 'float32')

    temp_sum = 0.0
    for p_u, unused_ls2, p_s2 in Beta:
        temp_sum = temp_sum + (p_s2).sum() + (((p_u-prior_u)**2).sum())

    prior_s2 = tensor.cast(temp_sum/temp_param_count, 'float32')

    #  convert everything to use the noisy parameters; the gradients are
    #  appended after the original outputs so both get rewritten together
    num_outputs = len(computation_graph.outputs)
    full_computation_graph = ComputationGraph(
        list(computation_graph.outputs) + list(gradients.values()))
    full_computation_graph = full_computation_graph.replace(
        dict(zip(P_noisy, P_with_noise)))

    LC = 0.0  # model cost
    for p_u, p_ls2, p_s2 in Beta:
        LC = (LC +
              0.5 * ((tensor.log(prior_s2) - p_ls2 * log_sigma_scale).sum()) +
              1.0 / (2.0 * prior_s2) * (((p_u - prior_u)**2) + p_s2 - prior_s2
                                        ).sum()
              )

    LC = LC / num_examples * model_cost_coefficient

    train_cost = noise_brick.apply(
        full_computation_graph.outputs[cost_index].copy(), LC,
        prior_u, prior_s2)

    # Everything past the original outputs is a (now noisy) gradient.
    # Indexing from the front stays correct even for an empty mapping,
    # where a ``-len(...)`` slice would select all outputs.
    gradients = OrderedDict(
        zip(gradients.keys(),
            full_computation_graph.outputs[num_outputs:]))

    #
    # Delete the gradients from the computational graph
    #
    del full_computation_graph.outputs[num_outputs:]

    new_grads = {p: gradients.pop(p) for p in P_clean}

    #
    # Warning!!!
    # This only works for batch size 1 (we want that the sum of squares
    # be the square of the sum!
    #
    diag_hessian_estimate = {p: g**2 for p, g in gradients.items()}

    for p_u, p_ls2, p_s2 in Beta:
        p_grad = gradients[p_u]
        p_u_grad = (model_cost_coefficient * (p_u - prior_u) /
                    (num_examples*prior_s2) + p_grad)

        p_ls2_grad = (numpy.float32(model_cost_coefficient *
                                    0.5 / num_examples * log_sigma_scale) *
                      (p_s2/prior_s2 - 1.0) +
                      (0.5*log_sigma_scale) * p_s2 * diag_hessian_estimate[p_u]
                      )
        new_grads[p_u] = p_u_grad
        new_grads[p_ls2] = p_ls2_grad

    return train_cost, full_computation_graph, new_grads, noise_brick
Example #50
0
def train(cli_params):
    """Train the ladder model described by ``cli_params``.

    ``cli_params`` is modified in place: its ``"save_dir"`` entry is set
    to the directory prepared from ``cli_params["save_to"]``, and DEBUG
    logging is additionally written to ``log.txt`` in that directory.

    Returns
    -------
    The main loop log as a dataframe, or ``None`` when the log status
    reports that an epoch interrupt was received.

    """
    save_dir = prepare_dir(cli_params["save_to"])
    cli_params["save_dir"] = save_dir
    logfile = os.path.join(save_dir, "log.txt")

    # Log also DEBUG to a file
    file_handler = logging.FileHandler(filename=logfile)
    file_handler.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)

    logger.info("Logging into %s" % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=False)

    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim,) + p.encoder_layers

    ladder = setup_model(p)
    total_cost = ladder.costs.total

    # Training
    all_params = ComputationGraph([total_cost]).parameters
    logger.info("Found the following parameters: %s" % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    update_names = [u.name for u in bn_updates.keys()]
    assert "counter" in update_names, "No batch norm params in graph - the graph has been cut?"

    training_algorithm = GradientDescent(
        cost=total_cost,
        params=all_params,
        step_rule=Adam(learning_rate=ladder.lr),
    )
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    model = Model(total_cost)

    monitored_variables = [
        ladder.costs.class_corr,
        ladder.costs.class_clean,
        ladder.error,
        total_cost,
    ]

    # Global history recorder, shared by SaveLog (which updates it) and
    # SentinelWhenFinish (which receives it for the end-of-training
    # summary).
    global_history = {}

    # Datastream used for training
    train_stream = make_datastream(
        data.train,
        data.train_ind,
        p.batch_size,
        n_labeled=p.labeled_samples,
        n_unlabeled=p.unlabeled_samples,
        whiten=whiten,
        cnorm=cnorm,
    )

    # The extensions are created one by one, in the order in which they
    # are handed to the main loop.
    finish = FinishAfter(after_n_epochs=p.num_epochs)
    # write out to sentinel file for experiment automator to work
    sentinel = SentinelWhenFinish(save_dir=p.save_dir,
                                  global_history=global_history)
    # Estimates the validation error using running average estimates of
    # the batch normalization parameters, mean and variance.
    approx_monitor = ApproxTestMonitoring(
        monitored_variables,
        make_datastream(
            data.valid, data.valid_ind, p.valid_batch_size,
            whiten=whiten, cnorm=cnorm, scheme=ShuffledScheme
        ),
        prefix="valid_approx",
    )
    # Slower but more accurate: first estimates batch normalization
    # parameters from training data, then does another pass to calculate
    # the validation error.
    final_monitor = FinalTestMonitoring(
        monitored_variables,
        make_datastream(
            data.train,
            data.train_ind,
            p.batch_size,
            n_labeled=p.labeled_samples,
            whiten=whiten,
            cnorm=cnorm,
            scheme=ShuffledScheme,
        ),
        make_datastream(
            data.valid,
            data.valid_ind,
            p.valid_batch_size,
            n_labeled=len(data.valid_ind),
            whiten=whiten,
            cnorm=cnorm,
            scheme=ShuffledScheme,
        ),
        prefix="valid_final",
        after_n_epochs=p.num_epochs,
    )
    train_monitor = TrainingDataMonitoring(
        variables=monitored_variables, prefix="train", after_epoch=True)
    save_params = SaveParams("valid_approx_cost_class_corr", model, p.save_dir)
    save_exp_params = SaveExpParams(p, p.save_dir, before_training=True)
    save_log = SaveLog(save_dir=p.save_dir, after_epoch=True,
                       global_history=global_history)
    printing = Printing()
    lr_decay = LRDecay(ladder.lr, p.num_epochs * p.lrate_decay,
                       p.num_epochs, after_epoch=True)

    main_loop = MainLoop(
        training_algorithm,
        train_stream,
        model=model,
        extensions=[
            finish,
            sentinel,
            approx_monitor,
            final_monitor,
            train_monitor,
            save_params,
            save_exp_params,
            save_log,
            printing,
            lr_decay,
        ],
    )
    main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
    col = "valid_final_error_rate"
    logger.info("%s %g" % (col, df[col].iloc[-1]))

    interrupted = main_loop.log.status["epoch_interrupt_received"]
    return None if interrupted else df
Example #51
0
def analyze(cli_params):
    """Run the clean labeled path over one dataset split.

    The split is chosen by ``p.data_type`` (``'train'``, ``'valid'`` or
    ``'test'``). For the validation and test splits a dummy loop is run
    first to calculate batch normalization for the clean path, and the
    batch normalization parameters in the output graph are replaced
    with their shared variables.

    Returns
    -------
    targets
        The stacked ``'targets_labeled'`` inputs of the whole epoch.
    outputs
        The stacked values of the first output of the compiled graph.

    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=True)
    ladder = setup_model(p)

    # Analyze activations
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test':  (data.test, data.test_ind, True),
    }[p.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    # list(...) keeps the concatenation valid when
                    # values() returns a view rather than a list.
                    [ladder.costs.class_clean, ladder.error.clean]
                    + list(ladder.costs.denois.values()),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # We want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _,  outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.items():
            inputs[k].append(v)
        # Store outputs; the batch entries are looked up by the string
        # form of each graph input.
        res.append(f(*[d[str(inp)] for inp in cg.inputs]))

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.items()}

    return inputs['targets_labeled'], res[0]
Example #52
0
    def _create_main_loop(self):
        """Evaluate the RGB network on the test set and print error rates.

        Despite its name, no main loop is built here: the method compiles
        an evaluation function, runs it over 24 frame subsamples of the
        ``'test'`` dataset with and without flipping, accumulates the
        network outputs per example and prints the misclassification
        rate after every pass and once more at the end. Nothing is
        returned.

        """
        batch_size = self.params['batch_size']

        ### Build fprop
        tensor5 = T.TensorType(config.floatX, (False,)*5)
        X = tensor5("images")
        y = T.lvector('targets')

        # Reverse the order of the last axis, move that axis to position 2
        # and rescale by 255 (presumably RGB -> BGR on images scaled to
        # [0, 1] -- TODO confirm).
        X = X[:, :, :, :, [2, 1, 0]]
        X_shuffled = X.dimshuffle((0, 1, 4, 2, 3)) * 255
        # Merge axes 1 and 2 into a single axis
        X_r = X_shuffled.reshape((X_shuffled.shape[0],
                                  X_shuffled.shape[1]*X_shuffled.shape[2],
                                  X_shuffled.shape[3], X_shuffled.shape[4]))
        # Subtract one constant per entry of axis 1
        X_r = X_r - (np.array([104, 117, 123])[None, :, None, None]).astype('float32')

        expressions, _, _ = stream_layer_exp(inputs=('data', X_r),
                                             mode='rgb')
        y_hat = expressions['outloss'].flatten(ndim=2)

        ### Build Cost
        cost = CategoricalCrossEntropy().apply(y, y_hat)
        cost = T.cast(cost, theano.config.floatX)
        cost.name = 'cross_entropy'

        y_pred = T.argmax(y_hat, axis=1)
        misclass = T.cast(T.mean(T.neq(y_pred, y)), theano.config.floatX)
        misclass.name = 'misclass'

        ### Get evaluation function; output index 2 is y_hat
        monitored_quantities = [cost, misclass, y_hat, y_pred]
        training_eval = ComputationGraph(
            monitored_quantities).get_theano_function()

        # Dataset
        test = JpegHDF5Dataset('test', load_in_memory=True)

        ### Eval: per-example labels and accumulated scores, kept
        ### separately for the unflipped and the flipped stream
        num_examples = test.num_video_examples
        labels = np.zeros(num_examples)
        scores = np.zeros((num_examples, 101))
        labels_flip = np.zeros(num_examples)
        scores_flip = np.zeros((num_examples, 101))

        nb_frames = 1

        def error_rate(targets, class_scores):
            """Fraction of examples whose arg-max score misses the target."""
            preds = class_scores.argmax(axis=1)
            return np.sum(targets != preds) / float(len(preds))

        def accumulate(stream, targets, class_scores):
            """Add one epoch of outputs to `class_scores`; return the error."""
            for j, batch in enumerate(stream.get_epoch_iterator()):
                output = training_eval(batch[0], batch[1])
                rows = slice(batch_size*j, batch_size*(j+1))
                targets[rows] = batch[1]
                class_scores[rows, :] += output[2]
            return error_rate(targets, class_scores)

        def make_stream(scheme, crop, flip):
            """Build the transformed test stream for one crop/flip setting."""
            return JpegHDF5Transformer(
                input_size=(240, 320), crop_size=(224, 224),
                crop_type=crop,
                translate_labels=True,
                flip=flip, nb_frames=nb_frames,
                data_stream=ForceFloatX(DataStream(
                    dataset=test, iteration_scheme=scheme)))

        for i in range(24):
            scheme = HDF5SeqScheme(test.video_indexes,
                                   examples=test.num_video_examples,
                                   batch_size=batch_size,
                                   f_subsample=i,
                                   nb_subsample=25,
                                   frames_per_video=nb_frames)
            for crop in ['center']:
                stream = make_stream(scheme, crop, 'noflip')
                stream_flip = make_stream(scheme, crop, 'flip')

                ## Do the evaluation; each stream feeds the accumulator
                ## that matches its flip setting
                err = accumulate(stream, labels, scores)
                print("%s %s noflip Misclass: %s" % (i, crop, err))

                err = accumulate(stream_flip, labels_flip, scores_flip)
                print("%s %s flip Misclass: %s" % (i, crop, err))

                err = error_rate(labels, scores + scores_flip)
                print("%s %s avg Misclass: %s" % (i, crop, err))

        ### Compute misclass over the merged scores
        scores += scores_flip
        print("Misclass: %s" % error_rate(labels, scores))
Example #53
0
def train(cli_params):
    """Train the model configured by `cli_params` and return its log.

    Prepares the save directory, attaches a DEBUG-level file handler that
    writes to ``log.txt`` inside it, loads the parameters and the data
    (``setup_data`` is called with ``test_set=True``), builds the model with
    ``setup_model`` and runs a ``MainLoop`` with the monitoring, saving and
    learning-rate-decay extensions listed below.

    Parameters
    ----------
    cli_params : mapping
        ``cli_params['save_to']`` is read and ``cli_params['save_dir']`` is
        written here; the mapping is then handed to ``load_and_log_params``.

    Returns
    -------
    The value of ``main_loop.log.to_dataframe()``, or ``None`` when the log
    status entry ``'epoch_interrupt_received'`` is truthy.

    """
    cli_params['save_dir'] = prepare_dir(cli_params['save_to'])
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=True)

    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim,) + p.encoder_layers

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    # The same Model instance is given to MainLoop and to SaveParams below.
    model=Model(ladder.costs.total)

    # One shared list of variables is passed to every monitoring extension.
    # The gradient norm and the denoising costs are currently commented out.
    monitored_variables = [
        ladder.costs.class_corr, 
        ladder.costs.class_clean,
        ladder.error, 
#         training_algorithm.total_gradient_norm,
        ladder.costs.total] \
#         + ladder.costs.denois.values()

    # Make a global history recorder so that we can get a summary at the end
    # of training when we write to Sentinel.
    # global_history records all relevant monitoring vars and is
    # updated by SaveLog every time it runs.
    global_history = {}

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(data.train, data.train_ind,
                        p.batch_size,
                        n_labeled=p.labeled_samples,
                        n_unlabeled=p.unlabeled_samples,
                        whiten=whiten,
                        cnorm=cnorm),
        model=model,
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                monitored_variables,
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size, whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="valid_approx"),

            # This Monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            # It is scheduled with after_n_epochs=p.num_epochs and is given
            # the test stream (not the validation stream) as second stream.
            FinalTestMonitoring(
                monitored_variables,
                make_datastream(data.train, data.train_ind,
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                # DEPREC: we directly test now
#                 make_datastream(data.valid, data.valid_ind,
#                                 p.valid_batch_size,
#                                 n_labeled=len(data.valid_ind),
#                                 whiten=whiten, cnorm=cnorm,
#                                 scheme=ShuffledScheme),
#                 prefix="valid_final",
                make_datastream(data.test, data.test_ind,
                                p.batch_size,
                                n_labeled=len(data.test_ind),
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="final_test",
                after_n_epochs=p.num_epochs),

            TrainingDataMonitoring(
                variables=monitored_variables,
                prefix="train", after_epoch=True),

            # write out to sentinel file for experiment automator to work
            # REMOVE THIS if you're running test mode with early stopping immediately after
            SentinelWhenFinish(save_dir=p.save_dir,
                               global_history=global_history),

            # originally use 'valid_approx_cost_class_clean'
            # turns out should use ER as early stopping
            # use CE as a fallback (secondary early stopvar) if ER is the same
#             SaveParams(('valid_approx_error_rate', 'valid_approx_cost_class_clean'),
#                         model, p.save_dir),
            # doesn't do early stopping now
            SaveParams(None, model, p.save_dir, after_epoch=True),
            SaveExpParams(p, p.save_dir, before_training=True),
            SaveLog(save_dir=p.save_dir, 
                    after_epoch=True,
                    global_history=global_history),
            Printing(),
#             ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay, p.num_epochs,
                    after_epoch=True),
        ])
    main_loop.run()

    # ================= Add testing at end of training =================
    # DEPREC don't do early stopping anymore
    # NOTE: this branch is disabled (`if False`), so it never runs and
    # `main_loop` below still refers to the training loop created above.
    if False:
        p.load_from = p.save_dir
        ladder = setup_model(p)

        logger.info('Start testing on trained_params_best')
        main_loop = DummyLoop(
            extensions=[
                # write to global history
                SaveLog(save_dir=p.save_dir, 
                        after_training=True,
                        global_history=global_history),

                # write out to sentinel file for experiment automator to work
                SentinelWhenFinish(save_dir=p.save_dir,
                                   global_history=global_history),

                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    make_datastream(data.test, data.test_ind,
                                    p.batch_size,
                                    n_labeled=len(data.test_ind),
                                    n_unlabeled=len(data.test_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    prefix="test", 
                    before_training=True)
            ])
        main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
#     col = 'valid_final_error_rate'
#     logger.info('%s %g' % (col, df[col].iloc[-1]))

    # An interrupted run returns None instead of the dataframe.
    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return df
Example #54
0

# Symbolic inputs: `features` is fed to `d`, `noise` is fed to `g`.
features = tensor.matrix('features')
noise = tensor.matrix('noise')


# g = MLP(activations=[Logistic()], dims=[100, 784])
# d = MLP(activations=[Identity()], dims=[784, 1])
# `g` maps 100-dimensional inputs to 784 outputs; `d` maps 784 inputs to a
# single output with an identity (linear) last layer.
g = MLP(activations=[Identity(), Identity(), Identity(), Identity(),
                     Rectifier()],
        dims=[100, 2400, 2400, 2400, 2400, 784])
d = MLP(activations=[Tanh(), Tanh(), Identity()], dims=[784, 1200, 1200, 1])

generated_samples = g.apply(noise)
discriminated_features = d.apply(features)
discriminated_samples = d.apply(generated_samples)

generator_cg = ComputationGraph(generated_samples)
discriminator_cg = ComputationGraph(discriminated_features)
dsamples_cg = ComputationGraph(discriminated_samples)
generator_parameters = generator_cg.parameters

m = 100
# Explicit floor division: `b_size` is used as a slice bound below, so it has
# to stay an integer scalar. Plain `/` only does that through Python 2 /
# Theano implicit integer division, and produces a float under true division.
b_size = discriminated_features.shape[0] // 2
# Mean over the generated batch of log(1 + exp(-d(g(noise)))).
cost_generator = (
    tensor.sum(tensor.log(1 + tensor.exp(-discriminated_samples)))
    / discriminated_samples.shape[0].astype('float32'))
# Sum of d(features) over the first `b_size` rows plus
# log(1 + exp(-d(features))) over all rows, divided by `b_size`.
# NOTE(review): presumably the first half of the batch carries one class
# (real or generated) - confirm against the code that builds the batches.
cost_discriminator = (
    (tensor.sum(discriminated_features[:b_size])
     + tensor.sum(tensor.log(1 + tensor.exp(-discriminated_features))))
    / b_size.astype('float32'))

g.weights_init = IsotropicGaussian(0.05)
d.weights_init = IsotropicGaussian(0.005)
g.biases_init = d.biases_init = Constant(0)

g.initialize()
d.initialize()
Example #55
0
class AggregationBuffer(object):
    """Holds the aggregators of a list of Theano variables.

    For every variable an aggregator is obtained from its aggregation
    scheme; the buffer gathers the aggregators' updates and offers
    routines to initialize them and to read the aggregated values out.


    Parameters
    ----------
    variables : list of :class:`~tensor.TensorVariable`
        Variables to aggregate. Their names become the record names in
        the logs, so every name has to be unique.
    use_take_last : bool
        If ``True``, variables that can be computed without data get the
        :class:`TakeLast` aggregation scheme rather than
        :class:`_DataIndependent`.

    Attributes
    ----------
    initialization_updates : list of tuples
        Updates that initialize the aggregators.
    accumulation_updates : list of tuples
        Updates that accumulate into the aggregators.
    readout_variables : dict
        Maps record names to the :class:`~tensor.TensorVariable`
        holding the aggregated value.
    inputs : list of :class:`~tensor.TensorVariable`
        Inputs required to perform the accumulation.

    """
    def __init__(self, variables, use_take_last=False):
        _validate_variable_names(variables)
        self.variables = variables
        self.variable_names = [var.name for var in variables]
        self.use_take_last = use_take_last
        self._computation_graph = ComputationGraph(variables)
        self.inputs = self._computation_graph.inputs

        self._initialized = False
        self._create_aggregators()
        self._compile()

    def _default_scheme(self, variable):
        """Build the scheme for a variable that does not carry one."""
        if self._computation_graph.has_inputs(variable):
            logger.debug('Using the default '
                         ' (average over minibatches)'
                         ' aggregation scheme for %s', variable.name)
            return Mean(variable, 1.0)

        scheme = TakeLast if self.use_take_last else _DataIndependent
        logger.debug('Using %s aggregation scheme'
                     ' for %s since it does not depend on'
                     ' the data', scheme.__name__, variable.name)
        return scheme(variable)

    def _create_aggregators(self):
        """Instantiate one aggregator per variable and gather its updates."""
        self.initialization_updates = init_updates = []
        self.accumulation_updates = accum_updates = []
        self.readout_variables = readouts = OrderedDict()

        for variable in self.variables:
            logger.debug('variable to evaluate: %s', variable.name)
            # A scheme already attached to the variable's tag wins over
            # the defaults chosen here.
            if not hasattr(variable.tag, 'aggregation_scheme'):
                variable.tag.aggregation_scheme = (
                    self._default_scheme(variable))

            aggregator = variable.tag.aggregation_scheme.get_aggregator()
            init_updates.extend(aggregator.initialization_updates)
            accum_updates.extend(aggregator.accumulation_updates)
            readouts[variable.name] = aggregator.readout_variable

    def _compile(self):
        """Compile the initialization and readout Theano functions.

        .. todo::

            Updates attached to `ComputationGraph` elements are not taken
            into account by this compilation method. Compilation should
            be delegated to `ComputationGraph` so that they are.

        """
        logger.debug("Compiling initialization and readout functions")
        self._initialize_fun = None
        if self.initialization_updates:
            self._initialize_fun = theano.function(
                [], [], updates=self.initialization_updates)

        # `as_tensor_variable` is applied to avoid handing
        # `CudaNdarray`s back to the user, which can happen
        # otherwise under some circumstances (see
        # https://groups.google.com/forum/#!topic/theano-users/H3vkDN-Shok)
        readouts = [tensor.as_tensor_variable(readout)
                    for readout in self.readout_variables.values()]
        self._readout_fun = theano.function([], readouts)
        logger.debug("Initialization and readout functions compiled")

    def initialize_aggregators(self):
        """Mark the buffer initialized and run the initialization updates."""
        self._initialized = True
        initialize = self._initialize_fun
        if initialize is None:
            return
        initialize()

    def get_aggregated_values(self):
        """Return the aggregated values keyed by variable name."""
        if self._initialized:
            values = self._readout_fun()
            return OrderedDict(equizip(self.variable_names, values))
        raise Exception("To readout you must first initialize, then "
                        "process batches!")
Example #56
0
sp_mean = data_stats['sp_mean']
sp_std = data_stats['sp_std']

save_dir = os.environ['RESULTS_DIR']
save_dir = os.path.join(save_dir,'blizzard/')

experiment_name = "sp_only_0"

main_loop = load(save_dir+"pkl/best_"+experiment_name+".pkl")

generator = main_loop.model.get_top_bricks()[0]

steps = 2048
n_samples = 1

sample = ComputationGraph(generator.generate(n_steps=steps, 
    batch_size=n_samples, iterate=True))
sample_fn = sample.get_theano_function()

outputs = sample_fn()[-2]

outputs = outputs*sp_std + sp_mean
outputs = outputs.swapaxes(0,1)
outputs = outputs[0]

print outputs.max(), outputs.min()

pyplot.figure(figsize=(100,15))
pyplot.imshow(outputs.T)
pyplot.colorbar()
pyplot.gca().invert_yaxis()
pyplot.savefig(save_dir+"samples/best_"+experiment_name+"9.png")
    def __init__(self, config, vocab_size):
        """Build the symbolic graph of the model and initialize its bricks.

        Creates the input variables, embeds question and context with a
        shared ``LookupTable``, runs both through stacks built by
        ``make_bidir_lstm_stack``, combines them with an attention MLP and
        an output MLP, and derives a cost and an error rate. The results
        are stored on ``self`` as ``probs``, ``cost``, ``sgd_cost``,
        ``monitor_vars`` and ``monitor_vars_valid``.

        NOTE(review): the answer-scoring section in the middle of this
        method references names that are not assigned here (``a1enc`` ..
        ``a4enc``) and looks unfinished; see the notes at that point.

        Parameters
        ----------
        config
            Configuration object; the attributes read here include
            ``embed_size``, ``question_lstm_size``, ``ctx_lstm_size``,
            ``question_skip_connections``, ``ctx_skip_connections``,
            ``attention_mlp_hidden``, ``attention_mlp_activations``,
            ``out_mlp_hidden``, ``out_mlp_activations``, ``n_entities``,
            ``w_noise``, ``dropout``, ``weights_init`` and ``biases_init``.
        vocab_size
            Passed as the first argument of ``LookupTable``.

        """
        question = tensor.imatrix('question')

        # set up 32-bit integer matrices
        question_mask = tensor.imatrix('question_mask')
        context = tensor.imatrix('context')
        context_mask = tensor.imatrix('context_mask')
        answer = tensor.ivector('answer')
        # NOTE(review): `candidates` and `candidates_mask` are only used by
        # the commented-out code further down.
        candidates = tensor.imatrix('candidates')
        candidates_mask = tensor.imatrix('candidates_mask')

        # and the multiple choice answers:
        ans1 = tensor.ivector('ans1')
        ans1_mask = tensor.ivector('ans1_mask')
        ans2 = tensor.ivector('ans2')
        ans2_mask = tensor.ivector('ans2_mask')
        ans3 = tensor.ivector('ans3')
        ans3_mask = tensor.ivector('ans3_mask')
        ans4 = tensor.ivector('ans4')
        ans4_mask = tensor.ivector('ans4_mask')

        # Bricks collected here are initialized at the end of the method.
        bricks = []

        # inverts 1st and 2nd dimensions of matrix
        question = question.dimshuffle(1, 0)
        question_mask = question_mask.dimshuffle(1, 0)
        context = context.dimshuffle(1, 0)
        context_mask = context_mask.dimshuffle(1, 0)

        # Embed questions and context (one lookup table is shared by all)
        embed = LookupTable(vocab_size, config.embed_size, name='question_embed')
        bricks.append(embed)

        qembed = embed.apply(question)
        cembed = embed.apply(context)
        # NOTE(review): `a1embed` .. `a4embed` are not read again below.
        a1embed = embed.apply(ans1)
        a2embed = embed.apply(ans2)
        a3embed = embed.apply(ans3)
        a4embed = embed.apply(ans4)

        qlstms, qhidden_list = make_bidir_lstm_stack(qembed, config.embed_size, question_mask.astype(theano.config.floatX),
                                                     config.question_lstm_size, config.question_skip_connections, 'q')
        clstms, chidden_list = make_bidir_lstm_stack(cembed, config.embed_size, context_mask.astype(theano.config.floatX),
                                                     config.ctx_lstm_size, config.ctx_skip_connections, 'ctx')
        bricks = bricks + qlstms + clstms

        # Calculate question encoding (concatenate layer1)
        # With skip connections every entry of the hidden list is used,
        # otherwise only the last two entries; in both cases the slice at
        # index -1 along the first axis is taken.
        if config.question_skip_connections:
            qenc_dim = 2*sum(config.question_lstm_size)
            qenc = tensor.concatenate([h[-1,:,:] for h in qhidden_list], axis=1)
        else:
            qenc_dim = 2*config.question_lstm_size[-1]
            qenc = tensor.concatenate([h[-1,:,:] for h in qhidden_list[-2:]], axis=1)
        qenc.name = 'qenc'

        # Calculate context encoding (concatenate layer1)
        # Unlike the question, the full hidden tensors are kept and
        # concatenated along axis 2.
        if config.ctx_skip_connections:
            cenc_dim = 2*sum(config.ctx_lstm_size)
            cenc = tensor.concatenate(chidden_list, axis=2)
        else:
            cenc_dim = 2*config.ctx_lstm_size[-1]
            cenc = tensor.concatenate(chidden_list[-2:], axis=2)
        cenc.name = 'cenc'

        # Attention mechanism MLP
        attention_mlp = MLP(dims=config.attention_mlp_hidden + [1],
                            activations=config.attention_mlp_activations[1:] + [Identity()],
                            name='attention_mlp')
        attention_qlinear = Linear(input_dim=qenc_dim, output_dim=config.attention_mlp_hidden[0], name='attq')
        attention_clinear = Linear(input_dim=cenc_dim, output_dim=config.attention_mlp_hidden[0], use_bias=False, name='attc')
        bricks += [attention_mlp, attention_qlinear, attention_clinear]
        # `cenc` is flattened over its first two axes for the linear brick,
        # reshaped back, and the question projection is added with a new
        # leading axis before the Tanh.
        layer1 = Tanh().apply(attention_clinear.apply(cenc.reshape((cenc.shape[0]*cenc.shape[1], cenc.shape[2])))
                                        .reshape((cenc.shape[0],cenc.shape[1],config.attention_mlp_hidden[0]))
                             + attention_qlinear.apply(qenc)[None, :, :])
        layer1.name = 'layer1'
        att_weights = attention_mlp.apply(layer1.reshape((layer1.shape[0]*layer1.shape[1], layer1.shape[2])))
        att_weights.name = 'att_weights_0'
        att_weights = att_weights.reshape((layer1.shape[0], layer1.shape[1]))
        att_weights.name = 'att_weights'

        # Softmax is taken on the transposed weights and transposed back;
        # the weighted `cenc` is then summed over axis 0.
        attended = tensor.sum(cenc * tensor.nnet.softmax(att_weights.T).T[:, :, None], axis=0)
        attended.name = 'attended'

        # Now we can calculate our output
        out_mlp = MLP(dims=[cenc_dim + qenc_dim] + config.out_mlp_hidden + [config.n_entities],
                      activations=config.out_mlp_activations + [Identity()],
                      name='out_mlp')
        bricks += [out_mlp]
        probs = out_mlp.apply(tensor.concatenate([attended, qenc], axis=1))
        probs.name = 'probs'

        # not needed anymore, since we're not only looking at entities
        # is_candidate = tensor.eq(tensor.arange(config.n_entities, dtype='int32')[None, None, :],
        #                          tensor.switch(candidates_mask, candidates, -tensor.ones_like(candidates))[:, :, None]).sum(axis=1)
        # probs = tensor.switch(is_candidate, probs, -1000 * tensor.ones_like(probs))

        # Calculate prediction, cost and error rate

        # vocab = tensor.arange(10)
        # probs = numpy.asarray([0, 0.8, 0, 0.2], dtype=numpy.float32)
        # context = numpy.asarray([3, 2, 8, 1], dtype=numpy.int32)
        # ans3 =  numpy.asarray([2, 8, 1], dtype=numpy.int32)
        # ans1 =  numpy.asarray([1, 3, 4], dtype=numpy.int32)
        # ans2 =  numpy.asarray([1, 1, 4], dtype=numpy.int32)

        # convert probs vector to one that's the same size as vocab, with all zeros except probs:
        # probs = tensor.switch(is_candidate, probs, -1000 * tensor.ones_like(probs))
        # NOTE(review): `zeros_like` is applied to `vocab_size` itself (the
        # value given to LookupTable above), and the result is indexed with
        # the embedded context `cembed` rather than the `context` indices -
        # confirm that this produces the vocabulary-sized vector described
        # in the comment above.
        probsPadded = tensor.zeros_like(vocab_size, dtype=numpy.float32)
        probsSubset = probsPadded[cembed] #TODO this should be masked
        b = tensor.set_subtensor(probsSubset, probs)

        # get the similarity score of each (masked) answer with the context probs:
        # NOTE(review): `a1enc` .. `a4enc` are not assigned anywhere in this
        # method (only `a1embed` .. `a4embed` are); unless they are defined
        # at module level these lines raise NameError - confirm.
        # NOTE(review): only the first answer's score applies its mask;
        # `ans2_mask` .. `ans4_mask` are never used.
        ans1probs = b[a1enc]
        ans1score = tensor.switch(ans1_mask, ans1probs, tensor.zeros_like(ans1probs)).sum()
        ans2probs = b[a2enc]
        ans2score = ans2probs.sum()
        ans3probs = b[a3enc]
        ans3score = ans3probs.sum()
        ans4probs = b[a4enc]
        ans4score = ans4probs.sum()

        # and pick the best one:
        allans = tensor.stacklists([ans1score, ans2score, ans3score, ans4score])
        pred = tensor.argmax(allans)

        # NOTE(review): the compiled function is called with no arguments
        # and `out` is not read again in this method; this looks like
        # leftover debugging code - confirm before relying on it.
        cg = ComputationGraph([ans1probs, ans1score, ans2probs, ans2score, ans3probs, ans3score, ans4probs, ans4score, allans, pred])
        f = cg.get_theano_function()
        out = f()

        #pred = probs.argmax(axis=1)
        #print "pred"
        #print pred TODO CHANGE THIS!
        cost = Softmax().categorical_cross_entropy(answer, probs).mean()
        error_rate = tensor.neq(answer, pred).mean()

        # Apply weight noise and dropout, each only when its config value
        # is positive; the regularized outputs come from the rewritten graph.
        cg = ComputationGraph([cost, error_rate])
        if config.w_noise > 0:
            noise_vars = VariableFilter(roles=[WEIGHT])(cg)
            cg = apply_noise(cg, noise_vars, config.w_noise)
        if config.dropout > 0:
            cg = apply_dropout(cg, qhidden_list + chidden_list, config.dropout)
        [cost_reg, error_rate_reg] = cg.outputs

        # Other stuff
        # Regularized and plain variables deliberately share the same names.
        cost_reg.name = cost.name = 'cost'
        error_rate_reg.name = error_rate.name = 'error_rate'


        self.probs = probs
        self.probs.name = "probs"
        self.cost = cost
        self.cost.name = "cost"
        #
        # `sgd_cost` and `monitor_vars` use the regularized graph,
        # `monitor_vars_valid` the plain one.
        self.sgd_cost = cost_reg
        self.monitor_vars = [[cost_reg], [error_rate_reg]]
        self.monitor_vars_valid = [[cost], [error_rate]]

        # Initialize bricks
        for brick in bricks:
            brick.weights_init = config.weights_init
            brick.biases_init = config.biases_init
            brick.initialize()
Example #58
0
 def __init__(self, variable, **kwargs):
     """Store the Theano function obtained from the graph of `variable`.

     Remaining keyword arguments are forwarded to the parent constructor.
     """
     super(SaveComputationGraph, self).__init__(**kwargs)
     self.theano_function = (
         ComputationGraph(variable).get_theano_function())
Example #59
0
def train(cli_params):
    """Run a complete training session and return its log as a DataFrame.

    The save directory is prepared from ``cli_params['save_to']`` and stored
    back under ``cli_params['save_dir']``; a DEBUG-level file handler writing
    to ``log.txt`` in that directory is added to the module logger. The
    parameters, the data (without the test set) and the model are then set
    up and a ``MainLoop`` is run with monitoring, saving, printing and
    learning-rate-decay extensions.

    Returns ``None`` when the log status entry ``'epoch_interrupt_received'``
    is truthy, otherwise the DataFrame built from the main-loop log.
    """
    save_dir = prepare_dir(cli_params['save_to'])
    cli_params['save_dir'] = save_dir
    logfile = os.path.join(save_dir, 'log.txt')

    # Mirror everything down to DEBUG level into the log file.
    file_handler = logging.FileHandler(filename=logfile)
    file_handler.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=False)
    if not loaded:
        # Prepend the input dimension as the zeroth encoder layer.
        p.encoder_layers = (in_dim,) + p.encoder_layers

    ladder = setup_model(p)

    # Parameters reachable from the total cost are the ones being trained.
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Batch normalization updates are collected from the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    update_names = [u.name for u in bn_updates.keys()]
    assert 'counter' in update_names, \
        'No batch norm params in graph - the graph has been cut?'

    step_rule = Adam(learning_rate=ladder.lr.get_value())
    training_algorithm = GradientDescent(
        cost=ladder.costs.total, parameters=all_params, step_rule=step_rule)
    # Besides the gradient step, also apply the BN variable approximations.
    training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": {
            'T_C_class': ladder.costs.class_corr,
            'T_C_de': ladder.costs.denois.values(),
        },
        "valid_approx": OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_C_de', ladder.costs.denois.values()),
        ]),
        "valid_final": OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', ladder.costs.denois.values()),
        ]),
    }

    def clean_path_variables():
        # A fresh list per monitor, as each one received its own list.
        return ([ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values())

    # Datastream used for training
    training_stream = make_datastream(
        data.train, data.train_ind, p.batch_size,
        n_labeled=p.labeled_samples,
        n_unlabeled=p.unlabeled_samples,
        whiten=whiten,
        cnorm=cnorm)
    model = Model(ladder.costs.total)

    stop_after = FinishAfter(after_n_epochs=p.num_epochs)

    # Estimates the validation error with the running-average estimates of
    # the batch normalization mean and variance.
    approx_validation = ApproxTestMonitoring(
        clean_path_variables(),
        make_datastream(data.valid, data.valid_ind,
                        p.valid_batch_size, whiten=whiten, cnorm=cnorm,
                        scheme=ShuffledScheme),
        prefix="valid_approx")

    # Slower but more accurate: batch normalization parameters are first
    # estimated from training data, then a second pass computes the
    # validation error.
    final_validation = FinalTestMonitoring(
        clean_path_variables(),
        make_datastream(data.train, data.train_ind,
                        p.batch_size,
                        n_labeled=p.labeled_samples,
                        whiten=whiten, cnorm=cnorm,
                        scheme=ShuffledScheme),
        make_datastream(data.valid, data.valid_ind,
                        p.valid_batch_size,
                        n_labeled=len(data.valid_ind),
                        whiten=whiten, cnorm=cnorm,
                        scheme=ShuffledScheme),
        prefix="valid_final",
        after_n_epochs=p.num_epochs)

    training_monitor = TrainingDataMonitoring(
        [ladder.costs.total, ladder.costs.class_corr,
         training_algorithm.total_gradient_norm]
        + ladder.costs.denois.values(),
        prefix="train", after_epoch=True)

    extensions = [
        stop_after,
        approx_validation,
        final_validation,
        training_monitor,
        SaveParams(None, all_params, p.save_dir, after_epoch=True),
        SaveExpParams(p, p.save_dir, before_training=True),
        SaveLog(p.save_dir, after_training=True),
        ShortPrinting(short_prints),
        LRDecay(ladder.lr, p.num_epochs * p.lrate_decay, p.num_epochs,
                after_epoch=True),
    ]

    main_loop = MainLoop(
        training_algorithm,
        training_stream,
        model=model,
        extensions=extensions)
    main_loop.run()

    # Collect the results and report the last final-validation error.
    results = DataFrame.from_dict(main_loop.log, orient='index')
    col = 'valid_final_error_rate_clean'
    logger.info('%s %g' % (col, results[col].iloc[-1]))

    interrupted = main_loop.log.status['epoch_interrupt_received']
    return None if interrupted else results
Example #60
0
def dump_unlabeled_encoder(cli_params):
    """Evaluate one layer of the clean labeled path over a whole data split.

    The split is chosen by ``p.data_type`` ('train', 'valid' or 'test') and
    the layer by ``p.layer`` (a negative value counts from
    ``len(ladder.layers)``). For the 'valid' and 'test' splits a
    ``DummyLoop`` with ``FinalTestMonitoring`` is run first and the output is
    passed through ``TestMonitoring._get_bn_params``.

    :return: the first of the per-output arrays obtained by stacking the
        minibatch results of one epoch
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Pick the split to analyze; only 'train' skips the batchnorm pass.
    split = p.data_type
    if split not in ('train', 'valid', 'test'):
        raise Exception("Unknown data-type %s"%p.data_type)
    dset = getattr(data, split)
    indices = getattr(data, split + '_ind')
    calc_batchnorm = split != 'train'

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        monitored = ([ladder.costs.class_clean, ladder.error.clean,
                      ladder.oos.clean]
                     + ladder.costs.denois.values())
        # These settings need to match the ones used for training.
        bn_train_stream = make_datastream(
            data.train, data.train_ind, p.batch_size,
            n_labeled=p.labeled_samples,
            n_unlabeled=len(data.train_ind),
            balanced_classes=p.balanced_classes,
            cnorm=cnorm,
            whiten=whiten, scheme=ShuffledScheme)
        # The second stream is always the validation split, whichever
        # split is being dumped.
        bn_valid_stream = make_datastream(
            data.valid, data.valid_ind, p.valid_batch_size,
            n_labeled=len(data.valid_ind),
            n_unlabeled=len(data.valid_ind),
            balanced_classes=p.balanced_classes,
            cnorm=cnorm,
            whiten=whiten, scheme=ShuffledScheme)
        final_monitor = FinalTestMonitoring(
            monitored, bn_train_stream, bn_valid_stream,
            prefix="valid_final", before_training=True)
        summary = ShortPrinting({
            "valid_final": OrderedDict([
                ('VF_C_class', ladder.costs.class_clean),
                ('VF_E', ladder.error.clean),
                ('VF_O', ladder.oos.clean),
                ('VF_C_de', [ladder.costs.denois.get(i) for i in range(4)]),
            ]),
        }, after_training=True, use_log=False)
        main_loop = DummyLoop(extensions=[final_monitor, summary])
        main_loop.run()

    # Build a datastream that puts every index in the labeled pathway.
    all_ind = numpy.arange(dset.num_examples)
    n_examples = len(all_ind)
    ds = make_datastream(dset, all_ind,
                         batch_size=p.get('batch_size'),
                         n_labeled=n_examples,
                         n_unlabeled=n_examples,
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # A negative layer counts back from the number of layers (layer=-1
    # gives the values after softmax). `h` is a dict, not a list, so the
    # index has to be made non-negative explicitly.
    layer_key = p.layer if not p.layer < 0 else len(ladder.layers) + p.layer
    outputs = ladder.act.clean.labeled.h[layer_key]

    # Swap the batch normalization parameters for the shared variables.
    if calc_batchnorm:
        _, _, outputs = TestMonitoring()._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    # One pass over the epoch, feeding each graph input by its name.
    res = [f(*[batch[str(inp)] for inp in cg.inputs])
           for batch in ds.get_epoch_iterator(as_dict=True)]

    # Concatenate all minibatches, output by output.
    stacked = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    return stacked[0]