Example #1
 def _compile_logprobs_computer(self):
     # This filtering should return variables that are identical
     # in terms of computation, and we do not care which to use.
     probs = VariableFilter(
         applications=[self.generator.readout.emitter.probs],
         roles=[OUTPUT])(self.inner_cg)[0]
     logprobs = -tensor.log(probs)
     self.logprobs_computer = function(self.contexts + self.input_states,
                                       logprobs,
                                       on_unused_input='ignore')
Example #2
def test_collect():
    x = tensor.matrix()
    mlp = MLP(activations=[Logistic(), Logistic()], dims=[784, 100, 784],
              use_bias=False)
    cost = SquaredError().apply(x, mlp.apply(x))
    cg = ComputationGraph(cost)
    var_filter = VariableFilter(roles=[PARAMETER])
    W1, W2 = var_filter(cg.variables)
    for i, W in enumerate([W1, W2]):
        W.set_value(numpy.ones_like(W.get_value()) * (i + 1))
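    # collect_parameters concatenates the shared parameters into a single
    # flat shared variable; the originals reappear as COLLECTED views of it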
    new_cg = collect_parameters(cg, cg.shared_variables)
    collected_params, = new_cg.shared_variables
    assert numpy.all(collected_params.get_value()[:784 * 100] == 1.)
    assert numpy.all(collected_params.get_value()[784 * 100:] == 2.)
    assert collected_params.ndim == 1
    W1, W2 = VariableFilter(roles=[COLLECTED])(new_cg.variables)
    assert W1.eval().shape == (784, 100)
    assert numpy.all(W1.eval() == 1.)
    assert W2.eval().shape == (100, 784)
    assert numpy.all(W2.eval() == 2.)
Example #3
 def _create_model(with_dropout):
     cg = ComputationGraph(ali.compute_losses(x, z))
     if with_dropout:
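         # Gather the INPUT variables of the selected discriminator layers;
         # dropout is applied to them below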
         inputs = VariableFilter(
             bricks=([ali.discriminator.x_discriminator.layers[0]] +
                     ali.discriminator.x_discriminator.layers[2::3] +
                     ali.discriminator.z_discriminator.layers[::2] +
                     ali.discriminator.joint_discriminator.layers[::2]),
             roles=[INPUT])(cg.variables)
         cg = apply_dropout(cg, inputs, 0.2)
     return Model(cg.outputs)
Example #4
 def _compile_logprobs_computer(self, givens):
     """Modified version of ``BeamSearch._compile_logprobs_computer``
     with ``givens``.
     """
     probs = VariableFilter(
         applications=[beam_search.generator.readout.emitter.probs],
         roles=[OUTPUT])(beam_search.inner_cg)[0]
     logprobs = -T.log(probs)
     self.logprobs_computer = function(
         [self.src_indices] + beam_search.input_states, 
         logprobs,
         givens=givens)
Example #5
 def do(self, which_callback, *args, **kwargs):
     if which_callback == 'before_training':
         cg = ComputationGraph(self.main_loop.algorithm.total_step_norm)
         self._learning_rate_var, = VariableFilter(
             theano_name='learning_rate')(cg)
         logger.debug("Annealing extension is initialized")
     elif which_callback == 'after_epoch':
         logger.debug("Annealing the learning rate to {}".format(
             self._annealing_learning_rate))
         self._learning_rate_var.set_value(self._annealing_learning_rate)
     else:
         raise ValueError("don't know what to do")
Example #6
 def tag_dropout(self, variables, rng=None, **hyperparameters):
     from blocks.roles import INPUT
     from blocks.filter import VariableFilter
     rng = util.get_rng(seed=1)
     bricks_ = [brick for brick in util.all_bricks(self.emitters)
                if isinstance(brick, bricks.Linear)]
     variables = (VariableFilter(roles=[INPUT], bricks=bricks_)
                  (theano.gof.graph.ancestors(variables)))
     graph.add_transform(
         variables,
         graph.DropoutTransform("classifier_dropout", rng=rng),
         reason="regularization")
Example #7
def showcase(cg, output_name="tanh_apply_output", number=-1):
  import numpy
  import time
  first = True
  test_ds = get_data_stream(False)
  for image in next(test_ds.get_epoch_iterator())[0]:
    cg2 = cg.replace({cg.inputs[0]: numpy.asmatrix(image)})
    out = VariableFilter(theano_name_regex=output_name)(cg2.variables)[number]
    plot_images(image, out.eval(), first)
    first = False
    time.sleep(1)
  plt.close()
Example #8
    def __init__(self, samples):
        # Extracting information from the sampling computation graph
        self.cg = ComputationGraph(samples)
        self.inputs = self.cg.inputs
        self.generator = get_brick(samples)
        if not isinstance(self.generator, BaseSequenceGenerator):
            raise ValueError
        self.generate_call = get_application_call(samples)
        if (not self.generate_call.application == self.generator.generate):
            raise ValueError
        self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

        # Fetching names from the sequence generator
        self.context_names = self.generator.generate.contexts
        self.state_names = self.generator.generate.states

        # Parsing the inner computation graph of sampling scan
        self.contexts = [
            VariableFilter(bricks=[self.generator], name=name,
                           roles=[INPUT])(self.inner_cg)[0]
            for name in self.context_names
        ]
        self.input_states = []
        # Includes only those state names that were actually used
        # in 'generate'
        self.input_state_names = []
        for name in self.generator.generate.states:
            var = VariableFilter(bricks=[self.generator],
                                 name=name,
                                 roles=[INPUT])(self.inner_cg)
            if var:
                self.input_state_names.append(name)
                self.input_states.append(var[0])

        self.tv_overlap_name = ['tw_vocab_overlap']
        self.tv_overlap = [
            VariableFilter(bricks=[self.generator],
                           name=self.tv_overlap_name[0],
                           roles=[INPUT])(self.inner_cg)[0]
        ]
Example #9
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    mlp_car = MLP(activations=[Rectifier(), Rectifier(), None],
                  dims=[8 + 185, 200, 200, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_interval_car')
    mlp_car.initialize()
    mlp_nocar = MLP(activations=[Rectifier(), Rectifier(), None],
                    dims=[5 + 135, 200, 200, 1],
                    weights_init=IsotropicGaussian(.1),
                    biases_init=Constant(0),
                    name='mlp_interval_nocar')
    mlp_nocar.initialize()

    feature_car = tensor.concatenate((features_car_cat, features_car_int),
                                     axis=1)
    feature_nocar = tensor.concatenate(
        (features_nocar_cat, features_nocar_int), axis=1)
    prediction = mlp_nocar.apply(feature_nocar)
    # gating with the last feature: does the person own a car
    prediction += tensor.addbroadcast(features_hascar,
                                      1) * mlp_car.apply(feature_car)

    prediction_loc, _, _, _, = \
            build_mlp_onlyloc(features_car_cat, features_car_int,
                              features_nocar_cat, features_nocar_int,
                              features_cp, features_hascar,
                              means, labels)
    prediction += prediction_loc

    # add crm
    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print(input_var)

    cg_dropout1 = apply_dropout(cg, [input_var[6], input_var[7]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
Example #10
def use_decoder_on_representations(decoder, training_representation,
                                   sampling_representation):
    punctuation_marks = tensor.lmatrix('punctuation_marks')
    punctuation_marks_mask = tensor.matrix('punctuation_marks_mask')
    cost = decoder.cost(training_representation, punctuation_marks_mask,
                        punctuation_marks, punctuation_marks_mask)

    generated = decoder.generate(sampling_representation)
    search_model = Model(generated)
    _, samples = VariableFilter(bricks=[decoder.sequence_generator],
                                name="outputs")(ComputationGraph(generated[1]))

    return cost, samples, search_model, punctuation_marks, punctuation_marks_mask
Example #11
    def __init__(self, samples):
        # Extracting information from the sampling computation graph
        self.cg = ComputationGraph(samples)
        self.inputs = self.cg.inputs
        self.generator = get_brick(samples)
        if not isinstance(self.generator, BaseSequenceGenerator):
            raise ValueError
        self.generate_call = get_application_call(samples)
        if (not self.generate_call.application == self.generator.generate):
            raise ValueError
        self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

        # Fetching names from the sequence generator
        self.context_names = self.generator.generate.contexts
        self.state_names = self.generator.generate.states

        # WORKING: new function which returns all the outputs of the generate
        # function as auxiliary variables
        # WORKING: keep all the outputs of the generate function on the beam,
        # parse them at the end
        self.output_names = self.generator.generate.outputs

        # Parsing the inner computation graph of sampling scan
        self.contexts = [
            VariableFilter(bricks=[self.generator], name=name,
                           roles=[INPUT])(self.inner_cg)[0]
            for name in self.context_names
        ]
        self.input_states = []
        # Includes only those state names that were actually used
        # in 'generate'
        self.input_state_names = []
        for name in self.generator.generate.states:
            var = VariableFilter(bricks=[self.generator],
                                 name=name,
                                 roles=[INPUT])(self.inner_cg)
            if var:
                self.input_state_names.append(name)
                self.input_states.append(var[0])

        self.compiled = False
Example #12
 def activate_masks(self, cg):
     if self.mask_dict is None:
         return {}
     outputs = VariableFilter(roles=[OUTPUT])(cg)
     replace_masks = {}
     for mask_name, mask_value in self.mask_dict.items():
         if mask_name.startswith('recognizer/recognizer_'):
             mask_name = mask_name[24:]
         for output in outputs:
             if get_var_path(output).endswith(mask_name):
                 value = (np.float32(1.0) - mask_value).astype(output.dtype)
                 replace_masks[output] = output * value
     return cg.replace(replace_masks)
Example #13
def get_batchnorm_parameters(cg):
    """ Get the parameters marked with BATCHNORM_POPULATION
    Parameters
    ----------
    cg: `blocks.graph.ComputationGraph`
        computation graph to look through

    Returns
    -------
    variables: list
        list of variables
    """
    return VariableFilter(roles=[BATCHNORM_POPULATION])(cg.auxiliary_variables)
Example #14
    def init_beam_search(self, beam_size):
        """Compile beam search and set the beam size.

        See Blocks issue #500.

        """
        self.beam_size = beam_size
        generated = self.get_generate_graph()
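        # Beam search is built around the 'outputs' variable recorded by
        # the generator.generate application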
        samples, = VariableFilter(applications=[self.generator.generate],
                                  name="outputs")(ComputationGraph(
                                      generated['outputs']))
        self._beam_search = BeamSearch(beam_size, samples)
        self._beam_search.compile()
Example #15
    def train(self):
        error = tensor.neq(self.y.flatten(), self.y_hat.flatten() > 0.5).mean()
        error.name = 'error'
        self.error = error
        experiment = Experiment(self.params['model_name'], self.train_stream)
        experiment.cost = self.cost
        experiment.set_adam(self.params['learning_rate'])
        experiment.add_printing(after_epoch=True)
        experiment.monitor_f_score(self.y,
                                   self.y_hat,
                                   average='macro',
                                   threshold=self.params['threshold'])
        experiment.monitor_auc_score(self.y, self.y_hat, average='macro')
        experiment.add_timing()
        experiment.extensions.append(
            EarlyStopping('dev_f_score',
                          epochs=self.params['n_epochs'],
                          choose_best=max))
        weights = VariableFilter(theano_name='W')(experiment.cg.variables)
        experiment.regularize_max_norm(self.params['max_norms'], weights)
        experiment.apply_dropout(self.params['dropout'])
        experiment.track_best('dev_f_score',
                              save_path=self.params['model_name'] + '.tar',
                              choose_best=max)
        experiment.track_best('dev_cost',
                              save_path=self.params['model_name'] +
                              '_cost.tar')
        experiment.plot_channels(channels=[
            ['tra_f_score', 'dev_f_score'],
            ['tra_cost', 'dev_cost'],
        ],
                                 url_bokeh='http://localhost:5006/',
                                 before_first_epoch=True,
                                 after_epoch=True)
        experiment.add_monitored_vars([error])
        experiment.add_norm_grads_vars()
        experiment.monitor_stream(self.train_stream,
                                  prefix='tra',
                                  after_epoch=True)
        experiment.monitor_stream(self.dev_stream, prefix='dev')
        self.experiment = experiment

        print('# of params for the model: {0}'.format(
            experiment.get_num_params()))
        main_loop = experiment.get_main_loop()
        if not os.path.isfile(self.params['model_name'] + '.tar'):
            main_loop.run()

        with open(self.params['model_name'] + '.tar', "rb") as f:
            print('loading saved model...')
            main_loop.model.set_parameter_values(load_parameters(f))
Example #16
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
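    # Add L2 weight decay on both layers' weight matrices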
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost,
                                params=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(mnist_train.num_examples,
                                                     50)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=num_epochs),
            DataStreamMonitoring([cost, error_rate],
                                 DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(
                                                mnist_test.num_examples, 500)),
                                 prefix="test"),
            TrainingDataMonitoring([
                cost, error_rate,
                aggregation.mean(algorithm.total_gradient_norm)
            ],
                                   prefix="train",
                                   after_epoch=True),
            Checkpoint(save_to),
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]),
            Printing()
        ])
    main_loop.run()
Example #17
 def _compile_initial_state_and_context_computer(self):
     initial_states = VariableFilter(
         applications=[self.generator.initial_states],
         roles=[OUTPUT])(self.cg)
     outputs = OrderedDict([(v.tag.name, v) for v in initial_states])
     beam_size = unpack(
         VariableFilter(applications=[self.generator.initial_states],
                        name='batch_size')(self.cg))
     for name, context in equizip(self.context_names, self.contexts):
         outputs[name] = context
     for name, embedding in equizip(self.topical_names,
                                    self.topical_embeddings):
         outputs[name] = embedding
     for name, context in equizip(self.topical_context_names,
                                  self.topical_contexts):
         outputs[name] = context
     for name, embedding in equizip(self.content_names,
                                    self.content_embeddings):
         outputs[name] = embedding
     outputs['beam_size'] = beam_size
     self.initial_state_and_context_computer = function(
         self.inputs, outputs, on_unused_input='ignore')
Example #18
def get_linear_transformation_roles(mlp, cg):

    D_by_layer = defaultdict(dict)

    for (role, role_str) in [(INPUT, 'input'), (OUTPUT, 'output'),
                             (WEIGHT, 'weight'), (BIAS, 'bias')]:

        for v in VariableFilter(bricks=mlp.linear_transformations,
                                roles=[role])(cg.variables):
            key = v.tag.annotations[0].name
            D_by_layer[key][role_str] = v

    return D_by_layer
Example #19
    def buildObjective(self):
        """Builds the approximate objective corresponding to L_elbo in GMVAE article"""

        # self.z_prior might be the modified version
        self.L_elbo = T.mean(self.reconst + self.conditional_prior +
                             self.w_prior + self.z_prior)

        self.L_elbo_modif = T.mean(self.reconst + self.conditional_prior +
                                   self.w_prior_modif + self.z_prior_modif)

        # --- Getting model parameters --- #
        cg = ComputationGraph(self.L_elbo)
        #self.phi_theta is the list of all the parameters in q and p.
        self.params = VariableFilter(roles=[PARAMETER])(cg.variables)
Example #20
    def primal_step(self, x, y, learning_rate, input_dim, p, mask=None):
        if mask is None:
            self.model = self.model(x, y, input_dim, p)
        else:
            self.model = self.model(x, y, input_dim, p, mask=mask)
        probs = self.model.create_model()
        cost = T.sum((probs - y.dimshuffle(0, 'x'))**2)
        cg = ComputationGraph([cost])

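        # Only the WEIGHT-role parameters are passed to the Adam step rule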
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)

        updates = Adam(cost, weights)

        return updates, cost
Example #21
def get_algorithm_parameters_dict(algorithm, model):
    name_to_var = model.get_parameter_dict()
    var_to_name = {v: k for k, v in name_to_var.items()}

    output_dict = dict()

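    # For each parameter update, save the ALGORITHM_BUFFER shared variables
    # (step-rule state), keyed under the owning parameter's name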
    for val, update in algorithm.steps.items():
        cg = ComputationGraph([update])
        shared_to_save = VariableFilter(roles=[ALGORITHM_BUFFER])(cg)

        parent_name = var_to_name[val]
        for k in shared_to_save:
            output_dict[parent_name+"/"+k.name] = k
    return output_dict
Example #22
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx x 1

    # r_rep is nx x nj x nr
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (nr + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)
    mlp_input = concat.reshape((nx * nj, nr + 1))

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim + 1] + hidden_dims + [output_dim])

    activations = mlp.apply(mlp_input)

    act_sh = activations.reshape((nx, nj, output_dim))
    final = act_sh.mean(axis=1)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    apply_noise(cg, noise_vars, noise_std)
    [cost_reg, error_rate_reg] = cg.outputs

    return cost_reg, error_rate_reg, cost, error_rate
Example #23
    def do(self, callback_name, *args):
        current_value = self.main_loop.log.current_row.get(self.track_var)
        if current_value is None:
            return

        if current_value < self.best_value - self.epsilon:
            self.best_value = current_value
            self.counter = 0
            # self.iteration_state = copy.deepcopy(self.main_loop.iteration_state)
            self.log = copy.deepcopy(self.main_loop.log)
            self.parameter_values = self.main_loop.model.get_parameter_values()
        else:
            self.counter += 1

        # If the value is NaN, exceed the patience immediately to trigger a rollback.
        if math.isnan(current_value):
            self.counter = self.patience + 1

        if self.algorithm_buffers is None:
            self.algorithm_buffers = [
                x for x, y in self.main_loop.algorithm.step_rule_updates
            ]
            self.algorithm_buffers = VariableFilter(roles=[ALGORITHM_BUFFER])(
                self.algorithm_buffers)
            # self.algorithm_values = [x.get_value() for x in self.algorithm_buffers]

        if self.counter > self.patience:
            self.counter = 0
            # self.main_loop.iteration_state = self.iteration_state
            #self.main_loop.log = self.log
            self.main_loop.model.set_parameter_values(self.parameter_values)

            # Reset algorithm buffer
            for var in self.algorithm_buffers:
                var_value = var.get_value()
                var.set_value(
                    numpy.zeros(var_value.shape, dtype=var_value.dtype))

            # Reset states
            for var in self.states:
                var_value = var.get_value()
                var.set_value(
                    numpy.zeros(var_value.shape, dtype=var_value.dtype))

            self.lr.set_value(float(0.5 * self.lr.get_value()))

            if self.lr.get_value() < self.tolerance:
                self.main_loop.log.current_row[
                    'training_finish_requested'] = True
Example #24
    def create_computation_graph():
        # Encode
        phi = encoder_mlp.apply(encoder_convnet.apply(x).flatten(ndim=2))
        nlat = encoder_mlp.output_dim // 2
        mu_phi = phi[:, :nlat]
        log_sigma_phi = phi[:, nlat:]
        # Sample from the approximate posterior
        epsilon = random_brick.theano_rng.normal(size=mu_phi.shape,
                                                 dtype=mu_phi.dtype)
        z = mu_phi + epsilon * tensor.exp(log_sigma_phi)
        # Decode
        mu_theta = decoder_convnet.apply(
            decoder_mlp.apply(z).reshape((-1, ) +
                                         decoder_convnet.get_dim('input_')))
        log_sigma = log_sigma_theta.dimshuffle('x', 0, 1, 2)

        # Compute KL and reconstruction terms
        kl_term = 0.5 * (tensor.exp(2 * log_sigma_phi) + mu_phi**2 -
                         2 * log_sigma_phi - 1).sum(axis=1)
        reconstruction_term = -0.5 * (
            tensor.log(2 * pi) + 2 * log_sigma +
            (x - mu_theta)**2 / tensor.exp(2 * log_sigma)).sum(axis=[1, 2, 3])
        total_reconstruction_term = reconstruction_term

        if discriminative_regularization:
            # Propagate both the input and the reconstruction through the
            # classifier
            acts_cg = ComputationGraph([classifier_convnet.apply(x)])
            acts_hat_cg = ComputationGraph(
                [classifier_convnet.apply(mu_theta)])

            # Retrieve activations of interest and compute discriminative
            # regularization reconstruction terms
            for layer, log_sigma in zip(classifier_convnet.layers[4::6],
                                        variance_parameters[1:]):
                variable_filter = VariableFilter(roles=[OUTPUT],
                                                 bricks=[layer])
                d, = variable_filter(acts_cg)
                d_hat, = variable_filter(acts_hat_cg)
                log_sigma = log_sigma.dimshuffle('x', 0, 1, 2)

                total_reconstruction_term += -0.5 * (
                    tensor.log(2 * pi) + 2 * log_sigma +
                    (d - d_hat)**2 / tensor.exp(2 * log_sigma)).sum(
                        axis=[1, 2, 3])

        cost = (kl_term - total_reconstruction_term).mean()

        return ComputationGraph([cost, kl_term, reconstruction_term])
Example #25
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum((w**2).sum() for w in ws)
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=False)

    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    main_loop = MainLoop(algorithm,
                         train_stream,
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example #26
    def analyze(self, inputs, groundtruth, prediction=None):
        """Compute cost and aligment."""

        input_values_dict = dict(inputs)
        input_values_dict['groundtruth'] = groundtruth
        if prediction is not None:
            input_values_dict['prediction'] = prediction
        if not hasattr(self, "_analyze"):
            input_variables = list(self.single_inputs.values())
            input_variables.append(self.single_labels.copy(name='groundtruth'))

            prediction_variable = tensor.lvector('prediction')
            if prediction is not None:
                input_variables.append(prediction_variable)
                cg = self.get_cost_graph(
                    batch=False, prediction=prediction_variable[:, None])
            else:
                cg = self.get_cost_graph(batch=False)
            cost = cg.outputs[0]

            weights, = VariableFilter(bricks=[self.generator],
                                      name="weights")(cg)

            energies = VariableFilter(bricks=[self.generator],
                                      name="energies")(cg)
            energies_output = [
                energies[0][:, 0, :] if energies else tensor.zeros_like(weights)
            ]

            self._analyze = theano.function(input_variables,
                                            [cost[:, 0], weights[:, 0, :]] +
                                            energies_output,
                                            on_unused_input='warn')
        return self._analyze(**input_values_dict)
Example #27
def load_params_and_get_beam_search(exp_config):

    encoder = BidirectionalEncoder(exp_config['src_vocab_size'],
                                   exp_config['enc_embed'],
                                   exp_config['enc_nhids'])

    # let user specify the target transition class name in config,
    # eval it and pass to decoder
    target_transition_name = exp_config.get(
        'target_transition', 'GRUInitialStateWithInitialStateSumContext')
    target_transition = eval(target_transition_name)

    decoder = InitialContextDecoder(exp_config['trg_vocab_size'],
                                    exp_config['dec_embed'],
                                    exp_config['dec_nhids'],
                                    exp_config['enc_nhids'] * 2,
                                    exp_config['context_dim'],
                                    target_transition)

    # Create Theano variables
    logger.info('Creating theano variables')
    sampling_input = tensor.lmatrix('source')
    sampling_context = tensor.matrix('context_input')

    logger.info("Building sampling model")
    sampling_representation = encoder.apply(sampling_input,
                                            tensor.ones(sampling_input.shape))

    generated = decoder.generate(sampling_input, sampling_representation,
                                 sampling_context)
    _, samples = VariableFilter(
        bricks=[decoder.sequence_generator], name="outputs")(ComputationGraph(
            generated[1]))  # generated[1] is next_outputs

    beam_search = BeamSearch(samples=samples)

    # Set the parameters
    logger.info("Creating Model...")
    model = Model(generated)
    logger.info("Loading parameters from model: {}".format(
        exp_config['saved_parameters']))

    # load the parameter values from an .npz file
    param_values = LoadNMT.load_parameter_values(
        exp_config['saved_parameters'])
    LoadNMT.set_model_parameters(model, param_values)

    return beam_search, sampling_input, sampling_context
Example #28
def test_variable_filter():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')
    brick2 = Bias(2, name='bias1')
    activation = Sigmoid(name='sigm')

    x = tensor.vector()
    h1 = brick1.apply(x)
    h2 = activation.apply(h1)
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.params[0]]
    bias = [brick1.b, brick2.params[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert not parameters == role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by bricks classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by bricks instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name='W_norm')
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by application
    appli_filter = VariableFilter(application=brick1.apply)
    variables = [cg.variables[1], cg.variables[8]]
    assert variables == appli_filter(cg.variables)
Example #29
    def init_beam_search(self, beam_size):
        """Compile beam search and set the beam size.

        See Blocks issue #500.

        """
        if hasattr(self, '_beam_search') and self.beam_size == beam_size:
            # Only recompile if the user wants a different beam size
            return
        self.beam_size = beam_size
        generated = self.get_generate_graph(use_mask=False, n_steps=3)
        cg = ComputationGraph(generated.values())
        samples, = VariableFilter(
            applications=[self.generator.generate], name="samples")(cg)
        self._beam_search = BeamSearch(beam_size, samples)
        self._beam_search.compile()
Example #30
    def primal_step(self, x, y, learning_rate, input_dim, p, mask=None):
        if mask is None:
            self.model = self.model(x, y, input_dim, p)
        else:
            self.model = self.model(x, y, input_dim, p, mask=mask)
        cost = self.model.create_model()

        flag = T.eq(y, 1) * (self.gamma[0] * self.alpha[0] +
                             self.gamma[1] * self.beta[0]) +\
            T.eq(y, 0) * (self.gamma[0] * self.alpha[1] +
                          self.gamma[1] * self.beta[0])

        q0 = theano.shared(np.float32(0), name='q0')
        q1 = theano.shared(np.float32(0), name='q1')
        r0 = theano.shared(np.float32(0), name='r0')
        r1 = theano.shared(np.float32(0), name='r1')

        q0_temp = q0 * self.t + T.mean(
            (T.eq(y, 1) * self.alpha[0] +
             T.eq(y, 0) * self.alpha[1]).dimshuffle(0, 'x') * cost)
        q1_temp = q1 * self.t + T.mean(
            (T.eq(y, 1) * self.beta[0] + T.eq(y, 0) * self.beta[1]).dimshuffle(
                0, 'x') * cost)
        # Update r
        r0_next = (r0 * self.t + T.mean(
            T.eq(y, 1).dimshuffle(0, 'x') * cost)) * 1.0 / (self.t + 1)
        r1_next = (r1 * self.t + T.mean(
            T.eq(y, 0).dimshuffle(0, 'x') * cost)) * 1.0 / (self.t + 1)

        # Update q
        q0_next = (q0_temp - self.dual_class.dual1_fn(self.alpha)) / (self.t +
                                                                      1)
        q1_next = (q1_temp - self.dual_class.dual2_fn(self.beta)) / (self.t +
                                                                     1)

        primal_updates = [(q0, q0_next), (q1, q1_next), (r0, r0_next),
                          (r1, r1_next), (self.t, self.t + 1)]

        cost_weighed = T.mean(cost * flag.dimshuffle(0, 'x'))
        cg = ComputationGraph([cost_weighed])

        weights = VariableFilter(roles=[WEIGHT])(cg.variables)

        updates = Adam(cost_weighed, weights) + primal_updates

        primal_var = [[r0, r1], [q0, q1]]
        return updates, cost_weighed, cost, primal_var