def _compile_logprobs_computer(self):
    # This filtering should return variables that are identical
    # in terms of computations, and we do not care which to use.
    probs = VariableFilter(
        applications=[self.generator.readout.emitter.probs],
        roles=[OUTPUT])(self.inner_cg)[0]
    logprobs = -tensor.log(probs)
    self.logprobs_computer = function(
        self.contexts + self.input_states, logprobs,
        on_unused_input='ignore')
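# A minimal, self-contained sketch of the VariableFilter pattern the
# snippets in this file rely on: build a ComputationGraph, pull out a
# variable by brick and role, and compile a Theano function from it.
# The brick and variable names here are illustrative assumptions, not
# taken from the snippet above.
import theano.tensor as tensor
from theano import function
from blocks.bricks import Linear, Logistic
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.initialization import Constant, IsotropicGaussian
from blocks.roles import OUTPUT

x = tensor.matrix('x')
linear = Linear(input_dim=4, output_dim=3, name='proj',
                weights_init=IsotropicGaussian(0.01),
                biases_init=Constant(0))
probs = Logistic().apply(linear.apply(x))
linear.initialize()
cg = ComputationGraph(probs)

# Select the OUTPUT-role variable of the Logistic brick, mirroring how
# the emitter's probs variable is extracted and compiled above.
out, = VariableFilter(bricks=[Logistic], roles=[OUTPUT])(cg.variables)
logprobs_computer = function([x], -tensor.log(out),
                             on_unused_input='ignore')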
def test_collect():
    x = tensor.matrix()
    mlp = MLP(activations=[Logistic(), Logistic()],
              dims=[784, 100, 784],
              use_bias=False)
    cost = SquaredError().apply(x, mlp.apply(x))
    cg = ComputationGraph(cost)
    var_filter = VariableFilter(roles=[PARAMETER])
    W1, W2 = var_filter(cg.variables)
    for i, W in enumerate([W1, W2]):
        W.set_value(numpy.ones_like(W.get_value()) * (i + 1))
    new_cg = collect_parameters(cg, cg.shared_variables)
    collected_params, = new_cg.shared_variables
    assert numpy.all(collected_params.get_value()[:784 * 100] == 1.)
    assert numpy.all(collected_params.get_value()[784 * 100:] == 2.)
    assert collected_params.ndim == 1
    W1, W2 = VariableFilter(roles=[COLLECTED])(new_cg.variables)
    assert W1.eval().shape == (784, 100)
    assert numpy.all(W1.eval() == 1.)
    assert W2.eval().shape == (100, 784)
    assert numpy.all(W2.eval() == 2.)
def _create_model(with_dropout):
    cg = ComputationGraph(ali.compute_losses(x, z))
    if with_dropout:
        inputs = VariableFilter(
            bricks=([ali.discriminator.x_discriminator.layers[0]] +
                    ali.discriminator.x_discriminator.layers[2::3] +
                    ali.discriminator.z_discriminator.layers[::2] +
                    ali.discriminator.joint_discriminator.layers[::2]),
            roles=[INPUT])(cg.variables)
        cg = apply_dropout(cg, inputs, 0.2)
    return Model(cg.outputs)
def _compile_logprobs_computer(self, givens):
    """Modified version of ``BeamSearch._compile_logprobs_computer``
    with ``givens``.
    """
    probs = VariableFilter(
        applications=[beam_search.generator.readout.emitter.probs],
        roles=[OUTPUT])(beam_search.inner_cg)[0]
    logprobs = -T.log(probs)
    self.logprobs_computer = function(
        [self.src_indices] + beam_search.input_states,
        logprobs, givens=givens)
def do(self, which_callback, *args, **kwargs):
    if which_callback == 'before_training':
        cg = ComputationGraph(self.main_loop.algorithm.total_step_norm)
        self._learning_rate_var, = VariableFilter(
            theano_name='learning_rate')(cg)
        logger.debug("Annealing extension is initialized")
    elif which_callback == 'after_epoch':
        logger.debug("Annealing the learning rate to {}".format(
            self._annealing_learning_rate))
        self._learning_rate_var.set_value(self._annealing_learning_rate)
    else:
        raise ValueError("don't know what to do")
def tag_dropout(self, variables, rng=None, **hyperparameters):
    from blocks.roles import INPUT
    from blocks.filter import VariableFilter
    # Fall back to a fixed-seed rng only when none is provided
    # (the original overwrote the argument unconditionally).
    if rng is None:
        rng = util.get_rng(seed=1)
    bricks_ = [brick for brick in util.all_bricks(self.emitters)
               if isinstance(brick, bricks.Linear)]
    variables = (VariableFilter(roles=[INPUT], bricks=bricks_)
                 (theano.gof.graph.ancestors(variables)))
    graph.add_transform(
        variables,
        graph.DropoutTransform("classifier_dropout", rng=rng),
        reason="regularization")
def showcase(cg, output_name="tanh_apply_output", number=-1):
    import numpy
    import time
    first = True
    test_ds = get_data_stream(False)
    for image in next(test_ds.get_epoch_iterator())[0]:
        cg2 = cg.replace({cg.inputs[0]: numpy.asmatrix(image)})
        out = (VariableFilter(theano_name_regex=output_name)
               (cg2.variables))[number]
        plot_images(image, out.eval(), first)
        first = False
        time.sleep(1)
    plt.close()
def __init__(self, samples):
    # Extracting information from the sampling computation graph
    self.cg = ComputationGraph(samples)
    self.inputs = self.cg.inputs
    self.generator = get_brick(samples)
    if not isinstance(self.generator, BaseSequenceGenerator):
        raise ValueError
    self.generate_call = get_application_call(samples)
    if (not self.generate_call.application == self.generator.generate):
        raise ValueError
    self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

    # Fetching names from the sequence generator
    self.context_names = self.generator.generate.contexts
    self.state_names = self.generator.generate.states

    # Parsing the inner computation graph of sampling scan
    self.contexts = [
        VariableFilter(bricks=[self.generator], name=name,
                       roles=[INPUT])(self.inner_cg)[0]
        for name in self.context_names]
    self.input_states = []
    # Includes only those state names that were actually used
    # in 'generate'
    self.input_state_names = []
    for name in self.generator.generate.states:
        var = VariableFilter(bricks=[self.generator], name=name,
                             roles=[INPUT])(self.inner_cg)
        if var:
            self.input_state_names.append(name)
            self.input_states.append(var[0])

    self.tv_overlap_name = ['tw_vocab_overlap']
    self.tv_overlap = [
        VariableFilter(bricks=[self.generator],
                       name=self.tv_overlap_name[0],
                       roles=[INPUT])(self.inner_cg)[0]]
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar,
              means, labels):
    mlp_car = MLP(activations=[Rectifier(), Rectifier(), None],
                  dims=[8 + 185, 200, 200, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_interval_car')
    mlp_car.initialize()
    mlp_nocar = MLP(activations=[Rectifier(), Rectifier(), None],
                    dims=[5 + 135, 200, 200, 1],
                    weights_init=IsotropicGaussian(.1),
                    biases_init=Constant(0),
                    name='mlp_interval_nocar')
    mlp_nocar.initialize()

    feature_car = tensor.concatenate((features_car_cat, features_car_int),
                                     axis=1)
    feature_nocar = tensor.concatenate(
        (features_nocar_cat, features_nocar_int), axis=1)
    prediction = mlp_nocar.apply(feature_nocar)
    # gating with the last feature: does the dude own a car
    prediction += tensor.addbroadcast(features_hascar, 1) * \
        mlp_car.apply(feature_car)

    prediction_loc, _, _, _ = build_mlp_onlyloc(
        features_car_cat, features_car_int,
        features_nocar_cat, features_nocar_int,
        features_cp, features_hascar, means, labels)
    prediction += prediction_loc

    # add crm
    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print(input_var)

    cg_dropout1 = apply_dropout(cg, [input_var[6], input_var[7]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
def use_decoder_on_representations(decoder, training_representation,
                                   sampling_representation):
    punctuation_marks = tensor.lmatrix('punctuation_marks')
    punctuation_marks_mask = tensor.matrix('punctuation_marks_mask')
    cost = decoder.cost(training_representation, punctuation_marks_mask,
                        punctuation_marks, punctuation_marks_mask)

    generated = decoder.generate(sampling_representation)
    search_model = Model(generated)
    _, samples = VariableFilter(
        bricks=[decoder.sequence_generator],
        name="outputs")(ComputationGraph(generated[1]))

    return (cost, samples, search_model, punctuation_marks,
            punctuation_marks_mask)
def __init__(self, samples):
    # Extracting information from the sampling computation graph
    self.cg = ComputationGraph(samples)
    self.inputs = self.cg.inputs
    self.generator = get_brick(samples)
    if not isinstance(self.generator, BaseSequenceGenerator):
        raise ValueError
    self.generate_call = get_application_call(samples)
    if (not self.generate_call.application == self.generator.generate):
        raise ValueError
    self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

    # Fetching names from the sequence generator
    self.context_names = self.generator.generate.contexts
    self.state_names = self.generator.generate.states

    # WORKING: new function which returns all the outputs of the
    # generate function as auxiliary variables
    # WORKING: keep all the outputs of the generate function on the
    # beam, parse them at the end
    self.output_names = self.generator.generate.outputs

    # Parsing the inner computation graph of sampling scan
    self.contexts = [
        VariableFilter(bricks=[self.generator], name=name,
                       roles=[INPUT])(self.inner_cg)[0]
        for name in self.context_names]
    self.input_states = []
    # Includes only those state names that were actually used
    # in 'generate'
    self.input_state_names = []
    for name in self.generator.generate.states:
        var = VariableFilter(bricks=[self.generator], name=name,
                             roles=[INPUT])(self.inner_cg)
        if var:
            self.input_state_names.append(name)
            self.input_states.append(var[0])

    self.compiled = False
def activate_masks(self, cg):
    if self.mask_dict is None:
        # Nothing to mask; return the graph unchanged.
        return cg
    outputs = VariableFilter(roles=[OUTPUT])(cg)
    replace_masks = {}
    for mask_name, mask_value in self.mask_dict.items():
        if mask_name.startswith('recognizer/recognizer_'):
            mask_name = mask_name[24:]
        for output in outputs:
            if get_var_path(output).endswith(mask_name):
                value = (np.float32(1.0) - mask_value).astype(output.dtype)
                replace_masks[output] = output * value
    return cg.replace(replace_masks)
def get_batchnorm_parameters(cg):
    """Get the parameters marked with BATCHNORM_POPULATION.

    Parameters
    ----------
    cg : :class:`blocks.graph.ComputationGraph`
        Computation graph to look through.

    Returns
    -------
    variables : list
        List of variables.

    """
    return VariableFilter(roles=[BATCHNORM_POPULATION])(
        cg.auxiliary_variables)
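# A hedged usage sketch for the helper above: collect the population
# statistics after building an annotated cost graph. `training_cost`
# is an assumed stand-in for whatever batch-normalized cost variable
# the surrounding code constructs; it is not a name from this codebase.
cg = ComputationGraph(training_cost)
for population_stat in get_batchnorm_parameters(cg):
    print(population_stat.name)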
def init_beam_search(self, beam_size):
    """Compile beam search and set the beam size.

    See Blocks issue #500.

    """
    self.beam_size = beam_size
    generated = self.get_generate_graph()
    samples, = VariableFilter(
        applications=[self.generator.generate],
        name="outputs")(ComputationGraph(generated['outputs']))
    self._beam_search = BeamSearch(beam_size, samples)
    self._beam_search.compile()
def train(self):
    error = tensor.neq(self.y.flatten(),
                       self.y_hat.flatten() > 0.5).mean()
    error.name = 'error'
    self.error = error

    experiment = Experiment(self.params['model_name'], self.train_stream)
    experiment.cost = self.cost
    experiment.set_adam(self.params['learning_rate'])
    experiment.add_printing(after_epoch=True)
    experiment.monitor_f_score(self.y, self.y_hat, average='macro',
                               threshold=self.params['threshold'])
    experiment.monitor_auc_score(self.y, self.y_hat, average='macro')
    experiment.add_timing()
    experiment.extensions.append(
        EarlyStopping('dev_f_score', epochs=self.params['n_epochs'],
                      choose_best=max))

    weights = VariableFilter(theano_name='W')(experiment.cg.variables)
    experiment.regularize_max_norm(self.params['max_norms'], weights)
    experiment.apply_dropout(self.params['dropout'])

    experiment.track_best('dev_f_score',
                          save_path=self.params['model_name'] + '.tar',
                          choose_best=max)
    experiment.track_best('dev_cost',
                          save_path=self.params['model_name'] + '_cost.tar')

    experiment.plot_channels(channels=[['tra_f_score', 'dev_f_score'],
                                       ['tra_cost', 'dev_cost']],
                             url_bokeh='http://localhost:5006/',
                             before_first_epoch=True, after_epoch=True)

    experiment.add_monitored_vars([error])
    experiment.add_norm_grads_vars()
    experiment.monitor_stream(self.train_stream, prefix='tra',
                              after_epoch=True)
    experiment.monitor_stream(self.dev_stream, prefix='dev')
    self.experiment = experiment

    print('# of params for the model: {0}'.format(
        experiment.get_num_params()))

    main_loop = experiment.get_main_loop()
    if not os.path.isfile(self.params['model_name'] + '.tar'):
        main_loop.run()

    with open(self.params['model_name'] + '.tar', "rb") as f:
        print('loading saved model...')
        main_loop.model.set_parameter_values(load_parameters(f))
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=num_epochs),
            DataStreamMonitoring(
                [cost, error_rate],
                DataStream(mnist_test,
                           iteration_scheme=SequentialScheme(
                               mnist_test.num_examples, 500)),
                prefix="test"),
            TrainingDataMonitoring(
                [cost, error_rate,
                 aggregation.mean(algorithm.total_gradient_norm)],
                prefix="train",
                after_epoch=True),
            Checkpoint(save_to),
            Plot('MNIST example',
                 channels=[
                     ['test_final_cost',
                      'test_misclassificationrate_apply_error_rate'],
                     ['train_total_gradient_norm']]),
            Printing()])
    main_loop.run()
def _compile_initial_state_and_context_computer(self):
    initial_states = VariableFilter(
        applications=[self.generator.initial_states],
        roles=[OUTPUT])(self.cg)
    outputs = OrderedDict([(v.tag.name, v) for v in initial_states])
    beam_size = unpack(VariableFilter(
        applications=[self.generator.initial_states],
        name='batch_size')(self.cg))
    for name, context in equizip(self.context_names, self.contexts):
        outputs[name] = context
    for name, embedding in equizip(self.topical_names,
                                   self.topical_embeddings):
        outputs[name] = embedding
    for name, context in equizip(self.topical_context_names,
                                 self.topical_contexts):
        outputs[name] = context
    for name, embedding in equizip(self.content_names,
                                   self.content_embeddings):
        outputs[name] = embedding
    outputs['beam_size'] = beam_size
    self.initial_state_and_context_computer = function(
        self.inputs, outputs, on_unused_input='ignore')
def get_linear_transformation_roles(mlp, cg):
    D_by_layer = defaultdict(dict)
    for (role, role_str) in [(INPUT, 'input'), (OUTPUT, 'output'),
                             (WEIGHT, 'weight'), (BIAS, 'bias')]:
        for v in VariableFilter(bricks=mlp.linear_transformations,
                                roles=[role])(cg.variables):
            key = v.tag.annotations[0].name
            D_by_layer[key][role_str] = v
    return D_by_layer
def buildObjective(self):
    """Builds the approximate objective corresponding to L_elbo
    in the GMVAE article."""
    # self.z_prior might be the modified version
    self.L_elbo = T.mean(self.reconst + self.conditional_prior +
                         self.w_prior + self.z_prior)
    self.L_elbo_modif = T.mean(self.reconst + self.conditional_prior +
                               self.w_prior_modif + self.z_prior_modif)

    # ---Getting model parameters---
    cg = ComputationGraph(self.L_elbo)
    # self.params is the list of all the parameters in q and p
    # (phi and theta).
    self.params = VariableFilter(roles=[PARAMETER])(cg.variables)
def primal_step(self, x, y, learning_rate, input_dim, p, mask=None):
    if mask is None:
        self.model = self.model(x, y, input_dim, p)
    else:
        self.model = self.model(x, y, input_dim, p, mask=mask)
    probs = self.model.create_model()
    cost = T.sum((probs - y.dimshuffle(0, 'x')) ** 2)
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    updates = Adam(cost, weights)
    return updates, cost
def get_algorithm_parameters_dict(algorithm, model):
    name_to_var = model.get_parameter_dict()
    var_to_name = {v: k for k, v in name_to_var.items()}
    output_dict = dict()
    for val, update in algorithm.steps.items():
        cg = ComputationGraph([update])
        shared_to_save = VariableFilter(roles=[ALGORITHM_BUFFER])(cg)
        parent_name = var_to_name[val]
        for k in shared_to_save:
            output_dict[parent_name + "/" + k.name] = k
    return output_dict
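# A hedged usage sketch: inspecting the optimizer buffers collected by
# get_algorithm_parameters_dict. The `algorithm` and `model` objects
# are assumed to come from an existing Blocks training setup; the
# collected buffers are shared variables, so get_value() is available.
buffers = get_algorithm_parameters_dict(algorithm, model)
for name, var in buffers.items():
    print(name, var.get_value().shape)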
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx x 1

    # r_rep is nx x nj x nr
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (nr + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)
    mlp_input = concat.reshape((nx * nj, nr + 1))

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim + 1] + hidden_dims + [output_dim])

    activations = mlp.apply(mlp_input)
    act_sh = activations.reshape((nx, nj, output_dim))
    final = act_sh.mean(axis=1)

    cost = Softmax().categorical_cross_entropy(y, final).mean()
    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # Apply noise. Note that apply_noise returns a new graph rather
    # than modifying the original in place, so reassign cg.
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    cg = apply_noise(cg, noise_vars, noise_std)
    [cost_reg, error_rate_reg] = cg.outputs

    return cost_reg, error_rate_reg, cost, error_rate
def do(self, callback_name, *args):
    current_value = self.main_loop.log.current_row.get(self.track_var)
    if current_value is None:
        return

    if current_value < self.best_value - self.epsilon:
        self.best_value = current_value
        self.counter = 0
        # self.iteration_state = copy.deepcopy(
        #     self.main_loop.iteration_state)
        self.log = copy.deepcopy(self.main_loop.log)
        self.parameter_values = self.main_loop.model.get_parameter_values()
    else:
        self.counter += 1

    # If nan, skip steps to go back.
    if math.isnan(current_value):
        self.counter = self.patience + 1

    if self.algorithm_buffers is None:
        self.algorithm_buffers = [
            x for x, y in self.main_loop.algorithm.step_rule_updates]
        self.algorithm_buffers = VariableFilter(
            roles=[ALGORITHM_BUFFER])(self.algorithm_buffers)
        # self.algorithm_values = [x.get_value()
        #                          for x in self.algorithm_buffers]

    if self.counter > self.patience:
        self.counter = 0
        # self.main_loop.iteration_state = self.iteration_state
        # self.main_loop.log = self.log
        self.main_loop.model.set_parameter_values(self.parameter_values)

        # Reset algorithm buffers
        for var in self.algorithm_buffers:
            var_value = var.get_value()
            var.set_value(numpy.zeros(var_value.shape,
                                      dtype=var_value.dtype))
        # Reset states
        for var in self.states:
            var_value = var.get_value()
            var.set_value(numpy.zeros(var_value.shape,
                                      dtype=var_value.dtype))

        self.lr.set_value(float(0.5 * self.lr.get_value()))
        if self.lr.get_value() < self.tolerance:
            self.main_loop.log.current_row[
                'training_finish_requested'] = True
def create_computation_graph():
    # Encode
    phi = encoder_mlp.apply(encoder_convnet.apply(x).flatten(ndim=2))
    nlat = encoder_mlp.output_dim // 2
    mu_phi = phi[:, :nlat]
    log_sigma_phi = phi[:, nlat:]
    # Sample from the approximate posterior
    epsilon = random_brick.theano_rng.normal(size=mu_phi.shape,
                                             dtype=mu_phi.dtype)
    z = mu_phi + epsilon * tensor.exp(log_sigma_phi)
    # Decode
    mu_theta = decoder_convnet.apply(
        decoder_mlp.apply(z).reshape(
            (-1,) + decoder_convnet.get_dim('input_')))
    log_sigma = log_sigma_theta.dimshuffle('x', 0, 1, 2)

    # Compute KL and reconstruction terms
    kl_term = 0.5 * (
        tensor.exp(2 * log_sigma_phi) + mu_phi ** 2 -
        2 * log_sigma_phi - 1).sum(axis=1)
    reconstruction_term = -0.5 * (
        tensor.log(2 * pi) + 2 * log_sigma +
        (x - mu_theta) ** 2 / tensor.exp(2 * log_sigma)).sum(
            axis=[1, 2, 3])
    total_reconstruction_term = reconstruction_term

    if discriminative_regularization:
        # Propagate both the input and the reconstruction through the
        # classifier
        acts_cg = ComputationGraph([classifier_convnet.apply(x)])
        acts_hat_cg = ComputationGraph(
            [classifier_convnet.apply(mu_theta)])

        # Retrieve activations of interest and compute discriminative
        # regularization reconstruction terms
        for layer, log_sigma in zip(classifier_convnet.layers[4::6],
                                    variance_parameters[1:]):
            variable_filter = VariableFilter(roles=[OUTPUT],
                                             bricks=[layer])
            d, = variable_filter(acts_cg)
            d_hat, = variable_filter(acts_hat_cg)
            log_sigma = log_sigma.dimshuffle('x', 0, 1, 2)

            total_reconstruction_term += -0.5 * (
                tensor.log(2 * pi) + 2 * log_sigma +
                (d - d_hat) ** 2 / tensor.exp(2 * log_sigma)).sum(
                    axis=[1, 2, 3])

    cost = (kl_term - total_reconstruction_term).mean()

    return ComputationGraph([cost, kl_term, reconstruction_term])
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()],
              [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum((w ** 2).sum() for w in ws)
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(
        data_dir='/home/belohlavek/data/cifar10', is_train=True)
    valid_dataset = Cifar10Dataset(
        data_dir='/home/belohlavek/data/cifar10', is_train=False)
    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream,
                             prefix="test"),
        TrainingDataMonitoring([cost, error_rate,
                                aggregation.mean(
                                    algorithm.total_gradient_norm)],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()]

    main_loop = MainLoop(algorithm, train_stream,
                         model=Model(cost),
                         extensions=extensions)
    main_loop.run()
def analyze(self, inputs, groundtruth, prediction=None):
    """Compute cost and alignment."""
    input_values_dict = dict(inputs)
    input_values_dict['groundtruth'] = groundtruth
    if prediction is not None:
        input_values_dict['prediction'] = prediction
    if not hasattr(self, "_analyze"):
        input_variables = list(self.single_inputs.values())
        input_variables.append(self.single_labels.copy(name='groundtruth'))
        prediction_variable = tensor.lvector('prediction')
        if prediction is not None:
            input_variables.append(prediction_variable)
            cg = self.get_cost_graph(
                batch=False, prediction=prediction_variable[:, None])
        else:
            cg = self.get_cost_graph(batch=False)
        cost = cg.outputs[0]

        weights, = VariableFilter(bricks=[self.generator],
                                  name="weights")(cg)
        energies = VariableFilter(bricks=[self.generator],
                                  name="energies")(cg)
        energies_output = [
            energies[0][:, 0, :] if energies
            else tensor.zeros_like(weights)]
        self._analyze = theano.function(
            input_variables,
            [cost[:, 0], weights[:, 0, :]] + energies_output,
            on_unused_input='warn')
    return self._analyze(**input_values_dict)
def load_params_and_get_beam_search(exp_config):
    encoder = BidirectionalEncoder(exp_config['src_vocab_size'],
                                   exp_config['enc_embed'],
                                   exp_config['enc_nhids'])

    # Let the user specify the target transition class name in the
    # config, eval it, and pass it to the decoder
    target_transition_name = exp_config.get(
        'target_transition', 'GRUInitialStateWithInitialStateSumContext')
    target_transition = eval(target_transition_name)

    decoder = InitialContextDecoder(exp_config['trg_vocab_size'],
                                    exp_config['dec_embed'],
                                    exp_config['dec_nhids'],
                                    exp_config['enc_nhids'] * 2,
                                    exp_config['context_dim'],
                                    target_transition)

    # Create Theano variables
    logger.info('Creating theano variables')
    sampling_input = tensor.lmatrix('source')
    sampling_context = tensor.matrix('context_input')

    logger.info("Building sampling model")
    sampling_representation = encoder.apply(
        sampling_input, tensor.ones(sampling_input.shape))
    generated = decoder.generate(sampling_input, sampling_representation,
                                 sampling_context)
    # generated[1] is next_outputs
    _, samples = VariableFilter(
        bricks=[decoder.sequence_generator],
        name="outputs")(ComputationGraph(generated[1]))

    beam_search = BeamSearch(samples=samples)

    # Set the parameters
    logger.info("Creating Model...")
    model = Model(generated)
    logger.info("Loading parameters from model: {}".format(
        exp_config['saved_parameters']))

    # Load the parameter values from an .npz file
    param_values = LoadNMT.load_parameter_values(
        exp_config['saved_parameters'])
    LoadNMT.set_model_parameters(model, param_values)

    return beam_search, sampling_input, sampling_context
def test_variable_filter():
    # Creating the computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')
    brick2 = Bias(2, name='bias1')
    activation = Sigmoid(name='sigm')

    x = tensor.vector()
    h1 = brick1.apply(x)
    h2 = activation.apply(h1)
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.params[0]]
    bias = [brick1.b, brick2.params[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using the each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert not parameters == role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by brick classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by brick instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name='W_norm')
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by application
    appli_filter = VariableFilter(application=brick1.apply)
    variables = [cg.variables[1], cg.variables[8]]
    assert variables == appli_filter(cg.variables)
def init_beam_search(self, beam_size):
    """Compile beam search and set the beam size.

    See Blocks issue #500.

    """
    if hasattr(self, '_beam_search') and self.beam_size == beam_size:
        # Only recompile if the user wants a different beam size
        return
    self.beam_size = beam_size
    generated = self.get_generate_graph(use_mask=False, n_steps=3)
    cg = ComputationGraph(generated.values())
    samples, = VariableFilter(
        applications=[self.generator.generate], name="samples")(cg)
    self._beam_search = BeamSearch(beam_size, samples)
    self._beam_search.compile()
def primal_step(self, x, y, learning_rate, input_dim, p, mask=None):
    if mask is None:
        self.model = self.model(x, y, input_dim, p)
    else:
        self.model = self.model(x, y, input_dim, p, mask=mask)
    cost = self.model.create_model()
    flag = T.eq(y, 1) * (self.gamma[0] * self.alpha[0] +
                         self.gamma[1] * self.beta[0]) + \
        T.eq(y, 0) * (self.gamma[0] * self.alpha[1] +
                      self.gamma[1] * self.beta[0])

    q0 = theano.shared(np.float32(0), name='q0')
    q1 = theano.shared(np.float32(0), name='q1')
    r0 = theano.shared(np.float32(0), name='r0')
    r1 = theano.shared(np.float32(0), name='r1')

    q0_temp = q0 * self.t + T.mean(
        (T.eq(y, 1) * self.alpha[0] +
         T.eq(y, 0) * self.alpha[1]).dimshuffle(0, 'x') * cost)
    q1_temp = q1 * self.t + T.mean(
        (T.eq(y, 1) * self.beta[0] +
         T.eq(y, 0) * self.beta[1]).dimshuffle(0, 'x') * cost)

    # Update r
    r0_next = (r0 * self.t + T.mean(
        T.eq(y, 1).dimshuffle(0, 'x') * cost)) * 1.0 / (self.t + 1)
    r1_next = (r1 * self.t + T.mean(
        T.eq(y, 0).dimshuffle(0, 'x') * cost)) * 1.0 / (self.t + 1)

    # Update q
    q0_next = (q0_temp - self.dual_class.dual1_fn(self.alpha)) / \
        (self.t + 1)
    q1_next = (q1_temp - self.dual_class.dual2_fn(self.beta)) / \
        (self.t + 1)

    primal_updates = [(q0, q0_next), (q1, q1_next),
                      (r0, r0_next), (r1, r1_next),
                      (self.t, self.t + 1)]

    cost_weighed = T.mean(cost * flag.dimshuffle(0, 'x'))
    cg = ComputationGraph([cost_weighed])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    updates = Adam(cost_weighed, weights) + primal_updates

    primal_var = [[r0, r1], [q0, q1]]
    return updates, cost_weighed, cost, primal_var