def init_stats_computer(self, what, **kwargs):
    if what == 'grad':
        names, generator = self.generate_grad_stats(**kwargs)
    elif what == 'activation':
        names, generator = self.generate_activation_stats(**kwargs)
    else:
        raise Exception('Unknown stats computer {}'.format(what))
    cg = ComputationGraph(generator)
    self.stat_functs = cg.get_theano_function()
    self.stat_functs_inputs = [inp.name for inp in cg.inputs]
    self.stat_names = names

def __init__(self, data_stream, variables, path=None, **kwargs):
    self.data_stream = data_stream
    self.variables = variables
    self.path = path
    self.prediction = None
    kwargs.setdefault('after_training', True)
    super(PredictDataStream, self).__init__(**kwargs)
    cg = ComputationGraph(variables)
    self.theano_function = cg.get_theano_function()

def __init__(self, data_stream, output_tensor, path, **kwargs):
    self.data_stream = data_stream
    self.output_tensor = output_tensor
    self.prediction = None
    self.path = path
    kwargs.setdefault('before_training', True)
    super(PredictDataStream, self).__init__(**kwargs)
    cg1 = ComputationGraph(output_tensor)
    self.theano_function = cg1.get_theano_function(on_unused_input='ignore')
    self.iter = 0

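# A hedged, standalone sketch (not part of the original classes) of how the
# function compiled in these PredictDataStream constructors is typically
# mapped over a Fuel data stream, feeding inputs by name. The toy dataset,
# stream and variable names below are illustrative assumptions only.
import numpy
import theano
import theano.tensor as tensor
from blocks.graph import ComputationGraph
from fuel.datasets import IterableDataset
from fuel.streams import DataStream

features = tensor.matrix('features')
prediction = features.mean(axis=1)
prediction.name = 'prediction'

cg = ComputationGraph(prediction)
predict = cg.get_theano_function()

dataset = IterableDataset(
    {'features': [numpy.ones((4, 3), dtype=theano.config.floatX)]})
stream = DataStream(dataset)
for batch in stream.get_epoch_iterator(as_dict=True):
    # look up each graph input in the batch dict by its Theano name
    print(predict(*[batch[inp.name] for inp in cg.inputs]))
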
def __init__(self, generator, steps=320, n_samples=10, mean_data=0,
             std_data=1, sample_rate=8000, save_name="sample_", **kwargs):
    super(Speak, self).__init__(**kwargs)
    steps = 300
    sample = ComputationGraph(generator.generate(
        n_steps=steps, batch_size=n_samples, iterate=True))
    self.sample_fn = sample.get_theano_function()
    self.mean_data = mean_data
    self.std_data = std_data
    self.sample_rate = sample_rate
    self.save_name = save_name

def init_beam_search(self, beam_size):
    """Compile beam search and set the beam size.

    See Blocks issue #500.

    """
    if hasattr(self, 'search_function'):
        # Only recompile if the user wants a different beam size
        return
    generated = self.get_generate_graph(use_mask=True)
    cg = ComputationGraph(nestedDictionaryValues(generated))
    cg = self.activate_masks(cg)
    self.search_function_inputs = [x.name for x in cg.inputs]
    self.search_function_pos_outputs = [x.name for x in cg.outputs[3:]]
    self.search_function = cg.get_theano_function()

def __init__(self, data_stream, variables, path=None, **kwargs):
    self.data_stream = data_stream
    self.variables = variables
    # for zip(var, var1) in self.variables, variables
    #     var.name = var1.name
    # print (var.name for var in variables)
    # print "varnames ^"
    # self.variables.name = variables.name
    self.path = path
    self.prediction = None
    kwargs.setdefault('after_training', True)
    super(PredictDataStream, self).__init__(**kwargs)
    cg = ComputationGraph(variables)
    self.theano_function = cg.get_theano_function()

def __init__(self, model_path='vgg.tar', synset_words='synset_words.txt'):
    self.vgg_net = VGGNet()
    x = theano.tensor.tensor4('x')
    y_hat = self.vgg_net.apply(x)
    cg = ComputationGraph(y_hat)
    self.model = Model(y_hat)
    with open(model_path, 'rb') as f:
        self.model.set_parameter_values(load_parameters(f))
    with open(synset_words) as f:
        self.classes = numpy.array(f.read().splitlines())
    self.predict = cg.get_theano_function()
    fc15 = VariableFilter(
        theano_name_regex='fc_15_apply_output')(cg.variables)[0]
    self.fe_extractor = ComputationGraph(fc15).get_theano_function()

def test_batchnorm_rolling():
    layer = BatchNormalization(input_dim=5, rolling_accumulate=True)
    layer.initialize()
    x = T.matrix()
    x_val = np.ones((6, 5), dtype=theano.config.floatX)
    x_val[0, 0] = 10.0
    y = layer.apply(x)
    cg = ComputationGraph([y])
    _func = cg.get_theano_function()
    for i in range(100):
        ret = _func(x_val)
    u = layer.u.get_value()
    assert_allclose(u[0], 1.58491838)
    assert_allclose(u[1], 0.6339674)
    s = layer.s.get_value()
    assert_allclose(s[0], 7.13214684)
    assert_allclose(s[1], 0.)

class GeneratePredictions(SimpleExtension):

    def __init__(self, extra_generation_steps, compute_targets, compute_policy,
                 force_generate_groundtruth, catching_up_coof,
                 catching_up_freq, **kwargs):
        self.extra_generation_steps = extra_generation_steps
        self.compute_targets = compute_targets
        self.compute_policy = compute_policy
        self.force_generate_groundtruth = force_generate_groundtruth
        self.catching_up_coof = catching_up_coof
        self.catching_up_freq = catching_up_freq
        kwargs.setdefault('before_training', True)
        kwargs.setdefault('before_batch', True)
        kwargs.setdefault('after_batch', True)
        super(GeneratePredictions, self).__init__(**kwargs)

    def do(self, which_callback, *args):
        if which_callback == 'before_training':
            logger.info("Compiling prediction generator...")
            recognizer, = self.main_loop.model.get_top_bricks()
            self.trained_recognizer = recognizer
            self.recognizer = copy.deepcopy(recognizer)

            # A bit of defensive programming, because why not :)
            assert self.recognizer.generator.readout.compute_targets
            assert self.recognizer.generator.readout.trpo_coef == 0.0
            assert self.recognizer.generator.readout.solve_bellman
            assert self.recognizer.generator.readout.epsilon == 0.0

            groundtruth = self.recognizer.labels
            groundtruth_mask = self.recognizer.labels_mask
            generated = self.recognizer.get_generate_graph(
                n_steps=(self.recognizer.labels.shape[0]
                         + self.extra_generation_steps),
                return_initial_states=True, use_softmax_t=True)
            generation_method = self.recognizer.generator.generate
            if not self.force_generate_groundtruth:
                prediction = generated.pop('samples')
                prediction_mask = self.recognizer.mask_for_prediction(
                    prediction, groundtruth_mask, self.extra_generation_steps)
            else:
                prediction = groundtruth.copy()
                prediction_mask = groundtruth_mask.copy()
            prediction.name = 'predicted_labels'
            prediction_mask.name = 'predicted_mask'

            cg = ComputationGraph(generated.values())
            attended, = VariableFilter(
                applications=[generation_method], name='attended')(cg)
            attended_mask, = VariableFilter(
                applications=[generation_method], name='attended_mask')(cg)
            generated = {key: value[:-1] for key, value in generated.items()}
            costs = self.recognizer.generator.readout.costs(
                prediction=prediction, prediction_mask=prediction_mask,
                groundtruth=groundtruth, groundtruth_mask=groundtruth_mask,
                attended=attended, attended_mask=attended_mask,
                **generated)
            cost_cg = ComputationGraph(costs)

            value_targets, = VariableFilter(name='value_targets')(cost_cg)
            value_targets.name = 'value_targets'
            probs, = VariableFilter(name='probs')(cost_cg)
            probs.name = 'probs'
            rewards, = VariableFilter(name='rewards')(cost_cg)

            variables_to_compute = [prediction, prediction_mask]
            if self.compute_targets:
                logger.debug("Also compute the targets")
                variables_to_compute += [value_targets]
            if self.compute_policy:
                variables_to_compute += [probs]

            self.extended_cg = ComputationGraph(variables_to_compute)
            self._generate = self.extended_cg.get_theano_function()
            logger.info("Prediction generator compiled")

            params = Selector(self.recognizer).get_parameters()
            trained_params = Selector(self.trained_recognizer).get_parameters()
            if self.catching_up_freq:
                def get_coof(name):
                    if isinstance(self.catching_up_coof, float):
                        return self.catching_up_coof
                    elif isinstance(self.catching_up_coof, list):
                        result = None
                        for pattern, coof in self.catching_up_coof:
                            if re.match(pattern, name):
                                result = coof
                        return result
                    else:
                        raise ValueError

                updates = []
                for name in params:
                    coof = get_coof(name)
                    logging.debug(
                        "Catching up coefficient for {} is {}".format(
                            name, coof))
                    updates.append(
                        (params[name],
                         params[name] * (1 - coof)
                         + trained_params[name] * coof))
                # This is needed when parameters are shared between bricks
                # and occur more than once in the list of updates.
                updates = dict(updates).items()
                self._catch_up = theano.function([], [], updates=updates)
        elif which_callback == 'before_batch':
            batch, = args
            generated = self._generate(
                *[batch[variable.name]
                  for variable in self.extended_cg.inputs])
            for variable, value in zip(self.extended_cg.outputs, generated):
                batch[variable.name] = value
        elif which_callback == 'after_batch':
            if (self.catching_up_freq
                    and self.main_loop.status['iterations_done']
                    % self.catching_up_freq == 0):
                self._catch_up()
        else:
            raise ValueError("can't be called on " + which_callback)

def train_ladder(cli_params, dataset=None, save_to='results/ova_all_full'):
    cli_params['save_dir'] = prepare_dir(save_to)
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": {
            'T_C_class': ladder.costs.class_corr,
            'T_C_de': ladder.costs.denois.values(),
        },
        "valid_approx": OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_C_de', ladder.costs.denois.values()),
        ]),
        "valid_final": OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', ladder.costs.denois.values()),
        ]),
    }

    ovadataset = dataset['ovadataset']
    train_indexes = dataset['train_indexes']
    val_indexes = dataset['val_indexes']

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(ovadataset, train_indexes,
                        p.batch_size, scheme=ShuffledScheme),
        model=Model(ladder.costs.total),
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(ovadataset, val_indexes, p.batch_size),
                prefix="valid_approx"),

            # This Monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean_mc]
                + ladder.costs.denois.values(),
                make_datastream(ovadataset, train_indexes, p.batch_size),
                make_datastream(ovadataset, val_indexes, p.batch_size),
                prefix="valid_final", after_n_epochs=p.num_epochs),

            TrainingDataMonitoring(
                [ladder.costs.total, ladder.costs.class_corr,
                 training_algorithm.total_gradient_norm]
                + ladder.costs.denois.values(),
                prefix="train", after_epoch=True),

            ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay, p.num_epochs,
                    after_epoch=True),
        ])
    main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
    col = 'valid_final_error_matrix_cost'
    logger.info('%s %g' % (col, df[col].iloc[-1]))

    ds = make_datastream(ovadataset, val_indexes, p.batch_size)
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]
    outputreplacer = TestMonitoring()
    _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.iteritems()}

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return res[0], inputs

def dump_unlabeled_encoder(cli_params):
    """
    called when dumping
    :return: inputs, result
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean,
                 ladder.oos.clean] + ladder.costs.denois.values(),
                make_datastream(data.train, data.train_ind,
                                # These need to match with the training
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                n_unlabeled=len(data.train_ind),
                                balanced_classes=p.balanced_classes,
                                cnorm=cnorm,
                                whiten=whiten, scheme=ShuffledScheme),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                balanced_classes=p.balanced_classes,
                                cnorm=cnorm,
                                whiten=whiten, scheme=ShuffledScheme),
                prefix="valid_final", before_training=True),
            ShortPrinting({
                "valid_final": OrderedDict([
                    ('VF_C_class', ladder.costs.class_clean),
                    ('VF_E', ladder.error.clean),
                    ('VF_O', ladder.oos.clean),
                    ('VF_C_de', [ladder.costs.denois.get(0),
                                 ladder.costs.denois.get(1),
                                 ladder.costs.denois.get(2),
                                 ladder.costs.denois.get(3)]),
                ]),
            }, after_training=True, use_log=False),
        ])
        main_loop.run()

    all_ind = numpy.arange(dset.num_examples)

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, all_ind,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(all_ind),
                         n_unlabeled=len(all_ind),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1 we want out the values after softmax
    if p.layer < 0:
        # ladder.act.clean.unlabeled.h is a dict not a list
        outputs = ladder.act.clean.labeled.h[len(ladder.layers) + p.layer]
    else:
        outputs = ladder.act.clean.labeled.h[p.layer]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    # Loop over one epoch
    for d in it:
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]

    return res[0]

def analyze(cli_params):
    """
    called when evaluating
    :return: inputs, result
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean,
                 ladder.oos.clean] + ladder.costs.denois.values(),
                make_datastream(data.train, data.train_ind,
                                # These need to match with the training
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                n_unlabeled=len(data.train_ind),
                                cnorm=cnorm,
                                balanced_classes=p.balanced_classes,
                                whiten=whiten, scheme=ShuffledScheme),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                balanced_classes=p.balanced_classes,
                                cnorm=cnorm,
                                whiten=whiten, scheme=ShuffledScheme),
                prefix="valid_final", before_training=True),
            ShortPrinting({
                "valid_final": OrderedDict([
                    ('VF_C_class', ladder.costs.class_clean),
                    ('VF_E', ladder.error.clean),
                    ('VF_O', ladder.oos.clean),
                    ('VF_C_de', [ladder.costs.denois.get(0),
                                 ladder.costs.denois.get(1),
                                 ladder.costs.denois.get(2),
                                 ladder.costs.denois.get(3)]),
                ]),
            }, after_training=True, use_log=False),
        ])
        main_loop.run()

    # df = DataFrame.from_dict(main_loop.log, orient='index')
    # col = 'valid_final_error_rate_clean'
    # logger.info('%s %g' % (col, df[col].iloc[-1]))

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1 we want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.concatenate(v) for k, v in inputs.iteritems()}

    return inputs['targets_labeled'], res[0]

def __init__(self, variable, **kwargs):
    super(SaveComputationGraph, self).__init__(**kwargs)
    variable_graph = ComputationGraph(variable)
    self.theano_function = variable_graph.get_theano_function()

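# Minimal, self-contained sketch of the ComputationGraph -> compiled-function
# pattern shared by all of these snippets: wrap one or more Theano variables
# in a Blocks ComputationGraph and compile it with get_theano_function().
# The variable here is a toy expression, not from the original code.
import numpy
import theano
import theano.tensor as tensor
from blocks.graph import ComputationGraph

x = tensor.matrix('features')
y = tensor.nnet.sigmoid(x).sum()
y.name = 'y'

cg = ComputationGraph([y])
f = cg.get_theano_function()              # compiled over cg.inputs, in order
print([inp.name for inp in cg.inputs])    # -> ['features']
print(f(numpy.ones((2, 3), dtype=theano.config.floatX)))
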
def analyze(cli_params):
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=True)
    ladder = setup_model(p)

    # Analyze activations
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test': (data.test, data.test_ind, True),
    }[p.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(data.train, data.train_ind,
                                # These need to match with the training
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                n_unlabeled=len(data.train_ind),
                                cnorm=cnorm,
                                whiten=whiten, scheme=ShuffledScheme),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                cnorm=cnorm,
                                whiten=whiten, scheme=ShuffledScheme),
                prefix="valid_final", before_training=True),
            ShortPrinting({
                "valid_final": OrderedDict([
                    ('VF_C_class', ladder.costs.class_clean),
                    ('VF_E', ladder.error.clean),
                    ('VF_C_de', [ladder.costs.denois.get(0),
                                 ladder.costs.denois.get(1),
                                 ladder.costs.denois.get(2),
                                 ladder.costs.denois.get(3)]),
                ]),
            }, after_training=True, use_log=False),
        ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # We want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.iteritems()}

    return inputs['targets_labeled'], res[0]

def init_generate(self):
    generated = self.get_generate_graph(use_mask=False)
    cg = ComputationGraph(generated['outputs'])
    self._do_generate = cg.get_theano_function()

def analyze(cli_params):
    p, _ = load_and_log_params(cli_params)
    _, data = setup_data(p, test_set=True)
    ladder = setup_model(p)

    # Analyze activations
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test': (data.test, data.test_ind, True),
    }[p.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + list(ladder.costs.denois.values()),
                make_datastream(data.train, data.train_ind,
                                # These need to match with the training
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                n_unlabeled=len(data.train_ind),
                                scheme=ShuffledScheme),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                scheme=ShuffledScheme),
                prefix="valid_final", before_training=True),
            ShortPrinting({
                "valid_final": OrderedDict([
                    ('VF_C_class', ladder.costs.class_clean),
                    ('VF_E', ladder.error.clean),
                    ('VF_C_de', [ladder.costs.denois.get(0),
                                 ladder.costs.denois.get(1),
                                 ladder.costs.denois.get(2),
                                 ladder.costs.denois.get(3)]),
                ]),
            }, after_training=True, use_log=False),
        ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         scheme=SequentialScheme)

    # We want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization parameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.items():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.items()}

    return inputs['targets_labeled'], res[0]

    post_merge=Identity(), merged_dim=dimension, name="readout")

generator = SequenceGenerator(
    readout=readout,
    transition=transition,
    fork=Fork(['inputs'], prototype=Identity()),
    weights_init=initialization.Identity(1.),
    biases_init=initialization.Constant(0.),
    name="generator")
generator.push_initialization_config()
generator.transition.transition.weights_init = initialization.Identity(2.)
generator.initialize()

results = generator.generate(n_steps=n_steps, batch_size=1, iterate=True,
                             return_initial_states=True)
results_cg = ComputationGraph(results)
results_tf = results_cg.get_theano_function()

generated_sequence_t = results_tf()[1]
generated_sequence_t.shape = (n_steps + 1, dimension)
print generated_sequence_t
print generated_sequence

save_dir = os.environ['RESULTS_DIR']
save_dir = os.path.join(save_dir, 'blizzard/')
experiment_name = "sp_only_0"

main_loop = load(save_dir + "pkl/best_" + experiment_name + ".pkl")
generator = main_loop.model.get_top_bricks()[0]

steps = 2048
n_samples = 1

sample = ComputationGraph(
    generator.generate(n_steps=steps, batch_size=n_samples, iterate=True))
sample_fn = sample.get_theano_function()

outputs = sample_fn()[-2]
outputs = outputs * sp_std + sp_mean
outputs = outputs.swapaxes(0, 1)
outputs = outputs[0]
print outputs.max(), outputs.min()

pyplot.figure(figsize=(100, 15))
pyplot.imshow(outputs.T)
pyplot.colorbar()
pyplot.gca().invert_yaxis()
pyplot.savefig(save_dir + "samples/best_" + experiment_name + "9.png")
pyplot.close()

         (discriminator_cg.outputs[0] > 0.5)[:m].sum().astype('float32')),
        (false_dataset, false_dataset +
         (discriminator_cg.outputs[0] < 0.5)[m:].sum().astype('float32'))])
generator_descent.add_updates(
    [(gen_errors, gen_errors +
      (ComputationGraph(discriminated_samples).outputs[0] > 0.5)
      .sum().astype('float32'))])

extensions = []
extensions.append(Timing(after_batch=True))
extensions.append(Checkpoint('gan.thn', every_n_batches=10000,
                             use_cpickle=True, save_separately=['log']))
extensions.append(Printing(every_n_batches=1))

main_loop = GANMainLoop(algorithm_g=generator_descent,
                        g_out=g_out,
                        algorithm_d=discriminator_descent,
                        d_out=d_out,
                        false_generated=false_generated,
                        false_dataset=false_dataset,
                        generator_errors=gen_errors,
                        data_stream=data_stream,
                        generator=generator_cg.get_theano_function(),
                        discriminator=discriminator_cg.get_theano_function(),
                        k=1,
                        noise_per_sample=100,
                        minibatches=m,
                        extensions=extensions,
                        observables=observables)
main_loop.run()

def init_generate(self):
    generated = self.get_generate_graph(use_mask=False)
    cg = ComputationGraph(generated['samples'])
    self._do_generate = cg.get_theano_function()

def _create_main_loop(self):
    # hyper parameters
    hp = self.params
    batch_size = hp['batch_size']
    biases_init = Constant(0)
    batch_normalize = hp['batch_normalize']

    ### Build fprop
    tensor5 = T.TensorType(config.floatX, (False,) * 5)
    X = tensor5("images")
    #X = T.tensor4("images")
    y = T.lvector('targets')

    gnet_params = OrderedDict()

    #X_shuffled = X[:, :, :, :, [2, 1, 0]]
    #X_shuffled = gpu_contiguous(X.dimshuffle(0, 1, 4, 2, 3)) * 255
    X = X[:, :, :, :, [2, 1, 0]]
    X_shuffled = X.dimshuffle((0, 1, 4, 2, 3)) * 255
    X_r = X_shuffled.reshape((X_shuffled.shape[0],
                              X_shuffled.shape[1] * X_shuffled.shape[2],
                              X_shuffled.shape[3], X_shuffled.shape[4]))
    X_r = X_r - (np.array([104, 117, 123])[None, :, None,
                                           None]).astype('float32')

    expressions, input_data, param = stream_layer_exp(inputs=('data', X_r),
                                                      mode='rgb')
    res = expressions['outloss']
    y_hat = res.flatten(ndim=2)

    import pdb; pdb.set_trace()

    ### Build Cost
    cost = CategoricalCrossEntropy().apply(y, y_hat)
    cost = T.cast(cost, theano.config.floatX)
    cost.name = 'cross_entropy'
    y_pred = T.argmax(y_hat, axis=1)
    misclass = T.cast(T.mean(T.neq(y_pred, y)), theano.config.floatX)
    misclass.name = 'misclass'

    monitored_channels = []
    monitored_quantities = [cost, misclass, y_hat, y_pred]
    model = Model(cost)

    training_cg = ComputationGraph(monitored_quantities)
    inference_cg = ComputationGraph(monitored_quantities)

    ### Get evaluation function
    #training_eval = training_cg.get_theano_function(additional_updates=bn_updates)
    training_eval = training_cg.get_theano_function()
    #inference_eval = inference_cg.get_theano_function()

    # Dataset
    test = JpegHDF5Dataset('test',
                           #name='jpeg_data_flows.hdf5',
                           load_in_memory=True)
    #mean = np.load(os.path.join(os.environ['UCF101'], 'mean.npy'))
    import pdb; pdb.set_trace()

    ### Eval
    labels = np.zeros(test.num_video_examples)
    y_hat = np.zeros((test.num_video_examples, 101))
    labels_flip = np.zeros(test.num_video_examples)
    y_hat_flip = np.zeros((test.num_video_examples, 101))

    ### Important to shuffle list for batch normalization statistic
    #rng = np.random.RandomState()
    #examples_list = range(test.num_video_examples)
    #import pdb; pdb.set_trace()
    #rng.shuffle(examples_list)

    nb_frames = 1

    for i in xrange(24):
        scheme = HDF5SeqScheme(test.video_indexes,
                               examples=test.num_video_examples,
                               batch_size=batch_size,
                               f_subsample=i,
                               nb_subsample=25,
                               frames_per_video=nb_frames)
        #for crop in ['upleft', 'upright', 'downleft', 'downright', 'center']:
        for crop in ['center']:
            stream = JpegHDF5Transformer(
                input_size=(240, 320), crop_size=(224, 224),
                #input_size=(256, 342), crop_size=(224, 224),
                crop_type=crop,
                translate_labels=True,
                flip='noflip', nb_frames=nb_frames,
                data_stream=ForceFloatX(DataStream(
                    dataset=test, iteration_scheme=scheme)))
            stream_flip = JpegHDF5Transformer(
                input_size=(240, 320), crop_size=(224, 224),
                #input_size=(256, 342), crop_size=(224, 224),
                crop_type=crop,
                translate_labels=True,
                flip='flip', nb_frames=nb_frames,
                data_stream=ForceFloatX(DataStream(
                    dataset=test, iteration_scheme=scheme)))

            ## Do the evaluation
            epoch = stream.get_epoch_iterator()
            for j, batch in enumerate(epoch):
                output = training_eval(batch[0], batch[1])
                # import cv2
                # cv2.imshow('img', batch[0][0, 0, :, :, :])
                # cv2.waitKey(160)
                # cv2.destroyAllWindows()
                #import pdb; pdb.set_trace()
                labels_flip[batch_size * j:batch_size * (j + 1)] = batch[1]
                y_hat_flip[batch_size * j:batch_size * (j + 1), :] += output[2]
            preds = y_hat_flip.argmax(axis=1)
            misclass = np.sum(labels_flip != preds) / float(len(preds))
            print i, crop, "flip Misclass:", misclass

            epoch = stream_flip.get_epoch_iterator()
            for j, batch in enumerate(epoch):
                output = training_eval(batch[0], batch[1])
                labels[batch_size * j:batch_size * (j + 1)] = batch[1]
                y_hat[batch_size * j:batch_size * (j + 1), :] += output[2]
            preds = y_hat.argmax(axis=1)
            misclass = np.sum(labels != preds) / float(len(preds))
            print i, crop, "noflip Misclass:", misclass

            y_merge = y_hat + y_hat_flip
            preds = y_merge.argmax(axis=1)
            misclass = np.sum(labels != preds) / float(len(preds))
            print i, crop, "avg Misclass:", misclass

    ### Compute misclass
    y_hat += y_hat_flip
    preds = y_hat.argmax(axis=1)
    misclass = np.sum(labels != preds) / float(len(preds))
    print "Misclass:", misclass

# test = tensor.eq(x, s)
# a = test.eval()
# print a
#xSub = theano.shared(x)

probsPadded = tensor.zeros_like(vocab, dtype=numpy.float32)
probsSubset = probsPadded[context]
b = tensor.set_subtensor(probsSubset, probs)

ans1probs = b[ans1]
ans1score = ans1probs.sum()
ans2probs = b[ans2]
ans2score = ans2probs.sum()
ans3probs = b[ans3]
ans3score = ans3probs.sum()
allans = tensor.stacklists([ans1score, ans2score, ans3score])
pred = tensor.argmax(allans)

cg = ComputationGraph([ans1probs, ans1score, ans2probs, ans2score,
                       ans3probs, ans3score, allans, pred])
f = cg.get_theano_function()
out = f()

# a = probsPadded.eval()
# be = b.eval()
# a1p = ans1probs.eval()
# a1 = ans1score.eval()
# print a
# print be
# print a1p
# print a1
print out

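# A hedged, standalone illustration of the set_subtensor scatter used above,
# with hypothetical toy values: probabilities defined over a small "context"
# of token ids are scattered into a vocabulary-sized vector, which is then
# indexed with an answer's token ids to produce a score.
import numpy
import theano
from theano import tensor
from blocks.graph import ComputationGraph

vocab = tensor.arange(10)
probs = theano.shared(numpy.asarray([0.0, 0.8, 0.0, 0.2],
                                    dtype=numpy.float32))
context = theano.shared(numpy.asarray([3, 2, 8, 1], dtype=numpy.int32))
ans = theano.shared(numpy.asarray([2, 8, 1], dtype=numpy.int32))

padded = tensor.zeros_like(vocab, dtype=numpy.float32)
scattered = tensor.set_subtensor(padded[context], probs)  # scatter into vocab slots
score = scattered[ans].sum()                              # sum the answer's slots

f = ComputationGraph([scattered, score]).get_theano_function()
print(f())  # vocabulary-sized vector and the summed answer score
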
def __init__(self, config, vocab_size):
    question = tensor.imatrix('question')  # set up 32-bit integer matrices
    question_mask = tensor.imatrix('question_mask')
    context = tensor.imatrix('context')
    context_mask = tensor.imatrix('context_mask')
    answer = tensor.ivector('answer')
    candidates = tensor.imatrix('candidates')
    candidates_mask = tensor.imatrix('candidates_mask')

    # and the multiple choice answers:
    ans1 = tensor.ivector('ans1')
    ans1_mask = tensor.ivector('ans1_mask')
    ans2 = tensor.ivector('ans2')
    ans2_mask = tensor.ivector('ans2_mask')
    ans3 = tensor.ivector('ans3')
    ans3_mask = tensor.ivector('ans3_mask')
    ans4 = tensor.ivector('ans4')
    ans4_mask = tensor.ivector('ans4_mask')

    bricks = []

    # inverts 1st and 2nd dimensions of matrix
    question = question.dimshuffle(1, 0)
    question_mask = question_mask.dimshuffle(1, 0)
    context = context.dimshuffle(1, 0)
    context_mask = context_mask.dimshuffle(1, 0)

    # Embed questions and context
    embed = LookupTable(vocab_size, config.embed_size, name='question_embed')
    bricks.append(embed)
    qembed = embed.apply(question)
    cembed = embed.apply(context)
    a1embed = embed.apply(ans1)
    a2embed = embed.apply(ans2)
    a3embed = embed.apply(ans3)
    a4embed = embed.apply(ans4)

    qlstms, qhidden_list = make_bidir_lstm_stack(
        qembed, config.embed_size,
        question_mask.astype(theano.config.floatX),
        config.question_lstm_size, config.question_skip_connections, 'q')
    clstms, chidden_list = make_bidir_lstm_stack(
        cembed, config.embed_size,
        context_mask.astype(theano.config.floatX),
        config.ctx_lstm_size, config.ctx_skip_connections, 'ctx')
    bricks = bricks + qlstms + clstms

    # Calculate question encoding (concatenate layer1)
    if config.question_skip_connections:
        qenc_dim = 2 * sum(config.question_lstm_size)
        qenc = tensor.concatenate([h[-1, :, :] for h in qhidden_list], axis=1)
    else:
        qenc_dim = 2 * config.question_lstm_size[-1]
        qenc = tensor.concatenate([h[-1, :, :] for h in qhidden_list[-2:]],
                                  axis=1)
    qenc.name = 'qenc'

    # Calculate context encoding (concatenate layer1)
    if config.ctx_skip_connections:
        cenc_dim = 2 * sum(config.ctx_lstm_size)
        cenc = tensor.concatenate(chidden_list, axis=2)
    else:
        cenc_dim = 2 * config.ctx_lstm_size[-1]
        cenc = tensor.concatenate(chidden_list[-2:], axis=2)
    cenc.name = 'cenc'

    # Attention mechanism MLP
    attention_mlp = MLP(dims=config.attention_mlp_hidden + [1],
                        activations=config.attention_mlp_activations[1:]
                        + [Identity()],
                        name='attention_mlp')
    attention_qlinear = Linear(input_dim=qenc_dim,
                               output_dim=config.attention_mlp_hidden[0],
                               name='attq')
    attention_clinear = Linear(input_dim=cenc_dim,
                               output_dim=config.attention_mlp_hidden[0],
                               use_bias=False, name='attc')
    bricks += [attention_mlp, attention_qlinear, attention_clinear]
    layer1 = Tanh().apply(
        attention_clinear.apply(
            cenc.reshape((cenc.shape[0] * cenc.shape[1], cenc.shape[2])))
        .reshape((cenc.shape[0], cenc.shape[1],
                  config.attention_mlp_hidden[0]))
        + attention_qlinear.apply(qenc)[None, :, :])
    layer1.name = 'layer1'
    att_weights = attention_mlp.apply(
        layer1.reshape((layer1.shape[0] * layer1.shape[1], layer1.shape[2])))
    att_weights.name = 'att_weights_0'
    att_weights = att_weights.reshape((layer1.shape[0], layer1.shape[1]))
    att_weights.name = 'att_weights'

    attended = tensor.sum(
        cenc * tensor.nnet.softmax(att_weights.T).T[:, :, None], axis=0)
    attended.name = 'attended'

    # Now we can calculate our output
    out_mlp = MLP(dims=[cenc_dim + qenc_dim] + config.out_mlp_hidden
                  + [config.n_entities],
                  activations=config.out_mlp_activations + [Identity()],
                  name='out_mlp')
    bricks += [out_mlp]
    probs = out_mlp.apply(tensor.concatenate([attended, qenc], axis=1))
    probs.name = 'probs'

    # not needed anymore, since we're not only looking at entities
    # is_candidate = tensor.eq(tensor.arange(config.n_entities, dtype='int32')[None, None, :],
    #                          tensor.switch(candidates_mask, candidates, -tensor.ones_like(candidates))[:, :, None]).sum(axis=1)
    # probs = tensor.switch(is_candidate, probs, -1000 * tensor.ones_like(probs))

    # Calculate prediction, cost and error rate
    # vocab = tensor.arange(10)
    # probs = numpy.asarray([0, 0.8, 0, 0.2], dtype=numpy.float32)
    # context = numpy.asarray([3, 2, 8, 1], dtype=numpy.int32)
    # ans3 = numpy.asarray([2, 8, 1], dtype=numpy.int32)
    # ans1 = numpy.asarray([1, 3, 4], dtype=numpy.int32)
    # ans2 = numpy.asarray([1, 1, 4], dtype=numpy.int32)

    # convert probs vector to one that's the same size as vocab,
    # with all zeros except probs:
    # probs = tensor.switch(is_candidate, probs, -1000 * tensor.ones_like(probs))
    probsPadded = tensor.zeros_like(vocab_size, dtype=numpy.float32)
    probsSubset = probsPadded[cembed]  # TODO this should be masked
    b = tensor.set_subtensor(probsSubset, probs)

    # get the similarity score of each (masked) answer with the context probs:
    ans1probs = b[a1enc]
    ans1score = tensor.switch(ans1_mask, ans1probs,
                              tensor.zeros_like(ans1probs)).sum()
    ans2probs = b[a2enc]
    ans2score = ans2probs.sum()
    ans3probs = b[a3enc]
    ans3score = ans3probs.sum()
    ans4probs = b[a4enc]
    ans4score = ans4probs.sum()

    # and pick the best one:
    allans = tensor.stacklists([ans1score, ans2score, ans3score, ans4score])
    pred = tensor.argmax(allans)

    cg = ComputationGraph([ans1probs, ans1score, ans2probs, ans2score,
                           ans3probs, ans3score, ans4probs, ans4score,
                           allans, pred])
    f = cg.get_theano_function()
    out = f()

    # pred = probs.argmax(axis=1)
    # print "pred"
    # print pred TODO CHANGE THIS!
    cost = Softmax().categorical_cross_entropy(answer, probs).mean()
    error_rate = tensor.neq(answer, pred).mean()

    # Apply dropout
    cg = ComputationGraph([cost, error_rate])
    if config.w_noise > 0:
        noise_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, noise_vars, config.w_noise)
    if config.dropout > 0:
        cg = apply_dropout(cg, qhidden_list + chidden_list, config.dropout)
    [cost_reg, error_rate_reg] = cg.outputs

    # Other stuff
    cost_reg.name = cost.name = 'cost'
    error_rate_reg.name = error_rate.name = 'error_rate'

    self.probs = probs
    self.probs.name = "probs"
    self.cost = cost
    self.cost.name = "cost"
    # self.sgd_cost = cost_reg
    self.monitor_vars = [[cost_reg], [error_rate_reg]]
    self.monitor_vars_valid = [[cost], [error_rate]]

    # Initialize bricks
    for brick in bricks:
        brick.weights_init = config.weights_init
        brick.biases_init = config.biases_init
        brick.initialize()
