def train(self):
    print "Loading data"
    datafile = self.get_datafile()
    nbexamples = datafile.num_examples
    nbexamples -= nbexamples % (self.sequence_dim * self.time_dim)
    train_stream = ReshapeTransformer(
        DataStream(
            dataset=datafile,
            iteration_scheme=ShuffledBatchChunkScheme(
                nbexamples, self.sequence_dim * self.time_dim)),
        self.sequence_dim,
        self.time_dim)
    if self.image_size is not None:
        train_stream = Mapping(train_stream, spec_mapping,
                               add_sources=['spectrogram'])
    print "Building Theano Graph"
    algorithm, self.fprop = self.build_theano_functions()
    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=train_stream,
        model=self.model,
        extensions=[
            FinishAfter(after_n_epochs=EPOCHS),
            TrainingDataMonitoring(
                [aggregation.mean(self.model.outputs[0])],
                prefix="train", after_epoch=True),
            Printing(),
            SaveParams(EXP_PATH + NAME, after_epoch=True)
        ])
    main_loop.run()

def test_main_loop():
    class TestDataStream(object):
        def __init__(self):
            self.epochs = self._generate_data()

        def _generate_data(self):
            def wrap_in_dicts(iterable):
                for x in iterable:
                    yield dict(data=x)
            yield iter(wrap_in_dicts([1, 2, 3]))
            yield iter(wrap_in_dicts([4, 5]))
            yield iter(wrap_in_dicts([6, 7, 8, 9]))

        def get_epoch_iterator(self, as_dict):
            assert as_dict is True
            return next(self.epochs)

    finish_extension = FinishAfter()
    finish_extension.add_condition(
        'after_epoch',
        predicate=lambda log: log.status.epochs_done == 2)
    main_loop = MainLoop(MockAlgorithm(), TestDataStream(),
                         extensions=[WriteBatchExtension(),
                                     finish_extension])
    main_loop.run()
    assert main_loop.log.status.iterations_done == 5
    assert main_loop.log.status._epoch_ends == [3, 5]
    assert len(list(main_loop.log)) == 7
    for i in range(1, 6):
        assert main_loop.log[i].batch == dict(data=i)

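# `MockAlgorithm` and `WriteBatchExtension` are test doubles that the tests
# above and below rely on but never define. A minimal sketch of what they
# could look like, inferred from how they are used; the exact behaviour,
# and whether the log is accessed as a dict or via attributes, varies
# across the Blocks versions these snippets target:
from blocks.algorithms import TrainingAlgorithm
from blocks.extensions import TrainingExtension


class MockAlgorithm(TrainingAlgorithm):
    """Does no training; merely remembers the last batch it was given."""
    def initialize(self):
        pass

    def process_batch(self, batch):
        self.batch = batch


class WriteBatchExtension(TrainingExtension):
    """Copies the current batch into the log after every batch."""
    def after_batch(self, batch):
        self.main_loop.log.current_row['batch'] = batch
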
def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W], step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate,
        lambda _, val: numpy.cast[theano.config.floatX](val * 0.2))
    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value, 0.001 * 0.2 ** n_batches, atol=1e-5)

def main():
    print("Fetching dataset...")
    trainset, validset, testset = load_jsb_chorales()

    print("Initializing model...")
    lstm = LstmBlocks(trainset.input_size, 100, trainset.target_size)

    print("Building DataStream...")
    dataset_train = IterableDataset({'x': trainset.inputs,
                                     'y': trainset.targets})
    dataset_valid = IterableDataset({'x': validset.inputs,
                                     'y': validset.targets})
    stream_train = DataStream(dataset=dataset_train)
    stream_valid = DataStream(dataset=dataset_valid)

    print("Building training process...")
    algorithm = GradientDescent(
        cost=lstm.cost,
        parameters=lstm.computation_graph.parameters,
        step_rule=Adam())
    valid_monitor = DataStreamMonitoring(variables=[lstm.cost],
                                         data_stream=stream_valid,
                                         prefix="valid")
    train_monitor = TrainingDataMonitoring(variables=[lstm.cost],
                                           prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(data_stream=stream_train, algorithm=algorithm,
                         extensions=[valid_monitor, train_monitor,
                                     FinishAfter(after_n_epochs=N_EPOCHS),
                                     Printing(), ProgressBar()])
    print("Training...")
    main_loop.run()

def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"
    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
         FinishAfter(after_n_batches=num_batches),
         DataStreamMonitoring(
             [cost], get_data_stream(range(100, 200)), prefix="test"),
         TrainingDataMonitoring([cost], after_epoch=True),
         Dump(save_to),
         Printing()])
    main_loop.run()
    return main_loop

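# `get_data_stream` is used by several snippets here but defined in none of
# them. A minimal sketch, assuming the task is learning square roots (the
# 'numbers' and 'roots' sources suggest so); the batch size of 20 is an
# assumption:
import numpy
from fuel.datasets import IterableDataset
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch, Mapping


def get_data_stream(iterable):
    """Stream ('numbers', 'roots') batches built from `iterable`."""
    dataset = IterableDataset(
        {'numbers': [numpy.float32(n) for n in iterable]})
    # append the target source by mapping each example to its square root
    stream = Mapping(dataset.get_example_stream(),
                     lambda data: (numpy.sqrt(data[0]),),
                     add_sources=('roots',))
    return Batch(stream, iteration_scheme=ConstantScheme(20))
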
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[['test_final_cost',
                       'test_misclassificationrate_apply_error_rate'],
                      ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()

def train(self):
    print "Loading data"
    datafile = self.get_datafile()
    nbexamples = datafile.num_examples
    train_stream = DataStream(
        dataset=datafile,
        iteration_scheme=OverlapSequentialScheme(
            nbexamples, self.time_dim))
    print "Building Theano Graph"
    algorithm, self.fprop = self.build_theano_functions()
    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=train_stream,
        extensions=[
            FinishAfter(after_n_epochs=EPOCHS),
            TrainingDataMonitoring(
                [self.model.outputs[0]],
                prefix="train", after_epoch=True,
                every_n_batches=4000),
            #ProgressBar(),
            Printing()
        ])
    main_loop.run()

def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):
        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = (
                ((W.get_value() * data["features"]).sum() -
                 data["targets"]) ** 2)

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=GradientDescent(cost=cost, parameters=[W],
                                  step_rule=Scale(0.001)),
        extensions=[
            FinishAfter(after_n_epochs=1),
            TrainingDataMonitoring([W_sum, cost, V], prefix="train1",
                                   after_batch=True),
            TrainingDataMonitoring([aggregation.mean(W_sum), cost],
                                   prefix="train2", after_epoch=True),
            TrueCostExtension()])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([main_loop.log[i]['train1_W_sum']
             for i in range(1, n_batches + 1)]) / n_batches)

def train_model(cost, train_stream, valid_stream, args):
    step_rule = learning_algorithm(args)
    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, step_rule=step_rule,
                                parameters=cg.parameters)

    extensions = []

    # Training and validation score monitoring
    # (DataStreamMonitoring's keyword is `data_stream`, not `stream`)
    extensions.extend([
        TrainingDataMonitoring([cost], prefix='train',
                               every_n_batches=args.monitoring_freq),
        DataStreamMonitoring([cost], data_stream=valid_stream,
                             prefix='valid',
                             every_n_batches=args.monitoring_freq)])

    # Printing
    extensions.append(ProgressBar())
    extensions.append(Printing(every_n_batches=args.monitoring_freq))

    main_loop = MainLoop(model=Model(cost),
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    # This is where the magic happens!
    main_loop.run()

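# `learning_algorithm` above is assumed to map command-line arguments to a
# Blocks step rule. A sketch under that assumption; the attribute names
# `args.algorithm`, `args.learning_rate` and `args.momentum` are invented
# for illustration:
from blocks.algorithms import (Adam, CompositeRule, Momentum, Scale,
                               StepClipping)


def learning_algorithm(args):
    if args.algorithm == 'adam':
        step_rule = Adam(learning_rate=args.learning_rate)
    elif args.algorithm == 'momentum':
        step_rule = Momentum(learning_rate=args.learning_rate,
                             momentum=args.momentum)
    else:
        step_rule = Scale(learning_rate=args.learning_rate)
    # clip gradient steps before applying the chosen rule
    return CompositeRule([StepClipping(10.0), step_rule])
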
def main():
    x = tensor.matrix("features")

    input_to_hidden1 = get_typical_layer(x, 784, 500)
    #hidden1_to_hidden2 = get_typical_layer(input_to_hidden1, 500, 300)
    hidden1_to_latent = get_typical_layer(input_to_hidden1, 500, 20)

    latent_to_hidden2 = get_typical_layer(hidden1_to_latent, 20, 500)
    #hidden3_to_hidden4 = get_typical_layer(latent_to_hidden3, 300, 500)
    hidden2_to_output = get_typical_layer(latent_to_hidden2, 500, 784,
                                          Logistic())
    hidden2_to_output.name = "last_before_output"

    from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
    from blocks.graph import ComputationGraph
    from blocks.algorithms import Adam, GradientDescent, Scale
    from blocks.roles import WEIGHT

    cost = BinaryCrossEntropy(name="error").apply(x, hidden2_to_output)
    cg = ComputationGraph(cost)
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    # cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
    # cost.name = "regularized error"
    gd = GradientDescent(cost=cost, parameters=cg.parameters,
                         step_rule=Adam())

    from blocks.main_loop import MainLoop
    from blocks.extensions import FinishAfter, Printing, ProgressBar
    from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring

    monitor = TrainingDataMonitoring([cost], after_epoch=True)
    main_loop = MainLoop(data_stream=get_data_stream(), algorithm=gd,
                         extensions=[monitor,
                                     FinishAfter(after_n_epochs=5),
                                     ProgressBar(), Printing()])
    main_loop.run()
    showcase(cg, "last_before_output")

def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs):
    dataset_train = IterableDataset(generate_data(max_seq_length,
                                                  batch_size,
                                                  num_batches))
    dataset_test = IterableDataset(generate_data(max_seq_length,
                                                 batch_size,
                                                 100))

    stream_train = DataStream(dataset=dataset_train)
    stream_test = DataStream(dataset=dataset_test)

    x = T.tensor3('x')
    y = T.matrix('y')

    # we need to provide data for the LSTM layer of size 4 * lstm_dim, see
    # LSTM layer documentation for the explanation
    x_to_h = Linear(1, lstm_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(lstm_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(lstm_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))

    x_transform = x_to_h.apply(x)
    h, c = lstm.apply(x_transform)

    # only values of hidden units of the last timeframe are used for
    # the classification
    y_hat = h_to_o.apply(h[-1])
    y_hat = Logistic().apply(y_hat)

    cost = BinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'

    lstm.initialize()
    x_to_h.initialize()
    h_to_o.initialize()

    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam())
    test_monitor = DataStreamMonitoring(variables=[cost],
                                        data_stream=stream_test,
                                        prefix="test")
    train_monitor = TrainingDataMonitoring(variables=[cost],
                                           prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(algorithm, stream_train,
                         extensions=[test_monitor, train_monitor,
                                     FinishAfter(after_n_epochs=num_epochs),
                                     Printing(), ProgressBar()])

    main_loop.run()

    print 'Learned weights:'
    for layer in (x_to_h, lstm, h_to_o):
        print "Layer '%s':" % layer.name
        for param in layer.parameters:
            print param.name, ': ', param.get_value()
        print

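# `generate_data` is not defined in this snippet. A sketch matching the
# graph above (sequences of scalars in a (time, batch, 1) tensor, one
# binary label per sequence); the actual classification task is an
# assumption:
from collections import OrderedDict

import numpy


def generate_data(max_seq_length, batch_size, num_batches):
    """Return ('x', 'y') batch lists suitable for IterableDataset."""
    xs, ys = [], []
    for _ in range(num_batches):
        length = numpy.random.randint(1, max_seq_length + 1)
        x = numpy.random.uniform(
            -1, 1, (length, batch_size, 1)).astype('float32')
        # toy label: does the sequence have a positive mean?
        y = (x.mean(axis=0) > 0).astype('float32')
        xs.append(x)
        ys.append(y)
    return OrderedDict([('x', xs), ('y', ys)])
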
def do_test(with_serialization):
    data_stream = ContainerDataset(range(10)).get_default_stream()
    main_loop = MainLoop(None, data_stream, MockAlgorithm(),
                         extensions=[FinishAfter(after_n_batches=14)])
    main_loop.run()
    assert main_loop.log.status.iterations_done == 14

    if with_serialization:
        string_io = BytesIO()
        dill.dump(main_loop, string_io, fmode=dill.CONTENTS_FMODE)
        string_io.seek(0)
        main_loop = dill.load(string_io)

    finish_after = unpack(
        [ext for ext in main_loop.extensions
         if isinstance(ext, FinishAfter)], singleton=True)
    finish_after.add_condition(
        "after_batch",
        predicate=lambda log: log.status.iterations_done == 27)
    main_loop.run()
    assert main_loop.log.status.iterations_done == 27
    assert main_loop.log.status.epochs_done == 2
    for i in range(27):
        assert main_loop.log[i].batch == {"data": i % 10}

def run(self):
    self.build_extensions_list()
    print "Calling MainLoop"
    main_loop = MainLoop(data_stream=self.streams['mainloop'],
                         algorithm=self.model.algorithm,
                         extensions=self.extensions)
    main_loop.run()

def test_main_loop():
    class TestDataStream(object):
        def __init__(self):
            self.epochs = self._generate_data()

        def _generate_data(self):
            def wrap_in_dicts(iterable):
                for x in iterable:
                    yield dict(data=x)
            yield iter(wrap_in_dicts([1, 2, 3]))
            yield iter(wrap_in_dicts([4, 5]))
            yield iter(wrap_in_dicts([6, 7, 8, 9]))

        def get_epoch_iterator(self, as_dict):
            assert as_dict is True
            return next(self.epochs)

    finish_extension = FinishAfter()
    finish_extension.add_condition(
        'after_epoch',
        predicate=lambda log: log.status['epochs_done'] == 2)
    main_loop = MainLoop(MockAlgorithm(), TestDataStream(),
                         extensions=[WriteBatchExtension(),
                                     finish_extension])
    main_loop.run()
    assert main_loop.log.status['iterations_done'] == 5
    assert main_loop.log.status['_epoch_ends'] == [3, 5]
    assert len(main_loop.log) == 5
    for i in range(1, 6):
        assert main_loop.log[i]['batch'] == dict(data=i)

def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=floatX)
    features = [numpy.array(f, dtype=floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = ContainerDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W], step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate,
        lambda _, val: numpy.cast[floatX](val * 0.2))
    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_default_stream(),
                         algorithm=sgd,
                         extensions=[FinishAfter(after_n_epochs=1),
                                     modifier])
    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value, 0.001 * 0.2 ** n_batches, atol=1e-5)

def run(discriminative_regularization=True):
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=False)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(discriminative_regularization)
    cg, bn_cg, variance_parameters = rval
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term])
    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False, before_first_epoch=False,
        every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    save_path = 'celeba_vae_{}regularization.zip'.format(
        '' if discriminative_regularization else 'no_')
    checkpoint = Checkpoint(save_path, every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=75), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

def main(save_to, num_batches):
    linear = Linear()
    rnn = SORN()
    x = tensor.vector('numbers')
    states_E, states_I, updates = rnn.apply(linear.apply(x[None, :]))
    y = linear.apply(states_E[-1])
    cost = SquaredError().apply(y[:, None], mlp.apply(states_E[-1]))
    # consider updates about linear from x and to y
    # 1. make all in SORN
    # 2. gradient?
    main_loop = MainLoop(
        UpdatesAlgorithm(updates=updates),
        get_data_stream(range(100)),
        model=Model(),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop

def do_test(with_serialization):
    data_stream = IterableDataset(range(10)).get_example_stream()
    main_loop = MainLoop(MockAlgorithm(), data_stream,
                         extensions=[WriteBatchExtension(),
                                     FinishAfter(after_n_batches=14)])
    main_loop.run()
    assert main_loop.log.status['iterations_done'] == 14

    if with_serialization:
        main_loop = cPickle.loads(cPickle.dumps(main_loop))

    finish_after = unpack(
        [ext for ext in main_loop.extensions
         if isinstance(ext, FinishAfter)], singleton=True)
    finish_after.add_condition(
        ["after_batch"],
        predicate=lambda log: log.status['iterations_done'] == 27)
    main_loop.run()
    assert main_loop.log.status['iterations_done'] == 27
    assert main_loop.log.status['epochs_done'] == 2
    for i in range(27):
        assert main_loop.log[i + 1]['batch'] == {"data": i % 10}

def test_shared_variable_modifier():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W], step_rule=step_rule)
    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[
            FinishAfter(after_n_epochs=1),
            SharedVariableModifier(
                step_rule.learning_rate,
                lambda n: numpy.cast[theano.config.floatX](10. / n))])

    main_loop.run()

    assert_allclose(step_rule.learning_rate.get_value(),
                    numpy.cast[theano.config.floatX](10. / n_batches))

def train_base_model(self, train_data, test_data, input_dim):
    x = T.matrix('features')
    y = T.matrix('targets')
    mlp, cost, mis_cost = self.create_base_model(x, y, input_dim)
    cg = ComputationGraph([cost])
    inputs = VariableFilter(roles=[INPUT])(cg.variables)
    cg = apply_dropout(cg, inputs, 0.2)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    data_stream = train_data
    data_stream_test = test_data

    monitor = DataStreamMonitoring(variables=[mis_cost],
                                   data_stream=data_stream_test,
                                   prefix="test")
    plot_ext = Plot('F1-measure',
                    channels=[['test_MisclassificationRate']],
                    after_batch=True)

    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=algorithm,
                         extensions=[monitor,
                                     FinishAfter(after_n_epochs=50),
                                     Printing(),
                                     plot_ext])
    main_loop.run()
    return mlp

def maxout_vae_mnist_test(path_vae_mnist):
    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    temp = cg.parameters
    for t, i in zip(temp, range(len(temp))):
        t.name = t.name + str(i) + "maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y, y_hat)

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
        train_set,
        iteration_scheme=SequentialScheme(train_set.num_examples,
                                          batch_size)))
    data_stream_test = Flatten(DataStream.default_stream(
        test_set,
        iteration_scheme=SequentialScheme(test_set.num_examples,
                                          batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    # TrainingDataMonitoring reads values from the training batches
    # themselves, so it takes no data stream
    monitor_train = TrainingDataMonitoring(variables=[cost], prefix="train")
    monitor_valid = DataStreamMonitoring(variables=[cost, error_rate],
                                         data_stream=data_stream_test,
                                         prefix="test")

    extensions = [monitor_train,
                  monitor_valid,
                  FinishAfter(after_n_epochs=50),
                  Printing(every_n_epochs=1)]

    main_loop = MainLoop(data_stream=data_stream_train,
                         algorithm=algorithm,
                         model=Model(cost),
                         extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'w')) as f:
        dump(maxout, f)

class Runner(object):
    def __init__(self, worker, experiment, config):
        # Data
        dataset = CIFAR10('train', flatten=False)
        test_dataset = CIFAR10('test', flatten=False)
        batch_size = 128
        scheme = ShuffledScheme(dataset.num_examples, batch_size)
        datastream = DataStream(dataset, iteration_scheme=scheme)
        test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
        test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

        # Model
        m = ModelHelper(config)

        def score_func(mainloop):
            scores = mainloop.log.to_dataframe()["test_accur"].values
            return np.mean(np.sort(scores)[-4:-1])

        # Algorithm
        cg = ComputationGraph([m.cost])
        algorithm = GradientDescent(cost=m.cost, params=cg.parameters,
                                    step_rule=AdaM())

        #job_name = os.path.basename(worker.running_job)
        job_name = os.path.basename(".")
        update_path = os.path.join(os.path.join(worker.path, "updates"),
                                   job_name)
        if not os.path.exists(update_path):
            os.mkdir(update_path)

        self.main_loop = MainLoop(
            algorithm, datastream, model=Model(m.cost),
            extensions=[
                Timing(),
                TrainingDataMonitoring([m.cost, m.accur], prefix="train",
                                       after_epoch=True),
                DataStreamMonitoring([m.cost, m.accur], test_stream,
                                     prefix="test"),
                FinishAfter(after_n_epochs=1),
                LogToFile(os.path.join(update_path, "log.csv")),
                Printing(),
                EpochProgress(dataset.num_examples // batch_size + 1)
                #, DistributeUpdate(worker, every_n_epochs=1)
                #, DistributeWhetlabFinish(worker, experiment, score_func)
                #, Plot('cifar10',
                #       channels=[['train_cost', 'test_cost'],
                #                 ['train_accur', 'test_accur']])
            ])

    def run(self):
        self.main_loop.run()

def train(cost, error_rate, batch_size=100, num_epochs=150):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(
        cost=cost, parameters=all_params,
        step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables, prefix="train", after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables, data_stream=valid_data_stream,
        prefix="valid", after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm, data_stream=train_data_stream,
        model=blocks_model,
        extensions=[train_monitoring, valid_monitoring,
                    FinishAfter(after_n_epochs=num_epochs),
                    SaveParams('valid_misclassificationrate_apply_error_rate',
                               blocks_model, save_path),
                    SaveLog(save_path, after_epoch=True),
                    ProgressBar(),
                    Printing()])
    main_loop.run()

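# `SaveParams` and `SaveLog` are project-specific extensions, not part of
# Blocks. A minimal sketch of a `SaveParams` that matches the call above:
# save the parameters whenever the tracked log channel reaches a new
# minimum (the early-stopping behaviour and file layout are assumptions):
import os

import numpy
from blocks.extensions import SimpleExtension


class SaveParams(SimpleExtension):
    """Save `model`'s parameters when `channel` hits a new minimum."""
    def __init__(self, channel, model, save_path, **kwargs):
        kwargs.setdefault('after_epoch', True)
        super(SaveParams, self).__init__(**kwargs)
        self.channel = channel
        self.model = model
        self.save_path = save_path
        self.best = numpy.inf

    def do(self, which_callback, *args):
        value = self.main_loop.log.current_row.get(self.channel)
        if value is not None and value < self.best:
            self.best = value
            params = self.model.get_parameter_values()
            # flatten '/'-separated Blocks parameter names for the archive
            numpy.savez(os.path.join(self.save_path, 'params.npz'),
                        **{name.replace('/', '_'): val
                           for name, val in params.items()})
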
def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      DataStream(mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]
    if bokeh:
        extensions.append(Plot(
            'MNIST example',
            channels=[['test_final_cost',
                       'test_misclassificationrate_apply_error_rate'],
                      ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()

def run(model_name):
    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')
    image_border_size = 100

    if running_on_laptop:
        host_plot = 'http://*****:*****'
    # The source was redacted here: the else branch, the model, algorithm
    # and train_stream definitions, and the head of the extensions list are
    # missing. The Plot title below is reconstructed from the surviving
    # format arguments:
    extensions = [
        Plot('%s %s @ %s' % (model_name, datetime.datetime.now(),
                             socket.gethostname()),
             channels=[['loss', 'valid_loss_test'], ['valid_error']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

def main():
    import configurations
    from stream import DStream
    logger = logging.getLogger(__name__)
    cfig = getattr(configurations, 'get_config_penn')()

    rnnlm = Rnnlm(cfig['vocabsize'], cfig['nemb'], cfig['nhids'])
    rnnlm.weights_init = IsotropicGaussian(0.1)
    rnnlm.biases_init = Constant(0.)
    rnnlm.push_initialization_config()
    rnnlm.generator.transition.weights_init = Orthogonal()

    sentence = tensor.lmatrix('sentence')
    sentence_mask = tensor.matrix('sentence_mask')
    batch_cost = rnnlm.cost(sentence, sentence_mask).sum()
    batch_size = sentence.shape[1].copy(name='batch_size')
    cost = aggregation.mean(batch_cost, batch_size)
    cost.name = "sequence_log_likelihood"
    logger.info("Cost graph is built")

    model = Model(cost)
    parameters = model.get_parameter_dict()
    logger.info("Parameters:\n" +
                pprint.pformat(
                    [(key, value.get_value().shape)
                     for key, value in parameters.items()],
                    width=120))

    for brick in model.get_top_bricks():
        brick.initialize()

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=CompositeRule([StepClipping(10.0), Scale(0.01)]))

    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    monitored_vars = [cost, gradient_norm, step_norm]

    train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                           after_batch=True,
                                           before_first_epoch=True,
                                           prefix='tra')

    extensions = [train_monitor, Timing(), Printing(after_batch=True),
                  FinishAfter(after_n_epochs=1000),
                  Printing(every_n_batches=1)]

    train_stream = DStream(datatype='train', config=cfig)

    main_loop = MainLoop(model=model, data_stream=train_stream,
                         algorithm=algorithm, extensions=extensions)
    main_loop.run()

def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):
        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = (
                ((W.get_value() * data["features"]).sum() -
                 data["targets"]) ** 2)

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=GradientDescent(cost=cost, params=[W],
                                  step_rule=Scale(0.001)),
        extensions=[
            FinishAfter(after_n_epochs=1),
            TrainingDataMonitoring([W_sum, cost, V], prefix="train1",
                                   after_batch=True),
            TrainingDataMonitoring([aggregation.mean(W_sum), cost],
                                   prefix="train2", after_epoch=True),
            TrueCostExtension()])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([main_loop.log[i]['train1_W_sum']
             for i in range(1, n_batches + 1)]) / n_batches)

def train_model(m, train_stream, valid_stream, load_location=None,
                save_location=None):
    # Define the model
    model = Model(m.cost_reg)

    ae_excl_vars = set()
    if hasattr(m, 'ae_costs'):
        for i, cost in enumerate(m.ae_costs):
            print "Training stacked AE layer", i + 1
            # train autoencoder component separately
            cost.name = 'ae_cost%d' % i
            cg = ComputationGraph(cost)
            params = set(cg.parameters) - ae_excl_vars
            ae_excl_vars = ae_excl_vars | params
            algorithm = GradientDescent(cost=cost,
                                        step_rule=config.step_rule,
                                        params=list(params))
            main_loop = MainLoop(
                data_stream=NoData(train_stream),
                algorithm=algorithm,
                extensions=[
                    TrainingDataMonitoring([cost], prefix='train',
                                           every_n_epochs=1),
                    Printing(every_n_epochs=1),
                    FinishAfter(every_n_epochs=1000),
                ])
            main_loop.run()

    cg = ComputationGraph(m.cost_reg)
    params = list(set(cg.parameters) - ae_excl_vars)
    algorithm = GradientDescent(cost=m.cost_reg,
                                step_rule=config.step_rule,
                                params=params)

    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            TrainingDataMonitoring(
                [m.cost_reg, m.ber_reg, m.cost, m.ber],
                prefix='train', every_n_epochs=1 * config.pt_freq),
            DataStreamMonitoring(
                [m.cost, m.ber], valid_stream, prefix='valid',
                after_epoch=False, every_n_epochs=5 * config.pt_freq),
            Printing(every_n_epochs=1 * config.pt_freq, after_epoch=False),
            Plot(document='tr_' + model_name + '_' + config.param_desc,
                 channels=[['train_cost', 'train_cost_reg', 'valid_cost'],
                           ['train_ber', 'train_ber_reg', 'valid_ber']],
                 server_url='http://eos21:4201',
                 every_n_epochs=1 * config.pt_freq, after_epoch=False),
            FinishAfter(every_n_epochs=10000)
        ])
    main_loop.run()

def run(model_name, port_train, port_valid):
    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')
    image_border_size = (100, 100)

    if running_on_laptop:
        host_plot = 'http://*****:*****'
    # The source was redacted here: the else branch, the model, algorithm
    # and train_stream definitions, and the head of the extensions list are
    # missing. The Plot title below is reconstructed from the surviving
    # format arguments:
    extensions = [
        Plot('%s %s @ %s' % (model_name, datetime.datetime.now(),
                             socket.gethostname()),
             channels=[['loss'], ['error', 'valid_error']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

def test_main_loop():
    main_loop = MainLoop(
        MockAlgorithm(), IterableDataset(range(10)).get_example_stream(),
        extensions=[WriteBatchExtension(), FinishAfter(after_n_epochs=2)])
    main_loop.run()
    assert_raises(AttributeError, getattr, main_loop, 'model')
    assert main_loop.log.status['iterations_done'] == 20
    assert main_loop.log.status['_epoch_ends'] == [10, 20]
    assert len(main_loop.log) == 20
    for i in range(20):
        assert main_loop.log[i + 1]['batch'] == {'data': i % 10}

def align_with_nam(config, args):
    """Main method for using the Neural Alignment Model.

    Args:
        config (dict): NMT configuration
        args (object): ArgumentParser object containing the command
                       line arguments

    Returns:
        list. List of alignments, where alignments are represented as
        numpy matrices containing confidences between 0 and 1.
    """
    global alignments
    config['attention'] = 'parameterized'
    alignments = []
    nmt_model = NMTModel(config)
    nmt_model.set_up()
    align_stream = _get_align_stream(**config)
    extensions = [
        FinishAfter(after_epoch=True),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        PrintCurrentLogRow(after_batch=True),
        NextSentenceExtension(align_stream=align_stream,
                              every_n_batches=args.iterations,
                              before_training=True)
    ]
    train_params = []
    for p in nmt_model.cg.parameters:
        # substring check: keeps the first parameter whose name is
        # contained in 'alignment_matrix'
        if p.name in 'alignment_matrix':
            train_params.append(p)
            break
    algorithm = GradientDescent(cost=nmt_model.cost,
                                parameters=train_params)
    main_loop = MainLoop(model=nmt_model.training_model,
                         algorithm=algorithm,
                         data_stream=align_stream,
                         extensions=extensions)
    nmt_model_path = get_nmt_model_path(args.nmt_model_selector, config)
    loader = LoadNMTUtils(nmt_model_path,
                          config['saveto'],
                          nmt_model.training_model)
    loader.load_weights()
    try:
        main_loop.run()
    except StopIteration:
        logging.info("Alignment finished")
    return alignments

def train_model(cost, train_stream, valid_stream, valid_freq, valid_rare,
                load_location=None, save_location=None):
    cost.name = 'nll'
    perplexity = 2 ** (cost / tensor.log(2))
    perplexity.name = 'ppl'

    # Define the model
    model = Model(cost)

    # Load the parameters from a dumped model
    if load_location is not None:
        logger.info('Loading parameters...')
        model.set_param_values(load_parameter_values(load_location))

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(cost=cost,
                                step_rule=Scale(learning_rate=0.01),
                                params=cg.parameters)
    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            DataStreamMonitoring([cost, perplexity], valid_stream,
                                 prefix='valid_all',
                                 every_n_batches=5000),
            # Overfitting of rare words occurs between 3000 and 4000
            # iterations
            DataStreamMonitoring([cost, perplexity], valid_rare,
                                 prefix='valid_rare',
                                 every_n_batches=500),
            DataStreamMonitoring([cost, perplexity], valid_freq,
                                 prefix='valid_frequent',
                                 every_n_batches=5000),
            Printing(every_n_batches=500)
        ])
    main_loop.run()

    # Save the main loop
    if save_location is not None:
        logger.info('Saving the main loop...')
        dump_manager = MainLoopDumpManager(save_location)
        dump_manager.dump(main_loop)
        logger.info('Saved')

def infer_population(data_stream, model, n_batches):
    """Sets the population parameters for a given model"""
    # construct a main loop with algorithm
    algorithm = BatchNormAccumulate(model)
    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=data_stream,
        model=model,
        extensions=[FinishAfter(after_n_batches=n_batches),
                    ProgressBar()])
    main_loop.run()

    parameters = get_batchnorm_parameters(model)
    batchnorm_bricks = set([get_brick(p) for p in parameters])
    for b in batchnorm_bricks:
        b.use_population = True

def train(model, batch_size=50, num_epochs=1500):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = "results/memory_" + timestr
    log_path = os.path.join(save_path, "log.txt")
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    cost = model.outputs["cost"]
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(
        cost=cost, params=all_params,
        step_rule=Adam(learning_rate=model.default_lr))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_memory_streams(20, 10)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables, prefix="train", after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables, data_stream=valid_data_stream,
        prefix="valid", after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm, data_stream=train_data_stream,
        model=blocks_model,
        extensions=[train_monitoring, valid_monitoring,
                    FinishAfter(after_n_epochs=num_epochs),
                    SaveParams("valid_MSE", blocks_model, save_path),
                    SaveLog(save_path, after_epoch=True),
                    Printing()])
    main_loop.run()

def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHTS])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, step_rule=SteepestDescent(learning_rate=0.1))
    main_loop = MainLoop(
        mlp,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        algorithm,
        extensions=[Timing(),
                    FinishAfter(after_n_epochs=num_epochs),
                    DataStreamMonitoring(
                        [cost, error_rate],
                        DataStream(mnist_test,
                                   iteration_scheme=SequentialScheme(
                                       mnist_test.num_examples, 500)),
                        prefix="test"),
                    TrainingDataMonitoring(
                        [cost, error_rate,
                         aggregation.mean(algorithm.total_gradient_norm)],
                        prefix="train",
                        after_every_epoch=True),
                    SerializeMainLoop(save_to),
                    Plot('MNIST example',
                         channels=[
                             ['test_final_cost',
                              'test_misclassificationrate_apply_error_rate'],
                             ['train_total_gradient_norm']]),
                    Printing()])
    main_loop.run()

def run(get_model, model_name):
    train_stream = ServerDataStream(('cases', 'image_position', 'multiplier',
                                     'sax', 'sax_features', 'targets'),
                                    False, hwm=10)
    valid_stream = ServerDataStream(('cases', 'image_position', 'multiplier',
                                     'sax', 'sax_features', 'targets'),
                                    False, hwm=10, port=5558)

    ftensor5 = tensor.TensorType('float32', (False,) * 5)
    input_var = ftensor5('sax_features')
    target_var = tensor.matrix('targets')
    multiply_var = tensor.matrix('multiplier')
    multiply_var = T.addbroadcast(multiply_var, 1)

    prediction, test_prediction, test_pred_mid, params_bottom, params_top = \
        get_model(input_var, multiply_var)

    # load parameters
    cg = ComputationGraph(test_pred_mid)
    params_val = numpy.load('sunnybrook/best_weights.npz')
    for p, value in zip(cg.shared_variables, params_val['arr_0']):
        p.set_value(value)

    crps = tensor.abs_(test_prediction - target_var).mean()

    loss = squared_error(prediction, target_var).mean()
    loss.name = 'loss'
    crps.name = 'crps'

    algorithm = GradientDescent(
        cost=loss,
        parameters=params_top,
        step_rule=Adam(),
        on_unused_sources='ignore'
    )

    host_plot = 'http://localhost:5006'
    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[crps, loss],
                             data_stream=valid_stream,
                             prefix="valid"),
        Plot('%s %s %s' % (model_name, datetime.date.today(),
                           time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_crps']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        Checkpoint('train'),
        FinishAfter(after_n_epochs=20)
    ]

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

def run():
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=True)
    main_loop_stream = streams[0]
    train_monitor_stream = streams[1]
    valid_monitor_stream = streams[2]

    cg, bn_dropout_cg = create_training_computation_graphs()

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    pop_updates = get_batch_normalization_updates(bn_dropout_cg)
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_dropout_cg.outputs[0],
                                parameters=bn_dropout_cg.parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    cost = bn_dropout_cg.outputs[0]
    cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [cost], train_monitor_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    cost, accuracy = cg.outputs
    cost.name = 'cost'
    accuracy.name = 'accuracy'
    monitored_quantities = [cost, accuracy]
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities, valid_monitor_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    checkpoint = Checkpoint('celeba_classifier.zip', every_n_epochs=5,
                            use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=50), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()

def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()],
              [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum([(w ** 2).sum() for w in ws])
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=False)
    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [Timing(),
                  LogExtension('/home/belohlavek/ALI/mlp.log'),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring([cost, error_rate], valid_stream,
                                       prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    main_loop = MainLoop(algorithm, train_stream, model=Model(cost),
                         extensions=extensions)
    main_loop.run()

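# `Cifar10Dataset` above is a project-specific wrapper whose `get_stream`
# is not shown. A sketch of an equivalent stream built on Fuel's bundled
# CIFAR10 (the shuffling and use of the default stream are assumptions):
from fuel.datasets import CIFAR10
from fuel.schemes import ShuffledScheme
from fuel.streams import DataStream


def get_cifar10_stream(batch_size, is_train=True):
    dataset = CIFAR10(('train',) if is_train else ('test',))
    return DataStream.default_stream(
        dataset,
        iteration_scheme=ShuffledScheme(dataset.num_examples, batch_size))
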
def train(config, save_path, bokeh_name, params, bokeh_server, bokeh,
          test_tag, use_load_ext, load_log, fast_start):
    model, algorithm, data, extensions = initialize_all(
        config, save_path, bokeh_name, params, bokeh_server, bokeh,
        test_tag, use_load_ext, load_log, fast_start)

    # Save the config into the status
    log = NDarrayLog()
    log.status['_config'] = repr(config)

    main_loop = MainLoop(
        model=model, log=log, algorithm=algorithm,
        data_stream=data.get_stream("train"),
        extensions=extensions)
    main_loop.run()

def work():
    config_dict = yaml.load(open(sys.argv[1], 'r'))
    print config_dict
    if config_dict['working_mode'] == 'train_new':
        train, valid, alphabet = build_datasets(config_dict)
        generator, cost = build_model(len(alphabet), config_dict)
        algorithm = build_algorithm(generator, cost, config_dict)
        extensions = build_extensions(cost, algorithm, valid, config_dict)
        main_loop = MainLoop(algorithm=algorithm,
                             data_stream=train,
                             model=Model(cost),
                             extensions=extensions)
        main_loop.run()
    elif config_dict['working_mode'] == 'train_resume':
        # TODO
        pass

def test_load():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.picklebarrel')])
    main_loop.run()

    # Load the parameters, log and iteration state
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.picklebarrel',
                         load_iteration_state=True, load_log=True)])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.picklebarrel',
                         load_iteration_state=True, load_log=True)])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

def run(get_model, model_name):
    train_stream = ServerDataStream(('cases', 'image_features',
                                     'image_targets', 'multiplier'),
                                    False, hwm=10)
    valid_stream = ServerDataStream(('cases', 'image_features',
                                     'image_targets', 'multiplier'),
                                    False, hwm=10, port=5558)

    input_var = tensor.tensor4('image_features')
    target_var = tensor.tensor4('image_targets')
    multiply_var = tensor.matrix('multiplier')
    multiply_var = T.addbroadcast(multiply_var, 1)

    test_prediction, prediction, params = get_model(input_var, target_var,
                                                    multiply_var)

    loss = binary_crossentropy(prediction, target_var).mean()
    loss.name = 'loss'

    valid_error = T.neq((test_prediction > 0.5) * 1., target_var).mean()
    valid_error.name = 'error'

    scale = Scale(0.1)
    algorithm = GradientDescent(
        cost=loss,
        parameters=params,
        step_rule=scale,
        #step_rule=Adam(),
        on_unused_sources='ignore'
    )

    host_plot = 'http://localhost:5006'
    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[loss, valid_error],
                             data_stream=valid_stream,
                             prefix="valid"),
        Plot('%s %s %s' % (model_name, datetime.date.today(),
                           time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_error']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        # Checkpoint('train'),
        FinishAfter(after_n_epochs=10)
    ]

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                         extensions=extensions)

    cg = ComputationGraph(test_prediction)
    while True:
        main_loop.run()
        scale.learning_rate.set_value(
            numpy.float32(scale.learning_rate.get_value() * 0.7))
        numpy.savez('best_weights.npz',
                    [param.get_value() for param in cg.shared_variables])

def test_main_loop():
    old_config_profile_value = config.profile
    config.profile = True

    main_loop = MainLoop(
        MockAlgorithm(), IterableDataset(range(10)).get_example_stream(),
        extensions=[WriteBatchExtension(), FinishAfter(after_n_epochs=2)])
    main_loop.run()
    assert_raises(AttributeError, getattr, main_loop, 'model')

    assert main_loop.log.status['iterations_done'] == 20
    assert main_loop.log.status['_epoch_ends'] == [10, 20]
    assert len(main_loop.log) == 20
    for i in range(20):
        assert main_loop.log[i + 1]['batch'] == {'data': i % 10}

    config.profile = old_config_profile_value

def test_checkpointing():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.tar', parameters=[W])])
    main_loop.run()

    # Load it again
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.tar')])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.tar')])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Cleaning
    if os.path.exists('myweirdmodel.tar'):
        os.remove('myweirdmodel.tar')

def train_model(cost, error_rate, train_stream,
                load_location=None, save_location=None):
    cost.name = "Cross_entropy"
    error_rate.name = 'Error_rate'

    # Define the model
    model = Model(cost)

    # Load the parameters from a dumped model
    if load_location is not None:
        logger.info('Loading parameters...')
        model.set_param_values(load_parameter_values(load_location))

    cg = ComputationGraph(cost)
    step_rule = Momentum(learning_rate=0.1, momentum=0.9)
    algorithm = GradientDescent(cost=cost, step_rule=step_rule,
                                params=cg.parameters)
    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            # DataStreamMonitoring([cost], test_stream, prefix='test',
            #                      after_epoch=False, every_n_epochs=10),
            DataStreamMonitoring([cost], train_stream, prefix='train',
                                 after_epoch=True),
            Printing(after_epoch=True)
        ])
    main_loop.run()

    # Save the main loop
    if save_location is not None:
        logger.info('Saving the main loop...')
        dump_manager = MainLoopDumpManager(save_location)
        dump_manager.dump(main_loop)
        logger.info('Saved')
def train(config, save_path, bokeh_name, params, bokeh_server, bokeh,
          test_tag, use_load_ext, load_log, fast_start):
    conf_dump = pickle.dumps(config, protocol=0)
    model, algorithm, data, extensions = initialize_all(
        config, test_tag, save_path, bokeh_name, params, bokeh_server,
        bokeh, use_load_ext, load_log, fast_start)

    # Building the stream once populates data.info_dataset and
    # data.postfix_manager so they can be pickled below.
    data.get_stream("train", **data_params_train)
    dataset_dump = pickle.dumps(data.info_dataset, protocol=0)
    postfix_dump = pickle.dumps(data.postfix_manager, protocol=0)

    # Save the config into the status
    log = NDarrayLog()
    log.status['_config'] = repr(config)
    log.status['_config_pickle'] = repr(conf_dump)
    log.status['_dataset_pickle'] = repr(dataset_dump)
    log.status['_postfix_pickle'] = repr(postfix_dump)

    main_loop = MainLoop(
        model=model, log=log, algorithm=algorithm,
        data_stream=data.get_stream("train", **data_params_train),
        extensions=extensions)
    # Attach the pickled blobs as shared variables so they travel with
    # the main loop when it is serialized.
    main_loop.conf_pickle_shared = theano.shared(
        numpy.frombuffer(conf_dump, numpy.byte), name='_config_pickle')
    main_loop.data_pickle_shared = theano.shared(
        numpy.frombuffer(dataset_dump, numpy.byte), name='_dataset_pickle')
    main_loop.post_pickle_shared = theano.shared(
        numpy.frombuffer(postfix_dump, numpy.byte), name='_postfix_pickle')
    main_loop.run()
    return main_loop
def train(self, training_data):
    step_rules = [Adam(), StepClipping(1.0)]
    algorithm = GradientDescent(
        cost=self.Cost,
        parameters=self.ComputationGraph.parameters,
        step_rule=CompositeRule(step_rules))
    train_stream = DataStream.default_stream(
        training_data,
        iteration_scheme=SequentialScheme(
            training_data.num_examples, batch_size=20))
    main = MainLoop(model=Model(self.Cost),
                    data_stream=train_stream,
                    algorithm=algorithm,
                    extensions=[
                        FinishAfter(),
                        Printing(),
                        Checkpoint('trainingdata.tar', every_n_epochs=10)
                    ])
    main.run()
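Note that the bare `FinishAfter()` above registers no trigger condition, so this loop runs indefinitely (each pass over `train_stream` simply starts a new epoch) until it is interrupted. A minimal sketch of the same loop with an explicit bound; the epoch count of 50 is an arbitrary placeholder:

main = MainLoop(model=Model(self.Cost),
                data_stream=train_stream,
                algorithm=algorithm,
                extensions=[FinishAfter(after_n_epochs=50),  # placeholder bound
                            Printing(),
                            Checkpoint('trainingdata.tar', every_n_epochs=10)])
main.run()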
cg = ComputationGraph(cost)
model = Model(cost)

#################
# Algorithm
#################
algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=CompositeRule([StepClipping(10.0),
                                                     Adam(lr)]))

train_monitor = TrainingDataMonitoring(variables=[cost],
                                       after_epoch=True,
                                       prefix="train")

extensions = [
    train_monitor,
    TrackTheBest('train_sequence_log_likelihood'),
    Printing(after_epoch=True)
]

main_loop = MainLoop(model=model,
                     data_stream=data_stream,
                     algorithm=algorithm,
                     extensions=extensions)
main_loop.run()
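`TrackTheBest` only records the best value seen so far (and raises a `..._best_so_far` notification in the log); for early stopping it is commonly paired with a stopping extension. A sketch, assuming Blocks' `FinishIfNoImprovementAfter` and an arbitrary placeholder patience of 5 epochs:

extensions = [
    train_monitor,
    TrackTheBest('train_sequence_log_likelihood'),
    FinishIfNoImprovementAfter('train_sequence_log_likelihood_best_so_far',
                               epochs=5),  # placeholder patience
    Printing(after_epoch=True)
]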
def main(name, dataset, epochs, batch_size, learning_rate, attention,
         n_iter, enc_dim, dec_dim, z_dim, oldmodel):
    image_size, data_train, data_valid, data_test = datasets.get_data(dataset)

    train_stream = Flatten(
        DataStream(data_train,
                   iteration_scheme=SequentialScheme(
                       data_train.num_examples, batch_size)))
    valid_stream = Flatten(
        DataStream(data_valid,
                   iteration_scheme=SequentialScheme(
                       data_valid.num_examples, batch_size)))
    test_stream = Flatten(
        DataStream(data_test,
                   iteration_scheme=SequentialScheme(
                       data_test.num_examples, batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = img_height * img_width

    rnninits = {
        # 'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        # 'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    if attention != "":
        read_N, write_N = attention.split(',')
        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * read_N ** 2
        reader = AttentionReader(x_dim=x_dim, dec_dim=dec_dim,
                                 width=img_width, height=img_height,
                                 N=read_N, **inits)
        writer = AttentionWriter(input_dim=dec_dim, output_dim=x_dim,
                                 width=img_width, height=img_height,
                                 N=write_N, **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim
        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)
        attention_tag = "full"

    # ----------------------------------------------------------------------
    # Learning rate
    def lr_tag(value):
        """Convert a float into a short tag-usable string representation.

        E.g.:
            0.1   -> 11
            0.01  -> 12
            0.001 -> 13
            0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = time.strftime("%Y%m%d-%H%M%S") + "-" + name
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print("            dataset: %s" % dataset)
    print("       subdirectory: %s" % subdir)
    print("      learning rate: %g" % learning_rate)
    print("          attention: %s" % attention)
    print("       n_iterations: %d" % n_iter)
    print("  encoder dimension: %d" % enc_dim)
    print("        z dimension: %d" % z_dim)
    print("  decoder dimension: %d" % dec_dim)
    print("         batch size: %d" % batch_size)
    print("             epochs: %d" % epochs)
    print()

    # ----------------------------------------------------------------------
    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc", **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec", **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(n_iter,
                     reader=reader,
                     encoder_mlp=encoder_mlp,
                     encoder_rnn=encoder_rnn,
                     sampler=q_sampler,
                     decoder_mlp=decoder_mlp,
                     decoder_rnn=decoder_rnn,
                     writer=writer)
    draw.initialize()

    # ------------------------------------------------------------------------
    x = tensor.matrix('features')

    # x_recons = 1. + x
    x_recons, kl_terms = draw.reconstruct(x)
    # x_recons, _, _, _, _ = draw.silly(x, n_steps=10, batch_size=100)
    # x_recons = x_recons[-1, :, :]

    # samples = draw.sample(100)
    # x_recons = samples[-1, :, :]

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    # ------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(10.),
            Adam(learning_rate),
        ])
        # step_rule=RMSProp(learning_rate),
        # step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )
    # algorithm.add_updates(scan_updates)

    # ------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        # x_recons_t = T.nnet.sigmoid(c[t, :, :])
        # recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        # recons_term_t = recons_term_t.mean()
        # recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]

    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        # ["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    # ------------------------------------------------------------
    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(
                train_monitors,
                prefix="train",
                after_epoch=True),
            # DataStreamMonitoring(
            #     monitors,
            #     valid_stream,
            #     updates=scan_updates,
            #     prefix="valid"),
            DataStreamMonitoring(
                monitors,
                test_stream,
                # updates=scan_updates,
                prefix="test"),
            Checkpoint(name,
                       before_training=False,
                       after_epoch=True,
                       save_separately=['log', 'model']),
            # Checkpoint(image_size=image_size, save_subdir=subdir,
            #            path=pickle_file, before_training=False,
            #            after_epoch=True, save_separately=['log', 'model']),
            Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()
        ])

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_param_values(oldmodel.get_param_values())
        del oldmodel

    main_loop.run()