def test_checkpoint_save_separately_paths():
    class FakeMainLoop(object):
        def __init__(self):
            self.foo = 'abcdef'
            self.bar = {'a': 1}
            self.baz = 351921

    chkpt = Checkpoint(path='myweirdmodel.picklebarrel',
                       save_separately=['foo', 'bar'])
    expected = {'foo': 'myweirdmodel_foo.picklebarrel',
                'bar': 'myweirdmodel_bar.picklebarrel'}
    assert chkpt.save_separately_filenames(chkpt.path) == expected
    expected = {'foo': 'notmodelpath_foo', 'bar': 'notmodelpath_bar'}
    assert chkpt.save_separately_filenames('notmodelpath') == expected
def create_main_loop(work_dir):
    model, bn_model, bn_updates = create_models()
    ali, = bn_model.top_bricks
    discriminator_loss, generator_loss = bn_model.outputs
    step_rule = Adam(learning_rate=LEARNING_RATE, beta1=BETA1)
    algorithm = ali_algorithm(discriminator_loss, ali.discriminator_parameters,
                              step_rule, generator_loss,
                              ali.generator_parameters, step_rule)
    algorithm.add_updates(bn_updates)
    streams = create_gaussian_mixture_data_streams(
        batch_size=BATCH_SIZE, monitoring_batch_size=MONITORING_BATCH_SIZE,
        means=MEANS, variances=VARIANCES, priors=PRIORS)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams
    bn_monitored_variables = (
        [v for v in bn_model.auxiliary_variables if 'norm' not in v.name] +
        bn_model.outputs)
    monitored_variables = (
        [v for v in model.auxiliary_variables if 'norm' not in v.name] +
        model.outputs)
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=NUM_EPOCHS),
        DataStreamMonitoring(bn_monitored_variables, train_monitor_stream,
                             prefix="train", updates=bn_updates),
        DataStreamMonitoring(monitored_variables, valid_monitor_stream,
                             prefix="valid"),
        Checkpoint(os.path.join(work_dir, "main_loop.tar"),
                   after_epoch=True, after_training=True, use_cpickle=True),
        ProgressBar(),
        Printing(),
        # ModelLogger(folder=work_dir, after_epoch=True),
        GraphLogger(num_modes=1, num_samples=2500, dimension=2, r=0, std=1,
                    folder=work_dir, after_epoch=True, after_training=True),
        MetricLogger(means=MEANS, variances=VARIANCES, folder=work_dir,
                     after_epoch=True)
    ]
    main_loop = MainLoop(model=bn_model, data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    return main_loop
def test_save_the_best():
    with NamedTemporaryFile(dir=config.temp_dir) as dst,\
            NamedTemporaryFile(dir=config.temp_dir) as dst_best:
        track_cost = TrackTheBest("cost", after_epoch=False, after_batch=True)
        main_loop = MockMainLoop(
            extensions=[FinishAfter(after_n_epochs=1),
                        WriteCostExtension(),
                        track_cost,
                        Checkpoint(dst.name, after_batch=True,
                                   save_separately=['log'])
                        .add_condition(
                            ["after_batch"],
                            OnLogRecord(track_cost.notification_name),
                            (dst_best.name,))])
        main_loop.run()
        assert main_loop.log[4]['saved_to'] == (dst.name, dst_best.name)
        assert main_loop.log[5]['saved_to'] == (dst.name, dst_best.name)
        assert main_loop.log[6]['saved_to'] == (dst.name,)
        with open(dst_best.name, 'rb') as src:
            assert load(src).log.status['iterations_done'] == 5
        root, ext = os.path.splitext(dst_best.name)
        log_path = root + "_log" + ext
        with open(log_path, 'rb') as src:
            assert cPickle.load(src).status['iterations_done'] == 5
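# The test above exercises the usual "save the best model" pattern. A minimal,
# hedged sketch of that pattern for a plain training script follows; the
# record name 'valid_cost' and the file names are placeholders, not taken from
# the snippets in this document.
from blocks.extensions.predicates import OnLogRecord
from blocks.extensions.saveload import Checkpoint
from blocks.extensions.training import TrackTheBest

track_cost = TrackTheBest('valid_cost', after_epoch=True)
checkpoint = Checkpoint('model.tar', after_epoch=True)
# Also write 'model_best.tar' whenever TrackTheBest adds its
# 'valid_cost_best_so_far' record to the log.
checkpoint.add_condition(['after_epoch'],
                         predicate=OnLogRecord(track_cost.notification_name),
                         arguments=('model_best.tar',))
extensions = [track_cost, checkpoint]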
def create_main_loop(save_path):
    model, bn_model, bn_updates = create_models()
    ali, = bn_model.top_bricks
    discriminator_loss, generator_loss = bn_model.outputs
    step_rule = Adam(learning_rate=LEARNING_RATE, beta1=BETA1)
    algorithm = ali_algorithm(discriminator_loss, ali.discriminator_parameters,
                              step_rule, generator_loss,
                              ali.generator_parameters, step_rule)
    algorithm.add_updates(bn_updates)
    streams = create_celeba_data_streams(BATCH_SIZE, MONITORING_BATCH_SIZE)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams
    bn_monitored_variables = (
        [v for v in bn_model.auxiliary_variables if 'norm' not in v.name] +
        bn_model.outputs)
    monitored_variables = (
        [v for v in model.auxiliary_variables if 'norm' not in v.name] +
        model.outputs)
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=NUM_EPOCHS),
        DataStreamMonitoring(bn_monitored_variables, train_monitor_stream,
                             prefix="train", updates=bn_updates),
        DataStreamMonitoring(monitored_variables, valid_monitor_stream,
                             prefix="valid"),
        Checkpoint(save_path, after_epoch=True, after_training=True,
                   use_cpickle=True),
        ProgressBar(),
        Printing(),
    ]
    main_loop = MainLoop(model=bn_model, data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    return main_loop
def prepare_opti(cost, test, *args):
    model = Model(cost)
    logger.info("Model created")

    algorithm = GradientDescent(cost=cost, parameters=model.parameters,
                                step_rule=Adam(learning_rate=0.0015),
                                on_unused_sources='ignore')

    to_monitor = [algorithm.cost]
    if args:
        to_monitor.extend(args)

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='loglikelihood_nat',
                                   epochs=patience),
        TrainingDataMonitoring(to_monitor, prefix="train", after_epoch=True),
        DataStreamMonitoring(to_monitor, test_stream, prefix="test"),
        Printing(),
        ProgressBar(),
        ApplyMask(before_first_epoch=True, after_batch=True),
        Checkpoint(check, every_n_epochs=save_every),
        SaveModel(name=path + '/' + 'pixelcnn_{}'.format(dataset),
                  every_n_epochs=save_every),
        GenerateSamples(every_n_epochs=save_every),
        # Checkpoint(path+'/'+'exp.log', save_separately=['log'],
        #            every_n_epochs=save_every),
    ]

    if resume:
        logger.info("Restoring from previous checkpoint")
        extensions = [Load(path + '/' + check)]

    return model, algorithm, extensions
def main(save_to, num_batches):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"
    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        parameters=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring([cost], get_data_stream(range(100, 200)),
                                 prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring(
            [cost, error_rate],
            Flatten(DataStream.default_stream(
                mnist_test,
                iteration_scheme=SequentialScheme(
                    mnist_test.num_examples, 500)),
                which_sources=('features',)),
            prefix="test"),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train", after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[['test_final_cost',
                            'test_misclassificationrate_apply_error_rate'],
                           ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(DataStream.default_stream(
            mnist_train,
            iteration_scheme=SequentialScheme(
                mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)
    main_loop.run()
def test_add_list_condition():
    extension_list = Checkpoint('extension_list').add_condition(
        ['before_first_epoch', 'after_epoch'],
        OnLogRecord('notification_name'),
        ('dest_path.kl',))
    extension_iter = Checkpoint('extension_iter')
    extension_iter.add_condition(
        ['before_first_epoch'],
        OnLogRecord('notification_name'),
        ('dest_path.kl',))
    extension_iter.add_condition(
        ['after_epoch'],
        OnLogRecord('notification_name'),
        ('dest_path.kl',))
    assert len(extension_list._conditions) == len(extension_iter._conditions)
    assert_raises(ValueError, extension_iter.add_condition,
                  callbacks_names='after_epoch',
                  predicate=OnLogRecord('notification_name'),
                  arguments=('dest_path.kl',))
def run(model_name):
    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')

    image_border_size = 100

    if running_on_laptop:
        host_plot = 'http://*****:*****'

    # [The model, cost, data streams, algorithm and the beginning of the
    #  extensions list are missing from this snippet.]
    extensions = [
        # ...
        Plot('%s %s @ %s' % (model_name, datetime.datetime.now(),
                             socket.gethostname()),
             channels=[['loss', 'valid_loss_test'], ['valid_error']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
def create_main_loop(save_path):
    model, bn_model, bn_updates = create_models()
    ali, = bn_model.top_bricks
    discriminator_loss, generator_loss = bn_model.outputs
    step_rule = Adam(learning_rate=LEARNING_RATE, beta1=BETA1)
    algorithm = ali_algorithm(discriminator_loss, ali.discriminator_parameters,
                              step_rule, generator_loss,
                              ali.generator_parameters, step_rule)
    algorithm.add_updates(bn_updates)
    streams = create_cifar10_data_streams(BATCH_SIZE, MONITORING_BATCH_SIZE)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams

    for d in main_loop_stream.get_epoch_iterator(as_dict=True):
        print(d.keys())
        print(d['features'].shape, d['features'].dtype)
        break

    main_loop_stream = ShapesDataset(
        num_examples=600, img_size=32, min_diameter=3,
        seed=1234).create_stream(batch_size=BATCH_SIZE, is_train=True)
    for d in main_loop_stream.get_epoch_iterator(as_dict=True):
        print(d.keys())
        print(d['features'].shape, d['features'].dtype)
        break
    train_monitor_stream = ShapesDataset(
        num_examples=100, img_size=32, min_diameter=3,
        seed=1234).create_stream(batch_size=BATCH_SIZE, is_train=False)
    valid_monitor_stream = ShapesDataset(
        num_examples=100, img_size=32, min_diameter=3,
        seed=5678).create_stream(batch_size=BATCH_SIZE, is_train=False)

    bn_monitored_variables = (
        [v for v in bn_model.auxiliary_variables if 'norm' not in v.name] +
        bn_model.outputs)
    monitored_variables = (
        [v for v in model.auxiliary_variables if 'norm' not in v.name] +
        model.outputs)
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=NUM_EPOCHS),
        DataStreamMonitoring(bn_monitored_variables, train_monitor_stream,
                             prefix="train", updates=bn_updates),
        DataStreamMonitoring(monitored_variables, valid_monitor_stream,
                             prefix="valid"),
        Checkpoint(save_path, after_epoch=True, after_training=True,
                   use_cpickle=True),
        ProgressBar(),
        Printing(),
    ]
    main_loop = MainLoop(model=bn_model, data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    return main_loop
def run():
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=True)
    main_loop_stream = streams[0]
    train_monitor_stream = streams[1]
    valid_monitor_stream = streams[2]

    cg, bn_dropout_cg = create_training_computation_graphs()

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    pop_updates = get_batch_normalization_updates(bn_dropout_cg)
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_dropout_cg.outputs[0],
                                parameters=bn_dropout_cg.parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    cost = bn_dropout_cg.outputs[0]
    cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [cost], train_monitor_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    cost, accuracy = cg.outputs
    cost.name = 'cost'
    accuracy.name = 'accuracy'
    monitored_quantities = [cost, accuracy]
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities, valid_monitor_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    checkpoint = Checkpoint(
        'celeba_classifier.zip', every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=50), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()],
              [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum((w ** 2).sum() for w in ws)
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=False)
    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train", after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]
    main_loop = MainLoop(algorithm, train_stream, model=Model(cost),
                         extensions=extensions)
    main_loop.run()
def test_load():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.picklebarrel')])
    main_loop.run()

    # Load the parameters, log and iteration state
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.picklebarrel',
                         load_iteration_state=True, load_log=True)])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.picklebarrel',
                         load_iteration_state=True, load_log=True)])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
def construct_main_loop(name, task_name, patch_shape, batch_size,
                        n_spatial_dims, n_patches, n_epochs,
                        learning_rate, hyperparameters, **kwargs):
    name = "%s_%s" % (name, task_name)
    hyperparameters["name"] = name

    task = get_task(**hyperparameters)
    hyperparameters["n_channels"] = task.n_channels

    x_uncentered, y = task.get_variables()
    x = task.preprocess(x_uncentered)

    # this is a theano variable; it may depend on the batch
    hyperparameters["image_shape"] = x.shape[-n_spatial_dims:]

    ram = construct_model(task=task, **hyperparameters)
    ram.initialize()

    hs = ram.compute(x, n_patches)
    cost = ram.emitter.cost(hs, y, n_patches)
    cost.name = "cost"

    print("setting up main loop...")
    graph = ComputationGraph(cost)
    uselessflunky = Model(cost)
    algorithm = GradientDescent(cost=cost,
                                parameters=graph.parameters,
                                step_rule=Adam(learning_rate=learning_rate))
    monitors = construct_monitors(
        x=x, x_uncentered=x_uncentered, y=y, hs=hs, cost=cost,
        algorithm=algorithm, task=task, model=uselessflunky, ram=ram,
        graph=graph, **hyperparameters)
    main_loop = MainLoop(
        data_stream=task.get_stream("train"),
        algorithm=algorithm,
        extensions=(monitors +
                    [FinishAfter(after_n_epochs=n_epochs),
                     DumpMinimum(name + '_best',
                                 channel_name='valid_error_rate'),
                     Dump(name + '_dump', every_n_epochs=10),
                     Checkpoint(name + '_checkpoint.pkl',
                                every_n_epochs=10, on_interrupt=False),
                     ProgressBar(),
                     Timing(),
                     Printing(),
                     PrintingTo(name + "_log")]),
        model=uselessflunky)
    return main_loop
def test_save_the_best():
    skip_if_configuration_set('log_backend', 'sqlite',
                              "Known to be flaky with SQLite log backend.")
    with NamedTemporaryFile(dir=config.temp_dir) as dst,\
            NamedTemporaryFile(dir=config.temp_dir) as dst_best:
        track_cost = TrackTheBest("cost", after_epoch=False, after_batch=True)
        main_loop = MockMainLoop(extensions=[
            FinishAfter(after_n_epochs=1),
            WriteCostExtension(),
            track_cost,
            Checkpoint(dst.name, after_batch=True, save_separately=['log'])
            .add_condition(["after_batch"],
                           OnLogRecord(track_cost.notification_name),
                           (dst_best.name,))])
        main_loop.run()
        assert main_loop.log[4]['saved_to'] == (dst.name, dst_best.name)
        assert main_loop.log[5]['saved_to'] == (dst.name, dst_best.name)
        assert main_loop.log[6]['saved_to'] == (dst.name,)
        with open(dst_best.name, 'rb') as src:
            assert load(src).log.status['iterations_done'] == 5
def setUp(self):
    """Create main loop and run it."""
    mlp = MLP(activations=[None], dims=[10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    self.W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    self.data_stream = IterableDataset(data).get_example_stream()
    self.model = Model(cost)
    self.algorithm = GradientDescent(cost=cost, parameters=[self.W])
    self.main_loop = MainLoop(
        model=self.model,
        data_stream=self.data_stream,
        algorithm=self.algorithm,
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.tar', save_separately=['log'])])
    self.main_loop.run()
def train(self, data_file, output_data_file, n_epochs=0):
    training_data = dataset.T_H5PYDataset(data_file, which_sets=('train',))
    test_data = dataset.T_H5PYDataset(data_file, which_sets=('test',))
    session = Session(root_url='http://localhost:5006')

    if self.MainLoop is None:
        step_rules = [RMSProp(learning_rate=0.2, decay_rate=0.95),
                      StepClipping(1)]
        algorithm = GradientDescent(
            cost=self.Cost,
            parameters=self.ComputationGraph.parameters,
            step_rule=CompositeRule(step_rules),
            on_unused_sources='ignore')

        train_stream = DataStream.default_stream(
            training_data,
            iteration_scheme=SequentialScheme(
                training_data.num_examples, batch_size=100))
        test_stream = DataStream.default_stream(
            test_data,
            iteration_scheme=SequentialScheme(
                test_data.num_examples, batch_size=100))

        self.MainLoop = MainLoop(
            model=Model(self.Cost),
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=[
                FinishAfter(after_n_epochs=n_epochs),
                Printing(),
                Checkpoint(output_data_file, every_n_epochs=50),
                TrainingDataMonitoring([self.Cost], after_batch=True,
                                       prefix='train'),
                DataStreamMonitoring([self.Cost], after_batch=True,
                                     data_stream=test_stream, prefix='test'),
                Plot(output_data_file,
                     channels=[['train_cost', 'test_cost']])
            ])

    self.MainLoop.run()
def train(self, training_data):
    step_rules = [Adam(), StepClipping(1.0)]
    algorithm = GradientDescent(
        cost=self.Cost,
        parameters=self.ComputationGraph.parameters,
        step_rule=CompositeRule(step_rules))

    train_stream = DataStream.default_stream(
        training_data,
        iteration_scheme=SequentialScheme(training_data.num_examples,
                                          batch_size=20))

    main = MainLoop(model=Model(self.Cost),
                    data_stream=train_stream,
                    algorithm=algorithm,
                    extensions=[
                        FinishAfter(),
                        Printing(),
                        Checkpoint('trainingdata.tar', every_n_epochs=10)
                    ])
    main.run()
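# The train() method above only writes 'trainingdata.tar'; resuming from it is
# not shown. A minimal, hedged sketch of resuming with the Load extension (the
# same blocks.extensions.saveload.Load used in test_load above) follows; it
# assumes the cost and algorithm are rebuilt exactly as in train() before the
# main loop is constructed.
from blocks.extensions.saveload import Load

extensions = [
    Load('trainingdata.tar', load_iteration_state=True, load_log=True),
    # ... followed by the same FinishAfter/Printing/Checkpoint extensions
]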
########### ALGORITHM of training #############
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Adam(learning_rate=0.0005))

extensions = [
    Timing(),
    FinishAfter(after_n_epochs=num_epochs),
    DataStreamMonitoring([cost, error_rate, error_rate2], stream_valid,
                         prefix="valid"),
    TrainingDataMonitoring(
        [cost, error_rate,
         aggregation.mean(algorithm.total_gradient_norm)],
        prefix="train", after_epoch=True),
    Checkpoint("catsVsDogs256.pkl"),
    ProgressBar(),
    Printing()
]

# Adding a live plot with the bokeh server
extensions.append(
    Plot('CatsVsDogs_256',
         channels=[['train_error_rate', 'valid_error_rate'],
                   ['valid_cost', 'valid_error_rate2'],
                   ['train_total_gradient_norm']],
         after_epoch=True))

model = Model(cost)
main_loop = MainLoop(algorithm, stream_train,
valid_monitor = DataStreamMonitoring(monitoring_variables, valid_stream,
                                     every_n_batches=n_batches,
                                     prefix="valid")

extensions = [
    ProgressBar(),
    Timing(every_n_batches=n_batches),
    train_monitor,
    valid_monitor,
    TrackTheBest('valid_nll', every_n_batches=n_batches),
    Plot(save_dir + "progress/" + experiment_name + ".png",
         [['train_nll', 'valid_nll'], ['valid_learning_rate']],
         every_n_batches=n_batches, email=False),
    Checkpoint(save_dir + "pkl/best_" + experiment_name + ".pkl",
               use_cpickle=True).add_condition(
        ['after_batch'], predicate=OnLogRecord('valid_nll_best_so_far')),
    Printing(every_n_batches=n_batches),
    Flush(every_n_batches=n_batches, before_first_epoch=True),
    LearningRateSchedule(lr, 'valid_nll',
                         path=save_dir + "pkl/best_" + experiment_name + ".pkl",
                         states=states.values(),
                         every_n_batches=n_batches,
                         before_first_epoch=True)
]

main_loop = MainLoop(model=model, data_stream=train_stream,
                     algorithm=algorithm,
def main(save_to, model, train, test, num_epochs, input_size=(150, 150),
         learning_rate=0.01, batch_size=50, num_batches=None,
         flatten_stream=False):
    """
    save_to : where to save the trained model
    model : model given as input; must already be initialised
        (works with convnet and mlp)
    input_size : the shape of the reshaped input image
        (before flattening is applied if flatten_stream is True)
    """
    if flatten_stream:
        x = tensor.matrix('image_features')
    else:
        x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    # Data augmentation
    # insert data augmentation here

    # Generating streams
    train_stream = DataStream.default_stream(
        train,
        iteration_scheme=ShuffledScheme(train.num_examples, batch_size))
    test_stream = DataStream.default_stream(
        test,
        iteration_scheme=ShuffledScheme(test.num_examples, batch_size))

    # Reshaping procedure
    # Add a crop option in scikitresize so that the image is not deformed
    # Resize to desired square shape
    train_stream = ScikitResize(train_stream, input_size,
                                which_sources=('image_features',))
    test_stream = ScikitResize(test_stream, input_size,
                               which_sources=('image_features',))

    # Flattening the stream
    if flatten_stream is True:
        train_stream = Flatten(train_stream,
                               which_sources=('image_features',))
        test_stream = Flatten(test_stream,
                              which_sources=('image_features',))

    # Apply input to model
    probs = model.apply(x)

    # Defining cost and various indices to watch
    # print(probs)
    # cost = SquaredError().apply(y.flatten(), probs)
    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           probs).copy(name='cost')
    error_rate = MisclassificationRate().apply(y.flatten(), probs).copy(
        name='error_rate')

    # Building computation graph
    cg = ComputationGraph([cost, error_rate])

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=learning_rate))

    # Defining extensions
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train", every_n_batches=5),
                  DataStreamMonitoring([cost, error_rate], test_stream,
                                       prefix="test", every_n_batches=25),
                  Checkpoint(save_to),
                  ProgressBar(),
                  Printing(every_n_batches=5)]
    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.

    model = Model(cost)

    main_loop = MainLoop(algorithm, train_stream, model=model,
                         extensions=extensions)
    main_loop.run()
def main(save_to):
    batch_size = 365
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #    feature_maps = [20, 50]
    #    mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = LeNet(conv_activations, 1, image_size,
                    filter_sizes=zip(conv_sizes, conv_sizes),
                    feature_maps=feature_maps,
                    pooling_sizes=zip(pool_sizes, pool_sizes),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info("Input dim: {} {} {}".format(
        *convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(
                i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))

    x = tensor.tensor4('features')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cg = ComputationGraph([probs])
    outs = VariableFilter(
        roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables)

    # Create an interior activation model
    model = Model([probs] + outs)

    # Load it with trained parameters
    params = load_parameters(open(save_to, 'rb'))
    model.set_parameter_values(params)

    algorithm = MaximumActivationSearch(outputs=outs)

    # Use the mnist test set, unshuffled
    mnist_test = MNIST(("test",), sources=['features'])
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=SequentialScheme(
            mnist_test.num_examples, batch_size))

    extensions = [Timing(),
                  FinishAfter(after_n_epochs=1),
                  DataStreamMonitoring([], mnist_test_stream, prefix="test"),
                  Checkpoint("maxact.tar"),
                  ProgressBar(),
                  Printing()]

    main_loop = MainLoop(algorithm, mnist_test_stream, model=model,
                         extensions=extensions)
    main_loop.run()

    examples = mnist_test.get_example_stream()
    example = examples.get_data(0)[0]
    layers = convnet.layers
    for output, record in algorithm.maximum_activations.items():
        layer = get_brick(output)
        activations, indices, snapshots = (
            r.get_value() if r else None for r in record[1:])
        filmstrip = Filmstrip(example.shape[-2:],
                              (indices.shape[1], indices.shape[0]),
                              background='blue')
        if layer in layers:
            fieldmap = layerarray_fieldmap(layers[0:layers.index(layer) + 1])
            for unit in range(indices.shape[1]):
                for index in range(100):
                    mask = make_mask(example.shape[-2:], fieldmap, numpy.clip(
                        snapshots[index, unit, :, :], 0, numpy.inf))
                    imagenum = indices[index, unit, 0]
                    filmstrip.set_image((unit, index),
                                        examples.get_data(imagenum)[0], mask)
        else:
            for unit in range(indices.shape[1]):
                for index in range(100):
                    imagenum = indices[index, unit]
                    filmstrip.set_image((unit, index),
                                        examples.get_data(imagenum)[0])
        filmstrip.save(layer.name + '_maxact.jpg')
def construct_main_loop(name, task_name, patch_shape, batch_size,
                        n_spatial_dims, n_patches, max_epochs,
                        patience_epochs, learning_rate,
                        gradient_limiter, hyperparameters, **kwargs):
    task = tasks.get_task(**hyperparameters)
    hyperparameters["n_channels"] = task.n_channels

    extensions = []

    # let theta noise decay as training progresses
    for key in "location_std scale_std".split():
        hyperparameters[key] = theano.shared(hyperparameters[key], name=key)
        extensions.append(util.ExponentialDecay(
            hyperparameters[key],
            hyperparameters["%s_decay" % key],
            after_batch=True))

    print("constructing graphs...")
    graphs, outputs, updates = construct_graphs(task=task, **hyperparameters)

    print("setting up main loop...")

    from blocks.model import Model
    model = Model(outputs["train"]["cost"])

    from blocks.algorithms import GradientDescent, CompositeRule, StepClipping, Adam, RMSProp
    from extensions import Compressor
    if gradient_limiter == "clip":
        limiter = StepClipping(1.)
    elif gradient_limiter == "compress":
        limiter = Compressor()
    else:
        raise ValueError()

    algorithm = GradientDescent(
        cost=outputs["train"]["cost"],
        parameters=graphs["train"].parameters,
        step_rule=CompositeRule([limiter, Adam(learning_rate=learning_rate)]))
    algorithm.add_updates(updates["train"])

    extensions.extend(construct_monitors(
        algorithm=algorithm, task=task, model=model, graphs=graphs,
        outputs=outputs, updates=updates, **hyperparameters))

    from blocks.extensions import FinishAfter, Printing, ProgressBar, Timing
    from blocks.extensions.stopping import FinishIfNoImprovementAfter
    from blocks.extensions.training import TrackTheBest
    from blocks.extensions.saveload import Checkpoint
    from dump import DumpBest, LightCheckpoint, PrintingTo, DumpGraph, DumpLog
    extensions.extend([
        TrackTheBest("valid_error_rate", "best_valid_error_rate"),
        FinishIfNoImprovementAfter("best_valid_error_rate",
                                   epochs=patience_epochs),
        FinishAfter(after_n_epochs=max_epochs),
        DumpBest("best_valid_error_rate", name + "_best.zip"),
        Checkpoint(hyperparameters["checkpoint_save_path"],
                   on_interrupt=False, every_n_epochs=10, use_cpickle=True),
        DumpLog("log.pkl", after_epoch=True),
        ProgressBar(),
        Timing(),
        Printing(),
        PrintingTo(name + "_log"),
        DumpGraph(name + "_grad_graph")])

    from blocks.main_loop import MainLoop
    main_loop = MainLoop(data_stream=task.get_stream("train"),
                         algorithm=algorithm,
                         extensions=extensions,
                         model=model)

    from tabulate import tabulate
    print("parameter sizes:")
    print(tabulate((key, "x".join(map(str, value.get_value().shape)),
                    value.get_value().size)
                   for key, value
                   in main_loop.model.get_parameter_dict().items()))

    return main_loop
def test_checkpoint_exception(self):
    """Check checkpoint exception."""
    checkpoint = Checkpoint(None, save_separately=["foo"])
    checkpoint.main_loop = self.main_loop
    self.assertRaises(AttributeError, checkpoint.do, None)
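# test_checkpoint_exception above only asserts that Checkpoint.do() raises
# when the extension was constructed with path=None. As a hedged counterpart
# (the path and the attached main loop are placeholders, not part of this
# test suite), a working configuration would look like:
checkpoint = Checkpoint('main_loop.tar', save_separately=['log'],
                        after_epoch=True)
checkpoint.main_loop = main_loop  # assumed to already exist
# Per save_separately_filenames (see test_checkpoint_save_separately_paths
# above), the log would then be written to 'main_loop_log.tar' next to the
# main checkpoint file.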
def main(name, dataset, epochs, batch_size, learning_rate, attention,
         n_iter, enc_dim, dec_dim, z_dim, oldmodel):
    image_size, data_train, data_valid, data_test = datasets.get_data(dataset)

    train_stream = Flatten(
        DataStream(data_train,
                   iteration_scheme=SequentialScheme(
                       data_train.num_examples, batch_size)))
    valid_stream = Flatten(
        DataStream(data_valid,
                   iteration_scheme=SequentialScheme(
                       data_valid.num_examples, batch_size)))
    test_stream = Flatten(
        DataStream(data_test,
                   iteration_scheme=SequentialScheme(
                       data_test.num_examples, batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = img_height * img_width

    rnninits = {
        # 'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        # 'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    if attention != "":
        read_N, write_N = attention.split(',')
        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * read_N ** 2

        reader = AttentionReader(x_dim=x_dim, dec_dim=dec_dim,
                                 width=img_width, height=img_height,
                                 N=read_N, **inits)
        writer = AttentionWriter(input_dim=dec_dim, output_dim=x_dim,
                                 width=img_width, height=img_height,
                                 N=write_N, **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        attention_tag = "full"

    # ----------------------------------------------------------------------
    # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation.
            E.g.:
                0.1   -> 11
                0.01  -> 12
                0.001 -> 13
                0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = time.strftime("%Y%m%d-%H%M%S") + "-" + name
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print(" dataset: %s" % dataset)
    print(" subdirectory: %s" % subdir)
    print(" learning rate: %g" % learning_rate)
    print(" attention: %s" % attention)
    print(" n_iterations: %d" % n_iter)
    print(" encoder dimension: %d" % enc_dim)
    print(" z dimension: %d" % z_dim)
    print(" decoder dimension: %d" % dec_dim)
    print(" batch size: %d" % batch_size)
    print(" epochs: %d" % epochs)
    print()

    # ----------------------------------------------------------------------
    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)
    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc", **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec", **inits)
    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(n_iter,
                     reader=reader,
                     encoder_mlp=encoder_mlp,
                     encoder_rnn=encoder_rnn,
                     sampler=q_sampler,
                     decoder_mlp=decoder_mlp,
                     decoder_rnn=decoder_rnn,
                     writer=writer)
    draw.initialize()

    # ------------------------------------------------------------------------
    x = tensor.matrix('features')

    # x_recons = 1. + x
    x_recons, kl_terms = draw.reconstruct(x)
    # x_recons, _, _, _, _ = draw.silly(x, n_steps=10, batch_size=100)
    # x_recons = x_recons[-1,:,:]

    # samples = draw.sample(100)
    # x_recons = samples[-1, :, :]
    # x_recons = samples[-1, :, :]

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    # ------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(10.),
            Adam(learning_rate),
        ])
        # step_rule=RMSProp(learning_rate),
        # step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )
    # algorithm.add_updates(scan_updates)

    # ------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        # x_recons_t = T.nnet.sigmoid(c[t,:,:])
        # recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        # recons_term_t = recons_term_t.mean()
        # recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]

    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        # ["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    # ------------------------------------------------------------
    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(train_monitors, prefix="train",
                                   after_epoch=True),
            # DataStreamMonitoring(
            #     monitors,
            #     valid_stream,
            #     updates=scan_updates,
            #     prefix="valid"),
            DataStreamMonitoring(
                monitors,
                test_stream,
                # updates=scan_updates,
                prefix="test"),
            Checkpoint(name, before_training=False, after_epoch=True,
                       save_separately=['log', 'model']),
            # Checkpoint(image_size=image_size, save_subdir=subdir,
            #            path=pickle_file, before_training=False,
            #            after_epoch=True, save_separately=['log', 'model']),
            Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()
        ])

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_param_values(oldmodel.get_param_values())
        del oldmodel

    main_loop.run()
### Gradient Descent
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Scale(learning_rate=learning_rate))

extensions = [Timing(),
              FinishAfter(after_n_epochs=num_epochs),
              DataStreamMonitoring([cost, error_rate, error_rate2],
                                   data_valid_stream, prefix="valid"),
              TrainingDataMonitoring(
                  [cost, error_rate,
                   aggregation.mean(algorithm.total_gradient_norm)],
                  prefix="train", after_epoch=True),
              Checkpoint(save_to),
              ProgressBar(),
              Printing()]

### Plotting extensions
if mode in ("GPU_run", "data_server"):
    try:
        from plot import Plot
        extensions.append(Plot('%s %s @ %s' % (graph_name,
                                               datetime.datetime.now(),
                                               socket.gethostname()),
                               channels=[['train_error_rate',
                                          'valid_error_rate'],
                                         ['train_total_gradient_norm']],
                               after_epoch=True, server_url=host_plot))
        PLOT_AVAILABLE = True
    except ImportError:
        PLOT_AVAILABLE = False

extensions.append(Checkpoint(save_to, after_epoch=True, after_training=True,
                             save_separately=['log']))
                            hidden_dim=args.num_hidden)))  # , num_examples=1000)))

if not args.baseline:
    save_str += 'BN_'
if args.zoneout:
    save_str += 'zoneout_' + str(args.drop_prob) + '-'
if args.elephant:
    save_str += 'elephant_' + str(args.drop_prob) + '-'

extensions.extend([
    TrackTheBest("valid_training_error_rate",
                 "best_valid_training_error_rate"),
    # DumpBest("best_valid_training_error_rate", "best.zip"),
    FinishAfter(after_n_epochs=args.num_epochs),
    # FinishIfNoImprovementAfter("best_valid_error_rate", epochs=50),
    Checkpoint(save_str + "checkpoint.zip", on_interrupt=False,
               every_n_epochs=1, use_cpickle=True),
    DumpLog(save_str + "log.pkl", after_epoch=True)
])

if not args.cluster:
    extensions.append(ProgressBar())

extensions.extend([
    Timing(),
    Printing(),
    PrintingTo("log"),
])

train_stream = get_stream(which_set="train", for_evaluation=False,
                          batch_size=args.batch_size,
# ***************** main loop ****************
train_monitor = TrainingDataMonitoring([cost_monitor], prefix="train")
val_monitor = DataStreamMonitoring([cost_monitor], stream_val, prefix="val")

if conf.task == 'CTC':
    PER_Monitor = PhonemeErrorRate
elif conf.task == 'framewise':
    PER_Monitor = PhonemeErrorRateFramewise
else:
    raise ValueError(conf.task)

per_val_monitor = PER_Monitor(stream_val,
                              theano.function([x, x_m], y_hat_softmax),
                              before_first_epoch=True, after_epoch=True,
                              prefix='valPER')
per_test_monitor = PER_Monitor(stream_test,
                               theano.function([x, x_m], y_hat_softmax),
                               before_first_epoch=False, every_n_epochs=5,
                               prefix='testPER')

checkpoint = Checkpoint(conf.path_to_model, after_training=False)
checkpoint.add_condition(
    ['after_epoch'],
    predicate=predicates.OnLogRecord('valid_log_p_best_so_far'))

extensions = [val_monitor,
              train_monitor,
              per_val_monitor,
              per_test_monitor,
              Timing(),
              FinishAfter(after_n_epochs=conf.max_epochs),
              checkpoint,
              Printing(),
              TrackTheBest(record_name='val_monitor',
                           notification_name='valid_log_p_best_so_far'),
              FinishIfNoImprovementAfter(
                  notification_name='valid_log_p_best_so_far',
                  epochs=conf.epochs_early_stopping),
              ]

main_loop = MainLoop(
    algorithm=algorithm,
    data_stream=stream_train,
def train_lstm(train, test, input_dim, hidden_dimension, columns, epochs,
               save_file, execution_name, batch_size, plot):
    stream_train = build_stream(train, batch_size, columns)
    stream_test = build_stream(test, batch_size, columns)

    # The train stream will return (TimeSequence, BatchSize, Dimensions) for
    # and the train test will return (TimeSequence, BatchSize, 1)
    x = T.tensor3('x')
    y = T.tensor3('y')

    y = y.reshape((y.shape[1], y.shape[0], y.shape[2]))

    # input_dim = 6
    # output_dim = 1
    linear_lstm = LinearLSTM(input_dim, 1, hidden_dimension,
                             # print_intermediate=True,
                             # print_attrs=['__str__', 'shape']
                             )

    y_hat = linear_lstm.apply(x)
    linear_lstm.initialize()

    c_test = AbsolutePercentageError().apply(y, y_hat)
    c_test.name = 'mape'
    c = SquaredError().apply(y, y_hat)
    c.name = 'cost'

    cg = ComputationGraph(c_test)

    def one_perc_min(current_value, best_value):
        if (1 - best_value / current_value) > 0.01:
            return best_value
        else:
            return current_value

    extensions = []

    extensions.append(DataStreamMonitoring(variables=[c, c_test],
                                           data_stream=stream_test,
                                           prefix='test',
                                           after_epoch=False,
                                           every_n_epochs=100))
    extensions.append(TrainingDataMonitoring(variables=[c_test],
                                             prefix='train',
                                             after_epoch=True))
    extensions.append(FinishAfter(after_n_epochs=epochs))
    # extensions.append(Printing())
    # extensions.append(ProgressBar())
    extensions.append(TrackTheBest('test_mape', choose_best=one_perc_min))
    extensions.append(TrackTheBest('test_cost', choose_best=one_perc_min))
    extensions.append(FinishIfNoImprovementAfter('test_cost_best_so_far',
                                                 epochs=500))

    # Save only parameters, not the whole main loop, and only when
    # best_test_cost is updated
    checkpoint = Checkpoint(save_file, save_main_loop=False,
                            after_training=False)
    checkpoint.add_condition(['after_epoch'],
                             predicate=OnLogRecord('test_cost_best_so_far'))
    extensions.append(checkpoint)

    if BOKEH_AVAILABLE and plot:
        extensions.append(Plot(execution_name,
                               channels=[[  # 'train_cost',
                                          'test_cost']]))

    step_rule = Adam()
    algorithm = GradientDescent(cost=c_test, parameters=cg.parameters,
                                step_rule=step_rule)
    main_loop = MainLoop(algorithm, stream_train, model=Model(c_test),
                         extensions=extensions)
    main_loop.run()

    test_mape = 0
    if main_loop.log.status.get('best_test_mape', None) is None:
        with open(save_file, 'rb') as f:
            parameters = load_parameters(f)
            model = main_loop.model
            model.set_parameter_values(parameters)
            ev = DatasetEvaluator([c_test])
            test_mape = ev.evaluate(stream_test)['mape']
    else:
        test_mape = main_loop.log.status['best_test_mape']

    return test_mape, main_loop.log.status['epochs_done']
def build_and_run(save_to, modelconfig, experimentconfig):
    """ part of this is adapted from lasagne tutorial"""
    n, num_filters, image_size, num_blockstack = (
        modelconfig['depth'], modelconfig['num_filters'],
        modelconfig['image_size'], modelconfig['num_blockstack'])

    print("Amount of bottlenecks: %d" % n)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('image_features')
    # target_value = T.ivector('targets')
    target_var = T.lmatrix('targets')
    target_vec = T.extra_ops.to_one_hot(target_var[:, 0], 2)
    # target_var = T.matrix('targets')

    # Create residual net model
    print("Building model...")
    network = build_cnn(input_var, image_size, n, num_blockstack, num_filters)
    get_info(network)
    prediction = lasagne.utils.as_theano_expression(
        lasagne.layers.get_output(network))
    test_prediction = lasagne.utils.as_theano_expression(
        lasagne.layers.get_output(network, deterministic=True))

    # Loss function -> The objective to minimize
    print("Instantiation of loss function...")

    # loss = CategoricalCrossEntropy().apply(target_var.flatten(), prediction)
    # test_loss = CategoricalCrossEntropy().apply(target_var.flatten(),
    #                                             test_prediction)
    # loss = lasagne.objectives.categorical_crossentropy(
    #     prediction, target_var.flatten()).mean()
    # test_loss = lasagne.objectives.categorical_crossentropy(
    #     test_prediction, target_var.flatten()).mean()
    loss = lasagne.objectives.squared_error(prediction, target_vec).mean()
    test_loss = lasagne.objectives.squared_error(test_prediction,
                                                 target_vec).mean()
    # loss = tensor.nnet.binary_crossentropy(prediction, target_var).mean()
    # test_loss = tensor.nnet.binary_crossentropy(test_prediction,
    #                                             target_var).mean()
    test_loss.name = "loss"

    # loss.name = 'x-ent_error'
    # loss.name = 'sqr_error'
    layers = lasagne.layers.get_all_layers(network)

    # l1 and l2 regularization
    # pondlayers = {x: 0.000025 for i, x in enumerate(layers)}
    # l1_penality = lasagne.regularization.regularize_layer_params_weighted(
    #     pondlayers, lasagne.regularization.l2)
    # l2_penality = lasagne.regularization.regularize_layer_params(
    #     layers[len(layers)/4:], lasagne.regularization.l1) * 25e-6
    # reg_penalty = l1_penality + l2_penality
    # reg_penalty.name = 'reg_penalty'
    # loss = loss + reg_penalty
    loss.name = 'reg_loss'

    error_rate = MisclassificationRate().apply(target_var.flatten(),
                                               test_prediction).copy(
        name='error_rate')

    # Load the dataset
    print("Loading data...")
    istest = 'test' in experimentconfig.keys()
    if istest:
        print("Using test stream")
    train_stream, valid_stream, test_stream = get_stream(
        experimentconfig['batch_size'], image_size, test=istest)

    # Defining step rule and algorithm
    if 'step_rule' in experimentconfig.keys() and \
            not experimentconfig['step_rule'] is None:
        step_rule = experimentconfig['step_rule'](
            learning_rate=experimentconfig['learning_rate'])
    else:
        step_rule = Scale(learning_rate=experimentconfig['learning_rate'])

    params = map(lasagne.utils.as_theano_expression,
                 lasagne.layers.get_all_params(network, trainable=True))
    print("Initializing algorithm")
    algorithm = GradientDescent(
        cost=loss,
        gradients={var: T.grad(loss, var) for var in params},
        step_rule=step_rule)

    # algorithm.add_updates(extra_updates)

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = "grad_norm"

    print("Initializing extensions...")
    plot = Plot(save_to,
                channels=[['train_loss', 'valid_loss'],
                          ['train_grad_norm'],
                          # ['train_grad_norm', 'train_reg_penalty'],
                          ['train_error_rate', 'valid_error_rate']],
                server_url='http://hades.calculquebec.ca:5042')

    checkpoint = Checkpoint('models/best_' + save_to + '.tar')
    # checkpoint.add_condition(['after_n_batches=25'],
    checkpoint.add_condition(
        ['after_epoch'],
        predicate=OnLogRecord('valid_error_rate_best_so_far'))

    # Defining extensions
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=experimentconfig['num_epochs'],
                              after_n_batches=experimentconfig['num_batches']),
                  TrainingDataMonitoring([test_loss, error_rate, grad_norm],
                                         # reg_penalty],
                                         prefix="train", after_epoch=True),
                  # after_n_epochs=1
                  DataStreamMonitoring([test_loss, error_rate], valid_stream,
                                       prefix="valid", after_epoch=True),
                  # after_n_epochs=1
                  plot,
                  # Checkpoint(save_to, after_n_epochs=5),
                  # ProgressBar(),
                  # Plot(save_to, channels=[['train_loss', 'valid_loss'],
                  #                         ['train_error_rate',
                  #                          'valid_error_rate']],
                  #      server_url='http://hades.calculquebec.ca:5042'),
                  # 'grad_norm'
                  # after_batch=True),
                  Printing(after_epoch=True),
                  TrackTheBest('valid_error_rate', min),  # Keep best
                  checkpoint,  # Save best
                  FinishIfNoImprovementAfter('valid_error_rate_best_so_far',
                                             epochs=5)]  # Early-stopping

    # model = Model(loss)
    # print("Model", model)

    main_loop = MainLoop(
        algorithm,
        train_stream,
        # model=model,
        extensions=extensions)
    print("Starting main loop...")
    main_loop.run()
step_rule = Momentum(learning_rate=0.001, momentum=0.5)
# step_rule = Adam(learning_rate=0.001, beta1=0.9, beta2=0.01, epsilon=0.0001)

algorithm = GradientDescent(cost=cost, parameters=NICE.params,
                            step_rule=step_rule)

monitor_variables = [cost]
# monitor = DataStreamMonitoring(variables=[cost, prior_term, jacobian_term],
#                                data_stream=valid_monitor_stream,
#                                prefix="train")

model = Model(cost)
checkpoint = Checkpoint(model_save, after_training=False)
checkpoint.add_condition(
    ['after_epoch'],
    predicate=OnLogRecord('valid_negative_ll_best_so_far'))

main_loop = MainLoop(model=model,
                     data_stream=train_data_stream,
                     algorithm=algorithm,
                     extensions=[Timing(),
                                 DataStreamMonitoring(monitor_variables,
                                                      train_monitor_stream,
                                                      prefix="train"),
                                 DataStreamMonitoring(monitor_variables,
                                                      valid_monitor_stream,
                                                      prefix="valid"),
                                 FinishAfter(after_n_epochs=n_epochs),
                                 TrackTheBest('valid_negative_ll'),
                                 Printing(),
                                 ProgressBar(),
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    # Define the parameters
    num_epochs, num_batches, num_channels, image_shape, filter_size, \
        num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, \
        activation, mlp_activation = (
            config['num_epochs'], config['num_batches'],
            config['num_channels'], config['image_shape'],
            config['filter_size'], config['num_filter'],
            config['pooling_sizes'], config['mlp_hiddens'],
            config['output_size'], config['batch_size'],
            config['activation'], config['mlp_activation'])
    # print(num_epochs, num_channels, image_shape, filter_size, num_filter,
    #       pooling_sizes, mlp_hiddens, output_size, batch_size, activation,
    #       mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    # Create the symbolic variables
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    # Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    # Create the convolutional layers
    conv_layers = list(interleave([
        (Convolutional(filter_size=filter_size,
                       num_filters=num_filter,
                       name='conv_{}'.format(i))
         for i, (filter_size, num_filter) in enumerate(conv_parameters)),
        (activation),
        (MaxPooling(size, name='pool_{}'.format(i))
         for i, size in enumerate(pooling_sizes))]))
    #    (AveragePooling(size, name='pool_{}'.format(i))
    #     for i, size in enumerate(pooling_sizes))]))

    # Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    # Initialize the convnet
    conv_sequence.initialize()
    # Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + \
        mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    # Initialize the MLP
    mlp.initialize()
    # Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)
    # Little trick to plot the error rate in two different plots (we can't
    # use the same data twice in the plots for an unknown reason)
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # l2_penalty_weights = T.sum([i * lambda_l2 / len(weights) * (W ** 2).sum()
    #                             for i, W in enumerate(weights)])
    # Gradually increase penalty for layer
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum()
                        for i, W in enumerate(weights + biases)])
    # l2_penalty_bias = T.sum([lambda_l2 * (B ** 2).sum() for B in biases])
    # l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum()
                        for z in weights + biases])
    # l1_penalty_weights = T.sum([i * lambda_l1 / len(weights) * T.abs_(W).sum()
    #                             for i, W in enumerate(weights)])
    # Gradually increase penalty for layer
    # l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
    # l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    # algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
    #                             step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg, parameters=cg.parameters,
                                step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size,
                                                         image_shape,
                                                         test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'],
        predicate=OnLogRecord('valid_error_rate_best_so_far'))
    # Adding a live plot with the bokeh server
    plot = Plot(label,
                channels=[['train_error_rate', 'valid_error_rate'],
                          ['valid_cost', 'valid_error_rate2'],
                          # ['train_costreg', 'train_grad_norm'],
                          # ['train_costreg', 'train_total_gradient_norm',
                          #  'train_l2_penalty', 'train_l1_penalty'],
                          ],
                server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  DataStreamMonitoring([cost, error_rate, error_rate2],
                                       valid_stream, prefix="valid"),
                  TrainingDataMonitoring([costreg, error_rate, error_rate2,
                                          grad_norm, l2_penalty, l1_penalty],
                                         prefix="train", after_epoch=True),
                  plot,
                  ProgressBar(),
                  Printing(),
                  TrackTheBest('valid_error_rate', min),  # Keep best
                  checkpoint,  # Save best
                  FinishIfNoImprovementAfter('valid_error_rate_best_so_far',
                                             epochs=4)]  # Early-stopping

    model = Model(cost)
    main_loop = MainLoop(algorithm, data_stream=train_stream, model=model,
                         extensions=extensions)
    main_loop.run()
def build_and_run(experimentconfig, modelconfig, save_to=None):
    """Build the VGG model and train it. Parts of this are adapted from the
    Lasagne tutorial."""

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('image_features')
    target_var = T.lmatrix('targets')
    target_vec = T.extra_ops.to_one_hot(target_var[:, 0], 2)

    # Create the VGG model
    print("Building model...")
    image_size = modelconfig['image_size']
    network = vgg16.build_small_model()
    prediction = lasagne.utils.as_theano_expression(
        lasagne.layers.get_output(network["prob"], input_var))
    # test_prediction = lasagne.layers.get_output(network["prob"], input_var,
    #                                             deterministic=True)

    # Loss function -> the objective to minimize
    print("Instantiation of loss function...")
    # loss = lasagne.objectives.categorical_crossentropy(
    #     prediction, target_var.flatten())
    loss = lasagne.objectives.squared_error(prediction, target_vec)
    # test_loss = lasagne.objectives.squared_error(test_prediction, target_vec)
    loss = loss.mean()

    # L1 and L2 regularization (currently disabled)
    # layers = network.values()
    # pondlayers = {x: 0.01 for x in layers}
    # l1_penalty = lasagne.regularization.regularize_layer_params_weighted(
    #     pondlayers, lasagne.regularization.l2)
    # l2_penalty = lasagne.regularization.regularize_layer_params(
    #     layers[len(layers) / 4:], lasagne.regularization.l1) * 1e-4
    # reg_penalty = l1_penalty + l2_penalty
    # reg_penalty.name = 'reg_penalty'
    # loss = loss + reg_penalty
    loss.name = 'loss'

    error_rate = MisclassificationRate().apply(
        target_var.flatten(), prediction).copy(name='error_rate')

    # Load the dataset
    print("Loading data...")
    if 'test' in experimentconfig.keys() and experimentconfig['test'] is True:
        train_stream, valid_stream, test_stream = get_stream(
            experimentconfig['batch_size'], image_size, test=True)
    else:
        train_stream, valid_stream, test_stream = get_stream(
            experimentconfig['batch_size'], image_size, test=False)

    # Define the step rule and the algorithm
    if 'step_rule' in experimentconfig.keys() and \
            experimentconfig['step_rule'] is not None:
        step_rule = experimentconfig['step_rule'](
            learning_rate=experimentconfig['learning_rate'])
    else:
        step_rule = Scale(learning_rate=experimentconfig['learning_rate'])

    params = list(map(lasagne.utils.as_theano_expression,
                      lasagne.layers.get_all_params(network['prob'],
                                                    trainable=True)))
    algorithm = GradientDescent(
        cost=loss,
        gradients={var: T.grad(loss, var) for var in params},
        step_rule=step_rule)

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    print("Initializing extensions...")
    plot = Plot(save_to,
                channels=[['train_loss', 'valid_loss', 'train_grad_norm'],
                          ['train_error_rate', 'valid_error_rate']],
                server_url='http://hades.calculquebec.ca:5042')
    checkpoint = Checkpoint('models/best_' + save_to + '.tar')
    # checkpoint.add_condition(['after_n_batches=25'],
    checkpoint.add_condition(
        ['after_epoch'],
        predicate=OnLogRecord('valid_error_rate_best_so_far'))

    # Define the extensions. The regularization penalty is disabled above,
    # so it is not monitored here.
    extensions = [Timing(),
                  FinishAfter(
                      after_n_epochs=experimentconfig['num_epochs'],
                      after_n_batches=experimentconfig['num_batches']),
                  TrainingDataMonitoring(
                      [loss, error_rate, grad_norm],
                      prefix="train", after_epoch=True),  # after_n_epochs=1
                  DataStreamMonitoring(
                      [loss, error_rate], valid_stream,
                      prefix="valid", after_epoch=True),  # after_n_epochs=1
                  # Checkpoint(save_to, after_n_epochs=5),
                  # ProgressBar(),
                  plot,  # after_batch=True),
                  Printing(after_epoch=True),
                  TrackTheBest('valid_error_rate', choose_best=min),
                  checkpoint,  # keep/save the best model so far
                  FinishIfNoImprovementAfter('valid_error_rate_best_so_far',
                                             epochs=5)]  # early stopping

    # model = Model(ComputationGraph(network))
    main_loop = MainLoop(algorithm, train_stream,
                         # model=model,
                         extensions=extensions)
    print("Starting main loop...")
    main_loop.run()
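# A minimal sketch of how build_and_run might be invoked. The dictionary keys
# are the ones the function above actually reads; the concrete values and the
# save_to name are illustrative assumptions only.
from blocks.algorithms import Adam

experimentconfig = {
    'batch_size': 32,
    'num_epochs': 50,
    'num_batches': None,
    'learning_rate': 1e-3,
    'step_rule': Adam,   # any Blocks step-rule class, or None to fall back to Scale
    'test': False,
}
modelconfig = {'image_size': (128, 128)}

build_and_run(experimentconfig, modelconfig, save_to='vgg_small')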
def main(save_to, num_epochs, regularization=0.0003, subset=None,
         num_batches=None, histogram=None, resume=False):
    batch_size = 500
    output_size = 10

    convnet = create_lenet_5()
    layers = convnet.layers

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cost = (CategoricalCrossEntropy().apply(y.flatten(), probs)
            .copy(name='cost'))
    components = (ComponentwiseCrossEntropy().apply(y.flatten(), probs)
                  .copy(name='components'))
    error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                  .copy(name='error_rate'))
    confusion = (ConfusionMatrix().apply(y.flatten(), probs)
                 .copy(name='confusion'))
    confusion.tag.aggregation_scheme = Sum(confusion)

    cg = ComputationGraph([cost, error_rate, components])

    # Apply regularization to the cost
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    l2_norm = sum([(W ** 2).sum() for W in weights])
    l2_norm.name = 'l2_norm'
    cost = cost + regularization * l2_norm
    cost.name = 'cost_with_regularization'

    if subset:
        start = 30000 - subset // 2
        mnist_train = MNIST(("train",), subset=slice(start, start + subset))
    else:
        mnist_train = MNIST(("train",))
    mnist_train_stream = DataStream.default_stream(
        mnist_train, iteration_scheme=ShuffledScheme(
            mnist_train.num_examples, batch_size))

    mnist_test = MNIST(("test",))
    mnist_test_stream = DataStream.default_stream(
        mnist_test, iteration_scheme=ShuffledScheme(
            mnist_test.num_examples, batch_size))

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=AdaDelta(decay_rate=0.99))

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  DataStreamMonitoring(
                      [cost, error_rate, confusion],
                      mnist_test_stream,
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate, l2_norm,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  ProgressBar(),
                  Printing()]

    if histogram:
        attribution = AttributionExtension(
            components=components,
            parameters=cg.parameters,
            components_size=output_size,
            after_batch=True)
        extensions.insert(0, attribution)

    if resume:
        extensions.append(Load(save_to, True, True))

    model = Model(cost)

    main_loop = MainLoop(
        algorithm, mnist_train_stream,
        model=model,
        extensions=extensions)

    main_loop.run()

    if histogram:
        save_attributions(attribution, filename=histogram)

    with open('execution-log.json', 'w') as outfile:
        json.dump(main_loop.log, outfile, cls=NumpyEncoder)
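# Illustrative call only; the checkpoint path and epoch count below are
# assumed values, not part of the original script.
main(save_to='lenet5_mnist.tar', num_epochs=20)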
# The opening of this snippet was truncated; the GradientDescent call is
# reconstructed from context (cg and algorithm.total_gradient_norm are used
# below).
algorithm = GradientDescent(
    cost=cost,
    parameters=cg.parameters,
    step_rule=Momentum(learning_rate=0.0001, momentum=0.9))

extensions = [
    Timing(),
    FinishAfter(after_n_epochs=num_epochs),
    TrainingDataMonitoring(
        [cost, error_rate,
         aggregation.mean(algorithm.total_gradient_norm)],
        prefix="train",
        after_epoch=True),
    DataStreamMonitoring([cost, error_rate, error_rate2],
                         stream_valid,
                         prefix="valid",
                         after_epoch=True),
    Checkpoint("google_Ortho2_pretrain2_l0001.pkl", after_epoch=True),
    ProgressBar(),
    Printing()
]

# Add a live plot served by the Bokeh server
extensions.append(
    Plot('CatsVsDogs160_GoogleNet_Reload2_l0001',
         channels=[['train_error_rate', 'valid_error_rate'],
                   ['valid_cost', 'valid_error_rate2'],
                   ['train_total_gradient_norm']],
         after_batch=True))

# Warm-start from pretrained GoogLeNet parameters
params = load_parameter_values('GoogleParameters.pkl')
model = Model(cost)
model.set_parameter_values(params)
valid_monitor = DataStreamMonitoring(
    [cost], valid_stream,
    after_epoch=True,
    # before_first_epoch=False,
    prefix="valid")

extensions = [
    Timing(every_n_batches=n_batches),
    train_monitor,
    valid_monitor,
    TrackTheBest('valid_nll', after_epoch=True),
    Plot(save_dir + experiment_name + ".png",
         [['train_nll', 'valid_nll']],
         every_n_batches=4 * n_batches,
         email=True),
    Checkpoint(save_dir + experiment_name + ".pkl",
               use_cpickle=True,
               every_n_batches=n_batches * 8,
               after_epoch=True),
    Checkpoint(save_dir + "best_" + experiment_name + ".pkl",
               after_epoch=True,
               use_cpickle=True).add_condition(
        ['after_epoch'],
        predicate=OnLogRecord('valid_nll_best_so_far')),
    Printing(every_n_batches=n_batches, after_epoch=True),
    FinishAfter(after_n_epochs=2),
    SaveComputationGraph(emit)
]

main_loop = MainLoop(model=model,
                     data_stream=train_stream,
                     algorithm=algorithm,
                     extensions=extensions)
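# The snippet above stops after assembling the MainLoop; presumably training
# is then started the same way as in the other examples:
main_loop.run()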
def main(args):
    """Run experiment."""
    lr_tag = float_tag(args.learning_rate)

    x_dim, train_stream, valid_stream, test_stream = datasets.get_streams(
        args.data, args.batch_size)

    # ------------------------------------------------------------
    # Setup model
    deterministic_act = Tanh
    deterministic_size = 1.

    if args.method == 'vae':
        sizes_tag = args.layer_spec.replace(",", "-")
        layer_sizes = [int(i) for i in args.layer_spec.split(",")]
        layer_sizes, z_dim = layer_sizes[:-1], layer_sizes[-1]

        name = "%s-%s-%s-lr%s-spl%d-%s" % \
            (args.data, args.method, args.name, lr_tag, args.n_samples,
             sizes_tag)

        if args.activation == "tanh":
            hidden_act = Tanh()
        elif args.activation == "logistic":
            hidden_act = Logistic()
        elif args.activation == "relu":
            hidden_act = Rectifier()
        else:
            raise ValueError("Unknown hidden nonlinearity %s"
                             % args.activation)

        model = VAE(x_dim=x_dim, hidden_layers=layer_sizes,
                    hidden_act=hidden_act, z_dim=z_dim,
                    batch_norm=args.batch_normalization)
        model.initialize()
    elif args.method == 'dvae':
        sizes_tag = args.layer_spec.replace(",", "-")
        layer_sizes = [int(i) for i in args.layer_spec.split(",")]
        layer_sizes, z_dim = layer_sizes[:-1], layer_sizes[-1]

        name = "%s-%s-%s-lr%s-spl%d-%s" % \
            (args.data, args.method, args.name, lr_tag, args.n_samples,
             sizes_tag)

        if args.activation == "tanh":
            hidden_act = Tanh()
        elif args.activation == "logistic":
            hidden_act = Logistic()
        elif args.activation == "relu":
            hidden_act = Rectifier()
        else:
            raise ValueError("Unknown hidden nonlinearity %s"
                             % args.activation)

        model = DVAE(x_dim=x_dim, hidden_layers=layer_sizes,
                     hidden_act=hidden_act, z_dim=z_dim,
                     batch_norm=args.batch_normalization)
        model.initialize()
    elif args.method == 'rws':
        sizes_tag = args.layer_spec.replace(",", "-")
        qbase = "" if not args.no_qbaseline else "noqb-"

        name = "%s-%s-%s-%slr%s-dl%d-spl%d-%s" % \
            (args.data, args.method, args.name, qbase, lr_tag,
             args.deterministic_layers, args.n_samples, sizes_tag)

        p_layers, q_layers = create_layers(
            args.layer_spec, x_dim,
            args.deterministic_layers, deterministic_act, deterministic_size)

        model = ReweightedWakeSleep(
            p_layers,
            q_layers,
            qbaseline=(not args.no_qbaseline),
        )
        model.initialize()
    elif args.method == 'bihm-rws':
        sizes_tag = args.layer_spec.replace(",", "-")

        name = "%s-%s-%s-lr%s-dl%d-spl%d-%s" % \
            (args.data, args.method, args.name, lr_tag,
             args.deterministic_layers, args.n_samples, sizes_tag)

        p_layers, q_layers = create_layers(
            args.layer_spec, x_dim,
            args.deterministic_layers, deterministic_act, deterministic_size)

        model = BiHM(
            p_layers,
            q_layers,
            l1reg=args.l1reg,
            l2reg=args.l2reg,
        )
        model.initialize()
    elif args.method == 'continue':
        import cPickle as pickle
        from os.path import basename, splitext

        with open(args.model_file, 'rb') as f:
            m = pickle.load(f)

        if isinstance(m, MainLoop):
            m = m.model

        model = m.get_top_bricks()[0]
        while len(model.parents) > 0:
            model = model.parents[0]

        assert isinstance(model, (BiHM, ReweightedWakeSleep, VAE))

        mname, _, _ = basename(args.model_file).rpartition("_model.pkl")
        name = "%s-cont-%s-lr%s-spl%s" % (mname, args.name, lr_tag,
                                          args.n_samples)
    else:
        raise ValueError("Unknown training method '%s'" % args.method)

    # ------------------------------------------------------------

    x = tensor.matrix('features')

    # ------------------------------------------------------------
    # Testset monitoring

    train_monitors = []
    valid_monitors = []
    test_monitors = []
    for s in [1, 10, 100, 1000]:
        log_p, log_ph = model.log_likelihood(x, s)
        log_p = -log_p.mean()
        log_ph = -log_ph.mean()
        log_p.name = "log_p_%d" % s
        log_ph.name = "log_ph_%d" % s

        # train_monitors += [log_p, log_ph]
        # valid_monitors += [log_p, log_ph]
        test_monitors += [log_p, log_ph]

    # ------------------------------------------------------------
    # Z estimation
    # for s in [100000]:
    #     z2 = tensor.exp(model.estimate_log_z2(s)) / s
    #     z2.name = "z2_%d" % s
    #
    #     valid_monitors += [z2]
    #     test_monitors += [z2]

    # ------------------------------------------------------------
    # Gradient and training monitoring

    if args.method in ['vae', 'dvae']:
        log_p_bound, gradients = model.get_gradients(x, args.n_samples)
        log_p_bound = -log_p_bound.mean()
        log_p_bound.name = "log_p_bound"
        cost = log_p_bound

        train_monitors += [log_p_bound,
                           named(model.kl_term.mean(), 'kl_term'),
                           named(model.recons_term.mean(), 'recons_term')]
        valid_monitors += [log_p_bound,
                           named(model.kl_term.mean(), 'kl_term'),
                           named(model.recons_term.mean(), 'recons_term')]
        test_monitors += [log_p_bound,
                          named(model.kl_term.mean(), 'kl_term'),
                          named(model.recons_term.mean(), 'recons_term')]
    else:
        log_p, log_ph, gradients = model.get_gradients(x, args.n_samples)
        log_p = -log_p.mean()
        log_ph = -log_ph.mean()
        log_p.name = "log_p"
        log_ph.name = "log_ph"
        cost = log_ph

        train_monitors += [log_p, log_ph]
        valid_monitors += [log_p, log_ph]

    # ------------------------------------------------------------
    # Detailed monitoring (disabled)
    """
    n_layers = len(p_layers)

    log_px, w, log_p, log_q, samples = model.log_likelihood(x, n_samples)

    exp_samples = []
    for l in xrange(n_layers):
        e = (w.dimshuffle(0, 1, 'x')*samples[l]).sum(axis=1)
        e.name = "inference_h%d" % l
        e.tag.aggregation_scheme = aggregation.TakeLast(e)
        exp_samples.append(e)

    s1 = samples[1]
    sh1 = s1.shape
    s1_ = s1.reshape([sh1[0]*sh1[1], sh1[2]])
    s0, _ = model.p_layers[0].sample_expected(s1_)
    s0 = s0.reshape([sh1[0], sh1[1], s0.shape[1]])
    s0 = (w.dimshuffle(0, 1, 'x')*s0).sum(axis=1)
    s0.name = "inference_h0^"
    s0.tag.aggregation_scheme = aggregation.TakeLast(s0)
    exp_samples.append(s0)

    # Draw P-samples
    p_samples, _, _ = model.sample_p(100)
    #weights = model.importance_weights(samples)
    #weights = weights / weights.sum()

    for i, s in enumerate(p_samples):
        s.name = "psamples_h%d" % i
        s.tag.aggregation_scheme = aggregation.TakeLast(s)

    # samples = model.sample(100, oversample=100)
    for i, s in enumerate(samples):
        s.name = "samples_h%d" % i
        s.tag.aggregation_scheme = aggregation.TakeLast(s)
    """

    cg = ComputationGraph([cost])

    # ------------------------------------------------------------
    if args.step_rule == "momentum":
        step_rule = Momentum(args.learning_rate, 0.95)
    elif args.step_rule == "rmsprop":
        step_rule = RMSProp(args.learning_rate)
    elif args.step_rule == "adam":
        step_rule = Adam(args.learning_rate)
    else:
        raise ValueError("Unknown step_rule %s" % args.step_rule)

    # parameters = cg.parameters[:4] + cg.parameters[5:]
    parameters = cg.parameters

    algorithm = GradientDescent(
        cost=cost,
        parameters=parameters,
        gradients=gradients,
        step_rule=CompositeRule([
            # StepClipping(25),
            step_rule,
            # RemoveNotFinite(1.0),
        ]))

    # ------------------------------------------------------------
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm),
                       aggregation.mean(algorithm.total_step_norm)]

    # ------------------------------------------------------------
    # Live plotting?
    plotting_extensions = []
    if args.live_plotting:
        plotting_extensions = [
            PlotManager(
                name,
                [Plotter(channels=[
                    ["valid_%s" % cost.name, "valid_log_p"],
                    ["train_total_gradient_norm", "train_total_step_norm"]],
                    titles=[
                        "validation cost",
                        "norm of training gradient and step"]),
                 DisplayImage(
                     [WeightDisplay(
                         model.p_layers[0].mlp.linear_transformations[0].W,
                         n_weights=100, image_shape=(28, 28))]
                     # ImageDataStreamDisplay(test_stream,
                     #                        image_shape=(28, 28))]
                 )])
        ]

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            ProgressBar(),
            TrainingDataMonitoring(
                train_monitors, prefix="train", after_epoch=True),
            DataStreamMonitoring(
                valid_monitors, data_stream=valid_stream, prefix="valid"),
            DataStreamMonitoring(
                test_monitors, data_stream=test_stream, prefix="test",
                after_epoch=False, after_training=True, every_n_epochs=10),
            # SharedVariableModifier(
            #     algorithm.step_rule.components[0].learning_rate,
            #     half_lr_func,
            #     before_training=False,
            #     after_epoch=False,
            #     after_batch=False,
            #     every_n_epochs=half_lr),
            TrackTheBest('valid_%s' % cost.name),
            Checkpoint(name + ".pkl", save_separately=['log', 'model']),
            FinishIfNoImprovementAfter('valid_%s_best_so_far' % cost.name,
                                       epochs=args.patience),
            FinishAfter(after_n_epochs=args.max_epochs),
            Printing()
        ] + plotting_extensions)
    main_loop.run()
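# A hypothetical CLI wrapper for the main(args) function above. The attribute
# names match what main() reads (args.data, args.method, args.layer_spec, ...),
# but the flag spellings and default values are illustrative assumptions only.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Train a VAE/DVAE/RWS/BiHM model")
    parser.add_argument("--data", default="bmnist")
    parser.add_argument("--method", default="vae",
                        choices=["vae", "dvae", "rws", "bihm-rws", "continue"])
    parser.add_argument("--name", default="exp")
    parser.add_argument("--model-file", default=None)
    parser.add_argument("--layer-spec", default="200,200,50")
    parser.add_argument("--activation", default="tanh",
                        choices=["tanh", "logistic", "relu"])
    parser.add_argument("--deterministic-layers", type=int, default=0)
    parser.add_argument("--batch-normalization", action="store_true")
    parser.add_argument("--no-qbaseline", action="store_true")
    parser.add_argument("--l1reg", type=float, default=0.0)
    parser.add_argument("--l2reg", type=float, default=0.0)
    parser.add_argument("--learning-rate", type=float, default=1e-3)
    parser.add_argument("--step-rule", default="adam",
                        choices=["momentum", "rmsprop", "adam"])
    parser.add_argument("--n-samples", type=int, default=10)
    parser.add_argument("--batch-size", type=int, default=100)
    parser.add_argument("--patience", type=int, default=10)
    parser.add_argument("--max-epochs", type=int, default=1000)
    parser.add_argument("--live-plotting", action="store_true")
    main(parser.parse_args())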