def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):

        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = (
                ((W.get_value() * data["features"]).sum()
                 - data["targets"]) ** 2)

    main_loop = MainLoop(
        model=None,
        data_stream=dataset.get_example_stream(),
        algorithm=GradientDescent(cost=cost, parameters=[W],
                                  step_rule=Scale(0.001)),
        extensions=[
            FinishAfter(after_n_epochs=1),
            TrainingDataMonitoring([W_sum, cost, V], prefix="train1",
                                   after_batch=True),
            TrainingDataMonitoring(
                [aggregation.mean(W_sum), cost],
                prefix="train2", after_epoch=True),
            TrueCostExtension()])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([main_loop.log[i]['train1_W_sum']
             for i in range(1, n_batches + 1)]) / n_batches)
def create_training_data_monitoring():
    if "every_n_epochs" in self.args.evaluate_every_n:
        return TrainingDataMonitoring(vars_to_monitor, prefix='train',
                                      after_epoch=True)
    else:
        return TrainingDataMonitoring(vars_to_monitor, prefix='train',
                                      after_epoch=True,
                                      **self.args.evaluate_every_n)
def prepare_opti(cost, test):
    model = Model(cost)
    algorithm = GradientDescent(
        cost=cost,
        parameters=model.parameters,
        step_rule=RMSProp(),
        on_unused_sources='ignore'
    )

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='test_cross_entropy',
                                   epochs=patience),
        TrainingDataMonitoring(
            [algorithm.cost],
            prefix="train",
            after_epoch=True),
        DataStreamMonitoring(
            [algorithm.cost],
            test_stream,
            prefix="test"),
        Printing(),
        ProgressBar(),
        # Checkpoint(path, after_epoch=True)
    ]

    if resume:
        print "Restoring from previous breakpoint"
        extensions.extend([
            Load(path)
        ])
    return model, algorithm, extensions
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
def train(self):
    print "Loading data"
    datafile = self.get_datafile()
    nbexamples = datafile.num_examples
    nbexamples -= nbexamples % (self.sequence_dim * self.time_dim)

    train_stream = ReshapeTransformer(
        DataStream(
            dataset=datafile,
            iteration_scheme=ShuffledBatchChunkScheme(
                nbexamples, self.sequence_dim * self.time_dim)),
        self.sequence_dim,
        self.time_dim)

    if self.image_size is not None:
        train_stream = Mapping(train_stream, spec_mapping,
                               add_sources=['spectrogram'])

    print "Building Theano Graph"
    algorithm, self.fprop = self.build_theano_functions()

    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=train_stream,
        model=self.model,
        extensions=[
            FinishAfter(after_n_epochs=EPOCHS),
            TrainingDataMonitoring(
                [aggregation.mean(self.model.outputs[0])],
                prefix="train",
                after_epoch=True),
            Printing(),
            SaveParams(EXP_PATH + NAME, after_epoch=True)
        ])

    main_loop.run()
def test_training_data_monitoring_updates_algorithm():
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 5], [5, 8]]]
    targets = numpy.array([f.sum() for f in features])
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    m = x.mean().copy(name='features_mean')
    t = y.sum().copy(name='targets_sum')

    main_loop = MainLoop(
        model=None,
        data_stream=dataset.get_example_stream(),
        algorithm=UpdatesAlgorithm(),
        extensions=[
            TrainingDataMonitoring([m, t], prefix="train1", after_batch=True)
        ],
    )
    main_loop.extensions[0].main_loop = main_loop
    assert len(main_loop.algorithm.updates) == 0
    main_loop.extensions[0].do('before_training')
    assert len(main_loop.algorithm.updates) > 0
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring(
            [cost, error_rate],
            Flatten(
                DataStream.default_stream(
                    mnist_test,
                    iteration_scheme=SequentialScheme(
                        mnist_test.num_examples, 500)),
                which_sources=('features',)),
            prefix="test"),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train",
            after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[['test_final_cost',
                            'test_misclassificationrate_apply_error_rate'],
                           ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
def prepare_opti(cost, test, *args):
    model = Model(cost)
    logger.info("Model created")

    algorithm = GradientDescent(cost=cost,
                                parameters=model.parameters,
                                step_rule=Adam(learning_rate=0.0015),
                                on_unused_sources='ignore')

    to_monitor = [algorithm.cost]
    if args:
        to_monitor.extend(args)

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='loglikelihood_nat',
                                   epochs=patience),
        TrainingDataMonitoring(to_monitor, prefix="train", after_epoch=True),
        DataStreamMonitoring(to_monitor, test_stream, prefix="test"),
        Printing(),
        ProgressBar(),
        ApplyMask(before_first_epoch=True, after_batch=True),
        Checkpoint(check, every_n_epochs=save_every),
        SaveModel(name=path + '/' + 'pixelcnn_{}'.format(dataset),
                  every_n_epochs=save_every),
        GenerateSamples(every_n_epochs=save_every),
        # Checkpoint(path+'/'+'exp.log', save_separately=['log'],
        #            every_n_epochs=save_every),
    ]

    if resume:
        logger.info("Restoring from previous checkpoint")
        extensions = [Load(path + '/' + check)]

    return model, algorithm, extensions
def train(self):
    print "Loading data"
    datafile = self.get_datafile()
    nbexamples = datafile.num_examples

    train_stream = DataStream(
        dataset=datafile,
        iteration_scheme=OverlapSequentialScheme(
            nbexamples, self.time_dim))

    print "Building Theano Graph"
    algorithm, self.fprop = self.build_theano_functions()

    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=train_stream,
        extensions=[
            FinishAfter(after_n_epochs=EPOCHS),
            TrainingDataMonitoring(
                [self.model.outputs[0]],
                prefix="train",
                after_epoch=True,
                every_n_batches=4000),
            # ProgressBar(),
            Printing()
        ])

    main_loop.run()
def main():
    x = tensor.matrix("features")
    input_to_hidden1 = get_typical_layer(x, 784, 500)
    # hidden1_to_hidden2 = get_typical_layer(input_to_hidden1, 500, 300)
    hidden1_to_latent = get_typical_layer(input_to_hidden1, 500, 20)

    latent_to_hidden2 = get_typical_layer(hidden1_to_latent, 20, 500)
    # hidden3_to_hidden4 = get_typical_layer(latent_to_hidden3, 300, 500)
    hidden2_to_output = get_typical_layer(latent_to_hidden2, 500, 784,
                                          Logistic())
    hidden2_to_output.name = "last_before_output"

    from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
    from blocks.graph import ComputationGraph
    from blocks.algorithms import Adam, GradientDescent, Scale
    from blocks.roles import WEIGHT

    cost = BinaryCrossEntropy(name="error").apply(x, hidden2_to_output)
    cg = ComputationGraph(cost)
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    # cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
    # cost.name = "regularized error"
    gd = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

    from blocks.main_loop import MainLoop
    from blocks.extensions import FinishAfter, Printing, ProgressBar
    from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring

    monitor = TrainingDataMonitoring([cost], after_epoch=True)
    main_loop = MainLoop(data_stream=get_data_stream(), algorithm=gd,
                         extensions=[monitor, FinishAfter(after_n_epochs=5),
                                     ProgressBar(), Printing()])
    main_loop.run()
    showcase(cg, "last_before_output")
def run(model_name):
    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')

    image_border_size = 100

    if running_on_laptop:
        host_plot = 'http://*****:*****@ %s' % (
                 model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss', 'valid_loss_test'], ['valid_error']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
def train(cost, error_rate, batch_size=100, num_epochs=150):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(
        cost=cost, parameters=all_params,
        step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables,
                                              prefix="train",
                                              after_epoch=True)

    valid_monitoring = DataStreamMonitoring(variables=monitored_variables,
                                            data_stream=valid_data_stream,
                                            prefix="valid",
                                            after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring, valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            Printing()
        ])
    main_loop.run()
def __init__(self, worker, experiment, config):
    # Data
    dataset = CIFAR10('train', flatten=False)
    test_dataset = CIFAR10('test', flatten=False)
    batch_size = 128
    scheme = ShuffledScheme(dataset.num_examples, batch_size)
    datastream = DataStream(dataset, iteration_scheme=scheme)
    test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
    test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

    # Model
    m = ModelHelper(config)

    def score_func(mainloop):
        scores = mainloop.log.to_dataframe()["test_accur"].values
        return np.mean(np.sort(scores)[-4:-1])

    # Algorithm
    cg = ComputationGraph([m.cost])
    algorithm = GradientDescent(cost=m.cost, params=cg.parameters,
                                step_rule=AdaM())

    # job_name = os.path.basename(worker.running_job)
    job_name = os.path.basename(".")
    update_path = (os.path.join(os.path.join(worker.path, "updates"),
                                job_name))
    if not os.path.exists(update_path):
        os.mkdir(update_path)

    self.main_loop = MainLoop(
        algorithm, datastream, model=Model(m.cost),
        extensions=[
            Timing(),
            TrainingDataMonitoring([m.cost, m.accur], prefix="train",
                                   after_epoch=True),
            DataStreamMonitoring([m.cost, m.accur], test_stream,
                                 prefix="test"),
            FinishAfter(after_n_epochs=1),
            LogToFile(os.path.join(update_path, "log.csv")),
            Printing(),
            EpochProgress(dataset.num_examples // batch_size + 1)
            # , DistributeUpdate(worker, every_n_epochs=1)
            # , DistributeWhetlabFinish(worker, experiment, score_func)
            # , Plot('cifar10',
            #        channels=[['train_cost', 'test_cost'],
            #                  ['train_accur', 'test_accur']])
        ])
def monitor_stream(self, stream, prefix, **kwargs):
    variables = self.monitored_vars + self.get_quantitites_vars()
    if stream == self.train_stream:
        monitor = TrainingDataMonitoring(variables, prefix=prefix, **kwargs)
    else:
        monitor = DataStreamMonitoring(variables, data_stream=stream,
                                       prefix=prefix, **kwargs)
    self.extensions.insert(0, monitor)
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHTS])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, step_rule=SteepestDescent(learning_rate=0.1))
    main_loop = MainLoop(
        mlp,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=num_epochs),
            DataStreamMonitoring(
                [cost, error_rate],
                DataStream(mnist_test,
                           iteration_scheme=SequentialScheme(
                               mnist_test.num_examples, 500)),
                prefix="test"),
            TrainingDataMonitoring(
                [cost, error_rate,
                 aggregation.mean(algorithm.total_gradient_norm)],
                prefix="train",
                after_every_epoch=True),
            SerializeMainLoop(save_to),
            Plot('MNIST example',
                 channels=[['test_final_cost',
                            'test_misclassificationrate_apply_error_rate'],
                           ['train_total_gradient_norm']]),
            Printing()
        ])
    main_loop.run()
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()],
              [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum(([(w ** 2).sum() for w in ws]))
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=False)

    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train",
            after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    main_loop = MainLoop(algorithm, train_stream, model=Model(cost),
                         extensions=extensions)
    main_loop.run()
def align_with_nam(config, args):
    """Main method for using the Neural Alignment Model.

    Args:
        config (dict): NMT configuration
        args (object): ArgumentParser object containing the command
                       line arguments

    Returns:
        list. List of alignments, where alignments are represented as
        numpy matrices containing confidences between 0 and 1.
    """
    global alignments
    config['attention'] = 'parameterized'
    alignments = []
    nmt_model = NMTModel(config)
    nmt_model.set_up()
    align_stream = _get_align_stream(**config)
    extensions = [
        FinishAfter(after_epoch=True),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        PrintCurrentLogRow(after_batch=True),
        NextSentenceExtension(align_stream=align_stream,
                              every_n_batches=args.iterations,
                              before_training=True)
    ]
    train_params = []
    for p in nmt_model.cg.parameters:
        if p.name in 'alignment_matrix':
            train_params.append(p)
            break
    algorithm = GradientDescent(cost=nmt_model.cost, parameters=train_params)
    main_loop = MainLoop(model=nmt_model.training_model,
                         algorithm=algorithm,
                         data_stream=align_stream,
                         extensions=extensions)
    nmt_model_path = get_nmt_model_path(args.nmt_model_selector, config)
    loader = LoadNMTUtils(nmt_model_path, config['saveto'],
                          nmt_model.training_model)
    loader.load_weights()
    try:
        main_loop.run()
    except StopIteration:
        logging.info("Alignment finished")
    return alignments
def prepare_opti(cost, test):
    model = Model(cost)
    algorithm = GradientDescent(cost=cost,
                                parameters=model.parameters,
                                step_rule=Adam(),
                                on_unused_sources='ignore')

    extensions = [
        FinishAfter(after_n_epochs=nb_epoch),
        FinishIfNoImprovementAfter(notification_name='test_vae_cost',
                                   epochs=patience),
        TrainingDataMonitoring([algorithm.cost], after_epoch=True),
        DataStreamMonitoring([algorithm.cost], test, prefix="test"),
        Printing(),
        ProgressBar(),
        # SaveModel(name='vae', after_n_epochs=save_every)
    ]

    return model, algorithm, extensions
def construct_common_graph(situation, args, outputs, dummy_states, Wy, by, y):
    ytilde = T.dot(outputs["h"][-1], Wy) + by
    yhat = ytilde[:, 0]

    ## mse cost
    mse = ((yhat - y) ** 2).mean()
    error_rate = mse.mean().copy(name="MSE")
    cost = error_rate.copy(name="cost")

    graph = ComputationGraph([cost, error_rate])

    state_grads = dict((k, T.grad(cost, v)) for k, v in dummy_states.items())
    extensions = []
    # extensions = [
    #     DumpVariables("%s_hiddens" % situation, graph.inputs,
    #                   [v.copy(name="%s%s" % (k, suffix))
    #                    for suffix, things in [("", outputs), ("_grad", state_grads)]
    #                    for k, v in things.items()],
    #                   batch=next(get_stream(which_set="train",
    #                                         batch_size=args.batch_size,
    #                                         num_examples=args.batch_size)
    #                              .get_epoch_iterator(as_dict=True)),
    #                   before_training=True, every_n_epochs=10)]

    h_grads = T.grad(mse, dummy_states['h'])
    c_grads = T.grad(mse, dummy_states['c'])
    grad_timesubsample = 1

    grad_norms = []
    for t in xrange(0, args.length, grad_timesubsample):
        grad_norms.append(
            h_grads[t, :, :].norm(2).copy(name="grad_norm:h_%04i" % t))
    for t in xrange(0, args.length, grad_timesubsample):
        grad_norms.append(
            c_grads[t, :, :].norm(2).copy(name="grad_norm:c_%04i" % t))

    extensions.append(
        TrainingDataMonitoring(grad_norms, prefix="iteration",
                               after_batch=True))

    return graph, extensions
def build_extensions_list(self):
    epochs = self.config['epochs']
    save_freq = self.config['save_freq']
    original_save = self.config['original_save'] if self.config.has_key('original_save') \
        else False

    tracked_train = self.model.linksholder.filter_and_broadcast_request(
        'train', 'TrackingLink')
    tracked_valid = self.model.linksholder.filter_and_broadcast_request(
        'valid', 'TrackingLink')

    extensions = [
        Timing(),
        Experiment(self.filemanager.exp_name,
                   self.filemanager.local_path,
                   self.filemanager.network_path,
                   self.filemanager.full_dump),
        FinishAfter(after_n_epochs=epochs),
        TrainingDataMonitoring(tracked_train, prefix="train",
                               after_epoch=True),
        DataStreamMonitoring(tracked_valid, self.streams['valid'],
                             prefix="valid", after_epoch=True),
        # Ipdb(after_batch=True),
        SaveExperiment(self.model.parameters,
                       original_save=original_save,
                       every_n_epochs=save_freq),
    ]

    extensions += [
        # LoadExperiment(
        #     parameters,
        #     full_load=False),
        ProgressBar(),
        Printing(),
    ]

    self.extensions = extensions
def train(self, data_file, output_data_file, n_epochs=0):
    training_data = dataset.T_H5PYDataset(data_file, which_sets=('train',))
    test_data = dataset.T_H5PYDataset(data_file, which_sets=('test',))

    session = Session(root_url='http://localhost:5006')

    if self.MainLoop is None:
        step_rules = [RMSProp(learning_rate=0.2, decay_rate=0.95),
                      StepClipping(1)]

        algorithm = GradientDescent(
            cost=self.Cost,
            parameters=self.ComputationGraph.parameters,
            step_rule=CompositeRule(step_rules),
            on_unused_sources='ignore')

        train_stream = DataStream.default_stream(
            training_data,
            iteration_scheme=SequentialScheme(
                training_data.num_examples, batch_size=100))

        test_stream = DataStream.default_stream(
            test_data,
            iteration_scheme=SequentialScheme(
                test_data.num_examples, batch_size=100))

        self.MainLoop = MainLoop(
            model=Model(self.Cost),
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=[
                FinishAfter(after_n_epochs=n_epochs),
                Printing(),
                Checkpoint(output_data_file, every_n_epochs=50),
                TrainingDataMonitoring([self.Cost], after_batch=True,
                                       prefix='train'),
                DataStreamMonitoring([self.Cost], after_batch=True,
                                     data_stream=test_stream,
                                     prefix='test'),
                Plot(output_data_file,
                     channels=[['train_cost', 'test_cost']])
            ])

    self.MainLoop.run()
cost.name = "sequence_log_likelihood" cg = ComputationGraph(cost) model = Model(cost) ################# # Algorithm ################# algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=CompositeRule( [StepClipping(10.0), Adam(lr)])) train_monitor = TrainingDataMonitoring(variables=[cost], after_epoch=True, prefix="train") extensions = extensions = [ train_monitor, TrackTheBest('train_sequence_log_likelihood'), Printing(after_epoch=True) ] main_loop = MainLoop(model=model, data_stream=data_stream, algorithm=algorithm, extensions=extensions) main_loop.run()
model = Model(cost)

#################
# Algorithm
#################

n_batches = 139 * 16

algorithm = GradientDescent(
    cost=cost,
    parameters=cg.parameters,
    step_rule=CompositeRule([StepClipping(10.0), Adam(lr)]))

train_monitor = TrainingDataMonitoring(
    variables=[cost],
    every_n_batches=n_batches,
    prefix="train")

valid_monitor = DataStreamMonitoring(
    [cost],
    valid_stream,
    after_epoch=True,
    # before_first_epoch=False,
    prefix="valid")

extensions = [
    Timing(every_n_batches=n_batches),
    train_monitor,
    valid_monitor,
    TrackTheBest('valid_nll', after_epoch=True),
    Plot(save_dir + experiment_name + ".png",
         [['train_nll', 'valid_nll']],
         every_n_batches=4 * n_batches,
         email=True),
# Build model
m = config.Model(config, ds.vocab_size)

# Build the Blocks stuff for training
model = Model(m.sgd_cost)
# test_model = Model(m.generations)

algorithm = GradientDescent(cost=m.sgd_cost,
                            step_rule=config.step_rule,
                            parameters=model.parameters,
                            on_unused_sources='ignore')

extensions = [
    TrainingDataMonitoring([v for l in m.monitor_vars for v in l],
                           prefix='train',
                           after_epoch=True)
]

if config.save_freq is not None and dump_path is not None:
    extensions += [
        SaveLoadParams(path=dump_path,
                       model=model,
                       before_training=False,
                       after_training=True,
                       after_epoch=True)
    ]

if valid_stream is not None and config.valid_freq != -1:
    extensions += [
        DataStreamMonitoring([v for l in m.monitor_vars_valid for v in l],
                             valid_stream,
                             prefix='valid'),
def main(name, dataset, epochs, batch_size, learning_rate, attention,
         n_iter, enc_dim, dec_dim, z_dim, oldmodel):

    image_size, data_train, data_valid, data_test = datasets.get_data(dataset)

    train_stream = Flatten(
        DataStream(data_train,
                   iteration_scheme=SequentialScheme(
                       data_train.num_examples, batch_size)))
    valid_stream = Flatten(
        DataStream(data_valid,
                   iteration_scheme=SequentialScheme(
                       data_valid.num_examples, batch_size)))
    test_stream = Flatten(
        DataStream(data_test,
                   iteration_scheme=SequentialScheme(
                       data_test.num_examples, batch_size)))

    if name is None:
        name = dataset

    img_height, img_width = image_size
    x_dim = img_height * img_width

    rnninits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }
    inits = {
        #'weights_init': Orthogonal(),
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.),
    }

    if attention != "":
        read_N, write_N = attention.split(',')
        read_N = int(read_N)
        write_N = int(write_N)
        read_dim = 2 * read_N ** 2

        reader = AttentionReader(x_dim=x_dim, dec_dim=dec_dim,
                                 width=img_width, height=img_height,
                                 N=read_N, **inits)
        writer = AttentionWriter(input_dim=dec_dim, output_dim=x_dim,
                                 width=img_width, height=img_height,
                                 N=write_N, **inits)
        attention_tag = "r%d-w%d" % (read_N, write_N)
    else:
        read_dim = 2 * x_dim

        reader = Reader(x_dim=x_dim, dec_dim=dec_dim, **inits)
        writer = Writer(input_dim=dec_dim, output_dim=x_dim, **inits)

        attention_tag = "full"

    #----------------------------------------------------------------------
    # Learning rate
    def lr_tag(value):
        """ Convert a float into a short tag-usable string representation.
            E.g.:
                0.1   -> 11
                0.01  -> 12
                0.001 -> 13
                0.005 -> 53
        """
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    lr_str = lr_tag(learning_rate)

    subdir = time.strftime("%Y%m%d-%H%M%S") + "-" + name
    longname = "%s-%s-t%d-enc%d-dec%d-z%d-lr%s" % (
        dataset, attention_tag, n_iter, enc_dim, dec_dim, z_dim, lr_str)
    pickle_file = subdir + "/" + longname + ".pkl"

    print("\nRunning experiment %s" % longname)
    print("           dataset: %s" % dataset)
    print("      subdirectory: %s" % subdir)
    print("     learning rate: %g" % learning_rate)
    print("         attention: %s" % attention)
    print("      n_iterations: %d" % n_iter)
    print(" encoder dimension: %d" % enc_dim)
    print("       z dimension: %d" % z_dim)
    print(" decoder dimension: %d" % dec_dim)
    print("        batch size: %d" % batch_size)
    print("            epochs: %d" % epochs)
    print()

    #----------------------------------------------------------------------

    encoder_rnn = LSTM(dim=enc_dim, name="RNN_enc", **rnninits)
    decoder_rnn = LSTM(dim=dec_dim, name="RNN_dec", **rnninits)

    encoder_mlp = MLP([Identity()], [(read_dim + dec_dim), 4 * enc_dim],
                      name="MLP_enc", **inits)
    decoder_mlp = MLP([Identity()], [z_dim, 4 * dec_dim],
                      name="MLP_dec", **inits)

    q_sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, **inits)

    draw = DrawModel(n_iter,
                     reader=reader,
                     encoder_mlp=encoder_mlp,
                     encoder_rnn=encoder_rnn,
                     sampler=q_sampler,
                     decoder_mlp=decoder_mlp,
                     decoder_rnn=decoder_rnn,
                     writer=writer)
    draw.initialize()

    #------------------------------------------------------------------------
    x = tensor.matrix('features')

    #x_recons = 1. + x
    x_recons, kl_terms = draw.reconstruct(x)
    #x_recons, _, _, _, _ = draw.silly(x, n_steps=10, batch_size=100)
    #x_recons = x_recons[-1,:,:]

    #samples = draw.sample(100)
    #x_recons = samples[-1, :, :]
    #x_recons = samples[-1, :, :]

    recons_term = BinaryCrossEntropy().apply(x, x_recons)
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.sum(axis=0).mean()
    cost.name = "nll_bound"

    #------------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            StepClipping(10.),
            Adam(learning_rate),
        ])
        #step_rule=RMSProp(learning_rate),
        #step_rule=Momentum(learning_rate=learning_rate, momentum=0.95)
    )
    #algorithm.add_updates(scan_updates)

    #------------------------------------------------------------------------
    # Setup monitors
    monitors = [cost]
    for t in range(n_iter):
        kl_term_t = kl_terms[t, :].mean()
        kl_term_t.name = "kl_term_%d" % t

        #x_recons_t = T.nnet.sigmoid(c[t,:,:])
        #recons_term_t = BinaryCrossEntropy().apply(x, x_recons_t)
        #recons_term_t = recons_term_t.mean()
        #recons_term_t.name = "recons_term_%d" % t

        monitors += [kl_term_t]

    train_monitors = monitors[:]
    train_monitors += [aggregation.mean(algorithm.total_gradient_norm)]
    train_monitors += [aggregation.mean(algorithm.total_step_norm)]

    # Live plotting...
    plot_channels = [
        ["train_nll_bound", "test_nll_bound"],
        ["train_kl_term_%d" % t for t in range(n_iter)],
        #["train_recons_term_%d" % t for t in range(n_iter)],
        ["train_total_gradient_norm", "train_total_step_norm"]
    ]

    #------------------------------------------------------------
    if not os.path.exists(subdir):
        os.makedirs(subdir)

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            TrainingDataMonitoring(
                train_monitors,
                prefix="train",
                after_epoch=True),
            # DataStreamMonitoring(
            #     monitors,
            #     valid_stream,
            ##     updates=scan_updates,
            #     prefix="valid"),
            DataStreamMonitoring(
                monitors,
                test_stream,
                # updates=scan_updates,
                prefix="test"),
            Checkpoint(name, before_training=False, after_epoch=True,
                       save_separately=['log', 'model']),
            #Checkpoint(image_size=image_size, save_subdir=subdir,
            #           path=pickle_file, before_training=False,
            #           after_epoch=True, save_separately=['log', 'model']),
            Plot(name, channels=plot_channels),
            ProgressBar(),
            Printing()
        ])

    if oldmodel is not None:
        print("Initializing parameters with old model %s" % oldmodel)
        with open(oldmodel, "rb") as f:
            oldmodel = pickle.load(f)
            main_loop.model.set_param_values(oldmodel.get_param_values())
        del oldmodel

    main_loop.run()
def main(save_to, num_epochs,
         regularization=0.0003, subset=None, num_batches=None,
         histogram=None, resume=False):
    batch_size = 500
    output_size = 10

    convnet = create_lenet_5()
    layers = convnet.layers

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cost = (CategoricalCrossEntropy().apply(y.flatten(), probs)
            .copy(name='cost'))
    components = (ComponentwiseCrossEntropy().apply(y.flatten(), probs)
                  .copy(name='components'))
    error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                  .copy(name='error_rate'))
    confusion = (ConfusionMatrix().apply(y.flatten(), probs)
                 .copy(name='confusion'))
    confusion.tag.aggregation_scheme = Sum(confusion)

    cg = ComputationGraph([cost, error_rate, components])

    # Apply regularization to the cost
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    l2_norm = sum([(W ** 2).sum() for W in weights])
    l2_norm.name = 'l2_norm'
    cost = cost + regularization * l2_norm
    cost.name = 'cost_with_regularization'

    if subset:
        start = 30000 - subset // 2
        mnist_train = MNIST(("train",), subset=slice(start, start + subset))
    else:
        mnist_train = MNIST(("train",))
    mnist_train_stream = DataStream.default_stream(
        mnist_train, iteration_scheme=ShuffledScheme(
            mnist_train.num_examples, batch_size))

    mnist_test = MNIST(("test",))
    mnist_test_stream = DataStream.default_stream(
        mnist_test, iteration_scheme=ShuffledScheme(
            mnist_test.num_examples, batch_size))

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=AdaDelta(decay_rate=0.99))

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                              after_n_batches=num_batches),
                  DataStreamMonitoring(
                      [cost, error_rate, confusion],
                      mnist_test_stream,
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate, l2_norm,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  ProgressBar(),
                  Printing()]

    if histogram:
        attribution = AttributionExtension(
            components=components,
            parameters=cg.parameters,
            components_size=output_size,
            after_batch=True)
        extensions.insert(0, attribution)

    if resume:
        extensions.append(Load(save_to, True, True))

    model = Model(cost)

    main_loop = MainLoop(
        algorithm,
        mnist_train_stream,
        model=model,
        extensions=extensions)

    main_loop.run()

    if histogram:
        save_attributions(attribution, filename=histogram)

    with open('execution-log.json', 'w') as outfile:
        json.dump(main_loop.log, outfile, cls=NumpyEncoder)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])

########### ALGORITHM of training #############
algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=Adam(learning_rate=0.0005))
extensions = [
    Timing(),
    FinishAfter(after_n_epochs=num_epochs),
    DataStreamMonitoring([cost, error_rate, error_rate2],
                         stream_valid,
                         prefix="valid"),
    TrainingDataMonitoring(
        [cost, error_rate,
         aggregation.mean(algorithm.total_gradient_norm)],
        prefix="train",
        after_epoch=True),
    Checkpoint("catsVsDogs256.pkl"),
    ProgressBar(),
    Printing()
]

# Adding a live plot with the bokeh server
extensions.append(
    Plot('CatsVsDogs_256',
         channels=[['train_error_rate', 'valid_error_rate'],
                   ['valid_cost', 'valid_error_rate2'],
                   ['train_total_gradient_norm']],
         after_epoch=True))
def main(config, tr_stream, dev_stream, source_vocab, target_vocab,
         use_bokeh=False):

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    initial_context = tensor.matrix('initial_context')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'],
                                   config['enc_nhids'])

    # let user specify the target transition class name in config,
    # eval it and pass to decoder
    target_transition_name = config.get(
        'target_transition', 'GRUInitialStateWithInitialStateSumContext')
    target_transition = eval(target_transition_name)
    logger.info('Using target transition: {}'.format(target_transition_name))
    decoder = InitialContextDecoder(config['trg_vocab_size'],
                                    config['dec_embed'],
                                    config['dec_nhids'],
                                    config['enc_nhids'] * 2,
                                    config['context_dim'],
                                    target_transition)

    cost = decoder.cost(encoder.apply(source_sentence, source_sentence_mask),
                        source_sentence_mask, target_sentence,
                        target_sentence_mask, initial_context)

    cost.name = 'decoder_cost'

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # GRAPH TRANSFORMATIONS FOR BETTER TRAINING
    # TODO: validate performance with/without regularization
    if config.get('l2_regularization', False) is True:
        l2_reg_alpha = config['l2_regularization_alpha']
        logger.info(
            'Applying l2 regularization with alpha={}'.format(l2_reg_alpha))
        model_weights = VariableFilter(roles=[WEIGHT])(cg.variables)

        for W in model_weights:
            cost = cost + (l2_reg_alpha * (W ** 2).sum())

    # why do we need to name the cost variable? Where did the original name come from?
    cost.name = 'decoder_cost_cost'
    cg = ComputationGraph(cost)

    # apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        # this is the probability of dropping out, so you probably want to make it <=0.5
        logger.info('Applying dropout')
        dropout_inputs = [x for x in cg.intermediary_variables
                          if x.name == 'maxout_apply_output']
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    enc_dec_param_dict = merge(Selector(encoder).get_parameters(),
                               Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}".format(
        len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # create the training directory, and copy this config there if directory doesn't exist
    if not os.path.isdir(config['saveto']):
        os.makedirs(config['saveto'])
        shutil.copy(config['config_file'], config['saveto'])

    # Set extensions
    # TODO: add checking for existing model and loading
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'],
                      every_n_batches=config['save_freq'])
    ]

    # Create the theano variables that we need for the sampling graph
    sampling_input = tensor.lmatrix('input')
    sampling_context = tensor.matrix('context_input')

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config.get('bleu_script',
                                                 None) is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))

        generated = decoder.generate(sampling_input,
                                     sampling_representation,
                                     sampling_context)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append(
            Sampler(model=search_model, data_stream=tr_stream,
                    hook_samples=config['hook_samples'],
                    every_n_batches=config['sampling_freq'],
                    src_vocab=source_vocab,
                    trg_vocab=target_vocab,
                    src_vocab_size=config['src_vocab_size'],
                    ))

    # Add early stopping based on bleu
    if config.get('bleu_script', None) is not None:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(sampling_input, sampling_context, samples=samples,
                          config=config,
                          model=search_model, data_stream=dev_stream,
                          src_vocab=source_vocab,
                          trg_vocab=target_vocab,
                          normalize=config['normalized_bleu'],
                          every_n_batches=config['bleu_val_freq']))

    # Add early stopping based on Meteor
    if config.get('meteor_directory', None) is not None:
        logger.info("Building meteor validator")
        extensions.append(
            MeteorValidator(sampling_input, sampling_context, samples=samples,
                            config=config,
                            model=search_model, data_stream=dev_stream,
                            src_vocab=source_vocab,
                            trg_vocab=target_vocab,
                            normalize=config['normalized_bleu'],
                            every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot(config['model_save_directory'],
                 channels=[['decoder_cost',
                            'validation_set_bleu_score',
                            'validation_set_meteor_score']],
                 every_n_batches=10))

    # Set up training algorithm
    logger.info("Initializing training algorithm")
    # if there is dropout or random noise, we need to use the output of the modified graph
    if config['dropout'] < 1.0 or config['weight_noise_ff'] > 0.0:
        algorithm = GradientDescent(
            cost=cg.outputs[0], parameters=cg.parameters,
            step_rule=CompositeRule([StepClipping(config['step_clipping']),
                                     eval(config['step_rule'])()]))
    else:
        algorithm = GradientDescent(
            cost=cost, parameters=cg.parameters,
            step_rule=CompositeRule([StepClipping(config['step_clipping']),
                                     eval(config['step_rule'])()]))

    # enrich the logged information
    extensions.append(Timing(every_n_batches=100))

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model,
                         algorithm=algorithm,
                         data_stream=tr_stream,
                         extensions=extensions)

    # Train!
    main_loop.run()
#################
# Algorithm
#################

n_batches = 200

algorithm = GradientDescent(
    cost=cost,
    parameters=cg.parameters,
    step_rule=CompositeRule([StepClipping(10.0), Adam(lr)]))
algorithm.add_updates(extra_updates)

train_monitor = TrainingDataMonitoring(
    variables=monitoring_variables + data_monitoring,
    every_n_batches=n_batches,
    prefix="train")

valid_monitor = DataStreamMonitoring(
    monitoring_variables,
    valid_stream,
    every_n_batches=n_batches,
    prefix="valid")

extensions = [
    ProgressBar(),
    Timing(every_n_batches=n_batches),
    train_monitor,
    valid_monitor,
    TrackTheBest('valid_nll', every_n_batches=n_batches),
    Plot(save_dir + "progress/" + experiment_name + ".png",
         [['train_nll', 'valid_nll'], ['valid_learning_rate']],
         every_n_batches=n_batches,
         email=False),
def main(save_to, model, train, test, num_epochs, input_size=(150, 150),
         learning_rate=0.01, batch_size=50, num_batches=None,
         flatten_stream=False):
    """
    save_to : where to save trained model
    model : model given in input must be already initialised (works with
        convnet and mlp)
    input_size : the shape of the reshaped image in input (before flattening
        is applied if flatten_stream is True)
    """
    if flatten_stream:
        x = tensor.matrix('image_features')
    else:
        x = tensor.tensor4('image_features')
    y = tensor.lmatrix('targets')

    # Data augmentation
    # insert data augmentation here

    # Generating stream
    train_stream = DataStream.default_stream(
        train,
        iteration_scheme=ShuffledScheme(train.num_examples, batch_size)
    )
    test_stream = DataStream.default_stream(
        test,
        iteration_scheme=ShuffledScheme(test.num_examples, batch_size)
    )

    # Reshaping procedure
    # Add a crop option in scikitresize so that the image is not deformed
    # Resize to desired square shape
    train_stream = ScikitResize(train_stream, input_size,
                                which_sources=('image_features',))
    test_stream = ScikitResize(test_stream, input_size,
                               which_sources=('image_features',))

    # Flattening the stream
    if flatten_stream is True:
        train_stream = Flatten(train_stream,
                               which_sources=('image_features',))
        test_stream = Flatten(test_stream,
                              which_sources=('image_features',))

    # Apply input to model
    probs = model.apply(x)

    # Defining cost and various indices to watch
    # print(probs)
    # cost = SquaredError().apply(y.flatten(), probs)
    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           probs).copy(name='cost')
    error_rate = MisclassificationRate().apply(y.flatten(), probs).copy(
        name='error_rate')

    # Building Computation Graph
    cg = ComputationGraph([cost, error_rate])

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=learning_rate))

    # Defining extensions
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        TrainingDataMonitoring(
            [cost, error_rate,
             aggregation.mean(algorithm.total_gradient_norm)],
            prefix="train", every_n_batches=5),
        DataStreamMonitoring([cost, error_rate], test_stream,
                             prefix="test", every_n_batches=25),
        Checkpoint(save_to),
        ProgressBar(),
        Printing(every_n_batches=5)
    ]
    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.

    model = Model(cost)

    main_loop = MainLoop(
        algorithm,
        train_stream,
        model=model,
        extensions=extensions)

    main_loop.run()