def train(cost, error_rate, batch_size=100, num_epochs=150):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters: " + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(
        cost=cost, parameters=all_params,
        step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            Printing()])

    main_loop.run()
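
# ---------------------------------------------------------------------------
# Hedged usage sketch (not from this repository): the train() above expects a
# Theano cost variable and an error-rate variable. Below is a minimal way such
# a pair could be built with standard Blocks bricks, assuming flat 784-d MNIST
# features and Fuel source names 'features'/'targets'; the actual model behind
# get_mnist_video_streams is not reproduced here. The default application name
# of MisclassificationRate is what produces the
# 'valid_misclassificationrate_apply_error_rate' channel used by SaveParams.
# ---------------------------------------------------------------------------
from theano import tensor
from blocks.bricks import MLP, Tanh, Softmax
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
from blocks.initialization import IsotropicGaussian, Constant

x = tensor.matrix('features')
y = tensor.lmatrix('targets')
mlp = MLP(activations=[Tanh(), Softmax()], dims=[784, 100, 10],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()
probs = mlp.apply(x)
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cost'
# Keep the auto-generated name so the monitoring channel matches SaveParams.
error_rate = MisclassificationRate().apply(y.flatten(), probs)

train(cost, error_rate)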
def train(cli_params):
    cli_params['save_dir'] = prepare_dir(cli_params['save_to'])
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Log also DEBUG to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=False)
    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim, ) + p.encoder_layers

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, parameters=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": {
            'T_C_class': ladder.costs.class_corr,
            'T_C_de': ladder.costs.denois.values(),
        },
        "valid_approx": OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_C_de', ladder.costs.denois.values()),
        ]),
        "valid_final": OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', ladder.costs.denois.values()),
        ]),
    }

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(data.train, data.train_ind,
                        p.batch_size,
                        n_labeled=p.labeled_samples,
                        n_unlabeled=p.unlabeled_samples,
                        whiten=whiten,
                        cnorm=cnorm),
        model=Model(ladder.costs.total),
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size, whiten=whiten,
                                cnorm=cnorm, scheme=ShuffledScheme),
                prefix="valid_approx"),

            # This Monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(data.train, data.train_ind,
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="valid_final",
                after_n_epochs=p.num_epochs),

            TrainingDataMonitoring(
                [ladder.costs.total, ladder.costs.class_corr,
                 training_algorithm.total_gradient_norm]
                + ladder.costs.denois.values(),
                prefix="train", after_epoch=True),

            SaveParams(None, all_params, p.save_dir, after_epoch=True),
            SaveExpParams(p, p.save_dir, before_training=True),
            SaveLog(p.save_dir, after_training=True),
            ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay,
                    p.num_epochs, after_epoch=True),
        ])
    main_loop.run()

    # Get results
    df = DataFrame.from_dict(main_loop.log, orient='index')
    col = 'valid_final_error_rate_clean'
    logger.info('%s %g' % (col, df[col].iloc[-1]))

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return df
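
# ---------------------------------------------------------------------------
# Hedged usage sketch (not from this repository): the train() above returns
# the Blocks log as a pandas DataFrame, or None if training was interrupted.
# 'cli_params' is assumed to come from the surrounding argument parsing, and
# 'valid_approx_error_rate_clean' is an assumed channel name mirroring the
# 'valid_final_error_rate_clean' column read inside train().
# ---------------------------------------------------------------------------
df = train(cli_params)
if df is not None:
    cols = [c for c in ('valid_approx_error_rate_clean',
                        'valid_final_error_rate_clean') if c in df.columns]
    # Monitoring channels are only populated at epoch boundaries, so drop NaNs.
    print df[cols].dropna(how='all').tail()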
monitor_train_cost = TrainingDataMonitoring(
    monitored_variables,
    prefix="train",
    after_epoch=True)

monitor_train_cost_evaluation = DataStreamMonitoring(
    monitored_variables,
    data_stream=train_stream_evaluation,
    prefix="train_evaluation",
    after_epoch=True)

monitor_valid_cost = DataStreamMonitoring(
    monitored_variables,
    data_stream=valid_stream,
    prefix="valid",
    after_epoch=True)

main_loop = MainLoop(data_stream=train_stream,
                     algorithm=algorithm,
                     extensions=[monitor_train_cost,
                                 monitor_train_cost_evaluation,
                                 monitor_valid_cost,
                                 Printing(),
                                 SaveLog(after_epoch=True),
                                 SaveParams('valid_error_rate', model,
                                            save_path, after_epoch=True)],
                     model=model)

print 'Starting training ...'
main_loop.run()
def train(model, configs):
    get_streams = configs['get_streams']
    save_path = configs['save_path']
    num_epochs = configs['num_epochs']
    batch_size = configs['batch_size']
    lrs = configs['lrs']
    until_which_epoch = configs['until_which_epoch']
    grad_clipping = configs['grad_clipping']
    monitorings = model.monitorings

    # Training
    if configs['weight_noise'] > 0:
        cg = ComputationGraph(model.cost)
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        cg = apply_noise(cg, weights, configs['weight_noise'])
        model.cost = cg.outputs[0].copy(name='CE')

    if configs['l2_reg'] > 0:
        cg = ComputationGraph(model.cost)
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        new_cost = model.cost + configs['l2_reg'] * sum([
            (weight ** 2).sum() for weight in weights])
        model.cost = new_cost.copy(name='CE')

    blocks_model = Model(model.cost)
    all_params = blocks_model.parameters
    print "Number of found parameters: " + str(len(all_params))
    print all_params

    default_lr = np.float32(configs['lrs'][0])
    lr_var = theano.shared(default_lr, name="learning_rate")

    clipping = StepClipping(threshold=np.cast[floatX](grad_clipping))

    # sgd_momentum = Momentum(
    #     learning_rate=0.0001,
    #     momentum=0.95)
    # step_rule = CompositeRule([clipping, sgd_momentum])
    adam = Adam(learning_rate=lr_var)
    step_rule = CompositeRule([clipping, adam])
    training_algorithm = GradientDescent(
        cost=model.cost, parameters=all_params,
        step_rule=step_rule)

    monitored_variables = [
        lr_var,
        aggregation.mean(training_algorithm.total_gradient_norm)] + monitorings

    # Also monitor the L2 norm of every parameter and of its gradient.
    for param in all_params:
        name = param.tag.annotations[0].name + "." + param.name
        to_monitor = training_algorithm.gradients[param].norm(2)
        to_monitor.name = name + "_grad_norm"
        monitored_variables.append(to_monitor)
        to_monitor = param.norm(2)
        to_monitor.name = name + "_norm"
        monitored_variables.append(to_monitor)

    train_data_stream, valid_data_stream = get_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_CE', blocks_model, save_path, after_epoch=True),
            SaveLog(after_epoch=True),
            ProgressBar(),
            LRDecay(lr_var, lrs, until_which_epoch, after_epoch=True),
            Printing(after_epoch=True)])
    main_loop.run()
def train(ladder, batch_size=100, num_train_examples=60000,
          num_epochs=150, lrate_decay=0.67):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/mnist_100_standard_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    model = Model(ladder.costs.total)
    all_params = model.parameters
    print len(all_params)
    print all_params

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, parameters=all_params,
        step_rule=Adam(learning_rate=ladder.lr))

    # Fetch all batch normalization updates. They are in the clean path.
    # In addition to actual training, also do BN variable approximations.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    training_algorithm.add_updates(bn_updates)

    monitored_variables = [
        ladder.costs.class_corr, ladder.costs.class_clean,
        ladder.error, training_algorithm.total_gradient_norm,
        ladder.costs.total] + ladder.costs.denois.values()

    train_data_stream, test_data_stream = get_mixed_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=test_data_stream,
        prefix="test",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('test_CE_corr', model, save_path),
            SaveLog(save_path, after_epoch=True),
            LRDecay(lr=ladder.lr,
                    decay_first=num_epochs * lrate_decay,
                    decay_last=num_epochs,
                    after_epoch=True),
            Printing()])
    main_loop.run()
def train(model, batch_size=100, num_epochs=1000):
    cost = model.cost
    monitorings = model.monitorings

    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/CMV_V2_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters: " + str(len(all_params))
    print all_params

    clipping = StepClipping(threshold=np.cast[floatX](10))
    adam = Adam(learning_rate=model.lr_var)
    step_rule = CompositeRule([adam, clipping])
    training_algorithm = GradientDescent(
        cost=cost, parameters=all_params, step_rule=step_rule)

    monitored_variables = [
        model.lr_var, cost,
        aggregation.mean(training_algorithm.total_gradient_norm)] + monitorings

    # Also monitor the L2 norm of every parameter and of its gradient.
    params_dicts = blocks_model.get_parameter_dict()
    for name, param in params_dicts.iteritems():
        to_monitor = training_algorithm.gradients[param].norm(2)
        to_monitor.name = name + "_grad_norm"
        monitored_variables.append(to_monitor)
        to_monitor = param.norm(2)
        to_monitor.name = name + "_norm"
        monitored_variables.append(to_monitor)

    train_data_stream, valid_data_stream = get_cmv_v2_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            LRDecay(model.lr_var,
                    [0.001, 0.0001, 0.00001, 0.000001],
                    [8, 15, 30, 1000],
                    after_epoch=True),
            Printing()])
    main_loop.run()
log_path = save_path + '/log.txt'
fh = logging.FileHandler(filename=log_path)
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

algorithm = GradientDescent(
    cost=cost,
    parameters=ComputationGraph(cost).parameters,
    step_rule=CompositeRule([StepClipping(10.0), Adam(1e-3)]))  # 3e-4

monitor_cost = TrainingDataMonitoring([cost],
                                      prefix='train',
                                      after_epoch=False,
                                      before_training=True,
                                      every_n_batches=1000)

data = get_episodic_copy_data(100, int(1e6), 10, batch_size)
dataset = IterableDataset({'x': data[0].astype('int8'),
                           'y': data[1].astype('int8')})
stream = DataStream(dataset)

model = Model(cost)
main_loop = MainLoop(data_stream=stream,
                     algorithm=algorithm,
                     extensions=[monitor_cost,
                                 Printing(after_epoch=False,
                                          every_n_batches=1000),
                                 SaveLog(every_n_batches=1000)],
                     model=model)

print 'Starting training ...'
main_loop.run()
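
# ---------------------------------------------------------------------------
# Self-contained sketch (toy data, not from this repository) of the Fuel
# pattern used above: IterableDataset wraps a dict of pre-batched arrays and
# DataStream iterates over them, yielding one batch per element along the
# first axis. get_episodic_copy_data and the real cost are not reproduced.
# ---------------------------------------------------------------------------
import numpy as np
from fuel.datasets import IterableDataset
from fuel.streams import DataStream

toy_x = np.zeros((5, 20, 10), dtype='int8')  # 5 batches of shape (20, 10)
toy_y = np.zeros((5, 20, 10), dtype='int8')
toy_dataset = IterableDataset({'x': toy_x, 'y': toy_y})
toy_stream = DataStream(toy_dataset)
for batch in toy_stream.get_epoch_iterator(as_dict=True):
    print batch['x'].shape, batch['y'].shape  # -> (20, 10) (20, 10)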