def track_best(channel, save_path):
    """Track the best value of `channel` and checkpoint the model to
    `save_path` whenever a new best is recorded."""
    tracker = TrackTheBest(channel, choose_best=min)
    checkpoint = saveload.Checkpoint(save_path, after_training=False,
                                     use_cpickle=True)
    # TrackTheBest writes a '<channel>_best_so_far' record to the log;
    # the OnLogRecord predicate fires the checkpoint when it appears.
    checkpoint.add_condition(["after_epoch"],
                             predicate=predicates.OnLogRecord(
                                 '{0}_best_so_far'.format(channel)))
    return [tracker, checkpoint]
def track_best(self, channel, save_path=None, choose_best=min):
    """Method variant (on a class that owns `self.extensions`): track the
    best value of `channel` and, if `save_path` is given, checkpoint the
    model on every new best."""
    tracker = TrackTheBest(channel, choose_best=choose_best)
    self.extensions.append(tracker)
    if save_path:
        checkpoint = saveload.Checkpoint(save_path, after_training=False,
                                         use_cpickle=True)
        checkpoint.add_condition(["after_epoch"],
                                 predicate=predicates.OnLogRecord(
                                     '{0}_best_so_far'.format(channel)))
        self.extensions.append(checkpoint)
step_rules = [RMSProp(learning_rate=learning_rate, decay_rate=decay_rate),
              StepClipping(step_clipping)]
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=CompositeRule(step_rules))

# Extensions
gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
step_norm = aggregation.mean(algorithm.total_step_norm)
monitored_vars = [cost, gradient_norm, step_norm]

dev_monitor = DataStreamMonitoring(variables=[cost], after_epoch=True,
                                   before_first_epoch=True,
                                   data_stream=dev_stream, prefix="dev")
train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                       after_batch=True,
                                       before_first_epoch=True, prefix='tra')

extensions = [dev_monitor, train_monitor, Timing(),
              Printing(after_batch=True),
              FinishAfter(after_n_epochs=nepochs),
              saveload.Load(load_path),
              saveload.Checkpoint(last_path),
              ] + track_best('dev_cost', save_path)

if learning_rate_decay not in (0, 1):
    extensions.append(SharedVariableModifier(
        step_rules[0].learning_rate,
        lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr),
        after_epoch=True, after_batch=False))

print('number of parameters in the model: ' +
      str(tensor.sum([p.size for p in cg.parameters]).eval()))

# Finally, build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()
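# The SharedVariableModifier above applies multiplicative decay once per
# epoch, so after n epochs the learning rate is
# learning_rate * learning_rate_decay ** n.  A quick sanity check of that
# schedule (the numbers below are illustrative assumptions, not values
# from the source):
base_lr, decay = 1e-3, 0.97
for n in (0, 10, 50):
    print('epoch %d: lr = %.2e' % (n, base_lr * decay ** n))
# epoch 0: lr = 1.00e-03; epoch 10: lr = 7.37e-04; epoch 50: lr = 2.18e-04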
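# get_stream, used by run() below, is a project-specific helper that is not
# shown in this section.  A minimal sketch of what it presumably does,
# assuming the data lives in a Fuel H5PYDataset with 'train' and 'test'
# splits (the dataset layout and iteration scheme are assumptions, not the
# project's actual implementation):
from fuel.datasets import H5PYDataset
from fuel.schemes import SequentialScheme
from fuel.streams import DataStream

def get_stream(hdf5_file, which_set, batch_size):
    dataset = H5PYDataset(hdf5_file, which_sets=(which_set,))
    scheme = SequentialScheme(examples=dataset.num_examples,
                              batch_size=batch_size)
    return DataStream(dataset, iteration_scheme=scheme)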
def run():
    # Load Model
    net_size = 256  # Hard-coded instead of loading the model
                    # (takes too long to set up the network):
    # net = vaegan.VAEGAN()
    # network_saver = saver.NetworkSaver('vaegan/models/', net=net)
    # network_saver.load()

    # DATA
    train_stream = get_stream(hdf5_file, 'train', batch_size)  # TODO jonathan ?
    test_stream = get_stream(hdf5_file, 'test', batch_size)  # TODO jonathan ?

    # MODEL
    x = T.TensorType('floatX', [False] * 3)('features')
    y = T.tensor3('targets', dtype='floatX')
    train_flag = [theano.shared(0)]
    x = x.swapaxes(0, 1)
    y = y.swapaxes(0, 1)

    # More config
    out_size = len(output_columns) - 1  # code_mode=RL-MDN
    latent_size = net_size
    in_size = latent_size + len(input_columns)

    # NN fprop
    y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                  num_recurrent_layers, train_flag)

    # COST
    cg = ComputationGraph(cost)
    extra_updates = []

    # RMSProp training optimizer
    step_rules = [RMSProp(learning_rate=learning_rate, decay_rate=decay_rate),
                  StepClipping(step_clipping)]
    parameters_to_update = cg.parameters
    algorithm = GradientDescent(cost=cg.outputs[0],
                                parameters=parameters_to_update,
                                step_rule=CompositeRule(step_rules))
    # A no-op while extra_updates is empty.
    # TODO jonathan what is this, is this needed?
    algorithm.add_updates(extra_updates)

    # Extensions
    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    monitored_vars = [cost, step_rules[0].learning_rate, gradient_norm,
                      step_norm]

    test_monitor = DataStreamMonitoring(variables=[cost], after_epoch=True,
                                        before_first_epoch=True,
                                        data_stream=test_stream,
                                        prefix="test")
    train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                           after_epoch=True,
                                           before_first_epoch=True,
                                           prefix='train')
    set_train_flag = SetTrainFlag(after_epoch=True, before_epoch=True,
                                  flag=train_flag)
    # plot = Plot('Plotting example', channels=[['cost']], after_batch=True,
    #             open_browser=True)

    extensions = [set_train_flag, test_monitor, train_monitor, Timing(),
                  Printing(after_epoch=True),
                  FinishAfter(after_n_epochs=nepochs),
                  saveload.Load(load_path),
                  saveload.Checkpoint(last_path, every_n_epochs=10000),
                  ] + track_best('test_cost', save_path)
    # + track_best('train_cost', last_path)

    if learning_rate_decay not in (0, 1):
        extensions.append(SharedVariableModifier(
            step_rules[0].learning_rate,
            lambda n, lr: np.cast[theano.config.floatX](learning_rate_decay * lr),
            after_epoch=False, every_n_epochs=lr_decay_every_n_epochs,
            after_batch=False))

    print('number of parameters in the model: ' +
          str(T.sum([p.size for p in cg.parameters]).eval()))

    # Finally, build the main loop and train the model
    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                         model=Model(cost), extensions=extensions)
    main_loop.run()
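# SetTrainFlag is not a Blocks built-in, and its definition is not shown in
# this section.  A minimal sketch of such an extension, assuming `flag` is a
# one-element list holding a Theano shared variable that nn_fprop reads to
# switch between train and test behaviour (e.g. for dropout):
from blocks.extensions import SimpleExtension

class SetTrainFlag(SimpleExtension):
    def __init__(self, flag, **kwargs):
        super(SetTrainFlag, self).__init__(**kwargs)
        self.flag = flag

    def do(self, which_callback, *args):
        # Raise the flag when an epoch starts and lower it when it ends,
        # so the after-epoch monitoring on test_stream runs in test mode.
        self.flag[0].set_value(1 if which_callback == 'before_epoch' else 0)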