def load_models(net, model_path=save_path,
                in_size=len(input_columns),
                out_size=len(output_columns) - 1 if cost_mode == 'RL-MDN' else len(output_columns),
                hidden_size=hidden_size, num_recurrent_layers=num_recurrent_layers,
                model=layer_models[0]):
    initials = []
    if not os.path.isfile(model_path):
        print 'Could not find model file.'
        sys.exit(0)
    print 'Loading model from {0}...'.format(model_path)
    x = tensor.tensor3('features', dtype=theano.config.floatX)
    y = tensor.tensor3('targets', dtype=theano.config.floatX)
    train_flag = [theano.shared(0)]
    # The RNN consumes the encoder's latent code concatenated with the raw
    # inputs, so in_size is recomputed once the encoder size is known.
    latent_size = net.get_size()
    in_size = latent_size + len(input_columns)
    y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                  num_recurrent_layers, train_flag)
    # Build a dummy MainLoop so the Load extension can restore the parameters.
    main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost),
                         extensions=[saveload.Load(model_path)])
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model
    print 'Model loaded. Building prediction function...'
    hiddens = []
    for i in range(num_recurrent_layers):
        brick = [b for b in bin_model.get_top_bricks()
                 if b.name == layer_models[i] + str(i)][0]
        hiddens.extend(VariableFilter(theano_name=brick.name + '_apply_states')(bin_model.variables))
        hiddens.extend(VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
        initials.extend(VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
    predict_func = theano.function([x], hiddens + [y_hat])
    encoder, code_size = load_encoder(net)
    return predict_func, initials, encoder, code_size
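def _example_predict_usage(net, window):
    # Hedged usage sketch, not part of the original source: it shows how the
    # outputs of load_models() fit together. `window` is a hypothetical array
    # of shape (seq_len, 1, in_size). predict_func returns the per-layer
    # hidden/cell states followed by y_hat, and `initials` holds the
    # INITIAL_STATE parameters in the same order, so the last timestep can be
    # copied back to warm-start the next call.
    predict_func, initials, encoder, code_size = load_models(net)
    outputs = predict_func(window)
    states, y_hat_val = outputs[:-1], outputs[-1]
    for init, state in zip(initials, states):
        init.set_value(state[-1, 0, :].astype(theano.config.floatX))
    return y_hat_val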
step_rules = [RMSProp(learning_rate=learning_rate, decay_rate=decay_rate),
              StepClipping(step_clipping)]
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=CompositeRule(step_rules))

# Extensions
gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
step_norm = aggregation.mean(algorithm.total_step_norm)
monitored_vars = [cost, gradient_norm, step_norm]
dev_monitor = DataStreamMonitoring(variables=[cost], after_epoch=True,
                                   before_first_epoch=True,
                                   data_stream=dev_stream, prefix="dev")
train_monitor = TrainingDataMonitoring(variables=monitored_vars, after_batch=True,
                                       before_first_epoch=True, prefix='tra')
extensions = [dev_monitor, train_monitor, Timing(),
              Printing(after_batch=True),
              FinishAfter(after_n_epochs=nepochs),
              saveload.Load(load_path),
              saveload.Checkpoint(last_path),
              ] + track_best('dev_cost', save_path)

if learning_rate_decay not in (0, 1):
    # Multiply the learning rate by learning_rate_decay after every epoch.
    extensions.append(SharedVariableModifier(
        step_rules[0].learning_rate,
        lambda n, lr: numpy.cast[theano.config.floatX](learning_rate_decay * lr),
        after_epoch=True, after_batch=False))

print('number of parameters in the model: ' +
      str(tensor.sum([p.size for p in cg.parameters]).eval()))

# Finally build the main loop and train the model
main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                     model=Model(cost), extensions=extensions)
main_loop.run()
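# `track_best` is used above but not defined in this snippet. A minimal sketch
# of what it presumably does, assuming Blocks' TrackTheBest plus a Checkpoint
# that only fires when the tracked channel improves (an assumption, not the
# original implementation):
from blocks.extensions.training import TrackTheBest
from blocks.extensions.predicates import OnLogRecord

def track_best(channel, save_path):
    # TrackTheBest writes `<channel>_best_so_far` to the log when the channel
    # improves; the checkpoint is conditioned on that record.
    tracker = TrackTheBest(channel, choose_best=min)
    checkpoint = saveload.Checkpoint(save_path, after_training=False,
                                     use_cpickle=True)
    checkpoint.add_condition(['after_epoch'],
                             predicate=OnLogRecord(channel + '_best_so_far'))
    return [tracker, checkpoint]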
primetext = ''.join([ch for ch in args.primetext if ch in char_to_ix])
if len(primetext) == 0:
    raise Exception('primetext characters are not in the vocabulary')
x_curr = numpy.expand_dims(
    numpy.array([char_to_ix[ch] for ch in primetext], dtype='uint8'), axis=1)
print 'Loading model from {0}...'.format(args.model)
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers, model)
# Build a dummy MainLoop so the Load extension can restore the parameters.
main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost),
                     extensions=[saveload.Load(args.model)])
for extension in main_loop.extensions:
    extension.main_loop = main_loop
main_loop._run_extensions('before_training')
bin_model = main_loop.model
activations = []
initial_states = []
for i in range(num_layers):
    brick = [b for b in bin_model.get_top_bricks()
             if b.name == model + str(i)][0]
    activations.extend(VariableFilter(theano_name=brick.name + '_apply_states')(bin_model.variables))
    activations.extend(VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
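def _example_sampling_step(x_curr):
    # Hedged sketch, not from the (truncated) original: once the activations
    # are collected, one way to compile a sampling function and draw the next
    # character is shown below. It assumes y_hat is a (time, batch, vocab)
    # softmax output and a batch size of 1.
    sample_fn = theano.function([x], activations + [y_hat])
    outputs = sample_fn(x_curr)
    probs = outputs[-1][-1].squeeze()  # distribution over the vocabulary at the last step
    next_ix = numpy.random.choice(vocab_size, p=probs / probs.sum())
    return numpy.array([[next_ix]], dtype='uint8')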
y = y.swapaxes(0, 1)
in_size = num_features
out_size = num_features
y_hat, cost, cells = nn_fprop(x, y, in_size, out_size,
                              hidden_size[network_mode], num_layers,
                              layer_models[network_mode][0], 'MDN',
                              training=False)
# Build a dummy MainLoop so the Load extension can restore the parameters.
main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost),
                     extensions=[saveload.Load(save_path[network_mode])])
for extension in main_loop.extensions:
    extension.main_loop = main_loop
main_loop._run_extensions('before_training')
bin_model = main_loop.model
print 'Model loaded. Building prediction function...'
hiddens = []
initials = []
for i in range(num_layers):
    brick = [b for b in bin_model.get_top_bricks()
             if b.name == layer_models[network_mode][i] + str(i) + '-'][0]
    hiddens.extend(VariableFilter(theano_name=brick.name + '_apply_states')(
        bin_model.variables))
def load_model(self, load_path):
    load_pre = saveload.Load(load_path)
    self.extensions.append(load_pre)
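# Hedged usage sketch (the owning object and path are assumptions, not from
# the source): load_model() only registers a Load extension, so it must be
# called before the main loop is constructed and run, e.g.
#   trainer.load_model('model.toload.pkl')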
def training(self, fea2obj, batch_size, learning_rate=0.005, steprule='adagrad',
             wait_epochs=5, kl_weight_init=None, klw_ep=50, klw_inc_rate=0,
             num_epochs=None):
    networkfile = self._config['net']
    n_epochs = num_epochs or int(self._config['nepochs'])
    reg_weight = float(self._config['loss_weight'])
    reg_type = self._config['loss_reg']
    numtrain = int(self._config['num_train']) if 'num_train' in self._config else None
    train_stream, num_samples_train = get_comb_stream(
        fea2obj, 'train', batch_size, shuffle=True, num_examples=numtrain)
    dev_stream, num_samples_dev = get_comb_stream(
        fea2obj, 'dev', batch_size=None, shuffle=False)
    logger.info('sources: %s -- number of train/dev samples: %d/%d',
                train_stream.sources, num_samples_train, num_samples_dev)
    t2idx = fea2obj['targets'].t2idx
    # An explicit kl_weight_init overrides the config value; fall back to 1.
    klw_init = kl_weight_init or (
        float(self._config['kld_weight']) if 'kld_weight' in self._config else 1)
    logger.info('kl_weight_init: %s', klw_init)
    kl_weight = shared_floatx(klw_init, 'kl_weight')
    entropy_weight = shared_floatx(1., 'entropy_weight')

    cost, p_at_1, _, KLD, logpy_xz, pat1_recog, misclassify_rate = build_model_new(
        fea2obj, len(t2idx), self._config, kl_weight, entropy_weight)

    cg = ComputationGraph(cost)
    weights = VariableFilter(roles=[WEIGHT])(cg.parameters)
    logger.info('Model weights are: %s', weights)
    if 'L2' in reg_type:
        cost += reg_weight * l2_norm(weights)
        logger.info('applying %s with weight: %f', reg_type, reg_weight)

    dropout = -0.1  # dropout is disabled; set to a positive rate to enable it
    if dropout > 0:
        cg = apply_dropout(cg, weights, dropout)
        cost = cg.outputs[0]
    cost.name = 'cost'

    logger.info('Our Algorithm is: %s, and learning_rate: %f', steprule, learning_rate)
    if 'adagrad' in steprule:
        cnf_step_rule = AdaGrad(learning_rate)
    elif 'adadelta' in steprule:
        cnf_step_rule = AdaDelta(decay_rate=0.95)
    elif 'decay' in steprule:
        cnf_step_rule = RMSProp(learning_rate=learning_rate, decay_rate=0.90)
        cnf_step_rule = CompositeRule([cnf_step_rule, StepClipping(1)])
    elif 'momentum' in steprule:
        cnf_step_rule = Momentum(learning_rate=learning_rate, momentum=0.9)
    elif 'adam' in steprule:
        cnf_step_rule = Adam(learning_rate=learning_rate)
    else:
        logger.info('The steprule param is wrong! which is: %s', steprule)
    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=cnf_step_rule,
                                on_unused_sources='warn')
    # algorithm.add_updates(updates)
    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    monitored_vars = [cost, gradient_norm, step_norm, p_at_1, KLD, logpy_xz,
                      kl_weight, pat1_recog]
    train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                           after_batch=True,
                                           before_first_epoch=True, prefix='tra')
    dev_monitor = DataStreamMonitoring(
        variables=[cost, p_at_1, KLD, logpy_xz, pat1_recog, misclassify_rate],
        after_epoch=True, before_first_epoch=True, data_stream=dev_stream,
        prefix="dev")
    extensions = [
        dev_monitor, train_monitor, Timing(),
        TrackTheBest('dev_cost'),
        FinishIfNoImprovementAfter('dev_cost_best_so_far', epochs=wait_epochs),
        Printing(after_batch=False),  # ProgressBar()
        FinishAfter(after_n_epochs=n_epochs),
        saveload.Load(networkfile + '.toload.pkl'),
    ] + track_best('dev_cost', networkfile + '.best.pkl')
    # extensions.append(SharedVariableModifier(
    #     kl_weight,
    #     lambda n, klw: numpy.cast[theano.config.floatX](klw_inc_rate + klw),
    #     after_epoch=False, every_n_epochs=klw_ep, after_batch=False))
    # extensions.append(SharedVariableModifier(
    #     entropy_weight,
    #     lambda n, crw: numpy.cast[theano.config.floatX](crw - klw_inc_rate),
    #     after_epoch=False, every_n_epochs=klw_ep, after_batch=False))

    logger.info('number of parameters in the model: %d',
                tensor.sum([p.size for p in cg.parameters]).eval())
    logger.info('Lookup table sizes: %s',
                [p.size.eval() for p in cg.parameters if 'lt' in p.name])

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                         model=Model(cost), extensions=extensions)
    main_loop.run()
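# Hedged usage sketch (the owning class and argument values are assumptions,
# not from the source):
#   trainer.training(fea2obj, batch_size=64, learning_rate=0.005,
#                    steprule='adagrad', wait_epochs=5)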
def run():
    # Load Model
    net_size = 256  # hard-coded instead of loading the model (takes too long to set up the network)
    # net = vaegan.VAEGAN()
    # network_saver = saver.NetworkSaver('vaegan/models/', net=net)
    # network_saver.load()

    # DATA
    train_stream = get_stream(hdf5_file, 'train', batch_size)  # TODO jonathan ?
    test_stream = get_stream(hdf5_file, 'test', batch_size)  # TODO jonathan ?

    # MODEL
    x = T.TensorType('floatX', [False] * 3)('features')
    y = T.tensor3('targets', dtype='floatX')
    train_flag = [theano.shared(0)]
    x = x.swapaxes(0, 1)
    y = y.swapaxes(0, 1)

    # More Config
    out_size = len(output_columns) - 1  # code_mode=RL-MDN
    latent_size = net_size
    in_size = latent_size + len(input_columns)

    # NN fprop
    y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                  num_recurrent_layers, train_flag)

    # COST
    cg = ComputationGraph(cost)
    extra_updates = []

    # RMSProp training optimizer
    step_rules = [RMSProp(learning_rate=learning_rate, decay_rate=decay_rate),
                  StepClipping(step_clipping)]
    parameters_to_update = cg.parameters
    algorithm = GradientDescent(cost=cg.outputs[0],
                                parameters=parameters_to_update,
                                step_rule=CompositeRule(step_rules))
    algorithm.add_updates(extra_updates)  # TODO jonathan what is this, is this needed?

    # Extensions
    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    monitored_vars = [cost, step_rules[0].learning_rate, gradient_norm, step_norm]
    test_monitor = DataStreamMonitoring(variables=[cost], after_epoch=True,
                                        before_first_epoch=True,
                                        data_stream=test_stream, prefix="test")
    train_monitor = TrainingDataMonitoring(variables=monitored_vars,
                                           after_epoch=True,
                                           before_first_epoch=True,
                                           prefix='train')
    set_train_flag = SetTrainFlag(after_epoch=True, before_epoch=True, flag=train_flag)
    # plot = Plot('Plotting example', channels=[['cost']], after_batch=True, open_browser=True)
    extensions = [set_train_flag, test_monitor, train_monitor, Timing(),
                  Printing(after_epoch=True),
                  FinishAfter(after_n_epochs=nepochs),
                  saveload.Load(load_path),
                  saveload.Checkpoint(last_path, every_n_epochs=10000),
                  ] + track_best('test_cost', save_path)  # + track_best('train_cost', last_path)

    if learning_rate_decay not in (0, 1):
        extensions.append(SharedVariableModifier(
            step_rules[0].learning_rate,
            lambda n, lr: np.cast[theano.config.floatX](learning_rate_decay * lr),
            after_epoch=False, every_n_epochs=lr_decay_every_n_epochs,
            after_batch=False))

    print 'number of parameters in the model: ' + str(
        T.sum([p.size for p in cg.parameters]).eval())

    # Finally build the main loop and train the model
    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                         model=Model(cost), extensions=extensions)
    main_loop.run()
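# `SetTrainFlag` is used above but not defined in this snippet. A minimal
# sketch of what it presumably does, assuming it is a Blocks SimpleExtension
# that flips the shared `train_flag` so stochastic layers (dropout/noise)
# behave differently during training and evaluation:
from blocks.extensions import SimpleExtension

class SetTrainFlag(SimpleExtension):
    def __init__(self, flag, **kwargs):
        super(SetTrainFlag, self).__init__(**kwargs)
        self.flag = flag

    def do(self, which_callback, *args):
        # Toggle: 1 while an epoch is training, 0 around monitoring, given the
        # before_epoch=True/after_epoch=True conditions used above.
        self.flag[0].set_value(1 - self.flag[0].get_value())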
def load_models(models=hierarchy_models,
                in_size=len(hierarchy_input_columns[level_number_in_hierarchy]),
                out_size=len(hierarchy_output_columns[level_number_in_hierarchy]),
                hidden_size=hidden_size, num_layers=num_layers,
                model=layer_models[0]):
    predict_funcs = []
    initials = []
    for hierarchy_index in range(len(models)):
        saved_model = models[hierarchy_index]
        print 'Loading model from {0}...'.format(saved_model)
        x = tensor.tensor3('features', dtype=theano.config.floatX)
        y = tensor.tensor3('targets', dtype=theano.config.floatX)
        y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                      num_layers, model, training=False)
        # Build a dummy MainLoop so the Load extension can restore the parameters.
        main_loop = MainLoop(algorithm=None, data_stream=None, model=Model(cost),
                             extensions=[saveload.Load(saved_model)])
        for extension in main_loop.extensions:
            extension.main_loop = main_loop
        main_loop._run_extensions('before_training')
        bin_model = main_loop.model
        print 'Model loaded. Building prediction function...'
        hiddens = []
        initials.append([])
        # Shared (non task-specialized) recurrent layers.
        for i in range(num_layers - specialized_layer_num):
            brick = [b for b in bin_model.get_top_bricks()
                     if b.name == layer_models[i] + str(i) + '-' + str(-1)][0]
            hiddens.extend(VariableFilter(theano_name=brick.name + '_apply_states')(bin_model.variables))
            hiddens.extend(VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
            initials[hierarchy_index].extend(
                VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
        # Task-specialized top layers, one set per game task.
        specialized_count = len(game_tasks) if task_specialized else 0
        for task in range(specialized_count):
            for i in range(num_layers - specialized_layer_num, num_layers):
                brick = [b for b in bin_model.get_top_bricks()
                         if b.name == layer_models[i] + str(i) + '-' + str(task)][0]
                hiddens.extend(VariableFilter(theano_name=brick.name + '_apply_states')(bin_model.variables))
                hiddens.extend(VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
                initials[hierarchy_index].extend(
                    VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))
        # One prediction function per task output (or a single one if not specialized).
        output_count = len(game_tasks) if task_specialized else 1
        predict_funcs.append([])
        for task in range(output_count):
            predict_funcs[hierarchy_index].append(
                theano.function([x], hiddens + [y_hat[task]]))
    return predict_funcs, initials
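def _example_hierarchy_predict(features_batch):
    # Hedged sketch, not from the original source: predict_funcs is indexed as
    # [hierarchy_level][task]; each compiled function returns the hidden
    # states followed by that task's y_hat. `features_batch` is hypothetical
    # input of shape (seq_len, batch, in_size).
    predict_funcs, initials = load_models()
    outputs = predict_funcs[0][0](features_batch)
    hiddens, y_hat_val = outputs[:-1], outputs[-1]
    return y_hat_val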