def load_models(model_path=save_path,
                in_size=len(input_columns),
                out_size=(len(output_columns) - 1 if cost_mode == 'RL-MDN'
                          else len(output_columns)),
                hidden_size=hidden_size,
                num_recurrent_layers=num_recurrent_layers,
                model=layer_models[0]):
    """Restore a saved model and compile a Theano prediction function.

    The network graph is rebuilt with ``nn_fprop`` and the checkpoint at
    ``model_path`` is attached through a ``MainLoop`` whose only extension is
    ``saveload.Load``. For every recurrent layer the brick named
    ``layer_models[i] + str(i)`` is looked up and its state variables, cell
    variables and initial-state parameters are collected.

    Args:
        model_path: Path of the saved model file.
        in_size: Kept for backward compatibility. The value passed in is
            ignored: the input size is always recomputed as the encoder's
            latent size plus ``len(input_columns)``.
        out_size: Output size forwarded to ``nn_fprop``.
        hidden_size: Hidden size forwarded to ``nn_fprop``.
        num_recurrent_layers: Forwarded to ``nn_fprop``; also the number of
            recurrent bricks whose states are collected.
        model: Kept for backward compatibility; not read by this function.

    Returns:
        A tuple ``(predict_func, initials, encoder, code_size)``.
        ``predict_func`` takes the features tensor and returns the collected
        state/cell variables followed by ``y_hat``. ``initials`` is the list
        of ``INITIAL_STATE`` parameters of the recurrent bricks. ``encoder``
        and ``code_size`` are the two values returned by ``load_encoder()``.

    Exits the process with status 1 when ``model_path`` is not a file.
    """
    if not os.path.isfile(model_path):
        print('Could not find model file.')
        # A missing checkpoint is a failure; a zero status would tell the
        # calling shell or pipeline that loading succeeded.
        sys.exit(1)

    print('Loading model from {0}...'.format(model_path))

    x = tensor.tensor3('features', dtype=theano.config.floatX)
    y = tensor.tensor3('targets', dtype='floatX')
    train_flag = [theano.shared(0)]

    # Load the encoder once and reuse it for both the input size and the
    # return value instead of loading it a second time at the end.
    # NOTE(review): assumes load_encoder() returns the same result on every
    # call - confirm.
    encoder, code_size = load_encoder()
    in_size = code_size + len(input_columns)

    y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                  num_recurrent_layers, train_flag)

    main_loop = MainLoop(algorithm=None,
                         data_stream=None,
                         model=Model(cost),
                         extensions=[saveload.Load(model_path)])
    # No training happens here: the extensions are wired to the loop by hand
    # and only their 'before_training' callbacks are run (presumably this is
    # where saveload.Load restores the checkpoint).
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
    bin_model = main_loop.model

    print('Model loaded. Building prediction function...')

    hiddens = []
    initials = []
    for i in range(num_recurrent_layers):
        brick_name = layer_models[i] + str(i)
        # Indexing [0] raises IndexError when no top brick has that name.
        brick = [b for b in bin_model.get_top_bricks()
                 if b.name == brick_name][0]
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_states')(
                bin_model.variables))
        hiddens.extend(
            VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
        initials.extend(
            VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))

    predict_func = theano.function([x], hiddens + [y_hat])
    return predict_func, initials, encoder, code_size
# Encode the prime text as vocabulary indices and add a second axis so the
# sequence becomes a single column.
prime_indices = [char_to_ix[ch] for ch in primetext]
x_curr = numpy.expand_dims(numpy.array(prime_indices, dtype='uint8'), axis=1)

print('Loading model from {0}...'.format(args.model))

# Symbolic inputs and the forward graph of the network.
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers,
                              model)

# A MainLoop without algorithm or data stream is used only as a host for the
# Load extension.
cost_model = Model(cost)
load_extension = saveload.Load(args.model)
main_loop = MainLoop(algorithm=None,
                     data_stream=None,
                     model=cost_model,
                     extensions=[load_extension])
# Wire each extension to the loop by hand and fire only the
# 'before_training' callbacks (presumably this is where the checkpoint is
# restored).
for extension in main_loop.extensions:
    extension.main_loop = main_loop
main_loop._run_extensions('before_training')
bin_model = main_loop.model

# For every layer, gather its state variables, its cell variables and its
# initial-state parameters. Bricks are found by the name <model><index>.
activations = []
initial_states = []
for i in range(num_layers):
    wanted_name = model + str(i)
    brick = [b for b in bin_model.get_top_bricks()
             if b.name == wanted_name][0]

    states_filter = VariableFilter(theano_name=brick.name + '_apply_states')
    activations.extend(states_filter(bin_model.variables))

    cells_filter = VariableFilter(theano_name=brick.name + '_apply_cells')
    activations.extend(cells_filter(cells))

    initial_filter = VariableFilter(roles=[roles.INITIAL_STATE])
    initial_states.extend(initial_filter(brick.parameters))
def load_models(models=hierarchy_models,
                in_size=len(hierarchy_input_columns[level_number_in_hierarchy]),
                out_size=len(hierarchy_output_columns[level_number_in_hierarchy]),
                hidden_size=hidden_size,
                num_layers=num_layers,
                model=layer_models[0]):
    """Load every saved hierarchy model and compile its prediction functions.

    For each entry of ``models`` the network graph is rebuilt with
    ``nn_fprop`` (``training=False``) and the saved model is attached through
    a ``MainLoop`` whose only extension is ``saveload.Load``. State
    variables, cell variables and initial-state parameters are then gathered,
    first from the bricks named ``<layer_model><i>--1`` (the first
    ``num_layers - specialized_layer_num`` layers), then, when
    ``task_specialized`` is true, from the bricks named
    ``<layer_model><i>-<task>`` of the remaining layers for every task.

    Args:
        models: Saved model files, one per hierarchy level.
        in_size: Input size forwarded to ``nn_fprop``.
        out_size: Output size forwarded to ``nn_fprop``.
        hidden_size: Hidden size forwarded to ``nn_fprop``.
        num_layers: Number of layers; forwarded to ``nn_fprop``.
        model: Layer model forwarded to ``nn_fprop``.

    Returns:
        A tuple ``(predict_funcs, initials)``. ``predict_funcs[k]`` is a list
        of Theano functions for hierarchy level ``k`` (one per task when
        ``task_specialized`` is true, otherwise a single one); each returns
        the gathered state/cell variables followed by ``y_hat[task]``.
        ``initials[k]`` is the list of ``INITIAL_STATE`` parameters gathered
        for level ``k``.
    """

    def _gather(loaded_model, cell_vars, brick_name, hidden_vars,
                initial_params):
        # Find the top brick with this name (IndexError if there is none) and
        # append its states, cells and initial-state parameters in that order.
        brick = [b for b in loaded_model.get_top_bricks()
                 if b.name == brick_name][0]
        states_filter = VariableFilter(
            theano_name=brick.name + '_apply_states')
        hidden_vars.extend(states_filter(loaded_model.variables))
        cells_filter = VariableFilter(theano_name=brick.name + '_apply_cells')
        hidden_vars.extend(cells_filter(cell_vars))
        initial_filter = VariableFilter(roles=[roles.INITIAL_STATE])
        initial_params.extend(initial_filter(brick.parameters))

    predict_funcs = []
    initials = []
    for hierarchy_index, saved_model in enumerate(models):
        print('Loading model from {0}...'.format(saved_model))

        x = tensor.tensor3('features', dtype=theano.config.floatX)
        y = tensor.tensor3('targets', dtype=theano.config.floatX)
        y_hat, cost, cells = nn_fprop(x, y, in_size, out_size, hidden_size,
                                      num_layers, model, training=False)

        main_loop = MainLoop(algorithm=None,
                             data_stream=None,
                             model=Model(cost),
                             extensions=[saveload.Load(saved_model)])
        # Only the 'before_training' callbacks are run; no training happens.
        for extension in main_loop.extensions:
            extension.main_loop = main_loop
        main_loop._run_extensions('before_training')
        bin_model = main_loop.model

        print('Model loaded. Building prediction function...')

        hiddens = []
        level_initials = []
        initials.append(level_initials)

        # Layers below the specialised ones carry the task suffix "-1".
        first_specialized = num_layers - specialized_layer_num
        for i in range(first_specialized):
            _gather(bin_model, cells, layer_models[i] + str(i) + '-1',
                    hiddens, level_initials)

        # Specialised layers exist once per task.
        if task_specialized:
            specialized_count = len(game_tasks)
        else:
            specialized_count = 0
        for task in range(specialized_count):
            for i in range(first_specialized, num_layers):
                _gather(bin_model, cells,
                        layer_models[i] + str(i) + '-' + str(task),
                        hiddens, level_initials)

        if task_specialized:
            output_count = len(game_tasks)
        else:
            output_count = 1
        predict_funcs.append([
            theano.function([x], hiddens + [y_hat[task]])
            for task in range(output_count)
        ])

    return predict_funcs, initials
# Pick one random vocabulary character as the prime text.
# NOTE(review): this assignment presumably belongs to a guard (e.g. "no prime
# text given") whose header precedes this chunk - confirm before merging.
args.primetext = ix_to_char[numpy.random.randint(vocab_size)]

# Drop characters the vocabulary does not contain. Membership is tested on
# the mapping itself rather than on a freshly built key list per character.
primetext = ''.join([ch for ch in args.primetext if ch in char_to_ix])
if not primetext:
    raise Exception('primetext characters are not in the vocabulary')

# Encode the prime text as vocabulary indices and add a second axis so the
# sequence becomes a single column.
x_curr = numpy.expand_dims(
    numpy.array([char_to_ix[ch] for ch in primetext], dtype='uint8'), axis=1)

print('Loading model from {0}...'.format(args.model))

# Symbolic inputs and the forward graph of the network.
x = tensor.matrix('features', dtype='uint8')
y = tensor.matrix('targets', dtype='uint8')
y_hat, cost, cells = nn_fprop(x, y, vocab_size, hidden_size, num_layers,
                              model)

# A MainLoop without algorithm or data stream is used only as a host for the
# Load extension: the extensions are wired up by hand and only their
# 'before_training' callbacks are run (presumably this is where the
# checkpoint is restored).
main_loop = MainLoop(algorithm=None,
                     data_stream=None,
                     model=Model(cost),
                     extensions=[saveload.Load(args.model)])
for extension in main_loop.extensions:
    extension.main_loop = main_loop
main_loop._run_extensions('before_training')
bin_model = main_loop.model

# For every layer, gather its state variables, its cell variables and its
# initial-state parameters. Bricks are found by the name <model><index>;
# indexing [0] raises IndexError when no such brick exists.
activations = []
initial_states = []
for i in range(num_layers):
    brick = [b for b in bin_model.get_top_bricks()
             if b.name == model + str(i)][0]
    activations.extend(
        VariableFilter(theano_name=brick.name + '_apply_states')(
            bin_model.variables))
    activations.extend(
        VariableFilter(theano_name=brick.name + '_apply_cells')(cells))
    initial_states.extend(
        VariableFilter(roles=[roles.INITIAL_STATE])(brick.parameters))

# take activations of last element
activations = [act[-1].flatten() for act in activations]

# One symbolic vector per initial state, so the caller can feed the states
# in explicitly; `givens` substitutes them for the stored initial states.
states_as_params = [tensor.vector(dtype=initial.dtype)
                    for initial in initial_states]
# Materialise the pairs so `givens` is a concrete list on Python 2 and 3.
state_givens = list(zip(initial_states, states_as_params))

# Get prob. distribution of the last element in the last seq of the batch
fprop = theano.function([x] + states_as_params,
                        activations + [y_hat[-1, -1, :]],
                        givens=state_givens)