class TrainModel():
    """Build the NPI model and its interpreter from ``param`` and evaluate it.

    ``param`` is the configuration mapping; ``__init__`` fills in defaults for
    optional keys *in place*, loads the pickled test data, builds the
    TensorFlow graph/session (restoring a checkpoint from ``model_dir`` when
    one exists) and creates the reward ``Interpreter``.

    The code is written so that it parses under both Python 2 and Python 3:
    every ``print`` call receives a single pre-formatted string whose text is
    identical to what the former multi-argument print statements produced.
    """

    # Per-timestep fields of the sampled action sequence that are regrouped
    # from [beam][timestep][batch] order into one dict per (batch, beam).
    _SEQUENCE_KEYS = ('program_type', 'argument_type', 'target_type',
                      'target_table_index', 'argument_table_index')

    def __init__(self, param):
        """Fill defaults in ``param``, load test data, build model/session.

        Args:
            param: configuration mapping (mutated in place with defaults).

        Raises:
            Exception: if ``params_turn_on_after`` is neither ``'epoch'`` nor
                ``'batch'``, or if ``param['parallel'] == 1`` (the parallel
                interpreter path is explicitly disabled here).
        """
        np.random.seed(1)
        tf.set_random_seed(1)
        self.param = param

        for key in ('normalize_wrt_num_args', 'dont_look_back_attention',
                    'concat_query_npistate', 'query_attention'):
            if key not in self.param:
                self.param[key] = False
        # "Don't look back" attention implies query attention.
        if self.param['dont_look_back_attention']:
            self.param['query_attention'] = True
        for key in ('single_reward_function', 'terminate_prog'):
            if key not in self.param:
                self.param[key] = False
        terminate_prog = self.param['terminate_prog']
        if 'train_mode' not in self.param:
            self.param['train_mode'] = 'reinforce'

        self.qtype_wise_batching = self.param['questype_wise_batching']
        self.read_data = ReadBatchData(param)
        print("initialized read data")

        question_type = self.param['question_type']
        wants_relaxed_reward = ('quantitative' in question_type
                                or 'comparative' in question_type)
        if wants_relaxed_reward and 'relaxed_reward_till_epoch' in self.param:
            relaxed_reward_till_epoch = self.param['relaxed_reward_till_epoch']
        else:
            # Any other question type always gets the relaxed reward disabled,
            # even if the key was supplied.
            self.param['relaxed_reward_till_epoch'] = [-1, -1]
            relaxed_reward_till_epoch = [-1, -1]

        if 'params_turn_on_after' not in self.param:
            self.param['params_turn_on_after'] = 'epoch'
        if self.param['params_turn_on_after'] not in ('epoch', 'batch'):
            raise Exception('params_turn_on_after should be epoch or batch')

        if 'print' in self.param:
            self.printing = self.param['print']
        else:
            # NOTE(review): the stored default (False) and the effective
            # default (True) disagree; kept as found because it is unclear
            # which one callers rely on.
            self.param['print'] = False
            self.printing = True

        if 'prune_beam_type_mismatch' not in self.param:
            self.param['prune_beam_type_mismatch'] = 0
        if 'prune_after_epoch_no.' not in self.param:
            self.param['prune_after_epoch_no.'] = [
                self.param['max_epochs'], 1000000
            ]
        if self.param['question_type'] == 'verify':
            boolean_reward_multiplier = 1
        else:
            boolean_reward_multiplier = 0.1
        if 'none_decay' not in self.param:
            self.param['none_decay'] = 0
        if 'print_test_freq' not in self.param:
            self.param['print_test_freq'] = self.param['print_train_freq']
        if 'unused_var_penalize_after_epoch' not in self.param:
            self.param['unused_var_penalize_after_epoch'] = [
                self.param['max_epochs'], 1000000
            ]
        unused_var_penalize_after_epoch = self.param[
            'unused_var_penalize_after_epoch']
        for key in ('epoch_for_feasible_program_at_last_step',
                    'epoch_for_biasing_program_sample_with_target'):
            if key not in self.param:
                self.param[key] = [self.param['max_epochs'], 1000000]
        if 'epoch_for_biasing_program_sample_with_last_variable' not in self.param:
            # NOTE(review): 100000 here vs 1000000 for the sibling thresholds
            # above -- possibly a typo, value kept unchanged.
            self.param[
                'epoch_for_biasing_program_sample_with_last_variable'] = [
                    self.param['max_epochs'], 100000
                ]
        if 'use_var_key_as_onehot' not in self.param:
            self.param['use_var_key_as_onehot'] = False
        if 'reward_function' not in self.param:
            self.param['reward_function'] = "jaccard"
        reward_func = self.param['reward_function']
        if 'relaxed_reward_strict' not in self.param:
            self.param['relaxed_reward_strict'] = False
        relaxed_reward_strict = self.param['relaxed_reward_strict']

        if param['parallel'] == 1:
            raise Exception(
                'Need to fix the intermediate rewards for parallelly executing interpreter'
            )
        for k, v in param.items():
            print('PARAM:  %s ::  %s' % (k, v))
        print('loaded params ')

        self.train_data = []
        self.training_files = self._resolve_data_files(
            param['train_data_file'])
        random.shuffle(self.training_files)
        sys.stdout.flush()

        self.test_data = []
        self.test_files = self._resolve_data_files(param['test_data_file'])
        for path in self.test_files:
            # NOTE: pickle executes arbitrary code on load; only point
            # test_data_file at trusted files.
            with open(path, 'rb') as handle:
                self.test_data.extend(pkl.load(handle))

        # Always defined so perform_full_test() can pass them to get_batch()
        # even when question-type-wise batching is off.
        self.test_data_map = None
        self.test_batch_size_types = None
        if self.qtype_wise_batching:
            self.test_data_map = self.read_data.get_data_per_questype(
                self.test_data)
            self.test_batch_size_types = self.get_batch_size_per_type(
                self.test_data_map)
            num_examples = sum(
                len(x) for x in self.test_data_map.values())
        else:
            num_examples = len(self.test_data)
        self.n_test_batches = self._num_batches(num_examples,
                                                self.param['batch_size'])

        if not os.path.exists(param['model_dir']):
            os.mkdir(param['model_dir'])
        self.model_file = os.path.join(param['model_dir'],
                                       param['model_file'])

        with tf.Graph().as_default():
            start = time.time()
            self.model = NPI(param, self.read_data.none_argtype_index, self.read_data.num_argtypes,
                             self.read_data.num_progs, self.read_data.max_arguments,
                             self.read_data.rel_index, self.read_data.type_index,
                             self.read_data.wikidata_rel_embed, self.read_data.wikidata_type_embed,
                             self.read_data.vocab_init_embed, self.read_data.program_to_argtype,
                             self.read_data.program_to_targettype)
            self.model.create_placeholder()
            [self.action_sequence, self.program_probs, self.logProgramProb, self.Reward_placeholder,
             self.Relaxed_rewards_placeholder, self.train_op, self.loss, self.beam_props,
             self.per_step_probs, self.IfPosIntermediateReward, self.mask_IntermediateReward,
             self.IntermediateReward] = self.model.reinforce()
            #self.program_keys, self.program_embedding, self.word_embeddings, self.argtype_embedding, self.query_attention_h_mat = self.model.get_parameters()
            if param['Debug'] == 0:
                config = tf.ConfigProto()
                config.gpu_options.allow_growth = True
                # Keep this configured session: creating a second plain
                # tf.Session() afterwards would discard allow_growth and leak
                # the first session.
                self.sess = tf.Session(config=config)
            else:
                self.sess = tf_debug.LocalCLIDebugWrapperSession(tf.Session())
            self.saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(param['model_dir'])
            if ckpt and ckpt.model_checkpoint_path:
                print('best model exists in  %s ... restoring from there ' %
                      (self.model_file,))
                self.saver.restore(self.sess, ckpt.model_checkpoint_path)
                print('restored model')
            else:
                init = tf.global_variables_initializer()
                self.sess.run(init)
                print('initialized model')
            end = time.time()
            print('model created in  %s seconds' % ((end - start),))
            sys.stdout.flush()

        self.interpreter = Interpreter(self.param['wikidata_dir'], self.param['num_timesteps'],
                                       self.read_data.program_type_vocab,
                                       self.read_data.argument_type_vocab, self.printing,
                                       terminate_prog, relaxed_reward_strict,
                                       reward_function=reward_func,
                                       boolean_reward_multiplier=boolean_reward_multiplier,
                                       relaxed_reward_till_epoch=relaxed_reward_till_epoch,
                                       unused_var_penalize_after_epoch=unused_var_penalize_after_epoch)
        # Unreachable while parallel == 1 raises above; kept for when the
        # parallel interpreter is re-enabled.
        if self.param['parallel'] == 1:
            self.InterpreterProxy, self.InterpreterProxyListener = proxy.createProxy(
                self.interpreter)
            self.interpreter.parallel = 1
            self.lock = Lock()
        print("initialized interpreter")

    @staticmethod
    def _resolve_data_files(location):
        """Return the list of data files described by ``location``.

        A list is returned as-is (same object), a directory expands to the
        ``.pkl`` files directly inside it, anything else is a single path.
        The list check comes first because ``os.path.isdir`` rejects lists.
        """
        if isinstance(location, list):
            return location
        if os.path.isdir(location):
            return [
                location + '/' + x for x in os.listdir(location)
                if x.endswith('.pkl')
            ]
        return [location]

    @staticmethod
    def _num_batches(num_examples, batch_size):
        """Return ceil(num_examples / batch_size) as an int."""
        return int(math.ceil(float(num_examples) / float(batch_size)))

    def _switched_on(self, key, epoch, overall_step_count):
        """Tell whether the [epoch, step] threshold stored at ``param[key]`` is reached.

        Index 0 is compared with ``epoch`` in 'epoch' mode, index 1 with
        ``overall_step_count`` in 'batch' mode.
        """
        mode = self.param['params_turn_on_after']
        return (mode == 'epoch' and epoch >= self.param[key][0]) or (
            mode == 'batch' and overall_step_count >= self.param[key][1])

    def _exploration_epsilon(self, epoch, overall_step_count):
        """Return ``initial_epsilon`` linearly annealed to 0 over ``param['explore']``.

        Uses true division: with integer operands Python 2's ``/`` would
        floor and turn the anneal into a step function.
        """
        mode = self.param['params_turn_on_after']
        if mode == 'epoch' and self.param['explore'][0] > 0:
            progress = float(epoch) / self.param['explore'][0]
        elif mode == 'batch' and self.param['explore'][1] > 0:
            progress = float(overall_step_count) / self.param['explore'][1]
        else:
            return 0
        return self.param["initial_epsilon"] * np.clip(1.0 - progress, 0, 1)

    def _regroup_action_sequences(self, seq, variable_value_table,
                                  tables_as_lists=False):
        """Regroup ``seq[key][beam][timestep][batch]`` into ``[batch][beam]`` dicts.

        Each dict maps every key of ``_SEQUENCE_KEYS`` to a list over
        ``num_timesteps`` and 'variable_value_table' to that batch item's
        table (converted with ``.tolist()`` when ``tables_as_lists``).
        """
        timesteps = range(self.param['num_timesteps'])
        regrouped = []
        for batch_id in range(self.param['batch_size']):
            per_beam = []
            for beam_id in range(self.param['beam_size']):
                entry = {}
                for key in self._SEQUENCE_KEYS:
                    entry[key] = [
                        seq[key][beam_id][t][batch_id] for t in timesteps
                    ]
                table = variable_value_table[batch_id]
                entry['variable_value_table'] = (
                    table.tolist() if tables_as_lists else table)
                per_beam.append(entry)
            regrouped.append(per_beam)
        return regrouped

    def _print_beam(self, beam_id, beam_step_probs, beam_seq):
        """Print one beam's step probabilities and its program sequence."""
        print('beam id %s' % (beam_id,))
        print('per_step_probs %s' % (beam_step_probs,))
        print('product_per_step_prob %s' % (np.prod(beam_step_probs),))
        pieces = ['per_step_programs [']
        for prog, args in zip(beam_seq['program_type'],
                              beam_seq['argument_table_index']):
            arg_types = self.read_data.program_to_argtype[prog]
            rendered_args = ','.join([
                str(self.read_data.argument_type_vocab_inv[arg_types[arg]])
                + '(' + str(args[arg]) + ')' for arg in range(len(args))
            ])
            pieces.append(self.read_data.program_type_vocab_inv[prog] + '( '
                          + rendered_args + ' )')
        pieces.append(']')
        # One space-joined line, matching the former trailing-comma prints.
        print(' '.join(pieces))

    def perform_full_test(self, epoch, overall_step_count):
        """Run ``perform_test`` over all test batches and average the rewards.

        Iteration starts at ``self.starting_batch`` when a caller has set it,
        otherwise at batch 0. Every batch contributes to the totals; running
        averages are printed every ``print_test_freq`` batches.

        Returns:
            (average reward at the top beam, average best reward over beams),
            both divided by ``n_test_batches``.
        """
        test_reward = 0
        test_reward_at0 = 0
        # This class never assigns starting_batch itself.
        first_batch = getattr(self, 'starting_batch', 0)
        for i in range(first_batch, self.n_test_batches):
            train_batch_dict = self.get_batch(i, self.test_data,
                                              self.test_data_map,
                                              self.test_batch_size_types)
            avg_batch_reward_at0, avg_batch_reward, _ = self.perform_test(
                train_batch_dict, epoch, overall_step_count)
            # Accumulate on every batch; only the report is rate-limited.
            test_reward += avg_batch_reward
            test_reward_at0 += avg_batch_reward_at0
            if i % self.param['print_test_freq'] == 0 and i > 0:
                avg_test_reward = float(test_reward) / float(i + 1)
                avg_test_reward_at0 = float(test_reward_at0) / float(i + 1)
                print(
                    'Valid Results in Epoch %d Step %d (avg over batch) test reward (over all) =%.6f test reward (at top beam)=%.6f running avg test reward (over all)=%.6f running avg test reward (at top beam)=%.6f'
                    % (epoch, i, avg_batch_reward, avg_batch_reward_at0,
                       avg_test_reward, avg_test_reward_at0))
                sys.stdout.flush()
        overall_avg_test_reward = test_reward / float(self.n_test_batches)
        overall_avg_test_reward_at0 = test_reward_at0 / float(
            self.n_test_batches)
        return overall_avg_test_reward_at0, overall_avg_test_reward

    def feeding_dict1(self, encoder_inputs_w2v, encoder_inputs_kb_emb, variable_mask,
                      variable_embed, variable_atten, kb_attention, batch_response_type,
                      batch_required_argtypes, feasible_program_at_last_step,
                      bias_prog_sampling_with_target, bias_prog_sampling_with_last_variable,
                      epoch_inv, epsilon, PruneNow):
        """Build the feed dict for the model's input placeholders.

        ``variable_atten`` is accepted for signature compatibility but is not
        fed to any placeholder here.
        """
        feed_dict = {}
        for model_enc_inputs_w2v, enc_inputs_w2v in zip(
                self.model.encoder_text_inputs_w2v, encoder_inputs_w2v):
            feed_dict[model_enc_inputs_w2v] = enc_inputs_w2v
        feed_dict[
            self.model.encoder_text_inputs_kb_emb] = encoder_inputs_kb_emb
        for i in range(variable_mask.shape[0]):
            for j in range(variable_mask.shape[1]):
                feed_dict[self.model.preprocessed_var_mask_table[i]
                          [j]] = variable_mask[i][j]
        for i in range(variable_embed.shape[0]):
            for j in range(variable_embed.shape[1]):
                feed_dict[self.model.preprocessed_var_emb_table[i]
                          [j]] = variable_embed[i][j]
        feed_dict[self.model.kb_attention] = kb_attention

        mask_shape = [self.param['batch_size'], self.read_data.num_progs]
        # in phase 1 we should sample only generative programs
        phase_1 = np.zeros(mask_shape, dtype=np.int32)
        for i in self.read_data.program_variable_declaration_phase:
            phase_1[:, i] = 1
        feed_dict[self.model.progs_phase_1] = phase_1
        # in phase 2 we should not sample generative programs and we can
        # sample all other programs
        phase_2 = np.zeros(mask_shape, dtype=np.int32)
        for i in self.read_data.program_algorithm_phase:
            phase_2[:, i] = 1
        feed_dict[self.model.progs_phase_2] = phase_2

        feed_dict[self.model.gold_target_type] = batch_response_type
        feed_dict[self.model.required_argtypes] = batch_required_argtypes
        feed_dict[self.model.randomness_threshold_beam_search] = epsilon
        feed_dict[self.model.DoPruning] = PruneNow
        # Scalars are fed as (1, 1) arrays.
        feed_dict[self.model.relaxed_reward_multipler] = epoch_inv * np.ones(
            (1, 1), dtype=np.float32)
        feed_dict[self.model.last_step_feasible_program] = (
            feasible_program_at_last_step * np.ones((1, 1)))
        feed_dict[self.model.bias_prog_sampling_with_target] = (
            bias_prog_sampling_with_target * np.ones((1, 1)))
        feed_dict[self.model.bias_prog_sampling_with_last_variable] = (
            bias_prog_sampling_with_last_variable * np.ones((1, 1)))
        return feed_dict

    def map_multiply(self, arg):
        """Weight ``arg[0]`` by ``arg[1]`` per (batch, beam) and sum over both axes.

        ``arg[0]`` is flattened to (batch_size * beam_size, -1) and ``arg[1]``
        to (batch_size * beam_size, 1) for the multiplication.
        """
        orig_shape = arg[0].shape
        rows = self.param['batch_size'] * self.param['beam_size']
        values = np.reshape(arg[0], (rows, -1))
        weights = np.reshape(arg[1], (rows, 1))
        mul = np.reshape(np.multiply(values, weights), orig_shape)
        return np.sum(mul, axis=(0, 1))

    # Disable
    def blockPrint(self):
        """Redirect ``sys.stdout`` to the null device."""
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull

    # Restore
    def enablePrint(self):
        """Restore ``sys.stdout`` and close the handle opened by ``blockPrint``."""
        sys.stdout = sys.__stdout__
        handle = getattr(self, '_devnull', None)
        if handle is not None:
            handle.close()
            self._devnull = None

    def parallel_forward_pass_interpreter(self, batch_orig_context, a_seq, per_step_probs,
                                          program_probabilities, variable_value_table,
                                          batch_response_entities, batch_response_ints,
                                          batch_response_bools):
        """Score every (batch, beam) program in separate processes.

        Only reachable when the parallel interpreter is enabled, which
        ``__init__`` currently rejects.

        Returns:
            Array built from ``self.interpreter.rewards``; entries are
            replaced by -1 where the model's reward flag is non-zero when
            ``param['reward_from_model']`` is set.
        """
        Reward_From_Model = np.transpose(np.array(a_seq['Model_Reward_Flag']))
        # Plain Python lists so the sequences can be msgpack-serialised.
        old_seq = {}
        for key in self._SEQUENCE_KEYS:
            old_seq[key] = np.array(a_seq[key]).tolist()
        new_a_seq = self._regroup_action_sequences(
            old_seq, variable_value_table, tables_as_lists=True)

        def calculate_program_reward(shared_object, arg_f):
            shared_object.calculate_program_reward(arg_f)

        def parallel_fetch_interpreter(l, f, arg_f, shared_object):
            # 'with' releases the lock even if the call raises.
            with l:
                f(shared_object, arg_f)

        self.interpreter.rewards = [
            [None for beam_id in range(self.param['beam_size'])]
            for batch_id in range(self.param['batch_size'])
        ]
        self.blockPrint()
        jobs = []
        for batch_id in range(self.param['batch_size']):
            for beam_id in range(self.param['beam_size']):
                args = (new_a_seq[batch_id][beam_id],
                        batch_response_entities[batch_id],
                        batch_response_ints[batch_id],
                        batch_response_bools[batch_id], beam_id, batch_id)
                arg_f = msgpack.packb(args, use_bin_type=True)
                p = Process(target=parallel_fetch_interpreter,
                            args=(self.lock, calculate_program_reward, arg_f,
                                  self.InterpreterProxy))
                jobs.append(p)
                p.start()
        # NOTE(review): the position of this first listen() relative to the
        # job-spawning loops could not be recovered from the flattened
        # source -- confirm before re-enabling the parallel path.
        self.InterpreterProxyListener.listen()
        while any(job.is_alive() for job in jobs):
            self.InterpreterProxyListener.listen()
        for job in jobs:
            if job.is_alive():
                job.join()
        self.enablePrint()

        if self.printing:
            for batch_id in range(self.param['batch_size']):
                print('batch id  %s :: Query ::  %s' %
                      (batch_id, batch_orig_context[batch_id]))
                for beam_id in range(self.param['beam_size']):
                    self._print_beam(beam_id,
                                     per_step_probs[batch_id, beam_id],
                                     new_a_seq[batch_id][beam_id])

        rewards = np.array(self.interpreter.rewards)
        if self.param['reward_from_model']:
            rewards = np.where(Reward_From_Model == 0, rewards,
                               -1 * np.ones_like(rewards))
        return rewards

    def forward_pass_interpreter(self, batch_orig_context, a_seq, per_step_probs,
                                 program_probabilities, variable_value_table,
                                 batch_response_entities, batch_response_ints,
                                 batch_response_bools, epoch_number, overall_step_count):
        """Score every (batch, beam) program with the interpreter, sequentially.

        Returns:
            Five arrays indexed [batch][beam]: rewards, intermediate rewards,
            relaxed rewards, intermediate-reward flags and intermediate-reward
            masks. Rewards are replaced by -1 where the model's reward flag is
            non-zero when ``param['reward_from_model']`` is set.
        """
        Reward_From_Model = np.transpose(np.array(a_seq['Model_Reward_Flag']))
        new_a_seq = self._regroup_action_sequences(a_seq,
                                                   variable_value_table)
        for batch_id in range(self.param['batch_size']):
            for beam_id in range(self.param['beam_size']):
                new_a_seq[batch_id][beam_id][
                    'program_probability'] = program_probabilities[batch_id][
                        beam_id]

        rewards = []
        intermediate_rewards_flag = []
        mask_intermediate_rewards = []
        intermediate_rewards = []
        relaxed_rewards = []
        for batch_id in range(self.param['batch_size']):
            if self.printing:
                print('batch id  %s :: Query ::  %s' %
                      (batch_id, batch_orig_context[batch_id]))
            rewards_batch = []
            intermediate_rewards_flag_batch = []
            relaxed_rewards_batch = []
            mask_intermediate_rewards_batch = []
            intermediate_rewards_batch = []
            for beam_id in range(self.param['beam_size']):
                if self.printing:
                    self._print_beam(beam_id,
                                     per_step_probs[batch_id, beam_id],
                                     new_a_seq[batch_id][beam_id])
                args = (new_a_seq[batch_id][beam_id],
                        batch_response_entities[batch_id],
                        batch_response_ints[batch_id],
                        batch_response_bools[batch_id])
                (reward, max_intermediate_reward, relaxed_reward,
                 intermediate_mask, intermediate_reward_flag
                 ) = self.interpreter.calculate_program_reward(
                     args, epoch_number, overall_step_count)
                rewards_batch.append(reward)
                intermediate_rewards_flag_batch.append(
                    intermediate_reward_flag)
                relaxed_rewards_batch.append(relaxed_reward)
                mask_intermediate_rewards_batch.append(intermediate_mask)
                intermediate_rewards_batch.append(max_intermediate_reward)
            rewards.append(rewards_batch)
            intermediate_rewards_flag.append(intermediate_rewards_flag_batch)
            mask_intermediate_rewards.append(mask_intermediate_rewards_batch)
            intermediate_rewards.append(intermediate_rewards_batch)
            relaxed_rewards.append(relaxed_rewards_batch)

        rewards = np.array(rewards)
        if self.param['reward_from_model']:
            rewards = np.where(Reward_From_Model == 0, rewards,
                               -1 * np.ones_like(rewards))
        intermediate_rewards = np.array(intermediate_rewards)
        intermediate_rewards_flag = np.array(intermediate_rewards_flag)
        mask_intermediate_rewards = np.array(mask_intermediate_rewards)
        relaxed_rewards = np.array(relaxed_rewards)
        return rewards, intermediate_rewards, relaxed_rewards, intermediate_rewards_flag, mask_intermediate_rewards

    def get_ml_rewards(self, rewards):
        """Turn per-beam rewards into a one-hot (+1 / -1) target per batch item.

        For each batch item the first beam holding the highest reward above
        -100 is marked 1.0 if that reward is positive and -1.0 if it is
        negative; everything else (including a best reward of exactly 0)
        stays 0.
        """
        ml_rewards = np.zeros(
            (self.param['batch_size'], self.param['beam_size']))
        for i in range(self.param['batch_size']):
            max_reward = -100.0
            max_index = -1
            for j in range(self.param['beam_size']):
                if rewards[i][j] > max_reward:
                    max_reward = rewards[i][j]
                    max_index = j
            if max_index == -1:
                continue
            if max_reward > 0:
                ml_rewards[i][max_index] = 1.0
            elif max_reward < 0:
                ml_rewards[i][max_index] = -1.0
        return ml_rewards

    def get_data_and_feed_dict(self, batch_dict, epoch, overall_step_count):
        """Convert a raw batch into model inputs plus the gold responses.

        Schedule-controlled switches (feasible program at last step, sampling
        biases, relaxed-reward multiplier, exploration epsilon, pruning) are
        derived from ``epoch`` or ``overall_step_count`` depending on
        ``param['params_turn_on_after']``.

        Returns:
            (feed_dict, batch_orig_context, batch_response_entities,
            batch_response_ints, batch_response_bools, variable_value_table)
        """
        batch_orig_context, batch_context_nonkb_words, batch_context_kb_words, \
        batch_context_entities, batch_context_types, batch_context_rel, batch_context_ints, \
        batch_orig_response, batch_response_entities, batch_response_ints, batch_response_bools, \
        batch_response_type, batch_required_argtypes, \
        variable_mask, variable_embed, variable_atten, kb_attention, \
        variable_value_table = self.read_data.get_batch_data(batch_dict)

        mode = self.param['params_turn_on_after']

        if self._switched_on('epoch_for_feasible_program_at_last_step',
                             epoch, overall_step_count):
            feasible_program_at_last_step = 1.
            print('Using feasible_program_at_last_step')
        else:
            feasible_program_at_last_step = 0.

        if self._switched_on('epoch_for_biasing_program_sample_with_target',
                             epoch, overall_step_count):
            print('Using program biasing with target')
            bias_prog_sampling_with_target = 1.
        else:
            bias_prog_sampling_with_target = 0.

        if self._switched_on(
                'epoch_for_biasing_program_sample_with_last_variable', epoch,
                overall_step_count):
            bias_prog_sampling_with_last_variable = 1.
        else:
            bias_prog_sampling_with_last_variable = 0.

        # The relaxed-reward weight decays linearly to 0 at the threshold.
        if self._switched_on('relaxed_reward_till_epoch', epoch,
                             overall_step_count):
            relaxed_reward_multipler = 0.
        elif mode == 'epoch':
            horizon = self.param['relaxed_reward_till_epoch'][0]
            relaxed_reward_multipler = np.clip(
                (horizon - epoch) / float(horizon), 0, 1)
        elif mode == 'batch':
            horizon = self.param['relaxed_reward_till_epoch'][1]
            relaxed_reward_multipler = np.clip(
                (horizon - overall_step_count) / float(horizon), 0, 1)

        epsilon = self._exploration_epsilon(epoch, overall_step_count)

        PruneNow = 0
        if self._switched_on('prune_after_epoch_no.', epoch,
                             overall_step_count):
            PruneNow = 1

        feed_dict1 = self.feeding_dict1(batch_context_nonkb_words, batch_context_kb_words,
                                        variable_mask, variable_embed, variable_atten,
                                        kb_attention, batch_response_type,
                                        batch_required_argtypes, feasible_program_at_last_step,
                                        bias_prog_sampling_with_target,
                                        bias_prog_sampling_with_last_variable,
                                        relaxed_reward_multipler, epsilon, PruneNow)
        return feed_dict1, batch_orig_context, batch_response_entities, batch_response_ints, batch_response_bools, variable_value_table

    def perform_test(self, batch_dict, epoch, overall_step_count):
        """Decode one batch, score it and print per-beam rewards.

        Returns:
            (average reward at beam 0, average of the best reward over beams,
            0) -- negative rewards are clamped to 0 first.
        """
        feed_dict1, batch_orig_context, batch_response_entities, batch_response_ints, \
            batch_response_bools, variable_value_table = self.get_data_and_feed_dict(
                batch_dict, epoch, overall_step_count)
        a_seq, program_probabilities, per_step_probs = self.sess.run(
            [self.action_sequence, self.program_probs, self.per_step_probs],
            feed_dict=feed_dict1)
        # reshaping per_step_probs for printability
        per_step_probs = np.array(per_step_probs)
        [reward, intermediate_rewards, relaxed_rewards, intermediate_rewards_flag,
         mask_intermediate_rewards] = self.forward_pass_interpreter(
             batch_orig_context, a_seq, per_step_probs, program_probabilities,
             variable_value_table, batch_response_entities, batch_response_ints,
             batch_response_bools, epoch, overall_step_count)
        reward = np.array(reward)
        relaxed_rewards = np.array(relaxed_rewards)
        reward[reward < 0.] = 0.
        self.print_reward(reward)
        batch_size = float(self.param['batch_size'])
        return (sum(reward[:, 0]) / batch_size,
                sum(np.max(reward, axis=1)) / batch_size, 0)

    def perform_training(self, batch_dict, epoch, overall_step_count):
        """Run one training step on a batch.

        With ``param['Debug'] == 0`` the step is a TensorFlow partial run:
        programs are decoded first, scored by the interpreter, and the
        rewards are then fed back to run ``train_op``. In debug mode only the
        decoding is run and rewards/loss are reported as zero.

        Returns:
            (average reward at beam 0, average best reward over beams,
            loss / batch_size)
        """
        feed_dict1, batch_orig_context, batch_response_entities, batch_response_ints, \
            batch_response_bools, variable_value_table = self.get_data_and_feed_dict(
                batch_dict, epoch, overall_step_count)
        # =============================================================================
        # For Proper Run Use this
        # =============================================================================
        if self.param['Debug'] == 0:
            reward_placeholders = [self.Reward_placeholder, self.Relaxed_rewards_placeholder,
                                   self.IfPosIntermediateReward, self.mask_IntermediateReward,
                                   self.IntermediateReward]
            partial_run_op = self.sess.partial_run_setup(
                [self.action_sequence, self.program_probs, self.logProgramProb,
                 self.train_op, self.loss, self.beam_props, self.per_step_probs],
                list(feed_dict1.keys()) + reward_placeholders)
            a_seq, program_probabilities, logprogram_probabilities, \
                beam_props, per_step_probs = self.sess.partial_run(
                    partial_run_op,
                    [self.action_sequence, self.program_probs, self.logProgramProb,
                     self.beam_props, self.per_step_probs], feed_dict=feed_dict1)
            # reshaping per_step_probs for printability
            per_step_probs = np.array(per_step_probs)
            if self.param['parallel'] != 1:
                [reward, intermediate_rewards, relaxed_rewards, intermediate_rewards_flag,
                 mask_intermediate_rewards] = self.forward_pass_interpreter(
                     batch_orig_context, a_seq, per_step_probs, program_probabilities,
                     variable_value_table, batch_response_entities, batch_response_ints,
                     batch_response_bools, epoch, overall_step_count)
            else:
                # NOTE(review): this path yields only `reward`; the relaxed /
                # intermediate reward arrays used below are not produced, so
                # it cannot complete (and __init__ rejects parallel == 1).
                reward = self.parallel_forward_pass_interpreter(
                    batch_orig_context, a_seq, per_step_probs, program_probabilities,
                    variable_value_table, batch_response_entities, batch_response_ints,
                    batch_response_bools)
            reward = np.array(reward)
            relaxed_rewards = np.array(relaxed_rewards)
            if self.param['train_mode'] == 'ml':
                reward = self.get_ml_rewards(reward)
            rescaling_term_grad = reward
            a, loss = self.sess.partial_run(
                partial_run_op, [self.train_op, self.loss],
                feed_dict={self.Reward_placeholder: rescaling_term_grad,
                           self.Relaxed_rewards_placeholder: relaxed_rewards,
                           self.IfPosIntermediateReward: intermediate_rewards_flag,
                           self.mask_IntermediateReward: mask_intermediate_rewards,
                           self.IntermediateReward: intermediate_rewards})
        # =============================================================================
        # For Debugging Use This
        # =============================================================================
        else:
            [a_seq, program_probabilities, logprogram_probabilities,
             beam_props, per_step_probs] = self.sess.run(
                 [self.action_sequence, self.program_probs, self.logProgramProb,
                  self.beam_props, self.per_step_probs], feed_dict=feed_dict1)
            per_step_probs = np.array(per_step_probs)
            reward = np.zeros(
                [self.param['batch_size'], self.param['beam_size']])
            loss = 0
        reward[reward < 0.] = 0.
        self.print_reward(reward)
        batch_size = float(self.param['batch_size'])
        return (sum(reward[:, 0]) / batch_size,
                sum(np.max(reward, axis=1)) / batch_size, loss / batch_size)

    def print_reward(self, reward):
        """Print, per beam, the batch-averaged best-so-far and at-beam rewards."""
        batch_size = len(reward)
        beam_size = len(reward[0])
        best_reward_till_beam = dict((i, 0.0) for i in range(beam_size))
        avg_reward_at_beam = dict((i, 0.0) for i in range(beam_size))
        for batch_id in range(batch_size):
            for beam_id in range(beam_size):
                best_reward_till_beam[beam_id] += float(
                    max(reward[batch_id][:(beam_id + 1)]))
                avg_reward_at_beam[beam_id] += float(reward[batch_id][beam_id])
        for k in range(beam_size):
            print(
                'for beam  %s  best reward till this beam %s  (avg reward at this beam = %s )'
                % (k, best_reward_till_beam[k] / float(batch_size),
                   avg_reward_at_beam[k] / float(batch_size)))

    def remove_bad_data(self, data):
        """Drop, in place, examples whose first field is an unusable utterance.

        An utterance is unusable when it contains 'no, i meant' or 'could you
        tell me the answer for that?', or when at most one character is left
        after removing 'yes', 'no' and punctuation. Returns ``data``.
        """
        punctuation = set(string.punctuation)

        def is_bad(example):
            utter = example[0].lower()
            stripped = utter.replace('yes', '').replace('no', '')
            stripped = re.sub(' +', ' ', stripped)
            stripped = ''.join(
                ch for ch in stripped if ch not in punctuation).strip()
            return ('no, i meant' in utter
                    or 'could you tell me the answer for that?' in utter
                    or len(stripped) <= 1)

        # Slice assignment keeps the caller's list object, like the former
        # remove()-based loop, but in a single pass.
        data[:] = [d for d in data if not is_bad(d)]
        return data

    def get_batch_size_per_type(self, data_map):
        """Split ``batch_size`` as evenly as possible across question types.

        Raises:
            Exception: if the per-type sizes do not add up to ``batch_size``.
        """
        num_data_types = len(data_map)
        batch_size_types = {}
        for qtype in data_map:
            batch_size_types[qtype] = int(
                float(self.param['batch_size']) / float(num_data_types))
        diff = self.param['batch_size'] - sum(batch_size_types.values())
        qtypes = list(data_map)
        count = 0
        # Hand the remainder out one slot at a time, cycling over the types.
        while diff > 0 and count < len(qtypes):
            batch_size_types[qtypes[count]] += 1
            count += 1
            if count == len(qtypes):
                count = 0
            diff -= 1
        if sum(batch_size_types.values()) != self.param['batch_size']:
            raise Exception(
                "sum(batch_size_types.values())!=self.param['batch_size']")
        return batch_size_types

    def get_batch(self, i, data, data_map, batch_size_types):
        """Return the ``i``-th batch, padded from the start of the data if short.

        Without question-type batching the batch is a slice of ``data``;
        otherwise each type in ``data_map`` contributes
        ``batch_size_types[type]`` examples.

        Raises:
            Exception: if the assembled batch is not ``batch_size`` long.
        """
        batch_size = self.param['batch_size']
        if not self.qtype_wise_batching:
            batch_dict = data[i * batch_size:(i + 1) * batch_size]
            if len(batch_dict) < batch_size:
                batch_dict.extend(data[:batch_size - len(batch_dict)])
        else:
            batch_dict = []
            for qtype in data_map:
                quota = batch_size_types[qtype]
                chunk = data_map[qtype][i * quota:(i + 1) * quota]
                if len(chunk) < quota:
                    chunk.extend(data_map[qtype][:quota - len(chunk)])
                batch_dict.extend(chunk)
        if len(batch_dict) != batch_size:
            raise Exception("len(batch_dict)!=self.param['batch_size']")
        return batch_dict

    def train(self):
        """Evaluate the restored model on the test data and print the averages.

        The epoch number and overall step count are read from the second
        space-separated field of the first two lines of
        ``<model_dir>/metadata.txt``.
        """
        print('Going for test')
        with open(self.param['model_dir'] + '/metadata.txt') as meta:
            fr = meta.readlines()
        epoch_number = int(fr[0].split(' ')[1].strip())
        overall_step_count = int(fr[1].split(' ')[1].strip())
        avg_test_reward_at0, avg_test_reward = self.perform_full_test(
            epoch_number, overall_step_count)
        print(
            'Validation over... overall avg. test reward (over all) %s  test reward (at top beam) %s'
            % (avg_test_reward, avg_test_reward_at0))
def _print_items(*items):
    """Write ``items`` to stdout separated by single spaces.

    Emits the same text as the Python 2 statement ``print a, b, c`` while
    remaining valid syntax on Python 3.
    """
    print(' '.join(str(item) for item in items))


class TrainModel():
    """Builds the NPI model and trains it from interpreter rewards.

    NOTE(review): a class named ``TrainModel`` is also defined earlier in
    this file; this later definition is the one the name refers to once the
    module has been executed.
    """

    def __init__(self, param):
        """Load data helpers, build the TF graph and open a session.

        Args:
            param: Configuration dict. Keys read here: ``train_data_file``
                and ``valid_data_file`` (a path or a list of paths) and
                ``model_dir``. The dict is also passed on to
                ``ReadBatchData`` and ``NPI``.
        """
        self.param = param
        self.read_data = ReadBatchData(param)
        print("initialized read data")
        self.interpreter = Interpreter(self.read_data.program_type_vocab,
                                       self.read_data.argument_type_vocab)
        print("initialized interpreter")

        self.train_data = []
        if not isinstance(param['train_data_file'], list):
            self.training_files = [param['train_data_file']]
        else:
            self.training_files = param['train_data_file']
            # Shuffles the caller's list in place.
            random.shuffle(self.training_files)
        print('Training data loaded')
        sys.stdout.flush()

        self.valid_data = []
        if not isinstance(param['valid_data_file'], list):
            self.valid_files = [param['valid_data_file']]
        else:
            self.valid_files = param['valid_data_file']
        for path in self.valid_files:
            # SECURITY: unpickling can execute arbitrary code; only load
            # files from a trusted source.
            with open(path, 'rb') as handle:
                self.valid_data.extend(pkl.load(handle))

        if not os.path.exists(param['model_dir']):
            os.mkdir(param['model_dir'])
        self.model_file = os.path.join(param['model_dir'], "best_model")

        with tf.Graph().as_default():
            self.model = NPI(
                param, self.read_data.none_argtype_index,
                self.read_data.num_argtypes, self.read_data.num_progs,
                self.read_data.max_arguments, self.read_data.rel_index,
                self.read_data.type_index, self.read_data.wikidata_rel_embed,
                self.read_data.wikidata_type_embed,
                self.read_data.vocab_init_embed,
                self.read_data.program_to_argtype,
                self.read_data.program_to_targettype)
            self.model.create_placeholder()
            (self.action_sequence, self.program_probs,
             self.gradients) = self.model.reinforce()
            self.train_op = self.model.train()
            print('model created')
            sys.stdout.flush()
            self.saver = tf.train.Saver()
            init = tf.initialize_all_variables()
            self.sess = tf.Session(
            )  #tf_debug.LocalCLIDebugWrapperSession(tf.Session())
            # Any entry in model_dir is taken as evidence of a saved model.
            if len(glob.glob(os.path.join(param['model_dir'], '*'))) > 0:
                print("best model exists .. restoring from there ")
                self.saver.restore(self.sess, self.model_file)
            else:
                print("initializing fresh variables")
                self.sess.run(init)

    def feeding_dict1(self, encoder_inputs_w2v, encoder_inputs_kb_emb,
                      variable_mask, variable_embed, kb_attention):
        """Build the feed dict for the forward (sampling) pass.

        Args:
            encoder_inputs_w2v: Sequence zipped element-wise with
                ``self.model.encoder_text_inputs_w2v``.
            encoder_inputs_kb_emb: Value fed to
                ``self.model.encoder_text_inputs_kb_emb``.
            variable_mask: Array-like with a ``shape`` of at least two
                dimensions; entry ``[i][j]`` is fed to
                ``self.model.preprocessed_var_mask_table[i][j]``.
            variable_embed: Same layout, fed to
                ``self.model.preprocessed_var_emb_table[i][j]``.
            kb_attention: Value fed to ``self.model.kb_attention``.

        Returns:
            Dict mapping model placeholders to the supplied values.
        """
        feed_dict = {}
        for model_enc_inputs_w2v, enc_inputs_w2v in zip(
                self.model.encoder_text_inputs_w2v, encoder_inputs_w2v):
            feed_dict[model_enc_inputs_w2v] = enc_inputs_w2v
        feed_dict[
            self.model.encoder_text_inputs_kb_emb] = encoder_inputs_kb_emb
        _print_items('variable_mask', variable_mask.shape)
        for i in range(variable_mask.shape[0]):
            for j in range(variable_mask.shape[1]):
                feed_dict[self.model.preprocessed_var_mask_table[i]
                          [j]] = variable_mask[i][j]
        for i in range(variable_embed.shape[0]):
            for j in range(variable_embed.shape[1]):
                feed_dict[self.model.preprocessed_var_emb_table[i]
                          [j]] = variable_embed[i][j]
        feed_dict[self.model.kb_attention] = kb_attention
        return feed_dict

    def feeding_dict2(self, grad_values):
        """Build the feed dict for the gradient-application pass.

        Args:
            grad_values: Sequence with one entry per element of
                ``self.model.grad_values``, in the same order.

        Returns:
            Dict mapping each ``self.model.grad_values`` entry to the value
            at the same position in ``grad_values``.

        Raises:
            AssertionError: If the two sequences differ in length.
        """
        assert len(self.model.grad_values) == len(grad_values)
        return dict(zip(self.model.grad_values, grad_values))

    def map_multiply(self, arg):
        """Scale per-(example, beam) values and sum them over batch and beam.

        Args:
            arg: Pair ``(values, weights)``. ``values`` is reshaped to
                ``(batch_size * beam_size, -1)`` and ``weights`` to
                ``(batch_size * beam_size, 1)`` before multiplying.

        Returns:
            The product, restored to the shape of ``values`` and summed over
            its first two axes.
        """
        rows = self.param['batch_size'] * self.param['beam_size']
        orig_shape = arg[0].shape
        arg0 = np.reshape(arg[0], (rows, -1))
        arg1 = np.reshape(arg[1], (rows, 1))
        mul = np.reshape(np.multiply(arg0, arg1), orig_shape)
        return np.sum(mul, axis=(0, 1))

    def generate_threads_for_interpreter(self, a_seq, variable_value_table):
        """Regroup step-major action sequences into one dict per example.

        Args:
            a_seq: Dict whose entries under ``program_type``,
                ``argument_type``, ``target_type``, ``target_table_index``
                and ``argument_table_index`` are indexed ``[step][example]``.
            variable_value_table: Sequence indexed by example.

        Returns:
            List of ``self.param['batch_size']`` dicts. Each holds, for the
            five keys above, the list of that example's per-step values, plus
            its ``variable_value_table`` entry.
        """
        _print_items(
            len(a_seq['argument_table_index']),
            len(a_seq['argument_table_index'][0]),
            a_seq['argument_table_index'][0][0])
        keys = [
            'program_type', 'argument_type', 'target_type',
            'target_table_index', 'argument_table_index'
        ]
        batch_length_set_sequences = []
        for i in range(self.param['batch_size']):
            new_dict = {key: [step[i] for step in a_seq[key]] for key in keys}
            new_dict['variable_value_table'] = variable_value_table[i]
            batch_length_set_sequences.append(new_dict)
        return batch_length_set_sequences

    def perform_training(self, batch_dict):
        """Sample programs for one batch, score them and apply one update.

        The sampled programs are executed by the interpreter, rewarded
        against the gold responses, and the gradients returned by the model
        are weighted by ``reward - baseline``. The baseline is the sum over
        beams of ``program_probabilities * reward``.

        Args:
            batch_dict: Batch of raw examples accepted by
                ``self.read_data.get_batch_data``.

        Returns:
            Reward array reshaped to ``[batch_size, beam_size]``.
        """
        batch_orig_context, batch_context_nonkb_words, batch_context_kb_words, \
        batch_context_entities, batch_context_types, batch_context_rel, batch_context_ints, \
        batch_orig_response, batch_response_entities, batch_response_ints, batch_response_bools, \
        variable_mask, variable_embed, variable_atten, kb_attention, variable_value_table = self.read_data.get_batch_data(batch_dict)
        feed_dict1 = self.feeding_dict1(batch_context_nonkb_words,
                                        batch_context_kb_words, variable_mask,
                                        variable_embed, kb_attention)
        a_seq, program_probabilities, grad = self.sess.run(
            [self.action_sequence, self.program_probs, self.gradients],
            feed_dict=feed_dict1)
        data_for_interpreter = self.generate_threads_for_interpreter(
            a_seq, variable_value_table)
        target_value, target_type_id, Flag = zip(*map(
            self.interpreter.execute_multiline_program, data_for_interpreter))
        reward = self.interpreter.calculate_reward(
            target_value, target_type_id, Flag, batch_response_entities,
            batch_response_ints, batch_response_bools)
        reward = np.reshape(
            reward, [self.param['batch_size'], self.param['beam_size']])
        current_baseline = np.sum(
            np.multiply(program_probabilities, reward), axis=1, keepdims=True)
        rescaling_term_grad = np.subtract(np.array(reward), current_baseline)
        # NOTE(review): feeding_dict2 pairs these values positionally with
        # self.model.grad_values, so grad.keys() must iterate in that same
        # order - confirm against the model.
        grad_values = [
            self.map_multiply([grad[x], rescaling_term_grad])
            for x in grad.keys()
        ]
        feed_dict2 = self.feeding_dict2(grad_values)
        self.sess.run([self.train_op], feed_dict=feed_dict2)
        return reward

    def train(self):
        """Run the training loop over all epochs and training files.

        Each training file is unpickled, shuffled and consumed in batches of
        ``self.param['batch_size']``; a short final batch is topped up from
        the start of the file. The value reported as "loss" is the sum of the
        array returned by ``perform_training``.
        """
        best_valid_loss = float("inf")
        best_valid_epoch = 0
        last_overall_avg_train_loss = None
        overall_step_count = 0
        batch_size = self.param['batch_size']
        for epoch in range(self.param['max_epochs']):
            len_train_data = 0.
            train_loss = 0.
            for path in self.training_files:
                # SECURITY: unpickling can execute arbitrary code; only load
                # files from a trusted source. The context manager closes
                # the handle, which used to leak once per file per epoch.
                with open(path, 'rb') as handle:
                    train_data = pkl.load(handle)
                len_train_data = len_train_data + len(train_data)
                random.shuffle(train_data)
                n_batches = int(
                    math.ceil(float(len(train_data)) / float(batch_size)))
                _print_items('number of batches ', n_batches,
                             'len train data ', len(train_data), 'batch size',
                             batch_size)
                sys.stdout.flush()
                for i in range(n_batches):
                    overall_step_count = overall_step_count + 1
                    train_batch_dict = train_data[i * batch_size:
                                                  (i + 1) * batch_size]
                    if len(train_batch_dict) < batch_size:
                        train_batch_dict.extend(
                            train_data[:batch_size - len(train_batch_dict)])
                    sum_batch_loss = sum(
                        self.perform_training(train_batch_dict))
                    avg_batch_loss = sum_batch_loss / float(batch_size)
                    if overall_step_count % self.param['print_train_freq'] == 0:
                        print(
                            'Epoch %d Step %d train loss (avg over batch) =%.6f'
                            % (epoch, i, avg_batch_loss))
                        sys.stdout.flush()
                    train_loss = train_loss + sum_batch_loss
                    avg_train_loss = float(train_loss) / float(i + 1)
                    # NOTE(review): the counter is also incremented at the
                    # top of this loop, so it advances by two per batch -
                    # confirm whether that is intended.
                    overall_step_count += 1
            overall_avg_train_loss = train_loss / float(len_train_data)
            _print_items(
                'epoch ', epoch,
                ' of training is completed ... overall avg. train loss ',
                overall_avg_train_loss)