def test_model(simulator, story_int, story_mask, q_int, q_mask, ans_int, ans_mask, params, old_std, f_test, vocab_text, inv_vocab_text): ent_list = { '1': 'office', '2': 'hallway', '3': 'kitchen', '4': 'garden', '5': 'bedroom', '6': 'bathroom', '7': 'mary', '8': 'john', '9': 'daniel', '10': 'sandra', '11': 'milk', '12': 'football', '13': 'apple' } # form vocabs and data numpy.set_printoptions(precision=3) n_samples = len(story_int) acc = 0. sys.stdout = old_std print 'n_samples:' print n_samples story_test = numpy.zeros( (1, params['max_story_len_test'], params['max_sent_len'])) story_mask_test = numpy.zeros( (1, params['max_story_len_test'], params['max_sent_len'], params['dim_emb_story'])) q_test = numpy.zeros((1, params['max_q_num_test'], params['max_sent_len'])) q_mask_test = numpy.zeros( (1, params['max_q_num_test'], params['max_sent_len'], params['dim_emb_story'])) act_selected_test = numpy.zeros( (1, params['max_story_len_test'], (params['ent_range']) * 1 * params['relation_num'])) reward_test = numpy.ones( (1, params['max_story_len_test'], (params['ent_range']) * 1 * params['relation_num'])) reward_immediate_test = numpy.zeros((1, params['max_story_len_test'])) ans_test = numpy.zeros((1, params['max_q_num_test'], params['vocab_size'])) ans_mask_test = numpy.zeros((1, params['max_q_num_test'])) fin_test = numpy.zeros( (1, params['max_q_num_test'], params['dim_emb_env'])) fin_test_pred = numpy.zeros( (1, params['max_q_num_test'], params['dim_emb_env'])) fin_one_hot_test = numpy.zeros( (1, params['max_q_num_test'], (params['ent_range']) * 1 * params['relation_num'])) reward_fin_test = numpy.ones( (1, params['max_q_num_test'], (params['ent_range']) * 1 * params['relation_num'])) working_memory = working_environment(params['enti_num'], params['relation_num_expand']) working_embed = envEmbedding(params['relation_num_expand'], params['enti_num'], params['dim_emb_env']) work_space_table = varTable() mask_sim = numpy.zeros((1, params['max_story_len_test'], params['dim_emb_story'] * params['max_sent_len'])) print 'test started.' 
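    # Simulation loop: each test story is run once. The simulator proposes one
    # (argument-order, relation) action per story time slice, the selected tuple
    # is written into the symbolic working memory, and every question is then
    # answered yes/no by retrieving the relation between its two entities.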
sys.stdout = f_test train_trace_id = 0 # always == 0 for story_id in range(n_samples): story_test[train_trace_id, :len(story_int[0]), :len( story_int[0][0])] = story_int[story_id] story_mask_test[train_trace_id, :len(story_int[0]), :len( story_int[0][0]), :] = story_mask[story_id] q_test[train_trace_id, :len(q_int[0]), :len(q_int[0][0] )] = q_int[story_id] q_mask_test[train_trace_id, :len(q_int[0]), :len( q_int[0][0]), :] = q_mask[story_id] ans_test[train_trace_id, :len(ans_int[0]), :len(ans_int[0][0] )] = ans_int[story_id] for time_slice in range(params['max_story_len_test']): mask_sim[0][time_slice][:] = numpy.ones(params['dim_emb_story'] * params['max_sent_len']) action_probs, retrieve_probs = simulator.predict([ story_test[numpy.newaxis, train_trace_id], story_mask_test[numpy.newaxis, train_trace_id], q_test[numpy.newaxis, train_trace_id], q_mask_test[numpy.newaxis, train_trace_id], mask_sim[:], reward_test[numpy.newaxis, train_trace_id], reward_fin_test[numpy.newaxis, train_trace_id] ]) for time_slice in range(params['max_story_len_test']): tupleList, adjGraph, temp_index = working_memory.returnEnv() action_selected, action_one_hot = select_action_hard( action_probs[:, time_slice, :], params['epsilon']) act_selected_test[train_trace_id, time_slice, :] = action_one_hot arg_1_ptr = action_selected // ((1) * params['relation_num']) arg_2_ptr = ( action_selected - arg_1_ptr * (1) * params['relation_num']) // params['relation_num'] arg_r_ptr = ( action_selected - arg_1_ptr * (1) * params['relation_num']) % params['relation_num'] + 1 arg_r = arg_r_ptr flag = 0 if time_slice < len(story_int[story_id]): for w_id in story_int[story_id][time_slice]: # print inv_vocab_text[str(int(w_id))] if inv_vocab_text[str( int(w_id))] in ent_list.values() and flag == 0: if arg_1_ptr == 0: arg_1 = int(w_id) else: arg_2 = int(w_id) flag = 1 elif inv_vocab_text[str( int(w_id))] in ent_list.values() and flag == 1: if arg_1_ptr == 0: arg_2 = int(w_id) else: arg_1 = int(w_id) slice_reward = 0 for tt in range(time_slice + 1): reward_immediate_test[train_trace_id][tt] += slice_reward * ( params['reward_decay']**(time_slice - tt)) if arg_1 > 0 and arg_2 > 0 and story_mask_test[train_trace_id, time_slice, 0, 0] > 0: # retrieve the table arg_1_int = work_space_table.retr_insert(arg_1, inv_vocab_text) arg_2_int = work_space_table.retr_insert(arg_2, inv_vocab_text) working_memory.modifyEnv((arg_1_int, arg_r, arg_2_int)) # compute fin_train and fin_train_pred retrieved_relation_list = [] reward_temp_list = [] for q_idx in range(len(retrieve_probs[0])): retr_idx = numpy.argmax(retrieve_probs[0, q_idx, :]) arg1_retr_ptr = retr_idx // ((1) * params['relation_num']) arg2_retr_ptr = ( retr_idx - arg1_retr_ptr * (1) * params['relation_num']) // params['relation_num'] relation_pred = ( retr_idx - arg1_retr_ptr * (1) * params['relation_num']) % params['relation_num'] + 1 flag = 0 for q_w in q_int[story_id, 0]: if inv_vocab_text[str( int(q_w))] in ent_list.values() and flag == 0: if arg1_retr_ptr == 0: arg1_retr = int(q_w) else: arg2_retr = int(q_w) flag = 1 elif inv_vocab_text[str( int(q_w))] in ent_list.values() and flag == 1: if arg1_retr_ptr == 0: arg2_retr = int(q_w) else: arg1_retr = int(q_w) retrieve_reward_pre = 0 reward_temp_list.append(retrieve_reward_pre) arg1_retr_int = work_space_table.retr(arg1_retr, inv_vocab_text) arg2_retr_int = work_space_table.retr(arg2_retr, inv_vocab_text) arg_retr_id = working_memory.retrieveRelation( arg1_retr_int, arg2_retr_int) retrieved_relation_list.append(arg_retr_id) one_hot_single = 
numpy.zeros( ((params['ent_range']) * 1 * params['relation_num'])) one_hot_single[retr_idx] = 1 fin_one_hot_test[train_trace_id, q_idx, :] = one_hot_single reward_q_total = 0. ans_shared = 0 for q_idx in range(len(retrieve_probs[0])): ans_word_int = numpy.argmax(ans_test[train_trace_id][0]) + 1 ans_word = inv_vocab_text[str(ans_word_int)] if retrieved_relation_list[q_idx] == 1: ans_pred = 'yes' else: ans_pred = 'no' reward_scalar_q, ans_q, label_q = compute_single_reward( ans_pred, ans_word) reward_scalar_q *= q_mask_test[train_trace_id, q_idx, 0, 0] if ans_q == label_q and q_idx == 0: acc += 1. reward_q_total += reward_scalar_q reward_q_total += reward_temp_list[0] for time_slice in range(params['max_story_len_test']): reward_immediate_test[train_trace_id, time_slice] += reward_q_total * ( params['reward_decay'] **(params['max_story_len'] - time_slice)) for q_idx in range(len(retrieve_probs[0])): # pass reward_fin_test[ train_trace_id, q_idx, :] *= reward_q_total # used as input at the last softmax for time_slice in range(params['max_story_len_test']): # pass reward_test[train_trace_id, time_slice, :] *= reward_immediate_test[ train_trace_id, time_slice] # used as input at the last softmax mask_sim *= 0 working_memory.resetEnv() work_space_table.reset() sys.stdout = old_std print 'test result:' print acc / n_samples
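# --- Illustrative sketch (not part of the pipeline above) ---------------------
# Throughout test_model and train_two_phrase the flat action index emitted by
# the policy is decoded with the same // and % arithmetic. The helper below is
# a hypothetical restatement of that pattern: the middle entity dimension is
# collapsed to 1 (so arg_2_ptr always comes out 0 here), and relation ids are
# shifted by +1 because 0 stands for the empty relation.
def _decode_flat_action(action_selected, relation_num):
    arg_1_ptr = action_selected // (1 * relation_num)
    arg_2_ptr = (action_selected - arg_1_ptr * 1 * relation_num) // relation_num
    arg_r = (action_selected - arg_1_ptr * 1 * relation_num) % relation_num + 1
    return arg_1_ptr, arg_2_ptr, arg_r
# e.g. with relation_num = 4, index 6 decodes to (1, 0, 3).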
def train_two_phrase(train_file_names, test_file_names, params): K.set_epsilon(1e-4) ent_list = { '1': 'office', '2': 'hallway', '3': 'kitchen', '4': 'garden', '5': 'bedroom', '6': 'bathroom', '7': 'mary', '8': 'john', '9': 'daniel', '10': 'sandra', '11': 'milk', '12': 'football', '13': 'apple' } numpy.set_printoptions(precision=3) print 'loading data.' old_std = sys.stdout f_print = open('debug_print_phr1.txt', 'w') f_debug = open('debug_print_phr2.txt', 'w') f_test = open('debug_print_test.txt', 'w') sys.stdout = f_print lines = [] for f_name in train_file_names: f = open(f_name, 'r') lines.extend(f.readlines()) f.close() lines_test = [] for f_name in test_file_names: f = open(f_name, 'r') lines_test.extend(f.readlines()) f.close() vocab_text = {} inv_vocab_text = {} data = parse_stories(lines, vocab_text, inv_vocab_text, 1) data_test = parse_stories(lines_test, vocab_text, inv_vocab_text, 0) l_train = len(data) l_test = len(data_test) data = data[:min(params['train_set_size'], l_train)] data_test = data_test[:min(params['test_set_size'], l_test)] story_int, story_mask, q_int, q_mask, ans_int, ans_mask, sizes = int_stories( data, vocab_text) story_mask = repeatTensor(story_mask, params['dim_emb_story']) q_mask = repeatTensor(q_mask, params['dim_emb_story']) story_int_test, story_mask_test, q_int_test, q_mask_test, ans_int_test, ans_mask_test, sizes_test = int_stories( data_test, vocab_text) story_mask_test = repeatTensor(story_mask_test, params['dim_emb_story']) q_mask_test = repeatTensor(q_mask_test, params['dim_emb_story']) inv_vocab_text['0'] = 'dududu' params['max_story_len'] = sizes[0] params['max_sent_len'] = max(sizes[1], sizes[3]) params['max_q_num'] = sizes[2] params['max_story_len_test'] = sizes_test[0] params['max_q_num_test'] = sizes_test[2] params['vocab_size'] = len(vocab_text) params['vocab_size_ans'] = len(vocab_text) n_samples = len(story_int) params['ent_range'] = 2 print 'params:' print params print 'n_samples:' print n_samples print 'vocab_text:' print vocab_text sys.stdout = old_std story_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], params['max_sent_len'])) story_mask_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], params['max_sent_len'], params['dim_emb_story'])) q_train = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['max_sent_len'])) q_mask_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['max_sent_len'], params['dim_emb_story'])) act_selected_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) reward_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) reward_immediate_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'])) ans_train = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['vocab_size'])) ans_mask_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_q_num'])) fin_train = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['dim_emb_env'])) fin_train_pred = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['dim_emb_env'])) fin_one_hot_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], (params['ent_range']) * 
1 * params['relation_num'])) reward_fin_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], (params['ent_range']) * 1 * params['relation_num'])) print 'building model.' reasoner, simulator, debugger = DRL_Reasoner(params) test_simulator = DRL_Reasoner_Test(params) working_memory = working_environment(params['enti_num'], params['relation_num_expand']) working_embed = envEmbedding(params['relation_num_expand'], params['enti_num'], params['dim_emb_env']) work_space_table = varTable() mask_sim = numpy.zeros((1, params['max_story_len'], params['dim_emb_story'] * params['max_sent_len'])) all_sim_number = 0 print 'two phrase training started.' for epoch_id in range(params['epoch_num']): train_trace_id = 0 avg_reward_q_epoch = 0. avg_reward_action_epoch = numpy.zeros(params['max_story_len']) print 'epoch %d phrase 1' % epoch_id epoch_precision_rate = 0. sample_rate = numpy.zeros(n_samples) for story_id in range(n_samples): story_precision = 0. for sim_round in range(params['story_sims_per_epoch']): sys.stdout = f_print story_train[train_trace_id, :len(story_int[0]), :len( story_int[0][0])] = story_int[story_id] story_mask_train[train_trace_id, :len(story_int[0]), :len( story_int[0][0]), :] = story_mask[story_id] q_train[train_trace_id, :len(q_int[0]), :len( q_int[0][0])] = q_int[story_id] q_mask_train[train_trace_id, :len(q_int[0]), :len( q_int[0][0]), :] = q_mask[story_id] ans_train[train_trace_id, :len(ans_int[0]), :len( ans_int[0][0])] = ans_int[story_id] for time_slice in range(params['max_story_len']): mask_sim[0][time_slice][:] = numpy.ones( params['dim_emb_story'] * params['max_sent_len']) action_probs, retrieve_probs = simulator.predict([ story_train[numpy.newaxis, train_trace_id], story_mask_train[numpy.newaxis, train_trace_id], q_train[numpy.newaxis, train_trace_id], q_mask_train[numpy.newaxis, train_trace_id], mask_sim[:], reward_train[numpy.newaxis, train_trace_id], reward_fin_train[numpy.newaxis, train_trace_id] ]) for time_slice in range(params['max_story_len']): tupleList, adjGraph, temp_index = working_memory.returnEnv( ) action_selected, action_one_hot = select_action( action_probs[:, time_slice, :], params['epsilon']) act_selected_train[train_trace_id, time_slice, :] = action_one_hot arg_1_ptr = action_selected // ( (1) * params['relation_num'] ) # start from 0 (empty number) arg_2_ptr = ( action_selected - arg_1_ptr * (1) * params['relation_num']) // params['relation_num'] arg_r_ptr = (action_selected - arg_1_ptr * (1) * params['relation_num'] ) % params['relation_num'] + 1 arg_r = arg_r_ptr flag = 0 if time_slice < len(story_int[story_id]): for w_id in story_int[story_id][time_slice]: if inv_vocab_text[str(int( w_id))] in ent_list.values() and flag == 0: if arg_1_ptr == 0: arg_1 = int(w_id) else: arg_2 = int(w_id) flag = 1 elif inv_vocab_text[str(int( w_id))] in ent_list.values() and flag == 1: if arg_1_ptr == 0: arg_2 = int(w_id) else: arg_1 = int(w_id) slice_reward = 0 for tt in range(time_slice + 1): reward_immediate_train[train_trace_id][ tt] += slice_reward * (params['reward_decay'] **(time_slice - tt)) if arg_1 > 0 and arg_2 > 0 and story_mask_train[ train_trace_id, time_slice, 0, 0] > 0: arg_1_int = work_space_table.retr_insert( arg_1, inv_vocab_text) arg_2_int = work_space_table.retr_insert( arg_2, inv_vocab_text) working_memory.modifyEnv((arg_1_int, arg_r, arg_2_int)) retrieved_relation_list = [] reward_temp_list = [] for q_idx in range(len(retrieve_probs[0])): retr_idx, action_one_hot_retr = select_action( retrieve_probs[0, q_idx, :], 
params['epsilon']) arg1_retr_ptr = retr_idx // ((1) * params['relation_num']) arg2_retr_ptr = ( retr_idx - arg1_retr_ptr * (1) * params['relation_num']) // params['relation_num'] relation_pred = (retr_idx - arg1_retr_ptr * (1) * params['relation_num'] ) % params['relation_num'] + 1 flag = 0 for q_w in q_int[story_id, 0]: if inv_vocab_text[str( int(q_w))] in ent_list.values() and flag == 0: if arg1_retr_ptr == 0: arg1_retr = int(q_w) else: arg2_retr = int(q_w) flag = 1 elif inv_vocab_text[str( int(q_w))] in ent_list.values() and flag == 1: if arg1_retr_ptr == 0: arg2_retr = int(q_w) else: arg1_retr = int(q_w) retrieve_reward_pre = 0 reward_temp_list.append(retrieve_reward_pre) arg1_retr_int = work_space_table.retr( arg1_retr, inv_vocab_text) arg2_retr_int = work_space_table.retr( arg2_retr, inv_vocab_text) arg_retr_id = working_memory.retrieveRelation( arg1_retr_int, arg2_retr_int) retrieved_relation_list.append(arg_retr_id) one_hot_single = numpy.zeros( ((params['ent_range']) * 1 * params['relation_num'])) one_hot_single[retr_idx] = 1 fin_one_hot_train[train_trace_id, q_idx, :] = one_hot_single reward_q_total = 0. ans_shared = 0 for q_idx in range(len(retrieve_probs[0])): ans_word_int = numpy.argmax( ans_train[train_trace_id][0]) + 1 ans_word = inv_vocab_text[str(ans_word_int)] if retrieved_relation_list[q_idx] == 1: ans_pred = 'yes' else: ans_pred = 'no' reward_scalar_q, ans_q, label_q = compute_single_reward( ans_pred, ans_word) reward_scalar_q *= q_mask_train[train_trace_id, q_idx, 0, 0] if ans_q == label_q and q_idx == 0: epoch_precision_rate += 1. story_precision += 1. reward_q_total += reward_scalar_q reward_q_total += reward_temp_list[0] for time_slice in range(params['max_story_len']): reward_immediate_train[train_trace_id, time_slice] += reward_q_total * ( params['reward_decay'] **(params['max_story_len'] - time_slice)) for q_idx in range(len(retrieve_probs[0])): # pass if q_idx == 0: mask_reward = 1. else: mask_reward = 0. reward_fin_train[train_trace_id, q_idx, :] *= (reward_q_total * mask_reward) avg_reward_q_epoch = avg_reward_q_epoch * (train_trace_id) / ( train_trace_id + 1) + reward_q_total / (train_trace_id + 1) for time_slice in range(params['max_story_len']): # pass reward_train[train_trace_id, time_slice, :] *= ( reward_immediate_train[train_trace_id, time_slice] * story_mask_train[train_trace_id, time_slice, 0, 0] ) # used as input at the last softmax avg_reward_action_epoch[ time_slice] = avg_reward_action_epoch[time_slice] * ( train_trace_id ) / (train_trace_id + 1) + reward_immediate_train[ train_trace_id, time_slice] / (train_trace_id + 1) train_trace_id += 1 all_sim_number += 1 mask_sim *= 0 working_memory.resetEnv() work_space_table.reset() sample_rate[ story_id] = story_precision / params['story_sims_per_epoch'] for q_idx in range(params['max_q_num']): if 0: # pass reward_fin_train[:, q_idx, :] -= avg_reward_q_epoch # used as input at the last softmax for time_slice in range(params['max_story_len']): if 0: # pass reward_train[:, time_slice, :] -= avg_reward_action_epoch[ time_slice] # used as input at the last softmax epoch_precision_rate = epoch_precision_rate / ( n_samples * params['story_sims_per_epoch']) sys.stdout = old_std print 'precision of this epoch: %f' % epoch_precision_rate print 'epoch %d phrase 2' % (epoch_id) print 'sample_rate:' print sample_rate # phrase2: go batch train on the trace pool. 
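        # The one-hot actions and retrievals recorded during simulation serve as
        # the policy-gradient targets for reasoner.fit below, while the reward
        # tensors are fed back in as inputs that weight the final softmaxes.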
mask_sim_2 = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], params['dim_emb_story'] * params['max_sent_len'])) reasoner.fit( [ story_train, story_mask_train, q_train, q_mask_train, mask_sim_2, reward_train, reward_fin_train ], { 'action_probs_re': act_selected_train, 'retrieve_probs_re': fin_one_hot_train }, batch_size=params['batch_size_phrase2'], nb_epoch=10, verbose=2) sys.stdout = old_std # test the model weights_train = [ i for i in simulator.layers if len(i.get_weights()) != 0 ] weights_test = [ i for i in test_simulator.layers if len(i.get_weights()) != 0 ] for (l1, l2) in zip(weights_train, weights_test): l2.set_weights(l1.get_weights()) if (epoch_id + 1) % params['test_epoch_period'] == 0: test_model(test_simulator, story_int_test, story_mask_test, q_int_test, q_mask_test, ans_int_test, ans_mask_test, params, old_std, f_test, vocab_text, inv_vocab_text) sys.stdout = old_std story_train *= 0 story_mask_train *= 0 q_train *= 0 q_mask_train *= 0 act_selected_train *= 0 reward_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) # real number reward signal ans_train *= 0 ans_mask_train *= 0 fin_train *= 0 fin_train_pred *= 0 reward_fin_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['ent_range'] * 1 * params['relation_num'])) fin_one_hot_train *= 0 reward_immediate_train *= 0 f_print.close() f_debug.close() f_test.close()
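# --- Illustrative sketch (hypothetical helper, mirrors the weight sync above) -
# Before testing, every parameterised layer of the training simulator is paired
# with its counterpart in the separately built test_simulator and its weights
# are copied across. Assuming both models declare their weighted layers in the
# same order, the copy reduces to:
def _copy_weights(src_model, dst_model):
    src_layers = [l for l in src_model.layers if len(l.get_weights()) != 0]
    dst_layers = [l for l in dst_model.layers if len(l.get_weights()) != 0]
    for src, dst in zip(src_layers, dst_layers):
        dst.set_weights(src.get_weights())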
def test_model(simulator, story_int, story_mask, q_int, q_mask, ans_int, ans_mask, params, old_std, f_test, vocab_text, inv_vocab_text): ent_list = { '1': 'office', '2': 'hallway', '3': 'kitchen', '4': 'garden', '5': 'bedroom', '6': 'bathroom', '7': 'mary', '8': 'john', '9': 'daniel', '10': 'sandra' } numpy.set_printoptions(precision=3) n_samples = len(story_int) acc = 0. sys.stdout = old_std print 'n_samples:' print n_samples story_test = numpy.zeros( (1, params['max_story_len'], params['max_sent_len'])) story_mask_test = numpy.zeros( (1, params['max_story_len'], params['max_sent_len'], params['dim_emb_story'])) q_test = numpy.zeros((1, params['max_q_num'], params['max_sent_len'])) q_mask_test = numpy.zeros((1, params['max_q_num'], params['max_sent_len'], params['dim_emb_story'])) act_selected_test = numpy.zeros( (1, params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) # one hot vector reward_test = numpy.ones( (1, params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) # real number reward signal reward_immediate_test = numpy.zeros((1, params['max_story_len'])) ans_test = numpy.zeros((1, params['max_q_num'], params['vocab_size'])) ans_mask_test = numpy.zeros((1, params['max_q_num'])) fin_test = numpy.zeros((1, params['max_q_num'], params['dim_emb_env'])) fin_test_pred = numpy.zeros( (1, params['max_q_num'], params['dim_emb_env'])) fin_one_hot_test = numpy.zeros( (1, params['max_q_num'], (1) * 1 * params['relation_num'])) reward_fin_test = numpy.ones( (1, params['max_q_num'], (1) * 1 * params['relation_num'])) working_memory = working_environment(params['enti_num'], params['relation_num_expand']) working_embed = envEmbedding(params['relation_num_expand'], params['enti_num'], params['dim_emb_env']) work_space_table = varTable() mask_sim = numpy.zeros((1, params['max_story_len'], params['dim_emb_story'] * params['max_sent_len'])) print 'test started.' 
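    # Simulation loop: here each sentence mentions three entities, so the chosen
    # action fixes the argument order of the two tuples written into working
    # memory at every time slice. Each question is answered by retrieving the
    # second argument for (question entity, predicted relation) and comparing it
    # with the labelled answer word.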
sys.stdout = f_test train_trace_id = 0 # always == 0 for story_id in range(n_samples): story_test[train_trace_id, :len(story_int[0]), :len( story_int[0][0])] = story_int[story_id] story_mask_test[train_trace_id, :len(story_int[0]), :len( story_int[0][0]), :] = story_mask[story_id] q_test[train_trace_id, :len(q_int[0]), :len(q_int[0][0] )] = q_int[story_id] q_mask_test[train_trace_id, :len(q_int[0]), :len( q_int[0][0]), :] = q_mask[story_id] ans_test[train_trace_id, :len(ans_int[0]), :len(ans_int[0][0] )] = ans_int[story_id] for time_slice in range(params['max_story_len']): mask_sim[0][time_slice][:] = numpy.ones(params['dim_emb_story'] * params['max_sent_len']) # read and embed environment tupleList, adjGraph, temp_index = working_memory.returnEnv() action_probs, retrieve_probs = simulator.predict([ story_test[numpy.newaxis, train_trace_id], story_mask_test[numpy.newaxis, train_trace_id], q_test[numpy.newaxis, train_trace_id], q_mask_test[numpy.newaxis, train_trace_id], mask_sim[:], reward_test[numpy.newaxis, train_trace_id], reward_fin_test[numpy.newaxis, train_trace_id] ]) action_selected, action_one_hot = select_action_hard( action_probs[:, time_slice, :], params['epsilon']) act_selected_test[train_trace_id, time_slice, :] = action_one_hot arg_1_ptr = action_selected // ((1) * params['relation_num']) arg_2_ptr = ( action_selected - arg_1_ptr * (1) * params['relation_num']) // params['relation_num'] arg_r_ptr = ( action_selected - arg_1_ptr * (1) * params['relation_num']) % params['relation_num'] + 1 arg_r = arg_r_ptr order1 = arg_1_ptr / 2 order2 = arg_1_ptr % 2 flag = 0 three_ents = [] if story_mask_test[train_trace_id, time_slice, 0, 0] > 0: for w_id in story_int[story_id][time_slice]: if inv_vocab_text[str(int(w_id))] in ent_list.values(): three_ents.append(int(w_id)) if order1 == 0: arg_1_first = three_ents[0] arg_2_first = three_ents[2] else: arg_1_first = three_ents[2] arg_2_first = three_ents[0] if order2 == 0: arg_1_second = three_ents[1] arg_2_second = three_ents[2] else: arg_1_second = three_ents[2] arg_2_second = three_ents[1] slice_reward = 0 for tt in range(time_slice + 1): reward_immediate_test[train_trace_id][tt] += slice_reward * ( params['reward_decay']**(time_slice - tt)) if story_mask_test[train_trace_id, time_slice, 0, 0] > 0: # retrieve the table arg_1_int_first = work_space_table.retr_insert( arg_1_first, inv_vocab_text) arg_2_int_first = work_space_table.retr_insert( arg_2_first, inv_vocab_text) arg_1_int_second = work_space_table.retr_insert( arg_1_second, inv_vocab_text) arg_2_int_second = work_space_table.retr_insert( arg_2_second, inv_vocab_text) working_memory.modifyEnv( (arg_1_int_first, arg_r, arg_2_int_first)) working_memory.modifyEnv( (arg_1_int_second, arg_r, arg_2_int_second)) # compute fin_train and fin_train_pred retrieved_relation_list = [] reward_temp_list = [] for q_idx in range(len(retrieve_probs[0])): retr_idx = numpy.argmax(retrieve_probs[0, q_idx, :]) arg1_retr_ptr = retr_idx // ((1) * params['relation_num']) arg2_retr_ptr = ( retr_idx - arg1_retr_ptr * (1) * params['relation_num']) // params['relation_num'] relation_pred = ( retr_idx - arg1_retr_ptr * (1) * params['relation_num']) % params['relation_num'] + 1 for q_w in q_int[story_id, 0]: if inv_vocab_text[str(int(q_w))] in ent_list.values(): arg1_retr = int(q_w) break retrieve_reward_pre = 0 reward_temp_list.append(retrieve_reward_pre) arg1_retr_int = work_space_table.retr(arg1_retr, inv_vocab_text) arg_retr_id = working_memory.retrieveArg(arg1_retr_int, relation_pred) arg_retr_str = 
work_space_table.inv_retr(arg_retr_id) retrieved_relation_list.append(arg_retr_str) one_hot_single = numpy.zeros(((1) * 1 * params['relation_num'])) one_hot_single[retr_idx] = 1 fin_one_hot_test[train_trace_id, q_idx, :] = one_hot_single reward_q_total = 0. ans_shared = 0 for q_idx in range(len(retrieve_probs[0])): ans_word_int = numpy.argmax(ans_test[train_trace_id][0]) + 1 ans_word = inv_vocab_text[str(ans_word_int)] reward_scalar_q, ans_q, label_q = compute_single_reward( retrieved_relation_list[q_idx], ans_word) reward_scalar_q *= q_mask_test[train_trace_id, q_idx, 0, 0] if ans_q == label_q and q_idx == 0: acc += 1. reward_q_total += reward_scalar_q reward_q_total += reward_temp_list[0] for time_slice in range(params['max_story_len']): reward_immediate_test[train_trace_id, time_slice] += reward_q_total * ( params['reward_decay'] **(params['max_story_len'] - time_slice)) for q_idx in range(len(retrieve_probs[0])): # pass reward_fin_test[train_trace_id, q_idx, :] *= reward_q_total for time_slice in range(params['max_story_len']): # pass reward_test[train_trace_id, time_slice, :] *= reward_immediate_test[train_trace_id, time_slice] mask_sim *= 0 working_memory.resetEnv() work_space_table.reset() sys.stdout = old_std print 'test result:' print acc / n_samples
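# --- Illustrative sketch (hypothetical helper) --------------------------------
# The scalar reward earned on the questions is spread back over every story time
# slice with an exponential decay, exactly as in the loops above; with a decay
# below 1, earlier slices receive a smaller share of the final reward.
def _spread_final_reward(reward_immediate, reward_q_total, decay, max_story_len):
    # reward_immediate: 1-D per-slice reward row of one trace,
    # e.g. reward_immediate_test[0]
    for t in range(max_story_len):
        reward_immediate[t] += reward_q_total * (decay ** (max_story_len - t))
    return reward_immediate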
def train_two_phrase(train_file_names, test_file_names, params): K.set_epsilon(1e-4) ent_list = { '1': 'office', '2': 'hallway', '3': 'kitchen', '4': 'garden', '5': 'bedroom', '6': 'bathroom', '7': 'mary', '8': 'john', '9': 'daniel', '10': 'sandra' } # form vocabs and data numpy.set_printoptions(precision=3) print 'loading data.' old_std = sys.stdout f_print = open('debug_print_phr1.txt', 'w') f_debug = open('debug_print_phr2.txt', 'w') f_test = open('debug_print_test.txt', 'w') sys.stdout = f_print lines = [] for f_name in train_file_names: f = open(f_name, 'r') lines.extend(f.readlines()) f.close() lines_test = [] for f_name in test_file_names: f = open(f_name, 'r') lines_test.extend(f.readlines()) f.close() vocab_text = {} inv_vocab_text = {} data = parse_stories(lines, vocab_text, inv_vocab_text) data_test = parse_stories(lines_test, vocab_text, inv_vocab_text) data = data[:params['train_set_size']] data_test = data_test[:params['test_set_size']] story_int, story_mask, q_int, q_mask, ans_int, ans_mask, sizes = int_stories( data, vocab_text) story_mask = repeatTensor(story_mask, params['dim_emb_story']) q_mask = repeatTensor(q_mask, params['dim_emb_story']) story_int_test, story_mask_test, q_int_test, q_mask_test, ans_int_test, ans_mask_test, sizes_test = int_stories( data_test, vocab_text) story_mask_test = repeatTensor(story_mask_test, params['dim_emb_story']) q_mask_test = repeatTensor(q_mask_test, params['dim_emb_story']) inv_vocab_text['0'] = 'dududu' params['max_story_len'] = max(sizes[2], sizes[0]) params['max_sent_len'] = max(sizes[1], sizes[3]) params['max_q_num'] = max(sizes[2], sizes[0]) params['vocab_size'] = len(vocab_text) params['vocab_size_ans'] = len(vocab_text) n_samples = len(story_int) params['ent_range'] = 4 # normal order and inverse order for two tuples. 
print 'params:' print params print 'n_samples:' print n_samples print 'vocab_text:' print vocab_text sys.stdout = old_std story_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], params['max_sent_len'])) story_mask_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], params['max_sent_len'], params['dim_emb_story'])) q_train = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['max_sent_len'])) q_mask_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['max_sent_len'], params['dim_emb_story'])) act_selected_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) reward_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) reward_immediate_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'])) ans_train = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['vocab_size'])) ans_mask_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_q_num'])) fin_train = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['dim_emb_env'])) fin_train_pred = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['dim_emb_env'])) fin_one_hot_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], (1) * 1 * params['relation_num'])) reward_fin_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], (1) * 1 * params['relation_num'])) print 'building model.' # simulate and generate final train data, do batch train and policy gd # build model reasoner, simulator, debugger, gradient_checker = DRL_Reasoner(params) working_memory = working_environment(params['enti_num'], params['relation_num_expand']) working_embed = envEmbedding(params['relation_num_expand'], params['enti_num'], params['dim_emb_env']) work_space_table = varTable() # for every single story-question group, take simulations, compute the env embeddings, actions taken, and total rewards. # take a certain number of simulations for each group, and use the pool as total train data, do sgd and batch training. # loop that progress for many epoches. mask_sim = numpy.zeros((1, params['max_story_len'], params['dim_emb_story'] * params['max_sent_len'])) all_sim_number = 0 print 'two phrase training started.' for epoch_id in range(params['epoch_num']): train_trace_id = 0 avg_reward_q_epoch = 0. avg_reward_action_epoch = numpy.zeros(params['max_story_len']) print 'epoch %d phrase 1' % epoch_id # phrase 1: simulate and train data generating epoch_precision_rate = 0. sample_rate = numpy.zeros(n_samples) for story_id in range(n_samples): story_precision = 0. 
for sim_round in range(params['story_sims_per_epoch']): sys.stdout = f_print if (epoch_id + 1) % params['print_epoch_period'] == 0: print '=======simulation====== epoch = %d, story_id = %d, sim_round = %d' % ( epoch_id, story_id, sim_round) story_train[train_trace_id, :len(story_int[0]), :len( story_int[0][0])] = story_int[story_id] story_mask_train[train_trace_id, :len(story_int[0]), :len( story_int[0][0]), :] = story_mask[story_id] q_train[train_trace_id, :len(q_int[0]), :len( q_int[0][0])] = q_int[story_id] q_mask_train[train_trace_id, :len(q_int[0]), :len( q_int[0][0]), :] = q_mask[story_id] ans_train[train_trace_id, :len(ans_int[0]), :len( ans_int[0][0])] = ans_int[story_id] for time_slice in range(params['max_story_len']): mask_sim[0][time_slice][:] = numpy.ones( params['dim_emb_story'] * params['max_sent_len']) action_probs, retrieve_probs = simulator.predict([ story_train[numpy.newaxis, train_trace_id], story_mask_train[numpy.newaxis, train_trace_id], q_train[numpy.newaxis, train_trace_id], q_mask_train[numpy.newaxis, train_trace_id], mask_sim[:], reward_train[numpy.newaxis, train_trace_id], reward_fin_train[numpy.newaxis, train_trace_id] ]) for time_slice in range(params['max_story_len']): # read and embed environment tupleList, adjGraph, temp_index = working_memory.returnEnv( ) action_selected, action_one_hot = select_action( action_probs[:, time_slice, :], params['epsilon']) act_selected_train[train_trace_id, time_slice, :] = action_one_hot arg_1_ptr = action_selected // ( (1) * params['relation_num'] ) # start from 0 (empty number) arg_2_ptr = ( action_selected - arg_1_ptr * (1) * params['relation_num']) // params['relation_num'] arg_r_ptr = (action_selected - arg_1_ptr * (1) * params['relation_num'] ) % params['relation_num'] + 1 arg_r = arg_r_ptr order1 = arg_1_ptr / 2 order2 = arg_1_ptr % 2 flag = 0 three_ents = [] if story_mask_train[train_trace_id, time_slice, 0, 0] > 0: for w_id in story_int[story_id][time_slice]: if inv_vocab_text[str( int(w_id))] in ent_list.values(): three_ents.append(int(w_id)) if order1 == 0: arg_1_first = three_ents[0] arg_2_first = three_ents[2] else: arg_1_first = three_ents[2] arg_2_first = three_ents[0] if order2 == 0: arg_1_second = three_ents[1] arg_2_second = three_ents[2] else: arg_1_second = three_ents[2] arg_2_second = three_ents[1] slice_reward = 0 for tt in range(time_slice + 1): reward_immediate_train[train_trace_id][ tt] += slice_reward * (params['reward_decay'] **(time_slice - tt)) if story_mask_train[train_trace_id, time_slice, 0, 0] > 0: # retrieve the table arg_1_int_first = work_space_table.retr_insert( arg_1_first, inv_vocab_text) arg_2_int_first = work_space_table.retr_insert( arg_2_first, inv_vocab_text) arg_1_int_second = work_space_table.retr_insert( arg_1_second, inv_vocab_text) arg_2_int_second = work_space_table.retr_insert( arg_2_second, inv_vocab_text) working_memory.modifyEnv( (arg_1_int_first, arg_r, arg_2_int_first)) working_memory.modifyEnv( (arg_1_int_second, arg_r, arg_2_int_second)) # compute fin_train and fin_train_pred retrieved_relation_list = [] reward_temp_list = [ ] # reward for every question based on arg1/2 in q or not. 
for q_idx in range(len(retrieve_probs[0])): retr_idx, action_one_hot_retr = select_action( retrieve_probs[0, q_idx, :], params['epsilon']) arg1_retr_ptr = retr_idx // ((1) * params['relation_num']) arg2_retr_ptr = ( retr_idx - arg1_retr_ptr * (1) * params['relation_num']) // params['relation_num'] relation_pred = (retr_idx - arg1_retr_ptr * (1) * params['relation_num'] ) % params['relation_num'] + 1 # convert from ptr to id: for q_w in q_int[story_id, 0]: if inv_vocab_text[str(int(q_w))] in ent_list.values(): arg1_retr = int(q_w) break retrieve_reward_pre = 0 reward_temp_list.append(retrieve_reward_pre) arg1_retr_int = work_space_table.retr( arg1_retr, inv_vocab_text) # relation_pred_emb = working_embed.returnSingleEmb(relation_pred, 1, 1) arg_retr_id = working_memory.retrieveArg( arg1_retr_int, relation_pred) arg_retr_str = work_space_table.inv_retr(arg_retr_id) retrieved_relation_list.append(arg_retr_str) one_hot_single = numpy.zeros( ((1) * 1 * params['relation_num'])) one_hot_single[retr_idx] = 1 fin_one_hot_train[train_trace_id, q_idx, :] = one_hot_single reward_q_total = 0. ans_shared = 0 for q_idx in range(len(retrieve_probs[0])): ans_word_int = numpy.argmax( ans_train[train_trace_id][0]) + 1 ans_word = inv_vocab_text[str(ans_word_int)] reward_scalar_q, ans_q, label_q = compute_single_reward( retrieved_relation_list[q_idx], ans_word) reward_scalar_q *= q_mask_train[train_trace_id, q_idx, 0, 0] if ans_q == label_q and q_idx == 0: epoch_precision_rate += 1. story_precision += 1. reward_q_total += reward_scalar_q reward_q_total += reward_temp_list[0] for time_slice in range(params['max_story_len']): reward_immediate_train[train_trace_id, time_slice] += reward_q_total * ( params['reward_decay'] **(params['max_story_len'] - time_slice)) for q_idx in range(len(retrieve_probs[0])): # pass if q_idx == 0: mask_reward = 1. else: mask_reward = 0. reward_fin_train[train_trace_id, q_idx, :] *= ( reward_q_total * mask_reward ) # used as input at the last softmax avg_reward_q_epoch = avg_reward_q_epoch * (train_trace_id) / ( train_trace_id + 1) + reward_q_total / ( train_trace_id + 1) # update E[r] for final q. for time_slice in range(params['max_story_len']): # pass reward_train[train_trace_id, time_slice, :] *= ( reward_immediate_train[train_trace_id, time_slice] * story_mask_train[train_trace_id, time_slice, 0, 0] ) # used as input at the last softmax avg_reward_action_epoch[ time_slice] = avg_reward_action_epoch[time_slice] * ( train_trace_id ) / (train_trace_id + 1) + reward_immediate_train[ train_trace_id, time_slice] / (train_trace_id + 1) # one simulation finished. train_trace_id += 1 all_sim_number += 1 mask_sim *= 0 working_memory.resetEnv() work_space_table.reset() sample_rate[ story_id] = story_precision / params['story_sims_per_epoch'] for q_idx in range(params['max_q_num']): if 0: # pass reward_fin_train[:, q_idx, :] -= avg_reward_q_epoch # used as input at the last softmax for time_slice in range(params['max_story_len']): if 0: # pass reward_train[:, time_slice, :] -= avg_reward_action_epoch[ time_slice] # used as input at the last softmax epoch_precision_rate = epoch_precision_rate / ( n_samples * params['story_sims_per_epoch']) print 'the total answer precision of this epoch:' print epoch_precision_rate sys.stdout = old_std print 'precision of this epoch: %f' % epoch_precision_rate print 'epoch %d phrase 2' % (epoch_id) print 'sample_rate:' print sample_rate # phrase2: go batch train on the trace pool. 
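        # As in the first variant, the recorded one-hot actions and retrievals
        # are the fit targets and the reward tensors re-enter the model as
        # softmax-weighting inputs.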
mask_sim_2 = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], params['dim_emb_story'] * params['max_sent_len'])) reasoner.fit( [ story_train, story_mask_train, q_train, q_mask_train, mask_sim_2, reward_train, reward_fin_train ], { 'action_probs_re': act_selected_train, 'retrieve_probs_re': fin_one_hot_train }, batch_size=params['batch_size_phrase2'], nb_epoch=10, verbose=2) sys.stdout = old_std # test the model if (epoch_id + 1) % params['test_epoch_period'] == 0: test_model(simulator, story_int_test, story_mask_test, q_int_test, q_mask_test, ans_int_test, ans_mask_test, params, old_std, f_test, vocab_text, inv_vocab_text) sys.stdout = old_std story_train *= 0 story_mask_train *= 0 q_train *= 0 q_mask_train *= 0 act_selected_train *= 0 reward_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) # real number reward signal ans_train *= 0 ans_mask_train *= 0 fin_train *= 0 fin_train_pred *= 0 reward_fin_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], 1 * 1 * params['relation_num'])) fin_one_hot_train *= 0 reward_immediate_train *= 0 f_print.close() f_debug.close() f_test.close()
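# --- Illustrative sketch (hypothetical helper) --------------------------------
# avg_reward_q_epoch and avg_reward_action_epoch are maintained above as running
# means over the traces simulated so far, avg <- avg * n/(n+1) + r/(n+1); that
# incremental-mean update, restated on its own:
def _running_mean(current_avg, new_value, n_seen):
    # n_seen = number of samples already folded into current_avg
    return current_avg * n_seen / (n_seen + 1.0) + new_value / (n_seen + 1.0)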
def test_model(simulator, story_int, story_mask, q_int, q_mask, ans_int, ans_mask, params, old_std, f_test, vocab_text, inv_vocab_text): relate_dict = {'left': 1, 'right': 2, 'above': 3, 'below': 4} adj_list = ['pink', 'blue', 'red', 'yellow'] ent_list = ['triangle', 'rectangle', 'square', 'sphere'] concat_ent_list = [] for i in adj_list: for j in ent_list: concat_ent_list.append(i + j) concat_ent_list.extend(ent_list) # form vocabs and data numpy.set_printoptions(precision=3) n_samples = len(story_int) acc = 0. sys.stdout = old_std print 'n_samples:' print n_samples # initialize the env embeddings, actions taken, rewards for the whole train data of an epoch story_test = numpy.zeros( (1, params['max_story_len'], params['max_sent_len'])) story_mask_test = numpy.zeros( (1, params['max_story_len'], params['max_sent_len'], params['dim_emb_story'])) q_test = numpy.zeros((1, params['max_q_num'], params['max_sent_len'])) q_mask_test = numpy.zeros((1, params['max_q_num'], params['max_sent_len'], params['dim_emb_story'])) # env_test = numpy.zeros((1, params['max_story_len'], params['dim_emb_env'])) act_selected_test = numpy.zeros( (1, params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) reward_test = numpy.ones( (1, params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) reward_immediate_test = numpy.zeros((1, params['max_story_len'])) ans_test = numpy.zeros((1, params['max_q_num'], params['vocab_size'])) ans_mask_test = numpy.zeros((1, params['max_q_num'])) fin_test = numpy.zeros((1, params['max_q_num'], params['dim_emb_env'])) fin_test_pred = numpy.zeros( (1, params['max_q_num'], params['dim_emb_env'])) fin_one_hot_test = numpy.zeros( (1, params['max_q_num'], (params['ent_range']) * 1 * params['relation_num'])) reward_fin_test = numpy.ones( (1, params['max_q_num'], (params['ent_range']) * 1 * params['relation_num'])) working_memory = working_environment(params['enti_num'], params['relation_num_expand']) working_embed = envEmbedding(params['relation_num_expand'], params['enti_num'], params['dim_emb_env']) work_space_table = varTable() mask_sim = numpy.zeros((1, params['max_story_len'], params['dim_emb_story'] * params['max_sent_len'])) print 'test started.' sys.stdout = f_test train_trace_id = 0 # always == 0 sample_rate = numpy.zeros(n_samples) for story_id in range(n_samples): story_precision = 0. 
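        # Simulation loop for the spatial task: at every time slice the
        # environment is re-read and the simulator re-run, the selected
        # (entity, relation, entity) tuple is written into working memory, and
        # each question is scored yes/no by comparing the retrieved relation
        # with the predicted one.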
story_test[train_trace_id, :len(story_int[0]), :len( story_int[0][0])] = story_int[story_id] story_mask_test[train_trace_id, :len(story_int[0]), :len( story_int[0][0]), :] = story_mask[story_id] q_test[train_trace_id, :len(q_int[0]), :len(q_int[0][0] )] = q_int[story_id] q_mask_test[train_trace_id, :len(q_int[0]), :len( q_int[0][0]), :] = q_mask[story_id] ans_test[train_trace_id, :len(ans_int[0]), :len(ans_int[0][0] )] = ans_int[story_id] for time_slice in range(params['max_story_len']): mask_sim[0][time_slice][:] = numpy.ones(params['dim_emb_story'] * params['max_sent_len']) tupleList, adjGraph, temp_index = working_memory.returnEnv() action_probs, retrieve_probs = simulator.predict([ story_test[numpy.newaxis, train_trace_id], story_mask_test[numpy.newaxis, train_trace_id], q_test[numpy.newaxis, train_trace_id], q_mask_test[numpy.newaxis, train_trace_id], mask_sim[:], reward_test[numpy.newaxis, train_trace_id], reward_fin_test[numpy.newaxis, train_trace_id] ]) action_selected, action_one_hot = select_action_hard( action_probs[:, time_slice, :], params['epsilon']) act_selected_test[train_trace_id, time_slice, :] = action_one_hot arg_1_ptr = action_selected // ( (1) * params['relation_num']) # start from 0 (empty number) arg_2_ptr = ( action_selected - arg_1_ptr * (1) * params['relation_num']) // params['relation_num'] arg_r_ptr = ( action_selected - arg_1_ptr * (1) * params['relation_num']) % params['relation_num'] + 1 arg_r = arg_r_ptr flag = 0 arg_1 = 0 arg_2 = 0 if time_slice < len(story_int[story_id]): for w_id in story_int[story_id][time_slice]: if inv_vocab_text[str( int(w_id))] in concat_ent_list and flag == 0: if arg_1_ptr == 0: arg_1 = int(w_id) else: arg_2 = int(w_id) flag = 1 elif inv_vocab_text[str( int(w_id))] in concat_ent_list and flag == 1: if arg_1_ptr == 0: arg_2 = int(w_id) else: arg_1 = int(w_id) slice_reward = 0 for tt in range(time_slice + 1): reward_immediate_test[train_trace_id][tt] += slice_reward * ( params['reward_decay']**(time_slice - tt)) if arg_1 > 0 and arg_2 > 0 and story_mask_test[train_trace_id, time_slice, 0, 0] > 0: # retrieve the table arg_1_int = work_space_table.retr_insert(arg_1, inv_vocab_text) arg_2_int = work_space_table.retr_insert(arg_2, inv_vocab_text) working_memory.modifyEnv((arg_1_int, arg_r, arg_2_int)) # compute fin_train and fin_train_pred retrieved_relation_list = [] pred_relation_list = [] reward_temp_list = [ ] # reward for every question based on arg1/2 in q or not. 
for q_idx in range(len(retrieve_probs[0])): retr_idx = numpy.argmax(retrieve_probs[0, q_idx, :]) arg1_retr_ptr = retr_idx // ((1) * params['relation_num']) arg2_retr_ptr = ( retr_idx - arg1_retr_ptr * (1) * params['relation_num']) // params['relation_num'] relation_pred = ( retr_idx - arg1_retr_ptr * (1) * params['relation_num']) % params['relation_num'] + 1 # convert from ptr to id: flag = 0 arg1_retr = 0 arg2_retr = 0 for q_w in q_int[story_id, q_idx]: if inv_vocab_text[str( int(q_w))] in concat_ent_list and flag == 0: if arg1_retr_ptr == 0: arg1_retr = int(q_w) else: arg2_retr = int(q_w) flag = 1 elif inv_vocab_text[str( int(q_w))] in concat_ent_list and flag == 1: if arg1_retr_ptr == 0: arg2_retr = int(q_w) else: arg1_retr = int(q_w) retrieve_reward_pre = 0 reward_temp_list.append(retrieve_reward_pre) arg1_retr_int = work_space_table.retr(arg1_retr, inv_vocab_text) arg2_retr_int = work_space_table.retr(arg2_retr, inv_vocab_text) relation_retr = working_memory.retrieveRelation( arg1_retr_int, arg2_retr_int) retrieved_relation_list.append(relation_retr) pred_relation_list.append(relation_pred) one_hot_single = numpy.zeros( ((params['ent_range']) * 1 * params['relation_num'])) one_hot_single[retr_idx] = 1 fin_one_hot_test[train_trace_id, q_idx, :] = one_hot_single reward_q_total = 0. reward_q_list = [] for q_idx in range(len(retrieve_probs[0])): ans_word_int = numpy.argmax(ans_test[train_trace_id][q_idx]) + 1 ans_word = inv_vocab_text[str(ans_word_int)] reward_scalar_q, ans_q, label_q = compute_single_reward_yn( retrieved_relation_list[q_idx], pred_relation_list[q_idx], ans_word) reward_scalar_q *= q_mask_test[train_trace_id, q_idx, 0, 0] if ans_q == label_q: acc += 1. story_precision += 1. reward_q_total += reward_scalar_q reward_q_list.append(reward_scalar_q) sample_rate[story_id] = story_precision / params['max_story_len'] for time_slice in range(params['max_story_len']): reward_immediate_test[train_trace_id, time_slice] += reward_q_total * ( params['reward_decay'] **(params['max_story_len'] - time_slice)) for q_idx in range(len(retrieve_probs[0])): # pass reward_fin_test[train_trace_id, q_idx, :] *= reward_q_list[ q_idx] # used as input at the last softmax for time_slice in range(params['max_story_len']): # pass reward_test[train_trace_id, time_slice, :] *= ( reward_immediate_test[train_trace_id, time_slice] * story_mask_test[train_trace_id, time_slice, 0, 0] ) # used as input at the last softmax mask_sim *= 0 working_memory.resetEnv() work_space_table.reset() sys.stdout = old_std print 'test result:' print sample_rate print 'total accuracy:' print acc / (n_samples * params['max_story_len'])
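# --- Illustrative sketch ------------------------------------------------------
# select_action (training) samples an index from the policy distribution, while
# select_action_hard (testing) takes the argmax; their actual implementations
# live elsewhere in this repo. A minimal stand-in with the same return
# convention (flat index plus matching one-hot vector) could look like this:
import numpy

def _select_action_sketch(probs, greedy=False):
    probs = numpy.asarray(probs, dtype='float64').ravel()
    if greedy:
        idx = int(numpy.argmax(probs))
    else:
        idx = int(numpy.random.choice(len(probs), p=probs / probs.sum()))
    one_hot = numpy.zeros(len(probs))
    one_hot[idx] = 1.
    return idx, one_hot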
def train_two_phrase(train_file_names, test_file_names, params): K.set_epsilon(1e-4) relate_dict = {'left': 1, 'right': 2, 'above': 3, 'below': 4} adj_list = ['pink', 'blue', 'red', 'yellow'] ent_list = ['triangle', 'rectangle', 'square', 'sphere'] concat_ent_list = [] for i in adj_list: for j in ent_list: concat_ent_list.append(i + j) concat_ent_list.extend(ent_list) # form vocabs and data numpy.set_printoptions(precision=3) print 'loading data.' old_std = sys.stdout f_print = open('debug_print_phr1.txt', 'w') f_debug = open('debug_print_phr2.txt', 'w') f_test = open('debug_print_test.txt', 'w') sys.stdout = f_print lines = [] for f_name in train_file_names: f = open(f_name, 'r') lines.extend(f.readlines()) f.close() lines_test = [] for f_name in test_file_names: f = open(f_name, 'r') lines_test.extend(f.readlines()) f.close() vocab_text = {} inv_vocab_text = {} data = parse_stories(lines, vocab_text, inv_vocab_text) data_test = parse_stories(lines_test, vocab_text, inv_vocab_text) story_int, story_mask, q_int, q_mask, ans_int, ans_mask, sizes = int_stories( data, vocab_text) story_mask = repeatTensor(story_mask, params['dim_emb_story']) q_mask = repeatTensor(q_mask, params['dim_emb_story']) story_int_test, story_mask_test, q_int_test, q_mask_test, ans_int_test, ans_mask_test, sizes_test = int_stories( data_test, vocab_text) story_mask_test = repeatTensor(story_mask_test, params['dim_emb_story']) q_mask_test = repeatTensor(q_mask_test, params['dim_emb_story']) inv_vocab_text['0'] = 'dududu' params['max_story_len'] = max(sizes[2], sizes[0]) params['max_story_len_valid'] = sizes[0] params['max_sent_len'] = max(sizes[1], sizes[3]) params['max_q_num'] = max(sizes[2], sizes[0]) params['vocab_size'] = len(vocab_text) params['vocab_size_ans'] = len(vocab_text) n_samples = len(story_int) params['ent_range'] = 2 print 'params:' print params print 'n_samples:' print n_samples print 'vocab_text:' print vocab_text sys.stdout = old_std story_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], params['max_sent_len'])) story_mask_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], params['max_sent_len'], params['dim_emb_story'])) q_train = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['max_sent_len'])) q_mask_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['max_sent_len'], params['dim_emb_story'])) act_selected_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) reward_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) reward_immediate_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_story_len'])) ans_train = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['vocab_size'])) ans_mask_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_q_num'])) fin_train = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['dim_emb_env'])) fin_train_pred = numpy.zeros((n_samples * params['story_sims_per_epoch'], params['max_q_num'], params['dim_emb_env'])) fin_one_hot_train = numpy.zeros( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], (params['ent_range']) * 1 * params['relation_num'])) reward_fin_train = numpy.ones( (n_samples * 
params['story_sims_per_epoch'], params['max_q_num'], (params['ent_range']) * 1 * params['relation_num'])) print 'building model.' reasoner, simulator, debugger, gradient_checker = DRL_Reasoner(params) working_memory = working_environment(params['enti_num'], params['relation_num_expand']) working_embed = envEmbedding(params['relation_num_expand'], params['enti_num'], params['dim_emb_env']) work_space_table = varTable() mask_sim = numpy.zeros((1, params['max_story_len'], params['dim_emb_story'] * params['max_sent_len'])) all_sim_number = 0 print 'two phrase training started.' for epoch_id in range(params['epoch_num']): train_trace_id = 0 avg_reward_q_epoch = 0. avg_reward_action_epoch = numpy.zeros(params['max_story_len']) print 'epoch %d phrase 1' % epoch_id epoch_precision_rate = 0. sample_rate = numpy.zeros(n_samples) for story_id in range(n_samples): story_precision = 0. for sim_round in range(params['story_sims_per_epoch']): sys.stdout = f_print story_train[train_trace_id, :len(story_int[0]), :len( story_int[0][0])] = story_int[story_id] story_mask_train[train_trace_id, :len(story_int[0]), :len( story_int[0][0]), :] = story_mask[story_id] q_train[train_trace_id, :len(q_int[0]), :len( q_int[0][0])] = q_int[story_id] q_mask_train[train_trace_id, :len(q_int[0]), :len( q_int[0][0]), :] = q_mask[story_id] ans_train[train_trace_id, :len(ans_int[0]), :len( ans_int[0][0])] = ans_int[story_id] for time_slice in range(params['max_story_len']): mask_sim[0][time_slice][:] = numpy.ones( params['dim_emb_story'] * params['max_sent_len']) # read and embed environment tupleList, adjGraph, temp_index = working_memory.returnEnv( ) action_probs, retrieve_probs = simulator.predict([ story_train[numpy.newaxis, train_trace_id], story_mask_train[numpy.newaxis, train_trace_id], q_train[numpy.newaxis, train_trace_id], q_mask_train[numpy.newaxis, train_trace_id], mask_sim[:], reward_train[numpy.newaxis, train_trace_id], reward_fin_train[numpy.newaxis, train_trace_id] ]) action_selected, action_one_hot = select_action( action_probs[:, time_slice, :], params['epsilon']) act_selected_train[train_trace_id, time_slice, :] = action_one_hot arg_1_ptr = action_selected // ( (1) * params['relation_num'] ) # start from 0 (empty number) arg_2_ptr = ( action_selected - arg_1_ptr * (1) * params['relation_num']) // params['relation_num'] arg_r_ptr = (action_selected - arg_1_ptr * (1) * params['relation_num'] ) % params['relation_num'] + 1 arg_r = arg_r_ptr flag = 0 arg_1 = 0 arg_2 = 0 if time_slice < len(story_int[story_id]): for w_id in story_int[story_id][time_slice]: if inv_vocab_text[str(int( w_id))] in concat_ent_list and flag == 0: if arg_1_ptr == 0: arg_1 = int(w_id) else: arg_2 = int(w_id) flag = 1 elif inv_vocab_text[str(int( w_id))] in concat_ent_list and flag == 1: if arg_1_ptr == 0: arg_2 = int(w_id) else: arg_1 = int(w_id) slice_reward = 0 for tt in range(time_slice + 1): reward_immediate_train[train_trace_id][ tt] += slice_reward * (params['reward_decay'] **(time_slice - tt)) if arg_1 > 0 and arg_2 > 0 and story_mask_train[ train_trace_id, time_slice, 0, 0] > 0: # retrieve the table arg_1_int = work_space_table.retr_insert( arg_1, inv_vocab_text) arg_2_int = work_space_table.retr_insert( arg_2, inv_vocab_text) working_memory.modifyEnv((arg_1_int, arg_r, arg_2_int)) # compute fin_train and fin_train_pred retrieved_relation_list = [] pred_relation_list = [] reward_temp_list = [ ] # reward for every question based on arg1/2 in q or not. 
for q_idx in range(len(retrieve_probs[0])): retr_idx, action_one_hot_retr = select_action( retrieve_probs[:, q_idx, :], params['epsilon']) arg1_retr_ptr = retr_idx // ((1) * params['relation_num']) arg2_retr_ptr = ( retr_idx - arg1_retr_ptr * (1) * params['relation_num']) // params['relation_num'] relation_pred = (retr_idx - arg1_retr_ptr * (1) * params['relation_num'] ) % params['relation_num'] + 1 flag = 0 arg1_retr = 0 arg2_retr = 0 for q_w in q_int[story_id, q_idx]: if inv_vocab_text[str( int(q_w))] in concat_ent_list and flag == 0: if arg1_retr_ptr == 0: arg1_retr = int(q_w) else: arg2_retr = int(q_w) flag = 1 elif inv_vocab_text[str( int(q_w))] in concat_ent_list and flag == 1: if arg1_retr_ptr == 0: arg2_retr = int(q_w) else: arg1_retr = int(q_w) retrieve_reward_pre = 0 reward_temp_list.append(retrieve_reward_pre) arg1_retr_int = work_space_table.retr( arg1_retr, inv_vocab_text) arg2_retr_int = work_space_table.retr( arg2_retr, inv_vocab_text) # relation_pred_emb = working_embed.returnSingleEmb(relation_pred, 1, 1) relation_retr = working_memory.retrieveRelation( arg1_retr_int, arg2_retr_int) retrieved_relation_list.append(relation_retr) pred_relation_list.append(relation_pred) one_hot_single = numpy.zeros( ((params['ent_range']) * 1 * params['relation_num'])) one_hot_single[retr_idx] = 1 fin_one_hot_train[train_trace_id, q_idx, :] = one_hot_single reward_q_total = 0. reward_q_list = [] for q_idx in range(len(retrieve_probs[0])): ans_word_int = numpy.argmax( ans_train[train_trace_id][q_idx]) + 1 ans_word = inv_vocab_text[str(ans_word_int)] reward_scalar_q, ans_q, label_q = compute_single_reward_yn( retrieved_relation_list[q_idx], pred_relation_list[q_idx], ans_word) reward_scalar_q *= q_mask_train[train_trace_id, q_idx, 0, 0] if ans_q == label_q: epoch_precision_rate += 1. story_precision += 1. 
reward_q_total += reward_scalar_q reward_q_list.append(reward_scalar_q) for time_slice in range(params['max_story_len']): reward_immediate_train[ train_trace_id, time_slice] += reward_q_total * ( params['reward_decay'] **(params['max_story_len_valid'] - time_slice)) for q_idx in range(len(retrieve_probs[0])): # pass reward_fin_train[train_trace_id, q_idx, :] *= reward_q_list[q_idx] avg_reward_q_epoch = avg_reward_q_epoch * (train_trace_id) / ( train_trace_id + 1) + reward_q_total / (train_trace_id + 1) for time_slice in range(params['max_story_len']): # pass reward_train[train_trace_id, time_slice, :] *= ( reward_immediate_train[train_trace_id, time_slice] * story_mask_train[train_trace_id, time_slice, 0, 0]) avg_reward_action_epoch[ time_slice] = avg_reward_action_epoch[time_slice] * ( train_trace_id ) / (train_trace_id + 1) + reward_immediate_train[ train_trace_id, time_slice] / (train_trace_id + 1) train_trace_id += 1 all_sim_number += 1 mask_sim *= 0 working_memory.resetEnv() work_space_table.reset() sample_rate[story_id] = story_precision / ( params['story_sims_per_epoch'] * params['max_story_len']) for q_idx in range(params['max_q_num']): if 0: # pass reward_fin_train[:, q_idx, :] -= avg_reward_q_epoch # used as input at the last softmax for time_slice in range(params['max_story_len']): if 0: # pass reward_train[:, time_slice, :] -= avg_reward_action_epoch[ time_slice] # used as input at the last softmax epoch_precision_rate = epoch_precision_rate / ( n_samples * params['story_sims_per_epoch'] * params['max_story_len']) sys.stdout = old_std print 'precision of this epoch: %f' % epoch_precision_rate print 'epoch %d phrase 2' % (epoch_id) print 'sample_rate:' print sample_rate # phrase2: go batch train on the trace pool. mask_sim_2 = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], params['dim_emb_story'] * params['max_sent_len'])) reasoner.fit( [ story_train, story_mask_train, q_train, q_mask_train, mask_sim_2, reward_train, reward_fin_train ], { 'action_probs_re': act_selected_train, 'retrieve_probs_re': fin_one_hot_train }, batch_size=params['batch_size_phrase2'], nb_epoch=10, verbose=2) sys.stdout = old_std # test the model if (epoch_id + 1) % params['test_epoch_period'] == 0: test_model(simulator, story_int_test, story_mask_test, q_int_test, q_mask_test, ans_int_test, ans_mask_test, params, old_std, f_test, vocab_text, inv_vocab_text) sys.stdout = old_std story_train *= 0 story_mask_train *= 0 q_train *= 0 q_mask_train *= 0 act_selected_train *= 0 reward_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_story_len'], (params['ent_range']) * 1 * params['relation_num'])) # real number reward signal ans_train *= 0 ans_mask_train *= 0 fin_train *= 0 fin_train_pred *= 0 reward_fin_train = numpy.ones( (n_samples * params['story_sims_per_epoch'], params['max_q_num'], (params['ent_range']) * 1 * params['relation_num'])) fin_one_hot_train *= 0 reward_immediate_train *= 0 f_print.close() f_debug.close() f_test.close()
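# --- Illustrative sketch (hypothetical helper, mirrors the scaling above) ------
# At the end of every simulated trace the discounted per-slice returns are
# multiplied into reward_train, which the model consumes as an input that
# weights its action softmax; padded story slices are zeroed out through the
# first element of the corresponding story mask.
def _scale_action_rewards(reward_train, reward_immediate, story_mask,
                          trace_id, max_story_len):
    for t in range(max_story_len):
        reward_train[trace_id, t, :] *= (reward_immediate[trace_id, t] *
                                         story_mask[trace_id, t, 0, 0])
    return reward_train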