def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading entity embedding')
    ent_embedding = np.load(export_path + 'ent_embedding.npy')
    print('reading relation embedding')
    rel_embedding = np.load(export_path + 'rel_embedding.npy')
    print('reading test data')
    test_instance_triple = np.load(export_path + 'test_instance_triple.npy')
    test_instance_scope = np.load(export_path + 'test_instance_scope.npy')
    test_len = np.load(export_path + 'test_len.npy')
    test_label = np.load(export_path + 'test_label.npy')
    test_word = np.load(export_path + 'test_word.npy')
    test_pos1 = np.load(export_path + 'test_pos1.npy')
    test_pos2 = np.load(export_path + 'test_pos2.npy')
    test_mask = np.load(export_path + 'test_mask.npy')
    test_head = np.load(export_path + 'test_head.npy')
    test_tail = np.load(export_path + 'test_tail.npy')
    # these arrays hold *test* descriptions; renamed from the original,
    # misleading train_desc_* names
    test_desc_tail = np.load(export_path + 'test_desc_tail.npy')
    test_desc_head = np.load(export_path + 'test_desc_head.npy')
    print('reading finished')
    print('mentions      : %d' % len(test_instance_triple))
    print('sentences     : %d' % len(test_len))
    print('relations     : %d' % FLAGS.num_classes)
    print('word size     : %d' % len(word_vec[0]))
    print('position size : %d' % FLAGS.pos_size)
    print('hidden size   : %d' % FLAGS.hidden_size)

    # desc = {}
    # with open(export_path + 'desc.txt') as f:
    #     for content in f:
    #         en_id, en_desc = content.strip().split('\t')
    #         en_desc = [int(word) for word in en_desc.strip().split(',')]
    #         desc[int(en_id)] = en_desc

    print('building network...')
    sess_db = tf.Session()
    # sess_db = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess_db.add_tensor_filter('has_inf_or_nan', tf_debug.has_inf_or_nan)
    merged_summary = tf.summary.merge_all()
    global_step = tf.Variable(0, name='global_step', trainable=False)
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=False, word_embeddings=word_vec,
                            ent_embedding=ent_embedding,
                            rel_embedding=rel_embedding)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=False, word_embeddings=word_vec,
                             ent_embedding=ent_embedding,
                             rel_embedding=rel_embedding)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)
    sess_db.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    def test_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                  label, scope, head_desc, tail_desc):
        feed_dict = {
            model.head_index: head,
            model.tail_index: tail,
            model.word: word,
            model.pos1: pos1,
            model.pos2: pos2,
            model.mask: mask,
            model.len: leng,
            model.label_index: label_index,
            model.label: label,
            model.scope: scope,
            model.keep_prob: FLAGS.keep_prob,
            model.head_description: head_desc,
            model.tail_description: tail_desc
        }
        if FLAGS.katt_flag == 1:
            output, head_desc_att, tail_desc_att = sess_db.run(
                [model.test_output, model.head_desc_att, model.tail_desc_att],
                feed_dict)
            # np.save('./case_study/head_desc_att', head_desc_att)
            # np.save('./case_study/tail_desc_att', tail_desc_att)
        else:
            output = sess_db.run(model.test_output, feed_dict)
        return output

    f = open('results.txt', 'w')
    f.write('iteration\taverage precision\tP@100\tP@300\tP@500\n')
    for iters in range(1, 15):
        print(iters)
        saver.restore(
            sess_db,
            FLAGS.checkpoint_path + FLAGS.save_name + '/' + FLAGS.model +
            str(FLAGS.katt_flag) + "-" + str(80 * iters))
        summary_writer = tf.summary.FileWriter(FLAGS.summary_dir,
                                               sess_db.graph)
        stack_output = []
        stack_label = []
        iteration = len(test_instance_scope) // FLAGS.test_batch_size
        for i in range(iteration):
            sys.stdout.write('running %d/%d...\r' % (i, iteration))
            sys.stdout.flush()
            input_scope = test_instance_scope[i * FLAGS.test_batch_size:
                                              (i + 1) * FLAGS.test_batch_size]
            index = []
            scope = [0]
            label = []
            for num in input_scope:
                index = index + list(range(num[0], num[1] + 1))
                label.append(test_label[num[0]])
                scope.append(scope[-1] + num[1] - num[0] + 1)
            label_ = np.zeros((FLAGS.test_batch_size, FLAGS.num_classes))
            label_[np.arange(FLAGS.test_batch_size), label] = 1
            output = test_step(test_head[index], test_tail[index],
                               test_word[index, :], test_pos1[index, :],
                               test_pos2[index, :], test_mask[index, :],
                               test_len[index], test_label[index], label_,
                               np.array(scope), test_desc_head[index],
                               test_desc_tail[index])
            stack_output.append(output)
            stack_label.append(label_)
        print('evaluating...')
        stack_output = np.concatenate(stack_output, axis=0)
        stack_label = np.concatenate(stack_label, axis=0)
        # drop the NA relation (index 0) before scoring
        exclude_na_flatten_output = stack_output[:, 1:]
        exclude_na_flatten_label = stack_label[:, 1:]
        print(exclude_na_flatten_output.shape)
        print(exclude_na_flatten_label.shape)
        np.save('./model' + str(FLAGS.alpha) + '/' + FLAGS.model +
                '+sen_att_all_prob_' + str(iters) + '.npy',
                exclude_na_flatten_output)
        np.save('./model' + str(FLAGS.alpha) + '/' + FLAGS.model +
                '+sen_att_all_label_' + str(iters) + '.npy',
                exclude_na_flatten_label)
        average_precision = average_precision_score(exclude_na_flatten_label,
                                                    exclude_na_flatten_output,
                                                    average="micro")
        exclude_na_flatten_label = np.reshape(exclude_na_flatten_label, -1)
        exclude_na_flatten_output = np.reshape(exclude_na_flatten_output, -1)
        order = np.argsort(-exclude_na_flatten_output)
        p_100 = np.mean(exclude_na_flatten_label[order[:100]])
        p_300 = np.mean(exclude_na_flatten_label[order[:300]])
        p_500 = np.mean(exclude_na_flatten_label[order[:500]])
        print('pr: ' + str(average_precision))
        print('p@100: ' + str(p_100))
        print('p@300: ' + str(p_300))
        print('p@500: ' + str(p_500))
        # include the iteration column promised by the header line
        f.write('%d\t%s\t%s\t%s\t%s\n'
                % (iters, average_precision, p_100, p_300, p_500))
    f.close()
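# A minimal offline sketch (not part of the original script) showing how the
# '+sen_att_all_prob_*.npy' / '+sen_att_all_label_*.npy' arrays saved above
# could be turned into a full precision-recall curve; the helper name and the
# file paths passed in are illustrative assumptions.
def plot_pr_curve_from_npy(prob_path, label_path, out_path='pr_curve.png'):
    from sklearn.metrics import precision_recall_curve
    import matplotlib.pyplot as plt
    prob = np.load(prob_path).reshape(-1)    # flattened non-NA scores
    label = np.load(label_path).reshape(-1)  # matching 0/1 labels
    precision, recall, _ = precision_recall_curve(label, prob)
    plt.plot(recall, precision)
    plt.xlabel('recall')
    plt.ylabel('precision')
    plt.savefig(out_path)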
def main(_):
    time_start = time.time()
    save_path = './model/'
    print('reading word embedding')
    word_embedding = np.load('./data/vec.npy')
    print('reading corpus')
    train_y = np.load('./data/small_y.npy')
    train_word = np.load('./data/small_word.npy')
    train_pos1 = np.load('./data/small_pos1.npy')
    train_pos2 = np.load('./data/small_pos2.npy')
    context_word, context_pos1, context_pos2, context_y = context_split(
        train_word, train_pos1, train_pos2, train_y)

    settings = network.Settings()
    settings.vocab_size = len(word_embedding)
    settings.num_classes = len(context_y[0])
    print(settings.num_classes)
    entity_count = settings.entity_count

    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            initializer = tf.contrib.layers.xavier_initializer()
            with tf.variable_scope('model', reuse=None,
                                   initializer=initializer):
                m = network.PCNN(is_training=True,
                                 word_embeddings=word_embedding,
                                 settings=settings)
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(0.001)
            train_op = optimizer.minimize(m.final_loss,
                                          global_step=global_step)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(max_to_keep=None)
            merged_summary = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter(
                FLAGS.summary_dir + '/train_loss', sess.graph)

            def train_step(word_batch, pos1_batch, pos2_batch, y_batch,
                           entity_count):
                feed_dict = {}
                total_shape = []
                total_num = 0
                total_word = [[], [], []]
                total_pos1 = [[], [], []]
                total_pos2 = [[], [], []]
                # flatten the (left, mid, right) sentence pieces of every
                # entity pair into contiguous arrays, recording pair offsets
                for i in range(len(word_batch[0])):
                    total_shape.append(total_num)
                    total_num += len(word_batch[0][i])
                    for loc in range(3):
                        for word in word_batch[loc][i]:
                            total_word[loc].append(word)
                        for pos1 in pos1_batch[loc][i]:
                            total_pos1[loc].append(pos1)
                        for pos2 in pos2_batch[loc][i]:
                            total_pos2[loc].append(pos2)
                total_shape.append(total_num)
                feed_dict[m.total_shape] = np.array(total_shape)
                feed_dict[m.input_word_left] = np.array(total_word[0])
                feed_dict[m.input_word_mid] = np.array(total_word[1])
                feed_dict[m.input_word_right] = np.array(total_word[2])
                feed_dict[m.input_pos1_left] = np.array(total_pos1[0])
                feed_dict[m.input_pos1_mid] = np.array(total_pos1[1])
                feed_dict[m.input_pos1_right] = np.array(total_pos1[2])
                feed_dict[m.input_pos2_left] = np.array(total_pos2[0])
                feed_dict[m.input_pos2_mid] = np.array(total_pos2[1])
                feed_dict[m.input_pos2_right] = np.array(total_pos2[2])
                feed_dict[m.input_y] = y_batch

                _, step, loss, accuracy, summary, l2_loss, final_loss = \
                    sess.run([train_op, global_step, m.total_loss,
                              m.accuracy, merged_summary, m.l2_loss,
                              m.final_loss], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                accuracy = np.reshape(np.array(accuracy), entity_count)
                acc = np.mean(accuracy)
                summary_writer.add_summary(summary, step)
                if step % 50 == 0:
                    print('{}: step {}, softmax_loss {:g}, acc {:g}'.format(
                        time_str, step, loss, acc))

            for one_epoch in range(settings.num_epochs):
                tmp_order = np.arange(len(context_word[0]))
                np.random.shuffle(tmp_order)
                for i in range(len(tmp_order) // settings.entity_count):
                    tmp_word = [[], [], []]
                    tmp_pos1 = [[], [], []]
                    tmp_pos2 = [[], [], []]
                    tmp_y = []
                    tmp_input = tmp_order[i * settings.entity_count:
                                          (i + 1) * settings.entity_count]
                    for k in tmp_input:
                        for loc in range(3):
                            tmp_word[loc].append(context_word[loc][k])
                            tmp_pos1[loc].append(context_pos1[loc][k])
                            tmp_pos2[loc].append(context_pos2[loc][k])
                        tmp_y.append(context_y[k])
                    # skip batches whose flattened length would overflow
                    num = 0
                    for single_word in tmp_word[0]:
                        num += len(single_word)
                    if num > 1500:
                        print('out of range')
                        continue
                    train_step(tmp_word, tmp_pos1, tmp_pos2, tmp_y,
                               settings.entity_count)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step > 9000 and current_step % 500 == 0:
                        print('saving model')
                        path = saver.save(sess, save_path + 'PCNN_model',
                                          global_step=current_step)
                        print('saved model to ' + path)

    time_finish = time.time()
    time_elapsed = time_finish - time_start
    print('Time Used:', str(datetime.timedelta(seconds=time_elapsed)))
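# Hedged usage sketch (an assumption, not from the original file): a
# checkpoint written above as './model/PCNN_model-<step>' can be restored for
# evaluation by rebuilding the same graph under the 'model' variable scope.
# The helper name and the default step number are illustrative.
def restore_pcnn_for_eval(sess, word_embedding, settings, step=17000):
    with tf.variable_scope('model'):
        mtest = network.PCNN(is_training=False,
                             word_embeddings=word_embedding,
                             settings=settings)
    tf.train.Saver().restore(sess, './model/PCNN_model-' + str(step))
    return mtest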
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading training data')
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    print('reading finished')
    print('mentions      : %d' % len(instance_triple))
    print('sentences     : %d' % len(train_len))
    print('relations     : %d' % FLAGS.num_classes)
    print('word size     : %d' % len(word_vec[0]))
    print('position size : %d' % FLAGS.pos_size)
    print('hidden size   : %d' % FLAGS.hidden_size)

    # down-weight frequent relations: weight = 1 / count^0.05
    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print('building network...')
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    global_step_kg_satt = tf.Variable(0, name='global_step_kg_satt',
                                      trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    optimizer_kg_satt = tf.train.GradientDescentOptimizer(
        FLAGS.learning_rate_kg)
    grads_and_vars_kg_satt = optimizer_kg_satt.compute_gradients(
        model.loss_kg_att)
    train_op_kg_satt = optimizer_kg_satt.apply_gradients(
        grads_and_vars_kg_satt, global_step=global_step_kg_satt)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    print('building finished')

    def train_kg_att(coord):

        def train_step_kg_att(h_batch, t_batch, r_batch, r_scope, r_label):
            feed_dict = {
                model.pos_h: h_batch,
                model.pos_t: t_batch,
                model.pos_r: r_batch,
                model.r_scope: r_scope,
                model.r_label: r_label,
                model.r_length: np.array([len(r_label)]),
            }
            _, loss = sess.run([train_op_kg_satt, model.loss_kg_att],
                               feed_dict)
            return loss

        def merge(head, tail, rel):
            # group (head, tail) pairs by relation, keeping only relations
            # that exist in the text-side label set
            rel2pairs = {}
            for (h, t, r) in zip(head, tail, rel):
                if r < FLAGS.num_classes:
                    if r not in rel2pairs:
                        rel2pairs[r] = []
                    rel2pairs[r].append((h, t))
            rel = []
            head = []
            tail = []
            rel_label = []
            rel_config = [0]
            for r in rel2pairs:
                if len(rel2pairs[r]) != 0:
                    rel_config.append(rel_config[-1])
                    rel_label.append(r)
                    for h, t in rel2pairs[r]:
                        rel_config[-1] += 1
                        head.append(h)
                        tail.append(t)
                        rel.append(r)
            return (np.array(head), np.array(tail), np.array(rel),
                    np.array(rel_config), np.array(rel_label))

        batch_size = FLAGS.tri_total // FLAGS.nbatch_kg
        ph = np.zeros(batch_size, dtype=np.int32)
        pt = np.zeros(batch_size, dtype=np.int32)
        pr = np.zeros(batch_size, dtype=np.int32)
        nh = np.zeros(batch_size, dtype=np.int32)
        nt = np.zeros(batch_size, dtype=np.int32)
        nr = np.zeros(batch_size, dtype=np.int32)
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        while not coord.should_stop():
            times_kg += 1
            # if times_kg == 3000:
            #     coord.request_stop()
            res = 0.0
            for batch in range(FLAGS.nbatch_kg):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                h, t, r, r_range, r_label = merge(ph, pt, pr)
                res += train_step_kg_att(h, t, r, r_range, r_label)
            time_str = datetime.datetime.now().isoformat()
            print('batch %d time %s | loss : %f' % (times_kg, time_str, res))

    def train_kg(coord):

        def train_step_kg(pos_h_batch, pos_t_batch, pos_r_batch,
                          neg_h_batch, neg_t_batch, neg_r_batch):
            feed_dict = {
                model.pos_h: pos_h_batch,
                model.pos_t: pos_t_batch,
                model.pos_r: pos_r_batch,
                model.neg_h: neg_h_batch,
                model.neg_t: neg_t_batch,
                model.neg_r: neg_r_batch
            }
            _, step, loss = sess.run(
                [train_op_kg, global_step_kg, model.loss_kg], feed_dict)
            return loss

        batch_size = FLAGS.tri_total // FLAGS.nbatch_kg
        ph = np.zeros(batch_size, dtype=np.int32)
        pt = np.zeros(batch_size, dtype=np.int32)
        pr = np.zeros(batch_size, dtype=np.int32)
        nh = np.zeros(batch_size, dtype=np.int32)
        nt = np.zeros(batch_size, dtype=np.int32)
        nr = np.zeros(batch_size, dtype=np.int32)
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        while not coord.should_stop():
            times_kg += 1
            # if times_kg == 3000:
            #     coord.request_stop()
            res = 0.0
            for batch in range(FLAGS.nbatch_kg):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                res += train_step_kg(ph, pt, pr, nh, nt, nr)
            time_str = datetime.datetime.now().isoformat()
            print('batch %d time %s | loss : %f' % (times_kg, time_str, res))

    def train_nn(coord):

        def train_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                       label, scope, weights):
            feed_dict = {
                model.head_index: head,
                model.tail_index: tail,
                model.word: word,
                model.pos1: pos1,
                model.pos2: pos2,
                model.mask: mask,
                model.len: leng,
                model.label_index: label_index,
                model.label: label,
                model.scope: scope,
                model.keep_prob: FLAGS.keep_prob,
                model.weights: weights
            }
            _, step, loss, summary, output, correct_predictions = sess.run(
                [train_op, global_step, model.loss, merged_summary,
                 model.output, model.correct_predictions], feed_dict)
            summary_writer.add_summary(summary, step)
            return output, loss, correct_predictions

        train_order = list(range(len(instance_triple)))
        save_epoch = 2
        eval_step = 300
        for one_epoch in range(FLAGS.max_epoch):
            print('epoch ' + str(one_epoch + 1) + ' starts!')
            np.random.shuffle(train_order)
            # s1/tot1 track NA accuracy, s2/tot2 not-NA accuracy; totals
            # start at 1.0 to avoid division by zero
            s1 = 0.0
            s2 = 0.0
            tot1 = 1.0
            tot2 = 1.0
            losstot = 0.0
            for i in range(len(train_order) // FLAGS.batch_size):
                input_scope = np.take(
                    instance_scope,
                    train_order[i * FLAGS.batch_size:
                                (i + 1) * FLAGS.batch_size], axis=0)
                index = []
                scope = [0]
                label = []
                weights = []
                for num in input_scope:
                    index = index + list(range(num[0], num[1] + 1))
                    label.append(train_label[num[0]])
                    scope.append(scope[-1] + num[1] - num[0] + 1)
                    weights.append(reltot[train_label[num[0]]])
                label_ = np.zeros((FLAGS.batch_size, FLAGS.num_classes))
                label_[np.arange(FLAGS.batch_size), label] = 1
                output, loss, correct_predictions = train_step(
                    train_head[index], train_tail[index],
                    train_word[index, :], train_pos1[index, :],
                    train_pos2[index, :], train_mask[index, :],
                    train_len[index], train_label[index], label_,
                    np.array(scope), weights)
                losstot += loss
                for s, num in enumerate(correct_predictions):
                    if label[s] == 0:
                        tot1 += 1.0
                        if num:
                            s1 += 1.0
                    else:
                        tot2 += 1.0
                        if num:
                            s2 += 1.0
                time_str = datetime.datetime.now().isoformat()
                # print("batch %d step %d time %s | loss : %f, "
                #       "NA accuracy: %f, not NA accuracy: %f"
                #       % (one_epoch, i, time_str, loss,
                #          s1 / tot1, s2 / tot2))
            current_step = tf.train.global_step(sess, global_step)
            if (one_epoch + 1) % save_epoch == 0:
                print('epoch ' + str(one_epoch + 1) + ' has finished')
                print('saving model...')
                path = saver.save(
                    sess,
                    FLAGS.model_dir + FLAGS.model + str(FLAGS.katt_flag),
                    global_step=current_step)
        coord.request_stop()

    coord = tf.train.Coordinator()
    threads = []
    threads.append(threading.Thread(target=train_kg, args=(coord,)))
    threads.append(threading.Thread(target=train_nn, args=(coord,)))
    threads.append(threading.Thread(target=train_kg_att, args=(coord,)))
    for t in threads:
        t.start()
    coord.join(threads)

    if FLAGS.store_kg_flag != 0:
        print('saving kg...')
        ent_embedding, rel_embedding = sess.run(
            [model.word_embedding, model.rel_embeddings])
        f = open("entity2vec", "w")
        f.write(json.dumps(ent_embedding.tolist()))
        f.close()
        f = open("relation2vec", "w")
        f.write(json.dumps(rel_embedding.tolist()))
        f.close()
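# The `lib` handle used by train_kg/train_kg_att above is assumed to be a C
# negative-sampling library loaded at module level via ctypes, along the
# lines of the sketch below (the shared-object name is an assumption):
#
#   import ctypes
#   lib = ctypes.cdll.LoadLibrary('./init.so')
#
# lib.getBatch then fills the six preallocated int32 buffers in place through
# the raw pointers taken from __array_interface__['data'][0].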
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading test data')
    test_instance_triple = np.load(export_path + 'test_instance_triple.npy')
    test_instance_scope = np.load(export_path + 'test_instance_scope.npy')
    test_len = np.load(export_path + 'test_len.npy')
    test_label = np.load(export_path + 'test_label.npy')
    test_word = np.load(export_path + 'test_word.npy')
    test_pos1 = np.load(export_path + 'test_pos1.npy')
    test_pos2 = np.load(export_path + 'test_pos2.npy')
    test_mask = np.load(export_path + 'test_mask.npy')
    test_head = np.load(export_path + 'test_head.npy')
    test_tail = np.load(export_path + 'test_tail.npy')
    print('reading finished')
    print('mentions      : %d' % len(test_instance_triple))
    print('sentences     : %d' % len(test_len))
    print('relations     : %d' % FLAGS.num_classes)
    print('word size     : %d' % len(word_vec[0]))
    print('position size : %d' % FLAGS.pos_size)
    print('hidden size   : %d' % FLAGS.hidden_size)

    print('building network...')
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=False, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=False, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    def test_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                  label, scope):
        feed_dict = {
            model.head_index: head,
            model.tail_index: tail,
            model.word: word,
            model.pos1: pos1,
            model.pos2: pos2,
            model.mask: mask,
            model.len: leng,
            model.label_index: label_index,
            model.label: label,
            model.scope: scope,
            model.keep_prob: FLAGS.keep_prob
        }
        output = sess.run(model.test_output, feed_dict)
        return output

    FLAGS.test_batch_size = int(FLAGS.test_batch_size)
    FLAGS.num_classes = int(FLAGS.num_classes)
    f = open('results.txt', 'w')
    f.write('iteration\taverage precision\n')
    for iters in range(1, 30):
        print(iters)
        saver.restore(
            sess, FLAGS.checkpoint_path + FLAGS.model +
            str(int(FLAGS.katt_flag)) + "-" + str(3664 * iters))
        stack_output = []
        stack_label = []
        iteration = len(test_instance_scope) // FLAGS.test_batch_size
        for i in range(iteration):
            sys.stdout.write('running %d/%d...\r' % (i, iteration))
            sys.stdout.flush()
            input_scope = test_instance_scope[i * FLAGS.test_batch_size:
                                              (i + 1) * FLAGS.test_batch_size]
            index = []
            scope = [0]
            label = []
            for num in input_scope:
                index = index + list(range(num[0], num[1] + 1))
                label.append(test_label[num[0]])
                scope.append(scope[-1] + num[1] - num[0] + 1)
            label_ = np.zeros((FLAGS.test_batch_size, FLAGS.num_classes))
            label_[np.arange(FLAGS.test_batch_size), label] = 1
            output = test_step(test_head[index], test_tail[index],
                               test_word[index, :], test_pos1[index, :],
                               test_pos2[index, :], test_mask[index, :],
                               test_len[index], test_label[index], label_,
                               np.array(scope))
            stack_output.append(output)
            stack_label.append(label_)
        print('evaluating...')
        stack_output = np.concatenate(stack_output, axis=0)
        stack_label = np.concatenate(stack_label, axis=0)
        # drop the NA relation (index 0) before scoring
        exclude_na_flatten_output = stack_output[:, 1:]
        exclude_na_flatten_label = stack_label[:, 1:]
        print(exclude_na_flatten_output.shape)
        print(exclude_na_flatten_label.shape)
        average_precision = average_precision_score(exclude_na_flatten_label,
                                                    exclude_na_flatten_output,
                                                    average="micro")
        np.save('./' + FLAGS.model + '+sen_att_all_prob_' + str(iters) +
                '.npy', exclude_na_flatten_output)
        np.save('./' + FLAGS.model + '+sen_att_all_label_' + str(iters) +
                '.npy', exclude_na_flatten_label)
        print('average_precision: ' + str(average_precision))
        # include the iteration column promised by the header line
        f.write(str(iters) + '\t' + str(average_precision) + '\n')
    f.close()
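# Hedged sketch (the helper name is illustrative): the P@N numbers reported
# by the description-aware test script above can be reproduced from these
# flattened score/label arrays with the same argsort trick.
def precision_at_k(labels, scores, k):
    order = np.argsort(-scores)  # indices sorted by descending score
    return float(np.mean(labels[order[:k]]))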
def main(_):
    pathname = './model/PCNN_model-'
    word_embedding = np.load('./data/vec.npy')
    test_settings = network.Settings()
    test_settings.vocab_size = 114044
    test_settings.num_classes = test_num_classes = 53
    test_settings.entity_count = test_entity_count = 262 * 9

    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            with tf.variable_scope('model'):
                mtest = network.PCNN(is_training=False,
                                     word_embeddings=word_embedding,
                                     settings=test_settings)
            saver = tf.train.Saver()

            def test_step(word_batch, pos1_batch, pos2_batch, y_batch):
                feed_dict = {}
                total_shape = []
                total_num = 0
                total_word = [[], [], []]
                total_pos1 = [[], [], []]
                total_pos2 = [[], [], []]
                for i in range(len(word_batch[0])):
                    total_shape.append(total_num)
                    total_num += len(word_batch[0][i])
                    for loc in range(3):
                        for word in word_batch[loc][i]:
                            total_word[loc].append(word)
                        for pos1 in pos1_batch[loc][i]:
                            total_pos1[loc].append(pos1)
                        for pos2 in pos2_batch[loc][i]:
                            total_pos2[loc].append(pos2)
                total_shape.append(total_num)
                feed_dict[mtest.total_shape] = np.array(total_shape)
                feed_dict[mtest.input_word_left] = np.array(total_word[0])
                feed_dict[mtest.input_word_mid] = np.array(total_word[1])
                feed_dict[mtest.input_word_right] = np.array(total_word[2])
                feed_dict[mtest.input_pos1_left] = np.array(total_pos1[0])
                feed_dict[mtest.input_pos1_mid] = np.array(total_pos1[1])
                feed_dict[mtest.input_pos1_right] = np.array(total_pos1[2])
                feed_dict[mtest.input_pos2_left] = np.array(total_pos2[0])
                feed_dict[mtest.input_pos2_mid] = np.array(total_pos2[1])
                feed_dict[mtest.input_pos2_right] = np.array(total_pos2[2])
                feed_dict[mtest.input_y] = y_batch
                loss, accuracy, prob = sess.run(
                    [mtest.loss, mtest.accuracy, mtest.prob], feed_dict)
                return prob, accuracy

            def generate_prob(test_y, test_word, test_pos1, test_pos2,
                              test_settings):
                all_prob = []
                acc = []
                entity_count = test_settings.entity_count
                for i in range(len(test_word[0]) // entity_count):
                    prob, accuracy = test_step(
                        slice_cascade_data(test_word, i * entity_count,
                                           (i + 1) * entity_count),
                        slice_cascade_data(test_pos1, i * entity_count,
                                           (i + 1) * entity_count),
                        slice_cascade_data(test_pos2, i * entity_count,
                                           (i + 1) * entity_count),
                        test_y[i * entity_count:(i + 1) * entity_count])
                    acc.append(np.mean(np.reshape(np.array(accuracy),
                                                  entity_count)))
                    prob = np.reshape(np.array(prob),
                                      (entity_count, test_num_classes))
                    for single_prob in prob:
                        all_prob.append(single_prob[1:])
                all_prob = np.reshape(np.array(all_prob), (-1))
                return all_prob

            def print_pn(all_ans, all_prob):
                # precision@N over scores sorted in descending order
                order = np.argsort(-all_prob)
                for n in (100, 200, 300):
                    correct = 0.0
                    for i in order[:n]:
                        if all_ans[i] == 1:
                            correct += 1.0
                    print('P@%d:' % n)
                    print(correct / n)

            def eval_pn(test_y, test_word, test_pos1, test_pos2,
                        test_settings):
                all_prob = generate_prob(test_y, test_word, test_pos1,
                                         test_pos2, test_settings)
                eval_y = []
                for i in test_y:
                    eval_y.append(i[1:])
                all_ans = np.reshape(eval_y, -1)
                print_pn(all_ans, all_prob)

            test_list = [17000]
            for model_iter in test_list:
                saver.restore(sess, pathname + str(model_iter))
                print('Restore Complete')
                print('Evaluating P@N for iter ' + str(model_iter))

                print('Evaluating P@N for one:')
                test_y = np.load('./data/pone_test_y.npy')
                test_word = np.load('./data/pone_test_word.npy')
                test_pos1 = np.load('./data/pone_test_pos1.npy')
                test_pos2 = np.load('./data/pone_test_pos2.npy')
                c_word, c_pos1, c_pos2, c_y = context_split(
                    test_word, test_pos1, test_pos2, test_y)
                eval_pn(c_y, c_word, c_pos1, c_pos2, test_settings)

                print('Evaluating P@N for two:')
                test_y = np.load('./data/ptwo_test_y.npy')
                test_word = np.load('./data/ptwo_test_word.npy')
                test_pos1 = np.load('./data/ptwo_test_pos1.npy')
                test_pos2 = np.load('./data/ptwo_test_pos2.npy')
                c_word, c_pos1, c_pos2, c_y = context_split(
                    test_word, test_pos1, test_pos2, test_y)
                eval_pn(c_y, c_word, c_pos1, c_pos2, test_settings)

                print('Evaluating P@N for all:')
                test_y = np.load('./data/pall_test_y.npy')
                test_word = np.load('./data/pall_test_word.npy')
                test_pos1 = np.load('./data/pall_test_pos1.npy')
                test_pos2 = np.load('./data/pall_test_pos2.npy')
                c_word, c_pos1, c_pos2, c_y = context_split(
                    test_word, test_pos1, test_pos2, test_y)
                eval_pn(c_y, c_word, c_pos1, c_pos2, test_settings)
                print(datetime.datetime.now().isoformat())

                print('Evaluating all test data and save data for PR curve')
                test_y = np.load('./data/testall_y.npy')
                test_word = np.load('./data/testall_word.npy')
                test_pos1 = np.load('./data/testall_pos1.npy')
                test_pos2 = np.load('./data/testall_pos2.npy')
                c_word, c_pos1, c_pos2, c_y = context_split(
                    test_word, test_pos1, test_pos2, test_y)
                print('Sanity Check')
                print(len(c_word[0]), len(c_y))
                all_prob_ = generate_prob(c_y, c_word, c_pos1, c_pos2,
                                          test_settings)
                eval_y = []
                for i in c_y:
                    eval_y.append(i[1:])
                all_ans_ = np.reshape(eval_y, -1)
                print('P@N for all test data:')
                print_pn(all_ans_, all_prob_)

                print('saving all test result...')
                current_step = model_iter
                np.save('./out/all_prob_iter_' + str(current_step) + '.npy',
                        all_prob_)
                # all_prob_ is shorter than all_ans_ because the last
                # incomplete batch is dropped, so trim the answers to match
                all_ans_trimmed = all_ans_[:all_prob_.size]
                avg_precision = average_precision_score(all_ans_trimmed,
                                                        all_prob_)
                print('PR curve area:', str(avg_precision))
                print(datetime.datetime.now().isoformat())
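# `slice_cascade_data` is called above but not defined in this file; judging
# from its call sites it slices the three (left, mid, right) context lists in
# parallel. A minimal sketch under that assumption:
#
#   def slice_cascade_data(data, start, end):
#       return [data[loc][start:end] for loc in range(3)]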
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy') if use_embedding else None
    print('reading training data')
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    # relation index for each sample
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    print('reading finished')
    print('mentions      : %d' % len(instance_triple))
    print('sentences     : %d' % len(train_len))
    print('relations     : %d' % FLAGS.num_classes)
    print('position size : %d' % FLAGS.pos_size)
    print('hidden size   : %d' % FLAGS.hidden_size)

    # count how often each relation occurs over all samples, then give rarer
    # relations a larger training weight: weight = 1 / count^0.05
    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print('building network...')
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(use_embedding, embedding_size, is_training=True,
                            word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(use_embedding, embedding_size, is_training=True,
                             word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(use_embedding, embedding_size, is_training=True,
                            word_embeddings=word_vec, cell_name="LSTM",
                            simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(use_embedding, embedding_size, is_training=True,
                            word_embeddings=word_vec, cell_name="GRU",
                            simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(use_embedding, embedding_size,
                              is_training=True, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(use_embedding, embedding_size,
                              is_training=True, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)
    # once the model type is fixed there is a single shared model; the text
    # and KG sides differ only in their training loops and optimizers

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    # text-side op (SGD)
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    # KG-side op
    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    # saver = tf.train.Saver(max_to_keep=None)
    saver = tf.train.Saver()
    print('building finished')

    def train_kg(coord):
        # the (ph, pt, pr, nh, nt, nr) batches are filled in place by the
        # C sampling library
        def train_step_kg(pos_h_batch, pos_t_batch, pos_r_batch,
                          neg_h_batch, neg_t_batch, neg_r_batch):
            feed_dict = {
                model.pos_h: pos_h_batch,
                model.pos_t: pos_t_batch,
                model.pos_r: pos_r_batch,
                model.neg_h: neg_h_batch,
                model.neg_t: neg_t_batch,
                model.neg_r: neg_r_batch
            }
            _, step, loss = sess.run(
                [train_op_kg, global_step_kg, model.loss_kg], feed_dict)
            return loss

        batch_size = int(FLAGS.tri_total) // int(FLAGS.nbatch_kg)
        # batch_size = int(FLAGS.ent_total) // int(FLAGS.nbatch_kg)
        # one int32 buffer per field: positive/negative head, tail, relation
        ph = np.zeros(batch_size, dtype=np.int32)
        pt = np.zeros(batch_size, dtype=np.int32)
        pr = np.zeros(batch_size, dtype=np.int32)
        nh = np.zeros(batch_size, dtype=np.int32)
        nt = np.zeros(batch_size, dtype=np.int32)
        nr = np.zeros(batch_size, dtype=np.int32)
        # __array_interface__['data'][0] is the address of the first array
        # element; it is handed to the C library as a raw pointer
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        # declare the C signature: six void* buffers plus an int batch size
        lib.getBatch.argtypes = [ctypes.c_void_p, ctypes.c_void_p,
                                 ctypes.c_void_p, ctypes.c_void_p,
                                 ctypes.c_void_p, ctypes.c_void_p,
                                 ctypes.c_int]
        times_kg = 0
        # the KG side trains on an endless stream of batches (there is no
        # notion of an epoch, only a batch size); coord.request_stop() ends it
        while not coord.should_stop():
            times_kg += 1
            res = 0.0
            for batch in range(int(FLAGS.nbatch_kg)):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                res += train_step_kg(ph, pt, pr, nh, nt, nr)
            time_str = datetime.datetime.now().isoformat()
            print('KB batch %d time %s | loss : %f'
                  % (times_kg, time_str, res))
            if pure_KB and times_kg % 20000 == 0:
                print('saving model...')
                path = saver.save(
                    sess,
                    FLAGS.model_dir + FLAGS.model + str(FLAGS.katt_flag) +
                    transX + '_pureKB_' + str(pure_KB) + '_epoch' +
                    str(times_kg) + '_nkb' + str(FLAGS.nbatch_kg) + '_win' +
                    str(FLAGS.win_size) + '_' + str(embedding_size))
                print('saved model to ' + path)
            if pure_KB and times_kg == 160000:
                coord.request_stop()

    def train_nn(coord):
        # all inputs come straight from the preprocessed numpy arrays
        def train_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                       label, scope, weights):
            feed_dict = {
                model.head_index: head,
                model.tail_index: tail,
                model.word: word,
                model.pos1: pos1,
                model.pos2: pos2,
                model.mask: mask,
                model.len: leng,
                model.label_index: label_index,  # (B,) true relation index
                model.label: label,  # (B, |R|) one-hot labels
                model.scope: scope,
                model.keep_prob: FLAGS.keep_prob,
                model.weights: weights
            }
            _, step, loss, summary, output, correct_predictions = sess.run(
                [train_op, global_step, model.loss, merged_summary,
                 model.output, model.correct_predictions], feed_dict)
            summary_writer.add_summary(summary, step)
            return output, loss, correct_predictions

        train_order = list(range(len(instance_triple)))
        save_epoch = 150
        for one_epoch in range(FLAGS.max_epoch):
            print('epoch ' + str(one_epoch + 1) + ' starts!')
            np.random.shuffle(train_order)
            s2 = 0.0
            tot2 = 0.0
            losstot = 0.0
            for i in range(len(train_order) // FLAGS.batch_size):
                input_scope = np.take(
                    instance_scope,
                    train_order[i * FLAGS.batch_size:
                                (i + 1) * FLAGS.batch_size], axis=0)
                index = []
                scope = [0]
                label = []  # true relation index per sample
                weights = []
                for num in input_scope:
                    index = index + list(range(num[0], num[1] + 1))
                    label.append(train_label[num[0]])
                    scope.append(scope[-1] + num[1] - num[0] + 1)
                    weights.append(reltot[train_label[num[0]]])
                label_ = np.zeros((FLAGS.batch_size, FLAGS.num_classes))
                label_[np.arange(FLAGS.batch_size), label] = 1
                # correct_predictions has shape (B,): 1 where the sample was
                # predicted correctly, else 0
                output, loss, correct_predictions = train_step(
                    train_head[index], train_tail[index],
                    train_word[index, :], train_pos1[index, :],
                    train_pos2[index, :], train_mask[index, :],
                    train_len[index], train_label[index], label_,
                    np.array(scope), weights)
                losstot += loss
                for num in correct_predictions:
                    tot2 += 1.0
                    if num:
                        s2 += 1.0
                time_str = datetime.datetime.now().isoformat()
                print('epoch %d batch %d time %s | loss : %f, accuracy: %f'
                      % (one_epoch, i, time_str, loss, s2 / tot2))
            current_step = tf.train.global_step(sess, global_step)
            if (one_epoch + 1) % save_epoch == 0:
                print('epoch ' + str(one_epoch + 1) + ' has finished')
                print('saving model...')
                path = saver.save(
                    sess,
                    FLAGS.model_dir + FLAGS.model + str(FLAGS.katt_flag) +
                    transX + '_pureKB_' + str(pure_KB) + '_epoch' +
                    str(one_epoch) + '_nkb' + str(FLAGS.nbatch_kg) + '_win' +
                    str(FLAGS.win_size) + '_' + str(embedding_size))
                print('saved model to ' + path)
        coord.request_stop()

    coord = tf.train.Coordinator()
    threads = []
    threads.append(threading.Thread(target=train_kg, args=(coord,)))
    if not pure_KB:
        threads.append(threading.Thread(target=train_nn, args=(coord,)))
    for t in threads:
        t.start()
    coord.join(threads)
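# The FLAGS fields referenced above (model_dir, nbatch_kg, win_size,
# learning_rate_kg, ...) are assumed to be declared elsewhere with the TF1
# flags API; the exact defaults below are assumptions for illustration:
#
#   tf.app.flags.DEFINE_string('model_dir', './model/', 'path to store model')
#   tf.app.flags.DEFINE_integer('nbatch_kg', 100, 'number of KG batches')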
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading training data')
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    print('reading finished')
    print('mentions      : %d' % len(instance_triple))
    print('sentences     : %d' % len(train_len))
    print('relations     : %d' % FLAGS.num_classes)
    print('word size     : %d' % len(word_vec[0]))
    print('position size : %d' % FLAGS.pos_size)
    print('hidden size   : %d' % FLAGS.hidden_size)

    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print('building network...')
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)

    print("================================================")
    # `itera` (the checkpoint iteration) is expected to be defined at module
    # level; sys.argv[1] selects the model directory prefix
    saver.restore(
        sess, FLAGS.model_dir + sys.argv[1] + FLAGS.model +
        str(FLAGS.katt_flag) + "-" + str(itera))
    # note: the exported entity vectors are read from model.word_embedding
    ent_embedding, rel_embedding = sess.run(
        [model.word_embedding, model.rel_embeddings])
    f = open(export_path + "entity2vec", "w")
    f.write(json.dumps(ent_embedding.tolist()))
    f.close()
    f = open(export_path + "relation2vec", "w")
    f.write(json.dumps(rel_embedding.tolist()))
    f.close()
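# Each of these scripts defines main(_) in the TF1 app style; presumably they
# are launched with the standard entry point (not shown in the excerpt):
if __name__ == '__main__':
    tf.app.run()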