def main():
    n_entities, headings, raw_data = currency_data()
    # random.seed(0)
    testing_weeks = int(0.75 * len(raw_data) / (24 * 7))
    training_set, training_prices, test_set, test_prices = fix_data(
        headings, raw_data, testing_weeks)
    rnn = network.RNN(n_entities,
                      int((len(raw_data[0]) - 2) / n_entities),
                      7 * 24, 1)
    rnn.train_to_max(training_set, training_prices, test_set, test_prices,
                     1000)
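# --- Sketch (not from the source): the split arithmetic above, assuming one
# row of raw_data per hour (suggested by the 24 * 7 factor and the 7 * 24
# step count passed to network.RNN); the row count is hypothetical.
rows = 16800                              # e.g. 100 weeks of hourly data
rows_per_week = 24 * 7                    # 168
print(int(0.75 * rows / rows_per_week))   # 75, the value passed to fix_data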
def main():
    args = parse_args()
    # The random seed affects np.random.choice below.
    np.random.seed(args.seed)

    print 'Reading input'
    print '\t- Input file: %s' % (args.input)
    ds = dataset.Dataset(args.input, 50, 50)
    print 'Done reading input'

    print 'Building network'
    config = network.RNNConfig(ds.hash, args.nhidden, args.nlayers,
                               ds.num_classes)
    rnn = network.RNN(config, 1, 1)
    chkpt_path = config.get_checkpoint_path(args.savedir)
    print '\t- Checkpoint path: %s' % (chkpt_path)
    print 'Done building network'

    print 'Initializing session'
    # Initialize the TensorFlow variables.
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        saver = tf.train.Saver(tf.global_variables())
        if tf.train.checkpoint_exists(chkpt_path):
            print '\t- Restoring graph from checkpoint'
            saver.restore(sess, chkpt_path)
        print 'Done initializing session'

        # Prime the network on the prompt one character at a time; only the
        # forward pass matters here, so the output probabilities are discarded.
        x = np.zeros((1, 1))
        for cur in args.prime[:-1]:
            x[0, 0] = ds.encode(cur)
            feed = {rnn.inputs: x}
            sess.run([rnn.probs], feed_dict=feed)

        # Generate: sample a symbol from the softmax output, feed it back in.
        generated = args.prime
        cur = args.prime[-1]
        for i in range(args.length):
            x[0, 0] = ds.encode(cur)
            feed = {rnn.inputs: x}
            probs = sess.run([rnn.probs], feed_dict=feed)
            symbol = np.random.choice(ds.num_classes,
                                      p=np.reshape(probs[0], ds.num_classes))
            nxt = ds.decode(symbol)  # renamed from `next`, a Python builtin
            generated += nxt
            cur = nxt
        print generated
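# --- Sketch (not from the source): how np.random.choice draws the next
# symbol from a softmax distribution, as in the generation loop above; the
# probabilities are made up.
import numpy as np

np.random.seed(0)                              # mirrors np.random.seed(args.seed)
probs = np.array([0.1, 0.7, 0.2])              # hypothetical output over 3 symbols
symbol = np.random.choice(len(probs), p=probs)
print(symbol)                                  # most often 1, the likeliest symbol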
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading entity embedding')
    ent_embedding = np.load(export_path + 'ent_embedding.npy')
    print('reading relation embedding')
    rel_embedding = np.load(export_path + 'rel_embedding.npy')
    print('reading test data')
    test_instance_triple = np.load(export_path + 'test_instance_triple.npy')
    test_instance_scope = np.load(export_path + 'test_instance_scope.npy')
    test_len = np.load(export_path + 'test_len.npy')
    test_label = np.load(export_path + 'test_label.npy')
    test_word = np.load(export_path + 'test_word.npy')
    test_pos1 = np.load(export_path + 'test_pos1.npy')
    test_pos2 = np.load(export_path + 'test_pos2.npy')
    test_mask = np.load(export_path + 'test_mask.npy')
    test_head = np.load(export_path + 'test_head.npy')
    test_tail = np.load(export_path + 'test_tail.npy')
    # Renamed from train_desc_*: these load the *test* description files.
    test_desc_tail = np.load(export_path + 'test_desc_tail.npy')
    test_desc_head = np.load(export_path + 'test_desc_head.npy')
    print('reading finished')
    print('mentions      : %d' % (len(test_instance_triple)))
    print('sentences     : %d' % (len(test_len)))
    print('relations     : %d' % (FLAGS.num_classes))
    print('word size     : %d' % (len(word_vec[0])))
    print('position size : %d' % (FLAGS.pos_size))
    print('hidden size   : %d' % (FLAGS.hidden_size))

    # desc = {}
    # with open(export_path + 'desc.txt') as f:
    #     for content in f:
    #         en_id, en_desc = content.strip().split('\t')
    #         en_desc = en_desc.strip().split(',')
    #         en_desc = [int(word) for word in en_desc]
    #         desc[int(en_id)] = en_desc

    print('building network...')
    sess_db = tf.Session()
    # sess_db = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess_db.add_tensor_filter('has_inf_or_nan', tf_debug.has_inf_or_nan)
    merged_summary = tf.summary.merge_all()
    global_step = tf.Variable(0, name='global_step', trainable=False)

    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=False,
                            word_embeddings=word_vec,
                            ent_embedding=ent_embedding,
                            rel_embedding=rel_embedding)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=False,
                             word_embeddings=word_vec,
                             ent_embedding=ent_embedding,
                             rel_embedding=rel_embedding)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)

    sess_db.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    def test_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                  label, scope, head_desc, tail_desc):
        feed_dict = {
            model.head_index: head,
            model.tail_index: tail,
            model.word: word,
            model.pos1: pos1,
            model.pos2: pos2,
            model.mask: mask,
            model.len: leng,
            model.label_index: label_index,
            model.label: label,
            model.scope: scope,
            model.keep_prob: FLAGS.keep_prob,
            model.head_description: head_desc,
            model.tail_description: tail_desc
        }
        if FLAGS.katt_flag == 1:
            # With knowledge attention, also fetch the description attentions
            # (the case-study saves below are left commented out).
            output, head_desc_att, tail_desc_att = sess_db.run(
                [model.test_output, model.head_desc_att, model.tail_desc_att],
                feed_dict)
            # np.save('./case_study/head_desc_att', head_desc_att)
            # np.save('./case_study/tail_desc_att', tail_desc_att)
        else:
            output = sess_db.run(model.test_output, feed_dict)
        return output

    f = open('results.txt', 'w')
    f.write('iteration\taverage precision\tP@100\tP@300\tP@500\n')
    for iters in range(1, 15):
        print(iters)
        saver.restore(
            sess_db,
            FLAGS.checkpoint_path + FLAGS.save_name + '/' + FLAGS.model +
            str(FLAGS.katt_flag) + "-" + str(80 * iters))
        summary_writer = tf.summary.FileWriter(FLAGS.summary_dir,
                                               sess_db.graph)

        stack_output = []
        stack_label = []
        iteration = len(test_instance_scope) // FLAGS.test_batch_size
        for i in range(iteration):
            temp_str = 'running ' + str(i) + '/' + str(iteration) + '...'
            sys.stdout.write(temp_str + '\r')
            sys.stdout.flush()
            input_scope = test_instance_scope[i * FLAGS.test_batch_size:
                                              (i + 1) * FLAGS.test_batch_size]
            index = []
            scope = [0]
            label = []
            for num in input_scope:
                index = index + list(range(num[0], num[1] + 1))
                label.append(test_label[num[0]])
                scope.append(scope[len(scope) - 1] + num[1] - num[0] + 1)
            label_ = np.zeros((FLAGS.test_batch_size, FLAGS.num_classes))
            label_[np.arange(FLAGS.test_batch_size), label] = 1
            output = test_step(test_head[index], test_tail[index],
                               test_word[index, :], test_pos1[index, :],
                               test_pos2[index, :], test_mask[index, :],
                               test_len[index], test_label[index], label_,
                               np.array(scope), test_desc_head[index],
                               test_desc_tail[index])
            stack_output.append(output)
            stack_label.append(label_)

        print('evaluating...')
        stack_output = np.concatenate(stack_output, axis=0)
        stack_label = np.concatenate(stack_label, axis=0)
        # Column 0 is the NA relation; exclude it from the evaluation.
        exclude_na_flatten_output = stack_output[:, 1:]
        exclude_na_flatten_label = stack_label[:, 1:]
        print(exclude_na_flatten_output.shape)
        print(exclude_na_flatten_label.shape)
        np.save(
            './' + 'model' + str(FLAGS.alpha) + '/' + FLAGS.model +
            '+sen_att_all_prob_' + str(iters) + '.npy',
            exclude_na_flatten_output)
        np.save(
            './' + 'model' + str(FLAGS.alpha) + '/' + FLAGS.model +
            '+sen_att_all_label_' + str(iters) + '.npy',
            exclude_na_flatten_label)

        average_precision = average_precision_score(exclude_na_flatten_label,
                                                    exclude_na_flatten_output,
                                                    average="micro")
        exclude_na_flatten_label = np.reshape(exclude_na_flatten_label, -1)
        exclude_na_flatten_output = np.reshape(exclude_na_flatten_output, -1)
        order = np.argsort(-exclude_na_flatten_output)
        p_100 = np.mean(exclude_na_flatten_label[order[:100]])
        p_300 = np.mean(exclude_na_flatten_label[order[:300]])
        p_500 = np.mean(exclude_na_flatten_label[order[:500]])
        print('pr: ' + str(average_precision))
        print('p@100: ' + str(p_100))
        print('p@300: ' + str(p_300))
        print('p@500: ' + str(p_500))
        # Include the iteration number so the row matches the header above.
        f.write(
            str(iters) + '\t' + str(average_precision) + '\t' + str(p_100) +
            '\t' + str(p_300) + '\t' + str(p_500) + '\n')
    f.close()
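# --- Sketch (not from the source): P@N as computed above is just the mean
# label over the top-N scored pairs; scores and labels here are made up.
import numpy as np

scores = np.array([0.9, 0.1, 0.8, 0.4])   # hypothetical flattened outputs
labels = np.array([1, 0, 0, 1])           # matching 0/1 ground truth
order = np.argsort(-scores)               # indices by descending score
print(np.mean(labels[order[:2]]))         # P@2 = 0.5 here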
def main(_):
    print 'reading word embedding'
    word_vec = np.load(export_path + 'vec.npy')
    print 'reading training data'
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    print 'reading finished'
    print 'mentions      : %d' % (len(instance_triple))
    print 'sentences     : %d' % (len(train_len))
    print 'relations     : %d' % (FLAGS.num_classes)
    print 'word size     : %d' % (len(word_vec[0]))
    print 'position size : %d' % (FLAGS.pos_size)
    print 'hidden size   : %d' % (FLAGS.hidden_size)

    # Count each relation's frequency and down-weight frequent relations
    # with an inverse 0.05-power of the count.
    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print 'building network...'
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    global_step_kg_satt = tf.Variable(0, name='global_step_kg_satt',
                                      trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    # Three SGD training ops: the text model, the KG embeddings, and the KG
    # loss with sentence attention, each with its own global step.
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    optimizer_kg_satt = tf.train.GradientDescentOptimizer(
        FLAGS.learning_rate_kg)
    grads_and_vars_kg_satt = optimizer_kg_satt.compute_gradients(
        model.loss_kg_att)
    train_op_kg_satt = optimizer_kg_satt.apply_gradients(
        grads_and_vars_kg_satt, global_step=global_step_kg_satt)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    print 'building finished'

    def train_kg_att(coord):

        def train_step_kg_att(h_batch, t_batch, r_batch, r_scope, r_label):
            feed_dict = {
                model.pos_h: h_batch,
                model.pos_t: t_batch,
                model.pos_r: r_batch,
                model.r_scope: r_scope,
                model.r_label: r_label,
                model.r_length: np.array([len(r_label)]),
            }
            _, loss = sess.run([train_op_kg_satt, model.loss_kg_att],
                               feed_dict)
            return loss

        def merge(head, tail, rel):
            # Group (head, tail) pairs by relation; rel_config records the
            # running group boundaries, analogous to `scope` for sentences.
            groups = {}  # renamed from `hash`, which shadows a builtin
            for (h, t, r) in zip(head, tail, rel):
                if r < FLAGS.num_classes:
                    if r not in groups:
                        groups[r] = []
                    groups[r].append((h, t))
            rel = []
            head = []
            tail = []
            rel_label = []
            rel_config = [0]
            for r in groups:
                if len(groups[r]) != 0:
                    rel_config.append(rel_config[-1])
                    rel_label.append(r)
                    for h, t in groups[r]:
                        rel_config[-1] += 1
                        head.append(h)
                        tail.append(t)
                        rel.append(r)
            return (np.array(head), np.array(tail), np.array(rel),
                    np.array(rel_config), np.array(rel_label))

        batch_size = FLAGS.tri_total // FLAGS.nbatch_kg
        ph = np.zeros(batch_size, dtype=np.int32)
        pt = np.zeros(batch_size, dtype=np.int32)
        pr = np.zeros(batch_size, dtype=np.int32)
        nh = np.zeros(batch_size, dtype=np.int32)
        nt = np.zeros(batch_size, dtype=np.int32)
        nr = np.zeros(batch_size, dtype=np.int32)
        # Raw buffer addresses so the C sampling library fills the arrays
        # in place.
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        while not coord.should_stop():
            times_kg += 1
            # if times_kg == 3000:
            #     coord.request_stop()
            res = 0.0
            for batch in range(FLAGS.nbatch_kg):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                h, t, r, r_range, r_label = merge(ph, pt, pr)
                res += train_step_kg_att(h, t, r, r_range, r_label)
            time_str = datetime.datetime.now().isoformat()
            print "batch %d time %s | loss : %f" % (times_kg, time_str, res)

    def train_kg(coord):

        def train_step_kg(pos_h_batch, pos_t_batch, pos_r_batch, neg_h_batch,
                          neg_t_batch, neg_r_batch):
            feed_dict = {
                model.pos_h: pos_h_batch,
                model.pos_t: pos_t_batch,
                model.pos_r: pos_r_batch,
                model.neg_h: neg_h_batch,
                model.neg_t: neg_t_batch,
                model.neg_r: neg_r_batch
            }
            _, step, loss = sess.run(
                [train_op_kg, global_step_kg, model.loss_kg], feed_dict)
            return loss

        batch_size = FLAGS.tri_total // FLAGS.nbatch_kg
        ph = np.zeros(batch_size, dtype=np.int32)
        pt = np.zeros(batch_size, dtype=np.int32)
        pr = np.zeros(batch_size, dtype=np.int32)
        nh = np.zeros(batch_size, dtype=np.int32)
        nt = np.zeros(batch_size, dtype=np.int32)
        nr = np.zeros(batch_size, dtype=np.int32)
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        while not coord.should_stop():
            times_kg += 1
            # if times_kg == 3000:
            #     coord.request_stop()
            res = 0.0
            for batch in range(FLAGS.nbatch_kg):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                res += train_step_kg(ph, pt, pr, nh, nt, nr)
            time_str = datetime.datetime.now().isoformat()
            print "batch %d time %s | loss : %f" % (times_kg, time_str, res)

    def train_nn(coord):

        def train_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                       label, scope, weights):
            feed_dict = {
                model.head_index: head,
                model.tail_index: tail,
                model.word: word,
                model.pos1: pos1,
                model.pos2: pos2,
                model.mask: mask,
                model.len: leng,
                model.label_index: label_index,
                model.label: label,
                model.scope: scope,
                model.keep_prob: FLAGS.keep_prob,
                model.weights: weights
            }
            _, step, loss, summary, output, correct_predictions = sess.run(
                [train_op, global_step, model.loss, merged_summary,
                 model.output, model.correct_predictions], feed_dict)
            summary_writer.add_summary(summary, step)
            return output, loss, correct_predictions

        train_order = range(len(instance_triple))
        save_epoch = 2
        for one_epoch in range(FLAGS.max_epoch):
            print 'epoch ' + str(one_epoch + 1) + ' starts!'
            np.random.shuffle(train_order)
            s1 = 0.0
            s2 = 0.0
            tot1 = 1.0
            tot2 = 1.0
            losstot = 0.0
            for i in range(int(len(train_order) / float(FLAGS.batch_size))):
                input_scope = np.take(
                    instance_scope,
                    train_order[i * FLAGS.batch_size:
                                (i + 1) * FLAGS.batch_size],
                    axis=0)
                index = []
                scope = [0]
                label = []
                weights = []
                for num in input_scope:
                    index = index + range(num[0], num[1] + 1)
                    label.append(train_label[num[0]])
                    scope.append(scope[len(scope) - 1] + num[1] - num[0] + 1)
                    weights.append(reltot[train_label[num[0]]])
                label_ = np.zeros((FLAGS.batch_size, FLAGS.num_classes))
                label_[np.arange(FLAGS.batch_size), label] = 1
                output, loss, correct_predictions = train_step(
                    train_head[index], train_tail[index],
                    train_word[index, :], train_pos1[index, :],
                    train_pos2[index, :], train_mask[index, :],
                    train_len[index], train_label[index], label_,
                    np.array(scope), weights)
                s = 0
                losstot += loss
                # Track accuracy separately for NA (label 0) and non-NA labels.
                for num in correct_predictions:
                    if label[s] == 0:
                        tot1 += 1.0
                        if num:
                            s1 += 1.0
                    else:
                        tot2 += 1.0
                        if num:
                            s2 += 1.0
                    s = s + 1
                time_str = datetime.datetime.now().isoformat()
                # print "batch %d step %d time %s | loss : %f, NA accuracy: %f, not NA accuracy: %f" % (one_epoch, i, time_str, loss, s1 / tot1, s2 / tot2)
                current_step = tf.train.global_step(sess, global_step)
            if (one_epoch + 1) % save_epoch == 0:
                print 'epoch ' + str(one_epoch + 1) + ' has finished'
                print 'saving model...'
                path = saver.save(sess,
                                  FLAGS.model_dir + FLAGS.model +
                                  str(FLAGS.katt_flag),
                                  global_step=current_step)
        coord.request_stop()

    coord = tf.train.Coordinator()
    threads = []
    threads.append(threading.Thread(target=train_kg, args=(coord,)))
    threads.append(threading.Thread(target=train_nn, args=(coord,)))
    threads.append(threading.Thread(target=train_kg_att, args=(coord,)))
    for t in threads:
        t.start()
    coord.join(threads)

    if FLAGS.store_kg_flag != 0:
        print 'saving kg...'
        ent_embedding, rel_embedding = sess.run(
            [model.word_embedding, model.rel_embeddings])
        ent_embedding = ent_embedding.tolist()
        rel_embedding = rel_embedding.tolist()
        f = open("entity2vec", "w")
        f.write(json.dumps(ent_embedding))
        f.close()
        f = open("relation2vec", "w")
        f.write(json.dumps(rel_embedding))
        f.close()
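# --- Sketch (not from the source): the ctypes pattern the KG loops above use
# to let a C library fill NumPy buffers in place. The library name and the
# getBatch signature below are assumptions modeled on the code above.
import ctypes
import numpy as np

# Hypothetical shared library exposing:
#   void getBatch(int *ph, int *pt, int *pr, int *nh, int *nt, int *nr, int n);
lib = ctypes.cdll.LoadLibrary('./libsample.so')
lib.getBatch.argtypes = [ctypes.c_void_p] * 6 + [ctypes.c_int]

batch_size = 128
buffers = [np.zeros(batch_size, dtype=np.int32) for _ in range(6)]
# __array_interface__['data'][0] is the address of each array's data area,
# so the C side writes the sampled triples straight into these arrays.
addrs = [b.__array_interface__['data'][0] for b in buffers]
lib.getBatch(*(addrs + [batch_size]))
ph, pt, pr, nh, nt, nr = buffers  # filled in place by the C library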
def main(_):
    print 'reading word embedding'
    word_vec = np.load(export_path + 'vec.npy')
    print 'reading test data'
    test_instance_triple = np.load(export_path + 'test_instance_triple.npy')
    test_instance_scope = np.load(export_path + 'test_instance_scope.npy')
    test_len = np.load(export_path + 'test_len.npy')
    test_label = np.load(export_path + 'test_label.npy')
    test_word = np.load(export_path + 'test_word.npy')
    test_pos1 = np.load(export_path + 'test_pos1.npy')
    test_pos2 = np.load(export_path + 'test_pos2.npy')
    test_mask = np.load(export_path + 'test_mask.npy')
    test_head = np.load(export_path + 'test_head.npy')
    test_tail = np.load(export_path + 'test_tail.npy')
    print 'reading finished'
    print 'mentions      : %d' % (len(test_instance_triple))
    print 'sentences     : %d' % (len(test_len))
    print 'relations     : %d' % (FLAGS.num_classes)
    print 'word size     : %d' % (len(word_vec[0]))
    print 'position size : %d' % (FLAGS.pos_size)
    print 'hidden size   : %d' % (FLAGS.hidden_size)

    print 'building network...'
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=False, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=False, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    def test_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                  label, scope):
        feed_dict = {
            model.head_index: head,
            model.tail_index: tail,
            model.word: word,
            model.pos1: pos1,
            model.pos2: pos2,
            model.mask: mask,
            model.len: leng,
            model.label_index: label_index,
            model.label: label,
            model.scope: scope,
            model.keep_prob: FLAGS.keep_prob
        }
        output = sess.run(model.test_output, feed_dict)
        return output

    FLAGS.test_batch_size = int(FLAGS.test_batch_size)
    FLAGS.num_classes = int(FLAGS.num_classes)
    f = open('results.txt', 'w')
    f.write('iteration\taverage precision\n')
    for iters in range(1, 30):
        print iters
        saver.restore(
            sess, FLAGS.checkpoint_path + FLAGS.model +
            str(int(FLAGS.katt_flag)) + "-" + str(3664 * iters))
        stack_output = []
        stack_label = []
        iteration = len(test_instance_scope) // FLAGS.test_batch_size
        for i in range(iteration):
            temp_str = 'running ' + str(i) + '/' + str(iteration) + '...'
            sys.stdout.write(temp_str + '\r')
            sys.stdout.flush()
            input_scope = test_instance_scope[i * FLAGS.test_batch_size:
                                              (i + 1) * FLAGS.test_batch_size]
            index = []
            scope = [0]
            label = []
            for num in input_scope:
                index = index + range(num[0], num[1] + 1)
                label.append(test_label[num[0]])
                scope.append(scope[len(scope) - 1] + num[1] - num[0] + 1)
            label_ = np.zeros((FLAGS.test_batch_size, FLAGS.num_classes))
            label_[np.arange(FLAGS.test_batch_size), label] = 1
            output = test_step(test_head[index], test_tail[index],
                               test_word[index, :], test_pos1[index, :],
                               test_pos2[index, :], test_mask[index, :],
                               test_len[index], test_label[index], label_,
                               np.array(scope))
            stack_output.append(output)
            stack_label.append(label_)

        print 'evaluating...'
        stack_output = np.concatenate(stack_output, axis=0)
        stack_label = np.concatenate(stack_label, axis=0)
        # Column 0 is the NA relation; exclude it from the evaluation.
        exclude_na_flatten_output = stack_output[:, 1:]
        exclude_na_flatten_label = stack_label[:, 1:]
        print exclude_na_flatten_output.shape
        print exclude_na_flatten_label.shape
        average_precision = average_precision_score(exclude_na_flatten_label,
                                                    exclude_na_flatten_output,
                                                    average="micro")
        np.save('./' + FLAGS.model + '+sen_att_all_prob_' + str(iters) +
                '.npy', exclude_na_flatten_output)
        np.save('./' + FLAGS.model + '+sen_att_all_label_' + str(iters) +
                '.npy', exclude_na_flatten_label)
        print 'average_precision: ' + str(average_precision)
        # Include the iteration number so the row matches the header above.
        f.write(str(iters) + '\t' + str(average_precision) + '\n')
    f.close()
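# --- Sketch (not from the source): micro-averaged average precision as used
# above, on toy multi-label data with the NA column already stripped.
import numpy as np
from sklearn.metrics import average_precision_score

labels = np.array([[1, 0], [0, 1], [0, 0]])              # 0/1 ground truth
scores = np.array([[0.8, 0.1], [0.3, 0.7], [0.2, 0.4]])  # model scores
print(average_precision_score(labels, scores, average="micro"))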
# -------------------------------------------------------------------
x = tf.placeholder("float", [None, config["n_steps"], config["n_input"]])
keep_prob = tf.placeholder("float")

weights = {
    'out': tf.get_variable("weights_1",
                           shape=[config["n_hidden"], config["n_classes"]],
                           initializer=tf.contrib.layers.xavier_initializer(),
                           dtype=tf.float32),
}
biases = {'out': tf.Variable(tf.zeros([config["n_classes"]]))}

pred = network.RNN(x, keep_prob, weights, biases, config)

# Five checkpoint directories, passed as argv[6..10].
model_files = sys.argv[6:11]
preds = []
with tf.Session() as sess:
    # One Saver suffices; restore each checkpoint into the same graph in turn.
    saver = tf.train.Saver()
    for f in model_files:
        saver.restore(sess, f + "/60000.ckpt")
        preds.append(
            sess.run(pred, feed_dict={x: predict_data, keep_prob: 1.0})[0])
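# --- Sketch (not from the source): the snippet above only collects the five
# checkpoints' outputs; a common follow-up, assumed here rather than shown in
# the source, is to average them into an ensemble prediction.
import numpy as np

preds = [np.array([0.2, 0.5, 0.3]),   # hypothetical per-model class scores
         np.array([0.1, 0.6, 0.3]),
         np.array([0.3, 0.4, 0.3])]
ensemble = np.mean(np.stack(preds, axis=0), axis=0)
print(int(np.argmax(ensemble)))       # 1: the averaged ensemble prediction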
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy') if use_embedding else None
    print('reading training data')
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    # train_label holds the relation index for each sample.
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    print('reading finished')
    print('mentions      : %d' % (len(instance_triple)))
    print('sentences     : %d' % (len(train_len)))
    print('relations     : %d' % (FLAGS.num_classes))
    print('position size : %d' % (FLAGS.pos_size))
    print('hidden size   : %d' % (FLAGS.hidden_size))

    # Count how often each relation occurs across all samples and give
    # frequent relations smaller weights (inverse 0.05-power of the count).
    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print('building network...')
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(use_embedding, embedding_size, is_training=True,
                            word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(use_embedding, embedding_size, is_training=True,
                             word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(use_embedding, embedding_size, is_training=True,
                            word_embeddings=word_vec, cell_name="LSTM",
                            simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(use_embedding, embedding_size, is_training=True,
                            word_embeddings=word_vec, cell_name="GRU",
                            simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(use_embedding, embedding_size, is_training=True,
                              word_embeddings=word_vec, cell_name="LSTM",
                              simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(use_embedding, embedding_size, is_training=True,
                              word_embeddings=word_vec, cell_name="GRU",
                              simple_position=True)
    # Once the model is chosen, the rest of the setup is shared: two training
    # ops with their own global steps and learning rates.

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    # Text op (SGD).
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    # KG op.
    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    # saver = tf.train.Saver(max_to_keep=None)
    saver = tf.train.Saver()
    print('building finished')

    def train_kg(coord):
        # Batches (ph, pt, pr, nh, nt, nr) are sampled by the C library below.

        def train_step_kg(pos_h_batch, pos_t_batch, pos_r_batch, neg_h_batch,
                          neg_t_batch, neg_r_batch):
            feed_dict = {
                model.pos_h: pos_h_batch,
                model.pos_t: pos_t_batch,
                model.pos_r: pos_r_batch,
                model.neg_h: neg_h_batch,
                model.neg_t: neg_t_batch,
                model.neg_r: neg_r_batch
            }
            _, step, loss = sess.run(
                [train_op_kg, global_step_kg, model.loss_kg], feed_dict)
            return loss

        batch_size = int(FLAGS.tri_total) // int(FLAGS.nbatch_kg)
        # tried settings: 100,600 / 300,200 / 200,300 / 1000,60
        # batch_size = int(FLAGS.ent_total) // int(FLAGS.nbatch_kg)  # note: arguably should not be FLAGS.tri_total

        # int32 buffers for the positive triple (h, t, r) and its negative.
        ph = np.zeros(batch_size, dtype=np.int32)  # positive heads
        pt = np.zeros(batch_size, dtype=np.int32)  # positive tails
        pr = np.zeros(batch_size, dtype=np.int32)  # positive relations
        nh = np.zeros(batch_size, dtype=np.int32)  # negative heads
        nt = np.zeros(batch_size, dtype=np.int32)  # negative tails
        nr = np.zeros(batch_size, dtype=np.int32)  # negative relations
        # __array_interface__['data'] is a 2-tuple whose first element is the
        # address of the array's data area; the C library writes batches there.
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        # C signature: ctypes.c_void_p is void*, ctypes.c_int is int.
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        # The KG side trains on a continuous stream of batches with no notion
        # of an epoch; coord.request_stop() is what eventually ends the loop.
        while not coord.should_stop():
            times_kg += 1
            res = 0.0
            for batch in range(int(FLAGS.nbatch_kg)):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                res += train_step_kg(ph, pt, pr, nh, nt, nr)
            time_str = datetime.datetime.now().isoformat()
            print("KB batch %d time %s | loss : %f" %
                  (times_kg, time_str, res))
            if pure_KB and times_kg % 20000 == 0:
                print('saving model...')
                path = saver.save(
                    sess, FLAGS.model_dir + FLAGS.model +
                    str(FLAGS.katt_flag) + transX + '_pureKB_' +
                    str(pure_KB) + '_epoch' + str(times_kg) + '_nkb' +
                    str(FLAGS.nbatch_kg) + '_win' + str(FLAGS.win_size) +
                    '_' + str(embedding_size))
                print('saved model to ' + path)
            if pure_KB and times_kg == 160000:
                coord.request_stop()

    def train_nn(coord):

        def train_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                       label, scope, weights):
            feed_dict = {
                model.head_index: head,
                model.tail_index: tail,
                model.word: word,
                model.pos1: pos1,
                model.pos2: pos2,
                model.mask: mask,
                model.len: leng,
                model.label_index: label_index,  # (B,) true relation indices
                model.label: label,              # (B, |R|) one-hot labels
                model.scope: scope,
                model.keep_prob: FLAGS.keep_prob,
                model.weights: weights
            }
            _, step, loss, summary, output, correct_predictions = sess.run(
                [train_op, global_step, model.loss, merged_summary,
                 model.output, model.correct_predictions], feed_dict)
            summary_writer.add_summary(summary, step)
            return output, loss, correct_predictions

        train_order = list(range(len(instance_triple)))
        save_epoch = 150
        for one_epoch in range(FLAGS.max_epoch):
            print('epoch ' + str(one_epoch + 1) + ' starts!')
            np.random.shuffle(train_order)
            s1 = 0.0
            s2 = 0.0
            tot1 = 0.0
            tot2 = 0.0
            losstot = 0.0
            for i in range(int(len(train_order) / float(FLAGS.batch_size))):
                input_scope = np.take(
                    instance_scope,
                    train_order[i * FLAGS.batch_size:
                                (i + 1) * FLAGS.batch_size],
                    axis=0)
                index = []
                scope = [0]
                label = []    # each sample's true relation index
                weights = []
                for num in input_scope:
                    index = index + list(range(num[0], num[1] + 1))
                    label.append(train_label[num[0]])
                    scope.append(scope[len(scope) - 1] + num[1] - num[0] + 1)
                    weights.append(reltot[train_label[num[0]]])
                label_ = np.zeros((FLAGS.batch_size, FLAGS.num_classes))
                label_[np.arange(FLAGS.batch_size), label] = 1
                # correct_predictions is (B,): 1 where the model predicted
                # the true relation, 0 otherwise.
                output, loss, correct_predictions = train_step(
                    train_head[index], train_tail[index],
                    train_word[index, :], train_pos1[index, :],
                    train_pos2[index, :], train_mask[index, :],
                    train_len[index], train_label[index], label_,
                    np.array(scope), weights)
                s = 0
                losstot += loss
                for num in correct_predictions:
                    # if label[s] == 0:
                    #     tot1 += 1.0
                    #     if num:
                    #         s1 += 1.0
                    tot2 += 1.0
                    if num:
                        s2 += 1.0
                    s = s + 1
                time_str = datetime.datetime.now().isoformat()
                print("epoch %d batch %d time %s | loss : %f, accuracy: %f" %
                      (one_epoch, i, time_str, loss, s2 / tot2))
                current_step = tf.train.global_step(sess, global_step)
            if (one_epoch + 1) % save_epoch == 0:
                print('epoch ' + str(one_epoch + 1) + ' has finished')
                print('saving model...')
                path = saver.save(
                    sess, FLAGS.model_dir + FLAGS.model +
                    str(FLAGS.katt_flag) + transX + '_pureKB_' +
                    str(pure_KB) + '_epoch' + str(one_epoch) + '_nkb' +
                    str(FLAGS.nbatch_kg) + '_win' + str(FLAGS.win_size) +
                    '_' + str(embedding_size))
                print('saved model to ' + path)
        coord.request_stop()

    coord = tf.train.Coordinator()
    threads = []
    threads.append(threading.Thread(target=train_kg, args=(coord,)))
    if not pure_KB:
        threads.append(threading.Thread(target=train_nn, args=(coord,)))
    for t in threads:
        t.start()
    coord.join(threads)
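# --- Sketch (not from the source): the reltot weighting used above, on toy
# labels. Frequent relations get slightly smaller per-sample weights via an
# inverse 0.05-power of their count.
labels = [0, 0, 0, 0, 1]          # hypothetical relation indices
counts = {}
for r in labels:
    counts[r] = counts.get(r, 0.0) + 1.0
weights = {r: 1.0 / (c ** 0.05) for r, c in counts.items()}
print(weights)                    # {0: 0.933..., 1: 1.0}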
def main(_):
    print 'reading word embedding'
    word_vec = np.load(export_path + 'vec.npy')
    print 'reading training data'
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    print 'reading finished'
    print 'mentions      : %d' % (len(instance_triple))
    print 'sentences     : %d' % (len(train_len))
    print 'relations     : %d' % (FLAGS.num_classes)
    print 'word size     : %d' % (len(word_vec[0]))
    print 'position size : %d' % (FLAGS.pos_size)
    print 'hidden size   : %d' % (FLAGS.hidden_size)

    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print 'building network...'
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)

    print("================================================")
    saver.restore(
        sess, FLAGS.model_dir + sys.argv[1] + FLAGS.model +
        str(FLAGS.katt_flag) + "-" + str(itera))
    ent_embedding, rel_embedding = sess.run(
        [model.word_embedding, model.rel_embeddings])
    ent_embedding = ent_embedding.tolist()
    rel_embedding = rel_embedding.tolist()
    f = open(export_path + "entity2vec", "w")
    f.write(json.dumps(ent_embedding))
    f.close()
    f = open(export_path + "relation2vec", "w")
    f.write(json.dumps(rel_embedding))
    f.close()
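# --- Sketch (not from the source): reading the exported embeddings back,
# mirroring the json.dumps(array.tolist()) format written above.
import json
import numpy as np

export_path = './data/'   # assumed: the same prefix used by the script above
with open(export_path + 'entity2vec') as fin:
    ent_embedding = np.array(json.load(fin))
print(ent_embedding.shape)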
def test_rnn():
    print "Testing RNN without minibatch..."

    # without minibatch
    for k in xrange(5):
        print "Network %i..." % k
        # random parameters
        input_dim = np.random.randint(1, 30)
        hidden_dim = np.random.randint(1, 30)
        activation = T.tanh if np.random.randint(2) else T.nnet.sigmoid
        with_batch = False
        # rnn
        rnn = network.RNN(input_dim, hidden_dim, activation, with_batch,
                          'rnn')
        for i in xrange(10):
            print "%i" % i,
            input = T.matrix('input_test')
            input_value = np.random.rand(
                np.random.randint(low=1, high=30), input_dim
            ).astype(floatX)
            output = rnn.link(input)
            # Reference implementation of the recurrence in plain NumPy.
            h_t = rnn.h_0.get_value()
            # Loop variable renamed from `i`, which shadowed the outer loop.
            for t in xrange(input_value.shape[0]):
                h_t = np.dot(input_value[t], rnn.w_x.get_value()) + \
                      np.dot(h_t, rnn.w_h.get_value()) + rnn.b_h.get_value()
                if activation == T.tanh:
                    h_t = np.tanh(h_t)
                else:
                    h_t = expit(h_t)
            assert h_t.shape == (hidden_dim,)
            np.testing.assert_array_almost_equal(
                output.eval({input: input_value}), h_t
            )
        print "OK"

    print "Testing RNN with minibatch..."

    # with minibatch
    for k in xrange(5):
        print "Network %i..." % k
        # random parameters
        input_dim = np.random.randint(1, 30)
        hidden_dim = np.random.randint(1, 30)
        activation = T.tanh if np.random.randint(2) else T.nnet.sigmoid
        with_batch = True
        # hidden layer
        rnn = network.RNN(input_dim, hidden_dim, activation, with_batch,
                          'RNN')
        for i in xrange(10):
            print "%i" % i,
            input = T.tensor3('input_test')
            input_value = np.random.rand(
                np.random.randint(low=1, high=10),
                np.random.randint(low=1, high=30),
                input_dim
            ).astype(floatX)
            # The layer expects time-major input, so move time to axis 0.
            input_value_dimshuffled = np.transpose(input_value, (1, 0, 2))
            output = rnn.link(input)
            h_t = np.array([rnn.h_0.get_value()] *
                           input_value_dimshuffled.shape[1])
            for t in xrange(input_value_dimshuffled.shape[0]):
                h_t = np.dot(input_value_dimshuffled[t],
                             rnn.w_x.get_value()) + \
                      np.dot(h_t, rnn.w_h.get_value()) + rnn.b_h.get_value()
                if activation == T.tanh:
                    h_t = np.tanh(h_t)
                else:
                    h_t = expit(h_t)
            assert h_t.shape == (input_value.shape[0], hidden_dim)
            np.testing.assert_array_almost_equal(
                output.eval({input: input_value}), h_t
            )
        print "OK"

    print "All tests ran successfully for RNN."
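# In math form, both reference loops above implement the recurrence
#     h_t = phi(x_t . W_x + h_{t-1} . W_h + b_h),   phi in {tanh, sigmoid},
# where sigmoid is the logistic function (expit). The assertions compare
# this NumPy computation against the compiled Theano output of rnn.link.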