def __init__(self): print("NRE\tSetup: CNN NRE based on joint KG\nWaiting...") print("Word embedding...") self.word_vec = np.load(export_path + 'vec.npy') print("embedding finished.") print('relations : %d' % FLAGS.num_classes) print('word size : %d' % (len(self.word_vec[0]))) print('position size : %d' % FLAGS.pos_size) print('hidden size : %d' % FLAGS.hidden_size) print("reading pre-data finished.") print("network building:") self.sess = tf.Session() self.model = network.CNN(is_training=False, word_embeddings=self.word_vec) self.sess.run(tf.global_variables_initializer()) self.saver = tf.train.Saver() self.dict_word = {}
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                    level=logging.INFO, stream=sys.stdout)
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

n_epochs = 100   # Number of saved models (one per epoch) to evaluate
h = 1536         # Image height
w = 2064         # Image width
img_idx = 0      # test_(n-1).png in folder, n-1 = img_idx
n_predict = 48   # Number of test images

if not os.path.exists("predictions test"):
    os.makedirs("predictions test")

# Initialise model
logging.info("Getting predictions")
convnet = network.CNN(keep_rate=1.0, train_mode=False)
images = tf.placeholder(tf.float32, shape=(1, h, w, 3))

# Build network
convnet.build(images)
logging.info("Finished building network")

# Get and save predictions
epoch_acc = np.zeros(n_epochs)
epoch_dsc = np.zeros(n_epochs)
for j in range(n_epochs):
    init = tf.global_variables_initializer()
    # if restore is True:
    saver = tf.train.Saver()
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading entity embedding')
    ent_embedding = np.load(export_path + 'ent_embedding.npy')
    print('reading relation embedding')
    rel_embedding = np.load(export_path + 'rel_embedding.npy')
    print('reading test data')
    test_instance_triple = np.load(export_path + 'test_instance_triple.npy')
    test_instance_scope = np.load(export_path + 'test_instance_scope.npy')
    test_len = np.load(export_path + 'test_len.npy')
    test_label = np.load(export_path + 'test_label.npy')
    test_word = np.load(export_path + 'test_word.npy')
    test_pos1 = np.load(export_path + 'test_pos1.npy')
    test_pos2 = np.load(export_path + 'test_pos2.npy')
    test_mask = np.load(export_path + 'test_mask.npy')
    test_head = np.load(export_path + 'test_head.npy')
    test_tail = np.load(export_path + 'test_tail.npy')
    # Despite the train_ prefix, these hold the *test* entity descriptions.
    train_desc_tail = np.load(export_path + 'test_desc_tail.npy')
    train_desc_head = np.load(export_path + 'test_desc_head.npy')
    print('reading finished')
    print('mentions : %d' % (len(test_instance_triple)))
    print('sentences : %d' % (len(test_len)))
    print('relations : %d' % (FLAGS.num_classes))
    print('word size : %d' % (len(word_vec[0])))
    print('position size : %d' % (FLAGS.pos_size))
    print('hidden size : %d' % (FLAGS.hidden_size))

    # desc = {}
    # with open(export_path + 'desc.txt') as f:
    #     for content in f:
    #         en_id, en_desc = content.strip().split('\t')
    #         en_desc = en_desc.strip().split(',')
    #         en_desc = [int(word) for word in en_desc]
    #         desc[int(en_id)] = en_desc

    print('building network...')
    sess_db = tf.Session()
    # sess_db = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess_db.add_tensor_filter('has_inf_or_nan', tf_debug.has_inf_or_nan)
    merged_summary = tf.summary.merge_all()
    global_step = tf.Variable(0, name='global_step', trainable=False)
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=False, word_embeddings=word_vec,
                            ent_embedding=ent_embedding,
                            rel_embedding=rel_embedding)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=False, word_embeddings=word_vec,
                             ent_embedding=ent_embedding,
                             rel_embedding=rel_embedding)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)
    sess_db.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    def test_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                  label, scope, head_desc, tail_desc):
        feed_dict = {
            model.head_index: head,
            model.tail_index: tail,
            model.word: word,
            model.pos1: pos1,
            model.pos2: pos2,
            model.mask: mask,
            model.len: leng,
            model.label_index: label_index,
            model.label: label,
            model.scope: scope,
            model.keep_prob: FLAGS.keep_prob,
            model.head_description: head_desc,
            model.tail_description: tail_desc
        }
        if FLAGS.katt_flag == 1:
            output, head_desc_att, tail_desc_att = sess_db.run(
                [model.test_output, model.head_desc_att, model.tail_desc_att],
                feed_dict)
            # np.save('./case_study/head_desc_att', head_desc_att)
            # np.save('./case_study/tail_desc_att', tail_desc_att)
        else:
            output = sess_db.run(model.test_output, feed_dict)
        return output

    f = open('results.txt', 'w')
    f.write('iteration\taverage precision\tP@100\tP@300\tP@500\n')
    for iters in range(1, 15):
        print(iters)
        saver.restore(
            sess_db,
            FLAGS.checkpoint_path + FLAGS.save_name + '/' + FLAGS.model +
            str(FLAGS.katt_flag) + "-" + str(80 * iters))
        summary_writer = tf.summary.FileWriter(FLAGS.summary_dir,
                                               sess_db.graph)
        stack_output = []
        stack_label = []
        iteration = len(test_instance_scope) // FLAGS.test_batch_size
        for i in range(iteration):
            sys.stdout.write('running ' + str(i) + '/' + str(iteration) + '...\r')
            sys.stdout.flush()
            input_scope = test_instance_scope[i * FLAGS.test_batch_size:
                                              (i + 1) * FLAGS.test_batch_size]
            index = []
            scope = [0]
            label = []
            for num in input_scope:
                index = index + list(range(num[0], num[1] + 1))
                label.append(test_label[num[0]])
                scope.append(scope[-1] + num[1] - num[0] + 1)
            label_ = np.zeros((FLAGS.test_batch_size, FLAGS.num_classes))
            label_[np.arange(FLAGS.test_batch_size), label] = 1
            output = test_step(test_head[index], test_tail[index],
                               test_word[index, :], test_pos1[index, :],
                               test_pos2[index, :], test_mask[index, :],
                               test_len[index], test_label[index], label_,
                               np.array(scope), train_desc_head[index],
                               train_desc_tail[index])
            stack_output.append(output)
            stack_label.append(label_)

        print('evaluating...')
        stack_output = np.concatenate(stack_output, axis=0)
        stack_label = np.concatenate(stack_label, axis=0)
        # Column 0 is the NA relation; exclude it from evaluation.
        exclude_na_flatten_output = stack_output[:, 1:]
        exclude_na_flatten_label = stack_label[:, 1:]
        print(exclude_na_flatten_output.shape)
        print(exclude_na_flatten_label.shape)
        np.save('./model' + str(FLAGS.alpha) + '/' + FLAGS.model +
                '+sen_att_all_prob_' + str(iters) + '.npy',
                exclude_na_flatten_output)
        np.save('./model' + str(FLAGS.alpha) + '/' + FLAGS.model +
                '+sen_att_all_label_' + str(iters) + '.npy',
                exclude_na_flatten_label)
        average_precision = average_precision_score(exclude_na_flatten_label,
                                                    exclude_na_flatten_output,
                                                    average="micro")
        exclude_na_flatten_label = np.reshape(exclude_na_flatten_label, -1)
        exclude_na_flatten_output = np.reshape(exclude_na_flatten_output, -1)
        order = np.argsort(-exclude_na_flatten_output)
        p_100 = np.mean(exclude_na_flatten_label[order[:100]])
        p_300 = np.mean(exclude_na_flatten_label[order[:300]])
        p_500 = np.mean(exclude_na_flatten_label[order[:500]])
        print('pr: ' + str(average_precision))
        print('p@100: ' + str(p_100))
        print('p@300: ' + str(p_300))
        print('p@500: ' + str(p_500))
        f.write(str(iters) + '\t' + str(average_precision) + '\t' +
                str(p_100) + '\t' + str(p_300) + '\t' + str(p_500) + '\n')
    f.close()
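# Worked toy example (illustrative only) of how the P@N numbers above are
# derived: flatten the (instances x relations) score matrix, rank all cells by
# descending score, and average the gold labels over the top N cells.
import numpy as np

scores = np.array([0.9, 0.1, 0.8, 0.4])  # flattened prediction scores
labels = np.array([1, 0, 0, 1])          # flattened gold labels
order = np.argsort(-scores)              # indices sorted by descending score
p_at_2 = np.mean(labels[order[:2]])      # top-2 labels are [1, 0]
print(p_at_2)                            # -> 0.5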
import logging
import os
import sys

import network

logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                    level=logging.INFO, stream=sys.stdout)
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

batch_size = 128
n_train_data = 59904   # Number of RGB images
n_epochs = 100         # Number of epochs to train for
restore = False        # Option to continue training from a saved model
save = True            # Save the model every epoch
h = 48                 # Image height
w = 48                 # Image width
keep_rate = 1.0        # 1 - dropout rate

if not os.path.exists("predictions training"):
    os.makedirs("predictions training")

# Train neural network
logging.info("Training network")
convnet = network.CNN(keep_rate=keep_rate, train_mode=True)
t_net = network.TRAIN_CNN(convnet, batch_size, h, w)
t_net.train_network(n_train_data, batch_size, n_epochs,
                    restore=restore, save=save)
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading training data')
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    print('reading finished')
    print('mentions : %d' % (len(instance_triple)))
    print('sentences : %d' % (len(train_len)))
    print('relations : %d' % (FLAGS.num_classes))
    print('word size : %d' % (len(word_vec[0])))
    print('position size : %d' % (FLAGS.pos_size))
    print('hidden size : %d' % (FLAGS.hidden_size))

    # Down-weight frequent relations: weight = 1 / freq^0.05.
    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print('building network...')
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    global_step_kg_satt = tf.Variable(0, name='global_step_kg_satt',
                                      trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    optimizer_kg_satt = tf.train.GradientDescentOptimizer(
        FLAGS.learning_rate_kg)
    grads_and_vars_kg_satt = optimizer_kg_satt.compute_gradients(
        model.loss_kg_att)
    train_op_kg_satt = optimizer_kg_satt.apply_gradients(
        grads_and_vars_kg_satt, global_step=global_step_kg_satt)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)
    print('building finished')

    def train_kg_att(coord):

        def train_step_kg_att(h_batch, t_batch, r_batch, r_scope, r_label):
            feed_dict = {
                model.pos_h: h_batch,
                model.pos_t: t_batch,
                model.pos_r: r_batch,
                model.r_scope: r_scope,
                model.r_label: r_label,
                model.r_length: np.array([len(r_label)]),
            }
            _, loss = sess.run([train_op_kg_satt, model.loss_kg_att],
                               feed_dict)
            return loss

        def merge(head, tail, rel):
            # Group (head, tail) pairs by relation, skipping ids outside the
            # relation vocabulary; `buckets` avoids shadowing builtin `hash`.
            buckets = {}
            for (h, t, r) in zip(head, tail, rel):
                if r < FLAGS.num_classes:
                    if r not in buckets:
                        buckets[r] = []
                    buckets[r].append((h, t))
            rel = []
            head = []
            tail = []
            rel_label = []
            rel_config = [0]
            for r in buckets:
                if len(buckets[r]) != 0:
                    rel_config.append(rel_config[-1])
                    rel_label.append(r)
                    for h, t in buckets[r]:
                        rel_config[-1] += 1
                        head.append(h)
                        tail.append(t)
                        rel.append(r)
            return (np.array(head), np.array(tail), np.array(rel),
                    np.array(rel_config), np.array(rel_label))

        batch_size = FLAGS.tri_total // FLAGS.nbatch_kg
        ph = np.zeros(batch_size, dtype=np.int32)
        pt = np.zeros(batch_size, dtype=np.int32)
        pr = np.zeros(batch_size, dtype=np.int32)
        nh = np.zeros(batch_size, dtype=np.int32)
        nt = np.zeros(batch_size, dtype=np.int32)
        nr = np.zeros(batch_size, dtype=np.int32)
        # Raw data pointers so the C sampler can fill the buffers in place.
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        while not coord.should_stop():
            times_kg += 1
            # if times_kg == 3000:
            #     coord.request_stop()
            res = 0.0
            for batch in range(FLAGS.nbatch_kg):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                h, t, r, r_range, r_label = merge(ph, pt, pr)
                res += train_step_kg_att(h, t, r, r_range, r_label)
            time_str = datetime.datetime.now().isoformat()
            print("batch %d time %s | loss : %f" % (times_kg, time_str, res))

    def train_kg(coord):

        def train_step_kg(pos_h_batch, pos_t_batch, pos_r_batch, neg_h_batch,
                          neg_t_batch, neg_r_batch):
            feed_dict = {
                model.pos_h: pos_h_batch,
                model.pos_t: pos_t_batch,
                model.pos_r: pos_r_batch,
                model.neg_h: neg_h_batch,
                model.neg_t: neg_t_batch,
                model.neg_r: neg_r_batch
            }
            _, step, loss = sess.run(
                [train_op_kg, global_step_kg, model.loss_kg], feed_dict)
            return loss

        batch_size = FLAGS.tri_total // FLAGS.nbatch_kg
        ph = np.zeros(batch_size, dtype=np.int32)
        pt = np.zeros(batch_size, dtype=np.int32)
        pr = np.zeros(batch_size, dtype=np.int32)
        nh = np.zeros(batch_size, dtype=np.int32)
        nt = np.zeros(batch_size, dtype=np.int32)
        nr = np.zeros(batch_size, dtype=np.int32)
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        while not coord.should_stop():
            times_kg += 1
            # if times_kg == 3000:
            #     coord.request_stop()
            res = 0.0
            for batch in range(FLAGS.nbatch_kg):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                res += train_step_kg(ph, pt, pr, nh, nt, nr)
            time_str = datetime.datetime.now().isoformat()
            print("batch %d time %s | loss : %f" % (times_kg, time_str, res))

    def train_nn(coord):

        def train_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                       label, scope, weights):
            feed_dict = {
                model.head_index: head,
                model.tail_index: tail,
                model.word: word,
                model.pos1: pos1,
                model.pos2: pos2,
                model.mask: mask,
                model.len: leng,
                model.label_index: label_index,
                model.label: label,
                model.scope: scope,
                model.keep_prob: FLAGS.keep_prob,
                model.weights: weights
            }
            _, step, loss, summary, output, correct_predictions = sess.run([
                train_op, global_step, model.loss, merged_summary,
                model.output, model.correct_predictions
            ], feed_dict)
            summary_writer.add_summary(summary, step)
            return output, loss, correct_predictions

        stack_output = []
        stack_label = []
        stack_ce_loss = []
        train_order = list(range(len(instance_triple)))
        save_epoch = 2
        eval_step = 300
        for one_epoch in range(FLAGS.max_epoch):
            print('epoch ' + str(one_epoch + 1) + ' starts!')
            np.random.shuffle(train_order)
            s1 = 0.0
            s2 = 0.0
            tot1 = 1.0
            tot2 = 1.0
            losstot = 0.0
            for i in range(len(train_order) // FLAGS.batch_size):
                input_scope = np.take(
                    instance_scope,
                    train_order[i * FLAGS.batch_size:(i + 1) * FLAGS.batch_size],
                    axis=0)
                index = []
                scope = [0]
                label = []
                weights = []
                for num in input_scope:
                    index = index + list(range(num[0], num[1] + 1))
                    label.append(train_label[num[0]])
                    scope.append(scope[-1] + num[1] - num[0] + 1)
                    weights.append(reltot[train_label[num[0]]])
                label_ = np.zeros((FLAGS.batch_size, FLAGS.num_classes))
                label_[np.arange(FLAGS.batch_size), label] = 1
                output, loss, correct_predictions = train_step(
                    train_head[index], train_tail[index],
                    train_word[index, :], train_pos1[index, :],
                    train_pos2[index, :], train_mask[index, :],
                    train_len[index], train_label[index], label_,
                    np.array(scope), weights)
                s = 0
                losstot += loss
                for num in correct_predictions:
                    if label[s] == 0:
                        tot1 += 1.0
                        if num:
                            s1 += 1.0
                    else:
                        tot2 += 1.0
                        if num:
                            s2 += 1.0
                    s = s + 1
                time_str = datetime.datetime.now().isoformat()
                # print("batch %d step %d time %s | loss : %f, NA accuracy: %f, "
                #       "not NA accuracy: %f"
                #       % (one_epoch, i, time_str, loss, s1 / tot1, s2 / tot2))
                current_step = tf.train.global_step(sess, global_step)
            if (one_epoch + 1) % save_epoch == 0:
                print('epoch ' + str(one_epoch + 1) + ' has finished')
                print('saving model...')
                path = saver.save(sess,
                                  FLAGS.model_dir + FLAGS.model +
                                  str(FLAGS.katt_flag),
                                  global_step=current_step)
        coord.request_stop()

    coord = tf.train.Coordinator()
    threads = []
    threads.append(threading.Thread(target=train_kg, args=(coord,)))
    threads.append(threading.Thread(target=train_nn, args=(coord,)))
    threads.append(threading.Thread(target=train_kg_att, args=(coord,)))
    for t in threads:
        t.start()
    coord.join(threads)

    if FLAGS.store_kg_flag != 0:
        print('saving kg...')
        ent_embedding, rel_embedding = sess.run(
            [model.word_embedding, model.rel_embeddings])
        ent_embedding = ent_embedding.tolist()
        rel_embedding = rel_embedding.tolist()
        f = open("entity2vec", "w")
        f.write(json.dumps(ent_embedding))
        f.close()
        f = open("relation2vec", "w")
        f.write(json.dumps(rel_embedding))
        f.close()
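# Minimal sketch (assuming TF 1.x) of the coordinator pattern used above: the
# KG and NN training loops run in separate threads sharing one
# tf.train.Coordinator, and any worker calling request_stop() stops them all.
import threading
import tensorflow as tf

def worker(coord, steps=5):
    n = 0
    while not coord.should_stop():
        n += 1
        if n >= steps:
            coord.request_stop()  # signals every thread, not just this one

coord = tf.train.Coordinator()
threads = [threading.Thread(target=worker, args=(coord,)) for _ in range(3)]
for t in threads:
    t.start()
coord.join(threads)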
def train_classification(total_epoch=1000, batch_size=16, log_out_span=10,
                         log_path=network_cfg['log_path']):
    # Data
    dataset = cifar10_loader.Cifar10(data_cfg)
    X_train, X_test, T_train, T_test, N_train, N_test = \
        dataset.fetch_bin_to_tensor(
            data_argumantation_int=data_cfg['Data_Augmentation_Ratio'],
            reshape3d=True)

    # Model definition
    cnn = network.CNN(network_cfg)

    # Config
    sess = tf.Session(config=tf_config)
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    train_loss_list = []
    saver = tf.train.Saver()
    log_writer = tf.summary.FileWriter(log_path, sess.graph)
    # Updates the non-trainable moving statistics used by batch norm.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    def __eval_process(operation, data_type, all_data=False):
        X = None
        T = None
        N = batch_size
        loss_total = accu_total = 0
        summary_train = summary_test = None
        if data_type == 'train':
            is_training = True
            X = X_train
            T = T_train
            if all_data:
                N = N_train
        else:  # data_type == 'test'
            is_training = False
            X = X_test
            T = T_test
            if all_data:
                N = N_test
        for l in range(0, N, batch_size):
            img_batch = X[l:l + batch_size]
            t_batch = T[l:l + batch_size]
            real_batch = len(t_batch)
            feed = {
                cnn.img_plh: img_batch,
                cnn.t_plh: t_batch,
                cnn.real_batch_holder: real_batch,
                cnn.is_training_holder: is_training,  # False for test mode
            }
            if operation == 'loss_and_accuracy_result':
                loss, accu = sess.run([cnn.loss, cnn.accuracy],
                                      feed_dict=feed)
                loss_total += np.mean(loss) * batch_size
                accu_total += np.mean(accu) * batch_size
            elif operation == 'summary_for_tensorboard':
                summary_train, summary_test = sess.run(
                    [cnn.sum_train, cnn.sum_test], feed_dict=feed)
            else:
                pass
        return loss_total, accu_total, summary_train, summary_test

    # Iteration
    for epoch in range(total_epoch):
        print('epoch %d | ' % epoch, end='')
        sum_acc = 0
        sum_loss = 0
        perm = np.random.permutation(N_train)
        cnt = 0
        for i in range(0, N_train, batch_size):
            perm_batch = perm[i:i + batch_size]
            train_img = X_train[perm_batch]
            train_label = T_train[perm_batch]
            batch_num = len(perm_batch)
            feed = {
                cnn.img_plh: train_img,
                cnn.t_plh: train_label,
                cnn.is_training_holder: True,
                cnn.real_batch_holder: batch_num,
            }
            _, __, loss, acc = sess.run(
                [update_ops, cnn.optimize, cnn.loss, cnn.accuracy],
                feed_dict=feed)
            sum_acc += np.mean(acc) * batch_size
            sum_loss += np.mean(loss) * batch_size
            cnt += 1
        train_accuracy = sum_acc / N_train
        train_loss = sum_loss / N_train
        print('Train accuracy %.3f | ' % train_accuracy, end='')
        print('Train loss %.3f' % train_loss)

        if epoch % log_out_span == 0:
            # Update TensorBoard with train and test summaries.
            train_summary = __eval_process(
                operation='summary_for_tensorboard', data_type='train')[2]
            test_summary = __eval_process(
                operation='summary_for_tensorboard', data_type='test')[3]
            log_writer.add_summary(train_summary, epoch)
            log_writer.add_summary(test_summary, epoch)
            # Save the model (graph.meta, graph.index, and so on).
            saver.save(sess, log_path + 'graph1')
            print('save')
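# Sketch (assuming TF 1.x batch norm) of the usual alternative to fetching
# update_ops explicitly in sess.run as done above: attach the moving-average
# updates to the train op via control dependencies, so one fetch runs both.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(None, 4))
h = tf.layers.batch_normalization(x, training=True)
loss = tf.reduce_mean(tf.square(h))
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # Running train_op now also refreshes the batch-norm statistics.
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)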
        for j in range(axis_y):
            for k in range(axis_x):
                if voxel_data[(div * div * i) + (div * j) + k] == 1:
                    x[n, m, i, j, k] = 1
                    # point_x.append(k)
                    # point_y.append(j)
                    # point_z.append(i)
    # ax.scatter(point_x, point_y, point_z)
    # plt.show()
    t1 = time.time()

    input_cloud = x.astype(xp.float32)
    output_label = y.astype(xp.float32)
    nn = network.CNN()
    serializers.load_npz(model_path + model_name, nn)
    # if args.gpu >= 0:
    #     chainer.cuda.get_device(args.gpu).use()
    #     nn.to_gpu(gpu)
    y_pre = nn.predict(input_cloud)
    t2 = time.time()

    elapsed_time1 = t1 - t0
    elapsed_time2 = t2 - t1
    print("voxelization time: {}".format(elapsed_time1))
    print("prediction time: {}".format(elapsed_time2))
    print(output_label)
def main(_):
    hidden_dim = int(FLAGS.dim)
    batch = int(FLAGS.batch)
    is_train = int(FLAGS.train)
    source = FLAGS.source
    target = FLAGS.target
    total_epochs = int(FLAGS.epochs)
    word_len = int(FLAGS.word)
    drop = float(FLAGS.drop)
    filters = int(FLAGS.filter) != 0
    filters_thre = int(FLAGS.thre)

    wordMap, wordVec = getEmbed(filters=filters, thre=filters_thre)
    class_types = getTypes()
    train_set = getQuestion(wordMap, class_types, source, 'corpus_train.txt',
                            word_size=word_len, filters=filters,
                            thre=filters_thre)
    dev_set = getQuestion(wordMap, class_types, source, 'corpus_dev.txt',
                          word_size=word_len, filters=filters,
                          thre=filters_thre)

    gpu_options = tf.GPUOptions(visible_device_list=FLAGS.gpu,
                                allow_growth=True)
    with tf.Graph().as_default():
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                allow_soft_placement=True))
        with sess.as_default():
            initializer = tf.contrib.layers.xavier_initializer()
            # initializer = tf.orthogonal_initializer()
            with tf.variable_scope('model', initializer=initializer):
                m = network.CNN(wordVec, word_len)
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(float(FLAGS.lr))
            # optimizer = tf.train.RMSPropOptimizer(float(FLAGS.lr))
            # lr = tf.train.exponential_decay(float(FLAGS.lr), global_step=global_step,
            #                                 decay_steps=500, decay_rate=0.98)
            # optimizer = tf.train.AdamOptimizer(lr)
            # optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(m.loss, global_step=global_step)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(max_to_keep=None)
            # merged_summary = tf.summary.merge_all()
            # summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)

            def train_step(word, label):
                feed_dict = {
                    m.word: word,
                    m.label: label,
                    m.keep_prob: drop,
                }
                _, step, loss, accuracy = sess.run(
                    [train_op, global_step, m.loss, m.accuracy], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                accuracy = np.mean(np.reshape(np.array(accuracy), (-1)))
                # summary_writer.add_summary(summary, step)
                if step % 10 == 0:
                    print("{}: step {}, softmax_loss {:g}, acc {:g}".format(
                        time_str, step, np.mean(loss), accuracy))

            def dev_step(word, label):
                feed_dict = {
                    m.word: word,
                    m.label: label,
                    m.keep_prob: 1.0,
                }
                loss, accuracy, prediction = sess.run(
                    [m.loss, m.accuracy, m.prediction], feed_dict)
                return loss, accuracy, prediction

            def getData(lst, dataset):
                word = []
                label = []
                for k in lst:
                    w, l = dataset[k]
                    word.append(w)
                    label.append(l)
                return np.array(word), np.array(label)

            def evaluate(total_labels, total_pred):
                a = np.sum(total_labels)  # gold positives
                b = np.sum(total_pred)    # predicted positives
                c = 0                     # true positives
                for i, j in zip(total_labels, total_pred):
                    if i == 1 and j == 1:
                        c += 1
                precision = float(c) / float(b) if b > 0 else 0.0
                recall = float(c) / float(a)
                f1 = 2 * precision * recall / (precision + recall)
                return precision, recall, f1

            max_accuracy = 0.0
            for one_epoch in range(total_epochs):
                print('turn: ' + str(one_epoch))
                temp_order = list(range(len(train_set)))
                np.random.shuffle(temp_order)
                for i in range(len(temp_order) // batch):
                    temp_input = temp_order[i * batch:(i + 1) * batch]
                    word, label = getData(temp_input, train_set)
                    train_step(word, label)
                    current_step = tf.train.global_step(sess, global_step)
                    if current_step % 50 == 0:
                        accuracy = []
                        losses = []
                        total_pred = []
                        total_labels = []
                        dev_order = list(range(len(dev_set)))
                        for j in range(len(dev_order) // batch):
                            temp_input = dev_order[j * batch:(j + 1) * batch]
                            word, label = getData(temp_input, dev_set)
                            loss, accs, prediction = dev_step(word, label)
                            for acc in accs:
                                accuracy.append(acc)
                        accuracy = np.mean(np.reshape(np.array(accuracy), (-1)))
                        print('dev...')
                        if accuracy > max_accuracy:
                            max_accuracy = accuracy
                        # if losses < min_loss:
                        #     min_loss = losses
                        print('accuracy: ' + str(accuracy))
                        # print('precision: ' + str(precision))
                        # print('recall: ' + str(recall))
                        # print('f1: ' + str(f1))
                        # print('loss: ' + str(losses))
                        # if accuracy < 91 and min_loss > 0.2:
                        #     continue
                        print('saving model')
                        # path = saver.save(sess, target + '/CNN_model.' + FLAGS.v,
                        #                   global_step=current_step)
                        path = saver.save(sess,
                                          target + '/laiye_model.' + FLAGS.v,
                                          global_step=0)
                        print('have saved model to ' + path)
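# Sketch (an assumption, not a fix applied above): evaluate() divides by
# precision + recall, which is zero when there are no true positives; a
# guarded F1 avoids the ZeroDivisionError on degenerate dev batches.
def f1_score(precision, recall):
    # Convention: F1 is 0 when both precision and recall are 0.
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)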
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading test data')
    test_instance_triple = np.load(export_path + 'test_instance_triple.npy')
    test_instance_scope = np.load(export_path + 'test_instance_scope.npy')
    test_len = np.load(export_path + 'test_len.npy')
    test_label = np.load(export_path + 'test_label.npy')
    test_word = np.load(export_path + 'test_word.npy')
    test_pos1 = np.load(export_path + 'test_pos1.npy')
    test_pos2 = np.load(export_path + 'test_pos2.npy')
    test_mask = np.load(export_path + 'test_mask.npy')
    test_head = np.load(export_path + 'test_head.npy')
    test_tail = np.load(export_path + 'test_tail.npy')
    print('reading finished')
    print('mentions : %d' % (len(test_instance_triple)))
    print('sentences : %d' % (len(test_len)))
    print('relations : %d' % (FLAGS.num_classes))
    print('word size : %d' % (len(word_vec[0])))
    print('position size : %d' % (FLAGS.pos_size))
    print('hidden size : %d' % (FLAGS.hidden_size))

    print('building network...')
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=False, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=False, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=False, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=False, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    def test_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                  label, scope):
        feed_dict = {
            model.head_index: head,
            model.tail_index: tail,
            model.word: word,
            model.pos1: pos1,
            model.pos2: pos2,
            model.mask: mask,
            model.len: leng,
            model.label_index: label_index,
            model.label: label,
            model.scope: scope,
            model.keep_prob: FLAGS.keep_prob
        }
        output = sess.run(model.test_output, feed_dict)
        return output

    FLAGS.test_batch_size = int(FLAGS.test_batch_size)
    FLAGS.num_classes = int(FLAGS.num_classes)
    f = open('results.txt', 'w')
    f.write('iteration\taverage precision\n')
    for iters in range(1, 30):
        print(iters)
        saver.restore(
            sess, FLAGS.checkpoint_path + FLAGS.model +
            str(int(FLAGS.katt_flag)) + "-" + str(3664 * iters))
        stack_output = []
        stack_label = []
        iteration = len(test_instance_scope) // FLAGS.test_batch_size
        for i in range(iteration):
            sys.stdout.write('running ' + str(i) + '/' + str(iteration) + '...\r')
            sys.stdout.flush()
            input_scope = test_instance_scope[i * FLAGS.test_batch_size:
                                              (i + 1) * FLAGS.test_batch_size]
            index = []
            scope = [0]
            label = []
            for num in input_scope:
                index = index + list(range(num[0], num[1] + 1))
                label.append(test_label[num[0]])
                scope.append(scope[-1] + num[1] - num[0] + 1)
            label_ = np.zeros((FLAGS.test_batch_size, FLAGS.num_classes))
            label_[np.arange(FLAGS.test_batch_size), label] = 1
            output = test_step(test_head[index], test_tail[index],
                               test_word[index, :], test_pos1[index, :],
                               test_pos2[index, :], test_mask[index, :],
                               test_len[index], test_label[index], label_,
                               np.array(scope))
            stack_output.append(output)
            stack_label.append(label_)

        print('evaluating...')
        stack_output = np.concatenate(stack_output, axis=0)
        stack_label = np.concatenate(stack_label, axis=0)
        # Column 0 is the NA relation; exclude it from evaluation.
        exclude_na_flatten_output = stack_output[:, 1:]
        exclude_na_flatten_label = stack_label[:, 1:]
        print(exclude_na_flatten_output.shape)
        print(exclude_na_flatten_label.shape)
        average_precision = average_precision_score(exclude_na_flatten_label,
                                                    exclude_na_flatten_output,
                                                    average="micro")
        np.save('./' + FLAGS.model + '+sen_att_all_prob_' + str(iters) + '.npy',
                exclude_na_flatten_output)
        np.save('./' + FLAGS.model + '+sen_att_all_label_' + str(iters) + '.npy',
                exclude_na_flatten_label)
        print('average_precision: ' + str(average_precision))
        f.write(str(iters) + '\t' + str(average_precision) + '\n')
    f.close()
        dev_samples = json.load(f)
        samples = dev_samples
    else:
        with open(contain_test_file, 'r') as f:  # contains e test, 2862 sp test
            test_samples = json.load(f)
        samples = test_samples

    N_entity = config["entity_total"]
    N_relation = config["rel_total"]
    eid2idx = config["e_dict"]        # from the pure file
    pid2idx = config["relation2id"]

    sess = tf.Session()
    model = network.CNN(use_embedding, embedding_size, is_training=False,
                        word_embeddings=word_vec)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # saver.save(sess, FLAGS.model_dir + FLAGS.model + str(FLAGS.katt_flag) + transX,
    #            global_step=current_step)
    # tf.app.flags.DEFINE_string('model_dir', './model/', 'path to store model')
    # tf.app.flags.DEFINE_string('checkpoint_path', './model/', 'path to store model')
    # saver.restore(sess, FLAGS.checkpoint_path + FLAGS.model + str(FLAGS.katt_flag)
    #               + "-" + str(3664 * iters))
    FLAGS.katt_flag = int(FLAGS.katt_flag)
    print(FLAGS.model_dir + FLAGS.model + str(FLAGS.katt_flag) + transX)
    saver.restore(sess,
                  FLAGS.model_dir + FLAGS.model + str(FLAGS.katt_flag) + transX)
    if transE:
        # score = sum(abs(pos_h_e + pos_r_e - pos_t_e), 1, keep_dims=True)
        entity_embedding = model.word_embedding.eval(session=sess)
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy') if use_embedding else None
    print('reading training data')
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    # train_label holds the relation index for each sample.
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    print('reading finished')
    print('mentions : %d' % (len(instance_triple)))
    print('sentences : %d' % (len(train_len)))
    print('relations : %d' % (FLAGS.num_classes))
    print('position size : %d' % (FLAGS.pos_size))
    print('hidden size : %d' % (FLAGS.hidden_size))

    # Count how often each relation occurs across all samples and give rarer
    # relations larger weights: weight = 1 / freq^0.05.
    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print('building network...')
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(use_embedding, embedding_size, is_training=True,
                            word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(use_embedding, embedding_size, is_training=True,
                             word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(use_embedding, embedding_size, is_training=True,
                            word_embeddings=word_vec, cell_name="LSTM",
                            simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(use_embedding, embedding_size, is_training=True,
                            word_embeddings=word_vec, cell_name="GRU",
                            simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(use_embedding, embedding_size, is_training=True,
                              word_embeddings=word_vec, cell_name="LSTM",
                              simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(use_embedding, embedding_size, is_training=True,
                              word_embeddings=word_vec, cell_name="GRU",
                              simple_position=True)
    # Once the model type is chosen there is a single model; the text and KG
    # sides only differ in their training loops and optimizers.

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    # text op (SGD)
    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)
    # kg op
    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    # saver = tf.train.Saver(max_to_keep=None)
    saver = tf.train.Saver()
    print('building finished')

    def train_kg(coord):
        # Batches (ph, pt, pr, nh, nt, nr) come from the C++ sampler.

        def train_step_kg(pos_h_batch, pos_t_batch, pos_r_batch, neg_h_batch,
                          neg_t_batch, neg_r_batch):
            feed_dict = {
                model.pos_h: pos_h_batch,
                model.pos_t: pos_t_batch,
                model.pos_r: pos_r_batch,
                model.neg_h: neg_h_batch,
                model.neg_t: neg_t_batch,
                model.neg_r: neg_r_batch
            }
            _, step, loss = sess.run(
                [train_op_kg, global_step_kg, model.loss_kg], feed_dict)
            return loss

        # Tried configurations: 100,600 / 300,200 / 200,300 / 1000,60.
        batch_size = int(FLAGS.tri_total) // int(FLAGS.nbatch_kg)
        # batch_size = int(FLAGS.ent_total) // int(FLAGS.nbatch_kg)  # should not be FLAGS.tri_total
        # Batch buffers; each element is a 32-bit int.
        ph = np.zeros(batch_size, dtype=np.int32)  # positive head entities
        pt = np.zeros(batch_size, dtype=np.int32)  # positive tail entities
        pr = np.zeros(batch_size, dtype=np.int32)  # positive relations
        nh = np.zeros(batch_size, dtype=np.int32)  # negative heads
        nt = np.zeros(batch_size, dtype=np.int32)  # negative tails
        nr = np.zeros(batch_size, dtype=np.int32)  # negative relations
        # __array_interface__['data'] is a 2-tuple whose first item is the
        # integer address of the array's data area (its first element).
        ph_addr = ph.__array_interface__['data'][0]
        pt_addr = pt.__array_interface__['data'][0]
        pr_addr = pr.__array_interface__['data'][0]
        nh_addr = nh.__array_interface__['data'][0]
        nt_addr = nt.__array_interface__['data'][0]
        nr_addr = nr.__array_interface__['data'][0]
        # Declare the C signature: ctypes.c_void_p == void*, ctypes.c_int == int.
        lib.getBatch.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
            ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int
        ]
        times_kg = 0
        # Pure batch training: the KG side has no notion of an epoch, only a
        # batch size; coord.request_stop() ends it at some point.
        while not coord.should_stop():
            times_kg += 1
            res = 0.0
            for batch in range(int(FLAGS.nbatch_kg)):
                lib.getBatch(ph_addr, pt_addr, pr_addr, nh_addr, nt_addr,
                             nr_addr, batch_size)
                res += train_step_kg(ph, pt, pr, nh, nt, nr)
            time_str = datetime.datetime.now().isoformat()
            print("KB batch %d time %s | loss : %f" % (times_kg, time_str, res))
            if pure_KB and times_kg % 20000 == 0:
                print('saving model...')
                # path = saver.save(sess, FLAGS.model_dir + FLAGS.model
                #                   + str(FLAGS.katt_flag) + transX,
                #                   global_step=current_step)
                path = saver.save(
                    sess,
                    FLAGS.model_dir + FLAGS.model + str(FLAGS.katt_flag) +
                    transX + '_pureKB_' + str(pure_KB) + '_epoch' +
                    str(times_kg) + '_nkb' + str(FLAGS.nbatch_kg) + '_win' +
                    str(FLAGS.win_size) + '_' + str(embedding_size))
                print('have saved model to ' + path)
            if pure_KB and times_kg == 160000:
                coord.request_stop()

    def train_nn(coord):
        # All inputs are numpy arrays, e.g.
        # train_step(train_head[index], train_tail[index], train_word[index, :], ...)

        def train_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                       label, scope, weights):
            feed_dict = {
                model.head_index: head,
                model.tail_index: tail,
                model.word: word,
                model.pos1: pos1,
                model.pos2: pos2,
                model.mask: mask,
                model.len: leng,
                model.label_index: label_index,  # (B,) gold relation indices
                model.label: label,              # (B, |R|) one-hot labels
                model.scope: scope,
                model.keep_prob: FLAGS.keep_prob,
                model.weights: weights
            }
            _, step, loss, summary, output, correct_predictions = sess.run([
                train_op, global_step, model.loss, merged_summary,
                model.output, model.correct_predictions
            ], feed_dict)
            summary_writer.add_summary(summary, step)
            return output, loss, correct_predictions

        train_order = list(range(len(instance_triple)))
        save_epoch = 150
        for one_epoch in range(FLAGS.max_epoch):
            print('epoch ' + str(one_epoch + 1) + ' starts!')
            np.random.shuffle(train_order)
            s1 = 0.0
            s2 = 0.0
            tot1 = 0.0
            tot2 = 0.0
            losstot = 0.0
            for i in range(len(train_order) // FLAGS.batch_size):
                input_scope = np.take(
                    instance_scope,
                    train_order[i * FLAGS.batch_size:(i + 1) * FLAGS.batch_size],
                    axis=0)
                index = []
                scope = [0]
                label = []   # each sample's true relation index
                weights = []
                for num in input_scope:
                    index = index + list(range(num[0], num[1] + 1))
                    label.append(train_label[num[0]])
                    if train_label[num[0]] > 53:
                        pass
                    scope.append(scope[-1] + num[1] - num[0] + 1)
                    weights.append(reltot[train_label[num[0]]])
                label_ = np.zeros((FLAGS.batch_size, FLAGS.num_classes))
                label_[np.arange(FLAGS.batch_size), label] = 1
                # correct_predictions: (B,) -- 1 where the CNN predicted the
                # sample correctly, 0 otherwise.
                output, loss, correct_predictions = train_step(
                    train_head[index], train_tail[index],
                    train_word[index, :], train_pos1[index, :],
                    train_pos2[index, :], train_mask[index, :],
                    train_len[index], train_label[index], label_,
                    np.array(scope), weights)
                s = 0
                losstot += loss
                for num in correct_predictions:
                    # if label[s] == 0:
                    #     tot1 += 1.0
                    #     if num:
                    #         s1 += 1.0
                    tot2 += 1.0
                    if num:
                        s2 += 1.0
                    s = s + 1
                time_str = datetime.datetime.now().isoformat()
                print("epoch %d batch %d time %s | loss : %f, accuracy: %f"
                      % (one_epoch, i, time_str, loss, s2 / tot2))
                current_step = tf.train.global_step(sess, global_step)
            if (one_epoch + 1) % save_epoch == 0:
                print('epoch ' + str(one_epoch + 1) + ' has finished')
                print('saving model...')
                # path = saver.save(sess, FLAGS.model_dir + FLAGS.model
                #                   + str(FLAGS.katt_flag) + transX,
                #                   global_step=current_step)
                path = saver.save(
                    sess,
                    FLAGS.model_dir + FLAGS.model + str(FLAGS.katt_flag) +
                    transX + '_pureKB_' + str(pure_KB) + '_epoch' +
                    str(one_epoch) + '_nkb' + str(FLAGS.nbatch_kg) + '_win' +
                    str(FLAGS.win_size) + '_' + str(embedding_size))
                print('have saved model to ' + path)
        coord.request_stop()

    coord = tf.train.Coordinator()
    threads = []
    threads.append(threading.Thread(target=train_kg, args=(coord,)))
    if not pure_KB:
        threads.append(threading.Thread(target=train_nn, args=(coord,)))
    for t in threads:
        t.start()
    coord.join(threads)
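# Minimal runnable sketch of the zero-copy trick train_kg relies on above: a C
# function receives raw numpy buffer addresses (via __array_interface__) and
# fills the arrays in place, so Python sees fresh batch data without copying.
# The pointer write below stands in for what lib.getBatch does on the C side.
import ctypes
import numpy as np

buf = np.zeros(8, dtype=np.int32)
addr = buf.__array_interface__['data'][0]  # raw address of element 0
ptr = ctypes.cast(ctypes.c_void_p(addr), ctypes.POINTER(ctypes.c_int32))
ptr[0] = 42                                # mutates buf in place
print(buf[0])                              # -> 42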
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading entity embedding')
    ent_embedding = np.load(export_path + 'ent_embedding.npy')
    print('reading relation embedding')
    rel_embedding = np.load(export_path + 'rel_embedding.npy')
    print('reading training data')
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    train_desc_tail = np.load(export_path + 'train_desc_tail.npy')
    train_desc_head = np.load(export_path + 'train_desc_head.npy')
    desc_all = np.load(export_path + 'desc_all.npy')
    print('reading finished')
    print('mentions : %d' % (len(instance_triple)))
    print('sentences : %d' % (len(train_len)))
    print('relations : %d' % (FLAGS.num_classes))
    print('word size : %d' % (len(word_vec[0])))
    print('position size : %d' % (FLAGS.pos_size))
    print('hidden size : %d' % (FLAGS.hidden_size))

    # Down-weight frequent relations: weight = 1 / freq^0.05.
    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print('building network...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=True, word_embeddings=word_vec,
                            ent_embedding=ent_embedding,
                            rel_embedding=rel_embedding)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)
    sess.run(tf.global_variables_initializer())

    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)
    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    saver = tf.train.Saver(max_to_keep=None)
    print('building finished')

    def train_nn(coord):

        def train_step(head, tail, word, pos1, pos2, mask, leng, label_index,
                       label, scope, weights, head_desc, tail_desc):
            feed_dict = {
                model.head_index: head,
                model.tail_index: tail,
                model.word: word,
                model.pos1: pos1,
                model.pos2: pos2,
                model.mask: mask,
                model.len: leng,
                model.label_index: label_index,
                model.label: label,
                model.scope: scope,
                model.keep_prob: FLAGS.keep_prob,
                model.weights: weights,
                model.head_description: head_desc,
                model.tail_description: tail_desc
            }
            _, step, loss, summary, output, correct_predictions = sess.run([
                train_op, global_step, model.loss, merged_summary,
                model.output, model.correct_predictions
            ], feed_dict)
            summary_writer.add_summary(summary, step)
            return output, loss, correct_predictions

        stack_output = []
        stack_label = []
        stack_ce_loss = []
        train_order = list(range(len(instance_triple)))
        save_epoch = 2
        eval_step = 300
        for one_epoch in range(FLAGS.max_epoch):
            # one_epoch = 0
            # while not coord.should_stop():
            print('epoch ' + str(one_epoch + 1) + ' starts!')
            # one_epoch += 1
            np.random.shuffle(train_order)
            s1 = 0.0
            s2 = 0.0
            tot1 = 0.0
            tot2 = 0.0
            losstot = 0.0
            for i in range(len(train_order) // FLAGS.batch_size):
                # Randomly sample a batch of input scopes.
                input_scope = np.take(
                    instance_scope,
                    train_order[i * FLAGS.batch_size:(i + 1) * FLAGS.batch_size],
                    axis=0)
                index = []
                scope = [0]
                label = []   # relation label corresponding to each scope
                weights = []
                for num in input_scope:
                    index = index + list(range(num[0], num[1] + 1))
                    label.append(train_label[num[0]])
                    if train_label[num[0]] > 53:
                        print(train_label[num[0]])
                    scope.append(scope[-1] + num[1] - num[0] + 1)
                    weights.append(reltot[train_label[num[0]]])
                label_ = np.zeros((FLAGS.batch_size, FLAGS.num_classes))
                label_[np.arange(FLAGS.batch_size), label] = 1
                output, loss, correct_predictions = train_step(
                    train_head[index], train_tail[index],
                    train_word[index, :], train_pos1[index, :],
                    train_pos2[index, :], train_mask[index, :],
                    train_len[index], train_label[index], label_,
                    np.array(scope), weights, train_desc_head[index],
                    train_desc_tail[index])
                s = 0
                losstot += loss
                for num in correct_predictions:
                    if label[s] == 0:
                        tot1 += 1.0
                        if num:
                            s1 += 1.0
                    else:
                        tot2 += 1.0
                        if num:
                            s2 += 1.0
                    s = s + 1
                time_str = datetime.datetime.now().isoformat()
                print("batch %d step %d time %s | loss : %f, NA accuracy: %f, "
                      "not NA accuracy: %f"
                      % (one_epoch, i, time_str, loss, s1 / tot1, s2 / tot2))
                current_step = tf.train.global_step(sess, global_step)
            if (one_epoch + 1) % save_epoch == 0:
                print('epoch ' + str(one_epoch + 1) + ' has finished')
                print('saving model...')
                path = saver.save(sess,
                                  FLAGS.model_dir + FLAGS.save_name + '/' +
                                  FLAGS.model + str(FLAGS.katt_flag),
                                  global_step=current_step)
                print('have saved model to ' + path)
        coord.request_stop()

    coord = tf.train.Coordinator()
    threads = []
    threads.append(threading.Thread(target=train_nn, args=(coord,)))
    for t in threads:
        t.start()
    coord.join(threads)
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=256,
                        help='Number of images in each mini-batch')
    parser.add_argument('--batchsize2', '-b2', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--data_type', '-d', type=str, default='LSHTC1')
    parser.add_argument('--model_type', '-m', type=str, default='DocModel')
    parser.add_argument('--model_path', '-mp', type=str,
                        default='./models/ResNet50_model_500.npz')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--cluster', '-c', type=int, default=100)
    parser.add_argument('--weight_decay', '-w', type=float, default=0.0000)
    parser.add_argument('--unit', '-u', type=int, default=300)
    parser.add_argument('--alpha', '-a', type=float, default=0.005)
    parser.add_argument('--epoch', '-e', type=int, default=10)
    parser.add_argument('--epoch2', '-e2', type=int, default=10)
    parser.add_argument('--mu', '-mu', type=float, default=30.0)
    parser.add_argument('--out', '-o', type=str, default='results')
    parser.add_argument('--train_file', '-train_f', type=str,
                        default='dataset/LSHTC1/LSHTC1_selected03.train')
    parser.add_argument('--test_file', '-test_f', type=str,
                        default='dataset/LSHTC1/LSHTC1_selected03.test')
    parser.add_argument('--train_instance', '-train_i', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--train_label', '-train_l', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--test_instance', '-test_i', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--test_label', '-test_l', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--resume', '-r', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--resume2', '-r2', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--optimizer', '-op', type=str, default='Adam')
    parser.add_argument('--optimizer2', '-op2', type=str, default='Adam')
    parser.add_argument('--initial_lr', type=float, default=0.05)
    parser.add_argument('--lr_decay_rate', type=float, default=0.5)
    parser.add_argument('--lr_decay_epoch', type=float, default=25)
    parser.add_argument('--random', action='store_true', default=False,
                        help='Use random assignment or not')
    parser.add_argument('--valid', '--v', action='store_true',
                        help='Use random assignment or not')
    args = parser.parse_args()

    random.seed(args.seed)
    np.random.seed(args.seed)

    gpu = args.gpu
    data_type = args.data_type
    model_type = args.model_type
    num_clusters = args.cluster
    initial_lr = args.initial_lr
    lr_decay_rate = args.lr_decay_rate
    lr_decay_epoch = args.lr_decay_epoch
    opt1 = args.optimizer
    opt2 = args.optimizer2
    model_path = args.model_path
    rand_assign = args.random
    train_file = args.train_file
    test_file = args.test_file
    unit = args.unit
    alpha = args.alpha
    sparse = False
    ndim = 1
    n_in = None
    train_transform = None
    test_transform = None

    if data_type == 'toy':
        model = network.LinearModel(2, 2)
        num_classes = 4
    elif data_type == 'mnist':
        num_classes = 10
        if model_type == 'linear':
            model = network.LinearModel(784, num_clusters)
        elif model_type == 'DNN':
            model = network.MLP(1000, num_clusters)
        elif model_type == 'CNN':
            ndim = 3
            model = network.CNN(num_clusters)
        else:
            raise ValueError
    elif data_type == 'cifar100':
        num_classes = 100
        train_transform = partial(dataset.transform, mean=0.0, std=1.0,
                                  train=True)
        test_transform = partial(dataset.transform, mean=0.0, std=1.0,
                                 train=False)
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
            n_in = 2048
            load_npz(model_path, model, not_load_list=['fc7'])
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
            n_in = 1024
            load_npz(model_path, model, not_load_list=['fc6'])
        else:
            raise ValueError
    elif data_type == 'LSHTC1':
        sparse = True
        num_classes = None
        if model_type == 'DocModel':
            model = network.DocModel(n_in=1024, n_mid=unit,
                                     n_out=num_clusters)
        elif model_type == 'DocModel2':
            model = network.DocModel2(n_in=1024, n_mid=unit,
                                      n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=92586, n_out=num_clusters)
        else:
            raise ValueError
    elif data_type == 'Dmoz':
        sparse = True
        num_classes = None
        if model_type == 'DocModel':
            model = network.DocModel(n_in=561127, n_mid=unit,
                                     n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=1024, n_out=num_clusters)
        else:
            raise ValueError
    else:
        num_classes = 10
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
        elif model_type == 'Resnet101':
            model = network.ResNet101(num_clusters)
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
        elif model_type == 'CNN':
            model = network.CNN(num_clusters)
        else:
            raise ValueError

    if gpu >= 0:
        # Make the specified GPU current and copy the model to it.
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    (train_instances, train_labels), (test_instances, test_labels), num_classes = \
        load_data(data_type, ndim, train_file, test_file)

    if rand_assign:
        assignment, count_classes = random_assignment(num_clusters,
                                                      num_classes)
    else:
        if opt1 == 'Adam':
            optimizer = chainer.optimizers.Adam(alpha=alpha)
        else:
            optimizer = chainer.optimizers.SGD(lr=alpha)
        optimizer.setup(model)

        train = Dataset(*(train_instances, train_labels), sparse)
        test = Dataset(*(test_instances, test_labels), sparse)
        train_iter = chainer.iterators.SerialIterator(
            train, batch_size=args.batchsize)
        train_updater = Updater(model, train, train_iter, optimizer,
                                num_clusters=num_clusters, device=gpu,
                                mu=args.mu)
        trainer = training.Trainer(train_updater, (args.epoch, 'epoch'),
                                   out=args.out)
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'main/loss_cc',
            'main/loss_mut_info', 'main/H_Y', 'main/H_YX', 'elapsed_time'
        ]))
        trainer.extend(extensions.snapshot(), trigger=(5, 'epoch'))
        if args.resume:
            chainer.serializers.load_npz(args.resume, trainer)
        trainer.run()
        """ end clustering """
        """
        res, ss = check_cluster(model, train, num_classes, num_clusters,
                                device=gpu)
        res_sum = tuple(0 for _ in range(num_clusters))
        for i in range(num_classes):
            res_sum = tuple(res_sum[j] + res[i][j] for j in range(num_clusters))
        print(res, res_sum, ss)
        """
        """
        res, ss = check_cluster(model, test, num_classes, num_clusters,
                                device=gpu)
        res_sum = tuple(0 for _ in range(num_clusters))
        for i in range(num_classes):
            res_sum = tuple(res_sum[j] + res[i][j] for j in range(num_clusters))
        """
        cluster_label = separate.det_cluster(model, train, num_classes,
                                             batchsize=128, device=gpu,
                                             sparse=sparse)
        assignment, count_classes = separate.assign(cluster_label,
                                                    num_classes, num_clusters)
        del optimizer
        del train_iter
        del train_updater
        del trainer
        del train
        del test
    print(count_classes)

    """ start classification """
    model = h_net.HierarchicalNetwork(model, num_clusters, count_classes,
                                      n_in=n_in)
    if opt2 == 'Adam':
        optimizer2 = chainer.optimizers.Adam(alpha=initial_lr)
    elif opt2 == 'SGD':
        optimizer2 = chainer.optimizers.SGD(lr=initial_lr)
    else:
        optimizer2 = chainer.optimizers.MomentumSGD(lr=initial_lr)
    optimizer2.setup(model)
    if args.weight_decay > 0:
        optimizer2.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
    if gpu >= 0:
        # Make the specified GPU current and copy the model to it.
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    train = dataset.Dataset(train_instances, train_labels, assignment,
                            _transform=train_transform, sparse=sparse)
    test = dataset.Dataset(test_instances, test_labels, assignment,
                           _transform=test_transform, sparse=sparse)
    train_iter = chainer.iterators.SerialIterator(train,
                                                  batch_size=args.batchsize2)
    test_iter = chainer.iterators.SerialIterator(test, batch_size=1,
                                                 repeat=False)
    train_updater = updater.Updater(model, train, train_iter, optimizer2,
                                    num_clusters, device=gpu)
    trainer = training.Trainer(train_updater, (args.epoch2, 'epoch'),
                               args.out)
    acc = accuracy.Accuracy(model, assignment, num_clusters)
    trainer.extend(extensions.Evaluator(test_iter, acc, device=gpu))
    """
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=(20, 'epoch'))
    """
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'main/loss_cluster', 'main/loss_class',
        'validation/main/accuracy', 'validation/main/cluster_accuracy',
        'validation/main/loss', 'elapsed_time'
    ]))
    if opt2 != 'Adam':
        trainer.extend(extensions.ExponentialShift('lr', lr_decay_rate),
                       trigger=(lr_decay_epoch, 'epoch'))
    if args.resume2:
        chainer.serializers.load_npz(args.resume2, trainer)
    trainer.run()
def main(_):
    print('reading word embedding')
    word_vec = np.load(export_path + 'vec.npy')
    print('reading training data')
    instance_triple = np.load(export_path + 'train_instance_triple.npy')
    instance_scope = np.load(export_path + 'train_instance_scope.npy')
    train_len = np.load(export_path + 'train_len.npy')
    train_label = np.load(export_path + 'train_label.npy')
    train_word = np.load(export_path + 'train_word.npy')
    train_pos1 = np.load(export_path + 'train_pos1.npy')
    train_pos2 = np.load(export_path + 'train_pos2.npy')
    train_mask = np.load(export_path + 'train_mask.npy')
    train_head = np.load(export_path + 'train_head.npy')
    train_tail = np.load(export_path + 'train_tail.npy')
    print('reading finished')
    print('mentions : %d' % (len(instance_triple)))
    print('sentences : %d' % (len(train_len)))
    print('relations : %d' % (FLAGS.num_classes))
    print('word size : %d' % (len(word_vec[0])))
    print('position size : %d' % (FLAGS.pos_size))
    print('hidden size : %d' % (FLAGS.hidden_size))

    # Down-weight frequent relations: weight = 1 / freq^0.05.
    reltot = {}
    for index, i in enumerate(train_label):
        if i not in reltot:
            reltot[i] = 1.0
        else:
            reltot[i] += 1.0
    for i in reltot:
        reltot[i] = 1 / (reltot[i] ** 0.05)

    print('building network...')
    sess = tf.Session()
    if FLAGS.model.lower() == "cnn":
        model = network.CNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "pcnn":
        model = network.PCNN(is_training=True, word_embeddings=word_vec)
    elif FLAGS.model.lower() == "lstm":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() == "gru":
        model = network.RNN(is_training=True, word_embeddings=word_vec,
                            cell_name="GRU", simple_position=True)
    elif FLAGS.model.lower() in ("bi-lstm", "bilstm"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="LSTM", simple_position=True)
    elif FLAGS.model.lower() in ("bi-gru", "bigru"):
        model = network.BiRNN(is_training=True, word_embeddings=word_vec,
                              cell_name="GRU", simple_position=True)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    global_step_kg = tf.Variable(0, name='global_step_kg', trainable=False)
    tf.summary.scalar('learning_rate', FLAGS.learning_rate)
    tf.summary.scalar('learning_rate_kg', FLAGS.learning_rate_kg)

    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)
    optimizer_kg = tf.train.GradientDescentOptimizer(FLAGS.learning_rate_kg)
    grads_and_vars_kg = optimizer_kg.compute_gradients(model.loss_kg)
    train_op_kg = optimizer_kg.apply_gradients(grads_and_vars_kg,
                                               global_step=global_step_kg)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=None)

    print("================================================")
    # Restore a trained checkpoint and export its entity/relation embeddings;
    # `itera` is expected to be defined at module level.
    saver.restore(
        sess, FLAGS.model_dir + sys.argv[1] + FLAGS.model +
        str(FLAGS.katt_flag) + "-" + str(itera))
    ent_embedding, rel_embedding = sess.run(
        [model.word_embedding, model.rel_embeddings])
    ent_embedding = ent_embedding.tolist()
    rel_embedding = rel_embedding.tolist()
    f = open(export_path + "entity2vec", "w")
    f.write(json.dumps(ent_embedding))
    f.close()
    f = open(export_path + "relation2vec", "w")
    f.write(json.dumps(rel_embedding))
    f.close()