def train():
    """Train the BiLSTM sequence labeller with periodic validation and testing.

    All settings are read from the module-level ``args`` namespace.  Each
    epoch optionally runs one pass over the curriculum-learning data and then
    one pass over the full training data.  Every ``args.valid_freq`` training
    steps the model is scored on the dev and test sets; a checkpoint is
    written whenever the dev F1 matches or beats the best value seen so far.
    A ``_SUCCESS`` marker file is written to ``args.save_path`` when all
    epochs are done.
    """
    print("Preparing train and validation data.")
    data = datautil.prepare_bilstm_data(args.data_path,
                                        args.curriculum_learing,
                                        reg=0)
    train_iter = datautil.Itertool(data['train_ids_path'],
                                   batch_size=args.batch_size,
                                   num_steps=args.seq_len,
                                   shuf=True)
    if args.curriculum_learing:
        train_cl_iter = datautil.Itertool(data['train_cl_ids_path'],
                                          batch_size=args.batch_size,
                                          num_steps=args.seq_len,
                                          shuf=True)
    valid_iter = datautil.Itertool(data['dev_ids_path'],
                                   batch_size=args.batch_size,
                                   num_steps=args.seq_len)
    test_iter = datautil.Itertool(data['test_ids_path'],
                                  batch_size=args.batch_size,
                                  num_steps=args.seq_len)

    # Inverse vocabularies (id -> symbol) used when dumping CoNLL output.
    id_term_vocab = {v: k for k, v in data['term_vocab'].items()}
    id_label_vocab = {v: k for k, v in data['label_vocab'].items()}
    term_emb = datautil.load_embedding_prebuilt(args.data_path + '/' +
                                                args.word_emb)

    if not os.path.exists('tmp/'):
        os.mkdir('tmp/')
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    print("Building model.")
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = BilstmSeq2Seq(args, args.seq_len, None,
                              data['term_vocab_size'],
                              data['feature_vocab_size'], args.emb_dim,
                              args.hid_dim, data['label_vocab_size'],
                              term_emb)
        print("Succeed in initializing the bidirectional LSTM model.")
        print("Begin training timestamp {}".format(time.time()))
        sys.stdout.flush()

        # ``checkpoint_path`` is the file prefix handed to the saver; the
        # checkpoint *state* must be looked up in the containing directory.
        checkpoint_path = os.path.join(args.save_path, "model.ckpt")
        ckpt = tf.train.get_checkpoint_state(args.save_path)
        if ckpt and ckpt.model_checkpoint_path:
            print("Reading model parameters from {}".format(
                ckpt.model_checkpoint_path))
            sess.run(tf.global_variables_initializer())
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Created model with fresh parameters.")
            sess.run(tf.global_variables_initializer())

        feed_keys = [
            model.terms, model.features, model.targets,
            model.dropout_keep_prob
        ]

        def make_feed(term, fea, y, keep_prob):
            """Build the feed dict for one batch."""
            return dict(zip(feed_keys, [term, fea, y, keep_prob]))

        def evaluate(data_iter, crf_path, title):
            """Score one data set, dump it to ``crf_path`` and print metrics."""
            costs, words, predicted, gold = [], [], [], []
            for _, (term, fea, y) in data_iter:
                out, predict, target = sess.run([
                    model.test_cost, model.test_predict_labels,
                    model.target_labels
                ], make_feed(term, fea, y, 1.0))
                costs.append(out)
                words.append(term)
                predicted.append(predict)
                gold.append(target)
            tools.conlleval(predicted, gold, words, crf_path, id_term_vocab,
                            id_label_vocab, args.seq_len)
            perf = tools.get_perf(crf_path)
            print(
                '%s Cost: %0.6f, Precision: %0.6f, Recall: %0.6f, F1-score: %0.6f'
                % (title, np.sum(costs) / len(costs), perf['p'], perf['r'],
                   perf['f1']))
            print('%s Details' % title)
            print('\n'.join(perf['detail']))
            sys.stdout.flush()
            return perf

        print("Training model.")
        total_step_num = 0
        best_eval_f1_score = 0.0
        for e in range(args.max_epoch):
            if args.curriculum_learing:
                outs = []
                # Time the whole curriculum pass, not just its last step.
                start_time = time.time()
                for _, (term, fea, y) in train_cl_iter:
                    cost, _ = sess.run([model.cost, model.optimizer],
                                       make_feed(term, fea, y, 0.5))
                    outs.append(cost)
                step_time = time.time() - start_time
                print('CL1 phrase Epoch:%d, Time: %.06f, Cost: %0.6f' %
                      (e, step_time, 1.0 * np.average(outs)))

            outs = []
            step_time = 0
            for _, (term, fea, y) in train_iter:
                start_time = time.time()
                cost, _ = sess.run([model.cost, model.optimizer],
                                   make_feed(term, fea, y, 0.5))
                outs.append(cost)
                total_step_num += 1
                # NOTE(review): the time is averaged over args.save_freq but
                # reported every args.report_freq steps - confirm intended.
                step_time += (time.time() - start_time) / args.save_freq

                if total_step_num % args.report_freq == 0:
                    e_sub = (1.0 * total_step_num * args.batch_size /
                             train_iter.total_size)
                    print(
                        'CL2 phrase Epoch:%f, Step:%d, Step Time: %.06f, Cost: %0.6f'
                        % (e_sub, total_step_num, step_time,
                           1.0 * np.sum(outs) / args.report_freq))
                    sys.stdout.flush()
                    outs = []
                    step_time = 0.

                if total_step_num % args.valid_freq == 0:
                    perf = evaluate(valid_iter, 'tmp/eval.crf', 'Validation')
                    if perf['f1'] >= best_eval_f1_score:
                        best_eval_f1_score = perf['f1']
                        model.saver.save(sess,
                                         checkpoint_path,
                                         global_step=total_step_num)
                        # Keep a copy of the best dev output next to the score.
                        subprocess.call([
                            'cp', 'tmp/eval.crf',
                            'tmp/eval.crf.%d_best_f1_%.2f' %
                            (total_step_num, best_eval_f1_score)
                        ])
                    model.task_type = "test"
                    evaluate(test_iter, 'tmp/test.crf', 'Test')

    with open(args.save_path + '/_SUCCESS', 'w') as marker:
        marker.write('_SUCCESS')
def predict():
    """Tag ``args.predict_file`` with the latest checkpoint and report scores.

    Settings are read from the module-level ``args`` namespace.  Per-batch
    predictions are dumped to ``args.predict_output`` via
    ``tools.predict_dump``; the whole run is additionally written to
    ``data/predict_.crf`` and its precision/recall/F1 are printed.

    Raises:
        ValueError: if ``args.save_path`` contains no checkpoint.
    """
    id_path, term_vocab, fea_vocab, label_vocab = datautil.prepare_predict_data(
        args.data_path, args.predict_file)
    data_iter = datautil.Itertool(id_path,
                                  batch_size=args.batch_size,
                                  num_steps=args.seq_len)
    # Inverse vocabularies (id -> symbol) used when writing the output.
    id_term_vocab = {v: k for k, v in term_vocab.items()}
    id_label_vocab = {v: k for k, v in label_vocab.items()}

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = BilstmSeq2Seq(args,
                              args.seq_len,
                              None,
                              len(term_vocab),
                              len(fea_vocab),
                              args.emb_dim,
                              args.hid_dim,
                              len(label_vocab),
                              pretrained_emb=None)
        ckpt = tf.train.get_checkpoint_state(args.save_path)
        if not (ckpt and ckpt.model_checkpoint_path):
            raise ValueError("checkpoint not found.")
        print("Reading model parameters from {}".format(
            ckpt.model_checkpoint_path))
        sess.run(tf.global_variables_initializer())
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        feed_keys = [
            model.terms, model.features, model.targets,
            model.dropout_keep_prob
        ]
        vcost_test = []
        ws = []
        predict_tags = []
        true_tags = []
        with open(args.predict_output, 'w') as fout:
            for w, (term, fea, y) in data_iter:
                # Keep probability 1.0: dropout is disabled at inference.
                feed = dict(zip(feed_keys, [term, fea, y, 1.0]))
                out, predict, target = sess.run([
                    model.test_cost, model.test_predict_labels,
                    model.target_labels
                ], feed)
                vcost_test.append(out)
                ws.append(term)
                predict_tags.append(predict)
                true_tags.append(target)
                tools.predict_dump(fout,
                                   id_term_vocab,
                                   id_label_vocab,
                                   w,
                                   predict,
                                   seq_len=args.seq_len,
                                   topK=args.top_k)

            tools.conlleval(predict_tags, true_tags, ws, 'data/predict_.crf',
                            id_term_vocab, id_label_vocab, args.seq_len)
            perf = tools.get_perf('data/predict_.crf')
            print(
                'Predict Cost: %0.6f, Precision: %0.6f, Recall: %0.6f, F1-score: %0.6f'
                % (np.sum(vcost_test) / len(vcost_test), perf['p'], perf['r'],
                   perf['f1']))
            print('Predict Details')
            print('\n'.join(perf['detail']))
            sys.stdout.flush()
def main():
    """Evaluate the latest ``model.Model`` checkpoint on the SemEval test split.

    Restores the newest checkpoint from ``s['check_dir']``, runs the model
    over the test set in full batches of ``s['batch_size']`` (a trailing
    partial batch is not evaluated) and reports the overall, top-5 and top-10
    scores on stdout and in ``<check_dir>/predict_log_NEW.txt``.

    Raises:
        ValueError: if ``s['check_dir']`` contains no checkpoint.
    """
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.001,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 1.0,
        'check_dir': './checkpoints/GZ_EMNLP2016/semeval_0.001_16',
        'display_test_per': 1,
        'lr_decay_per': 5
    }
    data_set_file = 'data/ACL2017/semeval/semeval_t_a_GZ_data_set.pkl'
    emb_file = 'data/ACL2017/semeval/semeval_t_a_GZ_embedding.pkl'

    print('loading dataset.....')
    _, _, test_set, _, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    test_lex, _, test_z = test_set

    y_nclasses = 2
    z_nclasses = 5
    batch_size = s['batch_size']

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            log_device_placement=False,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          batch_size=batch_size,
                          model_cell='lstm')

        def dev_step(cwords):
            """Return the model's ``sz_pred`` output for one input batch."""
            feed = {rnn.input_x: cwords, rnn.keep_prob: s['keep_prob']}
            return sess.run(fetches=rnn.sz_pred, feed_dict=feed)

        log_path = str(s['check_dir']) + '/predict_log_NEW.txt'
        with open(log_path, 'a', encoding='utf-8') as logfile:
            saver = tf.train.Saver(tf.all_variables())
            ckpt = tf.train.get_checkpoint_state(s['check_dir'])
            if not (ckpt and ckpt.model_checkpoint_path):
                # Nothing initialises the variables otherwise.
                raise ValueError('checkpoint not found in ' + s['check_dir'])
            print(ckpt.model_checkpoint_path)
            logfile.write(str(ckpt.model_checkpoint_path) + '\n')
            saver.restore(sess, ckpt.model_checkpoint_path)

            predictions_test = []
            groundtruth_test = []
            print('testing............')
            for step in range(len(test_lex) // batch_size):
                x, z = test_batch_putin(test_lex,
                                        test_z,
                                        start_num=step * batch_size,
                                        batch_size=batch_size)
                x = load.pad_sentences(x)
                x = tools.contextwin_2(x, s['win'])
                predictions_test.extend(dev_step(x))
                groundtruth_test.extend(z)
                if step % 100 == 0:
                    print('tested %d batch......' % step)

            print('dataset: ' + data_set_file)
            logfile.write('dataset: ' + data_set_file + '\n')
            print("result:")
            logfile.write("result:\n")

            res_test = tools.conlleval(predictions_test, groundtruth_test)
            print('all: ', res_test)
            logfile.write('all: ' + str(res_test) + '\n')

            res_test_top5 = tools.conlleval_top(predictions_test,
                                                groundtruth_test, 5)
            print('top5: ', res_test_top5)
            logfile.write('top5: ' + str(res_test_top5) + '\n')

            res_test_top10 = tools.conlleval_top(predictions_test,
                                                 groundtruth_test, 10)
            print('top10: ', res_test_top10)
            logfile.write('top10: ' + str(res_test_top10) + '\n')
            logfile.write(
                '-----------------------------------------------------------------------------------------------------------------------'
                + '\n')
def main():
    """Resume training of ``mymodel.myModel`` on the kp20k data set.

    Restores the latest checkpoint found in ``s['load_ckpt_dir']`` and
    continues from epoch ``s['again_epoch']``.  After every epoch the model
    is scored on the validation split (and on the test split every
    ``s['display_test_per']`` epochs); a checkpoint is saved under
    ``s['check_dir']`` whenever the validation F score improves, and the
    learning rate is decayed when it has stalled for more than
    ``s['lr_decay_per']`` epochs.  Progress is printed and appended to
    ``<check_dir>/log.txt``.

    Raises:
        ValueError: if ``s['load_ckpt_dir']`` contains no checkpoint.
    """
    s = {
        'nh1': 450,  # hidden units of the 1st LSTM layer
        'nh2': 450,  # hidden units of the 2nd LSTM layer
        'emb_dimension': 300,  # word-embedding dimension
        'lr': 0.0001,  # initial learning rate
        'lr_decay': 0.5,  # learning-rate decay factor
        'lr_decay_per': 5,  # epochs without improvement before decaying
        'nepochs': 50,  # total number of epochs
        'batch_size': 16,
        'keep_prob': 0.5,  # dropout keep probability
        'check_dir':
        './checkpoints/kp20k_mycps_multisize_CNN_LSTM_attention_Adam_0.0001_16_GPU_goon',  # where the model is saved
        'max_grad_norm': 5,
        'seed': 345,
        'display_test_per': 1,
        'load_ckpt_dir':
        './checkpoints/kp20k_mycps_multisize_CNN_LSTM_attention_Adam_0.0001_16_NEW',
        'again_epoch': 7
    }
    data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    emb_file = 'data/ACL2017/ACL2017_t_a_embedding.pkl'

    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    # Per the original author's note: train_lex holds the word indices of
    # each document, train_y marks keyword positions with 1 and train_z tags
    # them with 0~4 (begin, end, ...).
    train_lex, train_y, train_z = train_set
    valid_lex, _, valid_z = valid_set
    test_lex, _, test_z = test_set

    # check_dir is nested, so create missing parents as well.
    if not os.path.exists(s['check_dir']):
        os.makedirs(s['check_dir'])
    checkpoint_prefix = os.path.join(s['check_dir'], 'model')

    y_nclasses = 2
    z_nclasses = 5
    nsentences = len(train_lex)
    batch_size = s['batch_size']

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            log_device_placement=True,
                            allow_soft_placement=True)

    log_path = str(s['check_dir']) + '/log.txt'
    with open(log_path, 'a', encoding='utf-8') as logfile, \
            tf.compat.v1.Session(config=config) as sess:
        print('len(train_data) {}'.format(len(train_lex)))
        print('len(valid_data) {}'.format(len(valid_lex)))
        print('len(test_data) {}'.format(len(test_lex)))
        logfile.write('len(train_data) {}\n'.format(len(train_lex)))
        logfile.write('len(valid_data) {}\n'.format(len(valid_lex)))
        logfile.write('len(test_data) {}\n'.format(len(test_lex)))
        vocsize = len(set(dic['words2idx'].keys()))
        print('len(vocab) {}'.format(vocsize))
        print("Train started!")
        logfile.write('len(vocab) {}\n'.format(vocsize))
        logfile.write("Train started!\n")

        my_model = mymodel.myModel(nh1=s['nh1'],
                                   nh2=s['nh2'],
                                   ny=y_nclasses,
                                   nz=z_nclasses,
                                   de=s['emb_dimension'],
                                   lr=s['lr'],
                                   lr_decay=s['lr_decay'],
                                   embedding=embedding,
                                   max_gradient_norm=s['max_grad_norm'],
                                   batch_size=batch_size,
                                   rnn_model_cell='lstm')

        # Reload the model to resume from.  The variables are never
        # initialised otherwise, so a missing checkpoint is an error.
        loader = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(s['load_ckpt_dir'])
        if not (ckpt and ckpt.model_checkpoint_path):
            raise ValueError('checkpoint not found in ' + s['load_ckpt_dir'])
        print(ckpt.model_checkpoint_path)
        logfile.write('loading ' + str(ckpt.model_checkpoint_path) +
                      '......\n')
        loader.restore(sess, ckpt.model_checkpoint_path)

        def train_step(cwords, label_y, label_z):
            """Run one optimisation step and return the batch loss."""
            feed = {
                my_model.cnn_input_x: cwords,
                my_model.rnn_input_y: label_y,
                my_model.rnn_input_z: label_z,
                my_model.keep_prob: s['keep_prob']
            }
            loss, _ = sess.run(fetches=[my_model.loss, my_model.train_op],
                               feed_dict=feed)
            return loss

        def dev_step(cwords):
            """Return ``sz_pred`` for one batch with dropout disabled."""
            feed = {my_model.cnn_input_x: cwords, my_model.keep_prob: 1.0}
            return sess.run(fetches=my_model.sz_pred, feed_dict=feed)

        def evaluate(lex, labels):
            """Score the model over all full batches of ``lex``."""
            predictions, groundtruth = [], []
            for step in range(len(lex) // batch_size):
                x, z = test_batch_putin(lex,
                                        labels,
                                        start_num=step * batch_size,
                                        batch_size=batch_size)
                predictions.extend(dev_step(load.pad_sentences(x)))
                groundtruth.extend(z)
            return tools.conlleval(predictions, groundtruth)

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

        # Hard-coded starting state; presumably the best scores of the run
        # being resumed - confirm before changing load_ckpt_dir.
        best_f = 0.32468199323141556
        best_e = 6
        decay_e = 6
        test_best_f = 0.3285848062741426
        test_best_e = 6
        best_res = None
        test_best_res = None

        for e in range(s['again_epoch'], s['nepochs']):
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            for step in range(len(train_lex) // batch_size):
                input_x, label_y, label_z = train_batch_putin(
                    train_lex,
                    train_y,
                    train_z,
                    start_num=step * batch_size,
                    batch_size=batch_size)
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                loss = train_step(input_x, label_y, label_z)
                print(
                    'loss %.6f' % loss, ' [learning] epoch %i>> %2.2f%%' %
                    (e, batch_size * step * 100. / nsentences),
                    'completed in %.2f (sec) <<\r' % (time.time() - t_start))
                if step % 1000 == 0:
                    logfile.write('loss %.6f' % loss)
                    logfile.write(' [learning] epoch %i>> %2.2f%%' %
                                  (e, batch_size * step * 100. / nsentences))
                    logfile.write('completed in %.2f (sec) <<\n' %
                                  (time.time() - t_start))

            # VALID
            print('Validing..............')
            res_valid = evaluate(valid_lex, valid_z)
            if res_valid['f'] > best_f:
                best_f = res_valid['f']
                best_e = e
                decay_e = e
                best_res = res_valid
                print('\nVALID new best:', res_valid)
                logfile.write('\nVALID new best: ' + str(res_valid))
                path = saver.save(sess=sess,
                                  save_path=checkpoint_prefix,
                                  global_step=e)
                print("Save model checkpoint to {}".format(path))
                logfile.write("\nSave model checkpoint to {}\n".format(path))
            else:
                print('\nVALID new curr:', res_valid)
                logfile.write('\nVALID new curr: ' + str(res_valid))

            # TEST
            print('Testing..............')
            if e % s['display_test_per'] == 0:
                res_test = evaluate(test_lex, test_z)
                if res_test['f'] > test_best_f:
                    test_best_f = res_test['f']
                    test_best_e = e
                    test_best_res = res_test
                    print('TEST new best:', res_test)
                    logfile.write('\nTEST new best: ' + str(res_test) + '\n')
                else:
                    print('TEST new curr:', res_test)
                    logfile.write('\nTEST new curr: ' + str(res_test) + '\n')

            # Decay the learning rate when validation has not improved for
            # more than lr_decay_per epochs, at most once per such window.
            if (e - best_e > s['lr_decay_per']
                    and e - decay_e > s['lr_decay_per']):
                sess.run(fetches=my_model.learning_rate_decay_op)
                decay_e = e
            lr = sess.run(fetches=my_model.lr)
            print('learning rate:%f' % lr)
            logfile.write('\nlearning rate:%f\n' % lr)
            if lr < 1e-6:
                break

        print("Train finished!")
        print('Valid Best Result: epoch %d: ' % (best_e), best_res)
        print('Test Best Result: epoch %d: ' % (test_best_e), test_best_res)
        logfile.write("Train finished!\n")
        logfile.write('Valid Best Result: epoch %d: ' % (best_e) +
                      str(best_res))
        logfile.write('\nTest Best Result: epoch %d: ' % (test_best_e) +
                      str(test_best_res))
def main():
    """Evaluate the latest ``mymodel.myModel`` checkpoint on the kp20k test split.

    Restores the newest checkpoint from ``s['check_dir']``, runs the model
    over the test set in full batches of ``s['batch_size']`` (a trailing
    partial batch is not evaluated) and reports the overall, top-5 and top-10
    scores on stdout and in ``<check_dir>/predict_log_NEW.txt``.

    Raises:
        ValueError: if ``s['check_dir']`` contains no checkpoint.
    """
    s = {
        'nh1': 450,
        'nh2': 450,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.0001,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 1.0,
        'check_dir': './checkpoints/GZ_mycps_Adam_0.0001_16/kp20k',
        'display_test_per': 1,
        'lr_decay_per': 5
    }
    data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_data_set.pkl'
    emb_file = 'data/ACL2017/kp20k/kp20k_t_a_embedding.pkl'

    print('loading dataset.....')
    _, _, test_set, _, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    test_lex, _, test_z = test_set

    y_nclasses = 2
    z_nclasses = 5
    batch_size = s['batch_size']

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            log_device_placement=False,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        my_model = mymodel.myModel(nh1=s['nh1'],
                                   nh2=s['nh2'],
                                   ny=y_nclasses,
                                   nz=z_nclasses,
                                   de=s['emb_dimension'],
                                   lr=s['lr'],
                                   lr_decay=s['lr_decay'],
                                   embedding=embedding,
                                   max_gradient_norm=s['max_grad_norm'],
                                   batch_size=batch_size,
                                   rnn_model_cell='lstm')

        def dev_step(cwords):
            """Return the model's ``sz_pred`` output for one input batch."""
            feed = {
                my_model.cnn_input_x: cwords,
                my_model.keep_prob: s['keep_prob']
            }
            return sess.run(fetches=my_model.sz_pred, feed_dict=feed)

        log_path = str(s['check_dir']) + '/predict_log_NEW.txt'
        with open(log_path, 'a', encoding='utf-8') as logfile:
            saver = tf.train.Saver(tf.all_variables())
            ckpt = tf.train.get_checkpoint_state(s['check_dir'])
            if not (ckpt and ckpt.model_checkpoint_path):
                # Nothing initialises the variables otherwise.
                raise ValueError('checkpoint not found in ' + s['check_dir'])
            print(ckpt.model_checkpoint_path)
            logfile.write(str(ckpt.model_checkpoint_path) + '\n')
            saver.restore(sess, ckpt.model_checkpoint_path)

            predictions_test = []
            groundtruth_test = []
            print('testing............')
            for step in range(len(test_lex) // batch_size):
                x, z = test_batch_putin(test_lex,
                                        test_z,
                                        start_num=step * batch_size,
                                        batch_size=batch_size)
                x = load.pad_sentences(x)
                predictions_test.extend(dev_step(x))
                groundtruth_test.extend(z)
                if step % 100 == 0:
                    print('tested %d batch......' % (step // 100))

            print('dataset: ' + data_set_file)
            logfile.write('dataset: ' + data_set_file + '\n')
            print("测试结果:")
            logfile.write("测试结果:\n")

            res_test = tools.conlleval(predictions_test, groundtruth_test)
            print('all: ', res_test)
            logfile.write('all: ' + str(res_test) + '\n')

            res_test_top5 = tools.conlleval_top(predictions_test,
                                                groundtruth_test, 5)
            print('top5: ', res_test_top5)
            logfile.write('top5: ' + str(res_test_top5) + '\n')

            res_test_top10 = tools.conlleval_top(predictions_test,
                                                 groundtruth_test, 10)
            print('top10: ', res_test_top10)
            logfile.write('top10: ' + str(res_test_top10) + '\n')
            logfile.write(
                '-----------------------------------------------------------------------------------------------------------------------'
                + '\n')
def main():
    """Train ``model.Model`` on the data returned by ``load.atisfold()``.

    The last 10% of the training data is held out for validation.  After
    every epoch the model is scored on the validation split (and on the test
    split every ``s['display_test_per']`` epochs); a checkpoint is saved
    under ``s['check_dir']`` whenever the validation F score improves.
    Training stops after ``s['nepochs']`` epochs or once the learning rate
    has decayed below 1e-5.
    """
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.1,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 150,
        'batch_size': 16,
        'keep_prob': 0.5,
        'check_dir': './checkpoints',
        'display_test_per': 3,
        'lr_decay_per': 10
    }
    train_set, test_set, dic, embedding = load.atisfold()

    train_lex, train_y, train_z = train_set
    # Hold out the last 10% of the training data for validation.
    tr = int(len(train_lex) * 0.9)
    valid_lex, valid_z = train_lex[tr:], train_z[tr:]
    train_lex, train_y, train_z = train_lex[:tr], train_y[:tr], train_z[:tr]
    test_lex, _, test_z = test_set

    print('len(train_data) {}'.format(len(train_lex)))
    print('len(valid_data) {}'.format(len(valid_lex)))
    print('len(test_data) {}'.format(len(test_lex)))
    print('len(vocab) {}'.format(len(set(dic['words2idx'].keys()))))
    print("Train started!")

    y_nclasses = 2
    z_nclasses = 5
    nsentences = len(train_lex)
    batch_size = s['batch_size']

    print('#' * 30)
    with tf.Session() as sess:
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
        print("*" * 30)

        def train_step(cwords, label_y, label_z):
            """Run one optimisation step and return the batch loss."""
            feed = {
                rnn.input_x: cwords,
                rnn.input_y: label_y,
                rnn.input_z: label_z,
                rnn.keep_prob: s['keep_prob'],
                rnn.batch_size: batch_size
            }
            loss, _ = sess.run(fetches=[rnn.loss, rnn.train_op],
                               feed_dict=feed)
            return loss

        def dev_step(cwords):
            """Return ``sz_pred`` for one batch with dropout disabled."""
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0,
                rnn.batch_size: batch_size
            }
            return sess.run(fetches=rnn.sz_pred, feed_dict=feed)

        def evaluate(lex, labels):
            """Score the model over the minibatches of ``lex``."""
            predictions, groundtruth = [], []
            for x, z in tl.iterate.minibatches(lex,
                                               labels,
                                               batch_size=batch_size):
                x = load.pad_sentences(x)
                x = tools.contextwin_2(x, s['win'])
                predictions.extend(dev_step(x))
                groundtruth.extend(z)
            return tools.conlleval(predictions, groundtruth, '')

        saver = tf.train.Saver(tf.all_variables())
        sess.run(tf.initialize_all_variables())

        best_f = -1
        best_e = 0
        test_best_f = -1
        test_best_e = 0
        best_res = None
        test_best_res = None

        for e in range(s['nepochs']):
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            batches = tl.iterate.minibatches(train_lex,
                                             list(zip(train_y, train_z)),
                                             batch_size=batch_size)
            for step, (input_x, target) in enumerate(batches):
                label_y, label_z = zip(*target)
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                cwords = tools.contextwin_2(input_x, s['win'])
                loss = train_step(cwords, label_y, label_z)
                print(
                    'loss %.2f' % loss,
                    ' [learning] epoch %i>> %2.2f%%' %
                    (e, batch_size * step * 100. / nsentences),
                    'completed in %.2f (sec) <<\r' % (time.time() - t_start),
                )
                sys.stdout.flush()

            # VALID
            res_valid = evaluate(valid_lex, valid_z)
            if res_valid['f'] > best_f:
                best_f = res_valid['f']
                best_e = e
                best_res = res_valid
                print('\nVALID new best:', res_valid)
                path = saver.save(sess=sess,
                                  save_path=checkpoint_prefix,
                                  global_step=e)
                print("Save model checkpoint to {}".format(path))
            else:
                print('\nVALID new curr:', res_valid)

            # TEST
            if e % s['display_test_per'] == 0:
                res_test = evaluate(test_lex, test_z)
                if res_test['f'] > test_best_f:
                    test_best_f = res_test['f']
                    test_best_e = e
                    test_best_res = res_test
                    print('TEST new best:', res_test)
                else:
                    print('TEST new curr:', res_test)

            # Decay the learning rate once validation has not improved for
            # more than lr_decay_per epochs.
            # NOTE(review): this fires on every following epoch until a new
            # best is found - confirm that is intended.
            if e - best_e > s['lr_decay_per']:
                sess.run(fetches=rnn.learning_rate_decay_op)
            lr = sess.run(fetches=rnn.lr)
            print('learning rate:%f' % lr)
            if lr < 1e-5:
                break
            print()

        print("Train finished!")
        print('Valid Best Result: epoch %d: ' % (best_e), best_res)
        print('Test Best Result: epoch %d: ' % (test_best_e), test_best_res)
def main():
    """Train ``model.Model`` from scratch on the kp20k data set.

    After every epoch the model is scored on the validation split (and on
    the test split every ``s['display_test_per']`` epochs); a checkpoint is
    saved under ``s['check_dir']`` whenever the validation F score improves.
    Training stops after ``s['nepochs']`` epochs or once the learning rate
    has decayed below 1e-6.  Progress is printed and appended to
    ``<check_dir>/log.txt``.
    """
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.01,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 0.5,
        'check_dir': './checkpoints/GZ_EMNLP2016/kp20k_0.01_16',
        'display_test_per': 1,
        'lr_decay_per': 5
    }
    data_set_file = 'data/ACL2017/kp20k/kp20k_t_a_allwords_data_set.pkl'
    emb_file = 'data/ACL2017/kp20k/ACL2017_t_a_embedding.pkl'

    print('loading dataset.....')
    train_set, valid_set, test_set, dic, embedding = load.atisfold_ACL2017(
        data_set_file, emb_file)
    # Per the original author's note: train_lex holds the word indices of
    # each document, train_y marks keyword positions with 1 and train_z tags
    # them with 0~4 (begin, end, ...).
    train_lex, train_y, train_z = train_set
    valid_lex, _, valid_z = valid_set
    test_lex, _, test_z = test_set

    # check_dir is nested two levels deep, so create missing parents too.
    if not os.path.exists(s['check_dir']):
        os.makedirs(s['check_dir'])
    checkpoint_prefix = os.path.join(s['check_dir'], 'model')

    y_nclasses = 2
    z_nclasses = 5
    nsentences = len(train_lex)
    batch_size = s['batch_size']

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            log_device_placement=False,
                            allow_soft_placement=True)

    log_path = str(s['check_dir']) + '/log.txt'
    with open(log_path, 'a', encoding='utf-8', buffering=1) as logfile, \
            tf.compat.v1.Session(config=config) as sess:
        print('len(train_data) {}'.format(len(train_lex)))
        print('len(valid_data) {}'.format(len(valid_lex)))
        print('len(test_data) {}'.format(len(test_lex)))
        logfile.write('len(train_data) {}\n'.format(len(train_lex)))
        logfile.write('len(valid_data) {}\n'.format(len(valid_lex)))
        logfile.write('len(test_data) {}\n'.format(len(test_lex)))
        vocsize = len(set(dic['words2idx'].keys()))
        print('len(vocab) {}'.format(vocsize))
        print("Train started!")
        logfile.write('len(vocab) {}\n'.format(vocsize))
        logfile.write("Train started!\n")

        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          batch_size=batch_size,
                          model_cell='lstm')

        def train_step(cwords, label_y, label_z):
            """Run one optimisation step and return the batch loss."""
            feed = {
                rnn.input_x: cwords,
                rnn.input_y: label_y,
                rnn.input_z: label_z,
                rnn.keep_prob: s['keep_prob']
            }
            loss, _ = sess.run(fetches=[rnn.loss, rnn.train_op],
                               feed_dict=feed)
            return loss

        def dev_step(cwords):
            """Return ``sz_pred`` for one batch with dropout disabled."""
            feed = {rnn.input_x: cwords, rnn.keep_prob: 1.0}
            return sess.run(fetches=rnn.sz_pred, feed_dict=feed)

        def evaluate(lex, labels):
            """Score the model over all full batches of ``lex``."""
            predictions, groundtruth = [], []
            for step in range(len(lex) // batch_size):
                x, z = test_batch_putin(lex,
                                        labels,
                                        start_num=step * batch_size,
                                        batch_size=batch_size)
                x = load.pad_sentences(x)
                x = tools.contextwin_2(x, s['win'])
                predictions.extend(dev_step(x))
                groundtruth.extend(z)
            return tools.conlleval(predictions, groundtruth)

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        sess.run(tf.global_variables_initializer())

        best_f = -1
        best_e = 0
        test_best_f = -1
        test_best_e = 0
        best_res = None
        test_best_res = None

        for e in range(s['nepochs']):
            tools.shuffle([train_lex, train_y, train_z], s['seed'])
            t_start = time.time()
            for step in range(len(train_lex) // batch_size):
                input_x, label_y, label_z = train_batch_putin(
                    train_lex,
                    train_y,
                    train_z,
                    start_num=step * batch_size,
                    batch_size=batch_size)
                input_x = load.pad_sentences(input_x)
                label_y = load.pad_sentences(label_y)
                label_z = load.pad_sentences(label_z)
                cwords = tools.contextwin_2(input_x, s['win'])
                loss = train_step(cwords, label_y, label_z)
                print(
                    'loss %.6f' % loss, ' [learning] epoch %i>> %2.2f%%' %
                    (e, batch_size * step * 100. / nsentences),
                    'completed in %.2f (sec) <<\r' % (time.time() - t_start))
                if step % 1000 == 0:
                    logfile.write('loss %.6f' % loss)
                    logfile.write(' [learning] epoch %i>> %2.2f%%' %
                                  (e, batch_size * step * 100. / nsentences))
                    logfile.write('completed in %.2f (sec) <<\n' %
                                  (time.time() - t_start))

            # VALID
            print('Validing..............')
            res_valid = evaluate(valid_lex, valid_z)
            if res_valid['f'] > best_f:
                best_f = res_valid['f']
                best_e = e
                best_res = res_valid
                print('\nVALID new best:', res_valid)
                logfile.write('\nVALID new best: ' + str(res_valid))
                path = saver.save(sess=sess,
                                  save_path=checkpoint_prefix,
                                  global_step=e)
                print("Save model checkpoint to {}".format(path))
                logfile.write("\nSave model checkpoint to {}\n".format(path))
            else:
                print('\nVALID new curr:', res_valid)
                logfile.write('\nVALID new curr: ' + str(res_valid))

            # TEST
            print('Testing..............')
            if e % s['display_test_per'] == 0:
                res_test = evaluate(test_lex, test_z)
                if res_test['f'] > test_best_f:
                    test_best_f = res_test['f']
                    test_best_e = e
                    test_best_res = res_test
                    print('TEST new best:', res_test)
                    logfile.write('\nTEST new best: ' + str(res_test))
                else:
                    print('TEST new curr:', res_test)
                    logfile.write('\nTEST new curr: ' + str(res_test))

            # Decay the learning rate once validation has not improved for
            # more than lr_decay_per epochs.
            # NOTE(review): this fires on every following epoch until a new
            # best is found - confirm that is intended.
            if e - best_e > s['lr_decay_per']:
                sess.run(fetches=rnn.learning_rate_decay_op)
            lr = sess.run(fetches=rnn.lr)
            print('learning rate:%f' % lr)
            logfile.write('\nlearning rate:%f\n' % lr)
            if lr < 1e-6:
                break

        print("Train finished!")
        print('Valid Best Result: epoch %d: ' % (best_e), best_res)
        print('Test Best Result: epoch %d: ' % (test_best_e), test_best_res)
        logfile.write("Train finished!\n")
        logfile.write('Valid Best Result: epoch %d: ' % (best_e) +
                      str(best_res))
        logfile.write('\nTest Best Result: epoch %d: ' % (test_best_e) +
                      str(test_best_res))
def main():
    """Evaluate a saved BilstmSeq2Seq checkpoint on ``args.input_path``.

    Prepares the vocabularies and the pre-built term embedding through
    ``datautil``, builds the model, restores the checkpoint recorded under
    ``cf.save_path``, and feeds every batch of the input iterator through the
    model with ``dropout_keep_prob`` set to 1.0.  Predictions and targets are
    handed to ``tools.conlleval`` together with ``'tmp/eval.crf'``; the
    metrics read back by ``tools.get_perf`` are printed to stdout.

    Raises:
        ValueError: if no checkpoint state is found under ``cf.save_path``.
    """
    if not os.path.exists(args.data_path):
        os.mkdir(args.data_path)
    # 'tmp/eval.crf' is used below; create the directory here as train()
    # does, so evaluation does not rely on a previous training run.
    if not os.path.exists('tmp/'):
        os.mkdir('tmp/')

    d = datautil.prepare_bilstm_data(cf.data_path,
                                     max_vocab_size=100000,
                                     reg=1)
    valid_iter = datautil.Itertool(args.input_path,
                                   batch_size=cf.batch_size,
                                   seq_len=cf.seq_len)
    tag_id_to_labels = datautil.gen_label_map(d['label_vocab_path'])
    char_id_to_chars = datautil.gen_word_map(d['char_vocab_path'])
    term_emb = datautil.load_embedding_prebuilt(cf.data_path + '/' +
                                                cf.word_emb)

    with tf.Session() as sess:
        model = BilstmSeq2Seq(cf, cf.seq_len, d['char_vocab_size'],
                              d['term_vocab_size'], d['feature_vocab_size'],
                              cf.emb_dim, cf.hid_dim, d['label_vocab_size'],
                              term_emb)
        print("Succeed in initializing the bidirectional LSTM model.")
        print("Begin training timestamp {}".format(time.time()))
        sys.stdout.flush()

        ckpt = tf.train.get_checkpoint_state(cf.save_path)
        if not ckpt:
            raise ValueError("checkpoint not found.")
        print("Reading model parameters from {}".format(
            ckpt.model_checkpoint_path))
        # NOTE(review): import_meta_graph adds the saved graph to the current
        # default graph, which already holds the model built above -- confirm
        # the restore targets the variables the model actually uses.
        model.saver = tf.train.import_meta_graph(
            ckpt.model_checkpoint_path + '.meta')
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        vcost_valid = []
        ws = []
        predict_tags = []
        true_tags = []
        placeholders = [
            model.chars, model.features, model.terms, model.targets,
            model.dropout_keep_prob
        ]
        fetches = [model.cost, model.predict_labels, model.target_labels]
        # The first element yielded by the iterator is not used here.
        for _w, c, fea, term, y in valid_iter:
            # Keep probability 1.0 disables dropout for evaluation.
            feed = dict(zip(placeholders, [c, fea, term, y, 1.0]))
            out, predict, target = sess.run(fetches, feed)
            vcost_valid.append(out)
            ws.append(c)
            predict_tags.append(predict)
            true_tags.append(target)

        tools.conlleval(predict_tags, true_tags, ws, 'tmp/eval.crf',
                        char_id_to_chars, tag_id_to_labels, cf.seq_len)
        # Separate name so the data dictionary `d` is not shadowed.
        perf = tools.get_perf('tmp/eval.crf')
        print(
            'Validation Cost: %0.6f, Precision: %0.6f, Recall: %0.6f, F1-score: %0.6f'
            % (np.sum(vcost_valid) / len(vcost_valid), perf['p'], perf['r'],
               perf['f1']))
        print('Validation Details')
        print('\n'.join(perf['detail']))
        sys.stdout.flush()
def main():
    """Restore a trained model from the checkpoint directory and score it.

    Loads the dataset through ``load.atisfold()``, builds ``model.Model`` with
    the hyper-parameters in ``s``, restores the checkpoint found in
    ``s['check_dir']``, runs the test sentences through the model in
    mini-batches and prints the result of ``tools.conlleval``.

    Raises:
        ValueError: if no checkpoint is found in ``s['check_dir']``.
    """
    s = {
        'nh1': 300,
        'nh2': 300,
        'win': 3,
        'emb_dimension': 300,
        'lr': 0.1,
        'lr_decay': 0.5,
        'max_grad_norm': 5,
        'seed': 345,
        'nepochs': 50,
        'batch_size': 16,
        'keep_prob': 1.0,
        'check_dir': './checkpoints',
        'display_test_per': 5,
        'lr_decay_per': 10
    }

    # load the dataset; only the test split and the embedding are used here
    _train_set, test_set, _dic, embedding = load.atisfold()
    test_lex, _test_y, test_z = test_set[0:1000]

    y_nclasses = 2
    z_nclasses = 5

    with tf.Session() as sess:
        rnn = model.Model(nh1=s['nh1'],
                          nh2=s['nh2'],
                          ny=y_nclasses,
                          nz=z_nclasses,
                          de=s['emb_dimension'],
                          cs=s['win'],
                          lr=s['lr'],
                          lr_decay=s['lr_decay'],
                          embedding=embedding,
                          max_gradient_norm=s['max_grad_norm'],
                          model_cell='lstm')

        checkpoint_dir = s['check_dir']
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        # No variable initializer is run in this function, so without a
        # restored checkpoint the predictions below would use uninitialized
        # variables; fail early with a clear message instead.
        if not (ckpt and ckpt.model_checkpoint_path):
            raise ValueError(
                "checkpoint not found in {}".format(checkpoint_dir))
        saver.restore(sess, ckpt.model_checkpoint_path)

        def dev_step(cwords):
            """Return ``rnn.sz_pred`` for one batch of context-window inputs."""
            feed = {
                rnn.input_x: cwords,
                rnn.keep_prob: 1.0,  # no dropout at test time
                rnn.batch_size: s['batch_size']
            }
            return sess.run(fetches=rnn.sz_pred, feed_dict=feed)

        print("测试结果�")
        predictions_test = []
        groundtruth_test = []
        for x, z in tl.iterate.minibatches(test_lex,
                                           test_z,
                                           batch_size=s['batch_size']):
            x = load.pad_sentences(x)
            x = tools.contextwin_2(x, s['win'])
            predictions_test.extend(dev_step(x))
            groundtruth_test.extend(z)

        res_test = tools.conlleval(predictions_test, groundtruth_test, '')
        # Call syntax works on both Python 2 and 3 for a single argument.
        print(res_test)