def predict():
    if FLAGS.model_path is None:
        raise ValueError('Model path is None!')
    if FLAGS.dataset == 'pku':
        config.hidden_dim = 64
    word2id = get_word2id(train_data_path,
                          bigram_words=bigram_words_path,
                          min_bw_frequence=FLAGS.min_bg_freq)
    id2word = build_reverse_dictionary(word2id)
    x_test, y_test = get_train_data(test_data_path, word2id)
    init_embedding = None
    print 'test_data_path: %s' % test_data_path
    print 'bigram_words_path: %s' % bigram_words_path
    print 'model_path: %s' % FLAGS.model_path
    print 'min_bg_freq: %d' % FLAGS.min_bg_freq
    with tf.Session() as sess:
        model = models.BaselineModel(vocab_size=len(word2id),
                                     word_dim=config.word_dim,
                                     hidden_dim=config.hidden_dim,
                                     pad_word=word2id[PAD],
                                     init_embedding=init_embedding,
                                     num_classes=config.num_classes,
                                     clip=config.clip,
                                     lr=config.lr,
                                     l2_reg_lamda=config.l2_reg_lamda,
                                     num_layers=config.num_layers,
                                     rnn_cell=config.rnn_cell,
                                     bi_direction=config.bi_direction)
        checkpoints_model = os.path.join('checkpoints', FLAGS.model_path)
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoints_model)
        if ckpt and ckpt.model_checkpoint_path:
            print 'test_start!'
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print 'No trained model found.'
            exit()
        test_pred = []
        for i in range(0, len(x_test), config.batch_size):
            input_x = padding3(x_test[i:i + config.batch_size], word2id[PAD])
            test_pred += model.predict_step(sess, input_x)
        P, R, F = evaluate_word_PRF(test_pred, y_test)
        print '%s: P:%f R:%f F:%f' % (FLAGS.model_path, P, R, F)
        print '------------------------------------------'
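# The batch loop above relies on padding helpers imported from elsewhere in
# the project. Below is a minimal sketch of the behaviour the loop assumes
# (names suffixed with _sketch to avoid shadowing the real imports); the
# actual implementations may differ.

def padding_sketch(batch, pad_value):
    # pad every label sequence in a batch to the length of the longest one
    max_len = max(len(seq) for seq in batch)
    return [seq + [pad_value] * (max_len - len(seq)) for seq in batch]

def padding3_sketch(batch, pad_word):
    # assumed analogue for word-id feature sequences: fill with the PAD word
    # id so each batch forms a rectangular input matrix for the model
    max_len = max(len(seq) for seq in batch)
    return [seq + [pad_word] * (max_len - len(seq)) for seq in batch]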
# Select the model class, move it to the GPU, and optionally restore
# weights from a previous checkpoint.
model_classes = {
    'simple_baseline': models.SimpleBaselineModel,
    'baseline': models.BaselineModel,
    'best': models.BestModel,
}
model = model_classes[args.model](args).cuda()  # load model to gpu
if args.load_prev_model:
    checkpoint = torch.load(args.resume)
    model.load_state_dict(checkpoint)
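# Usage note: args.resume is expected to point at a state_dict checkpoint
# saved earlier with torch.save(); the file name below is only an example.
#
#     torch.save(model.state_dict(), 'checkpoints/best_model.pth')
#     ...
#     model.load_state_dict(torch.load('checkpoints/best_model.pth'))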
def train():
    word2id = get_word2id(train_data_path,
                          bigram_words=bigram_words_path,
                          min_bw_frequence=FLAGS.min_bg_freq)
    X_train, y_train = get_train_data(train_data_path, word2id)
    init_embedding = get_embedding(word2id, size=config.word_dim)
    # four out-of-domain test sets
    X_test1, y_test1 = get_train_data(literature_path, word2id)  # domain 1
    X_test2, y_test2 = get_train_data(computer_path, word2id)    # domain 2
    X_test3, y_test3 = get_train_data(medicine_path, word2id)    # domain 3
    X_test4, y_test4 = get_train_data(finance_path, word2id)     # domain 4
    print 'train_data %s' % train_data_path
    print 'bigram %s' % bigram_words_path
    print 'model_path: %s' % FLAGS.model_path
    print 'min_bg_freq: %d' % FLAGS.min_bg_freq
    print 'len(train) %d' % len(X_train)
    print 'len(test) %d %d %d %d' % (len(X_test1), len(X_test2),
                                     len(X_test3), len(X_test4))
    print 'init_embedding shape [%d,%d]' % (init_embedding.shape[0],
                                            init_embedding.shape[1])
    print 'Train started!'

    tfConfig = tf.ConfigProto()
    tfConfig.gpu_options.per_process_gpu_memory_fraction = FLAGS.memory
    with tf.Session(config=tfConfig) as sess:
        model = models.BaselineModel(vocab_size=len(word2id),
                                     word_dim=config.word_dim,
                                     hidden_dim=config.hidden_dim,
                                     pad_word=word2id[PAD],
                                     init_embedding=init_embedding,
                                     num_classes=config.num_classes,
                                     clip=config.clip,
                                     lr=config.lr,
                                     l2_reg_lamda=config.l2_reg_lamda,
                                     num_layers=config.num_layers,
                                     rnn_cell=config.rnn_cell,
                                     bi_direction=config.bi_direction)
        if not os.path.exists('checkpoints'):
            os.mkdir('checkpoints')
        checkpoints_model = os.path.join('checkpoints', FLAGS.model_path)
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoints_model)
        if ckpt and ckpt.model_checkpoint_path:
            print 'restore from original model!'
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        best_f1, best_e1 = 0, 0
        best_f2, best_e2 = 0, 0
        best_f3, best_e3 = 0, 0
        best_f4, best_e4 = 0, 0
        for epoch in xrange(config.n_epoch):
            start_time = time.time()
            # train
            train_loss = []
            for step, (X, Y) in enumerate(
                    data_iterator(X_train, y_train, config.batch_size,
                                  padding_word=word2id[PAD], shuffle=True)):
                loss = model.train_step(sess, X, Y, config.dropout_keep_prob)
                print 'epoch:%d>>%2.2f%%' % (
                    epoch, config.batch_size * step * 100.0 / len(X_train)),
                print 'completed in %.2f (sec) <<\r' % (time.time() - start_time),
                sys.stdout.flush()
                train_loss.append(loss)
            train_loss = np.mean(train_loss, dtype=float)
            print 'Train Epoch %d loss %f' % (epoch, train_loss)
            saver.save(sess, checkpoints_model, epoch)

            def test(X_test, y_test, domain):
                test_pred = []
                for i in range(0, len(X_test), config.batch_size):
                    input_x = padding3(X_test[i:i + config.batch_size],
                                       word2id[PAD])
                    test_pred += model.predict_step(sess, input_x)
                P, R, F = evaluate_word_PRF(test_pred, y_test)
                print '%s Test: P:%f R:%f F:%f' % (domain, P, R, F)
                return F

            # track the best F1 and the epoch it was reached for each domain
            f1 = test(X_test1, y_test1, 'A')
            if best_f1 < f1:
                best_f1, best_e1 = f1, epoch
            f2 = test(X_test2, y_test2, 'B')
            if best_f2 < f2:
                best_f2, best_e2 = f2, epoch
            f3 = test(X_test3, y_test3, 'C')
            if best_f3 < f3:
                best_f3, best_e3 = f3, epoch
            f4 = test(X_test4, y_test4, 'D')
            if best_f4 < f4:
                best_f4, best_e4 = f4, epoch
            print 'best A:%f %d best B:%f %d best C:%f %d best D:%f %d' % (
                best_f1, best_e1, best_f2, best_e2,
                best_f3, best_e3, best_f4, best_e4)
            print '******************************************************'
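# data_iterator is imported from elsewhere in the project. A minimal sketch
# of the batching behaviour the training loop above assumes (shuffling,
# fixed-size batches, PAD-word padding, pad label 3 as in padding(y, 3));
# the real implementation may differ.

def data_iterator_sketch(X, y, batch_size, padding_word, shuffle=True):
    order = np.arange(len(X))
    if shuffle:
        np.random.shuffle(order)
    for i in range(0, len(X), batch_size):
        idx = order[i:i + batch_size]
        bx = [X[j] for j in idx]
        by = [y[j] for j in idx]
        max_len = max(len(s) for s in bx)
        # pad inputs with the PAD word id and labels with the assumed pad label
        bx = [s + [padding_word] * (max_len - len(s)) for s in bx]
        by = [s + [3] * (max_len - len(s)) for s in by]
        yield bx, by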
def predict():
    if FLAGS.model_path is None:
        raise ValueError('Model path is None!')
    word2id = get_word2id(train_data_path,
                          bigram_words=bigram_words_path,
                          min_bw_frequence=FLAGS.min_bg_freq)
    id2word = build_reverse_dictionary(word2id)
    init_embedding = None
    # four out-of-domain test sets
    X_test1, y_test1 = get_train_data(literature_path, word2id)  # domain 1
    X_test2, y_test2 = get_train_data(computer_path, word2id)    # domain 2
    X_test3, y_test3 = get_train_data(medicine_path, word2id)    # domain 3
    X_test4, y_test4 = get_train_data(finance_path, word2id)     # domain 4
    print 'len(test) %d %d %d %d' % (len(X_test1), len(X_test2),
                                     len(X_test3), len(X_test4))
    print 'model %s' % FLAGS.model
    print 'bigram_words_path: %s' % bigram_words_path
    print 'model_path: %s' % FLAGS.model_path
    with tf.Session() as sess:
        model = models.BaselineModel(vocab_size=len(word2id),
                                     word_dim=config.word_dim,
                                     hidden_dim=config.hidden_dim,
                                     pad_word=word2id[PAD],
                                     init_embedding=init_embedding,
                                     num_classes=config.num_classes,
                                     clip=config.clip,
                                     lr=config.lr,
                                     l2_reg_lamda=config.l2_reg_lamda,
                                     num_layers=config.num_layers,
                                     rnn_cell=config.rnn_cell,
                                     bi_direction=config.bi_direction)
        # restore the checkpoint saved at the requested epoch
        checkpoints_model = os.path.join('checkpoints', FLAGS.model_path)
        checkpoints_model += '-%d' % FLAGS.epoch
        saver = tf.train.Saver(tf.all_variables())
        print 'test_start!'
        saver.restore(sess, checkpoints_model)

        def test(X_test, y_test, domain):
            test_pred = []
            for i in range(0, len(X_test), config.batch_size):
                input_x = padding3(X_test[i:i + config.batch_size],
                                   word2id[PAD])
                test_pred += model.predict_step(sess, input_x)
            P, R, F = evaluate_word_PRF(test_pred, y_test)
            print '%s Test: P:%f R:%f F:%f' % (domain, P, R, F)
            return test_pred

        # write out predicted and gold segmentations for each domain
        test_pred1 = test(X_test1, y_test1, 'Literature')
        convert_wordsegmentation(X_test1, test_pred1, id2word, FLAGS.model,
                                 literature_path)
        convert_wordsegmentation(X_test1, y_test1, id2word, FLAGS.model,
                                 literature_path + '_golden')
        test_pred2 = test(X_test2, y_test2, 'Computer')
        convert_wordsegmentation(X_test2, test_pred2, id2word, FLAGS.model,
                                 computer_path)
        convert_wordsegmentation(X_test2, y_test2, id2word, FLAGS.model,
                                 computer_path + '_golden')
        test_pred3 = test(X_test3, y_test3, 'Medicine')
        convert_wordsegmentation(X_test3, test_pred3, id2word, FLAGS.model,
                                 medicine_path)
        convert_wordsegmentation(X_test3, y_test3, id2word, FLAGS.model,
                                 medicine_path + '_golden')
        test_pred4 = test(X_test4, y_test4, 'Finance')
        convert_wordsegmentation(X_test4, test_pred4, id2word, FLAGS.model,
                                 finance_path)
        convert_wordsegmentation(X_test4, y_test4, id2word, FLAGS.model,
                                 finance_path + '_golden')
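# evaluate_word_PRF computes word-level precision/recall/F1 from predicted
# and gold tag sequences. A minimal sketch under the usual BMES convention
# (B=0, M=1, E=2, S=3 -- an assumption; the project may use a different
# index order):

def evaluate_word_PRF_sketch(pred_tags, gold_tags, B=0, M=1, E=2, S=3):
    def to_words(tags):
        # convert one tag sequence into a set of (start, end) word spans
        words, start = set(), 0
        for i, t in enumerate(tags):
            if t in (B, S):
                start = i
            if t in (E, S):
                words.add((start, i))
        return words

    correct = pred_total = gold_total = 0
    for p, g in zip(pred_tags, gold_tags):
        pw, gw = to_words(p), to_words(g)
        correct += len(pw & gw)
        pred_total += len(pw)
        gold_total += len(gw)
    P = correct / float(pred_total)
    R = correct / float(gold_total)
    F = 2 * P * R / (P + R)
    return P, R, F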
def train():
    word2id = get_word2id(train_data_path,
                          bigram_words=bigram_words_path,
                          min_bw_frequence=FLAGS.min_bg_freq)
    X_train, y_train = get_train_data(train_data_path, word2id)
    X_valid, y_valid = get_train_data(dev_data_path, word2id)
    x_test, y_test = get_train_data(test_data_path, word2id)
    init_embedding = get_embedding(word2id, size=config.word_dim)
    print 'train_data_path: %s' % train_data_path
    print 'valid_data_path: %s' % dev_data_path
    print 'test_data_path: %s' % test_data_path
    print 'bigram_words_path: %s' % bigram_words_path
    print 'model_path: %s' % FLAGS.model_path
    print 'min_bg_freq: %d' % FLAGS.min_bg_freq
    print 'len(train_data): %d' % len(X_train)
    print 'len(valid_data): %d' % len(X_valid)
    print 'len(test_data): %d' % len(x_test)
    print 'init_embedding shape: [%d,%d]' % (init_embedding.shape[0],
                                             init_embedding.shape[1])
    print 'Train started!'

    tfConfig = tf.ConfigProto()
    tfConfig.gpu_options.per_process_gpu_memory_fraction = FLAGS.memory
    with tf.Session(config=tfConfig) as sess:
        model = models.BaselineModel(vocab_size=len(word2id),
                                     word_dim=config.word_dim,
                                     hidden_dim=config.hidden_dim,
                                     pad_word=word2id[PAD],
                                     init_embedding=init_embedding,
                                     num_classes=config.num_classes,
                                     clip=config.clip,
                                     lr=config.lr,
                                     l2_reg_lamda=config.l2_reg_lamda,
                                     num_layers=config.num_layers,
                                     rnn_cell=config.rnn_cell,
                                     bi_direction=config.bi_direction)
        if not os.path.exists('checkpoints'):
            os.mkdir('checkpoints')
        checkpoints_model = os.path.join('checkpoints', FLAGS.model_path)
        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(checkpoints_model)
        if ckpt and ckpt.model_checkpoint_path:
            print 'restore from original model!'
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            sess.run(tf.global_variables_initializer())

        best_f1, best_e = 0, 0
        for epoch in xrange(config.n_epoch):
            start_time = time.time()
            # train
            train_loss = []
            for step, (X, Y) in enumerate(
                    data_iterator(X_train, y_train, config.batch_size,
                                  padding_word=word2id[PAD], shuffle=True)):
                loss = model.train_step(sess, X, Y, config.dropout_keep_prob)
                print 'epoch:%d>>%2.2f%%' % (
                    epoch, config.batch_size * step * 100.0 / len(X_train)),
                print 'completed in %.2f (sec) <<\r' % (time.time() - start_time),
                sys.stdout.flush()
                train_loss.append(loss)
            train_loss = np.mean(train_loss, dtype=float)
            print 'Train Epoch %d loss %f' % (epoch, train_loss)

            # valid
            valid_loss, valid_pred = [], []
            for i in range(0, len(X_valid), config.batch_size):
                input_x = padding3(X_valid[i:i + config.batch_size],
                                   word2id[PAD])
                y = padding(y_valid[i:i + config.batch_size], 3)
                loss, pred = model.dev_step(sess, input_x, y)
                valid_loss.append(loss)
                valid_pred += pred
            valid_loss = np.mean(valid_loss, dtype=float)
            P, R, F = evaluate_word_PRF(valid_pred, y_valid)
            print 'Valid Epoch %d loss %f' % (epoch, valid_loss)
            print 'P:%f R:%f F:%f' % (P, R, F)
            print '--------------------------------'

            # keep the checkpoint with the best validation F1 and count
            # epochs without improvement for early stopping
            if F > best_f1:
                best_f1 = F
                best_e = 0
                saver.save(sess, checkpoints_model)
            else:
                best_e += 1

            # test
            test_pred = []
            for i in range(0, len(x_test), config.batch_size):
                input_x = padding3(x_test[i:i + config.batch_size],
                                   word2id[PAD])
                test_pred += model.predict_step(sess, input_x)
            P, R, F = evaluate_word_PRF(test_pred, y_test)
            print 'Test: P:%f R:%f F:%f Best_dev_F:%f' % (P, R, F, best_f1)

            if best_e > 4:
                print 'Early stopping'
                break
        print 'best_f1 on validset is %f' % best_f1
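# get_embedding builds the initial embedding matrix for the vocabulary.
# A minimal sketch, assuming pretrained vectors are available in a plain
# word2vec-style text file (the file name is only an example); words not
# found there get a small random initialization:

def get_embedding_sketch(word2id, size, path='data/vec.txt'):
    vectors = {}
    with open(path) as f:
        for line in f:
            parts = line.rstrip().split()
            if len(parts) == size + 1:  # skip header or malformed lines
                vectors[parts[0]] = np.array(parts[1:], dtype=np.float32)
    emb = np.random.uniform(-0.05, 0.05,
                            (len(word2id), size)).astype(np.float32)
    for w, i in word2id.items():
        if w in vectors:
            emb[i] = vectors[w]
    return emb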