def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.WordToId(data.PAD_TOKEN) > 0
  assert vocab.WordToId(data.UNKNOWN_TOKEN) >= 0
  assert vocab.WordToId(data.SENTENCE_START) > 0
  assert vocab.WordToId(data.SENTENCE_END) > 0

  batch_size = 1
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=2,  # alternative: 4
      enc_timesteps=60,  # alternative: 120
      dec_timesteps=15,  # alternative: 30
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=128,  # for rnn cell; alternative: 256
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=10)  # If 0, no sampled softmax. Alternative: 4096

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
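The main() above relies on TF1/absl-style command-line flags (vocab_path, data_path, mode, beam_size, the batcher keys and limits, use_bucketing, truncate_input, random_seed, num_gpus) that are defined elsewhere in the module. A minimal sketch of those definitions follows, assuming tf.app.flags; the default values and help strings are illustrative assumptions, only the flag names are taken from the code above.

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('data_path', '', 'Path to the binary data files.')
tf.app.flags.DEFINE_string('vocab_path', '', 'Path to the vocabulary file.')
tf.app.flags.DEFINE_string('mode', 'train', 'One of train/eval/decode.')
tf.app.flags.DEFINE_string('article_key', 'article',
                           'Feature key of the article text.')
tf.app.flags.DEFINE_string('abstract_key', 'abstract',
                           'Feature key of the abstract text.')
tf.app.flags.DEFINE_integer('max_article_sentences', 2,
                            'Max sentences used from an article.')
tf.app.flags.DEFINE_integer('max_abstract_sentences', 100,
                            'Max sentences used from an abstract.')
tf.app.flags.DEFINE_integer('beam_size', 4, 'Beam width used in decode mode.')
tf.app.flags.DEFINE_integer('num_gpus', 0, 'Number of GPUs to use.')
tf.app.flags.DEFINE_integer('random_seed', 111, 'Graph-level random seed.')
tf.app.flags.DEFINE_bool('use_bucketing', False,
                         'Bucket inputs by source length.')
tf.app.flags.DEFINE_bool('truncate_input', False,
                         'Truncate over-long inputs instead of discarding them.')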
def __init__(self, model, hps, vocab, to_build_grapth):
  """Beam search decoding.

  Args:
    model: The seq2seq attentional model.
    hps: Hyperparameters.
    vocab: Vocabulary.
    to_build_grapth: Whether to build the model graph before decoding.
  """
  self._model = model
  if to_build_grapth:
    self._model.build_graph()
  # A batch_reader.Batcher object; only its .NextBatch() method is used.
  self._batch_reader = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  self._hps = hps
  self._vocab = vocab
  self._saver = tf.train.Saver()
  self._decode_io = DecodeIO(FLAGS.decode_dir)
def _Eval_Step(model):
  """Evaluate alongside training.

  Evaluation truncates the data: the final partial batch smaller than
  BATCH_SIZE is discarded. Each pass restores the most recent checkpoint.
  """
  os.environ['CUDA_VISIBLE_DEVICES'] = ''  # run evaluation on CPU
  with tf.Session() as sess:
    model.build_graph()
    saver = tf.train.Saver()
    while True:
      data_batcher = batch_reader.Batcher(
          parameter_config.EVALUATION_SET, model._vocab, 'index', 'target',
          'sentence', model._hps, bucketing=False, truncate_input=True)
      # Load the latest checkpoint.
      ckpt = tf.train.get_checkpoint_state(parameter_config.CKPT_PATH)
      saver.restore(sess, ckpt.model_checkpoint_path)
      predict_list = []
      target_list = []
      while True:
        (index_batch, target_batch, enc_batch, enc_input_lens,
         batch_lens) = data_batcher.NextEvalBatch()
        if batch_lens != parameter_config.BATCH_SIZE:
          break
        loss, predict, global_step = model.run_eval_step(
            sess, target_batch, enc_batch, enc_input_lens, 1.0, 1.0)
        predict_list.extend(
            list(np.reshape(predict, parameter_config.BATCH_SIZE)))
        target_list.extend(
            list(np.reshape(target_batch, parameter_config.BATCH_SIZE)))
      acc = evaluation_function.calculate_acc(target_list, predict_list)
      auc = evaluation_function.calculate_auc(target_list, predict_list)
      ks = evaluation_function.calculate_ks(target_list, predict_list)
      print('step:{} acc:{} auc:{} ks:{}'.format(global_step, acc, auc, ks))
      time.sleep(parameter_config.SLEEP_TIME)
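_Eval_Step above depends on an evaluation_function module (calculate_acc, calculate_auc, calculate_ks) that is not shown. A minimal sketch of what such helpers could look like, assuming scikit-learn and a 0.5 classification threshold; both are assumptions for illustration, not the project's actual implementation.

import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve


def calculate_acc(targets, predictions, threshold=0.5):
  """Accuracy after thresholding the predicted probabilities."""
  labels = (np.asarray(predictions) >= threshold).astype(int)
  return accuracy_score(targets, labels)


def calculate_auc(targets, predictions):
  """Area under the ROC curve of the raw scores."""
  return roc_auc_score(targets, predictions)


def calculate_ks(targets, predictions):
  """Kolmogorov-Smirnov statistic: largest gap between TPR and FPR."""
  fpr, tpr, _ = roc_curve(targets, predictions)
  return float(np.max(tpr - fpr))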
def main(unused_argv):
  vocab = wash_data.Vocab(FLAGS.vocab_path, 1000000)
  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.0001,  # min learning rate.
      lr=0.001,  # learning rate
      batch_size=FLAGS.batch_size,
      enc_layers=1,  # number of RNN layers in the encoder during training
      enc_timesteps=1500,  # encoder input length
      dec_timesteps=40,  # decoder input length
      min_input_len=1,  # discard articles/summaries < than this
      num_hidden=128,  # hidden size of the LSTM rnn cell
      emb_dim=256,  # If 0, don't use embedding; vocab embedding dimension
      max_grad_norm=2,  # gradient clipping norm
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.articleData_path, FLAGS.summaryData_path, FLAGS.decodeData_path,
      vocab, hps, FLAGS.max_article_sentences, FLAGS.max_summary_sentences,
      bucketing=FLAGS.use_bucketing, truncate_input=FLAGS.truncate_input,
      epoch=FLAGS.epoch)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    print("decode begin")
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop(choose=FLAGS.choose)
def main(mode_type):
  # Load the vocabulary.
  vocab = data.Vocab(
      os.path.join(parameter_config.VOCAB_DIR,
                   parameter_config.VOCAB_FILE_NAME),
      parameter_config.VOCAB_SIZE)
  batch_size = parameter_config.BATCH_SIZE
  if mode_type == 'decode':
    batch_size = 1

  # Model hyperparameters.
  hps = seq2seq_model.HParams(
      mode=mode_type,  # train, eval, decode
      batch_size=batch_size,
      enc_timesteps=parameter_config.ENC_TIMESTEPS,
      emb_dim=parameter_config.EMB_DIM,
      min_input_len=parameter_config.MIN_INPUT_LEN,
      num_hidden=parameter_config.NUM_HIDDEN,
      enc_layers=parameter_config.ENC_LAYERS,
      min_lr=parameter_config.MIN_LR,
      lr=parameter_config.LR,
      max_grad_norm=parameter_config.MAX_GRAD_NORM)
  tf.set_random_seed(111)

  if hps.mode == 'train':
    batcher = batch_reader.Batcher(
        parameter_config.TRAIN_DIR, vocab, 'index', 'target', 'sentence',
        hps, bucketing=False, truncate_input=True)
    model = seq2seq_model.Seq2SeqModel(hps, vocab, num_gpus=0)
    _Train(model, batcher, parameter_config.TRAIN_STEP)
  elif hps.mode == 'eval':
    batcher = batch_reader.Batcher(
        parameter_config.EVALUATION_SET, vocab, 'index', 'target', 'sentence',
        hps, bucketing=False, truncate_input=True)
    model = seq2seq_model.Seq2SeqModel(hps, vocab, num_gpus=0)
    _Eval(model, batcher)
  elif hps.mode == 'decode':
    batcher = batch_reader.Batcher(
        parameter_config.DECODE_DIR, vocab, 'index', 'target', 'sentence',
        hps, bucketing=False, truncate_input=True)
    model = seq2seq_model.Seq2SeqModel(hps, vocab, num_gpus=0)
    if not os.path.exists(
        os.path.join(os.getcwd(), parameter_config.DECODE_STORE_DIR)):
      os.mkdir(os.path.join(os.getcwd(), parameter_config.DECODE_STORE_DIR))
    _Decode(model, batcher,
            os.path.join(parameter_config.DECODE_STORE_DIR,
                         parameter_config.DECODE_STORE_FILE))
  elif hps.mode == 'eval_step':
    model = seq2seq_model.Seq2SeqModel(hps, vocab, num_gpus=0)
    _Eval_Step(model)
  else:
    print('mode_type must be train, eval, decode or eval_step')
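The parameter_config module read by main(mode_type) and _Eval_Step is not shown either. A sketch of the constants those functions reference follows; every value below is a placeholder assumption, not the project's actual configuration.

# parameter_config.py (illustrative values only)
VOCAB_DIR = 'data'
VOCAB_FILE_NAME = 'vocab.txt'
VOCAB_SIZE = 100000
BATCH_SIZE = 64
ENC_TIMESTEPS = 100
EMB_DIM = 128
MIN_INPUT_LEN = 1
NUM_HIDDEN = 128
ENC_LAYERS = 1
MIN_LR = 0.0001
LR = 0.001
MAX_GRAD_NORM = 2
TRAIN_STEP = 100000
TRAIN_DIR = 'data/train'
EVALUATION_SET = 'data/eval'
DECODE_DIR = 'data/decode'
DECODE_STORE_DIR = 'decode_results'
DECODE_STORE_FILE = 'decode_output.txt'
CKPT_PATH = 'log_root/train'
SLEEP_TIME = 60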
def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=1,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=128,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    to_build_grapth = True
    p = preprocessing(FLAGS.vocab_path)

    # Old interactive decode loop:
    # while True:
    #   kb_input = input('> ')
    #   if kb_input == 'c':
    #     description_str = input('Enter description > ')
    #     context_str = input('Enter context > ')
    #     input_data = p.get_data(description=description_str,
    #                             context=context_str)
    #     print('Input data:')
    #     pprint(input_data)
    #   elif kb_input == 'q':
    #     break
    #   else:
    #     try:
    #       text_to_binary(
    #           'yahoo_knowledge_data/decode/ver_5/dataset_ready/data_ready_'
    #           + kb_input,
    #           'yahoo_knowledge_data/decode/decode_data')
    #     except:
    #       print('Error with the default testing data')
    #   decoder = seq2seq_attention_decode.BSDecoder(model, hps, vocab,
    #                                                to_build_grapth)
    #   to_build_grapth = False
    #   decoder.DecodeLoop()

    # Decode loop used for the paper.
    file_num = 1
    while True:
      if file_num % 60 == 0:
        print('Printed 60 examples')
        break
      try:
        text_to_binary(
            'yahoo_knowledge_data/decode/ver_5/dataset_ready/data_ready_' +
            str(file_num),
            'yahoo_knowledge_data/decode/decode_data')
      except:
        print('Error with the default testing data')
        break
      decoder = seq2seq_attention_decode.BSDecoder(model, hps, vocab,
                                                   to_build_grapth)
      to_build_grapth = False
      decoder.DecodeLoop()
      print('==================', file_num, '==================')
      file_num += 1
def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 64
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.
  # Same hyperparameters, but in eval mode.
  eval_hps = hps._replace(mode='eval')

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  eval_batcher = batch_reader.Batcher(
      FLAGS.eval_data_path, vocab, eval_hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    eval_model = seq2seq_attention_model.Seq2SeqAttentionModel(
        eval_hps, vocab, num_gpus=FLAGS.num_gpus)
    count = 0
    while count * FLAGS.eval_every_iteration < FLAGS.max_run_steps:
      _Train(model, batcher)
      # Read the running average loss of previous evaluations (if any)
      # from eval_dir.
      eval_avg_loss = 0
      prev_avg_loss = 0
      try:
        eval_results = tf.contrib.estimator.read_eval_metrics(FLAGS.eval_dir)
        i = 0
        for step, metrics in eval_results.items():
          eval_avg_loss += metrics['running_avg_loss']
          i += 1
        prev_avg_loss = eval_avg_loss / i if i else 0
      except FileNotFoundError:
        print("Haven't run evaluation yet.")
      cur_loss = _Eval(eval_model, eval_batcher, 20, vocab=vocab)
      # Stop early once the evaluation loss stops improving.
      if eval_avg_loss != 0 and prev_avg_loss < cur_loss:
        print("Early stopping!")
        break
      count += 1
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, eval_batcher, vocab=vocab)
  elif hps.mode == 'decode':
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.START_DECODING) > 0
  assert vocab.CheckVocab(data.STOP_DECODING) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=1,
      enc_timesteps=800,
      dec_timesteps=200,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096,  # If 0, no sampled softmax.
      trunc_norm_init_std=0.05)

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_id_key,
      FLAGS.article_key, FLAGS.abstract_key, FLAGS.labels_key,
      FLAGS.section_names_key, FLAGS.sections_key,
      FLAGS.max_article_sentences, FLAGS.max_abstract_sentences,
      bucketing=FLAGS.use_bucketing, truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BeamSearchDecoder(
        model, batcher, hps, vocab)
    decoder.decode_loop()