Example #1
def main(unused_argv):
    vocab = data.Vocab(FLAGS.vocab_path, 1000000)
    # Check for presence of required special tokens.
    assert vocab.WordToId(data.PAD_TOKEN) > 0
    assert vocab.WordToId(data.UNKNOWN_TOKEN) >= 0
    assert vocab.WordToId(data.SENTENCE_START) > 0
    assert vocab.WordToId(data.SENTENCE_END) > 0

    batch_size = 1
    if FLAGS.mode == 'decode':
        batch_size = FLAGS.beam_size

    hps = seq2seq_attention_model.HParams(
        mode=FLAGS.mode,  # train, eval, decode
        min_lr=0.01,  # min learning rate.
        lr=0.15,  # learning rate
        batch_size=batch_size,
        #enc_layers=4,
        enc_layers=2,
        enc_timesteps=60,
        #enc_timesteps=120,
        #dec_timesteps=30,
        dec_timesteps=15,
        min_input_len=2,  # discard articles/summaries < than this
        num_hidden=128,  # for rnn cell
        #num_hidden=256,  # for rnn cell
        emb_dim=128,  # If 0, don't use embedding
        max_grad_norm=2,
        num_softmax_samples=10)  # If 0, no sampled softmax.
    #num_softmax_samples=4096)  # If 0, no sampled softmax.

    batcher = batch_reader.Batcher(FLAGS.data_path,
                                   vocab,
                                   hps,
                                   FLAGS.article_key,
                                   FLAGS.abstract_key,
                                   FLAGS.max_article_sentences,
                                   FLAGS.max_abstract_sentences,
                                   bucketing=FLAGS.use_bucketing,
                                   truncate_input=FLAGS.truncate_input)
    tf.set_random_seed(FLAGS.random_seed)

    if hps.mode == 'train':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Train(model, batcher)
    elif hps.mode == 'eval':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Eval(model, batcher, vocab=vocab)
    elif hps.mode == 'decode':
        decode_mdl_hps = hps
        # Only need to restore the 1st step and reuse it since
        # we keep and feed in state for each step's output.
        decode_mdl_hps = hps._replace(dec_timesteps=1)
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
        decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps,
                                                     vocab)
        decoder.DecodeLoop()
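
Note: the decode branch's `hps._replace(dec_timesteps=1)` works because `HParams` in these textsum-style projects is a `collections.namedtuple`, so `_replace` returns a copy with one field changed while the training hyperparameters stay intact. A minimal, self-contained sketch (field names trimmed for brevity; the real `HParams` has more fields):

import collections

# Illustrative namedtuple standing in for seq2seq_attention_model.HParams.
HParams = collections.namedtuple(
    'HParams', 'mode lr batch_size enc_timesteps dec_timesteps')

hps = HParams(mode='decode', lr=0.15, batch_size=4,
              enc_timesteps=120, dec_timesteps=30)
decode_mdl_hps = hps._replace(dec_timesteps=1)  # copy with one field changed
print(hps.dec_timesteps, decode_mdl_hps.dec_timesteps)  # -> 30 1
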
Example #2
    def __init__(self, model, hps, vocab, to_build_grapth):
        """Beam search decoding.

        Args:
            model: The seq2seq attentional model.
            batch_reader: The batch data reader.
            hps: Hyperparamters.
            vocab: Vocabulary
        """
        self._model = model
        if to_build_grapth:
            self._model.build_graph()
        # This is a batch_reader.Batcher object; only its .NextBatch() method is used.
        self._batch_reader = batch_reader.Batcher(
            FLAGS.data_path,
            vocab,
            hps,
            FLAGS.article_key,
            FLAGS.abstract_key,
            FLAGS.max_article_sentences,
            FLAGS.max_abstract_sentences,
            bucketing=FLAGS.use_bucketing,
            truncate_input=FLAGS.truncate_input)
        self._hps = hps
        self._vocab = vocab
        self._saver = tf.train.Saver()
        self._decode_io = DecodeIO(FLAGS.decode_dir)
Example #3
def _Eval_Step(model):
    """边训练边评估
       模型评估(截取法,最后小于batch_size的数据直接舍弃)
       读取最近一次的模型参数进行评估
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    with tf.Session() as sess:
        model.build_graph()
        saver = tf.train.Saver()
        # Load the latest checkpoint.
        while True:
            data_batcher = batch_reader.Batcher(
                parameter_config.EVALUATION_SET,
                model._vocab,
                'index',
                'target',
                'sentence',
                model._hps,
                bucketing=False,
                truncate_input=True)
            ckpt = tf.train.get_checkpoint_state(parameter_config.CKPT_PATH)
            saver.restore(sess, ckpt.model_checkpoint_path)

            predict_list = []
            target_list = []

            while True:
                (index_batch, target_batch, enc_batch, enc_input_lens,
                 batch_lens) = data_batcher.NextEvalBatch()
                if batch_lens != parameter_config.BATCH_SIZE:
                    break
                loss, predict, global_step = model.run_eval_step(
                    sess, target_batch, enc_batch, enc_input_lens, 1.0, 1.0)
                predict_list.extend(
                    list(np.reshape(predict, parameter_config.BATCH_SIZE)))
                target_list.extend(
                    list(np.reshape(target_batch,
                                    parameter_config.BATCH_SIZE)))

            acc = evaluation_function.calculate_acc(target_list, predict_list)
            auc = evaluation_function.calculate_auc(target_list, predict_list)
            ks = evaluation_function.calculate_ks(target_list, predict_list)
            print('step:{} acc:{} auc:{} ks :{}'.format(
                global_step, acc, auc, ks))
            time.sleep(parameter_config.SLEEP_TIME)
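
The metrics printed above come from the project's `evaluation_function` module, which is not shown in this snippet. A hedged sketch of how accuracy, ROC AUC, and the KS statistic (the maximum gap between TPR and FPR) are commonly computed for binary targets and predicted scores; the project's actual implementation may differ:

import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve

def calculate_acc(targets, scores, threshold=0.5):
    # Threshold the scores and compare against the binary labels.
    preds = (np.asarray(scores) >= threshold).astype(int)
    return accuracy_score(targets, preds)

def calculate_auc(targets, scores):
    return roc_auc_score(targets, scores)

def calculate_ks(targets, scores):
    # Kolmogorov-Smirnov statistic: max(TPR - FPR) over all thresholds.
    fpr, tpr, _ = roc_curve(targets, scores)
    return float(np.max(tpr - fpr))
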
Example #4
def main(unused_argv):
    vocab = wash_data.Vocab(FLAGS.vocab_path, 1000000)

    hps = seq2seq_attention_model.HParams(
        mode=FLAGS.mode,  # train, eval, decode
        min_lr=0.0001,  # min learning rate.
        lr=0.001,  # learning rate
        batch_size=FLAGS.batch_size,
        enc_layers=1,  # number of RNN layers in the encoder during training
        enc_timesteps=1500,  # encoder input length
        dec_timesteps=40,  # decoder input length
        min_input_len=1,  # discard articles/summaries < than this
        num_hidden=128,  # hidden size of the LSTM rnn cell
        emb_dim=256,  # vocab embedding dimension; if 0, don't use embedding
        max_grad_norm=2,  # gradient clipping norm
        num_softmax_samples=4096)  # If 0, no sampled softmax.

    batcher = batch_reader.Batcher(
        FLAGS.articleData_path, FLAGS.summaryData_path, FLAGS.decodeData_path,
        vocab, hps, FLAGS.max_article_sentences,
        FLAGS.max_summary_sentences, bucketing=FLAGS.use_bucketing,
        truncate_input=FLAGS.truncate_input, epoch=FLAGS.epoch)

    tf.set_random_seed(FLAGS.random_seed)

    if hps.mode == 'train':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Train(model, batcher)
    
    elif hps.mode == 'eval':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Eval(model, batcher, vocab=vocab)   
 
    elif hps.mode == 'decode':
        print("decode begin")
        decode_mdl_hps = hps
        # Only need to restore the 1st step and reuse it since
        # we keep and feed in state for each step's output.
        decode_mdl_hps = hps._replace(dec_timesteps=1)
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
        decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
        decoder.DecodeLoop(choose=FLAGS.choose)
Example #5
def main(mode_type):
    # Load the vocabulary.
    vocab = data.Vocab(
        os.path.join(parameter_config.VOCAB_DIR,
                     parameter_config.VOCAB_FILE_NAME),
        parameter_config.VOCAB_SIZE)
    batch_size = parameter_config.BATCH_SIZE
    if mode_type == 'decode':
        batch_size = 1

    # Set the model hyperparameters.
    hps = seq2seq_model.HParams(
        mode=mode_type,  # train, eval, decode
        batch_size=batch_size,
        enc_timesteps=parameter_config.ENC_TIMESTEPS,
        emb_dim=parameter_config.EMB_DIM,
        min_input_len=parameter_config.MIN_INPUT_LEN,
        num_hidden=parameter_config.NUM_HIDDEN,
        enc_layers=parameter_config.ENC_LAYERS,
        min_lr=parameter_config.MIN_LR,
        lr=parameter_config.LR,
        max_grad_norm=parameter_config.MAX_GRAD_NORM)

    tf.set_random_seed(111)

    if hps.mode == 'train':
        batcher = batch_reader.Batcher(parameter_config.TRAIN_DIR,
                                       vocab,
                                       'index',
                                       'target',
                                       'sentence',
                                       hps,
                                       bucketing=False,
                                       truncate_input=True)
        model = seq2seq_model.Seq2SeqModel(hps, vocab, num_gpus=0)
        _Train(model, batcher, parameter_config.TRAIN_STEP)
    elif hps.mode == 'eval':
        batcher = batch_reader.Batcher(parameter_config.EVALUATION_SET,
                                       vocab,
                                       'index',
                                       'target',
                                       'sentence',
                                       hps,
                                       bucketing=False,
                                       truncate_input=True)
        model = seq2seq_model.Seq2SeqModel(hps, vocab, num_gpus=0)
        _Eval(model, batcher)
    elif hps.mode == 'decode':
        batcher = batch_reader.Batcher(parameter_config.DECODE_DIR,
                                       vocab,
                                       'index',
                                       'target',
                                       'sentence',
                                       hps,
                                       bucketing=False,
                                       truncate_input=True)
        model = seq2seq_model.Seq2SeqModel(hps, vocab, num_gpus=0)
        if not os.path.exists(
                os.path.join(os.getcwd(), parameter_config.DECODE_STORE_DIR)):
            os.mkdir(
                os.path.join(os.getcwd(), parameter_config.DECODE_STORE_DIR))
        _Decode(
            model, batcher,
            os.path.join(parameter_config.DECODE_STORE_DIR,
                         parameter_config.DECODE_STORE_FILE))
    elif hps.mode == 'eval_step':
        model = seq2seq_model.Seq2SeqModel(hps, vocab, num_gpus=0)
        _Eval_Step(model)
    else:
        print('mode_type must be one of: train, eval, decode, eval_step')
Example #6
def main(unused_argv):
    vocab = data.Vocab(FLAGS.vocab_path, 1000000)
    # Check for presence of required special tokens.
    assert vocab.CheckVocab(data.PAD_TOKEN) > 0
    assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
    assert vocab.CheckVocab(data.SENTENCE_START) > 0
    assert vocab.CheckVocab(data.SENTENCE_END) > 0

    batch_size = 4
    if FLAGS.mode == 'decode':
        batch_size = FLAGS.beam_size

    hps = seq2seq_attention_model.HParams(
        mode=FLAGS.mode,  # train, eval, decode
        min_lr=0.01,  # min learning rate.
        lr=0.15,  # learning rate
        batch_size=batch_size,
        enc_layers=1,
        enc_timesteps=120,
        dec_timesteps=30,
        min_input_len=2,  # discard articles/summaries < than this
        num_hidden=128,  # for rnn cell
        emb_dim=128,  # If 0, don't use embedding
        max_grad_norm=2,
        num_softmax_samples=4096)  # If 0, no sampled softmax.

    batcher = batch_reader.Batcher(FLAGS.data_path,
                                   vocab,
                                   hps,
                                   FLAGS.article_key,
                                   FLAGS.abstract_key,
                                   FLAGS.max_article_sentences,
                                   FLAGS.max_abstract_sentences,
                                   bucketing=FLAGS.use_bucketing,
                                   truncate_input=FLAGS.truncate_input)
    tf.set_random_seed(FLAGS.random_seed)

    if hps.mode == 'train':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Train(model, batcher)
    elif hps.mode == 'eval':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Eval(model, batcher, vocab=vocab)
    elif hps.mode == 'decode':
        decode_mdl_hps = hps
        # Only need to restore the 1st step and reuse it since
        # we keep and feed in state for each step's output.
        decode_mdl_hps = hps._replace(dec_timesteps=1)
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)

        to_build_grapth = True
        p = preprocessing(FLAGS.vocab_path)

        # Old decode loop
        # while True:
        #     kb_input = input('> ')
        #     if kb_input == 'c':
        #         description_str = input('Enter description > ')
        #         context_str = input('Enter context > ')
        #         input_data = p.get_data(description=description_str, context=context_str)
        #         print('Input data:')
        #         pprint(input_data)
        #     elif kb_input == 'q':
        #         break
        #     else:
        #         try:
        #             text_to_binary('yahoo_knowledge_data/decode/ver_5/dataset_ready/data_ready_' + kb_input,
        #                     'yahoo_knowledge_data/decode/decode_data')
        #         except:
        #             print('Error with the default testing data')
        #     decoder = seq2seq_attention_decode.BSDecoder(model, hps, vocab, to_build_grapth)
        #     to_build_grapth = False
        #     decoder.DecodeLoop()

        # Decode loop used for the paper's experiments
        file_num = 1
        while True:
            if file_num % 60 == 0:
                print('Printed 60 examples')
                break
            try:
                text_to_binary(
                    'yahoo_knowledge_data/decode/ver_5/dataset_ready/data_ready_'
                    + str(file_num), 'yahoo_knowledge_data/decode/decode_data')
            except:
                print('Error with the default testing data')
                break
            decoder = seq2seq_attention_decode.BSDecoder(
                model, hps, vocab, to_build_grapth)
            to_build_grapth = False
            decoder.DecodeLoop()
            print('==================', file_num, '==================')
            file_num += 1
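
`text_to_binary` is a project helper that is not shown in this snippet. A hedged sketch of what it likely does, assuming the length-prefixed `tf.Example` record format used by the original textsum `data_convert_example.py` (one example per line, tab-separated `key=value` features); the project's own helper may differ:

import struct
from tensorflow.core.example import example_pb2

def text_to_binary(text_path, binary_path):
    with open(text_path, 'r') as reader, open(binary_path, 'wb') as writer:
        for line in reader:
            tf_example = example_pb2.Example()
            for feature in line.strip().split('\t'):
                key, value = feature.split('=', 1)
                tf_example.features.feature[key].bytes_list.value.extend(
                    [value.encode()])
            serialized = tf_example.SerializeToString()
            # Each record: 8-byte length prefix followed by the proto bytes.
            writer.write(struct.pack('q', len(serialized)))
            writer.write(struct.pack('%ds' % len(serialized), serialized))
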
Example #7
def main(unused_argv):
    vocab = data.Vocab(FLAGS.vocab_path, 1000000)
    # Check for presence of required special tokens.
    assert vocab.CheckVocab(data.PAD_TOKEN) > 0
    assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
    assert vocab.CheckVocab(data.SENTENCE_START) > 0
    assert vocab.CheckVocab(data.SENTENCE_END) > 0

    batch_size = 64
    if FLAGS.mode == 'decode':
        batch_size = FLAGS.beam_size

    hps = seq2seq_attention_model.HParams(
        mode=FLAGS.mode,  # train, eval, decode
        min_lr=0.01,  # min learning rate.
        lr=0.15,  # learning rate
        batch_size=batch_size,
        enc_layers=4,
        enc_timesteps=120,
        dec_timesteps=30,
        min_input_len=2,  # discard articles/summaries < than this
        num_hidden=256,  # for rnn cell
        emb_dim=128,  # If 0, don't use embedding
        max_grad_norm=2,
        num_softmax_samples=4096)  # If 0, no sampled softmax.

    eval_hps = seq2seq_attention_model.HParams(
        mode='eval',  # train, eval, decode
        min_lr=0.01,  # min learning rate.
        lr=0.15,  # learning rate
        batch_size=batch_size,
        enc_layers=4,
        enc_timesteps=120,
        dec_timesteps=30,
        min_input_len=2,  # discard articles/summaries < than this
        num_hidden=256,  # for rnn cell
        emb_dim=128,  # If 0, don't use embedding
        max_grad_norm=2,
        num_softmax_samples=4096)  # If 0, no sampled softmax.

    batcher = batch_reader.Batcher(
        FLAGS.data_path, vocab, hps, FLAGS.article_key,
        FLAGS.abstract_key, FLAGS.max_article_sentences,
        FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
        truncate_input=FLAGS.truncate_input)
    eval_batcher = batch_reader.Batcher(
        FLAGS.eval_data_path, vocab, eval_hps, FLAGS.article_key,
        FLAGS.abstract_key, FLAGS.max_article_sentences,
        FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
        truncate_input=FLAGS.truncate_input)
    tf.set_random_seed(FLAGS.random_seed)

    if hps.mode == 'train':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        eval_model = seq2seq_attention_model.Seq2SeqAttentionModel(
            eval_hps, vocab, num_gpus=FLAGS.num_gpus
        )
        count = 0
        while count * FLAGS.eval_every_iteration < FLAGS.max_run_steps:
            _Train(model, batcher)
            eval_avg_loss = 0
            # read previous loss from eval_dir (if any)
            try:
                eval_results = tf.contrib.estimator.read_eval_metrics(FLAGS.eval_dir)
                i = 0
                for step, metrics in eval_results.items():
                    eval_avg_loss += metrics['running_avg_loss']
                    i += 1
                prev_avg_loss = eval_avg_loss / i
            except FileNotFoundError:
                print("Haven't run evaluation yet.")
            cur_loss = _Eval(eval_model, eval_batcher, 20, vocab=vocab)
            if eval_avg_loss != 0 and prev_avg_loss < cur_loss:
                print("Early stopping!")
                break
            count += 1

    elif hps.mode == 'eval':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Eval(model, eval_batcher, vocab=vocab)
    elif hps.mode == 'decode':
        decode_mdl_hps = hps
        # Only need to restore the 1st step and reuse it since
        # we keep and feed in state for each step's output.
        decode_mdl_hps = hps._replace(dec_timesteps=1)
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
        decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
        decoder.DecodeLoop()
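
The early-stopping check above mixes the metric averaging and the comparison inline, and only compares when a previous evaluation exists. A hedged refactor sketch that isolates the decision into a helper; the function name and structure are illustrative, not from the original project:

def should_stop_early(eval_results, cur_loss):
    """eval_results: dict mapping global step -> metrics dict,
    as returned by tf.contrib.estimator.read_eval_metrics."""
    losses = [m['running_avg_loss'] for m in eval_results.values()]
    if not losses:
        return False  # nothing to compare against yet
    prev_avg_loss = sum(losses) / len(losses)
    return cur_loss > prev_avg_loss
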
Example #8
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    vocab = data.Vocab(FLAGS.vocab_path, 1000000)
    # Check for presence of required special tokens.
    assert vocab.CheckVocab(data.PAD_TOKEN) > 0
    assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
    assert vocab.CheckVocab(data.START_DECODING) > 0
    assert vocab.CheckVocab(data.STOP_DECODING) > 0

    batch_size = 4
    if FLAGS.mode == 'decode':
        batch_size = FLAGS.beam_size

    hps = seq2seq_attention_model.HParams(
        mode=FLAGS.mode,  # train, eval, decode
        min_lr=0.01,  # min learning rate.
        lr=0.15,  # learning rate
        batch_size=batch_size,
        enc_layers=1,
        enc_timesteps=800,
        dec_timesteps=200,
        min_input_len=2,  # discard articles/summaries < than this
        num_hidden=256,  # for rnn cell
        emb_dim=128,  # If 0, don't use embedding
        max_grad_norm=2,
        num_softmax_samples=4096,  # If 0, no sampled softmax.
        trunc_norm_init_std=0.05)

    batcher = batch_reader.Batcher(FLAGS.data_path,
                                   vocab,
                                   hps,
                                   FLAGS.article_id_key,
                                   FLAGS.article_key,
                                   FLAGS.abstract_key,
                                   FLAGS.labels_key,
                                   FLAGS.section_names_key,
                                   FLAGS.sections_key,
                                   FLAGS.max_article_sentences,
                                   FLAGS.max_abstract_sentences,
                                   bucketing=FLAGS.use_bucketing,
                                   truncate_input=FLAGS.truncate_input)
    tf.set_random_seed(FLAGS.random_seed)

    if hps.mode == 'train':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Train(model, batcher)
    elif hps.mode == 'eval':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Eval(model, batcher, vocab=vocab)
    elif hps.mode == 'decode':
        decode_mdl_hps = hps
        # Only need to restore the 1st step and reuse it since
        # we keep and feed in state for each step's output.
        decode_mdl_hps = hps._replace(dec_timesteps=1)
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
        decoder = seq2seq_attention_decode.BeamSearchDecoder(
            model, batcher, hps, vocab)
        decoder.decode_loop()