Example #1
def save_predict_result(results, params):

    # Read the test data
    test_df = pd.read_csv(test_data_path)
    # Fill in the predictions (only the first 20000 results are kept)
    test_df['Prediction'] = results[:20000]
    # Keep only the QID and Prediction columns
    test_df = test_df[['QID', 'Prediction']]
    # Save the results
    result_save_path = get_result_filename(params)
    test_df.to_csv(result_save_path, index=False, sep=',')
Example #2
def get_params():
    # Parse command-line parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", default='train', help="run mode", type=str)
    parser.add_argument("--max_enc_len", default=400, help="Encoder input max sequence length", type=int)
    parser.add_argument("--max_dec_len", default=100, help="Decoder input max sequence length", type=int)
    parser.add_argument("--batch_size", default=batch_size, help="batch size", type=int)
    parser.add_argument("--epochs", default=epochs, help="train epochs", type=int)
    parser.add_argument("--vocab_path", default=vocab_path, help="vocab path", type=str)
    parser.add_argument("--learning_rate", default=0.15, help="Learning rate", type=float)
    parser.add_argument("--adagrad_init_acc", default=0.1,
                        help="Adagrad optimizer initial accumulator value. "
                             "Please refer to the Adagrad optimizer API documentation "
                             "on tensorflow site for more details.",
                        type=float)
    parser.add_argument('--rand_unif_init_mag', default=0.02,
                        help='magnitude for lstm cells random uniform initialization', type=float)
    parser.add_argument('--trunc_norm_init_std', default=1e-4, help='std of trunc norm init, '
                                                                    'used for initializing everything else',
                        type=float)

    parser.add_argument('--cov_loss_wt', default=1.0, help='Weight of coverage loss (lambda in the paper).'
                                                           ' If zero, then no incentive to minimize coverage loss.',
                        type=float)

    parser.add_argument('--max_grad_norm', default=2.0, help='for gradient clipping', type=float)

    parser.add_argument("--vocab_size", default=vocab_size, help="max vocab size , None-> Max ", type=int)

    parser.add_argument("--beam_size", default=batch_size,
                        help="beam size for beam search decoding (must be equal to batch size in decode mode)",
                        type=int)
    parser.add_argument("--embed_size", default=300, help="Words embeddings dimension", type=int)
    parser.add_argument("--enc_units", default=256, help="Encoder GRU cell units number", type=int)
    parser.add_argument("--dec_units", default=256, help="Decoder GRU cell units number", type=int)
    parser.add_argument("--attn_units", default=256, help="[context vector, decoder state, decoder input] feedforward \
                                result dimension - this result is used to compute the attention weights",
                        type=int)
    parser.add_argument("--train_seg_x_dir", default=train_x_seg_path, help="train_seg_x_dir", type=str)
    parser.add_argument("--train_seg_y_dir", default=train_y_seg_path, help="train_seg_y_dir", type=str)
    parser.add_argument("--test_seg_x_dir", default=test_x_seg_path, help="train_seg_x_dir", type=str)

    parser.add_argument("--checkpoint_dir", default=checkpoint_dir,
                        help="checkpoint_dir",
                        type=str)

    parser.add_argument("--checkpoints_save_steps", default=5, help="Save checkpoints every N steps", type=int)
    parser.add_argument("--min_dec_steps", default=4, help="min_dec_steps", type=int)

    parser.add_argument("--max_train_steps", default=sample_total // batch_size, help="max_train_steps", type=int)
    parser.add_argument("--save_batch_train_data", default=False, help="save batch train data to pickle", type=bool)
    parser.add_argument("--load_batch_train_data", default=False, help="load batch train data from pickle",
                        type=bool)
    parser.add_argument("--test_save_dir", default=save_result_dir, help="test_save_dir", type=str)
    parser.add_argument("--pointer_gen", default=False, help="pointer_gen", type=bool)
    parser.add_argument("--use_coverage", default=False, help="use_coverage", type=bool)

    parser.add_argument("--greedy_decode", default=False, help="greedy_decode", type=bool)
    parser.add_argument("--result_save_path", default=get_result_filename(batch_size, epochs, 200, 300),
                        help='result_save_path', type=str)
    args = parser.parse_args()
    params = vars(args)
    return params
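
# Note (not part of the original example): argparse's type=bool does not parse
# boolean flags the way one might expect -- "--pointer_gen False" still yields True,
# because bool("False") is True. A minimal, hypothetical str2bool helper that could
# be passed as type= for the boolean arguments above is sketched here for illustration.
def str2bool(v):
    # Map common textual spellings of booleans to real bool values.
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

# Usage sketch:
# parser.add_argument("--pointer_gen", default=False, help="pointer_gen", type=str2bool)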
Example #3
def train_models(checkpoint_dir,
                 test_sentence,
                 vocab_path,
                 reverse_vocab_path,
                 test=False):

    # Build the train and test sets
    train_data_X, train_data_Y, test_data_X, wv_model, X_max_len, train_y_max_len = build_dataset(
        train_data_path, test_data_path, save_wv_model_path, testOnly=test)

    # Vocabulary size
    vocab_size = len(vocab)
    params = {}
    params['vocab_size'] = vocab_size
    params['input_length'] = train_data_X.shape[1]

    vocab_inp_size = vocab_size
    vocab_tar_size = vocab_size
    input_length = train_data_X.shape[1]
    output_length = train_data_Y.shape[1]

    BUFFER_SIZE = len(train_data_X)
    steps_per_epoch = len(train_data_X) // BATCH_SIZE
    start_index = train_ids_y[0][0]

    # Dataset generator
    dataset = tf.data.Dataset.from_tensor_slices(
        (train_data_X, train_data_Y)).shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

    # create sample input and target
    # example_input_batch, example_target_batch = next(iter(dataset))

    # create encoder model
    encoder = Encoder(vocab_inp_size, embedding_dim, embedding_matrix,
                      input_length, units, BATCH_SIZE)

    # create decoder model
    decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

    # model = Seq2Seq(params)

    # Define the optimizer and the loss function
    # optimizer = tf.keras.optimizers.Adam(1e-3)
    optimizer = tf.keras.optimizers.Adagrad(1e-3)

    # Checkpoints (Object-based saving)
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     encoder=encoder,
                                     decoder=decoder)

    if test:
        # test only and plot results
        #
        # * The evaluate function is similar to the training loop, except we don't use
        #   *teacher forcing* here. The input to the decoder at each time step is its
        #   previous predictions along with the hidden state and the encoder output.
        # * Stop predicting when the model predicts the *end token*.
        # * And store the *attention weights* for every time step.
        #
        # Note: The encoder output is calculated only once per input.
        #       (A hedged greedy-decode sketch along these lines follows this function.)

        # restoring the latest checkpoint in checkpoint_dir
        checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

        # result, sentence, attention_plot = evaluate(encoder, decoder, test_sentence, vocab, reverse_vocab, units, input_length, train_y_max_len, start_index)
        results = model_predict(encoder, decoder, test_data_X, BATCH_SIZE,
                                vocab, reverse_vocab, train_y_max_len,
                                start_index)

        # print(results[1005])

        # Read in the submission data
        test_df = pd.read_csv(test_data_path)
        test_df.head()

        # Print the indices of any empty predictions
        for idx, result in enumerate(results):
            if result == '':
                print(idx)

        # Assign the predictions
        test_df['Prediction'] = results
        # Keep only the QID and Prediction columns
        test_df = test_df[['QID', 'Prediction']]

        test_df.head()

        # Check whether there are any null values
        # for predic in test_df['Prediction']:
        #     if type(predic) != str:
        #         print(predic)

        test_df['Prediction'] = test_df['Prediction'].apply(submit_proc)

        test_df.head()

        # Get the result save path
        result_save_path = get_result_filename(
            BATCH_SIZE,
            EPOCHS,
            X_max_len,
            embedding_dim,
            commit='_4_1_submit_seq2seq_code')

        # Save the results
        test_df.to_csv(result_save_path, index=False, sep=',')

        # Read the results back
        test_df = pd.read_csv(result_save_path)
        # Check the output format
        test_df.head(10)

        # print('Input: %s' % (sentence))
        # print('Predicted report: {}'.format(result))

        # attention_plot = attention_plot[:len(result.split(' ')), :len(sentence.split(' '))]
        # plot_attention(attention_plot, sentence.split(' '), result.split(' '))

    else:
        # Training
        #
        # 1. Pass the *input* through the *encoder*, which returns the *encoder output* and the *encoder hidden state*.
        # 2. The encoder output, encoder hidden state and the decoder input (which is the *start token*) are passed to the decoder.
        # 3. The decoder returns the *predictions* and the *decoder hidden state*.
        # 4. The decoder hidden state is then passed back into the model and the predictions are used to calculate the loss.
        # 5. Use *teacher forcing* to decide the next input to the decoder.
        # 6. *Teacher forcing* is the technique where the *target word* is passed as the *next input* to the decoder.
        # 7. The final step is to calculate the gradients, apply them with the optimizer and backpropagate.
        #    (A hedged sketch of such a train_step follows this function.)
        for epoch in range(EPOCHS):
            start = time.time()
            total_loss = 0

            for (batch, (inp,
                         targ)) in enumerate(dataset.take(steps_per_epoch)):
                batch_loss = train_step(encoder, decoder, inp, targ, optimizer,
                                        start_index)
                total_loss += batch_loss

                if batch % 100 == 0:
                    print('Epoch {} Batch {} Loss {:.4f}'.format(
                        epoch + 1, batch, batch_loss.numpy()))
            # saving (checkpoint) the model every epoch
            checkpoint.save(file_prefix=checkpoint_prefix)

            print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                                total_loss / steps_per_epoch))
            print('Time taken for 1 epoch {} sec\n'.format(time.time() -
                                                           start))
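
# The numbered steps in the training branch above describe a teacher-forcing train
# step. The sketch below is a minimal, hypothetical version of such a step: the
# Encoder/Decoder call signatures and the loss_function helper are assumptions for
# illustration and may differ from the project's actual train_step.
def train_step_sketch(encoder, decoder, inp, targ, optimizer, start_index):
    loss = 0
    with tf.GradientTape() as tape:
        # 1. Encode the whole input sequence once.
        enc_output, dec_hidden = encoder(inp)
        # 2. The first decoder input is the start token for every sequence in the batch.
        dec_input = tf.expand_dims([start_index] * inp.shape[0], 1)
        for t in range(1, targ.shape[1]):
            # 3./4. Decode one step and accumulate the loss against the target token.
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            loss += loss_function(targ[:, t], predictions)
            # 5./6. Teacher forcing: the target token becomes the next decoder input.
            dec_input = tf.expand_dims(targ[:, t], 1)
    # 7. Compute the gradients and let the optimizer apply them.
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return loss / int(targ.shape[1])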
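
# The evaluate/decode comments in the test branch above (no teacher forcing, stop at
# the end token, encoder run only once) can be illustrated with the hypothetical
# greedy-decode sketch below. Again, the Encoder/Decoder call signatures and the
# end_index argument are assumptions, not the project's actual model_predict.
def greedy_decode_sketch(encoder, decoder, inputs, start_index, end_index, max_dec_len):
    # inputs: a single tokenized sentence with shape (1, input_length).
    enc_output, dec_hidden = encoder(inputs)
    dec_input = tf.expand_dims([start_index], 1)
    result_ids = []
    for _ in range(max_dec_len):
        predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
        predicted_id = int(tf.argmax(predictions[0]).numpy())
        # Stop as soon as the end token is produced.
        if predicted_id == end_index:
            break
        result_ids.append(predicted_id)
        # No teacher forcing: the model's own prediction is fed back as the next input.
        dec_input = tf.expand_dims([predicted_id], 1)
    return result_ids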
def main():
    # Parse command-line parameters
    parser = argparse.ArgumentParser()

    parser.add_argument("--mode", default='train', help="run mode", type=str)
    parser.add_argument("--max_enc_len",
                        default=400,
                        help="Encoder input max sequence length",
                        type=int)
    parser.add_argument("--max_dec_len",
                        default=100,
                        help="Decoder input max sequence length",
                        type=int)
    parser.add_argument("--batch_size",
                        default=batch_size,
                        help="batch size",
                        type=int)
    parser.add_argument("--epochs",
                        default=epochs,
                        help="train epochs",
                        type=int)
    parser.add_argument("--vocab_path",
                        default=vocab_path,
                        help="vocab path",
                        type=str)
    parser.add_argument("--learning_rate",
                        default=0.15,
                        help="Learning rate",
                        type=float)
    parser.add_argument(
        "--adagrad_init_acc",
        default=0.1,
        help="Adagrad optimizer initial accumulator value. "
        "Please refer to the Adagrad optimizer API documentation "
        "on tensorflow site for more details.",
        type=float)
    parser.add_argument(
        '--rand_unif_init_mag',
        default=0.02,
        help='magnitude for lstm cells random uniform initialization',
        type=float)
    parser.add_argument('--eps', default=1e-12, help='eps', type=float)

    parser.add_argument('--trunc_norm_init_std',
                        default=1e-4,
                        help='std of trunc norm init, '
                        'used for initializing everything else',
                        type=float)

    parser.add_argument(
        '--cov_loss_wt',
        default=1.0,
        help='Weight of coverage loss (lambda in the paper).'
        ' If zero, then no incentive to minimize coverage loss.',
        type=float)

    parser.add_argument('--max_grad_norm',
                        default=2.0,
                        help='for gradient clipping',
                        type=float)

    parser.add_argument("--vocab_size",
                        default=50000,
                        help="max vocab size , None-> Max ",
                        type=int)
    parser.add_argument("--max_vocab_size",
                        default=50000,
                        help="max vocab size , None-> Max ",
                        type=int)

    parser.add_argument(
        "--beam_size",
        default=batch_size,
        help=
        "beam size for beam search decoding (must be equal to batch size in decode mode)",
        type=int)
    parser.add_argument("--embed_size",
                        default=300,
                        help="Words embeddings dimension",
                        type=int)
    parser.add_argument("--enc_units",
                        default=128,
                        help="Encoder GRU cell units number",
                        type=int)
    parser.add_argument("--dec_units",
                        default=256,
                        help="Decoder GRU cell units number",
                        type=int)
    parser.add_argument(
        "--attn_units",
        default=256,
        help="[context vector, decoder state, decoder input] feedforward \
                            result dimension - this result is used to compute the attention weights",
        type=int)

    parser.add_argument("--train_seg_x_dir",
                        default=train_x_seg_path,
                        help="train_seg_x_dir",
                        type=str)
    parser.add_argument("--train_seg_y_dir",
                        default=train_y_seg_path,
                        help="train_seg_y_dir",
                        type=str)

    parser.add_argument("--val_seg_x_dir",
                        default=val_x_seg_path,
                        help="val_x_seg_path",
                        type=str)
    parser.add_argument("--val_seg_y_dir",
                        default=val_y_seg_path,
                        help="val_y_seg_path",
                        type=str)

    parser.add_argument("--test_seg_x_dir",
                        default=test_x_seg_path,
                        help="train_seg_x_dir",
                        type=str)
    parser.add_argument("--checkpoint_dir",
                        default=checkpoint_dir,
                        help="checkpoint_dir",
                        type=str)
    parser.add_argument("--checkpoints_save_steps",
                        default=5,
                        help="Save checkpoints every N steps",
                        type=int)
    parser.add_argument("--min_dec_steps",
                        default=4,
                        help="min_dec_steps",
                        type=int)

    parser.add_argument("--max_train_steps",
                        default=int(500000 / (batch_size / 8)),
                        help="max_train_steps",
                        type=int)
    # parser.add_argument("--max_train_steps", default=50, help="max_train_steps", type=int)
    parser.add_argument("--save_batch_train_data",
                        default=False,
                        help="save batch train data to pickle",
                        type=bool)
    parser.add_argument("--load_batch_train_data",
                        default=False,
                        help="load batch train data from pickle",
                        type=bool)
    parser.add_argument("--test_save_dir",
                        default=save_result_dir,
                        help="test_save_dir",
                        type=str)
    parser.add_argument("--pointer_gen",
                        default=True,
                        help="training, eval or test options",
                        type=bool)
    parser.add_argument("--use_coverage",
                        default=True,
                        help="test_save_dir",
                        type=bool)

    parser.add_argument("--greedy_decode",
                        default=False,
                        help="greedy_decode",
                        type=bool)
    parser.add_argument("--result_save_path",
                        default=get_result_filename(batch_size, epochs, 200,
                                                    300),
                        help='result_save_path',
                        type=str)

    parser.add_argument("--max_num_to_eval",
                        default=5,
                        help="max_num_to_eval",
                        type=int)
    parser.add_argument("--num_to_test",
                        default=20000,
                        help="num_to_test",
                        type=int)
    parser.add_argument("--gpu_memory",
                        default=30,
                        help="gpu_memory GB",
                        type=int)

    args = parser.parse_args()
    params = vars(args)
    # print(params)
    if params["mode"] == "train":
        train(params)
    elif params["mode"] == "test":
        params['beam_size'] = 2
        params['batch_size'] = 2
        result_save_path = params['result_save_path']
        predict_result(params, result_save_path)
        # test_and_save(params)
    elif params["mode"] == "eval":
        evaluate(params)
    elif params['mode'] == 'auto':
        # PGN training
        params['mode'] = 'train'

        # params['use_coverage'] = False
        # params['epochs'] = 30
        params['use_coverage'] = True
        params['epochs'] = 30

        train(params)
        # predict result
        params['mode'] = 'test'
        params['beam_size'] = 2
        params['batch_size'] = 2
        result_save_path = params['result_save_path']
        predict_result(params, result_save_path)
        # evaluate
        params['mode'] = 'eval'
        evaluate(params)
Example #5

def test_and_save(params):
    assert params["test_save_dir"], "provide a dir to save the results"
    gen = test(params)
    results = []
    with tqdm(total=params["num_to_test"], position=0, leave=True) as pbar:
        for i in range(params["num_to_test"]):
            trial = next(gen)
            results.append(trial.abstract)
            pbar.update(1)
    return results


def predict_result(params, result_save_path):
    # Run prediction on the test set
    results = test_and_save(params)
    # Save the predictions
    save_predict_result(results, result_save_path)



if __name__ == '__main__':
    # Get parameters
    params = get_params()
    params['batch_size'] = 3
    params['beam_size'] = 3
    params['mode'] = 'test'
    result_save_path = get_result_filename(params['batch_size'], 30, 400, 300)
    predict_result(params, result_save_path)