Example #1
0
def evaluate_metrics(model, test_iter):
    """Greedy-decode every batch of *test_iter* and dump per-sample BLEU scores.

    Args:
        model: seq2seq model; invoked as ``model(batch.ans, greedy)`` and
            expected to return ``(outputs, attention)`` with outputs shaped
            (seq_len, batch_size, vocab_size).
        test_iter: torchtext-style iterator whose ``dataset`` exposes ``fields``
            with 'ans' and 'que' vocabularies.

    Side effects:
        Writes "eval_results.csv" into the current working directory.
    """
    model.eval()
    fields = test_iter.dataset.fields
    greedy = Greedy(use_stop=True)
    # NOTE(review): "Predited" is a pre-existing typo kept byte-identical so
    # downstream consumers of eval_results.csv keep working — rename with care.
    columns = [
        "Answer", "Target Question", "Predited Question", "bleu1", "bleu2",
        "bleu3", "bleu4"
    ]
    # Accumulate plain dicts and build the DataFrame once at the end:
    # DataFrame.append-in-a-loop is quadratic and was removed in pandas 2.0.
    rows = []
    pbar = tqdm(test_iter,
                total=len(test_iter),
                unit=' batches',
                disable=hp.tqdm)
    for i, batch in enumerate(pbar):
        # Cap decoding length at the target question length (first token skipped,
        # presumably BOS — TODO confirm).
        greedy.set_maxlen(len(batch.que[1:]))
        outputs, attention = model(batch.ans, greedy)
        seq_len, batch_size, vocab_size = outputs.size()

        preds = outputs.topk(1)[1]
        # Only the first sample of each batch is scored — TODO confirm intended.
        source = sequence_to_text(batch.ans[:, 0].data, fields['ans'])
        prediction = sequence_to_text(preds[:, 0].data, fields['que'])
        target = sequence_to_text(batch.que[1:, 0].data, fields['que'])
        bleu = calc_bleu([clean(target)], clean(prediction))
        rows.append({
            "Answer": " ".join(clean(source)),
            "Target Question": " ".join(clean(target)),
            "Predited Question": " ".join(clean(prediction)),
            "bleu1": bleu["bleu1"],
            "bleu2": bleu["bleu2"],
            "bleu3": bleu["bleu3"],
            "bleu4": bleu["bleu4"]
        })
    pd.DataFrame(rows, columns=columns).to_csv("eval_results.csv")
def main():
    """Decode the first validation pair back to text and print both sides."""
    dataset = TranslateDataset('valid')

    # Load the pickled vocabulary (id -> token lookup tables).
    with open(Config.vocab_file, 'rb') as f:
        vocab = pickle.load(f)
    lookups = vocab['dict']

    src_ids, tgt_ids = dataset[0]

    # Convert each id sequence back to space-separated text and print it.
    for label, ids, table in (('src_text', src_ids, lookups['src_idx2char']),
                              ('tgt_text', tgt_ids, lookups['tgt_idx2char'])):
        tokens = sequence_to_text(ids, table)
        print(label + ': ' + ' '.join(tokens))
Example #3
0
def main():
    """Print the first ten validation chat pairs decoded back to text."""
    dataset = Qingyun11wChatDataset('valid')

    # Load the pickled vocabulary; both sides share one id -> char table.
    with open(Config.vocab_file, 'rb') as f:
        vocab = pickle.load(f)
    idx2char = vocab['dict']['idx2char']

    for sample_idx in range(10):
        src_ids, tgt_ids = dataset[sample_idx]
        # Characters are concatenated with no separator.
        print('src_text: ' + ''.join(sequence_to_text(src_ids, idx2char)))
        print('tgt_text: ' + ''.join(sequence_to_text(tgt_ids, idx2char)))
Example #4
0
def main():
    """Decode the first validation pair of AiChallenger2017 and print it."""
    from utils import sequence_to_text

    dataset = AiChallenger2017Dataset('valid')
    print(dataset[0])

    # Load the per-side id -> token lookup tables.
    with open(vocab_file, 'rb') as f:
        vocab = pickle.load(f)
    dicts = vocab['dict']

    src_ids, tgt_ids = dataset[0]

    # Source tokens are space-separated; target characters are concatenated
    # without a separator (asymmetry preserved from the original).
    src_tokens = sequence_to_text(src_ids, dicts['src_idx2char'])
    print('src_text: ' + ' '.join(src_tokens))

    tgt_tokens = sequence_to_text(tgt_ids, dicts['tgt_idx2char'])
    print('tgt_text: ' + ''.join(tgt_tokens))
Example #5
0
    # NOTE(review): fragment of a larger function/script body — the enclosing
    # definition starts above this excerpt.
    import config
    from tqdm import tqdm
    from utils import parse_args, sequence_to_text

    args = parse_args()
    collate_fn = TextMelCollate(config.n_frames_per_step)

    # Build the train/dev datasets and report their sizes.
    train_dataset = TextMelLoader('train', config)
    print('len(train_dataset): ' + str(len(train_dataset)))

    dev_dataset = TextMelLoader('dev', config)
    print('len(dev_dataset): ' + str(len(dev_dataset)))

    # Decode the first training sample's token ids back to characters
    # (sequence_to_text is called without an explicit lookup table here,
    # unlike the other examples — presumably it has a default; verify).
    text, mel = train_dataset[0]
    print('text: ' + str(text))
    text = sequence_to_text(text.numpy().tolist())
    text = ''.join(text)
    print('text: ' + str(text))
    print('type(mel): ' + str(type(mel)))

    # Length accumulators — never appended to within this excerpt; the
    # collection presumably happens in lines below the visible range.
    text_lengths = []
    mel_lengths = []

    # Walk the dev set, decoding each sample (results currently unused here).
    for data in tqdm(dev_dataset):
        text, mel = data
        text = sequence_to_text(text.numpy().tolist())
        text = ''.join(text)
        mel = mel.numpy()

        # print('text: ' + str(text))
        # print('mel.size: ' + str(mel.size))
Example #6
0
    # NOTE(review): fragment of a larger function/script body — the enclosing
    # definition and the `data` assignment start above this excerpt.
    char2idx = data['char2idx']
    idx2char = data['idx2char']

    args = parse_args()
    collate_fn = TextMelCollate(config.n_frames_per_step)

    # Build the train/dev datasets and report their sizes.
    train_dataset = TextMelLoader(training_files, config)
    print('len(train_dataset): ' + str(len(train_dataset)))

    dev_dataset = TextMelLoader(validation_files, config)
    print('len(dev_dataset): ' + str(len(dev_dataset)))

    # Decode the first training sample's token ids back to characters.
    text, mel = train_dataset[0]
    print('text: ' + str(text))
    text = sequence_to_text(text.numpy().tolist(), idx2char)
    text = ''.join(text)
    print('text: ' + str(text))
    print('type(mel): ' + str(type(mel)))

    # Length accumulators — never appended to within this excerpt; the
    # collection presumably happens in lines below the visible range.
    text_lengths = []
    mel_lengths = []

    # Walk the dev set, decoding each sample (results currently unused here).
    for data in tqdm(dev_dataset):
        text, mel = data
        text = sequence_to_text(text.numpy().tolist(), idx2char)
        text = ''.join(text)
        mel = mel.numpy()

        # print('text: ' + str(text))
        # print('mel.size: ' + str(mel.size))