def evaluate_metrics(model, test_iter):
    """Greedy-decode every batch and dump per-sample BLEU scores to CSV.

    For the first sample of each batch, decodes a question from the answer,
    compares it against the reference question with BLEU-1..4, and writes all
    rows to ``eval_results.csv``.

    Args:
        model: seq2seq model; invoked as ``model(batch.ans, decoder)`` and
            expected to return ``(outputs, attention)``.
        test_iter: torchtext-style iterator whose dataset exposes ``fields``.
    """
    model.eval()
    fields = test_iter.dataset.fields
    greedy = Greedy(use_stop=True)
    # Collect plain dicts and build the frame once at the end:
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
    # and appending row-by-row is O(n^2) anyway.
    rows = []
    pbar = tqdm(test_iter, total=len(test_iter), unit=' batches',
                disable=hp.tqdm)
    for i, batch in enumerate(pbar):
        # Cap decoding length at the target length (BOS token excluded).
        greedy.set_maxlen(len(batch.que[1:]))
        outputs, attention = model(batch.ans, greedy)
        seq_len, batch_size, vocab_size = outputs.size()
        preds = outputs.topk(1)[1]
        # Only sample 0 of each batch is evaluated.
        source = sequence_to_text(batch.ans[:, 0].data, fields['ans'])
        prediction = sequence_to_text(preds[:, 0].data, fields['que'])
        target = sequence_to_text(batch.que[1:, 0].data, fields['que'])
        bleu = calc_bleu([clean(target)], clean(prediction))
        # NOTE(review): "Predited" is a typo, kept byte-for-byte so existing
        # consumers of eval_results.csv keep working.
        rows.append({
            "Answer": " ".join(clean(source)),
            "Target Question": " ".join(clean(target)),
            "Predited Question": " ".join(clean(prediction)),
            "bleu1": bleu["bleu1"],
            "bleu2": bleu["bleu2"],
            "bleu3": bleu["bleu3"],
            "bleu4": bleu["bleu4"],
        })
    df = pd.DataFrame(rows, columns=[
        "Answer", "Target Question", "Predited Question",
        "bleu1", "bleu2", "bleu3", "bleu4",
    ])
    df.to_csv("eval_results.csv")
def main():
    """Print the first validation sample of the translate dataset as text."""
    valid_dataset = TranslateDataset('valid')

    # Load the id -> token lookup tables.
    with open(Config.vocab_file, 'rb') as file:
        data = pickle.load(file)
    src_idx2char = data['dict']['src_idx2char']
    tgt_idx2char = data['dict']['tgt_idx2char']

    src_ids, tgt_ids = valid_dataset[0]
    # Convert id sequences back to tokens and show both sides of the pair.
    for label, ids, table in (('src_text', src_ids, src_idx2char),
                              ('tgt_text', tgt_ids, tgt_idx2char)):
        print(label + ': ' + ' '.join(sequence_to_text(ids, table)))
def main():
    """Dump the first ten validation chat pairs as readable text."""
    valid_dataset = Qingyun11wChatDataset('valid')
    # print(valid_dataset[0])

    # Load the shared id -> char vocabulary.
    with open(Config.vocab_file, 'rb') as file:
        data = pickle.load(file)
    idx2char = data['dict']['idx2char']

    for i in range(10):
        src_ids, tgt_ids = valid_dataset[i]
        # Characters are concatenated without a separator.
        print('src_text: ' + ''.join(sequence_to_text(src_ids, idx2char)))
        print('tgt_text: ' + ''.join(sequence_to_text(tgt_ids, idx2char)))
def main():
    """Print the first validation sample of the AI Challenger 2017 set."""
    from utils import sequence_to_text

    valid_dataset = AiChallenger2017Dataset('valid')
    print(valid_dataset[0])

    # Separate source/target vocabularies for the translation pair.
    with open(vocab_file, 'rb') as file:
        data = pickle.load(file)
    src_idx2char = data['dict']['src_idx2char']
    tgt_idx2char = data['dict']['tgt_idx2char']

    src_ids, tgt_ids = valid_dataset[0]
    # Source is space-joined while target is joined with no separator —
    # presumably word tokens vs. Chinese characters; TODO confirm intent.
    print('src_text: ' + ' '.join(sequence_to_text(src_ids, src_idx2char)))
    print('tgt_text: ' + ''.join(sequence_to_text(tgt_ids, tgt_idx2char)))
import config
from tqdm import tqdm
from utils import parse_args, sequence_to_text

args = parse_args()

collate_fn = TextMelCollate(config.n_frames_per_step)

# Report dataset sizes and peek at the first training sample.
train_dataset = TextMelLoader('train', config)
print('len(train_dataset): ' + str(len(train_dataset)))
dev_dataset = TextMelLoader('dev', config)
print('len(dev_dataset): ' + str(len(dev_dataset)))

text, mel = train_dataset[0]
print('text: ' + str(text))
# Decode the id tensor back to characters.
print('text: ' + ''.join(sequence_to_text(text.numpy().tolist())))
print('type(mel): ' + str(type(mel)))

# NOTE(review): these lists are declared but never populated in this
# snippet — either dead code or a missing append step; confirm upstream.
text_lengths = []
mel_lengths = []
for data in tqdm(dev_dataset):
    text, mel = data
    text = ''.join(sequence_to_text(text.numpy().tolist()))
    mel = mel.numpy()
    # print('text: ' + str(text))
    # print('mel.size: ' + str(mel.size))
# Vocabulary tables from the previously loaded `data` dict.
char2idx = data['char2idx']  # loaded but unused below — kept for parity
idx2char = data['idx2char']

args = parse_args()

collate_fn = TextMelCollate(config.n_frames_per_step)

# Report dataset sizes and peek at the first training sample.
train_dataset = TextMelLoader(training_files, config)
print('len(train_dataset): ' + str(len(train_dataset)))
dev_dataset = TextMelLoader(validation_files, config)
print('len(dev_dataset): ' + str(len(dev_dataset)))

text, mel = train_dataset[0]
print('text: ' + str(text))
# Decode the id tensor back to characters via idx2char.
print('text: ' + ''.join(sequence_to_text(text.numpy().tolist(), idx2char)))
print('type(mel): ' + str(type(mel)))

# NOTE(review): lengths lists are never filled here — verify whether an
# append step went missing when this script was edited.
text_lengths = []
mel_lengths = []
for data in tqdm(dev_dataset):
    text, mel = data
    text = ''.join(sequence_to_text(text.numpy().tolist(), idx2char))
    mel = mel.numpy()
    # print('text: ' + str(text))
    # print('mel.size: ' + str(mel.size))