# Decode a batch of dialog contexts (X) and reference responses (Y) from
# token-id tensors back into plain strings for evaluation bookkeeping.
# NOTE(review): `batch_data`, `tokenizer`, and the accumulator `ctxs` are
# defined by surrounding (unseen) code — confirm against the caller.
batch_ctxs = batch_data["X"].tolist()
batch_floors = batch_data["X_floor"].tolist()
for idx in range(len(batch_ctxs)):
    ctx_seq = batch_ctxs[idx]
    ctx_floors = batch_floors[idx]
    ctx_lst = []
    for uttr_idx in range(len(ctx_seq)):
        ctx = ctx_seq[uttr_idx]
        # A leading pad id marks an empty/padded utterance slot — skip it.
        if ctx[0] == tokenizer.pad_token_id:
            continue
        ctx = tokenizer.convert_ids_to_tokens(ids=ctx, trim_bos=True, trim_from_eos=True, trim_pad=True)
        ctx = tokenizer.convert_tokens_to_string(ctx)
        ctx_floor = ctx_floors[uttr_idx]
        # Floor id 1 -> speaker "A"; any other value -> speaker "B".
        ctx_floor = "A" if ctx_floor == 1 else "B"
        ctx_lst.append((ctx, ctx_floor))
    ctxs.append(ctx_lst)
# Decode reference responses. `batch_floors` is deliberately rebound here
# to the Y-side floor labels.
batch_refs = batch_data["Y"].tolist()
batch_floors = batch_data["Y_floor"].tolist()
for idx in range(len(batch_refs)):
    ref = batch_refs[idx]
    ref = tokenizer.convert_ids_to_tokens(ids=ref, trim_bos=True, trim_from_eos=True, trim_pad=True)
    ref = tokenizer.convert_tokens_to_string(ref)
    # NOTE(review): loop body appears truncated in this chunk — `ref` and
    # `ref_floor` are presumably appended to a refs list just after this
    # line; confirm against the full file.
    ref_floor = "A" if batch_floors[idx] == 1 else "B"
# Log one sampled test case: the dialog context and the reference response.
# (The hypothesis text is appended to `log_s` by the code that follows this
# chunk.)
batch_data = test_sample_data_source.next(1)

# -- context: one "  <floor>: <utterance>" line per non-padded slot --------
ctx_lines = ["context:\n"]
context = batch_data["X"].tolist()[0]
context_floors = batch_data["X_floor"].tolist()[0]
for uttr, floor in zip(context, context_floors):
    # A leading pad id flags an empty (padded) context slot.
    if uttr[0] == tokenizer.pad_token_id:
        continue
    uttr = tokenizer.convert_ids_to_tokens(
        ids=uttr, trim_bos=True, trim_from_eos=True, trim_pad=True)
    floor = "A" if floor == 1 else "B"
    ctx_lines.append(" {}: {}\n".format(
        floor, tokenizer.convert_tokens_to_string(uttr)))
log_s = "".join(ctx_lines)
mlog(log_s)

# -- reference response ----------------------------------------------------
floor = batch_data["Y_floor"][0].item()
floor = "A" if floor == 1 else "B"
uttr = batch_data["Y"][0].tolist()
uttr = tokenizer.convert_ids_to_tokens(ids=uttr, trim_bos=True,
                                       trim_from_eos=True, trim_pad=True)
log_s = "ref text:\n" + " {}: {}\n".format(
    floor, tokenizer.convert_tokens_to_string(uttr))
mlog(log_s)

# Start of the hypothesis section, completed by the following code.
log_s = "hyp text:\n"
# Draw a handful of free-running samples from the model and log them, then
# (every `validate_after_n_step` steps) begin evaluation on the dev set.
log_s = "<Test> - Samples:"
mlog(log_s)
for sample_idx in range(5):
    # Unconditional sampling, one sequence at a time.
    ret_data, ret_stat = model.sample_step(batch_size=1)
    log_s = "hyp text:\n"
    hyp = ret_data["symbols"][0].tolist()
    hyp = tokenizer.convert_ids_to_tokens(
        ids=hyp,
        trim_bos=True,
        trim_from_eos=True,
        trim_pad=True,
    )
    log_s += " {}\n".format(
        tokenizer.convert_tokens_to_string(hyp)
    )
    # Visual separator between samples in the log.
    log_s += "="*30
    mlog(log_s)
# Evaluation on dev dataset
if n_step > 0 and n_step % config.validate_after_n_step == 0:
    # Switch to inference mode (disables dropout etc.) for evaluation.
    model.eval()
    log_s = f"<Dev> learning rate: {lr}\n"
    mlog(log_s)
    dev_data_source.epoch_init(shuffle=False)
    while True:
        batch_data = dev_data_source.next(config.eval_batch_size)
        # NOTE(review): the body of this `if` is cut off in this chunk —
        # presumably it breaks out of the loop at end of epoch; confirm
        # against the full file.
        if batch_data is None:
# Pretty-print one (context, response) pair from the current batch.
# NOTE(review): relies on `batch_data`, `tokenizer`, and `mlog` from the
# surrounding (unseen) code.
parts = ["context:\n"]
context = batch_data["X"].tolist()[0]
context_floors = batch_data["X_floor"].tolist()[0]
for uttr, floor in zip(context, context_floors):
    # Padded (empty) utterance slots start with the pad id — skip them.
    if uttr[0] == tokenizer.pad_token_id:
        continue
    uttr = tokenizer.convert_ids_to_tokens(
        ids=uttr,
        trim_bos=True,
        trim_from_eos=True,
        trim_pad=True,
    )
    floor = "A" if floor == 1 else "B"
    parts.append(" {}: {}\n".format(
        floor, tokenizer.convert_tokens_to_string(uttr)))
log_s = "".join(parts)
mlog(log_s)

# The ground-truth response, labeled with its speaker floor.
floor = batch_data["Y_floor"][0].item()
floor = "A" if floor == 1 else "B"
uttr = batch_data["Y"][0].tolist()
uttr = tokenizer.convert_ids_to_tokens(
    ids=uttr,
    trim_bos=True,
    trim_from_eos=True,
    trim_pad=True,
)
log_s = "response:\n" + " {}: {}\n".format(
    floor, tokenizer.convert_tokens_to_string(uttr))
mlog(log_s)