def get_reward_funcs(forward_path, backward_path, *args, **kwargs):
    """Build the three reward functions used for RL fine-tuning.

    Loads a forward and a backward Seq2Seq model from the given checkpoint
    paths (extra args/kwargs are forwarded to ``Seq2Seq.load``) and returns
    a (MutualInformation, EaseAnswering, InformationFlow) triple.
    """
    fwd = Seq2Seq.load(forward_path, *args, **kwargs)
    bwd = Seq2Seq.load(backward_path, *args, **kwargs)
    rewards = (
        MutualInformation(fwd, bwd),
        EaseAnswering(fwd),
        InformationFlow(),
    )
    return rewards
def eval_bs(test_set: Dataset, vocab: Vocab, model: Seq2Seq, params: Params):
    """Evaluate `model` with beam search on a sample of `test_set`.

    Decodes ``params.test_sample_ratio`` of the test pairs one at a time,
    accumulates ROUGE-1/2/L/SU4 F-scores on the progress bar, and — when
    ``params.test_save_results`` is set — archives per-sample decoding
    details into ``<model_path_prefix>.results.tgz``.
    """
    test_gen = test_set.generator(1, vocab, None, bool(params.pointer))
    n_samples = int(params.test_sample_ratio * len(test_set.pairs))
    if params.test_save_results and params.model_path_prefix:
        result_file = tarfile.open(params.model_path_prefix + ".results.tgz", 'w:gz')
    else:
        result_file = None
    model.eval()
    r1, r2, rl, rsu4 = 0, 0, 0, 0
    prog_bar = tqdm(range(1, n_samples + 1))
    try:
        for i in prog_bar:
            batch = next(test_gen)
            scores, file_content = eval_bs_batch(
                batch, model, vocab, pack_seq=params.pack_seq,
                beam_size=params.beam_size, min_out_len=params.min_out_len,
                max_out_len=params.max_out_len,
                len_in_words=params.out_len_in_words,
                details=result_file is not None)
            if file_content:
                # details are only produced when result_file exists
                file_content = file_content.encode('utf-8')
                file_info = tarfile.TarInfo(name='%06d.txt' % i)
                file_info.size = len(file_content)
                result_file.addfile(file_info, fileobj=BytesIO(file_content))
            if scores:
                r1 += scores[0]['1_f']
                r2 += scores[0]['2_f']
                rl += scores[0]['l_f']
                rsu4 += scores[0]['su4_f']
                prog_bar.set_postfix(R1='%.4g' % (r1 / i * 100),
                                     R2='%.4g' % (r2 / i * 100),
                                     RL='%.4g' % (rl / i * 100),
                                     RSU4='%.4g' % (rsu4 / i * 100))
    finally:
        # BUG FIX: the tar archive was opened but never closed, which can leave
        # the .tgz truncated/corrupt; close it even if decoding raises.
        if result_file is not None:
            result_file.close()
def main():
    """End-to-end training driver: load data, build the model, train with
    per-epoch validation, checkpoint the best model, then write predictions."""
    args = parse_arguments()
    hidden_size = 100
    embed_size = 50
    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, src, trg = load_dataset(args.batch_size)
    src_size, trg_size = len(src.vocab), len(trg.vocab)
    print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)" % (len(train_iter), len( train_iter.dataset), len(test_iter), len(test_iter.dataset)))
    print("[src_vocab]:%d [trg_vocab]:%d" % (src_size, trg_size))
    print("[!] Instantiating models...")
    # Two-layer encoder feeding a single-layer decoder.
    encoder = Encoder(src_size, embed_size, hidden_size, n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, trg_size, n_layers=1, dropout=0.5)
    # `cuda` is a module-level flag — TODO confirm where it is defined.
    if cuda:
        seq2seq = Seq2Seq(encoder, decoder).cuda()
    else:
        seq2seq = Seq2Seq(encoder, decoder)
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)
    best_val_loss = None
    for e in range(1, args.epochs + 1):
        train(e, seq2seq, optimizer, train_iter, trg_size, args.grad_clip, src, trg)
        val_loss = evaluate(seq2seq, val_iter, trg_size, src, trg)
        print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2fS" % (e, val_loss, math.exp(val_loss)))
        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir(".save"):
                os.makedirs(".save")
            torch.save(seq2seq.state_dict(), './.save/seq2seq_%d.pt' % (e))
            best_val_loss = val_loss
    # Build the inverse vocabulary (index -> token) needed to decode predictions.
    sos = trg.vocab.stoi['<sos>']
    eos = trg.vocab.stoi['<eos>']
    index2str = {}
    for (key, value) in trg.vocab.stoi.items():
        index2str[value] = key
    write_predict(seq2seq, src.vocab.stoi, sos, eos, index2str)
def get_model(input_vocab_size, output_vocab_size):
    """Construct a Seq2Seq model configured entirely from command-line FLAGS."""
    buckets = get_buckets(FLAGS.buckets)
    cell = get_rnn_cell(FLAGS.rnn_cell)
    return Seq2Seq(
        input_vocab_size,
        output_vocab_size,
        buckets,
        FLAGS.layer_size,
        FLAGS.n_layers,
        FLAGS.max_gradient_norm,
        FLAGS.batch_size,
        FLAGS.learning_rate,
        FLAGS.learning_rate_decay_factor,
        cell,
        FLAGS.n_samples,
        FLAGS.forward_only,
    )
def experiment_fn(run_config, params):
    """Wire the Seq2Seq estimator and input pipelines into a tf Experiment."""
    seq2seq = Seq2Seq()
    estimator = tf.estimator.Estimator(
        model_fn=seq2seq.model_fn,
        model_dir=Config.train.model_dir,
        params=params,
        config=run_config)
    # Load the vocabulary and record its size in the global config.
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)
    train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set()
    train_input_fn, train_input_hook = dataset.get_train_inputs(train_X, train_y)
    test_input_fn, test_input_hook = dataset.get_test_inputs(test_X, test_y)
    # Periodically print a decoded (encoder, decoder, prediction) sample.
    sample_printer = hook.print_variables(
        variables=['train/enc_0', 'train/dec_0', 'train/pred_0'],
        vocab=vocab,
        every_n_iter=Config.train.check_hook_n_iter)
    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=[train_input_hook, sample_printer],
        eval_hooks=[test_input_hook])
def test(dialog, batch_size=100):
    """Restore the latest checkpoint, score one batch, and print a random
    (input, expected, predicted) example from the dialog data."""
    print("\n=== 예측 테스트 ===")
    model = Seq2Seq(dialog.voc_size)
    with tf.Session() as sess:
        # Restore the most recent checkpoint from ./model.
        ckpt = tf.train.get_checkpoint_state('./model')
        print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        enc_input, dec_input, targets = dialog.next_batch(batch_size)
        expect, outputs, accuracy = model.test(sess, enc_input, dec_input, targets)
        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)
        # BUG FIX: len(expect) / 2 is a float under Python 3 and makes
        # random.randrange raise TypeError; use integer floor division.
        pick = random.randrange(0, len(expect) // 2)
        # seq_data holds flattened (question, answer) pairs — hence *2 / *2+1.
        question = dialog.decode([dialog.seq_data[pick * 2]], True)  # renamed from `input` (shadowed builtin)
        expect = dialog.decode([dialog.seq_data[pick * 2 + 1]], True)
        outputs = dialog.cut_eos(outputs[pick])
        print("\n정확도:", accuracy)
        print("랜덤 결과\n")
        print(" 입력값:", question)
        print(" 실제값:", expect)
        print(" 예측값:", ' '.join(outputs))
def train_txt_generator():
    """Train the seq2seq text generator on (MR, reference) pairs.

    Relies on module-level globals: vocab_size, hidden_size, embedding_size,
    data_size, batch_size, mr, mr_lengths, ref.
    """
    seq2seq = Seq2Seq(vocab_size, hidden_size, embedding_size)
    optimizer = opt.Adam(seq2seq.parameters(), lr=5e-4)
    num_epoch = 1
    for i in range(num_epoch):
        print('epoch {}/{}'.format(i + 1, num_epoch))
        # Shuffle the dataset at the start of each epoch.
        shuffle_indices = np.random.permutation(np.arange(data_size))
        mr_ = mr[shuffle_indices]
        lengths_ = mr_lengths[shuffle_indices]
        ref_ = ref[shuffle_indices]
        # NOTE(review): range(1) processes only the first batch, and the
        # checkpoint below fires at (j+1) % 200 == 0 which can never happen
        # with a single iteration — confirm whether this is a debug leftover.
        for j in range(1):
            start = j * batch_size
            end = min(data_size, (j + 1) * batch_size)
            y = seq2seq.forward(torch.LongTensor(mr_[start:end]),
                                torch.LongTensor(ref_[start:end]),
                                torch.LongTensor(lengths_[start:end]))
            # NOTE(review): targets are taken from the UNSHUFFLED `ref` while the
            # model input uses shuffled `ref_` — looks like it should be ref_;
            # verify against the data pipeline.
            ref_gt = np.array(ref[start:end], dtype=int)
            # One-hot targets, shifted by one step relative to the predictions.
            tgt = torch.tensor(np.eye(vocab_size)[ref_gt])
            # Element-wise binary cross-entropy over next-token predictions.
            loss = -torch.sum(torch.mul(torch.log(y)[:, :-1, :], tgt[:, 1:, :])) \
                - torch.sum(torch.mul(torch.log(1-y)[:, :-1, :], 1-tgt[:, 1:, :]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(loss)
            if (j+1) % 200 == 0:
                torch.save(seq2seq.state_dict(), 'checkpoint/s2s-' + str(j + 1) + '-parameter.pkl')
def __init__(self, tokenizer, model_path, config_file=None, embed_size=256, hidden_size=256, n_layers=2, device='cuda'):
    """Load a trained Seq2Seq model for inference.

    Hyper-parameters come from `config_file` (a JSON object with keys
    embed_size, hidden_size, n_layers) when given, otherwise from the
    keyword arguments.

    Args:
        tokenizer: vocabulary object; ``len(tokenizer)`` is the vocab size.
        model_path: path to the saved state_dict.
        config_file: optional JSON file overriding the size arguments.
        embed_size / hidden_size / n_layers: fallbacks when no config file.
        device: torch device string to run inference on.
    """
    self.tokenizer = tokenizer
    self.device = device
    if config_file:
        with open(config_file, 'r') as f:
            config = json.load(f)
        self.embed_size = config['embed_size']
        self.hidden_size = config['hidden_size']
        self.n_layers = config['n_layers']
    else:
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
    # BUG FIX: the model was previously constructed from the keyword arguments
    # even when a config file was supplied, silently ignoring the loaded
    # values; build it from the resolved self.* attributes instead.
    self.model = Seq2Seq(len(self.tokenizer), self.hidden_size, self.embed_size,
                         n_layers=self.n_layers, device=device)
    self.model.load_state_dict(torch.load(model_path), strict=False)
    self.model.to(self.device)
    self.model.eval()
def test(dialog):
    """Restore the latest checkpoint and print one random prediction."""
    print("\n=== 예측 테스트 ===")
    model = Seq2Seq(dialog.vocab_size)
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        enc_input, dec_input, targets = dialog.make_batch()
        # Evaluate a single randomly chosen example.
        pick = random.randrange(0, len(enc_input))
        expect, outputs, accuracy = model.test(sess, [enc_input[pick]], [dec_input[pick]], [targets[pick]])
        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)
        input = dialog.decode([dialog.examples[pick]], True)
        # NOTE(review): this decodes the SAME example as `input`, so the printed
        # 실제값 equals the 입력값. Sibling variants index pick*2 / pick*2+1 for
        # the question/answer pair — confirm whether examples[pick] is really
        # the expected answer here.
        expect = dialog.decode([dialog.examples[pick]], True)
        #outputs = dialog.cut_eos(outputs[0])
        print("\n정확도:", accuracy)
        print("랜덤 결과\n")
        print(" 입력값:", input)
        print(" 실제값:", expect)
        print(" 예측값:", outputs)
def build_model(model_class, config, tokenizer):
    """Assemble an encoder/transformer-decoder Seq2Seq and load pretrained weights."""
    encoder = model_class(config=config)
    layer = nn.TransformerDecoderLayer(
        d_model=config.hidden_size,
        nhead=config.num_attention_heads,
    )
    model = Seq2Seq(
        encoder=encoder,
        decoder=nn.TransformerDecoder(layer, num_layers=6),
        config=config,
        beam_size=args.beam_size,
        max_length=args.max_target_length,
        sos_id=tokenizer.cls_token_id,
        eos_id=tokenizer.sep_token_id,
    )
    assert os.path.exists("pytorch_model.bin"), "Weight is not downloaded."
    # Load on CPU; strict=False tolerates missing/extra keys.
    state = torch.load("pytorch_model.bin", map_location=torch.device("cpu"))
    model.load_state_dict(state, strict=False)
    return model
def train(dialog, batch_size=100, epoch=100):
    """Train the chatbot model, restoring any existing checkpoint first,
    and save the weights when training finishes."""
    model = Seq2Seq(dialog.vocab_size)
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("새로운 모델을 생성하는 중 입니다.")
            sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        steps_per_epoch = int(math.ceil(len(dialog.examples) / float(batch_size)))
        for _ in range(steps_per_epoch * epoch):
            # option
            enc_input, dec_input, targets = dialog.next_batch(batch_size)
            # train
            _, loss = model.train(sess, enc_input, dec_input, targets)
        model.saver.save(sess,
                         os.path.join(FLAGS.train_dir, FLAGS.ckpt_name),
                         global_step=model.global_step)
    print('최적화 완료!')
def test(dialog, batch_size=100):
    # Qualitative check: restore the newest checkpoint, score one batch and
    # print a random (input, expected, predicted) triple. (Python 2 syntax.)
    print "\n=== 예측 테스트 ==="
    model = Seq2Seq(dialog.vocab_size)
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print "다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        enc_input, dec_input, targets = dialog.next_batch(batch_size)
        expect, outputs, accuracy = model.test(sess, enc_input, dec_input, targets)
        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)
        # examples holds flattened (question, answer) pairs — hence /2, *2, *2+1.
        pick = random.randrange(0, len(expect) / 2)
        input = dialog.decode([dialog.examples[pick * 2]], True)
        expect = dialog.decode([dialog.examples[pick * 2 + 1]], True)
        outputs = dialog.cut_eos(outputs[pick])
        print "\n정확도:", accuracy
        print "랜덤 결과\n",
        print " 입력값:", input
        print " 실제값:", expect
        print " 예측값:", ' '.join(outputs)
def main(fpath):
    """Translate every line of the file at `fpath` with the trained model."""
    device = torch.device('cuda')
    dataset = Dataset()
    input_dim = len(dataset.SRC.vocab)
    output_dim = len(dataset.TRG.vocab)
    src_pad_idx = dataset.SRC.vocab.stoi[dataset.SRC.pad_token]
    # Architecture hyper-parameters (must match the saved checkpoint).
    emb_dim = 256
    enc_hid, dec_hid = 512, 512
    dropout = 0.5
    encoder = Encoder(input_dim, emb_dim, enc_hid, dec_hid, dropout)
    attention = Attention(enc_hid, dec_hid)
    decoder = Decoder(emb_dim, enc_hid, dec_hid, output_dim, dropout, attention)
    model = Seq2Seq(encoder, decoder, src_pad_idx, device)
    model.load_state_dict(torch.load("best_model.pt"))
    model.to(device)
    with open(fpath, "r") as f:
        sentences = f.readlines()
    translate_sentence(model, sentences, dataset.SRC, dataset.TRG, device)
def test(dialog, batch_size=100):
    # Qualitative evaluation: restore the newest checkpoint, run one batch and
    # show a random (input, answer, prediction) triple. (Python 2 syntax.)
    print "\n=== 测试 ==="
    model = Seq2Seq(dialog.vocab_size)
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print "模型检查点位置.", ckpt.model_checkpoint_path
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        enc_input, dec_input, targets = dialog.next_batch(batch_size)
        expect, outputs, accuracy = model.test(sess, enc_input, dec_input, targets)
        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)
        # examples holds flattened (question, answer) pairs — hence /2, *2, *2+1.
        pick = random.randrange(0, len(expect) / 2)
        input = dialog.decode([dialog.examples[pick * 2]], True)
        expect = dialog.decode([dialog.examples[pick * 2 + 1]], True)
        outputs = dialog.cut_eos(outputs[pick])
        print "\n准确率:", accuracy
        print "数据展示\n",
        print " 输入数据:", input
        print " 答案:", expect
        print " 实际输出:", ' '.join(outputs)
def _load_model(self):
    """Instantiate the pretrained vanilla Seq2Seq from self.config and load
    the weights held in self.model_weights."""
    print('Loading pretrained model')
    if self.config['model']['seq2seq'] == 'vanilla':
        print('Loading Seq2Seq Vanilla model')
        # All architecture hyper-parameters come from the config dict;
        # vocab sizes and pad ids come from the source/target dictionaries.
        self.model = Seq2Seq(
            src_emb_dim=self.config['model']['dim_word_src'],
            trg_emb_dim=self.config['model']['dim_word_trg'],
            src_vocab_size=len(self.src_dict),
            trg_vocab_size=len(self.tgt_dict),
            src_hidden_dim=self.config['model']['dim_src'],
            trg_hidden_dim=self.config['model']['dim_trg'],
            pad_token_src=self.src_dict['<pad>'],
            pad_token_trg=self.tgt_dict['<pad>'],
            use_cuda=self.use_cuda,
            batch_size=self.config['data']['batch_size'],
            bidirectional=self.config['model']['bidirectional'],
            nlayers=self.config['model']['n_layers_src'],
            nlayers_trg=self.config['model']['n_layers_trg'],
            dropout=0.,  # dropout disabled for evaluation
        )
    if self.use_cuda:
        self.model = self.model.cuda()
    # NOTE(review): if the config names any seq2seq variant other than
    # 'vanilla', self.model is never created and the lines below raise —
    # confirm 'vanilla' is the only supported value.
    self.model.load_state_dict(
        self.model_weights
    )
def train(dialog, batch_size=100, epoch=100):
    """Train the chatbot model with periodic summary logging, then save
    a checkpoint."""
    model = Seq2Seq(dialog.vocab_size)
    with tf.Session() as sess:
        # TODO: move session restore / summary bookkeeping into the Seq2Seq model
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("새로운 모델을 생성하는 중 입니다.")
            sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        n_steps = int(math.ceil(len(dialog.examples) / float(batch_size))) * epoch
        for step in range(n_steps):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)
            _, loss = model.train(sess, enc_input, dec_input, targets)
            # Every 100 steps: write summaries and report the current cost.
            if (step + 1) % 100 == 0:
                model.write_logs(sess, writer, enc_input, dec_input, targets)
                print('Step:', '%06d' % model.global_step.eval(),
                      'cost =', '{:.6f}'.format(loss))
        model.saver.save(sess,
                         os.path.join(FLAGS.train_dir, FLAGS.ckpt_name),
                         global_step=model.global_step)
    print('최적화 완료!')
def train(dialog, batch_size=100, epoch=100):
    # Train the chatbot model, logging summaries every 100 steps and saving a
    # checkpoint at the end. (Python 2 syntax.)
    model = Seq2Seq(dialog.vocab_size)
    with tf.Session() as sess:
        # TODO: load the session; summaries could be used to restore model state
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print "模型检查点位置..", ckpt.model_checkpoint_path
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print "初始化会话"
            sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        total_batch = int(math.ceil(len(dialog.examples) / float(batch_size)))
        for step in range(total_batch * epoch):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)
            _, loss = model.train(sess, enc_input, dec_input, targets)
            if (step + 1) % 100 == 0:
                model.write_logs(sess, writer, enc_input, dec_input, targets)
                print 'Step:', '%06d' % model.global_step.eval(),\
                    'cost =', '{:.6f}'.format(loss)
        # saver is used to persist and later reload the weights
        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.ckpt_name)
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
    print '训练完成!'
def train(dialog, batch_size=100, epoch=100):
    """Train the conversation model, resuming from ./model if a checkpoint
    exists, and save the weights when done."""
    model = Seq2Seq(dialog.voc_size)
    with tf.Session() as sess:
        # Restore an existing checkpoint if present, otherwise initialise fresh.
        ckpt = tf.train.get_checkpoint_state('./model')
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("새로운 모델을 생성하는 중 입니다.")
            sess.run(tf.global_variables_initializer())
        # Training loop: report the cost every 100 steps.
        n_steps = int(math.ceil(len(dialog.seq_data) / float(batch_size))) * epoch
        for step in range(n_steps):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)
            _, loss = model.train(sess, enc_input, dec_input, targets)
            if step % 100 == 0:
                print('cost = ', loss)
        # Persist the trained weights.
        model.saver.save(sess, os.path.join('./model', 'conversation.ckpt'),
                         global_step=model.global_step)
    print('최적화 완료!')
def test(dialog, batch_size=100):
    """Restore the latest checkpoint, evaluate one batch, and print the
    input, expected answer and prediction for one random example."""
    print("predition test")
    model = Seq2Seq(dialog.vocab_size)
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print("reading trained model..", ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        enc_input, dec_input, targets = dialog.next_batch(batch_size)
        expect, output, accuracy = model.test(sess, enc_input, dec_input, targets)
        expect = dialog.decode(expect)
        output = dialog.decode(output)
        # BUG FIX: len(expect) / 2 is a float under Python 3 and makes
        # random.randrange raise TypeError; use integer floor division.
        pick = random.randrange(0, len(expect) // 2)
        # examples holds flattened (question, answer) pairs — hence *2 / *2+1.
        src = dialog.decode([dialog.examples[pick * 2]], True)  # renamed from `input` (shadowed builtin)
        expect = dialog.decode([dialog.examples[pick * 2 + 1]], True)
        output = dialog.cut_eos(output[pick])
        print("\naccuracy:", accuracy)
        print("result")
        print(" input:", src)
        print(" expect:", expect)
        print(" predict:", ' '.join(output))
def train(dialog, batch_size=100, epoch=100):
    """Train the seq2seq chatbot, resuming from a checkpoint when available,
    and save the final weights."""
    model = Seq2Seq(dialog.vocab_size)
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("read model from existed one")
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("creating new model")
            sess.run(tf.global_variables_initializer())
        # writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        steps_per_epoch = int(math.ceil(len(dialog.examples) / float(batch_size)))
        for step in range(steps_per_epoch * epoch):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)
            _, loss = model.train(sess, enc_input, dec_input, targets)
            # Report progress every 100 steps.
            if (step + 1) % 100 == 0:
                print('Step:', '%06d' % model.global_step.eval(),
                      'Cost =', '{:.6f}'.format(loss))
        model.saver.save(sess, os.path.join(FLAGS.train_dir, FLAGS.ckpt_name),
                         global_step=model.global_step)
    print("training complete.")
def __init__(self, tokenizer, embed_size=256, hidden_size=256, n_layers=1, lr=2e-5, dropout=0.5, tf_board_dir='./tfboard_log'):
    """Set up the seq2seq network, logging and optimisation for training."""
    self.tokenizer = tokenizer
    # Build the network, initialise weights, then spread across visible GPUs.
    self.model = Seq2Seq(len(self.tokenizer), hidden_size, embed_size,
                         n_layers=n_layers, dropout=dropout).to(DEVICE)
    self.model.apply(self.init_weights)
    self.model = nn.DataParallel(self.model)
    # TensorBoard writer and logger.
    self.writer = SummaryWriter(tf_board_dir)
    self.log = logging.getLogger('Trainer')
    # self.log.setLevel(logging.INFO)
    self.log.warning(f'CUDA count: {torch.cuda.device_count()}')
    # Keep the sizes around for later reference.
    self.hidden_size = hidden_size
    self.embed_size = embed_size
    # Optimiser and loss; index 0 is treated as padding and excluded.
    trainable = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
    self.criterion = nn.CrossEntropyLoss(ignore_index=0)
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
    self.log.warning(f'trainable parameters: {trainable}')
def test():
    """Load the latest checkpoint and print responses for canned test pairs."""
    du = DataLoader(**data_config)
    # Decode one sentence at a time with a shared source/target vocabulary.
    params['src_vcb_size'] = du.vocab_size
    params['tgt_vcb_size'] = du.vocab_size
    params['batch_size'] = 1
    tf.reset_default_graph()
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False,
                                 gpu_options=tf.GPUOptions(allow_growth=True))
    with tf.Session(config=sess_config) as sess:
        model = Seq2Seq(params, mode='decode')
        sess.run(tf.global_variables_initializer())
        model.load(sess, tf.train.latest_checkpoint('./logs/model/'))
        for source, target in [('天王盖地虎', '宝塔镇妖河')]:
            prediction = model.get_response(sess, source, du)
            print('source : ', source)
            print('target : ', target)
            print('predict: ', prediction)
            print('')
def build_model(config, input_vocab_size, target_vocab_size):
    """Instantiate the encoder/decoder pair from a config dict and move the
    resulting Seq2Seq to the module-level `device`."""
    dropout = config['dropout']
    use_self_attn = config['self_attn']
    use_intra_temp = config['intra_temp_attn']
    # Decoder-side attention is forced on by either self- or intra-temporal attention.
    dec_attn = config['dec_attn']
    if use_self_attn or use_intra_temp:
        dec_attn = True
    encoder = Encoder(input_vocab_size, config['embed_size'],
                      config['hidden_size'], config['enc_num_layers'], dropout)
    decoder = Decoder(target_vocab_size, config['embed_size'],
                      config['hidden_size'], config['dec_num_layers'],
                      config['proj_size'], dropout,
                      attn_type=config['attn_type'],
                      self_attn=use_self_attn,
                      dec_attn=dec_attn,
                      intra_temp_attn=use_intra_temp)
    return Seq2Seq(encoder, decoder).to(device)
def train_batch(batch: Batch, model: Seq2Seq, criterion, optimizer, *,
                pack_seq=True, forcing_ratio=0.5, partial_forcing=True,
                rl_ratio: float = 0, vocab=None, grad_norm: float = 0):
    """Train `model` on one batch, optionally mixing in a self-critical RL loss.

    Returns (loss normalised by target length, greedy ROUGE-L of the baseline
    decode or None when rl_ratio == 0).
    """
    # Packed sequences need the true input lengths; otherwise pass None.
    if not pack_seq:
        input_lengths = None
    else:
        input_lengths = batch.input_lengths
    optimizer.zero_grad()
    input_tensor = batch.input_tensor.to(DEVICE)
    target_tensor = batch.target_tensor.to(DEVICE)
    ext_vocab_size = batch.ext_vocab_size
    # Standard (partially) teacher-forced maximum-likelihood pass.
    out = model(input_tensor, target_tensor, input_lengths, criterion,
                forcing_ratio=forcing_ratio, partial_forcing=partial_forcing,
                ext_vocab_size=ext_vocab_size)
    if rl_ratio > 0:
        assert vocab is not None
        # Self-critical training: compare a sampled decode against a greedy
        # baseline, both reusing the state saved in `out`.
        sample_out = model(input_tensor, saved_out=out, criterion=criterion,
                           sample=True, ext_vocab_size=ext_vocab_size)
        baseline_out = model(input_tensor, saved_out=out, visualize=False,
                             ext_vocab_size=ext_vocab_size)
        scores = eval_batch_output([ex.tgt for ex in batch.examples], vocab,
                                   batch.oov_dict, sample_out.decoded_tokens,
                                   baseline_out.decoded_tokens)
        greedy_rouge = scores[1]['l_f']
        # scores[0] is the sampled decode; reward = sample ROUGE-L - greedy ROUGE-L.
        neg_reward = greedy_rouge - scores[0]['l_f']
        # if sample > baseline, the reward is positive (i.e. good exploration), rl_loss is negative
        rl_loss = neg_reward * sample_out.loss
        loss = (1 - rl_ratio) * out.loss + rl_ratio * rl_loss
    else:
        loss = out.loss
        greedy_rouge = None
    loss.backward()
    if grad_norm > 0:
        clip_grad_norm_(model.parameters(), grad_norm)
    optimizer.step()
    target_length = target_tensor.size(0)
    # Normalise by target length so losses are comparable across batches.
    return loss.item() / target_length, greedy_rouge
def test_by_human():
    """Decode one training batch (German -> English) with the saved model and
    print source/prediction pairs for manual inspection."""
    # test the result from .de to .en
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    assert torch.cuda.is_available()
    print("[!] preparing dataset for test ...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    # load the model
    encoder = Encoder(de_size, embed_size, hidden_size, n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, en_size, n_layers=1, dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder).cuda()
    # NOTE(review): checkpoint name is hard-coded to epoch 21 — confirm it exists.
    seq2seq.load_state_dict(torch.load('.save/seq2seq_21.pt'))
    # only decoder 1 batch sents
    for b, batch in enumerate(train_iter):
        src, len_src = batch.src
        trg, len_trg = batch.trg
        src, trg = src.cuda(), trg.cuda()
        # do not use force teaching, just use the maximum possibility
        output = seq2seq(src, trg, 0)
        output = output.transpose(0, 1)  # (B*T*N)
        src = src.transpose(0, 1)  # (B*T)
        # src for source
        for source, result in zip(src, output):
            print('German: : ')
            print(' ', end=' ')
            for word in source:
                # skip special tokens when printing
                if DE.vocab.itos[word] in ["<pad>", "<sos>", "<unk>", "<eos>"]:
                    continue
                print(DE.vocab.itos[word], end=' ')
                # print(word, end=' ')
            print()
            print('English: ')
            print(' ', end=' ')
            for word in result:
                # greedy pick: index of the highest-probability token
                _, index = word.max(0)
                if EN.vocab.itos[index] in [
                        "<pad>", "<sos>", "<unk>", "<eos>"
                ]:
                    continue
                print(EN.vocab.itos[index], end=' ')
                # print('test ...', word)
            print()
        print("[!] End the testing ...")
        break
def initialize(self):
    """Load the tokenizer, vocabulary mappings and trained Seq2Seq weights."""
    # Force UTF-8 stdout so decoded Japanese text prints correctly.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    self.tagger = MeCab.Tagger('-Owakati')
    # JSON keys are strings; convert id->word keys back to int.
    raw_i2w = json.load(open(self.dict_i2w, "r"))
    self.id2word = {int(k): w for k, w in raw_i2w.items()}
    self.word2id = json.load(open(self.dict_w2i, "r"))
    self.model = Seq2Seq(input_words=len(self.word2id), train=False)
    chainer.serializers.load_npz(self.modelname, self.model)
def load_model(PATH: str, src_text, trg_text):
    """Rebuild a Seq2Seq from hyper-parameters encoded in the checkpoint name.

    The filename is expected to look like ``<attn>E<embed>H<hidden>.<ext>``
    (first 'E', 'H' and '.' occurrences delimit the fields).
    """
    e_pos = PATH.index('E')
    h_pos = PATH.index('H')
    dot_pos = PATH.index('.')
    attention = PATH[:e_pos]
    embedding_size = int(PATH[e_pos + 1:h_pos])
    hidden_size = int(PATH[h_pos + 1:dot_pos])
    model = Seq2Seq(src_text, embedding_size, hidden_size, trg_text, attention)
    model.load_state_dict(torch.load(PATH))
    model.eval()
    return model
def __init__(self, opt):
    """Instructor that trains a plain seq2seq model with per-token CE loss."""
    super(Seq2SeqInstructor, self).__init__(opt)
    self.seq2seq = Seq2Seq(opt.vocab_size, opt.embed_dim, opt.num_hiddens,
                           opt.num_layers, opt.drop_prob)
    if cfg.CUDA:
        self.seq2seq = self.seq2seq.cuda()
    self.optimizer = torch.optim.Adam(self.seq2seq.parameters(), lr=opt.lr)
    # reduction='none' keeps the per-token losses — presumably so the caller
    # can mask padding before averaging; confirm against the training loop.
    self.loss = nn.CrossEntropyLoss(reduction='none')
def model_load(self):
    """Reconstruct the seq2seq network from the stored checkpoint and return
    it in eval mode on `device`."""
    enc = Encoder(**self.checkpoint['encoder_parameter'])
    dec = AttentionDecoder(**self.checkpoint['decoder_parameter'])
    net = Seq2Seq(enc, dec, self.seq_len, self.get_attention)
    net.load_state_dict(self.checkpoint['model_state_dict'])
    net.to(device)
    net.eval()
    return net
def train_module():
    """Run training on the small train/val splits."""
    # Small subsets for quick iteration; swap in 'train.txt' / 'val.txt'
    # for a full run.
    train_ds = DataSet('smalltrain.txt')
    dev_ds = DataSet('smallval.txt')
    model = Seq2Seq()
    model.to(cfg.device)
    trainIters(train_ds, dev_ds, model)