Example #1
def get_reward_funcs(forward_path, backward_path, *args, **kwargs):
    net_forward = Seq2Seq.load(forward_path, *args, **kwargs)
    net_backward = Seq2Seq.load(backward_path, *args, **kwargs)
    minfo = MutualInformation(net_forward, net_backward)
    eanswer = EaseAnswering(net_forward)
    iflow = InformationFlow()
    return minfo, eanswer, iflow
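
A hypothetical call site for get_reward_funcs; the checkpoint paths below are placeholders, not files from the project.

# Hedged usage sketch: both paths are assumptions.
minfo, eanswer, iflow = get_reward_funcs('models/forward.pt', 'models/backward.pt')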
Example #2
def eval_bs(test_set: Dataset, vocab: Vocab, model: Seq2Seq, params: Params):
    test_gen = test_set.generator(1, vocab, None, bool(params.pointer))
    n_samples = int(params.test_sample_ratio * len(test_set.pairs))

    if params.test_save_results and params.model_path_prefix:
        result_file = tarfile.open(params.model_path_prefix + ".results.tgz", 'w:gz')
    else:
        result_file = None

    model.eval()
    r1, r2, rl, rsu4 = 0, 0, 0, 0
    prog_bar = tqdm(range(1, n_samples + 1))
    for i in prog_bar:
        batch = next(test_gen)
        scores, file_content = eval_bs_batch(batch, model, vocab, pack_seq=params.pack_seq,
                                             beam_size=params.beam_size,
                                             min_out_len=params.min_out_len,
                                             max_out_len=params.max_out_len,
                                             len_in_words=params.out_len_in_words,
                                             details=result_file is not None)
        if file_content:
            file_content = file_content.encode('utf-8')
            file_info = tarfile.TarInfo(name='%06d.txt' % i)
            file_info.size = len(file_content)
            result_file.addfile(file_info, fileobj=BytesIO(file_content))
        if scores:
            r1 += scores[0]['1_f']
            r2 += scores[0]['2_f']
            rl += scores[0]['l_f']
            rsu4 += scores[0]['su4_f']
            prog_bar.set_postfix(R1='%.4g' % (r1 / i * 100), R2='%.4g' % (r2 / i * 100),
                                 RL='%.4g' % (rl / i * 100), RSU4='%.4g' % (rsu4 / i * 100))
Example #3
def main():
    args = parse_arguments()
    hidden_size = 100
    embed_size = 50

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, src, trg = load_dataset(args.batch_size)
    src_size, trg_size = len(src.vocab), len(trg.vocab)
    print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)" %
          (len(train_iter), len(
              train_iter.dataset), len(test_iter), len(test_iter.dataset)))
    print("[src_vocab]:%d [trg_vocab]:%d" % (src_size, trg_size))

    print("[!] Instantiating models...")
    encoder = Encoder(src_size,
                      embed_size,
                      hidden_size,
                      n_layers=2,
                      dropout=0.5)
    decoder = Decoder(embed_size,
                      hidden_size,
                      trg_size,
                      n_layers=1,
                      dropout=0.5)
    if cuda:
        seq2seq = Seq2Seq(encoder, decoder).cuda()
    else:
        seq2seq = Seq2Seq(encoder, decoder)
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)

    best_val_loss = None
    for e in range(1, args.epochs + 1):
        train(e, seq2seq, optimizer, train_iter, trg_size, args.grad_clip, src,
              trg)
        val_loss = evaluate(seq2seq, val_iter, trg_size, src, trg)
        print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2fS" %
              (e, val_loss, math.exp(val_loss)))

        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir(".save"):
                os.makedirs(".save")
            torch.save(seq2seq.state_dict(), './.save/seq2seq_%d.pt' % (e))
            best_val_loss = val_loss
    sos = trg.vocab.stoi['<sos>']
    eos = trg.vocab.stoi['<eos>']
    index2str = {}
    for (key, value) in trg.vocab.stoi.items():
        index2str[value] = key
    write_predict(seq2seq, src.vocab.stoi, sos, eos, index2str)
Example #4
def get_model(input_vocab_size, output_vocab_size):
    return Seq2Seq(input_vocab_size, output_vocab_size,
                   get_buckets(FLAGS.buckets), FLAGS.layer_size,
                   FLAGS.n_layers, FLAGS.max_gradient_norm, FLAGS.batch_size,
                   FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
                   get_rnn_cell(FLAGS.rnn_cell), FLAGS.n_samples,
                   FLAGS.forward_only)
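
The FLAGS object above implies TF1-style flag definitions elsewhere in the project; a minimal sketch, with flag names taken from the snippet and every default value an assumption:

import tensorflow as tf

# Hypothetical flag definitions; defaults are placeholders, not the project's values.
tf.app.flags.DEFINE_string('buckets', '5,10;10,15;20,25', 'bucket sizes as "in,out" pairs')
tf.app.flags.DEFINE_integer('layer_size', 256, 'units per RNN layer')
tf.app.flags.DEFINE_integer('n_layers', 2, 'number of RNN layers')
tf.app.flags.DEFINE_float('max_gradient_norm', 5.0, 'gradient clipping norm')
tf.app.flags.DEFINE_integer('batch_size', 64, 'batch size')
tf.app.flags.DEFINE_float('learning_rate', 0.5, 'initial learning rate')
tf.app.flags.DEFINE_float('learning_rate_decay_factor', 0.99, 'learning-rate decay')
tf.app.flags.DEFINE_string('rnn_cell', 'gru', 'RNN cell type')
tf.app.flags.DEFINE_integer('n_samples', 512, 'sampled-softmax sample count')
tf.app.flags.DEFINE_boolean('forward_only', False, 'inference-only mode')
FLAGS = tf.app.flags.FLAGS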
Example #5
def experiment_fn(run_config, params):

    seq2seq = Seq2Seq()
    estimator = tf.estimator.Estimator(model_fn=seq2seq.model_fn,
                                       model_dir=Config.train.model_dir,
                                       params=params,
                                       config=run_config)

    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)

    train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set()

    train_input_fn, train_input_hook = dataset.get_train_inputs(
        train_X, train_y)
    test_input_fn, test_input_hook = dataset.get_test_inputs(test_X, test_y)

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=[
            train_input_hook,
            hook.print_variables(
                variables=['train/enc_0', 'train/dec_0', 'train/pred_0'],
                vocab=vocab,
                every_n_iter=Config.train.check_hook_n_iter)
        ],
        eval_hooks=[test_input_hook])
    return experiment
Example #6
def test(dialog, batch_size=100):
    print("\n=== 예측 테스트 ===")

    model = Seq2Seq(dialog.voc_size)

    with tf.Session() as sess:
        # Load the trained model.
        ckpt = tf.train.get_checkpoint_state('./model')
        print("Reading model from checkpoint..", ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        enc_input, dec_input, targets = dialog.next_batch(batch_size)

        expect, outputs, accuracy = model.test(sess, enc_input, dec_input, targets)

        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)

        pick = random.randrange(0, len(expect) // 2)  # integer division: randrange needs an int
        input = dialog.decode([dialog.seq_data[pick * 2]], True)
        expect = dialog.decode([dialog.seq_data[pick * 2 + 1]], True)
        outputs = dialog.cut_eos(outputs[pick])

        print("\n정확도:", accuracy)
        print("랜덤 결과\n")
        print("    입력값:", input)
        print("    실제값:", expect)
        print("    예측값:", ' '.join(outputs))
Example #7
def train_txt_generator():
    seq2seq = Seq2Seq(vocab_size, hidden_size, embedding_size)
    optimizer = opt.Adam(seq2seq.parameters(), lr=5e-4)

    num_epoch = 1

    for i in range(num_epoch):
        print('epoch {}/{}'.format(i + 1, num_epoch))
        shuffle_indices = np.random.permutation(np.arange(data_size))
        mr_ = mr[shuffle_indices]
        lengths_ = mr_lengths[shuffle_indices]
        ref_ = ref[shuffle_indices]

        for j in range(1):
            start = j * batch_size
            end = min(data_size, (j + 1) * batch_size)
            y = seq2seq.forward(torch.LongTensor(mr_[start:end]), torch.LongTensor(ref_[start:end]),
                                torch.LongTensor(lengths_[start:end]))
            ref_gt = np.array(ref[start:end], dtype=int)
            tgt = torch.tensor(np.eye(vocab_size)[ref_gt])
            loss = -torch.sum(torch.mul(torch.log(y)[:, :-1, :], tgt[:, 1:, :])) \
                   - torch.sum(torch.mul(torch.log(1-y)[:, :-1, :], 1-tgt[:, 1:, :]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(loss.item())  # report the scalar loss, not the tensor repr

            if (j+1) % 200 == 0:
                torch.save(seq2seq.state_dict(), 'checkpoint/s2s-' + str(j + 1) + '-parameter.pkl')
Example #8
    def __init__(self,
                 tokenizer,
                 model_path,
                 config_file=None,
                 embed_size=256,
                 hidden_size=256,
                 n_layers=2,
                 device='cuda'):
        self.tokenizer = tokenizer
        self.device = device
        if config_file:
            with open(config_file, 'r') as f:
                config = json.load(f)
                self.embed_size = config['embed_size']
                self.hidden_size = config['hidden_size']
                self.n_layers = config['n_layers']
        else:
            self.embed_size = embed_size
            self.hidden_size = hidden_size
            self.n_layers = n_layers

        self.model = Seq2Seq(len(self.tokenizer),
                             self.hidden_size,
                             self.embed_size,
                             n_layers=self.n_layers,
                             device=device)
        self.model.load_state_dict(torch.load(model_path), strict=False)
        self.model.to(self.device)
        self.model.eval()
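
A sketch of the JSON config this __init__ can read; the keys mirror the code above, the values are assumptions.

import json

# Hypothetical config.json matching the keys read when config_file is given.
with open('config.json', 'w') as f:
    json.dump({'embed_size': 256, 'hidden_size': 256, 'n_layers': 2}, f)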
Example #9
def test(dialog):
    print("\n=== 예측 테스트 ===")

    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        enc_input, dec_input, targets = dialog.make_batch()

        pick = random.randrange(0, len(enc_input))

        expect, outputs, accuracy = model.test(sess, [enc_input[pick]],
                                               [dec_input[pick]],
                                               [targets[pick]])

        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)

        input = dialog.decode([dialog.examples[pick]], True)
        expect = dialog.decode([dialog.examples[pick]], True)
        #outputs = dialog.cut_eos(outputs[0])

        print("\n정확도:", accuracy)
        print("랜덤 결과\n")
        print("    입력값:", input)
        print("    실제값:", expect)
        print("    예측값:", outputs)
Example #10
def build_model(model_class, config, tokenizer):
    encoder = model_class(config=config)
    decoder_layer = nn.TransformerDecoderLayer(
        d_model=config.hidden_size, nhead=config.num_attention_heads)
    decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
    model = Seq2Seq(
        encoder=encoder,
        decoder=decoder,
        config=config,
        beam_size=args.beam_size,
        max_length=args.max_target_length,
        sos_id=tokenizer.cls_token_id,
        eos_id=tokenizer.sep_token_id,
    )

    assert os.path.exists("pytorch_model.bin"), "Weights have not been downloaded."

    model.load_state_dict(
        torch.load(
            "pytorch_model.bin",
            map_location=torch.device("cpu"),
        ),
        strict=False,
    )
    return model
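
A possible call site, assuming a RoBERTa-style encoder (as in CodeBERT-like setups); the checkpoint name and the global args object are assumptions.

from transformers import RobertaConfig, RobertaModel, RobertaTokenizer

# Hypothetical usage; 'microsoft/codebert-base' is an assumed checkpoint, and
# build_model also expects a global `args` with beam_size and max_target_length.
config = RobertaConfig.from_pretrained('microsoft/codebert-base')
tokenizer = RobertaTokenizer.from_pretrained('microsoft/codebert-base')
model = build_model(RobertaModel, config, tokenizer)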
Example #11
def train(dialog, batch_size=100, epoch=100):
    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:

        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("새로운 모델을 생성하는 중 입니다.")
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        total_batch = int(math.ceil(len(dialog.examples) / float(batch_size)))

        for step in range(total_batch * epoch):
            # fetch the next training batch
            enc_input, dec_input, targets = dialog.next_batch(batch_size)

            # run one training step
            _, loss = model.train(sess, enc_input, dec_input, targets)

        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.ckpt_name)
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)

    print('Training complete!')
Example #12
def test(dialog, batch_size=100):
    print "\n=== 예측 테스트 ==="

    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print "다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        enc_input, dec_input, targets = dialog.next_batch(batch_size)

        expect, outputs, accuracy = model.test(sess, enc_input, dec_input, targets)

        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)

        pick = random.randrange(0, len(expect) // 2)
        input = dialog.decode([dialog.examples[pick * 2]], True)
        expect = dialog.decode([dialog.examples[pick * 2 + 1]], True)
        outputs = dialog.cut_eos(outputs[pick])

        print "\n정확도:", accuracy
        print "랜덤 결과\n",
        print "    입력값:", input
        print "    실제값:", expect
        print "    예측값:", ' '.join(outputs)
Example #13
def main(fpath):
    ENC_EMB_DIM = 256
    DEC_EMB_DIM = 256
    ENC_HID_DIM = 512
    DEC_HID_DIM = 512
    ENC_DROPOUT = 0.5
    DEC_DROPOUT = 0.5

    device = torch.device('cuda')
    dataset = Dataset()
    INPUT_DIM = len(dataset.SRC.vocab)
    OUTPUT_DIM = len(dataset.TRG.vocab)
    SRC_PAD_IDX = dataset.SRC.vocab.stoi[dataset.SRC.pad_token]

    encoder = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                      ENC_DROPOUT)
    attention = Attention(ENC_HID_DIM, DEC_HID_DIM)
    decoder = Decoder(DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, OUTPUT_DIM,
                      DEC_DROPOUT, attention)
    model = Seq2Seq(encoder, decoder, SRC_PAD_IDX, device)
    model.load_state_dict(torch.load("best_model.pt"))
    model.to(device)
    with open(fpath, "r") as f:
        sentences = f.readlines()

    translate_sentence(model, sentences, dataset.SRC, dataset.TRG, device)
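
A hypothetical invocation; fpath is expected to point at a plain-text file with one source sentence per line (the file name is a placeholder).

if __name__ == '__main__':
    main('sentences.txt')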
Example #14
def test(dialog, batch_size=100):
    print "\n=== 测试 ==="

    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print "模型检查点位置.", ckpt.model_checkpoint_path
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        enc_input, dec_input, targets = dialog.next_batch(batch_size)

        expect, outputs, accuracy = model.test(sess, enc_input, dec_input,
                                               targets)

        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)

        pick = random.randrange(0, len(expect) // 2)
        input = dialog.decode([dialog.examples[pick * 2]], True)
        expect = dialog.decode([dialog.examples[pick * 2 + 1]], True)
        outputs = dialog.cut_eos(outputs[pick])

        print "\n准确率:", accuracy
        print "数据展示\n",
        print "    输入数据:", input
        print "    答案:", expect
        print "    实际输出:", ' '.join(outputs)
Example #15
    def _load_model(self):
        print('Loading pretrained model')
        if self.config['model']['seq2seq'] == 'vanilla':
            print('Loading Seq2Seq Vanilla model')

            self.model = Seq2Seq(
                src_emb_dim=self.config['model']['dim_word_src'],
                trg_emb_dim=self.config['model']['dim_word_trg'],
                src_vocab_size=len(self.src_dict),
                trg_vocab_size=len(self.tgt_dict),
                src_hidden_dim=self.config['model']['dim_src'],
                trg_hidden_dim=self.config['model']['dim_trg'],
                pad_token_src=self.src_dict['<pad>'],
                pad_token_trg=self.tgt_dict['<pad>'],
                use_cuda=self.use_cuda,
                batch_size=self.config['data']['batch_size'],
                bidirectional=self.config['model']['bidirectional'],
                nlayers=self.config['model']['n_layers_src'],
                nlayers_trg=self.config['model']['n_layers_trg'],
                dropout=0.,
            )
            if self.use_cuda:
                self.model = self.model.cuda()

        self.model.load_state_dict(
            self.model_weights
        )
Example #16
def train(dialog, batch_size=100, epoch=100):
    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        # TODO: move session loading and summary-logging logic into the Seq2Seq model
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("새로운 모델을 생성하는 중 입니다.")
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        total_batch = int(math.ceil(len(dialog.examples) / float(batch_size)))

        for step in range(total_batch * epoch):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)

            _, loss = model.train(sess, enc_input, dec_input, targets)

            if (step + 1) % 100 == 0:
                model.write_logs(sess, writer, enc_input, dec_input, targets)

                print('Step:', '%06d' % model.global_step.eval(),
                      'cost =', '{:.6f}'.format(loss))

        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.ckpt_name)
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)

    print('Training complete!')
Example #17
def train(dialog, batch_size=100, epoch=100):
    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        # TODO: load the session; summaries can be used to restore model data
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print "模型检查点位置..", ckpt.model_checkpoint_path
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print "初始化会话"
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        total_batch = int(math.ceil(len(dialog.examples) / float(batch_size)))

        for step in range(total_batch * epoch):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)

            _, loss = model.train(sess, enc_input, dec_input, targets)

            if (step + 1) % 100 == 0:
                model.write_logs(sess, writer, enc_input, dec_input, targets)

                print('Step:', '%06d' % model.global_step.eval(),
                      'cost =', '{:.6f}'.format(loss))
        # saver is used to save and load the model data
        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.ckpt_name)
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)

    print('Training complete!')
Example #18
def train(dialog, batch_size=100, epoch=100):
    model = Seq2Seq(dialog.voc_size)

    with tf.Session() as sess:

        # Load the model; create a new one if none exists.
        ckpt = tf.train.get_checkpoint_state('./model')
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("새로운 모델을 생성하는 중 입니다.")
            sess.run(tf.global_variables_initializer())

        # Start training.
        total_batch = int(math.ceil(len(dialog.seq_data) / float(batch_size)))
        for step in range(total_batch * epoch):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)
            _, loss = model.train(sess, enc_input, dec_input, targets)
            if step % 100 == 0:
                print('cost = ', loss)

        # Save the trained model.
        checkpoint_path = os.path.join('./model', 'conversation.ckpt')
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)

        print('Training complete!')
Example #19
def test(dialog, batch_size=100):
    print("predition test")

    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print("reading trained model..", ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        enc_input, dec_input, targets = dialog.next_batch(batch_size)

        expect, output, accuracy = model.test(sess, enc_input, dec_input,
                                              targets)

        expect = dialog.decode(expect)
        output = dialog.decode(output)

        pick = random.randrange(0, len(expect) // 2)  # integer division: randrange needs an int
        input = dialog.decode([dialog.examples[pick * 2]], True)
        expect = dialog.decode([dialog.examples[pick * 2 + 1]], True)
        output = dialog.cut_eos(output[pick])

        print("\naccuracy:", accuracy)
        print("result")
        print("     input:", input)
        print("     expect:", expect)
        print("     predict:", ' '.join(output))
Example #20
def train(dialog, batch_size=100, epoch=100):
    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("read model from existed one")
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("creating new model")
            sess.run(tf.global_variables_initializer())

        # writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        total_batch = int(math.ceil(len(dialog.examples) / float(batch_size)))

        for step in range(total_batch * epoch):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)

            _, loss = model.train(sess, enc_input, dec_input, targets)

            if (step + 1) % 100 == 0:
                print('Step:', '%06d' % model.global_step.eval(), 'Cost =',
                      '{:.6f}'.format(loss))

        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.ckpt_name)
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)

    print("training complete.")
Example #21
    def __init__(self,
                 tokenizer,
                 embed_size=256,
                 hidden_size=256,
                 n_layers=1,
                 lr=2e-5,
                 dropout=0.5,
                 tf_board_dir='./tfboard_log'):
        # tokenizer
        self.tokenizer = tokenizer

        # model
        self.model = Seq2Seq(len(self.tokenizer),
                             hidden_size,
                             embed_size,
                             n_layers=n_layers,
                             dropout=dropout).to(DEVICE)
        self.model.apply(self.init_weights)
        self.model = nn.DataParallel(self.model)
        # tfboard & log
        self.writer = SummaryWriter(tf_board_dir)
        self.log = logging.getLogger('Trainer')
        # self.log.setLevel(logging.INFO)
        self.log.warning(f'CUDA count: {torch.cuda.device_count()}')

        # parameters
        self.hidden_size = hidden_size
        self.embed_size = embed_size

        # optimizer & criterion
        parameters_num = sum(p.numel() for p in self.model.parameters()
                             if p.requires_grad)
        self.criterion = nn.CrossEntropyLoss(ignore_index=0)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        self.log.warning(f'trainable parameters: {parameters_num}')
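
The trainer applies self.init_weights to the model; a minimal sketch of such a method, assuming a simple uniform scheme (the repository's actual initializer may differ).

    # Hypothetical init_weights; the [-0.08, 0.08] uniform range is an assumption.
    def init_weights(self, m):
        # m is each submodule visited by nn.Module.apply
        if isinstance(m, (nn.Linear, nn.Embedding)):
            nn.init.uniform_(m.weight.data, -0.08, 0.08)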
Example #22
def test():
    du = DataLoader(**data_config)
    params['src_vcb_size'] = du.vocab_size
    params['tgt_vcb_size'] = du.vocab_size
    params['batch_size'] = 1
    tf.reset_default_graph()
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    with tf.Session(config=config) as sess:
        model = Seq2Seq(params, mode='decode')
        sess.run(tf.global_variables_initializer())
        model.load(sess, tf.train.latest_checkpoint('./logs/model/'))
        # model.load(sess, './logs/model/model_16.ckpt')

        # sent = input('you: ')
        # while (sent):
        #     result = model.get_response(sess, sent, du)
        #     print('bot: ', result)
        #
        #     sent = input('you: ')

        sents = [('天王盖地虎', '宝塔镇妖河')]
        for sent in sents:
            result = model.get_response(sess, sent[0], du)

            print('source : ', sent[0])
            print('target : ', sent[1])
            print('predict: ', result)
            print('')
Example #23
def build_model(config, input_vocab_size, target_vocab_size):

    embed_size = config['embed_size']
    hidden_size = config['hidden_size']
    proj_size = config['proj_size']
    enc_num_layers = config['enc_num_layers']
    dec_num_layers = config['dec_num_layers']
    dropout = config['dropout']
    attn_type = config['attn_type']
    self_attn = config['self_attn']
    intra_temp_attn = config['intra_temp_attn']
    dec_attn = config['dec_attn']
    if self_attn or intra_temp_attn:
        dec_attn = True

    encoder = Encoder(input_vocab_size, embed_size, hidden_size,
                      enc_num_layers, dropout)
    decoder = Decoder(target_vocab_size,
                      embed_size,
                      hidden_size,
                      dec_num_layers,
                      proj_size,
                      dropout,
                      attn_type=attn_type,
                      self_attn=self_attn,
                      dec_attn=dec_attn,
                      intra_temp_attn=intra_temp_attn)
    model = Seq2Seq(encoder, decoder).to(device)

    return model
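
A sketch of the config dict build_model consumes; the keys come from the code above, the values are placeholders.

# Hypothetical config; every value below is an assumption.
config = {
    'embed_size': 256, 'hidden_size': 512, 'proj_size': 256,
    'enc_num_layers': 2, 'dec_num_layers': 1, 'dropout': 0.3,
    'attn_type': 'general', 'self_attn': False,
    'intra_temp_attn': False, 'dec_attn': False,
}
model = build_model(config, input_vocab_size=32000, target_vocab_size=32000)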
Example #24
def train_batch(batch: Batch,
                model: Seq2Seq,
                criterion,
                optimizer,
                *,
                pack_seq=True,
                forcing_ratio=0.5,
                partial_forcing=True,
                rl_ratio: float = 0,
                vocab=None,
                grad_norm: float = 0):
    if not pack_seq:
        input_lengths = None
    else:
        input_lengths = batch.input_lengths

    optimizer.zero_grad()
    input_tensor = batch.input_tensor.to(DEVICE)
    target_tensor = batch.target_tensor.to(DEVICE)
    ext_vocab_size = batch.ext_vocab_size

    out = model(input_tensor,
                target_tensor,
                input_lengths,
                criterion,
                forcing_ratio=forcing_ratio,
                partial_forcing=partial_forcing,
                ext_vocab_size=ext_vocab_size)

    if rl_ratio > 0:
        assert vocab is not None
        sample_out = model(input_tensor,
                           saved_out=out,
                           criterion=criterion,
                           sample=True,
                           ext_vocab_size=ext_vocab_size)
        baseline_out = model(input_tensor,
                             saved_out=out,
                             visualize=False,
                             ext_vocab_size=ext_vocab_size)
        scores = eval_batch_output([ex.tgt for ex in batch.examples], vocab,
                                   batch.oov_dict, sample_out.decoded_tokens,
                                   baseline_out.decoded_tokens)
        greedy_rouge = scores[1]['l_f']
        neg_reward = greedy_rouge - scores[0]['l_f']
        # if sample > baseline, the reward is positive (i.e. good exploration), rl_loss is negative
        rl_loss = neg_reward * sample_out.loss
        loss = (1 - rl_ratio) * out.loss + rl_ratio * rl_loss
    else:
        loss = out.loss
        greedy_rouge = None

    loss.backward()
    if grad_norm > 0:
        clip_grad_norm_(model.parameters(), grad_norm)
    optimizer.step()

    target_length = target_tensor.size(0)
    return loss.item() / target_length, greedy_rouge
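
A minimal loop around train_batch; train_batches, model, criterion, optimizer and vocab are assumed to come from the surrounding project, and the hyperparameter values are placeholders.

# Hedged sketch: every name below is an assumption.
for epoch in range(10):
    for batch in train_batches:
        avg_loss, rouge = train_batch(batch, model, criterion, optimizer,
                                      forcing_ratio=0.75, rl_ratio=0.0,
                                      vocab=vocab, grad_norm=2.0)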
Example #25
def test_by_human():
    # test the result from .de to .en
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    assert torch.cuda.is_available()

    print("[!] preparing dataset for test ...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)

    # load the model
    encoder = Encoder(de_size,
                      embed_size,
                      hidden_size,
                      n_layers=2,
                      dropout=0.5)
    decoder = Decoder(embed_size,
                      hidden_size,
                      en_size,
                      n_layers=1,
                      dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder).cuda()
    seq2seq.load_state_dict(torch.load('.save/seq2seq_21.pt'))

    # only decoder 1 batch sents
    for b, batch in enumerate(train_iter):
        src, len_src = batch.src
        trg, len_trg = batch.trg
        src, trg = src.cuda(), trg.cuda()
        # do not use force teaching, just use the maximum possibility
        output = seq2seq(src, trg, 0)
        output = output.transpose(0, 1)  # (B*T*N)
        src = src.transpose(0, 1)  # (B*T)

        # src
        for source, result in zip(src, output):
            print('German: ')
            print('    ', end=' ')
            for word in source:
                if DE.vocab.itos[word] in ["<pad>", "<sos>", "<unk>", "<eos>"]:
                    continue
                print(DE.vocab.itos[word], end=' ')
                # print(word, end=' ')
            print()
            print('English: ')
            print('    ', end=' ')
            for word in result:
                _, index = word.max(0)
                if EN.vocab.itos[index] in [
                        "<pad>", "<sos>", "<unk>", "<eos>"
                ]:
                    continue
                print(EN.vocab.itos[index], end=' ')
                # print('test ...', word)
            print()

        print("[!] End the testing ...")
        break
Example #26
    def initialize(self):
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
        self.tagger = MeCab.Tagger('-Owakati')
        self.id2word = json.load(open(self.dict_i2w, "r"))
        self.id2word = {int(key): value for key, value in self.id2word.items()}
        self.word2id = json.load(open(self.dict_w2i, "r"))
        self.model = Seq2Seq(input_words=len(self.word2id), train=False)
        chainer.serializers.load_npz(self.modelname, self.model)
Example #27
def load_model(PATH: str, src_text, trg_text):
    attention = PATH[:PATH.index('E')]
    embedding_size = int(PATH[PATH.index('E') + 1:PATH.index('H')])
    hidden_size = int(PATH[PATH.index('H') + 1:PATH.index('.')])
    model = Seq2Seq(src_text, embedding_size, hidden_size, trg_text, attention)
    model.load_state_dict(torch.load(PATH))
    model.eval()
    return model
Example #28
    def __init__(self, opt):
        super(Seq2SeqInstructor, self).__init__(opt)
        self.seq2seq = Seq2Seq(opt.vocab_size, opt.embed_dim, opt.num_hiddens,
                               opt.num_layers, opt.drop_prob)
        if cfg.CUDA:
            self.seq2seq = self.seq2seq.cuda()
        self.optimizer = torch.optim.Adam(self.seq2seq.parameters(), lr=opt.lr)
        self.loss = nn.CrossEntropyLoss(reduction='none')
Example #29
    def model_load(self):
        encoder = Encoder(**self.checkpoint['encoder_parameter'])
        decoder = AttentionDecoder(**self.checkpoint['decoder_parameter'])
        model = Seq2Seq(encoder, decoder, self.seq_len, self.get_attention)
        model.load_state_dict(self.checkpoint['model_state_dict'])
        model.to(device)
        model.eval()
        return model
Example #30
def train_module():
    #train_data_set = DataSet('train.txt')
    train_data_set = DataSet('smalltrain.txt')
    #dev_data_set = DataSet('val.txt')
    dev_data_set = DataSet('smallval.txt')
    model = Seq2Seq()
    model.to(cfg.device)
    trainIters(train_data_set, dev_data_set, model)