Example 1
    def hook(trainer, epoch, batch, checkpoints):
        # grab random batch from valid
        src, _ = valid[random.randint(0, len(valid)-1)]
        if trainer.model.encoder.conditional:
            (src, *_) = src
        (src, lengths) = src
        # grab random examples from batch
        idxs = torch.randperm(n)
        src = src[:, idxs.cuda() if src.data.is_cuda else idxs]
        # decoder-side vocabulary
        d = trainer.model.decoder.embeddings.d
        sep = ' ' if level == 'word' or level == 'token' else ''

        for idx, example in enumerate(src.chunk(src.size(1), 1)):
            report = '{}\nSource: '.format(idx + 1)
            report += sep.join(d.vocab[char.data[0]] for char in example)
            report += '\n'

            for sample in range(samples):
                scores, hyps, _ = trainer.model.translate_beam(
                    example, lengths=lengths, beam_width=beam_width)
                # select only best
                scores, hyps = [scores[0]], [hyps[0]]

                # report
                report += 'Sample {}:'.format(sample + 1)
                # report best n hypotheses
                report += "".join(
                    u.format_hyp(scores[i], hyps[i], i, d, level)
                    for i in range(len(hyps))
                ) + '\n'

            trainer.log("info", report)
Example 2
    def hook(trainer, epoch, batch, checkpoints):
        # prepare
        d = trainer.model.decoder.embeddings.d
        items = min(n, dataset.batch_size)
        # sample random batch
        batch = dataset[random.randint(0, len(dataset) - 1)]
        (inp, inp_lengths), (trg, _) = batch
        inp, inp_lengths = inp[:, :items], inp_lengths[:items]
        source = trg[:, :items]
        # translate
        source, report = source.transpose(0, 1).tolist(), ''
        for sample in range(samples):
            scores, hyps, _ = trainer.model.translate_beam(
                inp, inp_lengths, beam_width=beam_width)

            for num, (score, hyp, trg) in enumerate(zip(scores, hyps, source)):
                report += u.format_hyp(score,
                                       hyp,
                                       num + 1,
                                       d,
                                       level=level,
                                       trg=trg)

        trainer.log("info", '\n***' + report + '\n***')
Example 3
    def hook(trainer, epoch, batch, checkpoints):
        d = trainer.datasets['train'].d['src']
        inp = torch.LongTensor([d.index(i) for i in target.split()])
        inp = Variable(inp, volatile=True).unsqueeze(1)
        z_params = trainer.model.encode(inp)
        for hyp_num in range(1, n + 1):
            score, hyp = trainer.model.generate(z_params=z_params)
            trainer.log("info", u.format_hyp(score[0], hyp[0], hyp_num, d))
Example 4
    def hook(trainer, epoch, batch, num_checkpoints):
        d = trainer.datasets['train'].d['src']
        inp = torch.LongTensor([d.index(i) for i in target.split()])
        inp = Variable(inp, volatile=True).unsqueeze(1)
        z_params = trainer.model.encode(inp)
        for hyp_num in range(1, n + 1):
            score, hyp = trainer.model.generate(z_params=z_params)
            trainer.log("info", u.format_hyp(score[0], hyp[0], hyp_num, d))
Example 5
    def hook(trainer, epoch, batch_num, checkpoint):
        trainer.log("info", "Translating %s" % target)
        scores, hyps, atts = translate(trainer.model, target, gpu, beam=beam)
        hyps = [
            u.format_hyp(score, hyp, num + 1, trainer.model.trg_dict)
            for num, (score, hyp) in enumerate(zip(scores, hyps))
        ]
        trainer.log("info", '\n***' + ''.join(hyps) + '\n***')
Example 6
    def hook(trainer, epoch, batch_num, checkpoint):
        trainer.log("info", "Translating {}".format(target))
        trg_dict = trainer.model.decoder.embeddings.d
        scores, hyps = translate(trainer.model, target, gpu, beam=beam)
        hyps = [
            u.format_hyp(score, hyp, num + 1, trg_dict)
            for num, (score, hyp) in enumerate(zip(scores, hyps))
        ]
        trainer.log("info", '\n***' + ''.join(hyps) + '\n***')
Example 7
    def hook(trainer, epoch, batch_num, checkpoint):
        trainer.log("info", "Checking training...")
        if validate:
            loss = trainer.validate_model()
            trainer.log("info", "Valid loss: %g" % loss)
            trainer.log("info", "Registering early stopping loss...")
            if early_stopping is not None:
                early_stopping.add_checkpoint(loss)
        trainer.log("info", "Generating text...")
        scores, hyps = trainer.model.generate(
            d, seed_text=seed_text, max_seq_len=max_seq_len, gpu=gpu,
            method=method, temperature=temperature, width=width)
        hyps = [u.format_hyp(score, hyp, hyp_num + 1, d)
                for hyp_num, (score, hyp) in enumerate(zip(scores, hyps))]
        trainer.log("info", '\n***' + ''.join(hyps) + "\n***")
Example 8
import argparse
from itertools import product

from nltk import word_tokenize
from scipy.spatial.distance import cosine  # assumed source of cosine below

# u and zorro are project-local helper modules, not shown in this snippet

def main():
    # parse params:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model_dir',
        default='EN_10MSENTS/Skipthoughts-2018_01_23-05_25_13-80.372-final',
        type=str)
    #parser.add_argument('--file_path', default='big.txt', type=str)
    parser.add_argument('--beam', action='store_true')
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--max_len', default=4, type=int)
    parser.add_argument('--target', default=None, type=str)
    args = parser.parse_args()

    # load model and dict
    model = u.load_model(args.model_dir + '/model.pt')
    vocab_dict = u.load_model(args.model_dir + '/model.dict.pt')

    sents = ['It was a warm day.', 'It was a cold day.', 'His name was Mike.']

    for a, b in product(sents, repeat=2):
        x = zorro.utils.embed_single(model,
                                     [t.lower() for t in word_tokenize(a)])
        y = zorro.utils.embed_single(model,
                                     [t.lower() for t in word_tokenize(b)])

        print(a, b, cosine(x, y))

    # translate the target:
    if args.target:
        tokens = [t.lower() for t in word_tokenize(args.target)]
        x = zorro.utils.embed_single(model, tokens)

        scores, hyps = zorro.utils.translate(model,
                                             tokens,
                                             beam=args.beam,
                                             max_len=args.max_len,
                                             gpu=args.gpu)
        hyps = [
            u.format_hyp(score, hyp, num + 1, vocab_dict)
            for num, (score, hyp) in enumerate(zip(scores, hyps))
        ]
        print(f'Translation for "{args.target}":\n',
              '\n***' + ''.join(hyps) + '\n***')
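
If cosine here is scipy.spatial.distance.cosine, note that it returns a distance, i.e. 1 - cosine_similarity, so two identical sentences print 0.0 rather than 1.0:

    import numpy as np
    from scipy.spatial.distance import cosine

    x = np.array([1.0, 0.0])
    print(cosine(x, x))        # 0.0 -> distance
    print(1 - cosine(x, x))    # 1.0 -> similarity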
    """
Example 9
def report(trainer, items):
    dataset, d = trainer.datasets['valid'], trainer.model.encoder.embeddings.d
    # sample batch
    batch = dataset[random.randint(0, len(dataset) - 1)]

    src, (trg, _) = batch
    if trainer.model.encoder.conditional:
        src, _ = src
    src, src_lengths = src

    # only take so many inputs
    src, src_lengths, trg = src[:, :items], src_lengths[:items], trg[:, :items]
    scores, hyps, _ = trainer.model.translate_beam(src, src_lengths)

    trg, src = trg.transpose(0, 1).tolist(), src.transpose(0, 1).tolist()

    report = ''
    for num, (score, hyp, trg) in enumerate(zip(scores, hyps, trg)):
        report += u.format_hyp(score, hyp, num + 1, d, trg=trg)

    return report
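
trg.transpose(0, 1).tolist() converts a sequence-major (seq_len, batch) tensor into one list of token ids per example, which is the shape u.format_hyp appears to expect for trg:

    import torch

    trg = torch.tensor([[1, 4],
                        [2, 5],
                        [3, 6]])            # (seq_len=3, batch=2)
    print(trg.transpose(0, 1).tolist())     # [[1, 2, 3], [4, 5, 6]]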
Example 10
def conditional_report(trainer, items):
    dataset, d = trainer.datasets['valid'], trainer.model.encoder.embeddings.d
    _, conds_d = trainer.datasets['train'].d['trg']
    # sample batch
    inp, (_, *conds) = dataset[random.randint(0, len(dataset) - 1)]

    # drop off condition from encoder
    if trainer.model.encoder.conditional:
        inp, _ = inp
    inp, lengths = inp

    # only take so many inputs
    inp, lengths, conds = inp[:, :items], lengths[:items], [
        c[:items] for c in conds
    ]

    # resample conds
    tconds = [torch.zeros_like(c).random_(len(conds_d)) for c in conds]

    # run
    scores, hyps, _ = trainer.model.translate_beam(inp, lengths, conds=tconds)

    # stringify output
    trg = inp.transpose(0, 1).tolist()
    conds = [
        '+'.join([conds_d.vocab[c[b]] for c in conds]) for b in range(len(trg))
    ]
    tconds = [
        '+'.join([conds_d.vocab[c[b]] for c in tconds])
        for b in range(len(trg))
    ]
    conds = ['<{}>=>{}>'.format(c, tc) for c, tc in zip(conds, tconds)]

    report = ''
    for score, hyp, trg, cond in zip(scores, hyps, trg, conds):
        report += u.format_hyp(score, hyp, cond, d, trg=trg)

    return report
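
The resampling line relies on Tensor.random_(n), which fills a tensor in place with integers drawn uniformly from [0, n); each original condition is thus swapped for a random label id:

    import torch

    conds = torch.tensor([3, 3, 3, 3])
    resampled = torch.zeros_like(conds).random_(5)  # uniform ints in [0, 5)
    print(resampled)                                # e.g. tensor([1, 4, 0, 2])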
Example 11
    def hook(trainer, epoch, batch_num, checkpoint):
        trainer.log("info", "Translating {}".format(target))
        # prepare
        dataset = trainer.datasets['valid']
        d = trainer.model.decoder.embeddings.d
        items = min(max_items, dataset.batch_size)
        # sample random batch
        batch = dataset[random.randint(0, len(dataset) - 1)]
        (src, src_lengths), (trg, _) = batch
        src, src_lengths = src[:, :items], src_lengths[:items]
        trg = trg[:, :items]
        # translate
        scores, hyps = translate(trainer.model, src, src_lengths, beam=beam)
        # report
        trues, report = trg.transpose(0, 1).tolist(), ''
        for num, (score, hyp, trg) in enumerate(zip(scores, hyps, trues)):
            report += u.format_hyp(score,
                                   hyp,
                                   num + 1,
                                   d,
                                   level='char',
                                   trg=trg)

        trainer.log("info", '\n***' + report + '\n***')
Example 12
def report(trainer, items):
    dataset, d = trainer.datasets['valid'], trainer.model.encoder.embeddings.d
    # sample batch
    inp, _ = dataset[random.randint(0, len(dataset) - 1)]

    # drop off condition
    if trainer.model.encoder.conditional:
        inp, _ = inp
    inp, lengths = inp

    # only take so many inputs
    inp, lengths = inp[:, :items], lengths[:items]

    # run
    scores, hyps, _ = trainer.model.translate_beam(inp, lengths)

    # stringify output
    trg = inp.transpose(0, 1).tolist()

    report = ''
    for num, (score, hyp, trg) in enumerate(zip(scores, hyps, trg)):
        report += u.format_hyp(score, hyp, num + 1, d, trg=trg)

    return report
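
Standalone reporters like report above are presumably wrapped into trainer hooks elsewhere; a hypothetical binding, mirroring the hook signatures in the other examples:

    def make_hook(items=5):
        def hook(trainer, epoch, batch_num, checkpoint):
            trainer.log('info', report(trainer, items))
        return hook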