Ejemplo n.º 1
0
def train_model(args):
    trace('making vocaburaries ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    model = AttentionalTranslationModel.new(src_vocab, trg_vocab, args.embed, args.hidden)

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch, order=0), args.minibatch)
        model.init_optimizer()

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch2(src_batch)
            trg_batch = fill_batch2(trg_batch)
            K = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
                trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
                trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
                trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))

            trained += K

        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))

    trace('finished.')
Ejemplo n.º 2
0
def train_model(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    model = AttentionalTranslationModel.new(src_vocab, trg_vocab, args.embed,
                                            args.hidden)

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(
            gens.sorted_parallel(gen1, gen2, 100 * args.minibatch, order=0),
            args.minibatch)
        model.init_optimizer()

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch2(src_batch)
            trg_batch = fill_batch2(trg_batch)
            K = len(src_batch)
            hyp_batch = model.train(src_batch, trg_batch)

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' %
                      (epoch + 1, args.epoch, trained + k + 1))
                trace(
                    '  src = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in src_batch[k]]))
                trace(
                    '  trg = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in trg_batch[k]]))
                trace(
                    '  hyp = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in hyp_batch[k]]))

            trained += K

        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))

    trace('finished.')
Ejemplo n.º 3
0
def test_model(args):
    trace('loading model ...')
    model = AttentionalTranslationModel.load(args.model)
    
    trace('generating translation ...')
    generated = 0

    with open(args.target, 'w') as fp:
        for src_batch in gens.batch(gens.word_list(args.source), args.minibatch):
            src_batch = fill_batch2(src_batch)
            K = len(src_batch)

            trace('sample %8d - %8d ...' % (generated + 1, generated + K))
            hyp_batch = model.predict(src_batch, args.generation_limit)

            for hyp in hyp_batch:
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                six.print_(' '.join(hyp), file=fp)

            generated += K

    trace('finished.')
Ejemplo n.º 4
0
def test_model(args):
    trace('loading model ...')
    model = AttentionalTranslationModel.load(args.model)

    trace('generating translation ...')
    generated = 0

    with open(args.target, 'w') as fp:
        for src_batch in gens.batch(gens.word_list(args.source),
                                    args.minibatch):
            src_batch = fill_batch2(src_batch)
            K = len(src_batch)

            trace('sample %8d - %8d ...' % (generated + 1, generated + K))
            hyp_batch = model.predict(src_batch, args.generation_limit)

            for hyp in hyp_batch:
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print(' '.join(hyp), file=fp)

            generated += K

    trace('finished.')