Example #1
import os
import shutil
import sys

# Project-local modules, inferred from the calls below; the actual
# import paths inside the source package may differ.
import corpus
import rnn
import seq2seq
import util
import vocab


def main(args):
    # create model destination
    if not os.path.exists(args.model):
        os.makedirs(args.model)
    vocab_dest = os.path.join(args.model, 'vocab')
    print('Copying vocabulary to {}'.format(vocab_dest), file=sys.stderr)
    shutil.copy(args.vocab, vocab_dest)

    # determine vocabulary size
    print('Loading vocabulary from {}'.format(args.vocab), file=sys.stderr)
    voc = vocab.Vocab.load(args.vocab)
    vocab_size = voc.size()
    if args.vocab_size is not None:
        vocab_size = min(vocab_size, args.vocab_size)
    print('Vocabulary size: {}'.format(vocab_size), file=sys.stderr)

    # create sequence-to-sequence model
    encoder = rnn.Rnn(emb_dim=args.emb,
                      vocab_size=vocab_size,
                      layers=args.hidden,
                      suppress_output=True,
                      lstm=args.lstm)
    decoder = rnn.Rnn(emb_dim=args.emb,
                      vocab_size=vocab_size,
                      layers=args.hidden,
                      suppress_output=False,
                      lstm=args.lstm)
    s2s = seq2seq.Seq2Seq(encoder, decoder)

    # load corpus
    print('Loading training data from {}'.format(args.data), file=sys.stderr)
    c = corpus.load_corpus(args.data, max_len=args.max_len)

    # create batches
    print('Creating batches...', file=sys.stderr)
    batches = corpus.create_batches(c,
                                    batch_size=args.batch,
                                    shuffle=not args.no_shuffle,
                                    max_vocab_size=vocab_size)

    # train
    print('Training started.', file=sys.stderr)
    optimizer = util.list2optimizer(args.optim)
    util.train(s2s,
               batches,
               optimizer,
               args.model,
               max_epoch=None,
               gpu=args.gpu,
               save_every=args.save_every,
               get_status=_get_status)
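
Since main() only reads attributes off an argparse-style namespace, the sketch below shows one plausible way to wire it up from the command line. Every flag name, type, and default here is an assumption inferred from the attributes main() accesses; the project's real CLI may differ.

import argparse


def parse_args():
    # Hypothetical CLI wiring for main() above; names and types are
    # guesses based on how each attribute is used, not the real script.
    p = argparse.ArgumentParser(description='Train a seq2seq model.')
    p.add_argument('--model', required=True, help='model output directory')
    p.add_argument('--vocab', required=True, help='vocabulary file to copy and load')
    p.add_argument('--vocab-size', type=int, default=None, help='cap on vocabulary size')
    p.add_argument('--data', required=True, help='training corpus file')
    p.add_argument('--emb', type=int, default=256, help='embedding dimension')
    p.add_argument('--hidden', type=int, default=512, help='hidden layer size')
    p.add_argument('--lstm', action='store_true', help='use LSTM cells')
    p.add_argument('--max-len', type=int, default=None, help='drop longer sequences')
    p.add_argument('--batch', type=int, default=64, help='batch size')
    p.add_argument('--no-shuffle', action='store_true', help='keep corpus order')
    p.add_argument('--optim', nargs='+', default=['SGD'],
                   help='optimizer name followed by its hyper-parameters '
                        '(a list, since util.list2optimizer takes one)')
    p.add_argument('--gpu', type=int, default=-1, help='GPU id, -1 for CPU')
    p.add_argument('--save-every', type=int, default=1, help='checkpoint interval')
    return p.parse_args()


if __name__ == '__main__':
    main(parse_args())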
Example #2
import corpus


def test_load_corpus(corpus_path):
    # Loading the fixture corpus and batching it with batch_size=3
    # should split the examples into exactly 2 batches.
    cps = corpus.load_corpus(corpus_path)
    batches = corpus.create_batches(cps, 3)
    assert len(batches) == 2
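
The corpus_path fixture this test depends on is not shown. Below is a minimal pytest sketch of what it could look like; the one-pair-per-line, tab-separated file format is an assumption, not the project's documented format. With 4 pairs and a batch size of 3, create_batches would return 2 batches (one of 3 and one of 1), matching the assertion.

import pytest


@pytest.fixture
def corpus_path(tmp_path):
    # Hypothetical fixture: write a tiny corpus so that
    # create_batches(cps, 3) yields exactly 2 batches.
    # Assumes load_corpus reads one tab-separated pair per line.
    path = tmp_path / 'corpus.txt'
    pairs = [
        'a b c\tx y z',
        'd e\tu v',
        'f g\tw',
        'h\tp q r',
    ]
    path.write_text('\n'.join(pairs) + '\n')
    return str(path)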