import os
import shutil
import sys

# project-local modules (import paths assumed from the usage below)
import corpus
import rnn
import seq2seq
import util
import vocab


def main(args):
    # create model destination and keep a copy of the vocabulary with it
    if not os.path.exists(args.model):
        os.makedirs(args.model)
    vocab_dest = os.path.join(args.model, 'vocab')
    print >> sys.stderr, 'Copying vocabulary to {}'.format(vocab_dest)
    shutil.copy(args.vocab, vocab_dest)

    # determine vocabulary size (optionally capped by args.vocab_size)
    print >> sys.stderr, 'Loading vocabulary from {}'.format(args.vocab)
    voc = vocab.Vocab.load(args.vocab)
    vocab_size = voc.size()
    if args.vocab_size is not None:
        vocab_size = min(vocab_size, args.vocab_size)
    print >> sys.stderr, 'Vocabulary size: {}'.format(vocab_size)

    # create sequence-to-sequence model: the encoder consumes the source
    # sequence without emitting outputs; the decoder generates the target
    encoder = rnn.Rnn(emb_dim=args.emb, vocab_size=vocab_size,
                      layers=args.hidden, suppress_output=True, lstm=args.lstm)
    decoder = rnn.Rnn(emb_dim=args.emb, vocab_size=vocab_size,
                      layers=args.hidden, suppress_output=False, lstm=args.lstm)
    s2s = seq2seq.Seq2Seq(encoder, decoder)

    # load corpus
    print >> sys.stderr, 'Loading training data from {}'.format(args.data)
    c = corpus.load_corpus(args.data, max_len=args.max_len)

    # create batches
    print >> sys.stderr, 'Creating batches...'
    batches = corpus.create_batches(c, batch_size=args.batch,
                                    shuffle=not args.no_shuffle,
                                    max_vocab_size=vocab_size)

    # train indefinitely (max_epoch=None), saving every args.save_every epochs
    print >> sys.stderr, 'Training started.'
    optimizer = util.list2optimizer(args.optim)
    util.train(s2s, batches, optimizer, args.model,
               max_epoch=None, gpu=args.gpu, save_every=args.save_every,
               get_status=_get_status)  # _get_status is defined elsewhere in this module
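# --- Hedged usage sketch (not part of the original source) ---
# main() reads a parsed-arguments object; a minimal argparse entry point
# compatible with the args.* attributes it touches could look like the
# following. All flag spellings, positional/optional choices, and defaults
# here are assumptions inferred from main(), not the project's actual CLI.

import argparse


def _parse_args():
    p = argparse.ArgumentParser(description='Train a seq2seq model.')
    p.add_argument('data', help='path to training corpus')           # args.data
    p.add_argument('vocab', help='path to vocabulary file')          # args.vocab
    p.add_argument('model', help='destination directory for model')  # args.model
    p.add_argument('--vocab-size', type=int, default=None)  # cap on vocab size
    p.add_argument('--emb', type=int, default=100)           # embedding dimension
    p.add_argument('--hidden', type=int, nargs='+', default=[100])  # layer sizes
    p.add_argument('--lstm', action='store_true')             # use LSTM cells
    p.add_argument('--max-len', type=int, default=None)       # sentence length cap
    p.add_argument('--batch', type=int, default=64)
    p.add_argument('--no-shuffle', action='store_true')
    p.add_argument('--optim', nargs='+', default=['Adam'])  # util.list2optimizer input
    p.add_argument('--gpu', type=int, default=-1)
    p.add_argument('--save-every', type=int, default=1)
    return p.parse_args()


if __name__ == '__main__':
    main(_parse_args())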
import os

import numpy as np
# O.Adam() below points at an optimizer package; chainer.optimizers is
# assumed here, matching the Chainer-style API used elsewhere
from chainer import optimizers as O

# project-local modules (import paths assumed from the usage below)
import rnn
import seq2seq
import util


def test_train(tmpdir):
    """Test seq2seq can learn to reverse input"""
    encoder = rnn.Rnn(emb_dim=100, vocab_size=5, layers=[100],
                      suppress_output=True, lstm=True)
    decoder = rnn.Rnn(emb_dim=100, vocab_size=5, layers=[100],
                      suppress_output=False, lstm=True)
    s2s = seq2seq.Seq2Seq(encoder, decoder)

    def _create_batch(batch_size=20, lo=0, hi=3, length=10):
        # time-major (length, batch_size) array of token IDs in [lo+1, hi+1];
        # NB: np.random.random_integers is deprecated in modern NumPy
        # (np.random.randint(lo, hi + 1, ...) is the equivalent)
        xs_data = np.random.random_integers(
            lo, hi, (length, batch_size)).astype(np.int32) + 1
        ts_data = xs_data[::-1].copy()  # reversed xs_data
        return xs_data, ts_data

    train_batches = map(lambda _: _create_batch(), range(20))  # Python 2 map -> list
    test_xs_data, test_ts_data = _create_batch()

    def _get_status(batch, loss, acc):
        status = []
        x_data, t_data = batch
        src_length, batch_size = x_data.shape
        trg_length = t_data.shape[0]
        status.append(('src_len', src_length))
        status.append(('trg_len', trg_length))
        status.append(('batch', batch_size))
        return status

    dest_dir = str(tmpdir.mkdir('model'))
    util.train(s2s, train_batches, O.Adam(), dest_dir, max_epoch=6,
               log=True, save_every=2, get_status=_get_status)

    # check output model (epoch1/3/5 per save_every=2, plus the final epoch6)
    assert os.path.exists(os.path.join(dest_dir, 'log'))
    assert os.path.exists(os.path.join(dest_dir, 'epoch1'))
    assert not os.path.exists(os.path.join(dest_dir, 'epoch2'))
    assert os.path.exists(os.path.join(dest_dir, 'epoch3'))
    assert not os.path.exists(os.path.join(dest_dir, 'epoch4'))
    assert os.path.exists(os.path.join(dest_dir, 'epoch5'))
    assert os.path.exists(os.path.join(dest_dir, 'epoch6'))
    assert not os.path.exists(os.path.join(dest_dir, 'epoch7'))

    # check log
    log_line = None
    with open(os.path.join(dest_dir, 'log')) as f:
        log_line = f.readline()
    assert 'src_len' in log_line
    assert 'trg_len' in log_line
    assert 'batch' in log_line

    # measure test accuracy on a held-out batch
    test_loss, test_avg = s2s.forward_batch(test_xs_data, test_ts_data,
                                            train=False)
    assert test_avg > 0.6
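# --- Illustration (not part of the original test) ---
# The batches above are time-major: axis 0 indexes time steps, axis 1
# indexes examples, and the +1 shift keeps token ID 0 free (presumably a
# reserved padding/EOS ID; that convention is an assumption, not something
# the test states). The same construction, standalone and with the
# non-deprecated randint API:

def _demo_reverse_batch(length=4, batch_size=2):
    import numpy as np
    # random IDs in [1, 4]; randint's upper bound is exclusive
    xs = np.random.randint(0, 4, (length, batch_size)).astype(np.int32) + 1
    ts = xs[::-1].copy()  # target sequence = source reversed along time
    assert xs.shape == (length, batch_size)
    assert (ts[::-1] == xs).all()
    return xs, ts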