Example #1
def main(argv=None):
    args = commandLineParser.parse_args()
    if os.path.isdir('CMDs'):
        with open('CMDs/step_test_rnnlm.txt', 'a') as f:
            f.write(' '.join(sys.argv) + '\n')
    else:
        os.mkdir('CMDs')
        with open('CMDs/step_test_rnnlm.txt', 'a') as f:
            f.write(' '.join(sys.argv) + '\n')

    valid_data = process_data_lm("valid.dat",
                                 path="data",
                                 spId=False,
                                 input_index='input.wlist.index',
                                 output_index='input.wlist.index',
                                 bptt=None)

    network_architecture = parse_params('./config')

    rnnlm = RNNLM(network_architecture=network_architecture,
                  seed=args.seed,
                  name=args.name,
                  dir='./',
                  load_path=args.load_path,
                  debug_mode=args.debug)

    print 'Training Completed. Now predicting on validation data'
    rnnlm.predict(valid_data)
Example #2
def rnnlm_generate_sequence():
    from rnnlm import RNNLM
    np.random.seed(10)
    L = np.random.randn(20, 10)
    model = RNNLM(L0=L)
    model.H = np.random.randn(20, 20)
    s, J = model.generate_sequence(0, 1, maxlen=15)
    print "dummy J: %g" % J
    print "dummy seq: len(s) = %d" % len(s)
    assert (len(s) <= 15 + 1)
    assert (s[0] == 0)
    assert (J > 0)
Example #3
def rnnlm_generate_sequence():
    from rnnlm import RNNLM
    np.random.seed(10)
    L = np.random.randn(20,10)
    model = RNNLM(L0 = L)
    model.H = np.random.randn(20,20)
    s, J = model.generate_sequence(0,1, maxlen=15)
    print "dummy J: %g" % J
    print "dummy seq: len(s) = %d" % len(s)
    assert(len(s) <= 15+1)
    assert(s[0] == 0)
    assert(J > 0)
Example #4
def train(lr):
    with open(vocab_freq_file,'r') as f:
        vocab_freq=pickle.load(f)
    vocab_p = Q_w(vocab_freq,alpha)
    J,q=alias_setup(vocab_p)

    # Load data
    print 'loading dataset...'
    train_data=TextIterator(train_datafile,n_batch=n_batch,maxlen=maxlen)
    valid_data = TextIterator(valid_datafile,n_batch=n_batch,maxlen=maxlen)
    test_data=TextIterator(test_datafile,n_batch=n_batch,maxlen=maxlen)

    print 'building model...'
    model=RNNLM(n_input,n_hidden,vocabulary_size, cell=rnn_cell,optimizer=optimizer,p=p,q_w=vocab_p,k=k)
    if os.path.isfile(model_dir):
        print 'loading checkpoint parameters....',model_dir
        model=load_model(model_dir,model)
    if goto_line>0:
        train_data.goto_line(goto_line)
        print 'goto line:',goto_line
    print 'training start...'
    start=time.time()

    idx = 0
    for epoch in xrange(NEPOCH):
        error = 0
        for x,x_mask,y,y_mask in train_data:
            idx+=1
            negy=negative_sample(y,y_mask,k,J,q)
            cost=model.train(x,x_mask, y, negy,y_mask,lr)
            #print cost
            error+=cost
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN Or Inf detected!'
                return -1
            if idx % disp_freq==0:
                logger.info('epoch: %d idx: %d cost: %f ppl: %f' % (
                    epoch, idx, (error / disp_freq), np.exp(error / (1.0 * disp_freq))))
                error=0
            if idx%save_freq==0:
                logger.info( 'dumping...')
                save_model('./model/parameters_%.2f.pkl'%(time.time()-start),model)
            if idx % valid_freq==0 :
                logger.info('validating...')
                valid_cost,wer=evaluate(valid_data,model)
                logger.info('validation cost: %f perplexity: %f,word_error_rate:%f' % (valid_cost, np.exp(valid_cost), wer))
            if idx % test_freq==0 :
                logger.info('testing...')
                test_cost,wer=evaluate(test_data,model)
                logger.info('test cost: %f perplexity: %f,word_error_rate:%f' % (test_cost, np.exp(test_cost),wer))

    print "Finished. Time = "+str(time.time()-start)
Example #5
def test():
    valid_data = TextIterator(valid_datafile,
                              filepath,
                              n_batch=n_batch,
                              brown_or_huffman=brown_or_huffman,
                              mode=matrix_or_vector,
                              word2idx_path=word2idx_path)
    test_data = TextIterator(test_datafile,
                             filepath,
                             n_batch=n_batch,
                             brown_or_huffman=brown_or_huffman,
                             mode=matrix_or_vector,
                             word2idx_path=word2idx_path)
    model = RNNLM(n_input,
                  n_hidden,
                  vocabulary_size,
                  cell,
                  optimizer,
                  p,
                  mode=matrix_or_vector)
    if os.path.isfile(args.model_dir):
        print 'loading pretrained model:', args.model_dir
        model = load_model(args.model_dir, model)
    else:
        print args.model_dir, 'not found'
    mean_cost = evaluate(valid_data, model)
    print 'valid cost:', mean_cost, 'perplexity:', np.exp(
        mean_cost)  #,"word_error_rate:",mean_wer
    mean_cost = evaluate(test_data, model)
    print 'test cost:', mean_cost, 'perplexity:', np.exp(mean_cost)
Example #6
    def setUp(self):
        text = 'You said good-bye and I said hello.'
        cbm = CountBasedMethod()
        word_list = cbm.text_to_word_list(text)
        word_to_id, *_ = cbm.preprocess(word_list)
        vocab_size = len(word_to_id)
        wordvec_size = 100
        hidden_size = 100
        self.rnnlm = RNNLM(vocab_size, wordvec_size, hidden_size)
        self.xs = np.array([
            [0, 4, 4, 1],
            [4, 0, 2, 1]
        ])
        self.ts = np.array([
            [0, 1, 0, 0],
            [0, 0, 0, 1]
        ])
Example #7
    def __init__(self, scramble_name='noscramble', bptt=1):
        self.alpha = .1
        self.n_epochs = 100

        self.hdim = 10
        self.vocab = list(
            '0123456789+ =')  # list of all possible characters we might see
        self.vdim = len(self.vocab)
        self.vocabmap = {char: i
                         for i, char in enumerate(self.vocab)
                         }  # map char to idx number

        self.rnn = RNNLM(np.zeros((self.vdim, self.hdim)),
                         U0=np.zeros((2, self.hdim)),
                         bptt=bptt)

        self.scramble = getattr(self, scramble_name)
Example #8
def rnnlm_load():
    from rnnlm import RNNLM
    L = np.load('rnnlm.L.npy')
    print "  loaded L: %s" % str(L.shape)
    H = np.load('rnnlm.H.npy')
    print "  loaded H: %s" % str(H.shape)
    U = np.load('rnnlm.U.npy')
    print "  loaded U: %s" % str(U.shape)
    assert (L.shape[0] == U.shape[0])
    assert (L.shape[1] == H.shape[1])
    assert (H.shape[0] == U.shape[1])
    model = RNNLM(L0=L, U0=U)
    model.params.H[:] = H
Example #9
def create_model(sess, save_folder, FLAGS, embed_fn):
    # load vocab & embeddings
    with open(save_folder + "vocab.pkl", "rb") as handle:
        vocab = pickle.load(handle)
    with open(save_folder + "tsf_vocab_inv.pkl", "rb") as handle:
        tsf_vocab_inv = pickle.load(handle)
    with open(save_folder + "init_embed.pkl", "rb") as handle:
        init_embed = pickle.load(handle)
    with open(save_folder + "tsf_init_embed.pkl", "rb") as handle:
        tsf_init_embed = pickle.load(handle)
    vocab_size = len(vocab)
    tsf_vocab_size = len(tsf_vocab_inv)
    print("Vocab size: {}, transfer vocab size: {}".format(
        vocab_size, tsf_vocab_size))

    # generator
    config_list = [(k, FLAGS[k].value) for k in FLAGS]
    generator_config = OrderedDict(
        sorted(config_list) +
        [("encoder_vocab_size",
          vocab_size), ("decoder_vocab_size", tsf_vocab_size)])
    #print("Generator config: {}, cell_type: {}".format(generator_config, "gru"))
    generator = Generator(generator_config, init_embed, tsf_init_embed)

    # language model
    lm_config_list = [(k, FLAGS[k].value) for k in FLAGS if k.startswith("lm_")
                      ] + [("batch_size", FLAGS.batch_size)]
    lm_config = OrderedDict(
        sorted(lm_config_list) + [("lm_vocab_size", vocab_size)])
    rnnlm = RNNLM(lm_config, init_embed)

    # style discriminator
    style_discriminator = StyleDiscriminator(FLAGS.style_num_classes, FLAGS.embedding_dim, \
                                             init_embed, FLAGS.style_hidden_size, \
                                             FLAGS.style_attention_size, FLAGS.max_sent_len, \
                                             FLAGS.style_keep_prob)
    #embedding_size, init_embed, hidden_size, \
    #                 attention_size, max_sent_len, keep_prob):
    #siamese discriminator
    siamese_discrim = SiameseDiscriminator(FLAGS.embedding_dim, \
                                             init_embed, FLAGS.style_hidden_size, \
                                             FLAGS.style_attention_size, FLAGS.max_sent_len, \
                                             FLAGS.style_keep_prob)
    # semantic discriminator
    semantic_discriminator = SemanticDiscriminator(embed_fn)

    # rollout
    rollout = ROLLOUT(vocab, tsf_vocab_inv)

    return generator, rnnlm, style_discriminator, siamese_discrim, semantic_discriminator, rollout, vocab, tsf_vocab_inv
Example #10
def train_rnnlm(train, vocab, hidden_size, epoch_num, batch_size):
    # Initialize the model
    train_data, vocab = load_data("./RNNLM_Chainer/ptb.test.txt")
    eos_id = vocab['<eos>']

    model = RNNLM(len(vocab), hidden_size)
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # TODO: make this work with minibatches
    
    # Convert train_data into a list of sentences
    sents = []
    sent = []
    for word_id in train_data:
        sent.append(word_id)
        if word_id == eos_id:
            sents.append(sent)
            sent = []

    # Training and saving
    for epoch_i in range(epoch_num):
        loss_sum = 0.0
        random.shuffle(sents)
        for i, s in enumerate(sents):
            loss = model(s,train=True)
            loss_sum += loss
            model.zerograds()
            loss.backward()
            optimizer.update()
            if (i % 100 == 0):
                print i, "/", len(sents)," finished"
        print "epoch " + str(epoch_i) + " finished"
        print "average loss is " + str(loss_sum/len(sents))
        outfile = "rnnlm-" + str(epoch_i) + ".model"
        serializers.save_npz(outfile, model)
        loss_sum = 0.0
Example #11
def setup_and_sample(args):
    if os.path.isdir(args.init_from):
        assert os.path.exists(args.init_from), "{} is not a directory".format(
            args.init_from)
        parent_dir = args.init_from
    else:
        assert os.path.exists("{}.index".format(
            args.init_from)), "{} is not a checkpoint".format(args.init_from)
        parent_dir = os.path.dirname(args.init_from)

    config_file = os.path.join(parent_dir, "config.pkl")
    vocab_file = os.path.join(parent_dir, "vocab.pkl")

    assert os.path.isfile(
        config_file), "config.pkl does not exist in directory {}".format(
            parent_dir)
    assert os.path.isfile(
        vocab_file), "vocab.pkl does not exist in directory {}".format(
            parent_dir)

    with open(config_file, 'rb') as f:
        saved_args = pickle.load(f)

    with open(vocab_file, 'rb') as f:
        saved_vocab = pickle.load(f)

    if os.path.isdir(args.init_from):
        checkpoint = tf.train.latest_checkpoint(parent_dir)
        assert checkpoint, "no checkpoint in directory {}".format(init_from)
    else:
        checkpoint = args.init_from

    saved_args.batch_size = 1
    saved_args.seq_length = 1
    model = RNNLM(saved_args)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        try:
            saver.restore(sess, checkpoint)
        except ValueError:
            print("{} is not a valid checkpoint".format(checkpoint))

        ret = sample(model, sess, saved_vocab, args.length, args.temperature,
                     args.prime)

    return ret
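For orientation, a hypothetical call to setup_and_sample: the attribute names mirror exactly what the function reads from args, while the 'save/' directory, the prime text and the sampling length are made-up values.

import argparse

args = argparse.Namespace(init_from='save/',   # directory holding config.pkl, vocab.pkl and a checkpoint
                          length=200,
                          temperature=1.0,
                          prime='The ')
print(setup_and_sample(args))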
Example #12
def test():
    with open(vocab_freq_file,'r') as f:
        vocab_freq=pickle.load(f)
    vocab_p = Q_w(vocab_freq,alpha)
    J,q=alias_setup(vocab_p)
    valid_data = TextIterator(valid_datafile,n_batch=n_batch,maxlen=maxlen)
    test_data=TextIterator(test_datafile,n_batch=n_batch,maxlen=maxlen)
    model=RNNLM(n_input,n_hidden,vocabulary_size, cell=rnn_cell,optimizer=optimizer,p=p,q_w=vocab_p,k=k)
    if os.path.isfile(args.model_dir):
        print 'loading pretrained model:',args.model_dir
        model=load_model(args.model_dir,model)
    else:
        print args.model_dir,'not found'
    valid_cost, wer = evaluate(valid_data, model,'wer')
    logger.info('validation cost: %f perplexity: %f,word_error_rate:%f' % (valid_cost, np.exp(valid_cost), wer))
    test_cost, wer = evaluate(test_data, model,'wer')
    logger.info('test cost: %f perplexity: %f,word_error_rate:%f' % (test_cost, np.exp(test_cost), wer))
Example #13
    def __init__(self, scramble_name='two_dig_scramble', bptt=1):

        # for now, sort of cheat and assume fixed size inputs and outputs
        self.x_len = 3
        self.y_len = 4

        self.alpha = .1
        self.n_epochs = 40

        self.hdim = 50
        self.vocab = list(
            '0123456789+ ')  # list of all possible characters we might see
        self.vdim = len(self.vocab)
        self.vocabmap = {char: i
                         for i, char in enumerate(self.vocab)
                         }  # map char to idx number

        self.rnns = [
            RNNLM(np.zeros((self.vdim, self.hdim)), bptt=bptt)
            for _ in range(self.y_len)
        ]

        self.scramble = getattr(self, scramble_name)
Example #14
def main(argv=None):
    args = commandLineParser.parse_args()

    train_data = process_data_lm('train.dat',
                                 'data',
                                 spId=False,
                                 input_index='input.wlist.index',
                                 output_index='input.wlist.index',
                                 bptt=20)
    #train_data = process_data_lm('train.txt', 'data', spId=True, input_index='input.wlist.index')
    valid_data = process_data_lm("valid.dat",
                                 path="data",
                                 spId=False,
                                 input_index='input.wlist.index',
                                 output_index='input.wlist.index',
                                 bptt=None)

    network_architecture = parse_params('./config')

    rnnlm = RNNLM(network_architecture=network_architecture,
                  seed=args.seed,
                  name=args.name,
                  dir='./',
                  load_path=args.load_path,
                  debug_mode=args.debug)

    rnnlm.fit(valid_data,
              train_data,
              learning_rate=1e-2,
              lr_decay=0.94,
              batch_size=64,
              dropout=args.dropout,
              optimizer=tf.train.AdamOptimizer,
              n_epochs=10)
    rnnlm.save()
    sys.exit()
Example #15
import sys, os
from numpy import *
from matplotlib.pyplot import *
%matplotlib inline
matplotlib.rcParams['savefig.dpi'] = 100

%load_ext autoreload
%autoreload 2

from rnnlm import RNNLM

# Gradient check on toy data, for speed
random.seed(10)
wv_dummy = random.randn(10,50)
model = RNNLM(L0 = wv_dummy, U0 = wv_dummy,
              alpha=0.005, rseed=10, bptt=4)
model.grad_check(array([1,2,3]), array([2,3,4]))

from data_utils import utils as du
import pandas as pd

# Load the vocabulary
#vocab = pd.read_table("data/lm/vocab.ptb.txt", header=None, sep="\s+",
#                     index_col=0, names=['count', 'freq'], )

vocab2 = pd.read_table("worddic.txt",header=None,sep="\s+",index_col=0)

# Choose how many top words to keep
#vocabsize = 2000
vocabsize2 = 58868  # remove for implementation
#num_to_word = dict(enumerate(vocab.index[:vocabsize]))
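The grad_check call above validates the analytic RNNLM gradients on a toy batch before any real training. For reference, a generic central-difference check of the same kind; this is a sketch of the idea, not the RNNLM.grad_check implementation itself.

import numpy as np

def finite_difference_check(loss_fn, analytic_grad, theta, eps=1e-6, tol=1e-5):
    # Perturb each parameter in turn and compare the analytic gradient with
    # (f(theta + eps) - f(theta - eps)) / (2 * eps).
    numeric = np.zeros_like(theta)
    it = np.nditer(theta, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        original = theta[idx]
        theta[idx] = original + eps
        f_plus = loss_fn(theta)
        theta[idx] = original - eps
        f_minus = loss_fn(theta)
        theta[idx] = original
        numeric[idx] = (f_plus - f_minus) / (2.0 * eps)
        it.iternext()
    return np.max(np.abs(numeric - analytic_grad)) < tol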
Example #16
class TestRNNLM(unittest.TestCase):
    def setUp(self):
        text = 'You said good-bye and I said hello.'
        cbm = CountBasedMethod()
        word_list = cbm.text_to_word_list(text)
        word_to_id, *_ = cbm.preprocess(word_list)
        vocab_size = len(word_to_id)
        wordvec_size = 100
        hidden_size  = 100
        self.rnnlm = RNNLM(vocab_size, wordvec_size, hidden_size)
        self.xs = np.array([
            [0, 4, 4, 1],
            [4, 0, 2, 1]
        ])
        self.ts = np.array([
            [0, 1, 0, 0],
            [0, 0, 0, 1]
        ])

    def test_predict(self):
        score = self.rnnlm._predict(self.xs)
        self.assertEqual((2, 4, 7), score.shape)

    def test_forward(self):
        loss = self.rnnlm.forward(self.xs, self.ts)
        self.assertEqual(1.94, round(loss, 2))

    def test_backward(self):
        self.rnnlm.forward(self.xs, self.ts)
        dout = self.rnnlm.backward()
        self.assertEqual(None, dout)

    def test_reset_state(self):
        self.rnnlm.forward(self.xs, self.ts)
        self.rnnlm.backward()
        self.assertEqual((2, 100), self.rnnlm.lstm_layer.h.shape)
        self.rnnlm.reset_state()
        self.assertEqual(None, self.rnnlm.lstm_layer.h)

    def test_save_params(self):
        self.rnnlm.forward(self.xs, self.ts)
        self.rnnlm.backward()
        self.rnnlm.save_params()
        self.assertEqual(True, path.exists('../pkl/rnnlm.pkl'))

    def test_load_params(self):
        self.rnnlm.load_params()
        a, b, c, d, e, f = self.rnnlm.params
        self.assertEqual((7, 100), a.shape)
        self.assertEqual((100, 400), b.shape)
        self.assertEqual((100, 400), c.shape)
        self.assertEqual((400,), d.shape)
        self.assertEqual((100, 7), e.shape)
        self.assertEqual((7,), f.shape)
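The assertions above pin down the interface and parameter shapes this RNNLM is expected to expose (a 7-word vocabulary, 100-dimensional embeddings, one LSTM layer followed by an affine layer). A rough skeleton of that interface, reconstructed from the tests only; the actual class in the repository contains the real layer implementations.

class RNNLM:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        # params holds: embedding (V, D), LSTM input weights (D, 4H),
        # LSTM recurrent weights (H, 4H), LSTM bias (4H,),
        # affine weights (H, V) and affine bias (V,)
        ...

    def _predict(self, xs):       # (batch, time) ids -> (batch, time, vocab) scores
        ...

    def forward(self, xs, ts):    # returns the softmax cross-entropy loss
        ...

    def backward(self, dout=1):   # backprop through time; returns None
        ...

    def reset_state(self):        # clears lstm_layer.h between independent sequences
        ...

    def save_params(self, file_name='../pkl/rnnlm.pkl'):
        ...

    def load_params(self, file_name='../pkl/rnnlm.pkl'):
        ...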
Example #17
def train(
        train_path,
        validation_path,
        dictionary_path,
        model_path,
        reload_state=False,
        dim_word=100,  # word vector dimensionality
        dim=1000,  # the number of LSTM units
        encoder='lstm',
        patience=10,
        max_epochs=5000,
        dispFreq=100,
        decay_c=0.,
        alpha_c=0.,
        diag_c=0.,
        lrate=0.01,
        n_words=100000,
        maxlen=100,  # maximum length of the description
        optimizer='rmsprop',
        batch_size=16,
        valid_batch_size=16,
        validFreq=1000,
        saveFreq=1000,  # save the parameters after every saveFreq updates
        sampleFreq=100,  # generate some text samples after every sampleFreq updates
        profile=False):

    # Model options
    model_options = locals().copy()

    worddicts = dict()
    worddicts_r = dict()
    with open(dictionary_path, 'rb') as f:
        for (i, line) in enumerate(f):
            word = line.strip()
            code = i + 2
            worddicts_r[code] = word
            worddicts[word] = code

    # reload options
    if reload_state and os.path.exists(model_path):
        with open('%s.pkl' % model_path, 'rb') as f:
            model_options = pkl.load(f)

    print '### Loading data.'

    train = TextIterator(train_path,
                         worddicts,
                         n_words_source=n_words,
                         batch_size=batch_size,
                         maxlen=maxlen)
    valid = TextIterator(validation_path,
                         worddicts,
                         n_words_source=n_words,
                         batch_size=valid_batch_size,
                         maxlen=maxlen)

    print '### Building neural network.'

    rnnlm = RNNLM(model_options)
    trainer = ModelTrainer(rnnlm, optimizer, model_options)
    sampler = TextSampler(rnnlm, model_options)

    print '### Training neural network.'

    best_params = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0]) / batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        for x in train:
            n_samples += len(x)
            uidx += 1

            x, x_mask = prepare_data(x, maxlen=maxlen, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()
            cost = trainer.f_grad_shared(x, x_mask)
            trainer.f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                # Save the best parameters, or the current state if best_params
                # is None.
                rnnlm.save_params(best_params)
                # Save the training options.
                pkl.dump(model_options, open('%s.pkl' % model_path, 'wb'))

            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                for jj in xrange(5):
                    sample, score = sampler.generate()
                    print 'Sample ', jj, ': ',
                    ss = sample
                    for vv in ss:
                        if vv == 0:
                            break
                        if vv in worddicts_r:
                            print worddicts_r[vv],
                        else:
                            print 'UNK',
                    print

            if numpy.mod(uidx, validFreq) == 0:
                valid_errs = pred_probs(f_log_probs, prepare_data,
                                        model_options, valid)
                valid_err = valid_errs.mean()
                rnnlm.error_history.append(valid_err)

                if uidx == 0 or valid_err <= numpy.array(rnnlm.error_history).min():
                    best_params = rnnlm.get_param_values()
                    bad_counter = 0
                if len(rnnlm.error_history
                       ) > patience and valid_err >= numpy.array(
                           rnnlm.error_history)[:-patience].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if numpy.isnan(valid_err):
                    import ipdb
                    ipdb.set_trace()

                print 'Valid ', valid_err

        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_params is not None:
        rnnlm.set_param_values(best_params)

    valid_err = pred_probs(f_log_probs, prepare_data, model_options,
                           valid).mean()

    print 'Valid ', valid_err

    params = copy.copy(best_params)
    numpy.savez(model_path,
                zipped_params=best_params,
                error_history=rnnlm.error_history,
                **params)

    return valid_err
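Both this loop and its near-duplicate in Example #22 call a prepare_data helper that the listing does not show: it turns a list of word-id sequences into a (time, batch) matrix plus a 0/1 mask, and signals an empty minibatch by returning None. A minimal sketch under those assumptions (mapping out-of-vocabulary ids to 1 is a common convention, not something the listing confirms):

import numpy

def prepare_data(seqs, maxlen=None, n_words=100000):
    # Drop sequences longer than maxlen, clip ids to the vocabulary,
    # and pad everything into a (time, batch) matrix with a 0/1 mask.
    if maxlen is not None:
        seqs = [s for s in seqs if len(s) < maxlen]
        if not seqs:
            return None, None
    lengths = [len(s) for s in seqs]
    n_samples = len(seqs)
    max_len = max(lengths) + 1
    x = numpy.zeros((max_len, n_samples), dtype='int64')
    x_mask = numpy.zeros((max_len, n_samples), dtype='float32')
    for i, s in enumerate(seqs):
        s = [w if w < n_words else 1 for w in s]
        x[:lengths[i], i] = s
        x_mask[:lengths[i] + 1, i] = 1.  # the extra step covers end-of-sentence
    return x, x_mask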
Example #18
def train(config, sw):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    vocab = torchtext.vocab.FastText()
    #vocab = torchtext.vocab.GloVe()

    # get data iterators
    lm_iters, s_iters = load_data(embeddings=vocab,
                                  device=device,
                                  batch_size=config.batch_size,
                                  bptt_len=config.seq_len)

    _, valid_iter, test_iter, field = s_iters
    vocab = field.vocab

    if config.use_bptt:
        train_iter, _, _, _ = lm_iters
    else:
        train_iter, _, _, _ = s_iters

    print("Vocab size: {}".format(vocab.vectors.shape))

    # create embedding layer
    embedding = nn.Embedding.from_pretrained(vocab.vectors).to(device)
    EMBED_DIM = 300

    num_classes = vocab.vectors.shape[0]
    # Initialize the model that we are going to use
    if config.model == "rnnlm":
        model = RNNLM(EMBED_DIM, config.hidden_dim, num_classes)
    elif config.model == "s-vae":
        model = SentenceVAE(EMBED_DIM,
                            config.hidden_dim,
                            num_classes,
                            fb_lambda=config.freebits_lambda,
                            wd_keep_prob=config.wdropout_prob,
                            wd_unk=embedding(
                                torch.LongTensor([vocab.stoi["<unk>"]
                                                  ]).to(device)),
                            mu_f_beta=config.mu_forcing_beta)
    else:
        raise Error("Invalid model parameter.")
    model = model.to(device)

    # Setup the loss, optimizer, lr-scheduler
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    criterion = torch.nn.NLLLoss(reduction="sum").to(config.device)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          1,
                                          gamma=config.learning_rate_decay)
    lr = config.learning_rate

    global_step = 0
    best_nll = sys.maxsize
    best_pp = sys.maxsize
    best_kl = None
    for epoch in itertools.count():
        for batch in train_iter:

            # [1] Get data
            if config.use_bptt:
                batch_text = batch.text
                batch_target = batch.target
                txt_len = torch.full((batch_text.shape[1], ),
                                     batch_text.shape[0],
                                     device=device)
                tgt_len = txt_len
            else:
                batch_text, txt_len = batch.text
                batch_target, tgt_len = batch.target

            batch_text = embedding(batch_text.to(device))
            batch_target = batch_target.to(device)

            # [2] Forward & Loss
            batch_output = model(batch_text, txt_len)

            # merge batch and sequence dimension for evaluation
            batch_output = batch_output.view(-1, batch_output.shape[2])
            batch_target = batch_target.view(-1)

            B = batch_text.shape[1]
            nll = criterion(batch_output, batch_target) / B
            sw.add_scalar('Train/NLL', nll.item(), global_step)

            loss = nll.clone()
            for loss_name, additional_loss in model.get_additional_losses(
            ).items():
                loss += additional_loss
                sw.add_scalar('Train/' + loss_name, additional_loss,
                              global_step)

            # [3] Optimize
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            sw.add_scalar('Train/Loss', loss.item(), global_step)

            if global_step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, "
                      "NLL = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"),
                          global_step, config.train_steps, nll.item(),
                          loss.item()),
                      flush=True)

            global_step += 1

        epoch_nll, epoch_pp, epoch_kl, additional_losses = test_model(
            model, embedding, criterion, valid_iter, device)
        model.train()

        print("Valid NLL: {}".format(epoch_nll))
        print("Valid Perplexity: {}".format(epoch_pp))
        print("Valid KL: {}".format(epoch_kl))
        sw.add_scalar('Valid/NLL', epoch_nll, global_step)
        sw.add_scalar('Valid/Perplexity', epoch_pp, global_step)
        sw.add_scalar('Valid/KL', epoch_kl, global_step)

        # the additional_loss below will also have kl but not multisample
        for loss_name, additional_loss in additional_losses.items():
            sw.add_scalar('Valid/' + loss_name, additional_loss, global_step)

        # sample some sentences
        MAX_LEN = 50
        for _ in range(5):
            text = model.temperature_sample(embedding, MAX_LEN)
            text = ' '.join(vocab.itos[w] for w in text)
            print(text)
            sw.add_text('Valid/Sample-text', text, global_step)

        if epoch_nll < best_nll:
            best_nll = epoch_nll
            save_model("best", model, config)
        if epoch_pp < best_pp:
            best_pp = epoch_pp

        if global_step >= config.train_steps:
            break

        scheduler.step()
        print("Learning Rate: {}".format(
            [group['lr'] for group in optimizer.param_groups]))

    print('Done training.')

    best_model = load_model("best", config)
    test_nll, test_pp, test_kl, test_additional_losses = test_model(
        best_model, embedding, criterion, test_iter, device)
    print("Test NLL: {}".format(test_nll))
    print("Test PP: {}".format(test_pp))
    print("Test KL: {}".format(test_kl))
    print("{}".format(test_additional_losses))

    return best_model, model, {'hparam/nll': best_nll, 'hparam/pp': best_pp}
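The training loop treats the model as a black box: it receives already-embedded input of shape (seq_len, batch, embed_dim), returns per-position log-probabilities over the vocabulary (so they can be fed straight into NLLLoss), and exposes get_additional_losses() for extra terms such as the VAE's KL. A minimal sketch of a module with that contract; the real RNNLM in the repository also implements temperature_sample and may differ in detail.

import torch
import torch.nn as nn

class RNNLM(nn.Module):
    def __init__(self, embed_dim, hidden_dim, num_classes):
        super().__init__()
        self.rnn = nn.GRU(embed_dim, hidden_dim)      # sequence-first input
        self.out = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, lengths=None):
        h, _ = self.rnn(x)                            # (seq_len, batch, hidden_dim)
        return torch.log_softmax(self.out(h), dim=-1)

    def get_additional_losses(self):
        # The plain language model contributes no extra loss terms.
        return {}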
Example #19
def train(lr):
    # Load data
    logger.info('loading dataset...')

    train_data = TextIterator(train_datafile,
                              filepath,
                              n_batch=n_batch,
                              brown_or_huffman=brown_or_huffman,
                              mode=matrix_or_vector,
                              word2idx_path=word2idx_path)
    valid_data = TextIterator(valid_datafile,
                              filepath,
                              n_batch=n_batch,
                              brown_or_huffman=brown_or_huffman,
                              mode=matrix_or_vector,
                              word2idx_path=word2idx_path)
    test_data = TextIterator(test_datafile,
                             filepath,
                             n_batch=n_batch,
                             brown_or_huffman=brown_or_huffman,
                             mode=matrix_or_vector,
                             word2idx_path=word2idx_path)
    logger.info('building model...')
    model = RNNLM(n_input,
                  n_hidden,
                  vocabulary_size,
                  cell,
                  optimizer,
                  p=p,
                  mode=matrix_or_vector)
    if os.path.exists(model_dir) and reload_dumps == 1:
        logger.info('loading parameters from: %s' % model_dir)
        model = load_model(model_dir, model)
    else:
        logger.info("init parameters....")
    logger.info('training start...')
    start = time.time()
    idx = 0
    for epoch in xrange(NEPOCH):
        error = 0
        for x, x_mask, (y_node, y_choice, y_bit_mask), y_mask in train_data:
            idx += 1
            cost = model.train(x, x_mask, y_node, y_choice, y_bit_mask, y_mask,
                               lr)
            error += cost
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN Or Inf detected!'
                return -1
            if idx % disp_freq == 0:
                logger.info('epoch: %d idx: %d cost: %f ppl: %f' %
                            (epoch, idx, error / disp_freq,
                             np.exp(error / (1.0 * disp_freq))))  #,'lr:',lr
                error = 0
            if idx % save_freq == 0:
                logger.info('dumping...')
                save_model(
                    './model/parameters_%.2f.pkl' % (time.time() - start),
                    model)
            if idx % valid_freq == 0:
                logger.info('validating...')
                valid_cost = evaluate(valid_data, model)
                logger.info('valid_cost: %f perplexity: %f' %
                            (valid_cost, np.exp(valid_cost)))
            if idx % test_freq == 0:
                logger.info('testing...')
                test_cost = evaluate(test_data, model)
                logger.info('test cost: %f perplexity: %f' %
                            (test_cost, np.exp(test_cost)))
            #if idx%clip_freq==0 and lr >=0.01:
            #    print 'cliping learning rate:',
            #    lr=lr*0.9
            #    print lr
        sys.stdout.flush()

    print "Finished. Time = " + str(time.time() - start)
Example #20
batch_size = 20  # assumed value; batch_size is used below but its definition is missing from the listing
wordvec_size = 100
hidden_size = 100
time_size = 35
learning_rate = 20.0
max_epoch = 4
max_grad = 0.25

# Load training data
corpus, word_to_id, id_to_word = load_data('train')
corpus_test, *_ = load_data('test')
vocab_size = len(word_to_id)
xs = corpus[:-1]
ts = corpus[1:]

# Generate a model, optimiser and trainer
model = RNNLM(vocab_size, wordvec_size, hidden_size)
optimiser = SGD(learning_rate)
trainer = RNNLMTrainer(model, optimiser)

# 1. Train applying gradients clipping
training_process = trainer.fit(xs,
                               ts,
                               max_epoch,
                               batch_size,
                               time_size,
                               max_grad,
                               eval_interval=20)
for iter in training_process:
    print(iter)
file_path = '../img/train_rnnlm.png'
trainer.save_plot_image(file_path, ylim=(0, 500))
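RNNLMTrainer receives max_grad, which is typically applied as gradient clipping by global norm before each SGD step. A minimal sketch of that operation over a list of NumPy gradient arrays; this is an assumption about what the trainer does internally, not its actual source.

import numpy as np

def clip_grads(grads, max_norm):
    # Rescale all gradients in place when their combined L2 norm exceeds max_norm.
    total_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
    rate = max_norm / (total_norm + 1e-6)
    if rate < 1:
        for g in grads:
            g *= rate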
Example #21
	method = "RNNPTONE"
	hdim = 40 # dimension of hidden layer = dimension of word vectors
	#random.seed(10)
	nepoch = 1
	N = nepoch * len(Y_train)
	k = 5 # minibatch size
	fraction_lost = 0.07923163705
	#idx=[]
	#print X_train.size
	#for i in range(N/k):
	#    idx.append(random.choice(len(Y_train),k))
	if method == "RNNLM":
		L0 = zeros((vocabsize, hdim)) # replace with random init, 
					      # or do in RNNLM.__init__()
		model = RNNLM(L0, U0 = L0, alpha=0.1,  bptt=3)

		idx = epochiter(len(Y_train), nepoch)
		model.train_sgd(X = X_train, y = Y_train, idxiter = idx, printevery = 500, costevery = 500)

		dev_loss = model.compute_mean_loss(X_dev, Y_dev)
		if not os.path.exists("model/" + method):
			os.makedirs("model/" + method)

		print "Unadjusted: %.03f" % exp(dev_loss)
		print "Adjusted for missing vocab: %.03f" % exp(adjust_loss(dev_loss, fraction_lost))
		save("model/" + method + "/rnnlm.L.npy", model.sparams.L)
		save("model/" + method + "/rnnlm.U.npy", model.params.U)
		save("model/" + method + "/rnnlm.H.npy", model.params.H)
		print "RNNLM"
Example #22
def train(train_path,
          validation_path,
          dictionary_path,
          model_path,
          reload_state=False,
          dim_word=100, # word vector dimensionality
          dim=1000, # the number of LSTM units
          encoder='lstm',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0., 
          alpha_c=0., 
          diag_c=0.,
          lrate=0.01, 
          n_words=100000,
          maxlen=100, # maximum length of the description
          optimizer='rmsprop', 
          batch_size = 16,
          valid_batch_size = 16,
          validFreq=1000,
          saveFreq=1000, # save the parameters after every saveFreq updates
          sampleFreq=100, # generate some text samples after every sampleFreq updates
          profile=False):

    # Model options
    model_options = locals().copy()

    worddicts = dict()
    worddicts_r = dict()
    with open(dictionary_path, 'rb') as f:
        for (i, line) in enumerate(f):
            word = line.strip()
            code = i + 2
            worddicts_r[code] = word
            worddicts[word] = code

    # reload options
    if reload_state and os.path.exists(model_path):
        with open('%s.pkl' % model_path, 'rb') as f:
            model_options = pkl.load(f)

    print '### Loading data.'

    train = TextIterator(train_path, 
                         worddicts,
                         n_words_source=n_words, 
                         batch_size=batch_size,
                         maxlen=maxlen)
    valid = TextIterator(validation_path, 
                         worddicts,
                         n_words_source=n_words, 
                         batch_size=valid_batch_size,
                         maxlen=maxlen)

    print '### Building neural network.'

    rnnlm = RNNLM(model_options)
    trainer = ModelTrainer(rnnlm, optimizer, model_options)
    sampler = TextSampler(rnnlm, model_options)

    print '### Training neural network.'

    best_params = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0])/batch_size
    if saveFreq == -1:
        saveFreq = len(train[0])/batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0])/batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        for x in train:
            n_samples += len(x)
            uidx += 1

            x, x_mask = prepare_data(x, maxlen=maxlen, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()
            cost = trainer.f_grad_shared(x, x_mask)
            trainer.f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                # Save the best parameters, or the current state if best_params
                # is None.
                rnnlm.save_params(best_params)
                # Save the training options.
                pkl.dump(model_options, open('%s.pkl' % model_path, 'wb'))

            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                for jj in xrange(5):
                    sample, score = sampler.generate()
                    print 'Sample ', jj, ': ',
                    ss = sample
                    for vv in ss:
                        if vv == 0:
                            break
                        if vv in worddicts_r:
                            print worddicts_r[vv], 
                        else:
                            print 'UNK',
                    print

            if numpy.mod(uidx, validFreq) == 0:
                valid_errs = pred_probs(f_log_probs, prepare_data, model_options, valid)
                valid_err = valid_errs.mean()
                rnnlm.error_history.append(valid_err)

                if uidx == 0 or valid_err <= numpy.array(rnnlm.error_history).min():
                    best_params = rnnlm.get_param_values()
                    bad_counter = 0
                if len(rnnlm.error_history) > patience and valid_err >= numpy.array(rnnlm.error_history)[:-patience].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if numpy.isnan(valid_err):
                    import ipdb; ipdb.set_trace()

                print 'Valid ', valid_err

        print 'Seen %d samples'%n_samples

        if estop:
            break

    if best_params is not None:
        rnnlm.set_param_values(best_params)

    valid_err = pred_probs(f_log_probs, prepare_data, model_options, valid).mean()

    print 'Valid ', valid_err

    params = copy.copy(best_params)
    numpy.savez(model_path, zipped_params=best_params, 
                error_history=rnnlm.error_history, 
                **params)

    return valid_err
Example #23
    pass


parser = argparse.ArgumentParser(description="A program for testing RNNLM.")
parser.add_argument("sentence",
                    action="store",
                    type=str,
                    help="a sentence that you want to test.")
parser.add_argument("model_file_path",
                    action="store",
                    type=str,
                    help="a model file path that you want to test.")
parser.add_argument("vocab_file_path",
                    action="store",
                    type=str,
                    help="a vocab file used to train the model.")
parser.add_argument("hidden_size",
                    action="store",
                    type=int,
                    help="a hidden size of RNN.")
args = parser.parse_args()

# Load the vocab file (word -> id) pickled at training time
with open(args.vocab_file_path, "rb") as f:
    vocab = pickle.load(f)
# Load the model
model = RNNLM(len(vocab), args.hidden_size)
serializers.load_npz(args.model_file_path, model)

test_rnnlm(args.sentence, model, vocab, args.hidden_size)
Example #24
def rnnlm_init():
    from rnnlm import RNNLM
    np.random.seed(10)
    L = np.random.randn(50, 10)
    model = RNNLM(L0=L)
Example #25
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    tf.random.set_seed(SEED)
    assert START_TOKEN == 0

    physical_devices = tf.config.experimental.list_physical_devices("GPU")
    if len(physical_devices) > 0:
        for dev in physical_devices:
            tf.config.experimental.set_memory_growth(dev, True)

    generator = Generator(VOCAB_SIZE, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    target_lstm = RNNLM(VOCAB_SIZE, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN) 
    discriminator = Discriminator(sequence_length=SEQ_LENGTH, num_classes=2, vocab_size=VOCAB_SIZE, embedding_size=dis_embedding_dim,
                                  filter_sizes=dis_filter_sizes, num_filters=dis_num_filters, dropout_keep_prob=dis_dropout_keep_prob,
                                  l2_reg_lambda=dis_l2_reg_lambda)
    
    gen_dataset = dataset_for_generator(positive_file, BATCH_SIZE)
    log = open('save/experiment-log.txt', 'w')
    #  pre-train generator
    if not os.path.exists("save/generator_pretrained.h5"):
        print('Start pre-training...')
        log.write('pre-training...\n')
        generator.pretrain(gen_dataset, target_lstm, PRE_EPOCH_NUM, generated_num // BATCH_SIZE, eval_file)
        generator.save("save/generator_pretrained.h5")
    else:
        generator.load("save/generator_pretrained.h5")

    if not os.path.exists("discriminator_pretrained.h5"):
        print('Start pre-training discriminator...')
        # Train 3 epoch on the generated data and do this for 50 times
        for _ in range(50):
            print("Dataset", _)
            generator.generate_samples(generated_num // BATCH_SIZE, negative_file)
            dis_dataset = dataset_for_discriminator(positive_file, negative_file, BATCH_SIZE)
            discriminator.train(dis_dataset, 3, (generated_num // BATCH_SIZE) * 2)
        discriminator.save("save/discriminator_pretrained.h5")
    else:
        discriminator.load("save/discriminator_pretrained.h5")

    rollout = ROLLOUT(generator, 0.8)

    print('#########################################################################')
    print('Start Adversarial Training...')
    log.write('adversarial training...\n')
    
    for total_batch in range(TOTAL_BATCH):
        print("Generator", total_batch, 'of ', TOTAL_BATCH)
        # Train the generator for one step
        for it in range(1):
            samples = generator.generate_one_batch()
            rewards = rollout.get_reward(samples, 16, discriminator)
            generator.train_step(samples, rewards)

        # Test
        if total_batch % 10 == 0 or total_batch == TOTAL_BATCH - 1:
            generator.generate_samples(generated_num // BATCH_SIZE, eval_file)
            likelihood_dataset = dataset_for_generator(eval_file, BATCH_SIZE)
            test_loss = target_lstm.target_loss(likelihood_dataset)
            buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
            print('total_batch: ', total_batch, 'of: ', TOTAL_BATCH, 'test_loss: ', test_loss)
            generator.save(f"save/generator_{total_batch}.h5")
            discriminator.save(f"save/discriminator_{total_batch}.h5")
            log.write(buffer)

        # Update roll-out parameters
        rollout.update_params()

        # Train the discriminator
        print("Discriminator", total_batch, 'of ', TOTAL_BATCH)
        # There will be 5 x 3 = 15 epochs in this loop
        for _ in range(5):
            generator.generate_samples(generated_num // BATCH_SIZE, negative_file)
            dis_dataset = dataset_for_discriminator(positive_file, negative_file, BATCH_SIZE)
            discriminator.train(dis_dataset, 3, (generated_num // BATCH_SIZE) * 2)
    generator.save(f"save/generator_{TOTAL_BATCH}.h5")
    discriminator.save(f"save/discriminator_{TOTAL_BATCH}.h5")

    log.close()
Example #26
class NaiveRnnlmDiscr:
    def __init__(self, scramble_name='noscramble', bptt=1):
        self.alpha = .1
        self.n_epochs = 100

        self.hdim = 10
        self.vocab = list(
            '0123456789+ =')  # list of all possible characters we might see
        self.vdim = len(self.vocab)
        self.vocabmap = {char: i
                         for i, char in enumerate(self.vocab)
                         }  # map char to idx number

        self.rnn = RNNLM(np.zeros((self.vdim, self.hdim)),
                         U0=np.zeros((2, self.hdim)),
                         bptt=bptt)

        self.scramble = getattr(self, scramble_name)

    def encode_expr(self, expr):
        return [self.vocabmap[c] for c in expr]

    def decode(self, indices):
        return ''.join([self.vocab[idx] for idx in indices])

    def lengthen_double(self, x_string):
        # format from '21 + 12' -> '021 + 012'
        return ' + '.join(
            [lengthen(s, self.x_len) for s in x_string.split(' + ')])

    def scramble_double(self, x_string):
        # format 'abc + 123' to 'a1b2c3'
        lengthened = self.lengthen_double(x_string)
        nums = lengthened.split(' + ')
        return ''.join([x1 + x2 for x1, x2 in zip(nums[0], nums[1])])

    def noscramble(self, x_string):
        return x_string

    # def unscrambled_simple(self, x_string, i):
    #     return ''.join(c for c in self.lengthen_double(x_string) if c != ' ' and c != '+')
    # def scramble_simple(self, x_string, i):
    #     return self.scramble_double(x_string)
    # def two_dig_scramble(self, x_string, i):
    #     # where i is the output digit we're computing
    #     # in my opinion, this function knows a little too much about how to pick our digits
    #     x_slice = slice(0, 2) if i == 0 else slice(2*(i-1), 2*i)
    #     return self.scramble_double(x_string)[x_slice]
    # def rot_scramble(self, x_string, i):
    #     six_digs = self.scramble_double(x_string)
    #     start_dig = 0 if i == 0 else i - 1
    #     return [c for c in reversed(six_digs[start_dig:] + six_digs[:start_dig])]
    # def rot_scramble_half(self, x_string, i):
    #     return self.rot_scramble(x_string, i)[3:]

    def train(self, xy_data, rnn=None):
        # This function trains one RNN

        self.rnn = rnn if rnn is not None else self.rnn

        xs = [np.array(self.encode_expr(self.scramble(x))) for x, y in xy_data]
        ys = [y for x, y in xy_data]

        # for printing purposes only
        dev_data = get_data('data/neg_dev.txt')

        dev_xs = [
            np.array(self.encode_expr(self.scramble(x))) for x, y in dev_data
        ]
        dev_ys = [y for x, y in dev_data]

        self.rnn.grad_check(dev_xs[0], dev_ys[0])

        for j in xrange(self.n_epochs):
            for x, y in zip(xs, ys):
                self.rnn.train_point_sgd(x, y, self.alpha)
            # print 'train loss', rnn_i.compute_loss(xs_i, ys_i)
            if j % 10 == 0:
                print 'dev loss', self.rnn.compute_loss(
                    dev_xs[:100],
                    dev_ys[:100]), 'train loss', self.rnn.compute_loss(
                        xs[:100], ys[:100])

        # # extra stuff to print
        # for x,y in zip(xs_i,ys)[:5]:
        #     yhat = rnn_i.predict(x)
        #     print x, yhat, np.argmax(yhat)

        return self.rnn

    def predict_one(self, x, rnn=None):
        rnn = rnn if rnn is not None else self.rnn
        if rnn is None:
            raise Exception('Model not trained!')

        x_encoded = self.encode_expr(self.scramble(x))
        return np.argmax(rnn.predict(x_encoded))
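A small, hypothetical round-trip through the character-vocabulary helpers defined above; the expression string is made up for illustration.

nd = NaiveRnnlmDiscr()
ids = nd.encode_expr('12+34 =')   # characters -> vocabulary indices
print(ids)
print(nd.decode(ids))             # back to '12+34 ='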
Example #27
def train(args):
    print(vars(args))

    loader = SequenceLoader(args)

    if args.init_from is not None:
        if os.path.isdir(args.init_from):
            assert os.path.exists(args.init_from), "{} is not a directory".format(args.init_from)
            parent_dir = args.init_from
        else:
            assert os.path.exists("{}.index".format(args.init_from)), "{} is not a checkpoint".format(args.init_from)
            parent_dir = os.path.dirname(args.init_from)

        config_file = os.path.join(parent_dir, "config.pkl")
        vocab_file = os.path.join(parent_dir, "vocab.pkl")

        assert os.path.isfile(config_file), "config.pkl does not exist in directory {}".format(parent_dir)
        assert os.path.isfile(vocab_file), "vocab.pkl does not exist in directory {}".format(parent_dir)

        if os.path.isdir(args.init_from):
            checkpoint = tf.train.latest_checkpoint(parent_dir)
            assert checkpoint, "no checkpoint in directory {}".format(args.init_from)
        else:
            checkpoint = args.init_from

        with open(os.path.join(parent_dir, 'config.pkl'), 'rb') as f:
            saved_args = pickle.load(f)

        assert saved_args.hidden_size == args.hidden_size, "hidden size argument ({}) differs from save ({})" \
            .format(saved_args.hidden_size, args.hidden_size)
        assert saved_args.num_layers == args.num_layers, "number of layers argument ({}) differs from save ({})" \
            .format(saved_args.num_layers, args.num_layers)

        with open(os.path.join(parent_dir, 'vocab.pkl'), 'rb') as f:
            saved_vocab = pickle.load(f)

        assert saved_vocab == loader.vocab, "vocab in data directory differs from save"

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    new_config_file = os.path.join(args.save_dir, 'config.pkl')
    new_vocab_file = os.path.join(args.save_dir, 'vocab.pkl')

    if not os.path.exists(new_config_file):
        with open(new_config_file, 'wb') as f:
            pickle.dump(args, f)
    if not os.path.exists(new_vocab_file):
        with open(new_vocab_file, 'wb') as f:
            pickle.dump(loader.vocab, f)

    model = RNNLM(args)



    with tf.Session() as sess:
        #tf.summary.merge_all()
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        if args.init_from is not None:
            try:
                saver.restore(sess, checkpoint)
            except ValueError:
                print("{} is not a valid checkpoint".format(checkpoint))

                print("initializing from {}".format(checkpoint))

        start_datetime = datetime.datetime.now().isoformat()

        # if args.tensorboard:
        train_writer = tf.summary.FileWriter(os.path.join(args.save_dir, start_datetime))
        train_writer.add_graph(sess.graph)

        for e in range(args.num_epochs):
            if e % args.decay_every == 0:
                lr = args.learning_rate * (args.decay_factor ** e)

            state = sess.run(model.zero_state)

            for b, (x, y) in enumerate(loader.train):
                global_step = e * loader.train.num_batches + b
                start = time.time()
                feed = {model.x: x,
                        model.y: y,
                        model.dropout: args.dropout,
                        model.lr: lr}
                state_feed = {pl: s for pl, s in zip(sum(model.start_state, ()), sum(state, ()))}
                feed.update(state_feed)
                train_loss, state, _ = sess.run([model.loss, model.end_state, model.train_op], feed)
                end = time.time()

                # if args.verbose:
                print("{}/{} (epoch {}), train_loss = {:.3f}, perplexity = {:.3f}, time/batch = {:.3f}".format(
                    global_step, args.num_epochs * loader.train.num_batches, e, train_loss, np.exp(train_loss),
                    end - start))

                # if args.tensorboard:
                summary = tf.Summary(
                    value=[tf.Summary.Value(tag="RNNLM Train Loss", simple_value=float(train_loss))])

                train_writer.add_summary(summary, global_step)

                summary1 = tf.Summary(
                    value=[tf.Summary.Value(tag="RNNLM Train Perplexity", simple_value=float(np.exp(train_loss)))])
                train_writer.add_summary(summary1, global_step)

                if global_step % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == loader.train.num_batches - 1):
                    all_loss = 0
                    val_state = sess.run(model.zero_state)
                    start = time.time()

                    for b, (x, y) in enumerate(loader.val):
                        feed = {model.x: x,
                                model.y: y}
                        state_feed = {pl: s for pl, s in zip(sum(model.start_state, ()), sum(val_state, ()))}
                        feed.update(state_feed)
                        batch_loss, val_state = sess.run([model.loss, model.end_state], feed)
                        all_loss += batch_loss

                    end = time.time()
                    val_loss = all_loss / loader.val.num_batches

                    # if args.verbose:
                    print("val_loss = {:.3f}, perplexity = {:.3f}, time/val = {:.3f}".format(val_loss,
                                                                                             np.exp(val_loss),
                                                                                             end - start))

                    checkpoint_path = os.path.join(args.save_dir, '{}-iter_{}-val_{:.3f}.ckpt' \
                                                   .format(start_datetime, global_step, val_loss))
                    saver.save(sess, checkpoint_path)

                    # if args.verbose:
                    print("model saved to {}".format(checkpoint_path))

                    # if args.tensorboard:
                    summary = tf.Summary(
                        value=[tf.Summary.Value(tag="RNNLM Val Loss", simple_value=float(val_loss))])
                    tf.summary.histogram('Val_Perplexity', val_loss)
                    # tf.histogram_summary('Val_Perplexity', val_loss)
                    train_writer.add_summary(summary, global_step)

                    summary1 = tf.Summary(
                        value=[tf.Summary.Value(tag="RNNLM Val Perplexity", simple_value=float(np.exp(val_loss)))])
                    # tf.summary.histogram('Val_Perplexity', np.exp(val_loss))
                    train_writer.add_summary(summary1, global_step)
        tf.summary.merge_all()
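One non-obvious idiom above is the state feed: sum(model.start_state, ()) flattens a tuple of per-layer LSTM state tuples into one flat tuple, so each placeholder can be zipped with its NumPy counterpart. A tiny illustration of that idiom:

# sum(nested, ()) concatenates the inner tuples into one flat tuple.
nested = ((1, 2), (3, 4))
print(sum(nested, ()))  # (1, 2, 3, 4)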
Example #28
# Gradient check is going to take a *long* time here
# since it's quadratic-time in the number of parameters.
# run at your own risk... (but do check this!)
# model.grad_check(array([1,2,3]), array([2,3,4]))

#### YOUR CODE HERE ####

##
# Pare down to a smaller dataset, for speed
# (optional - recommended to not do this for your final model)
hdim = 100 # dimension of hidden layer = dimension of word vectors
random.seed(10)
L0 = zeros((vocabsize, hdim)) # replace with random init,
L0 = 0.1 * random.randn(*L0.shape)   # or do in RNNLM.__init__()
# test parameters; you probably want to change these
model = RNNLM(L0, U0 = L0, alpha=0.1, rseed=10, bptt=3)
ntrain = len(Y_train)
X = X_train[:ntrain]
Y = Y_train[:ntrain]
k = 5
indices = range(ntrain)
def idxiter_batches():
    num_batches = ntrain / k
    for i in xrange(num_batches):
        yield random.choice(indices, k)

model_output = model.train_sgd(X=X, y=Y, idxiter=idxiter_batches(), printevery=100, costevery=10000)

dev_loss = model.compute_mean_loss(X_dev, Y_dev)
## DO NOT CHANGE THIS CELL ##
# Report your numbers, after computing dev_loss above.
Example #29
from rnnlm import RNNLM
from better_rnnlm import BetterRNNLM
from datasets import ptb
from commons.util import eval_perplexity

if __name__ == '__main__':
    # select model for evaluation
    model = RNNLM()
    # model = BetterRNNLM()

    # read tunned params
    model.load_params()
    corpus, _, _ = ptb.load_data('test')

    model.reset_state()
    ppl_test = eval_perplexity(model, corpus)
    print('Test Perplexity:', ppl_test)