Example #1
def test(test_args):
    start = time.time()
    with open(os.path.join(test_args.save_dir, 'config.pkl'), 'rb') as f:
        args = cPickle.load(f)
    data_loader = TextLoader(args, train=False)
    test_data = data_loader.read_dataset(test_args.test_file)

    args.word_vocab_size = data_loader.word_vocab_size
    print "Word vocab size: " + str(data_loader.word_vocab_size) + "\n"

    # Model
    lm_model = WordLM

    print "Begin testing..."
    # If using gpu:
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    # gpu_config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    # and pass the config into the session: tf.Session(config=gpu_config)
    with tf.Graph().as_default(), tf.Session() as sess:
        initializer = tf.random_uniform_initializer(-args.init_scale, args.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            mtest = lm_model(args, is_training=False, is_testing=True)

        # save only the last model
        saver = tf.train.Saver(tf.all_variables())
        tf.initialize_all_variables().run()
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        test_perplexity = run_epoch(sess, mtest, test_data, data_loader, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
        print("Test time: %.0f" % (time.time() - start))
Example #2
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                if (e * data_loader.num_batches + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print "model saved to {}".format(checkpoint_path)
Example #3
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Example #4
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        train_loss_iterations = {'iteration': [], 'epoch': [], 'train_loss': [], 'val_loss': []}

        for e in xrange(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                batch_idx = e * data_loader.num_batches + b
                print "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(batch_idx,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                train_loss_iterations['iteration'].append(batch_idx)
                train_loss_iterations['epoch'].append(e)
                train_loss_iterations['train_loss'].append(train_loss)

                if batch_idx % args.save_every == 0:

                    # evaluate
                    state_val = model.initial_state.eval()
                    avg_val_loss = 0
                    for x_val, y_val in data_loader.val_batches:
                        feed_val = {model.input_data: x_val, model.targets: y_val, model.initial_state: state_val}
                        # evaluation only: running train_op here would update weights on validation data
                        val_loss, state_val = sess.run([model.cost, model.final_state], feed_val)
                        avg_val_loss += val_loss / len(data_loader.val_batches)
                    print 'val_loss: {:.3f}'.format(avg_val_loss)
                    train_loss_iterations['val_loss'].append(avg_val_loss)

                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print "model saved to {}".format(checkpoint_path)
                else:
                    train_loss_iterations['val_loss'].append(None)

            pd.DataFrame(data=train_loss_iterations,
                         columns=train_loss_iterations.keys()).to_csv(os.path.join(args.save_dir, 'log.csv'))
Example #5
class TestUtilsMethods(unittest.TestCase):
    def setUp(self):
        self.data_loader = TextLoader("tests/test_data", batch_size=2, seq_length=5)

    def test_init(self):
        print (self.data_loader.vocab)
        print (self.data_loader.tensor)
        print (self.data_loader.vocab_size)

    def test_build_vocab(self):
        sentences = ["I", "love", "cat", "cat"]
        vocab, vocab_inv = self.data_loader.build_vocab(sentences)
        print (vocab, vocab_inv)

        # Must include I, love, and cat
        self.assertItemsEqual(vocab, ["I", "love", "cat"])
        self.assertDictEqual(vocab, {'I': 0, 'love': 2, 'cat': 1})

        self.assertItemsEqual(vocab_inv, ["I", "love", "cat"])

    def test_batch_vocab(self):
        print (np.array(self.data_loader.x_batches).shape)
        self.assertItemsEqual(self.data_loader.x_batches[0][0][1:],
                              self.data_loader.y_batches[0][0][:-1])
        self.assertItemsEqual(self.data_loader.x_batches[0][1][1:],
                              self.data_loader.y_batches[0][1][:-1])
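
Note that assertItemsEqual exists only in Python 2; under Python 3 the same check is spelled assertCountEqual. The class can be run with the standard unittest runner; a minimal entry point (the module path in the comment is an assumption):

if __name__ == '__main__':
    unittest.main()  # or from the shell: python -m unittest tests.test_utils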
Example #6
def train(args):
    # Load data
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    # Set vocabulary size
    args.vocab_size = data_loader.vocab_size

    # Create the save directory if it does not exist
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Save the configuration and the vocab, used to reload models when sampling
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    # Create models with arguments
    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'models.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("models saved to {}".format(checkpoint_path))
        # Save the final state
        saver.save(sess, os.path.join(args.save_dir, 'models.ckpt'),
                   global_step=args.num_epochs * data_loader.num_batches)
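
The learning-rate schedule assigned at the top of each epoch, learning_rate * (decay_rate ** e), is a plain exponential decay and is easy to sanity-check offline (the starting values below are typical defaults, not taken from this example):

learning_rate, decay_rate = 0.002, 0.97
for e in range(4):
    print(e, learning_rate * (decay_rate ** e))
# 0 0.002
# 1 0.00194
# 2 0.0018818
# 3 0.001825346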
Example #7
def train(args):
    print(args)
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            #print("model learning rate is {}".format(model.lr.eval()))
            data_loader.reset_batch_pointer('train')

            state = model.initial_state.eval()
            for b in xrange(data_loader.ntrain):
                start = time.time()
                x, y = data_loader.next_batch('train')

                # tmp = ''
                # for c in x:
                #   for i in c:
                #     tmp += np.array(data_loader.chars)[i]
                # print(tmp)

                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.ntrain + b,
                            args.num_epochs * data_loader.ntrain,
                            e, train_loss, end - start))
                if (e * data_loader.ntrain + b) % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.ntrain + b)
                    print("model saved to {}".format(checkpoint_path))


            # eval validation loss
            data_loader.reset_batch_pointer('validation')
            validation_state = model.initial_state.eval()
            val_losses = 0
            for n in xrange(data_loader.nvalidation):
                x, y = data_loader.next_batch('validation')
                feed = {model.input_data: x, model.targets: y, model.initial_state: validation_state}
                validation_loss, validation_state = sess.run([model.cost, model.final_state], feed)
                val_losses += validation_loss

            validation_loss = val_losses / data_loader.nvalidation
            print("validation loss is {}".format(validation_loss))
Example #8
def main(_):
    pp.pprint(FLAGS.__flags)
    emb = None

    try:
        # pre-trained chars embedding
        emb = np.load("./data/emb.npy")
        chars = cPickle.load(open("./data/vocab.pkl", 'rb'))
        vocab_size, emb_size = np.shape(emb)
        data_loader = TextLoader('./data', FLAGS.batch_size, chars)
    except Exception:
        data_loader = TextLoader('./data', FLAGS.batch_size)
        emb_size = FLAGS.emb_size
        vocab_size = data_loader.vocab_size

    model = DialogueModel(batch_size=FLAGS.batch_size,
                          max_seq_length=data_loader.seq_length,
                          vocab_size=vocab_size,
                          pad_token_id=0,
                          unk_token_id=UNK_ID,
                          emb_size=emb_size,
                          memory_size=FLAGS.memory_size,
                          keep_prob=FLAGS.keep_prob,
                          learning_rate=FLAGS.learning_rate,
                          grad_clip=FLAGS.grad_clip,
                          temperature=FLAGS.temperature,
                          infer=False)

    summaries = tf.summary.merge_all()

    init = tf.global_variables_initializer()

    # save hyper-parameters
    cPickle.dump(FLAGS.__flags, open(FLAGS.logdir + "/hyperparams.pkl", 'wb'))

    checkpoint = FLAGS.checkpoint + '/model.ckpt'
    count = 0

    saver = tf.train.Saver()

    with tf.Session() as sess:
        summary_writer = tf.summary.FileWriter(FLAGS.logdir, sess.graph)

        sess.run(init)

        if len(glob(checkpoint + "*")) > 0:
            saver.restore(sess, checkpoint)
            print("Model restored!")
        else:
            # load embedding
            if emb is not None:
                sess.run(tf.assign(model.embedding, emb))  # assumes model.embedding is a tf.Variable; feeding it with empty fetches would be a no-op
            print("Fresh variables!")

        current_step = 0
        count = 0

        for e in range(FLAGS.num_epochs):
            data_loader.reset_batch_pointer()
            state = None

            # iterate by batch
            for _ in range(data_loader.num_batches):
                x, y, input_lengths, output_lengths = data_loader.next_batch()

                if (current_step + 1) % 10 != 0:
                    res = model.step(sess, x, y, input_lengths, output_lengths,
                                     state)
                else:
                    res = model.step(sess, x, y, input_lengths, output_lengths,
                                     state, summaries)
                    summary_writer.add_summary(res["summary_out"],
                                               current_step)
                    loss = res["loss"]
                    perplexity = np.exp(loss)
                    count += 1
                    print("{0}/{1}({2}), perplexity {3}".format(
                        current_step + 1,
                        FLAGS.num_epochs * data_loader.num_batches, e,
                        perplexity))
                state = res["final_state"]

                if (current_step + 1) % 2000 == 0:
                    count = 0
                    summary_writer.flush()
                    save_path = saver.save(sess, checkpoint)
                    print("Model saved in file:", save_path)

                current_step = tf.train.global_step(sess, model.global_step)

        summary_writer.close()
        save_path = saver.save(sess, checkpoint)
        print("Model saved in file:", save_path)
Example #9
def main():
    args = parse_args()
    loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = loader.vocab_size
    print("vocab_size = {}".format(args.vocab_size))

    if args.init_from is not None:
        if os.path.isdir(args.init_from):  # init from directory
            assert os.path.exists(args.init_from), \
            "{} is not a directory".format(args.init_from)
            parent_dir = args.init_from
        else:  # init from file
            assert os.path.exists("{}.index".format(args.init_from)), \
            "{} is not a checkpoint".format(args.init_from)
            parent_dir = os.path.dirname(args.init_from)

        config_file = os.path.join(parent_dir, 'config.pkl')
        vocab_file = os.path.join(parent_dir, 'vocab.pkl')

        assert os.path.isfile(config_file), \
        "config.pkl does not exist in directory {}".format(parent_dir)
        assert os.path.isfile(vocab_file), \
        "vocab.pkl does not exist in directory {}".format(parent_dir)

        if os.path.isdir(args.init_from):
            checkpoint = tf.train.latest_checkpoint(parent_dir)
            assert checkpoint, \
            "no checkpoint in directory {}".format(init_from)
        else:
            checkpoint = args.init_from

        with open(os.path.join(parent_dir, 'config.pkl'), 'rb') as f:
            saved_args = pickle.load(f)
        with open(os.path.join(parent_dir, 'vocab.pkl'), 'rb') as f:
            saved_vocab = pickle.load(f)
        assert saved_vocab == loader.vocab, \
        "vocab in data directory differs from save"

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    new_config_file = os.path.join(args.save_dir, 'config.pkl')
    new_vocab_file = os.path.join(args.save_dir, 'vocab.pkl')

    if not os.path.exists(new_config_file):
        with open(new_config_file, 'wb') as f:
            pickle.dump(args, f)
    if not os.path.exists(new_vocab_file):
        with open(new_vocab_file, 'wb') as f:
            pickle.dump(loader.vocab, f)

    model = Model(args)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        if args.init_from is not None:
            try:
                saver.restore(sess, checkpoint)
            except ValueError:
                print("{} is not a valid checkpoint".format(checkpoint))
            print("initializing from {}".format(checkpoint))

        for e in range(args.num_epochs):
            loader.reset_batch_pointer()
            for b in range(loader.num_batches):

                start = time.time()

                x, _, length = loader.next_batch()

                # Train critic
                for _ in range(1):  # how many critic iterations per generator iteration
                    disc_feed = {model.real_inputs_discrete: x}
                    disc_cost, _ = sess.run(
                        [model.disc_cost, model.disc_train_op], disc_feed)

                # Train generator
                gen_cost, _ = sess.run([model.gen_cost, model.gen_train_op])

                end = time.time()

                global_step = e * loader.num_batches + b

                if global_step % args.display_every == 0 and global_step != 0:
                    print("{}/{} (epoch {}), gen_cost = {:.3f}, disc_cost = {:.3f}, time/batch = {:.3f}" \
                    .format(b, loader.num_batches, e, gen_cost, disc_cost, end - start))

                if global_step % args.save_every == 0 and global_step != 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)
                    print("model saved to {}".format(checkpoint_path))
Example #10
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        train_loss_iterations = {'iteration': [], 'epoch': [], 'train_loss': [], 'val_loss': []}


        #Epoch Loop
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)

            #Mini Batch Training
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                batch_idx = e * data_loader.num_batches + b
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                      .format(batch_idx,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                train_loss_iterations['iteration'].append(batch_idx)
                train_loss_iterations['epoch'].append(e)
                train_loss_iterations['train_loss'].append(train_loss)



                #Check point saving
                if batch_idx % args.save_every == 0:

                    # evaluate the batchs in TF
                    state_val = sess.run(model.initial_state)
                    avg_val_loss = 0
                    for x_val, y_val in data_loader.val_batches:
                        feed_val = {model.input_data: x_val, model.targets: y_val, model.initial_state: state_val}
                        # evaluation only: do not run the train op on validation batches
                        val_loss, state_val = sess.run([model.cost, model.final_state], feed_val)
                        avg_val_loss += val_loss / len(list(data_loader.val_batches))
                    print('val_loss: {:.3f}'.format(avg_val_loss))
                    train_loss_iterations['val_loss'].append(avg_val_loss)

                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
                else:
                    train_loss_iterations['val_loss'].append(None)


            #Save in Pandas
            pd.DataFrame(data=train_loss_iterations,
                         columns=train_loss_iterations.keys()).to_csv(os.path.join(args.save_dir, 'log.csv'))
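
The log.csv written above can be inspected afterwards with pandas, for example (the path is an assumption):

import pandas as pd

log = pd.read_csv('save/log.csv', index_col=0)
print(log[['iteration', 'train_loss', 'val_loss']].tail())
print(log['val_loss'].dropna().min())  # best validation loss seen so far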
Example #11
def train(args):
    # Create the data_loader object, which loads up all of our batches, vocab dictionary, etc.
    # from utils.py (and creates them if they don't already exist).
    # These files go in the data directory.
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    load_model = False
    if not os.path.exists(args.save_dir):
        print("Creating directory %s" % args.save_dir)
        os.mkdir(args.save_dir)
    elif (os.path.exists(os.path.join(args.save_dir, 'config.pkl'))):
        # Trained model already exists
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
                saved_args = pickle.load(f)
                args.block_size = saved_args.block_size
                args.num_blocks = saved_args.num_blocks
                args.num_layers = saved_args.num_layers
                args.model = saved_args.model
                print(
                    "Found a previous checkpoint. Overwriting model description arguments to:"
                )
                print(
                    " model: {}, block_size: {}, num_blocks: {}, num_layers: {}"
                    .format(saved_args.model, saved_args.block_size,
                            saved_args.num_blocks, saved_args.num_layers))
                load_model = True

    # Save all arguments to config.pkl in the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    # Save a tuple of the characters list and the vocab dictionary to chars_vocab.pkl in
    # the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

    # Create the model!
    print("Building the model")
    model = Model(args)
    print("Total trainable parameters: {:,d}".format(
        model.trainable_parameter_count()))

    # Make tensorflow less verbose; filter out info (1+) and warnings (2+) but not errors (3).
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    config = tf.ConfigProto(log_device_placement=False)
    #config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(model.save_variables_list(), max_to_keep=3)
        if (load_model):
            print("Loading saved parameters")
            saver.restore(sess, ckpt.model_checkpoint_path)
        global_epoch_fraction = sess.run(model.global_epoch_fraction)
        global_seconds_elapsed = sess.run(model.global_seconds_elapsed)
        if load_model:
            print(
                "Resuming from global epoch fraction {:.3f},"
                " total trained time: {}, learning rate: {}".format(
                    global_epoch_fraction,
                    datetime.timedelta(seconds=float(global_seconds_elapsed)),
                    sess.run(model.lr)))
        if (args.set_learning_rate > 0):
            sess.run(tf.assign(model.lr, args.set_learning_rate))
            print("Reset learning rate to {}".format(args.set_learning_rate))
        data_loader.cue_batch_pointer_to_epoch_fraction(global_epoch_fraction)
        initial_batch_step = int(
            (global_epoch_fraction - int(global_epoch_fraction)) *
            data_loader.total_batch_count)
        epoch_range = (int(global_epoch_fraction),
                       args.num_epochs + int(global_epoch_fraction))
        writer = tf.summary.FileWriter(args.save_dir,
                                       graph=tf.get_default_graph())
        outputs = [
            model.cost, model.final_state, model.train_op, model.summary_op
        ]
        global_step = epoch_range[
            0] * data_loader.total_batch_count + initial_batch_step
        avg_loss = 0
        avg_steps = 0
        try:
            for e in range(*epoch_range):
                # e iterates through the training epochs.
                # Reset the model state, so it does not carry over from the end of the previous epoch.
                state = sess.run(model.zero_state)
                batch_range = (initial_batch_step,
                               data_loader.total_batch_count)
                initial_batch_step = 0
                for b in range(*batch_range):
                    global_step += 1
                    if global_step % args.decay_steps == 0:
                        # Set the model.lr element of the model to track
                        # the appropriately decayed learning rate.
                        current_learning_rate = sess.run(model.lr)
                        current_learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, current_learning_rate))
                        print("Decayed learning rate to {}".format(
                            current_learning_rate))
                    start = time.time()
                    # Pull the next batch inputs (x) and targets (y) from the data loader.
                    x, y = data_loader.next_batch()

                    # feed is a dictionary of variable references and respective values for initialization.
                    # Initialize the model's input data and target data from the batch,
                    # and initialize the model state to the final state from the previous batch, so that
                    # model state is accumulated and carried over between batches.
                    feed = {model.input_data: x, model.targets: y}
                    model.add_state_to_feed_dict(feed, state)

                    # Run the session! Specifically, tell TensorFlow to compute the graph to calculate
                    # the values of cost, final state, and the training op.
                    # Cost is used to monitor progress.
                    # Final state is used to carry over the state into the next batch.
                    # Training op is not used, but we want it to be calculated, since that calculation
                    # is what updates parameter states (i.e. that is where the training happens).
                    train_loss, state, _, summary = sess.run(outputs, feed)
                    elapsed = time.time() - start
                    global_seconds_elapsed += elapsed
                    writer.add_summary(summary, e * batch_range[1] + b + 1)
                    if avg_steps < 100: avg_steps += 1
                    avg_loss = 1 / avg_steps * train_loss + (
                        1 - 1 / avg_steps) * avg_loss
                    print("{:,d} / {:,d} (epoch {:.3f} / {}), loss {:.3f} (avg {:.3f}), {:.3f}s" \
                        .format(b, batch_range[1], e + b / batch_range[1], epoch_range[1],
                            train_loss, avg_loss, elapsed))
                    # Every save_every batches, save the model to disk.
                    # By default, only the five most recent checkpoint files are kept.
                    if (e * batch_range[1] + b + 1) % args.save_every == 0 \
                            or (e == epoch_range[1] - 1 and b == batch_range[1] - 1):
                        save_model(sess, saver, model, args.save_dir,
                                   global_step, data_loader.total_batch_count,
                                   global_seconds_elapsed)
        except KeyboardInterrupt:
            # Introduce a line break after ^C is displayed so save message
            # is on its own line.
            print()
        finally:
            writer.flush()
            global_step = e * data_loader.total_batch_count + b
            save_model(sess, saver, model, args.save_dir, global_step,
                       data_loader.total_batch_count, global_seconds_elapsed)
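
The avg_loss update in this example, avg_loss = 1/avg_steps * train_loss + (1 - 1/avg_steps) * avg_loss with avg_steps capped at 100, is an incremental mean that becomes an exponential moving average once the cap is reached. A small standalone demonstration:

avg_loss, avg_steps = 0.0, 0
for train_loss in [4.0, 3.0, 2.0, 1.0]:
    if avg_steps < 100:
        avg_steps += 1
    avg_loss = (1.0 / avg_steps) * train_loss + (1 - 1.0 / avg_steps) * avg_loss
    print(avg_loss)
# 4.0, 3.5, 3.0, 2.5 -- the exact running mean while avg_steps < 100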
Example #12
def train2(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.reprocess)
    args.vocab_size = data_loader.vocab_size

    totalTask = args.num_epochs * data_loader.num_batches

    lastCheckpoint = tf.train.latest_checkpoint(args.save_dir) 
    if lastCheckpoint is None:
        startEpoch = 0
    else:
        print "Last checkpoint :", lastCheckpoint
        startEpoch = int(lastCheckpoint.split("-")[-1])

    print "startEpoch = ", startEpoch

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = ConstrainedModel(args)

    etaCount = 0
    etaString = "-" 
    etaStart = time.time()
    etaTime = 0

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        if startEpoch > 0: # load latest checkpoint
            print "Loading last checkpoint"
            saver.restore(sess, lastCheckpoint)

        for e in xrange(startEpoch, args.num_epochs):
            sess.run(tf.assign(model.lr, decayForEpoch(args, e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y, con = data_loader.next_batch()

                feed = {model.input_data: x, model.targets: y, model.initial_state: state, model.con_data:con}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                #time.sleep(0.01)
                #train_loss = 5
                end = time.time()

                taskNum = (e * data_loader.num_batches + b)
                etaCount += 1
                if (etaCount) % 25 == 0:
                    duration = time.time() - etaStart
                    etaTime = (totalTask - (taskNum + 1)) / 25 * duration
                    m, s = divmod(etaTime, 60)
                    h, m = divmod(m, 60)
                    etaString = "%d:%02d:%02d" % (h, m, s)
                    etaStart = time.time()

                print "{}/{} (epoch {}), loss = {:.3f}, time/batch = {:.3f}, ETA: {} ({})" \
                    .format(taskNum, totalTask, e, train_loss, end - start, time.ctime(time.time()+etaTime), etaString)

            if (e + 1) % args.save_every == 0 or e == args.num_epochs - 1:
                checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step = e + 1)
                print "model saved to {}".format(checkpoint_path)
Example #13
def train(args):

    data_loader = TextLoader(args.data_path, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    args.file_size = data_loader.file_size
    print("Vocab size: ",args.vocab_size)
    print("File size: ",args.file_size)
    args.lower_bound = 0 #If we know the entropy then we set it to this
    data_info = {}
    if args.info_path is not None:
        assert os.path.isfile(args.info_path),"Info file not found in the path: %s"%args.info_path

        #Open the info file
        with open(args.info_path, 'rb') as f:
            data_info = json.load(f)
            #Assuming we know entropy
            args.lower_bound = data_info['Entropy']
            print(data_info)

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist 
        assert os.path.isdir(args.init_from), "%s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme
        
        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
        
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
        
    
    ##################################################
    # Get the model
    ##################################################
    model = Model(args)
    print("model Loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        writer = tf.summary.FileWriter(args.summary_dir,sess.graph)
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        
        ######################################################
        # Perform the training
        #####################################################
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer() #Need to check what this does
            state = sess.run(model.initial_state) #What is this initial state
            cumul_loss = 0
             
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                summary, train_loss, state, _ = sess.run([model.merged_summaries, model.cost, model.final_state, model.train_op], feed) #what is the training loss
                train_loss /= np.log(2)
                cumul_loss += train_loss
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

                if b%10 == 0:
                    writer.add_summary(summary,e*data_loader.num_batches + b)
             
            cumul_loss /= data_loader.num_batches
            print("Epoch {}: Cumulative Loss for the epoch: {:.3f}".format(e,cumul_loss))
            if (abs(cumul_loss - args.lower_bound) < 0.1):
                print("Stopping Training as we get a good loss.. :) ... ") 
                break    

        ##############################################################
        # Append details to the output file
        ##############################################################
        args.epoch_stopped=e+1
        args.last_epoch_loss = cumul_loss
        with open(args.output_path, 'ab') as f:
            params = vars(args)
            params.update(data_info)
            #json.dump(params, f,indent=2)
            cPickle.dump(params,f)
            #f.write("\n ############################################# \n")

        with open(args.output_path+".json", 'a') as f:

            params = vars(args)
            params.update(data_info)
            json.dump(params, f,indent=2)
            #cPickle.dump(params)
            f.write("\n ############################################# \n")
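
The division train_loss /= np.log(2) above converts the cross-entropy from nats to bits per symbol, which is what makes it comparable to the entropy value read from the info file; a one-line check:

import numpy as np

loss_nats = 1.386                 # about ln(4)
print(loss_nats / np.log(2))      # ~2.0 bits: a uniform choice over 4 symbols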
Example #14
def cross_validation(args):
    data_loader = TextLoader(args.utils_dir, args.data_path, args.batch_size, args.seq_length, None, None)
    args.vocab_size = data_loader.vocab_size
    args.label_size = data_loader.label_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)
    with open(os.path.join(args.save_dir, 'labels.pkl'), 'wb') as f:
        pickle.dump(data_loader.labels, f)

    data = data_loader.tensor.copy()
    np.random.shuffle(data)
    data_list = np.array_split(data, 10, axis=0)

    model = Model(args)
    accuracy_list = []

    with tf.Session() as sess:
        for n in range(10):
            init = tf.initialize_all_variables()
            sess.run(init)
            saver = tf.train.Saver(tf.all_variables())

            test_data = data_list[n].copy()
            train_data = np.concatenate(map(lambda i: data_list[i], [j for j in range(10) if j!=n]), axis=0)
            data_loader.tensor = train_data

            for e in range(args.num_epochs):
                sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
                data_loader.reset_batch_pointer()

                for b in range(data_loader.num_batches):
                    start = time.time()
                    state = model.initial_state.eval()
                    x, y = data_loader.next_batch()
                    feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                    train_loss, state, _, accuracy = sess.run([model.cost, model.final_state, model.optimizer, model.accuracy], feed_dict=feed)
                    end = time.time()
                    print '{}/{} (epoch {}), train_loss = {:.3f}, accuracy = {:.3f}, time/batch = {:.3f}'\
                        .format(e * data_loader.num_batches + b + 1,
                                args.num_epochs * data_loader.num_batches,
                                e + 1,
                                train_loss,
                                accuracy,
                                end - start)
                    if (e*data_loader.num_batches+b+1) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b==data_loader.num_batches-1):
                        checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=e*data_loader.num_batches+b+1)
                        print 'model saved to {}'.format(checkpoint_path)

            n_chunks = len(test_data) / args.batch_size
            if len(test_data) % args.batch_size:
                n_chunks += 1
            test_data_list = np.array_split(test_data, n_chunks, axis=0)

            correct_total = 0.0
            num_total = 0.0
            for m in range(n_chunks):
                start = time.time()
                x = test_data_list[m][:, :-1]
                y = test_data_list[m][:, -1]
                results = model.predict_class(sess, x)
                correct_num = np.sum(results==y)
                end = time.time()

                correct_total += correct_num
                num_total += len(x)

            accuracy_total = correct_total / num_total
            accuracy_list.append(accuracy_total)
            print 'total_num = {}, total_accuracy = {:.6f}'.format(int(num_total), accuracy_total)

    accuracy_average = np.average(accuracy_list)
    print 'The average accuracy of cross_validation is {}'.format(accuracy_average)
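
The 10-fold split above relies on np.array_split, which also handles sizes that do not divide evenly. A compact demonstration of the same train/test construction on toy data:

import numpy as np

data = np.arange(10)
folds = np.array_split(data, 3)   # fold sizes 4, 3, 3
for n in range(3):
    test_part = folds[n]
    train_part = np.concatenate([folds[j] for j in range(3) if j != n])
    print(n, test_part, train_part)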
Example #15
def train(args):
    # Create the data_loader object, which loads up all of our batches, vocab dictionary, etc.
    # from utils.py (and creates them if they don't already exist).
    # These files go in the data directory.
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    load_model = False
    if not os.path.exists(args.save_dir):
        print("Creating directory %s" % args.save_dir)
        os.mkdir(args.save_dir)
    elif (os.path.exists(os.path.join(args.save_dir, 'config.pkl'))):
        # Trained model already exists
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
                saved_args = cPickle.load(f)
                args.rnn_size = saved_args.rnn_size
                args.num_layers = saved_args.num_layers
                args.model = saved_args.model
                print("Found a previous checkpoint. Overwriting model description arguments to:")
                print(" model: {}, rnn_size: {}, num_layers: {}".format(
                    saved_args.model, saved_args.rnn_size, saved_args.num_layers))
                load_model = True

    # Save all arguments to config.pkl in the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    # Save a tuple of the characters list and the vocab dictionary to chars_vocab.pkl in
    # the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    # Create the model!
    print("Building the model")
    model = Model(args)

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(model.save_variables_list())
        if (load_model):
            print("Loading saved parameters")
            saver.restore(sess, ckpt.model_checkpoint_path)
        global_epoch_fraction = sess.run(model.global_epoch_fraction)
        global_seconds_elapsed = sess.run(model.global_seconds_elapsed)
        if load_model: print("Resuming from global epoch fraction {:.3f},"
                " total trained time: {}, learning rate: {}".format(
                global_epoch_fraction, global_seconds_elapsed, sess.run(model.lr)))
        data_loader.cue_batch_pointer_to_epoch_fraction(global_epoch_fraction)
        initial_batch_step = int((global_epoch_fraction
                - int(global_epoch_fraction)) * data_loader.total_batch_count)
        epoch_range = (int(global_epoch_fraction),
                args.num_epochs + int(global_epoch_fraction))
        writer = tf.train.SummaryWriter(args.save_dir, graph=tf.get_default_graph())
        outputs = [model.cost, model.final_state, model.train_op, model.summary_op]
        is_lstm = args.model == 'lstm'
        global_step = epoch_range[0] * data_loader.total_batch_count + initial_batch_step
        try:
            for e in xrange(*epoch_range):
                # e iterates through the training epochs.
                # Reset the model state, so it does not carry over from the end of the previous epoch.
                state = sess.run(model.initial_state)
                batch_range = (initial_batch_step, data_loader.total_batch_count)
                initial_batch_step = 0
                for b in xrange(*batch_range):
                    global_step += 1
                    if global_step % args.decay_steps == 0:
                        # Set the model.lr element of the model to track
                        # the appropriately decayed learning rate.
                        current_learning_rate = sess.run(model.lr)
                        current_learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, current_learning_rate))
                        print("Decayed learning rate to {}".format(current_learning_rate))
                    start = time.time()
                    # Pull the next batch inputs (x) and targets (y) from the data loader.
                    x, y = data_loader.next_batch()

                    # feed is a dictionary of variable references and respective values for initialization.
                    # Initialize the model's input data and target data from the batch,
                    # and initialize the model state to the final state from the previous batch, so that
                    # model state is accumulated and carried over between batches.
                    feed = {model.input_data: x, model.targets: y}
                    if is_lstm:
                        for i, (c, h) in enumerate(model.initial_state):
                            feed[c] = state[i].c
                            feed[h] = state[i].h
                    else:
                        for i, c in enumerate(model.initial_state):
                            feed[c] = state[i]
                    # Run the session! Specifically, tell TensorFlow to compute the graph to calculate
                    # the values of cost, final state, and the training op.
                    # Cost is used to monitor progress.
                    # Final state is used to carry over the state into the next batch.
                    # Training op is not used, but we want it to be calculated, since that calculation
                    # is what updates parameter states (i.e. that is where the training happens).
                    train_loss, state, _, summary = sess.run(outputs, feed)
                    elapsed = time.time() - start
                    global_seconds_elapsed += elapsed
                    writer.add_summary(summary, e * batch_range[1] + b + 1)
                    print "{}/{} (epoch {}/{}), loss = {:.3f}, time/batch = {:.3f}s" \
                        .format(b, batch_range[1], e, epoch_range[1], train_loss, elapsed)
                    # Every save_every batches, save the model to disk.
                    # By default, only the five most recent checkpoint files are kept.
                    if (e * batch_range[1] + b + 1) % args.save_every == 0 \
                            or (e == epoch_range[1] - 1 and b == batch_range[1] - 1):
                        save_model(sess, saver, model, args.save_dir, global_step,
                                data_loader.total_batch_count, global_seconds_elapsed)
        except KeyboardInterrupt:
            # Introduce a line break after ^C is displayed so save message
            # is on its own line.
            print("")  # bare print() would output "()" under Python 2
        finally:
            writer.flush()
            global_step = e * data_loader.total_batch_count + b
            save_model(sess, saver, model, args.save_dir, global_step,
                    data_loader.total_batch_count, global_seconds_elapsed)
Example #16
def train(args):
    if args.continue_training in ['True', 'true']:
        args.continue_training = True
    else:
        args.continue_training = False

    data_loader = TextLoader(True, args.utils_dir, args.data_path, args.batch_size, args.seq_length, None, None)
    args.vocab_size = data_loader.vocab_size
    args.label_size = data_loader.label_size

    if args.continue_training:
        assert os.path.isfile(os.path.join(args.save_dir, 'config.pkl')), 'config.pkl file does not exist in path %s' % args.save_dir
        assert os.path.isfile(os.path.join(args.utils_dir, 'chars_vocab.pkl')), 'chars_vocab.pkl file does not exist in path %s' % args.utils_dir
        assert os.path.isfile(os.path.join(args.utils_dir, 'labels.pkl')), 'labels.pkl file does not exist in path %s' % args.utils_dir
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        assert ckpt, 'No checkpoint found'
        assert ckpt.model_checkpoint_path, 'No model path found in checkpoint'

        with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ['model', 'rnn_size', 'num_layers', 'seq_length']
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme], 'command line argument and saved model disagree on %s' % checkme

        with open(os.path.join(args.utils_dir, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = pickle.load(f)
        with open(os.path.join(args.utils_dir, 'labels.pkl'), 'rb') as f:
            saved_labels = pickle.load(f)
        assert saved_chars==data_loader.chars, 'data and loaded model disagree on character set'
        assert saved_vocab==data_loader.vocab, 'data and loaded model disagree on dictionary mappings'
        assert saved_labels==data_loader.labels, 'data and loaded model disagree on label dictionary mappings'

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.utils_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)
    with open(os.path.join(args.utils_dir, 'labels.pkl'), 'wb') as f:
        pickle.dump(data_loader.labels, f)

    model = Model(args)

    with tf.Session() as sess:
        init = tf.initialize_all_variables()
        sess.run(init)
        saver = tf.train.Saver(tf.all_variables())

        if args.continue_training:
            saver.restore(sess, ckpt.model_checkpoint_path)

        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()

            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                train_loss, state, _, accuracy = sess.run([model.cost, model.final_state, model.optimizer, model.accuracy], feed_dict=feed)
                end = time.time()
                print '{}/{} (epoch {}), train_loss = {:.3f}, accuracy = {:.3f}, time/batch = {:.3f}'\
                    .format(e * data_loader.num_batches + b + 1,
                            args.num_epochs * data_loader.num_batches,
                            e + 1,
                            train_loss,
                            accuracy,
                            end - start)
                if (e*data_loader.num_batches+b+1) % args.save_every == 0 \
                    or (e==args.num_epochs-1 and b==data_loader.num_batches-1):
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e*data_loader.num_batches+b+1)
                    print 'model saved to {}'.format(checkpoint_path)
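The `'True'`/`'true'` string check at the top of the `train` above can be pushed into argument parsing instead; a minimal standalone sketch under that assumption (the `str2bool` helper and the way the flag is wired are illustrative, not part of the original script):

import argparse

def str2bool(value):
    # Map common command-line spellings onto real booleans.
    if value.lower() in ('true', 't', '1', 'yes'):
        return True
    if value.lower() in ('false', 'f', '0', 'no'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got %r' % value)

parser = argparse.ArgumentParser()
parser.add_argument('--continue_training', type=str2bool, default=False)
print(parser.parse_args(['--continue_training', 'True']).continue_training)  # True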
Exemple #17
0
def train(args):

    onlyfiles = [
        f for f in listdir(args.data_dir)
        if isfile(join(args.data_dir, f)) and (
            not ("pkl" in f) and not ("npy" in f))
    ]

    for f in onlyfiles:
        print(f)
    data_loader = TextLoader(args.data_dir, onlyfiles, args.batch_size,
                             args.seq_length, args.cid_num)
    args.event_vocab_size = data_loader.event_vocab_size
    args.para_vocab_size = data_loader.para_vocab_size
    #event_vocab_rev

    #print(data_loader.event_vocab)
    #print(data_loader.event_words)
    #for data_loader in data_loader_list:
    #    args.vocab_size = data_loader.vocab_size

    #data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)

    # check compatibility if training is continued from previously saved model

    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "event_words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "para_words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)

        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme
        #self.para_vocab, self.para_words, self.para_vocab_rev
        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'event_words_vocab.pkl'),
                  'rb') as f:
            event_saved_vocab, event_saved_words, event_saved_vocab_rev, = cPickle.load(
                f)
        with open(os.path.join(args.init_from, 'para_words_vocab.pkl'),
                  'rb') as f:
            para_saved_vocab, para_saved_words, para_saved_vocab_rev, = cPickle.load(
                f)

        assert event_saved_words == data_loader.event_words, "Data and loaded model disagree on word set!"
        assert event_saved_vocab == data_loader.event_vocab, "Data and loaded model disagree on dictionary mappings!"
        assert event_saved_vocab_rev == data_loader.event_vocab_rev, "Data and loaded model disagree on dictionary mappings!"

        assert para_saved_words == data_loader.para_words, "Data and loaded model disagree on word set!"
        assert para_saved_vocab == data_loader.para_vocab, "Data and loaded model disagree on dictionary mappings!"
        assert para_saved_vocab_rev == data_loader.para_vocab_rev, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'event_words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.event_vocab, data_loader.event_words,
                      data_loader.event_vocab_rev), f)
    with open(os.path.join(args.save_dir, 'para_words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.para_vocab, data_loader.para_words,
                      data_loader.para_vocab_rev), f)

    model = Model(args)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        #sess.run(tf.global_variables_initializer())
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(
                    model.lr,  # the variable, a mutable tensor
                    args.learning_rate *
                    (args.decay_rate**
                     e)))  # variable to be assigned to the model.lr
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                #for y_e, y_a1, y_a2, b_e, b_a1, b_a2 in generate_batch():
                start = time.time()
                x_e, y_e, x_p1, y_p1, x_p2, y_p2 = data_loader.next_batch()

                feed = {
                    model.event_input_data: x_e,
                    model.para1_input_data: x_p1,
                    model.para2_input_data: x_p2,
                    model.targets: y_e,
                    model.targets_para1: y_p1,
                    model.targets_para2: y_p2
                }
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                #train_loss, state, _, _, _, _, = sess.run([model.cost, model.final_state, model.train_op, model.probs, model.probs2, model.probs3], feed)
                train_loss, state, _, res_prob, res_prob1, res_prob2 = sess.run(
                    [
                        model.cost, model.final_state, model.train_op,
                        model.probs, model.probs1, model.probs2
                    ], feed)

                print(res_prob, res_prob1, res_prob2)
                print("   ")
                #arrays = tf.constant(res_prob)
                #print (arrays[0, :])

                #arrays1 = tf.constant(res_prob1)
                #print (arrays1[0, :])

                #calcuateProbDis(res_prob, res_prob1)

                #for i in res_prob :

                #print(res_prob.size, res_prob1.size)

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and
                            b == data_loader.num_batches-1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
    def setUp(self):
        self.data_loader = TextLoader("tests/test_data",
                                      batch_size=2,
                                      seq_length=5)
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length,
                             args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state,
                    model.batch_time: speed
                }
                summary, train_loss, state, _, _ = sess.run([
                    merged, model.cost, model.final_state, model.train_op,
                    model.inc_batch_pointer_op
                ], feed)
                train_writer.add_summary(summary,
                                         e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()
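Both this example and the previous one cap GPU memory through `per_process_gpu_memory_fraction`; a minimal sketch of the two common TF 1.x options (the 0.5 fraction is illustrative):

import tensorflow as tf  # assumes TensorFlow 1.x

# Reserve at most half of the GPU memory up front...
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
# ...or, alternatively, let the allocation grow on demand:
# gpu_options = tf.GPUOptions(allow_growth=True)

with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    print(sess.run(tf.constant('session configured')))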
Exemple #20
0
def train(args):
    data_loader = TextLoader(args.batch_size)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a a path" % args.init_from
        assert os.path.isfile(
            os.path.join(
                args.init_from,
                r".." + os.path.sep + "rnn_2" + os.path.sep + "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(
                args.init_from, r".." + os.path.sep + "rnn_2" + os.path.sep +
                "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"
        assert os.path.isfile(
            os.path.join(
                args.init_from,
                r".." + os.path.sep + "rnn_2" + os.path.sep + "iterations")
        ), "iterations file does not exist in path %s " % args.init_from

        # open old config and check if models are compatible
        with open(
                os.path.join(
                    args.init_from, r".." + os.path.sep + "rnn_2" +
                    os.path.sep + "config.pkl"), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(
                os.path.join(
                    args.init_from, r".." + os.path.sep + "rnn_2" +
                    os.path.sep + "chars_vocab.pkl"), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(
            os.path.join(r".." + os.path.sep + "rnn_2" + os.path.sep +
                         "config.pkl"), 'wb') as f:
        cPickle.dump(args, f, protocol=2)
    with open(
            os.path.join(r".." + os.path.sep + "rnn_2" + os.path.sep +
                         "chars_vocab.pkl"), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f, protocol=2)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        iterations = 0
        # restore model and number of iterations
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
            with open(
                    os.path.join(
                        args.save_dir, r".." + os.path.sep + "rnn_2" +
                        os.path.sep + "iterations"), 'rb') as f:
                iterations = cPickle.load(f)
        losses = []
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            for b in range(data_loader.num_batches):
                iterations += 1
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                train_loss, _, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                sys.stdout.write('\r')
                info = "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start)
                sys.stdout.write(info)
                sys.stdout.flush()
                losses.append(train_loss)
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):  # save for the last result
                    checkpoint_path = os.path.join(r".." + os.path.sep +
                                                   "rnn_2" + os.path.sep +
                                                   "model.ckpt")
                    saver.save(sess, checkpoint_path, global_step=iterations)
                    with open(
                            os.path.join(r".." + os.path.sep + "rnn_2" +
                                         os.path.sep + "iterations"),
                            'wb') as f:
                        cPickle.dump(iterations, f, protocol=2)
                    with open(
                            os.path.join(r".." + os.path.sep + "rnn_2" +
                                         os.path.sep + "losses-" +
                                         str(iterations)), 'wb') as f:
                        cPickle.dump(losses, f, protocol=2)
                    losses = []
                    sys.stdout.write('\n')
                    print("model saved to {}".format(checkpoint_path))
            sys.stdout.write('\n')
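Keeping the running `iterations` count in a side file, as this example does, lets a restarted run continue numbering checkpoints where it left off; a self-contained sketch of the same load-or-initialize pattern (the `save` directory is illustrative):

import os
import pickle

counter_path = os.path.join('save', 'iterations')  # illustrative location

if not os.path.isdir('save'):
    os.makedirs('save')

# Resume the count if a previous run left one behind, else start fresh.
if os.path.isfile(counter_path):
    with open(counter_path, 'rb') as f:
        iterations = pickle.load(f)
else:
    iterations = 0

iterations += 1  # one training step

with open(counter_path, 'wb') as f:
    pickle.dump(iterations, f, protocol=2)  # protocol=2 stays Python-2 readable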
Exemple #21
0
def train(args):

    provide_key_words = args.use_attention or args.state_initialization == 'average'

    if args.use_attention and args.state_initialization == 'prev' and args.attention_type == 'bahdanau_coverage':
        args.state_initialization = 'random'

    data_loader = TextLoader(args.load_preprocessed, 'training', args.data_dir,
                             args.batch_size, args.seq_length, args.vocab_size,
                             args.unk_max_number, args.unk_max_count, None,
                             args.use_bpe, args.bpe_size, args.bpe_model_path,
                             args.pretrained_embeddings, provide_key_words,
                             args.key_word_count_multiplier, args.pos_tags,
                             args.input_encoding)

    args.vocab_size = data_loader.vocab_size
    args.words_vocab_file = data_loader.words_vocab_file
    args.bpe_model_path = data_loader.bpe_model_path

    if args.pretrained_embeddings is not None:
        args.processed_embeddings = os.path.join(data_loader.embedding_dir,
                                                 'embedding_matrix.pkl')

    if args.validation_data_dir is not None:
        val_data_loader = TextLoader(
            args.load_preprocessed, 'validation', args.validation_data_dir,
            args.batch_size, args.seq_length, args.vocab_size,
            args.unk_max_number, args.unk_max_count, data_loader.vocab,
            args.use_bpe, args.bpe_size, data_loader.bpe_model_path,
            args.pretrained_embeddings, provide_key_words,
            args.key_word_count_multiplier, args.pos_tags, args.input_encoding)

        validation_log = open(os.path.join(args.log_dir, 'validation_log.txt'),
                              'a')

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = [
            "rnn_size", "embedding_size", "num_layers", "dropout_prob",
            "batch_size", "seq_length", "attention_type", "use_attention",
            "dont_train_embeddings"
        ]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        assert os.path.isfile(
            saved_model_args.words_vocab_file
        ), "words_vocab.pkl.pkl file does not exist in path %s" % saved_model_args.words_vocab_file

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(saved_model_args.words_vocab_file, 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)

        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)

    with open(data_loader.words_vocab_file, 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)
    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')

    training_log = open(os.path.join(args.log_dir, 'training_log.txt'), 'a')

    best_val_error = None
    start_epoch = 0
    learning_rate = args.learning_rate

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()

        saver = tf.train.Saver(tf.global_variables())

        zero_state = sess.run(model.initial_state)
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

            learning_rate = model.lr.eval()

            best_val_error = model.best_val_error.eval() or None

            best_val_epoch = model.best_val_epoch.eval()

            start_epoch = model.epoch_pointer.eval() + 1

        else:
            sess.run(tf.assign(model.lr, learning_rate))

        for e in range(start_epoch, args.num_epochs):

            epoch_start = time.time()
            # decrease learning rate after every epoch if adaptive_learning_rate is not used
            if (args.validation_data_dir is None
                    or args.adaptive_learning_rate <= 0) and e > 0:
                learning_rate *= args.decay_rate
                sess.run(tf.assign(model.lr, learning_rate))

            data_loader.reset_batch_pointer()

            state = zero_state

            epoch_error = 0
            epoch_coverage_loss = 0

            # as every epoch is started, save its number in the model
            sess.run(tf.assign(model.epoch_pointer, e))

            for b in range(data_loader.pointer, data_loader.num_batches):

                x, y, target_weights, target_sequence_length, key_words, key_words_count, key_words_weights = data_loader.next_batch(
                )

                if args.state_initialization == 'zero':
                    state = zero_state

                if key_words is not None:
                    feed = {
                        model.input_data: x,
                        model.targets: y,
                        model.target_weights: target_weights,
                        model.target_sequence_length: target_sequence_length,
                        model.initial_state: state,
                        model.attention_key_words: key_words,
                        model.attention_states_count: key_words_count,
                        model.attention_states_weights: key_words_weights
                    }
                else:
                    feed = {
                        model.input_data: x,
                        model.targets: y,
                        model.target_weights: target_weights,
                        model.target_sequence_length: target_sequence_length,
                        model.initial_state: state
                    }

                summary, train_loss, state, _ = sess.run(
                    [merged, model.cost, model.final_state, model.train_op],
                    feed)

                # if model trained has bahdanau_coverage attention type, collect coverage_loss as well
                if args.use_attention and args.attention_type == 'bahdanau_coverage':
                    epoch_coverage_loss += np.sum(state.coverage_loss)

                # accumulate the train_loss
                epoch_error += train_loss

                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    train_writer.add_summary(summary,
                                             e * data_loader.num_batches + b)

            epoch_speed = time.time() - epoch_start
            if args.use_attention and args.attention_type == 'bahdanau_coverage':
                print(
                    "epoch\t{}\tepoch_loss\t{:.3f}\tepoch_coverage_loss\t{:.3f}\tepoch_time\t{:.3f}\tlearning_rate\t{:.3f}\n"
                    .format(
                        e, epoch_error / data_loader.num_batches,
                        epoch_coverage_loss / args.batch_size /
                        data_loader.num_batches, epoch_speed, learning_rate))
                training_log.write(
                    "epoch\t{}\tepoch_loss\t{:.3f}\tepoch_coverage_loss\t{:.3f}\tepoch_time\t{:.3f}\tlearning_rate\t{:.3f}\n"
                    .format(
                        e, epoch_error / data_loader.num_batches,
                        epoch_coverage_loss / args.batch_size /
                        data_loader.num_batches, epoch_speed, learning_rate))
            else:
                print(
                    "epoch\t{}\tepoch_loss\t{:.3f}\tepoch_time\t{:.3f}\tlearning_rate\t{:.3f}\n"
                    .format(e, epoch_error / data_loader.num_batches,
                            epoch_speed, learning_rate))
                training_log.write(
                    "epoch\t{}\tepoch_loss\t{:.3f}\tepoch_time\t{:.3f}\tlearning_rate\t{:.3f}\n"
                    .format(e, epoch_error / data_loader.num_batches,
                            epoch_speed, learning_rate))

            if e % args.save_every == 0 or e == args.num_epochs - 1:  # save for the last result

                # validate every saved model
                if args.validation_data_dir is not None:
                    val_start = time.time()

                    val_data_loader.reset_batch_pointer()

                    val_error = 0

                    val_coverage_loss = 0

                    val_state = zero_state

                    for val_b in range(val_data_loader.pointer,
                                       val_data_loader.num_batches):

                        if args.state_initialization == 'zero':
                            val_state = zero_state

                        val_x, val_y, val_target_weights, val_target_sequence_length, val_key_words, val_key_words_count, val_key_words_weights = val_data_loader.next_batch(
                        )

                        if val_key_words is not None:
                            val_feed = {
                                model.input_data:
                                val_x,
                                model.targets:
                                val_y,
                                model.target_weights:
                                val_target_weights,
                                model.target_sequence_length:
                                val_target_sequence_length,
                                model.initial_state:
                                val_state,
                                model.attention_key_words:
                                val_key_words,
                                model.attention_states_count:
                                val_key_words_count,
                                model.attention_states_weights:
                                val_key_words_weights
                            }
                        else:
                            val_feed = {
                                model.input_data: val_x,
                                model.targets: val_y,
                                model.target_weights: val_target_weights,
                                model.target_sequence_length:
                                val_target_sequence_length,
                                model.initial_state: val_state
                            }

                        val_train_loss, val_state = sess.run(
                            [model.cost, model.final_state], val_feed)

                        val_error += val_train_loss
                        # if model trained has bahdanau_coverage attention type, collect coverage_loss as well
                        if args.use_attention and args.attention_type == 'bahdanau_coverage':
                            val_coverage_loss += np.sum(
                                val_state.coverage_loss)

                    mean_val_error = val_error / val_data_loader.num_batches

                    val_speed = time.time() - val_start

                    if args.use_attention and args.attention_type == 'bahdanau_coverage':
                        print(
                            "epoch\t{}\tvalidation_loss\t{:.3f}\tval_coverage_loss\t{:.3f}\tvalidation_time\t{:.3f}\n"
                            .format(
                                e, mean_val_error, val_coverage_loss /
                                args.batch_size / val_data_loader.num_batches,
                                val_speed))
                        validation_log.write(
                            "epoch\t{}\tvalidation_loss\t{:.3f}\tval_coverage_loss\t{:.3f}\tvalidation_time\t{:.3f}\n"
                            .format(
                                e, mean_val_error, val_coverage_loss /
                                args.batch_size / val_data_loader.num_batches,
                                val_speed))
                    else:
                        print(
                            "epoch\t{}\tvalidation_loss\t{:.3f}\tvalidation_time\t{:.3f}\n"
                            .format(e, mean_val_error, val_speed))
                        validation_log.write(
                            "epoch\t{}\tvalidation_loss\t{:.3f}\tvalidation_time\t{:.3f}\n"
                            .format(e, mean_val_error, val_speed))

                    # save information about best validation error and epoch in model
                    if best_val_error is None or best_val_error > mean_val_error:

                        print('======= NEW BEST EPOCH =======')
                        best_val_error = mean_val_error
                        best_val_epoch = e

                        sess.run(
                            tf.assign(model.best_val_error, best_val_error))
                        sess.run(
                            tf.assign(model.best_val_epoch, best_val_epoch))

                    # if adaptive learning rate is used and enough epochs have passed without improvement then decrease learning rate
                    elif e - best_val_epoch >= args.adaptive_learning_rate and args.adaptive_learning_rate > 0:
                        learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, learning_rate))

                    if args.save_all or best_val_epoch == e:
                        saver.save(sess, checkpoint_path, global_step=e)
                        print("model saved to {}".format(checkpoint_path))

                    if e - best_val_epoch >= args.max_worse_validations and args.max_worse_validations > 0:
                        print(
                            "finishing early as {} evaluated models did not lower the validation loss"
                            .format(args.max_worse_validations))
                        break

                else:
                    saver.save(sess, checkpoint_path, global_step=e)
                    print("model saved to {}".format(checkpoint_path))

    training_log.close()

    if args.validation_data_dir is not None:
        validation_log.close()
        val_data_loader.close()

    train_writer.close()
    data_loader.close()
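The schedule in this example decays the learning rate only after `adaptive_learning_rate` evaluations pass without a new best validation error; a minimal sketch of that rule in isolation (function and argument names are illustrative):

def maybe_decay(learning_rate, epoch, best_val_epoch, patience, decay_rate):
    # Decay only once `patience` epochs have elapsed without improvement;
    # patience <= 0 disables the adaptive behaviour.
    if patience > 0 and epoch - best_val_epoch >= patience:
        return learning_rate * decay_rate
    return learning_rate

print(maybe_decay(0.01, 5, 3, 2, 0.5))  # 0.005: two epochs without improvement
print(maybe_decay(0.01, 4, 3, 2, 0.5))  # 0.01: still within the patience window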
Exemple #22
0
def train(args):
    # Data Preparation
    # ====================================

    data_loader = TextLoader(args.data_dir, args.sense_file, args.batch_size,
                             args.seq_length, args.data_set_size)
    args.vocab_size = data_loader.vocab_size
    args.verb_size = len(data_loader.verbs)
    print(args.verb_size)
    print("Number of sentences: {}".format(data_loader.num_data))
    print("Vocabulary size: {}".format(args.vocab_size))

    # Check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ["rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = pickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.words, data_loader.vocab), f)
    """
    embedding_matrix = get_vocab_embedding(args.save_dir, data_loader.words, args.embedding_file)
    print("Embedding matrix shape:",embedding_matrix.shape)
    """

    # Training
    # ====================================
    with tf.Graph().as_default():
        with tf.Session() as sess:
            #sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            #sess.run()
            model = BasicLSTM(args)

            # Define training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(args.learning_rate)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(model.cost, tvars),
                                              args.grad_clip)
            train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in zip(grads, tvars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)

            # Summary for loss
            loss_summary = tf.summary.scalar("loss", model.cost)

            # Train summaries
            merged = tf.summary.merge_all()
            if not os.path.exists(args.log_dir):
                os.makedirs(args.log_dir)
            train_writer = tf.summary.FileWriter(args.log_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables())

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Restore model
            if args.init_from is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Start training
            print("Start training")

            # create test and dev sets
            data_loader.reset_batch_pointer()
            x_batch_dev, y_batch_dev, unk_count_dev, n_sent_dev, _ = data_loader.next_batch_test(
            )
            feed_dict_dev = {
                model.x: x_batch_dev,
                model.y: y_batch_dev,
                model.keep_prob: args.keep_prob
            }
            data_loader.reset_batch_pointer()
            x_batch_test, y_batch_test, unk_count_test, n_sent_test, _ = data_loader.next_batch_test(
                set_to_choose=1)
            feed_dict_test = {
                model.x: x_batch_test,
                model.y: y_batch_test,
                model.keep_prob: args.keep_prob
            }
            data_loader.reset_batch_pointer()

            steps = 0
            prev_equal = 0.
            for epoch in range(args.num_epochs):
                data_loader.reset_batch_pointer()
                state = sess.run(model.initial_state)
                for i in range(data_loader.num_batches):
                    start = time.time()
                    x_batch, y_batch, unk_count, n_sent, _ = data_loader.next_batch(
                    )
                    feed_dict = {
                        model.x: x_batch,
                        model.y: y_batch,
                        model.keep_prob: args.keep_prob
                    }
                    _, step, summary, loss, equal = sess.run([
                        train_op, global_step, merged, model.cost, model.equal
                    ], feed_dict)

                    print(
                        "training step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches, loss,
                                np.mean(equal),
                                int(unk_count / args.batch_size), n_sent,
                                time.time() - start))

                # Start dev
                print("Start dev")
                data_loader.reset_batch_pointer()
                accur = []
                for i in range(data_loader.num_batches_test):
                    start = time.time()
                    step, summary, loss, equal = sess.run(
                        [global_step, merged, model.cost, model.equal],
                        feed_dict_dev)

                    print(
                        "dev step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches_test,
                                loss, np.mean(equal),
                                int(unk_count_test / args.batch_size),
                                n_sent_test,
                                time.time() - start))

                    accur.append(np.mean(equal))

                eval_acc = np.mean(accur)
                if eval_acc > prev_equal:
                    prev_equal = eval_acc
                    train_writer.add_summary(summary, step)
                    current_step = tf.train.global_step(sess, global_step)
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    path = saver.save(sess,
                                      checkpoint_path,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}".format(path))
                elif steps > args.stop_count:
                    print("early stopping")
                    break
                else:
                    steps += 1

            print("Start test")
            data_loader.reset_batch_pointer()
            accur = []
            for i in range(data_loader.num_batches_test):
                start = time.time()
                step, summary, loss, equal = sess.run(
                    [global_step, merged, model.cost, model.equal],
                    feed_dict_test)

                print(
                    "test step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                    .format(step, epoch, i, data_loader.num_batches_test, loss,
                            np.mean(equal),
                            int(unk_count_test / args.batch_size), n_sent_test,
                            time.time() - start))
                accur.append(np.mean(equal))

            print('avg test: {:.4f}'.format(np.mean(accur)))
            train_writer.close()
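The training procedure above clips the global gradient norm before applying updates; a minimal TF 1.x sketch of the same `clip_by_global_norm` / `apply_gradients` pairing on a toy variable:

import tensorflow as tf  # assumes TensorFlow 1.x

w = tf.Variable([3.0, 4.0])
loss = tf.reduce_sum(tf.square(w))  # gradient is 2*w = [6, 8], global norm 10
grads = tf.gradients(loss, [w])
clipped, global_norm = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = tf.train.GradientDescentOptimizer(0.1).apply_gradients(zip(clipped, [w]))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, norm = sess.run([train_op, global_norm])
    print(norm)  # 10.0 is the pre-clipping norm; the applied gradient has norm 5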
    def setUp(self):
        self.data_loader = TextLoader("tests/test_data", batch_size=2, seq_length=5)
Exemple #24
0
def infer(args):
    start = time.time()
    
    # Load testing data
    # ====================================
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)
        print('restored args:\n', json.dumps(vars(saved_args), indent=4, separators=(',',':'))) 
    
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'rb') as f:
        _, vocab = pickle.load(f)    
    data_loader  = TextLoader(args.test_file, args.sense_file, args.batch_size, args.seq_length, args.data_set_size, shuffle=True)
    
    sense_idx = pickle.load(open('wsd_senses_idx.p','rb'))
    #words to sense dict
    words_sense = pickle.load(open('verbs_sense.p','rb'))

    
    # Predict
    # ===================================
    #checkpoint = tf.train.latest_checkpoint(args.save_dir)

    
    with tf.Graph().as_default():
        with tf.Session() as sess:

            start = time.time()
            saver = tf.train.import_meta_graph('./save2/model.ckpt-65.meta')
            saver.restore(sess,tf.train.latest_checkpoint('./save/'))
            graph = tf.get_default_graph()
            graph_x = graph.get_tensor_by_name("x:0")
            graph_y = graph.get_tensor_by_name("y:0")
            graph_context_layer = graph.get_tensor_by_name("cont_layer:0")
            #graph_softmax_loss = graph.get_operation_by_name("softmax_loss")
            '''
            model = BasicLSTM(saved_args, True)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint)
            '''
            

            data_loader.reset_batch_pointer()
            x_batch_test, y_batch_test, unk_count_test, n_sent_test, senss_test = data_loader.next_batch_test(collect_sense=True)
            feed_dict_test = {graph_x: x_batch_test, graph_y: y_batch_test}
            data_loader.reset_batch_pointer()

            xs = []
            ys = []
            senses = []
            data_loader.reset_batch_pointer()
            for i in range(data_loader.num_batches):
                x_batch, y_batch, unk_count, n_sent, senss = data_loader.next_batch(collect_sense=True, shuffle=False)
                feed_dict = {graph_x: x_batch, graph_y: y_batch}
                wordVecs = sess.run(graph_context_layer, feed_dict)
                
                n_sents = len(y_batch)
                for j in range(n_sents):
                    if y_batch[j] != -1:
                        xs.append(wordVecs[j])
                        ys.append(y_batch[j])
                        senses.append(senss[j])
                    

            #print(xs[0].shape)
            n_words = np.max(ys) + 1
            n_examples = len(ys)
            

            sense_vects = {}
            for i in range(n_examples):
                if senses[i] in sense_vects:
                    sense_vects[senses[i]].append(xs[i])
                else:
                    sense_vects[senses[i]] = [xs[i]]

            sense_keys = sense_vects.keys()

            sense_train_counts = {}
            for key in sense_keys:
                sense_train_counts[key] = len(sense_vects[key][:])
                sense_vects[key] = np.mean(sense_vects[key], axis=0)

            xs_test = []
            ys_test = []
            senses_test= []
            for i in range(data_loader.num_batches):
                
                wordVecs = sess.run(graph_context_layer, feed_dict_test)
                
                n_sents = len(y_batch_test)
                for j in range(n_sents):
                    if y_batch_test[j] != -1:
                        xs_test.append(wordVecs[j])
                        ys_test.append(y_batch_test[j])
                        senses_test.append(senss_test[j])
            
            n_tests = len(ys_test)
            corr = 0
            for i in range(n_tests):
                ambig_word = data_loader.verbs_idx[ys_test[i]]
                ambig_word = data_loader.words[ambig_word]
                correct_sense = senses_test[i]
                if ambig_word in words_sense:
                    max_cos = 0
                    for sense in words_sense[ambig_word]:
                        if sense in sense_idx:
                            sen = sense_idx[sense]
                            if sen in sense_vects:
                                print(cosine_similarity(sense_vects[sen], xs_test[i]))
                                exit()

                loss, contextVecs = sess.run(graph_context_layer, feed_dict)
                print(contextVecs.shape)
                exit()


    print("Saved prediction to {}".format(out_path))
    print("Total run time: {}s".format(time.time() - start))
def train(args):

    display_step = 100
    num_train = 20000
    train_input, train_output, train_length, max_length = get_training_data(args, 'train', num_train, 0)
    test_input, test_output, test_length, max_length = get_training_data(args, 'test', 25000, 50000)
    val_input, val_output, val_length, max_length = get_training_data(args, 'val', 25000, 75000)

    #for i in range(2):
    #  print('i: ' + str(i) + ' => ' + str(train_input[i,:]))

    train_input = train_input.astype(int)

    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = 50000 #data_loader.vocab_size
    
    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist 
        assert os.path.isdir(args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "config.pkl")), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from, "chars_vocab.pkl")), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], "Command line argument and saved model disagree on '%s'" % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
        
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
        
    model = Model(args)

    print("num_layers: ", args.num_layers)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()

            step = 0
            ptr = 0

            print('train_input: ', train_input.shape)

            while step < num_train / args.batch_size:
                b = step
                # for b in range(data_loader.num_batches):
                step += 1
                start = time.time()

                # input batch
                x = np.squeeze(train_input[ptr:ptr + args.batch_size, :args.batch_size])

                # output batch
                y = np.squeeze(train_input[ptr:ptr + args.batch_size, 1:args.batch_size + 1])
                ptr += args.batch_size + 1
                # x, y = data_loader.next_batch()
                # print('x: ', x.shape)
                # print('y: ', y.shape)
                # print('x: ', x[1])
                # print('y: ', y)
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                tt, calc_res, reg_cost, train_loss, state, _ = sess.run(
                    [model.target_vector, model.logits, model.reg_cost,
                     model.cost, model.final_state, model.train_op], feed)
                print('out len: ', len(tt))
                print('target: ', tt)
                print('calc_res: ', calc_res)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}, reg_cost = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start, reg_cost))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e == args.num_epochs - 1 and b == data_loader.num_batches - 1):  # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

                if step % display_step == 0:
                    print('x: ', x[1])
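Note that the slices above take `args.batch_size` columns where a sequence length would normally appear; the usual next-token layout slices by sequence length so that `y` is `x` shifted one position. A minimal numpy sketch of that windowing (shapes are illustrative):

import numpy as np

batch_size, seq_length = 2, 4
tokens = np.arange(batch_size * (seq_length + 1)).reshape(batch_size, seq_length + 1)

x = tokens[:, :seq_length]       # inputs:  positions 0 .. seq_length-1
y = tokens[:, 1:seq_length + 1]  # targets: positions 1 .. seq_length
print(x)
print(y)  # each target is the token that follows the corresponding input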
Exemple #26
0
def train(args):
 
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    load_model = False
    if not os.path.exists(args.save_dir):
        print("Creating directory %s" % args.save_dir)
        os.mkdir(args.save_dir)
    elif (os.path.exists(os.path.join(args.save_dir, 'config.pkl'))):
        # Trained model already exists
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
                saved_args = pickle.load(f)
                args.block_size = saved_args.block_size
                args.num_blocks = saved_args.num_blocks
                args.num_layers = saved_args.num_layers
                args.model = saved_args.model
                print("Found a previous checkpoint. Overwriting model description arguments to:")
                print(" model: {}, block_size: {}, num_blocks: {}, num_layers: {}".format(
                    saved_args.model, saved_args.block_size, saved_args.num_blocks, saved_args.num_layers))
                load_model = True

   
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

   
    print("Building the model")
    model = Model(args)
    print("Total trainable parameters: {:,d}".format(model.trainable_parameter_count()))
    

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    config = tf.ConfigProto(log_device_placement=False)
   
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(model.save_variables_list(), max_to_keep=3)
        if (load_model):
            print("Loading saved parameters")
            saver.restore(sess, ckpt.model_checkpoint_path)
        global_epoch_fraction = sess.run(model.global_epoch_fraction)
        global_seconds_elapsed = sess.run(model.global_seconds_elapsed)
        if load_model: print("Resuming from global epoch fraction {:.3f},"
                " total trained time: {}, learning rate: {}".format(
                global_epoch_fraction,
                datetime.timedelta(seconds=float(global_seconds_elapsed)),
                sess.run(model.lr)))
        if (args.set_learning_rate > 0):
            sess.run(tf.assign(model.lr, args.set_learning_rate))
            print("Reset learning rate to {}".format(args.set_learning_rate))
        data_loader.cue_batch_pointer_to_epoch_fraction(global_epoch_fraction)
        initial_batch_step = int((global_epoch_fraction
                - int(global_epoch_fraction)) * data_loader.total_batch_count)
        epoch_range = (int(global_epoch_fraction),
                args.num_epochs + int(global_epoch_fraction))
        writer = tf.summary.FileWriter(args.save_dir, graph=tf.get_default_graph())
        outputs = [model.cost, model.final_state, model.train_op, model.summary_op]
        global_step = epoch_range[0] * data_loader.total_batch_count + initial_batch_step
        avg_loss = 0
        avg_steps = 0
        try:
            for e in range(*epoch_range):
              
                state = sess.run(model.zero_state)
                batch_range = (initial_batch_step, data_loader.total_batch_count)
                initial_batch_step = 0
                for b in range(*batch_range):
                    global_step += 1
                    if global_step % args.decay_steps == 0:
                        
                        current_learning_rate = sess.run(model.lr)
                        current_learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, current_learning_rate))
                        print("Decayed learning rate to {}".format(current_learning_rate))
                    start = time.time()
                  
                    x, y = data_loader.next_batch()

                    feed = {model.input_data: x, model.targets: y}
                    model.add_state_to_feed_dict(feed, state)
                    
                   
                    train_loss, state, _, summary = sess.run(outputs, feed)
                    elapsed = time.time() - start
                    global_seconds_elapsed += elapsed
                    writer.add_summary(summary, e * batch_range[1] + b + 1)
                    if avg_steps < 100: avg_steps += 1
                    avg_loss = 1 / avg_steps * train_loss + (1 - 1 / avg_steps) * avg_loss
                    print("{:,d} / {:,d} (epoch {:.3f} / {}), loss {:.3f} (avg {:.3f}), {:.3f}s" \
                        .format(b, batch_range[1], e + b / batch_range[1], epoch_range[1],
                            train_loss, avg_loss, elapsed))
                 
                    if (e * batch_range[1] + b + 1) % args.save_every == 0 \
                            or (e == epoch_range[1] - 1 and b == batch_range[1] - 1):
                        save_model(sess, saver, model, args.save_dir, global_step,
                                data_loader.total_batch_count, global_seconds_elapsed)
        except KeyboardInterrupt:
            # Catch Ctrl-C so the finally block below can still save the model.
            print()
        finally:
            writer.flush()
            global_step = e * data_loader.total_batch_count + b  # relies on at least one batch having run
            save_model(sess, saver, model, args.save_dir, global_step,
                    data_loader.total_batch_count, global_seconds_elapsed)
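
Note: save_model() is called above but not shown. A minimal sketch of what it plausibly does, assuming model.global_epoch_fraction and model.global_seconds_elapsed are assignable tf.Variables (names taken from the restore path earlier in this function):

def save_model(sess, saver, model, save_dir, global_step,
               total_batch_count, global_seconds_elapsed):
    # Persist the resume counters so a restarted run can pick up mid-epoch.
    sess.run(tf.assign(model.global_epoch_fraction,
                       global_step / float(total_batch_count)))
    sess.run(tf.assign(model.global_seconds_elapsed,
                       float(global_seconds_elapsed)))
    checkpoint_path = os.path.join(save_dir, 'model.ckpt')
    saver.save(sess, checkpoint_path, global_step=global_step)
    print("Model saved to {}".format(checkpoint_path))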
Exemple #27
0
def train(args):
    print(args)
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.training_data_ratio)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    #sess = tf.InteractiveSession()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter('/tmp', sess.graph)

        step = 0
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            #print("model learning rate is {}".format(model.lr.eval()))
            data_loader.reset_batch_pointer('train')

            state = model.initial_state.eval()
            for b in range(data_loader.ntrain):
                start = time.time()
                x, y = data_loader.next_batch('train')

                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                step = e * data_loader.ntrain + b
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(step,
                            args.num_epochs * data_loader.ntrain,
                            e, train_loss, end - start))

                if step % args.write_summary_every == 0:
                    # training loss
                    summary_str = sess.run(summary_op, feed_dict=feed)
                    summary_writer.add_summary(summary_str, step)

                if step % args.save_every == 0 or (step + 1) == (args.num_epochs * data_loader.ntrain):
                    # eval validation loss
                    data_loader.reset_batch_pointer('validation')
                    validation_state = model.initial_state.eval()
                    val_losses = 0
                    for n in range(data_loader.nvalidation):
                        x, y = data_loader.next_batch('validation')
                        val_feed = {model.input_data: x, model.targets: y, model.initial_state: validation_state}
                        validation_loss, validation_state = sess.run([model.cost, model.final_state], val_feed)
                        val_losses += validation_loss

                    validation_loss = val_losses / data_loader.nvalidation
                    print("validation loss is {}".format(validation_loss))

                    # keep the best few validation losses (and their checkpoints) in a json file
                    args_dict = vars(args)
                    args_dict['step'] = step
                    val_loss_file = args.save_dir + '/val_loss.json'
                    loss_json = ''
                    save_new_checkpoint = False
                    time_int = int(time.time())
                    args_dict['checkpoint_path'] = os.path.join(args.save_dir, 'model.ckpt-'+str(time_int))
                    if os.path.exists(val_loss_file):
                        with open(val_loss_file, "r") as text_file:
                            text = text_file.read()
                            if text == '':
                                loss_json = {validation_loss: args_dict}
                                save_new_checkpoint = True
                            else:
                                loss_json = json.loads(text)
                                losses = list(loss_json.keys())
                                if len(losses) > 3:
                                    losses.sort(key=lambda x: float(x), reverse=True)
                                    loss = losses[0]  # worst (largest) retained loss
                                    if validation_loss < float(loss):
                                        to_be_remove_ckpt_file_path = loss_json[loss]['checkpoint_path']
                                        to_be_remove_ckpt_meta_file_path = to_be_remove_ckpt_file_path + '.meta'
                                        print("removed checkpoint {}".format(to_be_remove_ckpt_file_path))
                                        if os.path.exists(to_be_remove_ckpt_file_path):
                                            os.remove(to_be_remove_ckpt_file_path)
                                        if os.path.exists(to_be_remove_ckpt_meta_file_path):
                                            os.remove(to_be_remove_ckpt_meta_file_path)
                                        del(loss_json[loss])
                                        loss_json[validation_loss] = args_dict
                                        save_new_checkpoint = True
                                else:
                                    loss_json[validation_loss] = args_dict
                                    save_new_checkpoint = True
                    else:
                        loss_json = {validation_loss: args_dict}
                        save_new_checkpoint = True

                    if save_new_checkpoint:
                        checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step = time_int)
                        print("model saved to {}".format(checkpoint_path + '-' + str(time_int)))

                        with open(val_loss_file, "w") as text_file:
                            json.dump(loss_json, text_file)
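
The val_loss.json written above maps each validation loss (serialized as a string key) to the argument dict of the run that produced it, including its checkpoint_path. A hypothetical helper for reading back the best entry, assuming that layout:

import json

def best_checkpoint(val_loss_file):
    # Keys are validation losses stored as strings; smaller is better.
    with open(val_loss_file) as f:
        loss_json = json.load(f)
    best = min(loss_json, key=float)
    return float(best), loss_json[best]['checkpoint_path']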
Exemple #28
0
def main(_):
  pp.pprint(FLAGS.__flags)

  if not os.path.exists(FLAGS.checkpoint_dir):
    print(" [*] Creating checkpoint directory...")
    os.makedirs(FLAGS.checkpoint_dir)

  data_loader = TextLoader(os.path.join(FLAGS.data_dir, FLAGS.dataset_name),
                           FLAGS.batch_size, FLAGS.seq_length)
  vocab_size = data_loader.vocab_size
  valid_size = 50
  valid_window = 100

  with tf.variable_scope('model'):
    train_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                          FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                          FLAGS.seq_length, FLAGS.keep_prob,
                          FLAGS.grad_clip)

  with tf.variable_scope('model', reuse=True):
    simple_model = CharRNN(vocab_size, 1, FLAGS.rnn_size,
                           FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                           1, FLAGS.keep_prob,
                           FLAGS.grad_clip)

  with tf.variable_scope('model', reuse=True):
    valid_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                          FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                          FLAGS.seq_length, FLAGS.keep_prob,
                          FLAGS.grad_clip)

  with tf.Session() as sess:
    tf.global_variables_initializer().run()

    train_model.load(sess, FLAGS.checkpoint_dir, FLAGS.dataset_name)

    best_val_pp = float('inf')
    best_val_epoch = 0
    valid_loss = 0
    valid_perplexity = 0
    start = time.time()

    if FLAGS.export:
      print("Eval...")
      final_embeddings = train_model.embedding.eval(sess)
      emb_file = os.path.join(FLAGS.data_dir, FLAGS.dataset_name, 'emb.npy')
      print("Embedding shape: {}".format(final_embeddings.shape))
      np.save(emb_file, final_embeddings)

    else: # Train
      current_step = 0
      similarity, valid_examples, _ = compute_similarity(train_model, valid_size, valid_window, 6)

      # save hyper-parameters
      with open(FLAGS.log_dir + "/hyperparams.pkl", 'wb') as f:
        cPickle.dump(FLAGS.__flags, f)

      # run it!
      for e in range(FLAGS.num_epochs):
        data_loader.reset_batch_pointer()

        # (re)set the learning rate each epoch (no decay is actually applied here)
        sess.run(tf.assign(train_model.lr, FLAGS.learning_rate))

        # iterate by batch
        for b in range(data_loader.num_batches):
          x, y = data_loader.next_batch()
          res, time_batch = run_epochs(sess, x, y, train_model)
          train_loss = res["loss"]
          train_perplexity = np.exp(train_loss)
          iterate = e * data_loader.num_batches + b

          # print log
          print("{}/{} (epoch {}) loss = {:.2f}({:.2f}) perplexity(train/valid) = {:.2f}({:.2f}) time/batch = {:.2f} chars/sec = {:.2f}k"\
              .format(e * data_loader.num_batches + b,
                      FLAGS.num_epochs * data_loader.num_batches,
                      e, train_loss, valid_loss, train_perplexity, valid_perplexity,
                      time_batch, (FLAGS.batch_size * FLAGS.seq_length) / time_batch / 1000))

          current_step = tf.train.global_step(sess, train_model.global_step)

        # validate
        valid_loss = 0

        for vb in range(data_loader.num_valid_batches):
          res, valid_time_batch = run_epochs(sess, data_loader.x_valid[vb], data_loader.y_valid[vb], valid_model, False)
          valid_loss += res["loss"]

        valid_loss = valid_loss / data_loader.num_valid_batches
        valid_perplexity = np.exp(valid_loss)

        print("### valid_perplexity = {:.2f}, time/batch = {:.2f}".format(valid_perplexity, valid_time_batch))

        log_str = ""

        # Generate sample
        smp1 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"我喜歡做")
        smp2 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"他吃飯時會用")
        smp3 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"人類總要重複同樣的")
        smp4 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"天色暗了,好像快要")

        log_str = log_str + smp1 + "\n"
        log_str = log_str + smp2 + "\n"
        log_str = log_str + smp3 + "\n"
        log_str = log_str + smp4 + "\n"

        # Write a similarity log
        # Note that this is expensive (~20% slowdown if computed every 500 steps)
        sim = similarity.eval()
        for i in range(valid_size):
          valid_word = data_loader.chars[valid_examples[i]]
          top_k = 8 # number of nearest neighbors
          nearest = (-sim[i, :]).argsort()[1:top_k+1]
          log_str = log_str + "Nearest to %s:" % valid_word
          for k in range(top_k):
            close_word = data_loader.chars[nearest[k]]
            log_str = "%s %s," % (log_str, close_word)
          log_str = log_str + "\n"
        print(log_str)

        # Write to log
        with codecs.open(FLAGS.log_dir + "/similarity.txt", "w", "utf-8") as text_file:
          text_file.write(log_str)

        if valid_perplexity < best_val_pp:
          best_val_pp = valid_perplexity
          best_val_epoch = iterate  # note: a global batch index, not an epoch count

          # save best model
          train_model.save(sess, FLAGS.checkpoint_dir, FLAGS.dataset_name)
          print("model saved to {}".format(FLAGS.checkpoint_dir))

        # early_stopping
        if iterate - best_val_epoch > FLAGS.early_stopping:
          print('Total time: {}'.format(time.time() - start))
          break
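
run_epochs() above is an external helper that is not shown. Judging from its call sites it runs a single batch and returns a result dict plus the elapsed time; a sketch under that assumption (the fetch names are guesses):

def run_epochs(sess, x, y, model, is_training=True):
    # Run one batch, timing it; skip the optimizer op when evaluating.
    start = time.time()
    feed = {model.input_data: x, model.targets: y}
    fetches = {"loss": model.cost}
    if is_training:
        fetches["train_op"] = model.train_op
    res = sess.run(fetches, feed)
    return res, time.time() - start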
def train(args):
    model_name = args.data_dir.split("/")[-1]
    # make a dir to store checkpoints
    args.save_dir = os.path.join('checkpoints', model_name)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
                os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run([summaries, model.cost, model.final_state, model.train_op], feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(e * data_loader.num_batches + b,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and b == data_loader.num_batches-1):
                    # remove previous checkpoints
                    current_checkpoints = [f for f in os.listdir(args.save_dir) if os.path.isfile(os.path.join(args.save_dir, f))]
                    for f in current_checkpoints:
                        if model_name in f:
                            os.remove(os.path.join(args.save_dir, f))
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, model_name)
                    saver.save(sess, checkpoint_path, global_step=e * data_loader.num_batches + b)
                    final_model = '{}-{}'.format(model_name, e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

    # get the vocab
    model_vocab = getModelVocab(model_name)
    # dump the checkpoints to javascript
    dump_checkpoints(model_vocab, model_name, final_model)
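
getModelVocab() and dump_checkpoints() are external helpers not shown here (the latter presumably exports the checkpoint for use from JavaScript). A guess at the vocab lookup, assuming it simply reloads the chars_vocab.pkl dumped earlier in this function:

def getModelVocab(model_name):
    # Hypothetical: reload the vocab pickled into checkpoints/<model_name>.
    vocab_path = os.path.join('checkpoints', model_name, 'chars_vocab.pkl')
    with open(vocab_path, 'rb') as f:
        chars, vocab = cPickle.load(f)
    return vocab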
Exemple #30
0
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        # UPDATE: added 'rb' to read back in the correct format
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = pickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {
                    model.input_data: x,
                    model.targets: y,
                    model.initial_state: state
                }
                train_loss, state, _ = sess.run(
                    [model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
def train(args):
    print("training on \'"+args.data_dir+"\'")
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    
    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        print("RELOADING FROM CHECKPOING")
        # check if all necessary files exist 
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme
        
        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    print("====================================")
    printargs(args)
    print("====================================")
    model = Model(args)

    def validateonce(expectationdropout=True, TrueIfVal_FalseIfTrain=True):
        data_loader.reset_batch_pointers()
        model.resetweights(expectationdropout=expectationdropout)
        state = model.resetstate()
        start = time.time()
        losses = []
        backupptrtr = data_loader.pointer_tr
        entrps = None
        truths = None
        allprobs = None
        for b in range(data_loader.num_batches_te):
            if TrueIfVal_FalseIfTrain:
                x, y = data_loader.next_batch_te()
            else:
                x, y = data_loader.next_batch_tr()
            # shapes of x and y are (batchsize, seqlength); each element is an integer from 0 to (vocabsize-1)
            feed = {model.input_data: x, model.targets: y, model.initial_state: state}
            feed = model.extrafeed(feed)
            state, probs, entropies = sess.run([model.final_state, model.probs, model.pred_entropy], feed)
            theseprobs = np.reshape(probs, (1, args.batch_size, args.seq_length, args.vocab_size))
            thesey = np.reshape(y, (args.batch_size, args.seq_length))
            allprobs = tryconcat(allprobs, theseprobs, axis=2)
            truths = tryconcat(truths, thesey, axis=1)
            y = y.flatten()
            for ii in range(y.size):
                losses.append(-np.log2(probs[ii,y[ii]]))
            thesentropies = np.reshape(entropies,(1,args.batch_size,args.seq_length))
            entrps = tryconcat(entrps, thesentropies, axis=2)
        data_loader.pointer_tr = backupptrtr
        end = time.time()
        testtimeperbatch = (end-start) / float(data_loader.num_batches_te)
        return (np.array(losses), truths, entrps, allprobs, testtimeperbatch)

    # for tensorboard
    valsumplh_cost = tf.placeholder(tf.float32, (1,), name="validation_summary_placeholder_cost")
    valsumplh_pent = tf.placeholder(tf.float32, (1,), name="validation_summary_placeholder_prediction_entropy")
    # reduce_sum collapses the size-1 vector into the scalar the summary op expects
    valsumscs_cost = tf.scalar_summary('cost_val', tf.reduce_sum(valsumplh_cost))
    valsumscs_pent = tf.scalar_summary('prediction_entropy_val', tf.reduce_sum(valsumplh_pent))
    sumwriter = tf.train.SummaryWriter(args.save_dir, graph=tf.get_default_graph())
    
    befstarttime = time.time()
    
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())

        print("====================================")
        allvars = tf.all_variables()
        trainablevars = tf.trainable_variables()
        trainableMB = 0
        for tvar in allvars:
            #print(type(tvar))
            #print(tvar.name+" -- "+str(tvar.dtype)+" -- "+str(tvar.get_shape()))
            if tvar in trainablevars:
                print("@@@ "+tvar.name+" -- "+str(tvar.get_shape()))
                trainableMB += 4*tvar.get_shape().num_elements()
            else:
                print(tvar.name+" -- "+str(tvar.get_shape()))
        print(" ")
        print("trainable megabytes: "+str(float(trainableMB)/1e6))
        print("====================================")

        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            # train model
            newlr = args.learning_rate * (args.decay_rate ** e)
            sess.run(tf.assign(model.lr, newlr))
            data_loader.reset_batch_pointers()
            model.resetweights()
            state = model.resetstate()
            for b in range(data_loader.num_batches_tr):
                model.resetweights() # reset weights at every gradient descent iteration,
                                    # but don't necessarily reset the state
                dovalidate = False
                #if b == (data_loader.num_batches_tr - 1):
                #    dovalidate = True
                x, y = data_loader.next_batch_tr()
                # shapes of x and y are (batchsize, seqlength); each element is an integer from 0 to (vocabsize-1)
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                feed = model.extrafeed(feed)
                start = time.time()
                train_loss, state, _, summary = sess.run([model.cost, model.final_state, model.train_op, model.tbsummary], feed)
                end = time.time()
                bidx = e * data_loader.num_batches_tr + b
                sumwriter.add_summary(summary, bidx)
                epstr = "{}/{} (epoch {})".format(bidx, args.num_epochs * data_loader.num_batches_tr, e+1)
                if bidx % 100 == 0:
                    print(epstr + ", train_loss = {:.3f}, time/batch = {:.3f}, lr = {:.3f}".format(train_loss, end - start, newlr))
                if bidx % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches_tr-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = bidx)
                    print(epstr+", model saved to {}".format(checkpoint_path))
                    dovalidate = True
                if b > 0 and b % args.reset_every == 0:
                    state = model.resetstate()
                    #print(epstr+", reset state in the midst of a training epoch, at batch "+str(b+1)+"/"+str(data_loader.num_batches_tr))
                # validate model?
                if dovalidate:
                    valstr = ""
                    befvaltime = time.time()
                    if False and args.dropout > 1e-3:  # MC-dropout validation path, currently disabled
                        testlosses = None
                        ytruths = None
                        meanpredentrops = None
                        meanprobdistrs = None
                        testtimeperbatch = 0.0
                        if e > 95:
                            niters = 29
                        else:
                            niters = 5
                        for kk in range(niters):
                            theselosses, thesetruths, theseentrops, theseprobs, thistimeperbatch = validateonce(expectationdropout=False)
                            testlosses = tryconcat(testlosses, theselosses, axis=0)
                            if meanprobdistrs is None:
                                meanprobdistrs = theseprobs
                                meanpredentrops = theseentrops
                            else:
                                meanprobdistrs += theseprobs
                                meanpredentrops += theseentrops
                            if ytruths is None:
                                ytruths = thesetruths
                            testtimeperbatch += (thistimeperbatch / float(niters))
                            #print("kk == "+str(kk+1)+"/"+str(niters))
                        meanprobdistrs /= float(niters)
                        meanpredentrops /= float(niters)
                        entropvar = entropyvariance(args, meanprobdistrs, meanpredentrops, plotfig=1)
                        testloss = np.mean(testlosses)
                        testlossstd = np.std(testlosses)
                        rendertext('blue', args.save_dir, 'z_'+str(bidx)+'_JSdiv', ytruths, entropvar)
                        rendertext('blue', args.save_dir, 'z_'+str(bidx)+'_entrop', ytruths, np.reshape(meanpredentrops,(meanpredentrops.shape[1],meanpredentrops.shape[2])))
                        valpredentropy = np.mean(meanpredentrops)
                        valpredentrstd = np.std( meanpredentrops)
                        suffix = ", estimated from "+str(niters)+" MC samples"
                    else:
                        theselosses, _, theseentrops, _, testtimeperbatch = validateonce(expectationdropout=True, TrueIfVal_FalseIfTrain=False)
                        valstr += ", exp. tr. loss "+str(np.mean(theselosses))+", pred-ent "+str(np.mean(theseentrops))+" ("+str(testtimeperbatch)+" spb)"
                        theselosses, _, theseentrops, _, testtimeperbatch = validateonce(expectationdropout=True, TrueIfVal_FalseIfTrain=True)
                        testloss   = np.mean(theselosses)
                        testlossstd = np.std(theselosses)
                        valpredentropy = np.mean(theseentrops)
                        valpredentrstd = np.std( theseentrops)
                        suffix = ", MC expectation"
                    valstr += ", val loss "+str(testloss)+" w/std "+str(testlossstd)+", pred-ent "+str(valpredentropy)+" w/std "+str(valpredentrstd)+" ("+str(testtimeperbatch)+" spb)"+suffix

                    valsummary1 = sess.run([valsumscs_cost,], {valsumplh_cost:np.array(testloss).reshape((1,))})[0]
                    valsummary2 = sess.run([valsumscs_pent,], {valsumplh_pent:np.array(valpredentropy).reshape((1,))})[0]
                    sumwriter.add_summary(valsummary1, (e+1)*data_loader.num_batches_tr)
                    sumwriter.add_summary(valsummary2, (e+1)*data_loader.num_batches_tr)
                    
                    aftvaltime = time.time()
                    
                    print(epstr+valstr)
                    print("validation time: "+str(aftvaltime-befvaltime)+" sec")
Exemple #32
0
print(__doc__)

# Specify the configuration values needed for training.
data_dir = '_rnn_data'

#data_dir = 'data/linux'
batch_size = 50  # Training : 50, Sampling : 1
seq_length = 50  # Training : 50, Sampling : 1
hidden_size = 128  # number of nodes in the hidden layer
learning_rate = 0.002
num_epochs = 2
num_hidden_layers = 2
grad_clip = 5  # threshold used for gradient clipping

# Load the data using TextLoader.
data_loader = TextLoader(data_dir, batch_size, seq_length)
# Declare chars, which holds every token in the training data, and vocab, a dict built by assigning an id to each entry of chars.
chars = data_loader.chars
vocab = data_loader.vocab
vocab_size = data_loader.vocab_size  # total vocabulary size

# Set up placeholders to receive the input data, target data, and batch size.
input_data = tf.placeholder(tf.int32,
                            shape=[None, None
                                   ])  # input_data : [batch_size, seq_length])
target_data = tf.placeholder(
    tf.int32, shape=[None, None])  # target_data : [batch_size, seq_length])
state_batch_size = tf.placeholder(tf.int32,
                                  shape=[])  # Training : 50, Sampling : 1

# Declare variables to convert the output of the RNN's final hidden layer into softmax outputs.
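
The scrape cuts this example off here; in the standard char-rnn setup the variables the last comment refers to would look roughly like this (a hypothetical continuation, shapes following the settings above):

# Project the RNN output onto vocabulary logits.
softmax_w = tf.Variable(tf.random_normal([hidden_size, vocab_size]))
softmax_b = tf.Variable(tf.random_normal([vocab_size]))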
Exemple #33
0
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"words_vocab.pkl")),"words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = cPickle.load(f)
        assert saved_words==data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.words, data_loader.vocab), f)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)
            if args.init_from is not None:
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None
            for b in range(data_loader.pointer, data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state,
                        model.batch_time: speed}
                summary, train_loss, state, _, _ = sess.run([merged, model.cost, model.final_state,
                                                             model.train_op, model.inc_batch_pointer_op], feed)
                train_writer.add_summary(summary, e * data_loader.num_batches + b)
                speed = time.time() - start
                if (e * data_loader.num_batches + b) % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * data_loader.num_batches + b,
                                args.num_epochs * data_loader.num_batches,
                                e, train_loss, speed))
                if (e * data_loader.num_batches + b) % args.save_every == 0 \
                        or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()
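
This variant resumes mid-epoch through counters stored in the graph itself (model.epoch_pointer, model.batch_pointer, model.inc_batch_pointer_op). A minimal sketch of how such counters might be declared inside the Model, assuming non-trainable variables that get saved with the checkpoint:

# Inside Model.__init__ (assumed, not shown in the snippet):
self.epoch_pointer = tf.Variable(0, trainable=False, name="epoch_pointer")
self.batch_pointer = tf.Variable(0, trainable=False, name="batch_pointer")
self.inc_batch_pointer_op = tf.assign_add(self.batch_pointer, 1)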
Exemple #34
0
def test(test_args):
    start = time.time()
    if test_args.json == "true":
        config_extension = "json"
    else:
        config_extension = "pkl"

    if test_args.json=="true":
        with open(os.path.join(test_args.save_dir, 'config.%s' %(config_extension)), 'r') as f:
            args_dict = json.load(f)
        args = Bunch(args_dict)
    else:   
        with open(os.path.join(test_args.save_dir, 'config.%s' %(config_extension)), 'rb') as f:
            args = pickle.load(f)

    args.save_dir = test_args.save_dir
    data_loader = TextLoader(args, train=False)
    test_data = data_loader.read_dataset(test_args.test_file)

    print(args.save_dir)
    print("Unit: " + args.unit)
    print("Composition: " + args.composition)

    args.word_vocab_size = data_loader.word_vocab_size
    if args.unit != "word":
        args.subword_vocab_size = data_loader.subword_vocab_size

    # Statistics of words
    print("Word vocab size: " + str(data_loader.word_vocab_size))

    # Statistics of sub units
    if args.unit != "word":
        print("Subword vocab size: " + str(data_loader.subword_vocab_size))
        if args.composition == "bi-lstm":
            if args.unit == "char":
                args.bilstm_num_steps = data_loader.max_word_len
                print("Max word length:", data_loader.max_word_len)
            elif args.unit == "char-ngram":
                args.bilstm_num_steps = data_loader.max_ngram_per_word
                print("Max ngrams per word:", data_loader.max_ngram_per_word)
            elif args.unit == "morpheme" or args.unit == "oracle":
                args.bilstm_num_steps = data_loader.max_morph_per_word
                print("Max morphemes per word", data_loader.max_morph_per_word)

    if args.unit == "word":
        lm_model = WordModel
    elif args.composition == "addition":
        lm_model = AdditiveModel
    elif args.composition == "bi-lstm":
        lm_model = BiLSTMModel
    else:
        sys.exit("Unknown unit or composition.")

    print("Begin testing...")
    with tf.Graph().as_default(), tf.Session() as sess:
        with tf.variable_scope("model"):
            mtest = lm_model(args, is_training=False, is_testing=True)

        # save only the last model
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
        tf.initialize_all_variables().run()
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        test_perplexity = run_epoch(sess, mtest, test_data, data_loader, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
        print("Test time: %.0f\n" % (time.time() - start))
        print("\n")
Exemple #35
0
def train(args):
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "chars_vocab.pkl")
        ), "chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.latest_checkpoint(args.init_from)
        assert ckpt, "No checkpoint found"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        # instrument for tensorboard
        summaries = tf.summary.merge_all()
        writer = tf.summary.FileWriter(
            os.path.join(args.log_dir, time.strftime("%Y-%m-%d-%H-%M-%S")))
        writer.add_graph(sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt)
        for e in range(args.num_epochs):
            sess.run(
                tf.assign(model.lr, args.learning_rate * (args.decay_rate**e)))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h

                # instrument for tensorboard
                summ, train_loss, state, _ = sess.run(
                    [summaries, model.cost, model.final_state, model.train_op],
                    feed)
                writer.add_summary(summ, e * data_loader.num_batches + b)

                end = time.time()
                print(
                    "{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches, e,
                            train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                        or (e == args.num_epochs-1 and
                            b == data_loader.num_batches-1):
                    # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_path,
                               global_step=e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))
Exemple #36
0
def main():
    args = parse_args()
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)
    loader = TextLoader(saved_args.data_dir, saved_args.batch_size,
                        saved_args.seq_length, isTraining=False)

    saved_args.batch_size = 1  # Set batch size to 1 when sampling
    model = Model(saved_args, training=False)

    with open(args.test_file, 'r') as f:
        testset = f.readlines()
    testset_len = len(testset)

    lut = {}
    next_char_probs = {}

    vocab = loader.vocab
    charset = vocab.keys()
    charset_ordered = sorted(vocab.keys(), key=(lambda key: vocab[key]))

    results = []
    results_len = 0

    # Load first character probabilities
    first_char_probs = loader.first_char_probs
    for c in charset:
        # has_key() is Python 2 only; use the `in` operator instead
        if c in first_char_probs and vocab[c] != 0:
            lut[c] = first_char_probs[c]

    total_start = time.time()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        begin_index = 0
        if args.init_from is not None:
            lut_file = os.path.join(args.save_dir, 'lut.pkl')
            with open(lut_file, 'rb') as f:
                lut = pickle.load(f)
            next_char_probs_file = os.path.join(args.save_dir,
                                                'next_char_probs.pkl')
            with open(next_char_probs_file, 'rb') as f:
                next_char_probs = pickle.load(f)
            print("lut initialized from {}".format(lut_file))
            print("next_char_probs initialized from {}".format(
                next_char_probs_file))

            partial_results_file = args.output_file + "-" + str(args.init_from)
            with open(partial_results_file, 'r') as f:
                results = f.readlines()
            assert len(results) == args.init_from, "Partial results count does not match init_from"
            results_len = len(results)

            begin_index = args.init_from

        start = time.time()
        for testline in testset[begin_index:]:
            result_prob = 0.0
            # print "testline: " + testline
            # Find probability for existing prefix
            for k in range(1, len(testline)):
                if testline[:-k] not in lut:
                    continue
                current_prefix = testline[:-k]
                # print "Found prefix in lut: " + current_prefix
                result_prob = lut[current_prefix]
                # Find probability for the rest of the string
                for m in range(k):
                    # Try to get next possible characters' probabilities from dict
                    if current_prefix in next_char_probs:
                        # print "Found next char prob of <" + current_prefix + "> in dict!"
                        next_char_prob = next_char_probs[current_prefix]
                    # Otherwise get next possible characters' probabilities by NN
                    else:
                        length = len(current_prefix)
                        line = np.array(list(map(vocab.get, current_prefix)))
                        line = np.pad(line,
                                      (0, saved_args.seq_length - len(line)),
                                      'constant')
                        feed = {
                            model.input_data: [line],
                            model.sequence_lengths: [length]
                        }
                        probs = sess.run([model.probs], feed)
                        probs = np.reshape(probs, (-1, saved_args.vocab_size))
                        next_char_prob = probs[length - 1]
                        # Add next possible characters' probabilities to dict
                        next_char_probs[current_prefix] = next_char_prob

                    next_char = testline[-k + m]
                    current_prefix += next_char
                    result_prob *= next_char_prob[vocab[next_char]]
                    # Add new string to lut
                    lut[current_prefix] = result_prob
                break
            # print(str(result_prob) + '\t' + current_prefix)
            results.append(str(result_prob) + '\n')
            results_len += 1
            if results_len % args.display_every == 0:
                end = time.time()
                print("Progress: {}/{}; time taken = {}".format(
                    results_len, testset_len, end - start))
                start = time.time()
            if results_len % args.save_every == 0:
                lut_file = os.path.join(args.save_dir, 'lut.pkl')
                with open(lut_file, 'wb') as f:
                    pickle.dump(lut, f)
                next_char_probs_file = os.path.join(args.save_dir,
                                                    'next_char_probs.pkl')
                with open(next_char_probs_file, 'wb') as f:
                    pickle.dump(next_char_probs, f)
                print("lut saved to {}".format(lut_file))
                print(
                    "next_char_probs saved to {}".format(next_char_probs_file))

                partial_results_file = args.output_file + "-" + str(
                    len(results))
                with open(partial_results_file, 'w') as f:
                    f.writelines(results)
                end = time.time()
                print("Written partial results to {}; time taken = {}".format(
                    partial_results_file, end - start))
                start = time.time()

            if args.early_exit is not None and results_len >= args.early_exit:
                break

    with open(args.output_file, 'w') as f:
        f.writelines(results)
    total_end = time.time()
    print(
        "Finished assigning probabilities to {} passwords; total time taken = {}"
        .format(len(results), total_end - total_start))
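
For context, the lut / next_char_probs caching above implements the chain rule P(s) = P(s_1) * prod_i P(s_i | s_1...s_{i-1}) over characters:

# Toy illustration of the memoization (not part of the original code):
#   P("abc") = P("a") * P("b" | "a") * P("c" | "ab")
# lut caches the probability of every prefix already scored, while
# next_char_probs caches the network's next-character distribution after each
# prefix, so passwords sharing a prefix only pay for their new suffix characters.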