def train(FLAGS):

    # Load the data
    en_token_ids, en_seq_lens, en_vocab_dict, en_rev_vocab_dict = \
        process_data('data/en.p', max_vocab_size=5000, target_lang=False)
    sp_token_ids, sp_seq_lens, sp_vocab_dict, sp_rev_vocab_dict = \
        process_data('data/sp.p', max_vocab_size=5000, target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_en_seq_lens, train_sp_seq_len, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_en_seq_lens, valid_sp_seq_len = \
        split_data(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
            train_ratio=0.8)

    # Update parameters
    FLAGS.en_vocab_size = len(en_vocab_dict)
    FLAGS.sp_vocab_size = len(sp_vocab_dict)

    # Start session
    with tf.Session() as sess:

        # Create new model or load old one
        model = create_model(sess, FLAGS)

        # Training begins
        losses = []
        for epoch_num, epoch in enumerate(
                generate_epoch(train_encoder_inputs, train_decoder_inputs,
                               train_targets, train_en_seq_lens,
                               train_sp_seq_len, FLAGS.num_epochs,
                               FLAGS.batch_size)):

            print "EPOCH: %i" % (epoch_num)
            # Decay learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate * \
                (FLAGS.learning_rate_decay_factor ** epoch_num)))

            batch_loss = []

            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                            batch_targets, batch_en_seq_lens,
                            batch_sp_seq_lens) in enumerate(epoch):

                loss, _ = model.step(sess, FLAGS, batch_encoder_inputs,
                                     batch_decoder_inputs, batch_targets,
                                     batch_en_seq_lens, batch_sp_seq_lens,
                                     FLAGS.dropout)

                batch_loss.append(loss)

            losses.append(np.mean(batch_loss))

        plt.plot(losses, label='loss')
        plt.legend()
        plt.show()
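
Every training loop in these examples follows the same two-level pattern: generate_epoch yields one iterable per epoch, and iterating that yields the batches. The helper itself is not part of this listing; below is a minimal sketch of what such a batching generator could look like (the *_sketch names are hypothetical, not the real implementation):

def generate_batches_sketch(encoder_inputs, decoder_inputs, targets,
                            en_seq_lens, sp_seq_lens, batch_size):
    # Yield consecutive, equally sized batches of the five parallel arrays.
    num_batches = len(encoder_inputs) // batch_size
    for b in range(num_batches):
        start, end = b * batch_size, (b + 1) * batch_size
        yield (encoder_inputs[start:end], decoder_inputs[start:end],
               targets[start:end], en_seq_lens[start:end],
               sp_seq_lens[start:end])

def generate_epoch_sketch(encoder_inputs, decoder_inputs, targets,
                          en_seq_lens, sp_seq_lens, num_epochs, batch_size):
    # One batch generator per epoch, matching the nested loops used above.
    for _ in range(num_epochs):
        yield generate_batches_sketch(encoder_inputs, decoder_inputs, targets,
                                      en_seq_lens, sp_seq_lens, batch_size)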
Example #2
def train(params):

    # Load the data
    hindi_token_ids, hindi_seq_lens, hindi_vocab_dict, hindi_rev_vocab_dict = \
        process_data('../data/hindi_dump.p', max_vocab_size=100000, target_lang=False)
    bengali_token_ids, bengali_seq_lens, bengali_vocab_dict, bengali_rev_vocab_dict = \
        process_data('../data/bengali_dump.p', max_vocab_size=100000, target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_hindi_seq_lens, train_bengali_seq_len, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_hindi_seq_lens, valid_bengali_seq_lens = \
        split_data(hindi_token_ids, bengali_token_ids, hindi_seq_lens,
            bengali_seq_lens, train_ratio=0.8)

    # Update parameters
    params.hindi_vocab_size = len(hindi_vocab_dict)
    params.bengali_vocab_size = len(bengali_vocab_dict)

    print params.hindi_vocab_size, params.bengali_vocab_size

    with tf.Session() as sess:

        # Build the model and initialize its variables
        _model = model(params)
        sess.run(tf.global_variables_initializer())

        # Training begins
        losses = []
        accs = []
        for epoch_num, epoch in enumerate(
                generate_epoch(train_encoder_inputs, train_decoder_inputs,
                               train_targets, train_hindi_seq_lens,
                               train_bengali_seq_len, params.num_epochs,
                               params.batch_size)):

            print "EPOCH:", epoch_num
            # Decay learning rate
            sess.run(tf.assign(_model.lr, 0.01 * (0.99 ** epoch_num)))

            batch_loss = []
            batch_acc = []
            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                            batch_targets, batch_hindi_seq_lens,
                            batch_bengali_seq_lens) in enumerate(epoch):

                loss, _, acc = _model.step(sess, params, batch_encoder_inputs,
                                           batch_decoder_inputs, batch_targets,
                                           batch_hindi_seq_lens, batch_bengali_seq_lens,
                                           params.dropout)
                batch_loss.append(loss)
                batch_acc.append(acc)

            losses.append(np.mean(batch_loss))
            accs.append(np.mean(batch_acc))
            print "Training Loss:", losses[-1]
            print "Training Accuracy:", accs[-1]

        # Plot and save the training curve
        plt.plot(losses, label='loss')
        plt.legend()
        plt.title('Plot for Training Error versus Epochs', fontsize='20', style='oblique')
        plt.xlabel('Epochs', fontsize='16', color='green')
        plt.ylabel('Training Error', fontsize='16', color='green')
        plt.savefig('../output/plot.png')
        plt.show()

        # Evaluate on the validation set
        acc = _model.test(sess, params, valid_encoder_inputs, valid_decoder_inputs,
                          valid_targets, valid_hindi_seq_lens, valid_bengali_seq_lens,
                          params.dropout)
        print acc
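
The tf.assign call above applies a simple exponential decay, lr_t = 0.01 * 0.99 ** t. A standalone illustration of the resulting schedule (plain Python, constants taken from the code above):

# Exponential learning-rate decay: lr_t = base_lr * decay ** t
base_lr, decay = 0.01, 0.99
schedule = [base_lr * (decay ** epoch_num) for epoch_num in range(4)]
# -> 0.01, 0.0099, 0.009801, 0.00970299 (up to float rounding)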
Example #3
def train(FLAGS):
    """
    Train the model on the associative retrieval task.
    """

    # Load the train/valid datasets
    print("Loading datasets:")
    with open(os.path.join(FLAGS.data_dir, 'train.p'), 'rb') as f:
        train_X, train_y = pickle.load(f)
        print("train_X:", np.shape(train_X), ",train_y:", np.shape(train_y))
    with open(os.path.join(FLAGS.data_dir, 'valid.p'), 'rb') as f:
        valid_X, valid_y = pickle.load(f)
        print("valid_X:", np.shape(valid_X), ",valid_y:", np.shape(valid_y))

    with tf.Session() as sess:

        # Load the model
        model = create_model(sess, FLAGS)
        start_time = time.time()

        # Start training
        train_epoch_loss = []
        valid_epoch_loss = []
        train_epoch_accuracy = []
        valid_epoch_accuracy = []
        train_epoch_gradient_norm = []
        for train_epoch_num, train_epoch in enumerate(
                generate_epoch(train_X, train_y, FLAGS.num_epochs,
                               FLAGS.batch_size)):
            print("EPOCH:", train_epoch_num)

            # Assign the learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate))

            # Decay the learning rate
            #sess.run(tf.assign(model.lr, FLAGS.learning_rate * \
            #    (FLAGS.learning_rate_decay_factor ** epoch_num)))

            #if epoch_num < 1000:
            #    sess.run(tf.assign(model.lr, FLAGS.learning_rate))
            #elif epoch_num >= 1000: # slow down now
            #    sess.run(tf.assign(model.lr, 1e-4))

            # Custom decay (empirically decided)
            #if (epoch_num%1000 == 0):
            #    sess.run(tf.assign(model.lr,
            #        FLAGS.learning_rate/(10**(epoch_num//1000))))

            # Train set
            train_batch_loss = []
            train_batch_accuracy = []
            train_batch_gradient_norm = []
            for train_batch_num, (batch_X, batch_y) in enumerate(train_epoch):

                loss, accuracy, norm, _ = model.step(sess,
                                                     batch_X,
                                                     batch_y,
                                                     FLAGS.l,
                                                     FLAGS.e,
                                                     forward_only=False)
                train_batch_loss.append(loss)
                train_batch_accuracy.append(accuracy)
                train_batch_gradient_norm.append(norm)

            train_epoch_loss.append(np.mean(train_batch_loss))
            train_epoch_accuracy.append(np.mean(train_batch_accuracy))
            train_epoch_gradient_norm.append(
                np.mean(train_batch_gradient_norm))
            print('Epoch: [%i/%i] time: %.4f, loss: %.7f,'
                  ' acc: %.7f, norm: %.7f' %
                  (train_epoch_num, FLAGS.num_epochs, time.time() - start_time,
                   train_epoch_loss[-1], train_epoch_accuracy[-1],
                   train_epoch_gradient_norm[-1]))

            # Validation set
            valid_batch_loss = []
            valid_batch_accuracy = []
            for valid_epoch_num, valid_epoch in enumerate(
                    generate_epoch(valid_X,
                                   valid_y,
                                   num_epochs=1,
                                   batch_size=FLAGS.batch_size)):

                for valid_batch_num, (batch_X,
                                      batch_y) in enumerate(valid_epoch):
                    loss, accuracy = model.step(sess,
                                                batch_X,
                                                batch_y,
                                                FLAGS.l,
                                                FLAGS.e,
                                                forward_only=True)
                    valid_batch_loss.append(loss)
                    valid_batch_accuracy.append(accuracy)

            valid_epoch_loss.append(np.mean(valid_batch_loss))
            valid_epoch_accuracy.append(np.mean(valid_batch_accuracy))

            # Save the model
            if (train_epoch_num % FLAGS.save_every == 0 or
                train_epoch_num == (FLAGS.num_epochs-1)) and \
                (train_epoch_num > 0):
                if not os.path.isdir(FLAGS.ckpt_dir):
                    os.makedirs(FLAGS.ckpt_dir)
                checkpoint_path = os.path.join(FLAGS.ckpt_dir,
                                               "%s.ckpt" % model_name)
                print("Saving the model.")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)

        plt.plot(train_epoch_accuracy, label='train accuracy')
        plt.plot(valid_epoch_accuracy, label='valid accuracy')
        plt.legend(loc=4)
        plt.title('%s_Accuracy' % FLAGS.model_name)
        plt.show()

        plt.plot(train_epoch_loss, label='train loss')
        plt.plot(valid_epoch_loss, label='valid loss')
        plt.legend(loc=3)
        plt.title('%s_Loss' % FLAGS.model_name)
        plt.show()

        plt.plot(train_epoch_gradient_norm, label='gradient norm')
        plt.legend(loc=4)
        plt.title('%s_Gradient Norm' % FLAGS.model_name)
        plt.show()

        # Store results for global plot
        with open('%s_results.p' % FLAGS.model_name, 'wb') as f:
            pickle.dump([
                train_epoch_accuracy, valid_epoch_accuracy, train_epoch_loss,
                valid_epoch_loss, train_epoch_gradient_norm
            ], f)
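
Several of these examples call create_model(sess, FLAGS), which is expected to either restore a previously saved checkpoint or initialize fresh variables. create_model itself is not shown in this listing; here is a minimal TF1-style sketch of that restore-or-initialize decision, assuming the model object exposes a saver attribute (as implied by the model.saver.save calls above):

import tensorflow as tf

def restore_or_init_sketch(sess, model, ckpt_dir):
    # Restore saved weights if a checkpoint exists, otherwise start fresh.
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        model.saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        sess.run(tf.global_variables_initializer())
    return model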
Example #4
def test(FLAGS):
    """
    Sample inputs of your own.
    """
    # Corpus for indexing
    corpus = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1',
        '2', '3', '4', '5', '6', '7', '8', '9', '?'
    ]

    # Convert the sample into the model's input format
    sample = 'g5o8k1??g'
    X = []
    for item in sample:
        X.append(corpus.index(item))
    X_one_hot = np.eye(26 + 10 + 1)[np.array(X).astype('int')]

    with tf.Session() as sess:

        if FLAGS.model_name == 'RNN-LN-FW':

            # Inputs need to be real inputs of batch_size 128
            # because we use A(t), which updates even during testing

            # Load the model
            model = create_model(sess, FLAGS)

            # Load real samples
            with open(os.path.join(FLAGS.data_dir, 'train.p'), 'rb') as f:
                train_X, train_y = pickle.load(f)
            for train_epoch_num, train_epoch in enumerate(
                    generate_epoch(train_X, train_y, 1, FLAGS.batch_size)):
                for train_batch_num, (batch_X,
                                      batch_y) in enumerate(train_epoch):
                    batch_X[0] = X_one_hot
                    logits = model.logits.eval(feed_dict={
                        model.X: batch_X,
                        model.l: FLAGS.l,
                        model.e: FLAGS.e
                    })

                    print("INPUT:", sample)
                    print("PREDICTION:", corpus[np.argmax(logits[0])])

                    return

        else:
            # Reset from train sizes to sample sizes
            FLAGS.batch_size = 1

            # Load the model
            model = create_model(sess, FLAGS)
            logits = model.logits.eval(feed_dict={
                model.X: [X_one_hot],
                model.l: FLAGS.l,
                model.e: FLAGS.e
            })

            print("INPUT:", sample)
            print("PREDICTION:", corpus[np.argmax(logits)])
Example #5
def train(FLAGS):

    # Load the data
    en_token_ids, en_seq_lens, en_vocab_dict, en_rev_vocab_dict = \
        process_data('data/en.p', max_vocab_size=5000, target_lang=False)
    sp_token_ids, sp_seq_lens, sp_vocab_dict, sp_rev_vocab_dict = \
        process_data('data/sp.p', max_vocab_size=5000, target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_en_seq_lens, train_sp_seq_len, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_en_seq_lens, valid_sp_seq_len = \
        split_data(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
            train_ratio=0.8)

    # Update parameters
    FLAGS.en_vocab_size = len(en_vocab_dict)
    FLAGS.sp_vocab_size = len(sp_vocab_dict)
    FLAGS.sp_max_len = max(sp_seq_lens) + 1 # GO token

    # Start session
    with tf.Session() as sess:

        # Create new model or load old one
        model = create_model(sess, FLAGS, forward_only=False)

        # Training begins
        train_losses = []
        valid_losses = []
        for epoch_num, epoch in enumerate(generate_epoch(train_encoder_inputs,
            train_decoder_inputs, train_targets,
            train_en_seq_lens, train_sp_seq_len,
            FLAGS.num_epochs, FLAGS.batch_size)):

            print "EPOCH: %i" % (epoch_num)
            # Decay learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate * \
                (FLAGS.learning_rate_decay_factor ** epoch_num)))

            batch_loss = []

            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                batch_targets, batch_en_seq_lens,
                batch_sp_seq_lens) in enumerate(epoch):

                y_pred, loss, _ = model.step(sess, FLAGS,
                    batch_encoder_inputs, batch_decoder_inputs, batch_targets,
                    batch_en_seq_lens, batch_sp_seq_lens,
                    FLAGS.dropout, forward_only=False)

                batch_loss.append(loss)
            train_losses.append(np.mean(batch_loss))

            for valid_epoch_num, valid_epoch in enumerate(generate_epoch(valid_encoder_inputs,
                valid_decoder_inputs, valid_targets,
                valid_en_seq_lens, valid_sp_seq_len,
                num_epochs=1, batch_size=FLAGS.batch_size)):

                batch_loss = []

                for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                    batch_targets, batch_en_seq_lens,
                    batch_sp_seq_lens) in enumerate(valid_epoch):

                    loss = model.step(sess, FLAGS,
                        batch_encoder_inputs, batch_decoder_inputs, batch_targets,
                        batch_en_seq_lens, batch_sp_seq_lens,
                        dropout=0.0, forward_only=True, sampling=False)

                    batch_loss.append(loss)
                valid_losses.append(np.mean(batch_loss))

        # Save checkpoint.
        if not os.path.isdir(FLAGS.ckpt_dir):
            os.makedirs(FLAGS.ckpt_dir)
        checkpoint_path = os.path.join(FLAGS.ckpt_dir, "model.ckpt")
        print "Saving the model."
        model.saver.save(sess, checkpoint_path,
                         global_step=model.global_step)

        plt.plot(train_losses, label='train_loss')
        plt.plot(valid_losses, label='valid_loss')
        plt.legend()
        plt.show()
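
split_data is another helper that is not included in this listing. Judging from how its outputs are used (parallel train/validation encoder inputs, decoder inputs, targets, and sequence lengths, with train_ratio=0.8), the splitting step could look roughly like the sketch below; building the GO-prefixed decoder inputs and the shifted targets is deliberately left out because that logic is not shown in the source:

def split_data_sketch(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
                      train_ratio=0.8):
    # Illustrative 80/20 split of the parallel source/target data only.
    cutoff = int(len(en_token_ids) * train_ratio)
    train = (en_token_ids[:cutoff], sp_token_ids[:cutoff],
             en_seq_lens[:cutoff], sp_seq_lens[:cutoff])
    valid = (en_token_ids[cutoff:], sp_token_ids[cutoff:],
             en_seq_lens[cutoff:], sp_seq_lens[cutoff:])
    return train, valid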
Example #6
def train(FLAGS):

    # Load the data
    en_token_ids, en_seq_lens, en_vocab_dict, en_rev_vocab_dict = \
        process_data('data/tst2013.en', max_vocab_size=30000, target_lang=False)
    sp_token_ids, sp_seq_lens, sp_vocab_dict, sp_rev_vocab_dict = \
        process_data('data/tst2013.tr', max_vocab_size=30000, target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_en_seq_lens, train_sp_seq_len, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_en_seq_lens, valid_sp_seq_len = \
        split_data(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
            train_ratio=0.8)
    
    output = open('data/vocab_en.pkl', 'wb')
    pickle.dump(en_vocab_dict, output)
    output.close()
    output = open('data/vocab_sp.pkl', 'wb')
    pickle.dump(sp_vocab_dict, output)
    output.close()

    # Update parameters
    FLAGS.en_vocab_size = len(en_vocab_dict)
    FLAGS.sp_vocab_size = len(sp_vocab_dict)

    print 'len(en_vocab_dict)', len(en_vocab_dict)
    print 'len(sp_vocab_dict)', len(sp_vocab_dict)
    
    # Start session
    with tf.Session() as sess:
        # Create new model or load old one (checkpoint_path is assumed to follow
        # the FLAGS.ckpt_dir convention used in the other examples)
        checkpoint_path = os.path.join(FLAGS.ckpt_dir, "model.ckpt")
        if os.path.isfile(checkpoint_path + ".index"):
            model = restore_model(sess)
        else:
            model = create_model(sess, FLAGS)

        # Training begins
        losses = []
        for epoch_num, epoch in enumerate(generate_epoch(train_encoder_inputs,
            train_decoder_inputs, train_targets,
            train_en_seq_lens, train_sp_seq_len,
            FLAGS.num_epochs, FLAGS.batch_size)):

            print "EPOCH: %i" % (epoch_num)
            # Decay learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate * \
                (FLAGS.learning_rate_decay_factor ** epoch_num)))

            batch_loss = []

            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                batch_targets, batch_en_seq_lens,
                batch_sp_seq_lens) in enumerate(epoch):

                loss, _ = model.step(sess, FLAGS,
                    batch_encoder_inputs, batch_decoder_inputs, batch_targets,
                    batch_en_seq_lens, batch_sp_seq_lens,
                    FLAGS.dropout)
                print loss
                batch_loss.append(loss)
            print 'mean: ', np.mean(batch_loss)

            print "Saving the model."
            model.saver.save(sess, checkpoint_path)
Example #7
def train(FLAGS):

    # Load the data
    en_token_ids, en_seq_lens, en_vocab_dict, en_rev_vocab_dict = \
        process_data('data/my_en.txt', max_vocab_size=5000, target_lang=False)
    sp_token_ids, sp_seq_lens, sp_vocab_dict, sp_rev_vocab_dict = \
        process_data('data/my_sp.txt', max_vocab_size=5000, target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_en_seq_lens, train_sp_seq_len, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_en_seq_lens, valid_sp_seq_len = \
        split_data(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
            train_ratio=0.8)

    output = open('data/vocab_en.pkl', 'wb')
    pickle.dump(en_vocab_dict, output)
    output.close()
    output = open('data/vocab_sp.pkl', 'wb')
    pickle.dump(sp_vocab_dict, output)
    output.close()

    # Update parameters
    FLAGS.en_vocab_size = len(en_vocab_dict)
    FLAGS.sp_vocab_size = len(sp_vocab_dict)

    print 'len(en_vocab_dict)', len(en_vocab_dict)
    print 'len(sp_vocab_dict)', len(sp_vocab_dict)

    # Start session
    with tf.Session() as sess:

        # Create new model or load old one
        model = create_model(sess, FLAGS)

        # Training begins
        losses = []
        for epoch_num, epoch in enumerate(
                generate_epoch(train_encoder_inputs, train_decoder_inputs,
                               train_targets, train_en_seq_lens,
                               train_sp_seq_len, FLAGS.num_epochs,
                               FLAGS.batch_size)):

            print "EPOCH: %i" % (epoch_num)
            # Decay learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate * \
                (FLAGS.learning_rate_decay_factor ** epoch_num)))

            batch_loss = []

            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                            batch_targets, batch_en_seq_lens,
                            batch_sp_seq_lens) in enumerate(epoch):

                loss, _ = model.step(sess, FLAGS, batch_encoder_inputs,
                                     batch_decoder_inputs, batch_targets,
                                     batch_en_seq_lens, batch_sp_seq_lens,
                                     FLAGS.dropout)

                batch_loss.append(loss)

            losses.append(np.mean(batch_loss))

        checkpoint_path = "/tmp/model.ckpt"
        print "Saving the model."
        model.saver.save(sess, checkpoint_path)
        plt.plot(losses, label='loss')
        plt.legend()
        plt.savefig('seq_01.png')
Example #8
def train():

    X, y = load_data_and_labels()
    vocab_list, vocab_dict, rev_vocab_dict = create_vocabulary(
        X, FLAGS.en_vocab_size)
    X, seq_lens = data_to_token_ids(X, vocab_dict)
    train_X, train_y, train_seq_lens, valid_X, valid_y, valid_seq_lens = \
        split_data(X, y, seq_lens)
    FLAGS.max_sequence_length = len(train_X[0])

    with tf.Session() as sess:

        # Load old model or create new one
        model = create_model(sess, FLAGS)

        # Train results
        for epoch_num, epoch in enumerate(
                generate_epoch(train_X, train_y, train_seq_lens,
                               FLAGS.num_epochs, FLAGS.batch_size)):
            print "EPOCH:", epoch_num

            sess.run(tf.assign(model.lr, FLAGS.learning_rate * \
                (FLAGS.learning_rate_decay_factor ** epoch_num)))

            train_loss = []
            train_accuracy = []
            for batch_num, (batch_X, batch_y,
                            batch_seq_lens) in enumerate(epoch):

                _, loss, accuracy = model.step(
                    sess,
                    batch_X,
                    batch_seq_lens,
                    batch_y,
                    dropout_keep_prob=FLAGS.dropout_keep_prob,
                    forward_only=False,
                    sampling=False)

                train_loss.append(loss)
                train_accuracy.append(accuracy)

            print
            print "EPOCH %i SUMMARY" % epoch_num
            print "Training loss %.3f" % np.mean(train_loss)
            print "Training accuracy %.3f" % np.mean(train_accuracy)
            print "----------------------"

            # Validation results
            for valid_epoch_num, valid_epoch in enumerate(
                    generate_epoch(valid_X,
                                   valid_y,
                                   valid_seq_lens,
                                   num_epochs=1,
                                   batch_size=FLAGS.batch_size)):
                valid_loss = []
                valid_accuracy = []

                for valid_batch_num, \
                    (valid_batch_X, valid_batch_y, valid_batch_seq_lens) in \
                        enumerate(valid_epoch):

                    loss, accuracy = model.step(sess,
                                                valid_batch_X,
                                                valid_batch_seq_lens,
                                                valid_batch_y,
                                                dropout_keep_prob=1.0,
                                                forward_only=True,
                                                sampling=False)

                    valid_loss.append(loss)
                    valid_accuracy.append(accuracy)

            print "Validation loss %.3f" % np.mean(valid_loss)
            print "Validation accuracy %.3f" % np.mean(valid_accuracy)
            print "----------------------"

            # Save checkpoint every epoch.
            if not os.path.isdir(FLAGS.ckpt_dir):
                os.makedirs(FLAGS.ckpt_dir)
            checkpoint_path = os.path.join(FLAGS.ckpt_dir, "model.ckpt")
            print "Saving the model."
            model.saver.save(sess,
                             checkpoint_path,
                             global_step=model.global_step)
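
This last example feeds dropout_keep_prob=FLAGS.dropout_keep_prob during training and 1.0 during validation, the usual TF1 pattern of switching dropout off at evaluation time through a placeholder. A minimal sketch of how such a placeholder is typically wired into a graph (illustrative, not taken from this model):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 128], name='inputs')
keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
h = tf.nn.dropout(x, keep_prob)  # feeding keep_prob=1.0 makes this a no-op at eval time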