Example #1
def main(unused_argv):

    train_data, train_labels, test_data, test_labels = helper.mnist_data_loader(
    )

    model = MLP(input_size=FLAGS.input_size,
                num_classes=FLAGS.num_classes,
                dropout=FLAGS.dropout,
                init_lr=FLAGS.learning_rate,
                decay_steps=FLAGS.decay_steps,
                decay_rate=FLAGS.decay_rate,
                weight_decay=FLAGS.weight_decay)

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    for e in range(FLAGS.epochs):
        print("----- Epoch {}/{} -----".format(e + 1, FLAGS.epochs))
        # training stage.
        train_batches = helper.generate_batches(train_data, train_labels,
                                                FLAGS.batch_size)
        for xt, yt in tqdm(train_batches, desc="Training", ascii=True):
            _, i = sess.run([model.optimization, model.add_global],
                            feed_dict={
                                model.inputs: xt,
                                model.labels: yt,
                                model.mode: True
                            })

        # testing stage.
        test_batches = helper.generate_batches(test_data, test_labels,
                                               FLAGS.batch_size)
        total_pred = correct_pred = 0
        total_loss = []
        for xt, yt in test_batches:
            logits, loss, lr = sess.run(
                [model.logits, model.loss, model.learning_rate],
                feed_dict={
                    model.inputs: xt,
                    model.labels: yt,
                    model.mode: False
                })

            pred = np.argmax(logits, axis=1)
            correct_pred += np.sum(yt == pred)
            total_pred += yt.shape[0]
            total_loss.append(loss)

        acc = correct_pred / total_pred
        loss = np.mean(total_loss)

        current = time.asctime(time.localtime(time.time()))
        print(
            "{0} Step {1:5} Learning rate: {2:.6f} Loss: {3:.4f} Accuracy: {4:.4f}"
            .format(current, i, lr, loss, acc))

    # Save the model
    saver = tf.train.Saver()
    model_path = saver.save(sess, FLAGS.save_path)
    print("Model saved in file: %s" % model_path)
Example #2
def run_eval(model, eval_src, eval_tgt, target_vocab_size, pad_idx,
             batch_size):

    # switch off dropout etc. and disable gradient tracking for evaluation
    model.eval()
    avg_eval_loss = 0.0
    num_batches = len(eval_src) // batch_size
    with torch.no_grad():
        for eval_src_batch, eval_tgt_batch, lengths in helper.generate_batches(
                eval_src, eval_tgt, batch_size):

            eval_src_batch = torch.tensor(eval_src_batch, device=device)
            eval_tgt_batch = torch.tensor(eval_tgt_batch, device=device)
            lengths = torch.tensor(lengths, device=device)

            outputs = model(eval_src_batch, eval_tgt_batch, lengths)
            # skip the first time step of the output/target when computing the loss
            loss = F.nll_loss(
                outputs[:, 1:, :].contiguous().view(-1, target_vocab_size),
                eval_tgt_batch[:, 1:].contiguous().view(-1),
                ignore_index=pad_idx)
            avg_eval_loss += loss.item()

    avg_eval_loss /= num_batches

    return avg_eval_loss
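The seq2seq examples (#2 and #5) call helper.generate_batches with variable-length id sequences and expect a third element, the source lengths. A minimal sketch under the assumption that batches are padded with pad_idx and sorted by descending source length (so they could be packed downstream); the padding and sorting behavior are guesses about the missing helper:

import numpy as np

def generate_batches(src, tgt, batch_size, pad_idx=0):
    """Yield (src_batch, tgt_batch, src_lengths) triples.

    Hypothetical counterpart of helper.generate_batches for the
    seq2seq examples; the real implementation is not shown.
    """
    for start in range(0, len(src) - batch_size + 1, batch_size):
        src_batch = src[start:start + batch_size]
        tgt_batch = tgt[start:start + batch_size]
        # sort by source length, longest first
        order = sorted(range(len(src_batch)),
                       key=lambda k: len(src_batch[k]), reverse=True)
        src_batch = [src_batch[k] for k in order]
        tgt_batch = [tgt_batch[k] for k in order]
        lengths = [len(s) for s in src_batch]
        max_src = max(lengths)
        max_tgt = max(len(t) for t in tgt_batch)
        src_pad = np.full((batch_size, max_src), pad_idx, dtype=np.int64)
        tgt_pad = np.full((batch_size, max_tgt), pad_idx, dtype=np.int64)
        for row, (s, t) in enumerate(zip(src_batch, tgt_batch)):
            src_pad[row, :len(s)] = s
            tgt_pad[row, :len(t)] = t
        yield src_pad, tgt_pad, lengths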
Example #3
def run_epoches(images, labels, test_images, test_labels, n=100, alpha=0.7):
    """Run n number of epoches."""
    mse_weights = np.random.randn(784, 10) / 100
    mse_bias = np.random.randn(10, 1) / 100
    
    entropy_weights = np.random.randn(784, 10) / 100
    entropy_bias = np.random.randn(10, 1) / 100

    # for plot
    x_axis = []
    training_mse = []
    training_entropy = []
    test_mse = []
    test_entropy = []

    for i in range(n):
        eta = alpha / math.pow(i + 1, 0.5)
        batches = helper.generate_batches(images, labels)
        for batch in batches:
            gradient_mse_w = compute_gradient_mse(batch, mse_weights, mse_bias, 'w')
            gradient_mse_b = compute_gradient_mse(batch, mse_weights, mse_bias, 'b')
            
            gradient_entropy_w = \
                compute_gradient_entropy(batch, entropy_weights, entropy_bias, 'w')
            gradient_entropy_b = \
                compute_gradient_entropy(batch, entropy_weights, entropy_bias, 'b')

            mse_weights = mse_weights - eta * gradient_mse_w
            mse_bias = mse_bias - eta * gradient_mse_b
            
            entropy_weights = entropy_weights - eta * gradient_entropy_w
            entropy_bias = entropy_bias - eta * gradient_entropy_b

        # record accuracies every 10 epochs
        if i % 10 == 0:

            x_axis.append(i)
            y1 = helper.error(mse_weights, mse_bias, images, labels)
            training_mse.append(1 - y1)
            
            y2 = helper.error(entropy_weights, entropy_bias, images, labels)
            training_entropy.append(1 - y2)
           
            y3 = helper.error(mse_weights, mse_bias, test_images, test_labels)
            test_mse.append(1 - y3)
            
            y4 = helper.error(entropy_weights, entropy_bias,
                    test_images, test_labels)
            test_entropy.append(1 - y4)

            print('epoch =', i)
            print('error rate on training set using mean squared error:', y1)
            print('error rate on training set using cross-entropy error:', y2)
            print('error rate on test set using mean squared error:', y3)
            print('error rate on test set using cross-entropy error:', y4)

    p1, = plt.plot(x_axis, training_mse, 'r')
    p2, = plt.plot(x_axis, training_entropy, 'b')
    p3, = plt.plot(x_axis, test_mse, 'g')
    p4, = plt.plot(x_axis, test_entropy, 'k')
    plt.legend([p1, p2, p3, p4],
        ['training accuracy, MSE', 'training accuracy, cross-entropy',
            'test accuracy, MSE', 'test accuracy, cross-entropy'])
    plt.show()
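helper.error is likewise external. Assuming a linear model whose scores are x @ W + b.T and one-hot labels, an error-rate sketch consistent with the shapes above (weights (784, 10), bias (10, 1)) could be:

import numpy as np

def error(weights, bias, images, labels):
    """Fraction of misclassified examples for a linear model.

    Hypothetical version of helper.error; it assumes images has shape
    (m, 784), labels is one-hot with shape (m, 10), and the predicted
    class is the argmax of the linear scores.
    """
    scores = images @ weights + bias.T          # (m, 10)
    pred = np.argmax(scores, axis=1)
    truth = np.argmax(labels, axis=1)
    return np.mean(pred != truth)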
Example #4
def run_epoches(train_images, train_labels, test_images, test_labels, n=100, alpha=0.6):
    """
    Run n number of epoches.
    train_images : training images
    train_lables : training labels
    test_images : test images
    test_labels : test labels
    n : number of epoches
    alpha : initial learning rate 
    """

    # initialize weights and biases to small random values
    scale_factor = 1e-3  # scaling factor for weight and bias initialization
    # weights and biases for Mean Squared Error
    mse_weights = [np.random.randn(784, 300) * scale_factor,
                   np.random.randn(300, 100) * scale_factor,
                   np.random.randn(100, 10) * scale_factor]
    mse_bias = [np.random.randn(300, 1) * scale_factor,
                np.random.randn(100, 1) * scale_factor,
                np.random.randn(10, 1) * scale_factor]

    # weights and biases for Cross-Entropy Error
    cee_weights = [np.random.randn(784, 300) * scale_factor,
                   np.random.randn(300, 100) * scale_factor,
                   np.random.randn(100, 10) * scale_factor]
    cee_bias = [np.random.randn(300, 1) * scale_factor,
                np.random.randn(100, 1) * scale_factor,
                np.random.randn(10, 1) * scale_factor]
    
    # lists of accuracies for the two loss functions
    training_mse = []
    training_cee = []
    test_mse = []
    test_cee = []
    x_axis = []

    for i in range(n):
        print('{}-th epoch'.format(i))
        eta = alpha / math.pow(i + 1, 0.5)
        batches = helper.generate_batches(train_images, train_labels)
        for batch in batches:

            # extract features and labels out of the combined batch
            feats = batch[:, :784]   # each row is a feature vector
            labels = batch[:, 784:]  # each row is a label

            x_mse = forward(feats, mse_weights, mse_bias)
            x_cee = forward(feats, cee_weights, cee_bias)

            # [d_1, d_2, d_3]
            d_mse = backward_mse(x_mse, labels, mse_weights)
            d_cee = backward_cee(x_cee, labels, cee_weights)

            mse_weights = update_w(x_mse, mse_weights, d_mse, eta)
            cee_weights = update_w(x_cee, cee_weights, d_cee, eta)
            mse_bias = update_b(x_mse, mse_bias, d_mse, eta)
            cee_bias = update_b(x_cee, cee_bias, d_cee, eta)

        # record accuracies every 10 epochs
        if i % 10 == 0:
            x_axis.append(i)

            res1 = calculate_accuracy(predict(train_images, mse_weights, mse_bias), train_labels)
            res2 = calculate_accuracy(predict(train_images, cee_weights, cee_bias), train_labels)
            res3 = calculate_accuracy(predict(test_images, mse_weights, mse_bias), test_labels)
            res4 = calculate_accuracy(predict(test_images, cee_weights, cee_bias), test_labels)

            training_mse.append(res1)
            training_cee.append(res2)
            test_mse.append(res3)
            test_cee.append(res4)

            print('epoch =', i)
            print('error rate on training set using mean squared error:', 1 - res1)
            print('error rate on training set using cross-entropy error:', 1 - res2)
            print('error rate on test set using mean squared error:', 1 - res3)
            print('error rate on test set using cross-entropy error:', 1 - res4)

    p1, = plt.plot(x_axis, training_mse, 'r')
    p2, = plt.plot(x_axis, training_cee, 'b')
    p3, = plt.plot(x_axis, test_mse, 'g')
    p4, = plt.plot(x_axis, test_cee, 'k')
    plt.legend([p1, p2, p3, p4],
        ['training accuracy, MSE', 'training accuracy, cross-entropy',
            'test accuracy, MSE', 'test accuracy, cross-entropy'])
    plt.show()
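The forward, backward_mse/backward_cee, and update_w/update_b helpers used here are not shown. One plausible reading is that forward returns the list of per-layer activations [x_0, ..., x_3] that the update functions consume; a sketch with sigmoid hidden layers (an assumption, not confirmed by the listing) follows:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def forward(feats, weights, bias):
    """Return the activations of every layer, input included.

    Hypothetical version of the forward pass used in Example #4; the
    sigmoid nonlinearity and the returned list-of-activations layout
    are assumptions about the missing helper code.
    """
    activations = [feats]  # x_0: the raw (m, 784) features
    for w, b in zip(weights, bias):
        activations.append(sigmoid(activations[-1] @ w + b.T))
    return activations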
Example #5
def train(model,
          source,
          target,
          target_vocab_size,
          val_src,
          val_tgt,
          pad_idx,
          save_path,
          batch_size=64,
          epochs=10,
          learning_rate=0.01):

    train_loss = []
    val_loss = []
    perplexity = []
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    num_batches = len(source) // batch_size
    for epoch_i in range(epochs):
        avg_train_loss = 0.0
        train_loss_per_200 = 0.0
        for batch_i, (source_batch, target_batch, lengths) in enumerate(
                helper.generate_batches(source, target, batch_size)):
            model.train()
            source_batch = torch.tensor(source_batch, device=device)
            target_batch = torch.tensor(target_batch, device=device)
            lengths = torch.tensor(lengths, device=device)
            optimizer.zero_grad()
            outputs = model(source_batch, target_batch, lengths)
            # skip the first time step of the output/target when computing the loss
            loss = F.nll_loss(
                outputs[:, 1:, :].contiguous().view(-1, target_vocab_size),
                target_batch[:, 1:].contiguous().view(-1),
                ignore_index=pad_idx)
            loss.backward()
            clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            avg_train_loss += loss.item()
            train_loss_per_200 += loss.item()

            if batch_i % 50 == 0 and batch_i != 0:
                avg_train_loss /= 50
                print(
                    'Epoch {:>3} Batch {:>4}/{} Avg Train Loss ({} batches):{:.6f}'
                    .format(epoch_i, batch_i, num_batches, 50, avg_train_loss))
                avg_train_loss = 0.0

            if batch_i % 200 == 0 and batch_i != 0:
                train_loss_per_200 /= 200
                avg_val_loss = run_eval(model, val_src, val_tgt,
                                        target_vocab_size, pad_idx, batch_size)
                print(
                    'Epoch {:>3} Batch {:>4}/{} Avg Val Loss ({} batches):{:.6f}'
                    .format(epoch_i, batch_i, num_batches, 200, avg_val_loss))
                print('Avg Val Perplexity: {:.2f}'.format(
                    math.exp(avg_val_loss)))
                train_loss.append(train_loss_per_200)
                val_loss.append(avg_val_loss)
                perplexity.append(math.exp(avg_val_loss))
                train_loss_per_200 = 0.0

    helper.save_objects((train_loss, val_loss),
                        os.path.join(save_path, 'loss.pkl'))
    helper.save_objects(perplexity, os.path.join(save_path, 'perplexity.pkl'))
    print('------Training Complete------')
    model_file = os.path.join(save_path, 'model.pt')
    torch.save(model.state_dict(), model_file)
    print('Model saved')
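helper.save_objects is also outside the listing; given how it is called above, a pickle-based sketch would be:

import os
import pickle

def save_objects(obj, path):
    """Pickle obj to path (hypothetical version of helper.save_objects)."""
    # create the parent directory if it does not exist yet
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(obj, f)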