def main(unused_argv):
    train_data, train_labels, test_data, test_labels = helper.mnist_data_loader()
    model = MLP(input_size=FLAGS.input_size,
                num_classes=FLAGS.num_classes,
                dropout=FLAGS.dropout,
                init_lr=FLAGS.learning_rate,
                decay_steps=FLAGS.decay_steps,
                decay_rate=FLAGS.decay_rate,
                weight_decay=FLAGS.weight_decay)
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()

    for e in range(FLAGS.epochs):
        print("----- Epoch {}/{} -----".format(e + 1, FLAGS.epochs))

        # Training stage.
        train_batches = helper.generate_batches(train_data, train_labels,
                                                FLAGS.batch_size)
        for xt, yt in tqdm(train_batches, desc="Training", ascii=True):
            _, i = sess.run([model.optimization, model.add_global],
                            feed_dict={
                                model.inputs: xt,
                                model.labels: yt,
                                model.mode: True
                            })

        # Testing stage.
        test_batches = helper.generate_batches(test_data, test_labels,
                                               FLAGS.batch_size)
        total_pred = correct_pred = 0
        total_loss = []
        for xt, yt in test_batches:
            logits, loss, lr = sess.run(
                [model.logits, model.loss, model.learning_rate],
                feed_dict={
                    model.inputs: xt,
                    model.labels: yt,
                    model.mode: False
                })
            pred = np.argmax(logits, axis=1)
            correct_pred += np.sum(yt == pred)
            total_pred += yt.shape[0]
            total_loss.append(loss)
        acc = correct_pred / total_pred
        loss = np.mean(total_loss)
        current = time.asctime(time.localtime(time.time()))
        print("{0} Step {1:5} Learning rate: {2:.6f} Loss: {3:.4f} Accuracy: {4:.4f}"
              .format(current, i, lr, loss, acc))

    # Save the model.
    saver = tf.train.Saver()
    model_path = saver.save(sess, FLAGS.save_path)
    print("Model saved in file: %s" % model_path)
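# The MLP class is not shown here. Below is a hedged sketch (not the original
# implementation) of how its add_global and learning_rate members, used in main()
# above, might be built from the init_lr, decay_steps and decay_rate arguments.
# This uses the TF 1.x API; staircase=True is an assumption.
def _decayed_learning_rate_sketch(init_lr, decay_steps, decay_rate):
    global_step = tf.Variable(0, trainable=False, name='global_step')
    add_global = global_step.assign_add(1)  # incremented once per training step
    learning_rate = tf.train.exponential_decay(init_lr, global_step,
                                               decay_steps=decay_steps,
                                               decay_rate=decay_rate,
                                               staircase=True)
    return global_step, add_global, learning_rate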
def run_eval(model, eval_src, eval_tgt, target_vocab_size, pad_idx, batch_size):
    model.eval()
    avg_eval_loss = 0.0
    num_batches = len(eval_src) // batch_size
    for batch_i, (eval_src_batch, eval_tgt_batch, lengths) in enumerate(
            helper.generate_batches(eval_src, eval_tgt, batch_size)):
        eval_src_batch = torch.tensor(eval_src_batch, device=device)
        eval_tgt_batch = torch.tensor(eval_tgt_batch, device=device)
        lengths = torch.tensor(lengths, device=device)
        outputs = model(eval_src_batch, eval_tgt_batch, lengths)
        loss = F.nll_loss(
            outputs[:, 1:, :].contiguous().view(-1, target_vocab_size),
            eval_tgt_batch[:, 1:].contiguous().view(-1),
            ignore_index=pad_idx)
        avg_eval_loss += loss.item()
    avg_eval_loss /= num_batches
    return avg_eval_loss
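# Self-contained illustration (not from the original project) of the masked NLL loss
# used in run_eval(): target positions equal to pad_idx contribute nothing to the
# average, so padded time steps do not affect the reported loss.
def _nll_loss_ignore_index_demo(pad_idx=0):
    log_probs = F.log_softmax(torch.randn(6, 5), dim=-1)    # 6 positions, vocab of 5
    targets = torch.tensor([2, 3, 1, pad_idx, pad_idx, 4])  # padded positions ignored
    return F.nll_loss(log_probs, targets, ignore_index=pad_idx)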
def run_epoches(images, labels, test_images, test_labels, n=100, alpha=0.7):
    """Run n epochs."""
    mse_weights = np.random.randn(784, 10) / 100
    mse_bias = np.random.randn(10, 1) / 100
    entropy_weights = np.random.randn(784, 10) / 100
    entropy_bias = np.random.randn(10, 1) / 100

    # For plotting.
    x_axis = []
    training_mse = []
    training_entropy = []
    test_mse = []
    test_entropy = []

    for i in range(n):
        eta = alpha / math.pow(i + 1, 0.5)
        batches = helper.generate_batches(images, labels)
        for batch in batches:
            gradient_mse_w = compute_gradient_mse(batch, mse_weights, mse_bias, 'w')
            gradient_mse_b = compute_gradient_mse(batch, mse_weights, mse_bias, 'b')
            gradient_entropy_w = compute_gradient_entropy(batch, entropy_weights,
                                                          entropy_bias, 'w')
            gradient_entropy_b = compute_gradient_entropy(batch, entropy_weights,
                                                          entropy_bias, 'b')
            mse_weights = mse_weights - eta * gradient_mse_w
            mse_bias = mse_bias - eta * gradient_mse_b
            entropy_weights = entropy_weights - eta * gradient_entropy_w
            entropy_bias = entropy_bias - eta * gradient_entropy_b

        # Store accuracies every 10 epochs.
        if i % 10 == 0:
            x_axis.append(i)
            y1 = helper.error(mse_weights, mse_bias, images, labels)
            training_mse.append(1 - y1)
            y2 = helper.error(entropy_weights, entropy_bias, images, labels)
            training_entropy.append(1 - y2)
            y3 = helper.error(mse_weights, mse_bias, test_images, test_labels)
            test_mse.append(1 - y3)
            y4 = helper.error(entropy_weights, entropy_bias, test_images, test_labels)
            test_entropy.append(1 - y4)
            print('epoch =', i)
            print('error rate on training set using mean squared error', y1)
            print('error rate on training set using cross-entropy error', y2)
            print('error rate on test set using mean squared error', y3)
            print('error rate on test set using cross-entropy error', y4)

    p1, = plt.plot(x_axis, training_mse, 'r')
    p2, = plt.plot(x_axis, training_entropy, 'b')
    p3, = plt.plot(x_axis, test_mse, 'g')
    p4, = plt.plot(x_axis, test_entropy, 'k')
    plt.legend([p1, p2, p3, p4],
               ['training accuracy, mse', 'training accuracy, entropy',
                'test accuracy, mse', 'test accuracy, entropy'])
    plt.show()
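# helper.error is not shown above. A hedged sketch of what it is assumed to compute:
# the fraction of examples whose argmax of W^T x + b disagrees with the true label,
# assuming images are stored row-wise as (N, 784) and labels are one-hot or integer.
def _error_sketch(weights, bias, images, labels):
    scores = images @ weights + bias.T            # (N, 784) @ (784, 10) -> (N, 10)
    pred = np.argmax(scores, axis=1)
    true = labels if labels.ndim == 1 else np.argmax(labels, axis=1)
    return np.mean(pred != true)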
def run_epoches(train_images, train_labels, test_images, test_labels, n=100,
                alpha=0.6):
    """
    Run n epochs.

    train_images : training images
    train_labels : training labels
    test_images  : test images
    test_labels  : test labels
    n            : number of epochs
    alpha        : initial learning rate
    """
    # Initialize weights and biases to small random values.
    scale_factor = 10e-4  # scaling factor for weights and biases

    # Weights and biases for Mean Squared Error.
    mse_weights = [np.random.randn(784, 300) * scale_factor,
                   np.random.randn(300, 100) * scale_factor,
                   np.random.randn(100, 10) * scale_factor]
    mse_bias = [np.random.randn(300, 1) * scale_factor,
                np.random.randn(100, 1) * scale_factor,
                np.random.randn(10, 1) * scale_factor]

    # Weights and biases for Cross-Entropy Error.
    cee_weights = [np.random.randn(784, 300) * scale_factor,
                   np.random.randn(300, 100) * scale_factor,
                   np.random.randn(100, 10) * scale_factor]
    cee_bias = [np.random.randn(300, 1) * scale_factor,
                np.random.randn(100, 1) * scale_factor,
                np.random.randn(10, 1) * scale_factor]

    # Accuracies for the two loss functions.
    training_mse = []
    training_cee = []
    test_mse = []
    test_cee = []
    x_axis = []

    for i in range(n):
        print(i, '-th epoch')
        eta = alpha / math.pow(i + 1, 0.5)
        batches = helper.generate_batches(train_images, train_labels)
        for batch in batches:
            # Extract features and labels out of the training batch.
            feats = batch[:, :784]   # each row is a feature vector
            labels = batch[:, 784:]  # each row is a label
            x_mse = forward(feats, mse_weights, mse_bias)
            x_cee = forward(feats, cee_weights, cee_bias)
            # [d_1, d_2, d_3]
            d_mse = backward_mse(x_mse, labels, mse_weights)
            d_cee = backward_cee(x_cee, labels, cee_weights)
            mse_weights = update_w(x_mse, mse_weights, d_mse, eta)
            cee_weights = update_w(x_cee, cee_weights, d_cee, eta)
            mse_bias = update_b(x_mse, mse_bias, d_mse, eta)
            cee_bias = update_b(x_cee, cee_bias, d_cee, eta)

        # Store accuracies every 10 epochs.
        if i % 10 == 0:
            x_axis.append(i)
            res1 = calculate_accuracy(predict(train_images, mse_weights, mse_bias),
                                      train_labels)
            res2 = calculate_accuracy(predict(train_images, cee_weights, cee_bias),
                                      train_labels)
            res3 = calculate_accuracy(predict(test_images, mse_weights, mse_bias),
                                      test_labels)
            res4 = calculate_accuracy(predict(test_images, cee_weights, cee_bias),
                                      test_labels)
            training_mse.append(res1)
            training_cee.append(res2)
            test_mse.append(res3)
            test_cee.append(res4)
            print('epoch =', i)
            print('error rate on training set using mean squared error', 1 - res1)
            print('error rate on training set using cross-entropy error', 1 - res2)
            print('error rate on test set using mean squared error', 1 - res3)
            print('error rate on test set using cross-entropy error', 1 - res4)

    p1, = plt.plot(x_axis, training_mse, 'r')
    p2, = plt.plot(x_axis, training_cee, 'b')
    p3, = plt.plot(x_axis, test_mse, 'g')
    p4, = plt.plot(x_axis, test_cee, 'k')
    plt.legend([p1, p2, p3, p4],
               ['training accuracy, mse', 'training accuracy, entropy',
                'test accuracy, mse', 'test accuracy, entropy'])
    plt.show()
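# A hedged sketch of the batching the inner loop above assumes: helper.generate_batches
# is expected to shuffle the data and return 2-D arrays whose rows concatenate a 784-dim
# image with its label columns (later sliced apart as batch[:, :784] / batch[:, 784:]).
# The real helper's batch size and label encoding may differ.
def _generate_batches_sketch(images, labels, batch_size=64):
    combined = np.hstack([images, labels])    # images: (N, 784), labels: (N, 10)
    np.random.shuffle(combined)               # shuffle rows in place
    return [combined[i:i + batch_size]
            for i in range(0, len(combined), batch_size)]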
def train(model, source, target, target_vocab_size, val_src, val_tgt, pad_idx,
          save_path, batch_size=64, epochs=10, learning_rate=0.01):
    train_loss = []
    val_loss = []
    perplexity = []
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    num_batches = len(source) // batch_size

    for epoch_i in range(epochs):
        avg_train_loss = 0.0
        train_loss_per_200 = 0.0
        for batch_i, (source_batch, target_batch, lengths) in enumerate(
                helper.generate_batches(source, target, batch_size)):
            model.train()
            source_batch = torch.tensor(source_batch, device=device)
            target_batch = torch.tensor(target_batch, device=device)
            lengths = torch.tensor(lengths, device=device)

            optimizer.zero_grad()
            outputs = model(source_batch, target_batch, lengths)
            loss = F.nll_loss(
                outputs[:, 1:, :].contiguous().view(-1, target_vocab_size),
                target_batch[:, 1:].contiguous().view(-1),
                ignore_index=pad_idx)
            loss.backward()
            clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            avg_train_loss += loss.item()
            train_loss_per_200 += loss.item()

            if batch_i % 50 == 0 and batch_i != 0:
                avg_train_loss /= 50
                print('Epoch {:>3} Batch {:>4}/{} Avg Train Loss ({} batches): {:.6f}'
                      .format(epoch_i, batch_i, num_batches, 50, avg_train_loss))
                avg_train_loss = 0.0

            if batch_i % 200 == 0 and batch_i != 0:
                train_loss_per_200 /= 200
                avg_val_loss = run_eval(model, val_src, val_tgt, target_vocab_size,
                                        pad_idx, batch_size)
                print('Epoch {:>3} Batch {:>4}/{} Avg Val Loss ({} batches): {:.6f}'
                      .format(epoch_i, batch_i, num_batches, 200, avg_val_loss))
                print('Avg Val Perplexity: {:.2f}'.format(math.exp(avg_val_loss)))
                train_loss.append(train_loss_per_200)
                val_loss.append(avg_val_loss)
                perplexity.append(math.exp(avg_val_loss))
                train_loss_per_200 = 0.0

    helper.save_objects((train_loss, val_loss), os.path.join(save_path, 'loss.pkl'))
    helper.save_objects(perplexity, os.path.join(save_path, 'perplexity.pkl'))
    print('------Training Complete------')

    file = os.path.join(save_path, 'model.pt')
    torch.save(model.state_dict(), file)
    print('Model saved')
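# Self-contained illustration (not from the original project) of the gradient clipping
# used in train(): clip_grad_norm_ rescales all gradients so their combined L2 norm is
# at most max_norm, and returns the norm measured before clipping.
def _clip_grad_norm_demo():
    lin = torch.nn.Linear(4, 2)
    loss = (lin(torch.randn(8, 4)) ** 2).sum() * 100.0  # large loss -> large gradients
    loss.backward()
    total_norm = clip_grad_norm_(lin.parameters(), max_norm=1.0)
    return float(total_norm)  # norm before clipping; gradients are now scaled down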