def plot_learning_curve():
    """
    Plot the learning curve of the multinomial logistic regression model.

    The x-axis is the size of the training set and the y-axis is the value of
    the loss function. Green is the loss on the training data and red is the
    loss on the validation data.
    """
    t_loss = []
    v_loss = []
    for m in range(1, 19):
        print 'm: ', m
        t, v = run_multinomial_logistic_regression(train_subset=m * 2500, test=False)
        t_loss.append(t)
        v_loss.append(v)
    plot(t_loss, v_loss, 'Training Size', 'Loss')
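

# The `plot` helper used above (and by the training functions below) is
# defined elsewhere in the project. A minimal sketch, assuming a
# matplotlib-based implementation with the training curve in green and the
# validation curve in red:
import matplotlib.pyplot as plt


def plot(train_values, valid_values, x_label, y_label):
    # Draw the training series in green and the validation series in red,
    # label both axes and show the figure.
    plt.plot(train_values, 'g', label='Training')
    plt.plot(valid_values, 'r', label='Validation')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend(loc='best')
    plt.show()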


def run_multinomial_logistic_regression(train_subset=45000, valid_size=5000, test=True):
    """
    In multinomial logistic regression, we have input X of
    (n x image_size * image_size * color_channel) dimension and output Y of
    (n x num_labels) dimension, and Y is defined as:

        Y = softmax(X * W + b)

    where W and b are the weights and biases. The loss function is:

        Loss = cross_entropy(Y, labels)

    We use stochastic gradient descent with a batch size of 128, a learning
    rate of 0.5 and 3001 steps. We do not use any regularization because it
    does not improve the accuracy in this case. At the end of training, the
    accuracy and loss curves are plotted.

    Keyword arguments:
    train_subset -- the number of training examples
    valid_size -- the number of examples in the validation set
    test -- if true, output a .csv file predicting the 300000 examples in the
            test set
    """
    train_dataset, train_labels, valid_dataset, valid_labels = \
        get_train_valid_data(train_subset, valid_size)

    print 'Building graph...'

    batch_size = 128

    graph = tf.Graph()
    with graph.as_default():
        tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, num_features))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_valid_labels = tf.constant(valid_labels)

        weights = tf.Variable(tf.truncated_normal([num_features, num_labels]))
        biases = tf.Variable(tf.zeros([num_labels]))

        train_logits = model(tf_train_dataset, weights, biases)
        train_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(train_logits, tf_train_labels))
        train_prediction = tf.nn.softmax(train_logits)

        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(train_loss)

        # Predictions for the training and validation data.
        valid_logits = model(tf_valid_dataset, weights, biases)
        valid_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(valid_logits, tf_valid_labels))
        valid_prediction = tf.nn.softmax(valid_logits)

    print 'Training...'

    num_steps = 3001

    trained_weights = np.ndarray(shape=(num_features, num_labels))
    trained_biases = np.ndarray(shape=(num_labels))

    train_losses = []
    valid_losses = []
    train_accuracies = []
    valid_accuracies = []

    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print 'Initialized'
        for step in xrange(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}

            _, tl, vl, predictions, trained_weights, trained_biases = session.run(
                [optimizer, train_loss, valid_loss, train_prediction, weights, biases],
                feed_dict=feed_dict)

            train_losses.append(tl)
            valid_losses.append(vl)
            train_accuracies.append(accuracy(predictions, batch_labels))
            valid_accuracies.append(accuracy(valid_prediction.eval(), valid_labels))

            if step % 100 == 0:
                print('Complete %.2f %%' % (float(step) / num_steps * 100.0))

    # Plot losses and accuracies
    print_loss(train_losses[-1], valid_losses[-1])
    print_accuracy(train_accuracies[-1], valid_accuracies[-1])
    plot(train_losses, valid_losses, 'Iteration', 'Loss')
    plot(train_accuracies, valid_accuracies, 'Iteration', 'Accuracy')

    if not test:
        return train_losses[-1], valid_losses[-1]

    part_size = 50000

    test_graph = tf.Graph()
    with test_graph.as_default():
        tf_test_dataset = tf.placeholder(tf.float32, shape=(part_size, num_features))

        weights = tf.constant(trained_weights)
        biases = tf.constant(trained_biases)

        logits = model(tf_test_dataset, weights, biases)
        test_prediction = tf.nn.softmax(logits)

    test_dataset = load_test_data()
    test_dataset = reformat_dataset(test_dataset)

    total_part = 6
    test_predicted_labels = np.ndarray(shape=(300000, 10))
    for i in range(total_part):
        test_dataset_part = test_dataset[i * part_size:(i + 1) * part_size]
        with tf.Session(graph=test_graph) as session:
            tf.initialize_all_variables().run()
            feed_dict = {tf_test_dataset: test_dataset_part}
            predict = session.run([test_prediction], feed_dict=feed_dict)
            test_predicted_labels[i * part_size:(i + 1) * part_size, :] = np.asarray(predict)[0]

    test_predicted_labels = np.argmax(test_predicted_labels, 1)
    label_matrices_to_csv(test_predicted_labels, 'submission.csv')
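

# The `model` helper called by run_multinomial_logistic_regression is defined
# elsewhere; per the docstring above (Y = softmax(X * W + b)) it should be a
# single affine transform. A hypothetical stand-in, named logistic_model here
# so it does not collide with the convolutional model sketch further below:
def logistic_model(data, weights, biases):
    # Logits of the softmax regression: X * W + b. The softmax itself and the
    # cross-entropy loss are applied by the caller.
    return tf.matmul(data, weights) + biases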


def run_convolution(train_subset=45000, valid_size=5000, test=False):
    """
    Train a small convolutional network: two 5x5 convolutional layers of
    depth 16, a fully connected layer of 64 hidden units and a softmax output
    layer. Training uses stochastic gradient descent with a batch size of 16,
    a learning rate of 0.05 and 1001 steps.
    """
    train_dataset, train_labels, valid_dataset, valid_labels = \
        get_train_valid_data(train_subset, valid_size, reformat_data=False)

    print 'Building graph...'

    batch_size = 16
    patch_size = 5
    depth = 16
    num_hidden = 64

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size, image_size, color_channel))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_valid_labels = tf.constant(valid_labels)

        # Variables.
        layer1_weights = weight_variable([patch_size, patch_size, color_channel, depth])
        layer1_biases = bias_variable(shape=[depth])
        layer2_weights = weight_variable([patch_size, patch_size, depth, depth])
        layer2_biases = constant_bias_variable(1.0, [depth])
        layer3_weights = weight_variable([image_size / 4 * image_size / 4 * depth, num_hidden])
        layer3_biases = constant_bias_variable(1.0, [num_hidden])
        layer4_weights = weight_variable([num_hidden, num_labels])
        layer4_biases = constant_bias_variable(1.0, [num_labels])

        train_logits = model(tf_train_dataset,
                             layer1_weights, layer1_biases,
                             layer2_weights, layer2_biases,
                             layer3_weights, layer3_biases,
                             layer4_weights, layer4_biases)
        train_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(train_logits, tf_train_labels))
        train_prediction = tf.nn.softmax(train_logits)

        optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(train_loss)

        valid_logits = model(tf_valid_dataset,
                             layer1_weights, layer1_biases,
                             layer2_weights, layer2_biases,
                             layer3_weights, layer3_biases,
                             layer4_weights, layer4_biases)
        valid_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(valid_logits, tf_valid_labels))
        valid_prediction = tf.nn.softmax(valid_logits)

    train_losses = []
    valid_losses = []
    train_accuracies = []
    valid_accuracies = []

    num_steps = 1001

    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print 'Initialized'
        for step in xrange(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}

            _, tl, predictions = session.run(
                [optimizer, train_loss, train_prediction], feed_dict=feed_dict)

            train_losses.append(tl)
            valid_losses.append(valid_loss.eval())
            train_accuracies.append(accuracy(predictions, batch_labels))
            valid_accuracies.append(accuracy(valid_prediction.eval(), valid_labels))

            if step % 100 == 0:
                print('Complete %.2f %%' % (float(step) / num_steps * 100.0))

    # Plot losses and accuracies
    print_loss(train_losses[-1], valid_losses[-1])
    print_accuracy(train_accuracies[-1], valid_accuracies[-1])
    plot(train_losses, valid_losses, 'Iteration', 'Loss')
    plot(train_accuracies, valid_accuracies, 'Iteration', 'Accuracy')
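

# The `model`, `weight_variable`, `bias_variable` and `constant_bias_variable`
# helpers used by run_convolution are defined elsewhere. The sketches below
# are assumptions consistent with the shapes declared above: the
# image_size / 4 factor in layer3_weights implies two stride-2 reductions,
# implemented here as stride-2 convolutions (2x2 max-pooling would fit the
# shapes equally well). The network stand-in is named convolution_model so it
# does not collide with the logistic-regression model above.
def weight_variable(shape):
    # Weights drawn from a truncated normal distribution with a small stddev.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    # Biases initialized to zero.
    return tf.Variable(tf.zeros(shape))


def constant_bias_variable(value, shape):
    # Biases initialized to a constant (1.0 above, for the ReLU layers).
    return tf.Variable(tf.constant(value, shape=shape))


def convolution_model(data,
                      layer1_weights, layer1_biases,
                      layer2_weights, layer2_biases,
                      layer3_weights, layer3_biases,
                      layer4_weights, layer4_biases):
    # Two 5x5 convolutions with stride 2 and ReLU, reducing each spatial
    # dimension to image_size / 4.
    conv1 = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
    hidden1 = tf.nn.relu(conv1 + layer1_biases)
    conv2 = tf.nn.conv2d(hidden1, layer2_weights, [1, 2, 2, 1], padding='SAME')
    hidden2 = tf.nn.relu(conv2 + layer2_biases)
    # Flatten to (batch, image_size / 4 * image_size / 4 * depth), then a
    # fully connected ReLU layer and the output logits.
    shape = hidden2.get_shape().as_list()
    flattened = tf.reshape(hidden2, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden3 = tf.nn.relu(tf.matmul(flattened, layer3_weights) + layer3_biases)
    return tf.matmul(hidden3, layer4_weights) + layer4_biases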


def run_multilayer_neural_network(train_subset=45000, valid_size=5000, test=False):
    """
    Train a neural network with one hidden layer of 5000 ReLU units, using
    stochastic gradient descent with a batch size of 128, a learning rate of
    0.5 and 3001 steps.
    """
    train_dataset, train_labels, valid_dataset, valid_labels = \
        get_train_valid_data(train_subset, valid_size)

    print 'Building graph...'

    batch_size = 128
    hidden_layer_unit_1 = 5000

    graph = tf.Graph()
    with graph.as_default():
        tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, num_features))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_valid_labels = tf.constant(valid_labels)

        layer1_weights = weight_variable([num_features, hidden_layer_unit_1])
        layer1_biases = bias_variable([hidden_layer_unit_1])
        layer2_weights = weight_variable([hidden_layer_unit_1, num_labels])
        layer2_biases = bias_variable([num_labels])

        def model(data):
            # One hidden ReLU layer followed by a linear output layer.
            u1 = tf.matmul(data, layer1_weights) + layer1_biases
            y1 = tf.nn.relu(u1)
            u2 = tf.matmul(y1, layer2_weights) + layer2_biases
            return u2

        train_logits = model(tf_train_dataset)
        train_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(train_logits, tf_train_labels))
        train_prediction = tf.nn.softmax(train_logits)

        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(train_loss)

        valid_logits = model(tf_valid_dataset)
        valid_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(valid_logits, tf_valid_labels))
        valid_prediction = tf.nn.softmax(valid_logits)

    train_losses = []
    valid_losses = []
    train_accuracies = []
    valid_accuracies = []

    num_steps = 3001

    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print 'Initialized'
        for step in xrange(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}

            _, tl, predictions = session.run(
                [optimizer, train_loss, train_prediction], feed_dict=feed_dict)

            train_losses.append(tl)
            valid_losses.append(valid_loss.eval())
            train_accuracies.append(accuracy(predictions, batch_labels))
            valid_accuracies.append(accuracy(valid_prediction.eval(), valid_labels))

            if step % 100 == 0:
                print('Complete %.2f %%' % (float(step) / num_steps * 100.0))

    # Plot losses and accuracies
    print_loss(train_losses[-1], valid_losses[-1])
    print_accuracy(train_accuracies[-1], valid_accuracies[-1])
    plot(train_losses, valid_losses, 'Iteration', 'Loss')
    plot(train_accuracies, valid_accuracies, 'Iteration', 'Accuracy')
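

# The `accuracy` helper shared by the three training loops above is defined
# elsewhere; a minimal sketch, assuming one-hot labels and row-wise softmax
# prediction probabilities:
def accuracy(predictions, labels):
    # Percentage of examples whose arg-max prediction matches the arg-max of
    # the one-hot label.
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])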