def run_logistic_regression(train_subset=45000, valid_size=5000, test=False): train_dataset, train_labels = load_train_data() train_dataset = reformat_dataset(train_dataset) valid_dataset = train_dataset[:valid_size, :] valid_labels = train_labels[:valid_size] train_dataset = train_dataset[valid_size:valid_size + train_subset, :] train_labels = train_labels[valid_size:valid_size + train_subset] print 'Training set size: ', train_dataset.shape, train_labels.shape print 'Validation set size: ', valid_dataset.shape, valid_labels.shape print 'Training...' logreg = LogisticRegression() logreg.fit(train_dataset, train_labels) train_predict = logreg.predict(train_dataset) valid_predict = logreg.predict(valid_dataset) train_accuracy = accuracy(train_predict, train_labels) valid_accuracy = accuracy(valid_predict, valid_labels) print_accuracy(train_accuracy, valid_accuracy) # Predict test data if (not test): return print 'Predicting test dataset...' test_dataset = load_test_data() test_dataset = test_dataset.reshape((test_dataset.shape[0], test_dataset.shape[1] * test_dataset.shape[2] * test_dataset.shape[3])) test_predict = logreg.predict(test_dataset) label_matrices_to_csv(test_predict, 'submission.csv')
def run_multinomial_logistic_regression(train_subset=45000, valid_size=5000, test=True): """ In Multinomial Logistic Regression, we have input X of (n X image_size * image_size * color_channel) dimension and output Y of (n X num_labels) dimension, and Y is defined as: Y = softmax( X * W + b ) where W and b are weights and biases. The loss function is defined as: Loss = cross_entropy(Y, labels) We use stochastic gradient descent, with batch size of 128, learning rate of 0.5 and 3001 steps. We do not use any regularization because it does not improve the accuracy for this case. At the end of the training, accuracy curve, loss curve will be plotted. Keyword arguments: train_subset -- the number of training example valid_size -- number data in validation set test -- if true, output a .csv file that predict 300000 data in testing set """ train_dataset, train_labels, valid_dataset, valid_labels = \ get_train_valid_data(train_subset, valid_size) print 'Building graph...' batch_size = 128 graph = tf.Graph() with graph.as_default(): tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, num_features)) tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels)) tf_valid_dataset = tf.constant(valid_dataset) tf_valid_labels = tf.constant(valid_labels) weights = tf.Variable(tf.truncated_normal([num_features, num_labels])) biases = tf.Variable(tf.zeros([num_labels])) train_logits = model(tf_train_dataset, weights, biases) train_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(train_logits, tf_train_labels)) train_prediction = tf.nn.softmax(train_logits) optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(train_loss) # Predictions for the training, validation, and test data. valid_logits = model(tf_valid_dataset, weights, biases) valid_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(valid_logits, tf_valid_labels)) valid_prediction = tf.nn.softmax(valid_logits) print 'Training...' 
num_steps = 3001 trained_weights = np.ndarray(shape=(num_features, num_labels)) trained_biases = np.ndarray(shape=(num_labels)) train_losses = [] valid_losses = [] train_accuracies = [] valid_accuracies = [] with tf.Session(graph=graph) as session: tf.initialize_all_variables().run() print 'Initialized' for step in xrange(num_steps): offset = (step * batch_size) % (train_labels.shape[0] - batch_size) batch_data = train_dataset[offset:(offset + batch_size), :] batch_labels = train_labels[offset:(offset + batch_size), :] feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels} _, tl, vl, predictions, trained_weights, trained_biases = session.run( [optimizer, train_loss, valid_loss, train_prediction, weights, biases], feed_dict=feed_dict) train_losses.append(tl) valid_losses.append(vl) train_accuracies.append(accuracy(predictions, batch_labels)) valid_accuracies.append(accuracy(valid_prediction.eval(), valid_labels)) if step % 100 == 0: print('Complete %.2f %%' % (float(step) / num_steps * 100.0)) # Plot losses and accuracies print_loss(train_losses[-1], valid_losses[-1]) print_accuracy(train_accuracies[-1], valid_accuracies[-1]) plot(train_losses, valid_losses, 'Iteration', 'Loss') plot(train_accuracies, valid_accuracies, 'Iteration', 'Accuracy') if not test: return train_losses[-1], valid_losses[-1] part_size = 50000 test_graph = tf.Graph() with test_graph.as_default(): tf_test_dataset = tf.placeholder(tf.float32, shape=(part_size, num_features)) weights = tf.constant(trained_weights) biases = tf.constant(trained_biases) logits = model(tf_test_dataset, weights, biases) test_prediction = tf.nn.softmax(logits) test_dataset = load_test_data() test_dataset = reformat_dataset(test_dataset) total_part = 6 test_predicted_labels = np.ndarray(shape=(300000, 10)) for i in range(total_part): test_dataset_part = test_dataset[i * part_size:(i + 1) * part_size] with tf.Session(graph=test_graph) as session: tf.initialize_all_variables().run() feed_dict = 
{tf_test_dataset: test_dataset_part} predict = session.run([test_prediction], feed_dict=feed_dict) test_predicted_labels[i * part_size:(i + 1) * part_size, :] = np.asarray(predict)[0] test_predicted_labels = np.argmax(test_predicted_labels, 1) label_matrices_to_csv(test_predicted_labels, 'submission.csv')
# NOTE(review): this is a verbatim duplicate of the
# run_multinomial_logistic_regression defined earlier in this file; at
# import time this later definition shadows the earlier one.  Consider
# deleting one of the two copies — confirm which is the intended version.
def run_multinomial_logistic_regression(train_subset=45000, valid_size=5000, test=True):
    """ In Multinomial Logistic Regression, we have input X of
    (n X image_size * image_size * color_channel) dimension and output Y
    of (n X num_labels) dimension, and Y is defined as:

        Y = softmax( X * W + b )

    where W and b are weights and biases. The loss function is defined as:

        Loss = cross_entropy(Y, labels)

    We use stochastic gradient descent, with batch size of 128, learning
    rate of 0.5 and 3001 steps. We do not use any regularization because
    it does not improve the accuracy for this case.

    At the end of the training, accuracy curve, loss curve will be plotted.

    Keyword arguments:
    train_subset -- the number of training example
    valid_size -- number data in validation set
    test -- if true, output a .csv file that predict 300000 data in
            testing set
    """
    train_dataset, train_labels, valid_dataset, valid_labels = \
        get_train_valid_data(train_subset, valid_size)

    print 'Building graph...'
    batch_size = 128

    graph = tf.Graph()
    with graph.as_default():
        # Mini-batch placeholders for training; validation set is a constant.
        # num_features / num_labels are module-level globals.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, num_features))
        tf_train_labels = tf.placeholder(tf.float32,
                                         shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_valid_labels = tf.constant(valid_labels)

        weights = tf.Variable(tf.truncated_normal([num_features, num_labels]))
        biases = tf.Variable(tf.zeros([num_labels]))

        train_logits = model(tf_train_dataset, weights, biases)
        train_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(train_logits, tf_train_labels))
        train_prediction = tf.nn.softmax(train_logits)

        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(train_loss)

        # Predictions for the training, validation, and test data.
        valid_logits = model(tf_valid_dataset, weights, biases)
        valid_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(valid_logits, tf_valid_labels))
        valid_prediction = tf.nn.softmax(valid_logits)

    print 'Training...'
    num_steps = 3001
    trained_weights = np.ndarray(shape=(num_features, num_labels))
    trained_biases = np.ndarray(shape=(num_labels))
    train_losses = []
    valid_losses = []
    train_accuracies = []
    valid_accuracies = []
    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print 'Initialized'
        for step in xrange(num_steps):
            # Cycle through the training data in mini-batches.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }
            # Also fetch weights/biases each step so the last values
            # survive after the session closes.
            _, tl, vl, predictions, trained_weights, trained_biases = session.run(
                [
                    optimizer, train_loss, valid_loss, train_prediction,
                    weights, biases
                ],
                feed_dict=feed_dict)
            train_losses.append(tl)
            valid_losses.append(vl)
            train_accuracies.append(accuracy(predictions, batch_labels))
            valid_accuracies.append(
                accuracy(valid_prediction.eval(), valid_labels))
            if step % 100 == 0:
                print('Complete %.2f %%' % (float(step) / num_steps * 100.0))

    # Plot losses and accuracies
    print_loss(train_losses[-1], valid_losses[-1])
    print_accuracy(train_accuracies[-1], valid_accuracies[-1])
    plot(train_losses, valid_losses, 'Iteration', 'Loss')
    plot(train_accuracies, valid_accuracies, 'Iteration', 'Accuracy')

    if not test:
        return train_losses[-1], valid_losses[-1]

    # Predict the test set in fixed-size chunks (placeholder shape is static).
    part_size = 50000
    test_graph = tf.Graph()
    with test_graph.as_default():
        tf_test_dataset = tf.placeholder(tf.float32,
                                         shape=(part_size, num_features))
        # Learned parameters frozen as constants for inference.
        weights = tf.constant(trained_weights)
        biases = tf.constant(trained_biases)
        logits = model(tf_test_dataset, weights, biases)
        test_prediction = tf.nn.softmax(logits)

    test_dataset = load_test_data()
    test_dataset = reformat_dataset(test_dataset)
    total_part = 6
    test_predicted_labels = np.ndarray(shape=(300000, 10))
    for i in range(total_part):
        test_dataset_part = test_dataset[i * part_size:(i + 1) * part_size]
        # A fresh session is opened per chunk here.
        with tf.Session(graph=test_graph) as session:
            tf.initialize_all_variables().run()
            feed_dict = {tf_test_dataset: test_dataset_part}
            predict = session.run([test_prediction], feed_dict=feed_dict)
            test_predicted_labels[i * part_size:(i + 1) * part_size, :] = \
                np.asarray(predict)[0]
    test_predicted_labels = np.argmax(test_predicted_labels, 1)
    label_matrices_to_csv(test_predicted_labels, 'submission.csv')