def plot_learning_curve():
  """
  Plot final training and validation loss for growing training-set sizes.

  Calls run_multinomial_logistic_regression(test=False) for training subsets
  of 2500, 5000, ..., 45000 examples, collects the
  (training loss, validation loss) pair returned by each run, and passes both
  series to plot() with the axis labels 'Training Size' and 'Loss'.

  NOTE(review): only the two loss series are handed to plot(), without the
  subset sizes, so the x positions are presumably run indices rather than the
  real training-set sizes -- confirm against plot().
  """
  t_loss = []
  v_loss = []

  for m in range(1, 19):
    # Single pre-formatted argument: prints the same text as the Python 2
    # statement `print 'm: ', m` and is also valid call syntax.
    print('m:  %s' % m)
    t, v = run_multinomial_logistic_regression(train_subset=m * 2500, test=False)
    t_loss.append(t)
    v_loss.append(v)

  # Fixed: the original passed the undefined name `vloss`, which raised a
  # NameError only after all 18 training runs had completed.
  plot(t_loss, v_loss, 'Training Size', 'Loss')
def run_multinomial_logistic_regression(train_subset=45000, valid_size=5000, test=True):
  """
  Train a multinomial logistic regression classifier with mini-batch SGD.

  The logits come from model(X, W, b) with W of shape
  (num_features, num_labels) and b of shape (num_labels,). Predictions are

    Y = softmax( logits )      (presumably logits = X * W + b -- see model())

  and the loss is the mean softmax cross-entropy between logits and labels:

    Loss = cross_entropy(Y, labels)

  Training uses plain gradient descent with a batch size of 128, a learning
  rate of 0.5 and 3001 steps. No regularization term is added to the loss.
  After training, the final losses and accuracies are passed to print_loss()
  and print_accuracy(), and the per-step series are passed to plot().

  Keyword arguments:
    train_subset -- number of training examples, forwarded to get_train_valid_data
    valid_size -- number of validation examples, forwarded to get_train_valid_data
    test -- if true, predict the test set (6 parts of 50000 rows) and pass the
            predicted labels to label_matrices_to_csv(..., 'submission.csv')

  Returns:
    (last training loss, last validation loss) if test is false, else None.
  """
  train_dataset, train_labels, valid_dataset, valid_labels = \
      get_train_valid_data(train_subset, valid_size)

  print('Building graph...')
  batch_size = 128

  graph = tf.Graph()
  with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, num_features))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_valid_labels = tf.constant(valid_labels)

    weights = tf.Variable(tf.truncated_normal([num_features, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))

    # Keyword arguments: the positional (logits, labels) form is rejected by
    # later TensorFlow releases, while the keyword form is accepted by both.
    train_logits = model(tf_train_dataset, weights, biases)
    train_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=train_logits, labels=tf_train_labels))
    train_prediction = tf.nn.softmax(train_logits)

    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(train_loss)

    # Loss and predictions for the validation data.
    valid_logits = model(tf_valid_dataset, weights, biases)
    valid_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=valid_logits, labels=tf_valid_labels))
    valid_prediction = tf.nn.softmax(valid_logits)

  print('Training...')

  num_steps = 3001

  train_losses = []
  valid_losses = []

  train_accuracies = []
  valid_accuracies = []

  with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')

    for step in xrange(num_steps):
      # Sliding window over the training data that wraps around before the
      # last (possibly partial) batch.
      offset = (step * batch_size) % (train_labels.shape[0] - batch_size)

      batch_data = train_dataset[offset:(offset + batch_size), :]
      batch_labels = train_labels[offset:(offset + batch_size), :]
      feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}

      _, tl, predictions = session.run(
          [optimizer, train_loss, train_prediction], feed_dict=feed_dict)

      # One validation pass per step, taken after the update, so the recorded
      # loss and accuracy describe the same weights (previously the loss was
      # fetched in the training run and the predictions in a second pass).
      vl, valid_predictions = session.run([valid_loss, valid_prediction])

      train_losses.append(tl)
      valid_losses.append(vl)
      train_accuracies.append(accuracy(predictions, batch_labels))
      valid_accuracies.append(accuracy(valid_predictions, valid_labels))
      if step % 100 == 0:
        print('Complete %.2f %%' % (float(step) / num_steps * 100.0))

    # Read the trained parameters once, after the last update, instead of
    # fetching them alongside the update op on every step.
    trained_weights, trained_biases = session.run([weights, biases])

    # Plot losses and accuracies
    print_loss(train_losses[-1], valid_losses[-1])
    print_accuracy(train_accuracies[-1], valid_accuracies[-1])
    plot(train_losses, valid_losses, 'Iteration', 'Loss')
    plot(train_accuracies, valid_accuracies, 'Iteration', 'Accuracy')

  if not test:
    return train_losses[-1], valid_losses[-1]

  # The test set is predicted in fixed-size parts; the placeholder shape pins
  # every part to exactly part_size rows.
  part_size = 50000
  total_part = 6

  test_graph = tf.Graph()
  with test_graph.as_default():
    tf_test_dataset = tf.placeholder(tf.float32, shape=(part_size, num_features))
    frozen_weights = tf.constant(trained_weights)
    frozen_biases = tf.constant(trained_biases)

    logits = model(tf_test_dataset, frozen_weights, frozen_biases)
    test_prediction = tf.nn.softmax(logits)

  test_dataset = load_test_data()
  test_dataset = reformat_dataset(test_dataset)

  test_predicted_labels = np.empty((total_part * part_size, num_labels))

  # One session for all parts; the original opened a new session and added a
  # new init op to the graph for every part.
  with tf.Session(graph=test_graph) as session:
    # Kept once in case model() creates variables of its own; with only
    # constants and a placeholder in the graph this has nothing to initialize.
    tf.initialize_all_variables().run()
    for i in range(total_part):
      start = i * part_size
      stop = start + part_size
      test_predicted_labels[start:stop, :] = session.run(
          test_prediction, feed_dict={tf_test_dataset: test_dataset[start:stop]})

  test_predicted_labels = np.argmax(test_predicted_labels, 1)

  label_matrices_to_csv(test_predicted_labels, 'submission.csv')
# Example no. 3
# 0
def run_convolution(train_subset=45000, valid_size=5000, test=False):
  """
  Train the convolutional model with mini-batch SGD and plot its curves.

  Data comes from get_train_valid_data(..., reformat_data=False) and is fed as
  (batch, image_size, image_size, color_channel) batches of 16 examples.
  Parameters are created for two 5x5 filter banks with 16 output channels
  each, a 64-unit hidden layer and a num_labels-wide output layer; how they
  are wired together is defined by model(). The loss is the mean softmax
  cross-entropy, minimized by gradient descent (learning rate 0.05) for 1001
  steps. After training, the final losses and accuracies are passed to
  print_loss() and print_accuracy(), and the per-step series to plot().

  Keyword arguments:
    train_subset -- number of training examples, forwarded to get_train_valid_data
    valid_size -- number of validation examples, forwarded to get_train_valid_data
    test -- not used by this function

  Returns:
    None.
  """
  train_dataset, train_labels, valid_dataset, valid_labels = \
      get_train_valid_data(train_subset, valid_size, reformat_data=False)

  print('Building graph...')
  batch_size = 16
  patch_size = 5
  depth = 16
  num_hidden = 64

  graph = tf.Graph()
  with graph.as_default():
    # Input data.
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size, image_size, color_channel))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_valid_labels = tf.constant(valid_labels)

    # Variables.
    layer1_weights = weight_variable([patch_size, patch_size, color_channel, depth])
    layer1_biases = bias_variable(shape=[depth])
    layer2_weights = weight_variable([patch_size, patch_size, depth, depth])
    layer2_biases = constant_bias_variable(1.0, [depth])
    # `//` keeps the dimension an integer regardless of the interpreter's
    # division semantics; evaluation order matches the original expression.
    # Presumably the flattened feature size after model() shrinks each spatial
    # dimension by a factor of 4 -- confirm against model().
    layer3_weights = weight_variable([image_size // 4 * image_size // 4 * depth, num_hidden])
    layer3_biases = constant_bias_variable(1.0, [num_hidden])
    layer4_weights = weight_variable([num_hidden, num_labels])
    layer4_biases = constant_bias_variable(1.0, [num_labels])

    # Keyword arguments: the positional (logits, labels) form is rejected by
    # later TensorFlow releases, while the keyword form is accepted by both.
    train_logits = model(tf_train_dataset, layer1_weights, layer1_biases, layer2_weights,
                         layer2_biases, layer3_weights, layer3_biases, layer4_weights,
                         layer4_biases)
    train_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=train_logits, labels=tf_train_labels))
    train_prediction = tf.nn.softmax(train_logits)

    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(train_loss)

    valid_logits = model(tf_valid_dataset, layer1_weights, layer1_biases, layer2_weights,
                         layer2_biases, layer3_weights, layer3_biases, layer4_weights,
                         layer4_biases)
    valid_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=valid_logits, labels=tf_valid_labels))
    valid_prediction = tf.nn.softmax(valid_logits)

  train_losses = []
  valid_losses = []
  train_accuracies = []
  valid_accuracies = []
  num_steps = 1001

  with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')

    for step in xrange(num_steps):
      # Sliding window over the training data that wraps around before the
      # last (possibly partial) batch.
      offset = (step * batch_size) % (train_labels.shape[0] - batch_size)

      batch_data = train_dataset[offset:(offset + batch_size), :]
      batch_labels = train_labels[offset:(offset + batch_size), :]
      feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}

      _, tl, predictions = session.run(
          [optimizer, train_loss, train_prediction],
          feed_dict=feed_dict)

      # Fetch validation loss and predictions together so the validation set
      # goes through the network once per step; two separate eval() calls
      # repeated the whole forward pass.
      vl, valid_predictions = session.run([valid_loss, valid_prediction])

      train_losses.append(tl)
      valid_losses.append(vl)
      train_accuracies.append(accuracy(predictions, batch_labels))
      valid_accuracies.append(accuracy(valid_predictions, valid_labels))

      if step % 100 == 0:
        print('Complete %.2f %%' % (float(step) / num_steps * 100.0))

    # Plot losses and accuracies
    print_loss(train_losses[-1], valid_losses[-1])
    print_accuracy(train_accuracies[-1], valid_accuracies[-1])
    plot(train_losses, valid_losses, 'Iteration', 'Loss')
    plot(train_accuracies, valid_accuracies, 'Iteration', 'Accuracy')
# Example no. 4
# 0
def run_multilayer_neural_network(train_subset=45000, valid_size=5000, test=False):
  """
  Train a one-hidden-layer ReLU network with mini-batch SGD and plot its curves.

  The network is

    logits = relu( X * W1 + b1 ) * W2 + b2

  with 5000 hidden units. The loss is the mean softmax cross-entropy between
  logits and labels, minimized by gradient descent (learning rate 0.5) for
  3001 steps on batches of 128 examples. After training, the final losses and
  accuracies are passed to print_loss() and print_accuracy(), and the per-step
  series to plot().

  Keyword arguments:
    train_subset -- number of training examples, forwarded to get_train_valid_data
    valid_size -- number of validation examples, forwarded to get_train_valid_data
    test -- not used by this function

  Returns:
    None.
  """
  train_dataset, train_labels, valid_dataset, valid_labels = \
      get_train_valid_data(train_subset, valid_size)

  print('Building graph...')

  batch_size = 128
  hidden_layer_unit_1 = 5000

  graph = tf.Graph()
  with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, num_features))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_valid_labels = tf.constant(valid_labels)

    layer1_weights = weight_variable([num_features, hidden_layer_unit_1])
    layer1_bias = bias_variable([hidden_layer_unit_1])

    layer2_weights = weight_variable([hidden_layer_unit_1, num_labels])
    layer2_biases = bias_variable([num_labels])

    def model(data):
      # Local two-layer forward pass; shadows any module-level model() here.
      u1 = tf.matmul(data, layer1_weights) + layer1_bias
      y1 = tf.nn.relu(u1)
      u2 = tf.matmul(y1, layer2_weights) + layer2_biases
      return u2

    # Keyword arguments: the positional (logits, labels) form is rejected by
    # later TensorFlow releases, while the keyword form is accepted by both.
    train_logits = model(tf_train_dataset)
    train_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=train_logits, labels=tf_train_labels))
    train_prediction = tf.nn.softmax(train_logits)

    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(train_loss)

    valid_logits = model(tf_valid_dataset)
    valid_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=valid_logits, labels=tf_valid_labels))
    valid_prediction = tf.nn.softmax(valid_logits)

  train_losses = []
  valid_losses = []
  train_accuracies = []
  valid_accuracies = []
  num_steps = 3001

  with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')

    for step in xrange(num_steps):
      # Sliding window over the training data that wraps around before the
      # last (possibly partial) batch.
      offset = (step * batch_size) % (train_labels.shape[0] - batch_size)

      batch_data = train_dataset[offset:(offset + batch_size), :]
      batch_labels = train_labels[offset:(offset + batch_size), :]
      feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}

      _, tl, predictions = session.run(
          [optimizer, train_loss, train_prediction],
          feed_dict=feed_dict)

      # Fetch validation loss and predictions together so the validation set
      # goes through the 5000-unit layer once per step; two separate eval()
      # calls repeated the whole forward pass.
      vl, valid_predictions = session.run([valid_loss, valid_prediction])

      train_losses.append(tl)
      valid_losses.append(vl)
      train_accuracies.append(accuracy(predictions, batch_labels))
      valid_accuracies.append(accuracy(valid_predictions, valid_labels))

      if step % 100 == 0:
        print('Complete %.2f %%' % (float(step) / num_steps * 100.0))

    # Plot losses and accuracies
    print_loss(train_losses[-1], valid_losses[-1])
    print_accuracy(train_accuracies[-1], valid_accuracies[-1])
    plot(train_losses, valid_losses, 'Iteration', 'Loss')
    plot(train_accuracies, valid_accuracies, 'Iteration', 'Accuracy')
def run_multinomial_logistic_regression(train_subset=45000,
                                        valid_size=5000,
                                        test=True):
    """
    Train a multinomial logistic regression classifier with mini-batch SGD.

    The logits come from model(X, W, b) with W of shape
    (num_features, num_labels) and b of shape (num_labels,). Predictions are

      Y = softmax( logits )    (presumably logits = X * W + b -- see model())

    and the loss is the mean softmax cross-entropy between logits and labels:

      Loss = cross_entropy(Y, labels)

    Training uses plain gradient descent with a batch size of 128, a learning
    rate of 0.5 and 3001 steps. No regularization term is added to the loss.
    After training, the final losses and accuracies are passed to
    print_loss() and print_accuracy(), and the per-step series to plot().

    Keyword arguments:
      train_subset -- number of training examples, forwarded to
                      get_train_valid_data
      valid_size -- number of validation examples, forwarded to
                    get_train_valid_data
      test -- if true, predict the test set (6 parts of 50000 rows) and pass
              the predicted labels to
              label_matrices_to_csv(..., 'submission.csv')

    Returns:
      (last training loss, last validation loss) if test is false, else None.
    """
    train_dataset, train_labels, valid_dataset, valid_labels = \
        get_train_valid_data(train_subset, valid_size)

    print('Building graph...')
    batch_size = 128

    graph = tf.Graph()
    with graph.as_default():
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, num_features))
        tf_train_labels = tf.placeholder(tf.float32,
                                         shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_valid_labels = tf.constant(valid_labels)

        weights = tf.Variable(tf.truncated_normal([num_features, num_labels]))
        biases = tf.Variable(tf.zeros([num_labels]))

        # Keyword arguments: the positional (logits, labels) form is rejected
        # by later TensorFlow releases; the keyword form is accepted by both.
        train_logits = model(tf_train_dataset, weights, biases)
        train_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=train_logits,
                                                    labels=tf_train_labels))
        train_prediction = tf.nn.softmax(train_logits)

        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(train_loss)

        # Loss and predictions for the validation data.
        valid_logits = model(tf_valid_dataset, weights, biases)
        valid_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=valid_logits,
                                                    labels=tf_valid_labels))
        valid_prediction = tf.nn.softmax(valid_logits)

    print('Training...')

    num_steps = 3001

    train_losses = []
    valid_losses = []

    train_accuracies = []
    valid_accuracies = []

    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print('Initialized')

        for step in xrange(num_steps):
            # Sliding window over the training data that wraps around before
            # the last (possibly partial) batch.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)

            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }

            _, tl, predictions = session.run(
                [optimizer, train_loss, train_prediction],
                feed_dict=feed_dict)

            # One validation pass per step, taken after the update, so the
            # recorded loss and accuracy describe the same weights (previously
            # the loss was fetched in the training run and the predictions in
            # a second pass).
            vl, valid_predictions = session.run([valid_loss, valid_prediction])

            train_losses.append(tl)
            valid_losses.append(vl)
            train_accuracies.append(accuracy(predictions, batch_labels))
            valid_accuracies.append(accuracy(valid_predictions, valid_labels))
            if step % 100 == 0:
                print('Complete %.2f %%' % (float(step) / num_steps * 100.0))

        # Read the trained parameters once, after the last update, instead of
        # fetching them alongside the update op on every step.
        trained_weights, trained_biases = session.run([weights, biases])

        # Plot losses and accuracies
        print_loss(train_losses[-1], valid_losses[-1])
        print_accuracy(train_accuracies[-1], valid_accuracies[-1])
        plot(train_losses, valid_losses, 'Iteration', 'Loss')
        plot(train_accuracies, valid_accuracies, 'Iteration', 'Accuracy')

    if not test:
        return train_losses[-1], valid_losses[-1]

    # The test set is predicted in fixed-size parts; the placeholder shape
    # pins every part to exactly part_size rows.
    part_size = 50000
    total_part = 6

    test_graph = tf.Graph()
    with test_graph.as_default():
        tf_test_dataset = tf.placeholder(tf.float32,
                                         shape=(part_size, num_features))
        frozen_weights = tf.constant(trained_weights)
        frozen_biases = tf.constant(trained_biases)

        logits = model(tf_test_dataset, frozen_weights, frozen_biases)
        test_prediction = tf.nn.softmax(logits)

    test_dataset = load_test_data()
    test_dataset = reformat_dataset(test_dataset)

    test_predicted_labels = np.empty((total_part * part_size, num_labels))

    # One session for all parts; the original opened a new session and added
    # a new init op to the graph for every part.
    with tf.Session(graph=test_graph) as session:
        # Kept once in case model() creates variables of its own; with only
        # constants and a placeholder in the graph there is nothing to
        # initialize.
        tf.initialize_all_variables().run()
        for i in range(total_part):
            start = i * part_size
            stop = start + part_size
            test_predicted_labels[start:stop, :] = session.run(
                test_prediction,
                feed_dict={tf_test_dataset: test_dataset[start:stop]})

    test_predicted_labels = np.argmax(test_predicted_labels, 1)

    label_matrices_to_csv(test_predicted_labels, 'submission.csv')