Example #1
0
def main(_=None):
    """Build, train and evaluate the MNIST classifier chosen by FLAGS.model.

    Runs 10 epochs of gradient descent (learning rate 0.01) and prints the
    test-set accuracy after each epoch.

    Args:
        _: Unused.

    Raises:
        ValueError: If FLAGS.model is neither 'full' nor 'conv'.
    """
    # The data arrives as numpy arrays through the feed dict, so the graph
    # needs one placeholder for a batch of images and one for its labels.
    images_ph = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_ph = tf.placeholder(tf.float32, [BATCH_SIZE, 10])
    feed_vars = (images_ph, labels_ph)

    # Build the requested network.  Only result.loss and result.softmax are
    # used from the returned value.
    model_name = FLAGS.model
    if model_name == 'conv':
        result = lenet5(images_ph, labels_ph)
    elif model_name == 'full':
        result = multilayer_fully_connected(images_ph, labels_ph)
    else:
        raise ValueError('model must be full or conv: %s' % model_name)

    # The evaluation node is built for the test phase: some ops behave
    # differently in test vs train and take a phase argument.
    accuracy = result.softmax.evaluate_classifier(
        labels_ph, phase=pt.Phase.test)

    # Load both splits as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)

    # pt.apply_optimizer adds regularization losses and sets up a step
    # counter (pt.global_step()).
    sgd = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(sgd, losses=[result.loss])

    # With a save_path the runner checkpoints periodically; otherwise the
    # model is lost when the session ends.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch_index in xrange(10):
            # Reshuffle the training set at the start of every epoch.
            shuffled = data_utils.permute_data((train_images, train_labels))
            train_images, train_labels = shuffled

            train_feed = pt.train.feed_numpy(
                BATCH_SIZE, train_images, train_labels)
            runner.train_model(train_op,
                               result.loss,
                               EPOCH_SIZE,
                               feed_vars=feed_vars,
                               feed_data=train_feed,
                               print_every=100)

            test_feed = pt.train.feed_numpy(
                BATCH_SIZE, test_images, test_labels)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                TEST_SIZE,
                feed_vars=feed_vars,
                feed_data=test_feed)

            percent = classification_accuracy * 100
            print('Accuracy after %d epoch %g%%' % (epoch_index + 1, percent))
Example #2
0
def main(_=None):
    """Train the MNIST model named by FLAGS.model and report test accuracy.

    The graph is fed numpy batches through placeholders, optimized with
    gradient descent (rate 0.01) for 10 epochs, and evaluated on the test
    split after every epoch.

    Args:
        _: Unused.

    Raises:
        ValueError: If FLAGS.model is not 'full' or 'conv'.
    """
    # Placeholders that the feed dict fills with numpy batches.
    image_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])
    placeholders = (image_placeholder, labels_placeholder)

    # Map each supported model name to the function that builds it.
    builders = {
        'full': multilayer_fully_connected,
        'conv': lenet5,
    }
    if FLAGS.model not in builders:
        raise ValueError('model must be full or conv: %s' % FLAGS.model)
    # The built model exposes result.loss and result.softmax, used below.
    result = builders[FLAGS.model](image_placeholder, labels_placeholder)

    # Accuracy is an evaluation-only node, so it is created in the test
    # phase (some ops behave differently in test vs train).
    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)

    # Fetch the data as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images, test_labels = data_utils.mnist(training=False)

    # Applying the optimizer through pt.apply_optimizer also adds the
    # regularization losses and sets up the step counter (pt.global_step()).
    train_op = pt.apply_optimizer(tf.train.GradientDescentOptimizer(0.01),
                                  losses=[result.loss])

    # A runner with a save_path checkpoints every so often; without one the
    # model is lost at the end of the session.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for completed in xrange(10):
            # New random order for this epoch's training data.
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))

            training_batches = pt.train.feed_numpy(
                BATCH_SIZE, train_images, train_labels)
            runner.train_model(
                train_op, result.loss, EPOCH_SIZE,
                feed_vars=placeholders,
                feed_data=training_batches,
                print_every=100)

            test_batches = pt.train.feed_numpy(
                BATCH_SIZE, test_images, test_labels)
            classification_accuracy = runner.evaluate_model(
                accuracy, TEST_SIZE,
                feed_vars=placeholders,
                feed_data=test_batches)

            message = 'Accuracy after %d epoch %g%%' % (
                completed + 1, classification_accuracy * 100)
            print(message)
Example #3
0
def main(_=None):
    """Train the model selected by FLAGS.model for 20 epochs and print accuracy.

    Args:
        _: Unused.

    Raises:
        ValueError: If FLAGS.model is neither 'full' nor 'conv'.
    """
    image_shape = inp.get_image_shape(FLAGS.input_folder)
    batch_shape = (BATCH_SIZE,) + image_shape

    print('>>', image_shape, batch_shape)

    # NOTE(review): the placeholder shape is hard-coded and does not use
    # batch_shape computed above -- confirm whether that is intended.
    image_placeholder  = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])
    labels_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 10])

    if FLAGS.model == 'full':
        print('fully connected network')
        result = multilayer_fully_connected(image_placeholder, labels_placeholder)
    elif FLAGS.model == 'conv':
        print('conv network')
        result = lenet5(image_placeholder, labels_placeholder)
    else:
        # Fail with a clear message instead of a NameError on `result` below.
        raise ValueError('model must be full or conv: %s' % FLAGS.model)

    # Evaluation node, built for the test phase.
    accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                                  phase=pt.Phase.test)

    # Grab the data as numpy arrays.
    train_images, train_labels = data_utils.mnist(training=True)
    test_images,  test_labels  = data_utils.mnist(training=False)

    print(train_images.shape)
    print(train_labels.shape)

    optimizer = tf.train.GradientDescentOptimizer(0.01)
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in xrange(20):
            # Shuffle the training data.
            train_images, train_labels = data_utils.permute_data(
                (train_images, train_labels))
            # NOTE(review): this replaces the images that were just shuffled
            # with the contents of FLAGS.input_folder while keeping the
            # shuffled labels -- confirm the pairing is intended.
            train_images = inp.get_images(FLAGS.input_folder)

            runner.train_model(
                train_op,
                result.loss,
                _epoch_size,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_images, train_labels),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                _test_size,
                feed_vars=(image_placeholder, labels_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images, test_labels))
            print('Accuracy after %d epoch %g%%' % (
                epoch + 1, classification_accuracy * 100))
Example #4
0
def main(_=None):
  """Train a next-character model on Shakespeare and print samples.

  Trains for FLAGS.epochs epochs; after every epoch the next-character
  accuracy and a short sample are printed, and a final sample is printed once
  training is done.

  The function is written to run unchanged on Python 2 and Python 3: every
  print call takes a single argument and the batch count uses floor division.

  Args:
    _: Unused.
  """
  print('Starting Shakespeare')

  # Since we are feeding our data as numpy arrays, we need to create
  # placeholders in the graph.
  # These must then be fed using the feed dict.
  input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
  output_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])

  merged_size = BATCH_SIZE * TIMESTEPS

  inp = data_utils.reshape_data(input_placeholder)

  # We need a dense output to calculate loss and accuracy.
  # sparse_to_dense does a lookup using the indices from the first Tensor.
  # Because we are filling in a 2D array, the indices need to be 2 dimensional.
  t = tf.concat(1,
                [
                    tf.constant(
                        numpy.arange(merged_size).reshape((merged_size, 1)),
                        dtype=tf.int32),
                    data_utils.reshape_data(output_placeholder)
                ])

  labels = tf.sparse_to_dense(t, [merged_size, CHARS], 1.0, 0.0)

  # Some ops have different behaviors in test vs train and these take a phase
  # argument.
  with tf.variable_scope('shakespeare'):
    training_logits = create_model(inp, TIMESTEPS, pt.Phase.train)
    # Create the result.  Softmax applies softmax and creates a cross entropy
    # loss.  The result is a namedtuple.
    training_result = training_logits.softmax(labels)

  # Create the gradient optimizer and apply it to the graph.
  # pt.apply_optimizer adds regularization losses and sets up a step counter
  # (pt.global_step()) for you.
  optimizer = tf.train.AdagradOptimizer(0.5)
  train_op = pt.apply_optimizer(optimizer, losses=[training_result.loss])

  # For tracking accuracy in evaluation, we need to add an evaluation node.
  # We only run this when testing, so we need to specify that in the phase.
  # We also want to disable dropout, so we pass the phase to create_model.

  # Call variable scope by name so we also create a name scope.  This ensures
  # that we share variables and our names are properly organized.
  with tf.variable_scope('shakespeare', reuse=True):
    test_logits = create_model(inp, TIMESTEPS, pt.Phase.test)
    test_result = test_logits.softmax(labels)

  # Accuracy creates variables, so make it outside of the above scope.
  accuracy = test_result.softmax.evaluate_classifier(labels,
                                                     phase=pt.Phase.test)

  # Create an inference model so that we can sample.  The big difference is
  # that the input is a single character and it requires reset nodes.
  with tf.variable_scope('shakespeare', reuse=True):
    inference_input = tf.placeholder(tf.int32, [])
    # Needs to be 2 dimensional so that it matches the dims of the other models.
    reshaped = pt.wrap(inference_input).reshape([1, 1])
    inference_logits = create_model(reshaped, 1, pt.Phase.infer)

  # Grab the data as numpy arrays.  Inputs and targets are the same sequences
  # offset by one position.
  shakespeare = data_utils.shakespeare(TIMESTEPS + 1)
  shakespeare_in = shakespeare[:, :-1]
  shakespeare_out = shakespeare[:, 1:]

  # We can set a save_path in the runner to automatically checkpoint every so
  # often.  Otherwise at the end of the session, the model will be lost.
  runner = pt.train.Runner(save_path=FLAGS.save_path)
  with tf.Session():
    for epoch in range(FLAGS.epochs):
      # Shuffle the training data.
      shakespeare_in, shakespeare_out = data_utils.permute_data(
          (shakespeare_in, shakespeare_out))

      # Floor division keeps the batch count an int on Python 3 as well.
      num_batches = len(shakespeare_in) // BATCH_SIZE

      runner.train_model(train_op,
                         training_result.loss,
                         num_batches,
                         feed_vars=(input_placeholder, output_placeholder),
                         feed_data=pt.train.feed_numpy(
                             BATCH_SIZE, shakespeare_in, shakespeare_out),
                         print_every=10)
      classification_accuracy = runner.evaluate_model(
          accuracy,
          num_batches,
          feed_vars=(input_placeholder, output_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, shakespeare_in,
                                        shakespeare_out))

      print('Next character accuracy after epoch %d: %g%%' % (
          epoch + 1, classification_accuracy * 100))

      # Use a temperature smaller than 1 because the early stages of the model
      # don't assign much confidence.
      print(sample(inference_input,
                   inference_logits,
                   max_length=128,
                   temperature=0.5))

    # Print a sampling from the model.
    print(sample(inference_input, inference_logits))
Example #5
0
def main(_=None):
    """Train a sequence classifier on baby names and print its accuracy.

    Trains for 100 epochs with Adagrad under an exponentially decayed learning
    rate and prints the evaluated accuracy after every epoch.

    The function is written to run unchanged on Python 2 and Python 3: every
    print call takes a single argument and the epoch size uses floor division.

    Args:
        _: Unused.
    """
    print('Starting Baby Names')

    # Since we are feeding our data as numpy arrays, we need to create
    # placeholders in the graph.
    # These must then be fed using the feed dict.
    input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])

    inp = data_utils.reshape_data(input_placeholder)

    # Create a label for each timestep.
    labels = data_utils.reshape_data(
        tf.reshape(
            tf.tile(output_placeholder, [1, TIMESTEPS]),
            [BATCH_SIZE, TIMESTEPS, SEXES]),
        per_example_length=2)

    # We also need to set per example weights so that the softmax doesn't output a
    # prediction on intermediate nodes.
    length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1])

    # We need a dense multiplier for the per example weights.  The only place
    # that has a non-zero loss is the first EOS after the last character of the
    # name; the characters in the name and the trailing EOS characters are given a
    # 0 loss by assigning the weight to 0.0 and in the end only one character in
    # each batch has a weight of 1.0.
    # sparse_to_dense does a lookup using the indices from the first Tensor.
    # Because we are filling in a 2D array, the indices need to be 2 dimensional.
    # Since we want to assign 1 value for each row, the first dimension can just
    # be a sequence.
    t = tf.concat(1, [
        tf.constant(numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)),
                    dtype=tf.int32), length_placeholder
    ])

    # Squeeze removes dimensions that are equal to 1.  per_example_weights must
    # end up as 1 dimensional.
    per_example_weights = data_utils.reshape_data(
        tf.sparse_to_dense(t, [BATCH_SIZE, TIMESTEPS], 1.0,
                           default_value=0.0)).squeeze()

    # We need 2 copies of the graph that share variables.  The first copy runs
    # training and will do dropout if specified and the second will not include
    # dropout.  Dropout is controlled by the phase argument, which sets the mode
    # consistently throughout a graph.
    with tf.variable_scope('baby_names'):
        result = create_model(inp, labels, TIMESTEPS, per_example_weights)

    # Call variable scope by name so we also create a name scope.  This ensures
    # that we share variables and our names are properly organized.
    with tf.variable_scope('baby_names', reuse=True):
        # Some ops have different behaviors in test vs train and these take a phase
        # argument.
        test_result = create_model(inp,
                                   labels,
                                   TIMESTEPS,
                                   per_example_weights,
                                   phase=pt.Phase.test)

    # For tracking accuracy in evaluation, we need to add an evaluation node.
    # We only run this when testing, so we need to specify that in the phase.
    accuracy = test_result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.test, per_example_weights=per_example_weights)

    # We can also compute a batch accuracy to monitor progress.
    batch_accuracy = result.softmax.evaluate_classifier(
        labels, phase=pt.Phase.train, per_example_weights=per_example_weights)

    # Grab the inputs, outputs and lengths as numpy arrays.
    # Lengths could have been calculated from names, but it was easier to
    # calculate inside the utility function.
    names, sex, lengths = data_utils.baby_names(TIMESTEPS)

    # Floor division keeps the number of batches an int on Python 3 as well.
    epoch_size = len(names) // BATCH_SIZE
    # Create the gradient optimizer and apply it to the graph.
    # pt.apply_optimizer adds regularization losses and sets up a step counter
    # (pt.global_step()) for you.
    # This sequence model does very well with initially high rates.
    optimizer = tf.train.AdagradOptimizer(
        tf.train.exponential_decay(1.0,
                                   pt.global_step(),
                                   epoch_size,
                                   0.95,
                                   staircase=True))
    train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

    # We can set a save_path in the runner to automatically checkpoint every so
    # often.  Otherwise at the end of the session, the model will be lost.
    runner = pt.train.Runner(save_path=FLAGS.save_path)
    with tf.Session():
        for epoch in range(100):
            # Shuffle the training data.
            names, sex, lengths = data_utils.permute_data(
                (names, sex, lengths))

            runner.train_model(
                train_op, [result.loss, batch_accuracy],
                epoch_size,
                feed_vars=(input_placeholder, output_placeholder,
                           length_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths),
                print_every=100)
            classification_accuracy = runner.evaluate_model(
                accuracy,
                epoch_size,
                print_every=0,
                feed_vars=(input_placeholder, output_placeholder,
                           length_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths))

            print('Accuracy after epoch %d: %g%%' % (
                epoch + 1, classification_accuracy * 100))
Example #6
0
def main(_=None):
  """Train a next-character model on Shakespeare and print samples.

  Trains for FLAGS.epochs epochs; after every epoch the next-character
  accuracy and a short sample are printed, and a final sample is printed once
  training is done.

  Args:
    _: Unused.
  """
  print('Starting Shakespeare')

  # Since we are feeding our data as numpy arrays, we need to create
  # placeholders in the graph.
  # These must then be fed using the feed dict.
  input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
  output_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])

  merged_size = BATCH_SIZE * TIMESTEPS

  inp = data_utils.reshape_data(input_placeholder)

  # We need a dense output to calculate loss and accuracy.
  # sparse_to_dense does a lookup using the indices from the first Tensor.
  # Because we are filling in a 2D array, the indices need to be 2 dimensional.
  t = tf.concat(1,
                [
                    tf.constant(
                        numpy.arange(merged_size).reshape((merged_size, 1)),
                        dtype=tf.int32),
                    data_utils.reshape_data(output_placeholder)
                ])

  labels = tf.sparse_to_dense(t, [merged_size, CHARS], 1.0, 0.0)

  # Some ops have different behaviors in test vs train and these take a phase
  # argument.
  with tf.variable_scope('shakespeare'):
    training_logits = create_model(inp, TIMESTEPS, pt.Phase.train)
    # Create the result.  Softmax applies softmax and creates a cross entropy
    # loss.  The result is a namedtuple.
    training_result = training_logits.softmax(labels)

  # Create the gradient optimizer and apply it to the graph.
  # pt.apply_optimizer adds regularization losses and sets up a step counter
  # (pt.global_step()) for you.
  optimizer = tf.train.AdagradOptimizer(0.5)
  train_op = pt.apply_optimizer(optimizer, losses=[training_result.loss])

  # For tracking accuracy in evaluation, we need to add an evaluation node.
  # We only run this when testing, so we need to specify that in the phase.
  # We also want to disable dropout, so we pass the phase to create_model.

  # Call variable scope by name so we also create a name scope.  This ensures
  # that we share variables and our names are properly organized.
  with tf.variable_scope('shakespeare', reuse=True):
    test_logits = create_model(inp, TIMESTEPS, pt.Phase.test)
    test_result = test_logits.softmax(labels)

  # Accuracy creates variables, so make it outside of the above scope.
  accuracy = test_result.softmax.evaluate_classifier(labels,
                                                     phase=pt.Phase.test)

  # Create an inference model so that we can sample.  The big difference is
  # that the input is a single character and it requires reset nodes.
  with tf.variable_scope('shakespeare', reuse=True):
    inference_input = tf.placeholder(tf.int32, [])
    # Needs to be 2 dimensional so that it matches the dims of the other models.
    reshaped = pt.wrap(inference_input).reshape([1, 1])
    inference_logits = create_model(reshaped, 1, pt.Phase.infer)

  # Grab the data as numpy arrays.  Inputs and targets are the same sequences
  # offset by one position.
  shakespeare = data_utils.shakespeare(TIMESTEPS + 1)
  shakespeare_in = shakespeare[:, :-1]
  shakespeare_out = shakespeare[:, 1:]

  # We can set a save_path in the runner to automatically checkpoint every so
  # often.  Otherwise at the end of the session, the model will be lost.
  runner = pt.train.Runner(save_path=FLAGS.save_path)
  with tf.Session():
    for epoch in xrange(FLAGS.epochs):
      # Shuffle the training data.
      shakespeare_in, shakespeare_out = data_utils.permute_data(
          (shakespeare_in, shakespeare_out))

      # Floor division: true division would hand the runner a float batch
      # count on Python 3.
      num_batches = len(shakespeare_in) // BATCH_SIZE

      runner.train_model(train_op,
                         training_result.loss,
                         num_batches,
                         feed_vars=(input_placeholder, output_placeholder),
                         feed_data=pt.train.feed_numpy(
                             BATCH_SIZE, shakespeare_in, shakespeare_out),
                         print_every=10)
      classification_accuracy = runner.evaluate_model(
          accuracy,
          num_batches,
          feed_vars=(input_placeholder, output_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, shakespeare_in,
                                        shakespeare_out))

      print('Next character accuracy after epoch %d: %g%%' % (
          epoch + 1, classification_accuracy * 100))

      # Use a temperature smaller than 1 because the early stages of the model
      # don't assign much confidence.
      print(sample(inference_input,
                   inference_logits,
                   max_length=128,
                   temperature=0.5))

    # Print a sampling from the model.
    print(sample(inference_input, inference_logits))
    result = lenet5(image_placeholder, labels_placeholder)
else:
    raise ValueError('model must be full or conv: %s' % FLAGS.model)

accuracy = result.softmax.evaluate_classifier(labels_placeholder,
                                              phase=pt.Phase.test)

train_images, train_labels = data_utils.mnist(training=True)
test_images, test_labels = data_utils.mnist(training=False)
optimizer = tf.train.GradientDescentOptimizer(0.01)
train_op = pt.apply_optimizer(optimizer, losses=[result.loss])
runner = pt.train.Runner(save_path=FLAGS.save_path)

with tf.Session():
    for epoch in range(10):
        train_images, train_labels = data_utils.permute_data(
            (train_images, train_labels))

        runner.train_model(train_op,
                           result.loss,
                           EPOCH_SIZE,
                           feed_vars=(image_placeholder, labels_placeholder),
                           feed_data=pt.train.feed_numpy(
                               BATCH_SIZE, train_images, train_labels),
                           print_every=100)
        classification_accuracy = runner.evaluate_model(
            accuracy,
            TEST_SIZE,
            feed_vars=(image_placeholder, labels_placeholder),
            feed_data=pt.train.feed_numpy(BATCH_SIZE, test_images,
                                          test_labels))
Example #8
0
def main(_=None, weight_init=None, activation_f=tf.nn.sigmoid, data_min=0, data_scale=1.0, epochs=3,learning_rate=None):
    """Train the encoder/decoder model and save per-epoch reconstructions.

    Args:
        _: Unused.
        weight_init: Forwarded to `decoder` as its `weight_init` argument.
        activation_f: Default activation function for the model's layers.
        data_min: Forwarded to `ut.mnist_select_n_classes` as `min`.
        data_scale: Forwarded to `ut.mnist_select_n_classes` as `scale`.
        epochs: Number of training epochs.
        learning_rate: Learning rate for the Adam optimizer.  When None, the
            optimizer's own default rate is used instead of passing None on.
    """
    tf.reset_default_graph()
    input_placeholder  = tf.placeholder(tf.float32, [BATCH_SIZE, 2])
    output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, 28, 28, 1])

    # Grab the data as numpy arrays.
    train_input, train_output = data_utils.mnist(training=True)
    test_input,  test_output  = data_utils.mnist(training=False)

    train_set = ut.mnist_select_n_classes(train_input, train_output, NUM_CLASSES, min=data_min, scale=data_scale)
    test_set  = ut.mnist_select_n_classes(test_input,  test_output,  NUM_CLASSES, min=data_min, scale=data_scale)
    # Element 1 of each selected set feeds the model; element 0 is the target.
    train_input, train_output = train_set[1], train_set[0]
    test_input,  test_output  = test_set[1],  test_set[0]

    ut.print_info('train (min, max): (%f, %f)' % (np.min(train_set[0]), np.max(train_set[0])))

    # One fixed batch that is reconstructed periodically to visualise progress.
    visual_inputs, visual_output = train_set[1][0:BATCH_SIZE], train_set[0][0:BATCH_SIZE]
    epoch_reconstruction = []

    EPOCH_SIZE = len(train_input) // BATCH_SIZE
    TEST_SIZE = len(test_input) // BATCH_SIZE

    ut.print_info('train: %s' % str(train_input.shape))
    ut.print_info('test:  %s' % str(test_input.shape))
    ut.print_info('output shape:  %s' % str(train_output[0].shape))

    # Sanity-check that the data matches the placeholder shapes/ranks.
    assert visual_inputs.shape == input_placeholder.get_shape()
    assert len(train_input.shape) == len(input_placeholder.get_shape())
    assert len(test_input.shape) == len(input_placeholder.get_shape())
    assert visual_output.shape == output_placeholder.get_shape()
    assert len(train_output.shape) == len(output_placeholder.get_shape())
    assert len(test_output.shape) == len(output_placeholder.get_shape())

    with pt.defaults_scope(activation_fn=activation_f):
        with pt.defaults_scope(phase=pt.Phase.train):
            with tf.variable_scope("model"):
                output_tensor = decoder(encoder(input_placeholder), weight_init=weight_init)

    pretty_loss = loss(output_tensor, output_placeholder)

    # Passing learning_rate=None through would fail once the optimizer is
    # applied, so fall back to the optimizer's built-in default in that case.
    if learning_rate is None:
        optimizer = tf.train.AdamOptimizer()
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train = pt.apply_optimizer(optimizer, losses=[pretty_loss])

    init = tf.initialize_all_variables()
    runner = pt.train.Runner(save_path=FLAGS.save_path)

    # Lowest (evaluated loss * 10) seen so far.
    best_q = 100000
    with tf.Session() as sess:
        sess.run(init)
        for epoch in xrange(epochs):
            # Record a reconstruction roughly 40 times over the run, and
            # always on the final epoch.
            if epoch % np.ceil(epochs / 40.0) == 0 or epoch + 1 == epochs:
                reconstruct, loss_value = sess.run([output_tensor, pretty_loss], {input_placeholder: visual_inputs, output_placeholder: visual_output})
                epoch_reconstruction.append(reconstruct)
                ut.print_info('epoch:%d (min, max): (%f %f)' %(epoch, np.min(reconstruct), np.max(reconstruct)))

            # Shuffle the training data.
            train_input, train_output = data_utils.permute_data(
                (train_input, train_output))

            runner.train_model(
                train,
                pretty_loss,
                EPOCH_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_input, train_output)
            )
            # Despite its name, `accuracy` holds the evaluated pretty_loss on
            # the test data, so lower is better.
            accuracy = runner.evaluate_model(
                pretty_loss,
                TEST_SIZE,
                feed_vars=(input_placeholder, output_placeholder),
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_input, test_output))
            ut.print_time('Accuracy after %d epoch %g%%' % (
                epoch + 1, accuracy * 100))
            scaled = accuracy * 10
            if best_q > scaled:
                best_q = scaled

        ut.reconstruct_images_epochs(np.asarray(epoch_reconstruction), visual_output,
                                     save_params={'suf':'mn_trivs', 'act':activation_f, 'e':epochs, 'opt':optimizer,
                                                  'lr': learning_rate, 'init':weight_init, 'acu': int(best_q)})
Example #9
0
def main(_=None):
  """Train a sequence classifier on baby names and print its accuracy.

  Two copies of the model share variables: one built with the default phase
  for training and one built for the test phase for evaluation.  Training
  runs for 100 epochs with Adagrad under an exponentially decayed learning
  rate, and the evaluated accuracy is printed after each epoch.

  Args:
    _: Unused.
  """
  print('Starting Baby Names')

  # The data is fed as numpy arrays through the feed dict, so the graph needs
  # placeholders for it.
  input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
  output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])

  inp = data_utils.reshape_data(input_placeholder)

  # Repeat the label once per timestep.
  tiled_labels = tf.tile(output_placeholder, [1, TIMESTEPS])
  labels_by_step = tf.reshape(tiled_labels, [BATCH_SIZE, TIMESTEPS, SEXES])
  labels = data_utils.reshape_data(labels_by_step, per_example_length=2)

  # Per example weights keep the softmax from predicting on intermediate
  # nodes.
  length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1])

  # Dense multiplier for the per example weights.  Only the first EOS after
  # the last character of the name gets a non-zero loss; the characters of the
  # name and the trailing EOS characters get weight 0.0, so exactly one
  # character per row ends up with weight 1.0.
  # sparse_to_dense looks values up by the indices in its first argument, and
  # those must be 2 dimensional because a 2D array is being filled.  One value
  # is set per row, so the row index is simply a sequence.
  row_ids = tf.constant(
      numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)), dtype=tf.int32)
  weight_indices = tf.concat_v2([row_ids, length_placeholder], 1)

  # squeeze drops the dimensions equal to 1; per_example_weights must end up
  # 1 dimensional.
  dense_weights = tf.sparse_to_dense(
      weight_indices, [BATCH_SIZE, TIMESTEPS], 1.0, default_value=0.0)
  per_example_weights = data_utils.reshape_data(dense_weights).squeeze()

  # First copy of the graph: training (applies dropout if specified).  The
  # phase argument sets the mode consistently throughout a graph.
  with tf.variable_scope('baby_names'):
    result = create_model(inp, labels, TIMESTEPS, per_example_weights)

  # Second copy: re-enter the scope by name with reuse=True so the variables
  # are shared and the names stay organized; built for the test phase.
  with tf.variable_scope('baby_names', reuse=True):
    test_result = create_model(
        inp, labels, TIMESTEPS, per_example_weights, phase=pt.Phase.test)

  # Evaluation node, run only when testing.
  accuracy = test_result.softmax.evaluate_classifier(
      labels, phase=pt.Phase.test, per_example_weights=per_example_weights)

  # Batch accuracy on the training graph, used to monitor progress.
  batch_accuracy = result.softmax.evaluate_classifier(
      labels, phase=pt.Phase.train, per_example_weights=per_example_weights)

  # Inputs, outputs and lengths as numpy arrays.  The lengths could be derived
  # from the names but the utility function already computes them.
  names, sex, lengths = data_utils.baby_names(TIMESTEPS)

  epoch_size = len(names) // BATCH_SIZE
  # This sequence model does very well with initially high rates, so start at
  # 1.0 and decay by 0.95 every epoch_size steps.
  decayed_rate = tf.train.exponential_decay(
      1.0, pt.global_step(), epoch_size, 0.95, staircase=True)
  optimizer = tf.train.AdagradOptimizer(decayed_rate)
  # pt.apply_optimizer adds regularization losses and sets up a step counter
  # (pt.global_step()).
  train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

  feed_vars = (input_placeholder, output_placeholder, length_placeholder)

  # With a save_path the runner checkpoints every so often; otherwise the
  # model is lost at the end of the session.
  runner = pt.train.Runner(save_path=FLAGS.save_path)
  with tf.Session():
    for epoch_index in xrange(100):
      # Shuffle the training data.
      shuffled = data_utils.permute_data((names, sex, lengths))
      names, sex, lengths = shuffled

      train_feed = pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths)
      runner.train_model(train_op,
                         [result.loss, batch_accuracy],
                         epoch_size,
                         feed_vars=feed_vars,
                         feed_data=train_feed,
                         print_every=100)

      eval_feed = pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths)
      classification_accuracy = runner.evaluate_model(accuracy,
                                                      epoch_size,
                                                      print_every=0,
                                                      feed_vars=feed_vars,
                                                      feed_data=eval_feed)

      percent = classification_accuracy * 100
      print('Accuracy after epoch %d: %g%%' % (epoch_index + 1, percent))
Example #10
0
def main(_=None, weight_init=tf.random_normal, activation_f=tf.nn.sigmoid, data_min=0, data_scale=1.0, epochs=50,
         learning_rate=0.01, prefix=None):
    """Train the encoder/decoder model on MNIST images and report the best loss.

    The network is trained to reproduce its own input: the images returned by
    `ut.mnist_select_n_classes` are used both as the feed for the input
    placeholder and as the target for the output placeholder.

    Args:
      _: Unused positional argument.
      weight_init: Weight initializer forwarded to `decoder`.
      activation_f: Installed as the default `activation_fn` through
        `pt.defaults_scope` while the model is built.
      data_min: Forwarded to `ut.mnist_select_n_classes` as `min`.
      data_scale: Forwarded to `ut.mnist_select_n_classes` as `scale`.
      epochs: Number of passes over the training data.
      learning_rate: Learning rate handed to the Adam optimizer.
      prefix: Stored under the 'i' key of the parameters used when saving the
        reconstruction images.

    Returns:
      The smallest test-set value of the loss seen after any epoch (the
      initial sentinel 100000 if no epoch improved on it).
    """
    tf.reset_default_graph()
    batch_shape = [BATCH_SIZE, 28, 28, 1]
    input_placeholder = tf.placeholder(tf.float32, batch_shape)
    output_placeholder = tf.placeholder(tf.float32, batch_shape)

    # Load MNIST as numpy arrays and keep only the selected classes.
    raw_train_images, raw_train_labels = data_utils.mnist(training=True)
    raw_test_images, raw_test_labels = data_utils.mnist(training=False)
    train_set = ut.mnist_select_n_classes(
        raw_train_images, raw_train_labels, NUM_CLASSES, min=data_min, scale=data_scale)
    test_set = ut.mnist_select_n_classes(
        raw_test_images, raw_test_labels, NUM_CLASSES, min=data_min, scale=data_scale)

    # Autoencoder setup: the images are both the input and the target.
    train_input = train_set[0]
    train_output = train_set[0]
    test_input = test_set[0]
    test_output = test_set[0]
    ut.print_info('train (min, max): (%f, %f)' % (np.min(train_set[0]), np.max(train_set[0])))

    # A fixed first batch whose reconstruction is recorded as training progresses.
    visual_inputs = train_set[0][0:BATCH_SIZE]
    visual_output = train_set[0][0:BATCH_SIZE]

    epoch_reconstruction = []

    train_batches = len(train_input) // BATCH_SIZE
    test_batches = len(test_input) // BATCH_SIZE

    assert_model(input_placeholder, output_placeholder, test_input, test_output, train_input, train_output,
                 visual_inputs, visual_output)

    # Build the model in the training phase with the requested activation.
    # (Batch-normalization defaults are intentionally not enabled here.)
    with pt.defaults_scope(activation_fn=activation_f), \
            pt.defaults_scope(phase=pt.Phase.train), \
            tf.variable_scope("model"):
        output_tensor = decoder(encoder(input_placeholder), weight_init=weight_init)

    pretty_loss = loss(output_tensor, output_placeholder)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train = pt.apply_optimizer(optimizer, losses=[pretty_loss])

    init = tf.initialize_all_variables()
    runner = pt.train.Runner(save_path=FLAGS.save_path)

    # Reconstructions are captured roughly 40 times over the run, plus once on
    # the final epoch.
    snapshot_stride = np.ceil(epochs / 40.0)
    feed_vars = (input_placeholder, output_placeholder)

    best_q = 100000
    with tf.Session() as sess:
        sess.run(init)
        for epoch in xrange(epochs):
            is_last_epoch = epoch + 1 == epochs
            if epoch % snapshot_stride == 0 or is_last_epoch:
                reconstruct, loss_value = sess.run(
                    [output_tensor, pretty_loss],
                    {input_placeholder: visual_inputs, output_placeholder: visual_output})
                epoch_reconstruction.append(reconstruct)
                additional_info = 'epoch:%d (min, max): (%f %f)' % (
                    epoch, np.min(reconstruct), np.max(reconstruct))
            else:
                additional_info = ''

            # Shuffle the training data.
            train_input, train_output = data_utils.permute_data((train_input, train_output))

            runner.train_model(
                train,
                pretty_loss,
                train_batches,
                feed_vars=feed_vars,
                feed_data=pt.train.feed_numpy(BATCH_SIZE, train_input, train_output),
                print_every=None)

            # Despite the name, this is the loss evaluated on the test set.
            accuracy = runner.evaluate_model(
                pretty_loss,
                test_batches,
                feed_vars=feed_vars,
                feed_data=pt.train.feed_numpy(BATCH_SIZE, test_input, test_output))
            ut.print_time('Accuracy after %2d/%d epoch %.2f; %s' % (epoch + 1, epochs, accuracy, additional_info))

            # Lower loss is better, so keep the minimum.
            if accuracy < best_q:
                best_q = accuracy

        save_params = {
            'suf': 'mn_basic',
            'act': activation_f,
            'e': epochs,
            'opt': optimizer,
            'lr': learning_rate,
            'init': weight_init,
            'acu': int(best_q),
            'bs': BATCH_SIZE,
            'h': HIDDEN_0_SIZE,
            'i': prefix,
        }
        ut.reconstruct_images_epochs(np.asarray(epoch_reconstruction), visual_output, save_params=save_params)

    ut.print_time('Best Quality: %f for %s' % (best_q, ut.to_file_name(save_params)))
    ut.reset_start_time()
    return best_q