def main(_=None):
  print('Starting Shakespeare')

  # Since we are feeding our data as numpy arrays, we need to create
  # placeholders in the graph.
  # These must then be fed using the feed dict.
  input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
  output_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])

  merged_size = BATCH_SIZE * TIMESTEPS

  inp = data_utils.reshape_data(input_placeholder)

  # We need a dense output to calculate loss and accuracy.
  # sparse_to_dense does a lookup using the indices from the first Tensor.
  # Because we are filling in a 2D array, the indices need to be 2 dimensional.
  t = tf.concat(1, [
      tf.constant(
          numpy.arange(merged_size).reshape((merged_size, 1)),
          dtype=tf.int32),
      data_utils.reshape_data(output_placeholder)
  ])

  labels = tf.sparse_to_dense(t, [merged_size, CHARS], 1.0, 0.0)

  # Some ops have different behaviors in test vs train and these take a phase
  # argument.
  with tf.variable_scope('shakespeare'):
    training_logits = create_model(inp, TIMESTEPS, pt.Phase.train)
    # Create the result. Softmax applies softmax and creates a cross entropy
    # loss. The result is a namedtuple.
    training_result = training_logits.softmax(labels)

  # Create the gradient optimizer and apply it to the graph.
  # pt.apply_optimizer adds regularization losses and sets up a step counter
  # (pt.global_step()) for you.
  optimizer = tf.train.AdagradOptimizer(0.5)
  train_op = pt.apply_optimizer(optimizer, losses=[training_result.loss])

  # For tracking accuracy in evaluation, we need an evaluation node that runs
  # only at test time, so we pass the test phase to create_model. This also
  # disables dropout.
  # Calling the variable scope by name also creates a name scope, which keeps
  # variables shared and names properly organized.
  with tf.variable_scope('shakespeare', reuse=True):
    test_logits = create_model(inp, TIMESTEPS, pt.Phase.test)
    test_result = test_logits.softmax(labels)

  # Accuracy creates variables, so make it outside of the above scope.
  accuracy = test_result.softmax.evaluate_classifier(labels,
                                                     phase=pt.Phase.test)

  # Create an inference model so that we can sample. The big difference is
  # that the input is a single character and it requires reset nodes.
  with tf.variable_scope('shakespeare', reuse=True):
    inference_input = tf.placeholder(tf.int32, [])
    # Needs to be 2 dimensional so that it matches the dims of the other
    # models.
    reshaped = pt.wrap(inference_input).reshape([1, 1])
    inference_logits = create_model(reshaped, 1, pt.Phase.infer)

  # Grab the data as numpy arrays.
  shakespeare = data_utils.shakespeare(TIMESTEPS + 1)
  shakespeare_in = shakespeare[:, :-1]
  shakespeare_out = shakespeare[:, 1:]

  # We can set a save_path in the runner to automatically checkpoint every so
  # often. Otherwise at the end of the session, the model will be lost.
  runner = pt.train.Runner(save_path=FLAGS.save_path)
  with tf.Session():
    for epoch in xrange(FLAGS.epochs):
      # Shuffle the training data.
      shakespeare_in, shakespeare_out = data_utils.permute_data(
          (shakespeare_in, shakespeare_out))

      runner.train_model(
          train_op,
          training_result.loss,
          len(shakespeare_in) // BATCH_SIZE,
          feed_vars=(input_placeholder, output_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, shakespeare_in,
                                        shakespeare_out),
          print_every=10)

      classification_accuracy = runner.evaluate_model(
          accuracy,
          len(shakespeare_in) // BATCH_SIZE,
          feed_vars=(input_placeholder, output_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, shakespeare_in,
                                        shakespeare_out))

      print('Next character accuracy after epoch %d: %g%%' % (
          epoch + 1, classification_accuracy * 100))

      # Use a temperature smaller than 1 because the early stages of the model
      # don't assign much confidence.
      print(sample(inference_input, inference_logits,
                   max_length=128, temperature=0.5))

    # Print a sampling from the model.
    print(sample(inference_input, inference_logits))
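
# The sparse_to_dense call above packs [row_index, char_id] pairs into dense
# one-hot targets. Below is a small numpy sketch of the same lookup, using
# made-up sizes and target ids (the values are illustrative, not from the
# tutorial); it assumes the module-level `import numpy` this file relies on.
def _one_hot_labels_sketch():
  merged_size, chars = 3, 4           # stand-ins for BATCH_SIZE * TIMESTEPS, CHARS
  targets = numpy.array([2, 0, 3])    # stand-in for the reshaped output ids
  labels = numpy.zeros((merged_size, chars), dtype=numpy.float32)
  # One write per row: labels[i, targets[i]] = 1.0, exactly what the
  # [row_index, char_id] indices in `t` request from sparse_to_dense.
  labels[numpy.arange(merged_size), targets] = 1.0
  return labels  # [[0,0,1,0], [1,0,0,0], [0,0,0,1]]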
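
# The `sample` helper called above is defined elsewhere in the tutorial; the
# sketch below (hypothetical signature, plain numpy) shows only the
# temperature trick it depends on. Dividing the logits by a temperature below
# 1.0 sharpens the softmax, so an under-trained model that spreads probability
# thinly still commits to plausible characters.
def _sample_one_char_sketch(logits, temperature=1.0):
  scaled = numpy.asarray(logits, dtype=numpy.float64) / temperature
  scaled -= scaled.max()              # stabilize exp() against overflow
  probs = numpy.exp(scaled)
  probs /= probs.sum()                # softmax over character ids
  return numpy.random.choice(len(probs), p=probs)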
def main(_=None):
  print('Starting Baby Names')

  # Since we are feeding our data as numpy arrays, we need to create
  # placeholders in the graph.
  # These must then be fed using the feed dict.
  input_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, TIMESTEPS])
  output_placeholder = tf.placeholder(tf.float32, [BATCH_SIZE, SEXES])

  inp = data_utils.reshape_data(input_placeholder)

  # Create a label for each timestep.
  labels = data_utils.reshape_data(
      tf.reshape(
          tf.tile(output_placeholder, [1, TIMESTEPS]),
          [BATCH_SIZE, TIMESTEPS, SEXES]),
      per_example_length=2)

  # We also need per example weights so that the softmax doesn't output a
  # prediction on intermediate nodes.
  length_placeholder = tf.placeholder(tf.int32, [BATCH_SIZE, 1])

  # We need a dense multiplier for the per example weights. The only position
  # with a non-zero loss is the first EOS after the last character of the
  # name; the characters in the name and the trailing EOS characters are given
  # a weight of 0.0, so in the end only one character in each example has a
  # weight of 1.0.
  # sparse_to_dense does a lookup using the indices from the first Tensor.
  # Because we are filling in a 2D array, the indices need to be 2 dimensional.
  # Since we want to assign 1 value for each row, the first dimension can just
  # be a sequence.
  t = tf.concat_v2(
      [
          tf.constant(
              numpy.arange(BATCH_SIZE).reshape((BATCH_SIZE, 1)),
              dtype=tf.int32),
          length_placeholder
      ],
      1)

  # Squeeze removes dimensions that are equal to 1. per_example_weights must
  # end up as 1 dimensional.
  per_example_weights = data_utils.reshape_data(
      tf.sparse_to_dense(t, [BATCH_SIZE, TIMESTEPS], 1.0,
                         default_value=0.0)).squeeze()

  # We need 2 copies of the graph that share variables. The first copy runs
  # training and will do dropout if specified; the second will not include
  # dropout. Dropout is controlled by the phase argument, which sets the mode
  # consistently throughout a graph.
  with tf.variable_scope('baby_names'):
    result = create_model(inp, labels, TIMESTEPS, per_example_weights)

  # Call variable scope by name so we also create a name scope. This ensures
  # that we share variables and our names are properly organized.
  with tf.variable_scope('baby_names', reuse=True):
    # Some ops have different behaviors in test vs train and these take a
    # phase argument.
    test_result = create_model(inp, labels, TIMESTEPS, per_example_weights,
                               phase=pt.Phase.test)

  # For tracking accuracy in evaluation, we need to add an evaluation node.
  # We only run this when testing, so we need to specify that in the phase.
  accuracy = test_result.softmax.evaluate_classifier(
      labels, phase=pt.Phase.test, per_example_weights=per_example_weights)

  # We can also compute a batch accuracy to monitor progress.
  batch_accuracy = result.softmax.evaluate_classifier(
      labels, phase=pt.Phase.train, per_example_weights=per_example_weights)

  # Grab the inputs, outputs and lengths as numpy arrays.
  # Lengths could have been calculated from names, but it was easier to
  # calculate inside the utility function.
  names, sex, lengths = data_utils.baby_names(TIMESTEPS)

  epoch_size = len(names) // BATCH_SIZE

  # Create the gradient optimizer and apply it to the graph.
  # pt.apply_optimizer adds regularization losses and sets up a step counter
  # (pt.global_step()) for you.
  # This sequence model does very well with an initially high learning rate.
  optimizer = tf.train.AdagradOptimizer(
      tf.train.exponential_decay(1.0,
                                 pt.global_step(),
                                 epoch_size,
                                 0.95,
                                 staircase=True))
  train_op = pt.apply_optimizer(optimizer, losses=[result.loss])

  # We can set a save_path in the runner to automatically checkpoint every so
  # often. Otherwise at the end of the session, the model will be lost.
  runner = pt.train.Runner(save_path=FLAGS.save_path)
  with tf.Session():
    for epoch in xrange(100):
      # Shuffle the training data.
      names, sex, lengths = data_utils.permute_data((names, sex, lengths))

      runner.train_model(
          train_op,
          [result.loss, batch_accuracy],
          epoch_size,
          feed_vars=(input_placeholder, output_placeholder,
                     length_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths),
          print_every=100)

      classification_accuracy = runner.evaluate_model(
          accuracy,
          epoch_size,
          print_every=0,
          feed_vars=(input_placeholder, output_placeholder,
                     length_placeholder),
          feed_data=pt.train.feed_numpy(BATCH_SIZE, names, sex, lengths))

      print('Accuracy after epoch %d: %g%%' % (
          epoch + 1, classification_accuracy * 100))
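
# A numpy sketch (made-up batch size, timesteps, and lengths) of the
# per-example weight mask that sparse_to_dense builds above: exactly one 1.0
# per row, at the first EOS after each name, so only that timestep contributes
# to the loss. It assumes the module-level `import numpy` this file relies on.
def _per_example_weights_sketch():
  batch_size, timesteps = 4, 6
  lengths = numpy.array([2, 4, 3, 5])  # index of the first EOS per example
  weights = numpy.zeros((batch_size, timesteps), dtype=numpy.float32)
  weights[numpy.arange(batch_size), lengths] = 1.0
  return weights
  # [[0. 0. 1. 0. 0. 0.]
  #  [0. 0. 0. 0. 1. 0.]
  #  [0. 0. 0. 1. 0. 0.]
  #  [0. 0. 0. 0. 0. 1.]]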
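
# A worked sketch of the schedule fed to AdagradOptimizer above: with
# staircase=True the rate decays in per-epoch steps rather than continuously,
# i.e. rate = 1.0 * 0.95 ** (global_step // epoch_size). The helper name and
# example numbers are illustrative only.
def _decayed_rate_sketch(global_step, epoch_size, initial_rate=1.0,
                         decay=0.95):
  return initial_rate * decay ** (global_step // epoch_size)
# _decayed_rate_sketch(0, 100) == 1.0; after 10 epochs (step 1000) it is
# roughly 0.599, keeping the initially high rate the comment above calls for.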