Example #1
0
    def __init__(self, decoder, l2_regularization):
        """Build the copy-cost / L2 objective and the Adam update op.

        Args:
            decoder: object exposing `copynet_logits` (one logits tensor per
                step), `cost` and `learning_step`.
            l2_regularization: weight of the L2 term; when it is not
                positive the term contributes 0.0 to the optimized cost
                (the raw L2 value is still logged).
        """
        self.decoder = decoder
        step_logits = decoder.copynet_logits

        # One target and one weight placeholder per copy-net step.
        self.copy_target_plc = [
            tf.placeholder(tf.int64, shape=[None]) for _ in step_logits
        ]
        self.copy_w_plc = [
            tf.placeholder(tf.float32, shape=[None]) for _ in step_logits
        ]

        # Weighted cross entropy of every step.
        copy_costs_in_time = []
        for weight, logits, target in zip(
                self.copy_w_plc, step_logits, self.copy_target_plc):
            step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits, target)
            copy_costs_in_time.append(step_xent * weight)

        per_step_totals = [tf.reduce_sum(cost) for cost in copy_costs_in_time]
        copy_cost = sum(per_step_totals)
        for tag, collection in (('train_copy_cost', "summary_train"),
                                ('val_copy_cost', "summary_val")):
            tf.scalar_summary(tag, copy_cost, collections=[collection])

        with tf.variable_scope("l2_regularization"):
            squared_norms = [
                tf.reduce_sum(var ** 2) for var in tf.trainable_variables()
            ]
            l2_value = sum(squared_norms)
            l2_cost = (l2_regularization * l2_value
                       if l2_regularization > 0 else 0.0)

            tf.scalar_summary('train_l2_cost', l2_value,
                              collections=["summary_train"])

        optimizer = tf.train.AdamOptimizer(1e-4)
        total_cost = decoder.cost + copy_cost + l2_cost
        gradients = optimizer.compute_gradients(total_cost)
        self.optimize_op = optimizer.apply_gradients(
            gradients, global_step=decoder.learning_step)

        # Separate merged summaries for the training and validation phases.
        self.summary_train = tf.merge_summary(
            tf.get_collection("summary_train"))
        self.summary_val = tf.merge_summary(tf.get_collection("summary_val"))
Example #2
0
    def train(self, eval_on_test=False):
        """Train the model, log summaries and save the parameters to file.

        Training data is created by prepare_training_data(), which must be
        called before this function.

        Args:
            eval_on_test: when true, the accuracy on the test set is written
                to the summary log next to the training summaries.
        """
        tf.reset_default_graph()
        with tf.Session() as sess:
            feature_data = tf.placeholder("float", [None, self.num_predictors])
            labels = tf.placeholder("float", [None, self.num_classes])

            layers = [self.num_predictors] + self.hidden_layers + [self.num_classes]
            model = self.inference(feature_data, layers)
            cost, cost_summary_op = self.loss(model, labels)
            training_op = self.training(cost, learning_rate=0.0001)

            correct_prediction = tf.equal(tf.argmax(model, 1), tf.argmax(labels, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

            # Train and test accuracy share the same tensor but are logged
            # under different tags, so they get separate summary ops.
            accuracy_op_train = tf.scalar_summary("Accuracy on Train", accuracy)
            summary_op_train = tf.merge_summary([cost_summary_op, accuracy_op_train])
            if eval_on_test:
                accuracy_op_test = tf.scalar_summary("Accuracy on Test", accuracy)
                summary_op_test = tf.merge_summary([accuracy_op_test])

            summary_writer = tf.train.SummaryWriter(self.log_dir + self.model_name, sess.graph)
            try:
                train_dict = {
                    feature_data: self.training_predictors_tf.values,
                    labels: self.training_classes_tf.values.reshape(
                        len(self.training_classes_tf.values), self.num_classes)}

                if eval_on_test:
                    test_dict = {
                        feature_data: self.test_predictors_tf.values,
                        labels: self.test_classes_tf.values.reshape(
                            len(self.test_classes_tf.values), self.num_classes)}

                init = tf.initialize_all_variables()
                sess.run(init)

                for i in range(1, self.max_iteration):
                    sess.run(training_op, feed_dict=train_dict)

                    # Write summary to log
                    if i % 100 == 0:
                        summary_str = sess.run(summary_op_train, feed_dict=train_dict)
                        summary_writer.add_summary(summary_str, i)
                        if eval_on_test:
                            summary_str = sess.run(summary_op_test, feed_dict=test_dict)
                            summary_writer.add_summary(summary_str, i)
                        summary_writer.flush()

                    # Print current accuracy to console
                    if i % 5000 == 0:
                        print(i, sess.run(accuracy, feed_dict=train_dict))
            finally:
                # Release the event file even when training is interrupted.
                summary_writer.close()

            # Save trained parameters
            saver = tf.train.Saver()
            saver.save(sess, self.model_filename)
Example #3
0
    def summary(self):
        """Create the train/validation summary ops and their writers.

        Sets `train_summary_op`, `train_summary_writer`, `val_summary_op`
        and `val_summary_writer` on the instance. The writers log below
        `config.out_dir`/summaries in the `train` and `val` sub-directories.
        """
        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for grad, var in self.grads_and_vars:
            if grad is None:
                continue
            tag_prefix = var.op.name + '/gradients/'
            grad_summaries.append(
                tf.histogram_summary(tag_prefix + 'hist', grad))
            grad_summaries.append(
                tf.scalar_summary(tag_prefix + 'sparsity',
                                  tf.nn.zero_fraction(grad)))

        grad_summaries_merged = tf.merge_summary(grad_summaries)

        # The output directory comes from the configuration; no per-run
        # timestamp is involved (the previously computed one was never used).
        print("Writing to %s\n" % config.out_dir)

        # Summaries for loss and accuracy
        loss_summary = tf.scalar_summary("loss", self.loss)
        acc_summary = tf.scalar_summary("accuracy", self.accuracy)

        # Train summaries additionally carry the gradient statistics.
        self.train_summary_op = tf.merge_summary(
            [loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(config.out_dir, "summaries", "train")
        self.train_summary_writer = tf.train.SummaryWriter(
            train_summary_dir, self.sess.graph_def)

        # Validation summaries: loss and accuracy only.
        self.val_summary_op = tf.merge_summary([loss_summary, acc_summary])
        val_summary_dir = os.path.join(config.out_dir, "summaries", "val")
        self.val_summary_writer = tf.train.SummaryWriter(
            val_summary_dir, self.sess.graph_def)
Example #4
0
 def create_summaries(self):
     """Register the evaluation scalars and build both merged summary ops.

     The evaluation scalars go only into EVAL_SUMMARIES_COLLECTION, so
     `self.summaries` (merged from the default summaries collection) and
     `self.eval_summaries` are merged from different collections.
     """
     eval_scalars = (
         ("eval_cost", self.eval_cost),
         ("eval_accuracy", self.eval_accuracy),
     )
     for tag, tensor in eval_scalars:
         tf.scalar_summary(tag, tensor,
                           collections=[EVAL_SUMMARIES_COLLECTION])

     default_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
     self.summaries = tf.merge_summary(default_ops)

     eval_ops = tf.get_collection(EVAL_SUMMARIES_COLLECTION)
     self.eval_summaries = tf.merge_summary(eval_ops)
Example #5
0
    def define_summaries(self):
        '''Group the logged variables into merged summary ops.

        Entries of `self.log_vars` are (name, tensor) pairs. Names starting
        with 'g' or 'd' become scalar summaries merged into `self.g_sum` /
        `self.d_sum`; names starting with 'hist' become histogram summaries
        merged into `self.hist_sum`. Any other name is ignored.
        '''
        grouped = {'g': [], 'd': [], 'hist': []}
        for name, tensor in self.log_vars:
            if name.startswith('hist'):
                grouped['hist'].append(tf.histogram_summary(name, tensor))
            elif name.startswith(('g', 'd')):
                # The first character is the bucket key for scalar summaries.
                grouped[name[0]].append(tf.scalar_summary(name, tensor))

        self.g_sum = tf.merge_summary(grouped['g'])
        self.d_sum = tf.merge_summary(grouped['d'])
        self.hist_sum = tf.merge_summary(grouped['hist'])
Example #6
0
    def __init__(self, args, test):
        """Initialise the run statistics and the summary graph.

        Args:
            args: options object; `recording_frequency`, `save_path`, `game`,
                `agent_type` and `agent_name` are read here.
            test: when true, no loss summary is created and the records
                path ends in '/test' instead of '/train'.
        """
        self.test = test

        # Running statistics, all starting from their neutral values.
        self.reward = 0
        self.step_count = 0
        self.loss = 0.0
        self.loss_count = 0
        self.games = 0
        self.q_values = 0.0
        self.q_count = 0
        self.current_score = 0
        self.max_score = -1000000000
        self.min_score = 1000000000
        self.recording_frequency = args.recording_frequency

        with tf.device('/cpu:0'):
            # Scalar placeholders fed with the values to be recorded.
            self.spg = tf.placeholder(
                tf.float32, shape=[], name="score_per_game")
            self.mean_q = tf.placeholder(tf.float32, shape=[])
            self.total_gp = tf.placeholder(tf.float32, shape=[])
            self.max_r = tf.placeholder(tf.float32, shape=[])
            self.min_r = tf.placeholder(tf.float32, shape=[])
            self.time = tf.placeholder(tf.float32, shape=[])

            self.spg_summ = tf.scalar_summary('score_per_game', self.spg)
            self.q_summ = tf.scalar_summary('q_values', self.mean_q)
            self.gp_summ = tf.scalar_summary('steps_per_game', self.total_gp)
            self.max_summ = tf.scalar_summary('maximum_score', self.max_r)
            self.min_summ = tf.scalar_summary('minimum_score', self.min_r)
            self.time_summ = tf.scalar_summary('steps_per_second', self.time)

            if test:
                merged_inputs = [
                    self.spg_summ, self.q_summ, self.gp_summ, self.max_summ,
                    self.min_summ, self.time_summ]
                mode_suffix = '/test'
            else:
                # The loss is only recorded while training.
                self.mean_l = tf.placeholder(tf.float32, shape=[], name='loss')
                self.l_summ = tf.scalar_summary('loss', self.mean_l)
                merged_inputs = [
                    self.spg_summ, self.q_summ, self.gp_summ, self.l_summ,
                    self.max_summ, self.min_summ, self.time_summ]
                mode_suffix = '/train'

            self.summary_op = tf.merge_summary(merged_inputs)
            self.path = (
                args.save_path + '/records/' + args.game + '/'
                + args.agent_type + '/' + args.agent_name + mode_suffix)

            self.sess = tf.Session()
            self.summary_writer = tf.train.SummaryWriter(self.path)
            self.start_time = time.time()
Example #7
0
def get_stat():
    """Build per-phase statistics (iteration count, display dict, summary).

    Returns:
        A dict keyed by phase; each value is a dict with the keys
        `iteration`, `display` and `summary`.
    """
    fields = ['loss', 'acc']

    stat = {}
    for phase in data._PHASES:
        # Only the train and validation phases recompute the iteration count.
        if phase == data._TRAIN or phase == data._VAL:
            phase_files = (files[data._TRAIN] if phase == data._TRAIN
                           else files[data._VAL])
            iteration = sum([len(entry) for entry in phase_files]) / _BATCH_SIZE

        raw = {}
        averaged = {}
        for field in fields:
            raw[field] = net[field]
            averaged[field] = util.moving_average(net[field], iteration)

        display = {}
        for field in fields:
            display['%s_raw' % field] = raw[field]
        for field in fields:
            display['%s_avg' % field] = averaged[field]

        phase_name = data._NAME[phase]
        summaries = []
        for field in fields:
            summaries.append(
                tf.scalar_summary('%s_%s_raw' % (phase_name, field), raw[field]))
        for field in fields:
            summaries.append(
                tf.scalar_summary('%s_%s_avg' % (phase_name, field), averaged[field]))
        summary = tf.merge_summary(summaries)

        stat[phase] = {
            'iteration': iteration,
            'display': display,
            'summary': summary,
        }

    return stat
 def train(self, x_train, y_train, x_test, y_test, n_epoch=10):
     """Train the cnn.

     Args:
         x_train, y_train: training inputs and labels, zipped into batches
             of 64 by `batch_iterator`.
         x_test, y_test: held-out data, evaluated every 100 steps.
         n_epoch: number of epochs handed to `batch_iterator`.
     """
     self.session = tf.Session()
     with self.session.as_default():
         optimizer = tf.train.AdamOptimizer(1e-3)
         grad_vars = optimizer.compute_gradients(self.loss)
         train_op = optimizer.apply_gradients(grad_vars)
         # summaries
         acc_summary = tf.scalar_summary('accuracy', self.accuracy)
         loss_summary = tf.scalar_summary('loss', self.loss)
         summary_op = tf.merge_summary([acc_summary, loss_summary])
         summary_dir = os.path.join('cnn_logs', 'summaries')
         summary_writer = tf.train.SummaryWriter(summary_dir, self.session.graph)
         # Init session
         self.session.run(tf.initialize_all_variables())
         # Create the batch iterator
         batches = batch_iterator(list(zip(x_train, y_train)), 64, n_epoch)
         # Train loop
         i = 0
         for batch in batches:
             x_batch, y_batch = zip(*batch)
             # train step
             feed_dict = {self.x: x_batch, self.y_: y_batch, self.keep_prob: 0.5}
             _, summaries, loss, accuracy = self.session.run(
                 [train_op, summary_op, self.loss, self.accuracy], feed_dict)
             # Named `timestamp` so the local does not shadow the `time` module.
             timestamp = datetime.datetime.now().isoformat()
             i += 1
             print("%s : step %s || loss %s , acc %s" % (timestamp, i, loss, accuracy))
             summary_writer.add_summary(summaries, i)
             # Evaluation on test set every 100 steps
             if i % 100 == 0:
                 print("\nEvaluation on test-set")
                 feed_dict = {self.x: x_test, self.y_: y_test, self.keep_prob: 1.0}
                 # Evaluation must not run train_op: doing so would update
                 # the weights on the test data.
                 loss, accuracy = self.session.run(
                     [self.loss, self.accuracy], feed_dict)
                 print("%s : step %s || loss %s , acc %s" % (timestamp, i, loss, accuracy))
                 print("")
Example #9
0
    def full_model(data):
        """Build the prediction, error and summary ops for one batch.

        Relies on names from the enclosing scope: `predictor`, `quantizer`,
        `crappy_plot`, `draw_on`, `running_error`, `num_runs` and the
        SIG_LEN / QUANT_* constants.

        Returns:
            A 5-tuple (output_mean, queue_updates, batch_error, batch_error,
            summaries); note that `batch_error` is returned twice.
        """
        output_logits, queue_updates = predictor(data)
        # Keep only the first SIG_LEN-1 positions along axis 1.
        output_logits = output_logits[:, :SIG_LEN-1, :]
        # Index of the largest logit along the last axis.
        output_mean = tf.argmax(output_logits, dimension=2)

        # Targets are the input with its first position along axis 1 dropped.
        targets = data[:, 1:]
        quantized_targets = quantizer(targets, QUANT_LOWER, QUANT_UPPER, QUANT_LEVELS)
        with tf.name_scope('error'):
            # Cross entropy summed over axis 1, then averaged.
            batch_error = tf.reduce_mean(tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(output_logits, quantized_targets), reduction_indices=[1]))

            # Logged value folds this batch into the running error.
            error_summary = tf.scalar_summary('training error', (running_error + batch_error)/(num_runs + 1.0))
        # NOTE(review): output_plot is not used below (only in commented-out code).
        output_plot = crappy_plot(output_mean, QUANT_LEVELS)
        target_plot = crappy_plot(quantized_targets, QUANT_LEVELS)

        # NOTE(review): M, m and scaled_logits are only referenced by the
        # commented-out draw_on call below.
        M = tf.reduce_max(output_logits)
        m = tf.reduce_min(output_logits)
        scaled_logits = (output_logits-m)/(M-m)
        # image = draw_on(tf.transpose(scaled_logits, perm=[0, 2, 1])[:, :, :, None], target_plot, [1.0, 0.0, 0.0])
        # Casting is to work around some stupid tf bug; shouldn't be necessary
        output_probs = tf.reshape(tf.cast(tf.nn.softmax(tf.reshape(tf.cast(output_logits, tf.float64), [-1, QUANT_LEVELS])), tf.float32), [-1, SIG_LEN-1, QUANT_LEVELS])
        # Draw the target plot over the transposed probabilities with colour [1, 0, 0].
        image = draw_on(tf.transpose(output_probs, perm=[0, 2, 1])[:, :, :, None], target_plot, [1.0, 0.0, 0.0])


        # image = draw_on(1.0, target_plot, [1.0, 0.0, 0.0])    # The first 1.0 starts with a white canvas
        # image = draw_on(image, output_plot, [0.0, 0.0, 1.0])

        # At most 5 images are written per summary.
        sample_summary = tf.image_summary('posterior_sample', image, 5)
        summaries = tf.merge_summary([error_summary, sample_summary])
        return output_mean, queue_updates, batch_error, batch_error, summaries #+ 0.1*weight_decay
    def prepare_loss(self, entropy_beta):
        """Create the total loss and the merged loss summary op.

        Sets `self.r` (reward placeholder), `self.total_loss` and
        `self.loss_summary_op`.

        Args:
            entropy_beta: forwarded to the policy-loss builder.
        """
        with tf.device(self._device), tf.name_scope(self.network_name):
            # Pick the policy-loss builder matching the action space.
            build_policy_loss = (
                self._prepare_policy_loss_continuous
                if self._continuous_mode
                else self._prepare_policy_loss_discrete)
            policy_loss, entropy, summaries = build_policy_loss(entropy_beta)

            # R (input for value)
            self.r = tf.placeholder("float", [1],name="reward")

            # Value loss; the 0.5 factor makes the critic's learning rate
            # half of the actor's.
            value_loss = 0.5 * tf.nn.l2_loss(self.r - self.v)

            # Gradients of policy and value are summed up.
            self.total_loss = policy_loss + value_loss

            # todo: unclear whether all of these summaries are needed
            loss_summaries = list(summaries)
            loss_summaries.append(tf.scalar_summary(["R"], self.r))
            loss_summaries.append(tf.scalar_summary(["(R-V)"], self.td))
            loss_summaries.append(
                tf.scalar_summary("V (loss eval)", tf.reduce_mean(self.v)))
            loss_summaries.append(
                tf.scalar_summary(["V (r-td)"], self.r - self.td))
            loss_summaries.append(
                tf.scalar_summary("entropy", tf.reduce_mean(entropy)))
            # TODO: HACK: in batch mode a histogram is probably the better fit.
            loss_summaries.append(
                tf.scalar_summary("policy_loss", tf.reduce_mean(policy_loss)))
            loss_summaries.append(tf.scalar_summary("value_loss", value_loss))

            self.loss_summary_op = tf.merge_summary(loss_summaries)
Example #11
0
 def testMergeSummary(self):
   """Merging a histogram and a scalar summary yields both values."""
   with self.test_session() as session:
     ten = tf.constant(10.0)
     histogram_op = tf.histogram_summary("h", ten, name="histo")
     scalar_op = tf.scalar_summary("c", ten, name="summ")
     merged_op = tf.merge_summary([histogram_op, scalar_op])
     serialized = session.run(merged_op)
   # The merged summary is a scalar string tensor.
   self.assertEqual([], merged_op.get_shape())
   self.assertProtoEquals("""
     value {
       tag: "h"
       histo {
         min: 10.0
         max: 10.0
         num: 1.0
         sum: 10.0
         sum_squares: 100.0
         bucket_limit: 9.93809490288
         bucket_limit: 10.9319043932
         bucket_limit: 1.79769313486e+308
         bucket: 0.0
         bucket: 1.0
         bucket: 0.0
       }
     }
     value { tag: "c" simple_value: 10.0 }
   """, self._AsSummary(serialized))
  def testSummaries(self):
    """Write scalar, histogram, image and audio summaries to a temp dir.

    The temporary directory is removed and the writer closed even when one
    of the summary ops fails.
    """
    with self.cached_session() as s:
      var = tf.Variable([1, 2, 3], dtype=tf.float32)
      s.run(tf.initialize_all_variables())
      x, y = np.meshgrid(np.linspace(-10, 10, 256), np.linspace(-10, 10, 256))
      image = np.sin(x**2 + y**2) / np.sqrt(x**2 + y**2) * .5 + .5
      image = image[None, :, :, None]

      # make a dummy sound
      freq = 440  # A = 440Hz
      sampling_frequency = 11000
      audio = np.sin(2 * np.pi * np.linspace(0, 1, sampling_frequency) * freq)
      audio = audio[None, :, None]
      test_dir = tempfile.mkdtemp()
      writer = None
      try:
        # test summaries
        writer = tf.train.SummaryWriter(test_dir)
        summaries = [
            tf.scalar_summary("scalar_var", var[0]),
            tf.scalar_summary("scalar_reduce_var", tf.reduce_sum(var)),
            tf.histogram_summary("var_histogram", var),
            tf.image_summary("sin_image", image),
            tf.audio_summary("sin_wave", audio, sampling_frequency),
        ]
        run_summaries = s.run(summaries)
        writer.add_summary(s.run(tf.merge_summary(inputs=run_summaries)))
        # This is redundant, but we want to be able to rewrite the command
        writer.add_summary(s.run(tf.merge_all_summaries()))
      finally:
        # Always release the event file and the temporary directory.
        if writer is not None:
          writer.close()
        shutil.rmtree(test_dir)
Example #13
0
    def testCanBeCalledMultipleTimes(self):
        """Both stratified samplers can be built twice in the same graph."""
        batch_size = 20
        val_input_batch = [tf.zeros([2, 3, 4])]
        lbl_input_batch = tf.ones([], dtype=tf.int32)
        probs = np.array([0, 1, 0, 0, 0])
        sampler_args = (val_input_batch, lbl_input_batch, probs, batch_size)

        sample_known = tf.contrib.training.stratified_sample
        sample_unknown = tf.contrib.training.stratified_sample_unknown_dist

        # Two calls with known initial probabilities, then two without.
        batches = sample_known(*sampler_args, init_probs=probs)
        batches += sample_known(*sampler_args, init_probs=probs)
        batches += sample_unknown(*sampler_args)
        batches += sample_unknown(*sampler_args)

        collected = tf.get_collection(tf.GraphKeys.SUMMARIES)
        summary_op = tf.merge_summary(collected)

        with self.test_session() as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            fetches = batches + (summary_op,)
            sess.run(fetches)

            coord.request_stop()
            coord.join(threads)
 def __setup_ops(self):
     """Create the loss summary, train op and accuracy tensor.

     Sets `summary`, `train_op`, `merge_summaries` and `accuracy`.
     """
     # Cross entropy between the expected classes and the network output.
     loss = -tf.reduce_sum(self.actual_class * tf.log(self.output))
     self.summary = tf.scalar_summary(self.label, loss)

     optimizer = tf.train.AdamOptimizer(0.0001)
     self.train_op = optimizer.minimize(loss)
     self.merge_summaries = tf.merge_summary([self.summary])

     # Fraction of rows whose arg-max prediction matches the arg-max label.
     predicted = tf.argmax(self.output,1)
     expected = tf.argmax(self.actual_class,1)
     hits = tf.equal(predicted, expected)
     self.accuracy = tf.reduce_mean(tf.cast(hits, "float"))
Example #15
0
def setup_summaries(sess, env_id, args):
    """Create the score summaries and a writer for this run.

    The log directory is derived from `constants.LOG_FILE`, the current date
    and time, `env_id` and several fields of `args`.

    Args:
        sess: session whose `graph_def` is handed to the summary writer.
        env_id: identifier appended to the log directory name.
        args: options object; `initial_learning_rate`, `hidden_sizes`,
            `lstm_sizes` and `tag` are read here.

    Returns:
        A pair (summary_writer, record_score_fn). The function takes
        (sess, summary_writer, score, global_t), logs the score and its
        moving average over the last 100 recorded scores, and updates the
        smoothed score variable.
    """
    from collections import deque

    ROOT_LOG_DIR = constants.LOG_FILE
    TODAY_LOG_DIR = ROOT_LOG_DIR + "/" + datetime.now().date().isoformat()

    # Hour.minute of the current time (seconds are dropped).
    LOG_DIR = TODAY_LOG_DIR + "/" + datetime.now().time().replace(second=0, microsecond=0).isoformat()[0:-3].replace(':', '.')

    LOG_DIR += " %s" % env_id
    LOG_DIR += " lr=%f" % args.initial_learning_rate
    LOG_DIR += " hs=%s" % args.hidden_sizes
    LOG_DIR += " lstms=%s " % args.lstm_sizes

    if len(args.tag) > 0:
        LOG_DIR += " -- %s" % args.tag

    score_input = tf.placeholder(tf.float32,name="score_input")
    score_input_avg = tf.placeholder(tf.float32,name="score_input_avg")
    score_smooth = tf.Variable(dtype=tf.float32, initial_value=0, name="score_avg")
    # Exponential smoothing of the score, updated on every recorded score.
    score_smooth_assign_op = tf.assign(score_smooth, score_input * 0.01 + score_smooth * 0.99)

    score_summary_op = [tf.merge_summary([
            tf.scalar_summary("score", score_input),
            tf.scalar_summary("score_avg", score_input_avg),
            tf.scalar_summary("score_smooth", score_smooth),
        ]),
        score_smooth_assign_op]

    moving_avg_scores = deque(maxlen=100)

    summary_writer = tf.train.SummaryWriter(LOG_DIR, sess.graph_def)

    print("logs written to: %s " % LOG_DIR)
    print("tensorboard --logdir=%s" % LOG_DIR)

    def _record_score_fn(sess, summary_writer, score, global_t):
        # Each score enters the window exactly once; appending it twice
        # would halve the effective window of the moving average.
        moving_avg_scores.append(score)
        score_avg = np.mean(moving_avg_scores)

        summary_str, _ = sess.run(score_summary_op, feed_dict={
            score_input: score,
            score_input_avg: score_avg
        })

        summary_writer.add_summary(summary_str, global_t)

    return summary_writer, _record_score_fn
    def build_eval_graph(self):
        """Create the accumulators and ops used to evaluate a whole data set.

        Sets `total_loss`, `total_correct`, `example_count`, `mean_loss`,
        `accuracy`, `eval_reset`, `eval_step` and `summaries`.
        """
        def _accumulator():
            # Non-trainable running total that belongs to no collection.
            return tf.Variable(0.0, trainable=False, collections=[])

        self.total_loss = _accumulator()
        self.total_correct = _accumulator()
        self.example_count = _accumulator()

        # Means over everything accumulated so far.
        self.mean_loss = self.total_loss / self.example_count
        self.accuracy = self.total_correct / self.example_count

        # Per-batch updates of the running totals.
        num_correct = tf.reduce_sum(
            tf.cast(self.model.correct_predictions, "float"))
        batch_updates = [
            self.total_loss.assign_add(self.model.total_loss),
            self.total_correct.assign_add(num_correct),
            self.example_count.assign_add(self.model.batch_size),
        ]

        # Running `eval_reset` re-initialises all totals; call it before
        # starting the evaluation of a data set.
        initializers = [
            self.total_loss.initializer,
            self.total_correct.initializer,
            self.example_count.initializer,
        ]
        with tf.control_dependencies(initializers):
            self.eval_reset = tf.no_op()

        # Running `eval_step` applies the updates for one batch; call it for
        # each batch in the evaluation set.
        with tf.control_dependencies(batch_updates):
            self.eval_step = tf.no_op()

        # Summaries
        mean_loss_summary = tf.scalar_summary("mean_loss", self.mean_loss)
        accuracy_summary = tf.scalar_summary("accuracy", self.accuracy)
        self.summaries = tf.merge_summary(
            [mean_loss_summary, accuracy_summary])
Example #17
0
def evaluate(dataset_path):
  """Evaluate the model on a dataset, repeatedly unless FLAGS.run_once is set.

  Builds the evaluation graph (original and mirrored inputs, averaged
  prediction, normalized error, image summary), then calls `_eval_once` in a
  loop, sleeping FLAGS.eval_interval_secs between rounds.

  Args:
    dataset_path: path handed (as a single-element list) to
      `data_provider.batch_inputs`.
  """
  with tf.Graph().as_default(), tf.device('/cpu:0'):
    train_dir = Path(FLAGS.checkpoint_dir)
    reference_shape = mio.import_pickle(train_dir / 'reference_shape.pkl')
    
    # Two input pipelines over the same data: plain and mirrored.
    images, gt_truth, inits, _ = data_provider.batch_inputs(
            [dataset_path], reference_shape,
            batch_size=FLAGS.batch_size, is_training=False)

    mirrored_images, _, mirrored_inits, shapes = data_provider.batch_inputs(
        [dataset_path], reference_shape,
        batch_size=FLAGS.batch_size, is_training=False, mirror_image=True)

    print('Loading model...')
    # Build a Graph that computes the predictions from the
    # inference model.
    with tf.device(FLAGS.device):
        patch_shape = (FLAGS.patch_size, FLAGS.patch_size)
        pred, _, _ = mdm_model.model(images, inits, patch_shape=patch_shape)

        # The second model call below reuses the variables of the first.
        tf.get_variable_scope().reuse_variables()

        pred_mirrored, _, _ = mdm_model.model(
            mirrored_images, mirrored_inits, patch_shape=patch_shape)

    # Render predicted and ground-truth landmarks onto the images.
    pred_images, = tf.py_func(utils.batch_draw_landmarks,
            [images, pred], [tf.float32])
    gt_images, = tf.py_func(utils.batch_draw_landmarks,
            [images, gt_truth], [tf.float32])

    summaries = []
    # Ground truth and prediction are concatenated along axis 2.
    summaries.append(tf.image_summary('images',
        tf.concat(2, [gt_images, pred_images]), max_images=5))
    
    # Average the prediction with the flipped-back mirrored prediction.
    avg_pred = pred + tf.py_func(flip_predictions, (pred_mirrored, shapes), (tf.float32, ))[0]
    avg_pred /= 2.

    # Error of the averaged prediction against the ground truth.
    norm_error = mdm_model.normalized_rmse(avg_pred, gt_truth)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        mdm_train.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Merge the summaries collected in the local list above.
    summary_op = tf.merge_summary(summaries)

    graph_def = tf.get_default_graph().as_graph_def()
    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                            graph_def=graph_def)

    # Evaluate once, or forever with a pause between rounds.
    while True:
      _eval_once(saver, summary_writer, norm_error, summary_op)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
Example #18
0
def main(graph_path, Model, stream, validstream, continue_training=False,
        start_model=None, start_ind=0, save_every=1):
    """Run a complete training session.

    Will load a saved model to continue training if provided. After every
    epoch the statistics are written for tensorboard, and every
    `save_every` epochs the current model is saved.

    Args:
        graph_path: directory the summary writer logs to.
        Model: callable building the model; invoked as `Model(config=Config)`.
        stream: provides the training sentences via `get_sents()`.
        validstream: provides the validation sentences via `get_sents()`.
        continue_training: when true, parameters are restored from
            `start_model` before training.
        start_model: checkpoint path used when `continue_training` is true.
        start_ind: index of the first epoch.
        save_every: a checkpoint is written when the epoch index is a
            multiple of this value.
    """
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-Config.init_scale,
                                                     Config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = Model(config=Config)

        tf.initialize_all_variables().run()
        saver = tf.train.Saver(max_to_keep=Config.num_models)
        if continue_training:
            print("Continuing training from saved model ",start_model)
            saver.restore(session,start_model)

        # Build the reporting and learning-rate ops once; creating them
        # inside the epoch loop would add new nodes to the graph every epoch.
        stat_tags = ["epoch_cost", "median_cost", "max_cost",
                     "learning_rate", "accuracy"]
        stat_inputs = [tf.placeholder(tf.float32, shape=[]) for _ in stat_tags]
        merge = tf.merge_summary(
            [tf.scalar_summary(tag, value)
             for tag, value in zip(stat_tags, stat_inputs)])
        new_lr = tf.placeholder(m.lr.dtype.base_dtype, shape=[])
        assign_lr = tf.assign(m.lr, new_lr)

        writer = tf.train.SummaryWriter(graph_path, max_queue=3)
        try:
            last3 = []
            learning_rate = Config.learning_rate
            session.run(assign_lr, feed_dict={new_lr: learning_rate})
            tol = 0.001
            for i in range(start_ind, start_ind+Config.num_epochs):
                print("EPOCH: %s"%i)
                print("learning_rate: %s"%learning_rate)
                epoch_cost, median_cost, max_cost = m.run_epoch(session, stream.get_sents(), True)
                print("Total cost for EPOCH: %s"%i)
                print(epoch_cost)
                print("Median cost: %s"%median_cost)
                print("Max cost: %s"%max_cost)
                accuracy = m.run_epoch(session, validstream.get_sents(), False)
                print("accuracy: %s"%accuracy)

                stat_values = [epoch_cost, median_cost, max_cost,
                               learning_rate, accuracy]
                summary_str = session.run(
                    merge, feed_dict=dict(zip(stat_inputs, stat_values)))
                writer.add_summary(summary_str, i)

                if i % save_every == 0:
                    saver.save(session, model_dir + 'saved-lstm-model', global_step=i)

                if len(last3) == 3:
                    # Halve the learning rate when the median cost of the
                    # previous three epochs rose or barely improved.
                    h = max(last3)
                    if last3[2] == h:
                        halve = True
                    elif last3[1] == h:
                        halve = (last3[1] - last3[2])/last3[1] < tol
                    else:
                        halve = (h - min(last3))/h < tol
                    if halve:
                        learning_rate = learning_rate/2
                        session.run(assign_lr, feed_dict={new_lr: learning_rate})
                    last3 = last3[1:] + [median_cost]
                elif len(last3) < 3:
                    last3 = last3 + [median_cost]
                else:
                    raise Exception
        finally:
            # Flush the queued events (max_queue=3) and release the file.
            writer.close()
def record_summary():
    """Return one merged summary op for the four weights and the loss."""
    tracked = [
        ("weight 3", w3),
        ("weight 2", w2),
        ("weight 1", w1),
        ("weight 0", w0),
        ("loss", loss),
    ]
    scalar_ops = [tf.scalar_summary(tag, tensor) for tag, tensor in tracked]
    return tf.merge_summary(scalar_ops)
Example #20
0
def setup_validation_summary():
    """Create the validation summary op together with its input placeholders.

    Returns:
        A triple (summary_op, acc, auc): the merged summary and the two
        float32 placeholders that must be fed with the accuracy and AUC
        values to record.
    """
    acc = tf.placeholder(tf.float32)
    auc = tf.placeholder(tf.float32)
    valid_summaries = [
        tf.summary.scalar('validation/acc', acc),
        tf.summary.scalar('validation/auc', auc)
    ]
    # Use the merge function from the same `tf.summary` API as the scalar
    # ops; the legacy `tf.merge_summary` alias is not part of that API.
    return tf.summary.merge(valid_summaries), acc, auc
  def __init__(self, num_outputs, reuse=False, trainable=True):
    """Build the policy network graph.

    Args:
      num_outputs: number of logits produced by the policy head.
      reuse: passed to the "shared" variable scope; summaries of the shared
        network are only requested when this is false.
      trainable: when true, the RMSProp optimizer, the gradients and the
        train op are created as well.
    """
    self.num_outputs = num_outputs

    # Placeholders for our input
    # States are uint8 batches of shape [batch, 84, 84, 4]
    self.states = tf.placeholder(shape=[None, 84, 84, 4], dtype=tf.uint8, name="X")
    # The TD target value
    self.targets = tf.placeholder(shape=[None], dtype=tf.float32, name="y")
    # Integer id of which action was selected
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name="actions")

    # Normalize
    X = tf.to_float(self.states) / 255.0
    batch_size = tf.shape(self.states)[0]

    # Graph shared with Value Net
    with tf.variable_scope("shared", reuse=reuse):
      fc1 = build_shared_network(X, add_summaries=(not reuse))

    with tf.variable_scope("policy_net"):
      self.logits = tf.contrib.layers.fully_connected(fc1, num_outputs, activation_fn=None)
      # The small constant keeps the logarithms below finite.
      self.probs = tf.nn.softmax(self.logits) + 1e-8

      self.predictions = {
        "logits": self.logits,
        "probs": self.probs
      }

      # We add entropy to the loss to encourage exploration
      self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs), 1, name="entropy")
      self.entropy_mean = tf.reduce_mean(self.entropy, name="entropy_mean")

      # Get the predictions for the chosen actions only: index into the
      # flattened probabilities with row * num_columns + action.
      gather_indices = tf.range(batch_size) * tf.shape(self.probs)[1] + self.actions
      self.picked_action_probs = tf.gather(tf.reshape(self.probs, [-1]), gather_indices)

      self.losses = - (tf.log(self.picked_action_probs) * self.targets + 0.01 * self.entropy)
      self.loss = tf.reduce_sum(self.losses, name="loss")

      tf.scalar_summary(self.loss.op.name, self.loss)
      tf.scalar_summary(self.entropy_mean.op.name, self.entropy_mean)
      tf.histogram_summary(self.entropy.op.name, self.entropy)

      if trainable:
        self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
        # Drop variables that receive no gradient from this loss.
        self.grads_and_vars = [[grad, var] for grad, var in self.grads_and_vars if grad is not None]
        self.train_op = self.optimizer.apply_gradients(self.grads_and_vars,
          global_step=tf.contrib.framework.get_global_step())

    # Merge the summaries whose op name contains the current variable scope
    # name.
    # NOTE(review): a previous "policy_net"/"shared" name filter was
    # immediately overwritten by this one and never took effect, so it has
    # been removed. With an empty scope name this filter matches every
    # summary in the graph - confirm whether the value net should be
    # excluded here.
    var_scope_name = tf.get_variable_scope().name
    summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
    sumaries = [s for s in summary_ops if var_scope_name in s.name]
    self.summaries = tf.merge_summary(sumaries)
Example #22
0
def keep_tracking(grads_and_vars, cnn, sess):
    """Create the summary ops, summary writers and saver for a training run.

    For every (gradient, variable) pair whose gradient is not None, a histogram
    summary and a sparsity (zero-fraction) scalar summary are created. A
    timestamped run directory under ``../runs`` holds the train/dev summary
    directories and the checkpoint directory, which is created if missing.

    :param grads_and_vars: Iterable of (gradient, variable) pairs.
    :param cnn: Model object exposing ``loss`` and ``accuracy``.
    :param sess: Session whose ``graph_def`` is handed to the summary writers.
    :return: Tuple ``(train_summary_op, train_summary_writer, dev_summary_op,
        dev_summary_dir, dev_summary_writer, checkpoint_prefix, saver)``.
    """
    # Per-variable gradient histograms and sparsity scalars
    per_variable_summaries = []
    for gradient, variable in grads_and_vars:
        if gradient is None:
            continue
        per_variable_summaries.extend([
            tf.histogram_summary(
                "{}/grad/hist".format(variable.name), gradient),
            tf.scalar_summary(
                "{}/grad/sparsity".format(variable.name),
                tf.nn.zero_fraction(gradient)),
        ])
    merged_grad_summaries = tf.merge_summary(per_variable_summaries)

    # Each run gets its own directory named after the current Unix time
    run_id = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.pardir, "runs", run_id))
    print("Writing to {}\n".format(out_dir))
    summaries_root = os.path.join(out_dir, "summaries")

    # Loss and accuracy are shared by the train and dev summary ops
    loss_summary = tf.scalar_summary("loss", cnn.loss)
    acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)

    # Training additionally reports the gradient summaries
    train_summary_op = tf.merge_summary(
        [loss_summary, acc_summary, merged_grad_summaries])
    train_summary_writer = tf.train.SummaryWriter(
        os.path.join(summaries_root, "train"), sess.graph_def)

    dev_summary_op = tf.merge_summary([loss_summary, acc_summary])
    dev_summary_dir = os.path.join(summaries_root, "dev")
    dev_summary_writer = tf.train.SummaryWriter(
        dev_summary_dir, sess.graph_def)

    # The checkpoint directory has to exist before the saver writes into it
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    saver = tf.train.Saver(tf.all_variables())

    return (train_summary_op, train_summary_writer, dev_summary_op,
            dev_summary_dir, dev_summary_writer, checkpoint_prefix, saver)
Example #23
0
def merge_summary(summaries):
    """
    Collapse a collection of summaries into a single summary op.

    :param summaries: Summaries to merge. The argument is first passed through
        ``maybe_iterable_to_list`` and the result through ``flatten_list``.
    :return: The op produced by ``tf.merge_summary`` for the flattened list.
    """
    as_list = maybe_iterable_to_list(summaries)
    flat_summaries = flatten_list(as_list)
    return tf.merge_summary(flat_summaries)
Example #24
0
 def writeSummary(self):
     """Create the merged summary ops and the training summary writer.

     Sets ``self.mergedSummary`` (eleven ``s_*`` / ``h_*`` summaries),
     ``self.imageSummary`` (three ``i_*`` summaries) and ``self.train_writer``,
     which logs to ``self.tfDir + "/train"`` with the session graph attached.
     """
     tracked = [
         self.s_loss, self.s_recon, self.s_l1, self.s_l1_mean,
         self.h_input, self.h_recon, self.h_v1_w, self.h_v1_a,
         self.h_normVals, self.s_errorStd, self.s_s_nnz,
     ]
     self.mergedSummary = tf.merge_summary(tracked)

     image_parts = [self.i_w, self.i_orig, self.i_recon]
     self.imageSummary = tf.merge_summary(image_parts)

     train_log_dir = self.tfDir + "/train"
     self.train_writer = tf.train.SummaryWriter(train_log_dir, self.sess.graph)
Example #25
0
  def add_summaries(self):
    """
    Adds a scalar summary for the raw loss to the graph and returns an
    operation to evaluate it.

    Only ``self.loss`` is summarised, under the tag "loss (raw)"; no
    moving-average summary is created here.

    """
    raw_loss_summary = tf.scalar_summary("loss (raw)", self.loss)
    merged = tf.merge_summary([raw_loss_summary])
    return merged
Example #26
0
def setup_validation_summary():
    """Build scalar summaries for validation loss, accuracy and AUC.

    Each metric is an unshaped float32 placeholder, so its value is supplied
    through ``feed_dict`` when the merged summary op is evaluated.

    Returns:
        A tuple ``(summary_op, loss, acc, auc)``: the merged summary op
        followed by the three placeholders that feed it.
    """
    loss = tf.placeholder(tf.float32)
    acc = tf.placeholder(tf.float32)
    auc = tf.placeholder(tf.float32)
    valid_summaries = [
        tf.summary.scalar("validation/loss", loss),
        tf.summary.scalar("validation/acc", acc),
        tf.summary.scalar("validation/auc", auc),
    ]
    # Merge through the same tf.summary namespace that created the scalars;
    # the legacy tf.merge_summary alias is gone from TF releases that only
    # ship the tf.summary API.
    return tf.summary.merge(valid_summaries), loss, acc, auc
Example #27
0
    def create_summaries(self):
        """Create evaluation variables, their summaries and the merged ops.

        Sets ``self.eval_cost`` and ``self.eval_accuracy`` (non-trainable
        variables initialised to 0.0), registers their scalar summaries in
        ``train.EVAL_SUMMARIES_COLLECTION``, adds an image summary of the
        model input, and stores the merged default-collection summaries in
        ``self.summaries`` and the merged evaluation summaries in
        ``self.eval_summaries``.
        """
        # Variables holding the evaluation metrics
        self.eval_cost = tf.Variable(0.0, name='eval_cost', trainable=False)
        self.eval_accuracy = tf.Variable(0.0, name='eval_accuracy',
                                         trainable=False)
        eval_metrics = (
            ("eval_cost", self.eval_cost),
            ("eval_accuracy", self.eval_accuracy),
        )
        for tag, value in eval_metrics:
            # Kept out of the default collection so they merge separately
            tf.scalar_summary(tag, value,
                              collections=[train.EVAL_SUMMARIES_COLLECTION])

        # Input batch reshaped to 28x28 single-channel images for display
        image_shape = (self.batcher.batch_size, 28, 28, 1)
        mnist_images = tf.reshape(self.model_vars['x'], image_shape)
        tf.image_summary("mnist", mnist_images, max_images=10)

        default_summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
        self.summaries = tf.merge_summary(default_summaries)
        eval_only_summaries = tf.get_collection(
            train.EVAL_SUMMARIES_COLLECTION)
        self.eval_summaries = tf.merge_summary(eval_only_summaries)
Example #28
0
 def visualization(self, n):
     """Build train/test superimages and their merged image summary.

     The first ``n * n`` entries of ``self.fake_images`` / ``self.images`` are
     passed to ``visualize_one_superimage`` under the label "train", the next
     ``n * n`` under "test". The two superimages are concatenated along
     axis 0 into ``self.superimages`` and the two summaries are merged into
     ``self.image_summary``.
     """
     per_grid = n * n
     train_summary, train_superimage = self.visualize_one_superimage(
         self.fake_images[:per_grid], self.images[:per_grid], n, "train")
     test_summary, test_superimage = self.visualize_one_superimage(
         self.fake_images[per_grid:2 * per_grid],
         self.images[per_grid:2 * per_grid],
         n, "test")
     stacked = [train_superimage, test_superimage]
     self.superimages = tf.concat(0, stacked)
     self.image_summary = tf.merge_summary([train_summary, test_summary])
  def __init__(self, reuse=False, trainable=True):
    """Build the value-estimator graph on top of the shared network.

    Args:
      reuse: Passed as ``reuse`` to the "shared" variable scope;
        ``add_summaries=(not reuse)`` is forwarded to
        ``build_shared_network``.
      trainable: When true, also create the RMSProp optimizer, the list of
        (gradient, variable) pairs with non-None gradients, and the train op.
    """
    # Placeholders for our input
    # A batch of 84x84 inputs with 4 channels, stored as uint8
    self.states = tf.placeholder(shape=[None, 84, 84, 4], dtype=tf.uint8, name="X")
    # The TD target value
    self.targets = tf.placeholder(shape=[None], dtype=tf.float32, name="y")

    # Scale the uint8 input into [0, 1]
    X = tf.to_float(self.states) / 255.0
    batch_size = tf.shape(self.states)[0]

    # Graph shared with Value Net
    with tf.variable_scope("shared", reuse=reuse):
      fc1 = build_shared_network(X, add_summaries=(not reuse))

    with tf.variable_scope("value_net"):
      self.logits = tf.contrib.layers.fully_connected(
        inputs=fc1,
        num_outputs=1,
        activation_fn=None)
      # Drop the trailing unit dimension so the output lines up with targets
      self.logits = tf.squeeze(self.logits, squeeze_dims=[1], name="logits")

      self.losses = tf.squared_difference(self.logits, self.targets)
      self.loss = tf.reduce_sum(self.losses, name="loss")

      self.predictions = {
        "logits": self.logits
      }

      # Summaries
      prefix = tf.get_variable_scope().name
      tf.scalar_summary(self.loss.name, self.loss)
      tf.scalar_summary("{}/max_value".format(prefix), tf.reduce_max(self.logits))
      tf.scalar_summary("{}/min_value".format(prefix), tf.reduce_min(self.logits))
      tf.scalar_summary("{}/mean_value".format(prefix), tf.reduce_mean(self.logits))
      tf.scalar_summary("{}/reward_max".format(prefix), tf.reduce_max(self.targets))
      tf.scalar_summary("{}/reward_min".format(prefix), tf.reduce_min(self.targets))
      tf.scalar_summary("{}/reward_mean".format(prefix), tf.reduce_mean(self.targets))
      tf.histogram_summary("{}/reward_targets".format(prefix), self.targets)
      tf.histogram_summary("{}/values".format(prefix), self.logits)

      if trainable:
        # self.optimizer = tf.train.AdamOptimizer(1e-4)
        self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
        # Variables the loss does not depend on come back with a None gradient
        self.grads_and_vars = [[grad, var] for grad, var in self.grads_and_vars if grad is not None]
        self.train_op = self.optimizer.apply_gradients(self.grads_and_vars,
          global_step=tf.contrib.framework.get_global_step())

    # Merge every summary whose op name contains the enclosing variable scope
    # name. This runs outside the "value_net" scope, so the name is that of
    # the scope this constructor was called in; an empty name matches all
    # summaries in the collection.
    var_scope_name = tf.get_variable_scope().name
    summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
    summaries = [s for s in summary_ops if var_scope_name in s.name]
    self.summaries = tf.merge_summary(summaries)
      def bucket_net(bucket_size, stacked_lstm):
          """Build the graph for one bucket length and wrap it in a ``Model``.

          Creates ``bucket_size - 1`` token/label placeholder pairs and a
          visual-embedding placeholder, unrolls ``stacked_lstm`` over the
          embedded tokens, and derives a text loss, a visual loss and their
          sum. A separate Adam train op with global-norm gradient clipping
          (1.25) is built for each of the three losses.

          Relies on names from the enclosing scope: ``vocabulary_size``,
          ``embeddings``, ``state2text_weight``/``state2text_bias``,
          ``state2vis_weight``/``state2vis_bias``, ``wide_prediction``,
          ``FLAGS`` and ``Model``.
          """
          # ----------------------------------------------------------------------------------------------
          # Placeholders: one (token, label) pair per unrolled step
          train_tokens = []
          train_labels = []
          for step in range(bucket_size - 1):
              token_plc = tf.placeholder(tf.int64, shape=[None], name='x_'+str(step))
              label_plc = tf.placeholder(tf.float32, shape=[None, vocabulary_size], name='x_'+str(step+1))
              train_tokens.append(token_plc)
              train_labels.append(label_plc)

          embedding_inputs = [tf.nn.embedding_lookup(embeddings, tokens)
                              for tokens in train_tokens]

          visual_outputs = tf.placeholder(tf.float32, shape=[None, FLAGS.visual_dim], name='visual-embedding')

          # ----------------------------------------------------------------------------------------------
          # Unrolled LSTM loop.
          outputs, final_state = tf.nn.rnn(stacked_lstm, embedding_inputs, dtype=tf.float32)
          if FLAGS.lstm_stacked_layers > 1:
              # Concatenate the cell state of every stacked layer along axis 1
              final_state = tf.concat(1, [layer_state.c for layer_state in final_state])
          else:
              final_state = final_state.c
          tf.get_variable_scope().reuse_variables()
          stacked_outputs = tf.concat(0, outputs)
          logits = tf.matmul(stacked_outputs, state2text_weight) + state2text_bias
          deep_prediction = tf.matmul(final_state, state2vis_weight) + state2vis_bias
          if FLAGS.dowide:
              visual_prediction = tf.nn.relu(wide_prediction+deep_prediction)
          else:
              visual_prediction = tf.nn.relu(deep_prediction)

          # ----------------------------------------------------------------------------------------------
          # Losses
          stacked_labels = tf.concat(0, train_labels)
          lstm_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, stacked_labels))
          visual_loss = tf.reduce_mean(tf.square(visual_prediction - visual_outputs))
          loss = lstm_loss + visual_loss

          # ----------------------------------------------------------------------------------------------
          # Tensorboard data: loss summaries
          summaries = None
          if FLAGS.boarddata:
              lstm_loss_summary = tf.scalar_summary('loss/lstm_loss', lstm_loss)
              visual_loss_summary = tf.scalar_summary('loss/visual_loss', visual_loss)
              loss_summary = tf.scalar_summary('loss/loss', loss)
              summaries = tf.merge_summary([loss_summary, lstm_loss_summary, visual_loss_summary])

          #----------------------------------------------------------------------------------------------
          # Optimizer.
          def make_train_op(someloss):
              # Each train op gets its own step counter and Adam instance
              step_counter = tf.Variable(0)
              adam = tf.train.AdamOptimizer(learning_rate=0.001)
              grads, variables = zip(*adam.compute_gradients(someloss))
              clipped_grads, _ = tf.clip_by_global_norm(grads, 1.25)
              return adam.apply_gradients(zip(clipped_grads, variables), global_step=step_counter)

          return Model(bucket_size, train_tokens, train_labels, visual_outputs, visual_prediction, loss, lstm_loss, visual_loss,
                   make_train_op(loss), make_train_op(lstm_loss), make_train_op(visual_loss), summaries)
Example #31
0
	def define_model(self):
		'''
		Define the computation graph: forward pass, loss, learning-rate
		schedule, optimizer, prediction ops, merged summaries and saver.

		Sets self.loss, self.optimizer, self.train_prediction,
		self.test_prediction, self.single_prediction,
		self.merged_train_summary, self.merged_test_summary and self.saver.
		'''
		def model(data_flow, train=True):
			'''
			@data_flow: original inputs
			@train: when False, filter maps are visualized after each stage
				and dropout is skipped
			@return: logits
			'''
			# Define Convolutional Layers
			for i, (weights, biases, config) in enumerate(zip(self.conv_weights, self.conv_biases, self.conv_config)):
				with tf.name_scope(config['name'] + '_model'):
					with tf.name_scope('convolution'):
						# default 1,1,1,1 stride and SAME padding
						data_flow = tf.nn.conv2d(data_flow, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
						data_flow = data_flow + biases
						if not train:
							self.visualize_filter_map(data_flow, how_many=config['out_depth'], display_size=32//(i//2+1), name=config['name']+'_conv')
					if config['activation'] == 'relu':
						data_flow = tf.nn.relu(data_flow)
						if not train:
							self.visualize_filter_map(data_flow, how_many=config['out_depth'], display_size=32//(i//2+1), name=config['name']+'_relu')
					else:
						raise Exception('Activation Func can only be Relu right now. You passed', config['activation'])
					if config['pooling']:
						data_flow = tf.nn.max_pool(
							data_flow,
							ksize=[1, self.pooling_scale, self.pooling_scale, 1],
							strides=[1, self.pooling_stride, self.pooling_stride, 1],
							padding='SAME')
						if not train:
							self.visualize_filter_map(data_flow, how_many=config['out_depth'], display_size=32//(i//2+1)//2, name=config['name']+'_pooling')

			# Define Fully Connected Layers
			for i, (weights, biases, config) in enumerate(zip(self.fc_weights, self.fc_biases, self.fc_config)):
				if i == 0:
					# Flatten the convolutional output before the first dense layer
					shape = data_flow.get_shape().as_list()
					data_flow = tf.reshape(data_flow, [shape[0], shape[1] * shape[2] * shape[3]])
				with tf.name_scope(config['name'] + 'model'):

					### Dropout
					# Applied only in training, on the input of the last dense layer.
					# NOTE(review): the second positional argument of tf.nn.dropout
					# is a keep probability in this TF API; confirm that
					# self.dropout_rate holds a keep probability.
					if train and i == len(self.fc_weights) - 1:
						data_flow =  tf.nn.dropout(data_flow, self.dropout_rate, seed=4926)
					###

					data_flow = tf.matmul(data_flow, weights) + biases
					if config['activation'] == 'relu':
						data_flow = tf.nn.relu(data_flow)
					elif config['activation'] is None:
						pass
					else:
						raise Exception('Activation Func can only be Relu or None right now. You passed', config['activation'])
			return data_flow

		# Training computation.
		logits = model(self.tf_train_samples)
		with tf.name_scope('loss'):
			self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, self.tf_train_labels))
			self.loss += self.apply_regularization(_lambda=5e-4)
			self.train_summaries.append(tf.scalar_summary('Loss', self.loss))

		# learning rate decay
		global_step = tf.Variable(0)
		learning_rate = tf.train.exponential_decay(
			learning_rate=self.base_learning_rate,
			global_step=global_step*self.train_batch_size,
			decay_steps=100,
			decay_rate=self.decay_rate,
			staircase=True
		)

		# Optimizer.
		# global_step is handed to minimize() so every training step increments
		# it; otherwise it stays at 0 and the schedule above never decays.
		with tf.name_scope('optimizer'):
			if self.optimizeMethod == 'gradient':
				self.optimizer = tf.train \
					.GradientDescentOptimizer(learning_rate) \
					.minimize(self.loss, global_step=global_step)
			elif self.optimizeMethod == 'momentum':
				self.optimizer = tf.train \
					.MomentumOptimizer(learning_rate, 0.5) \
					.minimize(self.loss, global_step=global_step)
			elif self.optimizeMethod == 'adam':
				self.optimizer = tf.train \
					.AdamOptimizer(learning_rate) \
					.minimize(self.loss, global_step=global_step)

		# Predictions for the training, validation, and test data.
		with tf.name_scope('train'):
			self.train_prediction = tf.nn.softmax(logits, name='train_prediction')
			tf.add_to_collection("prediction", self.train_prediction)
		with tf.name_scope('test'):
			self.test_prediction = tf.nn.softmax(model(self.tf_test_samples, train=False), name='test_prediction')
			tf.add_to_collection("prediction", self.test_prediction)

			# Extra inference path for a single 32x32 one-channel input
			single_shape = (1, 32, 32, 1)
			single_input = tf.placeholder(tf.float32, shape=single_shape, name='single_input')
			self.single_prediction = tf.nn.softmax(model(single_input, train=False), name='single_prediction')
			tf.add_to_collection("prediction", self.single_prediction)

		self.merged_train_summary = tf.merge_summary(self.train_summaries)
		self.merged_test_summary = tf.merge_summary(self.test_summaries)

		# Placed after the graph definition; saves this computation graph
		self.saver = tf.train.Saver(tf.all_variables())
Example #32
0
    def train(self, config):
        """Run the adversarial training loop.

        Builds Adam train ops for the discriminator and generator losses,
        initialises all variables, attempts to restore a checkpoint from
        ``self.checkpoint_dir``, then for ``config.epoch`` epochs runs one
        discriminator update and two generator updates per batch. Summaries
        go to ``./logs``; sample images are saved under ``./samples`` and a
        checkpoint is written at the counter intervals checked in the loop.

        Reads from ``config``: ``dataset``, ``learning_rate``, ``beta1``,
        ``epoch``, ``train_size``, ``batch_size``, ``checkpoint_dir``.
        """
        data = glob(os.path.join(config.dataset, "*.png"))
        #np.random.shuffle(data)
        assert (len(data) > 0)

        # One optimizer per network, each restricted to that network's variables
        d_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
                          .minimize(self.d_loss, var_list=self.d_vars)
        g_optim = tf.train.AdamOptimizer(config.learning_rate, beta1=config.beta1) \
                          .minimize(self.g_loss, var_list=self.g_vars)
        tf.initialize_all_variables().run()

        self.saver = tf.train.Saver()
        self.g_sum = tf.merge_summary([
            self.z_sum, self.d__sum, self.G_sum, self.d_loss_fake_sum,
            self.g_loss_sum
        ])
        # self.d_sum is rebound here to a merged op that includes its old value
        self.d_sum = tf.merge_summary(
            [self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
        self.writer = tf.train.SummaryWriter("./logs", self.sess.graph)

        # Fixed noise and fixed images reused for every periodic sample
        sample_z = np.random.uniform(-1,
                                     1,
                                     size=(self.sample_size, self.z_dim))
        sample_files = data[0:self.sample_size]
        sample = [
            get_image(sample_file, self.image_size, is_crop=self.is_crop)
            for sample_file in sample_files
        ]
        sample_images = np.array(sample).astype(np.float32)

        counter = 1
        start_time = time.time()

        if self.load(self.checkpoint_dir):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        for epoch in xrange(config.epoch):
            # The file list is re-read at the start of every epoch
            data = glob(os.path.join(config.dataset, "*.png"))
            # NOTE(review): the batch count uses self.batch_size while the
            # slicing below uses config.batch_size - confirm they always match.
            batch_idxs = min(len(data), config.train_size) // self.batch_size

            for idx in xrange(0, batch_idxs):
                batch_files = data[idx * config.batch_size:(idx + 1) *
                                   config.batch_size]
                batch = [
                    get_image(batch_file,
                              self.image_size,
                              is_crop=self.is_crop)
                    for batch_file in batch_files
                ]
                batch_images = np.array(batch).astype(np.float32)

                batch_z = np.random.uniform(-1, 1, [config.batch_size, self.z_dim]) \
                            .astype(np.float32)

                # Update D network
                _, summary_str = self.sess.run([d_optim, self.d_sum],
                                               feed_dict={
                                                   self.images: batch_images,
                                                   self.z: batch_z
                                               })
                self.writer.add_summary(summary_str, counter)

                # Update G network
                _, summary_str = self.sess.run([g_optim, self.g_sum],
                                               feed_dict={self.z: batch_z})
                self.writer.add_summary(summary_str, counter)

                # Run g_optim twice to make sure that d_loss does not go to zero (different from paper)
                _, summary_str = self.sess.run([g_optim, self.g_sum],
                                               feed_dict={self.z: batch_z})
                self.writer.add_summary(summary_str, counter)

                # Re-evaluate the losses after the updates, for logging only
                errD_fake = self.d_loss_fake.eval({self.z: batch_z})
                errD_real = self.d_loss_real.eval({self.images: batch_images})
                errG = self.g_loss.eval({self.z: batch_z})

                counter += 1
                print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
                    % (epoch, idx, batch_idxs,
                        time.time() - start_time, errD_fake+errD_real, errG))

                # Every 100 steps: sample from the fixed noise and save the images
                if np.mod(counter, 100) == 1:
                    samples, d_loss, g_loss = self.sess.run(
                        [self.sampler, self.d_loss, self.g_loss],
                        feed_dict={
                            self.z: sample_z,
                            self.images: sample_images
                        })
                    save_images(
                        samples, [8, 8],
                        './samples/train_{:02d}_{:04d}.png'.format(epoch, idx))
                    print("[Sample] d_loss: %.8f, g_loss: %.8f" %
                          (d_loss, g_loss))

                # Every 500 steps: write a checkpoint
                if np.mod(counter, 500) == 2:
                    self.save(config.checkpoint_dir, counter)
Example #33
0
    def __init__(self, config, is_training=True):
        """Build an LSTM classifier graph with masked mean pooling.

        Args:
            config: Object providing ``keep_prob``, ``num_step``,
                ``class_num``, ``hidden_neural_size``, ``vocabulary_size``,
                ``embed_dim``, ``hidden_layer_num`` and (when training)
                ``max_grad_norm``.
            is_training: When false, construction stops after the loss,
                accuracy and their summaries; no gradients, merged summaries
                or train op are created.
        """
        self.keep_prob = config.keep_prob
        # Batch size lives in a variable so it can be changed between runs
        # through self._batch_size_update
        self.batch_size = tf.Variable(0, dtype=tf.int32, trainable=False)

        num_step = config.num_step
        self.input_data = tf.placeholder(tf.int32, [None, num_step])
        self.target = tf.placeholder(tf.int64, [None])
        self.mask_x = tf.placeholder(tf.float32, [num_step, None])

        class_num = config.class_num
        hidden_neural_size = config.hidden_neural_size
        vocabulary_size = config.vocabulary_size
        embed_dim = config.embed_dim
        hidden_layer_num = config.hidden_layer_num
        self.new_batch_size = tf.placeholder(tf.int32,
                                             shape=[],
                                             name="new_batch_size")
        self._batch_size_update = tf.assign(self.batch_size,
                                            self.new_batch_size)

        #build LSTM network

        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_neural_size,
                                                 forget_bias=0.0,
                                                 state_is_tuple=True)
        if self.keep_prob < 1:
            lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=self.keep_prob)

        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * hidden_layer_num,
                                           state_is_tuple=True)

        self._initial_state = cell.zero_state(self.batch_size,
                                              dtype=tf.float32)

        #embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            embedding = tf.get_variable("embedding",
                                        [vocabulary_size, embed_dim],
                                        dtype=tf.float32)
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        if self.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, self.keep_prob)

        # Unroll the cell manually, one time step at a time
        out_put = []
        state = self._initial_state
        with tf.variable_scope("LSTM_layer"):
            for time_step in range(num_step):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                out_put.append(cell_output)

        # Zero the outputs of masked steps; the list of per-step outputs is
        # combined with the mask broadcast over a trailing axis
        out_put = out_put * self.mask_x[:, :, None]

        with tf.name_scope("mean_pooling_layer"):
            # Sum over time divided by the mask sum over time

            out_put = tf.reduce_sum(out_put, 0) / (tf.reduce_sum(
                self.mask_x, 0)[:, None])

        with tf.name_scope("Softmax_layer_and_output"):
            softmax_w = tf.get_variable("softmax_w",
                                        [hidden_neural_size, class_num],
                                        dtype=tf.float32)
            softmax_b = tf.get_variable("softmax_b", [class_num],
                                        dtype=tf.float32)
            self.logits = tf.matmul(out_put, softmax_w) + softmax_b

        with tf.name_scope("loss"):
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                self.logits + 1e-10, self.target)
            self.cost = tf.reduce_mean(self.loss)

        with tf.name_scope("accuracy"):
            self.prediction = tf.argmax(self.logits, 1)
            correct_prediction = tf.equal(self.prediction, self.target)
            self.correct_num = tf.reduce_sum(
                tf.cast(correct_prediction, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                   tf.float32),
                                           name="accuracy")

        #add summary
        loss_summary = tf.scalar_summary("loss", self.cost)
        #add summary
        accuracy_summary = tf.scalar_summary("accuracy_summary", self.accuracy)

        if not is_training:
            return

        # NOTE(review): globle_step is created but not passed to
        # apply_gradients below, so this constructor never increments it.
        self.globle_step = tf.Variable(0, name="globle_step", trainable=False)
        self.lr = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          config.max_grad_norm)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in zip(grads, tvars):
            if g is not None:
                grad_hist_summary = tf.histogram_summary(
                    "{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.scalar_summary(
                    "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        self.grad_summaries_merged = tf.merge_summary(grad_summaries)

        self.summary = tf.merge_summary(
            [loss_summary, accuracy_summary, self.grad_summaries_merged])

        # Build the update op exactly once; a second discarded
        # apply_gradients call would only add an unused op to the graph.
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # The learning rate is a variable so it can be changed between epochs
        # through self._lr_update
        self.new_lr = tf.placeholder(tf.float32,
                                     shape=[],
                                     name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)
    # Compute the gradients once so the same pairs can be applied and summarised
    grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
    tr_op_set = optimizer.apply_gradients(grads_and_vars,
                                          global_step=global_step)
    print("defined training_ops")
    # Keep track of gradient values and sparsity (optional)
    grad_summaries = []
    for g, v in grads_and_vars:
        # Variables the loss does not depend on have a None gradient
        if g is not None:
            grad_hist_summary = tf.histogram_summary(
                "{}/grad/hist".format(v.name), g)
            sparsity_summary = tf.scalar_summary(
                "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
            grad_summaries.append(grad_hist_summary)
            grad_summaries.append(sparsity_summary)
    grad_summaries_merged = tf.merge_summary(grad_summaries)
    print("defined gradient summaries")
    # Output directory for models and summaries
    # (one directory per run, named after the current Unix time)
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))

    # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # Keep up to 100 checkpoints
    saver = tf.train.Saver(tf.all_variables(), max_to_keep=100)

    # Write vocabulary
    vocab_processor.save(os.path.join(checkpoint_dir, "vocab"))
Example #35
0
            # Summary ops collected here are merged into `summerize_op` below
            summeries = []

            gradients = optimizer.compute_gradients(loss)
            # Clip every non-None gradient element-wise to [-10, 10],
            # replacing the (grad, var) pair in place
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_value(grad, -10, 10), var)
            # Histogram of each clipped gradient
            for (grad, var) in gradients:
                if grad is not None:
                    summeries.append(
                        tf.histogram_summary(var.name + '/grad', grad))

            apply_gradients = optimizer.apply_gradients(gradients)

            summeries.append(tf.scalar_summary("Loss", loss))

            summerize_op = tf.merge_summary(summeries)
            # No-op alternative to `summerize_op`
            no_summerize = tf.no_op()

            llprint("Done!\n")

            llprint("Initializing Variables ... ")
            session.run(tf.initialize_all_variables())
            llprint("Done!\n")

            # Restoring happens after initialisation
            if from_checkpoint is not None:
                llprint("Restoring Checkpoint %s ... " % (from_checkpoint))
                ncomputer.restore(session, ckpts_dir, from_checkpoint)
                llprint("Done!\n")

            last_100_losses = []
Example #36
0
    def initialize(self):
        """Build the multi-tower training graph and initialise all variables.

        Creates the ``global_step`` and ``epoch`` variables and a
        ``learning_rate`` placeholder, initialises every tower on its own
        device, averages the per-tower gradients for each variable group,
        optionally clips them, and stores one train op per group in
        ``self.train_ops``. Also sets ``self.saver``, the ``loss``,
        ``correct`` and ``summary`` entries of ``self.tensors``, runs the
        variable initialiser and, if ``self.write_log`` is set, opens a
        summary writer.

        Raises:
            Exception: if ``params.opt`` is not 'basic', 'adagrad' or 'adam'.
        """
        params = self.params
        sess = self.sess
        device_type = params.device_type
        summaries = []

        global_step = tf.get_variable('global_step',
                                      shape=[],
                                      dtype='int32',
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        self.tensors['global_step'] = global_step

        epoch = tf.get_variable('epoch',
                                shape=[],
                                dtype='int32',
                                initializer=tf.constant_initializer(0),
                                trainable=False)
        self.tensors['epoch'] = epoch

        learning_rate = tf.placeholder('float32', name='learning_rate')
        summaries.append(tf.scalar_summary("learning_rate", learning_rate))
        self.placeholders['learning_rate'] = learning_rate

        if params.opt == 'basic':
            opt = tf.train.GradientDescentOptimizer(learning_rate)
        elif params.opt == 'adagrad':
            opt = tf.train.AdagradOptimizer(learning_rate)
        elif params.opt == 'adam':
            # NOTE(review): Adam is built with its default arguments; the
            # learning_rate placeholder is not passed to it - confirm intended.
            opt = tf.train.AdamOptimizer()
        else:
            raise Exception()

        # key -> list of per-tower (grad, var) lists
        grads_pairs_dict = defaultdict(list)
        correct_tensors = []
        loss_tensors = []

        with tf.variable_scope("towers"):
            for device_id, tower in enumerate(self.towers):
                with tf.device("/%s:%d" %
                               (device_type, device_id)), tf.name_scope(
                                   "%s_%d" % (device_type, device_id)):
                    tower.initialize()
                    # Towers after the first reuse the variables created above
                    tf.get_variable_scope().reuse_variables()
                    loss_tensor = tower.get_loss_tensor()
                    loss_tensors.append(loss_tensor)
                    correct_tensor = tower.get_correct_tensor()
                    correct_tensors.append(correct_tensor)

                    # Overwritten on every iteration, so these end up holding
                    # the last tower's debug tensors
                    self.tensors['correct_'], self.tensors[
                        'mask_'], self.tensors['y_mask'], self.tensors[
                            'y'] = tower.get_debug_tensor()

                    for key, variables in tower.variables_dict.items():
                        grads_pair = opt.compute_gradients(loss_tensor,
                                                           var_list=variables)
                        grads_pairs_dict[key].append(grads_pair)

        with tf.name_scope("gpu_sync"):
            # Mean loss across towers; correctness tensors joined along axis 0
            loss_tensor = tf.reduce_mean(tf.pack(loss_tensors), 0, name='loss')
            correct_tensor = tf.concat(0, correct_tensors, name="correct")
            with tf.name_scope("average_gradients"):
                grads_pair_dict = {
                    key: average_gradients(grads_pairs)
                    for key, grads_pairs in grads_pairs_dict.items()
                }
                if params.max_grad_norm:
                    # Clip each averaged gradient individually by its norm
                    grads_pair_dict = {
                        key: [(tf.clip_by_norm(grad,
                                               params.max_grad_norm), var)
                              for grad, var in grads_pair]
                        for key, grads_pair in grads_pair_dict.items()
                    }

        self.tensors['loss'] = loss_tensor
        self.tensors['correct'] = correct_tensor
        summaries.append(tf.scalar_summary(loss_tensor.op.name, loss_tensor))

        # Histogram of every non-None averaged gradient, tagged with its key
        for key, grads_pair in grads_pair_dict.items():
            for grad, var in grads_pair:
                if grad is not None:
                    summaries.append(
                        tf.histogram_summary(var.op.name + '/gradients/' + key,
                                             grad))

        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))

        # Every apply op is given the same global_step
        apply_grads_op_dict = {
            key: opt.apply_gradients(grads_pair, global_step=global_step)
            for key, grads_pair in grads_pair_dict.items()
        }

        self.train_ops = {
            key: tf.group(apply_grads_op)
            for key, apply_grads_op in apply_grads_op_dict.items()
        }

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=5)
        self.saver = saver

        summary_op = tf.merge_summary(summaries)
        self.tensors['summary'] = summary_op

        init_op = tf.initialize_all_variables()
        sess.run(init_op)
        if self.write_log:
            self.writer = tf.train.SummaryWriter(params.log_dir, sess.graph)
        self.initialized = True
Example #37
0
 def merge_summaries(self):
     """Merge loss/entropy, histogram and weight summaries into one op.

     The result is stored in ``self.summarize``.
     """
     # NOTE(review): loss_summary and entropy_summary are combined with `+`
     # into a single list element rather than listed separately - confirm
     # this is intended for the types these attributes hold.
     loss_and_entropy = self.loss_summary + self.entropy_summary
     to_merge = ([loss_and_entropy] +
                 self.histogram_summaries + self.weight_summaries)
     self.summarize = tf.merge_summary(to_merge)
Example #38
0
def main_unsupervised():
    """Pre-train the autoencoder one hidden layer at a time.

    Builds an ``AutoEncoder`` whose layer sizes come from ``FLAGS``, then for
    each layer ``n`` builds the pretraining net, runs
    ``FLAGS.pretraining_epochs * num_train`` training steps and, every 100
    steps, writes histogram/loss summaries and the current input images to
    ``<FLAGS.summary_dir>/pretraining_<n>``. After the first layer has been
    trained its weights are saved to ``<FLAGS.chkpt_dir>/filters`` and logged
    as a tiled image summary.

    Returns:
        The ``AutoEncoder`` instance, constructed with the session that was
        used for pre-training.
    """
    with tf.Graph().as_default():
        sess = tf.Session()

        num_hidden = FLAGS.num_hidden_layers
        ae_hidden_shapes = [
            getattr(FLAGS, "hidden{0}_units".format(j + 1))
            for j in xrange(num_hidden)
        ]
        ae_shape = ([FLAGS.image_pixels] + ae_hidden_shapes +
                    [FLAGS.num_classes])

        ae = AutoEncoder(ae_shape, sess)

        data = read_data_sets_pretraining(FLAGS.data_dir)
        num_train = data.train.num_examples

        # Per-layer hyper-parameters, keyed by zero-based layer index; the
        # flag names themselves are one-based.
        learning_rates = {
            j: getattr(FLAGS, "pre_layer{0}_learning_rate".format(j + 1))
            for j in xrange(num_hidden)
        }

        noise = {
            j: getattr(FLAGS, "noise_{0}".format(j + 1))
            for j in xrange(num_hidden)
        }

        for i in xrange(len(ae_shape) - 2):
            n = i + 1  # one-based layer number used in scope and file names
            with tf.variable_scope("pretrain_{0}".format(n)):
                input_ = tf.placeholder(dtype=tf.float32,
                                        shape=(FLAGS.batch_size, ae_shape[0]),
                                        name='ae_input_pl')
                target_ = tf.placeholder(dtype=tf.float32,
                                         shape=(FLAGS.batch_size, ae_shape[0]),
                                         name='ae_target_pl')
                layer = ae.pretrain_net(input_, n)

                with tf.name_scope("target"):
                    target_for_loss = ae.pretrain_net(target_,
                                                      n,
                                                      is_target=True)

                loss = loss_x_entropy(layer, target_for_loss)
                train_op, global_step = training(loss, learning_rates[i], i)

                summary_dir = pjoin(FLAGS.summary_dir,
                                    'pretraining_{0}'.format(n))
                summary_writer = tf.train.SummaryWriter(
                    summary_dir,
                    graph_def=sess.graph_def,
                    flush_secs=FLAGS.flush_secs)
                summary_vars = [
                    ae["biases{0}".format(n)], ae["weights{0}".format(n)]
                ]

                layer_summaries = [
                    tf.histogram_summary(v.op.name, v) for v in summary_vars
                ]
                # `loss_summaries` is defined outside this function;
                # presumably it is filled in by training() -- verify.
                layer_summaries.append(loss_summaries[i])
                summary_op = tf.merge_summary(layer_summaries)

                # Build the image summary op ONCE per layer. Creating it
                # inside the training loop would add a new op to the graph
                # every 100 steps.
                image_summary_op = tf.image_summary(
                    "training_images",
                    tf.reshape(input_,
                               (FLAGS.batch_size,
                                FLAGS.image_size,
                                FLAGS.image_size, 1)),
                    max_images=FLAGS.batch_size)

                vars_to_init = ae.get_variables_to_init(n)
                vars_to_init.append(global_step)
                sess.run(tf.initialize_variables(vars_to_init))

                print("\n\n")
                print("| Training Step | Cross Entropy |  Layer  |   Epoch  |")
                print("|---------------|---------------|---------|----------|")

                for step in xrange(FLAGS.pretraining_epochs * num_train):
                    feed_dict = fill_feed_dict_ae(data.train, input_, target_,
                                                  noise[i])

                    # Only the loss value is needed; the train op's result
                    # is discarded.
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict=feed_dict)

                    if step % 100 == 0:
                        summary_str = sess.run(summary_op, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)

                        summary_img_str = sess.run(image_summary_op,
                                                   feed_dict=feed_dict)
                        # Record the step so the images line up with the
                        # scalar/histogram summaries written just above.
                        summary_writer.add_summary(summary_img_str, step)

                        output = "| {0:>13} | {1:13.4f} | Layer {2} | Epoch {3}  |"\
                                 .format(step, loss_value, n, step // num_train + 1)

                        print(output)
            if i == 0:
                # Save the first-layer weights and log them as one tiled image.
                filters = sess.run(tf.identity(ae["weights1"]))
                np.save(pjoin(FLAGS.chkpt_dir, "filters"), filters)
                filters = tile_raster_images(X=filters.T,
                                             img_shape=(FLAGS.image_size,
                                                        FLAGS.image_size),
                                             tile_shape=(10, 10),
                                             output_pixel_vals=False)
                # Add leading and trailing axes of size 1 before handing the
                # tiled array to tf.image_summary.
                filters = np.expand_dims(np.expand_dims(filters, 0), 3)
                image_var = tf.Variable(filters)
                image_filter = tf.identity(image_var)
                sess.run(tf.initialize_variables([image_var]))
                img_filter_summary_op = tf.image_summary(
                    "first_layer_filters", image_filter)
                summary_writer.add_summary(sess.run(img_filter_summary_op))
                summary_writer.flush()

    return ae
Example #39
0
def main_supervised(ae):
    """Fine-tune a pre-trained autoencoder with labelled data.

    Builds the supervised net on top of ``ae`` inside ``ae.session``'s graph,
    runs ``FLAGS.finetuning_epochs * num_train`` training steps, prints the
    loss and writes histogram and input-image summaries every 100 steps, and
    evaluates on the train/validation/test sets every 1000 steps and on the
    final step. Summaries go to ``<FLAGS.summary_dir>/fine_tuning``.

    Args:
        ae: autoencoder object exposing ``session``, ``supervised_net``,
            ``num_hidden_layers``, ``get_variables_to_init`` and item access
            by ``'weights<k>'`` / ``'biases<k>'``.
    """
    with ae.session.graph.as_default():
        sess = ae.session
        input_pl = tf.placeholder(tf.float32,
                                  shape=(FLAGS.batch_size, FLAGS.image_pixels),
                                  name='input_pl')
        logits = ae.supervised_net(input_pl)

        data = read_data_sets(FLAGS.data_dir)
        num_train = data.train.num_examples

        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=FLAGS.batch_size,
                                            name='target_pl')

        loss = loss_supervised(logits, labels_placeholder)
        train_op, global_step = training(loss, FLAGS.supervised_learning_rate)
        eval_correct = evaluation(logits, labels_placeholder)

        # Every bias first, then every weight, for layers 1..num_hidden+1.
        layer_count = ae.num_hidden_layers + 1
        summarized_vars = [
            ae['biases{0}'.format(i + 1)] for i in xrange(layer_count)
        ]
        summarized_vars.extend([
            ae['weights{0}'.format(i + 1)] for i in xrange(layer_count)
        ])

        histogram_ops = [
            tf.histogram_summary(v.op.name + "_fine_tuning", v)
            for v in summarized_vars
        ]
        summary_op = tf.merge_summary(histogram_ops)

        summary_writer = tf.train.SummaryWriter(pjoin(FLAGS.summary_dir,
                                                      'fine_tuning'),
                                                graph_def=sess.graph_def,
                                                flush_secs=FLAGS.flush_secs)

        # Build the image summary op ONCE. Creating it inside the training
        # loop would add a new op to the graph every 100 steps.
        image_summary_op = tf.image_summary(
            "training_images",
            tf.reshape(input_pl, (FLAGS.batch_size, FLAGS.image_size,
                                  FLAGS.image_size, 1)),
            max_images=FLAGS.batch_size)

        vars_to_init = ae.get_variables_to_init(ae.num_hidden_layers + 1)
        vars_to_init.append(global_step)
        sess.run(tf.initialize_variables(vars_to_init))

        steps = FLAGS.finetuning_epochs * num_train
        for step in xrange(steps):
            start_time = time.time()

            feed_dict = fill_feed_dict(data.train, input_pl,
                                       labels_placeholder)

            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if step % 100 == 0:
                # Print status to stdout.
                print('Step %d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))

                # Update the events file.
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_img_str = sess.run(image_summary_op,
                                           feed_dict=feed_dict)
                # Record the step so the images line up with the histogram
                # summaries written just above.
                summary_writer.add_summary(summary_img_str, step)

            if (step + 1) % 1000 == 0 or (step + 1) == steps:
                train_sum = do_eval_summary("training_error", sess,
                                            eval_correct, input_pl,
                                            labels_placeholder, data.train)

                val_sum = do_eval_summary("validation_error", sess,
                                          eval_correct, input_pl,
                                          labels_placeholder, data.validation)

                test_sum = do_eval_summary("test_error", sess, eval_correct,
                                           input_pl, labels_placeholder,
                                           data.test)

                do_eval(sess, eval_correct, input_pl, labels_placeholder,
                        data.test)

                summary_writer.add_summary(train_sum, step)
                summary_writer.add_summary(val_sum, step)
                summary_writer.add_summary(test_sum, step)
Example #40
0
def hist_summaries(*args):
  """Return one merged summary op with a histogram summary per tensor.

  Each tensor passed in ``args`` is summarized under its own ``name``.
  """
  histograms = []
  for tensor in args:
    histograms.append(tf.histogram_summary(tensor.name, tensor))
  return tf.merge_summary(histograms)
Example #41
0
    def __init__(self,
                 vocab_size,
                 hidden_size,
                 dropout,
                 num_layers,
                 max_gradient_norm,
                 max_seq_length,
                 learning_rate,
                 lr_decay,
                 batch_size,
                 forward_only=False):
        """Build the graph of an LSTM sequence classifier.

        The graph embeds the input token ids, runs them through a stack of
        dropout-wrapped LSTM cells, projects the final state of the last
        layer to ``self.num_classes`` scores and computes softmax
        cross-entropy loss and accuracy. Unless ``forward_only`` is set, the
        Adam update op (with global-norm gradient clipping) and the merged
        loss/accuracy summary are built as well.

        Args:
            vocab_size: number of rows of the embedding matrix.
            hidden_size: embedding width, LSTM unit count and input width of
                the output projection.
            dropout: value passed as the keep probability of the embedding
                dropout and of the LSTM input/output dropout.
            num_layers: number of stacked LSTM cells.
            max_gradient_norm: clip norm given to ``tf.clip_by_global_norm``.
            max_seq_length: width of the input placeholder and number of
                unrolled RNN steps.
            learning_rate: initial learning rate, held in a non-trainable
                variable.
            lr_decay: factor the learning rate is multiplied by when
                ``self.learning_rate_decay_op`` is run.
            batch_size: batch size used to build the RNN's zero initial state.
            forward_only: if true, skip building the training op and the
                summaries (``self.update`` and ``self.merged`` are then not
                set).
        """
        self.num_classes = 6507
        self.vocab_size = vocab_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * lr_decay)
        self.batch_pointer = 0
        self.batch_size = batch_size
        self.projection_dim = hidden_size
        self.dropout = dropout
        self.max_gradient_norm = max_gradient_norm
        self.global_step = tf.Variable(0, trainable=False)
        self.max_seq_length = max_seq_length
        self.representation = []

        # str_summary_type: string fed at run time (see NOTE below on its use)
        # seq_input: batch of token-id sequences, each of max_seq_length ids
        # target: one row of num_classes target scores per example
        # seq_lengths: the early-stop length of each input sequence
        self.str_summary_type = tf.placeholder(tf.string,
                                               name="str_summary_type")
        self.seq_input = tf.placeholder(tf.int32,
                                        shape=[None, max_seq_length],
                                        name="input")
        self.target = tf.placeholder(tf.float32,
                                     name="target",
                                     shape=[None, self.num_classes])
        self.seq_lengths = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name="early_stop")

        self.dropout_keep_prob_embedding = tf.constant(self.dropout)
        self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
        self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)

        with tf.variable_scope("embedding"), tf.device("/cpu:0"):
            W = tf.get_variable("W", [self.vocab_size, hidden_size],
                                initializer=tf.random_uniform_initializer(
                                    -1.0, 1.0))
            embedded_tokens = tf.nn.embedding_lookup(W, self.seq_input)
            embedded_tokens_drop = tf.nn.dropout(
                embedded_tokens, self.dropout_keep_prob_embedding)

        # rnn.rnn() takes a Python list with one tensor per time step.
        rnn_input = [
            embedded_tokens_drop[:, i, :] for i in range(self.max_seq_length)
        ]
        with tf.variable_scope("lstm"):
            single_cell = rnn_cell.DropoutWrapper(
                rnn_cell.LSTMCell(hidden_size,
                                  initializer=tf.random_uniform_initializer(
                                      -1.0, 1.0),
                                  state_is_tuple=True),
                input_keep_prob=self.dropout_keep_prob_lstm_input,
                output_keep_prob=self.dropout_keep_prob_lstm_output)
            cell = rnn_cell.MultiRNNCell([single_cell] * num_layers,
                                         state_is_tuple=True)

            initial_state = cell.zero_state(self.batch_size, tf.float32)

            rnn_output, rnn_state = rnn.rnn(cell,
                                            rnn_input,
                                            initial_state=initial_state,
                                            sequence_length=self.seq_lengths)

            # NOTE(review): element 0 of the last layer's state tuple is used
            # as the sequence representation; with state_is_tuple=True this
            # is presumably the cell state `c`, not the output `h` -- verify.
            self.representation.append(rnn_state[-1][0])

        with tf.variable_scope("output_projection"):
            W = tf.get_variable(
                "W", [hidden_size, self.num_classes],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
            b = tf.get_variable("b", [self.num_classes],
                                initializer=tf.constant_initializer(0.1))
            self.scores = tf.nn.xw_plus_b(rnn_state[-1][0], W, b)
            self.y = tf.nn.softmax(self.scores)
            self.predictions = tf.argmax(self.scores, 1)

        with tf.variable_scope("loss"):
            self.losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.target, name="ce_losses")
            self.total_loss = tf.reduce_sum(self.losses)
            self.mean_loss = tf.reduce_mean(self.losses)

        with tf.variable_scope("accuracy"):
            self.correct_predictions = tf.equal(self.predictions,
                                                tf.argmax(self.target, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions,
                                                   "float"),
                                           name="accuracy")

        params = tf.trainable_variables()
        if not forward_only:
            with tf.name_scope("train"):
                opt = tf.train.AdamOptimizer(self.learning_rate)
            gradients = tf.gradients(self.losses, params)
            clipped_gradients, norm = tf.clip_by_global_norm(
                gradients, self.max_gradient_norm)
            with tf.name_scope("grad_norms"):
                # The op is created but is not part of self.merged below.
                tf.scalar_summary("grad_norms", norm)
            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)
            # NOTE(review): str.format() on a placeholder puts the tensor's
            # Python string form into the tag, not the value fed at run
            # time -- confirm this is the intended tag.
            loss_summ = tf.scalar_summary(
                "{0}_loss".format(self.str_summary_type), self.mean_loss)
            acc_summ = tf.scalar_summary(
                "{0}_accuracy".format(self.str_summary_type), self.accuracy)
            self.merged = tf.merge_summary([loss_summ, acc_summ])
        self.saver = tf.train.Saver(tf.all_variables())
Example #42
0
    def __init__(self, classifier, input_dim, max_input_length,
                 max_target_length, init_learning_rate, learning_rate_decay,
                 num_steps, numutterances_per_minibatch):
        '''
        NnetTrainer constructor, creates the training graph.

        Builds a private tf.Graph holding the input/target placeholders, the
        training and validation outputs of the classifier, one accumulator
        variable per trainable parameter, and the operations that accumulate,
        average, clip and apply the gradients. The graph is finalized at the
        end of the constructor, so no operations can be added afterwards.

        Args:
            classifier: the neural net classifier that will be trained. It is
                called twice under the scope 'Classifier' (is_training=True
                with reuse=False, then is_training=False with reuse=True) and
                its result is unpacked as a 4-tuple: logits, logit sequence
                lengths, model saver and control ops
            input_dim: the input dimension to the nnet graph
            max_input_length: the maximal length of the input sequences
            max_target_length: the maximal length of the target sequences
            init_learning_rate: the initial learning rate
            learning_rate_decay: the parameter for exponential learning rate
                decay
            num_steps: the total number of steps that will be taken
            numutterances_per_minibatch: determines how many utterances are
                processed at a time to limit memory usage
        '''

        self.numutterances_per_minibatch = numutterances_per_minibatch
        self.max_input_length = max_input_length
        self.max_target_length = max_target_length

        #create the graph
        self.graph = tf.Graph()

        #define the placeholders in the graph
        with self.graph.as_default():

            #create the inputs placeholder, laid out as
            #[max_input_length, numutterances_per_minibatch, input_dim]
            self.inputs = tf.placeholder(tf.float32,
                                         shape=[
                                             max_input_length,
                                             numutterances_per_minibatch,
                                             input_dim
                                         ],
                                         name='inputs')

            #split the 3D input tensor into a list with one
            #[numutterances_per_minibatch, input_dim] tensor per time step
            split_inputs = tf.unpack(self.inputs)

            #reference labels, laid out as
            #[max_target_length, numutterances_per_minibatch, 1]
            self.targets = tf.placeholder(
                tf.int32,
                shape=[max_target_length, numutterances_per_minibatch, 1],
                name='targets')

            #split the 3D targets tensor into a list with one
            #[numutterances_per_minibatch, 1] tensor per target position
            split_targets = tf.unpack(self.targets)

            #the length of all the input sequences
            self.input_seq_length = tf.placeholder(
                tf.int32,
                shape=[numutterances_per_minibatch],
                name='input_seq_length')

            #the length of all the output sequences
            self.target_seq_length = tf.placeholder(
                tf.int32,
                shape=[numutterances_per_minibatch],
                name='output_seq_length')

            #compute the training outputs of the nnetgraph
            trainlogits, logit_seq_length, self.modelsaver, self.control_ops = (
                classifier(split_inputs,
                           self.input_seq_length,
                           is_training=True,
                           reuse=False,
                           scope='Classifier'))

            #compute the validation output of the nnetgraph, reusing the
            #variables created by the training call above; only the logits
            #of this second call are kept
            logits, _, _, _ = classifier(split_inputs,
                                         self.input_seq_length,
                                         is_training=False,
                                         reuse=True,
                                         scope='Classifier')

            #get a list of the trainable variables created so far
            params = tf.trainable_variables()

            #add the variables and operations to the graph that are used for
            #training

            #total number of steps
            nsteps = tf.constant(num_steps, dtype=tf.int32, name='num_steps')

            #the total loss of the entire batch, accumulated over minibatches
            batch_loss = tf.get_variable(
                'batch_loss', [],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            #variables in this scope are the ones stored by self.saver
            with tf.variable_scope('train_variables'):

                #the amount of steps already taken
                self.global_step = tf.get_variable(
                    'global_step', [],
                    dtype=tf.int32,
                    initializer=tf.constant_initializer(0),
                    trainable=False)

                #a variable to scale the learning rate (used to reduce the
                #learning rate in case validation performance drops)
                learning_rate_fact = tf.get_variable(
                    'learning_rate_fact', [],
                    initializer=tf.constant_initializer(1.0),
                    trainable=False)

                #compute the learning rate with exponential decay and scale with
                #the learning rate factor
                learning_rate = tf.train.exponential_decay(
                    init_learning_rate, self.global_step, nsteps,
                    learning_rate_decay) * learning_rate_fact

                #create the optimizer
                optimizer = tf.train.AdamOptimizer(learning_rate)

            #for every parameter create a zero-initialised variable of the
            #same shape that accumulates its gradients over minibatches
            with tf.variable_scope('gradients'):
                grads = [
                    tf.get_variable(param.op.name,
                                    param.get_shape().as_list(),
                                    initializer=tf.constant_initializer(0),
                                    trainable=False) for param in params
                ]

            with tf.name_scope('train'):
                #the total number of frames that are used in the batch
                num_frames = tf.get_variable(
                    name='num_frames',
                    shape=[],
                    dtype=tf.int32,
                    initializer=tf.constant_initializer(0),
                    trainable=False)

                #operation to update num_frames with the summed target
                #sequence lengths of the current minibatch
                #pylint: disable=E1101
                update_num_frames = num_frames.assign_add(
                    tf.reduce_sum(self.target_seq_length))

                #compute the training loss
                loss = self.compute_loss(split_targets, trainlogits,
                                         logit_seq_length,
                                         self.target_seq_length)

                #operation to halve the learning rate
                self.halve_learningrate_op = learning_rate_fact.assign(
                    learning_rate_fact / 2).op

                #create an operation to (re)initialise the accumulated
                #gradients
                self.init_grads = tf.initialize_variables(grads)

                #the operation to initialise the batch loss
                self.init_loss = batch_loss.initializer  #pylint: disable=E1101

                #the operation to initialize the num_frames
                #pylint: disable=E1101
                self.init_num_frames = num_frames.initializer

                #compute the gradients of the minibatch loss with respect to
                #every parameter
                batchgrads = tf.gradients(loss, params)

                #create an operation to update the batch loss
                #pylint: disable=E1101
                self.update_loss = batch_loss.assign_add(loss)

                #create an operation to update the gradients, the batch_loss
                #and do all other update ops; parameters for which
                #tf.gradients returned None are skipped
                #pylint: disable=E1101
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                self.update_gradients_op = tf.group(*([
                    grads[p].assign_add(batchgrads[p])
                    for p in range(len(grads)) if batchgrads[p] is not None
                ] + [self.update_loss] + update_ops + [update_num_frames]),
                                                    name='update_gradients')

                #create an operation to apply the gradients

                #average the accumulated gradients over the number of frames
                meangrads = [
                    tf.div(grad,
                           tf.cast(num_frames, tf.float32),
                           name=grad.op.name) for grad in grads
                ]

                #clip every gradient element to the range [-1, 1]
                meangrads = [
                    tf.clip_by_value(grad, -1., 1.) for grad in meangrads
                ]

                #apply the gradients; this also increments global_step
                self.apply_gradients_op = optimizer.apply_gradients(
                    [(meangrads[p], params[p]) for p in range(len(meangrads))],
                    global_step=self.global_step,
                    name='apply_gradients')

            with tf.name_scope('valid'):
                #compute the validation loss; note that logit_seq_length is
                #the one returned by the training call of the classifier
                valid_loss = self.compute_loss(split_targets, logits,
                                               logit_seq_length,
                                               self.target_seq_length)

                #operation to update the validation loss; it accumulates into
                #the same batch_loss and num_frames variables as training
                #pylint: disable=E1101
                self.update_valid_loss = tf.group(
                    *([batch_loss.assign_add(valid_loss), update_num_frames]))

            #operation to compute the average loss in the batch
            self.average_loss = batch_loss / tf.cast(num_frames, tf.float32)

            # add an operation to initialise all the variables in the graph
            self.init_op = tf.initialize_all_variables()

            #saver for the training variables (only those created in the
            #'train_variables' scope)
            self.saver = tf.train.Saver(
                tf.get_collection(tf.GraphKeys.VARIABLES,
                                  scope='train_variables'))

            #create the summaries for visualisation: a histogram for every
            #parameter and every averaged gradient, plus the average loss
            self.summary = tf.merge_summary([
                tf.histogram_summary(val.name, val)
                for val in params + meangrads
            ] + [tf.scalar_summary('loss', self.average_loss)])

        #specify that the graph can no longer be modified after this point
        self.graph.finalize()

        #start without visualisation
        self.summarywriter = None
Example #43
0
def train(dataset):
    """Train on dataset for a number of steps.

    Builds one model tower per GPU (``FLAGS.num_gpus``) with variables pinned
    to the CPU, averages the tower gradients, and runs ``FLAGS.max_steps``
    training steps. Progress is printed every 10 steps, summaries are written
    to ``FLAGS.train_dir`` every 100 steps, and a checkpoint is saved every
    5000 steps and on the last step.

    Args:
        dataset: object providing ``num_examples_per_epoch()`` and
            ``num_classes()``; it is also handed to
            ``image_processing.distorted_inputs``.

    Raises:
        AssertionError: if ``FLAGS.batch_size`` is not divisible by
            ``FLAGS.num_gpus``, if the pre-trained checkpoint path is set but
            does not exist, or if the loss becomes NaN.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                 FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                        global_step,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.RMSPropOptimizer(lr, RMSPROP_DECAY,
                                        momentum=RMSPROP_MOMENTUM,
                                        epsilon=RMSPROP_EPSILON)

        # The batch is split evenly across the GPU towers below, so it must
        # be divisible by the number of GPUs.
        assert FLAGS.batch_size % FLAGS.num_gpus == 0, (
            'Batch size must be divisible by number of GPUs')

        # Override the number of preprocessing threads to account for the increased
        # number of GPU towers.
        num_preprocess_threads = FLAGS.num_preprocess_threads * FLAGS.num_gpus
        images, labels = image_processing.distorted_inputs(
            dataset,
            num_preprocess_threads=num_preprocess_threads)

        # Snapshot the summaries registered so far (by the input pipeline)
        # before the towers add their own.
        input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Split the batch of images and labels for towers.
        images_splits = tf.split(0, FLAGS.num_gpus, images)
        labels_splits = tf.split(0, FLAGS.num_gpus, labels)

        # Calculate the gradients for each model tower.
        tower_grads = []
        for i in xrange(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (inception.TOWER_NAME, i)) as scope:
                    # Force all Variables to reside on the CPU.
                    with slim.arg_scope([slim.variables.variable], device='/cpu:0'):
                        # Calculate the loss for one tower of the ImageNet model. This
                        # function constructs the entire ImageNet model but shares the
                        # variables across all towers.
                        loss = _tower_loss(images_splits[i], labels_splits[i], num_classes,
                                           scope)

                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                    # Retain the Batch Normalization updates operations only from the
                    # final tower. Ideally, we should grab the updates from all towers
                    # but these stats accumulate extremely fast so we can ignore the
                    # other stats from the other towers without significant detriment.
                    batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION,
                                                          scope)

                    # Calculate the gradients for the batch of data on this ImageNet
                    # tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = _average_gradients(tower_grads)

        # Add a summaries for the input processing and global_step.
        summaries.extend(input_summaries)

        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))

        # Track the moving averages of all trainable variables.
        # Note that we maintain a "double-average" of the BatchNormalization
        # global statistics. This is more complicated then need be but we employ
        # this for backward-compatibility with our previous models.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY, global_step)

        # Another possiblility is to use tf.slim.get_variables().
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        # Group all updates to into a single train op.
        batchnorm_updates_op = tf.group(*batchnorm_updates)
        train_op = tf.group(apply_gradient_op, variables_averages_op,
                            batchnorm_updates_op)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        if FLAGS.pretrained_model_checkpoint_path:
            assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path)
            variables_to_restore = tf.get_collection(
                slim.variables.VARIABLES_TO_RESTORE)
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
            print('%s: Pre-trained model restored from %s' %
                  (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(
            FLAGS.train_dir,
            graph_def=sess.graph.as_graph_def(add_shapes=True))

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # `loss` is the loss tensor of the last tower built above.
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                examples_per_sec = FLAGS.batch_size / float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, duration))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 5000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

        # Push any summaries still pending in the writer out to disk before
        # returning, instead of relying on its periodic background flush.
        summary_writer.flush()
Example #44
0
def eval_loop(graph,
              eval_dir,
              train_dir,
              num_training_steps=10000,
              summary_frequency=10):
    """A generator which evaluates the newest checkpoint at each iteration.

    Every iteration looks up the latest checkpoint in `train_dir`, restores
    it into a session running `graph`, evaluates the metric tensors once,
    writes the merged summaries to `eval_dir` and yields the metric values.

    Args:
      graph: A tf.Graph object containing the model. The first element of
          its 'cross_entropy', 'log_perplexity', 'accuracy' and 'global_step'
          collections is used.
      eval_dir: A string path to the directory to write eval summary events.
      train_dir: A string path to the directory to search for checkpoints to
          eval.
      num_training_steps: The generator terminates once the global step read
          from a restored checkpoint reaches this value.
      summary_frequency: Not used by this function; kept so existing callers
          keep working.

    Yields:
      A dict with the keys 'loss', 'log_perplexity', 'accuracy' and
      'global_step'. If no checkpoint is found, None is yielded instead.
    """
    cross_entropy = graph.get_collection('cross_entropy')[0]
    log_perplexity = graph.get_collection('log_perplexity')[0]
    accuracy = graph.get_collection('accuracy')[0]
    global_step = graph.get_collection('global_step')[0]

    with graph.as_default():
        summary_op = tf.merge_summary([
            tf.scalar_summary('cross_entropy_loss', cross_entropy),
            tf.scalar_summary('log_perplexity', log_perplexity),
            tf.scalar_summary('accuracy', accuracy)
        ])

        saver = tf.train.Saver()
    session = tf.Session(graph=graph)
    # The outer try/finally guarantees the session is released however the
    # generator ends (exhaustion, error, or the consumer closing it early).
    try:
        summary_writer = tf.train.SummaryWriter(eval_dir, session.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coord)

        gs = 0

        logging.info('Starting eval loop')
        try:
            while gs < num_training_steps:
                checkpoint_path = tf.train.latest_checkpoint(train_dir)
                if not checkpoint_path:
                    logging.info(
                        'Waiting for checkpoint file in directory %s',
                        train_dir)
                    # Hand control back so the caller decides how long to
                    # wait before polling again.
                    yield
                    continue

                saver.restore(session, checkpoint_path)

                ce, lp, a, gs, serialized_summaries = session.run([
                    cross_entropy, log_perplexity, accuracy, global_step,
                    summary_op
                ])

                logging.info(
                    'Global Step: %s - Loss: %.3f - Log-perplexity: %.3f - '
                    'Step Accuracy: %.2f', gs, ce, lp, a)

                summary_writer.add_summary(serialized_summaries,
                                           global_step=gs)
                summary_writer.flush()

                yield {
                    'loss': ce,
                    'log_perplexity': lp,
                    'accuracy': a,
                    'global_step': gs
                }
        except tf.errors.OutOfRangeError as e:
            logging.warning('Got error reported to coordinator: %s', e)
        finally:
            coord.request_stop()
            summary_writer.close()

        # As before, threads are only joined when the loop ended normally or
        # through the handled OutOfRangeError.
        coord.join(threads)
    finally:
        session.close()
Example #45
0
def build_graph(mode, config, sequence_example_file_paths=None):
  """Constructs the TensorFlow graph for the requested mode.

  Args:
    mode: One of 'train', 'eval' or 'generate'. Only the ops needed for that
        mode are added to the graph.
    config: An EventSequenceRnnConfig providing the encoder/decoder and the
        HParams to use.
    sequence_example_file_paths: A list of paths to TFRecord files containing
        tf.train.SequenceExample protos. Only read in 'train' and 'eval'
        modes.

  Returns:
    A tf.Graph instance which contains the TF ops.

  Raises:
    ValueError: If mode is not 'train', 'eval', or 'generate'.
  """
  valid_modes = ('train', 'eval', 'generate')
  if mode not in valid_modes:
    raise ValueError("The mode parameter must be 'train', 'eval', "
                     "or 'generate'. The mode parameter was: %s" % mode)

  hparams = config.hparams
  codec = config.encoder_decoder

  tf.logging.info('hparams = %s', hparams.values())

  input_size = codec.input_size
  num_classes = codec.num_classes
  no_event_label = codec.default_event_label

  # The mode was validated above, so anything that is not file-fed
  # ('train'/'eval') is 'generate'.
  reads_from_files = mode in ('train', 'eval')

  with tf.Graph().as_default() as graph:
    if reads_from_files:
      inputs, labels, lengths = magenta.common.get_padded_batch(
          sequence_example_file_paths, hparams.batch_size, input_size)
    else:
      inputs = tf.placeholder(
          tf.float32, [hparams.batch_size, None, input_size])
      labels = None
      lengths = None

    # A tuple state is used for training and evaluation. During generation
    # the RNN state is fed back through a feed dict, which needs tensors
    # rather than tuples, so state_is_tuple is False in that mode.
    state_is_tuple = reads_from_files

    cell = make_rnn_cell(hparams.rnn_layer_sizes,
                         dropout_keep_prob=hparams.dropout_keep_prob,
                         attn_length=hparams.attn_length,
                         state_is_tuple=state_is_tuple)

    initial_state = cell.zero_state(hparams.batch_size, tf.float32)

    outputs, final_state = tf.nn.dynamic_rnn(
        cell, inputs, lengths, initial_state, parallel_iterations=1,
        swap_memory=True)

    last_layer_size = hparams.rnn_layer_sizes[-1]
    outputs_flat = tf.reshape(outputs, [-1, last_layer_size])
    logits_flat = tf.contrib.layers.linear(outputs_flat, num_classes)

    if reads_from_files:
      skip = hparams.skip_first_n_losses
      if skip:
        # Drop the first `skip` time steps from both logits and labels.
        logits = tf.reshape(logits_flat, [hparams.batch_size, -1, num_classes])
        logits = logits[:, skip:, :]
        logits_flat = tf.reshape(logits, [-1, num_classes])
        labels = labels[:, skip:]

      labels_flat = tf.reshape(labels, [-1])
      softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits_flat, labels_flat)
      loss = tf.reduce_mean(softmax_cross_entropy)
      perplexity = tf.reduce_mean(tf.exp(softmax_cross_entropy))

      correct_predictions = tf.to_float(
          tf.nn.in_top_k(logits_flat, labels_flat, 1))
      accuracy = tf.reduce_mean(correct_predictions) * 100

      # Accuracy restricted to positions whose label is a real event.
      event_positions = tf.to_float(tf.not_equal(labels_flat, no_event_label))
      event_hits = tf.reduce_sum(tf.mul(correct_predictions, event_positions))
      event_accuracy = tf.truediv(
          event_hits, tf.reduce_sum(event_positions)) * 100

      # Accuracy restricted to positions labelled with the default event.
      no_event_positions = tf.to_float(tf.equal(labels_flat, no_event_label))
      no_event_hits = tf.reduce_sum(
          tf.mul(correct_predictions, no_event_positions))
      no_event_accuracy = tf.truediv(
          no_event_hits, tf.reduce_sum(no_event_positions)) * 100

      global_step = tf.Variable(0, trainable=False, name='global_step')

      for key, tensor in (('loss', loss),
                          ('perplexity', perplexity),
                          ('accuracy', accuracy),
                          ('global_step', global_step)):
        tf.add_to_collection(key, tensor)

      summaries = [
          tf.scalar_summary(tag, value)
          for tag, value in (('loss', loss),
                             ('perplexity', perplexity),
                             ('accuracy', accuracy),
                             ('event_accuracy', event_accuracy),
                             ('no_event_accuracy', no_event_accuracy))
      ]

      if mode == 'train':
        learning_rate = tf.train.exponential_decay(
            hparams.initial_learning_rate, global_step, hparams.decay_steps,
            hparams.decay_rate, staircase=True, name='learning_rate')

        opt = tf.train.AdamOptimizer(learning_rate)
        trainable = tf.trainable_variables()
        raw_gradients = tf.gradients(loss, trainable)
        clipped_gradients, _ = tf.clip_by_global_norm(raw_gradients,
                                                      hparams.clip_norm)
        train_op = opt.apply_gradients(zip(clipped_gradients, trainable),
                                       global_step)
        tf.add_to_collection('learning_rate', learning_rate)
        tf.add_to_collection('train_op', train_op)

        summaries.append(tf.scalar_summary('learning_rate', learning_rate))

      if mode == 'eval':
        summary_op = tf.merge_summary(summaries)
        tf.add_to_collection('summary_op', summary_op)

    else:
      # 'generate': expose a temperature-scaled softmax plus the tensors
      # needed to feed and fetch the RNN state.
      temperature = tf.placeholder(tf.float32, [])
      scaled_logits = tf.div(logits_flat, tf.fill([num_classes], temperature))
      softmax_flat = tf.nn.softmax(scaled_logits)
      softmax = tf.reshape(softmax_flat, [hparams.batch_size, -1, num_classes])

      for key, tensor in (('inputs', inputs),
                          ('initial_state', initial_state),
                          ('final_state', final_state),
                          ('temperature', temperature),
                          ('softmax', softmax)):
        tf.add_to_collection(key, tensor)

  return graph
Example #46
0
    def pre_train_layer(self, depth, data, epoch):
        """Greedily pretrain one hidden layer as a denoising autoencoder.

        The input is encoded up to `depth`, corrupted, passed through the
        layer's encode/decode pair (with dropout on the code) and the layer
        is trained to minimise the L2 reconstruction loss. Only this layer's
        weights, biases and decode biases are handed to the optimizer.

        Args:
            depth: Index into self.hidden_layers of the layer to pretrain.
            data: Training samples. Shuffled IN PLACE at the start of every
                epoch, so it must be a mutable sequence/array.
            epoch: Number of passes over `data`.
        """
        sess = self.sess
        # Function-call form prints the same text on Python 2 and Python 3.
        print('Starting to pretrain layer %d.' % depth)
        hidden_layer = self.hidden_layers[depth]
        summary_list = []
        with tf.name_scope(hidden_layer.name):
            with tf.name_scope("x_values"):
                x_original = tf.placeholder(tf.float32,
                                            shape=[None, self.input_dim])
                x_latent = self.get_encoded_input(x_original, depth)
                x_corrupt = corrupt(x_latent, corruption_level=self.noise)

            with tf.name_scope("encoded_and_decoded"):
                encoded = hidden_layer.encode(x_corrupt)
                encoded = tf.nn.dropout(encoded, keep_prob=0.5)
                decoded = hidden_layer.decode(encoded)
                attach_variable_summaries(encoded,
                                          "encoded",
                                          summ_list=summary_list)
                attach_variable_summaries(decoded,
                                          "decoded",
                                          summ_list=summary_list)

            # Reconstruction loss
            with tf.name_scope("reconstruction_loss"):
                loss = self.get_l2_loss(x_latent, decoded)
                attach_scalar_summary(loss,
                                      "%s_loss" % 'l2_loss',
                                      summ_list=summary_list)

            trainable_vars = [
                hidden_layer.weights, hidden_layer.biases,
                hidden_layer.decode_biases
            ]
            # Only optimize variables for this layer ("greedy")
            with tf.name_scope("train_step"):
                train_op = tf.train.AdamOptimizer(
                    learning_rate=self.pretrain_lr).minimize(
                        loss, var_list=trainable_vars)
            # NOTE(review): this runs the initializer of every variable in
            # the graph, not only this layer's. Confirm that values of
            # previously pretrained layers are not expected to survive.
            sess.run(tf.initialize_all_variables())

            # Merge summaries and get a summary writer
            merged = tf.merge_summary(summary_list)
            pretrain_writer = tf.train.SummaryWriter(
                "model/" + hidden_layer.name, sess.graph)

            # Build the monitoring op once; creating it inside the loop
            # would add a new node to the graph at every print step.
            encoded_mean_op = tf.reduce_mean(encoded)

            step = 0
            for _ in range(epoch):
                np.random.shuffle(data)
                batches = list(utilities.gen_batches(data, FLAGS.batch_size))
                for batch_x_original in batches:
                    feed = {x_original: batch_x_original}
                    sess.run(train_op, feed_dict=feed)

                    if step % self.print_step == 0:
                        loss_value = sess.run(loss, feed_dict=feed)
                        encoded_mean = sess.run(encoded_mean_op,
                                                feed_dict=feed)
                        # The "weights_mean" label is historical; the value
                        # printed is the mean of the encoded activations.
                        print("Step %s, batch %s loss = %s, weights_mean=%s" %
                              (step, 'l2_loss', loss_value, encoded_mean))

                    if step % FLAGS.log_step == 0:
                        summary = sess.run(merged, feed_dict=feed)
                        pretrain_writer.add_summary(summary, global_step=step)

                    # Break for debugging purposes. This only leaves the
                    # batch loop and `step` is not advanced, so every later
                    # epoch runs a single batch before breaking again.
                    if FLAGS.debug and step > 5:
                        break
                    step += 1
            print(
                "Finished pretraining of layer %d. Updated layer weights and biases."
                % depth)
Example #47
0
        # Define the training procedure: an Adam optimizer (learning rate
        # 1e-3) on cnn.loss. The gradient computation and the update are
        # split so the gradients can also be summarized below; global_step
        # is handed to apply_gradients as its step counter.
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional): one
        # histogram and one zero-fraction scalar per variable, named after
        # the variable.
        grad_summaries = []
        for g, v in grads_and_vars:
            # Skip variables for which compute_gradients returned no
            # gradient (None).
            if g is not None:
                grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name),
                                                     tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.merge_summary(grad_summaries)

        # Output directory for models and summaries: ./runs/<unix timestamp>,
        # so every run writes to its own directory.
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.scalar_summary("loss", cnn.loss)
        acc_summary = tf.scalar_summary("accuracy", cnn.accuracy)

        # Train summaries: loss, accuracy and all gradient summaries merged
        # into one op, written under <out_dir>/summaries/train.
        train_summary_op = tf.merge_summary([loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph)
Example #48
0
    def finetune_parameters_gen(self, xy_train_gen, output_dim, epochs):
        """Fine-tune the whole stack with a softmax classifier on top.

        A softmax layer (self.W, self.b) is added on the fully encoded input
        and trained, together with the variables returned by
        self.get_all_variables, to minimise the softmax cross entropy.

        Args:
            xy_train_gen: Training data as (x, y) pairs. It is shuffled IN
                PLACE each epoch, so despite the name it has to be a mutable
                sequence rather than a one-shot generator.
            output_dim: Dimension of the y value space for classification
                (e.g. 2 for a binary output).
            epochs: Number of passes over the training data.

        Returns:
            A dict with one "layer<N>_weights" entry per hidden layer
            (1-based), plus "weights" and "biases" holding the evaluated
            softmax parameters.
        """
        sess = self.sess
        summary_list = []

        print("Starting to fine tune parameters of network.")
        with tf.name_scope("finetuning"):
            with tf.name_scope("inputs"):
                x = tf.placeholder(tf.float32,
                                   shape=[None, self.input_dim],
                                   name="raw_input")
                with tf.name_scope("fully_encoded"):
                    x_encoded = self.get_encoded_input(
                        x, depth=-1)  # Full depth encoding
            # Note on W below: self.output_dim is the output dimension of
            # the autoencoder stack, i.e. the dimension of the new feature
            # space. output_dim is the dimension of the y value space for
            # classification. Ex: for a binary output, output_dim = 2.
            with tf.name_scope("softmax_variables"):
                self.W = weight_variable(self.output_dim,
                                         output_dim,
                                         name="weights")
                self.b = bias_variable(output_dim,
                                       initial_value=0,
                                       name="biases")
                attach_variable_summaries(self.W,
                                          self.W.name,
                                          summ_list=summary_list)
                attach_variable_summaries(self.b,
                                          self.b.name,
                                          summ_list=summary_list)
            with tf.name_scope("outputs"):
                y_logits = tf.matmul(x_encoded, self.W) + self.b
                with tf.name_scope("predicted"):
                    y_pred = tf.nn.softmax(y_logits, name="y_pred")
                    attach_variable_summaries(y_pred,
                                              y_pred.name,
                                              summ_list=summary_list)
                with tf.name_scope("actual"):
                    y_actual = tf.placeholder(tf.float32,
                                              shape=[None, output_dim],
                                              name="y_actual")
                    attach_variable_summaries(y_actual,
                                              y_actual.name,
                                              summ_list=summary_list)

            with tf.name_scope("cross_entropy"):
                cross_entropy = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        y_logits, y_actual))
                attach_scalar_summary(cross_entropy,
                                      "cross_entropy",
                                      summ_list=summary_list)

            trainable_vars = self.get_all_variables(
                additional_layer=[self.W, self.b])

            with tf.name_scope("train_step"):
                train_step = tf.train.AdamOptimizer(
                    learning_rate=self.finetune_lr).minimize(
                        cross_entropy, var_list=trainable_vars)

            with tf.name_scope("evaluation"):
                correct_prediction = tf.equal(tf.argmax(y_pred, 1),
                                              tf.argmax(y_actual, 1))
                accuracy = tf.reduce_mean(
                    tf.cast(correct_prediction, tf.float32))
                attach_scalar_summary(accuracy,
                                      "finetune_accuracy",
                                      summ_list=summary_list)

            # NOTE(review): this runs the initializer of every variable in
            # the graph, including the hidden layers. Confirm pretrained
            # values are not expected to carry over into fine-tuning.
            sess.run(tf.initialize_all_variables())

            # Merge summaries and get a summary writer
            merged = tf.merge_summary(summary_list)
            train_writer = tf.train.SummaryWriter(
                FLAGS.log_dir + "/train/finetune", sess.graph)

            step = 0
            for _ in range(epochs):
                np.random.shuffle(xy_train_gen)
                train_data_batchs = list(
                    utilities.gen_batches(xy_train_gen, FLAGS.batch_size))
                for batch in train_data_batchs:
                    # Split the batch of (x, y) pairs into parallel tuples.
                    batch_xs, batch_ys = zip(*batch)
                    if step % self.print_step == 0:
                        print(
                            "Step %s, batch accuracy: " % step,
                            sess.run(accuracy,
                                     feed_dict={
                                         x: batch_xs,
                                         y_actual: batch_ys
                                     }))

                    # For debugging predicted y values
                    if step % (self.print_step * 10) == 0:
                        print("Predicted y-value:",
                              sess.run(y_pred, feed_dict={x: batch_xs})[0])
                        print("Actual y-value:", batch_ys[0])

                    if step % FLAGS.log_step == 0:
                        summary = sess.run(merged,
                                           feed_dict={
                                               x: batch_xs,
                                               y_actual: batch_ys
                                           })
                        train_writer.add_summary(summary, global_step=step)

                    # For debugging, break early. The break happens before
                    # the train step and before `step` advances.
                    if FLAGS.debug and step > 5:
                        break

                    sess.run(train_step,
                             feed_dict={
                                 x: batch_xs,
                                 y_actual: batch_ys
                             })
                    step += 1
            print("Completed fine-tuning of parameters.")
            # Export every hidden layer instead of assuming exactly three;
            # with three layers the keys match the previous hard-coded
            # "layer1_weights" .. "layer3_weights".
            tuned_params = {}
            for layer_number, layer in enumerate(self.hidden_layers, 1):
                tuned_params["layer%d_weights" % layer_number] = sess.run(
                    layer.get_weight_variable())
            tuned_params["weights"] = sess.run(self.W)
            tuned_params["biases"] = sess.run(self.b)
            return tuned_params
    def __init__(
        self,
        conf,
        images=None,
        actions=None,
        states=None,
        sequence_length=None,
        reuse_scope=None,
    ):
        """Build the prediction model, its losses, summaries and train op.

        Args:
            conf: Configuration mapping. Keys read here: 'sequence_length'
                (only when sequence_length is None), 'schedsamp_k',
                'use_state', 'context_frames', 'learning_rate' and, inside
                the latent-model branch, 'lt_state_factor'. The presence of
                'prop_latent', 'train_latent_model' and 'joint' switches
                behaviour.
            images: Tensor split along axis 1 into per-timestep tensors.
            actions: Tensor split along axis 1 into per-timestep tensors.
            states: Tensor split along axis 1 into per-timestep tensors.
            sequence_length: Defaults to conf['sequence_length'].
            reuse_scope: If given, the model is built inside this variable
                scope with reuse=True (validation/test model).
        """
        from prediction_hiddenstate import construct_model

        if sequence_length is None:
            sequence_length = conf['sequence_length']

        self.prefix = prefix = tf.placeholder(tf.string, [])
        self.iter_num = tf.placeholder(tf.float32, [])
        summaries = []

        def split_timesteps(tensor):
            # One squeezed tensor per index along axis 1.
            pieces = tf.split(1, tensor.get_shape()[1], tensor)
            return [tf.squeeze(piece) for piece in pieces]

        # Split into timesteps.
        actions = split_timesteps(actions)
        states = split_timesteps(states)
        images = split_timesteps(images)

        def build_model():
            return construct_model(
                images,
                actions,
                states,
                iter_num=self.iter_num,
                k=conf['schedsamp_k'],
                use_state=conf['use_state'],
                context_frames=conf['context_frames'],
                conf=conf)

        if reuse_scope is not None:
            # Validation or test model: share the existing variables.
            with tf.variable_scope(reuse_scope, reuse=True):
                model_outputs = build_model()
        else:
            model_outputs = build_model()
        (gen_images, gen_states, gen_masks, inf_low_state,
         pred_low_state) = model_outputs

        self.inf_low_state = inf_low_state
        self.gen_images = gen_images
        self.gen_masks = gen_masks
        self.gen_states = gen_states

        self.lr = tf.placeholder_with_default(conf['learning_rate'], ())

        if 'prop_latent' in conf:
            # No backprop when only visualizing latent model forward
            # propagation.
            return

        context_frames = conf['context_frames']

        # L2 loss, PSNR for eval.
        loss, psnr_all = 0.0, 0.0
        image_pairs = zip(images[context_frames:],
                          gen_images[context_frames - 1:])
        for idx, (target, generated) in enumerate(image_pairs):
            recon_cost = mean_squared_error(target, generated)
            psnr_i = peak_signal_to_noise_ratio(target, generated)
            psnr_all += psnr_i
            summaries.append(
                tf.scalar_summary(prefix + '_recon_cost' + str(idx),
                                  recon_cost))
            summaries.append(
                tf.scalar_summary(prefix + '_psnr' + str(idx), psnr_i))

            loss += recon_cost

        state_pairs = zip(states[context_frames:],
                          gen_states[context_frames - 1:])
        for idx, (target_state, generated_state) in enumerate(state_pairs):
            state_cost = mean_squared_error(
                target_state, generated_state) * 1e-4 * conf['use_state']
            summaries.append(
                tf.scalar_summary(prefix + '_state_cost' + str(idx),
                                  state_cost))
            loss += state_cost
        summaries.append(tf.scalar_summary(prefix + '_psnr_all', psnr_all))
        self.psnr_all = psnr_all

        # Average over the number of predicted frames.
        num_predicted = np.float32(len(images) - conf['context_frames'])
        loss = loss / num_predicted
        self.loss = loss
        summaries.append(tf.scalar_summary(prefix + '_loss', loss))

        if 'train_latent_model' in conf:
            lt_state_cost_accum = 0.0
            latent_pairs = zip(inf_low_state[1:], pred_low_state[:-1])
            for idx, (inferred, predicted) in enumerate(latent_pairs):
                lt_state_cost = mean_squared_error(
                    inferred, predicted) * conf['lt_state_factor']
                summaries.append(
                    tf.scalar_summary(
                        prefix + '_low_state_cost' + str(idx + 1),
                        lt_state_cost))
                lt_state_cost_accum += lt_state_cost

            if 'joint' in conf:
                # Joint training: one optimizer on the summed loss.
                loss += lt_state_cost_accum
                self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
            else:
                # Separate training: the latent model gets its own
                # optimizer restricted to its variables, and the main train
                # op is made to depend on that update.
                lt_model_var = tf.get_default_graph().get_collection(
                    name=tf.GraphKeys.TRAINABLE_VARIABLES,
                    scope='model/latent_model')

                train_lt_op = tf.train.AdamOptimizer(self.lr).minimize(
                    lt_state_cost_accum, var_list=lt_model_var)
                with tf.control_dependencies([train_lt_op]):
                    self.train_op = tf.train.AdamOptimizer(
                        self.lr).minimize(loss)
        else:
            self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
        self.summ_op = tf.merge_summary(summaries)
Example #50
0
    def define_graph(self):
        """
        Sets up the model graph in TensorFlow.
        """
        with tf.name_scope('generator'):
            ##
            # Data
            ##

            with tf.name_scope('data'):
                self.input_frames_train = tf.placeholder(tf.float32,
                                                         shape=[
                                                             None,
                                                             self.height_train,
                                                             self.width_train,
                                                             3 * c.HIST_LEN
                                                         ])
                self.gt_frames_train = tf.placeholder(
                    tf.float32,
                    shape=[None, self.height_train, self.width_train, 3])

                self.input_frames_test = tf.placeholder(tf.float32,
                                                        shape=[
                                                            None,
                                                            self.height_test,
                                                            self.width_test,
                                                            3 * c.HIST_LEN
                                                        ])
                self.gt_frames_test = tf.placeholder(
                    tf.float32,
                    shape=[None, self.height_test, self.width_test, 3])

                # use variable batch_size for more flexibility
                self.batch_size_train = tf.shape(self.input_frames_train)[0]
                self.batch_size_test = tf.shape(self.input_frames_test)[0]

            ##
            # Scale network setup and calculation
            ##

            self.summaries_train = []
            self.scale_preds_train = []  # the generated images at each scale
            self.scale_gts_train = []  # the ground truth images at each scale
            self.d_scale_preds = [
            ]  # the predictions from the discriminator model

            self.summaries_test = []
            self.scale_preds_test = []  # the generated images at each scale
            self.scale_gts_test = []  # the ground truth images at each scale

            for scale_num in xrange(self.num_scale_nets):
                with tf.name_scope('scale_' + str(scale_num)):
                    with tf.name_scope('setup'):
                        ws = []
                        bs = []

                        # create weights for kernels
                        for i in xrange(len(
                                self.scale_kernel_sizes[scale_num])):
                            ws.append(
                                w([
                                    self.scale_kernel_sizes[scale_num][i],
                                    self.scale_kernel_sizes[scale_num][i],
                                    self.scale_layer_fms[scale_num][i],
                                    self.scale_layer_fms[scale_num][i + 1]
                                ]))
                            bs.append(
                                b([self.scale_layer_fms[scale_num][i + 1]]))

                    with tf.name_scope('calculation'):

                        def calculate(height, width, inputs, gts,
                                      last_gen_frames):
                            # scale inputs and gts
                            scale_factor = 1. / 2**(
                                (self.num_scale_nets - 1) - scale_num)
                            scale_height = int(height * scale_factor)
                            scale_width = int(width * scale_factor)

                            inputs = tf.image.resize_images(
                                inputs, [scale_height, scale_width])
                            scale_gts = tf.image.resize_images(
                                gts, [scale_height, scale_width])

                            # for all scales but the first, add the frame generated by the last
                            # scale to the input
                            if scale_num > 0:
                                last_gen_frames = tf.image.resize_images(
                                    last_gen_frames,
                                    [scale_height, scale_width])
                                inputs = tf.concat(3,
                                                   [inputs, last_gen_frames])

                            # generated frame predictions
                            preds = inputs

                            # perform convolutions
                            with tf.name_scope('convolutions'):
                                for i in xrange(
                                        len(self.scale_kernel_sizes[scale_num])
                                ):
                                    # Convolve layer
                                    preds = tf.nn.conv2d(preds,
                                                         ws[i], [1, 1, 1, 1],
                                                         padding=c.PADDING_G)

                                    # Activate with ReLU (or Tanh for last layer)
                                    if i == len(
                                            self.scale_kernel_sizes[scale_num]
                                    ) - 1:
                                        preds = tf.nn.tanh(preds + bs[i])
                                    else:
                                        preds = tf.nn.relu(preds + bs[i])

                            return preds, scale_gts

                        ##
                        # Perform train calculation
                        ##

                        # for all scales but the first, add the frame generated by the last
                        # scale to the input
                        if scale_num > 0:
                            last_scale_pred_train = self.scale_preds_train[
                                scale_num - 1]
                        else:
                            last_scale_pred_train = None

                        # calculate
                        train_preds, train_gts = calculate(
                            self.height_train, self.width_train,
                            self.input_frames_train, self.gt_frames_train,
                            last_scale_pred_train)
                        self.scale_preds_train.append(train_preds)
                        self.scale_gts_train.append(train_gts)

                        # We need to run the network first to get generated frames, run the
                        # discriminator on those frames to get d_scale_preds, then run this
                        # again for the loss optimization.
                        if c.ADVERSARIAL:
                            self.d_scale_preds.append(
                                tf.placeholder(tf.float32, [None, 1]))

                        ##
                        # Perform test calculation
                        ##

                        # for all scales but the first, add the frame generated by the last
                        # scale to the input
                        if scale_num > 0:
                            last_scale_pred_test = self.scale_preds_test[
                                scale_num - 1]
                        else:
                            last_scale_pred_test = None

                        # calculate
                        test_preds, test_gts = calculate(
                            self.height_test, self.width_test,
                            self.input_frames_test, self.gt_frames_test,
                            last_scale_pred_test)
                        self.scale_preds_test.append(test_preds)
                        self.scale_gts_test.append(test_gts)

            ##
            # Training
            ##

            with tf.name_scope('train'):
                # global loss is the combined loss from every scale network
                self.global_loss = combined_loss(self.scale_preds_train,
                                                 self.scale_gts_train,
                                                 self.d_scale_preds)
                self.global_step = tf.Variable(0, trainable=False)
                self.optimizer = tf.train.AdamOptimizer(
                    learning_rate=c.LRATE_G, name='optimizer')
                self.train_op = self.optimizer.minimize(
                    self.global_loss,
                    global_step=self.global_step,
                    name='train_op')

                # train loss summary
                loss_summary = tf.scalar_summary('train_loss_G',
                                                 self.global_loss)
                self.summaries_train.append(loss_summary)

            ##
            # Error
            ##

            with tf.name_scope('error'):
                # error computation
                # get error at largest scale
                self.psnr_error_train = psnr_error(self.scale_preds_train[-1],
                                                   self.gt_frames_train)
                self.sharpdiff_error_train = sharp_diff_error(
                    self.scale_preds_train[-1], self.gt_frames_train)
                self.psnr_error_test = psnr_error(self.scale_preds_test[-1],
                                                  self.gt_frames_test)
                self.sharpdiff_error_test = sharp_diff_error(
                    self.scale_preds_test[-1], self.gt_frames_test)
                # train error summaries
                summary_psnr_train = tf.scalar_summary('train_PSNR',
                                                       self.psnr_error_train)
                summary_sharpdiff_train = tf.scalar_summary(
                    'train_SharpDiff', self.sharpdiff_error_train)
                self.summaries_train += [
                    summary_psnr_train, summary_sharpdiff_train
                ]

                # test error
                summary_psnr_test = tf.scalar_summary('test_PSNR',
                                                      self.psnr_error_test)
                summary_sharpdiff_test = tf.scalar_summary(
                    'test_SharpDiff', self.sharpdiff_error_test)
                self.summaries_test += [
                    summary_psnr_test, summary_sharpdiff_test
                ]

            # add summaries to visualize in TensorBoard
            self.summaries_train = tf.merge_summary(self.summaries_train)
            self.summaries_test = tf.merge_summary(self.summaries_test)
Example #51
0
def _eval_once(saver, summary_writer, rmse_op, summary_op):
    """Run one evaluation pass and write the results as summaries.

    Restores the most recent checkpoint listed in ``FLAGS.checkpoint_dir``,
    starts the queue runners registered in the graph, evaluates ``rmse_op``
    for ``ceil(FLAGS.num_examples / FLAGS.batch_size)`` batches and reports
    the mean error plus the fraction of errors below 0.05 and 0.08.  If no
    checkpoint is found a message is printed and the function returns
    without evaluating anything.

    Args:
      saver: Saver used to restore the model variables.
      summary_writer: Summary writer receiving the evaluation summaries.
      rmse_op: Op run once per batch; its results are stacked and flattened
        into a single error vector.
      summary_op: Summary op run once after all batches were evaluated.
    """
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return

        if os.path.isabs(ckpt.model_checkpoint_path):
            # Restores from checkpoint with absolute path.
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            # Restores from checkpoint with relative path.
            saver.restore(
                sess,
                os.path.join(FLAGS.checkpoint_dir,
                             ckpt.model_checkpoint_path))

        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/imagenet_train/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
            '-')[-1]
        print('Succesfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))

        # Start the queue runners.
        coord = tf.train.Coordinator()
        # Defined before the try block so coord.join() below always sees it.
        threads = []
        try:
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(
                    qr.create_threads(sess,
                                      coord=coord,
                                      daemon=True,
                                      start=True))

            # Force true division: with Python 2 integer operands the
            # quotient would be floored *before* math.ceil and the final
            # partial batch would silently be dropped.
            num_iter = int(
                math.ceil(float(FLAGS.num_examples) / FLAGS.batch_size))
            # Collects the error values returned for every batch.
            errors = []

            total_sample_count = num_iter * FLAGS.batch_size
            step = 0

            print('%s: starting evaluation on (%s).' %
                  (datetime.now(), FLAGS.dataset_path))
            start_time = time.time()
            while step < num_iter and not coord.should_stop():
                rmse = sess.run(rmse_op)
                errors.append(rmse)
                step += 1
                if step % 20 == 0:
                    # Throughput over the last 20 batches.
                    duration = time.time() - start_time
                    sec_per_batch = duration / 20.0
                    examples_per_sec = FLAGS.batch_size / sec_per_batch
                    print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f'
                          'sec/batch)' % (datetime.now(), step, num_iter,
                                          examples_per_sec, sec_per_batch))
                    start_time = time.time()

            errors = np.vstack(errors).ravel()
            mean_rmse = errors.mean()
            # Fraction of examples whose error is below each threshold.
            auc_at_08 = (errors < .08).mean()
            auc_at_05 = (errors < .05).mean()
            ced_image = plot_ced([errors.tolist()])
            ced_plot = sess.run(
                tf.merge_summary(
                    [tf.image_summary('ced_plot', ced_image[None, ...])]))

            print('Errors', errors.shape)
            print(
                '%s: mean_rmse = %.4f, auc @ 0.05 = %.4f, auc @ 0.08 = %.4f [%d examples]'
                % (datetime.now(), mean_rmse, auc_at_05, auc_at_08,
                   total_sample_count))

            summary = tf.Summary()
            summary.ParseFromString(sess.run(summary_op))
            summary.value.add(tag='AUC @ 0.08', simple_value=float(auc_at_08))
            summary.value.add(tag='AUC @ 0.05', simple_value=float(auc_at_05))
            summary.value.add(tag='Mean RMSE', simple_value=float(mean_rmse))
            summary_writer.add_summary(ced_plot, global_step)
            summary_writer.add_summary(summary, global_step)

        except Exception as e:  # pylint: disable=broad-except
            # Hand the error to the coordinator so the queue threads stop.
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)
Example #52
0
def main(_):
    """Build the full graph for feeding inputs, training, and
    saving checkpoints.  Run the training. Then, load the saved graph and
    run some predictions.

    The single positional argument is ignored.  After training, the loss on
    the test set is computed by evaluating only the ``loss`` op, so the
    model variables are not updated with test data.
    """

    # Get input data: get the sets of images and labels for training,
    # validation, and test on MNIST.
    data_sets = read_data_sets(FLAGS.data_dir, False)

    mnist_graph = tf.Graph()
    with mnist_graph.as_default():
        # Generate placeholders for the images and labels.
        images_placeholder = tf.placeholder(tf.float32)
        labels_placeholder = tf.placeholder(tf.int32)
        tf.add_to_collection("images", images_placeholder)  # Remember this Op.
        tf.add_to_collection("labels", labels_placeholder)  # Remember this Op.

        # Build a Graph that computes predictions from the inference model.
        logits = mnist_inference(images_placeholder, HIDDEN1_UNITS)
        tf.add_to_collection("logits", logits)  # Remember this Op.

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op, loss = mnist_training(logits, labels_placeholder, 0.01)

        # prediction accuracy
        _, indices_op = tf.nn.top_k(logits)
        flattened = tf.reshape(indices_op, [-1])
        correct_prediction = tf.cast(tf.equal(labels_placeholder, flattened),
                                     tf.float32)
        accuracy = tf.reduce_mean(correct_prediction)

        # Define info to be used by the SummaryWriter. This will let
        # TensorBoard plot values during the training process.
        loss_summary = tf.scalar_summary("loss", loss)
        train_summary_op = tf.merge_summary([loss_summary])

        # Add the variable initializer Op.
        init = tf.initialize_all_variables()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a summary writer.
        print("Writing Summaries to %s" % FLAGS.model_dir)
        train_summary_writer = tf.train.SummaryWriter(FLAGS.model_dir)

    # Run training and save checkpoint at the end.
    with tf.Session(graph=mnist_graph) as sess:
        # Run the Op to initialize the variables.
        sess.run(init)

        # Start the training loop.
        for step in xrange(FLAGS.num_steps):
            # Read a batch of images and labels.
            images_feed, labels_feed = data_sets.train.next_batch(BATCH_SIZE)

            # Run one step of the model.  The return values are the activations
            # from the `train_op` (which is discarded) and the `loss` Op.  To
            # inspect the values of your Ops or variables, you may include them
            # in the list passed to sess.run() and the value tensors will be
            # returned in the tuple from the call.
            _, loss_value, tsummary, acc = sess.run(
                [train_op, loss, train_summary_op, accuracy],
                feed_dict={
                    images_placeholder: images_feed,
                    labels_placeholder: labels_feed
                })
            if step % 100 == 0:
                # Write summary info
                train_summary_writer.add_summary(tsummary, step)
            if step % 1000 == 0:
                # Print loss/accuracy info
                print('----Step %d: loss = %.4f' % (step, loss_value))
                print("accuracy: %s" % acc)

        print("\nWriting checkpoint file.")
        checkpoint_file = os.path.join(FLAGS.model_dir, 'checkpoint')
        saver.save(sess, checkpoint_file, global_step=step)
        # Evaluate the loss only: fetching train_op here as well would apply
        # a gradient step computed from the test set.
        loss_value = sess.run(
            loss,
            feed_dict={
                images_placeholder: data_sets.test.images,
                labels_placeholder: data_sets.test.labels
            })
        print("Test set loss: %s" % loss_value)

    # Run evaluation based on the saved checkpoint.
    with tf.Session(graph=tf.Graph()) as sess:
        checkpoint_file = tf.train.latest_checkpoint(FLAGS.model_dir)
        print("\nRunning predictions based on saved checkpoint.")
        print("checkpoint file: {}".format(checkpoint_file))
        # Load the saved meta graph and restore variables
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Retrieve the Ops we 'remembered'.
        logits = tf.get_collection("logits")[0]
        images_placeholder = tf.get_collection("images")[0]
        labels_placeholder = tf.get_collection("labels")[0]

        # Add an Op that chooses the top k predictions.
        eval_op = tf.nn.top_k(logits)

        # Run evaluation.
        images_feed, labels_feed = data_sets.validation.next_batch(
            EVAL_BATCH_SIZE)
        prediction = sess.run(eval_op,
                              feed_dict={
                                  images_placeholder: images_feed,
                                  labels_placeholder: labels_feed
                              })
        for i, truth in enumerate(labels_feed):
            print("Ground truth: %d\nPrediction: %d" %
                  (truth, prediction.indices[i][0]))
	def define_model(self):
		'''
		Define the computation graph: the convolutional and fully connected
		layers, the loss, the optimizer, the train/test prediction ops and
		the merged summaries.
		'''
		def model(data_flow, train=True):
			'''
			@data_flow: original inputs
			@train: when False, self.visualize_filter_map is called on the
				intermediate outputs of every convolutional layer
			@return: logits
			'''
			def show(tensor, cfg, size, suffix):
				# Forward one intermediate tensor to the filter-map visualizer.
				self.visualize_filter_map(tensor, how_many=cfg['out_depth'], display_size=size, name=cfg['name'] + suffix)

			# Convolutional layers
			conv_layers = zip(self.conv_weights, self.conv_biases, self.conv_config)
			for i, (weights, biases, config) in enumerate(conv_layers):
				map_size = 32 // (i // 2 + 1)
				with tf.name_scope(config['name'] + '_model'):
					with tf.name_scope('convolution'):
						# stride 1 in every dimension, SAME padding
						convolved = tf.nn.conv2d(data_flow, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
						data_flow = convolved + biases
						if not train:
							show(data_flow, config, map_size, '_conv')
					# Only ReLU is supported for convolutional layers.
					if config['activation'] != 'relu':
						raise Exception('Activation Func can only be Relu right now. You passed', config['activation'])
					data_flow = tf.nn.relu(data_flow)
					if not train:
						show(data_flow, config, map_size, '_relu')
					if config['pooling']:
						window = [1, self.pooling_scale, self.pooling_scale, 1]
						stride = [1, self.pooling_stride, self.pooling_stride, 1]
						data_flow = tf.nn.max_pool(
							data_flow,
							ksize=window,
							strides=stride,
							padding='SAME')
						if not train:
							show(data_flow, config, map_size // 2, '_pooling')

			# Fully connected layers
			fc_layers = zip(self.fc_weights, self.fc_biases, self.fc_config)
			for i, (weights, biases, config) in enumerate(fc_layers):
				if i == 0:
					# Flatten everything but the first dimension before the
					# first fully connected layer.
					dims = data_flow.get_shape().as_list()
					flat_size = dims[1] * dims[2] * dims[3]
					data_flow = tf.reshape(data_flow, [dims[0], flat_size])
				with tf.name_scope(config['name'] + 'model'):
					data_flow = tf.matmul(data_flow, weights) + biases
					activation = config['activation']
					if activation == 'relu':
						data_flow = tf.nn.relu(data_flow)
					elif activation is not None:
						raise Exception('Activation Func can only be Relu or None right now. You passed', config['activation'])
			return data_flow

		# Training computation.
		logits = model(self.tf_train_samples)
		with tf.name_scope('loss'):
			cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, self.tf_train_labels)
			self.loss = tf.reduce_mean(cross_entropy)
			self.train_summaries.append(tf.scalar_summary('Loss', self.loss))

		# Optimizer.
		with tf.name_scope('optimizer'):
			self.optimizer = tf.train.GradientDescentOptimizer(0.0001).minimize(self.loss)

		# Predictions for the training and test data.
		with tf.name_scope('train'):
			self.train_prediction = tf.nn.softmax(logits, name='train_prediction')
		with tf.name_scope('test'):
			test_logits = model(self.tf_test_samples, train=False)
			self.test_prediction = tf.nn.softmax(test_logits, name='test_prediction')

		self.merged_train_summary = tf.merge_summary(self.train_summaries)
		self.merged_test_summary = tf.merge_summary(self.test_summaries)
Example #54
0
def train():
    """Build a GAN (or InfoGAN) graph and run its training loop.

    Reads all settings from ``parse_args()``, loads either MNIST (when no
    dataset path is given) or an image dataset, builds the generator,
    the discriminator and - when ``args.infogan`` is set - the mutual
    information objective, then alternates one discriminator update and
    one generator update per mini-batch for ``args.epochs`` epochs.
    Scalar summaries are written every iteration and sample images every
    ``args.plot_every`` iterations; a one-line report is printed per epoch.
    """
    args = parse_args()

    # Seed numpy so shuffling is reproducible for a given seed.
    np.random.seed(args.seed)

    batch_size = args.batch_size
    n_epochs = args.epochs
    use_batch_norm = args.use_batch_norm
    fix_std = args.fix_std
    plot_every = args.plot_every
    use_infogan = args.infogan
    style_size = args.style_size
    categorical_cardinality = args.categorical_cardinality
    num_continuous = args.num_continuous
    generator_desc = args.generator
    discriminator_desc = args.discriminator

    if args.dataset is None:
        # No dataset given: fall back to MNIST, which requires 28x28 scaling.
        assert args.scale_dataset == [28, 28]
        X = load_mnist_dataset()
        if args.max_images is not None:
            X = X[:args.max_images]
        dataset_name = "mnist"
    else:
        scaled_image_width, scaled_image_height = args.scale_dataset

        # load pngs and jpegs here
        X = load_image_dataset(
            args.dataset,
            desired_width=
            scaled_image_width,  # TODO(jonathan): pick up from generator or add a command line arg (either or)...
            desired_height=scaled_image_height,
            value_range=(0.0, 1.0),
            max_images=args.max_images,
            force_grayscale=args.force_grayscale)
        dataset_name = basename(args.dataset.rstrip("/"))

    if use_infogan:
        # Generator input = style noise + all categorical codes + continuous codes.
        z_size = style_size + sum(categorical_cardinality) + num_continuous
        sample_noise = create_infogan_noise_sample(categorical_cardinality,
                                                   num_continuous, style_size)
    else:
        z_size = style_size
        sample_noise = create_gan_noise_sample(style_size)

    # Learning rates live in graph variables, initialized from the arguments.
    discriminator_lr = tf.get_variable("discriminator_lr", (),
                                       initializer=tf.constant_initializer(
                                           args.discriminator_lr))
    generator_lr = tf.get_variable("generator_lr", (),
                                   initializer=tf.constant_initializer(
                                       args.generator_lr))

    n_images, image_height, image_width, n_channels = X.shape

    # Placeholders and assign ops for overwriting the learning rates.
    # NOTE(review): the assign ops are never run inside this function.
    discriminator_lr_placeholder = tf.placeholder(tf.float32, (),
                                                  name="discriminator_lr")
    generator_lr_placeholder = tf.placeholder(tf.float32, (),
                                              name="generator_lr")
    assign_discriminator_lr_op = discriminator_lr.assign(
        discriminator_lr_placeholder)
    assign_generator_lr_op = generator_lr.assign(generator_lr_placeholder)

    ## begin model
    true_images = tf.placeholder(tf.float32,
                                 [None, image_height, image_width, n_channels],
                                 name="true_images")
    zc_vectors = tf.placeholder(tf.float32, [None, z_size], name="zc_vectors")
    is_training_discriminator = tf.placeholder(
        tf.bool, [], name="is_training_discriminator")
    is_training_generator = tf.placeholder(tf.bool, [],
                                           name="is_training_generator")

    fake_images = generator_forward(zc_vectors,
                                    generator_desc,
                                    is_training=is_training_generator,
                                    name="generator",
                                    debug=True)

    print("Generator produced images of shape %s" %
          (fake_images.get_shape()[1:]))
    print("")

    # The discriminator is applied twice under the same name: first to the
    # generated images, then (with reuse=True) to the real images.
    discriminator_fake = discriminator_forward(
        fake_images,
        discriminator_desc,
        is_training=is_training_discriminator,
        name="discriminator",
        use_batch_norm=use_batch_norm,
        debug=True)
    prob_fake = discriminator_fake["prob"]
    discriminator_true = discriminator_forward(
        true_images,
        discriminator_desc,
        is_training=is_training_discriminator,
        reuse=True,
        name="discriminator",
        use_batch_norm=use_batch_norm)
    prob_true = discriminator_true["prob"]

    # discriminator should maximize:
    # (TINY is added inside each log; presumably a small constant guarding
    # against log(0) - confirm at its definition.)
    ll_believing_fake_images_are_fake = tf.log(1.0 - prob_fake + TINY)
    ll_true_images = tf.log(prob_true + TINY)
    discriminator_obj = (tf.reduce_mean(ll_believing_fake_images_are_fake) +
                         tf.reduce_mean(ll_true_images))

    # generator should maximize:
    ll_believing_fake_images_are_real = tf.reduce_mean(tf.log(prob_fake +
                                                              TINY))
    generator_obj = ll_believing_fake_images_are_real

    discriminator_solver = tf.train.AdamOptimizer(
        learning_rate=discriminator_lr, beta1=0.5)
    generator_solver = tf.train.AdamOptimizer(learning_rate=generator_lr,
                                              beta1=0.5)

    discriminator_variables = scope_variables("discriminator")
    generator_variables = scope_variables("generator")

    # Both objectives are to be maximized, so their negation is minimized;
    # each solver only receives its own network's variables.
    train_discriminator = discriminator_solver.minimize(
        -discriminator_obj, var_list=discriminator_variables)
    train_generator = generator_solver.minimize(-generator_obj,
                                                var_list=generator_variables)
    discriminator_obj_summary = tf.scalar_summary("discriminator_objective",
                                                  discriminator_obj)
    generator_obj_summary = tf.scalar_summary("generator_objective",
                                              generator_obj)

    if use_infogan:
        # Slice the categorical codes, then the continuous codes, from the
        # leading columns of zc_vectors.
        categorical_c_vectors = []
        offset = 0
        for cardinality in categorical_cardinality:
            categorical_c_vectors.append(zc_vectors[:, offset:offset +
                                                    cardinality])
            offset += cardinality

        continuous_c_vector = zc_vectors[:, offset:offset + num_continuous]

        q_output = reconstruct_mutual_info(
            categorical_c_vectors,
            continuous_c_vector,
            categorical_lambda=args.categorical_lambda,
            continuous_lambda=args.continuous_lambda,
            fix_std=fix_std,
            hidden=discriminator_fake["hidden"],
            is_training=is_training_discriminator,
            name="mutual_info")

        mutual_info_objective = q_output["mutual_info"]
        mutual_info_variables = scope_variables("mutual_info")
        neg_mutual_info_objective = -mutual_info_objective
        # The mutual information term is optimized with the generator's
        # solver over generator, discriminator and mutual_info variables.
        train_mutual_info = generator_solver.minimize(
            neg_mutual_info_objective,
            var_list=generator_variables + discriminator_variables +
            mutual_info_variables)
        ll_categorical = q_output["ll_categorical"]
        ll_continuous = q_output["ll_continuous"]
        std_contig = q_output["std_contig"]

        mutual_info_obj_summary = tf.scalar_summary("mutual_info_objective",
                                                    mutual_info_objective)
        ll_categorical_obj_summary = tf.scalar_summary(
            "ll_categorical_objective", ll_categorical)
        ll_continuous_obj_summary = tf.scalar_summary(
            "ll_continuous_objective", ll_continuous)
        std_contig_summary = tf.scalar_summary("std_contig", std_contig)
        # Fold the InfoGAN summaries into the generator summary so one
        # fetch in the generator step returns all of them.
        generator_obj_summary = tf.merge_summary([
            generator_obj_summary, mutual_info_obj_summary,
            ll_categorical_obj_summary, ll_continuous_obj_summary,
            std_contig_summary
        ])
    else:
        # Plain GAN: substitute NOOP so the sess.run fetch lists below keep
        # the same structure in both modes.
        # NOTE(review): `entropy` is not referenced again in this function.
        neg_mutual_info_objective = NOOP
        mutual_info_objective = NOOP
        train_mutual_info = NOOP
        ll_categorical = NOOP
        ll_continuous = NOOP
        std_contig = NOOP
        entropy = NOOP

    log_dir = next_unused_name(
        join(PROJECT_DIR, "%s_log" % (dataset_name, ),
             "infogan" if use_infogan else "gan"))
    # NOTE(review): the writer is flushed during training but never closed
    # in this function.
    journalist = tf.train.SummaryWriter(log_dir, flush_secs=10)
    print("Saving tensorboard logs to %r" % (log_dir, ))

    img_summaries = {}
    if use_infogan:
        # InfoGAN: image plotting is delegated to CategoricalPlotter, which
        # receives a callback that runs the generator in inference mode.
        plotter = CategoricalPlotter(
            categorical_cardinality=categorical_cardinality,
            num_continuous=num_continuous,
            style_size=style_size,
            journalist=journalist,
            generate=lambda sess, x: sess.run(
                fake_images, {
                    zc_vectors: x,
                    is_training_discriminator: False,
                    is_training_generator: False
                }))
    else:
        # Plain GAN: log up to 10 generated images through an image summary.
        # NOTE(review): `image_placeholder` is not referenced again here.
        image_placeholder = None
        plotter = None
        img_summaries["fake_images"] = tf.image_summary("fake images",
                                                        fake_images,
                                                        max_images=10)
    image_summary_op = tf.merge_summary(list(
        img_summaries.values())) if len(img_summaries) else NOOP

    idxes = np.arange(n_images, dtype=np.int32)
    iters = 0
    with tf.Session() as sess:
        # initialize every graph variable before training
        sess.run(tf.initialize_all_variables())
        # main training loop
        for epoch in range(n_epochs):
            disc_epoch_obj = []
            gen_epoch_obj = []
            infogan_epoch_obj = []

            # Visit the images in a fresh random order each epoch.
            np.random.shuffle(idxes)
            pbar = create_progress_bar("epoch %d >> " % (epoch, ))

            for idx in pbar(range(0, n_images, batch_size)):
                batch = X[idxes[idx:idx + batch_size]]
                # train discriminator
                noise = sample_noise(batch_size)
                _, summary_result1, disc_obj, infogan_obj = sess.run(
                    [
                        train_discriminator, discriminator_obj_summary,
                        discriminator_obj, neg_mutual_info_objective
                    ],
                    feed_dict={
                        true_images: batch,
                        zc_vectors: noise,
                        is_training_discriminator: True,
                        is_training_generator: True
                    })

                disc_epoch_obj.append(disc_obj)

                if use_infogan:
                    infogan_epoch_obj.append(infogan_obj)

                # train generator
                # (fresh noise; the mutual-info update runs in the same call)
                noise = sample_noise(batch_size)
                _, _, summary_result2, gen_obj, infogan_obj = sess.run(
                    [
                        train_generator, train_mutual_info,
                        generator_obj_summary, generator_obj,
                        neg_mutual_info_objective
                    ],
                    feed_dict={
                        zc_vectors: noise,
                        is_training_discriminator: True,
                        is_training_generator: True
                    })

                journalist.add_summary(summary_result1, iters)
                journalist.add_summary(summary_result2, iters)
                journalist.flush()
                gen_epoch_obj.append(gen_obj)

                if use_infogan:
                    infogan_epoch_obj.append(infogan_obj)

                iters += 1

                # Periodically emit sample images.
                if iters % plot_every == 0:
                    if use_infogan:
                        plotter.generate_images(sess, 10, iteration=iters)
                    else:
                        noise = sample_noise(batch_size)
                        current_summary = sess.run(
                            image_summary_op, {
                                zc_vectors: noise,
                                is_training_discriminator: False,
                                is_training_generator: False
                            })
                        journalist.add_summary(current_summary, iters)
                    journalist.flush()

            # End-of-epoch report: mean objectives and current learning rates.
            msg = "epoch %d >> discriminator LL %.2f (lr=%.6f), generator LL %.2f (lr=%.6f)" % (
                epoch, np.mean(disc_epoch_obj), sess.run(discriminator_lr),
                np.mean(gen_epoch_obj), sess.run(generator_lr))
            if use_infogan:
                msg = msg + ", infogan loss %.2f" % (
                    np.mean(infogan_epoch_obj), )
            print(msg)
Example #55
0
        with open(meta_fname, 'w') as meta_file:
            meta_file.write("Meta-information\n")
            meta_file.write("Label: {0}\n".format(FLAGS.label))
            if FLAGS.lstm:
                meta_file.write("LSTM\n")
            else:
                meta_file.write("CNN\n")
            meta_file.write("\nFlags:\n")
            meta_file.write(flags_to_string())

        # Summaries for loss and accuracy
        loss_summary = tf.scalar_summary("loss", model.loss)
        acc_summary = tf.scalar_summary("accuracy", model.accuracy)

        # Train Summaries
        train_summary_op = tf.merge_summary([loss_summary, acc_summary])
        train_summary_dir = os.path.join(out_dir_full, "summaries", "train")
        train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph)

        # Dev summaries
        dev_summary_dir = os.path.join(out_dir_full, "summaries", "dev")
        dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir_full, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.all_variables())

        # Initialize all variables and override pre-computed embeddings
def train():
    """Train CIFAR-10 for a number of steps.

    Builds one model replica ("tower") per GPU, as counted by
    ``FLAGS.num_gpus``, averages the per-tower gradients and applies them
    with plain gradient descent under a staircase exponentially decayed
    learning rate. An exponential moving average of all trainable variables
    is maintained alongside the gradient update.

    While training for ``FLAGS.max_steps`` steps the function prints progress
    every 10 steps, writes summaries to ``FLAGS.train_dir`` every 100 steps,
    and saves a checkpoint every 1000 steps and on the final step.

    All summary ops are created through the ``tf.summary`` module so that
    they match the ``tf.summary.FileWriter`` / ``tf.global_variables`` API
    generation already used by this function.

    Raises:
        AssertionError: if the loss fetched from the last tower is NaN.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This equals
        # the number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                                 FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        cifar10.LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        for i in xrange(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
                    # Calculate the loss for one tower of the CIFAR model.
                    # This function constructs the entire CIFAR model but
                    # shares the variables across all towers.
                    loss = tower_loss(scope)

                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()

                    # Retain the summaries from the final tower: each pass
                    # overwrites the previous tower's list.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                  scope)

                    # Calculate the gradients for the batch of data on this
                    # CIFAR tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        # Group all updates into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be
        # set to True to build towers on GPU, as some of the ops do not have
        # GPU implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # `loss` is the loss tensor of the last tower built above.
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / FLAGS.num_gpus

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
Example #57
0
def main(_):
    """Build the training graph for an image classifier and run training.

    The graph is assembled in the order laid out by the section banners
    below: deployment config, global step, dataset, network, preprocessing,
    input pipeline, model clones, summaries, moving averages, optimizer and
    the final train op, which is then handed to ``slim.learning.train``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        # labels_offset shrinks the class count here and is subtracted from
        # every label in the input pipeline below.
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        # Fall back to the model name when no preprocessing name is given.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            # Use the network's default image size unless overridden by flag.
            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            # Prefetch queue that every clone dequeues its batch from.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            # The return values of the loss calls are not used here.
            # NOTE(review): presumably slim registers them in the LOSSES
            # collection that is read back below - confirm.
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weight=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weight=1.0)
            return end_points

        # Gather initial summaries (those that exist before the clones are
        # created).
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points: an activation histogram and the
        # fraction of zeros for every end point of the first clone.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.histogram_summary('activations/' + end_point, x))
            summaries.add(
                tf.scalar_summary('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.histogram_summary(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(
                tf.scalar_summary('learning_rate',
                                  learning_rate,
                                  name='learning_rate'))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        # Compute the total loss and the gradients that are handed to
        # optimizer.apply_gradients() below.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(
            tf.scalar_summary('total_loss', total_loss, name='total_loss'))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # Evaluating train_tensor yields total_loss, but only after every
        # update op (gradient step plus collected update_ops) has run.
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.merge_summary(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
Example #58
0
                                                concat_inputs)
  # Predicted class = index of the largest unnormalized score along axis 1.
  pred_vals = tf.cast(tf.argmax(unnormalized_probs, dimension = 1), tf.int32)

  normalized_probs = tf.nn.softmax(unnormalized_probs)
  # Gold labels are one-hot encoded with depth 2 (two classes).
  one_hot_gold = tf.one_hot(gold_label_placeholder, 2)
  # Sum of L2 penalties over the RNN and final-layer weights.
  # NOTE(review): l2_loss is accumulated here but is not added to `loss`
  # below - confirm whether regularization was meant to be disabled.
  l2_loss = 0
  for weight in rnn_weights + final_weights:
    l2_loss += tf.nn.l2_loss(weight)
  # Mean negative log of the probability assigned to the gold class.
  loss = tf.reduce_mean(-tf.log(tf.reduce_sum(tf.mul(normalized_probs, one_hot_gold), 1)))

  # Fraction of correct predictions: matches divided by the fed batch size.
  accuracy = tf.reduce_sum(tf.cast(tf.equal(gold_label_placeholder, pred_vals),
                                   tf.float32)) / tf.cast(batch_size_placeholder,
                                                          tf.float32)
  loss_summary = tf.scalar_summary("loss", loss)
  acc_summary = tf.scalar_summary("acc", accuracy)
  summaries = tf.merge_summary([loss_summary, acc_summary])

  # Despite its name, `optimizer` is the op returned by minimize().
  optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

### Actually running stuff
# Train step.
with tf.Session(graph = graph) as session:
  session.run(tf.initialize_all_variables())
  # One summary writer per data split, each with its own directory.
  train_writer = tf.train.SummaryWriter("performance/train")
  dev_writer = tf.train.SummaryWriter("performance/dev")
  valid_writer = tf.train.SummaryWriter("performance/valid")

  # Initial values of the loop bookkeeping variables.
  step_num = 0
  keep_training = True
  max_acc = 0
Example #59
0
    def run(self):
        inputs = tf.nn.embedding_lookup(self.word_embedding, self.x)
        prob = self.model(inputs)

        with tf.name_scope('loss'):
            cost = - tf.reduce_mean(self.y * tf.log(prob))
            reg, variables = tf.nn.l2_loss(self.word_embedding), ['softmax']
            for vari in variables:
                reg += tf.nn.l2_loss(self.weights[vari]) + \
                    tf.nn.l2_loss(self.biases[vari])
            cost += reg * self.l2_reg

        with tf.name_scope('train'):
            global_step = tf.Variable(
                0, name="tr_global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(cost, global_step=global_step)

        with tf.name_scope('predict'):
            correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(self.y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            correct_num = tf.reduce_sum(tf.cast(correct_pred, tf.int32))

        with tf.name_scope('summary'):
            localtime = time.strftime("%X %Y-%m-%d", time.localtime())
            Summary_dir = 'Summary/' + localtime

            info = 'batch-{}, lr-{}, kb-{}, l2_reg-{}'.format(
                self.batch_size,  self.learning_rate, self.Keep_Prob, self.l2_reg)
            info = info + '\ntrain_file_path:' + self.train_file_path + '\ntest_index:' + str(self.test_index) + '\nembedding_type:' + str(self.embedding_type) + '\nMethod: Emotion_GRU'
            summary_acc = tf.scalar_summary('ACC ' + info, accuracy)
            summary_loss = tf.scalar_summary('LOSS ' + info, cost)
            summary_op = tf.merge_summary([summary_loss, summary_acc])

            test_acc = tf.placeholder(tf.float32)
            test_loss = tf.placeholder(tf.float32)
            summary_test_acc = tf.scalar_summary('ACC ' + info, test_acc)
            summary_test_loss = tf.scalar_summary('LOSS ' + info, test_loss)
            summary_test = tf.merge_summary(
                [summary_test_loss, summary_test_acc])

            train_summary_writer = tf.train.SummaryWriter(
                Summary_dir + '/train')
            test_summary_writer = tf.train.SummaryWriter(Summary_dir + '/test')

        with tf.name_scope('saveModel'):
            saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
            save_dir = 'Models/' + localtime + '/'
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)

        with tf.name_scope('readData'):
            print '----------{}----------'.format(time.strftime("%Y-%m-%d %X", time.localtime()))
            tr_x, tr_y, tr_doc_len, te_x, te_y, te_doc_len, ev_x, ev_y, ev_doc_len= load_data_for_Emotion_CNN(
                self.train_file_path,
                self.word_id_mapping,
                self.max_doc_len,
                self.test_index,
                self.n_class
            )
            print 'train docs: {}    test docs: {}'.format(len(tr_y), len(te_y))
            print 'training_iter:', self.training_iter
            print info
            print '----------{}----------'.format(time.strftime("%Y-%m-%d %X", time.localtime()))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())
            max_acc, bestIter = 0., 0

            def test():
                acc, loss, cnt = 0., 0., 0
                for test, num in self.get_batch_data(te_x, te_y, te_doc_len, 20, keep_prob=1.0):
                    _loss, _acc = sess.run([cost, correct_num], feed_dict=test)
                    acc += _acc
                    loss += _loss * num
                    cnt += num
                loss = loss / cnt
                acc = acc / cnt
                return loss, acc

            def new_test():
                feed_dict = {
                    self.x: ev_x,
                    self.doc_len: ev_doc_len,
                    self.keep_prob: 1.0,
                }
                y_true = ev_y
                y_pred_p = sess.run(prob, feed_dict=feed_dict)
                # y_pred = np.ceil(y_pred_p-1.0/8)
                y_pred  = calibrated_label_ranking(y_pred_p)
                Emotion_eval(y_true, y_pred, y_pred_p)

            if self.training_iter==0:
                saver.restore(sess, 'Models/10:01:44 2017-03-11/-856')
                loss, acc=test()
                print loss,acc
                new_test()

            for i in xrange(self.training_iter):

                for train, _ in self.get_batch_data(tr_x, tr_y, tr_doc_len, self.batch_size, self.Keep_Prob):
                    _, step, summary, loss, acc = sess.run(
                        [optimizer, global_step, summary_op, cost, accuracy], feed_dict=train)
                    train_summary_writer.add_summary(summary, step)
                    print 'Iter {}: mini-batch loss={:.6f}, acc={:.6f}'.format(step, loss, acc)

                if i % self.display_step == 0:
                    loss, acc=test()

                    if acc > max_acc:
                        max_acc = acc
                        bestIter = step
                        saver.save(sess, save_dir, global_step=step)
                        new_test()

                    summary = sess.run(summary_test, feed_dict={
                                       test_loss: loss, test_acc: acc})
                    test_summary_writer.add_summary(summary, step)
                    print '----------{}----------'.format(time.strftime("%Y-%m-%d %X", time.localtime()))
                    print 'Iter {}: test loss={:.6f}, test acc={:.6f}'.format(step, loss, acc)
                    print 'round {}: max_acc={} BestIter={}\n'.format(i, max_acc, bestIter)

            print 'Optimization Finished!'
Example #60
0
def training_loop(graph,
                  train_dir,
                  num_training_steps=10000,
                  summary_frequency=10,
                  steps_to_average=20):
    """A generator which runs training steps at each output.

    Every training step writes a summary event. Every ``summary_frequency``
    steps a checkpoint is saved under ``train_dir``, the metrics are logged
    and a metrics dict is yielded. When the global step reaches
    ``num_training_steps`` a final checkpoint is written with the same
    checkpoint prefix as the periodic ones.

    Args:
        graph: A tf.Graph object containing the model. It must expose the
            collections 'cross_entropy', 'log_perplexity', 'accuracy',
            'global_step', 'learning_rate' and 'training_op'; the first item
            of each is used.
        train_dir: A string path to the directory to write training
            checkpoints and summary events.
        num_training_steps: Generator terminates after this many steps.
        summary_frequency: How many training iterations to run per generator
            iteration.
        steps_to_average: Average accuracy has a moving window. This is the
            size of that window.

    Yields:
        A dict of training metrics, and runs summary_frequency training steps
        between each yield.
    """
    cross_entropy = graph.get_collection('cross_entropy')[0]
    log_perplexity = graph.get_collection('log_perplexity')[0]
    accuracy = graph.get_collection('accuracy')[0]
    global_step = graph.get_collection('global_step')[0]
    learning_rate = graph.get_collection('learning_rate')[0]
    training_op = graph.get_collection('training_op')[0]

    # Prefix shared by every checkpoint this function writes.
    checkpoint_file = os.path.join(train_dir, 'basic_rnn.ckpt')

    with graph.as_default():
        summary_op = tf.merge_summary([
            tf.scalar_summary('cross_entropy_loss', cross_entropy),
            tf.scalar_summary('log_perplexity', log_perplexity),
            tf.scalar_summary('learning_rate', learning_rate),
            tf.scalar_summary('accuracy', accuracy),
            tf.scalar_summary('global_step', global_step)
        ])

        saver = tf.train.Saver()
        init_op = tf.initialize_all_variables()

    # Run training loop.
    session = tf.Session(graph=graph)
    summary_writer = tf.train.SummaryWriter(train_dir, session.graph)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=session, coord=coord)

    # `step` counts iterations of this loop; `gs` mirrors the graph's global
    # step, which is what terminates the loop.
    step = 0
    gs = 0

    logging.info('Starting training loop')
    try:
        accuracies = collections.deque(maxlen=steps_to_average)
        session.run(init_op)
        while gs < num_training_steps:
            step += 1
            ce, lp, a, gs, lr, serialized_summaries, _ = session.run([
                cross_entropy, log_perplexity, accuracy, global_step,
                learning_rate, summary_op, training_op
            ])
            summary_writer.add_summary(serialized_summaries, global_step=gs)

            accuracies.append(a)
            if step % summary_frequency == 0:
                saved_path = saver.save(session,
                                        checkpoint_file,
                                        global_step=gs)
                logging.info('Wrote checkpoint to %s', saved_path)
                summary_writer.flush()
                avg_accuracy = sum(accuracies) / len(accuracies)
                logging.info(
                    'Global Step: %s - Loss: %.3f - '
                    'Log-perplexity: %.3f - Step Accuracy: %.2f - '
                    'Avg Accuracy (last %d summaries): %.2f - '
                    'Learning Rate: %f', '{:,}'.format(gs), ce, lp, a,
                    steps_to_average, avg_accuracy, lr)
                yield {
                    'step': step,
                    'global_step': gs,
                    'loss': ce,
                    'log_perplexity': lp,
                    'accuracy': a,
                    'average_accuracy': avg_accuracy,
                    'learning_rate': lr
                }
        # Final checkpoint. Use the same prefix as the periodic saves so the
        # file lands inside train_dir; passing train_dir itself as the prefix
        # would write "<train_dir>-<step>" next to the directory instead.
        saver.save(session, checkpoint_file, global_step=gs)
    except tf.errors.OutOfRangeError as e:
        logging.warning('Got error reported to coordinator: %s', e)
    finally:
        try:
            coord.request_stop()
            summary_writer.close()
        except RuntimeError as e:
            logging.warning('Got runtime error: %s', e)