Example #1
  def accumulate_privacy_spending(self, eps_delta, unused_sigma,
                                  num_examples):
    """Accumulate the privacy spending.

    Currently only supports approximate privacy. Here we assume Gaussian
    noise is added to a randomly sampled batch, which gives better
    composition: 1. the per-batch privacy is computed using the privacy
    amplification via sampling bound; 2. the composition is done using the
    composition theorem for Gaussian noise.
    TODO(liqzhang) Add a link to a document that describes the bounds used.

    Args:
      eps_delta: EpsDelta pair which can be tensors.
      unused_sigma: the noise sigma. Unused for this accountant.
      num_examples: the number of examples involved.
    Returns:
      a TensorFlow operation for updating the privacy spending.
    """

    eps, delta = eps_delta
    with tf.control_dependencies(
        [tf.Assert(tf.greater(delta, 0),
                   ["delta needs to be greater than 0"])]):
      amortize_ratio = (tf.cast(num_examples, tf.float32) * 1.0 /
                        self._total_examples)
      # Use privacy amplification via sampling bound.
      # See Lemma 2.2 in http://arxiv.org/pdf/1405.7085v2.pdf
      # TODO(liqzhang) Add a link to a document with formal statement
      # and proof.
      amortize_eps = tf.reshape(tf.log(1.0 + amortize_ratio * (
          tf.exp(eps) - 1.0)), [1])
      amortize_delta = tf.reshape(amortize_ratio * delta, [1])
      return tf.group(*[tf.assign_add(self._eps_squared_sum,
                                      tf.square(amortize_eps)),
                        tf.assign_add(self._delta_sum, amortize_delta)])
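# A plain-Python sketch (separate from the example above) of the
# amplification-via-sampling bound that the graph ops compute:
# eps' = log(1 + q * (e^eps - 1)) and delta' = q * delta, where q is the
# batch sampling ratio. The helper name and the numbers are illustrative.
import math

def amplified_eps_delta(eps, delta, num_examples, total_examples):
    """Per-batch (eps, delta) after uniformly sampling num_examples."""
    q = float(num_examples) / total_examples        # sampling ratio
    return math.log(1.0 + q * (math.exp(eps) - 1.0)), q * delta

# amplified_eps_delta(1.0, 1e-5, 64, 60000) -> (~0.00183, ~1.07e-08)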
Example #2
    def _apply_stats(self, statsUpdates, accumulate=False, accumulateCoeff=0.):
        updateOps = []
        # obtain the stats var list
        for stats_var in statsUpdates:
            stats_new = statsUpdates[stats_var]
            if accumulate:
                # simple superbatch averaging
                update_op = tf.assign_add(
                    stats_var, accumulateCoeff * stats_new, use_locking=True)
            else:
                # exponential running averaging
                update_op = tf.assign(
                    stats_var, stats_var * self._stats_decay, use_locking=True)
                update_op = tf.assign_add(
                    update_op, (1. - self._stats_decay) * stats_new, use_locking=True)
            updateOps.append(update_op)

        with tf.control_dependencies(updateOps):
            stats_step_op = tf.assign_add(self.stats_step, 1)

        if KFAC_DEBUG:
            stats_step_op = (tf.Print(stats_step_op,
                                      [tf.convert_to_tensor('step:'),
                                       self.global_step,
                                       tf.convert_to_tensor('fac step:'),
                                       self.factor_step,
                                       tf.convert_to_tensor('sgd step:'),
                                       self.sgd_step,
                                       tf.convert_to_tensor('Accum:'),
                                       tf.convert_to_tensor(accumulate),
                                       tf.convert_to_tensor('Accum coeff:'),
                                       tf.convert_to_tensor(accumulateCoeff),
                                       tf.convert_to_tensor('stat step:'),
                                       self.stats_step, updateOps[0], updateOps[1]]))
        return [stats_step_op, ]
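# A plain-NumPy sketch (separate from the example above) of the two update
# rules used in _apply_stats; `decay` stands in for self._stats_decay and
# the values are illustrative.
import numpy as np

def superbatch_accumulate(stats_var, stats_new, coeff):
    """Simple super-batch averaging: stats_var + coeff * stats_new."""
    return stats_var + coeff * stats_new

def exponential_running_average(stats_var, stats_new, decay):
    """Exponential moving average: decay * old + (1 - decay) * new."""
    return decay * stats_var + (1.0 - decay) * stats_new

stats = np.zeros((2, 2))
for batch_stats in (np.eye(2), 2.0 * np.eye(2)):
    stats = exponential_running_average(stats, batch_stats, decay=0.9)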
Example #3
def evaluate_precision_recall(
    input_layer, labels, threshold=0.5, per_example_weights=None, name=PROVIDED, phase=Phase.train
):
    """Computes the precision and recall of the prediction vs the labels.

  Args:
    input_layer: A Pretty Tensor object.
    labels: The target labels to learn as a float tensor.
    threshold: The threshold to use to decide if the prediction is true.
    per_example_weights: A Tensor with a weight per example.
    name: An optional name.
    phase: The phase of this model; non-training phases compute a total across
      all examples.
  Returns:
    Precision and Recall.
  """
    _ = name  # Eliminate warning, name used for namescoping by PT.
    selected, sum_retrieved, sum_relevant = _compute_precision_recall(
        input_layer, labels, threshold, per_example_weights
    )

    if phase != Phase.train:
        dtype = tf.float32
        # Create the variables in all cases so that the load logic is easier.
        relevant_count = tf.get_variable(
            "relevant_count",
            [],
            dtype,
            tf.zeros_initializer,
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )
        retrieved_count = tf.get_variable(
            "retrieved_count",
            [],
            dtype,
            tf.zeros_initializer,
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )
        selected_count = tf.get_variable(
            "selected_count",
            [],
            dtype,
            tf.zeros_initializer,
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )

        with input_layer.g.device(selected_count.device):
            selected = tf.assign_add(selected_count, selected)
        with input_layer.g.device(retrieved_count.device):
            sum_retrieved = tf.assign_add(retrieved_count, sum_retrieved)
        with input_layer.g.device(relevant_count.device):
            sum_relevant = tf.assign_add(relevant_count, sum_relevant)

    return (
        tf.select(tf.equal(sum_retrieved, 0), tf.zeros_like(selected), selected / sum_retrieved),
        tf.select(tf.equal(sum_relevant, 0), tf.zeros_like(selected), selected / sum_relevant),
    )
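# A plain-NumPy sketch (separate from the example above) of the quantities
# being accumulated, assuming `selected` counts true positives,
# `sum_retrieved` counts predicted positives and `sum_relevant` counts
# actual positives; the helper below is illustrative, not the PT internals.
import numpy as np

def precision_recall(predictions, labels, threshold=0.5):
    retrieved = predictions > threshold                 # predicted positive
    relevant = labels > 0.5                             # actually positive
    selected = float(np.logical_and(retrieved, relevant).sum())
    precision = selected / retrieved.sum() if retrieved.sum() else 0.0
    recall = selected / relevant.sum() if relevant.sum() else 0.0
    return precision, recall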
Example #4
 def test_summary_saver(self):
   with tf.Graph().as_default() as g, tf.Session() as sess:
     log_dir = 'log/dir'
     summary_writer = testing.FakeSummaryWriter(log_dir, g)
     var = tf.Variable(0.0)
     tensor = tf.assign_add(var, 1.0)
     summary_op = tf.scalar_summary('my_summary', tensor)
     global_step = tf.contrib.framework.get_or_create_global_step()
     train_op = tf.assign_add(global_step, 1)
     hook = tf.train.SummarySaverHook(
         summary_op=summary_op, save_steps=8, summary_writer=summary_writer)
     hook.begin()
     sess.run(tf.initialize_all_variables())
     mon_sess = monitored_session._HookedSession(sess, [hook])
     for i in range(30):
       _ = i
       mon_sess.run(train_op)
     hook.end(sess)
     summary_writer.assert_summaries(
         test_case=self,
         expected_logdir=log_dir,
         expected_graph=g,
         expected_summaries={
             1: {'my_summary': 1.0},
             9: {'my_summary': 2.0},
             17: {'my_summary': 3.0},
             25: {'my_summary': 4.0},
         })
Example #5
    def apply(self, var_list):
        """Applies the running average to a list of variables
        Creates shadow variables and update op. Returns a grouped update op for
        all the averages in the list."""
        update_ops = []
        with tf.variable_scope('running_average'):
            for var in var_list:
                # add a shadow var that gets initialized to the same value
                # and a count to keep track of how many times it's been updated
                name = var.op.name
                count = tf.get_variable(
                    name+'_count', dtype=tf.float32,
                    initializer=tf.constant_initializer(0.0),
                    shape=[], trainable=False)
                shadow = tf.get_variable(
                    name+'_shadow', dtype=var.dtype,
                    initializer=var.initialized_value(),
                    collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
                                 tf.GraphKeys.VARIABLES],
                    trainable=False)
                # now make the update ops
                # increase the count
                count_update = tf.assign_add(count, 1.0)
                with tf.control_dependencies([count_update]):
                    difference = (var - shadow)/count
                    update = tf.assign_add(shadow, difference)
                update_ops.append(update)
                self.shadow_vars[var] = (shadow, count)

        return update_ops
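# The shadow update above is the standard incremental-mean recursion: after
# n updates, `shadow` equals the average of the n values seen so far.
# A minimal plain-Python sketch with illustrative values:
def running_average(values):
    shadow, count = 0.0, 0.0
    for value in values:
        count += 1.0
        shadow += (value - shadow) / count   # same form as the assign_add op
    return shadow

assert abs(running_average([1.0, 2.0, 3.0]) - 2.0) < 1e-12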
Example #6
  def test_train_skip_train_if_max_step_already_saved(self):
    with tf.Graph().as_default() as g, self.test_session(g):
      with tf.control_dependencies(self._build_inference_graph()):
        train_op = tf.assign_add(tf.contrib.framework.get_global_step(), 1)
      learn.graph_actions._monitored_train(  # pylint: disable=protected-access
          g,
          output_dir=self._output_dir,
          train_op=train_op,
          loss_op=tf.constant(2.0),
          max_steps=10)
      step = checkpoints.load_variable(
          self._output_dir, tf.contrib.framework.get_global_step().name)
      self.assertEqual(10, step)

    with tf.Graph().as_default() as g, self.test_session(g):
      with tf.control_dependencies(self._build_inference_graph()):
        train_op = tf.assign_add(tf.contrib.framework.get_global_step(), 1)
      learn.graph_actions._monitored_train(  # pylint: disable=protected-access
          g,
          output_dir=self._output_dir,
          train_op=train_op,
          loss_op=tf.constant(2.0),
          max_steps=10)
      step = checkpoints.load_variable(
          self._output_dir, tf.contrib.framework.get_global_step().name)
      self.assertEqual(10, step)
Example #7
def _eval_metric(input_, topk, correct_predictions, examples, phase):
    """Creates the standard tracking varibles if in test and returns accuracy."""
    my_parameters = {}
    if phase in (Phase.test, Phase.infer):
        dtype = tf.float32
        # Create the variables using tf.Variable because we don't want to share.
        count = tf.Variable(
            tf.constant(0, dtype=dtype),
            name="count_%d" % topk,
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )
        correct = tf.Variable(
            tf.constant(0, dtype=dtype),
            name="correct_%d" % topk,
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )
        my_parameters["count"] = count
        my_parameters["correct"] = correct
        with input_.g.device(count.device):
            examples = tf.assign_add(count, examples)
        with input_.g.device(correct.device):
            correct_predictions = tf.assign_add(correct, correct_predictions)
    return correct_predictions, examples, my_parameters
Example #8
def running_mean(cost, tag_name, batch_size=1):
    with tf.name_scope("running_mean_" + tag_name):
        with tf.variable_scope(tag_name):
            cost_sum = tf.get_variable(
              "cost_sum",
              initializer=tf.zeros_initializer,
              dtype=tf.float64,
              shape=(),
              collections=[tf.GraphKeys.LOCAL_VARIABLES],
              trainable=False)
            batches = tf.get_variable(
              "cost_num_batches",
              initializer=tf.zeros_initializer,
              dtype=tf.int32,
              shape=(),
              collections=[tf.GraphKeys.LOCAL_VARIABLES],
              trainable=False)

        cost_add = tf.assign_add(cost_sum, tf.cast(cost, dtype=tf.float64))
        batches_add = tf.assign_add(batches, batch_size)
        update_cost_mean = tf.group(cost_add, batches_add)

        reset_batches = tf.assign(batches, 0)
        reset_cost_sum = tf.assign(cost_sum, 0.0)
        reset_cost_mean = tf.group(reset_batches, reset_cost_sum)

        mean_cost = tf.divide(
          cost_sum,
          tf.cast(batches, dtype=tf.float64))
        train_loss_summary = tf.summary.scalar(tag_name, mean_cost)

    return reset_cost_mean, update_cost_mean, train_loss_summary
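# A hedged usage sketch for running_mean, assuming TF 1.x graph mode; the
# placeholder and the fed batch costs are illustrative.
import tensorflow as tf

cost = tf.placeholder(tf.float32, shape=(), name='batch_cost')
reset_op, update_op, summary_op = running_mean(cost, tag_name='train_cost')

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())   # counters live in LOCAL_VARIABLES
    for batch_cost in (2.0, 4.0):
        sess.run(update_op, feed_dict={cost: batch_cost})
    serialized = sess.run(summary_op)            # summary holds mean cost 3.0
    sess.run(reset_op)                           # clear the window for the next epoch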
Example #9
 def loop_body(i):
   asn1 = tf.assign_add(var_a, 1, name="a_add")
   with tf.control_dependencies([asn1]):
     asn2 = tf.assign_add(var_b, var_a, name="b_add")
   with tf.control_dependencies([asn2]):
     ni = tf.add(i, 1, name="i_add")
     return ni
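# A hedged sketch of how a body like loop_body is typically driven by
# tf.while_loop (TF 1.x graph mode); var_a and var_b are defined here only
# to make the fragment self-contained, and the loop bound is illustrative.
import tensorflow as tf

var_a = tf.Variable(0, dtype=tf.int32, name='a')
var_b = tf.Variable(0, dtype=tf.int32, name='b')

loop = tf.while_loop(lambda i: tf.less(i, 10), loop_body, [tf.constant(0)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(loop)   # the control dependencies force a_add, then b_add, each step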
Example #10
    def __init__(self, epsilon=1e-2, shape=()):

        self._sum = tf.get_variable(
            dtype=tf.float64,
            shape=shape,
            initializer=tf.constant_initializer(0.0),
            name="runningsum", trainable=False)
        self._sumsq = tf.get_variable(
            dtype=tf.float64,
            shape=shape,
            initializer=tf.constant_initializer(epsilon),
            name="runningsumsq", trainable=False)
        self._count = tf.get_variable(
            dtype=tf.float64,
            shape=(),
            initializer=tf.constant_initializer(epsilon),
            name="count", trainable=False)
        self.shape = shape

        self.mean = tf.to_float(self._sum / self._count)
        self.std = tf.sqrt(
            tf.maximum(tf.to_float(self._sumsq / self._count) - tf.square(self.mean), 1e-2))

        newsum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
        newsumsq = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
        newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
        self.incfiltparams = U.function([newsum, newsumsq, newcount], [],
            updates=[tf.assign_add(self._sum, newsum),
                     tf.assign_add(self._sumsq, newsumsq),
                     tf.assign_add(self._count, newcount)])
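# A plain-NumPy sketch (separate from the class above) of the running
# mean/std bookkeeping: accumulate sum, sum of squares and count, then
# derive the statistics. The epsilon initialization mirrors __init__, but
# the batches are illustrative.
import numpy as np

running_sum, running_sumsq, count = 0.0, 1e-2, 1e-2
for batch in (np.array([1.0, 2.0]), np.array([3.0, 4.0])):
    running_sum += batch.sum()            # what assign_add(self._sum, newsum) does
    running_sumsq += np.square(batch).sum()
    count += batch.size

mean = running_sum / count
std = np.sqrt(max(running_sumsq / count - mean ** 2, 1e-2))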
Example #11
 def test_capture_variable(self):
     monitor = learn.monitors.CaptureVariable(var_name="my_assign_add:0", every_n=8, first_n=2)
     with tf.Graph().as_default() as g, self.test_session(g):
         var = tf.Variable(0.0, name="my_var")
         var.initializer.run()
         tf.assign_add(var, 1.0, name="my_assign_add")
         self._run_monitor(monitor, num_epochs=3, num_steps_per_epoch=10)
         self.assertEqual({0: 1.0, 1: 2.0, 2: 3.0, 10: 4.0, 18: 5.0, 26: 6.0, 29: 7.0}, monitor.values)
Example #12
def loss(loss_value):
  """Calculates aggregated mean loss."""
  total_loss = tf.Variable(0.0, False)
  loss_count = tf.Variable(0, False)
  total_loss_update = tf.assign_add(total_loss, loss_value)
  loss_count_update = tf.assign_add(loss_count, 1)
  loss_op = total_loss / tf.cast(loss_count, tf.float32)
  return [total_loss_update, loss_count_update], loss_op
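# A hedged usage sketch for loss(), assuming TF 1.x graph mode; the fed
# batch losses are illustrative.
import tensorflow as tf

loss_value = tf.placeholder(tf.float32, shape=())
updates, loss_op = loss(loss_value)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for batch_loss in (2.0, 4.0):
        sess.run(updates, feed_dict={loss_value: batch_loss})
    mean_loss = sess.run(loss_op)   # (2.0 + 4.0) / 2 batches = 3.0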
Example #13
def train_one_epoch(generator, discriminator,
                    generator_optimizer, discriminator_optimizer,
                    dataset, log_interval, noise_dim):
  """Trains `generator` and `discriminator` models on `dataset`.

  Args:
    generator: Generator model.
    discriminator: Discriminator model.
    generator_optimizer: Optimizer to use for generator.
    discriminator_optimizer: Optimizer to use for discriminator.
    dataset: Dataset of images to train on.
    log_interval: How many global steps to wait between logging and collecting
      summaries.
    noise_dim: Dimension of noise vector to use.
  """

  total_generator_loss = 0.0
  total_discriminator_loss = 0.0
  for (batch_index, images) in enumerate(tfe.Iterator(dataset)):
    with tf.device('/cpu:0'):
      tf.assign_add(tf.train.get_global_step(), 1)

    with tf.contrib.summary.record_summaries_every_n_global_steps(log_interval):
      current_batch_size = images.shape[0]
      noise = tf.random_uniform(shape=[current_batch_size, noise_dim],
                                minval=-1., maxval=1., seed=batch_index)

      with tfe.GradientTape(persistent=True) as g:
        generated_images = generator(noise)
        tf.contrib.summary.image('generated_images',
                                 tf.reshape(generated_images, [-1, 28, 28, 1]),
                                 max_images=10)

        discriminator_gen_outputs = discriminator(generated_images)
        discriminator_real_outputs = discriminator(images)
        discriminator_loss_val = discriminator_loss(discriminator_real_outputs,
                                                    discriminator_gen_outputs)
        total_discriminator_loss += discriminator_loss_val

        generator_loss_val = generator_loss(discriminator_gen_outputs)
        total_generator_loss += generator_loss_val

      generator_grad = g.gradient(generator_loss_val, generator.variables)
      discriminator_grad = g.gradient(discriminator_loss_val,
                                      discriminator.variables)

      with tf.variable_scope('generator'):
        generator_optimizer.apply_gradients(zip(generator_grad,
                                                generator.variables))
      with tf.variable_scope('discriminator'):
        discriminator_optimizer.apply_gradients(zip(discriminator_grad,
                                                    discriminator.variables))

      if log_interval and batch_index > 0 and batch_index % log_interval == 0:
        print('Batch #%d\tAverage Generator Loss: %.6f\t'
              'Average Discriminator Loss: %.6f' % (
                  batch_index, total_generator_loss/batch_index,
                  total_discriminator_loss/batch_index))
Example #14
  def setUp(self):
    tf.test.TestCase.setUp(self)

    self.log_dir = 'log/dir'
    self.summary_writer = testing.FakeSummaryWriter(self.log_dir)

    var = tf.Variable(0.0)
    tensor = tf.assign_add(var, 1.0)
    self.summary_op = tf.summary.scalar('my_summary', tensor)

    global_step = tf.contrib.framework.get_or_create_global_step()
    self.train_op = tf.assign_add(global_step, 1)
Example #15
def accuracy(logits, labels):
  """Calculates aggregated accuracy."""
  is_correct = tf.nn.in_top_k(logits, labels, 1)
  correct = tf.reduce_sum(tf.cast(is_correct, tf.int32))
  incorrect = tf.reduce_sum(tf.cast(tf.logical_not(is_correct), tf.int32))
  correct_count = tf.Variable(0, False)
  incorrect_count = tf.Variable(0, False)
  correct_count_update = tf.assign_add(correct_count, correct)
  incorrect_count_update = tf.assign_add(incorrect_count, incorrect)
  accuracy_op = tf.cast(correct_count, tf.float32) / tf.cast(
      correct_count + incorrect_count, tf.float32)
  return [correct_count_update, incorrect_count_update], accuracy_op
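# The aggregated accuracy above reduces to correct / (correct + incorrect)
# accumulated across batches; a plain-Python sketch with illustrative counts.
correct_count, incorrect_count = 0, 0
for correct, incorrect in ((30, 2), (28, 4)):   # per-batch top-1 counts
    correct_count += correct                    # tf.assign_add(correct_count, correct)
    incorrect_count += incorrect
accuracy = correct_count / float(correct_count + incorrect_count)   # 58 / 64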
Example #16
  def advance_counters(self, total):
    """Returns ops to advance the per-component step and total counters.

    Args:
      total: Total number of actions to increment counters by.

    Returns:
      tf.Group op incrementing 'step' by 1 and 'total' by total.
    """
    update_total = tf.assign_add(self._total, total, use_locking=True)
    update_step = tf.assign_add(self._step, 1, use_locking=True)
    return tf.group(update_total, update_step)
Example #17
 def test_train_loss(self):
   with tf.Graph().as_default() as g, self.test_session(g):
     tf.contrib.framework.create_global_step()
     loss_var = tf.contrib.framework.local_variable(10.0)
     train_op = tf.group(
         tf.assign_add(tf.contrib.framework.get_global_step(), 1),
         tf.assign_add(loss_var, -1.0))
     self._assert_summaries(self._output_dir)
     loss = learn.graph_actions.train(
         g, output_dir=self._output_dir, train_op=train_op,
         loss_op=loss_var.value(), steps=6)
     self.assertEqual(4.0, loss)
     self._assert_summaries(self._output_dir, expected_graphs=[g])
Example #18
    def session_run_job():
      with tf.Session() as sess:
        a = tf.Variable(10, dtype=tf.int32, name='a')
        b = tf.Variable(20, dtype=tf.int32, name='b')
        d = tf.constant(1, dtype=tf.int32, name='d')
        inc_a = tf.assign_add(a, d, name='inc_a')
        inc_b = tf.assign_add(b, d, name='inc_b')
        inc_ab = tf.group([inc_a, inc_b], name="inc_ab")

        sess.run(tf.global_variables_initializer())

        sess = tf_debug.TensorBoardDebugWrapperSession(sess, self._debugger_url)
        session_run_results.append(sess.run(inc_ab))
Example #19
  def testPlateauOpHook(self):
    global_step = tf.train.create_global_step()
    counter = tf.get_variable("count", initializer=0, dtype=tf.int32)
    indicator = tf.get_variable("indicator", initializer=0, dtype=tf.int32)
    tf.summary.scalar("count", counter)
    incr_global_step = tf.assign_add(global_step, 1)
    incr_counter = tf.assign_add(counter, 1)
    incr_indicator = tf.assign_add(indicator, 1)

    # Stop if the global step has not gone up by more than 1 in 20 steps.

    ckpt_dir = self.ckpt_dir("plateauop")
    stop_hook = metrics_hook.PlateauOpHook(
        ckpt_dir,
        "count_1",
        incr_indicator,
        num_plateau_steps=20,
        plateau_delta=1.,
        plateau_decrease=False,
        every_n_steps=10)
    with self.sess(stop_hook, ckpt_dir) as sess:
      for _ in range(20):
        sess.run((incr_global_step, incr_counter))

      # Summary files should now have 2 values in them
      self.flush()

      # Run for more steps so that the hook gets triggered and we verify that we
      # don't stop.
      for _ in range(30):
        sess.run((incr_global_step, incr_counter))

      self.flush()

      # Run without incrementing the counter
      for _ in range(30):
        sess.run(incr_global_step)
      self.flush()

      self.assertTrue(sess.run(indicator) < 1)

      # Metrics should be written such that now the counter has gone >20 steps
      # without being incremented.
      # Check that we run the incr_indicator op several times
      for _ in range(3):
        for _ in range(10):
          sess.run(incr_global_step)
        self.flush()

      self.assertTrue(sess.run(indicator) > 1)
Example #20
 def setUp(self):
   self.model_dir = tempfile.mkdtemp()
   self.graph = tf.Graph()
   with self.graph.as_default():
     self.scaffold = monitored_session.Scaffold()
     self.global_step = tf.contrib.framework.get_or_create_global_step()
     self.train_op = tf.assign_add(self.global_step, 1)
Example #21
 def test_recover_and_retry_on_aborted_error(self):
   # Tests that we silently retry and recover on abort.  This test uses
   # a CheckpointSaver to have something to recover from.
   logdir = self._test_dir('test_recover_and_retry_on_aborted_error')
   with tf.Graph().as_default():
     gstep = tf.contrib.framework.get_or_create_global_step()
     do_step = tf.assign_add(gstep, 1)
     scaffold = supervised_session.Scaffold()
     abort_monitor = RaiseOnceAtStepN(
         3, tf.errors.AbortedError(None, None, 'Abort'))
     # Save after each step.
     ckpt_monitor = tf.contrib.learn.monitors.CheckpointSaver(
         1, scaffold.saver, logdir)
     monitors = [abort_monitor, ckpt_monitor]
     with supervised_session.SupervisedSession('', scaffold=scaffold,
                                               checkpoint_dir=logdir,
                                               monitors=monitors) as session:
       self.assertEqual(0, session.run(gstep))
       self.assertEqual(1, session.run(do_step))
       self.assertEqual(2, session.run(do_step))
       self.assertFalse(session.should_stop())
       # Here at step 3, the monitor triggers and raises AbortedError.  The
       # SupervisedSession automatically restores and retries.
       self.assertEqual(3, session.run(do_step))
       self.assertTrue(abort_monitor.raised)
       self.assertFalse(session.should_stop())
       self.assertEqual(4, session.run(do_step))
       self.assertFalse(session.should_stop())
Example #22
  def accumulate_privacy_spending(self, unused_eps_delta,
                                  sigma, num_examples):
    """Accumulate privacy spending.

    In particular, accounts for privacy spending when we assume there
    are num_examples, and we are releasing the vector
    (sum_{i=1}^{num_examples} x_i) + Normal(0, stddev=l2norm_bound*sigma)
    where l2norm_bound is the maximum l2_norm of each example x_i, and
    the num_examples have been randomly selected out of a pool of
    self.total_examples.

    Args:
      unused_eps_delta: EpsDelta pair which can be tensors. Unused
        in this accountant.
      sigma: the noise sigma, in the multiples of the sensitivity (that is,
        if the l2norm sensitivity is k, then the caller must have added
        Gaussian noise with stddev=k*sigma to the result of the query).
      num_examples: the number of examples involved.
    Returns:
      a TensorFlow operation for updating the privacy spending.
    """
    q = tf.cast(num_examples, tf.float64) * 1.0 / self._total_examples

    moments_accum_ops = []
    for i in range(len(self._log_moments)):
      moment = self._compute_log_moment(sigma, q, self._moment_orders[i])
      moments_accum_ops.append(tf.assign_add(self._log_moments[i], moment))
    return tf.group(*moments_accum_ops)
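# The accumulated log moments above are later converted to an (eps, delta)
# guarantee. A hedged NumPy sketch of the standard moments-accountant
# conversion, eps = min over orders lambda of (alpha(lambda) + log(1/delta)) / lambda;
# the helper name is illustrative, and the accountant's own conversion may
# additionally filter out non-finite moments.
import numpy as np

def eps_from_log_moments(log_moments, moment_orders, target_delta):
    log_moments = np.asarray(log_moments, dtype=np.float64)
    moment_orders = np.asarray(moment_orders, dtype=np.float64)
    return np.min((log_moments - np.log(target_delta)) / moment_orders)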
Example #23
  def testPeriodicTargetUpdate(self, use_locking, update_period):
    """Tests that the simple success case works as expected.

    This is an integration test; the periodic scheduling and the update op
    itself are unit-tested separately in the preceding tests.

    Args:
      use_locking: value for `periodic_target_update`'s `use_locking` argument.
      update_period: how often an update should happen.
    """
    target_variables = [tf.Variable(tf.zeros([1, 2]))]
    source_variables = [tf.Variable(tf.random_normal([1, 2]))]
    increment = tf.ones([1, 2])

    update_source_op = tf.assign_add(source_variables[0], increment)
    updated = target_update_ops.periodic_target_update(
        target_variables,
        source_variables,
        update_period=update_period,
        use_locking=use_locking)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())

      for step in range(3 * update_period):
        sess.run(update_source_op)
        sess.run(updated)
        targets, sources = sess.run([target_variables, source_variables])

        if step % update_period == 0:
          self.assertAllClose(targets, sources)
        else:
          self.assertNotAllClose(targets, sources)
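# A plain-NumPy sketch (separate from the test above) of the behaviour being
# checked: a hard copy of the source variables into the targets every
# update_period calls, and no change in between. Names are illustrative.
import numpy as np

def periodic_hard_update(target, source, call_count, update_period):
    if call_count % update_period == 0:
        target[...] = source      # hard copy, as tf.assign would do
    return target

target, source = np.zeros(2), np.ones(2)
for call_count in range(4):
    periodic_hard_update(target, source, call_count, update_period=2)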
Example #24
 def __init__(self, train_time, time_limit=None):
   super(TrainTimeHook, self).__init__()
   self._train_time = train_time
   self._time_limit = time_limit
   self._increment_amount = tf.placeholder(tf.float32, None)
   self._increment_op = tf.assign_add(train_time, self._increment_amount)
   self._last_run_duration = None
Example #25
 def test_num_steps(self):
   logdir = self._test_dir('test_num_steps')
   with tf.Graph().as_default():
     gstep = tf.contrib.framework.get_or_create_global_step()
     do_step = tf.assign_add(gstep, 1)
     scaffold = supervised_session.Scaffold()
     # Do 3 steps and save.
     monitors = [tf.contrib.learn.monitors.StopAtStep(num_steps=3)]
     with supervised_session.SupervisedSession('', scaffold=scaffold,
                                               monitors=monitors) as session:
       session.run(do_step)
       self.assertFalse(session.should_stop())
       session.run(do_step)
       self.assertFalse(session.should_stop())
       session.run(do_step)
       self.assertTrue(session.should_stop())
       save_path = scaffold.saver.save(session.session,
                                       os.path.join(logdir, 'step-3'))
     # Restore and do 4 steps.
     def load_ckpt(scaffold, sess):
       scaffold.saver.restore(sess, save_path)
     scaffold = supervised_session.Scaffold(init_fn=load_ckpt)
     monitors = [tf.contrib.learn.monitors.StopAtStep(num_steps=4)]
     with supervised_session.SupervisedSession('', scaffold=scaffold,
                                               monitors=monitors) as session:
       self.assertEqual(3, session.run(gstep))
       session.run(do_step)
       self.assertFalse(session.should_stop())
       session.run(do_step)
       self.assertFalse(session.should_stop())
       session.run(do_step)
       self.assertFalse(session.should_stop())
       session.run(do_step)
       self.assertTrue(session.should_stop())
Example #26
  def testEvaluateWithEvalFeedDict(self):
    # Create a checkpoint.
    checkpoint_dir = os.path.join(self.get_temp_dir(),
                                  'evaluate_with_eval_feed_dict')
    self._train_model(checkpoint_dir, num_steps=1)

    # We need a variable that the saver will try to restore.
    tf.contrib.framework.get_or_create_global_step()

    # Create a variable and an eval op that increments it with a placeholder.
    my_var = tf.contrib.framework.local_variable(0.0, name='my_var')
    increment = tf.placeholder(dtype=tf.float32)
    eval_ops = tf.assign_add(my_var, increment)

    increment_value = 3
    num_evals = 5
    expected_value = increment_value * num_evals
    final_values = tf.contrib.training.evaluate_repeatedly(
        checkpoint_dir=checkpoint_dir,
        eval_ops=eval_ops,
        feed_dict={increment: 3},
        final_ops={'my_var': tf.identity(my_var)},
        hooks=[
            tf.contrib.training.StopAfterNEvalsHook(num_evals),
        ],
        max_number_of_evaluations=1)
    self.assertEqual(final_values['my_var'], expected_value)
Example #27
    def testCallsMonitorsWithLastStep(self):
        with tf.Graph().as_default(), tf.Session() as sess:
            global_step_tensor = tf.contrib.framework.create_global_step()
            mock_mon = FakeMonitor()
            mock_mon2 = FakeMonitor()
            mon_sess = monitored_session.MonitoredSession(
                sess=sess, monitors=[mock_mon, mock_mon2], global_step_tensor=global_step_tensor
            )
            inc_5 = tf.assign_add(global_step_tensor, 5)
            # Initialize global_step_tensor to '0':
            sess.run(tf.initialize_all_variables())

            mon_sess.run(fetches=[inc_5])
            for mon in [mock_mon, mock_mon2]:
                self.assertEqual(mon.last_begin_step, 1)
                self.assertEqual(mon.last_end_step, 1)
                self.assertEqual(mon.last_post_step, 1)

            mon_sess.run(fetches=[inc_5])
            for mon in [mock_mon, mock_mon2]:
                self.assertEqual(mon.last_begin_step, 6)
                self.assertEqual(mon.last_end_step, 6)
                self.assertEqual(mon.last_post_step, 6)

            mon_sess.run(fetches=[inc_5])
            for mon in [mock_mon, mock_mon2]:
                self.assertEqual(mon.last_begin_step, 11)
                self.assertEqual(mon.last_end_step, 11)
                self.assertEqual(mon.last_post_step, 11)
Example #28
  def testStop(self):
    global_step = tf.train.create_global_step()
    tf.summary.scalar("global_step", global_step)
    incr_global_step = tf.assign_add(global_step, 1)

    ckpt_dir = self.ckpt_dir("stop")
    dummy = DummyHook(ckpt_dir, every_n_steps=10)
    with self.sess(dummy, ckpt_dir) as sess:
      for _ in range(20):
        sess.run(incr_global_step)

      # Summary files should now have 2 global step values in them
      self.flush()

      # Run for 10 more so that the hook gets triggered again
      for _ in range(10):
        sess.run(incr_global_step)

      # Check that the metrics have actually been collected.
      self.assertTrue("" in dummy.test_metrics)
      metrics = dummy.test_metrics[""]
      self.assertTrue("global_step_1" in metrics)
      steps, vals = metrics["global_step_1"]
      self.assertTrue(len(steps) == len(vals))
      self.assertTrue(len(steps) >= 2)

      # Run for 10 more so that the hook triggers stoppage
      for _ in range(10):
        sess.run(incr_global_step)

      with self.assertRaisesRegexp(RuntimeError, "after should_stop requested"):
        sess.run(incr_global_step)
Example #29
  def testEvalOpAndFinalOp(self):
    checkpoint_dir = os.path.join(self.get_temp_dir(), 'eval_ops_and_final_ops')

    # Train a model for a single step to get a checkpoint.
    self._train_model(checkpoint_dir, num_steps=1)
    checkpoint_path = tf.contrib.training.wait_for_new_checkpoint(
        checkpoint_dir)

    # Create the model so we have something to restore.
    inputs = tf.constant(self._inputs, dtype=tf.float32)
    logistic_classifier(inputs)

    num_evals = 5
    final_increment = 9.0

    my_var = tf.contrib.framework.local_variable(0.0, name='MyVar')
    eval_ops = tf.assign_add(my_var, 1.0)
    final_ops = tf.identity(my_var) + final_increment

    final_ops_values = tf.contrib.training.evaluate_once(
        checkpoint_path=checkpoint_path,
        eval_ops=eval_ops,
        final_ops={'value': final_ops},
        hooks=[
            tf.contrib.training.StopAfterNEvalsHook(num_evals),
        ])
    self.assertEqual(final_ops_values['value'], num_evals + final_increment)
Example #30
def _model_fn(features, labels, mode):
    print("\t_model_fn:features=", features)
    print("\t_model_fn:labels=", labels)
    print("\t_model_fn:mode=", mode)
    
    # Build a linear model and predict values
    W = tf.get_variable("W", [1], dtype=tf.float64)
    b = tf.get_variable("b", [1], dtype=tf.float64)
    y = W*features['x'] + b
    # Loss sub-graph
    """Clouds: what is "labels"? "labels" is the standard answer? where "y" is the predict answer?""" 
    loss = tf.reduce_sum(tf.square(y - labels))
    # Training sub-graph
    global_step = tf.train.get_global_step()
    optimizer = tf.train.GradientDescentOptimizer(0.01)
    """Clouds: what is tf.group???"""
    train = tf.group(optimizer.minimize(loss),
                   tf.assign_add(global_step, 1))
    print("--------\n\t_model_fn:train group=", train )
    print("--------\n\n")
    
    # EstimatorSpec connects subgraphs we built to the
    # appropriate functionality.
    return tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=y,
          loss=loss,
          train_op=train)
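# A hedged usage sketch for _model_fn, assuming TF 1.x Estimator APIs; the
# toy regression data (y = -x + 1) and the step count are illustrative.
import numpy as np
import tensorflow as tf

estimator = tf.estimator.Estimator(model_fn=_model_fn)
x_train = np.array([1.0, 2.0, 3.0, 4.0])
y_train = np.array([0.0, -1.0, -2.0, -3.0])
input_fn = tf.estimator.inputs.numpy_input_fn(
    {'x': x_train}, y_train, batch_size=4, num_epochs=None, shuffle=True)
estimator.train(input_fn=input_fn, steps=1000)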
Example #31
def train_adv(model=None):
    assert FLAGS.train_dir, 'train_dir must be given'
    print('train dir is %s' % FLAGS.train_dir)
    # global_step = tf.train.get_or_create_global_step()
    global_step = tf.Variable(0, trainable=False)
    add_global = tf.assign_add(global_step, 1)

    action_prob_op, gene_loss_op, train_gene_op = model.train_generator(
        global_step)
    dis_loss_op, train_dis_op, reward_op = model.train_discriminator()
    train_sentence_op, train_sentence_len_op, train_label_op = model.get_generator_data(
    )
    original_prob_op = model.get_original_prob()
    dev_acc_op, dev_num_op, dev_init_op = model.build_dev_graph()
    test_acc_op, test_num_op, test_init_op = model.build_test_graph()

    train_ckpt_dir = FLAGS.train_dir + '/train_ckpt'
    os.makedirs(train_ckpt_dir, exist_ok=True)
    sum_writer = tf.summary.FileWriter(str(train_ckpt_dir),
                                       graph=tf.get_default_graph())
    best_dev_acc = 0.0
    final_acc = 0.0

    average_reward = 0
    all_reward = 0
    all_sent_num = 0

    saver = tf.train.Saver(max_to_keep=1)
    init = tf.global_variables_initializer()
    with tf.Session(config=utils.get_config()) as sess:
        tf.set_random_seed(FLAGS.random_seed)
        np.random.seed(FLAGS.random_seed)
        sess.run(init)
        for _ in itertools.count(1):
            this_global_step = sess.run(add_global)
            if this_global_step >= FLAGS.max_steps + 1:
                break
            sentence, sentence_len, label = sess.run(
                [train_sentence_op, train_sentence_len_op, train_label_op])
            raw_sentence = sentence.copy()
            if this_global_step < FLAGS.dis_warm_up_step:  # discriminator warm up
                dis_loss, _, = sess.run(
                    [dis_loss_op, train_dis_op],
                    feed_dict={
                        'discriminator/sentence:0': sentence,
                        'discriminator/sentence_len:0': sentence_len,
                        'discriminator/train_label:0': label,
                    })
                gene_loss = 0.0
            elif this_global_step < FLAGS.gene_warm_up_step + FLAGS.dis_warm_up_step:  # generator warm up
                original_prob = sess.run(original_prob_op,
                                         feed_dict={
                                             'sentence_original:0': sentence,
                                             'sentence_len_original:0':
                                             sentence_len,
                                             'sentence_label_original:0': label
                                         })
                action = sess.run(action_prob_op,
                                  feed_dict={
                                      'generator/train_sentence:0':
                                      sentence,
                                      'generator/train_sentence_len:0':
                                      sentence_len
                                  })
                sentence_new, action_idx = generate_new_sentence_with_action(
                    model.vocab, action, sentence, sentence_len)
                reward = sess.run(reward_op,
                                  feed_dict={
                                      'discriminator/sentence:0':
                                      sentence_new,
                                      'discriminator/sentence_len:0':
                                      sentence_len,
                                      'discriminator/train_label:0':
                                      label,
                                      'discriminator/original_prob:0':
                                      original_prob
                                  })
                all_sent_num += len(reward)
                all_reward += np.sum(reward)
                average_reward = all_reward / all_sent_num
                reward -= average_reward
                gene_loss, _ = sess.run(
                    [gene_loss_op, train_gene_op],
                    feed_dict={
                        'generator/train_sentence:0': raw_sentence,
                        'generator/train_sentence_len:0': sentence_len,
                        'generator/reward_score:0': reward,
                        'generator/action_idx:0': action_idx
                    })
                dis_loss = 0
            else:  # adversarial train
                rand_num = random.choice([1] * FLAGS.every + [0])
                if rand_num != 0:  # train with generated sentences
                    original_prob = sess.run(original_prob_op,
                                             feed_dict={
                                                 'sentence_original:0':
                                                 sentence,
                                                 'sentence_len_original:0':
                                                 sentence_len,
                                                 'sentence_label_original:0':
                                                 label
                                             })
                    action = sess.run(action_prob_op,
                                      feed_dict={
                                          'generator/train_sentence:0':
                                          sentence,
                                          'generator/train_sentence_len:0':
                                          sentence_len
                                      })
                    sentence_new, action_idx = generate_new_sentence_with_action(
                        model.vocab, action, sentence, sentence_len)

                    dis_loss, _, reward = sess.run(
                        [dis_loss_op, train_dis_op, reward_op],
                        feed_dict={
                            'discriminator/sentence:0': sentence_new,
                            'discriminator/sentence_len:0': sentence_len,
                            'discriminator/train_label:0': label,
                            'discriminator/original_prob:0': original_prob
                        })
                    all_sent_num += len(reward)
                    all_reward += np.sum(reward)
                    average_reward = all_reward / all_sent_num
                    reward -= average_reward
                    gene_loss, _ = sess.run(
                        [gene_loss_op, train_gene_op],
                        feed_dict={
                            'generator/train_sentence:0': raw_sentence,
                            'generator/train_sentence_len:0': sentence_len,
                            'generator/reward_score:0': reward,
                            'generator/action_idx:0': action_idx
                        })
                else:  # train with original sentence
                    dis_loss, _, = sess.run(
                        [dis_loss_op, train_dis_op],
                        feed_dict={
                            'discriminator/sentence:0': sentence,
                            'discriminator/sentence_len:0': sentence_len,
                            'discriminator/train_label:0': label,
                        })
                    gene_loss = 0.0

            if this_global_step != 0 and this_global_step % FLAGS.test_steps == 0 and this_global_step > FLAGS.dis_warm_up_step + FLAGS.gene_warm_up_step:
                number = 0
                accuracy = 0.0
                while True:
                    try:
                        acc, num = sess.run([dev_acc_op, dev_num_op])
                        number += num
                        accuracy += acc * num
                    except tf.errors.OutOfRangeError:
                        break
                accuracy /= number
                print('At step %d. dev num=%d acc=%f.' %
                      (this_global_step, number, accuracy))
                if accuracy > best_dev_acc:
                    best_dev_acc = accuracy
                    print("best acc=%f At step %d." %
                          (best_dev_acc, this_global_step))
                    test_accuracy = 0.
                    test_number = 0
                    while True:
                        try:
                            test_acc, test_num = sess.run(
                                [test_acc_op, test_num_op])
                            test_number += test_num
                            test_accuracy += test_acc * test_num
                        except tf.errors.OutOfRangeError:
                            break
                    test_accuracy /= test_number
                    print('test num=%d acc=%f.' % (test_number, test_accuracy))
                    final_acc = test_accuracy
                    sess.run(test_init_op)
                    save_checkpoint(saver, sess, FLAGS.train_dir,
                                    this_global_step)
                summary = tf.Summary()
                summary.value.add(tag='test_acc', simple_value=accuracy)
                summary.value.add(tag='best_dev_acc',
                                  simple_value=best_dev_acc)
                sum_writer.add_summary(summary, this_global_step)
                sess.run(dev_init_op)
    sum_writer.close()
    print('Accuracy of test set is %f .' % final_acc)
Example #32
def model_fn(features, labels, mode, params):
    """The model_fn argument for creating an Estimator."""
    tf.logging.info("features = %s labels = %s mode = %s params=%s" %
                    (features, labels, mode, params))
    global_step = tf.train.get_global_step()
    graph = mtf.Graph()
    mesh = mtf.Mesh(graph, "my_mesh")
    logits, loss = mnist_model(features, labels, mesh)
    mesh_shape = mtf.convert_to_shape(FLAGS.mesh_shape)
    layout_rules = mtf.convert_to_layout_rules(FLAGS.layout)
    mesh_size = mesh_shape.size
    mesh_devices = [""] * mesh_size
    mesh_impl = placement_mesh_impl.PlacementMeshImpl(mesh_shape, layout_rules,
                                                      mesh_devices)

    if mode == tf.estimator.ModeKeys.TRAIN:
        var_grads = mtf.gradients(
            [loss], [v.outputs[0] for v in graph.trainable_variables])
        optimizer = mtf_optimize.AdafactorOptimizer()
        update_ops = []
        for grad, var in zip(var_grads, graph.trainable_variables):
            update_ops.extend(optimizer.apply_grad(grad, var))

    lowering = mtf.Lowering(graph, {mesh: mesh_impl})
    restore_hook = mtf.MtfRestoreHook(lowering)

    tf_logits = lowering.export_to_tf_tensor(logits)
    if mode != tf.estimator.ModeKeys.PREDICT:
        tf_loss = lowering.export_to_tf_tensor(loss)
        tf.summary.scalar("loss", tf_loss)

    if mode == tf.estimator.ModeKeys.TRAIN:
        tf_update_ops = [lowering.lowered_operation(op) for op in update_ops]
        tf_update_ops.append(tf.assign_add(global_step, 1))
        train_op = tf.group(tf_update_ops)
        saver = tf.train.Saver(tf.global_variables(),
                               sharded=True,
                               max_to_keep=10,
                               keep_checkpoint_every_n_hours=2,
                               defer_build=False,
                               save_relative_paths=True)
        tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
        saver_listener = mtf.MtfCheckpointSaverListener(lowering)
        saver_hook = tf.train.CheckpointSaverHook(FLAGS.model_dir,
                                                  save_steps=1000,
                                                  saver=saver,
                                                  listeners=[saver_listener])

        accuracy = tf.metrics.accuracy(labels=labels,
                                       predictions=tf.argmax(tf_logits,
                                                             axis=1))

        # Name tensors to be logged with LoggingTensorHook.
        tf.identity(tf_loss, "cross_entropy")
        tf.identity(accuracy[1], name="train_accuracy")

        # Save accuracy scalar to Tensorboard output.
        tf.summary.scalar("train_accuracy", accuracy[1])

        # restore_hook must come before saver_hook
        return tf.estimator.EstimatorSpec(
            tf.estimator.ModeKeys.TRAIN,
            loss=tf_loss,
            train_op=train_op,
            training_chief_hooks=[restore_hook, saver_hook])

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            "classes": tf.argmax(tf_logits, axis=1),
            "probabilities": tf.nn.softmax(tf_logits),
        }
        return tf.estimator.EstimatorSpec(
            mode=tf.estimator.ModeKeys.PREDICT,
            predictions=predictions,
            prediction_hooks=[restore_hook],
            export_outputs={
                "classify": tf.estimator.export.PredictOutput(predictions)
            })
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=tf.estimator.ModeKeys.EVAL,
            loss=tf_loss,
            evaluation_hooks=[restore_hook],
            eval_metric_ops={
                "accuracy":
                tf.metrics.accuracy(labels=labels,
                                    predictions=tf.argmax(tf_logits, axis=1)),
            })
Example #33
    def __init__(self, sess, input_norm, config):
        super(AE_Expert_Network, self).__init__(sess, config, config.expert_lr)

        self.rng = np.random.RandomState(config.random_seed)

        self.expert_layer1_dim = config.l1_dim
        self.expert_layer2_dim = config.l2_dim

        self.input_norm = input_norm

        self.use_better_q_gd = False
        if config.use_better_q_gd == "True":
            self.use_better_q_gd = True
            self.better_q_gd_alpha = 1e-2  # config.better_q_gd_alpha
            self.better_q_gd_max_steps = 10  # config.better_q_gd_max_steps
            self.better_q_gd_stop = 1e-3  # config.better_q_gd_stop

        # original network
        self.inputs, self.phase, self.action, self.q_prediction = self.build_network(
            scope_name='ae_expert')
        self.net_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            scope='ae_expert')

        # Target network
        self.target_inputs, self.target_phase, self.target_action, self.target_q_prediction = self.build_network(
            scope_name='target_ae_expert')
        self.target_net_params = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='target_ae_expert')

        # Op for periodically updating target network with online network weights
        self.update_target_net_params = [
            tf.assign_add(
                self.target_net_params[idx],
                self.tau *
                (self.net_params[idx] - self.target_net_params[idx]))
            for idx in range(len(self.target_net_params))
        ]

        # Op to initialize the target network with the same parameters as the online network
        self.init_target_net_params = [
            tf.assign(self.target_net_params[idx], self.net_params[idx])
            for idx in range(len(self.target_net_params))
        ]

        # TODO: Currently doesn't support batchnorm
        if self.norm_type == 'batch':
            raise NotImplementedError

        else:
            assert (self.norm_type == 'none' or self.norm_type == 'layer'
                    or self.norm_type == 'input_norm')
            self.batchnorm_ops = [tf.no_op()]
            self.update_target_batchnorm_params = tf.no_op()

        self.predicted_q_value = tf.placeholder(tf.float32, [None, 1])

        # Optimization Op
        with tf.control_dependencies(self.batchnorm_ops):

            # Expert Update
            self.expert_loss = tf.reduce_mean(
                tf.squared_difference(self.predicted_q_value,
                                      self.q_prediction))
            self.expert_optimize = tf.train.AdamOptimizer(
                self.learning_rate).minimize(self.expert_loss)

        # Get the gradient of the expert w.r.t. the action
        self.action_grads = tf.gradients(self.q_prediction, self.action)
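# The target update built in __init__ above is a Polyak (soft) update: each
# run moves the target weights a fraction tau toward the online weights.
# A plain-NumPy sketch with illustrative values:
import numpy as np

def soft_update(target_params, net_params, tau):
    return [t + tau * (w - t) for t, w in zip(target_params, net_params)]

targets = [np.zeros(3)]
weights = [np.ones(3)]
targets = soft_update(targets, weights, tau=0.01)   # each entry moves to 0.01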
Example #34
def main(args=None):
    print(args)
    tf.reset_default_graph()
    """
    Read dataset parser     
    """
    flags.network_name = args[0].split('/')[-1].split('.')[0].split(
        'main_')[-1]
    flags.logs_dir = './logs_' + flags.network_name
    dataset_parser = GANParser(flags=flags)
    """
    Transform data to TFRecord format (Only do once.)     
    """
    if False:
        dataset_parser.load_paths(is_jpg=True, load_val=True)
        dataset_parser.data2record(name='{}_train.tfrecords'.format(
            dataset_parser.dataset_name),
                                   set_type='train',
                                   test_num=None)
        dataset_parser.data2record(name='{}_val.tfrecords'.format(
            dataset_parser.dataset_name),
                                   set_type='val',
                                   test_num=None)
        # coco_parser.data2record_test(name='coco_stuff2017_test-dev_all_label.tfrecords', is_dev=True, test_num=None)
        # coco_parser.data2record_test(name='coco_stuff2017_test_all_label.tfrecords', is_dev=False, test_num=None)
        return
    """
    Build Graph
    """
    with tf.Graph().as_default():
        """
        Input (TFRecord)
        """
        with tf.name_scope('TFRecord'):
            # DatasetA
            training_a_dataset = dataset_parser.tfrecord_get_dataset(
                name='{}_trainA.tfrecords'.format(dataset_parser.dataset_name),
                batch_size=flags.batch_size,
                shuffle_size=None)
            val_a_dataset = dataset_parser.tfrecord_get_dataset(
                name='{}_valA.tfrecords'.format(dataset_parser.dataset_name),
                batch_size=flags.batch_size,
                need_flip=(flags.mode == 'train'))
            # DatasetB
            training_b_dataset = dataset_parser.tfrecord_get_dataset(
                name='{}_trainB.tfrecords'.format(dataset_parser.dataset_name),
                batch_size=flags.batch_size,
                shuffle_size=None)
            val_b_dataset = dataset_parser.tfrecord_get_dataset(
                name='{}_valB.tfrecords'.format(dataset_parser.dataset_name),
                batch_size=flags.batch_size,
                need_flip=(flags.mode == 'train'))
            # A feed-able iterator
            with tf.name_scope('RealA'):
                handle_a = tf.placeholder(tf.string, shape=[])
                iterator_a = tf.contrib.data.Iterator.from_string_handle(
                    handle_a, training_a_dataset.output_types,
                    training_a_dataset.output_shapes)
                real_a, real_a_name, real_a_shape = iterator_a.get_next()
            with tf.name_scope('RealB'):
                handle_b = tf.placeholder(tf.string, shape=[])
                iterator_b = tf.contrib.data.Iterator.from_string_handle(
                    handle_b, training_b_dataset.output_types,
                    training_b_dataset.output_shapes)
                real_b, real_b_name, real_b_shape = iterator_b.get_next()
            with tf.name_scope('InitialA_op'):
                training_a_iterator = training_a_dataset.make_initializable_iterator(
                )
                validation_a_iterator = val_a_dataset.make_initializable_iterator(
                )
            with tf.name_scope('InitialB_op'):
                training_b_iterator = training_b_dataset.make_initializable_iterator(
                )
                validation_b_iterator = val_b_dataset.make_initializable_iterator(
                )
        """
        Network (Computes predictions from the inference model)
        """
        with tf.name_scope('Network'):
            # Input
            global_step = tf.Variable(0,
                                      trainable=False,
                                      name='global_step',
                                      dtype=tf.int32)
            global_step_update_op = tf.assign_add(global_step,
                                                  1,
                                                  name='global_step_update_op')
            # mean_rgb = tf.constant((123.68, 116.78, 103.94), dtype=tf.float32)
            fake_b_pool = tf.placeholder(tf.float32,
                                         shape=[
                                             None, flags.image_height,
                                             flags.image_width, flags.c_in_dim
                                         ],
                                         name='fake_B_pool')
            image_linear_shape = tf.constant(
                flags.image_height * flags.image_width * flags.c_in_dim,
                dtype=tf.int32,
                name='image_linear_shape')

            # A -> B
            '''
            with tf.name_scope('Generator'):
                with slim.arg_scope(vgg.vgg_arg_scope()):
                    net, end_points = vgg.vgg_16(real_a - mean_rgb, num_classes=1, is_training=True, 
                    spatial_squeeze=False)
                    print(net)
                    return

                with tf.variable_scope('Generator_A2B'):
                    pred = tf.layers.conv2d(tf.nn.relu(net), 1, 1, 1)
                    pred_upscale = tf.image.resize_bilinear(pred, (flags.image_height, flags.image_width), 
                    name='up_scale')
                    segment_a = tf.nn.sigmoid(pred_upscale, name='segment_a')

            # sigmoid cross entropy Loss
            with tf.name_scope('loss_gen_a2b'):
                loss_gen_a2b = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=pred_upscale, labels=real_b/255.0, name='sigmoid'), name='mean')
            '''

            # A -> B
            # adjusted_a = tf.zeros_like(real_a, tf.float32, name='mask', optimize=True)
            # adjusted_a = high_light(real_a, name='high_light')
            # adjusted_a = tf.layers.average_pooling2d(real_a, 7, strides=1, padding='same', name='adjusted_a')
            adjusted_a = gaussian_blur(real_a, name='adjusted_a')
            logits_a = generator_resnet(real_a,
                                        flags,
                                        False,
                                        name="Generator_A2B")
            segment_a = tf.nn.tanh(logits_a, name='segment_a')

            logits_a_ori = tf.image.resize_bilinear(
                logits_a, (real_a_shape[0][0], real_b_shape[0][1]),
                name='logits_a_ori')
            segment_a_ori = tf.nn.tanh(logits_a_ori, name='segment_a_ori')

            with tf.variable_scope('Fake_B'):
                foreground = tf.multiply(real_a, segment_a, name='foreground')
                background = tf.multiply(adjusted_a, (1 - segment_a),
                                         name='background')
                fake_b_logits = tf.add(foreground,
                                       background,
                                       name='fake_b_logits')
                fake_b = tf.clip_by_value(fake_b_logits, 0, 255, name='fake_b')

            #
            fake_b_f = tf.reshape(fake_b, [-1, image_linear_shape],
                                  name='fake_b_f')
            fake_b_pool_f = tf.reshape(fake_b_pool, [-1, image_linear_shape],
                                       name='fake_b_pool_f')
            real_b_f = tf.reshape(real_b, [-1, image_linear_shape],
                                  name='real_b_f')
            dis_fake_b = discriminator_se_wgangp(fake_b_f,
                                                 flags,
                                                 reuse=False,
                                                 name="Discriminator_B")
            dis_fake_b_pool = discriminator_se_wgangp(fake_b_pool_f,
                                                      flags,
                                                      reuse=True,
                                                      name="Discriminator_B")
            dis_real_b = discriminator_se_wgangp(real_b_f,
                                                 flags,
                                                 reuse=True,
                                                 name="Discriminator_B")

            # WGAN Loss
            with tf.name_scope('loss_gen_a2b'):
                loss_gen_a2b = -tf.reduce_mean(dis_fake_b)

            with tf.name_scope('loss_dis_b'):
                loss_dis_b_adv_real = -tf.reduce_mean(dis_real_b)
                loss_dis_b_adv_fake = tf.reduce_mean(dis_fake_b_pool)
                loss_dis_b = tf.reduce_mean(dis_fake_b_pool) - tf.reduce_mean(
                    dis_real_b)
                with tf.name_scope('wgan-gp'):
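                    # WGAN-GP gradient penalty (Gulrajani et al., 2017): evaluate
                    # the critic on random interpolates between real and pooled
                    # fake samples and penalize deviations of its gradient norm
                    # from 1, weighted by lambda_gp.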
                    alpha = tf.random_uniform(shape=[flags.batch_size, 1],
                                              minval=0.,
                                              maxval=1.)
                    differences = fake_b_pool_f - real_b_f
                    interpolates = real_b_f + (alpha * differences)
                    gradients = tf.gradients(
                        discriminator_se_wgangp(interpolates,
                                                flags,
                                                reuse=True,
                                                name="Discriminator_B"),
                        [interpolates])[0]
                    slopes = tf.sqrt(
                        tf.reduce_sum(tf.square(gradients),
                                      reduction_indices=[1]))
                    gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
                    loss_dis_b += flags.lambda_gp * gradient_penalty

            # Optimizer
            '''
            trainable_var_resnet = tf.get_collection(
                key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_16')
            trainable_var_gen_a2b = tf.get_collection(
                key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator_A2B') + trainable_var_resnet
            slim.model_analyzer.analyze_vars(trainable_var_gen_a2b, print_info=True)
            '''
            trainable_var_gen_a2b = tf.get_collection(
                key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator_A2B')
            trainable_var_dis_b = tf.get_collection(
                key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='Discriminator_B')
            with tf.name_scope('learning_rate_decay'):
                decay = tf.maximum(
                    0.,
                    1. -
                    (tf.cast(global_step, tf.float32) / flags.training_iter),
                    name='decay')
                learning_rate = tf.multiply(flags.learning_rate,
                                            decay,
                                            name='learning_rate')
            train_op_gen_a2b = train_op(loss_gen_a2b,
                                        learning_rate,
                                        flags,
                                        trainable_var_gen_a2b,
                                        name='gen_a2b')
            train_op_dis_b = train_op(loss_dis_b,
                                      learning_rate,
                                      flags,
                                      trainable_var_dis_b,
                                      name='dis_b')

        saver = tf.train.Saver(max_to_keep=2)
        # Graph Logs
        with tf.name_scope('GEN_a2b'):
            tf.summary.scalar("loss/gen_a2b/all", loss_gen_a2b)
        with tf.name_scope('DIS_b'):
            tf.summary.scalar("loss/dis_b/all", loss_dis_b)
            tf.summary.scalar("loss/dis_b/adv_real", loss_dis_b_adv_real)
            tf.summary.scalar("loss/dis_b/adv_fake", loss_dis_b_adv_fake)
        summary_op = tf.summary.merge_all()
        """
        Session
        """
        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            with tf.name_scope('Initial'):
                ckpt = tf.train.get_checkpoint_state(
                    dataset_parser.checkpoint_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    print("Model restored: {}".format(
                        ckpt.model_checkpoint_path))
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    print("No Model found.")
                    init_op = tf.group(tf.global_variables_initializer(),
                                       tf.local_variables_initializer())
                    sess.run(init_op)

                    # init_fn = slim.assign_from_checkpoint_fn('./pretrained/vgg_16.ckpt',
                    #                                          slim.get_model_variables('vgg_16'))
                    # init_fn(sess)
                summary_writer = tf.summary.FileWriter(dataset_parser.logs_dir,
                                                       sess.graph)
            """
            Training Mode
            """
            if flags.mode == 'train':
                print('Training mode! Batch size:{:d}'.format(
                    flags.batch_size))
                with tf.variable_scope('Input_port'):
                    training_a_handle = sess.run(
                        training_a_iterator.string_handle())
                    training_b_handle = sess.run(
                        training_b_iterator.string_handle())
                    # val_a_handle = sess.run(validation_a_iterator.string_handle())
                    # val_b_handle = sess.run(validation_b_iterator.string_handle())
                    image_pool_a, image_pool_b = ImagePool(
                        flags.pool_size), ImagePool(flags.pool_size)

                print('Start Training!')
                start_time = time.time()
                sess.run([
                    training_a_iterator.initializer,
                    training_b_iterator.initializer
                ])
                feed_dict_train = {
                    handle_a: training_a_handle,
                    handle_b: training_b_handle
                }
                # feed_dict_valid = {is_training: False}
                global_step_sess = sess.run(global_step)
                while global_step_sess < flags.training_iter:
                    try:
                        # Update gen_A2B, gen_B2A
                        _, fake_b_sess = sess.run([train_op_gen_a2b, fake_b],
                                                  feed_dict=feed_dict_train)

                        # _, loss_gen_a2b_sess = sess.run([train_op_gen_a2b, loss_gen_a2b], feed_dict=feed_dict_train)

                        # Update dis_B, dis_A
                        fake_b_pool_query = image_pool_b.query(fake_b_sess)
                        _ = sess.run(train_op_dis_b,
                                     feed_dict={
                                         fake_b_pool: fake_b_pool_query,
                                         handle_b: training_b_handle
                                     })

                        sess.run(global_step_update_op)
                        global_step_sess, learning_rate_sess = sess.run(
                            [global_step, learning_rate])
                        print(
                            'global step:[{:d}/{:d}], learning rate:{:f}, time:{:4.4f}'
                            .format(global_step_sess, flags.training_iter,
                                    learning_rate_sess,
                                    time.time() - start_time))

                        # Logging the events
                        if global_step_sess % flags.log_freq == 1:
                            print('Logging the events')
                            summary_op_sess = sess.run(summary_op,
                                                       feed_dict={
                                                           handle_a:
                                                           training_a_handle,
                                                           handle_b:
                                                           training_b_handle,
                                                           fake_b_pool:
                                                           fake_b_pool_query
                                                       })
                            summary_writer.add_summary(summary_op_sess,
                                                       global_step_sess)
                            # summary_writer.flush()

                        # Observe training situation (For debugging.)
                        if flags.debug and global_step_sess % flags.observe_freq == 1:
                            real_a_sess, real_b_sess, adjusted_a_sess, segment_a_sess, fake_b_sess, \
                                real_a_name_sess, real_b_name_sess = \
                                sess.run([real_a, real_b, adjusted_a, segment_a, fake_b,
                                          real_a_name, real_b_name],
                                         feed_dict={handle_a: training_a_handle, handle_b: training_b_handle})
                            print('Logging training images.')
                            dataset_parser.visualize_data(
                                real_a=real_a_sess,
                                real_b=real_b_sess,
                                adjusted_a=adjusted_a_sess,
                                segment_a=segment_a_sess,
                                fake_b=fake_b_sess,
                                shape=(1, 1),
                                global_step=global_step_sess,
                                logs_dir=dataset_parser.logs_image_train_dir,
                                real_a_name=real_a_name_sess[0].decode(),
                                real_b_name=real_b_name_sess[0].decode())
                        """
                        Saving the checkpoint
                        """
                        if global_step_sess % flags.save_freq == 0:
                            print('Saving model...')
                            saver.save(sess,
                                       dataset_parser.checkpoint_dir +
                                       '/model.ckpt',
                                       global_step=global_step_sess)

                    except tf.errors.OutOfRangeError:
                        print(
                            '----------------One epoch finished!----------------'
                        )
                        sess.run([
                            training_a_iterator.initializer,
                            training_b_iterator.initializer
                        ])
            elif flags.mode == 'test':
                from PIL import Image
                import numpy as np
                print('Start Testing!')
                '''
                with tf.variable_scope('Input_port'):
                    val_a_handle = sess.run(validation_a_iterator.string_handle())
                    val_b_handle = sess.run(validation_b_iterator.string_handle())
                sess.run([validation_a_iterator.initializer, validation_b_iterator.initializer])
                '''
                with tf.variable_scope('Input_port'):
                    val_a_handle = sess.run(
                        training_a_iterator.string_handle())
                    val_b_handle = sess.run(
                        training_b_iterator.string_handle())
                sess.run([
                    training_a_iterator.initializer,
                    training_b_iterator.initializer
                ])
                feed_dict_test = {
                    handle_a: val_a_handle,
                    handle_b: val_b_handle
                }
                image_idx = 0
                while True:
                    try:
                        segment_a_ori_sess, real_a_name_sess = \
                            sess.run([segment_a_ori, real_a_name], feed_dict=feed_dict_test)
                        segment_a_ori_sess = np.squeeze(
                            segment_a_ori_sess) * 255
                        x_png = Image.fromarray(
                            segment_a_ori_sess.astype(np.uint8))
                        x_png.save('{}/{}.png'.format(
                            dataset_parser.logs_image_val_dir,
                            real_a_name_sess[0].decode()),
                                   format='PNG')

                        print(image_idx)
                        image_idx += 1
                    except tf.errors.OutOfRangeError:
                        print(
                            '----------------One epoch finished!----------------'
                        )
                        break
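
The network above tracks progress with a hand-rolled counter: a non-trainable
global_step variable bumped by tf.assign_add, from which a linearly decaying
learning rate is derived. A minimal, self-contained sketch of just that pattern
(the two constants are illustrative stand-ins for flags.training_iter and
flags.learning_rate):

import tensorflow as tf

training_iter = 100000        # stand-in for flags.training_iter
base_learning_rate = 1e-4     # stand-in for flags.learning_rate

global_step = tf.Variable(0, trainable=False, dtype=tf.int32, name='global_step')
global_step_update_op = tf.assign_add(global_step, 1, name='global_step_update_op')

# Linear decay from 1 to 0 over the planned number of iterations, floored at 0.
decay = tf.maximum(0., 1. - tf.cast(global_step, tf.float32) / training_iter)
learning_rate = tf.multiply(base_learning_rate, decay, name='learning_rate')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(3):
        sess.run(global_step_update_op)
        print(sess.run([global_step, learning_rate]))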
Example #35
0
    def batch_inputs(self, dataset, train):
        """Contruct batches of training or evaluation examples from the image input_data.
  
        Args:
          dataset: instance of Dataset class specifying the input_data.
            See input_data.py for details.
          batch_size: integer
          train: boolean
          num_preprocess_threads: integer, total number of preprocessing threads
          num_readers: integer, number of parallel readers
  
        Returns:
          images: 4-D float Tensor of a batch of images
          labels: 1-D integer Tensor of [batch_size].
  
        Raises:
          ValueError: if data is not found
        """
        with tf.name_scope('batch_processing'):
            data_files = dataset.data_files()
            if data_files is None:
                raise ValueError('No data files found for this input_data')

            # Create filename_queue
            if train:
                filename_queue = tf.train.string_input_producer(data_files,
                                                                shuffle=True,
                                                                capacity=16)
            else:
                filename_queue = tf.train.string_input_producer(data_files,
                                                                shuffle=False,
                                                                capacity=1)

            # Approximate number of examples per shard.
            examples_per_shard = 1024
            # Size the random shuffle queue to balance between good global
            # mixing (more examples) and memory use (fewer examples).
            # 1 image uses 299*299*3*4 bytes = 1MB
            # The default input_queue_memory_factor is 16 implying a shuffling queue
            # size: examples_per_shard * 16 * 1MB = 17.6GB
            min_queue_examples = examples_per_shard * self.input_queue_memory_factor
            if train:
                examples_queue = tf.RandomShuffleQueue(
                    capacity=min_queue_examples + 3 * self.batch_size,
                    min_after_dequeue=min_queue_examples,
                    dtypes=[tf.string])
            else:
                examples_queue = tf.FIFOQueue(capacity=examples_per_shard +
                                              3 * self.batch_size,
                                              dtypes=[tf.string])

            # Create multiple readers to populate the queue of examples.
            if self.num_readers > 1:
                enqueue_ops = []
                for _ in range(self.num_readers):
                    reader = dataset.reader()
                    _, value = reader.read(filename_queue)
                    enqueue_ops.append(examples_queue.enqueue([value]))

                tf.train.queue_runner.add_queue_runner(
                    tf.train.queue_runner.QueueRunner(examples_queue,
                                                      enqueue_ops))
                example_serialized = examples_queue.dequeue()
            else:
                reader = dataset.reader()
                _, example_serialized = reader.read(filename_queue)

            pos_queue = None
            neg_queue = None

            if self.batch_size < 2:
                pos_queue = tf.RandomShuffleQueue(
                    name="pos-queue",
                    capacity=10,
                    min_after_dequeue=5,
                    dtypes=[tf.float32, tf.float32, tf.string])
                neg_queue = tf.RandomShuffleQueue(
                    name="neg-queue",
                    capacity=10,
                    min_after_dequeue=5,
                    dtypes=[tf.float32, tf.float32, tf.string])

            pos_queue_enq = []
            neg_queue_enq = []

            with tf.name_scope('split-merge'):
                if train and self.ensure_posneg_balance:
                    images_and_masks = []
                    for thread_id in range(self.num_preprocess_threads):
                        # Parse a serialized Example proto to extract the image and metadata.
                        image_buffer, mask_buffer, img_name_ = self.parse_example_proto(
                            example_serialized)

                        image_ = self.image_preprocessing(
                            image_buffer,
                            img_size=(self.input_size[0], self.input_size[1]),
                            num_channels=self.input_size[2])
                        mask_ = self.image_preprocessing(
                            mask_buffer,
                            img_size=(self.mask_size[0], self.mask_size[1]),
                            num_channels=self.mask_size[2])

                        image_ = tf.expand_dims(image_, 0)
                        mask_ = tf.expand_dims(mask_, 0)
                        img_name_ = tf.expand_dims(img_name_, 0)

                        img_shape = tf.TensorShape([
                            image_.shape[1], image_.shape[2], image_.shape[3]
                        ])
                        mask_shape = tf.TensorShape(
                            [mask_.shape[1], mask_.shape[2], mask_.shape[3]])
                        img_name_shape = tf.TensorShape([])

                        # Initialize the pos/neg queues with the proper shapes on the first iteration.
                        if pos_queue is None or neg_queue is None:
                            pos_queue = tf.RandomShuffleQueue(
                                name="pos-queue",
                                capacity=10,
                                min_after_dequeue=5,
                                dtypes=[tf.float32, tf.float32, tf.string],
                                shapes=[img_shape, mask_shape, img_name_shape])
                            neg_queue = tf.RandomShuffleQueue(
                                name="neg-queue",
                                capacity=10,
                                min_after_dequeue=5,
                                dtypes=[tf.float32, tf.float32, tf.string],
                                shapes=[img_shape, mask_shape, img_name_shape])

                        is_pos = tf.squeeze(
                            tf.reduce_sum(mask_, [1, 2], keep_dims=False))

                        neg_mask = tf.less_equal(is_pos, 0)

                        pos_idx = tf.reshape(
                            tf.where([tf.logical_not(neg_mask)]), [-1])
                        neg_idx = tf.reshape(tf.where([neg_mask]), [-1])

                        pos_data = [
                            tf.gather(image_, pos_idx),
                            tf.gather(mask_, pos_idx),
                            tf.gather(img_name_, pos_idx)
                        ]
                        neg_data = [
                            tf.gather(image_, neg_idx),
                            tf.gather(mask_, neg_idx),
                            tf.gather(img_name_, neg_idx)
                        ]

                        pos_queue_enq.append(pos_queue.enqueue_many(pos_data))
                        neg_queue_enq.append(neg_queue.enqueue_many(neg_data))

                    tf.train.queue_runner.add_queue_runner(
                        tf.train.queue_runner.QueueRunner(
                            pos_queue, pos_queue_enq))
                    tf.train.queue_runner.add_queue_runner(
                        tf.train.queue_runner.QueueRunner(
                            neg_queue, neg_queue_enq))

                    if self.batch_size >= 2:
                        if self.batch_size % 2 != 0:
                            raise Exception(
                                "'batch_size' must be even; odd batch sizes are not supported at the moment"
                            )

                        num_deque = int(self.batch_size / 2)

                        pos_data = pos_queue.dequeue_many(num_deque)
                        neg_data = neg_queue.dequeue_many(num_deque)

                        concat_data = [
                            tf.concat([pos_data[0], neg_data[0]],
                                      axis=0,
                                      name='Concat-img'),
                            tf.concat([pos_data[1], neg_data[1]],
                                      axis=0,
                                      name='Concat-mask'),
                            tf.concat([pos_data[2], neg_data[2]],
                                      axis=0,
                                      name='Concat-img-name')
                        ]

                        # Randomly permute examples within the batch so positives and negatives are interleaved (possibly redundant).
                        idx = tf.Variable(range(0, self.batch_size),
                                          trainable=False,
                                          dtype=tf.int32)
                        idx = tf.random_shuffle(idx)

                        images = tf.gather(concat_data[0], idx)
                        masks = tf.gather(concat_data[1], idx)
                        img_names = tf.gather(concat_data[2], idx)

                    else:
                        # positive only
                        #images, masks, img_names = pos_queue.dequeue()

                        # negative only
                        #images, masks, img_names = neg_queue.dequeue()

                        # mix 50/50
                        counter = tf.Variable(initial_value=0,
                                              trainable=False,
                                              dtype=tf.int32)

                        counter = tf.assign_add(counter, 1)
                        condition_term = tf.equal(tf.mod(counter, 2),
                                                  tf.constant(0))
                        images, masks, img_names = tf.cond(
                            condition_term, lambda: pos_queue.dequeue(),
                            lambda: neg_queue.dequeue())

                        if self.use_random_rotation:
                            images.set_shape(
                                tensor_shape.as_shape([None, None, 1]))
                            masks.set_shape(
                                tensor_shape.as_shape([None, None, 1]))

                            # randomly rotate the image by a multiple of 90 degrees (0, 90 or 180)
                            rot_factor = tf.random_uniform([1],
                                                           minval=0,
                                                           maxval=3,
                                                           dtype=tf.int32)
                            rot_factor = tf.gather(rot_factor, 0)

                            images = tf.image.rot90(images, k=rot_factor)
                            masks = tf.image.rot90(masks, k=rot_factor)

                        images = tf.expand_dims(images, axis=0)
                        masks = tf.expand_dims(masks, axis=0)
                        img_names = tf.expand_dims(img_names, axis=0)
                else:

                    # Parse a serialized Example proto to extract the image and metadata.
                    image_buffer, mask_buffer, img_names = self.parse_example_proto(
                        example_serialized)

                    images = self.image_preprocessing(
                        image_buffer,
                        img_size=(self.input_size[0], self.input_size[1]),
                        num_channels=self.input_size[2])
                    masks = self.image_preprocessing(
                        mask_buffer,
                        img_size=(self.mask_size[0], self.mask_size[1]),
                        num_channels=1)

                    images = tf.expand_dims(images, axis=0)
                    masks = tf.expand_dims(masks, axis=0)
                    img_names = tf.expand_dims(img_names, axis=0)

            # Set the static shapes expected by downstream ops.
            images = tf.cast(images, tf.float32)
            masks = tf.cast(masks, tf.float32)

            images.set_shape(
                tensor_shape.as_shape(
                    [self.batch_size, None, None, self.input_size[2]]))
            masks.set_shape(
                tensor_shape.as_shape([
                    self.batch_size, self.input_size[0], self.input_size[1],
                    self.mask_size[2]
                ]))

            # Display the training images in the visualizer.
            tf.summary.image('images', images)
            tf.summary.image('masks', masks)

            return images, masks, img_names
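
The single-example branch above alternates between the positive and negative
queues by incrementing a counter with tf.assign_add and switching on its parity
with tf.cond. A minimal sketch of that mechanism, with two constant sources
standing in for the queues (names are illustrative):

import tensorflow as tf

pos_value = tf.constant(1.0)    # stand-in for pos_queue.dequeue()
neg_value = tf.constant(-1.0)   # stand-in for neg_queue.dequeue()

counter = tf.Variable(0, trainable=False, dtype=tf.int32)
counter = tf.assign_add(counter, 1)
take_pos = tf.equal(tf.mod(counter, 2), 0)
sample = tf.cond(take_pos, lambda: pos_value, lambda: neg_value)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print([sess.run(sample) for _ in range(4)])  # strictly alternates the two sources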
Example #36
0
    def __init__(self, name, make_model,
                 devices=get_available_gpus(),
                 master_device=None,
                 TrainerClass=SampleBasedTrainer,
                 sess=None, *args, verbose=False, **kwargs):
        """ A wrapper-class that performs batch-parallel training with some trainer. """

        self.name = name
        self.sess = sess = sess or tf.get_default_session() or tf.InteractiveSession()
        self.master_device = master_device = master_device or next(iter(devices))
        assert master_device in devices
        self.verbose = verbose

        class Worker(TrainerClass):
            def get_optimizer(self, *args, **kwargs):
                """ Worker does not update weights by itself. use sgd to avoid wasting memory """
                return tf.train.GradientDescentOptimizer(learning_rate=0)

        with tf.variable_scope(name):
            self.workers_by_device = {}
            for i, device in enumerate(devices):
                with tf.device(device), tf.variable_scope('worker_%i' % i):
                    model = make_model()
                    if device == master_device:
                        worker = TrainerClass(model, *args, **kwargs)
                    else:
                        worker = Worker(model, *args, **kwargs)
                    self.workers_by_device[device] = worker

                if verbose:
                    print("Created model {} weights and worker on device {}"
                          "".format(model.name, device))

        self.master_model = self.workers_by_device[master_device].model
        self.master_worker = self.workers_by_device[self.master_device]
        assert isinstance(self.master_worker, TrainerClass)

        # step 1: send main model's weights to all worker replicas
        self.scatter_weights = []

        for device, worker in self.workers_by_device.items():
            if worker == self.master_worker:
                continue
            self.scatter_weights.extend(map(tf.assign,
                                            worker.optimized_variables,
                                            self.master_worker.optimized_variables))

        # step 2: compute grads and counters at all workers
        self.gather_grads, self.gather_counters = [], []
        for device, worker in self.workers_by_device.items():
            if worker == self.master_worker:
                continue
            self.gather_grads.extend(
                map(tf.assign_add, self.master_worker.accumulated_grads, worker.accumulated_grads)
            )
            self.gather_grads.append(
                tf.assign_add(self.master_worker.accumulated_num_batches, worker.accumulated_num_batches)
            )
            master_counters_flat = [self.master_worker.accumulated_counters[name]
                                    for name in sorted(self.master_worker.accumulated_counters.keys())]
            worker_counters_flat = [worker.accumulated_counters[name]
                                    for name in sorted(self.master_worker.accumulated_counters.keys())]
            self.gather_counters.extend(
                map(tf.assign_add, master_counters_flat, worker_counters_flat)
            )

        # step 3: perform gradient step and reset all accumulated values
        self.reset_slave_grads = [
            worker.reset_gradients for worker in self.workers_by_device.values()
            if worker != self.master_worker
        ]
        self.reset_slave_counters = [
            worker.reset_counters for worker in self.workers_by_device.values()
            if worker != self.master_worker
        ]
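
The gather_grads list above reduces each worker's accumulated gradients into
the master with tf.assign_add (while scatter_weights broadcasts the master
weights back with tf.assign). A minimal sketch of the accumulate-and-reset
half, with two plain variables standing in for the accumulators (names are
illustrative):

import tensorflow as tf

master_grads = tf.Variable(tf.zeros([3]), trainable=False, name='master_grads')
worker_grads = tf.Variable(tf.ones([3]), trainable=False, name='worker_grads')

# Fold the worker's accumulated gradients into the master, then clear the worker.
gather = tf.assign_add(master_grads, worker_grads)
reset = tf.assign(worker_grads, tf.zeros_like(worker_grads))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(gather)
    print(sess.run(master_grads))  # [1. 1. 1.]
    sess.run(reset)
    print(sess.run(worker_grads))  # [0. 0. 0.]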
Example #37
0
    def model(features, labels, mode):

        tc = constant_tensors()
        tc_1d1_goals_f, tc_home_points_i, tc_away_points_i, calc_poisson_prob, p_tendency_mask_f, p_gdiff_mask_f, p_fulltime_index_matrix = tc

        with tf.variable_scope("Model"):

            logits1H = buildGraph("1H", features, columns, mode)
            t_is_home_bool = tf.equal(features["Where"], "Home")
            predictions1H = create_predictions(logits1H, t_is_home_bool, tc)

            if mode == tf.estimator.ModeKeys.TRAIN:
                logits2H = buildGraph(
                    "2H", features, columns, mode,
                    tf.stack([features["T1_GHT"], features["T2_GHT"]], axis=1))
            else:
                logits2H = buildGraph("2H", features, columns, mode,
                                      predictions1H["pred"])

            predictions2H = create_predictions(logits2H, t_is_home_bool, tc)
            predictions = combine1H2H(predictions1H, predictions2H,
                                      t_is_home_bool, tc_home_points_i,
                                      tc_away_points_i,
                                      p_fulltime_index_matrix,
                                      calc_poisson_prob)

            logits2H_alt = buildGraph("2H", features, columns, mode,
                                      predictions1H["alt_pred"])
            if mode == tf.estimator.ModeKeys.PREDICT:
                # Build alternative prediction with 2nd-most likely outcome of 1H
                predictions2H_alt = create_predictions(logits2H_alt,
                                                       t_is_home_bool, tc)
                predictions_alt = combine1H2H(predictions1H, predictions2H_alt,
                                              t_is_home_bool, tc_home_points_i,
                                              tc_away_points_i,
                                              p_fulltime_index_matrix,
                                              calc_poisson_prob)
                for key, value in predictions_alt.items():
                    predictions['Alt_' + key] = value

            export_outputs = {
                "predictions":
                tf.estimator.export.RegressionOutput(predictions["p_marg_1"])
            }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions,
                                              export_outputs=export_outputs)

        with tf.variable_scope("Evaluation"):

            t_is_home_loss_bool = (t_is_home_bool & tf.less(
                features["T1_GFT"], features["T2_GFT"])) | (
                    tf.logical_not(t_is_home_bool)
                    & tf.greater(features["T1_GFT"], features["T2_GFT"]))
            t_is_home_win_bool = (t_is_home_bool & tf.greater(
                features["T1_GFT"], features["T2_GFT"])) | (
                    tf.logical_not(t_is_home_bool)
                    & tf.less(features["T1_GFT"], features["T2_GFT"]))

            eval_metric_ops_1H, loss_1H = create_losses_and_metrics_HH(
                "1H", predictions1H, features["T1_GHT"], features["T2_GHT"],
                t_is_home_bool, mode, tc, t_is_home_win_bool,
                t_is_home_loss_bool)
            eval_metric_ops_2H, loss_2H = create_losses_and_metrics_HH(
                "2H", predictions2H, features["T1_GFT"] - features["T1_GHT"],
                features["T2_GFT"] - features["T2_GHT"], t_is_home_bool, mode,
                tc, t_is_home_win_bool, t_is_home_loss_bool)

            eval_metric_ops = eval_metric_ops_1H
            eval_metric_ops.update(eval_metric_ops_2H)
            loss = loss_1H + loss_2H

            # softpoints
            gs = tf.minimum(features["T1_GFT"], 6)
            gc = tf.minimum(features["T2_GFT"], 6)
            achievable_points_mask = tf.where(
                t_is_home_bool, tf.gather(tc_home_points_i, gs * 7 + gc),
                tf.gather(tc_away_points_i, gs * 7 + gc))
            pt_softpoints = tf.reduce_sum(predictions["p_pred_12"] *
                                          achievable_points_mask,
                                          axis=1)
            eval_metric_ops["pt_softpoints"] = tf.metrics.mean(pt_softpoints)
            loss -= tf.reduce_mean(pt_softpoints)

            result_metrics = create_result_metrics(predictions["pred"][:, 0],
                                                   predictions["pred"][:, 1],
                                                   features["T1_GFT"],
                                                   features["T2_GFT"],
                                                   t_is_home_bool)
            eval_metric_ops.update(result_metrics)
            # loss -= eval_metric_ops["z_points"][1]

        for key, value in eval_metric_ops.items():
            tf.summary.scalar(key, value[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions,
                                              loss=loss,
                                              eval_metric_ops=eval_metric_ops)

        global_step = tf.train.get_global_step()
        #optimizer = tf.train.GradientDescentOptimizer(1e-4)
        learning_rate = 1e-2
        print("Learning rate = {}".format(learning_rate))
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train = tf.group(optimizer.minimize(loss),
                         tf.assign_add(global_step, 1))

        summary_op = tf.summary.merge_all()
        summary_hook = tf.train.SummarySaverHook(save_steps=100,
                                                 output_dir=model_dir +
                                                 "/train",
                                                 scaffold=None,
                                                 summary_op=summary_op)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train,
                                          eval_metric_ops=eval_metric_ops,
                                          training_hooks=[summary_hook])
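
The estimator's train op above pairs optimizer.minimize(loss) with an explicit
tf.assign_add(global_step, 1) inside tf.group. For reference, a minimal sketch
of the equivalent built-in route, where minimize() advances the step itself
(the toy loss is illustrative):

import tensorflow as tf

x = tf.Variable(3.0)
loss = tf.square(x)

global_step = tf.train.get_or_create_global_step()
optimizer = tf.train.AdamOptimizer(1e-2)
# minimize() can increment the step as part of the training op.
train = optimizer.minimize(loss, global_step=global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train)
    print(sess.run(global_step))  # 1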
def train(name,
          hparams,
          multi_gpu=False,
          n_models=1,
          train_completeness_threshold=0.01,
          seed=None,
          logdir='data/logs',
          max_epoch=100,
          patience=2,
          train_sampling=1.0,
          eval_sampling=1.0,
          eval_memsize=5,
          gpu=0,
          gpu_allow_growth=False,
          save_best_model=False,
          forward_split=False,
          write_summaries=False,
          verbose=False,
          asgd_decay=None,
          tqdm=True,
          side_split=True,
          max_steps=None,
          save_from_step=None,
          do_eval=True,
          predict_window=63,
          back_offset=0):

    eval_k = int(round(2621 * eval_memsize / n_models))
    eval_batch_size = int(
        eval_k /
        (hparams.rnn_depth *
         hparams.encoder_rnn_layers))  # 128 -> 1024, 256->512, 512->256
    eval_pct = 0.2
    batch_size = hparams.batch_size
    train_window = hparams.train_window
    tf.reset_default_graph()
    if seed:
        tf.set_random_seed(seed)

    with tf.device("/cpu:0"):
        inp = VarFeeder.read_vars("data/vars")
        if side_split:
            splitter = Splitter(ucdoc_features(inp),
                                inp.page_map,
                                3,
                                train_sampling=train_sampling,
                                test_sampling=eval_sampling,
                                seed=seed)
        else:
            splitter = FakeSplitter(ucdoc_features(inp),
                                    3,
                                    seed=seed,
                                    test_sampling=eval_sampling)

    real_train_pages = splitter.splits[0].train_size
    real_eval_pages = splitter.splits[0].test_size

    items_per_eval = real_eval_pages * eval_pct
    eval_batches = int(np.ceil(items_per_eval / eval_batch_size))
    steps_per_epoch = real_train_pages // batch_size
    eval_every_step = int(round(steps_per_epoch * eval_pct))
    # eval_every_step = int(round(items_per_eval * train_sampling / batch_size))

    global_step = tf.train.get_or_create_global_step()
    inc_step = tf.assign_add(global_step, 1)

    all_models: List[ModelTrainerV2] = []

    def create_model(scope, index, prefix, seed):

        with tf.variable_scope('input') as inp_scope:
            with tf.device("/cpu:0"):
                split = splitter.splits[index]
                pipe = InputPipe(
                    inp,
                    features=split.train_set,
                    n_pages=split.train_size,
                    # mode=ModelMode.TRAIN, batch_size=batch_size, n_epoch=None, verbose=verbose,
                    mode=ModelMode.TRAIN_SKIP_PREDICT,
                    batch_size=batch_size,
                    n_epoch=None,
                    verbose=verbose,
                    train_completeness_threshold=train_completeness_threshold,
                    predict_completeness_threshold=train_completeness_threshold,
                    train_window=train_window,
                    predict_window=predict_window,
                    rand_seed=seed,
                    train_skip_first=hparams.train_skip_first,
                    back_offset=back_offset)
                inp_scope.reuse_variables()
                if side_split:
                    side_eval_pipe = InputPipe(
                        inp,
                        features=split.test_set,
                        n_pages=split.test_size,
                        mode=ModelMode.EVAL,
                        batch_size=eval_batch_size,
                        n_epoch=None,
                        verbose=verbose,
                        predict_window=predict_window,
                        train_completeness_threshold=0.01,
                        predict_completeness_threshold=0,
                        train_window=train_window,
                        rand_seed=seed,
                        runs_in_burst=eval_batches,
                        back_offset=predict_window *
                        (2 if forward_split else 1))
                else:
                    side_eval_pipe = None
                if forward_split:
                    forward_eval_pipe = InputPipe(
                        inp,
                        features=split.test_set,
                        n_pages=split.test_size,
                        mode=ModelMode.EVAL,
                        batch_size=eval_batch_size,
                        n_epoch=None,
                        verbose=verbose,
                        predict_window=predict_window,
                        train_completeness_threshold=0.01,
                        predict_completeness_threshold=0,
                        train_window=train_window,
                        rand_seed=seed,
                        runs_in_burst=eval_batches,
                        back_offset=predict_window)
                else:
                    forward_eval_pipe = None
        avg_sgd = asgd_decay is not None
        #asgd_decay = 0.99 if avg_sgd else None
        train_model = Model(pipe,
                            hparams,
                            is_train=True,
                            graph_prefix=prefix,
                            asgd_decay=asgd_decay,
                            seed=seed)
        scope.reuse_variables()

        eval_stages = []
        if side_split:
            side_eval_model = Model(
                side_eval_pipe,
                hparams,
                is_train=False,
                #loss_mask=np.concatenate([np.zeros(50, dtype=np.float32), np.ones(10, dtype=np.float32)]),
                seed=seed)
            eval_stages.append((Stage.EVAL_SIDE, side_eval_model))
            if avg_sgd:
                eval_stages.append((Stage.EVAL_SIDE_EMA, side_eval_model))
        if forward_split:
            forward_eval_model = Model(forward_eval_pipe,
                                       hparams,
                                       is_train=False,
                                       seed=seed)
            eval_stages.append((Stage.EVAL_FRWD, forward_eval_model))
            if avg_sgd:
                eval_stages.append((Stage.EVAL_FRWD_EMA, forward_eval_model))

        if write_summaries:
            summ_path = f"{logdir}/{name}_{index}"
            if os.path.exists(summ_path):
                shutil.rmtree(summ_path)
            summ_writer = tf.summary.FileWriter(
                summ_path)  # , graph=tf.get_default_graph()
        else:
            summ_writer = None
        if do_eval and forward_split:
            stop_metric = lambda metrics: metrics[Stage.EVAL_FRWD]['SMAPE'].avg_epoch
        else:
            stop_metric = None
        return ModelTrainerV2(train_model,
                              eval_stages,
                              index,
                              patience=patience,
                              stop_metric=stop_metric,
                              summary_writer=summ_writer)

    if n_models == 1:
        with tf.device(f"/gpu:{gpu}"):
            scope = tf.get_variable_scope()
            all_models = [create_model(scope, 0, None, seed=seed)]
    else:
        for i in range(n_models):
            device = f"/gpu:{i}" if multi_gpu else f"/gpu:{gpu}"
            with tf.device(device):
                prefix = f"m_{i}"
                with tf.variable_scope(prefix) as scope:
                    all_models.append(
                        create_model(scope, i, prefix=prefix, seed=seed + i))
    trainer = MultiModelTrainer(all_models, inc_step)
    if save_best_model or save_from_step:
        saver_path = f'data/cpt/{name}'
        if os.path.exists(saver_path):
            shutil.rmtree(saver_path)
        os.makedirs(saver_path)
        saver = tf.train.Saver(max_to_keep=10, name='train_saver')
    else:
        saver = None
    avg_sgd = asgd_decay is not None
    if avg_sgd:
        from itertools import chain

        def ema_vars(model):
            ema = model.train_model.ema
            return {
                ema.average_name(v): v
                for v in model.train_model.ema._averages
            }

        ema_names = dict(
            chain(*[ema_vars(model).items() for model in all_models]))
        #ema_names = all_models[0].train_model.ema.variables_to_restore()
        ema_loader = tf.train.Saver(var_list=ema_names,
                                    max_to_keep=1,
                                    name='ema_loader')
        ema_saver = tf.train.Saver(max_to_keep=1, name='ema_saver')
    else:
        ema_loader = None

    init = tf.global_variables_initializer()

    if forward_split and do_eval:
        eval_smape = trainer.metric(Stage.EVAL_FRWD, 'SMAPE')
        eval_mae = trainer.metric(Stage.EVAL_FRWD, 'MAE')
    else:
        eval_smape = DummyMetric()
        eval_mae = DummyMetric()

    if side_split and do_eval:
        eval_mae_side = trainer.metric(Stage.EVAL_SIDE, 'MAE')
        eval_smape_side = trainer.metric(Stage.EVAL_SIDE, 'SMAPE')
    else:
        eval_mae_side = DummyMetric()
        eval_smape_side = DummyMetric()

    train_smape = trainer.metric(Stage.TRAIN, 'SMAPE')
    train_mae = trainer.metric(Stage.TRAIN, 'MAE')
    grad_norm = trainer.metric(Stage.TRAIN, 'GrNorm')
    eval_stages = []
    ema_eval_stages = []
    if forward_split and do_eval:
        eval_stages.append(Stage.EVAL_FRWD)
        ema_eval_stages.append(Stage.EVAL_FRWD_EMA)
    if side_split and do_eval:
        eval_stages.append(Stage.EVAL_SIDE)
        ema_eval_stages.append(Stage.EVAL_SIDE_EMA)

    # gpu_options=tf.GPUOptions(allow_growth=False),
    with tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True,
                                  gpu_options=tf.GPUOptions(
                                      allow_growth=gpu_allow_growth))) as sess:
        sess.run(init)
        # pipe.load_vars(sess)
        inp.restore(sess)
        for model in all_models:
            model.init(sess)
        # if beholder:
        #    visualizer = Beholder(session=sess, logdir=summ_path)
        step = 0
        prev_top = np.inf
        best_smape = np.inf
        # Contains best value (first item) and subsequent values
        best_epoch_smape = []

        for epoch in range(max_epoch):

            # n_steps = pusher.n_pages // batch_size
            if tqdm:
                #tqr = trange(steps_per_epoch, desc="%2d" % (epoch + 1), leave=False)
                tqr = trange(steps_per_epoch,
                             desc="%2d" % (epoch + 1),
                             leave=False,
                             file=logging.root.handlers[0].stream)

            else:
                tqr = range(steps_per_epoch)

            for _ in tqr:
                try:
                    step = trainer.train_step(sess, epoch)

                    pred, time_y, true_y, true_x, time_x, page_ix, norm_mean, norm_std, lagged_ix = sess.run(
                        [
                            trainer.trainers[0].train_model.predictions,
                            trainer.trainers[0].train_model.inp.time_y,
                            trainer.trainers[0].train_model.inp.true_y,
                            trainer.trainers[0].train_model.inp.true_x,
                            trainer.trainers[0].train_model.inp.time_x,
                            trainer.trainers[0].train_model.inp.page_ix,
                            trainer.trainers[0].train_model.inp.norm_mean,
                            trainer.trainers[0].train_model.inp.norm_std,
                            trainer.trainers[0].train_model.inp.lagged_x
                        ])
                    #sess.run(trainer.trainers[0].train_model.inp.inp.hits)
                    #inp = all_models[0].train_model.inp.inp,

                    pred_exp = np.round(np.expm1(pred))

                    true_exp = np.expm1(true_y)

                    error_exp = np.mean(
                        np.abs(true_exp - pred_exp) / (true_exp))
                    error = np.mean(np.abs(true_y - pred) / (true_y))
                    # page_ix = sess.run([trainer.trainers[0].train_model.inp.page_ix])[0][0]
                    # true_x = sess.run([trainer.trainers[0].train_model.inp.true_x])[0][0]
                    last_error = error_exp
                    epsilon = 0.1  # Smoothing factor, helps SMAPE to be well-behaved near zero
                    true_o = np.expm1(true_y)
                    pred_o = np.expm1(pred)
                    summ = np.maximum(np.abs(true_o) + epsilon, 0.5 + epsilon)
                    smape = np.mean(np.abs(pred_o - true_o) / summ)

                except tf.errors.OutOfRangeError:
                    break
                    # if beholder:
                    #  if step % 5 == 0:
                    # noinspection PyUnboundLocalVariable
                    #  visualizer.update()
                if step % eval_every_step == 0:
                    if eval_stages:
                        trainer.eval_step(sess,
                                          epoch,
                                          step,
                                          eval_batches,
                                          stages=eval_stages)

                    if save_best_model and epoch > 0 and eval_smape.last < best_smape:
                        best_smape = eval_smape.last
                        saver.save(sess,
                                   f'data/cpt/{name}/cpt',
                                   global_step=step)
                    if save_from_step and step >= save_from_step:
                        saver.save(sess,
                                   f'data/cpt/{name}/cpt',
                                   global_step=step)

                    if avg_sgd and ema_eval_stages:
                        ema_saver.save(sess,
                                       'data/cpt_tmp/ema',
                                       write_meta_graph=False)
                        # restore ema-backed vars
                        ema_loader.restore(sess, 'data/cpt_tmp/ema')

                        trainer.eval_step(sess,
                                          epoch,
                                          step,
                                          eval_batches,
                                          stages=ema_eval_stages)
                        # restore normal vars
                        ema_saver.restore(sess, 'data/cpt_tmp/ema')

                MAE = "%.3f/%.3f/%.3f" % (eval_mae.last, eval_mae_side.last,
                                          train_mae.last)
                improvement = '↑' if eval_smape.improved else ' '
                SMAPE = "%s%.3f/%.3f/%.3f" % (improvement, eval_smape.last,
                                              eval_smape_side.last,
                                              train_smape.last)
                if tqdm:
                    tqr.set_postfix(gr=grad_norm.last, MAE=MAE, SMAPE=SMAPE)
                if not trainer.has_active() or (max_steps
                                                and step > max_steps):
                    break

            if tqdm:
                tqr.close()
            trainer.end_epoch()
            if not best_epoch_smape or eval_smape.avg_epoch < best_epoch_smape[
                    0]:
                best_epoch_smape = [eval_smape.avg_epoch]
            else:
                best_epoch_smape.append(eval_smape.avg_epoch)

            current_top = eval_smape.top
            if prev_top > current_top:
                prev_top = current_top
                has_best_indicator = '↑'
            else:
                has_best_indicator = ' '
            status = "%2d: Best top SMAPE=%.3f%s (%s)" % (
                epoch + 1, current_top, has_best_indicator, ",".join(
                    ["%.3f" % m.top for m in eval_smape.metrics]))

            if trainer.has_active():
                status += ", frwd/side best MAE=%.3f/%.3f, SMAPE=%.3f/%.3f; avg MAE=%.3f/%.3f, SMAPE=%.3f/%.3f, %d active models, Error=%.3f " % \
                          (eval_mae.best_epoch, eval_mae_side.best_epoch, eval_smape.best_epoch, eval_smape_side.best_epoch,
                           eval_mae.avg_epoch,  eval_mae_side.avg_epoch,  eval_smape.avg_epoch,  eval_smape_side.avg_epoch,
                           trainer.has_active(), last_error)
                log.info(status)
            else:
                log.info(status)
                log.info("Early stopping!")
                break
            if max_steps and step > max_steps:
                log.info("Max steps calculated")
                break
            sys.stderr.flush()

        # noinspection PyUnboundLocalVariable
        return np.mean(best_epoch_smape, dtype=np.float64)
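
The avg_sgd branch above temporarily saves the raw weights, loads the EMA
shadow variables for evaluation, and then restores the raw weights. A minimal
sketch of how tf.train.ExponentialMovingAverage maintains those shadow values
(the decay value is illustrative):

import tensorflow as tf

w = tf.Variable(0.0, name='w')
ema = tf.train.ExponentialMovingAverage(decay=0.9)

inc_w = tf.assign_add(w, 1.0)
with tf.control_dependencies([inc_w]):
    # Refresh the shadow copy of w after every "training" step.
    train_step = ema.apply([w])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(5):
        sess.run(train_step)
    # ema.average(w) is the shadow variable an evaluation saver would swap in
    # (e.g. via ema.variables_to_restore()).
    print(sess.run([w, ema.average(w)]))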
def train():
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        tf.gfile.Copy(FLAGS.input_previous_model_path + "/" +
                      FLAGS.tree_index_file,
                      FLAGS.output_model_path + "/" + FLAGS.tree_index_file,
                      overwrite=True)
        global_step = tf.train.get_or_create_global_step()
        inc_step = tf.assign_add(global_step, 1)
        #Training setting
        train_input_pipe = InputPipe([
            FLAGS.input_training_data_path + "/" + i
            for i in tf.gfile.ListDirectory(FLAGS.input_training_data_path)
        ], FLAGS.batch_size, FLAGS.num_epochs, 5, "", True)
        #auc_eval_pipe = InputPipe(FLAGS.input_validation_data_path + "/label_data.txt", FLAGS.eval_batch_size,1,3,"0,1",False) if FLAGS.auc_evaluation else None
        auc_eval_pipe = InputPipe(FLAGS.input_validation_data_path,
                                  FLAGS.eval_batch_size, 1, 3, "",
                                  True) if FLAGS.auc_evaluation else None
        #bleu_eval_pipe = InputPipe(FLAGS.input_validation_data_path + "/bleu_data.txt", FLAGS.eval_batch_size,1,2,"0",False) if FLAGS.bleu_evaluation else None
        model = TreeModel()
        trainer = SingleboxTrainer(model, inc_step, train_input_pipe,
                                   auc_eval_pipe, None)
        summary_op = tf.summary.merge_all()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        saver = tf.train.Saver(max_to_keep=FLAGS.max_model_to_keep,
                               name='model_saver')
        with tf.Session(config=config) as session:
            summ_writer = tf.summary.FileWriter(FLAGS.log_dir, session.graph)
            #Load Pretrain
            session.run(tf.local_variables_initializer())
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())
            session.run(train_input_pipe.iterator.initializer)
            ckpt = tf.train.get_checkpoint_state(
                FLAGS.input_previous_model_path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(session, ckpt.model_checkpoint_path)
                print("Load Model From ", ckpt.model_checkpoint_path)
            else:
                print("No Initial Model Found.")
            trainer.start_time = time.time()
            while True:
                try:
                    _, avg_loss, total_weight, step, summary = session.run(
                        trainer.train_ops() + [summary_op])
                    #print(step)
                    if step % FLAGS.log_frequency == 1:
                        summ_writer.add_summary(summary, step)
                        trainer.print_log(total_weight, step, avg_loss)
                    if step % FLAGS.checkpoint_frequency == 1:
                        if FLAGS.auc_evaluation:
                            trainer.eval(step, session, 'auc')
                        if FLAGS.bleu_evaluation:
                            trainer.eval(step, session, 'bleu')
                        if trainer.improved():
                            saver.save(session,
                                       FLAGS.output_model_path + "/tree_model",
                                       global_step=step)
                        elif trainer.early_stop():
                            print("\nEarly stop")
                            break
                except tf.errors.OutOfRangeError:
                    print("End of training.")
                    break
            if not trainer.early_stop():
                saver.save(session,
                           FLAGS.output_model_path + "/" + "tree_model_final",
                           global_step=step)
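
Both session setups above use the same restore-or-initialize idiom around
tf.train.get_checkpoint_state. A minimal, self-contained sketch (the checkpoint
directory is a placeholder):

import tensorflow as tf

w = tf.Variable(0.0, name='w')
saver = tf.train.Saver(max_to_keep=2)
checkpoint_dir = '/tmp/example_ckpt'  # placeholder path

with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('Model restored:', ckpt.model_checkpoint_path)
    else:
        print('No model found, initializing from scratch.')
        sess.run(tf.global_variables_initializer())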
Example #40
0
    h0 = sample_prob(h0_prob)
    h1 = h0

    for step in range(gibbs_sampling_steps):
        v1_prob = tf.nn.sigmoid(
            tf.matmul(h1, tf.transpose(w1)) + tf.transpose(vb1))
        v1 = sample_prob(v1_prob)
        h1_prob = tf.nn.sigmoid(tf.matmul(v1, w1) + tf.transpose(hb1))
        h1 = sample_prob(h1_prob)

    w1_positive_grad = tf.matmul(tf.transpose(X1), h0_prob)
    w1_negative_grad = tf.matmul(tf.transpose(v1_prob), h1_prob)

    dw1 = (w1_positive_grad - w1_negative_grad) / tf.to_float(tf.shape(X1)[0])

    update_w1 = tf.assign_add(w1, alpha * dw1)
    update_vb1 = tf.assign_add(vb1, alpha * tf.reduce_mean(X1 - v1, 0))
    update_hb1 = tf.assign_add(hb1, alpha * tf.reduce_mean(h0 - h1, 0))

    out1 = (update_w1, update_vb1, update_hb1)

    v1_prob = tf.nn.sigmoid(
        tf.matmul(h1, tf.transpose(w1)) + tf.transpose(vb1))
    v1 = sample_prob(v1_prob)

    err1 = X1 - v1_prob
    err_sum1 = tf.reduce_mean(err1 * err1)

    initialize1 = tf.global_variables_initializer()

batch_size = 100
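# The snippet above relies on a sample_prob helper that is not shown in this
# excerpt. A minimal sketch of one common implementation (Bernoulli sampling of
# binary units from their activation probabilities) is given below; the helper
# actually used by the original code may differ.
import tensorflow as tf

def sample_prob(probs):
    """Sample each unit as 1 with probability `probs`, else 0."""
    # sign(p - u) is +1 where u < p and -1 elsewhere; relu maps that to 1/0.
    return tf.nn.relu(tf.sign(probs - tf.random_uniform(tf.shape(probs))))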
Example #41
0
    def custom_loss(self, y_true, y_pred):
        mask_shape = tf.shape(y_true)[:4]
        
        cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]), (1, self.grid_h, self.grid_w, 1, 1)))
        cell_y = tf.transpose(cell_x, (0,2,1,3,4))

        cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [self.batch_size, 1, 1, self.nb_box, 1])
        
        coord_mask = tf.zeros(mask_shape)
        conf_mask  = tf.zeros(mask_shape)
        class_mask = tf.zeros(mask_shape)
        
        seen = tf.Variable(0.)
        total_recall = tf.Variable(0.)
        
        """
        Adjust prediction
        """
        ### adjust x and y      
        pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
        
        ### adjust w and h
        pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(self.anchors, [1,1,1,self.nb_box,2])
        
        ### adjust confidence
        pred_box_conf = tf.sigmoid(y_pred[..., 4])
        
        ### adjust class probabilities
        pred_box_class = y_pred[..., 5:]
        
        """
        Adjust ground truth
        """
        ### adjust x and y
        true_box_xy = y_true[..., 0:2] # relative position to the containing cell
        
        ### adjust w and h
        true_box_wh = y_true[..., 2:4] # number of cells across, horizontally and vertically
        
        ### adjust confidence
        true_wh_half = true_box_wh / 2.
        true_mins    = true_box_xy - true_wh_half
        true_maxes   = true_box_xy + true_wh_half
        
        pred_wh_half = pred_box_wh / 2.
        pred_mins    = pred_box_xy - pred_wh_half
        pred_maxes   = pred_box_xy + pred_wh_half       
        
        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
        
        true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
        pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)
        
        true_box_conf = iou_scores * y_true[..., 4]
        
        ### adjust class probabilities
        true_box_class = tf.argmax(y_true[..., 5:], -1)
        
        """
        Determine the masks
        """
        ### coordinate mask: simply the position of the ground truth boxes (the predictors)
        coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale
        
        ### confidence mask: penalize predictors + penalize boxes with low IOU
        # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
        true_xy = self.true_boxes[..., 0:2]
        true_wh = self.true_boxes[..., 2:4]
        
        true_wh_half = true_wh / 2.
        true_mins    = true_xy - true_wh_half
        true_maxes   = true_xy + true_wh_half
        
        pred_xy = tf.expand_dims(pred_box_xy, 4)
        pred_wh = tf.expand_dims(pred_box_wh, 4)
        
        pred_wh_half = pred_wh / 2.
        pred_mins    = pred_xy - pred_wh_half
        pred_maxes   = pred_xy + pred_wh_half    
        
        intersect_mins  = tf.maximum(pred_mins,  true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
        
        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores  = tf.truediv(intersect_areas, union_areas)

        best_ious = tf.reduce_max(iou_scores, axis=4)
        conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * self.no_object_scale
        
        # penalize the confidence of the boxes, which are responsible for the corresponding ground truth box
        conf_mask = conf_mask + y_true[..., 4] * self.object_scale
        
        ### class mask: simply the position of the ground truth boxes (the predictors)
        class_mask = y_true[..., 4] * tf.gather(self.class_wt, true_box_class) * self.class_scale       
        
        """
        Warm-up training
        """
        no_boxes_mask = tf.to_float(coord_mask < self.coord_scale/2.)
        seen = tf.assign_add(seen, 1.)
        
        true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches+1), 
                              lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask, 
                                       true_box_wh + tf.ones_like(true_box_wh) * \
                                       np.reshape(self.anchors, [1,1,1,self.nb_box,2]) * \
                                       no_boxes_mask, 
                                       tf.ones_like(coord_mask)],
                              lambda: [true_box_xy, 
                                       true_box_wh,
                                       coord_mask])
        
        """
        Finalize the loss
        """
        nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
        nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))
        nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
        
        loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
        loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
        loss_conf  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask)  / (nb_conf_box  + 1e-6) / 2.
        loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
        loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
        
        loss = tf.cond(tf.less(seen, self.warmup_batches+1),
                      lambda: loss_xy + loss_wh + loss_conf + loss_class + 10,
                      lambda: loss_xy + loss_wh + loss_conf + loss_class)
        
        if self.debug:
            nb_true_box = tf.reduce_sum(y_true[..., 4])
            nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))
            
            current_recall = nb_pred_box/(nb_true_box + 1e-6)
            total_recall = tf.assign_add(total_recall, current_recall) 

            loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
            loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
            loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
            loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
            loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
            loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
            loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)
        
        return loss
def batch_norm_log_diff(input_,
                        dim,
                        name,
                        train=True,
                        epsilon=1e-8,
                        decay=.1,
                        axes=[0],
                        reuse=None,
                        bn_lag=DEFAULT_BN_LAG):
    """Batch normalization with corresponding log determinant Jacobian."""
    if reuse is None:
        reuse = not train
    # create variables
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()
        var = variable_on_cpu("var", [dim],
                              tf.constant_initializer(1.),
                              trainable=False)
        mean = variable_on_cpu("mean", [dim],
                               tf.constant_initializer(0.),
                               trainable=False)
        step = variable_on_cpu("step", [],
                               tf.constant_initializer(0.),
                               trainable=False)
    # choose the appropriate moments
    if train:
        used_mean, used_var = tf.nn.moments(input_, axes, name="batch_norm")
        cur_mean, cur_var = used_mean, used_var
        if bn_lag > 0.:
            used_var = stable_var(input_=input_, mean=used_mean, axes=axes)
            cur_var = used_var
            used_mean -= (1 - bn_lag) * (used_mean - tf.stop_gradient(mean))
            used_mean /= (1. - bn_lag**(step + 1))
            used_var -= (1 - bn_lag) * (used_var - tf.stop_gradient(var))
            used_var /= (1. - bn_lag**(step + 1))
    else:
        used_mean, used_var = mean, var
        cur_mean, cur_var = used_mean, used_var

    # update variables
    if train:
        with tf.name_scope(name, "AssignMovingAvg", [mean, cur_mean, decay]):
            with ops.colocate_with(mean):
                new_mean = tf.assign_sub(
                    mean,
                    tf.check_numerics(decay * (mean - cur_mean),
                                      "NaN in moving mean."))
        with tf.name_scope(name, "AssignMovingAvg", [var, cur_var, decay]):
            with ops.colocate_with(var):
                new_var = tf.assign_sub(
                    var,
                    tf.check_numerics(decay * (var - cur_var),
                                      "NaN in moving variance."))
        with tf.name_scope(name, "IncrementTime", [step]):
            with ops.colocate_with(step):
                new_step = tf.assign_add(step, 1.)
        used_var += 0. * new_mean * new_var * new_step
    used_var += epsilon

    return used_mean, used_var
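# A hypothetical usage sketch (not part of the original source) for
# batch_norm_log_diff: normalize the input with the returned moments and compute
# the log-determinant of the Jacobian of that elementwise rescaling, which for
# y = (x - mean) / sqrt(var) is -0.5 * log(var) summed over the normalized
# dimensions. Assumes tf and batch_norm_log_diff from above are in scope.
def normalize_with_logdet(x, dim, name, train=True):
    mean, var = batch_norm_log_diff(x, dim, name, train=train)
    y = (x - mean) / tf.sqrt(var)              # var already includes epsilon
    log_det_jacobian = -0.5 * tf.reduce_sum(tf.log(var))
    return y, log_det_jacobian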
Example #43
0
N = settings["NumOptions"]
offset = 0
v_select = []
for sample in range(N):
    v_select.append(v_g[:, sample + offset])
    if sample + offset >= dim:
        continue
    if np.iscomplex(w_g[sample + offset]):
        offset += 1

from networks.network import Network

# Creating high-level policy
with tf.device(args.processor):
    global_step = tf.Variable(0, trainable=False, name='global_step')
    global_step_next = tf.assign_add(global_step, 1)
    network = Network(settings["NetworkConfig"], N, netConfigOverride)
    Method = GetFunction(settings["Method"])
    net = Method(network,
                 sess,
                 scope="net",
                 stateShape=dFeatures,
                 actionSize=N,
                 HPs=settings["NetworkHPs"],
                 nTrajs=nTrajs)

# Creating auxiliary functions for logging and saving.
writer = tf.summary.FileWriter(LOG_PATH, graph=sess.graph)
saver = tf.train.Saver(max_to_keep=3, var_list=net.getVars + [global_step])
net.InitializeVariablesFromFile(saver, MODEL_PATH_)
InitializeVariables(
Example #44
0
batch_size = 50
batch_label = np.ones([batch_size, n, 1])

x = tf.placeholder(tf.float32, [None, n_his + 1, n, 1])
keep_prob = tf.placeholder(tf.float32)
is_training = tf.placeholder(tf.bool)

train_loss = STGCN(x, n, n_his, Ks, Kt, keep_prob, is_training)
copy_loss = tf.add_n(tf.get_collection('copy_loss'))

global_step = tf.Variable(0, trainable=False)
n_sample = np.shape(xtr)[0]
n_batch = int(n_sample / float(batch_size))
lr = tf.train.exponential_decay(
    1e-2, global_step, decay_steps=5 * n_batch, decay_rate=0.7, staircase=True)
step_op = tf.assign_add(global_step, 1)

with tf.control_dependencies([step_op]):
    train_op = tf.train.RMSPropOptimizer(lr).minimize(train_loss)

saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())

min_va_mape9 = min_va_mape6 = min_va_mape3 = 0.4
min_va_mse9 = min_va_mse6 = min_va_mse3 = 1e5
min_va_mae9 = min_va_mae6 = min_va_mae3 = 1e5
min_mape9 = min_mape6 = min_mape3 = 0.4
min_mse9 = min_mse6 = min_mse3 = 1e5
min_mae9 = min_mae6 = min_mae3 = 1e5
flag3 = flag6 = flag9 = False
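# A minimal training-loop sketch for the graph above (illustrative only; the exact
# batching of xtr in the original script is not shown here). Because train_op is
# built under tf.control_dependencies([step_op]), every run of train_op also
# advances global_step, which drives the exponential decay of lr.
for j in range(n_batch):
    x_batch = xtr[j * batch_size:(j + 1) * batch_size, 0:n_his + 1, :, :]
    _, loss_val = sess.run([train_op, train_loss],
                           feed_dict={x: x_batch, keep_prob: 1.0, is_training: True})
print('learning rate after %d steps: %f' % (n_batch, sess.run(lr)))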
def batch_norm(input_,
               dim,
               name,
               scale=True,
               train=True,
               epsilon=1e-8,
               decay=.1,
               axes=[0],
               bn_lag=DEFAULT_BN_LAG):
    """Batch normalization."""
    # create variables
    with tf.variable_scope(name):
        var = variable_on_cpu("var", [dim],
                              tf.constant_initializer(1.),
                              trainable=False)
        mean = variable_on_cpu("mean", [dim],
                               tf.constant_initializer(0.),
                               trainable=False)
        step = variable_on_cpu("step", [],
                               tf.constant_initializer(0.),
                               trainable=False)
        if scale:
            gamma = variable_on_cpu("gamma", [dim],
                                    tf.constant_initializer(1.))
        beta = variable_on_cpu("beta", [dim], tf.constant_initializer(0.))
    # choose the appropriate moments
    if train:
        used_mean, used_var = tf.nn.moments(input_, axes, name="batch_norm")
        cur_mean, cur_var = used_mean, used_var
        if bn_lag > 0.:
            used_mean -= (1. - bn_lag) * (used_mean - tf.stop_gradient(mean))
            used_var -= (1 - bn_lag) * (used_var - tf.stop_gradient(var))
            used_mean /= (1. - bn_lag**(step + 1))
            used_var /= (1. - bn_lag**(step + 1))
    else:
        used_mean, used_var = mean, var
        cur_mean, cur_var = used_mean, used_var

    # normalize
    res = (input_ - used_mean) / tf.sqrt(used_var + epsilon)
    # de-normalize
    if scale:
        res *= gamma
    res += beta

    # update variables
    if train:
        with tf.name_scope(name, "AssignMovingAvg", [mean, cur_mean, decay]):
            with ops.colocate_with(mean):
                new_mean = tf.assign_sub(
                    mean,
                    tf.check_numerics(decay * (mean - cur_mean),
                                      "NaN in moving mean."))
        with tf.name_scope(name, "AssignMovingAvg", [var, cur_var, decay]):
            with ops.colocate_with(var):
                new_var = tf.assign_sub(
                    var,
                    tf.check_numerics(decay * (var - cur_var),
                                      "NaN in moving variance."))
        with tf.name_scope(name, "IncrementTime", [step]):
            with ops.colocate_with(step):
                new_step = tf.assign_add(step, 1.)
        res += 0. * new_mean * new_var * new_step

    return res
Example #46
0
#try finally
import tensorflow as tf

array_to_outof = tf.constant([1, 2, 3, 4, 5, 6, 7, 8])

counter = tf.Variable(0)
x = tf.Variable(1.)
w = tf.Variable(2.)

op = tf.multiply(w, x)
count_op = tf.assign_add(counter, 1)

coord = tf.train.Coordinator()
sess = tf.Session()
with sess.as_default():
    sess.run(tf.global_variables_initializer())
    """Start Training"""
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        while not coord.should_stop():
            print('in while-loop:')
            op_result_ = sess.run([op])
            print(op_result_)

            counter_ = sess.run(count_op)
            print('counter_:', counter_)
            array_to_outof[counter_]

    # except tf.errors.OutOfRangeError:
    #     print('except')
    except ValueError:
Example #47
0
    def __init__(self):
        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes = len(self.classes)
        self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
        self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
        self.first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
        self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
        self.warmup_periods = cfg.TRAIN.WARMUP_EPOCHS
        self.initial_weight = cfg.TRAIN.INITIAL_WEIGHT
        self.time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                  time.localtime(time.time()))
        self.moving_ave_decay = cfg.YOLO.MOVING_AVE_DECAY
        self.max_bbox_per_scale = 150
        self.train_logdir = "./data/log/train"
        self.trainset = Dataset('train')
        self.testset = Dataset('test')
        self.steps_per_period = len(self.trainset)
        config = tf.ConfigProto()
        config.allow_soft_placement = True
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)

        with tf.name_scope('define_input'):
            self.input_data = tf.placeholder(dtype=tf.float32,
                                             name='input_data')
            self.label_sbbox = tf.placeholder(dtype=tf.float32,
                                              name='label_sbbox')
            self.label_mbbox = tf.placeholder(dtype=tf.float32,
                                              name='label_mbbox')
            self.label_lbbox = tf.placeholder(dtype=tf.float32,
                                              name='label_lbbox')
            self.true_sbboxes = tf.placeholder(dtype=tf.float32,
                                               name='sbboxes')
            self.true_mbboxes = tf.placeholder(dtype=tf.float32,
                                               name='mbboxes')
            self.true_lbboxes = tf.placeholder(dtype=tf.float32,
                                               name='lbboxes')
            self.trainable = tf.placeholder(dtype=tf.bool, name='training')

        with tf.name_scope("define_loss"):
            self.model = YOLOV3(self.input_data, self.trainable)
            self.net_var = tf.global_variables()
            self.giou_loss, self.conf_loss, self.prob_loss = self.model.compute_loss(
                self.label_sbbox, self.label_mbbox, self.label_lbbox,
                self.true_sbboxes, self.true_mbboxes, self.true_lbboxes)
            self.loss = self.giou_loss + self.conf_loss + self.prob_loss

        with tf.name_scope('learn_rate'):
            self.global_step = tf.Variable(1.0,
                                           dtype=tf.float64,
                                           trainable=False,
                                           name='global_step')
            warmup_steps = tf.constant(self.warmup_periods *
                                       self.steps_per_period,
                                       dtype=tf.float64,
                                       name='warmup_steps')
            train_steps = tf.constant(
                (self.first_stage_epochs + self.second_stage_epochs) *
                self.steps_per_period,
                dtype=tf.float64,
                name='train_steps')
            self.learn_rate = tf.cond(
                pred=self.global_step < warmup_steps,
                true_fn=lambda: self.global_step / warmup_steps * self.
                learn_rate_init,
                false_fn=lambda: self.learn_rate_end + 0.5 *
                (self.learn_rate_init - self.learn_rate_end) * (1 + tf.cos(
                    (self.global_step - warmup_steps) /
                    (train_steps - warmup_steps) * np.pi)))
            global_step_update = tf.assign_add(self.global_step, 1.0)

        with tf.name_scope("define_weight_decay"):
            moving_ave = tf.train.ExponentialMovingAverage(
                self.moving_ave_decay).apply(tf.trainable_variables())

        with tf.name_scope("define_first_stage_train"):
            self.first_stage_trainable_var_list = []
            for var in tf.trainable_variables():
                var_name = var.op.name
                var_name_mess = str(var_name).split('/')
                if var_name_mess[0] in [
                        'conv_sbbox', 'conv_mbbox', 'conv_lbbox'
                ]:
                    self.first_stage_trainable_var_list.append(var)

            first_stage_optimizer = tf.train.AdamOptimizer(
                self.learn_rate).minimize(
                    self.loss, var_list=self.first_stage_trainable_var_list)
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies(
                    [first_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_frozen_variables = tf.no_op()

        with tf.name_scope("define_second_stage_train"):
            second_stage_trainable_var_list = tf.trainable_variables()
            second_stage_optimizer = tf.train.AdamOptimizer(
                self.learn_rate).minimize(
                    self.loss, var_list=second_stage_trainable_var_list)

            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies(
                    [second_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_all_variables = tf.no_op()

        with tf.name_scope('loader_and_saver'):
            self.loader = tf.train.Saver(self.net_var)
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

        with tf.name_scope('summary'):
            tf.summary.scalar("learn_rate", self.learn_rate)
            tf.summary.scalar("giou_loss", self.giou_loss)
            tf.summary.scalar("conf_loss", self.conf_loss)
            tf.summary.scalar("prob_loss", self.prob_loss)
            tf.summary.scalar("total_loss", self.loss)

            logdir = "./data/log/"
            if os.path.exists(logdir): shutil.rmtree(logdir)
            os.mkdir(logdir)
            self.write_op = tf.summary.merge_all()
            self.summary_writer = tf.summary.FileWriter(logdir,
                                                        graph=self.sess.graph)
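# An illustrative re-implementation (not from the original source) of the
# learn_rate schedule defined in the 'learn_rate' scope above: linear warmup to
# learn_rate_init over the warmup steps, then cosine decay towards learn_rate_end
# over the remaining training steps. The function name yolo_lr is hypothetical.
import numpy as np

def yolo_lr(step, warmup_steps, train_steps, lr_init, lr_end):
    if step < warmup_steps:
        return float(step) / warmup_steps * lr_init
    progress = float(step - warmup_steps) / (train_steps - warmup_steps)
    return lr_end + 0.5 * (lr_init - lr_end) * (1 + np.cos(progress * np.pi))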
Example #48
0
    def call(self, x):
        input_image, y_pred, y_true, true_boxes = x

        # adjust the shape of y_pred to [batch, grid_h, grid_w, 3, 4+1+nb_class]
        y_pred = tf.reshape(
            y_pred,
            tf.concat([tf.shape(y_pred)[:3],
                       tf.constant([3, -1])], axis=0))

        # initialize the masks
        object_mask = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of number of batches processed
        batch_seen = tf.Variable(0.)

        # compute grid factor and net factor
        grid_h = tf.shape(y_true)[1]
        grid_w = tf.shape(y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32),
                                 [1, 1, 1, 1, 2])

        net_h = tf.shape(input_image)[1]
        net_w = tf.shape(input_image)[2]
        net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32),
                                [1, 1, 1, 1, 2])
        """
        Adjust prediction
        """
        pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] +
                       tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh = y_pred[..., 2:4]  # t_wh
        pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]),
                                       4)  # adjust confidence
        pred_box_class = tf.sigmoid(y_pred[...,
                                           5:])  # adjust class probabilities
        """
        Adjust ground truth
        """
        true_box_xy = y_true[..., 0:2]  # (sigma(t_xy) + c_xy)
        true_box_wh = y_true[..., 2:4]  # t_wh
        true_box_conf = tf.expand_dims(y_true[..., 4], 4)
        true_box_class = y_true[..., 5:]
        """
        Compare each predicted box to all true boxes
        """
        # initially, drag all objectness of all boxes to 0
        conf_delta = pred_box_conf - 0

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(
            tf.exp(pred_box_wh) * self.anchors / net_factor, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        best_ious = tf.reduce_max(iou_scores, axis=4)
        conf_delta *= tf.expand_dims(
            tf.to_float(best_ious < self.ignore_thresh), 4)
        """
        Compute some online statistics
        """
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)
        iou_scores = object_mask * tf.expand_dims(iou_scores, 4)

        count = tf.reduce_sum(object_mask)
        count_noobj = tf.reduce_sum(1 - object_mask)
        detect_mask = tf.to_float(pred_box_conf * object_mask >= 0.5)
        class_mask = tf.expand_dims(
            tf.to_float(
                tf.equal(tf.argmax(pred_box_class, -1),
                         tf.argmax(true_box_class, -1))), 4)
        recall50 = tf.to_float(iou_scores >= 0.5) * detect_mask
        recall75 = tf.to_float(iou_scores >= 0.75) * detect_mask
        recall50_c = tf.reduce_sum(recall50 * class_mask) / (count + 1e-3)
        recall75_c = tf.reduce_sum(recall75 * class_mask) / (count + 1e-3)
        recall50 = tf.reduce_sum(recall50) / (count + 1e-3)
        recall75 = tf.reduce_sum(recall75) / (count + 1e-3)
        avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3)
        avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3)
        avg_noobj = tf.reduce_sum(pred_box_conf *
                                  (1 - object_mask)) / (count_noobj + 1e-3)
        avg_cat = tf.reduce_sum(
            pred_box_class * true_box_class) / (count + 1e-3)
        """
        Warm-up training
        """
        batch_seen = tf.assign_add(batch_seen, 1.)

        true_box_xy, true_box_wh, xywh_mask = tf.cond(
            tf.less(batch_seen, self.warmup_batches + 1), lambda: [
                true_box_xy +
                (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) *
                (1 - object_mask), true_box_wh + tf.zeros_like(true_box_wh) *
                (1 - object_mask),
                tf.ones_like(object_mask)
            ], lambda: [true_box_xy, true_box_wh, object_mask])
        """
        Compare each true box to all anchor boxes
        """
        xywh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        xywh_scale = tf.expand_dims(
            2 - xywh_scale[..., 0] * xywh_scale[..., 1],
            axis=4)  # the smaller the box, the bigger the scale

        xy_delta = xywh_mask * (pred_box_xy - true_box_xy) * xywh_scale
        wh_delta = xywh_mask * (pred_box_wh - true_box_wh) * xywh_scale
        conf_delta = object_mask * (pred_box_conf - true_box_conf) * 5 + (
            1 - object_mask) * conf_delta
        class_delta = object_mask * (pred_box_class - true_box_class)

        loss = tf.reduce_sum(tf.square(xy_delta),       list(range(1,5))) + \
               tf.reduce_sum(tf.square(wh_delta),       list(range(1,5))) + \
               tf.reduce_sum(tf.square(conf_delta),     list(range(1,5))) + \
               tf.reduce_sum(tf.square(class_delta),    list(range(1,5)))

        loss = tf.cond(
            tf.less(batch_seen, self.warmup_batches +
                    1),  # add 10 to the loss if this is the warmup stage
            lambda: loss + 10,
            lambda: loss)

        # loss = tf.Print(loss, [grid_h, avg_obj], message='avg_obj \t\t', summarize=1000)
        # loss = tf.Print(loss, [grid_h, avg_noobj], message='avg_noobj \t\t', summarize=1000)
        # loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000)
        # loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000)
        # loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000)
        # loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000)
        # loss = tf.Print(loss, [grid_h, recall50_c], message='recall50_cat \t', summarize=1000)
        # loss = tf.Print(loss, [grid_h, recall75_c], message='recall75_Cat \t', summarize=1000)
        # loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000)
        # loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss)],  message='loss: \t',   summarize=1000)

        return loss * self.scale
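# The corner-style IoU computation appears twice in the loss above. For reference,
# an equivalent standalone helper (a sketch, not taken from the original code) for
# boxes given as center (x, y) and (w, h) tensors with matching shapes:
import tensorflow as tf

def box_iou(xy_a, wh_a, xy_b, wh_b):
    mins_a, maxes_a = xy_a - wh_a / 2., xy_a + wh_a / 2.
    mins_b, maxes_b = xy_b - wh_b / 2., xy_b + wh_b / 2.
    intersect_wh = tf.maximum(
        tf.minimum(maxes_a, maxes_b) - tf.maximum(mins_a, mins_b), 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    union_area = (wh_a[..., 0] * wh_a[..., 1] +
                  wh_b[..., 0] * wh_b[..., 1] - intersect_area)
    return tf.truediv(intersect_area, union_area)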
Example #49
0
# add tensor to tensorboard
ns_image_tb = tf.summary.image(name='ns_image',
                               tensor=tf.reshape(ns, shape=[1, 520, 600, 1]))
ns_mean_tb = tf.summary.scalar(name='ns_mean',
                               tensor=tf.reduce_mean(tf.reduce_mean(ns)))
ns_hist_tb = tf.summary.histogram(name='ns_hist', values=ns)

zs_squre = tf.multiply(zs, zs)
zs_add = tf.add(zs_squre, xs)
zs_abs = tf.abs(zs_add)
zs_less = tf.math.less(zs_abs, 4)
zs_cast = tf.cast(zs_less, tf.float32)

#
step = tf.group(tf.assign(zs, zs_add), tf.assign_add(ns, zs_cast), name='step')

#
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
tbs = tf.summary.merge_all()

writer = tf.summary.FileWriter(logdir='./tensorboard')
writer.add_graph(tf.get_default_graph())

for i in range(200):
    _, tbs_, cast_ = sess.run([step, tbs, zs_cast])
    writer.add_summary(tbs_, global_step=i)

saver.save(sess, save_path='./model/mandelbrot')
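# The snippet above assumes xs, zs and ns already exist. One way to set them up
# (following the well-known TensorFlow Mandelbrot tutorial; grid sizes chosen to
# match the [1, 520, 600, 1] reshape used for the summary image) could be:
import numpy as np
import tensorflow as tf

Y, X = np.mgrid[-1.3:1.3:0.005, -2:1:0.005]           # 520 x 600 grid of points
xs = tf.constant((X + 1j * Y).astype(np.complex64))   # c values of z <- z*z + c
zs = tf.Variable(xs)                                   # iterated z values
ns = tf.Variable(tf.zeros_like(xs, tf.float32))        # per-point escape counter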
Example #50
0
        train_step = tf.train.AdagradOptimizer(
            learning_rate=opt.hyper.learning_rate / 10).minimize(
                total_loss, var_list=all_var)
    elif opt.optimizer == 4:
        train_step = tf.train.ProximalAdagradOptimizer(learning_rate=opt.hyper.learning_rate/10). \
            minimize(total_loss, var_list=all_var)
    elif opt.optimizer == 5:
        train_step = tf.train.RMSPropOptimizer(
            learning_rate=opt.hyper.learning_rate / 10).minimize(
                total_loss, var_list=all_var)
    elif opt.optimizer == 6:
        train_step = tf.train.FtrlOptimizer(
            learning_rate=opt.hyper.learning_rate / 10).minimize(
                total_loss, var_list=all_var)

    inc_global_step = tf.assign_add(global_step, 1, name='increment')

    raw_grads = tf.gradients(total_loss, all_var)
    grads = list(zip(raw_grads, tf.trainable_variables()))

    for g, v in grads:
        summary.gradient_summaries(g, v, opt)
    ################################################################################################

    ################################################################################################
    # Set up checkpoints and data
    ################################################################################################

    saver = tf.compat.v1.train.Saver(max_to_keep=opt.max_to_keep_checkpoints)

    # Automatic restore model, or force train from scratch
Example #51
0
 def _finish(self, update_ops, name_scope):
     with tf.control_dependencies(update_ops):
         return tf.assign_add(self._step_count, 1)
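# For context, a minimal sketch (hypothetical, TF 1.x style) of the kind of
# optimizer the _finish override above belongs to: the base Optimizer calls
# _finish at the end of apply_gradients, so chaining tf.assign_add onto the
# update ops yields a counter that increments once per apply_gradients call.
class CountingSGD(tf.train.GradientDescentOptimizer):
    def __init__(self, learning_rate, name='CountingSGD'):
        super(CountingSGD, self).__init__(learning_rate, name=name)
        self._step_count = tf.Variable(0, trainable=False, name='step_count')

    def _finish(self, update_ops, name_scope):
        with tf.control_dependencies(update_ops):
            return tf.assign_add(self._step_count, 1)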
def train(train_imgs,train_labels,val_imgs,val_labels):
    has_train=True
    TB_LOG_DIR=os.path.join('..','model')
    ckpt = tf.train.get_checkpoint_state(TB_LOG_DIR)
    if not ckpt or not ckpt.model_checkpoint_path:
        has_train=False
    if has_train==False:
        #dataset param
        EPOCHS=150
        SHUFFLE_SZ=1000
        BATCH_SZ=200
        #model param
        OUTPUT_CNS=[24,48,96,192,1024]
        CLASS_NUM=200
        WEIGHT_DECAY=4e-5
        #training param
        WARM_UP_LR=0.002
        LEARNING_RATE=0.5
        LEARNING_RATE_DECAY=0.95
        TOTAL_STEPS=EPOCHS*100000//BATCH_SZ
        LEARNING_RATE_STEPS=TOTAL_STEPS//100
        MOMENTUM=0.9
        #display
        DISPLAY_STEP=TOTAL_STEPS//100
        TB_LOG_DIR=os.path.join('..','model')
        #validation
        VAL_SZ=10000
    else:
        #dataset param
        EPOCHS=50
        SHUFFLE_SZ=1000
        BATCH_SZ=200
        #model param
        OUTPUT_CNS=[24,48,96,192,1024]
        CLASS_NUM=200
        WEIGHT_DECAY=4e-5
        #training param
        WARM_UP_LR=0.0005  
        LEARNING_RATE=0.0005
        LEARNING_RATE_DECAY=0.9
        TOTAL_STEPS=EPOCHS*100000//BATCH_SZ
        LEARNING_RATE_STEPS=TOTAL_STEPS//100
        MOMENTUM=0.9
        #display
        DISPLAY_STEP=TOTAL_STEPS//100
        TB_LOG_DIR=os.path.join('..','model')
        #validation
        VAL_SZ=10000
        
    
    imgpaths=tf.convert_to_tensor(train_imgs)
    labels=tf.convert_to_tensor(train_labels)
    valimgpaths=tf.convert_to_tensor(val_imgs)
    vallabels=tf.convert_to_tensor(val_labels)
    
    #sess=tf.Session()
    def _parse_function(imgpath,label):
        img=tf.read_file(imgpath)
        img_decoded=tf.image.decode_jpeg(img,3)
        img_decoded.set_shape([64,64,3]) 
        img_decoded=tf.cast(img_decoded,dtype=tf.float32)
        return img_decoded,label    
    dataset=tf.data.Dataset.from_tensor_slices((imgpaths,labels)).map(_parse_function)
    dataset=dataset.shuffle(buffer_size=SHUFFLE_SZ)
    dataset=dataset.repeat(EPOCHS)
    dataset=dataset.batch(BATCH_SZ)
    iterator=dataset.make_initializable_iterator()
    batch_imgs,batch_labels=iterator.get_next()
    
    valset=tf.data.Dataset.from_tensor_slices((valimgpaths,vallabels)).map(_parse_function)
    valset=valset.batch(VAL_SZ)
    valiterator=valset.make_initializable_iterator()
    valbatch_imgs,valbatch_labels=valiterator.get_next()
    #dimgs,dlabels=sess.run([batch_imgs,batch_labels])
       
    initial=tf.variance_scaling_initializer()
    regular=tf.contrib.layers.l2_regularizer(1.0)
    logits=model(batch_imgs,OUTPUT_CNS,CLASS_NUM,True,regular,initial)
    with tf.name_scope('loss'):
        loss=tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=batch_labels))
        reg=tf.losses.get_regularization_loss()
        loss+=WEIGHT_DECAY*reg
    with tf.name_scope('train'):
        global_step=tf.get_variable('step',shape=[],trainable=False,
                                initializer=tf.zeros_initializer(dtype=tf.int64))
        def get_lr(global_step,total_step,base_lr,warm_up_lr):
            warm_up_total_step=total_step//20
            transition_total_step=warm_up_total_step
            remain_total_step=total_step-warm_up_total_step-transition_total_step
            transition_dlrt=tf.convert_to_tensor((1.0*base_lr-warm_up_lr)/transition_total_step,dtype=tf.float32)
            base_lrt=tf.convert_to_tensor(base_lr,dtype=tf.float32)
            warm_up_lrt=tf.convert_to_tensor(warm_up_lr,dtype=tf.float32)
            warm_up_total_step=tf.convert_to_tensor(warm_up_total_step,dtype=tf.float32)
            transition_total_step=tf.convert_to_tensor(transition_total_step,dtype=tf.float32)
            remain_total_step=tf.convert_to_tensor(remain_total_step,dtype=tf.float32)
            transition_lr=(tf.cast(global_step,tf.float32)-warm_up_total_step)*transition_dlrt+warm_up_lrt
            remain_lr=tf.train.exponential_decay(base_lrt,tf.cast(global_step,tf.float32)-warm_up_total_step-transition_total_step,
                                                  remain_total_step//120 ,LEARNING_RATE_DECAY)
            lr=tf.case({tf.less(global_step,warm_up_total_step): lambda:warm_up_lrt,
                        tf.greater(global_step,transition_total_step+warm_up_total_step): lambda:remain_lr},
                        default=lambda:transition_lr,exclusive=True)
            return lr
        if has_train==False:
            learning_rate=get_lr(global_step,TOTAL_STEPS,LEARNING_RATE,WARM_UP_LR)
        else:
            learning_rate=tf.train.exponential_decay(LEARNING_RATE,global_step,LEARNING_RATE_STEPS,LEARNING_RATE_DECAY) 
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 
        with tf.control_dependencies(update_ops): 
            train_op=tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                momentum=MOMENTUM).minimize(loss)
            with tf.control_dependencies([train_op]):
                global_step_update=tf.assign_add(global_step,1)
                
    if has_train==False:                                      
        init=tf.global_variables_initializer()

    with tf.name_scope('batch_train_accuracy'):
        logits_train=model(batch_imgs,OUTPUT_CNS,CLASS_NUM,False,regular,initial)
        correct_pred_train=tf.equal(tf.cast(tf.argmax(logits_train,1),dtype=tf.int32),batch_labels)
        accuracy_train=tf.reduce_mean(tf.cast(correct_pred_train,tf.float32))
    
    with tf.name_scope('val_accuracy'):
        logits_val=model(valbatch_imgs,OUTPUT_CNS,CLASS_NUM,False,regular,initial)
        correct_pred_val=tf.equal(tf.cast(tf.argmax(logits_val,1),dtype=tf.int32),valbatch_labels)
        accuracy_val=tf.reduce_mean(tf.cast(correct_pred_val,tf.float32))
        
    sess=tf.Session()
    if has_train==False:
        sess.run(init)
    else:
        saver=tf.train.Saver()
        saver.restore(sess,ckpt.model_checkpoint_path)
    sess.run(iterator.initializer)
    
    tf.summary.scalar('loss',loss)
    tf.summary.scalar('batch_train_accuracy',accuracy_train)
    tf.summary.scalar('val_accuracy',accuracy_val)
    tf.summary.scalar('learning_rate',learning_rate)
    tb_merge_summary_op=tf.summary.merge_all()
    summary_writer=tf.summary.FileWriter(os.path.join(TB_LOG_DIR,'tensorboard'),graph=sess.graph)
    
    saver=tf.train.Saver()
    
    sess.run(tf.assign(global_step,0.0))
    for step in range(1,TOTAL_STEPS+1):
        try:
            #_,print_step=sess.run(train_op)
            sess.run(global_step_update)
        except tf.errors.OutOfRangeError:
            break
        if step%DISPLAY_STEP==0 or step==1:
            sess.run(valiterator.initializer)
            l,acct,accv,lr,summary_str=sess.run([loss,accuracy_train,accuracy_val,learning_rate,tb_merge_summary_op])
            summary_writer.add_summary(summary_str,step)
            print("epoch {:d} steps {:d}: loss={:.4f}, accuracy_batch_train={:.4f}, accuracy_val={:.4f}, learning_rate={:.5f}".format(
                    step//(TOTAL_STEPS//EPOCHS),step,l,acct,accv,lr))
    
    summary_writer.close()
    saver.save(sess,os.path.join(TB_LOG_DIR,'model_1.ckpt'))
Example #53
0
def _assign_moving_average(orig_val, new_val, momentum, name):
    with tf.name_scope(name):
        scaled_diff = (1 - momentum) * (new_val - orig_val)
        return tf.assign_add(orig_val, scaled_diff)
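# A small usage sketch for the helper above (the tensor names here are
# illustrative, not from the original source): maintain an exponential moving
# average of per-batch feature means with momentum 0.99.
inputs = tf.placeholder(tf.float32, [None, 64], name='inputs')
moving_mean = tf.Variable(tf.zeros([64]), trainable=False, name='moving_mean')
batch_mean = tf.reduce_mean(inputs, axis=0)
update_moving_mean = _assign_moving_average(moving_mean, batch_mean,
                                            momentum=0.99,
                                            name='update_moving_mean')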
Example #54
0
    def __init__(self, debug=False, **kwargs):
        super(Learner, self).__init__(**kwargs)

        with self.graph.as_default(), tf.device(self.device):
            # initialize predictive model, if either:
            #  * you want to use the predictive model to "undo delay"
            #  * you want a predictive model to help you explore
            # note: self.predict is perhaps a misnomer.
            if self.predict or (self.train_model or self.explore_scale):
                self._init_model(**kwargs)

            if self.train_policy:
                self._init_policy(**kwargs)

            # build computation graph

            # to train the policy, you have to train the critic. (self.train_policy and
            # self.train_critic might both be false, if we're only training the predictive
            # model)
            if self.train_policy or self.train_critic:
                print("Creating critic.")
                self.critic = Critic(self.core.output_size, **kwargs)

            # experience = trajectory. usually a list of SimpleStateAction's.
            self.experience = ct.inputCType(
                ssbm.SimpleStateAction, [None, self.config.experience_length],
                "experience")
            # instantaneous rewards for all but the last state
            self.experience['reward'] = tf.placeholder(
                tf.float32, [None, self.config.experience_length - 1],
                name='experience/reward')
            # manipulating time along the first axis is much more efficient
            experience = util.deepMap(tf.transpose, self.experience)
            # initial state for recurrent networks
            self.experience['initial'] = tuple(
                tf.placeholder(tf.float32, [None, size],
                               name='experience/initial/%d' % i)
                for i, size in enumerate(self.core.hidden_size))
            experience['initial'] = self.experience['initial']

            states = self.embedGame(experience['state'])
            prev_actions = self.embedAction(experience['prev_action'])
            combined = tf.concat(axis=2, values=[states, prev_actions])
            actions = self.embedAction(experience['action'])

            memory = self.config.memory
            delay = self.config.delay
            length = self.config.experience_length - memory
            history = [combined[i:i + length] for i in range(memory + 1)]
            inputs = tf.concat(axis=-1, values=history)
            if self.core.recurrent:

                def f(prev, current_input):
                    _, prev_state = prev
                    return self.core(current_input, prev_state)

                batch_size = tf.shape(self.experience['reward'])[0]
                dummy_output = tf.zeros(
                    tf.stack([batch_size,
                              tf.constant(self.core.output_size)]))
                scan_fn = tf.scan if self.dynamic else tfl.scan
                core_outputs, hidden_states = scan_fn(
                    f, inputs, (dummy_output, experience['initial']))
            else:
                core_outputs, hidden_states = self.core(
                    inputs, experience['initial'])

            actions = actions[memory:]
            rewards = experience['reward'][memory:]

            print("Creating train ops")

            train_ops = []
            losses = []
            loss_vars = []

            if self.train_model or self.predict:
                model_loss, predicted_core_outputs = self.model.train(
                    history, core_outputs, hidden_states, actions,
                    experience['state'])
            if self.train_model:
                #train_ops.append(train_model)
                losses.append(model_loss)
                loss_vars.extend(self.model.getVariables())

            if self.train_policy:
                if self.predict:
                    predict_steps = self.model.predict_steps
                    actor_inputs = predicted_core_outputs
                else:
                    predict_steps = 0
                    actor_inputs = core_outputs

                delay_length = length - delay
                actor_inputs = actor_inputs[:delay_length]

                # delayed_actions is a D+1-P length list of shape [T-M-D, B] tensors
                # The valid state indices are [M+P, T+P-D)
                # Element i corresponds to the i'th queued up action: 0 is the action about to be taken, D-P was the action chosen on this frame.
                delayed_actions = []
                for i in range(predict_steps, delay + 1):
                    delayed_actions.append(actions[i:i + delay_length])
                train_probs, train_log_probs, entropy = self.policy.train_probs(
                    actor_inputs, delayed_actions)

                behavior_probs = experience['prob'][
                    memory +
                    delay:]  # these are the actions we can compute probabilities for
                prob_ratios = tf.minimum(train_probs / behavior_probs, 1.)
                self.kls = -tf.reduce_mean(tf.log(prob_ratios), 0)
                self.kls = tf.check_numerics(self.kls, 'kl')
                kl = tf.reduce_mean(self.kls)
                tf.summary.scalar('kl', kl)
            else:
                prob_ratios = tf.ones_like()  # todo

            if self.explore_scale:
                if self.evolve_explore_scale:
                    self.explore_scale = tf.Variable(self.explore_scale,
                                                     trainable=False,
                                                     name='explore_scale')
                    self.evo_variables.append(
                        ('explore_scale', self.explore_scale, relative(1.5)))

                distances, _ = self.model.distances(history,
                                                    core_outputs,
                                                    hidden_states,
                                                    actions,
                                                    experience['state'],
                                                    predict_steps=1)
                distances = tf.add_n(list(util.deepValues(
                    distances)))  # sum over different state components
                explore_rewards = self.explore_scale * distances[0]
                explore_rewards = tf.stop_gradient(explore_rewards)
                tfl.stats(explore_rewards, 'explore_rewards')
                rewards += explore_rewards

            # build the critic (which you'll also need to train the policy)
            if self.train_policy or self.train_critic:
                shifted_core_outputs = (core_outputs[:delay_length]
                                        if self.unshift_critic
                                        else core_outputs[delay:])
                critic_loss, targets, advantages = self.critic(
                    shifted_core_outputs, rewards[delay:], prob_ratios[:-1])

            if self.train_critic:
                losses.append(critic_loss)
                loss_vars.extend(self.critic.variables)

            if self.train_policy:
                policy_loss = self.policy.train(train_log_probs[:-1],
                                                advantages, entropy[:-1])
                losses.append(policy_loss)
                loss_vars.extend(self.policy.getVariables())

            if self.evolve_learning_rate:
                self.learning_rate = tf.Variable(self.learning_rate,
                                                 trainable=False,
                                                 name='learning_rate')
                self.evo_variables.append(
                    ('learning_rate', self.learning_rate, relative(1.5)))

            total_loss = tf.add_n(losses)
            with tf.variable_scope('train'):
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                gvs = optimizer.compute_gradients(total_loss)
                # gvs = [(tf.check_numerics(g, v.name), v) for g, v in gvs]
                gs, vs = zip(*gvs)

                norms = tf.stack([tf.norm(g) for g in gs])
                max_norm = tf.reduce_max(norms)
                tf.summary.scalar('max_grad_norm', max_norm)
                capped_gs = [
                    tf.clip_by_norm(g, self.clip_max_grad) for g in gs
                ]
                train_op = optimizer.apply_gradients(zip(capped_gs, vs))
                train_ops.append(train_op)

            print("Created train op(s)")

            avg_reward, _ = tfl.stats(experience['reward'], 'reward')

            misc_ops = []

            if not self.dynamic:
                misc_ops.append(tf.add_check_numerics_ops())

            if self.pop_id >= 0:
                self.reward = tf.Variable(0.,
                                          trainable=False,
                                          name='avg_reward')
                tf.summary.scalar('avg_reward', self.reward)
                new_reward = (1. - self.reward_decay
                              ) * self.reward + self.reward_decay * avg_reward
                misc_ops.append(tf.assign(self.reward, new_reward))

            self.mutators = []
            for name, evo_variable, mutator in self.evo_variables:
                tf.summary.scalar(name, evo_variable, family='evolution')
                self.mutators.append(
                    tf.assign(evo_variable, mutator(evo_variable)))

            self.summarize = tf.summary.merge_all()
            misc_ops.append(tf.assign_add(self.global_step, 1))
            self.misc = tf.group(*misc_ops)
            self.train_ops = tf.group(*train_ops)

            print("Creating summary writer at logs/%s." % self.name)
            #self.writer = tf.summary.FileWriter('logs/' + self.name)#, self.graph)
            self.writer = tf.summary.FileWriter(self.path)

            self._finalize_setup()
import tensorflow as tf

q = tf.FIFOQueue(1000, "float")
counter = tf.Variable(0.0)

increment_op = tf.assign_add(counter, tf.constant(1.0))

enqueue_op = q.enqueue([counter])

qr = tf.train.QueueRunner(q, enqueue_ops=[increment_op, enqueue_op] * 1)

sess = tf.Session()

sess.run(tf.global_variables_initializer())
# Coordinator acts like a semaphore and is used to synchronize the threads
coord = tf.train.Coordinator()

enqueue_threads = qr.create_threads(sess, coord=coord, start=True)

coord.request_stop()  # notify the other threads to stop

# main thread
for i in range(0, 10):
    try:
        print(sess.run(q.dequeue()))
    except tf.errors.OutOfRangeError:
        break

coord.join(enqueue_threads)  # wait until the other threads finish before exiting
Example #56
0
    def __init__(
            self,
            training_pipeline: InputPipeline,
            cv_pipeline: InputPipeline,
            network_factory: NetworkFactory,
            objective_factory: ObjectiveFactory,
            training_options: TrainingOptions,
            learning_rate: float,
            beta1: float = 0.9,
            beta2: float = 0.999):
        """
        Create a new network trainer
        :param training_pipeline: Input pipeline used for training
        :param cv_pipeline: Input pipeline used for cross-validation
        :param network_factory: Factory to create training and evaluation networks
        :param objective_factory: Factory to create generator and discriminator losses
        :param training_options: Options controlling the training process
        :param learning_rate: Learning rate to use in the Adam optimizer
        :param beta1: Beta1 to use in the Adam optimizer
        :param beta2: Beta2 to use in the Adam optimizer
        """

        self._training_options = training_options
        self._restored_iteration = None

        # Create input pipelines
        with use_cpu():
            self._training_pipeline = training_pipeline
            self._train_x, self._train_y, _ = training_pipeline.create_pipeline()
            self._cv_pipeline = cv_pipeline
            self._cv_x, self._cv_y, _ = self._cv_pipeline.create_pipeline()

        # Create training graph
        with tf.name_scope("training"):

            # Create networks
            self._generator = network_factory.create_generator(self._train_x, use_gpu=self._training_options.use_gpu,
                                                               data_format=self._training_options.data_format)
            self._discriminator_generated = network_factory.create_discriminator(
                self._train_x, self._generator, use_gpu=self._training_options.use_gpu,
                data_format=self._training_options.data_format)
            self._discriminator_real = network_factory.create_discriminator(
                self._train_x, self._train_y, reuse=True, use_gpu=self._training_options.use_gpu,
                data_format=self._training_options.data_format)

            # Create losses
            self._generator_loss, generator_summary = objective_factory.create_generator_loss(
                self._train_x, self._train_y,
                self._generator, self._discriminator_generated, use_gpu=self._training_options.use_gpu,
                data_format=self._training_options.data_format)
            self._discriminator_loss, discriminator_summary = objective_factory.create_discriminator_loss(
                self._train_x, self._train_y,
                self._generator, self._discriminator_generated, self._discriminator_real,
                use_gpu=self._training_options.use_gpu,
                data_format=self._training_options.data_format)

            with tf.device(select_device(self._training_options.use_gpu)):
                # Create optimizers
                trainable_variables = tf.trainable_variables()
                variables_discriminator = [var for var in trainable_variables if var.name.startswith("discriminator")]
                variables_generator = [var for var in trainable_variables if var.name.startswith("generator")]

                self._optimizer_generator = tf.train.AdamOptimizer(learning_rate, beta1, beta2, name="adam_generator")
                self._optimizer_discriminator = tf.train.AdamOptimizer(learning_rate, beta1, beta2, name="adam_discriminator")

                self._op_generator = self._optimizer_generator.minimize(self._generator_loss, var_list=variables_generator)
                self._op_discriminator = self._optimizer_discriminator.minimize(self._discriminator_loss, var_list=variables_discriminator)

            with use_cpu():
                # Iteration counter
                self._global_step = tf.Variable(0, trainable=False, name="global_step", dtype=tf.int64)
                self._step_op = tf.assign_add(self._global_step, 1)

            # Create summary operation
            accuracy, precision, recall, f1_score, specificity, jaccard_similarity = _create_summaries(self._generator, self._train_y)
            summary_operations = [
                tf.summary.scalar("accuracy", accuracy),
                tf.summary.scalar("precision", precision),
                tf.summary.scalar("recall", recall),
                tf.summary.scalar("f1_score", f1_score),
                tf.summary.scalar("specificity", specificity),
                tf.summary.scalar("jaccard_similarity", jaccard_similarity)
            ]

            self._train_saver = tf.train.Saver(keep_checkpoint_every_n_hours=1)

            # Merge summaries
            self._train_summary = tf.summary.merge(summary_operations + generator_summary + discriminator_summary)
            self._train_summary_writer = tf.summary.FileWriter(
                os.path.join(self._training_options.summary_directory, "training"), graph=tf.get_default_graph())

        # Create CV graph
        with tf.name_scope("cv"):
            # Create networks
            generator = network_factory.create_generator(
                self._cv_x, reuse=True, use_gpu=self._training_options.use_gpu,
                data_format=self._training_options.data_format)
            discriminator_generated = network_factory.create_discriminator(
                self._cv_x, generator, reuse=True, use_gpu=self._training_options.use_gpu,
                data_format=self._training_options.data_format)
            discriminator_real = network_factory.create_discriminator(
                self._cv_x, self._cv_y, reuse=True, use_gpu=self._training_options.use_gpu,
                data_format=self._training_options.data_format)

            # Create losses
            _, generator_summary = objective_factory.create_generator_loss(
                self._cv_x, self._cv_y, generator, discriminator_generated, use_gpu=self._training_options.use_gpu,
                data_format=self._training_options.data_format)
            _, discriminator_summary = objective_factory.create_discriminator_loss(
                self._cv_x, self._cv_y, generator, discriminator_generated, discriminator_real,
                use_gpu=self._training_options.use_gpu,
                data_format=self._training_options.data_format)

            # Create other summary options
            accuracy, precision, recall, f1_score, specificity, jaccard_similarity = _create_summaries(generator, self._cv_y)

            # Create summary operation
            summary_operations = [
                tf.summary.scalar("accuracy", accuracy),
                tf.summary.scalar("precision", precision),
                tf.summary.scalar("recall", recall),
                tf.summary.scalar("f1_score", f1_score),
                tf.summary.scalar("specificity", specificity),
                tf.summary.scalar("jaccard_similarity", jaccard_similarity)
            ]

            with use_cpu():
                # Concatenated images
                self._concatenated_images_op = _create_concatenated_images(
                    self._cv_x,
                    self._cv_y,
                    generator,
                    self._cv_pipeline.color_converter,
                    self._training_options.data_format
                )

            # Merge summaries
            self._cv_summary = tf.summary.merge(summary_operations + generator_summary + discriminator_summary)
            self._cv_summary_writer = tf.summary.FileWriter(
                os.path.join(self._training_options.summary_directory, "cv"))
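The factory and helper functions used above (network_factory, objective_factory, use_cpu, select_device, _create_summaries) are project-specific and not shown here. As a minimal, self-contained sketch of the core pattern — partitioning tf.trainable_variables() by scope prefix so each Adam optimizer only updates its own sub-network — a TF1-style toy with made-up networks and losses might look like this:

import tensorflow as tf  # TF1.x-style API, matching the example above

# Hypothetical toy networks; the real ones come from network_factory.
def toy_generator(x):
    with tf.variable_scope("generator"):
        return tf.layers.dense(x, 1, name="out")

def toy_discriminator(x, reuse=False):
    with tf.variable_scope("discriminator", reuse=reuse):
        return tf.layers.dense(x, 1, name="logit")

x = tf.placeholder(tf.float32, [None, 8])
y = tf.placeholder(tf.float32, [None, 1])

fake = toy_generator(x)
d_fake = toy_discriminator(fake)
d_real = toy_discriminator(y, reuse=True)

# Simple non-saturating GAN losses (stand-ins for objective_factory).
d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_real), logits=d_real) +
    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(d_fake), logits=d_fake))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_fake), logits=d_fake))

# Partition variables by scope prefix, as the example does, so each
# optimizer only touches its own sub-network.
train_vars = tf.trainable_variables()
g_vars = [v for v in train_vars if v.name.startswith("generator")]
d_vars = [v for v in train_vars if v.name.startswith("discriminator")]

g_op = tf.train.AdamOptimizer(1e-4, 0.5).minimize(g_loss, var_list=g_vars)
d_op = tf.train.AdamOptimizer(1e-4, 0.5).minimize(d_loss, var_list=d_vars)

Filtering by name prefix works because every variable created under tf.variable_scope("generator") or tf.variable_scope("discriminator") carries that prefix in its name.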
Example #57
0
  def testAccumulator(self):
    # testAccumulator compares
    #   - explicit averaging of independently computed var_grads1 and
    #     var_grads2,
    #   - Accumulator(SGD) optimizer effectively doing this over 2 steps.
    np.random.seed(12345)
    np_input1 = np.random.normal(0.1, 0.5, [2, 4, 3])
    np.random.seed(12346)
    np_input2 = np.random.normal(0.1, 0.5, [2, 4, 3])

    with self.session(use_gpu=True, graph=tf.Graph()) as sess:
      tf.set_random_seed(123456)
      params = layers.ProjectionLayer.Params()
      params.name = 'proj'
      params.dtype = tf.float64
      params.input_dim = 3
      params.output_dim = 2
      params.params_init = py_utils.WeightInit.Gaussian(0.01, 123456)
      params.is_eval = False
      params.batch_norm = False
      proj_layer = layers.ProjectionLayer(params)
      inputs1 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
      in_padding1 = tf.zeros([2, 4, 1], dtype=tf.float64)
      inputs2 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
      in_padding2 = tf.zeros([2, 4, 1], dtype=tf.float64)
      output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
      output2 = proj_layer.FPropDefaultTheta(inputs2, in_padding2)
      loss1 = tf.reduce_sum(output1)
      loss2 = tf.reduce_sum(output2)
      var_grads1 = py_utils.ComputeGradients(loss1, proj_layer.vars)
      var_grads2 = py_utils.ComputeGradients(loss2, proj_layer.vars)
      op = optimizer.SGD.Params()
      opt = op.Instantiate()
      lr = 1e-1
      with tf.control_dependencies([loss1, loss2]):
        var_update_op1 = opt.Apply(
            lr, py_utils.ApplyGradMultiplier(var_grads1, 1. / 2.))
        with tf.control_dependencies([var_update_op1]):
          var_update_op2 = opt.Apply(
              lr, py_utils.ApplyGradMultiplier(var_grads2, 1. / 2.))

      sess.run(tf.global_variables_initializer())
      vars1 = sess.run(proj_layer.vars.Flatten())
      loss1_1, grads1_1, loss1_2, grads1_2 = sess.run(
          [loss1, var_grads1, loss2, var_grads2],
          feed_dict={
              inputs1: np_input1,
              inputs2: np_input2,
          })
      sess.run(
          [var_update_op2], feed_dict={
              inputs1: np_input1,
              inputs2: np_input2,
          })
      vars1_1 = sess.run(proj_layer.vars.Flatten())

    with self.session(use_gpu=True, graph=tf.Graph()) as sess:
      tf.set_random_seed(123456)
      params = layers.ProjectionLayer.Params()
      params.name = 'proj'
      params.dtype = tf.float64
      params.input_dim = 3
      params.output_dim = 2
      params.params_init = py_utils.WeightInit.Gaussian(0.01, 123456)
      params.is_eval = False
      params.batch_norm = False
      proj_layer = layers.ProjectionLayer(params)
      in_padding1 = tf.zeros([2, 4, 1], dtype=tf.float64)
      inputs1 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
      output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
      loss = tf.reduce_sum(output1)
      var_grads = py_utils.ComputeGradients(loss, proj_layer.vars)
      op = optimizer.Accumulator.Params().Set(
          accum_steps=2, dtype=tf.float64, optimizer_tpl=optimizer.SGD.Params())
      opt = op.Instantiate()
      lr = 1e-1
      var_update_op = opt.Apply(lr, var_grads)
      increment_global_step_op = tf.assign_add(
          py_utils.GetOrCreateGlobalStepVar(), 1)

      sess.run(tf.global_variables_initializer())
      vars2 = sess.run(proj_layer.vars.Flatten())
      loss2_1, grads2_1 = sess.run(
          [loss, var_grads], feed_dict={
              inputs1: np_input1,
          })
      loss2_2, grads2_2 = sess.run(
          [loss, var_grads], feed_dict={
              inputs1: np_input2,
          })
      acc_0 = sess.run(
          [v for v in tf.global_variables() if 'grad_accumulator' in v.name])[0]
      sess.run(
          [var_update_op], feed_dict={
              inputs1: np_input1,
          })
      acc_1 = sess.run(
          [v for v in tf.global_variables() if 'grad_accumulator' in v.name])[0]
      vars2_intermediate = sess.run(proj_layer.vars.Flatten())
      sess.run(increment_global_step_op)
      sess.run(
          [var_update_op], feed_dict={
              inputs1: np_input2,
          })
      acc_2 = sess.run(
          [v for v in tf.global_variables() if 'grad_accumulator' in v.name])[0]
      vars2_1 = sess.run(proj_layer.vars.Flatten())

    self.assertAllClose(vars1, vars2)

    self.assertAllClose(acc_0, np.zeros_like(acc_0))
    self.assertAllClose(acc_1, grads2_1['w'][1])
    self.assertAllClose(acc_2, np.zeros_like(acc_0))

    self.assertAllClose(loss1_1, loss2_1)
    self.assertAllClose(loss1_2, loss2_2)
    self.assertAllClose(grads1_1, grads2_1)
    self.assertAllClose(grads1_2, grads2_2)

    self.assertAllClose(vars1, vars2_intermediate)

    self.assertAllClose(vars2[0], grads2_1['w'][0])
    self.assertAllClose(vars2[0], grads2_2['w'][0])

    self.assertAllClose(
        vars1[0] - 0.5 * lr * (grads1_1['w'][1] + grads1_2['w'][1]), vars1_1[0])

    self.assertAllClose(
        vars2[0] - 0.5 * lr * (grads2_1['w'][1] + grads2_2['w'][1]), vars2_1[0])

    self.assertAllClose(vars2, vars2_intermediate)
    self.assertAllClose(vars1_1, vars2_1)
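optimizer.Accumulator and py_utils are lingvo-specific. The mechanism the test exercises — summing gradients over accum_steps micro-batches, applying their average once, then zeroing the buffer — can be sketched in plain TF1 roughly as follows (variable and op names here are illustrative):

import tensorflow as tf

# Toy variable and loss; stand-ins for the projection layer in the test.
w = tf.get_variable("w", shape=[3, 2], dtype=tf.float64,
                    initializer=tf.random_normal_initializer(stddev=0.01))
x = tf.placeholder(tf.float64, [None, 3])
loss = tf.reduce_sum(tf.matmul(x, w))
(grad,) = tf.gradients(loss, [w])

accum_steps = 2
lr = 1e-1

# Non-trainable buffer that plays the role of 'grad_accumulator' above.
grad_accum = tf.get_variable("grad_accumulator", shape=w.shape, dtype=tf.float64,
                             initializer=tf.zeros_initializer(), trainable=False)

# On every micro-batch: add this batch's gradient to the buffer.
accumulate_op = tf.assign_add(grad_accum, grad)

# On the final micro-batch: apply the averaged gradient with SGD, then reset
# the buffer to zero, which matches acc_2 == 0 in the test's assertions.
with tf.control_dependencies([accumulate_op]):
    avg_grad = grad_accum / float(accum_steps)
    apply_op = tf.assign_sub(w, lr * avg_grad)
with tf.control_dependencies([apply_op]):
    apply_and_reset_op = tf.assign(grad_accum, tf.zeros_like(grad_accum))

Running accumulate_op for the first batch and apply_and_reset_op for the second gives the same final weights as one SGD step on the average of the two gradients, which is exactly the equivalence the test asserts.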
Example #58
0
def train(model=None):
    assert FLAGS.train_dir, 'train_dir must be given'
    global_step = tf.Variable(0, trainable=False)
    add_global = tf.assign_add(global_step, 1)

    loss_op, train_op, acc_op = model.build_train_graph(global_step)
    dev_acc_op, dev_num_op, dev_init_op = model.build_dev_graph()
    test_acc_op, test_num_op, test_init_op = model.build_test_graph()

    train_ckpt_dir = FLAGS.train_dir + '/train_ckpt'
    os.makedirs(train_ckpt_dir, exist_ok=True)
    sum_writer = tf.summary.FileWriter(str(train_ckpt_dir),
                                       graph=tf.get_default_graph())
    best_dev_acc = 0.0
    final_acc = 0.0
    saver = tf.train.Saver(max_to_keep=1)
    init = tf.global_variables_initializer()
    with tf.Session(config=utils.get_config()) as sess:
        tf.set_random_seed(FLAGS.random_seed)
        np.random.seed(FLAGS.random_seed)
        sess.run(init)
        for _ in itertools.count(1):
            this_global_step = sess.run(add_global)
            if this_global_step >= FLAGS.max_steps + 1:
                break
            _, loss, acc = sess.run([train_op, loss_op, acc_op])
            if this_global_step != 0 and this_global_step % FLAGS.test_steps == 0:
                number = 0
                accuracy = 0.0
                while True:
                    try:
                        acc, num = sess.run([dev_acc_op, dev_num_op])
                        number += num
                        accuracy += acc * num
                    except tf.errors.OutOfRangeError:
                        break
                accuracy /= number
                print('At step %d. dev num=%d acc=%f.' %
                      (this_global_step, number, accuracy))
                if accuracy > best_dev_acc:
                    best_dev_acc = accuracy
                    print("best acc=%f At step %d." %
                          (best_dev_acc, this_global_step))
                    test_accuracy = 0.
                    test_number = 0
                    while True:
                        try:
                            test_acc, test_num = sess.run(
                                [test_acc_op, test_num_op])
                            test_number += test_num
                            test_accuracy += test_acc * test_num
                        except tf.errors.OutOfRangeError:
                            break
                    test_accuracy /= test_number
                    print('test num=%d acc=%f.' % (test_number, test_accuracy))
                    final_acc = test_accuracy
                    sess.run(test_init_op)
                    save_checkpoint(saver, sess, FLAGS.train_dir,
                                    this_global_step)
                summary = tf.Summary()
                # `accuracy` here is the dev-set accuracy computed above.
                summary.value.add(tag='dev_acc', simple_value=accuracy)
                summary.value.add(tag='best_dev_acc',
                                  simple_value=best_dev_acc)
                sum_writer.add_summary(summary, this_global_step)
                sess.run(dev_init_op)
    sum_writer.close()
    print('Accuracy of test set is %f .' % final_acc)
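FLAGS, the model graph builders, and the dataset init ops are project-specific. The step-counting pattern on its own — a single tf.assign_add op whose returned value both advances and reports the global step — can be sketched minimally like this (names and constants are placeholders):

import tensorflow as tf

max_steps = 5  # stand-in for FLAGS.max_steps

global_step = tf.Variable(0, trainable=False, name="global_step")
add_global = tf.assign_add(global_step, 1)

# Dummy train op; the real one comes from model.build_train_graph().
x = tf.Variable(0.0)
train_op = tf.assign_add(x, 1.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    while True:
        # Each run both increments the counter and returns its new value,
        # so the loop body always sees the step it is currently executing.
        step = sess.run(add_global)
        if step >= max_steps + 1:
            break
        sess.run(train_op)
        if step % 2 == 0:
            print("evaluate at step", step)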
Example #59
0
    def pathint_stabilization(self, adam_optimizer):
        """ Synaptic stabilization via the Zenke method """

        # Set up method
        optimizer_task = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        small_omega_var = {}
        small_omega_var_div = {}

        reset_small_omega_ops = []
        update_small_omega_ops = []
        update_big_omega_ops = []

        # If using reinforcement learning, update rewards
        if par['training_method'] == 'RL':
            self.previous_reward = tf.Variable(-tf.ones([]), trainable=False)
            self.current_reward = tf.Variable(-tf.ones([]), trainable=False)

            reward_stacked = tf.stack(self.reward, axis=0)
            current_reward = tf.reduce_mean(
                tf.reduce_sum(reward_stacked, axis=0))
            self.update_current_reward = tf.assign(self.current_reward,
                                                   current_reward)
            self.update_previous_reward = tf.assign(self.previous_reward,
                                                    self.current_reward)

        # Iterate over variables in the model
        for var in tf.trainable_variables():

            # Create the small omega vars (and ops that reset them to zero)
            small_omega_var[var.op.name] = tf.Variable(
                tf.zeros(var.get_shape()), trainable=False)
            small_omega_var_div[var.op.name] = tf.Variable(
                tf.zeros(var.get_shape()), trainable=False)
            reset_small_omega_ops.append(
                tf.assign(small_omega_var[var.op.name],
                          small_omega_var[var.op.name] * 0.0))
            reset_small_omega_ops.append(
                tf.assign(small_omega_var_div[var.op.name],
                          small_omega_var_div[var.op.name] * 0.0))

            # Update the big omega vars based on the training method
            if par['training_method'] == 'RL':
                update_big_omega_ops.append(tf.assign_add(
                    self.big_omega_var[var.op.name],
                    tf.div(tf.abs(small_omega_var[var.op.name]),
                           par['omega_xi'] + small_omega_var_div[var.op.name])))
            elif par['training_method'] == 'SL':
                update_big_omega_ops.append(tf.assign_add(
                    self.big_omega_var[var.op.name],
                    tf.div(tf.nn.relu(small_omega_var[var.op.name]),
                           par['omega_xi'] + small_omega_var_div[var.op.name]**2)))

        # After each task is complete, call update_big_omega and reset_small_omega
        self.update_big_omega = tf.group(*update_big_omega_ops)

        # reset_small_omega also makes a backup of the final weights, which is used as a hook in the auxiliary loss
        self.reset_small_omega = tf.group(*reset_small_omega_ops)

        # This is called every batch
        self.delta_grads = adam_optimizer.return_delta_grads()
        self.gradients = optimizer_task.compute_gradients(self.pol_loss)

        # Update the small omegas using the gradients
        for (grad, var) in self.gradients:
            if par['training_method'] == 'RL':
                delta_reward = self.current_reward - self.previous_reward
                update_small_omega_ops.append(
                    tf.assign_add(small_omega_var[var.op.name],
                                  self.delta_grads[var.op.name] *
                                  delta_reward))
                update_small_omega_ops.append(
                    tf.assign_add(
                        small_omega_var_div[var.op.name],
                        tf.abs(self.delta_grads[var.op.name] * delta_reward)))
            elif par['training_method'] == 'SL':
                update_small_omega_ops.append(
                    tf.assign_add(small_omega_var[var.op.name],
                                  -self.delta_grads[var.op.name] * grad))
                update_small_omega_ops.append(
                    tf.assign_add(small_omega_var_div[var.op.name],
                                  self.delta_grads[var.op.name]))

        # Make update group
        self.update_small_omega = tf.group(
            *update_small_omega_ops)  # update small_omega after each training batch
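As a rough NumPy sketch of what the small/big omega bookkeeping computes in the 'SL' branch (following Zenke et al.'s synaptic intelligence; all names below are illustrative and not the ones used above):

import numpy as np

xi = 0.01  # damping term, plays the role of par['omega_xi']

def accumulate_small_omega(small_omega, small_omega_div, delta_w, grad):
    """Per-batch bookkeeping: path contribution of each update plus the
    accumulated weight displacement."""
    small_omega += -delta_w * grad   # contribution of this update to the loss path
    small_omega_div += delta_w       # total parameter movement during the task
    return small_omega, small_omega_div

def update_big_omega(big_omega, small_omega, small_omega_div):
    """End-of-task importance: positive path contribution, normalized by
    the squared total displacement (the 'SL' branch above)."""
    big_omega += np.maximum(small_omega, 0.0) / (xi + small_omega_div ** 2)
    return big_omega

# Toy usage on a single 2x2 weight matrix.
w_shape = (2, 2)
small_omega = np.zeros(w_shape)
small_omega_div = np.zeros(w_shape)
big_omega = np.zeros(w_shape)
for _ in range(3):                   # three fake training batches
    grad = np.random.randn(*w_shape)
    delta_w = -0.1 * grad            # e.g. an SGD-style weight update
    small_omega, small_omega_div = accumulate_small_omega(
        small_omega, small_omega_div, delta_w, grad)
big_omega = update_big_omega(big_omega, small_omega, small_omega_div)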
Example #60
0
    def accu_global_ms(self):
        """Accumulate squared gradients into the global mean-square buffers."""
        accu = []
        for grad, ms in zip(self._grads, self._global_ms):
            # Add this batch's elementwise squared gradient to its buffer.
            accu.append(tf.assign_add(ms, tf.square(grad)))
        # Count how many batches have been accumulated.
        inc_op = tf.assign_add(self._batches_num, tf.ones([]))
        return [accu, inc_op]
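The surrounding class (self._grads, self._global_ms, self._batches_num) is not shown above. A self-contained sketch of the same squared-gradient accumulation, with hypothetical attribute and method names, might look like this:

import tensorflow as tf

class GlobalMSAccumulator(object):
    """Running sum of squared gradients plus a batch counter, mirroring the
    accu_global_ms snippet above (names are illustrative)."""

    def __init__(self, variables):
        self._global_ms = [
            tf.Variable(tf.zeros_like(v), trainable=False) for v in variables
        ]
        self._batches_num = tf.Variable(0.0, trainable=False)

    def accu_global_ms(self, grads):
        # Add each gradient's elementwise square to its buffer.
        accu = [tf.assign_add(ms, tf.square(g))
                for g, ms in zip(grads, self._global_ms)]
        # Count how many batches have contributed so far.
        inc_op = tf.assign_add(self._batches_num, 1.0)
        return [accu, inc_op]

    def mean_square(self):
        # Divide the accumulated sums by the batch count to get mean squares.
        return [ms / tf.maximum(self._batches_num, 1.0) for ms in self._global_ms]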