Example #1
    def testTrainWithLocalVariable(self):
        with ops.Graph().as_default():
            random_seed.set_random_seed(0)
            tf_inputs = constant_op.constant(self._inputs,
                                             dtype=dtypes.float32)
            tf_labels = constant_op.constant(self._labels,
                                             dtype=dtypes.float32)

            local_multiplier = variables_lib.local_variable(1.0)

            tf_predictions = logistic_classifier(tf_inputs) * local_multiplier
            losses.log_loss(tf_labels, tf_predictions)
            total_loss = losses.get_total_loss()
            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=1.0)
            train_op = training.create_train_op(total_loss, optimizer)

            loss = training.train(
                train_op,
                None,
                hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)],
                save_summaries_steps=None,
                save_checkpoint_secs=None)
            self.assertIsNotNone(loss)
            self.assertLess(loss, .015)
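
The `logistic_classifier` helper used throughout these examples is not shown on this page. A minimal sketch consistent with how it is called (a single fully connected unit with a sigmoid output, so `log_loss` receives a probability) might look like the following; the real helper in the test file may differ:

import tensorflow as tf
from tensorflow.contrib import layers as layers_lib


def logistic_classifier(inputs):
    # One fully connected unit squashed through a sigmoid, i.e. plain
    # logistic regression over the input features.
    return layers_lib.fully_connected(inputs, 1, activation_fn=tf.sigmoid)
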
Example #2
  def testResumeTrainAchievesRoughlyTheSameLoss(self):
    number_of_steps = [300, 1, 5]
    logdir = os.path.join(self.get_temp_dir(), 'resume_train_same_loss')

    for i in range(len(number_of_steps)):
      with ops.Graph().as_default():
        random_seed.set_random_seed(i)
        tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
        tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

        tf_predictions = logistic_classifier(tf_inputs)
        loss_ops.log_loss(tf_predictions, tf_labels)
        total_loss = loss_ops.get_total_loss()

        optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

        train_op = training.create_train_op(total_loss, optimizer)

        saver = saver_lib.Saver()

        loss = training.train(
            train_op,
            logdir,
            hooks=[
                basic_session_run_hooks.StopAtStepHook(
                    num_steps=number_of_steps[i]),
                basic_session_run_hooks.CheckpointSaverHook(
                    logdir, save_steps=50, saver=saver),
            ])
        self.assertIsNotNone(loss)
        self.assertLess(loss, .015)
Example #3
    def _train_model(self, checkpoint_dir, num_steps):
        """Trains a simple classification model.

    Note that the data has been configured such that after around 300 steps,
    the model has memorized the dataset (e.g. we can expect %100 accuracy).

    Args:
      checkpoint_dir: The directory where the checkpoint is written to.
      num_steps: The number of steps to train for.
    """
        with ops.Graph().as_default():
            random_seed.set_random_seed(0)
            tf_inputs = constant_op.constant(self._inputs,
                                             dtype=dtypes.float32)
            tf_labels = constant_op.constant(self._labels,
                                             dtype=dtypes.float32)

            tf_predictions = logistic_classifier(tf_inputs)
            loss = loss_ops.log_loss(tf_predictions, tf_labels)

            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=1.0)
            train_op = training.create_train_op(loss, optimizer)

            loss = training.train(
                train_op,
                checkpoint_dir,
                hooks=[basic_session_run_hooks.StopAtStepHook(num_steps)])
Example #4
    def testCanAchieveZeroLoss(self):
        logdir = os.path.join(self.get_temp_dir(), 'can_achieve_zero_loss')

        with ops.Graph().as_default():
            random_seed.set_random_seed(0)
            tf_inputs = constant_op.constant(self._inputs,
                                             dtype=dtypes.float32)
            tf_labels = constant_op.constant(self._labels,
                                             dtype=dtypes.float32)

            tf_predictions = logistic_classifier(tf_inputs)
            loss_ops.log_loss(tf_predictions, tf_labels)
            total_loss = loss_ops.get_total_loss()

            optimizer = gradient_descent.GradientDescentOptimizer(
                learning_rate=1.0)

            train_op = training.create_train_op(total_loss, optimizer)

            loss = training.train(
                train_op,
                logdir,
                hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)])
            self.assertIsNotNone(loss)
            self.assertLess(loss, .015)
Example #5
  def _train_model(self, checkpoint_dir, num_steps):
    """Trains a simple classification model.

    Note that the data has been configured such that after around 300 steps,
    the model has memorized the dataset (i.e., we can expect 100% accuracy).

    Args:
      checkpoint_dir: The directory where the checkpoint is written to.
      num_steps: The number of steps to train for.
    """
    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
      tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

      tf_predictions = logistic_classifier(tf_inputs)
      loss = loss_ops.log_loss(tf_predictions, tf_labels)

      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
      train_op = training.create_train_op(loss, optimizer)

      loss = training.train(
          train_op,
          checkpoint_dir,
          hooks=[basic_session_run_hooks.StopAtStepHook(num_steps)])
Example #6
def gan_train(
    self,
    train_ops,
    logdir,
    get_hooks_fn=get_joint_train_hooks(),
    master='',
    is_chief=True,
    scaffold=None,
    hooks=None,
    chief_only_hooks=None,
    save_checkpoint_secs=600,
    save_summaries_steps=100,
    config=None
):
    new_hooks = get_hooks_fn(train_ops)
    if hooks is not None:
        hooks = list(hooks) + list(new_hooks)
    else:
        hooks = new_hooks
    return training.train(
            train_ops.global_step_inc_op,
            logdir,
            master=master,
            is_chief=is_chief,
            scaffold=scaffold,
            hooks=hooks,
            chief_only_hooks=chief_only_hooks,
            save_checkpoint_secs=save_checkpoint_secs,
            save_summaries_steps=save_summaries_steps,
            config=config)
Example #7
  def testTrainWithAlteredGradients(self):
    # Use the same learning rate but different gradient multipliers
    # to train two models. The model with the equivalently larger learning
    # rate (i.e., learning_rate * gradient_multiplier) should have a smaller
    # training loss.
    multipliers = [1., 1000.]
    number_of_steps = 10
    learning_rate = 0.001

    # First, train the model with equivalently smaller learning rate.
    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      train_op = self.create_train_op(
          learning_rate=learning_rate, gradient_multiplier=multipliers[0])

      loss0 = training.train(
          train_op,
          None,
          hooks=[
              basic_session_run_hooks.StopAtStepHook(num_steps=number_of_steps),
          ],
          save_checkpoint_secs=None,
          save_summaries_steps=None)
      self.assertIsNotNone(loss0)
      self.assertGreater(loss0, .5)

    # Second, train the model with equivalently larger learning rate.
    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      train_op = self.create_train_op(
          learning_rate=learning_rate, gradient_multiplier=multipliers[1])

      loss1 = training.train(
          train_op,
          None,
          hooks=[
              basic_session_run_hooks.StopAtStepHook(num_steps=number_of_steps),
          ],
          save_checkpoint_secs=None,
          save_summaries_steps=None)
      self.assertIsNotNone(loss1)
      self.assertLess(loss1, .5)

    # The loss of the model trained with the larger learning rate should
    # be smaller.
    self.assertGreater(loss0, loss1)
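
`self.create_train_op` here is a helper on the test class that is not shown on this page. A hedged sketch of how such a helper could apply a gradient multiplier, assuming `create_train_op`'s `transform_grads_fn` argument is available, dense gradients, and the same module aliases used by the surrounding examples:

  def create_train_op(self, learning_rate=1.0, gradient_multiplier=1.0):
    # Hypothetical helper: the same logistic model as above, but every
    # gradient is scaled by `gradient_multiplier` before being applied.
    # With plain gradient descent this is equivalent to scaling the
    # learning rate.
    tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
    tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

    tf_predictions = logistic_classifier(tf_inputs)
    losses.log_loss(tf_labels, tf_predictions)
    total_loss = losses.get_total_loss()

    optimizer = gradient_descent.GradientDescentOptimizer(
        learning_rate=learning_rate)

    def transform_grads_fn(grads_and_vars):
      # Scale every (gradient, variable) pair by the same scalar.
      return [(grad * gradient_multiplier, var)
              for grad, var in grads_and_vars]

    return training.create_train_op(
        total_loss, optimizer, transform_grads_fn=transform_grads_fn)
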
Example #8
    def testTrainWithAlteredGradients(self):
        # Use the same learning rate but different gradient multipliers
        # to train two models. The model with the equivalently larger learning
        # rate (i.e., learning_rate * gradient_multiplier) should have a smaller
        # training loss.
        multipliers = [1., 1000.]
        number_of_steps = 10
        learning_rate = 0.001

        # First, train the model with equivalently smaller learning rate.
        with ops.Graph().as_default():
            random_seed.set_random_seed(0)
            train_op = self.create_train_op(learning_rate=learning_rate,
                                            gradient_multiplier=multipliers[0])

            loss0 = training.train(train_op,
                                   None,
                                   hooks=[
                                       basic_session_run_hooks.StopAtStepHook(
                                           num_steps=number_of_steps),
                                   ],
                                   save_checkpoint_secs=None,
                                   save_summaries_steps=None)
            self.assertIsNotNone(loss0)
            self.assertGreater(loss0, .5)

        # Second, train the model with equivalently larger learning rate.
        with ops.Graph().as_default():
            random_seed.set_random_seed(0)
            train_op = self.create_train_op(learning_rate=learning_rate,
                                            gradient_multiplier=multipliers[1])

            loss1 = training.train(train_op,
                                   None,
                                   hooks=[
                                       basic_session_run_hooks.StopAtStepHook(
                                           num_steps=number_of_steps),
                                   ],
                                   save_checkpoint_secs=None,
                                   save_summaries_steps=None)
            self.assertIsNotNone(loss1)
            self.assertLess(loss1, .5)

        # The loss of the model trained with the larger learning rate should
        # be smaller.
        self.assertGreater(loss0, loss1)
Example #9
def gan_train(
    train_ops,
    logdir,
    get_hooks_fn=get_sequential_train_hooks(),
    master='',
    is_chief=True,
    scaffold=None,
    hooks=None,
    chief_only_hooks=None,
    save_checkpoint_secs=600,
    save_summaries_steps=100,
    config=None):
  """A wrapper around `contrib.training.train` that uses GAN hooks.

  Args:
    train_ops: A GANTrainOps named tuple.
    logdir: The directory where the graph and checkpoints are saved.
    get_hooks_fn: A function that takes a GANTrainOps tuple and returns a list
      of hooks.
    master: The URL of the master.
    is_chief: Specifies whether or not the training is being run by the primary
      replica during replica training.
    scaffold: A tf.train.Scaffold instance.
    hooks: List of `tf.train.SessionRunHook` callbacks which are run inside the
      training loop.
    chief_only_hooks: List of `tf.train.SessionRunHook` instances which are run
      inside the training loop for the chief trainer only.
    save_checkpoint_secs: The frequency, in seconds, that a checkpoint is saved
      using a default checkpoint saver. If `save_checkpoint_secs` is set to
      `None`, then the default checkpoint saver isn't used.
    save_summaries_steps: The frequency, in number of global steps, that the
      summaries are written to disk using a default summary saver. If
      `save_summaries_steps` is set to `None`, then the default summary saver
      isn't used.
    config: An instance of `tf.ConfigProto`.

  Returns:
    Output of the call to `training.train`.
  """
  new_hooks = get_hooks_fn(train_ops)
  if hooks is not None:
    hooks = list(hooks) + list(new_hooks)
  else:
    hooks = new_hooks
  return training.train(
      train_ops.global_step_inc_op,
      logdir,
      master=master,
      is_chief=is_chief,
      scaffold=scaffold,
      hooks=hooks,
      chief_only_hooks=chief_only_hooks,
      save_checkpoint_secs=save_checkpoint_secs,
      save_summaries_steps=save_summaries_steps,
      config=config)
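
A hedged usage sketch for `gan_train`, assuming `tf.contrib.gan` is available: the toy `generator_fn`/`discriminator_fn`, the random stand-in data, the logdir, and the optimizers below are all placeholders, not part of the original example.

import tensorflow as tf
tfgan = tf.contrib.gan


def generator_fn(noise):
  # Toy generator: one dense layer producing flat "images".
  return tf.layers.dense(noise, 28 * 28, activation=tf.tanh)


def discriminator_fn(data, unused_conditioning):
  # Toy discriminator: a single logit per example.
  return tf.layers.dense(data, 1)


noise = tf.random_normal([32, 64])
real_data = tf.random_normal([32, 28 * 28])  # stand-in for a real input pipeline

gan_model = tfgan.gan_model(generator_fn, discriminator_fn, real_data, noise)
gan_loss = tfgan.gan_loss(gan_model)
train_ops = tfgan.gan_train_ops(
    gan_model, gan_loss,
    generator_optimizer=tf.train.AdamOptimizer(1e-4),
    discriminator_optimizer=tf.train.AdamOptimizer(1e-4))

gan_train(
    train_ops,
    logdir='/tmp/gan_logs',
    hooks=[tf.train.StopAtStepHook(num_steps=1000)],
    save_checkpoint_secs=300)
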
Example #11
  def testTrainWithNoInitAssignCanAchieveZeroLoss(self):
    g = ops.Graph()
    with g.as_default():
      random_seed.set_random_seed(0)
      tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
      tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

      tf_predictions = batchnorm_classifier(tf_inputs)
      loss_ops.log_loss(tf_predictions, tf_labels)
      total_loss = loss_ops.get_total_loss()

      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

      train_op = training.create_train_op(total_loss, optimizer)

      loss = training.train(
          train_op,
          self._logdir,
          hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)])
      self.assertLess(loss, .1)
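
`batchnorm_classifier` is likewise not defined on this page. A minimal sketch consistent with its use (batch normalization over the inputs followed by one sigmoid unit) could look like this; the real helper may differ:

import tensorflow as tf
from tensorflow.contrib import layers as layers_lib


def batchnorm_classifier(inputs):
  # Batch-normalize the inputs; the moving-average update ops this adds to
  # the UPDATE_OPS collection are what create_train_op wires into train_op.
  inputs = layers_lib.batch_norm(inputs, decay=0.1)
  return layers_lib.fully_connected(inputs, 1, activation_fn=tf.sigmoid)
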
Example #12
  def testCanAchieveZeroLoss(self):
    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
      tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

      tf_predictions = logistic_classifier(tf_inputs)
      losses.log_loss(tf_labels, tf_predictions)
      total_loss = losses.get_total_loss()
      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
      train_op = training.create_train_op(total_loss, optimizer)

      loss = training.train(
          train_op,
          None,
          hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)],
          save_summaries_steps=None,
          save_checkpoint_secs=None)
      self.assertIsNotNone(loss)
      self.assertLess(loss, .015)
Example #13
  def testCanAchieveZeroLoss(self):
    logdir = os.path.join(self.get_temp_dir(), 'can_achieve_zero_loss')

    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
      tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

      tf_predictions = logistic_classifier(tf_inputs)
      loss_ops.log_loss(tf_predictions, tf_labels)
      total_loss = loss_ops.get_total_loss()

      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

      train_op = training.create_train_op(total_loss, optimizer)

      loss = training.train(
          train_op,
          logdir,
          hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)])
      self.assertIsNotNone(loss)
      self.assertLess(loss, .015)
Example #14
  def testTrainWithAlteredGradients(self):
    # Use the same learning rate but different gradient multipliers
    # to train two models. The model with the equivalently larger learning
    # rate (i.e., learning_rate * gradient_multiplier) should have a smaller
    # training loss.
    logdir1 = os.path.join(self.get_temp_dir(), 'tmp_logs6/')
    logdir2 = os.path.join(self.get_temp_dir(), 'tmp_logs7/')

    if gfile.Exists(logdir1):
      gfile.DeleteRecursively(logdir1)
    if gfile.Exists(logdir2):
      gfile.DeleteRecursively(logdir2)

    multipliers = [1., 1000.]
    number_of_steps = 10
    losses = []
    learning_rate = 0.001

    # First, train the model with equivalently smaller learning rate.
    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      train_op = self.create_train_op(
          learning_rate=learning_rate, gradient_multiplier=multipliers[0])

      saver = saver_lib.Saver()

      loss = training.train(
          train_op,
          logdir1,
          hooks=[
              basic_session_run_hooks.StopAtStepHook(num_steps=number_of_steps),
              basic_session_run_hooks.CheckpointSaverHook(
                  logdir1, save_steps=50, saver=saver),
          ])

      losses.append(loss)
      self.assertGreater(loss, .5)

    # Second, train the model with equivalently larger learning rate.
    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      train_op = self.create_train_op(
          learning_rate=learning_rate, gradient_multiplier=multipliers[1])
      saver = saver_lib.Saver()

      loss = training.train(
          train_op,
          logdir2,
          hooks=[
              basic_session_run_hooks.StopAtStepHook(num_steps=number_of_steps),
              basic_session_run_hooks.CheckpointSaverHook(
                  logdir2, save_steps=50, saver=saver),
          ])

      losses.append(loss)
      self.assertIsNotNone(loss)
      self.assertLess(loss, .5)

    # The loss of the model trained with the larger learning rate should
    # be smaller.
    self.assertGreater(losses[0], losses[1])
Example #15
  def testTrainAllVarsHasLowerLossThanTrainSubsetOfVars(self):
    logdir = os.path.join(self.get_temp_dir(), 'tmp_logs3/')
    if gfile.Exists(logdir):  # For running on Jenkins.
      gfile.DeleteRecursively(logdir)

    # First, train only the weights of the model.
    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      total_loss = self.ModelLoss()
      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
      weights = variables_lib.get_variables_by_name('weights')

      train_op = training.create_train_op(
          total_loss, optimizer, variables_to_train=weights)

      saver = saver_lib.Saver()
      loss = training.train(
          train_op,
          logdir,
          hooks=[
              basic_session_run_hooks.CheckpointSaverHook(
                  logdir, save_steps=1, saver=saver),
              basic_session_run_hooks.StopAtStepHook(num_steps=200),
          ])
      self.assertGreater(loss, .015)
      self.assertLess(loss, .05)

    # Next, train the biases of the model.
    with ops.Graph().as_default():
      random_seed.set_random_seed(1)
      total_loss = self.ModelLoss()
      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)
      biases = variables_lib.get_variables_by_name('biases')

      train_op = training.create_train_op(
          total_loss, optimizer, variables_to_train=biases)

      saver = saver_lib.Saver()
      loss = training.train(
          train_op,
          logdir,
          hooks=[
              basic_session_run_hooks.CheckpointSaverHook(
                  logdir, save_steps=1, saver=saver),
              basic_session_run_hooks.StopAtStepHook(num_steps=300),
          ])
      self.assertGreater(loss, .015)
      self.assertLess(loss, .05)

    # Finally, train both weights and biases to get a lower loss.
    with ops.Graph().as_default():
      random_seed.set_random_seed(2)
      total_loss = self.ModelLoss()
      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

      train_op = training.create_train_op(total_loss, optimizer)
      saver = saver_lib.Saver()
      loss = training.train(
          train_op,
          logdir,
          hooks=[
              basic_session_run_hooks.CheckpointSaverHook(
                  logdir, save_steps=1, saver=saver),
              basic_session_run_hooks.StopAtStepHook(num_steps=400),
          ])
      self.assertIsNotNone(loss)
      self.assertLess(loss, .015)
Example #16
  def testTrainWithInitFromCheckpoint(self):
    logdir1 = os.path.join(self.get_temp_dir(), 'tmp_logs1/')
    logdir2 = os.path.join(self.get_temp_dir(), 'tmp_logs2/')

    if gfile.Exists(logdir1):  # For running on Jenkins.
      gfile.DeleteRecursively(logdir1)
    if gfile.Exists(logdir2):  # For running on Jenkins.
      gfile.DeleteRecursively(logdir2)

    # First, train the model one step (make sure the error is high).
    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      train_op = self.create_train_op()
      saver = saver_lib.Saver()
      loss = training.train(
          train_op,
          logdir1,
          hooks=[
              basic_session_run_hooks.CheckpointSaverHook(
                  logdir1, save_steps=1, saver=saver),
              basic_session_run_hooks.StopAtStepHook(num_steps=1),
          ],
          save_checkpoint_secs=None)
      self.assertGreater(loss, .5)

    # Next, train the model to convergence.
    with ops.Graph().as_default():
      random_seed.set_random_seed(1)
      train_op = self.create_train_op()
      saver = saver_lib.Saver()
      loss = training.train(
          train_op,
          logdir1,
          hooks=[
              basic_session_run_hooks.CheckpointSaverHook(
                  logdir1, save_steps=1, saver=saver),
              basic_session_run_hooks.StopAtStepHook(num_steps=300),
          ],
          save_checkpoint_secs=None)
      self.assertIsNotNone(loss)
      self.assertLess(loss, .02)

    # Finally, advance the model a single step and validate that the loss is
    # still low.
    with ops.Graph().as_default():
      random_seed.set_random_seed(2)
      train_op = self.create_train_op()

      model_variables = variables_lib2.global_variables()
      model_path = os.path.join(logdir1, 'model.ckpt-300')

      assign_fn = variables_lib.assign_from_checkpoint_fn(model_path,
                                                          model_variables)

      def init_fn(_, session):
        assign_fn(session)

      loss = training.train(
          train_op,
          logdir2,
          scaffold=monitored_session.Scaffold(init_fn=init_fn),
          hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=1)])

      self.assertIsNotNone(loss)
      self.assertLess(loss, .02)
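
The `init_fn` shim above exists because `Scaffold` calls its `init_fn` with two arguments, `(scaffold, session)`, while the callable returned by `assign_from_checkpoint_fn` expects only a session. The same warm start can be written inline with a lambda; a sketch reusing the names from this example:

      # Equivalent to the init_fn defined above: adapt the session-only
      # callable to Scaffold's (scaffold, session) signature.
      assign_fn = variables_lib.assign_from_checkpoint_fn(
          model_path, variables_lib2.global_variables())
      scaffold = monitored_session.Scaffold(
          init_fn=lambda _, session: assign_fn(session))

      loss = training.train(
          train_op,
          logdir2,
          scaffold=scaffold,
          hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=1)])
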
Example #17
def main(_):
    utils.create_folder(folder_list=[FLAGS.dataset_dir, FLAGS.train_log_dir])

    # Force all input processing onto CPU in order to reserve the GPU for
    # the forward inference and back-propagation.
    with tf.name_scope('inputs'):
        with tf.device('/cpu:0'):
            inputs = ginput.Input()
            input_a, input_b = inputs.get_batch(
                FLAGS.batch_size, FLAGS.img_size, FLAGS.channels)
            labels = tf.cast(tf.random_uniform([FLAGS.batch_size, 1],
                                               minval=0,
                                               maxval=2,
                                               dtype=tf.int32),
                             dtype=tf.float32)

    # Define the Model
    model = gmodel.Model(model_type=FLAGS.model_type,
                         is_train=True,
                         grid_size=FLAGS.grid_size)
    model.build(img_a=input_a, img_b=input_b, labels=labels)

    # Define Loss
    loss_fn = gloss.Loss(model=model,
                         weight_decay_coeff=FLAGS.weight_decay_coeff)

    # Get global step and the respective operation
    global_step_inc = utils.create_global_step()

    # Create training operation
    train_op = gtrain_op.TrainOp(model=model,
                                 loss_fn=loss_fn,
                                 learning_rate=FLAGS.learning_rate,
                                 decay_steps=FLAGS.decay_steps,
                                 beta=FLAGS.beta,
                                 staircase=FLAGS.staircase)

    op = train_op.create_train_op()

    status_message = tf.string_join(
        ['Starting train step: ',
         tf.as_string(tf.train.get_or_create_global_step())],
        name='status_message')

    hooks = [
        tf.train.LoggingTensorHook([status_message], every_n_iter=10),
        tf.train.StopAtStepHook(num_steps=FLAGS.max_number_of_steps),
        utils.RunTrainOpsHook(train_ops=op)
    ]

    training.train(train_op=global_step_inc,
                   logdir=FLAGS.train_log_dir,
                   master='',
                   is_chief=True,
                   scaffold=None,
                   hooks=hooks,
                   chief_only_hooks=None,
                   save_checkpoint_secs=FLAGS.save_checkpoint_secs,
                   save_summaries_steps=FLAGS.save_summaries_steps,
                   config=None,
                   max_wait_secs=7200)
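
`utils.RunTrainOpsHook` is project-specific and not shown here. A hedged sketch of such a hook, patterned after `tf.contrib.gan`'s hook of the same name: it runs the supplied train ops before every step, while `training.train` itself drives only the global-step increment op passed to it in this example.

import tensorflow as tf


class RunTrainOpsHook(tf.train.SessionRunHook):
    """Runs the given train ops once (or more) before every training step."""

    def __init__(self, train_ops, train_steps=1):
        # `train_ops` is a single op or a list of ops; `train_steps` is how
        # many times to run them per outer step of the training loop.
        self._train_ops = train_ops
        self._train_steps = train_steps

    def before_run(self, run_context):
        for _ in range(self._train_steps):
            run_context.session.run(self._train_ops)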