Example #1
    def _setup_graph_training(self, accuracy, loss, optimizer):
        data_types, data_shapes = self.data['train'].output_types, \
                                  self.data['train'].output_shapes
        self._iterator = tf.data.Iterator.from_structure(
            data_types, data_shapes)
        self._inits = {
            'train': self._iterator.make_initializer(self.data['train']),
            'val': self._iterator.make_initializer(self.data['val']),
            'test': self._iterator.make_initializer(self.data['test'])
        }
        features, labels = self._iterator.get_next()
        training = tf.placeholder(tf.bool, name='training')

        self.logits = self.model(features, training=training)
        self.accuracy = accuracy(labels, self.logits)
        self.loss = loss(labels, self.logits)
        with self.summary.as_default(), summary.always_record_summaries():
            summary.scalar('train_loss', self.loss)
            summary.scalar('train_accuracy', self.accuracy)
        with self.test_summary.as_default(), summary.always_record_summaries():
            summary.scalar('val_loss',
                           tf.placeholder(tf.float32, name='avg_loss'))
            summary.scalar('val_accuracy',
                           tf.placeholder(tf.float32, name='avg_accuracy'))
        # Needed for BatchNorm to work
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            self.optimizer = optimizer.minimize(self.loss,
                                                global_step=self.step_counter)
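The class is assumed to drive this graph from a `tf.Session`. A minimal sketch of the per-epoch loop (the real `_train_graph_one_epoch` called in Example #2 is not shown; names here are illustrative):

import tensorflow as tf

def train_graph_one_epoch(trainer, session):
    # Point the shared iterator at the training split, then step the
    # optimizer until the dataset is exhausted.
    session.run(trainer._inits['train'])
    training = tf.get_default_graph().get_tensor_by_name('training:0')
    while True:
        try:
            session.run([trainer.optimizer, trainer.loss],
                        feed_dict={training: True})
        except tf.errors.OutOfRangeError:
            break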
Example #2
    def train(self, num_epochs=1, verbose=True):
        """Train the model, and log training statistics.

        Parameters
        ----------
        num_epochs : int
            Number of epochs to train for. Defaults to 1.
        verbose : bool
            Whether to print timing and validation statistics after each
            epoch. Defaults to True.
        """
        with self.test_summary.as_default(), summary.always_record_summaries():
            tf.contrib.summary.initialize()
            val_loss, val_accuracy = self.evaluate('val')
            if verbose:
                print('\nInitial validation loss, accuracy: %f, %f' %
                      (val_loss, val_accuracy))

        for idx in range(num_epochs):
            with self.summary.as_default(), summary.always_record_summaries():
                start = time.time()
                if tf.executing_eagerly():
                    self._train_eager_one_epoch()
                else:
                    self._train_graph_one_epoch()
                end = time.time()
            with self.test_summary.as_default(), \
                    summary.always_record_summaries():
                val_loss, val_accuracy = self.evaluate('val')
            if verbose:
                print('\nTrain time for epoch #%d: %f' %
                      (idx + 1, end - start))
                print('\nValidation loss, accuracy for epoch #%d: %f, %f' %
                      (idx + 1, val_loss, val_accuracy))
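Both methods assume `self.summary` and `self.test_summary` are `tf.contrib.summary` file writers created elsewhere; a sketch of how they might be set up, with placeholder paths:

import os
from tensorflow.contrib import summary

def make_writers(log_dir):
    # One writer per split; the trainer would store these as self.summary
    # and self.test_summary.
    train_writer = summary.create_file_writer(os.path.join(log_dir, 'train'))
    val_writer = summary.create_file_writer(os.path.join(log_dir, 'val'))
    return train_writer, val_writer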
Example #3
        def host_call_fn(gs, loss, lr, ce):
            """Training host call. Creates scalar summaries for training metrics.

            This function is executed on the CPU and should not directly reference
            any Tensors in the rest of the `model_fn`. To pass Tensors from the model
            to the `metric_fn`, provide as part of the `host_call`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the second
            element in the tuple passed to `host_call`.

            Args:
              gs: `Tensor` with shape `[batch]` for the global_step.
              loss: `Tensor` with shape `[batch]` for the training loss.
              lr: `Tensor` with shape `[batch]` for the learning_rate.
              ce: `Tensor` with shape `[batch]` for the current_epoch.

            Returns:
              List of summary ops to run on the CPU host.
            """
            gs = gs[0]
            with summary.create_file_writer(FLAGS.model_dir).as_default():
                with summary.always_record_summaries():
                    summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                    summary.scalar('learning_rate',
                                   tf.reduce_mean(lr),
                                   step=gs)
                    summary.scalar('current_epoch',
                                   tf.reduce_mean(ce),
                                   step=gs)

                    return summary.all_summary_ops()
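For context, a host call like this is passed to `TPUEstimator` as a `(fn, tensors)` tuple on the `TPUEstimatorSpec`. A hedged sketch of the wiring (tensor and variable names are illustrative; the reshape to rank 1 is what gives each argument its `[batch]` shape):

import tensorflow as tf

def model_fn(features, labels, mode, params):
    # ... build loss, train_op, learning_rate and current_epoch ...
    # Reshape every host-call argument to rank 1 so the per-core values can
    # be concatenated along axis 0 before reaching host_call_fn.
    gs_t = tf.reshape(tf.train.get_global_step(), [1])
    loss_t = tf.reshape(loss, [1])
    lr_t = tf.reshape(learning_rate, [1])
    ce_t = tf.reshape(current_epoch, [1])
    host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, loss=loss, train_op=train_op, host_call=host_call)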
Example #4
 def host_call_fn(*tensors):
     """Training host call. Creates scalar summaries for
     training metrics. This function is executed on the CPU and
     should not directly reference any Tensors in the rest of
     the `model_fn`. To pass Tensors from the  model to
     the `metric_fn`, provide as part of the `host_call`. See
     https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
     for more information. Arguments should match the list of
     `Tensor` objects passed as the second element in the tuple
      passed to `host_call`.
     """
     gs = tensors[0][0]
     # Host call fns are executed params['iterations_per_loop']
     # times after one TPU loop is finished, setting max_queue
     # value to the same as number of iterations will make the
     # summary writer only flush the data to storage once per
     # loop.
     summary_writer = summary.create_file_writer(
         config.checkpoint_dir,
         max_queue=config.get('iterations_per_loop', 1000))
     with summary_writer.as_default():
         with summary.always_record_summaries():
             for idx in range(len(tensors_to_print)):
                 summary.scalar(tensors_to_print_names[idx],
                                tensors[idx][0],
                                step=gs)
             return summary.all_summary_ops()
Example #5
      def host_call_fn(gs, loss, lr, ce):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step.
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        gs = gs[0]
        with summary.create_file_writer(FLAGS.model_dir).as_default():
          with summary.always_record_summaries():
            summary.scalar('loss', loss[0], step=gs)
            summary.scalar('learning_rate', lr[0], step=gs)
            summary.scalar('current_epoch', ce[0], step=gs)

            return summary.all_summary_ops()
Example #6
    def host_call_fn(global_step, *args):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          global_step: `Tensor` with shape `[batch]` for the global_step.
          *args: Remaining tensors to log.

        Returns:
          List of summary ops to run on the CPU host.
        """
        step = global_step[0]
        with contrib_summary.create_file_writer(
                logdir=model_dir, filename_suffix=".host_call").as_default():
            with contrib_summary.always_record_summaries():
                for i, name in enumerate(metric_names):
                    contrib_summary.scalar(prefix + name,
                                           args[i][0],
                                           step=step)

                return contrib_summary.all_summary_ops()
Example #7
 def host_call_fn(gs, loss, lr, ce):
     """Training host call. Creates scalar summaries for training metrics.
     This function is executed on the CPU and should not directly reference
     any Tensors in the rest of the `model_fn`. To pass Tensors from the
     model to the `metric_fn`, provide as part of the `host_call`. See
     https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
     for more information.
     Arguments should match the list of `Tensor` objects passed as the second
     element in the tuple passed to `host_call`.
     Args:
     gs: `Tensor with shape `[batch]` for the global_step
     loss: `Tensor` with shape `[batch]` for the training loss.
     lr: `Tensor` with shape `[batch]` for the learning_rate.
     ce: `Tensor` with shape `[batch]` for the current_epoch.
     Returns:
     List of summary ops to run on the CPU host.
     """
     gs = gs[0]
     # Host call fns are executed FLAGS.iterations_per_loop times after one
     # TPU loop is finished, setting max_queue value to the same as number of
     # iterations will make the summary writer only flush the data to storage
     # once per loop.
     with summary.create_file_writer(
             model_dir, max_queue=iterations_per_loop).as_default():
         with summary.always_record_summaries():
             summary.scalar('loss', loss[0], step=gs)
             summary.scalar('learning_rate', lr[0], step=gs)
             summary.scalar('current_epoch', ce[0], step=gs)
             return summary.all_summary_ops()
Example #8
 def host_call_fn(gs, g_loss, d_loss, real_audio, generated_audio):
   """Training host call. Creates scalar summaries for training metrics.
   This function is executed on the CPU and should not directly reference
   any Tensors in the rest of the `model_fn`. To pass Tensors from the
   model to the `metric_fn`, provide as part of the `host_call`. See
   https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
   for more information.
   Arguments should match the list of `Tensor` objects passed as the second
   element in the tuple passed to `host_call`.
   Args:
     gs: `Tensor` with shape `[batch]` for the global_step.
     g_loss: `Tensor` with shape `[batch]` for the generator loss.
     d_loss: `Tensor` with shape `[batch]` for the discriminator loss.
     real_audio: `Tensor` with shape `[batch, 8192, 1]`
     generated_audio: `Tensor` with shape `[batch, 8192, 1]`
   Returns:
     List of summary ops to run on the CPU host.
   """
   gs = gs[0]
   with summary.create_file_writer(FLAGS.model_dir).as_default():
       with summary.always_record_summaries():
           summary.scalar('g_loss', g_loss, step=gs)
           summary.scalar('d_loss', d_loss, step=gs)
           summary.audio('real_audio', real_audio, sample_rate=_FS,
                         max_outputs=10, step=gs)
           summary.audio('generated_audio', generated_audio, sample_rate=_FS,
                         max_outputs=10, step=gs)
   return summary.all_summary_ops()
Example #9
def _host_call_fn(gs, loss, lr):
  """Training host call.

  Creates scalar summaries for training metrics.

  This function is executed on the CPU and should not directly reference
  any Tensors in the rest of the `model_fn`. To pass Tensors from the
  model to the `metric_fn`, provide as part of the `host_call`. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information.

  Arguments should match the list of `Tensor` objects passed as the second
  element in the tuple passed to `host_call`.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.

  Returns:
    List of summary ops to run on the CPU host.
  """
  # Host call fns are executed FLAGS.iterations_per_loop times after one
  # TPU loop is finished, setting max_queue value to the same as number of
  # iterations will make the summary writer only flush the data to storage
  # once per loop.
  gs = gs[0]
  with summary.create_file_writer(
      FLAGS.model_dir, max_queue=FLAGS.iterations_per_loop).as_default():
    with summary.always_record_summaries():
      summary.scalar("loss", loss[0], step=gs)
      summary.scalar("learning_rate", lr[0], step=gs)
      return summary.all_summary_ops()
Example #10
            def host_call_fn(gs, loss, acc, ce):  #lr, ce):
                '''Training host call. Creates scalar summaries for training
                metrics.

                This function is executed on the CPU: after every
                `iterations_per_loop` iterations on the TPU, control moves to
                the CPU, where the summaries are updated.

                Arguments should match the list of 'Tensor' objects passed as
                the second element in the tuple passed to 'host_call'.

                Args:
                    gs: Tensor with shape [batch] for the global step
                    loss: Tensor with shape [batch] for the training loss
                    acc: Tensor with shape [batch] for the top-1 accuracy
                    ce: Tensor with shape [batch] for the current epoch

                Returns:
                    List of summary ops to run on the CPU host.
                '''
                gs = gs[0]
                # Host call fns are executed FLAGS.iterations_per_loop times after
                # one TPU loop is finished, setting max_queue value to the same as
                # number of iterations will make the summary writer only flush the
                # data to storage once per loop.
                with summary.create_file_writer(
                        FLAGS.model_dir,
                        max_queue=FLAGS.iterations_per_loop).as_default():

                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('top_1', acc[0], step=gs)
                        #summary.scalar('top_5', t5_acc[0], step=gs)
                        #summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                        return summary.all_summary_ops()
Example #11
def _write_aggregate_summaries(model_dir, global_step, eval_tag,
                               aggregates_dict):
  """Writes text metrics as summaries."""

  eval_dir = os.path.join(model_dir, eval_tag)
  summary_writer = contrib_summary.create_file_writer(eval_dir)
  with summary_writer.as_default(), \
       contrib_summary.always_record_summaries():
    for k, v in sorted(aggregates_dict[_ROUGE_METRIC].items()):
      contrib_summary.scalar(
        "text_eval/%s-R" % k, v.mid.recall, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-P" % k, v.mid.precision, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-F" % k, v.mid.fmeasure, step=global_step)
    for k, v in sorted(aggregates_dict[_BLEU_METRIC].items()):
      contrib_summary.scalar("text_eval/%s" % k, v.mid.bleu, step=global_step)
    for k, v in sorted(aggregates_dict[_REPETITION_METRIC].items()):
      contrib_summary.scalar(
        "text_eval/%s-T" % k, v.mid.target_ratio, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-P" % k, v.mid.prediction_ratio, step=global_step)
    for k, v in sorted(aggregates_dict[_LENGTH_METRIC].items()):
      contrib_summary.scalar(
        "text_eval/%s-T" % k, v.mid.target_length, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-P" % k, v.mid.prediction_length, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-R" % k, v.mid.relative_length, step=global_step)
Example #12
    def _host_call(
        concat_activations: tf.Tensor,
        concat_sequence_lengths: tf.Tensor,
    ) -> List[tf.Operation]:
      """Stores the activations and sequence lengths into a summary.

      TPUEstimator will concat the activations and sequence lengths from the
      minibatches on each core along axis=0 and pass them to this host call.
      This host call writes them to a file using the TF summary APIs.

      Args:
        concat_activations: The activations for the global batch. 2D
          Tensor(type=float32, shape=[batch_size, max_sequence_length]).
        concat_sequence_lengths: The sequence lengths for the global batch. 2D
          Tensor(type=int64, shape=[batch_size, max_sequence_length]).

      Returns:
        A list of summary ops for TPUEstimator to run on the host.
      """
      with contrib_summary.create_file_writer(self._summary_dir).as_default():
        with contrib_summary.always_record_summaries():
          contrib_summary.generic(
              self._SUMMARY_ACTIVATIONS,
              concat_activations,
          )
          contrib_summary.generic(self._SUMMARY_SEQUENCE_LENGTHS,
                                  concat_sequence_lengths)
          return contrib_summary.all_summary_ops()
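The `generic` summaries written here store raw tensors in the event files; a sketch of reading them back offline (the file path is a placeholder):

import tensorflow as tf

# The event-file path is a placeholder for a file under self._summary_dir.
for event in tf.train.summary_iterator('/tmp/summaries/events.out.tfevents'):
    for value in event.summary.value:
        if value.HasField('tensor'):
            array = tf.make_ndarray(value.tensor)  # raw tensor payload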
Example #13
            def host_call_fn(gs, loss, lr, ce, bi_list, bo_list, big_list,
                             bog_list):
                """Training host call. Creates scalar summaries for training metrics.

                This function is executed on the CPU and should not directly reference
                any Tensors in the rest of the `model_fn`. To pass Tensors from the
                model to the `metric_fn`, provide as part of the `host_call`. See
                https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
                for more information.

                Arguments should match the list of `Tensor` objects passed as the second
                element in the tuple passed to `host_call`.

                Args:
                  gs: `Tensor` with shape `[batch]` for the global_step.
                  loss: `Tensor` with shape `[batch]` for the training loss.
                  lr: `Tensor` with shape `[batch]` for the learning_rate.
                  ce: `Tensor` with shape `[batch]` for the current_epoch.
                  bi_list: list of batch-norm input activation `Tensor`s.
                  bo_list: list of batch-norm output activation `Tensor`s.
                  big_list: list of `Tensor`s holding gradients w.r.t. the
                    batch-norm inputs.
                  bog_list: list of `Tensor`s holding gradients w.r.t. the
                    batch-norm outputs.

                Returns:
                  List of summary ops to run on the CPU host.
                """
                gs = gs[0]
                # Host call fns are executed params['iterations_per_loop'] times after
                # one TPU loop is finished, setting max_queue value to the same as
                # number of iterations will make the summary writer only flush the data
                # to storage once per loop.
                with summary.create_file_writer(
                        FLAGS.model_dir,
                        max_queue=params['iterations_per_loop']).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                # TODO record distribution every 1251 steps (steps per epoch)
                with summary.record_summaries_every_n_global_steps(
                        FLAGS.steps_per_eval):
                    for index, activ in enumerate(bi_list):
                        normal_histogram(activ, 'bn-input-' + str(index))
                        log_histogram(activ, 'bn-input-' + str(index))
                    for index, activ in enumerate(bo_list):
                        normal_histogram(activ, 'bn-output-' + str(index))
                        log_histogram(activ, 'bn-output-' + str(index))
                    for index, activ in enumerate(big_list):
                        normal_histogram(activ, 'bn-input-grad-' + str(index))
                        log_histogram(activ, 'bn-input-grad-' + str(index))
                    for index, activ in enumerate(bog_list):
                        normal_histogram(activ, 'bn-output-grad-' + str(index))
                        log_histogram(activ, 'bn-output-grad-' + str(index))
                return summary.all_summary_ops()
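`normal_histogram` and `log_histogram` are helpers from the surrounding module whose bodies are not shown; presumably they wrap `summary.histogram`, roughly as follows (an assumption):

import tensorflow as tf
from tensorflow.contrib import summary

def normal_histogram(activ, name):
    # Histogram of the raw activation values.
    summary.histogram(name, activ)

def log_histogram(activ, name):
    # Histogram on a log scale, to resolve small magnitudes.
    summary.histogram(name + '-log', tf.log(tf.abs(activ) + 1e-12))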
Example #14
 def host_call_fn(**kwargs):
     writer = contrib_summary.create_file_writer(summary_dir,
                                                 max_queue=1000)
     always_record = contrib_summary.always_record_summaries()
     with writer.as_default(), always_record:
         for name, scalar in kwargs.items():
             contrib_summary.scalar(name, tf.reduce_mean(scalar))
         return contrib_summary.all_summary_ops()
Example #15
            def host_call_fn(gs, loss, lr, ce):
                gs = gs[0]
                with summary.create_file_writer(FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                        return summary.all_summary_ops()
Example #16
        def host_call_fn(gs, lpl, dcl, ls):
            gs = gs[0]
            with summary.create_file_writer(
                    params['model_dir'],
                    max_queue=params['save_checkpoints_steps']).as_default():
                with summary.always_record_summaries():
                    summary.scalar('label_prediction_loss', lpl[0], step=gs)
                    summary.scalar('domain_classification_loss', dcl[0], step=gs)
                    summary.scalar('loss', ls[0], step=gs)

            return summary.all_summary_ops()
Example #18
    def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cost,
                                  value_cost, l2_cost, combined_cost, step,
                                  est_mode=tf.estimator.ModeKeys.TRAIN):
        policy_entropy = -tf.reduce_mean(tf.reduce_sum(
            policy_output * tf.log(policy_output), axis=1))
        # pi_tensor is one_hot when generated from sgfs (for supervised learning)
        # and soft-max when using self-play records. argmax normalizes the two.
        policy_target_top_1 = tf.argmax(pi_tensor, axis=1)
        policy_output_top_1 = tf.argmax(policy_output, axis=1)

        policy_output_in_top1 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=1))
        policy_output_in_top3 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=3))

        policy_top_1_confidence = tf.reduce_max(policy_output, axis=1)
        policy_target_top_1_confidence = tf.boolean_mask(
            policy_output,
            tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1]))

        # TODO(sethtroisi): For V10 add tf.variable_scope for tf.metrics.mean's
        with tf.variable_scope("metrics"):
            metric_ops = {
                'policy_cost': tf.metrics.mean(policy_cost),
                'value_cost': tf.metrics.mean(value_cost),
                'l2_cost': tf.metrics.mean(l2_cost),
                'policy_entropy': tf.metrics.mean(policy_entropy),
                'combined_cost': tf.metrics.mean(combined_cost),

                'policy_accuracy_top_1': tf.metrics.mean(policy_output_in_top1),
                'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3),
                'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence),
                'policy_target_top_1_confidence': tf.metrics.mean(
                    policy_target_top_1_confidence),
                'value_confidence': tf.metrics.mean(tf.abs(value_output)),
            }

        if est_mode == tf.estimator.ModeKeys.EVAL:
            return metric_ops

        # Create summary ops so that they show up in the SUMMARIES collection.
        # That way, they get logged automatically during training.
        summary_writer = summary.create_file_writer(FLAGS.model_dir)
        with summary_writer.as_default(), \
                summary.always_record_summaries():
            for metric_name, metric_op in metric_ops.items():
                summary.scalar(metric_name, metric_op[1])

        # Reset metrics occasionally so that they are mean of recent batches.
        reset_op = tf.variables_initializer(tf.local_variables("metrics"))
        cond_reset_op = tf.cond(
            tf.equal(tf.mod(tf.reduce_min(step), FLAGS.summary_steps), tf.to_int64(1)),
            lambda: reset_op,
            lambda: tf.no_op())

        return summary.all_summary_ops() + [cond_reset_op]
Example #19
 def host_call_fn(gs, lr):
     # Outfeed supports int32 but global_step is expected to be int64.
     gs = tf.cast(tf.reduce_mean(gs), tf.int64)
     with summary.create_file_writer(
             self.model_dir).as_default():
         with summary.always_record_summaries():
             summary.scalar('learning_rate',
                            tf.reduce_mean(lr),
                            step=gs)
             return summary.all_summary_ops()
Example #20
 def host_call_fn(gs, *summary_tensors):
     gs = gs[0]
     with contrib_summary.create_file_writer(
             FLAGS.workdir).as_default():
         with contrib_summary.always_record_summaries():
             for name, reshaped_tensor in zip(
                     summary_names, summary_tensors):
                 contrib_summary.scalar(
                     name, tf.reduce_mean(reshaped_tensor), step=gs)
             return contrib_summary.all_summary_ops()
Example #21
def main(_):
  """Run td3/ddpg evaluation."""
  contrib_eager_python_tfe.enable_eager_execution()

  if FLAGS.use_gpu:
    tf.device('/device:GPU:0').__enter__()

  tf.gfile.MakeDirs(FLAGS.log_dir)
  summary_writer = contrib_summary.create_file_writer(
      FLAGS.log_dir, flush_millis=10000)

  env = gym.make(FLAGS.env)
  if FLAGS.wrap_for_absorbing:
    env = lfd_envs.AbsorbingWrapper(env)

  obs_shape = env.observation_space.shape
  act_shape = env.action_space.shape

  with tf.variable_scope('actor'):
    actor = Actor(obs_shape[0], act_shape[0])

  random_reward, _ = do_rollout(
      env, actor, None, num_trajectories=10, sample_random=True)

  reward_scale = contrib_eager_python_tfe.Variable(1, name='reward_scale')
  saver = contrib_eager_python_tfe.Saver(actor.variables + [reward_scale])

  last_checkpoint = tf.train.latest_checkpoint(FLAGS.load_dir)
  with summary_writer.as_default():
    while True:
      last_checkpoint = wait_for_next_checkpoint(FLAGS.load_dir,
                                                 last_checkpoint)

      total_numsteps = int(last_checkpoint.split('-')[-1])

      saver.restore(last_checkpoint)

      average_reward, average_length = do_rollout(
          env, actor, None, noise_scale=0.0, num_trajectories=FLAGS.num_trials)

      logging.info(
          'Evaluation: average episode length %d, average episode reward %f',
          average_length, average_reward)

      print('Evaluation: average episode length {}, average episode reward {}'.
            format(average_length, average_reward))

      with contrib_summary.always_record_summaries():
        if reward_scale.numpy() != 1.0:
          contrib_summary.scalar(
              'reward/scaled', (average_reward - random_reward) /
              (reward_scale.numpy() - random_reward),
              step=total_numsteps)
        contrib_summary.scalar('reward', average_reward, step=total_numsteps)
        contrib_summary.scalar('length', average_length, step=total_numsteps)
Example #22
 def host_call_fn(global_step, *tensors):
     """Training host call."""
     global_step = global_step[0]
     with contrib_summary.create_file_writer(summary_dir +
                                             '/metrics').as_default():
         with contrib_summary.always_record_summaries():
             for i, tensor in enumerate(tensors):
                 contrib_summary.scalar(names[i],
                                        tensor[0],
                                        step=global_step)
             return contrib_summary.all_summary_ops()
Example #23
 def host_call_fn(gs,
                  loss,
                  lr,
                  mix=None,
                  gt_sources=None,
                  est_sources=None):
     """Training host call. Creates scalar summaries for training metrics.
         This function is executed on the CPU and should not directly reference
         any Tensors in the rest of the `model_fn`. To pass Tensors from the
         model to the `metric_fn`, provide as part of the `host_call`. See
         https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
         for more information.
         Arguments should match the list of `Tensor` objects passed as the second
         element in the tuple passed to `host_call`.
         Args:
           gs: `Tensor with shape `[batch]` for the global_step
           loss: `Tensor` with shape `[batch]` for the training loss.
           lr: `Tensor` with shape `[batch]` for the learning_rate.
           input: `Tensor` with shape `[batch, mix_samples, 1]`
           gt_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`
           est_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`
         Returns:
           List of summary ops to run on the CPU host.
         """
     gs = gs[0]
     with summary.create_file_writer(
             model_config["model_base_dir"] + os.path.sep +
             str(model_config["experiment_id"])).as_default():
         with summary.always_record_summaries():
             summary.scalar('loss', loss[0], step=gs)
             summary.scalar('learning_rate', lr[0], step=gs)
         # `gs` is a Tensor here, so it cannot gate a Python `if` in graph
         # mode; record_summaries_every_n_global_steps does the gating.
         with summary.record_summaries_every_n_global_steps(
                 model_config["audio_summaries_every_n_steps"]):
             summary.audio('mix',
                           mix,
                           model_config['expected_sr'],
                           max_outputs=model_config["num_sources"])
             for source_id in range(gt_sources.shape[1].value):
                 summary.audio('gt_sources_{source_id}'.format(
                     source_id=source_id),
                               gt_sources[:, source_id, :, :],
                               model_config['expected_sr'],
                               max_outputs=model_config["num_sources"])
                 summary.audio('est_sources_{source_id}'.format(
                     source_id=source_id),
                               est_sources[:, source_id, :, :],
                               model_config['expected_sr'],
                               max_outputs=model_config["num_sources"])
     return summary.all_summary_ops()
Example #24
            def host_call_fn(gs, scalar_values):
                """Returns summary."""
                gs = gs[0]
                values = tf.unstack(scalar_values)

                with summary.create_file_writer(
                        FLAGS.model_dir,
                        max_queue=FLAGS.iterations_per_loop).as_default():
                    with summary.always_record_summaries():
                        for key, value in zip(tensorboard_scalars.keys(),
                                              values):
                            summary.scalar(key, value, step=gs)

                        return summary.all_summary_ops()
Example #25
def record_stats(*, stats, summary_writer, step, log_interval, name=None, comm=MPI.COMM_WORLD):
    def log_stats(step, *stat_values):
        if comm.Get_rank() != 0 or step % log_interval != 0:
            return

        for k, v in safe_zip(stats.keys(), stat_values):
            print('k = ', k, ', v = ', v)

    summary_ops = [tf.py_func(log_stats, [step] + list(stats.values()), [])]
    if summary_writer:
        with summary_writer.as_default(), summary.always_record_summaries():
            for key, value in stats.items():
                summary_ops.append(summary.scalar(key, value, step=step))
    return tf.group(*summary_ops, name=name)
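A hypothetical call site for `record_stats` (the tensors, session, and log directory are illustrative):

# All names below are illustrative.
stats = {'reward': reward_t, 'episode_length': length_t}
writer = summary.create_file_writer('/tmp/logs')
stats_op = record_stats(stats=stats, summary_writer=writer,
                        step=tf.train.get_global_step(),
                        log_interval=100, name='record_stats')
sess.run(stats_op)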
Example #26
        def host_call_fn(*args):
          """Host call function to compute training summaries."""
          scalars = _list_to_dicts(args, scalars_to_summarize.keys())[0]
          for name in scalars:
            scalars[name] = scalars[name][0]

          with contrib_summary.create_file_writer(
              summary_dir, max_queue=1000).as_default():
            with contrib_summary.always_record_summaries():
              for name, value in scalars.items():
                if name not in ["global_step"]:
                  contrib_summary.scalar(
                      name, value, step=scalars["global_step"])

          return contrib_summary.all_summary_ops()
Example #27
def host_call_fn(model_dir, **kwargs):
    """host_call function used for creating training summaries when using TPU.

  Args:
    model_dir: String indicating the output_dir to save summaries in.
    **kwargs: Set of metric names and tensor values for all desired summaries.

  Returns:
    Summary op to be passed to the host_call arg of the estimator function.
  """
    gs = kwargs.pop('global_step')[0]
    with summary.create_file_writer(model_dir).as_default():
        with summary.always_record_summaries():
            for name, tensor in kwargs.items():
                summary.scalar(name, tensor[0], step=gs)
            return summary.all_summary_ops()
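Because the metrics arrive as `**kwargs`, the matching host call can bind `model_dir` with `functools.partial` and pass a dict of tensors, which `TPUEstimator` also accepts. A sketch (tensor names are illustrative):

import functools

# Hypothetical wiring inside a model_fn; `loss` is assumed built already.
host_call = (functools.partial(host_call_fn, FLAGS.model_dir),
             {'global_step': tf.reshape(tf.train.get_global_step(), [1]),
              'loss': tf.reshape(loss, [1])})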
Example #28
            def host_call_fn(gs, lr):
                """Training host call. Creates scalar summaries for training metrics.

        Args:
          gs: `Tensor with shape `[batch]` for the global_step
          lr: `Tensor` with shape `[batch]` for the learning_rate.

        Returns:
          List of summary ops to run on the CPU host.
        """
                gs = gs[0]
                with summary.create_file_writer(
                        params['model_dir']).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('learning_rate', lr[0], step=gs)
                        return summary.all_summary_ops()
Example #29
def test(model, dataset):
    """Perform an evaluation of `model` on the examples from `dataset`."""
    from tensorflow.contrib import summary as contrib_summary  # pylint: disable=g-import-not-at-top
    avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    accuracy = tf.keras.metrics.Accuracy('accuracy', dtype=tf.float32)

    for (images, labels) in dataset:
        logits = model(images, training=False)
        avg_loss.update_state(loss(logits, labels))
        accuracy.update_state(tf.argmax(logits, axis=1, output_type=tf.int64),
                              tf.cast(labels, tf.int64))
    print('Test set: Average loss: %.4f, Accuracy: %4f%%\n' %
          (avg_loss.result(), 100 * accuracy.result()))
    with contrib_summary.always_record_summaries():
        contrib_summary.scalar('loss', avg_loss.result())
        contrib_summary.scalar('accuracy', accuracy.result())
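For the two scalars above to land on disk, a default writer must be active around the call; a sketch of a typical eager-mode call site (the dataset and log directory are placeholders):

from tensorflow.contrib import summary as contrib_summary

test_writer = contrib_summary.create_file_writer('/tmp/logs/eval',
                                                 flush_millis=10000)
with test_writer.as_default():
    test(model, test_dataset)  # test_dataset is a placeholder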
Example #30
    def summary(self, inputs, outputs, losses, step):
        super().summary(inputs, outputs, losses, step)

        with summary.always_record_summaries():
            if step.numpy() % 20 == 0:
                summary.image('summary/tube',
                              inputs['rgb'],
                              max_images=1,
                              step=step)
                summary.image('summary/map',
                              inputs['map'],
                              max_images=1,
                              step=step)
                summary.image('summary/output',
                              outputs['map'],
                              max_images=1,
                              step=step)
                summary.scalar('summary/loss', losses['loss'], step=step)
Example #31
    def _write_tensorboard_images(self, max_images: int = 3) -> None:
        """
        Write sample images to TensorBoard logs
        :param max_images: Maximum number of images per group to write
        """
        # Generate images with the same seed each epoch (to track progress)
        constant_samples = self._constant_samples
        constant_images = self._generator(constant_samples, training=False)

        # Generate images with a random seed each epoch (for variability)
        image_count = constant_samples.shape[0]  # mimic constant image_count
        latent_samples = self._latent_samples(image_count, self._z_dim)
        random_images = self._generator(latent_samples, training=False)

        # Write the images to the logs
        with S.always_record_summaries():
            S.image('constant', constant_images, max_images=max_images)
            S.image('random', random_images, max_images=max_images)
Example #32
    def _write_tensorboard_losses(d_loss: T.Tensor, d_loss_real: T.Tensor,
                                  d_loss_fake: T.Tensor,
                                  g_loss: T.Tensor) -> None:
        """
        Write losses to TensorBoard logs
        :param d_loss: Discriminator total loss
        :param d_loss_real: Discriminator loss on real images
        :param d_loss_fake: Discriminator loss on fake images
        :param g_loss: Generator loss
        """
        with S.always_record_summaries():
            # First family has total losses
            S.scalar('d_loss', d_loss, family='aggregate')
            S.scalar('g_loss', g_loss, family='aggregate')

            # Second family has a breakdown of discriminator losses
            S.scalar('d_loss_fake', d_loss_fake, family='discriminator')
            S.scalar('d_loss_real', d_loss_real, family='discriminator')
Example #33
    def summary_fn(G0, G1, G2, R0, R1, R2, L_D0, L_D1, L_D2, L_D0_W, L_D1_W,
                   L_D2_W, L_G0, L_G1, L_G2, L_G, D0_global_step,
                   D1_global_step, D2_global_step, G_global_step):
        with summary.create_file_writer(config.log_dir).as_default():
            with summary.always_record_summaries():
                max_image_outputs = 10

                D0_global_step = tpu_depad(D0_global_step)
                D1_global_step = tpu_depad(D1_global_step)
                D2_global_step = tpu_depad(D2_global_step)
                G_global_step  = tpu_depad(G_global_step)
                L_D0           = tpu_depad(L_D0)
                L_D1           = tpu_depad(L_D1)
                L_D2           = tpu_depad(L_D2)
                L_D0_W         = tpu_depad(L_D0_W)
                L_D1_W         = tpu_depad(L_D1_W)
                L_D2_W         = tpu_depad(L_D2_W)
                L_G0           = tpu_depad(L_G0)
                L_G1           = tpu_depad(L_G1)
                L_G2           = tpu_depad(L_G2)
                L_G            = tpu_depad(L_G)

                summary.image('R0', R0, max_images=max_image_outputs, step=D0_global_step)
                summary.image('R1', R1, max_images=max_image_outputs, step=D1_global_step)
                summary.image('R2', R2, max_images=max_image_outputs, step=D2_global_step)
                summary.image('G0', G0, max_images=max_image_outputs, step=G_global_step)
                summary.image('G1', G1, max_images=max_image_outputs, step=G_global_step)
                summary.image('G2', G2, max_images=max_image_outputs, step=G_global_step)

                with tf.name_scope('losses'):
                    summary.scalar('D0', L_D0, step=D0_global_step)
                    summary.scalar('D1', L_D1, step=D1_global_step)
                    summary.scalar('D2', L_D2, step=D2_global_step)
                    summary.scalar('D0_W', L_D0_W, step=D0_global_step)
                    summary.scalar('D1_W', L_D1_W, step=D1_global_step)
                    summary.scalar('D2_W', L_D2_W, step=D2_global_step)

                    summary.scalar('G0', L_G0, step=G_global_step)
                    summary.scalar('G1', L_G1, step=G_global_step)
                    summary.scalar('G2', L_G2, step=G_global_step)
                    summary.scalar('G', L_G,   step=G_global_step)

                return summary.all_summary_ops()
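`tpu_depad` comes from the surrounding module; given that its results are used as scalar values and `step` arguments, it presumably collapses the per-core `[batch]` tensor to a single value, roughly (an assumption):

def tpu_depad(x):
    # Each core contributes one replicated value along axis 0; flatten and
    # keep the first entry so it can serve as a scalar or a `step` value.
    return tf.reshape(x, [-1])[0]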