Example #1
def host_call_fn(gs, loss, lr, ce):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step.
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        gs = gs[0]
        with summary.create_file_writer(FLAGS.model_dir).as_default():
          with summary.always_record_summaries():
            summary.scalar('loss', loss[0], step=gs)
            summary.scalar('learning_rate', lr[0], step=gs)
            summary.scalar('current_epoch', ce[0], step=gs)

            return summary.all_summary_ops()
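For context, a host call like the one above is handed to `TPUEstimatorSpec` as a `(fn, tensors)` tuple, with each scalar reshaped to `[1]` so it survives the outfeed. A minimal sketch of the wiring inside `model_fn`, assuming `loss`, `learning_rate`, and `current_epoch` are scalar Tensors computed earlier:

# `loss`, `learning_rate`, and `current_epoch` are assumed names, not part of
# the snippet above; each scalar gets a leading [1] dimension for the outfeed.
gs_t = tf.reshape(tf.train.get_global_step(), [1])
host_call = (host_call_fn, [gs_t,
                            tf.reshape(loss, [1]),
                            tf.reshape(learning_rate, [1]),
                            tf.reshape(current_epoch, [1])])
return tf.contrib.tpu.TPUEstimatorSpec(
    mode=mode, loss=loss, train_op=train_op, host_call=host_call)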
Example #2
    def host_call_fn(global_step, *args):
        """Training host call. Creates scalar summaries for training metrics.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`. To pass Tensors from the
    model to the `metric_fn`, provide as part of the `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      global_step: `Tensor with shape `[batch]` for the global_step
      *args: Remaining tensors to log.

    Returns:
      List of summary ops to run on the CPU host.
    """
        step = global_step[0]
        with contrib_summary.create_file_writer(
                logdir=model_dir, filename_suffix=".host_call").as_default():
            with contrib_summary.always_record_summaries():
                for i, name in enumerate(metric_names):
                    contrib_summary.scalar(prefix + name,
                                           args[i][0],
                                           step=step)

                return contrib_summary.all_summary_ops()
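Here `metric_names`, `prefix`, and `model_dir` are captured from the enclosing scope rather than passed as Tensors. A hypothetical factory illustrating that closure pattern (the names `make_host_call` and `metrics` are assumptions, not part of the snippet):

from tensorflow.contrib import summary as contrib_summary

def make_host_call(model_dir, metrics, prefix='train/'):
    """Builds the (fn, tensors) pair consumed by TPUEstimatorSpec."""
    metric_names = list(metrics.keys())

    def host_call_fn(global_step, *args):
        step = global_step[0]
        with contrib_summary.create_file_writer(
                logdir=model_dir, filename_suffix=".host_call").as_default():
            with contrib_summary.always_record_summaries():
                for i, name in enumerate(metric_names):
                    contrib_summary.scalar(prefix + name, args[i][0], step=step)
                return contrib_summary.all_summary_ops()

    # Each Tensor needs a leading dimension to survive the TPU outfeed.
    tensors = [tf.reshape(tf.train.get_global_step(), [1])]
    tensors += [tf.reshape(metrics[name], [1]) for name in metric_names]
    return host_call_fn, tensors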
Example #3
            def host_call_fn(gs, loss, acc, ce):
                '''Training host call. Creates scalar summaries for training
                metrics.

                This function is executed on the CPU: after every
                `iterations_per_loop` steps of computation on the TPU, control
                moves to the CPU, where the summaries are updated.

                Arguments should match the list of `Tensor` objects passed as
                the second element in the tuple passed to `host_call`.

                Args:
                    gs: `Tensor` with shape `[batch]` for the global step.
                    loss: `Tensor` with shape `[batch]` for the training loss.
                    acc: `Tensor` with shape `[batch]` for the top-1 accuracy.
                    ce: `Tensor` with shape `[batch]` for the current epoch.

                Returns:
                    List of summary ops to run on the CPU host.
                '''
                gs = gs[0]
                # Host call fns are executed FLAGS.iterations_per_loop times after
                # one TPU loop is finished, setting max_queue value to the same as
                # number of iterations will make the summary writer only flush the
                # data to storage once per loop.
                with summary.create_file_writer(
                        FLAGS.model_dir,
                        max_queue=FLAGS.iterations_per_loop).as_default():

                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('top_1', acc[0], step=gs)
                        #summary.scalar('top_5', t5_acc[0], step=gs)
                        #summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                        return summary.all_summary_ops()
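Example #3 logs a top-1 accuracy tensor `acc`. A hedged sketch of how such a per-step accuracy might be computed and reshaped for the host call (the `logits` and `labels` names are assumptions):

correct = tf.cast(tf.nn.in_top_k(logits, labels, k=1), tf.float32)
acc_t = tf.reshape(tf.reduce_mean(correct), [1])   # shape [1] for the outfeed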
Example #4
 def host_call_fn(*tensors):
     """Training host call. Creates scalar summaries for
     training metrics. This function is executed on the CPU and
     should not directly reference any Tensors in the rest of
     the `model_fn`. To pass Tensors from the  model to
     the `metric_fn`, provide as part of the `host_call`. See
     https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
     for more information. Arguments should match the list of
     `Tensor` objects passed as the second element in the tuple
     passed to `host_call`.
     """
     gs = tensors[0][0]
     # Host call fns are executed params['iterations_per_loop']
     # times after one TPU loop is finished, setting max_queue
     # value to the same as number of iterations will make the
     # summary writer only flush the data to storage once per
     # loop.
     summary_writer = summary.create_file_writer(
         config.checkpoint_dir,
         max_queue=config.get('iterations_per_loop', 1000))
     with summary_writer.as_default():
         with summary.always_record_summaries():
             for name, tensor in zip(tensors_to_print_names, tensors):
                 summary.scalar(name, tensor[0], step=gs)
             return summary.all_summary_ops()
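`tensors_to_print` and `tensors_to_print_names` come from the enclosing scope; since `gs = tensors[0][0]`, the global step must be the first entry. A hypothetical setup consistent with that (the `loss` Tensor is an assumption):

import collections

tensors_to_print = collections.OrderedDict(
    global_step=tf.train.get_global_step(),
    loss=loss)
tensors_to_print_names = list(tensors_to_print.keys())
host_call = (host_call_fn,
             [tf.reshape(t, [1]) for t in tensors_to_print.values()])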
Example #5
    def __init__(self, name, logdir='logs'):
        log_path = os.path.join(logdir, name)
        writer = summary.create_file_writer(log_path, flush_millis=10000)

        def log_performance(rewards, actions, tloss, ploss, vloss, entropy):
            with writer.as_default(
            ), summary.record_summaries_every_n_global_steps(10):
                summary.scalar('Perf/Total Reward', tf.reduce_sum(rewards))
                summary.histogram('Actions', actions)
                summary.scalar('Perf/Episode Duration', tf.size(rewards))
                summary.scalar('Perf/Total Loss', tloss)
                summary.scalar('Perf/Policy Loss', tf.reduce_mean(ploss))
                summary.scalar('Perf/Value Loss', tf.reduce_mean(vloss))
                summary.scalar('Perf/Policy Entropy', tf.reduce_mean(entropy))

        def log_gradients(gnorms):
            with writer.as_default(
            ), summary.record_summaries_every_n_global_steps(10):
                summary.histogram('Gradient Norms', gnorms)

        def log_weights(var_list):
            for var in var_list:
                with writer.as_default(
                ), summary.record_summaries_every_n_global_steps(10):
                    summary.histogram(var.name, var)

        self.log_performance = log_performance
        self.log_gradients = log_gradients
        self.log_weights = log_weights
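Hypothetical usage of the logger above (the class name `Logger` is an assumption; only its `__init__` is shown), assuming eager execution so the summary ops run immediately and a global step exists for `record_summaries_every_n_global_steps` to read:

tf.enable_eager_execution()
tf.train.get_or_create_global_step()
logger = Logger('run1')
logger.log_performance(rewards=tf.constant([1.0, 0.5]),
                       actions=tf.constant([0, 1]),
                       tloss=tf.constant(0.3),
                       ploss=tf.constant([0.1]),
                       vloss=tf.constant([0.2]),
                       entropy=tf.constant([0.05]))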
Example #6
def get_summary_writer(save_dir, subdir='', comm=MPI.COMM_WORLD):
    if comm.Get_rank() != 0:
        return None
    if save_dir is None:
        return None
    with tf.init_scope():
        return summary.create_file_writer(os.path.join(save_dir, 'tb', subdir))
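Only rank 0 gets a writer, so callers have to tolerate `None`. A minimal usage sketch under that contract (the path is illustrative):

writer = get_summary_writer('/tmp/run')
if writer is not None:
    with writer.as_default(), summary.always_record_summaries():
        summary.scalar('loss', 0.5, step=0)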
Example #7
def host_call_fn(gs, g_loss, d_loss, real_audio, generated_audio):
    """Training host call. Creates scalar summaries for training metrics.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`. To pass Tensors from the
    model to the `metric_fn`, provide as part of the `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      gs: `Tensor` with shape `[batch]` for the global_step.
      g_loss: `Tensor` with shape `[batch]` for the generator loss.
      d_loss: `Tensor` with shape `[batch]` for the discriminator loss.
      real_audio: `Tensor` with shape `[batch, 8192, 1]`.
      generated_audio: `Tensor` with shape `[batch, 8192, 1]`.

    Returns:
      List of summary ops to run on the CPU host.
    """
    gs = gs[0]
    with summary.create_file_writer(FLAGS.model_dir).as_default():
        with summary.always_record_summaries():
            # The losses arrive with shape [batch]; index the first element so
            # a rank-0 value is written, matching the other examples.
            summary.scalar('g_loss', g_loss[0], step=gs)
            summary.scalar('d_loss', d_loss[0], step=gs)
            summary.audio('real_audio', real_audio,
                          sample_rate=_FS, max_outputs=10, step=gs)
            summary.audio('generated_audio', generated_audio,
                          sample_rate=_FS, max_outputs=10, step=gs)
    return summary.all_summary_ops()
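A hedged sketch of the corresponding host_call wiring; `_FS` and the `[batch, 8192, 1]` audio shapes are taken from the docstring, and the loss Tensor names are assumptions:

host_call = (host_call_fn, [
    tf.reshape(tf.train.get_global_step(), [1]),
    tf.reshape(g_loss, [1]),
    tf.reshape(d_loss, [1]),
    real_audio,         # already [batch, 8192, 1]; passed through unchanged
    generated_audio,
])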
Example #8
def _host_call_fn(gs, loss, lr):
  """Training host call.

  Creates scalar summaries for training metrics.

  This function is executed on the CPU and should not directly reference
  any Tensors in the rest of the `model_fn`. To pass Tensors from the
  model to the `metric_fn`, provide as part of the `host_call`. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information.

  Arguments should match the list of `Tensor` objects passed as the second
  element in the tuple passed to `host_call`.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.

  Returns:
    List of summary ops to run on the CPU host.
  """
  # Host call fns are executed FLAGS.iterations_per_loop times after one
  # TPU loop is finished, setting max_queue value to the same as number of
  # iterations will make the summary writer only flush the data to storage
  # once per loop.
  gs = gs[0]
  with summary.create_file_writer(
      FLAGS.model_dir, max_queue=FLAGS.iterations_per_loop).as_default():
    with summary.always_record_summaries():
      summary.scalar("loss", loss[0], step=gs)
      summary.scalar("learning_rate", lr[0], step=gs)
      return summary.all_summary_ops()
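With `max_queue` set this high, events sit in memory until the queue fills. If a flush is needed at another point (say, at the end of training), `tf.contrib.summary` exposes an explicit flush op; a minimal sketch, assuming the same writer is the current default:

with summary.create_file_writer(FLAGS.model_dir).as_default():
    flush_op = summary.flush()   # op that flushes the default writer when run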
Example #9
def _write_aggregate_summaries(model_dir, global_step, eval_tag,
                               aggregates_dict):
  """Writes text metrics as summaries."""

  eval_dir = os.path.join(model_dir, eval_tag)
  summary_writer = contrib_summary.create_file_writer(eval_dir)
  with summary_writer.as_default(), \
       contrib_summary.always_record_summaries():
    for k, v in sorted(aggregates_dict[_ROUGE_METRIC].items()):
      contrib_summary.scalar(
        "text_eval/%s-R" % k, v.mid.recall, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-P" % k, v.mid.precision, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-F" % k, v.mid.fmeasure, step=global_step)
    for k, v in sorted(aggregates_dict[_BLEU_METRIC].items()):
      contrib_summary.scalar("text_eval/%s" % k, v.mid.bleu, step=global_step)
    for k, v in sorted(aggregates_dict[_REPETITION_METRIC].items()):
      contrib_summary.scalar(
        "text_eval/%s-T" % k, v.mid.target_ratio, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-P" % k, v.mid.prediction_ratio, step=global_step)
    for k, v in sorted(aggregates_dict[_LENGTH_METRIC].items()):
      contrib_summary.scalar(
        "text_eval/%s-T" % k, v.mid.target_length, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-P" % k, v.mid.prediction_length, step=global_step)
      contrib_summary.scalar(
        "text_eval/%s-R" % k, v.mid.relative_length, step=global_step)
Example #10
    def _host_call(
        concat_activations: tf.Tensor,
        concat_sequence_lengths: tf.Tensor,
    ) -> List[tf.Operation]:
      """Stores the activations and sequence lengths into a summary.

      TPUEstimator will concat the activations and sequence lengths from the
      minibatches on each core along axis=0 and pass them to this host call.
      This host call writes them to a file using the TF summary APIs.

      Args:
        concat_activations: The activations for the global batch. 2D
          Tensor(type=float32, shape=[batch_size, max_sequence_length]).
        concat_sequence_lengths:  The sequence lengths for the global batch. 2D
          Tensor(type=int64, shape=[batch_size, max_sequence_length]).

      Returns:
        A list of summary ops for TPUEstimator to run on the host.
      """
      with contrib_summary.create_file_writer(self._summary_dir).as_default():
        with contrib_summary.always_record_summaries():
          contrib_summary.generic(
              self._SUMMARY_ACTIVATIONS,
              concat_activations,
          )
          contrib_summary.generic(self._SUMMARY_SEQUENCE_LENGTHS,
                                  concat_sequence_lengths)
          return contrib_summary.all_summary_ops()
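`contrib_summary.generic` stores the raw tensors in the event files. A hedged sketch of reading them back offline (the event-file path and tag are placeholders):

for event in tf.train.summary_iterator('/path/to/events.out.tfevents'):
    for value in event.summary.value:
        if value.tag == 'activations':          # tag name is an assumption
            activations = tf.make_ndarray(value.tensor)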
Example #11
        def host_call_fn(gs, loss, lr, ce):
            """Training host call. Creates scalar summaries for training metrics.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `host_call`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `host_call`.

      Args:
        gs: `Tensor with shape `[batch]` for the global_step
        loss: `Tensor` with shape `[batch]` for the training loss.
        lr: `Tensor` with shape `[batch]` for the learning_rate.
        ce: `Tensor` with shape `[batch]` for the current_epoch.

      Returns:
        List of summary ops to run on the CPU host.
      """
            gs = gs[0]
            with summary.create_file_writer(FLAGS.model_dir).as_default():
                with summary.always_record_summaries():
                    summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                    summary.scalar('learning_rate',
                                   tf.reduce_mean(lr),
                                   step=gs)
                    summary.scalar('current_epoch',
                                   tf.reduce_mean(ce),
                                   step=gs)

                    return summary.all_summary_ops()
Example #12
 def host_call_fn(gs, loss, lr, ce):
     """Training host call. Creates scalar summaries for training metrics.
     This function is executed on the CPU and should not directly reference
     any Tensors in the rest of the `model_fn`. To pass Tensors from the
     model to the `metric_fn`, provide as part of the `host_call`. See
     https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
     for more information.
     Arguments should match the list of `Tensor` objects passed as the second
     element in the tuple passed to `host_call`.
     Args:
       gs: `Tensor` with shape `[batch]` for the global_step.
       loss: `Tensor` with shape `[batch]` for the training loss.
       lr: `Tensor` with shape `[batch]` for the learning_rate.
       ce: `Tensor` with shape `[batch]` for the current_epoch.

     Returns:
       List of summary ops to run on the CPU host.
     """
     gs = gs[0]
     # Host call fns are executed FLAGS.iterations_per_loop times after one
     # TPU loop is finished, setting max_queue value to the same as number of
     # iterations will make the summary writer only flush the data to storage
     # once per loop.
     with summary.create_file_writer(
             model_dir, max_queue=iterations_per_loop).as_default():
         with summary.always_record_summaries():
             summary.scalar('loss', loss[0], step=gs)
             summary.scalar('learning_rate', lr[0], step=gs)
             summary.scalar('current_epoch', ce[0], step=gs)
             return summary.all_summary_ops()
Example #13
    def eval_metrics_host_call_fn(policy_output,
                                  value_output,
                                  pi_tensor,
                                  policy_cost,
                                  value_cost,
                                  l2_cost,
                                  combined_cost,
                                  est_mode=tf.estimator.ModeKeys.TRAIN):
        policy_entropy = -tf.reduce_mean(
            tf.reduce_sum(policy_output * tf.log(policy_output), axis=1))
        # pi_tensor is one_hot when generated from sgfs (for supervised learning)
        # and soft-max when using self-play records. argmax normalizes the two.
        policy_target_top_1 = tf.argmax(pi_tensor, axis=1)
        policy_output_top_1 = tf.argmax(policy_output, axis=1)

        policy_output_in_top3 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=3))

        policy_top_1_confidence = tf.reduce_max(policy_output, axis=1)
        policy_target_top_1_confidence = tf.boolean_mask(
            policy_output,
            tf.one_hot(policy_target_top_1,
                       tf.shape(policy_output)[1]))

        metric_ops = {
            'policy_cost': tf.metrics.mean(policy_cost),
            'value_cost': tf.metrics.mean(value_cost),
            'l2_cost': tf.metrics.mean(l2_cost),
            'policy_entropy': tf.metrics.mean(policy_entropy),
            'combined_cost': tf.metrics.mean(combined_cost),
            'policy_accuracy_top_1': tf.metrics.accuracy(
                labels=policy_target_top_1, predictions=policy_output_top_1),
            'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3),
            'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence),
            'policy_target_top_1_confidence': tf.metrics.mean(
                policy_target_top_1_confidence),
            'value_confidence': tf.metrics.mean(tf.abs(value_output)),
        }

        # Create summary ops so that they show up in SUMMARIES collection
        # That way, they get logged automatically during training
        summary_writer = summary.create_file_writer(FLAGS.model_dir)
        with summary_writer.as_default(), \
                summary.record_summaries_every_n_global_steps(FLAGS.summary_steps):
            for metric_name, metric_op in metric_ops.items():
                summary.scalar(metric_name, metric_op[1])

        if est_mode == tf.estimator.ModeKeys.EVAL:
            return metric_ops
        return summary.all_summary_ops()
Example #14
            def host_call_fn(gs, loss, lr, ce, bi_list, bo_list, big_list,
                             bog_list):
                """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor with shape `[batch]` for the global_step
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
                gs = gs[0]
                # Host call fns are executed params['iterations_per_loop'] times after
                # one TPU loop is finished, setting max_queue value to the same as
                # number of iterations will make the summary writer only flush the data
                # to storage once per loop.
                with summary.create_file_writer(
                        FLAGS.model_dir,
                        max_queue=params['iterations_per_loop']).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                # TODO record distribution every 1251 steps (steps per epoch)
                with summary.record_summaries_every_n_global_steps(
                        FLAGS.steps_per_eval):
                    for index, activ in enumerate(bi_list):
                        normal_histogram(activ, 'bn-input-' + str(index))
                        log_histogram(activ, 'bn-input-' + str(index))
                    for index, activ in enumerate(bo_list):
                        normal_histogram(activ, 'bn-output-' + str(index))
                        log_histogram(activ, 'bn-output-' + str(index))
                    for index, activ in enumerate(big_list):
                        normal_histogram(activ, 'bn-input-grad-' + str(index))
                        log_histogram(activ, 'bn-input-grad-' + str(index))
                    for index, activ in enumerate(bog_list):
                        normal_histogram(activ, 'bn-output-grad-' + str(index))
                        log_histogram(activ, 'bn-output-grad-' + str(index))
                return summary.all_summary_ops()
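`normal_histogram` and `log_histogram` are helpers not shown in the snippet; a plausible sketch under that assumption:

def normal_histogram(activ, name):
    summary.histogram(name, activ)

def log_histogram(activ, name):
    # Log-scale view of the magnitudes; the epsilon guards log(0).
    summary.histogram(name + '-log', tf.log(tf.abs(activ) + 1e-12))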
Example #15
 def _init_logging(self) -> None:
     """
     Initialize TensorBoard Logging
     """
     # Create summary writer and set as default
     self._summary_writer = S.create_file_writer(self.log_dir)
     self._summary_writer.set_as_default()
     self._global_step = T.train.get_or_create_global_step()
Example #16
 def host_call_fn(**kwargs):
     writer = contrib_summary.create_file_writer(summary_dir,
                                                 max_queue=1000)
     always_record = contrib_summary.always_record_summaries()
     with writer.as_default(), always_record:
         for name, scalar in kwargs.items():
             contrib_summary.scalar(name, tf.reduce_mean(scalar))
         return contrib_summary.all_summary_ops()
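TPUEstimator's `host_call` also accepts a dict of tensors, which is what lets this variant take `**kwargs`. A minimal sketch with hypothetical metric names:

host_call = (host_call_fn, {
    'loss': tf.reshape(loss, [1]),
    'learning_rate': tf.reshape(lr, [1]),
})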
Example #17
    def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cost,
                                  value_cost, l2_cost, combined_cost, step,
                                  est_mode=tf.estimator.ModeKeys.TRAIN):
        policy_entropy = -tf.reduce_mean(tf.reduce_sum(
            policy_output * tf.log(policy_output), axis=1))
        # pi_tensor is one_hot when generated from sgfs (for supervised learning)
        # and soft-max when using self-play records. argmax normalizes the two.
        policy_target_top_1 = tf.argmax(pi_tensor, axis=1)

        policy_output_in_top1 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=1))
        policy_output_in_top3 = tf.to_float(
            tf.nn.in_top_k(policy_output, policy_target_top_1, k=3))

        policy_top_1_confidence = tf.reduce_max(policy_output, axis=1)
        policy_target_top_1_confidence = tf.boolean_mask(
            policy_output,
            tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1]))

        with tf.variable_scope("metrics"):
            metric_ops = {
                'policy_cost': tf.metrics.mean(policy_cost),
                'value_cost': tf.metrics.mean(value_cost),
                'l2_cost': tf.metrics.mean(l2_cost),
                'policy_entropy': tf.metrics.mean(policy_entropy),
                'combined_cost': tf.metrics.mean(combined_cost),

                'policy_accuracy_top_1': tf.metrics.mean(policy_output_in_top1),
                'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3),
                'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence),
                'policy_target_top_1_confidence': tf.metrics.mean(
                    policy_target_top_1_confidence),
                'value_confidence': tf.metrics.mean(tf.abs(value_output)),
            }

        if est_mode == tf.estimator.ModeKeys.EVAL:
            return metric_ops

        # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps.
        eval_step = tf.reduce_min(step)

        # Create summary ops so that they show up in SUMMARIES collection
        # That way, they get logged automatically during training
        summary_writer = summary.create_file_writer(FLAGS.work_dir)
        with summary_writer.as_default(), \
                summary.record_summaries_every_n_global_steps(
                    params['summary_steps'], eval_step):
            for metric_name, metric_op in metric_ops.items():
                summary.scalar(metric_name, metric_op[1], step=eval_step)

        # Reset metrics occasionally so that they are mean of recent batches.
        reset_op = tf.variables_initializer(tf.local_variables("metrics"))
        cond_reset_op = tf.cond(
            tf.equal(eval_step % params['summary_steps'], tf.to_int64(1)),
            lambda: reset_op,
            lambda: tf.no_op())

        return summary.all_summary_ops() + [cond_reset_op]
Example #18
        def host_call_fn(gs, lpl, dcl, ls):
            """Logs label prediction, domain classification, and total loss."""
            gs = gs[0]
            with summary.create_file_writer(
                    params['model_dir'],
                    max_queue=params['save_checkpoints_steps']).as_default():
                with summary.always_record_summaries():
                    summary.scalar('label_prediction_loss', lpl[0], step=gs)
                    summary.scalar('domain_classification_loss', dcl[0], step=gs)
                    summary.scalar('loss', ls[0], step=gs)

            return summary.all_summary_ops()
Example #20
            def host_call_fn(gs, loss, lr, ce):
                """Logs loss, learning rate, and current epoch as scalars."""
                gs = gs[0]
                with summary.create_file_writer(FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                        return summary.all_summary_ops()
Example #21
 def host_call_fn(gs, *summary_tensors):
     """Averages each outfeed tensor and logs it as a scalar summary."""
     gs = gs[0]
     with contrib_summary.create_file_writer(
             FLAGS.workdir).as_default():
         with contrib_summary.always_record_summaries():
             for name, reshaped_tensor in zip(
                     summary_names, summary_tensors):
                 contrib_summary.scalar(
                     name, tf.reduce_mean(reshaped_tensor), step=gs)
             return contrib_summary.all_summary_ops()
Example #22
 def host_call_fn(gs, lr):
     # Outfeed supports int32 but global_step is expected to be int64.
     gs = tf.cast(tf.reduce_mean(gs), tf.int64)
     with summary.create_file_writer(
             self.model_dir).as_default():
         with summary.always_record_summaries():
             summary.scalar('learning_rate',
                            tf.reduce_mean(lr),
                            step=gs)
             return summary.all_summary_ops()
Example #23
    def __init__(self, path, metrics=None, clear_logs=True, **kwargs) -> None:
        if os.path.exists(path):
            if clear_logs:
                self._clear_logs(path)
        else:
            os.makedirs(path)

        self.writer = summary.create_file_writer(path)
        self.metrics = metrics
        self.kwargs = kwargs
Example #24
def main(_):
  """Run td3/ddpg evaluation."""
  contrib_eager_python_tfe.enable_eager_execution()

  if FLAGS.use_gpu:
    tf.device('/device:GPU:0').__enter__()

  tf.gfile.MakeDirs(FLAGS.log_dir)
  summary_writer = contrib_summary.create_file_writer(
      FLAGS.log_dir, flush_millis=10000)

  env = gym.make(FLAGS.env)
  if FLAGS.wrap_for_absorbing:
    env = lfd_envs.AbsorbingWrapper(env)

  obs_shape = env.observation_space.shape
  act_shape = env.action_space.shape

  with tf.variable_scope('actor'):
    actor = Actor(obs_shape[0], act_shape[0])

  random_reward, _ = do_rollout(
      env, actor, None, num_trajectories=10, sample_random=True)

  reward_scale = contrib_eager_python_tfe.Variable(1, name='reward_scale')
  saver = contrib_eager_python_tfe.Saver(actor.variables + [reward_scale])

  last_checkpoint = tf.train.latest_checkpoint(FLAGS.load_dir)
  with summary_writer.as_default():
    while True:
      last_checkpoint = wait_for_next_checkpoint(FLAGS.load_dir,
                                                 last_checkpoint)

      total_numsteps = int(last_checkpoint.split('-')[-1])

      saver.restore(last_checkpoint)

      average_reward, average_length = do_rollout(
          env, actor, None, noise_scale=0.0, num_trajectories=FLAGS.num_trials)

      logging.info(
          'Evaluation: average episode length %d, average episode reward %f',
          average_length, average_reward)

      print('Evaluation: average episode length {}, average episode reward {}'.
            format(average_length, average_reward))

      with contrib_summary.always_record_summaries():
        if reward_scale.numpy() != 1.0:
          contrib_summary.scalar(
              'reward/scaled', (average_reward - random_reward) /
              (reward_scale.numpy() - random_reward),
              step=total_numsteps)
        contrib_summary.scalar('reward', average_reward, step=total_numsteps)
        contrib_summary.scalar('length', average_length, step=total_numsteps)
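`wait_for_next_checkpoint` is project-specific; a plausible sketch built on `tf.contrib.training.wait_for_new_checkpoint`, which blocks until a checkpoint newer than `last_checkpoint` appears:

def wait_for_next_checkpoint(load_dir, last_checkpoint, seconds_to_sleep=60):
    return tf.contrib.training.wait_for_new_checkpoint(
        load_dir, last_checkpoint, seconds_to_sleep=seconds_to_sleep)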
Example #25
 def host_call_fn(global_step, *tensors):
     """Training host call."""
     global_step = global_step[0]
     with contrib_summary.create_file_writer(summary_dir +
                                             '/metrics').as_default():
         with contrib_summary.always_record_summaries():
             for i, tensor in enumerate(tensors):
                 contrib_summary.scalar(names[i],
                                        tensor[0],
                                        step=global_step)
             return contrib_summary.all_summary_ops()
Example #26
def create_default_writer_and_save_dir(root_dir):
    """Creates default directories."""
    base_dir = osp.expanduser(root_dir)
    if not tf.io.gfile.exists(base_dir):
        tf.io.gfile.makedirs(base_dir)
    tag = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    tb_logdir = osp.join(base_dir, tag, 'tb')
    save_dir = osp.join(base_dir, tag, 'train')
    tf.io.gfile.makedirs(tb_logdir)
    tf.io.gfile.makedirs(save_dir)
    writer = contrib_summary.create_file_writer(tb_logdir)
    writer.set_as_default()
    return writer, save_dir
Example #27
 def host_call_fn(global_step, *tensors):
     """Training host call."""
     global_step = global_step[0]
     with contrib_summary.create_file_writer(
             params.output_dir).as_default():
         with contrib_summary.record_summaries_every_n_global_steps(
                 n=params.log_every, global_step=global_step):
             for i, tensor in enumerate(tensors):
                 if 'images' not in names[i]:
                     contrib_summary.scalar(names[i],
                                            tensor[0],
                                            step=global_step)
             return contrib_summary.all_summary_ops()
Example #28
            def host_call_fn(gs, scalar_values):
                """Returns summary."""
                gs = gs[0]
                values = tf.unstack(scalar_values)

                with summary.create_file_writer(
                        FLAGS.model_dir,
                        max_queue=FLAGS.iterations_per_loop).as_default():
                    with summary.always_record_summaries():
                        for key, value in zip(tensorboard_scalars.keys(),
                                              values):
                            summary.scalar(key, value, step=gs)

                        return summary.all_summary_ops()
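`tensorboard_scalars` lives in the enclosing scope; because the host call unstacks a single Tensor, the model side has to stack the scalars in the same key order. A hypothetical setup consistent with that (the `loss` and `lr` Tensors are assumptions):

import collections

tensorboard_scalars = collections.OrderedDict(loss=loss, learning_rate=lr)
host_call = (host_call_fn, [
    tf.reshape(tf.train.get_global_step(), [1]),
    tf.stack(list(tensorboard_scalars.values())),
])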
Example #29
 def host_call_fn(gs,
                  loss,
                  lr,
                  mix=None,
                  gt_sources=None,
                  est_sources=None):
     """Training host call. Creates scalar summaries for training metrics.
         This function is executed on the CPU and should not directly reference
         any Tensors in the rest of the `model_fn`. To pass Tensors from the
         model to the `metric_fn`, provide as part of the `host_call`. See
         https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
         for more information.
         Arguments should match the list of `Tensor` objects passed as the second
         element in the tuple passed to `host_call`.
         Args:
            gs: `Tensor` with shape `[batch]` for the global_step.
            loss: `Tensor` with shape `[batch]` for the training loss.
            lr: `Tensor` with shape `[batch]` for the learning_rate.
            mix: `Tensor` with shape `[batch, mix_samples, 1]`.
            gt_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`.
            est_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`.
         Returns:
           List of summary ops to run on the CPU host.
         """
     gs = gs[0]
     with summary.create_file_writer(
             model_config["model_base_dir"] + os.path.sep +
             str(model_config["experiment_id"])).as_default():
         with summary.always_record_summaries():
             summary.scalar('loss', loss[0], step=gs)
             summary.scalar('learning_rate', lr[0], step=gs)
          # `gs` is a Tensor, so a plain Python `if gs % 10000 == 0:` cannot
          # be evaluated while building the graph; the periodic gating is
          # handled by record_summaries_every_n_global_steps instead.
          with summary.record_summaries_every_n_global_steps(
                  model_config["audio_summaries_every_n_steps"]):
              summary.audio('mix',
                            mix,
                            model_config['expected_sr'],
                            max_outputs=model_config["num_sources"])
              for source_id in range(gt_sources.shape[1].value):
                  summary.audio('gt_sources_{}'.format(source_id),
                                gt_sources[:, source_id, :, :],
                                model_config['expected_sr'],
                                max_outputs=model_config["num_sources"])
                  summary.audio('est_sources_{}'.format(source_id),
                                est_sources[:, source_id, :, :],
                                model_config['expected_sr'],
                                max_outputs=model_config["num_sources"])
     return summary.all_summary_ops()
Example #30
 def __init__(self,
              model,
              accuracy,
              loss,
              optimizer,
              train_data,
              val_data,
              test_data,
              summary_dir='summaries'):
     self.model = model
     self.data = {'train': train_data, 'val': val_data, 'test': test_data}
     self.step_counter = tf.train.get_or_create_global_step()
     self.summary = summary.create_file_writer(summary_dir + '/train',
                                               flush_millis=10000,
                                               name='train_summ')
     self.test_summary = summary.create_file_writer(summary_dir + '/test',
                                                    flush_millis=10000,
                                                    name='val_summ')
     if tf.executing_eagerly():
         self.loss = loss
         self.accuracy = accuracy
         self.optimizer = optimizer
     else:
         self._setup_graph_training(accuracy, loss, optimizer)
Example #31
        def host_call_fn(*args):
          """Host call function to compute training summaries."""
          scalars = _list_to_dicts(args, scalars_to_summarize.keys())[0]
          for name in scalars:
            scalars[name] = scalars[name][0]

          with contrib_summary.create_file_writer(
              summary_dir, max_queue=1000).as_default():
            with contrib_summary.always_record_summaries():
              for name, value in scalars.items():
                if name not in ["global_step"]:
                  contrib_summary.scalar(
                      name, value, step=scalars["global_step"])

          return contrib_summary.all_summary_ops()
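`_list_to_dicts` is not shown; a plausible sketch consistent with the call above, which re-keys the positional summary tensors by the original dict's keys:

def _list_to_dicts(values, keys):
    # Inverse of flattening a dict's values into a list for the outfeed.
    return [dict(zip(keys, values))]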
Example #32
            def host_call_fn(gs, lr):
                """Training host call. Creates scalar summaries for training metrics.

        Args:
          gs: `Tensor with shape `[batch]` for the global_step
          lr: `Tensor` with shape `[batch]` for the learning_rate.

        Returns:
          List of summary ops to run on the CPU host.
        """
                gs = gs[0]
                with summary.create_file_writer(
                        params['model_dir']).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('learning_rate', lr[0], step=gs)
                        return summary.all_summary_ops()
Example #33
def host_call_fn(model_dir, **kwargs):
    """host_call function used for creating training summaries when using TPU.

  Args:
    model_dir: String indicating the output_dir to save summaries in.
    **kwargs: Set of metric names and tensor values for all desired summaries.

  Returns:
    Summary op to be passed to the host_call arg of the estimator function.
  """
    gs = kwargs.pop('global_step')[0]
    with summary.create_file_writer(model_dir).as_default():
        with summary.always_record_summaries():
            for name, tensor in kwargs.items():
                summary.scalar(name, tensor[0], step=gs)
            return summary.all_summary_ops()
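Because this last variant takes `model_dir` as a plain Python argument, it is typically bound before being handed to `host_call`; a minimal sketch using `functools.partial` (the tensor names are assumptions):

import functools

host_call = (functools.partial(host_call_fn, FLAGS.model_dir), {
    'global_step': tf.reshape(tf.train.get_global_step(), [1]),
    'loss': tf.reshape(loss, [1]),
})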