Example #1
    def tf_summaries(self, train_step=None, step_metrics=()):
        """Generates per-metric summaries against `train_step` and `step_metrics`.

    Args:
      train_step: (Optional) Step counter for training iterations. If None, no
        metric is generated against the global step.
      step_metrics: (Optional) Iterable of step metrics to generate summaries
        against.

    Returns:
      A list of scalar summaries.
    """
        result_list = self.result()
        prefix = self._prefix
        single_metric_name = 'Metric'
        # In case there is a single name (e.g., `Reward`) for all metrics, store it
        # in `single_metric_name`.
        if len(self.metric_names) == 1:
            single_metric_name = self.metric_names[0]
        summaries = []
        for metric_index, result in enumerate(result_list):
            # Common name for all metrics.
            tag = common.join_scope(prefix, self.name)
            # The default metric name is the `single_metric_name` followed by the
            # index.
            metric_name = single_metric_name + str(metric_index)
            # In case there is a valid individual name for each metric, use it.
            if (metric_index < len(self.metric_names)
                    and len(result_list) == len(self.metric_names)
                    and self.metric_names[metric_index] is not None):
                metric_name = self.metric_names[metric_index]
            tag = common.join_scope(tag, metric_name)
            if train_step is not None:
                summaries.append(
                    tf.compat.v2.summary.scalar(name=tag,
                                                data=result,
                                                step=train_step))
        if prefix:
            prefix += '_'
        for metric_index, result in enumerate(result_list):
            for step_metric in step_metrics:
                # Skip plotting the metrics against itself.
                if self.name == step_metric.name:
                    continue
                step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name,
                                               self.name)
                # Summaries expect the step value to be an int64.
                step = tf.cast(step_metric.result(), tf.int64)
                summaries.append(
                    tf.compat.v2.summary.scalar(name=step_tag,
                                                data=result,
                                                step=step))

        return summaries
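
A minimal, hedged usage sketch for the method above; `multi_value_metric` (an instance of this metric class) and the step metric `env_steps_metric` are assumed to already exist in the training script, and the log directory is illustrative:

import tensorflow as tf

# Summaries are only recorded while a summary writer is the default.
writer = tf.compat.v2.summary.create_file_writer('/tmp/metric_summaries')
train_step = tf.Variable(0, dtype=tf.int64)
with writer.as_default():
    summaries = multi_value_metric.tf_summaries(
        train_step=train_step, step_metrics=(env_steps_metric,))
writer.flush()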
Example #2
    def single_agent_summary(self, agent_id, train_step=None, step_metrics=()):
        summaries = []
        prefix = self._prefix
        name = self.name + '_agent' + str(agent_id)
        tag = common.join_scope(prefix, name)

        result = self.result_for_agent(agent_id)

        if train_step is not None:
            summaries.append(
                tf.compat.v2.summary.scalar(name=tag,
                                            data=result,
                                            step=train_step))
        if prefix:
            prefix += '_'
        for step_metric in step_metrics:
            # Skip plotting the metrics against itself.
            if self.name == step_metric.name:
                continue
            step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, name)
            # Summaries expect the step value to be an int64.
            step = tf.cast(step_metric.result(), tf.int64)
            summaries.append(
                tf.compat.v2.summary.scalar(name=step_tag,
                                            data=result,
                                            step=step))
        return summaries
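
A hedged sketch of driving the per-agent variant above for every agent; `multiagent_metric`, `writer`, and `train_step` are assumed to come from the surrounding training setup:

with writer.as_default():
    for agent_id in range(multiagent_metric.n_agents):
        multiagent_metric.single_agent_summary(agent_id, train_step=train_step)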
Example #3
    def tf_summaries(self, train_step=None, step_metrics=()):
        """Generates summaries against train_step and all step_metrics.

    Args:
      train_step: (Optional) Step counter for training iterations. If None, no
        metric is generated against the global step.
      step_metrics: (Optional) Iterable of step metrics to generate summaries
        against.

    Returns:
      A list of summaries.
    """
        summaries = []
        prefix = self._prefix
        tag = common.join_scope(prefix, self.name)
        result = self.result()
        if train_step is not None:
            summaries.append(
                tf.compat.v2.summary.scalar(name=tag,
                                            data=result,
                                            step=train_step))
        if prefix:
            prefix += '_'
        for step_metric in step_metrics:
            # Skip plotting the metrics against itself.
            if self.name == step_metric.name:
                continue
            step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, self.name)
            step = step_metric.result()
            summaries.append(
                tf.compat.v2.summary.scalar(name=step_tag,
                                            data=result,
                                            step=step))
        return summaries
Example #4
def record_metrics(metrics, train_step, summary_writer, summary_prefix):
    if train_step and summary_writer:
        with summary_writer.as_default():
            for m in metrics:
                tag = common.join_scope(summary_prefix, m.name)
                tf.compat.v2.summary.scalar(name=tag,
                                            data=m.result(),
                                            step=train_step)
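
A short, hedged sketch of wiring up `record_metrics` above with standard TF-Agents metrics; the log directory and step value are illustrative:

import tensorflow as tf
from tf_agents.metrics import tf_metrics

train_step = tf.Variable(1000, dtype=tf.int64)
metrics = [tf_metrics.AverageReturnMetric(),
           tf_metrics.AverageEpisodeLengthMetric()]
summary_writer = tf.compat.v2.summary.create_file_writer('/tmp/train')
record_metrics(metrics, train_step, summary_writer, summary_prefix='Metrics')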
Example #5
    def tf_summaries(
        self,
        train_step: types.Int = None,
        step_metrics: Sequence[MetricType] = ()
    ) -> tf.Operation:
        """Build TF summary op and placeholder for this metric.

    To execute the op, call py_metric.run_summaries.

    Args:
      train_step: Step counter for training iterations. If None, no metric is
        generated against the global step.
      step_metrics: Step values to plot as X axis in addition to global_step.

    Returns:
      The summary op.

    Raises:
      RuntimeError: If this method has already been called (it can only be
        called once).
      ValueError: If any item in step_metrics is not of type PyMetric or
        tf_metric.TFStepMetric.
    """
        if self.summary_op is not None:
            raise RuntimeError(
                'metric.tf_summaries() can only be called once.')

        tag = common.join_scope(self.prefix, self.name)
        summaries = []
        summaries.append(
            tf.compat.v2.summary.scalar(name=tag,
                                        data=self.summary_placeholder,
                                        step=train_step))
        prefix = self.prefix
        if prefix:
            prefix += '_'
        for step_metric in step_metrics:
            # Skip plotting the metrics against itself.
            if self.name == step_metric.name:
                continue
            step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, self.name)
            if isinstance(step_metric, PyMetric):
                step_tensor = step_metric.summary_placeholder
            elif isinstance(step_metric, tf_metric.TFStepMetric):
                step_tensor = step_metric.result()
            else:
                raise ValueError(
                    'step_metric is not PyMetric or TFStepMetric: '
                    '{}'.format(step_metric))
            summaries.append(
                tf.compat.v2.summary.scalar(name=step_tag,
                                            data=self.summary_placeholder,
                                            step=step_tensor))

        self._summary_op = tf.group(*summaries)
        return self._summary_op
Example #6
def eager_compute(metrics,
                  environment,
                  policy,
                  num_episodes=1,
                  train_step=None,
                  summary_writer=None,
                  summary_prefix='',
                  use_function=True):
  """Compute metrics using `policy` on the `environment`.

  *NOTE*: Because placeholders are not compatible with Eager mode we can not use
  python policies. Because we use tf_policies we need the environment time_steps
  to be tensors making it easier to use a tf_env for evaluations. Otherwise this
  method mirrors `compute` directly.

  Args:
    metrics: List of metrics to compute.
    environment: tf_environment instance.
    policy: tf_policy instance used to step the environment.
    num_episodes: Number of episodes to compute the metrics over.
    train_step: An optional step to write summaries against.
    summary_writer: An optional writer for generating metric summaries.
    summary_prefix: An optional prefix scope for metric summaries.
    use_function: Option to enable use of `tf.function` when collecting the
      metrics.
  Returns:
    A dictionary of results {metric_name: metric_value}
  """
  for metric in metrics:
    metric.reset()

  time_step = environment.reset()
  policy_state = policy.get_initial_state(environment.batch_size)

  driver = dynamic_episode_driver.DynamicEpisodeDriver(
      environment,
      policy,
      observers=metrics,
      num_episodes=num_episodes)
  if use_function:
    common.function(driver.run)(time_step, policy_state)
  else:
    driver.run(time_step, policy_state)

  results = [(metric.name, metric.result()) for metric in metrics]
  # TODO(b/120301678) remove the summaries and merge with compute
  if train_step and summary_writer:
    with summary_writer.as_default():
      for m in metrics:
        tag = common.join_scope(summary_prefix, m.name)
        tf.compat.v2.summary.scalar(name=tag, data=m.result(), step=train_step)
  # TODO(b/130249101): Add an option to log metrics.
  return collections.OrderedDict(results)
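
A hedged end-to-end sketch of calling `eager_compute` above; the environment name and episode count are illustrative, and a random policy stands in for a trained one:

from tf_agents.environments import suite_gym, tf_py_environment
from tf_agents.metrics import tf_metrics
from tf_agents.policies import random_tf_policy

eval_env = tf_py_environment.TFPyEnvironment(suite_gym.load('CartPole-v0'))
policy = random_tf_policy.RandomTFPolicy(eval_env.time_step_spec(),
                                         eval_env.action_spec())
metrics = [tf_metrics.AverageReturnMetric(),
           tf_metrics.AverageEpisodeLengthMetric()]
results = eager_compute(metrics, eval_env, policy, num_episodes=5)
print(results)  # OrderedDict([('AverageReturn', <tf.Tensor ...>), ...])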
Example #7
    def tf_summaries(self, step_metrics=()):
        # Uses the TF1-era `tf.contrib.summary` API.
        prefix = self._prefix
        tag = common_utils.join_scope(prefix, self.name)
        result = self.result()
        tf.contrib.summary.scalar(name=tag, tensor=result)
        if prefix:
            prefix += '_'
        for step_metric in step_metrics:
            # Skip plotting the metrics against itself.
            if self.name == step_metric.name:
                continue
            step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, self.name)
            step = step_metric.result()
            tf.contrib.summary.scalar(name=step_tag, tensor=result, step=step)
Example #8
def evaluate(eval_metrics,
             eval_tf_env,
             eval_policy,
             num_eval_episodes,
             num_eval_seeds,
             global_step=None,
             eval_summary_writer=None,
             summary_prefix='Metrics',
             seed=12345):
    """ Evaluate policy on the evaluation environment for the specified episodes and metrics """

    all_results = []
    # Compute the metrics for each provided seed to obtain more accurate results.
    for i in range(num_eval_seeds):
        for env in eval_tf_env.envs:
            env.seed(seed + i)
        # Evaluate the policy with this seed.
        results = metric_utils.eager_compute(
            eval_metrics,
            eval_tf_env,
            eval_policy,
            num_episodes=num_eval_episodes,
            train_step=global_step,
        )
        all_results.append(results)

    # Average the metrics across seeds (initialized from the last seed's results).
    mean_results = collections.OrderedDict(results)
    if num_eval_seeds > 1:
        for metric in mean_results:
            metric_sum = 0
            # `tf_add` is assumed to be a module-level alias for `tf.math.add`.
            for result in all_results:
                metric_sum = tf_add(metric_sum, result[metric])
            mean_results[metric] = metric_sum / len(all_results)

    # Write to the TensorBoard writer if provided. `summary` is assumed to be a
    # module-level alias for `tf.compat.v2.summary`.
    if global_step and eval_summary_writer:
        with eval_summary_writer.as_default():
            for metric, value in mean_results.items():
                tag = common.join_scope(summary_prefix, metric)
                summary.scalar(name=tag, data=value, step=global_step)

    # Print out the results of the metrics
    log = [
        '{0} = {1}'.format(metric, value)
        for metric, value in mean_results.items()
    ]
    logging.info('%s \n\t\t %s', '', '\n\t\t '.join(log))

    return mean_results
Example #9
    def tf_summaries(self,
                     reset_states,
                     values,
                     train_step=None,
                     step_metrics=()):
        """Generates summaries against train_step and all step_metrics.

    Args:
      reset_states: candidate states for reset
      values: values assigned by our function
      train_step: (Optional) Step counter for training iterations. If None, no
        metric is generated against the global step.
      step_metrics: (Optional) Iterable of step metrics to generate summaries
        against.

    Returns:
      A list of summaries.
    """
        summaries = []
        prefix = self._prefix
        tag = common.join_scope(prefix, self.name)
        result = self.result(reset_states, values)
        if train_step is not None:
            summaries.append(
                tf.compat.v2.summary.image(name=tag,
                                           data=result,
                                           step=train_step))
        if prefix:
            prefix += '_'
        for step_metric in step_metrics:
            # Skip plotting the metrics against itself.
            if self.name == step_metric.name:
                continue
            step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, self.name)
            # Summaries expect the step value to be an int64.
            step = tf.cast(step_metric.result(), tf.int64)
            summaries.append(
                tf.compat.v2.summary.image(name=step_tag,
                                           data=result,
                                           step=step))
        return summaries
Example #10
def _latest_eval(eval_dir, eval_metrics):
    """Get the latest global step for which an evaluation result was written."""
    if not tf.io.gfile.exists(eval_dir):
        return None

    expected_tags = set(
        [common.join_scope('Metrics', metric.name) for metric in eval_metrics])

    # Record which summaries were written for each global step.
    events_by_step = collections.defaultdict(set)
    for events_file in tf.io.gfile.listdir(eval_dir):
        loader = event_file_loader.EventFileLoader(
            os.path.join(eval_dir, events_file))
        for event in loader.Load():
            if event.summary.value:
                events_by_step[event.step].add(event.summary.value[0].tag)

    # Find the greatest step for which all expected summaries are present.
    for step in sorted(list(events_by_step.keys()), key=lambda step: -step):
        if events_by_step[step].issuperset(expected_tags):
            return step

    return None
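
A hedged sketch of calling `_latest_eval` above to find the last fully written evaluation; the directory and metric list are illustrative:

from tf_agents.metrics import tf_metrics

eval_metrics = [tf_metrics.AverageReturnMetric(),
                tf_metrics.AverageEpisodeLengthMetric()]
last_step = _latest_eval('/tmp/eval', eval_metrics)
if last_step is not None:
    print('Latest complete evaluation was written at step', last_step)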
Example #11
    def log(self):
        tag = common.join_scope(self.prefix, self.name)
        logging.info('%s', '{0} = {1}'.format(tag, self.result()))
Example #12
def eager_compute(metrics,
                  environment,
                  policy,
                  num_episodes=1,
                  train_step=None,
                  summary_writer=None,
                  summary_prefix='',
                  use_function=True,
                  buckets=None):
    """Compute metrics using `policy` on the `environment`.

    *DISCLAIMER*: This is mostly a copy from tf_agents.eval.metric_utils. I had to adapt
    it to support my histograms.

    *NOTE*: Because placeholders are not compatible with Eager mode we can not use
    python policies. Because we use tf_policies we need the environment time_steps
    to be tensors making it easier to use a tf_env for evaluations. Otherwise this
    method mirrors `compute` directly.

    Args:
      metrics: List of metrics to compute.
      environment: tf_environment instance.
      policy: tf_policy instance used to step the environment.
      num_episodes: Number of episodes to compute the metrics over.
      train_step: An optional step to write summaries against.
      summary_writer: An optional writer for generating metric summaries.
      summary_prefix: An optional prefix scope for metric summaries.
      use_function: Option to enable use of `tf.function` when collecting the
        metrics.
      buckets: Number of buckets used for histogram output.
    Returns:
      A dictionary of results {metric_name: metric_value}
    """
    for metric in metrics:
        metric.reset()

    time_step = environment.reset()
    policy_state = policy.get_initial_state(environment.batch_size)

    driver = dynamic_episode_driver.DynamicEpisodeDriver(
        environment, policy, observers=metrics, num_episodes=num_episodes)
    if use_function:
        common.function(driver.run)(time_step, policy_state)
    else:
        driver.run(time_step, policy_state)

    results = [(metric.name, metric.result()) for metric in metrics]
    if train_step is not None and summary_writer:
        with summary_writer.as_default():
            for m in metrics:
                tag = common.join_scope(summary_prefix, m.name)
                if m.name == 'ChosenActionIDHistogram':  # this is my change
                    tf.compat.v2.summary.histogram(name=tag,
                                                   data=m.result(),
                                                   step=train_step,
                                                   buckets=buckets)
                else:
                    tf.compat.v2.summary.scalar(name=tag,
                                                data=m.result(),
                                                step=train_step)

    return collections.OrderedDict(results)
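
A hedged call sketch for the adapted version above; `metrics`, `environment`, and `policy` are assumed to be set up as in the other examples, the writer path and bucket count are illustrative, and the histogram branch only fires for the author's custom ChosenActionIDHistogram metric:

writer = tf.compat.v2.summary.create_file_writer('/tmp/eval')
train_step = tf.Variable(0, dtype=tf.int64)
results = eager_compute(metrics, environment, policy,
                        num_episodes=10,
                        train_step=train_step,
                        summary_writer=writer,
                        summary_prefix='Eval',
                        buckets=20)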
Example #13
def eager_compute(metrics,
                  environment,
                  policy,
                  num_episodes=1,
                  train_step=None,
                  summary_writer=None,
                  summary_prefix='',
                  use_function=True,
                  use_attention_networks=False):
  """Compute metrics using `policy` on the `environment`.

  *NOTE*: Because placeholders are not compatible with Eager mode we can not use
  python policies. Because we use tf_policies we need the environment time_steps
  to be tensors making it easier to use a tf_env for evaluations. Otherwise this
  method mirrors `compute` directly.

  Args:
    metrics: List of metrics to compute.
    environment: tf_environment instance.
    policy: tf_policy instance used to step the environment.
    num_episodes: Number of episodes to compute the metrics over.
    train_step: An optional step to write summaries against.
    summary_writer: An optional writer for generating metric summaries.
    summary_prefix: An optional prefix scope for metric summaries.
    use_function: Option to enable use of `tf.function` when collecting the
      metrics.
    use_attention_networks: Option to use attention network architecture in the
      agent. This architecture requires observations from the previous time step.
  Returns:
    A dictionary of results {metric_name: metric_value}
  """
  for metric in metrics:
    metric.reset()

  multiagent_metrics = [m for m in metrics if 'Multiagent' in m.name]

  if use_attention_networks:
    driver = drivers.StateTFDriver(
        environment,
        policy,
        observers=metrics,
        max_episodes=num_episodes,
        disable_tf_function=not use_function,
    )
  else:
    driver = tf_driver.TFDriver(
        environment,
        policy,
        observers=metrics,
        max_episodes=num_episodes,
        disable_tf_function=not use_function)

  def run_driver():
    time_step = environment.reset()
    policy_state = policy.get_initial_state(environment.batch_size)
    if use_attention_networks:
      time_step.observation['policy_state'] = (
          policy_state['actor_network_state'][0],
          policy_state['actor_network_state'][1])
    driver.run(time_step, policy_state)

  if use_function:
    common.function(run_driver)()
  else:
    run_driver()

  results = [(metric.name, metric.result()) for metric in metrics]
  for m in multiagent_metrics:
    for a in range(m.n_agents):
      results.append((m.name + '_agent' + str(a), m.result_for_agent(a)))

  # TODO(b/120301678) remove the summaries and merge with compute
  if train_step and summary_writer:
    with summary_writer.as_default():
      for m in metrics:
        tag = common.join_scope(summary_prefix, m.name)
        tf.compat.v2.summary.scalar(name=tag, data=m.result(), step=train_step)
        if 'Multiagent' in m.name:
          for a in range(m.n_agents):
            tf.compat.v2.summary.scalar(name=tag + '_agent' + str(a),
                                        data=m.result_for_agent(a),
                                        step=train_step)
  # TODO(b/130249101): Add an option to log metrics.
  return collections.OrderedDict(results)
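
A hedged sketch of reading the per-agent entries from the dictionary returned above; `metrics`, `environment`, and `policy` are assumed to be set up as in the other examples:

results = eager_compute(metrics, environment, policy, num_episodes=5)
# Per-agent entries are keyed as '<metric name>_agent<id>'.
for name, value in results.items():
    if '_agent' in name:
        print(name, float(value))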
Example #14
    def _test_scopes(self, parent_scope, child_scope, expected_joined_scope):
        joined_scope = common.join_scope(parent_scope, child_scope)
        self.assertEqual(joined_scope, expected_joined_scope)
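
For context, a few hedged cases the helper above would accept, based on `common.join_scope` joining non-empty scopes with '/' and passing a lone scope through unchanged:

    def test_join_scope_examples(self):
        self._test_scopes('Metrics', 'AverageReturn', 'Metrics/AverageReturn')
        self._test_scopes('', 'AverageReturn', 'AverageReturn')
        self._test_scopes('Metrics', '', 'Metrics')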
Example #15
def log_metric(metric, prefix):
    tag = common.join_scope(prefix, metric.name)
    logging.info('%s', '{0} = {1}'.format(tag, metric.result()))
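
A minimal, hedged sketch of using `log_metric` above with a freshly constructed TF-Agents metric:

from tf_agents.metrics import tf_metrics

metric = tf_metrics.AverageReturnMetric()
log_metric(metric, prefix='Metrics')  # Logs "Metrics/AverageReturn = <current result>".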
Example #16
def log_metric(metric, prefix):
    tag = common_utils.join_scope(prefix, metric.name)
    tf.logging.info('{0} = {1}'.format(tag, metric.result()))
Example #17
    def log(self):
        tag = common_utils.join_scope(self.prefix, self.name)
        tf.logging.info('{0} = {1}'.format(tag, self.result()))