def tf_summaries(self, train_step=None, step_metrics=()):
  """Generates per-metric summaries against `train_step` and `step_metrics`.

  Args:
    train_step: (Optional) Step counter for training iterations. If None, no
      metric is generated against the global step.
    step_metrics: (Optional) Iterable of step metrics to generate summaries
      against.

  Returns:
    A list of scalar summaries.
  """
  result_list = self.result()
  prefix = self._prefix
  single_metric_name = 'Metric'
  # In case there is a single name (e.g., `Reward`) for all metrics, store it
  # in `single_metric_name`.
  if len(self.metric_names) == 1:
    single_metric_name = self.metric_names[0]

  summaries = []
  for metric_index, result in enumerate(result_list):
    # Common name for all metrics.
    tag = common.join_scope(prefix, self.name)
    # The default metric name is the `single_metric_name` followed by the
    # index.
    metric_name = single_metric_name + str(metric_index)
    # In case there is a valid individual name for each metric, use it.
    if (metric_index < len(self.metric_names) and
        len(result_list) == len(self.metric_names) and
        self.metric_names[metric_index] is not None):
      metric_name = self.metric_names[metric_index]
    tag = common.join_scope(tag, metric_name)
    if train_step is not None:
      summaries.append(
          tf.compat.v2.summary.scalar(name=tag, data=result, step=train_step))

  if prefix:
    prefix += '_'
  for metric_index, result in enumerate(result_list):
    for step_metric in step_metrics:
      # Skip plotting the metrics against itself.
      if self.name == step_metric.name:
        continue
      step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, self.name)
      # Summaries expect the step value to be an int64.
      step = tf.cast(step_metric.result(), tf.int64)
      summaries.append(
          tf.compat.v2.summary.scalar(name=step_tag, data=result, step=step))
  return summaries
def single_agent_summary(self, agent_id, train_step=None, step_metrics=()):
  """Generates summaries for a single agent against train_step and step_metrics."""
  summaries = []
  prefix = self._prefix
  name = self.name + '_agent' + str(agent_id)
  tag = common.join_scope(prefix, name)
  result = self.result_for_agent(agent_id)
  if train_step is not None:
    summaries.append(
        tf.compat.v2.summary.scalar(name=tag, data=result, step=train_step))
  if prefix:
    prefix += '_'
  for step_metric in step_metrics:
    # Skip plotting the metrics against itself.
    if self.name == step_metric.name:
      continue
    step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, name)
    # Summaries expect the step value to be an int64.
    step = tf.cast(step_metric.result(), tf.int64)
    summaries.append(
        tf.compat.v2.summary.scalar(name=step_tag, data=result, step=step))
  return summaries
def tf_summaries(self, train_step=None, step_metrics=()):
  """Generates summaries against train_step and all step_metrics.

  Args:
    train_step: (Optional) Step counter for training iterations. If None, no
      metric is generated against the global step.
    step_metrics: (Optional) Iterable of step metrics to generate summaries
      against.

  Returns:
    A list of summaries.
  """
  summaries = []
  prefix = self._prefix
  tag = common.join_scope(prefix, self.name)
  result = self.result()
  if train_step is not None:
    summaries.append(
        tf.compat.v2.summary.scalar(name=tag, data=result, step=train_step))
  if prefix:
    prefix += '_'
  for step_metric in step_metrics:
    # Skip plotting the metrics against itself.
    if self.name == step_metric.name:
      continue
    step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, self.name)
    # Summaries expect the step value to be an int64.
    step = tf.cast(step_metric.result(), tf.int64)
    summaries.append(
        tf.compat.v2.summary.scalar(name=step_tag, data=result, step=step))
  return summaries
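# A minimal usage sketch (not part of the original source): wiring a metric's
# tf_summaries() into a summary writer context. The metric instances and the
# '/tmp/train' writer path are illustrative assumptions.
import tensorflow as tf
from tf_agents.metrics import tf_metrics

train_step = tf.Variable(0, dtype=tf.int64)  # stands in for the global step
env_steps = tf_metrics.EnvironmentSteps()
avg_return = tf_metrics.AverageReturnMetric()

writer = tf.compat.v2.summary.create_file_writer('/tmp/train')
with writer.as_default():
  # Plots AverageReturn against the global step and against EnvironmentSteps.
  avg_return.tf_summaries(train_step=train_step, step_metrics=(env_steps,))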
def record_metrics(metrics, train_step, summary_writer, summary_prefix):
  """Writes a scalar summary for each metric against `train_step`."""
  if train_step and summary_writer:
    with summary_writer.as_default():
      for m in metrics:
        tag = common.join_scope(summary_prefix, m.name)
        tf.compat.v2.summary.scalar(name=tag, data=m.result(), step=train_step)
def tf_summaries(
    self, train_step: types.Int = None, step_metrics: Sequence[MetricType] = ()
) -> tf.Operation:
  """Build TF summary op and placeholder for this metric.

  To execute the op, call py_metric.run_summaries.

  Args:
    train_step: Step counter for training iterations. If None, no metric is
      generated against the global step.
    step_metrics: Step values to plot as X axis in addition to global_step.

  Returns:
    The summary op.

  Raises:
    RuntimeError: If this method has already been called (it can only be
      called once).
    ValueError: If any item in step_metrics is not of type PyMetric or
      tf_metric.TFStepMetric.
  """
  if self.summary_op is not None:
    raise RuntimeError('metric.tf_summaries() can only be called once.')

  tag = common.join_scope(self.prefix, self.name)
  summaries = []
  summaries.append(
      tf.compat.v2.summary.scalar(
          name=tag, data=self.summary_placeholder, step=train_step))
  prefix = self.prefix
  if prefix:
    prefix += '_'
  for step_metric in step_metrics:
    # Skip plotting the metrics against itself.
    if self.name == step_metric.name:
      continue
    step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, self.name)
    if isinstance(step_metric, PyMetric):
      step_tensor = step_metric.summary_placeholder
    elif isinstance(step_metric, tf_metric.TFStepMetric):
      step_tensor = step_metric.result()
    else:
      raise ValueError('step_metric is not PyMetric or TFStepMetric: '
                       '{}'.format(step_metric))
    summaries.append(
        tf.compat.v2.summary.scalar(
            name=step_tag, data=self.summary_placeholder, step=step_tensor))
  self._summary_op = tf.group(*summaries)
  return self._summary_op
def eager_compute(metrics,
                  environment,
                  policy,
                  num_episodes=1,
                  train_step=None,
                  summary_writer=None,
                  summary_prefix='',
                  use_function=True):
  """Compute metrics using `policy` on the `environment`.

  *NOTE*: Because placeholders are not compatible with Eager mode we can not
  use python policies. Because we use tf_policies we need the environment
  time_steps to be tensors making it easier to use a tf_env for evaluations.

  Otherwise this method mirrors `compute` directly.

  Args:
    metrics: List of metrics to compute.
    environment: tf_environment instance.
    policy: tf_policy instance used to step the environment.
    num_episodes: Number of episodes to compute the metrics over.
    train_step: An optional step to write summaries against.
    summary_writer: An optional writer for generating metric summaries.
    summary_prefix: An optional prefix scope for metric summaries.
    use_function: Option to enable use of `tf.function` when collecting the
      metrics.

  Returns:
    A dictionary of results {metric_name: metric_value}
  """
  for metric in metrics:
    metric.reset()

  time_step = environment.reset()
  policy_state = policy.get_initial_state(environment.batch_size)

  driver = dynamic_episode_driver.DynamicEpisodeDriver(
      environment, policy, observers=metrics, num_episodes=num_episodes)
  if use_function:
    common.function(driver.run)(time_step, policy_state)
  else:
    driver.run(time_step, policy_state)

  results = [(metric.name, metric.result()) for metric in metrics]
  # TODO(b/120301678) remove the summaries and merge with compute
  if train_step and summary_writer:
    with summary_writer.as_default():
      for m in metrics:
        tag = common.join_scope(summary_prefix, m.name)
        tf.compat.v2.summary.scalar(name=tag, data=m.result(), step=train_step)
  # TODO(b/130249101): Add an option to log metrics.
  return collections.OrderedDict(results)
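# A minimal, self-contained sketch of calling eager_compute (not from the
# original source). The CartPole environment, the random policy, and the
# '/tmp/eval' writer path are illustrative assumptions.
import tensorflow as tf
from tf_agents.environments import suite_gym, tf_py_environment
from tf_agents.metrics import tf_metrics
from tf_agents.policies import random_tf_policy

eval_env = tf_py_environment.TFPyEnvironment(suite_gym.load('CartPole-v0'))
eval_policy = random_tf_policy.RandomTFPolicy(
    eval_env.time_step_spec(), eval_env.action_spec())
eval_metrics = [
    tf_metrics.AverageReturnMetric(),
    tf_metrics.AverageEpisodeLengthMetric(),
]
# Stands in for the training loop's global step; nonzero so summaries are
# actually written by the `if train_step and summary_writer:` check above.
train_step = tf.Variable(100, dtype=tf.int64)
writer = tf.compat.v2.summary.create_file_writer('/tmp/eval')

results = eager_compute(
    eval_metrics,
    eval_env,
    eval_policy,
    num_episodes=5,
    train_step=train_step,
    summary_writer=writer,
    summary_prefix='Metrics')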
def tf_summaries(self, step_metrics=()):
  prefix = self._prefix
  tag = common_utils.join_scope(prefix, self.name)
  result = self.result()
  tf.contrib.summary.scalar(name=tag, tensor=result)
  if prefix:
    prefix += '_'
  for step_metric in step_metrics:
    # Skip plotting the metrics against itself.
    if self.name == step_metric.name:
      continue
    step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, self.name)
    step = step_metric.result()
    tf.contrib.summary.scalar(name=step_tag, tensor=result, step=step)
def evaluate(eval_metrics,
             eval_tf_env,
             eval_policy,
             num_eval_episodes,
             num_eval_seeds,
             global_step=None,
             eval_summary_writer=None,
             summary_prefix='Metrics',
             seed=12345):
  """Evaluates the policy on the evaluation environment.

  Computes the given metrics over `num_eval_episodes` episodes for each of
  `num_eval_seeds` seeds and returns the mean of the results.
  """
  all_results = []
  # Compute the metrics for each of the provided seeds to get more accurate
  # results.
  for i in range(num_eval_seeds):
    for env in eval_tf_env.envs:
      env.seed(seed + i)
    # Evaluate the policy with the current seed.
    results = metric_utils.eager_compute(
        eval_metrics,
        eval_tf_env,
        eval_policy,
        num_episodes=num_eval_episodes,
        train_step=global_step,
    )
    all_results.append(results)

  # Calculate the mean of the resulting metrics.
  mean_results = collections.OrderedDict(results)
  if num_eval_seeds > 1:
    for metric in mean_results:
      metric_sum = 0
      for result in all_results:
        metric_sum = tf_add(metric_sum, result[metric])
      mean_results[metric] = metric_sum / len(all_results)

  # Write to the TensorBoard summary writer, if provided.
  if global_step and eval_summary_writer:
    with eval_summary_writer.as_default():
      for metric, value in mean_results.items():
        tag = common.join_scope(summary_prefix, metric)
        summary.scalar(name=tag, data=value, step=global_step)

  # Log the metric results.
  log = ['{0} = {1}'.format(metric, value)
         for metric, value in mean_results.items()]
  logging.info('%s \n\t\t %s', '', '\n\t\t '.join(log))

  return mean_results
def tf_summaries(self, reset_states, values, train_step=None, step_metrics=()):
  """Generates summaries against train_step and all step_metrics.

  Args:
    reset_states: Candidate states for reset.
    values: Values assigned by our function.
    train_step: (Optional) Step counter for training iterations. If None, no
      metric is generated against the global step.
    step_metrics: (Optional) Iterable of step metrics to generate summaries
      against.

  Returns:
    A list of summaries.
  """
  summaries = []
  prefix = self._prefix
  tag = common.join_scope(prefix, self.name)
  result = self.result(reset_states, values)
  if train_step is not None:
    summaries.append(
        tf.compat.v2.summary.image(name=tag, data=result, step=train_step))
  if prefix:
    prefix += '_'
  for step_metric in step_metrics:
    # Skip plotting the metrics against itself.
    if self.name == step_metric.name:
      continue
    step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, self.name)
    # Summaries expect the step value to be an int64.
    step = tf.cast(step_metric.result(), tf.int64)
    summaries.append(
        tf.compat.v2.summary.image(name=step_tag, data=result, step=step))
  return summaries
def _latest_eval(eval_dir, eval_metrics):
  """Get the latest global step for which an evaluation result was written."""
  if not tf.io.gfile.exists(eval_dir):
    return None
  expected_tags = set(
      [common.join_scope('Metrics', metric.name) for metric in eval_metrics])
  # Record which summaries were written for each global step.
  events_by_step = collections.defaultdict(set)
  for events_file in tf.io.gfile.listdir(eval_dir):
    loader = event_file_loader.EventFileLoader(
        os.path.join(eval_dir, events_file))
    for event in loader.Load():
      if event.summary.value:
        events_by_step[event.step].add(event.summary.value[0].tag)
  # Find the greatest step for which all expected summaries are present.
  for step in sorted(list(events_by_step.keys()), key=lambda step: -step):
    if events_by_step[step].issuperset(expected_tags):
      return step
  return None
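# A minimal usage sketch (not from the original source): using _latest_eval to
# find the last fully evaluated step before resuming. The eval directory and
# metric list are illustrative assumptions.
import os
from tf_agents.metrics import tf_metrics

eval_dir = os.path.join('/tmp/experiment', 'eval')
eval_metrics = [
    tf_metrics.AverageReturnMetric(),
    tf_metrics.AverageEpisodeLengthMetric(),
]

last_step = _latest_eval(eval_dir, eval_metrics)
if last_step is None:
  print('No complete evaluation found; evaluating from the first checkpoint.')
else:
  print('Resuming evaluation after global step', last_step)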
def log(self):
  tag = common.join_scope(self.prefix, self.name)
  logging.info('%s', '{0} = {1}'.format(tag, self.result()))
def eager_compute(metrics,
                  environment,
                  policy,
                  num_episodes=1,
                  train_step=None,
                  summary_writer=None,
                  summary_prefix='',
                  use_function=True,
                  buckets=None):
  """Compute metrics using `policy` on the `environment`.

  *DISCLAIMER*: This is mostly a copy from tf_agents.eval.metric_utils. I had
  to adapt it to support my histograms.

  *NOTE*: Because placeholders are not compatible with Eager mode we can not
  use python policies. Because we use tf_policies we need the environment
  time_steps to be tensors making it easier to use a tf_env for evaluations.

  Otherwise this method mirrors `compute` directly.

  Args:
    metrics: List of metrics to compute.
    environment: tf_environment instance.
    policy: tf_policy instance used to step the environment.
    num_episodes: Number of episodes to compute the metrics over.
    train_step: An optional step to write summaries against.
    summary_writer: An optional writer for generating metric summaries.
    summary_prefix: An optional prefix scope for metric summaries.
    use_function: Option to enable use of `tf.function` when collecting the
      metrics.
    buckets: Number of buckets used for histogram output.

  Returns:
    A dictionary of results {metric_name: metric_value}
  """
  for metric in metrics:
    metric.reset()

  time_step = environment.reset()
  policy_state = policy.get_initial_state(environment.batch_size)

  driver = dynamic_episode_driver.DynamicEpisodeDriver(
      environment, policy, observers=metrics, num_episodes=num_episodes)
  if use_function:
    common.function(driver.run)(time_step, policy_state)
  else:
    driver.run(time_step, policy_state)

  results = [(metric.name, metric.result()) for metric in metrics]
  if train_step is not None and summary_writer:
    with summary_writer.as_default():
      for m in metrics:
        tag = common.join_scope(summary_prefix, m.name)
        if m.name == 'ChosenActionIDHistogram':  # this is my change
          tf.compat.v2.summary.histogram(
              name=tag, data=m.result(), step=train_step, buckets=buckets)
        else:
          tf.compat.v2.summary.scalar(
              name=tag, data=m.result(), step=train_step)
  return collections.OrderedDict(results)
def eager_compute(metrics,
                  environment,
                  policy,
                  num_episodes=1,
                  train_step=None,
                  summary_writer=None,
                  summary_prefix='',
                  use_function=True,
                  use_attention_networks=False):
  """Compute metrics using `policy` on the `environment`.

  *NOTE*: Because placeholders are not compatible with Eager mode we can not
  use python policies. Because we use tf_policies we need the environment
  time_steps to be tensors making it easier to use a tf_env for evaluations.

  Otherwise this method mirrors `compute` directly.

  Args:
    metrics: List of metrics to compute.
    environment: tf_environment instance.
    policy: tf_policy instance used to step the environment.
    num_episodes: Number of episodes to compute the metrics over.
    train_step: An optional step to write summaries against.
    summary_writer: An optional writer for generating metric summaries.
    summary_prefix: An optional prefix scope for metric summaries.
    use_function: Option to enable use of `tf.function` when collecting the
      metrics.
    use_attention_networks: Option to use attention network architecture in
      the agent. This architecture requires observations from the previous
      time step.

  Returns:
    A dictionary of results {metric_name: metric_value}
  """
  for metric in metrics:
    metric.reset()
  multiagent_metrics = [m for m in metrics if 'Multiagent' in m.name]

  if use_attention_networks:
    driver = drivers.StateTFDriver(
        environment,
        policy,
        observers=metrics,
        max_episodes=num_episodes,
        disable_tf_function=not use_function,
    )
  else:
    driver = tf_driver.TFDriver(
        environment,
        policy,
        observers=metrics,
        max_episodes=num_episodes,
        disable_tf_function=not use_function)

  def run_driver():
    time_step = environment.reset()
    policy_state = policy.get_initial_state(environment.batch_size)
    if use_attention_networks:
      time_step.observation['policy_state'] = (
          policy_state['actor_network_state'][0],
          policy_state['actor_network_state'][1])
    driver.run(time_step, policy_state)

  if use_function:
    common.function(run_driver)()
  else:
    run_driver()

  results = [(metric.name, metric.result()) for metric in metrics]
  for m in multiagent_metrics:
    for a in range(m.n_agents):
      results.append((m.name + '_agent' + str(a), m.result_for_agent(a)))

  # TODO(b/120301678) remove the summaries and merge with compute
  if train_step and summary_writer:
    with summary_writer.as_default():
      for m in metrics:
        tag = common.join_scope(summary_prefix, m.name)
        tf.compat.v2.summary.scalar(name=tag, data=m.result(), step=train_step)
        if 'Multiagent' in m.name:
          for a in range(m.n_agents):
            tf.compat.v2.summary.scalar(
                name=tag + '_agent' + str(a),
                data=m.result_for_agent(a),
                step=train_step)
  # TODO(b/130249101): Add an option to log metrics.
  return collections.OrderedDict(results)
def _test_scopes(self, parent_scope, child_scope, expected_joined_scope):
  joined_scope = common.join_scope(parent_scope, child_scope)
  self.assertEqual(joined_scope, expected_joined_scope)
def log_metric(metric, prefix):
  tag = common.join_scope(prefix, metric.name)
  logging.info('%s', '{0} = {1}'.format(tag, metric.result()))
def log_metric(metric, prefix):
  tag = common_utils.join_scope(prefix, metric.name)
  tf.logging.info('{0} = {1}'.format(tag, metric.result()))
def log(self):
  tag = common_utils.join_scope(self.prefix, self.name)
  tf.logging.info('{0} = {1}'.format(tag, self.result()))