Example #1
import tensorflow as tf
from tf_agents.metrics import tf_metric
from tf_agents.metrics.tf_metrics import TFDeque
from tf_agents.utils import common


class AdversarialEnvironmentScalar(tf_metric.TFStepMetric):
    """Metric to compute average of simple scalars like number of obstacles."""
    def __init__(self,
                 name,
                 prefix='Metrics',
                 dtype=tf.float32,
                 batch_size=1,
                 buffer_size=10):
        super(AdversarialEnvironmentScalar, self).__init__(name=name,
                                                           prefix=prefix)
        self._buffer = TFDeque(buffer_size, dtype)
        self._dtype = dtype

    @common.function(autograph=True)
    def call(self, new_scalar_vals):
        for v in new_scalar_vals:
            self._buffer.add(v)
        return new_scalar_vals

    def result(self):
        return self._buffer.mean()

    @common.function
    def reset(self):
        self._buffer.clear()
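
A minimal usage sketch (not part of the original example; the metric name and the scalar values are illustrative): the metric object is called directly with a batch of scalar tensors, as TF-Agents observers are, and result() returns the mean over the buffer.

num_obstacles_metric = AdversarialEnvironmentScalar(name='num_obstacles')
num_obstacles_metric(tf.constant([3.0, 5.0, 4.0]))  # buffer now holds 3, 5, 4
print(num_obstacles_metric.result())                # mean of the buffer: 4.0
num_obstacles_metric.reset()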
Example #2
import tensorflow as tf
from tf_agents.metrics import tf_metric
from tf_agents.metrics.tf_metrics import TFDeque
from tf_agents.utils import common

# `zero_out_new_episodes` (not shown here) is expected to reset accumulator
# entries at batch indices where a new episode is starting.


class AverageReturnMetric(tf_metric.TFStepMetric):
    """Metric for the average collective return and individual agent returns."""
    def __init__(self,
                 n_agents,
                 name='MultiagentAverageReturn',
                 prefix='Metrics',
                 dtype=tf.float32,
                 batch_size=1,
                 buffer_size=10):
        super(AverageReturnMetric, self).__init__(name=name, prefix=prefix)
        self.n_agents = n_agents
        self._dtype = dtype

        # Accumulator and buffer for the average return of all agents
        self._collective_return_accumulator = common.create_variable(
            initial_value=0,
            dtype=dtype,
            shape=(batch_size, ),
            name='Accumulator')
        self._collective_buffer = TFDeque(buffer_size, dtype)

        # Accumulators for each agent's independent reward
        self._agent_return_accumulators = []
        for a in range(n_agents):
            self._agent_return_accumulators.append(
                common.create_variable(initial_value=0,
                                       dtype=dtype,
                                       shape=(batch_size, ),
                                       name='Accumulator' + str(a)))

        # Buffers for each agent's independent reward
        self._agent_buffers = []
        for a in range(n_agents):
            self._agent_buffers.append(TFDeque(buffer_size, dtype))

    @common.function(autograph=True)
    def call(self, trajectory):
        # Zero out batch indices where a new episode is starting.
        self._collective_return_accumulator.assign(
            zero_out_new_episodes(trajectory,
                                  self._collective_return_accumulator))
        for a in range(self.n_agents):
            self._agent_return_accumulators[a].assign(
                zero_out_new_episodes(trajectory,
                                      self._agent_return_accumulators[a]))

        # Note that trajectory.reward has shape (batch, n_agents)

        # Update the collective accumulator with the mean reward across agents.
        self._collective_return_accumulator.assign_add(
            tf.reduce_mean(trajectory.reward, axis=1))

        # Pull out data for each agent and assign
        for a in range(self.n_agents):
            self._agent_return_accumulators[a].assign_add(
                trajectory.reward[:, a])

        # Add final returns to buffer.
        last_episode_indices = tf.squeeze(tf.where(trajectory.is_last()),
                                          axis=-1)
        for indx in last_episode_indices:
            self._collective_buffer.add(
                self._collective_return_accumulator[indx])

            # Agent buffers that use the global done
            for a in range(self.n_agents):
                self._agent_buffers[a].add(
                    self._agent_return_accumulators[a][indx])

        return trajectory

    def result(self):
        return self._collective_buffer.mean()

    def result_for_agent(self, agent_id):
        return self._agent_buffers[agent_id].mean()

    @common.function
    def reset(self):
        self._collective_buffer.clear()
        self._collective_return_accumulator.assign(
            tf.zeros_like(self._collective_return_accumulator))

        for a in range(self.n_agents):
            self._agent_buffers[a].clear()
            self._agent_return_accumulators[a].assign(
                tf.zeros_like(self._agent_return_accumulators[a]))

    def tf_summaries(self, train_step=None, step_metrics=()):
        """Generates summaries for all agents & collective summary against steps.

    Args:
      train_step: (Optional) Step counter for training iterations. If None, no
        metric is generated against the global step.
      step_metrics: (Optional) Iterable of step metrics to generate summaries
        against.

    Returns:
      A list of summaries.
    """
        summaries = super(AverageReturnMetric,
                          self).tf_summaries(train_step=train_step,
                                             step_metrics=step_metrics)

        for a in range(self.n_agents):
            summaries.extend(
                self.single_agent_summary(a, train_step, step_metrics))

        return summaries

    def single_agent_summary(self, agent_id, train_step=None, step_metrics=()):
        summaries = []
        prefix = self._prefix
        name = self.name + '_agent' + str(agent_id)
        tag = common.join_scope(prefix, name)

        result = self.result_for_agent(agent_id)

        if train_step is not None:
            summaries.append(
                tf.compat.v2.summary.scalar(name=tag,
                                            data=result,
                                            step=train_step))
        if prefix:
            prefix += '_'
        for step_metric in step_metrics:
            # Skip plotting the metrics against itself.
            if self.name == step_metric.name:
                continue
            step_tag = '{}vs_{}/{}'.format(prefix, step_metric.name, name)
            # Summaries expect the step value to be an int64.
            step = tf.cast(step_metric.result(), tf.int64)
            summaries.append(
                tf.compat.v2.summary.scalar(name=step_tag,
                                            data=result,
                                            step=step))
        return summaries
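
A minimal usage sketch (tf_env, collect_policy, global_step and env_steps_metric are placeholder names, not part of the example): the metric is attached to a driver as an observer, so every collected trajectory, whose reward is expected to have shape (batch, n_agents), updates the collective and per-agent buffers.

from tf_agents.drivers import dynamic_step_driver

avg_return = AverageReturnMetric(n_agents=3, batch_size=tf_env.batch_size)
driver = dynamic_step_driver.DynamicStepDriver(
    tf_env, collect_policy, observers=[avg_return], num_steps=200)
driver.run()

print(avg_return.result())             # average collective return
print(avg_return.result_for_agent(0))  # average return of agent 0
summaries = avg_return.tf_summaries(train_step=global_step,
                                    step_metrics=[env_steps_metric])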
Example #3
from typing import Tuple, Type

import tensorflow as tf
from tf_agents.metrics.tf_metric import TFStepMetric
from tf_agents.metrics.tf_metrics import TFDeque
from tf_agents.policies import tf_policy
from tf_agents.trajectories.time_step import TimeStep
from tf_agents.trajectories.trajectory import Trajectory
from tf_agents.utils import common


class ActionProbabilityMetric(TFStepMetric):
    """
    A metric that records the average action probabilities over a given period.
    The implementation is similar to tf_agents.metrics.tf_metrics.AverageReturnMetric.
    """

    def __init__(self,
                 policy: tf_policy.TFPolicy,
                 action_indices: Tuple[int, ...],
                 name: str = 'ActionProbability',
                 prefix: str = 'Metrics',
                 dtype: Type = tf.float32,
                 batch_size: int = 1,
                 buffer_size: int = 10):
        """
        :param policy: Policy of the agent used for reevaluation to attain action probabilities at
            each time step.
        :param action_indices: A tuple of indices of the action probability vector to track. This is
            a tuple to allow for the case where the action is a tuple of tensors.
        :param name: Name of the metric (as it will appear in tensorboard).
        :param prefix: Prefix to apply as part of the naming convention.
        :param dtype: Data type of the metric.
        :param batch_size: Batch size of the RL environment.
        :param buffer_size: The capacity of the buffer which will rewrite itself when full but is
            emptied at every logging point.
        """
        super().__init__(name=name, prefix=prefix)
        self._action_indices = action_indices
        self._dtype = dtype
        self._probability_accumulator = common.create_variable(
            initial_value=0, dtype=dtype, shape=(batch_size,), name='Accumulator'
        )
        self._policy = policy
        self._buffer = TFDeque(buffer_size, dtype)
        self._count_accumulator = common.create_variable(
            initial_value=0, dtype=dtype, shape=(batch_size,), name='CountAccumulator'
        )

    @common.function(autograph=True)
    def call(self, trajectory: Trajectory) -> Trajectory:
        time_step = TimeStep(trajectory.step_type, trajectory.reward, trajectory.discount,
                             trajectory.observation)
        action_dist = self._policy.distribution(time_step).action

        # If the action distribution is in fact a tuple of distributions (one per resource set),
        # index into it to reach the underlying distribution from which the probabilities can be
        # read. This is only the case when there are multiple resource sets.
        for i in self._action_indices[:-1]:
            action_dist = action_dist[i]

        action_probs = action_dist.probs_parameter()
        # Zero out batch indices where a new episode is starting.
        self._probability_accumulator.assign(
            tf.where(trajectory.is_first(), tf.zeros_like(self._probability_accumulator),
                     self._probability_accumulator))
        self._count_accumulator.assign(
            tf.where(trajectory.is_first(), tf.zeros_like(self._count_accumulator),
                     self._count_accumulator))
        # Update accumulators with probability and count increments.
        self._probability_accumulator.assign_add(action_probs[..., 0, self._action_indices[-1]])
        self._count_accumulator.assign_add(tf.ones_like(self._count_accumulator))

        # Add final cumulants to buffer at the end of episodes.
        last_episode_indices = tf.squeeze(tf.where(trajectory.is_last()), axis=-1)
        for idx in last_episode_indices:
            self._buffer.add(self._probability_accumulator[idx] / self._count_accumulator[idx])

        return trajectory

    def result(self) -> tf.Tensor:
        """Return the metric value."""
        return self._buffer.mean()

    @common.function
    def reset(self) -> None:
        """Clear the buffer and reset the accumulators."""
        self._buffer.clear()
        self._probability_accumulator.assign(tf.zeros_like(self._probability_accumulator))
        self._count_accumulator.assign(tf.zeros_like(self._count_accumulator))

    @property
    def action_indices(self) -> Tuple[int, ...]:
        return self._action_indices
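
A minimal usage sketch (agent and tf_env are placeholder names): the metric re-evaluates the agent's policy on every collected trajectory, so it only applies to policies whose action distribution exposes probs_parameter(), e.g. a categorical distribution; here it tracks the probability assigned to action index 0.

from tf_agents.drivers import dynamic_episode_driver

action_prob = ActionProbabilityMetric(policy=agent.policy, action_indices=(0,))
driver = dynamic_episode_driver.DynamicEpisodeDriver(
    tf_env, agent.collect_policy, observers=[action_prob], num_episodes=10)
driver.run()

print(action_prob.result())  # mean per-episode probability of action 0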