Example #1
 def _create_variables(self):
     """Creates the variables needed for EMATensorNormalizer."""
     self._mean_moving_avg = nest.map_structure(
         lambda spec: create_counter('mean', 0, spec.shape, tf.float32),
         self._tensor_spec)
     self._var_moving_avg = nest.map_structure(
         lambda spec: create_counter('var', 1, spec.shape, tf.float32),
         self._tensor_spec)
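The `create_counter` calls above allocate the moving-average variables but do not show how they are consumed. The sketch below assumes a simple exponential-moving-average update with an illustrative `decay` value and arbitrary shapes; it stands in for what such variables are typically used for and is not the EMATensorNormalizer's actual update op.

import tensorflow as tf

# Plain tf.Variables standing in for the moving averages created above
# via create_counter (shapes chosen arbitrarily for the example).
mean_avg = tf.Variable(tf.zeros([3]), name='mean')
var_avg = tf.Variable(tf.ones([3]), name='var')

def ema_update(batch, decay=0.999):
  # Move the running mean/variance a small step toward the batch statistics.
  batch_mean = tf.reduce_mean(batch, axis=0)
  batch_var = tf.reduce_mean(tf.square(batch - batch_mean), axis=0)
  mean_avg.assign_add((1.0 - decay) * (batch_mean - mean_avg))
  var_avg.assign_add((1.0 - decay) * (batch_var - var_avg))

ema_update(tf.random.normal([32, 3]))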
Example #2
 def _create_variables(self):
     """Uses self._scope and creates all variables needed for the normalizer."""
     self._count = nest.map_structure(
         lambda spec: create_counter('count', 1e-8, spec.shape, tf.float32),
         self._tensor_spec)
     self._mean_sum = nest.map_structure(
         lambda spec: create_counter('mean_sum', 0, spec.shape, tf.float32),
         self._tensor_spec)
     self._var_sum = nest.map_structure(
         lambda spec: create_counter('var_sum', 0, spec.shape, tf.float32),
         self._tensor_spec)
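For the streaming normalizer, `count`, `mean_sum`, and `var_sum` are running sums from which mean and variance are recovered by division. The sketch below shows one common way such sums can be accumulated and applied; the `streaming_update`/`normalize` helpers, the accumulation order, and the `epsilon` value are illustrative assumptions, not the library's implementation.

import tensorflow as tf

# Stand-ins for the count / mean_sum / var_sum variables created above.
count = tf.Variable(1e-8)
mean_sum = tf.Variable(0.0)
var_sum = tf.Variable(0.0)

def streaming_update(batch):
  # Accumulate running sums; mean and variance are recovered by dividing by count.
  n = tf.cast(tf.size(batch), tf.float32)
  count.assign_add(n)
  mean_sum.assign_add(tf.reduce_sum(batch))
  mean = mean_sum / count
  var_sum.assign_add(tf.reduce_sum(tf.square(batch - mean)))

def normalize(batch, epsilon=1e-6):
  mean = mean_sum / count
  var = var_sum / count
  return (batch - mean) / tf.sqrt(var + epsilon)

streaming_update(tf.random.normal([128]))
print(normalize(tf.constant([0.5, -0.5])).numpy())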
Example #3
 def __init__(self, name='NumberOfEpisodes', dtype=tf.int64):
     super(NumberOfEpisodes, self).__init__(name=name)
     self.dtype = dtype
     self.number_episodes = common.create_counter(initial_value=0,
                                                  dtype=self.dtype,
                                                  shape=(),
                                                  name='number_episodes')
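The counter created here is typically advanced as batches of steps are observed. A minimal sketch, assuming a hypothetical `record_batch` helper that receives the batch's `is_last` flags:

import tensorflow as tf

number_episodes = tf.Variable(0, dtype=tf.int64, name='number_episodes')

def record_batch(is_last):
  # Each True flag marks the final step of an episode in the batch.
  number_episodes.assign_add(tf.reduce_sum(tf.cast(is_last, tf.int64)))

record_batch(tf.constant([False, True, False, True]))
print(number_episodes.numpy())  # 2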
Example #4
 def __init__(self, name='EnvironmentSteps', dtype=tf.int64):
     super(EnvironmentSteps, self).__init__(name=name)
     self.dtype = dtype
     self.environment_steps = common.create_counter(
         initial_value=0,
         dtype=self.dtype,
         shape=(),
         name='environment_steps')
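EnvironmentSteps differs from the episode metric in that it counts transitions rather than episode ends. A minimal sketch under the assumption that boundary (reset) steps are excluded; `record_batch` is again a hypothetical helper:

import tensorflow as tf

environment_steps = tf.Variable(0, dtype=tf.int64, name='environment_steps')

def record_batch(is_boundary):
  # Boundary (reset) steps are not environment transitions, so skip them.
  steps = tf.reduce_sum(tf.cast(tf.logical_not(is_boundary), tf.int64))
  environment_steps.assign_add(steps)

record_batch(tf.constant([False, False, True, False]))
print(environment_steps.numpy())  # 3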
Example #5
    def __init__(self,
                 time_step_spec,
                 action_spec,
                 critic_network,
                 actor_network,
                 actor_optimizer,
                 critic_optimizer,
                 alpha_optimizer,
                 actor_policy_ctor=actor_policy.ActorPolicy,
                 squash_actions=True,
                 target_update_tau=1.0,
                 target_update_period=1,
                 td_errors_loss_fn=tf.math.squared_difference,
                 gamma=1.0,
                 reward_scale_factor=1.0,
                 initial_log_alpha=0.0,
                 target_entropy=None,
                 gradient_clipping=None,
                 debug_summaries=False,
                 summarize_grads_and_vars=False,
                 name=None):
        """Creates a SAC Agent.

        Args:
          time_step_spec: A `TimeStep` spec of the expected time_steps.
          action_spec: A nest of BoundedTensorSpec representing the actions.
          critic_network: A function critic_network((observations, actions)) that
            returns the q_values for each observation and action.
          actor_network: A function actor_network(observation, action_spec) that
            returns an action distribution.
          actor_optimizer: The optimizer to use for the actor network.
          critic_optimizer: The default optimizer to use for the critic network.
          alpha_optimizer: The default optimizer to use for the alpha variable.
          actor_policy_ctor: The policy class to use.
          squash_actions: Whether or not to use tanh to squash actions between
            -1 and 1.
          target_update_tau: Factor for soft update of the target networks.
          target_update_period: Period for soft update of the target networks.
          td_errors_loss_fn: A function for computing the elementwise TD errors
            loss.
          gamma: A discount factor for future rewards.
          reward_scale_factor: Multiplicative scale for the reward.
          initial_log_alpha: Initial value for log_alpha.
          target_entropy: The target average policy entropy, for updating alpha.
          gradient_clipping: Norm length to clip gradients.
          debug_summaries: A bool to gather debug summaries.
          summarize_grads_and_vars: If True, gradient and network variable
            summaries will be written during training.
          name: The name of this agent. All variables in this module will fall
            under that name. Defaults to the class name.
        """
        tf.Module.__init__(self, name=name)

        self._critic_network1 = critic_network
        self._critic_network2 = critic_network.copy(name='CriticNetwork2')
        self._target_critic_network1 = critic_network.copy(
            name='TargetCriticNetwork1')
        self._target_critic_network2 = critic_network.copy(
            name='TargetCriticNetwork2')
        self._actor_network = actor_network

        policy = actor_policy_ctor(time_step_spec=time_step_spec,
                                   action_spec=action_spec,
                                   actor_network=self._actor_network)

        self._log_alpha = common_utils.create_counter(
            'initial_log_alpha',
            initial_value=initial_log_alpha,
            dtype=tf.float32,
            trainable=True)

        # If target_entropy was not passed, set it to the negative of the total
        # number of action dimensions.
        if target_entropy is None:
            flat_action_spec = tf.nest.flatten(action_spec)
            target_entropy = -np.sum([
                np.prod(single_spec.shape.as_list())
                for single_spec in flat_action_spec
            ])

        self._squash_actions = squash_actions
        self._target_update_tau = target_update_tau
        self._target_update_period = target_update_period
        self._actor_optimizer = actor_optimizer
        self._critic_optimizer = critic_optimizer
        self._alpha_optimizer = alpha_optimizer
        self._td_errors_loss_fn = td_errors_loss_fn
        self._gamma = gamma
        self._reward_scale_factor = reward_scale_factor
        self._target_entropy = target_entropy
        self._gradient_clipping = gradient_clipping
        self._debug_summaries = debug_summaries
        self._summarize_grads_and_vars = summarize_grads_and_vars

        super(SacAgent,
              self).__init__(time_step_spec,
                             action_spec,
                             policy=policy,
                             collect_policy=policy,
                             train_sequence_length=2,
                             debug_summaries=debug_summaries,
                             summarize_grads_and_vars=summarize_grads_and_vars)
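The `_log_alpha` variable is created trainable because SAC adjusts the entropy temperature by gradient descent. The following sketch shows the standard SAC temperature loss that such a variable enables; the optimizer, learning rate, `target_entropy` value, and `log_probs` input are assumptions for illustration, not the agent's actual training code.

import tensorflow as tf

log_alpha = tf.Variable(0.0, trainable=True, name='log_alpha')  # initial_log_alpha = 0.0
target_entropy = -2.0  # e.g. the negative action dimensionality computed above
optimizer = tf.keras.optimizers.Adam(3e-4)

def alpha_train_step(log_probs):
  # Standard SAC temperature loss: adjust alpha so the policy's entropy tracks
  # target_entropy. Optimizing log_alpha keeps alpha positive.
  with tf.GradientTape() as tape:
    loss = -tf.reduce_mean(log_alpha * tf.stop_gradient(log_probs + target_entropy))
  grads = tape.gradient(loss, [log_alpha])
  optimizer.apply_gradients(zip(grads, [log_alpha]))
  return tf.exp(log_alpha)  # the temperature used in the actor/critic losses

alpha = alpha_train_step(tf.constant([-1.3, -0.7, -2.1]))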
Example #6
 def testInitialValueWithShape(self):
     counter = common.create_counter('counter', 1, shape=(2, ))
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertAllEqual(self.evaluate(counter), [1, 1])
Example #7
 def testMultipleCounters(self):
     counter1 = common.create_counter('counter', 1)
     counter2 = common.create_counter('counter', 2)
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(counter1), 1)
     self.assertEqual(self.evaluate(counter2), 2)
Example #8
 def testIncrement(self):
     counter = common.create_counter('counter', 0)
     inc_counter = counter.assign_add(1)
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(inc_counter), 1)
Example #9
 def testInitialValue(self):
     counter = common.create_counter('counter', 1)
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(counter), 1)
Example #10
 def testDefaults(self):
     counter = common.create_counter('counter')
     self.evaluate(tf.compat.v1.global_variables_initializer())
     self.assertEqual(self.evaluate(counter), 0)
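The tests in examples #6-#10 pin down the contract of `common.create_counter`: it defaults to 0, accepts an initial value and a shape (filling the shape with that value), supports `assign_add`, and allows several counters with the same name. A minimal sketch consistent with those tests, with defaults inferred rather than copied from TF-Agents (note the TODO in example #11 below about renaming it to `create_variable`):

import tensorflow as tf

def create_counter(name, initial_value=0, shape=(), dtype=tf.int64, trainable=False):
  # A variable of the given shape filled with initial_value. The defaults here
  # are inferred from the tests above, not copied from the TF-Agents source.
  return tf.Variable(
      tf.fill(shape, tf.constant(initial_value, dtype=dtype)),
      trainable=trainable,
      name=name)

counter = create_counter('counter', 1, shape=(2,))
print(counter.numpy())                                      # [1 1]
print(counter.assign_add(tf.ones((2,), tf.int64)).numpy())  # [2 2]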
Example #11
    def __init__(self,
                 time_step_spec,
                 action_spec,
                 optimizer=None,
                 actor_net=None,
                 value_net=None,
                 importance_ratio_clipping=0.0,
                 lambda_value=0.95,
                 discount_factor=0.99,
                 entropy_regularization=0.0,
                 policy_l2_reg=0.0,
                 value_function_l2_reg=0.0,
                 value_pred_loss_coef=0.5,
                 num_epochs=25,
                 use_gae=False,
                 use_td_lambda_return=False,
                 normalize_rewards=True,
                 reward_norm_clipping=10.0,
                 normalize_observations=True,
                 log_prob_clipping=0.0,
                 kl_cutoff_factor=2.0,
                 kl_cutoff_coef=1000.0,
                 initial_adaptive_kl_beta=1.0,
                 adaptive_kl_target=0.01,
                 adaptive_kl_tolerance=0.3,
                 gradient_clipping=None,
                 check_numerics=False,
                 debug_summaries=False,
                 summarize_grads_and_vars=False):
        """Creates a PPO Agent.

        Args:
          time_step_spec: A `TimeStep` spec of the expected time_steps.
          action_spec: A nest of BoundedTensorSpec representing the actions.
          optimizer: Optimizer to use for the agent.
          actor_net: A function actor_net(observations, action_spec) that returns
            a tensor of action distribution params for each observation. Takes
            nested observation and returns nested action.
          value_net: A function value_net(time_steps) that returns a value tensor
            from neural net predictions for each observation. Takes nested
            observation and returns batch of value_preds.
          importance_ratio_clipping: Epsilon in clipped, surrogate PPO objective.
            For more detail, see explanation at the top of the doc.
          lambda_value: Lambda parameter for TD-lambda computation.
          discount_factor: Discount factor for return computation.
          entropy_regularization: Coefficient for entropy regularization loss term.
          policy_l2_reg: Coefficient for l2 regularization of policy weights.
          value_function_l2_reg: Coefficient for l2 regularization of value
            function weights.
          value_pred_loss_coef: Multiplier for value prediction loss to balance
            with policy gradient loss.
          num_epochs: Number of epochs for computing policy updates.
          use_gae: If True (default False), uses generalized advantage estimation
            for computing per-timestep advantage. Else, just subtracts value
            predictions from empirical return.
          use_td_lambda_return: If True (default False), uses td_lambda_return for
            training value function.
            (td_lambda_return = gae_advantage + value_predictions)
          normalize_rewards: If true, keeps moving variance of rewards and
            normalizes incoming rewards.
          reward_norm_clipping: Value above and below which to clip normalized
            reward.
          normalize_observations: If true, keeps moving mean and variance of
            observations and normalizes incoming observations.
          log_prob_clipping: +/- value for clipping log probs to prevent inf / NaN
            values.  Default: no clipping.
          kl_cutoff_factor: If policy KL changes more than this much for any single
            timestep, adds a squared KL penalty to loss function.
          kl_cutoff_coef: Loss coefficient for kl cutoff term.
          initial_adaptive_kl_beta: Initial value for beta coefficient of adaptive
            kl penalty.
          adaptive_kl_target: Desired kl target for policy updates. If actual kl is
            far from this target, adaptive_kl_beta will be updated.
          adaptive_kl_tolerance: A tolerance for adaptive_kl_beta. Mean KL above
            (1 + tol) * adaptive_kl_target, or below (1 - tol) * adaptive_kl_target,
            will cause adaptive_kl_beta to be updated.
          gradient_clipping: Norm length to clip gradients.  Default: no clipping.
          check_numerics: If true, adds tf.check_numerics to help find NaN / Inf
            values. For debugging only.
          debug_summaries: A bool to gather debug summaries.
          summarize_grads_and_vars: If true, gradient summaries will be written.

        Raises:
          ValueError: If the actor_net is not a DistributionNetwork.
        """
        if not isinstance(actor_net, network.DistributionNetwork):
            raise ValueError(
                'actor_net must be an instance of a DistributionNetwork.')

        self._optimizer = optimizer
        self._actor_net = actor_net
        self._value_net = value_net
        self._importance_ratio_clipping = importance_ratio_clipping
        self._lambda = lambda_value
        self._discount_factor = discount_factor
        self._entropy_regularization = entropy_regularization
        self._policy_l2_reg = policy_l2_reg
        self._value_function_l2_reg = value_function_l2_reg
        self._value_pred_loss_coef = value_pred_loss_coef
        self._num_epochs = num_epochs
        self._use_gae = use_gae
        self._use_td_lambda_return = use_td_lambda_return
        self._reward_norm_clipping = reward_norm_clipping
        self._log_prob_clipping = log_prob_clipping
        self._kl_cutoff_factor = kl_cutoff_factor
        self._kl_cutoff_coef = kl_cutoff_coef
        self._adaptive_kl_target = adaptive_kl_target
        self._adaptive_kl_tolerance = adaptive_kl_tolerance
        self._gradient_clipping = gradient_clipping or 0.0
        self._check_numerics = check_numerics

        if initial_adaptive_kl_beta > 0.0:
            # TODO(kbanoop): Rename create_variable.
            self._adaptive_kl_beta = common_utils.create_counter(
                'adaptive_kl_beta', initial_adaptive_kl_beta, dtype=tf.float32)
        else:
            self._adaptive_kl_beta = None

        self._reward_normalizer = None
        if normalize_rewards:
            self._reward_normalizer = tensor_normalizer.StreamingTensorNormalizer(
                tensor_spec.TensorSpec([], tf.float32),
                scope='normalize_reward')

        self._observation_normalizer = None
        if normalize_observations:
            self._observation_normalizer = (
                tensor_normalizer.StreamingTensorNormalizer(
                    time_step_spec.observation,
                    scope='normalize_observations'))

        policy = greedy_policy.GreedyPolicy(
            ppo_policy.PPOPolicy(
                time_step_spec=time_step_spec,
                action_spec=action_spec,
                actor_network=actor_net,
                value_network=value_net,
                observation_normalizer=self._observation_normalizer,
                clip=False,
                collect=False))

        collect_policy = ppo_policy.PPOPolicy(
            time_step_spec=time_step_spec,
            action_spec=action_spec,
            actor_network=actor_net,
            value_network=value_net,
            observation_normalizer=self._observation_normalizer,
            clip=False,
            collect=True)

        self._action_distribution_spec = self._actor_net.output_spec

        super(PPOAgent,
              self).__init__(time_step_spec,
                             action_spec,
                             policy,
                             collect_policy,
                             train_sequence_length=None,
                             debug_summaries=debug_summaries,
                             summarize_grads_and_vars=summarize_grads_and_vars)
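`_adaptive_kl_beta`, together with `adaptive_kl_target` and `adaptive_kl_tolerance`, drives PPO's adaptive KL penalty. The sketch below shows a common update scheme for such a coefficient; the growth/shrink factor of 1.5 is an illustrative choice and not necessarily the value this agent uses.

import tensorflow as tf

adaptive_kl_beta = tf.Variable(1.0, dtype=tf.float32)  # initial_adaptive_kl_beta
adaptive_kl_target = 0.01
adaptive_kl_tolerance = 0.3

def update_adaptive_kl_beta(mean_kl):
  # Grow beta when the observed KL overshoots the target band, shrink it when
  # the KL undershoots; 1.5 is an illustrative factor, not the library's value.
  if mean_kl > (1.0 + adaptive_kl_tolerance) * adaptive_kl_target:
    adaptive_kl_beta.assign(adaptive_kl_beta * 1.5)
  elif mean_kl < (1.0 - adaptive_kl_tolerance) * adaptive_kl_target:
    adaptive_kl_beta.assign(adaptive_kl_beta / 1.5)
  return adaptive_kl_beta

update_adaptive_kl_beta(0.05)  # KL well above the band, so beta increases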