Esempio n. 1
0
 def _update_random_action(spec, noisy_action):
     """Overwrite a random subset of batch entries with uniform random actions.

     Each entry along the first dimension of ``noisy_action`` is replaced,
     independently with probability ``self._rollout_random_action``, by a
     sample drawn uniformly from ``[-1, 1)`` and then passed through
     ``spec_utils.scale_to_spec``. The update happens in place and nothing
     is returned.

     Args:
         spec: the spec handed to ``spec_utils.scale_to_spec`` for the random
             sample (presumably the action spec matching ``noisy_action``;
             confirm against the caller).
         noisy_action (torch.Tensor): tensor modified in place; its first
             dimension is treated as the batch dimension.
     """
     random_action = spec_utils.scale_to_spec(
         torch.rand_like(noisy_action) * 2 - 1, spec)
     # Draw the per-entry coin flips on the same device as the actions so the
     # mask can index them regardless of what the default device is.
     replace = torch.rand(
         noisy_action.shape[:1],
         device=noisy_action.device) < self._rollout_random_action
     # A boolean mask over the first dimension works for any rank, whereas
     # ``[idx, :]`` requires the tensor to have at least two dimensions.
     noisy_action[replace] = random_action[replace]
Esempio n. 2
0
    def forward(self, observation, state=()):
        """Computes action given an observation.

        The observation is first processed by the parent class's
        ``forward()`` and by ``self._encoding_net``. Each action layer is then
        applied to the encoded observation, its output is squashed with
        ``self._squashing_func`` and rescaled with
        ``spec_utils.scale_to_spec``. When output summarization is enabled,
        the batch-averaged norms of the pre-activation and of the final action
        are written as scalar summaries for every action layer.

        Args:
            observation: A tensor consistent with ``input_tensor_spec``
            state: empty for API consistent with ``ActorRNNNetwork``

        Returns:
            tuple:
            - action: the per-layer actions packed into the structure of
              ``self._action_spec``
            - state: the state returned by the parent's ``forward()``
              (expected to be empty)
        """

        observation, state = super().forward(observation, state)
        encoded_obs, _ = self._encoding_net(observation)

        actions = []
        for i, (layer, spec) in enumerate(
                zip(self._action_layers, self._flat_action_spec)):
            pre_activation = layer(encoded_obs)
            action = spec_utils.scale_to_spec(
                self._squashing_func(pre_activation), spec)

            if alf.summary.should_summarize_output():
                # Both summaries share the same prefix and execution-mode
                # suffix; only the middle part of the name differs.
                name_prefix = ('summarize_output/' + self.name +
                               '.action_layer.' + str(i))
                mode = common.exe_mode_name()
                # Norm is taken over all non-batch dims, then averaged over
                # the batch.
                alf.summary.scalar(
                    name=name_prefix + '.pre_activation.output_norm.' + mode,
                    data=torch.mean(
                        pre_activation.norm(
                            dim=list(range(1, pre_activation.ndim)))))
                alf.summary.scalar(
                    name=name_prefix + '.action.output_norm.' + mode,
                    data=torch.mean(
                        action.norm(dim=list(range(1, action.ndim)))))

            actions.append(action)

        output_actions = nest.pack_sequence_as(self._action_spec, actions)
        return output_actions, state
Esempio n. 3
0
    def forward(self, observation, state):
        """Computes action given an observation and a recurrent state.

        The observation and state are first passed through the parent
        class's ``forward()`` and then through ``self._lstm_encoding_net``,
        which yields the encoded observation and the updated state. Every
        action layer is applied to the encoded observation and its output is
        rescaled with ``spec_utils.scale_to_spec``.

        Args:
            observation: A tensor consistent with ``input_tensor_spec``
            state (nest[tuple]): a nest structure of state tuples ``(h, c)``

        Returns:
            tuple:
            - action: the per-layer actions packed into the structure of
              ``self._action_spec``
            - new_state (nest[tuple]): the updated states
        """
        observation, state = super().forward(observation, state)
        features, new_state = self._lstm_encoding_net(observation, state)

        # One scaled action per (layer, spec) pair, in layer order.
        flat_actions = [
            spec_utils.scale_to_spec(head(features), action_spec)
            for head, action_spec in zip(self._action_layers,
                                         self._flat_action_spec)
        ]
        return nest.pack_sequence_as(self._action_spec,
                                     flat_actions), new_state