Example #1
    def sample_actions(
        self,
        vec_obs: List[torch.Tensor],
        vis_obs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        seq_len: int = 1,
        all_log_probs: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Dict[
            str, torch.Tensor], torch.Tensor]:
        """
        :param all_log_probs: If True, returns (for discrete actions) a tensor of log probs, one for each action.
        """
        dists, value_heads, memories = self.actor_critic.get_dist_and_value(
            vec_obs, vis_obs, masks, memories, seq_len)
        action_list = self.actor_critic.sample_action(dists)
        log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
            action_list, dists)
        actions = torch.stack(action_list, dim=-1)
        if self.use_continuous_act:
            actions = actions[:, :, 0]
        else:
            actions = actions[:, 0, :]

        return (
            actions,
            all_logs if all_log_probs else log_probs,
            entropies,
            value_heads,
            memories,
        )
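A minimal usage sketch for the method above, assuming a policy object (here called policy) that exposes sample_actions and takes lists of batched observation tensors; the variable names and shapes are illustrative, not from the source.

    vec_obs = [torch.zeros((1, 8))]  # one vector-observation tensor, batch size 1 (illustrative shape)
    vis_obs = []                     # no visual observations in this sketch
    actions, log_probs, entropies, value_heads, memories = policy.sample_actions(vec_obs, vis_obs)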
Example #2
# Imports assumed for the ML-Agents layout this test comes from (module paths may differ by release):
import pytest
import torch

from mlagents.trainers.torch.distributions import (
    CategoricalDistInstance,
    GaussianDistInstance,
)
from mlagents.trainers.torch.utils import ModelUtils


def test_get_probs_and_entropy():
    # Test continuous
    # Add two dists to the list. This isn't done in the code but we'd like to support it.
    dist_list = [
        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
    ]
    action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        action_list, dist_list)
    assert log_probs.shape == (1, 2, 2)
    assert entropies.shape == (1, 2, 2)
    assert all_probs is None

    for log_prob in log_probs.flatten():
        # Log prob of standard normal at 0
        assert log_prob == pytest.approx(-0.919, abs=0.01)

    for ent in entropies.flatten():
        # Entropy of a standard normal (constant, independent of the sample point)
        assert ent == pytest.approx(1.42, abs=0.01)

    # Test discrete
    # Add two dists to the list.
    act_size = 2
    test_prob = torch.tensor([[1.0 - 0.1 * (act_size - 1)] + [0.1] *
                              (act_size - 1)])  # High prob for first action
    dist_list = [
        CategoricalDistInstance(test_prob),
        CategoricalDistInstance(test_prob)
    ]
    action_list = [torch.tensor([0]), torch.tensor([1])]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        action_list, dist_list)
    assert all_probs.shape == (1, len(dist_list) * act_size)
    assert entropies.shape == (1, len(dist_list))
    # Make sure the first action has higher probability than the others.
    assert log_probs.flatten()[0] > log_probs.flatten()[1]
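The expected values in the continuous part of this test follow directly from the standard normal distribution; a quick standalone check (plain Python math, no project code assumed):

    import math

    # log-density of N(0, 1) at 0: -0.5 * ln(2*pi) ≈ -0.9189, matching the -0.919 above
    log_prob_at_zero = -0.5 * math.log(2 * math.pi)
    # differential entropy of N(0, 1): 0.5 * ln(2*pi*e) ≈ 1.4189, matching the 1.42 above
    entropy = 0.5 * math.log(2 * math.pi * math.e)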
Example #3
    def sample_actions(
        self,
        vec_obs: List[torch.Tensor],
        vis_obs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        seq_len: int = 1,
        all_log_probs: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor,
               torch.Tensor]:
        """
        :param vec_obs: List of vector observations.
        :param vis_obs: List of visual observations.
        :param masks: Loss masks for RNN, else None.
        :param memories: Input memories when using RNN, else None.
        :param seq_len: Sequence length when using RNN.
        :param all_log_probs: If True, returns (for discrete actions) a tensor of log probs, one for each action.
        :return: Tuple of actions, actions clipped to [-1, 1], log probabilities (all of them if all_log_probs
            is True), entropies, and output memories, all as Torch Tensors.
        """
        if memories is None:
            dists, memories = self.actor_critic.get_dists(
                vec_obs, vis_obs, masks, memories, seq_len)
        else:
            # If we're using an LSTM, we need to evaluate the value heads as well to get the critic memories
            dists, _, memories = self.actor_critic.get_dist_and_value(
                vec_obs, vis_obs, masks, memories, seq_len)
        action_list = self.actor_critic.sample_action(dists)
        log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
            action_list, dists)
        actions = torch.stack(action_list, dim=-1)
        if self.use_continuous_act:
            actions = actions[:, :, 0]
        else:
            actions = actions[:, 0, :]
        # Use the sum of entropy across actions, not the mean
        entropy_sum = torch.sum(entropies, dim=1)

        if self._clip_action and self.use_continuous_act:
            clipped_action = torch.clamp(actions, -3, 3) / 3
        else:
            clipped_action = actions
        return (
            actions,
            clipped_action,
            all_logs if all_log_probs else log_probs,
            entropy_sum,
            memories,
        )
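The clipping branch above maps unbounded Gaussian samples into [-1, 1] by clamping to [-3, 3] and dividing by 3; a standalone check of that mapping (plain PyTorch, no project code assumed):

    import torch

    raw = torch.tensor([[-5.0, -1.5, 0.0, 1.5, 5.0]])
    clipped = torch.clamp(raw, -3, 3) / 3
    # clipped -> tensor([[-1.0000, -0.5000, 0.0000, 0.5000, 1.0000]])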
Example #4
    def evaluate_actions(
        self,
        vec_obs: torch.Tensor,
        vis_obs: torch.Tensor,
        actions: torch.Tensor,
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        seq_len: int = 1,
    ) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor]]:
        dists, value_heads, _ = self.actor_critic.get_dist_and_value(
            vec_obs, vis_obs, masks, memories, seq_len)
        action_list = [actions[..., i] for i in range(actions.shape[-1])]
        log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(
            action_list, dists)

        return log_probs, entropies, value_heads
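A sketch of how evaluate_actions might be used during a policy update; policy, the buffered observation/action tensors, and old_log_probs are assumptions for illustration, not part of the snippet above.

    # Re-evaluate stored actions under the current policy distributions.
    log_probs, entropies, value_heads = policy.evaluate_actions(
        vec_obs, vis_obs, buffered_actions, masks=masks, memories=memories, seq_len=seq_len
    )
    # PPO-style importance ratio against log probs recorded at rollout time.
    ratio = torch.exp(log_probs - old_log_probs)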