Python GaussianDistribution Examples

Programming Language: Python

Namespace/Package Name: mlagents.trainers.torch.distributions

Examples at hotexamples.com: 4

Python GaussianDistribution - 4 examples found. These are the top-rated real-world Python examples of mlagents.trainers.torch.distributions.GaussianDistribution extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

GaussianDistribution(4)

parameters(1)

Example #1

Show file

def test_gaussian_distribution(conditional_sigma, tanh_squash):
    """Check that a GaussianDistribution module can be trained by backprop.

    A fixed all-ones embedding is fed through the module for 50 Adam steps
    while minimising the MSE between the log-probability of an all-zero
    action and a target of -2. The test passes when every entry of the last
    computed log-probability is within 0.1 of that target.

    :param conditional_sigma: Forwarded to GaussianDistribution.
    :param tanh_squash: Forwarded to GaussianDistribution; also selects which
        distribution-instance class the module is expected to return.
    """
    # NOTE(review): both arguments are presumably supplied by pytest
    # parametrization that is not visible in this excerpt - confirm.
    torch.manual_seed(0)
    hidden_size = 16
    act_size = 4
    target_log_prob = -2
    # Size the embedding from hidden_size instead of repeating the literal,
    # so changing hidden_size cannot produce a shape mismatch.
    sample_embedding = torch.ones((1, hidden_size))
    gauss_dist = GaussianDistribution(
        hidden_size,
        act_size,
        conditional_sigma=conditional_sigma,
        tanh_squash=tanh_squash,
    )
    # The expected class does not change between iterations; pick it once.
    expected_instance_type = (
        TanhGaussianDistInstance if tanh_squash else GaussianDistInstance
    )

    # Make sure backprop works
    force_action = torch.zeros((1, act_size))
    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)

    for _ in range(50):
        dist_inst = gauss_dist(sample_embedding)[0]
        assert isinstance(dist_inst, expected_instance_type)
        log_prob = dist_inst.log_prob(force_action)
        loss = torch.nn.functional.mse_loss(
            log_prob, target_log_prob * torch.ones(log_prob.shape)
        )
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # log_prob holds the value computed in the final iteration.
    for prob in log_prob.flatten():
        assert prob == pytest.approx(target_log_prob, abs=0.1)

Example #2

Show file

 def __init__(
     self,
     observation_shapes: List[Tuple[int, ...]],
     network_settings: NetworkSettings,
     act_type: ActionType,
     act_size: List[int],
     conditional_sigma: bool = False,
     tanh_squash: bool = False,
 ):
     """
     Build the network body and the action distribution for this module.

     :param observation_shapes: Passed unchanged to NetworkBody.
     :param network_settings: Passed to NetworkBody; its ``memory`` and
         ``hidden_units`` fields also determine ``encoding_size``.
     :param act_type: ActionType.CONTINUOUS selects a GaussianDistribution,
         anything else a MultiCategoricalDistribution.
     :param act_size: Action sizes. Only the first entry is used for the
         continuous case; the whole list is used for the discrete case.
     :param conditional_sigma: Forwarded to GaussianDistribution.
     :param tanh_squash: Forwarded to GaussianDistribution.
     """
     super().__init__()
     self.act_type = act_type
     self.act_size = act_size
     # The four Parameters below hold constants, not weights. They are
     # created with requires_grad=False so autograd never computes gradients
     # for them.
     # NOTE(review): presumably stored as Parameters so they travel with the
     # module's state on save/export - confirm.
     self.version_number = torch.nn.Parameter(
         torch.Tensor([2.0]), requires_grad=False
     )
     # NOTE(review): this is 0 even when network_settings.memory is set -
     # confirm that is intended.
     self.memory_size = torch.nn.Parameter(
         torch.Tensor([0]), requires_grad=False
     )
     self.is_continuous_int = torch.nn.Parameter(
         torch.Tensor([int(act_type == ActionType.CONTINUOUS)]),
         requires_grad=False,
     )
     self.act_size_vector = torch.nn.Parameter(
         torch.Tensor(act_size), requires_grad=False
     )
     self.network_body = NetworkBody(observation_shapes, network_settings)
     # With memory configured the encoding is half the memory size;
     # otherwise it is the hidden-layer width.
     if network_settings.memory is not None:
         self.encoding_size = network_settings.memory.memory_size // 2
     else:
         self.encoding_size = network_settings.hidden_units
     if self.act_type == ActionType.CONTINUOUS:
         self.distribution = GaussianDistribution(
             self.encoding_size,
             act_size[0],
             conditional_sigma=conditional_sigma,
             tanh_squash=tanh_squash,
         )
     else:
         self.distribution = MultiCategoricalDistribution(
             self.encoding_size, act_size
         )

Example #3

Show file

    def __init__(
        self,
        hidden_size: int,
        action_spec: ActionSpec,
        conditional_sigma: bool = False,
        tanh_squash: bool = False,
        deterministic: bool = False,
    ):
        """
        Create the action head(s) of a policy from an ActionSpec.

        Depending on the action_spec, the module holds a continuous (Gaussian)
        distribution, a discrete (multi-categorical) distribution, or both. Each
        distribution is sized from the encoding produced by the network body. A
        distribution whose size in the action_spec is zero is left as None.

        :param hidden_size: Size of the input to the ActionModel.
        :param action_spec: The ActionSpec defining the action space dimensions and distributions.
        :param conditional_sigma: Whether or not the std of a Gaussian is conditioned on state.
        :param tanh_squash: Whether to squash the output of a Gaussian with the tanh function.
        :param deterministic: Whether to select actions deterministically in policy.
        """
        super().__init__()
        self.encoding_size = hidden_size
        self.action_spec = action_spec

        # Continuous head: present only when the spec has continuous actions.
        num_continuous = action_spec.continuous_size
        self._continuous_distribution = (
            GaussianDistribution(
                hidden_size,
                num_continuous,
                conditional_sigma=conditional_sigma,
                tanh_squash=tanh_squash,
            )
            if num_continuous > 0
            else None
        )

        # Discrete head: present only when the spec has discrete branches.
        self._discrete_distribution = (
            MultiCategoricalDistribution(hidden_size, action_spec.discrete_branches)
            if action_spec.discrete_size > 0
            else None
        )

        self._deterministic = deterministic
        # During training, clipping is done in TorchPolicy, but we need to clip before ONNX
        # export as well.
        self._clip_action_on_export = not tanh_squash

Example #4

Show file

    def __init__(
        self,
        observation_shapes: List[Tuple[int, ...]],
        network_settings: NetworkSettings,
        action_spec: ActionSpec,
        conditional_sigma: bool = False,
        tanh_squash: bool = False,
    ):
        """
        Build the network body and the action distribution for this module.

        :param observation_shapes: Passed unchanged to NetworkBody.
        :param network_settings: Passed to NetworkBody; its ``memory`` and
            ``hidden_units`` fields also determine ``encoding_size``.
        :param action_spec: Describes the action space. A continuous spec gets a
            GaussianDistribution, anything else a MultiCategoricalDistribution.
        :param conditional_sigma: Forwarded to GaussianDistribution.
        :param tanh_squash: Forwarded to GaussianDistribution; also disables
            clipping of actions on export.
        """
        super().__init__()
        self.action_spec = action_spec
        # The three Parameters below hold constants, not weights. All of them are
        # created with requires_grad=False (previously only act_size_vector was)
        # so autograd never computes gradients for them.
        self.version_number = torch.nn.Parameter(
            torch.Tensor([2.0]), requires_grad=False)
        self.is_continuous_int = torch.nn.Parameter(
            torch.Tensor([int(self.action_spec.is_continuous())]),
            requires_grad=False,
        )
        # Total action size: continuous dimensions plus the sum of all discrete
        # branch sizes.
        self.act_size_vector = torch.nn.Parameter(
            torch.Tensor([
                self.action_spec.continuous_size +
                sum(self.action_spec.discrete_branches)
            ]),
            requires_grad=False,
        )
        self.network_body = NetworkBody(observation_shapes, network_settings)
        # With memory configured the encoding is half the memory size; otherwise
        # it is the hidden-layer width.
        if network_settings.memory is not None:
            self.encoding_size = network_settings.memory.memory_size // 2
        else:
            self.encoding_size = network_settings.hidden_units

        if self.action_spec.is_continuous():
            self.distribution = GaussianDistribution(
                self.encoding_size,
                self.action_spec.continuous_size,
                conditional_sigma=conditional_sigma,
                tanh_squash=tanh_squash,
            )
        else:
            self.distribution = MultiCategoricalDistribution(
                self.encoding_size, self.action_spec.discrete_branches)
        # During training, clipping is done in TorchPolicy, but we need to clip before ONNX
        # export as well.
        self._clip_action_on_export = not tanh_squash