def flow():
    """Return a ``Basic1DFlow`` wrapping a Normal-conditioned distribution.

    The wrapped ``ptd.Distribution`` is given a ``loc`` of two zeros and a
    ``scale`` of two ones.
    """
    normal_params = {"loc": torch.zeros(2), "scale": torch.ones(2)}
    wrapped = ptd.Distribution(cond_dist=ptd.Normal(), params=normal_params)
    return Basic1DFlow(wrapped)
Beispiel #2
0
    def __init__(self, obs_space: Box, action_space: Box, spec: MLPModelSpec):
        """Assemble the encoder, Normal parameter head and distribution.

        Args:
            obs_space: Observation space. Forwarded to ``StateActionMLP``;
                ``obs_space.shape[0]`` is the second positional argument
                given to ``nnx.NormalParams``.
            action_space: Action space forwarded to ``StateActionMLP``.
            spec: Model specification. Forwarded to ``StateActionMLP`` and
                read for ``input_dependent_scale`` and ``fix_logvar_bounds``.
        """
        state_action_encoder = StateActionMLP(obs_space, action_space, spec)

        normal_head = nnx.NormalParams(
            state_action_encoder.out_features,
            obs_space.shape[0],
            input_dependent_scale=spec.input_dependent_scale,
            bound_parameters=not spec.fix_logvar_bounds,
        )
        if spec.fix_logvar_bounds:
            # Fixed bounds requested: overwrite the log-variance limits in place.
            normal_head.max_logvar.fill_(2)
            normal_head.min_logvar.fill_(-20)
        dynamics_params = DynamicsParams(state_action_encoder, normal_head)

        independent_normal = ptd.Independent(
            ptd.Normal(), reinterpreted_batch_ndims=1
        )

        super().__init__(dynamics_params, independent_normal)
        # Submodules may only be attached once nn.Module.__init__ has run,
        # hence the encoder attribute is set last.
        self.encoder = state_action_encoder
Beispiel #3
0
    def __init__(
        self,
        obs_space: Box,
        action_space: Box,
        mlp_spec: MLPStochasticPolicy.spec_cls,
        input_dependent_scale: bool,
    ):
        """Set up a Normal policy squashed by a tanh transform.

        Args:
            obs_space: Observation space forwarded to the parent initializer.
            action_space: Action space; its ``low``/``high`` arrays bound the
                ``TanhSquashTransform`` and ``shape[0]`` is handed to
                ``nnx.PolicyNormalParams``.
            mlp_spec: MLP specification forwarded to the parent initializer.
            input_dependent_scale: Forwarded unchanged to
                ``nnx.PolicyNormalParams``.
        """
        independent_normal = ptd.Independent(
            ptd.Normal(), reinterpreted_batch_ndims=1
        )
        squash = ptd.flows.TanhSquashTransform(
            low=torch.as_tensor(action_space.low),
            high=torch.as_tensor(action_space.high),
            event_dim=1,
        )
        squashed_normal = ptd.TransformedDistribution(independent_normal, squash)

        def params_fn(out_features):
            # Built lazily: the parent decides when to call this and with
            # which ``out_features`` value.
            action_size = action_space.shape[0]
            return nnx.PolicyNormalParams(
                out_features,
                action_size,
                input_dependent_scale=input_dependent_scale,
            )

        super().__init__(obs_space, mlp_spec, params_fn, squashed_normal)
Beispiel #4
0
 def __init__(self, obs_space: Box, action_space: Box, spec: SVGModelSpec):
     """Initialize the parent with SVG dynamics params and a Normal.

     Args:
         obs_space: Observation space forwarded to ``SVGDynamicsParams``.
         action_space: Action space forwarded to ``SVGDynamicsParams``.
         spec: Model specification forwarded to ``SVGDynamicsParams``.
     """
     dynamics_params = SVGDynamicsParams(obs_space, action_space, spec)
     independent_normal = ptd.Independent(
         ptd.Normal(), reinterpreted_batch_ndims=1
     )
     super().__init__(dynamics_params, independent_normal)