Ejemplo n.º 1
0
    def __init__(self, env_spec, name="CategoricalMLPPolicy", **kwargs):
        """Initialise the policy base and the MLP module from an env spec.

        Args:
            env_spec: Environment specification. Its ``input_space`` and
                ``output_space`` attributes must each expose ``flat_dim``.
            name (str): Name passed on to ``Policy.__init__``.
            **kwargs: Extra keyword arguments passed on unchanged to
                ``MLPModule.__init__``.

        """
        obs_dim = env_spec.input_space.flat_dim
        act_dim = env_spec.output_space.flat_dim
        self._obs_dim, self._action_dim = obs_dim, act_dim

        # Each base class is initialised explicitly, policy base first.
        Policy.__init__(self, env_spec, name)
        MLPModule.__init__(
            self,
            input_dim=self._obs_dim,
            output_dim=self._action_dim,
            **kwargs)
Ejemplo n.º 2
0
    def __init__(self, env_spec, **kwargs):
        """Initialise the policy base and the Gaussian MLP module.

        Args:
            env_spec: Environment specification. Its ``observation_space``
                and ``action_space`` attributes must each expose
                ``flat_dim``.
            **kwargs: Extra keyword arguments passed on unchanged to
                ``GaussianMLPModule.__init__``.

        """
        obs_dim = env_spec.observation_space.flat_dim
        act_dim = env_spec.action_space.flat_dim
        self._obs_dim, self._action_dim = obs_dim, act_dim

        # Each base class is initialised explicitly, policy base first.
        Policy.__init__(self, env_spec)
        GaussianMLPModule.__init__(
            self,
            input_dim=self._obs_dim,
            output_dim=self._action_dim,
            **kwargs)
Ejemplo n.º 3
0
 def __init__(self, env_spec, **kwargs):
     """Initialise the policy base and the categorical MLP module.

     Args:
         env_spec: Environment specification. Its ``action_space`` must be
             an ``akro.Discrete`` instance, and both ``observation_space``
             and ``action_space`` must expose ``flat_dim``.
         **kwargs: Extra keyword arguments passed on unchanged to
             ``CategoricalMLPModule.__init__``.

     Raises:
         AssertionError: If ``env_spec.action_space`` is not an
             ``akro.Discrete`` (only when assertions are enabled).

     """
     action_space = env_spec.action_space
     assert isinstance(action_space, akro.Discrete)

     self._env_spec = env_spec
     self._obs_dim = env_spec.observation_space.flat_dim
     self._action_dim = action_space.flat_dim

     # Each base class is initialised explicitly, policy base first.
     Policy.__init__(self, env_spec)
     CategoricalMLPModule.__init__(
         self,
         input_dim=self._obs_dim,
         output_dim=self._action_dim,
         **kwargs)
Ejemplo n.º 4
0
    def __init__(self, env_spec, name='DeterministicMLPPolicy', **kwargs):
        """Initialise the policy base and the underlying MLP module.

        The MLP's input and output dimensions are taken from the flat
        dimensions of the observation and action spaces respectively.

        Args:
            env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
            name (str): Policy name, passed on to ``Policy.__init__``.
            **kwargs: Additional keyword arguments passed to the MLPModule.

        """
        obs_dim = env_spec.observation_space.flat_dim
        act_dim = env_spec.action_space.flat_dim
        self._obs_dim, self._action_dim = obs_dim, act_dim

        # Each base class is initialised explicitly, policy base first.
        Policy.__init__(self, env_spec, name)
        MLPModule.__init__(
            self,
            input_dim=self._obs_dim,
            output_dim=self._action_dim,
            **kwargs)
Ejemplo n.º 5
0
    def __init__(self,
                 env_spec,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=torch.tanh,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 learn_std=True,
                 init_std=1.0,
                 min_std=1e-6,
                 max_std=None,
                 std_parameterization='exp',
                 layer_normalization=False,
                 name='GaussianMLPPolicy'):
        """Initialise the policy base and the Gaussian MLP module.

        The module's input and output dimensions are the flat dimensions of
        the observation and action spaces. Every other argument except
        ``name`` is handed to ``GaussianMLPModule.__init__`` unchanged under
        the same keyword.

        Args:
            env_spec: Environment specification. Its ``observation_space``
                and ``action_space`` must each expose ``flat_dim``.
            hidden_sizes: Forwarded to the module.
            hidden_nonlinearity: Forwarded to the module.
            hidden_w_init: Forwarded to the module.
            hidden_b_init: Forwarded to the module.
            output_nonlinearity: Forwarded to the module.
            output_w_init: Forwarded to the module.
            output_b_init: Forwarded to the module.
            learn_std: Forwarded to the module.
            init_std: Forwarded to the module.
            min_std: Forwarded to the module.
            max_std: Forwarded to the module.
            std_parameterization: Forwarded to the module.
            layer_normalization: Forwarded to the module.
            name (str): Name passed on to ``Policy.__init__``.

        """
        self._obs_dim = env_spec.observation_space.flat_dim
        self._action_dim = env_spec.action_space.flat_dim

        Policy.__init__(self, env_spec, name)

        # Gather the module configuration once, then unpack it in the call.
        module_config = dict(
            input_dim=self._obs_dim,
            output_dim=self._action_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_nonlinearity=output_nonlinearity,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            learn_std=learn_std,
            init_std=init_std,
            min_std=min_std,
            max_std=max_std,
            std_parameterization=std_parameterization,
            layer_normalization=layer_normalization,
        )
        GaussianMLPModule.__init__(self, **module_config)
Ejemplo n.º 6
0
    def __init__(self,
                 env_spec,
                 n_agents,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=torch.tanh,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False,
                 name='DecCategoricalMLPPolicy'):
        """Initialise the policy base and a per-agent categorical MLP module.

        The module's input dimension is the flat observation dimension
        divided by ``n_agents`` (rounded down); its output dimension is
        ``env_spec.action_space.n``.

        Args:
            env_spec: Environment specification. Its ``action_space`` must be
                an ``akro.Discrete`` instance and its ``observation_space``
                must expose ``flat_dim``.
            n_agents (int): Number of agents; divisor applied to the flat
                observation dimension.
            hidden_sizes: Forwarded to ``CategoricalMLPModule.__init__``.
            hidden_nonlinearity: Forwarded to the module.
            hidden_w_init: Forwarded to the module.
            hidden_b_init: Forwarded to the module.
            output_nonlinearity: Forwarded to the module.
            output_w_init: Forwarded to the module.
            output_b_init: Forwarded to the module.
            layer_normalization: Forwarded to the module.
            name (str): Name passed on to ``Policy.__init__``.

        Raises:
            AssertionError: If ``env_spec.action_space`` is not an
                ``akro.Discrete`` (only when assertions are enabled).
            ZeroDivisionError: If ``n_agents`` is zero.

        """
        # NOTE(review): the message names 'CategoricalMLPPolicy' while the
        # default policy name here is 'DecCategoricalMLPPolicy' - confirm
        # whether the message should be updated.
        assert isinstance(env_spec.action_space, akro.Discrete), (
            'CategoricalMLPPolicy only works with akro.Discrete action '
            'space.')

        self.centralized = True  # centralized training

        self._n_agents = n_agents
        # Floor division keeps the computation in integer arithmetic instead
        # of a float round trip; int() keeps the stored value a plain int
        # even if flat_dim is a numpy integer scalar.
        self._obs_dim = int(env_spec.observation_space.flat_dim //
                            n_agents)  # dec obs_dim
        self._action_dim = env_spec.action_space.n

        Policy.__init__(self, env_spec, name)
        CategoricalMLPModule.__init__(self,
                                      input_dim=self._obs_dim,
                                      output_dim=self._action_dim,
                                      hidden_sizes=hidden_sizes,
                                      hidden_nonlinearity=hidden_nonlinearity,
                                      hidden_w_init=hidden_w_init,
                                      hidden_b_init=hidden_b_init,
                                      output_nonlinearity=output_nonlinearity,
                                      output_w_init=output_w_init,
                                      output_b_init=output_b_init,
                                      layer_normalization=layer_normalization)
Ejemplo n.º 7
0
    def __init__(self,
                 env_spec,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=nn.ReLU,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 init_std=1.0,
                 min_std=np.exp(-20.),
                 max_std=np.exp(2.),
                 std_parameterization='exp',
                 layer_normalization=False):
        """Initialise the policy base and the two-headed Gaussian MLP module.

        The policy is always registered under the name
        ``'TanhGaussianPolicy'`` and the module is always given
        ``TanhNormal`` as its ``normal_distribution_cls``. All remaining
        arguments are handed to ``GaussianMLPTwoHeadedModule.__init__``
        unchanged under the same keyword.

        Args:
            env_spec: Environment specification. Its ``observation_space``
                and ``action_space`` must each expose ``flat_dim``.
            hidden_sizes: Forwarded to the module.
            hidden_nonlinearity: Forwarded to the module.
            hidden_w_init: Forwarded to the module.
            hidden_b_init: Forwarded to the module.
            output_nonlinearity: Forwarded to the module.
            output_w_init: Forwarded to the module.
            output_b_init: Forwarded to the module.
            init_std: Forwarded to the module.
            min_std: Forwarded to the module.
            max_std: Forwarded to the module.
            std_parameterization: Forwarded to the module.
            layer_normalization: Forwarded to the module.

        """
        self._obs_dim = env_spec.observation_space.flat_dim
        self._action_dim = env_spec.action_space.flat_dim

        Policy.__init__(self, env_spec, name='TanhGaussianPolicy')

        # Gather the module configuration once, then unpack it in the call.
        module_config = dict(
            input_dim=self._obs_dim,
            output_dim=self._action_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_nonlinearity=output_nonlinearity,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            init_std=init_std,
            min_std=min_std,
            max_std=max_std,
            std_parameterization=std_parameterization,
            layer_normalization=layer_normalization,
            normal_distribution_cls=TanhNormal,
        )
        GaussianMLPTwoHeadedModule.__init__(self, **module_config)
Ejemplo n.º 8
0
    def __init__(self, env_spec, module):
        """Initialise both base classes and store the wrapped module.

        Args:
            env_spec: Environment specification, passed on to
                ``Policy.__init__``.
            module: Object stored on the instance as ``_module``.
                Presumably an ``nn.Module`` - confirm against callers.

        """
        # nn.Module is initialised before any attribute is assigned.
        nn.Module.__init__(self)
        Policy.__init__(self, env_spec)

        # NOTE(review): if `module` is an nn.Module, this assignment should
        # register it as a submodule, which needs nn.Module.__init__ to have
        # run already - confirm before reordering.
        self._module = module