예제 #1
0
    def __init__(self,
                 n_input_channels,
                 n_dim_action,
                 n_hidden_channels,
                 n_hidden_layers,
                 action_space,
                 scale_mu=True,
                 normalize_input=True):
        """Build a shared MLPBN trunk and the linear heads that read from it.

        Registered links (inside ``init_scope``): ``hidden_layers``, ``v``,
        ``mu``, ``mat_diag`` and, only when its size is positive,
        ``mat_non_diag``.

        Args:
            n_input_channels: Input size of the MLPBN trunk.
            n_dim_action: Output size of the ``mu`` and ``mat_diag`` heads;
                also determines the size of ``mat_non_diag``.
            n_hidden_channels: Width of the trunk's hidden layers and of its
                output, which is the input size of every head.
            n_hidden_layers: Must be >= 1; the trunk gets
                ``n_hidden_layers - 1`` hidden layers plus its output layer.
            action_space: Stored on the instance unchanged; must not be None.
            scale_mu: Stored on the instance unchanged.
            normalize_input: Forwarded to MLPBN.

        Raises:
            AssertionError: If ``action_space`` is None or
                ``n_hidden_layers < 1``.
        """
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels

        assert action_space is not None
        self.action_space = action_space
        self.scale_mu = scale_mu

        super().__init__()
        with self.init_scope():
            assert n_hidden_layers >= 1
            # The trunk's output layer is counted as one of the
            # n_hidden_layers, hence the "- 1".
            trunk_hidden_sizes = [n_hidden_channels] * (n_hidden_layers - 1)
            self.hidden_layers = MLPBN(
                in_size=n_input_channels,
                out_size=n_hidden_channels,
                hidden_sizes=trunk_hidden_sizes,
                normalize_input=normalize_input,
            )

            # Heads on top of the trunk output.
            self.v = L.Linear(n_hidden_channels, 1)
            self.mu = L.Linear(n_hidden_channels, n_dim_action)
            self.mat_diag = L.Linear(n_hidden_channels, n_dim_action)

            # n * (n - 1) / 2 is the count of strictly off-diagonal entries in
            # one triangle of an n x n matrix -- presumably what this head
            # parameterizes; confirm against the forward pass.
            non_diag_size = n_dim_action * (n_dim_action - 1) // 2
            if non_diag_size > 0:
                self.mat_non_diag = L.Linear(n_hidden_channels, non_diag_size)
예제 #2
0
    def __init__(self, n_input_channels, n_dim_action, n_hidden_channels,
                 n_hidden_layers, action_space, scale_mu=True,
                 normalize_input=True):
        """Build the trunk and head links and hand them to the parent by name.

        The links are collected in a dict and passed to the parent
        constructor as keyword arguments (``hidden_layers``, ``v``, ``mu``,
        ``mat_diag`` and, only when its size is positive, ``mat_non_diag``).

        Args:
            n_input_channels: Input size of the MLPBN trunk.
            n_dim_action: Output size of the ``mu`` and ``mat_diag`` heads;
                also determines the size of ``mat_non_diag``.
            n_hidden_channels: Width of the trunk's hidden layers and of its
                output, which is the input size of every head.
            n_hidden_layers: Must be >= 1; the trunk gets
                ``n_hidden_layers - 1`` hidden layers plus its output layer.
            action_space: Stored on the instance unchanged; must not be None.
            scale_mu: Stored on the instance unchanged.
            normalize_input: Forwarded to MLPBN.

        Raises:
            AssertionError: If ``action_space`` is None or
                ``n_hidden_layers < 1``.
        """
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels

        assert action_space is not None
        self.action_space = action_space
        self.scale_mu = scale_mu

        assert n_hidden_layers >= 1
        trunk_hidden_sizes = [n_hidden_channels] * (n_hidden_layers - 1)

        # Links are created in the same order as they are listed here.
        links = {
            'hidden_layers': MLPBN(
                in_size=n_input_channels,
                out_size=n_hidden_channels,
                hidden_sizes=trunk_hidden_sizes,
                normalize_input=normalize_input),
            'v': L.Linear(n_hidden_channels, 1),
            'mu': L.Linear(n_hidden_channels, n_dim_action),
            'mat_diag': L.Linear(n_hidden_channels, n_dim_action),
        }

        # Only add the off-diagonal head when it would have at least one
        # output unit.
        non_diag_size = n_dim_action * (n_dim_action - 1) // 2
        if non_diag_size > 0:
            links['mat_non_diag'] = L.Linear(n_hidden_channels, non_diag_size)

        super().__init__(**links)
예제 #3
0
    def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
                 n_hidden_layers, normalize_input=True,
                 nonlinearity=F.relu, last_wscale=1.):
        """Create the observation MLPBN and the scalar-output MLP.

        Registered links (inside ``init_scope``):
            obs_mlp: MLPBN from ``n_dim_obs`` to ``n_hidden_channels`` with no
                hidden layers and ``normalize_output=True``.
            mlp: MLP from ``n_hidden_channels + n_dim_action`` inputs to a
                single output. The input size suggests the action is joined
                to the ``obs_mlp`` output -- confirm in the forward pass.

        Args:
            n_dim_obs: Input size of ``obs_mlp``.
            n_dim_action: Added to ``n_hidden_channels`` for the input size
                of ``mlp``.
            n_hidden_channels: Output size of ``obs_mlp`` and width of the
                hidden layers of ``mlp``.
            n_hidden_layers: Must be >= 1; ``mlp`` gets
                ``n_hidden_layers - 1`` hidden layers.
            normalize_input: Forwarded to ``obs_mlp``.
            nonlinearity: Forwarded to ``mlp``.
            last_wscale: Forwarded to ``mlp``.

        Raises:
            AssertionError: If ``n_hidden_layers < 1``.
        """
        assert n_hidden_layers >= 1

        self.n_input_channels = n_dim_obs + n_dim_action
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.normalize_input = normalize_input
        self.nonlinearity = nonlinearity

        mlp_hidden_sizes = [n_hidden_channels] * (n_hidden_layers - 1)

        super().__init__()
        with self.init_scope():
            # obs_mlp has no hidden layers, so the nonlinearity is not
            # passed to it.
            self.obs_mlp = MLPBN(
                in_size=n_dim_obs,
                out_size=n_hidden_channels,
                hidden_sizes=[],
                normalize_input=normalize_input,
                normalize_output=True,
            )
            self.mlp = MLP(
                in_size=n_hidden_channels + n_dim_action,
                out_size=1,
                hidden_sizes=mlp_hidden_sizes,
                nonlinearity=nonlinearity,
                last_wscale=last_wscale,
            )

        # Re-export the MLP's ``output`` attribute on this object.
        self.output = self.mlp.output
예제 #4
0
    def __init__(self, n_input_channels, n_hidden_layers,
                 n_hidden_channels, action_size,
                 min_action=None, max_action=None, bound_action=True,
                 normalize_input=True,
                 nonlinearity=F.relu,
                 last_wscale=1.):
        """Wrap an MLPBN model, optionally filtering its output by tanh bounds.

        Args:
            n_input_channels: Input size of the MLPBN.
            n_hidden_layers: Number of hidden layers of the MLPBN.
            n_hidden_channels: Width of each hidden layer.
            action_size: Output size of the MLPBN.
            min_action: Passed to ``bound_by_tanh`` when bounding is enabled.
            max_action: Passed to ``bound_by_tanh`` when bounding is enabled.
            bound_action: If true, an action filter calling ``bound_by_tanh``
                is given to the parent constructor; otherwise ``None`` is.
            normalize_input: Forwarded to the MLPBN.
            nonlinearity: Forwarded to the MLPBN.
            last_wscale: Forwarded to the MLPBN.

        NOTE(review): ``min_action``/``max_action`` default to None while
        ``bound_action`` defaults to True; whether ``bound_by_tanh`` accepts
        None bounds is not visible here.
        """
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.action_size = action_size
        self.min_action = min_action
        self.max_action = max_action
        self.bound_action = bound_action
        self.normalize_input = normalize_input

        def _bound(x):
            # The bounds are looked up on self at call time, not captured
            # now, so later changes to the attributes are picked up.
            return bound_by_tanh(x, self.min_action, self.max_action)

        action_filter = _bound if self.bound_action else None

        hidden_sizes = (n_hidden_channels,) * n_hidden_layers
        model = MLPBN(
            n_input_channels,
            action_size,
            hidden_sizes,
            normalize_input=self.normalize_input,
            nonlinearity=nonlinearity,
            last_wscale=last_wscale,
        )
        super().__init__(model=model, action_filter=action_filter)
예제 #5
0
    def __init__(self,
                 n_input_channels,
                 n_hidden_layers,
                 n_hidden_channels,
                 action_size,
                 min_action=None,
                 max_action=None,
                 bound_action=True,
                 normalize_input=True):
        """Wrap an MLPBN model, optionally filtering its output by tanh bounds.

        Args:
            n_input_channels: Input size of the MLPBN.
            n_hidden_layers: Number of hidden layers of the MLPBN.
            n_hidden_channels: Width of each hidden layer.
            action_size: Output size of the MLPBN.
            min_action: Passed to ``bound_by_tanh`` when bounding is enabled.
            max_action: Passed to ``bound_by_tanh`` when bounding is enabled.
            bound_action: If true, an action filter calling ``bound_by_tanh``
                is given to the parent constructor; otherwise ``None`` is.
            normalize_input: Forwarded to the MLPBN.

        NOTE(review): ``min_action``/``max_action`` default to None while
        ``bound_action`` defaults to True; whether ``bound_by_tanh`` accepts
        None bounds is not visible here.
        """
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.action_size = action_size
        self.min_action = min_action
        self.max_action = max_action
        self.bound_action = bound_action
        self.normalize_input = normalize_input

        action_filter = None
        if self.bound_action:
            def action_filter(x):
                # Bounds are read from self on every call rather than being
                # captured at construction time.
                return bound_by_tanh(x, self.min_action, self.max_action)

        hidden_sizes = (n_hidden_channels,) * n_hidden_layers
        model = MLPBN(n_input_channels,
                      action_size,
                      hidden_sizes,
                      normalize_input=self.normalize_input)
        super().__init__(model=model, action_filter=action_filter)
예제 #6
0
파일: NN.py 프로젝트: wsgan001/KDDRL
    def __init__(self,
                 ndim_obs,
                 n_actions,
                 n_hidden_channels,
                 n_hidden_layers,
                 nonlinearity=F.relu,
                 last_wscale=1.0):
        """Build the MLPBN model and pass it to the parent constructor.

        Args:
            ndim_obs: Input size of the MLPBN.
            n_actions: Output size of the MLPBN.
            n_hidden_channels: Width of each hidden layer.
            n_hidden_layers: Number of hidden layers.
            nonlinearity: Forwarded to the MLPBN.
            last_wscale: Forwarded to the MLPBN.
        """
        # Previously ``nonlinearity`` and ``last_wscale`` were accepted but
        # never handed to the MLPBN, so non-default values were silently
        # ignored. They are now forwarded.
        # NOTE(review): assumes the installed MLPBN accepts both keywords --
        # confirm against the ChainerRL version in use.
        model = MLPBN(in_size=ndim_obs,
                      out_size=n_actions,
                      hidden_sizes=[n_hidden_channels] * n_hidden_layers,
                      nonlinearity=nonlinearity,
                      last_wscale=last_wscale)
        super(QFunc, self).__init__(model=model)
    def __init__(self,
                 n_dim_obs,
                 n_dim_action,
                 n_hidden_channels,
                 n_hidden_layers,
                 normalize_input=True):
        """Create the observation MLPBN and the scalar-output MLP.

        Both sub-networks are passed to the parent constructor as the
        keyword arguments ``obs_mlp`` and ``mlp``.

        Args:
            n_dim_obs: Input size of ``obs_mlp``.
            n_dim_action: Added to ``n_hidden_channels`` for the input size
                of ``mlp``.
            n_hidden_channels: Output size of ``obs_mlp`` and width of the
                hidden layers of ``mlp``.
            n_hidden_layers: ``mlp`` gets ``n_hidden_layers - 1`` hidden
                layers.
            normalize_input: Forwarded to ``obs_mlp``.
        """
        self.n_input_channels = n_dim_obs + n_dim_action
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.normalize_input = normalize_input

        # obs_mlp: no hidden layers, with normalize_output enabled.
        obs_encoder = MLPBN(
            in_size=n_dim_obs,
            out_size=n_hidden_channels,
            hidden_sizes=[],
            normalize_input=normalize_input,
            normalize_output=True,
        )
        # mlp: its input size is the obs_mlp output width plus the action
        # dimension -- presumably the two are concatenated; confirm in the
        # forward pass.
        q_head = MLP(
            in_size=n_hidden_channels + n_dim_action,
            out_size=1,
            hidden_sizes=[n_hidden_channels] * (n_hidden_layers - 1),
        )

        super().__init__(obs_mlp=obs_encoder, mlp=q_head)
        # Re-export the MLP's ``output`` attribute on this object.
        self.output = self.mlp.output