Example #1
    def __init__(self,
                 n_input_channels,
                 action_size,
                 var,
                 n_hidden_layers=0,
                 n_hidden_channels=None,
                 min_action=None,
                 max_action=None,
                 bound_mean=False,
                 nonlinearity=F.relu):

        self.n_input_channels = n_input_channels
        self.action_size = action_size
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.min_action = min_action
        self.max_action = max_action
        self.bound_mean = bound_mean
        self.nonlinearity = nonlinearity
        self.var = var
        layers = []
        if n_hidden_layers > 0:
            layers.append(L.Linear(n_input_channels, n_hidden_channels))
            for _ in range(n_hidden_layers - 1):
                layers.append(self.nonlinearity)
                layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
            layers.append(self.nonlinearity)
            layers.append(L.Linear(n_hidden_channels, action_size))
        else:
            # No hidden layers: a single linear map computes the mean
            layers.append(L.Linear(n_input_channels, action_size))
        if self.bound_mean:
            layers.append(
                lambda x: bound_by_tanh(x, self.min_action, self.max_action))
        layers.append(lambda x: distribution.GaussianDistribution(
            x, self.xp.broadcast_to(self.var, x.shape)))
        super().__init__(*layers)
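A hedged usage sketch for the constructor above: this pattern matches ChainerRL's FCGaussianPolicyWithFixedCovariance, but the class name and shapes below are assumptions, not quoted from the source.

import numpy as np

# Assumed class name for the snippet above; shapes are illustrative.
policy = FCGaussianPolicyWithFixedCovariance(
    n_input_channels=4, action_size=2, var=0.1,
    n_hidden_layers=2, n_hidden_channels=64)
obs = np.zeros((1, 4), dtype=np.float32)  # batch of one 4-dim observation
dist = policy(obs)                        # forward pass -> GaussianDistribution
action = dist.sample()                    # stochastic action, shape (1, 2)
log_p = dist.log_prob(action)             # log-density for policy-gradient losses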
Example #2
    def __init__(self,
                 n_input_channels,
                 n_hidden_layers,
                 n_hidden_channels,
                 action_size,
                 min_action=None,
                 max_action=None,
                 bound_action=True,
                 normalize_input=True):
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.action_size = action_size
        self.min_action = min_action
        self.max_action = max_action
        self.bound_action = bound_action
        self.normalize_input = normalize_input

        if self.bound_action:
            def action_filter(x):
                return bound_by_tanh(x, self.min_action, self.max_action)
        else:
            action_filter = None

        super().__init__(model=MLPBN(n_input_channels,
                                     action_size,
                                     (n_hidden_channels, ) * n_hidden_layers,
                                     normalize_input=self.normalize_input),
                         action_filter=action_filter)
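Both constructors above delegate action squashing to bound_by_tanh. A minimal sketch of what such a helper does, simplified from ChainerRL's chainerrl.functions.bound_by_tanh (the real version also handles GPU arrays and shape checks):

import chainer.functions as F

def bound_by_tanh_sketch(x, low, high):
    # Squash x into (low, high): tanh maps to (-1, 1), then an affine
    # transform rescales to the action bounds. low/high may be scalars
    # or arrays broadcastable against x.
    scale = (high - low) / 2
    center = (high + low) / 2
    return F.tanh(x) * scale + center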
Example #3
    def compute_mean_and_var(self, x, test=False):
        h = x
        for layer in self.hidden_layers:
            h = self.nonlinearity(layer(h))
        mean = self.mean_layer(h)
        if self.bound_mean:
            mean = bound_by_tanh(mean, self.min_action, self.max_action)
        var = F.broadcast_to(F.softplus(self.var_layer(h)), mean.shape)
        return mean, var
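The softplus in compute_mean_and_var is what keeps the predicted variance strictly positive; a quick numeric illustration (values approximate):

import numpy as np
import chainer.functions as F

raw = np.array([[-3.0, 0.0, 3.0]], dtype=np.float32)  # unconstrained layer output
print(F.softplus(raw).array)  # ~[[0.049 0.693 3.049]]: always > 0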
Example #4
    def __init__(self,
                 n_input_channels,
                 action_size,
                 var,
                 n_hidden_layers=0,
                 n_hidden_channels=None,
                 min_action=None,
                 max_action=None,
                 bound_mean=False,
                 nonlinearity=F.relu,
                 mean_wscale=1):

        self.n_input_channels = n_input_channels
        self.action_size = action_size
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.min_action = min_action
        self.max_action = max_action
        self.bound_mean = bound_mean
        self.nonlinearity = nonlinearity
        if np.isscalar(var):
            self.var = np.full(action_size, var, dtype=np.float32)
        else:
            self.var = var
        layers = []
        if n_hidden_layers > 0:
            # Input to hidden
            layers.append(L.Linear(n_input_channels, n_hidden_channels))
            layers.append(self.nonlinearity)
            for _ in range(n_hidden_layers - 1):
                # Hidden to hidden
                layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
                layers.append(self.nonlinearity)
            # The last layer is used to compute the mean
            layers.append(
                L.Linear(n_hidden_channels,
                         action_size,
                         initialW=LeCunNormal(mean_wscale)))
        else:
            # There's only one layer for computing the mean
            layers.append(
                L.Linear(n_input_channels,
                         action_size,
                         initialW=LeCunNormal(mean_wscale)))

        if self.bound_mean:
            layers.append(
                lambda x: bound_by_tanh(x, self.min_action, self.max_action))

        def get_var_array(shape):
            self.var = self.xp.asarray(self.var)
            return self.xp.broadcast_to(self.var, shape)

        layers.append(lambda x: distribution.GaussianDistribution(
            x, get_var_array(x.shape)))
        super().__init__(*layers)
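What get_var_array does at call time, shown standalone (NumPy only; the shapes are illustrative assumptions):

import numpy as np
from chainerrl import distribution

var = np.full(2, 0.1, dtype=np.float32)    # per-dimension variance, shape (2,)
mean = np.zeros((4, 2), dtype=np.float32)  # batch of four 2-dim means
dist = distribution.GaussianDistribution(
    mean, np.broadcast_to(var, mean.shape))  # variance tiled over the batch
print(dist.sample().shape)                   # (4, 2)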
Example #5
    def __init__(
        self,
        n_input_channels,
        action_size,
        n_hidden_layers=0,
        n_hidden_channels=None,
        min_action=None,
        max_action=None,
        bound_mean=False,
        var_type='spherical',
        nonlinearity=F.relu,
        mean_wscale=1,
        var_func=F.softplus,
        var_param_init=0,
    ):

        self.n_input_channels = n_input_channels
        self.action_size = action_size
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.min_action = min_action
        self.max_action = max_action
        self.bound_mean = bound_mean
        self.nonlinearity = nonlinearity
        self.var_func = var_func
        var_size = {'spherical': 1, 'diagonal': action_size}[var_type]

        layers = []
        if n_hidden_layers > 0:
            layers.append(L.Linear(n_input_channels, n_hidden_channels))
            for _ in range(n_hidden_layers - 1):
                layers.append(self.nonlinearity)
                layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
            layers.append(self.nonlinearity)
            # The last layer is used to compute the mean
            layers.append(
                L.Linear(n_hidden_channels,
                         action_size,
                         initialW=LeCunNormal(mean_wscale)))
        else:
            # There's only one layer for computing the mean
            layers.append(
                L.Linear(n_input_channels,
                         action_size,
                         initialW=LeCunNormal(mean_wscale)))

        if self.bound_mean:
            layers.append(
                lambda x: bound_by_tanh(x, self.min_action, self.max_action))

        super().__init__()
        with self.init_scope():
            self.hidden_layers = links.Sequence(*layers)
            self.var_param = chainer.Parameter(initializer=var_param_init,
                                               shape=(var_size, ))
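This constructor only builds the parameters; a hedged sketch of the forward pass it presumably pairs with (reconstructed to mirror ChainerRL's FCGaussianPolicyWithStateIndependentCovariance, not quoted from it):

    def __call__(self, x):
        mean = self.hidden_layers(x)
        # One learned variance (spherical) or one per action dimension
        # (diagonal), mapped positive by var_func and tiled to mean's shape.
        var = F.broadcast_to(self.var_func(self.var_param), mean.shape)
        return distribution.GaussianDistribution(mean, var)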
Example #6
    def __init__(self,
                 n_input_channels,
                 n_hidden_layers,
                 n_hidden_channels,
                 action_size,
                 min_action=None,
                 max_action=None,
                 bound_action=True):
        self.n_input_channels = n_input_channels
        self.n_hidden_layers = n_hidden_layers
        self.n_hidden_channels = n_hidden_channels
        self.action_size = action_size
        self.min_action = min_action
        self.max_action = max_action
        self.bound_action = bound_action

        if self.bound_action:
            def action_filter(x):
                return bound_by_tanh(x, self.min_action, self.max_action)
        else:
            action_filter = None

        model = chainer.Chain(
            fc=MLP(self.n_input_channels, n_hidden_channels,
                   (self.n_hidden_channels, ) * self.n_hidden_layers),
            lstm=L.LSTM(n_hidden_channels, n_hidden_channels),
            out=L.Linear(n_hidden_channels, action_size),
        )

        def model_call(model, x):
            h = F.relu(model.fc(x))
            h = model.lstm(h)
            h = model.out(h)
            return h

        super().__init__(model=model,
                         model_call=model_call,
                         action_filter=action_filter)
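Because the model above holds an L.LSTM link, consecutive forward passes are temporally linked; a usage sketch with the model and model_call defined in this example (the input shape and the episode-boundary reset are assumptions about the surrounding loop):

import numpy as np

x = np.zeros((1, 4), dtype=np.float32)  # one observation per time step
a0 = model_call(model, x)               # step 1: LSTM state starts at zero
a1 = model_call(model, x)               # step 2: conditioned on step 1
model.lstm.reset_state()                # clear hidden/cell state between episodes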
Example #7
    def action_filter(x):
        return bound_by_tanh(
            x, self.min_action, self.max_action)