def __init__(self, n_input_channels, action_size, var,
             n_hidden_layers=0, n_hidden_channels=None,
             min_action=None, max_action=None, bound_mean=False,
             nonlinearity=F.relu):
    """Fully-connected Gaussian policy whose covariance is fixed.

    Args:
        n_input_channels (int): Size of the input vector.
        action_size (int): Dimension of the action space.
        var (float or ndarray): Fixed variance, broadcast to the mean's
            shape at forward time.
        n_hidden_layers (int): Number of hidden layers.
        n_hidden_channels (int): Width of each hidden layer; required
            when ``n_hidden_layers > 0``.
        min_action (ndarray): Lower action bound (used only when
            ``bound_mean`` is True).
        max_action (ndarray): Upper action bound (used only when
            ``bound_mean`` is True).
        bound_mean (bool): If True, squash the mean into
            [min_action, max_action] with tanh.
        nonlinearity (callable): Activation between linear layers.
    """
    self.n_input_channels = n_input_channels
    self.action_size = action_size
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.min_action = min_action
    self.max_action = max_action
    self.bound_mean = bound_mean
    self.nonlinearity = nonlinearity
    self.var = var
    layers = []
    if n_hidden_layers > 0:
        # Input to hidden
        layers.append(L.Linear(n_input_channels, n_hidden_channels))
        layers.append(self.nonlinearity)
        for _ in range(n_hidden_layers - 1):
            # Hidden to hidden
            layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
            layers.append(self.nonlinearity)
        # The last layer computes the mean.
        layers.append(L.Linear(n_hidden_channels, action_size))
    else:
        # No hidden layers: map the input directly to the mean.  The
        # previous code unconditionally built an input-to-hidden layer,
        # which crashed with the default n_hidden_channels=None and
        # stacked two linear layers with no activation in between.
        layers.append(L.Linear(n_input_channels, action_size))
    if self.bound_mean:
        # Keep the mean within the valid action range.
        layers.append(
            lambda x: bound_by_tanh(x, self.min_action, self.max_action))
    # Wrap the mean in a Gaussian with the fixed variance broadcast to
    # the same shape.
    layers.append(lambda x: distribution.GaussianDistribution(
        x, self.xp.broadcast_to(self.var, x.shape)))
    super().__init__(*layers)
def __init__(self, n_input_channels, n_hidden_layers, n_hidden_channels,
             action_size, min_action=None, max_action=None,
             bound_action=True, normalize_input=True):
    """Deterministic policy backed by a batch-normalized MLP.

    Args:
        n_input_channels (int): Size of the input vector.
        n_hidden_layers (int): Number of hidden layers.
        n_hidden_channels (int): Width of each hidden layer.
        action_size (int): Dimension of the action space.
        min_action (ndarray): Lower action bound (used only when
            ``bound_action`` is True).
        max_action (ndarray): Upper action bound (used only when
            ``bound_action`` is True).
        bound_action (bool): If True, squash raw outputs into
            [min_action, max_action] with tanh.
        normalize_input (bool): Passed through to MLPBN to enable
            input normalization.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.action_size = action_size
    self.min_action = min_action
    self.max_action = max_action
    self.bound_action = bound_action
    self.normalize_input = normalize_input
    if self.bound_action:
        # A named def instead of an assigned lambda (PEP 8 E731);
        # matches the def-based action_filter used elsewhere in this
        # module.
        def action_filter(x):
            return bound_by_tanh(x, self.min_action, self.max_action)
    else:
        action_filter = None
    super().__init__(
        model=MLPBN(
            n_input_channels,
            action_size,
            (n_hidden_channels,) * n_hidden_layers,
            normalize_input=self.normalize_input),
        action_filter=action_filter)
def compute_mean_and_var(self, x, test=False):
    """Compute the Gaussian mean and variance for input ``x``.

    The ``test`` argument is accepted for interface compatibility and
    is not used.

    Returns:
        tuple: ``(mean, var)`` where ``var`` is broadcast to the shape
        of ``mean``.
    """
    hidden = x
    for hidden_layer in self.hidden_layers:
        hidden = self.nonlinearity(hidden_layer(hidden))
    mean = self.mean_layer(hidden)
    if self.bound_mean:
        # Keep the mean inside the valid action range.
        mean = bound_by_tanh(mean, self.min_action, self.max_action)
    # Softplus keeps the variance strictly positive.
    raw_var = F.softplus(self.var_layer(hidden))
    return mean, F.broadcast_to(raw_var, mean.shape)
def __init__(self, n_input_channels, action_size, var, n_hidden_layers=0,
             n_hidden_channels=None, min_action=None, max_action=None,
             bound_mean=False, nonlinearity=F.relu, mean_wscale=1):
    """Fully-connected Gaussian policy with a fixed (non-learned) variance.

    Args:
        n_input_channels (int): Size of the input vector.
        action_size (int): Dimension of the action space.
        var (float or ndarray): Fixed variance; a scalar is expanded to
            a per-dimension float32 vector.
        n_hidden_layers (int): Number of hidden layers; 0 means the
            mean is a linear function of the input.
        n_hidden_channels (int): Width of each hidden layer; required
            when ``n_hidden_layers > 0``.
        min_action (ndarray): Lower action bound (used only when
            ``bound_mean`` is True).
        max_action (ndarray): Upper action bound (used only when
            ``bound_mean`` is True).
        bound_mean (bool): If True, squash the mean into
            [min_action, max_action] with tanh.
        nonlinearity (callable): Activation between linear layers.
        mean_wscale (float): Weight scale of the mean layer's
            LeCunNormal initializer.
    """
    self.n_input_channels = n_input_channels
    self.action_size = action_size
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.min_action = min_action
    self.max_action = max_action
    self.bound_mean = bound_mean
    self.nonlinearity = nonlinearity
    # Normalize a scalar variance to a per-dimension float32 vector.
    if np.isscalar(var):
        self.var = np.full(action_size, var, dtype=np.float32)
    else:
        self.var = var
    seq = []
    if n_hidden_layers > 0:
        # Stack of (linear, activation) pairs; the first linear maps
        # from the input, the rest are hidden-to-hidden.
        fan_in = n_input_channels
        for _ in range(n_hidden_layers):
            seq.append(L.Linear(fan_in, n_hidden_channels))
            seq.append(self.nonlinearity)
            fan_in = n_hidden_channels
        # Final layer computes the mean.
        seq.append(L.Linear(n_hidden_channels, action_size,
                            initialW=LeCunNormal(mean_wscale)))
    else:
        # No hidden layers: the mean is a linear map of the input.
        seq.append(L.Linear(n_input_channels, action_size,
                            initialW=LeCunNormal(mean_wscale)))
    if self.bound_mean:
        seq.append(
            lambda x: bound_by_tanh(x, self.min_action, self.max_action))

    def fixed_var(shape):
        # Lazily move the variance onto this link's device (self.xp is
        # the array module in use) before broadcasting to the mean's
        # shape.
        self.var = self.xp.asarray(self.var)
        return self.xp.broadcast_to(self.var, shape)

    seq.append(lambda x: distribution.GaussianDistribution(
        x, fixed_var(x.shape)))
    super().__init__(*seq)
def __init__(
        self,
        n_input_channels,
        action_size,
        n_hidden_layers=0,
        n_hidden_channels=None,
        min_action=None,
        max_action=None,
        bound_mean=False,
        var_type='spherical',
        nonlinearity=F.relu,
        mean_wscale=1,
        var_func=F.softplus,
        var_param_init=0,
):
    """Gaussian policy whose variance is a state-independent parameter.

    Args:
        n_input_channels (int): Size of the input vector.
        action_size (int): Dimension of the action space.
        n_hidden_layers (int): Number of hidden layers; 0 means the
            mean is a linear function of the input.
        n_hidden_channels (int): Width of each hidden layer; required
            when ``n_hidden_layers > 0``.
        min_action (ndarray): Lower action bound (used only when
            ``bound_mean`` is True).
        max_action (ndarray): Upper action bound (used only when
            ``bound_mean`` is True).
        bound_mean (bool): If True, squash the mean into
            [min_action, max_action] with tanh.
        var_type (str): 'spherical' shares one variance across all
            action dimensions; 'diagonal' learns one per dimension.
        nonlinearity (callable): Activation between linear layers.
        mean_wscale (float): Weight scale of the mean layer's
            LeCunNormal initializer.
        var_func (callable): Maps the raw parameter to a positive
            variance (softplus by default).
        var_param_init (float): Initial value of the raw variance
            parameter.

    Raises:
        KeyError: If ``var_type`` is neither 'spherical' nor 'diagonal'.
    """
    self.n_input_channels = n_input_channels
    self.action_size = action_size
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.min_action = min_action
    self.max_action = max_action
    self.bound_mean = bound_mean
    self.nonlinearity = nonlinearity
    self.var_func = var_func
    var_size = {'spherical': 1, 'diagonal': action_size}[var_type]
    layers = []
    if n_hidden_layers > 0:
        # Input to hidden
        layers.append(L.Linear(n_input_channels, n_hidden_channels))
        layers.append(self.nonlinearity)
        for _ in range(n_hidden_layers - 1):
            # Hidden to hidden
            layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
            layers.append(self.nonlinearity)
        # The last layer is used to compute the mean
        layers.append(
            L.Linear(n_hidden_channels, action_size,
                     initialW=LeCunNormal(mean_wscale)))
    else:
        # No hidden layers: the mean is a linear function of the input.
        # Previously this path built L.Linear(n_input_channels, None),
        # which fails under the default n_hidden_channels=None; the
        # sibling fixed-variance policy already handles this case.
        layers.append(
            L.Linear(n_input_channels, action_size,
                     initialW=LeCunNormal(mean_wscale)))
    if self.bound_mean:
        layers.append(
            lambda x: bound_by_tanh(x, self.min_action, self.max_action))
    super().__init__()
    with self.init_scope():
        self.hidden_layers = links.Sequence(*layers)
        # Raw variance parameter, independent of the state; it is
        # mapped through var_func elsewhere to stay positive.
        self.var_param = chainer.Parameter(
            initializer=var_param_init, shape=(var_size,))
def __init__(self, n_input_channels, n_hidden_layers, n_hidden_channels,
             action_size, min_action=None, max_action=None,
             bound_action=True):
    """Recurrent deterministic policy: MLP features -> LSTM -> linear out.

    Args:
        n_input_channels (int): Size of the input vector.
        n_hidden_layers (int): Number of hidden layers in the MLP.
        n_hidden_channels (int): Width of each hidden layer and of the
            LSTM state.
        action_size (int): Dimension of the action space.
        min_action (ndarray): Lower action bound (used only when
            ``bound_action`` is True).
        max_action (ndarray): Upper action bound (used only when
            ``bound_action`` is True).
        bound_action (bool): If True, squash raw outputs into
            [min_action, max_action] with tanh.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.action_size = action_size
    self.min_action = min_action
    self.max_action = max_action
    self.bound_action = bound_action
    if self.bound_action:
        # A named def instead of an assigned lambda (PEP 8 E731);
        # matches the def-based action_filter used elsewhere in this
        # module.
        def action_filter(x):
            return bound_by_tanh(x, self.min_action, self.max_action)
    else:
        action_filter = None
    model = chainer.Chain(
        fc=MLP(self.n_input_channels, n_hidden_channels,
               (self.n_hidden_channels,) * self.n_hidden_layers),
        lstm=L.LSTM(n_hidden_channels, n_hidden_channels),
        out=L.Linear(n_hidden_channels, action_size),
    )

    def model_call(model, x):
        # MLP features -> LSTM state update -> linear action output.
        h = F.relu(model.fc(x))
        h = model.lstm(h)
        return model.out(h)

    super().__init__(
        model=model,
        model_call=model_call,
        action_filter=action_filter)
# Squash raw network outputs into [min_action, max_action] with tanh.
# NOTE(review): ``self`` is a free variable here, so this must be a
# closure defined inside a method of the enclosing policy class —
# confirm against the surrounding context not visible in this chunk.
def action_filter(x): return bound_by_tanh(
    x, self.min_action, self.max_action)