def __init__(self, n_input_channels, n_dim_action, n_hidden_channels,
             n_hidden_layers, action_space, scale_mu=True,
             normalize_input=True):
    """Record the configuration and register the sub-links.

    Args:
        n_input_channels: Input size handed to the hidden ``MLPBN``.
        n_dim_action: Output size of the ``mu`` and ``mat_diag`` layers;
            also determines the output size of ``mat_non_diag``.
        n_hidden_channels: Output size of the hidden network and input
            size of every linear head.
        n_hidden_layers: Must be at least 1; the hidden network gets
            ``n_hidden_layers - 1`` hidden sizes.
        action_space: Must not be None; stored as ``self.action_space``.
        scale_mu: Stored as ``self.scale_mu`` (not used in this method).
        normalize_input: Forwarded to ``MLPBN``.
    """
    self.n_hidden_channels = n_hidden_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_input_channels = n_input_channels
    assert action_space is not None
    self.action_space = action_space
    self.scale_mu = scale_mu
    super().__init__()

    with self.init_scope():
        assert n_hidden_layers >= 1
        inner_sizes = [n_hidden_channels] * (n_hidden_layers - 1)
        self.hidden_layers = MLPBN(
            in_size=n_input_channels,
            out_size=n_hidden_channels,
            hidden_sizes=inner_sizes,
            normalize_input=normalize_input,
        )

        # Linear heads with a fixed output width, created in this order.
        fixed_heads = (
            ('v', 1),
            ('mu', n_dim_action),
            ('mat_diag', n_dim_action),
        )
        for head_name, head_width in fixed_heads:
            setattr(self, head_name,
                    L.Linear(n_hidden_channels, head_width))

        # n * (n - 1) / 2 is the number of entries strictly below the
        # diagonal of an n x n matrix; the head is only created when that
        # count is positive (i.e. it is absent for a 1-dimensional action).
        n_off_diag = n_dim_action * (n_dim_action - 1) // 2
        if n_off_diag > 0:
            self.mat_non_diag = L.Linear(n_hidden_channels, n_off_diag)
def __init__(self, n_input_channels, n_dim_action, n_hidden_channels,
             n_hidden_layers, action_space, scale_mu=True,
             normalize_input=True):
    """Record the configuration and pass the sub-links to the parent.

    The links are collected in a dict and handed to the parent
    constructor as keyword arguments.

    Args:
        n_input_channels: Input size handed to the hidden ``MLPBN``.
        n_dim_action: Output size of the ``mu`` and ``mat_diag`` layers;
            also determines the output size of ``mat_non_diag``.
        n_hidden_channels: Output size of the hidden network and input
            size of every linear head.
        n_hidden_layers: Must be at least 1; the hidden network gets
            ``n_hidden_layers - 1`` hidden sizes.
        action_space: Must not be None; stored as ``self.action_space``.
        scale_mu: Stored as ``self.scale_mu`` (not used in this method).
        normalize_input: Forwarded to ``MLPBN``.
    """
    self.n_hidden_channels = n_hidden_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_input_channels = n_input_channels
    assert action_space is not None
    self.action_space = action_space
    self.scale_mu = scale_mu

    assert n_hidden_layers >= 1
    trunk = MLPBN(
        in_size=n_input_channels,
        out_size=n_hidden_channels,
        hidden_sizes=[n_hidden_channels] * (n_hidden_layers - 1),
        normalize_input=normalize_input,
    )
    links = {
        'hidden_layers': trunk,
        'v': L.Linear(n_hidden_channels, 1),
        'mu': L.Linear(n_hidden_channels, n_dim_action),
        'mat_diag': L.Linear(n_hidden_channels, n_dim_action),
    }

    # n * (n - 1) / 2 is the number of entries strictly below the diagonal
    # of an n x n matrix; the extra link is added only when that is positive.
    off_diag_count = n_dim_action * (n_dim_action - 1) // 2
    if off_diag_count > 0:
        links['mat_non_diag'] = L.Linear(n_hidden_channels, off_diag_count)

    super().__init__(**links)
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
             n_hidden_layers, normalize_input=True, nonlinearity=F.relu,
             last_wscale=1.):
    """Record the configuration and register ``obs_mlp`` and ``mlp``.

    Args:
        n_dim_obs: Input size of ``obs_mlp``.
        n_dim_action: Added to ``n_hidden_channels`` to form the input
            size of ``mlp``.
        n_hidden_channels: Output size of ``obs_mlp`` and size of each
            hidden layer of ``mlp``.
        n_hidden_layers: Must be at least 1; ``mlp`` gets
            ``n_hidden_layers - 1`` hidden sizes.
        normalize_input: Forwarded to ``obs_mlp``.
        nonlinearity: Forwarded to ``mlp``.
        last_wscale: Forwarded to ``mlp``.
    """
    assert n_hidden_layers >= 1

    self.nonlinearity = nonlinearity
    self.normalize_input = normalize_input
    self.n_hidden_channels = n_hidden_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_input_channels = n_dim_obs + n_dim_action
    super().__init__()

    mlp_in_size = n_hidden_channels + n_dim_action
    mlp_hidden_sizes = [n_hidden_channels] * (n_hidden_layers - 1)

    with self.init_scope():
        # obs_mlp has no hidden layers, so there is no need to pass
        # nonlinearity to it.
        self.obs_mlp = MLPBN(
            in_size=n_dim_obs,
            out_size=n_hidden_channels,
            hidden_sizes=[],
            normalize_input=normalize_input,
            normalize_output=True,
        )
        self.mlp = MLP(
            in_size=mlp_in_size,
            out_size=1,
            hidden_sizes=mlp_hidden_sizes,
            nonlinearity=nonlinearity,
            last_wscale=last_wscale,
        )

    # Expose the MLP's ``output`` attribute directly on this object.
    # Assigned after the init_scope block has been left.
    self.output = self.mlp.output
def __init__(self, n_input_channels, n_hidden_layers, n_hidden_channels,
             action_size, min_action=None, max_action=None,
             bound_action=True, normalize_input=True, nonlinearity=F.relu,
             last_wscale=1.):
    """Record the configuration and build the wrapped ``MLPBN`` model.

    Args:
        n_input_channels: Input size of the model.
        n_hidden_layers: Number of hidden layers of the model.
        n_hidden_channels: Size of each hidden layer.
        action_size: Output size of the model.
        min_action: Lower bound passed to ``bound_by_tanh``.
        max_action: Upper bound passed to ``bound_by_tanh``.
        bound_action: When truthy, an action filter applying
            ``bound_by_tanh`` is installed; otherwise the filter is None.
        normalize_input: Forwarded to ``MLPBN``.
        nonlinearity: Forwarded to ``MLPBN``.
        last_wscale: Forwarded to ``MLPBN``.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.action_size = action_size
    self.min_action = min_action
    self.max_action = max_action
    self.bound_action = bound_action
    self.normalize_input = normalize_input

    action_filter = None
    if self.bound_action:
        # The bounds are looked up on ``self`` at call time, so later
        # changes to min_action / max_action are picked up by the filter.
        def action_filter(raw_action):
            return bound_by_tanh(
                raw_action, self.min_action, self.max_action)

    hidden_sizes = (n_hidden_channels,) * n_hidden_layers
    network = MLPBN(
        n_input_channels,
        action_size,
        hidden_sizes,
        normalize_input=self.normalize_input,
        nonlinearity=nonlinearity,
        last_wscale=last_wscale,
    )
    super().__init__(model=network, action_filter=action_filter)
def __init__(self, n_input_channels, n_hidden_layers, n_hidden_channels,
             action_size, min_action=None, max_action=None,
             bound_action=True, normalize_input=True):
    """Record the configuration and build the wrapped ``MLPBN`` model.

    Args:
        n_input_channels: Input size of the model.
        n_hidden_layers: Number of hidden layers of the model.
        n_hidden_channels: Size of each hidden layer.
        action_size: Output size of the model.
        min_action: Lower bound passed to ``bound_by_tanh``.
        max_action: Upper bound passed to ``bound_by_tanh``.
        bound_action: When truthy, an action filter applying
            ``bound_by_tanh`` is installed; otherwise the filter is None.
        normalize_input: Forwarded to ``MLPBN``.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.action_size = action_size
    self.min_action = min_action
    self.max_action = max_action
    self.bound_action = bound_action
    self.normalize_input = normalize_input

    if self.bound_action:
        # A named nested function instead of a lambda bound to a name
        # (PEP 8); it shows up with a real name in tracebacks. The bounds
        # are still read from ``self`` at call time.
        def action_filter(x):
            return bound_by_tanh(x, self.min_action, self.max_action)
    else:
        action_filter = None

    super().__init__(
        model=MLPBN(
            n_input_channels,
            action_size,
            (n_hidden_channels,) * n_hidden_layers,
            normalize_input=self.normalize_input,
        ),
        action_filter=action_filter,
    )
def __init__(self, ndim_obs, n_actions, n_hidden_channels, n_hidden_layers,
             nonlinearity=F.relu, last_wscale=1.0):
    """Build the ``MLPBN`` model and hand it to the parent constructor.

    Args:
        ndim_obs: Input size of the model.
        n_actions: Output size of the model.
        n_hidden_channels: Size of each hidden layer.
        n_hidden_layers: Number of hidden layers.
        nonlinearity: Forwarded to ``MLPBN``.
        last_wscale: Forwarded to ``MLPBN``.
    """
    # nonlinearity and last_wscale used to be accepted but silently
    # dropped; they are now passed through so that callers' choices take
    # effect.
    # NOTE(review): assumes MLPBN's own defaults equal the defaults in this
    # signature, so default calls build the same model - confirm.
    super(QFunc, self).__init__(
        model=MLPBN(
            in_size=ndim_obs,
            out_size=n_actions,
            hidden_sizes=[n_hidden_channels] * n_hidden_layers,
            nonlinearity=nonlinearity,
            last_wscale=last_wscale,
        ))
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
             n_hidden_layers, normalize_input=True):
    """Record the configuration and pass ``obs_mlp`` and ``mlp`` upward.

    The two sub-networks are handed to the parent constructor as keyword
    arguments.

    Args:
        n_dim_obs: Input size of ``obs_mlp``.
        n_dim_action: Added to ``n_hidden_channels`` to form the input
            size of ``mlp``.
        n_hidden_channels: Output size of ``obs_mlp`` and size of each
            hidden layer of ``mlp``.
        n_hidden_layers: ``mlp`` gets ``n_hidden_layers - 1`` hidden sizes.
        normalize_input: Forwarded to ``obs_mlp``.
    """
    self.normalize_input = normalize_input
    self.n_hidden_channels = n_hidden_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_input_channels = n_dim_obs + n_dim_action

    observation_net = MLPBN(
        in_size=n_dim_obs,
        out_size=n_hidden_channels,
        hidden_sizes=[],
        normalize_input=normalize_input,
        normalize_output=True,
    )
    joint_net = MLP(
        in_size=n_hidden_channels + n_dim_action,
        out_size=1,
        hidden_sizes=[n_hidden_channels] * (n_hidden_layers - 1),
    )
    super().__init__(obs_mlp=observation_net, mlp=joint_net)

    # Expose the MLP's ``output`` attribute directly on this object.
    self.output = self.mlp.output