def __init__(self, n_actions, n_atoms, v_min, v_max,
             n_input_channels=4, activation=F.relu, bias=0.1):
    """Dueling distributional Q-network over stacked-image input.

    Args:
        n_actions: Number of discrete actions.
        n_atoms: Number of atoms of the value distribution (>= 2).
        v_min: Smallest value on the distribution's support.
        v_max: Largest value on the support (must exceed ``v_min``).
        n_input_channels: Channels of the input image stack.
        activation: Nonlinearity applied between layers.
        bias: Initial bias for the convolution layers.
    """
    assert n_atoms >= 2
    assert v_min < v_max
    self.n_actions = n_actions
    self.n_input_channels = n_input_channels
    self.activation = activation
    self.n_atoms = n_atoms
    super().__init__()
    # Fixed support of the categorical distribution, persisted with the model.
    atom_support = np.linspace(v_min, v_max, num=n_atoms, dtype=np.float32)
    self.add_persistent('z_values', atom_support)
    with self.init_scope():
        convs = [
            L.Convolution2D(n_input_channels, 32, 8, stride=4,
                            initial_bias=bias),
            L.Convolution2D(32, 64, 4, stride=2, initial_bias=bias),
            L.Convolution2D(64, 64, 3, stride=1, initial_bias=bias),
        ]
        self.conv_layers = chainer.ChainList(*convs)
        # 3136 flattened conv features — presumably 64ch * 7 * 7 from an
        # 84x84 input; TODO confirm against the forward pass.
        self.a_stream = MLP(3136, n_actions * n_atoms, [512])
        self.v_stream = MLP(3136, n_atoms, [512])
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
             n_hidden_layers, nonlinearity=F.relu, last_wscale=1.):
    """State-action Q-function that feeds the action in after one obs layer.

    The observation is first mapped to ``n_hidden_channels`` features; the
    action is then concatenated and processed down to a scalar Q-value.

    Args:
        n_dim_obs: Dimensionality of observations.
        n_dim_action: Dimensionality of actions.
        n_hidden_channels: Width of each hidden layer.
        n_hidden_layers: Total number of hidden layers (>= 1).
        nonlinearity: Activation between hidden layers.
        last_wscale: Weight scale of the final linear layer.
    """
    assert n_hidden_layers >= 1
    self.n_input_channels = n_dim_obs + n_dim_action
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.nonlinearity = nonlinearity
    super().__init__()
    with self.init_scope():
        # The obs branch has no hidden layers, so it needs no nonlinearity.
        self.obs_mlp = MLP(in_size=n_dim_obs,
                           out_size=n_hidden_channels,
                           hidden_sizes=[])
        tail_sizes = [self.n_hidden_channels] * (self.n_hidden_layers - 1)
        self.mlp = MLP(in_size=n_hidden_channels + n_dim_action,
                       out_size=1,
                       hidden_sizes=tail_sizes,
                       nonlinearity=nonlinearity,
                       last_wscale=last_wscale)
    self.output = self.mlp.output
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels, n_hidden_layers):
    """Late-action Q-function built with the legacy ``Chain(**links)`` API.

    The observation passes through a single linear layer; the action is then
    concatenated and mapped to a scalar Q-value by the remaining layers.
    """
    self.n_input_channels = n_dim_obs + n_dim_action
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    obs_net = MLP(in_size=n_dim_obs, out_size=n_hidden_channels,
                  hidden_sizes=[])
    tail_sizes = [self.n_hidden_channels] * (self.n_hidden_layers - 1)
    joint_net = MLP(in_size=n_hidden_channels + n_dim_action, out_size=1,
                    hidden_sizes=tail_sizes)
    super().__init__(obs_mlp=obs_net, mlp=joint_net)
    self.output = self.mlp.output
def __init__(self, n_input_channels, n_hidden_layers, n_hidden_channels,
             action_size, min_action=None, max_action=None, bound_action=True,
             nonlinearity=F.relu, last_wscale=1.):
    """Deterministic policy head: MLP trunk with an optional tanh bound.

    Args:
        n_input_channels: Dimensionality of the input observation.
        n_hidden_layers: Number of hidden layers.
        n_hidden_channels: Width of each hidden layer.
        action_size: Dimensionality of the action output.
        min_action: Lower bound used when ``bound_action`` is True.
        max_action: Upper bound used when ``bound_action`` is True.
        bound_action: Whether to squash outputs via ``bound_by_tanh``.
        nonlinearity: Activation between hidden layers.
        last_wscale: Weight scale of the output layer.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.action_size = action_size
    self.min_action = min_action
    self.max_action = max_action
    self.bound_action = bound_action
    if not self.bound_action:
        action_filter = None
    else:
        # Reads the bounds from self so the closure tracks the instance.
        def action_filter(x):
            return bound_by_tanh(x, self.min_action, self.max_action)
    trunk = MLP(n_input_channels, action_size,
                (n_hidden_channels,) * n_hidden_layers,
                nonlinearity=nonlinearity,
                last_wscale=last_wscale)
    super().__init__(model=trunk, action_filter=action_filter)
def __init__(self, n_actions, n_input_channels=4, activation=F.relu, bias=0.1):
    """Dueling Q-network over stacked-image input.

    Args:
        n_actions: Number of discrete actions.
        n_input_channels: Channels of the input image stack.
        activation: Nonlinearity applied between layers.
        bias: Initial bias for the convolution layers.
    """
    self.n_actions = n_actions
    self.n_input_channels = n_input_channels
    self.activation = activation
    super().__init__()
    with self.init_scope():
        convs = [
            L.Convolution2D(n_input_channels, 32, 8, stride=4,
                            initial_bias=bias),
            L.Convolution2D(32, 64, 4, stride=2, initial_bias=bias),
            L.Convolution2D(64, 64, 3, stride=1, initial_bias=bias),
        ]
        self.conv_layers = chainer.ChainList(*convs)
        # Advantage stream: one output per action; value stream: a scalar.
        self.a_stream = MLP(3136, n_actions, [512])
        self.v_stream = MLP(3136, 1, [512])
def __init__(self, n_input_channels, n_hidden_layers, n_hidden_channels,
             action_size, min_action=None, max_action=None, bound_action=True):
    """Deterministic policy head: MLP trunk with an optional tanh bound.

    Args:
        n_input_channels: Dimensionality of the input observation.
        n_hidden_layers: Number of hidden layers.
        n_hidden_channels: Width of each hidden layer.
        action_size: Dimensionality of the action output.
        min_action: Lower bound used when ``bound_action`` is True.
        max_action: Upper bound used when ``bound_action`` is True.
        bound_action: Whether to squash outputs via ``bound_by_tanh``.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.action_size = action_size
    self.min_action = min_action
    self.max_action = max_action
    self.bound_action = bound_action
    if self.bound_action:
        # def instead of a lambda bound to a name (PEP 8, E731); matches
        # the style of the sibling policy constructors in this file.
        def action_filter(x):
            return bound_by_tanh(x, self.min_action, self.max_action)
    else:
        action_filter = None
    super().__init__(
        model=MLP(n_input_channels, action_size,
                  (n_hidden_channels,) * n_hidden_layers),
        action_filter=action_filter)
def __init__(self, ndim_obs, n_actions, n_hidden_channels, n_hidden_layers,
             nonlinearity=F.relu, last_wscale=1.0):
    """Fully-connected Q-function over discrete actions.

    Args:
        ndim_obs: Dimensionality of observations.
        n_actions: Number of discrete actions (one output each).
        n_hidden_channels: Width of each hidden layer.
        n_hidden_layers: Number of hidden layers.
        nonlinearity: Activation between hidden layers.
        last_wscale: Weight scale of the output layer.
    """
    net = MLP(in_size=ndim_obs,
              out_size=n_actions,
              hidden_sizes=[n_hidden_channels] * n_hidden_layers,
              nonlinearity=nonlinearity,
              last_wscale=last_wscale)
    super().__init__(model=net)
def __init__(self, n_actions, n_input_channels=4, activation=F.relu, bias=0.1):
    """Dueling Q-network built with the legacy ``Chain(**links)`` API.

    NOTE(review): the conv layers use the old ``bias=`` keyword rather than
    ``initial_bias=`` — presumably targets an older Chainer; verify.
    """
    self.n_actions = n_actions
    self.n_input_channels = n_input_channels
    self.activation = activation
    feature_extractor = chainer.ChainList(
        L.Convolution2D(n_input_channels, 32, 8, stride=4, bias=bias),
        L.Convolution2D(32, 64, 4, stride=2, bias=bias),
        L.Convolution2D(64, 64, 3, stride=1, bias=bias),
    )
    super().__init__(
        conv_layers=feature_extractor,
        a_stream=MLP(3136, n_actions, [512]),
        v_stream=MLP(3136, 1, [512]),
    )
def __init__(self, n_input_channels, n_hidden_layers=0,
             n_hidden_channels=None):
    """Scalar-output MLP head (e.g. a state-value function).

    NOTE(review): ``n_hidden_channels`` must be supplied whenever
    ``n_hidden_layers`` > 0 — the ``None`` default only works with no
    hidden layers.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    hidden_sizes = [self.n_hidden_channels] * self.n_hidden_layers
    super().__init__(
        model=MLP(self.n_input_channels, 1, hidden_sizes),
    )
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels, n_hidden_layers):
    """Recurrent state-action Q-function: MLP -> LSTM -> scalar linear head."""
    self.n_input_channels = n_dim_obs + n_dim_action
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    super().__init__()
    with self.init_scope():
        hidden_sizes = [self.n_hidden_channels] * self.n_hidden_layers
        self.fc = MLP(self.n_input_channels, n_hidden_channels, hidden_sizes)
        self.lstm = L.LSTM(n_hidden_channels, n_hidden_channels)
        self.out = L.Linear(n_hidden_channels, 1)
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels, n_hidden_layers):
    """Recurrent state-action Q-function using the legacy ``Chain(**links)`` API."""
    self.n_input_channels = n_dim_obs + n_dim_action
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    feature_net = MLP(self.n_input_channels, n_hidden_channels,
                      [self.n_hidden_channels] * self.n_hidden_layers)
    super().__init__(
        fc=feature_net,
        lstm=L.LSTM(n_hidden_channels, n_hidden_channels),
        out=L.Linear(n_hidden_channels, 1),
    )
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels, n_hidden_layers):
    """Recurrent policy network: obs -> MLP -> LSTM -> linear action output.

    Note that only the observation feeds the network; ``n_dim_action`` sets
    the size of the output layer.
    """
    self.n_input_channels = n_dim_obs
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    # Stack for saving/restoring recurrent state; starts empty.
    self.state_stack = []
    encoder = MLP(in_size=self.n_input_channels,
                  out_size=n_hidden_channels,
                  hidden_sizes=[self.n_hidden_channels] * self.n_hidden_layers)
    super().__init__(
        fc=encoder,
        lstm=L.LSTM(n_hidden_channels, n_hidden_channels),
        out=L.Linear(n_hidden_channels, n_dim_action),
    )
def __init__(self, n_input_channels, n_hidden_layers=0,
             n_hidden_channels=None, nonlinearity=F.relu, last_wscale=1):
    """Scalar-output MLP head with configurable activation and output scale.

    NOTE(review): ``n_hidden_channels`` must be supplied whenever
    ``n_hidden_layers`` > 0.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    hidden_sizes = [self.n_hidden_channels] * self.n_hidden_layers
    super().__init__(
        model=MLP(self.n_input_channels, 1, hidden_sizes,
                  nonlinearity=nonlinearity,
                  last_wscale=last_wscale),
    )
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
             n_hidden_layers, nonlinearity=F.relu, last_wscale=1.):
    """Recurrent state-action Q-function with a scaled output layer."""
    self.n_input_channels = n_dim_obs + n_dim_action
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.nonlinearity = nonlinearity
    super().__init__()
    with self.init_scope():
        self.fc = MLP(self.n_input_channels, n_hidden_channels,
                      [self.n_hidden_channels] * self.n_hidden_layers,
                      nonlinearity=nonlinearity)
        self.lstm = L.LSTM(n_hidden_channels, n_hidden_channels)
        # Only the final layer's initial weights are rescaled.
        self.out = L.Linear(n_hidden_channels, 1,
                            initialW=LeCunNormal(last_wscale))
def __init__(self, ndim_obs, n_actions, n_atoms, v_min, v_max,
             n_hidden_channels, n_hidden_layers,
             nonlinearity=F.relu, last_wscale=1.0):
    """Distributional Q-function: MLP logits reshaped and softmaxed per action.

    Args:
        ndim_obs: Dimensionality of observations.
        n_actions: Number of discrete actions.
        n_atoms: Number of atoms of the value distribution (>= 2).
        v_min: Smallest value on the distribution's support.
        v_max: Largest value on the support (must exceed ``v_min``).
        n_hidden_channels: Width of each hidden layer.
        n_hidden_layers: Number of hidden layers.
        nonlinearity: Activation between hidden layers.
        last_wscale: Weight scale of the output layer.
    """
    assert n_atoms >= 2
    assert v_min < v_max
    # Fixed support of the categorical value distribution.
    support = np.linspace(v_min, v_max, num=n_atoms, dtype=np.float32)

    def split_per_action(x):
        # (batch, n_actions * n_atoms) -> (batch, n_actions, n_atoms)
        return F.reshape(x, (-1, n_actions, n_atoms))

    def normalize_atoms(x):
        # Probabilities over atoms, independently for each action.
        return F.softmax(x, axis=2)

    net = chainerrl.links.Sequence(
        MLP(in_size=ndim_obs,
            out_size=n_actions * n_atoms,
            hidden_sizes=[n_hidden_channels] * n_hidden_layers,
            nonlinearity=nonlinearity,
            last_wscale=last_wscale),
        split_per_action,
        normalize_atoms,
    )
    super().__init__(model=net, z_values=support)
def __init__(self, n_input_channels, n_actions, n_hidden_layers=0,
             n_hidden_channels=None, beta=1.0):
    """Softmax policy head over ``n_actions`` with temperature ``beta``."""
    self.n_input_channels = n_input_channels
    self.n_actions = n_actions
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.beta = beta
    hidden_sizes = (n_hidden_channels,) * n_hidden_layers
    super().__init__(
        model=MLP(n_input_channels, n_actions, hidden_sizes),
        beta=self.beta)
def __init__(self, n_input_channels, n_hidden_layers, n_hidden_channels,
             action_size, min_action=None, max_action=None, bound_action=True,
             nonlinearity=F.relu, last_wscale=1.):
    """Recurrent deterministic policy: MLP -> LSTM -> linear, optional tanh bound.

    Args:
        n_input_channels: Dimensionality of the input observation.
        n_hidden_layers: Number of hidden layers in the MLP encoder.
        n_hidden_channels: Width of each hidden layer.
        action_size: Dimensionality of the action output.
        min_action: Lower bound used when ``bound_action`` is True.
        max_action: Upper bound used when ``bound_action`` is True.
        bound_action: Whether to squash outputs via ``bound_by_tanh``.
        nonlinearity: Activation between layers.
        last_wscale: Weight scale of the output layer.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.action_size = action_size
    self.min_action = min_action
    self.max_action = max_action
    self.bound_action = bound_action
    if not self.bound_action:
        action_filter = None
    else:
        def action_filter(x):
            return bound_by_tanh(x, self.min_action, self.max_action)
    net = chainer.Chain(
        fc=MLP(
            self.n_input_channels,
            n_hidden_channels,
            (self.n_hidden_channels,) * self.n_hidden_layers,
            nonlinearity=nonlinearity,
        ),
        lstm=L.LSTM(n_hidden_channels, n_hidden_channels),
        out=L.Linear(n_hidden_channels, action_size,
                     initialW=LeCunNormal(last_wscale)),
    )

    def model_call(model, x):
        # Apply the configured nonlinearity between the encoder and the LSTM.
        h = nonlinearity(model.fc(x))
        h = model.lstm(h)
        return model.out(h)

    super().__init__(model=net,
                     model_call=model_call,
                     action_filter=action_filter)
def __init__(self, n_input_channels, n_actions, n_hidden_layers=0,
             n_hidden_channels=None, beta=1.0, nonlinearity=F.relu,
             last_wscale=1.0, min_prob=0.0):
    """Softmax policy head with temperature, activation and a probability floor."""
    self.n_input_channels = n_input_channels
    self.n_actions = n_actions
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.beta = beta
    net = MLP(n_input_channels, n_actions,
              (n_hidden_channels,) * n_hidden_layers,
              nonlinearity=nonlinearity,
              last_wscale=last_wscale)
    super().__init__(model=net, beta=self.beta, min_prob=min_prob)
def __init__(self, n_input_channels, n_hidden_layers, n_hidden_channels,
             action_size, min_action=None, max_action=None, bound_action=True):
    """Recurrent deterministic policy: MLP -> LSTM -> linear, optional tanh bound.

    Args:
        n_input_channels: Dimensionality of the input observation.
        n_hidden_layers: Number of hidden layers in the MLP encoder.
        n_hidden_channels: Width of each hidden layer.
        action_size: Dimensionality of the action output.
        min_action: Lower bound used when ``bound_action`` is True.
        max_action: Upper bound used when ``bound_action`` is True.
        bound_action: Whether to squash outputs via ``bound_by_tanh``.
    """
    self.n_input_channels = n_input_channels
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.action_size = action_size
    self.min_action = min_action
    self.max_action = max_action
    self.bound_action = bound_action
    if self.bound_action:
        # def instead of a lambda bound to a name (PEP 8, E731); matches
        # the style of the sibling recurrent-policy constructor.
        def action_filter(x):
            return bound_by_tanh(x, self.min_action, self.max_action)
    else:
        action_filter = None
    model = chainer.Chain(
        fc=MLP(self.n_input_channels, n_hidden_channels,
               (self.n_hidden_channels,) * self.n_hidden_layers),
        lstm=L.LSTM(n_hidden_channels, n_hidden_channels),
        out=L.Linear(n_hidden_channels, action_size),
    )

    def model_call(model, x):
        h = F.relu(model.fc(x))
        h = model.lstm(h)
        h = model.out(h)
        return h

    super().__init__(model=model,
                     model_call=model_call,
                     action_filter=action_filter)
def __init__(self, n_dim_obs, n_dim_action, n_hidden_channels,
             n_hidden_layers, normalize_input=True):
    """Late-action Q-function whose observation branch uses batch normalization."""
    self.n_input_channels = n_dim_obs + n_dim_action
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.normalize_input = normalize_input
    super().__init__()
    with self.init_scope():
        # BN on the raw observation (optional) and on the branch output.
        self.obs_mlp = MLPBN(in_size=n_dim_obs,
                             out_size=n_hidden_channels,
                             hidden_sizes=[],
                             normalize_input=normalize_input,
                             normalize_output=True)
        tail_sizes = [self.n_hidden_channels] * (self.n_hidden_layers - 1)
        self.mlp = MLP(in_size=n_hidden_channels + n_dim_action,
                       out_size=1,
                       hidden_sizes=tail_sizes)
    self.output = self.mlp.output
def __init__(self, ndim_obs, n_actions, n_hidden_channels, n_hidden_layers):
    """Plain fully-connected Q-function over discrete actions."""
    hidden_sizes = [n_hidden_channels] * n_hidden_layers
    super().__init__(model=MLP(in_size=ndim_obs,
                               out_size=n_actions,
                               hidden_sizes=hidden_sizes))