def __init__(
    self,
    n_dim_obs,
    n_dim_action,
    n_hidden_channels,
    n_hidden_layers,
    nonlinearity=F.relu,
    last_wscale=1.0,
):
    """Constructor for a fully-connected + LSTM state-action Q-function.

    Args:
        n_dim_obs: Dimensionality of observations (concatenated with actions
            to form the network input).
        n_dim_action: Dimensionality of actions.
        n_hidden_channels: Width of each hidden (and LSTM) layer.
        n_hidden_layers: Number of hidden layers in the feed-forward MLP.
        nonlinearity: Activation used inside the MLP (default: F.relu).
        last_wscale: Scale for the LeCun-normal init of the final layer's
            weights.

    Raises:
        NotImplementedError: always — see note below.
    """
    # NOTE(review): this constructor raises unconditionally, so every
    # statement below the ``raise`` is dead code. It is kept as a sketch of
    # the intended architecture (MLP -> single-layer LSTM -> scalar head),
    # but the model cannot currently be instantiated — presumably recurrent
    # support is unfinished; confirm before removing either the raise or
    # the tail.
    raise NotImplementedError()
    self.n_input_channels = n_dim_obs + n_dim_action
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.nonlinearity = nonlinearity
    super().__init__()
    # Feed-forward trunk: input -> n_hidden_layers hidden layers of equal width.
    self.fc = MLP(
        self.n_input_channels,
        n_hidden_channels,
        [self.n_hidden_channels] * self.n_hidden_layers,
        nonlinearity=nonlinearity,
    )
    # Single-layer LSTM over the MLP features, same width in and out.
    self.lstm = nn.LSTM(
        num_layers=1, input_size=n_hidden_channels, hidden_size=n_hidden_channels
    )
    # Scalar Q-value head.
    self.out = nn.Linear(n_hidden_channels, 1)
    # LeCun-normal init for LSTM weight matrices, zeros for its biases.
    for (n, p) in self.lstm.named_parameters():
        if "weight" in n:
            init_lecun_normal(p)
        else:
            nn.init.zeros_(p)
    # Output head: scaled LeCun-normal weights, zero bias.
    init_lecun_normal(self.out.weight, scale=last_wscale)
    nn.init.zeros_(self.out.bias)
def __init__(
    self,
    in_size,
    out_size,
    hidden_sizes,
    normalize_input=True,
    normalize_output=False,
    nonlinearity=F.relu,
    last_wscale=1,
):
    """Constructor for an MLP whose hidden layers use batch normalization.

    Args:
        in_size: Input feature dimension.
        out_size: Output feature dimension.
        hidden_sizes: Sequence of hidden-layer widths; empty means a single
            linear input->output layer.
        normalize_input: If True, apply BatchNorm1d to the raw input.
        normalize_output: If True, apply BatchNorm1d to the output.
        nonlinearity: Hidden-layer activation (default: F.relu).
        last_wscale: Scale for the LeCun-normal init of the output weights.
    """
    self.in_size = in_size
    self.out_size = out_size
    self.hidden_sizes = hidden_sizes
    self.normalize_input = normalize_input
    self.normalize_output = normalize_output
    self.nonlinearity = nonlinearity
    super().__init__()
    if normalize_input:
        self.input_bn = nn.BatchNorm1d(in_size)
    if hidden_sizes:
        # Linear+BatchNorm blocks: input -> h0 -> h1 -> ... -> h[-1].
        self.hidden_layers = nn.ModuleList()
        self.hidden_layers.append(LinearBN(in_size, hidden_sizes[0]))
        for hin, hout in zip(hidden_sizes, hidden_sizes[1:]):
            self.hidden_layers.append(LinearBN(hin, hout))
        self.output = nn.Linear(hidden_sizes[-1], out_size)
    else:
        self.output = nn.Linear(in_size, out_size)
    init_lecun_normal(self.output.weight, scale=last_wscale)
    # BUGFIX: zero the output bias, matching the plain-MLP constructor in
    # this file (lecun-normal weights + zero bias); previously the bias
    # kept PyTorch's default uniform init.
    nn.init.zeros_(self.output.bias)
    if normalize_output:
        self.output_bn = nn.BatchNorm1d(out_size)
def __init__(
    self, in_size, out_size, hidden_sizes, nonlinearity=F.relu, last_wscale=1
):
    """Constructor for a plain multilayer perceptron.

    Args:
        in_size: Input feature dimension.
        out_size: Output feature dimension.
        hidden_sizes: Sequence of hidden-layer widths; empty means a single
            linear input->output layer.
        nonlinearity: Hidden-layer activation (default: F.relu).
        last_wscale: Scale for the LeCun-normal init of the output weights.
    """
    self.in_size = in_size
    self.out_size = out_size
    self.hidden_sizes = hidden_sizes
    self.nonlinearity = nonlinearity
    super().__init__()
    if hidden_sizes:
        # Chain the layer widths and build one Linear per consecutive pair.
        dims = [in_size] + list(hidden_sizes)
        self.hidden_layers = nn.ModuleList(
            nn.Linear(d_in, d_out) for d_in, d_out in zip(dims, dims[1:])
        )
        self.hidden_layers.apply(init_chainer_default)
        final_in = hidden_sizes[-1]
    else:
        final_in = in_size
    self.output = nn.Linear(final_in, out_size)
    # Output layer: scaled LeCun-normal weights, zero bias.
    init_lecun_normal(self.output.weight, scale=last_wscale)
    nn.init.zeros_(self.output.bias)