def test_mlp(self):
    """Test building a sequential MLP."""
    sizes = [2, 3, 3, 2]
    mlp_nn = mlp(sizes)
    mlp_nn_sac = mlp(sizes, sac=True)

    # One (Linear, activation) pair per consecutive size pair
    assert len(mlp_nn) == 2 * (len(sizes) - 1)
    assert all(isinstance(mlp_nn[i], nn.Linear) for i in range(0, 5, 2))

    # With sac=True the final layer is dropped, so the net ends one size early
    assert len(mlp_nn_sac) == 2 * (len(sizes) - 2)
    assert all(isinstance(mlp_nn_sac[i], nn.Linear) for i in range(0, 4, 2))

    inp = torch.randn((2,))
    assert mlp_nn(inp).shape == (2,)
    assert mlp_nn_sac(inp).shape == (3,)
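# A minimal sketch of an `mlp` builder consistent with the assertions above;
# not necessarily the repo's exact implementation. It stacks one
# (Linear, activation) pair per consecutive size pair, and with sac=True it
# stops one size early so a SAC-style head can be attached on top.
import torch
import torch.nn as nn

def mlp(sizes, activation="relu", sac=False):
    act = nn.ReLU if activation == "relu" else nn.Tanh
    stop = len(sizes) - 1 if sac else len(sizes)
    layers = []
    for in_dim, out_dim in zip(sizes[: stop - 1], sizes[1:stop]):
        layers += [nn.Linear(in_dim, out_dim), act()]
    return nn.Sequential(*layers)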
def __init__(self, *args, **kwargs):
    super(CnnValue, self).__init__(*args, **kwargs)

    # `cnn` returns the convolutional stack and its flattened output size
    self.conv, self.output_size = cnn(
        (self.state_dim, 16, 32), activation=self.activation
    )
    self.model = mlp([self.output_size, *self.fc_layers, self.action_dim])
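# A minimal sketch of a `cnn` builder consistent with how it is called above:
# it takes a tuple of channel counts and returns the conv stack together with
# its flattened output size. The 8/4 and 4/2 kernel/stride pairs and the
# 84x84 input resolution are assumptions (standard Atari values), not the
# repo's confirmed settings.
import torch
import torch.nn as nn

def cnn(channels=(4, 16, 32), activation="relu", input_size=84):
    act = nn.ReLU if activation == "relu" else nn.Tanh
    kernels, strides = (8, 4), (4, 2)
    layers = []
    for in_c, out_c, k, s in zip(channels[:-1], channels[1:], kernels, strides):
        layers += [nn.Conv2d(in_c, out_c, kernel_size=k, stride=s), act()]
    conv = nn.Sequential(*layers)
    # Infer the flattened feature size with a dummy forward pass
    with torch.no_grad():
        dummy = torch.zeros(1, channels[0], input_size, input_size)
        output_size = conv(dummy).reshape(1, -1).shape[1]
    return conv, output_size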
def __init__(
    self,
    framestack: int,
    action_dim: int,
    hidden: Tuple = (32, 32),
    discrete: bool = True,
    *args,
    **kwargs
):
    super(CNNPolicy, self).__init__(
        framestack, action_dim, hidden, discrete, **kwargs
    )

    # Feature extractor over the stacked frames, then a fully connected head
    channels = (framestack, 16, 32)
    self.conv, output_size = cnn(channels)
    self.fc = mlp([output_size] + list(hidden) + [action_dim], sac=self.sac)
def __init__(
    self,
    state_dim: int,
    action_dim: int,
    hidden: Tuple = (32, 32),
    discrete: bool = True,
    *args,
    **kwargs
):
    super(MlpPolicy, self).__init__(
        state_dim, action_dim, hidden, discrete, **kwargs
    )

    self.activation = kwargs.get("activation", "relu")
    self.model = mlp(
        [state_dim] + list(hidden) + [action_dim],
        activation=self.activation,
        sac=self.sac,
    )
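# Illustration only: with example sizes state_dim=4, hidden=(32, 32) and
# action_dim=2, the constructor's mlp call reduces to the following, mapping
# state_dim -> hidden -> action_dim. This assumes the `mlp` builder sketched
# earlier, and that sac=False is the base-class default.
model = mlp([4] + [32, 32] + [2], activation="relu", sac=False)
out = model(torch.randn(8, 4))
assert out.shape == (8, 2)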
def __init__(self, *args, **kwargs):
    super(CnnDuelingValue, self).__init__(*args, **kwargs)

    # Separate advantage and value heads on top of the shared conv features
    self.advantage = mlp([self.output_size, *self.fc_layers, self.action_dim])
    self.value = mlp([self.output_size, *self.fc_layers, 1])
def __init__(self, *args, **kwargs):
    super(MlpDuelingValue, self).__init__(*args, **kwargs)

    # Shared feature trunk up to the second-to-last hidden size; the heads
    # then consume fc_layers[-1], which assumes the last two hidden sizes
    # match (true for the (32, 32) default)
    self.feature = mlp([self.state_dim, *self.fc_layers[:-1]])
    self.advantage = mlp([self.fc_layers[-1], self.action_dim])
    self.value = mlp([self.fc_layers[-1], 1])
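# A hedged sketch of how a dueling forward pass typically combines the heads
# built above (the repo's actual forward method is not shown here):
# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a), the standard mean-subtracted
# form that keeps the value and advantage heads identifiable.
def dueling_q(feature, value_head, advantage_head, state):
    x = feature(state)
    value = value_head(x)          # V(s): shape (batch, 1)
    advantage = advantage_head(x)  # A(s, .): shape (batch, action_dim)
    return value + advantage - advantage.mean(dim=-1, keepdim=True)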