def __init__(self, *args, **kwargs):
    """Build the convolutional feature extractor and the fully connected head.

    All positional and keyword arguments are passed through unchanged to the
    parent initializer. ``state_dim``, ``activation``, ``fc_layers`` and
    ``action_dim`` are read from ``self`` afterwards -- presumably they are
    set by the parent initializer (TODO confirm against the base class).
    """
    super(CnnValue, self).__init__(*args, **kwargs)

    # Channel progression handed to the conv builder: input -> 16 -> 32.
    conv_channels = (self.state_dim, 16, 32)
    conv_stack, flat_size = cnn(conv_channels, activation=self.activation)
    self.conv = conv_stack
    self.output_size = flat_size

    # MLP sizes: conv output size, then the configured hidden layers, then
    # ``action_dim`` as the final layer width.
    head_sizes = [self.output_size, *self.fc_layers, self.action_dim]
    self.model = mlp(head_sizes)
def __init__(
    self,
    framestack: int,
    action_dim: int,
    hidden: Tuple = (32, 32),
    discrete: bool = True,
    *args,
    **kwargs
):
    """Set up a convolutional policy: conv feature stack plus an MLP head.

    Args:
        framestack: Used as the first (input) channel count of the conv stack.
        action_dim: Width of the final layer of the MLP head.
        hidden: Widths of the hidden layers of the MLP head.
        discrete: Forwarded to the parent initializer.
        *args: Accepted but not forwarded anywhere.
        **kwargs: Forwarded to the parent initializer.
    """
    super(CNNPolicy, self).__init__(
        framestack, action_dim, hidden, discrete, **kwargs
    )

    # Channel progression handed to the conv builder: framestack -> 16 -> 32.
    conv_stack, flat_features = cnn((framestack, 16, 32))
    self.conv = conv_stack

    # ``self.sac`` is not assigned in this method -- presumably the parent
    # initializer sets it (TODO confirm).
    fc_sizes = [flat_features, *hidden, action_dim]
    self.fc = mlp(fc_sizes, sac=self.sac)
def test_cnn(self):
    """
    test getting CNN layers

    Builds a two-convolution stack and checks the number of layers, that the
    layers alternate Conv2d / ReLU, and the reported output size.
    """
    channels = [1, 2, 4]
    kernels = [4, 1]
    strides = [2, 2]

    cnn_nn, output_size = cnn(channels, kernels, strides)

    # One Conv2d + one ReLU per consecutive channel pair.
    n_layers = len(cnn_nn)
    assert n_layers == 2 * (len(channels) - 1)

    # Walk the index ranges over the actual number of layers rather than
    # len(channels), so every layer is checked regardless of how many
    # channel entries the test uses.
    assert all(isinstance(cnn_nn[i], nn.Conv2d) for i in range(0, n_layers, 2))
    assert all(isinstance(cnn_nn[i], nn.ReLU) for i in range(1, n_layers, 2))

    # 1764 == 4 * 21 * 21, which is consistent with an 84x84 input passed
    # through these kernels/strides -- TODO confirm cnn's default input size.
    assert output_size == 1764
def __init__(
    self,
    framestack: int,
    action_dim: spaces.Space,
    policy_layers: Tuple = (256,),
    value_layers: Tuple = (256,),
    val_type: str = "V",
    discrete: bool = True,
    *args,
    **kwargs
):
    """Assemble a shared conv feature extractor with MLP actor and critic heads.

    Args:
        framestack: Used as the first (input) channel count of the conv stack.
        action_dim: Passed as the second argument to both heads.
        policy_layers: Layer widths handed to the actor head.
        value_layers: Layer widths handed to the critic head.
        val_type: Value-function type string handed to the critic head.
        discrete: Forwarded to the actor head.
        *args: Accepted but not used.
        **kwargs: Forwarded to the actor head only.
    """
    super(CNNActorCritic, self).__init__()

    # Channel progression handed to the conv builder: framestack -> 16 -> 32.
    feature_stack, feature_size = cnn((framestack, 16, 32))
    self.feature = feature_stack

    # Both heads take the conv output size as their input size.
    self.actor = MlpPolicy(
        feature_size, action_dim, policy_layers, discrete, **kwargs
    )
    self.critic = MlpValue(feature_size, action_dim, val_type, value_layers)