def __init__(
    self,
    framestack: int,
    action_dim: int,
    hidden: Tuple = (32, 32),
    discrete: bool = True,
    *args,
    **kwargs
):
    """Build the convolutional front end and the fully connected head.

    Args:
        framestack: Forwarded to the parent constructor and used as the
            first entry of the channel tuple handed to ``cnn``.
        action_dim: Forwarded to the parent constructor and used as the
            last layer size of the fully connected head.
        hidden: Sizes placed between the ``cnn`` output size and
            ``action_dim`` in the head.
        discrete: Forwarded unchanged to the parent constructor.
        *args: Accepted but not forwarded anywhere.
        **kwargs: Forwarded to the parent constructor.
    """
    super(CNNPolicy, self).__init__(
        framestack, action_dim, hidden, discrete, **kwargs
    )

    # ``cnn`` hands back a pair: the conv stack and the size its output
    # feeds into the head (presumably the flattened feature count).
    self.conv, conv_out_size = cnn((framestack, 16, 32))

    # ``self.sac`` is not assigned here; it is expected to be available
    # after the parent constructor has run.
    head_sizes = [conv_out_size, *hidden, action_dim]
    self.fc = mlp(head_sizes, sac=self.sac)
def __init__(
    self,
    framestack: int,
    action_dim: spaces.Space,
    policy_layers: Tuple = (256,),
    value_layers: Tuple = (256,),
    val_type: str = "V",
    discrete: bool = True,
    *args,
    **kwargs,
):
    """Assemble a shared conv feature extractor with actor and critic heads.

    Args:
        framestack: First entry of the channel tuple handed to ``cnn``.
        action_dim: Passed to both ``MlpPolicy`` and ``MlpValue``.
        policy_layers: Layer sizes passed to ``MlpPolicy``.
        value_layers: Layer sizes passed to ``MlpValue``.
        val_type: Value-function type passed to ``MlpValue``.
        discrete: Passed to ``MlpPolicy``.
        *args: Accepted but not forwarded anywhere.
        **kwargs: Forwarded to ``MlpPolicy`` only.
    """
    # The parent constructor is deliberately called with no arguments.
    super(CNNActorCritic, self).__init__()

    conv_channels = (framestack, 16, 32)
    # ``cnn`` hands back a pair; the second item is used as the input
    # size of both heads.
    self.feature, feature_size = cnn(conv_channels)

    # Keep the assignment order: feature, then actor, then critic.
    self.actor = MlpPolicy(
        feature_size, action_dim, policy_layers, discrete, **kwargs
    )
    self.critic = MlpValue(feature_size, action_dim, val_type, value_layers)
def __init__(self, *args, **kwargs):
    """Initialise the parent, then build the conv stack and the MLP head.

    All positional and keyword arguments go straight to the parent
    constructor. This method then reads ``self.state_dim``,
    ``self.activation``, ``self.fc_layers`` and ``self.action_dim``, none
    of which it assigns itself, so they are expected to be available once
    the parent constructor has run.
    """
    super(CnnValue, self).__init__(*args, **kwargs)

    conv_channels = (self.state_dim, 16, 32)
    conv_stack, conv_out_size = cnn(conv_channels, activation=self.activation)

    # Same assignment order as a tuple-unpack onto self: conv first,
    # then output_size.
    self.conv = conv_stack
    self.output_size = conv_out_size

    head_sizes = [conv_out_size, *self.fc_layers, self.action_dim]
    self.model = mlp(head_sizes)