def __init__(
    self,
    action_dim: int,
    history_length: int = 4,
    fc_layers: Tuple = (256, ),
):
    """Create the convolutional trunk and the advantage/value heads.

    Args:
        action_dim: Width of the final layer of the advantage head; also
            stored on the instance as ``self.action_dim``.
        history_length: First entry of the tuple handed to ``cnn``
            (presumably the number of stacked input frames -- confirm
            against ``cnn``).
        fc_layers: Hidden layer sizes used by both heads.
    """
    super(DuelingDQNValueCNN, self).__init__()
    self.action_dim = action_dim

    # ``cnn`` yields the conv module and a size that is used as the input
    # width of both heads.
    self.conv, feature_dim = cnn((history_length, 16, 32))

    # The two heads share input width and hidden sizes; only the width of
    # the last layer differs (action_dim vs. 1).
    shared_sizes = [feature_dim, *fc_layers]
    self.advantage = mlp(shared_sizes + [action_dim])
    self.value = mlp(shared_sizes + [1])
def __init__(
    self,
    action_dim: int,
    history_length: int = 4,
    fc_layers: Tuple = (128, ),
    noisy_layers: Tuple = (128, 128),
):
    """Create the convolutional trunk and the ``noisy_mlp`` head.

    Args:
        action_dim: Appended as the last entry of the second size list
            given to ``noisy_mlp``.
        history_length: First entry of the tuple handed to ``cnn``
            (presumably the number of stacked input frames -- confirm
            against ``cnn``).
        fc_layers: Sizes that follow the conv output size in the first
            list given to ``noisy_mlp``.
        noisy_layers: Sizes that precede ``action_dim`` in the second list
            given to ``noisy_mlp``.
    """
    super(NoisyDQNValueCNN, self).__init__()

    self.conv, feature_dim = cnn((history_length, 16, 32))

    # First list starts at the conv output size; second list ends at the
    # number of actions.
    dense_sizes = [feature_dim, *fc_layers]
    noisy_sizes = [*noisy_layers, action_dim]
    self.model = noisy_mlp(dense_sizes, noisy_sizes)
def __init__(
    self,
    action_dim,
    history_length=4,
    val_type="Qs",
    fc_layers=(256, ),
):
    """Create the convolutional trunk and the value head.

    Args:
        action_dim: Stored as ``self.action_dim`` and forwarded to
            ``_get_val_model``.
        history_length: First entry of the tuple handed to ``cnn``
            (presumably the number of stacked input frames -- confirm
            against ``cnn``).
        val_type: Forwarded unchanged to ``_get_val_model``.
        fc_layers: Forwarded unchanged to ``_get_val_model``.
    """
    super(CNNValue, self).__init__()
    self.action_dim = action_dim

    conv_stack, feature_dim = cnn((history_length, 16, 32))
    self.conv = conv_stack

    # The head is built by ``_get_val_model`` from the ``mlp`` factory and
    # the size reported by ``cnn``.
    self.fc = _get_val_model(
        mlp,
        val_type,
        feature_dim,
        fc_layers,
        action_dim,
    )
def __init__(
    self,
    framestack: int,
    action_dim: spaces.Space,
    fc_layers: Tuple = (256, ),
    val_type: str = "V",
    discrete: bool = True,
    *args,
    **kwargs
):
    """Create the shared conv feature extractor, the actor and the critic.

    Args:
        framestack: First entry of the tuple handed to ``cnn``.
        action_dim: Forwarded to both ``MlpPolicy`` and ``MlpValue``.
        fc_layers: Forwarded to both ``MlpPolicy`` and ``MlpValue``.
        val_type: Forwarded to ``MlpValue``.
        discrete: Forwarded to ``MlpPolicy``.
        *args: Accepted but not used by this constructor.
        **kwargs: Forwarded to ``MlpPolicy`` only.
    """
    super(CNNActorCritic, self).__init__()

    feature_extractor, feature_dim = cnn((framestack, 16, 32))
    self.feature = feature_extractor

    # Actor and critic both take the size reported by ``cnn`` as their
    # first argument.
    self.actor = MlpPolicy(
        feature_dim,
        action_dim,
        fc_layers,
        discrete,
        **kwargs
    )
    self.critic = MlpValue(feature_dim, action_dim, val_type, fc_layers)
def __init__(
    self,
    framestack: int,
    action_dim: int,
    hidden: Tuple = (32, 32),
    discrete: bool = True,
    *args,
    **kwargs
):
    """Initialise the base class, then build the conv trunk and the head.

    Args:
        framestack: Passed to the base initialiser and used as the first
            entry of the tuple handed to ``cnn``.
        action_dim: Passed to the base initialiser, stored as
            ``self.action_dim`` and used as the last layer size of the head.
        hidden: Passed to the base initialiser and used as the hidden
            layer sizes of the head.
        discrete: Passed to the base initialiser.
        *args: Accepted but not used by this constructor.
        **kwargs: Forwarded to the base initialiser.
    """
    super(CNNPolicy, self).__init__(
        framestack,
        action_dim,
        hidden,
        discrete,
        **kwargs
    )
    self.action_dim = action_dim

    self.conv, feature_dim = cnn((framestack, 16, 32))

    # ``self.sac`` is not assigned here; presumably the base initialiser
    # sets it -- verify against the parent class.
    head_sizes = [feature_dim, *hidden, action_dim]
    self.fc = mlp(head_sizes, sac=self.sac)
def __init__(
    self,
    action_dim: int,
    num_atoms: int,
    history_length: int = 4,
    fc_layers: Tuple = (128, 128),
    noisy_layers: Tuple = (128, 512),
):
    """Create the convolutional trunk and the ``noisy_mlp`` head.

    The last size given to ``noisy_mlp`` is ``action_dim * num_atoms``.

    Args:
        action_dim: Stored as ``self.action_dim``.
        num_atoms: Stored as ``self.num_atoms``.
        history_length: First entry of the tuple handed to ``cnn``
            (presumably the number of stacked input frames -- confirm
            against ``cnn``).
        fc_layers: Sizes that follow the conv output size in the first
            list given to ``noisy_mlp``.
        noisy_layers: Sizes that precede the output width in the second
            list given to ``noisy_mlp``.
    """
    super(CategoricalDQNValueCNN, self).__init__()
    self.action_dim = action_dim
    self.num_atoms = num_atoms

    self.conv, feature_dim = cnn((history_length, 16, 32))

    # One output per (action, atom) pair.
    output_width = self.action_dim * self.num_atoms
    dense_sizes = [feature_dim, *fc_layers]
    noisy_sizes = [*noisy_layers, output_width]
    self.model = noisy_mlp(dense_sizes, noisy_sizes)
def __init__(
    self,
    framestack: int,
    action_dim: int,
    val_type: str = "Qs",
    fc_layers: Tuple = (256, ),
    **kwargs,
):
    """Create the convolutional trunk and the value head.

    Args:
        framestack: First entry of the tuple handed to ``cnn``.
        action_dim: Stored as ``self.action_dim`` and forwarded to
            ``_get_val_model``.
        val_type: Forwarded unchanged to ``_get_val_model``.
        fc_layers: Forwarded unchanged to ``_get_val_model``.
        **kwargs: Only the ``"activation"`` key is read (default
            ``"relu"``); it is passed to ``cnn``. Any other keys are
            ignored by this constructor.
    """
    super(CNNValue, self).__init__()
    self.action_dim = action_dim

    # Single lookup with a fallback instead of a membership test followed
    # by indexing.
    activation = kwargs.get("activation", "relu")

    self.conv, output_size = cnn((framestack, 16, 32), activation=activation)
    self.fc = _get_val_model(mlp, val_type, output_size, fc_layers, action_dim)
def __init__(self, input_dim, action_dim, hidden_dims=(128, 64)):
    """Create the conv trunk and three linear layers of the discriminator.

    Args:
        input_dim: Not used by this constructor; kept so existing callers
            continue to work.
        action_dim: Added to ``hidden_dims[0]`` to form the input width of
            ``fc2`` (presumably the action is concatenated to the ``fc1``
            output in the forward pass -- confirm there).
        hidden_dims: Sequence whose first two entries give the output
            widths of ``fc1`` and ``fc2``. The default is an immutable
            tuple so it cannot be shared and mutated across instances;
            lists are still accepted.

    Raises:
        IndexError: If ``hidden_dims`` has fewer than two entries.
    """
    super(Discriminator, self).__init__()

    # ``cnn`` is called with its defaults here; its second return value is
    # kept on the instance and used as the input width of ``fc1``.
    self.conv, self.conv_output_size = cnn()

    first_hidden, second_hidden = hidden_dims[0], hidden_dims[1]
    self.fc1 = nn.Linear(self.conv_output_size, first_hidden)
    self.fc2 = nn.Linear(first_hidden + action_dim, second_hidden)
    self.fc3 = nn.Linear(second_hidden, 1)