# Assumes `Tuple` from typing and the `cnn`/`mlp` network builders are in scope.
def __init__(
    self, action_dim: int, history_length: int = 4, fc_layers: Tuple = (256,)
):
    super(DuelingDQNValueCNN, self).__init__()
    self.action_dim = action_dim
    # Shared convolutional encoder over the stacked frames.
    self.conv, output_size = cnn((history_length, 16, 32))
    # Dueling streams: per-action advantages and a scalar state value.
    self.advantage = mlp([output_size] + list(fc_layers) + [action_dim])
    self.value = mlp([output_size] + list(fc_layers) + [1])
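# A minimal sketch (not from the source) of the forward pass this dueling
# constructor implies, assuming `self.conv` returns flat features and Q-values
# use the standard aggregation Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
def forward(self, state):
    features = self.conv(state)
    advantage = self.advantage(features)  # (batch, action_dim)
    value = self.value(features)          # (batch, 1)
    # Subtracting the mean advantage keeps the V and A streams identifiable.
    return value + advantage - advantage.mean(dim=1, keepdim=True)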
def __init__(
    self,
    framestack: int,
    action_dim: int,
    hidden: Tuple = (32, 32),
    discrete: bool = True,
    *args,
    **kwargs,
):
    super(CNNPolicy, self).__init__(
        framestack, action_dim, hidden, discrete, **kwargs
    )
    self.action_dim = action_dim
    # Convolutional encoder over the framestacked observation.
    self.conv, output_size = cnn((framestack, 16, 32))
    # Fully connected head from conv features to action outputs.
    self.fc = mlp([output_size] + list(hidden) + [action_dim], sac=self.sac)
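# Hedged sketch (not from the source): a forward pass consistent with the
# CNNPolicy constructor above, encoding the framestacked observation and
# mapping the flat conv features to per-action outputs.
def forward(self, state):
    features = self.conv(state)  # (batch, framestack, H, W) -> flat features
    return self.fc(features)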
def __init__(
    self,
    state_dim: int,
    action_dim: int,
    hidden: Tuple = (32, 32),
    disc: bool = True,
    *args,
    **kwargs,
):
    super(MlpPolicy, self).__init__(state_dim, action_dim, hidden, disc, **kwargs)
    self.state_dim = state_dim
    self.action_dim = action_dim
    # Plain MLP mapping flat state vectors to action outputs.
    self.model = mlp([state_dim] + list(hidden) + [action_dim], sac=self.sac)
def __init__(
    self,
    state_dim: int,
    action_dim: int,
    hidden: Tuple = (32, 32),
    discrete: bool = True,
    *args,
    **kwargs,
):
    super(MlpPolicy, self).__init__(
        state_dim, action_dim, hidden, discrete, **kwargs
    )
    self.state_dim = state_dim
    self.action_dim = action_dim
    # Default to ReLU unless an activation is passed explicitly.
    self.activation = kwargs.get("activation", "relu")
    self.model = mlp(
        [state_dim] + list(hidden) + [action_dim],
        activation=self.activation,
        sac=self.sac,
    )
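# Hedged usage sketch: assumes torch is imported and the class wrapping the
# constructor above is importable as MlpPolicy; the dims and the "tanh"
# override below are illustrative, not taken from the source.
# policy = MlpPolicy(state_dim=8, action_dim=2, hidden=(32, 32), activation="tanh")
# logits = policy.model(torch.randn(1, 8))  # flat state -> action outputs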