예제 #1
0
def default_model():
    """Build the default ``models.ActorCriticWithTargets`` instance.

    The actor chains an observation encoder, an MLP torso and a
    deterministic policy head. The critic chains an observation-action
    encoder, an MLP torso with the same configuration and a value head.
    A ``normalizers.MeanStd`` instance is supplied as the observation
    normalizer.

    Returns:
        The assembled ``models.ActorCriticWithTargets`` object.
    """
    # Both torsos share the same layer sizes and activation name.
    hidden_sizes = (256, 256)
    activation = 'relu'

    policy = models.Actor(
        encoder=models.ObservationEncoder(),
        torso=models.MLP(hidden_sizes, activation),
        head=models.DeterministicPolicyHead(),
    )
    value = models.Critic(
        encoder=models.ObservationActionEncoder(),
        torso=models.MLP(hidden_sizes, activation),
        head=models.ValueHead(),
    )
    obs_normalizer = normalizers.MeanStd()

    return models.ActorCriticWithTargets(
        actor=policy,
        critic=value,
        observation_normalizer=obs_normalizer,
    )
예제 #2
0
def default_model():
    """Build the default ``models.ActorCriticWithTargets`` instance.

    The actor chains an observation encoder, an MLP torso and a
    deterministic policy head. The critic chains an observation-action
    encoder, an MLP torso with the same configuration and a distributional
    value head. A ``normalizers.MeanStd`` instance is supplied as the
    observation normalizer.

    Returns:
        The assembled ``models.ActorCriticWithTargets`` object.
    """
    # Both torsos share the same layer sizes and activation name.
    hidden_sizes = (256, 256)
    activation = 'relu'

    policy = models.Actor(
        encoder=models.ObservationEncoder(),
        torso=models.MLP(hidden_sizes, activation),
        head=models.DeterministicPolicyHead(),
    )

    critic_encoder = models.ObservationActionEncoder()
    critic_torso = models.MLP(hidden_sizes, activation)
    # These values are for the control suite with 0.99 discount.
    # NOTE(review): presumably (lower bound, upper bound, number of atoms)
    # of the value distribution -- confirm against DistributionalValueHead.
    critic_head = models.DistributionalValueHead(-150., 150., 51)
    value = models.Critic(
        encoder=critic_encoder,
        torso=critic_torso,
        head=critic_head,
    )

    obs_normalizer = normalizers.MeanStd()

    return models.ActorCriticWithTargets(
        actor=policy,
        critic=value,
        observation_normalizer=obs_normalizer,
    )