def default_model():
    """Return the default ``ActorCriticWithTargets`` model.

    The actor is assembled from an observation encoder, an MLP torso
    configured with ``(256, 256)`` and ``'relu'``, and a deterministic
    policy head. The critic is assembled from an observation-action
    encoder, an MLP torso with the same configuration, and a value head.
    A ``normalizers.MeanStd`` instance is passed as the observation
    normalizer.
    """
    policy = models.Actor(
        encoder=models.ObservationEncoder(),
        torso=models.MLP((256, 256), 'relu'),
        head=models.DeterministicPolicyHead(),
    )
    value_fn = models.Critic(
        encoder=models.ObservationActionEncoder(),
        torso=models.MLP((256, 256), 'relu'),
        head=models.ValueHead(),
    )
    obs_normalizer = normalizers.MeanStd()
    return models.ActorCriticWithTargets(
        actor=policy,
        critic=value_fn,
        observation_normalizer=obs_normalizer,
    )
def default_model():
    """Return the default ``ActorCriticWithTargets`` model with a
    distributional critic.

    The actor is assembled from an observation encoder, an MLP torso
    configured with ``(256, 256)`` and ``'relu'``, and a deterministic
    policy head. The critic is assembled from an observation-action
    encoder, an MLP torso with the same configuration, and a
    ``DistributionalValueHead``. A ``normalizers.MeanStd`` instance is
    passed as the observation normalizer.
    """
    # NOTE(review): another `default_model` is defined earlier in this
    # source; if both really live in one module, this definition rebinds the
    # name. Confirm whether they originate from separate modules.
    return models.ActorCriticWithTargets(
        actor=models.Actor(
            encoder=models.ObservationEncoder(),
            torso=models.MLP((256, 256), 'relu'),
            head=models.DeterministicPolicyHead(),
        ),
        critic=models.Critic(
            encoder=models.ObservationActionEncoder(),
            torso=models.MLP((256, 256), 'relu'),
            # These values are for the control suite with 0.99 discount.
            # Presumably (lower bound, upper bound, number of atoms) of the
            # value distribution's support -- TODO confirm against
            # DistributionalValueHead's signature.
            head=models.DistributionalValueHead(-150., 150., 51),
        ),
        observation_normalizer=normalizers.MeanStd(),
    )