def default_model():
    """Build the default `models.ActorCritic` used when no model is supplied.

    The actor and the critic are built independently: each receives its own
    `ObservationEncoder` and its own `MLP((64, 64), 'tanh')` torso. The actor
    ends in a `DetachedScaleGaussianPolicyHead` and the critic in a
    `ValueHead`. A new `normalizers.MeanStd` instance is passed as the
    observation normalizer.

    Returns:
        A newly constructed `models.ActorCritic`.
    """
    # NOTE(review): (64, 64) is presumably the hidden layer sizes of the
    # torso and 'tanh' its activation -- confirm against `models.MLP`.
    policy = models.Actor(
        encoder=models.ObservationEncoder(),
        torso=models.MLP((64, 64), 'tanh'),
        head=models.DetachedScaleGaussianPolicyHead(),
    )
    value_function = models.Critic(
        encoder=models.ObservationEncoder(),
        torso=models.MLP((64, 64), 'tanh'),
        head=models.ValueHead(),
    )
    return models.ActorCritic(
        actor=policy,
        critic=value_function,
        observation_normalizer=normalizers.MeanStd(),
    )
def default_model():
    """Build the default `models.ActorCriticWithTargets` for this agent.

    The actor encodes observations only (`ObservationEncoder`) and ends in a
    `DeterministicPolicyHead`. The critic uses an `ObservationActionEncoder`
    and ends in a `ValueHead`. Both use their own `MLP((256, 256), 'relu')`
    torso, and a new `normalizers.MeanStd` instance is passed as the
    observation normalizer.

    Returns:
        A newly constructed `models.ActorCriticWithTargets`.
    """
    # NOTE(review): (256, 256) is presumably the hidden layer sizes of the
    # torso and 'relu' its activation -- confirm against `models.MLP`.
    policy = models.Actor(
        encoder=models.ObservationEncoder(),
        torso=models.MLP((256, 256), 'relu'),
        head=models.DeterministicPolicyHead(),
    )
    action_value = models.Critic(
        encoder=models.ObservationActionEncoder(),
        torso=models.MLP((256, 256), 'relu'),
        head=models.ValueHead(),
    )
    return models.ActorCriticWithTargets(
        actor=policy,
        critic=action_value,
        observation_normalizer=normalizers.MeanStd(),
    )
def default_model():
    """Build the default `models.ActorCriticWithTargets` with a distributional critic.

    The actor encodes observations only (`ObservationEncoder`) and ends in a
    `DeterministicPolicyHead`. The critic uses an `ObservationActionEncoder`
    and ends in a `DistributionalValueHead`. Both use their own
    `MLP((256, 256), 'relu')` torso, and a new `normalizers.MeanStd` instance
    is passed as the observation normalizer.

    Returns:
        A newly constructed `models.ActorCriticWithTargets`.
    """
    return models.ActorCriticWithTargets(
        actor=models.Actor(
            encoder=models.ObservationEncoder(),
            torso=models.MLP((256, 256), 'relu'),
            head=models.DeterministicPolicyHead(),
        ),
        critic=models.Critic(
            encoder=models.ObservationActionEncoder(),
            torso=models.MLP((256, 256), 'relu'),
            # These values are for the control suite with 0.99 discount.
            # NOTE(review): presumably (min value, max value, number of
            # atoms) -- confirm against `models.DistributionalValueHead`.
            # The comment must stay on its own line: placed inline on a
            # single-line call it would swallow the rest of the expression.
            head=models.DistributionalValueHead(-150., 150., 51),
        ),
        observation_normalizer=normalizers.MeanStd(),
    )
def default_model():
    """Build the default `models.ActorTwinCriticWithTargets` for this agent.

    The actor encodes observations only (`ObservationEncoder`) and ends in a
    `GaussianPolicyHead` configured with `loc_activation=None` and the
    `SquashedMultivariateNormalDiag` distribution. A single `models.Critic`
    (with an `ObservationActionEncoder` and a `ValueHead`) is passed as
    `critic`. Both use their own `MLP((256, 256), 'relu')` torso, and a new
    `normalizers.MeanStd` instance is passed as the observation normalizer.

    Returns:
        A newly constructed `models.ActorTwinCriticWithTargets`.
    """
    policy_head = models.GaussianPolicyHead(
        loc_activation=None,
        distribution=models.SquashedMultivariateNormalDiag,
    )
    # The encoder and torso are built inline so that construction order
    # matches the original nested call (encoder, torso, then head is used).
    policy = models.Actor(
        encoder=models.ObservationEncoder(),
        torso=models.MLP((256, 256), 'relu'),
        head=policy_head,
    )
    action_value = models.Critic(
        encoder=models.ObservationActionEncoder(),
        torso=models.MLP((256, 256), 'relu'),
        head=models.ValueHead(),
    )
    # NOTE(review): target_coeff is presumably the target-network update
    # coefficient -- confirm against `models.ActorTwinCriticWithTargets`.
    return models.ActorTwinCriticWithTargets(
        actor=policy,
        critic=action_value,
        observation_normalizer=normalizers.MeanStd(),
        target_coeff=0.005,
    )