def default_model():
    """Build the default actor / twin-critic model with target networks.

    The actor encodes observations, runs them through a (256, 256) ReLU MLP
    and ends in a deterministic policy head. The critic encodes
    observation-action pairs, uses the same MLP shape and ends in a value
    head. Observations are normalized with a ``MeanStd`` normalizer.

    Returns:
        A freshly constructed ``models.ActorTwinCriticWithTargets``.
    """
    hidden_sizes = (256, 256)

    # Sub-modules are created in the same order as the keyword arguments
    # below: actor first, then critic, then the normalizer.
    policy = models.Actor(
        encoder=models.ObservationEncoder(),
        torso=models.MLP(hidden_sizes, torch.nn.ReLU),
        head=models.DeterministicPolicyHead(),
    )
    q_function = models.Critic(
        encoder=models.ObservationActionEncoder(),
        torso=models.MLP(hidden_sizes, torch.nn.ReLU),
        head=models.ValueHead(),
    )
    obs_normalizer = normalizers.MeanStd()

    return models.ActorTwinCriticWithTargets(
        actor=policy,
        critic=q_function,
        observation_normalizer=obs_normalizer,
    )
def default_model():
    """Build the default actor-critic model.

    Both the actor and the critic encode observations only and share the same
    torso shape: a (64, 64) MLP with Tanh activations. The actor ends in a
    ``DetachedScaleGaussianPolicyHead`` and the critic in a ``ValueHead``.
    Observations are normalized with a ``MeanStd`` normalizer.

    Returns:
        A freshly constructed ``models.ActorCritic``.
    """
    layer_sizes = (64, 64)
    activation = torch.nn.Tanh

    # Keyword bundles keep the construction order of the original call:
    # encoder, torso, head for the actor, then the same for the critic.
    actor_parts = dict(
        encoder=models.ObservationEncoder(),
        torso=models.MLP(layer_sizes, activation),
        head=models.DetachedScaleGaussianPolicyHead(),
    )
    actor = models.Actor(**actor_parts)

    critic_parts = dict(
        encoder=models.ObservationEncoder(),
        torso=models.MLP(layer_sizes, activation),
        head=models.ValueHead(),
    )
    critic = models.Critic(**critic_parts)

    return models.ActorCritic(
        actor=actor,
        critic=critic,
        observation_normalizer=normalizers.MeanStd(),
    )
def default_model():
    """Build the default actor-critic model with targets and a distributional critic.

    The actor encodes observations, applies a (256, 256) ReLU MLP and ends in
    a deterministic policy head. The critic encodes observation-action pairs,
    applies the same MLP shape and ends in a ``DistributionalValueHead``.
    Observations are normalized with a ``MeanStd`` normalizer.

    Returns:
        A freshly constructed ``models.ActorCriticWithTargets``.
    """
    # NOTE: the comment below must stay on its own line. Placed inline on a
    # single-line definition it comments out the rest of the expression
    # (critic head, normalizer and closing parentheses) and the function no
    # longer parses.
    return models.ActorCriticWithTargets(
        actor=models.Actor(
            encoder=models.ObservationEncoder(),
            torso=models.MLP((256, 256), torch.nn.ReLU),
            head=models.DeterministicPolicyHead(),
        ),
        critic=models.Critic(
            encoder=models.ObservationActionEncoder(),
            torso=models.MLP((256, 256), torch.nn.ReLU),
            # These values are for the control suite with 0.99 discount.
            # NOTE(review): presumably (lower bound, upper bound, number of
            # atoms) of the value distribution - confirm against
            # DistributionalValueHead.
            head=models.DistributionalValueHead(-150., 150., 51),
        ),
        observation_normalizer=normalizers.MeanStd(),
    )
def default_model():
    """Build the default actor / twin-critic model with a squashed Gaussian policy.

    The actor encodes observations, applies a (256, 256) ReLU MLP and ends in
    a ``GaussianPolicyHead`` configured with an ``Identity`` location
    activation and the ``SquashedMultivariateNormalDiag`` distribution. The
    critic encodes observation-action pairs, applies the same MLP shape and
    ends in a ``ValueHead``. Observations are normalized with ``MeanStd``.

    Returns:
        A freshly constructed ``models.ActorTwinCriticWithTargets``.
    """
    # Actor pieces, built in the original order: encoder, torso, head.
    actor_encoder = models.ObservationEncoder()
    actor_torso = models.MLP((256, 256), torch.nn.ReLU)
    actor_head = models.GaussianPolicyHead(
        loc_activation=torch.nn.Identity,
        distribution=models.SquashedMultivariateNormalDiag,
    )
    actor = models.Actor(
        encoder=actor_encoder, torso=actor_torso, head=actor_head)

    # Critic pieces, again encoder, torso, head.
    critic_encoder = models.ObservationActionEncoder()
    critic_torso = models.MLP((256, 256), torch.nn.ReLU)
    critic_head = models.ValueHead()
    critic = models.Critic(
        encoder=critic_encoder, torso=critic_torso, head=critic_head)

    return models.ActorTwinCriticWithTargets(
        actor=actor,
        critic=critic,
        observation_normalizer=normalizers.MeanStd(),
    )