def build_ddpg_models_and_action_dist(policy, obs_space, action_space, config):
    """Build the DDPG model and pair it with its action distribution class.

    Delegates model construction to `build_ddpg_models` and then moves
    `policy.target_model` onto CUDA when it is available, otherwise onto
    the CPU.

    Args:
        policy: Policy the models are built for. Its `target_model`
            attribute is read and re-assigned here (presumably populated
            by `build_ddpg_models` - confirm against that helper).
        obs_space: Observation space, forwarded to `build_ddpg_models`.
        action_space: Action space, forwarded to `build_ddpg_models`.
        config: Config object, forwarded to `build_ddpg_models`.

    Returns:
        A `(model, TorchDeterministic)` tuple, where `model` is whatever
        `build_ddpg_models` returned.
    """
    ddpg_model = build_ddpg_models(policy, obs_space, action_space, config)

    # TODO(sven): Unify this once we generically support creating more than
    #  one Model per policy. Note: Device placement is done automatically
    #  already for `policy.model` (but not for the target model).
    # NOTE(review): the device is chosen purely from CUDA availability;
    #  verify this matches wherever `policy.model` ends up being placed.
    target_device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    policy.target_model = policy.target_model.to(target_device)

    return ddpg_model, TorchDeterministic
def build_ddpg_models_and_action_dist(
        policy: Policy,
        obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        config: TrainerConfigDict) -> Tuple[ModelV2, ActionDistribution]:
    """Build the DDPG model and select the matching action distribution.

    Args:
        policy: Policy the model is built for; forwarded to
            `build_ddpg_models`.
        obs_space: Observation space, forwarded to `build_ddpg_models`.
        action_space: Action space. Besides being forwarded, its type
            decides which distribution class is returned.
        config: Trainer config, forwarded to `build_ddpg_models`.

    Returns:
        A `(model, dist_class)` tuple. `dist_class` is `TorchDirichlet`
        when `action_space` is a `Simplex` instance and
        `TorchDeterministic` for every other action space.
    """
    ddpg_model = build_ddpg_models(policy, obs_space, action_space, config)

    # Only Simplex action spaces get a Dirichlet distribution; everything
    # else falls back to the deterministic one.
    dist_class = (TorchDirichlet
                  if isinstance(action_space, Simplex)
                  else TorchDeterministic)
    return ddpg_model, dist_class
def build_ddpg_models_and_action_dist(
        policy: Policy,
        obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        config: TrainerConfigDict) -> Tuple[ModelV2, ActionDistribution]:
    """Build the DDPG model, place the target model, pick the action dist.

    Delegates model construction to `build_ddpg_models`, then moves
    `policy.target_model` onto CUDA when it is available (otherwise onto
    the CPU), and finally selects the action distribution class from the
    type of `action_space`.

    Args:
        policy: Policy the models are built for. Its `target_model`
            attribute is read and re-assigned here (presumably populated
            by `build_ddpg_models` - confirm against that helper).
        obs_space: Observation space, forwarded to `build_ddpg_models`.
        action_space: Action space. Besides being forwarded, its type
            decides which distribution class is returned.
        config: Trainer config, forwarded to `build_ddpg_models`.

    Returns:
        A `(model, dist_class)` tuple. `dist_class` is `TorchDirichlet`
        when `action_space` is a `Simplex` instance and
        `TorchDeterministic` for every other action space.
    """
    ddpg_model = build_ddpg_models(policy, obs_space, action_space, config)

    # TODO(sven): Unify this once we generically support creating more than
    #  one Model per policy. Note: Device placement is done automatically
    #  already for `policy.model` (but not for the target model).
    # NOTE(review): the device is chosen purely from CUDA availability;
    #  verify this matches wherever `policy.model` ends up being placed.
    target_device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    policy.target_model = policy.target_model.to(target_device)

    # Only Simplex action spaces get a Dirichlet distribution; everything
    # else falls back to the deterministic one.
    dist_class = (TorchDirichlet
                  if isinstance(action_space, Simplex)
                  else TorchDeterministic)
    return ddpg_model, dist_class