Example #1
0
def build_ddpg_models_and_action_dist(policy, obs_space, action_space, config):
    """Build the DDPG model and pair it with `TorchDeterministic`.

    Model construction is delegated to `build_ddpg_models`. Afterwards,
    `policy.target_model` is moved to CUDA when available (CPU otherwise)
    and re-assigned on the policy.

    Args:
        policy: Policy object; its `target_model` attribute is replaced by
            the device-placed target model.
        obs_space: Observation space, forwarded to `build_ddpg_models`.
        action_space: Action space, forwarded to `build_ddpg_models`.
        config: Configuration, forwarded to `build_ddpg_models`.

    Returns:
        A `(model, TorchDeterministic)` tuple, where `model` is whatever
        `build_ddpg_models` returned.
    """
    ddpg_model = build_ddpg_models(policy, obs_space, action_space, config)

    # TODO(sven): Unify this once we generically support creating more than
    #  one Model per policy. Note: Device placement is done automatically
    #  already for `policy.model` (but not for the target model).
    # NOTE(review): presumably `build_ddpg_models` is what attaches
    #  `target_model` to the policy -- confirm against its definition.
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    target_device = torch.device(device_name)
    policy.target_model = policy.target_model.to(target_device)

    return ddpg_model, TorchDeterministic
def build_ddpg_models_and_action_dist(
        policy: Policy, obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        config: TrainerConfigDict) -> Tuple[ModelV2, ActionDistribution]:
    """Build the DDPG model and select the matching action distribution.

    Args:
        policy: Policy object, forwarded to `build_ddpg_models`.
        obs_space: Observation space, forwarded to `build_ddpg_models`.
        action_space: Action space; also decides which distribution is
            returned.
        config: Configuration, forwarded to `build_ddpg_models`.

    Returns:
        A `(model, dist)` tuple. `dist` is `TorchDirichlet` when
        `action_space` is a `Simplex` instance and `TorchDeterministic`
        otherwise; it is returned as-is, without being called.
    """
    ddpg_model = build_ddpg_models(policy, obs_space, action_space, config)

    # Simplex action spaces get a Dirichlet distribution; everything else
    # falls back to the deterministic one.
    dist = (TorchDirichlet
            if isinstance(action_space, Simplex) else TorchDeterministic)
    return ddpg_model, dist
Example #3
0
def build_ddpg_models_and_action_dist(
        policy: Policy, obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        config: TrainerConfigDict) -> Tuple[ModelV2, ActionDistribution]:
    """Build the DDPG model, place the target model, and pick a distribution.

    Model construction is delegated to `build_ddpg_models`. Afterwards,
    `policy.target_model` is moved to CUDA when available (CPU otherwise)
    and re-assigned on the policy.

    Args:
        policy: Policy object; its `target_model` attribute is replaced by
            the device-placed target model.
        obs_space: Observation space, forwarded to `build_ddpg_models`.
        action_space: Action space; also decides which distribution is
            returned.
        config: Configuration, forwarded to `build_ddpg_models`.

    Returns:
        A `(model, dist)` tuple. `dist` is `TorchDirichlet` when
        `action_space` is a `Simplex` instance and `TorchDeterministic`
        otherwise; it is returned as-is, without being called.
    """
    ddpg_model = build_ddpg_models(policy, obs_space, action_space, config)

    # TODO(sven): Unify this once we generically support creating more than
    #  one Model per policy. Note: Device placement is done automatically
    #  already for `policy.model` (but not for the target model).
    # NOTE(review): presumably `build_ddpg_models` is what attaches
    #  `target_model` to the policy -- confirm against its definition.
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    target_device = torch.device(device_name)
    policy.target_model = policy.target_model.to(target_device)

    # Simplex action spaces get a Dirichlet distribution; everything else
    # falls through to the deterministic one.
    if isinstance(action_space, Simplex):
        return ddpg_model, TorchDirichlet
    return ddpg_model, TorchDeterministic