예제 #1
0
def test_mopo_impl(
    observation_shape,
    action_size,
    learning_rate,
    optim_factory,
    encoder_factory,
    n_ensembles,
    lam,
    discrete_action,
    scaler,
):
    """Construct a ``MOPOImpl``, build it, and pass it to ``torch_impl_tester``.

    The impl is created with ``use_gpu=False`` and the given ``scaler``.
    ``discrete_action`` is forwarded both to the constructor and to the
    tester (as its ``discrete`` keyword).
    """
    # Positional constructor arguments, kept in the order MOPOImpl receives them.
    ctor_args = (
        observation_shape,
        action_size,
        learning_rate,
        optim_factory,
        encoder_factory,
        n_ensembles,
        lam,
        discrete_action,
    )
    mopo = MOPOImpl(*ctor_args, use_gpu=False, scaler=scaler)
    mopo.build()
    torch_impl_tester(mopo, discrete=discrete_action)
예제 #2
0
def test_mopo_impl(
    observation_shape,
    action_size,
    learning_rate,
    optim_factory,
    encoder_factory,
    n_ensembles,
    lam,
    discrete_action,
    scaler,
    action_scaler,
):
    """Construct a ``MOPOImpl`` by keyword, build it, and run ``torch_impl_tester``.

    The impl is created with ``use_gpu=None``. The ``action_scaler`` is only
    handed to the constructor when ``discrete_action`` is falsy; otherwise
    ``None`` is passed in its place.
    """
    ctor_kwargs = {
        "observation_shape": observation_shape,
        "action_size": action_size,
        "learning_rate": learning_rate,
        "optim_factory": optim_factory,
        "encoder_factory": encoder_factory,
        "n_ensembles": n_ensembles,
        "lam": lam,
        "discrete_action": discrete_action,
        "use_gpu": None,
        "scaler": scaler,
        # No action scaler is supplied when the action space is discrete.
        "action_scaler": None if discrete_action else action_scaler,
    }
    mopo = MOPOImpl(**ctor_kwargs)
    mopo.build()
    torch_impl_tester(mopo, discrete=discrete_action)
예제 #3
0
def test_probabilistic_ensemble_dynamics_impl(
    observation_shape,
    action_size,
    learning_rate,
    optim_factory,
    encoder_factory,
    n_ensembles,
    variance_type,
    discrete_action,
    scaler,
    action_scaler,
    reward_scaler,
):
    """Construct a ``ProbabilisticEnsembleDynamicsImpl``, build it, and test it.

    The impl is created by keyword with ``use_gpu=None``. The
    ``action_scaler`` is replaced by ``None`` when ``discrete_action`` is
    truthy. After ``build()``, the impl is passed to ``torch_impl_tester``
    together with ``discrete_action`` (as ``discrete``) and ``n_ensembles``.
    """
    ctor_kwargs = {
        "observation_shape": observation_shape,
        "action_size": action_size,
        "learning_rate": learning_rate,
        "optim_factory": optim_factory,
        "encoder_factory": encoder_factory,
        "n_ensembles": n_ensembles,
        "variance_type": variance_type,
        "discrete_action": discrete_action,
        "use_gpu": None,
        "scaler": scaler,
        # No action scaler is supplied when the action space is discrete.
        "action_scaler": None if discrete_action else action_scaler,
        "reward_scaler": reward_scaler,
    }
    dynamics = ProbabilisticEnsembleDynamicsImpl(**ctor_kwargs)
    dynamics.build()
    torch_impl_tester(
        dynamics,
        discrete=discrete_action,
        n_ensembles=n_ensembles,
    )
예제 #4
0
def test_mopo_impl(
    observation_shape,
    action_size,
    learning_rate,
    eps,
    weight_decay,
    n_ensembles,
    lam,
    use_batch_norm,
    discrete_action,
    scaler,
):
    """Construct a ``MOPOImpl`` and pass it straight to ``torch_impl_tester``.

    The impl is created with ``use_gpu=False`` and the given ``scaler``;
    ``discrete_action`` is forwarded to the tester as ``discrete``.
    """
    # Positional constructor arguments, kept in the order MOPOImpl receives them.
    ctor_args = (
        observation_shape,
        action_size,
        learning_rate,
        eps,
        weight_decay,
        n_ensembles,
        lam,
        use_batch_norm,
        discrete_action,
    )
    mopo = MOPOImpl(*ctor_args, use_gpu=False, scaler=scaler)
    torch_impl_tester(mopo, discrete=discrete_action)