Exemple #1
0
def test_sac(
    observation_shape, action_size, q_func_factory, scaler, action_scaler
):
    """Construct SAC from the supplied arguments and run the shared checks.

    The algorithm is created with the given Q-function factory, scaler and
    action scaler, then handed to ``algo_tester`` and ``algo_update_tester``.
    The arguments are presumably provided by pytest parametrization or
    fixtures declared outside this snippet.
    """
    # Gather the constructor keywords first so the SAC call stays compact.
    settings = {
        "q_func_factory": q_func_factory,
        "scaler": scaler,
        "action_scaler": action_scaler,
    }
    algo = SAC(**settings)

    algo_tester(algo, observation_shape)
    algo_update_tester(algo, observation_shape, action_size)
Exemple #2
0
def test_sac(
    observation_shape,
    action_size,
    q_func_factory,
    scalers,
    target_reduction_type,
):
    """Construct SAC with all three scalers and run the shared checks.

    ``scalers`` is unpacked into exactly three values (observation scaler,
    action scaler, reward scaler, in that order), which are forwarded to
    ``SAC`` together with the Q-function factory and the target reduction
    type. The instance is then passed to ``algo_tester`` (with the policy
    and Q-function copy checks switched on) and to ``algo_update_tester``.
    """
    obs_scaler, act_scaler, rew_scaler = scalers

    settings = {
        "q_func_factory": q_func_factory,
        "scaler": obs_scaler,
        "action_scaler": act_scaler,
        "reward_scaler": rew_scaler,
        "target_reduction_type": target_reduction_type,
    }
    algo = SAC(**settings)

    # Both copy checks are enabled for this algorithm.
    copy_checks = {"test_policy_copy": True, "test_q_function_copy": True}
    algo_tester(algo, observation_shape, **copy_checks)
    algo_update_tester(algo, observation_shape, action_size)
Exemple #3
0
def test_sac_performance(q_func_factory):
    """Run the pendulum performance check for SAC with three trials.

    The test is skipped when ``q_func_factory`` is ``"iqn"`` or ``"fqf"``;
    the skip message states the reason as computational cost.
    """
    if q_func_factory in ("iqn", "fqf"):
        pytest.skip("IQN is computationally expensive")

    algo_pendulum_tester(SAC(q_func_factory=q_func_factory), n_trials=3)
Exemple #4
0
def test_sac_performance(q_func_type):
    """Run the pendulum performance check for SAC with three trials.

    The test is skipped when ``q_func_type`` is ``'iqn'`` or ``'fqf'``;
    the skip message states the reason as computational cost.
    """
    if q_func_type in ('iqn', 'fqf'):
        pytest.skip('IQN is computationally expensive')

    settings = {'q_func_type': q_func_type}
    algo = SAC(**settings)
    algo_pendulum_tester(algo, n_trials=3)
Exemple #5
0
def test_sac(observation_shape, action_size, q_func_type, scaler):
    """Construct SAC from a Q-function type and scaler, then run the checks.

    The instance is passed to ``algo_tester`` and ``algo_update_tester``
    along with the observation shape (and the action size for the latter).
    """
    settings = {"q_func_type": q_func_type, "scaler": scaler}
    algo = SAC(**settings)

    algo_tester(algo, observation_shape)
    algo_update_tester(algo, observation_shape, action_size)