@pytest.mark.parametrize("critic_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("temp_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("alpha_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()]) @pytest.mark.parametrize("q_func_factory", ["mean", "qr", "iqn", "fqf"]) @pytest.mark.parametrize("gamma", [0.99]) @pytest.mark.parametrize("tau", [0.05]) @pytest.mark.parametrize("n_critics", [2]) @pytest.mark.parametrize("bootstrap", [False]) @pytest.mark.parametrize("share_encoder", [False, True]) @pytest.mark.parametrize("initial_temperature", [1.0]) @pytest.mark.parametrize("initial_alpha", [5.0]) @pytest.mark.parametrize("alpha_threshold", [10.0]) @pytest.mark.parametrize("n_action_samples", [10]) @pytest.mark.parametrize("soft_q_backup", [True]) @pytest.mark.parametrize("scaler", [None, DummyScaler()]) @pytest.mark.parametrize("action_scaler", [None, DummyActionScaler()]) @pytest.mark.parametrize("augmentation", [DrQPipeline()]) def test_cql_impl( observation_shape, action_size, actor_learning_rate, critic_learning_rate, temp_learning_rate, alpha_learning_rate, actor_optim_factory, critic_optim_factory, temp_optim_factory, alpha_optim_factory, encoder_factory, q_func_factory,
@pytest.mark.parametrize('alpha_learning_rate', [1e-3]) @pytest.mark.parametrize('gamma', [0.99]) @pytest.mark.parametrize('tau', [0.05]) @pytest.mark.parametrize('n_critics', [2]) @pytest.mark.parametrize('bootstrap', [False]) @pytest.mark.parametrize('share_encoder', [False, True]) @pytest.mark.parametrize('initial_temperature', [1.0]) @pytest.mark.parametrize('initial_alpha', [1.0]) @pytest.mark.parametrize('alpha_threshold', [0.05]) @pytest.mark.parametrize('lam', [0.75]) @pytest.mark.parametrize('n_action_samples', [4]) @pytest.mark.parametrize('mmd_sigma', [20.0]) @pytest.mark.parametrize('eps', [1e-8]) @pytest.mark.parametrize('use_batch_norm', [True, False]) @pytest.mark.parametrize('q_func_type', ['mean', 'qr', 'iqn', 'fqf']) @pytest.mark.parametrize('scaler', [None, DummyScaler()]) @pytest.mark.parametrize('augmentation', [AugmentationPipeline()]) @pytest.mark.parametrize('n_augmentations', [1]) @pytest.mark.parametrize('encoder_params', [{}]) def test_bear_impl(observation_shape, action_size, actor_learning_rate, critic_learning_rate, imitator_learning_rate, temp_learning_rate, alpha_learning_rate, gamma, tau, n_critics, bootstrap, share_encoder, initial_temperature, initial_alpha, alpha_threshold, lam, n_action_samples, mmd_sigma, eps, use_batch_norm, q_func_type, scaler, augmentation, n_augmentations, encoder_params): impl = BEARImpl(observation_shape, action_size, actor_learning_rate, critic_learning_rate, imitator_learning_rate,