def test_adam_factory(lr, module):
    """A fresh AdamFactory builds an Adam optimizer with the requested lr,
    and a factory reconstructed from ``get_params()`` builds one as well.
    """
    original = AdamFactory()

    built = original.create(module.parameters(), lr)
    assert isinstance(built, Adam)
    assert built.defaults["lr"] == lr

    # round-trip: a factory rebuilt from the serialized params must still
    # produce an Adam optimizer
    rebuilt = AdamFactory(**original.get_params())
    assert isinstance(rebuilt.create(module.parameters(), lr), Adam)
from d3rlpy.models.encoders import DefaultEncoderFactory from d3rlpy.models.q_functions import create_q_func_factory from tests.algos.algo_test import ( torch_impl_tester, DummyScaler, DummyActionScaler, ) @pytest.mark.parametrize("observation_shape", [(100, ), (4, 84, 84)]) @pytest.mark.parametrize("action_size", [2]) @pytest.mark.parametrize("actor_learning_rate", [1e-3]) @pytest.mark.parametrize("critic_learning_rate", [1e-3]) @pytest.mark.parametrize("temp_learning_rate", [1e-3]) @pytest.mark.parametrize("alpha_learning_rate", [1e-3]) @pytest.mark.parametrize("actor_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("critic_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("temp_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("alpha_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()]) @pytest.mark.parametrize("q_func_factory", ["mean", "qr", "iqn", "fqf"]) @pytest.mark.parametrize("gamma", [0.99]) @pytest.mark.parametrize("tau", [0.05]) @pytest.mark.parametrize("n_critics", [2]) @pytest.mark.parametrize("bootstrap", [False]) @pytest.mark.parametrize("share_encoder", [False, True]) @pytest.mark.parametrize("initial_temperature", [1.0]) @pytest.mark.parametrize("initial_alpha", [5.0]) @pytest.mark.parametrize("alpha_threshold", [10.0]) @pytest.mark.parametrize("n_action_samples", [10]) @pytest.mark.parametrize("soft_q_backup", [True])
from d3rlpy.models.encoders import DefaultEncoderFactory from d3rlpy.models.optimizers import AdamFactory from d3rlpy.models.q_functions import create_q_func_factory from tests.algos.algo_test import ( DummyActionScaler, DummyScaler, torch_impl_tester, ) @pytest.mark.parametrize("observation_shape", [(100, ), (1, 48, 48)]) @pytest.mark.parametrize("action_size", [2]) @pytest.mark.parametrize("actor_learning_rate", [1e-3]) @pytest.mark.parametrize("critic_learning_rate", [1e-3]) @pytest.mark.parametrize("imitator_learning_rate", [1e-3]) @pytest.mark.parametrize("actor_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("critic_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("imitator_optim_factory", [AdamFactory()]) @pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()]) @pytest.mark.parametrize("q_func_factory", ["mean", "qr", "iqn", "fqf"]) @pytest.mark.parametrize("gamma", [0.99]) @pytest.mark.parametrize("tau", [0.05]) @pytest.mark.parametrize("n_critics", [2]) @pytest.mark.parametrize("lam", [0.75]) @pytest.mark.parametrize("n_action_samples", [10]) # small for test @pytest.mark.parametrize("action_flexibility", [0.05]) @pytest.mark.parametrize("latent_size", [32]) @pytest.mark.parametrize("beta", [0.5]) @pytest.mark.parametrize("scaler", [None, DummyScaler()]) @pytest.mark.parametrize("action_scaler", [None, DummyActionScaler()]) def test_bcq_impl(
import pytest from d3rlpy.dynamics.torch.mopo_impl import MOPOImpl from d3rlpy.models.optimizers import AdamFactory from d3rlpy.models.encoders import DefaultEncoderFactory from tests.algos.algo_test import DummyScaler, DummyActionScaler from tests.dynamics.dynamics_test import torch_impl_tester @pytest.mark.parametrize("observation_shape", [(100,)]) @pytest.mark.parametrize("action_size", [2]) @pytest.mark.parametrize("learning_rate", [1e-3]) @pytest.mark.parametrize("optim_factory", [AdamFactory()]) @pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()]) @pytest.mark.parametrize("n_ensembles", [5]) @pytest.mark.parametrize("lam", [1.0]) @pytest.mark.parametrize("discrete_action", [False, True]) @pytest.mark.parametrize("scaler", [None, DummyScaler()]) @pytest.mark.parametrize("action_scaler", [None, DummyActionScaler()]) def test_mopo_impl( observation_shape, action_size, learning_rate, optim_factory, encoder_factory, n_ensembles, lam, discrete_action, scaler, action_scaler, ):
"""Example: online DQN training setup on Atari Breakout with d3rlpy."""
import gym
from d3rlpy.algos import DQN
from d3rlpy.models.optimizers import AdamFactory
from d3rlpy.online.buffers import ReplayBuffer
from d3rlpy.online.explorers import LinearDecayEpsilonGreedy
from d3rlpy.envs import Atari

# get wrapped atari environment (separate eval instance flagged via is_eval)
env = Atari(gym.make('BreakoutNoFrameskip-v4'))
eval_env = Atari(gym.make('BreakoutNoFrameskip-v4'), is_eval=True)

# setup algorithm
# NOTE(review): hyperparameters look like the standard Atari DQN recipe
# (Adam eps scaled by batch size, pixel scaler, 4 stacked frames) — confirm
# against the d3rlpy examples they were taken from
dqn = DQN(batch_size=32,
          learning_rate=5e-5,
          optim_factory=AdamFactory(eps=1e-2 / 32),
          target_update_interval=10000 // 4,
          q_func_factory='fqf',
          scaler='pixel',
          n_frames=4,
          use_gpu=True)

# replay buffer for experience replay
buffer = ReplayBuffer(maxlen=1000000, env=env)

# epsilon-greedy explorer, linearly annealed from 1.0 down to 0.01
explorer = LinearDecayEpsilonGreedy(start_epsilon=1.0,
                                    end_epsilon=0.01,
                                    duration=1000000)

# start training
# NOTE(review): the actual fit/fit_online call is truncated in this chunk
"""Example: offline DiscreteCQL training on the Atari Breakout medium dataset."""
from d3rlpy.algos import DiscreteCQL
from d3rlpy.models.optimizers import AdamFactory
from d3rlpy.datasets import get_atari
from d3rlpy.metrics.scorer import (
    average_value_estimation_scorer,
    evaluate_on_environment,
)
from sklearn.model_selection import train_test_split

# offline dataset plus the matching evaluation environment
dataset, env = get_atari('breakout-medium-v0')

# hold out 20% of episodes for evaluation
_, test_episodes = train_test_split(dataset, test_size=0.2)

cql = DiscreteCQL(
    optim_factory=AdamFactory(eps=1e-2 / 32),
    scaler='pixel',
    n_frames=4,
    q_func_factory='qr',
    use_gpu=True,
)

# scorers: online rollout return and the average Q-value scale
scorers = {
    'environment': evaluate_on_environment(env, epsilon=0.001),
    'value_scale': average_value_estimation_scorer,
}

cql.fit(
    dataset.episodes,
    eval_episodes=test_episodes,
    n_epochs=2000,
    scorers=scorers,
)