Code Example #1
File: test_optimizers.py  Project: wx-b/d3rlpy
import pytest
import torch
from torch.optim import Adam

from d3rlpy.models.optimizers import AdamFactory


# parametrize values below are illustrative; the originals are elided in the listing
@pytest.mark.parametrize("lr", [1e-4])
@pytest.mark.parametrize("module", [torch.nn.Linear(2, 3)])
def test_adam_factory(lr, module):
    factory = AdamFactory()

    optim = factory.create(module.parameters(), lr)

    assert isinstance(optim, Adam)
    assert optim.defaults["lr"] == lr

    params = factory.get_params()
    parameters = module.parameters()
    assert isinstance(AdamFactory(**params).create(parameters, lr), Adam)
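Outside the test harness, the same factory drives an ordinary PyTorch update; a minimal self-contained sketch (the linear model and random batch are illustrative, not from the original test):

import torch
from d3rlpy.models.optimizers import AdamFactory

model = torch.nn.Linear(4, 1)  # illustrative dummy module
optim = AdamFactory().create(model.parameters(), lr=1e-3)

x, y = torch.randn(8, 4), torch.randn(8, 1)
loss = torch.nn.functional.mse_loss(model(x), y)
optim.zero_grad()
loss.backward()
optim.step()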
Code Example #2
import pytest

from d3rlpy.models.encoders import DefaultEncoderFactory
from d3rlpy.models.optimizers import AdamFactory
from d3rlpy.models.q_functions import create_q_func_factory
from tests.algos.algo_test import (
    torch_impl_tester,
    DummyScaler,
    DummyActionScaler,
)


@pytest.mark.parametrize("observation_shape", [(100, ), (4, 84, 84)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("actor_learning_rate", [1e-3])
@pytest.mark.parametrize("critic_learning_rate", [1e-3])
@pytest.mark.parametrize("temp_learning_rate", [1e-3])
@pytest.mark.parametrize("alpha_learning_rate", [1e-3])
@pytest.mark.parametrize("actor_optim_factory", [AdamFactory()])
@pytest.mark.parametrize("critic_optim_factory", [AdamFactory()])
@pytest.mark.parametrize("temp_optim_factory", [AdamFactory()])
@pytest.mark.parametrize("alpha_optim_factory", [AdamFactory()])
@pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()])
@pytest.mark.parametrize("q_func_factory", ["mean", "qr", "iqn", "fqf"])
@pytest.mark.parametrize("gamma", [0.99])
@pytest.mark.parametrize("tau", [0.05])
@pytest.mark.parametrize("n_critics", [2])
@pytest.mark.parametrize("bootstrap", [False])
@pytest.mark.parametrize("share_encoder", [False, True])
@pytest.mark.parametrize("initial_temperature", [1.0])
@pytest.mark.parametrize("initial_alpha", [5.0])
@pytest.mark.parametrize("alpha_threshold", [10.0])
@pytest.mark.parametrize("n_action_samples", [10])
@pytest.mark.parametrize("soft_q_backup", [True])
Code Example #3
File: test_bcq_impl.py  Project: wx-b/d3rlpy
import pytest

from d3rlpy.models.encoders import DefaultEncoderFactory
from d3rlpy.models.optimizers import AdamFactory
from d3rlpy.models.q_functions import create_q_func_factory
from tests.algos.algo_test import (
    DummyActionScaler,
    DummyScaler,
    torch_impl_tester,
)


@pytest.mark.parametrize("observation_shape", [(100, ), (1, 48, 48)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("actor_learning_rate", [1e-3])
@pytest.mark.parametrize("critic_learning_rate", [1e-3])
@pytest.mark.parametrize("imitator_learning_rate", [1e-3])
@pytest.mark.parametrize("actor_optim_factory", [AdamFactory()])
@pytest.mark.parametrize("critic_optim_factory", [AdamFactory()])
@pytest.mark.parametrize("imitator_optim_factory", [AdamFactory()])
@pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()])
@pytest.mark.parametrize("q_func_factory", ["mean", "qr", "iqn", "fqf"])
@pytest.mark.parametrize("gamma", [0.99])
@pytest.mark.parametrize("tau", [0.05])
@pytest.mark.parametrize("n_critics", [2])
@pytest.mark.parametrize("lam", [0.75])
@pytest.mark.parametrize("n_action_samples", [10])  # small for test
@pytest.mark.parametrize("action_flexibility", [0.05])
@pytest.mark.parametrize("latent_size", [32])
@pytest.mark.parametrize("beta", [0.5])
@pytest.mark.parametrize("scaler", [None, DummyScaler()])
@pytest.mark.parametrize("action_scaler", [None, DummyActionScaler()])
def test_bcq_impl(
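The listing cuts the signature off here. For context, the lam value in the grid above is BCQ's soft clipped double-Q coefficient (Fujimoto et al., 2019): the critic target blends the ensemble minimum with the maximum. A minimal sketch of that blend (the helper name and shapes are illustrative, not d3rlpy internals):

import torch

def soft_clipped_double_q(q_values: torch.Tensor, lam: float = 0.75) -> torch.Tensor:
    # q_values: (n_critics, batch); weight the pessimistic min against the max
    return lam * q_values.min(dim=0).values + (1.0 - lam) * q_values.max(dim=0).values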
Code Example #4
import pytest

from d3rlpy.dynamics.torch.mopo_impl import MOPOImpl
from d3rlpy.models.optimizers import AdamFactory
from d3rlpy.models.encoders import DefaultEncoderFactory
from tests.algos.algo_test import DummyScaler, DummyActionScaler
from tests.dynamics.dynamics_test import torch_impl_tester


@pytest.mark.parametrize("observation_shape", [(100,)])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("learning_rate", [1e-3])
@pytest.mark.parametrize("optim_factory", [AdamFactory()])
@pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()])
@pytest.mark.parametrize("n_ensembles", [5])
@pytest.mark.parametrize("lam", [1.0])
@pytest.mark.parametrize("discrete_action", [False, True])
@pytest.mark.parametrize("scaler", [None, DummyScaler()])
@pytest.mark.parametrize("action_scaler", [None, DummyActionScaler()])
def test_mopo_impl(
    observation_shape,
    action_size,
    learning_rate,
    optim_factory,
    encoder_factory,
    n_ensembles,
    lam,
    discrete_action,
    scaler,
    action_scaler,
):
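Stacked parametrize decorators multiply out: pytest runs the Cartesian product of every value list, so the three two-valued axes above (discrete_action, scaler, action_scaler) yield 2 × 2 × 2 = 8 variants of test_mopo_impl. A minimal self-contained illustration of the mechanism:

import pytest

@pytest.mark.parametrize("a", [1, 2])
@pytest.mark.parametrize("b", ["x", "y"])
def test_product(a, b):
    # pytest collects 2 x 2 = 4 cases: (1, "x"), (1, "y"), (2, "x"), (2, "y")
    assert isinstance(a, int) and isinstance(b, str)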
Code Example #5
File: fqf.py  Project: navidmdn/d3rlpy
import gym

from d3rlpy.algos import DQN
from d3rlpy.models.optimizers import AdamFactory
from d3rlpy.online.buffers import ReplayBuffer
from d3rlpy.online.explorers import LinearDecayEpsilonGreedy
from d3rlpy.envs import Atari

# get wrapped atari environment
env = Atari(gym.make('BreakoutNoFrameskip-v4'))
eval_env = Atari(gym.make('BreakoutNoFrameskip-v4'), is_eval=True)

# setup algorithm
dqn = DQN(batch_size=32,
          learning_rate=5e-5,
          optim_factory=AdamFactory(eps=1e-2 / 32),
          target_update_interval=10000 // 4,
          q_func_factory='fqf',
          scaler='pixel',
          n_frames=4,
          use_gpu=True)

# replay buffer for experience replay
buffer = ReplayBuffer(maxlen=1000000, env=env)

# epsilon-greedy explorer
explorer = LinearDecayEpsilonGreedy(start_epsilon=1.0,
                                    end_epsilon=0.01,
                                    duration=1000000)

# start training
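The snippet is truncated here; what typically follows this comment in d3rlpy's online examples is a fit_online call wiring the environment, buffer, and explorer together. A hedged sketch (the step budget is illustrative, not from the original file):

dqn.fit_online(env,
               buffer,
               explorer,
               eval_env=eval_env,
               n_steps=1000000)  # illustrative step budget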
Code Example #6
from d3rlpy.algos import DiscreteCQL
from d3rlpy.models.optimizers import AdamFactory
from d3rlpy.datasets import get_atari
from d3rlpy.metrics.scorer import evaluate_on_environment
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from sklearn.model_selection import train_test_split

dataset, env = get_atari('breakout-medium-v0')

_, test_episodes = train_test_split(dataset, test_size=0.2)

cql = DiscreteCQL(optim_factory=AdamFactory(eps=1e-2 / 32),
                  scaler='pixel',
                  n_frames=4,
                  q_func_factory='qr',
                  use_gpu=True)

cql.fit(dataset.episodes,
        eval_episodes=test_episodes,
        n_epochs=2000,
        scorers={
            'environment': evaluate_on_environment(env, epsilon=0.001),
            'value_scale': average_value_estimation_scorer
        })
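As a follow-up, the fitted model can be persisted with d3rlpy's save helpers; a minimal sketch (file names are illustrative):

cql.save_model('discrete_cql.pt')           # reloadable later via cql.load_model(...)
cql.save_policy('discrete_cql_policy.pt')   # exports the greedy policy for deployment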