Esempio n. 1
0
def test_mean_q_function_factory(observation_shape, action_size):
    """Exercise ``MeanQFunctionFactory`` for continuous and discrete setups.

    When ``action_size`` is ``None`` the continuous constructor is used,
    otherwise the discrete one. The test also checks the factory's type
    string and that its parameters can be fed back into the constructor.
    """
    enc = _create_encoder(observation_shape, action_size)
    q_factory = MeanQFunctionFactory()

    # Pick the constructor and the expected class from the action space kind.
    if action_size is None:
        built = q_factory.create_continuous(enc)
        expected_cls = ContinuousMeanQFunction
    else:
        built = q_factory.create_discrete(enc, action_size)
        expected_cls = DiscreteMeanQFunction
    assert isinstance(built, expected_cls)

    assert q_factory.get_type() == "mean"

    # Round-trip: a factory rebuilt from exported params reports the same params.
    exported = q_factory.get_params()
    rebuilt = MeanQFunctionFactory(**exported)
    assert rebuilt.get_params() == exported
Esempio n. 2
0
def test_create_continuous_q_function(
    observation_shape,
    action_size,
    batch_size,
    n_ensembles,
    encoder_factory,
    share_encoder,
    bootstrap,
):
    """Build a continuous ensemble Q-function and verify how it is wired.

    Args:
        observation_shape: tuple giving the shape of one observation; it is
            appended to ``(batch_size, )`` to build the input batch.
        action_size: width of the random action batch.
        batch_size: number of rows in the random input batch.
        n_ensembles: number of Q-functions requested from the builder.
        encoder_factory: encoder factory forwarded to the builder.
        share_encoder: forwarded to ``MeanQFunctionFactory``; decides whether
            the members are expected to hold the same encoder object.
        bootstrap: forwarded to ``MeanQFunctionFactory``.
    """
    q_func_factory = MeanQFunctionFactory(share_encoder=share_encoder,
                                          bootstrap=bootstrap)

    q_func = create_continuous_q_function(
        observation_shape,
        action_size,
        encoder_factory,
        q_func_factory,
        n_ensembles,
    )

    assert isinstance(q_func, EnsembleContinuousQFunction)

    # A single-member ensemble is expected to report bootstrap as off,
    # whatever the factory was configured with.
    expected_bootstrap = False if n_ensembles == 1 else bootstrap
    assert q_func.bootstrap == expected_bootstrap

    # check share_encoder
    # NOTE: the loop variable must not be named ``q_func``; rebinding it would
    # make the forward-pass check below run on the last ensemble member
    # instead of the ensemble itself.
    first_encoder = q_func.q_funcs[0].encoder
    for member in q_func.q_funcs[1:]:
        if share_encoder:
            assert first_encoder is member.encoder
        else:
            assert first_encoder is not member.encoder

    # Forward pass through the whole ensemble.
    x = torch.rand((batch_size, ) + observation_shape)
    action = torch.rand(batch_size, action_size)
    y = q_func(x, action)
    assert y.shape == (batch_size, 1)
Esempio n. 3
0
    check_scaler,
    check_use_gpu,
)
from d3rlpy.gpu import Device
from d3rlpy.models.encoders import DefaultEncoderFactory
from d3rlpy.models.q_functions import MeanQFunctionFactory
from d3rlpy.preprocessing.action_scalers import MinMaxActionScaler
from d3rlpy.preprocessing.scalers import MinMaxScaler


@pytest.mark.parametrize("value", ["default", DefaultEncoderFactory()])
def test_check_encoder(value):
    """``check_encoder`` must yield a ``DefaultEncoderFactory``.

    Runs for the string ``"default"`` and for a ready-made factory instance.
    """
    resolved = check_encoder(value)
    assert isinstance(resolved, DefaultEncoderFactory)


@pytest.mark.parametrize("value", ["mean", MeanQFunctionFactory()])
def test_check_q_func(value):
    """``check_q_func`` must yield a ``MeanQFunctionFactory``.

    Runs for the string ``"mean"`` and for a ready-made factory instance.
    """
    resolved = check_q_func(value)
    assert isinstance(resolved, MeanQFunctionFactory)


@pytest.mark.parametrize("value", ["min_max", MinMaxScaler(), None])
def test_check_scaler(value):
    """``check_scaler`` passes ``None`` through and otherwise yields a ``MinMaxScaler``.

    Runs for the string ``"min_max"``, a ready-made scaler instance and ``None``.
    """
    resolved = check_scaler(value)

    # Non-None inputs must resolve to a MinMaxScaler instance.
    if value is not None:
        assert isinstance(resolved, MinMaxScaler)
        return

    # None in, None out.
    assert resolved is None


@pytest.mark.parametrize("value", ["min_max", MinMaxActionScaler(), None])
def test_check_action_scaler(value):
Esempio n. 4
0
    loss = q_func.compute_error(obs_t, act_t, rew_tp1, q_tp1, gamma)
    if bootstrap:
        assert not torch.allclose(ref_td_sum, loss)
    elif q_func_type != "iqn":
        assert torch.allclose(ref_td_sum, loss)

    # check layer connection
    check_parameter_updates(q_func, (obs_t, act_t, rew_tp1, q_tp1))


@pytest.mark.parametrize("observation_shape", [(4, 84, 84), (100, )])
@pytest.mark.parametrize("action_size", [3])
@pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()])
@pytest.mark.parametrize(
    "q_func_factory",
    [MeanQFunctionFactory(), QRQFunctionFactory()])
@pytest.mark.parametrize("n_ensembles", [2])
@pytest.mark.parametrize("batch_size", [100])
@pytest.mark.parametrize("n_quantiles", [32])
@pytest.mark.parametrize("n_actions", [10])
@pytest.mark.parametrize("lam", [0.75])
def test_compute_max_with_n_actions(
    observation_shape,
    action_size,
    encoder_factory,
    q_func_factory,
    n_ensembles,
    batch_size,
    n_quantiles,
    n_actions,
    lam,
Esempio n. 5
0
    policy = create_categorical_policy(observation_shape, action_size,
                                       encoder_factory)

    assert isinstance(policy, CategoricalPolicy)

    x = torch.rand((batch_size, ) + observation_shape)
    y = policy(x)
    assert y.shape == (batch_size, )


@pytest.mark.parametrize("observation_shape", [(4, 84, 84), (100, )])
@pytest.mark.parametrize("action_size", [2])
@pytest.mark.parametrize("batch_size", [32])
@pytest.mark.parametrize("n_ensembles", [1, 5])
@pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()])
@pytest.mark.parametrize("q_func_factory", [MeanQFunctionFactory()])
@pytest.mark.parametrize("share_encoder", [False, True])
def test_create_discrete_q_function(
    observation_shape,
    action_size,
    batch_size,
    n_ensembles,
    encoder_factory,
    q_func_factory,
    share_encoder,
):
    q_func = create_discrete_q_function(
        observation_shape,
        action_size,
        encoder_factory,
        q_func_factory,
Esempio n. 6
0
import pytest
import torch

from d3rlpy.models.builders import create_continuous_q_function
from d3rlpy.models.encoders import DefaultEncoderFactory
from d3rlpy.models.q_functions import MeanQFunctionFactory, QRQFunctionFactory
from d3rlpy.models.torch.q_functions import compute_max_with_n_actions


@pytest.mark.parametrize("observation_shape", [(4, 84, 84), (100,)])
@pytest.mark.parametrize("action_size", [3])
@pytest.mark.parametrize("encoder_factory", [DefaultEncoderFactory()])
@pytest.mark.parametrize(
    "q_func_factory", [MeanQFunctionFactory(), QRQFunctionFactory()]
)
@pytest.mark.parametrize("n_ensembles", [2])
@pytest.mark.parametrize("batch_size", [100])
@pytest.mark.parametrize("n_quantiles", [32])
@pytest.mark.parametrize("n_actions", [10])
@pytest.mark.parametrize("lam", [0.75])
def test_compute_max_with_n_actions(
    observation_shape,
    action_size,
    encoder_factory,
    q_func_factory,
    n_ensembles,
    batch_size,
    n_quantiles,
    n_actions,
    lam,
):