Python IPWLearner Examples, obp.policy.offline.IPWLearner Python Examples

Example #1

0

Show file

def test_sample_action():
    """Check `sample_action` input validation and the shape of its output."""
    n_actions, len_list = 2, 1

    # fit on two logged rounds with two-dimensional contexts
    train_context = np.array([1.0, 1.0, 1.0, 1.0]).reshape(2, -1)
    learner = IPWLearner(n_actions=n_actions, len_list=len_list)
    learner.fit(
        context=train_context,
        action=np.array([0, 1]),
        reward=np.array([1.0, 0.0]),
        position=np.array([0, 0]),
    )

    # a plain list and a 1D array are both expected to raise ValueError
    invalid_contexts = (
        [1.0, 2.0],
        np.array([1.0, 2.0, 3.0, 4.0]),
    )
    for bad_context in invalid_contexts:
        with pytest.raises(ValueError):
            learner.sample_action(context=bad_context)

    # valid 2D context: output is expected to be (n_rounds, n_actions, len_list)
    eval_context = np.array([1.0, 1.0, 1.0, 1.0]).reshape(2, -1)
    sampled_action = learner.sample_action(context=eval_context)

    assert sampled_action.shape[0] == eval_context.shape[0]
    assert sampled_action.shape[1] == n_actions
    assert sampled_action.shape[2] == len_list

Example #2

0

Show file

def test_opl_predict():
    """Check that `predict` rejects a 1D context and returns the expected shape."""
    n_actions, len_list = 2, 1

    # shape error: a 1D context is expected to raise ValueError
    flat_context = np.array([1.0, 1.0])
    with pytest.raises(ValueError):
        IPWLearner(n_actions=n_actions, len_list=len_list).predict(
            context=flat_context
        )

    # fit on two logged rounds before checking the shape of action_dist
    learner = IPWLearner(n_actions=2, len_list=1)
    learner.fit(
        context=np.array([1.0, 1.0, 1.0, 1.0]).reshape(2, -1),
        action=np.array([0, 1]),
        reward=np.array([1.0, 0.0]),
        position=np.array([0, 0]),
    )

    # the test context has n_rounds = 5 and dim_context = 2
    n_test_rounds = 5
    context_test = np.arange(10).reshape(n_test_rounds, 2)
    action_dist = learner.predict(context=context_test)

    assert action_dist.shape[0] == n_test_rounds
    assert action_dist.shape[1] == n_actions
    assert action_dist.shape[2] == len_list

Example #3

0

Show file

File: test_offline.py Project: aiueola/zr-obp

def test_ipw_learner_predict():
    """Check `IPWLearner.predict` input validation and its output distribution.

    Verifies that a 1D context raises a `ValueError` carrying the expected
    message, and that for a valid 2D context the returned `action_dist` has
    shape (n_rounds, n_actions, len_list) and sums to one over the action axis.
    """
    import re

    n_actions = 2
    len_list = 1

    # shape error: a 1D context must be rejected
    desc = "`context` must be 2D array"
    context = np.array([1.0, 1.0])
    learner = IPWLearner(n_actions=n_actions, len_list=len_list)
    # escape the message so it is matched literally instead of being
    # interpreted as a regular expression
    with pytest.raises(ValueError, match=re.escape(desc)):
        learner.predict(context=context)

    # shape consistency of action_dist
    # training data: n_rounds is 2, dim_context is 2
    context = np.array([1.0, 1.0, 1.0, 1.0]).reshape(2, -1)
    action = np.array([0, 1])
    reward = np.array([1.0, 0.0])
    position = np.array([0, 0])
    learner = IPWLearner(n_actions=n_actions, len_list=len_list)
    learner.fit(context=context,
                action=action,
                reward=reward,
                position=position)

    # test data: n_rounds is 5, dim_context is 2
    context_test = np.arange(10).reshape(5, 2)
    n_rounds = context_test.shape[0]
    action_dist = learner.predict(context=context_test)

    # np.ones builds the intended (n_rounds, len_list) array of ones;
    # np.ones_like on the shape tuple would only yield array([1, 1])
    assert np.allclose(action_dist.sum(1), np.ones((n_rounds, len_list)))
    assert action_dist.shape[0] == n_rounds
    assert action_dist.shape[1] == n_actions
    assert action_dist.shape[2] == len_list

Example #4

0

Show file

def test_create_train_data_for_opl():
    """Check the training data built from a single logged round."""
    learner = IPWLearner(n_actions=2)
    logged_round = {
        "context": np.array([1.0, 1.0]).reshape(1, -1),
        "action": np.array([0]),
        "reward": np.array([1.0]),
        "pscore": np.array([0.5]),
    }

    X, sample_weight, y = learner._create_train_data_for_opl(**logged_round)

    # features are expected to equal the context unchanged
    expected_X = np.array([1.0, 1.0]).reshape(1, -1)
    assert np.allclose(X, expected_X)
    # a weight of 2.0 is expected for reward 1.0 and pscore 0.5
    assert np.allclose(sample_weight, np.array([2.0]))
    # the label is expected to equal the logged action
    assert np.allclose(y, np.array([0]))

Example #5

0

Show file

File: test_offline.py Project: aiueola/zr-obp

def test_ipw_learner_init_base_classifier_list():
    """Check the classifier types held by `IPWLearner` after construction.

    Covers both the default classifier and an explicitly supplied one, and
    inspects the first `len_list` entries of `base_classifier_list`.
    """
    from sklearn.naive_bayes import GaussianNB

    len_list = 2

    # no base_classifier given: LogisticRegression is expected
    default_learner = IPWLearner(n_actions=2, len_list=len_list)
    assert isinstance(default_learner.base_classifier, LogisticRegression)
    assert all(
        isinstance(default_learner.base_classifier_list[pos], LogisticRegression)
        for pos in range(len_list)
    )

    # explicit base_classifier: its type is expected at every position
    custom_learner = IPWLearner(
        n_actions=2, len_list=len_list, base_classifier=GaussianNB()
    )
    assert isinstance(custom_learner.base_classifier, GaussianNB)
    assert all(
        isinstance(custom_learner.base_classifier_list[pos], GaussianNB)
        for pos in range(len_list)
    )

Example #6

0

Show file

def test_ipw_learner_init():
    """Check `IPWLearner` construction with default, invalid and custom classifiers."""
    from sklearn.linear_model import LinearRegression
    from sklearn.naive_bayes import GaussianNB

    len_list = 2

    # no base_classifier given: LogisticRegression is expected
    default_learner = IPWLearner(n_actions=2, len_list=len_list)
    assert isinstance(default_learner.base_classifier, LogisticRegression)
    assert all(
        isinstance(default_learner.base_classifier_list[pos], LogisticRegression)
        for pos in range(len_list)
    )

    # passing LinearRegression as base_classifier is expected to raise ValueError
    with pytest.raises(ValueError):
        IPWLearner(n_actions=2, base_classifier=LinearRegression())

    # explicit base_classifier: its type is expected at every position
    custom_learner = IPWLearner(
        n_actions=2,
        len_list=len_list,
        base_classifier=GaussianNB(),
    )
    assert isinstance(custom_learner.base_classifier, GaussianNB)
    assert all(
        isinstance(custom_learner.base_classifier_list[pos], GaussianNB)
        for pos in range(len_list)
    )

Example #7

0

Show file

File: test_offline.py Project: aiueola/zr-obp

def test_ipw_learner_init_using_invalid_inputs(
    n_actions,
    len_list,
    base_classifier,
    description,
):
    """Expect `IPWLearner` construction to raise a `ValueError` matching `description`.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not visible in this snippet -- confirm against the
    original test module.
    """
    init_kwargs = {
        "n_actions": n_actions,
        "len_list": len_list,
        "base_classifier": base_classifier,
    }
    expected_pattern = f"{description}*"
    with pytest.raises(ValueError, match=expected_pattern):
        IPWLearner(**init_kwargs)

Example #8

0

Show file

File: test_offline.py Project: aiueola/zr-obp

def test_ipw_learner_init_using_valid_inputs(
    n_actions,
    len_list,
    base_classifier,
    description,
):
    """Construct `IPWLearner` from the given inputs and check its policy type.

    `description` is accepted but not used in the body.
    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not visible in this snippet -- confirm.
    """
    init_kwargs = {
        "n_actions": n_actions,
        "len_list": len_list,
        "base_classifier": base_classifier,
    }
    learner = IPWLearner(**init_kwargs)

    # the learner is expected to report itself as an offline policy
    assert learner.policy_type == PolicyType.OFFLINE

Example #9

0

Show file

def test_base_opl_init():
    """Check that invalid `n_actions` / `len_list` values are rejected at construction."""
    # invalid n_actions, then invalid len_list
    invalid_single_args = (
        {"n_actions": 1},
        {"n_actions": "3"},
        {"n_actions": 2, "len_list": 0},
        {"n_actions": 2, "len_list": "3"},
    )
    for bad_kwargs in invalid_single_args:
        with pytest.raises(ValueError):
            IPWLearner(**bad_kwargs)

    # policy_type
    assert IPWLearner(n_actions=2).policy_type == "offline"

    # invalid relationship between n_actions and len_list:
    # both cases request more list positions than there are actions
    invalid_combinations = (
        {"n_actions": 5, "len_list": 10},
        {"n_actions": 2, "len_list": 3},
    )
    for bad_kwargs in invalid_combinations:
        with pytest.raises(ValueError):
            IPWLearner(**bad_kwargs)

Example #10

0

Show file

def test_opl_fit():
    """Check that `fit` accepts consistent inputs and rejects inconsistent ones."""
    # two logged rounds with two-dimensional contexts
    context = np.array([1.0, 1.0, 1.0, 1.0]).reshape(2, -1)
    action = np.array([0, 1])
    reward = np.array([1.0, 0.0])
    position = np.array([0, 0])

    # consistent inputs: fitting is expected to complete without raising
    IPWLearner(n_actions=2, len_list=1).fit(
        context=context, action=action, reward=reward, position=position
    )

    # inconsistency with the shape: the context is passed as a 1D array
    variant_context = np.array([1.0, 1.0, 1.0, 1.0])
    with pytest.raises(AssertionError):
        IPWLearner(n_actions=2, len_list=2).fit(
            context=variant_context,
            action=action,
            reward=reward,
            position=position,
        )

    # len_list > 1, but position is not set
    with pytest.raises(ValueError):
        IPWLearner(n_actions=2, len_list=2).fit(
            context=context, action=action, reward=reward
        )

Example #11

0

Show file

File: test_offline.py Project: aiueola/zr-obp

def test_ipw_learner_fit():
    """Check that `IPWLearner.fit` rejects invalid inputs with the expected messages.

    Covers: a context whose number of rounds differs from the other inputs,
    a missing `position` when `len_list > 1`, negative positions, and
    negative rewards. Every expected message is matched literally.
    """
    import re

    n_rounds = 1000
    dim_context = 5
    n_actions = 3
    len_list = 2
    context = np.ones((n_rounds, dim_context))
    # actions are drawn from the action set, positions from the list slots
    action = np.random.choice(np.arange(n_actions, dtype=int), size=n_rounds)
    reward = np.random.choice(np.arange(2), size=n_rounds)
    position = np.random.choice(np.arange(len_list, dtype=int), size=n_rounds)

    # inconsistency with the shape: one extra round in the context
    # (re.escape is required here: "[0]" and "." are regex metacharacters)
    desc = "Expected `context.shape[0]"
    learner = IPWLearner(n_actions=n_actions, len_list=len_list)
    variant_context = np.random.normal(size=(n_rounds + 1, dim_context))
    with pytest.raises(ValueError, match=re.escape(desc)):
        learner.fit(
            context=variant_context,
            action=action,
            reward=reward,
            position=position,
        )

    # len_list > 1, but position is not set
    desc = "When `self.len_list > 1"
    learner = IPWLearner(n_actions=n_actions, len_list=len_list)
    with pytest.raises(ValueError, match=re.escape(desc)):
        learner.fit(context=context, action=action, reward=reward)

    # position must be non-negative
    desc = "`position` elements must be non-negative integers"
    negative_position = position - 1
    learner = IPWLearner(n_actions=n_actions, len_list=len_list)
    with pytest.raises(ValueError, match=re.escape(desc)):
        learner.fit(
            context=context,
            action=action,
            reward=reward,
            position=negative_position,
        )

    # IPWLearner cannot handle negative rewards
    desc = "A negative value is found in"
    negative_reward = reward - 1.0
    learner = IPWLearner(n_actions=n_actions, len_list=len_list)
    with pytest.raises(ValueError, match=re.escape(desc)):
        learner.fit(
            context=context,
            action=action,
            reward=negative_reward,
            position=position,
        )