Exemple #1
0
def test_negative_reward(make_ready_env, datums):
    """Check the reward sequence 1.0, 0.5, 0.5, 0.25 over four steps.

    The actions alternate between ``[0, 1]`` and ``[1, 0]``
    (presumably portfolio weights -- confirm against the environment).
    """
    datums.add().rows([1], [1], [0.5], [1], [0.5])
    env = make_ready_env(cash=10)

    # (action, expected reward) pairs, applied in order.
    schedule = (
        ([0, 1], 1.0),
        ([1, 0], 0.5),
        ([0, 1], 0.5),
        ([1, 0], 0.25),
    )
    for action, expected in schedule:
        assert unpack_reward(env.step(action)) == expected
Exemple #2
0
def test_commission_fees_are_deducted_on_all_shifted_assets(
        make_ready_env, datums):
    """Check rewards when funds move across two data series with a fee.

    The environment is built with ``commission=0.1``. The first step
    spreads the allocation as ``[0, 0.6, 0.4]`` and the second moves
    everything back to ``[1, 0, 0]``.
    """
    datums.add().rows([1], [1], [3])
    datums.add().rows([1], [1], [0.5])
    env = make_ready_env(cash=10, commission=0.1)
    # Commission-derived rewards are floating-point products, so every
    # assertion uses approx rather than exact equality.
    assert unpack_reward(env.step([0, 0.6, 0.4])) == approx(0.9)
    assert unpack_reward(env.step([1, 0, 0])) == approx((2 * 0.9) * 0.9)
Exemple #3
0
def test_immediate_relative_reward(make_ready_env, datums):
    """Check RelativeReward yields 1, 0.5, 1, 2 for a constant action.

    Every step uses the same action ``[0, 1]``; only the expected
    reward changes from step to step.
    """
    datums.add().rows([1], [1], [0.5], [0.5], [1])
    env = RelativeReward(make_ready_env(cash=10))

    for expected in (1, 0.5, 1, 2):
        assert unpack_reward(env.step([0, 1])) == expected
Exemple #4
0
def test_positive_reward(make_ready_env, datums):
    """Check the reward sequence 1.0, 2.0, 2.0, 4.0 over four steps.

    The actions alternate between ``[0, 1]`` and ``[1, 0]``
    (presumably portfolio weights -- confirm against the environment).
    """
    datums.add().rows([1], [1], [2], [1], [2])
    env = make_ready_env(cash=10)

    # (action, expected reward) pairs, applied in order.
    schedule = (
        ([0, 1], 1.0),
        ([1, 0], 2.0),
        ([0, 1], 2.0),
        ([1, 0], 4.0),
    )
    for action, expected in schedule:
        assert unpack_reward(env.step(action)) == expected
def test_each_step_outside_of_goal_returns_a_penalty(env, walk_len,
                                                     steps_to_edge):
    """Check that steps in both phases are rewarded with ``env.penalty``.

    First ``steps_to_edge`` steps are taken with action ``0``, then
    ``walk_len - 2`` steps with action ``1``; every one of them must
    return the penalty.
    """
    # Phase one: action 0, repeated steps_to_edge times.
    for _ in range(steps_to_edge):
        assert unpack_reward(env.step(0)) == env.penalty

    # Phase two: action 1, repeated walk_len - 2 times.
    for _ in range(walk_len - 2):
        assert unpack_reward(env.step(1)) == env.penalty
Exemple #6
0
def test_final_value_of_portfolio_as_only_reward(make_ready_env, datums):
    """Check OnlyFinalReward reports 0.0 until the fourth step, then 4.0."""
    datums.add().rows([1], [1], [2], [1], [2])
    env = OnlyFinalReward(make_ready_env(cash=10))

    # The first three steps are expected to carry no reward.
    for action in ([0, 1], [1, 0], [0, 1]):
        assert unpack_reward(env.step(action)) == 0.0

    # The fourth step is expected to report the whole reward.
    final_reward = unpack_reward(env.step([1, 0]))
    assert final_reward == 4.0
Exemple #7
0
def test_commission_fees_are_deducted(make_ready_env, datums):
    """Check rewards over four alternating steps with ``commission=0.1``.

    Each expected value is written as a product of the factors the
    test anticipates, so the arithmetic stays readable.
    """
    datums.add().rows([1], [1], [2], [1], [2])
    env = make_ready_env(cash=10, commission=0.1)
    # Commission-derived rewards are floating-point products, so every
    # assertion uses approx rather than exact equality.
    assert unpack_reward(env.step([0, 1])) == approx(0.9)
    assert unpack_reward(env.step([1, 0])) == approx((2 * 0.9) * 0.9)
    assert unpack_reward(env.step([0, 1])) == approx((2 * 0.9) * 0.9**2)
    assert unpack_reward(env.step([1, 0])) == approx(
        (2 * (2 * 0.9) * 0.9**2) * 0.9)
Exemple #8
0
def test_relative_reward_resets_properly(make_ready_env, datums):
    """Check RelativeReward gives the same rewards again after a reset.

    Two steps with action ``[0, 1]`` must yield 1 then 0.5, both before
    and after ``env.reset()``.
    """
    datums.add().rows([1], [1], [0.5])
    env = RelativeReward(make_ready_env(cash=10))

    def check_episode():
        # Same two-step expectation for every episode.
        for expected in (1, 0.5):
            assert unpack_reward(env.step([0, 1])) == expected

    check_episode()
    env.reset()
    check_episode()
Exemple #9
0
def test_combine_reward_wrappers(make_ready_env, datums, baseline_datums):
    """Check OnlyFinalReward wrapped around SharpRatioReward.

    The first three steps must report 0. The fourth is compared with
    mean / sample standard deviation (``ddof=1``) of
    ``[-0.2, 0.6, 0.4, 2]``.
    """
    datums.add().rows([1], [1], [2], [1], [2])
    baseline_datums.rows(1.0, 1.2, 1.4, 1.6, 2)
    base_env = make_ready_env(cash=10, baseline=baseline_datums)
    env = OnlyFinalReward(SharpRatioReward(base_env))

    for action in ([0, 1], [1, 0], [0, 1]):
        assert unpack_reward(env.step(action)) == 0

    samples = [-0.2, 0.6, 0.4, 2]
    expected = np.mean(samples) / np.std(samples, ddof=1)
    assert unpack_reward(env.step([1, 0])) == approx(expected)
Exemple #10
0
def test_calculate_growing_sharp_ratio(make_ready_env, datums,
                                       baseline_datums):
    """Check SharpRatioReward over a growing window of samples.

    The first reward is compared with -0.2 directly. Each later reward
    is compared with mean / sample standard deviation (``ddof=1``) of
    the first 2, 3 and 4 values of ``[-0.2, 0.6, 0.4, 2]``.
    """
    datums.add().rows([1], [1], [2], [1], [2])
    baseline_datums.rows(1.0, 1.2, 1.4, 1.6, 2)
    env = SharpRatioReward(make_ready_env(cash=10, baseline=baseline_datums))

    def ratio(values):
        # Mean over sample standard deviation of the given values.
        return np.mean(values) / np.std(values, ddof=1)

    samples = [-0.2, 0.6, 0.4, 2]

    # With a single sample the expected reward is the sample itself.
    assert unpack_reward(env.step([0, 1])) == approx(samples[0])

    # Each later step extends the window by one more sample.
    later_actions = ([1, 0], [0, 1], [1, 0])
    for count, action in enumerate(later_actions, start=2):
        expected = ratio(samples[:count])
        assert unpack_reward(env.step(action)) == approx(expected)
Exemple #11
0
def test_commission_fees_are_not_deducted_when_assets_are_not_moved(
        make_ready_env, datums):
    """Check two idle steps each return 1.0 even with ``commission=0.1``."""
    datums.add().rows([1], [1], [2])
    env = make_ready_env(cash=10, commission=0.1)

    for _ in range(2):
        reward = unpack_reward(idle_step(env))
        assert reward == 1.0
Exemple #12
0
def test_idle_reward(make_ready_env, datums):
    """Check that a single idle step returns a reward of 1.0."""
    datums.add().rows([1], [1], [2])
    env = make_ready_env(cash=10)

    reward = unpack_reward(idle_step(env))
    assert reward == 1.0
def test_each_step_gives_a_reward_of_minus_one(env, idle):
    """Check that one step with the ``idle`` action after a reset gives -1."""
    env.reset()
    reward = unpack_reward(env.step(idle))
    assert reward == -1