コード例 #1
0
ファイル: test_exploit.py プロジェクト: rjagerman/chainercb
def test_nr_actions():
    """Exploit over the softmax policy reports 6 actions for every sample."""
    exploit = Exploit(setup_softmax_policy())

    # Seeded minibatch of 32 random samples with 3 features each
    np.random.seed(42)
    features = np.random.random((32, 3)).astype('float32')
    batch = Variable(features)

    # One action count per sample, all expected to equal 6
    counts = exploit.nr_actions(batch)
    assert_allclose(counts.data, np.full(32, 6.0))
コード例 #2
0
ファイル: test_exploit.py プロジェクト: rjagerman/chainercb
def test_draw():
    """Exploit.draw returns the pinned action for each seeded sample."""
    exploit = Exploit(setup_softmax_policy())

    # Seeded minibatch of 32 random samples with 3 features each
    np.random.seed(42)
    features = np.random.random((32, 3)).astype('float32')
    batch = Variable(features)

    # Reference actions recorded for seed 42
    expected_actions = np.array([
        5, 2, 5, 1, 2, 5, 1, 5,
        2, 2, 2, 2, 1, 1, 5, 2,
        1, 2, 2, 1, 1, 2, 5, 2,
        5, 2, 1, 2, 5, 2, 5, 2,
    ])
    drawn = exploit.draw(batch)
    assert_allclose(drawn.data, expected_actions)
コード例 #3
0
ファイル: test_exploit.py プロジェクト: rjagerman/chainercb
def test_uniform():
    """Exploit.uniform returns the pinned action for each seeded sample."""
    exploit = Exploit(setup_softmax_policy())

    # Seeded minibatch of 32 random samples with 3 features each
    np.random.seed(42)
    features = np.random.random((32, 3)).astype('float32')
    batch = Variable(features)

    # Reference actions recorded for seed 42
    expected_actions = np.array([
        5, 2, 4, 4, 1, 1, 2, 1,
        2, 4, 1, 3, 2, 4, 2, 5,
        0, 5, 3, 4, 3, 4, 0, 1,
        1, 1, 4, 3, 0, 3, 4, 2,
    ])
    sampled = exploit.uniform(batch)
    assert_allclose(sampled.data, expected_actions)
コード例 #4
0
ファイル: test_explore.py プロジェクト: rjagerman/chainercb
def test_draw():
    """Explore.draw returns the pinned action for each seeded sample."""
    explorer = Explore(setup_softmax_policy())

    # Seeded minibatch of 32 random samples with 3 features each
    np.random.seed(42)
    features = np.random.random((32, 3)).astype('float32')
    batch = Variable(features)

    # Reference actions recorded for seed 42
    expected_actions = np.array([
        5, 2, 4, 4, 1, 1, 2, 1,
        2, 4, 1, 3, 2, 4, 2, 5,
        0, 5, 3, 4, 3, 4, 0, 1,
        1, 1, 4, 3, 0, 3, 4, 2,
    ])
    drawn = explorer.draw(batch)
    assert_allclose(drawn.data, expected_actions)
コード例 #5
0
ファイル: test_explore.py プロジェクト: rjagerman/chainercb
def test_max():
    """Explore.max returns the pinned action for each seeded sample."""
    explorer = Explore(setup_softmax_policy())

    # Seeded minibatch of 32 random samples with 3 features each
    np.random.seed(42)
    features = np.random.random((32, 3)).astype('float32')
    batch = Variable(features)

    # Reference actions recorded for seed 42
    expected_actions = np.array([
        5, 2, 5, 1, 2, 5, 1, 5,
        2, 2, 2, 2, 1, 1, 5, 2,
        1, 2, 2, 1, 1, 2, 5, 2,
        5, 2, 1, 2, 5, 2, 5, 2,
    ])
    best = explorer.max(batch)
    assert_allclose(best.data, expected_actions)
コード例 #6
0
ファイル: test_exploit.py プロジェクト: rjagerman/chainercb
def test_log_propensity():
    """Exploit.log_propensity is 0 for all 32 seeded (sample, action) pairs."""
    policy = Exploit(setup_softmax_policy())

    # Generate a seeded minibatch of 32 random samples with 3 features each
    np.random.seed(42)
    x = Variable(np.random.random((32, 3)).astype(dtype='float32'))

    # Draw one random action index per sample from [0, nr_actions).
    # np.random.random_integers is deprecated; randint with an exclusive
    # upper bound is the documented replacement.
    nr_actions = int(policy.nr_actions(x)[0].data)
    action = Variable(
        np.random.randint(0, nr_actions, size=32).astype('int32')
    )

    p = policy.log_propensity(x, action)
    expected = np.zeros(32)
    assert_allclose(p.data, expected)
コード例 #7
0
def test_log_propensity():
    """EpsilonGreedy(epsilon=0.25).log_propensity matches pinned values."""
    policy = EpsilonGreedy(setup_softmax_policy(), epsilon=0.25)

    # Generate a seeded minibatch of 32 random samples with 3 features each
    np.random.seed(42)
    x = Variable(np.random.random((32, 3)).astype(dtype='float32'))

    # Draw one random action index per sample from [0, nr_actions).
    # np.random.random_integers is deprecated; randint with an exclusive
    # upper bound is the documented replacement.
    nr_actions = int(policy.nr_actions(x)[0].data)
    action = Variable(
        np.random.randint(0, nr_actions, size=32).astype('int32'))

    p = policy.log_propensity(x, action)

    # Only two distinct values occur. NOTE(review): they are numerically
    # consistent with ln(0.25 / 6) and ln(0.75 + 0.25 / 6), i.e. presumably
    # non-greedy vs. greedy actions -- confirm against EpsilonGreedy.
    expected = np.full(32, -3.1780539)
    expected[[17, 18, 24]] = -0.23361483
    assert_allclose(p.data, expected)
コード例 #8
0
ファイル: test_softmax.py プロジェクト: rjagerman/chainercb
def test_log_propensity():
    """Softmax policy log_propensity matches pinned values for seed 42."""
    policy = setup_softmax_policy()

    # Generate a seeded minibatch of 32 random samples with 3 features each
    np.random.seed(42)
    x = Variable(np.random.random((32, 3)).astype(dtype='float32'))

    # Draw one random action index per sample from [0, nr_actions).
    # np.random.random_integers is deprecated; randint with an exclusive
    # upper bound is the documented replacement.
    nr_actions = int(policy.nr_actions(x)[0].data)
    action = Variable(
        np.random.randint(0, nr_actions, size=32).astype('int32'))

    p = policy.log_propensity(x, action)

    # Reference log-propensities recorded for seed 42
    expected = np.array([
        -1.5074409, -1.399737, -2.5254014, -7.554132,
        -2.960781, -1.5043257, -1.4787177, -4.316305,
        -2.1677716, -5.611033, -1.6950495, -3.6481519,
        -2.9404507, -1.4871595, -1.6609019, -6.7251015,
        -3.69492, -1.146996, -1.0498471, -1.6062899,
        -6.6821046, -1.4551655, -2.7798572, -2.9515395,
        -1.0423813, -1.583216, -3.2810836, -1.5953934,
        -6.2446265, -8.515331, -7.061472, -7.896145,
    ])
    assert_allclose(p.data, expected)
コード例 #9
0
ファイル: test_softmax.py プロジェクト: rjagerman/chainercb
def test_propensity():
    """Softmax policy propensity matches pinned values for seed 42."""
    policy = setup_softmax_policy()

    # Generate a seeded minibatch of 32 random samples with 3 features each
    np.random.seed(42)
    x = Variable(np.random.random((32, 3)).astype(dtype='float32'))

    # Draw one random action index per sample from [0, nr_actions).
    # np.random.random_integers is deprecated; randint with an exclusive
    # upper bound is the documented replacement.
    nr_actions = int(policy.nr_actions(x)[0].data)
    action = Variable(
        np.random.randint(0, nr_actions, size=32).astype('int32'))

    p = policy.propensity(x, action)

    # Reference propensities recorded for seed 42
    expected = np.array([
        2.2147602e-01, 2.4666181e-01, 8.0026180e-02, 5.2394089e-04,
        5.1778451e-02, 2.2216703e-01, 2.2792980e-01, 1.3349118e-02,
        1.1443234e-01, 3.6572900e-03, 1.8359013e-01, 2.6039204e-02,
        5.2841913e-02, 2.2601378e-01, 1.8996757e-01, 1.2003987e-03,
        2.4849439e-02, 3.1758940e-01, 3.4999129e-01, 2.0063059e-01,
        1.2531378e-03, 2.3336175e-01, 6.2047362e-02, 5.2259181e-02,
        3.5261405e-01, 2.0531374e-01, 3.7587512e-02, 2.0282875e-01,
        1.9408551e-03, 2.0037284e-04, 8.5751503e-04, 3.7217548e-04,
    ])
    assert_allclose(p.data, expected)