Python Table.table 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: mushroom_rl.utils.table

클래스/타입: Table

메소드/함수: table

hotexamples.com의 예제 수: 3

Python Table.table - 예제 3개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python mushroom_rl.utils.table.Table.table의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Table(20)

table(3)

자주 사용되는 메소드들

Table (20)

table (3)

예제 #1

파일 보기

파일: test_td_policy.py 프로젝트: ml-research/mushroom-rl

def test_boltzmann():
    """Exercise the Boltzmann policy on a seeded, randomly filled Q-table.

    Checks, in order: the probabilities returned for a single state, the
    probability of one state-action pair, the action returned by
    ``draw_action``, the effect of replacing the beta parameter, and the
    pair probability after one ``update`` call.

    The hard-coded expected numbers depend on the seed set on the first
    line, so the order of the calls below must not be changed.
    """
    np.random.seed(88)
    policy = Boltzmann(Parameter(0.1))

    q_table = Table((10, 3))
    q_table.table = np.random.randn(10, 3)
    policy.set_q(q_table)

    state = np.array([2])
    action = np.array([1])

    # Probabilities returned when only the state is given.
    assert np.allclose(
        policy(state),
        np.array([0.30676679, 0.36223227, 0.33100094]),
    )

    # Probability of the explicit (state, action) pair; kept for the
    # comparison after the beta parameter is swapped.
    prob_before = policy(state, action)
    assert np.allclose(prob_before, np.array([0.36223227]))

    # From here on the sampled action replaces the hand-picked one.
    action = policy.draw_action(state)
    assert action.item() == 2

    policy.set_beta(LinearParameter(0.2, 0.1, 2))
    assert policy(state, action) < prob_before

    policy.update(state, action)
    assert np.allclose(policy(state, action), np.array([0.33100094]))

예제 #2

파일 보기

파일: test_td_policy.py 프로젝트: ml-research/mushroom-rl

def test_eps_greedy():
    """Exercise the EpsGreedy policy on a seeded, randomly filled Q-table.

    Checks, in order: the probabilities returned for a single state, the
    probability of one state-action pair, the action returned by
    ``draw_action``, the effect of replacing the epsilon parameter, and
    that two ``update`` calls bring the pair probability back to its
    initial value.

    The hard-coded expected numbers depend on the seed set on the first
    line, so the order of the calls below must not be changed.
    """
    np.random.seed(88)
    policy = EpsGreedy(Parameter(0.1))

    q_table = Table((10, 3))
    q_table.table = np.random.randn(10, 3)
    policy.set_q(q_table)

    state = np.array([2])
    action = np.array([1])

    # Probabilities returned when only the state is given.
    assert np.allclose(
        policy(state),
        np.array([0.03333333, 0.93333333, 0.03333333]),
    )

    # Probability of the explicit (state, action) pair; reused twice below.
    prob_initial = policy(state, action)
    assert np.allclose(prob_initial, np.array([0.93333333]))

    # From here on the sampled action replaces the hand-picked one.
    action = policy.draw_action(state)
    assert action.item() == 1

    new_eps = LinearParameter(0.2, 0.1, 2)
    policy.set_epsilon(new_eps)
    assert policy(state, action) < prob_initial

    # Two consecutive updates, exactly as in the original sequence.
    for _ in range(2):
        policy.update(state, action)
    prob_after_updates = policy(state, action)
    print(new_eps.get_value())
    assert prob_after_updates == prob_initial

예제 #3

파일 보기

파일: test_td_policy.py 프로젝트: ml-research/mushroom-rl

def test_mellowmax():
    """Exercise the Mellowmax policy on a seeded, randomly filled Q-table.

    Checks, in order: the probabilities returned for a single state, the
    probability of one state-action pair, the action returned by
    ``draw_action``, and that both ``set_beta`` and ``update`` raise
    ``RuntimeError`` on this policy.

    The hard-coded expected numbers depend on the seed set on the first
    line, so the order of the calls below must not be changed.
    """
    np.random.seed(88)
    omega = Parameter(3)
    pi = Mellowmax(omega)

    Q = Table((10, 3))
    Q.table = np.random.randn(10, 3)

    pi.set_q(Q)

    s = np.array([2])
    a = np.array([1])

    # Probabilities returned when only the state is given.
    p_s = pi(s)
    p_s_test = np.array([0.08540336, 0.69215916, 0.22243748])
    assert np.allclose(p_s, p_s_test)

    # Probability of the explicit (state, action) pair.
    p_sa = pi(s, a)
    p_sa_test = np.array([0.69215916])
    assert np.allclose(p_sa, p_sa_test)

    # From here on the sampled action replaces the hand-picked one.
    a = pi.draw_action(s)
    a_test = 2
    assert a.item() == a_test

    # Build the parameter outside the try block so that a RuntimeError
    # raised while constructing it cannot be mistaken for the expected
    # failure of set_beta.
    beta = Parameter(0.1)
    try:
        pi.set_beta(beta)
    except RuntimeError:
        pass
    else:
        # Explicit raise (not `assert False`) so the check survives
        # `python -O` and reports what went wrong.
        raise AssertionError('Mellowmax.set_beta did not raise RuntimeError')

    try:
        pi.update(s, a)
    except RuntimeError:
        pass
    else:
        raise AssertionError('Mellowmax.update did not raise RuntimeError')