Example #1
def test_plotting_integration():
    mdp = examples.build_SB_example35()

    trajectories = []
    for _ in range(3):  # 3 trajectories
        trajectory = [mdp.reset()]
        for _ in range(10):  # 10 steps maximum
            state, reward, done, info = mdp.step(
                random.sample(
                    [actions.LEFT, actions.RIGHT, actions.UP, actions.DOWN],
                    1)[0])
            trajectory.append(state)
        trajectories.append(trajectory)

    gwp = GridWorldPlotter(
        mdp.size, mdp.has_absorbing_state
    )  # alternatively you can use GridWorldPlotter.from_mdp(mdp)
    fig = plt.figure(figsize=(10, 4))
    ax = fig.add_subplot(121)

    # trajectory
    gwp.plot_trajectories(ax, trajectories)
    gwp.plot_grid(ax)

    # heatmap
    ax = fig.add_subplot(122)
    gwp.plot_heatmap(ax, trajectories)
    gwp.plot_grid(ax)
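The snippet above omits its imports, which were stripped during extraction. A plausible set is sketched below; the emdp module paths are assumptions rather than documented import locations. The later examples likewise assume numpy as np, torch, and imports of build_SB_example35 and calculate_V_pi from emdp, whose exact modules are not shown here.

import random

import matplotlib.pyplot as plt

from emdp import actions, examples           # assumed module layout
from emdp.gridworld import GridWorldPlotter  # assumed import path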
Example #2
def test_differentiable():
    mdp = build_SB_example35()

    print(mdp.reward)
    # random policy:
    policy = np.ones((mdp.P.shape[0], mdp.P.shape[1]))/mdp.P.shape[1]
    policy = torch.tensor(policy, requires_grad=True).float()
    V_pi = calculate_V_pi(mdp.P, mdp.reward, policy, mdp.discount)
    grads = torch.autograd.grad(V_pi.mean(), [policy])
    assert grads is not None
    for grad in grads:
        assert torch.isfinite(grad).all()
        # compare against a zero tensor of the same shape; comparing with the
        # scalar tensor(0.0) is trivially unequal whenever the shapes differ
        assert not torch.equal(grad, torch.zeros_like(grad))
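Because V_pi is differentiable with respect to the policy, the gradient from this test can drive a simple projected gradient-ascent update. The lines below are an illustrative sketch using the policy and grads from the test above; the step size and the row-wise re-normalisation are arbitrary choices, not part of the library or the test.

# Illustrative follow-up: one ascent step on the mean value, then project
# each row back onto the probability simplex.
with torch.no_grad():
    new_policy = policy + 0.1 * grads[0]
    new_policy = new_policy.clamp(min=1e-8)
    new_policy = new_policy / new_policy.sum(dim=1, keepdim=True)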
Example #3
def test_V_pi():
    mdp = build_SB_example35()

    print(mdp.reward)
    # random policy:
    policy = np.ones((mdp.P.shape[0], mdp.P.shape[1]))/mdp.P.shape[1]

    V_pi = calculate_V_pi(mdp.P, mdp.reward, policy, mdp.discount)

    assert np.allclose(np.round(V_pi, 1),
                       np.array([ 3.3,  8.8,  4.4,  5.3,  1.5,
                                  1.5,  3.0,  2.3,  1.9,  0.5,
                                  0.1,  0.7,  0.7,  0.4, -0.4,
                                 -1.0, -0.4, -0.4, -0.6, -1.2,
                                 -1.9, -1.3, -1.2, -1.4, -2.0]))
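The expected array corresponds to the state-value function of the equiprobable random policy in Sutton & Barto's Example 3.5 (discount 0.9). calculate_V_pi itself is not shown here; the sketch below reproduces the same quantity by solving the Bellman equation V^pi = (I - gamma * P^pi)^(-1) r^pi directly. The array shapes it assumes (P as (S, A, S), reward as (S, A)) are guesses and need not match emdp's conventions.

import numpy as np

def v_pi_closed_form(P, reward, policy, discount):
    """Solve V^pi = r^pi + discount * P^pi V^pi for a fixed policy.

    Assumed shapes: P is (S, A, S), reward is (S, A), policy is (S, A)
    with each row summing to 1.
    """
    P_pi = np.einsum('sa,sat->st', policy, P)     # state-to-state transitions under pi
    r_pi = np.einsum('sa,sa->s', policy, reward)  # expected immediate reward per state
    n_states = P.shape[0]
    return np.linalg.solve(np.eye(n_states) - discount * P_pi, r_pi)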
Example #4
def test_V_pi():
    """Check if computation works."""
    mdp = build_SB_example35()

    print(mdp.reward)
    # random policy:
    policy = np.ones((mdp.P.shape[0], mdp.P.shape[1]))/mdp.P.shape[1]
    policy = torch.from_numpy(policy).float()
    V_pi = calculate_V_pi(mdp.P, mdp.reward, policy, mdp.discount).detach().numpy()

    assert np.allclose(np.round(V_pi, 1),
                       np.array([ 3.3,  8.8,  4.4,  5.3,  1.5,
                                  1.5,  3.0,  2.3,  1.9,  0.5,
                                  0.1,  0.7,  0.7,  0.4, -0.4,
                                 -1.0, -0.4, -0.4, -0.6, -1.2,
                                 -1.9, -1.3, -1.2, -1.4, -2.0]))
Example #5
File: test_examples.py  Project: d3sm0/emdp
def test_SB_example35():

    mdp = build_SB_example35()

    mdp.set_current_state_to((0, 0))
    state, reward, done, _ = mdp.step(actions.UP)
    assert not done
    assert reward == -1
    assert mdp.unflatten_state(state) == (0, 0)

    state, reward, done, _ = mdp.step(actions.RIGHT)
    assert not done
    assert reward == 0
    assert mdp.unflatten_state(state) == (0, 1)

    state, reward, done, _ = mdp.step(actions.RIGHT)
    assert not done
    assert reward == +10
    assert mdp.unflatten_state(state) == (4, 1)
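Example 3.5 also has a second special state B at (0, 3) that pays +5 and teleports to (2, 3) regardless of the action. A hypothetical extension of the test covering it, assuming emdp implements B exactly as in the book:

    # Hypothetical extension (not in the original test): any action taken in
    # state B at (0, 3) should pay +5 and move the agent to (2, 3).
    mdp.set_current_state_to((0, 3))
    state, reward, done, _ = mdp.step(actions.DOWN)
    assert reward == +5
    assert mdp.unflatten_state(state) == (2, 3)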
Example #6
def test_gym_int_observation():
    env = emdp.emdp_gym.gymify(
        examples.build_SB_example35(),
        observation_one_hot=False)
    state = env.reset()
    assert type(state) == int
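For comparison, a counterpart test with one-hot observations might look like the sketch below. That the reset state then comes back as a numpy array (with numpy imported as np) is an assumption, not behaviour documented in this snippet.

def test_gym_one_hot_observation():
    # Assumption: with observation_one_hot=True the state is returned as a
    # one-hot numpy vector rather than an integer index.
    env = emdp.emdp_gym.gymify(
        examples.build_SB_example35(),
        observation_one_hot=True)
    state = env.reset()
    assert isinstance(state, np.ndarray)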
Example #7
    def __init__(self):
        super().__init__(examples.build_SB_example35())
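The fragment above shows only a constructor; it presumably sits inside a wrapper class that is pre-configured with the Example 3.5 grid world. A minimal sketch of such a class, with both the class name and the placeholder base invented for illustration:

class _BaseEnv:
    """Placeholder for whatever base class the original fragment subclasses."""

    def __init__(self, mdp):
        self.mdp = mdp


class SBExample35Env(_BaseEnv):  # name is hypothetical
    def __init__(self):
        super().__init__(examples.build_SB_example35())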