Example 1: Q-learning on a 5x5 grid with stochastic actions
import time

import numpy as np
import matplotlib.pyplot as plt

# GridWorld, construct_goal_reward, construct_human_radius_reward, q_learning,
# value_iteration, StochasticGreedyPolicy, EpsGreedyPolicy, plot_grid_map and
# plot_policy are assumed to come from the surrounding project; their exact
# module paths are not shown in the snippet.
def test_gridworld_q_learning():
    np.random.seed(0)

    N = 5
    goal_pos = np.array([[N-1, N-1]])
    human_pos = np.array([[N-1, 0]])
    human_radius = 2

    # Base step cost of -1 everywhere, +10 at the goal,
    # -10 inside the human's radius
    grid = np.ones((N, N), dtype=float) * -1
    grid = construct_goal_reward(grid, goal_pos, 10)
    grid = construct_human_radius_reward(grid, human_pos, human_radius, -10)

    env = GridWorld(
        dimensions=(N, N),
        init_pos=(0, 0),
        goal_pos=goal_pos,
        reward_grid=grid,
        human_pos=human_pos,
        action_success_rate=0.8,
        render=True,
    )

    # Solve the MDP, then act greedily with respect to the learned values
    mdp_algo = q_learning(env.transition, env.reward, gamma=0.99)
    mdp_algo.run()
    policy = StochasticGreedyPolicy(
        env.action_space(), mdp_algo, env.transition)

    # plot results
    R = env.reward.reshape((N, N)).T
    V = np.asarray(mdp_algo.V).reshape((N, N)).T

    plot_grid_map(R, "Reward", cmap=plt.cm.Reds)
    plot_grid_map(V, "Value Function", cmap=plt.cm.Blues)
    plt.show()

    # Roll out the learned policy until the episode terminates
    obs, rew, done, info = env.reset()
    while not done:
        act = policy.get_action(obs)
        obs, rew, done, info = env.step(act)
        time.sleep(0.2)

    env.close()
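Both examples build their reward grid with construct_goal_reward and construct_human_radius_reward, whose definitions are not shown. Below is a minimal sketch of plausible implementations, assuming positions are arrays of (x, y) cells and the penalty covers a Euclidean disk around each human; the project's actual helpers may differ.

import numpy as np

def construct_goal_reward(grid, goal_pos, reward):
    # Sketch: write the goal reward into each listed goal cell.
    for x, y in goal_pos:
        grid[x, y] = reward
    return grid

def construct_human_radius_reward(grid, human_pos, radius, penalty):
    # Sketch: penalize every cell within `radius` (Euclidean) of any human.
    xs, ys = np.meshgrid(np.arange(grid.shape[0]),
                         np.arange(grid.shape[1]), indexing="ij")
    for hx, hy in human_pos:
        mask = (xs - hx) ** 2 + (ys - hy) ** 2 <= radius ** 2
        grid[mask] = penalty
    return grid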
Example 2: value iteration on a 10x10 grid with two goals and two humans
# Imports as in Example 1.
def test_gridworld_value_iteration():
    np.random.seed(0)

    N = 10
    goal_pos = np.array([[N-1, N-1], [N-1, N-2]])
    human_pos = np.array([[N//2, N//2], [N-1, 0]])
    human_radius = 3

    # Zero base reward, +10 at the goals, -10 inside each human's radius
    grid = np.zeros((N, N), dtype=float)
    grid = construct_goal_reward(grid, goal_pos, 10)
    grid = construct_human_radius_reward(grid, human_pos, human_radius, -10)

    env = GridWorld(
        dimensions=(N, N),
        init_pos=(0, 0),
        goal_pos=goal_pos,
        reward_grid=grid,
        human_pos=human_pos,
        action_success_rate=1,
        render=True,
    )

    # Solve the MDP; run() is needed before reading mdp_algo.V below
    # (as in Example 1)
    mdp_algo = value_iteration(env.transition, env.reward, gamma=0.99)
    mdp_algo.run()
    policy = EpsGreedyPolicy(env.action_space(), mdp_algo)

    # plot results
    R = env.reward.reshape((N, N)).T
    V = np.asarray(mdp_algo.V).reshape((N, N)).T

    plot_grid_map(R, "Reward", cmap=plt.cm.Reds)
    plot_grid_map(V, "Value Function", cmap=plt.cm.Blues)
    plot_policy(policy, (N, N), "Policy", values=V, cmap=plt.cm.Blues)
    plt.show()

    obs, rew, done, info = env.reset()
    while not done:
        act = policy.get_action(obs)
        obs, rew, done, info = env.step(act)
        time.sleep(0.2)

    env.close()
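Example 2 uses value_iteration as a black box. For reference, here is a minimal tabular value-iteration sketch, under the assumption that transition is an array T[s, a, s'] of probabilities and reward is a per-state vector R[s]; the actual solver's interface may differ.

import numpy as np

def value_iteration_sketch(T, R, gamma=0.99, tol=1e-8):
    # T: (S, A, S) transition probabilities; R: (S,) per-state rewards.
    S, A, _ = T.shape
    V = np.zeros(S)
    while True:
        # Bellman optimality backup:
        # Q[s, a] = R[s] + gamma * sum_s' T[s, a, s'] * V[s']
        Q = R[:, None] + gamma * (T @ V)  # shape (S, A)
        V_new = Q.max(axis=1)
        if np.max(np.abs(V_new - V)) < tol:
            return V_new
        V = V_new

An epsilon-greedy policy such as the one in the test would then pick argmax_a Q[s, a] with probability 1 - eps and a uniformly random action otherwise.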