#     pos = env.arraystate2pos(state)
#     print(pos)
#     optimal_action = np.zeros(2)
#     action, optimal_action, a, b, theta = agent.test(env, nb_episodes=500000, visualize=False, nb_max_episode_steps=200, modif = True, pos = pos)
#     env.non_random_reset(pos[0], pos[1], pos[2])
#     env.render = True
#     env.step(action, rand = optimal_action, a = a, b = b, theta = theta)

# state = env.reset()
# pos = env.arraystate2pos(state)
# optimal_action = np.zeros(2)
# optimal_action[0], optimal_action[1], a, b, theta = agent.test(env, nb_episodes=500000, visualize=False, nb_max_episode_steps=200, modif = True, pos = pos)

# env.non_random_reset(pos[0], pos[1], pos[2])
# env.render = True
# env.step(optimal_action, a = a, b = b, theta = theta)
# Run a batch of evaluation roll-outs: for each trial, sample a random
# start state, query the agent for its action, then replay that action
# from the exact same start position with rendering enabled.
nb_test = 50
for i in range(nb_test):
    env.render = False
    state = env.reset()
    pos = env.arraystate2pos(state)
    # agent.test returns the chosen action plus the optimal action and
    # the (a, b, theta) shot parameters found during the search.
    # (The previous `optimal_action = np.zeros(2)` pre-assignment was
    # dead code: it was immediately overwritten by this unpack.)
    action, optimal_action, a, b, theta = agent.test(env,
                                                     nb_episodes=500000,
                                                     visualize=False,
                                                     nb_max_episode_steps=200,
                                                     modif=True,
                                                     pos=pos)
    # Restore the sampled ball positions and replay the shot visually.
    env.non_random_reset(pos[0], pos[1], pos[2])
    env.render = True
    env.step(action, rand=optimal_action, a=a, b=b, theta=theta)
# ---- Code example #2 ----
import numpy as np
from carom import Carom
import gym
from vpython import *

# Replay randomly chosen demonstrations from a pre-recorded table of
# (state, action) pairs: restore the recorded ball positions, then
# execute the recorded action in the rendered environment.
nb_rows = 1000      # size of the demonstration table
nb_good_steps = 0   # NOTE(review): unused in this chunk — presumably updated later; verify
env = Carom(render=True)
np.random.seed(32)
env.seed(32)
demo_table = np.load('demoTable.npy')

for i in range(10):
    # Use the named constant instead of the hard-coded 1000 so the
    # sampling range stays in sync with the declared table size.
    index = np.random.choice(nb_rows)
    env.state = demo_table[index][0]
    # First six state entries are read as (x, y) pairs for the white,
    # yellow, and red balls — assumed layout; matches the variable names.
    pos_white = vector(env.state[0], env.state[1], 0)
    pos_yellow = vector(env.state[2], env.state[3], 0)
    pos_red = vector(env.state[4], env.state[5], 0)
    env.non_random_reset(pos_white, pos_yellow, pos_red)
    action = demo_table[index][1]
    env.step(action)