# Evaluate the trained agent on 50 random table configurations: for each test,
# sample a state, query the agent for a shot from that position, then restore
# the ball positions and replay the shot with rendering enabled.
# An earlier variant passed the optimal action to the step directly:
#   optimal_action[0], optimal_action[1], a, b, theta = agent.test(...)
#   env.step(optimal_action, a=a, b=b, theta=theta)
nb_test = 50
for i in range(nb_test):
    env.render = False
    state = env.reset()
    pos = env.arraystate2pos(state)  # flat state array -> ball positions
    optimal_action = np.zeros(2)
    action, optimal_action, a, b, theta = agent.test(
        env, nb_episodes=500000, visualize=False,
        nb_max_episode_steps=200, modif=True, pos=pos)
    env.non_random_reset(pos[0], pos[1], pos[2])  # restore the sampled positions
    env.render = True
    env.step(action, rand=optimal_action, a=a, b=b, theta=theta)
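# A minimal sketch of scoring the 50 evaluation rollouts instead of only
# watching them. This ASSUMES env.step returns the standard Gym tuple
# (obs, reward, done, info) and that reward > 0 marks a successful carom;
# the loop above discards the return value, so both are assumptions.
nb_success = 0
for i in range(nb_test):
    env.render = False
    state = env.reset()
    pos = env.arraystate2pos(state)
    action, optimal_action, a, b, theta = agent.test(
        env, nb_episodes=500000, visualize=False,
        nb_max_episode_steps=200, modif=True, pos=pos)
    env.non_random_reset(pos[0], pos[1], pos[2])
    env.render = True
    obs, reward, done, info = env.step(action, rand=optimal_action,
                                       a=a, b=b, theta=theta)  # assumed Gym tuple
    if reward > 0:  # assumed success criterion
        nb_success += 1
print("success rate: %.1f%%" % (100.0 * nb_success / nb_test))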
import numpy as np
from carom import Carom
import gym
from vpython import vector

nb_rows = 1000       # rows in the demonstration table
nb_good_steps = 0    # successful-shot counter (see the sketch below)

env = Carom(render=True)
np.random.seed(32)
env.seed(32)

# Each row of demoTable.npy stores a flat state vector (ball coordinates)
# and the demonstrated action for that state.
demo_table = np.load('demoTable.npy')

# Replay 10 randomly chosen demonstrations with rendering enabled.
for i in range(10):
    index = np.random.choice(nb_rows)
    env.state = demo_table[index][0]
    pos_white = vector(env.state[0], env.state[1], 0)
    pos_yellow = vector(env.state[2], env.state[3], 0)
    pos_red = vector(env.state[4], env.state[5], 0)
    env.non_random_reset(pos_white, pos_yellow, pos_red)
    action = demo_table[index][1]
    env.step(action)
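# A minimal sketch of how nb_good_steps could be used: sweep the full demo
# table and count shots the simulator scores as successful. This ASSUMES
# env.step follows the Gym convention of returning (obs, reward, done, info)
# and that a positive reward marks a good shot; neither is confirmed by the
# replay loop above, whose env.step return value is discarded.
env.render = False  # rendering all 1000 replays would be slow
for index in range(nb_rows):
    env.state = demo_table[index][0]
    env.non_random_reset(vector(env.state[0], env.state[1], 0),
                         vector(env.state[2], env.state[3], 0),
                         vector(env.state[4], env.state[5], 0))
    obs, reward, done, info = env.step(demo_table[index][1])  # assumed Gym tuple
    if reward > 0:  # assumed success criterion
        nb_good_steps += 1
print("%d / %d demo shots succeeded" % (nb_good_steps, nb_rows))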