# Run nb_test evaluation episodes: for each random starting position, query the
# trained agent for its shot, then restore that exact position and replay the
# shot with rendering enabled so it can be watched.
# (Two stale commented-out copies of this loop were removed, along with a dead
# `optimal_action = np.zeros(2)` that was immediately overwritten below.)
nb_test = 50
for i in range(nb_test):
    env.render = False
    state = env.reset()
    # Recover the ball positions from the encoded state so the same layout can
    # be restored after the (non-rendered) agent query.
    pos = env.arraystate2pos(state)
    # NOTE(review): agent.test is expected to return the chosen action plus the
    # perturbation parameters (rand, a, b, theta) — confirm against the agent API.
    action, optimal_action, a, b, theta = agent.test(
        env,
        nb_episodes=500000,
        visualize=False,
        nb_max_episode_steps=200,
        modif=True,
        pos=pos,
    )
    # Restore the identical position and replay the chosen shot, visibly.
    env.non_random_reset(pos[0], pos[1], pos[2])
    env.render = True
    env.step(action, rand=optimal_action, a=a, b=b, theta=theta)
from carom import Carom
from Constants import *
import numpy as np
from vpython import sleep


def choose_branch(Q, state):
    """Return the greedy branch index for `state`, i.e. argmax over Q[state]."""
    chosen_branch = Q[state].argmax()
    return chosen_branch


# Load the precomputed action tree and its Q-matrix from disk.
tree_actions_index = np.load("tree_actions_index.npy")
Q = np.load("treeQmatrix.npy")

# Rendered environment: this script replays the best action sequence visually.
env = Carom(render=True)
env.reset()
actions = env.get_actions()

nb_states = 0
index2 = 0
state = 0
episode_reward = 0

# Fan-out of each tree node.
# NOTE(review): assumes every node stores the same number of branches — confirm
# against the tree-building script.
nb_branches = len(tree_actions_index[0, 0])

# Total number of states in a full tree of this depth: sum of nb_branches**i
# over the levels.
for i in range(tree_actions_index.shape[0]):
    nb_states += nb_branches**i

# Walk the tree one level at a time, always following the highest-Q branch.
for level in range(tree_actions_index.shape[0]):
    chosen_branch = choose_branch(Q, state)
    action_index = int(tree_actions_index[level, index2][chosen_branch])
    # NOTE(review): the call below is cut off in this chunk — the remaining
    # arguments continue beyond the visible source.
    reward, coll_r = env.step3(actions[action_index][0], actions[action_index][1],
#num_episodes = 2000 #lr = .8 #y = .95 env = Carom(render=False) #tree_states_index = np.zeros((goal_points,nb_branches**(goal_points-1))) tree_actions_index = np.zeros((goal_points, nb_branches**(goal_points - 1)), dtype=myList) #print(tree_states_index) actions = env.get_actions() states_list = [(pos_white.x, pos_white.y, pos_yellow.x, pos_yellow.y, pos_red.x, pos_red.y)] #actions =[(0,0,0,20,4),(0,0,0,130,3),(0,0,0,1,8)] #Q = np.zeros((1, len(actions))) env.reset() #env.step(0,0,0,90,5) #a, b, thetha, phi, Vb #for i in range(num_episodes): level = 0 state_index = -1 while total_points < sum_goal_points: level += 1 for i in range(nb_branches**level): if (i % nb_branches == 0 or i == 0): state_index += 1 pos_white = vector(states_list[state_index][0], states_list[state_index][1], 0) pos_yellow = vector(states_list[state_index][2], states_list[state_index][3], 0) pos_red = vector(states_list[state_index][4], states_list[state_index][5], 0)