#     pos = env.arraystate2pos(state)
#     print(pos)
#     optimal_action = np.zeros(2)
#     action, optimal_action, a, b, theta = agent.test(env, nb_episodes=500000, visualize=False, nb_max_episode_steps=200, modif = True, pos = pos)
#     env.non_random_reset(pos[0], pos[1], pos[2])
#     env.render = True
#     env.step(action, rand = optimal_action, a = a, b = b, theta = theta)

# state = env.reset()
# pos = env.arraystate2pos(state)
# optimal_action = np.zeros(2)
# optimal_action[0], optimal_action[1], a, b, theta = agent.test(env, nb_episodes=500000, visualize=False, nb_max_episode_steps=200, modif = True, pos = pos)

# env.non_random_reset(pos[0], pos[1], pos[2])
# env.render = True
# env.step(optimal_action, a = a, b = b, theta = theta)
# Evaluate the trained agent on nb_test randomly sampled table
# configurations: for each one the search runs headless, then the
# chosen shot is replayed with rendering enabled so it can be watched.
nb_test = 50
for _ in range(nb_test):
    env.render = False
    state = env.reset()
    pos = env.arraystate2pos(state)
    # agent.test returns the action actually played, the optimal action,
    # and the (a, b, theta) shot parameters.  (The original pre-allocated
    # optimal_action = np.zeros(2) here, but it was immediately
    # overwritten by this unpacking, so it has been removed.)
    action, optimal_action, a, b, theta = agent.test(env,
                                                     nb_episodes=500000,
                                                     visualize=False,
                                                     nb_max_episode_steps=200,
                                                     modif=True,
                                                     pos=pos)
    # Restore the sampled configuration and render the chosen shot.
    env.non_random_reset(pos[0], pos[1], pos[2])
    env.render = True
    env.step(action, rand=optimal_action, a=a, b=b, theta=theta)
# --- Esempio n. 2 ---
from carom import Carom
from Constants import *
import numpy as np
from vpython import sleep


def choose_branch(Q, state):
    """Greedy branch selection: index of the largest Q-value in row *state*."""
    return Q[state].argmax()


# Load the per-node action lookup table and the trained Q matrix from disk.
tree_actions_index = np.load("tree_actions_index.npy")
Q = np.load("treeQmatrix.npy")

env = Carom(render=True)
env.reset()
actions = env.get_actions()

index2 = 0
state = 0
episode_reward = 0
nb_branches = len(tree_actions_index[0, 0])

# Total number of tree nodes: nb_branches**i nodes at each level i.
nb_states = sum(nb_branches ** depth
                for depth in range(tree_actions_index.shape[0]))

# Replay the greedy path through the shot tree, one level at a time.
# NOTE(review): this fragment is truncated — the env.step3(...) call below
# is cut off mid-argument-list; the rest of the loop body is not visible here.
for level in range(tree_actions_index.shape[0]):
    # Pick the branch with the highest Q-value for the current state.
    chosen_branch = choose_branch(Q, state)
    action_index = int(tree_actions_index[level, index2][chosen_branch])
    reward, coll_r = env.step3(actions[action_index][0],
                               actions[action_index][1],
#num_episodes = 2000
#lr = .8
#y = .95
# Headless environment for the tree-building/search phase.
env = Carom(render=False)
#tree_states_index = np.zeros((goal_points,nb_branches**(goal_points-1)))
# One row per tree level, one slot per node at the deepest level.
# NOTE(review): goal_points, nb_branches and myList are defined elsewhere
# (presumably in Constants, imported with *) — confirm against that module.
tree_actions_index = np.zeros((goal_points, nb_branches**(goal_points - 1)),
                              dtype=myList)

#print(tree_states_index)
actions = env.get_actions()

# Root state: flattened (x, y) positions of the white, yellow and red balls.
states_list = [(pos_white.x, pos_white.y, pos_yellow.x, pos_yellow.y,
                pos_red.x, pos_red.y)]
#actions =[(0,0,0,20,4),(0,0,0,130,3),(0,0,0,1,8)]
#Q = np.zeros((1, len(actions)))
env.reset()
#env.step(0,0,0,90,5) #a, b, thetha, phi, Vb
#for i in range(num_episodes):
level = 0        # current depth in the shot tree
state_index = -1 # index of the parent state currently being expanded
# Expand the shot tree one level per iteration until enough points are scored.
# NOTE(review): total_points and sum_goal_points are maintained elsewhere;
# this fragment is truncated — the loop body continues past this excerpt.
while total_points < sum_goal_points:
    level += 1
    for i in range(nb_branches**level):
        # A new parent state is consumed every nb_branches children.
        if (i % nb_branches == 0 or i == 0):
            state_index += 1
            # Restore the ball positions recorded for this parent state.
            pos_white = vector(states_list[state_index][0],
                               states_list[state_index][1], 0)
            pos_yellow = vector(states_list[state_index][2],
                                states_list[state_index][3], 0)
            pos_red = vector(states_list[state_index][4],
                             states_list[state_index][5], 0)