Esempio n. 1
0

# Walk one root-to-leaf path through a stored action tree, letting
# choose_branch() pick the branch at every level, and print the reward
# accumulated along that path.

# Both arrays are read from the current working directory.
tree_actions_index = np.load("tree_actions_index.npy")
Q = np.load("treeQmatrix.npy")

env = Carom(render=True)
env.reset()
actions = env.get_actions()

# One loop iteration per entry along the first axis of the tree array.
depth = tree_actions_index.shape[0]
nb_branches = len(tree_actions_index[0, 0])

# Sum of nb_branches**i for i in [0, depth); not used further in this snippet.
nb_states = sum(nb_branches**exponent for exponent in range(depth))

index2 = 0          # position of the current node within its level
state = 0           # node id handed to choose_branch()
episode_reward = 0

for level in range(depth):
    chosen_branch = choose_branch(Q, state)
    action_index = int(tree_actions_index[level, index2][chosen_branch])

    # The first five entries of the selected action are the step3 arguments.
    shot = actions[action_index]
    reward, coll_r = env.step3(shot[0], shot[1], shot[2], shot[3], shot[4])
    episode_reward += reward

    # Descend to the chosen child: update its in-level position and its id.
    index2 = index2 * nb_branches + chosen_branch
    state = nextState = state * nb_branches + chosen_branch + 1

print("Total reward: %.3f" % episode_reward)
Esempio n. 2
0
# Randomly sample nb_shots distinct actions whose step3() call reports a
# non-zero collision reward, then replay the collected shots with
# env.render switched on.

# Not referenced anywhere else in this snippet.
theta = 5
phi = 85
V = 5

actions = env.get_fixed_actions()
nb_shots = 5
shots = []

while len(shots) < nb_shots:
    # Alternative with fixed ball positions:
    # env.reset(pos_white, pos_yellow, pos_red)
    env.reset()
    action_index = int(np.random.choice(len(actions)))
    shot = actions[action_index]
    action_reward, coll_reward = env.step3(shot[0], shot[1], shot[2],
                                           shot[3], shot[4])

    # A zero collision reward means the shot is discarded; draw again.
    if coll_reward == 0:
        continue
    # A shot with collision reward 1 that was already collected is
    # rejected as a duplicate.
    if coll_reward == 1 and action_index in shots:
        continue
    shots.append(action_index)

env.render = True
for shot_index in shots:
    # Alternative with fixed ball positions:
    # env.reset(pos_white, pos_yellow, pos_red)
    env.reset()
    shot = actions[shot_index]
    # NOTE(review): the last argument is passed decremented by one on replay;
    # the reason is not visible here -- confirm against Carom.step3.
    env.step3(shot[0], shot[1], shot[2], shot[3], shot[4] - 1)
# Alternative with fixed ball positions:
# env.reset(pos_white, pos_yellow, pos_red)