# Replay one episode by greedily descending the learned action tree:
# at each tree level, choose_branch() picks the branch with the best Q-value
# for the current tree-node state, and the matching action is executed.
tree_actions_index = np.load("tree_actions_index.npy")
Q = np.load("treeQmatrix.npy")

env = Carom(render=True)
env.reset()
actions = env.get_actions()

nb_branches = len(tree_actions_index[0, 0])
depth = tree_actions_index.shape[0]
# Total number of tree nodes: sum of nb_branches**d over every depth level.
# NOTE(review): computed but not used below — presumably kept for reference.
nb_states = sum(nb_branches ** d for d in range(depth))

index2 = 0          # index of the current node within its tree level
state = 0           # flat node index used as the Q-matrix row
episode_reward = 0

for level in range(depth):
    chosen_branch = choose_branch(Q, state)
    action_index = int(tree_actions_index[level, index2][chosen_branch])
    shot = actions[action_index]
    # step3 takes the five shot parameters stored for this action.
    reward, coll_r = env.step3(shot[0], shot[1], shot[2], shot[3], shot[4])
    episode_reward += reward
    # Advance to the chosen child: per-level index and flat state index.
    index2 = index2 * nb_branches + chosen_branch
    state = state * nb_branches + chosen_branch + 1

print("Total reward: %.3f" % (episode_reward))
# Sample nb_shots *distinct* successful shots at random, then replay them
# with rendering enabled so they can be watched.
theta = 5
phi = 85
V = 5
actions = env.get_fixed_actions()
nb_shots = 5
shots = []

for _ in range(nb_shots):
    coll_reward = 0
    # Keep drawing random actions until one yields a collision reward and
    # has not been recorded yet (a repeat resets coll_reward to keep looping).
    while coll_reward == 0:
        # env.reset(pos_white, pos_yellow, pos_red)
        env.reset()
        candidate = int(np.random.choice(len(actions)))
        params = actions[candidate]
        action_reward, coll_reward = env.step3(
            params[0], params[1], params[2], params[3], params[4]
        )
        if coll_reward == 1 and candidate in shots:
            coll_reward = 0
    shots.append(candidate)

# Replay the collected shots with rendering on.
env.render = True
for shot_index in shots:
    # env.reset(pos_white, pos_yellow, pos_red)
    env.reset()
    replay = actions[shot_index]
    # NOTE(review): the last parameter is offset by -1 on replay — looks
    # deliberate (e.g. a velocity tweak for display), but confirm intent.
    env.step3(replay[0], replay[1], replay[2], replay[3], replay[4] - 1)
# env.reset(pos_white, pos_yellow, pos_red)