def select_optimal_path(q_table, enviroment, state_evaluate):
    """Replay the learned greedy policy from the start state to the goal.

    Repeatedly takes the action chosen by ``select_optimal_action`` until
    the step reward equals 10 (the goal reward), collecting every visited
    state along the way. The terminal state is dropped from the recorded
    path, a summary is printed, and the path is plotted.

    Side effects: rebinds the module-level ``steps`` and ``steps_desc``
    lists (via ``global``) and appends ``steps_desc`` to ``steps_matrix``.
    """
    global steps, steps_desc

    # Resolve start and goal cells, then map them to state ids.
    start_row, start_col = identifies_state(enviroment, enviromentsize)
    goal_row, goal_col = identifiesgoal_state(enviroment, enviromentsize)
    state = int(state_matrix[start_row][start_col])
    goal_state = int(state_matrix[goal_row][goal_col])

    states = [state]
    steps = []
    steps_desc = []
    reward, next_state = 0, 0
    done = False

    # Walk greedily until the goal reward (10) is observed.
    while not done:
        print(state)
        action = select_optimal_action(state)
        reward, next_state = next_step(action, state, goal_state)
        state = next_state
        states.append(state)
        if reward == 10:
            done = True

    # Drop the terminal (goal) state from the recorded path.
    states = states[:-1]

    define_steps()
    print(q_table, '\n', '\n', states, '\n', steps, '\n', steps_desc,
          '\n', 'path_length:', len(states))
    plot_q_with_steps(enviroment, states, enviromentsize, state_evaluate)
    steps_matrix.append(steps_desc)
def select_optimal_path(q_table, enviroment):
    """Replay the greedy policy to the goal, printing the trace as it goes.

    Steps through the environment with the Q-table's optimal action until
    a reward of at least 20 is collected, accumulating the visited states
    and printing debug output each iteration.

    NOTE(review): this redefines ``select_optimal_path`` with a different
    signature than the earlier definition; if both live in the same module,
    this one shadows the other — confirm that is intentional.
    """
    enviroment[0][0] = 1  # reset enviroment to learn a new goal

    # Resolve start and goal cells, then map them to state ids.
    start_row, start_col = identifies_state(enviroment, enviromentsize)
    goal_row, goal_col = identifiesgoal_state(enviroment, enviromentsize)
    state = int(state_matrix[start_row][start_col])
    goal_state = int(state_matrix[goal_row][goal_col])

    states = [state]
    print(state, goal_state, '\n')
    print('\n', q_table, '\n', enviroment)

    old_state, reward, next_state = 0, 0, 0

    # Keep stepping until the goal reward (>= 20) is collected.
    while int(reward) < 20:
        action = select_optimal_action(q_table, state)
        reward, next_state = next_step(
            q_table, enviroment, action, state, old_state, goal_state)
        old_state = state
        state = next_state
        states.append(state)
        print(states)

    print(state, goal_state, '\n', enviroment)
    print(states, '\n', q_table)