def select_optimal_path(q_table, enviroment, state_evaluate):
    """Follow the greedy policy from the agent's current state until the goal
    is reached, recording the visited states as the optimal path."""
    global steps, steps_desc
    # Locate the agent's current cell and the goal cell in the grid.
    i, j = identifies_state(enviroment, enviromentsize)
    k, l = identifiesgoal_state(enviroment, enviromentsize)
    state = int(state_matrix[i][j])
    goal_state = int(state_matrix[k][l])
    states = [state]
    steps = []
    steps_desc = []
    reward, next_state = 0, 0
    done = False
    while not done:
        print(state)  # debug: current state
        # Greedily pick the best action for the current state and take it.
        action = select_optimal_action(state)
        reward, next_state = next_step(action, state, goal_state)
        state = next_state
        states.append(state)
        # A reward of 10 signals that the goal state has been reached.
        if reward == 10:
            done = True
    # Drop the last recorded state from the path.
    states = states[:-1]
    define_steps()
    print(q_table, '\n', '\n', states, '\n', steps, '\n', steps_desc, '\n',
          'path_length:', len(states))
    plot_q_with_steps(enviroment, states, enviromentsize, state_evaluate)
    steps_matrix.append(steps_desc)
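The helpers select_optimal_action and next_step are project-specific and not shown in this snippet. As a rough sketch of what the greedy action lookup could look like, assuming the Q-table is a 2D NumPy array indexed by state row (the helper name and signature below are illustrative, not the project's actual implementation):

import numpy as np

def select_optimal_action_sketch(q_table, state):
    # Pick the action with the highest Q-value for this state (greedy policy).
    # Assumes q_table has shape (n_states, n_actions).
    return int(np.argmax(q_table[state]))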
Code example #2
def select_optimal_path(q_table, enviroment):
    # Reset the environment so a new goal can be learned.
    enviroment[0][0] = 1
    # Locate the agent's current cell and the goal cell in the grid.
    i, j = identifies_state(enviroment, enviromentsize)
    k, l = identifiesgoal_state(enviroment, enviromentsize)
    state = int(state_matrix[i][j])
    goal_state = int(state_matrix[k][l])
    states = [state]
    print(state, goal_state, '\n')
    print('\n', q_table, '\n', enviroment)
    old_state, reward, next_state = 0, 0, 0
    # Follow the greedy policy until the goal reward (20) is collected.
    while int(reward) < 20:
        action = select_optimal_action(q_table, state)
        reward, next_state = next_step(
            q_table, enviroment, action, state, old_state, goal_state)
        old_state = state
        state = next_state
        states.append(state)
        print(states)  # debug: path so far
        print(state, goal_state, '\n', enviroment)
    print(states, '\n', q_table)
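Both examples follow the same pattern: starting from the current state, repeatedly take the argmax action from the Q-table until the goal reward is observed, collecting the visited states as the optimal path. A self-contained toy illustration of that pattern on a hand-written Q-table (all values, names, and rewards below are made up for demonstration only):

import numpy as np

# Toy 4-state chain: action 0 = stay, action 1 = move right; state 3 is the goal.
q_table = np.array([
    [0.1, 0.9],   # state 0: moving right has the higher Q-value
    [0.2, 0.8],   # state 1
    [0.3, 0.7],   # state 2
    [0.0, 0.0],   # state 3: terminal
])
GOAL_STATE, GOAL_REWARD = 3, 10

def next_step_toy(action, state):
    # Deterministic transition: action 1 moves right, action 0 stays put.
    next_state = min(state + action, GOAL_STATE)
    reward = GOAL_REWARD if next_state == GOAL_STATE else 0
    return reward, next_state

state, path, reward = 0, [0], 0
while reward != GOAL_REWARD:
    action = int(np.argmax(q_table[state]))  # greedy action
    reward, state = next_step_toy(action, state)
    path.append(state)

print(path)  # [0, 1, 2, 3]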