def search_for_one_solution(problem_id, map_name, plot_or_not):
    reward_hole = 0.0      # reward for falling into a hole; should be <= 0.0
    is_stochastic = False  # deterministic dynamics for A-star search
    if map_name == '4x4-base':
        n_dim = 4
    else:
        n_dim = 8

    env = LochLomondEnv(problem_id=problem_id, is_stochastic=is_stochastic,
                        reward_hole=reward_hole, map_name_base=map_name)
    env.reset()
    # Create a dict representation of the state space
    state_space_locations, state_space_actions, state_initial_id, state_goal_id = env2statespace(env)

    #--------------SOLUTION--------------#
    maze_map = UndirectedGraph(state_space_actions)
    maze_map.locations = state_space_locations
    maze_problem = GraphProblem(state_initial_id, state_goal_id, maze_map)

    iterations, _, node = my_astar_search_graph(problem=maze_problem, h=None)
    #-------------Trace the solution-----------------#
    # Walk back from the goal node to the initial state via parent links.
    solution_path = [node]
    cnode = node.parent
    solution_path.append(cnode)
    n_steps = 1  # the path already contains one parent hop
    while cnode.state != state_initial_id:
        n_steps += 1
        cnode = cnode.parent
        solution_path.append(cnode)

    # State ids look like "S_y_x", and str(solution_path) renders each node as
    # "<Node S_y_x>", so split on '_' and '>' and keep only the digit tokens.
    solution = []
    solution_x = []
    solution_y = []
    for s in str(solution_path).split('_'):
        for s_s in s.split('>'):
            if s_s.isdigit():
                solution.append(s_s)
    # The digits come in (y, x) pairs along the path, goal first.
    for j in range(len(solution) // 2):
        solution_y.append(int(solution[j * 2]))
        solution_x.append(int(solution[j * 2 + 1]))

    print("Steps:",i)
    print("Goal state:"+str(solution_path[0]))
    print("Final Solution:",solution_path[::-1])
    print("----------------------------------------")
    env.close()

    plt.cla()
    # Reverse the coordinates so the path is plotted from start to goal.
    plt.plot(solution_x[::-1], solution_y[::-1])
    plt.scatter(solution_x[::-1], solution_y[::-1], s=120)
    plt.xlim(0, n_dim - 1)
    plt.ylim(n_dim - 1, 0)  # invert the y-axis to match the grid layout
    plt.grid(True)
    plt.title("Simple Agent Solution for Problem%s" % problem_id)
    plt.savefig('./Images/%sx%s maps: Simple Agent Solution for Problem%s.jpg' % (n_dim, n_dim, problem_id))
    print("Figure Saved in Folder 'Images'")
    if plot_or_not:
        plt.show()
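
A minimal driver sketch for this function, not part of the original example: the problem-id range 0..7 follows the comment in Example #2 below, and the '8x8-base' map name is an assumption.

# Hypothetical driver: run the A-star search on every problem for one map,
# saving the figures without displaying them.
for problem_id in range(8):  # 8 problems, per the comment in Example #2
    search_for_one_solution(problem_id, map_name='8x8-base', plot_or_not=False)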
Example #2
def train_for_one_problem(problem_id, map_name):
    # problem_id in [0, 7] selects one of 8 different problems on which you can train/fine-tune your agent
    reward_hole = 0.0  # should be less than or equal to 0.0 (you can fine-tune this depending on your RL agent choice)
    is_stochastic = False  # should be False for A-star (deterministic search) and True for the RL agent

    env = LochLomondEnv(problem_id=problem_id,
                        is_stochastic=is_stochastic,
                        reward_hole=reward_hole,
                        map_name_base=map_name)
    env.reset()

    total_test_num = 60000
    restart_times = 0
    succeed_times = 0
    shortest_path = 100  # sentinel; any successful episode will be shorter

    for i in range(total_test_num):
        restart_times += 1
        done = False
        n_actions_for_episode = 0
        while not done:
            n_actions_for_episode += 1
            # Take a random action from the available actions.
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)

            if done:
                print("\rProblem:%s Episodes #%s / %s" %
                      (problem_id, restart_times, total_test_num),
                      end='')
                if reward == 1.0:
                    if shortest_path > n_actions_for_episode:
                        shortest_path = n_actions_for_episode
                    succeed_times += 1
                # Reset after every episode, successful or not, so the next
                # episode starts from the initial state.
                env.reset()

    print("\nSucceed Times:", succeed_times)
    print("Total Times:", total_test_num)
    print("Shortest path:", shortest_path)

    one_map_succeed_percentage = float(succeed_times / 60000)
    return one_map_succeed_percentage
    env.close()
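
A minimal driver sketch for the random-agent baseline, not part of the original example: the same eight problem ids are assumed, and '8x8-base' as the map name is an assumption.

# Hypothetical driver: estimate the random agent's success rate on each
# problem and collect the results for comparison.
success_rates = []
for problem_id in range(8):
    rate = train_for_one_problem(problem_id, map_name='8x8-base')
    success_rates.append(rate)
    print("Problem %s success rate: %.4f" % (problem_id, rate))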