def search_for_one_solution(problem_id, map_name, plot_or_not):
    """Solve one LochLomond maze deterministically with A* and plot the path.

    Parameters
    ----------
    problem_id : int
        Problem index (in [0, 7]); selects the start/goal configuration.
    map_name : str
        Map base name; '4x4-base' yields a 4x4 grid, anything else 8x8.
    plot_or_not : bool
        When True, show the figure interactively in addition to saving it.

    Side effects: prints the trace to stdout and saves the figure as a .jpg
    under ./Images/ (the folder must already exist — TODO confirm).
    """
    reward_hole = 0.0      # no hole penalty needed for deterministic search
    is_stochastic = False  # A* requires a deterministic environment
    n_dim = 4 if map_name == '4x4-base' else 8

    env = LochLomondEnv(problem_id=problem_id, is_stochastic=is_stochastic,
                        reward_hole=reward_hole, map_name_base=map_name)
    env.reset()

    # Create a dict representation of the state space.
    state_space_locations, state_space_actions, state_initial_id, state_goal_id = env2statespace(env)

    # -------------- SOLUTION -------------- #
    maze_map = UndirectedGraph(state_space_actions)
    maze_map.locations = state_space_locations
    maze_problem = GraphProblem(state_initial_id, state_goal_id, maze_map)
    iterations, _, node = my_astar_search_graph(problem=maze_problem, h=None)

    # ------------- Trace the solution ------------- #
    # Walk parent pointers from the goal node back to the initial state,
    # so solution_path is ordered goal -> start.
    solution_path = [node]
    cnode = node.parent
    solution_path.append(cnode)
    steps = 0
    while cnode.state != state_initial_id:
        steps += 1
        cnode = cnode.parent
        solution_path.append(cnode)

    # State ids embed '_'-separated grid coordinates (presumably 'S_y_x' —
    # verify against env2statespace); extract the digit tokens from the
    # stringified path and split them into alternating y/x coordinates.
    solution = []
    solution_x = []
    solution_y = []
    for chunk in str(solution_path).split('_', -1):
        for token in str(chunk).split('>', -1):
            if token.isdigit():
                solution.append(token)
    for k in range(len(solution) // 2):
        solution_y.append(int(solution[k * 2]))
        solution_x.append(int(solution[k * 2 + 1]))

    # BUG FIX: the original reused `i` for both the trace counter and the
    # coordinate loop index, so the reported step count was clobbered.
    print("Steps:", steps)
    print("Goal state:" + str(solution_path[0]))
    print("Final Solution:", solution_path[::-1])
    print("----------------------------------------")
    env.close()

    # Plot start -> goal, i.e. the reverse of the goal-first trace.
    plt.cla()
    plt.plot(solution_x[::-1], solution_y[::-1])
    plt.scatter(solution_x[::-1], solution_y[::-1], s=120)
    plt.xlim(0, n_dim - 1)
    plt.ylim(n_dim - 1, 0)  # inverted y-axis so the origin sits top-left like the map
    plt.grid(True)
    plt.title("Simple Agent Solution for Problem%s" % problem_id)
    plt.savefig('./Images/%sx%s maps: Simple Agent Solution for Problem%s.jpg' % (n_dim, n_dim, problem_id))
    print("Figure Saved in Folder 'Images'")
    if plot_or_not:
        plt.show()
def train_for_one_problem(problem_id, map_name):
    """Estimate a random agent's success rate on one LochLomond problem.

    Runs ``total_test_num`` episodes of uniformly random actions and reports
    how often the goal (reward == 1.0) was reached and the shortest
    successful episode observed.

    Parameters
    ----------
    problem_id : int
        Problem index (in [0, 7]); selects the start/goal configuration.
    map_name : str
        Map base name passed through to LochLomondEnv.

    Returns
    -------
    float
        Fraction of episodes that reached the goal.
    """
    reward_hole = 0.0      # should be <= 0.0 (tunable for an RL agent)
    is_stochastic = False  # False for deterministic search, True for RL

    env = LochLomondEnv(problem_id=problem_id, is_stochastic=is_stochastic,
                        reward_hole=reward_hole, map_name_base=map_name)
    env.reset()

    total_test_num = 60000
    succeed_times = 0
    shortest_path = 100  # upper bound; shrinks whenever a shorter win occurs

    for episode in range(1, total_test_num + 1):
        done = False
        n_actions_for_episode = 0
        while not done:
            n_actions_for_episode += 1
            # Take a random action from the available actions.
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
        # Progress line uses total_test_num instead of a hard-coded 60000
        # so the count stays consistent if the constant is changed.
        print("\rProblem:%s Episodes #%s / %s" % (problem_id, episode, total_test_num), end='')
        if reward == 1.0:  # goal reached (holes yield reward_hole = 0.0)
            if shortest_path > n_actions_for_episode:
                shortest_path = n_actions_for_episode
            succeed_times += 1
        # BUG FIX: the original called env.reset() on every NON-terminal
        # step (inside `else` of `if done:`), snapping the agent back to the
        # start after each move so no multi-step episode could occur.
        # Reset once per episode, after it terminates.
        env.reset()

    print("\nSucceed Times:", succeed_times)
    print("Total Times:", total_test_num)
    print("Shortest path:", shortest_path)
    # BUG FIX: env.close() was placed after `return` and never executed.
    env.close()
    return float(succeed_times) / total_test_num