def main(arglist):
    """
    Solve a LaserTank map with A* and write the resulting actions to a file.

    :param arglist: optional [input file name, output file name]. When fewer
        than two entries are given, the development defaults below are used.
    """
    # Development defaults; previously these always overrode arglist.
    input_file = "testcases/t2_brickyard.txt"
    output_file = "testcases/output.txt"
    if arglist is not None and len(arglist) >= 2:
        input_file, output_file = arglist[0], arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Search from the initial map state towards the goal built from the same map.
    start = Node(game_map)
    end = Goal(game_map)
    solution = astar(start, end)
    print(solution)

    # Copy the solution into a plain list of actions.
    actions = list(solution)

    # Write the solution to the output file
    write_output_file(output_file, actions)
def main(arglist):
    """
    Run A* towards the flag of the given map, save the actions and print the
    elapsed wall-clock time.

    :param arglist: [input file name, output file name]
    """
    input_file = arglist[0]
    output_file = arglist[1]
    t_start = time.time()

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Locate the flag cell; when several rows contain an "F" the last row wins.
    for row_number, row in enumerate(game_map.grid_data):
        if "F" in row:
            y_flag = row_number
            x_flag = row.index("F")

    actions = astar_search(game_map, x_flag, y_flag)

    # Write the solution to the output file
    write_output_file(output_file, actions)

    print(time.time() - t_start)
def main(arglist):
    """
    Solve the given map with uniform cost search and save the actions.

    :param arglist: [input file name, output file name]
    """
    source_path = arglist[0]
    target_path = arglist[1]

    # Wrap the parsed map in the root search node.
    root = Node(LaserTankMap.process_input_file(source_path))

    # Write the solution to the output file
    write_output_file(target_path, uniform_cost_search(root))
def main(arglist):
    """
    Search the given map for a path to the flag and save the actions.

    :param arglist: [input file name, output file name]
    """
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input 'testcase' file
    game_map = LaserTankMap.process_input_file(input_file)

    # Flag position handed to the search state (used by the heuristic).
    # Cells are scanned column by column; the last match wins and (0, 0) is
    # the fallback when no flag cell exists.
    flag_cells = [
        (col, row)
        for col in range(game_map.x_size)
        for row in range(game_map.y_size)
        if game_map.grid_data[row][col] == game_map.FLAG_SYMBOL
    ]
    flag_pos = flag_cells[-1] if flag_cells else (0, 0)

    # Wrap the initial instance and flag
    enter = LaserTankState(game_map, 0, flag_pos)

    # NOTE(review): the A* result is discarded; only the UCS result below is
    # written out. Confirm whether both searches are really meant to run.
    search_astar(enter)
    actions = ucs(enter)

    # Write the solution to the output file
    write_output_file(output_file, actions)
def main(arglist):
    """
    Explore the map with a priority-queue driven search and save the path of
    the first state that reports the goal as reached.

    Also publishes the flag position through the module globals
    ``goal_coord_x`` / ``goal_coord_y``.

    :param arglist: [input file name, output file name]
    """
    global goal_coord_x, goal_coord_y

    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Extract goal (column-major scan, last 'F' wins)
    for col in range(game_map.x_size):
        for row in range(game_map.y_size):
            if game_map.grid_data[row][col] == 'F':
                goal_coord_x = col
                goal_coord_y = row

    # Initialise starting state
    start_state = PlayerTank(
        game_map.grid_data, 0, game_map.player_x, game_map.player_y, 'W', [],
        game_map.x_size, game_map.y_size, game_map.player_heading)

    # Fringe/frontier ordered by the states' own comparison
    fringe = queue.PriorityQueue()
    fringe.put(start_state)

    # Seen states, bucketed by id; each bucket lists the distinct states
    # that share that id.
    hash_explored = {start_state.id: [start_state]}

    actions = []
    while not fringe.empty():
        current = fringe.get()

        # When goal is reached
        if current.goal_reached():
            actions = current.path
            break

        for action in ('W', 'D', 'A', 'S'):
            neighbor = current.action_move(action)
            # action_move yields 0 on collision or game over
            if neighbor != 0:
                bucket = hash_explored.setdefault(neighbor.id, [])
                if neighbor not in bucket:
                    bucket.append(neighbor)
                    fringe.put(neighbor)

    # Write the solution to the output file
    write_output_file(output_file, actions)
def main(arglist):
    """
    Visualise the path of the given output file applied to the given map file

    :param arglist: map file name, output file name
    :return: None when the argument count is wrong; -1 when a move collides,
        ends the game, or the goal is not reached; otherwise the number of
        steps beyond the optimal solution (0 when optimal)
    """
    if len(arglist) != 2:
        print(
            "Running this file visualises the path of the given output file applied to the given map file."
        )
        print("Usage: path_visualiser.py [map_file_name] [output_file_name]")
        return

    map_file = arglist[0]
    soln_file = arglist[1]

    optimal_steps = get_optimal_number_of_steps(map_file)

    game_map = LaserTankMap.process_input_file(map_file)
    game_map.render()

    # Only the first line of the solution file is used; the context manager
    # guarantees the handle is closed (it previously leaked).
    with open(soln_file, 'r') as f:
        moves = f.readline().strip().split(',')

    # apply each move in sequence; keep going after an error so that every
    # offending step is reported
    error_occurred = False
    for i, move in enumerate(moves):
        ret = game_map.apply_move(move)
        game_map.render()
        if ret == LaserTankMap.COLLISION:
            print("ERROR: Move resulting in Collision performed at step " + str(i))
            error_occurred = True
        elif ret == LaserTankMap.GAME_OVER:
            print("ERROR: Move resulting in Game Over performed at step " + str(i))
            error_occurred = True
        # pause so each rendered frame is visible
        time.sleep(0.5)

    if error_occurred:
        return -1

    if not game_map.is_finished():
        print("ERROR: Goal not reached after all actions performed.")
        return -1

    print("Puzzle solved.")
    if len(moves) == optimal_steps:
        print("Solution is optimal (" + str(len(moves)) + " steps)!")
        return 0

    print("Solution is " + str(len(moves) - optimal_steps) + " steps longer than optimal.")
    return len(moves) - optimal_steps
def main(arglist):
    """
    Run the A* variant of ``transition`` on the given map, print the search
    statistics it returns and save the moves.

    :param arglist: [input file name, output file name]
    """
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Coordinates are kept as (row, column) pairs here.
    start_coord = (game_map.player_y, game_map.player_x)

    # Row-major scan for the flag; the last match wins.
    for row in range(game_map.y_size):
        for col in range(game_map.x_size):
            if game_map.grid_data[row][col] == game_map.FLAG_SYMBOL:
                goal_coord = (row, col)

    game_map.coord = start_coord
    state = State(game_map, 0, [])

    # Other modes seen in the original comments: "ucs", "a*-teleport", "a*-ice"
    result = transition(state, "a*", goal_coord)

    for label, position in (("Nodes Generated:", 1),
                            ("Nodes on Fringe:", 2),
                            ("Explored Nodes:", 3)):
        print(label, result[position])
    print("Time Taken:", result[4], "seconds")

    # result[0] holds the moves; they are stored as one comma-joined entry
    actions = [','.join(result[0])]

    # Write the solution to the output file
    write_output_file(output_file, actions)
def main(arglist):
    """
    Solve a LaserTank map with A* search and write the action path to a file.

    Nodes are ordered on the heap by their own comparison; each child gets
    ``total_cost = cost + estimate_cost(goal)`` before being pushed. When no
    solution is found an empty action list is written.

    :param arglist: [input file name, output file name]
    """
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # get the coordinate of goal (column-major scan, last "F" wins)
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "F":
                goal_x = i
                goal_y = j

    # count the start time
    start_time = time.time()

    # 4 actions
    actionset = ["f", "r", "l", "s"]

    def state_key(lasertank):
        """Return a hashable snapshot of position, heading and grid."""
        return (lasertank.player_x, lasertank.player_y,
                lasertank.player_heading,
                tuple(map(tuple, lasertank.grid_data)))

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # Explored states live in a set so the membership test per child is a
    # hash lookup instead of a scan over every explored state.
    explored = {state_key(start.lasertank)}

    # The frontier is kept separate from the resulting action list.
    frontier = []
    heapq.heappush(frontier, start)

    actions = []
    while frontier:
        current_node = heapq.heappop(frontier)

        # check if arrive the goal
        if current_node.lasertank.is_finished():
            end_time = time.time()
            run_time = end_time - start_time
            print("Find the Solution successfully!")
            actions = current_node.path
            print("The total cost is :", current_node.total_cost)
            print("The path is: " + str(actions))
            print("The Steps are: ", len(actions))
            print("The time is: " + str(run_time))
            break

        # add the current node to explored
        explored.add(state_key(current_node.lasertank))

        # search for children
        for move in actionset:
            node_copy = current_node.deep_copy()
            status = node_copy.lasertank.apply_move(move)
            if status != 0:
                # anything other than 0 (SUCCESS) is not expanded
                continue
            if state_key(node_copy.lasertank) in explored:
                continue

            node_copy.path = node_copy.path + move
            node_copy.cost = node_copy.cost + 1
            node_copy.total_cost = (
                node_copy.cost + node_copy.estimate_cost(goal_x, goal_y))
            heapq.heappush(frontier, node_copy)

    # Write the solution to the output file
    write_output_file(output_file, actions)
def main(arglist):
    """
    Visualise one episode of the policy your code learns for the given map
    file, then exit with a mark derived from the episode reward.

    :param arglist: [map_file_name] (exactly one entry is accepted)
    """
    if len(arglist) != 1:
        print(
            "Running this file visualises the path your code produces for the given map file. "
            "Set mode to be 'value', 'policy' or 'episode'. MCTS can only be used in 'episode' mode."
        )
        print("Usage: policy_visualiser.py [map_file_name] [mode]")
        return

    def alarms_on():
        # Alarms are skipped on Windows and in debug mode.
        return not WINDOWS and not DEBUG_MODE

    game_map = LaserTankMap.process_input_file(arglist[0])
    simulator = game_map.make_clone()
    solver = Solver()

    mark = 0

    # do offline computation
    trainer = {
        'q-learning': 'train_q_learning',
        'sarsa': 'train_sarsa',
    }.get(game_map.method)
    if trainer is not None:
        if alarms_on():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            getattr(solver, trainer)(simulator)
        except TimeOutException:
            print(f"/!\\ Ran overtime during {trainer}( )")
            sys.exit(OVERTIME)
        except:
            # catch-all on purpose: any failure in the solver counts as a crash
            traceback.print_exc()
            print(f"/!\\ Crash occurred during {trainer}( )")
            sys.exit(CRASH)
        if alarms_on():
            signal.alarm(0)

    # simulate an episode and compare total reward to benchmark
    total_reward = 0
    max_steps = 60
    state = game_map.make_clone()
    for _ in range(max_steps):
        if alarms_on():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(1)
        try:
            action = solver.get_policy(state)
        except TimeOutException:
            print("/!\\ Ran overtime during get_policy( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print("/!\\ get_policy( ) caused crash during evaluation")
            sys.exit(mark)
        if alarms_on():
            signal.alarm(0)

        r, f = state.apply_move(action)
        state.render()
        total_reward += r
        if f:
            break
        time.sleep(1)

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    # amount by which benchmark score is better, clamped to [0, 20]
    diff = min(max(game_map.benchmark - total_reward, 0), 20)
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")

    sys.exit(mark)
def main(arglist):
    """
    Test whether your code produces an optimal policy for the given map file.

    Runs the offline computation selected by ``game_map.method`` ('vi' or
    'pi'), simulates one episode with de-randomised transitions and exits
    with a mark between 0 and 10 based on how the episode reward compares to
    ``game_map.benchmark``.

    :param arglist: [map file name]
    """
    if len(arglist) != 1:
        print(
            "Running this file tests whether your code produces an optimal policy for the given map file."
        )
        print("Usage: tester.py [map file name]")
        return

    def alarms_on():
        # Alarms are skipped on Windows and in debug mode.
        return not WINDOWS and not DEBUG_MODE

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)

    mark = 0

    # do offline computation
    routine = {
        'vi': 'run_value_iteration',
        'pi': 'run_policy_iteration',
    }.get(game_map.method)
    if routine is not None:
        if alarms_on():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            getattr(solver, routine)()
        except TimeOutException:
            print(f"/!\\ Ran overtime during {routine}( )")
            sys.exit(OVERTIME)
        except:
            # catch-all on purpose: any failure in the solver counts as a crash
            traceback.print_exc()
            print(f"/!\\ Crash occurred during {routine}( )")
            sys.exit(CRASH)
        if alarms_on():
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    state = game_map.make_clone()
    seed = game_map.initial_seed
    for _ in range(int((game_map.benchmark / game_map.move_cost) * 2)):
        new_seed = seed + 1
        is_mcts = game_map.method == 'mcts'
        policy_name = 'get_mcts_policy' if is_mcts else 'get_offline_policy'

        if alarms_on():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1 if is_mcts else 1)
        try:
            action = getattr(solver, policy_name)(state)
        except TimeOutException:
            # report a timeout as a timeout rather than as a crash
            print(f"/!\\ Ran overtime during {policy_name}( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print(f"/!\\ {policy_name}( ) caused crash during evaluation")
            sys.exit(mark)
        if alarms_on():
            # cancel the pending alarm so it cannot fire outside the try
            # block (during the move or the scoring below)
            signal.alarm(0)

        r = state.apply_move(action, new_seed)
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost:
            break
        seed = new_seed

    # compute score based on how close episode reward is to optimum
    print(
        f"Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    # One mark is lost for every threshold benchmark * (1 + i / 20),
    # i = 1..10, that the reward fails to exceed, stopping at the first one
    # it does exceed.
    mark = 10
    below = 0
    for i in range(1, 11):
        if total_reward > (game_map.benchmark * (1 + (i / 20))):
            break
        mark -= 1
        below += 1

    if below == 0:
        print("Testcase passed, policy optimum")
    elif mark > 0:
        print(f"Testcase passed, {below} points below optimum")

    sys.exit(mark)
def main(arglist):
    """
    Visualise the policy your code produces for the given map file by
    rendering up to 100 simulated steps.

    :param arglist: [map_file_name]
    """
    if len(arglist) != 1:
        print(
            "Running this file visualises the path your code produces for the given map file. "
        )
        print("Usage: policy_visualiser.py [map_file_name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)

    mark = 0

    # do offline computation
    routine = {
        'vi': 'run_value_iteration',
        'pi': 'run_policy_iteration',
    }.get(game_map.method)
    if routine is not None:
        if not WINDOWS:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            getattr(solver, routine)()
        except TimeOutException:
            print(f"/!\\ Ran overtime during {routine}( )")
            sys.exit(mark)
        except:
            # catch-all on purpose: any failure in the solver counts as a crash
            traceback.print_exc()
            print(f"/!\\ Crash occurred during {routine}( )")
            sys.exit(mark)
        if not WINDOWS:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions)
    total_reward = 0
    state = game_map.make_clone()
    state.render()

    # use file name as RNG seed
    # NOTE(review): hash() of a str is salted per process unless
    # PYTHONHASHSEED is fixed, so this seed presumably differs between runs.
    seed = hash(input_file)
    for _ in range(100):
        new_seed = seed + 1
        is_mcts = game_map.method == 'mcts'
        policy_name = 'get_mcts_policy' if is_mcts else 'get_offline_policy'

        if not WINDOWS:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1 if is_mcts else 1)
        try:
            action = getattr(solver, policy_name)(state)
        except TimeOutException:
            print(f"/!\\ Ran overtime during {policy_name}( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print(f"/!\\ {policy_name}( ) caused crash during evaluation")
            sys.exit(mark)
        # the alarm is armed whenever not WINDOWS but only cancelled when
        # DEBUG_MODE is also off (kept as in the original)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

        r = state.apply_move(action, new_seed)
        state.render()
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost:
            break
        seed = new_seed
        time.sleep(0.5)
def main():
    """
    Solve the hard-coded bridgeport test map with A* and print the result.

    The heuristic is the node's own ``estimate_cost`` towards the flag,
    lowered by the two "via teleporter" estimates when the map contains at
    least two teleport cells. Nothing is written to disk and nothing is
    returned; the path and timing are printed when the goal is reached.
    """
    input_file = "testcases/t1_bridgeport.txt"

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # get the coordinate of goal (column-major scan, last "F" wins)
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "F":
                goal_x = i
                goal_y = j

    # get the teleport cells in column-major order; only the first two are used
    teleports = [
        (i, j)
        for i in range(game_map.x_size)
        for j in range(game_map.y_size)
        if game_map.grid_data[j][i] == "T"
    ]

    def heuristic(node):
        """Estimate the remaining cost from node to the goal."""
        estimate = node.estimate_cost(goal_x, goal_y)
        # A single teleporter has no exit, so the shortcut estimates need a
        # pair (indexing the second one used to raise KeyError).
        if len(teleports) >= 2:
            (ax, ay), (bx, by) = teleports[0], teleports[1]
            px = node.lasertank.player_x
            py = node.lasertank.player_y
            estimate = min(
                estimate,
                distance(px, py, ax, ay) + distance(bx, by, goal_x, goal_y),
                distance(px, py, bx, by) + distance(ax, ay, goal_x, goal_y))
        return estimate

    def state_key(lasertank):
        """Return a hashable snapshot of position, heading and grid."""
        return (lasertank.player_x, lasertank.player_y,
                lasertank.player_heading,
                tuple(map(tuple, lasertank.grid_data)))

    # count the start time
    start_time = time.time()

    # 4 actions
    actionset = ["f", "r", "l", "s"]

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # Explored states live in a set so the membership test per child is a
    # hash lookup instead of a scan over every explored state.
    explored = {state_key(start.lasertank)}

    # set the frontier queue
    frontier = []
    heapq.heappush(frontier, start)

    while frontier:
        current_node = heapq.heappop(frontier)

        # check if arrive the goal
        if current_node.lasertank.is_finished():
            end_time = time.time()
            run_time = end_time - start_time
            print("Find the Solution successfully!")
            print("the map is: ", input_file)
            actions = current_node.path
            print("The path is: " + str(actions))
            print("The Steps are: ", len(actions))
            print("The time is: " + str(run_time))
            break

        # add the current node to explored
        explored.add(state_key(current_node.lasertank))

        # search for children
        for move in actionset:
            node_copy = current_node.deep_copy()
            status = node_copy.lasertank.apply_move(move)
            if status != 0:
                # anything other than 0 (SUCCESS) is not expanded
                continue
            if state_key(node_copy.lasertank) in explored:
                continue

            node_copy.path = node_copy.path + move
            node_copy.cost = node_copy.cost + 1
            # f = g + h: the path cost is added to whichever estimate is
            # smallest. Previously the teleport estimates were compared
            # against g + h without g, which over-prioritised nodes near a
            # teleporter.
            node_copy.total_cost = node_copy.cost + heuristic(node_copy)
            heapq.heappush(frontier, node_copy)
# else: # if (state.player_x, state.player_y - 1) != "#": # return 'f' # else: # return 'r' # return self.policy[current_state] # pass if __name__ == '__main__': input_file = "testcases/vi_t1.txt" method = "pi" game_map = LaserTankMap.process_input_file(input_file) solver = Solver(game_map) if method == "vi": solver.run_value_iteration() elif method == "pi": solver.run_policy_iteration() # simulate an episode (using de-randomised transitions) and compare total reward to benchmark total_reward = 0 state = game_map.make_clone() seed = game_map.initial_seed for i in range(int((game_map.benchmark / game_map.move_cost) * 2)): new_seed = seed + 1 action = solver.get_offline_policy(state) r = state.apply_move(action, new_seed) total_reward += r if r == game_map.goal_reward or r == game_map.game_over_cost:
:param state: a LaserTankMap instance :return: pi(s) [an element of LaserTankMap.MOVES] """ # # TODO # Write code to return the optimal action to be performed at this state based on the stored Q-values. # # You can assume that either train_q_learning( ) or train_sarsa( ) has been called before this # method is called. # # When this method is called, you are allowed up to 1 second of compute time. # pass if __name__ == "__main__": solver = Solver() map_dir = "testcases/q-learn_t1.txt" test_map = LaserTankMap.process_input_file(map_dir) simulator = test_map.make_clone() mark = 0 # do offline computation if test_map.method == 'q-learning': solver.train_q_learning(simulator) print("=========== END =============")
def main(arglist):
    """
    Test whether your code produces an approximately optimal policy for the
    given map file.

    Trains the solver with the method named by the map ('q-learning' or
    'sarsa'), averages the reward of 50 simulated episodes of at most 60
    steps each, and exits with a mark between 0 and 10 based on how far the
    average falls below ``game_map.benchmark``.

    :param arglist: [map file name]
    """
    if len(arglist) != 1:
        print(
            "Running this file tests whether your code produces an approximately optimal policy for the given map "
            "file.")
        print("Usage: tester.py [map file name]")
        return

    def alarms_on():
        # Alarms are skipped on Windows and in debug mode.
        return not WINDOWS and not DEBUG_MODE

    game_map = LaserTankMap.process_input_file(arglist[0])
    simulator = game_map.make_clone()
    solver = Solver()

    mark = 0

    # do offline computation
    trainer = {
        'q-learning': 'train_q_learning',
        'sarsa': 'train_sarsa',
    }.get(game_map.method)
    if trainer is not None:
        if alarms_on():
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            getattr(solver, trainer)(simulator)
        except TimeOutException:
            print(f"/!\\ Ran overtime during {trainer}( )")
            sys.exit(OVERTIME)
        except:
            # catch-all on purpose: any failure in the solver counts as a crash
            traceback.print_exc()
            print(f"/!\\ Crash occurred during {trainer}( )")
            sys.exit(CRASH)
        if alarms_on():
            signal.alarm(0)

    # simulate episodes and compare the average total reward to benchmark
    total_reward = 0
    num_trials = 50
    max_steps = 60
    for _ in range(num_trials):
        state = game_map.make_clone()
        for _step in range(max_steps):
            if alarms_on():
                signal.signal(signal.SIGALRM, timeout_handler)
                signal.alarm(1)
            try:
                action = solver.get_policy(state)
            except TimeOutException:
                print("/!\\ Ran overtime during get_policy( )")
                sys.exit(mark)
            except:
                traceback.print_exc()
                print("/!\\ get_policy( ) caused crash during evaluation")
                sys.exit(mark)
            if alarms_on():
                signal.alarm(0)

            r, f = state.apply_move(action)
            total_reward += r
            if f:
                break
    total_reward /= num_trials

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    # amount by which benchmark score is better, clamped to [0, 20]
    diff = min(max(game_map.benchmark - total_reward, 0), 20)
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")

    sys.exit(mark)
def main():
    """
    Solve the hard-coded shortcut test map with a best-first search ordered
    by the nodes' own comparison, print the result and terminate the process.

    No heuristic is applied: children only get their path and cost updated
    before being pushed. Raises SystemExit as soon as a solution is found;
    returns None when the frontier empties without one.
    """
    input_file = "testcases/t2_shortcut.txt"

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # count the start time
    start_time = time.time()

    # 4 actions
    actionset = ["s", "f", "r", "l"]

    def state_key(lasertank):
        """Return a hashable snapshot of position, heading and grid."""
        return (lasertank.player_x, lasertank.player_y,
                lasertank.player_heading,
                tuple(map(tuple, lasertank.grid_data)))

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # Explored states live in a set so the membership test per child is a
    # hash lookup instead of a scan over every explored state.
    explored = {state_key(start.lasertank)}
    # number of nodes expanded so far (reported on success)
    expanded = 0

    # set the frontier queue
    frontier = []
    heapq.heappush(frontier, start)

    while frontier:
        current_node = heapq.heappop(frontier)

        # check if arrive the goal
        if current_node.lasertank.is_finished():
            end_time = time.time()
            run_time = end_time - start_time
            print("Find the Solution successfully!")
            print("the map is: ", input_file)
            print("the number of Node on explored: ", expanded)
            actions = current_node.path
            print("The path is: " + str(actions))
            print("The number of steps is: ", len(actions))
            print("The run time is: " + str(run_time))
            # Same effect as the interactive exit() helper, without relying
            # on the site module having installed it.
            raise SystemExit

        # add the current node to explored
        expanded += 1
        explored.add(state_key(current_node.lasertank))

        # search for children
        for move in actionset:
            node_copy = current_node.deep_copy()
            status = node_copy.lasertank.apply_move(move)
            if status != 0:
                # anything other than 0 (SUCCESS) is not expanded
                continue
            if state_key(node_copy.lasertank) in explored:
                continue

            node_copy.path = node_copy.path + move
            node_copy.cost = node_copy.cost + 1
            heapq.heappush(frontier, node_copy)
def main(arglist):
    """
    Train one solver with Q-learning and one with SARSA, report how each
    learned policy scores against its map's benchmark, and plot the series
    returned by each solver's ``get_list()`` into ``q4.png``.

    :param arglist: [map file name for Q-learning, map file name for SARSA]
    """

    def report(trained_solver, source_map):
        # Average the reward of 50 episodes of at most 60 steps and print the
        # resulting comparison against the map's benchmark.
        total_reward = 0
        num_trials = 50
        max_steps = 60
        for _ in range(num_trials):
            state = source_map.make_clone()
            for _step in range(max_steps):
                action = trained_solver.get_policy(state)
                r, f = state.apply_move(action)
                total_reward += r
                if f:
                    break
        total_reward /= num_trials

        # compute score based on how close episode reward is to optimum
        print(
            f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(source_map.benchmark)}"
        )
        # amount by which benchmark score is better, clamped to [0, 20]
        diff = min(max(source_map.benchmark - total_reward, 0), 20)
        below = math.ceil(diff / 2)
        mark = 10 - below

        if below == 0:
            print("Testcase passed, policy matches or exceeds benchmark")
        elif mark > 0:
            print(
                f"Testcase passed, {below} marks below solution quality benchmark")

    input_file = arglist[0]
    input_file_1 = arglist[1]
    game_map = LaserTankMap.process_input_file(input_file)
    game_map_1 = LaserTankMap.process_input_file(input_file_1)
    simulator = game_map.make_clone()
    simulator_1 = game_map_1.make_clone()
    solver = Solver(0.01)
    solver_1 = Solver(0.01)

    # Q-learning run on the first map
    if game_map.method == 'q-learning':
        solver.train_q_learning(simulator)
    report(solver, game_map)
    Aveg_0 = solver.get_list()

    # SARSA run on the second map
    if game_map_1.method == 'sarsa':
        solver_1.train_sarsa(simulator_1)
    report(solver_1, game_map_1)
    Aveg_1 = solver_1.get_list()

    # One curve per solver, indexed by position in its list
    plt.plot(range(len(Aveg_0)), Aveg_0, '--r', label='q_learning')
    plt.plot(range(len(Aveg_1)), Aveg_1, '-b', label='sarsa')
    plt.xlabel('eqisode')
    plt.ylabel('Average Reward')
    plt.title(
        'learned policy against iteration number under \n Q-learning and SARSA with lr rate 0.01'
    )
    plt.legend()
    plt.savefig('q4.png')
    plt.show()