def goal_reached(self):
    """Report whether this state's tank has finished the map.

    A temporary LaserTankMap is rebuilt from a row-by-row copy of
    ``self.grid`` together with the stored coordinates and heading, and its
    ``is_finished()`` result decides the answer.

    :return: True when the rebuilt map reports it is finished, else False
    """
    grid_snapshot = [row[:] for row in self.grid]
    probe = LaserTankMap(self.x_size, self.y_size, grid_snapshot,
                         self.coord_x, self.coord_y, self.player_heading)
    return True if probe.is_finished() else False
def train_q_learning(self, simulator: LaserTankMap):
    """
    Train the agent using Q-learning, building up a table of Q-values.

    The simulator is stepped until ``simulator.time_limit`` seconds have
    elapsed. After every move the entry for the visited (state, action) pair
    is moved towards ``r + gamma * Q(s', a*)`` by ``self.learning_rate``,
    where ``a*`` is the successor action selected by ``dict_argmax``.
    Whenever ``apply_move`` reports the episode as finished the simulator is
    reset to its start state. The resulting table is stored in
    ``self.q_values``.

    :param simulator: A simulator for collecting episode data (LaserTankMap instance)
    """
    print('q-learning')

    # Q(s, a) table: key = hash(state), value = dict mapping actions to values
    q_values = {}

    start = time.time()
    while time.time() - start < simulator.time_limit:
        s = simulator.__hash__()
        a = self.choose_action(simulator, q_values)

        # Unseen states/actions start at 0.
        q_s = q_values.setdefault(s, {})
        old_q = q_s.get(a, .0)

        r, episode_finished = simulator.apply_move(a)

        next_s = simulator.__hash__()
        known_next_q = q_values.setdefault(next_s, {})
        # Give every legal move a value so the best one can be picked even
        # for a successor state that has never been updated.
        next_s_q = {action: known_next_q.get(action, .0)
                    for action in simulator.MOVES}
        best_next_q = next_s_q[dict_argmax(next_s_q)]

        # Temporal-difference update of Q(s, a).
        td = r + (simulator.gamma * best_next_q) - old_q
        q_s[a] = old_q + (self.learning_rate * td)

        if episode_finished:
            simulator.reset_to_start()

    # store the computed Q-values
    self.q_values = q_values
def deep_copy(self):
    """Return a new Node whose map has its own copy of the grid rows.

    The new LaserTankMap is first constructed from this node's map
    attributes, then its ``grid_data`` is replaced by a list of per-row
    copies. ``cost`` and ``path`` are passed to the new Node unchanged.

    :return: Node wrapping the duplicated map
    """
    source = self.lasertank
    duplicate = LaserTankMap(source.x_size, source.y_size, source.grid_data,
                             source.player_x, source.player_y,
                             source.player_heading)
    duplicate.grid_data = [row.copy() for row in source.grid_data]
    return Node(duplicate, self.cost, self.path)
def get_neighborlist(self):
    """Build the successor list for this state.

    For every action in ``self.moves`` the action is applied to a fresh
    LaserTankMap built from a row-wise copy of the current grid, and the
    result is wrapped in a LaserTankState (second argument 1, same flag
    position). The return code of ``apply_move`` is not inspected.

    :return: list of (LaserTankState, action) tuples, one per action
    """
    # Logic retrieved from tutor code: https://gist.github.com/tttor/826be15b99bb4b33a50787d7eb7b5fda
    origin = self.lasertank
    successors = []
    for action in self.moves:
        grid_copy = [row[:] for row in origin.grid_data]
        sandbox = LaserTankMap(origin.x_size, origin.y_size, grid_copy,
                               origin.player_x, origin.player_y,
                               origin.player_heading)
        sandbox.apply_move(action)
        successors.append((LaserTankState(sandbox, 1, self.flag_pos), action))
    return successors
def main(arglist):
    """Solve a testcase with ``astar_search`` and write the action sequence.

    The grid is scanned row by row for the "F" cell; if several rows contain
    one, the last row wins. The elapsed wall-clock time is printed at the
    end.

    :param arglist: [input file name, output file name]
    """
    input_file = arglist[0]
    output_file = arglist[1]
    t_start = time.time()

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Locate the flag; its coordinates are handed to the search.
    for row_index, row in enumerate(game_map.grid_data):
        if "F" in row:
            y_flag = row_index
            x_flag = row.index("F")

    actions = astar_search(game_map, x_flag, y_flag)

    # Write the solution to the output file
    write_output_file(output_file, actions)

    t_elapsed = time.time() - t_start
    print(t_elapsed)
def main(arglist):
    """Solve a testcase with ``astar`` and write the action sequence.

    When at least two arguments are supplied they are used as the input and
    output file names. Otherwise the previously hard-coded development paths
    are used, so argument-less invocations keep working.

    :param arglist: [input file name, output file name]; may be empty
    """
    if arglist and len(arglist) >= 2:
        input_file = arglist[0]
        output_file = arglist[1]
    else:
        # Fallback paths that used to be hard-coded for local debugging.
        input_file = "testcases/t2_brickyard.txt"
        output_file = "testcases/output.txt"

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    start = Node(game_map)
    end = Goal(game_map)
    solution = astar(start, end)
    print(solution)

    # Copy the solution into a fresh list of actions.
    actions = list(solution)

    # Write the solution to the output file
    write_output_file(output_file, actions)
def main(arglist):
    """Solve a testcase with ``uniform_cost_search`` and write the result.

    :param arglist: [input file name, output file name]
    """
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    root = Node(game_map)
    actions = uniform_cost_search(root)

    # Write the solution to the output file
    write_output_file(output_file, actions)
def main(arglist):
    """Solve a testcase and write the action sequence found by ``ucs``.

    The flag position is located first (defaulting to (0, 0) when no flag
    cell is present) and wrapped, together with the map, in the initial
    LaserTankState.

    :param arglist: [input file name, output file name]
    """
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input 'testcase' file
    game_map = LaserTankMap.process_input_file(input_file)

    # Find the flag position to be used for the manhattan heuristic function.
    # Columns are the outer loop; the last matching cell wins.
    flag_pos = (0, 0)
    for col in range(game_map.x_size):
        for row in range(game_map.y_size):
            if game_map.grid_data[row][col] == game_map.FLAG_SYMBOL:
                flag_pos = (col, row)

    # Wrap the initial instance and flag
    enter = LaserTankState(game_map, 0, flag_pos)

    # NOTE(review): the A* result is thrown away because the UCS result below
    # is the one written out; the call is kept so behaviour is unchanged.
    search_astar(enter)
    actions = ucs(enter)

    # Write the solution to the output file
    write_output_file(output_file, actions)
def __init__(self, game_map):
    """Set up the solver's state space, reward table, values and policy.

    * ``self.state`` lists every (x, y, direction) triple for the interior
      cells (indices 1 .. size - 2) and each entry of ``DIRECTIONS``,
      followed by four entries built from ``EXIT_STATE`` with headings 0-3.
    * The module-level ``REWARDS`` mapping is filled for interior "#", "W"
      and "F" cells with the map's collision cost, game-over cost and goal
      reward respectively.
    * ``self.value`` starts at 0 and ``self.policy`` at 'f' for every state.

    :param game_map: value forwarded to the LaserTankMap constructor
    """
    self.game_map = game_map
    self.lasertank = LaserTankMap(self.game_map)
    self.t_success_prob = self.lasertank.t_success_prob
    self.t_error_prob = self.lasertank.t_error_prob
    self.converged = False

    tank = self.lasertank
    self.state = [(x, y, z)
                  for x in range(1, tank.x_size - 1)
                  for y in range(1, tank.y_size - 1)
                  for z in DIRECTIONS]
    self.reward = dict.fromkeys(self.state, 0)

    # Cell symbol -> name of the map attribute holding its reward. The
    # attribute is only read when a matching cell is actually found.
    reward_attribute = {
        "#": "collision_cost",
        "W": "game_over_cost",
        "F": "goal_reward",
    }
    for i in range(1, tank.x_size - 1):
        for j in range(1, tank.y_size - 1):
            attribute = reward_attribute.get(tank.grid_data[j][i])
            if attribute is not None:
                REWARDS[(i, j)] = getattr(tank, attribute)

    # One exit entry per heading index.
    self.state.extend((EXIT_STATE[0], EXIT_STATE[1], k) for k in range(4))

    self.value = dict.fromkeys(self.state, 0)
    self.policy = dict.fromkeys(self.state, 'f')
def create_copy(self):
    """Return a new LaserTankMap mirroring this one.

    The grid is duplicated row by row so the copy does not share row lists
    with the original; size, player position and heading are passed through.

    :return: the newly constructed LaserTankMap
    """
    return LaserTankMap(
        x_size=self.x_size,
        y_size=self.y_size,
        grid_data=[row[:] for row in self.grid_data],
        player_x=self.player_x,
        player_y=self.player_y,
        player_heading=self.player_heading,
    )
def get_successor(self):
    """Expand this state into its successful successors.

    Each move in ``self.moves`` is applied to a private copy of the map.
    Only moves for which ``apply_move`` returns ``LaserTankMap.SUCCESS``
    yield a successor; its parent list is a copy of this state's list with
    the move appended.

    :return: list of (State, move) tuples
    """
    current = self.game_map
    successors = []
    for move in self.moves:
        grid_copy = [row[:] for row in current.grid_data]
        candidate = LaserTankMap(current.x_size, current.y_size, grid_copy,
                                 player_x=current.player_x,
                                 player_y=current.player_y,
                                 player_heading=current.player_heading)
        history = [step[:] for step in self.parents]
        if candidate.apply_move(move) != LaserTankMap.SUCCESS:
            continue
        history.append(move)
        successors.append((State(candidate, 1, history), move))
    return successors
def main(arglist):
    """Search for a solution with a priority-queue frontier and write it out.

    The flag coordinates are published through the module-level globals
    ``goal_coord_x`` / ``goal_coord_y``. States are expanded from a
    ``queue.PriorityQueue`` until one reports ``goal_reached()``; visited
    states are bucketed by their ``id`` and compared within a bucket.

    :param arglist: [input file name, output file name]
    """
    global goal_coord_x
    global goal_coord_y

    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Extract goal (column-major scan, last match wins)
    for col in range(game_map.x_size):
        for row in range(game_map.y_size):
            if game_map.grid_data[row][col] == 'F':
                goal_coord_x = col
                goal_coord_y = row

    actions = []
    actionset = ['W', 'D', 'A', 'S']

    # Initialise Starting State
    start_state = PlayerTank(game_map.grid_data, 0, game_map.player_x,
                             game_map.player_y, 'W', [], game_map.x_size,
                             game_map.y_size, game_map.player_heading)

    # Start the Fringe/Frontier
    fringe = queue.PriorityQueue()
    fringe.put(start_state)

    # Keep track of all states explored, grouped by state id
    hash_explored = {start_state.id: [start_state]}

    while not fringe.empty():
        current = fringe.get()

        # When goal is reached
        if current.goal_reached():
            actions = current.path
            break

        for action in actionset:
            neighbor = current.action_move(action)
            # 0 signals a collision or game over: nothing to enqueue.
            if neighbor != 0:
                bucket = hash_explored.get(neighbor.id)
                if bucket is None:
                    hash_explored[neighbor.id] = [neighbor]
                    fringe.put(neighbor)
                elif neighbor not in bucket:
                    bucket.append(neighbor)
                    fringe.put(neighbor)

    # Write the solution to the output file
    write_output_file(output_file, actions)
def main(arglist):
    """
    Visualise the path of the given output file applied to the given map file

    Every move from the first line of the solution file is applied and
    rendered with a 0.5 second pause between moves. Collisions and game-over
    moves are reported but do not stop the replay.

    :param arglist: map file name, output file name
    :return: None when the argument count is wrong, -1 when a move failed or
        the goal was not reached, otherwise the number of steps above optimal
        (0 for an optimal solution)
    """
    if len(arglist) != 2:
        print(
            "Running this file visualises the path of the given output file applied to the given map file."
        )
        print("Usage: path_visualiser.py [map_file_name] [output_file_name]")
        return

    map_file = arglist[0]
    soln_file = arglist[1]

    optimal_steps = get_optimal_number_of_steps(map_file)

    game_map = LaserTankMap.process_input_file(map_file)
    game_map.render()

    # The context manager closes the solution file once the line is read.
    with open(soln_file, 'r') as f:
        moves = f.readline().strip().split(',')

    # apply each move in sequence
    error_occurred = False
    for i, move in enumerate(moves):
        ret = game_map.apply_move(move)
        game_map.render()
        if ret == LaserTankMap.COLLISION:
            print("ERROR: Move resulting in Collision performed at step " + str(i))
            error_occurred = True
        elif ret == LaserTankMap.GAME_OVER:
            print("ERROR: Move resulting in Game Over performed at step " + str(i))
            error_occurred = True
        time.sleep(0.5)

    if error_occurred:
        return -1

    if not game_map.is_finished():
        print("ERROR: Goal not reached after all actions performed.")
        return -1

    print("Puzzle solved.")
    extra_steps = len(moves) - optimal_steps
    if extra_steps == 0:
        print("Solution is optimal (" + str(len(moves)) + " steps)!")
        return 0
    print("Solution is " + str(extra_steps) + " steps longer than optimal.")
    return extra_steps
def main(arglist):
    """Run the A* variant of ``transition`` on a testcase and write the moves.

    Search statistics taken from positions 1-4 of the result are printed;
    position 0 (the moves) is joined with commas and written as the single
    entry of the output list.

    :param arglist: [input file name, output file name]
    """
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Coordinates are kept in (row, column) order here.
    coord = (game_map.player_y, game_map.player_x)
    for row in range(game_map.y_size):
        for col in range(game_map.x_size):
            if game_map.grid_data[row][col] == game_map.FLAG_SYMBOL:
                goal_coord = (row, col)

    game_map.coord = coord
    state = State(game_map, 0, [])

    # Alternative strategies accepted by transition(): "ucs", "a*-teleport"
    # and "a*-ice".
    result = transition(state, "a*", goal_coord)

    statistics = (
        ("Nodes Generated:", 1),
        ("Nodes on Fringe:", 2),
        ("Explored Nodes:", 3),
    )
    for label, position in statistics:
        print(label, result[position])
    print("Time Taken:", result[4], "seconds")

    actions = [','.join(result[0])]

    # Write the solution to the output file
    write_output_file(output_file, actions)
def action_move(self, action):
    """Apply a keyboard-style action and return the resulting state.

    The action letter is translated to a LaserTankMap move ('W' -> 'f',
    'D' -> 'r', 'A' -> 'l', 'S' -> 's') and applied to a private copy of the
    current map.

    :param action: one of 'W' (forward), 'D' (turn clockwise),
        'A' (turn counter-clockwise) or 'S' (shoot laser)
    :return: a new PlayerTank with cost + 1 and the move appended to the
        path when ``apply_move`` returns 0; otherwise 0. An unknown action
        prints a message and also returns 0 instead of failing on an
        unbound local.
    """
    key_to_move = {'W': 'f', 'D': 'r', 'A': 'l', 'S': 's'}
    path_to_take = key_to_move.get(action)
    if path_to_take is None:
        print("No/Worng Action Input")
        return 0

    new_grid = [row[:] for row in self.grid]
    new_player = LaserTankMap(self.x_size, self.y_size, new_grid,
                              self.coord_x, self.coord_y, self.player_heading)

    result = new_player.apply_move(path_to_take)
    if result != 0:
        # Any non-zero status (1 and 2 were handled explicitly before) means
        # the move produced no usable successor.
        return 0

    return PlayerTank(new_player.grid_data, self.cost + 1,
                      new_player.player_x, new_player.player_y, action,
                      self.path + [path_to_take], self.x_size, self.y_size,
                      new_player.player_heading)
def _report_average_reward(solver, game_map, num_trials=50, max_steps=60):
    """Run ``num_trials`` episodes with the solver's policy and print the score.

    Each episode starts from a clone of ``game_map`` and lasts at most
    ``max_steps`` moves. The average total reward is printed next to
    ``game_map.benchmark`` together with the pass message derived from it.
    """
    total_reward = 0
    for _ in range(num_trials):
        state = game_map.make_clone()
        for _step in range(max_steps):
            action = solver.get_policy(state)
            r, f = state.apply_move(action)
            total_reward += r
            if f:
                break
    total_reward /= num_trials

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    diff = game_map.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")


def main(arglist):
    """
    Train one solver per map file, score both policies and plot their curves.

    The first map is trained with Q-learning when its method is
    'q-learning', the second with SARSA when its method is 'sarsa'. Both
    solvers are created with the argument 0.01. After scoring, the sequences
    returned by each solver's ``get_list()`` are plotted against their index,
    saved to 'q4.png' and shown.

    :param arglist: [first map file name, second map file name]
    """
    input_file = arglist[0]
    input_file_1 = arglist[1]
    game_map = LaserTankMap.process_input_file(input_file)
    game_map_1 = LaserTankMap.process_input_file(input_file_1)
    simulator = game_map.make_clone()
    simulator_1 = game_map_1.make_clone()
    solver = Solver(0.01)
    solver_1 = Solver(0.01)

    if game_map.method == 'q-learning':
        solver.train_q_learning(simulator)
    _report_average_reward(solver, game_map)
    Aveg_0 = solver.get_list()

    if game_map_1.method == 'sarsa':
        solver_1.train_sarsa(simulator_1)
    _report_average_reward(solver_1, game_map_1)
    Aveg_1 = solver_1.get_list()

    x = range(len(Aveg_0))
    x_1 = range(len(Aveg_1))
    plt.plot(x, Aveg_0, '--r', label='q_learning')
    plt.plot(x_1, Aveg_1, '-b', label='sarsa')
    plt.xlabel('eqisode')
    plt.ylabel('Average Reward')
    plt.title(
        'learned policy against iteration number under \n Q-learning and SARSA with lr rate 0.01'
    )
    plt.legend()
    plt.savefig('q4.png')
    plt.show()
def _train_within_time_limit(solver, trainer_name, simulator, game_map):
    """Run ``solver.<trainer_name>(simulator)`` guarded by a SIGALRM timer.

    The timer (``game_map.time_limit + 1`` seconds) is only armed when
    neither WINDOWS nor DEBUG_MODE is set. The process exits with OVERTIME
    when the timer fires and with CRASH on any other exception.
    """
    if not WINDOWS and not DEBUG_MODE:
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(game_map.time_limit + 1)
    try:
        getattr(solver, trainer_name)(simulator)
    except TimeOutException:
        print(f"/!\\ Ran overtime during {trainer_name}( )")
        sys.exit(OVERTIME)
    except BaseException:
        # Deliberately as broad as a bare ``except``: anything the trained
        # code raises is reported as a crash.
        traceback.print_exc()
        print(f"/!\\ Crash occurred during {trainer_name}( )")
        sys.exit(CRASH)
    if not WINDOWS and not DEBUG_MODE:
        signal.alarm(0)


def main(arglist):
    """
    Train the solver on the given map and score its policy against the benchmark.

    Training uses the method named in the map file ('q-learning' or
    'sarsa'). The learned policy is then run for 50 episodes of at most 60
    steps, and the average reward is compared with ``game_map.benchmark``.
    The process always terminates through ``sys.exit`` with the mark (0-10),
    or with OVERTIME / CRASH when training fails.

    :param arglist: [map file name]
    """
    if len(arglist) != 1:
        print(
            "Running this file tests whether your code produces an approximately optimal policy for the given map "
            "file.")
        print("Usage: tester.py [map file name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    simulator = game_map.make_clone()
    solver = Solver()

    mark = 0

    # do offline computation
    if game_map.method == 'q-learning':
        _train_within_time_limit(solver, 'train_q_learning', simulator, game_map)
    elif game_map.method == 'sarsa':
        _train_within_time_limit(solver, 'train_sarsa', simulator, game_map)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    num_trials = 50
    max_steps = 60
    for _ in range(num_trials):
        state = game_map.make_clone()
        for _step in range(max_steps):
            # Each policy query gets a one second budget.
            if not WINDOWS and not DEBUG_MODE:
                signal.signal(signal.SIGALRM, timeout_handler)
                signal.alarm(1)
            try:
                action = solver.get_policy(state)
            except TimeOutException:
                print("/!\\ Ran overtime during get_policy( )")
                sys.exit(mark)
            except BaseException:
                traceback.print_exc()
                print("/!\\ get_policy( ) caused crash during evaluation")
                sys.exit(mark)
            if not WINDOWS and not DEBUG_MODE:
                signal.alarm(0)
            r, f = state.apply_move(action)
            total_reward += r
            if f:
                break
    total_reward /= num_trials

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    diff = game_map.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")

    sys.exit(mark)
def main(arglist):
    """Solve a testcase with A* over a heap frontier and write the path.

    Nodes are ordered by ``total_cost`` (path cost plus
    ``estimate_cost(goal_x, goal_y)``). A state is identified by player
    position, heading and grid contents; states already expanded are kept in
    a set so the membership test is a hash lookup rather than a scan of all
    expanded states. When the frontier empties without reaching the goal, an
    empty action list is written.

    :param arglist: [input file name, output file name]
    """
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)
    actions = []

    # get the coordinate of goal
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "F":
                goal_x = i
                goal_y = j

    # count the start time
    start_time = time.time()

    # 4 actions
    actionset = ["f", "r", "l", "s"]

    def state_key(tank):
        """Hashable snapshot of everything that distinguishes two states."""
        return (tank.player_x, tank.player_y, tank.player_heading,
                tuple(tuple(row) for row in tank.grid_data))

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # the set of explored states
    explored = {state_key(start.lasertank)}

    # the frontier queue, kept separate from the result in `actions`
    frontier = [start]
    while frontier:
        current_node = heapq.heappop(frontier)

        # check if arrive the goal
        if current_node.lasertank.is_finished():
            run_time = time.time() - start_time
            print("Find the Solution successfully!")
            actions = current_node.path
            print("The total cost is :", current_node.total_cost)
            print("The path is: " + str(actions))
            print("The Steps are: ", len(actions))
            print("The time is: " + str(run_time))
            break

        # add the current node to explored
        explored.add(state_key(current_node.lasertank))

        # search for children
        for move in actionset:
            node_copy = current_node.deep_copy()
            status = node_copy.lasertank.apply_move(move)
            if status != 0:  # only SUCCESS produces a child
                continue
            if state_key(node_copy.lasertank) in explored:
                continue
            node_copy.path = node_copy.path + move
            node_copy.cost = node_copy.cost + 1
            node_copy.total_cost = (node_copy.cost
                                    + node_copy.estimate_cost(goal_x, goal_y))
            heapq.heappush(frontier, node_copy)

    # Write the solution to the output file
    write_output_file(output_file, actions)
def new_apply_move_1(player_x, player_y, move, r, t_success_prob, x_size, y_size, collision_cost, gird_data, game_map,
                     game_over_cost):
    """Resolve one stochastic move and return ``(reward, next_x, next_y)``.

    ``r`` selects the outcome: below ``t_success_prob`` the intended step is
    taken; the remaining probability is split into five equal bands for the
    forward-diagonal slips, the two sideways slips and staying in place.

    Two defects of the previous version are fixed for ``DOWN``: the
    down-right slip now moves down (it used to move up), and the bounds test
    now rejects only cells outside the interior (it used to reject almost
    every cell).

    :param player_x: current column
    :param player_y: current row
    :param move: UP, DOWN or LEFT; any other value is treated as a move right
    :param r: outcome selector compared against the probability thresholds
        (presumably a uniform random number in [0, 1) - confirm with caller)
    :param t_success_prob: probability of the intended step
    :param x_size: grid width (cells 1 .. x_size - 2 are the interior)
    :param y_size: grid height (cells 1 .. y_size - 2 are the interior)
    :param collision_cost: reward returned when the step is blocked
    :param gird_data: grid used for the flag check
    :param game_map: LaserTankMap passed to the blocked / game-over checks
    :param game_over_cost: reward returned when the target cell is game over
    :return: (reward, x, y); on collision or game over the position is
        unchanged
    """
    if (player_x, player_y) in REWARDS:
        # Cells with a stored reward pay it out once and lead to the exit.
        return REWARDS[(player_x, player_y)], EXIT_STATE[0], EXIT_STATE[1]

    if (player_x, player_y) == EXIT_STATE:
        return 0, player_x, player_y

    t_error_prob = 1 - t_success_prob

    # (dx, dy) per outcome band: intended step, two forward-diagonal slips,
    # two sideways slips, no movement.
    if move == UP:
        offsets = ((0, -1), (-1, -1), (1, -1), (-1, 0), (1, 0), (0, 0))
    elif move == DOWN:
        offsets = ((0, 1), (-1, 1), (1, 1), (-1, 0), (1, 0), (0, 0))
    elif move == LEFT:
        offsets = ((-1, 0), (-1, -1), (-1, 1), (0, -1), (0, 1), (0, 0))
    else:
        offsets = ((1, 0), (1, -1), (1, 1), (0, -1), (0, 1), (0, 0))

    outcome = 5
    if r < t_success_prob:
        outcome = 0
    else:
        for band in range(1, 5):
            if r < t_success_prob + (t_error_prob * (band / 5)):
                outcome = band
                break

    dx, dy = offsets[outcome]
    next_x = player_x + dx
    next_y = player_y + dy

    # The outermost ring of the grid is not enterable. For a player inside
    # the interior this matches the per-direction checks used before.
    if not (1 <= next_x < x_size - 1 and 1 <= next_y < y_size - 1):
        return collision_cost, player_x, player_y

    if LaserTankMap.cell_is_blocked(game_map, next_y, next_x):
        return collision_cost, player_x, player_y

    # check for game over conditions
    if LaserTankMap.cell_is_game_over(game_map, next_y, next_x):
        return game_over_cost, player_x, player_y

    if gird_data[next_y][next_x] == LaserTankMap.FLAG_SYMBOL:
        return 0, next_x, next_y  # goal reward
    return -1, next_x, next_y  # move cost
def main(arglist):
    """
    Visualise the policy your code produces for the given map file.

    The solver is trained with the method named in the map file, then a
    single episode of at most 60 steps is rendered move by move with a one
    second pause. The episode reward is compared with the benchmark and the
    process exits with the resulting mark.

    :param arglist: [map_file_name]
    """
    if len(arglist) != 1:
        print(
            "Running this file visualises the path your code produces for the given map file. "
            "Set mode to be 'value', 'policy' or 'episode'. MCTS can only be used in 'episode' mode."
        )
        print("Usage: policy_visualiser.py [map_file_name] [mode]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    simulator = game_map.make_clone()
    solver = Solver()

    mark = 0

    # do offline computation: map the method name to the trainer to call
    trainer_for_method = {
        'q-learning': 'train_q_learning',
        'sarsa': 'train_sarsa',
    }
    trainer_name = trainer_for_method.get(game_map.method)
    if trainer_name is not None:
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            getattr(solver, trainer_name)(simulator)
        except TimeOutException:
            print(f"/!\\ Ran overtime during {trainer_name}( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print(f"/!\\ Crash occurred during {trainer_name}( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    max_steps = 60
    state = game_map.make_clone()
    for _step in range(max_steps):
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(1)
        try:
            action = solver.get_policy(state)
        except TimeOutException:
            print("/!\\ Ran overtime during get_policy( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print("/!\\ get_policy( ) caused crash during evaluation")
            sys.exit(mark)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

        reward, finished = state.apply_move(action)
        state.render()
        total_reward += reward
        if finished:
            break
        time.sleep(1)

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    # amount by which benchmark score is better, limited to 0..20
    diff = game_map.benchmark - total_reward
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")

    sys.exit(mark)
:param state: a LaserTankMap instance :return: pi(s) [an element of LaserTankMap.MOVES] """ # # TODO # Write code to return the optimal action to be performed at this state based on the stored Q-values. # # You can assume that either train_q_learning( ) or train_sarsa( ) has been called before this # method is called. # # When this method is called, you are allowed up to 1 second of compute time. # pass if __name__ == "__main__": solver = Solver() map_dir = "testcases/q-learn_t1.txt" test_map = LaserTankMap.process_input_file(map_dir) simulator = test_map.make_clone() mark = 0 # do offline computation if test_map.method == 'q-learning': solver.train_q_learning(simulator) print("=========== END =============")
def main(arglist):
    """
    Run the solver's offline method on the given map and score one episode.

    Value iteration or policy iteration is run according to the map's
    method. One episode is then simulated with seeds increasing from
    ``game_map.initial_seed``, for at most
    ``int(benchmark / move_cost * 2)`` steps, and the total reward is
    compared with the benchmark. The process exits with the mark (0-10), or
    with OVERTIME / CRASH when the offline computation fails.

    Every policy query is guarded by an alarm that is cancelled as soon as
    the query returns, and a timeout is reported as overtime rather than as
    a crash.

    :param arglist: [map file name]
    """
    if len(arglist) != 1:
        print(
            "Running this file tests whether your code produces an optimal policy for the given map file."
        )
        print("Usage: tester.py [map file name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)

    mark = 0

    # do offline computation
    if game_map.method == 'vi':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_value_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_value_iteration( )")
            sys.exit(OVERTIME)
        except BaseException:
            # Deliberately as broad as a bare ``except``.
            traceback.print_exc()
            print("/!\\ Crash occurred during run_value_iteration( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
    elif game_map.method == 'pi':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_policy_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_policy_iteration( )")
            sys.exit(OVERTIME)
        except BaseException:
            traceback.print_exc()
            print("/!\\ Crash occurred during run_policy_iteration( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    state = game_map.make_clone()
    seed = game_map.initial_seed
    for _step in range(int((game_map.benchmark / game_map.move_cost) * 2)):
        new_seed = seed + 1
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            if game_map.method == 'mcts':
                signal.alarm(game_map.time_limit + 1)
            else:
                signal.alarm(1)
        try:
            if game_map.method == 'mcts':
                action = solver.get_mcts_policy(state)
            else:
                action = solver.get_offline_policy(state)
        except TimeOutException:
            if game_map.method == 'mcts':
                print("/!\\ Ran overtime during get_mcts_policy( )")
            else:
                print("/!\\ Ran overtime during get_offline_policy( )")
            sys.exit(mark)
        except BaseException:
            traceback.print_exc()
            if game_map.method == 'mcts':
                print("/!\\ get_mcts_policy( ) caused crash during evaluation")
            else:
                print(
                    "/!\\ get_offline_policy( ) caused crash during evaluation"
                )
            sys.exit(mark)
        # Cancel the pending alarm so it cannot fire outside the guarded call.
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

        r = state.apply_move(action, new_seed)
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost:
            break
        seed = new_seed

    # compute score based on how close episode reward is to optimum
    print(
        f"Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    mark = 10
    below = 0
    for i in range(1, 11):
        if total_reward > (game_map.benchmark * (1 + (i / 20))):
            break
        mark -= 1
        below += 1

    if below == 0:
        print("Testcase passed, policy optimum")
    elif mark > 0:
        print(f"Testcase passed, {below} points below optimum")

    sys.exit(mark)
def main(arglist):
    """
    Visualise the policy your code produces for the given map file.

    For the 'vi' and 'pi' methods the matching offline routine
    (run_value_iteration / run_policy_iteration) is executed first. Up to 100
    moves are then simulated, rendering the map after every move and pausing
    half a second between moves; the episode stops early when a move returns
    game_map.goal_reward or game_map.game_over_cost.

    If any solver call times out or raises, a message is printed and the
    process exits with status 0. SIGALRM-based time limits are armed (and
    disarmed) only when WINDOWS is not set.

    :param arglist: [map_file_name]
    """
    if len(arglist) != 1:
        print(
            "Running this file visualises the path your code produces for the given map file. "
        )
        print("Usage: policy_visualiser.py [map_file_name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)

    mark = 0
    use_alarm = not WINDOWS

    # do offline computation (only the 'vi' and 'pi' methods have one)
    offline_routines = {'vi': 'run_value_iteration', 'pi': 'run_policy_iteration'}
    routine = offline_routines.get(game_map.method)
    if routine is not None:
        if use_alarm:
            # NOTE(review): timeout_handler is defined elsewhere; presumably it
            # raises TimeOutException when the alarm fires - confirm.
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            # Attribute lookup stays inside the try so a missing routine is
            # still reported as a crash.
            getattr(solver, routine)()
        except TimeOutException:
            print(f"/!\\ Ran overtime during {routine}( )")
            sys.exit(mark)
        except Exception:
            traceback.print_exc()
            print(f"/!\\ Crash occurred during {routine}( )")
            sys.exit(mark)
        if use_alarm:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions)
    total_reward = 0
    state = game_map.make_clone()
    state.render()
    seed = hash(input_file)  # use file name as RNG seed
    is_mcts = game_map.method == 'mcts'
    policy_name = 'get_mcts_policy' if is_mcts else 'get_offline_policy'
    for _ in range(100):
        new_seed = seed + 1
        if use_alarm:
            signal.signal(signal.SIGALRM, timeout_handler)
            # MCTS plans online, so it gets the full time limit per move;
            # offline policies only get one second per lookup.
            signal.alarm(game_map.time_limit + 1 if is_mcts else 1)
        try:
            if is_mcts:
                action = solver.get_mcts_policy(state)
            else:
                action = solver.get_offline_policy(state)
        except TimeOutException:
            print(f"/!\\ Ran overtime during {policy_name}( )")
            sys.exit(mark)
        except Exception:
            traceback.print_exc()
            print(f"/!\\ {policy_name}( ) caused crash during evaluation")
            sys.exit(mark)
        if use_alarm:
            # Same guard as the one used to arm the alarm, so the timer is
            # always disarmed before rendering and sleeping.
            signal.alarm(0)

        r = state.apply_move(action, new_seed)
        state.render()
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost:
            break
        seed = new_seed
        time.sleep(0.5)
def main():
    """
    Search the hard-coded test map for a path to the flag and print it.

    A heapq frontier of Node objects is expanded until a node whose map
    reports is_finished() is popped. Each child is given
    total_cost = path cost + heuristic, where the heuristic is
    Node.estimate_cost(goal) or, when the map contains a pair of teleport
    tiles, the cheaper of that estimate and the two "walk to one teleport,
    continue from the other" distances.

    NOTE(review): the heap ordering is whatever Node's comparison defines
    (presumably total_cost) - Node is declared elsewhere, confirm.

    Prints the map name, the path, its length and the run time when a
    solution is found; prints nothing if the frontier empties first.

    :raises ValueError: if the map contains no goal tile ("F").
    """
    input_file = "testcases/t1_bridgeport.txt"

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Locate the goal and any teleport tiles in one column-major scan
    # (the last "F" encountered wins, teleports are kept in scan order).
    goal_x = goal_y = None
    teleports = []
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            cell = game_map.grid_data[j][i]
            if cell == "F":
                goal_x, goal_y = i, j
            elif cell == "T":
                teleports.append((i, j))
    if goal_x is None:
        raise ValueError(f"no goal tile 'F' found in {input_file}")

    # The teleport shortcut needs both ends; with fewer than two tiles the
    # plain estimate is used on its own.
    teleport_pair = tuple(teleports[:2]) if len(teleports) >= 2 else None

    def state_key(tank):
        """Hashable snapshot of a tank state (position, heading, grid)."""
        return (tank.player_x, tank.player_y, tank.player_heading,
                tuple(tuple(row) for row in tank.grid_data))

    # count the start time
    start_time = time.time()

    # 4 actions
    actionset = ["f", "r", "l", "s"]

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # States already expanded (plus the start state), kept in a set so the
    # membership test per child is O(1) instead of a scan of every entry.
    explored = {state_key(start.lasertank)}

    # set the frontier queue
    frontier = []
    heapq.heappush(frontier, start)

    while frontier:
        current_node = heapq.heappop(frontier)

        # check if arrive the goal
        if current_node.lasertank.is_finished():
            run_time = time.time() - start_time
            path = current_node.path
            print("Find the Solution successfully!")
            print("the map is: ", input_file)
            print("The path is: " + str(path))
            print("The Steps are: ", len(path))
            print("The time is: " + str(run_time))
            return

        # add the current node to explored
        explored.add(state_key(current_node.lasertank))

        # search for children
        for move in actionset:
            child = current_node.deep_copy()
            status = child.lasertank.apply_move(move)
            if status != 0:  # anything other than SUCCESS is discarded
                continue
            if state_key(child.lasertank) in explored:
                continue

            child.path = child.path + move
            child.cost = child.cost + 1

            heuristic = child.estimate_cost(goal_x, goal_y)
            if teleport_pair is not None:
                (ax, ay), (bx, by) = teleport_pair
                px = child.lasertank.player_x
                py = child.lasertank.player_y
                heuristic = min(
                    heuristic,
                    distance(px, py, ax, ay) + distance(bx, by, goal_x, goal_y),
                    distance(px, py, bx, by) + distance(ax, ay, goal_x, goal_y))

            # The path cost is added to whichever estimate wins, so teleport
            # routes are ranked on the same g + h scale as direct routes.
            child.total_cost = child.cost + heuristic
            heapq.heappush(frontier, child)
def main():
    """
    Search the hard-coded test map for a path to the flag and print it.

    A heapq frontier of Node objects is expanded until a node whose map
    reports is_finished() is popped; every successful move adds 1 to the
    child's cost and no heuristic is applied.

    NOTE(review): the heap ordering is whatever Node's comparison defines
    (presumably path cost) - Node is declared elsewhere, confirm.

    On success the map name, the number of expanded nodes, the path, its
    length and the run time are printed and the process exits via
    sys.exit(). If the frontier empties first the function returns silently.
    """
    import sys  # local import: only needed to terminate the script

    input_file = "testcases/t2_shortcut.txt"

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    def state_key(tank):
        """Hashable snapshot of a tank state (position, heading, grid)."""
        return (tank.player_x, tank.player_y, tank.player_heading,
                tuple(tuple(row) for row in tank.grid_data))

    # count the start time
    start_time = time.time()

    # 4 actions
    actionset = ["s", "f", "r", "l"]

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # States already expanded (plus the start state), kept in a set so the
    # membership test per child is O(1) instead of a scan of every entry.
    explored = {state_key(start.lasertank)}
    # Number of nodes expanded so far; tracked separately because the set
    # collapses duplicate states while the report counts every expansion.
    expanded = 0

    # set the frontier queue
    frontier = []
    heapq.heappush(frontier, start)

    while frontier:
        current_node = heapq.heappop(frontier)

        # check if arrive the goal
        if current_node.lasertank.is_finished():
            run_time = time.time() - start_time
            path = current_node.path
            print("Find the Solution successfully!")
            print("the map is: ", input_file)
            print("the number of Node on explored: ", expanded)
            print("The path is: " + str(path))
            print("The number of steps is: ", len(path))
            print("The run time is: " + str(run_time))
            sys.exit()

        # add the current node to explored
        expanded += 1
        explored.add(state_key(current_node.lasertank))

        # search for children
        for move in actionset:
            child = current_node.deep_copy()
            status = child.lasertank.apply_move(move)
            if status != 0:  # anything other than SUCCESS is discarded
                continue
            if state_key(child.lasertank) in explored:
                continue

            # add in queue
            child.path = child.path + move
            child.cost = child.cost + 1
            heapq.heappush(frontier, child)
# else: # if (state.player_x, state.player_y - 1) != "#": # return 'f' # else: # return 'r' # return self.policy[current_state] # pass if __name__ == '__main__': input_file = "testcases/vi_t1.txt" method = "pi" game_map = LaserTankMap.process_input_file(input_file) solver = Solver(game_map) if method == "vi": solver.run_value_iteration() elif method == "pi": solver.run_policy_iteration() # simulate an episode (using de-randomised transitions) and compare total reward to benchmark total_reward = 0 state = game_map.make_clone() seed = game_map.initial_seed for i in range(int((game_map.benchmark / game_map.move_cost) * 2)): new_seed = seed + 1 action = solver.get_offline_policy(state) r = state.apply_move(action, new_seed) total_reward += r if r == game_map.goal_reward or r == game_map.game_over_cost: