Example #1
0
 def goal_reached(self):
     new_grid = [row[:] for row in self.grid]
     new_player = LaserTankMap(self.x_size, self.y_size, new_grid,
                               self.coord_x, self.coord_y,
                               self.player_heading)
     if new_player.is_finished():
         return True
     return False
Example #2
0
    def train_q_learning(self, simulator: LaserTankMap):
        """
        Train the agent using Q-learning, building up a table of Q-values.
        :param simulator: A simulator for collecting episode data (LaserTankMap instance)
        """
        print('q-learning')

        # Q(s, a) table
        # suggested format: key = hash(state), value = dict(mapping actions to values)
        q_values = {}

        #
        # TODO
        # Write your Q-Learning implementation here.
        #
        # When this method is called, you are allowed up to [state.time_limit] seconds of compute time. You should
        # continue training until the time limit is reached.
        #
        start = time.time()
        reward_list = []
        episode_reward = []
        while time.time() - start < simulator.time_limit:
            s = hash(simulator)
            a = self.choose_action(simulator, q_values)
            if s not in q_values:
                q_values[s] = {}
            q_s = q_values[s]
            if a in q_s:
                old_q = q_s[a]
            else:
                old_q = 0.0

            r, episode_finished = simulator.apply_move(a)
            reward_list.append(r)
            next_s = hash(simulator)
            if next_s not in q_values:
                q_values[next_s] = {}
            next_s_q = {}

            for action in simulator.MOVES:
                # print(action)
                next_s_q[action] = 0.0
                if action in q_values[next_s]:
                    next_s_q[action] = q_values[next_s][action]

            best_next_q = next_s_q[dict_argmax(next_s_q)]

            # update q_values(s,a,r,old_q,best_next_q)
            td = r + (simulator.gamma * best_next_q) - old_q
            q_values[s][a] = old_q + (self.learning_rate * td)
            if episode_finished:
                episode_reward.append(sum(reward_list))
                reward_list = []
                simulator.reset_to_start()
        df = pd.DataFrame(episode_reward)
        # df.to_csv('episode.csv', index=False)
        # store the computed Q-values
        self.q_values = q_values
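The training loop in Example #2 relies on two helpers that are not shown: dict_argmax and a choose_action method on the solver. A minimal sketch of what they might look like, assuming an epsilon-greedy selection over simulator.MOVES; the EPSILON value and the SolverSketch class name are illustrative, not part of the original code.

import random

def dict_argmax(d):
    # Key of d with the largest value (ties broken arbitrarily).
    return max(d, key=d.get)


class SolverSketch:
    EPSILON = 0.1  # exploration rate (assumed value)

    def choose_action(self, simulator, q_values):
        # Epsilon-greedy: explore with probability EPSILON, otherwise pick the
        # best action recorded so far for the current state.
        s = hash(simulator)
        known = q_values.get(s, {})
        if not known or random.random() < self.EPSILON:
            return random.choice(simulator.MOVES)
        return dict_argmax(known)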
Example #3
0
    def deep_copy(self):
        # Copy the grid row-by-row so the new map does not share rows with the original.
        copy_grid = [row[:] for row in self.lasertank.grid_data]
        new_tank = LaserTankMap(self.lasertank.x_size, self.lasertank.y_size,
                                copy_grid, self.lasertank.player_x,
                                self.lasertank.player_y,
                                self.lasertank.player_heading)
        return Node(new_tank, self.cost, self.path)
Example #4
0
 def get_neighborlist(self):
     # Logic retrieved from tutor code: https://gist.github.com/tttor/826be15b99bb4b33a50787d7eb7b5fda
     neighborlist = []
     for action in self.moves:
         data = [x[:] for x in self.lasertank.grid_data]
         temp = LaserTankMap(self.lasertank.x_size, self.lasertank.y_size,
                             data, self.lasertank.player_x,
                             self.lasertank.player_y,
                             self.lasertank.player_heading)
         temp.apply_move(action)
         neighbor = LaserTankState(temp, 1, self.flag_pos)
         neighborlist.append((neighbor, action))
     return neighborlist
Example #5
0
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]
    t_start = time.time()

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)
    actions = []
    #
    #
    # Code for your main method can go here.
    #
    # Your code should find a sequence of actions for the agent to follow to reach the goal, and store this sequence
    # in 'actions'.
    #
    #
    for i in range(len(game_map.grid_data)):
        if "F" in game_map.grid_data[i]:
            y_flag = i
            x_flag = game_map.grid_data[i].index("F")
    actions = astar_search(game_map, x_flag, y_flag)
    # Write the solution to the output file
    write_output_file(output_file, actions)
    t_elapsed = time.time() - t_start
    print(t_elapsed)
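Example #5 calls astar_search(game_map, x_flag, y_flag), which is not included in these snippets. A hedged sketch of one possible implementation, assuming the LaserTankMap API used throughout these examples (apply_move returning LaserTankMap.SUCCESS on a legal move, is_finished as the goal test) and a Manhattan-distance heuristic to the flag; the helper names state_key, heuristic and clone are illustrative only.

import heapq

def astar_search(game_map, x_flag, y_flag):
    """Return a list of moves (e.g. ['f', 'r', ...]) from the start state to the flag."""
    def state_key(m):
        # Hashable signature: player pose plus the (possibly modified) grid.
        return (m.player_x, m.player_y, m.player_heading,
                tuple(tuple(row) for row in m.grid_data))

    def heuristic(m):
        # Manhattan distance to the flag; never overestimates the moves remaining.
        return abs(m.player_x - x_flag) + abs(m.player_y - y_flag)

    def clone(m):
        grid = [row[:] for row in m.grid_data]
        return LaserTankMap(m.x_size, m.y_size, grid,
                            m.player_x, m.player_y, m.player_heading)

    counter = 0  # tie-breaker so heapq never compares LaserTankMap objects
    frontier = [(heuristic(game_map), 0, counter, game_map, [])]
    explored = set()
    while frontier:
        _, cost, _, current, path = heapq.heappop(frontier)
        if current.is_finished():
            return path
        key = state_key(current)
        if key in explored:
            continue
        explored.add(key)
        for move in ['f', 'l', 'r', 's']:
            successor = clone(current)
            if successor.apply_move(move) == LaserTankMap.SUCCESS:
                counter += 1
                heapq.heappush(frontier, (cost + 1 + heuristic(successor), cost + 1,
                                          counter, successor, path + [move]))
    return []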
Example #6
0
def main(arglist):
    # input_file = arglist[0]
    # output_file = arglist[1]
    input_file = "testcases/t2_brickyard.txt"
    output_file = "testcases/output.txt"

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    actions = []


    start = Node(game_map)
    end = Goal(game_map)

    solution = (astar(start, end))
    print(solution)

    for i in solution:
        actions.append(i)


    #
    #
    # Code for your main method can go here.
    #
    # Your code should find a sequence of actions for the agent to follow to reach the goal, and store this sequence
    # in 'actions'.
    #
    #

    # Write the solution to the output file
    write_output_file(output_file, actions)
Example #7
0
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)
    actions = []

    # closedList = set()
    # myTup = (tuple(map(tuple, game_map.grid_data))), game_map.player_x, game_map.player_y, game_map.player_heading
    # anotherTup = (tuple(map(tuple, game_map.grid_data))), game_map.player_x + 1, game_map.player_y + 1, game_map.player_heading
    #
    # closedList.add(myTup)
    # closedList.add(anotherTup)
    #
    # hi = (tuple(map(tuple, game_map.grid_data))), game_map.player_x - 1, game_map.player_y + 1, game_map.player_heading
    #
    # print(hi in closedList)

    node = Node(game_map)

    actions = uniform_cost_search(node)

    # Write the solution to the output file
    write_output_file(output_file, actions)
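The commented-out experiment at the top of Example #7 (building tuples of the grid plus the player pose and testing set membership) hints at how visited states are de-duplicated inside uniform_cost_search. A small sketch of that idea as a standalone helper, assuming the LaserTankMap attribute names used throughout these examples:

def state_signature(game_map):
    """Hashable signature of a LaserTankMap state: immutable grid plus player pose.
    Suitable as a set or dict key for the closed list of UCS or A*."""
    return (tuple(map(tuple, game_map.grid_data)),
            game_map.player_x, game_map.player_y, game_map.player_heading)


# Usage sketch:
# closed = set()
# if state_signature(game_map) not in closed:
#     closed.add(state_signature(game_map))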
Example #8
0
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    #
    #
    # Code for your main method can go here.
    #
    # Your code should find a sequence of actions for the agent to follow to reach the goal, and store this sequence
    # in 'actions'.
    #
    #

    # Find the flag position to be used for the manhattan heuristic function
    flag_pos = (0, 0)
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == game_map.FLAG_SYMBOL:
                flag_pos = (i, j)

    # Wrap the initial instance and flag
    enter = LaserTankState(game_map, 0, flag_pos)
    actions = search_astar(enter)
    # actions = search_bfs(enter)
    actions = ucs(enter)

    # Write the solution to the output file
    write_output_file(output_file, actions)
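Example #8 stores flag_pos so that LaserTankState can compute a Manhattan heuristic, but the heuristic itself is not shown. A plausible sketch, assuming the wrapped map is held as self.lasertank and flag_pos is an (x, y) tuple, matching the constructor calls in Examples #4 and #8; the class and method names are illustrative.

class LaserTankStateHeuristicSketch:
    def manhattan_heuristic(self):
        # Grid distance from the player to the flag, ignoring obstacles.
        # Each move changes the position by at most one cell, so this never
        # overestimates the moves still required and is admissible for A*.
        flag_x, flag_y = self.flag_pos
        return (abs(self.lasertank.player_x - flag_x)
                + abs(self.lasertank.player_y - flag_y))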
Example #9
0
    def __init__(self, game_map):
        self.game_map = game_map
        self.lasertank = LaserTankMap(self.game_map)

        self.t_success_prob = self.lasertank.t_success_prob
        self.t_error_prob = self.lasertank.t_error_prob

        self.converged = False

        #
        # TODO
        # Write any environment preprocessing code you need here (e.g. storing teleport locations).
        self.state = list((x, y, z)
                          for x in range(1, self.lasertank.x_size - 1)
                          for y in range(1, self.lasertank.y_size - 1)
                          for z in DIRECTIONS)
        # self.state.append(EXIT_STATE)

        self.reward = {state: 0 for state in self.state}

        for i in range(1, self.lasertank.x_size - 1):
            for j in range(1, self.lasertank.y_size - 1):
                if self.lasertank.grid_data[j][i] == "#":
                    REWARDS[(i, j)] = self.lasertank.collision_cost
                    # self.state.remove((i, j))
                elif self.lasertank.grid_data[j][i] == "W":
                    REWARDS[(i, j)] = self.lasertank.game_over_cost
                    # self.state.remove((i, j))
                elif self.lasertank.grid_data[j][i] == "F":
                    REWARDS[(i, j)] = self.lasertank.goal_reward
        for k in range(4):
            self.state.append((EXIT_STATE[0], EXIT_STATE[1], k))
        self.value = {state: 0 for state in self.state}
        self.policy = {state: 'f' for state in self.state}
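Example #9 only builds the data structures (self.state, self.value, self.policy and the REWARDS table); the sweep that consumes them is not shown. A hedged sketch of one synchronous value-iteration backup over those structures, assuming a hypothetical get_transition_probs(state, action) helper that returns a dict mapping successor states to probabilities, plus the gamma and move_cost attributes of the LaserTankMap support code; the move set and convergence tolerance are assumed values.

    def value_iteration_sweep(self):
        """One synchronous value-iteration sweep over self.state."""
        threshold = 1e-4  # assumed convergence tolerance
        new_value = {}
        new_policy = {}
        for state in self.state:
            best_q = float('-inf')
            best_action = 'f'
            for action in ['f', 'l', 'r', 's']:  # assumed move set
                q = 0.0
                for next_state, prob in self.get_transition_probs(state, action).items():
                    # Reward depends only on the cell entered, as in the set-up above.
                    reward = REWARDS.get((next_state[0], next_state[1]),
                                         self.lasertank.move_cost)
                    q += prob * (reward + self.lasertank.gamma * self.value[next_state])
                if q > best_q:
                    best_q, best_action = q, action
            new_value[state] = best_q
            new_policy[state] = best_action
        # Converged once no state value moves by more than the tolerance.
        self.converged = max(abs(new_value[s] - self.value[s])
                             for s in self.state) < threshold
        self.value = new_value
        self.policy = new_policy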
Example #10
0
    def create_copy(self):
        """
        This function copies the game board.
        """
        new_map = [row[:] for row in self.grid_data]

        new_state = LaserTankMap(x_size=self.x_size, y_size=self.y_size, grid_data=new_map, player_x=self.player_x,
                                 player_y=self.player_y, player_heading=self.player_heading)

        return new_state

    def get_successor(self):
        next_states = []
        for move in self.moves:
            new_data = [row[:] for row in self.game_map.grid_data]
            new_map = LaserTankMap(self.game_map.x_size,
                                   self.game_map.y_size,
                                   new_data,
                                   player_x=self.game_map.player_x,
                                   player_y=self.game_map.player_y,
                                   player_heading=self.game_map.player_heading)
            # new_state = deepcopy(self.get_map())
            new_parents = [row[:] for row in self.parents]
            # new_parents = deepcopy(self.parents)
            if new_map.apply_move(move) == LaserTankMap.SUCCESS:
                new_parents.append(move)
                nextState = State(new_map, 1, new_parents)
                next_states.append((nextState, move))

        return next_states
Example #12
0
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Extract goal
    for coord_x in range(game_map.x_size):
        for coord_y in range(game_map.y_size):
            if game_map.grid_data[coord_y][coord_x] == 'F':
                global goal_coord_x
                global goal_coord_y
                goal_coord_x = coord_x
                goal_coord_y = coord_y

    actions = []

    actionset = ['W', 'D', 'A', 'S']

    # Initialise Starting State
    start_state = PlayerTank(game_map.grid_data, 0, game_map.player_x,
                             game_map.player_y, 'W', [], game_map.x_size,
                             game_map.y_size, game_map.player_heading)

    #Start the Fringe/Frontier
    fringe = queue.PriorityQueue()
    fringe.put(start_state)

    #Keep track of all states explored
    hash_explored = {start_state.id: [start_state]}

    while not fringe.empty():
        current = fringe.get()

        # When goal is reached
        if current.goal_reached():
            actions = current.path
            break

        for action in actionset:
            neighbor = current.action_move(action)

            # Proceed if there was no collision or game over.
            # Add to visited and fringe if not previously seen.
            if neighbor != 0:
                if (neighbor.id not in hash_explored):
                    hash_explored[neighbor.id] = [neighbor]
                    fringe.put(neighbor)
                elif (neighbor not in hash_explored[neighbor.id]):
                    hash_explored[neighbor.id].append(neighbor)
                    fringe.put(neighbor)

    # Write the solution to the output file
    write_output_file(output_file, actions)
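# The priority-queue search above assumes PlayerTank is orderable (queue.PriorityQueue
# compares entries with <), exposes an 'id' used as the explored-dictionary key, and
# supports equality for the duplicate check. A hedged sketch of those missing pieces,
# using the attribute names seen in Examples #12 and #15 (cost, path, grid, coord_x,
# coord_y, player_heading); the plain path-cost priority is an assumption.
class PlayerTankOrderingSketch:
    def __lt__(self, other):
        # PriorityQueue pops the entry with the smallest cost first.
        return self.cost < other.cost

    def __eq__(self, other):
        # Two states are equal when the tank pose and the grid contents match.
        return (self.coord_x == other.coord_x
                and self.coord_y == other.coord_y
                and self.player_heading == other.player_heading
                and self.grid == other.grid)

    @property
    def id(self):
        # Hashable signature used as the key of the explored dictionary.
        return hash((self.coord_x, self.coord_y, self.player_heading,
                     tuple(map(tuple, self.grid))))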
def main(arglist):
    """
    Visualise the path of the given output file applied to the given map file
    :param arglist: map file name, output file name
    """
    if len(arglist) != 2:
        print(
            "Running this file visualises the path of the given output file applied to the given map file."
        )
        print("Usage: path_visualiser.py [map_file_name] [output_file_name]")
        return

    map_file = arglist[0]
    soln_file = arglist[1]

    optimal_steps = get_optimal_number_of_steps(map_file)
    game_map = LaserTankMap.process_input_file(map_file)
    game_map.render()

    f = open(soln_file, 'r')
    moves = f.readline().strip().split(',')

    # apply each move in sequence
    error_occurred = False
    for i in range(len(moves)):
        move = moves[i]
        ret = game_map.apply_move(move)
        game_map.render()
        if ret == LaserTankMap.COLLISION:
            print("ERROR: Move resulting in Collision performed at step " +
                  str(i))
            error_occurred = True
        elif ret == LaserTankMap.GAME_OVER:
            print("ERROR: Move resulting in Game Over performed at step " +
                  str(i))
            error_occurred = True
        time.sleep(0.5)

    if error_occurred:
        return -1

    if game_map.is_finished():
        print("Puzzle solved.")
        if len(moves) == optimal_steps:
            print("Solution is optimal (" + str(len(moves)) + " steps)!")
            return 0
        else:
            print("Solution is " + str(len(moves) - optimal_steps) +
                  " steps longer than optimal.")
            return len(moves) - optimal_steps
    else:
        print("ERROR: Goal not reached after all actions performed.")
        return -1
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    actions = []

    coord = (game_map.player_y, game_map.player_x)
    for y in range(game_map.y_size):
        for x in range(game_map.x_size):
            if game_map.grid_data[y][x] == game_map.FLAG_SYMBOL:
                goal_coord = (y, x)
    # print(coord)
    # print(goal_coord)

    game_map.coord = coord

    state = State(game_map, 0, [])

    # UCS
    # result = transition(state, "ucs", goal_coord)
    # A*
    result = transition(state, "a*", goal_coord)
    # A* with heuristic of teleport
    # result = transition(state, "a*-teleport", goal_coord)
    # A* with heuristic of ice
    # result = transition(state, "a*-ice", goal_coord)

    print("Nodes Generated:", result[1])
    print("Nodes on Fringe:", result[2])
    print("Explored Nodes:", result[3])
    print("Time Taken:", result[4], "seconds")

    output_string = ','.join(result[0])  # moves
    # print(outputString)
    actions.append(output_string)

    # Write the solution to the output file
    write_output_file(output_file, actions)
Example #15
0
    def action_move(self, action):
        new_grid = [row[:] for row in self.grid]
        new_player = LaserTankMap(self.x_size, self.y_size, new_grid,
                                  self.coord_x, self.coord_y,
                                  self.player_heading)
        # Move Forward
        if action == 'W':
            result = new_player.apply_move('f')
            path_to_take = 'f'

        # Turn Clockwise
        elif action == 'D':
            result = new_player.apply_move('r')
            path_to_take = 'r'

        # Turn Counter-Clockwise
        elif action == 'A':
            result = new_player.apply_move('l')
            path_to_take = 'l'

        # Shoot Laser
        elif action == 'S':
            result = new_player.apply_move('s')
            path_to_take = 's'
        else:
            # Unknown action: report it and signal that no new state was produced.
            print("No/Wrong Action Input")
            return 0

        if result == 0:    # SUCCESS: build the successor state
            new_state = PlayerTank(new_player.grid_data, self.cost + 1,
                                   new_player.player_x, new_player.player_y,
                                   action, self.path + [path_to_take],
                                   self.x_size, self.y_size,
                                   new_player.player_heading)
        else:              # COLLISION or GAME_OVER: no valid successor
            new_state = 0
        return new_state
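The W/D/A/S-to-f/r/l/s branching in Example #15 can be collapsed into a dictionary lookup, which also avoids result and path_to_take being referenced before assignment when an unknown action is passed. A sketch of that variant (the names mirror Example #15 and are not part of the original code):

    ACTION_MAP = {'W': 'f', 'D': 'r', 'A': 'l', 'S': 's'}

    def action_move(self, action):
        move = self.ACTION_MAP.get(action)
        if move is None:
            print("No/Wrong Action Input")
            return 0
        new_grid = [row[:] for row in self.grid]
        new_player = LaserTankMap(self.x_size, self.y_size, new_grid,
                                  self.coord_x, self.coord_y,
                                  self.player_heading)
        if new_player.apply_move(move) != 0:   # collision or game over
            return 0
        return PlayerTank(new_player.grid_data, self.cost + 1,
                          new_player.player_x, new_player.player_y,
                          action, self.path + [move],
                          self.x_size, self.y_size,
                          new_player.player_heading)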
Example #16
0
def main(arglist):
    """
    Test whether the given output file is a valid solution to the given map file.

    This test script uses a 'trapdoor function' approach to comparing your computed values and policy to a reference
    solution without revealing the reference solution - 3 different results are computed based on your values and policy
    and compared to the results computed for the reference solution.

    :param arglist: [map file name]
    """
    input_file = arglist[0]
    input_file_1 = arglist[1]
    game_map = LaserTankMap.process_input_file(input_file)
    game_map_1 = LaserTankMap.process_input_file(input_file_1)
    simulator = game_map.make_clone()
    simulator_1 = game_map_1.make_clone()
    solver = Solver(0.01)
    solver_1 = Solver(0.01)

    if game_map.method == 'q-learning':
        solver.train_q_learning(simulator)
    total_reward = 0
    num_trials = 50
    max_steps = 60
    for _ in range(num_trials):
        state = game_map.make_clone()
        for i in range(max_steps):
            action = solver.get_policy(state)
            r, f = state.apply_move(action)
            total_reward += r
            if f:
                break
    total_reward /= num_trials

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    diff = game_map.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")
    Aveg_0 = solver.get_list()

    if game_map_1.method == 'sarsa':
        solver_1.train_sarsa(simulator_1)
    total_reward = 0
    num_trials = 50
    max_steps = 60
    for _ in range(num_trials):
        state = game_map_1.make_clone()
        for i in range(max_steps):
            action = solver_1.get_policy(state)
            r, f = state.apply_move(action)
            total_reward += r
            if f:
                break
    total_reward /= num_trials

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map_1.benchmark)}"
    )
    diff = game_map_1.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")
    Aveg_1 = solver_1.get_list()

    x = range(len(Aveg_0))
    x_1 = range(len(Aveg_1))

    plt.plot(x, Aveg_0, '--r', label='q_learning')
    plt.plot(x_1, Aveg_1, '-b', label='sarsa')
    plt.xlabel('episode')
    plt.ylabel('Average Reward')
    plt.title(
        'Average episode reward against episode number under\n'
        'Q-learning and SARSA with learning rate 0.01'
    )
    plt.legend()
    plt.savefig('q4.png')
    plt.show()
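Example #16 plots the output of solver.get_list(), which is not defined in these snippets. One plausible sketch, assuming the solver keeps the total reward of every training episode in a self.episode_reward list (as the local episode_reward list in Example #2 does) and exposes a running average of it:

    def get_list(self):
        """Running average of the per-episode rewards recorded during training."""
        averages = []
        running_total = 0.0
        for i, reward in enumerate(self.episode_reward, start=1):
            running_total += reward
            averages.append(running_total / i)
        return averages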
Example #17
0
def main(arglist):
    """
    Test whether the given output file is a valid solution to the given map file.

    This test script uses a 'trapdoor function' approach to comparing your computed values and policy to a reference
    solution without revealing the reference solution - 3 different results are computed based on your values and policy
    and compared to the results computed for the reference solution.

    :param arglist: [map file name]
    """
    if len(arglist) != 1:
        print(
            "Running this file tests whether your code produces an approximately optimal policy for the given map "
            "file.")
        print("Usage: tester.py [map file name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    simulator = game_map.make_clone()
    solver = Solver()

    mark = 0

    # do offline computation
    if game_map.method == 'q-learning':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.train_q_learning(simulator)
        except TimeOutException:
            print("/!\\ Ran overtime during train_q_learning( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during train_q_learning( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
    elif game_map.method == 'sarsa':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.train_sarsa(simulator)
        except TimeOutException:
            print("/!\\ Ran overtime during train_sarsa( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during train_sarsa( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    num_trials = 50
    max_steps = 60
    for _ in range(num_trials):
        state = game_map.make_clone()
        for i in range(max_steps):
            if not WINDOWS and not DEBUG_MODE:
                signal.signal(signal.SIGALRM, timeout_handler)
                signal.alarm(1)
            try:
                action = solver.get_policy(state)
            except TimeOutException:
                print("/!\\ Ran overtime during get_policy( )")
                sys.exit(mark)
            except:
                traceback.print_exc()
                print("/!\\ get_policy( ) caused crash during evaluation")
                sys.exit(mark)
            if not WINDOWS and not DEBUG_MODE:
                signal.alarm(0)
            r, f = state.apply_move(action)
            total_reward += r
            if f:
                break
    total_reward /= num_trials

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    diff = game_map.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")
    sys.exit(mark)
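The testers in Examples #17 and #20 rely on a TimeOutException class and a timeout_handler function that are not shown. A minimal sketch of the SIGALRM pattern they imply (POSIX only, which is why the calls are guarded by the WINDOWS flag):

import signal


class TimeOutException(Exception):
    """Raised when the alarm fires before the guarded call returns."""
    pass


def timeout_handler(signum, frame):
    raise TimeOutException()


# Usage sketch, mirroring the tester:
# signal.signal(signal.SIGALRM, timeout_handler)
# signal.alarm(time_limit + 1)   # arm the timer
# ...guarded call...
# signal.alarm(0)                # disarm the timer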
Example #18
0
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]
    #input_file = "testcases/t3_labyrinth.txt"
    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # show game map
    #game_map.render()
    actions = []

    # get the coordinate of goal
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "F":
                goal_x = i
                goal_y = j


    # record the start time
    start_time = time.time()

    # 4 actions
    actionset = ["f", "r", "l", "s"]

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # test the estimate cost
    #estimate_cost = start.estimate_cost(goal_x,goal_y)
    #print(estimate_cost)
    #print(start.lasertank.player_x,start.lasertank.player_y)

    # the set of explored
    id = 0
    explored = {id: (start.lasertank.player_x, start.lasertank.player_y,
                     start.lasertank.player_heading, start.lasertank.grid_data)}
    #map_explored = {id: start.lasertank.grid_data}
    # set the frontier queue
    heapq.heappush(actions,start)

    while len(actions) > 0:
        #heapq.heapify(actions)
        current_node = heapq.heappop(actions)

        # check if the goal has been reached
        if current_node.lasertank.is_finished():
            end_time = time.time()
            run_time = end_time - start_time
            print("Found the solution successfully!")
            actions = current_node.path
            print("The total cost is:", current_node.total_cost)
            print("The path is: " + str(actions))
            print("The Steps are: ", len(actions))
            print("The time is: " + str(run_time))
            break

        # add the current node to explored
        id += 1
        explored[id] = (current_node.lasertank.player_x,
                        current_node.lasertank.player_y,
                        current_node.lasertank.player_heading,
                        current_node.lasertank.grid_data)
        #map_explored[id] = (current_node.lasertank.grid_data)

        # search for children
        for move in actionset:

            node_copy = current_node.deep_copy()

            status = node_copy.lasertank.apply_move(move)

            child_path = node_copy.path + move
            child_cost = node_copy.cost + 1

            if status == 0:     # SUCCESS

                # skip if this node has already been explored
                if (node_copy.lasertank.player_x, node_copy.lasertank.player_y,
                        node_copy.lasertank.player_heading,
                        node_copy.lasertank.grid_data) in explored.values():
                    continue
                else:
                    # add in queue
                    """if (node_copy.lasertank.grid_data) not in map_explored.values():
                        child_cost -= 1"""
                    node_copy.path = child_path
                    node_copy.cost = child_cost
                    total_cost = node_copy.cost + node_copy.estimate_cost(goal_x,goal_y)
                    node_copy.total_cost = total_cost
                    heapq.heappush(actions,node_copy)

                    # check the frontier queue
                    # print(node_copy.path)

    # Write the solution to the output file
    write_output_file(output_file, actions)
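heapq in Example #18 orders Node objects directly, which requires Node to define __lt__; neither that method nor the estimate_cost heuristic is shown (the same Node presumably also carries the deep_copy method from Example #3). A hedged sketch consistent with the attributes used above; the Manhattan-distance heuristic and the class name are assumptions.

class NodeSketch:
    def __init__(self, lasertank, cost, path):
        self.lasertank = lasertank
        self.cost = cost         # moves taken so far (g)
        self.path = path         # string of moves, e.g. "ffrls"
        self.total_cost = cost   # f = g + h, updated before each push

    def __lt__(self, other):
        # heapq pops the node with the smallest f = g + h first.
        return self.total_cost < other.total_cost

    def estimate_cost(self, goal_x, goal_y):
        # Manhattan distance from the tank to the flag.
        return (abs(self.lasertank.player_x - goal_x)
                + abs(self.lasertank.player_y - goal_y))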
Example #19
0
def new_apply_move_1(player_x, player_y, move, r, t_success_prob, x_size,
                     y_size, collision_cost, grid_data, game_map,
                     game_over_cost):
    if (player_x, player_y) in REWARDS:
        # s = REWARDS
        return REWARDS[(player_x, player_y)], EXIT_STATE[0], EXIT_STATE[1]

    if (player_x, player_y) == EXIT_STATE:
        return 0, player_x, player_y

    t_error_prob = 1 - t_success_prob
    if move == UP:
        if r < t_success_prob:
            next_y = player_y - 1
            next_x = player_x
        elif r < t_success_prob + (t_error_prob * (1 / 5)):
            next_y = player_y - 1
            next_x = player_x - 1
        elif r < t_success_prob + (t_error_prob * (2 / 5)):
            next_y = player_y - 1
            next_x = player_x + 1
        elif r < t_success_prob + (t_error_prob * (3 / 5)):
            next_y = player_y
            next_x = player_x - 1
        elif r < t_success_prob + (t_error_prob * (4 / 5)):
            next_y = player_y
            next_x = player_x + 1
        else:
            next_y = player_y
            next_x = player_x

        if next_y < 1 or next_x < 1 or next_x >= x_size - 1:
            return collision_cost, player_x, player_y
    elif move == DOWN:

        if r < t_success_prob:
            next_y = player_y + 1
            next_x = player_x
        elif r < t_success_prob + (t_error_prob * (1 / 5)):
            next_y = player_y + 1
            next_x = player_x - 1
        elif r < t_success_prob + (t_error_prob * (2 / 5)):
            next_y = player_y + 1
            next_x = player_x + 1
        elif r < t_success_prob + (t_error_prob * (3 / 5)):
            next_y = player_y
            next_x = player_x - 1
        elif r < t_success_prob + (t_error_prob * (4 / 5)):
            next_y = player_y
            next_x = player_x + 1
        else:
            next_y = player_y
            next_x = player_x

        if next_y >= y_size - 1 or next_x < 1 or next_x >= x_size - 1:
            return collision_cost, player_x, player_y
    elif move == LEFT:

        if r < t_success_prob:
            next_y = player_y
            next_x = player_x - 1
        elif r < t_success_prob + (t_error_prob * (1 / 5)):
            next_y = player_y - 1
            next_x = player_x - 1
        elif r < t_success_prob + (t_error_prob * (2 / 5)):
            next_y = player_y + 1
            next_x = player_x - 1
        elif r < t_success_prob + (t_error_prob * (3 / 5)):
            next_y = player_y - 1
            next_x = player_x
        elif r < t_success_prob + (t_error_prob * (4 / 5)):
            next_y = player_y + 1
            next_x = player_x
        else:
            next_y = player_y
            next_x = player_x

        if next_x < 1 or next_y < 1 or next_y >= y_size - 1:
            return collision_cost, player_x, player_y
    else:
        if r < t_success_prob:
            next_y = player_y
            next_x = player_x + 1
        elif r < t_success_prob + (t_error_prob * (1 / 5)):
            next_y = player_y - 1
            next_x = player_x + 1
        elif r < t_success_prob + (t_error_prob * (2 / 5)):
            next_y = player_y + 1
            next_x = player_x + 1
        elif r < t_success_prob + (t_error_prob * (3 / 5)):
            next_y = player_y - 1
            next_x = player_x
        elif r < t_success_prob + (t_error_prob * (4 / 5)):
            next_y = player_y + 1
            next_x = player_x
        else:
            next_y = player_y
            next_x = player_x

        if next_x >= x_size - 1 or next_y < 1 or next_y >= y_size - 1:
            return collision_cost, player_x, player_y

    if LaserTankMap.cell_is_blocked(game_map, next_y, next_x):
        return collision_cost, player_x, player_y

    # check for game over conditions
    if LaserTankMap.cell_is_game_over(game_map, next_y, next_x):
        return game_over_cost, player_x, player_y

    if grid_data[next_y][next_x] == LaserTankMap.FLAG_SYMBOL:
        return 0, next_x, next_y  # goal reward
    else:
        return -1, next_x, next_y  # move cost
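new_apply_move_1 samples one of six outcomes per move: the intended cell with probability t_success_prob, and each of five nearby alternatives with an equal share of the remaining probability. The same distribution can be written as data rather than nested if/elif chains; the sketch below mirrors the branches above and reuses the UP/DOWN/LEFT constants, while the function name is illustrative.

def movement_distribution(move, t_success_prob):
    """Return a list of (probability, dx, dy) outcomes for one stochastic move.
    The intended displacement happens with probability t_success_prob; the
    remaining probability is split evenly over five perturbed outcomes."""
    share = (1 - t_success_prob) / 5
    if move == UP:
        intended = (0, -1)
        alternatives = [(-1, -1), (1, -1), (-1, 0), (1, 0), (0, 0)]
    elif move == DOWN:
        intended = (0, 1)
        alternatives = [(-1, 1), (1, 1), (-1, 0), (1, 0), (0, 0)]
    elif move == LEFT:
        intended = (-1, 0)
        alternatives = [(-1, -1), (-1, 1), (0, -1), (0, 1), (0, 0)]
    else:  # RIGHT
        intended = (1, 0)
        alternatives = [(1, -1), (1, 1), (0, -1), (0, 1), (0, 0)]
    return [(t_success_prob, intended[0], intended[1])] + \
           [(share, dx, dy) for dx, dy in alternatives]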
Example #20
0
def main(arglist):
    """
    Visualise the policy your code produces for the given map file.
    :param arglist: [map_file_name, mode]
    """

    if len(arglist) != 1:
        print(
            "Running this file visualises the path your code produces for the given map file. "
            "Set mode to be 'value', 'policy' or 'episode'. MCTS can only be used in 'episode' mode."
        )
        print("Usage: policy_visualiser.py [map_file_name] [mode]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    simulator = game_map.make_clone()
    solver = Solver()

    mark = 0

    # do offline computation
    if game_map.method == 'q-learning':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.train_q_learning(simulator)
        except TimeOutException:
            print("/!\\ Ran overtime during train_q_learning( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during train_q_learning( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
    elif game_map.method == 'sarsa':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.train_sarsa(simulator)
        except TimeOutException:
            print("/!\\ Ran overtime during train_sarsa( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during train_sarsa( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    max_steps = 60
    state = game_map.make_clone()
    for i in range(max_steps):
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(1)
        try:
            action = solver.get_policy(state)
        except TimeOutException:
            print("/!\\ Ran overtime during get_policy( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print("/!\\ get_policy( ) caused crash during evaluation")
            sys.exit(mark)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
        r, f = state.apply_move(action)
        state.render()
        total_reward += r
        if f:
            break

        time.sleep(1)

    # compute score based on how close episode reward is to optimum
    print(
        f"Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    diff = game_map.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")
    sys.exit(mark)
    def get_policy(self, state):
        """
        Return the action the learned policy selects for the given state.
        :param state: a LaserTankMap instance
        :return: pi(s) [an element of LaserTankMap.MOVES]
        """

        #
        # TODO
        # Write code to return the optimal action to be performed at this state based on the stored Q-values.
        #
        # You can assume that either train_q_learning( ) or train_sarsa( ) has been called before this
        # method is called.
        #
        # When this method is called, you are allowed up to 1 second of compute time.
        #

        pass


if __name__ == "__main__":
    solver = Solver()
    map_dir = "testcases/q-learn_t1.txt"
    test_map = LaserTankMap.process_input_file(map_dir)
    simulator = test_map.make_clone()

    mark = 0

    # do offline computation
    if test_map.method == 'q-learning':
        solver.train_q_learning(simulator)

    print("=========== END =============")
def main(arglist):
    """
    Test whether the given output file is a valid solution to the given map file.

    This test script uses a 'trapdoor function' approach to comparing your computed values and policy to a reference
    solution without revealing the reference solution - 3 different results are computed based on your values and policy
    and compared to the results computed for the reference solution.

    :param arglist: [map file name]
    """
    if len(arglist) != 1:
        print(
            "Running this file tests whether your code produces an optimal policy for the given map file."
        )
        print("Usage: tester.py [map file name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)

    mark = 0

    # do offline computation
    if game_map.method == 'vi':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_value_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_value_iteration( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during run_value_iteration( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
    elif game_map.method == 'pi':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_policy_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_policy_iteration( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during run_policy_iteration( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    state = game_map.make_clone()
    seed = game_map.initial_seed
    for i in range(int((game_map.benchmark / game_map.move_cost) * 2)):
        new_seed = seed + 1
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            if game_map.method == 'mcts':
                signal.alarm(game_map.time_limit + 1)
            else:
                signal.alarm(1)
        try:
            if game_map.method == 'mcts':
                action = solver.get_mcts_policy(state)
            else:
                action = solver.get_offline_policy(state)
        # except TimeOutException:
        #     if game_map.method == 'mcts':
        #         print("/!\\ Ran overtime during get_mcts_policy( )")
        #     else:
        #         print("/!\\ Ran overtime during get_offline_policy( )")
        #     sys.exit(mark)
        except:
            traceback.print_exc()
            if game_map.method == 'mcts':
                print("/!\\ get_mcts_policy( ) caused crash during evaluation")
            else:
                print(
                    "/!\\ get_offline_policy( ) caused crash during evaluation"
                )
            sys.exit(mark)
        r = state.apply_move(action, new_seed)
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost:
            break
        seed = new_seed

    # compute score based on how close episode reward is to optimum
    print(
        f"Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    mark = 10
    below = 0
    for i in range(1, 11):
        if total_reward > (game_map.benchmark * (1 + (i / 20))):
            break
        else:
            mark -= 1
            below += 1

    if below == 0:
        print("Testcase passed, policy optimum")
    elif mark > 0:
        print(f"Testcase passed, {below} points below optimum")
    sys.exit(mark)
def main(arglist):
    """
    Visualise the policy your code produces for the given map file.
    :param arglist: [map_file_name, mode]
    """

    if len(arglist) != 1:
        print(
            "Running this file visualises the path your code produces for the given map file. "
        )
        print("Usage: policy_visualiser.py [map_file_name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)

    mark = 0

    # do offline computation
    if game_map.method == 'vi':
        if not WINDOWS:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_value_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_value_iteration( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during run_value_iteration( )")
            sys.exit(mark)
        if not WINDOWS:
            signal.alarm(0)
    elif game_map.method == 'pi':
        if not WINDOWS:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_policy_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_policy_iteration( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during run_policy_iteration( )")
            sys.exit(mark)
        if not WINDOWS:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    state = game_map.make_clone()
    state.render()
    seed = hash(input_file)  # use file name as RNG seed
    for i in range(100):
        new_seed = seed + 1
        if not WINDOWS:
            signal.signal(signal.SIGALRM, timeout_handler)
            if game_map.method == 'mcts':
                signal.alarm(game_map.time_limit + 1)
            else:
                signal.alarm(1)
        try:
            if game_map.method == 'mcts':
                action = solver.get_mcts_policy(state)
            else:
                action = solver.get_offline_policy(state)
        except TimeOutException:
            if game_map.method == 'mcts':
                print("/!\\ Ran overtime during get_mcts_policy( )")
            else:
                print("/!\\ Ran overtime during get_offline_policy( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            if game_map.method == 'mcts':
                print("/!\\ get_mcts_policy( ) caused crash during evaluation")
            else:
                print(
                    "/!\\ get_offline_policy( ) caused crash during evaluation"
                )
            sys.exit(mark)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
        r = state.apply_move(action, new_seed)
        state.render()
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost:
            break
        seed = new_seed

        time.sleep(0.5)
def main():
    # input_file = arglist[0]
    # output_file = arglist[1]
    input_file = "testcases/t1_bridgeport.txt"
    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # show game map
    #game_map.render()
    actions = []

    # get the coordinate of goal
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "F":
                goal_x = i
                goal_y = j

    #==========================================
    #   get the teleport
    pos = 0
    teleportx = {pos: 0}
    teleporty = {pos: 0}
    exist_tele = 0

    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "T":
                teleportx[pos] = i
                teleporty[pos] = j
                exist_tele = 1
                pos += 1

    # assignment test variable
    #number_node_create = 0
    #number_node_fringe = 0

    # ===========================================

    # record the start time
    start_time = time.time()

    # 4 actions
    actionset = ["f", "r", "l", "s"]

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # test the estimate cost
    #estimate_cost = start.estimate_cost(goal_x,goal_y)
    #print(estimate_cost)
    #print(start.lasertank.player_x,start.lasertank.player_y)

    # the set of explored
    id = 0
    explored = {
        id: (start.lasertank.player_x, start.lasertank.player_y,
             start.lasertank.player_heading, start.lasertank.grid_data)
    }
    #map_explored = {id: start.lasertank.grid_data}
    # set the frontier queue
    heapq.heappush(actions, start)

    while len(actions) > 0:
        #heapq.heapify(actions)
        current_node = heapq.heappop(actions)

        # check if the goal has been reached
        if current_node.lasertank.is_finished():
            end_time = time.time()
            run_time = end_time - start_time
            print("Found the solution successfully!")
            print("The map is: ", input_file)

            # ===========================================
            # print("the number of Node generated: ", number_node_create)
            # print("the number of Node in fringe: ",len(actions))
            # print("the number of Node on explored: ", id)

            # ===========================================
            actions = current_node.path
            print("The path is: " + str(actions))
            print("The Steps are: ", len(actions))
            print("The time is: " + str(run_time))
            break

        # add the current node to explored
        id += 1
        explored[id] = (current_node.lasertank.player_x,
                        current_node.lasertank.player_y,
                        current_node.lasertank.player_heading,
                        current_node.lasertank.grid_data)
        #map_explored[id] = (current_node.lasertank.grid_data)

        # search for children
        for move in actionset:

            node_copy = current_node.deep_copy()

            status = node_copy.lasertank.apply_move(move)

            child_path = node_copy.path + move
            child_cost = node_copy.cost + 1

            if status == 0:  # SUCCESS

                # skip if this node has already been explored
                if (node_copy.lasertank.player_x, node_copy.lasertank.player_y,
                        node_copy.lasertank.player_heading,
                        node_copy.lasertank.grid_data) in explored.values():
                    continue
                else:
                    # add in queue
                    """if (node_copy.lasertank.grid_data) not in map_explored.values():
                        child_cost -= 1"""
                    node_copy.path = child_path
                    node_copy.cost = child_cost
                    total_cost = node_copy.cost + node_copy.estimate_cost(
                        goal_x, goal_y)
                    if exist_tele == 1:
                        total_cost = min(
                            total_cost, (distance(node_copy.lasertank.player_x,
                                                  node_copy.lasertank.player_y,
                                                  teleportx[0], teleporty[0]) +
                                         distance(teleportx[1], teleporty[1],
                                                  goal_x, goal_y)),
                            (distance(node_copy.lasertank.player_x,
                                      node_copy.lasertank.player_y,
                                      teleportx[1], teleporty[1]) +
                             distance(teleportx[0], teleporty[0], goal_x,
                                      goal_y)))

                    node_copy.total_cost = total_cost
                    heapq.heappush(actions, node_copy)
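# The teleport-aware heuristic above calls a distance( ) helper that is not shown
# in these snippets. A minimal sketch, assuming plain Manhattan distance between
# two grid cells (consistent with the other heuristics in these examples):
def distance(x1, y1, x2, y2):
    """Manhattan distance between cells (x1, y1) and (x2, y2)."""
    return abs(x1 - x2) + abs(y1 - y2)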
def main():
    # input_file = arglist[0]
    # output_file = arglist[1]
    input_file = "testcases/t2_shortcut.txt"
    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # show game map
    #game_map.render()
    actions = []

    # get the coordinate of goal
    """for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "F":
                goal_x = i
                goal_y = j"""

    # assignment test variable
    #number_node_create = 0

    # record the start time
    start_time = time.time()

    # 4 actions
    actionset = ["s", "f", "r", "l"]

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # test the estimate cost
    #estimate_cost = start.estimate_cost(goal_x,goal_y)
    #print(estimate_cost)
    #print(start.lasertank.player_x,start.lasertank.player_y)

    # the set of explored
    id = 0
    explored = {
        id: (start.lasertank.player_x, start.lasertank.player_y,
             start.lasertank.player_heading, start.lasertank.grid_data)
    }

    # set the frontier queue
    heapq.heappush(actions, start)

    while len(actions) > 0:
        #heapq.heapify(actions)
        current_node = heapq.heappop(actions)

        # check if the goal has been reached
        if current_node.lasertank.is_finished():
            end_time = time.time()
            run_time = end_time - start_time
            print("Found the solution successfully!")
            print("The map is: ", input_file)
            #print("the number of Node generated: ", number_node_create)
            #print("the number of Node in fringe: ", len(actions))
            print("the number of Node on explored: ", id)
            actions = current_node.path
            print("The path is: " + str(actions))
            print("The number of steps is: ", len(actions))
            print("The run time is: " + str(run_time))
            exit()

        # add the current node to explored
        id += 1
        explored[id] = (current_node.lasertank.player_x,
                        current_node.lasertank.player_y,
                        current_node.lasertank.player_heading,
                        current_node.lasertank.grid_data)

        # search for children
        for move in actionset:

            node_copy = current_node.deep_copy()

            status = node_copy.lasertank.apply_move(move)

            child_path = node_copy.path + move
            child_cost = node_copy.cost + 1

            if status == 0:  # SUCCESS

                # skip if this node has already been explored
                if (node_copy.lasertank.player_x, node_copy.lasertank.player_y,
                        node_copy.lasertank.player_heading,
                        node_copy.lasertank.grid_data) in explored.values():
                    continue
                else:
                    # add in queue
                    node_copy.path = child_path
                    node_copy.cost = child_cost
                    #total_cost = node_copy.cost + node_copy.estimate_cost(goal_x,goal_y)
                    #node_copy.total_cost = total_cost
                    heapq.heappush(actions, node_copy)
Example #26
0
        # else:
        #     if (state.player_x, state.player_y - 1) != "#":
        #         return 'f'
        #     else:
        #         return 'r'

        # return self.policy[current_state]
        #

        pass


if __name__ == '__main__':
    input_file = "testcases/vi_t1.txt"
    method = "pi"
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)
    if method == "vi":
        solver.run_value_iteration()
    elif method == "pi":
        solver.run_policy_iteration()
    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    state = game_map.make_clone()
    seed = game_map.initial_seed
    for i in range(int((game_map.benchmark / game_map.move_cost) * 2)):
        new_seed = seed + 1
        action = solver.get_offline_policy(state)
        r = state.apply_move(action, new_seed)
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost: