Example #1
def main(arglist):
    # Fall back to a bundled testcase when no command-line arguments are given
    input_file = arglist[0] if len(arglist) > 0 else "testcases/t2_brickyard.txt"
    output_file = arglist[1] if len(arglist) > 1 else "testcases/output.txt"

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    start = Node(game_map)
    end = Goal(game_map)

    solution = astar(start, end)
    print(solution)

    actions = list(solution)


    #
    #
    # Code for your main method can go here.
    #
    # Your code should find a sequence of actions for the agent to follow to reach the goal, and store this sequence
    # in 'actions'.
    #
    #

    # Write the solution to the output file
    write_output_file(output_file, actions)
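# The astar() helper called above is not shown in this snippet. As a point of
# reference only, a minimal generic A* over hashable states might look like the
# sketch below; 'successors' and 'heuristic' are hypothetical callables, not
# part of the original code.
import heapq
import itertools

def astar_sketch(start, is_goal, successors, heuristic):
    """Generic A*: returns a list of actions, or None if the goal is unreachable.

    successors(state) yields (action, next_state, step_cost) tuples;
    heuristic(state) must never overestimate the remaining cost.
    """
    counter = itertools.count()  # tie-breaker so states themselves are never compared
    frontier = [(heuristic(start), next(counter), 0, start, [])]
    best_g = {start: 0}
    while frontier:
        _, _, g, state, actions = heapq.heappop(frontier)
        if is_goal(state):
            return actions
        if g > best_g.get(state, float("inf")):
            continue  # stale entry; a cheaper path to this state was already expanded
        for action, nxt, cost in successors(state):
            new_g = g + cost
            if new_g < best_g.get(nxt, float("inf")):
                best_g[nxt] = new_g
                heapq.heappush(frontier, (new_g + heuristic(nxt), next(counter),
                                          new_g, nxt, actions + [action]))
    return None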
Example #2
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]
    t_start = time.time()

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)
    actions = []
    #
    #
    # Code for your main method can go here.
    #
    # Your code should find a sequence of actions for the agent to follow to reach the goal, and store this sequence
    # in 'actions'.
    #
    #
    for i in range(len(game_map.grid_data)):
        if "F" in game_map.grid_data[i]:
            y_flag = i
            x_flag = game_map.grid_data[i].index("F")
    actions = astar_search(game_map, x_flag, y_flag)
    # Write the solution to the output file
    write_output_file(output_file, actions)
    t_elapsed = time.time() - t_start
    print(t_elapsed)
Example #3
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)
    actions = []

    # closedList = set()
    # myTup = (tuple(map(tuple, game_map.grid_data))), game_map.player_x, game_map.player_y, game_map.player_heading
    # anotherTup = (tuple(map(tuple, game_map.grid_data))), game_map.player_x + 1, game_map.player_y + 1, game_map.player_heading
    #
    # closedList.add(myTup)
    # closedList.add(anotherTup)
    #
    # hi = (tuple(map(tuple, game_map.grid_data))), game_map.player_x - 1, game_map.player_y + 1, game_map.player_heading
    #
    # print(hi in closedList)

    node = Node(game_map)

    actions = uniform_cost_search(node)

    # Write the solution to the output file
    write_output_file(output_file, actions)
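# The commented-out experiment above checks that a state can live in a set by
# freezing the grid into a tuple of tuples. A small helper capturing that idea
# (the name is hypothetical) would be:
def state_key(game_map):
    """Hashable key for a closed set: frozen grid plus player pose."""
    return (tuple(map(tuple, game_map.grid_data)),
            game_map.player_x, game_map.player_y, game_map.player_heading)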
Example #4
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    #
    #
    # Code for your main method can go here.
    #
    # Your code should find a sequence of actions for the agent to follow to reach the goal, and store this sequence
    # in 'actions'.
    #
    #

    # Find the flag position, used by the Manhattan heuristic function
    flag_pos = (0, 0)
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == game_map.FLAG_SYMBOL:
                flag_pos = (i, j)

    # Wrap the initial instance and flag
    enter = LaserTankState(game_map, 0, flag_pos)
    # Alternative searches (only the uniform-cost search result is used):
    # actions = search_astar(enter)
    # actions = search_bfs(enter)
    actions = ucs(enter)

    # Write the solution to the output file
    write_output_file(output_file, actions)
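# The Manhattan heuristic mentioned above is not shown; a sketch consistent with
# the attributes used elsewhere in these examples (player_x, player_y) would be:
def manhattan_heuristic(game_map, flag_pos):
    """Manhattan distance from the player's cell to the flag.

    On maps without teleporters or ice each action moves the tank at most one
    cell, so this estimate never overestimates the remaining number of moves.
    """
    fx, fy = flag_pos
    return abs(game_map.player_x - fx) + abs(game_map.player_y - fy)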
Example #5
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # Extract goal
    for coord_x in range(game_map.x_size):
        for coord_y in range(game_map.y_size):
            if game_map.grid_data[coord_y][coord_x] == 'F':
                global goal_coord_x
                global goal_coord_y
                goal_coord_x = coord_x
                goal_coord_y = coord_y

    actions = []

    actionset = ['W', 'D', 'A', 'S']

    # Initialise Starting State
    start_state = PlayerTank(game_map.grid_data, 0, game_map.player_x,
                             game_map.player_y, 'W', [], game_map.x_size,
                             game_map.y_size, game_map.player_heading)

    #Start the Fringe/Frontier
    fringe = queue.PriorityQueue()
    fringe.put(start_state)

    #Keep track of all states explored
    hash_explored = {start_state.id: [start_state]}

    while not fringe.empty():
        current = fringe.get()

        # When goal is reached
        if current.goal_reached():
            actions = current.path
            break

        for action in actionset:
            neighbor = current.action_move(action)

            # Proceed only if the move caused no collision or game over;
            # add to explored and the fringe if this state has not been seen before
            if neighbor != 0:
                if (neighbor.id not in hash_explored):
                    hash_explored[neighbor.id] = [neighbor]
                    fringe.put(neighbor)
                elif (neighbor not in hash_explored[neighbor.id]):
                    hash_explored[neighbor.id].append(neighbor)
                    fringe.put(neighbor)

    # Write the solution to the output file
    write_output_file(output_file, actions)
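# queue.PriorityQueue pops the smallest item and therefore compares the objects
# it stores, so PlayerTank presumably defines an ordering (e.g. __lt__ on its
# path cost). A sketch of an alternative that never compares states directly is
# to enqueue (priority, tie_breaker, state) tuples:
import itertools
import queue

def make_fringe():
    """Return (fringe, push, pop) where items are ordered only by priority."""
    fringe = queue.PriorityQueue()
    counter = itertools.count()

    def push(priority, state):
        fringe.put((priority, next(counter), state))

    def pop():
        _, _, state = fringe.get()
        return state

    return fringe, push, pop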
def main(arglist):
    """
    Visualise the path of the given output file applied to the given map file
    :param arglist: map file name, output file name
    """
    if len(arglist) != 2:
        print(
            "Running this file visualises the path of the given output file applied to the given map file."
        )
        print("Usage: path_visualiser.py [map_file_name] [output_file_name]")
        return

    map_file = arglist[0]
    soln_file = arglist[1]

    optimal_steps = get_optimal_number_of_steps(map_file)
    game_map = LaserTankMap.process_input_file(map_file)
    game_map.render()

    with open(soln_file, 'r') as f:
        moves = f.readline().strip().split(',')

    # apply each move in sequence
    error_occurred = False
    for i, move in enumerate(moves):
        ret = game_map.apply_move(move)
        game_map.render()
        if ret == LaserTankMap.COLLISION:
            print("ERROR: Move resulting in Collision performed at step " +
                  str(i))
            error_occurred = True
        elif ret == LaserTankMap.GAME_OVER:
            print("ERROR: Move resulting in Game Over performed at step " +
                  str(i))
            error_occurred = True
        time.sleep(0.5)

    if error_occurred:
        return -1

    if game_map.is_finished():
        print("Puzzle solved.")
        if len(moves) == optimal_steps:
            print("Solution is optimal (" + str(len(moves)) + " steps)!")
            return 0
        else:
            print("Solution is " + str(len(moves) - optimal_steps) +
                  " steps longer than optimal.")
            return len(moves) - optimal_steps
    else:
        print("ERROR: Goal not reached after all actions performed.")
        return -1
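# The visualiser above reads the solution as a single comma-separated line of
# moves, so the write_output_file helper used by the other examples (not shown
# here) presumably writes that same format. A minimal sketch:
def write_output_file_sketch(filename, actions):
    """Write the action sequence as one comma-separated line."""
    with open(filename, 'w') as f:
        f.write(','.join(actions))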
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]

    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    actions = []

    coord = (game_map.player_y, game_map.player_x)
    for y in range(game_map.y_size):
        for x in range(game_map.x_size):
            if game_map.grid_data[y][x] == game_map.FLAG_SYMBOL:
                goal_coord = (y, x)
    # print(coord)
    # print(goal_coord)

    game_map.coord = coord

    state = State(game_map, 0, [])

    # UCS
    # result = transition(state, "ucs", goal_coord)
    # A*
    result = transition(state, "a*", goal_coord)
    # A* with heuristic of teleport
    # result = transition(state, "a*-teleport", goal_coord)
    # A* with heuristic of ice
    # result = transition(state, "a*-ice", goal_coord)

    print("Nodes Generated:", result[1])
    print("Nodes on Fringe:", result[2])
    print("Explored Nodes:", result[3])
    print("Time Taken:", result[4], "seconds")

    output_string = ','.join(result[0])  # moves
    # print(output_string)
    actions.append(output_string)

    # Write the solution to the output file
    write_output_file(output_file, actions)
Example #8
def main(arglist):
    input_file = arglist[0]
    output_file = arglist[1]
    #input_file = "testcases/t3_labyrinth.txt"
    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # show game map
    #game_map.render()
    actions = []

    # get the coordinates of the goal
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "F":
                goal_x = i
                goal_y = j


    # record the start time
    start_time = time.time()

    # 4 actions
    actionset = ["f", "r", "l", "s"]

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # test the estimate cost
    #estimate_cost = start.estimate_cost(goal_x,goal_y)
    #print(estimate_cost)
    #print(start.lasertank.player_x,start.lasertank.player_y)

    # the set of explored states
    id = 0
    explored = {
        id: (start.lasertank.player_x, start.lasertank.player_y,
             start.lasertank.player_heading, start.lasertank.grid_data)
    }
    #map_explored = {id: start.lasertank.grid_data}
    # set up the frontier queue (note: 'actions' doubles as the heap here and is
    # reassigned to the solution path once the goal is reached)
    heapq.heappush(actions, start)

    while len(actions) > 0:
        #heapq.heapify(actions)
        current_node = heapq.heappop(actions)

        # check whether the goal has been reached
        if current_node.lasertank.is_finished():
            end_time = time.time()
            run_time = end_time - start_time
            print("Solution found!")
            actions = current_node.path
            print("The total cost is:", current_node.total_cost)
            print("The path is: " + str(actions))
            print("The Steps are: ", len(actions))
            print("The time is: " + str(run_time))
            break

        # add the current node to explored
        id += 1
        explored[id] = (current_node.lasertank.player_x,current_node.lasertank.player_y,current_node.lasertank.player_heading,current_node.lasertank.grid_data)
        #map_explored[id] = (current_node.lasertank.grid_data)

        # search for children
        for move in actionset:

            node_copy = current_node.deep_copy()

            status = node_copy.lasertank.apply_move(move)

            child_path = node_copy.path + move
            child_cost = node_copy.cost + 1

            if status == 0:     # SUCCESS

                # skip this child if its state has already been explored
                if (node_copy.lasertank.player_x,node_copy.lasertank.player_y,node_copy.lasertank.player_heading,node_copy.lasertank.grid_data) in explored.values():
                    continue
                else:
                    # add in queue
                    """if (node_copy.lasertank.grid_data) not in map_explored.values():
                        child_cost -= 1"""
                    node_copy.path = child_path
                    node_copy.cost = child_cost
                    total_cost = node_copy.cost + node_copy.estimate_cost(goal_x,goal_y)
                    node_copy.total_cost = total_cost
                    heapq.heappush(actions,node_copy)

                    # check the frontier queue
                    #print(node_copy.path)







    # Write the solution to the output file
    write_output_file(output_file, actions)
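# Checking membership with 'in explored.values()' scans every explored state on
# each expansion. A hypothetical O(1) variant keeps the same duplicate test but
# stores hashable keys in a set (grid_data must be frozen to become hashable):
def explored_key(node):
    lt = node.lasertank
    return (lt.player_x, lt.player_y, lt.player_heading,
            tuple(map(tuple, lt.grid_data)))

# explored = {explored_key(start)}
# ... inside the expansion loop:
#     key = explored_key(node_copy)
#     if key in explored:
#         continue
#     explored.add(key)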
Example #9
def main(arglist):
    """
    Visualise the policy your code produces for the given map file.
    :param arglist: [map_file_name, mode]
    """

    if len(arglist) != 1:
        print(
            "Running this file visualises the path your code produces for the given map file. "
            "Set mode to be 'value', 'policy' or 'episode'. MCTS can only be used in 'episode' mode."
        )
        print("Usage: policy_visualiser.py [map_file_name] [mode]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    simulator = game_map.make_clone()
    solver = Solver()

    mark = 0

    # do offline computation
    if game_map.method == 'q-learning':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.train_q_learning(simulator)
        except TimeOutException:
            print("/!\\ Ran overtime during train_q_learning( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during train_q_learning( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
    elif game_map.method == 'sarsa':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.train_sarsa(simulator)
        except TimeOutException:
            print("/!\\ Ran overtime during train_sarsa( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during train_sarsa( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    max_steps = 60
    state = game_map.make_clone()
    for i in range(max_steps):
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(1)
        try:
            action = solver.get_policy(state)
        except TimeOutException:
            print("/!\\ Ran overtime during get_policy( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print("/!\\ get_policy( ) caused crash during evaluation")
            sys.exit(mark)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
        r, f = state.apply_move(action)
        state.render()
        total_reward += r
        if f:
            break

        time.sleep(1)

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    diff = game_map.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")
    sys.exit(mark)
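# The timeout machinery used above (timeout_handler, TimeOutException) is not
# shown in this snippet; a minimal sketch consistent with how it is used
# (POSIX only, since signal.SIGALRM is unavailable on Windows) would be:
import signal

class TimeOutException(Exception):
    """Raised when a signal.alarm() deadline expires."""
    pass

def timeout_handler(signum, frame):
    raise TimeOutException()

# signal.signal(signal.SIGALRM, timeout_handler) installs the handler,
# signal.alarm(seconds) arms the deadline, and signal.alarm(0) cancels it.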
def main(arglist):
    """
    Test whether the given output file is a valid solution to the given map file.

    This test script uses a 'trapdoor function' approach to comparing your computed values and policy to a reference
    solution without revealing the reference solution - 3 different results are computed based on your values and policy
    and compared to the results computed for the reference solution.

    :param arglist: [map file name]
    """
    if len(arglist) != 1:
        print(
            "Running this file tests whether your code produces an optimal policy for the given map file."
        )
        print("Usage: tester.py [map file name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)

    mark = 0

    # do offline computation
    if game_map.method == 'vi':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_value_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_value_iteration( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during run_value_iteration( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
    elif game_map.method == 'pi':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_policy_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_policy_iteration( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during run_policy_iteration( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    state = game_map.make_clone()
    seed = game_map.initial_seed
    for i in range(int((game_map.benchmark / game_map.move_cost) * 2)):
        new_seed = seed + 1
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            if game_map.method == 'mcts':
                signal.alarm(game_map.time_limit + 1)
            else:
                signal.alarm(1)
        try:
            if game_map.method == 'mcts':
                action = solver.get_mcts_policy(state)
            else:
                action = solver.get_offline_policy(state)
        # except TimeOutException:
        #     if game_map.method == 'mcts':
        #         print("/!\\ Ran overtime during get_mcts_policy( )")
        #     else:
        #         print("/!\\ Ran overtime during get_offline_policy( )")
        #     sys.exit(mark)
        except:
            traceback.print_exc()
            if game_map.method == 'mcts':
                print("/!\\ get_mcts_policy( ) caused crash during evaluation")
            else:
                print(
                    "/!\\ get_offline_policy( ) caused crash during evaluation"
                )
            sys.exit(mark)
        r = state.apply_move(action, new_seed)
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost:
            break
        seed = new_seed

    # compute score based on how close the episode reward is to the optimum:
    # one mark is deducted for each 5% of the benchmark by which the episode reward falls short
    print(
        f"Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    mark = 10
    below = 0
    for i in range(1, 11):
        if total_reward > (game_map.benchmark * (1 + (i / 20))):
            break
        else:
            mark -= 1
            below += 1

    if below == 0:
        print("Testcase passed, policy optimum")
    elif mark > 0:
        print(f"Testcase passed, {below} points below optimum")
    sys.exit(mark)
def main(arglist):
    """
    Visualise the policy your code produces for the given map file.
    :param arglist: [map_file_name]
    """

    if len(arglist) != 1:
        print(
            "Running this file visualises the path your code produces for the given map file. "
        )
        print("Usage: policy_visualiser.py [map_file_name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)

    mark = 0

    # do offline computation
    if game_map.method == 'vi':
        if not WINDOWS:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_value_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_value_iteration( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during run_value_iteration( )")
            sys.exit(mark)
        if not WINDOWS:
            signal.alarm(0)
    elif game_map.method == 'pi':
        if not WINDOWS:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.run_policy_iteration()
        except TimeOutException:
            print("/!\\ Ran overtime during run_policy_iteration( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during run_policy_iteration( )")
            sys.exit(mark)
        if not WINDOWS:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    state = game_map.make_clone()
    state.render()
    seed = hash(input_file)  # use file name as RNG seed
    for i in range(100):
        new_seed = seed + 1
        if not WINDOWS:
            signal.signal(signal.SIGALRM, timeout_handler)
            if game_map.method == 'mcts':
                signal.alarm(game_map.time_limit + 1)
            else:
                signal.alarm(1)
        try:
            if game_map.method == 'mcts':
                action = solver.get_mcts_policy(state)
            else:
                action = solver.get_offline_policy(state)
        except TimeOutException:
            if game_map.method == 'mcts':
                print("/!\\ Ran overtime during get_mcts_policy( )")
            else:
                print("/!\\ Ran overtime during get_offline_policy( )")
            sys.exit(mark)
        except:
            traceback.print_exc()
            if game_map.method == 'mcts':
                print("/!\\ get_mcts_policy( ) caused crash during evaluation")
            else:
                print(
                    "/!\\ get_offline_policy( ) caused crash during evaluation"
                )
            sys.exit(mark)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
        r = state.apply_move(action, new_seed)
        state.render()
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost:
            break
        seed = new_seed

        time.sleep(0.5)
def main():
    # input_file = arglist[0]
    # output_file = arglist[1]
    input_file = "testcases/t1_bridgeport.txt"
    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # show game map
    #game_map.render()
    actions = []

    # get the coordinates of the goal
    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "F":
                goal_x = i
                goal_y = j

    #==========================================
    #   get the teleporter positions
    pos = 0
    teleportx = {pos: 0}
    teleporty = {pos: 0}
    exist_tele = 0

    for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "T":
                teleportx[pos] = i
                teleporty[pos] = j
                exist_tele = 1
                pos += 1

    # assignment test variable
    #number_node_create = 0
    #number_node_fringe = 0

    # ===========================================

    # record the start time
    start_time = time.time()

    # 4 actions
    actionset = ["f", "r", "l", "s"]

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # test the estimate cost
    #estimate_cost = start.estimate_cost(goal_x,goal_y)
    #print(estimate_cost)
    #print(start.lasertank.player_x,start.lasertank.player_y)

    # the set of explored states
    id = 0
    explored = {
        id: (start.lasertank.player_x, start.lasertank.player_y,
             start.lasertank.player_heading, start.lasertank.grid_data)
    }
    #map_explored = {id: start.lasertank.grid_data}
    # set the frontier queue
    heapq.heappush(actions, start)

    while len(actions) > 0:
        #heapq.heapify(actions)
        current_node = heapq.heappop(actions)

        # check whether the goal has been reached
        if current_node.lasertank.is_finished():
            end_time = time.time()
            run_time = end_time - start_time
            print("Solution found!")
            print("the map is: ", input_file)

            # ===========================================
            # print("the number of Node generated: ", number_node_create)
            # print("the number of Node in fringe: ",len(actions))
            # print("the number of Node on explored: ", id)

            # ===========================================
            actions = current_node.path
            print("The path is: " + str(actions))
            print("The Steps are: ", len(actions))
            print("The time is: " + str(run_time))
            break

        # add the current node to explored
        id += 1
        explored[id] = (current_node.lasertank.player_x,
                        current_node.lasertank.player_y,
                        current_node.lasertank.player_heading,
                        current_node.lasertank.grid_data)
        #map_explored[id] = (current_node.lasertank.grid_data)

        # search for children
        for move in actionset:

            node_copy = current_node.deep_copy()

            status = node_copy.lasertank.apply_move(move)

            child_path = node_copy.path + move
            child_cost = node_copy.cost + 1

            if status == 0:  # SUCCESS

                # skip this child if its state has already been explored
                if (node_copy.lasertank.player_x, node_copy.lasertank.player_y,
                        node_copy.lasertank.player_heading,
                        node_copy.lasertank.grid_data) in explored.values():
                    continue
                else:
                    # add in queue
                    """if (node_copy.lasertank.grid_data) not in map_explored.values():
                        child_cost -= 1"""
                    node_copy.path = child_path
                    node_copy.cost = child_cost
                    total_cost = node_copy.cost + node_copy.estimate_cost(
                        goal_x, goal_y)
                    if exist_tele == 1:
                        total_cost = min(
                            total_cost, (distance(node_copy.lasertank.player_x,
                                                  node_copy.lasertank.player_y,
                                                  teleportx[0], teleporty[0]) +
                                         distance(teleportx[1], teleporty[1],
                                                  goal_x, goal_y)),
                            (distance(node_copy.lasertank.player_x,
                                      node_copy.lasertank.player_y,
                                      teleportx[1], teleporty[1]) +
                             distance(teleportx[0], teleporty[0], goal_x,
                                      goal_y)))

                    node_copy.total_cost = total_cost
                    heapq.heappush(actions, node_copy)
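# The distance() helper used by the teleporter heuristic above is not shown; a
# Manhattan-distance sketch consistent with how it is called would be:
def distance(x1, y1, x2, y2):
    """Manhattan distance between two grid cells."""
    return abs(x1 - x2) + abs(y1 - y2)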
Example #13
        # else:
        #     if (state.player_x, state.player_y - 1) != "#":
        #         return 'f'
        #     else:
        #         return 'r'

        # return self.policy[current_state]
        #

        pass


if __name__ == '__main__':
    input_file = "testcases/vi_t1.txt"
    method = "pi"
    game_map = LaserTankMap.process_input_file(input_file)
    solver = Solver(game_map)
    if method == "vi":
        solver.run_value_iteration()
    elif method == "pi":
        solver.run_policy_iteration()
    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    state = game_map.make_clone()
    seed = game_map.initial_seed
    for i in range(int((game_map.benchmark / game_map.move_cost) * 2)):
        new_seed = seed + 1
        action = solver.get_offline_policy(state)
        r = state.apply_move(action, new_seed)
        total_reward += r
        if r == game_map.goal_reward or r == game_map.game_over_cost:
            break
        seed = new_seed


    def get_policy(self, state):
        """
        Return the next action to take in the given state, based on the stored Q-values.
        :param state: a LaserTankMap instance
        :return: pi(s) [an element of LaserTankMap.MOVES]
        """

        #
        # TODO
        # Write code to return the optimal action to be performed at this state based on the stored Q-values.
        #
        # You can assume that either train_q_learning( ) or train_sarsa( ) has been called before this
        # method is called.
        #
        # When this method is called, you are allowed up to 1 second of compute time.
        #

        pass
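# As a rough illustration of what the TODO above asks for (not the author's
# implementation), a greedy policy read off a Q-table keyed by
# (state_key, action) might look like:
def greedy_action(q_table, state_key, moves):
    """Return the action with the highest stored Q-value for this state.

    'moves' stands in for the legal action set (e.g. LaserTankMap.MOVES);
    unseen (state, action) pairs default to a value of 0.
    """
    return max(moves, key=lambda a: q_table.get((state_key, a), 0.0))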


if __name__ == "__main__":
    solver = Solver()
    map_dir = "testcases/q-learn_t1.txt"
    test_map = LaserTankMap.process_input_file(map_dir)
    simulator = test_map.make_clone()

    mark = 0

    # do offline computation
    if test_map.method == 'q-learning':
        solver.train_q_learning(simulator)

    print("=========== END =============")
Example #15
def main(arglist):
    """
    Test whether the given output file is a valid solution to the given map file.

    This test script uses a 'trapdoor function' approach to comparing your computed values and policy to a reference
    solution without revealing the reference solution - 3 different results are computed based on your values and policy
    and compared to the results computed for the reference solution.

    :param arglist: [map file name]
    """
    if len(arglist) != 1:
        print(
            "Running this file tests whether your code produces an approximately optimal policy for the given map "
            "file.")
        print("Usage: tester.py [map file name]")
        return

    input_file = arglist[0]
    game_map = LaserTankMap.process_input_file(input_file)
    simulator = game_map.make_clone()
    solver = Solver()

    mark = 0

    # do offline computation
    if game_map.method == 'q-learning':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.train_q_learning(simulator)
        except TimeOutException:
            print("/!\\ Ran overtime during train_q_learning( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during train_q_learning( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)
    elif game_map.method == 'sarsa':
        if not WINDOWS and not DEBUG_MODE:
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(game_map.time_limit + 1)
        try:
            solver.train_sarsa(simulator)
        except TimeOutException:
            print("/!\\ Ran overtime during train_sarsa( )")
            sys.exit(OVERTIME)
        except:
            traceback.print_exc()
            print("/!\\ Crash occurred during train_sarsa( )")
            sys.exit(CRASH)
        if not WINDOWS and not DEBUG_MODE:
            signal.alarm(0)

    # simulate an episode (using de-randomised transitions) and compare total reward to benchmark
    total_reward = 0
    num_trials = 50
    max_steps = 60
    for _ in range(num_trials):
        state = game_map.make_clone()
        for i in range(max_steps):
            if not WINDOWS and not DEBUG_MODE:
                signal.signal(signal.SIGALRM, timeout_handler)
                signal.alarm(1)
            try:
                action = solver.get_policy(state)
            except TimeOutException:
                print("/!\\ Ran overtime during get_policy( )")
                sys.exit(mark)
            except:
                traceback.print_exc()
                print("/!\\ get_policy( ) caused crash during evaluation")
                sys.exit(mark)
            if not WINDOWS and not DEBUG_MODE:
                signal.alarm(0)
            r, f = state.apply_move(action)
            total_reward += r
            if f:
                break
    total_reward /= num_trials

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    diff = game_map.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")
    sys.exit(mark)
def main():
    # input_file = arglist[0]
    # output_file = arglist[1]
    input_file = "testcases/t2_shortcut.txt"
    # Read the input testcase file
    game_map = LaserTankMap.process_input_file(input_file)

    # show game map
    #game_map.render()
    actions = []

    # get the coordinate of goal
    """for i in range(game_map.x_size):
        for j in range(game_map.y_size):
            if game_map.grid_data[j][i] == "F":
                goal_x = i
                goal_y = j"""

    # assignment test variable
    #number_node_create = 0

    # record the start time
    start_time = time.time()

    # 4 actions
    actionset = ["s", "f", "r", "l"]

    # record the start node
    start = Node(lasertank=game_map, cost=0, path="")

    # test the estimate cost
    #estimate_cost = start.estimate_cost(goal_x,goal_y)
    #print(estimate_cost)
    #print(start.lasertank.player_x,start.lasertank.player_y)

    # the set of explored states
    id = 0
    explored = {
        id: (start.lasertank.player_x, start.lasertank.player_y,
             start.lasertank.player_heading, start.lasertank.grid_data)
    }

    # set the frontier queue
    heapq.heappush(actions, start)

    while len(actions) > 0:
        #heapq.heapify(actions)
        current_node = heapq.heappop(actions)

        # check whether the goal has been reached
        if current_node.lasertank.is_finished():
            end_time = time.time()
            run_time = end_time - start_time
            print("Solution found!")
            print("the map is: ", input_file)
            #print("the number of Node generated: ", number_node_create)
            #print("the number of Node in fringe: ", len(actions))
            print("the number of nodes explored: ", id)
            actions = current_node.path
            print("The path is: " + str(actions))
            print("The number of steps is: ", len(actions))
            print("The run time is: " + str(run_time))
            exit()

        # add the current node to explored
        id += 1
        explored[id] = (current_node.lasertank.player_x,
                        current_node.lasertank.player_y,
                        current_node.lasertank.player_heading,
                        current_node.lasertank.grid_data)

        # search for children
        for move in actionset:

            node_copy = current_node.deep_copy()

            status = node_copy.lasertank.apply_move(move)

            child_path = node_copy.path + move
            child_cost = node_copy.cost + 1

            if status == 0:  # SUCCESS

                # skip this child if its state has already been explored
                if (node_copy.lasertank.player_x, node_copy.lasertank.player_y,
                        node_copy.lasertank.player_heading,
                        node_copy.lasertank.grid_data) in explored.values():
                    continue
                else:
                    # add in queue
                    node_copy.path = child_path
                    node_copy.cost = child_cost
                    #total_cost = node_copy.cost + node_copy.estimate_cost(goal_x,goal_y)
                    #node_copy.total_cost = total_cost
                    heapq.heappush(actions, node_copy)
Example #17
def main(arglist):
    """
    Test whether the given output file is a valid solution to the given map file.

    This test script uses a 'trapdoor function' approach to comparing your computed values and policy to a reference
    solution without revealing the reference solution - 3 different results are computed based on your values and policy
    and compared to the results computed for the reference solution.

    :param arglist: [map file name]
    """
    input_file = arglist[0]
    input_file_1 = arglist[1]
    game_map = LaserTankMap.process_input_file(input_file)
    game_map_1 = LaserTankMap.process_input_file(input_file_1)
    simulator = game_map.make_clone()
    simulator_1 = game_map_1.make_clone()
    solver = Solver(0.01)
    solver_1 = Solver(0.01)

    if game_map.method == 'q-learning':
        solver.train_q_learning(simulator)
    total_reward = 0
    num_trials = 50
    max_steps = 60
    for _ in range(num_trials):
        state = game_map.make_clone()
        for i in range(max_steps):
            action = solver.get_policy(state)
            r, f = state.apply_move(action)
            total_reward += r
            if f:
                break
    total_reward /= num_trials

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map.benchmark)}"
    )
    diff = game_map.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")
    Aveg_0 = solver.get_list()

    if game_map_1.method == 'sarsa':
        solver_1.train_sarsa(simulator_1)
    total_reward = 0
    num_trials = 50
    max_steps = 60
    for _ in range(num_trials):
        state = game_map_1.make_clone()
        for i in range(max_steps):
            action = solver_1.get_policy(state)
            r, f = state.apply_move(action)
            total_reward += r
            if f:
                break
    total_reward /= num_trials

    # compute score based on how close episode reward is to optimum
    print(
        f"Avg Episode Reward = {str(total_reward)}, Benchmark = {str(game_map_1.benchmark)}"
    )
    diff = game_map_1.benchmark - total_reward  # amount by which benchmark score is better
    if diff < 0:
        diff = 0
    if diff > 20:
        diff = 20
    below = math.ceil(diff / 2)
    mark = 10 - below

    if below == 0:
        print("Testcase passed, policy matches or exceeds benchmark")
    elif mark > 0:
        print(
            f"Testcase passed, {below} marks below solution quality benchmark")
    Aveg_1 = solver_1.get_list()

    x = range(len(Aveg_0))
    x_1 = range(len(Aveg_1))

    plt.plot(x, Aveg_0, '--r', label='q_learning')
    plt.plot(x_1, Aveg_1, '-b', label='sarsa')
    plt.xlabel('episode')
    plt.ylabel('Average Reward')
    plt.title(
        'Average reward of the learned policy vs. episode number\n'
        'under Q-learning and SARSA with learning rate 0.01'
    )
    plt.legend()
    plt.savefig('q4.png')
    plt.show()
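# solver.get_list() is not defined in this snippet; presumably it returns the
# per-episode average reward recorded during training. A hypothetical recorder
# that would produce such a curve:
class RewardLogger:
    """Call log() once per training episode; get_list() returns the running
    average reward, suitable for plotting against episode number."""

    def __init__(self):
        self.total = 0.0
        self.count = 0
        self.running_avg = []

    def log(self, episode_reward):
        self.total += episode_reward
        self.count += 1
        self.running_avg.append(self.total / self.count)

    def get_list(self):
        return self.running_avg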