Example #1
    def _play(self, swap):
        root = {}
        idx = 2 if swap else 1
        s = self.engine.empty_board()

        # First two moves: populate the root dictionary with one search root
        # per player (the first move is a child of the root, so the initial
        # root node belongs to player 2).
        root1 = search.Node(0, 0, s, 2)
        _, root1 = self.mcts[idx].search(root1)
        root[idx] = root1
        idx = 3 - idx

        root2 = search.Node(0, 0, root1.state, 1)
        root2.depth += 1
        _, root2 = self.mcts[idx].search(root2)
        root[idx] = root2

        while True:
            idx = 3 - idx

            # moving root to next state, based on other player's action
            previous_action = root[3 - idx].action
            root[idx] = root[idx].children[previous_action]
            pi, root[idx] = self.mcts[idx].search(root[idx])

            if root[idx].terminal is not None:
                break

        winner = root[idx].player if root[idx].terminal == 1 else 0
        return winner, root[idx].state
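Keeping one search root per player and advancing each root along the opponent's last action (root[idx].children[previous_action]) lets both trees reuse their accumulated statistics across moves rather than restarting the search from scratch each turn.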
Example #2
def local_beam_search(problem, k=5, tol=.01):
    """From the initial k nodes, keep choosing the k neighbors with highest value,
    stopping when no neighbors are better (or it is withing the tolerance)."""
    current = [
        aima.Node(
            problem.random_start(
                [[random.randint(problem.limits[i][0], problem.limits[i][1])]
                 for i in range(len(problem.arms))])) for _ in range(k)
    ]
    while True:
        neighbors = []
        for i in current:
            neighbors.extend(i.expand(problem))
        if not neighbors:
            break
        # Select the k best neighbors (ties broken randomly); removing each
        # pick makes the next argmax return the runner-up.
        best_neighbors = []
        for _ in range(min(k, len(neighbors))):
            pick = aima.argmax_random_tie(
                neighbors, key=lambda node: problem.value(node.state))
            best_neighbors.append(pick)
            neighbors.remove(pick)
        closer = []
        for i in best_neighbors:
            # Within tolerance of the optimum: stop and return this state.
            if problem.value(i.state) > -1 * tol:
                return i.state
            for j in current:
                closer.append(problem.value(j.state) >= problem.value(i.state))

        # If every current node is at least as good as every new neighbor,
        # the search has stalled.
        if all(closer):
            break
        current = best_neighbors
    return aima.argmax_random_tie(
        current, key=lambda node: problem.value(node.state)).state
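Note the selection step: repeatedly calling argmax_random_tie and removing the winner is an O(k·n) way to pick the k best neighbors; sorting the pooled neighbors once by problem.value would select the same set (up to random tie-breaking).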
Example #3
def searchGen(game):
    """Generator version of BFS over the food-search space: yields the game
    state for each newly visited node, e.g. for step-by-step visualization."""
    problem = FoodSearchProblem(game.state)
    struct = util.Queue()
    visited = set()

    struct.push(search.Node(problem.getStartState()))
    while not struct.isEmpty():
        curr = struct.pop()

        if curr.state in visited:
            continue
        visited.add(curr.state)
        pos, food = curr.state
        yield convertBack(pos, food, game)

        for successor, action, cost in problem.getSuccessors(curr.state):
            struct.push(search.Node(successor, action, curr))
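Because the visited check happens only after a node is popped, duplicate states can accumulate in the queue, but each state is expanded (and yielded) at most once.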
Example #4
def runExpectiMax(problem, iterations):
    """Take a single greedy step: return the successor of the initial state
    with the highest expectimax value, formatted as "state-value"."""
    current = search.Node(problem.initial)
    neighbors = current.expand(problem)
    if not neighbors:
        return None
    maxNeighb = search.argmax_random_tie(
        neighbors, key=lambda node: problem.value(node.state, iterations))
    return str(maxNeighb.state) + "-" + str(problem.value(maxNeighb.state, iterations))
Example #5
def flounder(problem, giveup=10000):
    'Random walk from the initial state: the worst way to solve a problem.'
    node = search.Node(problem.initial)
    count = 0
    while not problem.goal_test(node.state):
        count += 1
        if count >= giveup:
            return None
        children = node.expand(problem)
        node = random.choice(children)
    return node
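A minimal sketch of how flounder might be exercised, assuming search is the AIMA module; TinyCount is a hypothetical Problem subclass, not part of the source:

import search

class TinyCount(search.Problem):
    """Reach `goal` from `initial` by incrementing or decrementing."""
    def actions(self, state):
        return [+1, -1]
    def result(self, state, action):
        return state + action

node = flounder(TinyCount(initial=0, goal=3))
if node is not None:
    print(node.solution())  # a random action sequence ending at 3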
Example #6
def breadth_first_search(problem):
    # we use these two variables for the step-by-step visualisation
    iterations = 0
    all_node_colors = []
    node_colors = dict(initial_node_colors)
    
    node = s.Node(problem.initial)
    
    node_colors[node.state] = "red"
    iterations += 1
    all_node_colors.append(dict(node_colors))
      
    if problem.goal_test(node.state):
        node_colors[node.state] = "green"
        iterations += 1
        all_node_colors.append(dict(node_colors))
        return (iterations, all_node_colors, node)
    
    frontier = s.FIFOQueue()
    frontier.append(node)
    
    # colour the frontier node orange
    node_colors[node.state] = "orange"
    iterations += 1
    all_node_colors.append(dict(node_colors))
        
    explored = set()
    while frontier:
        node = frontier.pop()
        node_colors[node.state] = "red"
        iterations += 1
        all_node_colors.append(dict(node_colors))
        
        explored.add(node.state)     
        
        for child in node.expand(problem):
            if child.state not in explored and child not in frontier:
                if problem.goal_test(child.state):
                    node_colors[child.state] = "green"
                    iterations += 1
                    all_node_colors.append(dict(node_colors))
                    return (iterations, all_node_colors, child)
                frontier.append(child)

                node_colors[child.state] = "orange"
                
                iterations += 1
                all_node_colors.append(dict(node_colors))
                    
        node_colors[node.state] = "gray"
        iterations += 1
        all_node_colors.append(dict(node_colors))
    return None
Example #7
def bfs(start, end, problem):
    explored = {}
    startState = start

    fringe = util.Queue()
    root = search.Node(startState, '', 0, None)
    fringe.push(root)

    while not fringe.isEmpty():
        currNode = fringe.pop()
        if currNode.state == end:
            # Goal reached: return the total cost of the path from the root
            # to currNode.
            return currNode.getTotalCost()
        if explored.get(currNode.state, False):
            # currNode's position has already been expanded; don't expand it again.
            continue
        for direction in [Directions.EAST, Directions.WEST, Directions.NORTH, Directions.SOUTH]:
            # Expand currNode's state; in Pacman coordinates, EAST increases x
            # and NORTH increases y.
            if direction is Directions.EAST:
                nextState = (currNode.state[0] + 1, currNode.state[1])
            elif direction is Directions.WEST:
                nextState = (currNode.state[0] - 1, currNode.state[1])
            elif direction is Directions.NORTH:
                nextState = (currNode.state[0], currNode.state[1] + 1)
            else:
                nextState = (currNode.state[0], currNode.state[1] - 1)
            x, y = nextState
            if problem.walls[x][y]:
                continue
            temp = search.Node(nextState, direction, 1, currNode)
            fringe.push(temp)
        explored[currNode.state] = True # currNode's position has been explored
Example #8
def newUniformCostSearch(pos, problem):
    if (problem.isGoalState(pos)):
        return 0
    frontier = util.PriorityQueue()
    visited = {}
    initialNode = search.Node(pos, [], 0)
    frontier.push(initialNode, 0)
    while True:
        if frontier.isEmpty():
            return float('inf')  # no path found
        node = frontier.pop()
        if node.getPos() in visited:
            continue
        visited[node.getPos()] = True
        if problem.isGoalState(node.getPos()):
            return node.getCost()
        succ = problem.getSuccessors(node.getPos())
        for succNode in succ:
            newPath = node.getPath()[:]
            newPath.append(succNode[1])
            tempNode = search.Node(succNode[0], newPath,
                                   node.getCost() + succNode[2])
            # Emulate a decrease-key: rebuild the frontier, dropping any entry
            # for the same position that is more expensive than tempNode.
            tempStack = util.PriorityQueue()
            insertTemp = True
            while not frontier.isEmpty():
                nPrime = frontier.pop()
                if nPrime.getPos() == tempNode.getPos():
                    if tempNode.getCost() < nPrime.getCost():
                        continue
                    else:
                        insertTemp = False
                tempStack.push(nPrime, nPrime.getCost())
            while not tempStack.isEmpty():
                bestNode = tempStack.pop()
                frontier.push(bestNode, bestNode.getCost())
            if insertTemp:
                frontier.push(tempNode, tempNode.getCost())
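Draining the frontier into tempStack and rebuilding it emulates a decrease-key operation on a priority queue that lacks one, at O(n) cost per successor. Since positions recorded in visited are already skipped on pop, simply pushing the new node and letting stale duplicates be discarded when popped would give the same answer with less work.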
Example #9
def my_best_first_graph_search_for_vis(problem, f):
    """Search the nodes with the lowest f scores first.
    You specify the function f(node) that you want to minimize; for example,
    if f is a heuristic estimate to the goal, then we have greedy best
    first search; if f is node.depth then we have breadth-first search.
    There is a subtlety: the line "f = memoize(f, 'f')" means that the f
    values will be cached on the nodes as they are computed. So after doing
    a best first search you can examine the f values of the path returned."""

    # iteration counter, used when visualising the search
    iterations = 0

    f = search.memoize(f, 'f')
    node = search.Node(problem.initial)

    iterations += 1

    if problem.goal_test(node.state):
        iterations += 1
        return (iterations, node)

    frontier = search.PriorityQueue('min', f)
    frontier.append(node)

    iterations += 1

    explored = set()
    while frontier:
        node = frontier.pop()

        iterations += 1
        if problem.goal_test(node.state):
            iterations += 1
            return (iterations, node)

        explored.add(node.state)
        for child in node.expand(problem):
            if child.state not in explored and child not in frontier:
                frontier.append(child)
                iterations += 1
            elif child in frontier:
                incumbent = frontier[child]
                if f(child) < f(incumbent):
                    del frontier[incumbent]
                    frontier.append(child)
                    iterations += 1

        iterations += 1
    return None
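A minimal usage sketch, assuming search is the AIMA module (search.Problem, memoize, PriorityQueue); CountProblem and the heuristic passed as f are hypothetical:

import search

class CountProblem(search.Problem):
    """Reach `goal` from `initial` via +1/-1 steps."""
    def actions(self, state):
        return [+1, -1]
    def result(self, state, action):
        return state + action

problem = CountProblem(initial=0, goal=7)
# Greedy best-first search: f is the heuristic distance to the goal.
iterations, node = my_best_first_graph_search_for_vis(
    problem, lambda n: abs(problem.goal - n.state))
print(iterations, node.solution())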
Example #10
def simulated_annealing_with_tol(problem, tol=.01):
    """Modified version of the aima simulated_annealing function that takes
    a problem and a tolerance."""
    schedule = problem.sched()
    current = aima.Node(problem.random_start(problem.initial))
    for t in range(sys.maxsize):
        T = schedule(t)
        if T == 0 or problem.value(current.state) > -1 * tol:
            return current.state
        neighbors = current.expand(problem)
        if not neighbors:
            return current.state
        choice = random.choice(neighbors)
        # Metropolis acceptance rule: always take improving moves, and take
        # worsening moves with probability exp(delta_e / T).
        delta_e = problem.value(choice.state) - problem.value(current.state)
        if delta_e > 0 or aima.probability(np.exp(delta_e / T)):
            current = choice
Example #11
def hill_climbing_with_tol(problem, tol=.01):
    """From the initial node, keep choosing the neighbor with highest value,
    stopping when no neighbor is better. (Slightly modified version of the
    aima hill_climbing function)"""
    current = aima.Node(problem.random_start(problem.initial))
    while True:
        neighbors = current.expand(problem)
        if not neighbors:
            break
        neighbor = aima.argmax_random_tie(
            neighbors, key=lambda node: problem.value(node.state))
        if problem.value(neighbor.state) <= problem.value(
                current.state) or problem.value(current.state) > -1 * tol:
            break
        current = neighbor
    return current.state
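A minimal sketch of the problem interface the two tolerance-based searches above assume (an AIMA-style Problem extended with random_start and value); ToyMax and its methods are hypothetical stand-ins, not from the source:

import random

class ToyMax(aima.Problem):
    """Toy maximisation problem over the integers; value peaks at 0."""
    def __init__(self):
        super().__init__(initial=0)
    def random_start(self, initial):
        return random.randint(-10, 10)
    def actions(self, state):
        return [-1, +1]
    def result(self, state, action):
        return state + action
    def value(self, state):
        return -abs(state)

print(hill_climbing_with_tol(ToyMax()))  # climbs to a state within tol of the peak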
Example #12
def iterative_deepening_astar(problem, h, main_limit=100):
    h = search.memoize(h or problem.h)

    # Base A* heuristic - adding path cost to passed-in heuristic.
    def heuristic(n):
        return n.path_cost + h(n)

    # Find the initial state Node, and create the initial bound heuristic.
    initial = search.Node(problem.initial)
    bound = heuristic(initial)

    # Algorithm based on the pseudocode from
    # https://en.wikipedia.org/wiki/Iterative_deepening_A*#Pseudocode
    def recursive_search(node, current_cost, limit):
        # Ensure the current heuristic hasn't gone over the limit.
        node_heuristic = heuristic(node)
        if current_cost + node_heuristic > limit:
            return current_cost + node_heuristic
        # Check if the search has found a goal state.
        if problem.goal_test(node.state):
            return node
        value = main_limit  # Large value at the start.
        # Recursively search over all child nodes.
        for child in node.expand(problem):
            inner_result = recursive_search(
                child, current_cost + problem.value(node.state), limit)
            if type(inner_result) is search.Node:
                return inner_result
            elif type(inner_result) is int and inner_result < value:
                value = inner_result
        return value

    while True:
        result = recursive_search(initial, 0, bound)
        if type(result) is search.Node:
            return result
        elif type(result) is int:
            if result == main_limit:
                return None
            bound = result
        else:
            print(str(type(result)) + " is an unhandled type")
Example #13
    def self_play(self, tau):
        s = self.engine.empty_board()
        root = search.Node(
            0, 0, s, 2
        )  # first move is the child node of root, so root belongs to 2nd player
        game_steps = []

        while True:
            pi, best_node = self.mcts.search(root, tau)
            game_steps.append(
                [best_node.state, pi, best_node.Q, best_node.player])

            if best_node.terminal is not None:
                break

            root = best_node

        winner = best_node.player if best_node.terminal == 1 else 0
        self._update_match(winner, game_steps)

        return game_steps
Example #14
def breadth_first_search(problem):
    """[Figure 3.11]
    Note that this function can be implemented in a
    single line as below:
    return graph_search(problem, FIFOQueue())
    """
    node = search.Node(problem.initial)
    if problem.goal_test(node.state):
        return node
    frontier = search.FIFOQueue()
    frontier.append(node)
    explored = set()
    while frontier:
        node = frontier.pop()
        explored.add(tuple(node.state))
        for child in node.expand(problem):
            # States are converted to tuples so they are hashable set members.
            if tuple(child.state) not in explored and child not in frontier:
                if problem.goal_test(child.state):
                    return child
                frontier.append(child)
    return None
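A minimal usage sketch; GridProblem is a hypothetical AIMA-style problem whose states are (x, y) tuples, so the tuple() conversions above are harmless no-ops:

import search

class GridProblem(search.Problem):
    """Move right or up on an integer grid until the goal cell is reached."""
    def actions(self, state):
        return [(1, 0), (0, 1)]
    def result(self, state, action):
        return (state[0] + action[0], state[1] + action[1])

result = breadth_first_search(GridProblem(initial=(0, 0), goal=(2, 2)))
print(result.solution())  # four moves, e.g. [(1, 0), (1, 0), (0, 1), (0, 1)]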
Example #15
    def extend(self, items):
        self.A.extend(items)
        # Keep the frontier sorted in ascending order of f(n) = g(n) + h(n).
        self.A.sort(
            key=lambda x: x.path_cost + self.problem.h(search.Node(x.state)))
Example #16
    def getCostOfActions(self, actions):
        cost = 0
        previous_action = None
        for action in actions:
            if action == "B":
                cost += 1
            elif action == "C":
                cost += 1
            elif action == "G":
                if previous_action is None:
                    cost += 10
                else:
                    cost += 1
            previous_action = action

        return cost


"""
problem = MyProblem()
solution = search.aStarSearch(problem)
print solution
"""

node1 = search.Node("A")
node2 = search.Node("A")

print "node1 == node2 ?"
print node1 == node2
Example #17
def search_bb_dig_plan(mine):
    '''
    Compute, using Branch and Bound, the most profitable sequence of 
    digging actions from the initial state of the mine.
        

    Parameters
    ----------
    mine : Mine
        An instance of a Mine problem.

    Returns
    -------
    best_payoff, best_action_list, best_final_state

    '''
    assert isinstance(mine, Mine)

    @functools.lru_cache(maxsize=None)
    def optimistic_payoff(state):
        '''
        Returns the best potential payoff for a node state, ignoring slope constraint.
        
        Parameters
        ----------
        state :
            represented with nested lists, tuples or a ndarray
            state of the partially dug mine

        Returns
        -------
        Returns the float value of the most optimistic payoff possible from the state.

        '''
        ### Ideally need to redo this with array indexing if possible, future consideration. ###

        # Adjust state to an array of z indexes of the last mined block in each column.
        index_adjust = 1
        state = np.array(state) - index_adjust  # ensure state is an np array
        max_cumsum = np.empty(0)  # empty array to hold the best cumsum values

        if mine.three_dim:  # 3D Case
            for x, state_row in enumerate(state):
                for y, z in enumerate(state_row):
                    if z > -index_adjust:
                        max_cumsum = np.append(
                            max_cumsum, np.amax(mine.cumsum_mine[x, y, z:]))
                    else:  # case where there is the option to not dig at all (0 payoff is an option)
                        max_cumsum = np.append(
                            max_cumsum, max(np.amax(mine.cumsum_mine[x, y]),
                                            0))

        else:  # 2D Case, same but without y axis
            for x, z in enumerate(state):
                if z > -index_adjust:
                    max_cumsum = np.append(max_cumsum,
                                           np.amax(mine.cumsum_mine[x, z:]))
                else:
                    max_cumsum = np.append(
                        max_cumsum, max(np.amax(mine.cumsum_mine[x]), 0))

        return np.sum(max_cumsum)

    print(optimistic_payoff.cache_info())  # Cache Info

    node = search.Node(convert_to_tuple(mine.initial))
    # f for the priority queue would be the optimistic payoff.
    opt_pay = lambda x: optimistic_payoff(convert_to_tuple(x.state))
    # frontier = search.PriorityQueue('max', opt_pay)  # explore the best optimistic nodes first
    # FIFO is faster here, although likely because optimistic_payoff(s) is slow.
    frontier = search.FIFOQueue()
    frontier.append(node)  # append the first node

    # Initialise values for best node
    best_node = node
    best_payoff = mine.payoff(best_node.state)
    best_action_list = []
    best_final_state = best_node.state

    while frontier:
        node = frontier.pop()
        node_payoff = mine.payoff(node.state)

        # Store best node found
        if node_payoff >= best_payoff:
            best_node = node
            best_payoff = node_payoff

        for child in node.expand(mine):
            # Only expand children not already on the frontier whose optimistic
            # payoff strictly exceeds the best payoff found so far.
            if child not in frontier and opt_pay(child) > best_payoff:
                frontier.append(child)

    best_action_list = best_node.solution()
    best_final_state = convert_to_tuple(best_node.state)

    print(optimistic_payoff.cache_info())  # Cache Info

    return best_payoff, best_action_list, best_final_state
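The pruning step relies on the usual branch-and-bound argument: because optimistic_payoff ignores the slope constraint, it upper-bounds what any descendant of a state can achieve, so discarding children whose bound does not exceed the current best payoff can never eliminate an optimal solution.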