def _play(self, swap):
    """Play one full game between the two MCTS players.

    ``self.mcts`` is indexed by player number (1 or 2). When ``swap`` is
    truthy player 2 performs the first search, otherwise player 1 does.

    Returns:
        A ``(winner, state)`` pair. ``winner`` is the ``player`` attribute of
        the final node when its ``terminal`` value equals 1, otherwise 0;
        ``state`` is the final node's state.
    """
    mover = 2 if swap else 1
    board = self.engine.empty_board()

    # The first two moves seed one search root per player.
    # NOTE(review): the meaning of the positional Node arguments is not
    # visible here; the last one looks like the owning player -- confirm.
    opening = search.Node(0, 0, board, 2)
    _, opening = self.mcts[mover].search(opening)
    roots = {mover: opening}

    mover = 3 - mover
    reply = search.Node(0, 0, opening.state, 1)
    reply.depth += 1
    _, reply = self.mcts[mover].search(reply)
    roots[mover] = reply

    while True:
        opponent, mover = mover, 3 - mover
        # Advance this player's tree to the child reached by the opponent's
        # most recent action, then search from there.
        last_action = roots[opponent].action
        roots[mover] = roots[mover].children[last_action]
        _, roots[mover] = self.mcts[mover].search(roots[mover])
        if roots[mover].terminal is not None:
            break

    final = roots[mover]
    winner = final.player if final.terminal == 1 else 0
    return winner, final.state
def local_beam_search(problem, k=5, tol=.01):
    """Run a local beam search with ``k`` beams and a value tolerance.

    Starting from ``k`` random nodes, repeatedly expand every current node
    and keep the ``k`` highest-valued neighbors (ties broken randomly).

    The search stops when:
      * a selected neighbor has a value greater than ``-tol`` (its state is
        returned immediately), or
      * no current node has any neighbor, or
      * every current node is at least as good as every selected neighbor.

    In the last two cases the state of the best current node is returned.

    Args:
        problem: object providing ``limits``, ``arms``, ``random_start`` and
            ``value``, and usable by ``aima.Node.expand``.
        k: number of beams kept at each step.
        tol: tolerance; a value above ``-tol`` counts as solved.
    """
    def value_of(node):
        return problem.value(node.state)

    def random_configuration():
        # One single-element list per arm, drawn within that arm's limits.
        return [[random.randint(problem.limits[i][0], problem.limits[i][1])]
                for i in range(len(problem.arms))]

    current = [
        aima.Node(problem.random_start(random_configuration()))
        for _ in range(k)
    ]
    while True:
        neighbors = []
        for node in current:
            neighbors.extend(node.expand(problem))
        if not neighbors:
            break

        # Pick the best neighbors one at a time. Never ask for more than
        # exist: argmax over an empty list would raise.
        selected = []
        for _ in range(min(k, len(neighbors))):
            pick = aima.argmax_random_tie(neighbors, key=value_of)
            selected.append(pick)
            neighbors.remove(pick)

        closer = []
        for candidate in selected:
            if value_of(candidate) > -1 * tol:
                return candidate.state
            for node in current:
                closer.append(value_of(node) >= value_of(candidate))
        if all(closer):
            # No selected neighbor beats any current node.
            break
        current = selected
    return aima.argmax_random_tie(current, key=value_of).state
def searchGen(game):
    """Generate converted game states while exploring the food-search space.

    Builds a ``FoodSearchProblem`` from ``game.state`` and explores it with a
    ``util.Queue`` fringe (presumably FIFO, i.e. breadth-first -- confirm
    against ``util``). Each distinct state is visited once; for every visited
    state the result of ``convertBack(pos, food, game)`` is yielded.
    """
    problem = FoodSearchProblem(game.state)
    seen = set()
    fringe = util.Queue()
    fringe.push(search.Node(problem.getStartState()))

    while not fringe.isEmpty():
        node = fringe.pop()
        if node.state not in seen:
            seen.add(node.state)
            position, food = node.state
            yield convertBack(position, food, game)
            for successor, action, _cost in problem.getSuccessors(node.state):
                fringe.push(search.Node(successor, action, node))
def runExpectiMax(problem, iterations):
    """Greedily follow the best-valued neighbor until no neighbor exists.

    At each step the current node is expanded and the neighbor maximising
    ``problem.value(state, iterations)`` is chosen (ties broken randomly).

    Args:
        problem: search problem providing ``initial`` and ``value``.
        iterations: passed through unchanged to ``problem.value``.

    Returns:
        The string ``"<state>-<value>"`` for the last node reached. If the
        initial node has no neighbors, the initial state is reported.
    """
    current = search.Node(problem.initial)
    while True:
        neighbors = current.expand(problem)
        if not neighbors:
            break
        current = search.argmax_random_tie(
            neighbors,
            key=lambda node: problem.value(node.state, iterations))
    # Report from ``current`` so the result is defined even when the loop
    # body never ran.
    return str(current.state) + "-" + str(
        problem.value(current.state, iterations))
def flounder(problem, giveup=10000):
    """The worst way to solve a problem: walk to random children.

    Args:
        problem: search problem providing ``initial`` and ``goal_test``.
        giveup: maximum number of steps before abandoning the walk.

    Returns:
        The goal node if the walk reaches one. Returns ``None`` when the
        step budget is exhausted or the walk reaches a node with no
        children.
    """
    node = search.Node(problem.initial)
    count = 0
    while not problem.goal_test(node.state):
        count += 1
        if count >= giveup:
            return None
        children = node.expand(problem)
        if not children:
            # Dead end: random.choice would raise on an empty list.
            return None
        node = random.choice(children)
    return node
def breadth_first_search(problem):
    """Breadth-first graph search that records node colors for visualisation.

    After every color change a snapshot of the whole color map is stored.

    Returns:
        ``(iterations, all_node_colors, node)`` when a goal node is found,
        where ``iterations`` is the number of recorded snapshots and
        ``all_node_colors`` is the list of snapshots; ``None`` when the
        frontier is exhausted without reaching a goal.
    """
    node_colors = dict(initial_node_colors)
    all_node_colors = []

    def paint(state, color):
        # Recolor one state and record a snapshot of the full color map.
        node_colors[state] = color
        all_node_colors.append(dict(node_colors))

    def outcome(found):
        # One snapshot is taken per step, so the step count is the length.
        return (len(all_node_colors), all_node_colors, found)

    node = s.Node(problem.initial)
    paint(node.state, "red")
    if problem.goal_test(node.state):
        paint(node.state, "green")
        return outcome(node)

    frontier = s.FIFOQueue()
    frontier.append(node)
    # Frontier nodes are shown in orange.
    paint(node.state, "orange")

    explored = set()
    while frontier:
        node = frontier.pop()
        paint(node.state, "red")
        explored.add(node.state)

        for child in node.expand(problem):
            if child.state in explored or child in frontier:
                continue
            if problem.goal_test(child.state):
                paint(child.state, "green")
                return outcome(child)
            frontier.append(child)
            paint(child.state, "orange")

        # Fully expanded nodes are shown in gray.
        paint(node.state, "gray")
    return None
def bfs(start, end, problem):
    """Return the breadth-first path cost from ``start`` to ``end``.

    Positions are ``(x, y)`` pairs; a move is allowed when
    ``problem.walls[x][y]`` is falsy for the target cell.

    Args:
        start: starting position.
        end: goal position.
        problem: object exposing a ``walls`` grid indexed as ``walls[x][y]``.

    Returns:
        ``getTotalCost()`` of the first node popped whose state equals
        ``end``, or ``None`` when the fringe empties first.
    """
    # Each move pairs a direction label with its (dx, dy) offset.
    # NOTE(review): labels assume EAST is +x and WEST is -x (the usual
    # Pacman convention) -- confirm against ``Directions``. Only the path
    # cost is returned, so the labels do not influence the result.
    moves = (
        (Directions.WEST, (-1, 0)),
        (Directions.EAST, (1, 0)),
        (Directions.NORTH, (0, 1)),
        (Directions.SOUTH, (0, -1)),
    )

    explored = set()
    fringe = util.Queue()
    fringe.push(search.Node(start, '', 0, None))

    while not fringe.isEmpty():
        currNode = fringe.pop()
        if currNode.state == end:
            # Goal reached: report the accumulated cost of the path.
            return currNode.getTotalCost()
        if currNode.state in explored:
            # Already expanded through an earlier fringe entry.
            continue
        explored.add(currNode.state)

        x, y = currNode.state[0], currNode.state[1]
        for direction, (dx, dy) in moves:
            nextState = (x + dx, y + dy)
            if problem.walls[nextState[0]][nextState[1]]:
                continue
            fringe.push(search.Node(nextState, direction, 1, currNode))
    return None
def newUniformCostSearch(pos, problem):
    """Return the cheapest path cost from ``pos`` to a goal state.

    Uniform-cost search with lazy deletion: a position may sit in the
    frontier several times with different costs. The cheapest entry is
    popped first and later entries are skipped by the ``visited`` check,
    so the frontier never has to be rebuilt.

    Args:
        pos: starting position.
        problem: object providing ``isGoalState`` and ``getSuccessors``
            (yielding ``(position, action, step_cost)`` triples).

    Returns:
        0 if ``pos`` is already a goal, the cost of the cheapest path
        otherwise, or the sentinel ``99999999999999999999`` when no goal is
        reachable.
    """
    unreachable_cost = 99999999999999999999  # sentinel kept for callers

    if problem.isGoalState(pos):
        return 0

    frontier = util.PriorityQueue()
    frontier.push(search.Node(pos, [], 0), 0)
    visited = set()

    while not frontier.isEmpty():
        node = frontier.pop()
        position = node.getPos()
        if position in visited:
            # A cheaper entry for this position was already expanded.
            continue
        visited.add(position)
        if problem.isGoalState(position):
            return node.getCost()

        for successor, action, step_cost in problem.getSuccessors(position):
            if successor in visited:
                continue
            child = search.Node(successor,
                                node.getPath() + [action],
                                node.getCost() + step_cost)
            frontier.push(child, child.getCost())
    return unreachable_cost
def my_best_first_graph_search_for_vis(problem, f):
    """Best-first graph search that also counts visualisation steps.

    Nodes with the lowest ``f`` score are expanded first. ``f`` is wrapped
    with ``search.memoize(f, 'f')``, so computed scores are cached on the
    nodes and can be inspected on the returned path afterwards.

    Returns:
        ``(iterations, node)`` when a goal node is reached, where
        ``iterations`` is the step counter kept for visualisation; ``None``
        when the frontier is exhausted.
    """
    f = search.memoize(f, 'f')
    start = search.Node(problem.initial)
    steps = 1
    if problem.goal_test(start.state):
        return (steps + 1, start)

    frontier = search.PriorityQueue('min', f)
    frontier.append(start)
    steps += 1

    explored = set()
    while frontier:
        node = frontier.pop()
        steps += 1
        if problem.goal_test(node.state):
            return (steps + 1, node)

        explored.add(node.state)
        for child in node.expand(problem):
            if child in frontier:
                # Keep whichever of the two frontier candidates scores lower.
                incumbent = frontier[child]
                if f(child) < f(incumbent):
                    del frontier[incumbent]
                    frontier.append(child)
                    steps += 1
            elif child.state not in explored:
                frontier.append(child)
                steps += 1
        # One extra step per fully expanded node.
        steps += 1
    return None
def simulated_annealing_with_tol(problem, tol=.01):
    """Simulated annealing with a value tolerance.

    Modified version of the aima simulated annealing function that takes a
    problem and a tolerance.

    Args:
        problem: object providing ``sched`` (returning the temperature
            schedule), ``random_start``, ``initial`` and ``value``.
        tol: tolerance; a state whose value exceeds ``-tol`` is accepted.

    Returns:
        The current state as soon as the temperature reaches 0, the current
        value exceeds ``-tol``, or the current node has no neighbors.
    """
    schedule = problem.sched()
    current = aima.Node(problem.random_start(problem.initial))
    for t in range(sys.maxsize):
        T = schedule(t)
        if T == 0:
            return current.state
        # Evaluate the current state once; it is used by both the
        # tolerance test and the energy delta.
        current_value = problem.value(current.state)
        if current_value > -1 * tol:
            return current.state
        neighbors = current.expand(problem)
        if not neighbors:
            return current.state
        candidate = random.choice(neighbors)
        delta_e = problem.value(candidate.state) - current_value
        # Always accept improvements; accept worse moves with
        # probability exp(delta_e / T).
        if delta_e > 0 or aima.probability(np.exp(delta_e / T)):
            current = candidate
def hill_climbing_with_tol(problem, tol=.01):
    """Hill climbing with a value tolerance.

    Starting from a random node, keep moving to the highest-valued neighbor
    (ties broken randomly). The climb ends when there are no neighbors, the
    best neighbor is not strictly better than the current node, or the
    current value already exceeds ``-tol``. Slightly modified version of the
    aima ``hill_climbing`` function.

    Returns:
        The state of the node where the climb ended.
    """
    current = aima.Node(problem.random_start(problem.initial))
    neighbors = current.expand(problem)
    while neighbors:
        best = aima.argmax_random_tie(
            neighbors, key=lambda node: problem.value(node.state))
        no_gain = problem.value(best.state) <= problem.value(current.state)
        if no_gain or problem.value(current.state) > -tol:
            return current.state
        current = best
        neighbors = current.expand(problem)
    return current.state
def iterative_deepening_astar(problem, h, main_limit=100):
    """Iterative-deepening A* search.

    Runs a depth-first search bounded by a cost limit and raises the limit
    to the smallest estimate that exceeded it, until a goal is found or no
    estimate below ``main_limit`` remains. Based on the pseudocode at
    https://en.wikipedia.org/wiki/Iterative_deepening_A*#Pseudocode

    Args:
        problem: search problem providing ``initial``, ``goal_test``,
            ``value`` and (when ``h`` is falsy) ``h``.
        h: heuristic taking a node; falls back to ``problem.h``.
        main_limit: upper bound on the estimates considered.

    Returns:
        The goal ``search.Node`` if one is found, otherwise ``None``.
    """
    h = search.memoize(h or problem.h)

    # Base A* heuristic: path cost plus the passed-in heuristic.
    def heuristic(n):
        return n.path_cost + h(n)

    initial = search.Node(problem.initial)
    bound = heuristic(initial)

    def recursive_search(node, current_cost, limit):
        """Return a goal node, or the smallest estimate above ``limit``."""
        # NOTE(review): ``heuristic`` already includes ``node.path_cost``
        # and ``current_cost`` is added on top -- kept as in the original.
        estimate = current_cost + heuristic(node)
        if estimate > limit:
            return estimate
        if problem.goal_test(node.state):
            return node

        value = main_limit  # large value at the start
        for child in node.expand(problem):
            inner_result = recursive_search(
                child, current_cost + problem.value(node.state), limit)
            if isinstance(inner_result, search.Node):
                return inner_result
            # Anything else is a numeric estimate. Accept any numeric type
            # (int, float, NumPy scalars), not only ``int``.
            if inner_result < value:
                value = inner_result
        return value

    while True:
        result = recursive_search(initial, 0, bound)
        if isinstance(result, search.Node):
            return result
        if result == main_limit:
            # Nothing below the main limit is left to explore.
            return None
        bound = result
def self_play(self, tau):
    """Play one game against itself and record every step.

    Each step runs ``self.mcts.search`` from the node chosen in the previous
    step, until the chosen node's ``terminal`` attribute is set. The winner
    is the terminal node's ``player`` when ``terminal == 1``, otherwise 0,
    and is passed with the steps to ``self._update_match``.

    Returns:
        The list of recorded steps, each ``[state, pi, Q, player]``.
    """
    board = self.engine.empty_board()
    # The first move is a child of the root, so the root belongs to the
    # second player.
    node = search.Node(0, 0, board, 2)

    game_steps = []
    finished = False
    while not finished:
        pi, node = self.mcts.search(node, tau)
        game_steps.append([node.state, pi, node.Q, node.player])
        finished = node.terminal is not None

    winner = node.player if node.terminal == 1 else 0
    self._update_match(winner, game_steps)
    return game_steps
def breadth_first_search(problem):
    """Breadth-first graph search [Figure 3.11].

    Note that this function can be implemented in a single line as below:
    return graph_search(problem, FIFOQueue())

    States are stored in ``explored`` as tuples so that unhashable
    sequence states (e.g. lists) can be tracked.

    Returns:
        The first goal node found, or ``None`` if the frontier is exhausted.
    """
    node = search.Node(problem.initial)
    if problem.goal_test(node.state):
        return node
    frontier = search.FIFOQueue()
    frontier.append(node)
    explored = set()
    while frontier:
        node = frontier.pop()
        explored.add(tuple(node.state))
        for child in node.expand(problem):
            # Compare in the form that was stored: a list or string state
            # never equals its tuple form, and the set lookup avoids
            # rebuilding a tuple of all explored states on every test.
            if tuple(child.state) not in explored and child not in frontier:
                if problem.goal_test(child.state):
                    return child
                frontier.append(child)
    return None
def extend(self, items):
    """Add ``items`` to ``self.A`` and re-sort it in ascending f-score order.

    The score of an entry is its ``path_cost`` plus ``self.problem.h``
    evaluated on a fresh ``search.Node`` built from the entry's state.
    """
    def f_score(entry):
        return entry.path_cost + self.problem.h(search.Node(entry.state))

    self.A.extend(items)
    self.A.sort(key=f_score)
def getCostOfActions(self, actions):
    """Return the total cost of a sequence of actions.

    "B" and "C" cost 1 each. "G" costs 10 when no action precedes it and 1
    otherwise. Any other action costs nothing.
    """
    cost = 0
    previous_action = None
    for action in actions:
        if action in ("B", "C"):
            cost += 1
        elif action == "G":
            cost += 10 if previous_action is None else 1
        previous_action = action
    return cost


"""
problem = MyProblem()
solution = search.aStarSearch(problem)
print solution
"""

# Quick check of Node equality for two nodes built from the same state.
# The call form of print works on both Python 2 and Python 3.
node1 = search.Node("A")
node2 = search.Node("A")
print("node1 == node2 ?")
print(node1 == node2)
def search_bb_dig_plan(mine):
    '''
    Compute, using Branch and Bound, the most profitable sequence of digging
    actions from the initial state of the mine.

    Nodes are explored from a FIFO frontier. A child is only enqueued when
    its optimistic payoff is strictly greater than the best payoff found so
    far.

    Parameters
    ----------
    mine : Mine
        An instance of a Mine problem.

    Returns
    -------
    best_payoff, best_action_list, best_final_state
    '''
    assert isinstance(mine, Mine)

    @functools.lru_cache(maxsize=None)
    def optimistic_payoff(state):
        '''
        Returns the best potential payoff for a node state, ignoring the
        slope constraint.

        Parameters
        ----------
        state : nested tuples
            State of the partially dug mine. It must be hashable because
            results are cached with lru_cache.

        Returns
        -------
        The sum, over all columns, of the best cumulative payoff still
        reachable in that column.
        '''
        # Shift the state so each entry is the z index of the last mined
        # block in its column (-1 when nothing has been dug there).
        index_adjust = 1
        state = np.array(state) - index_adjust

        # Collect per-column maxima in a list. np.append would copy the
        # whole array on every call.
        best_per_column = []
        if mine.three_dim:  # 3D case
            for x, state_row in enumerate(state):
                for y, z in enumerate(state_row):
                    if z > -index_adjust:
                        best_per_column.append(
                            np.amax(mine.cumsum_mine[x, y, z:]))
                    else:
                        # Not digging at all (0 payoff) is also an option.
                        best_per_column.append(
                            max(np.amax(mine.cumsum_mine[x, y]), 0))
        else:  # 2D case, same but without the y axis
            for x, z in enumerate(state):
                if z > -index_adjust:
                    best_per_column.append(np.amax(mine.cumsum_mine[x, z:]))
                else:
                    best_per_column.append(
                        max(np.amax(mine.cumsum_mine[x]), 0))
        return np.sum(np.array(best_per_column, dtype=float))

    print(optimistic_payoff.cache_info())  # Cache Info

    node = search.Node(convert_to_tuple(mine.initial))

    def opt_pay(candidate):
        # Optimistic payoff of a node, used as the pruning bound.
        return optimistic_payoff(convert_to_tuple(candidate.state))

    # A max-priority queue keyed on opt_pay would explore the most promising
    # nodes first; FIFO was found to be faster, likely because
    # optimistic_payoff is slow.
    frontier = search.FIFOQueue()
    frontier.append(node)

    # Best node found so far.
    best_node = node
    best_payoff = mine.payoff(best_node.state)

    while frontier:
        node = frontier.pop()
        node_payoff = mine.payoff(node.state)
        # On ties the most recently visited node wins.
        if node_payoff >= best_payoff:
            best_node = node
            best_payoff = node_payoff
        for child in node.expand(mine):
            # Skip children already queued and those that cannot beat the
            # current best even optimistically.
            if child not in frontier and opt_pay(child) > best_payoff:
                frontier.append(child)

    best_action_list = best_node.solution()
    best_final_state = convert_to_tuple(best_node.state)
    print(optimistic_payoff.cache_info())  # Cache Info
    return best_payoff, best_action_list, best_final_state