def greedy_best_first(board, heuristic):
    """
    Greedy best-first search: repeatedly expand the frontier node with the
    lowest heuristic estimate, ignoring the cost of the path so far.

    :param board: (Board) the board to start from
    :param heuristic: (function) maps a board to an estimated cost-to-goal
    :return: (list) path to the solution and (int) number of explored boards,
             or (None, explored count) when the frontier empties unsolved
    """
    frontier = PriorityQueue()
    start = Node(board)
    frontier.add(start, heuristic(start.data))
    seen = []  # boards already expanded (membership is O(n) on a list)
    while frontier.has_next():
        current = frontier.pop()
        if current.data.is_solved():
            # the node just popped counts as explored too
            return current.path(), len(seen) + 1
        for move in current.data.legal_moves():
            successor = Node(current.data.forecast(move), current)
            # queue each successor only once, and never one already expanded
            if not frontier.has(successor) and successor.data not in seen:
                frontier.add(successor, heuristic(successor.data))
        seen.append(current.data)
    return None, len(seen)
class OperationQueue(object):
    """
    A priority queue of sub-operations built from incoming params,
    deduplicated by each param's unique key.
    """
    # TODO: chunking/batching should probably happen here
    # with the assistance of another queue for prioritized params
    # (i.e., don't create subops so eagerly)

    def __init__(self, qid, op_type, default_limit=ALL):
        self.qid = qid
        self.op_type = op_type
        options, unwrapped = get_unwrapped_options(op_type)
        self.unwrapped_type = unwrapped
        self.options = options
        # resolve the key/priority extractor callables once, up front
        self.unique_key = options.get('unique_key', 'unique_key')
        self.unique_func = get_unique_func(self.unique_key)
        self.priority = options.get('priority', 0)
        self.priority_func = get_priority_func(self.priority)
        self.default_limit = default_limit
        self.param_set = set()          # unique keys already enqueued
        self.op_queue = PriorityQueue()
        self._dup_params = []           # keys rejected as duplicates

    def enqueue(self, param, **kw):
        """Wrap *param* in a sub-op and queue it; duplicates are dropped."""
        key = self.unique_func(param)
        if key in self.param_set:
            # already queued once — record the duplicate and bail out
            self._dup_params.append(key)
            return
        opts = {'limit': self.default_limit}
        opts.update(kw)
        subop = self.op_type(param, **opts)
        # tag the sub-op with the queue it came from
        subop._origin_queue = self.qid
        self.op_queue.add(subop, self.priority_func(param))
        self.param_set.add(key)

    def enqueue_many(self, param_list, **kw):
        """Enqueue every param in *param_list* with the same keyword options."""
        for param in param_list:
            self.enqueue(param, **kw)

    def __len__(self):
        return len(self.op_queue)

    def peek(self, *a, **kw):
        """Delegate to the underlying priority queue."""
        return self.op_queue.peek(*a, **kw)

    def pop(self, *a, **kw):
        """Delegate to the underlying priority queue."""
        return self.op_queue.pop(*a, **kw)
def a_star(board, heuristic):
    """
    A* search main routine (f = g + h, with g = moves taken so far and
    h = the supplied heuristic estimate).

    NOTE(review): a second function named ``a_star`` appears later in this
    file and shadows this one at import time — confirm which is intended.

    :param board: the puzzle to solve
    :param heuristic: the chosen heuristic function
    :return: the solution path, or None when the frontier empties unsolved
    """
    frontier = PriorityQueue()
    root = Node(board)
    # g(n) is len(path) - 1 because the path includes the start board
    frontier.add(root, heuristic(root.data) + len(root.path()) - 1)
    closed = []
    while frontier.has_next():
        current = frontier.pop()
        if current.data.is_solved():
            return current.path()
        for move in current.data.legal_moves():
            successor = Node(current.data.forecast(move), current)
            if frontier.has(successor):
                # keep only the cheaper of the two queued copies
                f_value = heuristic(successor.data) + len(successor.path()) - 1
                if f_value < frontier.get_value(successor):
                    frontier.remove(successor)
                    frontier.add(successor, f_value)
            elif successor.data not in closed:
                frontier.add(successor,
                             heuristic(successor.data) + len(successor.path()) - 1)
        closed.append(current.data)
    return None
def a_star(board, heuristic):
    """
    Solve the board with A* search, ordering the frontier by f = g + h
    where g is the number of moves made so far and h is the heuristic
    estimate of the remaining cost.

    :param board: board to solve
    :param heuristic: heuristic function
    :return: (path to solution, number of explored nodes), or
             (None, number of explored nodes) if no solution is found
    """
    def f_cost(n):
        # g(n) = len(path) - 1 since the path includes the start board
        return heuristic(n.data) + len(n.path()) - 1

    frontier = PriorityQueue()
    start = Node(board)
    frontier.add(start, f_cost(start))
    explored = []
    while frontier.has_next():
        current = frontier.pop()
        if current.data.is_solved():
            # the node just popped counts as explored too
            return current.path(), len(explored) + 1
        for move in current.data.legal_moves():
            child = Node(current.data.forecast(move), current)
            if frontier.has(child):
                # a cheaper route to a queued board replaces the queued copy
                new_cost = f_cost(child)
                if new_cost < frontier.get_value(child):
                    frontier.remove(child)
                    frontier.add(child, new_cost)
            elif child.data not in explored:
                frontier.add(child, f_cost(child))
        explored.append(current.data)
    return None, len(explored)
class Balerion(object): """ Once the largest dragon in westeros, also the name of this simple python web crawler :-) . """ def __init__(self, link, allow_external, allow_redirects, max_limit = 10): self.root = link self.unparsed_urls = PriorityQueue() self.allow_external = allow_external self.allow_redirects = allow_redirects self.domain = None self.max_limit = max_limit self.opener = None self.create_opener() def pre_process(self): """ exit the function if seed url is not a valid url """ if(self.allowed_for_processing(self.root)): self.unparsed_urls.add(self.root, priority = 0) parsed_url = urlparse.urlparse(self.root) self.domain = parsed_url.netloc else: LOGGER.warning("Non followable root: %s " % self.root) exit() def process_page(self, response) : """ override this method to do any kind of processing on the page. """ pass def create_opener(self): """ creates http-link opener based on options choosen """ self.opener = urllib2.build_opener() if not self.allow_redirects: self.opener = urllib2.build_opener(BalerionRedirectHandler) @classmethod def allowed_for_processing(cls, next_url): """ placeholder """ parsed_url = urlparse.urlparse(next_url) if(parsed_url.scheme != 'http'): LOGGER.warning("Non followable URl: %s " % next_url) return False ROBOT.set_url(parsed_url.scheme + parsed_url.netloc + "/robots.txt") if not ROBOT.can_fetch('Balerion', next_url.encode('ascii', 'replace')): LOGGER.warning("Url disallowed by robots.txt: %s " % next_url) return False return True def process_page_links(self, raw_html, url): """ simply extracts html links using awesome beautifulsoup """ beautiful_html = BeautifulSoup(raw_html) links = [a.get('href') for a in beautiful_html.find_all('a')] links = [link for link in links if link is not None] for link in links: link_info = urlparse.urlparse(link) if not link_info.scheme and not link_info.netloc: link = urlparse.urljoin(url, link) link_info = urlparse.urlparse(link) if('http' not in link_info.scheme) : continue if self.domain not 
in link_info.netloc: if not self.allow_external : continue # throwing out external link else: priority = 2 # insert external link with low priority else: priority = 1 self.unparsed_urls.add(link, priority) def fetch_url(self, url): """ fetches url and returns an object represenation which store headers and status etc. """ page = AttrDict() try: # getting response from given URL resp = self.opener.open(unicode(url)) page = AttrDict({ 'body': resp.read(), 'url': resp.geturl(), 'headers': AttrDict(dict(resp.headers.items())), 'status': resp.getcode() }) except urllib2.HTTPError, err : if err.code == 404: page = AttrDict({'status': 404}) LOGGER.exception("page not found : %s at %s" % (err.code, url)) elif err.code == 403: page = AttrDict({'status': 403}) LOGGER.error("access denied : %s at %s " % (err.code, url)) else: page = AttrDict({'status': 500}) #choosing 500 as default bad access code LOGGER.error("something bad happened : %s at %s " % (err.code, url)) except urllib2.URLError, err: page = AttrDict({'status': 500}) LOGGER.error("server error %s at %s " % (err.reason, url))