Example no. 1
def greedy_best_first(board, heuristic):
    """
    An implementation of the greedy best-first search algorithm. It uses a heuristic
    function to guide the search towards the goal.

    :param board: (Board) the starting board
    :param heuristic: (function) the heuristic function
    :return: (list) path to solution, (int) number of explored boards
    """

    frontier = PriorityQueue()
    node = Node(board)
    frontier.add(node, heuristic(node.data))

    explored = []
    while frontier.has_next():
        node = frontier.pop()

        if node.data.is_solved():
            return node.path(), len(explored) + 1

        for move in node.data.legal_moves():
            child = Node(node.data.forecast(move), node)
            if (not frontier.has(child)) and (child.data not in explored):
                frontier.add(child, heuristic(child.data))

        explored.append(node.data)

    return None, len(explored)
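These search functions lean on Node and PriorityQueue helpers that are not shown in the snippets. For reference, below is a minimal sketch of the interface they appear to assume (add/pop/has_next/has/get_value/remove on the queue; data, parent and path() on the node). It is an illustration only, not the original project's code; in particular the real has() presumably compares nodes by their underlying board state, whereas this sketch keys on the node object itself.

import heapq
import itertools


class Node(object):
    """Search-tree node: wraps a board state and links back to its parent."""

    def __init__(self, data, parent=None):
        self.data = data
        self.parent = parent

    def path(self):
        # Walk back to the root and return the states from the start to here.
        node, states = self, []
        while node is not None:
            states.append(node.data)
            node = node.parent
        return list(reversed(states))


class PriorityQueue(object):
    """Minimal priority queue exposing the operations the searches call."""

    def __init__(self):
        self._heap = []
        self._entries = {}  # item -> [priority, tie_breaker, item]
        self._counter = itertools.count()

    def add(self, item, priority):
        entry = [priority, next(self._counter), item]
        self._entries[item] = entry
        heapq.heappush(self._heap, entry)

    def has(self, item):
        return item in self._entries

    def get_value(self, item):
        return self._entries[item][0]

    def remove(self, item):
        # Lazy removal: mark the heap entry dead, pop() skips it later.
        entry = self._entries.pop(item)
        entry[-1] = None

    def has_next(self):
        return bool(self._entries)

    def pop(self):
        while self._heap:
            entry = heapq.heappop(self._heap)
            if entry[-1] is not None:
                del self._entries[entry[-1]]
                return entry[-1]
        raise KeyError('pop from an empty PriorityQueue')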
Example no. 2
class OperationQueue(object):
    # TODO: chunking/batching should probably happen here
    # with the assistance of another queue for prioritized params
    # (i.e., don't create subops so eagerly)
    def __init__(self, qid, op_type, default_limit=ALL):
        self.qid = qid
        options, unwrapped = get_unwrapped_options(op_type)
        self.op_type = op_type
        self.unwrapped_type = unwrapped
        self.options = options

        self.unique_key = options.get('unique_key', 'unique_key')
        self.unique_func = get_unique_func(self.unique_key)
        self.priority = options.get('priority', 0)
        self.priority_func = get_priority_func(self.priority)
        self.default_limit = default_limit

        self.param_set = set()
        self.op_queue = PriorityQueue()
        self._dup_params = []

    def enqueue(self, param, **kw):
        unique_key = self.unique_func(param)
        if unique_key in self.param_set:
            self._dup_params.append(unique_key)
            return
        priority = self.priority_func(param)
        kwargs = {'limit': self.default_limit}
        kwargs.update(kw)
        new_subop = self.op_type(param, **kwargs)
        new_subop._origin_queue = self.qid
        self.op_queue.add(new_subop, priority)
        self.param_set.add(unique_key)

    def enqueue_many(self, param_list, **kw):
        for param in param_list:
            self.enqueue(param, **kw)
        return

    def __len__(self):
        return len(self.op_queue)

    def peek(self, *a, **kw):
        return self.op_queue.peek(*a, **kw)

    def pop(self, *a, **kw):
        return self.op_queue.pop(*a, **kw)
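enqueue combines two ideas: deduplication through a per-queue unique key and ordering through a per-type priority function. The helpers get_unwrapped_options, get_unique_func and get_priority_func are not shown, so the sketch below restates the same pattern with plain stand-in callables; the names and behaviour here are illustrative, not taken from the original module.

import heapq


class DedupQueue(object):
    """Toy queue illustrating enqueue()'s dedup-then-prioritize pattern."""

    def __init__(self, unique_func, priority_func):
        self.unique_func = unique_func      # stand-in for get_unique_func(...)
        self.priority_func = priority_func  # stand-in for get_priority_func(...)
        self.param_set = set()
        self._heap = []
        self._dup_params = []

    def enqueue(self, param):
        key = self.unique_func(param)
        if key in self.param_set:
            # Same dedup behaviour as OperationQueue.enqueue: record and drop.
            self._dup_params.append(key)
            return
        heapq.heappush(self._heap, (self.priority_func(param), key, param))
        self.param_set.add(key)


queue = DedupQueue(unique_func=str.lower, priority_func=len)
for title in ['Foo', 'Bar', 'foo']:
    queue.enqueue(title)
assert len(queue._heap) == 2 and queue._dup_params == ['foo']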
Example no. 3
def a_star(board, heuristic):
    """
    A*算法主题

    :param board: 要解决的游戏
    :param heuristic: 选择的启发函数
    :return: 返回的解的路径
    """

    frontier = PriorityQueue()
    node = Node(board)
    frontier.add(node, heuristic(node.data) + len(node.path()) - 1)

    explored = []

    while frontier.has_next():
        node = frontier.pop()

        if node.data.is_solved():
            return node.path()

        for move in node.data.legal_moves():
            child = Node(node.data.forecast(move), node)
            if (not frontier.has(child)) and (child.data not in explored):
                frontier.add(child,
                             heuristic(child.data) + len(child.path()) - 1)
            elif frontier.has(child):
                child_value = heuristic(child.data) + len(child.path()) - 1
                if child_value < frontier.get_value(child):
                    frontier.remove(child)
                    frontier.add(child, child_value)

        explored.append(node.data)

    return None
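The only change from greedy_best_first in Example no. 1 is the priority: A* orders the frontier by f(n) = g(n) + h(n), where g(n) is the number of moves made so far. Since node.path() includes the start board, that cost is len(node.path()) - 1. A tiny illustration of how the two orderings can disagree (the numbers are invented for the example):

# Hypothetical frontier entries: name -> (moves so far g, heuristic estimate h).
candidates = {'near_start': (1, 4), 'deep_branch': (6, 2)}

greedy_pick = min(candidates, key=lambda name: candidates[name][1])    # h(n) only
a_star_pick = min(candidates, key=lambda name: sum(candidates[name]))  # g(n) + h(n)

assert greedy_pick == 'deep_branch'  # smallest h(n) = 2
assert a_star_pick == 'near_start'   # smallest f(n) = 1 + 4 = 5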
Example no. 5
def a_star(board, heuristic):
    """
    Solves the board using the A* algorithm, guided by the given heuristic function.

    :param board: board to solve
    :param heuristic: heuristic function
    :return: path to solution, and number of explored nodes
    """

    frontier = PriorityQueue()
    node = Node(board)
    frontier.add(node, heuristic(node.data) + len(node.path()) - 1)

    explored = []

    while frontier.has_next():
        node = frontier.pop()

        # check if solved
        if node.data.is_solved():
            return node.path(), len(explored) + 1

        # add children to frontier
        for move in node.data.legal_moves():
            child = Node(node.data.forecast(move), node)
            # child must not have already been explored
            if (not frontier.has(child)) and (child.data not in explored):
                frontier.add(child,
                             heuristic(child.data) + len(child.path()) - 1)
            # if the child is already on the frontier, replace it only when the new value is better
            elif frontier.has(child):
                child_value = heuristic(child.data) + len(child.path()) - 1
                if child_value < frontier.get_value(child):
                    frontier.remove(child)
                    frontier.add(child, child_value)

        explored.append(node.data)

    return None, len(explored)
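This version differs from the A* listing above only in its comments and in also returning the number of explored boards. The elif branch is the usual frontier update: when a cheaper route to a board already on the frontier is found, the stale entry is replaced. Factored out into a helper, the logic reads as follows; this is a sketch against the same assumed PriorityQueue interface, not code from the original project.

def relax(frontier, explored, child, child_value):
    """Keep only the cheapest known priority for each frontier entry (sketch)."""
    if (not frontier.has(child)) and (child.data not in explored):
        # New, unexplored board: enqueue it.
        frontier.add(child, child_value)
    elif frontier.has(child) and child_value < frontier.get_value(child):
        # Cheaper route to a board already on the frontier: replace the entry.
        frontier.remove(child)
        frontier.add(child, child_value)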
Example no. 6
class Balerion(object):
    """
        Once the largest dragon in westeros, also the name of this simple python web crawler :-) .
    """
    def __init__(self, link, allow_external, allow_redirects, max_limit=10):
        self.root = link
        self.unparsed_urls = PriorityQueue()
        self.allow_external = allow_external
        self.allow_redirects = allow_redirects
        self.domain = None
        self.max_limit = max_limit
        self.opener = None
        self.create_opener()
    
    def pre_process(self):
        """
        seed the queue with the root url; exit if it is not a followable url
        """
        if self.allowed_for_processing(self.root):
            self.unparsed_urls.add(self.root, priority=0)
            parsed_url = urlparse.urlparse(self.root)
            self.domain = parsed_url.netloc
        else:
            LOGGER.warning("Non followable root: %s " % self.root)
            exit()
    
    def process_page(self, response):
        """
            override this method to do any kind of processing on the page. 
        """
        pass
    
    def create_opener(self):
        """
            creates the http opener based on the chosen options
        """
        self.opener = urllib2.build_opener()
        if not self.allow_redirects:
            self.opener = urllib2.build_opener(BalerionRedirectHandler)    
    
    @classmethod
    def allowed_for_processing(cls, next_url):
        """
        checks that the url uses http and is allowed by robots.txt
        """
        parsed_url = urlparse.urlparse(next_url)
        if parsed_url.scheme != 'http':
            LOGGER.warning("Non followable URL: %s " % next_url)
            return False
        ROBOT.set_url(parsed_url.scheme + "://" + parsed_url.netloc + "/robots.txt")
        if not ROBOT.can_fetch('Balerion', next_url.encode('ascii', 'replace')):
            LOGGER.warning("Url disallowed by robots.txt: %s " % next_url)
            return False
        return True
        
    def process_page_links(self, raw_html, url):
        """
        extracts the page's links using BeautifulSoup and queues them for crawling
        """
        beautiful_html = BeautifulSoup(raw_html)
        
        links = [a.get('href') for a in beautiful_html.find_all('a')]
        links = [link for link in links if link is not None]
        
        for link in links:
            link_info = urlparse.urlparse(link)
            
            if not link_info.scheme and not link_info.netloc:
                link = urlparse.urljoin(url, link)
                link_info = urlparse.urlparse(link)
            
            if 'http' not in link_info.scheme:
                continue

            if self.domain not in link_info.netloc:
                if not self.allow_external:
                    continue  # throwing out external link
                else:
                    priority = 2  # insert external link with low priority
            else:
                priority = 1
            self.unparsed_urls.add(link, priority)
    
    def fetch_url(self, url):
        """
        fetches the url and returns an object representation which stores the body, headers and status
        """
        page = AttrDict()
        try:
            # getting response from given URL
            resp = self.opener.open(unicode(url))
            page = AttrDict({
                'body': resp.read(),
                'url': resp.geturl(),
                'headers': AttrDict(dict(resp.headers.items())),
                'status': resp.getcode()
            })
        except urllib2.HTTPError, err:
            if err.code == 404:
                page = AttrDict({'status': 404})
                LOGGER.exception("page not found : %s at %s" % (err.code, url))
            elif err.code == 403:
                page = AttrDict({'status': 403})
                LOGGER.error("access denied : %s at %s " % (err.code, url))
            else:
                page = AttrDict({'status': 500}) #choosing 500 as default bad access code
                LOGGER.error("something bad happened : %s at %s " % (err.code, url)) 
        
        except urllib2.URLError, err:
            page = AttrDict({'status': 500})
            LOGGER.error("server error %s at %s " % (err.reason, url))