def collect_pages_url_within_max_depth(self, current_url, current_depth):
    """Follow "next page" links starting at *current_url*, collecting at most
    *current_depth* page URLs.

    Args:
        current_url: URL of the first results page to record.
        current_depth: remaining number of pages allowed; must not exceed
            ``self.MAX_DEPTH_ALLOWED``.

    Returns:
        List of page URLs in visit order (empty if ``current_depth <= 0``).

    Raises:
        IndexError: if ``current_depth`` exceeds ``self.MAX_DEPTH_ALLOWED``.
    """
    if current_depth > self.MAX_DEPTH_ALLOWED:
        raise IndexError("you are searching too deep!")
    if current_depth <= 0:
        return []
    web_content = craiglist_browser.get_raw_content(current_url)
    next_url = self.get_next_page(web_content)
    res = [current_url]
    # Stop when no "next page" link is found on the current page.
    if next_url is None:  # was `== None`; identity check is the correct test
        return res
    res.extend(self.collect_pages_url_within_max_depth(next_url, current_depth - 1))
    return res
def collect_pages_url_within_max_depth(self, current_url, current_depth):
    """Follow "next page" links starting at *current_url*, collecting at most
    *current_depth* page URLs.

    NOTE(review): this method is defined twice in this file with identical
    bodies; in a class body this later definition shadows the earlier one —
    confirm the duplicate is intentional.

    Args:
        current_url: URL of the first results page to record.
        current_depth: remaining number of pages allowed; must not exceed
            ``self.MAX_DEPTH_ALLOWED``.

    Returns:
        List of page URLs in visit order (empty if ``current_depth <= 0``).

    Raises:
        IndexError: if ``current_depth`` exceeds ``self.MAX_DEPTH_ALLOWED``.
    """
    if current_depth > self.MAX_DEPTH_ALLOWED:
        raise IndexError("you are searching too deep!")
    # Iterative form of the original tail recursion: same results, same
    # raise behavior (depth only decreases, so the guard can only fire on
    # the initial call), without consuming Python stack frames per page.
    res = []
    while current_url is not None and current_depth > 0:
        res.append(current_url)
        web_content = craiglist_browser.get_raw_content(current_url)
        current_url = self.get_next_page(web_content)  # None when no next link
        current_depth -= 1
    return res
def collect_item_urls(self, page_urls):
    """Collect item (listing) URLs from every results page in *page_urls*.

    Args:
        page_urls: iterable of results-page URLs to fetch and scan.

    Returns:
        Flat list of href URLs extracted from all pages, in page order.
    """
    res = []
    # Iterate the URLs directly instead of indexing via range(len(...)).
    for page_url in page_urls:
        web_content = craiglist_browser.get_raw_content(page_url)
        res.extend(self.collect_href_urls_within_page(web_content))
    return res
def evaluate(self, url):
    """Fetch *url* and report whether it passes evaluation.

    Currently always returns ``True`` regardless of the page content.

    NOTE(review): the fetched content was bound to an unused local in the
    original; this looks like a placeholder for a real filter — confirm
    intent before relying on the return value.

    Args:
        url: URL of the page to fetch and evaluate.

    Returns:
        ``True`` unconditionally (the fetch still occurs for its side effects).
    """
    craiglist_browser.get_raw_content(url)
    return True