def collect_pages_url_within_max_depth(self, current_url, current_depth):
    # Guard against runaway recursion past the configured limit.
    if current_depth > self.MAX_DEPTH_ALLOWED:
        raise IndexError("you are searching too deep!")
    # Base case: no depth budget left, so contribute no URLs.
    if current_depth <= 0:
        return []
    # Fetch the page and look for a link to the next results page.
    web_content = craiglist_browser.get_raw_content(current_url)
    next_url = self.get_next_page(web_content)
    res = [current_url]
    if next_url is None:
        return res
    # Recurse on the next page with one less unit of depth budget.
    res.extend(self.collect_pages_url_within_max_depth(next_url, current_depth - 1))
    return res
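To see the "next page" walk in action, the method can be exercised against a stub. Everything below (the fake browser, the three-page chain, the PageCollector wrapper) is a hypothetical, self-contained sketch, not the project's real craiglist_browser module:

class _FakeBrowser:
    """Stand-in for craiglist_browser: serves a three-page chain."""
    _pages = {"p1": "p2", "p2": "p3", "p3": None}

    def get_raw_content(self, url):
        return self._pages[url]


craiglist_browser = _FakeBrowser()


class PageCollector:
    MAX_DEPTH_ALLOWED = 10

    def get_next_page(self, web_content):
        # In the stub, the "raw content" is just the next URL (or None).
        return web_content

    def collect_pages_url_within_max_depth(self, current_url, current_depth):
        if current_depth > self.MAX_DEPTH_ALLOWED:
            raise IndexError("you are searching too deep!")
        if current_depth <= 0:
            return []
        web_content = craiglist_browser.get_raw_content(current_url)
        next_url = self.get_next_page(web_content)
        res = [current_url]
        if next_url is None:
            return res
        res.extend(self.collect_pages_url_within_max_depth(next_url, current_depth - 1))
        return res


print(PageCollector().collect_pages_url_within_max_depth("p1", 5))
# -> ['p1', 'p2', 'p3']

Note the two stopping conditions play different roles: current_depth <= 0 ends the walk quietly when the budget runs out, while the MAX_DEPTH_ALLOWED check raises on a caller asking for more depth than the crawler permits.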
Example #3
def collect_item_urls(self, page_urls):
    # Gather every listing URL found on each collected results page.
    res = []
    for page_url in page_urls:
        web_content = craiglist_browser.get_raw_content(page_url)
        res.extend(self.collect_href_urls_within_page(web_content))
    return res
def evaluate(self, url):
    # Placeholder filter: fetches the listing but accepts every URL.
    # web_content is unused until a real acceptance test is written.
    web_content = craiglist_browser.get_raw_content(url)
    return True
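Together these two methods form the second half of the crawl: results-page URLs in, filtered item URLs out. A minimal sketch of that flow, again with a stubbed craiglist_browser and a hypothetical collect_href_urls_within_page parser (neither is the project's real implementation):

class _FakeBrowser:
    """Stand-in: each results page 'contains' two listing URLs."""
    def get_raw_content(self, url):
        return [url + "/item1", url + "/item2"]


craiglist_browser = _FakeBrowser()


class ItemCollector:
    def collect_href_urls_within_page(self, web_content):
        # Hypothetical parser: the stub content is already a URL list.
        return web_content

    def collect_item_urls(self, page_urls):
        res = []
        for page_url in page_urls:
            web_content = craiglist_browser.get_raw_content(page_url)
            res.extend(self.collect_href_urls_within_page(web_content))
        return res

    def evaluate(self, url):
        web_content = craiglist_browser.get_raw_content(url)
        return True


collector = ItemCollector()
items = collector.collect_item_urls(["p1", "p2"])
kept = [u for u in items if collector.evaluate(u)]
print(kept)
# -> ['p1/item1', 'p1/item2', 'p2/item1', 'p2/item2']

Because evaluate currently accepts everything, the filter step is a no-op; it becomes meaningful once a real acceptance test replaces the placeholder body.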