def parse_html(self, response, lru):
     """Yield follow-up requests for the links found in an HTML response.

     Every link returned by ``self.link_extractor.extract_links(response)``
     is converted with ``url_to_lru``. A link whose URL makes ``url_to_lru``
     raise ``ValueError`` is logged at ``log.ERROR`` level and skipped.

     A ``Request`` (with ``self.parse`` as callback) is yielded for each
     remaining link that ``self._should_follow(depth, lru, lrulink)`` accepts
     and whose URL does not match ``self.ignored_exts`` according to
     ``url_has_any_extension``.

     :param response: response to extract links from; ``response.meta['depth']``
         must be set.
     :param lru: LRU of the page being parsed, passed to ``_should_follow``.
     """
     depth = response.meta['depth']
     # NOTE(review): lrulinks is filled below but never read in this function
     # as written -- confirm whether a final step consuming it is missing.
     lrulinks = []
     for link in self.link_extractor.extract_links(response):
         try:
             lrulink = url_to_lru(link.url)
         except ValueError as e:
             # "as" form works on Python 2.6+ and Python 3; the old
             # "except ValueError, e" spelling is a SyntaxError on Python 3.
             self.log("Error converting URL to LRU: %s" % e, log.ERROR)
             continue
         lrulinks.append(lrulink)
         if (self._should_follow(depth, lru, lrulink)
                 and not url_has_any_extension(link.url, self.ignored_exts)):
             yield Request(link.url, callback=self.parse)
 def parse_html(self, response, lru):
     """Yield follow-up requests for the links found in an HTML response.

     Every link returned by ``self.link_extractor.extract_links(response)``
     is converted with ``url_to_lru``. A link whose URL makes ``url_to_lru``
     raise ``ValueError`` is logged at ``log.ERROR`` level and skipped.

     A ``Request`` (with ``self.parse`` as callback) is yielded for each
     remaining link that ``self._should_follow(depth, lru, lrulink)`` accepts
     and whose URL does not match ``self.ignored_exts`` according to
     ``url_has_any_extension``.

     :param response: response to extract links from; ``response.meta['depth']``
         must be set.
     :param lru: LRU of the page being parsed, passed to ``_should_follow``.
     """
     depth = response.meta['depth']
     # NOTE(review): lrulinks is filled below but never read in this function
     # as written -- confirm whether a final step consuming it is missing.
     lrulinks = []
     for link in self.link_extractor.extract_links(response):
         try:
             lrulink = url_to_lru(link.url)
         except ValueError as e:
             # "as" form works on Python 2.6+ and Python 3; the old
             # "except ValueError, e" spelling is a SyntaxError on Python 3.
             self.log("Error converting URL to LRU: %s" % e, log.ERROR)
             continue
         lrulinks.append(lrulink)
         if (self._should_follow(depth, lru, lrulink)
                 and not url_has_any_extension(link.url, self.ignored_exts)):
             yield Request(link.url, callback=self.parse)
 def parse(self, response):
     """Route a response to the handler matching its type.

     The response URL is first converted with ``url_to_lru``; any exception
     raised by that conversion propagates to the caller. ``HtmlResponse``
     instances are handed to ``self.parse_html``, every other response to
     ``self._make_raw_page``. Both handlers receive ``(response, lru)`` and
     their return value is returned unchanged.
     """
     lru = url_to_lru(response.url)
     # Only the selected attribute is looked up, as in an if/else.
     if isinstance(response, HtmlResponse):
         handler = self.parse_html
     else:
         handler = self._make_raw_page
     return handler(response, lru)
 def parse(self, response):
     """Route a response to the handler matching its type.

     The response URL is first converted with ``url_to_lru``; any exception
     raised by that conversion propagates to the caller. ``HtmlResponse``
     instances are handed to ``self.parse_html``, every other response to
     ``self._make_raw_page``. Both handlers receive ``(response, lru)`` and
     their return value is returned unchanged.
     """
     lru = url_to_lru(response.url)
     # Only the selected attribute is looked up, as in an if/else.
     if isinstance(response, HtmlResponse):
         handler = self.parse_html
     else:
         handler = self._make_raw_page
     return handler(response, lru)