def _get_response(self, url, data=None):
    """Fetch *url* through the base crawler and return cleaned-up HTML.

    The raw response is passed through an HTMLFixer (seeded with this
    crawler's URL regex) before being returned to the caller.
    """
    raw = BaseCrawler._get_response(self, url, data)
    # Normalize the raw markup before handing it back.
    return HTMLFixer(self._url_regex, url, raw.raw_html).get_fixed_html()
def __init__(self, *args, **kwargs):
    """Initialize the base crawler, then swap in the fast request manager."""
    BaseCrawler.__init__(self, *args, **kwargs)
    # Override whatever manager the base class installed.
    self.request_manager = FastRequestManager()
def __init__(self):
    """Set up pause/stop control flags and report the crawler as ready."""
    BaseCrawler.__init__(self)
    # Flags polled elsewhere (e.g. by delay()); both start cleared.
    self.stop_request = self.pause_request = False
    self.status = 'init'  # immediately superseded by the call below
    self._set_status('ready')
def delay(self, multiplier=1):
    """Apply the base crawler's delay, then block while a pause is requested.

    The crawler is marked 'paused' for the duration and 'running' again
    once the wait ends. A stop request breaks out of the pause wait.

    :param multiplier: scaling factor forwarded to BaseCrawler.delay.
    """
    import time  # local import: keeps this fix self-contained

    self._set_status('paused')
    BaseCrawler.delay(self, multiplier)
    # Poll with a short sleep instead of spinning: the original
    # `while ...: pass` busy-waited and pegged a CPU core while paused.
    while self.pause_request and not self.stop_request:
        time.sleep(0.05)
    self._set_status('running')
def __init__(self, *args, **kwargs):
    """Forward all construction arguments to BaseCrawler unchanged."""
    BaseCrawler.__init__(self, *args, **kwargs)