Beispiel #1
0
 def _get_response(self, url, data=None):
     """Fetch *url* through the base crawler and return its fixed HTML.

     The request itself is delegated to ``BaseCrawler._get_response``.
     The ``raw_html`` of that response is then handed to ``HTMLFixer``
     together with ``self._url_regex`` and the requested url.

     Note that the value returned is whatever
     ``HTMLFixer.get_fixed_html()`` produces, not the response object.
     """
     base_response = BaseCrawler._get_response(self, url, data)
     return HTMLFixer(self._url_regex, url, base_response.raw_html).get_fixed_html()
Beispiel #2
0
    def _get_response(self, url, data=None):
        """Return the HTMLFixer-processed markup for *url*.

        ``url`` and ``data`` are forwarded unchanged to
        ``BaseCrawler._get_response``; the resulting response's
        ``raw_html`` is run through ``HTMLFixer`` (configured with
        ``self._url_regex`` and ``url``) and the fixer's output is
        returned in place of the response object.
        """
        # Let the parent class perform the actual request.
        fetched = BaseCrawler._get_response(self, url, data)

        # Post-process the raw markup before handing it to the caller.
        return HTMLFixer(
            self._url_regex, url, fetched.raw_html
        ).get_fixed_html()
Beispiel #3
0
    def __init__(self, *args, **kwargs):
        """Initialise the base crawler, then install a FastRequestManager.

        All positional and keyword arguments are passed straight through
        to ``BaseCrawler.__init__``.
        """
        BaseCrawler.__init__(self, *args, **kwargs)

        # Assigned after the base initialiser has run, so this value is
        # the one left on the instance.
        self.request_manager = FastRequestManager()
Beispiel #4
0
 def __init__(self):
     """Set up the base crawler, clear the control flags and mark it ready.

     After the base initialiser runs, ``stop_request`` and
     ``pause_request`` are both ``False``; ``status`` is seeded with
     ``'init'`` and then moved to ``'ready'`` via ``_set_status``.
     """
     BaseCrawler.__init__(self)

     # Both control flags start out cleared.
     self.stop_request = self.pause_request = False

     # Seed the attribute directly before going through _set_status.
     self.status = 'init'
     self._set_status('ready')
Beispiel #5
0
 def delay(self, multiplier=1):
     """Run the base delay, then block for as long as a pause is requested.

     The status is set to ``'paused'``, ``BaseCrawler.delay`` is called
     with *multiplier*, and afterwards the method waits while
     ``self.pause_request`` is truthy and ``self.stop_request`` is not.
     Once the wait ends the status is set to ``'running'``.

     The two flags are presumably toggled from another thread (e.g. a
     UI) -- confirm against the callers.
     """
     # Function-local import: the module's import block is not visible
     # from this method, so the dependency is kept self-contained.
     import time

     self._set_status('paused')
     BaseCrawler.delay(self, multiplier)

     # Poll with a short sleep instead of spinning on ``pass``: a tight
     # loop burns a full CPU core and competes with the very thread that
     # is expected to clear ``pause_request``.
     while self.pause_request and not self.stop_request:
         time.sleep(0.05)

     self._set_status('running')
Beispiel #6
0
 def __init__(self, *args, **kwargs):
     """Forward every argument unchanged to ``BaseCrawler.__init__``."""
     BaseCrawler.__init__(self, *args, **kwargs)
Beispiel #7
0
    def __init__(self, *args, **kwargs):
        """Build the crawler and give it a fresh FastRequestManager.

        Arguments are forwarded as-is to ``BaseCrawler.__init__``.
        """
        # Base set-up first; the request manager is assigned afterwards.
        BaseCrawler.__init__(self, *args, **kwargs)
        self.request_manager = FastRequestManager()
Beispiel #8
0
    def __init__(self, *args, **kwargs):
        """Delegate construction entirely to ``BaseCrawler.__init__``.

        No additional state is set here; positional and keyword
        arguments are passed through untouched.
        """
        BaseCrawler.__init__(self, *args, **kwargs)