Beispiel #1
0
    def process(self, task, callback=None, **kwargs):
        """Fetch ``task.url`` and attach request, response and content to ``task``.

        Builds an ``httpclient.HTTPRequest`` from ``self.use_gzip`` and
        ``self.user_agent``, yields a ``gen.Task`` wrapping
        ``self.client.fetch`` and stores the result on ``task.response``.
        The response code and raw length are reported to
        ``PageStats.crawled``.

        NOTE(review): the ``yield`` makes this a generator; presumably it is
        wrapped by a tornado ``gen`` decorator outside this view -- confirm.

        Args:
            task: object exposing ``url`` and ``content_from_response()``;
                ``request``, ``response`` and ``content`` are assigned on it.
            callback: invoked once with ``(Step.CONTINUE, task)`` when the
                fetch has been processed. Skipped when ``None``.
            **kwargs: accepted for interface compatibility and ignored.
        """
        task.request = httpclient.HTTPRequest(
            task.url, use_gzip=self.use_gzip, user_agent=self.user_agent)
        task.response = yield gen.Task(self.client.fetch, task.request)
        response = task.response

        body = response.body
        blen = len(body) if body else 0

        # Prefer the Content-Length header (size on the wire); fall back to
        # the decoded body length when the header is absent or malformed.
        # Only conversion errors are caught so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        try:
            raw_len = int(response.headers.get('content-length', blen))
        except (TypeError, ValueError):
            raw_len = blen

        code = response.code
        logging.debug("Fetched code=%d len_raw=%d len=%d url=%s",
                      code, raw_len, blen, task.url)
        PageStats.crawled(code, raw_len)

        if code == 200:
            task.content = task.content_from_response()
        else:
            if code in (301, 302):
                logging.error("Unhandled Redirect code=%d url=%s",
                              code, task.url)
            # Every non-200 outcome, redirects included, clears the content
            # so later steps never see a stale or undefined attribute.
            task.content = None

        # ``callback`` defaults to None in the signature; calling it
        # unconditionally would raise TypeError.
        if callback is not None:
            callback((Step.CONTINUE, task))
Beispiel #2
0
    def fetch(self, task, callback):
        """Fetch ``task.request`` and attach the response and content to ``task``.

        Yields a ``gen.Task`` wrapping ``self.client.fetch`` and stores the
        result on ``task.response``. The response code and raw length are
        reported to ``PageStats.crawled``.

        NOTE(review): the ``yield`` makes this a generator; presumably it is
        wrapped by a tornado ``gen`` decorator outside this view -- confirm.

        Args:
            task: object exposing ``url``, ``request`` and
                ``content_from_response()``; ``response`` and ``content``
                are assigned on it.
            callback: invoked once with ``(Step.CONTINUE, task)`` when the
                fetch has been processed.
        """
        logging.debug("Starting fetch of url=%s", task.url)
        task.response = yield gen.Task(self.client.fetch, task.request)
        response = task.response

        body = response.body
        blen = len(body) if body else 0

        # Prefer the Content-Length header (size on the wire); fall back to
        # the decoded body length when the header is absent or malformed.
        # Only conversion errors are caught so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        try:
            raw_len = int(response.headers.get('content-length', blen))
        except (TypeError, ValueError):
            raw_len = blen

        code = response.code
        logging.debug("Fetched code=%d len_raw=%d len=%d url=%s",
                      code, raw_len, blen, task.url)
        PageStats.crawled(code, raw_len)

        if code == 200:
            task.content = task.content_from_response()
        else:
            if code in (301, 302):
                logging.error("Unhandled Redirect code=%d url=%s",
                              code, task.url)
            # Every non-200 outcome, redirects included, clears the content
            # so later steps never see a stale or undefined attribute.
            task.content = None

        callback((Step.CONTINUE, task))
Beispiel #3
0
 def get(self):
     """Finish the request with the current ``PageStats.stats()`` payload."""
     stats = PageStats.stats()
     return self.finish(stats)