Exemple #1
0
    def build_response(self, resource):
        """Assemble a ``Response`` from a loaded page resource.

        The status code, raw reply data, URL and headers are read from
        ``resource``; the rendered HTML comes from the page's main frame and
        the cookies from ``self.get_cookies()``.

        :param resource: object exposing ``status_code``, ``reply`` and
            ``headers``
        :returns: the populated ``Response``
        """
        result = Response()
        result.head = ''
        result.code = resource.status_code

        rendered_html = self.page.mainFrame().toHtml()
        raw_body = resource.reply.data
        final_url = resource.reply.url().toString()
        header_map = resource.headers
        cookie_map = self.get_cookies()

        if not PY3K:
            # Python 2: coerce the values to native unicode/str objects.
            rendered_html = unicode(rendered_html)
            raw_body = str(raw_body)
            final_url = str(final_url)
        else:
            # Python 3: unwrap a QByteArray body and run headers/cookies
            # through decode_dict.
            if isinstance(raw_body, QByteArray):
                raw_body = raw_body.data()
            header_map = decode_dict(header_map)
            cookie_map = decode_dict(cookie_map)

        result.runtime_body = rendered_html.encode('utf-8')
        result.body = raw_body
        result.url = final_url
        result.parse(charset='utf-8')
        result.headers = header_map
        result.cookies = cookie_map

        return result
Exemple #2
0
    def prepare_response(self, grab):
        """Build a ``Response`` from ``self._response`` and ``self._request``.

        The header block is rebuilt from ``self._response.getheaders()``.
        The body is either written to the request's response file (when
        ``self._request._response_path`` is set) or stored in memory,
        limited to ``self._request.body_maxsize`` bytes when that is not
        ``None``.

        :param grab: object whose ``config['document_charset']`` is passed
            to ``Response.parse``
        :returns: the populated ``Response``
        """
        import email.message

        response = Response()

        # Fetch the headers once; they are needed for the raw head string
        # and for the Message object handed to parse().
        header_items = list(self._response.getheaders().items())

        head = ''.join('%s: %s\r\n' % (key, val)
                       for key, val in header_items) + '\r\n'
        response.head = make_str(head, encoding='latin', errors='ignore')

        if self._request._response_path:
            response.body_path = self._request._response_path
            # Quick dirty hack: the response is actually fully read into
            # memory before being written out.
            try:
                self._request._response_file.write(self._response.read())
            finally:
                # Close the file even when reading or writing fails.
                self._request._response_file.close()
        elif self._request.body_maxsize is not None:
            response.body = self._response.read(self._request.body_maxsize)
        else:
            response.body = self._response.read()

        response.code = self._response.status

        # Prefer the redirect location when one is reported.
        response.url = (self._response.get_redirect_location()
                        or self._request.url)

        hdr = email.message.Message()
        for key, val in header_items:
            hdr[key] = val
        response.parse(charset=grab.config['document_charset'],
                       headers=hdr)

        jar = self.extract_cookiejar(self._response, self._request)
        response.cookies = CookieManager(jar)

        return response
Exemple #3
0
 def build_response(self, resource):
     """Assemble a ``Response`` from a loaded page resource.

     Uses the main frame's HTML as the runtime body and the resource's
     reply data, status code, URL and headers for the remaining fields.
     """
     result = Response()
     result.head = ''

     rendered_html = unicode(self.page.mainFrame().toHtml())
     result.runtime_body = rendered_html.encode('utf-8')

     reply = resource.reply
     result.body = str(reply.data)
     result.code = resource.status_code
     result.url = str(reply.url().toString())

     result.parse(charset='utf-8')
     result.headers = resource.headers
     result.cookies = self.get_cookies()
     return result
Exemple #4
0
        def custom_prepare_response_func(transport, g):
            """Rebuild a ``Response`` from ``cache_item`` and ``body``.

            Both ``cache_item`` and ``body`` come from the enclosing scope;
            cookies are taken from ``transport.extract_cookies()``.
            """
            cached = Response()
            cached.head = cache_item['head']
            cached.body = body
            cached.code = cache_item['response_code']
            cached.time = 0

            if 'response_url' not in cache_item:
                # Old-format cache entry: fall back to the `url` key.
                logger.debug('You cache contains items without `response_url` key. It is depricated data format. Please re-download you cache or build manually `response_url` keys.')
                cached.url = cache_item['url']
            else:
                cached.url = cache_item['response_url']

            cached.parse()
            cached.cookies = transport.extract_cookies()
            return cached
Exemple #5
0
    def prepare_response(self, grab):
        """Build a ``Response`` from ``self.ghost`` and ``self.response_page``.

        The body is the UTF-8 encoded ``self.ghost.content``; status and URL
        come from ``self.response_page``. The document is always parsed as
        UTF-8, regardless of ``grab``'s configuration.
        """
        result = Response()
        result.head = ''

        encoded_content = self.ghost.content.encode('utf-8')
        result.body = encoded_content

        page = self.response_page
        result.code = page.http_status
        result.time = 0
        result.url = page.url

        result.parse(charset='utf-8')
        result.cookies = self.extract_cookies()
        return result
Exemple #6
0
    def prepare_response(self, grab):
        """Build a ``Response`` from ``self.ghost`` and ``self.response_page``.

        The body is the UTF-8 encoded ``self.ghost.content``; status and URL
        come from ``self.response_page``. The document is always parsed as
        UTF-8, regardless of ``grab``'s configuration.
        """
        result = Response()
        result.head = ''

        encoded_content = self.ghost.content.encode('utf-8')
        result.body = encoded_content

        page = self.response_page
        result.code = page.http_status
        result.time = 0
        result.url = page.url

        result.parse(charset='utf-8')
        result.cookies = self.extract_cookies()
        return result
Exemple #7
0
    def prepare_response(self, grab):
        """Build a ``Response`` from the browser's current page, then quit.

        The page source is read once and stored both as the unicode body
        and as its UTF-8 encoded byte body. ``self.browser.quit()`` is
        always called, even when building the response fails.

        :param grab: unused here
        :returns: the populated ``Response``
        """
        try:
            response = Response()
            response.head = ''

            # Read page_source a single time so that both body
            # representations come from the same snapshot of the page.
            page_source = self.browser.page_source
            response._unicode_body = page_source
            response.body = page_source.encode('utf-8')
            response.charset = 'utf-8'

            response.url = self.browser.current_url
            response.code = 200  # TODO: fix, self.browser.status_code
            response.cookies = self._extract_cookies()
        finally:
            # Do not leave the browser running if anything above raised.
            self.browser.quit()

        return response
Exemple #8
0
    def prepare_response(self, grab):
        """Build a ``Response`` from the browser's current page, then quit.

        The page source is read once and stored both as the unicode body
        and as its UTF-8 encoded byte body. ``self.browser.quit()`` is
        always called, even when building the response fails.

        :param grab: unused here
        :returns: the populated ``Response``
        """
        try:
            response = Response()
            response.head = ''

            # Read page_source a single time so that both body
            # representations come from the same snapshot of the page.
            page_source = self.browser.page_source
            response._unicode_body = page_source
            response.body = page_source.encode('utf-8')
            response.charset = 'utf-8'

            response.url = self.browser.current_url
            response.code = 200  # TODO: fix, self.browser.status_code
            response.cookies = self._extract_cookies()
        finally:
            # Do not leave the browser running if anything above raised.
            self.browser.quit()

        return response
Exemple #9
0
        def custom_prepare_response_func(transport, g):
            """Rebuild a ``Response`` from ``cache_item`` and ``body``.

            Both ``cache_item`` and ``body`` come from the enclosing scope.
            Transfer statistics are filled in from the body length, and the
            cookies are wrapped in a ``CookieManager``.
            """
            cached = Response()
            cached.head = cache_item['head']
            cached.body = body
            cached.code = cache_item['response_code']
            cached.download_size = len(body)
            cached.upload_size = 0
            cached.download_speed = 0

            if 'response_url' not in cache_item:
                # Old-format cache entry: fall back to the `url` key.
                legacy_notice = ('You cache contains items without `response_url` '
                                 'key. It is deprecated data format. Please '
                                 're-download you cache or build manually '
                                 '`response_url` keys.')
                logger.debug(legacy_notice)
                cached.url = cache_item['url']
            else:
                cached.url = cache_item['response_url']

            cached.parse()
            jar = transport.extract_cookiejar()
            cached.cookies = CookieManager(jar)
            return cached
Exemple #10
0
 def custom_prepare_response_func(transport, g):
     """Rebuild a ``Response`` from ``cache_item`` and ``body``.

     Both names come from the enclosing scope; cookies are read from
     ``transport.extract_cookiejar()``.
     """
     cached = Response()
     field_values = (
         ('head', cache_item['head']),
         ('body', body),
         ('code', cache_item['response_code']),
         ('download_size', len(body)),
         ('upload_size', 0),
         ('download_speed', 0),
         ('url', cache_item['response_url']),
     )
     # Assign in the listed order, then parse.
     for field, value in field_values:
         setattr(cached, field, value)
     cached.parse()
     jar = transport.extract_cookiejar()
     cached.cookies = CookieManager(jar)
     return cached
Exemple #11
0
    def prepare_response(self, grab):
        """Build a mock ``Response`` for ``self.request_url``.

        The body is looked up in ``MOCK_REGISTRY``; a fixed set of headers
        is synthesised around it, and the timing fields are zeroed.

        :param grab: unused here
        :returns: the populated ``Response``
        :raises GrabMockNotFoundError: when the registry has no body for
            ``self.request_url``
        """
        from email.utils import formatdate

        response = Response()

        try:
            response.body = MOCK_REGISTRY[self.request_url]['body']
        except KeyError:
            raise GrabMockNotFoundError(
                'Mock registry does not have information about '
                'following URL: %s' % self.request_url)

        # formatdate(usegmt=True) yields a locale-independent HTTP date
        # that is really in GMT (e.g. "Fri, 09 Nov 2001 01:08:47 GMT").
        # The previous strftime() call used local time but labelled it GMT.
        now_str = formatdate(usegmt=True)
        response.head = '\r\n'.join((
            'Accept-Ranges:bytes',
            'Content-Length:%d' % len(response.body),
            'Content-Type:text/plain',
            'Date:%s' % now_str,
            'Last-Modified:%s' % now_str,
            'Vary:Accept-Encoding',
        ))

        response.code = 200
        response.total_time = 0
        response.name_lookup_time = 0
        response.connect_time = 0
        response.url = self.request_url
        response.parse()
        response.cookies = CookieManager(self.extract_cookiejar())

        return response
Exemple #12
0
    def prepare_response(self, grab):
        """Build a ``Response`` from the data collected for the curl handle.

        Head and body chunks accumulated on ``self`` are joined (or the
        body path is recorded), the chunk buffers are reset, and status,
        timing, size and URL details are read via ``self.curl.getinfo``.

        :param grab: object whose ``config['document_charset']`` selects
            the charset passed to ``Response.parse`` (omitted when ``None``)
        :returns: the populated ``Response``
        """
        if six.PY3:
            # py3 hack: pass the head chunks through decode_list first.
            self.response_head_chunks = decode_list(self.response_head_chunks)

        if self.body_file:
            self.body_file.close()

        result = Response()
        result.head = ''.join(self.response_head_chunks)
        if not self.body_path:
            result.body = b''.join(self.response_body_chunks)
        else:
            result.body_path = self.body_path

        # Drop the buffered chunks now that they have been consumed.
        self.response_head_chunks = []
        self.response_body_chunks = []

        curl_fields = (
            ('code', pycurl.HTTP_CODE),
            ('total_time', pycurl.TOTAL_TIME),
            ('connect_time', pycurl.CONNECT_TIME),
            ('name_lookup_time', pycurl.NAMELOOKUP_TIME),
            ('download_size', pycurl.SIZE_DOWNLOAD),
            ('upload_size', pycurl.SIZE_UPLOAD),
            ('download_speed', pycurl.SPEED_DOWNLOAD),
            ('remote_ip', pycurl.PRIMARY_IP),
            ('url', pycurl.EFFECTIVE_URL),
        )
        for attr_name, info_key in curl_fields:
            setattr(result, attr_name, self.curl.getinfo(info_key))

        charset = grab.config['document_charset']
        if charset is None:
            result.parse()
        else:
            result.parse(charset=charset)

        result.cookies = CookieManager(self.extract_cookiejar())

        # The cookies kept inside the curl instance are no longer needed.
        self.curl.setopt(pycurl.COOKIELIST, 'ALL')
        return result
Exemple #13
0
    def build_response(self, resource):
        """Assemble a ``Response`` from a loaded page resource.

        The status code, raw reply data, URL and headers are read from
        ``resource``; the rendered HTML comes from the page's main frame and
        the cookies from ``self.get_cookies()``.

        :param resource: object exposing ``status_code``, ``reply`` and
            ``headers``
        :returns: the populated ``Response``
        """
        result = Response()
        result.head = ''
        result.code = resource.status_code

        rendered_html = self.page.mainFrame().toHtml()
        raw_body = resource.reply.data
        final_url = resource.reply.url().toString()
        header_map = resource.headers
        cookie_map = self.get_cookies()

        if not PY3K:
            # Python 2: coerce the values to native unicode/str objects.
            rendered_html = unicode(rendered_html)
            raw_body = str(raw_body)
            final_url = str(final_url)
        else:
            # Python 3: unwrap a QByteArray body and run headers/cookies
            # through decode_dict.
            if isinstance(raw_body, QByteArray):
                raw_body = raw_body.data()
            header_map = decode_dict(header_map)
            cookie_map = decode_dict(cookie_map)

        result.runtime_body = rendered_html.encode('utf-8')
        result.body = raw_body
        result.url = final_url
        result.parse(charset='utf-8')
        result.headers = header_map
        result.cookies = cookie_map

        return result
Exemple #14
0
        def custom_prepare_response_func(transport, g):
            """Rebuild a ``Response`` from ``cache_item`` and ``body``.

            Both names come from the enclosing scope. The cached head is
            decoded as UTF-8 before being stored on the response.
            """
            cached = Response()
            cached.head = cache_item['head'].decode('utf-8')
            cached.body = body
            cached.code = cache_item['response_code']
            cached.download_size = len(body)
            cached.upload_size = 0
            cached.download_speed = 0

            if 'response_url' not in cache_item:
                # Old-format cache entry: fall back to the `url` key.
                legacy_notice = ('You cache contains items without `response_url` '
                                 'key. It is deprecated data format. Please '
                                 're-download you cache or build manually '
                                 '`response_url` keys.')
                logger.debug(legacy_notice)
                cached.url = cache_item['url']
            else:
                cached.url = cache_item['response_url']

            cached.parse()
            jar = transport.extract_cookiejar()
            cached.cookies = CookieManager(jar)

            return cached
Exemple #15
0
    def prepare_response(self, grab):
        """Build a mock ``Response`` for ``self.request_url``.

        The body is looked up in ``MOCK_REGISTRY``; a fixed set of headers
        is synthesised around it, and the timing fields are zeroed.

        :param grab: unused here
        :returns: the populated ``Response``
        :raises GrabMockNotFoundError: when the registry has no body for
            ``self.request_url``
        """
        from email.utils import formatdate

        response = Response()

        try:
            response.body = MOCK_REGISTRY[self.request_url]['body']
        except KeyError:
            raise GrabMockNotFoundError(
                'Mock registry does not have information about '
                'following URL: %s' % self.request_url)

        # formatdate(usegmt=True) yields a locale-independent HTTP date
        # that is really in GMT (e.g. "Fri, 09 Nov 2001 01:08:47 GMT").
        # The previous strftime() call used local time but labelled it GMT.
        now_str = formatdate(usegmt=True)
        response.head = '\r\n'.join((
            'Accept-Ranges:bytes',
            'Content-Length:%d' % len(response.body),
            'Content-Type:text/plain',
            'Date:%s' % now_str,
            'Last-Modified:%s' % now_str,
            'Vary:Accept-Encoding',
        ))

        response.code = 200
        response.total_time = 0
        response.name_lookup_time = 0
        response.connect_time = 0
        response.url = self.request_url
        response.parse()
        response.cookies = CookieManager(self.extract_cookiejar())

        return response
Exemple #16
0
 def custom_prepare_response_func(transport, grab):
     """Rebuild a ``Response`` from ``cache_item`` and ``body``.

     Both names come from the enclosing scope. The document is parsed with
     ``grab.config['document_charset']`` and flagged as coming from cache.
     """
     cached = Response()
     cached.head = cache_item['head']
     cached.body = body
     cached.code = cache_item['response_code']

     # Transfer statistics: only the download size is meaningful here.
     cached.download_size = len(body)
     cached.upload_size = 0
     cached.download_speed = 0

     cached.url = cache_item['response_url']
     charset = grab.config['document_charset']
     cached.parse(charset=charset)

     jar = transport.extract_cookiejar()
     cached.cookies = CookieManager(jar)
     cached.from_cache = True
     return cached
Exemple #17
0
    def prepare_response(self, grab):
        """Build a ``Response`` from the data collected for the curl handle.

        Header and body chunks accumulated on ``self`` are joined as bytes
        (or the body path is recorded), the chunk buffers are reset, and
        status, timing, size and URL details are read via
        ``self.curl.getinfo``.

        :param grab: object whose ``config['document_charset']`` is passed
            to ``Response.parse``
        :returns: the populated ``Response``
        """
        if self.body_file:
            self.body_file.close()

        result = Response()
        result.head = b''.join(self.response_header_chunks)

        if not self.body_path:
            result.body = b''.join(self.response_body_chunks)
        else:
            result.body_path = self.body_path

        # Drop the buffered chunks now that they have been consumed.
        self.response_header_chunks = []
        self.response_body_chunks = []

        curl_fields = (
            ('code', pycurl.HTTP_CODE),
            ('total_time', pycurl.TOTAL_TIME),
            ('connect_time', pycurl.CONNECT_TIME),
            ('name_lookup_time', pycurl.NAMELOOKUP_TIME),
            ('download_size', pycurl.SIZE_DOWNLOAD),
            ('upload_size', pycurl.SIZE_UPLOAD),
            ('download_speed', pycurl.SPEED_DOWNLOAD),
            ('remote_ip', pycurl.PRIMARY_IP),
            ('url', pycurl.EFFECTIVE_URL),
        )
        for attr_name, info_key in curl_fields:
            setattr(result, attr_name, self.curl.getinfo(info_key))

        charset = grab.config['document_charset']
        result.parse(charset=charset)

        result.cookies = CookieManager(self.extract_cookiejar())

        # The cookies kept inside the curl instance are no longer needed.
        self.curl.setopt(pycurl.COOKIELIST, 'ALL')
        return result
Exemple #18
0
 def custom_prepare_response_func(transport, grab):
     """Rebuild a ``Response`` from ``cache_item`` and ``body``.

     Both names come from the enclosing scope. The cached head is decoded
     as UTF-8, the document is parsed with
     ``grab.config['document_charset']`` and flagged as coming from cache.
     """
     cached = Response()
     decoded_head = cache_item['head'].decode('utf-8')
     cached.head = decoded_head
     cached.body = body
     cached.code = cache_item['response_code']

     # Transfer statistics: only the download size is meaningful here.
     cached.download_size = len(body)
     cached.upload_size = 0
     cached.download_speed = 0

     cached.url = cache_item['response_url']
     charset = grab.config['document_charset']
     cached.parse(charset=charset)

     jar = transport.extract_cookiejar()
     cached.cookies = CookieManager(jar)
     cached.from_cache = True
     return cached
Exemple #19
0
    def prepare_response(self, grab):
        """Build a ``Response`` from ``self._response`` and ``self._request``.

        The header block is rebuilt from ``self._response.getheaders()``.
        The body is read in chunks, honouring the request's
        ``config_nobody``, ``config_body_maxsize`` and ``timeout`` settings,
        and is either written to the request's response file or stored in
        memory. ``self._response.release_conn()`` is always called.

        :param grab: object whose ``config['document_charset']`` is passed
            to ``Response.parse``
        :returns: the populated ``Response``
        :raises GrabTimeoutError: when reading the body runs past
            ``self._request.timeout`` seconds since ``op_started``
        """
        try:
            response = Response()

            # Fetch the headers once; they are needed for the raw head
            # string and for the Message object handed to parse().
            header_items = list(self._response.getheaders().items())

            head = ''.join('%s: %s\r\n' % (key, val)
                           for key, val in header_items) + '\r\n'
            response.head = make_str(head, encoding='latin', errors='ignore')

            def read_with_timeout():
                """Read the body in chunks, enforcing size and time limits."""
                if self._request.config_nobody:
                    return b''
                maxsize = self._request.config_body_maxsize
                chunks = []
                default_chunk_size = 10000
                if maxsize:
                    chunk_size = min(default_chunk_size, maxsize + 1)
                else:
                    chunk_size = default_chunk_size
                total_size = 0
                while True:
                    chunk = self._response.read(chunk_size)
                    if not chunk:
                        break
                    total_size += len(chunk)
                    chunks.append(chunk)
                    # NOTE(review): exceeding the limit is only logged; the
                    # loop keeps reading and the data is truncated below.
                    if maxsize and total_size > maxsize:
                        logger.debug(
                            'Response body max size limit reached: %s',
                            maxsize)
                    if self._request.timeout:
                        elapsed = time.time() - self._request.op_started
                        if elapsed > self._request.timeout:
                            raise GrabTimeoutError
                data = b''.join(chunks)
                if maxsize:
                    data = data[:maxsize]
                return data

            if self._request._response_path:
                response.body_path = self._request._response_path
                # FIXME: Quick dirty hack: the response is actually fully
                # read into memory before being written out.
                try:
                    self._request._response_file.write(read_with_timeout())
                finally:
                    # Close the file even when reading or writing fails.
                    self._request._response_file.close()
            else:
                response.body = read_with_timeout()

            response.code = self._response.status

            # Prefer the redirect location when one is reported.
            response.url = (self._response.get_redirect_location()
                            or self._request.url)

            import email.message
            hdr = email.message.Message()
            for key, val in header_items:
                hdr[key] = val
            response.parse(charset=grab.config['document_charset'],
                           headers=hdr)

            jar = self.extract_cookiejar()
            response.cookies = CookieManager(jar)

            return response
        finally:
            self._response.release_conn()
Exemple #20
0
        def custom_prepare_response_func(transport, g):
            """Rebuild a ``Response`` from ``cache_item`` and ``body``.

            Both names come from the enclosing scope. The cached head is
            decoded as UTF-8 before being stored on the response.
            """
            cached = Response()
            cached.head = cache_item["head"].decode("utf-8")
            cached.body = body
            cached.code = cache_item["response_code"]
            cached.download_size = len(body)
            cached.upload_size = 0
            cached.download_speed = 0

            if "response_url" not in cache_item:
                # Old-format cache entry: fall back to the `url` key.
                logger.debug(
                    "You cache contains items without `response_url` key. It is depricated data format. Please re-download you cache or build manually `response_url` keys."
                )
                cached.url = cache_item["url"]
            else:
                cached.url = cache_item["response_url"]

            cached.parse()
            jar = transport.extract_cookiejar()
            cached.cookies = CookieManager(jar)

            return cached
Exemple #21
0
        def custom_prepare_response_func(transport, g):
            """Rebuild a ``Response`` from ``cache_item`` and ``body``.

            Both ``cache_item`` and ``body`` come from the enclosing scope;
            cookies are taken from ``transport.extract_cookies()``.
            """
            cached = Response()
            cached.head = cache_item['head']
            cached.body = body
            cached.code = cache_item['response_code']
            cached.time = 0

            if 'response_url' not in cache_item:
                # Old-format cache entry: fall back to the `url` key.
                logger.debug(
                    'You cache contains items without `response_url` key. It is depricated data format. Please re-download you cache or build manually `response_url` keys.'
                )
                cached.url = cache_item['url']
            else:
                cached.url = cache_item['response_url']

            cached.parse()
            cached.cookies = transport.extract_cookies()
            return cached
Exemple #22
0
 def custom_prepare_response_func(transport, g):
     """Rebuild a ``Response`` from ``cache_item`` and ``body``.

     Both names come from the enclosing scope; cookies are read from
     ``transport.extract_cookiejar()``.
     """
     cached = Response()
     field_values = (
         ('head', cache_item['head']),
         ('body', body),
         ('code', cache_item['response_code']),
         ('download_size', len(body)),
         ('upload_size', 0),
         ('download_speed', 0),
         ('url', cache_item['response_url']),
     )
     # Assign in the listed order, then parse.
     for field, value in field_values:
         setattr(cached, field, value)
     cached.parse()
     jar = transport.extract_cookiejar()
     cached.cookies = CookieManager(jar)
     return cached
Exemple #23
0
    def prepare_response(self, grab):
        """Build a ``Response`` from ``self._response`` and ``self._request``.

        The header block is rebuilt from ``self._response.getheaders()``.
        The body is either written to the request's response file (when
        ``self._request._response_path`` is set) or stored in memory,
        limited to ``self._request.body_maxsize`` bytes when that is not
        ``None``.

        :param grab: object whose ``config['document_charset']`` is passed
            to ``Response.parse``
        :returns: the populated ``Response``
        """
        import email.message

        response = Response()

        # Fetch the headers once; they are needed for the raw head string
        # and for the Message object handed to parse().
        header_items = list(self._response.getheaders().items())

        head = ''.join('%s: %s\r\n' % (key, val)
                       for key, val in header_items) + '\r\n'
        response.head = make_str(head, encoding='latin', errors='ignore')

        if self._request._response_path:
            response.body_path = self._request._response_path
            # Quick dirty hack: the response is actually fully read into
            # memory before being written out.
            try:
                self._request._response_file.write(self._response.read())
            finally:
                # Close the file even when reading or writing fails.
                self._request._response_file.close()
        elif self._request.body_maxsize is not None:
            response.body = self._response.read(self._request.body_maxsize)
        else:
            response.body = self._response.read()

        response.code = self._response.status

        # Prefer the redirect location when one is reported.
        response.url = (self._response.get_redirect_location()
                        or self._request.url)

        hdr = email.message.Message()
        for key, val in header_items:
            hdr[key] = val
        response.parse(charset=grab.config['document_charset'], headers=hdr)

        jar = self.extract_cookiejar(self._response, self._request)
        response.cookies = CookieManager(jar)

        return response
Exemple #24
0
    def prepare_response(self, grab):
        """Build a ``Response`` from ``self._requests_response``.

        Body, status code, headers and cookies are copied from the stored
        response object; the URL is taken from ``grab.config['url']``.

        :param grab: object whose ``config`` supplies ``url`` and
            ``charset`` (``parse`` is called without a charset when the
            latter is ``None``)
        :returns: the populated ``Response``
        """
        result = Response()
        result.head = ''

        reply = self._requests_response
        result.body = reply.content
        result.code = reply.status_code
        result.headers = reply.headers
        # Fall back to an empty dict when the cookies value is falsy.
        result.cookies = reply.cookies or {}
        result.url = grab.config['url']

        charset = grab.config['charset']
        if charset is None:
            result.parse()
        else:
            result.parse(charset=charset)
        return result