def prepare_response(self, grab):
    """Build a grab ``Response`` from the urllib3 response on this transport.

    The body is either fully read into memory (optionally truncated to
    ``body_maxsize``) or, when ``_response_path`` is configured, written
    out to that file.
    """
    response = Response()
    # Rebuild a raw HTTP-style header block for Response.head
    # (join is linear; repeated += concatenation was quadratic).
    head = ''.join('%s: %s\r\n' % (key, val)
                   for key, val in self._response.getheaders().items())
    head += '\r\n'
    response.head = make_str(head, encoding='latin', errors='ignore')
    if self._request._response_path:
        response.body_path = self._request._response_path
        # HACK: the response is still fully read into memory before
        # being written to the target file.
        self._request._response_file.write(self._response.read())
        self._request._response_file.close()
    elif self._request.body_maxsize is not None:
        response.body = self._response.read(self._request.body_maxsize)
    else:
        response.body = self._response.read()
    response.code = self._response.status
    response.url = (self._response.get_redirect_location()
                    or self._request.url)
    import email.message
    hdr = email.message.Message()
    for key, val in self._response.getheaders().items():
        hdr[key] = val
    response.parse(charset=grab.config['document_charset'], headers=hdr)
    jar = self.extract_cookiejar(self._response, self._request)
    response.cookies = CookieManager(jar)
    return response
def prepare_response(self, grab):
    """Convert the stored urllib3 response into a grab ``Response``.

    Either reads the whole body into memory (truncated to
    ``body_maxsize`` when set) or dumps it into ``_response_path``.
    """
    response = Response()
    # Reconstruct the raw header block; ''.join avoids the quadratic
    # cost of building the string with repeated +=.
    header_lines = ['%s: %s\r\n' % (key, val)
                    for key, val in self._response.getheaders().items()]
    head = ''.join(header_lines) + '\r\n'
    response.head = make_str(head, encoding='latin', errors='ignore')
    if self._request._response_path:
        response.body_path = self._request._response_path
        # HACK: response is still fully read into memory before being
        # flushed to the destination file.
        self._request._response_file.write(self._response.read())
        self._request._response_file.close()
    else:
        maxsize = self._request.body_maxsize
        if maxsize is not None:
            response.body = self._response.read(maxsize)
        else:
            response.body = self._response.read()
    response.code = self._response.status
    response.url = (self._response.get_redirect_location()
                    or self._request.url)
    import email.message
    hdr = email.message.Message()
    for key, val in self._response.getheaders().items():
        hdr[key] = val
    response.parse(charset=grab.config['document_charset'], headers=hdr)
    jar = self.extract_cookiejar(self._response, self._request)
    response.cookies = CookieManager(jar)
    return response
def build_response(self, resource):
    """Assemble a grab ``Response`` from a finished page resource."""
    runtime_html = self.page.mainFrame().toHtml()
    raw_body = resource.reply.data
    page_url = resource.reply.url().toString()
    hdrs = resource.headers
    jar = self.get_cookies()
    # py3 hack
    if PY3K:
        if isinstance(raw_body, QByteArray):
            raw_body = raw_body.data()
        hdrs = decode_dict(hdrs)
        jar = decode_dict(jar)
    else:
        runtime_html = unicode(runtime_html)
        raw_body = str(raw_body)
        page_url = str(page_url)
    response = Response()
    response.head = ''
    response.code = resource.status_code
    response.runtime_body = runtime_html.encode('utf-8')
    response.body = raw_body
    response.url = page_url
    response.parse(charset='utf-8')
    response.headers = hdrs
    response.cookies = jar
    return response
def prepare_response(self, grab):
    """Fake a successful HTTP response using the mock registry."""
    response = Response()
    try:
        response.body = MOCK_REGISTRY[self.request_url]['body']
    except KeyError:
        raise GrabMockNotFoundError(
            'Mock registry does not have information about '
            'following URL: %s' % self.request_url)
    now_str = datetime.now().strftime('%a, %d %B %Y %H:%M:%S')
    header_lines = (
        'Accept-Ranges:bytes',
        'Content-Length:%d' % len(response.body),
        'Content-Type:text/plain',
        'Date:%s GMT' % now_str,
        'Last-Modified:%s GMT' % now_str,
        'Vary:Accept-Encoding',
    )
    response.head = '\r\n'.join(header_lines)
    response.code = 200
    # Timing metrics are meaningless for a mocked request
    response.total_time = 0
    response.name_lookup_time = 0
    response.connect_time = 0
    response.url = self.request_url
    response.parse()
    response.cookies = CookieManager(self.extract_cookiejar())
    return response
def prepare_response(self, grab):
    """Collect the data gathered by pycurl callbacks into a ``Response``."""
    if self.body_file:
        self.body_file.close()
    response = Response()
    response.head = b"".join(self.response_header_chunks)
    if self.body_path:
        response.body_path = self.body_path
    else:
        response.body = b"".join(self.response_body_chunks)
    # Release the buffered chunks to free memory
    self.response_header_chunks = []
    self.response_body_chunks = []
    info = self.curl.getinfo
    response.code = info(pycurl.HTTP_CODE)
    response.total_time = info(pycurl.TOTAL_TIME)
    response.connect_time = info(pycurl.CONNECT_TIME)
    response.name_lookup_time = info(pycurl.NAMELOOKUP_TIME)
    response.download_size = info(pycurl.SIZE_DOWNLOAD)
    response.upload_size = info(pycurl.SIZE_UPLOAD)
    response.download_speed = info(pycurl.SPEED_DOWNLOAD)
    response.remote_ip = info(pycurl.PRIMARY_IP)
    response.url = info(pycurl.EFFECTIVE_URL)
    response.parse(charset=grab.config["document_charset"])
    response.cookies = CookieManager(self.extract_cookiejar())
    # Cookies now live in the CookieManager, so drop curl's own store
    self.curl.setopt(pycurl.COOKIELIST, "ALL")
    return response
def prepare_response(self, grab):
    """Build a ``Response`` from state accumulated during a pycurl perform."""
    if self.body_file:
        self.body_file.close()
    response = Response()
    response.head = b''.join(self.response_header_chunks)
    if self.body_path:
        response.body_path = self.body_path
    else:
        response.body = b''.join(self.response_body_chunks)
    # Drop chunk buffers to free memory
    self.response_header_chunks = []
    self.response_body_chunks = []
    # Copy transfer metrics from the curl handle onto the response
    for attr, opt in (('code', pycurl.HTTP_CODE),
                      ('total_time', pycurl.TOTAL_TIME),
                      ('connect_time', pycurl.CONNECT_TIME),
                      ('name_lookup_time', pycurl.NAMELOOKUP_TIME),
                      ('download_size', pycurl.SIZE_DOWNLOAD),
                      ('upload_size', pycurl.SIZE_UPLOAD),
                      ('download_speed', pycurl.SPEED_DOWNLOAD),
                      ('remote_ip', pycurl.PRIMARY_IP),
                      ('url', pycurl.EFFECTIVE_URL)):
        setattr(response, attr, self.curl.getinfo(opt))
    response.parse(charset=grab.config['document_charset'])
    response.cookies = CookieManager(self.extract_cookiejar())
    # Cookies are now kept in the CookieManager; clear curl's own jar
    self.curl.setopt(pycurl.COOKIELIST, 'ALL')
    return response
def prepare_response(self, grab):
    """Convert the completed ``requests`` response into a grab ``Response``.

    NOTE(review): ``body_max_size`` is not honoured by this transport —
    the whole body is loaded into memory via ``.content``.
    """
    response = Response()
    response.head = ''
    response.body = self._requests_response.content
    response.code = self._requests_response.status_code
    response.headers = self._requests_response.headers
    response.cookies = self._requests_response.cookies or {}
    response.url = grab.config['url']
    if grab.config['charset'] is not None:
        response.parse(charset=grab.config['charset'])
    else:
        response.parse()
    return response
def build_response(self, resource):
    """Wrap a finished Qt network resource into a ``Response`` (py2 code)."""
    rendered_html = unicode(self.page.mainFrame().toHtml())
    response = Response()
    response.head = ''
    response.runtime_body = rendered_html.encode('utf-8')
    response.body = str(resource.reply.data)
    response.code = resource.status_code
    response.url = str(resource.reply.url().toString())
    response.parse(charset='utf-8')
    response.headers = resource.headers
    response.cookies = self.get_cookies()
    return response
def custom_prepare_response_func(transport, g):
    """Build a ``Response`` from the cached item instead of the network."""
    response = Response()
    response.head = cache_item['head']
    response.body = body
    response.code = cache_item['response_code']
    response.url = cache_item['response_url']
    # No real transfer happened for a cache hit
    response.download_size = len(body)
    response.upload_size = 0
    response.download_speed = 0
    response.parse()
    response.cookies = CookieManager(transport.extract_cookiejar())
    return response
def custom_prepare_response_func(transport, grab):
    """Restore a ``Response`` from a cache record rather than the network."""
    response = Response()
    response.head = cache_item['head']
    response.body = body
    response.code = cache_item['response_code']
    response.url = cache_item['response_url']
    # Cache hit: upload/speed counters are zero, size mirrors the body
    response.download_size = len(body)
    response.upload_size = 0
    response.download_speed = 0
    response.parse(charset=grab.config['document_charset'])
    response.cookies = CookieManager(transport.extract_cookiejar())
    response.from_cache = True
    return response
def custom_prepare_response_func(transport, grab):
    """Rebuild a ``Response`` from a cache record (head stored as utf-8 bytes)."""
    response = Response()
    response.head = cache_item['head'].decode('utf-8')
    response.body = body
    response.code = cache_item['response_code']
    response.url = cache_item['response_url']
    # Cache hit: upload/speed counters are zero, size mirrors the body
    response.download_size = len(body)
    response.upload_size = 0
    response.download_speed = 0
    response.parse(charset=grab.config['document_charset'])
    response.cookies = CookieManager(transport.extract_cookiejar())
    response.from_cache = True
    return response
def custom_prepare_response_func(transport, g):
    """Build a ``Response`` from a cache record.

    Legacy cache records may lack the ``response_url`` key; fall back
    to ``url`` and warn.
    """
    response = Response()
    response.head = cache_item['head']
    response.body = body
    response.code = cache_item['response_code']
    response.time = 0
    # Hack for deprecated behaviour
    if 'response_url' in cache_item:
        response.url = cache_item['response_url']
    else:
        # Fixed garbled wording of the original warning message
        logger.debug('Your cache contains items without `response_url` key. '
                     'It is deprecated data format. Please re-download your '
                     'cache or build `response_url` keys manually.')
        response.url = cache_item['url']
    response.parse()
    response.cookies = transport.extract_cookies()
    return response
def prepare_response(self, grab):
    """Build a ``Response`` from the Ghost.py page state.

    The rendered content is unicode, so the body is always encoded and
    parsed as utf-8 regardless of ``document_charset``.
    """
    response = Response()
    response.head = ''
    response.body = self.ghost.content.encode('utf-8')
    response.code = self.response_page.http_status
    response.time = 0
    response.url = self.response_page.url
    response.parse(charset='utf-8')
    response.cookies = self.extract_cookies()
    return response
def prepare_response(self, grab):
    """Snapshot the selenium browser state into a ``Response``.

    NOTE: quits the browser as a side effect.
    """
    response = Response()
    response.head = ''
    response._unicode_body = self.browser.page_source
    response.body = self.browser.page_source.encode('utf-8')
    response.charset = 'utf-8'
    response.url = self.browser.current_url
    response.code = 200  # TODO: fix, selenium exposes no status code here
    response.cookies = self._extract_cookies()
    self.browser.quit()
    return response
def prepare_response(self, grab):
    """Assemble a ``Response`` from pycurl buffers (py3-aware header decode)."""
    # py3 hack
    if PY3K:
        self.response_head_chunks = decode_list(self.response_head_chunks)
    if self.body_file:
        self.body_file.close()
    response = Response()
    response.head = ''.join(self.response_head_chunks)
    if self.body_path:
        response.body_path = self.body_path
    else:
        response.body = b''.join(self.response_body_chunks)
    # Drop the buffered chunks to free memory
    self.response_head_chunks = []
    self.response_body_chunks = []
    getinfo = self.curl.getinfo
    response.code = getinfo(pycurl.HTTP_CODE)
    response.total_time = getinfo(pycurl.TOTAL_TIME)
    response.connect_time = getinfo(pycurl.CONNECT_TIME)
    response.name_lookup_time = getinfo(pycurl.NAMELOOKUP_TIME)
    response.download_size = getinfo(pycurl.SIZE_DOWNLOAD)
    response.upload_size = getinfo(pycurl.SIZE_UPLOAD)
    response.download_speed = getinfo(pycurl.SPEED_DOWNLOAD)
    response.url = getinfo(pycurl.EFFECTIVE_URL)
    charset = grab.config['document_charset']
    if charset is not None:
        response.parse(charset=charset)
    else:
        response.parse()
    response.cookies = CookieManager(self.extract_cookiejar())
    # Cookies are kept in the manager now; clear curl's own jar
    self.curl.setopt(pycurl.COOKIELIST, 'ALL')
    return response
def custom_prepare_response_func(transport, g):
    """Build a ``Response`` from a cache record.

    Supports legacy records that lack the ``response_url`` key by
    falling back to ``url`` with a warning.
    """
    response = Response()
    response.head = cache_item['head']
    response.body = body
    response.code = cache_item['response_code']
    response.download_size = len(body)
    response.upload_size = 0
    response.download_speed = 0
    # Hack for deprecated behaviour
    if 'response_url' in cache_item:
        response.url = cache_item['response_url']
    else:
        # Fixed grammar of the original warning message
        logger.debug('Your cache contains items without `response_url` '
                     'key. It is deprecated data format. Please '
                     're-download your cache or build '
                     '`response_url` keys manually.')
        response.url = cache_item['url']
    response.parse()
    response.cookies = CookieManager(transport.extract_cookiejar())
    return response
def prepare_response(self, grab):
    """Capture the selenium browser page into a ``Response``.

    NOTE: quits the browser as a side effect.
    """
    response = Response()
    response.head = ''
    response._unicode_body = self.browser.page_source
    response.body = self.browser.page_source.encode('utf-8')
    response.charset = 'utf-8'
    response.url = self.browser.current_url
    response.code = 200  # TODO: fix, selenium exposes no status code here
    response.cookies = self._extract_cookies()
    self.browser.quit()
    return response
def custom_prepare_response_func(transport, g):
    """Build a ``Response`` from a cache record (head stored as utf-8 bytes).

    Legacy records without ``response_url`` fall back to ``url``.
    """
    response = Response()
    response.head = cache_item["head"].decode("utf-8")
    response.body = body
    response.code = cache_item["response_code"]
    response.download_size = len(body)
    response.upload_size = 0
    response.download_speed = 0
    # Hack for deprecated behaviour
    if "response_url" in cache_item:
        response.url = cache_item["response_url"]
    else:
        # Fixed garbled wording of the original warning message
        logger.debug(
            "Your cache contains items without `response_url` key. "
            "It is deprecated data format. Please re-download your "
            "cache or build `response_url` keys manually."
        )
        response.url = cache_item["url"]
    response.parse()
    response.cookies = CookieManager(transport.extract_cookiejar())
    return response
def prepare_response(self, grab):
    """Build a grab ``Response`` from the urllib3 response, honouring the
    nobody/body_maxsize/timeout request options.

    The underlying urllib3 connection is always released, even on error.
    """
    try:
        response = Response()
        # Rebuild a raw HTTP-style header block for Response.head
        head = ''.join('%s: %s\r\n' % (key, val)
                       for key, val in self._response.getheaders().items())
        head += '\r\n'
        response.head = make_str(head, encoding='latin', errors='ignore')

        def read_with_timeout():
            # Drain the body in chunks, enforcing config_nobody,
            # config_body_maxsize and the overall request timeout.
            if self._request.config_nobody:
                return b''
            maxsize = self._request.config_body_maxsize
            chunks = []
            default_chunk_size = 10000
            if maxsize:
                chunk_size = min(default_chunk_size, maxsize + 1)
            else:
                chunk_size = default_chunk_size
            total_size = 0
            while True:
                chunk = self._response.read(chunk_size)
                if chunk:
                    total_size += len(chunk)
                    chunks.append(chunk)
                    if maxsize and total_size > maxsize:
                        logger.debug('Response body max size limit '
                                     'reached: %s' % maxsize)
                        # FIX: stop reading once the limit is exceeded.
                        # The original only logged and kept draining the
                        # whole body before truncating it below.
                        break
                else:
                    break
                if self._request.timeout:
                    if (time.time() - self._request.op_started
                            > self._request.timeout):
                        raise GrabTimeoutError
            data = b''.join(chunks)
            if maxsize:
                data = data[:maxsize]
            return data

        if self._request._response_path:
            response.body_path = self._request._response_path
            # FIXME: quick dirty hack — the response is still fully read
            # into memory before being written to disk
            self._request._response_file.write(read_with_timeout())
            self._request._response_file.close()
        else:
            response.body = read_with_timeout()
        response.code = self._response.status
        response.url = (self._response.get_redirect_location()
                        or self._request.url)
        import email.message
        hdr = email.message.Message()
        for key, val in self._response.getheaders().items():
            hdr[key] = val
        response.parse(charset=grab.config['document_charset'],
                       headers=hdr)
        jar = self.extract_cookiejar()
        response.cookies = CookieManager(jar)
        return response
    finally:
        self._response.release_conn()