def __init__(self, lua, exceptions, reply, exposed_request,
             har_entry=None, read_body=False):
    """
    Set up the exposed response for ``reply``.

    The response details come from ``har_entry["response"]`` when a HAR
    entry is passed in. Otherwise they are computed with ``reply2har``;
    ``read_body`` selects the call that also requests the content
    (``include_content=True, binary_content=False``).
    """
    super(_ExposedResponse, self).__init__(lua, exceptions)
    self.headers = self.lua.python2lua(get_headers_dict(reply))

    if har_entry is not None:
        info = har_entry["response"]
    elif read_body:
        info = reply2har(reply, include_content=True, binary_content=False)
    else:
        info = reply2har(reply)

    self.request = exposed_request
    self._info = info
    # Both start out unset (None) here.
    self._info_lua = None
    self._body_binary = None
def store_reply_finished(self, req_id, reply):
    """
    Record the final state of a reply in its log entry.

    Does nothing when the log has no entry for ``req_id``. Otherwise the
    entry is marked as finished, its ``receive``/``send`` timings and total
    ``time`` are filled in, and the response section is refreshed from
    ``reply2har(reply, include_content=True)``.
    """
    if not self.log.has_entry(req_id):
        return

    entry = self.log.get_mutable_entry(req_id)
    entry["_splash_processing_state"] = self.REQUEST_FINISHED

    # Timings are measured against the moment this method runs.
    finished_at = datetime.utcnow()
    tmp = entry['_tmp']
    started_at = tmp['start_time']
    response_started_at = tmp['response_start_time']

    receive_time = get_duration(response_started_at, finished_at)
    total_time = get_duration(started_at, finished_at)

    timings = entry["timings"]
    timings["receive"] = receive_time
    entry["time"] = total_time

    if not timings["send"]:
        # "send" is whatever is left of the total after wait and receive;
        # values below 1e-6 (including negative ones) are stored as 0.
        send_time = total_time - receive_time - timings["wait"]
        timings["send"] = 0 if send_time < 1e-6 else send_time

    # Refresh the remaining reply information.
    entry["response"].update(reply2har(reply, include_content=True))
def _handleMetaData(self):
    """
    Slot for the signal emitted once headers are available and before the
    response body is read.

    Processes reply cookies, runs the ``on_response_headers`` callbacks
    registered for the request and then updates the HAR entry, unless
    that entry is already marked as finished.
    """
    reply = self.sender()
    self._handle_reply_cookies(reply)

    callbacks = self._getWebPageAttribute(reply.request(), "callbacks")
    if callbacks and "on_response_headers" in callbacks:
        header_callbacks = callbacks["on_response_headers"]
    else:
        header_callbacks = ()

    for on_headers in header_callbacks:
        try:
            on_headers(reply)
        except:
            # TODO unhandled exceptions in lua callbacks
            # should we raise errors here?
            # https://github.com/scrapinghub/splash/issues/161
            self.log("error in on_response_headers callback", min_level=1)
            self.log(traceback.format_exc(), min_level=1)

    entry = self._harEntry()
    if entry is not None:
        tmp = entry["_tmp"]
        if tmp["state"] == self.REQUEST_FINISHED:
            self.log("Headers received for {url}; ignoring", reply,
                     min_level=3)
            return

        tmp["state"] = self.REQUEST_HEADERS_RECEIVED
        entry["response"].update(har_qt.reply2har(reply))

        # "wait" is the time from the request being sent until now.
        headers_at = datetime.utcnow()
        sent_at = tmp["request_sent_time"]
        tmp["response_start_time"] = headers_at
        entry["timings"]["wait"] = har.get_duration(sent_at, headers_at)

    self.log("Headers received for {url}", reply, min_level=3)
def store_reply_finished(self, req_id, reply, content):
    """
    Record the final state of a reply in its log entry.

    Does nothing when the log has no entry for ``req_id``. Otherwise the
    entry is marked as finished, its ``receive``/``send`` timings and total
    ``time`` are filled in, and the response section is refreshed from
    ``reply2har(reply, content=content)``.
    """
    if not self.log.has_entry(req_id):
        return

    entry = self.log.get_mutable_entry(req_id)
    entry["_splash_processing_state"] = self.REQUEST_FINISHED

    # Timings are measured against the moment this method runs.
    finished_at = datetime.utcnow()
    tmp = entry["_tmp"]
    started_at = tmp["start_time"]
    response_started_at = tmp["response_start_time"]

    receive_time = get_duration(response_started_at, finished_at)
    total_time = get_duration(started_at, finished_at)

    timings = entry["timings"]
    timings["receive"] = receive_time
    entry["time"] = total_time

    if not timings["send"]:
        # "send" is whatever is left of the total after wait and receive;
        # values below 1e-6 (including negative ones) are stored as 0.
        send_time = total_time - receive_time - timings["wait"]
        timings["send"] = 0 if send_time < 1e-6 else send_time

    # Refresh the remaining reply information.
    entry["response"].update(reply2har(reply, content=content))
def _handleFinished(self):
    """
    Slot connected to a reply's ``finished`` signal.

    Cancels the reply timer, then (when a HAR entry exists) marks the
    entry as finished, fills in its timings and refreshes its response
    data. A "Finished downloading" message is logged in every case.
    """
    reply = self.sender()
    self._cancelReplyTimer(reply)

    entry = self._harEntry()
    if entry is not None:
        tmp = entry["_tmp"]
        tmp["state"] = self.REQUEST_FINISHED

        finished_at = datetime.utcnow()
        started_at = tmp['start_time']
        response_started_at = tmp['response_start_time']

        receive_time = har.get_duration(response_started_at, finished_at)
        total_time = har.get_duration(started_at, finished_at)

        timings = entry["timings"]
        timings["receive"] = receive_time
        entry["time"] = total_time

        if not timings["send"]:
            # "send" is the remainder of the total after wait and receive;
            # values below 1e-6 (including negative ones) are stored as 0.
            send_time = total_time - receive_time - timings["wait"]
            timings["send"] = 0 if send_time < 1e-6 else send_time

        entry["response"].update(har_qt.reply2har(reply))

    self.log("Finished downloading {url}", reply)
def _handleFinished(self):
    """
    Slot connected to a reply's ``finished`` signal.

    When a HAR entry exists it is marked as finished, its timings are
    filled in and its response data is refreshed. A "Finished downloading"
    message is logged in every case.
    """
    reply = self.sender()

    entry = self._harEntry()
    if entry is not None:
        tmp = entry["_tmp"]
        tmp["state"] = self.REQUEST_FINISHED

        finished_at = datetime.utcnow()
        started_at = tmp['start_time']
        response_started_at = tmp['response_start_time']

        receive_time = har.get_duration(response_started_at, finished_at)
        total_time = har.get_duration(started_at, finished_at)

        timings = entry["timings"]
        timings["receive"] = receive_time
        entry["time"] = total_time

        if not timings["send"]:
            # "send" is the remainder of the total after wait and receive;
            # values below 1e-6 (including negative ones) are stored as 0.
            send_time = total_time - receive_time - timings["wait"]
            timings["send"] = 0 if send_time < 1e-6 else send_time

        entry["response"].update(har_qt.reply2har(reply))

    self.log("Finished downloading {url}", reply)
def store_new_reply(self, req_id, reply):
    """
    Store the initial information about a reply.

    The response section of the log entry for ``req_id`` is updated with
    ``reply2har(reply)``; nothing happens when the log has no such entry.
    """
    if self.log.has_entry(req_id):
        response = self.log.get_mutable_entry(req_id)["response"]
        response.update(reply2har(reply))
def createRequest(self, operation, request, outgoingData=None):
    """
    Called whenever a new request is sent; returns the reply object
    the caller will work with.

    The request is wrapped, custom headers and cookies are handled, and
    then, with the proxy applied, ``on_request`` callbacks are run, the
    HAR entry is initialised, the parent implementation creates the reply,
    an optional timeout is installed and the reply signals are connected.
    """
    start_time = datetime.utcnow()

    request = self._wrapRequest(request)
    self._handle_custom_headers(request)
    self._handle_request_cookies(request)

    with self._proxyApplied(request):
        callbacks = self._getWebPageAttribute(request, "callbacks")
        if callbacks and 'on_request' in callbacks:
            request_callbacks = callbacks["on_request"]
        else:
            request_callbacks = ()

        for on_request in request_callbacks:
            try:
                on_request(request, operation, outgoingData)
            except:
                # Unhandled exceptions in createRequest method cause
                # segfaults, so we log all errors.
                self.log("error in on_resource_requested callback",
                         min_level=1)
                self.log(traceback.format_exc(), min_level=1)

        if hasattr(request, 'custom_proxy'):
            self.setProxy(request.custom_proxy)

        har_entry = self._harEntry(request, create=True)
        if har_entry is not None:
            initial_data = self._initialHarData(
                start_time=start_time,
                operation=operation,
                request=request,
                outgoingData=outgoingData
            )
            har_entry.update(initial_data)

        parent = super(ProxiedQNetworkAccessManager, self)
        reply = parent.createRequest(operation, request, outgoingData)

        if hasattr(request, 'timeout'):
            # request.timeout is multiplied by 1000 before being passed on;
            # a falsy result means no timeout is installed.
            timeout_ms = request.timeout * 1000
            if timeout_ms:
                self._setReplyTimeout(reply, timeout_ms)

        if har_entry is not None:
            har_entry["response"].update(har_qt.reply2har(reply))

        reply.error.connect(self._handleError)
        reply.finished.connect(self._handleFinished)
        # http://doc.qt.io/qt-5/qnetworkreply.html#metaDataChanged
        reply.metaDataChanged.connect(self._handleMetaData)
        reply.downloadProgress.connect(self._handleDownloadProgress)

    return reply
def createRequest(self, operation, request, outgoingData=None):
    """
    Called whenever a new request is sent; returns the reply object
    the caller will work with.

    The request is wrapped, custom headers and cookies are handled, and
    then, with the proxy applied, ``on_request`` callbacks are run, the
    HAR entry is initialised, the parent implementation creates the reply,
    an optional timeout is installed and the reply signals are connected.
    """
    start_time = datetime.utcnow()

    request = self._wrapRequest(request)
    self._handle_custom_headers(request)
    self._handle_request_cookies(request)

    with self._proxyApplied(request):
        callbacks = self._getWebPageAttribute(request, "callbacks")
        if callbacks and 'on_request' in callbacks:
            request_callbacks = callbacks["on_request"]
        else:
            request_callbacks = ()

        for on_request in request_callbacks:
            try:
                on_request(request, operation, outgoingData)
            except:
                # Unhandled exceptions in createRequest method cause
                # segfaults, so we log all errors.
                self.log("error in on_resource_requested callback",
                         min_level=1)
                self.log(traceback.format_exc(), min_level=1)

        if hasattr(request, 'custom_proxy'):
            self.setProxy(request.custom_proxy)

        har_entry = self._harEntry(request, create=True)
        if har_entry is not None:
            initial_data = self._initialHarData(
                start_time=start_time,
                operation=operation,
                request=request,
                outgoingData=outgoingData
            )
            har_entry.update(initial_data)

        parent = super(ProxiedQNetworkAccessManager, self)
        reply = parent.createRequest(operation, request, outgoingData)

        if hasattr(request, 'timeout'):
            # request.timeout is multiplied by 1000 before being passed on;
            # a falsy result means no timeout is installed.
            timeout_ms = request.timeout * 1000
            if timeout_ms:
                self._setReplyTimeout(reply, timeout_ms)

        if har_entry is not None:
            har_entry["response"].update(har_qt.reply2har(reply))

        reply.error.connect(self._handleError)
        reply.finished.connect(self._handleFinished)
        # http://doc.qt.io/qt-5/qnetworkreply.html#metaDataChanged
        reply.metaDataChanged.connect(self._handleMetaData)
        reply.downloadProgress.connect(self._handleDownloadProgress)

    return reply
def __init__(self, lua, reply):
    """
    Wrap ``reply`` and convert its headers, HAR-style response info,
    commands and originating request to Lua values via ``lua.python2lua``.
    """
    self.lua = lua
    self.response = reply
    to_lua = self.lua.python2lua

    # according to specs HTTP response headers should not contain unicode
    # https://github.com/kennethreitz/requests/issues/1926#issuecomment-35524028
    plain_headers = dict(
        (str(name), str(value)) for name, value in reply.rawHeaderPairs()
    )
    self.headers = to_lua(plain_headers)
    self.info = to_lua(reply2har(reply))

    commands = get_commands(self)
    self.commands = to_lua(commands)
    # Command names are whitelisted in addition to the class-level list.
    self.attr_whitelist = list(commands.keys()) + self._attribute_whitelist
    self._exceptions = []

    request_info = request2har(reply.request(), reply.operation())
    self.request = to_lua(request_info)
def __init__(self, lua, reply, har_entry=None):
    """
    Set up the exposed response for ``reply``.

    Response info is taken from ``har_entry['response']`` when a HAR entry
    is supplied and from ``reply2har(reply)`` otherwise. Headers, info and
    the originating request are converted with ``lua.python2lua``.
    """
    super(_ExposedResponse, self).__init__(lua)
    to_lua = self.lua.python2lua

    # according to specs HTTP response headers should not contain unicode
    # https://github.com/kennethreitz/requests/issues/1926#issuecomment-35524028
    plain_headers = dict(
        (str(name), str(value)) for name, value in reply.rawHeaderPairs()
    )
    self.headers = to_lua(plain_headers)

    info = reply2har(reply) if har_entry is None else har_entry['response']
    self.info = to_lua(info)

    request_info = request2har(reply.request(), reply.operation())
    self.request = to_lua(request_info)
def __init__(self, lua, reply):
    """
    Wrap ``reply`` and convert its headers, HAR-style response info,
    commands and originating request to Lua values via ``lua.python2lua``.
    """
    self.lua = lua
    self.response = reply
    to_lua = self.lua.python2lua

    # according to specs HTTP response headers should not contain unicode
    # https://github.com/kennethreitz/requests/issues/1926#issuecomment-35524028
    plain_headers = dict(
        (str(name), str(value)) for name, value in reply.rawHeaderPairs()
    )
    self.headers = to_lua(plain_headers)
    self.info = to_lua(reply2har(reply))

    commands = get_commands(self)
    self.commands = to_lua(commands)
    # Command names are whitelisted in addition to the class-level list.
    self.attr_whitelist = list(commands.keys()) + self._attribute_whitelist
    self._exceptions = []

    request_info = request2har(reply.request(), reply.operation())
    self.request = to_lua(request_info)
def _handleMetaData(self):
    """
    Slot connected to a reply's ``metaDataChanged`` signal.

    Updates the HAR entry (state, response data, ``wait`` timing) unless
    the entry is already marked as finished, in which case the event is
    logged as ignored and nothing else happens.
    """
    reply = self.sender()

    entry = self._harEntry()
    if entry is not None:
        tmp = entry["_tmp"]
        if tmp["state"] == self.REQUEST_FINISHED:
            self.log("Headers received for {url}; ignoring", reply,
                     min_level=3)
            return

        tmp["state"] = self.REQUEST_HEADERS_RECEIVED
        entry["response"].update(har_qt.reply2har(reply))

        # "wait" is the time from the request being sent until now.
        headers_at = datetime.utcnow()
        sent_at = tmp["request_sent_time"]
        tmp["response_start_time"] = headers_at
        entry["timings"]["wait"] = har.get_duration(sent_at, headers_at)

    self.log("Headers received for {url}", reply, min_level=3)
def store_reply_headers_received(self, req_id, reply):
    """
    Update the log entry for ``req_id`` once HTTP headers have arrived.

    Nothing is changed when the log has no such entry or when the entry
    is already marked as finished. Otherwise the state, the response data,
    the response start time and the ``wait`` timing are updated.
    """
    if not self.log.has_entry(req_id):
        return

    entry = self.log.get_mutable_entry(req_id)
    if entry["_splash_processing_state"] == self.REQUEST_FINISHED:
        # Headers for an already finished request are ignored silently.
        return

    entry["_splash_processing_state"] = self.REQUEST_HEADERS_RECEIVED
    entry["response"].update(reply2har(reply))

    # "wait" is the time from the request being sent until now.
    headers_at = datetime.utcnow()
    tmp = entry["_tmp"]
    sent_at = tmp["request_sent_time"]
    tmp["response_start_time"] = headers_at
    entry["timings"]["wait"] = get_duration(sent_at, headers_at)
def _handleMetaData(self):
    """
    Slot connected to a reply's ``metaDataChanged`` signal.

    Processes reply cookies, then updates the HAR entry (state, response
    data, ``wait`` timing) unless the entry is already marked as finished,
    in which case the event is logged as ignored and nothing else happens.
    """
    reply = self.sender()
    self._handle_reply_cookies(reply)

    entry = self._harEntry()
    if entry is not None:
        tmp = entry["_tmp"]
        if tmp["state"] == self.REQUEST_FINISHED:
            self.log("Headers received for {url}; ignoring", reply,
                     min_level=3)
            return

        tmp["state"] = self.REQUEST_HEADERS_RECEIVED
        entry["response"].update(har_qt.reply2har(reply))

        # "wait" is the time from the request being sent until now.
        headers_at = datetime.utcnow()
        sent_at = tmp["request_sent_time"]
        tmp["response_start_time"] = headers_at
        entry["timings"]["wait"] = har.get_duration(sent_at, headers_at)

    self.log("Headers received for {url}", reply, min_level=3)
def createRequest(self, operation, request, outgoingData=None):
    """
    Called whenever a new request is sent; returns the reply object
    the caller will work with.

    The request is wrapped, custom headers and cookies are handled, the
    HAR entry (if any) is filled with initial request data, and the reply
    is created by the parent implementation with the proxy applied.
    """
    start_time = datetime.utcnow()

    request = self._wrapRequest(request)
    self._handle_custom_headers(request)
    self._handle_request_cookies(request)

    har_entry = self._harEntry(request, create=True)
    if har_entry is not None:
        # -1 is recorded as the body size when there is no outgoing data.
        bodySize = -1 if outgoingData is None else outgoingData.size()

        # Every timestamp starts out equal to the creation time.
        tmp_data = {
            'start_time': start_time,
            'request_start_sending_time': start_time,
            'request_sent_time': start_time,
            'response_start_time': start_time,
            'state': self.REQUEST_CREATED,
        }
        started = har.format_datetime(start_time)
        request_data = {
            "method": OPERATION_NAMES.get(operation, '?'),
            "url": unicode(request.url().toString()),
            "httpVersion": "HTTP/1.1",
            "cookies": har_qt.request_cookies2har(request),
            "queryString": har_qt.querystring2har(request.url()),
            "headers": har_qt.headers2har(request),
            "headersSize": har_qt.headers_size(request),
            "bodySize": bodySize,
        }
        timings = {
            "blocked": -1,
            "dns": -1,
            "connect": -1,
            "ssl": -1,
            "send": 0,
            "wait": 0,
            "receive": 0,
        }
        har_entry.update({
            '_tmp': tmp_data,
            "startedDateTime": started,
            "request": request_data,
            "response": {"bodySize": -1},
            "cache": {},
            "timings": timings,
            "time": 0,
        })

    with self._proxyApplied(request):
        parent = super(ProxiedQNetworkAccessManager, self)
        reply = parent.createRequest(operation, request, outgoingData)

        if har_entry is not None:
            har_entry["response"].update(har_qt.reply2har(reply))

        reply.error.connect(self._handleError)
        reply.finished.connect(self._handleFinished)
        reply.metaDataChanged.connect(self._handleMetaData)
        reply.downloadProgress.connect(self._handleDownloadProgress)

    return reply
def callback(reply):
    # Convert the reply (content requested, binary_content=True) to a Lua
    # value and hand it back for the command identified by cmd_id.
    lua_result = self.lua.python2lua(
        reply2har(reply, include_content=True, binary_content=True)
    )
    self._return(cmd_id, lua_result)
def callback(reply):
    # Convert the reply (content requested, binary_content=True) to a Lua
    # value and return it as the result of cmd.
    lua_result = self.lua.python2lua(
        reply2har(reply, include_content=True, binary_content=True)
    )
    cmd.return_result(lua_result)
def createRequest(self, operation, request, outgoingData=None):
    """
    Called whenever a new request is sent; returns the reply object
    the caller will work with.

    The request is wrapped, the HAR entry (if any) is filled with initial
    request data, and the reply is created by the parent implementation
    with the proxy applied.
    """
    start_time = datetime.utcnow()
    request = self._wrapRequest(request)

    har_entry = self._harEntry(request, create=True)
    if har_entry is not None:
        # -1 is recorded as the body size when there is no outgoing data.
        bodySize = -1 if outgoingData is None else outgoingData.size()

        # Every timestamp starts out equal to the creation time.
        tmp_data = {
            'start_time': start_time,
            'request_start_sending_time': start_time,
            'request_sent_time': start_time,
            'response_start_time': start_time,
            'state': self.REQUEST_CREATED,
        }
        started = har.format_datetime(start_time)
        request_data = {
            "method": OPERATION_NAMES.get(operation, '?'),
            "url": unicode(request.url().toString()),
            "httpVersion": "HTTP/1.1",
            "cookies": har_qt.request_cookies2har(request),
            "queryString": har_qt.querystring2har(request.url()),
            "headers": har_qt.headers2har(request),
            "headersSize": har_qt.headers_size(request),
            "bodySize": bodySize,
        }
        timings = {
            "blocked": -1,
            "dns": -1,
            "connect": -1,
            "ssl": -1,
            "send": 0,
            "wait": 0,
            "receive": 0,
        }
        har_entry.update({
            '_tmp': tmp_data,
            "startedDateTime": started,
            "request": request_data,
            "response": {"bodySize": -1},
            "cache": {},
            "timings": timings,
            "time": 0,
        })

    with self._proxyApplied(request):
        parent = super(ProxiedQNetworkAccessManager, self)
        reply = parent.createRequest(operation, request, outgoingData)

        if har_entry is not None:
            har_entry["response"].update(har_qt.reply2har(reply))

        reply.error.connect(self._handleError)
        reply.finished.connect(self._handleFinished)
        reply.metaDataChanged.connect(self._handleMetaData)
        reply.downloadProgress.connect(self._handleDownloadProgress)

    return reply