Example #1
0
    def _handleFinished(self):
        """Finalize the HAR entry for the reply that just finished.

        Marks the entry as finished, stores the "receive" timing and the
        total time, back-fills the "send" timing when it is still falsy and
        merges the reply data into the entry's "response" section. When
        ``_harEntry()`` returns ``None`` only the log message is written.
        """
        reply = self.sender()
        entry = self._harEntry()
        if entry is None:
            self.log("Finished downloading {url}", reply)
            return

        entry["_tmp"]["state"] = self.REQUEST_FINISHED
        tmp = entry["_tmp"]

        finished_at = datetime.utcnow()
        started_at = tmp['start_time']
        response_started_at = tmp['response_start_time']

        receive = har.get_duration(response_started_at, finished_at)
        total = har.get_duration(started_at, finished_at)

        timings = entry["timings"]
        timings["receive"] = receive
        entry["time"] = total

        if not timings["send"]:
            # No "send" timing was recorded: derive it from what is left of
            # the total, storing values below 1e-6 as 0.
            send = total - receive - timings["wait"]
            timings["send"] = 0 if send < 1e-6 else send

        entry["response"].update(har_qt.reply2har(reply))
        self.log("Finished downloading {url}", reply)
Example #2
0
    def _handleUploadProgress(self, sent, total):
        """Track upload progress in the HAR entry and log it.

        :param sent: amount uploaded so far; stored as the request's
            "bodySize".
        :param total: total amount to upload; ``-1`` is logged as ``?``.

        ``sent == 0`` is treated as the moment sending starts (the
        "blocked" timing is recorded), and ``sent == total`` as the moment
        sending ends (the "send" timing is recorded).
        """
        entry = self._harEntry()
        if entry is not None:
            entry["request"]["bodySize"] = int(sent)

            stamp = datetime.utcnow()
            tmp = entry["_tmp"]
            if sent == 0:
                # it is a moment the sending is started
                request_started_at = tmp["request_start_time"]
                tmp["request_start_sending_time"] = stamp
                entry["timings"]["blocked"] = har.get_duration(
                    request_started_at, stamp)

            tmp["request_sent_time"] = stamp

            if sent == total:
                # Everything is uploaded: the response phase starts now.
                tmp["response_start_time"] = stamp
                sending_started_at = tmp["request_start_sending_time"]
                entry["timings"]["send"] = har.get_duration(
                    sending_started_at, stamp)

        shown_total = '?' if total == -1 else total
        message = "Uploaded %d/%s of {url}" % (sent, shown_total)
        self.log(message, self.sender(), min_level=4)
Example #3
0
    def _handleFinished(self):
        """Finalize bookkeeping for the reply that just finished.

        The reply is first passed to ``_cancelReplyTimer``. If
        ``_harEntry()`` returns an entry, it is marked as finished, its
        "receive" timing and total time are stored, a falsy "send" timing
        is back-filled, and the reply data is merged into its "response"
        section. The completion is logged in every case.
        """
        reply = self.sender()
        self._cancelReplyTimer(reply)

        entry = self._harEntry()
        if entry is not None:
            entry["_tmp"]["state"] = self.REQUEST_FINISHED
            tmp = entry["_tmp"]

            finished_at = datetime.utcnow()
            started_at, response_at = (
                tmp['start_time'], tmp['response_start_time'])

            receive_time = har.get_duration(response_at, finished_at)
            total_time = har.get_duration(started_at, finished_at)

            timings = entry["timings"]
            timings["receive"] = receive_time
            entry["time"] = total_time

            if not timings["send"]:
                # "send" was never measured: use the remainder of the total
                # time, storing values below 1e-6 as 0.
                send_time = total_time - receive_time - timings["wait"]
                if send_time < 1e-6:
                    send_time = 0
                timings["send"] = send_time

            entry["response"].update(har_qt.reply2har(reply))

        self.log("Finished downloading {url}", reply)
Example #4
0
    def _handleMetaData(self):
        """Signal emitted before reading response body, after getting headers.

        Passes the reply to ``_handle_reply_cookies``, runs every
        ``on_response_headers`` callback registered for the request and,
        when ``_harEntry()`` returns an entry, records the response data
        and the "wait" timing in it. Headers arriving for an entry already
        marked as finished are logged and otherwise ignored.
        """
        reply = self.sender()
        self._handle_reply_cookies(reply)

        callbacks = self._getWebPageAttribute(reply.request(), "callbacks")

        if callbacks and "on_response_headers" in callbacks:
            for cb in callbacks["on_response_headers"]:
                try:
                    cb(reply)
                except Exception:
                    # Only ordinary errors are logged and swallowed here;
                    # KeyboardInterrupt / SystemExit must not be silenced
                    # by a failing callback.
                    # TODO unhandled exceptions in lua callbacks
                    # should we raise errors here?
                    # https://github.com/scrapinghub/splash/issues/161
                    self.log("error in on_response_headers callback", min_level=1)
                    self.log(traceback.format_exc(), min_level=1)

        har_entry = self._harEntry()
        if har_entry is not None:
            if har_entry["_tmp"]["state"] == self.REQUEST_FINISHED:
                # The entry is already finalized; do not overwrite it.
                self.log("Headers received for {url}; ignoring", reply, min_level=3)
                return

            har_entry["_tmp"]["state"] = self.REQUEST_HEADERS_RECEIVED
            har_entry["response"].update(har_qt.reply2har(reply))

            # "wait" spans from the last recorded request-sent time to now;
            # now also becomes the response start time.
            now = datetime.utcnow()
            request_sent = har_entry["_tmp"]["request_sent_time"]
            har_entry["_tmp"]["response_start_time"] = now
            har_entry["timings"]["wait"] = har.get_duration(request_sent, now)

        self.log("Headers received for {url}", reply, min_level=3)
Example #5
0
    def _handleUploadProgress(self, sent, total):
        """Update the HAR entry with upload progress and log the progress.

        :param sent: amount uploaded so far; saved as the request's
            "bodySize".
        :param total: total amount to upload; shown as ``?`` in the log
            when it is ``-1``.
        """
        entry = self._harEntry()
        if entry is not None:
            entry["request"]["bodySize"] = int(sent)

            moment = datetime.utcnow()
            scratch = entry["_tmp"]

            if sent == 0:
                # it is a moment the sending is started
                began = scratch["request_start_time"]
                scratch["request_start_sending_time"] = moment
                entry["timings"]["blocked"] = har.get_duration(began, moment)

            scratch["request_sent_time"] = moment

            if sent == total:
                # Upload complete: record when the response phase starts
                # and how long sending took.
                scratch["response_start_time"] = moment
                sending_began = scratch["request_start_sending_time"]
                entry["timings"]["send"] = har.get_duration(sending_began, moment)

        total_label = total if total != -1 else '?'
        self.log("Uploaded %d/%s of {url}" % (sent, total_label),
                 self.sender(),
                 min_level=4)
Example #6
0
    def _handleMetaData(self):
        """Record response data and the "wait" timing once headers arrive.

        Does nothing but log when ``_harEntry()`` returns ``None``. Headers
        arriving for an entry already marked as finished are logged and
        ignored.
        """
        reply = self.sender()
        entry = self._harEntry()
        if entry is None:
            self.log("Headers received for {url}", reply, min_level=3)
            return

        tmp = entry["_tmp"]
        if tmp["state"] == self.REQUEST_FINISHED:
            self.log("Headers received for {url}; ignoring", reply, min_level=3)
            return

        tmp["state"] = self.REQUEST_HEADERS_RECEIVED
        entry["response"].update(har_qt.reply2har(reply))

        # "wait" is measured from the last recorded request-sent time.
        headers_at = datetime.utcnow()
        sent_at = tmp["request_sent_time"]
        tmp["response_start_time"] = headers_at
        entry["timings"]["wait"] = har.get_duration(sent_at, headers_at)

        self.log("Headers received for {url}", reply, min_level=3)
Example #7
0
    def _handleMetaData(self):
        """Process the reply's cookies and record header arrival.

        The reply is passed to ``_handle_reply_cookies``; then, if
        ``_harEntry()`` returns an entry that is not already marked as
        finished, the response data and the "wait" timing are stored in it.
        """
        reply = self.sender()
        self._handle_reply_cookies(reply)

        entry = self._harEntry()
        if entry is not None:
            scratch = entry["_tmp"]
            already_finished = scratch["state"] == self.REQUEST_FINISHED
            if already_finished:
                self.log("Headers received for {url}; ignoring",
                         reply,
                         min_level=3)
                return

            scratch["state"] = self.REQUEST_HEADERS_RECEIVED
            entry["response"].update(har_qt.reply2har(reply))

            # The response starts now; "wait" covers the time since the
            # last recorded request-sent time.
            arrived = datetime.utcnow()
            last_sent = scratch["request_sent_time"]
            scratch["response_start_time"] = arrived
            entry["timings"]["wait"] = har.get_duration(last_sent, arrived)

        self.log("Headers received for {url}", reply, min_level=3)
Example #8
0
    def _handleMetaData(self):
        """Signal emitted before reading response body, after getting headers.

        Passes the reply to ``_handle_reply_cookies``, invokes each
        ``on_response_headers`` callback registered for the request, and,
        when ``_harEntry()`` returns an entry, stores the response data and
        the "wait" timing in it. If the entry is already marked as
        finished, the headers are logged and ignored.
        """
        reply = self.sender()
        self._handle_reply_cookies(reply)

        callbacks = self._getWebPageAttribute(reply.request(), "callbacks")

        if callbacks and "on_response_headers" in callbacks:
            for cb in callbacks["on_response_headers"]:
                try:
                    cb(reply)
                except Exception:
                    # Catch ordinary errors only, so that a failing
                    # callback cannot swallow KeyboardInterrupt or
                    # SystemExit.
                    # TODO unhandled exceptions in lua callbacks
                    # should we raise errors here?
                    # https://github.com/scrapinghub/splash/issues/161
                    self.log("error in on_response_headers callback",
                             min_level=1)
                    self.log(traceback.format_exc(), min_level=1)

        har_entry = self._harEntry()
        if har_entry is not None:
            if har_entry["_tmp"]["state"] == self.REQUEST_FINISHED:
                # Already finalized: leave the stored data untouched.
                self.log("Headers received for {url}; ignoring",
                         reply,
                         min_level=3)
                return

            har_entry["_tmp"]["state"] = self.REQUEST_HEADERS_RECEIVED
            har_entry["response"].update(har_qt.reply2har(reply))

            # The response starts now; "wait" is the time elapsed since the
            # last recorded request-sent time.
            now = datetime.utcnow()
            request_sent = har_entry["_tmp"]["request_sent_time"]
            har_entry["_tmp"]["response_start_time"] = now
            har_entry["timings"]["wait"] = har.get_duration(request_sent, now)

        self.log("Headers received for {url}", reply, min_level=3)