Beispiel #1
0
    def _init_file(self):
        """Build a ``warcinfo`` record and hand it to ``write_record``.

        The record headers carry the record type, a freshly generated record
        id, the current UTC time, the base name of ``self._file_name`` and
        ``self._main_url``.  The record content is an
        ``application/warc-fields`` block of CRLF-separated
        ``name: value`` lines.
        """
        record_id = WarcRecord.random_warc_uuid()
        timestamp = warc.warc_datetime_str(datetime.utcnow())
        base_name = os.path.basename(self._file_name)

        header_pairs = [
            (WarcRecord.TYPE, WarcRecord.WARCINFO),
            (WarcRecord.ID, record_id),
            (WarcRecord.DATE, timestamp),
            (WarcRecord.FILENAME, base_name),
            (Warc.MAIN_URL, self._main_url),
        ]

        # One "name: value" entry per line; lines are joined with CRLF.
        field_lines = (
            "software: bardo",
            "format: WARC File Format 1.0",
            "conformsTo: " + CONFORMS_TO,
            "robots: unknown",
        )
        payload = ("application/warc-fields", "\r\n".join(field_lines))

        self.write_record(WarcRecord(headers=header_pairs, content=payload))
Beispiel #2
0
    def _init_file(self):
        """Build a ``warcinfo`` record and hand it to ``write_record``.

        The record headers carry the record type, a freshly generated record
        id, the current UTC time, the base name of ``self._file_name`` and
        ``self._main_url``.  The record content is an
        ``application/warc-fields`` block of CRLF-separated
        ``name: value`` lines.
        """
        record_id = WarcRecord.random_warc_uuid()
        timestamp = warc.warc_datetime_str(datetime.utcnow())
        base_name = os.path.basename(self._file_name)

        header_pairs = [
            (WarcRecord.TYPE, WarcRecord.WARCINFO),
            (WarcRecord.ID, record_id),
            (WarcRecord.DATE, timestamp),
            (WarcRecord.FILENAME, base_name),
            (Warc.MAIN_URL, self._main_url),
        ]

        # One "name: value" entry per line; lines are joined with CRLF.
        field_lines = (
            "software: bardo",
            "format: WARC File Format 1.0",
            "conformsTo: " + CONFORMS_TO,
            "robots: unknown",
        )
        payload = ("application/warc-fields", "\r\n".join(field_lines))

        self.write_record(WarcRecord(headers=header_pairs, content=payload))
Beispiel #3
0
    def _reply_finished(self):
        """Handle completion of the network reply and archive the response.

        Disconnects this object's slots from the reply's ``readyRead``,
        ``finished`` and ``error`` signals.  When the reply carries no valid
        HTTP status code, the buffered data is discarded and ``finished`` is
        emitted from a zero-delay timer.  Otherwise an HTTP response message
        (status line, header lines, buffered body) is assembled, turned into
        a record with ``warc.make_response``, written to the manager's
        current WARC and passed to ``_init_from_warc_record``.
        """
        reply = self._network_reply

        reply.readyRead.disconnect(self._reply_ready_read)
        reply.finished.disconnect(self._reply_finished)
        reply.error.disconnect(self._reply_error)

        status_code = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)

        if not status_code.isValid():
            # No HTTP status on the reply: drop the buffer and only signal
            # completion.
            self._temp_data.close()
            self._temp_data = None
            self._network_reply = None

            QTimer.singleShot(0, lambda: self.finished.emit())

            return

        # Build the header lines straight from the reply's header list so
        # they keep the order the reply reports; going through a dict would
        # emit them in arbitrary hash order.  Every whitespace character in
        # a value is replaced by a plain space so a value stays on one line.
        elements = []

        for header in reply.rawHeaderList():
            value = re.sub(r"\s", " ", str(reply.rawHeader(header)))
            elements.append(str(header) + ": " + value)

        # The trailing empty element yields the blank line that separates
        # the headers from the body once everything is joined with CRLF.
        elements.append("")

        url = qstring_to_str(reply.url().toString())

        status_msg = reply.attribute(QNetworkRequest.HttpReasonPhraseAttribute)

        assert status_msg.isValid()

        # Read the buffered body and release the buffer right away, so it is
        # not left open if building the record fails further down.
        self._temp_data.seek(0)
        body = self._temp_data.read()
        self._temp_data.close()
        self._temp_data = None

        # XXX: we can't get HTTP version from Qt webkit, assumes 1.1
        h_status = ("HTTP/1.1 " + str(status_code.toString()) + " "
                    + str(status_msg.toString()))

        content_data = h_status + "\r\n" + "\r\n".join(elements) + "\r\n" + body

        content = (ResponseMessage.CONTENT_TYPE, content_data)

        # NOTE(review): the meaning of the final ``None`` argument is not
        # visible here -- confirm against warc.make_response.
        wr = warc.make_response(WarcRecord.random_warc_uuid(),
                                warc.warc_datetime_str(datetime.utcnow()),
                                url, content, None)

        self.manager().current_warc.write_record(wr)

        self._init_from_warc_record(wr)

        self._network_reply = None
Beispiel #4
0
    def _reply_finished(self):
        """Handle completion of the network reply and archive the response.

        Disconnects this object's slots from the reply's ``readyRead``,
        ``finished`` and ``error`` signals.  When the reply carries no valid
        HTTP status code, the buffered data is discarded and ``finished`` is
        emitted from a zero-delay timer.  Otherwise an HTTP response message
        (status line, header lines, buffered body) is assembled, turned into
        a record with ``warc.make_response``, written to the manager's
        current WARC and passed to ``_init_from_warc_record``.
        """
        reply = self._network_reply

        reply.readyRead.disconnect(self._reply_ready_read)
        reply.finished.disconnect(self._reply_finished)
        reply.error.disconnect(self._reply_error)

        status_code = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)

        if not status_code.isValid():
            # No HTTP status on the reply: drop the buffer and only signal
            # completion.
            self._temp_data.close()
            self._temp_data = None
            self._network_reply = None

            QTimer.singleShot(0, lambda: self.finished.emit())

            return

        # Build the header lines straight from the reply's header list so
        # they keep the order the reply reports; going through a dict would
        # emit them in arbitrary hash order.  Every whitespace character in
        # a value is replaced by a plain space so a value stays on one line.
        elements = []

        for header in reply.rawHeaderList():
            value = re.sub(r"\s", " ", str(reply.rawHeader(header)))
            elements.append(str(header) + ": " + value)

        # The trailing empty element yields the blank line that separates
        # the headers from the body once everything is joined with CRLF.
        elements.append("")

        url = qstring_to_str(reply.url().toString())

        status_msg = reply.attribute(QNetworkRequest.HttpReasonPhraseAttribute)

        assert status_msg.isValid()

        # Read the buffered body and release the buffer right away, so it is
        # not left open if building the record fails further down.
        self._temp_data.seek(0)
        body = self._temp_data.read()
        self._temp_data.close()
        self._temp_data = None

        # XXX: we can't get HTTP version from Qt webkit, assumes 1.1
        h_status = ("HTTP/1.1 " + str(status_code.toString()) + " "
                    + str(status_msg.toString()))

        content_data = h_status + "\r\n" + "\r\n".join(elements) + "\r\n" + body

        content = (ResponseMessage.CONTENT_TYPE, content_data)

        # NOTE(review): the meaning of the final ``None`` argument is not
        # visible here -- confirm against warc.make_response.
        wr = warc.make_response(WarcRecord.random_warc_uuid(),
                                warc.warc_datetime_str(datetime.utcnow()),
                                url, content, None)

        self.manager().current_warc.write_record(wr)

        self._init_from_warc_record(wr)

        self._network_reply = None