def _init_file(self):
    """Write the leading ``warcinfo`` record describing this WARC file.

    The record carries a fresh random record id, the current UTC time, the
    base name of ``self._file_name`` and ``self._main_url`` as headers, and
    an ``application/warc-fields`` body naming the software, the format and
    the specification the file conforms to.
    """
    # Values computed per file; everything else below is constant.
    record_id = WarcRecord.random_warc_uuid()
    created_at = warc.warc_datetime_str(datetime.utcnow())
    short_name = os.path.basename(self._file_name)

    headers = [
        (WarcRecord.TYPE, WarcRecord.WARCINFO),
        (WarcRecord.ID, record_id),
        (WarcRecord.DATE, created_at),
        (WarcRecord.FILENAME, short_name),
        (Warc.MAIN_URL, self._main_url),
    ]

    # warc-fields body: one "name: value" pair per CRLF-separated line.
    field_lines = [
        "software: bardo",
        "format: WARC File Format 1.0",
        "conformsTo: " + CONFORMS_TO,
        "robots: unknown",
    ]
    body = ("application/warc-fields", "\r\n".join(field_lines))

    self.write_record(WarcRecord(headers=headers, content=body))
def _reply_finished(self):
    """Finalize the network reply and archive it as a WARC response record.

    Disconnects this object's handlers from the reply's ``readyRead``,
    ``finished`` and ``error`` signals.  If the reply carries no valid HTTP
    status code, the buffered data is discarded and ``finished`` is emitted
    from the event loop.  Otherwise the HTTP response (status line, headers
    and buffered body) is rebuilt, wrapped in a WARC response record,
    written to the manager's current WARC file and used to initialise this
    object.

    The temporary buffer is closed and the reply reference dropped even
    when building or writing the record raises, so a failure does not leak
    either of them.

    Raises:
        AssertionError: if the reply has a status code but no valid reason
            phrase attribute.
    """
    reply = self._network_reply
    reply.readyRead.disconnect(self._reply_ready_read)
    reply.finished.disconnect(self._reply_finished)
    reply.error.disconnect(self._reply_error)

    status_code = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
    if not status_code.isValid():
        # No HTTP status to record: drop the buffer and report completion
        # on the next event-loop iteration.
        self._temp_data.close()
        self._temp_data = None
        self._network_reply = None
        QTimer.singleShot(0, lambda: self.finished.emit())
        return

    try:
        try:
            # Build the header lines in the order the reply reports them;
            # a plain dict would scramble that order in the archived copy.
            header_lines = []
            seen = set()
            for raw_name in reply.rawHeaderList():
                name = str(raw_name)
                if name in seen:
                    continue
                seen.add(name)
                # Collapse every whitespace character (including embedded
                # line breaks) to a plain space so each header stays on
                # one line.
                value = re.sub(r"\s", " ", str(reply.rawHeader(raw_name)))
                header_lines.append(name + ": " + value)

            url = qstring_to_str(reply.url().toString())
            status_msg = reply.attribute(
                QNetworkRequest.HttpReasonPhraseAttribute)
            assert status_msg.isValid()

            self._temp_data.seek(0)
            # XXX: we can't get HTTP version from Qt webkit, assumes 1.1
            status_line = ("HTTP/1.1 " + str(status_code.toString()) + " "
                           + str(status_msg.toString()))
            # Status line, header lines, one empty line, then the body.
            content_data = ("\r\n".join([status_line] + header_lines
                                        + ["", ""])
                            + self._temp_data.read())
            content = (ResponseMessage.CONTENT_TYPE, content_data)
            record = warc.make_response(
                WarcRecord.random_warc_uuid(),
                warc.warc_datetime_str(datetime.utcnow()),
                url, content, None)
        finally:
            # The buffer is no longer needed whether or not the record
            # could be built.
            self._temp_data.close()
            self._temp_data = None

        self.manager().current_warc.write_record(record)
        self._init_from_warc_record(record)
    finally:
        self._network_reply = None
def _reply_finished(self):
    """Finalize the network reply and archive it as a WARC response record.

    Disconnects this object's handlers from the reply's ``readyRead``,
    ``finished`` and ``error`` signals.  If the reply carries no valid HTTP
    status code, the buffered data is discarded and ``finished`` is emitted
    from the event loop.  Otherwise the HTTP response (status line, headers
    and buffered body) is rebuilt, wrapped in a WARC response record,
    written to the manager's current WARC file and used to initialise this
    object.

    The temporary buffer is closed and the reply reference dropped even
    when building or writing the record raises, so a failure does not leak
    either of them.

    Raises:
        AssertionError: if the reply has a status code but no valid reason
            phrase attribute.
    """
    reply = self._network_reply
    reply.readyRead.disconnect(self._reply_ready_read)
    reply.finished.disconnect(self._reply_finished)
    reply.error.disconnect(self._reply_error)

    status_code = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
    if not status_code.isValid():
        # No HTTP status to record: drop the buffer and report completion
        # on the next event-loop iteration.
        self._temp_data.close()
        self._temp_data = None
        self._network_reply = None
        QTimer.singleShot(0, lambda: self.finished.emit())
        return

    try:
        try:
            # Build the header lines in the order the reply reports them;
            # a plain dict would scramble that order in the archived copy.
            header_lines = []
            seen = set()
            for raw_name in reply.rawHeaderList():
                name = str(raw_name)
                if name in seen:
                    continue
                seen.add(name)
                # Collapse every whitespace character (including embedded
                # line breaks) to a plain space so each header stays on
                # one line.
                value = re.sub(r"\s", " ", str(reply.rawHeader(raw_name)))
                header_lines.append(name + ": " + value)

            url = qstring_to_str(reply.url().toString())
            status_msg = reply.attribute(
                QNetworkRequest.HttpReasonPhraseAttribute)
            assert status_msg.isValid()

            self._temp_data.seek(0)
            # XXX: we can't get HTTP version from Qt webkit, assumes 1.1
            status_line = ("HTTP/1.1 " + str(status_code.toString()) + " "
                           + str(status_msg.toString()))
            # Status line, header lines, one empty line, then the body.
            content_data = ("\r\n".join([status_line] + header_lines
                                        + ["", ""])
                            + self._temp_data.read())
            content = (ResponseMessage.CONTENT_TYPE, content_data)
            record = warc.make_response(
                WarcRecord.random_warc_uuid(),
                warc.warc_datetime_str(datetime.utcnow()),
                url, content, None)
        finally:
            # The buffer is no longer needed whether or not the record
            # could be built.
            self._temp_data.close()
            self._temp_data = None

        self.manager().current_warc.write_record(record)
        self._init_from_warc_record(record)
    finally:
        self._network_reply = None