Esempio n. 1
0
    def test_load_http2_warc_convert_protocol(self):
        """Convert an HTTP/2 HAR fixture to WARC and check the protocol.

        Runs ``har2warc`` on the ``http2.github.io.har`` test file, writing
        into a temporary file, then reads that file back with
        ``ArchiveIterator`` and asserts that the first record is a
        ``warcinfo`` record, the second is a ``response`` record, and the
        response's HTTP protocol is ``HTTP/1.1``.
        """
        filename = self.get_test_file('http2.github.io.har')

        # mkstemp creates a uniquely named file up front: concurrent runs
        # cannot collide on a shared fixed path, and the cleanup in
        # ``finally`` always has a file to remove (so it never masks the
        # real failure with a FileNotFoundError).
        fd, temp_filename = tempfile.mkstemp(suffix='-http2.warc')
        os.close(fd)

        try:
            # write then read same file
            with open(temp_filename, 'w+b') as fh:
                har2warc(filename, fh)

                # rewind so the iterator reads what was just written
                fh.seek(0)

                ai = ArchiveIterator(fh, verify_http=True)

                record = next(ai)
                assert record.rec_type == 'warcinfo'

                record = next(ai)
                assert record.rec_type == 'response'

                # ensure protocol converted to HTTP/1.1
                assert record.http_headers.protocol == 'HTTP/1.1'

        finally:
            os.remove(temp_filename)
Esempio n. 2
0
    def har2warc(self, filename, stream):
        """Convert a HAR input stream into a WARC written to a temp file.

        :param str filename: name of the HAR file; its basename is passed
            to the converter as the record title and ``filename + '.warc'``
            as the WARC name
        :param stream: input file object; it is wrapped in a UTF-8 stream
            reader, so it is expected to yield bytes

        :returns: the output file object (rewound to the start) and the
            position reported by ``tell()`` right after conversion
        :rtype: file object and int
        """
        out = self._har2warc_temp_file(filename)

        try:
            # the converter is handed decoded text rather than raw bytes
            reader = codecs.getreader('utf-8')(stream)
            rec_title = os.path.basename(filename)

            har2warc(reader, out, filename + '.warc', rec_title)

            # capture the end position before rewinding for the caller
            size = out.tell()
            out.seek(0)
        except Exception:
            # do not leak the temporary output when conversion fails
            out.close()
            raise

        return out, size
Esempio n. 3
0
    def har2warc(self, filename, stream):
        """Convert HTTP Archive format file to WARC archive.

        The input stream is decoded as UTF-8 and converted into the
        temporary output obtained from ``_har2warc_temp_file``. If the
        conversion raises, that output is closed before the exception
        propagates.

        :param str filename: name of HAR file
        :param stream: file object (input)

        :returns: file object (output) and size of WARC archive
        :rtype: file object and int
        """
        out = self._har2warc_temp_file(filename)

        try:
            # the converter is handed decoded text rather than raw bytes
            reader = codecs.getreader('utf-8')(stream)
            rec_title = os.path.basename(filename)

            har2warc(reader, out, filename + '.warc', rec_title)

            # capture the end position before rewinding for the caller
            size = out.tell()
            out.seek(0)
        except Exception:
            # do not leak the temporary output when conversion fails
            out.close()
            raise

        return out, size
Esempio n. 4
0
    def har2warc(self, filename, stream):
        """Convert HTTP Archive format file to WARC archive.

        The input stream is decoded as UTF-8 and converted into the
        temporary output obtained from ``_har2warc_temp_file``. If the
        conversion raises, that output is closed before the exception
        propagates.

        :param str filename: name of HAR file
        :param stream: file object (input)

        :returns: file object (output) and size of WARC archive
        :rtype: file object and int
        """
        out = self._har2warc_temp_file(filename)

        try:
            # the converter is handed decoded text rather than raw bytes
            reader = codecs.getreader('utf-8')(stream)
            rec_title = os.path.basename(filename)

            har2warc(reader, out, filename + '.warc', rec_title)

            # capture the end position before rewinding for the caller
            size = out.tell()
            out.seek(0)
        except Exception:
            # do not leak the temporary output when conversion fails
            out.close()
            raise

        return out, size