コード例 #1
0
    def test_resolveRevisits_orig_fields(self):
        """
        Verify the names of the extra fields added by ``resolveRevisits``.

        The query enables ``resolveRevisits`` and restricts the output via
        ``fields`` to ``urlkey`` plus ``orig.length``, ``orig.offset`` and
        ``orig.filename``, which shows that the three extra fields can be
        selected by those names.

        The first returned line is expected to carry ``'-'`` in all three
        ``orig.*`` fields (presumably because it is the original capture);
        every following line must have a resolved offset and length and
        point at ``iana.warc.gz``.
        """
        response = self.query(
            'http://www.iana.org/_css/2013.1/print.css',
            resolveRevisits='1',
            fields='urlkey,orig.length,orig.offset,orig.filename',
        )
        assert response.status_code == 200
        assert response.content_type == 'text/x-cdxj'

        lines = response.text.splitlines()

        # First line: none of the orig.* fields are filled in.
        first = CDXObject(lines[0].encode('utf-8'))
        for name in ('orig.offset', 'orig.length', 'orig.filename'):
            assert first[name] == '-'

        # Remaining lines: orig.* fields are resolved.
        for line in lines[1:]:
            record = CDXObject(line.encode('utf-8'))
            assert record['orig.offset'] != '-'
            assert record['orig.length'] != '-'
            assert record['orig.filename'] == 'iana.warc.gz'
コード例 #2
0
    def test_resolveRevisits(self):
        """
        Verify that ``resolveRevisits=true`` adds three ``orig.*`` fields
        that point to the *original* capture.

        Every returned line must have 16 fields. A line whose
        ``orig.length`` is ``'-'`` must also have ``'-'`` for ``orig.offset``
        and ``orig.filename``; its own (length, offset, filename) is then
        remembered under its digest. Any other line must have ``orig.*``
        values equal to the triple remembered for the same digest.
        """
        response = self.query(
            'http://www.iana.org/_css/2013.1/print.css',
            resolveRevisits='true',
        )
        assert response.status_code == 200
        assert response.content_type == 'text/x-cdxj'

        # digest -> (length, offset, filename) of the capture that had no
        # orig.* values, i.e. the one later lines are expected to refer to.
        seen_by_digest = {}

        for line in response.text.splitlines():
            record = CDXObject(line.encode('utf-8'))
            assert len(record) == 16

            digest = record['digest']

            if record['orig.length'] != '-':
                # Resolved line: orig.* must match the remembered capture.
                # A digest that was never remembered yields None and fails.
                expected = seen_by_digest.get(digest)
                resolved = (
                    int(record['orig.length']),
                    int(record['orig.offset']),
                    record['orig.filename'],
                )
                assert expected == resolved
                continue

            # Unresolved line: the orig.* fields are all-or-nothing.
            assert record['orig.offset'] == '-' and record['orig.filename'] == '-'
            seen_by_digest[digest] = (
                int(record['length']),
                int(record['offset']),
                record['filename'],
            )
コード例 #3
0
ファイル: test_loading.py プロジェクト: ikreymer/pywb
def load_from_cdx_test(cdx, revisit_func=load_orig_cdx, reraise=False,
                       failed_files=None):
    """
    Load the record described by a CDX line and print a short summary.

    A ``ResolvingLoader`` is built from the resolvers for ``test_warc_dir``
    and called with the parsed CDX line. On success the formatted headers
    are printed, followed by the first two lines read from the returned
    stream.

    :param cdx: CDX line as text; it is UTF-8 encoded before parsing.
    :param revisit_func: passed through to the loader as its third argument.
    :param reraise: when true, an ``ArchiveLoadFailed`` error propagates;
        otherwise only the exception's class name is printed.
    :param failed_files: passed through to the loader as its second argument.
    """
    resolvers = DefaultResolverMixin.make_resolvers(test_warc_dir)
    loader = ResolvingLoader(resolvers)
    record = CDXObject(cdx.encode('utf-8'))

    try:
        headers, body = loader(record, failed_files, revisit_func)
        print(repr_format(headers))
        # Echo the first two lines of the stream.
        for _ in range(2):
            sys.stdout.write(body.readline().decode('utf-8'))
    except ArchiveLoadFailed as err:
        if not reraise:
            print('Exception: ' + err.__class__.__name__)
        else:
            raise
コード例 #4
0
    def test_fields(self):
        """
        Retrieve a subset of fields with the ``fields`` parameter.

        Requests only ``urlkey``, ``timestamp`` and ``status`` and checks, for
        every returned line, that the urlkey is the expected one, the
        timestamp is exactly 14 digits, and the status is either exactly
        three digits or ``-``.
        """
        resp = self.query('http://www.iana.org/_css/2013.1/print.css',
                          fields='urlkey,timestamp,status')

        assert resp.status_code == 200

        cdxes = resp.text.splitlines()

        for cdx in cdxes:
            cdx = CDXObject(cdx.encode('utf-8'))
            assert cdx['urlkey'] == 'org,iana)/_css/2013.1/print.css'
            assert re.match(r'\d{14}$', cdx['timestamp'])
            # The alternation must be grouped and anchored: re.match() only
            # anchors at the start, so a bare r'\d{3}|-' would also accept
            # values such as '2000' or '-foo'.
            assert re.match(r'(?:\d{3}|-)$', cdx['status'])
コード例 #5
0
ファイル: test_loading.py プロジェクト: mirrorweb/pywb
def load_from_cdx_test(cdx,
                       revisit_func=load_orig_cdx,
                       reraise=False,
                       failed_files=None):
    """
    Resolve and load the record for a CDX line, printing what was loaded.

    The CDX text is UTF-8 encoded and parsed into a ``CDXObject``, then
    handed to a ``ResolvingLoader`` built from the resolvers for
    ``test_warc_dir``. The formatted headers and the first two lines of the
    returned stream are written to standard output.

    :param cdx: CDX line as text.
    :param revisit_func: forwarded to the loader as its third argument.
    :param reraise: if true, ``ArchiveLoadFailed`` is re-raised instead of
        being reported by printing the exception's class name.
    :param failed_files: forwarded to the loader as its second argument.
    """
    loader = ResolvingLoader(
        DefaultResolverMixin.make_resolvers(test_warc_dir))
    parsed = CDXObject(cdx.encode('utf-8'))

    try:
        result = loader(parsed, failed_files, revisit_func)
        headers, payload = result
        print(repr_format(headers))
        first_line = payload.readline().decode('utf-8')
        sys.stdout.write(first_line)
        second_line = payload.readline().decode('utf-8')
        sys.stdout.write(second_line)
    except ArchiveLoadFailed as failure:
        if reraise:
            raise
        print('Exception: ' + failure.__class__.__name__)