Python get_test_dirの例、pywb.get_test_dir Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_pathresolvers.py プロジェクト: ikreymer/pywb

    def test_resolver_dir_wildcard(self):
        """A ``*`` directory wildcard pattern should yield exactly one path for example.warc.gz."""
        wildcard_pattern = os.path.join(get_test_dir(), '*', '')
        resolve = DefaultResolverMixin.make_best_resolver(wildcard_pattern)

        matches = resolve('example.warc.gz', CDXObject())

        # Exactly one candidate, located in the 'warcs' sub-directory.
        assert len(matches) == 1
        expected_path = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
        assert matches[0] == expected_path

コード例 #2

0

ファイルを表示

    def test_resolver_dir_wildcard(self):
        """A ``*`` directory wildcard pattern should yield exactly one path for example.warc.gz."""
        wildcard_pattern = os.path.join(get_test_dir(), '*', '')
        resolve = DefaultResolverMixin.make_best_resolver(wildcard_pattern)

        matches = resolve('example.warc.gz', CDXObject())

        # Exactly one candidate, located in the 'warcs' sub-directory.
        assert len(matches) == 1
        expected_path = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
        assert matches[0] == expected_path

コード例 #3

0

ファイルを表示

ファイル: test_pathresolvers.py プロジェクト: ikreymer/pywb

    def test_resolver_dir_wildcard_as_file_url(self):
        """Same wildcard lookup as the plain-path test, but the pattern is built from a file URL."""
        base_url = to_file_url(get_test_dir())
        wildcard_url = base_url + '/*/'
        resolve = DefaultResolverMixin.make_best_resolver(wildcard_url)

        matches = resolve('example.warc.gz', CDXObject())

        # The single result is compared against the absolute path of the sample WARC.
        assert len(matches) == 1
        relative_path = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
        assert matches[0] == os.path.abspath(relative_path)

コード例 #4

0

ファイルを表示

    def test_resolver_dir_wildcard_as_file_url(self):
        """Same wildcard lookup as the plain-path test, but the pattern is built from a file URL."""
        base_url = to_file_url(get_test_dir())
        wildcard_url = base_url + '/*/'
        resolve = DefaultResolverMixin.make_best_resolver(wildcard_url)

        matches = resolve('example.warc.gz', CDXObject())

        # The single result is compared against the absolute path of the sample WARC.
        assert len(matches) == 1
        relative_path = os.path.join(get_test_dir(), 'warcs', 'example.warc.gz')
        assert matches[0] == os.path.abspath(relative_path)

コード例 #5

0

ファイルを表示

    def setup_class(cls):
        """Run the manager 'init' command for 'testzip' and copy sample data into it.

        Sets ``cls.archive_dir`` and ``cls.index_dir`` under ``cls.root_dir``,
        then copies the zipnum sample index files and the iana WARC there.
        """
        super(TestZipnumAutoDir, cls).setup_class('config_test.yaml')

        manager(['init', 'testzip'])

        coll_root = os.path.join(cls.root_dir, '_test_colls', 'testzip')
        cls.archive_dir = os.path.join(coll_root, 'archive')
        cls.index_dir = os.path.join(coll_root, 'indexes')

        # Sample zipnum index files go into the collection's index directory.
        zipcdx_dir = os.path.join(get_test_dir(), 'zipcdx')
        index_files = ('zipnum-sample.idx', 'zipnum-sample.cdx.gz', 'zipnum-sample.loc')
        for index_file in index_files:
            shutil.copy(os.path.join(zipcdx_dir, index_file), cls.index_dir)

        # The WARC goes into the archive directory.
        sample_warc = os.path.join(get_test_dir(), 'warcs', 'iana.warc.gz')
        shutil.copy(sample_warc, cls.archive_dir)

コード例 #6

0

ファイルを表示

ファイル: test_zipnum_auto_dir.py プロジェクト: ikreymer/pywb

    def setup_class(cls):
        """Run the manager 'init' command for 'testzip' and copy sample data into it.

        Sets ``cls.archive_dir`` and ``cls.index_dir`` under ``cls.root_dir``,
        then copies the zipnum sample index files and the iana WARC there.
        """
        super(TestZipnumAutoDir, cls).setup_class('config_test.yaml')

        manager(['init', 'testzip'])

        coll_root = os.path.join(cls.root_dir, '_test_colls', 'testzip')
        cls.archive_dir = os.path.join(coll_root, 'archive')
        cls.index_dir = os.path.join(coll_root, 'indexes')

        # Sample zipnum index files go into the collection's index directory.
        zipcdx_dir = os.path.join(get_test_dir(), 'zipcdx')
        index_files = ('zipnum-sample.idx', 'zipnum-sample.cdx.gz', 'zipnum-sample.loc')
        for index_file in index_files:
            shutil.copy(os.path.join(zipcdx_dir, index_file), cls.index_dir)

        # The WARC goes into the archive directory.
        sample_warc = os.path.join(get_test_dir(), 'warcs', 'iana.warc.gz')
        shutil.copy(sample_warc, cls.archive_dir)

コード例 #7

0

ファイルを表示

    def test_hls_custom_max_bandwidth(self):
        headers = {'Content-Type': 'application/x-mpegURL'}
        with open(
                os.path.join(get_test_dir(), 'text_content',
                             'sample_hls.m3u8'), 'rt') as fh:
            content = fh.read()

        metadata = {'adaptive_max_bandwidth': 2000000}

        headers, gen, is_rw = self.rewrite_record(
            headers,
            content,
            ts='201701oe_',
            url='http://example.com/path/master.m3u8',
            warc_headers={'WARC-JSON-Metadata': json.dumps(metadata)})

        assert headers.headers == [('Content-Type', 'application/x-mpegURL')]
        filtered = """\
#EXTM3U
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="WebVTT",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,URI="https://example.com/subtitles/"
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1002000,RESOLUTION=640x360,CODECS="avc1.77.30, mp4a.40.2",SUBTITLES="WebVTT"
http://example.com/video_4.m3u8
"""

        assert b''.join(gen).decode('utf-8') == filtered

コード例 #8

0

ファイルを表示

ファイル: test_content_rewriter.py プロジェクト: ikreymer/pywb

    def test_dash_default_max(self):
        headers = {'Content-Type': 'application/dash+xml'}
        with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
            content = fh.read()

        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
                                                  url='http://example.com/path/manifest.mpd')

        assert headers.headers == [('Content-Type', 'application/dash+xml')]

        filtered = """\
<?xml version='1.0' encoding='UTF-8'?>
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" mediaPresentationDuration="PT0H3M1.63S" minBufferTime="PT1.5S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static">
  <Period duration="PT0H3M1.63S" start="PT0S">
    <AdaptationSet>
      <ContentComponent contentType="video" id="1" />
      <Representation bandwidth="4190760" codecs="avc1.640028" height="1080" id="1" mimeType="video/mp4" width="1920">
        <BaseURL>http://example.com/video-10.mp4</BaseURL>
        <SegmentBase indexRange="674-1149">
          <Initialization range="0-673" />
        </SegmentBase>
      </Representation>
      </AdaptationSet>
    <AdaptationSet>
      <ContentComponent contentType="audio" id="2" />
      <Representation bandwidth="255236" codecs="mp4a.40.2" id="7" mimeType="audio/mp4" numChannels="2" sampleRate="44100">
        <BaseURL>http://example.com/audio-2.mp4</BaseURL>
        <SegmentBase indexRange="592-851">
          <Initialization range="0-591" />
        </SegmentBase>
      </Representation>
      </AdaptationSet>
  </Period>
</MPD>"""
        assert b''.join(gen).decode('utf-8') == filtered

コード例 #9

0

ファイルを表示

ファイル: test_cdxops.py プロジェクト: eriknstr/pywb

def test_cdxj_resolve_revisit_2():
    """Query the no-digest cdxj sample with resolveRevisits and check both result rows."""
    # Resolve Revisit -- cdxj minimal -- output also json
    cdxj_source = get_test_dir() + "cdxj/example-no-digest.cdxj"
    results = cdx_ops_test_data(
        url="http://example.com/?example=1",
        sources=[cdxj_source],
        resolveRevisits=True,
    )
    assert len(results) == 2

    # First capture: all "orig.*" fields are "-".
    expected_first = {
        "urlkey": "com,example)/?example=1",
        "timestamp": "20140103030321",
        "url": "http://example.com?example=1",
        "length": "1043",
        "filename": "example.warc.gz",
        "offset": "333",
        "orig.length": "-",
        "orig.offset": "-",
        "orig.filename": "-",
    }
    assert dict(results[0]) == expected_first

    # Second capture: a revisit record, "orig.*" fields also "-".
    expected_second = {
        "urlkey": "com,example)/?example=1",
        "timestamp": "20140103030341",
        "url": "http://example.com?example=1",
        "length": "553",
        "filename": "example.warc.gz",
        "mime": "warc/revisit",
        "offset": "1864",
        "orig.length": "-",
        "orig.offset": "-",
        "orig.filename": "-",
    }
    assert dict(results[1]) == expected_second

コード例 #10

0

ファイルを表示

ファイル: test_cdxops.py プロジェクト: yumatchlab/pywb

def test_cdxj_resolve_revisit():
    """Query the cdxj sample with resolveRevisits and check both result rows.

    The second row is expected to carry the first row's length, offset and
    filename in its "orig.*" fields.
    """
    # Resolve Revisit -- cdxj minimal -- output also json
    cdxj_source = get_test_dir() + 'cdxj/example.cdxj'
    results = cdx_ops_test_data(
        url='http://example.com/?example=1',
        sources=[cdxj_source],
        resolveRevisits=True,
    )
    assert len(results) == 2

    expected_first = {
        "urlkey": "com,example)/?example=1",
        "timestamp": "20140103030321",
        "url": "http://example.com?example=1",
        "length": "1043",
        "filename": "example.warc.gz",
        "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A",
        "offset": "333",
        "orig.length": "-",
        "orig.offset": "-",
        "orig.filename": "-",
    }
    assert dict(results[0]) == expected_first

    expected_second = {
        "urlkey": "com,example)/?example=1",
        "timestamp": "20140103030341",
        "url": "http://example.com?example=1",
        "filename": "example.warc.gz",
        "length": "553",
        "mime": "",
        "offset": "1864",
        "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A",
        "orig.length": "1043",
        "orig.offset": "333",
        "orig.filename": "example.warc.gz",
    }
    assert dict(results[1]) == expected_second

コード例 #11

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: rebeccacremona/pywb

def test_local_no_head_only_title():
    """Rewrite sample_no_head_2.html and expect the wombat script tag in the output."""
    sample_path = get_test_dir() + 'text_content/sample_no_head_2.html'
    _status, body = get_rewritten(sample_path,
                                  urlrewriter,
                                  head_insert_func,
                                  'com,example,test)/')

    # wombat insert added
    wombat_tag = '<script src="/static/__pywb/wombat.js"> </script>'
    assert wombat_tag in body

コード例 #12

0

ファイルを表示

ファイル: test_zipnum.py プロジェクト: akeprojecta/pywb

def test_zip_prefix_load():
    """Load the zipnum sample index from a temporary directory and query it.

    Checks the page-count response and a simple url query; the temporary
    directory is removed afterwards regardless of the outcome.
    """
    work_dir = tempfile.mkdtemp()
    try:
        shutil.copy(test_zipnum, work_dir)
        shard_source = get_test_dir() + 'zipcdx/zipnum-sample.cdx.gz'
        shutil.copy(shard_source, os.path.join(work_dir, 'zipnum'))

        config = {
            'shard_index_loc': {'match': '(.*)', 'replace': r'\1'},
        }
        index_path = os.path.join(work_dir, 'zipnum-sample.idx')
        server = CDXServer(index_path, config=config)

        # Test Page Count
        page_info = list(server.load_cdx(url='iana.org/',
                                         matchType='domain',
                                         showNumPages=True))
        assert len(page_info) == 1, page_info
        assert json.loads(page_info[0]) == {"blocks": 38, "pages": 4, "pageSize": 10}

        # Test simple query
        rows = list(server.load_cdx(url='iana.org/'))
        assert len(rows) == 3, rows
        assert '20140126200624' in rows[0]
        assert '20140127171238' in rows[1]
        assert 'warc/revisit' in rows[2]

    finally:
        shutil.rmtree(work_dir)

コード例 #13

0

ファイルを表示

def test_cdxj_resolve_revisit_2():
    """Query the no-digest cdxj sample (named source 'nd-file') and check both result rows."""
    # Resolve Revisit -- cdxj minimal -- output also json
    named_sources = {'nd-file': get_test_dir() + 'cdxj/example-no-digest.cdxj'}
    results = cdx_ops_test_data(
        url='http://example.com/?example=1',
        sources=named_sources,
        resolveRevisits=True,
    )
    assert len(results) == 2

    # First capture: all "orig.*" fields are "-".
    expected_first = {
        "urlkey": "com,example)/?example=1",
        "timestamp": "20140103030321",
        "url": "http://example.com?example=1",
        "length": "1043",
        "filename": "example.warc.gz",
        "offset": "333",
        "orig.length": "-",
        "orig.offset": "-",
        "orig.filename": "-",
    }
    assert dict(results[0]) == expected_first

    # Second capture: a revisit record, "orig.*" fields also "-".
    expected_second = {
        "urlkey": "com,example)/?example=1",
        "timestamp": "20140103030341",
        "url": "http://example.com?example=1",
        "length": "553",
        "filename": "example.warc.gz",
        "mime": "warc/revisit",
        "offset": "1864",
        "orig.length": "-",
        "orig.offset": "-",
        "orig.filename": "-",
    }
    assert dict(results[1]) == expected_second

コード例 #14

0

ファイルを表示

ファイル: test_content_rewriter.py プロジェクト: peterk/pywb

    def test_dash_default_max(self):
        headers = {'Content-Type': 'application/dash+xml'}
        with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
            content = fh.read()

        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
                                                  url='http://example.com/path/manifest.mpd')

        assert headers.headers == [('Content-Type', 'application/dash+xml')]

        filtered = """\
<?xml version='1.0' encoding='UTF-8'?>
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" mediaPresentationDuration="PT0H3M1.63S" minBufferTime="PT1.5S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static">
  <Period duration="PT0H3M1.63S" start="PT0S">
    <AdaptationSet>
      <ContentComponent contentType="video" id="1" />
      <Representation bandwidth="4190760" codecs="avc1.640028" height="1080" id="1" mimeType="video/mp4" width="1920">
        <BaseURL>http://example.com/video-10.mp4</BaseURL>
        <SegmentBase indexRange="674-1149">
          <Initialization range="0-673" />
        </SegmentBase>
      </Representation>
      </AdaptationSet>
    <AdaptationSet>
      <ContentComponent contentType="audio" id="2" />
      <Representation bandwidth="255236" codecs="mp4a.40.2" id="7" mimeType="audio/mp4" numChannels="2" sampleRate="44100">
        <BaseURL>http://example.com/audio-2.mp4</BaseURL>
        <SegmentBase indexRange="592-851">
          <Initialization range="0-591" />
        </SegmentBase>
      </Representation>
      </AdaptationSet>
  </Period>
</MPD>"""
        assert b''.join(gen).decode('utf-8') == filtered

コード例 #15

0

ファイルを表示

ファイル: test_pathresolvers.py プロジェクト: ikreymer/pywb

    def test_path_index_resolvers(self):
        """Look up several filenames through a PathIndexResolver built from pathindex.txt."""
        index_file = os.path.join(get_test_dir(), 'text_content', 'pathindex.txt')
        lookup = PathIndexResolver(index_file)

        cdx = CDXObject()
        # (filename, expected list of paths) pairs, checked in order.
        cases = (
            ('example.warc.gz', ['invalid_path', 'sample_archive/warcs/example.warc.gz']),
            ('iana.warc.gz', ['sample_archive/warcs/iana.warc.gz']),
            ('not-found.gz', []),
        )
        for filename, expected_paths in cases:
            assert list(lookup(filename, cdx)) == expected_paths

コード例 #16

0

ファイルを表示

    def test_path_index_resolvers(self):
        """Look up several filenames through a PathIndexResolver built from pathindex.txt."""
        index_file = os.path.join(get_test_dir(), 'text_content', 'pathindex.txt')
        lookup = PathIndexResolver(index_file)

        cdx = CDXObject()
        # (filename, expected list of paths) pairs, checked in order.
        cases = (
            ('example.warc.gz', ['invalid_path', 'sample_archive/warcs/example.warc.gz']),
            ('iana.warc.gz', ['sample_archive/warcs/iana.warc.gz']),
            ('not-found.gz', []),
        )
        for filename, expected_paths in cases:
            assert list(lookup(filename, cdx)) == expected_paths

コード例 #17

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb

def test_local_no_head_only_title():
    """Rewrite sample_no_head_2.html and expect the wombat script tag in the output."""
    sample_path = get_test_dir() + 'text_content/sample_no_head_2.html'
    _status, body = get_rewritten(
        sample_path, urlrewriter, head_insert_func, 'com,example,test)/')

    # wombat insert added
    wombat_tag = '<script src="/static/__pywb/wombat.js"> </script>'
    assert wombat_tag in body

コード例 #18

0

ファイルを表示

ファイル: testutils.py プロジェクト: ikreymer/pywb

    def setup_class(cls):
        """Run the parent class setup and load the expected link headers.

        Stores the parsed contents of ``text_content/link_headers.yaml`` on
        ``MementoOverrideTests.link_header_data`` and keeps a reference to the
        current ``MementoIndexSource.get_timegate_links`` on
        ``MementoOverrideTests.orig_get_timegate_links``.
        """
        super(MementoOverrideTests, cls).setup_class()

        # Load expected link headers
        MementoOverrideTests.link_header_data = None
        with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
            # safe_load instead of yaml.load(fh): calling yaml.load without an
            # explicit Loader is deprecated since PyYAML 5.1 and is rejected by
            # PyYAML 6; the safe loader also refuses arbitrary object tags.
            MementoOverrideTests.link_header_data = yaml.safe_load(fh)

        MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links

コード例 #19

0

ファイルを表示

ファイル: testutils.py プロジェクト: tripti825/pywb

    def setup_class(cls):
        """Run the parent class setup and load the expected link headers.

        Stores the parsed contents of ``text_content/link_headers.yaml`` on
        ``MementoOverrideTests.link_header_data`` and keeps a reference to the
        current ``MementoIndexSource.get_timegate_links`` on
        ``MementoOverrideTests.orig_get_timegate_links``.
        """
        super(MementoOverrideTests, cls).setup_class()

        # Load expected link headers
        MementoOverrideTests.link_header_data = None
        with open(to_path(get_test_dir() +
                          '/text_content/link_headers.yaml')) as fh:
            # safe_load instead of yaml.load(fh): calling yaml.load without an
            # explicit Loader is deprecated since PyYAML 5.1 and is rejected by
            # PyYAML 6; the safe loader also refuses arbitrary object tags.
            MementoOverrideTests.link_header_data = yaml.safe_load(fh)

        MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links

コード例 #20

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: ronos/pywb

def test_local_unclosed_script():
    """Rewrite sample_unclosed_script.html and check the wombat insert and JS rewrite.

    On failure each assert reports the full rewritten buffer.
    """
    status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_unclosed_script.html',
                                         urlrewriter,
                                         head_insert_func,
                                         'com,example,test)/')

    # wombat insert added
    assert '<head><script src="/static/__pywb/wombat.js"> </script>' in buff, buff

    # JS location and JS link rewritten
    # The backslashes are doubled because "\/" is not a valid escape sequence
    # (it triggers a warning on modern Python); the resulting string is the
    # same. A raw string is not usable here since "\n" must stay a newline.
    assert 'window.WB_wombat_location = "/pywb/20131226101010/http:\\/\\/example.com/dynamic_page.html";\n}\n</script>' in buff, buff

コード例 #21

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: Cloudxtreme/pywb

def test_local_unclosed_script():
    """Rewrite sample_unclosed_script.html and check the wombat insert and JS rewrite.

    On failure each assert reports the full rewritten buffer.
    """
    status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_unclosed_script.html',
                                         urlrewriter,
                                         head_insert_func,
                                         'com,example,test)/')

    # wombat insert added
    assert '<head><script src="/static/__pywb/wombat.js"> </script>' in buff, buff

    # JS location and JS link rewritten
    # The backslashes are doubled because "\/" is not a valid escape sequence
    # (it triggers a warning on modern Python); the resulting string is the
    # same. A raw string is not usable here since "\n" must stay a newline.
    assert 'window.WB_wombat_location = "/pywb/20131226101010/http:\\/\\/example.com/dynamic_page.html";\n}\n</script>' in buff, buff

コード例 #22

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: rebeccacremona/pywb

def test_local_no_head():
    """Rewrite sample_no_head.html and check the insert, location and link rewrites."""
    sample_path = get_test_dir() + 'text_content/sample_no_head.html'
    _status, body = get_rewritten(sample_path,
                                  urlrewriter,
                                  head_insert_func,
                                  'com,example,test)/')

    expected_fragments = (
        # wombat insert added
        '<script src="/static/__pywb/wombat.js"> </script>',
        # location rewritten
        'window.WB_wombat_location = "/other.html"',
        # link rewritten
        '"/pywb/20131226101010/http://example.com/some/path/another.html"',
    )
    # Checked in order; the full buffer is reported on failure.
    for fragment in expected_fragments:
        assert fragment in body, body

コード例 #23

0

ファイルを表示

def test_local_2_link_only_rewrite():
    """Rewrite sample.html under the 'nolocation_rewrite' url key.

    The asserts expect the JS link and the HTML link to be rewritten while the
    ``window.location`` name itself is left unchanged.
    """
    status_headers, buff = get_rewritten(
        get_test_dir() + 'text_content/sample.html', urlrewriter,
        head_insert_func, 'example,example,test)/nolocation_rewrite')

    # no wombat insert
    assert '<head><script src="/static/default/wombat.js"> </script>' not in buff

    # JS location NOT rewritten, JS link rewritten
    # Raw string: "\/" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the compared value is unchanged.
    assert r'window.location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff

    # still link rewrite
    assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff

コード例 #24

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: rebeccacremona/pywb

def test_local_2_no_rewrite():
    """Rewrite sample.html under the 'norewrite' url key.

    The asserts expect the wombat insert and the HTML link rewrite, with the
    JS location and JS link left unchanged.
    """
    status_headers, buff = get_rewritten(
        get_test_dir() + 'text_content/sample.html', urlrewriter,
        head_insert_func, 'example,example,test,norewrite)/')

    # wombat insert added
    assert '<script src="/static/__pywb/wombat.js"> </script>' in buff

    # JS location NOT rewritten, JS link NOT rewritten
    # Raw string: "\/" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the compared value is unchanged.
    assert r'window.location = "http:\/\/example.com/dynamic_page.html"' in buff

    # still link rewrite in HTML
    assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff

コード例 #25

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: rebeccacremona/pywb

def test_local_1():
    """Rewrite sample.html under the 'all' url key.

    The asserts expect the wombat insert, a rewritten JS location and JS link,
    and a rewritten HTML link.
    """
    status_headers, buff = get_rewritten(
        get_test_dir() + 'text_content/sample.html', urlrewriter,
        head_insert_func, 'example,example,test,all)/')

    # wombat insert added
    assert '<head><script src="/static/__pywb/wombat.js"> </script>' in buff, buff

    # JS location and JS link rewritten
    # Raw string: "\/" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the compared value is unchanged.
    assert r'window.WB_wombat_location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff

    # link rewritten
    assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff

コード例 #26

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: rebeccacremona/pywb

def test_local_no_head_banner_only():
    """Rewrite sample_no_head.html with ``bn_urlrewriter``: insert present, location and link unchanged."""
    sample_path = get_test_dir() + 'text_content/sample_no_head.html'
    _status, body = get_rewritten(sample_path,
                                  bn_urlrewriter,
                                  head_insert_func,
                                  'com,example,test)/')

    expected_fragments = (
        # wombat insert added
        '<script src="/static/__pywb/wombat.js"> </script>',
        # location NOT rewritten
        'window.location = "/other.html"',
        # link NOT rewritten
        '"/some/path/another.html"',
    )
    for fragment in expected_fragments:
        assert fragment in body

コード例 #27

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: rebeccacremona/pywb

def test_local_banner_only_no_rewrite():
    """Rewrite sample.html with ``bn_urlrewriter``.

    The asserts expect the wombat insert, with the JS location, JS link and
    HTML link left unchanged.
    """
    status_headers, buff = get_rewritten(
        get_test_dir() + 'text_content/sample.html', bn_urlrewriter,
        head_insert_func, 'com,example,test)/')

    # wombat insert added
    assert '<head><script src="/static/__pywb/wombat.js"> </script>' in buff

    # JS location NOT rewritten, JS link NOT rewritten
    # Raw string: "\/" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the compared value is unchanged.
    assert r'window.location = "http:\/\/example.com/dynamic_page.html"' in buff, buff

    # link NOT rewritten
    assert '"/some/path/another.html"' in buff

コード例 #28

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb

def test_local_no_head_banner_only():
    """Rewrite sample_no_head.html with ``bn_urlrewriter``: insert present, location and link unchanged."""
    sample_path = get_test_dir() + 'text_content/sample_no_head.html'
    _status, body = get_rewritten(
        sample_path, bn_urlrewriter, head_insert_func, 'com,example,test)/')

    expected_fragments = (
        # wombat insert added
        '<script src="/static/__pywb/wombat.js"> </script>',
        # location NOT rewritten
        'window.location = "/other.html"',
        # link NOT rewritten
        '"another.html"',
    )
    for fragment in expected_fragments:
        assert fragment in body

コード例 #29

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: tilgovi/pywb

def test_local_2_link_only_rewrite():
    """Rewrite sample.html under the 'nolocation_rewrite' url key.

    The asserts expect the JS link and the HTML link to be rewritten while the
    ``window.location`` name itself is left unchanged.
    """
    status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
                                         urlrewriter,
                                         head_insert_func,
                                         'example,example,test)/nolocation_rewrite')

    # no wombat insert
    assert '<head><script src="/static/default/wombat.js"> </script>' not in buff

    # JS location NOT rewritten, JS link rewritten
    # Raw string: "\/" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the compared value is unchanged.
    assert r'window.location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff

    # still link rewrite
    assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff

コード例 #30

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb

def test_local_2_no_rewrite():
    """Rewrite sample.html under the 'norewrite' url key.

    The asserts expect the wombat insert and the HTML link rewrite, with the
    JS location and JS link left unchanged.
    """
    status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
                                         urlrewriter,
                                         head_insert_func,
                                         'example,example,test,norewrite)/')

    # wombat insert added
    assert '<script src="/static/__pywb/wombat.js"> </script>' in buff

    # JS location NOT rewritten, JS link NOT rewritten
    # Raw string: "\/" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the compared value is unchanged.
    assert r'window.location = "http:\/\/example.com/dynamic_page.html"' in buff

    # still link rewrite in HTML
    assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff

コード例 #31

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb

def test_local_banner_only_no_rewrite():
    """Rewrite sample.html with ``bn_urlrewriter``.

    The asserts expect the wombat insert, with the JS location, JS link and
    HTML link left unchanged.
    """
    status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
                                         bn_urlrewriter,
                                         head_insert_func,
                                         'com,example,test)/')

    # wombat insert added
    assert '<head><script src="/static/__pywb/wombat.js"> </script>' in buff

    # JS location NOT rewritten, JS link NOT rewritten
    # Raw string: "\/" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the compared value is unchanged.
    assert r'window.location = "http:\/\/example.com/dynamic_page.html"' in buff, buff

    # link NOT rewritten
    assert '"another.html"' in buff

コード例 #32

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb

def test_local_1():
    """Rewrite sample.html under the 'all' url key.

    The asserts expect the wombat insert, a rewritten JS location and JS link,
    and a rewritten HTML link.
    """
    status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
                                         urlrewriter,
                                         head_insert_func,
                                         'example,example,test,all)/')

    # wombat insert added
    assert '<head><script src="/static/__pywb/wombat.js"> </script>' in buff, buff

    # JS location and JS link rewritten
    # Raw string: "\/" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the compared value is unchanged.
    assert r'window.WB_wombat_location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff

    # link rewritten
    assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff

コード例 #33

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb

def test_local_no_head():
    """Rewrite sample_no_head.html and check the insert, location and link rewrites."""
    sample_path = get_test_dir() + 'text_content/sample_no_head.html'
    _status, body = get_rewritten(
        sample_path, urlrewriter, head_insert_func, 'com,example,test)/')

    expected_fragments = (
        # wombat insert added
        '<script src="/static/__pywb/wombat.js"> </script>',
        # location rewritten
        'window.WB_wombat_location = "/other.html"',
        # link rewritten
        '"/pywb/20131226101010/http://example.com/some/path/another.html"',
    )
    # Checked in order; the full buffer is reported on failure.
    for fragment in expected_fragments:
        assert fragment in body, body

コード例 #34

0

ファイルを表示

    def test_dash_custom_max_resolution(self):
        headers = {'Content-Type': 'application/dash+xml'}
        with open(
                os.path.join(get_test_dir(), 'text_content',
                             'sample_dash.mpd'), 'rt') as fh:
            content = fh.read()

        metadata = {
            'adaptive_max_resolution': 921600,
            'adaptive_max_bandwidth': 2000000
        }

        headers, gen, is_rw = self.rewrite_record(
            headers,
            content,
            ts='201701oe_',
            url='http://example.com/path/manifest.mpd',
            warc_headers={'WARC-JSON-Metadata': json.dumps(metadata)})

        assert headers.headers == [('Content-Type', 'application/dash+xml')]

        filtered = """\
<?xml version='1.0' encoding='UTF-8'?>
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" mediaPresentationDuration="PT0H3M1.63S" minBufferTime="PT1.5S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static">
  <Period duration="PT0H3M1.63S" start="PT0S">
    <AdaptationSet>
      <ContentComponent contentType="video" id="1" />
      <Representation bandwidth="2073921" codecs="avc1.4d401f" height="720" id="2" mimeType="video/mp4" width="1280">
        <BaseURL>http://example.com/video-9.mp4</BaseURL>
        <SegmentBase indexRange="708-1183">
          <Initialization range="0-707" />
        </SegmentBase>
      </Representation>
      </AdaptationSet>
    <AdaptationSet>
      <ContentComponent contentType="audio" id="2" />
      <Representation bandwidth="255236" codecs="mp4a.40.2" id="7" mimeType="audio/mp4" numChannels="2" sampleRate="44100">
        <BaseURL>http://example.com/audio-2.mp4</BaseURL>
        <SegmentBase indexRange="592-851">
          <Initialization range="0-591" />
        </SegmentBase>
      </Representation>
      </AdaptationSet>
  </Period>
</MPD>"""

        assert b''.join(gen).decode('utf-8') == filtered

コード例 #35

0

ファイルを表示

ファイル: test_content_rewriter.py プロジェクト: peterk/pywb

    def test_hls_default_max(self):
        headers = {'Content-Type': 'application/vnd.apple.mpegurl'}
        with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:
            content = fh.read()

        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
                                                  url='http://example.com/path/master.m3u8')

        assert headers.headers == [('Content-Type', 'application/vnd.apple.mpegurl')]
        filtered = """\
#EXTM3U
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="WebVTT",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,URI="https://example.com/subtitles/"
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=4495000,RESOLUTION=1920x1080,CODECS="avc1.640028, mp4a.40.2",SUBTITLES="WebVTT"
http://example.com/video_6.m3u8
"""

        assert b''.join(gen).decode('utf-8') == filtered

コード例 #36

0

ファイルを表示

ファイル: test_content_rewriter.py プロジェクト: ikreymer/pywb

    def test_hls_default_max(self):
        headers = {'Content-Type': 'application/vnd.apple.mpegurl'}
        with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:
            content = fh.read()

        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
                                                  url='http://example.com/path/master.m3u8')

        assert headers.headers == [('Content-Type', 'application/vnd.apple.mpegurl')]
        filtered = """\
#EXTM3U
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="WebVTT",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,URI="https://example.com/subtitles/"
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=4495000,RESOLUTION=1920x1080,CODECS="avc1.640028, mp4a.40.2",SUBTITLES="WebVTT"
http://example.com/video_6.m3u8
"""

        assert b''.join(gen).decode('utf-8') == filtered

コード例 #37

0

ファイルを表示

ファイル: test_content_rewriter.py プロジェクト: ikreymer/pywb

    def test_dash_fb_in_js(self):
        """Rewrite a JS payload that embeds the escaped DASH manifest plus prefetch ids.

        The prefetched representation ids "4","5" are expected to be replaced
        by "1", "7" in the rewritten output.
        """
        manifest_path = os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd')
        with open(manifest_path, 'rt') as manifest:
            escaped_manifest = manifest.read().encode('unicode-escape').decode('utf-8')

        rep_ids = r'\n",dash_prefetched_representation_ids:["4","5"]'
        content = 'dash_manifest:"' + escaped_manifest + rep_ids

        status_headers, gen, _ = self.rewrite_record(
            {'Content-Type': 'text/javascript'},
            content,
            ts='201701js_',
            url='http://facebook.com/example/dash/manifest.mpd')

        assert status_headers.headers == [('Content-Type', 'text/javascript')]

        rewritten = b''.join(gen).decode('utf-8')

        # 4, 5 representations removed, replaced with default 1, 7
        assert 'dash_prefetched_representation_ids:["1", "7"]' in rewritten
        assert rep_ids not in rewritten

コード例 #38

0

ファイルを表示

ファイル: test_content_rewriter.py プロジェクト: zan-kusterle/pywb

    def test_dash_fb_in_js(self):
        """Rewrite a JS payload that embeds the escaped DASH manifest plus prefetch ids.

        The prefetched representation ids "4","5" are expected to be replaced
        by "1", "7" in the rewritten output.
        """
        manifest_path = os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd')
        with open(manifest_path, 'rt') as manifest:
            escaped_manifest = manifest.read().encode('unicode-escape').decode('utf-8')

        rep_ids = r'\n",dash_prefetched_representation_ids:["4","5"]'
        content = 'dash_manifest:"' + escaped_manifest + rep_ids

        status_headers, gen, _ = self.rewrite_record(
            {'Content-Type': 'text/javascript'},
            content,
            ts='201701js_',
            url='http://facebook.com/example/dash/manifest.mpd')

        assert status_headers.headers == [('Content-Type', 'text/javascript')]

        rewritten = b''.join(gen).decode('utf-8')

        # 4, 5 representations removed, replaced with default 1, 7
        assert 'dash_prefetched_representation_ids:["1", "7"]' in rewritten
        assert rep_ids not in rewritten

コード例 #39

0

ファイルを表示

ファイル: test_content_rewriter.py プロジェクト: ikreymer/pywb

    def test_hls_custom_max_bandwidth(self):
        headers = {'Content-Type': 'application/x-mpegURL'}
        with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:
            content = fh.read()

        metadata = {'adaptive_max_bandwidth': 2000000}

        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
                                                  url='http://example.com/path/master.m3u8',
                                                  warc_headers={'WARC-JSON-Metadata': json.dumps(metadata)})

        assert headers.headers == [('Content-Type', 'application/x-mpegURL')]
        filtered = """\
#EXTM3U
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="WebVTT",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,URI="https://example.com/subtitles/"
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1002000,RESOLUTION=640x360,CODECS="avc1.77.30, mp4a.40.2",SUBTITLES="WebVTT"
http://example.com/video_4.m3u8
"""

        assert b''.join(gen).decode('utf-8') == filtered

コード例 #40

0

ファイルを表示

ファイル: test_content_rewriter.py プロジェクト: ikreymer/pywb

    def test_dash_custom_max_resolution(self):
        headers = {'Content-Type': 'application/dash+xml'}
        with open(os.path.join(get_test_dir(), 'text_content', 'sample_dash.mpd'), 'rt') as fh:
            content = fh.read()

        metadata = {'adaptive_max_resolution': 921600,
                    'adaptive_max_bandwidth': 2000000}

        headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701oe_',
                                                  url='http://example.com/path/manifest.mpd',
                                                  warc_headers={'WARC-JSON-Metadata': json.dumps(metadata)})

        assert headers.headers == [('Content-Type', 'application/dash+xml')]

        filtered = """\
<?xml version='1.0' encoding='UTF-8'?>
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" mediaPresentationDuration="PT0H3M1.63S" minBufferTime="PT1.5S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static">
  <Period duration="PT0H3M1.63S" start="PT0S">
    <AdaptationSet>
      <ContentComponent contentType="video" id="1" />
      <Representation bandwidth="2073921" codecs="avc1.4d401f" height="720" id="2" mimeType="video/mp4" width="1280">
        <BaseURL>http://example.com/video-9.mp4</BaseURL>
        <SegmentBase indexRange="708-1183">
          <Initialization range="0-707" />
        </SegmentBase>
      </Representation>
      </AdaptationSet>
    <AdaptationSet>
      <ContentComponent contentType="audio" id="2" />
      <Representation bandwidth="255236" codecs="mp4a.40.2" id="7" mimeType="audio/mp4" numChannels="2" sampleRate="44100">
        <BaseURL>http://example.com/audio-2.mp4</BaseURL>
        <SegmentBase indexRange="592-851">
          <Initialization range="0-591" />
        </SegmentBase>
      </Representation>
      </AdaptationSet>
  </Period>
</MPD>"""

        assert b''.join(gen).decode('utf-8') == filtered

コード例 #41

0

ファイルを表示

ファイル: test_zipnum.py プロジェクト: ikreymer/pywb

def test_zip_prefix_load():
    """Load a zipnum index from a temporary directory and query it.

    The sample index and its compressed shard are copied into a fresh temp
    dir (the shard under the file name ``zipnum``). A page-count query and a
    plain URL query are then run against the resulting index aggregator. The
    temp dir is removed afterwards, whether or not the assertions pass.
    """
    tmpdir = tempfile.mkdtemp()
    try:
        shutil.copy(test_zipnum, tmpdir)
        shard_src = get_test_dir() + 'zipcdx/zipnum-sample.cdx.gz'
        shutil.copy(shard_src, os.path.join(tmpdir, 'zipnum'))

        config = {
            'shard_index_loc': dict(match='(.*)', replace=r'\1'),
            'path': os.path.join(tmpdir, 'zipnum-sample.idx'),
            'type': 'zipnum',
        }
        server = init_index_agg({'zip': config})

        # Test Page Count
        page_query = dict(url='iana.org/', matchType='domain', showNumPages=True)
        cdx_iter, err = server(page_query)
        results = list(cdx_iter)
        assert len(results) == 1, results
        assert results[0] == {"blocks": 38, "pages": 4, "pageSize": 10}

        # Test simple query
        cdx_iter, err = server(dict(url='iana.org/'))
        results = list(cdx_iter)
        assert len(results) == 3, results
        assert '20140126200624' == results[0]['timestamp']
        assert '20140127171238' == results[1]['timestamp']
        assert 'warc/revisit' == results[2]['mime']

    finally:
        shutil.rmtree(tmpdir)

コード例 #42

0

ファイルを表示

ファイル: test_zipnum.py プロジェクト: mirrorweb/pywb

def test_zip_prefix_load():
    """Query a zipnum index that was copied into a temporary directory.

    Copies the sample ``.idx`` file and its shard (stored as ``zipnum``) into
    a temp dir, builds an index aggregator from a ``zipnum``-type config, and
    checks a page-count query and a simple query. The temp dir is always
    removed at the end.
    """
    tmpdir = tempfile.mkdtemp()
    try:
        shutil.copy(test_zipnum, tmpdir)
        shard_dest = os.path.join(tmpdir, 'zipnum')
        shutil.copy(get_test_dir() + 'zipcdx/zipnum-sample.cdx.gz', shard_dest)

        config = dict(
            shard_index_loc=dict(match='(.*)', replace=r'\1'),
            path=os.path.join(tmpdir, 'zipnum-sample.idx'),
            type='zipnum',
        )
        server = init_index_agg({'zip': config})

        # Test Page Count
        cdx_iter, err = server(
            dict(url='iana.org/', matchType='domain', showNumPages=True))
        page_info = list(cdx_iter)
        assert len(page_info) == 1, page_info
        assert page_info[0] == {"blocks": 38, "pages": 4, "pageSize": 10}

        # Test simple query
        cdx_iter, err = server(dict(url='iana.org/'))
        captures = list(cdx_iter)
        assert len(captures) == 3, captures
        first, second, third = captures
        assert '20140126200624' == first['timestamp']
        assert '20140127171238' == second['timestamp']
        assert 'warc/revisit' == third['mime']

    finally:
        shutil.rmtree(tmpdir)

コード例 #43

0

ファイルを表示

def test_zip_prefix_load():
    """Load a zipnum index through ``CDXServer`` from a temporary directory.

    The sample index and its shard (saved as ``zipnum``) are copied into a
    temp dir. The server is created with a ``shard_index_loc`` match/replace
    config, then a page-count query and a simple query are checked against
    the raw result lines. The temp dir is always removed at the end.
    """
    tmpdir = tempfile.mkdtemp()
    try:
        shutil.copy(test_zipnum, tmpdir)
        shard_src = get_test_dir() + 'zipcdx/zipnum-sample.cdx.gz'
        shutil.copy(shard_src, os.path.join(tmpdir, 'zipnum'))

        config = {'shard_index_loc': dict(match='(.*)', replace=r'\1')}
        index_path = os.path.join(tmpdir, 'zipnum-sample.idx')
        server = CDXServer(index_path, config=config)

        # Test Page Count
        page_info = list(server.load_cdx(url='iana.org/',
                                         matchType='domain',
                                         showNumPages=True))
        assert len(page_info) == 1, page_info
        expected_pages = {"blocks": 38, "pages": 4, "pageSize": 10}
        assert json.loads(page_info[0]) == expected_pages

        # Test simple query
        lines = list(server.load_cdx(url='iana.org/'))
        assert len(lines) == 3, lines
        assert '20140126200624' in lines[0]
        assert '20140127171238' in lines[1]
        assert 'warc/revisit' in lines[2]

    finally:
        shutil.rmtree(tmpdir)

コード例 #44

0

ファイルを表示

ファイル: test_cdxops.py プロジェクト: eriknstr/pywb

>>> cdx_ops_test('http://iana.org/domains/root/db', resolveRevisits = True)
org,iana)/domains/root/db 20140126200927 http://www.iana.org/domains/root/db/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 446 671278 iana.warc.gz - - -
org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db text/html 200 DHXA725IW5VJJFRTWBQT6BEZKRE7H57S - - 18365 672225 iana.warc.gz - - -

"""

# =================================================================
from pywb.cdx.cdxserver import CDXServer
import os
import sys
import six

from pywb import get_test_dir

# Directory prefix for the CDX fixtures. Built by plain string concatenation,
# so this presumably relies on get_test_dir() ending with a path separator
# -- TODO confirm.
test_cdx_dir = get_test_dir() + "cdx/"


def cdx_ops_test_data(url, sources=[test_cdx_dir + "iana.cdx"], **kwparams):
    """Run a CDX query for ``url`` and return all results as a list.

    :param url: stored under the ``url`` key of the query parameters.
    :param sources: passed unchanged to ``CDXServer``; defaults to the
        ``iana.cdx`` fixture. The default list is created once at definition
        time and is not modified by this function.
    :param kwparams: extra query parameters; ``output`` falls back to
        ``"cdxobject"`` when the caller does not supply it.
    """
    kwparams["url"] = url
    kwparams.setdefault("output", "cdxobject")
    return list(CDXServer(sources).load_cdx(**kwparams))


def cdx_ops_test(*args, **kwargs):
    results = cdx_ops_test_data(*args, **kwargs)

コード例 #45

0

ファイルを表示

ファイル: test_cdxops.py プロジェクト: Orbiter/pywb

{"urlkey": "com,example)/?example=1", "timestamp": "20140103030321", "url": "http://example.com?example=1", "length": "1043", "filename": "example.warc.gz", "offset": "333", "orig.length": "-", "orig.offset": "-", "orig.filename": "-"}
{"urlkey": "com,example)/?example=1", "timestamp": "20140103030341", "url": "http://example.com?example=1", "length": "553", "filename": "example.warc.gz", "mime": "warc/revisit", "offset": "1864", "orig.length": "-", "orig.offset": "-", "orig.filename": "-"}




"""

#=================================================================
from pywb.cdx.cdxserver import CDXServer
import os
import sys

from pywb import get_test_dir

# Directory prefix for the CDX fixtures. Built by plain string concatenation,
# so this presumably relies on get_test_dir() ending with a path separator
# -- TODO confirm.
test_cdx_dir = get_test_dir() + 'cdx/'


def cdx_ops_test(url, sources = [test_cdx_dir + 'iana.cdx'], **kwparams):
    kwparams['url'] = url
    if not 'output' in kwparams:
        kwparams['output'] = 'cdxobject'
    fields = kwparams.get('fields')
    if fields:
        fields = fields.split(',')

    server = CDXServer(sources)
    results = server.load_cdx(**kwparams)

    for x in results:
        if not isinstance(x, str):

コード例 #46

0

ファイルを表示

ファイル: test_binsearch.py プロジェクト: akeprojecta/pywb

>>> print_binsearch_results_range('org,iana)/protocols', 'z-', iter_range)
org,iana)/protocols 20140126200715 http://www.iana.org/protocols text/html 200 IRUJZEUAXOUUG224ZMI4VWTUPJX6XJTT - - 63663 496277 iana.warc.gz
org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz


"""

#=================================================================
import os
from pywb.utils.binsearch import iter_prefix, iter_exact, iter_range

from pywb import get_test_dir

#test_cdx_dir = os.path.dirname(os.path.realpath(__file__)) + '/../sample-data/'
# Directory prefix for the CDX fixtures. Built by plain string concatenation,
# so this presumably relies on get_test_dir() ending with a path separator
# -- TODO confirm.
test_cdx_dir = get_test_dir() + 'cdx/'


def print_binsearch_results(key, iter_func):
    """Print each line that ``iter_func`` yields for ``key`` from iana.cdx.

    The fixture is opened in binary mode and ``iter_func`` is called as
    ``iter_func(cdx, key)``.
    """
    with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
        for line in iter_func(cdx, key):
            # The parenthesized form prints the same on Python 2 and is also
            # valid syntax on Python 3, where a bare print statement is a
            # SyntaxError.
            # NOTE(review): on Python 3 the yielded lines are presumably bytes
            # and may need decoding to reproduce the doctest output -- confirm.
            print(line)


def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
    """Print each line that ``iter_func`` yields for a key range from iana.cdx.

    The fixture is opened in binary mode and ``iter_func`` is called as
    ``iter_func(cdx, key, end_key, prev_size=prev_size)``.
    """
    with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
        for line in iter_func(cdx, key, end_key, prev_size=prev_size):
            # The parenthesized form prints the same on Python 2 and is also
            # valid syntax on Python 3, where a bare print statement is a
            # SyntaxError.
            # NOTE(review): on Python 3 the yielded lines are presumably bytes
            # and may need decoding to reproduce the doctest output -- confirm.
            print(line)


if __name__ == "__main__":

コード例 #47

0

ファイルを表示

ファイル: test_indexing.py プロジェクト: Orbiter/pywb

from pywb.warc.cdxindexer import write_cdx_index, main, cdx_filename

from pywb.cdx.cdxobject import CDXObject

from io import BytesIO
import sys

import os
import shutil
import tempfile

from pytest import raises


# Directory prefixes for the CDX and WARC fixtures. Built by plain string
# concatenation, so these presumably rely on get_test_dir() ending with a
# path separator -- TODO confirm.
TEST_CDX_DIR = get_test_dir() + 'cdx/'
TEST_WARC_DIR = get_test_dir() + 'warcs/'

def read_fully(cdx):
    """Return the whole content of a CDX fixture as bytes.

    :param cdx: file name, appended to ``TEST_CDX_DIR``.
    """
    # A size-less read() on a file opened in binary mode already reads up to
    # EOF, so no accumulate-until-empty loop or BytesIO buffer is needed.
    with open(TEST_CDX_DIR + cdx, 'rb') as fh:
        return fh.read()

def cdx_index(warc, **options):
    buff = BytesIO()

コード例 #48

0

ファイルを表示

test_3: http://cdxserver.example.com/cdx

test_4: !!python/object:pywb.cdx.cdxsource.RemoteCDXSource {{
            remote_url: 'http://cdxserver.example.com/cdx',
            cookie: custom_token=value,
            remote_processing: true,
        }}

test_5: {0}cdx/example.cdx

test_6:
    index_paths: invalid://abc


""".format(get_test_dir())

def test_cdxserver_config():
    """Build a CDX server from the ``test_1`` config entry and check its sources.

    Verifies that the server is a ``CDXServer`` with five sources, that the
    first is a ``CDXFile`` for ``example.cdx`` and that the second is a
    ``RemoteCDXSource`` with remote processing disabled.
    """
    # The config is a trusted literal defined in this module. An explicit
    # Loader is passed because calling yaml.load() without one is deprecated
    # since PyYAML 5.1 and an error in PyYAML 6. yaml.Loader is the loader
    # that older versions used by default.
    # NOTE(review): this loader can construct arbitrary Python objects; never
    # use it on untrusted input.
    config = yaml.load(yaml_config, Loader=yaml.Loader)
    cdxserver = create_cdx_server(config.get('test_1'))
    assert(isinstance(cdxserver, CDXServer))
    sources = cdxserver.sources
    assert len(sources) == 5

    assert type(sources[0]) == CDXFile
    assert sources[0].filename.endswith('example.cdx')

    # remote source with no remote processing
    assert type(sources[1]) == RemoteCDXSource
    assert sources[1].remote_url == 'http://cdxserver.example.com/cdx'
    assert sources[1].remote_processing == False

コード例 #49

0

ファイルを表示

from pywb import get_test_dir

from pywb.warc.cdxindexer import write_cdx_index, main, cdx_filename

from pywb.cdx.cdxobject import CDXObject

from io import BytesIO
import sys

import os
import shutil
import tempfile

from pytest import raises

# Directory prefixes for the CDX and WARC fixtures. Built by plain string
# concatenation, so these presumably rely on get_test_dir() ending with a
# path separator -- TODO confirm.
TEST_CDX_DIR = get_test_dir() + 'cdx/'
TEST_WARC_DIR = get_test_dir() + 'warcs/'


def read_fully(cdx):
    """Return the whole content of a CDX fixture as bytes.

    :param cdx: file name, appended to ``TEST_CDX_DIR``.
    """
    # A size-less read() on a file opened in binary mode already reads up to
    # EOF, so no accumulate-until-empty loop or BytesIO buffer is needed.
    with open(TEST_CDX_DIR + cdx, 'rb') as fh:
        return fh.read()


def cdx_index(warc, **options):

コード例 #50

0

ファイルを表示

ファイル: test_zipnum.py プロジェクト: ikreymer/pywb

"""

from pywb import get_test_dir
from pywb.warcserver.index.test.test_cdxops import cdx_ops_test, cdx_ops_test_data
from pywb.warcserver.warcserver import init_index_agg

import shutil
import tempfile
import os
import json

import pytest


# Path of the sample zipnum index. Built by plain string concatenation, so
# this presumably relies on get_test_dir() ending with a path separator
# -- TODO confirm.
test_zipnum = get_test_dir() + 'zipcdx/zipnum-sample.idx'

def zip_ops_test_data(url, **kwargs):
    """Query the sample zipnum index and return the first result.

    Delegates to ``cdx_ops_test_data`` with the index registered under the
    ``zip`` source name. Returns ``None`` when the result is empty.
    """
    matches = cdx_ops_test_data(url, {'zip': test_zipnum}, **kwargs)
    return matches[0] if matches else None

def zip_ops_test(url, **kwargs):
    """Run ``cdx_ops_test`` for ``url`` against the sample zipnum index."""
    cdx_ops_test(url, {'zip': test_zipnum}, **kwargs)

def zip_test_err(url, **kwargs):
    """Run ``cdx_ops_test`` for ``url`` against the ``zipnum-bad.idx`` fixture."""
    bad_index = get_test_dir() + 'zipcdx/zipnum-bad.idx'
    cdx_ops_test(url, {'zip': bad_index}, **kwargs)

コード例 #51

0

ファイルを表示

ファイル: test_zipnum.py プロジェクト: ikreymer/pywb

def zip_test_err(url, **kwargs):
    """Run ``cdx_ops_test`` for ``url`` against the ``zipnum-bad.idx`` fixture."""
    bad_index = get_test_dir() + 'zipcdx/zipnum-bad.idx'
    cdx_ops_test(url, {'zip': bad_index}, **kwargs)

コード例 #52

0

ファイルを表示

ファイル: test_auto_colls.py プロジェクト: Cloudxtreme/pywb

 def _get_sample_warc(self, name):
     """Return the path of sample WARC ``name`` inside the test dir's ``warcs`` folder."""
     warcs_dir = os.path.join(get_test_dir(), 'warcs')
     return os.path.join(warcs_dir, name)

コード例 #53

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: chdorner/pywb

def test_wombat_top():
    """Check that rewriting ``toptest.js`` yields the ``WB_wombat_top`` window test."""
    #status_headers, buff = get_rewritten('https://assets-cdn.github.com/assets/github-0f06d0f46fe7bcfbf31f2380f23aec15ba21b8ec.js', urlrewriter)
    script_path = get_test_dir() + 'text_content/toptest.js'
    status_headers, buff = get_rewritten(script_path, urlrewriter)

    assert 'WB_wombat_top!==window' in buff

コード例 #54

0

ファイルを表示

ファイル: test_loading.py プロジェクト: tilgovi/pywb

"""

import os
import sys
import pprint

from pywb.warc.recordloader import ArcWarcRecordLoader, ArchiveLoadFailed
from pywb.warc.pathresolvers import make_best_resolvers
from pywb.warc.resolvingloader import ResolvingLoader
from pywb.cdx.cdxobject import CDXObject

from pywb import get_test_dir

#==============================================================================
# Directory prefix for the sample WARC files. Built by plain string
# concatenation, so this presumably relies on get_test_dir() ending with a
# path separator -- TODO confirm.
test_warc_dir = get_test_dir() + 'warcs/'


# Each constant below is one space-separated text line: the trailing
# backslashes sit inside the string literal, so the line breaks are not part
# of the value.
URL_AGNOSTIC_ORIG_CDX = 'org,iana,example)/ 20130702195402 http://example.iana.org/ \
text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - \
1001 353 example-url-agnostic-orig.warc.gz'

# Revisit entry carrying the same digest value as URL_AGNOSTIC_ORIG_CDX.
# NOTE(review): "[email protected]" looks like an e-mail-obfuscation placeholder
# introduced when this snippet was scraped; the real URL presumably had a
# user@host form -- confirm against the upstream test file.
URL_AGNOSTIC_REVISIT_CDX = 'com,example)/ 20130729195151 http://[email protected]/ \
warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - \
591 355 example-url-agnostic-revisit.warc.gz'

# Same revisit entry with "-" in place of the digest.
# NOTE(review): the "[email protected]" placeholder concern applies here too.
URL_AGNOSTIC_REVISIT_NO_DIGEST_CDX = 'com,example)/ 20130729195151 http://[email protected]/ \
warc/revisit - - - - \
591 355 example-url-agnostic-revisit.warc.gz'

BAD_ORIG_CDX = 'org,iana,example)/ 20130702195401 http://example.iana.org/ \

コード例 #55

0

ファイルを表示

def zip_test_err(url, **kwargs):
    """Run ``cdx_ops_test`` for ``url`` with ``zipnum-bad.idx`` as the source path."""
    cdx_ops_test(url, get_test_dir() + 'zipcdx/zipnum-bad.idx', **kwargs)

コード例 #56

0

ファイルを表示

ファイル: test_rewrite_live.py プロジェクト: rebeccacremona/pywb

def test_wombat_top():
    """Check that rewriting ``toptest.js`` yields the ``WB_wombat_top`` window test."""
    #status_headers, buff = get_rewritten('https://assets-cdn.github.com/assets/github-0f06d0f46fe7bcfbf31f2380f23aec15ba21b8ec.js', urlrewriter)
    local_script = get_test_dir() + 'text_content/toptest.js'
    status_headers, buff = get_rewritten(local_script, urlrewriter)

    assert 'WB_wombat_top!==window' in buff

コード例 #57

0

ファイルを表示

ファイル: test_zipnum.py プロジェクト: tilgovi/pywb

org,iana)/domains/int 20140126201239    zipnum    8884    353    36
org,iana)/domains/root/servers 20140126201227    zipnum    9237    386    37

>>> zip_ops_test(url = 'http://iana.org/domains/', matchType = 'prefix')
org,iana)/domains/arpa 20140126201248 http://www.iana.org/domains/arpa text/html 200 QOFZZRN6JIKAL2JRL6ZC2VVG42SPKGHT - - 2939 759039 iana.warc.gz
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
org,iana)/domains/idn-tables 20140126201127 http://www.iana.org/domains/idn-tables text/html 200 HNCUFTJMOQOGAEY6T56KVC3T7TVLKGEW - - 8118 715878 iana.warc.gz
org,iana)/domains/int 20140126201239 http://www.iana.org/domains/int text/html 200 X32BBNNORV4SPEHTQF5KI5NFHSKTZK6Q - - 2482 746788 iana.warc.gz
org,iana)/domains/reserved 20140126201054 http://www.iana.org/domains/reserved text/html 200 R5AAEQX5XY5X5DG66B23ODN5DUBWRA27 - - 3573 701457 iana.warc.gz
org,iana)/domains/root 20140126200912 http://www.iana.org/domains/root text/html 200 YWA2R6UVWCYNHBZJKBTPYPZ5CJWKGGUX - - 2691 657746 iana.warc.gz
org,iana)/domains/root/db 20140126200927 http://www.iana.org/domains/root/db/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 446 671278 iana.warc.gz
org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db text/html 200 DHXA725IW5VJJFRTWBQT6BEZKRE7H57S - - 18365 672225 iana.warc.gz
org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/servers text/html 200 AFW34N3S4NK2RJ6QWMVPB5E2AIUETAHU - - 3137 733840 iana.warc.gz

"""

from test_cdxops import cdx_ops_test
from pywb import get_test_dir

# Path of the sample zipnum index. Built by plain string concatenation, so
# this presumably relies on get_test_dir() ending with a path separator
# -- TODO confirm.
test_zipnum = get_test_dir() + 'zipcdx/zipnum-sample.idx'


def zip_ops_test(url, **kwargs):
    """Run ``cdx_ops_test`` for ``url`` with the sample zipnum index as source."""
    cdx_ops_test(url, test_zipnum, **kwargs)


# When executed as a script, run the doctests found in this module.
if __name__ == "__main__":
    import doctest
    doctest.testmod()

コード例 #58

0

ファイルを表示

ファイル: testutils.py プロジェクト: ikreymer/pywb

# ============================================================================
def to_json_list(cdxlist, fields=['timestamp', 'load_url', 'filename', 'source']):
    """Return one parsed JSON object per entry of ``cdxlist``.

    Each entry is serialized with ``cdx.to_json(fields)`` and the resulting
    string is parsed back with ``json.loads``. The default ``fields`` list is
    shared between calls and is not modified here.
    """
    parsed = []
    for cdx in cdxlist:
        parsed.append(json.loads(cdx.to_json(fields)))
    return parsed

def key_ts_res(cdxlist, extra='filename'):
    """Format each entry as ``"<urlkey> <timestamp> <extra field>"``, one per line.

    :param cdxlist: iterable of mappings with ``urlkey``, ``timestamp`` and
        the ``extra`` key.
    :param extra: name of the third field to include.
    :return: the formatted entries joined with newlines.
    """
    rows = []
    for cdx in cdxlist:
        rows.append(' '.join((cdx['urlkey'], cdx['timestamp'], cdx[extra])))
    return '\n'.join(rows)

def to_path(path):
    """Return ``path`` with every ``/`` replaced by the OS path separator.

    The input is returned unchanged when the separator already is ``/``.
    """
    if os.path.sep == '/':
        return path

    return path.replace('/', os.path.sep)


# ============================================================================
# Fixture directories below the test dir, with '/' converted to the OS path
# separator by to_path().
TEST_CDX_PATH = to_path(get_test_dir() + '/cdxj/')
TEST_WARC_PATH = to_path(get_test_dir() + '/warcs/')


# ============================================================================
class BaseTestClass(object):
    """Base class with no-op class-level setup and teardown hooks."""

    @classmethod
    def setup_class(cls):
        """Class-level setup hook; does nothing in this base class."""
        return None

    @classmethod
    def teardown_class(cls):
        """Class-level teardown hook; does nothing in this base class."""
        return None


# ============================================================================