def test_record_param_user_coll(self):
    """Record one response for USER/COLL and verify the WARC and Redis index.

    Asserts that exactly one CDXJ entry is stored under ``USER:COLL:cdxj``
    and that ``USER:COLL:warc`` maps the WARC filename to its full path
    below ``<root_dir>/warcs/``.
    """
    warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')

    dedup_index = self._get_dedup_index()

    recorder_app = RecorderApp(
        self.upstream_url,
        PerRecordWARCWriter(warc_path, dedup_index=dedup_index))

    self._test_all_warcs('/warcs/USER/COLL/', None)

    # Each recorder param is its own '&'-prefixed query argument.
    resp = self._test_warc_write(
        recorder_app, 'httpbin.org', '/user-agent',
        '&param.recorder.user=USER&param.recorder.coll=COLL')
    assert '"user-agent": "{0}"'.format(UA) in resp.text

    self._test_all_warcs('/warcs/USER/COLL/', 1)

    # Test Redis CDX
    r = FakeStrictRedis.from_url('redis://localhost/2')

    res = r.zrangebylex('USER:COLL:cdxj', '[org,httpbin)/', '(org,httpbin,')
    assert len(res) == 1

    cdx = CDXObject(res[0])
    assert cdx['urlkey'] == 'org,httpbin)/user-agent'
    assert cdx['mime'] == 'application/json'
    assert cdx['offset'] == '0'
    assert cdx['filename'].startswith(to_path('USER/COLL/'))
    assert cdx['filename'].endswith('.warc.gz')

    # The WARC hash is keyed by relative filename; values are full paths (bytes).
    warcs = r.hgetall('USER:COLL:warc')
    full_path = to_path(self.root_dir + '/warcs/' + cdx['filename'])
    assert warcs == {cdx['filename'].encode('utf-8'): full_path.encode('utf-8')}
def test_record_param_user_coll_skip(self):
    """Re-record a URL with ``SkipDupePolicy`` and check nothing new is stored.

    The WARC count for USER/COLL and the number of CDXJ entries in
    ``USER:COLL:cdxj`` are both expected to be 2 after the request, the
    same WARC count that is checked before it.
    """
    warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')

    dedup_index = self._get_dedup_index(dupe_policy=SkipDupePolicy())

    recorder_app = RecorderApp(
        self.upstream_url,
        PerRecordWARCWriter(warc_path, dedup_index=dedup_index))

    # No new entries written
    self._test_all_warcs('/warcs/USER/COLL/', 2)

    # Each recorder param is its own '&'-prefixed query argument.
    resp = self._test_warc_write(
        recorder_app, 'httpbin.org', '/user-agent',
        '&param.recorder.user=USER&param.recorder.coll=COLL')
    assert '"user-agent": "{0}"'.format(UA) in resp.text

    self._test_all_warcs('/warcs/USER/COLL/', 2)

    # Test Redis CDX
    r = FakeStrictRedis.from_url('redis://localhost/2')

    res = r.zrangebylex('USER:COLL:cdxj', '[org,httpbin)/', '(org,httpbin,')
    assert len(res) == 2
def test_redis_warc_1(self):
    """Register a WARC path in the ``test:warc`` hash and load a resource from it.

    The response for http://www.example.com/ must report ``example`` in its
    ``Warcserver-Source-Coll`` header.
    """
    redis_client = FakeStrictRedis.from_url('redis://localhost/2')

    # Map the WARC filename to its location under TEST_WARC_PATH.
    warc_location = TEST_WARC_PATH + 'example2.warc.gz'
    redis_client.hset('test:warc', 'example2.warc.gz', warc_location)

    resp = self.testapp.get('/allredis/resource?url=http://www.example.com/')

    source_coll = resp.headers['Warcserver-Source-Coll']
    assert source_coll == 'example'
def test_error_redis_file_not_found(self):
    """Check the 503 error payloads when the WARC cannot be resolved via Redis.

    Three situations are exercised in order: the hash entry points at a
    non-existent path, the hash entry is removed, and the whole
    ``test:warc`` key is deleted.
    """
    resource_url = '/allredis/resource?url=http://www.example.com/'
    not_found_msg = 'example2.warc.gz: Archive File Not Found'

    store = FakeStrictRedis.from_url('redis://localhost/2')

    # 1) Entry exists but points to a path that is not on disk.
    store.hset('test:warc', 'example2.warc.gz', './x-no-such-dir/example2.warc.gz')
    resp = self.testapp.get(resource_url, status=503)
    message = resp.json['message']
    assert message == "example2.warc.gz: [Errno 2] No such file or directory: './x-no-such-dir/example2.warc.gz'"

    # 2) Entry removed from the hash.
    store.hdel('test:warc', 'example2.warc.gz')
    resp = self.testapp.get(resource_url, status=503)
    assert resp.json == {
        'message': not_found_msg,
        'errors': {'WARCPathLoader': not_found_msg},
    }

    # 3) Whole hash key removed.
    store.delete('test:warc')
    resp = self.testapp.get(resource_url, status=503)
    assert resp.json == {
        'message': not_found_msg,
        'errors': {'WARCPathLoader': not_found_msg},
    }
def test_record_param_user_coll_write_dupe_no_revisit(self):
    """Record into USER/COLL with ``WriteDupePolicy`` and verify the index.

    After the request the collection is expected to hold three WARCs and
    three CDXJ entries whose sorted mime types are two ``application/json``
    and one ``warc/revisit``. The writer must keep no cached file handles.
    """
    warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')

    dedup_index = self._get_dedup_index(dupe_policy=WriteDupePolicy())

    writer = PerRecordWARCWriter(warc_path, dedup_index=dedup_index)
    recorder_app = RecorderApp(self.upstream_url, writer)

    # Each recorder param is its own '&'-prefixed query argument.
    resp = self._test_warc_write(
        recorder_app, 'httpbin.org', '/get?foo=bar',
        '&param.recorder.user=USER&param.recorder.coll=COLL')
    assert b'HTTP/1.1 200 OK' in resp.body
    assert b'"foo": "bar"' in resp.body

    self._test_all_warcs('/warcs/USER/COLL/', 3)

    r = FakeStrictRedis.from_url('redis://localhost/2')

    res = r.zrangebylex('USER:COLL:cdxj', '[org,httpbin)/', '(org,httpbin,')
    assert len(res) == 3

    mimes = [CDXObject(x)['mime'] for x in res]
    assert sorted(mimes) == [
        'application/json', 'application/json', 'warc/revisit'
    ]

    assert len(writer.fh_cache) == 0
def __init__(self, old_redis_url, new_redis_url, dry_run=True,
             per_recording_list=False, s3_import=False, s3_root=None):
    """Connect to the old and new Redis instances and set up optional S3 access.

    :param old_redis_url: URL of the Redis instance to read from.
    :param new_redis_url: URL of the Redis instance to write to; also passed
        to ``CLIUserManager``.
    :param dry_run: when true, the new Redis is a ``FakeStrictRedis`` and
        ``redis.StrictRedis`` is replaced module-wide by the fake class.
    :param per_recording_list: stored on the instance unchanged.
    :param s3_import: when true, ``s3_root`` must be truthy and a boto3 S3
        client is created.
    :param s3_root: stored as ``self.s3_root`` when ``s3_import`` is set.
    """
    self.old_redis = StrictRedis.from_url(old_redis_url, decode_responses=True)

    self.dry_run = dry_run
    self.per_recording_list = per_recording_list
    self.s3_import = s3_import

    if not s3_import:
        self.s3_root = None
        self.s3 = None
    else:
        assert(s3_root)
        # boto3 is only imported when S3 import is actually requested.
        import boto3
        self.s3_root = s3_root
        self.s3 = boto3.client('s3')

    if not self.dry_run:
        self.redis = StrictRedis.from_url(new_redis_url, decode_responses=True)
    else:
        # Patch the redis module so code that looks up redis.StrictRedis
        # afterwards gets the fake implementation as well.
        import redis
        redis.StrictRedis = fakeredis.FakeStrictRedis
        self.redis = FakeStrictRedis.from_url(new_redis_url, decode_responses=True)

    print('Redis Inited')

    self.cli = CLIUserManager(new_redis_url)
def __init__(self,
             old_redis_url,
             new_redis_url,
             dry_run=True,
             per_recording_list=False,
             s3_import=False,
             s3_root=None):
    """Set up source/target Redis clients, optional S3 client and user manager.

    In dry-run mode the target Redis is an in-memory ``FakeStrictRedis`` and
    ``redis.StrictRedis`` is rebound to ``fakeredis.FakeStrictRedis``.
    When ``s3_import`` is set, ``s3_root`` is asserted to be truthy and a
    boto3 ``s3`` client is created; otherwise both S3 attributes are ``None``.
    """
    # Source Redis is always a real client.
    source = StrictRedis.from_url(old_redis_url, decode_responses=True)
    self.old_redis = source

    self.dry_run = dry_run
    self.per_recording_list = per_recording_list
    self.s3_import = s3_import

    # S3 settings: only initialized when an import from S3 was requested.
    if s3_import:
        assert (s3_root)
        import boto3
        self.s3_root = s3_root
        self.s3 = boto3.client('s3')
    else:
        self.s3_root = None
        self.s3 = None

    # Target Redis: fake (and globally patched) for dry runs, real otherwise.
    if self.dry_run:
        import redis
        redis.StrictRedis = fakeredis.FakeStrictRedis
        target_cls = FakeStrictRedis
    else:
        target_cls = StrictRedis

    self.redis = target_cls.from_url(new_redis_url, decode_responses=True)

    print('Redis Inited')

    self.cli = CLIUserManager(new_redis_url)
def test_record_video_metadata(self):
    """Record youtube-dl metadata for a video and verify the stored WARC record.

    Skipped when ``youtube_dl`` is not importable. The single WARC listed in
    ``USER:VIDEO:warc`` must start with a ``metadata`` record carrying the
    youtube-dl content type and matching block/payload digests.
    """
    pytest.importorskip('youtube_dl')

    warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')

    dedup_index = self._get_dedup_index()

    writer = PerRecordWARCWriter(warc_path, dedup_index=dedup_index)
    recorder_app = RecorderApp(self.upstream_url, writer)

    # The user must be 'USER' so the WARC is registered under the
    # 'USER:VIDEO:warc' key that is read back below.
    params = {
        'param.recorder.user': 'USER',
        'param.recorder.coll': 'VIDEO',
        'content_type': 'application/vnd.youtube-dl_formats+json',
    }

    self._test_warc_write(recorder_app, 'www.youtube.com', '/v/BfBgWtAIbRc',
                          '&' + urlencode(params),
                          link_url='metadata://www.youtube.com/v/BfBgWtAIbRc')

    r = FakeStrictRedis.from_url('redis://localhost/2')

    warcs = r.hgetall('USER:VIDEO:warc')
    assert len(warcs) == 1

    filename = list(warcs.values())[0]

    with open(filename, 'rb') as fh:
        decomp = DecompressingBufferedReader(fh)
        record = ArcWarcRecordLoader().parse_record_stream(decomp)

        status_headers = record.rec_headers

        assert status_headers.get_header('WARC-Type') == 'metadata'
        assert status_headers.get_header('Content-Type') == 'application/vnd.youtube-dl_formats+json'
        assert status_headers.get_header('WARC-Block-Digest') != ''
        assert status_headers.get_header('WARC-Block-Digest') == status_headers.get_header('WARC-Payload-Digest')
def test_record_param_user_coll_write_dupe_no_revisit(self):
    """Record a new URL into USER/COLL using ``WriteDupePolicy``.

    Expects three WARCs and three CDXJ entries for the collection afterwards
    (sorted mimes: ``application/json`` twice, ``warc/revisit`` once) and an
    empty file-handle cache on the writer.
    """
    warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')

    dedup_index = self._get_dedup_index(dupe_policy=WriteDupePolicy())

    writer = PerRecordWARCWriter(warc_path, dedup_index=dedup_index)
    recorder_app = RecorderApp(self.upstream_url, writer)

    # Each recorder param is its own '&'-prefixed query argument.
    resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar',
                                 '&param.recorder.user=USER&param.recorder.coll=COLL')
    assert b'HTTP/1.1 200 OK' in resp.body
    assert b'"foo": "bar"' in resp.body

    self._test_all_warcs('/warcs/USER/COLL/', 3)

    r = FakeStrictRedis.from_url('redis://localhost/2')

    res = r.zrangebylex('USER:COLL:cdxj', '[org,httpbin)/', '(org,httpbin,')
    assert len(res) == 3

    mimes = [CDXObject(x)['mime'] for x in res]
    assert sorted(mimes) == ['application/json', 'application/json', 'warc/revisit']

    assert len(writer.fh_cache) == 0
def test_record_custom_record(self):
    """PUT a custom ``resource`` record and verify the WARC written for it.

    The record is sent to the recorder with ``put_record=resource`` for
    collection ``META``. The test then reads the WARC registered in
    ``META:warc`` and checks the WARC headers, the payload and the
    synthesized HTTP headers.
    """
    dedup_index = self._get_dedup_index(user=False)

    warc_path = to_path(self.root_dir + '/warcs/meta/meta.warc.gz')

    writer = MultiFileWARCWriter(warc_path, dedup_index=dedup_index)

    recorder_app = RecorderApp(self.upstream_url, writer)

    # Query arguments after the url are separated by '&'.
    req_url = '/live/resource/postreq?url=custom://httpbin.org&param.recorder.coll=META&put_record=resource'

    buff = b'Some Data'

    testapp = webtest.TestApp(recorder_app)
    headers = {'content-type': 'text/plain', 'WARC-Custom': 'foo'}
    resp = testapp.put(req_url, headers=headers, params=buff)

    assert resp.json['success'] == 'true'
    assert resp.json['WARC-Date'] != ''

    self._test_all_warcs('/warcs/meta', 1)

    r = FakeStrictRedis.from_url('redis://localhost/2')

    warcs = r.hgetall('META:warc')
    assert len(warcs) == 1

    warc_key = os.path.join('meta', 'meta.warc.gz').encode('utf-8')

    with open(warcs[warc_key], 'rb') as fh:
        decomp = DecompressingBufferedReader(fh)
        record = ArcWarcRecordLoader().parse_record_stream(
            decomp, ensure_http_headers=True)

        # WARC-level headers of the stored record.
        status_headers = record.rec_headers
        assert len(record.rec_headers.headers) == 9
        assert status_headers.get_header('WARC-Type') == 'resource'
        assert status_headers.get_header(
            'WARC-Target-URI') == 'custom://httpbin.org'
        assert status_headers.get_header('WARC-Record-ID') != ''
        assert status_headers.get_header('WARC-Date') != ''
        assert status_headers.get_header('WARC-Block-Digest') != ''
        assert status_headers.get_header(
            'WARC-Block-Digest') == status_headers.get_header(
                'WARC-Payload-Digest')
        assert status_headers.get_header('Content-Type') == 'text/plain'
        assert status_headers.get_header('Content-Length') == str(len(buff))
        assert status_headers.get_header('WARC-Custom') == 'foo'

        assert record.raw_stream.read() == buff

        # HTTP headers requested via ensure_http_headers=True.
        status_headers = record.http_headers
        assert len(record.http_headers.headers) == 2
        assert status_headers.get_header('Content-Type') == 'text/plain'
        assert status_headers.get_header('Content-Length') == str(len(buff))

    writer.close()
    assert len(writer.fh_cache) == 0
def test_record_custom_record(self):
    """Store a custom ``resource`` record via PUT and inspect the resulting WARC.

    Verifies the JSON response, that one WARC exists under ``/warcs/meta``,
    that ``META:warc`` holds one entry, and that the record's WARC headers,
    payload and HTTP headers match what was sent.
    """
    dedup_index = self._get_dedup_index(user=False)

    warc_path = to_path(self.root_dir + '/warcs/meta/meta.warc.gz')

    writer = MultiFileWARCWriter(warc_path, dedup_index=dedup_index)

    recorder_app = RecorderApp(self.upstream_url, writer)

    # Query arguments after the url are separated by '&'.
    req_url = '/live/resource/postreq?url=custom://httpbin.org&param.recorder.coll=META&put_record=resource'

    buff = b'Some Data'

    testapp = webtest.TestApp(recorder_app)
    headers = {'content-type': 'text/plain',
               'WARC-Custom': 'foo'
              }

    resp = testapp.put(req_url, headers=headers, params=buff)

    assert resp.json['success'] == 'true'
    assert resp.json['WARC-Date'] != ''

    self._test_all_warcs('/warcs/meta', 1)

    r = FakeStrictRedis.from_url('redis://localhost/2')

    warcs = r.hgetall('META:warc')
    assert len(warcs) == 1

    warc_key = os.path.join('meta', 'meta.warc.gz').encode('utf-8')

    with open(warcs[warc_key], 'rb') as fh:
        decomp = DecompressingBufferedReader(fh)
        record = ArcWarcRecordLoader().parse_record_stream(decomp, ensure_http_headers=True)

        # WARC-level headers of the stored record.
        status_headers = record.rec_headers
        assert len(record.rec_headers.headers) == 9
        assert status_headers.get_header('WARC-Type') == 'resource'
        assert status_headers.get_header('WARC-Target-URI') == 'custom://httpbin.org'
        assert status_headers.get_header('WARC-Record-ID') != ''
        assert status_headers.get_header('WARC-Date') != ''
        assert status_headers.get_header('WARC-Block-Digest') != ''
        assert status_headers.get_header('WARC-Block-Digest') == status_headers.get_header('WARC-Payload-Digest')
        assert status_headers.get_header('Content-Type') == 'text/plain'
        assert status_headers.get_header('Content-Length') == str(len(buff))
        assert status_headers.get_header('WARC-Custom') == 'foo'

        assert record.raw_stream.read() == buff

        # HTTP headers requested via ensure_http_headers=True.
        status_headers = record.http_headers
        assert len(record.http_headers.headers) == 2
        assert status_headers.get_header('Content-Type') == 'text/plain'
        assert status_headers.get_header('Content-Length') == str(len(buff))

    writer.close()
    assert len(writer.fh_cache) == 0
def test_anon_auto_delete(self):
    """Flush the session Redis db and check that only base keys and no temp dirs remain.

    After a fixed 4 second wait, the main Redis must contain exactly the
    ``h:roles``, ``h:defaults`` and ``h:temp-usage`` keys and no ``temp$*``
    entries may exist in the WARC directory.
    """
    session_store = FakeStrictRedis.from_url('redis://localhost:6379/0')
    session_store.flushdb()

    # presumably gives background cleanup time to react to the flush -- TODO confirm
    time.sleep(4.0)

    remaining_keys = set(self.redis.keys())
    assert remaining_keys == {b'h:roles', b'h:defaults', b'h:temp-usage'}

    temp_pattern = os.path.join(self.warcs_dir, 'temp$*')
    assert glob.glob(temp_pattern) == []
def test_record_param_user_coll_revisit(self):
    """Record an already-captured URL again and verify a revisit record is written.

    Expects the WARC count for USER/COLL to go from 1 to 2, a
    ``warc/revisit`` CDXJ entry in ``USER:COLL:cdxj``, and a revisit WARC
    record whose refers-to headers point at the same URL.
    """
    warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')

    dedup_index = self._get_dedup_index()

    recorder_app = RecorderApp(
        self.upstream_url,
        PerRecordWARCWriter(warc_path, dedup_index=dedup_index))

    self._test_all_warcs('/warcs/USER/COLL/', 1)

    # Each recorder param is its own '&'-prefixed query argument.
    resp = self._test_warc_write(
        recorder_app, 'httpbin.org', '/user-agent',
        '&param.recorder.user=USER&param.recorder.coll=COLL')
    assert '"user-agent": "{0}"'.format(UA) in resp.text

    self._test_all_warcs('/warcs/USER/COLL/', 2)

    # Test Redis CDX
    r = FakeStrictRedis.from_url('redis://localhost/2')

    res = r.zrangebylex('USER:COLL:cdxj', '[org,httpbin)/', '(org,httpbin,')
    assert len(res) == 2

    # Pick whichever of the two entries is the revisit.
    if b'warc/revisit' in res[0]:
        cdx = CDXObject(res[0])
    else:
        cdx = CDXObject(res[1])

    assert cdx['urlkey'] == 'org,httpbin)/user-agent'
    assert cdx['mime'] == 'warc/revisit'
    assert cdx['offset'] == '0'
    assert cdx['filename'].startswith(to_path('USER/COLL/'))
    assert cdx['filename'].endswith('.warc.gz')

    fullwarc = os.path.join(self.root_dir, 'warcs', cdx['filename'])

    warcs = r.hgetall('USER:COLL:warc')
    assert len(warcs) == 2
    assert warcs[cdx['filename'].encode('utf-8')] == fullwarc.encode(
        'utf-8')

    with open(fullwarc, 'rb') as fh:
        decomp = DecompressingBufferedReader(fh)

        # Test refers-to headers
        status_headers = StatusAndHeadersParser(['WARC/1.0']).parse(decomp)
        assert status_headers.get_header('WARC-Type') == 'revisit'
        assert status_headers.get_header(
            'WARC-Target-URI') == 'http://httpbin.org/user-agent'
        assert status_headers.get_header('WARC-Date') != ''
        assert status_headers.get_header(
            'WARC-Refers-To-Target-URI') == 'http://httpbin.org/user-agent'
        assert status_headers.get_header('WARC-Refers-To-Date') != ''
def setup_class(cls, redis_url='redis://localhost:6379/2'):
    """Reset shared fake-Redis state, patch ``redis.StrictRedis`` and create ``cls.redis``.

    :param redis_url: URL used to build the class-level ``FakeStrictRedis`` client.
    """
    super(FakeRedisTests, cls).setup_class()

    # Start from a clean slate: drop recorded pubsubs and all fake databases.
    del PUBSUBS[:]
    DATABASES.clear()

    # Replace redis.StrictRedis with the shared-pubsub fake; the patcher is
    # kept on the class as ``redismock``.
    patcher = patch('redis.StrictRedis', FakeStrictRedisSharedPubSub)
    cls.redismock = patcher
    cls.redismock.start()

    cls.redis = FakeStrictRedis.from_url(redis_url)
def test_anon_auto_delete(self):
    """Flush the session Redis db and wait until only base keys and no temp dirs remain.

    The check is handed to ``self.sleep_try(0.1, 10.0, ...)``.
    """
    session_store = FakeStrictRedis.from_url('redis://localhost:6379/0')
    session_store.flushdb()

    def only_base_keys_left():
        expected_keys = {'h:roles', 'h:defaults', 'h:temp-usage'}
        assert set(self.redis.keys()) == expected_keys

        temp_pattern = os.path.join(self.warcs_dir, 'temp$*')
        assert glob.glob(temp_pattern) == []

    # presumably retries the check every 0.1s for up to 10s -- verify against sleep_try
    self.sleep_try(0.1, 10.0, only_base_keys_left)
def test_url_agnost(self):
    """Load a URL-agnostic revisit and check the response headers.

    Registers the revisit and original WARCs in the ``test:foo:warc`` hash,
    requests http://example.com/ with ``param.arg=foo`` and verifies the
    status, ``Link``, ``Warcserver-Source-Coll`` and ``Memento-Datetime``
    headers.
    """
    f = FakeStrictRedis.from_url('redis://localhost/2')
    f.hset('test:foo:warc', 'example-url-agnostic-revisit.warc.gz',
           TEST_WARC_PATH + 'example-url-agnostic-revisit.warc.gz')
    f.hset('test:foo:warc', 'example-url-agnostic-orig.warc.gz',
           TEST_WARC_PATH + 'example-url-agnostic-orig.warc.gz')

    # 'param.arg' is a separate query argument and needs its own '&'.
    resp = self.testapp.get('/urlagnost/resource?url=http://example.com/&param.arg=foo')

    assert resp.status_int == 200
    # NOTE(review): '[email protected]' looks like an obfuscated placeholder for
    # the original host/userinfo -- confirm the intended URL.
    assert resp.headers['Link'] == MementoUtils.make_link('http://[email protected]/', 'original')
    assert resp.headers['Warcserver-Source-Coll'] == 'url-agnost'
    assert resp.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT'
def test_record_param_user_coll_revisit(self):
    """Record the same URL a second time and check that a revisit is stored.

    The collection must grow from one to two WARCs, ``USER:COLL:cdxj`` must
    contain a ``warc/revisit`` entry at offset 0, and the revisit WARC must
    carry ``WARC-Refers-To-*`` headers for http://httpbin.org/user-agent.
    """
    warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')

    dedup_index = self._get_dedup_index()

    recorder_app = RecorderApp(self.upstream_url,
                               PerRecordWARCWriter(warc_path, dedup_index=dedup_index))

    self._test_all_warcs('/warcs/USER/COLL/', 1)

    # Each recorder param is its own '&'-prefixed query argument.
    resp = self._test_warc_write(recorder_app, 'httpbin.org', '/user-agent',
                                 '&param.recorder.user=USER&param.recorder.coll=COLL')
    assert '"user-agent": "{0}"'.format(UA) in resp.text

    self._test_all_warcs('/warcs/USER/COLL/', 2)

    # Test Redis CDX
    r = FakeStrictRedis.from_url('redis://localhost/2')

    res = r.zrangebylex('USER:COLL:cdxj', '[org,httpbin)/', '(org,httpbin,')
    assert len(res) == 2

    # Pick whichever of the two entries is the revisit.
    if b'warc/revisit' in res[0]:
        cdx = CDXObject(res[0])
    else:
        cdx = CDXObject(res[1])

    assert cdx['urlkey'] == 'org,httpbin)/user-agent'
    assert cdx['mime'] == 'warc/revisit'
    assert cdx['offset'] == '0'
    assert cdx['filename'].startswith(to_path('USER/COLL/'))
    assert cdx['filename'].endswith('.warc.gz')

    fullwarc = os.path.join(self.root_dir, 'warcs', cdx['filename'])

    warcs = r.hgetall('USER:COLL:warc')
    assert len(warcs) == 2
    assert warcs[cdx['filename'].encode('utf-8')] == fullwarc.encode('utf-8')

    with open(fullwarc, 'rb') as fh:
        decomp = DecompressingBufferedReader(fh)

        # Test refers-to headers
        status_headers = StatusAndHeadersParser(['WARC/1.0']).parse(decomp)
        assert status_headers.get_header('WARC-Type') == 'revisit'
        assert status_headers.get_header('WARC-Target-URI') == 'http://httpbin.org/user-agent'
        assert status_headers.get_header('WARC-Date') != ''
        assert status_headers.get_header('WARC-Refers-To-Target-URI') == 'http://httpbin.org/user-agent'
        assert status_headers.get_header('WARC-Refers-To-Date') != ''
def test_anon_auto_delete(self):
    """Flush the session Redis db, then wait for keys, temp dirs and the user dir to go away.

    Two checks are handed to ``self.sleep_try``: first that the Redis keys
    equal ``self.POST_DEL_KEYS`` and no ``temp$*`` entries exist in the WARC
    directory, then that the anonymous user's directory no longer exists.
    """
    session_store = FakeStrictRedis.from_url('redis://localhost:6379/0')
    session_store.flushdb()

    def keys_and_temp_dirs_cleared():
        current_keys = set(self.redis.keys())
        assert current_keys == set(self.POST_DEL_KEYS)

        leftovers = glob.glob(os.path.join(self.warcs_dir, 'temp$*'))
        assert leftovers == []

    # presumably polls the check until it passes or the time limit is hit -- verify against sleep_try
    self.sleep_try(0.1, 10.0, keys_and_temp_dirs_cleared)

    def user_dir_removed():
        user_dir = os.path.join(self.warcs_dir, self.anon_user)
        assert not os.path.isdir(user_dir)

    self.sleep_try(0.1, 5.0, user_dir_removed)
def test_anon_auto_delete(self):
    """Flush the session db and verify cleanup of Redis keys, temp dirs and the anon user dir.

    Both verification steps are run through ``self.sleep_try`` with a 0.1
    first argument and limits of 10.0 and 5.0 respectively.
    """
    FakeStrictRedis.from_url('redis://localhost:6379/0').flushdb()

    temp_glob = os.path.join(self.warcs_dir, 'temp$*')

    def check_redis_and_temp():
        # Only the post-delete keys may remain, and no temp directories.
        assert set(self.redis.keys()) == set(self.POST_DEL_KEYS)
        assert glob.glob(temp_glob) == []

    self.sleep_try(0.1, 10.0, check_redis_and_temp)

    def check_user_dir_gone():
        anon_dir = os.path.join(self.warcs_dir, self.anon_user)
        assert not os.path.isdir(anon_dir)

    self.sleep_try(0.1, 5.0, check_user_dir_gone)
def setup_class(cls, extra_config_file='test_no_invites_config.yaml', init_anon=True, **kwargs):
    """Class-level fixture: prepare directories, environment, Redis and the test app.

    :param extra_config_file: if truthy, its path (joined onto
        ``cls.get_curr_dir()``) is exported as ``WR_USER_CONFIG``.
    :param init_anon: when true, an anonymous user is requested from the app
        and stored in ``cls.anon_user``; otherwise ``cls.anon_user`` is ``None``.
    :param kwargs: passed to ``cls.custom_init``; a truthy ``no_app`` entry
        stops setup before the app is created.
    """
    super(BaseWRTests, cls).setup_class()

    # Directory that receives recorded WARCs; exported to the app via RECORD_ROOT.
    cls.warcs_dir = to_path(cls.root_dir + '/warcs/')

    os.makedirs(cls.warcs_dir)
    os.environ['RECORD_ROOT'] = cls.warcs_dir

    os.environ['WR_CONFIG'] = 'pkg://webrecorder/config/wr.yaml'
    if extra_config_file:
        os.environ['WR_USER_CONFIG'] = os.path.join( cls.get_curr_dir(), extra_config_file)

    # NOTE(review): the next line looks redacted/truncated -- the URL is masked
    # and the trailing ')' is unbalanced, so statements appear to be missing
    # here. Restore the original lines before relying on this block.
    os.environ['REDIS_BASE_URL'] = 'redis://*****:*****@localhost')
    # NOTE(review): '[email protected]' looks like an obfuscated placeholder -- confirm.
    cls.set_nx_env('EMAIL_SMTP_URL', 'smtp://[email protected]:test@localhost:25')

    # Class-level fake Redis client for the base URL, returning str instead of bytes.
    cls.redis = FakeStrictRedis.from_url(os.environ['REDIS_BASE_URL'], decode_responses=True)

    cls.custom_init(kwargs)

    # Callers may request a fixture without the web application.
    if kwargs.get('no_app'):
        return

    cls.appcont = AppController()
    cls.testapp = webtest.TestApp(cls.appcont.app)

    if init_anon:
        res = cls.testapp.get('/api/v1/anon_user')
        cls.anon_user = res.json['anon_user']
    else:
        cls.anon_user = None
def test_error_redis_file_not_found(self):
    """Verify 503 responses for a bad path, a missing hash entry and a missing hash.

    Each step requests the same resource URL and compares the JSON error
    payload returned by the app.
    """
    warc_name = 'example2.warc.gz'
    missing_path = './x-no-such-dir/example2.warc.gz'
    target = '/allredis/resource?url=http://www.example.com/'

    def fetch_error_json():
        # The request itself asserts the 503 status.
        return self.testapp.get(target, status=503).json

    expected_not_found = {'message': 'example2.warc.gz: Archive File Not Found',
                          'errors': {'WARCPathLoader': 'example2.warc.gz: Archive File Not Found'}}

    f = FakeStrictRedis.from_url('redis://localhost/2')

    # Path registered in Redis does not exist on disk.
    f.hset('test:warc', warc_name, missing_path)
    assert fetch_error_json()['message'] == "example2.warc.gz: [Errno 2] No such file or directory: './x-no-such-dir/example2.warc.gz'"

    # Filename no longer present in the hash.
    f.hdel('test:warc', warc_name)
    assert fetch_error_json() == expected_not_found

    # Hash key itself no longer present.
    f.delete('test:warc')
    assert fetch_error_json() == expected_not_found
def test_record_param_user_coll_skip(self):
    """Record with ``SkipDupePolicy`` and check that no additional WARC or CDXJ entry appears.

    Two WARCs are expected both before the request (checked under
    ``/warcs/``) and after it (checked under ``/warcs/USER/COLL/``), and
    ``USER:COLL:cdxj`` must still hold two entries.
    """
    warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')

    dedup_index = self._get_dedup_index(dupe_policy=SkipDupePolicy())

    recorder_app = RecorderApp(self.upstream_url,
                               PerRecordWARCWriter(warc_path, dedup_index=dedup_index))

    # No new entries written
    self._test_all_warcs('/warcs/', 2)

    # Each recorder param is its own '&'-prefixed query argument.
    resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar',
                                 '&param.recorder.user=USER&param.recorder.coll=COLL')
    assert b'HTTP/1.1 200 OK' in resp.body
    assert b'"foo": "bar"' in resp.body

    self._test_all_warcs('/warcs/USER/COLL/', 2)

    # Test Redis CDX
    r = FakeStrictRedis.from_url('redis://localhost/2')

    res = r.zrangebylex('USER:COLL:cdxj', '[org,httpbin)/', '(org,httpbin,')
    assert len(res) == 2
def test_record_multiple_writes_keep_open(self):
    """Write several records through one ``MultiFileWARCWriter`` and check handle caching.

    Two records for collection ``FOO`` must end up in a single WARC whose
    freshly generated CDXJ index equals the entries stored in ``FOO:cdxj``.
    One file handle stays cached until ``close_key`` is called; a further
    write afterwards produces a second WARC.
    """
    warc_path = to_path(self.root_dir + '/warcs/FOO/ABC-{hostname}-{timestamp}.warc.gz')

    rel_path = to_path(self.root_dir + '/warcs/')

    dedup_index = self._get_dedup_index(user=False)

    writer = MultiFileWARCWriter(warc_path, dedup_index=dedup_index)
    recorder_app = RecorderApp(self.upstream_url, writer)

    # First Record ('&' introduces the recorder param as a query argument)
    resp = self._test_warc_write(recorder_app, 'httpbin.org',
                                 '/get?foo=bar', '&param.recorder.coll=FOO')
    assert b'HTTP/1.1 200 OK' in resp.body
    assert b'"foo": "bar"' in resp.body

    # Second Record
    resp = self._test_warc_write(recorder_app, 'httpbin.org',
                                 '/get?boo=far', '&param.recorder.coll=FOO')
    assert b'HTTP/1.1 200 OK' in resp.body
    assert b'"boo": "far"' in resp.body

    self._test_all_warcs('/warcs/FOO/', 1)

    # Check two records in WARC
    r = FakeStrictRedis.from_url('redis://localhost/2')
    res = r.zrangebylex('FOO:cdxj', '[org,httpbin)/', '(org,httpbin,')
    assert len(res) == 2

    files, coll_dir = self._test_all_warcs('/warcs/FOO/', 1)
    fullname = coll_dir + files[0]

    # Re-index the WARC from disk, using the filename relative to the warcs root.
    cdxout = BytesIO()
    with open(fullname, 'rb') as fh:
        filename = os.path.relpath(fullname, rel_path)
        write_cdx_index(cdxout, fh, filename,
                        cdxj=True, append_post=True, sort=True)

    res = [CDXObject(x) for x in res]

    cdxres = cdxout.getvalue().strip()
    cdxres = cdxres.split(b'\n')
    cdxres = [CDXObject(x) for x in cdxres]

    assert cdxres == res

    # The WARC is still held open by the writer until its key is closed.
    assert len(writer.fh_cache) == 1

    writer.close_key(to_path(self.root_dir + '/warcs/FOO/'))

    assert len(writer.fh_cache) == 0

    writer.close()

    # Writing after the close is expected to start a second WARC.
    self._test_warc_write(recorder_app, 'httpbin.org',
                          '/get?boo=far', '&param.recorder.coll=FOO')

    self._test_all_warcs('/warcs/FOO/', 2)

    warcs = r.hgetall('FOO:warc')
    assert len(warcs) == 2

    writer.close()
    assert len(writer.fh_cache) == 0
def add_cdx_to_redis(filename, key, redis_url='redis://localhost:6379/2'):
    """Load every line of a CDX file into a Redis sorted set.

    :param filename: path of the file to read (opened in binary mode).
    :param key: name of the sorted set that receives the lines.
    :param redis_url: URL used to create the ``FakeStrictRedis`` client.

    Each line is added with score 0 after stripping trailing whitespace.
    """
    client = FakeStrictRedis.from_url(redis_url)

    with open(filename, 'rb') as cdx_file:
        for raw_line in cdx_file:
            entry = raw_line.rstrip()
            client.zadd(key, 0, entry)
def setup_class(cls):
    """Run the parent fixture with the dedup config and attach a fake Redis client.

    The parent ``setup_class`` receives ``config_test_record_dedup.yaml``
    and a ``custom_config`` selecting the ``live`` recorder.
    """
    recorder_settings = {'recorder': 'live'}
    super(TestRecordDedup, cls).setup_class(
        'config_test_record_dedup.yaml',
        custom_config=recorder_settings)

    # Client for Redis db 0, stored on the class for the tests.
    cls.redis = FakeStrictRedis.from_url("redis://localhost/0")
def setup_module():
    """Reset ``test:rediscdx`` and fill it from ``testdata/iana.cdxj``.

    The key is deleted first, then every line of the file (trailing
    whitespace stripped) is added to the sorted set with score 0.
    """
    cdx_key = 'test:rediscdx'

    client = FakeStrictRedis.from_url('redis://localhost:6379/2')
    client.delete(cdx_key)

    with open('testdata/iana.cdxj', 'rb') as cdx_file:
        for raw_line in cdx_file:
            entry = raw_line.rstrip()
            client.zadd(cdx_key, 0, entry)
def get_config():
    """Return a ``TestCacheConfig`` whose ``cache_redis_client`` is a fake Redis client.

    The client is built from the config's ``CACHE_ALCHEMY_REDIS_URL`` with
    ``decode_responses=True``.
    """
    cfg = TestCacheConfig()
    fake_client = FakeStrictRedis.from_url(cfg.CACHE_ALCHEMY_REDIS_URL,
                                           decode_responses=True)
    cfg.cache_redis_client = fake_client
    return cfg
def setup_class(cls, extra_config_file='test_no_invites_config.yaml', init_anon=True, **kwargs):
    """Class-level fixture: prepare directories, environment, config, Redis and the test app.

    :param extra_config_file: if truthy, its path (joined onto
        ``cls.get_curr_dir()``) is exported as ``WR_USER_CONFIG``.
    :param init_anon: when true, an anonymous user is created through the
        API and stored in ``cls.anon_user``; otherwise ``cls.anon_user`` is
        ``None``.
    :param kwargs: passed to ``cls.custom_init``; a truthy ``no_app`` entry
        stops setup before the app is created.
    """
    super(BaseWRTests, cls).setup_class()

    # WARC and storage directories; only the WARC directory is created here.
    cls.warcs_dir = to_path(cls.root_dir + '/warcs/')
    cls.storage_dir = os.path.join(to_path(cls.root_dir + '/storage/'))

    os.makedirs(cls.warcs_dir)
    os.environ['RECORD_ROOT'] = cls.warcs_dir
    os.environ['STORAGE_ROOT'] = cls.storage_dir

    cls.storage_today = os.path.join(cls.storage_dir, today_str())

    os.environ['WR_CONFIG'] = 'pkg://webrecorder/config/wr.yaml'
    if extra_config_file:
        os.environ['WR_USER_CONFIG'] = os.path.join(cls.get_curr_dir(), extra_config_file)

    # NOTE(review): the next line looks redacted/truncated -- the URL is masked
    # and the trailing ')' is unbalanced, so statements appear to be missing
    # here (REDIS_SESSION_URL, read below, is not set anywhere in this block).
    # Restore the original lines before relying on this block.
    os.environ['REDIS_BASE_URL'] = 'redis://*****:*****@localhost')
    # NOTE(review): '[email protected]' looks like an obfuscated placeholder -- confirm.
    cls.set_nx_env('EMAIL_SMTP_URL', 'smtp://[email protected]:test@localhost:25')
    cls.set_nx_env('NO_REMOTE_BROWSERS', '1')

    def load_wr_config():
        # Load the overlay config, then override the dynamic stats/ref key templates.
        config = load_overlay_config('WR_CONFIG', 'pkg://webrecorder/config/wr.yaml', 'WR_USER_CONFIG', '')
        config['dyn_stats_key_templ'] = {
            'rec': 'r:{rec}:<sesh_id>:stats:',
            'coll': 'c:{coll}:<sesh_id>:stats:'
        }

        config['dyn_ref_templ'] = {
            'rec': 'r:{rec}:<sesh_id>:ref:',
            'coll': 'c:{coll}:<sesh_id>:ref:',
        }

        return config

    # Replace the controller module's config loader with the overriding version above.
    import webrecorder.maincontroller
    webrecorder.maincontroller.load_wr_config = load_wr_config

    # Fake Redis clients for the base and session databases, returning str instead of bytes.
    cls.redis = FakeStrictRedis.from_url(os.environ['REDIS_BASE_URL'], decode_responses=True)
    cls.sesh_redis = FakeStrictRedis.from_url(os.environ['REDIS_SESSION_URL'], decode_responses=True)

    cls.custom_init(kwargs)

    # Callers may request a fixture without the web application.
    if kwargs.get('no_app'):
        return

    cls.maincont = MainController()
    cls.testapp = webtest.TestApp(cls.maincont.app)

    if init_anon:
        res = cls.testapp.post('/api/v1/auth/anon_user')
        cls.anon_user = res.json['user']['username']
        cls.assert_temp_user_sesh(cls.anon_user)
    else:
        cls.anon_user = None