def test_file_not_found(self):
    """Query a FileIndexSource built on the path 'testdata/not-found-x'.

    Expects an empty result listing and the NotFoundException string for
    that path stored under ``errs['source']``.
    """
    url = 'http://x-not-found-x.notfound/'
    source = FileIndexSource('testdata/not-found-x')

    results, errors = self.query_single_source(source, {'url': url, 'limit': 3})

    assert key_ts_res(results) == ''
    # The whole error dict is the assertion message, so a mismatch shows it.
    assert errors['source'] == "NotFoundException('testdata/not-found-x',)", errors
def test_file_not_found(self):
    """Query a FileIndexSource built on the path 'testdata/not-found-x'.

    Expects an empty result listing and the NotFoundException string for
    that path stored under ``errs['source']``.
    """
    url = 'http://x-not-found-x.notfound/'
    source = FileIndexSource('testdata/not-found-x')

    results, errors = self.query_single_source(source, {'url': url, 'limit': 3})

    assert key_ts_res(results) == ''
    # The whole error dict is the assertion message, so a mismatch shows it.
    assert errors['source'] == "NotFoundException('testdata/not-found-x',)", errors
def test_live(self):
    """Query a LiveIndexSource for http://example.com/.

    The expected record is formatted with the value of ``timestamp_now()``
    taken after the query call, and no errors are expected.
    """
    source = LiveIndexSource()
    url = 'http://example.com/'
    results, errors = self.query_single_source(source, {'url': url})

    # Read the clock only after the query has been issued.
    now = timestamp_now()
    wanted = 'com,example)/ {0} http://example.com/'.format(now)

    assert key_ts_res(results, 'load_url') == wanted
    assert errors == {}
def test_another_remote_not_found(self):
    """Query a MementoIndexSource made from a timegate URL for a missing page.

    Expects an empty result listing and the NotFoundException string for
    the timemap link URL stored under ``errs['source']``.
    """
    url = 'http://x-not-found-x.notfound/'
    source = MementoIndexSource.from_timegate_url('http://webenact.rhizome.org/all/')

    results, errors = self.query_single_source(source, {'url': url, 'limit': 3})

    wanted_error = "NotFoundException('http://webenact.rhizome.org/all/timemap/link/http://x-not-found-x.notfound/',)"
    assert key_ts_res(results) == ''
    assert errors['source'] == wanted_error
def test_remote_closest_loader(self, remote_source):
    """Run a closest-match query limited to one record against remote_source.

    Expects the single 20141014162333 record with its load_url and an
    empty error dict.
    """
    params = {
        'url': 'http://instagram.com/amaliaulman',
        'closest': '20141014162332',
        'limit': 1,
    }
    results, errors = self.query_single_source(remote_source, params)

    wanted = """\
com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""

    assert key_ts_res(results, 'load_url') == wanted
    assert errors == {}
def test_exact_query_2(self):
    """Run an exact-URL query for http://example.com/some/path via do_query.

    Expects two records, no errors, and a specific value in ``query_url``.
    """
    params = {'url': 'http://example.com/some/path'}
    results, errors = self.do_query(params)

    wanted = """\
com,example)/some/path 20180112200243 example.warc.gz
com,example)/some/path 20180216200300 example.warc.gz"""

    assert key_ts_res(results) == wanted
    assert errors == {}
    # NOTE(review): query_url is defined outside this method; presumably it
    # captures the last upstream request URL -- confirm against the fixture.
    assert query_url == 'http://localhost:8080/path?q=type%3Aurlquery+url%3Ahttp%253A%252F%252Fexample.com%252Fsome%252Fpath'
def test_another_remote_not_found(self):
    """Query a MementoIndexSource made from an https timegate URL for a missing page.

    Expects an empty result listing and the NotFoundException string for
    the timemap link URL stored under ``errs['source']``.
    """
    url = 'http://x-not-found-x.notfound/'
    source = MementoIndexSource.from_timegate_url('https://webenact.rhizome.org/all/')

    results, errors = self.query_single_source(source, {'url': url, 'limit': 3})

    wanted_error = "NotFoundException('https://webenact.rhizome.org/all/timemap/link/http://x-not-found-x.notfound/',)"
    assert key_ts_res(results) == ''
    assert errors['source'] == wanted_error
def test_live(self):
    """Query a LiveIndexSource for http://example.com/.

    The expected record is formatted with the value of ``timestamp_now()``
    taken after the query call, and no errors are expected.
    """
    source = LiveIndexSource()
    url = 'http://example.com/'
    results, errors = self.query_single_source(source, {'url': url})

    # Read the clock only after the query has been issued.
    now = timestamp_now()
    wanted = 'com,example)/ {0} http://example.com/'.format(now)

    assert key_ts_res(results, 'load_url') == wanted
    assert errors == {}
def test_remote_closest_loader(self, remote_source):
    """Run a closest-match query limited to one record against remote_source.

    Expects the single 20141014162333 record with its https load_url and
    an empty error dict.
    """
    params = {
        'url': 'http://instagram.com/amaliaulman',
        'closest': '20141014162332',
        'limit': 1,
    }
    results, errors = self.query_single_source(remote_source, params)

    wanted = """\
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""

    assert key_ts_res(results, 'load_url') == wanted
    assert errors == {}
def test_remote_loader_with_prefix(self):
    """Query the 'remote_cdx' source for a URL carrying a query string.

    The query passes ``allowFuzzy='0'`` and expects the record for the URL
    without the query string, with no errors.
    """
    remote_source = self.all_sources['remote_cdx']
    params = {
        'url': 'http://instagram.com/amaliaulman?__=1234234234',
        'closest': '20141014162332',
        'limit': 1,
        'allowFuzzy': '0',
    }
    results, errors = self.query_single_source(remote_source, params)

    wanted = """\
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""

    assert key_ts_res(results, 'load_url') == wanted
    assert errors == {}
def test_file_prefix_loader(self, local_source):
    """Query local_source with the wildcard URL http://iana.org/domains/root/*.

    Expects three records under org,iana)/domains/root/ and no errors.
    """
    params = {'url': 'http://iana.org/domains/root/*'}
    results, errors = self.query_single_source(local_source, params)

    wanted = """\
org,iana)/domains/root/db 20140126200927 iana.warc.gz
org,iana)/domains/root/db 20140126200928 iana.warc.gz
org,iana)/domains/root/servers 20140126201227 iana.warc.gz"""

    assert key_ts_res(results) == wanted
    assert errors == {}
def test_file_prefix_loader(self, local_source):
    """Query local_source with the wildcard URL http://iana.org/domains/root/*.

    Expects three records under org,iana)/domains/root/ and no errors.
    """
    params = {'url': 'http://iana.org/domains/root/*'}
    results, errors = self.query_single_source(local_source, params)

    wanted = """\
org,iana)/domains/root/db 20140126200927 iana.warc.gz
org,iana)/domains/root/db 20140126200928 iana.warc.gz
org,iana)/domains/root/servers 20140126201227 iana.warc.gz"""

    assert key_ts_res(results) == wanted
    assert errors == {}
def test_all_not_found(self, all_source):
    """Query all_source for a URL that is expected to yield no records.

    Only the source registered as ``self.all_sources['memento']`` is
    expected to report an error mentioning the URL; any other source is
    expected to return an empty error dict.
    """
    params = {'url': 'http://x-not-found-x.notfound/', 'limit': 3}
    results, errors = self.query_single_source(all_source, params)

    assert key_ts_res(results) == ''

    reports_error = all_source == self.all_sources['memento']
    if not reports_error:
        assert errors == {}
    else:
        assert 'x-not-found-x.notfound/' in errors['source']
def test_all_not_found(self, all_source):
    """Query all_source for a URL that is expected to yield no records.

    Only the source registered under the first name in ``remote_sources``
    is expected to report an error mentioning the partly-encoded URL; any
    other source is expected to return an empty error dict.
    """
    params = {'url': 'http://x-not-found-x.notfound/', 'limit': 3}
    results, errors = self.query_single_source(all_source, params)

    assert key_ts_res(results) == ''

    reports_error = all_source == self.all_sources[remote_sources[0]]
    if not reports_error:
        assert errors == {}
    else:
        assert 'http%3A//x-not-found-x.notfound/' in errors['source']
def test_local_cdxj_loader(self, local_source):
    """Query local_source for the Inconsolata.otf URL with a limit of 3.

    Expects three records keyed with the lower-cased path and no errors.
    """
    params = {
        'url': 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf',
        'limit': 3,
    }
    results, errors = self.query_single_source(local_source, params)

    wanted = """\
org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 iana.warc.gz
org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200912 iana.warc.gz
org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200930 iana.warc.gz"""

    assert key_ts_res(results) == wanted
    assert errors == {}
def test_local_cdxj_loader(self, local_source):
    """Query local_source for the Inconsolata.otf URL with a limit of 3.

    Expects three records keyed with the lower-cased path and no errors.
    """
    params = {
        'url': 'http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf',
        'limit': 3,
    }
    results, errors = self.query_single_source(local_source, params)

    wanted = """\
org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 iana.warc.gz
org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200912 iana.warc.gz
org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200930 iana.warc.gz"""

    assert key_ts_res(results) == wanted
    assert errors == {}
def test_remote_loader(self, remote_source):
    """Query remote_source for http://instagram.com/amaliaulman with no limit.

    Expects four records whose load_url values point into the
    excellences-and-perfections collection, and no errors.
    """
    params = {'url': 'http://instagram.com/amaliaulman'}
    results, errors = self.query_single_source(remote_source, params)

    wanted = """\
com,instagram)/amaliaulman 20141014150552 https://webenact.rhizome.org/excellences-and-perfections/20141014150552id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014155217 https://webenact.rhizome.org/excellences-and-perfections/20141014155217id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014171636 https://webenact.rhizome.org/excellences-and-perfections/20141014171636id_/http://instagram.com/amaliaulman"""

    assert key_ts_res(results, 'load_url') == wanted
    assert errors == {}
def test_remote_closest_wb_memento_loader(self):
    """Run a closest-match query through a WBMementoIndexSource.

    The source is constructed with the same replay template as its first
    and third arguments and an empty string as its second. Expects the
    single 20141014162333 record and no errors.
    """
    replay = 'https://webenact.rhizome.org/all/{timestamp}id_/{url}'
    source = WBMementoIndexSource(replay, '', replay)

    params = {
        'url': 'http://instagram.com/amaliaulman',
        'closest': '20141014162332',
        'limit': 1,
    }
    results, errors = self.query_single_source(source, params)

    wanted = """\
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""

    assert key_ts_res(results, 'load_url') == wanted
    assert errors == {}
def test_prefix_query(self):
    """Run a ``matchType='prefix'`` query for http://example.com/ via do_query.

    Expects four records (two for the root, two for /some/path) and no
    errors.
    """
    params = {'url': 'http://example.com/', 'matchType': 'prefix'}
    results, errors = self.do_query(params)

    wanted = """\
com,example)/ 20180112200243 example.warc.gz
com,example)/ 20180216200300 example.warc.gz
com,example)/some/path 20180112200243 example.warc.gz
com,example)/some/path 20180216200300 example.warc.gz"""

    assert key_ts_res(results) == wanted
    assert errors == {}
def test_remote_closest_wb_memnto_loader(self):
    """Run a closest-match query through a WBMementoIndexSource.

    The source is constructed with the same replay template as its first
    and third arguments and an empty string as its second. Expects the
    single 20141014162333 record and no errors.
    """
    # NOTE(review): 'memnto' in the method name looks like a typo for
    # 'memento'; the name is kept so the test id stays unchanged.
    replay = 'http://webenact.rhizome.org/all/{timestamp}id_/{url}'
    source = WBMementoIndexSource(replay, '', replay)

    params = {
        'url': 'http://instagram.com/amaliaulman',
        'closest': '20141014162332',
        'limit': 1,
    }
    results, errors = self.query_single_source(source, params)

    wanted = """\
com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""

    assert key_ts_res(results, 'load_url') == wanted
    assert errors == {}
def test_remote_loader(self, remote_source):
    """Query remote_source for http://instagram.com/amaliaulman with no limit.

    Expects four records whose load_url values point into the 'all'
    collection, and no errors.
    """
    params = {'url': 'http://instagram.com/amaliaulman'}
    results, errors = self.query_single_source(remote_source, params)

    wanted = """\
com,instagram)/amaliaulman 20141014150552 http://webenact.rhizome.org/all/20141014150552id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014155217 http://webenact.rhizome.org/all/20141014155217id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014162333 http://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014171636 http://webenact.rhizome.org/all/20141014171636id_/http://instagram.com/amaliaulman"""

    assert key_ts_res(results, 'load_url') == wanted
    assert errors == {}
def test_exact_query(self):
    """Run an exact-URL query for http://example.com/ with a limit of 100.

    Expects two records, no errors, a specific value in ``query_url``, a
    'length' of '123' on the first record and no 'length' on the second.
    """
    params = {'url': 'http://example.com/', 'limit': 100}
    results, errors = self.do_query(params)

    # Materialise the results so they can be both formatted and indexed.
    records = list(results)

    wanted = """\
com,example)/ 20180112200243 example.warc.gz
com,example)/ 20180216200300 example.warc.gz"""

    assert key_ts_res(records) == wanted
    assert errors == {}
    # NOTE(review): query_url is defined outside this method; presumably it
    # captures the last upstream request URL -- confirm against the fixture.
    assert query_url == 'http://localhost:8080/path?q=limit%3A100+type%3Aurlquery+url%3Ahttp%253A%252F%252Fexample.com%252F'

    assert records[0]['length'] == '123'
    assert 'length' not in records[1]