def ia_call(argv, expected_exit_code=0):
    """Run ``ia.main()`` with ``argv`` and check the resulting exit code.

    :param argv: Argument list assigned to ``sys.argv`` before the call.
    :param expected_exit_code: Exit code the command is expected to
        produce; defaults to ``0``.
    :raises AssertionError: If the observed exit code differs from
        ``expected_exit_code``.
    """
    sys.argv = argv
    # A normal return (no SystemExit) counts as exit code 0, so the check
    # below also runs when ``ia.main()`` does not call ``sys.exit``.
    exit_code = 0
    try:
        ia.main()
    except SystemExit as exc:
        # A falsy code (``None`` or ``0``) is treated as success.
        exit_code = exc.code or 0
    assert exit_code == expected_exit_code
def test_ia_upload_status_check(capsys):
    """Check ``ia upload --status-check`` output for two mocked responses.

    The first response is ``STATUS_CHECK_RESPONSE`` unchanged and must
    produce the success message on stdout.  The second is the same payload
    with ``over_limit`` set to ``1`` and must produce exit code 1 plus the
    over-limit warning on stderr.
    """
    status_url = '{0}//s3.us.archive.org'.format(protocol)
    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, status_url,
                 body=STATUS_CHECK_RESPONSE,
                 status=200,
                 content_type='application/json')
        sys.argv = ['ia', 'upload', 'nasa', '--status-check']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code
        out, err = capsys.readouterr()
        assert 'success: nasa is accepting requests.' in out

        over_limit = json.loads(STATUS_CHECK_RESPONSE)
        over_limit['over_limit'] = 1
        rsps.add(responses.GET, status_url,
                 body=json.dumps(over_limit),
                 status=200,
                 content_type='application/json')
        sys.argv = ['ia', 'upload', 'nasa', '--status-check']
        exit_code = None
        try:
            ia.main()
        except SystemExit as exc:
            exit_code = exc.code
        # Asserted outside the handler so that a normal return (no
        # SystemExit at all) fails the test instead of passing silently.
        assert exit_code == 1
        out, err = capsys.readouterr()
        assert ('warning: nasa is over limit, and not accepting requests. '
                'Expect 503 SlowDown errors.') in err
def test_ia_upload(tmpdir):
    """Upload ``test.txt`` with ``--log`` and check the log file entry.

    The metadata GET and the S3 PUT are both mocked; the test passes when
    ``internetarchive.log`` contains the "uploaded ..." line for the file.
    """
    tmpdir.chdir()
    with open('test.txt', 'w') as source_file:
        source_file.write('foo')

    metadata_url = '{0}//archive.org/metadata/nasa'.format(protocol)
    upload_url = '{0}//s3.us.archive.org/nasa/test.txt'.format(protocol)
    expected_entry = ('uploaded test.txt to {0}//s3.us.archive.org/nasa/'
                      'test.txt'.format(protocol))

    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, metadata_url,
                 body=ITEM_METADATA,
                 status=200,
                 content_type='application/json')
        rsps.add(responses.PUT, upload_url,
                 body='',
                 status=200,
                 content_type='text/plain')
        sys.argv = ['ia', '--log', 'upload', 'nasa', 'test.txt']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

    with open('internetarchive.log', 'r') as log_file:
        assert expected_entry in log_file.read()
def test_ia_search_itemlist(capsys):
    """``ia search --itemlist`` must print one identifier per line.

    Both advancedsearch requests are mocked with ``TEST_SEARCH_RESPONSE``;
    stdout is compared against the identifiers found in that fixture.
    """
    first_url = ('{0}//archive.org/advancedsearch.php?'
                 'q=collection%3Aattentionkmartshoppers&output=json&rows=0&'
                 'sort%5B0%5D=identifier+asc&fl%5B0%5D=identifier'.format(protocol))
    second_url = ('{0}//archive.org/advancedsearch.php?'
                  'fl%5B0%5D=identifier&rows=250&sort%5B0%5D=identifier+asc&q=collection%3'
                  'Aattentionkmartshoppers&output=json&page=1'.format(protocol))

    with responses.RequestsMock() as rsps:
        for mocked_url in (first_url, second_url):
            rsps.add(responses.GET, mocked_url,
                     body=TEST_SEARCH_RESPONSE,
                     status=200,
                     match_querystring=True)

        sys.argv = ['ia', 'search', 'collection:attentionkmartshoppers', '--itemlist']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        out, err = capsys.readouterr()
        docs = json.loads(TEST_SEARCH_RESPONSE)['response']['docs']
        identifiers = [doc['identifier'] for doc in docs]
        expected_output = '\n'.join(identifiers) + '\n'
        assert out == expected_output
def test_ia_search_itemlist(capsys):
    """``ia search --itemlist`` must print 200 identifiers over two pages.

    Two POST requests to the scrape endpoint are mocked: the first returns
    ``TEST_SCRAPE_RESPONSE`` as-is, the second returns the same payload
    with its ``cursor`` key removed.  Every mocked request must be fired
    (``assert_all_requests_are_fired=True``).
    """
    with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
        url1 = ('{0}//archive.org/services/search/beta/scrape.php'
                '?q=collection%3Aattentionkmartshoppers'
                '&REQUIRE_AUTH=true&size=10000'.format(protocol))
        url2 = ('{0}//archive.org/services/search/beta/scrape.php?'
                'cursor=W3siaWRlbnRpZmllciI6IjE5NjEtTC0wNTkxNCJ9XQ%3D%3D'
                '&REQUIRE_AUTH=true&q=collection%3Aattentionkmartshoppers'
                '&size=10000'.format(protocol))
        rsps.add(responses.POST, url1,
                 body=TEST_SCRAPE_RESPONSE,
                 status=200,
                 match_querystring=True)

        # Second page: same payload without the ``cursor`` key.
        last_page = json.loads(TEST_SCRAPE_RESPONSE)
        del last_page['cursor']
        rsps.add(responses.POST, url2,
                 body=json.dumps(last_page),
                 status=200,
                 match_querystring=True)

        sys.argv = ['ia', 'search', 'collection:attentionkmartshoppers', '--itemlist']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        out, err = capsys.readouterr()
        # The unused ``json.loads(TEST_SEARCH_RESPONSE)`` local was dropped;
        # only the number of printed identifiers is checked here.
        assert len(out.split()) == 200
def test_ia_upload_unicode(tmpdir):
    """Upload a file with a non-ASCII name and non-ASCII metadata.

    The S3 PUT is mocked at the percent-encoded form of the file name, and
    the log file must record the upload with that encoded URL.
    """
    tmpdir.chdir()
    with open('தமிழ் - baz ∆.txt', 'w') as fh:
        fh.write('unicode foo')
    # The previously assigned but never used ``fname`` local was removed.
    efname = '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20baz%20%E2%88%86.txt'
    with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.GET, '{0}//archive.org/metadata/nasa'.format(protocol),
                 body=ITEM_METADATA,
                 status=200,
                 content_type='application/json')
        rsps.add(responses.PUT,
                 '{0}//s3.us.archive.org/nasa/{1}'.format(protocol, efname),
                 body='',
                 status=200,
                 content_type='text/plain')
        sys.argv = [
            'ia', '--log', 'upload', 'nasa', 'தமிழ் - baz ∆.txt',
            '--metadata', 'foo:∆'
        ]
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

    with open('internetarchive.log', 'r') as fh:
        assert ('uploaded தமிழ் - baz ∆.txt to {0}//s3.us.archive.org/nasa/'
                '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20'
                'baz%20%E2%88%86.txt'.format(protocol)) in fh.read()
def test_ia_search_sort_asc(capsys):
    """``ia search --sort identifier:asc`` must print each doc as JSON.

    Both advancedsearch requests are mocked with ``TEST_SEARCH_RESPONSE``;
    stdout must equal the fixture's docs, one ``json.dumps`` line per doc.
    """
    first_url = (
        '{0}//archive.org/advancedsearch.php?q=collection%3Anasa&output=json&'
        'rows=0&sort%5B0%5D=identifier+asc'.format(protocol))
    second_url = (
        '{0}//archive.org/advancedsearch.php?q=collection%3Anasa&output=json&'
        'rows=250&sort%5B0%5D=identifier+asc&page=1'.format(protocol))

    with responses.RequestsMock() as rsps:
        for mocked_url in (first_url, second_url):
            rsps.add(responses.GET, mocked_url,
                     body=TEST_SEARCH_RESPONSE,
                     status=200,
                     match_querystring=True)

        sys.argv = ['ia', 'search', 'collection:nasa', '--sort', 'identifier:asc']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        out, err = capsys.readouterr()
        docs = json.loads(TEST_SEARCH_RESPONSE)['response']['docs']
        dumped_docs = [json.dumps(doc) for doc in docs]
        expected_output = '\n'.join(dumped_docs) + '\n'
        assert out == expected_output
def test_ia_metadata_exists(capsys, testitem_metadata):
    """Check ``ia metadata --exists`` for a populated and an empty response.

    With ``testitem_metadata`` as the mocked body the command must exit 0
    and print ``nasa exists``; with an empty JSON object it must exit 1
    and print ``nasa does not exist`` on stderr.
    """
    metadata_url = '{0}//archive.org/metadata/nasa'.format(protocol)
    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, metadata_url,
                 body=testitem_metadata,
                 status=200)
        sys.argv = ['ia', 'metadata', '--exists', 'nasa']
        try:
            ia.main()
        except SystemExit as exc:
            assert exc.code == 0
        out, err = capsys.readouterr()
        assert out == 'nasa exists\n'

        rsps.add(responses.GET, metadata_url,
                 body='{}',
                 status=200)
        sys.argv = ['ia', 'metadata', '--exists', 'nasa']
        exit_code = None
        try:
            ia.main()
        except SystemExit as exc:
            exit_code = exc.code
        # Asserted outside the handler so that a normal return (no
        # SystemExit at all) fails the test instead of passing silently.
        assert exit_code == 1
        out, err = capsys.readouterr()
        assert err == 'nasa does not exist\n'
def test_ia_upload_unicode(tmpdir):
    """Upload a file whose name and metadata contain non-ASCII characters.

    The mocked S3 PUT URL uses the percent-encoded file name; afterwards
    ``internetarchive.log`` must contain the matching "uploaded ..." line.
    """
    tmpdir.chdir()
    with open('தமிழ் - baz ∆.txt', 'w') as fh:
        fh.write('unicode foo')
    # ``fname`` was assigned here but never read, so it has been removed.
    efname = '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20baz%20%E2%88%86.txt'
    with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.GET, '{0}//archive.org/metadata/nasa'.format(protocol),
                 body=ITEM_METADATA,
                 status=200,
                 content_type='application/json')
        rsps.add(responses.PUT,
                 '{0}//s3.us.archive.org/nasa/{1}'.format(protocol, efname),
                 body='',
                 status=200,
                 content_type='text/plain')
        sys.argv = ['ia', '--log', 'upload', 'nasa', 'தமிழ் - baz ∆.txt',
                    '--metadata', 'foo:∆']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

    with open('internetarchive.log', 'r') as fh:
        assert ('uploaded தமிழ் - baz ∆.txt to {0}//s3.us.archive.org/nasa/'
                '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20'
                'baz%20%E2%88%86.txt'.format(protocol)) in fh.read()
def test_ia_search_itemlist(capsys):
    """``ia search --itemlist`` must print 200 identifiers over two pages.

    Two GET requests to the scrape endpoint are mocked: the first returns
    ``TEST_SCRAPE_RESPONSE`` as-is, the second returns the same payload
    with its ``cursor`` key removed.
    """
    with responses.RequestsMock() as rsps:
        url1 = ('{0}//archive.org/services/search/beta/scrape.php'
                '?q=collection%3Aattentionkmartshoppers'
                '&fields=identifier&size=10000'.format(protocol))
        url2 = ('{0}//archive.org/services/search/beta/scrape.php'
                '?q=collection%3Aattentionkmartshoppers&fields=identifier'
                '&cursor=W3siaWRlbnRpZmllciI6IjE5NjEtTC0wNTkxNCJ9XQ%3D%3D'
                '&size=10000'.format(protocol))
        rsps.add(responses.GET, url1,
                 body=TEST_SCRAPE_RESPONSE,
                 status=200,
                 match_querystring=True)

        # Second page: same payload without the ``cursor`` key.
        last_page = json.loads(TEST_SCRAPE_RESPONSE)
        del last_page['cursor']
        rsps.add(responses.GET, url2,
                 body=json.dumps(last_page),
                 status=200,
                 match_querystring=True)

        sys.argv = ['ia', 'search', 'collection:attentionkmartshoppers', '--itemlist']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        out, err = capsys.readouterr()
        # The unused ``json.loads(TEST_SEARCH_RESPONSE)`` local was dropped;
        # only the number of printed identifiers is checked here.
        assert len(out.split()) == 200
def test_ia_metadata_modify(capsys, testitem_metadata):
    """``ia metadata --modify`` must print the success line with the log URL.

    The metadata endpoint is mocked for GET, POST and a second GET; the
    modified key is made unique per run from the current time.
    """
    metadata_url = '{0}//archive.org/metadata/nasa'.format(protocol)
    md_rsp = ('{"success":true,"task_id":447613301,'
              '"log":"https://catalogd.archive.org/log/447613301"}')

    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, metadata_url,
                 body=testitem_metadata,
                 status=200)
        rsps.add(responses.POST, metadata_url,
                 body=md_rsp,
                 status=200)
        rsps.add(responses.GET, metadata_url,
                 body=testitem_metadata,
                 status=200)

        valid_key = "foo-{k}".format(k=int(time()))
        modify_arg = '{0}:test_value'.format(valid_key)
        sys.argv = ['ia', 'metadata', '--modify', modify_arg, 'nasa']
        try:
            ia.main()
        except SystemExit as exc:
            assert exc.code == 0

        out, err = capsys.readouterr()
        assert out == 'nasa - success: https://catalogd.archive.org/log/447613301\n'
def test_ia_upload_403(capsys):
    """A 403 from the mocked S3 PUT must make ``ia upload`` exit with 1.

    The error body is a ``SignatureDoesNotMatch`` document; stderr must
    contain the "error uploading ... 403" message for this test file.
    """
    s3_error = ('<Error>'
                '<Code>SignatureDoesNotMatch</Code>'
                '<Message>The request signature we calculated does not match '
                'the signature you provided. Check your AWS Secret Access Key '
                'and signing method. For more information, see REST '
                'Authentication and SOAP Authentication for details.</Message>'
                "<Resource>'PUT\n\n\n\n/iacli-test-item60/test-replace.txt'</Resource>"
                '<RequestId>18a9c5ea-088f-42f5-9fcf-70651cc085ca</RequestId>'
                '</Error>')
    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, '{0}//archive.org/metadata/nasa'.format(protocol),
                 body=ITEM_METADATA,
                 status=200,
                 content_type='application/json')
        rsps.add(responses.PUT,
                 '{0}//s3.us.archive.org/nasa/test_ia_upload.py'.format(protocol),
                 body=s3_error,
                 status=403,
                 content_type='text/plain')
        sys.argv = ['ia', 'upload', 'nasa', __file__]
        exit_code = None
        try:
            ia.main()
        except SystemExit as exc:
            exit_code = exc.code
        # Asserted outside the handler so that a normal return (no
        # SystemExit at all) fails the test instead of passing silently.
        assert exit_code == 1

        out, err = capsys.readouterr()
        assert 'error uploading test_ia_upload.py to nasa, 403' in err
def test_ia_upload_403(capsys):
    """A 403 from the mocked S3 PUT must make ``ia upload`` exit with 1.

    The error body is a ``SignatureDoesNotMatch`` document; stderr must
    contain the "error uploading ..." message for this test file.
    """
    s3_error = ('<Error>'
                '<Code>SignatureDoesNotMatch</Code>'
                '<Message>The request signature we calculated does not match '
                'the signature you provided. Check your AWS Secret Access Key '
                'and signing method. For more information, see REST '
                'Authentication and SOAP Authentication for details.</Message>'
                "<Resource>'PUT\n\n\n\n/iacli-test-item60/test-replace.txt'</Resource>"
                '<RequestId>18a9c5ea-088f-42f5-9fcf-70651cc085ca</RequestId>'
                '</Error>')
    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, '{0}//archive.org/metadata/nasa'.format(protocol),
                 body=ITEM_METADATA,
                 status=200,
                 content_type='application/json')
        rsps.add(responses.PUT,
                 '{0}//s3.us.archive.org/nasa/test_ia_upload.py'.format(protocol),
                 body=s3_error,
                 status=403,
                 content_type='text/plain')
        sys.argv = ['ia', 'upload', 'nasa', __file__]
        exit_code = None
        try:
            ia.main()
        except SystemExit as exc:
            exit_code = exc.code
        # Asserted outside the handler so that a normal return (no
        # SystemExit at all) fails the test instead of passing silently.
        assert exit_code == 1

        out, err = capsys.readouterr()
        assert 'error uploading test_ia_upload.py' in err
def test_ia_upload_invalid_cmd(capsys):
    """``ia upload`` with ``nofile.txt`` must exit with 1 and explain why.

    stderr must contain the "<file> should be a readable file or
    directory." message.
    """
    sys.argv = ['ia', 'upload', 'nasa', 'nofile.txt']
    exit_code = None
    try:
        ia.main()
    except SystemExit as exc:
        exit_code = exc.code
    # Asserted outside the handler so that a normal return (no SystemExit
    # at all) fails the test instead of passing silently.
    assert exit_code == 1

    out, err = capsys.readouterr()
    assert '<file> should be a readable file or directory.' in err
def ia_call(argv, expected_exit_code=0):
    """Run ``ia.main()`` against the test config and check the exit code.

    ``--config-file TEST_CONFIG`` is inserted right after the program name
    so every call uses the test configuration.

    :param argv: Argument list for the command; it is copied, so the
        caller's list is left unmodified.
    :param expected_exit_code: Exit code the command is expected to
        produce; defaults to ``0``.
    :raises AssertionError: If the observed exit code differs from
        ``expected_exit_code``.
    """
    # Work on a copy: inserting into the caller's list would add the
    # config arguments again if the same list were passed twice.
    full_argv = list(argv)
    full_argv.insert(1, '--config-file')
    full_argv.insert(2, TEST_CONFIG)
    sys.argv = full_argv

    # A normal return (no SystemExit) counts as exit code 0, so the check
    # below also runs when ``ia.main()`` does not call ``sys.exit``.
    exit_code = 0
    try:
        ia.main()
    except SystemExit as exc:
        # A falsy code (``None`` or ``0``) is treated as success.
        exit_code = exc.code or 0
    assert exit_code == expected_exit_code
def test_ia_search_multi_page(capsys):
    """``ia search -p rows:25`` must print every identifier across pages.

    Four advancedsearch requests are mocked: the ``rows=0`` request with
    the full fixture, page 1 with the first 25 docs, page 2 with the rest,
    and page 3 with no docs.  The identifiers printed on stdout must match
    the identifiers in ``TEST_SEARCH_RESPONSE``.
    """
    parsed = json.loads(TEST_SEARCH_RESPONSE)
    all_docs = parsed['response']['docs']

    url1 = ('{0}//archive.org/advancedsearch.php?'
            'q=collection%3Anasa&output=json&rows=0&sort%5B0%5D=identifier+asc&'
            'fl%5B0%5D=identifier'.format(protocol))
    url2 = ('{0}//archive.org/advancedsearch.php?'
            'q=collection%3Anasa&output=json&rows=25&page=1&sort%5B0%5D=identifier+asc&'
            'fl%5B0%5D=identifier'.format(protocol))
    url3 = ('{0}//archive.org/advancedsearch.php?'
            'q=collection%3Anasa&output=json&rows=25&page=2&sort%5B0%5D=identifier+asc&'
            'fl%5B0%5D=identifier'.format(protocol))
    url4 = ('{0}//archive.org/advancedsearch.php?'
            'q=collection%3Anasa&output=json&rows=25&page=3&sort%5B0%5D=identifier+asc&'
            'fl%5B0%5D=identifier'.format(protocol))

    def page_body(docs):
        # Copy of the fixture with its docs replaced by ``docs``, as JSON.
        page = deepcopy(parsed)
        page['response']['docs'] = docs
        return json.dumps(page)

    mocked_pages = [
        (url1, TEST_SEARCH_RESPONSE),
        (url2, page_body(all_docs[:25])),
        (url3, page_body(all_docs[25:])),
        (url4, page_body([])),
    ]

    with responses.RequestsMock() as rsps:
        for page_url, body in mocked_pages:
            rsps.add(responses.GET, page_url,
                     body=body,
                     status=200,
                     match_querystring=True)

        sys.argv = ['ia', 'search', 'collection:nasa', '-p', 'rows:25',
                    '-f', 'identifier']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        out, err = capsys.readouterr()
        # Each non-empty stdout line is a JSON document with an identifier.
        out_ids = {json.loads(line)['identifier']
                   for line in out.split('\n') if line}
        expected_out_ids = {doc['identifier'] for doc in all_docs}
        assert out_ids == expected_out_ids
def test_ia_metadata_formats(capsys, testitem_metadata):
    """``ia metadata --formats`` must list the item's file formats.

    The set of stdout lines (including the trailing empty one) must equal
    the four expected format names.
    """
    metadata_url = '{0}//archive.org/metadata/nasa'.format(protocol)
    expected_lines = set(['Collection Header', 'Archive BitTorrent', 'JPEG',
                          'Metadata', ''])

    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, metadata_url,
                 body=testitem_metadata,
                 status=200)
        sys.argv = ['ia', 'metadata', '--formats', 'nasa']
        try:
            ia.main()
        except SystemExit as exc:
            assert exc.code == 0

        out, err = capsys.readouterr()
        printed_lines = set(out.split('\n'))
        assert printed_lines == expected_lines
def test_ia_metadata_formats(capsys, testitem_metadata):
    """Check the format names printed by ``ia metadata --formats nasa``.

    The metadata GET is mocked with ``testitem_metadata``; stdout, split
    on newlines and compared as a set, must match the expected names.
    """
    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET,
                 '{0}//archive.org/metadata/nasa'.format(protocol),
                 body=testitem_metadata,
                 status=200)

        sys.argv = ['ia', 'metadata', '--formats', 'nasa']
        try:
            ia.main()
        except SystemExit as exc:
            assert exc.code == 0

        captured_out, _captured_err = capsys.readouterr()
        wanted = set([
            'Collection Header',
            'Archive BitTorrent',
            'JPEG',
            'Metadata',
            '',
        ])
        assert set(captured_out.split('\n')) == wanted
def test_ia_search_num_found(capsys):
    """``ia search --num-found`` must print the mocked ``total`` value.

    The scrape endpoint is mocked for POST with a body whose ``total`` is
    50, so stdout must be ``50`` followed by a newline.
    """
    scrape_url = (
        "{0}//archive.org/services/search/v1/scrape"
        "?q=collection%3Anasa&total_only=true"
        "&REQUIRE_AUTH=true&count=10000".format(protocol)
    )
    mocked_body = '{"items":[],"count":0,"total":50}'

    with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.POST, scrape_url,
                 body=mocked_body,
                 status=200,
                 match_querystring=True)

        sys.argv = ["ia", "search", "collection:nasa", "--num-found"]
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        captured_out, _captured_err = capsys.readouterr()
        assert captured_out == "50\n"
def test_ia_upload_size_hint(capsys):
    """``ia upload --debug --size-hint 30`` must print the size-hint header.

    Only the metadata GET is mocked.  The set of stdout lines must match
    the expected debug output, which includes the
    ``x-archive-size-hint:30`` header and the S3 endpoint for ``test.txt``.
    """
    metadata_url = '{0}//archive.org/metadata/nasa'.format(protocol)
    endpoint_line = (' {0}//s3.us.archive.org/nasa/'
                     'test.txt'.format(protocol))
    expected_lines = set(['',
                          ' x-archive-size-hint:30',
                          'Endpoint:',
                          'HTTP Headers:',
                          'nasa:',
                          endpoint_line])

    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, metadata_url,
                 body=ITEM_METADATA,
                 status=200,
                 content_type='application/json')

        sys.argv = ['ia', 'upload', '--debug', 'nasa', '--size-hint', '30',
                    'test.txt']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        out, err = capsys.readouterr()
        assert set(out.split('\n')) == expected_lines
def test_ia_search_num_found(capsys):
    """``ia search --num-found`` must print ``50`` for the search fixture.

    The sorted ``rows=0`` advancedsearch request is mocked with
    ``TEST_SEARCH_RESPONSE``.
    """
    search_url = ('{0}//archive.org/advancedsearch.php?q=collection%3Anasa&output=json&'
                  'rows=0&sort%5B0%5D=identifier+asc'.format(protocol))

    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, search_url,
                 body=TEST_SEARCH_RESPONSE,
                 status=200,
                 match_querystring=True)

        sys.argv = ['ia', 'search', 'collection:nasa', '--num-found']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        captured_out, _captured_err = capsys.readouterr()
        assert captured_out == '50\n'
def test_ia_search_num_found(capsys):
    """``ia search --num-found`` must print the mocked ``total`` value.

    The beta scrape endpoint is mocked for GET with a body whose ``total``
    is 50, so stdout must be ``50`` followed by a newline.
    """
    scrape_url = ('{0}//archive.org/services/search/beta/scrape.php'
                  '?q=collection%3Anasa&total_only=true'.format(protocol))
    mocked_body = '{"items":[],"count":0,"total":50}'

    with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.GET, scrape_url,
                 body=mocked_body,
                 status=200,
                 match_querystring=True)

        sys.argv = ['ia', 'search', 'collection:nasa', '--num-found']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        captured_out, _captured_err = capsys.readouterr()
        assert captured_out == '50\n'
def test_ia_search_num_found(capsys):
    """``ia search --num-found`` must print ``50`` for the search fixture.

    The ``rows=0`` advancedsearch request is mocked with
    ``TEST_SEARCH_RESPONSE``.
    """
    search_url = ('{0}//archive.org/advancedsearch.php?'
                  'q=collection%3Anasa&output=json&rows=0'.format(protocol))

    with responses.RequestsMock() as rsps:
        rsps.add(responses.GET, search_url,
                 body=TEST_SEARCH_RESPONSE,
                 status=200,
                 match_querystring=True)

        sys.argv = ['ia', 'search', 'collection:nasa', '--num-found']
        try:
            ia.main()
        except SystemExit as exc:
            assert not exc.code

        captured_out, _captured_err = capsys.readouterr()
        assert captured_out == '50\n'
def test_ia_metadata_modify(capsys, testitem_metadata):
    """Check the success line printed by ``ia metadata --modify``.

    GET, POST and a second GET on the metadata endpoint are mocked; the
    POST returns a task log URL that must appear in stdout.
    """
    endpoint = '{0}//archive.org/metadata/nasa'.format(protocol)
    md_rsp = ('{"success":true,"task_id":447613301,'
              '"log":"https://catalogd.archive.org/log/447613301"}')
    mocked_calls = [
        (responses.GET, testitem_metadata),
        (responses.POST, md_rsp),
        (responses.GET, testitem_metadata),
    ]

    with responses.RequestsMock() as rsps:
        for http_method, response_body in mocked_calls:
            rsps.add(http_method, endpoint, body=response_body, status=200)

        # Key name is derived from the current time.
        valid_key = "foo-{k}".format(k=int(time()))
        sys.argv = ['ia', 'metadata', '--modify',
                    '{0}:test_value'.format(valid_key), 'nasa']
        try:
            ia.main()
        except SystemExit as exc:
            assert exc.code == 0

        captured_out, _captured_err = capsys.readouterr()
        assert captured_out == 'nasa - success: https://catalogd.archive.org/log/447613301\n'
def test_ia(capsys):
    """Exercise the top-level ``ia`` command dispatch.

    Covers ``--help``, ``ls nasa``, an unknown command (exit code 127 and
    an error on stderr), ``help`` and ``help list``.
    """
    sys.argv = ['ia', '--help']
    try:
        ia.main()
    except SystemExit as exc:
        assert not exc.code
    out, err = capsys.readouterr()
    assert 'A command line interface to Archive.org.' in out

    # NOTE(review): no HTTP mock is installed in this function -- confirm
    # whether this call is expected to reach the network.
    try:
        sys.argv = ['ia', 'ls', 'nasa']
        ia.main()
    except SystemExit as exc:
        assert not exc.code

    sys.argv = ['ia', 'nocmd']
    exit_code = None
    try:
        ia.main()
    except SystemExit as exc:
        exit_code = exc.code
    # Asserted outside the handler so that a normal return (no SystemExit
    # at all) fails the test instead of passing silently.
    assert exit_code == 127
    out, err = capsys.readouterr()
    assert "error: 'nocmd' is not an ia command!" in err

    try:
        sys.argv = ['ia', 'help']
        ia.main()
    except SystemExit as exc:
        assert not exc.code
    out, err = capsys.readouterr()
    assert 'A command line interface to Archive.org.' in err

    try:
        sys.argv = ['ia', 'help', 'list']
        ia.main()
    except SystemExit as exc:
        assert not exc.code
def test_ia(capsys):
    """Exercise the top-level ``ia`` command dispatch.

    Covers ``--help``, ``--insecure ls nasa``, an unknown command (exit
    code 127 and an error on stderr), ``help`` and ``help list``.
    """
    sys.argv = ['ia', '--help']
    try:
        ia.main()
    except SystemExit as exc:
        assert not exc.code
    out, err = capsys.readouterr()
    assert 'A command line interface to Archive.org.' in out

    # NOTE(review): no HTTP mock is installed in this function -- confirm
    # whether this call is expected to reach the network.
    try:
        sys.argv = ['ia', '--insecure', 'ls', 'nasa']
        ia.main()
    except SystemExit as exc:
        assert not exc.code

    sys.argv = ['ia', 'nocmd']
    exit_code = None
    try:
        ia.main()
    except SystemExit as exc:
        exit_code = exc.code
    # Asserted outside the handler so that a normal return (no SystemExit
    # at all) fails the test instead of passing silently.
    assert exit_code == 127
    out, err = capsys.readouterr()
    assert "error: 'nocmd' is not an ia command!" in err

    try:
        sys.argv = ['ia', 'help']
        ia.main()
    except SystemExit as exc:
        assert not exc.code
    out, err = capsys.readouterr()
    assert 'A command line interface to Archive.org.' in err

    try:
        sys.argv = ['ia', 'help', 'list']
        ia.main()
    except SystemExit as exc:
        assert not exc.code