def test_ia_search_itemlist(capsys):
    """Check that ``ia search --itemlist`` prints 200 whitespace-separated tokens.

    Two scrape responses are mocked: the fixture as loaded, and the same
    payload with its ``cursor`` key removed.
    """
    first_page = load_test_data_file('scrape_response.json')

    # The follow-up page is the same payload without a cursor.
    last_page_data = json.loads(first_page)
    del last_page_data['cursor']
    last_page = json.dumps(last_page_data)

    first_url = ('{0}//archive.org/services/search/v1/scrape'
                 '?q=collection%3Aattentionkmartshoppers'
                 '&REQUIRE_AUTH=true&count=10000'.format(PROTOCOL))
    cursor_url = ('{0}//archive.org/services/search/v1/scrape?'
                  'cursor=W3siaWRlbnRpZmllciI6IjE5NjEtTC0wNTkxNCJ9XQ%3D%3D'
                  '&REQUIRE_AUTH=true&q=collection%3Aattentionkmartshoppers'
                  '&count=10000'.format(PROTOCOL))

    with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
        rsps.add(responses.POST, first_url, body=first_page, match_querystring=True)
        rsps.add(responses.POST, cursor_url, body=last_page, match_querystring=True)
        ia_call(['ia', 'search', 'collection:attentionkmartshoppers', '--itemlist'])

    captured = capsys.readouterr()
    assert len(captured.out.split()) == 200
def test_ia_metadata_exists(capsys):
    """Check ``ia metadata --exists`` for a populated and an empty metadata body.

    The first call is served the default ``nasa`` metadata mock and must
    print ``nasa exists``. The second is served ``{}`` and must exit with
    code 1 and report ``nasa does not exist`` on stderr.
    """
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        ia_call(['ia', 'metadata', '--exists', 'nasa'])
        out, err = capsys.readouterr()
        assert out == 'nasa exists\n'

        # Clear the earlier registration so the empty body is the only
        # response that can match the second request.
        rsps.reset()
        rsps.add_metadata_mock('nasa', '{}')
        # The argv list is passed to ia_call directly, as in the first call
        # above, so sys.argv is not mutated from this test.
        ia_call(['ia', 'metadata', '--exists', 'nasa'], expected_exit_code=1)
        out, err = capsys.readouterr()
        assert err == 'nasa does not exist\n'
def test_ia_upload_debug(capsys, tmpdir_ch, nasa_mocker):
    """Check the exact set of lines printed by ``ia upload --debug``."""
    with open('test.txt', 'w') as fh:
        fh.write('foo')

    ia_call(['ia', 'upload', '--debug', 'nasa', 'test.txt'])

    expected_lines = {
        '',
        'Endpoint:',
        ' {0}//s3.us.archive.org/nasa/test.txt'.format(PROTOCOL),
        'HTTP Headers:',
        ' x-archive-size-hint:3',
        'nasa:',
    }
    # Compare as sets: line order and repeated lines are not checked.
    printed_lines = set(capsys.readouterr().out.split('\n'))
    assert printed_lines == expected_lines
def test_ia_metadata_modify(capsys):
    """Check that ``ia metadata --modify`` reports the task log URL on stderr."""
    task_response = ('{"success":true,"task_id":447613301,'
                     '"log":"https://catalogd.archive.org/log/447613301"}')

    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add_metadata_mock('nasa', body=task_response, method=responses.POST)
        rsps.add_metadata_mock('nasa')

        # The key embeds the current epoch second.
        valid_key = f'foo-{int(time())}'
        argv = ['ia', 'metadata', '--modify', f'{valid_key}:test_value', 'nasa']
        ia_call(argv)

        captured = capsys.readouterr()
        assert captured.err == 'nasa - success: https://catalogd.archive.org/log/447613301\n'
def test_ia_search_num_found(capsys):
    """Check that ``ia search --num-found`` prints the mocked ``total`` value."""
    scrape_url = (f'{PROTOCOL}//archive.org/services/search/v1/scrape'
                  '?q=collection%3Anasa&total_only=true'
                  '&count=10000')
    scrape_body = '{"items":[],"count":0,"total":50}'

    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.POST, scrape_url, body=scrape_body, match_querystring=True)
        ia_call(['ia', 'search', 'collection:nasa', '--num-found'])

    captured = capsys.readouterr()
    assert captured.out == '50\n'
def test_ia_search_num_found(capsys):
    """Check that ``ia search --num-found`` prints the mocked ``total`` value.

    The mocked scrape URL includes the ``REQUIRE_AUTH=true`` parameter.
    """
    scrape_url = ('{0}//archive.org/services/search/v1/scrape'
                  '?q=collection%3Anasa&total_only=true'
                  '&REQUIRE_AUTH=true&count=10000'.format(PROTOCOL))
    scrape_body = '{"items":[],"count":0,"total":50}'

    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.POST, scrape_url, body=scrape_body, match_querystring=True)
        ia_call(['ia', 'search', 'collection:nasa', '--num-found'])

    captured = capsys.readouterr()
    assert captured.out == '50\n'
def test_ia_upload_invalid_identifier(capsys, caplog):
    """Check that uploading to identifier ``føø`` exits 1 and prints the identifier rules."""
    # NOTE(review): test.txt is created in the current working directory and
    # no tmpdir fixture is requested here -- confirm this is intended.
    with open('test.txt', 'w') as fh:
        fh.write('foo')

    expected_message = (
        '<identifier> should be between 3 and 80 characters in length, and '
        'can only contain alphanumeric characters, periods ".", '
        'underscores "_", or dashes "-". However, <identifier> cannot begin '
        'with periods, underscores, or dashes.')

    ia_call(['ia', '--log', 'upload', 'føø', 'test.txt'], expected_exit_code=1)

    stderr_text = capsys.readouterr().err
    assert expected_message in stderr_text
def test_ia_metadata_modify(capsys):
    """Check that ``ia metadata --modify`` reports the task log URL on stdout."""
    task_response = ('{"success":true,"task_id":447613301,'
                     '"log":"https://catalogd.archive.org/log/447613301"}')

    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add_metadata_mock('nasa', body=task_response, method=responses.POST)
        rsps.add_metadata_mock('nasa')

        # The key embeds the current epoch second.
        valid_key = "foo-{k}".format(k=int(time()))
        modification = '{0}:test_value'.format(valid_key)
        ia_call(['ia', 'metadata', '--modify', modification, 'nasa'])

        captured = capsys.readouterr()
        assert captured.out == 'nasa - success: https://catalogd.archive.org/log/447613301\n'
def test_ia_upload_spreadsheet_empty_identifier(tmpdir_ch, capsys, caplog):
    """Check that a spreadsheet row with an empty identifier exits 1 with an error."""
    with open('test.txt', 'w') as fh:
        fh.write('foo')

    # Header row plus one data row whose identifier cell is empty.
    with open('test.csv', 'w') as fh:
        fh.writelines(['identifier,file\n', ',test.txt\n'])

    ia_call(['ia', 'upload', '--spreadsheet', 'test.csv'], expected_exit_code=1)

    out, err = capsys.readouterr()
    assert 'error: no identifier column on spreadsheet.' in err
def test_ia_upload_debug(capsys, tmpdir_ch, nasa_mocker):
    """Check that ``ia upload --debug`` prints the expected endpoint and headers.

    This variant expects ``Connection:close`` among the printed headers.
    """
    with open('test.txt', 'w') as fh:
        fh.write('foo')

    ia_call(['ia', 'upload', '--debug', 'nasa', 'test.txt'])
    debug_output = capsys.readouterr().out

    expected_fragments = (
        'User-Agent',
        's3.us.archive.org/nasa/test.txt',
        'Accept:*/*',
        'Authorization:LOW ',
        'Connection:close',
        'Content-Length:3',
        'Accept-Encoding:gzip, deflate',
    )
    for fragment in expected_fragments:
        assert fragment in debug_output
def test_ia_upload_debug(capsys, tmpdir_ch, nasa_mocker):
    """Check that ``ia upload --debug`` prints the expected endpoint and headers.

    This variant expects ``Connection:keep-alive`` among the printed headers.
    """
    with open('test.txt', 'w') as fh:
        fh.write('foo')

    ia_call(['ia', 'upload', '--debug', 'nasa', 'test.txt'])
    debug_output = capsys.readouterr().out

    expected_fragments = (
        'User-Agent',
        's3.us.archive.org/nasa/test.txt',
        'Accept:*/*',
        'Authorization:LOW ',
        'Connection:keep-alive',
        'Content-Length:3',
        'Accept-Encoding:gzip, deflate',
    )
    for fragment in expected_fragments:
        assert fragment in debug_output
def test_ia_upload(tmpdir_ch, caplog):
    """Check that ``ia --log upload`` logs the uploaded file and its S3 URL."""
    with open('test.txt', 'w') as fh:
        fh.write('foo')

    put_url = f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt'
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT, put_url, body='', content_type='text/plain')
        ia_call(['ia', '--log', 'upload', 'nasa', 'test.txt'])

    expected_log = f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt'
    assert expected_log in caplog.text
def test_ia_upload(tmpdir_ch, caplog):
    """Check that ``ia --log upload`` logs the uploaded file and its S3 URL."""
    with open('test.txt', 'w') as fh:
        fh.write('foo')

    put_url = '{0}//s3.us.archive.org/nasa/test.txt'.format(PROTOCOL)
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT, put_url, body='', content_type='text/plain')
        ia_call(['ia', '--log', 'upload', 'nasa', 'test.txt'])

    expected_log = ('uploaded test.txt to {0}//s3.us.archive.org/nasa/'
                    'test.txt'.format(PROTOCOL))
    assert expected_log in caplog.text
def test_ia_upload_remote_name(tmpdir_ch, caplog):
    """Check that ``--remote-name`` uploads the local file under the given name.

    The local ``test.txt`` is uploaded as ``hi.txt``; the mocked PUT
    endpoint and the expected log line both use the remote name.
    """
    with open('test.txt', 'w') as fh:
        fh.write('foo')

    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT,
                 '{0}//s3.us.archive.org/nasa/hi.txt'.format(PROTOCOL),
                 body='',
                 content_type='text/plain')
        ia_call(['ia', '--log', 'upload', 'nasa', 'test.txt',
                 '--remote-name', 'hi.txt'])

    # caplog.text is a property on pytest's caplog fixture, not a method;
    # calling it would raise TypeError on the returned string.
    assert ('uploaded hi.txt to {0}//s3.us.archive.org/nasa/'
            'hi.txt'.format(PROTOCOL)) in caplog.text
def test_ia(capsys):
    """Exercise top-level ``ia`` invocations: help, an unknown command, and ``help list``."""
    banner = 'A command line interface to Archive.org.'

    ia_call(['ia', '--help'])
    help_flag_output = capsys.readouterr()
    assert banner in help_flag_output.out

    ia_call(['ia', '--insecure', 'ls', 'nasa'])

    # An unknown subcommand is expected to exit with code 127.
    ia_call(['ia', 'nocmd'], expected_exit_code=127)
    unknown_cmd_output = capsys.readouterr()
    assert "error: 'nocmd' is not an ia command!" in unknown_cmd_output.err

    # The `help` subcommand is checked on stderr, unlike `--help` above.
    ia_call(['ia', 'help'])
    help_cmd_output = capsys.readouterr()
    assert banner in help_cmd_output.err

    ia_call(['ia', 'help', 'list'])
def test_ia_upload_unicode(tmpdir_ch, caplog):
    """Check uploading a file with a non-ASCII name and non-ASCII metadata.

    The mocked PUT URL and the expected log line use the percent-encoded
    form of the file name.
    """
    local_name = 'தமிழ் - baz ∆.txt'
    with open(local_name, 'w') as fh:
        fh.write('unicode foo')

    efname = '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20baz%20%E2%88%86.txt'
    put_url = '{0}//s3.us.archive.org/nasa/{1}'.format(PROTOCOL, efname)

    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT, put_url, body='', content_type='text/plain')
        ia_call(['ia', '--log', 'upload', 'nasa', local_name,
                 '--metadata', 'foo:∆'])

    expected_log = ('uploaded தமிழ் - baz ∆.txt to {0}//s3.us.archive.org/nasa/'
                    '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20'
                    'baz%20%E2%88%86.txt'.format(PROTOCOL))
    assert expected_log in caplog.text
def test_ia_upload_spreadsheet_bom(tmpdir_ch, caplog):
    """Check that a spreadsheet starting with a UTF-8 byte order mark is uploaded."""
    with open('test.txt', 'w') as fh:
        fh.write('foo')

    # Written in binary mode so the leading BOM bytes land in the file verbatim.
    with open('test.csv', 'wb') as fh:
        fh.writelines([
            b'\xef\xbb\xbf',
            b'identifier,file\n',
            b'nasa,test.txt\n',
        ])

    put_url = f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt'
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT, put_url, body='', content_type='text/plain')
        ia_call(['ia', 'upload', '--spreadsheet', 'test.csv'])

    expected_log = f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt'
    assert expected_log in caplog.text
def test_ia_upload_unicode(tmpdir_ch, caplog):
    """Check uploading a file with a non-ASCII name and non-ASCII metadata.

    The mocked PUT URL and the expected log line use the percent-encoded
    form of the file name.
    """
    with open('தமிழ் - baz ∆.txt', 'w') as fh:
        fh.write('unicode foo')

    efname = '%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20baz%20%E2%88%86.txt'
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT,
                 '{0}//s3.us.archive.org/nasa/{1}'.format(PROTOCOL, efname),
                 body='',
                 content_type='text/plain')
        ia_call(['ia', '--log', 'upload', 'nasa', 'தமிழ் - baz ∆.txt',
                 '--metadata', 'foo:∆'])

    # caplog.text is a property on pytest's caplog fixture, not a method;
    # calling it would raise TypeError on the returned string.
    assert (u'uploaded தமிழ் - baz ∆.txt to {0}//s3.us.archive.org/nasa/'
            u'%E0%AE%A4%E0%AE%AE%E0%AE%BF%E0%AE%B4%E0%AF%8D%20-%20'
            u'baz%20%E2%88%86.txt'.format(PROTOCOL)) in caplog.text
def test_ia_upload_keep_directories(tmpdir_ch, caplog):
    """Check the remote file name with and without ``--keep-directories``.

    ``foo/test.txt`` is uploaded both directly and through a spreadsheet.
    Without the option the expected remote name is ``test.txt``; with it
    the expected remote name is ``foo/test.txt``.
    """
    os.mkdir('foo')
    with open('foo/test.txt', 'w') as fh:
        fh.write('foo')
    with open('test.csv', 'w') as fh:
        fh.writelines(['identifier,file\n', 'nasa,foo/test.txt\n'])

    flat_url = f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt'
    flat_log = f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt'
    nested_url = f'{PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt'
    nested_log = f'uploaded foo/test.txt to {PROTOCOL}//s3.us.archive.org/nasa/foo/test.txt'

    # (mocked PUT url, expected log line, CLI arguments)
    scenarios = [
        # Default behaviour
        (flat_url, flat_log,
         ['ia', '--log', 'upload', 'nasa', 'foo/test.txt']),
        (flat_url, flat_log,
         ['ia', '--log', 'upload', '--spreadsheet', 'test.csv']),
        # With the option
        (nested_url, nested_log,
         ['ia', '--log', 'upload', 'nasa', 'foo/test.txt', '--keep-directories']),
        (nested_url, nested_log,
         ['ia', '--log', 'upload', '--spreadsheet', 'test.csv', '--keep-directories']),
    ]

    for position, (put_url, expected_log, argv) in enumerate(scenarios):
        # Start each scenario after the first with an empty log capture.
        if position:
            caplog.clear()
        with IaRequestsMock() as rsps:
            rsps.add_metadata_mock('nasa')
            rsps.add(responses.PUT, put_url, body='', content_type='text/plain')
            ia_call(argv)
        assert expected_log in caplog.text
def test_ia_upload_status_check(capsys):
    """Check ``ia upload --status-check`` for an accepting and an over-limit response.

    The first call is served ``STATUS_CHECK_RESPONSE`` unchanged and must
    report success on stdout. The second is served the same payload with
    ``over_limit`` set to 1 and must exit with code 1 and warn on stderr.
    """
    with IaRequestsMock() as rsps:
        rsps.add(responses.GET, '{0}//s3.us.archive.org'.format(PROTOCOL),
                 body=STATUS_CHECK_RESPONSE,
                 content_type='application/json')
        ia_call(['ia', 'upload', 'nasa', '--status-check'])
        out, err = capsys.readouterr()
        assert 'success: nasa is accepting requests.' in out

        j = json.loads(STATUS_CHECK_RESPONSE)
        j['over_limit'] = 1
        # Clear the earlier registration for the same URL so the over-limit
        # body is the only response that can match the second request.
        rsps.reset()
        rsps.add(responses.GET, '{0}//s3.us.archive.org'.format(PROTOCOL),
                 body=json.dumps(j),
                 content_type='application/json')
        ia_call(['ia', 'upload', 'nasa', '--status-check'], expected_exit_code=1)
        out, err = capsys.readouterr()
        assert ('warning: nasa is over limit, and not accepting requests. '
                'Expect 503 SlowDown errors.') in err
def test_ia_upload_status_check(capsys):
    """Check ``ia upload --status-check`` for an accepting and an over-limit response.

    The mock is reset between the two calls, so each call has exactly one
    registered response for the S3 URL.
    """
    status_argv = ['ia', 'upload', 'nasa', '--status-check']

    with IaRequestsMock() as rsps:
        rsps.add(responses.GET, '{0}//s3.us.archive.org'.format(PROTOCOL),
                 body=STATUS_CHECK_RESPONSE,
                 content_type='application/json')
        ia_call(list(status_argv))
        accepting = capsys.readouterr()
        assert 'success: nasa is accepting requests.' in accepting.out

        # Same payload, but flagged as over limit.
        over_limit_status = json.loads(STATUS_CHECK_RESPONSE)
        over_limit_status['over_limit'] = 1

        rsps.reset()
        rsps.add(responses.GET, '{0}//s3.us.archive.org'.format(PROTOCOL),
                 body=json.dumps(over_limit_status),
                 content_type='application/json')
        ia_call(list(status_argv), expected_exit_code=1)
        over_limit = capsys.readouterr()
        expected_warning = ('warning: nasa is over limit, and not accepting requests. '
                            'Expect 503 SlowDown errors.')
        assert expected_warning in over_limit.err
def test_ia_upload_stdin(tmpdir_ch, caplog):
    """Check uploading from standard input (``-``) under ``--remote-name hi.txt``."""

    @contextmanager
    def replace_stdin(stream):
        """Swap ``sys.stdin`` for *stream* and restore the previous value on exit."""
        saved_stdin = sys.stdin
        sys.stdin = stream
        try:
            yield
        finally:
            sys.stdin = saved_stdin

    put_url = f'{PROTOCOL}//s3.us.archive.org/nasa/hi.txt'
    upload_argv = ['ia', '--log', 'upload', 'nasa', '-', '--remote-name', 'hi.txt']

    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT, put_url, body='', content_type='text/plain')
        with replace_stdin(StringIO('foo')):
            ia_call(upload_argv)

    expected_log = f'uploaded hi.txt to {PROTOCOL}//s3.us.archive.org/nasa/hi.txt'
    assert expected_log in caplog.text
def test_ia_upload_403(capsys):
    """Check that a mocked 403 response on PUT makes ``ia upload`` exit 1 with an error.

    This test module itself (``__file__``) is the file being uploaded.
    """
    signature_error_body = (
        '<Error>'
        '<Code>SignatureDoesNotMatch</Code>'
        '<Message>The request signature we calculated does not match '
        'the signature you provided. Check your AWS Secret Access Key '
        'and signing method. For more information, see REST '
        'Authentication and SOAP Authentication for details.</Message>'
        "<Resource>'PUT\n\n\n\n/iacli-test-item60/test-replace.txt'</Resource>"
        '<RequestId>18a9c5ea-088f-42f5-9fcf-70651cc085ca</RequestId>'
        '</Error>')
    put_url = '{0}//s3.us.archive.org/nasa/test_ia_upload.py'.format(PROTOCOL)

    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT, put_url,
                 body=signature_error_body,
                 status=403,
                 content_type='text/plain')
        ia_call(['ia', 'upload', 'nasa', __file__], expected_exit_code=1)

    stderr_text = capsys.readouterr().err
    assert 'error uploading test_ia_upload.py' in stderr_text
def test_ia_search_itemlist(capsys):
    """Check that ``ia search --itemlist`` prints 200 whitespace-separated tokens.

    Two scrape responses are mocked: the fixture as loaded, and the same
    payload with its ``cursor`` key removed.
    """
    page_with_cursor = load_test_data_file('scrape_response.json')

    # Strip the cursor from a copy of the fixture for the follow-up request.
    payload = json.loads(page_with_cursor)
    del payload['cursor']
    page_without_cursor = json.dumps(payload)

    initial_url = ('{0}//archive.org/services/search/v1/scrape'
                   '?q=collection%3Aattentionkmartshoppers'
                   '&REQUIRE_AUTH=true&count=10000'.format(PROTOCOL))
    followup_url = ('{0}//archive.org/services/search/v1/scrape?'
                    'cursor=W3siaWRlbnRpZmllciI6IjE5NjEtTC0wNTkxNCJ9XQ%3D%3D'
                    '&REQUIRE_AUTH=true&q=collection%3Aattentionkmartshoppers'
                    '&count=10000'.format(PROTOCOL))
    mocked_pages = (
        (initial_url, page_with_cursor),
        (followup_url, page_without_cursor),
    )

    with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
        for page_url, page_body in mocked_pages:
            rsps.add(responses.POST, page_url, body=page_body, match_querystring=True)
        ia_call(['ia', 'search', 'collection:attentionkmartshoppers', '--itemlist'])

    stdout_text = capsys.readouterr().out
    assert len(stdout_text.split()) == 200
def test_ia_upload_spreadsheet(tmpdir_ch, caplog):
    """Check a two-row spreadsheet upload.

    The second row has an empty identifier cell and a ``REMOTE_NAME`` of
    ``bar.txt``; both files are expected to be uploaded to ``nasa``.
    """
    for local_name, content in (('foo.txt', 'foo'), ('test.txt', 'bar')):
        with open(local_name, 'w') as fh:
            fh.write(content)

    with open('test.csv', 'w') as fh:
        fh.writelines([
            'identifier,file,REMOTE_NAME\n',
            'nasa,foo.txt,\n',
            ',test.txt,bar.txt\n',
        ])

    foo_url = f'{PROTOCOL}//s3.us.archive.org/nasa/foo.txt'
    bar_url = f'{PROTOCOL}//s3.us.archive.org/nasa/bar.txt'
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT, foo_url, body='', content_type='text/plain')
        rsps.add(responses.PUT, bar_url, body='', content_type='text/plain')
        ia_call(['ia', 'upload', '--spreadsheet', 'test.csv'])

    log_text = caplog.text
    assert f'uploaded foo.txt to {PROTOCOL}//s3.us.archive.org/nasa/foo.txt' in log_text
    assert f'uploaded bar.txt to {PROTOCOL}//s3.us.archive.org/nasa/bar.txt' in log_text
def test_ia_upload_spreadsheet_item_and_identifier_column(tmpdir_ch, caplog):
    """Check a spreadsheet that has both ``item`` and ``identifier`` columns.

    The upload must go to the ``item`` value (``nasa``), and neither column
    may appear as an ``x-archive-meta00-*`` header on the PUT request.
    """
    # item is preferred, and both are discarded
    with open('test.txt', 'w') as fh:
        fh.write('foo')
    with open('test.csv', 'w') as fh:
        fh.writelines(['item,identifier,file\n', 'nasa,uhoh,test.txt\n'])

    put_url = f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt'
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT, put_url, body='', content_type='text/plain')
        ia_call(['ia', 'upload', '--spreadsheet', 'test.csv'])

        # Verify that the item and identifier columns are not in the PUT request headers
        put_calls = [call for call in rsps.calls if call.request.method == 'PUT']
        assert len(put_calls) == 1
        sent_headers = put_calls[0].request.headers
        assert 'x-archive-meta00-identifier' not in sent_headers
        assert 'x-archive-meta00-item' not in sent_headers

    expected_log = f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt'
    assert expected_log in caplog.text
def test_ia_upload_checksum(tmpdir_ch, caplog):
    """Check ``ia upload --checksum`` before and after the file is in the item metadata.

    The first upload is expected to succeed. The two later ones (direct
    and via spreadsheet) use metadata that already lists ``test.txt`` and
    are expected to exit 1 with an "already exists" log line.
    """

    def insert_test_txt(raw_metadata):
        """Return the metadata JSON with a ``test.txt`` entry appended to ``files``."""
        metadata = json.loads(raw_metadata)
        metadata['files'].append({
            'name': 'test.txt',
            'md5': 'acbd18db4cc2f85cedef654fccc4a4d8',
        })
        return json.dumps(metadata)

    with open('test.txt', 'w') as fh:
        fh.write('foo')

    uploaded_log = f'uploaded test.txt to {PROTOCOL}//s3.us.archive.org/nasa/test.txt'
    exists_log = f'test.txt already exists: {PROTOCOL}//s3.us.archive.org/nasa/test.txt'

    # First upload, file not in metadata yet
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        rsps.add(responses.PUT, f'{PROTOCOL}//s3.us.archive.org/nasa/test.txt',
                 body='',
                 content_type='text/plain')
        ia_call(['ia', '--log', 'upload', 'nasa', 'test.txt', '--checksum'])
    assert uploaded_log in caplog.text
    caplog.clear()

    # Second upload with file in metadata
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa', transform_body=insert_test_txt)
        ia_call(['ia', '--log', 'upload', 'nasa', 'test.txt', '--checksum'],
                expected_exit_code=1)
    assert exists_log in caplog.text
    caplog.clear()

    # Second upload with spreadsheet
    with open('test.csv', 'w') as fh:
        fh.writelines(['identifier,file\n', 'nasa,test.txt\n'])
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa', transform_body=insert_test_txt)
        ia_call(['ia', '--log', 'upload', '--spreadsheet', 'test.csv', '--checksum'],
                expected_exit_code=1)
    assert exists_log in caplog.text
def test_ia_upload_invalid_cmd(capsys):
    """Check that uploading ``nofile.txt`` exits 1 with the ``<file>`` usage error."""
    ia_call(['ia', 'upload', 'nasa', 'nofile.txt'], expected_exit_code=1)

    stderr_text = capsys.readouterr().err
    assert '<file> should be a readable file or directory.' in stderr_text
def test_ia_upload_inexistent_file(tmpdir_ch, capsys, caplog):
    """Check that uploading ``test.txt`` without creating it exits 1 with a usage error."""
    upload_argv = ['ia', 'upload', 'foo', 'test.txt']
    ia_call(upload_argv, expected_exit_code=1)

    captured = capsys.readouterr()
    assert '<file> should be a readable file or directory.' in captured.err
def test_ia_metadata_formats(capsys, nasa_mocker):
    """Check the set of lines printed by ``ia metadata --formats nasa``."""
    ia_call(['ia', 'metadata', '--formats', 'nasa'])

    # Splitting on '\n' leaves an empty string, hence '' in the expected set.
    printed_formats = set(capsys.readouterr().out.split('\n'))
    assert printed_formats == {
        'Collection Header',
        'Archive BitTorrent',
        'JPEG',
        'Metadata',
        '',
    }
def test_ia_metadata_formats(capsys, nasa_mocker):
    """Check the set of lines printed by ``ia metadata --formats nasa``."""
    ia_call(['ia', 'metadata', '--formats', 'nasa'])
    captured = capsys.readouterr()

    # Compared as sets, so ordering and repeated lines are not checked;
    # '' is included because the output is split on '\n'.
    wanted = {'Collection Header', 'Archive BitTorrent', 'JPEG', 'Metadata', ''}
    seen = set(captured.out.split('\n'))
    assert seen == wanted