def test_add_versions():
    """Submit ten placeholder versions in batches and stash the import ids.

    One version record is built per UUID listed below and the records are
    passed to ``Client.add_versions`` with ``batch_size=5``. The returned
    import ids are stored in ``global_stash['import_ids']``.
    """
    new_version_ids = [
        'd68c5521-0728-4098-96dd-e6330612f049',
        'db2932c4-413b-41f6-b73d-602faccf2f49',
        '4cfe3e9b-01b3-4a5f-bb45-e7657fc38849',
        'e1731130-569a-45a5-8db9-e58764e72049',
        '901feef4-91b8-4140-8dcc-a414f52bef49',
        '4cd662bc-e322-463e-9fe1-12fbccb62a49',
        '1d0e7eb7-4920-48b5-a810-d01e7ae27c49',
        '8b420ce3-ecc5-43e2-865a-b02c854f6449',
        'ae23d4f2-ab34-43da-b58f-57c4ab8bdd49',
        'b8cc3d0f-f2eb-43ef-bfc7-d0b589ee7f49',
    ]

    def placeholder_version(version_id):
        # Only the uuid varies between records; a fresh dict (and fresh
        # lists) is created on every call so records share no state.
        return {
            'uuid': version_id,
            # Notice the importer needs page_url instead of page_id.
            'page_url': 'http://example.com',
            'capture_time': TIME,
            'uri': 'http://example.com',
            'version_hash': 'hash_placeholder',
            'title': 'title_placeholder',
            'page_maintainers': ['agency_placeholder'],
            'page_tags': ['site:site_placeholder'],
            'source_type': 'test',
        }

    versions = [placeholder_version(version_id)
                for version_id in new_version_ids]

    # FIXME: need to spy on the data POSTed to DB and make sure the number of
    # lines matches the number of new_version_ids
    client = Client(**AUTH)
    global_stash['import_ids'] = client.add_versions(versions, batch_size=5)
def test_get_annotation():
    """Fetch the annotation whose id is stashed and check its content.

    Reads ``global_stash['annotation_id']`` (stored by
    ``test_add_annotation``) and expects the fetched annotation to equal
    ``{'foo': 'bar'}``.
    """
    expected_annotation = {'foo': 'bar'}
    client = Client(**AUTH)
    response = client.get_annotation(
        annotation_id=global_stash['annotation_id'],
        page_id=PAGE_ID,
        to_version_id=TO_VERSION_ID)
    assert response['data']['annotation'] == expected_annotation
def test_add_version():
    """Call ``Client.add_version`` with placeholder data for NEW_VERSION_ID.

    There are no assertions; the test fails only if the call raises.
    """
    version_fields = {
        'page_id': PAGE_ID,
        'uuid': NEW_VERSION_ID,
        'capture_time': TIME,
        'uri': 'http://example.com',
        'hash': 'hash_placeholder',
        'title': 'title_placeholder',
        'source_type': 'test',
    }
    Client(**AUTH).add_version(**version_fields)
def test_add_annotation():
    """Add a small annotation and stash the uuid from the response.

    The uuid found at ``result['data']['uuid']`` is saved in
    ``global_stash['annotation_id']``.
    """
    # smoke test
    client = Client(**AUTH)
    response = client.add_annotation(
        annotation={'foo': 'bar'},
        page_id=PAGE_ID,
        to_version_id=TO_VERSION_ID)
    global_stash['annotation_id'] = response['data']['uuid']
def test_get_new_version():
    """Fetch the version identified by NEW_VERSION_ID and verify its fields.

    Checks the uuid, page uuid, capture time (within one millisecond of
    TIME), source type and title of the returned data.
    """
    cli = Client(**AUTH)
    data = cli.get_version(NEW_VERSION_ID)['data']
    assert data['uuid'] == NEW_VERSION_ID
    assert data['page_uuid'] == PAGE_ID
    # Some floating-point error occurs in round-trip.
    epsilon = timedelta(seconds=0.001)
    # Compare the absolute difference: a signed difference would be negative,
    # and therefore always "less than epsilon", for any capture_time earlier
    # than TIME, no matter how far off it is.
    assert abs(data['capture_time'] - TIME) < epsilon
    assert data['source_type'] == 'test'
    assert data['title'] == 'title_placeholder'
def test_monitor_import_statuses_returns_errors():
    """Check the exact error mapping returned by ``monitor_import_statuses``.

    Uses the import ids stored in ``global_stash['import_ids']``.
    """
    expected_errors = {
        47: [
            "Row 2: Response body for 'http://example.com' did "
            "not match expected hash (hash_placeholder)"
        ]
    }
    client = Client(**AUTH)
    reported = client.monitor_import_statuses(global_stash['import_ids'])
    assert reported == expected_errors
def test_get_version():
    """Fetch TO_VERSION_ID, with and without the change relations."""
    client = Client(**AUTH)

    plain = client.get_version(TO_VERSION_ID)
    assert plain['data']['uuid'] == TO_VERSION_ID
    assert plain['data']['page_uuid'] == PAGE_ID

    # Test relations
    with_relations = client.get_version(
        TO_VERSION_ID,
        include_change_from_previous=True,
        include_change_from_earliest=True)
    assert 'change_from_previous' in with_relations['data']
    assert 'change_from_earliest' in with_relations['data']
def test_list_versions():
    """List versions and check that requested relations appear on each item.

    The unfiltered listing must be non-empty. When a relation is requested
    through an ``include_*`` flag, every returned item must carry the
    corresponding key.
    """
    cli = Client(**AUTH)
    res = cli.list_versions()
    assert res['data']

    # Test relations
    # all() already returns a bool, so it is asserted directly; a generator
    # avoids building a throwaway list.
    res = cli.list_versions(include_change_from_previous=True)
    assert all('change_from_previous' in item for item in res['data'])
    res = cli.list_versions(include_change_from_earliest=True)
    assert all('change_from_earliest' in item for item in res['data'])
def test_missing_creds():
    """``Client.from_env`` raises without credentials and works with them.

    The process environment is emptied for the duration of the test and
    restored to its exact prior state afterwards.
    """
    # Snapshot before entering the try block so that the `finally` clause
    # always has something to restore from.
    saved_env = os.environ.copy()
    try:
        os.environ.clear()
        with pytest.raises(MissingCredentials):
            Client.from_env()
        os.environ.update({
            'WEB_MONITORING_DB_URL': AUTH['url'],
            'WEB_MONITORING_DB_EMAIL': AUTH['email'],
            'WEB_MONITORING_DB_PASSWORD': AUTH['password']
        })
        Client.from_env()  # should work
    finally:
        # Clear first: update() alone would leave behind any
        # WEB_MONITORING_DB_* variable set above that was not part of the
        # original environment, leaking state into later tests.
        os.environ.clear()
        os.environ.update(saved_env)
def get_staging_cli():
    """Build a ``Client`` from the WEB_MONITORING_DB_STAGING_* env vars.

    Returns:
        A ``Client`` constructed with the staging email, password and URL.

    Raises:
        Exception: if any of the three environment variables is missing.
            The original ``KeyError`` is attached as ``__cause__`` so the
            missing variable's name is visible in the traceback.
    """
    try:
        email = os.environ['WEB_MONITORING_DB_STAGING_EMAIL']
        password = os.environ['WEB_MONITORING_DB_STAGING_PASSWORD']
        url = os.environ['WEB_MONITORING_DB_STAGING_URL']
    except KeyError as err:
        raise Exception(
            'You must have the following env vars set to update fixture '
            'content: WEB_MONITORING_DB_STAGING_EMAIL, '
            'WEB_MONITORING_DB_STAGING_PASSWORD, '
            'WEB_MONITORING_DB_STAGING_URL') from err
    # Keyword arguments match the module-level staging Client construction
    # and do not depend on the constructor's positional order.
    return Client(email=email, password=password, url=url)
def test_list_pages():
    """Exercise ``list_pages``: chunking, filtering and relation flags.

    Covers ``chunk_size``, the ``url``/``tags``/``maintainers`` filters
    (each with a value expected to match nothing and one expected to match
    something), and the ``include_earliest``/``include_latest`` flags.
    """
    cli = Client(**AUTH)
    res = cli.list_pages()
    assert res['data']

    # Test chunk query parameters.
    res = cli.list_pages(chunk_size=2)
    assert len(res['data']) == 2
    res = cli.list_pages(chunk_size=5)
    assert len(res['data']) == 5

    # Test filtering query parameters.
    res = cli.list_pages(url='__nonexistent__')
    assert len(res['data']) == 0
    res = cli.list_pages(url=URL)
    assert len(res['data']) > 0
    res = cli.list_pages(tags=['__nonexistent__'])
    assert len(res['data']) == 0
    res = cli.list_pages(tags=[SITE])
    assert len(res['data']) > 0
    res = cli.list_pages(maintainers=['__nonexistent__'])
    assert len(res['data']) == 0
    res = cli.list_pages(maintainers=[AGENCY])
    assert len(res['data']) > 0

    # Test relations
    # all() already returns a bool, so it is asserted directly; a generator
    # avoids building a throwaway list.
    res = cli.list_pages(include_earliest=True)
    assert all('earliest' in page for page in res['data'])
    res = cli.list_pages(include_latest=True)
    assert all('latest' in page for page in res['data'])
def test_validate_credentials_should_raise():
    """``validate_credentials`` raises UnauthorizedCredentials for a bad password."""
    # Same settings as AUTH, with only the password replaced; AUTH itself is
    # left untouched.
    wrong_password_auth = dict(AUTH, password='******')
    client = Client(**wrong_password_auth)
    with pytest.raises(UnauthorizedCredentials):
        client.validate_credentials()
def test_get_version_by_versionista_id():
    """Looking up VERSIONISTA_ID returns the version TO_VERSION_ID on PAGE_ID."""
    client = Client(**AUTH)
    version = client.get_version_by_versionista_id(VERSIONISTA_ID)['data']
    assert version['uuid'] == TO_VERSION_ID
    assert version['page_uuid'] == PAGE_ID
def test_get_user_session():
    """The session's user email matches the email in AUTH."""
    client = Client(**AUTH)
    session_user = client.get_user_session()['user']
    assert session_user['email'] == AUTH['email']
def test_validate_credentials():
    """``validate_credentials`` completes without raising for AUTH."""
    # No assertion: an exception is the only failure mode.
    Client(**AUTH).validate_credentials()
def test_get_import_status():
    """The first stashed import reports no processing errors.

    Reads ``global_stash['import_ids']`` and checks only its first entry.
    """
    client = Client(**AUTH)
    # Only the first id is inspected; the remainder is discarded.
    first_import_id, *_ = global_stash['import_ids']
    status = client.get_import_status(first_import_id)
    assert not status['data']['processing_errors']
def test_get_version_by_versionista_id_failure():
    """Looking up the id '__nonexistent__' raises ValueError."""
    missing_id = '__nonexistent__'
    client = Client(**AUTH)
    with pytest.raises(ValueError):
        client.get_version_by_versionista_id(missing_id)
def test_list_annotations():
    """``list_annotations`` for PAGE_ID / TO_VERSION_ID completes without raising."""
    # smoke test
    query = {'page_id': PAGE_ID, 'to_version_id': TO_VERSION_ID}
    Client(**AUTH).list_annotations(**query)
def test_get_change():
    """``get_change`` for PAGE_ID / TO_VERSION_ID completes without raising."""
    # smoke test
    query = {'page_id': PAGE_ID, 'to_version_id': TO_VERSION_ID}
    Client(**AUTH).get_change(**query)
def test_list_page_versions():
    """Every version listed for PAGE_ID reports PAGE_ID as its page uuid."""
    client = Client(**AUTH)
    listed_versions = client.list_versions(page_id=PAGE_ID)['data']
    page_uuids_match = [version['page_uuid'] == PAGE_ID
                        for version in listed_versions]
    assert all(page_uuids_match)
def test_get_page():
    """Fetching PAGE_ID returns data whose uuid is PAGE_ID."""
    client = Client(**AUTH)
    page = client.get_page(PAGE_ID)['data']
    assert page['uuid'] == PAGE_ID
def test_monitor_import_statuses():
    """Monitoring the stashed imports yields no errors.

    Uses the import ids stored in ``global_stash['import_ids']``.
    """
    client = Client(**AUTH)
    reported_errors = client.monitor_import_statuses(
        global_stash['import_ids'])
    assert not reported_errors
def test_list_changes():
    """``list_changes`` for PAGE_ID completes without raising."""
    # smoke test
    Client(**AUTH).list_changes(PAGE_ID)
# @Mr0grog: "The “Survivor Impacts” text is in a `<p>` element between # two `<ul>` elements on this page, but in the diff, the `<p>` gets moved # _into_ the `<ul>`, so it renders like a list item instead of like the # header-ish thing it actually is." ('f2d5d701-707a-42e0-8881-653346d01e0a', 'fc74d750-c651-46b7-bf74-434ad8c62e04'), # See issue #99 ('9d4de183-a186-456c-bffb-55d82989877d', '775a8b04-9bac-4d0d-8db0-a8e133c4a964'), ] # Fetch content as we need it, and cache. This can potentially matter if a # subset of the tests are run. version_content_cache = {} staging_cli = Client( email=os.environ['WEB_MONITORING_DB_STAGING_EMAIL'], password=os.environ['WEB_MONITORING_DB_STAGING_PASSWORD'], url=os.environ['WEB_MONITORING_DB_STAGING_URL']) CACHE_DIR = Path.home() / Path('.cache', 'web-monitoring-processing', 'tests') os.makedirs(CACHE_DIR, exist_ok=True) def get_staging_content(version_id): # Try our in-memory cache, the on-disk cache, and finally the network. try: return version_content_cache[version_id] except KeyError: try: with open(CACHE_DIR / Path(version_id), 'r') as f: content = f.read()