def test_update(self):
    """PUT on an existing resource should update every mutable field in place."""
    resource = ResourceFactory()
    self.dataset.resources.append(resource)
    self.dataset.save()

    update_time = datetime.now()
    payload = {
        'title': faker.sentence(),
        'description': faker.text(),
        'url': faker.url(),
        'published': update_time.isoformat(),
        'extras': {
            'extra:id': 'id',
        }
    }

    with self.api_user():
        response = self.put(
            url_for('api.resource', dataset=self.dataset,
                    rid=str(resource.id)),
            payload)
    self.assert200(response)

    self.dataset.reload()
    # Still exactly one resource: the PUT updated, it did not append.
    self.assertEqual(len(self.dataset.resources), 1)
    updated = self.dataset.resources[0]
    self.assertEqual(updated.title, payload['title'])
    self.assertEqual(updated.description, payload['description'])
    self.assertEqual(updated.url, payload['url'])
    self.assertEqual(updated.extras, {'extra:id': 'id'})
    self.assertEqualDates(updated.published, update_time)
def test_get_next_page(self):
    '''Should fetch 2 pages of resources from the API'''
    resources = [ResourceFactory() for _ in range(80)]
    dataset = DatasetFactory(resources=resources)

    # First page: a full page plus a link to the next one.
    response = self.get(
        url_for('apiv2.resources', dataset=dataset.id, page=1,
                page_size=DEFAULT_PAGE_SIZE))
    self.assert200(response)
    data = response.json
    assert len(data['data']) == DEFAULT_PAGE_SIZE
    assert data['total'] == len(resources)
    assert data['page'] == 1
    assert data['page_size'] == DEFAULT_PAGE_SIZE
    assert data['next_page'] == url_for(
        'apiv2.resources', dataset=dataset.id, page=2,
        page_size=DEFAULT_PAGE_SIZE, _external=True)
    assert data['previous_page'] is None

    # Second page: the remainder, no next page, a link back to page 1.
    response = self.get(data['next_page'])
    self.assert200(response)
    data = response.json
    assert len(data['data']) == len(resources) - DEFAULT_PAGE_SIZE
    assert data['total'] == len(resources)
    assert data['page'] == 2
    assert data['page_size'] == DEFAULT_PAGE_SIZE
    # Fixed: compare to None with `is`, not `==` (PEP 8 / E711), matching
    # the previous_page check on the first page above.
    assert data['next_page'] is None
    assert data['previous_page'] == url_for(
        'apiv2.resources', dataset=dataset.id, page=1,
        page_size=DEFAULT_PAGE_SIZE, _external=True)
def test_purge_organizations(self):
    """Purging a deleted organization removes it, its logo, its dataset
    references and its search-index references."""
    with self.autoindex():
        org = Organization.objects.create(name='delete me', description='XXX')
        resources = [ResourceFactory() for _ in range(2)]
        dataset = DatasetFactory(resources=resources, organization=org)

        # Upload organization's logo
        logo = create_test_image()
        admin = AdminFactory()
        self.login(admin)
        response = self.post(
            url_for('api.organization_logo', org=org),
            {'file': (logo, 'test.png')}, json=False)
        self.assert200(response)

        # Delete organization
        response = self.delete(url_for('api.organization', org=org))
        self.assert204(response)

        tasks.purge_organizations()

        # Check organization's logo is deleted
        self.assertEqual(list(storages.avatars.list_files()), [])
        dataset = Dataset.objects(id=dataset.id).first()
        self.assertIsNone(dataset.organization)
        organization = Organization.objects(name='delete me').first()
        self.assertIsNone(organization)
        indexed_dataset = DatasetSearch.get(
            id=dataset.id, using=es.client, index=es.index_name)
        self.assertIsNone(indexed_dataset.organization)
def test_suggest_datasets_api_unicode(self):
    '''It should suggest datasets with special characters'''
    with self.autoindex():
        for i in range(4):
            DatasetFactory(
                title='testé-{0}'.format(i) if i % 2 else faker.word(),
                resources=[ResourceFactory()])

    response = self.get(url_for('api.suggest_datasets'),
                        qs={'q': 'testé', 'size': '5'})
    self.assert200(response)

    suggestions = response.json
    self.assertLessEqual(len(suggestions), 5)
    self.assertGreater(len(suggestions), 1)
    for suggestion in suggestions:
        for field in ('id', 'title', 'slug', 'score', 'image_url'):
            self.assertIn(field, suggestion)
        # Accented titles are still matched by their ASCII prefix.
        self.assertTrue(suggestion['title'].startswith('test'))
def test_delete_404(self):
    """Deleting a resource that does not belong to the dataset yields 404."""
    missing_rid = str(ResourceFactory().id)
    with self.api_user():
        response = self.delete(
            url_for('api.resource', dataset=self.dataset, rid=missing_rid))
    self.assert404(response)
def test_all_resource_fields(self):
    """Every resource attribute should be exposed in its RDF representation."""
    license = LicenseFactory()
    resource = ResourceFactory(format='csv')
    dataset = DatasetFactory(resources=[resource], license=license)
    permalink = url_for('datasets.resource', id=resource.id, _external=True)

    r = resource_to_rdf(resource, dataset)

    self.assertEqual(r.value(DCT.title), Literal(resource.title))
    self.assertEqual(r.value(DCT.description),
                     Literal(resource.description))
    self.assertEqual(r.value(DCT.issued), Literal(resource.published))
    self.assertEqual(r.value(DCT.modified), Literal(resource.modified))
    self.assertEqual(r.value(DCT.license).identifier, URIRef(license.url))
    self.assertEqual(r.value(DCT.rights), Literal(license.title))
    self.assertEqual(r.value(DCAT.downloadURL).identifier,
                     URIRef(resource.url))
    self.assertEqual(r.value(DCAT.accessURL).identifier, URIRef(permalink))
    self.assertEqual(r.value(DCAT.bytesSize), Literal(resource.filesize))
    self.assertEqual(r.value(DCAT.mediaType), Literal(resource.mime))
    # `format` is accessed via DCT.term() because it shadows a builtin name.
    self.assertEqual(r.value(DCT.term('format')), Literal(resource.format))

    # The checksum is a nested SPDX node; inspect it through the graph.
    checksum = r.value(SPDX.checksum)
    self.assertEqual(r.graph.value(checksum.identifier, RDF.type),
                     SPDX.Checksum)
    self.assertEqual(r.graph.value(checksum.identifier, SPDX.algorithm),
                     SPDX.checksumAlgorithm_sha1)
    self.assertEqual(checksum.value(SPDX.checksumValue),
                     Literal(resource.checksum.value))
def test_suggest_tags_api(self):
    '''It should suggest tags'''
    with self.autoindex():
        for i in range(3):
            tags = [faker.word(), faker.word(), 'test', 'test-{0}'.format(i)]
            ReuseFactory(tags=tags, datasets=[DatasetFactory()])
            DatasetFactory(tags=tags, resources=[ResourceFactory()])

    response = self.get(url_for('api.suggest_tags'),
                        qs={'q': 'tes', 'size': '5'})
    self.assert200(response)

    suggestions = response.json
    self.assertLessEqual(len(suggestions), 5)
    self.assertGreater(len(suggestions), 1)
    # The exact match is expected to rank first.
    self.assertEqual(suggestions[0]['text'], 'test')
    for suggestion in suggestions:
        self.assertIn('text', suggestion)
        self.assertIn('score', suggestion)
        self.assertTrue(suggestion['text'].startswith('test'))
def test_datasets_csv(self):
    """The site-wide datasets CSV export should list indexed datasets only."""
    self.app.config['EXPORT_CSV_MODELS'] = []
    with self.autoindex():
        datasets = [DatasetFactory(resources=[ResourceFactory()])
                    for _ in range(5)]
        # No resource: expected to be excluded from the export.
        hidden_dataset = DatasetFactory()

    response = self.get(url_for('site.datasets_csv'))

    self.assert200(response)
    self.assertEqual(response.mimetype, 'text/csv')
    self.assertEqual(response.charset, 'utf-8')

    csvfile = StringIO(response.data.decode('utf8'))
    reader = csv.get_reader(csvfile)
    header = next(reader)
    self.assertEqual(header[0], 'id')
    for column in ('title', 'description', 'created_at', 'last_modified',
                   'tags', 'metric.reuses'):
        self.assertIn(column, header)

    rows = list(reader)
    ids = [row[0] for row in rows]
    self.assertEqual(len(rows), len(datasets))
    for dataset in datasets:
        self.assertIn(str(dataset.id), ids)
    self.assertNotIn(str(hidden_dataset.id), ids)
def test_get_dataset(self):
    """Dataset APIv2 payload should expose paginated subsection links."""
    resources = [ResourceFactory() for _ in range(2)]
    dataset = DatasetFactory(resources=resources)

    response = self.get(url_for('apiv2.dataset', dataset=dataset))
    self.assert200(response)
    data = response.json

    resources_link = data['resources']
    assert resources_link['rel'] == 'subsection'
    assert resources_link['href'] == url_for(
        'apiv2.resources', dataset=dataset.id, page=1,
        page_size=DEFAULT_PAGE_SIZE, _external=True)
    assert resources_link['type'] == 'GET'
    assert resources_link['total'] == len(resources)

    community_link = data['community_resources']
    assert community_link['rel'] == 'subsection'
    assert community_link['href'] == url_for(
        'api.community_resources', dataset=dataset.id, page=1,
        page_size=DEFAULT_PAGE_SIZE, _external=True)
    assert community_link['type'] == 'GET'
    # No community resource was created for this dataset.
    assert community_link['total'] == 0
def test_resource_latest_url(self):
    '''It should redirect to the real resource URL'''
    resource = ResourceFactory()
    DatasetFactory(resources=[resource])

    permalink = url_for('datasets.resource', id=resource.id)
    response = self.get(permalink)

    self.assertStatus(response, 302)
    self.assertEqual(response.location, resource.url)
def test_add_resource(self):
    """add_resource should fire the dataset signals and insert at the head."""
    owner = UserFactory()
    dataset = DatasetFactory(owner=owner)
    resource = ResourceFactory()
    expected_signals = (post_save, Dataset.after_save, Dataset.on_update,
                        Dataset.on_resource_added)

    with assert_emit(*expected_signals):
        dataset.add_resource(ResourceFactory())
    assert len(dataset.resources) == 1

    with assert_emit(*expected_signals):
        dataset.add_resource(resource)
    assert len(dataset.resources) == 2
    # The most recently added resource sits first in the list.
    assert dataset.resources[0].id == resource.id
    assert dataset.resources[0].dataset == dataset
def test_render_list_with_query(self):
    '''It should render the dataset list page with a query string'''
    with self.autoindex():
        datasets = [DatasetFactory(resources=[ResourceFactory()])
                    for i in range(3)]
        expected_dataset = DatasetFactory(title='test for query',
                                          resources=[ResourceFactory()])
        datasets.append(expected_dataset)

    response = self.get(url_for('datasets.list'),
                        qs={'q': 'test for query'})
    self.assert200(response)

    # Only the dataset whose title matches the query is rendered.
    rendered_datasets = self.get_context_variable('datasets')
    self.assertEqual(len(rendered_datasets), 1)
    self.assertEqual(rendered_datasets[0].id, expected_dataset.id)
def test_publish_message_resource_removed(self):
    """Removing a resource should publish a deletion message on Kafka."""
    # NOTE(review): this swaps get_instance globally without restoring it
    # afterwards — consider a monkeypatch fixture with cleanup.
    kafka_mock = Mock()
    KafkaProducerSingleton.get_instance = lambda: kafka_mock

    resource = ResourceFactory()
    dataset = DatasetFactory(resources=[resource])

    with assert_emit(Dataset.on_resource_removed):
        dataset.remove_resource(resource)

    producer = KafkaProducerSingleton.get_instance()
    message_type = f'resource.{KafkaMessageType.DELETED.value}'
    expected_value = {
        'service': 'udata',
        'data': None,
        'meta': {
            'message_type': message_type,
            'dataset_id': str(dataset.id)
        }
    }
    topic = f"{current_app.config['UDATA_INSTANCE_NAME']}.{message_type}"
    producer.send.assert_called_with(
        topic, value=expected_value, key=str(resource.id).encode("utf-8"))
def test_resources_csv_with_filters(self):
    '''Should handle filtering but ignore paging or facets'''
    with self.autoindex():
        filtered_datasets = [
            DatasetFactory(resources=[ResourceFactory(), ResourceFactory()],
                           tags=['selected'])
            for _ in range(6)
        ]
        # Noise that must not appear in the filtered export.
        [DatasetFactory(resources=[ResourceFactory()]) for _ in range(3)]
        DatasetFactory()

    # page_size and facets are expected to be ignored by the CSV endpoint.
    response = self.get(
        url_for('site.resources_csv', tag='selected', page_size=3,
                facets=True))

    self.assert200(response)
    self.assertEqual(response.mimetype, 'text/csv')
    self.assertEqual(response.charset, 'utf-8')

    csvfile = StringIO(response.data.decode('utf8'))
    reader = csv.get_reader(csvfile)
    header = next(reader)
    self.assertEqual(header[0], 'dataset.id')
    for column in ('dataset.title', 'dataset.url', 'title', 'description',
                   'filetype', 'url', 'created_at', 'modified', 'downloads'):
        self.assertIn(column, header)

    resource_id_index = header.index('id')
    rows = list(reader)
    ids = [(row[0], row[resource_id_index]) for row in rows]

    # All filtered resources are present, pagination notwithstanding.
    self.assertEqual(len(rows),
                     sum(len(d.resources) for d in filtered_datasets))
    for dataset in filtered_datasets:
        for resource in dataset.resources:
            self.assertIn((str(dataset.id), str(resource.id)), ids)
def test_resource_card_resource_with_schema(self):
    """A resource with a schema should render a link to its schema modal."""
    resource = ResourceFactory(schema={'name': 'etalab/irve'})

    content = render_resource_card(resource=resource)

    # The modal id embeds the resource id with dashes stripped.
    modal_ref = f"schemaModalId{str(resource.id).replace('-', '')}"
    assert 'Voir le schéma' in content
    assert f"$refs.{modal_ref}" in content
def test_add_resource_missing_checksum_type(self):
    """A resource whose checksum lacks a type cannot be added to a dataset."""
    dataset = DatasetFactory(owner=UserFactory())
    broken_resource = ResourceFactory()
    broken_resource.checksum.type = None

    with pytest.raises(db.ValidationError):
        dataset.add_resource(broken_resource)
def test_resources_csv(self):
    """The org resources CSV should list each resource of the org's datasets
    and exclude datasets from other owners or hidden ones."""
    with self.autoindex():
        org = OrganizationFactory()
        datasets = [
            DatasetFactory(organization=org,
                           resources=[ResourceFactory(), ResourceFactory()])
            for _ in range(3)
        ]
        not_org_dataset = DatasetFactory(resources=[ResourceFactory()])
        hidden_dataset = DatasetFactory()

    response = self.get(
        url_for('organizations.datasets_resources_csv', org=org))

    self.assert200(response)
    self.assertEqual(response.mimetype, 'text/csv')
    self.assertEqual(response.charset, 'utf-8')

    # Fixed Python 2 leftovers: decode the byte payload into io.StringIO
    # (instead of StringIO.StringIO), use next(reader) (instead of
    # reader.next()) and drop the duplicated `reader = reader =` assignment —
    # matching the other CSV tests in this file.
    csvfile = StringIO(response.data.decode('utf8'))
    reader = csv.get_reader(csvfile)
    header = next(reader)
    self.assertEqual(header[0], 'dataset.id')
    self.assertIn('dataset.title', header)
    self.assertIn('dataset.url', header)
    self.assertIn('title', header)
    self.assertIn('filetype', header)
    self.assertIn('url', header)
    self.assertIn('created_at', header)
    self.assertIn('modified', header)
    self.assertIn('downloads', header)

    resource_id_index = header.index('id')
    rows = list(reader)
    ids = [(row[0], row[resource_id_index]) for row in rows]

    # One row per resource of the organization's datasets.
    self.assertEqual(len(rows), sum(len(d.resources) for d in datasets))
    for dataset in datasets:
        for resource in dataset.resources:
            self.assertIn((str(dataset.id), str(resource.id)), ids)

    # Datasets outside the organization never leak into the export.
    dataset_ids = set(row[0] for row in rows)
    self.assertNotIn(str(hidden_dataset.id), dataset_ids)
    self.assertNotIn(str(not_org_dataset.id), dataset_ids)
def test_update_resource_missing_checksum_type(self):
    """Updating a resource whose checksum lost its type must fail validation."""
    resource = ResourceFactory()
    dataset = DatasetFactory(owner=UserFactory(), resources=[resource])
    resource.checksum.type = None

    with self.assertRaises(db.ValidationError):
        dataset.update_resource(resource)
def test_resource_latest_url_stripped(self):
    '''It should strip extra spaces from the resource URL when redirecting'''
    padded_url = 'http://www.somewhere.com/path/with/spaces/ '
    resource = ResourceFactory(url=padded_url)
    DatasetFactory(resources=[resource])

    response = self.get(url_for('datasets.resource', id=resource.id))

    self.assertStatus(response, 302)
    self.assertEqual(response.location, padded_url.strip())
def test_ignore_post_save_signal(self):
    """Saving with 'post_save' in `ignores` must skip dataset-level signals."""
    resource = ResourceFactory()
    DatasetFactory(resources=[resource])
    silenced = (Dataset.after_save, Dataset.on_update)

    with assert_not_emit(*silenced), assert_emit(post_save):
        resource.title = 'New title'
        resource.save(signal_kwargs={'ignores': ['post_save']})
def test_datasets_csv_with_filters(self):
    '''Should handle filtering but ignore paging or facets'''
    with self.autoindex():
        filtered_datasets = [
            DatasetFactory(resources=[ResourceFactory()], tags=['selected'])
            for _ in range(6)
        ]
        datasets = [DatasetFactory(resources=[ResourceFactory()])
                    for _ in range(3)]
        hidden_dataset = DatasetFactory()

    # page_size and facets are expected to be ignored by the CSV endpoint.
    response = self.get(
        url_for('site.datasets_csv', tag='selected', page_size=3,
                facets=True))

    self.assert200(response)
    self.assertEqual(response.mimetype, 'text/csv')
    self.assertEqual(response.charset, 'utf-8')

    csvfile = StringIO(response.data.decode('utf8'))
    reader = csv.get_reader(csvfile)
    header = next(reader)
    self.assertEqual(header[0], 'id')
    for column in ('title', 'description', 'created_at', 'last_modified',
                   'tags', 'metric.reuses'):
        self.assertIn(column, header)

    rows = list(reader)
    ids = [row[0] for row in rows]

    # Should ignore paging
    self.assertEqual(len(rows), len(filtered_datasets))
    # Should honor the tag filter
    for dataset in filtered_datasets:
        self.assertIn(str(dataset.id), ids)
    for dataset in datasets:
        self.assertNotIn(str(dataset.id), ids)
    self.assertNotIn(str(hidden_dataset.id), ids)
def test_fallback_to_default_locale():
    """Without an active language, the preview URL falls back to lang=fr."""
    resource = ResourceFactory(extras={
        'geop:resource_id': 'RID',
    })
    DatasetFactory(resources=[resource],
                   extras={'geop:dataset_id': 'DID'})

    expected = 'https://geo.data.gouv.fr/embed/datasets/DID/resources/RID?lang=fr'  # noqa
    assert resource.preview_url == expected
def test_ignore_post_save_signal(self):
    """Ignoring 'post_save' should silence dataset-level update signals."""
    resource = ResourceFactory()
    # Keep a reference so the dataset is not garbage collected mid-test.
    _ = DatasetFactory(resources=[resource])
    silenced = (Dataset.after_save, Dataset.on_update)

    with assert_not_emit(*silenced), assert_emit(post_save):
        resource.title = 'New title'
        resource.save(signal_kwargs={'ignores': ['post_save']})
def test_display_preview_for_api_resources(locale):
    """The preview URL should embed the currently active locale."""
    resource = ResourceFactory(extras={
        'geop:resource_id': 'RID',
    })
    DatasetFactory(resources=[resource], extras={'geop:dataset_id': 'DID'})

    expected = 'https://geo.data.gouv.fr/embed/datasets/DID/resources/RID?lang={0}'.format(locale)  # noqa
    with language(locale):
        assert resource.preview_url == expected
def test_suggest_datasets_api_no_match(self):
    '''It should not provide dataset suggestion if no match'''
    with self.autoindex():
        for _ in range(3):
            DatasetFactory(resources=[ResourceFactory()])

    response = self.get(url_for('api.suggest_datasets'),
                        qs={'q': 'xxxxxx', 'size': '5'})

    self.assert200(response)
    self.assertEqual(len(response.json), 0)
def dataset_resource(app):
    """Fixture: a dataset with one resource, plus 2 visits and 2 downloads."""
    resource = ResourceFactory(url='http://schéma.org')
    dataset = DatasetFactory(resources=[resource])
    # 2x visit
    for _ in range(2):
        visit(dataset)
    # 1 download on url, 1 on latest url
    download(resource)
    download(resource, latest=True)
    return dataset, resource
def test_base_modals_dataset_w_schema(self):
    """Base modals should include schema modal markup for schema'd resources."""
    resource = ResourceFactory(schema='etalab/irve')
    dataset = DatasetFactory(resources=[resource])

    content = render_base_modals(dataset=dataset)

    # The modal id embeds the resource id with dashes stripped.
    modal_id = f"schema-modal-Id{str(resource.id).replace('-', '')}"
    assert 'etalab/irve' in content
    assert modal_id in content
    assert 'https://validata.example.com/table-schema' in content
    assert 'https://schema.data.gouv.fr' in content
def test_purge_organizations(self):
    """Purging a deleted organization should clear its logo, its transfers
    and dangling references from datasets and OAuth clients."""
    org = Organization.objects.create(name='delete me', description='XXX')
    resources = [ResourceFactory() for _ in range(2)]
    dataset = DatasetFactory(resources=resources, organization=org)

    # Upload organization's logo
    logo = create_test_image()
    admin = AdminFactory()
    self.login(admin)
    response = self.post(
        url_for('api.organization_logo', org=org),
        {'file': (logo, 'test.png')}, json=False)
    self.assert200(response)

    transfer_to_org = Transfer.objects.create(
        owner=admin,
        recipient=org,
        subject=dataset,
        comment='comment',
    )
    transfer_from_org = Transfer.objects.create(
        owner=org,
        recipient=admin,
        subject=dataset,
        comment='comment',
    )
    oauth_client = OAuth2Client.objects.create(
        name='test-client',
        owner=admin,
        organization=org,
        redirect_uris=['https://test.org/callback'],
    )

    # Delete organization
    response = self.delete(url_for('api.organization', org=org))
    self.assert204(response)

    tasks.purge_organizations()

    # OAuth clients survive but lose the organization reference.
    oauth_client.reload()
    assert oauth_client.organization is None
    # Transfers involving the organization are purged in both directions.
    assert Transfer.objects.filter(id=transfer_from_org.id).count() == 0
    assert Transfer.objects.filter(id=transfer_to_org.id).count() == 0

    # Check organization's logo is deleted
    self.assertEqual(list(storages.avatars.list_files()), [])
    dataset = Dataset.objects(id=dataset.id).first()
    self.assertIsNone(dataset.organization)
    organization = Organization.objects(name='delete me').first()
    self.assertIsNone(organization)
def test_create_2nd(self):
    """POSTing a resource to a dataset that already has one appends a second."""
    self.dataset.resources.append(ResourceFactory())
    self.dataset.save()

    payload = ResourceFactory.as_dict()
    with self.api_user():
        response = self.post(
            url_for('api.resources', dataset=self.dataset), payload)
    self.assert201(response)

    self.dataset.reload()
    self.assertEqual(len(self.dataset.resources), 2)
def test_dataset_api_get(self):
    '''It should fetch a dataset from the API'''
    with self.autoindex():
        resources = [ResourceFactory() for _ in range(2)]
        dataset = DatasetFactory(resources=resources)

    response = self.get(url_for('api.dataset', dataset=dataset))
    self.assert200(response)

    data = json.loads(response.data)
    self.assertEqual(len(data['resources']), len(resources))
    # The quality payload is not part of this representation.
    self.assertFalse('quality' in data)