def test_ignore_post_save_signal(self):
    """Saving with ignores=['post_save'] must skip the custom dataset signals."""
    dataset = DatasetFactory()
    silenced = (Dataset.after_save, Dataset.on_update)
    with assert_not_emit(*silenced), assert_emit(post_save):
        dataset.title = 'New title'
        dataset.save(signal_kwargs={'ignores': ['post_save']})
def test_update_resource_missing_checksum_type(self):
    """update_resource() should fail validation when the checksum type is unset."""
    owner = UserFactory()
    resource = ResourceFactory()
    dataset = DatasetFactory(owner=owner, resources=[resource])
    resource.checksum.type = None
    with pytest.raises(db.ValidationError):
        dataset.update_resource(resource)
def test_update_resource(self):
    """update_resource() should persist changes and emit the usual signals."""
    owner = UserFactory()
    resource = ResourceFactory()
    dataset = DatasetFactory(owner=owner, resources=[resource])
    resource.description = 'New description'
    with assert_emit(post_save, Dataset.after_save, Dataset.on_update):
        dataset.update_resource(resource)
    assert len(dataset.resources) == 1
    updated = dataset.resources[0]
    assert updated.id == resource.id
    assert updated.description == 'New description'
def test_add_resource_without_checksum(self):
    """Resources without a checksum should be accepted and prepended."""
    owner = UserFactory()
    dataset = DatasetFactory(owner=owner)
    resource = ResourceFactory(checksum=None)
    signals = (post_save, Dataset.after_save, Dataset.on_update)
    with assert_emit(*signals):
        dataset.add_resource(ResourceFactory(checksum=None))
    assert len(dataset.resources) == 1
    with assert_emit(*signals):
        dataset.add_resource(resource)
    assert len(dataset.resources) == 2
    # The most recently added resource comes first.
    assert dataset.resources[0].id == resource.id
def test_attach(self):
    """attach() should bind each local dataset to its remote identifier.

    Fix: the csv module writes `str` under Python 3, so the temp file must
    be opened in text mode and the delimiter/quotechar must be `str`, not
    `bytes` (the original `b';'`/`b'"'` raises TypeError on Python 3).
    """
    datasets = DatasetFactory.create_batch(3)
    with NamedTemporaryFile(mode='w') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['local', 'remote'],
                                delimiter=';', quotechar='"')
        writer.writeheader()
        for index, dataset in enumerate(datasets):
            writer.writerow({
                'local': str(dataset.id),
                'remote': str(index)
            })
        # Flush so the attach action sees the rows on disk.
        csvfile.flush()
        result = actions.attach('test.org', csvfile.name)

        assert result.success == len(datasets)
        assert result.errors == 0
        for index, dataset in enumerate(datasets):
            dataset.reload()
            assert dataset.extras['harvest:domain'] == 'test.org'
            assert dataset.extras['harvest:remote_id'] == str(index)
def test_attach_skip_not_found(self):
    """Unknown local ids should be counted as errors, not abort the run.

    Fix: the csv module writes `str` under Python 3, so the temp file must
    be opened in text mode and the delimiter/quotechar must be `str`, not
    `bytes` (the original `b';'`/`b'"'` raises TypeError on Python 3).
    """
    datasets = DatasetFactory.create_batch(3)
    with NamedTemporaryFile(mode='w') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['local', 'remote'],
                                delimiter=';', quotechar='"')
        writer.writeheader()
        # A row whose local id matches no dataset must only bump `errors`.
        writer.writerow({
            'local': 'not-found',
            'remote': '42'
        })
        for index, dataset in enumerate(datasets):
            writer.writerow({
                'local': str(dataset.id),
                'remote': str(index)
            })
        csvfile.flush()
        result = actions.attach('test.org', csvfile.name)

        assert result.success == len(datasets)
        assert result.errors == 1
def test_add_resource(self):
    """add_resource() should prepend, back-link the dataset and emit signals."""
    owner = UserFactory()
    dataset = DatasetFactory(owner=owner)
    resource = ResourceFactory()
    signals = (post_save, Dataset.after_save, Dataset.on_update,
               Dataset.on_resource_added)
    with assert_emit(*signals):
        dataset.add_resource(ResourceFactory())
    assert len(dataset.resources) == 1
    with assert_emit(*signals):
        dataset.add_resource(resource)
    assert len(dataset.resources) == 2
    newest = dataset.resources[0]
    assert newest.id == resource.id
    assert newest.dataset == dataset
def test_render_list_with_facets(self):
    '''It should render the dataset list page with facets'''
    with self.autoindex():
        datasets = DatasetFactory.create_batch(3, visible=True,
                                               org=True, geo=True)
    response = self.get(url_for('datasets.list'))
    self.assert200(response)
    listed = self.get_context_variable('datasets')
    self.assertEqual(len(listed), len(datasets))
def test_quality_all(self):
    """The quality dict should expose every component plus the score."""
    owner = UserFactory()
    visitor = UserFactory()
    dataset = DatasetFactory(owner=owner, frequency='weekly',
                             tags=['foo', 'bar'], description='a' * 42)
    dataset.add_resource(ResourceFactory(format='pdf'))
    DiscussionFactory(
        subject=dataset, user=visitor,
        discussion=[MessageDiscussionFactory(posted_by=visitor)])
    quality = dataset.quality
    assert quality['score'] == 0
    expected_keys = [
        'description_length',
        'discussions',
        'frequency',
        'has_only_closed_or_no_formats',
        'has_resources',
        'has_unavailable_resources',
        'has_untreated_discussions',
        'score',
        'tags_count',
        'update_in',
    ]
    assert sorted(quality.keys()) == expected_keys
def test_delete_home_dataset(self):
    '''Should pull home datasets on deletion'''
    current_site.settings.home_datasets = DatasetFactory.create_batch(3)
    current_site.save()
    removed = current_site.settings.home_datasets[1]
    removed.deleted = datetime.now()
    removed.save()
    current_site.reload()
    remaining_ids = [d.id for d in current_site.settings.home_datasets]
    self.assertEqual(len(remaining_ids), 2)
    self.assertNotIn(removed.id, remaining_ids)
def test_attach_does_not_duplicate(self):
    """attach() should rebind already-harvested datasets, not duplicate them.

    Fix: the csv module writes `str` under Python 3, so the temp file must
    be opened in text mode and the delimiter/quotechar must be `str`, not
    `bytes` (the original `b';'`/`b'"'` raises TypeError on Python 3).
    """
    # Two datasets already carry harvest extras for the same domain.
    attached_datasets = []
    for i in range(2):
        dataset = DatasetFactory.build()
        dataset.extras['harvest:domain'] = 'test.org'
        dataset.extras['harvest:remote_id'] = str(i)
        dataset.last_modified = datetime.now()
        dataset.save()
        attached_datasets.append(dataset)

    datasets = DatasetFactory.create_batch(3)

    with NamedTemporaryFile(mode='w') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['local', 'remote'],
                                delimiter=';', quotechar='"')
        writer.writeheader()
        for index, dataset in enumerate(datasets):
            writer.writerow({
                'local': str(dataset.id),
                'remote': str(index)
            })
        csvfile.flush()
        result = actions.attach('test.org', csvfile.name)

        # Only the freshly attached datasets keep a remote id: the previous
        # bindings must have been replaced, not duplicated.
        dbcount = Dataset.objects(**{
            'extras__harvest:remote_id__exists': True
        }).count()
        assert result.success == len(datasets)
        assert dbcount == result.success
        for index, dataset in enumerate(datasets):
            dataset.reload()
            assert dataset.extras['harvest:domain'] == 'test.org'
            assert dataset.extras['harvest:remote_id'] == str(index)
def test_minimal(self):
    """A dataset without URL should serialize to a blank-node DCAT resource.

    Fix: `is 1` compared object identity on an int — it only worked thanks
    to CPython small-int caching and raises a SyntaxWarning on modern
    Python. Value equality (`== 1`) is what is meant.
    """
    dataset = DatasetFactory.build()  # Does not have an URL
    d = dataset_to_rdf(dataset)
    g = d.graph

    assert isinstance(d, RdfResource)
    assert len(list(g.subjects(RDF.type, DCAT.Dataset))) == 1
    assert g.value(d.identifier, RDF.type) == DCAT.Dataset
    assert isinstance(d.identifier, BNode)
    assert d.value(DCT.identifier) == Literal(dataset.id)
    assert d.value(DCT.title) == Literal(dataset.title)
    assert d.value(DCT.issued) == Literal(dataset.created_at)
    assert d.value(DCT.modified) == Literal(dataset.last_modified)
def test_quality_default(self):
    """A dataset with nothing filled in only carries a null quality score."""
    empty_dataset = DatasetFactory(description='')
    assert {'score': 0} == empty_dataset.quality
def test_next_update_empty(self):
    """Without a frequency there is no next update to compute."""
    assert DatasetFactory().next_update is None
def test_quality_has_opened_formats(self):
    """At least one open format should clear the closed-formats flag."""
    dataset = DatasetFactory(description='')
    for fmt in ('pdf', 'csv'):
        dataset.add_resource(ResourceFactory(format=fmt))
    quality = dataset.quality
    assert not quality['has_only_closed_or_no_formats']
    assert quality['score'] == 4
def test_quality_description_length(self):
    """Only a long enough description should contribute to the score."""
    short = DatasetFactory(description='a' * 42)
    assert short.quality['description_length'] == 42
    assert short.quality['score'] == 0
    long_enough = DatasetFactory(description='a' * 420)
    assert long_enough.quality['score'] == 2
def test_tags_normalized(self):
    """Tags should be slugified and deduplicated."""
    raw_tags = [' one another!', ' one another!', 'This IS a "tag"…']
    dataset = DatasetFactory(tags=raw_tags)
    assert len(dataset.tags) == 2
    assert dataset.tags[1] == 'this-is-a-tag'
def process(self, item):
    """Record the processed item, then build a dataset named after it."""
    mock_process.send(self, item=item)
    title = 'dataset-{0}'.format(item.remote_id)
    return DatasetFactory.build(title=title)
def setup(self, app):
    """Provision a dataset holding one resource plus a croquemort checker."""
    resource = ResourceFactory()
    self.resource = resource
    self.dataset = DatasetFactory(resources=[resource])
    self.checker = CroquemortLinkChecker()
def test_delete(self):
    '''It should delete the connected user'''
    user = self.login()
    self.assertIsNone(user.deleted)
    other_user = UserFactory()
    # The user belongs to an organization alongside another member.
    members = [Member(user=user), Member(user=other_user)]
    organization = OrganizationFactory(members=members)
    # One discussion and one issue, each with a message from both users,
    # to check which messages get anonymized on deletion.
    disc_msg_content = faker.sentence()
    disc_msg = DiscMsg(content=disc_msg_content, posted_by=user)
    other_disc_msg_content = faker.sentence()
    other_disc_msg = DiscMsg(content=other_disc_msg_content,
                             posted_by=other_user)
    discussion = DiscussionFactory(user=user,
                                   discussion=[disc_msg, other_disc_msg])
    issue_msg_content = faker.sentence()
    issue_msg = IssueMsg(content=issue_msg_content, posted_by=user)
    other_issue_msg_content = faker.sentence()
    other_issue_msg = IssueMsg(content=other_issue_msg_content,
                               posted_by=other_user)
    issue = IssueFactory(user=user,
                         discussion=[issue_msg, other_issue_msg])
    # Content owned by the user that must survive the account deletion.
    dataset = DatasetFactory(owner=user)
    reuse = ReuseFactory(owner=user)
    resource = CommunityResourceFactory(owner=user)
    activity = UserCreatedDataset.objects().create(actor=user,
                                                   related_to=dataset)
    # Follow relations in both directions; both must be purged.
    following = Follow.objects().create(follower=user, following=other_user)
    followed = Follow.objects().create(follower=other_user, following=user)

    with self.capture_mails() as mails:
        response = self.delete(url_for('api.me'))

    # A single notification mail is sent to the deleted account.
    self.assertEqual(len(mails), 1)
    self.assertEqual(mails[0].send_to, set([user.email]))
    self.assertEqual(mails[0].subject, _('Account deletion'))

    self.assert204(response)
    user.reload()
    organization.reload()
    discussion.reload()
    issue.reload()
    dataset.reload()
    reuse.reload()
    resource.reload()
    activity.reload()
    # The following are deleted
    with self.assertRaises(Follow.DoesNotExist):
        following.reload()
    # The followers are deleted
    with self.assertRaises(Follow.DoesNotExist):
        followed.reload()
    # The personal data of the user are anonymized
    self.assertEqual(user.email, '{}@deleted'.format(user.id))
    self.assertEqual(user.password, None)
    self.assertEqual(user.active, False)
    self.assertEqual(user.first_name, 'DELETED')
    self.assertEqual(user.last_name, 'DELETED')
    self.assertFalse(bool(user.avatar))
    self.assertEqual(user.avatar_url, None)
    self.assertEqual(user.website, None)
    self.assertEqual(user.about, None)
    # The user is marked as deleted
    self.assertIsNotNone(user.deleted)
    # The user is removed from his organizations
    self.assertEqual(len(organization.members), 1)
    self.assertEqual(organization.members[0].user.id, other_user.id)
    # The discussions are kept but the messages are anonymized
    self.assertEqual(len(discussion.discussion), 2)
    self.assertEqual(discussion.discussion[0].content, 'DELETED')
    self.assertEqual(discussion.discussion[1].content,
                     other_disc_msg_content)
    # The issues are kept and the messages are not anonymized
    self.assertEqual(len(issue.discussion), 2)
    self.assertEqual(issue.discussion[0].content, issue_msg_content)
    self.assertEqual(issue.discussion[1].content, other_issue_msg_content)
    # The datasets are unchanged
    self.assertEqual(dataset.owner, user)
    # The reuses are unchanged
    self.assertEqual(reuse.owner, user)
    # The community resources are unchanged
    self.assertEqual(resource.owner, user)
    # The activities are unchanged
    self.assertEqual(activity.actor, user)
def test_resources_metric(self, app):
    """The site-wide resources metric should count every resource."""
    DatasetFactory.create_batch(3, nb_resources=3)
    site = Site.objects.get(id=app.config['SITE_ID'])
    # 3 datasets with 3 resources each.
    assert site.metrics['resources'] == 3 * 3
def test_last_update_without_resource(self):
    """Without resources, last_update falls back to last_modified."""
    dataset = DatasetFactory(owner=UserFactory())
    assert_equal_dates(dataset.last_update, dataset.last_modified)
def test_last_update_with_resource(self):
    """With a resource, last_update tracks the resource publication date."""
    dataset = DatasetFactory(owner=UserFactory())
    resource = ResourceFactory()
    dataset.add_resource(resource)
    assert_equal_dates(dataset.last_update, resource.published)
def test_url_is_stripped(self):
    """Surrounding whitespace should be stripped from resource URLs."""
    padded_url = 'http://www.somewhere.com/with/spaces/ '
    resource = ResourceFactory(url=padded_url)
    dataset = DatasetFactory(resources=[resource])
    assert dataset.resources[0].url == padded_url.strip()
def datasets(self):
    """Provide a small batch of datasets."""
    batch = DatasetFactory.create_batch(3)
    return batch
def test_raise_404_if_private(self):
    '''It should raise a 404 if the dataset is private'''
    private_dataset = DatasetFactory(private=True)
    url = url_for('datasets.show', dataset=private_dataset)
    self.assert404(self.get(url))
def test_bad_url(self):
    """A malformed resource URL should fail dataset validation."""
    with pytest.raises(db.ValidationError):
        bad_resource = ResourceFactory(url='not-an-url')
        DatasetFactory(resources=[bad_resource])
def test_raise_410_if_deleted(self):
    '''It should raise a 410 if the dataset is deleted'''
    gone = DatasetFactory(deleted=datetime.now())
    url = url_for('datasets.show', dataset=gone)
    self.assert410(self.get(url))
def test_legacy_frequencies(self):
    """Legacy frequency values should be migrated to their new names."""
    for legacy, expected in LEGACY_FREQUENCIES.items():
        assert DatasetFactory(frequency=legacy).frequency == expected
def test_send_on_delete(self):
    """Saving a dataset marked deleted should emit the on_delete signal."""
    dataset = DatasetFactory()
    dataset.deleted = datetime.now()
    with assert_emit(Dataset.on_delete):
        dataset.save()
def test_quality_has_undefined_and_closed_format(self):
    """Only closed or missing formats should set the closed-formats flag."""
    dataset = DatasetFactory(description='')
    for fmt in (None, 'xls'):
        dataset.add_resource(ResourceFactory(format=fmt))
    quality = dataset.quality
    assert quality['has_only_closed_or_no_formats']
    assert quality['score'] == 0
def test_community_resource_deleted_dataset(self):
    """Deleting the dataset should nullify the community resource link."""
    community_resource = CommunityResourceFactory(dataset=DatasetFactory())
    community_resource.dataset.delete()
    community_resource.reload()
    assert community_resource.dataset is None
def setUp(self):
    """Provision a dataset holding a single resource."""
    resource = ResourceFactory()
    self.resource = resource
    self.dataset = DatasetFactory(resources=[resource])
def test_display_no_preview_for_no_resource_extra():
    """No preview URL when the resource lacks its geop id extra."""
    resource = ResourceFactory()
    DatasetFactory(resources=[resource],
                   extras={'geop:dataset_id': 'DID'})
    assert resource.preview_url is None
def test_quality_next_update(self):
    """Quality should expose the declared frequency and the update delay."""
    dataset = DatasetFactory(description='', frequency='weekly')
    quality = dataset.quality
    assert quality['update_in'] == -6
    assert quality['frequency'] == 'weekly'
    assert quality['score'] == 2
def test_display_no_preview_for_no_dataset_extra():
    """No preview URL when the dataset lacks its geop id extra."""
    resource = ResourceFactory(extras={'geop:resource_id': 'RID'})
    DatasetFactory(resources=[resource])
    assert resource.preview_url is None
def test_next_update_weekly(self):
    """A weekly dataset should be due one week from now."""
    dataset = DatasetFactory(frequency='weekly')
    expected = datetime.now() + timedelta(days=7)
    assert_equal_dates(dataset.next_update, expected)
def test_ftp_url(self, httpretty):
    """FTP URLs should be skipped by the link checker."""
    ftp_resource = ResourceFactory(url='Ftp://etalab.gouv.fr')
    DatasetFactory(resources=[ftp_resource])
    assert self.checker.check(ftp_resource) is None
def test_croquemort_not_configured(self, app):
    """Without configuration the croquemort checker should be a no-op."""
    dataset = DatasetFactory(visible=True)
    checker = CroquemortLinkChecker()
    first_resource = dataset.resources[0]
    assert checker.check(first_resource) is None
def test_200_if_deleted_but_authorized(self):
    '''It should not raise a 410 if the user can view it'''
    self.login()
    dataset = DatasetFactory(deleted=datetime.now(), owner=self.user)
    url = url_for('datasets.show', dataset=dataset)
    self.assert200(self.get(url))
def test_url_is_required(self):
    """A resource without URL should fail dataset validation."""
    with pytest.raises(db.ValidationError):
        missing_url = ResourceFactory(url=None)
        DatasetFactory(resources=[missing_url])
def test_json_ld(self):
    '''It should render a json-ld markup into the dataset page

    Fix: `assertEquals` is a long-deprecated alias removed in Python 3.12;
    every call is replaced with the supported `assertEqual`.
    '''
    resource = ResourceFactory(format='png',
                               description='* Title 1\n* Title 2',
                               metrics={'views': 10})
    license = LicenseFactory(url='http://www.datagouv.fr/licence')
    dataset = DatasetFactory(license=license,
                             tags=['foo', 'bar'],
                             resources=[resource],
                             description='a&éèëù$£',
                             owner=UserFactory(),
                             extras={'foo': 'bar'})
    community_resource = CommunityResourceFactory(
        dataset=dataset,
        format='csv',
        description='* Title 1\n* Title 2',
        metrics={'views': 42})

    url = url_for('datasets.show', dataset=dataset)
    response = self.get(url)
    self.assert200(response)

    json_ld = self.get_json_ld(response)
    self.assertEqual(json_ld['@context'], 'http://schema.org')
    self.assertEqual(json_ld['@type'], 'Dataset')
    self.assertEqual(json_ld['@id'], str(dataset.id))
    self.assertEqual(json_ld['description'], 'a&éèëù$£')
    self.assertEqual(json_ld['alternateName'], dataset.slug)
    # Timestamps are compared up to the minute to dodge rendering latency.
    self.assertEqual(json_ld['dateCreated'][:16],
                     dataset.created_at.isoformat()[:16])
    self.assertEqual(json_ld['dateModified'][:16],
                     dataset.last_modified.isoformat()[:16])
    self.assertEqual(json_ld['url'], 'http://localhost{}'.format(url))
    self.assertEqual(json_ld['name'], dataset.title)
    self.assertEqual(json_ld['keywords'], 'bar,foo')
    self.assertEqual(len(json_ld['distribution']), 1)

    json_ld_resource = json_ld['distribution'][0]
    self.assertEqual(json_ld_resource['@type'], 'DataDownload')
    self.assertEqual(json_ld_resource['@id'], str(resource.id))
    self.assertEqual(json_ld_resource['url'], resource.latest)
    self.assertEqual(json_ld_resource['name'], resource.title)
    self.assertEqual(json_ld_resource['contentUrl'], resource.url)
    self.assertEqual(json_ld_resource['dateCreated'][:16],
                     resource.created_at.isoformat()[:16])
    self.assertEqual(json_ld_resource['dateModified'][:16],
                     resource.modified.isoformat()[:16])
    self.assertEqual(json_ld_resource['datePublished'][:16],
                     resource.published.isoformat()[:16])
    self.assertEqual(json_ld_resource['encodingFormat'], 'png')
    self.assertEqual(json_ld_resource['contentSize'], resource.filesize)
    self.assertEqual(json_ld_resource['fileFormat'], resource.mime)
    self.assertEqual(json_ld_resource['description'], 'Title 1 Title 2')
    self.assertEqual(json_ld_resource['interactionStatistic'], {
        '@type': 'InteractionCounter',
        'interactionType': {
            '@type': 'DownloadAction',
        },
        'userInteractionCount': 10,
    })

    self.assertEqual(len(json_ld['contributedDistribution']), 1)
    json_ld_resource = json_ld['contributedDistribution'][0]
    self.assertEqual(json_ld_resource['@type'], 'DataDownload')
    self.assertEqual(json_ld_resource['@id'], str(community_resource.id))
    self.assertEqual(json_ld_resource['url'], community_resource.latest)
    self.assertEqual(json_ld_resource['name'], community_resource.title)
    self.assertEqual(json_ld_resource['contentUrl'],
                     community_resource.url)
    self.assertEqual(json_ld_resource['dateCreated'][:16],
                     community_resource.created_at.isoformat()[:16])
    self.assertEqual(json_ld_resource['dateModified'][:16],
                     community_resource.modified.isoformat()[:16])
    self.assertEqual(json_ld_resource['datePublished'][:16],
                     community_resource.published.isoformat()[:16])
    self.assertEqual(json_ld_resource['encodingFormat'],
                     community_resource.format)
    self.assertEqual(json_ld_resource['contentSize'],
                     community_resource.filesize)
    self.assertEqual(json_ld_resource['fileFormat'],
                     community_resource.mime)
    self.assertEqual(json_ld_resource['description'], 'Title 1 Title 2')
    self.assertEqual(json_ld_resource['interactionStatistic'], {
        '@type': 'InteractionCounter',
        'interactionType': {
            '@type': 'DownloadAction',
        },
        'userInteractionCount': 42,
    })

    self.assertEqual(json_ld['extras'], [{
        '@type': 'http://schema.org/PropertyValue',
        'name': 'foo',
        'value': 'bar',
    }])
    self.assertEqual(json_ld['license'], 'http://www.datagouv.fr/licence')
    self.assertEqual(json_ld['author']['@type'], 'Person')
def test_redirect_datasets(self, client):
    """Legacy dataset URLs should redirect to the canonical page."""
    dataset = DatasetFactory()
    legacy_url = '/en/dataset/%s/' % dataset.slug
    response = client.get(legacy_url)
    assert_redirects(response, url_for('datasets.show', dataset=dataset))