def test_all_dataset_fields(self):
    '''
    Check that a dataset with tags, resources, a frequency and an acronym
    is serialized with every corresponding DCAT/DCT/SKOS property.
    '''
    resources = ResourceFactory.build_batch(3)
    dataset = DatasetFactory(tags=faker.words(nb=3),
                             resources=resources,
                             frequency='daily',
                             acronym='acro')

    d = dataset_to_rdf(dataset)
    g = d.graph

    assert isinstance(d, RdfResource)
    # Compare by value: `is 1` only worked through CPython small-int
    # caching and raises a SyntaxWarning on Python 3.8+.
    assert len(list(g.subjects(RDF.type, DCAT.Dataset))) == 1

    assert g.value(d.identifier, RDF.type) == DCAT.Dataset

    # The node identifier is the dataset's external redirect URL.
    assert isinstance(d.identifier, URIRef)
    uri = url_for('datasets.show_redirect',
                  dataset=dataset.id, _external=True)
    assert str(d.identifier) == uri

    assert d.value(DCT.identifier) == Literal(dataset.id)
    assert d.value(DCT.title) == Literal(dataset.title)
    assert d.value(SKOS.altLabel) == Literal(dataset.acronym)
    assert d.value(DCT.description) == Literal(dataset.description)
    assert d.value(DCT.issued) == Literal(dataset.created_at)
    assert d.value(DCT.modified) == Literal(dataset.last_modified)
    assert d.value(DCT.accrualPeriodicity).identifier == FREQ.daily

    # One DCAT.keyword literal per tag, one DCAT.distribution per resource.
    expected_tags = set(Literal(t) for t in dataset.tags)
    assert set(d.objects(DCAT.keyword)) == expected_tags
    assert len(list(d.objects(DCAT.distribution))) == len(resources)
def build_catalog(site, datasets, format=None):
    '''
    Assemble the site-wide DCAT catalog.

    A fresh graph is created, the catalog node is described (type, title,
    language, homepage and a blank-node publisher named after the
    ``SITE_AUTHOR`` setting), then each dataset is serialized into the same
    graph and attached to the catalog. A dataset's publisher is its owner
    when it has one, otherwise its organization when it has one.

    When ``datasets`` is a ``Paginable``, pagination is delegated to
    ``paginate_catalog`` with the ``site.rdf_catalog_format`` endpoint.

    Returns the catalog RDF resource.
    '''
    homepage_url = url_for('site.home_redirect', _external=True)
    catalog_uri = URIRef(url_for('site.rdf_catalog', _external=True))
    config = current_app.config

    graph = Graph(namespace_manager=namespace_manager)

    catalog = graph.resource(catalog_uri)
    catalog.set(RDF.type, DCAT.Catalog)
    catalog.set(DCT.title, Literal(site.title))
    catalog.set(DCT.language, Literal(config['DEFAULT_LANGUAGE']))
    catalog.set(FOAF.homepage, URIRef(homepage_url))

    # The site itself is published by an anonymous organization node.
    site_publisher = graph.resource(BNode())
    site_publisher.set(RDF.type, FOAF.Organization)
    site_publisher.set(FOAF.name, Literal(config['SITE_AUTHOR']))
    catalog.set(DCT.publisher, site_publisher)

    for dataset in datasets:
        node = dataset_to_rdf(dataset, graph)
        # The owner takes precedence over the organization as publisher.
        if dataset.owner:
            node.add(DCT.publisher, user_to_rdf(dataset.owner, graph))
        elif dataset.organization:
            node.add(DCT.publisher,
                     organization_to_rdf(dataset.organization, graph))
        catalog.add(DCAT.dataset, node)

    if isinstance(datasets, Paginable):
        paginate_catalog(catalog, graph, datasets, format,
                         'site.rdf_catalog_format')

    return catalog
def test_all_dataset_fields(self):
    # A dataset with tags, resources and a frequency must expose every
    # matching DCAT/DCT property once serialized.
    resources = ResourceFactory.build_batch(3)
    dataset = DatasetFactory(
        tags=faker.words(nb=3),
        resources=resources,
        frequency='daily',
    )

    rdf = dataset_to_rdf(dataset)
    graph = rdf.graph

    self.assertIsInstance(rdf, RdfResource)
    typed_as_dataset = list(graph.subjects(RDF.type, DCAT.Dataset))
    self.assertEqual(len(typed_as_dataset), 1)
    self.assertEqual(graph.value(rdf.identifier, RDF.type), DCAT.Dataset)

    # The node identifier is the dataset's external redirect URL.
    self.assertIsInstance(rdf.identifier, URIRef)
    expected_uri = url_for('datasets.show_redirect',
                           dataset=dataset.id, _external=True)
    self.assertEqual(str(rdf.identifier), expected_uri)

    self.assertEqual(rdf.value(DCT.identifier), Literal(dataset.id))
    self.assertEqual(rdf.value(DCT.title), Literal(dataset.title))
    self.assertEqual(rdf.value(DCT.description),
                     Literal(dataset.description))
    self.assertEqual(rdf.value(DCT.issued), Literal(dataset.created_at))
    self.assertEqual(rdf.value(DCT.modified),
                     Literal(dataset.last_modified))
    periodicity = rdf.value(DCT.accrualPeriodicity)
    self.assertEqual(periodicity.identifier, FREQ.daily)

    # One keyword literal per tag, one distribution per resource.
    expected_tags = {Literal(tag) for tag in dataset.tags}
    self.assertEqual(set(rdf.objects(DCAT.keyword)), expected_tags)
    distributions = list(rdf.objects(DCAT.distribution))
    self.assertEqual(len(distributions), len(resources))
def build_catalog(site, datasets, format=None):
    '''
    Assemble the site-wide DCAT catalog, with Hydra pagination if needed.

    The catalog node is described (type, title, language, homepage and a
    blank-node publisher named after the ``SITE_AUTHOR`` setting) and every
    dataset is serialized into the same graph and attached to it.

    When ``datasets`` is a ``Paginable``, the catalog is additionally typed
    as a ``hydra:Collection`` and receives a ``hydra:PartialCollectionView``
    holding the first/last links, plus next/previous links when available.

    Raises ``ValueError`` when ``datasets`` is paginated but no ``format``
    is given.

    Returns the catalog RDF resource.
    '''
    homepage_url = url_for('site.home_redirect', _external=True)
    catalog_uri = URIRef(url_for('site.rdf_catalog', _external=True))
    config = current_app.config

    graph = Graph(namespace_manager=namespace_manager)

    catalog = graph.resource(catalog_uri)
    catalog.set(RDF.type, DCAT.Catalog)
    catalog.set(DCT.title, Literal(site.title))
    catalog.set(DCT.language, Literal(config['DEFAULT_LANGUAGE']))
    catalog.set(FOAF.homepage, URIRef(homepage_url))

    # The site itself is published by an anonymous organization node.
    site_publisher = graph.resource(BNode())
    site_publisher.set(RDF.type, FOAF.Organization)
    site_publisher.set(FOAF.name, Literal(config['SITE_AUTHOR']))
    catalog.set(DCT.publisher, site_publisher)

    for dataset in datasets:
        catalog.add(DCAT.dataset, dataset_to_rdf(dataset, graph))

    # Nothing more to describe for a plain, non-paginated iterable.
    if not isinstance(datasets, Paginable):
        return catalog

    if not format:
        raise ValueError('Pagination requires format')

    catalog.add(RDF.type, HYDRA.Collection)
    catalog.set(HYDRA.totalItems, Literal(datasets.total))

    # Query arguments shared by every page link.
    link_args = {
        'format': format,
        'page_size': datasets.page_size,
        '_external': True,
    }

    def page_uri(number):
        '''Build the URI of the catalog page `number`.'''
        return URIRef(
            url_for('site.rdf_catalog_format', page=number, **link_args))

    first_uri = page_uri(1)
    current_uri = page_uri(datasets.page)
    last_uri = page_uri(datasets.pages)

    view = graph.resource(current_uri)
    view.set(RDF.type, HYDRA.PartialCollectionView)
    view.set(HYDRA.first, first_uri)
    view.set(HYDRA.last, last_uri)
    if datasets.has_next:
        view.set(HYDRA.next, page_uri(datasets.page + 1))
    if datasets.has_prev:
        view.set(HYDRA.previous, page_uri(datasets.page - 1))

    catalog.set(HYDRA.view, view)
    return catalog
def test_from_external_repository(self):
    '''
    A dataset whose extras carry a `uri` and a `dct:identifier` is
    serialized with those values instead of locally generated ones.
    '''
    remote_identifier = 'an-identifier'
    remote_uri = 'https://somewhere.org/dataset'
    extras = {
        'dct:identifier': remote_identifier,
        'uri': remote_uri,
    }
    dataset = DatasetFactory(extras=extras)

    rdf = dataset_to_rdf(dataset)

    assert isinstance(rdf.identifier, URIRef)
    assert str(rdf.identifier) == remote_uri
    assert rdf.value(DCT.identifier) == Literal(remote_identifier)
def test_from_external_repository(self):
    # The `uri` and `dct:identifier` extras must take over the node
    # identifier and the DCT.identifier value respectively.
    remote_identifier = 'an-identifier'
    remote_uri = 'https://somewhere.org/dataset'
    extras = {
        'dct:identifier': remote_identifier,
        'uri': remote_uri,
    }
    dataset = DatasetFactory(extras=extras)

    rdf = dataset_to_rdf(dataset)

    self.assertIsInstance(rdf.identifier, URIRef)
    self.assertEqual(str(rdf.identifier), remote_uri)
    self.assertEqual(rdf.value(DCT.identifier), Literal(remote_identifier))
def test_temporal_coverage(self):
    '''
    A dataset's temporal coverage is serialized as a DCT.PeriodOfTime
    node carrying the schema.org start and end dates.
    '''
    start = faker.past_date(start_date='-30d')
    end = faker.future_date(end_date='+30d')
    coverage = db.DateRange(start=start, end=end)
    dataset = DatasetFactory(temporal_coverage=coverage)

    rdf = dataset_to_rdf(dataset)
    period = rdf.value(DCT.temporal)

    period_type = period.value(RDF.type)
    assert period_type.identifier == DCT.PeriodOfTime
    start_literal = period.value(SCHEMA.startDate)
    assert start_literal.toPython() == start
    end_literal = period.value(SCHEMA.endDate)
    assert end_literal.toPython() == end
def test_with_owner(self):
    '''
    A dataset owned by a user is serialized with exactly one dataset and
    one FOAF.Person in the graph, the person being the publisher.
    '''
    owner = UserFactory()
    dataset = DatasetFactory(owner=owner)

    rdf = dataset_to_rdf(dataset)
    graph = rdf.graph

    assert isinstance(rdf, RdfResource)
    dataset_nodes = list(graph.subjects(RDF.type, DCAT.Dataset))
    person_nodes = list(graph.subjects(RDF.type, FOAF.Person))
    assert len(dataset_nodes) == 1
    assert len(person_nodes) == 1

    publisher_type = rdf.value(DCT.publisher).value(RDF.type)
    assert publisher_type.identifier == FOAF.Person
def test_with_org(self):
    '''
    A dataset attached to an organization is serialized with exactly one
    dataset and one FOAF.Organization in the graph, the organization being
    the publisher.
    '''
    organization = OrganizationFactory()
    dataset = DatasetFactory(organization=organization)

    rdf = dataset_to_rdf(dataset)
    graph = rdf.graph

    assert isinstance(rdf, RdfResource)
    dataset_nodes = list(graph.subjects(RDF.type, DCAT.Dataset))
    organization_nodes = list(graph.subjects(RDF.type, FOAF.Organization))
    assert len(dataset_nodes) == 1
    assert len(organization_nodes) == 1

    publisher_type = rdf.value(DCT.publisher).value(RDF.type)
    assert publisher_type.identifier == FOAF.Organization
def test_with_owner(self):
    # An owned dataset must yield exactly one dataset and one FOAF.Person
    # in the graph, with the person set as the dataset publisher.
    owner = UserFactory()
    dataset = DatasetFactory(owner=owner)

    rdf = dataset_to_rdf(dataset)
    graph = rdf.graph

    self.assertIsInstance(rdf, RdfResource)
    dataset_nodes = list(graph.subjects(RDF.type, DCAT.Dataset))
    person_nodes = list(graph.subjects(RDF.type, FOAF.Person))
    self.assertEqual(len(dataset_nodes), 1)
    self.assertEqual(len(person_nodes), 1)

    publisher_type = rdf.value(DCT.publisher).value(RDF.type)
    self.assertEqual(publisher_type.identifier, FOAF.Person)
def test_minimal(self):
    '''
    Check that a dataset without a URL is serialized as a blank node that
    still carries its identifier, title and creation/modification dates.
    '''
    dataset = DatasetFactory.build()  # Does not have a URL
    d = dataset_to_rdf(dataset)
    g = d.graph

    assert isinstance(d, RdfResource)
    # Compare by value: `is 1` only worked through CPython small-int
    # caching and raises a SyntaxWarning on Python 3.8+.
    assert len(list(g.subjects(RDF.type, DCAT.Dataset))) == 1

    assert g.value(d.identifier, RDF.type) == DCAT.Dataset

    # With no URL to use as identifier, the node is a blank node.
    assert isinstance(d.identifier, BNode)
    assert d.value(DCT.identifier) == Literal(dataset.id)
    assert d.value(DCT.title) == Literal(dataset.title)
    assert d.value(DCT.issued) == Literal(dataset.created_at)
    assert d.value(DCT.modified) == Literal(dataset.last_modified)
def test_with_org(self):
    # A dataset attached to an organization must yield exactly one dataset
    # and one FOAF.Organization in the graph, with the organization set as
    # the dataset publisher.
    organization = OrganizationFactory()
    dataset = DatasetFactory(organization=organization)

    rdf = dataset_to_rdf(dataset)
    graph = rdf.graph

    self.assertIsInstance(rdf, RdfResource)
    dataset_nodes = list(graph.subjects(RDF.type, DCAT.Dataset))
    organization_nodes = list(graph.subjects(RDF.type, FOAF.Organization))
    self.assertEqual(len(dataset_nodes), 1)
    self.assertEqual(len(organization_nodes), 1)

    publisher_type = rdf.value(DCT.publisher).value(RDF.type)
    self.assertEqual(publisher_type.identifier, FOAF.Organization)
def test_minimal(self):
    # A dataset without a URL must be serialized as a blank node that
    # still carries its identifier, title and dates.
    dataset = DatasetFactory.build()  # Does not have a URL

    rdf = dataset_to_rdf(dataset)
    graph = rdf.graph

    self.assertIsInstance(rdf, RdfResource)
    typed_as_dataset = list(graph.subjects(RDF.type, DCAT.Dataset))
    self.assertEqual(len(typed_as_dataset), 1)
    self.assertEqual(graph.value(rdf.identifier, RDF.type), DCAT.Dataset)

    self.assertIsInstance(rdf.identifier, BNode)
    self.assertEqual(rdf.value(DCT.identifier), Literal(dataset.id))
    self.assertEqual(rdf.value(DCT.title), Literal(dataset.title))
    self.assertEqual(rdf.value(DCT.issued), Literal(dataset.created_at))
    self.assertEqual(rdf.value(DCT.modified),
                     Literal(dataset.last_modified))
def build_org_catalog(org, datasets, format=None):
    '''
    Assemble the DCAT catalog of a single organization.

    The catalog node is identified by the organization's external
    `organizations.rdf_catalog` URL and published by the organization.
    Every dataset is serialized into the same graph and attached to it.

    When ``datasets`` is a ``Paginable``, pagination is delegated to
    ``paginate_catalog`` with the ``organizations.rdf_catalog_format``
    endpoint and the organization id as extra URL value.

    Returns the catalog RDF resource.
    '''
    graph = Graph(namespace_manager=namespace_manager)

    catalog_uri = URIRef(
        url_for('organizations.rdf_catalog', org=org.id, _external=True))
    catalog = graph.resource(catalog_uri)
    catalog.set(RDF.type, DCAT.Catalog)
    catalog.set(DCT.publisher, organization_to_rdf(org, graph))

    for dataset in datasets:
        rdf_dataset = dataset_to_rdf(dataset, graph)
        catalog.add(DCAT.dataset, rdf_dataset)

    if isinstance(datasets, Paginable):
        paginate_catalog(catalog, graph, datasets, format,
                         'organizations.rdf_catalog_format', org=org.id)

    return catalog