Esempio n. 1
0
    def test_all_dataset_fields(self):
        '''
        Check the RDF mapping of a dataset built with tags, resources,
        a frequency and an acronym.
        '''
        resources = ResourceFactory.build_batch(3)
        dataset = DatasetFactory(tags=faker.words(nb=3), resources=resources,
                                 frequency='daily', acronym='acro')
        d = dataset_to_rdf(dataset)
        g = d.graph

        assert isinstance(d, RdfResource)
        # Compare by value: `is 1` tests object identity, which only holds
        # through CPython's small-int cache and emits a SyntaxWarning on 3.8+.
        assert len(list(g.subjects(RDF.type, DCAT.Dataset))) == 1

        assert g.value(d.identifier, RDF.type) == DCAT.Dataset

        # The dataset is identified by its external redirect URL.
        assert isinstance(d.identifier, URIRef)
        uri = url_for('datasets.show_redirect',
                      dataset=dataset.id, _external=True)
        assert str(d.identifier) == uri
        assert d.value(DCT.identifier) == Literal(dataset.id)
        assert d.value(DCT.title) == Literal(dataset.title)
        assert d.value(SKOS.altLabel) == Literal(dataset.acronym)
        assert d.value(DCT.description) == Literal(dataset.description)
        assert d.value(DCT.issued) == Literal(dataset.created_at)
        assert d.value(DCT.modified) == Literal(dataset.last_modified)
        assert d.value(DCT.accrualPeriodicity).identifier == FREQ.daily
        expected_tags = set(Literal(t) for t in dataset.tags)
        assert set(d.objects(DCAT.keyword)) == expected_tags
        # One distribution is expected per resource.
        assert len(list(d.objects(DCAT.distribution))) == len(resources)
Esempio n. 2
0
def build_catalog(site, datasets, format=None):
    '''
    Assemble the DCAT catalog describing this site.

    Every dataset is converted with `dataset_to_rdf` into one shared graph
    and receives its owner or, failing that, its organization as
    `dct:publisher`. When `datasets` is a `Paginable`, pagination is
    delegated to `paginate_catalog`.

    Returns the catalog resource.
    '''
    homepage = URIRef(url_for('site.home_redirect', _external=True))
    catalog_uri = URIRef(url_for('site.rdf_catalog', _external=True))

    graph = Graph(namespace_manager=namespace_manager)
    catalog = graph.resource(catalog_uri)
    catalog.set(RDF.type, DCAT.Catalog)
    catalog.set(DCT.title, Literal(site.title))
    language = current_app.config['DEFAULT_LANGUAGE']
    catalog.set(DCT.language, Literal(language))
    catalog.set(FOAF.homepage, homepage)

    # The site author is exposed as an anonymous organization.
    site_publisher = graph.resource(BNode())
    site_publisher.set(RDF.type, FOAF.Organization)
    author = current_app.config['SITE_AUTHOR']
    site_publisher.set(FOAF.name, Literal(author))
    catalog.set(DCT.publisher, site_publisher)

    for dataset in datasets:
        node = dataset_to_rdf(dataset, graph)
        # The owner takes precedence over the organization.
        if dataset.owner:
            owner_node = user_to_rdf(dataset.owner, graph)
            node.add(DCT.publisher, owner_node)
        elif dataset.organization:
            org_node = organization_to_rdf(dataset.organization, graph)
            node.add(DCT.publisher, org_node)
        catalog.add(DCAT.dataset, node)

    if isinstance(datasets, Paginable):
        paginate_catalog(
            catalog, graph, datasets, format, 'site.rdf_catalog_format')

    return catalog
Esempio n. 3
0
    def test_all_dataset_fields(self):
        '''
        Check the RDF mapping of a dataset built with tags, resources
        and a frequency.
        '''
        resources = ResourceFactory.build_batch(3)
        dataset = DatasetFactory(frequency='daily', resources=resources,
                                 tags=faker.words(nb=3))
        rdf_dataset = dataset_to_rdf(dataset)
        graph = rdf_dataset.graph

        self.assertIsInstance(rdf_dataset, RdfResource)
        typed_subjects = list(graph.subjects(RDF.type, DCAT.Dataset))
        self.assertEqual(len(typed_subjects), 1)

        rdf_type = graph.value(rdf_dataset.identifier, RDF.type)
        self.assertEqual(rdf_type, DCAT.Dataset)

        # The dataset is identified by its external redirect URL.
        self.assertIsInstance(rdf_dataset.identifier, URIRef)
        expected_uri = url_for('datasets.show_redirect',
                               dataset=dataset.id, _external=True)
        self.assertEqual(str(rdf_dataset.identifier), expected_uri)

        # Plain literal properties, checked in a fixed order.
        expected_literals = (
            (DCT.identifier, dataset.id),
            (DCT.title, dataset.title),
            (DCT.description, dataset.description),
            (DCT.issued, dataset.created_at),
            (DCT.modified, dataset.last_modified),
        )
        for predicate, raw_value in expected_literals:
            self.assertEqual(rdf_dataset.value(predicate), Literal(raw_value))

        periodicity = rdf_dataset.value(DCT.accrualPeriodicity)
        self.assertEqual(periodicity.identifier, FREQ.daily)

        keywords = set(rdf_dataset.objects(DCAT.keyword))
        self.assertEqual(keywords, {Literal(tag) for tag in dataset.tags})

        # One distribution is expected per resource.
        distributions = list(rdf_dataset.objects(DCAT.distribution))
        self.assertEqual(len(distributions), len(resources))
Esempio n. 4
0
def build_catalog(site, datasets, format=None):
    '''
    Assemble the DCAT catalog describing this site.

    Every dataset is converted with `dataset_to_rdf` into one shared graph.
    When `datasets` is a `Paginable`, the catalog is also typed as a
    `hydra:Collection` and linked to a `hydra:PartialCollectionView`
    carrying first/last (and, when available, next/previous) page links.

    Raises `ValueError` if `datasets` is paginated and no `format` is given.
    Returns the catalog resource.
    '''
    homepage = URIRef(url_for('site.home_redirect', _external=True))
    catalog_uri = URIRef(url_for('site.rdf_catalog', _external=True))

    graph = Graph(namespace_manager=namespace_manager)
    catalog = graph.resource(catalog_uri)
    catalog.set(RDF.type, DCAT.Catalog)
    catalog.set(DCT.title, Literal(site.title))
    language = current_app.config['DEFAULT_LANGUAGE']
    catalog.set(DCT.language, Literal(language))
    catalog.set(FOAF.homepage, homepage)

    # The site author is exposed as an anonymous organization.
    site_publisher = graph.resource(BNode())
    site_publisher.set(RDF.type, FOAF.Organization)
    author = current_app.config['SITE_AUTHOR']
    site_publisher.set(FOAF.name, Literal(author))
    catalog.set(DCT.publisher, site_publisher)

    for dataset in datasets:
        catalog.add(DCAT.dataset, dataset_to_rdf(dataset, graph))

    # Nothing more to do for a plain (non paginated) list of datasets.
    if not isinstance(datasets, Paginable):
        return catalog

    if not format:
        raise ValueError('Pagination requires format')

    catalog.add(RDF.type, HYDRA.Collection)
    catalog.set(HYDRA.totalItems, Literal(datasets.total))

    # Parameters shared by every page URL.
    url_params = {
        'format': format,
        'page_size': datasets.page_size,
        '_external': True,
    }

    def page_uri(page):
        '''Return the URI of the given catalog page.'''
        return URIRef(url_for('site.rdf_catalog_format', page=page,
                              **url_params))

    first_uri = page_uri(1)
    current_uri = page_uri(datasets.page)
    last_uri = page_uri(datasets.pages)

    view = graph.resource(current_uri)
    view.set(RDF.type, HYDRA.PartialCollectionView)
    view.set(HYDRA.first, first_uri)
    view.set(HYDRA.last, last_uri)
    if datasets.has_next:
        view.set(HYDRA.next, page_uri(datasets.page + 1))
    if datasets.has_prev:
        view.set(HYDRA.previous, page_uri(datasets.page - 1))

    catalog.set(HYDRA.view, view)

    return catalog
Esempio n. 5
0
    def test_from_external_repository(self):
        '''
        The `uri` and `dct:identifier` extras should be reused as the
        RDF identifier and `dct:identifier` value.
        '''
        remote_uri = 'https://somewhere.org/dataset'
        remote_id = 'an-identifier'
        extras = {'dct:identifier': remote_id, 'uri': remote_uri}
        dataset = DatasetFactory(extras=extras)

        rdf_dataset = dataset_to_rdf(dataset)
        identifier = rdf_dataset.identifier

        assert isinstance(identifier, URIRef)
        assert str(identifier) == remote_uri
        assert rdf_dataset.value(DCT.identifier) == Literal(remote_id)
Esempio n. 6
0
    def test_from_external_repository(self):
        '''
        The `uri` and `dct:identifier` extras should be reused as the
        RDF identifier and `dct:identifier` value.
        '''
        remote_uri = 'https://somewhere.org/dataset'
        remote_id = 'an-identifier'
        extras = {'dct:identifier': remote_id, 'uri': remote_uri}
        dataset = DatasetFactory(extras=extras)

        rdf_dataset = dataset_to_rdf(dataset)
        identifier = rdf_dataset.identifier

        self.assertIsInstance(identifier, URIRef)
        self.assertEqual(str(identifier), remote_uri)
        self.assertEqual(rdf_dataset.value(DCT.identifier),
                         Literal(remote_id))
Esempio n. 7
0
    def test_temporal_coverage(self):
        '''
        A dataset temporal coverage should be exposed as a
        `dct:PeriodOfTime` holding the start and end dates.
        '''
        first_day = faker.past_date(start_date='-30d')
        last_day = faker.future_date(end_date='+30d')
        coverage = db.DateRange(start=first_day, end=last_day)
        dataset = DatasetFactory(temporal_coverage=coverage)

        period = dataset_to_rdf(dataset).value(DCT.temporal)

        assert period.value(RDF.type).identifier == DCT.PeriodOfTime
        assert period.value(SCHEMA.startDate).toPython() == first_day
        assert period.value(SCHEMA.endDate).toPython() == last_day
Esempio n. 8
0
    def test_with_owner(self):
        '''A dataset owned by a user should be published by a `foaf:Person`.'''
        dataset = DatasetFactory(owner=UserFactory())
        rdf_dataset = dataset_to_rdf(dataset)
        graph = rdf_dataset.graph

        assert isinstance(rdf_dataset, RdfResource)
        # Exactly one dataset and one person are expected in the graph.
        dataset_nodes = list(graph.subjects(RDF.type, DCAT.Dataset))
        person_nodes = list(graph.subjects(RDF.type, FOAF.Person))
        assert len(dataset_nodes) == 1
        assert len(person_nodes) == 1

        publisher_type = rdf_dataset.value(DCT.publisher).value(RDF.type)
        assert publisher_type.identifier == FOAF.Person
Esempio n. 9
0
    def test_with_org(self):
        '''
        A dataset belonging to an organization should be published by a
        `foaf:Organization`.
        '''
        dataset = DatasetFactory(organization=OrganizationFactory())
        rdf_dataset = dataset_to_rdf(dataset)
        graph = rdf_dataset.graph

        assert isinstance(rdf_dataset, RdfResource)
        # Exactly one dataset and one organization are expected in the graph.
        dataset_nodes = list(graph.subjects(RDF.type, DCAT.Dataset))
        org_nodes = list(graph.subjects(RDF.type, FOAF.Organization))
        assert len(dataset_nodes) == 1
        assert len(org_nodes) == 1

        publisher_type = rdf_dataset.value(DCT.publisher).value(RDF.type)
        assert publisher_type.identifier == FOAF.Organization
Esempio n. 10
0
    def test_with_owner(self):
        '''A dataset owned by a user should be published by a `foaf:Person`.'''
        dataset = DatasetFactory(owner=UserFactory())
        rdf_dataset = dataset_to_rdf(dataset)
        graph = rdf_dataset.graph

        self.assertIsInstance(rdf_dataset, RdfResource)
        # Exactly one dataset and one person are expected in the graph.
        dataset_nodes = list(graph.subjects(RDF.type, DCAT.Dataset))
        person_nodes = list(graph.subjects(RDF.type, FOAF.Person))
        self.assertEqual(len(dataset_nodes), 1)
        self.assertEqual(len(person_nodes), 1)

        publisher_type = rdf_dataset.value(DCT.publisher).value(RDF.type)
        self.assertEqual(publisher_type.identifier, FOAF.Person)
Esempio n. 11
0
    def test_minimal(self):
        '''
        Check the RDF mapping of a dataset that was only built, not saved:
        it is identified by a blank node and still exposes its basic fields.
        '''
        dataset = DatasetFactory.build()  # Does not have an URL
        d = dataset_to_rdf(dataset)
        g = d.graph

        assert isinstance(d, RdfResource)
        # Compare by value: `is 1` tests object identity, which only holds
        # through CPython's small-int cache and emits a SyntaxWarning on 3.8+.
        assert len(list(g.subjects(RDF.type, DCAT.Dataset))) == 1

        assert g.value(d.identifier, RDF.type) == DCAT.Dataset

        assert isinstance(d.identifier, BNode)
        assert d.value(DCT.identifier) == Literal(dataset.id)
        assert d.value(DCT.title) == Literal(dataset.title)
        assert d.value(DCT.issued) == Literal(dataset.created_at)
        assert d.value(DCT.modified) == Literal(dataset.last_modified)
Esempio n. 12
0
    def test_with_org(self):
        '''
        A dataset belonging to an organization should be published by a
        `foaf:Organization`.
        '''
        dataset = DatasetFactory(organization=OrganizationFactory())
        rdf_dataset = dataset_to_rdf(dataset)
        graph = rdf_dataset.graph

        self.assertIsInstance(rdf_dataset, RdfResource)
        # Exactly one dataset and one organization are expected in the graph.
        dataset_nodes = list(graph.subjects(RDF.type, DCAT.Dataset))
        org_nodes = list(graph.subjects(RDF.type, FOAF.Organization))
        self.assertEqual(len(dataset_nodes), 1)
        self.assertEqual(len(org_nodes), 1)

        publisher_type = rdf_dataset.value(DCT.publisher).value(RDF.type)
        self.assertEqual(publisher_type.identifier, FOAF.Organization)
Esempio n. 13
0
    def test_minimal(self):
        '''
        Check the RDF mapping of a dataset that was only built, not saved:
        it is identified by a blank node and still exposes its basic fields.
        '''
        dataset = DatasetFactory.build()  # Does not have an URL
        rdf_dataset = dataset_to_rdf(dataset)
        graph = rdf_dataset.graph

        self.assertIsInstance(rdf_dataset, RdfResource)
        typed_subjects = list(graph.subjects(RDF.type, DCAT.Dataset))
        self.assertEqual(len(typed_subjects), 1)

        rdf_type = graph.value(rdf_dataset.identifier, RDF.type)
        self.assertEqual(rdf_type, DCAT.Dataset)

        self.assertIsInstance(rdf_dataset.identifier, BNode)

        # Plain literal properties, checked in a fixed order.
        expected_literals = (
            (DCT.identifier, dataset.id),
            (DCT.title, dataset.title),
            (DCT.issued, dataset.created_at),
            (DCT.modified, dataset.last_modified),
        )
        for predicate, raw_value in expected_literals:
            self.assertEqual(rdf_dataset.value(predicate), Literal(raw_value))
Esempio n. 14
0
def build_org_catalog(org, datasets, format=None):
    '''
    Assemble the DCAT catalog of an organization.

    The organization is set as the catalog `dct:publisher` and every
    dataset is converted with `dataset_to_rdf` into the same graph.
    When `datasets` is a `Paginable`, pagination is delegated to
    `paginate_catalog`.

    Returns the catalog resource.
    '''
    graph = Graph(namespace_manager=namespace_manager)
    catalog_uri = URIRef(
        url_for('organizations.rdf_catalog', org=org.id, _external=True))

    catalog = graph.resource(catalog_uri)
    catalog.set(RDF.type, DCAT.Catalog)
    catalog.set(DCT.publisher, organization_to_rdf(org, graph))

    for dataset in datasets:
        rdf_dataset = dataset_to_rdf(dataset, graph)
        catalog.add(DCAT.dataset, rdf_dataset)

    # Extra values forwarded to `paginate_catalog` along with the endpoint.
    route_values = {'org': org.id}

    if isinstance(datasets, Paginable):
        paginate_catalog(
            catalog, graph, datasets, format,
            'organizations.rdf_catalog_format', **route_values)

    return catalog