Beispiel #1
0
    def update_config(self, config):
        '''
        Registers the plugin templates and validates the custom endpoint

        Adds the `templates` directory to the template search path, calls
        `utils.catalog_uri()` once and, when the `CUSTOM_ENDPOINT_CONFIG`
        option is set, checks that its value is usable.

        :param config: configuration object; only `config.get` is used here

        Raises `Exception` if the custom endpoint does not start with `/` or
        does not contain the `{_format}` placeholder.
        '''
        p.toolkit.add_template_directory(config, 'templates')

        # Check catalog URI on startup to emit a warning if necessary
        utils.catalog_uri()

        # Check custom catalog endpoint
        custom_endpoint = config.get(CUSTOM_ENDPOINT_CONFIG)
        if custom_endpoint:
            if not custom_endpoint.startswith('/'):
                # "/" is a forward slash; the message used to call it a
                # backslash, which would send users to the wrong character
                raise Exception(
                    '"{0}" should start with a forward slash (/)'.format(
                        CUSTOM_ENDPOINT_CONFIG))
            if '{_format}' not in custom_endpoint:
                raise Exception(
                    '"{0}" should contain {{_format}}'.format(
                        CUSTOM_ENDPOINT_CONFIG))
Beispiel #2
0
    def test_graph_from_catalog_modified_date(self):
        # The catalog node must carry a DCT.modified triple whose value is
        # the `metadata_modified` of the dataset created here
        created_dataset = factories.Dataset()

        serializer = RDFSerializer()
        graph = serializer.g

        catalog_ref = serializer.graph_from_catalog()

        # The returned reference has to be the configured catalog URI
        eq_(unicode(catalog_ref), utils.catalog_uri())

        modified_triple = self._triple(
            graph, catalog_ref, DCT.modified,
            created_dataset['metadata_modified'], XSD.dateTime)
        assert modified_triple
Beispiel #3
0
    def test_graph_from_catalog(self):
        # Without a catalog dict the catalog triples are checked against the
        # site configuration values
        serializer = RDFSerializer()
        graph = serializer.g

        catalog_ref = serializer.graph_from_catalog()

        # The returned reference has to be the configured catalog URI
        eq_(unicode(catalog_ref), utils.catalog_uri())

        # Basic fields
        assert self._triple(graph, catalog_ref, RDF.type, DCAT.Catalog)

        site_title = config.get('ckan.site_title')
        assert self._triple(graph, catalog_ref, DCT.title, site_title)

        site_homepage = URIRef(config.get('ckan.site_url'))
        assert self._triple(graph, catalog_ref, FOAF.homepage, site_homepage)

        assert self._triple(graph, catalog_ref, DCT.language, 'en')
    def graph_from_catalog(self, catalog_dict=None):
        '''
        Builds the catalog (CKAN site) graph by running every loaded profile

        Each class in `self._profiles` is instantiated with the serializer
        graph (`self.g`) and the compatibility mode flag, and its
        `graph_from_catalog` method is called with `catalog_dict` and the
        catalog reference, so the graph is updated in place.

        Returns the catalog reference, a URIRef built from `catalog_uri()`.
        '''

        catalog_node = URIRef(catalog_uri())

        for profile_cls in self._profiles:
            profile_cls(self.g, self.compatibility_mode).graph_from_catalog(
                catalog_dict, catalog_node)

        return catalog_node
Beispiel #5
0
    def test_graph_from_catalog_dict(self):
        # When a catalog dict is supplied, its values (not the site
        # configuration) must show up in the catalog triples
        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': 'http://example.com',
            'language': 'de',
        }

        serializer = RDFSerializer()
        graph = serializer.g

        catalog_ref = serializer.graph_from_catalog(catalog_dict)

        # The returned reference has to be the configured catalog URI
        eq_(unicode(catalog_ref), utils.catalog_uri())

        # Basic fields, checked in the same order as they are listed
        expected_triples = [
            (RDF.type, DCAT.Catalog),
            (DCT.title, catalog_dict['title']),
            (DCT.description, catalog_dict['description']),
            (FOAF.homepage, URIRef(catalog_dict['homepage'])),
            (DCT.language, catalog_dict['language']),
        ]
        for predicate, expected_value in expected_triples:
            assert self._triple(graph, catalog_ref, predicate, expected_value)
    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''
        Adds the triples describing a dataset to the profile graph (`self.g`)

        Binds every prefix found in `namespaces`, types `dataset_ref` as
        `DCAT.Dataset` and then adds, in this order: title, description,
        one theme per group, one keyword per tag, identifier, issued and
        modified dates, publisher, license, a fixed spatial node, the
        optional temporal extent, the optional granularity and data
        dictionary references and one node per resource.

        :param dataset_dict: dataset dictionary; the keys read directly here
            are `title`, `notes`, `groups`, `tags`, `name`, `organization`,
            `license_url` and `resources`. Other values are fetched through
            `self._get_dataset_value` and `self._add_date_triples_from_dict`
        :param dataset_ref: graph node used as the subject of the dataset
            triples

        Returns nothing; the graph is modified in place.
        '''

        log.debug('Iniciando graph_from_dataset')
        g = self.g

        for prefix, namespace in namespaces.iteritems():
            log.debug('Binding namespace %s with prefix %s', namespace, prefix)
            g.bind(prefix, namespace)

        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        log.debug('Insertando title')
        # Title, tagged as Spanish
        title = dataset_dict.get('title')
        g.add((dataset_ref, DCT.title, Literal(title, lang='es')))

        log.debug('Insertando description')
        # Description, tagged as Spanish
        notes = dataset_dict.get('notes')
        g.add((dataset_ref, DCT.description, Literal(notes, lang='es')))

        log.debug('Insertando theme')
        # Groups are emitted as themes
        # TODO: the original RDF adds an rdf:resource here
        # A dataset without a `groups` key is treated as having no groups,
        # the same way the tags and resources loops below handle theirs
        for group in dataset_dict.get('groups', []):
            g.add((dataset_ref, DCAT.theme, Literal(group['display_name'])))

        # Tags
        for tag in dataset_dict.get('tags', []):
            g.add((dataset_ref, DCAT.keyword, Literal(tag['name'], lang='es')))

        # Identifier
        # TODO: take the URL from configuration
        dataset_name = dataset_dict.get('name')
        dataset_identifier = '{0}/catalogo/{1}'.format(
            catalog_uri().rstrip('/'), dataset_name)
        g.add((dataset_ref, DCT.identifier,
               Literal(dataset_identifier,
                       datatype='http://www.w3.org/2001/XMLSchema#anyURI')))

        # Dates
        items = [
            ('issued', DCT.issued, ['metadata_created'], Literal),
            ('modified', DCT.modified, ['metadata_modified'], Literal),
        ]
        self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

        # Publisher: a URI built from the organization name when there is
        # one, otherwise a blank node. The URI used to be formatted
        # unconditionally, which raised for datasets without an organization
        # and made the blank node fallback unreachable.
        organization = dataset_dict.get('organization') or {}
        organization_name = organization.get('name')

        if organization_name:
            publisher_details = URIRef('{0}/catalogo/{1}'.format(
                catalog_uri().rstrip('/'), organization_name))
        else:
            # No organization nor publisher_uri
            publisher_details = BNode()

        g.add((dataset_ref, DCT.publisher, publisher_details))

        # License: skipped when the dataset has no license URL, so that no
        # node is built from a missing value
        license_url = dataset_dict.get('license_url')
        if license_url:
            g.add((dataset_ref, DCT.license, URIRef(license_url)))

        # Spatial
        # TODO: review the namespaces
        spatial = BNode()

        spatial_title = 'aragon'
        spatial_comunidad = 'aragon2'
        # NOTE(review): this URL embeds an `api_key` query parameter in the
        # source and in every serialized dataset -- confirm that is intended
        spatial_url = 'http://opendata.aragon.es/recurso/territorio/ComunidadAutonoma/Aragon?api_key=e103dc13eb276ad734e680f5855f20c6'

        g.add((spatial, DCT.title, Literal(spatial_title, lang='es')))
        g.add((spatial, ARAGODEF.ComunidadAutonoma,
               Literal(spatial_comunidad, lang='es')))
        g.add((spatial, RDF.resource, Literal(spatial_url)))
        g.add((dataset_ref, DCT.spatial, spatial))

        # Temporal
        # TODO: this introduces Description nodes and does not use the
        # prefixes for the custom namespaces
        start = self._get_dataset_value(dataset_dict, 'TemporalFrom')
        end = self._get_dataset_value(dataset_dict, 'TemporalUntil')
        if start or end:
            temporal_extent = BNode()
            timeinterval_extent = BNode()

            g.add((temporal_extent, TIME.Interval, timeinterval_extent))
            g.add((timeinterval_extent, RDF.type,
                   URIRef('http://purl.org/dc/terms/PeriodOfTime')))

            if start:
                hasBeginning = BNode()
                g.add((timeinterval_extent, TIME.hasBeginning, hasBeginning))

                instant_begin = BNode()
                g.add((hasBeginning, TIME.Instant, instant_begin))
                g.add((instant_begin, TIME.inXSDDate,
                       Literal(
                           start,
                           datatype='http://www.w3.org/2001/XMLSchema#date')))
            if end:
                hasEnd = BNode()
                g.add((timeinterval_extent, TIME.hasEnd, hasEnd))

                instant_end = BNode()
                g.add((hasEnd, TIME.Instant, instant_end))
                g.add((instant_end, TIME.inXSDDate,
                       Literal(
                           end,
                           datatype='http://www.w3.org/2001/XMLSchema#date')))

            g.add((dataset_ref, DCT.temporal, temporal_extent))

        # Include the Granularity extra
        granularity = self._get_dataset_value(dataset_dict, 'Granularity')
        if granularity:
            ref_granularity_extent = BNode()

            g.add((ref_granularity_extent, RDFS.label,
                   Literal('Granularity', lang='es')))
            g.add((ref_granularity_extent, RDFS.value,
                   Literal(granularity, lang='es')))

            g.add((dataset_ref, DCT.references, ref_granularity_extent))

        # Include the Data Dictionary and Data Dictionary URL0 extras; the
        # reference is only emitted when both values are present
        data_dictionary = self._get_dataset_value(dataset_dict,
                                                  'Data Dictionary')
        data_dictionary_url = self._get_dataset_value(dataset_dict,
                                                      'Data Dictionary URL0')
        if data_dictionary and data_dictionary_url:
            ref_dictionary_extent = BNode()

            g.add((ref_dictionary_extent, RDFS.label,
                   Literal('Data Dictionary', lang='es')))
            g.add((ref_dictionary_extent, RDFS.value,
                   Literal(data_dictionary, lang='es')))
            g.add((ref_dictionary_extent, RDF.resource,
                   Literal(data_dictionary_url)))

            g.add((dataset_ref, DCT.references, ref_dictionary_extent))

        # Resources
        for resource_dict in dataset_dict.get('resources', []):

            distribution = URIRef(resource_uri(resource_dict))

            # NOTE(review): the class-named term DCAT.Distribution is used as
            # the predicate here (as TIME.Interval / DCT.MediaType are
            # elsewhere in this method); presumably this mirrors the layout
            # of a pre-existing RDF output -- confirm before changing
            g.add((dataset_ref, DCAT.Distribution, distribution))

            # Identifier
            identifier = resource_uri(resource_dict)
            g.add(
                (distribution, DCT.identifier,
                 Literal(identifier,
                         datatype='http://www.w3.org/2001/XMLSchema#anyURI')))

            # Title
            title = resource_dict.get('name')
            g.add((distribution, DCT.title, Literal(title, lang='es')))

            # Description
            description = resource_dict.get('description')
            g.add(
                (distribution, DCT.description, Literal(description,
                                                        lang='es')))

            # Download and access URLs
            url = resource_dict.get('url')
            download_url = resource_dict.get('download_url')
            if download_url:
                g.add(
                    (distribution, DCAT.downloadURL,
                     Literal(
                         download_url,
                         datatype='http://www.w3.org/2001/XMLSchema#anyURI')))
            # The access URL is only emitted when it adds information, i.e.
            # when it is set and differs from the download URL (a missing
            # download URL always differs from a non-empty URL)
            if url and url != download_url:
                g.add(
                    (distribution, DCAT.accessURL,
                     Literal(
                         url,
                         datatype='http://www.w3.org/2001/XMLSchema#anyURI')))

            # Format
            format_res = resource_dict.get('format')
            # TODO: the importer is not filling in mimetype_inner
            mimetype_inner_res = resource_dict.get('mimetype_inner')
            if format_res:

                format_extent = BNode()
                mediatype_extent = BNode()

                g.add((mediatype_extent, RDFS.value,
                       Literal(mimetype_inner_res)))
                g.add((mediatype_extent, RDFS.label, Literal(format_res)))

                g.add((format_extent, DCT.MediaType, mediatype_extent))
                g.add((distribution, DCT['format'], format_extent))