예제 #1
0
def dataset_uri(dataset_dict):
    '''
    Build the URI that identifies a dataset in the RDF serializations.

    Candidates are tried in this order and the first usable one wins:

        1. The value of the `uri` field
        2. The value of the first extra whose key is `uri` and whose value
           is not the literal string 'None'
        3. `catalog_uri()` + '/catalogo/' + `id` field
        4. `catalog_uri()` + '/catalogo/' + a random UUID (a warning is
           logged when this happens)

    Returns a string with the dataset URI.
    '''

    candidate = dataset_dict.get('uri')

    if not candidate:
        # Only the first matching extra is considered, even if it is empty
        candidate = next(
            (item['value'] for item in dataset_dict.get('extras', [])
             if item['key'] == 'uri' and item['value'] != 'None'),
            candidate)

    if candidate:
        return candidate

    dataset_id = dataset_dict.get('id')
    if dataset_id:
        return '{0}/catalogo/{1}'.format(catalog_uri().rstrip('/'),
                                         dataset_id)

    # Nothing identifies the dataset: fall back to a random identifier
    random_uri = '{0}/catalogo/{1}'.format(catalog_uri().rstrip('/'),
                                           str(uuid.uuid4()))
    log.warning('Using a random id for dataset URI')
    return random_uri
예제 #2
0
    def update_config(self, config):
        '''
        Register the plugin templates and validate the endpoint settings.

        Adds the `templates` directory to the template search path, calls
        `utils.catalog_uri()` once at startup and, if a custom catalog
        endpoint is configured under `CUSTOM_ENDPOINT_CONFIG`, checks that it
        is well formed.

        Raises an Exception if the custom endpoint does not start with a
        slash or does not contain the `{_format}` placeholder.
        '''
        p.toolkit.add_template_directory(config, 'templates')

        # Check catalog URI on startup to emit a warning if necessary
        utils.catalog_uri()

        # Check custom catalog endpoint
        custom_endpoint = config.get(CUSTOM_ENDPOINT_CONFIG)
        if custom_endpoint:
            if not custom_endpoint.startswith('/'):
                # '/' is a (forward) slash, not a backslash
                raise Exception(
                    '"{0}" should start with a slash (/)'.format(
                        CUSTOM_ENDPOINT_CONFIG))
            if '{_format}' not in custom_endpoint:
                raise Exception('"{0}" should contain {{_format}}'.format(
                    CUSTOM_ENDPOINT_CONFIG))
예제 #3
0
    def _dge_harvest_dataset_uri(self, dataset_dict):
        '''
        Returns an URI for the dataset

        This will be used to uniquely reference the dataset on the RDF
        serializations.

        The value will be the first found of:

            1. `catalog_uri()` + '/catalogo/' + `name` field
            2. Whatever `dataset_uri()` returns for the dataset (it is only
               consulted when the dataset has no `name`)

        Check the documentation for `catalog_uri()` for the recommended ways of
        setting it.

        Returns a string with the dataset URI.
        '''

        name = dataset_dict.get('name')
        if name:
            return '{0}/catalogo/{1}'.format(catalog_uri().rstrip('/'), name)

        # Without a name the local `uri` used to be unbound here, raising
        # UnboundLocalError instead of falling back as documented.
        return dataset_uri(dataset_dict)
예제 #4
0
    def graph_from_catalog(self, catalog_dict, catalog_ref):
        '''
        Add the DCATAPIT specific triples for the catalog node to `self.g`.

        The catalog node gets an extra type, a replaced homepage, a publisher
        agent, an issued date, the theme taxonomy and the offered languages.
        `catalog_dict` is not read by this method.
        '''

        graph = self.g

        for ns_prefix, ns_uri in it_namespaces.iteritems():
            graph.bind(ns_prefix, ns_uri)

        # Additional type for the Catalog node
        graph.add((catalog_ref, RDF.type, DCATAPIT.Catalog))

        # Homepage: drop the triple pointing at the site URL and add one that
        # differs from the Catalog URIRef itself
        site_homepage = URIRef(config.get('ckan.site_url'))
        graph.remove((catalog_ref, FOAF.homepage, site_homepage))
        graph.add((catalog_ref, FOAF.homepage, URIRef(catalog_uri() + '/#')))

        # Publisher, described as a blank node agent
        publisher_name = config.get('ckanext.dcatapit_configpublisher_name',
                                    'unknown')
        publisher_code = config.get(
            'ckanext.dcatapit_configpublisher_code_identifier', 'unknown')

        publisher = BNode()
        publisher_triples = (
            (publisher, RDF['type'], DCATAPIT.Agent),
            (publisher, RDF['type'], FOAF.Agent),
            (catalog_ref, DCT.publisher, publisher),
            (publisher, FOAF.name, Literal(publisher_name)),
            (publisher, DCT.identifier, Literal(publisher_code)),
        )
        for triple in publisher_triples:
            graph.add(triple)

        # Issued date
        issued_on = config.get('ckanext.dcatapit_config.catalog_issued',
                               '1900-01-01')
        if issued_on:
            self._add_date_triple(catalog_ref, DCT.issued, issued_on)

        # Theme taxonomy: link the catalog to the concept scheme and give the
        # scheme an Italian title
        theme_scheme = URIRef(THEME_BASE_URI.rstrip('/'))
        graph.add((catalog_ref, DCAT.themeTaxonomy, theme_scheme))
        graph.add((theme_scheme, RDF.type, SKOS.ConceptScheme))
        graph.add((theme_scheme, DCT.title,
                   Literal('Il Vocabolario Data Theme', lang='it')))

        # Languages: one URI per offered locale that has a vocabulary code
        offered_locales = config.get('ckan.locales_offered', 'it')
        for locale in offered_locales.split():
            voc_code = lang_mapping_ckan_to_voc.get(locale)
            if not voc_code:
                continue
            graph.add((catalog_ref, DCT.language,
                       URIRef(LANG_BASE_URI + voc_code)))

        # Drop the literal language triple for the default language
        default_language = Literal(config.get(DEFAULT_LANG))
        graph.remove((catalog_ref, DCT.language, default_language))
예제 #5
0
def resource_uri(resource_dict):
    '''
    Build the URI that identifies a resource in the RDF serializations.

    The value will be the first found of:

        1. The value of the `uri` field, unless it is empty or the literal
           string 'None'
        2. `catalog_uri()` + '/catalog/' + the dataset id given by
           `dataset_id_from_resource()` + '/resource/' + `id` field

    Returns the resource URI.
    '''

    explicit_uri = resource_dict.get('uri')
    if explicit_uri and explicit_uri != 'None':
        return explicit_uri

    # No usable explicit URI: derive one from the owning dataset
    parent_id = dataset_id_from_resource(resource_dict)
    return '{0}/catalog/{1}/resource/{2}'.format(
        catalog_uri().rstrip('/'), parent_id, resource_dict['id'])
예제 #6
0
    def update_config(self, config):
        '''
        Register the plugin templates and validate the endpoint settings.

        Adds the `templates` directory to the template search path, calls
        `catalog_uri()` once at startup and, if a custom catalog endpoint is
        configured under `CUSTOM_ENDPOINT_CONFIG`, checks that it is well
        formed.

        Raises an Exception if the custom endpoint does not start with a
        slash or does not contain the `{_format}` placeholder.
        '''
        p.toolkit.add_template_directory(config, 'templates')

        # Check catalog URI on startup to emit a warning if necessary
        catalog_uri()

        # Check custom catalog endpoint
        custom_endpoint = config.get(CUSTOM_ENDPOINT_CONFIG)
        if custom_endpoint:
            if not custom_endpoint.startswith('/'):
                # '/' is a (forward) slash, not a backslash
                raise Exception(
                    '"{0}" should start with a slash (/)'.format(
                        CUSTOM_ENDPOINT_CONFIG))
            if '{_format}' not in custom_endpoint:
                raise Exception(
                    '"{0}" should contain {{_format}}'.format(
                        CUSTOM_ENDPOINT_CONFIG))
예제 #7
0
    def test_graph_from_catalog_modified_date(self):
        '''
        The catalog node carries the `metadata_modified` value of the dataset
        created by this test as a DCT.modified literal typed XSD.dateTime.
        '''

        created = factories.Dataset()

        serializer = RDFSerializer()
        graph = serializer.g

        catalog_ref = serializer.graph_from_catalog()

        eq_(unicode(catalog_ref), utils.catalog_uri())

        assert self._triple(graph, catalog_ref, DCT.modified,
                            created['metadata_modified'], XSD.dateTime)
    def test_graph_from_catalog_modified_date(self):
        '''
        Serializing the catalog after creating a dataset must yield a
        DCT.modified triple equal to that dataset's `metadata_modified`,
        typed as XSD.dateTime.
        '''

        new_dataset = factories.Dataset()

        rdf_serializer = RDFSerializer()
        rdf_graph = rdf_serializer.g

        catalog_node = rdf_serializer.graph_from_catalog()

        eq_(unicode(catalog_node), utils.catalog_uri())

        assert self._triple(rdf_graph, catalog_node, DCT.modified,
                            new_dataset['metadata_modified'], XSD.dateTime)
예제 #9
0
def object_uri(record_dict):
    """
    Build the URI that identifies an object record.

    The URI is `catalog_uri()` (without trailing slashes) + '/object/' + the
    `uuid` value of the record. A record without a `uuid` key yields a URI
    ending in 'None'.

    Returns a string with the object URI.
    """

    record_id = record_dict.get('uuid')
    return '{0}/object/{1}'.format(catalog_uri().rstrip('/'), record_id)
예제 #10
0
    def test_graph_from_catalog(self):
        '''
        Without a catalog dict, the catalog node takes its URI from
        `utils.catalog_uri()` and its basic fields from the site config.
        '''

        serializer = RDFSerializer()
        graph = serializer.g

        catalog_ref = serializer.graph_from_catalog()

        eq_(unicode(catalog_ref), utils.catalog_uri())

        # Basic fields
        assert self._triple(graph, catalog_ref, RDF.type, DCAT.Catalog)
        assert self._triple(graph, catalog_ref, DCT.title,
                            config.get('ckan.site_title'))
        assert self._triple(graph, catalog_ref, FOAF.homepage,
                            config.get('ckan.site_url'))
        assert self._triple(graph, catalog_ref, DCT.language, 'en')
    def test_graph_from_catalog(self):
        '''
        Without a catalog dict, the catalog node takes its URI from
        `utils.catalog_uri()` and its basic fields from the site config; the
        homepage is expected as a URIRef.
        '''

        rdf_serializer = RDFSerializer()
        rdf_graph = rdf_serializer.g

        catalog_node = rdf_serializer.graph_from_catalog()

        eq_(unicode(catalog_node), utils.catalog_uri())

        # Basic fields
        assert self._triple(rdf_graph, catalog_node, RDF.type, DCAT.Catalog)
        assert self._triple(rdf_graph, catalog_node, DCT.title,
                            config.get('ckan.site_title'))
        assert self._triple(rdf_graph, catalog_node, FOAF.homepage,
                            URIRef(config.get('ckan.site_url')))
        assert self._triple(rdf_graph, catalog_node, DCT.language, 'en')
예제 #12
0
    def graph_from_catalog(self, catalog_dict=None):
        '''
        Build the catalog (CKAN site) graph with every loaded profile.

        Each profile class in `self._profiles` is instantiated with the shared
        RDFLib graph (`self.g`) and the compatibility mode flag, and asked to
        add its catalog triples to that graph.

        Returns the reference to the catalog, an rdflib URIRef built from
        `catalog_uri()`.
        '''

        catalog_node = URIRef(catalog_uri())

        for make_profile in self._profiles:
            active_profile = make_profile(self.g, self.compatibility_mode)
            active_profile.graph_from_catalog(catalog_dict, catalog_node)

        return catalog_node
예제 #13
0
    def _page_url(page):
        '''
        Build the URL of the given page of the current request.

        The URL is `catalog_uri()` followed by the request path. Of the
        request parameters only `modified_since`, `profiles`, `q` and `fq`
        are carried over, followed by the `page` parameter.
        '''

        carried_over = ('modified_since', 'profiles', 'q', 'fq')

        url = '%s%s' % (catalog_uri(), toolkit.request.path)

        kept = [(key, value)
                for key, value in toolkit.request.params.items()
                if key != 'page' and key in carried_over]

        if not kept:
            return '{0}?page={1}'.format(url, page)

        query = '&'.join('{0}={1}'.format(key, value) for key, value in kept)
        return '{0}?{1}&page={2}'.format(url, query, page)
예제 #14
0
def organization_uri(orga_dict):
    '''
    Build the URI that identifies an organization in the RDF serializations.

    The value is

        `catalog_uri()` (without trailing slashes) + '/organization/' + `id`

    where `id` is read from `orga_dict`; if the key is missing the URI ends
    in 'None'.

    Returns a string with the organization URI.
    '''

    base = catalog_uri().rstrip('/')
    orga_id = orga_dict.get('id', None)

    return '{0}/organization/{1}'.format(base, orga_id)
    def test_graph_from_catalog_dict(self):
        '''
        Values passed in the catalog dict end up as the basic fields of the
        catalog node; the homepage is expected as a URIRef.
        '''

        catalog_dict = {
            "title": "My Catalog",
            "description": "An Open Data Catalog",
            "homepage": "http://example.com",
            "language": "de",
        }

        serializer = RDFSerializer()
        graph = serializer.g

        catalog_ref = serializer.graph_from_catalog(catalog_dict)

        eq_(unicode(catalog_ref), utils.catalog_uri())

        # Basic fields
        assert self._triple(graph, catalog_ref, RDF.type, DCAT.Catalog)
        assert self._triple(graph, catalog_ref, DCT.title,
                            catalog_dict["title"])
        assert self._triple(graph, catalog_ref, DCT.description,
                            catalog_dict["description"])
        assert self._triple(graph, catalog_ref, FOAF.homepage,
                            URIRef(catalog_dict["homepage"]))
        assert self._triple(graph, catalog_ref, DCT.language,
                            catalog_dict["language"])
예제 #16
0
    def test_graph_from_catalog_dict(self):
        '''
        Values passed in the catalog dict end up as the basic fields of the
        catalog node; the homepage is compared as a plain string.
        '''

        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': 'http://example.com',
            'language': 'de',
        }

        serializer = RDFSerializer()
        graph = serializer.g

        catalog_ref = serializer.graph_from_catalog(catalog_dict)

        eq_(unicode(catalog_ref), utils.catalog_uri())

        # Basic fields
        assert self._triple(graph, catalog_ref, RDF.type, DCAT.Catalog)
        assert self._triple(graph, catalog_ref, DCT.title,
                            catalog_dict['title'])
        assert self._triple(graph, catalog_ref, DCT.description,
                            catalog_dict['description'])
        assert self._triple(graph, catalog_ref, FOAF.homepage,
                            catalog_dict['homepage'])
        assert self._triple(graph, catalog_ref, DCT.language,
                            catalog_dict['language'])
    def test_graph_from_catalog_dict(self):
        '''
        A catalog dict overrides the site defaults: title, description and
        language are matched as given, the homepage as a URIRef.
        '''

        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': 'http://example.com',
            'language': 'de',
        }

        rdf_serializer = RDFSerializer()
        rdf_graph = rdf_serializer.g

        catalog_node = rdf_serializer.graph_from_catalog(catalog_dict)

        eq_(unicode(catalog_node), utils.catalog_uri())

        # Basic fields
        assert self._triple(rdf_graph, catalog_node, RDF.type, DCAT.Catalog)
        assert self._triple(rdf_graph, catalog_node, DCT.title,
                            catalog_dict['title'])
        assert self._triple(rdf_graph, catalog_node, DCT.description,
                            catalog_dict['description'])
        assert self._triple(rdf_graph, catalog_node, FOAF.homepage,
                            URIRef(catalog_dict['homepage']))
        assert self._triple(rdf_graph, catalog_node, DCT.language,
                            catalog_dict['language'])
예제 #18
0
    def test_graph_from_catalog_dict_language_uri_ref(self):
        '''
        A catalog language given as a URI is expected in the graph as a
        URIRef for DCT.language.
        '''

        language_uri = (
            'http://publications.europa.eu/resource/authority/language/ITA')
        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': 'http://example.com',
            'language': language_uri,
        }

        serializer = RDFSerializer()
        graph = serializer.g

        catalog_ref = serializer.graph_from_catalog(catalog_dict)

        eq_(unicode(catalog_ref), utils.catalog_uri())

        # language field
        expected_language = URIRef(catalog_dict['language'])
        assert self._triple(graph, catalog_ref, DCT.language,
                            expected_language)
예제 #19
0
    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''
        Add the triples describing a dataset to the graph `self.g`.

        Two nodes are described: the metadata record (`dataset_ref` itself)
        and the dataset proper (`dataset_ref` + '#dataset'). The metadata
        record gets a licence and points at the dataset as its primary topic;
        the dataset gets title, landing page, description, DOI, tags, dates,
        contact point, update frequency, licence, categories, temporal
        extent, creator and contributors. Resources are added at the end via
        `self.graph_add_resources()`.

        `dataset_dict` is indexed directly for `name`, `creator_user_id` and
        `dataset_category`, so those keys must be present.
        '''
        namespaces = {
            'dc': DC,
            'dcat': DCAT,
            'adms': ADMS,
            'vcard': VCARD,
            'foaf': FOAF,
            'schema': SCHEMA,
            'time': TIME,
            'skos': SKOS,
            'locn': LOCN,
            'gsp': GSP,
            'owl': OWL,
            'tdwgi': TDWGI,
            'aiiso': AIISO,
            'mads': MADS,
            'void': VOID,
            'cc': CC,
            'org': ORG
        }

        g = self.g

        context = self.get_context()

        # Add some more namespaces
        for prefix, namespace in namespaces.iteritems():
            g.bind(prefix, namespace)

        # Add #dataset to the dataset URI to denote the conceptual object - the actual dataset
        # Without #dataset is the metadata - and that needs CC0 for BBC res
        # (this local name shadows any module-level `dataset_uri` inside this method)
        dataset_uri = URIRef(dataset_ref + '#dataset')

        # Add dataset description (NB: This isn't the dataset - this is the dataset metadata)
        dataset_metadata_uri = URIRef(dataset_ref)
        g.add((dataset_metadata_uri, RDF.type, VOID.DatasetDescription))
        g.add((dataset_metadata_uri, CC.license, URIRef(METADATA_LICENCE)))
        # This metadata describes #dataset
        g.add((dataset_metadata_uri, FOAF.primaryTopic, dataset_uri))
        # If it is possible to access the RDF via dataset name, not uuid
        # In which case add a sameAs for the dataset name uri
        # NOTE(review): if 'CKAN_CURRENT_URL' is missing from the environ,
        # .get() presumably returns None and the `in` test would raise
        # TypeError - confirm the key is always set
        if dataset_dict['name'] in request.environ.get('CKAN_CURRENT_URL'):
            alt_dataset_uri = '{0}/dataset/{1}'.format(catalog_uri().rstrip('/'), dataset_dict['name'])
            # Add a sameAs link
            g.add((dataset_metadata_uri, OWL.sameAs, URIRef(alt_dataset_uri)))
        # And now we can describe the dataset itself
        g.add((dataset_uri, RDF.type, DCAT.Dataset))

        # Basic fields
        items = [
            ('title', DC.title, None, Literal),
            ('url', DCAT.landingPage, None, URIRef),
        ]
        self._add_triples_from_dict(dataset_dict, dataset_uri, items)

        if dataset_dict.get('notes', None):
            g.add((dataset_uri, DCAT.description, Literal(dataset_dict['notes'])))

        # Add DOI
        doi = dataset_dict.get('doi', None)
        if doi:
            g.set((dataset_uri, DC.identifier, URIRef(doi)))

        # Tags
        for tag in dataset_dict.get('tags', []):
            g.add((dataset_uri, DCAT.keyword, Literal(tag['name'])))

        # Dates
        items = [
            ('issued', DC.issued, ['metadata_created'], Literal),
            ('modified', DC.modified, ['metadata_modified'], Literal),
        ]
        self._add_date_triples_from_dict(dataset_dict, dataset_uri, items)

        # We don't have maintainers - whoever added it to the portal is the maintainer
        creator_user_id = dataset_dict['creator_user_id']
        user = toolkit.get_action('user_show')(context, {
            'id': creator_user_id,
        })

        # Add publisher
        nhm_uri = self.graph_add_museum()

        if user:
            # if this is the admin user, just add the contactPoint
            if user['sysadmin'] and user['name'] == 'admin':
                g.add((dataset_uri, DCAT.contactPoint, nhm_uri))
            else:
                user_uri = URIRef(self.user_uri(creator_user_id))
                g.add((user_uri, RDF.type, VCARD.Person))
                if 'fullname' in user:
                    g.add((user_uri, VCARD.fn, Literal(user['fullname'])))
                if 'email' in user:
                    # NOTE(review): the raw email value is used as a URIRef
                    # here, while the author email below is a Literal -
                    # confirm which form is intended
                    g.add((user_uri, VCARD.hasEmail, URIRef(user['email'])))
                # All users are members of the NHM
                g.add((user_uri, MADS.hasAffiliation, nhm_uri))
                # This user is the contact point for the dataset
                g.add((dataset_uri, DCAT.contactPoint, user_uri))

        # Add update frequency
        update_frequency = dataset_dict.get('update_frequency', None)
        if update_frequency:
            code = self._get_update_frequency_code(update_frequency)
            if code:
                g.set((dataset_uri, DC.accrualPeriodicity, URIRef(SDMX_CODE[code])))

        # Add licence - use URL if we have it
        # Otherwise try using the licence title
        if dataset_dict.get('license_url', None):
            g.set((dataset_uri, DC.license, URIRef(dataset_dict['license_url'])))
        elif dataset_dict.get('license_title', None):
            g.set((dataset_uri, DC.license, Literal(dataset_dict['license_title'])))

        # Add categories
        # Each category becomes a blank-node SKOS concept linked to the dataset as a theme
        # NOTE(review): direct indexing raises KeyError when the dataset has
        # no 'dataset_category' key - confirm it is always present
        for category in dataset_dict['dataset_category']:
            # print category
            n = BNode()
            g.add((n, rdflib.RDF.type, SKOS.Concept))
            g.add((n, SKOS.prefLabel, Literal(category)))
            g.add((dataset_uri, DCAT.theme, n))

        # Temporal extent
        temporal_extent = dataset_dict.get('temporal_extent', None)
        if temporal_extent:
            g.add((dataset_uri, DC.temporal, Literal(temporal_extent)))

        # Creator: the museum node when the author is the museum itself,
        # otherwise a blank-node person with optional email and affiliation
        author = dataset_dict.get('author', None)
        if author:

            if author == 'Natural History Museum':
                g.add((dataset_uri, DC.creator, nhm_uri))
            else:
                author_details = BNode()
                g.add((author_details, VCARD.fn, Literal(author)))
                if dataset_dict.get('author_email', None):
                    g.add((author_details, VCARD.hasEmail, Literal(dataset_dict['author_email'])))
                g.add((author_details, RDF.type, VCARD.Person))
                g.add((dataset_uri, DC.creator, author_details))
                affiliation = dataset_dict.get('affiliation', None)
                if affiliation:
                    if affiliation == 'Natural History Museum':
                        g.add((author_details, MADS.hasAffiliation, nhm_uri))
                    else:
                        g.add((author_details, MADS.hasAffiliation, Literal(affiliation)))

        # Contributors are stored as a single literal
        contributors = dataset_dict.get('contributors', None)
        if contributors:
            g.add((dataset_uri, DC.contributor, Literal(contributors)))

        self.graph_add_resources(dataset_uri, dataset_dict)
예제 #20
0
 def user_uri(id):
     '''
     Return `catalog_uri()` (without trailing slashes) + '/user/' + `id`.
     '''
     base_uri = catalog_uri().rstrip('/')
     return '{0}/user/{1}'.format(base_uri, id)
예제 #21
0
    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''
        Add the DCATAPIT specific triples for a dataset node to `self.g`.

        Working on the graph already populated for `dataset_ref`, this adds
        the DCATAPIT dataset type and replaces or adds themes, languages,
        spatial coverage, periodicity, landing page, conformsTo standards,
        publisher, rights holder and creator agents. It then removes one
        existing contact point and looks up the owning organization.

        NOTE(review): in this excerpt `title`, `holder_ref` and `org_dict`
        are assigned but never read; the method presumably continues beyond
        what is visible here - confirm against the full file.
        '''

        title = dataset_dict.get('title')

        g = self.g

        for prefix, namespace in it_namespaces.iteritems():
            g.bind(prefix, namespace)

        ### add a further type for the Dataset node
        g.add((dataset_ref, RDF.type, DCATAPIT.Dataset))

        ### replace themes
        # Each comma separated theme replaces the plain URIRef theme triple
        # with one rooted at THEME_BASE_URI; braces are stripped from the key
        value = self._get_dict_value(dataset_dict, 'theme')
        if value:
            for theme in value.split(','):
                self.g.remove((dataset_ref, DCAT.theme, URIRef(theme)))
                theme = theme.replace('{', '').replace('}', '')
                self.g.add(
                    (dataset_ref, DCAT.theme, URIRef(THEME_BASE_URI + theme)))
                self._add_concept(THEME_CONCEPTS, theme)
        else:
            # No theme given: fall back to the default theme key
            self.g.add((dataset_ref, DCAT.theme,
                        URIRef(THEME_BASE_URI + DEFAULT_THEME_KEY)))
            self._add_concept(THEME_CONCEPTS, DEFAULT_THEME_KEY)

        ### replace languages
        # The literal language triple is swapped for a URI under LANG_BASE_URI
        value = self._get_dict_value(dataset_dict, 'language')
        if value:
            for lang in value.split(','):
                self.g.remove((dataset_ref, DCT.language, Literal(lang)))
                lang = lang.replace('{', '').replace('}', '')
                self.g.add(
                    (dataset_ref, DCT.language, URIRef(LANG_BASE_URI + lang)))
                # self._add_concept(LANG_CONCEPTS, lang)

        ### add spatial (EU URI)
        value = self._get_dict_value(dataset_dict, 'geographical_name')
        if value:
            for gname in value.split(','):
                gname = gname.replace('{', '').replace('}', '')

                # One blank-node DCT.Location per geographical name
                dct_location = BNode()
                self.g.add((dataset_ref, DCT.spatial, dct_location))

                self.g.add((dct_location, RDF['type'], DCT.Location))

                # Try and add a Concept from the spatial vocabulary
                if self._add_concept(GEO_CONCEPTS, gname):
                    self.g.add((dct_location, DCATAPIT.geographicalIdentifier,
                                Literal(GEO_BASE_URI + gname)))

                    # geo concept is not really required, but may be a useful adding
                    self.g.add((dct_location, LOCN.geographicalName,
                                URIRef(GEO_BASE_URI + gname)))
                else:
                    # The dataset field is not a controlled tag, let's create a Concept out of the label we have
                    concept = BNode()
                    self.g.add((concept, RDF['type'], SKOS.Concept))
                    self.g.add((concept, SKOS.prefLabel, Literal(gname)))
                    self.g.add((dct_location, LOCN.geographicalName, concept))

        ### add spatial (GeoNames)
        value = self._get_dict_value(dataset_dict, 'geographical_geonames_url')
        if value:
            dct_location = BNode()
            self.g.add((dataset_ref, DCT.spatial, dct_location))

            self.g.add((dct_location, RDF['type'], DCT.Location))
            self.g.add((dct_location, DCATAPIT.geographicalIdentifier,
                        Literal(value)))

        ### replace periodicity
        self._remove_node(dataset_dict, dataset_ref,
                          ('frequency', DCT.accrualPeriodicity, None, Literal))
        self._add_uri_node(
            dataset_dict, dataset_ref,
            ('frequency', DCT.accrualPeriodicity, DEFAULT_FREQ_CODE, URIRef),
            FREQ_BASE_URI)
        # self._add_concept(FREQ_CONCEPTS, dataset_dict.get('frequency', DEFAULT_VOCABULARY_KEY))

        ### replace landing page
        # Prefer the name-based dataset page; otherwise use dataset_uri()
        self._remove_node(dataset_dict, dataset_ref,
                          ('url', DCAT.landingPage, None, URIRef))
        landing_page_uri = None
        if dataset_dict.get('name'):
            landing_page_uri = '{0}/dataset/{1}'.format(
                catalog_uri().rstrip('/'), dataset_dict['name'])
        else:
            landing_page_uri = dataset_uri(
                dataset_dict)  # TODO: preserve original URI if harvested

        self.g.add((dataset_ref, DCAT.landingPage, URIRef(landing_page_uri)))

        ### conformsTo
        # All existing conformsTo triples are dropped, then one blank-node
        # standard is added per comma separated identifier
        self.g.remove((dataset_ref, DCT.conformsTo, None))
        value = self._get_dict_value(dataset_dict, 'conforms_to')
        if value:
            for item in value.split(','):

                standard = BNode()
                self.g.add((dataset_ref, DCT.conformsTo, standard))

                self.g.add((standard, RDF['type'], DCT.Standard))
                self.g.add((standard, RDF['type'], DCATAPIT.Standard))
                self.g.add((standard, DCT.identifier, Literal(item)))

        ### publisher

        # DCAT by default creates this node
        # <dct:publisher>
        #   <foaf:Organization rdf:about="http://10.10.100.75/organization/55535226-f82a-4cf7-903a-3e10afeaa79a">
        #     <foaf:name>orga2_test</foaf:name>
        #   </foaf:Organization>
        # </dct:publisher>

        # Remove every existing publisher triple before adding our own agent
        for s, p, o in g.triples((dataset_ref, DCT.publisher, None)):
            #log.info("Removing publisher %r", o)
            g.remove((s, p, o))

        self._add_agent(dataset_dict, dataset_ref, 'publisher', DCT.publisher)

        ### Rights holder : Agent
        holder_ref = self._add_agent(dataset_dict, dataset_ref, 'holder',
                                     DCT.rightsHolder)

        ### Author : Agent
        self._add_agent(dataset_dict, dataset_ref, 'creator', DCT.creator)

        ### Point of Contact

        # <dcat:contactPoint rdf:resource="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri"/>

        # <!-- http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri -->
        # <dcatapit:Organization rdf:about="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri">
        #    <rdf:type rdf:resource="&vcard;Kind"/>
        #    <rdf:type rdf:resource="&vcard;Organization"/>
        #    <vcard:hasEmail rdf:resource="mailto:[email protected]"/>
        #    <vcard:fn>Regione Liguria - Sportello Cartografico</vcard:fn>
        # </dcatapit:Organization>

        # TODO: preserve original info if harvested

        # retrieve the contactPoint added by the euro serializer
        euro_poc = g.value(subject=dataset_ref,
                           predicate=DCAT.contactPoint,
                           object=None,
                           any=False)

        # euro poc has this format:
        # <dcat:contactPoint>
        #    <vcard:Organization rdf:nodeID="Nfcd06f452bcd41f48f33c45b0c95979e">
        #       <vcard:fn>THE ORGANIZATION NAME</vcard:fn>
        #       <vcard:hasEmail>THE ORGANIZATION EMAIL</vcard:hasEmail>
        #    </vcard:Organization>
        # </dcat:contactPoint>

        if euro_poc:
            g.remove((dataset_ref, DCAT.contactPoint, euro_poc))

        # org_id is None when the dataset has no 'organization' entry
        org_id = dataset_dict.get('organization', {}).get('id')

        # get orga info
        org_show = logic.get_action('organization_show')

        # Any failure of the lookup is swallowed and an empty dict is used
        # NOTE(review): `except Exception, e` is Python 2 only syntax and the
        # exception is neither logged nor re-raised
        try:
            org_dict = org_show({}, {
                'id': org_id,
                'include_datasets': False,
                'include_tags': False,
                'include_users': False,
                'include_groups': False,
                'include_extras': True,
                'include_followers': False
            })
        except Exception, e:
            org_dict = {}
예제 #22
0
    def update_config(self, config):
        '''
        Register the `templates` directory of this plugin and call
        `catalog_uri()` once while the configuration is being loaded.
        '''
        toolkit = p.toolkit
        toolkit.add_template_directory(config, 'templates')

        # Resolving the catalog URI at startup lets it emit a warning early
        # if necessary
        catalog_uri()
예제 #23
0
    def graph_from_dataset(self, dataset_dict, dataset_ref):

        title = dataset_dict.get('title')

        g = self.g

        for prefix, namespace in it_namespaces.iteritems():
            g.bind(prefix, namespace)

        ### add a further type for the Dataset node
        g.add((dataset_ref, RDF.type, DCATAPIT.Dataset))

        ### replace themes
        value = self._get_dict_value(dataset_dict, 'theme')
        self._add_themes(dataset_ref, value)

        ### replace languages
        value = self._get_dict_value(dataset_dict, 'language')
        if value:
            for lang in value.split(','):
                self.g.remove((dataset_ref, DCT.language, Literal(lang)))
                lang = lang.replace('{', '').replace('}', '')
                self.g.add(
                    (dataset_ref, DCT.language, URIRef(LANG_BASE_URI + lang)))
                # self._add_concept(LANG_CONCEPTS, lang)

        ### add spatial (EU URI)
        value = self._get_dict_value(dataset_dict, 'geographical_name')
        if value:
            for gname in value.split(','):
                gname = gname.replace('{', '').replace('}', '')

                dct_location = BNode()
                self.g.add((dataset_ref, DCT.spatial, dct_location))

                self.g.add((dct_location, RDF['type'], DCT.Location))

                # Try and add a Concept from the spatial vocabulary
                if self._add_concept(GEO_CONCEPTS, gname):
                    self.g.add((dct_location, DCATAPIT.geographicalIdentifier,
                                Literal(GEO_BASE_URI + gname)))

                    # geo concept is not really required, but may be a useful adding
                    self.g.add((dct_location, LOCN.geographicalName,
                                URIRef(GEO_BASE_URI + gname)))
                else:
                    # The dataset field is not a controlled tag, let's create a Concept out of the label we have
                    concept = BNode()
                    self.g.add((concept, RDF['type'], SKOS.Concept))
                    self.g.add((concept, SKOS.prefLabel, Literal(gname)))
                    self.g.add((dct_location, LOCN.geographicalName, concept))

        ### add spatial (GeoNames)
        value = self._get_dict_value(dataset_dict, 'geographical_geonames_url')
        if value:
            dct_location = BNode()
            self.g.add((dataset_ref, DCT.spatial, dct_location))

            self.g.add((dct_location, RDF['type'], DCT.Location))
            self.g.add((dct_location, DCATAPIT.geographicalIdentifier,
                        Literal(value)))

        ### replace periodicity
        self._remove_node(dataset_dict, dataset_ref,
                          ('frequency', DCT.accrualPeriodicity, None, Literal))
        self._add_uri_node(
            dataset_dict, dataset_ref,
            ('frequency', DCT.accrualPeriodicity, DEFAULT_FREQ_CODE, URIRef),
            FREQ_BASE_URI)
        # self._add_concept(FREQ_CONCEPTS, dataset_dict.get('frequency', DEFAULT_VOCABULARY_KEY))

        ### replace landing page
        self._remove_node(dataset_dict, dataset_ref,
                          ('url', DCAT.landingPage, None, URIRef))
        landing_page_uri = None
        if dataset_dict.get('name'):
            landing_page_uri = '{0}/dataset/{1}'.format(
                catalog_uri().rstrip('/'), dataset_dict['name'])
        else:
            landing_page_uri = dataset_uri(
                dataset_dict)  # TODO: preserve original URI if harvested

        self.g.add((dataset_ref, DCAT.landingPage, URIRef(landing_page_uri)))

        ### conformsTo
        self.g.remove((dataset_ref, DCT.conformsTo, None))
        value = self._get_dict_value(dataset_dict, 'conforms_to')
        if value:
            try:
                conforms_to = json.loads(value)
            except (
                    TypeError,
                    ValueError,
            ):
                log.warn("Cannot deserialize DCATAPIT:conformsTo value: %s",
                         value)
                conforms_to = []

            for item in conforms_to:
                standard = URIRef(item['uri']) if item.get('uri') else BNode()

                self.g.add((dataset_ref, DCT.conformsTo, standard))
                self.g.add((standard, RDF['type'], DCT.Standard))
                self.g.add((standard, RDF['type'], DCATAPIT.Standard))

                self.g.add(
                    (standard, DCT.identifier, Literal(item['identifier'])))

                for lang, val in (item.get('title') or {}).items():
                    if lang in OFFERED_LANGS:
                        self.g.add(
                            (standard, DCT.title,
                             Literal(val,
                                     lang=lang_mapping_ckan_to_xmllang.get(
                                         lang, lang))))

                for lang, val in (item.get('description') or {}).items():
                    if lang in OFFERED_LANGS:
                        self.g.add(
                            (standard, DCT.description,
                             Literal(val,
                                     lang=lang_mapping_ckan_to_xmllang.get(
                                         lang, lang))))

                for reference_document in (item.get('referenceDocumentation')
                                           or []):
                    self.g.add((standard, DCATAPIT.referenceDocumentation,
                                URIRef(reference_document)))

        ### ADMS:identifier alternative identifiers
        self.g.remove((
            dataset_ref,
            ADMS.identifier,
            None,
        ))
        try:
            alt_ids = json.loads(dataset_dict['alternate_identifier'])
        except (
                KeyError,
                TypeError,
                ValueError,
        ):
            alt_ids = []

        for alt_identifier in alt_ids:
            node = BNode()
            self.g.add((dataset_ref, ADMS.identifier, node))

            identifier = Literal(alt_identifier['identifier'])
            self.g.add((node, SKOS.notation, identifier))

            if alt_identifier.get('agent'):
                adata = alt_identifier['agent']
                agent = BNode()

                self.g.add((agent, RDF['type'], DCATAPIT.Agent))
                self.g.add((agent, RDF['type'], FOAF.Agent))
                self.g.add((node, DCT.creator, agent))
                if adata.get('agent_name'):
                    for alang, aname in adata['agent_name'].items():
                        self.g.add((agent, FOAF.name, Literal(aname,
                                                              lang=alang)))

                if adata.get('agent_identifier'):
                    self.g.add((agent, DCT.identifier,
                                Literal(adata['agent_identifier'])))

        self._set_temporal_coverage(self.g, dataset_dict, dataset_ref)

        ### publisher

        # DCAT by default creates this node
        # <dct:publisher>
        #   <foaf:Organization rdf:about="http://10.10.100.75/organization/55535226-f82a-4cf7-903a-3e10afeaa79a">
        #     <foaf:name>orga2_test</foaf:name>
        #   </foaf:Organization>
        # </dct:publisher>

        for s, p, o in g.triples((dataset_ref, DCT.publisher, None)):
            #log.info("Removing publisher %r", o)
            g.remove((s, p, o))

        publisher_ref = self._add_agent(dataset_dict,
                                        dataset_ref,
                                        'publisher',
                                        DCT.publisher,
                                        use_default_lang=True)

        ### Author (creator) : Agent
        self._add_creators(dataset_dict, dataset_ref)

        ### Point of Contact

        # <dcat:contactPoint rdf:resource="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri"/>

        # <!-- http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri -->
        # <dcatapit:Organization rdf:about="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri">
        #    <rdf:type rdf:resource="&vcard;Kind"/>
        #    <rdf:type rdf:resource="&vcard;Organization"/>
        #    <vcard:hasEmail rdf:resource="mailto:[email protected]"/>
        #    <vcard:fn>Regione Liguria - Sportello Cartografico</vcard:fn>
        # </dcatapit:Organization>

        # TODO: preserve original info if harvested

        # retrieve the contactPoint added by the euro serializer
        euro_poc = g.value(subject=dataset_ref,
                           predicate=DCAT.contactPoint,
                           object=None,
                           any=False)

        # euro poc has this format:
        # <dcat:contactPoint>
        #    <vcard:Organization rdf:nodeID="Nfcd06f452bcd41f48f33c45b0c95979e">
        #       <vcard:fn>THE ORGANIZATION NAME</vcard:fn>
        #       <vcard:hasEmail>THE ORGANIZATION EMAIL</vcard:hasEmail>
        #    </vcard:Organization>
        # </dcat:contactPoint>

        if euro_poc:
            g.remove((dataset_ref, DCAT.contactPoint, euro_poc))

        org_id = dataset_dict.get('owner_org')

        # fetch the details of the dataset's owner organization
        org_show = logic.get_action('organization_show')

        org_dict = {}
        if org_id:
            try:
                org_dict = org_show({'ignore_auth': True}, {
                    'id': org_id,
                    'include_datasets': False,
                    'include_tags': False,
                    'include_users': False,
                    'include_groups': False,
                    'include_extras': True,
                    'include_followers': False
                })
            except Exception, err:
                log.warning("Cannot get org for %s: %s",
                            org_id,
                            err,
                            exc_info=err)