Example #1
def update_conforms_to(pdata):
    # Migrate a legacy `conforms_to` value (either a package field or an
    # `extras` entry) into the structured JSON format used by dcatapit.
    cname = pdata.pop('conforms_to', None)
    to_delete = []
    if not cname:
        for idx, ex in enumerate(pdata.get('extras') or []):
            if ex['key'] == 'conforms_to':
                to_delete.append(idx)
                cname = ex['value']
        if to_delete:
            # pop in reverse order so the remaining indexes stay valid
            for idx in reversed(to_delete):
                pdata['extras'].pop(idx)

    # multilang values for this package, if any were stored
    ml_conforms_to = {}
    ml_pkg = interfaces.get_for_package(pdata['id'])
    if ml_pkg:
        ml_conforms_to = ml_pkg.get('conforms_to') or {}
    if cname:
        validator = toolkit.get_validator('dcatapit_conforms_to')
        try:
            # do not update conforms_to if it's already present
            conforms_to = json.loads(cname)
            if isinstance(conforms_to, list) and len(conforms_to):
                pdata['conforms_to'] = validator(cname, {})
                return
        except (ValueError, TypeError, Invalid) as err:
            print('dataset {}: conforms_to present, but invalid: {}'.format(
                pdata['name'], err))

        standard = {'identifier': None, 'description': {}}
        if not ml_conforms_to:
            standard['identifier'] = cname
        else:
            standard['identifier'] = ml_conforms_to.get(
                'it') or ml_conforms_to.get(DEFAULT_LANG) or cname
            for lang, val in ml_conforms_to.items():
                standard['description'][lang] = val

        Session.query(ML_PM).filter(ML_PM.package_id == pdata['id'],
                                    ML_PM.field == 'conforms_to').delete()
        pdata['conforms_to'] = json.dumps([standard])
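
A sketch of the intended behavior (the package dict below is hypothetical; it
assumes no multilang `conforms_to` entry is stored for the package, and a
working DB session for the cleanup query):

    # legacy dataset: conforms_to lives in extras as a bare identifier
    pdata = {
        'id': 'abc-123',
        'name': 'sample',
        'extras': [{'key': 'conforms_to', 'value': 'REG_UE_1089_2010'}],
    }
    update_conforms_to(pdata)
    # the extras entry is consumed; json.loads() fails on the bare string,
    # so a structured record is built around it:
    # pdata['conforms_to'] == '[{"identifier": "REG_UE_1089_2010", "description": {}}]'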
Example #2
        if 'telephone' in org_dict:
            g.add(
                (poc, VCARD.hasTelephone, Literal(org_dict.get('telephone'))))
        if 'site' in org_dict:
            g.add((poc, VCARD.hasURL, Literal(org_dict.get('site'))))

        ### Rights holder : Agent
        ### holder_ref keeps the graph reference to the holder subject;
        ### holder_use_dataset flags whether holder info was taken from the
        ### dataset (rather than from the organization)
        holder_ref, holder_use_dataset = self._add_right_holder(
            dataset_dict, org_dict, dataset_ref)

        ### Multilingual
        # Add localized entries in dataset
        # TODO: should we remove the non-localized nodes?

        loc_dict = interfaces.get_for_package(dataset_dict['id'])
        #  The multilang fields
        loc_package_mapping = {
            'title': (dataset_ref, DCT.title),
            'notes': (dataset_ref, DCT.description),
            'publisher_name': (publisher_ref, FOAF.name),
        }
        if holder_use_dataset and holder_ref:
            loc_package_mapping['holder_name'] = (holder_ref, FOAF.name)

        self._add_multilang_values(loc_dict, loc_package_mapping)
        if not holder_use_dataset and holder_ref:
            loc_dict = interfaces.get_for_group_or_organization(org_dict['id'])
            loc_package_mapping = {'name': (holder_ref, FOAF.name)}
            self._add_multilang_values(loc_dict, loc_package_mapping)

    def test_graph_from_dataset(self):

        conforms_to_in = [{'identifier': 'CONF1',
                           'uri': 'conf01',
                           'title': {'en': 'title', 'it': 'title'},
                           'referenceDocumentation': ['http://abc.efg/'],
                           },
                          {'identifier': 'CONF2',
                           'title': {'en': 'title', 'it': 'title'},
                           'description': {'en': 'descen', 'it': 'descit'},
                           'referenceDocumentation': ['http://abc.efg/'],
                           },
                          ]

        alternate_identifiers = [{'identifier': 'aaaabc',
                                 'agent': {'agent_identifier': 'agent01',
                                           'agent_name': {'en': 'Agent en 01', 'it': 'Agent it 01'}},
                                 },
                                 {'identifier': 'other identifier', 'agent': {}}]
        creators = [{'creator_name': {'en': 'abc'}, 'creator_identifier': "ABC"},
                    {'creator_name': {'en': 'cde'}, 'creator_identifier': "CDE"},
                    ]

        temporal_coverage = [{'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 10:11:12'},
                             {'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 11:12:13'},
                            ]

        subthemes = [{'theme': 'AGRI', 'subthemes': ['http://eurovoc.europa.eu/100253',
                                                     'http://eurovoc.europa.eu/100258']},
                     {'theme': 'ENVI', 'subthemes': []}]

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'identifier': 'ISBN',
            'temporal_start': '2016-11-01',
            'temporal_end': '2016-11-30',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': json.dumps(alternate_identifiers),
            'temporal_coverage': json.dumps(temporal_coverage),
            # 'theme': 'ECON',
            'geographical_geonames_url': 'http://www.geonames.org/3181913',
            'language': '{DEU,ENG,ITA}',
            'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
            'conforms_to': json.dumps(conforms_to_in),
            'creator': json.dumps(creators),
            'theme': json.dumps(subthemes),
        }
        
        pkg_id = dataset['id']
        
        pub_names = {'it': 'IT publisher',
                     'es': 'EN publisher'}
        holder_names = {'it': 'IT holder name',
                        'es': 'EN holder name'}

        multilang_fields = [('publisher_name', 'package', k, v) for k, v in pub_names.items()] +\
                           [('holder_name', 'package', k, v) for k, v in holder_names.items()]
        
        pkg = helpers.call_action('package_create', {'defer_commit': True}, **dataset)
        rev = getattr(Session, 'revision', repo.new_revision())
        Session.flush()
        Session.revision = rev
        pkg_id = pkg['id']

        for field_name, field_type, lang, text in multilang_fields:
            interfaces.upsert_package_multilang(pkg_id, field_name, field_type, lang, text)

        loc_dict = interfaces.get_for_package(pkg_id)
        #assert loc_dict['publisher_name'] == pub_names
        #assert loc_dict['holder_name'] == holder_names


        # temporary workaround for compatibility with interfaces.get_language(),
        # which will return lang[0]
        pub_names.update({DEFAULT_LANG: dataset['publisher_name']})
        # pub_names.update({DEFAULT_LANG[0]: dataset['publisher_name']})
        holder_names.update({DEFAULT_LANG: dataset['holder_name']})
        # holder_names.update({DEFAULT_LANG[0]: dataset['holder_name']})
        
        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(str(dataset_ref), utils.dataset_uri(dataset))

        # Basic fields
        assert self._triple(g, dataset_ref, RDF.type, DCATAPIT.Dataset)
        assert self._triple(g, dataset_ref, DCT.title, dataset['title'])
        assert self._triple(g, dataset_ref, DCT.description, dataset['notes'])

        assert self._triple(g, dataset_ref, DCT.identifier, dataset['identifier'])

        # Tags
        eq_(len([t for t in g.triples((dataset_ref, DCAT.keyword, None))]), 2)
        for tag in dataset['tags']:
            assert self._triple(g, dataset_ref, DCAT.keyword, tag['name'])
        
        # conformsTo
        conforms_to = list(g.triples((None, DCT.conformsTo, None)))
        assert conforms_to

        conforms_to_dict = dict((d['identifier'], d) for d in conforms_to_in)
        for conf in conforms_to:
            conf_id = conf[-1]

            identifier = g.value(conf_id, DCT.identifier)
            titles = list(g.objects(conf_id, DCT.title))
            descs = list(g.objects(conf_id, DCT.description))
            references = list(g.objects(conf_id, DCATAPIT.referenceDocumentation))
            
            check = conforms_to_dict.get(str(identifier))
            
            assert isinstance(check, dict)

            if check.get('uri'):
                assert check['uri'] == str(conf_id)
            assert len(titles), "missing titles"
            
            assert (len(descs) > 0) == bool(check.get('description')), "missing descriptions"

            for title in titles:
                tlang = title.language
                tval = str(title)
                assert tval == check['title'][tlang], (tlang, tval, check['title'])

            for desc in descs:
                tlang = desc.language
                tval = str(desc)
                assert tval == check['description'][tlang], (tlang, str(tval), check['description'])
            
            ref_docs = check.get('referenceDocumentation')
            assert len(references) == len(ref_docs), "missing reference documentation"
            
            for dref in references:
                assert str(dref) in ref_docs, "{} not in {}".format(dref, ref_docs)
                                                                
            for ref in ref_docs:
                assert URIRef(ref) in references

        # alternate identifiers
        alt_ids = [a[-1] for a in g.triples((None, ADMS.identifier, None))]
        alt_ids_dict = dict((a['identifier'], a) for a in alternate_identifiers)

        for alt_id in alt_ids:
            identifier = g.value(alt_id, SKOS.notation)
            check = alt_ids_dict[str(identifier)]
            assert str(identifier) == check['identifier']
            if check.get('agent'):
                agent_ref = g.value(alt_id, DCT.creator)
                assert agent_ref is not None

                agent_identifier = g.value(agent_ref, DCT.identifier)

                agent_name = dict((v.language, str(v)) for v in g.objects(agent_ref, FOAF.name))
                
                assert set(agent_name.items()) == set(check['agent']['agent_name'].items()),\
                    "expected {}, got {} for {}".format(check['agent']['agent_name'], agent_name, agent_ref)

                assert str(agent_identifier) == check['agent']['agent_identifier'],\
                    "expected {}, got {}".format(check['agent']['agent_identifier'], agent_identifier)
        # creators
        creators.append({'creator_name': {'en': 'test'},
                         'creator_identifier': '412946129'})
        creators_in = list(g.objects(dataset_ref, DCT.creator))
        assert len(creators) == len(creators_in)

        for cref in creators_in:
            cnames = dict((str(c.language) if c.language else DEFAULT_LANG, str(c)) for c in g.objects(cref, FOAF.name))
            c_identifier = g.value(cref, DCT.identifier)
            c_dict = {'creator_name': cnames,
                      'creator_identifier': str(c_identifier)}
            assert c_dict in creators, "no {} in {}".format(c_dict, creators)

        # temporal coverage
        temporal_coverage.append({'temporal_start': dataset['temporal_start'],
                                  'temporal_end': dataset['temporal_end']})
        temp_exts = list(g.triples((dataset_ref, DCT.temporal, None)))
        assert len(temp_exts) == len(temporal_coverage)
        
        # normalize values
        for item in temporal_coverage:
            for k, v in item.items():
                item[k] = pdate(v)

        temp_ext = []
        for interval_t in temp_exts:
            interval = interval_t[-1]
            start = g.value(interval, SCHEMA.startDate)
            end = g.value(interval, SCHEMA.endDate)
            assert start is not None
            assert end is not None
            temp_ext.append({'temporal_start': pdate(str(start)),
                             'temporal_end': pdate(str(end))})

        set1 = set([tuple(d.items()) for d in temp_ext])
        set2 = set([tuple(d.items()) for d in temporal_coverage])
        assert set1 == set2, "Got different temporal coverage sets: \n{}\n vs\n {}".format(set1, set2)

        for pub_ref in g.objects(dataset_ref, DCT.publisher):
            _pub_names = list(g.objects(pub_ref, FOAF.name))

            assert len(_pub_names) 

            for pub_name in _pub_names:
                if pub_name.language:
                    assert str(pub_name.language) in pub_names, "no {} in {}".format(pub_name.language, pub_names)
                    assert pub_names[str(pub_name.language)] == str(pub_name), "{} vs {}".format(pub_name, pub_names)

        for holder_ref in g.objects(dataset_ref, DCT.rightsHolder):
            _holder_names = list(g.objects(holder_ref, FOAF.name))

            assert len(_holder_names) 

            for holder_name in _holder_names:
                if holder_name.language:
                    assert str(holder_name.language) in holder_names, "no {} in {}".format(holder_name.language, holder_names)
                    assert holder_names[str(holder_name.language)] == str(holder_name), "{} vs {}".format(holder_name, holder_names)
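
The assertions above rely on a handful of rdflib lookups. A self-contained
sketch of the same pattern (the graph and literals here are made up for
illustration):

    from rdflib import Graph, Literal, URIRef
    from rdflib.namespace import DCTERMS as DCT

    g = Graph()
    ds = URIRef('http://example.org/dataset/test')  # hypothetical dataset URI
    g.add((ds, DCT.title, Literal('Dataset di test', lang='it')))
    g.add((ds, DCT.title, Literal('Test dataset', lang='en')))

    # g.objects() yields every object for a (subject, predicate) pair;
    # language-tagged Literals expose the tag via .language
    titles = dict((t.language, str(t)) for t in g.objects(ds, DCT.title))
    assert titles == {'it': 'Dataset di test', 'en': 'Test dataset'}

    # g.value() returns a single matching object, or None when there is none
    assert g.value(ds, DCT.identifier) is None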
Example #4
class ItalianDCATAPProfile(RDFProfile):
    '''
    An RDF profile for the Italian DCAT-AP recommendation for data portals.
    It requires the European DCAT-AP profile (`euro_dcat_ap`).
    '''
    def parse_dataset(self, dataset_dict, dataset_ref):

        # check the dataset type
        if (dataset_ref, RDF.type, DCATAPIT.Dataset) not in self.g:
            # not a DCATAPIT dataset
            return dataset_dict

        # date info
        for predicate, key, logf in (
            (DCT.issued, 'issued', log.debug),
            (DCT.modified, 'modified', log.warn),
        ):
            value = self._object_value(dataset_ref, predicate)
            if value:
                self._remove_from_extra(dataset_dict, key)

                value = helpers.format(value, '%Y-%m-%d', 'date')
                dataset_dict[key] = value
            else:
                logf('No %s found for dataset "%s"', predicate,
                     dataset_dict.get('title', '---'))

        # 0..1 predicates
        for predicate, key, logf in ((DCT.identifier, 'identifier',
                                      log.warn), ):
            value = self._object_value(dataset_ref, predicate)
            if value:
                self._remove_from_extra(dataset_dict, key)
                dataset_dict[key] = value
            else:
                logf('No %s found for dataset "%s"', predicate,
                     dataset_dict.get('title', '---'))

        # 0..n predicates list
        for predicate, key, logf in (
            (ADMS.identifier, 'alternate_identifier', log.debug),
            (DCT.isVersionOf, 'is_version_of', log.debug),
        ):
            valueList = self._object_value_list(dataset_ref, predicate)
            if valueList:
                self._remove_from_extra(dataset_dict, key)
                value = ','.join(valueList)
                dataset_dict[key] = value
            else:
                logf('No %s found for dataset "%s"', predicate,
                     dataset_dict.get('title', '---'))

        # conformsTo
        self._remove_from_extra(dataset_dict, 'conforms_to')
        conform_list = []
        for conforms_to in self.g.objects(dataset_ref, DCT.conformsTo):
            conform_list.append(self._object_value(conforms_to,
                                                   DCT.identifier))
        if conform_list:
            value = ','.join(conform_list)
            dataset_dict['conforms_to'] = value
        else:
            log.debug('No DCT.conformsTo found for dataset "%s"',
                      dataset_dict.get('title', '---'))

        # Temporal
        start, end = self._time_interval(dataset_ref, DCT.temporal)
        for v, key, logf in (
            (start, 'temporal_start', log.debug),
            (end, 'temporal_end', log.debug),
        ):
            if v:
                self._remove_from_extra(dataset_dict, key)

                value = helpers.format(v, '%Y-%m-%d', 'date')
                dataset_dict[key] = value
            else:
                log.warn('No %s Date found for dataset "%s"', key,
                         dataset_dict.get('title', '---'))

        # URI 0..1
        for predicate, key, base_uri in ((DCT.accrualPeriodicity, 'frequency',
                                          FREQ_BASE_URI), ):
            valueRef = self._object_value(dataset_ref, predicate)
            if valueRef:
                self._remove_from_extra(dataset_dict, key)
                value = self._strip_uri(valueRef, base_uri)
                dataset_dict[key] = value
            else:
                log.warn('No %s found for dataset "%s"', predicate,
                         dataset_dict.get('title', '---'))

        # URI lists
        for predicate, key, base_uri in (
            (DCT.language, 'language', LANG_BASE_URI),
            (DCAT.theme, 'theme', THEME_BASE_URI),
        ):
            self._remove_from_extra(dataset_dict, key)
            valueRefList = self._object_value_list(dataset_ref, predicate)
            valueList = [
                self._strip_uri(valueRef, base_uri)
                for valueRef in valueRefList
            ]
            value = ','.join(valueList)
            if len(valueList) > 1:
                value = '{' + value + '}'
            dataset_dict[key] = value

        # Spatial
        spatial_tags = []
        geonames_url = None

        for spatial in self.g.objects(dataset_ref, DCT.spatial):
            for spatial_literal in self.g.objects(
                    spatial, DCATAPIT.geographicalIdentifier):
                spatial_value = spatial_literal.value
                if GEO_BASE_URI in spatial_value:
                    spatial_tags.append(
                        self._strip_uri(spatial_value, GEO_BASE_URI))
                else:
                    if geonames_url:
                        log.warn(
                            "GeoName URL is already set to %s, value %s will not be imported",
                            geonames_url, spatial_value)
                    else:
                        geonames_url = spatial_value

        if len(spatial_tags) > 0:
            value = ','.join(spatial_tags)
            if len(spatial_tags) > 1:
                value = '{' + value + '}'
            dataset_dict['geographical_name'] = value

        if geonames_url:
            dataset_dict['geographical_geonames_url'] = geonames_url

        ### Collect strings from multilang fields

        # { 'field_name': {'it': 'italian loc', 'de': 'german loc', ...}, ...}
        localized_dict = {}

        for key, predicate in (
            ('title', DCT.title),
            ('notes', DCT.description),
        ):
            self._collect_multilang_strings(dataset_dict, key, dataset_ref,
                                            predicate, localized_dict)

        # Agents
        for predicate, basekey in (
            (DCT.publisher, 'publisher'),
            (DCT.rightsHolder, 'holder'),
            (DCT.creator, 'creator'),
        ):
            agent_dict, agent_loc_dict = self._parse_agent(
                dataset_ref, predicate, basekey)
            for key, v in agent_dict.items():
                self._remove_from_extra(dataset_dict, key)
                dataset_dict[key] = v
            localized_dict.update(agent_loc_dict)

        # once all localized data has been parsed, check whether there
        # really is any, and add it to the dict
        if len(localized_dict) > 0:
            log.debug('Found multilang metadata')
            dataset_dict[LOCALISED_DICT_NAME_BASE] = localized_dict

        ### Resources

        resources_loc_dict = {}

        # In ckan, the license is a dataset property, not a resource's.
        # We'll collect all of the resources' licenses, then postprocess them.
        licenses = []  # contains tuples (url, name)

        for resource_dict in dataset_dict.get('resources', []):
            resource_uri = resource_dict.get('uri')
            if not resource_uri:
                log.warn("URI not defined for resource %s",
                         resource_dict['name'])
                continue

            distribution = URIRef(resource_uri)
            if (dataset_ref, DCAT.distribution, distribution) not in self.g:
                log.warn("Distribution not found in dataset %s", resource_uri)
                continue

            # URI 0..1
            for predicate, key, base_uri in (
                (DCT['format'], 'format', FORMAT_BASE_URI),  # Format
            ):
                valueRef = self._object_value(distribution, predicate)
                if valueRef:
                    value = self._strip_uri(valueRef, base_uri)
                    resource_dict[key] = value
                else:
                    log.warn('No %s found for resource "%s"::"%s"', predicate,
                             dataset_dict.get('title', '---'),
                             resource_dict.get('name', '---'))

            # License
            license = self._object(distribution, DCT.license)
            if license:
                # just add this info in the resource extras
                resource_dict['license_url'] = str(license)
                license_name = self._object_value(
                    license, FOAF.name)  # may be either the title or the id
                if license_name:
                    # just add this info in the resource extras
                    resource_dict['license_name'] = license_name
                else:
                    license_name = "unknown"
                licenses.append((str(license), license_name))
            else:
                log.warn('No license found for resource "%s"::"%s"',
                         dataset_dict.get('title', '---'),
                         resource_dict.get('name', '---'))

            # Multilang
            loc_dict = {}

            for key, predicate in (
                ('name', DCT.title),
                ('description', DCT.description),
            ):
                self._collect_multilang_strings(resource_dict, key,
                                                distribution, predicate,
                                                loc_dict)

            if len(loc_dict) > 0:
                log.debug('Found multilang metadata in resource %s',
                          resource_dict['name'])
                resources_loc_dict[resource_uri] = loc_dict

        if len(resources_loc_dict) > 0:
            log.debug('Found multilang metadata in resources')
            dataset_dict[LOCALISED_DICT_NAME_RESOURCES] = resources_loc_dict

        # postprocess licenses
        license_ids = {lic_id for _url, lic_id in licenses}

        if license_ids:
            if len(license_ids) > 1:
                log.warn('More than one license found for dataset "%s"',
                         dataset_dict.get('title', '---'))
            dataset_dict['license_id'] = license_ids.pop()  # take an arbitrary one

        return dataset_dict

    def _collect_multilang_strings(self, base_dict, key, subj, pred, loc_dict):
        '''
        Search for multilang Literals matching (subj, pred).
        - Non-localized Literals are stored as base_dict[key] -- possibly
          replacing the value set by the EURO parser
        - Localized Literals are stored into loc_dict[key][lang]
        '''

        for obj in self.g.objects(subj, pred):
            value = obj.value
            lang = obj.language
            if not lang:
                # force default value in dataset
                base_dict[key] = value
            else:
                # add localized string
                lang_dict = loc_dict.setdefault(key, {})
                lang_dict[lang_mapping_xmllang_to_ckan.get(lang)] = value

    def _remove_from_extra(self, dataset_dict, key):

        #  search and remove
        for extra in dataset_dict.get('extras', []):
            if extra['key'] == key:
                dataset_dict['extras'].remove(extra)
                return

    def _add_or_replace_extra(self, dataset_dict, key, value):

        #  search and replace
        for extra in dataset_dict.get('extras', []):
            if extra['key'] == key:
                extra['value'] = value
                return

        # add if not found
        dataset_dict['extras'].append({'key': key, 'value': value})
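
    # For reference, CKAN stores extras as a list of key/value dicts, e.g.
    #   dataset_dict['extras'] == [{'key': 'conforms_to', 'value': '...'}]
    # so both helpers above do a linear scan for a matching 'key'. A
    # hypothetical round trip:
    #   d = {'extras': []}
    #   self._add_or_replace_extra(d, 'frequency', 'ANNUAL')   # entry appended
    #   self._remove_from_extra(d, 'frequency')                # removed again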

    def _parse_agent(self, subject, predicate, base_name):

        agent_dict = {}
        loc_dict = {}

        for agent in self.g.objects(subject, predicate):
            agent_dict[base_name + '_identifier'] = self._object_value(
                agent, DCT.identifier)
            self._collect_multilang_strings(agent_dict, base_name + '_name',
                                            agent, FOAF.name, loc_dict)

        return agent_dict, loc_dict

    def _strip_uri(self, value, base_uri):
        return value.replace(base_uri, '')

    def graph_from_dataset(self, dataset_dict, dataset_ref):

        title = dataset_dict.get('title')

        g = self.g

        for prefix, namespace in it_namespaces.items():
            g.bind(prefix, namespace)

        ### add a further type for the Dataset node
        g.add((dataset_ref, RDF.type, DCATAPIT.Dataset))

        ### replace themes
        value = self._get_dict_value(dataset_dict, 'theme')
        if value:
            for theme in value.split(','):
                self.g.remove((dataset_ref, DCAT.theme, URIRef(theme)))
                theme = theme.replace('{', '').replace('}', '')
                self.g.add(
                    (dataset_ref, DCAT.theme, URIRef(THEME_BASE_URI + theme)))
                self._add_concept(THEME_CONCEPTS, theme)
        else:
            self.g.add((dataset_ref, DCAT.theme,
                        URIRef(THEME_BASE_URI + DEFAULT_THEME_KEY)))
            self._add_concept(THEME_CONCEPTS, DEFAULT_THEME_KEY)

        ### replace languages
        value = self._get_dict_value(dataset_dict, 'language')
        if value:
            for lang in value.split(','):
                self.g.remove((dataset_ref, DCT.language, Literal(lang)))
                lang = lang.replace('{', '').replace('}', '')
                self.g.add(
                    (dataset_ref, DCT.language, URIRef(LANG_BASE_URI + lang)))
                # self._add_concept(LANG_CONCEPTS, lang)

        ### add spatial (EU URI)
        value = self._get_dict_value(dataset_dict, 'geographical_name')
        if value:
            for gname in value.split(','):
                gname = gname.replace('{', '').replace('}', '')

                dct_location = BNode()
                self.g.add((dataset_ref, DCT.spatial, dct_location))

                self.g.add((dct_location, RDF['type'], DCT.Location))

                # Try and add a Concept from the spatial vocabulary
                if self._add_concept(GEO_CONCEPTS, gname):
                    self.g.add((dct_location, DCATAPIT.geographicalIdentifier,
                                Literal(GEO_BASE_URI + gname)))

                    # the geo concept is not really required, but may be a useful addition
                    self.g.add((dct_location, LOCN.geographicalName,
                                URIRef(GEO_BASE_URI + gname)))
                else:
                    # The dataset field is not a controlled tag, so create a Concept out of the label we have
                    concept = BNode()
                    self.g.add((concept, RDF['type'], SKOS.Concept))
                    self.g.add((concept, SKOS.prefLabel, Literal(gname)))
                    self.g.add((dct_location, LOCN.geographicalName, concept))

        ### add spatial (GeoNames)
        value = self._get_dict_value(dataset_dict, 'geographical_geonames_url')
        if value:
            dct_location = BNode()
            self.g.add((dataset_ref, DCT.spatial, dct_location))

            self.g.add((dct_location, RDF['type'], DCT.Location))
            self.g.add((dct_location, DCATAPIT.geographicalIdentifier,
                        Literal(value)))

        ### replace periodicity
        self._remove_node(dataset_dict, dataset_ref,
                          ('frequency', DCT.accrualPeriodicity, None, Literal))
        self._add_uri_node(
            dataset_dict, dataset_ref,
            ('frequency', DCT.accrualPeriodicity, DEFAULT_FREQ_CODE, URIRef),
            FREQ_BASE_URI)
        # self._add_concept(FREQ_CONCEPTS, dataset_dict.get('frequency', DEFAULT_VOCABULARY_KEY))

        ### replace landing page
        self._remove_node(dataset_dict, dataset_ref,
                          ('url', DCAT.landingPage, None, URIRef))
        landing_page_uri = None
        if dataset_dict.get('name'):
            landing_page_uri = '{0}/dataset/{1}'.format(
                catalog_uri().rstrip('/'), dataset_dict['name'])
        else:
            landing_page_uri = dataset_uri(
                dataset_dict)  # TODO: preserve original URI if harvested

        self.g.add((dataset_ref, DCAT.landingPage, URIRef(landing_page_uri)))

        ### conformsTo
        self.g.remove((dataset_ref, DCT.conformsTo, None))
        value = self._get_dict_value(dataset_dict, 'conforms_to')
        if value:
            for item in value.split(','):

                standard = BNode()
                self.g.add((dataset_ref, DCT.conformsTo, standard))

                self.g.add((standard, RDF['type'], DCT.Standard))
                self.g.add((standard, RDF['type'], DCATAPIT.Standard))
                self.g.add((standard, DCT.identifier, Literal(item)))

        ### publisher

        # DCAT by default creates this node
        # <dct:publisher>
        #   <foaf:Organization rdf:about="http://10.10.100.75/organization/55535226-f82a-4cf7-903a-3e10afeaa79a">
        #     <foaf:name>orga2_test</foaf:name>
        #   </foaf:Organization>
        # </dct:publisher>

        for s, p, o in g.triples((dataset_ref, DCT.publisher, None)):
            #log.info("Removing publisher %r", o)
            g.remove((s, p, o))

        self._add_agent(dataset_dict, dataset_ref, 'publisher', DCT.publisher)

        ### Rights holder : Agent
        holder_ref = self._add_agent(dataset_dict, dataset_ref, 'holder',
                                     DCT.rightsHolder)

        ### Autore : Agent
        self._add_agent(dataset_dict, dataset_ref, 'creator', DCT.creator)

        ### Point of Contact

        # <dcat:contactPoint rdf:resource="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri"/>

        # <!-- http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri -->
        # <dcatapit:Organization rdf:about="http://dati.gov.it/resource/PuntoContatto/contactPointRegione_r_liguri">
        #    <rdf:type rdf:resource="&vcard;Kind"/>
        #    <rdf:type rdf:resource="&vcard;Organization"/>
        #    <vcard:hasEmail rdf:resource="mailto:[email protected]"/>
        #    <vcard:fn>Regione Liguria - Sportello Cartografico</vcard:fn>
        # </dcatapit:Organization>

        # TODO: preserve original info if harvested

        # retrieve the contactPoint added by the euro serializer
        euro_poc = g.value(subject=dataset_ref,
                           predicate=DCAT.contactPoint,
                           object=None,
                           any=False)

        # euro poc has this format:
        # <dcat:contactPoint>
        #    <vcard:Organization rdf:nodeID="Nfcd06f452bcd41f48f33c45b0c95979e">
        #       <vcard:fn>THE ORGANIZATION NAME</vcard:fn>
        #       <vcard:hasEmail>THE ORGANIZATION EMAIL</vcard:hasEmail>
        #    </vcard:Organization>
        # </dcat:contactPoint>

        if euro_poc:
            g.remove((dataset_ref, DCAT.contactPoint, euro_poc))

        org_id = dataset_dict.get('organization', {}).get('id')

        # get orga info
        org_show = logic.get_action('organization_show')

        try:
            org_dict = org_show({}, {
                'id': org_id,
                'include_datasets': False,
                'include_tags': False,
                'include_users': False,
                'include_groups': False,
                'include_extras': True,
                'include_followers': False
            })
        except Exception:
            org_dict = {}

        org_uri = organization_uri(org_dict)

        poc = URIRef(org_uri)
        g.add((dataset_ref, DCAT.contactPoint, poc))
        g.add((poc, RDF.type, DCATAPIT.Organization))
        g.add((poc, RDF.type, VCARD.Kind))
        g.add((poc, RDF.type, VCARD.Organization))

        g.add((poc, VCARD.fn, Literal(org_dict.get('name'))))

        # email is mandatory for dcatapit, but it may not have been filled in
        # for imported datasets
        if 'email' in org_dict:
            g.add((poc, VCARD.hasEmail, URIRef(org_dict.get('email'))))
        if 'telephone' in org_dict:
            g.add(
                (poc, VCARD.hasTelephone, Literal(org_dict.get('telephone'))))
        if 'site' in org_dict:
            g.add((poc, VCARD.hasURL, Literal(org_dict.get('site'))))

        ### Multilingual
        # Add localized entries in dataset
        # TODO: should we remove the non-localized nodes?

        loc_dict = interfaces.get_for_package(dataset_dict['id'])
        #  The multilang fields
        loc_package_mapping = {
            'title': (dataset_ref, DCT.title),
            'notes': (dataset_ref, DCT.description),
            'holder_name': (holder_ref, FOAF.name)
        }

        self._add_multilang_values(loc_dict, loc_package_mapping)

        ### Resources
        for resource_dict in dataset_dict.get('resources', []):

            distribution = URIRef(resource_uri(
                resource_dict))  # TODO: preserve original info if harvested

            # Add the DCATAPIT type
            g.add((distribution, RDF.type, DCATAPIT.Distribution))

            ### format
            self._remove_node(resource_dict, distribution,
                              ('format', DCT['format'], None, Literal))
            if not self._add_uri_node(resource_dict, distribution,
                                      ('distribution_format', DCT['format'],
                                       None, URIRef), FORMAT_BASE_URI):
                guessed_format = guess_format(resource_dict)
                if guessed_format:
                    self.g.add((distribution, DCT['format'],
                                URIRef(FORMAT_BASE_URI + guessed_format)))
                else:
                    log.warn('No format for resource: %s / %s',
                             dataset_dict.get('title', 'N/A'),
                             resource_dict.get('description', 'N/A'))
                    self.g.add((distribution, DCT['format'],
                                URIRef(FORMAT_BASE_URI + DEFAULT_FORMAT_CODE)))

            ### license
            # <dct:license rdf:resource="http://creativecommons.org/licenses/by/3.0/it/"/>
            #
            # <dcatapit:LicenseDocument rdf:about="http://creativecommons.org/licenses/by/3.0/it/">
            #    <rdf:type rdf:resource="&dct;LicenseDocument"/>
            #    <owl:versionInfo>3.0 ITA</owl:versionInfo>
            #    <foaf:name>CC BY</foaf:name>
            #    <dct:type rdf:resource="http://purl.org/adms/licencetype/Attribution"/>
            # </dcatapit:LicenseDocument>

            # "license_id" : "cc-zero"
            # "license_title" : "Creative Commons CCZero",
            # "license_url" : "http://www.opendefinition.org/licenses/cc-zero",

            license_url = dataset_dict.get('license_url', '')
            license_id = dataset_dict.get('license_id', '')
            license_title = dataset_dict.get('license_title', '')

            if license_url:
                license = URIRef(license_url)
                g.add((license, RDF['type'], DCATAPIT.LicenseDocument))
                g.add((license, RDF['type'], DCT.LicenseDocument))
                g.add((license, DCT['type'],
                       URIRef('http://purl.org/adms/licencetype/Attribution')
                       ))  # TODO: infer from CKAN license

                g.add((distribution, DCT.license, license))

                if license_id:
                    # log.debug('Adding license id: %s', license_id)
                    g.add((license, FOAF.name, Literal(license_id)))
                elif license_title:
                    # log.debug('Adding license title: %s', license_title)
                    g.add((license, FOAF.name, Literal(license_title)))
                else:
                    g.add((license, FOAF.name, Literal('unknown')))
                    log.warn('License not found for dataset: %s', title)

            ### Multilingual
            # Add localized entries in resource
            # TODO: should we remove the non-localized nodes?

            loc_dict = interfaces.get_for_resource(resource_dict['id'])

            #  The multilang fields
            loc_resource_mapping = {
                'name': (distribution, DCT.title),
                'description': (distribution, DCT.description),
            }
            self._add_multilang_values(loc_dict, loc_resource_mapping)
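
Putting Example #4 together with the serializer shown in the test above, an
end-to-end run looks roughly like this (a sketch: it assumes ckanext-dcat is
installed and that this profile is registered, e.g. via the
`ckanext.dcat.rdf.profiles` setting):

    from ckanext.dcat.processors import RDFSerializer

    s = RDFSerializer()
    dataset_ref = s.graph_from_dataset(dataset_dict)

    # the underlying rdflib Graph can then be serialized to any RDF syntax
    print(s.g.serialize(format='pretty-xml'))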