def _parse_contact(self, dataset_dict, dataset_ref, predicate, prefix,
                   extras_only):
        """Copy a FOAF contact from the graph into the dataset dict.

        The contact is the object of ``predicate`` on ``dataset_ref``. Nothing
        is written unless that node exists and its rdf:type is foaf:Agent,
        foaf:Person or foaf:Organization.

        With ``extras_only`` set, name and email are stored in extras (the
        name under ``<prefix>_name``). Otherwise they are stored as top-level
        entries and the name uses the bare ``prefix`` as its key. URL,
        dct:type and the contact type string are always written with the
        extras flag set.
        """
        contact_node = self._object(dataset_ref, predicate)
        if not contact_node:
            return

        foaf_class = self._object(contact_node, RDF.type)
        if foaf_class not in (FOAF.Agent, FOAF.Person, FOAF.Organization):
            return

        contact_name = self._object_value(contact_node, FOAF.name)
        mailbox = self._object_value(contact_node, FOAF.mbox)
        homepage = self._object_value(contact_node, FOAF.homepage)
        type_value = self._object_value(contact_node, DCT.type)

        # Everything that is not explicitly a foaf:Person (including a plain
        # foaf:Agent) is labelled as an organization.
        if foaf_class == FOAF.Person:
            contact_label = "Person"
        else:
            contact_label = "Organization"

        # A top-level name entry carries no "_name" suffix.
        if extras_only:
            name_key = prefix + "_name"
        else:
            name_key = prefix

        ds_utils.insert(dataset_dict, name_key, contact_name, extras_only)
        ds_utils.insert(dataset_dict, prefix + "_email",
                        self._without_mailto(mailbox), extras_only)

        # These three are written with the extras flag regardless of
        # extras_only.
        for suffix, item in (
            ("_url", homepage),
            ("_type", type_value),
            ("_contacttype", contact_label),
        ):
            ds_utils.insert(dataset_dict, prefix + suffix, item, True)
Beispiel #2
0
    def parse_dataset(self, dataset_dict, dataset_ref):
        """Transform DCAT-AP.de data from the graph into the CKAN dictionary.

        Reads the DCAT-AP.de properties of ``dataset_ref`` for every known
        DCATDE namespace version, then contacts, landing page, contact point
        details and themes, and writes them into ``dataset_dict``.

        Returns the ``dataset_dict`` that was passed in.
        """
        # Namespace versions are listed oldest first, so a value found in a
        # newer namespace overwrites the one from an older namespace.
        namespaces = [DCATDE_1_0, DCATDE]

        # geocodingText and legalbasisText were renamed between versions, so
        # they are mapped explicitly (old names first, new names last).
        renamed_list_fields = (
            ('legalbasisText', DCATDE_1_0.legalbasisText),
            ('geocodingText', DCATDE_1_0.geocodingText),
            ('legalbasisText', DCATDE.legalBasis),
            ('geocodingText', DCATDE.geocodingDescription),
        )
        for extras_key, rdf_predicate in renamed_list_fields:
            found = self._object_value_list(dataset_ref, rdf_predicate)
            if not found:
                continue
            ds_utils.set_extras_field(dataset_dict, extras_key,
                                      json.dumps(found))

        # Walk through all namespace versions to import as much as possible.
        for namespace in namespaces:
            # Single-valued additional fields
            single_fields = (
                ('qualityProcessURI', namespace.qualityProcessURI),
                ('politicalGeocodingLevelURI',
                 namespace.politicalGeocodingLevelURI),
            )
            for extras_key, rdf_predicate in single_fields:
                found = self._object_value(dataset_ref, rdf_predicate)
                if not found:
                    continue
                ds_utils.set_extras_field(dataset_dict, extras_key, found)

            # Multi-valued fields, stored as JSON-encoded lists
            list_fields = (
                ('contributorID', namespace.contributorID),
                ('politicalGeocodingURI', namespace.politicalGeocodingURI),
            )
            for extras_key, rdf_predicate in list_fields:
                found = self._object_value_list(dataset_ref, rdf_predicate)
                if not found:
                    continue
                ds_utils.set_extras_field(dataset_dict, extras_key,
                                          json.dumps(found))

            self._parse_contact(dataset_dict, dataset_ref,
                                namespace.originator, 'originator', True)
            self._parse_contact(dataset_dict, dataset_ref,
                                namespace.maintainer, 'maintainer', False)

            # Additional distribution fields
            resource_fields = (
                ('licenseAttributionByText',
                 namespace.licenseAttributionByText),
                ('plannedAvailability', namespace.plannedAvailability),
            )
            for distribution in self.g.objects(dataset_ref, DCAT.distribution):
                distribution_uri = unicode(distribution)
                for resource_dict in dataset_dict.get('resources', []):
                    # Only handle the resource that corresponds to this
                    # distribution of the graph.
                    if distribution_uri != resource_uri(resource_dict):
                        continue
                    for extras_key, rdf_predicate in resource_fields:
                        found = self._object_value(distribution,
                                                   rdf_predicate)
                        if found:
                            ds_utils.insert_resource_extra(
                                resource_dict, extras_key, found)
        # -- end of loop over DCATDE namespaces --

        # Contacts defined in namespaces other than DCATDE
        self._parse_contact(dataset_dict, dataset_ref, DCT.contributor,
                            'contributor', True)
        self._parse_contact(dataset_dict, dataset_ref, DCT.creator, 'author',
                            False)

        # dcat:landingPage
        landing_page = self._object_value(dataset_ref, DCAT.landingPage)
        if landing_page:
            ds_utils.set_extras_field(dataset_dict, 'metadata_original_html',
                                      landing_page)

        # dcat:contactPoint
        # TODO: dcat-ap adds the values to extras.contact_... . Maybe better
        # than maintainer?
        contact_point = self._object(dataset_ref, DCAT.contactPoint)
        self._add_maintainer_field(dataset_dict, contact_point, 'url',
                                   VCARD.hasURL)

        telephone = self._object_value(contact_point, VCARD.hasTelephone)
        if telephone:
            ds_utils.insert(dataset_dict, 'maintainer_tel',
                            self._without_tel(telephone), True)

        for field_name, vcard_property in (
            ('street', VCARD.hasStreetAddress),
            ('city', VCARD.hasLocality),
            ('zip', VCARD.hasPostalCode),
            ('country', VCARD.hasCountryName),
        ):
            self._add_maintainer_field(dataset_dict, contact_point,
                                       field_name, vcard_property)

        # Groups: start from what the dataset already has and append one
        # entry per theme URI that begins with the theme prefix.
        group_list = self._get_dataset_value(dataset_dict, 'groups') or []

        for theme in self.g.objects(dataset_ref, DCAT.theme):
            theme_uri = unicode(theme)
            if not theme_uri.startswith(dcat_theme_prefix):
                continue
            group_name = theme_uri.replace(dcat_theme_prefix, '').lower()
            group_list.append({'id': group_name, 'name': group_name})

        dataset_dict['groups'] = group_list

        return dataset_dict
Beispiel #3
0
 def _add_maintainer_field(self, dataset_dict, contact, field, _type):
     """Store one property of the contact node as ``maintainer_<field>``.

     The value of the ``_type`` property is read from ``contact`` and handed
     to ``ds_utils.insert`` with the extras flag set to True.
     """
     value = self._object_value(contact, _type)
     target_key = 'maintainer_' + field
     ds_utils.insert(dataset_dict, target_key, value, True)
Beispiel #4
0
    def parse_dataset(self, dataset_dict, dataset_ref):
        """Transform DCAT-AP.de data from the graph into the CKAN dictionary.

        Reads the DCATDE fields, contacts, contact point details, themes and
        distribution fields of ``dataset_ref`` and writes them into
        ``dataset_dict``.

        Returns the ``dataset_dict`` that was passed in.
        """
        # Single-valued additional fields
        single_fields = (
            ('qualityProcessURI', DCATDE.qualityProcessURI),
            ('metadata_original_html', DCAT.landingPage),
            ('politicalGeocodingLevelURI', DCATDE.politicalGeocodingLevelURI),
        )
        for extras_key, rdf_predicate in single_fields:
            found = self._object_value(dataset_ref, rdf_predicate)
            if not found:
                continue
            ds_utils.insert_new_extras_field(dataset_dict, extras_key, found)

        # Multi-valued fields, stored as JSON-encoded lists
        list_fields = (
            ('contributorID', DCATDE.contributorID),
            ('politicalGeocodingURI', DCATDE.politicalGeocodingURI),
            ('legalbasisText', DCATDE.legalbasisText),
            ('geocodingText', DCATDE.geocodingText),
        )
        for extras_key, rdf_predicate in list_fields:
            found = self._object_value_list(dataset_ref, rdf_predicate)
            if not found:
                continue
            ds_utils.insert_new_extras_field(dataset_dict, extras_key,
                                             json.dumps(found))

        # Contacts: (predicate, key prefix, extras_only flag)
        for contact_predicate, key_prefix, extras_only in (
            (DCATDE.originator, 'originator', True),
            (DCATDE.maintainer, 'maintainer', False),
            (DCT.contributor, 'contributor', True),
            (DCT.creator, 'author', False),
        ):
            self._parse_contact(dataset_dict, dataset_ref, contact_predicate,
                                key_prefix, extras_only)

        # dcat:contactPoint
        # TODO: dcat-ap adds the values to extras.contact_... . Maybe better
        # than maintainer?
        contact_point = self._object(dataset_ref, DCAT.contactPoint)
        self._add_maintainer_field(dataset_dict, contact_point, 'url',
                                   VCARD.hasURL)

        telephone = self._object_value(contact_point, VCARD.hasTelephone)
        if telephone:
            ds_utils.insert(dataset_dict, 'maintainer_tel',
                            self._without_tel(telephone), True)

        for field_name, vcard_property in (
            ('street', VCARD.hasStreetAddress),
            ('city', VCARD.hasLocality),
            ('zip', VCARD.hasPostalCode),
            ('country', VCARD.hasCountryName),
        ):
            self._add_maintainer_field(dataset_dict, contact_point,
                                       field_name, vcard_property)

        # Groups: start from what the dataset already has and append one
        # entry per theme URI that begins with the theme prefix.
        group_list = self._get_dataset_value(dataset_dict, 'groups') or []

        for theme in self.g.objects(dataset_ref, DCAT.theme):
            theme_uri = unicode(theme)
            if not theme_uri.startswith(dcat_theme_prefix):
                continue
            group_name = theme_uri.replace(dcat_theme_prefix, '').lower()
            group_list.append({'id': group_name, 'name': group_name})

        dataset_dict['groups'] = group_list

        # Additional distribution fields
        for distribution in self.g.objects(dataset_ref, DCAT.distribution):
            distribution_uri = unicode(distribution)
            for resource_dict in dataset_dict.get('resources', []):
                # Only handle the resource that corresponds to this
                # distribution of the graph.
                if distribution_uri != resource_uri(resource_dict):
                    continue
                for extras_key, rdf_predicate in (
                    ('licenseAttributionByText',
                     DCATDE.licenseAttributionByText),
                    ('plannedAvailability', DCATDE.plannedAvailability),
                ):
                    found = self._object_value(distribution, rdf_predicate)
                    if found:
                        ds_utils.insert_resource_extra(resource_dict,
                                                       extras_key, found)

        return dataset_dict
    def _parse_contact_vcard(self, dataset_dict, dataset_ref, predicate,
                             prefix):
        """Copy a VCARD contact from the graph into the dataset dict.

        The contact is the object of ``predicate`` on ``dataset_ref``. URL,
        telephone, street, city, zip and country are each written under
        ``<prefix>_<field>`` with the extras flag set to True.
        """
        vcard_node = self._object(dataset_ref, predicate)

        url_value = self._get_vcard_property_value(vcard_node, VCARD.hasURL)
        ds_utils.insert(dataset_dict, prefix + '_url', url_value, True)

        tel_value = self._get_vcard_property_value(vcard_node,
                                                   VCARD.hasTelephone)
        ds_utils.insert(dataset_dict, prefix + '_tel',
                        self._without_tel(tel_value), True)

        # If the contact has a vcard:hasAddress object, the address values
        # are read from that object; otherwise from the contact itself.
        address_source = self._object(vcard_node, VCARD.hasAddress) or vcard_node

        # Each row: key suffix and the two VCARD properties handed to
        # _get_vcard_property_value (presumably preferred property and
        # fallback -- confirm against that helper).
        for suffix, first_property, second_property in (
            ('_street', VCARD.hasStreetAddress, VCARD['street-address']),
            ('_city', VCARD.hasLocality, VCARD.locality),
            ('_zip', VCARD.hasPostalCode, VCARD['postal-code']),
            ('_country', VCARD.hasCountryName, VCARD['country-name']),
        ):
            field_value = self._get_vcard_property_value(
                address_source, first_property, second_property)
            ds_utils.insert(dataset_dict, prefix + suffix, field_value, True)