def test_dcat_to_ckan(self):
    """Convert the DCAT fixture and compare it with the CKAN fixture.

    Both fixtures are loaded through ``self._get_file_as_dict``; on a
    mismatch the assertion message is built by
    ``self._poor_mans_dict_diff``.
    """
    source = self._get_file_as_dict('dataset.json')
    expected = self._get_file_as_dict('ckan_dataset.json')

    actual = converters.dcat_to_ckan(source)

    # The diff helper is only evaluated when the comparison fails.
    assert actual == expected, self._poor_mans_dict_diff(expected, actual)
Example #2
0
    def _get_package_dict(self, harvest_object):
        """Build a CKAN package dict from a harvest object's JSON content.

        The content of ``harvest_object`` is decoded with ``json.loads``
        and handed to ``converters.dcat_to_ckan``.

        Returns a ``(package_dict, dcat_dict)`` tuple: the converted
        dict followed by the decoded source dict.
        """
        parsed = json.loads(harvest_object.content)
        return converters.dcat_to_ckan(parsed), parsed
Example #3
0
    def _get_package_dict(self, harvest_object):
        """Decode the harvested JSON and convert it to a CKAN package dict.

        Returns a two-item tuple: the result of
        ``converters.dcat_to_ckan`` and the decoded DCAT dict it was
        built from.
        """
        dcat = json.loads(harvest_object.content)
        ckan_package = converters.dcat_to_ckan(dcat)
        return ckan_package, dcat
Example #4
0
    def _get_package_dict(self, harvest_object):
        """Convert harvested JSON to a CKAN package dict using a vocabulary.

        The harvest object's content is decoded with ``json.loads``; the
        ``'vocabulary'`` extra is read via ``self._get_object_extra`` and
        passed as the second argument to ``converters.dcat_to_ckan``.

        Returns a ``(package_dict, dcat_dict)`` tuple.
        """
        # Decode first, then look up the extra, mirroring the call order
        # callers have always observed.
        parsed = json.loads(harvest_object.content)
        vocab = self._get_object_extra(harvest_object, 'vocabulary')

        return converters.dcat_to_ckan(parsed, vocab), parsed
Example #5
0
    def _get_package_dict(self, harvest_object):
        """Build a CKAN package dict from a harvest object's XML content.

        The content is wrapped in ``formats.xml.DCATDataset`` and its
        ``read_values()`` result is passed to
        ``converters.dcat_to_ckan``.

        Returns a ``(package_dict, dcat_dict)`` tuple.
        """
        values = formats.xml.DCATDataset(harvest_object.content).read_values()
        return converters.dcat_to_ckan(values), values
Example #6
0
    def _get_package_dict(self, harvest_object):
        """Decode harvested JSON and convert it with the object's vocabulary.

        Returns a two-item tuple: the dict produced by
        ``converters.dcat_to_ckan`` (called with the decoded content and
        the ``'vocabulary'`` object extra) and the decoded DCAT dict.
        """
        dcat = json.loads(harvest_object.content)

        # The extra is fetched after decoding, as in the original flow.
        vocabulary_extra = self._get_object_extra(harvest_object, 'vocabulary')
        ckan_package = converters.dcat_to_ckan(dcat, vocabulary_extra)

        return ckan_package, dcat
Example #7
0
class DCATXMLHarvester(DCATHarvester):
    """Harvester for DCAT dataset descriptions serialized as XML-RDF."""

    # XML namespaces used to locate datasets and their identifiers.
    DCAT_NS = 'http://www.w3.org/ns/dcat#'
    DCT_NS = 'http://purl.org/dc/terms/'
    RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

    def info(self):
        """Return the name, title and description of this harvester."""
        return {
            'name':
            'dcat_xml',
            'title':
            'DCAT XML-RDF Harvester',
            'description':
            'Harvester for DCAT dataset descriptions serialized as XML-RDF'
        }

    def _get_guids_and_datasets(self, content):
        """Yield a ``(guid, serialized_dataset)`` pair per dcat:Dataset.

        ``content`` is parsed with ``etree.fromstring`` and every
        ``dcat:Dataset`` element in the document is visited. The guid is
        taken, in order of preference, from:

        1. the element's ``rdf:about`` attribute,
        2. the stripped text of a direct ``dct:identifier`` child,
        3. the SHA-1 hex digest of the serialized element.
        """
        doc = etree.fromstring(content)

        # Clark-notation names are loop invariant, so build them once.
        about_attr = '{{{ns}}}about'.format(ns=self.RDF_NS)
        identifier_tag = '{{{ns}}}identifier'.format(ns=self.DCT_NS)

        for dataset_element in doc.xpath('//dcat:Dataset',
                                         namespaces={'dcat': self.DCAT_NS}):

            as_string = etree.tostring(dataset_element)

            # Get identifier
            guid = dataset_element.get(about_attr)
            if not guid:
                id_element = dataset_element.find(identifier_tag)
                # An element's truth value reflects its number of
                # children, so a leaf <dct:identifier> would be falsy;
                # compare against None and read its text instead.
                id_text = id_element.text if id_element is not None else None
                guid = id_text.strip() if id_text else None
            if not guid:
                # This is bad, any ideas welcomed
                guid = sha1(as_string).hexdigest()

            yield guid, as_string

    def _get_package_dict(self, harvest_object):
        """Build a CKAN package dict from a harvest object's XML content.

        Returns a ``(package_dict, dcat_dict)`` tuple. If
        ``formats.xml.DCATDataset`` raises ``ValueError`` for the content,
        an 'Import' object error is saved and ``(None, None)`` is
        returned.
        """
        content = harvest_object.content

        try:
            dataset = formats.xml.DCATDataset(content)
        except ValueError:
            self._save_object_error(
                'Content does not look like dcat:Dataset for harvest object {0}'
                .format(harvest_object.id), harvest_object, 'Import')
            return None, None
        dcat_dict = dataset.read_values()

        package_dict = converters.dcat_to_ckan(dcat_dict)

        return package_dict, dcat_dict
Example #8
0
    def test_dcat_to_ckan(self):
        """Check the DCAT fixture converts to the expected CKAN dataset.

        CKAN-specific keys (the dataset ``id`` and the first resource's
        ``id`` / ``package_id``) are removed from the expected fixture
        before comparing.
        """
        source = self._get_file_as_dict('dataset.json')
        expected = self._get_file_as_dict('ckan_dataset.json')

        # Drop the CKAN-only keys from the expectation.
        expected.pop('id', None)
        first_resource = expected['resources'][0]
        for ckan_only_key in ('id', 'package_id'):
            first_resource.pop(ckan_only_key, None)

        actual = converters.dcat_to_ckan(source)

        assert actual == expected, self._poor_mans_dict_diff(expected, actual)
    def test_dcat_to_ckan(self):
        """Compare the converted DCAT fixture against the CKAN fixture.

        Before the comparison the expected dict loses its ``id`` key and
        its first resource loses ``id`` and ``package_id``, which are
        CKAN-specific fields.
        """
        dcat_fixture = self._get_file_as_dict('dataset.json')
        ckan_fixture = self._get_file_as_dict('ckan_dataset.json')

        # Pop CKAN specific fields; missing keys are tolerated.
        ckan_fixture.pop('id', None)
        resource = ckan_fixture['resources'][0]
        resource.pop('id', None)
        resource.pop('package_id', None)

        converted = converters.dcat_to_ckan(dcat_fixture)

        assert converted == ckan_fixture, self._poor_mans_dict_diff(
            ckan_fixture, converted)
Example #10
0
    def _get_package_dict(self, harvest_object):
        """Build a CKAN package dict from a harvest object's XML content.

        Returns a ``(package_dict, dcat_dict)`` tuple. When
        ``formats.xml.DCATDataset`` raises ``ValueError`` for the content,
        an "Import" object error is recorded through
        ``self._save_object_error`` and ``(None, None)`` is returned.
        """
        raw = harvest_object.content

        try:
            parsed = formats.xml.DCATDataset(raw)
        except ValueError:
            self._save_object_error(
                "Content does not look like dcat:Dataset for harvest object {0}".format(harvest_object.id),
                harvest_object,
                "Import",
            )
            return None, None

        values = parsed.read_values()
        return converters.dcat_to_ckan(values), values