def test_dataset_json_ld_1(self):

        contents = self._get_file_contents('catalog_pod.jsonld')

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.parse(contents, _format='json-ld')

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 1)

        dataset = datasets[0]
        extras = dict((e['key'], e['value']) for e in dataset['extras'])

        eq_(dataset['title'], 'U.S. Widget Manufacturing Statistics')

        eq_(extras['contact_name'], 'Jane Doe')
        # mailto gets removed for storage and is added again on output
        eq_(extras['contact_email'], '*****@*****.**')
        eq_(extras['publisher_name'], 'Widget Services')
        eq_(extras['publisher_email'], '*****@*****.**')

        eq_(len(dataset['resources']), 4)

        resource = [r for r in dataset['resources'] if r['name'] == 'widgets.csv'][0]
        eq_(resource['name'], u'widgets.csv')
        eq_(resource['url'], u'https://data.agency.gov/datasets/widgets-statistics/widgets.csv')
        eq_(resource['download_url'], u'https://data.agency.gov/datasets/widgets-statistics/widgets.csv')
Example #2
    def _build_and_parse_format_mediatype_graph(self,
                                                format_item=None,
                                                mediatype_item=None):
        g = Graph()

        dataset = URIRef("http://example.org/datasets/1")
        g.add((dataset, RDF.type, self.DCAT.Dataset))

        distribution = URIRef("http://example.org/datasets/1/ds/1")
        g.add((dataset, self.DCAT.distribution, distribution))
        g.add((distribution, RDF.type, self.DCAT.Distribution))
        if format_item:
            g.add((distribution, self.DCT['format'], format_item))
        if mediatype_item:
            g.add((distribution, self.DCAT.mediaType, mediatype_item))
        if format_item is None and mediatype_item is None:
            raise AssertionError(
                'At least one of format or mediaType is required!')

        p = RDFParser(profiles=['euro_dcat_ap', 'dcatap_de'])

        p.g = g

        dataset = [d for d in p.datasets()][0]
        return dataset.get('resources')
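
A minimal sketch of how such a helper might be driven from a test; the method name and the EU file-type URI below are illustrative, not taken from the original suite:

    def test_format_uriref_only(self):
        fmt = URIRef('http://publications.europa.eu/resource/authority/file-type/CSV')
        resources = self._build_and_parse_format_mediatype_graph(format_item=fmt)
        self.assertEqual(len(resources), 1)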
Example #3
    def test_subthemes(self):

        load_themes()

        subthemes = [{
            'theme': 'AGRI',
            'subthemes': [
                'http://eurovoc.europa.eu/100253',
                'http://eurovoc.europa.eu/100258'
            ]
        }, {
            'theme': 'ENVI',
            'subthemes': []
        }]

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{
                'name': 'Tag 1'
            }, {
                'name': 'Tag 2'
            }],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': 'ISBN,TEST',
            'theme': json.dumps(subthemes),
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])

        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())

        assert len(datasets) == 1
        d = datasets[0]
        themes = json.loads(dataset['theme'])
        assert (len(themes) == len(subthemes) == 2)
        for t in themes:
            if t['theme'] == 'ENVI':
                assert t['subthemes'] == []
            elif t['theme'] == 'AGRI':
                assert set(t['subthemes']) == set(subthemes[0]['subthemes'])
            else:
                assert False, "Unknown theme: {}".format(t)
    def test_distribution_format_format_normalized(self):
        g = Graph()

        dataset1 = URIRef("http://example.org/datasets/1")
        g.add((dataset1, RDF.type, DCAT.Dataset))

        distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
        g.add((distribution1_1, RDF.type, DCAT.Distribution))
        g.add((distribution1_1, DCAT.mediaType, Literal("text/csv")))
        g.add((distribution1_1, DCT["format"], Literal("Comma Separated Values")))
        g.add((dataset1, DCAT.distribution, distribution1_1))

        p = RDFParser(profiles=["euro_dcat_ap"])

        p.g = g

        datasets = [d for d in p.datasets()]

        resource = datasets[0]["resources"][0]

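        # from CKAN 2.3 on, free-text formats are normalized (here to "CSV")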
        if toolkit.check_ckan_version(min_version="2.3"):
            eq_(resource["format"], u"CSV")
            eq_(resource["mimetype"], u"text/csv")
        else:
            eq_(resource["format"], u"Comma Separated Values")
    def test_dataset_access_rights_and_distribution_rights_rights_statement_uriref(
            self):
        g = Graph()

        dataset_ref = URIRef("http://example.org/datasets/1")
        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # access_rights
        access_rights = BNode()
        g.add((access_rights, RDF.type, DCT.RightsStatement))
        g.add((access_rights, RDFS.label,
               URIRef("http://example.org/datasets/1/ds/3")))
        g.add((dataset_ref, DCT.accessRights, access_rights))
        # rights
        rights = BNode()
        g.add((rights, RDF.type, DCT.RightsStatement))
        g.add(
            (rights, RDFS.label, URIRef("http://example.org/datasets/1/ds/2")))
        distribution = URIRef("http://example.org/datasets/1/ds/1")
        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        g.add((distribution, DCT.rights, rights))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        dataset = [d for d in p.datasets()][0]
        extras = self._extras(dataset)
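        # the URIRefs given as rdfs:label values come through as plain strings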
        assert extras['access_rights'] == 'http://example.org/datasets/1/ds/3'
        resource = dataset['resources'][0]
        assert resource['rights'] == 'http://example.org/datasets/1/ds/2'
    def test_dataset_json_ld_with_at_graph(self):

        contents = self._get_file_contents('catalog_with_at_graph.jsonld')

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.parse(contents, _format='json-ld')

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 1)

        dataset = datasets[0]
        extras = dict((e['key'], e['value']) for e in dataset['extras'])

        eq_(dataset['title'], 'Title dataset')

        eq_(extras['contact_name'], 'Jane Doe')
        # mailto gets removed for storage and is added again on output
        eq_(extras['contact_email'], '*****@*****.**')

        eq_(len(dataset['resources']), 1)

        resource = dataset['resources'][0]
        eq_(resource['name'], u'download.zip')
        eq_(resource['url'], u'http://example2.org/files/download.zip')
        eq_(
            resource['access_url'],
            u'https://ckan.example.org/dataset/d4ce4e6e-ab89-44cb-bf5c-33a162c234de/resource/a289c289-55c9-410f-b4c7-f88e5f6f4e47'
        )
        eq_(resource['download_url'],
            u'http://example2.org/files/download.zip')
    def test_dataset_access_rights_and_distribution_rights_rights_statement_literal(
            self):
        # license_id retrieved from the URI of dcat:license object
        g = Graph()

        dataset_ref = URIRef("http://example.org/datasets/1")
        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # access_rights
        access_rights = BNode()
        g.add((access_rights, RDF.type, DCT.RightsStatement))
        g.add((access_rights, RDFS.label, Literal('public dataset')))
        g.add((dataset_ref, DCT.accessRights, access_rights))
        # rights
        rights = BNode()
        g.add((rights, RDF.type, DCT.RightsStatement))
        g.add((rights, RDFS.label, Literal('public distribution')))
        distribution = URIRef("http://example.org/datasets/1/ds/1")
        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        g.add((distribution, DCT.rights, rights))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        dataset = [d for d in p.datasets()][0]
        extras = self._extras(dataset)
        assert extras['access_rights'] == 'public dataset'
        resource = dataset['resources'][0]
        assert resource['rights'] == 'public distribution'
    def test_spatial_one_dct_spatial_instance(self):
        g = Graph()

        dataset = URIRef('http://example.org/datasets/1')
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef('http://geonames/Newark')
        g.add((dataset, DCT.spatial, spatial_uri))

        g.add((spatial_uri, RDF.type, DCT.Location))
        g.add((spatial_uri, LOCN.geometry,
               Literal('{"type": "Point", "coordinates": [23, 45]}',
                       datatype=GEOJSON_IMT)))
        g.add((spatial_uri, SKOS.prefLabel, Literal('Newark')))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

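        # URI, prefLabel and GeoJSON geometry map to separate spatial extras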
        assert extras['spatial_uri'] == 'http://geonames/Newark'
        assert extras['spatial_text'] == 'Newark'
        assert extras['spatial'] == '{"type": "Point", "coordinates": [23, 45]}'
Example #9
    def test_datasets_none_found(self):

        p = RDFParser()

        p.g = Graph()

        eq_(len([d for d in p.datasets()]), 0)
Example #10
    def test_parse_dataset_default_lang_not_in_graph(self):
        maxrdf = self._get_max_rdf()

        p = RDFParser(profiles=['euro_dcat_ap', 'dcatap_de'])

        p.parse(maxrdf)
        self._add_basic_fields_with_languages(p)

        datasets = [d for d in p.datasets()]
        self.assertEqual(len(datasets), 1)
        dataset = datasets[0]

        # Title and description may come from any of the available languages
        self.assertIn(u'Naturräume Geest und Marsch', dataset.get('title'))
        self.assertIn(
            u'Die Zuordnung des Hamburger Stadtgebietes zu den Naturräumen Geest und Marsch wird dargestellt',
            dataset.get('notes'))
        # Publisher and ContactPoint
        extras = dataset.get('extras')
        self.assertTrue(len(extras) > 0)
        self.assertIn(u'Behörde für Umwelt und Energie (BUE), Amt für Umweltschutz', self._get_value_from_extras(extras, 'publisher_name'))
        self.assertIn(u'Herr Dr. Michael Schröder', self._get_value_from_extras(extras, 'contact_name'))
        # Resources
        resources = dataset.get('resources')
        self.assertEqual(len(resources), 2)
        for res in resources:
            # Title and description may come from any of the available languages
            self.assertIn(u'Naturräume Geest und Marsch', res.get('name'))
            self.assertIn(
                u'Das ist eine deutsche Beschreibung der Distribution',
                res.get('description'))
Example #11
    def test_datasets_none_found(self):

        p = RDFParser()

        p.g = Graph()

        assert len([d for d in p.datasets()]) == 0
    def test_dataset_compatibility_mode(self):

        contents = self._get_file_contents('dataset.rdf')

        p = RDFParser(profiles=['euro_dcat_ap'], compatibility_mode=True)

        p.parse(contents)

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 1)

        dataset = datasets[0]

        def _get_extra_value(key):
            v = [
                extra['value'] for extra in dataset['extras']
                if extra['key'] == key
            ]
            return v[0] if v else None

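        # compatibility mode keeps the legacy dcat_-prefixed extras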
        eq_(_get_extra_value('dcat_issued'), u'2012-05-10')
        eq_(_get_extra_value('dcat_modified'), u'2012-05-10T21:04:00')
        eq_(_get_extra_value('dcat_publisher_name'),
            'Publishing Organization for dataset 1')
        eq_(_get_extra_value('dcat_publisher_email'), '*****@*****.**')
        eq_(_get_extra_value('language'), 'ca,en,es')
    def test_distribution_both_access_and_download_url(self):
        g = Graph()

        dataset1 = URIRef("http://example.org/datasets/1")
        g.add((dataset1, RDF.type, DCAT.Dataset))

        distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
        g.add((distribution1_1, RDF.type, DCAT.Distribution))
        g.add((distribution1_1, DCAT.accessURL,
               Literal('http://access.url.org')))
        g.add((distribution1_1, DCAT.downloadURL,
               Literal('http://download.url.org')))
        g.add((dataset1, DCAT.distribution, distribution1_1))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        resource = datasets[0]['resources'][0]

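        # downloadURL takes precedence over accessURL for the main resource url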
        eq_(resource['url'], u'http://download.url.org')
        eq_(resource['download_url'], u'http://download.url.org')
        eq_(resource['access_url'], u'http://access.url.org')
    def test_distribution_format_IMT_field(self):
        g = Graph()

        dataset1 = URIRef("http://example.org/datasets/1")
        g.add((dataset1, RDF.type, DCAT.Dataset))

        distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")

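        # dct:format points to a dct:IMT node: rdf:value carries the
        # MIME type, rdfs:label the human-readable format name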
        imt = BNode()

        g.add((imt, RDF.type, DCT.IMT))
        g.add((imt, RDF.value, Literal('text/turtle')))
        g.add((imt, RDFS.label, Literal('Turtle')))

        g.add((distribution1_1, RDF.type, DCAT.Distribution))
        g.add((distribution1_1, DCT['format'], imt))
        g.add((dataset1, DCAT.distribution, distribution1_1))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        resource = datasets[0]['resources'][0]

        eq_(resource['format'], u'Turtle')
        eq_(resource['mimetype'], u'text/turtle')
    def test_spatial_both_geojson_and_wkt(self):
        g = Graph()

        dataset = URIRef('http://example.org/datasets/1')
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef('http://geonames/Newark')
        g.add((dataset, DCT.spatial, spatial_uri))

        g.add((spatial_uri, RDF.type, DCT.Location))
        g.add((spatial_uri, LOCN.geometry,
               Literal('{"type": "Point", "coordinates": [23, 45]}',
                       datatype=GEOJSON_IMT)))
        g.add((spatial_uri, LOCN.geometry,
               Literal('POINT (67 89)', datatype=GSP.wktLiteral)))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

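        # when both geometries are present, the GeoJSON one is kept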
        eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
Example #19
    def test_spatial_wrong_geometries(self):
        g = Graph()

        dataset = URIRef('http://example.org/datasets/1')
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef('http://geonames/Newark')
        g.add((dataset, DCT.spatial, spatial_uri))

        g.add((spatial_uri, RDF.type, DCT.Location))
        g.add((spatial_uri,
               LOCN.geometry,
               Literal('Not GeoJSON', datatype=GEOJSON_IMT)))
        g.add((spatial_uri,
               LOCN.geometry,
               Literal('Not WKT', datatype=GSP.wktLiteral)))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

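        # geometries that cannot be parsed are dropped rather than stored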
        assert_true('spatial' not in extras)
Example #20
    def test_dataset_ttl(self):

        dataset = factories.Dataset(notes="Test dataset")

        url = url_for("dcat_dataset", _id=dataset["id"], _format="ttl")

        app = self._get_test_app()

        response = app.get(url)

        eq_(response.headers["Content-Type"], "text/turtle")

        content = response.body

        # Parse the contents to check it's an actual serialization
        p = RDFParser()

        p.parse(content, _format="turtle")

        dcat_datasets = [d for d in p.datasets()]

        eq_(len(dcat_datasets), 1)

        dcat_dataset = dcat_datasets[0]

        eq_(dcat_dataset["title"], dataset["title"])
        eq_(dcat_dataset["notes"], dataset["notes"])
    def test_spatial_multiple_dct_spatial_instances(self):
        g = Graph()

        dataset = URIRef("http://example.org/datasets/1")
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef("http://geonames/Newark")
        g.add((dataset, DCT.spatial, spatial_uri))

        location_ref = BNode()
        g.add((location_ref, RDF.type, DCT.Location))
        g.add((dataset, DCT.spatial, location_ref))
        g.add(
            (location_ref, LOCN.geometry, Literal('{"type": "Point", "coordinates": [23, 45]}', datatype=GEOJSON_IMT))
        )

        location_ref = BNode()
        g.add((location_ref, RDF.type, DCT.Location))
        g.add((dataset, DCT.spatial, location_ref))
        g.add((location_ref, SKOS.prefLabel, Literal("Newark")))

        p = RDFParser(profiles=["euro_dcat_ap"])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

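        # values from the separate dct:spatial nodes are merged into one set of extras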
        eq_(extras["spatial_uri"], "http://geonames/Newark")
        eq_(extras["spatial_text"], "Newark")
        eq_(extras["spatial"], '{"type": "Point", "coordinates": [23, 45]}')
    def test_dataset_json_ld_1(self):

        contents = self._get_file_contents("catalog_pod.jsonld")

        p = RDFParser(profiles=["euro_dcat_ap"])

        p.parse(contents, _format="json-ld")

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 1)

        dataset = datasets[0]
        extras = dict((e["key"], e["value"]) for e in dataset["extras"])

        eq_(dataset["title"], "U.S. Widget Manufacturing Statistics")

        eq_(extras["contact_name"], "Jane Doe")
        eq_(extras["contact_email"], "mailto:[email protected]")
        eq_(extras["publisher_name"], "Widget Services")
        eq_(extras["publisher_email"], "*****@*****.**")

        eq_(len(dataset["resources"]), 4)

        resource = [r for r in dataset["resources"] if r["name"] == "widgets.csv"][0]
        eq_(resource["name"], u"widgets.csv")
        eq_(resource["url"], u"https://data.agency.gov/datasets/widgets-statistics/widgets.csv")
        eq_(resource["download_url"], u"https://data.agency.gov/datasets/widgets-statistics/widgets.csv")
Example #25
    def test__datasets(self):

        p = RDFParser()

        p.g = _default_graph()

        assert len([d for d in p._datasets()]) == 3
    def test_catalog_modified_date(self):

        dataset1 = factories.Dataset(title='First dataset')
        time.sleep(1)
        dataset2 = factories.Dataset(title='Second dataset')

        url = url_for('dcat_catalog',
                      _format='ttl',
                      modified_since=dataset2['metadata_modified'])

        app = self._get_test_app()

        response = app.get(url)

        content = response.body

        p = RDFParser()

        p.parse(content, _format='turtle')

        dcat_datasets = [d for d in p.datasets()]

        eq_(len(dcat_datasets), 1)

        eq_(dcat_datasets[0]['title'], dataset2['title'])
    def test_spatial_one_dct_spatial_instance_no_uri(self):
        g = Graph()

        dataset = URIRef('http://example.org/datasets/1')
        g.add((dataset, RDF.type, DCAT.Dataset))

        location_ref = BNode()
        g.add((dataset, DCT.spatial, location_ref))

        g.add((location_ref, RDF.type, DCT.Location))
        g.add((location_ref,
               LOCN.geometry,
               Literal('{"type": "Point", "coordinates": [23, 45]}', datatype=GEOJSON_IMT)))
        g.add((location_ref, SKOS.prefLabel, Literal('Newark')))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        assert_true('spatial_uri' not in extras)
        eq_(extras['spatial_text'], 'Newark')
        eq_(extras['spatial'], '{"type": "Point", "coordinates": [23, 45]}')
Example #29
    def test_parse_dataset_default_lang_de(self):
        maxrdf = self._get_max_rdf()

        p = RDFParser(profiles=['euro_dcat_ap', 'dcatap_de'])

        p.parse(maxrdf)
        self._add_basic_fields_with_languages(p)

        datasets = [d for d in p.datasets()]
        self.assertEqual(len(datasets), 1)
        dataset = datasets[0]

        # Title and description should be in the default language "de"
        self.assertEqual(dataset.get('title'),
                         u'Naturräume Geest und Marsch (DE)')
        self.assertEqual(
            dataset.get('notes'),
            u'Die Zuordnung des Hamburger Stadtgebietes zu den Naturräumen Geest und Marsch wird dargestellt. (DE)'
        )
        # Publisher and ContactPoint
        extras = dataset.get('extras')
        self.assertTrue(len(extras) > 0)
        self._assert_extras_string(
            extras, 'publisher_name',
            u'Behörde für Umwelt und Energie (BUE), Amt für Umweltschutz (DE)')
        self._assert_extras_string(extras, 'contact_name',
                                   u'Herr Dr. Michael Schröder (DE)')
        # Resources
        self._assert_resource_lang(dataset, 'DE')
Example #31
    def test_publisher(self):

        with open(get_example_file('catalog_dati_unibo.rdf'), 'r') as f:
            contents = f.read()

        p = RDFParser(profiles=['it_dcat_ap'])

        p.parse(contents)
        g = p.g

        datasets = [d for d in p.datasets()]
        assert (len(datasets) > 1)
        for d in datasets:
            did = d['identifier']
            pname = d.get('publisher_name')
            pid = d.get('publisher_identifier')
            dat_ref = list(g.subjects(DCT.identifier, Literal(did)))[0]
            pub_ref = g.value(dat_ref, DCT.publisher)
            pubnames = list(g.objects(pub_ref, FOAF.name))
            if not pubnames:
                assert pname is None and pid is None,\
                    'Got {}/{} for publisher, when no ref in graph'.format(pname, pid)
            else:
                assert pname and pid, 'no pname {} and pid {} for {}'.format(
                    pname, pid, pubnames)

                lang_hit = False
                for lname in pubnames:
                    if hasattr(lname, 'lang'):
                        if lname.lang and lname.lang == DEFAULT_LANG:
                            lang_hit = pname == lname.value
                    else:
                        if not lang_hit:
                            lang_hit = pname == lname.value
                assert lang_hit, 'Expected a publisher name matching the default language'
    def test_dataset_turtle_1(self):

        contents = self._get_file_contents('dataset_deri.ttl')

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.parse(contents, _format='n3')

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 1)

        dataset = datasets[0]

        eq_(dataset['title'], 'Abandoned Vehicles')
        eq_(len(dataset['resources']), 1)

        resource = dataset['resources'][0]
        eq_(resource['name'], u'CSV distribution of: Abandoned Vehicles')
        eq_(
            resource['url'],
            u'http://data.london.gov.uk/datafiles/environment/abandoned-vehicles-borough.csv'
        )
        eq_(resource['uri'],
            u'http://data.london.gov.uk/dataset/Abandoned_Vehicles/csv')
    def test_creators(self):

        creators = [{'creator_name': {DEFAULT_LANG: 'abc', 'it': 'abc it'}, 'creator_identifier': "ABC"},
                    {'creator_name': {DEFAULT_LANG: 'cde', 'it': 'cde it'}, 'creator_identifier': "CDE"},
                    ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'identifier': 'ISBN',
            'temporal_start': '2016-11-01',
            'temporal_end': '2016-11-30',
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': 'ISBN,TEST',
            'theme': '{ECON,ENVI}',
            'geographical_geonames_url': 'http://www.geonames.org/3181913',
            'language': '{DEU,ENG,ITA}',
            'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
            'creator': json.dumps(creators)
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
        
        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())
        
        assert len(datasets) == 1
        d = datasets[0]
        creators.append({'creator_identifier': dataset['creator_identifier'],
                         'creator_name': {DEFAULT_LANG: dataset['creator_name']}})

        creators_dict = dict((v['creator_identifier'], v) for v in creators)

        creators_in = json.loads(d['creator'])

        for c in creators_in:
            assert c['creator_identifier'] in creators_dict.keys(), "no {} key in {}".format(c['creator_identifier'],
                                                                                             creators_dict.keys())
            assert c['creator_name'] == creators_dict[c['creator_identifier']]['creator_name'],\
                "{} vs {}".format(c['creator_name'], creators_dict[c['creator_identifier']]['creator_name'])
        for c in creators_dict.keys():
            assert c in [_c['creator_identifier'] for _c in creators_in]
            cdata = creators_dict[c]
            assert cdata in creators_in
    def test_parse_subcatalog(self):
        publisher = {'name': 'Publisher',
                     'email': '*****@*****.**',
                     'type': 'Publisher',
                     'uri': 'http://pub.lish.er'}
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'test dataset',
            'extras': [
                {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
                {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
                {'key': 'source_catalog_description', 'value': 'Subcatalog example description'},
                {'key': 'source_catalog_language', 'value': 'http://publications.europa.eu/resource/authority/language/ITA'},
                {'key': 'source_catalog_modified', 'value': '2000-01-01'},
                {'key': 'source_catalog_publisher', 'value': json.dumps(publisher)}
            ]
        }        
        catalog_dict = {
            'title': 'My Catalog',
            'description': 'An Open Data Catalog',
            'homepage': 'http://example.com',
            'language': 'de',
        }

        s = RDFSerializer()
        s.serialize_catalog(catalog_dict, dataset_dicts=[dataset])
        g = s.g

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        # at least one subcatalog with hasPart
        subcatalogs = list(p.g.objects(None, DCT.hasPart))
        assert_true(subcatalogs)

        # at least one dataset in subcatalogs
        subdatasets = []
        for subcatalog in subcatalogs:
            datasets = p.g.objects(subcatalog, DCAT.dataset)
            for dataset in datasets:
                subdatasets.append((dataset, subcatalog))
        assert_true(subdatasets)
        
        datasets = dict([(d['title'], d) for d in p.datasets()])

        for subdataset, subcatalog in subdatasets:
            title = unicode(list(p.g.objects(subdataset, DCT.title))[0])
            dataset = datasets[title]
            has_subcat = False
            for ex in dataset['extras']:
                exval = ex['value']
                exkey = ex['key']
                if exkey == 'source_catalog_homepage':
                    has_subcat = True
                    eq_(exval, unicode(subcatalog))
            # check if we had subcatalog in extras
            assert_true(has_subcat)
    def test_profiles_are_called_on_datasets(self):

        p = RDFParser()

        p._profiles = [MockRDFProfile1, MockRDFProfile2]

        p.g = _default_graph()

        for dataset in p.datasets():
            assert dataset['profile_1']
            assert dataset['profile_2']
Example #41
    def test_catalog(self):

        contents = self._get_file_contents('catalog.xml')

        p = RDFParser(profiles=['swiss_dcat_ap'])

        p.parse(contents)

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 2)
    def test_alternate_identifiers(self):

        contents = self._get_file_contents('dataset_identifier.rdf')

        p = RDFParser(profiles=['it_dcat_ap'])
        p.parse(contents)
        g = p.g
        datasets = [d for d in p.datasets()]
        assert len(datasets) == 1
        assert datasets[0]['alternate_identifier'] == '[{"identifier": "ISBN:alt id 123", "agent": {}}]', \
            datasets[0]['alternate_identifier']
    def test_temporal_coverage(self):

        load_themes()
        temporal_coverage = [{'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
                             {'temporal_start': '2001-01-01T00:00:00', 'temporal_end': '2001-02-01T10:11:12'},
                            ]
        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued': '2016-11-29',
            'modified': '2016-11-29',
            'identifier': 'ISBN',
            'temporal_start': '2016-11-01T00:00:00',
            'temporal_end': '2016-11-30T00:00:00',
            'temporal_coverage': json.dumps(temporal_coverage),
            'frequency': 'UPDATE_CONT',
            'publisher_name': 'bolzano',
            'publisher_identifier': '234234234',
            'creator_name': 'test',
            'creator_identifier': '412946129',
            'holder_name': 'bolzano',
            'holder_identifier': '234234234',
            'alternate_identifier': 'ISBN,TEST',
            'theme': '{ECON,ENVI}',
            'geographical_geonames_url': 'http://www.geonames.org/3181913',
            'language': '{DEU,ENG,ITA}',
            'is_version_of': 'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
        }

        s = RDFSerializer()
        p = RDFParser(profiles=['euro_dcat_ap', 'it_dcat_ap'])
        
        serialized = s.serialize_dataset(dataset)

        p.parse(serialized)
        datasets = list(p.datasets())
        
        assert len(datasets) == 1
        d = datasets[0]

        temporal_coverage.append({'temporal_start': dataset['temporal_start'],
                                  'temporal_end': dataset['temporal_end']})

        try:
            validators.dcatapit_temporal_coverage(d['temporal_coverage'], {})
            # this should not raise exception
            assert True
        except validators.Invalid as err:
            assert False, "Temporal coverage should be valid: {}".format(err)
Example #45
    def test_alternate_identifiers(self):

        with open(get_example_file('dataset_identifier.rdf'), 'r') as f:
            contents = f.read()

        p = RDFParser(profiles=['it_dcat_ap'])
        p.parse(contents)

        datasets = [d for d in p.datasets()]
        assert len(datasets) == 1
        assert datasets[0]['alternate_identifier'] == '[{"identifier": "ISBN:alt id 123", "agent": {}}]',\
            datasets[0]['alternate_identifier']
    def test_tags_with_commas(self):
        g = Graph()

        dataset = URIRef('http://example.org/datasets/1')
        g.add((dataset, RDF.type, DCAT.Dataset))
        g.add((dataset, DCAT.keyword, Literal('Tree, forest, shrub')))
        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]
        
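        # the comma-separated keyword is split into three individual tags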
        eq_(len(datasets[0]['tags']), 3)
    def test_datasets(self):

        p = RDFParser()

        p.g = _default_graph()

        datasets = []
        for dataset in p.datasets():

            assert 'title' in dataset

            datasets.append(dataset)

        eq_(len(datasets), 3)
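
_default_graph itself is not shown in this listing; given that the tests above expect three datasets, each with a title, a plausible minimal sketch (illustrative only, assuming the same rdflib imports) is:

def _default_graph():

    g = Graph()

    # three dcat:Dataset nodes with dct:title, matching the counts
    # asserted by test_datasets and test__datasets
    for num in range(1, 4):
        dataset_ref = URIRef('http://example.org/datasets/{0}'.format(num))
        g.add((dataset_ref, RDF.type, DCAT.Dataset))
        g.add((dataset_ref, DCT.title, Literal('Test dataset {0}'.format(num))))

    return g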
    def test_tags_with_commas_clean_tags_on(self):
        g = Graph()

        dataset = URIRef('http://example.org/datasets/1')
        g.add((dataset, RDF.type, DCAT.Dataset))
        g.add((dataset, DCAT.keyword, Literal(self.INVALID_TAG)))
        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        assert_true(self.VALID_TAG in datasets[0]['tags'])
        assert_true(self.INVALID_TAG not in datasets[0]['tags'])
    def test_tags_with_commas_clean_tags_off(self):
        g = Graph()

        dataset = URIRef('http://example.org/datasets/1')
        g.add((dataset, RDF.type, DCAT.Dataset))
        g.add((dataset, DCAT.keyword, Literal(self.INVALID_TAG)))
        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        # when the clean tags config option is off, invalid tags are kept as-is

        datasets = [d for d in p.datasets()]
        assert_true(self.VALID_TAG not in datasets[0]['tags'])
        assert_true({'name': self.INVALID_TAG} in datasets[0]['tags'])
    def test_dataset_version_adms(self):
        g = Graph()

        dataset1 = URIRef("http://example.org/datasets/1")
        g.add((dataset1, RDF.type, DCAT.Dataset))

        g.add((dataset1, ADMS.version, Literal('2.3a')))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        dataset = [d for d in p.datasets()][0]

        eq_(dataset['version'], u'2.3a')
    def test_parse_without_pagination(self):

        data = '''<?xml version="1.0" encoding="utf-8" ?>
        <rdf:RDF
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
        <rdfs:SomeClass rdf:about="http://example.org">
            <rdfs:label>Some label</rdfs:label>
        </rdfs:SomeClass>
        </rdf:RDF>
        '''

        p = RDFParser()

        p.parse(data)

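        # no pagination info in the graph, so there is no next page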
        eq_(p.next_page(), None)
    def test_parse_data_different_format(self):

        data = '''
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

        <http://example.org> a rdfs:SomeClass ;
            rdfs:label "Some label" .
        '''

        p = RDFParser()

        eq_(len(p.g), 0)

        p.parse(data, _format='n3')

        eq_(len(p.g), 2)
    def test_catalog_xml_rdf(self):

        contents = self._get_file_contents("catalog.rdf")

        p = RDFParser(profiles=["euro_dcat_ap"])

        p.parse(contents)

        datasets = [d for d in p.datasets()]

        eq_(len(datasets), 2)

        dataset = datasets[0] if datasets[0]["title"] == "Example dataset 1" else datasets[1]

        eq_(dataset["title"], "Example dataset 1")
        eq_(len(dataset["resources"]), 3)
        eq_(len(dataset["tags"]), 2)
    def test_spatial_uri_only(self):
        g = Graph()

        dataset = URIRef("http://example.org/datasets/1")
        g.add((dataset, RDF.type, DCAT.Dataset))

        spatial_uri = URIRef("http://geonames/Newark")
        g.add((dataset, DCT.spatial, spatial_uri))
        p = RDFParser(profiles=["euro_dcat_ap"])

        p.g = g

        datasets = [d for d in p.datasets()]

        extras = self._extras(datasets[0])

        eq_(extras["spatial_uri"], "http://geonames/Newark")
        assert_true("spatial_text" not in extras)
        assert_true("spatial" not in extras)
    def test_dataset_license_from_distribution_by_uri(self):
        # license_id retrieved from the URI of dcat:license object
        g = Graph()

        dataset = URIRef("http://example.org/datasets/1")
        g.add((dataset, RDF.type, DCAT.Dataset))

        distribution = URIRef("http://example.org/datasets/1/ds/1")
        g.add((dataset, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        g.add((distribution, DCT.license,
               URIRef("http://www.opendefinition.org/licenses/cc-by")))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        dataset = [d for d in p.datasets()][0]
        eq_(dataset['license_id'], 'cc-by')
Example #59
    def parse_chunk(self, harvest_job, content, rdf_format, guids_in_source, object_ids):
        # TODO: store content?
        for harvester in p.PluginImplementations(IDCATRDFHarvester):
            content, after_download_errors = harvester.after_download(content, harvest_job)

            for error_msg in after_download_errors:
                self._save_gather_error(error_msg, harvest_job)

        if not content:
            return False

        # TODO: profiles conf
        parser = RDFParser()

        try:
            parser.parse(content, _format=rdf_format)
        except RDFParserException as e:
            self._save_gather_error('Error parsing the RDF file: {0}'.format(e), harvest_job)
            return False
    def test_distribution_format_format_only(self):
        g = Graph()

        dataset1 = URIRef("http://example.org/datasets/1")
        g.add((dataset1, RDF.type, DCAT.Dataset))

        distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
        g.add((distribution1_1, RDF.type, DCAT.Distribution))
        g.add((distribution1_1, DCT['format'], Literal('CSV')))
        g.add((dataset1, DCAT.distribution, distribution1_1))

        p = RDFParser(profiles=['euro_dcat_ap'])

        p.g = g

        datasets = [d for d in p.datasets()]

        resource = datasets[0]['resources'][0]

        eq_(resource['format'], u'CSV')