def test_uri_to_iri_valid(self):
     uri = 'https://www.etat.ge.ch/geoportail/pro/?res=map&visiblelayerindexes={"CATALOGUE":[0632]}'  # noqa
     result = helpers.uri_to_iri(uri)
     self.assertEqual(
         'https://www.etat.ge.ch/geoportail/pro/?res=map&visiblelayerindexes=%7B%22CATALOGUE%22%3A%5B0632%5D%7D',  #noqa
         result
     )
Example #2
0
    def graph_from_dataset(self, dataset_dict, dataset_ref):  # noqa

        log.debug("Create graph from dataset '%s'" % dataset_dict['name'])

        g = self.g

        for prefix, namespace in namespaces.iteritems():
            g.bind(prefix, namespace)

        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # Basic fields
        items = [
            ('identifier', DCT.identifier, ['guid', 'id'], Literal),
            ('version', OWL.versionInfo, ['dcat_version'], Literal),
            ('version_notes', ADMS.versionNotes, None, Literal),
            ('frequency', DCT.accrualPeriodicity, None, Literal),
            ('access_rights', DCT.accessRights, None, Literal),
            ('dcat_type', DCT.type, None, Literal),
            ('provenance', DCT.provenance, None, Literal),
            ('spatial', DCT.spatial, None, Literal),
        ]
        self._add_triples_from_dict(dataset_dict, dataset_ref, items)

        self._add_multilang_value(dataset_ref, DCT.description, 'description',
                                  dataset_dict)
        self._add_multilang_value(dataset_ref, DCT.title, 'title',
                                  dataset_dict)

        # LandingPage
        try:
            landing_page = uri_to_iri(dataset_dict['url'])
        except ValueError:
            landing_page = ''

        g.add((dataset_ref, DCAT.landingPage, Literal(landing_page)))

        # Keywords
        self._add_multilang_value(dataset_ref, DCAT.keyword, 'keywords',
                                  dataset_dict)

        # Dates
        items = [
            ('issued', DCT.issued, ['metadata_created'], Literal),
            ('modified', DCT.modified, ['metadata_modified'], Literal),
        ]
        self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

        # Update Interval
        accrual_periodicity = dataset_dict.get('accrual_periodicity')
        if accrual_periodicity:
            g.add((dataset_ref, DCT.accrualPeriodicity,
                   URIRef(accrual_periodicity)))

        # Lists
        items = [
            ('language', DCT.language, None, Literal),
            ('theme', DCAT.theme, None, URIRef),
            ('conforms_to', DCT.conformsTo, None, Literal),
            ('alternate_identifier', ADMS.identifier, None, Literal),
            ('documentation', FOAF.page, None, Literal),
            ('has_version', DCT.hasVersion, None, Literal),
            ('is_version_of', DCT.isVersionOf, None, Literal),
            ('source', DCT.source, None, Literal),
            ('sample', ADMS.sample, None, Literal),
        ]
        self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

        # Relations
        if dataset_dict.get('relations'):
            relations = dataset_dict.get('relations')
            for relation in relations:
                relation_name = relation['label']
                try:
                    relation_url = uri_to_iri(relation['url'])
                except ValueError:
                    # skip this relation if the URL is invalid
                    continue

                relation = URIRef(relation_url)
                g.add((relation, RDFS.label, Literal(relation_name)))
                g.add((dataset_ref, DCT.relation, relation))

        # References
        if dataset_dict.get('see_alsos'):
            references = dataset_dict.get('see_alsos')
            for reference in references:
                # we only excpect dicts here
                if not isinstance(reference, dict):
                    continue
                reference_identifier = reference.get('dataset_identifier')
                if reference_identifier:
                    g.add((dataset_ref, RDFS.seeAlso,
                           Literal(reference_identifier)))

        # Contact details
        if dataset_dict.get('contact_points'):
            contact_points = self._get_dataset_value(dataset_dict,
                                                     'contact_points')  # noqa
            for contact_point in contact_points:
                contact_details = BNode()
                contact_point_email = contact_point['email']
                contact_point_name = contact_point['name']

                g.add((contact_details, RDF.type, VCARD.Organization))
                g.add((contact_details, VCARD.hasEmail,
                       URIRef(contact_point_email)))  # noqa
                g.add((contact_details, VCARD.fn, Literal(contact_point_name)))

                g.add((dataset_ref, DCAT.contactPoint, contact_details))

        # Publisher
        if dataset_dict.get('publishers'):
            publishers = dataset_dict.get('publishers')
            for publisher in publishers:
                publisher_name = publisher['label']

                publisher_details = BNode()
                g.add((publisher_details, RDF.type, RDF.Description))
                g.add((publisher_details, RDFS.label, Literal(publisher_name)))
                g.add((dataset_ref, DCT.publisher, publisher_details))

        # Temporals
        temporals = dataset_dict.get('temporals')
        if temporals:
            for temporal in temporals:
                start = temporal['start_date']
                end = temporal['end_date']
                if start or end:
                    temporal_extent = BNode()
                    g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
                    if start:
                        self._add_date_triple(temporal_extent,
                                              SCHEMA.startDate, start)
                    if end:
                        self._add_date_triple(temporal_extent, SCHEMA.endDate,
                                              end)
                    g.add((dataset_ref, DCT.temporal, temporal_extent))

        # Themes
        groups = self._get_dataset_value(dataset_dict, 'groups')
        for group_name in groups:
            g.add((dataset_ref, DCAT.theme,
                   URIRef(ogd_theme_base_url + group_name.get('name'))))

        # Resources
        for resource_dict in dataset_dict.get('resources', []):

            distribution = URIRef(resource_uri(resource_dict))

            g.add((dataset_ref, DCAT.distribution, distribution))
            g.add((distribution, RDF.type, DCAT.Distribution))

            #  Simple values
            items = [
                ('status', ADMS.status, None, Literal),
                ('rights', DCT.rights, None, Literal),
                ('license', DCT.license, None, Literal),
                ('identifier', DCT.identifier, None, Literal),
                ('media_type', DCAT.mediaType, None, Literal),
                ('spatial', DCT.spatial, None, Literal),
            ]

            self._add_triples_from_dict(resource_dict, distribution, items)

            self._add_multilang_value(distribution, DCT.title, 'display_name',
                                      resource_dict)  # noqa
            self._add_multilang_value(distribution, DCT.description,
                                      'description', resource_dict)  # noqa

            #  Lists
            items = [
                ('documentation', FOAF.page, None, Literal),
                ('language', DCT.language, None, Literal),
                ('conforms_to', DCT.conformsTo, None, Literal),
            ]
            self._add_list_triples_from_dict(resource_dict, distribution,
                                             items)

            # Download URL & Access URL
            download_url = resource_dict.get('download_url')
            if download_url:
                try:
                    download_url = uri_to_iri(download_url)
                    g.add(
                        (distribution, DCAT.downloadURL, URIRef(download_url)))
                except ValueError:
                    # only add valid URL
                    pass

            url = resource_dict.get('url')
            if (url and not download_url) or (url and url != download_url):
                try:
                    url = uri_to_iri(url)
                    g.add((distribution, DCAT.accessURL, URIRef(url)))
                except ValueError:
                    # only add valid URL
                    pass
            elif download_url:
                g.add((distribution, DCAT.accessURL, URIRef(download_url)))

            # Format
            if resource_dict.get('format'):
                g.add((distribution, DCT['format'],
                       Literal(resource_dict['format'])))

            # Mime-Type
            if resource_dict.get('mimetype'):
                g.add((distribution, DCAT.mediaType,
                       Literal(resource_dict['mimetype'])))

            # Dates
            items = [
                ('issued', DCT.issued, None, Literal),
                ('modified', DCT.modified, None, Literal),
            ]

            self._add_date_triples_from_dict(resource_dict, distribution,
                                             items)

            # ByteSize
            if resource_dict.get('byte_size'):
                g.add((distribution, DCAT.byteSize,
                       Literal(resource_dict['byte_size'])))
 def test_uri_to_iri_invalid(self):
     # 'test' is not a valid URI-like string
     uri = 'test'  # noqa
     with self.assertRaises(Exception):
         helpers.uri_to_iri(uri)