def test_uri_to_iri_valid(self):
    # The braces, quotes, colon and brackets in the query string are
    # expected to come back percent-encoded.
    raw_uri = 'https://www.etat.ge.ch/geoportail/pro/?res=map&visiblelayerindexes={"CATALOGUE":[0632]}'  # noqa
    expected_iri = 'https://www.etat.ge.ch/geoportail/pro/?res=map&visiblelayerindexes=%7B%22CATALOGUE%22%3A%5B0632%5D%7D'  # noqa

    self.assertEqual(expected_iri, helpers.uri_to_iri(raw_uri))
def graph_from_dataset(self, dataset_dict, dataset_ref):  # noqa
    """Add the triples describing a dataset and its resources to ``self.g``.

    Binds every prefix found in ``namespaces`` on the graph, types
    ``dataset_ref`` as ``DCAT.Dataset`` and then adds the dataset's basic
    fields, multilingual values, dates, list values, relations,
    references, contact points, publishers, temporal extents and themes.
    Each entry of ``dataset_dict['resources']`` is added as a
    ``DCAT.Distribution`` linked to the dataset.

    URLs are passed through ``uri_to_iri``; values for which it raises
    ``ValueError`` are skipped. The one exception is the landing page,
    which falls back to an empty literal.

    :param dataset_dict: dataset metadata. ``name`` and ``url`` are read
        unconditionally; the other top-level keys are optional.
    :param dataset_ref: RDF node used as the subject of the dataset's
        triples.
    :raises KeyError: if ``name`` or ``url`` is missing, or if an entry
        of ``relations``, ``contact_points``, ``publishers`` or
        ``temporals`` lacks one of the keys indexed below.
    """
    # Pass the argument to the logger instead of pre-formatting the string.
    log.debug("Create graph from dataset '%s'", dataset_dict['name'])

    g = self.g

    # ``items()`` is available on both Python 2 and Python 3 dicts.
    for prefix, namespace in namespaces.items():
        g.bind(prefix, namespace)

    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    # Basic fields
    # NOTE(review): tuples presumably are (dict key, predicate, fallback
    # keys, RDF term class) -- confirm against _add_triples_from_dict.
    items = [
        ('identifier', DCT.identifier, ['guid', 'id'], Literal),
        ('version', OWL.versionInfo, ['dcat_version'], Literal),
        ('version_notes', ADMS.versionNotes, None, Literal),
        ('frequency', DCT.accrualPeriodicity, None, Literal),
        ('access_rights', DCT.accessRights, None, Literal),
        ('dcat_type', DCT.type, None, Literal),
        ('provenance', DCT.provenance, None, Literal),
        ('spatial', DCT.spatial, None, Literal),
    ]
    self._add_triples_from_dict(dataset_dict, dataset_ref, items)

    self._add_multilang_value(
        dataset_ref, DCT.description, 'description', dataset_dict)
    self._add_multilang_value(
        dataset_ref, DCT.title, 'title', dataset_dict)

    # LandingPage: an invalid URL still yields a (empty) landing page
    # literal rather than omitting the triple.
    try:
        landing_page = uri_to_iri(dataset_dict['url'])
    except ValueError:
        landing_page = ''

    g.add((dataset_ref, DCAT.landingPage, Literal(landing_page)))

    # Keywords
    self._add_multilang_value(
        dataset_ref, DCAT.keyword, 'keywords', dataset_dict)

    # Dates
    items = [
        ('issued', DCT.issued, ['metadata_created'], Literal),
        ('modified', DCT.modified, ['metadata_modified'], Literal),
    ]
    self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

    # Update Interval
    accrual_periodicity = dataset_dict.get('accrual_periodicity')
    if accrual_periodicity:
        g.add((
            dataset_ref,
            DCT.accrualPeriodicity,
            URIRef(accrual_periodicity)
        ))

    # Lists
    items = [
        ('language', DCT.language, None, Literal),
        ('theme', DCAT.theme, None, URIRef),
        ('conforms_to', DCT.conformsTo, None, Literal),
        ('alternate_identifier', ADMS.identifier, None, Literal),
        ('documentation', FOAF.page, None, Literal),
        ('has_version', DCT.hasVersion, None, Literal),
        ('is_version_of', DCT.isVersionOf, None, Literal),
        ('source', DCT.source, None, Literal),
        ('sample', ADMS.sample, None, Literal),
    ]
    self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

    # Relations
    for relation in dataset_dict.get('relations') or []:
        relation_name = relation['label']
        try:
            relation_url = uri_to_iri(relation['url'])
        except ValueError:
            # skip this relation if the URL is invalid
            continue

        relation_ref = URIRef(relation_url)
        g.add((relation_ref, RDFS.label, Literal(relation_name)))
        g.add((dataset_ref, DCT.relation, relation_ref))

    # References
    for reference in dataset_dict.get('see_alsos') or []:
        # we only expect dicts here
        if not isinstance(reference, dict):
            continue
        reference_identifier = reference.get('dataset_identifier')
        if reference_identifier:
            g.add((
                dataset_ref,
                RDFS.seeAlso,
                Literal(reference_identifier)
            ))

    # Contact details
    if dataset_dict.get('contact_points'):
        contact_points = self._get_dataset_value(
            dataset_dict, 'contact_points')
        for contact_point in contact_points:
            contact_details = BNode()
            contact_point_email = contact_point['email']
            contact_point_name = contact_point['name']

            g.add((contact_details, RDF.type, VCARD.Organization))
            g.add((
                contact_details,
                VCARD.hasEmail,
                URIRef(contact_point_email)
            ))
            g.add((contact_details, VCARD.fn, Literal(contact_point_name)))

            g.add((dataset_ref, DCAT.contactPoint, contact_details))

    # Publisher
    for publisher in dataset_dict.get('publishers') or []:
        publisher_name = publisher['label']

        publisher_details = BNode()
        g.add((publisher_details, RDF.type, RDF.Description))
        g.add((publisher_details, RDFS.label, Literal(publisher_name)))
        g.add((dataset_ref, DCT.publisher, publisher_details))

    # Temporals
    for temporal in dataset_dict.get('temporals') or []:
        start = temporal['start_date']
        end = temporal['end_date']
        # A temporal extent is only emitted when at least one bound is set.
        if start or end:
            temporal_extent = BNode()
            g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
            if start:
                self._add_date_triple(
                    temporal_extent, SCHEMA.startDate, start)
            if end:
                self._add_date_triple(
                    temporal_extent, SCHEMA.endDate, end)
            g.add((dataset_ref, DCT.temporal, temporal_extent))

    # Themes
    # NOTE(review): _get_dataset_value presumably returns None when the
    # key is absent; guard so a dataset without groups is not a TypeError.
    groups = self._get_dataset_value(dataset_dict, 'groups') or []
    for group in groups:
        g.add((
            dataset_ref,
            DCAT.theme,
            URIRef(ogd_theme_base_url + group.get('name'))
        ))

    # Resources
    for resource_dict in dataset_dict.get('resources', []):
        distribution = URIRef(resource_uri(resource_dict))

        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))

        # Simple values
        items = [
            ('status', ADMS.status, None, Literal),
            ('rights', DCT.rights, None, Literal),
            ('license', DCT.license, None, Literal),
            ('identifier', DCT.identifier, None, Literal),
            ('media_type', DCAT.mediaType, None, Literal),
            ('spatial', DCT.spatial, None, Literal),
        ]
        self._add_triples_from_dict(resource_dict, distribution, items)

        self._add_multilang_value(
            distribution, DCT.title, 'display_name', resource_dict)
        self._add_multilang_value(
            distribution, DCT.description, 'description', resource_dict)

        # Lists
        items = [
            ('documentation', FOAF.page, None, Literal),
            ('language', DCT.language, None, Literal),
            ('conforms_to', DCT.conformsTo, None, Literal),
        ]
        self._add_list_triples_from_dict(resource_dict, distribution, items)

        # Download URL & Access URL
        # ``download_iri`` stays None unless the download URL validated,
        # so an invalid download URL can never be reused as the access
        # URL further down.
        download_iri = None
        raw_download_url = resource_dict.get('download_url')
        if raw_download_url:
            try:
                download_iri = uri_to_iri(raw_download_url)
            except ValueError:
                # only add valid URL
                download_iri = None
            else:
                g.add((
                    distribution,
                    DCAT.downloadURL,
                    URIRef(download_iri)
                ))

        url = resource_dict.get('url')
        if url and url != download_iri:
            try:
                access_iri = uri_to_iri(url)
            except ValueError:
                # only add valid URL
                pass
            else:
                g.add((distribution, DCAT.accessURL, URIRef(access_iri)))
        elif download_iri:
            # No distinct access URL: fall back to the validated
            # download URL.
            g.add((distribution, DCAT.accessURL, URIRef(download_iri)))

        # Format
        if resource_dict.get('format'):
            g.add((
                distribution,
                DCT['format'],
                Literal(resource_dict['format'])
            ))

        # Mime-Type
        if resource_dict.get('mimetype'):
            g.add((
                distribution,
                DCAT.mediaType,
                Literal(resource_dict['mimetype'])
            ))

        # Dates
        items = [
            ('issued', DCT.issued, None, Literal),
            ('modified', DCT.modified, None, Literal),
        ]
        self._add_date_triples_from_dict(resource_dict, distribution, items)

        # ByteSize
        if resource_dict.get('byte_size'):
            g.add((
                distribution,
                DCAT.byteSize,
                Literal(resource_dict['byte_size'])
            ))
def test_uri_to_iri_invalid(self):
    # 'test' is not a valid URI-like string.
    # Assert ValueError specifically instead of any Exception, so that an
    # unrelated failure (e.g. an AttributeError or TypeError) cannot make
    # this test pass.
    # NOTE(review): ValueError is the exception graph_from_dataset catches
    # around uri_to_iri -- confirm helpers.uri_to_iri is that function.
    uri = 'test'  # noqa
    with self.assertRaises(ValueError):
        helpers.uri_to_iri(uri)