def _layer_to_graph(g, gdb, layer_name, area, mapper):
    """Translate the features of one layer into nodes and properties in ``g``.

    A feature is translated only when it has a feature ID greater than zero,
    ``gdb.within`` reports its geometry as inside ``area``, and
    ``gdb.geometryWKT_of`` yields a WKT string. Each translated feature gets
    a typed, labelled node, one literal per mapped attribute, and a typed,
    labelled geometry node that carries the WKT.

    Args:
        g: Graph that receives the triples; its namespace manager must bind
            ``DEFAULT_PREFIX``, ``DEFAULT_SCHEMA_PREFIX``, ``sf`` and ``geo``.
        gdb: Data source providing ``features_of``, ``geometry_of``,
            ``within`` and ``geometryWKT_of``.
        layer_name: Name of the layer; also the key into the mapper schema.
        area: Region passed to ``gdb.within`` to decide whether a feature is
            in scope.
        mapper: Provides ``schema['schema'][layer_name]`` (the layer mapping)
            and ``attributes(layer_name)`` (the attribute keys to export).

    Returns:
        None. The graph ``g`` is modified in place.
    """
    ns = dict(g.namespace_manager.namespaces())

    logger.info("Processing layer {}".format(layer_name))
    mapping = mapper.schema['schema'][layer_name]

    # define class
    class_node = _layer_to_class(g, mapping)

    attributes = list(mapper.attributes(layer_name))

    # The identifier check does not depend on the feature: decide once
    # instead of walking the whole layer only to skip every feature.
    if mapping['identifier'] is None:
        return

    classname = mapping['classname']

    # translate features
    for feat in gdb.features_of(layer_name):
        fid = feat.GetFID()
        if fid is None or fid <= 0:
            continue

        # continue if out of scope
        if not gdb.within(gdb.geometry_of(feat), area):
            continue

        # geometry
        geom_wkt, gtype = gdb.geometryWKT_of(feat)
        if geom_wkt is None:
            continue

        # node for this feature
        feat_node = URIRef(ns[DEFAULT_PREFIX]
                           + gen_hash(layer_name + classname, fid))
        add_type(g, feat_node, class_node)
        add_label(g, feat_node, "{} {} ({})".format(classname, fid, gtype))

        for k, v in feat.items().items():
            if v is None or v in EXCL_VALUES:
                continue
            if k not in attributes:
                continue

            # create node
            attr = mapping['attributes'][k]
            try:
                if isinstance(v, str):
                    v = v.strip()
                attr_node = Literal(v, datatype=URIRef(attr['datatype']))
            except UnicodeDecodeError:
                # retry with a lenient decode, dropping undecodable bytes
                if isinstance(v, bytes):
                    v = v.decode('utf-8', 'ignore')
                attr_node = Literal(v, datatype=URIRef(attr['datatype']))

            # link to node
            attr_link = URIRef(ns[DEFAULT_SCHEMA_PREFIX]
                               + "{}_{}".format(classname.lower(),
                                                attr['property']))
            add_property(g, feat_node, attr_node, attr_link)

        # add geometry node
        geom_node = URIRef(ns[DEFAULT_PREFIX] + gen_hash(geom_wkt, gtype))
        geo_type = _geo_type(gtype)
        add_type(g, geom_node, URIRef(ns['sf'] + geo_type))
        add_label(g, geom_node, "{} Geometry".format(geo_type))
        add_property(g, feat_node, geom_node,
                     URIRef(ns['geo'] + 'hasGeometry'))

        # include WKT
        geom_wkt_node = Literal(geom_wkt,
                                datatype=URIRef(ns['geo'] + 'wktLiteral'))
        add_property(g, geom_node, geom_wkt_node,
                     URIRef(ns['geo'] + 'asWKT'))
def _interlink(g, schema, source_database, target_database, source_graph, target_graph):
    """Add cross-database links between two graphs to ``g``.

    For every relation that ``schema.from_database(source_database)`` yields
    and whose target lies in ``target_database``, the source graph is queried
    for (source node, reference value) pairs. Every subject in the target
    graph that carries the same value on the target property is linked to
    the source node in ``g``.

    Args:
        g: Graph that receives the link triples; its namespace manager is
            updated with the ABox namespaces of both databases.
        schema: Provides ``from_database(name)``, yielding relation
            definitions with ``'source'`` and ``'target'`` entries.
        source_database: Name of the database the relations start from.
        target_database: Name of the database the relations must point to.
        source_graph: Graph queried for the reference values.
        target_graph: Graph searched for subjects with matching values.

    Returns:
        None. The graph ``g`` is modified in place.
    """
    ns_tbox_source = _generate_tbox_namespace(source_graph)
    ns_tbox_target = _generate_tbox_namespace(target_graph)

    # update namespaces
    ns_abox_source = default_namespace_of(source_graph)[0]
    ns_abox_target = default_namespace_of(target_graph)[0]
    _update_namespaces(g.namespace_manager,
                       ('rws.' + source_database.lower(), ns_abox_source),
                       ('rws.' + target_database.lower(), ns_abox_target))

    for relation in schema.from_database(source_database):
        source_def = relation['source']
        target_def = relation['target']

        if target_def['database'] != target_database:
            continue

        # define relation
        source_class = _classname_from_def(source_database, source_def)
        target_class = _classname_from_def(target_database, target_def)
        rel = _property_from_def(source_class, target_class,
                                 target_def['property'])

        # select target references
        q = _generate_query(
            URIRef(ns_tbox_source + source_class),
            property_from_mapping(ns_tbox_source, source_class,
                                  source_def['property']))

        # The target property is the same for every row of this relation,
        # so build it once rather than once per query result.
        target_property = property_from_mapping(ns_tbox_target,
                                                target_class,
                                                target_def['property'])

        for source_id, target_id in source_graph.query(q):
            # Compare the lexical form: rdflib terms never compare equal to
            # a plain str, so `target_id == ""` would let empty literals
            # through and link every node that has an empty value.
            if target_id is None or str(target_id) == "":
                continue

            for match_id in target_graph.subjects(predicate=target_property,
                                                  object=target_id):
                add_property(g, source_id, match_id, rel)
def update_metadata(g, base_namespace, timestamp):
    """Refresh the modification date and triple count stored in ``g``.

    All existing ``dcterms:modified`` and ``void:triples`` statements are
    removed from the graph, after which fresh values are attached to
    ``base_namespace``, which is used as the subject node as-is.

    Args:
        g: Graph whose meta-data is refreshed in place.
        base_namespace: Subject node the meta-data is attached to.
        timestamp: Value passed to ``datetime.fromtimestamp`` to produce the
            new modification date.

    Returns:
        None.
    """
    logger.info("Updating meta-data")

    # update namespaces
    _update_namespaces(g.namespace_manager)
    prefixes = dict(g.namespace_manager.namespaces())

    subject = base_namespace

    # remove old data
    modified_predicate = URIRef(prefixes['dcterms'] + 'modified')
    g.remove((None, modified_predicate, None))
    triples_predicate = URIRef(prefixes['void'] + 'triples')
    g.remove((None, triples_predicate, None))

    # modified
    iso_stamp = datetime.fromtimestamp(timestamp).isoformat()
    date_time_type = URIRef(prefixes['xsd'] + 'dateTime')
    add_property(g, subject,
                 Literal(iso_stamp, datatype=date_time_type),
                 modified_predicate)

    # number of triples; the +1 accounts for the count statement itself
    triple_count = len(g) + 1
    count_type = URIRef(prefixes['xsd'] + 'nonNegativeInteger')
    add_property(g, subject,
                 Literal(triple_count, datatype=count_type),
                 triples_predicate)
def _generate_branch(schema, ns_abox, tail, parent, values):
    """Build a new graph holding the branch described by ``tail``.

    ``tail`` is a nested description with ``'head'``, ``'property'`` and
    ``'tail'`` entries (and ``'type'`` on the last step). A step whose
    ``'tail'`` is a string ends the branch: it becomes either a resource
    node (when ``'type'`` is ``"URIRef"``) or a literal compiled from
    ``values``. Any other step becomes a resource node and the function
    recurses into the nested ``'tail'`` with that node as the new parent.

    Args:
        schema: Provides ``compile_value(tail, values)`` for literal leaves.
        ns_abox: Namespace prefix for generated resource nodes.
        tail: Description of the current step of the branch.
        parent: Node the current step is attached to.
        values: Values handed to ``schema.compile_value``.

    Returns:
        A new ``Graph`` holding the triples of this step and all nested ones.
    """
    branch = Graph()
    ends_here = type(tail['tail']) is str

    # literal leaf: attach the compiled value directly to the parent
    if ends_here and tail['type'] != "URIRef":
        value_node = Literal(schema.compile_value(tail, values),
                             datatype=URIRef(tail['head']))
        add_property(branch, parent, value_node, URIRef(tail['property']))
        return branch

    # resource node, shared by URIRef leaves and intermediate steps
    head_class = URIRef(tail['head'])
    resource = URIRef(ns_abox + gen_hash(tail['head'], parent.toPython()))
    add_type(branch, resource, head_class)
    add_property(branch, parent, resource, URIRef(tail['property']))

    if not ends_here:
        # descend into the remainder of the branch
        branch += _generate_branch(schema, ns_abox, tail['tail'],
                                   resource, values)

    return branch
def _table_to_graph(g, server, references, table, mapper):
    """Translate the referenced records of one table into triples in ``g``.

    Only records whose identifier value is listed by
    ``references.references(table=table)`` are translated. Each such record
    gets a typed, labelled node, one literal per mapped attribute, and, for
    every mapped relation, a link to the referenced node plus an inverse
    link back. Every relation target is registered through
    ``references.add_reference`` for later processing.

    Args:
        g: Graph that receives the triples; its namespace manager must bind
            ``DEFAULT_PREFIX`` and ``DEFAULT_SCHEMA_PREFIX``.
        server: Data source providing ``records(table)``.
        references: Registry providing ``references(table=...)`` and
            ``add_reference(table, value)``.
        table: Name of the table; also the key into the mapper schema.
        mapper: Provides ``schema['schema'][table]`` (the table mapping),
            ``attributes(table)`` and ``relations(table)``.

    Returns:
        None. The graph ``g`` and ``references`` are modified in place.
    """
    ns = dict(g.namespace_manager.namespaces())

    logger.info("Processing table {}".format(table))
    mapping = mapper.schema['schema'][table]

    # define class
    class_node = _table_to_class(g, mapping)

    referenced_nodes = list(references.references(table=table))
    attributes = list(mapper.attributes(table))
    relations = list(mapper.relations(table))

    # The identifier check does not depend on the record: decide once, and
    # do not fetch the records at all when nothing can be emitted.
    identifier = mapping['identifier']
    if identifier is None:
        return

    # Membership is tested once per record, so use a set for constant-time
    # lookups; keep the list if the identifiers turn out to be unhashable.
    try:
        referenced_ids = set(referenced_nodes)
    except TypeError:
        referenced_ids = referenced_nodes

    classname = mapping['classname']

    # translate records
    records = server.records(table)
    for rec in records:
        rec_id = rec[identifier]
        if rec_id not in referenced_ids:
            continue

        # node for this record
        rec_node = URIRef(ns[DEFAULT_PREFIX] + gen_hash(classname, rec_id))
        add_type(g, rec_node, class_node)
        add_label(g, rec_node, "{} {}".format(classname, rec_id))

        for k, v in rec.items():
            if v is None:
                continue

            if k in attributes:
                # create node
                attr = mapping['attributes'][k]
                try:
                    if isinstance(v, str):
                        v = v.strip()
                    attr_node = Literal(v, datatype=URIRef(attr['datatype']))
                except UnicodeDecodeError:
                    # retry with a lenient decode, dropping undecodable bytes
                    if isinstance(v, bytes):
                        v = v.decode('utf-8', 'ignore')
                    attr_node = Literal(v, datatype=URIRef(attr['datatype']))

                # link to node
                attr_link = URIRef(ns[DEFAULT_SCHEMA_PREFIX]
                                   + "{}_{}".format(classname.lower(),
                                                    attr['property']))
                add_property(g, rec_node, attr_node, attr_link)

            if k in relations:
                rel = mapping['relations'][k]

                # create node
                referenced_node = URIRef(
                    ns[DEFAULT_PREFIX] + gen_hash(rel['targetclassname'], v))

                # link to node
                rel_link = URIRef(ns[DEFAULT_SCHEMA_PREFIX]
                                  + "{}_{}".format(classname.lower(),
                                                   rel['property']))
                add_property(g, rec_node, referenced_node, rel_link)

                # add back link
                inverse_rel_link = URIRef(
                    ns[DEFAULT_SCHEMA_PREFIX]
                    + "{}_inv_{}".format(rel['targetclassname'].lower(),
                                         classname.lower()))
                add_property(g, referenced_node, rec_node, inverse_rel_link)

                # store referenced nodes for further processing
                references.add_reference(rel['targettable'], v)
def add_metadata(g, base_namespace, timestamp, database, is_ontology=False):
    """Describe the graph itself with descriptive meta-data statements.

    The node for the namespace bound to the prefix ``base_namespace`` is
    typed as ``owl:Ontology`` or ``void:Dataset`` and given a modification
    date, creator, publisher, rights holder, language, Dutch and English
    titles and descriptions, a subject, and the number of triples.

    Args:
        g: Graph that is described and receives the statements.
        base_namespace: Prefix whose bound namespace becomes the subject.
        timestamp: Value passed to ``datetime.fromtimestamp`` to produce the
            modification date.
        database: Database name; its upper-cased form is used in the titles.
        is_ontology: When true, describe an ontology rather than a dataset.

    Returns:
        None. The graph ``g`` is modified in place.
    """
    logger.info("Adding meta-data")

    # update namespaces
    _update_namespaces(g.namespace_manager)
    prefixes = dict(g.namespace_manager.namespaces())
    dataset_node = URIRef(prefixes[base_namespace])

    def term(prefix, local_name):
        # resolve a prefixed name against the graph's bound namespaces
        return URIRef(prefixes[prefix] + local_name)

    def describe(value, dcterms_name):
        # attach ``value`` to the described node through a dcterms property
        add_property(g, dataset_node, value, term('dcterms', dcterms_name))

    # type
    if is_ontology:
        type_prefix, type_name, descriptiontype = 'owl', 'Ontology', "ontology"
    else:
        type_prefix, type_name, descriptiontype = 'void', 'Dataset', "dataset"
    add_type(g, dataset_node, term(type_prefix, type_name))

    # modified
    iso_stamp = datetime.fromtimestamp(timestamp).isoformat()
    describe(Literal(iso_stamp, datatype=term('xsd', 'dateTime')), 'modified')

    # creator/published
    university = term('dbpr', 'Vrije_Universiteit_Amsterdam')
    for role in ('creator', 'publisher'):
        describe(university, role)

    # rights holder
    describe(term('dbpr', 'Rijkswaterstaat'), 'rightsHolder')

    # language
    describe(URIRef("http://id.loc.gov/vocabulary/iso639-1/nl"), 'language')

    # title (same text, tagged once per language)
    title_text = "Rijkswaterstaat Linked Data Pilot Project - {}".format(
        database.upper())
    for language_tag in ("nl", "en"):
        describe(Literal(title_text, lang=language_tag), 'title')

    # subject
    describe(term('dbpr', 'Asset_management'), 'subject')

    # description
    text_nl = ("Een {} voor experimentele doeleinden ten "
               "behoeve van Rijkswaterstaats Linked Data "
               "pilot project.").format(descriptiontype)
    text_en = ("An experimental {} for purposes of "
               "Rijkswaterstaat's Linked Data pilot "
               "project.").format(descriptiontype)
    describe(Literal(text_nl, lang="nl"), 'description')
    describe(Literal(text_en, lang="en"), 'description')

    # number of triples; the +1 accounts for the count statement itself
    triple_count = Literal(len(g) + 1,
                           datatype=term('xsd', 'nonNegativeInteger'))
    add_property(g, dataset_node, triple_count, term('void', 'triples'))