def _interlink(g, schema, source_database, target_database, source_graph,
               target_graph):
    """Declare the schema-level relations between two databases in ``g``.

    Every relation that ``schema.from_database(source_database)`` yields and
    whose target definition names ``target_database`` is registered in ``g``
    through ``add_type`` (as ``OWL + "ObjectProperty"``), ``add_domain``,
    ``add_range`` and ``add_label``. Relations pointing at any other database
    are skipped.

    :param g: graph that receives the statements; its ``namespace_manager``
        is updated with a prefix per database
    :param schema: mapping object exposing ``from_database``
    :param source_database: name of the database the relations start from
    :param target_database: name of the database the relations must point to
    :param source_graph: graph whose default namespace is used for the domain
    :param target_graph: graph whose default namespace is used for the range
    """
    source_ns = default_namespace_of(source_graph)[0]
    target_ns = default_namespace_of(target_graph)[0]

    # register one prefix per schema namespace
    source_prefix = ('rws.schema.' + source_database.lower(), source_ns + "/")
    target_prefix = ('rws.schema.' + target_database.lower(), target_ns + "/")
    _update_namespaces(g.namespace_manager, source_prefix, target_prefix)

    for relation in schema.from_database(source_database):
        origin = relation['source']
        destination = relation['target']
        if destination['database'] != target_database:
            # relation leads to a database we are not linking against
            continue

        domain_class = _classname_from_def(source_database, origin)
        range_class = _classname_from_def(target_database, destination)
        link = _property_from_def(domain_class, range_class,
                                  destination['property'])

        # forward link: type, domain, range and human-readable label
        add_type(g, link, URIRef(OWL + "ObjectProperty"))
        add_domain(g, link, URIRef(source_ns + "/" + domain_class))
        add_range(g, link, URIRef(target_ns + "/" + range_class))
        label = "Relatie tussen {} and {}".format(domain_class, range_class)
        add_label(g, link, label)
def _enrich(g, database, schema, graph):
    """Add OTL types to instances of ``graph`` based on the ids they reference.

    For each table that ``schema.from_database(database)`` yields, an OTL map
    is built with ``_generate_otl_map``. For every reference in that map the
    graph is queried for ``(instance, referenced id)`` pairs; when the Python
    value of the id is a key of the reference's mapping, the instance is
    typed in ``g`` (via ``add_type``) with the mapped term in the OTL
    namespace.

    :param g: graph that receives the type statements
    :param database: name of the database ``graph`` was generated from
    :param schema: mapping object exposing ``from_database``
    :param graph: graph that is queried for instances
    """
    otl_ns = Namespace("http://otl.rws.nl/otl#")
    abox_ns = default_namespace_of(graph)[0]
    tbox_ns = _generate_tbox_namespace(graph)

    # register prefixes for the instance data and the OTL vocabulary
    _update_namespaces(g.namespace_manager,
                       ('rws.' + database.lower(), abox_ns),
                       ('otl', otl_ns))

    for table in schema.from_database(database):
        class_name = _classname_from_def(database, table)

        # in-memory lookup: reference -> {referenced id -> OTL term, ...}
        otl_map = _generate_otl_map(database, table, schema)
        class_uri = URIRef(tbox_ns + class_name)

        for reference, mapping in otl_map.items():
            own_property = URIRef(tbox_ns + class_name.lower() + '_'
                                  + reference)
            referenced_class = _classname_from_def(
                database, mapping['referenced_table'])
            id_property = URIRef(tbox_ns + referenced_class.lower() + '_id')

            query = _generate_query(class_uri, own_property, id_property)
            for instance, target_id in graph.query(query):
                key = target_id.toPython()
                if key not in mapping.keys():
                    continue
                add_type(g, instance, URIRef(otl_ns + mapping[key]))
def _enrich(g, database, schema, graph):
    """Extend ``g`` with a generated branch for every matching query result.

    For each entry that ``schema.from_database(database)`` yields, the
    attributes named in the entry's source definitions are queried from
    ``graph``. Each result row that carries a non-empty ``source_id`` is
    handed to ``_generate_branch`` together with the attribute values found
    in that row, and the returned graph is added to ``g``.

    :param g: graph that receives the generated branches
    :param database: name of the database ``graph`` was generated from
    :param schema: mapping object exposing ``from_database``
    :param graph: graph that is queried for attribute values
    """
    tbox_ns = _generate_tbox_namespace(graph)
    abox_ns = default_namespace_of(graph)[0]

    # register prefixes for the instance data and GeoSPARQL
    _update_namespaces(
        g.namespace_manager,
        ('rws.' + database.lower(), abox_ns),
        ('geo', Namespace("http://www.opengis.net/ont/geosparql#")))

    for entry in schema.from_database(database):
        source_defs = entry['source']
        target_def = entry['target']

        # the first source definition determines the class name
        class_name = _classname_from_def(database, source_defs[0])
        attributes = [item['property'] for item in source_defs]
        predicates = [
            property_from_mapping(tbox_ns, class_name, name)
            for name in attributes
        ]

        # select target references
        query = _generate_query(URIRef(tbox_ns + class_name), attributes,
                                predicates)
        for row in graph.query(query).bindings:
            source_uri = row[Variable('source_id')]
            if source_uri is None or source_uri == "":
                continue

            # map '[attribute]' placeholders to the values bound in this row
            values = {}
            for name in attributes:
                if Variable(name) in row.keys():
                    placeholder = '[' + name + ']'
                    values[placeholder] = row[Variable(name)].toPython()

            g += _generate_branch(schema, abox_ns, target_def, source_uri,
                                  values)
def _generate_tbox_namespace(graph):
    """Derive the schema (T-box) namespace from a graph's default namespace.

    The default namespace is expected to start with
    ``<base>/linked_data/<database>/`` where ``<database>`` consists of
    lowercase letters; the result is ``<base>/linked_data/schema/<database>/``.

    :param graph: graph whose default namespace is inspected
    :returns: a ``Namespace`` for the schema of the graph's database
    :raises Exception: if the default namespace does not have the expected
        form, including when the database segment is empty
    """
    abox_namespace = default_namespace_of(graph)[0]

    # `[a-z]+` rather than `[a-z]*`: both groups always contain at least a
    # slash, so comparing them against "" could never detect an empty
    # database name; requiring one letter makes the match itself fail instead.
    ns_match = match(r'(?P<base>.*/linked_data/)(?P<database>[a-z]+/)',
                     abox_namespace)
    if ns_match is None:
        raise Exception("Unable to determine namespace")

    return Namespace(
        ns_match.group('base') + "schema/" + ns_match.group('database'))
def _interlink(g, schema, source_database, target_database, source_graph,
               target_graph):
    """Link instances of ``source_graph`` to matching instances of ``target_graph``.

    For every relation that ``schema.from_database(source_database)`` yields
    and whose target definition names ``target_database``, the source graph
    is queried for ``(source instance, referenced value)`` pairs. Each subject
    in the target graph that carries the same value on the target property is
    connected to the source instance in ``g`` via ``add_property``.

    :param g: graph that receives the links; its ``namespace_manager`` is
        updated with a prefix per database
    :param schema: mapping object exposing ``from_database``
    :param source_database: name of the database the relations start from
    :param target_database: name of the database the relations must point to
    :param source_graph: graph queried for referencing instances
    :param target_graph: graph searched for referenced instances
    """
    source_tbox = _generate_tbox_namespace(source_graph)
    target_tbox = _generate_tbox_namespace(target_graph)

    # register one prefix per instance namespace
    source_abox = default_namespace_of(source_graph)[0]
    target_abox = default_namespace_of(target_graph)[0]
    _update_namespaces(g.namespace_manager,
                       ('rws.' + source_database.lower(), source_abox),
                       ('rws.' + target_database.lower(), target_abox))

    for relation in schema.from_database(source_database):
        origin = relation['source']
        destination = relation['target']
        if destination['database'] != target_database:
            # relation leads to a database we are not linking against
            continue

        domain_class = _classname_from_def(source_database, origin)
        range_class = _classname_from_def(target_database, destination)
        link = _property_from_def(domain_class, range_class,
                                  destination['property'])

        # select target references
        source_predicate = property_from_mapping(source_tbox, domain_class,
                                                 origin['property'])
        query = _generate_query(URIRef(source_tbox + domain_class),
                                source_predicate)

        for source_id, target_id in source_graph.query(query):
            if target_id is None or target_id == "":
                continue

            target_predicate = property_from_mapping(
                target_tbox, range_class, destination['property'])
            matches = target_graph.subjects(predicate=target_predicate,
                                            object=target_id)
            for match_id in matches:
                add_property(g, source_id, match_id, link)
def import_graphs(source, target, schema):
    """Read a source and a target graph and work out how to link them.

    :param source: path of the source graph file
    :param target: path of the target graph file
    :param schema: container checked (with ``in``) for both database names
    :returns: dict with the keys ``source_graph``, ``target_graph``,
        ``source_name``, ``target_name`` and ``linker``
    :raises Exception: if a file is not readable or a determined database is
        not contained in ``schema``
    """
    # both files must be readable before anything is parsed
    for path in (source, target):
        if not is_readable(path):
            raise Exception(
                "File missing or wrong permissions: {}".format(path))

    source_graph = read(source)
    target_graph = read(target)

    source_namespace, source_type = default_namespace_of(source_graph)
    target_namespace, target_type = default_namespace_of(target_graph)

    linker = _determine_type(source_type, target_type)
    databases = _determine_databases(source_namespace, target_namespace)

    for name in databases.values():
        if name not in schema:
            raise Exception(
                "Database not contained in mapping: {}".format(name))

    result = {
        'source_graph': source_graph,
        'target_graph': target_graph,
        'source_name': databases['source'],
        'target_name': databases['target'],
        'linker': linker,
    }
    return result
def import_graph(filename, schema):
    """Read a single graph and identify the database it belongs to.

    :param filename: path of the graph file
    :param schema: object whose ``schema`` attribute maps database names
    :returns: dict with the keys ``graph``, ``type`` and ``name``
    :raises Exception: if the file is not readable or the determined database
        is not a key of ``schema.schema``
    """
    if not is_readable(filename):
        message = "File missing or wrong permissions: {}".format(filename)
        raise Exception(message)

    graph = read(filename)
    namespace, graph_type = default_namespace_of(graph)
    database = _determine_database(namespace)

    known_databases = schema.schema.keys()
    if database not in known_databases:
        message = "Database not contained in mapping: {}".format(database)
        raise Exception(message)

    return {'graph': graph, 'type': graph_type, 'name': database}
def run(args, timestamp):
    """Merge the input graphs into one graph and write it to disk.

    :param args: parsed arguments; reads ``graphs`` (input paths), ``output``
        (output path or ``None``) and ``serialization_format``
    :param timestamp: value used in the default output name and passed on to
        ``update_metadata``
    :raises Exception: if fewer than two input graphs are given
    """
    # validate input paths
    if len(args.graphs) < 2:
        raise Exception("Requires at least 2 input graphs")
    _check_paths(args.graphs)

    # validate output path; fall back to a timestamped name
    output_path = args.output
    if output_path is None:
        output_path = "./merge_{}".format(timestamp)
    if not is_writable(output_path):
        # NOTE(review): returns silently here; presumably is_writable reports
        # the problem itself -- confirm
        return

    print("Merging graphs...")
    pi = ProgressIndicator()
    pi.start()
    try:
        graph = multiread(args.graphs)
        update_metadata(graph, default_namespace_of(graph)[0], timestamp)
    finally:
        # stop the indicator even when reading or merging fails, so a failed
        # run does not leave it running
        pi.stop()

    # write graph
    print("Writing graph to disk...")
    write(graph, output_path, args.serialization_format)