Ejemplo n.º 1
0
def _interlink(g, schema, source_database, target_database, source_graph,
               target_graph):
    """Declare the properties that link two database schemas in ``g``.

    Every relation that ``schema`` lists for ``source_database`` and whose
    target lives in ``target_database`` is added to ``g`` as an
    ``ObjectProperty`` with a domain, a range and a label. Relations that
    point to any other database are skipped.
    """
    source_ns = default_namespace_of(source_graph)[0]
    target_ns = default_namespace_of(target_graph)[0]

    # register one schema prefix per database
    source_prefix = ('rws.schema.' + source_database.lower(), source_ns + "/")
    target_prefix = ('rws.schema.' + target_database.lower(), target_ns + "/")
    _update_namespaces(g.namespace_manager, source_prefix, target_prefix)

    for relation in schema.from_database(source_database):
        source_def = relation['source']
        target_def = relation['target']

        # only relations into the requested target database are of interest
        if target_def['database'] != target_database:
            continue

        # names of both ends and of the property connecting them
        domain_name = _classname_from_def(source_database, source_def)
        range_name = _classname_from_def(target_database, target_def)
        link = _property_from_def(domain_name, range_name,
                                  target_def['property'])

        # forward link
        add_type(g, link, URIRef(OWL + "ObjectProperty"))

        domain_uri = URIRef(source_ns + "/" + domain_name)
        add_domain(g, link, domain_uri)

        range_uri = URIRef(target_ns + "/" + range_name)
        add_range(g, link, range_uri)

        label = "Relatie tussen {} and {}".format(domain_name, range_name)
        add_label(g, link, label)
Ejemplo n.º 2
0
def _enrich(g, database, schema, graph):
    """Add OTL types to ``g`` for instances found in ``graph``.

    For each table that ``schema`` lists for ``database`` an OTL map is
    generated. Every reference in that map is queried on ``graph``; when the
    Python value of a returned target id is a key of the reference's map
    entry, the source instance is typed in ``g`` with the mapped OTL class.
    """
    otl_ns = Namespace("http://otl.rws.nl/otl#")
    abox_ns = default_namespace_of(graph)[0]
    tbox_ns = _generate_tbox_namespace(graph)

    # update namespaces
    _update_namespaces(g.namespace_manager,
                       ('rws.' + database.lower(), abox_ns),
                       ('otl', otl_ns))

    for tablename in schema.from_database(database):
        class_name = _classname_from_def(database, tablename)

        # build in-memory map
        otl_map = _generate_otl_map(database, tablename, schema)

        class_uri = URIRef(tbox_ns + class_name)
        for reference in otl_map.keys():
            property_uri = URIRef(tbox_ns + class_name.lower() + '_' +
                                  reference)

            reference_map = otl_map[reference]
            referenced_class = _classname_from_def(
                database, reference_map['referenced_table'])
            referenced_id_uri = URIRef(tbox_ns + referenced_class.lower() +
                                       '_id')

            query = _generate_query(class_uri, property_uri,
                                    referenced_id_uri)
            for instance, target_id in graph.query(query):
                key = target_id.toPython()
                if key not in reference_map:
                    continue

                add_type(g, instance, URIRef(otl_ns + reference_map[key]))
Ejemplo n.º 3
0
def _enrich(g, database, schema, graph):
    """Add a generated branch to ``g`` for each matching row in ``graph``.

    For each entry that ``schema`` lists for ``database``, the attributes of
    the entry's source definitions are queried on ``graph``. Every result row
    with a non-empty ``source_id`` binding yields a branch from
    ``_generate_branch``, which is added to ``g``.
    """
    tbox_ns = _generate_tbox_namespace(graph)

    # update namespaces
    abox_ns = default_namespace_of(graph)[0]
    _update_namespaces(
        g.namespace_manager,
        ('rws.' + database.lower(), abox_ns),
        ('geo', Namespace("http://www.opengis.net/ont/geosparql#")))

    for entry in schema.from_database(database):
        source_def = entry['source']
        target_def = entry['target']

        # the class name is taken from the first source definition
        class_name = _classname_from_def(database, source_def[0])

        attributes = [item['property'] for item in source_def]
        properties = [property_from_mapping(tbox_ns, class_name, name)
                      for name in attributes]

        # select target references
        query = _generate_query(URIRef(tbox_ns + class_name), attributes,
                                properties)

        for binding in graph.query(query).bindings:
            source_uri = binding[Variable('source_id')]
            if source_uri is None or source_uri == "":
                continue

            # collect the bound attributes, keyed as '[attribute]'
            values = {}
            for name in attributes:
                if Variable(name) in binding.keys():
                    values['[' + name + ']'] = binding[Variable(name)].toPython()

            branch = _generate_branch(schema, abox_ns, target_def, source_uri,
                                      values)
            # augmented assignment is kept on purpose: it is what hands the
            # branch to the caller's graph (presumably an rdflib Graph, which
            # adds in place -- verify against caller)
            g += branch
Ejemplo n.º 4
0
def _generate_tbox_namespace(graph):
    """Derive the schema namespace from the default namespace of ``graph``.

    The default namespace is expected to start with
    ``<base>/linked_data/<database>/``; the result is the same prefix with
    ``schema/`` inserted between the base and the database segment.

    Raises:
        Exception: if the default namespace does not have that shape or the
            database segment is empty.
    """
    ns_match = match('(?P<base>.*/linked_data/)(?P<database>[a-z]*/)',
                     default_namespace_of(graph)[0])
    if ns_match is None:
        raise Exception("Unable to determine namespace")

    base = ns_match.group('base')
    database = ns_match.group('database')

    # Both groups include literal slashes, so neither can ever be the empty
    # string. A missing database name shows up as a bare "/" instead, which
    # the former `== ""` comparison never caught. 'base' always contains
    # "/linked_data/" and needs no check.
    if database == "/":
        raise Exception("Unable to determine namespace")

    return Namespace(base + "schema/" + database)
Ejemplo n.º 5
0
def _interlink(g, schema, source_database, target_database, source_graph,
               target_graph):
    """Link instances of ``source_graph`` to instances of ``target_graph``.

    For every relation that ``schema`` lists for ``source_database`` and whose
    target lives in ``target_database``, the referencing values are queried
    from ``source_graph``. Each subject in ``target_graph`` that carries the
    same value on the target property is connected to the source instance in
    ``g`` through the relation's property.
    """
    source_tbox = _generate_tbox_namespace(source_graph)
    target_tbox = _generate_tbox_namespace(target_graph)

    # update namespaces
    source_abox = default_namespace_of(source_graph)[0]
    target_abox = default_namespace_of(target_graph)[0]
    _update_namespaces(g.namespace_manager,
                       ('rws.' + source_database.lower(), source_abox),
                       ('rws.' + target_database.lower(), target_abox))

    for relation in schema.from_database(source_database):
        source_def = relation['source']
        target_def = relation['target']

        # relations into other databases are not handled here
        if target_def['database'] != target_database:
            continue

        # define relation
        source_class = _classname_from_def(source_database, source_def)
        target_class = _classname_from_def(target_database, target_def)
        link = _property_from_def(source_class, target_class,
                                  target_def['property'])

        # select target references
        source_class_uri = URIRef(source_tbox + source_class)
        source_predicate = property_from_mapping(source_tbox, source_class,
                                                 source_def['property'])
        query = _generate_query(source_class_uri, source_predicate)

        for source_id, target_id in source_graph.query(query):
            if target_id is None or target_id == "":
                continue

            target_predicate = property_from_mapping(target_tbox,
                                                     target_class,
                                                     target_def['property'])
            matches = target_graph.subjects(predicate=target_predicate,
                                            object=target_id)
            for match_id in matches:
                add_property(g, source_id, match_id, link)
Ejemplo n.º 6
0
def import_graphs(source, target, schema):
    """Read the source and target graphs and work out how to link them.

    Returns a dict with the keys ``source_graph``, ``target_graph``,
    ``source_name``, ``target_name`` and ``linker``.

    Raises:
        Exception: if either file is not readable, or if one of the
            determined databases is not contained in ``schema``.
    """
    for path in (source, target):
        if not is_readable(path):
            raise Exception("File missing or wrong permissions: {}".format(path))

    source_graph = read(source)
    target_graph = read(target)

    source_namespace, source_type = default_namespace_of(source_graph)
    target_namespace, target_type = default_namespace_of(target_graph)

    linker = _determine_type(source_type, target_type)
    databases = _determine_databases(source_namespace, target_namespace)

    # every database involved has to be covered by the mapping
    for name in databases.values():
        if name not in schema:
            raise Exception("Database not contained in mapping: {}".format(name))

    result = {}
    result['source_graph'] = source_graph
    result['target_graph'] = target_graph
    result['source_name'] = databases['source']
    result['target_name'] = databases['target']
    result['linker'] = linker
    return result
Ejemplo n.º 7
0
def import_graph(filename, schema):
    """Read the graph in ``filename`` and identify its database.

    Returns a dict with the keys ``graph``, ``type`` and ``name``.

    Raises:
        Exception: if the file is not readable, or if the determined
            database is not a key of ``schema.schema``.
    """
    if not is_readable(filename):
        message = "File missing or wrong permissions: {}".format(filename)
        raise Exception(message)

    graph = read(filename)
    namespace, gtype = default_namespace_of(graph)
    database = _determine_database(namespace)

    known_databases = schema.schema.keys()
    if database in known_databases:
        return {'graph': graph, 'type': gtype, 'name': database}

    raise Exception("Database not contained in mapping: {}".format(database))
Ejemplo n.º 8
0
def run(args, timestamp):
    """Merge the graphs listed in ``args.graphs`` and write the result.

    The merged graph is written to ``args.output``, or to
    ``./merge_<timestamp>`` when no output path is given, using
    ``args.serialization_format``. Nothing is merged or written when the
    output path is not writable.

    Raises:
        Exception: if fewer than two input graphs are given.
    """
    # validate input paths
    if len(args.graphs) < 2:
        raise Exception("Requires at least 2 input graphs")
    _check_paths(args.graphs)

    # validate output path
    output_path = args.output
    if output_path is None:
        output_path = "./merge_{}".format(timestamp)
    if not is_writable(output_path):
        # deliberate quiet exit, unchanged from the original behaviour
        return

    print("Merging graphs...")
    pi = ProgressIndicator()
    pi.start()
    try:
        graph = multiread(args.graphs)
        update_metadata(graph, default_namespace_of(graph)[0], timestamp)
    finally:
        # stop the indicator even when reading or updating fails, so it is
        # never left started while the exception propagates
        pi.stop()

    # write graph
    print("Writing graph to disk...")
    write(graph, output_path, args.serialization_format)