Ejemplo n.º 1
0
def _enrich(g, database, schema, graph):
    """Type instances found in ``graph`` with OTL classes, writing into ``g``.

    For every table yielded by ``schema.from_database(database)`` an OTL map
    is generated. For each reference in that map, ``graph`` is queried for
    (instance, target id) pairs; whenever the Python value of the target id
    is a key of the reference's map entry, the instance is typed in ``g``
    with the OTL class that the entry names for that id.
    """
    # update namespaces
    otl_ns = Namespace("http://otl.rws.nl/otl#")
    abox_ns = default_namespace_of(graph)[0]
    tbox_ns = _generate_tbox_namespace(graph)
    _update_namespaces(
        g.namespace_manager,
        ('rws.' + database.lower(), abox_ns),
        ('otl', otl_ns))

    for tablename in schema.from_database(database):
        class_name = _classname_from_def(database, tablename)

        # build in-memory map
        otl_map = _generate_otl_map(database, tablename, schema)

        class_uri = URIRef(tbox_ns + class_name)
        for reference in otl_map:
            entry = otl_map[reference]

            property_uri = URIRef(
                tbox_ns + class_name.lower() + '_' + reference)
            target_class_name = _classname_from_def(
                database, entry['referenced_table'])
            target_id_property = URIRef(
                tbox_ns + target_class_name.lower() + '_id')

            query = _generate_query(class_uri, property_uri,
                                    target_id_property)
            for instance, target_id in graph.query(query):
                key = target_id.toPython()
                if key not in entry:
                    # no OTL class known for this target id
                    continue
                add_type(g, instance, URIRef(otl_ns + entry[key]))
Ejemplo n.º 2
0
def _interlink(g, schema, source_database, target_database, source_graph,
               target_graph):
    """Declare object properties in ``g`` linking source and target classes.

    Each relation yielded by ``schema.from_database(source_database)`` whose
    target definition points at ``target_database`` becomes an
    ``ObjectProperty`` with the source class as domain, the target class as
    range, and a label naming both classes. Relations that target any other
    database are ignored.
    """
    source_ns = default_namespace_of(source_graph)[0]
    target_ns = default_namespace_of(target_graph)[0]

    # update namespaces
    _update_namespaces(
        g.namespace_manager,
        ('rws.schema.' + source_database.lower(), source_ns + "/"),
        ('rws.schema.' + target_database.lower(), target_ns + "/"))

    for relation in schema.from_database(source_database):
        source_def = relation['source']
        target_def = relation['target']

        # only relations that point into the requested target database
        if target_def['database'] != target_database:
            continue

        # define relation
        source_class = _classname_from_def(source_database, source_def)
        target_class = _classname_from_def(target_database, target_def)
        link = _property_from_def(source_class, target_class,
                                  target_def['property'])

        # forward link
        add_type(g, link, URIRef(OWL + "ObjectProperty"))
        add_domain(g, link, URIRef(source_ns + "/" + source_class))
        add_range(g, link, URIRef(target_ns + "/" + target_class))
        label = "Relatie tussen {} and {}".format(source_class, target_class)
        add_label(g, link, label)
Ejemplo n.º 3
0
def _relation_to_graph(g, classname, class_node, mapping):
    """Describe one relation of a class, and its inverse, in ``g``.

    Adds an ``ObjectProperty`` from ``class_node`` to the class named by
    ``mapping['targetclassname']`` (with label, comment and an optional
    ``subPropertyOf`` link), followed by a second ``ObjectProperty`` named
    ``<target>_inv_<class>`` with domain and range swapped.
    """
    ns = dict(g.namespace_manager.namespaces())
    base = ns[DEFAULT_PREFIX]
    prop_name = mapping['property']

    # forward link
    forward = property_from_mapping(base, classname, prop_name)
    add_type(g, forward, URIRef(ns['owl'] + 'ObjectProperty'))
    add_domain(g, forward, class_node)
    target_name = mapping['targetclassname']
    target_node = URIRef(base + target_name)
    add_range(g, forward, target_node)
    add_label(g, forward, prop_name.title())
    forward_comment = "Relatie '{}' tussen classen '{}' en '{}'".format(
        prop_name, classname, target_name)
    add_comment(g, forward, forward_comment)

    # make the link a subProperty if a parent property is given
    parent_property = mapping['subPropertyOf']
    if parent_property is not None:
        add_subPropertyOf(g, forward, URIRef(parent_property))

    # backwards link
    backward = URIRef(
        base + "{}_inv_{}".format(target_name.lower(), classname.lower()))
    add_type(g, backward, URIRef(ns['owl'] + 'ObjectProperty'))
    add_domain(g, backward, target_node)
    add_range(g, backward, class_node)
    add_label(g, backward, classname.title() + " (inverse)")
    backward_comment = (
        "Relatie '{}' (inverse) tussen classen '{}' en '{}'".format(
            classname, target_name, classname))
    add_comment(g, backward, backward_comment)
Ejemplo n.º 4
0
def _layer_to_graph(g, gdb, layer_name, area, mapper):
    """Translate the features of one layer into triples in ``g``.

    The layer's class is defined first via ``_layer_to_class``. Then every
    feature of ``layer_name`` that has a positive FID, lies within ``area``
    (according to ``gdb.within``) and has a WKT geometry gets:

    * a feature node typed with the layer's class and a label,
    * one literal per mapped attribute whose value is neither ``None`` nor
      listed in ``EXCL_VALUES``,
    * a geometry node linked through the ``geo`` prefix's ``hasGeometry``
      carrying the WKT through ``asWKT``.

    Nothing is translated when the layer mapping has no identifier.

    Args:
        g: graph that receives the triples; its namespace manager supplies
            the prefixes used here.
        gdb: geodatabase wrapper providing ``features_of``, ``within``,
            ``geometry_of`` and ``geometryWKT_of``.
        layer_name: name of the layer; also the key into the mapper schema.
        area: region passed to ``gdb.within`` to decide whether a feature
            is in scope.
        mapper: provides ``schema`` and ``attributes(layer_name)``.
    """
    ns = dict(g.namespace_manager.namespaces())
    logger.info("Processing layer {}".format(layer_name))
    mapping = mapper.schema['schema'][layer_name]

    # define class
    class_node = _layer_to_class(g, mapping)

    # translate features
    attributes = list(mapper.attributes(layer_name))

    # The identifier check does not depend on the feature, so decide once
    # instead of re-testing (and skipping) every single feature.
    if mapping['identifier'] is None:
        return

    for feat in gdb.features_of(layer_name):
        fid = feat.GetFID()
        if fid is None or fid <= 0:
            continue

        # continue if out of scope
        if not gdb.within(gdb.geometry_of(feat), area):
            continue

        # geometry
        geom_wkt, gtype = gdb.geometryWKT_of(feat)
        if geom_wkt is None:
            continue

        # node for this feature
        feat_node = URIRef(ns[DEFAULT_PREFIX] + gen_hash(layer_name+mapping['classname'], fid))
        add_type(g, feat_node, class_node)
        add_label(g, feat_node, "{} {} ({})".format(mapping['classname'], fid, gtype))

        for k, v in feat.items().items():
            if v is None or v in EXCL_VALUES:
                continue
            if k not in attributes:
                continue

            # create node
            attr = mapping['attributes'][k]
            # Reset per attribute: a failed conversion must never fall
            # through with the literal of a previously handled attribute.
            attr_node = None
            try:
                if isinstance(v, str):
                    v = v.strip()
                attr_node = Literal(v, datatype=URIRef(attr['datatype']))
            except UnicodeDecodeError:
                if isinstance(v, bytes):
                    # retry with undecodable bytes dropped
                    v = v.decode('utf-8', 'ignore')
                    attr_node = Literal(v, datatype=URIRef(attr['datatype']))
                else:
                    logger.warning(
                        "Skipping attribute %s of feature %s in layer %s: "
                        "value could not be decoded", k, fid, layer_name)

            if attr_node is None:
                continue

            # link to node
            attr_link = URIRef(ns[DEFAULT_SCHEMA_PREFIX] + "{}_{}".format(mapping['classname'].lower(), attr['property']))
            add_property(g, feat_node, attr_node, attr_link)

        # add geometry node
        geom_node = URIRef(ns[DEFAULT_PREFIX] + gen_hash(geom_wkt, gtype))
        add_type(g, geom_node, URIRef(ns['sf'] + _geo_type(gtype)))
        add_label(g, geom_node, "{} Geometry".format(_geo_type(gtype)))
        add_property(g, feat_node, geom_node, URIRef(ns['geo'] + 'hasGeometry'))

        # include WKT
        geom_wkt_node = Literal(geom_wkt, datatype=URIRef(ns['geo'] + 'wktLiteral'))
        add_property(g, geom_node, geom_wkt_node, URIRef(ns['geo'] + 'asWKT'))
Ejemplo n.º 5
0
def _attribute_to_graph(g, classname, class_node, mapping):
    """Describe one attribute of a class as a ``DatatypeProperty`` in ``g``.

    The property node is named ``<classname lowercased>_<property>`` in the
    default namespace. It gets ``class_node`` as domain, the mapped datatype
    as range, a title-cased label, a comment, and an optional
    ``subPropertyOf`` link when the mapping provides one.
    """
    ns = dict(g.namespace_manager.namespaces())
    prop_name = mapping['property']

    local_name = "{}_{}".format(classname.lower(), prop_name)
    prop_node = URIRef(ns[DEFAULT_PREFIX] + local_name)

    add_type(g, prop_node, URIRef(ns['owl'] + 'DatatypeProperty'))
    add_domain(g, prop_node, class_node)
    add_range(g, prop_node, URIRef(mapping['datatype']))
    add_label(g, prop_node, prop_name.title())
    comment = "Attribuut '{}' behorende tot class '{}'".format(
        prop_name, classname)
    add_comment(g, prop_node, comment)

    # make the attribute a subProperty if a parent property is given
    parent_property = mapping['subPropertyOf']
    if parent_property is not None:
        add_subPropertyOf(g, prop_node, URIRef(parent_property))
Ejemplo n.º 6
0
def _table_to_class(g, table, mapper):
    """Describe ``table`` as an OWL class in ``g``, with its members.

    Adds the class node (type, label and optional ``subClassOf``), then
    delegates every attribute and relation that ``mapper`` lists for the
    table to ``_attribute_to_graph`` and ``_relation_to_graph``.
    """
    ns = dict(g.namespace_manager.namespaces())
    mapping = mapper.schema['schema'][table]
    classname = mapping['classname']

    class_node = URIRef(ns[DEFAULT_PREFIX] + classname)
    add_type(g, class_node, URIRef(ns['owl'] + "Class"))
    add_label(g, class_node, classname)

    parent_class = mapping['subClassOf']
    if parent_class is not None:
        add_subClassOf(g, class_node, URIRef(parent_class))

    # attributes first, then relations
    for attr_key in mapper.attributes(table):
        attr_mapping = mapping['attributes'][attr_key]
        _attribute_to_graph(g, classname, class_node, attr_mapping)

    for rel_key in mapper.relations(table):
        rel_mapping = mapping['relations'][rel_key]
        _relation_to_graph(g, classname, class_node, rel_mapping)
Ejemplo n.º 7
0
def _generate_branch(schema, ns_abox, tail, parent, values):
    """Build a graph for the chain described by ``tail``, hanging off ``parent``.

    When ``tail['tail']`` is a string the chain ends here: the end point is
    either a typed node (``tail['type'] == "URIRef"``) or a literal compiled
    from ``values``. Otherwise an intermediate typed node is created and the
    nested ``tail['tail']`` is processed recursively with that node as the
    new parent.

    Returns:
        A new graph holding the triples of this branch and its sub-branches.
    """
    branch = Graph()
    ends_here = type(tail['tail']) is str

    # Literal end point: attach the compiled value directly to the parent.
    if ends_here and tail['type'] != "URIRef":
        literal = Literal(schema.compile_value(tail, values),
                          datatype=URIRef(tail['head']))
        add_property(branch, parent, literal, URIRef(tail['property']))
        return branch

    # Node end point or intermediate step: both create the same typed node.
    class_node = URIRef(tail['head'])
    node = URIRef(ns_abox + gen_hash(tail['head'], parent.toPython()))

    add_type(branch, node, class_node)
    add_property(branch, parent, node, URIRef(tail['property']))

    if not ends_here:
        branch += _generate_branch(schema, ns_abox, tail['tail'], node,
                                   values)

    return branch
Ejemplo n.º 8
0
def _table_to_graph(g, server, references, table, mapper):
    """Translate the referenced records of one table into triples in ``g``.

    The table's class is defined first via ``_table_to_class``. Then every
    record whose identifier value is among the references already collected
    for this table gets:

    * a record node typed with the table's class and a label,
    * one literal per mapped attribute with a non-``None`` value,
    * per mapped relation, a forward link to the referenced node, an inverse
      link back, and a new entry in ``references`` for the target table.

    Nothing is translated when the table mapping has no identifier.

    Args:
        g: graph that receives the triples; its namespace manager supplies
            the prefixes used here.
        server: data source providing ``records(table)``.
        references: registry providing ``references(table=...)`` and
            ``add_reference(table, value)``.
        table: table name; also the key into the mapper schema.
        mapper: provides ``schema``, ``attributes(table)`` and
            ``relations(table)``.
    """
    ns = dict(g.namespace_manager.namespaces())
    logger.info("Processing table {}".format(table))
    mapping = mapper.schema['schema'][table]

    # define class
    class_node = _table_to_class(g, mapping)

    # translate records
    referenced_nodes = list(references.references(table=table))
    attributes = list(mapper.attributes(table))
    relations = list(mapper.relations(table))

    # The identifier check does not depend on the record, so decide once
    # instead of fetching the records and skipping every one of them.
    if mapping['identifier'] is None:
        return

    records = server.records(table)
    for rec in records:
        if rec[mapping['identifier']] not in referenced_nodes:
            continue

        # node for this record
        rec_node = URIRef(
            ns[DEFAULT_PREFIX] +
            gen_hash(mapping['classname'], rec[mapping['identifier']]))
        add_type(g, rec_node, class_node)
        add_label(
            g, rec_node, "{} {}".format(mapping['classname'],
                                        rec[mapping['identifier']]))

        for k, v in rec.items():
            if v is None:
                continue
            if k in attributes:
                # create node
                attr = mapping['attributes'][k]
                # Reset per attribute: a failed conversion must never fall
                # through with the literal of a previously handled attribute.
                attr_node = None
                try:
                    if isinstance(v, str):
                        v = v.strip()
                    attr_node = Literal(v, datatype=URIRef(attr['datatype']))
                except UnicodeDecodeError:
                    if isinstance(v, bytes):
                        # retry with undecodable bytes dropped
                        v = v.decode('utf-8', 'ignore')
                        attr_node = Literal(v,
                                            datatype=URIRef(attr['datatype']))
                    else:
                        logger.warning(
                            "Skipping attribute %s of record %s in table "
                            "%s: value could not be decoded", k,
                            rec[mapping['identifier']], table)

                if attr_node is not None:
                    # link to node
                    attr_link = URIRef(
                        ns[DEFAULT_SCHEMA_PREFIX] + "{}_{}".format(
                            mapping['classname'].lower(), attr['property']))
                    add_property(g, rec_node, attr_node, attr_link)
            if k in relations:
                rel = mapping['relations'][k]

                # create node
                referenced_node = URIRef(ns[DEFAULT_PREFIX] +
                                         gen_hash(rel['targetclassname'], v))

                # link to node
                rel_link = URIRef(ns[DEFAULT_SCHEMA_PREFIX] + "{}_{}".format(
                    mapping['classname'].lower(), rel['property']))
                add_property(g, rec_node, referenced_node, rel_link)

                # add back link
                inverse_rel_link = URIRef(
                    ns[DEFAULT_SCHEMA_PREFIX] +
                    "{}_inv_{}".format(rel['targetclassname'].lower(),
                                       mapping['classname'].lower()))
                add_property(g, referenced_node, rec_node, inverse_rel_link)

                # store referenced nodes for further processing
                references.add_reference(rel['targettable'], v)
Ejemplo n.º 9
0
def add_metadata(g, base_namespace, timestamp, database, is_ontology=False):
    """Attach descriptive metadata about the graph itself to ``g``.

    The subject of all statements is the namespace registered under the
    prefix ``base_namespace``. It is typed as an OWL ontology when
    ``is_ontology`` is true and as a VoID dataset otherwise, then annotated
    with modification time (from ``timestamp``), creator, publisher, rights
    holder, language, Dutch and English title and description, subject, and
    a triple count of ``len(g) + 1`` taken just before that count is added.
    """
    logger.info("Adding meta-data")

    # update namespaces
    _update_namespaces(g.namespace_manager)

    ns = dict(g.namespace_manager.namespaces())
    base = URIRef(ns[base_namespace])

    def term(prefix, local_name):
        # Prefixes are resolved at the point of use, not up front.
        return URIRef(ns[prefix] + local_name)

    def describe(value, prefix, local_name):
        add_property(g, base, value, term(prefix, local_name))

    # type
    if is_ontology:
        add_type(g, base, term('owl', 'Ontology'))
        descriptiontype = "ontology"
    else:
        add_type(g, base, term('void', 'Dataset'))
        descriptiontype = "dataset"

    # modified
    modified = Literal(datetime.fromtimestamp(timestamp).isoformat(),
                       datatype=term('xsd', 'dateTime'))
    describe(modified, 'dcterms', 'modified')

    # creator/published
    creator = term('dbpr', 'Vrije_Universiteit_Amsterdam')
    for role in ('creator', 'publisher'):
        describe(creator, 'dcterms', role)

    # rights holder
    describe(term('dbpr', 'Rijkswaterstaat'), 'dcterms', 'rightsHolder')

    # language
    describe(URIRef("http://id.loc.gov/vocabulary/iso639-1/nl"),
             'dcterms', 'language')

    # title (same text, tagged once per language)
    title_text = "Rijkswaterstaat Linked Data Pilot Project - {}".format(
        database.upper())
    titles = [Literal(title_text, lang=code) for code in ("nl", "en")]
    for title in titles:
        describe(title, 'dcterms', 'title')

    # subject
    describe(term('dbpr', 'Asset_management'), 'dcterms', 'subject')

    # description
    descriptions = [
        Literal(
            "Een {} voor experimentele doeleinden ten behoeve van "
            "Rijkswaterstaats Linked Data pilot project.".format(
                descriptiontype),
            lang="nl"),
        Literal(
            "An experimental {} for purposes of Rijkswaterstaat's Linked "
            "Data pilot project.".format(descriptiontype),
            lang="en"),
    ]
    for description in descriptions:
        describe(description, 'dcterms', 'description')

    # number of triples, counting the statement that is about to be added
    ntriples = Literal(len(g) + 1,
                       datatype=term('xsd', 'nonNegativeInteger'))
    describe(ntriples, 'void', 'triples')