Example #1
0
def _layer_to_graph(g, gdb, layer_name, area, mapper):
    """Translate the features of one geodatabase layer into triples in `g`.

    A class node is defined for the layer via `_layer_to_class`. Every
    feature that has a positive FID, passes the `gdb.within` test against
    `area` and has a WKT geometry then gets a typed and labelled node, one
    property per mapped attribute, and a linked geometry node that carries
    the WKT literal.

    :param g: graph that receives the triples; its namespace manager must
        bind DEFAULT_PREFIX, DEFAULT_SCHEMA_PREFIX, 'sf' and 'geo'
    :param gdb: geodatabase wrapper providing `features_of`, `geometry_of`,
        `within` and `geometryWKT_of`
    :param layer_name: name of the layer; key into mapper.schema['schema']
    :param area: passed to `gdb.within` to decide whether a feature is in
        scope
    :param mapper: schema mapper providing `schema` and `attributes`
    :returns: None; `g` is modified in place
    """
    ns = dict(g.namespace_manager.namespaces())
    logger.info("Processing layer {}".format(layer_name))
    mapping = mapper.schema['schema'][layer_name]

    # define class
    class_node = _layer_to_class(g, mapping)

    # Without an identifier no feature is ever translated, so there is no
    # point in reading the layer at all.
    if mapping['identifier'] is None:
        return

    # translate features
    attributes = list(mapper.attributes(layer_name))

    for feat in gdb.features_of(layer_name):
        fid = feat.GetFID()
        if fid is None or fid <= 0:
            continue

        # continue if out of scope
        if not gdb.within(gdb.geometry_of(feat), area):
            continue

        # geometry
        geom_wkt, gtype = gdb.geometryWKT_of(feat)
        if geom_wkt is None:
            continue

        # node for this feature
        feat_node = URIRef(ns[DEFAULT_PREFIX] +
                           gen_hash(layer_name + mapping['classname'], fid))
        add_type(g, feat_node, class_node)
        add_label(g, feat_node,
                  "{} {} ({})".format(mapping['classname'], fid, gtype))

        for k, v in feat.items().items():
            if v is None or v in EXCL_VALUES:
                continue
            if k not in attributes:
                continue

            # create node
            attr = mapping['attributes'][k]
            datatype = URIRef(attr['datatype'])
            try:
                if isinstance(v, str):
                    v = v.strip()
                attr_node = Literal(v, datatype=datatype)
            except UnicodeDecodeError:
                if not isinstance(v, bytes):
                    # Nothing sensible can be built from this value. Skip
                    # it rather than attach the literal of a previously
                    # processed attribute (or hit an unbound name).
                    logger.warning(
                        "Skipping attribute {} of layer {}: "
                        "value could not be decoded".format(k, layer_name))
                    continue
                v = v.decode('utf-8', 'ignore')
                attr_node = Literal(v, datatype=datatype)

            # link to node
            attr_link = URIRef(
                ns[DEFAULT_SCHEMA_PREFIX] +
                "{}_{}".format(mapping['classname'].lower(),
                               attr['property']))
            add_property(g, feat_node, attr_node, attr_link)

        # add geometry node
        geom_node = URIRef(ns[DEFAULT_PREFIX] + gen_hash(geom_wkt, gtype))
        add_type(g, geom_node, URIRef(ns['sf'] + _geo_type(gtype)))
        add_label(g, geom_node, "{} Geometry".format(_geo_type(gtype)))
        add_property(g, feat_node, geom_node,
                     URIRef(ns['geo'] + 'hasGeometry'))

        # include WKT
        geom_wkt_node = Literal(geom_wkt,
                                datatype=URIRef(ns['geo'] + 'wktLiteral'))
        add_property(g, geom_node, geom_wkt_node,
                     URIRef(ns['geo'] + 'asWKT'))
Example #2
0
def _interlink(g, schema, source_database, target_database, source_graph,
               target_graph):
    """Add links between nodes of two database graphs to `g`.

    For every relation that `schema` lists from `source_database` and whose
    target lies in `target_database`, the source graph is queried for
    (source node, reference value) pairs. Each node in the target graph
    that carries the same reference value on the target property is then
    linked to the source node through the relation's property.

    :param g: graph that receives the link triples; its namespace manager
        is extended with an 'rws.<database>' prefix for both databases
    :param schema: interlink schema providing `from_database`
    :param source_database: name of the database the relations start from
    :param target_database: name of the database the relations point to
    :param source_graph: graph holding the source database's data
    :param target_graph: graph holding the target database's data
    :returns: None; `g` is modified in place
    """
    ns_tbox_source = _generate_tbox_namespace(source_graph)
    ns_tbox_target = _generate_tbox_namespace(target_graph)

    # update namespaces
    ns_abox_source = default_namespace_of(source_graph)[0]
    ns_abox_target = default_namespace_of(target_graph)[0]
    _update_namespaces(g.namespace_manager,
                       ('rws.' + source_database.lower(), ns_abox_source),
                       ('rws.' + target_database.lower(), ns_abox_target))

    for relation in schema.from_database(source_database):
        source_def = relation['source']
        target_def = relation['target']

        if target_def['database'] != target_database:
            continue

        # define relation
        source_class = _classname_from_def(source_database, source_def)
        target_class = _classname_from_def(target_database, target_def)
        rel = _property_from_def(source_class, target_class,
                                 target_def['property'])

        # The predicate used to look up matches is the same for every
        # query row of this relation, so build it once.
        target_property = property_from_mapping(ns_tbox_target,
                                                target_class,
                                                target_def['property'])

        # select target references
        q = _generate_query(
            URIRef(ns_tbox_source + source_class),
            property_from_mapping(ns_tbox_source, source_class,
                                  source_def['property']))

        for source_id, target_id in source_graph.query(q):
            if target_id is None or target_id == "":
                continue

            for match_id in target_graph.subjects(predicate=target_property,
                                                  object=target_id):
                add_property(g, source_id, match_id, rel)
Example #3
0
def update_metadata(g, base_namespace, timestamp):
    """Refresh the modification date and the triple count stored in `g`.

    All existing dcterms:modified and void:triples statements are removed
    from the graph, after which fresh ones are attached to
    `base_namespace`.

    :param g: graph to update in place
    :param base_namespace: node the meta-data statements are attached to
    :param timestamp: POSIX timestamp used for the new modification date
    """
    logger.info("Updating meta-data")

    # update namespaces
    _update_namespaces(g.namespace_manager)

    namespaces = dict(g.namespace_manager.namespaces())

    def term(prefix, local_name):
        # full URI of `local_name` within the namespace bound to `prefix`
        return URIRef(namespaces[prefix] + local_name)

    # remove old data
    for prefix, local_name in (('dcterms', 'modified'), ('void', 'triples')):
        g.remove((None, term(prefix, local_name), None))

    # modified
    stamp = datetime.fromtimestamp(timestamp).isoformat()
    add_property(g, base_namespace,
                 Literal(stamp, datatype=term('xsd', 'dateTime')),
                 term('dcterms', 'modified'))

    # number of triples; the +1 presumably accounts for the count
    # statement itself, which is added right below
    count = len(g) + 1
    add_property(g, base_namespace,
                 Literal(count, datatype=term('xsd', 'nonNegativeInteger')),
                 term('void', 'triples'))
Example #4
0
def _generate_branch(schema, ns_abox, tail, parent, values):
    """Build the graph for one branch definition hanging off `parent`.

    `tail` describes a single step: 'property' links `parent` to the new
    node and 'head' is that node's class (or, for a literal, its
    datatype). When `tail['tail']` is a string the step is a leaf:
    either a resource (when `tail['type']` is "URIRef") or a literal whose
    value comes from `schema.compile_value`. Otherwise `tail['tail']` is
    itself a step definition and is processed recursively below the new
    resource node.

    :returns: a new Graph holding the triples of this branch
    """
    branch = Graph()
    is_leaf = type(tail['tail']) is str

    if is_leaf and tail['type'] != "URIRef":
        # leaf carrying a plain value
        value_node = Literal(schema.compile_value(tail, values),
                             datatype=URIRef(tail['head']))
        add_property(branch, parent, value_node, URIRef(tail['property']))
        return branch

    # resource node: either a URIRef leaf or an intermediate step
    resource_class = URIRef(tail['head'])
    resource = URIRef(ns_abox + gen_hash(tail['head'], parent.toPython()))

    add_type(branch, resource, resource_class)
    add_property(branch, parent, resource, URIRef(tail['property']))

    if not is_leaf:
        # descend into the remainder of the branch
        branch += _generate_branch(schema, ns_abox, tail['tail'], resource,
                                   values)

    return branch
Example #5
0
def _table_to_graph(g, server, references, table, mapper):
    """Translate the referenced records of one table into triples in `g`.

    A class node is defined for the table via `_table_to_class`. Every
    record whose identifier is listed in `references` for this table then
    gets a typed and labelled node, one property per mapped attribute, and
    a forward plus inverse link per mapped relation. The targets of those
    relations are registered with `references` for further processing.

    :param g: graph that receives the triples; its namespace manager must
        bind DEFAULT_PREFIX and DEFAULT_SCHEMA_PREFIX
    :param server: database wrapper providing `records(table)`
    :param references: reference store providing `references(table=...)`
        and `add_reference(table, value)`
    :param table: name of the table; key into mapper.schema['schema']
    :param mapper: schema mapper providing `schema`, `attributes` and
        `relations`
    :returns: None; `g` and `references` are modified in place
    """
    ns = dict(g.namespace_manager.namespaces())
    logger.info("Processing table {}".format(table))
    mapping = mapper.schema['schema'][table]

    # define class
    class_node = _table_to_class(g, mapping)

    # Without an identifier no record is ever translated, so there is no
    # point in reading the table at all.
    identifier = mapping['identifier']
    if identifier is None:
        return

    # translate records
    referenced_nodes = list(references.references(table=table))
    attributes = list(mapper.attributes(table))
    relations = list(mapper.relations(table))

    for rec in server.records(table):
        if rec[identifier] not in referenced_nodes:
            continue

        # node for this record
        rec_node = URIRef(ns[DEFAULT_PREFIX] +
                          gen_hash(mapping['classname'], rec[identifier]))
        add_type(g, rec_node, class_node)
        add_label(g, rec_node,
                  "{} {}".format(mapping['classname'], rec[identifier]))

        for k, v in rec.items():
            if v is None:
                continue
            if k in attributes:
                # create node
                attr = mapping['attributes'][k]
                datatype = URIRef(attr['datatype'])
                attr_node = None
                try:
                    if isinstance(v, str):
                        v = v.strip()
                    attr_node = Literal(v, datatype=datatype)
                except UnicodeDecodeError:
                    if isinstance(v, bytes):
                        v = v.decode('utf-8', 'ignore')
                        attr_node = Literal(v, datatype=datatype)
                    else:
                        # Nothing sensible can be built from this value.
                        # Leave the attribute out rather than attach the
                        # literal of a previously processed column (or hit
                        # an unbound name).
                        logger.warning(
                            "Skipping attribute {} of table {}: "
                            "value could not be decoded".format(k, table))

                if attr_node is not None:
                    # link to node
                    attr_link = URIRef(
                        ns[DEFAULT_SCHEMA_PREFIX] +
                        "{}_{}".format(mapping['classname'].lower(),
                                       attr['property']))
                    add_property(g, rec_node, attr_node, attr_link)
            if k in relations:
                rel = mapping['relations'][k]

                # create node
                referenced_node = URIRef(ns[DEFAULT_PREFIX] +
                                         gen_hash(rel['targetclassname'], v))

                # link to node
                rel_link = URIRef(
                    ns[DEFAULT_SCHEMA_PREFIX] +
                    "{}_{}".format(mapping['classname'].lower(),
                                   rel['property']))
                add_property(g, rec_node, referenced_node, rel_link)

                # add back link
                inverse_rel_link = URIRef(
                    ns[DEFAULT_SCHEMA_PREFIX] +
                    "{}_inv_{}".format(rel['targetclassname'].lower(),
                                       mapping['classname'].lower()))
                add_property(g, referenced_node, rec_node, inverse_rel_link)

                # store referenced nodes for further processing
                references.add_reference(rel['targettable'], v)
Example #6
0
def add_metadata(g, base_namespace, timestamp, database, is_ontology=False):
    """Attach descriptive meta-data about the graph to its base node.

    The base node is the namespace bound to the prefix `base_namespace`.
    It is typed as an owl:Ontology or a void:Dataset and receives the
    modification date, creator, publisher, rights holder, language, titles,
    subject, descriptions and the number of triples.

    :param g: graph to describe; modified in place
    :param base_namespace: prefix whose namespace acts as the subject
    :param timestamp: POSIX timestamp used for the modification date
    :param database: database name, upper-cased into the titles
    :param is_ontology: describe an ontology instead of a dataset
    """
    logger.info("Adding meta-data")

    # update namespaces
    _update_namespaces(g.namespace_manager)

    namespaces = dict(g.namespace_manager.namespaces())
    subject = URIRef(namespaces[base_namespace])

    def term(prefix, local_name):
        # full URI of `local_name` within the namespace bound to `prefix`
        return URIRef(namespaces[prefix] + local_name)

    def describe(value, local_name):
        # attach `value` to the base node through a dcterms property
        add_property(g, subject, value, term('dcterms', local_name))

    # type
    if is_ontology:
        type_node, descriptiontype = term('owl', 'Ontology'), "ontology"
    else:
        type_node, descriptiontype = term('void', 'Dataset'), "dataset"
    add_type(g, subject, type_node)

    # modified
    stamp = datetime.fromtimestamp(timestamp).isoformat()
    describe(Literal(stamp, datatype=term('xsd', 'dateTime')), 'modified')

    # creator/published
    creator = term('dbpr', 'Vrije_Universiteit_Amsterdam')
    for role in ('creator', 'publisher'):
        describe(creator, role)

    # rights holder
    describe(term('dbpr', 'Rijkswaterstaat'), 'rightsHolder')

    # language
    describe(URIRef("http://id.loc.gov/vocabulary/iso639-1/nl"), 'language')

    # title (same text, tagged once per language)
    title = "Rijkswaterstaat Linked Data Pilot Project - {}".format(
        database.upper())
    for tagged_title in [Literal(title, lang=code) for code in ("nl", "en")]:
        describe(tagged_title, 'title')

    # subject
    describe(term('dbpr', 'Asset_management'), 'subject')

    # description
    descriptions = (
        Literal(("Een {} voor experimentele doeleinden ten "
                 "behoeve van Rijkswaterstaats Linked Data "
                 "pilot project.").format(descriptiontype), lang="nl"),
        Literal(("An experimental {} for purposes of "
                 "Rijkswaterstaat's Linked Data pilot "
                 "project.").format(descriptiontype), lang="en"),
    )
    for description in descriptions:
        describe(description, 'description')

    # number of triples; the +1 presumably accounts for the count
    # statement itself, which is added right below
    count = len(g) + 1
    add_property(g, subject,
                 Literal(count, datatype=term('xsd', 'nonNegativeInteger')),
                 term('void', 'triples'))