Example 1
def dump_as_rdf(g: Dataset, table_name: str) -> bool:
    """
    Dump the contents of Graph g in RDF turtle
    :param g: Dataset to dump
    :param table_name: name of the base table
    :return: success indicator
    """

    # Propagate the mapped concepts up the tree
    def add_to_ancestors(s: URIRef, vm: URIRef):
        g.add((s, ISO['enumeratedConceptualDomain.hasMember'], vm))
        for parent in g.objects(s, SKOS.broader):
            add_to_ancestors(parent, vm)

    if COMPUTE_MEMBERS and EXPLICIT_MEMBERS:
        for subj, obj in g.subject_objects(SKOS.exactMatch):
            add_to_ancestors(subj, obj)
        # TODO: this gives us a list of all concepts in the scheme... useful?
        for scheme, tc in g.subject_objects(SKOS.hasTopConcept):
            for member in g.objects(
                    tc, ISO['enumeratedConceptualDomain.hasMember']):
                g.add((scheme, ISO['enumeratedConceptualDomain.hasMember'],
                       member))

    for name, ns in namespaces.items():
        g.bind(name.lower(), ns)
    outfile = os.path.join(DATA_DIR, table_name + '.ttl')
    print(f"Saving output to {outfile}")
    g.serialize(outfile, format='turtle')
    print(f"{len(g)} triples written")
    return True
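The recursive helper above pushes each mapped value up the skos:broader chain so that every ancestor concept also receives the membership link. A minimal, self-contained sketch of the same pattern, assuming nothing from the script above (the ex: namespace and ex:hasMember property are stand-ins for the ISO membership property):

from rdflib import Dataset, Namespace

SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
EX = Namespace("http://example.org/")

g = Dataset()
g.add((EX.child, SKOS.broader, EX.parent))
g.add((EX.parent, SKOS.broader, EX.grandparent))
g.add((EX.child, SKOS.exactMatch, EX.mapped))

def add_to_ancestors(s, vm):
    # attach the mapped value, then walk up every skos:broader link
    g.add((s, EX.hasMember, vm))
    for parent in g.objects(s, SKOS.broader):
        add_to_ancestors(parent, vm)

for subj, obj in g.subject_objects(SKOS.exactMatch):
    add_to_ancestors(subj, obj)

# child, parent and grandparent now all carry ex:hasMember ex:mapped
# (rdflib >= 6 returns a str here; older versions return bytes)
print(g.serialize(format='turtle'))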
Example 2
class Fragment(object):

    HYDRA = Namespace("http://www.w3.org/ns/hydra/core#")
    VOID = Namespace("http://rdfs.org/ns/void#")
    FOAF = Namespace("http://xmlns.com/foaf/0.1/")
    DCTERMS = Namespace("http://purl.org/dc/terms/")

    def __init__(self):
        self.rdf_graph = Dataset()

    def add_data_triple(self, subject, predicate, obj):
        self.rdf_graph.add((subject, predicate, obj))

    def add_graph(self, identifier):
        self.rdf_graph.graph(identifier)

    def add_meta_quad(self, graph, subject, predicate, obj):
        # rdflib treats the last element of a quad as the graph/context
        self.rdf_graph.add((subject, predicate, obj, graph))

    def add_prefix(self, prefix, uri):
        self.rdf_graph.bind(prefix, uri)

    def serialize(self):
        return self.rdf_graph.serialize(format="trig", encoding="utf-8")
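A short usage sketch for the Fragment class above (it assumes the rdflib imports the class itself relies on; the URIs and literals are made up for illustration):

from rdflib import URIRef, Literal

fragment = Fragment()
fragment.add_prefix("ex", "http://example.com/")

graph_id = URIRef("http://example.com/graph-1")
fragment.add_graph(graph_id)

# one triple in the default graph, one quad in the named graph
fragment.add_data_triple(URIRef("http://example.com/s"),
                         URIRef("http://example.com/p"),
                         Literal("data"))
fragment.add_meta_quad(graph_id,
                       URIRef("http://example.com/s"),
                       URIRef("http://example.com/p"),
                       Literal("meta"))

# serialize() returns bytes because an encoding is passed to rdflib
print(fragment.serialize().decode("utf-8"))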
Example 3
            csvfile, skipinitialspace=True, quotechar='"', delimiter=',')]
    return csv_contents


#//*************** csv parser ****************//#

graph_uri_base = resource

path = 'source_datasets/'
filename_population = 'all_population_by_type.csv'
filename_unemployment = 'unemployment_eu.csv'
filename_inflow = 'inflow_dataset.csv'
filename_asylum = 'asylum_seekers.csv'

dataset = Dataset()
dataset.bind('mpr', RESOURCE)
dataset.bind('mpo', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('geo_country_code', GCC)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)
dataset.bind('sdmx', SDMX)

dataset.default_context.parse(VOCAB_FILE, format='turtle')

dataset, unemployment_eu_graph = convert_unemployment_csv(
    filename_unemployment, dataset,
    URIRef(graph_uri_base + 'unemployment_eu_graph'))

dataset, population_eu_graph = convert_population_csv(
    filename_population, dataset,
Example 4
add quads directly to a specific Graph within the Dataset.

This example file shows how to declare a Dataset, add content to it, serialise it, query it
and remove things from it.
"""

from rdflib import Dataset, URIRef, Literal, Namespace

#
#   Create & Add
#

# Create an empty Dataset
d = Dataset()
# Add a namespace prefix to it, just like for Graph
d.bind("ex", Namespace("http://example.com/"))

# Declare a Graph URI to be used to identify a Graph
graph_1 = URIRef("http://example.com/graph-1")

# Add an empty Graph, identified by graph_1, to the Dataset
d.graph(identifier=graph_1)

# Add two quads to Graph graph_1 in the Dataset
d.add((
    URIRef("http://example.com/subject-x"),
    URIRef("http://example.com/predicate-x"),
    Literal("Triple X"),
    graph_1
))
d.add((
    URIRef("http://example.com/subject-z"),
    URIRef("http://example.com/predicate-z"),
    Literal("Triple Z"),  # the source snippet is truncated here; this second quad is a reconstruction
    graph_1
))
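The docstring above also promises serialising, querying and removing; a short sketch of those steps using the standard Dataset API, continuing from d and graph_1:

#
#   Serialize, Query & Remove
#

# Serialize the whole Dataset as TriG, which keeps the named graphs
print(d.serialize(format="trig"))

# Query: iterate over every quad stored in graph_1
for s, p, o, g in d.quads((None, None, None, graph_1)):
    print(s, p, o, g)

# Remove a single quad from graph_1 ...
d.remove((
    URIRef("http://example.com/subject-x"),
    URIRef("http://example.com/predicate-x"),
    Literal("Triple X"),
    graph_1
))

# ... or drop the whole graph from the Dataset
d.remove_graph(graph_1)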
Example 5
            k: v
            for k, v in row.items()
        } for row in csv.DictReader(
            csvfile, skipinitialspace=True, quotechar='"', delimiter=',')]
    return csv_contents


#//*************** csv parser ****************//#

graph_uri_base = resource + 'movement_of_people/'

path = 'source_datasets/'
filename = 'Movement_of_people_across_borders_dataset.csv'

dataset = Dataset()
dataset.bind('trumpres', RESOURCE)
dataset.bind('trumpvoc', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)

dataset.default_context.parse(VOCAB_FILE, format='turtle')

dataset, movement_graph = convert_csv(
    path + filename, dataset, URIRef(graph_uri_base + 'movement_graph'))
serialize_upload(OUTPUT_DIR + 'movement_of_people.trig', dataset)

### Generate VoID metadata
from rdflib.void import generateVoID
from rdflib.namespace import VOID
dcterms_uri = 'http://purl.org/dc/terms/'
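A hedged sketch of how the VoID generation might continue from here. The generateVoID signature (graph, dataset=...) and its return value, assumed here to be the metadata graph plus the dataset node, follow rdflib.void; the URIs and the title literal are illustrative, not taken from the original script:

from rdflib import Literal, Namespace, URIRef

DCTERMS = Namespace(dcterms_uri)
void_dataset_uri = URIRef(graph_uri_base + 'void')

# build a VoID description of the converted graph and label it
void_graph, void_node = generateVoID(movement_graph, dataset=void_dataset_uri)
void_graph.bind('void', VOID)
void_graph.bind('dcterms', DCTERMS)
void_graph.add((void_node, DCTERMS['title'],
                Literal('Movement of people across borders')))

print(void_graph.serialize(format='turtle'))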
Example 6
        source=None,
        date=Literal(datetime.datetime.now().isoformat(),
                     datatype=XSD.dateTime),
        created=None,
        issued=None,
        modified=None,
        exampleResource=exampleResource,
        vocabulary=[URIRef("https://schema.org/")],
        triples=sum(1 for i in ds.graph(
            identifier="https://data.create.humanities.uva.nl/id/kohier1674/").
                    subjects()),
        temporalCoverage=Literal("1674", datatype=XSD.gYear, normalize=False),
        licenseprop=URIRef(
            "https://creativecommons.org/licenses/by-nc-sa/4.0/"),
        distribution=download)

    ds.bind('owl', OWL)
    ds.bind('create', create)
    ds.bind('schema', schema)
    ds.bind('void', void)
    ds.bind('foaf', foaf)
    ds.bind('edm', edm)
    ds.bind('pnv', pnv)
    ds.bind('roar', roar)
    ds.bind('dc', dc)
    ds.bind('dcterms', dcterms)
    ds.bind('oa', oa)
    ds.bind('prov', prov)

    print("Serializing!")
    ds.serialize('data/kohier1674.trig', format='trig')
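A small follow-up sketch, not part of the original script: read the TriG file back into a fresh Dataset to check the export and report the size of each named graph.

from rdflib import Dataset

check = Dataset()
check.parse('data/kohier1674.trig', format='trig')

for graph in check.contexts():
    print(graph.identifier, len(graph))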
Example 7
    with open(filename, 'r', encoding="ISO-8859-1") as csvfile:
        # Set the right quote character and delimiter
        csv_contents = [{
            k: v
            for k, v in row.items()
        } for row in csv.DictReader(
            csvfile, skipinitialspace=True, quotechar='"', delimiter=';')]

    # The URI for our dataset
    url = 'http://few.vu.nl/~mvr320/KRweb/resource/' + short[i] + '/'
    SETNAME = Namespace(url)
    graph_uri = URIRef(url)

    # We initialize a dataset, and bind our namespaces
    dataset = Dataset()
    dataset.bind('g13data', DATA)
    dataset.bind('g13vocab', VOCAB)
    dataset.bind('g13set', SETNAME)
    dataset.bind('geo', GEO)
    dataset.bind('geof', GEOF)
    dataset.bind('dbo', DBO)
    dataset.bind('dbp', DBP)
    dataset.bind('schema', SCHEMA)
    dataset.bind('vcard', VCARD)
    dataset.bind('wgs', WGS)
    dataset.bind('void', VOID)

    # We then get a new dataset object with our URI from the dataset.
    graph = dataset.graph(graph_uri)

    # Load the externally defined schema into the default dataset (context) of the dataset
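The snippet breaks off after that comment; in the sibling conversion scripts above this step is a single parse into the default context. A sketch assuming the schema path lives in a VOCAB_FILE-style variable (the name is hypothetical here):

    # pull the Turtle vocabulary into the default context, as in the other scripts
    dataset.default_context.parse(VOCAB_FILE, format='turtle')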
Example 8
def main(search=None, cache=None, identifiers=[]):

    ns = Namespace("https://data.create.humanities.uva.nl/id/rkd/")

    ds = Dataset()
    ds.bind('rdfs', RDFS)
    ds.bind('schema', schema)
    ds.bind('sem', sem)
    ds.bind('bio', bio)
    ds.bind('foaf', foaf)
    ds.bind('void', void)
    ds.bind('skos', SKOS)
    ds.bind('owl', OWL)
    ds.bind('dc', dc)

    ds.bind('rkdArtist', URIRef("https://data.rkd.nl/artists/"))
    ds.bind('rkdThes', nsThesaurus)
    ds.bind('rkdPerson', nsPerson)
    ds.bind('rkdImage', URIRef("https://rkd.nl/explore/images/"))
    ds.bind('rkdThumb', URIRef("https://images.rkd.nl/rkd/thumb/650x650/"))

    ds.bind('aat', URIRef("http://vocab.getty.edu/aat/"))

    ## First the images

    g = rdfSubject.db = ds.graph(identifier=ns)

    # Load cache thesaurus
    if os.path.isfile('rkdthesaurus.json'):
        with open('rkdthesaurus.json') as infile:
            thesaurusDict = json.load(infile)
    else:
        thesaurusDict = dict()

    # Load cache images
    if os.path.isfile('imagecache.json'):
        with open('imagecache.json') as infile:
            imageCache = json.load(infile)
    else:
        imageCache = dict()

    # to fetch all identifiers from the search
    if search:
        thesaurusDict, imageCache = parseURL(search,
                                             thesaurusDict=thesaurusDict,
                                             imageCache=imageCache)
    elif cache:
        # assume that everything in the thesaurus is also cached
        for doc in cache.values():
            parseData(doc, thesaurusDict=thesaurusDict)
    elif identifiers:
        for i in identifiers:
            thesaurusDict, imageCache = parseURL(APIURL + str(i),
                                                 thesaurusDict=thesaurusDict,
                                                 imageCache=imageCache)

    # Any images without labels?
    # These were not included in the search, but fetch them anyway.
    print("Finding referred images that were not included")
    q = """
    PREFIX schema: <http://schema.org/>
    SELECT ?uri WHERE {
        ?role a schema:Role ; schema:isRelatedTo ?uri .

        FILTER NOT EXISTS { ?uri schema:name ?name }
    }
    """
    images = g.query(q)
    print(f"Found {len(images)}!")
    for i in images:
        identifier = str(i['uri']).replace('https://rkd.nl/explore/images/',
                                           '')
        thesaurusDict, imageCache = parseURL(
            "https://api.rkd.nl/api/record/images/" + str(identifier),
            thesaurusDict=thesaurusDict,
            imageCache=imageCache)

    ## Then the thesaurus
    print("Converting the thesaurus")
    rdfSubject.db = ds.graph(identifier=ns.term('thesaurus/'))

    ids = list(thesaurusDict.keys())
    for i in ids:
        _, thesaurusDict = getThesaurus(i, thesaurusDict, 'concept')

    # Save updated cache
    with open('rkdthesaurus.json', 'w') as outfile:
        json.dump(thesaurusDict, outfile)

    with open('imagecache.json', 'w') as outfile:
        json.dump(imageCache, outfile)

    ## Serialize
    print("Serializing!")
    ds.serialize('rkdportraits14751825.trig', format='trig')
Example 9
g7 += g
g7.bind("dct", DCTERMS)
g7.bind("skos", SKOS)
assert "@xml:base" not in g7.serialize(format="xml").decode("utf-8")
assert "@base <http://one.org/> ." in g7.serialize(
    format="n3", base=base_one).decode("utf-8")
g7.base = base_two
assert "@base <http://two.org/> ." in g7.serialize(format="n3").decode("utf-8")
assert "@base <http://one.org/> ." in g7.serialize(
    format="n3", base=base_one).decode("utf-8")

# 8. checking results for TriX & TriG
# TriX can specify a base per graph, but a base for the whole document comes from the serialize() call (checked below)
base_three = Namespace("http://three.org/")
ds1 = Dataset()
ds1.bind("dct", DCTERMS)
ds1.bind("skos", SKOS)
g8 = ds1.graph(URIRef("http://g8.com/"), base=base_one)
g9 = ds1.graph(URIRef("http://g9.com/"))
g8 += g
g9 += g
g9.base = base_two
ds1.base = base_three

trix = ds1.serialize(format="trix",
                     base=Namespace("http://two.org/")).decode("utf-8")
assert '<graph xml:base="http://one.org/">' in trix
assert '<graph xml:base="http://two.org/">' in trix
assert '<TriX xml:base="http://two.org/"' in trix

trig = ds1.serialize(format="trig",
Example 10
def data_structure_definition(profile, dataset_name, dataset_base_uri,
                              variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a nanopublication with provenance annotations)
    that contains the data structure definition (from the DataCube vocabulary) and
    the mappings to external datasets.

    Arguments:
    dataset     -- the name of the dataset
    variables   -- the list of dictionaries with the variables and their mappings to URIs
    profile     -- the Google signin profile
    source_path -- the path to the dataset file that was annotated
    source_hash -- the Git hash of the dataset file version of the dataset

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace('{}/'.format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a conjunctive graph for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind('qbrv', QBRV)
    rdf_dataset.bind('qbr', QBR)
    rdf_dataset.bind('qb', QB)
    rdf_dataset.bind('skos', SKOS)
    rdf_dataset.bind('prov', PROV)
    rdf_dataset.bind('np', NP)
    rdf_dataset.bind('foaf', FOAF)

    # Initialize the graphs needed for the nanopublication
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + '/' + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE['assertion/' + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE['provenance/' + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE['pubinfo/' + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR['person/' + profile['email']]

    rdf_dataset.add((author_uri, RDF.type, FOAF['Person']))
    rdf_dataset.add((author_uri, FOAF['name'], Literal(profile['name'])))
    rdf_dataset.add((author_uri, FOAF['email'], Literal(profile['email'])))
    rdf_dataset.add((author_uri, QBRV['googleId'], Literal(profile['id'])))
    try:
        rdf_dataset.add(
            (author_uri, FOAF['depiction'], URIRef(profile['image'])))
    except KeyError:
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV['path'],
                     Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV['sha1_hash'],
                     Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE['nanopublication/' + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP['Nanopublication']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasAssertion'], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP['Assertion']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasProvenance'], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP['Provenance']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasPublicationInfo'], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP['PublicationInfo']))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure definition itself)
    provenance_graph.add(
        (assertion_graph_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    provenance_graph.add(
        (dataset_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    provenance_graph.add((assertion_graph_uri, PROV['generatedAtTime'],
                          Literal(timestamp, datatype=XSD.dateTime)))
    provenance_graph.add(
        (assertion_graph_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather than an activity itself.
    qber_uri = URIRef('https://github.com/CLARIAH/qber.git')

    pubinfo_graph.add((nanopublication_uri, PROV['wasGeneratedBy'], qber_uri))
    pubinfo_graph.add((nanopublication_uri, PROV['generatedAtTime'],
                       Literal(timestamp, datatype=XSD.dateTime)))
    pubinfo_graph.add(
        (nanopublication_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The assertion graph
    # ----

    structure_uri = BASE['structure']

    assertion_graph.add((dataset_uri, RDF.type, QB['DataSet']))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add(
        (structure_uri, RDF.type, QB['DataStructureDefinition']))

    assertion_graph.add((dataset_uri, QB['structure'], structure_uri))

    for variable_id, variable in variables.items():
        variable_uri = URIRef(variable['original']['uri'])
        variable_label = Literal(variable['original']['label'])
        variable_type = URIRef(variable['type'])

        codelist_uri = URIRef(variable['codelist']['original']['uri'])
        codelist_label = Literal(variable['codelist']['original']['label'])

        # The variable as component of the definition
        component_uri = safe_url(BASE,
                                 'component/' + variable['original']['label'])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB['component'], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if 'description' in variable and variable['description'] != "":
            assertion_graph.add(
                (variable_uri, RDFS.comment, Literal(variable['description'])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable['uri'] != str(variable_uri):
            assertion_graph.add(
                (variable_uri, RDFS['subPropertyOf'], URIRef(variable['uri'])))

        if variable_type == QB['DimensionProperty']:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['dimension'], variable_uri))

            # Coded variables are also of type coded property (a subproperty of dimension property)
            if variable['category'] == 'coded':
                assertion_graph.add(
                    (variable_uri, RDF.type, QB['CodedProperty']))

        elif variable_type == QB['MeasureProperty']:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['measure'], variable_uri))
        elif variable_type == QB['AttributeProperty']:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['attribute'], variable_uri))

        # If this variable is of category 'coded', we add codelist and URIs for
        # each variable (including mappings between value uris and etc....)
        if variable['category'] == 'coded':
            assertion_graph.add((codelist_uri, RDF.type, SKOS['Collection']))
            assertion_graph.add(
                (codelist_uri, RDFS.label, Literal(codelist_label)))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB['codeList'], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if variable['codelist']['uri'] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV['wasDerivedFrom'],
                                     URIRef(variable['codelist']['uri'])))

            # Generate a SKOS concept for each of the values and map it to the
            # assigned codelist
            for value in variable['values']:
                value_uri = URIRef(value['original']['uri'])
                value_label = Literal(value['original']['label'])

                assertion_graph.add((value_uri, RDF.type, SKOS['Concept']))
                assertion_graph.add(
                    (value_uri, SKOS['prefLabel'], Literal(value_label)))
                assertion_graph.add((codelist_uri, SKOS['member'], value_uri))

                # The value has been changed, and therefore there is a mapping
                if value['original']['uri'] != value['uri']:
                    assertion_graph.add(
                        (value_uri, SKOS['exactMatch'], URIRef(value['uri'])))
                    assertion_graph.add(
                        (value_uri, RDFS.label, Literal(value['label'])))

        elif variable['category'] == 'identifier':
            # Generate a SKOS concept for each of the values
            for value in variable['values']:
                value_uri = URIRef(value['original']['uri'])
                value_label = Literal(value['original']['label'])

                assertion_graph.add((value_uri, RDF.type, SKOS['Concept']))
                assertion_graph.add(
                    (value_uri, SKOS['prefLabel'], value_label))

                # The value has been changed, and therefore there is a mapping
                if value['original']['uri'] != value['uri']:
                    assertion_graph.add(
                        (value_uri, SKOS['exactMatch'], URIRef(value['uri'])))
                    assertion_graph.add(
                        (value_uri, RDFS.label, Literal(value['label'])))

        elif variable['category'] == 'other':
            # Generate a literal for each of the values when converting the dataset (but not here)
            pass

    return rdf_dataset
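A hedged sketch of how data_structure_definition might be called. The profile and variables dictionaries only illustrate the shape the function reads (email/name/id, original uri/label, type, category, codelist, values); they are not real QBer data, and the call assumes the module-level namespaces and the safe_url helper that the function itself uses:

profile = {
    'email': 'researcher@example.org',
    'name': 'A. Researcher',
    'id': '1234567890',
}
variables = {
    'sex': {
        'original': {'uri': 'http://example.org/variable/sex', 'label': 'sex'},
        'uri': 'http://example.org/variable/sex',
        'label': 'sex',
        'type': 'http://purl.org/linked-data/cube#DimensionProperty',
        'category': 'coded',
        'description': 'Sex of the respondent',
        'codelist': {
            'original': {'uri': 'http://example.org/codelist/sex', 'label': 'Sex'},
            'uri': 'http://example.org/codelist/sex',
        },
        'values': [
            {'original': {'uri': 'http://example.org/code/male', 'label': 'male'},
             'uri': 'http://example.org/code/male',
             'label': 'male'},
        ],
    },
}

rdf_dataset = data_structure_definition(
    profile, 'example_dataset', 'http://example.org/dataset/example',
    variables, 'data/example.csv', 'abcdef0123456789')
print(rdf_dataset.serialize(format='trig'))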
Example 11
UUIDNS = Namespace("urn:uuid:")
DOCKER = Namespace("http://w3id.org/daspos/docker#")
# W3C namespace:
POSIX = Namespace("http://www.w3.org/ns/posix/stat#")
ACL = Namespace("http://www.w3.org/ns/auth/acl#")

# DASPOS namespaces
SC = Namespace("https://w3id.org/daspos/smartcontainers#")
CA = Namespace("https://w3id.org/daspos/computationalactivity#")
CE = Namespace("https://w3id.org/daspos/computationalenvironment#")


# Need to handle DOI
# http://bitwacker.com/2010/02/04/dois-uris-and-cool-resolution/

ds.bind("prov", PROV)
ds.bind("ore", ORE)
ds.bind("owl", OWL)
ds.bind("dc", DC)
ds.bind("uuidns", UUIDNS)
ds.bind("docker", DOCKER)
ds.bind("posix", POSIX)
ds.bind("acl", ACL)
ds.bind("sc", SC)
ds.bind("ca", CA)
ds.bind("ce", CE)

default_graph = ds


# image_name = cmd_string.rsplit(' ', 1) [1]
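A hedged sketch, not from the original script, of how the uuidns prefix bound above might be used: mint a urn:uuid: identifier and describe it in the default graph. It assumes the PROV and DC namespace objects that the bindings above already rely on; the description literal is made up:

import uuid
from rdflib import RDF, Literal

activity_uri = UUIDNS[str(uuid.uuid4())]
default_graph.add((activity_uri, RDF.type, PROV['Activity']))
default_graph.add((activity_uri, DC['description'],
                   Literal("container build step")))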
Example 12
def data_structure_definition(profile, dataset_name, dataset_base_uri, variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a nanopublication with provenance annotations)
    that contains the data structure definition (from the DataCube vocabulary) and
    the mappings to external datasets.

    Arguments:
    dataset     -- the name of the dataset
    variables   -- the list of dictionaries with the variables and their mappings to URIs
    profile     -- the Google signin profile
    source_path -- the path to the dataset file that was annotated
    source_hash -- the Git hash of the dataset file version of the dataset

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace("{}/".format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a conjunctive graph for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind("qbrv", QBRV)
    rdf_dataset.bind("qbr", QBR)
    rdf_dataset.bind("qb", QB)
    rdf_dataset.bind("skos", SKOS)
    rdf_dataset.bind("prov", PROV)
    rdf_dataset.bind("np", NP)
    rdf_dataset.bind("foaf", FOAF)

    # Initialize the graphs needed for the nanopublication
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + "/" + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE["assertion/" + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE["provenance/" + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE["pubinfo/" + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR["person/" + profile["email"]]

    rdf_dataset.add((author_uri, RDF.type, FOAF["Person"]))
    rdf_dataset.add((author_uri, FOAF["name"], Literal(profile["name"])))
    rdf_dataset.add((author_uri, FOAF["email"], Literal(profile["email"])))
    rdf_dataset.add((author_uri, QBRV["googleId"], Literal(profile["id"])))
    try:
        rdf_dataset.add((author_uri, FOAF["depiction"], URIRef(profile["image"])))
    except KeyError:
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV["path"], Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV["sha1_hash"], Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE["nanopublication/" + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP["Nanopublication"]))
    rdf_dataset.add((nanopublication_uri, NP["hasAssertion"], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP["Assertion"]))
    rdf_dataset.add((nanopublication_uri, NP["hasProvenance"], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP["Provenance"]))
    rdf_dataset.add((nanopublication_uri, NP["hasPublicationInfo"], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP["PublicationInfo"]))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure definition itself)
    provenance_graph.add((assertion_graph_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add((dataset_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add((assertion_graph_uri, PROV["generatedAtTime"], Literal(timestamp, datatype=XSD.datetime)))
    provenance_graph.add((assertion_graph_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather than an activity itself.
    qber_uri = URIRef("https://github.com/CLARIAH/qber.git")

    pubinfo_graph.add((nanopublication_uri, PROV["wasGeneratedBy"], qber_uri))
    pubinfo_graph.add((nanopublication_uri, PROV["generatedAtTime"], Literal(timestamp, datatype=XSD.datetime)))
    pubinfo_graph.add((nanopublication_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The assertion graph
    # ----

    structure_uri = BASE["structure"]

    assertion_graph.add((dataset_uri, RDF.type, QB["DataSet"]))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add((structure_uri, RDF.type, QB["DataStructureDefinition"]))

    assertion_graph.add((dataset_uri, QB["structure"], structure_uri))

    for variable_id, variable in variables.items():
        variable_uri = URIRef(variable["original"]["uri"])
        variable_label = Literal(variable["original"]["label"])
        variable_type = URIRef(variable["type"])

        codelist_uri = URIRef(variable["codelist"]["original"]["uri"])
        codelist_label = Literal(variable["codelist"]["original"]["label"])

        # The variable as component of the definition
        component_uri = safe_url(BASE, "component/" + variable["original"]["label"])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB["component"], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if "description" in variable and variable["description"] != "":
            assertion_graph.add((variable_uri, RDFS.comment, Literal(variable["description"])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable["uri"] != str(variable_uri):
            assertion_graph.add((variable_uri, RDFS["subPropertyOf"], URIRef(variable["uri"])))

        if variable_type == QB["DimensionProperty"]:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["dimension"], variable_uri))

            # Coded variables are also of type coded property (a subproperty of dimension property)
            if variable["category"] == "coded":
                assertion_graph.add((variable_uri, RDF.type, QB["CodedProperty"]))

        elif variable_type == QB["MeasureProperty"]:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["measure"], variable_uri))
        elif variable_type == QB["AttributeProperty"]:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["attribute"], variable_uri))

        # If this variable is of category 'coded', we add codelist and URIs for
        # each variable (including mappings between value uris and etc....)
        if variable["category"] == "coded":
            assertion_graph.add((codelist_uri, RDF.type, SKOS["Collection"]))
            assertion_graph.add((codelist_uri, RDFS.label, Literal(codelist_label)))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB["codeList"], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if variable["codelist"]["uri"] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV["wasDerivedFrom"], URIRef(variable["codelist"]["uri"])))

            # Generate a SKOS concept for each of the values and map it to the
            # assigned codelist
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], Literal(value_label)))
                assertion_graph.add((codelist_uri, SKOS["member"], value_uri))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "identifier":
            # Generate a SKOS concept for each of the values
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], value_label))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "other":
            # Generate a literal for each of the values when converting the dataset (but not here)
            pass

    return rdf_dataset
Example 13
    with open(filename,'r') as csvfile:
        csv_contents = [{k: v for k, v in row.items()}
            for row in csv.DictReader(csvfile, skipinitialspace=True, quotechar='"', delimiter=',')]
    return csv_contents
#//*************** csv parser ****************//#

graph_uri_base = resource + 'movement_of_people/'


path = 'source_datasets/'
filename = 'Movement_of_people_across_borders_dataset.csv'

dataset = Dataset()
dataset.bind('trumpres', RESOURCE)
dataset.bind('trumpvoc', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)

dataset.default_context.parse(VOCAB_FILE, format='turtle')

dataset, movement_graph = convert_csv(path + filename,dataset,URIRef(graph_uri_base + 'movement_graph'))
serialize_upload(OUTPUT_DIR + 'movement_of_people.trig',dataset)



### Generate VoID metadata
from rdflib.void import generateVoID
from rdflib.namespace import VOID
Example 14
class LongTermMemory(object):

    ONE_TO_ONE_PREDICATES = [
        'age', 'born_in', 'faceID', 'favorite', 'favorite_of', 'id', 'is_from',
        'manufactured_in', 'mother_is', 'name'
    ]

    def __init__(self, address=config.BRAIN_URL_LOCAL):
        """
        Interact with Triple store

        Parameters
        ----------
        address: str
            IP address and port of the Triple store
        """

        self.address = address
        self.namespaces = {}
        self.ontology_paths = {}
        self.format = 'trig'
        self.dataset = Dataset()
        self.query_prefixes = """
                    prefix gaf: <http://groundedannotationframework.org/gaf#> 
                    prefix grasp: <http://groundedannotationframework.org/grasp#> 
                    prefix leolaniInputs: <http://cltl.nl/leolani/inputs/>
                    prefix leolaniFriends: <http://cltl.nl/leolani/friends/> 
                    prefix leolaniTalk: <http://cltl.nl/leolani/talk/> 
                    prefix leolaniTime: <http://cltl.nl/leolani/time/> 
                    prefix leolaniWorld: <http://cltl.nl/leolani/world/> 
                    prefix n2mu: <http://cltl.nl/leolani/n2mu/> 
                    prefix ns1: <urn:x-rdflib:> 
                    prefix owl: <http://www.w3.org/2002/07/owl#> 
                    prefix prov: <http://www.w3.org/ns/prov#> 
                    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
                    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
                    prefix sem: <http://semanticweb.cs.vu.nl/2009/11/sem/> 
                    prefix skos: <http://www.w3.org/2004/02/skos/core#> 
                    prefix time: <http://www.w3.org/TR/owl-time/#> 
                    prefix xml: <http://www.w3.org/XML/1998/namespace> 
                    prefix xml1: <https://www.w3.org/TR/xmlschema-2/#> 
                    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
                    """

        self._define_namespaces()
        self._get_ontology_path()
        self._bind_namespaces()

        self.my_uri = None

        self._log = logger.getChild(self.__class__.__name__)
        self._log.debug("Booted")

    #################################### Main functions to interact with the brain ####################################

    def update(self, capsule):
        """
        Main function to interact with if a statement is coming into the brain. Takes in a structured parsed statement,
        transforms it into triples, and posts them to the triple store
        :param capsule: Structured data of a parsed statement
        :return: json response containing the status for posting the triples, and the original statement
        """
        # Case fold
        capsule = casefold_capsule(capsule)

        # Create graphs and triples
        self._model_graphs_(capsule)

        data = self._serialize(config.BRAIN_LOG)

        code = self._upload_to_brain(data)

        # Create JSON output
        capsule["date"] = str(capsule["date"])
        output = {'response': code, 'statement': capsule}

        return output
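
    # Illustrative only (not from the original source): the capsule dictionaries
    # handled by these methods look roughly like
    #   {
    #       "subject":   {"label": "bram", "type": "person"},
    #       "predicate": {"type": "is_from"},
    #       "object":    {"label": "amsterdam", "type": "location"},
    #       "author": "piek",
    #       "date": datetime.date(2018, 3, 19)
    #   }
    # 'subject'/'object' carry a label plus an ontology type, 'author' names the
    # speaker or sensor, and 'date' is a date object used for the time graph.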

    def experience(self, capsule):
        """
        Main function to interact with if a statement is coming into the brain. Takes in a structured parsed statement,
        transforms it into triples, and posts them to the triple store
        :param capsule: Structured data of a parsed statement
        :return: json response containing the status for posting the triples, and the original statement
        """
        # Case fold
        capsule = casefold_capsule(capsule)

        # Create graphs and triples
        self._model_graphs_(capsule, type='Experience')

        data = self._serialize(config.BRAIN_LOG)

        code = self._upload_to_brain(data)

        # Create JSON output
        capsule["date"] = str(capsule["date"])
        output = {'response': code, 'statement': capsule}

        return output

    def query_brain(self, capsule):
        """
        Main function to interact with if a question is coming into the brain. Takes in a structured parsed question,
        transforms it into a query, and queries the triple store for a response
        :param capsule: Structured data of a parsed question
        :return: json response containing the results of the query, and the original question
        """
        # Case fold
        capsule = casefold_capsule(capsule)

        # Generate query
        query = self._create_query(capsule)

        # Perform query
        response = self._submit_query(query)

        # Create JSON output
        if 'date' in capsule.keys():
            capsule["date"] = str(capsule["date"])
        output = {'response': response, 'question': capsule}

        return output

    def process_visual(self, item, exact_only=True):
        """
        Main function to determine if this item can be recognized by the brain, learned, or none
        :param item:
        :return:
        """

        if casefold(item) in self.get_classes():
            # If this is in the ontology already, create sensor triples directly
            text = 'I know about %s. I will remember this object' % item
            return item, text

        temp = self.get_labels_and_classes()
        if casefold(item) in temp.keys():
            # If this is in the ontology already, create sensor triples directly
            text = 'I know about %s. It is of type %s. I will remember this object' % (
                item, temp[item])
            return item, text

        # Query the web for information
        class_type, description = self.exact_match_dbpedia(item)
        if class_type is not None:
            # Had to learn it, but I can create triples now
            text = 'I did not know what %s is, but I searched on the web and I found that it is a %s. ' \
                   'I will remember this object' % (item, class_type)
            return casefold(class_type), text

        if not exact_only:
            # Second go at dbpedia, relaxed approach
            class_type, description = self.keyword_match_dbpedia(item)
            if class_type is not None:
                # Had to really search for it to learn it, but I can create triples now
                text = 'I did not know what %s is, but I searched for fuzzy matches on the web and I found that it ' \
                       'is a %s. I will remember this object' % (item, class_type)
                return casefold(class_type), text

        # Failure, nothing found
        text = 'I am sorry, I could not learn anything on %s so I will not remember it' % item
        return None, text

    ########## management system for keeping track of chats and turns ##########
    def get_last_chat_id(self):
        """
        Get the id for the last interaction recorded
        :return: id
        """
        query = read_query('last_chat_id')
        response = self._submit_query(query)

        return int(response[0]['chatid']['value']) if response else 0

    def get_last_turn_id(self, chat_id):
        """
        Get the id for the last turn in the given chat
        :param chat_id: id for chat of interest
        :return:  id
        """
        query = read_query('last_turn_id') % (chat_id)
        response = self._submit_query(query)

        last_turn = 0
        for turn in response:
            turn_uri = turn['s']['value']
            turn_id = turn_uri.split('/')[-1][10:]
            turn_id = int(turn_id)

            if turn_id > last_turn:
                last_turn = turn_id

        return last_turn

    ########## brain structure exploration ##########
    def get_predicates(self):
        """
        Get predicates in social ontology
        :return:
        """
        query = read_query('predicates')
        response = self._submit_query(query)

        return [elem['p']['value'].split('/')[-1] for elem in response]

    def get_classes(self):
        """
        Get classes in social ontology
        :return:
        """
        query = read_query('classes')
        response = self._submit_query(query)

        return [elem['o']['value'].split('/')[-1] for elem in response]

    def get_labels_and_classes(self):
        """
        Get classes in social ontology
        :return:
        """
        query = read_query('labels_and_classes')
        response = self._submit_query(query)

        temp = dict()
        for r in response:
            temp[r['l']['value']] = r['o']['value'].split('/')[-1]

        return temp

    ########## learned facts exploration ##########
    def count_statements(self):
        """
        Count statements or 'facts' in the brain
        :return:
        """
        query = read_query('count_statements')
        response = self._submit_query(query)
        return response[0]['count']['value']

    def count_friends(self):
        """
        Count number of people I have talked to
        :return:
        """
        query = read_query('count_friends')
        response = self._submit_query(query)
        return response[0]['count']['value']

    def get_my_friends(self):
        """
        Get names of people I have talked to
        :return:
        """
        query = read_query('my_friends')
        response = self._submit_query(query)
        return [elem['name']['value'].split('/')[-1] for elem in response]

    def get_best_friends(self):
        """
        Get names of the 5 people I have talked to the most
        :return:
        """
        query = read_query('best_friends')
        response = self._submit_query(query)
        return [elem['name']['value'] for elem in response]

    def get_instance_of_type(self, instance_type):
        """
        Get instances of a certain class type
        :param instance_type: name of class in ontology
        :return:
        """
        query = read_query('instance_of_type') % (instance_type)
        response = self._submit_query(query)
        return [elem['name']['value'] for elem in response]

    def when_last_chat_with(self, actor_label):
        """
        Get time value for the last time I chatted with this person
        :param actor_label: name of person
        :return:
        """
        query = read_query('when_last_chat_with') % (actor_label)
        response = self._submit_query(query)
        return response[0]['time']['value'].split('/')[-1]

    def get_triples_with_predicate(self, predicate):
        """
        Get triples that contain this predicate
        :param predicate:
        :return:
        """
        query = read_query('triples_with_predicate') % predicate
        response = self._submit_query(query)
        return [(elem['sname']['value'], elem['oname']['value'])
                for elem in response]

    ########## conflicts ##########
    def get_all_conflicts(self):
        """
        Aggregate all conflicts in brain
        :return:
        """
        conflicts = []
        for predicate in self.ONE_TO_ONE_PREDICATES:
            conflicts.extend(self._get_conflicts_with_predicate(predicate))

        return conflicts

    ########## semantic web ##########
    def exact_match_dbpedia(self, item):
        """
        Query dbpedia for information on this item to get its semantic type and description.
        :param item:
        :return:
        """

        # Gather combinations
        combinations = [item, item.lower(), item.capitalize(), item.title()]

        for comb in combinations:
            # Try exact matching query
            query = read_query('dbpedia_type_and_description') % (comb)
            response = self._submit_query(query)

            # break if we have a hit
            if response:
                break

        class_type = response[0]['label_type']['value'] if response else None
        description = response[0]['description']['value'].split(
            '.')[0] if response else None

        return class_type, description

    def keyword_match_dbpedia(self, item):
        # Query API
        r = requests.get(
            'http://lookup.dbpedia.org/api/search.asmx/KeywordSearch',
            params={
                'QueryString': item,
                'MaxHits': '10'
            },
            headers={
                'Accept': 'application/json'
            }).json()['results']

        # Fuzzy match
        choices = [e['label'] for e in r]
        best_match = process.extractOne("item", choices)

        # Get best match object
        r = [{
            'label': e['label'],
            'classes': e['classes'],
            'description': e['description']
        } for e in r if e['label'] == best_match[0]]

        if r:
            r = r[0]

            if r['classes']:
                # process dbpedia classes only
                r['classes'] = [
                    c['label'] for c in r['classes'] if 'dbpedia' in c['uri']
                ]

        else:
            r = {'label': None, 'classes': None, 'description': None}

        return r['classes'][0] if r['classes'] else None, r[
            'description'].split('.')[0] if r['description'] else None

    ######################################## Helpers for setting up connection ########################################

    def _define_namespaces(self):
        """
        Define namespaces for different layers (ontology/vocab and resource). Assign them to self
        :return:
        """
        # Namespaces for the instance layer
        instance_vocab = 'http://cltl.nl/leolani/n2mu/'
        self.namespaces['N2MU'] = Namespace(instance_vocab)
        instance_resource = 'http://cltl.nl/leolani/world/'
        self.namespaces['LW'] = Namespace(instance_resource)

        # Namespaces for the mention layer
        mention_vocab = 'http://groundedannotationframework.org/gaf#'
        self.namespaces['GAF'] = Namespace(mention_vocab)
        mention_resource = 'http://cltl.nl/leolani/talk/'
        self.namespaces['LTa'] = Namespace(mention_resource)

        # Namespaces for the attribution layer
        attribution_vocab = 'http://groundedannotationframework.org/grasp#'
        self.namespaces['GRASP'] = Namespace(attribution_vocab)
        attribution_resource_friends = 'http://cltl.nl/leolani/friends/'
        self.namespaces['LF'] = Namespace(attribution_resource_friends)
        attribution_resource_inputs = 'http://cltl.nl/leolani/inputs/'
        self.namespaces['LI'] = Namespace(attribution_resource_inputs)

        # Namespaces for the temporal layer-ish
        time_vocab = 'http://www.w3.org/TR/owl-time/#'
        self.namespaces['TIME'] = Namespace(time_vocab)
        time_resource = 'http://cltl.nl/leolani/time/'
        self.namespaces['LTi'] = Namespace(time_resource)

        # The namespaces of external ontologies
        skos = 'http://www.w3.org/2004/02/skos/core#'
        self.namespaces['SKOS'] = Namespace(skos)

        prov = 'http://www.w3.org/ns/prov#'
        self.namespaces['PROV'] = Namespace(prov)

        sem = 'http://semanticweb.cs.vu.nl/2009/11/sem/'
        self.namespaces['SEM'] = Namespace(sem)

        xml = 'https://www.w3.org/TR/xmlschema-2/#'
        self.namespaces['XML'] = Namespace(xml)

    def _get_ontology_path(self):
        """
        Define ontology paths to key vocabularies
        :return:
        """
        self.ontology_paths[
            'n2mu'] = './../../knowledge_representation/ontologies/leolani.ttl'
        self.ontology_paths[
            'gaf'] = './../../knowledge_representation/ontologies/gaf.rdf'
        self.ontology_paths[
            'grasp'] = './../../knowledge_representation/ontologies/grasp.rdf'
        self.ontology_paths[
            'sem'] = './../../knowledge_representation/ontologies/sem.rdf'

    def _bind_namespaces(self):
        """
        Bind namespaces
        :return:
        """
        self.dataset.bind('n2mu', self.namespaces['N2MU'])
        self.dataset.bind('leolaniWorld', self.namespaces['LW'])
        self.dataset.bind('gaf', self.namespaces['GAF'])
        self.dataset.bind('leolaniTalk', self.namespaces['LTa'])
        self.dataset.bind('grasp', self.namespaces['GRASP'])
        self.dataset.bind('leolaniFriends', self.namespaces['LF'])
        self.dataset.bind('leolaniInputs', self.namespaces['LI'])
        self.dataset.bind('time', self.namespaces['TIME'])
        self.dataset.bind('leolaniTime', self.namespaces['LTi'])
        self.dataset.bind('skos', self.namespaces['SKOS'])
        self.dataset.bind('prov', self.namespaces['PROV'])
        self.dataset.bind('sem', self.namespaces['SEM'])
        self.dataset.bind('xml', self.namespaces['XML'])
        self.dataset.bind('owl', OWL)

    ######################################## Helpers for statement processing ########################################

    def create_chat_id(self, actor, date):
        """
        Determine chat id depending on my last conversation with this person
        :param actor:
        :param date:
        :return:
        """
        self._log.debug('Chat with {} on {}'.format(actor, date))

        query = read_query('last_chat_with') % (actor)
        response = self._submit_query(query)

        if response and int(response[0]['day']['value']) == int(date.day) \
                and int(response[0]['month']['value']) == int(date.month) \
                and int(response[0]['year']['value']) == int(date.year):
            # Chatted with this person today so same chat id
            chat_id = int(response[0]['chatid']['value'])
        else:
            # Either have never chatted with this person, or I have but not today. Add one to latest chat
            chat_id = self.get_last_chat_id() + 1

        return chat_id

    def create_turn_id(self, chat_id):
        self._log.debug('Turn in chat {}'.format(chat_id))

        query = read_query('last_turn_in_chat') % (chat_id)
        response = self._submit_query(query)
        return int(response[0]['turnid']['value']) + 1 if response else 1

    def _generate_leolani(self, instance_graph):
        # Create Leolani
        leolani_id = 'leolani'
        leolani_label = 'leolani'

        leolani = URIRef(to_iri(self.namespaces['LW'] + leolani_id))
        leolani_label = Literal(leolani_label)
        leolani_type1 = URIRef(to_iri(self.namespaces['N2MU'] + 'robot'))
        leolani_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))

        instance_graph.add((leolani, RDFS.label, leolani_label))
        instance_graph.add((leolani, RDF.type, leolani_type1))
        instance_graph.add((leolani, RDF.type, leolani_type2))

        self.my_uri = leolani

        return leolani

    def _generate_subject(self, capsule, instance_graph):
        if capsule['subject']['type'] == '':  # We only get the label
            subject_vocab = OWL
            subject_type = 'Thing'
        else:
            subject_vocab = self.namespaces['N2MU']
            subject_type = capsule['subject']['type']

        subject_id = capsule['subject']['label']

        subject = URIRef(to_iri(self.namespaces['LW'] + subject_id))
        subject_label = Literal(subject_id)
        subject_type1 = URIRef(to_iri(subject_vocab + subject_type))
        subject_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))

        instance_graph.add((subject, RDFS.label, subject_label))
        instance_graph.add((subject, RDF.type, subject_type1))
        instance_graph.add((subject, RDF.type, subject_type2))

        return subject, subject_label

    def _create_leolani_world(self, capsule, type='Statement'):
        # Instance graph
        instance_graph_uri = URIRef(to_iri(self.namespaces['LW'] +
                                           'Instances'))
        instance_graph = self.dataset.graph(instance_graph_uri)

        # Subject
        if type == 'Statement':
            subject, subject_label = self._generate_subject(
                capsule, instance_graph)
        elif type == 'Experience':
            subject = self._generate_leolani(
                instance_graph) if self.my_uri is None else self.my_uri
            subject_label = 'leolani'

        # Object
        if capsule['object']['type'] == '':  # We only get the label
            object_vocab = OWL
            object_type = 'Thing'
        else:
            object_vocab = self.namespaces['N2MU']
            object_type = capsule['object']['type']

        object_id = capsule['object']['label']

        object = URIRef(to_iri(self.namespaces['LW'] + object_id))
        object_label = Literal(object_id)
        object_type1 = URIRef(to_iri(object_vocab + object_type))
        object_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))

        instance_graph.add((object, RDFS.label, object_label))
        instance_graph.add((object, RDF.type, object_type1))
        instance_graph.add((object, RDF.type, object_type2))

        if type == 'Statement':
            claim_graph, statement = self._create_claim_graph(
                subject,
                subject_label,
                object,
                object_label,
                capsule['predicate']['type'],
                type='Statement')
        elif type == 'Experience':
            claim_graph, statement = self._create_claim_graph(
                subject,
                subject_label,
                object,
                object_label,
                'sees',
                type='Experience')

        return instance_graph, claim_graph, subject, object, statement

    def _create_claim_graph(self,
                            subject,
                            subject_label,
                            object,
                            object_label,
                            predicate,
                            type='Statement'):
        # Claim graph
        claim_graph_uri = URIRef(to_iri(self.namespaces['LW'] + 'Claims'))
        claim_graph = self.dataset.graph(claim_graph_uri)

        # Statement
        statement_id = hash_statement_id(
            [subject_label, predicate, object_label])

        statement = URIRef(to_iri(self.namespaces['LW'] + statement_id))
        statement_type1 = URIRef(to_iri(self.namespaces['GRASP'] + type))
        statement_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))
        statement_type3 = URIRef(to_iri(self.namespaces['SEM'] + 'Event'))

        # Create graph and add triple
        graph = self.dataset.graph(statement)
        graph.add((subject, self.namespaces['N2MU'][predicate], object))

        claim_graph.add((statement, RDF.type, statement_type1))
        claim_graph.add((statement, RDF.type, statement_type2))
        claim_graph.add((statement, RDF.type, statement_type3))

        return claim_graph, statement

    def _create_leolani_talk(self, capsule, leolani, type='Statement'):
        # Interaction graph
        if type == 'Statement':
            graph_to_write = 'Interactions'
        elif type == 'Experience':
            graph_to_write = 'Sensors'

        interaction_graph_uri = URIRef(
            to_iri(self.namespaces['LTa'] + graph_to_write))
        interaction_graph = self.dataset.graph(interaction_graph_uri)

        # Time
        date = capsule["date"]
        time = URIRef(
            to_iri(self.namespaces['LTi'] + str(capsule["date"].isoformat())))
        time_type = URIRef(
            to_iri(self.namespaces['TIME'] + 'DateTimeDescription'))
        day = Literal(date.day, datatype=self.namespaces['XML']['gDay'])
        month = Literal(date.month,
                        datatype=self.namespaces['XML']['gMonthDay'])
        year = Literal(date.year, datatype=self.namespaces['XML']['gYear'])
        time_unitType = URIRef(to_iri(self.namespaces['TIME'] + 'unitDay'))

        interaction_graph.add((time, RDF.type, time_type))
        interaction_graph.add((time, self.namespaces['TIME']['day'], day))
        interaction_graph.add((time, self.namespaces['TIME']['month'], month))
        interaction_graph.add((time, self.namespaces['TIME']['year'], year))
        interaction_graph.add(
            (time, self.namespaces['TIME']['unitType'], time_unitType))

        # Actor
        actor_id = capsule['author']
        actor_label = capsule['author']

        actor = URIRef(to_iri(self.namespaces['LF'] + actor_id))
        actor_label = Literal(actor_label)
        actor_type1 = URIRef(to_iri(self.namespaces['SEM'] + 'Actor'))
        actor_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))

        if type == 'Statement':
            actor_type3 = URIRef(to_iri(self.namespaces['N2MU'] + 'person'))
        elif type == 'Experience':
            actor_type3 = URIRef(to_iri(self.namespaces['N2MU'] + 'sensor'))

        interaction_graph.add((actor, RDFS.label, actor_label))
        interaction_graph.add((actor, RDF.type, actor_type1))
        interaction_graph.add((actor, RDF.type, actor_type2))
        interaction_graph.add((actor, RDF.type, actor_type3))

        # Add leolani knows/senses actor
        if type == 'Statement':
            predicate = 'knows'
        elif type == 'Experience':
            predicate = 'senses'

        interaction_graph.add(
            (leolani, self.namespaces['N2MU'][predicate], actor))
        _, _ = self._create_claim_graph(leolani, 'leolani', actor, actor_label,
                                        predicate, type)

        # Event and subevent
        event_id = self.create_chat_id(actor_label, date)
        if type == 'Statement':
            event_label = 'chat%s' % event_id
        elif type == 'Experience':
            event_label = 'visual%s' % event_id

        subevent_id = self.create_turn_id(event_id)
        if type == 'Statement':
            subevent_label = event_label + '_turn%s' % subevent_id
        elif type == 'Experience':
            subevent_label = event_label + '_object%s' % subevent_id

        turn = URIRef(to_iri(self.namespaces['LTa'] + subevent_label))
        turn_type1 = URIRef(to_iri(self.namespaces['SEM'] + 'Event'))
        if type == 'Statement':
            turn_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Turn'))
        elif type == 'Experience':
            turn_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Object'))

        interaction_graph.add((turn, RDF.type, turn_type1))
        interaction_graph.add((turn, RDF.type, turn_type2))
        interaction_graph.add(
            (turn, self.namespaces['N2MU']['id'], Literal(subevent_id)))
        interaction_graph.add(
            (turn, self.namespaces['SEM']['hasActor'], actor))
        interaction_graph.add((turn, self.namespaces['SEM']['hasTime'], time))

        chat = URIRef(to_iri(self.namespaces['LTa'] + event_label))
        chat_type1 = URIRef(to_iri(self.namespaces['SEM'] + 'Event'))
        if type == 'Statement':
            chat_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Chat'))
        elif type == 'Experience':
            chat_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Visual'))

        interaction_graph.add((chat, RDF.type, chat_type1))
        interaction_graph.add((chat, RDF.type, chat_type2))
        interaction_graph.add(
            (chat, self.namespaces['N2MU']['id'], Literal(event_id)))
        interaction_graph.add(
            (chat, self.namespaces['SEM']['hasActor'], actor))
        interaction_graph.add((chat, self.namespaces['SEM']['hasTime'], time))
        interaction_graph.add(
            (chat, self.namespaces['SEM']['hasSubevent'], turn))

        perspective_graph, mention, attribution = self._create_perspective_graph(
            capsule, subevent_label, type)

        # Link interactions and perspectives
        perspective_graph.add(
            (mention, self.namespaces['GRASP']['wasAttributedTo'], actor))
        perspective_graph.add(
            (mention, self.namespaces['GRASP']['hasAttribution'], attribution))
        perspective_graph.add(
            (mention, self.namespaces['PROV']['wasDerivedFrom'], chat))
        perspective_graph.add(
            (mention, self.namespaces['PROV']['wasDerivedFrom'], turn))

        return interaction_graph, perspective_graph, actor, time, mention, attribution

    def _create_perspective_graph(self, capsule, turn_label, type='Statement'):
        # Perspective graph
        perspective_graph_uri = URIRef(
            to_iri(self.namespaces['LTa'] + 'Perspectives'))
        perspective_graph = self.dataset.graph(perspective_graph_uri)

        # Mention
        if type == 'Statement':
            mention_id = turn_label + '_char%s' % capsule['position']
        elif type == 'Experience':
            mention_id = turn_label + '_pixel%s' % capsule['position']
        mention = URIRef(to_iri(self.namespaces['LTa'] + mention_id))
        mention_type = URIRef(to_iri(self.namespaces['GRASP'] + 'Mention'))

        perspective_graph.add((mention, RDF.type, mention_type))

        # Attribution
        attribution_id = mention_id + '_CERTAIN'
        attribution = URIRef(to_iri(self.namespaces['LTa'] + attribution_id))
        attribution_type = URIRef(
            to_iri(self.namespaces['GRASP'] + 'Attribution'))
        attribution_value = URIRef(to_iri(self.namespaces['GRASP'] +
                                          'CERTAIN'))
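        # Note: the attribution value is hard-coded to GRASP 'CERTAIN' in this snippet;
        # other certainty levels are not modelled here.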

        perspective_graph.add((attribution, RDF.type, attribution_type))
        perspective_graph.add((attribution, RDF.value, attribution_value))

        return perspective_graph, mention, attribution

    def _serialize(self, file_path):
        """
        Save graph to local file and return the serialized string
        :param file_path: path to where data will be saved
        :return: serialized data as string
        """
        # Save to file but return the python representation
        with open(file_path + '.' + self.format, 'w') as f:
            self.dataset.serialize(f, format=self.format)
        return self.dataset.serialize(format=self.format)

    def _upload_to_brain(self, data):
        """
        Post data to the brain
        :param data: serialized data as string
        :return: response status
        """
        self._log.debug("Posting triples")

        # From serialized string
        post_url = self.address + "/statements"
        response = requests.post(
            post_url,
            data=data,
            headers={'Content-Type': 'application/x-' + self.format})

        return str(response.status_code)

    def _model_graphs_(self, capsule, type='Statement'):
        # Leolani world (includes instance and claim graphs)
        instance_graph, claim_graph, subject, object, instance = self._create_leolani_world(
            capsule, type)

        # Identity
        leolani = self._generate_leolani(
            instance_graph) if self.my_uri is None else self.my_uri

        # Leolani talk (includes interaction and perspective graphs)
        interaction_graph, perspective_graph, actor, time, mention, attribution = self._create_leolani_talk(
            capsule, leolani, type)

        # Interconnections
        instance_graph.add(
            (subject, self.namespaces['GRASP']['denotedIn'], mention))
        instance_graph.add(
            (object, self.namespaces['GRASP']['denotedIn'], mention))

        instance_graph.add(
            (instance, self.namespaces['GRASP']['denotedBy'], mention))
        instance_graph.add(
            (instance, self.namespaces['SEM']['hasActor'], actor))
        instance_graph.add((instance, self.namespaces['SEM']['hasTime'], time))

        perspective_graph.add(
            (mention, self.namespaces['GRASP']['containsDenotation'], subject))
        perspective_graph.add(
            (mention, self.namespaces['GRASP']['containsDenotation'], object))
        perspective_graph.add(
            (mention, self.namespaces['GRASP']['denotes'], instance))

        perspective_graph.add(
            (attribution, self.namespaces['GRASP']['isAttributionFor'],
             mention))

    ######################################### Helpers for question processing #########################################

    def _create_query(self, parsed_question):
        _ = hash_statement_id([
            parsed_question['subject']['label'],
            parsed_question['predicate']['type'],
            parsed_question['object']['label']
        ])

        # Query subject
        if parsed_question['subject']['label'] == "":
            # Case fold
            # object_label = casefold_label(parsed_question['object']['label'])

            query = """
                SELECT ?slabel ?authorlabel
                        WHERE { 
                            ?s n2mu:%s ?o . 
                            ?s rdfs:label ?slabel . 
                            ?o rdfs:label '%s' .  
                            GRAPH ?g {
                                ?s n2mu:%s ?o . 
                            } . 
                            ?g grasp:denotedBy ?m . 
                            ?m grasp:wasAttributedTo ?author . 
                            ?author rdfs:label ?authorlabel .
                        }
                """ % (parsed_question['predicate']['type'],
                       parsed_question['object']['label'],
                       parsed_question['predicate']['type'])

        # Query object
        elif parsed_question['object']['label'] == "":
            query = """
                SELECT ?olabel ?authorlabel
                        WHERE { 
                            ?s n2mu:%s ?o .   
                            ?s rdfs:label '%s' .  
                            ?o rdfs:label ?olabel .  
                            GRAPH ?g {
                                ?s n2mu:%s ?o . 
                            } . 
                            ?g grasp:denotedBy ?m . 
                            ?m grasp:wasAttributedTo ?author . 
                            ?author rdfs:label ?authorlabel .
                        }
                """ % (parsed_question['predicate']['type'],
                       parsed_question['subject']['label'],
                       parsed_question['predicate']['type'])

        # Query existence
        else:
            query = """
                SELECT ?authorlabel ?v
                        WHERE { 
                            ?s n2mu:%s ?o .   
                            ?s rdfs:label '%s' .  
                            ?o rdfs:label '%s' .  
                            GRAPH ?g {
                                ?s n2mu:%s ?o . 
                            } . 
                            ?g grasp:denotedBy ?m . 
                            ?m grasp:wasAttributedTo ?author . 
                            ?author rdfs:label ?authorlabel .
                            ?m grasp:hasAttribution ?att .
                            ?att rdf:value ?v .
                        }
                """ % (parsed_question['predicate']['type'],
                       parsed_question['subject']['label'],
                       parsed_question['object']['label'],
                       parsed_question['predicate']['type'])

        query = self.query_prefixes + query

        return query

    def _submit_query(self, query):
        # Set up connection
        sparql = SPARQLWrapper(self.address)

        # Response parameters
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        sparql.addParameter('Accept', 'application/sparql-results+json')
        response = sparql.query().convert()

        return response["results"]["bindings"]

    ######################################### Helpers for conflict processing #########################################
    def _get_conflicts_with_predicate(self, one_to_one_predicate):
        query = """
            PREFIX n2mu: <http://cltl.nl/leolani/n2mu/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX grasp: <http://groundedannotationframework.org/grasp#>

            select ?sname 
                    (group_concat(?oname ; separator=";") as ?onames) 
                    (group_concat(?authorlabel ; separator=";") as ?authorlabels) 
            where { 
                GRAPH ?g {
                    ?s n2mu:%s ?o .
                    } .
                ?s rdfs:label ?sname .
                ?o rdfs:label ?oname .

                ?g grasp:denotedBy ?m . 
                ?m grasp:wasAttributedTo ?author . 
                ?author rdfs:label ?authorlabel .

            } group by ?sname having (count(distinct ?oname) > 1)
        """ % one_to_one_predicate

        response = self._submit_query(query)
        conflicts = []
        for item in response:
            conflict = {
                'subject': item['sname']['value'],
                'predicate': one_to_one_predicate,
                'objects': []
            }

            values = item['onames']['value'].split(';')
            authors = item['authorlabels']['value'].split(';')

            for val, auth in zip(values, authors):
                option = {'value': val, 'author': auth}
                conflict['objects'].append(option)

            conflicts.append(conflict)

        return conflicts
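        # Illustrative shape of the returned list (hypothetical values):
        # [{'subject': 'selene', 'predicate': one_to_one_predicate,
        #   'objects': [{'value': 'mexico', 'author': 'piek'},
        #               {'value': 'peru', 'author': 'lenka'}]}]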
# Note: the enclosing function definition was truncated in this snippet; a plausible
# signature is restored here so the fragment parses (name and parameter are assumed;
# requires `import csv`).
def csv_parser(filename):
    with open(filename, 'r') as csvfile:
        csv_contents = [{k: v for k, v in row.items()}
                        for row in csv.DictReader(csvfile, skipinitialspace=True,
                                                  quotechar='"', delimiter=',')]
    return csv_contents
#//*************** csv parser ****************//#

graph_uri_base = resource

path = 'source_datasets/'
filename_population = 'all_population_by_type.csv'
filename_unemployment = 'unemployment_eu.csv'
filename_inflow = 'inflow_dataset.csv'
filename_asylum = 'asylum_seekers.csv'

dataset = Dataset()
dataset.bind('mpr', RESOURCE)
dataset.bind('mpo', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('geo_country_code', GCC)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)
dataset.bind('sdmx', SDMX)

dataset.default_context.parse(VOCAB_FILE, format='turtle')

dataset, unemployment_eu_graph = convert_unemployment_csv(
    filename_unemployment, dataset,
    URIRef(graph_uri_base + 'unemployment_eu_graph'))

dataset, population_eu_graph = convert_population_csv(
    filename_population, dataset,
    URIRef(graph_uri_base + 'population_eu_graph'))

dataset, inflow_graph = convert_inflow_csv(
    filename_inflow, dataset,
    URIRef(graph_uri_base + 'inflow_graph'))
Example n. 16
0
#!/usr/bin/python
from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD
import rdflib.resource
from provmodified import Entity
import provmodified as prov
import json
import subprocess, shlex
import collections

DOCKER = Namespace("http://www.example.org/ns/docker#")
PROV = Namespace("http://www.w3.org/ns/prov#")

ds = Dataset(default_union=True)
ds.bind("docker", DOCKER)
ds.bind("prov", PROV)
default_graph = ds


def bind_ns(prefix, namespace):
    ds.namespace_manager.bind(prefix, Namespace(namespace))


def parse_json_byfile(filename):
    with open(filename) as data_file:
        data = json.load(data_file)
    return data[0]


def inspect_json(cmd):
    # print cmd
    p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
Example n. 17
0
from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD
import rdflib.resource

"""
@newfield iri: IRI
"""

PROV = Namespace("http://www.w3.org/ns/prov#")

ds = Dataset(default_union=True)
ds.bind("prov", PROV)
default_graph = ds
#print type(default_graph)


config = {
    "useInverseProperties": False
}


def set_use_inverse_properties(flag=False):
    config["useInverseProperties"] = flag


def using_inverse_properties():
    return config["useInverseProperties"]


def clear_graph(bundle=default_graph):
    bundle.remove((None, None, None))
Example n. 18
0
DC = Namespace("http://purl.org/dc/terms/")
UUIDNS = Namespace("urn:uuid:")
DOCKER = Namespace("http://w3id.org/daspos/docker#")
# W3C namespace:
POSIX = Namespace("http://www.w3.org/ns/posix/stat#")
ACL = Namespace("http://www.w3.org/ns/auth/acl#")

# DASPOS namespaces
SC = Namespace("https://w3id.org/daspos/smartcontainers#")
CA = Namespace("https://w3id.org/daspos/computationalactivity#")
CE = Namespace("https://w3id.org/daspos/computationalenvironment#")

# Need to handle DOI
# http://bitwacker.com/2010/02/04/dois-uris-and-cool-resolution/

ds.bind("prov", PROV)
ds.bind("ore", ORE)
ds.bind("owl", OWL)
ds.bind("dc", DC)
ds.bind("uuidns", UUIDNS)
ds.bind("docker", DOCKER)
ds.bind("posix", POSIX)
ds.bind("acl", ACL)
ds.bind("sc", SC)
ds.bind("ca", CA)
ds.bind("ce", CE)

default_graph = ds

#image_name = cmd_string.rsplit(' ', 1) [1]
#image_id = self.get_imageID(image_name)
Example n. 19
0
from rdflib import Dataset, URIRef, Literal, Namespace, RDF, RDFS, OWL, XSD

host = "http://localhost:5820/MATH"
# A namespace for our resources
data = host + '/'  # + '/resource/'
DATA = Namespace(data)
# A namespace for our vocabulary items (schema information, RDFS, OWL classes and properties etc.)
vocab = host  # + '/vocab/'
VOCAB = Namespace(host + '/vocab/')

# The URI for our graph
graph_uri = URIRef(host)  # + '/graph')

# We initialize a dataset, and bind our namespaces
dataset = Dataset()
dataset.bind('data', DATA)
dataset.bind('vocab', VOCAB)

# We then get a new graph object with our URI from the dataset.
graph = dataset.graph(graph_uri)

dataset.default_context.parse("../vocab.ttl", format="turtle")
# IRI baker is a library that reliably creates valid (parts of) IRIs from strings (spaces are turned into underscores, etc.).
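# A minimal sketch of that idea (assuming the iribaker package, whose to_iri() is used
# in other snippets in this file):
#
#     from iribaker import to_iri
#     safe_uri = URIRef(to_iri(data + 'some label with spaces'))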

# for row in same_set:
#     # graph.add((row[0], RDF.type, VOCAB['Formula']))
#     # graph.add((row[1], RDF.type, VOCAB['Formula']))
#     graph.add((URIRef(row[0]), OWL.sameAs, URIRef(row[1])))
#
# with open('same_formula_db.trig', 'w') as f:
#     graph.serialize(f, format='trig')
Example n. 20
0
def test_scenarios() -> None:
    """
    Testing scenarios:
        1. no base set
        2. base set at graph creation
        3. base set at serialization
        4. base set at both graph creation & serialization, serialization overrides
        5. multiple serialization side effect checking
        6. checking results for RDF/XML
        7. checking results for N3
        8. checking results for TriX & TriG
    """

    # variables
    base_one = Namespace("http://one.org/")
    base_two = Namespace("http://two.org/")
    title = Literal("Title", lang="en")
    description = Literal("Test Description", lang="en")
    creator = URIRef("https://creator.com")
    cs = URIRef("")

    # starting graph
    g = Graph()
    g.add((cs, RDF.type, SKOS.ConceptScheme))
    g.add((cs, DCTERMS.creator, creator))
    g.add((cs, DCTERMS.source, URIRef("nick")))
    g.bind("dct", DCTERMS)
    g.bind("skos", SKOS)

    # 1. no base set for graph, no base set for serialization
    g1 = Graph()
    g1 += g
    # @base should not be in output
    assert "@base" not in g.serialize(format="turtle")

    # 2. base one set for graph, no base set for serialization
    g2 = Graph(base=base_one)
    g2 += g
    # @base should be in output, from Graph (one)
    assert "@base <http://one.org/> ." in g2.serialize(format="turtle")

    # 3. no base set for graph, base two set for serialization
    g3 = Graph()
    g3 += g
    # @base should be in output, from serialization (two)
    assert "@base <http://two.org/> ." in g3.serialize(format="turtle",
                                                       base=base_two)

    # 4. base one set for graph, base two set for serialization, serialization (two) overrides
    g4 = Graph(base=base_one)
    g4 += g
    # @base should be in output, from serialization (two)
    assert "@base <http://two.org/> ." in g4.serialize(format="turtle",
                                                       base=base_two)
    # just checking that the graph setting (one) hasn't snuck through
    assert "@base <http://one.org/> ." not in g4.serialize(format="turtle",
                                                           base=base_two)

    # 5. multiple serialization side effect checking
    g5 = Graph()
    g5 += g
    # @base should be in output, from serialization (two)
    assert "@base <http://two.org/> ." in g5.serialize(format="turtle",
                                                       base=base_two)

    # checking for side effects - no base now set for this serialization
    # @base should not be in output
    assert "@base" not in g5.serialize(format="turtle")

    # 6. checking results for RDF/XML
    g6 = Graph()
    g6 += g
    g6.bind("dct", DCTERMS)
    g6.bind("skos", SKOS)
    assert "@xml:base" not in g6.serialize(format="xml")
    assert 'xml:base="http://one.org/"' in g6.serialize(format="xml",
                                                        base=base_one)
    g6.base = base_two
    assert 'xml:base="http://two.org/"' in g6.serialize(format="xml")
    assert 'xml:base="http://one.org/"' in g6.serialize(format="xml",
                                                        base=base_one)

    # 7. checking results for N3
    g7 = Graph()
    g7 += g
    g7.bind("dct", DCTERMS)
    g7.bind("skos", SKOS)
    assert "@xml:base" not in g7.serialize(format="xml")
    assert "@base <http://one.org/> ." in g7.serialize(format="n3",
                                                       base=base_one)
    g7.base = base_two
    assert "@base <http://two.org/> ." in g7.serialize(format="n3")
    assert "@base <http://one.org/> ." in g7.serialize(format="n3",
                                                       base=base_one)

    # 8. checking results for TriX & TriG
    # TriX can specify a base per graph, but a base passed at serialization applies to the whole document
    base_three = Namespace("http://three.org/")
    ds1 = Dataset()
    ds1.bind("dct", DCTERMS)
    ds1.bind("skos", SKOS)
    g8 = ds1.graph(URIRef("http://g8.com/"), base=base_one)
    g9 = ds1.graph(URIRef("http://g9.com/"))
    g8 += g
    g9 += g
    g9.base = base_two
    ds1.base = base_three

    trix = ds1.serialize(format="trix", base=Namespace("http://two.org/"))
    assert '<graph xml:base="http://one.org/">' in trix
    assert '<graph xml:base="http://two.org/">' in trix
    assert '<TriX xml:base="http://two.org/"' in trix

    trig = ds1.serialize(format="trig", base=Namespace("http://two.org/"))
    assert "@base <http://one.org/> ." not in trig
    assert "@base <http://three.org/> ." not in trig
    assert "@base <http://two.org/> ." in trig
    def make_RDF(self, contents):

        host = "http://localhost:5820/MATH"
        # A namespace for our resources
        data = host + '/'  # + '/resource/'
        DATA = Namespace(data)
        # A namespace for our vocabulary items (schema information, RDFS, OWL classes and properties etc.)
        vocab = host #+ '/vocab/'
        VOCAB = Namespace(host + '/vocab/')

        # The URI for our graph
        graph_uri = URIRef(host)#+ '/graph')

        # We initialize a dataset, and bind our namespaces
        dataset = Dataset()
        dataset.bind('data', DATA)
        dataset.bind('vocab', VOCAB)

        # We then get a new graph object with our URI from the dataset.
        graph = dataset.graph(graph_uri)

        dataset.default_context.parse("db/vocab.ttl",format="turtle")
        # IRI baker is a library that reliably creates valid (parts of) IRIs from strings (spaces are turned into underscores, etc.).


        for row in contents:


            id = URIRef((data + str(row['id']))) # primary key for the object

            id_ = URIRef((data + str(row['id_'])))
            # graph.add((id, VOCAB['previous_id'] ,id_))
            # graph.add((id, RDF.type, OWL.NamedIndividual))


            if 'Formula' in row:
                graph.add((id, RDF.type, VOCAB['Formula']))
                formula_xml = Literal(row['Formula'], datatype=XSD['string'])
                graph.add((id, VOCAB['xml'], formula_xml))
                description = Literal(row['description'], datatype=XSD['string'])
                graph.add((id, VOCAB['Description'], description))
                label = Literal(row['label'], datatype=XSD['string'])
                graph.add((id, VOCAB['label'], label))
                if row['id_'] != 0:
                    parent_id = URIRef(data + str(row['id_']))
                    graph.add((id, VOCAB['subFormulaOf'], parent_id))


            if ('Symbol' in row):
                graph.add((id, RDF.type, VOCAB['Symbol']))
                # print(row['Symbol'])
                symbol = Literal(row['Symbol'], datatype=XSD['string'])
                # print(symbol)
                graph.add((id, VOCAB['label'], symbol))
                parent_id = URIRef((data + str(row['parent_id'])))
                graph.add((id, VOCAB['partOf'], parent_id))


            if ('Operator' in row):
                graph.add((id, RDF.type, VOCAB['Operator']))
                operator = Literal(row['Operator'], datatype=XSD['string'])
                graph.add((id, VOCAB['label'], operator))
                parent_id = URIRef((data + str(row['parent_id'])))
                graph.add((id, VOCAB['partOf'], parent_id))


            if ('Function_add' in row):
                # print(row)
                id = URIRef((data + str(row['Function_add'])))
                graph.add((id, RDF.type, VOCAB['Operator']))
                left = URIRef((data + str(row['left'])))
                graph.add((id, VOCAB['left'], left))
                right = URIRef((data + str(row['right'])))
                graph.add((id, VOCAB['right'], right))

            if ('Function_subtract' in row):
                print(row)
                id = URIRef((data + str(row['Function_subtract'])))
                graph.add((id, RDF.type, VOCAB['Operator']))
                left = URIRef((data + str(row['left'])))
                graph.add((id, VOCAB['left'], left))
                right = URIRef((data + str(row['right'])))
                graph.add((id, VOCAB['right'], right))

        with open('db/math_db.trig','w') as f:
            graph.serialize(f, format='trig')
Example n. 22
0
    # Info on the item
    g.add((item, RDF.type, saa.Item))
    g.add((item, saa.term('index'), Literal(record['assigned_item_no'])))

    if record['persistent_uid'] != "":
        g.add((item, saa.identifier, Literal(record['persistent_uid'])))

    g.add((item, RDFS.label, Literal(record['title'], lang='nl')))
    g.add((item, saa.artist, Literal(record['artist_name_1'])))
    g.add((item, saa.transcription, Literal(record['entry'], lang='nl')))
    g.add((item, saa.workType, Literal(record['object_type_1'], lang='nl')))

    if record['room'] != "":
        g.add((item, saa.room, Literal(record['room'], lang='nl')))

    if record['valuation_amount'] != "":
        g.add((item, saa.valuation, Literal(record['valuation_amount'])))

    return g


if __name__ == "__main__":

    ds = Dataset()
    ds.bind('ga', ga)
    ds.bind('saa', saa)

    ds = main(dataset=ds)
    ds.serialize('Dutch_Archival_Descriptions_Getty.trig', format='trig')
Example n. 23
0
class RdfBuilder(object):
    ONTOLOGY_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../ontologies'))

    def __init__(self):
        # type: () -> RdfBuilder

        self.ontology_paths = {}
        self.namespaces = {}
        self.dataset = Dataset()

        self._log = logger.getChild(self.__class__.__name__)
        self._log.debug("Booted")

        self._define_namespaces()
        self._bind_namespaces()
        self.define_named_graphs()
        self.load_ontology_integration()

    ########## setting up connection ##########
    def _define_namespaces(self):
        """
        Define namespaces for different layers (ontology/vocab and resource). Assign them to self
        :return:
        """
        # Namespaces for the instance layer
        instance_vocab = 'http://cltl.nl/leolani/n2mu/'
        self.namespaces['N2MU'] = Namespace(instance_vocab)
        instance_resource = 'http://cltl.nl/leolani/world/'
        self.namespaces['LW'] = Namespace(instance_resource)

        # Namespaces for the mention layer
        mention_vocab = 'http://groundedannotationframework.org/gaf#'
        self.namespaces['GAF'] = Namespace(mention_vocab)
        mention_resource = 'http://cltl.nl/leolani/talk/'
        self.namespaces['LTa'] = Namespace(mention_resource)

        # Namespaces for the attribution layer
        attribution_vocab = 'http://groundedannotationframework.org/grasp#'
        self.namespaces['GRASP'] = Namespace(attribution_vocab)
        attribution_resource_friends = 'http://cltl.nl/leolani/friends/'
        self.namespaces['LF'] = Namespace(attribution_resource_friends)
        attribution_resource_inputs = 'http://cltl.nl/leolani/inputs/'
        self.namespaces['LI'] = Namespace(attribution_resource_inputs)

        # Namespaces for the temporal layer-ish
        context_vocab = 'http://cltl.nl/episodicawareness/'
        self.namespaces['EPS'] = Namespace(context_vocab)
        self.namespaces['LC'] = Namespace('http://cltl.nl/leolani/context/')

        # The namespaces of external ontologies
        skos = 'http://www.w3.org/2004/02/skos/core#'
        self.namespaces['SKOS'] = Namespace(skos)

        prov = 'http://www.w3.org/ns/prov#'
        self.namespaces['PROV'] = Namespace(prov)

        sem = 'http://semanticweb.cs.vu.nl/2009/11/sem/'
        self.namespaces['SEM'] = Namespace(sem)

        time = 'http://www.w3.org/TR/owl-time/#'
        self.namespaces['TIME'] = Namespace(time)

        xml = 'https://www.w3.org/TR/xmlschema-2/#'
        self.namespaces['XML'] = Namespace(xml)

        wd = 'http://www.wikidata.org/entity/'
        self.namespaces['WD'] = Namespace(wd)

        wdt = 'http://www.wikidata.org/prop/direct/'
        self.namespaces['WDT'] = Namespace(wdt)

        wikibase = 'http://wikiba.se/ontology#'
        self.namespaces['wikibase'] = Namespace(wikibase)

    def define_named_graphs(self):
        # Instance graph
        self.ontology_graph = self.dataset.graph(self.create_resource_uri('LW', 'Ontology'))
        self.instance_graph = self.dataset.graph(self.create_resource_uri('LW', 'Instances'))
        self.claim_graph = self.dataset.graph(self.create_resource_uri('LW', 'Claims'))
        self.perspective_graph = self.dataset.graph(self.create_resource_uri('LTa', 'Perspectives'))
        self.interaction_graph = self.dataset.graph(self.create_resource_uri('LTa', 'Interactions'))

    def _get_ontology_path(self):
        """
        Define ontology paths to key vocabularies
        :return:
        """
        self.ontology_paths['n2mu'] = os.path.join(self.ONTOLOGY_ROOT, 'leolani.ttl')
        self.ontology_paths['gaf'] = os.path.join(self.ONTOLOGY_ROOT, 'gaf.rdf')
        self.ontology_paths['grasp'] = os.path.join(self.ONTOLOGY_ROOT, 'grasp.rdf')
        self.ontology_paths['sem'] = os.path.join(self.ONTOLOGY_ROOT, 'sem.rdf')

    def load_ontology_integration(self):
        self.ontology_graph.parse(location=os.path.join(self.ONTOLOGY_ROOT, 'integration.ttl'), format="turtle")

    def _bind_namespaces(self):
        """
        Bind namespaces
        :return:
        """
        self.dataset.bind('n2mu', self.namespaces['N2MU'])
        self.dataset.bind('leolaniWorld', self.namespaces['LW'])

        self.dataset.bind('gaf', self.namespaces['GAF'])
        self.dataset.bind('leolaniTalk', self.namespaces['LTa'])

        self.dataset.bind('grasp', self.namespaces['GRASP'])
        self.dataset.bind('leolaniFriends', self.namespaces['LF'])
        self.dataset.bind('leolaniInputs', self.namespaces['LI'])

        self.dataset.bind('time', self.namespaces['TIME'])
        self.dataset.bind('eps', self.namespaces['EPS'])
        self.dataset.bind('leolaniContext', self.namespaces['LC'])

        self.dataset.bind('skos', self.namespaces['SKOS'])
        self.dataset.bind('prov', self.namespaces['PROV'])
        self.dataset.bind('sem', self.namespaces['SEM'])
        self.dataset.bind('xml', self.namespaces['XML'])
        self.dataset.bind('owl', OWL)

        self.dataset.bind('wd', self.namespaces['WD'])
        self.dataset.bind('wdt', self.namespaces['WDT'])
        self.dataset.bind('wikibase', self.namespaces['wikibase'])

    ########## basic constructors ##########
    def _fix_nlp_types(self, types):
        # TODO here we know if two types are different category (aka noun and verb) we might need to split the triple
        fixed_types = []
        for el in types:
            if len(el) == 1:
                # this was just a char
                fixed_types.append(types.split('.')[-1])
                break
            elif '.' in el:
                fixed_types.append(el.split('.')[-1])
            else:
                fixed_types.append(el)

        # Hand fixed mappings
        if 'artifact' in fixed_types:
            fixed_types.append('object')

        return fixed_types

    def create_resource_uri(self, namespace, resource_name):
        """
        Create an URI for the given resource (entity, predicate, named graph, etc) in the given namespace
        Parameters
        ----------
        namespace: str
            Namespace where entity belongs to
        resource_name: str
            Label of resource

        Returns
        -------
        uri: str
            Representing the URI of the resource

        """
        if namespace in self.namespaces.keys():
            uri = URIRef(to_iri(self.namespaces[namespace] + resource_name))
        else:
            uri = URIRef(to_iri('{}:{}'.format(namespace, resource_name)))

        return uri

    def fill_literal(self, value, datatype=None):
        """
        Create an RDF literal given its value and datatype
        Parameters
        ----------
        value: str
            Value of the literal resource
        datatype: str
            Datatype of the literal

        Returns
        -------
            Literal with value and datatype given
        """

        return Literal(value, datatype=datatype) if datatype is not None else Literal(value)

    def fill_entity(self, label, types, namespace='LW', uri=None):
        """
        Create an RDF entity given its label, types and its namespace
        Parameters
        ----------
        label: str
            Label of entity
        types: List[str]
            List of types for this entity
        uri: str
            URI of the entity, if available (e.g. when extracting concepts from Wikidata)
        namespace: str
            Namespace where entity belongs to

        Returns
        -------
            Entity object with given label
        """
        if types in [None, ''] and label != '':
            self._log.warning('Unknown type: {}'.format(label))
            return self.fill_entity_from_label(label, namespace)
        else:
            entity_id = self.create_resource_uri(namespace, label) if not uri else URIRef(to_iri(uri))
            fixed_types = self._fix_nlp_types(types)
            return Entity(entity_id, Literal(label), fixed_types)

    def fill_predicate(self, label, namespace='N2MU', uri=None):
        """
        Create an RDF predicate given its label and its namespace
        Parameters
        ----------
        label: str
            Label of predicate
        uri: str
            URI of the predicate, if available (e.g. when extracting concepts from Wikidata)
        namespace:
            Namespace where predicate belongs to

        Returns
        -------
            Predicate object with given label
        """
        predicate_id = self.create_resource_uri(namespace, label) if not uri else URIRef(to_iri(uri))

        return Predicate(predicate_id, Literal(label))

    def fill_entity_from_label(self, label, namespace='LW', uri=None):
        """
        Create an RDF entity given its label and its namespace
        Parameters
        ----------
        label: str
            Label of entity
        uri: str
            URI of the entity, if available (e.g. when extracting concepts from Wikidata)
        namespace: str
            Namespace where entity belongs to

        Returns
        -------
            Entity object with given label and no type information
        """
        entity_id = self.create_resource_uri(namespace, label) if not uri else URIRef(to_iri(uri))

        return Entity(entity_id, Literal(label), [''])

    def empty_entity(self):
        """
        Create an empty RDF entity
        Parameters
        ----------

        Returns
        -------
            Entity object with no label and no type information
        """
        return Entity('', Literal(''), [''])

    def fill_provenance(self, author, date):
        """
        Structure provenance to pair authors and dates when mentions are created
        Parameters
        ----------
        author: str
            Actor that generated the knowledge
        date: date
            Date when knowledge was generated

        Returns
        -------
            Provenance object containing author and date
        """

        return Provenance(author, date)

    def fill_triple(self, subject_dict, predicate_dict, object_dict, namespace='LW'):
        """
        Create an RDF entity given its label and its namespace
        Parameters
        ----------
        subject_dict: dict
            Information about label and type of subject
        predicate_dict: dict
            Information about type of predicate
        object_dict: dict
            Information about label and type of object
        namespace: str
            Information about which namespace the entities belong to

        Returns
        -------
            Entity object with given label
        """
        subject = self.fill_entity(subject_dict['label'], [subject_dict['type']], namespace=namespace)
        predicate = self.fill_predicate(predicate_dict['type'])
        object = self.fill_entity(object_dict['label'], [object_dict['type']], namespace=namespace)

        return Triple(subject, predicate, object)

    def fill_triple_from_label(self, subject_label, predicate, object_label, namespace='LW'):
        """
        Create an RDF entity given its label and its namespace
        Parameters
        ----------
        subject_label: str
            Information about label of subject
        predicate: str
            Information about predicate
        object_label: str
            Information about label of object
        namespace: str
            Information about which namespace the entities belong to

        Returns
        -------
            Entity object with given label
        """
        subject = self.fill_entity_from_label(subject_label, namespace=namespace)
        predicate = self.fill_predicate(predicate)
        object = self.fill_entity_from_label(object_label, namespace=namespace)

        return Triple(subject, predicate, object)

    ########## basic reverse engineer ##########
    def label_from_uri(self, uri, namespace='LTi'):
        # str.strip() removes characters rather than a prefix, so drop the namespace prefix explicitly
        return uri.replace(self.namespaces[namespace], '')

    def clean_aggregated_types(self, aggregated_types):
        split_types = aggregated_types.split('|')

        clean_types = []
        for type_uri in split_types:
            if '#' in type_uri:
                [prefix, bare_type] = type_uri.split('#', 1)
            elif '/' in type_uri:
                [prefix, bare_type] = type_uri.rsplit('/', 1)
            else:
                bare_type = type_uri

            bare_type = casefold_text(bare_type, format='triple')
            clean_types.append(bare_type)

        return clean_types

    def clean_aggregated_detections(self, aggregated_detections):
        split_detections = aggregated_detections.split('|')

        clean_detections = []
        for detection_label in split_detections:
            if '-' in detection_label:
                [detection_label, detection_id] = detection_label.rsplit('-', 1)
            clean_detections.append(detection_label)

        return clean_detections
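        # e.g. an aggregated string like 'chair-1|person-2' yields ['chair', 'person']
        # (illustrative input).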
Example n. 24
0
def main(source, target, geometryfile='data/point2wkt.json'):
    with open(source) as infile:
        data = json.load(infile)

    with open(geometryfile) as infile:
        point2wkt = json.load(infile)

    ds = Dataset()
    dataset = lp.term('')

    g = rdfSubject.db = ds.graph(identifier=lp)

    ### Custom triples / Ontology

    g.add((lpOnt.Adres, OWL.equivalentClass, schema.PostalAddress))

    g.add((lpOnt.Straat, OWL.equivalentClass, hg.Street))
    g.add((lpOnt.Buurt, OWL.equivalentClass, hg.Neighbourhood))

    g.add((lpOnt.adres, OWL.equivalentProperty, schema.address))

    ########
    # Data #
    ########

    adres2locatie = defaultdict(lambda: defaultdict(list))

    for n, adresLabel in enumerate(data, 1):

        if n % 5000 == 0:
            print(f"{n}/{len(data)}", end='\r')
            # break

        # # geometry
        # wkt = point2wkt.get(locatiepunt)

        # wktLiteral = Literal(wkt, datatype=geo.wktLiteral)
        # geometry = Geometry(lpGeo.term(str(locatiepunt)),
        #                     asWKT=wktLiteral,
        #                     label=[str(locatiepunt)])

        addresses = getAdres(data[adresLabel], adresLabel, point2wkt)

        # adres2locatie[adres][year].append(geometry)

        # observations.append(locpdetail)
        # locp.observation = observations

        # addresses.append(
        #     Role(
        #         None,
        #         label=address.label,
        #         address=address,
        #         hasLatestBeginTimeStamp=locpdetail.hasLatestBeginTimeStamp,
        #         hasEarliestEndTimeStamp=locpdetail.hasEarliestEndTimeStamp,
        #         startDate=Literal(year, datatype=XSD.gYear)))

    ds.bind('create', create)
    ds.bind('schema', schema)
    ds.bind('sem', sem)
    ds.bind('geo', geo)
    ds.bind('juso', juso)
    ds.bind('qb', qb)
    ds.bind('void', void)

    print("Serializing!")
    ds.serialize(target, format='trig')
Example n. 25
0
NP = Namespace('http://www.nanopub.org/nschema#')
FOAF = Namespace('http://xmlns.com/foaf/0.1/')




dataset = "gdppc"
pathtofile = '../../sdh-public-datasets/allcliodata_raw.csv'


BASE = Namespace('http://data.socialhistory.org/resource/{}/'.format(dataset))


# Initialize a conjunctive graph for the whole lot
rdf_dataset = Dataset()
rdf_dataset.bind('qbrv', QBRV)
rdf_dataset.bind('qbr', QBR)
rdf_dataset.bind('qb', QB)
rdf_dataset.bind('skos', SKOS)
rdf_dataset.bind('prov', PROV)
rdf_dataset.bind('np', NP)
rdf_dataset.bind('foaf', FOAF)

rdf_dataset.bind('clio-property', CLIOPROP)
rdf_dataset.bind('clio-indicator', CLIOIND)
rdf_dataset.bind('clio-country', CLIOCTR)
rdf_dataset.bind('clio', CLIO)

rdf_dataset.bind('sdmx', SDMX)
rdf_dataset.bind('sdmx-dimension', SDMXDIM)
rdf_dataset.bind('sdmx-measure', SDMXMSR)
    # (tail of a truncated upload helper; only its final line survived in this snippet)
    return str(response.status_code)


def serialize_upload(filename, dataset, upload=True):
    with open(filename, 'w') as f:
        dataset.serialize(f, format='trig')
    upload_to_stardog(dataset.serialize(format='trig'))


graph_uri_base = resource + 'findaslot/'

drop_stardog()

dataset = Dataset()
dataset.bind('fasdat', RESOURCE)
dataset.bind('fasont', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)

dataset.default_context.parse(VOCAB_FILE, format='turtle')

# Upload vocabulary
with open(VOCAB_FILE, 'r') as f:
    upload_to_stardog(f.read())

dataset, t_graph = convert_dataset(
    SOURCE_DATA_DIR + 'Theater.json', dataset, URIRef(graph_uri_base + 'theaters'), museums=False)
serialize_upload(OUTPUT_DIR + 'theaters.trig', t_graph)
# dataset.remove_graph(t_graph)