def dump_as_rdf(g: Dataset, table_name: str) -> bool:
    """
    Dump the contents of Graph g in RDF turtle

    Before serializing, every object of an ``exactMatch`` statement is
    recorded as a ``hasMember`` of its subject and of all of that subject's
    ``broader`` ancestors; the members gathered on each top concept are then
    copied onto the owning scheme.  The result is written to
    ``<DATA_DIR>/<table_name>.ttl``.

    :param g: Dataset to dump
    :param table_name: name of the base table
    :return: success indicator (the function only ever returns True;
             failures surface as exceptions)
    """
    has_member = ISO['enumeratedConceptualDomain.hasMember']

    # Propagate the mapped concepts up the tree
    def add_to_ancestors(s: URIRef, vm: URIRef):
        # Iterative walk with a visited set: a cycle in the broader
        # hierarchy would otherwise recurse without end.
        pending = [s]
        visited = set()
        while pending:
            node = pending.pop()
            if node in visited:
                continue
            visited.add(node)
            g.add((node, has_member, vm))
            pending.extend(g.objects(node, SKOS.broader))

    # NOTE(review): in the text under review these loops were indented as
    # if guarded by a condition that is no longer present; they are run
    # unconditionally here -- confirm whether a guard is required.
    # The iterators are snapshotted with list() because the loop bodies add
    # triples to the graph that is being traversed.
    for subj, obj in list(g.subject_objects(SKOS.exactMatch)):
        add_to_ancestors(subj, obj)
    # TODO: this gives us a list of all concepts in the scheme... useful?
    for scheme, tc in list(g.subject_objects(SKOS.hasTopConcept)):
        for member in list(g.objects(tc, has_member)):
            g.add((scheme, has_member, member))

    for name, ns in namespaces.items():
        g.bind(name.lower(), ns)
    outfile = os.path.join(DATA_DIR, table_name + '.ttl')
    print(f"Saving output to {outfile}")
    g.serialize(outfile, format='turtle')
    print(f"{len(g)} triples written")
    return True
class Fragment(object):
    """Container that collects triples/quads in a Dataset and serializes them as TriG."""

    # Vocabulary namespaces exposed to callers that build fragment metadata.
    HYDRA = Namespace("http://www.w3.org/ns/hydra/core#")
    VOID = Namespace("http://rdfs.org/ns/void#")
    FOAF = Namespace("http://xmlns.com/foaf/0.1/")
    DCTERMS = Namespace("http://purl.org/dc/terms/")

    def __init__(self):
        """Start with an empty Dataset."""
        self.rdf_graph = Dataset()

    def add_data_triple(self, subject, predicate, obj):
        """Add a triple to the dataset without naming a graph."""
        self.rdf_graph.add((subject, predicate, obj))

    def add_graph(self, identifier):
        """Register a named graph with the given identifier in the dataset."""
        self.rdf_graph.graph(identifier)

    def add_meta_quad(self, graph, subject, predicate, obj):
        """Add a 4-tuple to the dataset.

        The four arguments are forwarded positionally, in the order given,
        as one tuple.
        NOTE(review): rdflib reads a 4-tuple as (subject, predicate, object,
        graph), so the first argument lands in the subject slot and the last
        in the graph slot whatever the parameter names suggest -- confirm
        against the callers before relying on the names.
        """
        self.rdf_graph.add((graph, subject, predicate, obj))

    def add_prefix(self, prefix, uri):
        """Bind a namespace prefix used when serializing."""
        self.rdf_graph.bind(prefix, uri)

    def serialize(self):
        """Return the dataset serialized as UTF-8 encoded TriG."""
        return self.rdf_graph.serialize(format="trig", encoding="utf-8")
            csvfile, skipinitialspace=True, quotechar='"', delimiter=',')]
    return csv_contents

#//*************** csv parser ****************//#

# Prefix from which the named-graph URIs below are built.
graph_uri_base = resource

# Input locations.
path = 'source_datasets/'
filename_population = 'all_population_by_type.csv'
filename_unemployment = 'unemployment_eu.csv'
filename_inflow = 'inflow_dataset.csv'
filename_asylum = 'asylum_seekers.csv'

# One dataset for all conversions, with the prefixes used on output.
dataset = Dataset()
for _prefix, _namespace in (
        ('mpr', RESOURCE),
        ('mpo', VOCAB),
        ('geo', GEO),
        ('geo_country_code', GCC),
        ('dbo', DBO),
        ('dbr', DBR),
        ('sdmx', SDMX),
):
    dataset.bind(_prefix, _namespace)

# The vocabulary file is parsed into the default context.
dataset.default_context.parse(VOCAB_FILE, format='turtle')

_unemployment_graph_uri = URIRef(graph_uri_base + 'unemployment_eu_graph')
dataset, unemployment_eu_graph = convert_unemployment_csv(
    filename_unemployment, dataset, _unemployment_graph_uri)

dataset, population_eu_graph = convert_population_csv(
    filename_population, dataset,
add quads directly to a specific Graph within the Dataset.

This example file shows how to declare a Dataset, add content to it, serialise it, query it
and remove things from it.

from rdflib import Dataset, URIRef, Literal, Namespace

#   Create & Add

# Create an empty Dataset
d = Dataset()
# Add a namespace prefix to it, just like for Graph
d.bind("ex", Namespace("http://example.com/"))

# Declare a Graph URI to be used to identify a Graph
graph_1 = URIRef("http://example.com/graph-1")

# Add an empty Graph, identified by graph_1, to the Dataset

# Add two quads to Graph graph_1 in the Dataset
    Literal("Triple X"),
            k: v
            for k, v in row.items()
        } for row in csv.DictReader(
            csvfile, skipinitialspace=True, quotechar='"', delimiter=',')]
    return csv_contents

#//*************** csv parser ****************//#

# Prefix from which the named-graph URI below is built.
graph_uri_base = resource + 'movement_of_people/'

# Input location.
path = 'source_datasets/'
filename = 'Movement_of_people_across_borders_dataset.csv'

# Dataset with the prefixes used on output.
dataset = Dataset()
for _prefix, _namespace in (
        ('trumpres', RESOURCE),
        ('trumpvoc', VOCAB),
        ('geo', GEO),
        ('dbo', DBO),
        ('dbr', DBR),
):
    dataset.bind(_prefix, _namespace)

# The vocabulary file is parsed into the default context.
dataset.default_context.parse(VOCAB_FILE, format='turtle')

# Convert the CSV into its own named graph, then hand the result to
# serialize_upload together with the output path.
_movement_graph_uri = URIRef(graph_uri_base + 'movement_graph')
dataset, movement_graph = convert_csv(
    path + filename, dataset, _movement_graph_uri)
serialize_upload(OUTPUT_DIR + 'movement_of_people.trig', dataset)

### Generate VoID metadata
from rdflib.void import generateVoID
from rdflib.namespace import VOID
dcterms_uri = 'http://purl.org/dc/terms/'
        triples=sum(1 for i in ds.graph(
        temporalCoverage=Literal("1674", datatype=XSD.gYear, normalize=False),

    ds.bind('owl', OWL)
    ds.bind('create', create)
    ds.bind('schema', schema)
    ds.bind('void', void)
    ds.bind('foaf', foaf)
    ds.bind('edm', edm)
    ds.bind('pnv', pnv)
    ds.bind('roar', roar)
    ds.bind('dc', dc)
    ds.bind('dcterms', dcterms)
    ds.bind('oa', oa)
    ds.bind('prov', prov)

    ds.serialize('data/kohier1674.trig', format='trig')
    with open(filename, 'r', encoding="ISO-8859-1") as csvfile:
        # Set the right quote character and delimiter
        csv_contents = [{
            k: v
            for k, v in row.items()
        } for row in csv.DictReader(
            csvfile, skipinitialspace=True, quotechar='"', delimiter=';')]

    # The URI for our dataset
    url = 'http://few.vu.nl/~mvr320/KRweb/resource/' + short[i] + '/'
    SETNAME = Namespace(url)
    graph_uri = URIRef(url)

    # We initialize a dataset, and bind our namespaces
    dataset = Dataset()
    dataset.bind('g13data', DATA)
    dataset.bind('g13vocab', VOCAB)
    dataset.bind('g13set', SETNAME)
    dataset.bind('geo', GEO)
    dataset.bind('geof', GEOF)
    dataset.bind('dbo', DBO)
    dataset.bind('dbp', DBP)
    dataset.bind('schema', SCHEMA)
    dataset.bind('vcard', VCARD)
    dataset.bind('wgs', WGS)
    dataset.bind('void', VOID)

    # We then get a new dataset object with our URI from the dataset.
    graph = dataset.graph(graph_uri)

    # Load the externally defined schema into the default dataset (context) of the dataset
def _load_json_cache(path):
    """Return the JSON content of *path*, or an empty dict when the file does not exist."""
    if os.path.isfile(path):
        with open(path) as infile:
            return json.load(infile)
    return dict()


def main(search=None, cache=None, identifiers=[]):
    """Build the RKD dataset and write it to ``rkdportraits14751825.trig``.

    Exactly one input source is used, in this order of precedence:

    search      -- a URL handed to ``parseURL``
    cache       -- a mapping whose values are handed to ``parseData``
    identifiers -- record identifiers, each appended to ``APIURL`` and
                   handed to ``parseURL`` (the default list is only read,
                   never modified)

    The thesaurus and image caches are read from ``rkdthesaurus.json`` and
    ``imagecache.json`` when those files exist, and both are written back
    before the dataset is serialized.
    """

    ns = Namespace("https://data.create.humanities.uva.nl/id/rkd/")

    ds = Dataset()
    ds.bind('rdfs', RDFS)
    ds.bind('schema', schema)
    ds.bind('sem', sem)
    ds.bind('bio', bio)
    ds.bind('foaf', foaf)
    ds.bind('void', void)
    ds.bind('skos', SKOS)
    ds.bind('owl', OWL)
    ds.bind('dc', dc)

    ds.bind('rkdArtist', URIRef("https://data.rkd.nl/artists/"))
    ds.bind('rkdThes', nsThesaurus)
    ds.bind('rkdPerson', nsPerson)
    ds.bind('rkdImage', URIRef("https://rkd.nl/explore/images/"))
    ds.bind('rkdThumb', URIRef("https://images.rkd.nl/rkd/thumb/650x650/"))

    ds.bind('aat', URIRef("http://vocab.getty.edu/aat/"))

    ## First the images

    g = rdfSubject.db = ds.graph(identifier=ns)

    # Load cache thesaurus (empty when no cache file is present)
    thesaurusDict = _load_json_cache('rkdthesaurus.json')

    # Load cache images (empty when no cache file is present)
    imageCache = _load_json_cache('imagecache.json')

    # NOTE(review): the continuation lines of the three parseURL calls below
    # were missing from the text under review; the keyword arguments are
    # reconstructed from the returned pair and from the parseData call --
    # confirm against parseURL's signature.

    # to fetch all identifiers from the search
    if search:
        thesaurusDict, imageCache = parseURL(search,
                                             thesaurusDict=thesaurusDict,
                                             imageCache=imageCache)
    elif cache:
        # assume that everything in the thesaurus is also cached
        for doc in cache.values():
            parseData(doc, thesaurusDict=thesaurusDict)
    elif identifiers:
        for i in identifiers:
            thesaurusDict, imageCache = parseURL(APIURL + str(i),
                                                 thesaurusDict=thesaurusDict,
                                                 imageCache=imageCache)

    # Any images without labels?
    # These were not included in the search, but fetch them anyway.
    print("Finding referred images that were not included")
    q = """
    PREFIX schema: <http://schema.org/>
    SELECT ?uri WHERE {
        ?role a schema:Role ; schema:isRelatedTo ?uri .

        FILTER NOT EXISTS { ?uri schema:name ?name }
    }
    """
    images = g.query(q)
    print(f"Found {len(images)}!")
    for i in images:
        # Strip the image namespace so only the record identifier remains.
        identifier = str(i['uri']).replace('https://rkd.nl/explore/images/',
                                           '')
        thesaurusDict, imageCache = parseURL(
            "https://api.rkd.nl/api/record/images/" + str(identifier),
            thesaurusDict=thesaurusDict,
            imageCache=imageCache)

    ## Then the thesaurus
    print("Converting the thesaurus")
    rdfSubject.db = ds.graph(identifier=ns.term('thesaurus/'))

    # Snapshot the keys: thesaurusDict is rebound to getThesaurus' result on
    # every iteration.
    ids = list(thesaurusDict.keys())
    for i in ids:
        _, thesaurusDict = getThesaurus(i, thesaurusDict, 'concept')

    # Save updated cache
    with open('rkdthesaurus.json', 'w') as outfile:
        json.dump(thesaurusDict, outfile)

    with open('imagecache.json', 'w') as outfile:
        json.dump(imageCache, outfile)

    ## Serialize
    ds.serialize('rkdportraits14751825.trig', format='trig')
g7 += g
g7.bind("dct", DCTERMS)
g7.bind("skos", SKOS)
assert "@xml:base" not in g7.serialize(format="xml").decode("utf-8")
assert "@base <http://one.org/> ." in g7.serialize(
    format="n3", base=base_one).decode("utf-8")
g7.base = base_two
assert "@base <http://two.org/> ." in g7.serialize(format="n3").decode("utf-8")
assert "@base <http://one.org/> ." in g7.serialize(
    format="n3", base=base_one).decode("utf-8")

# 8. checking results for TriX & TriG
# TriX can specify a base per graph but setting a base for the whole
base_three = Namespace("http://three.org/")
ds1 = Dataset()
ds1.bind("dct", DCTERMS)
ds1.bind("skos", SKOS)
g8 = ds1.graph(URIRef("http://g8.com/"), base=base_one)
g9 = ds1.graph(URIRef("http://g9.com/"))
g8 += g
g9 += g
g9.base = base_two
ds1.base = base_three

trix = ds1.serialize(format="trix",
assert '<graph xml:base="http://one.org/">' in trix
assert '<graph xml:base="http://two.org/">' in trix
assert '<TriX xml:base="http://two.org/"' in trix

trig = ds1.serialize(format="trig",
def data_structure_definition(profile, dataset_name, dataset_base_uri,
                              variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a nanopublication with provenance annotations)
    that contains the data structure definition (from the DataCube vocabulary) and
    the mappings to external datasets.

    profile          -- the Google signin profile (keys 'email', 'name', 'id'; 'image' is optional)
    dataset_name     -- the name of the dataset, used as its label
    dataset_base_uri -- the URI of the dataset; all other URIs are minted below it
    variables        -- dictionary whose values describe the variables and their mappings to URIs
    source_path      -- the path to the dataset file that was annotated
    source_hash      -- the Git hash of the dataset file version of the dataset

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace('{}/'.format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a conjunctive graph for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind('qbrv', QBRV)
    rdf_dataset.bind('qbr', QBR)
    rdf_dataset.bind('qb', QB)
    rdf_dataset.bind('skos', SKOS)
    rdf_dataset.bind('prov', PROV)
    rdf_dataset.bind('np', NP)
    rdf_dataset.bind('foaf', FOAF)

    # Initialize the graphs needed for the nanopublication
    # NOTE(review): the minute-resolution string is also used as an
    # xsd:dateTime literal below although it carries no seconds; it is left
    # unchanged because the same string is part of the graph URIs.
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + '/' + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE['assertion/' + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE['provenance/' + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE['pubinfo/' + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR['person/' + profile['email']]

    rdf_dataset.add((author_uri, RDF.type, FOAF['Person']))
    rdf_dataset.add((author_uri, FOAF['name'], Literal(profile['name'])))
    rdf_dataset.add((author_uri, FOAF['email'], Literal(profile['email'])))
    rdf_dataset.add((author_uri, QBRV['googleId'], Literal(profile['id'])))
    try:
        rdf_dataset.add(
            (author_uri, FOAF['depiction'], URIRef(profile['image'])))
    except KeyError:
        # The profile has no 'image' entry: no depiction is recorded.
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV['path'],
                     Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV['sha1_hash'],
                     Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE['nanopublication/' + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP['Nanopublication']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasAssertion'], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP['Assertion']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasProvenance'], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP['Provenance']))
    rdf_dataset.add(
        (nanopublication_uri, NP['hasPublicationInfo'], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP['PublicationInfo']))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure definition itself)
    provenance_graph.add(
        (assertion_graph_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    provenance_graph.add(
        (dataset_uri, PROV['wasDerivedFrom'], dataset_version_uri))
    # The XML Schema datatype is spelled dateTime; the local name is case-sensitive.
    provenance_graph.add((assertion_graph_uri, PROV['generatedAtTime'],
                          Literal(timestamp, datatype=XSD.dateTime)))
    provenance_graph.add(
        (assertion_graph_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather than an activity itself.
    qber_uri = URIRef('https://github.com/CLARIAH/qber.git')

    pubinfo_graph.add((nanopublication_uri, PROV['wasGeneratedBy'], qber_uri))
    pubinfo_graph.add((nanopublication_uri, PROV['generatedAtTime'],
                       Literal(timestamp, datatype=XSD.dateTime)))
    pubinfo_graph.add(
        (nanopublication_uri, PROV['wasAttributedTo'], author_uri))

    # ----
    # The assertion graph
    # ----

    structure_uri = BASE['structure']

    assertion_graph.add((dataset_uri, RDF.type, QB['DataSet']))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add(
        (structure_uri, RDF.type, QB['DataStructureDefinition']))

    assertion_graph.add((dataset_uri, QB['structure'], structure_uri))

    # Only the variable descriptions are needed; the dictionary keys are not used.
    for variable in variables.values():
        variable_uri = URIRef(variable['original']['uri'])
        variable_label = Literal(variable['original']['label'])
        variable_type = URIRef(variable['type'])

        codelist_uri = URIRef(variable['codelist']['original']['uri'])
        codelist_label = Literal(variable['codelist']['original']['label'])

        # The variable as component of the definition
        component_uri = safe_url(BASE,
                                 'component/' + variable['original']['label'])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB['component'], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if 'description' in variable and variable['description'] != "":
            assertion_graph.add(
                (variable_uri, RDFS.comment, Literal(variable['description'])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable['uri'] != str(variable_uri):
            assertion_graph.add(
                (variable_uri, RDFS['subPropertyOf'], URIRef(variable['uri'])))

        if variable_type == QB['DimensionProperty']:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['dimension'], variable_uri))

            # Coded variables are also of type coded property (a subproperty of dimension property)
            if variable['category'] == 'coded':
                assertion_graph.add(
                    (variable_uri, RDF.type, QB['CodedProperty']))

        elif variable_type == QB['MeasureProperty']:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['measure'], variable_uri))
        elif variable_type == QB['AttributeProperty']:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB['attribute'], variable_uri))

        # If this variable is of category 'coded', we add codelist and URIs for
        # each variable (including mappings between value uris and etc....)
        if variable['category'] == 'coded':
            assertion_graph.add((codelist_uri, RDF.type, SKOS['Collection']))
            assertion_graph.add(
                (codelist_uri, RDFS.label, Literal(codelist_label)))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB['codeList'], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if variable['codelist']['uri'] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV['wasDerivedFrom'],
                                     URIRef(variable['codelist']['uri'])))

            # Generate a SKOS concept for each of the values and map it to the
            # assigned codelist
            for value in variable['values']:
                value_uri = URIRef(value['original']['uri'])
                value_label = Literal(value['original']['label'])

                assertion_graph.add((value_uri, RDF.type, SKOS['Concept']))
                assertion_graph.add(
                    (value_uri, SKOS['prefLabel'], Literal(value_label)))
                assertion_graph.add((codelist_uri, SKOS['member'], value_uri))

                # The value has been changed, and therefore there is a mapping
                if value['original']['uri'] != value['uri']:
                    assertion_graph.add(
                        (value_uri, SKOS['exactMatch'], URIRef(value['uri'])))
                    assertion_graph.add(
                        (value_uri, RDFS.label, Literal(value['label'])))

        elif variable['category'] == 'identifier':
            # Generate a SKOS concept for each of the values
            for value in variable['values']:
                value_uri = URIRef(value['original']['uri'])
                value_label = Literal(value['original']['label'])

                assertion_graph.add((value_uri, RDF.type, SKOS['Concept']))
                assertion_graph.add(
                    (value_uri, SKOS['prefLabel'], value_label))

                # The value has been changed, and therefore there is a mapping
                if value['original']['uri'] != value['uri']:
                    assertion_graph.add(
                        (value_uri, SKOS['exactMatch'], URIRef(value['uri'])))
                    assertion_graph.add(
                        (value_uri, RDFS.label, Literal(value['label'])))

        elif variable['category'] == 'other':
            # Generate a literal for each of the values when converting the dataset (but not here)
            pass

    return rdf_dataset
# Identifier and Docker namespaces
UUIDNS = Namespace("urn:uuid:")
DOCKER = Namespace("http://w3id.org/daspos/docker#")
# W3C namespace:
POSIX = Namespace("http://www.w3.org/ns/posix/stat#")
ACL = Namespace("http://www.w3.org/ns/auth/acl#")

# DASPOS namespaces
SC = Namespace("https://w3id.org/daspos/smartcontainers#")
CA = Namespace("https://w3id.org/daspos/computationalactivity#")
CE = Namespace("https://w3id.org/daspos/computationalenvironment#")

# Need to handle DOI
# http://bitwacker.com/2010/02/04/dois-uris-and-cool-resolution/

# Register every prefix on ds, in the same order as before.
for _prefix, _namespace in (
        ("prov", PROV),
        ("ore", ORE),
        ("owl", OWL),
        ("dc", DC),
        ("uuidns", UUIDNS),
        ("docker", DOCKER),
        ("posix", POSIX),
        ("acl", ACL),
        ("sc", SC),
        ("ca", CA),
        ("ce", CE),
):
    ds.bind(_prefix, _namespace)

# default_graph is another name for the same ds object.
default_graph = ds

# image_name = cmd_string.rsplit(' ', 1) [1]
def data_structure_definition(profile, dataset_name, dataset_base_uri, variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a nanopublication with provenance annotations)
    that contains the data structure definition (from the DataCube vocabulary) and
    the mappings to external datasets.

    profile          -- the Google signin profile (keys "email", "name", "id"; "image" is optional)
    dataset_name     -- the name of the dataset, used as its label
    dataset_base_uri -- the URI of the dataset; all other URIs are minted below it
    variables        -- dictionary whose values describe the variables and their mappings to URIs
    source_path      -- the path to the dataset file that was annotated
    source_hash      -- the Git hash of the dataset file version of the dataset

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace("{}/".format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a conjunctive graph for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind("qbrv", QBRV)
    rdf_dataset.bind("qbr", QBR)
    rdf_dataset.bind("qb", QB)
    rdf_dataset.bind("skos", SKOS)
    rdf_dataset.bind("prov", PROV)
    rdf_dataset.bind("np", NP)
    rdf_dataset.bind("foaf", FOAF)

    # Initialize the graphs needed for the nanopublication
    # NOTE(review): the minute-resolution string is also used as an
    # xsd:dateTime literal below although it carries no seconds; it is left
    # unchanged because the same string is part of the graph URIs.
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + "/" + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE["assertion/" + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE["provenance/" + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE["pubinfo/" + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR["person/" + profile["email"]]

    rdf_dataset.add((author_uri, RDF.type, FOAF["Person"]))
    rdf_dataset.add((author_uri, FOAF["name"], Literal(profile["name"])))
    rdf_dataset.add((author_uri, FOAF["email"], Literal(profile["email"])))
    rdf_dataset.add((author_uri, QBRV["googleId"], Literal(profile["id"])))
    try:
        rdf_dataset.add((author_uri, FOAF["depiction"], URIRef(profile["image"])))
    except KeyError:
        # The profile has no "image" entry: no depiction is recorded.
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV["path"], Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV["sha1_hash"], Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE["nanopublication/" + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP["Nanopublication"]))
    rdf_dataset.add((nanopublication_uri, NP["hasAssertion"], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP["Assertion"]))
    rdf_dataset.add((nanopublication_uri, NP["hasProvenance"], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP["Provenance"]))
    rdf_dataset.add((nanopublication_uri, NP["hasPublicationInfo"], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP["PublicationInfo"]))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure definition itself)
    provenance_graph.add((assertion_graph_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add((dataset_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    # The XML Schema datatype is spelled dateTime; the local name is case-sensitive.
    provenance_graph.add((assertion_graph_uri, PROV["generatedAtTime"], Literal(timestamp, datatype=XSD.dateTime)))
    provenance_graph.add((assertion_graph_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather than an activity itself.
    qber_uri = URIRef("https://github.com/CLARIAH/qber.git")

    pubinfo_graph.add((nanopublication_uri, PROV["wasGeneratedBy"], qber_uri))
    pubinfo_graph.add((nanopublication_uri, PROV["generatedAtTime"], Literal(timestamp, datatype=XSD.dateTime)))
    pubinfo_graph.add((nanopublication_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The assertion graph
    # ----

    structure_uri = BASE["structure"]

    assertion_graph.add((dataset_uri, RDF.type, QB["DataSet"]))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add((structure_uri, RDF.type, QB["DataStructureDefinition"]))

    assertion_graph.add((dataset_uri, QB["structure"], structure_uri))

    # Only the variable descriptions are needed; the dictionary keys are not used.
    for variable in variables.values():
        variable_uri = URIRef(variable["original"]["uri"])
        variable_label = Literal(variable["original"]["label"])
        variable_type = URIRef(variable["type"])

        codelist_uri = URIRef(variable["codelist"]["original"]["uri"])
        codelist_label = Literal(variable["codelist"]["original"]["label"])

        # The variable as component of the definition
        component_uri = safe_url(BASE, "component/" + variable["original"]["label"])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB["component"], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if "description" in variable and variable["description"] != "":
            assertion_graph.add((variable_uri, RDFS.comment, Literal(variable["description"])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable["uri"] != str(variable_uri):
            assertion_graph.add((variable_uri, RDFS["subPropertyOf"], URIRef(variable["uri"])))

        if variable_type == QB["DimensionProperty"]:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["dimension"], variable_uri))

            # Coded variables are also of type coded property (a subproperty of dimension property)
            if variable["category"] == "coded":
                assertion_graph.add((variable_uri, RDF.type, QB["CodedProperty"]))

        elif variable_type == QB["MeasureProperty"]:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["measure"], variable_uri))
        elif variable_type == QB["AttributeProperty"]:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["attribute"], variable_uri))

        # If this variable is of category 'coded', we add codelist and URIs for
        # each variable (including mappings between value uris and etc....)
        if variable["category"] == "coded":
            assertion_graph.add((codelist_uri, RDF.type, SKOS["Collection"]))
            assertion_graph.add((codelist_uri, RDFS.label, Literal(codelist_label)))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB["codeList"], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if variable["codelist"]["uri"] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV["wasDerivedFrom"], URIRef(variable["codelist"]["uri"])))

            # Generate a SKOS concept for each of the values and map it to the
            # assigned codelist
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], Literal(value_label)))
                assertion_graph.add((codelist_uri, SKOS["member"], value_uri))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "identifier":
            # Generate a SKOS concept for each of the values
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], value_label))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "other":
            # Generate a literal for each of the values when converting the dataset (but not here)
            pass

    return rdf_dataset
        #             line[str] = unicode(line[str], errors='replace')
        #         #print line
    with open(filename,'r') as csvfile:
        csv_contents = [{k: v for k, v in row.items()}
            for row in csv.DictReader(csvfile, skipinitialspace=True, quotechar='"', delimiter=',')]
    return csv_contents
#//*************** csv parser ****************//#

# Prefix from which the named-graph URI below is built.
graph_uri_base = resource + 'movement_of_people/'

# Input location.
path = 'source_datasets/'
filename = 'Movement_of_people_across_borders_dataset.csv'

# Dataset with the prefixes used on output.
dataset = Dataset()
for _prefix, _namespace in (
        ('trumpres', RESOURCE),
        ('trumpvoc', VOCAB),
        ('geo', GEO),
        ('dbo', DBO),
        ('dbr', DBR),
):
    dataset.bind(_prefix, _namespace)

# The vocabulary file is parsed into the default context.
dataset.default_context.parse(VOCAB_FILE, format='turtle')

# Convert the CSV into its own named graph, then hand the result to
# serialize_upload together with the output path.
_movement_graph_uri = URIRef(graph_uri_base + 'movement_graph')
dataset, movement_graph = convert_csv(path + filename, dataset, _movement_graph_uri)
serialize_upload(OUTPUT_DIR + 'movement_of_people.trig', dataset)

### Generate VoID metadata
from rdflib.void import generateVoID
from rdflib.namespace import VOID
class LongTermMemory(object):

    # Predicates iterated by get_all_conflicts() when looking for subjects
    # that have more than one distinct object for the same predicate.
    ONE_TO_ONE_PREDICATES = [
        'age', 'born_in', 'faceID', 'favorite', 'favorite_of', 'id', 'is_from',
        'manufactured_in', 'mother_is', 'name'
    ]

    def __init__(self, address=config.BRAIN_URL_LOCAL):
        """
        Interact with Triple store

        Parameters
        ----------
        address: str
            IP address and port of the Triple store
        """

        self.address = address
        self.namespaces = {}
        self.ontology_paths = {}
        self.format = 'trig'
        self.dataset = Dataset()
        # Prefix header that _create_query prepends to the queries it builds.
        self.query_prefixes = """
                    prefix gaf: <http://groundedannotationframework.org/gaf#> 
                    prefix grasp: <http://groundedannotationframework.org/grasp#> 
                    prefix leolaniInputs: <http://cltl.nl/leolani/inputs/>
                    prefix leolaniFriends: <http://cltl.nl/leolani/friends/> 
                    prefix leolaniTalk: <http://cltl.nl/leolani/talk/> 
                    prefix leolaniTime: <http://cltl.nl/leolani/time/> 
                    prefix leolaniWorld: <http://cltl.nl/leolani/world/> 
                    prefix n2mu: <http://cltl.nl/leolani/n2mu/> 
                    prefix ns1: <urn:x-rdflib:> 
                    prefix owl: <http://www.w3.org/2002/07/owl#> 
                    prefix prov: <http://www.w3.org/ns/prov#> 
                    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
                    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
                    prefix sem: <http://semanticweb.cs.vu.nl/2009/11/sem/> 
                    prefix skos: <http://www.w3.org/2004/02/skos/core#> 
                    prefix time: <http://www.w3.org/TR/owl-time/#> 
                    prefix xml: <http://www.w3.org/XML/1998/namespace> 
                    prefix xml1: <https://www.w3.org/TR/xmlschema-2/#> 
                    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
                    """

        # NOTE(review): self.namespaces and self.ontology_paths start empty and
        # nothing visible here calls _define_namespaces / _get_ontology_path /
        # _bind_namespaces -- confirm whether those calls belong here.

        # URI of the robot's own instance; set lazily by _generate_leolani
        self.my_uri = None

        self._log = logger.getChild(self.__class__.__name__)

    #################################### Main functions to interact with the brain ####################################

    def update(self, capsule):
        """
        Main function to interact with if a statement is coming into the brain. Takes in a structured parsed statement,
        transforms them to triples, and posts them to the triple store
        :param capsule: Structured data of a parsed statement
        :return: json response containing the status for posting the triples, and the original statement
        """
        # Case fold
        capsule = casefold_capsule(capsule)

        # Create graphs and triples (same step as in experience(), but modelled as a statement)
        self._model_graphs_(capsule, type='Statement')

        data = self._serialize(config.BRAIN_LOG)

        code = self._upload_to_brain(data)

        # Create JSON output; the date is stringified so the capsule can be returned as JSON
        capsule["date"] = str(capsule["date"])
        output = {'response': code, 'statement': capsule}

        return output

    def experience(self, capsule):
        """
        Main function to interact with if an experience (sensor observation) is coming into the brain. Takes in a
        structured capsule, transforms it to triples, and posts them to the triple store
        :param capsule: Structured data of a parsed statement
        :return: json response containing the status for posting the triples, and the original statement
        """
        # Case fold
        capsule = casefold_capsule(capsule)

        # Create graphs and triples
        self._model_graphs_(capsule, type='Experience')

        data = self._serialize(config.BRAIN_LOG)

        code = self._upload_to_brain(data)

        # Create JSON output; the date is stringified so the capsule can be returned as JSON
        capsule["date"] = str(capsule["date"])
        output = {'response': code, 'statement': capsule}

        return output

    def query_brain(self, capsule):
        """
        Main function to interact with if a question is coming into the brain. Takes in a structured parsed question,
        transforms it into a query, and queries the triple store for a response
        :param capsule: Structured data of a parsed question
        :return: json response containing the results of the query, and the original question
        """
        # Case fold
        capsule = casefold_capsule(capsule)

        # Generate query
        query = self._create_query(capsule)

        # Perform query
        response = self._submit_query(query)

        # Create JSON output; questions do not necessarily carry a date
        if 'date' in capsule:
            capsule["date"] = str(capsule["date"])
        output = {'response': response, 'question': capsule}

        return output

    def process_visual(self, item, exact_only=True):
        """
        Main function to determine if this item can be recognized by the brain, learned, or none
        :param item: label of the perceived item
        :param exact_only: when False, a fuzzy keyword lookup is attempted if the exact dbpedia lookup fails
        :return: tuple of (label to remember or None, text describing the outcome)
        """
        folded = casefold(item)

        if folded in self.get_classes():
            # If this is in the ontology already, create sensor triples directly
            text = 'I know about %s. I will remember this object' % item
            return item, text

        temp = self.get_labels_and_classes()
        if folded in temp:
            # If this is in the ontology already, create sensor triples directly.
            # The lookup uses the same case-folded key as the membership test;
            # indexing with the raw item raised KeyError for mixed-case input.
            text = 'I know about %s. It is of type %s. I will remember this object' % (
                item, temp[folded])
            return item, text

        # Query the web for information
        class_type, description = self.exact_match_dbpedia(item)
        if class_type is not None:
            # Had to learn it, but I can create triples now
            text = 'I did not know what %s is, but I searched on the web and I found that it is a %s. ' \
                   'I will remember this object' % (item, class_type)
            return casefold(class_type), text

        if not exact_only:
            # Second go at dbpedia, relaxed approach
            class_type, description = self.keyword_match_dbpedia(item)
            if class_type is not None:
                # Had to really search for it to learn it, but I can create triples now
                text = 'I did not know what %s is, but I searched for fuzzy matches on the web and I found that it ' \
                       'is a %s. I will remember this object' % (item, class_type)
                return casefold(class_type), text

        # Failure, nothing found
        text = 'I am sorry, I could not learn anything on %s so I will not remember it' % item
        return None, text

    ########## management system for keeping track of chats and turns ##########
    def get_last_chat_id(self):
        """
        Get the id for the last interaction recorded
        :return: id as int, or 0 when the query returns no rows
        """
        query = read_query('last_chat_id')
        response = self._submit_query(query)

        return int(response[0]['chatid']['value']) if response else 0

    def get_last_turn_id(self, chat_id):
        """
        Get the id for the last turn in the given chat
        :param chat_id: id for chat of interest
        :return: highest turn id found, or 0 when the chat has no turns
        """
        query = read_query('last_turn_id') % (chat_id)
        response = self._submit_query(query)

        last_turn = 0
        for turn in response:
            turn_uri = turn['s']['value']
            # The id is what remains of the last URI segment after its first
            # 10 characters.
            # NOTE(review): the fixed offset presumably skips a constant-width
            # label prefix -- confirm against how turn URIs are minted.
            turn_id = int(turn_uri.split('/')[-1][10:])
            last_turn = max(last_turn, turn_id)

        return last_turn

    ########## brain structure exploration ##########
    def get_predicates(self):
        """
        Get predicates in social ontology
        :return: list with the last '/'-separated segment of each predicate URI
        """
        query = read_query('predicates')
        response = self._submit_query(query)

        return [elem['p']['value'].split('/')[-1] for elem in response]

    def get_classes(self):
        """
        Get classes in social ontology
        :return: list with the last '/'-separated segment of each class URI
        """
        query = read_query('classes')
        response = self._submit_query(query)

        return [elem['o']['value'].split('/')[-1] for elem in response]

    def get_labels_and_classes(self):
        """
        Get labels in social ontology together with the class each one maps to
        :return: dict mapping label -> last '/'-separated segment of the class URI
        """
        query = read_query('labels_and_classes')
        response = self._submit_query(query)

        temp = dict()
        for r in response:
            # A label occurring in several rows keeps the class of the last row
            temp[r['l']['value']] = r['o']['value'].split('/')[-1]

        return temp

    ########## learned facts exploration ##########
    def count_statements(self):
        """
        Count statements or 'facts' in the brain
        :return: value of the 'count' binding of the first result row, as returned by the store
        """
        query = read_query('count_statements')
        response = self._submit_query(query)
        return response[0]['count']['value']

    def count_friends(self):
        """
        Count number of people I have talked to
        :return: value of the 'count' binding of the first result row, as returned by the store
        """
        query = read_query('count_friends')
        response = self._submit_query(query)
        return response[0]['count']['value']

    def get_my_friends(self):
        """
        Get names of people I have talked to
        :return: list with the last '/'-separated segment of each 'name' binding
        """
        query = read_query('my_friends')
        response = self._submit_query(query)
        return [elem['name']['value'].split('/')[-1] for elem in response]

    def get_best_friends(self):
        """
        Get names of the 5 people I have talked to the most
        :return: list of the 'name' binding of every result row (the limit is applied by the stored query)
        """
        query = read_query('best_friends')
        response = self._submit_query(query)
        return [elem['name']['value'] for elem in response]

    def get_instance_of_type(self, instance_type):
        """
        Get instances of a certain class type
        :param instance_type: name of class in ontology
        :return: list of the 'name' binding of every result row
        """
        query = read_query('instance_of_type') % (instance_type)
        response = self._submit_query(query)
        return [elem['name']['value'] for elem in response]

    def when_last_chat_with(self, actor_label):
        """
        Get time value for the last time I chatted with this person
        :param actor_label: name of person
        :return: last '/'-separated segment of the 'time' binding of the first result row
        :raises IndexError: when the query returns no rows
        """
        query = read_query('when_last_chat_with') % (actor_label)
        response = self._submit_query(query)
        return response[0]['time']['value'].split('/')[-1]

    def get_triples_with_predicate(self, predicate):
        """
        Get triples that contain this predicate
        :param predicate: predicate name substituted into the stored query
        :return: list of (subject name, object name) tuples
        """
        query = read_query('triples_with_predicate') % predicate
        response = self._submit_query(query)
        return [(elem['sname']['value'], elem['oname']['value'])
                for elem in response]

    ########## conflicts ##########
    def get_all_conflicts(self):
        """
        Aggregate all conflicts in brain
        :return: flat list with the conflicts found for every predicate in ONE_TO_ONE_PREDICATES
        """
        conflicts = []
        for predicate in self.ONE_TO_ONE_PREDICATES:
            # Each call yields a list of conflicts for that single predicate
            conflicts.extend(self._get_conflicts_with_predicate(predicate))

        return conflicts

    ########## semantic web ##########
    def exact_match_dbpedia(self, item):
        """
        Query dbpedia for information on this item to get its semantic type and description.
        :param item: label to look up
        :return: tuple (class_type, description); both are None when no casing variant produced a result
        """

        # Gather combinations (different casings of the same label)
        combinations = [item, item.lower(), item.capitalize(), item.title()]

        response = []
        for comb in combinations:
            # Try exact matching query
            query = read_query('dbpedia_type_and_description') % (comb)
            response = self._submit_query(query)

            # break if we have a hit, so a later miss cannot overwrite it
            if response:
                break

        if not response:
            return None, None

        class_type = response[0]['label_type']['value']
        # Keep only the text before the first period of the description
        description = response[0]['description']['value'].split('.')[0]

        return class_type, description

    def keyword_match_dbpedia(self, item):
        # Look up `item` through an HTTP API (presumably the DBpedia keyword
        # lookup, per the method name -- confirm), fuzzy-match the returned
        # labels and return a tuple of (first class label or None, description
        # text before its first period or None).
        #
        # NOTE(review): this method is not valid Python as it stands. The
        # requests.get(...) call has lost its URL, the dict delimiters around
        # its parameters/headers and whatever turned the HTTP response into the
        # list of dicts iterated below; a closing bracket and an `else:` are
        # missing further down. Restore from version control before relying on it.

        # Query API
        r = requests.get(
                'QueryString': item,
                'MaxHits': '10'
                'Accept': 'application/json'

        # Fuzzy match
        choices = [e['label'] for e in r]
        # NOTE(review): the string literal "item" is matched here, not the
        # `item` argument -- looks unintended; confirm.
        best_match = process.extractOne("item", choices)

        # Get best match object
        r = [{
            'label': e['label'],
            'classes': e['classes'],
            'description': e['description']
        } for e in r if e['label'] == best_match[0]]

        if r:
            r = r[0]

            if r['classes']:
                # process dbpedia classes only
                r['classes'] = [
                    c['label'] for c in r['classes'] if 'dbpedia' in c['uri']

            # NOTE(review): without an `else:` this fallback would overwrite
            # every successful match -- presumably it was the no-match branch.
            r = {'label': None, 'classes': None, 'description': None}

        return r['classes'][0] if r['classes'] else None, r[
            'description'].split('.')[0] if r['description'] else None

    ######################################## Helpers for setting up connection ########################################

    def _define_namespaces(self):
        """
        Define namespaces for different layers (ontology/vocab and resource). Assign them to self
        """
        layers = (
            # Namespaces for the instance layer
            ('N2MU', 'http://cltl.nl/leolani/n2mu/'),
            ('LW', 'http://cltl.nl/leolani/world/'),
            # Namespaces for the mention layer
            ('GAF', 'http://groundedannotationframework.org/gaf#'),
            ('LTa', 'http://cltl.nl/leolani/talk/'),
            # Namespaces for the attribution layer
            ('GRASP', 'http://groundedannotationframework.org/grasp#'),
            ('LF', 'http://cltl.nl/leolani/friends/'),
            ('LI', 'http://cltl.nl/leolani/inputs/'),
            # Namespaces for the temporal layer-ish
            ('TIME', 'http://www.w3.org/TR/owl-time/#'),
            ('LTi', 'http://cltl.nl/leolani/time/'),
            # The namespaces of external ontologies
            ('SKOS', 'http://www.w3.org/2004/02/skos/core#'),
            ('PROV', 'http://www.w3.org/ns/prov#'),
            ('SEM', 'http://semanticweb.cs.vu.nl/2009/11/sem/'),
            ('XML', 'https://www.w3.org/TR/xmlschema-2/#'),
        )

        for key, uri in layers:
            self.namespaces[key] = Namespace(uri)

    def _get_ontology_path(self):
        """
        Define ontology paths to key vocabularies
        """
        # Relative locations of the ontology files, keyed by vocabulary prefix
        self.ontology_paths[
            'n2mu'] = './../../knowledge_representation/ontologies/leolani.ttl'
        self.ontology_paths[
            'gaf'] = './../../knowledge_representation/ontologies/gaf.rdf'
        self.ontology_paths[
            'grasp'] = './../../knowledge_representation/ontologies/grasp.rdf'
        self.ontology_paths[
            'sem'] = './../../knowledge_representation/ontologies/sem.rdf'

    def _bind_namespaces(self):
        """
        Bind namespaces
        """
        # prefix -> key into self.namespaces, bound in this order
        bindings = (
            ('n2mu', 'N2MU'),
            ('leolaniWorld', 'LW'),
            ('gaf', 'GAF'),
            ('leolaniTalk', 'LTa'),
            ('grasp', 'GRASP'),
            ('leolaniFriends', 'LF'),
            ('leolaniInputs', 'LI'),
            ('time', 'TIME'),
            ('leolaniTime', 'LTi'),
            ('skos', 'SKOS'),
            ('prov', 'PROV'),
            ('sem', 'SEM'),
            ('xml', 'XML'),
        )
        for prefix, key in bindings:
            self.dataset.bind(prefix, self.namespaces[key])

        # OWL is a module-level namespace rather than an entry of self.namespaces
        self.dataset.bind('owl', OWL)

    ######################################## Helpers for statement processing ########################################

    def create_chat_id(self, actor, date):
        """
        Determine chat id depending on my last conversation with this person
        :param actor: label of the person, substituted into the stored query
        :param date: object exposing .day, .month and .year of the current interaction
        :return: the existing chat id if the last chat with this person was on the same date,
                 otherwise the last recorded chat id plus one
        """
        self._log.debug('Chat with {} on {}'.format(actor, date))

        query = read_query('last_chat_with') % (actor)
        response = self._submit_query(query)

        if response and int(response[0]['day']['value']) == int(date.day) \
                and int(response[0]['month']['value']) == int(date.month) \
                and int(response[0]['year']['value']) == int(date.year):
            # Chatted with this person today so same chat id
            chat_id = int(response[0]['chatid']['value'])
        else:
            # Either have never chatted with this person, or I have but not today. Add one to latest chat
            chat_id = self.get_last_chat_id() + 1

        return chat_id

    def create_turn_id(self, chat_id):
        """
        Determine the id for the next turn in the given chat
        :param chat_id: id of the chat, substituted into the stored query
        :return: last recorded turn id plus one, or 1 when the chat has no turns yet
        """
        self._log.debug('Turn in chat {}'.format(chat_id))

        query = read_query('last_turn_in_chat') % (chat_id)
        response = self._submit_query(query)
        # _submit_query returns a list of bindings, so the row has to be
        # selected first; indexing the list with 'turnid' raised TypeError.
        return int(response[0]['turnid']['value']) + 1 if response else 1

    def _generate_leolani(self, instance_graph):
        """
        Add the robot's own instance to the given graph and remember its URI
        :param instance_graph: graph receiving the label and type triples
        :return: URI of the robot instance (also stored in self.my_uri)
        """
        name = 'leolani'

        me = URIRef(to_iri(self.namespaces['LW'] + name))
        triples = (
            (me, RDFS.label, Literal(name)),
            (me, RDF.type, URIRef(to_iri(self.namespaces['N2MU'] + 'robot'))),
            (me, RDF.type, URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))),
        )
        for triple in triples:
            instance_graph.add(triple)

        self.my_uri = me
        return me

    def _generate_subject(self, capsule, instance_graph):
        """
        Add the capsule's subject as an instance to the given graph
        :param capsule: structured statement; reads capsule['subject']['label'] and ['type']
        :param instance_graph: graph receiving the label and type triples
        :return: tuple (subject URI, subject label literal)
        """
        if capsule['subject']['type'] == '':  # We only get the label
            subject_vocab = OWL
            subject_type = 'Thing'
        else:
            # A type was supplied, so it is resolved against the n2mu vocabulary
            subject_vocab = self.namespaces['N2MU']
            subject_type = capsule['subject']['type']

        subject_id = capsule['subject']['label']

        subject = URIRef(to_iri(self.namespaces['LW'] + subject_id))
        subject_label = Literal(subject_id)
        subject_type1 = URIRef(to_iri(subject_vocab + subject_type))
        subject_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))

        instance_graph.add((subject, RDFS.label, subject_label))
        instance_graph.add((subject, RDF.type, subject_type1))
        instance_graph.add((subject, RDF.type, subject_type2))

        return subject, subject_label

    def _create_leolani_world(self, capsule, type='Statement'):
        # Build the instance-level triples for the capsule's subject and object
        # and create the claim for the relation between them.
        # Returns (instance_graph, claim_graph, subject, object, statement).
        #
        # NOTE(review): several statements below are truncated (a graph name,
        # an `else:` and the arguments of both _create_claim_graph calls are
        # missing), so this method does not parse as it stands. Restore the
        # missing text from version control.

        # Instance graph
        # NOTE(review): the right-hand operand (the graph's local name) and the
        # closing parentheses are missing from the next line.
        instance_graph_uri = URIRef(to_iri(self.namespaces['LW'] +
        instance_graph = self.dataset.graph(instance_graph_uri)

        # Subject
        if type == 'Statement':
            subject, subject_label = self._generate_subject(
                capsule, instance_graph)
        elif type == 'Experience':
            # For experiences the subject is the robot itself; its instance is
            # generated only once and cached in self.my_uri
            subject = self._generate_leolani(
                instance_graph) if self.my_uri is None else self.my_uri
            subject_label = 'leolani'

        # Object
        if capsule['object']['type'] == '':  # We only get the label
            object_vocab = OWL
            object_type = 'Thing'
            # NOTE(review): an `else:` presumably preceded the next two lines
            # (compare the comment above); without it the OWL default is
            # always overwritten.
            object_vocab = self.namespaces['N2MU']
            object_type = capsule['object']['type']

        object_id = capsule['object']['label']

        object = URIRef(to_iri(self.namespaces['LW'] + object_id))
        object_label = Literal(object_id)
        object_type1 = URIRef(to_iri(object_vocab + object_type))
        object_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))

        instance_graph.add((object, RDFS.label, object_label))
        instance_graph.add((object, RDF.type, object_type1))
        instance_graph.add((object, RDF.type, object_type2))

        # NOTE(review): the argument lists of both calls below are missing.
        if type == 'Statement':
            claim_graph, statement = self._create_claim_graph(
        elif type == 'Experience':
            claim_graph, statement = self._create_claim_graph(

        return instance_graph, claim_graph, subject, object, statement

    def _create_claim_graph(self, subject, subject_label, object, object_label,
                            predicate, type='Statement'):
        """
        Register a claim: store the triple in its own named graph and type the claim in the Claims graph

        NOTE(review): the parameter list was reconstructed from the names used in this body and from the
        positional call in _create_leolani_talk; the default for ``type`` mirrors the sibling methods -- confirm.
        :param subject: URI of the subject
        :param subject_label: label of the subject, used to build the statement id
        :param object: URI of the object
        :param object_label: label of the object, used to build the statement id
        :param predicate: local name of the predicate in the n2mu namespace
        :param type: local name of the GRASP class the claim is typed with
        :return: tuple (claim graph, statement URI)
        """
        # Claim graph
        claim_graph_uri = URIRef(to_iri(self.namespaces['LW'] + 'Claims'))
        claim_graph = self.dataset.graph(claim_graph_uri)

        # Statement
        statement_id = hash_statement_id(
            [subject_label, predicate, object_label])

        statement = URIRef(to_iri(self.namespaces['LW'] + statement_id))
        statement_type1 = URIRef(to_iri(self.namespaces['GRASP'] + type))
        statement_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))
        statement_type3 = URIRef(to_iri(self.namespaces['SEM'] + 'Event'))

        # Create graph and add triple; the statement URI doubles as the name of the graph holding the triple
        graph = self.dataset.graph(statement)
        graph.add((subject, self.namespaces['N2MU'][predicate], object))

        claim_graph.add((statement, RDF.type, statement_type1))
        claim_graph.add((statement, RDF.type, statement_type2))
        claim_graph.add((statement, RDF.type, statement_type3))

        return claim_graph, statement

    def _create_leolani_talk(self, capsule, leolani, type='Statement'):
        # Build the interaction-level triples (time, actor, chat/turn events)
        # for a capsule and create the matching perspective graph.
        # Returns (interaction_graph, perspective_graph, actor, time, mention,
        # attribution).
        #
        # NOTE(review): many statements below have lost their first line (the
        # `<graph>.add(` prefix) or their tail, so this method does not parse
        # as it stands. Each affected spot is marked; restore the missing text
        # from version control rather than guessing the target graph.

        # Interaction graph
        if type == 'Statement':
            graph_to_write = 'Interactions'
        elif type == 'Experience':
            graph_to_write = 'Sensors'

        interaction_graph_uri = URIRef(
            to_iri(self.namespaces['LTa'] + graph_to_write))
        interaction_graph = self.dataset.graph(interaction_graph_uri)

        # Time
        date = capsule["date"]
        time = URIRef(
            to_iri(self.namespaces['LTi'] + str(capsule["date"].isoformat())))
        time_type = URIRef(
            to_iri(self.namespaces['TIME'] + 'DateTimeDescription'))
        day = Literal(date.day, datatype=self.namespaces['XML']['gDay'])
        # NOTE(review): the datatype argument and closing parenthesis of the
        # next call are missing.
        month = Literal(date.month,
        year = Literal(date.year, datatype=self.namespaces['XML']['gYear'])
        time_unitType = URIRef(to_iri(self.namespaces['TIME'] + 'unitDay'))

        interaction_graph.add((time, RDF.type, time_type))
        interaction_graph.add((time, self.namespaces['TIME']['day'], day))
        interaction_graph.add((time, self.namespaces['TIME']['month'], month))
        interaction_graph.add((time, self.namespaces['TIME']['year'], year))
        # NOTE(review): call prefix missing on the next line.
            (time, self.namespaces['TIME']['unitType'], time_unitType))

        # Actor
        actor_id = capsule['author']
        actor_label = capsule['author']

        # NOTE(review): to_iri is applied twice here -- looks redundant; confirm.
        actor = URIRef(to_iri(to_iri(self.namespaces['LF'] + actor_id)))
        actor_label = Literal(actor_label)
        actor_type1 = URIRef(to_iri(self.namespaces['SEM'] + 'Actor'))
        actor_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Instance'))

        if type == 'Statement':
            actor_type3 = URIRef(to_iri(self.namespaces['N2MU'] + 'person'))
        elif type == 'Experience':
            actor_type3 = URIRef(to_iri(self.namespaces['N2MU'] + 'sensor'))

        interaction_graph.add((actor, RDFS.label, actor_label))
        interaction_graph.add((actor, RDF.type, actor_type1))
        interaction_graph.add((actor, RDF.type, actor_type2))
        interaction_graph.add((actor, RDF.type, actor_type3))

        # Add leolani knows/senses actor
        if type == 'Statement':
            predicate = 'knows'
        elif type == 'Experience':
            predicate = 'senses'

        # NOTE(review): call prefix missing on the next line.
            (leolani, self.namespaces['N2MU'][predicate], actor))
        # The same relation is also registered as a claim; the returned graph
        # and statement are not needed here.
        _, _ = self._create_claim_graph(leolani, 'leolani', actor, actor_label,
                                        predicate, type)

        # Event and subevent
        event_id = self.create_chat_id(actor_label, date)
        if type == 'Statement':
            event_label = 'chat%s' % event_id
        elif type == 'Experience':
            event_label = 'visual%s' % event_id

        subevent_id = self.create_turn_id(event_id)
        if type == 'Statement':
            subevent_label = event_label + '_turn%s' % subevent_id
        elif type == 'Experience':
            subevent_label = event_label + '_object%s' % subevent_id

        turn = URIRef(to_iri(self.namespaces['LTa'] + subevent_label))
        turn_type1 = URIRef(to_iri(self.namespaces['SEM'] + 'Event'))
        if type == 'Statement':
            turn_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Turn'))
        elif type == 'Experience':
            turn_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Object'))

        interaction_graph.add((turn, RDF.type, turn_type1))
        interaction_graph.add((turn, RDF.type, turn_type2))
        # NOTE(review): call prefix missing on the next two lines.
            (turn, self.namespaces['N2MU']['id'], Literal(subevent_id)))
            (turn, self.namespaces['SEM']['hasActor'], actor))
        interaction_graph.add((turn, self.namespaces['SEM']['hasTime'], time))

        chat = URIRef(to_iri(self.namespaces['LTa'] + event_label))
        chat_type1 = URIRef(to_iri(self.namespaces['SEM'] + 'Event'))
        if type == 'Statement':
            chat_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Chat'))
        elif type == 'Experience':
            chat_type2 = URIRef(to_iri(self.namespaces['GRASP'] + 'Visual'))

        interaction_graph.add((chat, RDF.type, chat_type1))
        interaction_graph.add((chat, RDF.type, chat_type2))
        # NOTE(review): call prefix missing on the next two lines.
            (chat, self.namespaces['N2MU']['id'], Literal(event_id)))
            (chat, self.namespaces['SEM']['hasActor'], actor))
        interaction_graph.add((chat, self.namespaces['SEM']['hasTime'], time))
        # NOTE(review): call prefix missing on the next line.
            (chat, self.namespaces['SEM']['hasSubevent'], turn))

        # NOTE(review): `type` is not forwarded, so _create_perspective_graph
        # always runs with its default 'Statement' -- confirm this is intended
        # for experiences.
        perspective_graph, mention, attribution = self._create_perspective_graph(
            capsule, subevent_label)

        # Link interactions and perspectives
        # NOTE(review): call prefix missing on the next four lines; which graph
        # receives these triples cannot be told from this code.
            (mention, self.namespaces['GRASP']['wasAttributedTo'], actor))
            (mention, self.namespaces['GRASP']['hasAttribution'], attribution))
            (mention, self.namespaces['PROV']['wasDerivedFrom'], chat))
            (mention, self.namespaces['PROV']['wasDerivedFrom'], turn))

        return interaction_graph, perspective_graph, actor, time, mention, attribution

    def _create_perspective_graph(self, capsule, turn_label, type='Statement'):
        """
        Create the mention and its attribution in the Perspectives graph
        :param capsule: structured input; reads capsule['position']
        :param turn_label: label of the turn/object event the mention belongs to
        :param type: 'Statement' or 'Experience'; selects the mention id suffix
        :return: tuple (perspective graph, mention URI, attribution URI)
        """
        # Perspective graph
        perspective_graph_uri = URIRef(
            to_iri(self.namespaces['LTa'] + 'Perspectives'))
        perspective_graph = self.dataset.graph(perspective_graph_uri)

        # Mention
        if type == 'Statement':
            mention_id = turn_label + '_char%s' % capsule['position']
        elif type == 'Experience':
            mention_id = turn_label + '_pixel%s' % capsule['position']
        mention = URIRef(to_iri(self.namespaces['LTa'] + mention_id))
        mention_type = URIRef(to_iri(self.namespaces['GRASP'] + 'Mention'))

        perspective_graph.add((mention, RDF.type, mention_type))

        # Attribution
        attribution_id = mention_id + '_CERTAIN'
        attribution = URIRef(to_iri(self.namespaces['LTa'] + attribution_id))
        attribution_type = URIRef(
            to_iri(self.namespaces['GRASP'] + 'Attribution'))
        # NOTE(review): the local name of the value was missing from this
        # expression; 'CERTAIN' is inferred from the '_CERTAIN' suffix of the
        # attribution id above -- confirm.
        attribution_value = URIRef(to_iri(self.namespaces['GRASP'] +
                                          'CERTAIN'))

        perspective_graph.add((attribution, RDF.type, attribution_type))
        perspective_graph.add((attribution, RDF.value, attribution_value))

        return perspective_graph, mention, attribution

    def _serialize(self, file_path):
        """
        Save graph to local file and return the serialized string
        :param file_path: path to where data will be saved; '.' plus self.format is appended as extension
        :return: serialized data as string
        """
        # Save to file but return the python representation
        with open(file_path + '.' + self.format, 'w') as f:
            self.dataset.serialize(f, format=self.format)
        return self.dataset.serialize(format=self.format)

    def _upload_to_brain(self, data):
        """
        Post data to the brain
        :param data: serialized data as string
        :return: response status
        """
        self._log.debug("Posting triples")

        # From serialized string
        post_url = self.address + "/statements"
        # The call had lost its url and payload arguments; post_url and data
        # are the only candidates in scope.
        response = requests.post(
            post_url,
            data=data,
            headers={'Content-Type': 'application/x-' + self.format})

        return str(response.status_code)

    def _model_graphs_(self, capsule, type='Statement'):
        # Populate self.dataset for one capsule: build the world (instance and
        # claim) graphs, the talk (interaction and perspective) graphs, and the
        # links between them.
        #
        # NOTE(review): every statement in the "Interconnections" section
        # except one has lost its first line (the `<graph>.add(` prefix), and
        # the last one has also lost its tail, so this method does not parse as
        # it stands. Which graph each triple belongs to cannot be told from
        # this code; restore from version control.

        # Leolani world (includes instance and claim graphs)
        instance_graph, claim_graph, subject, object, instance = self._create_leolani_world(
            capsule, type)

        # Identity (the robot's instance is generated once and cached in self.my_uri)
        leolani = self._generate_leolani(
            instance_graph) if self.my_uri is None else self.my_uri

        # Leolani talk (includes interaction and perspective graphs)
        interaction_graph, perspective_graph, actor, time, mention, attribution = self._create_leolani_talk(
            capsule, leolani, type)

        # Interconnections
            (subject, self.namespaces['GRASP']['denotedIn'], mention))
            (object, self.namespaces['GRASP']['denotedIn'], mention))

            (instance, self.namespaces['GRASP']['denotedBy'], mention))
            (instance, self.namespaces['SEM']['hasActor'], actor))
        instance_graph.add((instance, self.namespaces['SEM']['hasTime'], time))

            (mention, self.namespaces['GRASP']['containsDenotation'], subject))
            (mention, self.namespaces['GRASP']['containsDenotation'], object))
            (mention, self.namespaces['GRASP']['denotes'], instance))

            (attribution, self.namespaces['GRASP']['isAttributionFor'],

    ######################################### Helpers for question processing #########################################

    def _create_query(self, parsed_question):
        """
        Build the SPARQL query answering a parsed question

        An empty subject label asks for the subject, an empty object label asks for the object,
        otherwise the query checks whether the triple exists and with which attribution value.

        NOTE(review): the statement-id arguments, the format arguments, the final ``else:`` and the
        closing braces of the WHERE clauses were missing; they were reconstructed from the ``%s``
        placeholders of each template -- confirm against version control.
        :param parsed_question: structured question; reads the subject/object labels and the predicate type
        :return: query string, prefixed with self.query_prefixes
        """
        subject_label = parsed_question['subject']['label']
        predicate = parsed_question['predicate']['type']
        object_label = parsed_question['object']['label']

        # The id is computed but not used further in this method
        _ = hash_statement_id([subject_label, predicate, object_label])

        # NOTE(review): labels are interpolated into the query text unescaped;
        # a label containing a single quote breaks (or alters) the query.

        # Query subject
        if subject_label == "":
            # Case fold
            # object_label = casefold_label(parsed_question['object']['label'])

            query = """
                SELECT ?slabel ?authorlabel
                        WHERE { 
                            ?s n2mu:%s ?o . 
                            ?s rdfs:label ?slabel . 
                            ?o rdfs:label '%s' .  
                            GRAPH ?g {
                                ?s n2mu:%s ?o . 
                            } . 
                            ?g grasp:denotedBy ?m . 
                            ?m grasp:wasAttributedTo ?author . 
                            ?author rdfs:label ?authorlabel .
                        }
                """ % (predicate, object_label, predicate)

        # Query object
        elif object_label == "":
            query = """
                SELECT ?olabel ?authorlabel
                        WHERE { 
                            ?s n2mu:%s ?o .   
                            ?s rdfs:label '%s' .  
                            ?o rdfs:label ?olabel .  
                            GRAPH ?g {
                                ?s n2mu:%s ?o . 
                            } . 
                            ?g grasp:denotedBy ?m . 
                            ?m grasp:wasAttributedTo ?author . 
                            ?author rdfs:label ?authorlabel .
                        }
                """ % (predicate, subject_label, predicate)

        # Query existence
        else:
            query = """
                SELECT ?authorlabel ?v
                        WHERE { 
                            ?s n2mu:%s ?o .   
                            ?s rdfs:label '%s' .  
                            ?o rdfs:label '%s' .  
                            GRAPH ?g {
                                ?s n2mu:%s ?o . 
                            } . 
                            ?g grasp:denotedBy ?m . 
                            ?m grasp:wasAttributedTo ?author . 
                            ?author rdfs:label ?authorlabel .
                            ?m grasp:hasAttribution ?att .
                            ?att rdf:value ?v .
                        }
                """ % (predicate, subject_label, object_label, predicate)

        query = self.query_prefixes + query

        return query

    def _submit_query(self, query):
        """
        Run a SPARQL query against the store at self.address
        :param query: SPARQL query string
        :return: list of result bindings (the "results"/"bindings" part of the JSON response)
        """
        # Set up connection
        sparql = SPARQLWrapper(self.address)

        # The query was never handed to the wrapper, so `query` went unused
        sparql.setQuery(query)

        # Response parameters. The result is indexed as a dict below, which
        # requires JSON conversion; 'json' is the value of SPARQLWrapper's
        # JSON constant, spelled out to avoid depending on an extra import.
        sparql.setReturnFormat('json')
        sparql.addParameter('Accept', 'application/sparql-results+json')
        response = sparql.query().convert()

        return response["results"]["bindings"]

    ######################################### Helpers for conflict processing #########################################
    def _get_conflicts_with_predicate(self, one_to_one_predicate):
        """
        Find subjects that have more than one distinct object for the given predicate
        :param one_to_one_predicate: local name of the predicate in the n2mu namespace
        :return: list of dicts with keys 'subject', 'predicate' and 'objects'; each entry of 'objects'
                 is a dict with keys 'value' and 'author'
        """
        query = """
            PREFIX n2mu: <http://cltl.nl/leolani/n2mu/>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX grasp: <http://groundedannotationframework.org/grasp#>

            select ?sname 
                    (group_concat(?oname ; separator=";") as ?onames) 
                    (group_concat(?authorlabel ; separator=";") as ?authorlabels) 
            where { 
                GRAPH ?g {
                    ?s n2mu:%s ?o .
                    } .
                ?s rdfs:label ?sname .
                ?o rdfs:label ?oname .

                ?g grasp:denotedBy ?m . 
                ?m grasp:wasAttributedTo ?author . 
                ?author rdfs:label ?authorlabel .

            } group by ?sname having (count(distinct ?oname) > 1)
        """ % one_to_one_predicate

        response = self._submit_query(query)
        conflicts = []
        for item in response:
            conflict = {
                'subject': item['sname']['value'],
                'predicate': one_to_one_predicate,
                'objects': []
            }

            # Both aggregates are ';'-joined by the query and are paired up
            # position by position here
            values = item['onames']['value'].split(';')
            authors = item['authorlabels']['value'].split(';')

            for val, auth in zip(values, authors):
                option = {'value': val, 'author': auth}
                conflict['objects'].append(option)

            conflicts.append(conflict)

        return conflicts
    with open(filename,'r') as csvfile:
        csv_contents = [{k: v for k, v in row.items()}
            for row in csv.DictReader(csvfile, skipinitialspace=True, quotechar='"', delimiter=',')]
    return csv_contents
#//*************** csv parser ****************//#

# Base URI under which this script's named graphs are minted.
graph_uri_base = resource

# Source CSV locations.
# NOTE(review): `path` and `filename_asylum` are not used in the statements
# visible below -- confirm whether the converters add the path themselves.
path = 'source_datasets/'
filename_population = 'all_population_by_type.csv'
filename_unemployment = 'unemployment_eu.csv'
filename_inflow = 'inflow_dataset.csv'
filename_asylum = 'asylum_seekers.csv'

# Dataset receiving the vocabulary and all converted graphs.
dataset = Dataset()
dataset.bind('mpr', RESOURCE)
dataset.bind('mpo', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('geo_country_code', GCC)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)
dataset.bind('sdmx', SDMX)

# Parse the vocabulary file (Turtle) into the dataset's default context.
dataset.default_context.parse(VOCAB_FILE, format='turtle')

# Each converter receives the dataset plus the URI of its own named graph and
# returns the dataset together with that graph.
dataset, unemployment_eu_graph = convert_unemployment_csv(filename_unemployment,dataset,URIRef(graph_uri_base + 'unemployment_eu_graph'))

dataset, population_eu_graph = convert_population_csv(filename_population,dataset,URIRef(graph_uri_base + 'population_eu_graph'))

dataset, inflow_graph = convert_inflow_csv(filename_inflow,dataset,URIRef(graph_uri_base + 'inflow_graph'))
from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD
import rdflib.resource
from provmodified import Entity
import provmodified as prov
import json
import subprocess, shlex
import collections

# Namespaces for the Docker vocabulary and for W3C PROV.
DOCKER = Namespace("http://www.example.org/ns/docker#")
PROV = Namespace("http://www.w3.org/ns/prov#")

# Module-level dataset; default_union=True makes the default graph behave as
# the union of all named graphs (rdflib option).
ds = Dataset(default_union=True)
ds.bind("docker", DOCKER)
ds.bind("prov", PROV)
# Alias of `ds` (same object, not a copy).
default_graph = ds

def bind_ns(prefix, namespace):
    """Bind `prefix` to the namespace IRI `namespace` on the module dataset.

    :param prefix: prefix label to register
    :param namespace: namespace IRI; it is wrapped in ``Namespace`` before
        being bound
    """
    manager = ds.namespace_manager
    manager.bind(prefix, Namespace(namespace))

def parse_json_byfile(filename):
    """Read the JSON document stored in `filename` and return its first item.

    :param filename: path of the JSON file to read
    :return: element ``0`` of the decoded top-level value
    """
    with open(filename) as handle:
        return json.load(handle)[0]

def inspect_json(cmd):
    """Start the command line `cmd` as a child process.

    `cmd` is tokenised with ``shlex.split`` and launched with stdout and
    stderr redirected to pipes.

    NOTE(review): the visible body ends right after the process is spawned;
    the pipes are never read and nothing is returned. The remainder of the
    function appears to be missing here -- confirm against the full file.
    """
    # print cmd
    p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD
import rdflib.resource

# @newfield iri: IRI

# W3C PROV namespace.
PROV = Namespace("http://www.w3.org/ns/prov#")

# Module-level dataset; default_union=True makes the default graph behave as
# the union of all named graphs (rdflib option).
ds = Dataset(default_union=True)
ds.bind("prov", PROV)
# Alias of `ds` (same object, not a copy).
default_graph = ds
#print type(default_graph)

# Module configuration, read and written through
# set_use_inverse_properties() / using_inverse_properties().
config = {
    "useInverseProperties": False
}

def set_use_inverse_properties(flag=False):
    """Store `flag` as the module's ``useInverseProperties`` setting.

    :param flag: new value of the setting (defaults to ``False``)
    """
    config.update(useInverseProperties=flag)

def using_inverse_properties():
    """Return the current ``useInverseProperties`` setting from `config`."""
    use_inverse = config["useInverseProperties"]
    return use_inverse

def clear_graph(bundle=default_graph):
    """Remove every triple from `bundle`.

    :param bundle: graph to empty; defaults to the module's `default_graph`
        as it was bound when this function was defined
    """
    # A pattern of three wildcards matches all triples.
    match_everything = (None, None, None)
    bundle.remove(match_everything)
# Dublin Core terms, UUID URNs and the DASPOS Docker vocabulary
DC = Namespace("http://purl.org/dc/terms/")
UUIDNS = Namespace("urn:uuid:")
DOCKER = Namespace("http://w3id.org/daspos/docker#")

# W3C namespaces
POSIX = Namespace("http://www.w3.org/ns/posix/stat#")
ACL = Namespace("http://www.w3.org/ns/auth/acl#")

# DASPOS namespaces
SC = Namespace("https://w3id.org/daspos/smartcontainers#")
CA = Namespace("https://w3id.org/daspos/computationalactivity#")
CE = Namespace("https://w3id.org/daspos/computationalenvironment#")

# Need to handle DOI
# http://bitwacker.com/2010/02/04/dois-uris-and-cool-resolution/

# Register every prefix on the shared dataset, in the same order as before.
for _prefix, _namespace in (
        ("prov", PROV),
        ("ore", ORE),
        ("owl", OWL),
        ("dc", DC),
        ("uuidns", UUIDNS),
        ("docker", DOCKER),
        ("posix", POSIX),
        ("acl", ACL),
        ("sc", SC),
        ("ca", CA),
        ("ce", CE),
):
    ds.bind(_prefix, _namespace)

# Alias of `ds` (same object, not a copy).
default_graph = ds

#image_name = cmd_string.rsplit(' ', 1) [1]
#image_id = self.get_imageID(image_name)
from rdflib import Dataset, URIRef, Literal, Namespace, RDF, RDFS, OWL, XSD

# Endpoint that doubles as the root of every IRI minted here.
host = "http://localhost:5820/MATH"

# Namespace for resources (instances): the host followed by a slash.
data = '{}/'.format(host)
DATA = Namespace(data)

# Namespace for vocabulary items (schema information, RDFS/OWL classes and
# properties); `vocab` itself is just the bare host.
vocab = host
VOCAB = Namespace('{}/vocab/'.format(host))

# IRI of the named graph that receives the data.
graph_uri = URIRef(host)

# Create the dataset and register both prefixes on it.
dataset = Dataset()
for _prefix, _namespace in (('data', DATA), ('vocab', VOCAB)):
    dataset.bind(_prefix, _namespace)

# Obtain the named graph from the dataset.
graph = dataset.graph(graph_uri)

# The vocabulary file is loaded into the default graph.
dataset.default_context.parse("../vocab.ttl", format="turtle")
# IRI baker is a library that reliably creates valid (parts of) IRIs from strings (spaces are turned into underscores, etc.).

# for row in same_set:
#     # graph.add((row[0], RDF.type, VOCAB['Formula']))
#     # graph.add((row[1], RDF.type, VOCAB['Formula']))
#     graph.add((URIRef(row[0]), OWL.sameas, URIRef(row[1])))
# with open('same_formula_db.trig', 'w') as f:
#     graph.serialize(f, format='trig')
def test_scenarios() -> None:
    """
    Testing scenarios:
        1. no base set
        2. base set at graph creation
        3. base set at serialization
        4. base set at both graph creation & serialization, serialization overrides
        5. multiple serialization side effect checking
        6. checking results for RDF/XML
        7. checking results for N3
        8. checking results for TriX & TriG
    """

    # variables
    base_one = Namespace("http://one.org/")
    base_two = Namespace("http://two.org/")
    title = Literal("Title", lang="en")
    description = Literal("Test Description", lang="en")
    creator = URIRef("https://creator.com")
    cs = URIRef("")

    # starting graph
    g = Graph()
    g.add((cs, RDF.type, SKOS.ConceptScheme))
    g.add((cs, DCTERMS.creator, creator))
    g.add((cs, DCTERMS.source, URIRef("nick")))
    g.bind("dct", DCTERMS)
    g.bind("skos", SKOS)

    # 1. no base set for graph, no base set for serialization
    g1 = Graph()
    g1 += g
    # @base should not be in output (checked on the copy, g1, which is the
    # graph this scenario is about)
    assert "@base" not in g1.serialize(format="turtle")

    # 2. base one set for graph, no base set for serialization
    g2 = Graph(base=base_one)
    g2 += g
    # @base should be in output, from Graph (one)
    assert "@base <http://one.org/> ." in g2.serialize(format="turtle")

    # 3. no base set for graph, base two set for serialization
    g3 = Graph()
    g3 += g
    # @base should be in output, from serialization (two)
    assert "@base <http://two.org/> ." in g3.serialize(
        format="turtle", base=base_two
    )

    # 4. base one set for graph, base two set for serialization,
    #    serialization (two) overrides
    g4 = Graph(base=base_one)
    g4 += g
    # @base should be in output, from serialization (two)
    assert "@base <http://two.org/> ." in g4.serialize(
        format="turtle", base=base_two
    )
    # just checking that the graph setting (one) hasn't snuck through
    assert "@base <http://one.org/> ." not in g4.serialize(
        format="turtle", base=base_two
    )

    # 5. multiple serialization side effect checking
    g5 = Graph()
    g5 += g
    # @base should be in output, from serialization (two)
    assert "@base <http://two.org/> ." in g5.serialize(
        format="turtle", base=base_two
    )

    # checking for side effects - no base now set for this serialization
    # @base should not be in output
    assert "@base" not in g5.serialize(format="turtle")

    # 6. checking results for RDF/XML
    g6 = Graph()
    g6 += g
    g6.bind("dct", DCTERMS)
    g6.bind("skos", SKOS)
    # RDF/XML spells the base as an `xml:base` attribute (no leading "@"),
    # so that is the token whose absence has to be checked
    assert "xml:base" not in g6.serialize(format="xml")
    assert 'xml:base="http://one.org/"' in g6.serialize(
        format="xml", base=base_one
    )
    g6.base = base_two
    assert 'xml:base="http://two.org/"' in g6.serialize(format="xml")
    assert 'xml:base="http://one.org/"' in g6.serialize(
        format="xml", base=base_one
    )

    # 7. checking results for N3
    g7 = Graph()
    g7 += g
    g7.bind("dct", DCTERMS)
    g7.bind("skos", SKOS)
    # no base anywhere: the N3 output must not declare one
    assert "@base" not in g7.serialize(format="n3")
    assert "@base <http://one.org/> ." in g7.serialize(
        format="n3", base=base_one
    )
    g7.base = base_two
    assert "@base <http://two.org/> ." in g7.serialize(format="n3")
    assert "@base <http://one.org/> ." in g7.serialize(
        format="n3", base=base_one
    )

    # 8. checking results for TriX & TriG
    # TriX can carry a base per graph as well as one on the document
    # element; the assertions below expect the per-graph bases to be kept
    # and the serialization base (two) to win over the dataset base (three)
    # at document level.
    base_three = Namespace("http://three.org/")
    ds1 = Dataset()
    ds1.bind("dct", DCTERMS)
    ds1.bind("skos", SKOS)
    g8 = ds1.graph(URIRef("http://g8.com/"), base=base_one)
    g9 = ds1.graph(URIRef("http://g9.com/"))
    g8 += g
    g9 += g
    g9.base = base_two
    ds1.base = base_three

    trix = ds1.serialize(format="trix", base=Namespace("http://two.org/"))
    assert '<graph xml:base="http://one.org/">' in trix
    assert '<graph xml:base="http://two.org/">' in trix
    assert '<TriX xml:base="http://two.org/"' in trix

    # TriG has a single document-level base: only the serialization base
    # (two) may appear.
    trig = ds1.serialize(format="trig", base=Namespace("http://two.org/"))
    assert "@base <http://one.org/> ." not in trig
    assert "@base <http://three.org/> ." not in trig
    assert "@base <http://two.org/> ." in trig
    def make_RDF(self, contents):
        """Turn parsed formula rows into RDF and write them to 'db/math_db.trig'.

        Each row is a mapping. Its 'id' and 'id_' keys are always read; the
        keys 'Formula', 'Symbol', 'Operator', 'Function_add' and
        'Function_subtract' each trigger their own group of triples when
        present. The named graph is serialized as TriG once all rows have
        been processed.

        :param contents: iterable of row mappings
        """
        host = "http://localhost:5820/MATH"
        # Resources live directly under the host; vocabulary terms under
        # <host>/vocab/.
        data = host + '/'
        DATA = Namespace(data)
        vocab = host
        VOCAB = Namespace(host + '/vocab/')

        # IRI of the named graph that receives all triples.
        graph_uri = URIRef(host)

        dataset = Dataset()
        dataset.bind('data', DATA)
        dataset.bind('vocab', VOCAB)
        graph = dataset.graph(graph_uri)

        def as_resource(value):
            # Mint a resource IRI by appending the value to the data prefix.
            return URIRef((data + str(value)))

        def as_string(value):
            # Wrap a raw value in an xsd:string literal.
            return Literal(value, datatype=XSD['string'])

        for row in contents:
            # Primary key of the object described by this row.
            node = as_resource(row['id'])
            # 'id_' is looked up for every row (a row without it raises
            # KeyError); the resulting IRI is not used at this point.
            previous = as_resource(row['id_'])

            if 'Formula' in row:
                graph.add((node, VOCAB['xml'], as_string(row['Formula'])))
                # 'description' and 'label' are read, so they must be
                # present, but no triple is emitted for them.
                formula_description = as_string(row['description'])
                formula_label = as_string(row['label'])
                if row['id_'] != 0:
                    graph.add((node, VOCAB['subFormulaOf'],
                               as_resource(row['id_'])))

            # Symbols and operators share one shape: a type, a label and a
            # link to the parent they are part of.
            for kind in ('Symbol', 'Operator'):
                if kind not in row:
                    continue
                graph.add((node, RDF.type, VOCAB[kind]))
                graph.add((node, VOCAB['label'], as_string(row[kind])))
                graph.add((node, VOCAB['partOf'],
                           as_resource(row['parent_id'])))

            # Binary functions are typed as operators and keyed by the value
            # stored under the function column rather than by the row id.
            for function_key in ('Function_add', 'Function_subtract'):
                if function_key not in row:
                    continue
                operator_node = as_resource(row[function_key])
                graph.add((operator_node, RDF.type, VOCAB['Operator']))
                graph.add((operator_node, VOCAB['left'],
                           as_resource(row['left'])))
                graph.add((operator_node, VOCAB['right'],
                           as_resource(row['right'])))

        with open('db/math_db.trig', 'w') as f:
            graph.serialize(f, format='trig')
    # Info on the item
    g.add((item, RDF.type, saa.Item))
    g.add((item, saa.term('index'), Literal(record['assigned_item_no'])))

    if record['persistent_uid'] != "":
        g.add((item, saa.identifier, Literal(record['persistent_uid'])))

    g.add((item, RDFS.label, Literal(record['title'], lang='nl')))
    g.add((item, saa.artist, Literal(record['artist_name_1'])))
    g.add((item, saa.transcription, Literal(record['entry'], lang='nl')))
    g.add((item, saa.workType, Literal(record['object_type_1'], lang='nl')))

    if record['room'] != "":
        g.add((item, saa.room, Literal(record['room'], lang='nl')))

    if record['valuation_amount'] != "":
        g.add((item, saa.valuation, Literal(record['valuation_amount'])))

    return g

if __name__ == "__main__":

    # Fresh dataset with the two prefixes used in the output.
    ds = Dataset()
    for prefix, namespace in (('ga', ga), ('saa', saa)):
        ds.bind(prefix, namespace)

    # main() returns the dataset to serialize; write it out as TriG.
    ds = main(dataset=ds)
    ds.serialize('Dutch_Archival_Descriptions_Getty.trig', format='trig')
class RdfBuilder(object):
    """
    Helper around an rdflib Dataset: keeps the namespaces, named graphs and
    ontology paths, and builds entities, predicates, literals and triples
    from plain labels.
    """
    ONTOLOGY_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../ontologies'))

    def __init__(self):
        # type: () -> None
        """
        Create an empty dataset with empty namespace and ontology-path
        registries.

        NOTE(review): nothing visible here calls _define_namespaces(),
        _bind_namespaces(), define_named_graphs() or
        load_ontology_integration(); confirm whether callers are expected to
        do so.
        """
        self.ontology_paths = {}
        self.namespaces = {}
        self.dataset = Dataset()

        self._log = logger.getChild(self.__class__.__name__)

    ########## setting up connection ##########
    def _define_namespaces(self):
        """
        Define namespaces for different layers (ontology/vocab and resource). Assign them to self

        Returns
        -------
        None
        """
        # Namespaces for the instance layer
        instance_vocab = 'http://cltl.nl/leolani/n2mu/'
        self.namespaces['N2MU'] = Namespace(instance_vocab)
        instance_resource = 'http://cltl.nl/leolani/world/'
        self.namespaces['LW'] = Namespace(instance_resource)

        # Namespaces for the mention layer
        mention_vocab = 'http://groundedannotationframework.org/gaf#'
        self.namespaces['GAF'] = Namespace(mention_vocab)
        mention_resource = 'http://cltl.nl/leolani/talk/'
        self.namespaces['LTa'] = Namespace(mention_resource)

        # Namespaces for the attribution layer
        attribution_vocab = 'http://groundedannotationframework.org/grasp#'
        self.namespaces['GRASP'] = Namespace(attribution_vocab)
        attribution_resource_friends = 'http://cltl.nl/leolani/friends/'
        self.namespaces['LF'] = Namespace(attribution_resource_friends)
        attribution_resource_inputs = 'http://cltl.nl/leolani/inputs/'
        self.namespaces['LI'] = Namespace(attribution_resource_inputs)

        # Namespaces for the temporal layer-ish
        context_vocab = 'http://cltl.nl/episodicawareness/'
        self.namespaces['EPS'] = Namespace(context_vocab)
        self.namespaces['LC'] = Namespace('http://cltl.nl/leolani/context/')

        # The namespaces of external ontologies
        skos = 'http://www.w3.org/2004/02/skos/core#'
        self.namespaces['SKOS'] = Namespace(skos)

        prov = 'http://www.w3.org/ns/prov#'
        self.namespaces['PROV'] = Namespace(prov)

        sem = 'http://semanticweb.cs.vu.nl/2009/11/sem/'
        self.namespaces['SEM'] = Namespace(sem)

        time = 'http://www.w3.org/TR/owl-time/#'
        self.namespaces['TIME'] = Namespace(time)

        xml = 'https://www.w3.org/TR/xmlschema-2/#'
        self.namespaces['XML'] = Namespace(xml)

        wd = 'http://www.wikidata.org/entity/'
        self.namespaces['WD'] = Namespace(wd)

        wdt = 'http://www.wikidata.org/prop/direct/'
        self.namespaces['WDT'] = Namespace(wdt)

        wikibase = 'http://wikiba.se/ontology#'
        self.namespaces['wikibase'] = Namespace(wikibase)

    def define_named_graphs(self):
        """
        Create the named graphs of the dataset and keep a handle on each.
        Requires the 'LW' and 'LTa' namespaces to be defined already for the
        graph IRIs to be expanded.
        """
        # Instance graph
        self.ontology_graph = self.dataset.graph(self.create_resource_uri('LW', 'Ontology'))
        self.instance_graph = self.dataset.graph(self.create_resource_uri('LW', 'Instances'))
        self.claim_graph = self.dataset.graph(self.create_resource_uri('LW', 'Claims'))
        self.perspective_graph = self.dataset.graph(self.create_resource_uri('LTa', 'Perspectives'))
        self.interaction_graph = self.dataset.graph(self.create_resource_uri('LTa', 'Interactions'))

    def _get_ontology_path(self):
        """
        Define ontology paths to key vocabularies

        Returns
        -------
        None
        """
        self.ontology_paths['n2mu'] = os.path.join(self.ONTOLOGY_ROOT, 'leolani.ttl')
        self.ontology_paths['gaf'] = os.path.join(self.ONTOLOGY_ROOT, 'gaf.rdf')
        self.ontology_paths['grasp'] = os.path.join(self.ONTOLOGY_ROOT, 'grasp.rdf')
        self.ontology_paths['sem'] = os.path.join(self.ONTOLOGY_ROOT, 'sem.rdf')

    def load_ontology_integration(self):
        """
        Parse 'integration.ttl' from ONTOLOGY_ROOT into the ontology graph.
        define_named_graphs() must have run first, since it creates
        self.ontology_graph.
        """
        self.ontology_graph.parse(location=os.path.join(self.ONTOLOGY_ROOT, 'integration.ttl'), format="turtle")

    def _bind_namespaces(self):
        """
        Bind namespaces

        Returns
        -------
        None
        """
        self.dataset.bind('n2mu', self.namespaces['N2MU'])
        self.dataset.bind('leolaniWorld', self.namespaces['LW'])

        self.dataset.bind('gaf', self.namespaces['GAF'])
        self.dataset.bind('leolaniTalk', self.namespaces['LTa'])

        self.dataset.bind('grasp', self.namespaces['GRASP'])
        self.dataset.bind('leolaniFriends', self.namespaces['LF'])
        self.dataset.bind('leolaniInputs', self.namespaces['LI'])

        self.dataset.bind('time', self.namespaces['TIME'])
        self.dataset.bind('eps', self.namespaces['EPS'])
        self.dataset.bind('leolaniContext', self.namespaces['LC'])

        self.dataset.bind('skos', self.namespaces['SKOS'])
        self.dataset.bind('prov', self.namespaces['PROV'])
        self.dataset.bind('sem', self.namespaces['SEM'])
        self.dataset.bind('xml', self.namespaces['XML'])
        self.dataset.bind('owl', OWL)

        self.dataset.bind('wd', self.namespaces['WD'])
        self.dataset.bind('wdt', self.namespaces['WDT'])
        self.dataset.bind('wikibase', self.namespaces['wikibase'])

    ########## basic constructors ##########
    def _fix_nlp_types(self, types):
        """
        Normalise type labels: dotted labels keep only the part after the
        last dot, and 'artifact' additionally implies 'object'.

        NOTE(review): the bodies of the branches in this method were missing
        from the reviewed source and have been reconstructed from the
        surrounding comments and conditions -- confirm against the
        repository.

        Parameters
        ----------
        types: List[str] or str
            Type labels; a bare string is treated as a single label

        Returns
        -------
        fixed_types: List[str]
            Normalised type labels
        """
        # TODO here we know if two types are different category (aka noun and verb) we might need to split the triple
        if isinstance(types, str):
            # Iterating a bare string would yield single characters ("this
            # was just a char"); treat the whole string as one label instead.
            types = [types]

        fixed_types = []
        for el in types:
            if '.' in el:
                fixed_types.append(el.split('.')[-1])
            else:
                fixed_types.append(el)

        # Hand fixed mappings
        if 'artifact' in fixed_types:
            fixed_types.append('object')

        return fixed_types

    def create_resource_uri(self, namespace, resource_name):
        """
        Create an URI for the given resource (entity, predicate, named graph, etc) in the given namespace

        Parameters
        ----------
        namespace: str
            Namespace where entity belongs to
        resource_name: str
            Label of resource

        Returns
        -------
        uri: str
            Representing the URI of the resource
        """
        if namespace in self.namespaces:
            uri = URIRef(to_iri(self.namespaces[namespace] + resource_name))
        else:
            # Unknown namespace key: fall back to a "<namespace>:<name>" IRI.
            uri = URIRef(to_iri('{}:{}'.format(namespace, resource_name)))

        return uri

    def fill_literal(self, value, datatype=None):
        """
        Create an RDF literal given its value and datatype

        Parameters
        ----------
        value: str
            Value of the literal resource
        datatype: str
            Datatype of the literal

        Returns
        -------
            Literal with value and datatype given
        """
        # Literal's datatype argument already defaults to None, so it can be
        # forwarded unconditionally.
        return Literal(value, datatype=datatype)

    def fill_entity(self, label, types, namespace='LW', uri=None):
        """
        Create an RDF entity given its label, types and its namespace

        Parameters
        ----------
        label: str
            Label of entity
        types: List[str]
            List of types for this entity
        uri: str
            URI of the entity, is available (i.e. when extracting concepts from wikidata)
        namespace: str
            Namespace where entity belongs to

        Returns
        -------
            Entity object with given label
        """
        if types in [None, ''] and label != '':
            self._log.warning('Unknown type: {}'.format(label))
            return self.fill_entity_from_label(label, namespace)

        entity_id = self.create_resource_uri(namespace, label) if not uri else URIRef(to_iri(uri))
        fixed_types = self._fix_nlp_types(types)
        return Entity(entity_id, Literal(label), fixed_types)

    def fill_predicate(self, label, namespace='N2MU', uri=None):
        """
        Create an RDF predicate given its label and its namespace

        Parameters
        ----------
        label: str
            Label of predicate
        uri: str
            URI of the predicate, is available (i.e. when extracting concepts from wikidata)
        namespace: str
            Namespace where predicate belongs to

        Returns
        -------
            Predicate object with given label
        """
        predicate_id = self.create_resource_uri(namespace, label) if not uri else URIRef(to_iri(uri))

        return Predicate(predicate_id, Literal(label))

    def fill_entity_from_label(self, label, namespace='LW', uri=None):
        """
        Create an RDF entity given its label and its namespace

        Parameters
        ----------
        label: str
            Label of entity
        uri: str
            URI of the entity, is available (i.e. when extracting concepts from wikidata)
        namespace: str
            Namespace where entity belongs to

        Returns
        -------
            Entity object with given label and no type information
        """
        entity_id = self.create_resource_uri(namespace, label) if not uri else URIRef(to_iri(uri))

        return Entity(entity_id, Literal(label), [''])

    def empty_entity(self):
        """
        Create an empty RDF entity

        Returns
        -------
            Entity object with no label and no type information
        """
        return Entity('', Literal(''), [''])

    def fill_provenance(self, author, date):
        """
        Structure provenance to pair authors and dates when mentions are created

        Parameters
        ----------
        author: str
            Actor that generated the knowledge
        date: date
            Date when knowledge was generated

        Returns
        -------
            Provenance object containing author and date
        """
        return Provenance(author, date)

    def fill_triple(self, subject_dict, predicate_dict, object_dict, namespace='LW'):
        """
        Create a triple from dictionaries describing its subject, predicate and object

        Parameters
        ----------
        subject_dict: dict
            Information about label and type of subject
        predicate_dict: dict
            Information about type of predicate
        object_dict: dict
            Information about label and type of object
        namespace: str
            Information about which namespace the entities belongs to

        Returns
        -------
            Triple object built from the given subject, predicate and object
        """
        subject = self.fill_entity(subject_dict['label'], [subject_dict['type']], namespace=namespace)
        predicate = self.fill_predicate(predicate_dict['type'])
        # Named `obj` so the builtin `object` is not shadowed.
        obj = self.fill_entity(object_dict['label'], [object_dict['type']], namespace=namespace)

        return Triple(subject, predicate, obj)

    def fill_triple_from_label(self, subject_label, predicate, object_label, namespace='LW'):
        """
        Create a triple from the bare labels of its subject, predicate and object

        Parameters
        ----------
        subject_label: str
            Information about label of subject
        predicate: str
            Information about predicate
        object_label: str
            Information about label of object
        namespace: str
            Information about which namespace the entities belongs to

        Returns
        -------
            Triple object whose entities carry no type information
        """
        subject = self.fill_entity_from_label(subject_label, namespace=namespace)
        predicate_resource = self.fill_predicate(predicate)
        obj = self.fill_entity_from_label(object_label, namespace=namespace)

        return Triple(subject, predicate_resource, obj)

    ########## basic reverse engineer ##########
    def label_from_uri(self, uri, namespace='LTi'):
        """
        Recover the label of a resource by removing its namespace prefix

        Parameters
        ----------
        uri: str
            URI of the resource
        namespace: str
            Key (in self.namespaces) of the namespace to remove; a key that
            is not registered raises KeyError

        Returns
        -------
        label: str
            The URI without the namespace prefix, or the unchanged URI when it
            does not start with that namespace
        """
        # str.strip() removes a *set of characters* from both ends rather
        # than a prefix, which eats into the label itself; slice the prefix
        # off explicitly instead.
        prefix = str(self.namespaces[namespace])
        uri = str(uri)
        if uri.startswith(prefix):
            return uri[len(prefix):]
        return uri

    def clean_aggregated_types(self, aggregated_types):
        """
        Split a '|'-separated string of type URIs and reduce each URI to its
        local name, case-folded with casefold_text(..., format='triple').

        NOTE(review): the fallback branch and the line collecting the
        results were missing from the reviewed source and have been
        reconstructed -- confirm against the repository.

        Parameters
        ----------
        aggregated_types: str
            Type URIs joined with '|'

        Returns
        -------
        clean_types: List[str]
            One cleaned type per input URI
        """
        clean_types = []
        for type_uri in aggregated_types.split('|'):
            if '#' in type_uri:
                bare_type = type_uri.split('#', 1)[1]
            elif '/' in type_uri:
                bare_type = type_uri.rsplit('/', 1)[1]
            else:
                bare_type = type_uri

            clean_types.append(casefold_text(bare_type, format='triple'))

        return clean_types

    def clean_aggregated_detections(self, aggregared_detections):
        """
        Split a '|'-separated string of detections and drop the trailing
        '-<id>' suffix from each label.

        NOTE(review): the line collecting the results was missing from the
        reviewed source and has been reconstructed -- confirm against the
        repository.

        Parameters
        ----------
        aggregared_detections: str
            Detection labels joined with '|'

        Returns
        -------
        clean_detections: List[str]
            One label per detection, without its id suffix
        """
        clean_detections = []
        for detection_label in aggregared_detections.split('|'):
            if '-' in detection_label:
                detection_label = detection_label.rsplit('-', 1)[0]
            clean_detections.append(detection_label)

        return clean_detections
def main(source, target, geometryfile='data/point2wkt.json'):
    """Build the address dataset from `source` and write it to `target` as TriG.

    :param source: path of the JSON file with the address records; it is
        iterated and then indexed with each item (presumably a mapping keyed
        by address label -- confirm)
    :param target: destination handed to the dataset's ``serialize``
    :param geometryfile: path of the JSON file that is loaded and passed to
        ``getAdres`` as its point-to-WKT argument
    """
    with open(source) as infile:
        data = json.load(infile)

    with open(geometryfile) as infile:
        point2wkt = json.load(infile)

    ds = Dataset()
    dataset = lp.term('')

    # The named graph also becomes the store behind rdfSubject.
    g = rdfSubject.db = ds.graph(identifier=lp)

    ### Custom triples / Ontology
    ontology_triples = (
        (lpOnt.Adres, OWL.equivalentClass, schema.PostalAddress),
        (lpOnt.Straat, OWL.equivalentClass, hg.Street),
        (lpOnt.Buurt, OWL.equivalentClass, hg.Neighbourhood),
        (lpOnt.adres, OWL.equivalentProperty, schema.address),
    )
    for triple in ontology_triples:
        g.add(triple)

    # Data #

    adres2locatie = defaultdict(lambda: defaultdict(list))

    for n, adresLabel in enumerate(data, 1):

        # Progress indicator, rewritten in place every 5000 records.
        if not n % 5000:
            print(f"{n}/{len(data)}", end='\r')

        # The separate geometry, observation and role construction that used
        # to follow is currently disabled; getAdres receives the point-to-WKT
        # mapping directly.
        addresses = getAdres(data[adresLabel], adresLabel, point2wkt)

    prefixes = (
        ('create', create),
        ('schema', schema),
        ('sem', sem),
        ('geo', geo),
        ('juso', juso),
        ('qb', qb),
        ('void', void),
    )
    for prefix, namespace in prefixes:
        ds.bind(prefix, namespace)

    ds.serialize(target, format='trig')
# Nanopublication schema and FOAF namespaces.
NP = Namespace('http://www.nanopub.org/nschema#')
FOAF = Namespace('http://xmlns.com/foaf/0.1/')

# Name of the dataset being converted and the location of its source CSV.
dataset = "gdppc"
pathtofile = '../../sdh-public-datasets/allcliodata_raw.csv'

# Resources of this dataset live under a dataset-specific base IRI.
BASE = Namespace('http://data.socialhistory.org/resource/' + dataset + '/')

# One rdflib Dataset holds all graphs; register every prefix used in the
# output, in the same order as before.
rdf_dataset = Dataset()
for _prefix, _namespace in (
        ('qbrv', QBRV),
        ('qbr', QBR),
        ('qb', QB),
        ('skos', SKOS),
        ('prov', PROV),
        ('np', NP),
        ('foaf', FOAF),
        ('clio-property', CLIOPROP),
        ('clio-indicator', CLIOIND),
        ('clio-country', CLIOCTR),
        ('clio', CLIO),
        ('sdmx', SDMX),
        ('sdmx-dimension', SDMXDIM),
        ('sdmx-measure', SDMXMSR),
):
    rdf_dataset.bind(_prefix, _namespace)
    return str(response.status_code)

def serialize_upload(filename, dataset, upload=True):
    """Write `dataset` to `filename` in TriG format.

    :param filename: path of the file to (over)write
    :param dataset: object whose ``serialize`` method is called with the open
        file and ``format='trig'``
    :param upload: not used by the visible body.
        NOTE(review): an upload step appears to be missing here -- confirm
        against the full file.
    """
    with open(filename, 'w') as f:
        dataset.serialize(f, format='trig')

graph_uri_base = resource + 'findaslot/'


dataset = Dataset()
dataset.bind('fasdat', RESOURCE)
dataset.bind('fasont', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)

dataset.default_context.parse(VOCAB_FILE, format='turtle')

# Upload vocabulary
with open(VOCAB_FILE, 'r') as f:

dataset, t_graph = convert_dataset(
    SOURCE_DATA_DIR + 'Theater.json', dataset, URIRef(graph_uri_base + 'theaters'), museums=False)
serialize_upload(OUTPUT_DIR + 'theaters.trig', t_graph)
# dataset.remove_graph(t_graph)