Ejemplo n.º 1
0
 def connect(self, dburi):
     """Open the triple store behind *dburi*; only 'memory' is supported."""
     # Fail fast when the optional rdflib dependency is absent.
     if rdflib is None:
         raise ConnectionError('RDFLib backend is not installed')
     if dburi != 'memory':
         raise ConnectionError('Unknown database config: %s' % dburi)
     self._store = IOMemory()
def test_len_with_ctx_excluded1():
    """__len__ scoped to an excluded context must report zero triples."""
    ctx = 'http://example.org/ctx'
    store = IOMemory()
    triple = (URIRef('http://example.org/a'),
              URIRef('http://example.org/b'),
              URIRef('http://example.org/c'))
    store.add(triple, context=ctx)
    wrapped = BundleDependencyStore(store, excludes={ctx})
    assert wrapped.__len__(ctx) == 0
def test_includes_triples():
    """A store wrapped without excludes exposes all of its triples."""
    store = IOMemory()
    triple = (URIRef('http://example.org/a'),
              URIRef('http://example.org/b'),
              URIRef('http://example.org/c'))
    store.add(triple, context='http://example.org/ctx')
    wrapped = BundleDependencyStore(store)
    count = sum(1 for _ in wrapped.triples((None, None, None)))
    assert count == 1
def test_excludes_contexts():
    """An excluded context must not show up in contexts()."""
    ctx = 'http://example.org/ctx'
    store = IOMemory()
    store.add((URIRef('http://example.org/a'),
               URIRef('http://example.org/b'),
               URIRef('http://example.org/c')),
              context=ctx)
    wrapped = BundleDependencyStore(store, excludes={ctx})
    assert set(wrapped.contexts()) == set()
Ejemplo n.º 5
0
    def __init__(self, configuration=None, identifier=None):
        """Set up an IOMemory store whose internal indexes preserve
        insertion order (OrderedDict/OrderedSet instead of dict/set)."""
        IOMemory.__init__(self, configuration, identifier)

        # Namespace <-> prefix maps, kept in registration order.
        self.__namespace = OrderedDict()
        self.__prefix = OrderedDict()

        # RDF nodes are interned to integer keys to save memory in the
        # indexes.  None is mapped to itself so it can be tested for in
        # either encoded or unencoded form.
        self.__int2obj = OrderedDict({None: None})  # int key -> node
        self.__obj2int = OrderedDict({None: None})  # node -> int key

        # One index per triple position, plus a per-triple context map.
        self.__subjectIndex = OrderedDict()    # sid -> set(enctriples)
        self.__predicateIndex = OrderedDict()  # pid -> set(enctriples)
        self.__objectIndex = OrderedDict()     # oid -> set(enctriples)
        self.__tripleContexts = OrderedDict()  # enctriple -> {cid: quoted, ...}
        # cid -> set(enctriples); None holds the default context's triples.
        self.__contextTriples = OrderedDict({None: OrderedSet()})

        # All contexts used in the store (unencoded).
        self.__all_contexts = OrderedSet()
        # Default context information for triples.
        self.__defaultContexts = None
Ejemplo n.º 6
0
class RDFLibTripleStore(BaseBackend):
    """Triple store backend built on rdflib's in-memory store."""

    implements(ITripleStore, ISPARQLEndpoint)

    def __init__(self):
        super(RDFLibTripleStore, self).__init__()
        self.logger = logging.getLogger(__name__)
        self._nsmap = {}      # prefix -> namespace URI (text)
        self._nsobjmap = {}   # prefix -> rdflib Namespace object
        # Fixed: add_pquery() stored into self._pqueries, but it was never
        # initialized here, so the first call raised AttributeError.
        self._pqueries = {}   # query id -> prepared query
        self._querytext = {}  # query id -> raw query text

    def add_pquery(self, qidstring, querytext):
        """Register *querytext* under *qidstring*, pre-parsed with the
        currently registered namespaces."""
        self._pqueries[qidstring] = prepareQuery(querytext,
                                                 initNs=self._nsobjmap)
        self._querytext[qidstring] = querytext

    def connect(self, dburi):
        """Connect to the store; only the 'memory' config is supported."""
        if rdflib is None:
            raise ConnectionError('RDFLib backend is not installed')
        if dburi == 'memory':
            self._store = IOMemory()
        else:
            raise ConnectionError('Unknown database config: %s' % dburi)

    def disconnect(self):
        """Nothing to release for the in-memory store."""
        pass

    def _rdflib_format(self, format):
        # Map public format names onto rdflib parser/serializer ids.
        return {'ntriples': 'nt', 'rdfxml': 'xml', 'turtle': 'n3'}[format]

    def contexts(self):
        """Return the identifiers of all named graphs as text."""
        return [
            c._Graph__identifier.decode('utf8')
            for c in self._store.contexts()
        ]

    def _get_context(self, context_name):
        # Linear scan over the named graphs; returns None on no match.
        for ctxt in self._store.contexts():
            if ctxt._Graph__identifier == context_name:
                return ctxt

    def register_prefix(self, prefix, namespace):
        """Record *prefix* -> *namespace*, both as text and as an object."""
        self._nsmap[prefix] = namespace
        self._nsobjmap[prefix] = Namespace(namespace)

    def _parse(self, graph, file, format, base_uri=None):
        """Parse *file* into *graph*, normalizing every parser failure
        into TripleStoreError.

        Note: 'except E, err' (Python-2-only syntax) was changed to
        'except E as err', which both Python 2.6+ and Python 3 accept.
        """
        try:
            graph.parse(file, base_uri, format)
        except rdflib.exceptions.ParserError as err:
            # not sure if this ever happens
            raise TripleStoreError(err)
        except Exception as err:
            # each parser throws different errors,
            # there's an ntriples error, but the rdfxml
            # parser throws no errors so you end up with
            # a saxparser exception.
            # The n3 parser just silently fails
            # without any traceback
            raise TripleStoreError(err)
Ejemplo n.º 7
0
    def _init_store(self, ctx):
        """Bind *ctx* and make sure the backing stores exist."""
        self.ctx = ctx

        # The stored-context view is optional.
        self._store_store = RDFContextStore(ctx) if self._include_stored else None

        # Create the in-memory store lazily, exactly once.
        if self._memory_store is None:
            self._memory_store = IOMemory()
            self._init_store0(ctx)
Ejemplo n.º 8
0
    def convert_gml(self, ttl_output_file, uri_part, specific_part):
        """
        Pelagios conversion GML to TTL.

        @type       ttl_output_file: string
        @param      ttl_output_file: Absolute path to TTL output file
        @type       uri_part: string
        @param      uri_part: URI for the region to be displayed (e.g. http://earkdev.ait.ac.at/earkweb/sip2aip/working_area/sip2aip/34809536-b9f8-4c51-83d1-ef365ca658f5/)
        @type       specific_part: string
        @param      specific_part: Specific part that distinguishes the URI from other URIs (e.g. 1994)
        """
        # Namespaces appearing in the generated TTL.
        cito_ns = Namespace("http://purl.org/spar/cito")
        cnt_ns = Namespace("http://www.w3.org/2011/content#")
        dcterms_ns = Namespace("http://purl.org/dc/terms/")
        foaf_ns = Namespace("http://xmlns.com/foaf/0.1/")
        geo_ns = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
        geosparql_ns = Namespace("http://www.opengis.net/ont/geosparql#")
        gn_ns = Namespace("http://www.geonames.org/ontology#")
        lawd_ns = Namespace("http://lawd.info/ontology/")
        rdfs_ns = Namespace("http://www.w3.org/2000/01/rdf-schema#")
        skos_ns = Namespace("http://www.w3.org/2004/02/skos/core#")

        slovenia = URIRef("http://earkdev.ait.ac.at/earkweb/sip2aip/working_area/sip2aip/5c6f5563-7665-4719-a2b6-4356ea033c1d/#place/Slovenia")

        store = IOMemory()

        g = ConjunctiveGraph(store=store)
        for prefix, ns in (("cito", cito_ns), ("cnt", cnt_ns),
                           ("dcterms", dcterms_ns), ("foaf", foaf_ns),
                           ("geo", geo_ns), ("geosparql", geosparql_ns),
                           ("gn", gn_ns), ("lawd", lawd_ns),
                           ("rdfs", rdfs_ns), ("skos", skos_ns)):
            g.bind(prefix, ns)

        graph_slovenian_districts = Graph(store=store, identifier=slovenia)
        gml_to_wkt = GMLtoWKT(self.gml_file)
        district_included = {}
        print("Processing GML file: %s" % self.gml_file)
        for i, district_wkt in enumerate(gml_to_wkt.get_wkt_linear_ring(), 1):
            # Reuse the normalized name: the original recomputed
            # whsp_to_unsc() three times per district.
            techname = whsp_to_unsc(district_wkt["name"])
            print("District %d: %s" % (i, techname))
            if techname not in district_included:
                district = URIRef("%s#place/%s/%s" % (uri_part, techname, specific_part))
                graph_slovenian_districts.add((district, RDF.type, lawd_ns.Place))
                graph_slovenian_districts.add((district, dcterms_ns['isPartOf'], slovenia))
                graph_slovenian_districts.add((district, dcterms_ns['temporal'], Literal(str(district_wkt["year"]))))
                graph_slovenian_districts.add((district, gn_ns['countryCode'], Literal(u'SI')))
                graph_slovenian_districts.add((district, rdfs_ns['label'], Literal(district_wkt["name"], lang=u'si')))
                polygons = BNode()
                graph_slovenian_districts.add((district, geosparql_ns['hasGeometry'], polygons))
                g.add((polygons, geosparql_ns['asWKT'], Literal(district_wkt["polygon"])))
                district_included[techname] = True
        # The context manager closes the file; the original's extra
        # f.close() after the with-block was redundant.
        with open(ttl_output_file, 'w') as f:
            f.write(g.serialize(format='n3'))
Ejemplo n.º 9
0
    def create_ontology(self, tr, predicate, subClass, address, booktitle):
        """Build the property ontology and the subject graph for *tr* and
        serialize them to 'trtst.rdf' and 'test2.owl' (turtle format).

        Both graphs share one in-memory store; *subClass* hangs under
        *predicate* via rdfs:subClassOf.
        """
        LDT = Namespace("http://www.JceFinalProjectOntology.com/")
        ut = Namespace("http://www.JceFinalProjectOntology.com/subject/#")
        usubClass = URIRef("http://www.JceFinalProjectOntology.com/subject/" +
                           subClass.strip() + '#')
        print(ut)
        print(usubClass)

        store = IOMemory()

        sty = LDT[predicate]
        g = rdflib.Graph(store=store, identifier=LDT)
        t = ConjunctiveGraph(store=store, identifier=ut)
        print('Triples in graph before add: ', len(t))
        g.add((URIRef(LDT), RDF.type, RDFS.Class))
        g.add((URIRef(LDT), RDFS.label, Literal("JFPO")))
        g.add((URIRef(LDT), RDFS.comment, Literal('class of all properties')))
        for v in self.symbols.values():
            # Compound terms contribute only their head term as the
            # property name; the full term goes into the comment.
            vs = self.splitTerms(v)[0] if self.if_compoTerm(v) else v
            g.add((LDT[vs], RDF.type, RDF.Property))
            g.add((LDT[vs], RDFS.label, Literal('has' + vs)))
            g.add((LDT[vs], RDFS.comment, Literal(v)))
            g.add((LDT[vs], RDFS.range, OWL.Class))
            g.add((LDT[vs], RDFS.domain, Literal(vs)))
        g.bind('JFPO', LDT)
        g.serialize('trtst.rdf', format='turtle')

        t.add((ut[tr], RDF.type, OWL.Class))
        t.add((ut[tr], RDFS.subClassOf, OWL.Thing))
        t.add((ut[tr], RDFS.label, Literal(tr)))
        t.add((ut[tr], DC.title, Literal(booktitle)))
        t.add((ut[tr], DC.source, Literal(address)))
        # The original added this identical triple three times; rdflib
        # graphs are sets, so once is equivalent.
        t.add((ut[tr], DC[predicate], URIRef(usubClass)))
        t.add((ut[tr], LDT[predicate], RDF.Property))
        t.add((ut[tr], LDT.term(predicate), URIRef(usubClass)))

        t.add((usubClass, RDF.type, OWL.Class))
        t.add((usubClass, RDFS.subClassOf, OWL.Thing))
        t.add((usubClass, RDFS.subClassOf, URIRef(sty)))
        t.add((usubClass, RDFS.label, Literal(subClass)))

        # Fixed: the Dublin Core URI was malformed as
        # "http://http://purl.org/dc/elements/1.1/" (doubled scheme).
        t.bind("dc", "http://purl.org/dc/elements/1.1/")
        t.bind('JFPO', LDT)
        t.commit()

        t.serialize('test2.owl', format='turtle')
Ejemplo n.º 10
0
 def __init__(self, id: str) -> None:
     """Create an empty rdflib graph backed by a fresh in-memory store."""
     self.id: str = id
     self.store = IOMemory()
     self.g = rdflib.Graph(store=self.store)
     # Prefix -> namespace map seeded with the default (OWL) ontology,
     # plus its inverse mapping, initially empty.
     self.namespaces = {'owl': 'http://www.w3.org/2002/07/owl#'}
     self.reversed_namespaces = {}
Ejemplo n.º 11
0
def graph() -> rdflib.ConjunctiveGraph:
    """Return a fresh in-memory conjunctive graph with the iolanta
    prefix already bound."""
    universe = rdflib.ConjunctiveGraph(store=IOMemory())
    universe.bind("iolanta", "https://iolanta.tech/")
    return universe
Ejemplo n.º 12
0
def get_all_obs():
    """Query the SPARQL endpoint for every (sensor, observed property)
    pair and insert the resulting graph into the Virtuoso endpoint."""
    query = "SELECT distinct ?a ?c where {\
         ?a <http://www.w3.org/1999/02/22-rdf-syntax-ns%23type> <http://purl.oclc.org/NET/ssnx/ssn%23Sensor> . \
         ?b <http://purl.oclc.org/NET/ssnx/ssn%23observedBy> ?a . \
         ?b <http://purl.oclc.org/NET/ssnx/ssn%23observedProperty> ?c . }"

    url = ENDPOINT_URL + query + "&output=json"

    print("Resolving URL...")
    # NOTE(review): urllib.urlopen is the Python 2 spelling; under
    # Python 3 this would be urllib.request.urlopen — confirm target.
    # Renamed local from 'json' to avoid shadowing the module name.
    results = simplejson.load(urllib.urlopen(url))
    print("Done!")

    store = IOMemory()
    gpr = Graph(store=store)

    # Add one ssn#observes triple per binding.
    for st in results['results']['bindings']:
        gpr.add((st['a']['value'],
                 'http://purl.oclc.org/NET/ssnx/ssn#observes',
                 st['c']['value']))
        # Fixed: this line read "suprint st['a']['value']" — a garbled
        # print statement that was a syntax error.
        print(st['a']['value'])

    print("Inserting graph in SPARQL endpoint...")
    insertGraph(g=gpr, sparql=VIRTUOSO_URL, resourceuri=RESOURCE_URI)
    print("Done!")
    print("Finish!")
Ejemplo n.º 13
0
def properties_rdf_generator():
    """Publish one small RDF graph per configured observed property."""
    for prop in properties:
        observedprop, obsunit, typeofuri, xsdclass, uriprefix = prop.values()[0]

        # Skip entries with no observed property configured.
        if not observedprop:
            continue

        #.../station/ZORRO2/NO2/15022011/10
        uri = RESOURCE_URI + 'prop/' + uriprefix

        # Fresh graph (and store) for every property.
        ssn = Namespace("http://purl.oclc.org/NET/ssnx/ssn#")
        dc = Namespace("http://purl.org/dc/elements/1.1/")
        owl = Namespace("http://www.w3.org/2002/07/owl#")

        store = IOMemory()
        g = ConjunctiveGraph(store=store)
        for prefix, ns in (("ssn", ssn), ("dc", dc), ("owl", owl)):
            g.bind(prefix, ns)

        cpr = URIRef(uri)
        gpr = Graph(store=store, identifier=cpr)

        # Describe the property and link it to the reference vocabulary:
        # classes are typed directly, anything else via owl:sameAs.
        gpr.add((cpr, dc['description'], prop.keys()[0]))
        if typeofuri == 'Class':
            gpr.add((cpr, RDF.type, URIRef(observedprop)))
        else:
            gpr.add((cpr, owl["sameAs"], URIRef(observedprop)))
        gpr.add((cpr, RDF.type, ssn['Property']))

        insertGraph(g=gpr, sparql=VIRTUOSO_URL, resourceuri=RESOURCE_URI)
Ejemplo n.º 14
0
 def render(self,
            data1,
            media_type=None,
            renderer_context=None,
            format_1=None,
            binary=False,
            store=False,
            named_graph=None):
     """Render entities into a named graph plus a VoID/provenance graph.

     *data1* may be a single entity dict, a list of entity dicts, or a
     directory path holding pickled lists of entity dicts.  Returns
     (serialized text, store), or (ConjunctiveGraph, store) when
     *binary* is true.
     """
     # Normalize the single-dict case to a one-element list.
     # Fixed idiom: type(x) == T comparisons replaced with isinstance,
     # which also accepts subclasses.
     if isinstance(data1, dict):
         data1 = [data1]
     if format_1 is not None:
         self.format = format_1
     cidoc = Namespace("http://www.cidoc-crm.org/cidoc-crm/")
     geo = Namespace("http://www.opengis.net/ont/geosparql#")
     # Callers may hand in an existing store; create a fresh one otherwise.
     if not store:
         store = IOMemory()
     if named_graph:
         uri_entities = URIRef(named_graph)
     else:
         uri_entities = URIRef(f'{base_uri}/entities#')
     g = Graph(store, identifier=uri_entities)
     g.bind('cidoc', cidoc, override=False)
     g.bind('geo', geo, override=False)
     g.bind('owl', OWL, override=False)
     ns = {'cidoc': cidoc, 'geo': geo}
     if isinstance(data1, list):
         for data in data1:
             g, ent = self.ent_func[data['entity_type']](g,
                                                         ns,
                                                         data,
                                                         drill_down=True)
     elif isinstance(data1, str):
         # A string is treated as a directory of pickled entity batches.
         directory = os.fsencode(data1)
         for fn in os.listdir(directory):
             with open(os.path.join(directory, fn), 'rb') as inf:
                 data2 = pickle.load(inf)
                 for data in data2:
                     g, ent = self.ent_func[data['entity_type']](
                         g, ns, data, drill_down=True)
     # Provenance graph describing the produced dataset.
     g_prov = Graph(
         store,
         identifier=URIRef('https://omnipot.acdh.oeaw.ac.at/provenance'))
     g_prov.bind('dct', DCTERMS, override=False)
     g_prov.bind('void', VOID, override=False)
     g_prov.add((uri_entities, DCTERMS.title,
                 Literal(PROJECT_METADATA['title'], lang=lang)))
     g_prov.add((uri_entities, DCTERMS.description,
                 Literal(PROJECT_METADATA['description'], lang=lang)))
     g_prov.add((uri_entities, DCTERMS.creator,
                 Literal(PROJECT_METADATA['author'], lang=lang)))
     g_prov.add(
         (uri_entities, DCTERMS.publisher, Literal('ACDH-OeAW', lang=lang)))
     g_prov.add((uri_entities, DCTERMS.source, URIRef(base_uri)))
     g_prov.add((uri_entities, DCTERMS.created,
                 Literal(str(date.today()), datatype=XSD.date)))
     g_prov, g = generateVoID(g, dataset=uri_entities, res=g_prov)
     g_all = ConjunctiveGraph(store=store)
     if binary:
         return g_all, store
     return g_all.serialize(format=self.format.split('+')[-1]), store
Ejemplo n.º 15
0
def test_issue682_signing_named_graphs():
    """Graph digest of a named graph must differ from the full dataset's."""
    love = Namespace("http://love.com#")

    mary = BNode()
    john = URIRef("http://love.com/lovers/john#")

    cmary = URIRef("http://love.com/lovers/mary#")
    cjohn = URIRef("http://love.com/lovers/john#")

    store = IOMemory()

    g = ConjunctiveGraph(store=store)
    g.bind("love", love)

    # Mary's named graph carries two triples, John's one.
    gmary = Graph(store=store, identifier=cmary)
    gmary.add((mary, love['hasName'], Literal("Mary")))
    gmary.add((mary, love['loves'], john))

    gjohn = Graph(store=store, identifier=cjohn)
    gjohn.add((john, love['hasName'], Literal("John")))

    ig = to_isomorphic(g)
    igmary = to_isomorphic(gmary)

    # Isomorphic views preserve sizes; digests distinguish part from whole.
    assert len(igmary) == len(gmary)
    assert len(ig) == len(g)
    assert len(igmary) < len(ig)
    assert ig.graph_digest() != igmary.graph_digest()
Ejemplo n.º 16
0
    def _init_store(self, ctx):
        """Remember *ctx* and lazily create the backing stores."""
        self.ctx = ctx

        if not self._include_stored:
            self._store_store = None
        else:
            self._store_store = RDFContextStore(ctx)
        # The in-memory store is created at most once.
        if self._memory_store is None:
            self._memory_store = IOMemory()
            self._init_store0(ctx)
Ejemplo n.º 17
0
    def __init__(self, src=None, name=None):
        """Build the collection from *src*.

        Parameters
        ----------
        src: Collection or CollectionBackend or None
          Collection: pure runtime copy, no backend attached.
          CollectionBackend: populate the collection from the backend.
          None: create an empty collection named *name*.
        name: str
          Used only when src is None (see TODO below).
        """
        # TODO: What about the 'name' option? How to treat it, in case src
        # provides a name already? For now use it only if src==None.
        # type(src) == Collection => copy has its own 'name'?
        # type(src) == Backend => rename in backend?

        super(Collection, self).__init__()

        if isinstance(src, Collection):
            self._backend = None
            # TODO: confirm this is correct behaviour and document it.
            # Means, it is a pure runtime copy with no persistence and no
            # update from backend.

            self.update(src)
            self.store = IOMemory()
            # Re-register every named graph of the source store; the graph
            # named like the collection itself becomes our meta graph.
            for graph in src.store.contexts():
                self.store.add_graph(graph)
                if graph.identifier == Literal(src.name):
                    self.meta = graph
                else:
                    self[str(graph.identifier)].meta = graph

            self.conjunctive_graph = ConjunctiveGraph(store=self.store)

        elif isinstance(src, CollectionBackend):
            self._backend = src
            self.store = None
            # TODO: check for existence in reload() fails otherwise;
            # If it turns out, that reload is never required outside of
            # constructor, that check isn't needed!

            self._reload()
        elif src is None:
            self._backend = None
            self.store = IOMemory()
            # Fresh store with a single meta graph tagging this as a
            # DLNS.Collection.
            self.meta = Graph(store=self.store, identifier=Literal(name))
            self.meta.add((DLNS.this, RDF.type, DLNS.Collection))
            self.conjunctive_graph = ConjunctiveGraph(store=self.store)

        else:
            lgr.error("Unknown source for Collection(): %s" % type(src))
            raise TypeError('Unknown source for Collection(): %s' % type(src))
def test_len_some_excludes():
    """len() counts distinct triples from non-excluded contexts only."""
    iom = IOMemory()
    triple = (URIRef('http://example.org/a'),
              URIRef('http://example.org/b'),
              URIRef('http://example.org/c'))
    for n in (1, 2, 3):
        iom.add(triple, context='http://example.org/ctx%d' % n)
    bds = BundleDependencyStore(iom, excludes={'http://example.org/ctx3'})
    # The same triple lives in ctx1 and ctx2 but is counted once.
    assert len(bds) == 1
Ejemplo n.º 19
0
    def __init__(self, src=None, name=None):
        """Build a MetaCollection from *src*.

        Parameters
        ----------
        src: MetaCollection or list or dict or None
          MetaCollection: runtime copy (the source's name wins).
          list of Collection/CollectionBackend: entries keyed by their name.
          dict: key -> Collection or CollectionBackend.
          None: empty meta collection.
        name: str
          Name of the meta collection.

        Raises
        ------
        TypeError
          for any list/dict entry or *src* of an unsupported type.
        """
        super(MetaCollection, self).__init__()

        self.name = name
        self.store = IOMemory()

        if isinstance(src, MetaCollection):
            self.update(src)
            self.name = src.name
            # TODO: See Collection: How to treat names in case of a copy?

        elif isinstance(src, list):
            for item in src:
                if isinstance(item, Collection):
                    self[str(item.name)] = item
                elif isinstance(item, CollectionBackend):
                    new_item = Collection(src=item)
                    self[str(new_item.name)] = new_item
                else:
                    e_msg = "Can't retrieve collection from %s." % type(item)
                    lgr.error(e_msg)
                    raise TypeError(e_msg)

        elif isinstance(src, dict):
            for key in src:
                if isinstance(src[key], Collection):
                    self[key] = src[key]
                elif isinstance(src[key], CollectionBackend):
                    self[key] = Collection(src=src[key])
                else:
                    e_msg = "Can't retrieve collection from %s." % \
                            type(src[key])
                    lgr.error(e_msg)
                    raise TypeError(e_msg)

        elif src is None:
            pass
        else:
            e_msg = "Invalid source type for MetaCollection: %s" % type(src)
            lgr.error(e_msg)
            raise TypeError(e_msg)

        # join the stores:
        for collection in self:
            for graph in self[collection].store.contexts():
                self.store.add_graph(graph)
                # TODO: Note: Removed all the copying of the graphs and correcting
                # their references, since we now always use
                # 'collection/branch/handle' as key. But: Implementation of
                # this changed behaviour is not well tested yet.

        self.conjunctive_graph = ConjunctiveGraph(store=self.store)
 def rdflib_graph(self):
     """Return the parsed graph for this shard.

     Two cache layers are consulted: an in-process class-level dict,
     then memcache; on a double miss the stored n3 text is re-parsed
     and written back to both layers.
     """
     # Hoisted: the original called self._parsed_memcache_key() up to
     # five times per invocation.
     key = self._parsed_memcache_key()
     g = GraphShard._graph_cache.get(key, GraphShard._not_found)
     if g is not GraphShard._not_found:
         return g
     g = memcache.get(key)
     if g is not None:
         GraphShard._graph_cache[key] = g
         return g
     # Cache miss everywhere: parse the stored n3 text.
     g = Graph(store=IOMemory())
     g.parse(data=self.graph_n3, format='n3')
     GraphShard._graph_cache[key] = g
     memcache.add(key, g, 86400)  # keep for one day
     return g
def test_excludes_all_for_excluded_context():
    """Queries scoped to an excluded context must come back empty."""
    excluded = 'http://example.org/ctx'
    iom = IOMemory()
    triple = (URIRef('http://example.org/a'),
              URIRef('http://example.org/b'),
              URIRef('http://example.org/c'))
    iom.add(triple, context=excluded)
    iom.add(triple, context='http://example.org/ctx1')
    bds = BundleDependencyStore(iom, excludes={excluded})
    hits = sum(1 for _ in bds.triples((None, None, None), context=excluded))
    assert hits == 0
Ejemplo n.º 22
0
    def _reload(self):
        """Rebuild handles, metadata graphs and the conjunctive graph
        from the backend, replacing the old graph store entirely."""
        # TODO: When do we need to reload outside of the constructor?
        # May be override self.update() to additionally reload in case
        # there is a backend.

        if not self._backend:
            # TODO: Error or warning? Depends on when we want to call this one.
            # By now this should be an error (or even an exception).
            lgr.error("Missing collection backend.")
            return

        # get the handles as instances of class Handle:
        self.update(self._backend.get_handles())

        # get collection level data:
        collection_data = self._backend.get_collection()

        # TODO: May be a backend can just pass a newly created store containing
        # all the needed graphs. Would save us time and space for copy, but
        # seems to be less flexible in case we find another way to store a set
        # of named graphs and their conjunctive graph without the need of every
        # collection to have its own store.
        # Note: By using store.add() there seems to be no copy at all.
        # Need to check in detail, how this is stored and whether it still
        # works as intended.
        # Note 2: Definitely not a copy and seems to work. Need more queries to
        # check.

        # cleanup old store, if exists
        # (explicit gc() + del + collect keeps memory bounded before the
        # replacement store is built)
        if self.store is not None:
            self.store.gc()
            del self.store
            gc.collect()
        # create new store for the graphs:
        self.store = IOMemory()

        # add collection's own graph:
        self.store.add_graph(collection_data)
        self.meta = collection_data

        # add handles' graphs:
        for handle in self:
            self.store.add_graph(self[handle].meta)

        # reference to the conjunctive graph to be queried:
        self.conjunctive_graph = ConjunctiveGraph(store=self.store)
def test_triples_contexts():
    """Every matched triple must report all contexts it belongs to."""
    iom = IOMemory()
    ctx = 'http://example.org/ctx'
    ctx1 = 'http://example.org/ctx1'
    triple = (URIRef('http://example.org/a'),
              URIRef('http://example.org/b'),
              URIRef('http://example.org/c'))
    for c in (ctx, ctx1):
        iom.add(triple, context=c)
    bds = BundleDependencyStore(iom)
    for _, ctxs in bds.triples((None, None, None)):
        assert set(ctxs) == {ctx, ctx1}
def test_triples_choices_excluded():
    """triples_choices must yield nothing when every match is excluded."""
    ctx = 'http://example.org/ctx'
    iom = IOMemory()
    iom.add((URIRef('http://example.org/a'), URIRef('http://example.org/b'),
             URIRef('http://example.org/c')), context=ctx)
    iom.add((URIRef('http://example.org/e'), URIRef('http://example.org/b'),
             URIRef('http://example.org/d')), context=ctx)
    bds = BundleDependencyStore(iom, excludes={ctx})
    pattern = (None, None, [URIRef('http://example.org/c'),
                            URIRef('http://example.org/d')])
    assert set(bds.triples_choices(pattern)) == set()
Ejemplo n.º 25
0
def updateObservationProps(stationcod, typeslist):
    """Register each observed property type in *typeslist* for station
    *stationcod* in the SPARQL endpoint."""
    # Hoisted out of the loop: neither value depends on obstype.
    uri = RESOURCE_URI + 'station/' + stationcod
    ssn = Namespace("http://purl.oclc.org/NET/ssnx/ssn#")

    for obstype in typeslist:
        store = IOMemory()

        g = ConjunctiveGraph(store=store)
        g.bind("ssn", ssn)

        cpr = URIRef(uri)
        gpr = Graph(store=store, identifier=cpr)

        # Describe the station as a sensor observing this property.
        # NOTE(review): the object of ssn:observes is a plain string,
        # not a URIRef — confirm this is intended before changing it.
        gpr.add((cpr, RDF.type, ssn['Sensor']))
        gpr.add((cpr, ssn['observes'], RESOURCE_URI + 'prop/' + obstype))

        # Parenthesized print: identical output under Python 2 and 3.
        print(uri + ' | ' + obstype)
        insertGraph(g=gpr, sparql=VIRTUOSO_URL, resourceuri=RESOURCE_URI)
Ejemplo n.º 26
0
    def __init__(self):
        """Create the conjunctive graph and bind all project namespaces."""
        store = IOMemory()

        self.g = ConjunctiveGraph(store=store)

        # Bind every prefix used by the queries; note that 'cube' and
        # 'qb' are both bound to ns_cube, as in the original code.
        for prefix, ns in (('lada', ns_lada),
                           ('data', ns_data),
                           ('cube', ns_cube),
                           ('qb', ns_cube),
                           ('lcd', ns_lcd),
                           ('xsd', ns_xsd),
                           ('qb4cc', ns_qb4cc),
                           ('skos', ns_skos)):
            self.g.bind(prefix, ns)

        # Namespace map handed to SPARQL queries as initNs.
        self.initNs = {
            'lada': ns_lada,
            'data': ns_data,
            'qb': ns_cube,
            'lcd': ns_lcd,
            'xsd': ns_xsd,
            'qb4cc': ns_qb4cc,
            'skos': ns_skos
        }
Ejemplo n.º 27
0
def _setup_global_graph():
    """Lazy creation of the graph used to store events by events generators, and read by CGI scripts. """
    global _store_input
    global _events_conjunctive_graph

    # The store is built only on the first call; later calls fall
    # through to the shared graph at the bottom.
    if _store_input is None:
        if _events_conjunctive_graph is not None:
            raise Exception("_events_conjunctive_graph should be None")

        # _events_storage_style is a tuple (style_name, extra...), e.g.
        # ("IOMemory",) or ("SQLAlchemy", sqlite_path).
        if not isinstance(_events_storage_style, tuple):
            raise Exception("Wrong type for _events_storage_style")
        if _events_storage_style[0] == "IOMemory":
            # Volatile in-process store: nothing survives the process.
            _store_input = IOMemory()
            _events_conjunctive_graph = rdflib.ConjunctiveGraph(
                store=_store_input)
        elif _events_storage_style[0] == "SQLAlchemy":
            # How to install rdflib-sqlalchemy
            # pip install rdflib-sqlalchemy
            #
            # py -2.7 -m pip install rdflib-sqlalchemy
            #       from glob import glob
            #   ImportError: No module named glob
            #
            # py -3.6 -m pip install rdflib-sqlalchemy
            # OK

            sqlite_ident = rdflib.URIRef("rdflib_survol")

            sqlite_path = _events_storage_style[1]
            # This path might contain environment variables.
            sqlite_path_expanded = os.path.expandvars(sqlite_path)
            sqlite_uri = rdflib.Literal(sqlite_path_expanded)

            _store_input = rdflib.plugin.get(
                "SQLAlchemy", rdflib.store.Store)(identifier=sqlite_ident)
            _events_conjunctive_graph = rdflib.ConjunctiveGraph(
                _store_input, identifier=sqlite_ident)
            try:
                # _events_conjunctive_graph.open(sqlite_uri, create=True)

                # Open previously created store, or create it if it doesn't exist yet
                _log_db_access("_setup_global_graph", "O", "1", sqlite_uri)
                rt = _events_conjunctive_graph.open(sqlite_uri, create=False)
            except Exception as exc:
                logging.error("sqlite_uri=%s. Exception=%s", sqlite_uri, exc)
                logging.error("Trace=%s" % traceback.format_exc())
                logging.error("Stack=%s" % traceback.format_stack())

                # According to the documentation, it should rather return this value instead of throwing.
                rt = rdflib.store.NO_STORE

            try:
                if rt == rdflib.store.NO_STORE:
                    # There is no underlying SQLAlchemy infrastructure, create it
                    _log_db_access("_setup_global_graph", "C", "2", sqlite_uri)
                    _events_conjunctive_graph.open(sqlite_uri, create=True)
                elif rt != rdflib.store.VALID_STORE:
                    raise Exception("sqlite_uri=%s rt=%d" % (sqlite_uri, rt))

            except Exception as exc:
                raise Exception("sqlite_uri=%s.Exception=%s" %
                                (sqlite_uri, exc))

        else:
            raise Exception("Unknown storage style:" +
                            str(_events_storage_style))

    _check_globals("_setup_global_graph")

    return _events_conjunctive_graph
Ejemplo n.º 28
0
"""

from rdflib import Namespace, Literal, URIRef
from rdflib.graph import Graph, ConjunctiveGraph
from rdflib.plugins.memory import IOMemory

if __name__ == '__main__':
    ns = Namespace("http://love.com#"
                   )  # Namespace we will be working with
    mary = URIRef(
        "http://love.com/lovers/mary#"
    )  # Reference for the subject we are going to describe
    john = URIRef("http://love.com/lovers/john#")
    cmary = URIRef("http://love.com/lovers/mary#")
    cjohn = URIRef("http://love.com/lovers/john#")
    store = IOMemory()  # Backing store for our graphs
    g = ConjunctiveGraph(
        store=store
    )  # Conjunctive graph that aggregates each subject's named graph
    g.bind("love", ns)  # Bind the 'love' prefix to the ns namespace
    gmary = Graph(
        store=store, identifier=cmary
    )  # Named graph holding Mary's properties
    gmary.add((mary, ns['hasName'], Literal("Mary")))
    gmary.add((mary, ns['loves'], john))
    gjohn = Graph(store=store, identifier=cjohn)
    gjohn.add((john, ns['hasName'], Literal("John")))

    # With both Mary's and John's graphs populated, show the contents.

    print('#Contenido del grafo de Conjuntos')
Ejemplo n.º 29
0
class Collection(dict):
    """A collection of handles.

    Runtime representation of a collection's metadata. This is independent on
    its physical representation and therefore uses any CollectionBackend to set
    and/or retrieve the data.

    A Collection is a dictionary, which's keys are the handles' names.
    The values are Handle instances representing the metadata of these handles.
    Additionally, a Collection has attributes to store data about the
    collection itself:

    Attributes of a collection:
    name:               str
    store:              IOMemory
    meta:               (named) Graph
    conjunctive_graph:  ConjunctiveGraph

    To represent the metadata, Collections use a named graph per handle and an
    additional named graph for collection level metadata. These graphs can be
    queried via the collection's graph store and its corresponding conjunctive
    graph.
    """

    def __init__(self, src=None, name=None):
        """Build a collection from a copy, a backend, or from scratch.

        Parameters
        ----------
        src: Collection or CollectionBackend or None
          Collection: make a pure runtime copy (no backend/persistence).
          CollectionBackend: load handles and graphs from the backend.
          None: create a new, empty collection.
        name: str
          identifier for the collection-level graph; only used when
          src is None.

        Raises
        ------
        TypeError
          if `src` is of none of the accepted types.
        """
        # TODO: What about the 'name' option? How to treat it, in case src
        # provides a name already? For now use it only if src==None.
        # type(src) == Collection => copy has its own 'name'?
        # type(src) == Backend => rename in backend?

        super(Collection, self).__init__()

        if isinstance(src, Collection):
            self._backend = None
            # TODO: confirm this is correct behaviour and document it.
            # Means, it is a pure runtime copy with no persistence and no
            # update from backend.

            # Copy the handle dict, then mirror every named graph of the
            # source store into a fresh store of our own.
            self.update(src)
            self.store = IOMemory()
            for graph in src.store.contexts():
                self.store.add_graph(graph)
                # The graph named after the collection is the collection-level
                # metadata; all others belong to individual handles.
                if graph.identifier == Literal(src.name):
                    self.meta = graph
                else:
                    self[str(graph.identifier)].meta = graph

            self.conjunctive_graph = ConjunctiveGraph(store=self.store)

        elif isinstance(src, CollectionBackend):
            self._backend = src
            self.store = None
            # TODO: check for existence in reload() fails otherwise;
            # If it turns out, that reload is never required outside of
            # constructor, that check isn't needed!

            self._reload()
        elif src is None:
            # Fresh, empty collection: only the collection-level graph exists.
            self._backend = None
            self.store = IOMemory()
            self.meta = Graph(store=self.store, identifier=Literal(name))
            self.meta.add((DLNS.this, RDF.type, DLNS.Collection))
            self.conjunctive_graph = ConjunctiveGraph(store=self.store)

        else:
            lgr.error("Unknown source for Collection(): %s" % type(src))
            raise TypeError('Unknown source for Collection(): %s' % type(src))

    @property
    def name(self):
        # The collection's name is the identifier of its metadata graph.
        return str(self.meta.identifier)

    @property
    def url(self):
        # NOTE(review): raises AttributeError when there is no backend
        # (self._backend is None for copies and new collections) — confirm
        # callers only use this on backend-based collections.
        return self._backend.url

    def __delitem__(self, key):
        """Remove handle `key` and its metadata graph from the collection."""

        # NOTE(review): error-level log for a normal operation — looks like
        # leftover debugging; consider lgr.debug.
        lgr.error("__delitem__ called.")
        self_uri = self.meta.value(predicate=RDF.type, object=DLNS.Collection)
        key_uri = self[key].meta.value(predicate=RDF.type, object=DLNS.Handle)
        self.meta.remove((self_uri, DCTERMS.hasPart, key_uri))
        # NOTE(review): remove_graph is passed the handle's *name* (a str),
        # while add_graph in __setitem__ receives a Graph — confirm IOMemory
        # accepts an identifier here.
        self.store.remove_graph(self[key].name)
        super(Collection, self).__delitem__(key)

    def __setitem__(self, key, value):
        """Add Handle `value` under `key` and register its metadata graph."""
        if not isinstance(value, Handle):
            raise TypeError("Can't add non-Handle object to a collection.")

        super(Collection, self).__setitem__(key, value)
        # Record the part-of relation in the collection-level graph and make
        # the handle's graph queryable via our store.
        self_uri = self.meta.value(predicate=RDF.type, object=DLNS.Collection)
        key_uri = self[key].meta.value(predicate=RDF.type, object=DLNS.Handle)
        self.meta.add((self_uri, DCTERMS.hasPart, key_uri))
        self.store.add_graph(self[key].meta)

    def _reload(self):
        """Re-read handles and metadata graphs from the backend."""
        # TODO: When do we need to reload outside of the constructor?
        # May be override self.update() to additionally reload in case
        # there is a backend.

        if not self._backend:
            # TODO: Error or warning? Depends on when we want to call this one.
            # By now this should be an error (or even an exception).
            lgr.error("Missing collection backend.")
            return

        # get the handles as instances of class Handle:
        self.update(self._backend.get_handles())

        # get collection level data:
        collection_data = self._backend.get_collection()

        # TODO: May be a backend can just pass a newly created store containing
        # all the needed graphs. Would save us time and space for copy, but
        # seems to be less flexible in case we find another way to store a set
        # of named graphs and their conjunctive graph without the need of every
        # collection to have its own store.
        # Note: By using store.add() there seems to be no copy at all.
        # Need to check in detail, how this is stored and whether it still
        # works as intended.
        # Note 2: Definitely not a copy and seems to work. Need more queries to
        # check.

        # cleanup old store, if exists
        if self.store is not None:
            self.store.gc()
            del self.store
            gc.collect()
        # create new store for the graphs:
        self.store = IOMemory()

        # add collection's own graph:
        self.store.add_graph(collection_data)
        self.meta = collection_data

        # add handles' graphs:
        for handle in self:
            self.store.add_graph(self[handle].meta)

        # reference to the conjunctive graph to be queried:
        self.conjunctive_graph = ConjunctiveGraph(store=self.store)

    def query(self):
        # Note: As long as we use general SPARQL-Queries, no method is needed,
        # since this is a method of rdflib.Graph/rdflib.Store.
        # But we will need some kind of prepared queries here.
        # Also depends on the implementation of the 'ontology translation layer'
        pass

    def commit(self, msg="Collection updated."):
        """Persist the collection via its backend.

        Raises RuntimeError when the collection has no backend (copies and
        purely in-memory collections cannot be committed).
        """

        if not self._backend:
            lgr.error("Missing collection backend.")
            raise RuntimeError("Missing collection backend.")

        self._backend.commit_collection(self, msg)
Ejemplo n.º 30
0
    def __init__(self, ont: Ontology, tbl: DataTable,
                 kr2rml_file: Path) -> None:
        """Parse a Karma KR2RML model (n3 file) and replay its worksheet
        history as a list of local modeling commands.

        The RDF model is loaded, Karma's JSON-encoded worksheet history and
        input-column list are extracted, Karma column paths are mapped onto
        this table's attribute paths, each history entry is translated into a
        *Cmd object, conflicting commands are pruned, and the final command
        list is passed to the superclass constructor.
        """
        # Load the KR2RML model into an in-memory RDF graph.
        g = rdflib.Graph(store=IOMemory())
        g.parse(location=str(kr2rml_file), format="n3")

        # The worksheet history is stored as a single JSON literal on the
        # hasWorksheetHistory predicate.
        worksheet_history = list(
            g.triples(
                (None,
                 URIRef(
                     "http://isi.edu/integration/karma/dev#hasWorksheetHistory"
                 ), None)))
        assert len(worksheet_history) == 1
        worksheet_history = ujson.loads(worksheet_history[0][-1])

        # Likewise, the input columns are one JSON literal.
        input_columns = list(
            g.triples((
                None,
                URIRef("http://isi.edu/integration/karma/dev#hasInputColumns"),
                None)))
        assert len(input_columns) == 1
        input_columns = ujson.loads(input_columns[0][-1])

        # construct mapping between kr2rml attribute paths to tbl_attr_paths
        tbl_attr_paths = tbl.schema.get_attr_paths()
        n_attr_paths = len(tbl_attr_paths)
        # Index by the '@'-stripped form (Karma drops that marker) while
        # keeping the original path as the value; the assert guarantees
        # stripping caused no collisions.
        tbl_attr_paths = {
            apath.replace("@", ""): apath
            for apath in tbl_attr_paths
        }
        assert len(tbl_attr_paths) == n_attr_paths

        # Find the first index at which a Karma column-name suffix lines up
        # with a table attribute path; everything before it is a Karma-only
        # prefix that must be dropped.
        start_idx = 0
        for i, cname in enumerate(input_columns[0]):
            cpath = Schema.PATH_DELIMITER.join(
                cname['columnName'] for cname in input_columns[0][i:])
            # cname = Schema.PATH_DELIMITERinput_columns[i:]) cname['columnName'] + Schema.PATH_DELIMITER
            found_attr = False
            for attr_path in tbl_attr_paths:
                if (attr_path + Schema.PATH_DELIMITER).startswith(cpath):
                    found_attr = True
                    break
            if found_attr:
                start_idx = i
                break

        # col2col: full Karma column path -> table attribute path.
        # literal_nodes: Karma literal-node id -> synthetic attribute path.
        literal_nodes = {}
        col2col = {}
        for col in input_columns:
            attr_path = Schema.PATH_DELIMITER.join(
                cname['columnName'] for cname in col[start_idx:])
            if attr_path not in tbl_attr_paths:
                # Karma appends a trailing 'Values'/'content' component that
                # the table schema does not have; retry without it.
                attr_path = Schema.PATH_DELIMITER.join(
                    cname['columnName'] for cname in col[start_idx:-1])
                if col[-1]['columnName'] == 'Values':
                    assert attr_path in tbl_attr_paths
                elif col[-1]['columnName'] == 'content':
                    attr_path += Schema.PATH_DELIMITER + "#text"
                    assert attr_path in tbl_attr_paths
                else:
                    raise ValueError(
                        f"Invalid column type: {col[-1]['columnName']}")

            col2col[Schema.PATH_DELIMITER.join(
                cname['columnName']
                for cname in col)] = tbl_attr_paths[attr_path]
        assert len(set(
            col2col.values())) == len(input_columns), "No duplication"

        # extracting commands
        commands = []
        for command in worksheet_history:
            if command['commandName'] == "SubmitPythonTransformationCommand":
                # A Python transform creates a new derived column.
                cmd_start_col = command['inputParameters'][0]
                cmd_input_parent_col = Schema.PATH_DELIMITER.join(
                    [col['columnName'] for col in cmd_start_col['value'][:-1]])
                cmd_input_col = command['inputParameters'][-2]
                cmd_output_col = command['inputParameters'][-1]

                # Parameter layout differs depending on whether the optional
                # isJSONOutput flag is present.
                if command['inputParameters'][-3]['name'] == 'isJSONOutput':
                    cmd_code = command['inputParameters'][-5]
                    default_error_value = command['inputParameters'][-4]
                    assert command['inputParameters'][-3]['value'] == "false"
                else:
                    default_error_value = command['inputParameters'][-3]
                    cmd_code = command['inputParameters'][-4]

                assert cmd_input_col['name'] == "inputColumns" and cmd_output_col[
                    "name"] == "outputColumns" and cmd_code[
                        'name'] == 'transformationCode' and default_error_value[
                            'name'] == 'errorDefaultValue'
                cmd_input_cols = [[
                    cname['columnName'] for cname in o['value']
                ] for o in ujson.loads(cmd_input_col['value'])]
                karma_input_attr_paths = [
                    col2col[Schema.PATH_DELIMITER.join(cmd_input_col)]
                    for cmd_input_col in cmd_input_cols
                ]

                # update col2col because of new columns
                new_attr_name = ujson.loads(
                    cmd_output_col['value'])[0]['value'][-1]['columnName']
                new_attr_path = new_attr_name if cmd_input_parent_col == "" else (
                    cmd_input_parent_col + Schema.PATH_DELIMITER +
                    new_attr_name)
                cmd_output_col = Schema.PATH_DELIMITER.join(
                    cname['columnName'] for cname in ujson.loads(
                        cmd_output_col['value'])[0]['value'])
                col2col[cmd_output_col] = new_attr_path

                # Rewrite the transform source: turn the trailing `return`
                # into an assignment and replace getValue(<col>) arguments
                # with fully-qualified attribute paths.
                cmd_code = cmd_code['value'].replace("return ",
                                                     "__return__ = ")
                input_attr_paths = []
                # Iterate matches in reverse so earlier spans stay valid
                # while the string is being edited.
                for match in reversed(
                        list(re.finditer("getValue\(([^)]+)\)", cmd_code))):
                    start, end = match.span(1)
                    field = cmd_code[start:end].replace("'", "").replace(
                        '"""', "").replace('"', '')
                    # it seems that Karma use last column name, we need to recover full name
                    # using the provided input first
                    for cmd_input_col, input_attr_path in zip(
                            cmd_input_cols, karma_input_attr_paths):
                        if field == cmd_input_col[-1]:
                            field = input_attr_path
                            break
                    else:
                        # otherwise construct from the start columns
                        full_field = field if cmd_input_parent_col == "" else (
                            cmd_input_parent_col + Schema.PATH_DELIMITER +
                            field)
                        field = col2col[full_field]
                    cmd_code = cmd_code[:start] + f'"{field}"' + cmd_code[end:]

                    input_attr_paths.append(field)

                default_error_value = default_error_value['value']
                commands.append(
                    PyTransformNewColumnCmd(input_attr_paths, new_attr_name,
                                            cmd_code, default_error_value))
            elif command["commandName"] == "SetSemanticTypeCommand" or command[
                    "commandName"] == "SetMetaPropertyCommand":
                # Both commands attach a semantic type to a column; their
                # parameter layouts differ slightly.
                cmd_input_col = command['inputParameters'][-2]
                if command["inputParameters"][-5][
                        'name'] == 'SemanticTypesArray':
                    cmd_stype = command['inputParameters'][-5]
                else:
                    cmd_stype = command['inputParameters'][-6]

                if cmd_stype['name'] == 'SemanticTypesArray':
                    assert cmd_input_col['name'] == "inputColumns" and len(
                        cmd_stype['value']
                    ) == 1 and cmd_stype['value'][0]['isPrimary']
                    cmd_input_col = col2col[Schema.PATH_DELIMITER.join(
                        cname['columnName'] for cname in ujson.loads(
                            cmd_input_col['value'])[0]['value'])]
                    cmd_stype = cmd_stype['value'][0]

                    commands.append(
                        SetSemanticTypeCmd(
                            cmd_input_col,
                            domain=ont.simplify_uri(cmd_stype['DomainUri']),
                            type=ont.simplify_uri(cmd_stype['FullType']),
                            node_id=ont.simplify_uri(
                                cmd_stype['DomainId'].replace(" (add)", ""))))
                else:
                    # Meta-property variant: domain/id come from separate
                    # parameters and the link label is fixed.
                    cmd_stype_domain = command['inputParameters'][-7]
                    cmd_stype_id = command['inputParameters'][-6]
                    assert cmd_input_col['name'] == "inputColumns" and cmd_stype_domain['name'] == 'metaPropertyUri' \
                           and cmd_stype_id['name'] == 'metaPropertyId'
                    cmd_input_col = col2col[Schema.PATH_DELIMITER.join(
                        cname['columnName'] for cname in ujson.loads(
                            cmd_input_col['value'])[0]['value'])]

                    commands.append(
                        SetSemanticTypeCmd(
                            cmd_input_col,
                            domain=ont.simplify_uri(cmd_stype_domain['value']),
                            type="karma:classLink",
                            node_id=ont.simplify_uri(cmd_stype_id['value'])))
            elif command['commandName'] == 'UnassignSemanticTypeCommand':
                # Drop every previously collected SetSemanticTypeCmd that
                # targeted this column.
                cmd_input_col = command['inputParameters'][-2]
                assert cmd_input_col['name'] == "inputColumns"
                cmd_input_col = col2col[Schema.PATH_DELIMITER.join(
                    cname['columnName'] for cname in ujson.loads(
                        cmd_input_col['value'])[0]['value'])]

                delete_cmds = []
                for i, cmd in enumerate(commands):
                    if isinstance(cmd, SetSemanticTypeCmd
                                  ) and cmd.input_attr_path == cmd_input_col:
                        delete_cmds.append(i)

                # Pop from the back so earlier indices stay valid.
                for i in reversed(delete_cmds):
                    commands.pop(i)
            elif command["commandName"] == "ChangeInternalNodeLinksCommand":
                cmd_edges = command['inputParameters'][-3]
                assert cmd_edges['name'] == 'newEdges'
                # cmd_initial_edges = command['inputParameters'][-4]
                # if cmd_initial_edges['name'] == 'initialEdges' and len(cmd_initial_edges['value']) > 0:
                #     delete_cmds = []
                #     for cmd_edge in cmd_initial_edges['value']:
                #         edge_lbl = ont.simplify_uri(cmd_edge['edgeId'])
                #         source_id = ont.simplify_uri(cmd_edge['edgeSourceId'])
                #
                #         if cmd_edge['edgeTargetId'] in literal_nodes:
                #             for i, cmd in enumerate(commands):
                #                 if isinstance(cmd, SetSemanticTypeCmd) and cmd.type == edge_lbl and cmd.node_id == source_id:
                #                         delete_cmds.append(i)
                #         else:
                #             target_id = ont.simplify_uri(cmd_edge['edgeTargetId'])
                #             for i, cmd in enumerate(commands):
                #                 if isinstance(cmd, SetInternalLinkCmd) and cmd.link_lbl == edge_lbl and cmd.target_id == target_id and cmd.source_id == source_id:
                #                     delete_cmds.append(i)
                #
                #     for idx in sorted(delete_cmds, reverse=True):
                #         commands.pop(idx)

                for cmd_edge in cmd_edges['value']:
                    # Uris that merely repeat the node id carry no extra
                    # information; normalize them to None.
                    source_uri = cmd_edge.get('edgeSourceUri', None)
                    target_uri = cmd_edge.get('edgeTargetUri', None)

                    if source_uri is not None and source_uri != cmd_edge[
                            'edgeSourceId']:
                        source_uri = ont.simplify_uri(source_uri)
                    else:
                        source_uri = None

                    if target_uri is not None and target_uri != cmd_edge[
                            'edgeTargetId']:
                        target_uri = ont.simplify_uri(target_uri)
                    else:
                        target_uri = None

                    if cmd_edge['edgeTargetId'] in literal_nodes:
                        # convert this command to SetSemanticType
                        commands.append(
                            SetSemanticTypeCmd(
                                literal_nodes[cmd_edge['edgeTargetId']],
                                domain=ont.simplify_uri(source_uri),
                                type=ont.simplify_uri(cmd_edge['edgeId']),
                                node_id=ont.simplify_uri(
                                    cmd_edge['edgeSourceId'])))
                    else:
                        commands.append(
                            SetInternalLinkCmd(
                                ont.simplify_uri(cmd_edge['edgeSourceId']),
                                ont.simplify_uri(cmd_edge['edgeTargetId']),
                                ont.simplify_uri(cmd_edge['edgeId']),
                                source_uri, target_uri))
            elif command['commandName'] == "AddLinkCommand":
                # Single-edge variant of the previous command.
                cmd_edges = command['inputParameters'][-3]
                assert cmd_edges['name'] == 'edge'
                cmd_edge = cmd_edges['value']
                source_uri = cmd_edge.get('edgeSourceUri', None)
                target_uri = cmd_edge.get('edgeTargetUri', None)
                if source_uri is not None:
                    source_uri = ont.simplify_uri(source_uri)
                else:
                    source_uri = None

                if cmd_edge['edgeTargetId'] in literal_nodes:
                    # convert this command to SetSemanticType
                    commands.append(
                        SetSemanticTypeCmd(
                            literal_nodes[cmd_edge['edgeTargetId']],
                            domain=ont.simplify_uri(source_uri),
                            type=ont.simplify_uri(cmd_edge['edgeId']),
                            node_id=ont.simplify_uri(
                                cmd_edge['edgeSourceId'])))
                else:
                    if target_uri is not None:
                        target_uri = ont.simplify_uri(target_uri)
                    else:
                        target_uri = None

                    commands.append(
                        SetInternalLinkCmd(
                            ont.simplify_uri(cmd_edge['edgeSourceId']),
                            ont.simplify_uri(cmd_edge['edgeTargetId']),
                            ont.simplify_uri(cmd_edge['edgeId']), source_uri,
                            target_uri))
            elif command['commandName'] == 'DeleteLinkCommand':
                # Remove the first matching internal link command.
                cmd_edge = command['inputParameters'][-3]
                assert cmd_edge['name'] == 'edge'
                cmd_edge = cmd_edge['value']
                for i, cmd in enumerate(commands):
                    if isinstance(cmd, SetInternalLinkCmd):
                        if cmd.source_id == cmd_edge[
                                'edgeSourceId'] and cmd.target_id == cmd_edge[
                                    'edgeTargetId'] and cmd.link_lbl == ont.simplify_uri(
                                        cmd_edge['edgeId']):
                            commands.pop(i)
                            break
            elif command["commandName"] == "AddLiteralNodeCommand":
                cmd_literal_value = command["inputParameters"][0]
                assert cmd_literal_value['name'] == 'literalValue'
                cmd_literal_value = cmd_literal_value['value']

                # they may re-use literal_values, let's user fix it manually
                if cmd_literal_value.startswith("http"):
                    new_attr_path = f"literal:{ont.simplify_uri(cmd_literal_value)}"
                else:
                    new_attr_path = f"literal:{cmd_literal_value}"

                # Up to three occurrences of the same literal value are
                # disambiguated with a :1/:2/:3 suffix; more is unexpected.
                if cmd_literal_value + "1" not in literal_nodes:
                    new_attr_path += ":1"
                    literal_nodes[cmd_literal_value + "1"] = new_attr_path
                elif cmd_literal_value + "2" not in literal_nodes:
                    new_attr_path += ":2"
                    literal_nodes[cmd_literal_value + "2"] = new_attr_path
                elif cmd_literal_value + "3" not in literal_nodes:
                    new_attr_path += ":3"
                    literal_nodes[cmd_literal_value + "3"] = new_attr_path
                else:
                    assert False

                col2col[new_attr_path] = new_attr_path
                commands.append(
                    AddLiteralColumnCmd(new_attr_path, cmd_literal_value))
            elif command["commandName"] == "OperateSelectionCommand":
                # no way to see it in the KARMA UI
                continue
            elif command["commandName"] == "OrganizeColumnsCommand":
                continue
            elif command["commandName"] == "SetWorksheetPropertiesCommand":
                # this command doesn't affect the model
                continue
            # elif command["commandName"] == "UnfoldCommand":
            #     cmd_input_col = command["inputParameters"][-2]
            #     cmd_output_col = command["inputParameters"][-1]
            #     assert cmd_input_col['name'] == "inputColumns" and cmd_output_col['name'] == 'outputColumns'
            #     cmd_input_cols = [
            #         [cname['columnName'] for cname in o['value']] for o in ujson.loads(cmd_input_col['value'])
            #     ]
            #     input_attr_paths = [col2col[Schema.PATH_DELIMITER.join(cmd_input_col)] for cmd_input_col in cmd_input_cols]
            #     cmd_output_cols = [
            #         [cname['columnName'] for cname in o['value']] for o in ujson.loads(cmd_output_col['value'])
            #     ]
            #
            #     output_attr_paths = []
            #     # update columns mapping
            #     for cmd_output_col in cmd_output_cols:
            #         attr_path = Schema.PATH_DELIMITER.join(cmd_output_col[start_idx:])
            #         col2col[Schema.PATH_DELIMITER.join(cmd_output_col)] = attr_path
            #         output_attr_paths.append(attr_path)
            #
            #     commands.append(UnrollCmd(input_attr_paths, output_attr_paths))
            # elif command["commandName"] == "GlueCommand":
            #     cmd_input_col = command["inputParameters"][-2]
            #     cmd_output_col = command["inputParameters"][-1]
            else:
                assert False, "Source: %s. Doesn't handle command %s" % (
                    tbl.id, command["commandName"])

        # fixing conflict modeling command
        # Group commands that target the same column (semantic types) or the
        # same node pair (internal links); only the last one should survive.
        conflicts = defaultdict(lambda: [])
        for i, cmd in enumerate(commands):
            if isinstance(cmd, SetSemanticTypeCmd):
                conflicts[cmd.input_attr_path].append((i, cmd))
            if isinstance(cmd, SetInternalLinkCmd):
                conflicts[(cmd.source_id, cmd.target_id)].append((i, cmd))

        delete_commands = []
        for cmds in conflicts.values():
            if len(cmds) > 1:
                display_warn = False
                for idx, cmd in cmds[1:]:
                    if cmd != cmds[0][1]:
                        if not display_warn:
                            display_warn = True
                            KR2RML.logger.warning(
                                "Table: %s. Conflict between command: \n\t+ %s \n\t+ %s",
                                tbl.id, cmds[0][1], cmd)
                        else:
                            print("\t+", cmd)

                # only keep final commands
                for idx, cmd in cmds[:-1]:
                    delete_commands.append(idx)

                if isinstance(cmds[0][1], SetInternalLinkCmd):
                    # need to update source_uri & target_uri first (for duplicate commands, source_uri, target_uri = None)
                    key = (cmds[-1][1].source_id, cmds[-1][1].link_lbl,
                           cmds[-1][1].target_id)
                    for idx, cmd in cmds[:-1]:
                        if (cmd.source_id, cmd.link_lbl, cmd.target_id) == key:
                            cmds[-1][1].source_uri = cmd.source_uri
                            cmds[-1][1].target_uri = cmd.target_uri
                            break

        # Delete from the back so indices stay valid.
        delete_commands.sort(reverse=True)
        for idx in delete_commands:
            commands.pop(idx)

        super().__init__(commands)
Ejemplo n.º 31
0
from rdflib import Dataset, Literal, URIRef, Namespace
from rdflib import ConjunctiveGraph, Graph, BNode
from rdflib.plugins.memory import IOMemory
from rdflib.namespace import DC, FOAF, OWL


# Namespace for attribute predicates and the identifier of the default graph.
attrs = Namespace('http://api.datawi.re/attributes/')
default_ctx = URIRef('http://api.datawi.re')
# Shared in-memory store; bind the namespace so serializations use the
# 'attrs' prefix.
store = IOMemory()
store.bind('attrs', attrs)

bn = URIRef('mailto:[email protected]')
# Named graph for the default context, backed by the shared store.
ig = Graph(store, identifier=default_ctx)
# ig.bind('attr', attrs)
# ig.add((bn, attrs.label, Literal('Test of the thing')))

# context = {'x': types}
# print('-' * 72)
# print(g.serialize(format='turtle', indent=2))

# A second named graph in the same store, under a different context.
other_ctx = URIRef('urn:f****d')
xg = Graph(store, identifier=other_ctx)
# g.default_context = Graph(identifier=URIRef('http://api.datawi.re'))
# g.bind('types', types)
# xg.add((bn, attrs.label, Literal('Test of the thing')))


# Add a sample triple to the second context only.
a = URIRef('http://pudo.org/test')
xg.add((a, DC.title, Literal('Test value')))

b = URIRef('http://datawi.re/test')
Ejemplo n.º 32
0
def get_store():
    """Return a fresh IOMemory store with every known namespace bound.

    Binding each alias up front makes serializations of graphs backed by
    this store use readable prefixes instead of full URIs.
    """
    memory_store = IOMemory()
    for prefix, namespace in NAMESPACES.items():
        memory_store.bind(prefix, namespace)
    return memory_store
Ejemplo n.º 33
0
class MetaCollection(dict):
    """A collection of collections.

    This is a dictionary, which's keys are the collections' names.
    Values are Collection instances.

    Like Collections this class collects the named metadata graphs of its items
    in a graph store (and its conjunctive graph), that can be queried.
    Additionally, a MetaCollection can have a name.

    Attributes of a MetaCollection:
    name:               str
    store:              IOMemory
    conjunctive_graph:  ConjunctiveGraph
    """

    def __init__(self, src=None, name=None):
        """Build a meta collection from a copy, a list, a dict, or empty.

        Parameters
        ----------
        src: MetaCollection or list or dict or None
          MetaCollection: runtime copy (takes over src's name).
          list: items are Collection or CollectionBackend instances.
          dict: values are Collection or CollectionBackend instances.
          None: create an empty meta collection.
        name: str
          optional name; overridden by src's name when copying.

        Raises
        ------
        TypeError
          for unsupported `src` or unsupported item types inside it.
        """
        super(MetaCollection, self).__init__()

        self.name = name
        self.store = IOMemory()

        if isinstance(src, MetaCollection):
            self.update(src)
            self.name = src.name
            # TODO: See Collection: How to treat names in case of a copy?

        elif isinstance(src, list):
            for item in src:
                if isinstance(item, Collection):
                    self[str(item.name)] = item
                elif isinstance(item, CollectionBackend):
                    new_item = Collection(src=item)
                    self[str(new_item.name)] = new_item
                else:
                    e_msg = "Can't retrieve collection from %s." % type(item)
                    lgr.error(e_msg)
                    raise TypeError(e_msg)

        elif isinstance(src, dict):
            for key in src:
                if isinstance(src[key], Collection):
                    self[key] = src[key]
                elif isinstance(src[key], CollectionBackend):
                    self[key] = Collection(src=src[key])
                else:
                    e_msg = "Can't retrieve collection from %s." % \
                            type(src[key])
                    lgr.error(e_msg)
                    raise TypeError(e_msg)

        elif src is None:
            pass
        else:
            e_msg = "Invalid source type for MetaCollection: %s" % type(src)
            lgr.error(e_msg)
            raise TypeError(e_msg)

        # join the stores:
        # NOTE(review): items inserted above already went through __setitem__,
        # which adds their graphs — this loop re-adds them; presumably
        # harmless for IOMemory, but worth confirming.
        for collection in self:
            for graph in self[collection].store.contexts():
                self.store.add_graph(graph)
                # TODO: Note: Removed all the copying of the graphs and correcting
                # their references, since we now always use
                # 'collection/branch/handle' as key. But: Implementation of
                # this changed behaviour is not well tested yet.

        self.conjunctive_graph = ConjunctiveGraph(store=self.store)

    def __setitem__(self, key, value):
        """Add Collection `value` and pull its graphs into our store."""
        if not isinstance(value, Collection):
            raise TypeError("Can't add non-Collection type to MetaCollection.")

        super(MetaCollection, self).__setitem__(key, value)
        for graph in value.store.contexts():
            self.store.add_graph(graph)

    def __delitem__(self, key):
        """Remove collection `key` and all of its graphs from our store."""
        # delete the graphs of the collection and its handles:
        for graph in self[key].store.contexts():
            self.store.remove_graph(graph)
        # delete the entry itself:
        super(MetaCollection, self).__delitem__(key)

    def query(self):
        """ Perform query on the meta collection.
        Note: It's self.conjunctive_graph or self.store respectively,
        what is to be queried here.
        """
        pass
Ejemplo n.º 34
0
def generateMeditionRDF(medition, typeofobs, typeofuri, obsunit, stationcod,
                        dateday, uriprefix):
    '''Build an SSN Observation RDF graph for one measurement and either
    dump it as N-Triples to OBS_PATH or push it to the Virtuoso endpoint.

    Parameters:
        medition   -- measured value (becomes ssn:hasQuantityValue)
        typeofobs  -- unused here; kept for interface compatibility
        typeofuri  -- unused here; kept for interface compatibility
        obsunit    -- URI of the unit/classification (dul:isClassifiedBy)
        stationcod -- station code, embedded in the observation URI
        dateday    -- datetime of the observation (date/hour go into the URI)
        uriprefix  -- observed-property code, embedded in the URI

    Target shape (example serialization):

		<?xml version="1.0"?>
			<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
			  xmlns:ssn="http://purl.oclc.org/NET/ssnx/ssn#"
			  xmlns:dc="http://purl.org/dc/elements/1.1/"
			  xmlns:xsd="http://www.w3.org/2001/XMLSchema"
			  xmlns:obs="http://www.bidei.com/resource/station/ZORRO2/1510211/02/N02#"
			  xmlns:dul="http://www.loa.istc.cnr.it/ontologies/DUL.owl#"
			  xmlns:owl="http://www.w3.org/2002/07/owl#"
			>

			  <ssn:Observation rdf:about="http://www.bidei.com/resource/station/ZORRO2/1510211/02/N02">
				<ssn:observedProperty rdf:resource="obs:property"/>
				<ssn:observationResult rdf:resource="obs:sensoroutput"/>
				<ssn:observedBy rdf:resource="http://www.bidei.com/resource/station/ZORRO2"/>
				<dc:date>1997-07-16T19:20+01:00</dc:date>
			  </ssn:Observation>

			  <ssn:Property rdf:about="obs:property">
				<owl:sameAs rdf:resource=" URI  a NASA Instance " />
			      OR
			    <rdf:type rdf:resource=" URI a NASA Class " />
			  </ssn:Property>

			  <ssn:SensorOutput rdf:about="obs:sensoroutput">
				<ssn:hasValue rdf:resource="obs:outputvalue"/>
			  </ssn:SensorOutput>

			  <ssn:ObservationValue rdf:about="obs:outputvalue">
		      	<dul:isClassifiedBy rdf:resource=" URI a Celsius, Farenheit, Microgramo/metro..."/>
				<ssn:hasQuantityValue rdf:datatype="xsd:float">0.98</ssn:hasQuantityValue>
			  </ssn:ObservationValue>
		</rdf:RDF>
	'''

    # Observation URI, e.g. .../station/ZORRO2/NO2/15022011/10
    uri = RESOURCE_URI + 'station/' + stationcod + "/" + uriprefix + dateday.strftime(
        '/%d%m%Y/%H')

    # Namespaces used by the output graph.
    ssn = Namespace("http://purl.oclc.org/NET/ssnx/ssn#")
    dc = Namespace("http://purl.org/dc/elements/1.1/")
    xsd = Namespace("http://www.w3.org/2001/XMLSchema")
    dul = Namespace("http://www.loa.istc.cnr.it/ontologies/DUL.owl#")
    owl = Namespace("http://www.w3.org/2002/07/owl#")
    obs = Namespace(uri + "#")

    store = IOMemory()

    g = ConjunctiveGraph(store=store)
    g.bind("ssn", ssn)
    g.bind("dc", dc)
    g.bind("xsd", xsd)
    g.bind("dul", dul)
    g.bind("owl", owl)
    g.bind("obs", obs)

    # Named graph identified by the observation URI itself.
    cpr = URIRef(uri)
    gpr = Graph(store=store, identifier=cpr)

    # Observation node and its links.
    gpr.add((cpr, RDF.type, ssn['Observation']))

    gpr.add((cpr, ssn['observationResult'], obs["sensoroutput"]))

    gpr.add((cpr, ssn['observedProperty'],
             URIRef(RESOURCE_URI + 'prop/' + uriprefix)))

    gpr.add((cpr, ssn['observedBy'],
             URIRef(RESOURCE_URI + 'station/' + stationcod)))

    gpr.add((cpr, dc['date'], Literal(dateday.isoformat())))

    # SUBGRAPH1 (sensoroutput)
    cpr1 = obs["sensoroutput"]
    gpr.add((cpr1, RDF.type, ssn['SensorOutput']))
    gpr.add((cpr1, ssn['hasValue'], obs["outputvalue"]))

    # SUBGRAPH2 (outputvalue): the actual measured value and its unit.
    cpr2 = obs["outputvalue"]
    gpr.add((cpr2, RDF.type, ssn['ObservationValue']))
    gpr.add((cpr2, dul['isClassifiedBy'], URIRef(obsunit)))
    gpr.add((cpr2, ssn['hasQuantityValue'], Literal(medition)))

    # Persist: append to a daily N-Triples file, or insert into Virtuoso.
    if OBS_PATH:
        print(gpr.serialize(format='pretty-xml'))
        filename = "Med_" + dateday.strftime('%d_%m_%Y') + '.nt'
        # 'with' guarantees the file handle is closed even if the write fails.
        with open(OBS_PATH + filename, 'a') as f:
            f.write(gpr.serialize(format='nt'))  #'pretty-xml'))
    else:
        print("Inserting " + uri + "...")
        insertGraph(g=gpr, sparql=VIRTUOSO_URL, resourceuri=RESOURCE_URI)
        print("OK")
Ejemplo n.º 35
0
def DoTheTestMemory():
    """Demo: build two named graphs in one IOMemory-backed ConjunctiveGraph,
    run a path query across them, round-trip everything through XML files,
    and print each stage to stdout.
    """
    ns = Namespace("http://love.com#")

    # AssertionError: ConjunctiveGraph must be backed by a context aware store.
    mary = URIRef("http://love.com/lovers/mary")
    john = URIRef("http://love.com/lovers/john")

    # Context (named-graph) identifiers, one per person.
    cmary = URIRef("http://love.com/lovers/context_mary")
    cjohn = URIRef("http://love.com/lovers/context_john")

    # my_store = Memory()
    store_input = IOMemory()

    gconjunctive = ConjunctiveGraph(store=store_input)
    gconjunctive.bind("love", ns)

    # add a graph for Mary's facts to the Conjunctive Graph
    gmary = Graph(store=store_input, identifier=cmary)
    # Mary's graph only contains the URI of the person she love, not his cute name
    gmary.add((mary, ns["hasName"], Literal("Mary")))
    gmary.add((mary, ns["loves"], john))

    # add a graph for John's facts to the Conjunctive Graph
    gjohn = Graph(store=store_input, identifier=cjohn)
    # John's graph contains his cute name
    gjohn.add((john, ns["hasCuteName"], Literal("Johnny Boy")))

    # enumerate contexts
    print("Input contexts")
    for c in gconjunctive.contexts():
        print("-- %s " % c)

    # separate graphs
    # NOTE(review): dead toggle -- flip to True to also dump each input graph.
    if False:
        print("===================")
        print("GJOHN")
        print(gjohn.serialize(format="n3").decode("utf-8"))
        print("===================")
        print("GMARY")
        print(gmary.serialize(format="n3").decode("utf-8"))
        print("===================")

    # full graph
    print("===================")
    print("GCONJUNCTIVE NATIVE")
    print(gconjunctive.serialize(format="n3").decode("utf-8"))

    # query the conjunction of all graphs
    # Property path: from mary, follow ns.loves then ns.hasCuteName; this
    # crosses graph boundaries (loves is in Mary's graph, the name in John's).
    xx = None
    for x in gconjunctive[mary:ns.loves / ns.hasCuteName]:
        xx = x
    print("Q: Who does Mary love?")
    print("A: Mary loves {}".format(xx))

    # Next, save each subgraph separately, then reload it; the result must be the same.
    gjohn.serialize(destination='gjohn_copy.xml', format='xml')
    gmary.serialize(destination='gmary_copy.xml', format='xml')

    gjohn_copy = Graph()
    gjohn_copy.parse('gjohn_copy.xml', format='xml')
    gmary_copy = Graph()
    gmary_copy.parse('gmary_copy.xml', format='xml')

    if True:
        print("===================")
        print("GJOHN")
        print(gjohn_copy.serialize(format="n3").decode("utf-8"))
        print("===================")
        print("GMARY")
        print(gmary_copy.serialize(format="n3").decode("utf-8"))
        print("===================")

    print("===================")
    print("GCONJUNCTIVE WITH QUADS")
    print(list(gconjunctive.quads(None)))
    print("===================")

    # Round-trip the whole conjunctive graph through one XML file.
    gconjunctive.serialize(destination='gconjunctive_copy.xml', format='xml')

    gconjunctive_copy = ConjunctiveGraph()
    gconjunctive_copy.parse('gconjunctive_copy.xml', format='xml')

    print("===================")
    print("GCONJUNCTIVE AS CONJUNCTIVE")
    print(gconjunctive_copy.serialize(format="n3").decode("utf-8"))
    print("Output contexts")
    for c in gconjunctive_copy.contexts():
        print("-- %s " % c)
    print("===================")

    # Reload the same file into a plain (non-context-aware) Graph.
    gconjunctive_graph_copy = Graph()
    gconjunctive_graph_copy.parse('gconjunctive_copy.xml', format='xml')

    print("===================")
    print("GCONJUNCTIVE AS GRAPH")
    print(gconjunctive_graph_copy.serialize(format="n3").decode("utf-8"))
    #print("Output contexts")
    #for c in gconjunctive_graph_copy.contexts():
    #    print("-- %s " % c)
    print("===================")
Ejemplo n.º 36
0
	   ?country factbook:area_land ?area_land .
	   ?country factbook:area_water ?area_water .
	   ?country factbook:background ?background .
	   ?country factbook:capital_timedifference ?capital_timedifference .
	   ?country factbook:currency_code ?currency .
	   ?country factbook:climate ?climate .
	   ?country factbook:birthrate ?birthrate .
	   ?country factbook:deathrate ?deathrate .
	   ?country factbook:population_total ?population .
	   ?country factbook:landboundary ?landboundary .
	   
       }"""

# Send the CONSTRUCT query defined above and ask for an RDF result.
sparql.setQuery(construct_query)
sparql.setReturnFormat(RDF)

# creating the RDF store and graph
# NOTE(review): `g` is rebound below by sparql.query().convert(), so this
# store/identifier setup is never actually used -- confirm before removing.
memory_store = IOMemory()
graph_id = URIRef('http://www.semanticweb.org/store/movie_country')
g = Graph(store=memory_store, identifier=graph_id)
# Register the rdfextras SPARQL processor/result plugins with rdflib.
rdflib.plugin.register('sparql', rdflib.query.Processor,
                       'rdfextras.sparql.processor', 'Processor')
rdflib.plugin.register('sparql', rdflib.query.Result, 'rdfextras.sparql.query',
                       'SPARQLQueryResult')

# merging results and saving the store
g = sparql.query().convert()
g.parse("result_basic.owl")
# the graph will be saved as result_bonus.owl. You can open the file with Protege to inspect it.
g.serialize("result_bonus.owl", "xml")
Ejemplo n.º 37
0
from rdflib import Namespace, Literal, URIRef
from rdflib.graph import Graph, ConjunctiveGraph
from rdflib.plugins.memory import IOMemory

if __name__ == '__main__':
    # Vocabulary namespace shared by both graphs.
    love_ns = Namespace("http://love.com#")

    # Subjects.
    mary = URIRef("http://love.com/lovers/mary#")
    john = URIRef("http://love.com/lovers/john#")

    # Context identifiers (one named graph per person).
    ctx_mary = URIRef("http://love.com/lovers/mary#")
    ctx_john = URIRef("http://love.com/lovers/john#")

    # One context-aware store backs the conjunctive graph and both subgraphs.
    backing_store = IOMemory()
    conjunctive = ConjunctiveGraph(store=backing_store)
    conjunctive.bind("love", love_ns)

    # Mary's named graph.
    mary_graph = Graph(store=backing_store, identifier=ctx_mary)
    mary_graph.add((mary, love_ns['hasName'], Literal("Mary")))
    mary_graph.add((mary, love_ns['loves'], john))

    # John's named graph.
    john_graph = Graph(store=backing_store, identifier=ctx_john)
    john_graph.add((john, love_ns['hasName'], Literal("John")))

    # Print every context the conjunctive graph knows about.
    for ctx in conjunctive.contexts():
        print("-- %s " % ctx)
Ejemplo n.º 38
0
class ContextStore(Store):
    """Query-only rdflib Store over the triples of a single context and its
    imports, optionally combined with the triples already persisted for it.
    """
    context_aware = True

    def __init__(self, context=None, include_stored=False, **kwargs):
        """
        Parameters
        ----------
            context : PyOpenWorm.context.Context
                context

        """
        super(ContextStore, self).__init__(**kwargs)
        self._memory_store = None
        self._include_stored = include_stored
        # Initialize eagerly: without these, open() (and close() before any
        # open) would raise AttributeError on self.ctx / self._store_store
        # whenever no context was supplied at construction time.
        self.ctx = None
        self._store_store = None
        if context is not None:
            self._init_store(context)

    def open(self, configuration, create=False):
        # Usable only once a context has been attached via _init_store.
        if self.ctx is not None:
            return VALID_STORE
        else:
            return NO_STORE

    def _init_store(self, ctx):
        """Attach `ctx` and load its triples (and its imports') into memory."""
        self.ctx = ctx

        if self._include_stored:
            self._store_store = RDFContextStore(ctx)
        else:
            self._store_store = None

        if self._memory_store is None:
            self._memory_store = IOMemory()
            self._init_store0(ctx)

    def _init_store0(self, ctx, seen=None):
        # Recursively add the context's triples, visiting each imported
        # context at most once (`seen` guards against import cycles).
        # Triples containing Variables are query patterns, not data -- skip.
        if seen is None:
            seen = set()
        ctxid = ctx.identifier
        if ctxid in seen:
            return
        seen.add(ctxid)
        self._memory_store.addN(
            (s, p, o, ctxid) for s, p, o in ctx.contents_triples()
            if not (isinstance(s, Variable) or isinstance(p, Variable)
                    or isinstance(o, Variable)))
        for cctx in ctx.imports:
            self._init_store0(cctx, seen)

    def close(self, commit_pending_transaction=False):
        # Drop the in-memory copy; the store must be re-opened before reuse.
        self.ctx = None
        self._memory_store = None

    # RDF APIs
    def add(self, triple, context, quoted=False):
        raise NotImplementedError("This is a query-only store")

    def addN(self, quads):
        raise NotImplementedError("This is a query-only store")

    def remove(self, triple, context=None):
        raise NotImplementedError("This is a query-only store")

    def triples(self, triple_pattern, context=None):
        """Yield matching triples from memory, then from the backing store."""
        if self._memory_store is None:
            raise ContextStoreException("Database has not been opened")
        context = getattr(context, 'identifier', context)
        context_triples = []
        if self._store_store is not None:
            context_triples.append(
                self._store_store.triples(triple_pattern, context))
        return chain(self._memory_store.triples(triple_pattern, context),
                     *context_triples)

    def __len__(self, context=None):
        """
        Number of statements in the store. This should only account for non-
        quoted (asserted) statements if the context is not specified,
        otherwise it should return the number of statements in the formula or
        context given.

        :param context: a graph instance to query or None

        """
        if self._memory_store is None:
            raise ContextStoreException("Database has not been opened")
        if self._store_store is None:
            return len(self._memory_store)
        else:
            # We don't know which triples may overlap, so we can't return an accurate count without doing something
            # expensive, so we just give up
            raise NotImplementedError()

    def contexts(self, triple=None):
        """
        Generator over all contexts in the graph. If triple is specified,
        a generator over all contexts the triple is in.

        if store is graph_aware, may also return empty contexts

        :returns: a generator over Nodes
        """
        if self._memory_store is None:
            raise ContextStoreException("Database has not been opened")
        seen = set()
        rest = ()

        if self._store_store is not None:
            rest = self._store_store.contexts(triple)

        # De-duplicate across the in-memory and persisted stores.
        for ctx in chain(self._memory_store.contexts(triple), rest):
            if ctx in seen:
                continue
            seen.add(ctx)
            yield ctx
Ejemplo n.º 39
0
class ContextStore(Store):
    # Query-only rdflib Store over one context's triples (plus, optionally,
    # the triples already persisted for it via RDFContextStore).
    context_aware = True

    def __init__(self, context=None, include_stored=False, **kwargs):
        """
        Parameters
        ----------
            context : PyOpenWorm.context.Context
                context

        """
        super(ContextStore, self).__init__(**kwargs)
        self._memory_store = None
        self._include_stored = include_stored
        if context is not None:
            self._init_store(context)

    def open(self, configuration, create=False):
        # Resolve the context registered under `configuration`; only a known
        # context yields a usable store.
        from .context import Contexts
        ctx = Contexts.get(configuration)
        if ctx is not None:
            self._init_store(ctx)
            return VALID_STORE
        else:
            return NO_STORE

    def _init_store(self, ctx):
        # Attach `ctx` and load its (and its imports') triples into memory.
        self.ctx = ctx

        if self._include_stored:
            self._store_store = RDFContextStore(ctx)
        else:
            self._store_store = None
        if self._memory_store is None:
            self._memory_store = IOMemory()
            self._init_store0(ctx)

    def _init_store0(self, ctx, seen=None):
        # Recursively add the context's triples, visiting each imported
        # context once (`seen` guards against import cycles). Triples that
        # contain Variables are query patterns, not data, and are skipped.
        if seen is None:
            seen = set()
        ctxid = ctx.identifier
        if ctxid in seen:
            return
        seen.add(ctxid)
        self._memory_store.addN((s, p, o, ctxid)
                                for s, p, o
                                in ctx.contents_triples()
                                if not (isinstance(s, Variable) or
                                        isinstance(p, Variable) or
                                        isinstance(o, Variable)))
        for cctx in ctx.imports:
            self._init_store0(cctx, seen)

    def close(self, commit_pending_transaction=False):
        # Drop the in-memory copy; the store must be re-opened before reuse.
        self.ctx = None
        self._memory_store = None

    # RDF APIs
    def add(self, triple, context, quoted=False):
        raise NotImplementedError("This is a query-only store")

    def addN(self, quads):
        raise NotImplementedError("This is a query-only store")

    def remove(self, triple, context=None):
        raise NotImplementedError("This is a query-only store")

    def triples(self, triple_pattern, context=None):
        # Matching triples from memory first, then (lazily) from the
        # persisted store when include_stored was requested.
        context = getattr(context, 'identifier', context)
        if self._memory_store is None:
            raise Exception("Database has not been opened")
        context_triples = []
        if self._store_store is not None:
            context_triples.append(self._store_store.triples(triple_pattern,
                                                             context))
        return chain(self._memory_store.triples(triple_pattern, context),
                     *context_triples)

    def __len__(self, context=None):
        """
        Number of statements in the store. This should only account for non-
        quoted (asserted) statements if the context is not specified,
        otherwise it should return the number of statements in the formula or
        context given.

        :param context: a graph instance to query or None

        """
        if self._memory_store is None:
            raise Exception("Database has not been opened")
        if self._store_store is None:
            return len(self._memory_store)
        else:
            # We don't know which triples may overlap, so we can't return an accurate count without doing something
            # expensive, so we just give up
            raise NotImplementedError()

    def contexts(self, triple=None):
        """
        Generator over all contexts in the graph. If triple is specified,
        a generator over all contexts the triple is in.

        if store is graph_aware, may also return empty contexts

        :returns: a generator over Nodes
        """
        if self._memory_store is None:
            raise Exception("Database has not been opened")
        seen = set()
        rest = ()

        if self._store_store is not None:
            rest = self._store_store.contexts(triple)

        # De-duplicate across the in-memory and persisted stores.
        for ctx in chain(self._memory_store.contexts(triple), rest):
            if ctx in seen:
                continue
            seen.add(ctx)
            yield ctx