def construct(self, strOrTriple, initBindings={}, initNs={}):
    """
    Executes a SPARQL Construct
    :param strOrTriple: can be either

      * a string in which case it is considered a CONSTRUCT query
      * a triple in which case it acts as the rdflib `triples((s,p,o))`

    :param initBindings: A mapping from a Variable to an RDFLib term (used as initial bindings for SPARQL query)
    :param initNs: A mapping from a namespace prefix to a namespace

    :returns: an instance of rdflib.ConjunctiveGraph('IOMemory')
    """
    if isinstance(strOrTriple, str):
        query = strOrTriple
        if initNs:
            prefixes = ''.join(["prefix %s: <%s>\n" % (p, n) for p, n in initNs.items()])
            query = prefixes + query
    else:
        s, p, o = strOrTriple
        t = '%s %s %s' % ((s and s.n3() or '?s'), (p and p.n3() or '?p'), (o and o.n3() or '?o'))
        query = 'construct {%s} where {%s}' % (t, t)
    query = dict(query=query)
    url = self.url + "?" + urlencode(query)
    req = Request(url)
    req.add_header('Accept', 'application/rdf+xml')
    log.debug("Request url: %s\n with headers: %s" % (req.get_full_url(), req.header_items()))
    subgraph = ConjunctiveGraph('IOMemory')
    subgraph.parse(urlopen(req))
    return subgraph
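# A brief usage sketch for construct() above, assuming it is bound to an
# endpoint-wrapper class instantiated with a SPARQL endpoint URL; the class
# name SPARQLGraph and the endpoint/namespace values are illustrative.
from rdflib import URIRef

endpoint = SPARQLGraph("http://localhost:8890/sparql")  # hypothetical wrapper

# 1. as a CONSTRUCT query string, with a prefix supplied through initNs:
g1 = endpoint.construct(
    "construct {?s a foaf:Person} where {?s a foaf:Person} limit 10",
    initNs=dict(foaf="http://xmlns.com/foaf/0.1/"))

# 2. as a triple pattern, where None behaves like a wildcard as in triples():
g2 = endpoint.construct((URIRef("http://example.org/alice"), None, None))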
def generate(cls, utensils):
    graph = ConjunctiveGraph()
    load_rdf_file(STORE['actions'], graph)
    for utensil in utensils:
        for action in utensil.actions:
            # use an explicit loop: under Python 3, map() is lazy and the
            # triples would never actually be added to the store
            for triple in graph.triples((action.resUri, None, None)):
                rdfSubject.db.add(triple)
def __call__(self, url, **kwargs):
    if not url:
        return []
    graph = ConjunctiveGraph()
    graph.parse(url)
    output = {}
    # iterating a graph yields (subject, predicate, object) triples
    for subject, predicate, obj in graph:
        key = self.strip(subject)
        prop = self.strip(predicate)
        value = self.defrag(obj)
        output.setdefault(key, {
            'label': key,
            'uri': unicode(subject)
        })
        if prop in output[key]:
            old = output[key][prop]
            if not isinstance(old, list):
                output[key][prop] = [old]
            output[key][prop].append(value)
        else:
            output[key][prop] = value
    return output.values()
def _test_serializer(inputpath, expectedpath, context, serpar):
    test_tree, test_graph = _load_test_data(inputpath, expectedpath, context)
    if isinstance(test_tree, ConjunctiveGraph):
        expected = test_tree.serialize(format="json-ld")
    else:
        expected = _to_json(_to_ordered(test_tree))
    if test_graph is not None:
        # toRdf, expected are nquads
        result_tree = to_tree(test_graph, context_data=context)
        result = _to_json(_to_ordered(result_tree))
    elif inputpath.startswith('fromRdf'):
        # fromRdf, expected in json-ld
        g = ConjunctiveGraph()
        data = open(p.join(test_dir, inputpath), 'rb').read()
        g.parse(data=data, format="nquads", context=context)
        result = g.serialize(format="json-ld", base=context)
    else:
        # json
        f = open(p.join(test_dir, inputpath), 'rb')
        result = json.load(f)[0]
        f.close()
    if isinstance(result, ConjunctiveGraph):
        assert isomorphic(result, expected), \
            "Expected graph:\n%s\nGot graph:\n%s" % (
                expected.serialize(format='n3'),
                result.serialize(format='n3'))
    else:
        assert jsonld_compare(expected, result) == True, \
            "Expected JSON:\n%s\nGot:\n%s" % (expected, result)
def ConvertToRDFN3(filename, destinationFileName):
    _graph = ConjunctiveGraph()
    _graph.parse(filename, format="nt")
    of = open(destinationFileName, "wb")
    of.write(_graph.serialize(format="n3"))
    of.close()
def discussion_as_graph(self, discussion_id): self.ensure_discussion_storage(None) from assembl.models import Discussion d_storage_name = self.discussion_storage_name() d_graph_iri = URIRef(self.discussion_graph_iri()) v = get_virtuoso(self.session, d_storage_name) discussion_uri = URIRef( Discussion.uri_generic(discussion_id, self.local_uri())) subjects = list(v.query( """SELECT DISTINCT ?s WHERE { ?s assembl:in_conversation %s }""" % (discussion_uri.n3()))) subjects.append([discussion_uri]) # print len(subjects) cg = ConjunctiveGraph(identifier=d_graph_iri) for (s,) in subjects: # Absurdly slow. DISTINCT speeds up a lot, but I get numbers. for p, o in v.query( 'SELECT ?p ?o WHERE { graph %s { %s ?p ?o }}' % ( d_graph_iri.n3(), s.n3())): cg.add((s, p, o)) for (s, o, g) in v.query( '''SELECT ?s ?o ?g WHERE { GRAPH ?g {?s catalyst:expressesIdea ?o } . ?o assembl:in_conversation %s }''' % (discussion_uri.n3())): cg.add((s, CATALYST.expressesIdea, o, g)) # TODO: Add roles return cg
def validate_sparql_endpoint(form, field):
    try:
        g = ConjunctiveGraph('SPARQLStore')
        g.open(field.data)
        g.query('SELECT * WHERE { ?s ?p ?o } LIMIT 1')
    except Exception:
        raise ValidationError('This is not a valid SPARQL endpoint.')
def _user_graph(self, uri):
    userGraph = Graph()
    try:
        userGraph.parse(uri)
    except Exception:
        u = "http://www.w3.org/2007/08/pyRdfa/extract?space-preserve=true&uri=" + uri
        userGraph.parse(u, identifier=uri)
    return userGraph
class FOAF(callbacks.Privmsg):
    DATAFILE = "/var/www/rc98.net/zoia.rdf"

    def __init__(self, irc):
        self.g = Graph()
        # self.g.parse('http://rc98.net/zoia.rdf')
        self.g.parse(self.DATAFILE, format="xml")
        self.uri = rdflib.URIRef("http://www.code4lib.org/id/zoia")
        self.FOAF = Namespace("http://xmlns.com/foaf/0.1/")
        super(FOAF, self).__init__(irc)

    def _uri_of_user(self, nick):
        result = self.g.query(
            """
            PREFIX foaf: <http://xmlns.com/foaf/0.1/>
            SELECT ?uri WHERE {<http://www.code4lib.org/id/zoia> foaf:knows ?uri .
                               ?uri foaf:nick ?nick .}
            """,
            # the binding must be an RDF term for the literal comparison to work
            initBindings={"nick": rdflib.Literal(nick)},
        )
        if len(result) > 0:
            userURI = list(result)[0][0]
            return userURI
        else:
            return None

    def _user_graph(self, uri):
        userGraph = Graph()
        try:
            userGraph.parse(uri)
        except Exception:
            u = "http://www.w3.org/2007/08/pyRdfa/extract?space-preserve=true&uri=" + uri
            userGraph.parse(u, identifier=uri)
        return userGraph
def get_graph(self, with_mappings=False, include_mapping_target=False, acceptance=False, target_uri=None): """Get Graph instance of this EDMRecord. :param target_uri: target_uri if you want a sub-selection of the whole graph :param acceptance: if the acceptance data should be listed :param include_mapping_target: Boolean also include the mapping target triples in graph :param with_mappings: Boolean integrate the ProxyMapping into the graph """ rdf_string = self.source_rdf if acceptance and self.acceptance_rdf: rdf_string = self.acceptance_rdf graph = ConjunctiveGraph(identifier=self.named_graph) graph.namespace_manager = namespace_manager graph.parse(data=rdf_string, format='nt') if with_mappings: proxy_resources, graph = ProxyResource.update_proxy_resource_uris(self.dataset, graph) self.proxy_resources.add(*proxy_resources) for proxy_resource in proxy_resources: graph = graph + proxy_resource.to_graph(include_mapping_target=include_mapping_target) if target_uri and not target_uri.endswith("/about") and target_uri != self.document_uri: g = Graph(identifier=URIRef(self.named_graph)) subject = URIRef(target_uri) for p, o in graph.predicate_objects(subject=subject): g.add((subject, p, o)) graph = g return graph
def rdfFromText(self, text):
    """Take text, return an RDF graph."""
    postdata = {}
    postdata['licenseID'] = self.api_key
    postdata['paramsXML'] = ' '.join(
        ['<c:params xmlns:c="http://s.opencalais.com/1/pred/"'
        ,'xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">'
        ,'<c:processingDirectives c:contentType="text/raw"'
        ,'c:outputFormat="text/xml"'
        ,'c:enableMetadataType="GenericRelations,SocialTags">'
        ,'</c:processingDirectives>'
        ,'<c:userDirectives c:allowDistribution="false"'
        ,'c:allowSearch="false"'
        ,'c:externalID="{0}"'.format(uuid.uuid4())
        ,'c:submitter="{0}">'.format(self.app_name)
        ,'</c:userDirectives>'
        ,'<c:externalMetadata></c:externalMetadata>'
        ,'</c:params>'])
    postdata['content'] = text
    poststring = urllib.urlencode(postdata)
    data = self.post_data("{0}".format(self.api_url), poststring, timeout=60*5)
    graph = Graph()
    inpt = StringInputSource(data)
    try:
        graph.parse(inpt, 'xml')
    except Exception:
        print data
        raise
    return graph
def instance_view_jsonld(request): from assembl.semantic.virtuoso_mapping import AssemblQuadStorageManager from rdflib import URIRef, ConjunctiveGraph ctx = request.context user_id = authenticated_userid(request) or Everyone permissions = get_permissions( user_id, ctx.get_discussion_id()) instance = ctx._instance if not instance.user_can(user_id, CrudPermissions.READ, permissions): return HTTPUnauthorized() discussion = ctx.get_instance_of_class(Discussion) if not discussion: raise HTTPNotFound() aqsm = AssemblQuadStorageManager() uri = URIRef(aqsm.local_uri() + instance.uri()[6:]) d_storage_name = aqsm.discussion_storage_name(discussion.id) v = get_virtuoso(instance.db, d_storage_name) cg = ConjunctiveGraph(v, d_storage_name) result = cg.triples((uri, None, None)) #result = v.query('select ?p ?o ?g where {graph ?g {<%s> ?p ?o}}' % uri) # Something is wrong here. triples = '\n'.join([ '%s %s %s.' % (uri.n3(), p.n3(), o.n3()) for (s, p, o) in result if '_with_no_name_entry' not in o]) return aqsm.quads_to_jsonld(triples)
def discussion_as_graph(self, discussion_id): from assembl.models import Discussion, AgentProfile local_uri = self.local_uri() discussion = Discussion.get(discussion_id) d_storage_name = self.discussion_storage_name() d_graph_iri = URIRef(self.discussion_graph_iri()) v = get_virtuoso(self.session, d_storage_name) discussion_uri = URIRef( Discussion.uri_generic(discussion_id, local_uri)) subjects = [s for (s,) in v.query( """SELECT DISTINCT ?s WHERE { ?s assembl:in_conversation %s }""" % (discussion_uri.n3()))] subjects.append(discussion_uri) participant_ids = list(discussion.get_participants(True)) profiles = {URIRef(AgentProfile.uri_generic(id, local_uri)) for id in participant_ids} subjects.extend(profiles) # add pseudo-accounts subjects.extend((URIRef("%sAgentAccount/%d" % (local_uri, id)) for id in participant_ids)) # print len(subjects) cg = ConjunctiveGraph(identifier=d_graph_iri) self.add_subject_data(v, cg, subjects) # add relationships of non-pseudo accounts for ((account, p, profile), g) in v.triples((None, SIOC.account_of, None)): if profile in profiles: cg.add((account, SIOC.account_of, profile, g)) # Tempting: simplify with this. # cg.add((profile, FOAF.account, account, g)) for (s, o, g) in v.query( '''SELECT ?s ?o ?g WHERE { GRAPH ?g {?s catalyst:expressesIdea ?o } . ?o assembl:in_conversation %s }''' % (discussion_uri.n3())): cg.add((s, CATALYST.expressesIdea, o, g)) return cg
def partsites(self): context = aq_inner(self.context) _partsiteType = _mcltype.FundedSite # title _title = _terms.title # description _description = _terms.description # Temporary rdf read rdfDataSource = "https://edrn-dev.jpl.nasa.gov/ksdb/publishrdf/?rdftype=fundedsite" graph = ConjunctiveGraph() graph.parse(URLInputSource(rdfDataSource)) statements = _parseRDF(graph) partsites = [] for uri, i in statements.items(): partsite = dict(url=uri, title="", description="") if _title in i: partsite["title"] = unicode(i[_title][0]) if _description in i: partsite["description"] = strip_tags(unicode(i[_description][0])) partsites.append(partsite) return partsites
def fill_graph_by_subject(basegraph, newgraph, subject, loop_count=0):
    """
    Fills a Graph with all triples that have a certain subject, and includes
    the triples needed to describe their objects down to a depth of 5.
    :param basegraph: Graph with the data for the new Graph
    :param newgraph: Instance of the new Graph
    :param subject: subject of triples which is looked for in the basegraph
    :param loop_count: current recursion depth; used internally
    :return: Graph
    """
    subject_list = [BNode, URIRef]
    if not issubclass(type(basegraph), Graph):
        log.error("The given basegraph is not a subclass of Graph!")
        return ConjunctiveGraph()
    elif subject == "":
        log.info("The given subject was empty. Returning the basegraph")
        return basegraph
    elif type(subject) not in subject_list:
        log.info("The given subject was not of type BNode or URIRef. Returning the basegraph")
        return basegraph
    elif not issubclass(type(newgraph), Graph):
        newgraph = ConjunctiveGraph()
    loop_count += 1
    for s, p, o in basegraph.triples((subject, None, None)):
        newgraph.add((s, p, o))
        if type(o) in subject_list and loop_count < 6:
            # it will do: (S1,P1,O1) -> if O1 has an own Description: (O1,P2,O2)... 5 times
            newgraph = fill_graph_by_subject(basegraph, newgraph, o, loop_count)
    return newgraph
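# A self-contained sketch of fill_graph_by_subject() in use: one subject whose
# object is a blank node, so the recursion pulls in the blank node's own
# description as well (all names below are illustrative).
from rdflib import ConjunctiveGraph, URIRef, BNode, Literal

base = ConjunctiveGraph()
alice = URIRef("http://example.org/alice")
addr = BNode()
base.add((alice, URIRef("http://example.org/address"), addr))
base.add((addr, URIRef("http://example.org/city"), Literal("Berlin")))
base.add((URIRef("http://example.org/bob"),
          URIRef("http://example.org/age"), Literal(42)))

sub = fill_graph_by_subject(base, None, alice)
assert len(sub) == 2  # alice's triple plus the referenced blank-node triple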
def __load_graph(file_p, tmp_dir=None): errors = "" current_graph = ConjunctiveGraph() if tmp_dir is not None: file_path = tmp_dir + os.sep + "tmp_rdf_file.rdf" shutil.copyfile(file_p, file_path) else: file_path = file_p try: with open(file_path) as f: json_ld_file = json.load(f) if isinstance(json_ld_file, dict): json_ld_file = [json_ld_file] for json_ld_resource in json_ld_file: # Trick to force the use of a pre-loaded context if the format # specified is JSON-LD cur_context = json_ld_resource["@context"] json_ld_resource["@context"] = context_json current_graph.parse(data=json.dumps(json_ld_resource), format="json-ld") return current_graph except Exception as e: errors = " | " + str(e) # Try another format if tmp_dir is not None: os.remove(file_path) raise IOError("[1]", "It was impossible to handle the format used for storing the file '%s'%s" % (file_path, errors))
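# The context-substitution trick used by __load_graph() above, in isolation:
# swap a document's remote @context for a locally cached dict before parsing,
# so no network fetch happens (requires the rdflib JSON-LD plugin; the context
# dict here is an illustrative stand-in for the pre-loaded context_json).
import json
from rdflib import ConjunctiveGraph

context_json = {"@vocab": "http://schema.org/"}  # pretend pre-loaded context
doc = {"@context": "https://schema.org/",
       "@id": "http://example.org/post", "name": "Hello"}
doc["@context"] = context_json  # force the local context
g = ConjunctiveGraph()
g.parse(data=json.dumps(doc), format="json-ld")
assert len(g) == 1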
class RecursionTests(unittest.TestCase): # debug = True def setUp(self): self.graph = ConjunctiveGraph() self.graph.load(StringIO(testContent), format='n3') def test_simple_recursion(self): graph = ConjunctiveGraph() graph.load(StringIO(BASIC_KNOWS_DATA), format='n3') results = graph.query(KNOWS_QUERY, processor="sparql", DEBUG=False) results = set(results) person1 = URIRef('ex:person.1') person2 = URIRef('ex:person.2') nose.tools.assert_equal( results, set([(person1, None), (person1, Literal('person 3')), (person2, Literal('person 3'))])) def test_secondary_recursion(self): graph = ConjunctiveGraph() graph.load(StringIO(SUBCLASS_DATA), format='n3') results = graph.query(SUBCLASS_QUERY, processor="sparql", DEBUG=False) results = set(results) ob = URIRef('ex:ob') class1 = URIRef('ex:class.1') class2 = URIRef('ex:class.2') class3 = URIRef('ex:class.3') nose.tools.assert_equal( results, set([(ob, class1), (ob, class2), (ob, class3)]))
def query_graph(self, subj=None, pred=None, obj=None, exhaustive=False):
    """Return a graph of all triples with subject `subj`, predicate `pred`,
    OR object `obj`. If `exhaustive`, also match all subelements of the given
    arguments (if subj is http://127.0.0.1/api/v1/wine/, match
    http://127.0.0.1/api/v1/wine/{s} for all s). Arguments must be of type
    URIRef or Literal."""
    g = ConjunctiveGraph()
    count = 0
    if not isinstance(subj, list):
        subj = [subj]
    for sub in subj:
        for uri_s, uri_p, uri_o in sorted(self.graph):
            s, p, o = str(uri_s), str(uri_p), str(uri_o)
            if exhaustive:
                s = s.rpartition('/')[0]
                p = p.rpartition('/')[0]
                o = o.rpartition('/')[0]
            else:
                s = s[:-1] if s.endswith('/') else s
                p = p[:-1] if p.endswith('/') else p
                o = o[:-1] if o.endswith('/') else o
            if (sub and sub == s) or (pred and pred == p) or (obj and obj == o):
                g.add((uri_s, uri_p, uri_o))
                count += 1
    return g
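# Hypothetical calls against an object whose self.graph is already populated
# (`store` and the URIs are illustrative). Trailing slashes on graph terms are
# normalized away, so the bare resource URI is the safest argument form:
from rdflib import URIRef

g_exact = store.query_graph(subj=URIRef("http://127.0.0.1/api/v1/wine"))
g_all = store.query_graph(subj=URIRef("http://127.0.0.1/api/v1/wine"),
                          exhaustive=True)  # also matches .../wine/{s}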
def open(self):
    # XXX: If we have a source that's read only, do we need to set the
    # store separately??
    g0 = ConjunctiveGraph('SPARQLUpdateStore')
    g0.open(tuple(self.conf['rdf.store_conf']))
    self.graph = g0
    return self.graph
def getPropFile(fname):
    g = ConjunctiveGraph(identifier=URIRef(ads_baseurl))
    bindgraph(g)
    recordstree = ElementTree.parse(fname)
    rootnode = recordstree.getroot()
    xobj = XMLObj(recordstree)
    trec = {}
    trec['propname'] = rootnode.attrib['name']
    trec['propid'] = rootnode.attrib['id']
    trec['title'] = xobj.title
    # we used a proposalType here, as this is somewhat different from just scienceprocess. add to ontology
    trec['category'] = xobj.category
    trec['abstract'] = xobj.abstract
    trec['pi'] = [xobj.elementAttribute('pi', 'last'), xobj.elementAttribute('pi', 'first')]
    # print trec
    propuri = getPropURI(trec['propid'])
    # This is FALSE. TODO: fix to ADS normed name or look it up. How? Blank node? WOW.
    qplabel = trec['pi'][0] + '_' + trec['pi'][1]
    fullname = trec['pi'][0] + ', ' + trec['pi'][1]
    auth_uri = uri_agents["PersonName/" + qplabel + "/" + str(uuid.uuid4())]
    gdadd(g, auth_uri, [
        a, agent.PersonName,
        agent.fullName, Literal(fullname)
    ])
    gadd(g, propuri, a, adsbase.ObservationProposal)
    gdadd(g, propuri, [
        adsobsv.observationProposalId, Literal(trec['propid']),
        adsobsv.observationProposalType, Literal("CHANDRA/" + trec['category']),
        adsbase.principalInvestigator, auth_uri,
        adsbase.title, Literal(trec['title'])
    ])
    serializedstuff = g.serialize(format='xml')
    return serializedstuff
def test_flowcells_index_rdfa(self): model = ConjunctiveGraph() response = self.client.get(reverse('flowcell_index')) self.assertEqual(response.status_code, 200) model.parse(data=smart_text(response.content), format='rdfa') add_default_schemas(model) inference = Infer(model) errmsgs = list(inference.run_validation()) self.assertEqual(len(errmsgs), 0, errmsgs) body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> select ?flowcell where { ?flowcell a libns:IlluminaFlowcell . }""" bindings = set(['flowcell']) count = 0 for r in model.query(body): count += 1 self.assertEqual(count, len(FlowCell.objects.all()))
class OntoInspector(object):
    """Class that includes methods for querying an RDFS/OWL ontology"""

    def __init__(self, uri, language=""):
        super(OntoInspector, self).__init__()
        self.rdfGraph = ConjunctiveGraph()
        try:
            self.rdfGraph.parse(uri, format="xml")
        except Exception:
            try:
                self.rdfGraph.parse(uri, format="n3")
            except Exception:
                raise exceptions.Error("Could not parse the file! Is it a valid RDF/OWL ontology?")
        finally:
            # let's cache some useful info for faster access
            self.baseURI = self.get_OntologyURI() or uri
            self.allclasses = self.__getAllClasses(classPredicate)
            self.toplayer = self.__getTopclasses()
            self.tree = self.__getTree()

    def get_OntologyURI(self):
        # todo
        pass
def check(kws): cg = ConjunctiveGraph() cg.parse(**kws) for g in cg.contexts(): gid = g.identifier assert isinstance(gid, Identifier)
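# check() above in action: parse a one-quad N-Quads document and verify that
# the named graph's identifier comes back as an rdflib Identifier.
nq = '<http://example.org/s> <http://example.org/p> "o" <http://example.org/g> .\n'
check(dict(data=nq, format="nquads"))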
def describe(self, s_or_po, initBindings={}, initNs={}):
    """
    Executes a SPARQL DESCRIBE of a resource

    :param s_or_po: is either

      * a subject ... should be a URIRef
      * a tuple of (predicate, object) ... pred should be inverse functional
      * a describe query string

    :param initBindings: A mapping from a Variable to an RDFLib term (used as initial bindings for SPARQL query)
    :param initNs: A mapping from a namespace prefix to a namespace
    """
    if isinstance(s_or_po, str):
        query = s_or_po
        if initNs:
            prefixes = ''.join(["prefix %s: <%s>\n" % (p, n)
                                for p, n in initNs.items()])
            query = prefixes + query
    elif isinstance(s_or_po, (URIRef, BNode)):
        query = "describe %s" % (s_or_po.n3())
    else:
        p, o = s_or_po
        query = "describe ?s where {?s %s %s}" % (p.n3(), o.n3())
    query = dict(query=query)
    url = self.url + "?" + urlencode(query)
    req = Request(url)
    req.add_header('Accept', 'application/rdf+xml')
    log.debug("opening url: %s\n with headers: %s" % (
        req.get_full_url(), req.header_items()))
    subgraph = ConjunctiveGraph()
    subgraph.parse(urlopen(req))
    return subgraph
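# Invocation styles for describe() above (endpoint wrapper and URIs are
# illustrative). Note the isinstance(..., str) branch is tested first, and in
# rdflib releases where URIRef derives from str a raw query string is the
# unambiguous form:
from rdflib import URIRef

endpoint.describe("describe <http://example.org/alice>",
                  initNs=dict(foaf="http://xmlns.com/foaf/0.1/"))
mbox = URIRef("http://xmlns.com/foaf/0.1/mbox")
endpoint.describe((mbox, URIRef("mailto:alice@example.org")))  # (pred, obj)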
def _construct(compiler, sources, query=None):
    dataset = ConjunctiveGraph()
    if not isinstance(sources, list):
        sources = [sources]
    for sourcedfn in sources:
        source = sourcedfn['source']
        graph = dataset.get_context(URIRef(sourcedfn.get('dataset') or source))
        if isinstance(source, (dict, list)):
            context_data = sourcedfn['context']
            if not isinstance(context_data, list):
                context_data = compiler.load_json(context_data)['@context']
            context_data = [compiler.load_json(ctx)['@context']
                            if isinstance(ctx, unicode) else ctx
                            for ctx in context_data]
            to_rdf(source, graph, context_data=context_data)
        elif isinstance(source, Graph):
            graph += source
        else:
            graph += compiler.cached_rdf(source)
    if not query:
        return graph
    with compiler.path(query).open() as fp:
        result = dataset.query(fp.read())
    g = Graph()
    for spo in result:
        g.add(spo)
    return g
def _graphFromQuads2(q): g = ConjunctiveGraph() #g.addN(q) # no effect on nquad output for s,p,o,c in q: g.get_context(c).add((s,p,o)) # kind of works with broken rdflib nquad serializer code #g.store.add((s,p,o), c) # no effect on nquad output return g
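# Round trip for _graphFromQuads2(): regroup quads into per-context graphs and
# serialize as N-Quads (names are illustrative).
from rdflib import URIRef, Literal

EX = "http://example.org/"
quads = [
    (URIRef(EX + "s"), URIRef(EX + "p"), Literal("in g1"), URIRef(EX + "g1")),
    (URIRef(EX + "s"), URIRef(EX + "p"), Literal("in g2"), URIRef(EX + "g2")),
]
g = _graphFromQuads2(quads)
print(g.serialize(format="nquads"))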
class Serializer(PythonSerializer): """ Convert a queryset to RDF """ internal_use_only = False def end_serialization(self): FOAF = Namespace('http://xmlns.com/foaf/0.1/') DC = Namespace('http://purl.org/dc/elements/1.1/') self.graph = ConjunctiveGraph() self.options.pop('stream', None) fields = filter(None, self.options.pop('fields','').split(',')) meta = None subject = None for object in self.objects: if not fields: fields = object['fields'].keys() newmeta = object['model'] if newmeta != meta: meta = newmeta subject = BNode('%s.%s'%(FOAF[newmeta],object['pk'])) self.graph.add((subject,FOAF['pk'],Literal(object['pk']))) for k in fields: if k: self.graph.add((subject,FOAF[k],Literal(object['fields'][k]))) def getvalue(self): if callable(getattr(self.graph, 'serialize', None)): return self.graph.serialize()
def test(request, ttype, test): # Get the request_host (without the port) request_path = request.path request_host = 'http://'+request.get_host()+request_path.replace(request.path_info,'') # print request_host if ttype in ['publications', 'mascc'] : if test in [re.search(r'ttl/(.*)\.ttl',n).group(1) for n in glob('/var/www/semweb.cs.vu.nl/plugins/ttl/*.ttl')] : filename = "/var/www/semweb.cs.vu.nl/plugins/ttl/{}.ttl".format(test) cg = ConjunctiveGraph() cg.parse(filename, format='n3') # print "Request received" if ttype == 'publications' : response = HttpResponseRedirect(request_host+'/publications/{}'.format(urllib.quote(cg.serialize(format='turtle'),safe=''))) elif ttype == 'mascc' : response = HttpResponseRedirect(request_host+'/mascc/{}'.format(urllib.quote(cg.serialize(format='turtle'),safe=''))) else : response = HttpResponseBadRequest() else : response = HttpResponseNotFound() elif ttype == 'bad' : response = HttpResponseRedirect(request_host+'/mascc/bad_request') else : response = HttpResponseNotFound() return response
def test_rdf(mfile): g = ConjunctiveGraph() try: g = g.parse(mfile, format='xml') return True except Exception as inst: return False
def __init__(self, store=None, id=None): if store is not None and id is not None: ConjunctiveGraph.__init__(self, store, id) else: ConjunctiveGraph.__init__(self) for (key,val) in namespaces.iteritems(): self.bind(key, val)
class Store: def __init__(self): self.graph = ConjunctiveGraph() if os.path.exists(storefn): self.graph.load(storeuri, format='n3') self.graph.bind('dc', DC) self.graph.bind('foaf', FOAF) self.graph.bind('imdb', IMDB) self.graph.bind('rev', 'http://purl.org/stuff/rev#') def save(self): self.graph.serialize(storeuri, format='n3') def who(self, who=None): if who is not None: name, email = (r_who.match(who).group(1), r_who.match(who).group(2)) self.graph.add( (URIRef(storeuri), DC['title'], Literal(title % name))) self.graph.add( (URIRef(storeuri + '#author'), RDF.type, FOAF['Person'])) self.graph.add( (URIRef(storeuri + '#author'), FOAF['name'], Literal(name))) self.graph.add( (URIRef(storeuri + '#author'), FOAF['mbox'], Literal(email))) self.save() else: return self.graph.objects(URIRef(storeuri + '#author'), FOAF['name']) def new_movie(self, movie): movieuri = URIRef('http://www.imdb.com/title/tt%s/' % movie.movieID) self.graph.add((movieuri, RDF.type, IMDB['Movie'])) self.graph.add((movieuri, DC['title'], Literal(movie['title']))) self.graph.add((movieuri, IMDB['year'], Literal(int(movie['year'])))) self.save() def new_review(self, movie, date, rating, comment=None): review = BNode( ) # @@ humanize the identifier (something like #rev-$date) movieuri = URIRef('http://www.imdb.com/title/tt%s/' % movie.movieID) self.graph.add( (movieuri, REV['hasReview'], URIRef('%s#%s' % (storeuri, review)))) self.graph.add((review, RDF.type, REV['Review'])) self.graph.add((review, DC['date'], Literal(date))) self.graph.add((review, REV['maxRating'], Literal(5))) self.graph.add((review, REV['minRating'], Literal(0))) self.graph.add((review, REV['reviewer'], URIRef(storeuri + '#author'))) self.graph.add((review, REV['rating'], Literal(rating))) if comment is not None: self.graph.add((review, REV['text'], Literal(comment))) self.save() def movie_is_in(self, uri): return (URIRef(uri), RDF.type, IMDB['Movie']) in self.graph
def transpose_to_rdf(doc, con, text, context_included, name, f): g = ConjunctiveGraph() if text not in con: if not context_included: if mp: with lock: get_context(con, text) else: get_context(con, text) if not args.debug: opener = open if ".bz" in name: opener = bz2.open if context_included: g.parse(data=json.dumps(doc), format='json-ld') else: g.parse(data=json.dumps(doc), format='json-ld', context=con[text]) with opener(name, "at") as fd: print(str(g.serialize(format='nt').decode('utf-8').rstrip()), file=fd) else: if context_included: g.parse(data=json.dumps(doc), format='json-ld') else: g.parse(data=json.dumps(doc), format='json-ld', context=con[text]) print(str(g.serialize(format=f).decode('utf-8').rstrip()))
def get_graph(): path = '../data/triplestore_linkedmdb' graph = ConjunctiveGraph('Sleepycat') graph.open(path, create=False) return graph
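# Creating the persistent store that get_graph() above expects to find: the
# same call with create=True initializes the Sleepycat (Berkeley DB) files on
# first use. Requires the bsddb3/berkeleydb package; the path is the one
# hard-coded above.
graph = ConjunctiveGraph('Sleepycat')
graph.open('../data/triplestore_linkedmdb', create=True)
graph.close()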
def bound_graph(identifier=None): g = ConjunctiveGraph(identifier=identifier) g.bind('core', CORE) g.bind('wot', WOT) g.bind('map', MAP) return g
from __future__ import print_function import rdfalchemy from rdfalchemy.samples.doap import FOAF from rdfalchemy.samples.foaf import Person from rdfalchemy.orm import mapper from rdflib import ConjunctiveGraph import logging log = logging.getLogger('rdfalchemy') if not log.handlers: log.addHandler(logging.StreamHandler()) #log.setLevel(10) Person.db = ConjunctiveGraph() Person.knows = rdfalchemy.rdfMultiple(FOAF.knows, range_type=FOAF.Person) def test_start(): assert len(Person.db) == 0 p = Person(last="Cooper", first="Philip") assert len(Person.db) == 3 del p def test_addBNodeKnowsL(): Person.knows = rdfalchemy.rdfList(FOAF.knows, range_type=FOAF.Person) mapper() p1 = Person.get_by(first="Philip") p2 = Person(last="Cooper", first="Ben") p3 = Person(last="Cooper", first="Matt") assert len(Person.db) == 9
def test_import_jsonld_into_named_graph(): """Test named graphs we use.""" graph = ConjunctiveGraph() serialized_document = json.dumps(jsonld.expand( jsonld.flatten(JSONLD_DOCUMENT, ), ), indent=4) graph.parse( data=serialized_document, format='json-ld', # All the semantic data about my blog is stored in a particular # named graph. publicID=PUBLIC_ID, ) assert list( map( operator.itemgetter(Variable('g')), graph.query( 'SELECT DISTINCT ?g WHERE { GRAPH ?g { ?s ?p ?o . } } ORDER BY ?g', ).bindings, )) == [ PUBLIC_ID, ] assert graph.query(''' SELECT * WHERE { ?subclass_of_robot rdfs:subClassOf ex:Robot . } ''', initNs=NAMESPACES).bindings == [{ Variable('subclass_of_robot'): URIRef('https://example.org/Rover'), }] # THIS FAILS! # The information about ex:Rover cannot be found if I specify the GRAPH. # Meaning, this information is not in one of the named graphs. assert graph.query(''' SELECT * WHERE { GRAPH ?g { ex:Rover rdfs:subClassOf ex:Robot . } } ''', initNs=NAMESPACES).bindings == [{ Variable('g'): PUBLIC_ID, }] # `publicID` was used for the part of data which was on the top level # of the document. assert graph.query(''' SELECT * WHERE { GRAPH ?g { blog:JSONLD-and-named-graphs a schema:blogPost . } } ''', initNs=NAMESPACES).bindings == [{ Variable('g'): PUBLIC_ID, }]
def getObsFile(fname): g = ConjunctiveGraph(identifier=URIRef(ads_baseurl)) bindgraph(g) recordstree = ElementTree.parse(fname) rootnode = recordstree.getroot() xobj = XMLObj(recordstree) trec = {} trec['obsname'] = rootnode.attrib['name'] trec['obsid'] = rootnode.attrib['obsid'] trec['instrument_name'] = xobj.elementAttribute('instrument', 'name') trec['obsvtype'] = xobj.type trec['time'] = xobj.observed_time trec['created_time'] = xobj.public_avail #Bug: in some of Sherry's stuff this is null #print "Created",trec['created_time'] trec['date'] = xobj.start_date trec['ra'] = xobj.ra trec['dec'] = xobj.dec trec['proposal_id'] = xobj.elementAttribute('proposal', 'id') #print trec obsuri = getObsURI(trec['obsid']) daturi = getDatURI(trec['obsid'], fragment="I") daturi2 = getDatURI(trec['obsid'], fragment="S") gadd(g, daturi, a, adsobsv.Datum) gadd(g, daturi2, a, adsobsv.Datum) gadd(g, obsuri, a, adsobsv.SimpleObservation) #Connect the data product and the observation access_url = "http://cda.harvard.edu/chaser/ocatList.do?obsid=" + trec[ 'obsid'] gdadd(g, daturi, [ adsobsv.dataProductId, Literal(trec['obsid'] + "/I"), adsobsv.forSimpleObservation, obsuri, adsobsv.dataURL, URIRef(access_url) ]) gdadd(g, daturi2, [ adsobsv.dataProductId, Literal(trec['obsid'] + "/S"), adsobsv.forSimpleObservation, obsuri, adsobsv.dataURL, URIRef(access_url) ]) addVals(g, daturi, [ adsobsv.calibLevel, 2, asInt, adsbase.dataType, "image", Literal, ]) #These are untrue anyway: creation time is not public time, but we are using it now. if trec['created_time'] != None: addVals(g, daturi, [ pav.createdOn, trec['created_time'], asDateTime('%b %d %Y %H:%M%p') ]) addVals(g, daturi2, [ pav.createdOn, trec['created_time'], asDateTime('%b %d %Y %H:%M%p') ]) addVals(g, daturi2, [ adsobsv.calibLevel, 2, asInt, adsbase.dataType, "spectra", Literal, ]) tname = trec['obsname'].strip() gdadd(g, obsuri, [ adsobsv.observationId, Literal(trec['obsid']), adsobsv.observationType, Literal(trec['obsvtype']), adsbase.atObservatory, uri_infra['observatory/CHANDRA'], adsobsv.atTelescope, uri_infra['telescope/CHANDRA'], adsbase.usingInstrument, uri_infra['instrument/CHANDRA_' + trec['instrument_name']], adsobsv.hasDatum, daturi, adsobsv.hasDatum, daturi2, adsbase.title, Literal(tname), adsbase.asAResultOfProposal, getPropURI(trec['proposal_id']) ]) #fstring: Sep 17 2000 8:01PM %b %d %Y %H:%M%p emmin = 0.1e-10 emmax = 100e-10 addVals(g, obsuri, [ adsbase.atTime, trec['date'], asDateTime('%b %d %Y %H:%M%p'), adsobsv.observedTime, float(trec['time']) * 1000, asDuration, adsobsv.tExptime, float(trec['time']) * 1000, asDouble, adsobsv.wavelengthStart, emmin, asDouble, adsobsv.wavelengthEnd, emmax, asDouble, ]) if tname != '': tnameuri = uri_target["CHANDRA/" + quote_plus(tname)] gadd(g, obsuri, adsbase.target, tnameuri) addVals(g, tnameuri, [ a, adsobsv.AstronomicalSourceName, None, adsbase.name, tname, Literal, ]) for domain in getEMDomains(float(emmin), float(emmax)): addVal(g, obsuri, adsobsv.wavelengthDomain, domain) print "RA?DEC", trec['ra'], trec['dec'] if trec['ra'] != None and trec['dec'] != None: gdbnadd(g, obsuri, adsobsv.associatedPosition, [ a, adsobsv.Pointing, adsobsv.ra, asDouble(trec['ra']), adsobsv.dec, asDouble(trec['dec']) ]) #should this be under uri_agents or collaboration instead? 
#the typing for this should be done in a conf file cnameuri = uri_conf["project/CHANDRA"] gadd(g, obsuri, adsobsv.observationMadeBy, cnameuri) # gdadd(graph, cnameuri, [ This stuff is thought off as configuration # a, adsbase.Project, # agent.fullName, Literal(cname) # ]) serializedstuff = g.serialize(format='xml') return serializedstuff
def getPubFile(fname):
    # Do we really need to create one per file? Could be
    # cached/made global but leave that for later if it
    # ever is determined to be a problem.
    hparser = HTMLParser.HTMLParser()
    g = ConjunctiveGraph(identifier=URIRef(ads_baseurl))
    bindgraph(g)
    recordstree = ElementTree.parse(fname)
    rootnode = recordstree.getroot()
    xobj = XMLObj(recordstree)
    trec = {}
    # Change by Doug:
    # It looks like the bibcode elements have been percent encoded
    # in the input XML files - e.g.
    #
    # cat ../chandradata/Publications/2000A%26A...359..489C.xml
    # <paper>
    # <bibcode>2000A%26A...359..489C</bibcode>
    # <classified_by>CDA</classified_by>
    # <paper_type>science</paper_type>
    # <flags>
    # <data_use>indirect</data_use>
    # <multi_observatory />
    # <followup />
    # </flags>
    # </paper>
    #
    # so we have to decode it here.
    #
    # For now we *only* replace %26 by & but add a check
    # to fail if a % is found as a safety check.
    # I include the HTML unescape routine in case upstream
    # changes its format.
    #
    trec['bibcode'] = hparser.unescape(xobj.bibcode)
    trec['bibcode'] = trec['bibcode'].replace('%26', '&')
    if trec['bibcode'].find('%') != -1:
        raise ValueError(
            "Problem cleaning bibcode: original='{0}' after='{1}'".format(
                xobj.bibcode, trec['bibcode']))

    trec['classified_by'] = xobj.classified_by
    # this above could also be figured by bibgroup
    # shouldn't this be a curated statement. But what is the curation. Not a source curation
    # later.
    trec['paper_type'] = xobj.paper_type
    # trec['obsids']=[e.text for e in xobj.rec.findall('data')[0].findall('obsid')]
    trec['obsids'] = []  # default so the len() checks below are always safe
    boolobsids = False
    if len(xobj.rec.findall('data')) > 0:
        if len(xobj.rec.findall('data')[0].findall('obsid')) > 0:
            print "1"
            trec['obsids'] = [
                e.text for e in xobj.rec.findall('data')[0].findall('obsid')
            ]
            boolobsids = True
    else:
        print "2"
        trec['obsids'] = []
    # print trec
    bibcode_uri = uri_bib[trec['bibcode']]
    gadd(g, bibcode_uri, adsbib.paperType, Literal(trec['paper_type']))
    print bibcode_uri
    if len(trec['obsids']) > 0:
        gadd(g, bibcode_uri, adsobsv.datum_p, Literal(str(boolobsids).lower()))
    for obsid in trec['obsids']:
        obsuri = getObsURI(obsid)
        daturi = getDatURI(obsid)
        # obsuri=uri_obs['CHANDRA_'+obsid]
        # daturi=uri_dat['CHANDRA_'+obsid]
        gadd(g, bibcode_uri, adsbase.aboutScienceProcess, obsuri)
        gadd(g, bibcode_uri, adsbase.aboutScienceProduct, daturi)
    # This is temporary. must map papertype to scienceprocesses and use those ones exactly
    serializedstuff = g.serialize(format='xml')
    return serializedstuff
min = Property(PTREC.hasDateTimeMin) max = Property(PTREC.hasDateTimeMax) def __repr__(self): return "TemporalData(%r, %r)" % (self.min, self.max) class Event(Subject): RDF_TYPE = PTREC.Event type = Term(RDF.type) def __repr__(self): return "Event(%r)" % (self.type, ) graph = ConjunctiveGraph() def get_index_event(cohort_line, graph=graph): """ Splits `cohort_line`, a string formatted like "<ccfid> <YYYY-mm-dd> <n>", into the CCFID and operation event identifier, and returns the result of querying `graph` for that operation. The patient record graph will be loaded into `graph`, and must be an RDF/XML file named "<ccfid>.rdf" in the directory specified by the environment variable `RDF_DIRECTORY`. For example, "12345678 2009-01-01 2" will load "12345678.rdf" into `graph` and find that patient's second operation event on January 1, 2009.
class BerkeleyDBTestCase(unittest.TestCase):
    def setUp(self):
        self.store_name = "BerkeleyDB"
        self.path = mktemp()
        self.g = ConjunctiveGraph(store=self.store_name)
        self.rt = self.g.open(self.path, create=True)
        assert self.rt == VALID_STORE, "The underlying store is corrupt"
        assert (
            len(self.g) == 0
        ), "There must be zero triples in the graph just after store (file) creation"
        data = """
                PREFIX : <https://example.org/>
                :a :b :c .
                :d :e :f .
                :d :g :h .
                """
        self.g.parse(data=data, format="ttl")

    def tearDown(self):
        self.g.close()

    def test_write(self):
        assert (
            len(self.g) == 3
        ), "There must be three triples in the graph after the first data chunk parse"
        data2 = """
                PREFIX : <https://example.org/>
                :d :i :j .
                """
        self.g.parse(data=data2, format="ttl")
        assert (
            len(self.g) == 4
        ), "There must be four triples in the graph after the second data chunk parse"
        data3 = """
                PREFIX : <https://example.org/>
                :d :i :j .
                """
        self.g.parse(data=data3, format="ttl")
        assert (
            len(self.g) == 4
        ), "There must still be four triples in the graph after the third data chunk parse"

    def test_read(self):
        sx = None
        for s in self.g.subjects(
            predicate=URIRef("https://example.org/e"),
            object=URIRef("https://example.org/f"),
        ):
            sx = s
        assert sx == URIRef("https://example.org/d")

    def test_sparql_query(self):
        q = """
            PREFIX : <https://example.org/>
            SELECT (COUNT(*) AS ?c)
            WHERE { :d ?p ?o . }"""
        c = 0
        for row in self.g.query(q):
            c = int(row.c)
        assert c == 2, "SPARQL COUNT must return 2"

    def test_sparql_insert(self):
        q = """
            PREFIX : <https://example.org/>
            INSERT DATA { :x :y :z . }"""
        self.g.update(q)
        assert len(self.g) == 4, "After extra triple insert, length must be 4"

    def test_multigraph(self):
        q = """
            PREFIX : <https://example.org/>
            INSERT DATA {
                GRAPH :m { :x :y :z . }
                GRAPH :n { :x :y :z . }
            }"""
        self.g.update(q)

        q = """
            SELECT (COUNT(?g) AS ?c)
            WHERE {
                SELECT DISTINCT ?g
                WHERE {
                    GRAPH ?g {
                        ?s ?p ?o
                    }
                }
            }
            """
        c = 0
        for row in self.g.query(q):
            c = int(row.c)
        assert c == 3, "SPARQL COUNT must return 3 (default, :m & :n)"

    def test_open_shut(self):
        assert len(self.g) == 3, "Initially we must have 3 triples from setUp"
        self.g.close()
        self.g = None

        # reopen the graph
        self.g = ConjunctiveGraph("BerkeleyDB")
        self.g.open(self.path, create=False)
        assert (
            len(self.g) == 3
        ), "After close and reopen, we should still have the 3 originally added triples"
text = """
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .

_:a rdf:type foaf:Person .
_:a foaf:name "Alice" .
_:a foaf:mbox <mailto:[email protected]> .
_:a foaf:mbox <mailto:[email protected]> .

_:b rdf:type foaf:Person .
_:b foaf:name "Bob" .
"""

test_query = """
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE { ?x foaf:name ?name .
        OPTIONAL { ?x foaf:mbox ?mbox } }
"""

graph = ConjunctiveGraph(plugin.get('IOMemory', Store)())
graph.parse(StringIO(text), format="n3")

print graph.serialize(format='xml')
print
print 'Value Constraints:'
print
results = graph.query(test_query)
#print results.serialize(format='xml')
for row in results:
    print row.name, row.mbox
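# The same OPTIONAL query on a current rdflib under Python 3, where the
# in-memory store is the default and print is a function; a minimal,
# self-contained re-run of the snippet above with its own small dataset.
from rdflib import Graph

g3 = Graph()
g3.parse(data="""
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
_:a foaf:name "Alice" .
_:b foaf:name "Bob" ; foaf:mbox <mailto:bob@example.org> .
""", format="turtle")
for row in g3.query(test_query):
    print(row.name, row.mbox)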
#dabib='2005ApJ...629..700N'
##file to read is output of simad1.py and assumes bibcode.simbad
#print "SIMBAD", simbad[dabib]
#sys.exit(-1)
# Issue: some sources will come again and again and have multiple metadata strings. I think this is fine
# as the triplestore will kill repeated triples. But what if they come in different contexts? Won't we
# have multiple statements then? I think we can deal with that but it is something to remember.
odir = DATA + "/data/rdf"
if not os.path.isdir(odir):
    os.makedirs(odir)
for bibcode in simbad.keys():
    g = ConjunctiveGraph(identifier=URIRef(None))
    bindgraph(g)
    for aobject in simbad[bibcode]:
        #print bibcode, aobject['id']
        euri = uri_bib[bibcode]
        eleid = quote_plus("_".join(aobject['id'].split()))
        gadd(g, euri, adsbase.hasAstronomicalSource, uri_source[eleid])
        gadd(g, uri_source[eleid], a, adsbase.AstronomicalSource)
        gadd(g, uri_source[eleid], adsbase.name, Literal(aobject['id']))
        gadd(g, uri_source[eleid], adsobsv.curatedAt, uri_conf['SIMBAD'])
        gadd(g, uri_source[eleid], adsbase.hasMetadataString, Literal(str(aobject)))
    serializedstuff = g.serialize()
    if not os.path.isdir(DATA + "/data/rdf"):
        os.makedirs(DATA + "/data/rdf")
    fd = open(odir + "/simbad." + quote_plus(bibcode) + ".rdf", "w")
        self[key] = value

    def delete(self, key):
        if key in self:
            del self[key]

DEFAULT_HTTP_CLIENT = Http(_MemCache())
DEFAULT_REQUEST_HEADERS = {
    # NB: the spaces and line-breaks in 'accept' below are a hack
    # to work around a problem in httplib2:
    # the cache does not work with arbitrarily long lines
    "accept": "application/ld+json, application/n-quads;q=0.9,\r\n application/turtle;q=0.8, application/n-triples;q=0.7,\r\n application/rdf+xml;q=0.6, text/html;q=0.5, */*;q=0.1",
    "user-agent": "hydra-py-v" + __version__,
}

BACKGROUND_KNOWLEDGE = ConjunctiveGraph(
    identifier=URIRef("urn:x-hydra-py:background-knowledge"))

SUBCLASS = RDFS.subClassOf * "*"
SUBPROP = RDFS.subPropertyOf * "*"
LINK_OP = SUBPROP / HYDRA.supportedOperation
RANGE = RDFS.range / SUBCLASS
RANGE_OP = RANGE / HYDRA.supportedOperation
TYPE = RDF.type / SUBCLASS
TYPE_OP = TYPE / HYDRA.supportedOperation

APIDOC_RE = regex(
    r'^<([^>]*)>; rel="http://www.w3.org/ns/hydra/core#apiDocumentation"$')
class ContextTestCase(unittest.TestCase): store = "default" slow = True tmppath = None def setUp(self): try: self.graph = ConjunctiveGraph(store=self.store) except ImportError: raise SkipTest("Dependencies for store '%s' not available!" % self.store) if self.store == "SQLite": _, self.tmppath = mkstemp(prefix="test", dir="/tmp", suffix=".sqlite") else: self.tmppath = mkdtemp() self.graph.open(self.tmppath, create=True) self.michel = URIRef("michel") self.tarek = URIRef("tarek") self.bob = URIRef("bob") self.likes = URIRef("likes") self.hates = URIRef("hates") self.pizza = URIRef("pizza") self.cheese = URIRef("cheese") self.c1 = URIRef("context-1") self.c2 = URIRef("context-2") # delete the graph for each test! self.graph.remove((None, None, None)) def tearDown(self): self.graph.close() if os.path.isdir(self.tmppath): shutil.rmtree(self.tmppath) else: os.remove(self.tmppath) def addStuff(self): tarek = self.tarek michel = self.michel bob = self.bob likes = self.likes hates = self.hates pizza = self.pizza cheese = self.cheese c1 = self.c1 graph = Graph(self.graph.store, c1) graph.add((tarek, likes, pizza)) graph.add((tarek, likes, cheese)) graph.add((michel, likes, pizza)) graph.add((michel, likes, cheese)) graph.add((bob, likes, cheese)) graph.add((bob, hates, pizza)) graph.add((bob, hates, michel)) # gasp! def removeStuff(self): tarek = self.tarek michel = self.michel bob = self.bob likes = self.likes hates = self.hates pizza = self.pizza cheese = self.cheese c1 = self.c1 graph = Graph(self.graph.store, c1) graph.remove((tarek, likes, pizza)) graph.remove((tarek, likes, cheese)) graph.remove((michel, likes, pizza)) graph.remove((michel, likes, cheese)) graph.remove((bob, likes, cheese)) graph.remove((bob, hates, pizza)) graph.remove((bob, hates, michel)) # gasp! def addStuffInMultipleContexts(self): c1 = self.c1 c2 = self.c2 triple = (self.pizza, self.hates, self.tarek) # revenge! # add to default context self.graph.add(triple) # add to context 1 graph = Graph(self.graph.store, c1) graph.add(triple) # add to context 2 graph = Graph(self.graph.store, c2) graph.add(triple) def testConjunction(self): if self.store == "SQLite": raise SkipTest("Skipping known issue with __len__") self.addStuffInMultipleContexts() triple = (self.pizza, self.likes, self.pizza) # add to context 1 graph = Graph(self.graph.store, self.c1) graph.add(triple) self.assertEqual(len(self.graph), len(graph)) def testAdd(self): self.addStuff() def testRemove(self): self.addStuff() self.removeStuff() def testLenInOneContext(self): c1 = self.c1 # make sure context is empty self.graph.remove_context(self.graph.get_context(c1)) graph = Graph(self.graph.store, c1) oldLen = len(self.graph) for i in range(0, 10): graph.add((BNode(), self.hates, self.hates)) self.assertEqual(len(graph), oldLen + 10) self.assertEqual(len(self.graph.get_context(c1)), oldLen + 10) self.graph.remove_context(self.graph.get_context(c1)) self.assertEqual(len(self.graph), oldLen) self.assertEqual(len(graph), 0) def testLenInMultipleContexts(self): if self.store == "SQLite": raise SkipTest("Skipping known issue with __len__") oldLen = len(self.graph) self.addStuffInMultipleContexts() # addStuffInMultipleContexts is adding the same triple to # three different contexts. So it's only + 1 self.assertEqual(len(self.graph), oldLen + 1) graph = Graph(self.graph.store, self.c1) self.assertEqual(len(graph), oldLen + 1) def testRemoveInMultipleContexts(self): c1 = self.c1 c2 = self.c2 triple = (self.pizza, self.hates, self.tarek) # revenge! 
self.addStuffInMultipleContexts() # triple should be still in store after removing it from c1 + c2 self.assertTrue(triple in self.graph) graph = Graph(self.graph.store, c1) graph.remove(triple) self.assertTrue(triple in self.graph) graph = Graph(self.graph.store, c2) graph.remove(triple) self.assertTrue(triple in self.graph) self.graph.remove(triple) # now gone! self.assertTrue(triple not in self.graph) # add again and see if remove without context removes all triples! self.addStuffInMultipleContexts() self.graph.remove(triple) self.assertTrue(triple not in self.graph) def testContexts(self): triple = (self.pizza, self.hates, self.tarek) # revenge! self.addStuffInMultipleContexts() def cid(c): return c.identifier self.assertTrue(self.c1 in map(cid, self.graph.contexts())) self.assertTrue(self.c2 in map(cid, self.graph.contexts())) contextList = list(map(cid, list(self.graph.contexts(triple)))) self.assertTrue(self.c1 in contextList, (self.c1, contextList)) self.assertTrue(self.c2 in contextList, (self.c2, contextList)) def testRemoveContext(self): c1 = self.c1 self.addStuffInMultipleContexts() self.assertEqual(len(Graph(self.graph.store, c1)), 1) self.assertEqual(len(self.graph.get_context(c1)), 1) self.graph.remove_context(self.graph.get_context(c1)) self.assertTrue(self.c1 not in self.graph.contexts()) def testRemoveAny(self): Any = None self.addStuffInMultipleContexts() self.graph.remove((Any, Any, Any)) self.assertEqual(len(self.graph), 0) def testTriples(self): tarek = self.tarek michel = self.michel bob = self.bob likes = self.likes hates = self.hates pizza = self.pizza cheese = self.cheese c1 = self.c1 asserte = self.assertEqual triples = self.graph.triples graph = self.graph c1graph = Graph(self.graph.store, c1) c1triples = c1graph.triples Any = None self.addStuff() # unbound subjects with context asserte(len(list(c1triples((Any, likes, pizza)))), 2) asserte(len(list(c1triples((Any, hates, pizza)))), 1) asserte(len(list(c1triples((Any, likes, cheese)))), 3) asserte(len(list(c1triples((Any, hates, cheese)))), 0) # unbound subjects without context, same results! asserte(len(list(triples((Any, likes, pizza)))), 2) asserte(len(list(triples((Any, hates, pizza)))), 1) asserte(len(list(triples((Any, likes, cheese)))), 3) asserte(len(list(triples((Any, hates, cheese)))), 0) # unbound objects with context asserte(len(list(c1triples((michel, likes, Any)))), 2) asserte(len(list(c1triples((tarek, likes, Any)))), 2) asserte(len(list(c1triples((bob, hates, Any)))), 2) asserte(len(list(c1triples((bob, likes, Any)))), 1) # unbound objects without context, same results! asserte(len(list(triples((michel, likes, Any)))), 2) asserte(len(list(triples((tarek, likes, Any)))), 2) asserte(len(list(triples((bob, hates, Any)))), 2) asserte(len(list(triples((bob, likes, Any)))), 1) # unbound predicates with context asserte(len(list(c1triples((michel, Any, cheese)))), 1) asserte(len(list(c1triples((tarek, Any, cheese)))), 1) asserte(len(list(c1triples((bob, Any, pizza)))), 1) asserte(len(list(c1triples((bob, Any, michel)))), 1) # unbound predicates without context, same results! asserte(len(list(triples((michel, Any, cheese)))), 1) asserte(len(list(triples((tarek, Any, cheese)))), 1) asserte(len(list(triples((bob, Any, pizza)))), 1) asserte(len(list(triples((bob, Any, michel)))), 1) # unbound subject, objects with context asserte(len(list(c1triples((Any, hates, Any)))), 2) asserte(len(list(c1triples((Any, likes, Any)))), 5) # unbound subject, objects without context, same results! 
asserte(len(list(triples((Any, hates, Any)))), 2) asserte(len(list(triples((Any, likes, Any)))), 5) # unbound predicates, objects with context asserte(len(list(c1triples((michel, Any, Any)))), 2) asserte(len(list(c1triples((bob, Any, Any)))), 3) asserte(len(list(c1triples((tarek, Any, Any)))), 2) # unbound predicates, objects without context, same results! asserte(len(list(triples((michel, Any, Any)))), 2) asserte(len(list(triples((bob, Any, Any)))), 3) asserte(len(list(triples((tarek, Any, Any)))), 2) # unbound subjects, predicates with context asserte(len(list(c1triples((Any, Any, pizza)))), 3) asserte(len(list(c1triples((Any, Any, cheese)))), 3) asserte(len(list(c1triples((Any, Any, michel)))), 1) # unbound subjects, predicates without context, same results! asserte(len(list(triples((Any, Any, pizza)))), 3) asserte(len(list(triples((Any, Any, cheese)))), 3) asserte(len(list(triples((Any, Any, michel)))), 1) # all unbound with context asserte(len(list(c1triples((Any, Any, Any)))), 7) # all unbound without context, same result! asserte(len(list(triples((Any, Any, Any)))), 7) for c in [graph, self.graph.get_context(c1)]: # unbound subjects asserte(set(c.subjects(likes, pizza)), set((michel, tarek))) asserte(set(c.subjects(hates, pizza)), set((bob,))) asserte(set(c.subjects(likes, cheese)), set([tarek, bob, michel])) asserte(set(c.subjects(hates, cheese)), set()) # unbound objects asserte(set(c.objects(michel, likes)), set([cheese, pizza])) asserte(set(c.objects(tarek, likes)), set([cheese, pizza])) asserte(set(c.objects(bob, hates)), set([michel, pizza])) asserte(set(c.objects(bob, likes)), set([cheese])) # unbound predicates asserte(set(c.predicates(michel, cheese)), set([likes])) asserte(set(c.predicates(tarek, cheese)), set([likes])) asserte(set(c.predicates(bob, pizza)), set([hates])) asserte(set(c.predicates(bob, michel)), set([hates])) asserte(set(c.subject_objects(hates)), set([(bob, pizza), (bob, michel)])) asserte( set(c.subject_objects(likes)), set( [ (tarek, cheese), (michel, cheese), (michel, pizza), (bob, cheese), (tarek, pizza), ] ), ) asserte( set(c.predicate_objects(michel)), set([(likes, cheese), (likes, pizza)]) ) asserte( set(c.predicate_objects(bob)), set([(likes, cheese), (hates, pizza), (hates, michel)]), ) asserte( set(c.predicate_objects(tarek)), set([(likes, cheese), (likes, pizza)]) ) asserte( set(c.subject_predicates(pizza)), set([(bob, hates), (tarek, likes), (michel, likes)]), ) asserte( set(c.subject_predicates(cheese)), set([(bob, likes), (tarek, likes), (michel, likes)]), ) asserte(set(c.subject_predicates(michel)), set([(bob, hates)])) asserte( set(c), set( [ (bob, hates, michel), (bob, likes, cheese), (tarek, likes, pizza), (michel, likes, pizza), (michel, likes, cheese), (bob, hates, pizza), (tarek, likes, cheese), ] ), ) # remove stuff and make sure the graph is empty again self.removeStuff() asserte(len(list(c1triples((Any, Any, Any)))), 0) asserte(len(list(triples((Any, Any, Any)))), 0)
import os namespaces = dict( rdf=Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), rdfs=Namespace("http://www.w3.org/2000/01/rdf-schema#"), cogrobtut=Namespace(model.COGROBTUT), dash=Namespace("http://datashapes.org/dash#"), shema=Namespace("http://schema.org/"), sh=Namespace("http://www.w3.org/ns/shacl#"), xsd=Namespace("http://www.w3.org/2001/XMLSchema#"), ) #path_database = "/home/admin-franka/franka-web-app/flaskr/static/owl/franka_robolab/" path_database = "/home/anglerau/GitLab/flask-app/flaskr/static/owl/franka_robolab/" file_extension = ".owl" kb = ConjunctiveGraph() list_parsed_files = [] for f in os.listdir(path_database): if f.endswith(file_extension): list_parsed_files.append(f) with open(path_database + f) as onto: new_context = Graph(kb.store, f) new_context.parse(onto) def reload_kb(): for f in os.listdir(path_database): if not (f in list_parsed_files): with open(path_database + f) as onto: new_context = Graph(kb.store, f) new_context.parse(onto)
def setup_sparql(self): self.g = ConjunctiveGraph(store='SPARQLStore') self.g.open(self.endpt)
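# setup_sparql() above wired to a concrete endpoint: a read-only SPARQLStore
# needs no create step, just open() with the endpoint URL (DBpedia is used
# purely for illustration).
from rdflib import ConjunctiveGraph

g = ConjunctiveGraph(store='SPARQLStore')
g.open("https://dbpedia.org/sparql")
for row in g.query("SELECT * WHERE { ?s ?p ?o } LIMIT 3"):
    print(row)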
class rdfSubject(object):
    db = ConjunctiveGraph()
    """Default graph for access to instances of this type"""
    rdf_type = None
    """rdf:type of instances of this class"""

    def __init__(self, resUri=None, **kwargs):
        """The constructor tries hard to return you an rdfSubject

        :param resUri: the "resource uri". If `None` then create an instance
        with a BNode resUri. Can be given as one of:

           * an instance of an rdfSubject
           * an instance of a BNode or a URIRef
           * an n3 uriref string like: "<urn:isbn:1234567890>"
           * an n3 bnode string like: "_:xyz1234"

        :param kwargs: is a set of values that will be set using the keys to
        find the appropriate descriptor"""
        if not resUri:
            # create a bnode
            self.resUri = BNode()
            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))
        elif isinstance(resUri, (BNode, URIRef)):
            # use the identifier passed
            self.resUri = resUri
            if self.rdf_type \
                    and not list(self.db.triples(
                        (self.resUri, RDF.type, self.rdf_type))):
                self.db.add((self.resUri, RDF.type, self.rdf_type))
        elif isinstance(resUri, rdfSubject):
            # use the resUri of the subject
            self.resUri = resUri.resUri
            self.db = resUri.db
        elif isinstance(resUri, (str, unicode)):
            # create one from a <uri> or _:bnode string
            if resUri[0] == "<" and resUri[-1] == ">":
                self.resUri = URIRef(resUri[1:-1])
            elif resUri.startswith("_:"):
                self.resUri = BNode(resUri[2:])
            if self.rdf_type:
                self.db.add((self.resUri, RDF.type, self.rdf_type))
        else:
            raise AttributeError("cannot construct rdfSubject from %s" % (
                str(resUri)))
        if kwargs:
            self._set_with_dict(kwargs)

    def n3(self):
        """n3 repr of this node"""
        return self.resUri.n3()

    @classmethod
    def _getdescriptor(cls, key):
        """__get_descriptor returns the descriptor for the key.
        It is essentially cls.__dict__[key] with recursive calls to super"""
        # NOT SURE if mro is the way to do this or if we should call
        # super() or bases?
        for kls in cls.mro():
            if key in kls.__dict__:
                return kls.__dict__[key]
        raise AttributeError(
            "descriptor %s not found for class %s" % (key, cls))

    # short term hack. Need to go to a sqlalchemy 0.4 style query method
    # obj.query.get_by should map to obj.get_by ..same for fetch_by
    @classmethod
    def query(cls):
        return cls

    @classmethod
    def get_by(cls, **kwargs):
        """Class Method, returns a single instance of the class
        by a single kwarg. The keyword must be a descriptor of the
        class.
        example:

        .. code-block:: python

            bigBlue = Company.get_by(symbol='IBM')

        :Note:
            the keyword should map to an rdf predicate
            that is of type owl:InverseFunctional"""
        if len(kwargs) != 1:
            raise ValueError(
                "get_by wanted exactly 1 but got %i args\n"
                "Maybe you wanted filter_by" % len(kwargs))
        key, value = kwargs.popitem()
        if isinstance(value, (URIRef, BNode, Literal)):
            o = value
        else:
            o = Literal(value)
        pred = cls._getdescriptor(key).pred
        uri = cls.db.value(None, pred, o)
        if uri:
            return cls(uri)
        else:
            raise LookupError("%s = %s not found" % (key, value))

    @classmethod
    def filter_by(cls, **kwargs):
        """Class method returns a generator over class instances
        meeting the kwargs conditions.
        Each keyword must be a class descriptor.

        filter by RDF.type == cls.rdf_type is implicit

        Order helps, the first keyword should be the most restrictive
        """
        filters = []
        for key, value in kwargs.items():
            pred = cls._getdescriptor(key).pred
            # try to make the value be OK for the triple query as an object
            if isinstance(value, Identifier):
                obj = value
            else:
                obj = Literal(value)
            filters.append((pred, obj))
        # make sure we filter by type
        if (RDF.type, cls.rdf_type) not in filters:
            filters.append((RDF.type, cls.rdf_type))
        pred, obj = filters[0]
        log.debug("Checking %s, %s" % (pred, obj))
        for sub in cls.db.subjects(pred, obj):
            log.debug("maybe %s" % sub)
            for pred, obj in filters[1:]:
                log.debug("Checking %s, %s" % (pred, obj))
                try:
                    cls.db.triples((sub, pred, obj)).next()
                except StopIteration:
                    log.warn("No %s" % sub)
                    break
            else:
                yield cls(sub)

    @classmethod
    def ClassInstances(cls):
        """return a generator for instances of this rdf:type
        you can look in MyClass.rdf_type to see the predicate being used"""
        beenthere = set([])
        for i in cls.db.subjects(RDF.type, cls.rdf_type):
            if i not in beenthere:
                yield cls(i)
                beenthere.add(i)

    @classmethod
    def GetRandom(cls):
        """for development, just returns a random instance of this class"""
        from random import choice
        xii = list(cls.ClassInstances())
        return choice(xii)

    def __hash__(self):
        return hash("ranD0Mi$h_" + self.n3())

    def __cmp__(self, other):
        if other is None:
            return False
        else:
            return cmp(self.n3(), other.n3())

    def __repr__(self):
        return """%s('%s')""" % (
            self.__class__.__name__, self.n3().encode('utf-8'))

    if rdflibversion.startswith('2'):
        def __str__(self):
            return str(self.resUri)

    def __getitem__(self, pred):
        log.debug("Getting with __getitem__ %s for %s" % (pred, self.n3()))
        val = self.db.value(self.resUri, pred)
        if isinstance(val, Literal):
            val = val.toPython()
        elif isinstance(val, (BNode, URIRef)):
            val = rdfSubject(val)
        return val

    def __delitem__(self, pred):
        log.debug("Deleting with __delitem__ %s for %s" % (pred, self))
        for s, p, o in self.db.triples((self.resUri, pred, None)):
            self.db.remove((s, p, o))
            # finally if the object in the triple was a bnode
            # cascade delete the thing it referenced
            # ?? FIXME Do we really want to cascade if it's an rdfSubject??
            if isinstance(o, (BNode, rdfSubject)):
                rdfSubject(o)._remove(db=self.db, cascade='bnode')

    def _set_with_dict(self, kv):
        """
        :param kv: a dict

          for each key,value pair in dict kv
               set self.key = value
        """
        for key, value in kv.items():
            descriptor = self.__class__._getdescriptor(key)
            descriptor.__set__(self, value)

    def _remove(
            self, db=None, cascade='bnode',
            bnodeCheck=True, objectCascade=False):
        """
        Remove all triples where this rdfSubject is the subject of the triple

        :param db: limit the remove operation to this graph
        :param cascade: must be one of:

            * none -- remove none
            * bnode -- (default) remove all unreferenced bnodes
            * all -- remove all unreferenced bnode(s) AND uri(s)

        :param bnodeCheck: boolean

            * True -- (default) check bnodes and raise exception if there
              are still references to this node
            * False -- do not check. This can leave orphaned object reference
              in triples. Use only if you are resetting the value in
              the same transaction
        :param objectCascade: boolean

            * False -- (default) do nothing
            * True -- delete also all triples where this refSubject is
              the object of the triple.
        """
        noderef = self.resUri
        log.debug("Called remove on %s" % self)
        if not db:
            db = self.db
        # we cannot delete a bnode if it is still referenced,
        # i.e.
if it is the o of a s,p,o if bnodeCheck and isinstance(noderef, BNode): for s, p, o in db.triples((None, None, noderef)): raise RDFAlchemyError( "Cannot delete BNode %s because %s still references it" % ( noderef.n3(), s.n3())) # determine an appropriate test for cascade decisions if cascade == 'bnode': # we cannot delete a bnode if there are still references to it def test(node): if isinstance(node, (URIRef, Literal)): return False for s, p, o in db.triples((None, None, node)): return False return True elif cascade == 'none': def f1(node): return False test = f1 elif cascade == 'all': def f2(node): if isinstance(node, Literal): return False for s, p, o in db.triples((None, None, node)): return False return True test = f2 else: raise AttributeError("unknown cascade argument") for s, p, o in db.triples((noderef, None, None)): db.remove((s, p, o)) if test(o): rdfSubject(o)._remove(db=db, cascade=cascade) if objectCascade: for s, p, o in db.triples((None, None, noderef)): db.remove((s, p, o)) def _rename(self, name, db=None): """rename a node """ if not db: db = self.db if not (isinstance(name, (BNode, URIRef))): raise AttributeError("cannot rename to %s" % name) for s, p, o in db.triples((self.resUri, None, None)): db.remove((s, p, o)) db.add((name, p, o)) for s, p, o in db.triples((None, None, self.resUri)): db.set((s, p, name)) self.resUri = name def _ppo(self, db=None): """Like pretty print... Return a 'pretty predicate,object' of self returning all predicate object pairs with qnames""" db = db or self.db for p, o in db.predicate_objects(self.resUri): print("%20s = %s" % (db.qname(p), str(o))) print(" ")
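# A hypothetical rdfSubject subclass exercising the descriptor-based API of
# the class above; rdfSingle is RDFAlchemy's single-valued descriptor, and the
# namespace and data are illustrative.
from rdfalchemy import rdfSingle
from rdflib import Namespace

EX = Namespace("http://example.org/")

class Company(rdfSubject):
    rdf_type = EX.Company
    symbol = rdfSingle(EX.symbol)
    name = rdfSingle(EX.name)

ibm = Company(symbol='IBM', name='International Business Machines')
assert Company.get_by(symbol='IBM').name == ibm.name
for c in Company.ClassInstances():
    print(c.n3())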
def test_named_filter_graph_query():
    g = ConjunctiveGraph()
    g.namespace_manager.bind('rdf', RDF)
    g.namespace_manager.bind('rdfs', RDFS)
    ex = Namespace('https://ex.com/')
    g.namespace_manager.bind('ex', ex)
    g.get_context(ex.g1).parse(format="turtle", data=f"""
    PREFIX ex: <{str(ex)}>
    PREFIX rdfs: <{str(RDFS)}>
    ex:Boris rdfs:label "Boris" .
    ex:Susan rdfs:label "Susan" .
    """)
    g.get_context(ex.g2).parse(format="turtle", data=f"""
    PREFIX ex: <{str(ex)}>
    ex:Boris a ex:Person .
    """)

    assert list(g.query(
        "SELECT ?l WHERE { GRAPH ex:g1 { ?a rdfs:label ?l } ?a a ?type }",
        initNs={'ex': ex})) == [(Literal('Boris'),)]
    assert list(g.query(
        "SELECT ?l WHERE { GRAPH ex:g1 { ?a rdfs:label ?l } FILTER EXISTS { ?a a ?type }}",
        initNs={'ex': ex})) == [(Literal('Boris'),)]
    assert list(g.query(
        "SELECT ?l WHERE { GRAPH ex:g1 { ?a rdfs:label ?l } FILTER NOT EXISTS { ?a a ?type }}",
        initNs={'ex': ex})) == [(Literal('Susan'),)]
    assert list(g.query(
        "SELECT ?l WHERE { GRAPH ?g { ?a rdfs:label ?l } ?a a ?type }",
        initNs={'ex': ex})) == [(Literal('Boris'),)]
    assert list(g.query(
        "SELECT ?l WHERE { GRAPH ?g { ?a rdfs:label ?l } FILTER EXISTS { ?a a ?type }}",
        initNs={'ex': ex})) == [(Literal('Boris'),)]
    assert list(g.query(
        "SELECT ?l WHERE { GRAPH ?g { ?a rdfs:label ?l } FILTER NOT EXISTS { ?a a ?type }}",
        initNs={'ex': ex})) == [(Literal('Susan'),)]
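# The behaviour the test above pins down, reduced to a standalone sketch:
# with a ConjunctiveGraph, the part of a query outside any GRAPH block
# matches against the union of all named graphs, so ex:Boris (typed only
# in ex:g2) still joins with his label from ex:g1.
from rdflib import RDF, RDFS, ConjunctiveGraph, Literal, Namespace

EX = Namespace('https://ex.com/')
g = ConjunctiveGraph()
g.get_context(EX.g1).add((EX.Boris, RDFS.label, Literal('Boris')))
g.get_context(EX.g1).add((EX.Susan, RDFS.label, Literal('Susan')))
g.get_context(EX.g2).add((EX.Boris, RDF.type, EX.Person))

rows = list(g.query(
    "SELECT ?l WHERE { GRAPH ?g { ?a rdfs:label ?l } ?a a ?type }"))
assert rows == [(Literal('Boris'),)]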
app = Flask(__name__)
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.config['CACHE_TYPE'] = 'SimpleCache'
app.config['CACHE_DEFAULT_TIMEOUT'] = 300
config = yload(open(join(app.root_path, 'config.yml')).read(), Loader=FullLoader)
app.jinja_env.line_statement_prefix = '#'
scache = Cache(app)
extractors = {}
cache = config.get('cache', False)
base = config['base']
store = None
if config.get('store', False):
    store = ConjunctiveGraph('Sleepycat')
    store.open(config['store'], create=True)

@app.route('/')
@scache.cached(timeout=300)
def index():
    #j = sparql('select ?class (count(?class) as ?count) where { ?s a ?class } group by ?class order by DESC(?count)')
    return render_template('index.html',
                           counts=[],
                           base=base,
                           title=config.get('title', 'No title'),
                           description=config.get('description', None),
                           empty_message=config.get('empty_message', None))
def handle_embedded_annot(data):
    step = 0
    sid = request.sid
    print(sid)
    uri = str(data['url'])
    print('retrieving embedded annotations for ' + uri)
    print("Retrieve KG for uri: " + uri)
    page = requests.get(uri)
    html = page.content
    d = extruct.extract(html,
                        syntaxes=['microdata', 'rdfa', 'json-ld'],
                        errors='ignore')
    print(d)

    kg = ConjunctiveGraph()

    # TODO this is a workaround for the Schema.org / json-ld issue
    # print(url_for('static', filename='data/jsonldcontext.json'))
    context_url = "http://*****:*****"  # URL redacted in the source

    for md in d['json-ld']:
        if '@context' in md.keys():
            print(md['@context'])
            if ('https://schema.org' in md['@context']) or ('http://schema.org' in md['@context']):
                # md['@context'] = 'https://schema.org/docs/jsonldcontext.json'
                md['@context'] = context_url
        kg.parse(data=json.dumps(md, ensure_ascii=False), format="json-ld")

    for md in d['rdfa']:
        if '@context' in md.keys():
            if ('https://schema.org' in md['@context']) or ('http://schema.org' in md['@context']):
                # md['@context'] = 'https://schema.org/docs/jsonldcontext.json'
                md['@context'] = context_url
        kg.parse(data=json.dumps(md, ensure_ascii=False), format="json-ld")

    for md in d['microdata']:
        if '@context' in md.keys():
            if ('https://schema.org' in md['@context']) or ('http://schema.org' in md['@context']):
                # md['@context'] = 'https://schema.org/docs/jsonldcontext.json'
                md['@context'] = context_url
        kg.parse(data=json.dumps(md, ensure_ascii=False), format="json-ld")

    kgs[sid] = kg

    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(len(kg))

    # check whether the uri contains an id or DOI
    if util.is_DOI(uri):
        uri = util.get_DOI(uri)
        print(f'FOUND DOI: {uri}')
        # describe on lod.openaire
        kg = util.describe_loa(uri, kg)

    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(len(kg))

    kg = util.describe_opencitation(uri, kg)
    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(len(kg))

    kg = util.describe_wikidata(uri, kg)
    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(len(kg))

    kg = util.describe_biotools(uri, kg)
    step += 1
    emit('update_annot', step)
    emit('send_annot', str(kg.serialize(format='turtle').decode()))
    print(f'ended with step {step}')
    print(len(kg))
    print(step)
def testQueryPlus(self):
    graph = ConjunctiveGraph()
    graph.parse(StringIO(test_data), format="n3")
    result_json = graph.query(test_query).serialize(format='json')
    self.failUnless(result_json.find(correct) > 0)
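# Standalone version of what the test above exercises (test_data,
# test_query and correct live elsewhere in its suite): run a SELECT and
# serialize the bindings as SPARQL Results JSON.
from io import StringIO
from rdflib import ConjunctiveGraph

g = ConjunctiveGraph()
g.parse(StringIO('<urn:a> <urn:b> "c" .'), format="n3")
result_json = g.query("SELECT ?o WHERE { ?s ?p ?o }").serialize(format='json')
print(result_json)  # bytes in older rdflib releases, str in newer ones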
def _process_data(self, document):
    '''
    Creates the RDF graph describing the event
    @param document: the DOM document of the event
    '''
    # Create the graph
    graph = ConjunctiveGraph()
    graph.bind('swc', SWC)
    graph.bind('cfp', CFP)
    graph.bind('ical', ICAL)
    graph.bind('foaf', FOAF)
    graph.bind('dct', DCT)
    graph.bind('lode', LODE)

    # Init the event
    resource_event = LDES[self.get_resource_name()]
    graph.add((resource_event, RDF.type, SWC['AcademicEvent']))

    # Get the title
    if document.find(id='inner_left') is not None:
        title = document.find(id='inner_left').find('h1').text
        graph.add((resource_event, RDFS.label, Literal(title)))

    # Get the location
    if document.find(text='City:') is not None and document.find(
            text='Country:') is not None:
        city = document.find(
            text='City:').findParent().findNextSibling().renderContents()
        country = document.find(text='Country:').findParent(
        ).findNextSibling().renderContents()
        location = get_location(city, country)
        if location is None:
            location = Literal("%s, %s" % (city, country))
        graph.add((resource_event, FOAF['based_near'], location))

    # Get the starting and ending dates
    if document.find(text='Period:') is not None:
        text = document.find(text='Period:').findParent().findNextSibling(
        ).renderContents()
        parts = re.search(
            r'(?P<begin>[^-,]*)(-(?P<end>[^,]*))?, (?P<year>\d{4})',
            text).groupdict()
        if parts['begin'] is not None and parts['year'] is not None:
            (month, start_day) = parts['begin'].split(' ')
            begin_date = datetime.strptime(
                "%s %s %s" % (start_day, month, parts['year']), "%d %B %Y")
            graph.add((resource_event, ICAL['dtstart'], Literal(begin_date)))
            if parts['end'] is not None:
                end_parts = parts['end'].split(' ')
                end_date = None
                if len(end_parts) == 2:
                    end_date = datetime.strptime(
                        "%s %s %s" % (end_parts[1], end_parts[0], parts['year']),
                        "%d %B %Y")
                elif len(end_parts) == 1:
                    end_date = datetime.strptime(
                        "%s %s %s" % (end_parts[0], month, parts['year']),
                        "%d %B %Y")
                if end_date is not None:
                    graph.add((resource_event, ICAL['dtend'], Literal(end_date)))

    # Get the data for the CFP
    resource_cfp = LDES[self.get_resource_name() + "_cfp"]
    graph.add((resource_cfp, RDF.type, CFP['CallForPapers']))
    graph.add((resource_cfp, CFP['for'], LDES[self.entity_id]))
    graph.add(
        (resource_cfp, CFP['details'],
         URIRef(BASE + 'data/' + self.get_resource_name() + '_cfp.txt')))

    # Get the deadlines
    deadlines = []
    for a in document.findAll('script'):
        res = re.search('var deadlineList = ([^;]*);', a.renderContents())
        if res is not None:
            txt = res.group(1).replace('\n', '').replace('\t', '').replace("'", '"')
            txt = re.sub(r'<span [^>]*>([^<]*)</span>', r'\g<1>', txt,
                         flags=re.IGNORECASE)
            txt = txt.replace('Date:', '"Date":').replace('Title:', '"Title":')
            deadlines = json.loads(txt)
    i = 0
    for deadline in deadlines:
        resource_deadline = LDES[self.get_resource_name() + '_deadline_' + str(i)]
        graph.add((resource_deadline, RDF.type, ICAL['Vevent']))
        graph.add((resource_deadline, ICAL['dtstart'],
                   Literal(datetime.strptime(deadline['Date'], "%d %b %Y"))))
        graph.add((resource_deadline, ICAL['dtend'],
                   Literal(datetime.strptime(deadline['Date'], "%d %b %Y"))))
        graph.add((resource_deadline, ICAL['summary'], Literal(deadline['Title'])))
        graph.add((resource_deadline, ICAL['relatedTo'], resource_event))
        i = i + 1

    # Add the topics and persons
    if document.find(id='cfp-content') is not None:
        for link in document.find(id='cfp-content').findAll('a'):
            link = link.get('href')
            if link is not None:
                if link[:3] == '/t/' and link not in self.topics_set:
                    try:
                        graph.add(
                            (resource_event, DCT['subject'],
                             LDES[Topic(link[1:-1]).get_resource_name()]))
                        self.topics_set.add(link[1:-1])
                    except Exception:
                        # Ignore bad topic links
                        pass
                if link[:3] == '/p/' and link not in self.persons_set:
                    try:
                        graph.add(
                            (resource_event, LODE['involvedAgent'],
                             LDES[Person(link[1:-1]).get_resource_name()]))
                        self.persons_set.add(link[1:-1])
                    except Exception:
                        # Ignore bad person links
                        pass

    # Set the last modification date
    graph.add(
        (self.get_named_graph(), DCT['modified'], Literal(datetime.now())))

    # Save the data
    self.rdf_data = graph.serialize()
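# The 'Period:' parsing above hinges on one regex; a quick illustration of
# the groups it yields for the two date shapes the scraped pages use:
import re

PERIOD = r'(?P<begin>[^-,]*)(-(?P<end>[^,]*))?, (?P<year>\d{4})'
print(re.search(PERIOD, 'May 3-5, 2011').groupdict())
# -> {'begin': 'May 3', 'end': '5', 'year': '2011'}
print(re.search(PERIOD, 'May 3, 2011').groupdict())
# -> {'begin': 'May 3', 'end': None, 'year': '2011'}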
def example_1():
    """Creates a ConjunctiveGraph and performs some BerkeleyDB tasks with it"""
    path = mktemp()

    # Declare we are using a BerkeleyDB Store
    graph = ConjunctiveGraph("BerkeleyDB")

    # Open previously created store, or create it if it doesn't exist yet
    # (always doesn't exist in this example as using temp file location)
    rt = graph.open(path, create=False)

    if rt == NO_STORE:
        # There is no underlying BerkeleyDB infrastructure, so create it
        print("Creating new DB")
        graph.open(path, create=True)
    else:
        print("Using existing DB")
        assert rt == VALID_STORE, "The underlying store is corrupt"

    print("Triples in graph before add:", len(graph))
    print("(will always be 0 when using temp file for DB)")

    # Now we'll add some triples to the graph & commit the changes
    EG = Namespace("http://example.net/test/")
    graph.bind("eg", EG)
    graph.add((EG["pic:1"], EG.name, Literal("Jane & Bob")))
    graph.add((EG["pic:2"], EG.name, Literal("Squirrel in Tree")))
    graph.commit()

    print("Triples in graph after add:", len(graph))
    print("(should be 2)")

    # display the graph in Turtle
    print(graph.serialize())

    # close when done, otherwise BerkeleyDB will leak lock entries.
    graph.close()
    graph = None

    # reopen the graph
    graph = ConjunctiveGraph("BerkeleyDB")
    graph.open(path, create=False)

    print("Triples still in graph:", len(graph))
    print("(should still be 2)")

    graph.close()

    # Clean up the temp folder to remove the BerkeleyDB database files...
    for f in os.listdir(path):
        os.unlink(path + "/" + f)
    os.rmdir(path)
def test_serialize(self):
    g = ConjunctiveGraph()
    uri1 = URIRef("http://example.org/mygraph1")
    uri2 = URIRef("http://example.org/mygraph2")

    bob = URIRef("urn:bob")
    likes = URIRef("urn:likes")
    pizza = URIRef("urn:pizza")

    g.get_context(uri1).add((bob, likes, pizza))
    g.get_context(uri2).add((bob, likes, pizza))

    s = g.serialize(format="nquads", encoding="latin-1")
    self.assertEqual(len([x for x in s.split(b"\n") if x.strip()]), 2)

    g2 = ConjunctiveGraph()
    g2.parse(data=s, format="nquads")

    self.assertEqual(len(g), len(g2))
    self.assertEqual(
        sorted(x.identifier for x in g.contexts()),
        sorted(x.identifier for x in g2.contexts()),
    )
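# Companion sketch to the test above: nquads keeps the context identifier
# as the fourth term on every line, which is why parsing the output back
# recreates the same named graphs.
from rdflib import ConjunctiveGraph, URIRef

g = ConjunctiveGraph()
ctx = g.get_context(URIRef("http://example.org/mygraph1"))
ctx.add((URIRef("urn:bob"), URIRef("urn:likes"), URIRef("urn:pizza")))

data = g.serialize(format="nquads")
g2 = ConjunctiveGraph()
g2.parse(data=data, format="nquads")
assert len(g2) == len(g)
assert next(g2.contexts()).identifier == ctx.identifier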
def convert(inputFilesOrDirs, inputFormat, inputExtensions,
            outputDir, outputFormat, outputExt,
            recursive=True, overwrite=True, loggingFunction=None):
    """
    Conversion function.

    @param inputFilesOrDirs : a list of paths (to a file or to a directory)
    @param inputFormat      : input files format (one of the keys of
                              INPUT_FORMAT_TO_EXTENSIONS)
    @param inputExtensions  : a list of input file extensions (one or more
                              values of INPUT_FORMAT_TO_EXTENSIONS)
    @param outputDir        : the output directory; None prints to stdout
    @param outputFormat     : output files format (one of the keys of
                              OUTPUT_FORMAT_TO_EXTENSION)
    @param outputExt        : the output files extension (one of the values
                              of OUTPUT_FORMAT_TO_EXTENSION)
    @param recursive        : if inputFilesOrDirs contains directories,
                              descend into these directories to find all files
    @param overwrite        : True to overwrite any existing file.
    """
    if loggingFunction is None:
        loggingFunction = INFO

    # process each input file sequentially:
    for inputFileOrDir in inputFilesOrDirs:
        loggingFunction("Processing input file or directory '%s'" % inputFileOrDir)

        # check if the file exists, and if it's a directory or a file
        isdir = False
        if os.path.exists(inputFileOrDir):
            if os.path.isdir(inputFileOrDir):
                DEBUG("'%s' exists and is a directory" % inputFileOrDir)
                inputFileOrDir = os.path.abspath(inputFileOrDir)
                isdir = True
            else:
                DEBUG("'%s' exists and is a file" % inputFileOrDir)
        else:
            raise IOError("Input file '%s' was not found" % inputFileOrDir)

        DEBUG("Input format: %s" % inputFormat)
        DEBUG("Output format: %s" % outputFormat)

        # find out which extensions we should match
        if inputExtensions is None:
            inputExtensions = INPUT_FORMAT_TO_EXTENSIONS[inputFormat]
        DEBUG("Input extensions: %s" % inputExtensions)

        # find out which output extension we should write
        if outputExt:
            outputExtension = outputExt
        else:
            outputExtension = OUTPUT_FORMAT_TO_EXTENSION[outputFormat]
        DEBUG("Output extension: '%s'" % outputExtension)

        inputFiles = []
        if isdir:
            DEBUG("Now walking the directory (recursive = %s):" % recursive)
            for root, dirnames, filenames in os.walk(inputFileOrDir):
                DEBUG(" * Finding files in '%s'" % root)
                for extension in inputExtensions:
                    for filename in fnmatch.filter(filenames, "*%s" % extension):
                        DEBUG("   -> found '%s'" % filename)
                        inputFiles.append(os.path.join(root, filename))
                if not recursive:
                    break
        else:
            inputFiles.append(inputFileOrDir)

        # create the graph, and parse the input files
        for inputFile in inputFiles:
            g = ConjunctiveGraph()
            g.parse(inputFile, format=inputFormat)
            DEBUG("the graph was parsed successfully")

            # if no output directory is specified, just print the output to stdout
            if outputDir is None:
                output = g.serialize(None, format=outputFormat)
                DEBUG("output:")
                print(output)
            # if an output directory was provided, but it doesn't exist,
            # then exit the function
            elif not os.path.exists(outputDir):
                raise IOError("Output dir '%s' was not found" % outputDir)
            # if the output directory was given and it exists, then figure out
            # the output filename and write the output to disk
            else:
                head, tail = os.path.split(inputFile)
                DEBUG("head, tail: %s, %s" % (head, tail))

                # remove the common prefix from the head and the input directory
                # (otherwise the given input path will also be added to the
                # output path)
                commonPrefix = os.path.commonprefix([head, inputFileOrDir])
                DEBUG("inputFileOrDir: %s" % inputFileOrDir)
                DEBUG("common prefix: %s" % commonPrefix)
                headWithoutCommonPrefix = head[len(commonPrefix) + 1:]
                DEBUG("head without common prefix: %s" % headWithoutCommonPrefix)
                outputAbsPath = os.path.join(os.path.abspath(outputDir),
                                             headWithoutCommonPrefix)
                DEBUG("output absolute path: %s" % outputAbsPath)
                outputFileName = os.path.splitext(tail)[0] + outputExtension
                outputAbsFileName = os.path.join(outputAbsPath, outputFileName)
                DEBUG("output filename: '%s'" % outputAbsFileName)

                # for safety, check that we're not overwriting the input file
                if outputAbsFileName == os.path.abspath(inputFile):
                    raise IOError("Input file '%s' is the same as output file" %
                                  outputAbsFileName)
                else:
                    DEBUG("this file is different from the input filename")

                    # check if we need to skip this file
                    skipThisFile = os.path.exists(outputAbsFileName) and not overwrite
                    if skipThisFile:
                        DEBUG("this file will be skipped")
                    else:
                        dirName = os.path.dirname(outputAbsFileName)
                        if not os.path.exists(dirName):
                            DEBUG("Now creating %s since it does not exist yet" % dirName)
                            os.makedirs(dirName)

                        loggingFunction("Writing %s" % outputAbsFileName)
                        g.serialize(outputAbsFileName, auto_compact=True,
                                    format=outputFormat)
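# A hypothetical invocation of convert(); the directory names are
# illustrative, and the format keys must match whatever the module's
# INPUT_FORMAT_TO_EXTENSIONS / OUTPUT_FORMAT_TO_EXTENSION tables define.
convert(
    ['dumps'],               # walk this directory...
    inputFormat='nt',
    inputExtensions=None,    # ...matching the default extensions for 'nt'
    outputDir='out',
    outputFormat='turtle',
    outputExt=None,          # use the default extension for 'turtle'
    recursive=True,
    overwrite=False,         # keep any already-converted files
)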
class SyncedGraph(CurrentStateGraphApi, AutoDepGraphApi, GraphEditApi):
    """
    graph for clients to use. Changes are synced with the master graph
    in the rdfdb process.

    self.patch(p: Patch) is the only way to write to the graph.

    Reading can be done with the AutoDepGraphApi methods which set up
    watchers to call you back when the results of the read have
    changed (like knockoutjs). Or you can read with
    CurrentStateGraphApi which doesn't have watchers, but you have to
    opt into using it so it's clear you aren't in an auto-dep context
    and meant to set up watchers.

    You may want to attach to self.initiallySynced deferred so you
    don't attempt patches before we've heard the initial contents of
    the graph. It would be ok to accumulate some patches of new
    material, but usually you won't correctly remove the existing
    statements unless we have the correct graph.

    If we get out of sync, we abandon our local graph (even any
    pending local changes) and get the data again from the server.
    """

    def __init__(self, rdfdbRoot: URIRef, label: str,
                 receiverHost: Optional[str] = None):
        """
        label is a string that the server will display in association
        with your connection

        receiverHost is the hostname other nodes can use to talk to me
        """
        self.isConnected = False
        self.currentClient: Optional[WsClientProtocol] = None
        self.rdfdbRoot = rdfdbRoot
        self.connectSocket()
        self.initiallySynced: defer.Deferred[None] = defer.Deferred()
        self._graph = ConjunctiveGraph()

        AutoDepGraphApi.__init__(self)
        # this needs more state to track if we're doing a resync (and
        # everything has to error or wait) or if we're live

    def lostRdfdbConnection(self) -> None:
        self.isConnected = False
        self.patch(Patch(delQuads=self._graph.quads()))
        log.info(f'cleared graph to {len(self._graph)}')
        log.error('graph is not updating- you need to restart')
        self.connectSocket()

    def connectSocket(self) -> None:
        factory = autobahn.twisted.websocket.WebSocketClientFactory(
            self.rdfdbRoot.replace('http://', 'ws://') + 'syncedGraph',
            # Don't know if this is required by spec, but
            # cyclone.websocket breaks with no origin header.
            origin='foo')
        factory.protocol = lambda: WsClientProtocol(self)

        rr = urllib.parse.urlparse(self.rdfdbRoot)
        conn = reactor.connectTCP(rr.hostname.encode('ascii'), rr.port, factory)
        # WsClientProtocol sets our currentClient. Needs rewrite using agents.

    def resync(self):
        """
        get the whole graph again from the server (e.g. we had a
        conflict while applying a patch and want to return to the
        truth).

        To avoid too much churn, we remember our old graph and diff it
        against the replacement. This way, our callers only see the
        corrections.

        Edits you make during a resync will surely be lost, so I
        should just fail them. There should be a notification back to
        UIs who want to show that we're doing a resync.
        """
        log.info('resync')
        self.currentClient.dropConnection()

    def _resyncGraph(self, response):
        log.warn("new graph in")
        self.currentClient.dropConnection()
        # diff against old entire graph
        # broadcast that change

    def patch(self, p: Patch) -> None:
        """send this patch to the server and apply it to our local
        graph and run handlers"""
        if not self.isConnected or self.currentClient is None:
            log.warn("not currently connected- dropping patch")
            return
        if p.isNoop():
            log.info("skipping no-op patch")
            return
        # these could fail if we're out of sync. One approach:
        # Rerequest the full state from the server, try the patch
        # again after that, then give up.
        debugKey = '[id=%s]' % (id(p) % 1000)
        log.debug("\napply local patch %s %s", debugKey, p)
        try:
            self._applyPatchLocally(p)
        except ValueError as e:
            log.error(e)
            self.resync()
            return
        log.debug('runDepsOnNewPatch')
        self.runDepsOnNewPatch(p)
        log.debug('sendPatch')
        self.currentClient.sendPatch(p)
        log.debug('patch is done %s', debugKey)

    def suggestPrefixes(self, ctx, prefixes):
        """
        when writing files for this ctx, try to use these n3
        prefixes. async, not guaranteed to finish before any
        particular file flush
        """
        treq.post(self.rdfdbRoot + 'prefixes',
                  json.dumps({
                      'ctx': ctx,
                      'prefixes': prefixes
                  }).encode('utf8'))

    def _applyPatchLocally(self, p: Patch):
        # .. and disconnect on failure
        patchQuads(self._graph, p.delQuads, p.addQuads, perfect=True)
        log.debug("graph now has %s statements" % len(self._graph))

    def onPatchFromDb(self, p):
        """
        central server has sent us a patch
        """
        if log.isEnabledFor(logging.DEBUG):
            if len(p.addQuads) > 50:
                log.debug('server has sent us %s', p.shortSummary())
            else:
                log.debug('server has sent us %s', p)

        self._applyPatchLocally(p)
        try:
            self.runDepsOnNewPatch(p)
        except Exception:
            # don't reflect this error back to the server; we did
            # receive its patch correctly. However, we're in a bad
            # state since some dependencies may not have rerun
            traceback.print_exc()
            log.warn("some graph dependencies may not have completely run")

        if self.initiallySynced:
            self.initiallySynced.callback(None)
            self.initiallySynced = None
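# Hedged usage sketch for SyncedGraph; the rdfdb root URL and the quad are
# illustrative, and Patch is assumed to take the same addQuads/delQuads
# keywords the methods above already use.
from rdflib import Literal, URIRef

graph = SyncedGraph(URIRef('http://localhost:9999/rdfdb/'), label='demo-client')

def onSynced(_):
    # per the class docstring, wait for the initial contents before patching
    ctx = URIRef('http://example.com/graph/demo')
    quad = (URIRef('urn:lamp1'), URIRef('urn:brightness'), Literal(0.5), ctx)
    graph.patch(Patch(addQuads=[quad], delQuads=[]))

graph.initiallySynced.addCallback(onSynced)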
class MirbaseDB(object):
    def __init__(self, db_path):
        self.g = ConjunctiveGraph()
        self.path = db_path
        self.choices = set()
        self.labels = {}

    def create_graph(self):
        self.g.open(self.path + "data.rdf", create=True)
        data = self.parse_mirbase(self.path)
        # g = ConjunctiveGraph(store="SPARQLUpdateStore")
        # g.bind()
        mirna_class = URIRef("http://purl.obolibrary.org/obo/SO_0000276")
        for mid in data:
            mirna_instance = URIRef(MIRBASE + data[mid]["acc"])
            self.g.add((mirna_instance, RDF.type, mirna_class))
            label = Literal(data[mid]["name"])
            self.g.add((mirna_instance, RDFS.label, label))
            description = Literal(data[mid]["description"])
            self.g.add((mirna_instance, RDFS.comment, description))
            for p in data[mid]["previous_names"]:
                if p.strip():
                    previous_name = Literal(p)
                    self.g.add((mirna_instance, MIRBASE["previous_acc"],
                                previous_name))
            for mature in data[mid]["mature"]:
                mature_instance = URIRef(MIRBASE + data[mid]["mature"][mature]["acc"])
                self.g.add((mature_instance, RDF.type, mirna_class))
                mature_label = Literal(data[mid]["mature"][mature]["name"])
                self.g.add((mature_instance, RDFS.label, mature_label))
                for mature_p in data[mid]["mature"][mature]["previous_names"]:
                    if mature_p.strip():
                        mature_previous_name = Literal(mature_p)
                        self.g.add((mature_instance, MIRBASE["previous_acc"],
                                    mature_previous_name))
                self.g.add((mirna_instance, MIRBASE["stemloopOf"], mature_instance))

    def parse_mirbase(self, mirbase_root):
        mirna_dic = {}
        with open(mirbase_root + "mirna.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mname = props[2]
                mid = props[0]
                macc = props[1]
                mdesc = props[4]
                mprev = props[3].split(";")
                if int(props[-1]) != 22:  # not Homo sapiens
                    continue
                mirna_dic[mid] = {}
                mirna_dic[mid]["name"] = mname
                mirna_dic[mid]["acc"] = macc
                mirna_dic[mid]["previous_names"] = mprev
                mirna_dic[mid]["description"] = mdesc
        mature_dic = {}
        with open(mirbase_root + "mirna_mature.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mname = props[1]
                mid = props[0]
                macc = props[3]
                # mdesc = props[4]
                mprev = props[2].split(";")
                if not mname.startswith("hsa-"):  # not Homo sapiens
                    continue
                mature_dic[mid] = {}
                mature_dic[mid]["name"] = mname
                mature_dic[mid]["previous_names"] = mprev
                mature_dic[mid]["acc"] = macc
        with open(mirbase_root + "mirna_pre_mature.txt") as mirnas:
            for m in mirnas:
                props = m.strip().split("\t")
                mid, matureid = props[:2]
                if mid in mirna_dic:
                    if "mature" not in mirna_dic[mid]:
                        mirna_dic[mid]["mature"] = {}
                    mirna_dic[mid]["mature"][matureid] = mature_dic[matureid]
        # pp.pprint(mirna_dic)
        return mirna_dic

    def map_label(self, label):
        label = label.lower()
        label = label.replace("microrna", "mir")
        label = label.replace("mirna", "mir")
        if not label.startswith("hsa-"):
            label = "hsa-" + label

        result = process.extractOne(label, self.choices)
        # result = process.extract(label, choices, limit=3)
        """if result[1] != 100:
            print
            print "original:", label.encode("utf-8"), result
            # if label[-1].isdigit():
            #     label += "a"
            # else:
            new_label = label + "-1"
            revised_result = process.extractOne(new_label, self.choices)
            if revised_result[1] != 100:
                new_label = label + "a"
                revised_result = process.extractOne(new_label, self.choices)
            if revised_result[1] > result[1]:
                result = revised_result
                print "revised:", label.encode("utf-8"), result"""
        return result

    def load_graph(self):
        self.g.load(self.path + "data.rdf")
        # print "Opened graph with {} triples".format(len(self.g))
        self.get_label_to_acc()
        self.choices = self.labels.keys()

    def get_label_to_acc(self):
        for subj, pred, obj in self.g.triples((None, RDFS.label, None)):
            self.labels[str(obj)] = str(subj)

    def save_graph(self):
        self.g.serialize(self.path + "data.rdf", format='pretty-xml')
        print('Triples in graph after add: ', len(self.g))
        self.g.close()
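# Hypothetical end-to-end use of MirbaseDB: build and persist the graph
# from the miRBase dump files once, then reload it for fuzzy label lookups
# (the path is an assumption).
db = MirbaseDB("/data/mirbase/")
db.create_graph()
db.save_graph()

db = MirbaseDB("/data/mirbase/")
db.load_graph()
print(db.map_label("miR-21"))  # (best-matching hsa-* label, score)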
def handle(self, **options):
    LOGGER.debug("linking places")
    for place in models.Place.objects.filter(dbpedia__isnull=True):
        if not place.city or not place.state:
            continue

        # formulate a dbpedia place uri
        path = urllib2.quote('%s,_%s' % (_clean(place.city), _clean(place.state)))
        url = URIRef('http://dbpedia.org/resource/%s' % path)

        # attempt to get a graph from it
        graph = ConjunctiveGraph()
        try:
            LOGGER.debug("looking up %s" % url)
            graph.load(url)
        except urllib2.HTTPError as e:
            LOGGER.error(e)

        # if we've got more than 3 assertions, extract some stuff from
        # the graph and save some info back to the db; it would be nice
        # to have a triple store underneath where we could persist
        # all the facts, eh?
        if len(graph) >= 3:
            place.dbpedia = url
            place.latitude = graph.value(url, geo['lat'])
            place.longitude = graph.value(url, geo['long'])
            for object in graph.objects(URIRef(url), owl['sameAs']):
                if object.startswith('http://sws.geonames.org'):
                    place.geonames = object
            place.save()
            LOGGER.info("found dbpedia resource %s" % url)
        else:
            LOGGER.warn("couldn't find dbpedia resource for %s" % url)

        reset_queries()

    LOGGER.info("finished looking up places in dbpedia")
    LOGGER.info("dumping place_links.json fixture")

    # it would be nice to use django.core.serializer here,
    # but it serializes everything about the model, including
    # titles that are linked to ... and this could theoretically
    # change over time, so we only preserve the facts that have
    # been harvested from dbpedia, so they can overlay over
    # the places that have been extracted during title load
    json_src = []
    places_qs = models.Place.objects.filter(dbpedia__isnull=False)
    for p in places_qs.order_by('name'):
        json_src.append({
            'name': p.name,
            'dbpedia': p.dbpedia,
            'geonames': p.geonames,
            'longitude': p.longitude,
            'latitude': p.latitude
        })
    reset_queries()

    json.dump(json_src, open('core/fixtures/place_links.json', 'w'), indent=2)
    LOGGER.info("finished dumping place_links.json fixture")
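# The per-place lookup above, reduced to a standalone sketch; geo and owl
# are the usual WGS84/OWL namespaces, and the resource URI is illustrative.
from rdflib import ConjunctiveGraph, Namespace, URIRef

geo = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
owl = Namespace("http://www.w3.org/2002/07/owl#")

url = URIRef('http://dbpedia.org/resource/Austin,_Texas')
graph = ConjunctiveGraph()
graph.load(url)  # use graph.parse(url) on rdflib >= 6
print(graph.value(url, geo['lat']), graph.value(url, geo['long']))
for same in graph.objects(url, owl['sameAs']):
    if same.startswith('http://sws.geonames.org'):
        print("geonames:", same)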
class Inspector(object):
    """
    Class that includes methods for querying an RDFS/OWL ontology
    """

    def __init__(self, uri, language=""):
        super(Inspector, self).__init__()
        self.rdfGraph = ConjunctiveGraph()
        try:
            self.rdfGraph.parse(uri, format="application/rdf+xml")
        except Exception:
            try:
                self.rdfGraph.parse(uri, format="n3")
            except Exception:
                raise exceptions.Error(
                    "Could not parse the file! Is it a valid RDF/OWL ontology?")
        finally:
            self.baseURI = self.get_OntologyURI() or uri
            self.allclasses = self.__getAllClasses(includeDomainRange=True,
                                                   includeImplicit=True,
                                                   removeBlankNodes=False,
                                                   excludeRDF_OWL=False)

    def get_OntologyURI(self, return_as_string=True):
        test = [x for x, y, z in self.rdfGraph.triples((None, RDF.type, Ontology))]
        if test:
            if return_as_string:
                return str(test[0])
            else:
                return test[0]
        else:
            return None

    def __getAllClasses(self, classPredicate="", includeDomainRange=False,
                        includeImplicit=False, removeBlankNodes=True,
                        addOWLThing=True, excludeRDF_OWL=True):
        rdfGraph = self.rdfGraph
        exit = {}

        def addIfYouCan(x, mydict):
            if excludeRDF_OWL:
                if x.startswith('http://www.w3.org/2002/07/owl#') or \
                   x.startswith("http://www.w3.org/1999/02/22-rdf-syntax-ns#") or \
                   x.startswith("http://www.w3.org/2000/01/rdf-schema#"):
                    return mydict
            if x not in mydict:
                mydict[x] = None
            return mydict

        if addOWLThing:
            exit = addIfYouCan(Thing, exit)

        if classPredicate == "rdfs" or classPredicate == "":
            for s in rdfGraph.subjects(RDF.type, RDFS.Class):
                exit = addIfYouCan(s, exit)

        if classPredicate == "owl" or classPredicate == "":
            for s in rdfGraph.subjects(RDF.type, Class):
                exit = addIfYouCan(s, exit)

        if includeDomainRange:
            for o in rdfGraph.objects(None, RDFS.domain):
                exit = addIfYouCan(o, exit)
            for o in rdfGraph.objects(None, RDFS.range):
                exit = addIfYouCan(o, exit)

        if includeImplicit:
            for s, v, o in rdfGraph.triples((None, RDFS.subClassOf, None)):
                exit = addIfYouCan(s, exit)
                exit = addIfYouCan(o, exit)
            for o in rdfGraph.objects(None, RDF.type):
                exit = addIfYouCan(o, exit)

        # get a list
        exit = exit.keys()
        if removeBlankNodes:
            exit = [x for x in exit if not isBlankNode(x)]
        return sort_uri_list_by_name(exit)

    def __getTopclasses(self, classPredicate=''):
        returnlist = []
        for eachclass in self.__getAllClasses(classPredicate):
            x = self.get_classDirectSupers(eachclass)
            if not x:
                returnlist.append(eachclass)
        return sort_uri_list_by_name(returnlist)

    def __getTree(self, father=None, out=None):
        if not father:
            out = {}
            topclasses = self.toplayer
            out[0] = topclasses
            for top in topclasses:
                children = self.get_classDirectSubs(top)
                out[top] = children
                for potentialfather in children:
                    self.__getTree(potentialfather, out)
            return out
        else:
            children = self.get_classDirectSubs(father)
            out[father] = children
            for ch in children:
                self.__getTree(ch, out)

    def __buildClassTree(self, father=None, out=None):
        if not father:
            out = {}
            topclasses = self.toplayer
            out[0] = [Thing]
            out[Thing] = sort_uri_list_by_name(topclasses)
            for top in topclasses:
                children = self.get_classDirectSubs(top)
                out[top] = sort_uri_list_by_name(children)
                for potentialfather in children:
                    self.__buildClassTree(potentialfather, out)
            return out
        else:
            children = self.get_classDirectSubs(father)
            out[father] = sort_uri_list_by_name(children)
            for ch in children:
                self.__buildClassTree(ch, out)

    # methods for getting ancestors and descendants of classes:
    # by default, we do not include blank nodes

    def get_classDirectSupers(self, aClass, excludeBnodes=True, sortUriName=False):
        returnlist = []
        for o in self.rdfGraph.objects(aClass, RDFS.subClassOf):
            if not (o == Thing):
                if excludeBnodes:
                    if not isBlankNode(o):
                        returnlist.append(o)
                else:
                    returnlist.append(o)
        if sortUriName:
            return sort_uri_list_by_name(remove_duplicates(returnlist))
        else:
            return remove_duplicates(returnlist)

    def get_classDirectSubs(self, aClass, excludeBnodes=True):
        returnlist = []
        for s, v, o in self.rdfGraph.triples((None, RDFS.subClassOf, aClass)):
            if excludeBnodes:
                if not isBlankNode(s):
                    returnlist.append(s)
            else:
                returnlist.append(s)
        return sort_uri_list_by_name(remove_duplicates(returnlist))

    def get_classSiblings(self, aClass, excludeBnodes=True):
        returnlist = []
        for father in self.get_classDirectSupers(aClass, excludeBnodes):
            for child in self.get_classDirectSubs(father, excludeBnodes):
                if child != aClass:
                    returnlist.append(child)
        return sort_uri_list_by_name(remove_duplicates(returnlist))

    def entitySynonyms(self, anEntity, language=DEFAULT_LANGUAGE, getall=True):
        if getall:
            temp = []
            # Uberon synonyms
            for o in self.rdfGraph.objects(anEntity, Synonym):
                temp += [o]
            # EFO synonyms
            for o in self.rdfGraph.objects(anEntity, EFO_Synonym):
                temp += [o]
            # OBI synonyms
            for o in self.rdfGraph.objects(anEntity, OBO_Synonym):
                temp += [o]
            return temp
        else:
            for o in self.rdfGraph.objects(anEntity, Synonym):
                if getattr(o, 'language') and getattr(o, 'language') == language:
                    return o
            return ""

    def classFind(self, name, exact=False):
        temp = []
        if name:
            for x in self.allclasses:
                if exact:
                    if x.__str__().lower() == str(name).lower():
                        return [x]
                else:
                    if x.__str__().lower().find(str(name).lower()) >= 0:
                        temp.append(x)
        return temp
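# Hedged usage sketch for Inspector; the FOAF spec URL is illustrative, and
# any RDFS/OWL ontology reachable by rdflib would do.
insp = Inspector("http://xmlns.com/foaf/spec/index.rdf")
print(insp.baseURI)                     # the ontology URI, or the input URI
for klass in insp.classFind("person"):  # case-insensitive substring match
    print(klass)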