def buildhhear(ctx):
    """Assemble and run the HHEAR ontology SETL build.

    Loads the 'hhear-ontology.setl.ttl' workflow, registers every ``.ttl``
    fragment found in HHEAR_DIR as a void:Dataset produced by a setl:Extract
    of that file, wires each fragment into the generators of the three
    ontology output files (hhear.ttl / hhear.owl / hhear.json), and then
    executes the workflow with ``setlr._setl``.

    :param ctx: task context (unused here) -- presumably an invoke/fabric
        context object; confirm against the task runner.
    """
    workflow = Graph()
    workflow.parse('hhear-ontology.setl.ttl', format="turtle")
    base = os.getcwd()
    # One output-file resource per serialization of the built ontology.
    outputs = [
        workflow.resource(URIRef('file://' + base + '/hhear.' + ext))
        for ext in ('ttl', 'owl', 'json')
    ]
    print(len(workflow))
    for name in os.listdir(HHEAR_DIR):
        # Skip non-turtle files and editor backup/hidden files.
        if not name.endswith('.ttl') or name.startswith('#'):
            continue
        print('Adding fragment', name)
        piece = workflow.resource(BNode())
        for out in outputs:
            print(out.identifier, list(out[prov.wasGeneratedBy]))
            # Every output file's generating activity consumes this fragment.
            out.value(prov.wasGeneratedBy).add(prov.used, piece)
        piece.add(RDF.type, setlr.void.Dataset)
        extraction = workflow.resource(BNode())
        piece.add(prov.wasGeneratedBy, extraction)
        extraction.add(RDF.type, setl.Extract)
        extraction.add(prov.used, URIRef('file://' + HHEAR_DIR + name))
    setlr._setl(workflow)
def build():
    """Assemble and run the HASCO ontology SETL build.

    Loads the workflow from SETL_FILE, registers every ``.ttl`` fragment in
    HASCO_DIR as a void:Dataset produced by a setl:Extract of that file,
    wires each fragment into the generators of the three ontology output
    files (hasco.ttl / hasco.owl / hasco.json), prints the augmented
    workflow graph, and executes it with ``setlr._setl``.
    """
    setl_graph = Graph()
    # Graph.load() was removed in rdflib 6.x; parse() is the supported API.
    setl_graph.parse(SETL_FILE, format="turtle")
    cwd = os.getcwd()
    formats = ['ttl', 'owl', 'json']
    ontology_output_files = [
        setl_graph.resource(URIRef('file://' + cwd + '/hasco.' + x))
        for x in formats
    ]
    for filename in os.listdir(HASCO_DIR):
        if not filename.endswith('.ttl'):
            continue
        # Python 3 print function (original used py2 print statements,
        # inconsistent with the rest of this module).
        print('Adding fragment', filename)
        fragment = setl_graph.resource(BNode())
        for ontology_output_file in ontology_output_files:
            # Every output file's generating activity consumes this fragment.
            ontology_output_file.value(prov.wasGeneratedBy).add(
                prov.used, fragment)
        fragment.add(RDF.type, setlr.void.Dataset)
        fragment_extract = setl_graph.resource(BNode())
        fragment.add(prov.wasGeneratedBy, fragment_extract)
        fragment_extract.add(RDF.type, setl.Extract)
        fragment_extract.add(prov.used,
                             URIRef('file://' + HASCO_DIR + filename))
    print(setl_graph.serialize(format="turtle"))
    setlr._setl(setl_graph)
def process(self, i, o):
    """Execute the SETL workflow described by resource *i* and publish its
    outputs as nanopublications, retiring superseded copies in batches.

    :param i: the input resource; ``i.graph`` holds the SETL workflow and
        ``i.identifier`` names the setl:SemanticETLScript instance.
    :param o: output resource handed to ``self.explain`` for provenance.
    """
    query_store = self.app.db.store
    if hasattr(query_store, 'endpoint'):
        query_store = database.create_query_store(self.app.db.store)
    db_graph = rdflib.ConjunctiveGraph(store=query_store)
    db_graph.NS = self.app.NS
    # Expose whyis-specific actions to the SETL interpreter.
    setlr.actions[whyis.sparql] = db_graph
    setlr.actions[whyis.NanopublicationManager] = self.app.nanopub_manager
    setlr.actions[whyis.Nanopublication] = self.app.nanopub_manager.new
    setl_graph = i.graph
    # setlr.run_samples = True
    resources = setlr._setl(setl_graph)

    # Retire old copies of nanopubs previously generated by this script,
    # batching retire() calls in groups of ~100.
    old_np_map = {}
    to_retire = []
    for new_np, assertion, orig in self.app.db.query(
            '''select distinct ?np ?assertion ?original_uri where {
                   ?np np:hasAssertion ?assertion.
                   ?assertion a np:Assertion;
                       prov:wasGeneratedBy/a ?setl;
                       prov:wasQuotedFrom ?original_uri.
               }''',
            initBindings=dict(setl=i.identifier),
            initNs=dict(prov=prov, np=np)):
        old_np_map[orig] = assertion
        to_retire.append(new_np)
        if len(to_retire) > 100:
            self.app.nanopub_manager.retire(*to_retire)
            to_retire = []
    self.app.nanopub_manager.retire(*to_retire)

    for output_graph in setl_graph.subjects(prov.wasGeneratedBy,
                                            i.identifier):
        print(output_graph)
        if setl_graph.resource(output_graph)[
                rdflib.RDF.type:whyis.NanopublicationCollection]:
            # Already a nanopub collection: publish it directly.
            self.app.nanopub_manager.publish(resources[output_graph])
        else:
            out = resources[output_graph]
            out_conjunctive = rdflib.ConjunctiveGraph(
                store=out.store, identifier=output_graph)
            to_publish = []
            for new_np in self.app.nanopub_manager.prepare(
                    out_conjunctive):
                self.explain(new_np, i, o)
                to_publish.append(new_np)
            self.app.nanopub_manager.publish(*to_publish)

    # Close any generated resources that expose close(). The original code
    # mistakenly tested and closed *i* on every iteration instead of the
    # resource object (compare the analogous cleanup in convert_xml).
    for resource, obj in list(resources.items()):
        if hasattr(obj, 'close'):
            print("Closing", resource)
            try:
                obj.close()
            except Exception:
                # Best-effort cleanup; a failed close must not abort
                # processing of the remaining resources.
                pass
def convert_to_rdf(json_file):
    """Run the codegraph SETL workflow against *json_file*.

    Loads the workflow from SETL_FILE, points its JSON extract step at the
    given file, executes the workflow, and returns the graph it produced.

    :param json_file: path (relative to the current directory) of the JSON
        analysis file to convert.
    :return: the graph generated for the codegraph output node.
    """
    workflow = rdflib.Graph()
    workflow.parse(SETL_FILE, format="turtle")
    # Point the workflow's JSON extract activity at the requested input file.
    input_uri = rdflib.URIRef(
        'file://' + os.path.join(os.getcwd(), json_file))
    extract_step = workflow.value(
        rdflib.URIRef('http://purl.org/twc/codegraph/setl/codegraph_json'),
        prov.wasGeneratedBy)
    workflow.add((extract_step, prov.used, input_uri))
    outputs = setlr._setl(workflow)
    produced = outputs[
        rdflib.URIRef('http://purl.org/twc/codegraph/setl/codegraph')]
    print("analyzed " + json_file)
    return produced
def process(self, i, o):
    """Execute the SETL workflow in ``i.graph`` and publish its outputs as
    nanopublications, marking each as a quotation/revision of its source.

    :param i: input resource; ``i.graph`` holds the workflow and
        ``i.identifier`` names the setl:SemanticETLScript instance.
    :param o: output resource handed to ``self.explain`` for provenance.
    """
    setl_graph = i.graph
    resources = setlr._setl(setl_graph)

    # Retire old copies of nanopubs previously generated by this script,
    # remembering which assertion each original URI produced.
    old_np_map = {}
    for new_np, assertion, orig in self.app.db.query(
            '''select distinct ?np ?assertion ?original_uri where {
                   ?np np:hasAssertion ?assertion.
                   ?assertion a np:Assertion;
                       prov:wasGeneratedBy/a ?setl;
                       prov:wasQuotedFrom ?original_uri.
               }''',
            initBindings=dict(setl=i.identifier),
            initNs=dict(prov=prov, np=np)):
        old_np_map[orig] = assertion
        self.app.nanopub_manager.retire(new_np)

    for output_graph in setl_graph.subjects(prov.wasGeneratedBy,
                                            i.identifier):
        out = resources[output_graph]
        out_conjunctive = rdflib.ConjunctiveGraph(store=out.store,
                                                  identifier=output_graph)
        # mappings records original URI -> prepared assertion identifier.
        mappings = {}
        for new_np in self.app.nanopub_manager.prepare(out_conjunctive,
                                                       mappings=mappings):
            self.explain(new_np, i, o)
            # Python 3 print function (original used py2 print statements,
            # inconsistent with the rest of this module).
            print("Publishing", new_np.identifier)
            orig = [orig for orig, new in mappings.items()
                    if new == new_np.assertion.identifier]
            if len(orig) == 0:
                continue
            orig = orig[0]
            print(orig)
            if isinstance(orig, rdflib.URIRef):
                new_np.pubinfo.add((new_np.assertion.identifier,
                                    prov.wasQuotedFrom, orig))
                if orig in old_np_map:
                    # Link this assertion to the one it supersedes.
                    new_np.pubinfo.add((new_np.assertion.identifier,
                                        prov.wasRevisionOf,
                                        old_np_map[orig]))
            print("Nanopub assertion has", len(new_np.assertion),
                  "statements.")
            self.app.nanopub_manager.publish(new_np)
            print('Published')
def convert_xml(debug=None):
    """Convert nanomine XML files to RDF (TriG) via the SETL workflow.

    For each file (a single file when *debug* is given, otherwise up to
    PROCESS_FILE_COUNT files from XML_DIR) a copy of the base workflow is
    made, its input/output nodes are rewired to that file, and the workflow
    is run. Failures are reported and the batch continues.

    :param debug: optional single filename to process instead of scanning
        XML_DIR.
    """
    setl_graph = Graph()
    # Graph.load() was removed in rdflib 6.x; parse() is the supported API.
    setl_graph.parse(SETL_FILE, format="turtle")
    if debug:
        files = [debug]
    else:
        files = os.listdir(XML_DIR)[:PROCESS_FILE_COUNT]
    for filename in files:
        try:
            # Python 3 print function (original used py2 print statements,
            # inconsistent with the rest of this module).
            print('Processing', filename)
            # Work on a per-file copy so the base workflow stays pristine.
            local_setl_graph = Graph()
            local_setl_graph += setl_graph
            input_file_resource = local_setl_graph.resource(
                URIRef('http://nanomine.tw.rpi.edu/setl/xml/nanomine_xml'))
            input_file_resource.value(prov.wasGeneratedBy).set(
                prov.used, URIRef('file://' + XML_DIR + filename))
            output_file_resource = local_setl_graph.resource(
                URIRef(OUTPUT_DIR + filename.replace('.xml', '.trig')))
            output_file_resource.set(prov.used,
                                     URIRef('file://' + OUTPUT_DIR +
                                            filename))
            output_file_resource.set(dc['format'], Literal('trig'))
            output_file_resource.set(RDF.type, pv.File)
            generated_by = local_setl_graph.resource(BNode())
            output_file_resource.set(prov.wasGeneratedBy, generated_by)
            generated_by.set(RDF.type, setl.Load)
            generated_by.set(
                prov.used,
                URIRef('http://nanomine.tw.rpi.edu/setl/xml/nanopubs'))
            resources = setlr._setl(local_setl_graph)
            # Release any file/store-backed graphs the run produced.
            for identifier, graph in resources.items():
                if hasattr(graph, 'close'):
                    print("Closing", identifier)
                    graph.close()
        except Exception as e:
            # Deliberate best-effort batch conversion: report the failure
            # for this file and move on to the next one.
            print(e)
def process(self, i, o):
    """Execute the SETL workflow in ``i.graph`` and batch-publish its
    outputs as nanopublications, staging prepared nanopubs in an on-disk
    Sleepycat store.

    :param i: input resource; ``i.graph`` holds the workflow and
        ``i.identifier`` names the setl:SemanticETLScript instance.
    :param o: output resource handed to ``self.explain`` for provenance.
    """
    query_store = database.create_query_store(self.app.db.store)
    db_graph = rdflib.ConjunctiveGraph(store=query_store)
    db_graph.NS = self.app.NS
    # Expose the database to the SETL interpreter as a SPARQL action.
    setlr.actions[whyis.sparql] = db_graph
    setl_graph = i.graph
    # setlr.run_samples = True
    resources = setlr._setl(setl_graph)

    # Retire old copies of nanopubs previously generated by this script,
    # remembering which assertion each original URI produced.
    old_np_map = {}
    for new_np, assertion, orig in self.app.db.query(
            '''select distinct ?np ?assertion ?original_uri where {
                   ?np np:hasAssertion ?assertion.
                   ?assertion a np:Assertion;
                       prov:wasGeneratedBy/a ?setl;
                       prov:wasQuotedFrom ?original_uri.
               }''',
            initBindings=dict(setl=i.identifier),
            initNs=dict(prov=prov, np=np)):
        old_np_map[orig] = assertion
        self.app.nanopub_manager.retire(new_np)

    for output_graph in setl_graph.subjects(prov.wasGeneratedBy,
                                            i.identifier):
        out = resources[output_graph]
        out_conjunctive = rdflib.ConjunctiveGraph(store=out.store,
                                                  identifier=output_graph)
        # Stage prepared nanopubs in a disk-backed store to bound memory.
        # NOTE(review): this temp directory is never removed and the store
        # is never closed, leaking one directory per output graph — confirm
        # lifetime requirements before adding cleanup.
        nanopub_prepare_graph = rdflib.ConjunctiveGraph(store="Sleepycat")
        nanopub_prepare_graph_tempdir = tempfile.mkdtemp()
        nanopub_prepare_graph.store.open(nanopub_prepare_graph_tempdir, True)
        mappings = {}
        to_publish = []
        for new_np in self.app.nanopub_manager.prepare(
                out_conjunctive,
                mappings=mappings,
                store=nanopub_prepare_graph.store):
            self.explain(new_np, i, o)
            orig = [orig for orig, new in mappings.items()
                    if new == new_np.assertion.identifier]
            if len(orig) == 0:
                continue
            orig = orig[0]
            # Python 3 print function (original used py2 print statements,
            # inconsistent with the rest of this module).
            print(orig)
            if isinstance(orig, rdflib.URIRef):
                new_np.pubinfo.add((new_np.assertion.identifier,
                                    prov.wasQuotedFrom, orig))
                if orig in old_np_map:
                    # Link this assertion to the one it supersedes.
                    new_np.pubinfo.add((new_np.assertion.identifier,
                                        prov.wasRevisionOf,
                                        old_np_map[orig]))
            print("Publishing %s with %s assertions." % (
                new_np.identifier, len(new_np.assertion)))
            to_publish.append(new_np)
        self.app.nanopub_manager.publish(*to_publish)
    print('Published')
def sdd2owl(c, inputfile, ontology, outputfile, infosheet="InfoSheet"):
    """Convert a Semantic Data Dictionary (SDD) workbook into an OWL ontology.

    Runs the 'sdd_owl_semantics' SETL workflow against the spreadsheet at
    *inputfile*, serializes the generated ontology to *ontology* (turtle),
    then writes a copy of the workbook to *outputfile* whose Codebook
    "Class" column is filled in with the CURIEs of the minted classes.

    :param c: task context (unused here) -- presumably an invoke/fabric
        context object; confirm against the task runner.
    :param inputfile: path to the SDD Excel workbook.
    :param ontology: path to write the generated turtle ontology to.
    :param outputfile: path to write the annotated workbook to.
    :param infosheet: name of the workbook tab listing the other sheets.
    """
    # Local imports kept from the original; unused `re` and `pandas`
    # imports removed.
    import openpyxl
    import json

    cwd = os.getcwd()
    setl_graph = Graph()
    setl_graph.parse('sdd_owl_semantics.setl.ttl', format="turtle")
    sddns = rdflib.Namespace('http://purl.org/twc/sdd/setl/')
    # InfoSheet entry -> SETL workflow node that consumes that table.
    tab_config = {
        "Data_Dictionary": sddns.dm_table,
        "Codebook": sddns.codebook_table,
        "Code_Mappings": sddns.codemapping_table,
        "Timeline": sddns.timeline_table
    }
    wb = openpyxl.load_workbook(inputfile)
    infosheet_tab = wb[infosheet]
    infosheet_dict = {row[0].value: row[1].value for row in infosheet_tab}
    infosheet_resource = setl_graph.resource(sddns.info_sheet)
    infosheet_resource.add(rdflib.RDF.type, setl.Excel)
    infosheet_resource.add(setl.sheetname, rdflib.Literal(infosheet))
    gen = infosheet_resource.value(prov.wasGeneratedBy)
    gen.add(prov.used, URIRef('file://' + os.path.join(cwd, inputfile)))
    for entry, uri in tab_config.items():
        res = setl_graph.resource(uri)
        sheet = infosheet_dict[entry]
        gen = res.value(prov.wasGeneratedBy)
        if sheet.startswith('#'):
            # '#Name' denotes a sheet inside this workbook; anything else
            # is treated as the URI of an external CSV table.
            res.add(rdflib.RDF.type, setl.Excel)
            res.add(setl.sheetname, rdflib.Literal(sheet[1:]))
            gen.add(prov.used,
                    URIRef('file://' + os.path.join(cwd, inputfile)))
        else:
            res.add(rdflib.RDF.type, csvw.Table)
            gen.add(prov.used, URIRef(sheet))
    # JSON-LD context for the transform. The original literal repeated the
    # "rdfs" and "dc" keys (with identical values); duplicates removed.
    context = {
        "@base": "http://purl.org/twc/ctxid/",
        "sio": "http://semanticscience.org/resource/",
        "chear": "http://hadatac.org/ont/chear#",
        "skos": "http://www.w3.org/2004/02/skos/core#",
        "prov": "http://www.w3.org/ns/prov#",
        "dc": "http://purl.org/dc/terms/",
        "cmo": "http://purl.obolibrary.org/obo/CMO_",
        "doid": "http://purl.obolibrary.org/obo/DOID_",
        "owl": "http://www.w3.org/2002/07/owl#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
        "chebi": "http://purl.obolibrary.org/obo/CHEBI_",
        "stato": "http://purl.obolibrary.org/obo/STATO_",
        "obo": "http://purl.obolibrary.org/obo/",
        "pubchem": "http://rdf.ncbi.nlm.nih.gov/pubchem/compound/",
        "hasco": "http://hadatac.org/ont/hasco#",
        "vstoi": "http://hadatac.org/ont/vstoi#",
        "hasneto": "http://hadatac.org/ont/hasneto#",
        "uberon": "http://purl.obolibrary.org/obo/UBERON_",
        "prv": "http://hadatac.org/ont/prov#"
    }
    context.update(load_namespaces())
    setl_graph.add((sddns.metadata_transform, setl.hasContext,
                    Literal(json.dumps(context))))
    setl_graph.add((sddns.namespaces, prov.value,
                    Literal("result = %s" % json.dumps(context))))
    resources = setlr._setl(setl_graph)
    ont = resources[sddns.metadata]
    prefix = "http://purl.org/twc/ctxid/"
    # (column, code) pair -> CURIE of the class minted for it.
    mappings = {}
    for cls, identifier in ont.query(
            '''select ?cls ?identifier where {
                   ?cls a owl:Class; dc:identifier ?identifier.
               }''',
            initNs={"owl": rdflib.OWL, "dc": dc}):
        if prefix in str(cls):
            # Renamed from `id` to avoid shadowing the builtin.
            local_id = cls.replace(prefix, '')
            # Context IDs are hex strings; also record the integer form.
            ont.add((cls, skos.notation,
                     rdflib.Literal(int(local_id, 16))))
            column, code = identifier.split('||||')
            print("Mapping (%s, %s) => %s" % (column, code, cls))
            mappings[(column, code)] = "ctxid:" + local_id
    with open(ontology, 'wb') as out:
        ont.serialize(out, format="turtle")
    # Fill the Codebook's "Class" column in with the minted class CURIEs.
    codebook = wb[infosheet_dict['Codebook'][1:]]
    header = None
    code_col = None
    class_col = None
    column_col = None
    for row in codebook.rows:
        if header is None:
            header = row
            for cell in header:
                # cell.column is a 1-based integer in openpyxl >= 2.6, so
                # subtracting 1 yields the tuple index -- confirm the
                # installed openpyxl version (older releases returned a
                # column letter here).
                if cell.value == "Code":
                    code_col = cell.column - 1
                if cell.value == "Class":
                    class_col = cell.column - 1
                if cell.value == "Column":
                    column_col = cell.column - 1
            print('Found Column Headers: Code==%s Class==%s Column=%s' %
                  (code_col, class_col, column_col))
            continue
        if row[column_col].value is None:
            continue
        column = str(row[column_col].value)
        code = str(row[code_col].value)
        if (column, code) in mappings:
            print("%s.%s = %s" % (column, code, mappings[(column, code)]))
            row[class_col].value = mappings[(column, code)]
    wb.save(outputfile)