def _get_contributors(g, IRI):
    """Return all contributor values for IRI, sorted.

    Looks up both dcterms:contributor and dc:contributor triples in g.
    """
    subject = URIRef(IRI)
    found = [o for _, _, o in g.triples((subject, DCTERMS.contributor, None))]
    found.extend(o for _, _, o in g.triples((subject, DC.contributor, None)))
    return sorted(found)
def _get_authors(g, IRI):
    """Return all creator values for IRI, sorted.

    Looks up both dcterms:creator and dc:creator triples in g.
    """
    subject = URIRef(IRI)
    found = [o for _, _, o in g.triples((subject, DCTERMS.creator, None))]
    found.extend(o for _, _, o in g.triples((subject, DC.creator, None)))
    return sorted(found)
def _get_publishers(g, IRI):
    """Return all publisher values for IRI, sorted.

    Looks up both dcterms:publisher and dc:publisher triples in g.
    """
    subject = URIRef(IRI)
    found = [o for _, _, o in g.triples((subject, DCTERMS.publisher, None))]
    found.extend(o for _, _, o in g.triples((subject, DC.publisher, None)))
    return sorted(found)
def RadiomicsStore(featureVector, exportDir, PatientID, ROI, export_format, export_name):
    """Persist a pyradiomics feature vector to disk in one of three formats.

    :param featureVector: ordered mapping of feature name -> value
        (the output of a pyradiomics extraction run).
    :param exportDir: directory the output file is written into.
    :param PatientID: patient identifier, used in the txt file name.
    :param ROI: region-of-interest label, used in the txt file name.
    :param export_format: 'txt' (json dump), 'csv' (append one row),
        or 'rdf' (turtle serialization via the Radiomics Ontology).
    :param export_name: base file name for the csv/rdf outputs.

    NOTE(review): this is Python 2 style code ('rb'/'ab' file modes with
    the csv module, positional indexing of dict .keys()/.values()) -- it
    will not run unchanged on Python 3; confirm the target interpreter.
    """
    if export_format == 'txt':
        # NOTE(review): the opened file handle is never closed explicitly.
        json.dump(
            featureVector,
            open(exportDir + os.sep + "RF_" + '_' + ROI + PatientID + ".txt", 'w'))
    elif export_format == 'csv':
        # Append mode: a header row is written on *every* call, so calling
        # this repeatedly duplicates headers -- TODO confirm intended.
        with open(os.path.join(exportDir, export_name), 'ab') as mydata:
            w = csv.DictWriter(mydata, featureVector.keys())
            w.writeheader()
            w.writerow(featureVector)
    elif export_format == 'rdf':
        #print "RDF Output:"
        g = Graph()  # Create a rdflib graph object
        feature_type = []  # feature names (column 0 of the mapping table)
        feature_uri = []   # corresponding ontology URIs (column 1)
        # Load the radiomics_ontology mapping table; assumes the process
        # cwd contains the RadiomicsOntology directory -- TODO confirm.
        pyradiomics_ro = os.path.join(os.getcwd(), 'RadiomicsOntology', 'RadiomicsOntology_Table.csv')
        with open(pyradiomics_ro, 'rb') as mydata:
            reader = csv.reader(mydata)
            for row in reader:
                feature_type.append(row[0])
                feature_uri.append(row[1])
        # Adding Radiomics Ontology to namespace
        ro = Namespace('http://www.radiomics.org/RO/')
        g.bind('ro', ro)
        # Fixed RO terms used to link the calculation run to its features.
        CalculationRun = URIRef('http://www.radiomics.org/RO/0101')
        has_radiomics_feature = URIRef('http://www.radiomics.org/RO/0102')
        has_value = URIRef('http://www.radiomics.org/RO/0103')
        has_unit = URIRef('http://www.radiomics.org/RO/0104')
        # Extract feature keys and values from featureVector (the output of
        # pyradiomics); parallel-list indexing relies on Python 2 dict views.
        radiomics_key = featureVector.keys()
        radiomics_value = featureVector.values()
        # A dictionary contains the radiomic feature that has a unit
        dict_unit = {
            'original_shape_Volume': 'mm^3',
            'original_shape_SurfaceArea': 'mm^2',
            'original_shape_Maximum3DDiameter': 'mm'
        }
        # Adding to graph: one has_radiomics_feature link per feature, plus
        # its value and (when known) its unit.
        for i in range(len(radiomics_key)):
            # raises ValueError if the feature is missing from the mapping
            # table -- TODO confirm that hard failure is intended.
            ind = feature_type.index(radiomics_key[i])
            tmp_uri = feature_uri[ind]
            tmp_value = Literal(radiomics_value[i])
            feature_ontology = URIRef(tmp_uri)
            g.add((CalculationRun, has_radiomics_feature, feature_ontology))
            g.add((feature_ontology, has_value, tmp_value))
            # If radiomics have an unit, then add it to the graph.
            if radiomics_key[i] in dict_unit.keys():
                tmp_unit = Literal(dict_unit[radiomics_key[i]])
                g.add((feature_ontology, has_unit, tmp_unit))
        g.serialize(exportDir + os.sep + export_name + ".ttl", format='turtle')
def _get_title(g, IRI):
    """Return the first title-like value found for IRI, or None.

    Predicates are tried in priority order: dcterms:title, dc:title,
    skos:prefLabel, then rdfs:label.
    """
    subject = URIRef(IRI)
    for predicate in (DCTERMS.title, DC.title, SKOS.prefLabel, RDFS.label):
        for _, _, value in g.triples((subject, predicate, None)):
            return value
def connect_main_class_and_characteristics(organization, graph):
    """Fetch an organization's data, type its node as the main class, and
    attach every non-url field as a literal property.

    Returns the fetched (url, data) pair so callers can link nodes further.
    """
    url, data = fetch_data(organization)
    node = URIRef(url)
    graph.add((node, RDFS.Class, OPW[MAIN_CLASS]))
    for field, field_value in data.items():
        if field == 'url':
            continue
        graph.add((node, URIRef(f'/{field}'), Literal(field_value)))
    return url, data
def test_addGenomicBackgroundToGenotype_adds_genotype(self):
    """
    test that addGenomicBackgroundToGenotype()
    correctly assigns subject/object category
    """
    genotype_id = "GENO:0000002"
    background_id = "GENO:0000002"  # no idea what a good example background ID is
    self.genotype.addGenomicBackgroundToGenotype(
        background_id=background_id, genotype_id=genotype_id)
    # NOTE(review): geno_triples is collected but never asserted on, so this
    # test only verifies the call above does not raise -- presumably an
    # assertion on len(geno_triples) was intended; confirm.
    geno_triples = list(
        self.graph.triples((URIRef(self.cutil.get_uri(genotype_id)),
                            URIRef(self.test_cat_pred),
                            URIRef(self.test_cat_genotype_category))))
def countSiblings(g, n):
    """Count subjects that share a structural-property object with node n.

    NOTE: n itself matches its own object, so the returned count includes
    n's own triple(s), not just true siblings.
    """
    structprop = URIRef(args.str)
    matches = [
        [s2, p2, o2]
        for _, _, shared in g.triples((n, structprop, None))
        for s2, p2, o2 in g.triples((None, structprop, shared))
    ]
    return len(matches)
def __semantic_point_citypulse(g, datapoint, id, stream, stream_node):
    """Add a CityPulse-style SAO Point for one datapoint to graph g.

    Builds a time-stamped observation point with its value, unit of
    measurement and observed property, linked to stream_node as the
    feature of interest. Returns the new point node.
    """
    # Point URI embeds the current wall-clock second, so repeated calls
    # within the same second for the same id collide -- TODO confirm.
    point = URIRef('_'.join(['point', id, str(int(time()))]))
    g.add((point, RDF.type, SAO.Point))
    timestamp = BNode()
    g.add((timestamp, RDF.type, TL.Instant))
    g.add((timestamp, TL.at, Literal(datapoint['timestamp'], datatype=XSD.dateTime)))
    g.add((point, SAO.time, timestamp))
    g.add((point, SAO.value, Literal(datapoint['value'])))
    unit = stream['unit']
    type = stream['type']
    # Normalize temperature unit names to UCUM-style terms.
    # NOTE(review): case is inconsistent -- lowercase "celsius" but
    # capitalized "Fahrenheit"; other spellings pass through unmapped.
    if unit == "celsius":
        unit = "degree-Celsius"
    if unit == "Fahrenheit":
        unit = "degree-Fahrenheit"
    if unit != '':
        unit_node = UCUM.term(unit)
    else:
        # No unit known: attach a blank node typed owl:Nothing as a marker.
        unit_node = BNode()
        g.add((unit_node, RDF.type, OWL.Nothing))
    g.add((point, SAO.hasUnitOfMeasurement, unit_node))
    if type != '':
        type_node = UCUM.term(type)
    else:
        # Same owl:Nothing marker convention for an unknown property type.
        type_node = BNode()
        g.add((type_node, RDF.type, OWL.Nothing))
    g.add((point, SSN.observedProperty, type_node))
    g.add((point, SSN.featureOfInterest, stream_node))
    return point
def recSKOSc(g, h, n, structprop = URIRef(args.str)):
    """Recursively collect the children of node n into hierarchy dict h.

    Every subject s with triple (s, structprop, n) is appended to h[n]
    (once), then the same collection runs for s. The default predicate is
    taken from the global args at import time.

    :param g: rdflib graph to query.
    :param h: dict mapping node -> list of child nodes; mutated in place.
    :param n: node whose children are being collected.
    :param structprop: structural predicate linking child to parent.
    """
    if n not in h:
        h[n] = []
    for s, p, o in g.triples((None, structprop, n)):
        if s not in h[n]:
            h[n].append(s)
            # Bug fix: the recursive call previously omitted structprop, so
            # a caller-supplied predicate was only honored at the top level
            # and deeper levels silently fell back to the default.
            recSKOSc(g, h, s, structprop)
def get_devilFruitG(graph):
    """Populate graph with every devil fruit listed in DEVIL_FRUITS.

    Each fruit node is typed OPW.Devil_Fruit plus its specific type URI;
    every remaining field except type/type_url/user becomes a literal
    property. Returns the (mutated) graph.
    """
    for fruit_name in DEVIL_FRUITS:
        fruit = get_devil_fruit(fruit_name)
        fruit_node = URIRef(fruit['url'])
        print(fruit['type_url'])
        print(fruit_name)
        graph.add((fruit_node, RDF.type, OPW.Devil_Fruit))
        for field in fruit.keys():
            if field == 'url':
                continue
            if field == 'type':
                graph.add((fruit_node, RDF.type, URIRef(fruit['type_url'])))
            if field not in ['type', 'type_url', 'user']:
                graph.add((fruit_node, properties[field], Literal(fruit[field])))
    return graph
def test_addGenotype(self):
    """addGenotype() must assert an rdfs:label triple for the new node."""
    curie_util = CurieUtil(self.curie_map)
    genotype_id = 'MGI:5515892'
    genotype_label = (
        'Pmp22<Tr-2J>/Pmp22<+> [C57BL/6J-Pmp22<Tr-2J>/GrsrJ]')
    self.genotype.addGenotype(genotype_id, genotype_label)
    expected_triple = (URIRef(curie_util.get_uri(genotype_id)),
                       RDFS['label'], Literal(genotype_label))
    self.assertTrue(expected_triple in self.genotype.graph)
def write_mappings():
    # Compute term/DDC pair associations, report log-likelihood stats, and
    # write the above-mean pairs to mappings.csv (tab separated).
    # Python 2 code (print statements); relies on module globals pc,
    # pair_association, loglike, np, glob, re, csv, Graph, URIRef, SKOS.
    #
    # Include all pairs:
    # ll = pair_association(unique_pairs, loglike)
    # Only pairs with frequency >= 5:
    unique_pars_5 = np.array([a.split('!') for a, b in pc.items() if b >= 5])
    ll = pair_association(unique_pars_5, loglike)
    print 'Write mappings'
    # Sort ascending by log-likelihood; iterated in reverse below.
    lls = sorted(ll.items(), key=lambda x: x[1])
    ww = np.array(ll.values())
    mn = ww.mean()
    # NOTE(review): "{:2f}" is width-2 fixed-point, presumably a typo for
    # "{:.2f}" (two decimal places) as used just below -- confirm.
    print "- Mean LL is {:2f}".format(mn)
    print "- {:.2f} % is >= mean LL".format(
        float(ww[ww >= mn].shape[0]) / ww.shape[0])
    print "- {:.2f} % is < mean LL".format(
        float(ww[ww < mn].shape[0]) / ww.shape[0])
    # Whether to lookup DDC labels and add them to the mapping sheet
    addDdcLabels = False
    if addDdcLabels:
        # Load WebDewey data
        g = Graph()
        for x in glob('../../webdewey/DDK23/*.ttl'):
            print x
            g.load(x, format='turtle')
    # Matches terms flagged as "(Form)" subjects, which are excluded below.
    fsj = re.compile('.*\(Form\)')
    with open('mappings.csv', 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        # Walk pairs from highest to lowest LL, stopping below the mean.
        for x in lls[::-1]:
            if x[1] < mn:
                break
            # x[0] is "term!ddc"; split once into [term, ddc].
            q = x[0].split('!', 1)
            if fsj.match(q[0]):
                # Skip "(Form)" subjects (original comment: "Utelat form").
                continue
            if addDdcLabels:
                lab = g.preferredLabel(
                    URIRef('http://dewey.info/class/' + q[1] + '/e23/'),
                    labelProperties=[SKOS.prefLabel, SKOS.altLabel])
                if len(lab) != 0:
                    lab = lab[0][1].value
                else:
                    lab = '(no label)'
                # Term, Dewey, Dewey Caption, Loglike
                writer.writerow([q[0], q[1], lab.encode('utf-8'), x[1]])
            else:
                # Term, Dewey, Loglike
                writer.writerow([q[0], q[1], x[1]])
def get_devilTypeFruitG(g):
    """Add every devil-fruit type in devilType_objects to g.

    Each type node is classed as OPW.Devil_Fruit and all remaining fields
    except 'url' are attached as literal properties. Returns g.
    """
    for type_record in devilType_objects:
        type_node = URIRef(type_record["url"])
        g.add((type_node, RDFS.Class, OPW.Devil_Fruit))
        for field in type_record.keys():
            if field == 'url':
                continue
            g.add((type_node, properties[field], Literal(type_record[field])))
    return g
def map_organizations(graph):
    """Add every entry of constants.ORGANIZATIONS to graph.

    String entries become stand-alone organization nodes. Dict entries
    describe a superior organization ('value') with 'sub_items', each of
    which is linked back to the superior via RDFS.Class. Returns graph.
    """
    for entry in constants.ORGANIZATIONS:
        if isinstance(entry, str):
            connect_main_class_and_characteristics(entry, graph)
        elif isinstance(entry, dict):
            superior_value = entry['value']
            children = entry['sub_items']
            parent_url, _ = connect_main_class_and_characteristics(
                superior_value, graph)
            for child in children:
                child_url, _ = connect_main_class_and_characteristics(
                    child, graph)
                graph.add((URIRef(child_url), RDFS.Class, URIRef(parent_url)))
    return graph
def test_addGenomicBackgroundToGenotype_adds_categories(self):
    """
    test that addGenomicBackgroundToGenotype()
    correctly assigns subject/object category
    """
    genotype_id = "GENO:0000002"
    background_id = "GENO:0000002"  # no idea what a good example background ID is
    self.genotype.addGenomicBackgroundToGenotype(
        background_id=background_id, genotype_id=genotype_id)
    # Exactly one category triple should exist for the genotype subject.
    geno_triples = list(
        self.graph.triples((URIRef(self.cutil.get_uri(genotype_id)),
                            URIRef(self.test_cat_pred),
                            URIRef(self.test_cat_genotype_category))))
    self.assertEqual(
        len(geno_triples), 1,
        "addTriples() didn't make exactly 1 genotype category triple")
    self.assertEqual(
        geno_triples[0][2], URIRef(self.test_cat_genotype_category),
        "addTriples() didn't assign the right genotype category")
    # Likewise exactly one category triple for the background subject.
    # NOTE(review): background_id equals genotype_id above, so both queries
    # hit the same subject URI -- confirm a distinct background ID wasn't
    # intended.
    background_triples = list(
        self.graph.triples((URIRef(self.cutil.get_uri(background_id)),
                            URIRef(self.test_cat_pred),
                            URIRef(self.test_cat_background_category))))
    self.assertEqual(
        len(background_triples), 1,
        "addTriples() didn't make exactly 1 genotype category triple")
    self.assertEqual(
        background_triples[0][2], URIRef(self.test_cat_background_category),
        "addTriples() didn't assign the right background category")
    # does not compile
    # def test_addParts(self):
    #     """
    #     """
    #     if part_relationship is None:
    #         part_relationship = self.globaltt['has_part']
    #     # Fail loudly if parent or child identifiers are None
    #     if parent_id is None:
    #         raise TypeError('Attempt to pass None as parent')
    #     elif part_id is None:
    #         raise TypeError('Attempt to pass None as child')
    #     elif part_relationship is None:
    #         part_relationship = self.globaltt['has_part']
    #
    #     self.graph.addTriple(parent_id, part_relationship, part_id,
    #                          subject_category=subject_category,
    #                          object_category=object_category)
    return
def test_addGenotype(self):
    """addGenotype() must assert an rdfs:label triple for the new node."""
    # Local imports keep this test self-contained.
    from rdflib.namespace import RDFS, URIRef
    from rdflib import Literal
    from dipper.utils.CurieUtil import CurieUtil
    curie_util = CurieUtil(self.curie_map)
    genotype_id = 'MGI:5515892'
    genotype_label = (
        'Pmp22<Tr-2J>/Pmp22<+> [C57BL/6J-Pmp22<Tr-2J>/GrsrJ]')
    self.genotype.addGenotype(genotype_id, genotype_label)
    expected_triple = (URIRef(curie_util.get_uri(genotype_id)),
                       RDFS['label'], Literal(genotype_label))
    self.assertTrue(expected_triple in self.genotype.graph)
def graph(self):
    """Build and return an RDF graph describing this software agent.

    Types the agent as prov:SoftwareAgent and adds one triple per entry
    of self.name, self.version and self.homepage.
    """
    agent = URIRef(self.uri)
    g = Graph()
    g.add((agent, RDF.type, PROV.SoftwareAgent))
    for value in self.name:
        g.add((agent, FOAF.name, Literal(value)))
    for value in self.version:
        g.add((agent, SCHEMA.softwareVersion, Literal(value)))
    for value in self.homepage:
        g.add((agent, FOAF.homepage, Literal(value)))
    return g
def graph(self):
    """Build and return an RDF graph describing this organization.

    Types the node as prov:Organization and adds one triple per entry of
    self.name and self.homepage.
    """
    org = URIRef(self.uri)
    g = Graph()
    g.add((org, RDF.type, PROV.Organization))
    for value in self.name:
        g.add((org, FOAF.name, Literal(value)))
    for value in self.homepage:
        g.add((org, FOAF.homepage, Literal(value)))
    return g
def countArticlesChildren(g, h, n, r, dcsubject = URIRef(args.member)):
    """Count articles attached to node n plus those of its children down to
    r levels (default predicate comes from the global args at import time).
    """
    own_count = countArticles(g, n, dcsubject)
    # A leaf node (not in hierarchy h) or an exhausted depth budget means
    # only this node's own articles are counted.
    if n not in h or r <= 0:
        return own_count
    return own_count + sum(
        countArticlesChildren(g, h, child, r - 1, dcsubject)
        for child in h[n])
def get_oceans(g):
    """Add every ocean in `oceans` to g as an OPW.Blue_Sea node.

    All fields except 'uriRef' become literal properties. Returns g.
    """
    for ocean_name in oceans:
        ocean_data = get_geography_data(ocean_name)
        ocean_node = URIRef(ocean_data["uriRef"])
        g.add((ocean_node, RDFS.Class, OPW.Blue_Sea))
        for field in ocean_data:
            if field == 'uriRef':
                continue
            g.add((ocean_node, properties[field], Literal(ocean_data[field])))
    return g
def countArticlesChildren(g, h, n, r, dcsubject = URIRef("http://purl.org/dc/terms/subject")):
    """Count articles attached to node n plus those of its children down to
    r levels, using dcterms:subject as the default membership predicate.
    """
    own_count = countArticles(g, n, dcsubject)
    # A leaf node (not in hierarchy h) or an exhausted depth budget means
    # only this node's own articles are counted.
    if n not in h or r <= 0:
        return own_count
    return own_count + sum(
        countArticlesChildren(g, h, child, r - 1, dcsubject)
        for child in h[n])
def __semantic_stream(g, stream, id):
    """Add an SSN description of a sensor stream (dict `stream`) to g.

    Covers accuracy, deployment date, description, location, last
    observation, name, observed type, unit of measurement and maker.
    """
    stream_node = URIRef(id)
    g.add((stream_node, RDF.type, SSN.Sensor))
    # NOTE(review): typing the sensor node as foaf:Person looks wrong --
    # confirm this was not meant for the maker node below.
    g.add((stream_node, RDF.type, FOAF.Person))
    g.add((stream_node, RDF.ID, Literal(id)))
    accuracy = BNode()
    g.add((accuracy, RDF.type, SSN.Accuracy))
    g.add((accuracy, DUL.hasDataValue, Literal(stream['accuracy'])))
    g.add((stream_node, SSN.hasMeasurementProperty, accuracy))
    deployment = BNode()
    g.add((deployment, RDF.type, SSN.Deployment))
    g.add((deployment, DUL.hasEventDate, Literal(stream['creation_date'], datatype=XSD.date)))
    g.add((stream_node, SSN.hasDeployment, deployment))
    g.add((stream_node, FOAF.depiction, Literal(stream['description'])))
    lon = stream['location']['lon']
    lat = stream['location']['lat']
    # NOTE(review): a single blank node receives both the lon and the lat
    # hasDataValue triples (and is linked twice via foaf:based_near); the
    # two values are indistinguishable on that node -- presumably two
    # separate nodes, or lon/lat-specific predicates, were intended.
    location = BNode()
    g.add((location, RDF.type, GEO.Point))
    g.add((location, GEO.hasDataValue, Literal(lon)))
    g.add((stream_node, FOAF.based_near, location))
    g.add((location, RDF.type, GEO.Point))
    g.add((location, GEO.hasDataValue, Literal(lat)))
    g.add((stream_node, FOAF.based_near, location))
    observation = BNode()
    g.add((observation, RDF.type, SSN.Observation))
    g.add((observation, DUL.hasEventDate, Literal(stream['last_updated'], datatype=XSD.dateTime)))
    g.add((stream_node, SSN.madeObservation, observation))
    g.add((stream_node, FOAF.name, Literal(stream['name'])))
    g.add((stream_node, SSN.observes, Literal(stream['type'])))
    # theFeatureOfInterest = BNode()
    # g.add((observation, SSN.featureOfInterest, theFeatureOfInterest)
    # g.add((stream_node, SSN.observes, observation)))
    unit = BNode()
    g.add((unit, RDF.type, DUL.UnitOfMeasure))
    g.add((unit, DUL.hasParameterDataValue, Literal(stream['unit'])))
    g.add((stream_node, SSN.hasProperty, unit))
    maker = BNode()
    g.add((maker, RDF.type, FOAF.Person))
    g.add((maker, RDF.ID, Literal(stream['user_id'])))
    g.add((stream_node, FOAF.maker, maker))
def setUp(self):
    """Build a CGD fixture from one hard-coded disease/drug/variant row and
    precompute the URIs and labels the test assertions compare against."""
    self.curie_map = curie_map.get()
    cu = CurieUtil(self.curie_map)
    # Fake credentials as these tests do not require a database connection
    database = 'foo'
    user = '******'
    password = '******'
    self.cgd = CGD(database, user, password)
    # One sample row in the tuple layout unpacked below.
    test_data = ((387, 'MLH1 any mutation', 13, 'Adenocarcinoma', None,
                  'Colon', 'no response', 1, '5FU-based adjuvant therapy',
                  'late trials', '20498393'),)
    self.cgd.add_disease_drug_variant_to_graph(test_data)
    (variant_key, variant_label, diagnoses_key, diagnoses,
     specific_diagnosis, organ, relationship, drug_key, drug,
     therapy_status, pubmed_id) = test_data[0]
    source_id = "PMID:{0}".format(pubmed_id)
    # Synthesize the same CGD identifiers the loader would have minted.
    variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))
    disease_id = self.cgd.make_cgd_id('disease{0}{1}'.format(diagnoses_key, diagnoses))
    relationship_id = "RO:has_environment"
    disease_quality = ("CGD:{0}".format(relationship)).replace(" ", "_")
    # NOTE(review): has_quality_property is assigned but never used here.
    has_quality_property = "BFO:0000159"
    drug_id = self.cgd.make_cgd_id('drug{0}'.format(drug_key))
    disease_instance_id = self.cgd.make_cgd_id('phenotype{0}{1}{2}'.format(
        diagnoses, variant_key, relationship))
    variant_disease_annot = self.cgd.make_cgd_id("assoc{0}{1}".format(variant_key, diagnoses))
    # Set up URIs used by the assertions in the test methods.
    self.source_uri = URIRef(cu.get_uri(source_id))
    self.variant_uri = URIRef(cu.get_uri(variant_id))
    self.disease_uri = URIRef(cu.get_uri(disease_id))
    self.disease_ind_uri = URIRef(cu.get_uri(disease_instance_id))
    self.relationship_uri = URIRef(cu.get_uri(relationship_id))
    self.drug_uri = URIRef(cu.get_uri(drug_id))
    self.vd_annot_uri = URIRef(cu.get_uri(variant_disease_annot))
    self.disease_quality_uri = URIRef(cu.get_uri(disease_quality))
    # Expected labels.
    self.variant_label = variant_label
    self.disease_label = diagnoses
    self.disease_instance_label = "{0} with {1} to therapy".format(diagnoses, relationship)
    self.drug_label = drug
    return
def graph(self):
    """Build and return an RDF graph describing this person.

    Types the node as prov:Person and adds one triple per entry of
    self.name, self.institution, self.homepage and self.email.
    """
    person = URIRef(self.uri)
    g = Graph()
    g.add((person, RDF.type, PROV.Person))
    for value in self.name:
        g.add((person, FOAF.name, Literal(value)))
    for value in self.institution:
        g.add((person, FOAF.member, PROVIT[value]))
    for value in self.homepage:
        g.add((person, FOAF.homepage, Literal(value)))
    for value in self.email:
        g.add((person, FOAF.mbox, Literal(value)))
    return g
def __semantic_datapoint(g, datapoint, id):
    """Add an SSN SensorOutput node for one datapoint dict to graph g.

    Links the output to its producing sensor (by stream_id), its observed
    value, and its event timestamp.
    """
    output_node = URIRef(id)
    g.add((output_node, RDF.type, SSN.SensorOutput))
    g.add((output_node, RDF.ID, Literal(id)))
    producer = BNode()
    g.add((producer, RDF.type, SSN.Sensor))
    g.add((producer, RDF.ID, Literal(datapoint['stream_id'])))
    g.add((output_node, SSN.isProducedBy, producer))
    observed_value = BNode()
    g.add((observed_value, RDF.type, SSN.ObservationValue))
    g.add((observed_value, DUL.hasDataValue, Literal(datapoint['value'])))
    g.add((output_node, SSN.hasValue, observed_value))
    g.add((output_node, DUL.hasEventDate,
           Literal(datapoint['timestamp'], datatype=XSD.dateTime)))
def fillLabelsC(l, t, n, g):
    """Iteratively fill l[node] with label literals for n and all of its
    descendants in tree t (depth-first via an explicit stack; a visited
    set guards against revisiting shared or cyclic children).
    """
    labelProp = URIRef(args.label)
    pending = [n]
    visited = set()
    while pending:
        current = pending.pop()
        if current not in l:
            l[current] = []
        for _, _, label in g.triples((current, labelProp, None)):
            l[current].append(label)
        visited.add(current)
        if current in t:
            for child in t[current]:
                if child not in visited:
                    pending.append(child)
def test_associations(self):
    """
    Given the above sample input, produce the following:
    CGD:VariantID has_phenotype(RO:0002200) CGD:DiseaseInstance

    A CGD:AssociationID OBO:RO_0002558 Traceable Author Statement (ECO:0000033)
    A CGD:AssociationID dc:source PMID:20498393
    A CGD:AssociationID has_environment CGD:DrugID
    A CGD:AssociationID OBAN:association_has_subject CGD:VariantID
    A CGD:AssociationID OBAN:association_has_object_property has_phenotype
    A CGD:AssociationID OBAN:association_has_object CGD:DiseaseInstance
    """
    from dipper.utils.TestUtils import TestUtils
    # Make testutils object and load bindings
    cu = CurieUtil(self.curie_map)
    test_env = TestUtils(self.cgd.graph)
    self.cgd.load_bindings()
    evidence = 'OBO:ECO_0000033'
    evidence_uri = URIRef(cu.get_uri(evidence))
    # The has_environment predicate URI (self.relationship_uri, set up in
    # setUp) is interpolated into the query template as <{0}>.
    sparql_query = """
                   SELECT ?diseaseInd ?variant ?drug ?vdannot ?source ?evidence
                   WHERE {{
                       ?variant OBO:RO_0002200 ?diseaseInd .

                       ?vdannot a OBAN:association ;
                           OBO:RO_0002558 ?evidence ;
                           dc:source ?source ;
                           <{0}> ?drug ;
                           OBAN:association_has_object ?diseaseInd ;
                           OBAN:association_has_object_property OBO:RO_0002200 ;
                           OBAN:association_has_subject ?variant .
                   }}
                   """.format(self.relationship_uri)
    # Expected Results -- column order must match the SELECT clause.
    expected_results = [[self.disease_ind_uri, self.variant_uri,
                         self.drug_uri, self.vd_annot_uri,
                         self.source_uri, evidence_uri]]
    # Query graph
    sparql_output = test_env.query_graph(sparql_query)
    self.assertEqual(expected_results, sparql_output)
def __semantic_stream_citypulse(g, id):
    """Fetch stream metadata for `id` from Elasticsearch and add a
    CityPulse-style FeatureOfInterest node for it to g.

    Returns (stream, stream_node): the raw ES source dict and the new
    rdflib node, for use by the per-datapoint annotators.
    """
    es = __connect()
    # Look the stream up by document id; the id is Lucene-escaped because
    # it is spliced into a query string.
    results = es.search(index=__INDEX, doc_type='stream',
                        params={'q': '_id:' + lucene_escape(id)})
    # First hit's _source; assumes the stream exists (IndexError otherwise).
    stream = results[__HITS][__HITS][0][__SOURCE]
    # Node URI embeds the current wall-clock second, so repeated calls
    # within the same second for the same id collide -- TODO confirm.
    stream_node = URIRef('_'.join(['stream', id, str(int(time()))]))
    g.add((stream_node, RDF.type, SSN.FeatureOfInterest))
    first_node = BNode()
    g.add((first_node, RDF.type, CT.Node))
    lon = stream['location']['lon']
    lat = stream['location']['lat']
    # NOTE: "hasLongtitude" is the predicate as spelled in the CT ontology
    # binding used here.
    g.add((first_node, CT.hasLongtitude, Literal(Decimal(lon))))
    g.add((first_node, CT.hasLatitude, Literal(Decimal(lat))))
    g.add((first_node, CT.hasNodeName, Literal(stream['name'])))
    g.add((stream_node, CT.hasFirstNode, first_node))
    return stream, stream_node
def test_genotype_labels(self):
    """
    test that genotype label is set correctly after parse()
    """
    # Nothing to check when no source fixture is configured.
    if self.source is None:
        return
    resource_dir = "../../tests/resources/zfin/"
    self.source.files['fish_components']['file'] = (
        resource_dir + "genotype-label-test-fish_components_fish.txt")
    self.source.files['backgrounds']['file'] = (
        resource_dir + "genotype-label-test-genotype_backgrounds.txt")
    self.source.files['geno']['file'] = (
        resource_dir + "genotype-label-test-genotype_features.txt")
    self.source.parse()
    genotype_iri = URIRef("http://zfin.org/ZDB-GENO-070228-3")
    expected_label = "shha<sup>tbx392/tbx392</sup> (AB)"
    self.assertEqual(
        str(self.source.testgraph.label(genotype_iri, None)),
        expected_label)