def test_abox(self):
    """Load the ``abox`` fixture and check that it exposes three terms.

    ``pronto.warnings.NotImplementedWarning`` is ignored while the fixture
    is being loaded.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", pronto.warnings.NotImplementedWarning)
        fixture = self.get_path("abox")
        ont = pronto.Ontology(fixture)
    # Person (implicit), Male and Female
    term_count = len(ont.terms())
    self.assertEqual(term_count, 3)
import json import pronto from fairshake_assessments.core import metric from fairshake_assessments.utils.jsonld_frame import jsonld_frame from fairshake_assessments.utils.force_list import force_list from fairshake_assessments.utils.IRI_to_NS import IRI_to_NS from fairshake_assessments.utils.fetch_and_cache import fetch_and_cache NCBITaxon = pronto.Ontology( fetch_and_cache('http://purl.obolibrary.org/obo/ncbitaxon.obo', '.cache/ncbitaxon.obo')) NCBITaxon_reversed = { node.name: node.id for node in map(NCBITaxon.get, NCBITaxon) if node } NCBITaxon_reversed_synonyms = { synonym: node.id for node in map(NCBITaxon.get, NCBITaxon) if node for synonym in node.synonyms } @metric({ '@id': 143, 'name': 'Taxonomy', 'description': 'A taxonomy is present with a valid NCBITaxon identifier', 'principle': 'Interoperable', }) def metric_143_ncbitaxon(doc): taxonomies = list( map(
import pandas import pronto import csv import re from pronto import Relationship aro = pronto.Ontology(snakemake.params["ontology_aro"]) ro = pronto.Ontology(snakemake.params["ontology_ro"]) mo = pronto.Ontology(snakemake.params["ontology_mo"]) cat = snakemake.params["aro_categories"] categories = pandas.read_csv(cat, sep="\t") resistance_mechanism = categories.loc[categories['ARO Category'] == 'Resistance Mechanism'] obo2resistance_mechanism = pandas.Series( resistance_mechanism['ARO Name'].values, index=resistance_mechanism['ARO Accession']).to_dict() aro.merge(ro) aro.merge(mo) ontology_results = [] rgi_results = pandas.read_csv(snakemake.input['rgi'], sep="\t") def get_all_path(obo_list, all_obo): new_obo = [] for obo in obo_list: new_obo += obo.parents
'198346': '41688', '22473': '32988', '3669': '16822', '23008': '16646', '3736': '48947', '578003': '65172' } """has_input has_intermediate has_output has_participant has_primary_input has_primary_input_or_output has_primary_output""" ont = pronto.Ontology('/home/ralf/go-ontology/src/ontology/go-edit.obo') check = True for term in ont.terms.values(): goid = term.id if goid[:3] != 'GO:': continue goit = efs.get(goid) if goit is None: continue ns = term.other.get('namespace') if ns is None: continue if ns[0] != 'biological_process' and ns[0] != 'molecular_function': continue l = []
def psimi_loader(property_file, step):
    """Download the PSI-MI OBO file and export its terms to ``cv-psi-mi.xml``.

    Every non-obsolete term becomes one ``Cvtermwrapper_Data`` element under
    an ``object-stream`` root. Each element carries the term name,
    definition, status, accession, synonyms, relationships, secondary
    accessions and definition cross-references.

    Args:
        property_file: Passed to ``config_output`` to obtain the folder the
            OBO file is downloaded into.
        step: Not used by this function.
    """
    logging.info("property file loading starts")
    output_folder = config_output(property_file)
    logging.info("property file loading ends")

    logging.info("psimi obo file downloading starts")
    obo_file = wget_obo_url(output_folder)
    logging.info("psimi obo file downloading ends")

    logging.info("obo file loading starts")
    mi = pronto.Ontology(obo_file)
    # mi = pronto.Ontology.from_obo_library("mi.obo")  # can do it online also

    # Definition xref prefixes exported under <externalDbXrefs>, mapped to the
    # dbName written for each of them.
    external_db_names = {"PUBMED": "PubMed", "DOI": "DOI", "RESID": "RESID"}
    cvxref_tag = "com.genebio.nextprot.dataloader.cv.CvXref"

    root = etree.Element("object-stream")
    for method in mi.terms():
        if method.obsolete:
            continue  # do not write if obsolete method

        cvtermwrapper_data = etree.SubElement(
            root, "com.genebio.nextprot.dataloader.cv.Cvtermwrapper_Data")
        psimi = etree.SubElement(cvtermwrapper_data, "termCategory")
        psimi.text = "PSI-MI"
        wrappedbean = etree.SubElement(cvtermwrapper_data, "wrappedBean")

        # A term without a name or definition is exported with empty text
        # instead of aborting the whole export.
        cvname = etree.SubElement(wrappedbean, "cvname")
        cvname.text = "![CDATA[" + (method.name or "") + "]]"
        description = etree.SubElement(wrappedbean, "description")
        description.text = "![CDATA[" + (method.definition or "") + "]]"
        status = etree.SubElement(wrappedbean, "status")
        status.text = "VALID"

        dbxref = etree.SubElement(wrappedbean, "dbXref")
        resourcetype = etree.SubElement(dbxref, "resourceType")
        resourcetype.text = "DATABASE"
        accession = etree.SubElement(dbxref, "accession")
        accession.text = "![CDATA[" + method.id + "]]"
        cvdatabase = etree.SubElement(dbxref, "cvDatabase")
        cvname = etree.SubElement(cvdatabase, "cvName")
        cvname.text = "PSI_MI"

        ## cvTermSynonyms
        cvtermsynonyms = etree.SubElement(wrappedbean, "cvTermSynonyms")
        print("=======================\t" + method.id)
        for synonym_obj in method.synonyms:
            # Read the synonym's attributes directly; scraping them out of
            # repr() broke on descriptions containing "(" or quotes.
            synonym_type = synonym_obj.type
            # A synonym is "main" unless its type id mentions "alternate";
            # a synonym without a type is main as well.
            ismain_bool = "true"
            if synonym_type is not None and "alternate" in synonym_type.id:
                ismain_bool = "false"

            cvtermsynonym_data = etree.SubElement(
                cvtermsynonyms,
                "com.genebio.nextprot.datamodel.cv.CvTermSynonym")
            synonymname = etree.SubElement(cvtermsynonym_data, "synonymName")
            synonymname.text = synonym_obj.description
            synonymtype = etree.SubElement(cvtermsynonym_data, "synonymType")
            synonymtype.text = "NAME"  # only value found in nextprot xml files
            ismain = etree.SubElement(cvtermsynonym_data, "isMain")
            ismain.text = ismain_bool  # true if not alternate
            etree.SubElement(cvtermsynonym_data, "cvTerm",
                             reference="../../..")

        ## relationships
        relationships = etree.SubElement(cvtermwrapper_data, "relationships")
        for typedef, targets in method.relationships.items():
            for target in targets:
                relationship_data = etree.SubElement(
                    relationships,
                    "com.genebio.nextprot.dataloader.cv.Relationship")
                relationship_elt = etree.SubElement(relationship_data,
                                                    "relationship")
                # Only these two relationship names get a text value; any
                # other typedef leaves the element empty.
                if typedef.name == "is a":
                    relationship_elt.text = "is_a"
                elif typedef.name == "part of":
                    relationship_elt.text = "part_of"
                accession = etree.SubElement(relationship_data, "accession")
                accession.text = "![CDATA[" + target.id + "]]"
                termcategory = etree.SubElement(relationship_data,
                                                "termCategory")
                termcategory.text = "![CDATA[]]"

        ## secondary ACs
        secondaryacs = etree.SubElement(cvtermwrapper_data, "secondaryAcs")
        for secondaryac_obj in method.alternate_ids:
            # NOTE(review): repr() is kept from the original; if alternate ids
            # are plain strings the written value includes quotes - confirm
            # that this is what the consumer expects.
            string_ac = etree.SubElement(secondaryacs, "string")
            string_ac.text = "![CDATA[" + repr(secondaryac_obj) + "]]"

        ## cv xrefs and dbxref
        cvxrefs = etree.SubElement(cvtermwrapper_data, "cvXrefs")
        externaldbxrefs = etree.SubElement(cvtermwrapper_data,
                                           "externalDbXrefs")
        definition = method.definition
        for xref in (definition.xrefs if definition is not None else ()):
            # Split on the first colon only, so accessions that contain a
            # colon stay whole and ids without one no longer raise.
            db, _, ac = xref.id.partition(":")
            db = db.upper()
            if db in external_db_names:
                externaldbxref_data = etree.SubElement(externaldbxrefs,
                                                       cvxref_tag)
                accession = etree.SubElement(externaldbxref_data, "accession")
                dbname = etree.SubElement(externaldbxref_data, "dbName")
                accession.text = "![CDATA[" + ac + "]]"
                dbname.text = external_db_names[db]
            elif db == "GO":  # does not exist in dbxref but cvxref
                cvxref_data = etree.SubElement(cvxrefs, cvxref_tag)
                accession = etree.SubElement(cvxref_data, "accession")
                dbname = etree.SubElement(cvxref_data, "dbName")
                accession.text = "![CDATA[" + xref.id + "]]"
                dbname.text = "GO"
            # Xrefs with any other prefix (e.g. SO) are not exported.

    xmlstr = etree.tostring(root,
                            xml_declaration=True,
                            encoding="UTF-8",
                            pretty_print=True).decode()
    # Write with the encoding announced in the XML declaration rather than
    # the platform default.
    with open("cv-psi-mi.xml", 'w', encoding="utf-8") as outputf:
        outputf.write(xmlstr)
    logging.info("obo file loading ends")
def setUp(self):
    """Create an empty ontology and four entities, TST:001 to TST:004.

    The ontology is stored as ``self.ont`` and the entities, built through
    ``self.create_entity``, as ``self.t1`` .. ``self.t4``.
    """
    ontology = pronto.Ontology()
    self.ont = ontology
    fixtures = (
        ("t1", "TST:001"),
        ("t2", "TST:002"),
        ("t3", "TST:003"),
        ("t4", "TST:004"),
    )
    for attribute, identifier in fixtures:
        setattr(self, attribute, self.create_entity(ontology, identifier))
#!/usr/bin/env python3
# Load go.owl from the working directory, look up one GO term and print the
# term object followed by its name.
import pronto

ont = pronto.Ontology('go.owl')
term_obj = ont['GO:0006355']
term_name = term_obj.name

for shown in (term_obj, term_name):
    print(shown)
def test_whole_ontology(self):
    """Load ``data/iao.owl`` (resolved relative to this file) and expect 245 terms."""
    warnings.simplefilter("ignore")
    iao_path = os.path.realpath(
        os.path.join(__file__, "..", "..", "data", "iao.owl")
    )
    iao = pronto.Ontology(iao_path)
    term_total = len(iao.terms())
    self.assertEqual(term_total, 245)
parser.add_argument( '--umlsConceptFile', required=True, type=str, help='Path on the MRCONSO.RRF file in UMLS metathesaurus') parser.add_argument('--outFile', required=True, type=str, help='Path to output wordlist file') args = parser.parse_args() print "Loading metathesaurus..." metathesaurus = loadMetathesaurus(args.umlsConceptFile) print "Loading disease ontology..." ont = pronto.Ontology(args.ontologyFile) #cancerTerm = findTerm(ont,'cancer') print "Loading stopwords..." with codecs.open(args.stopwordsFile, 'r', 'utf8') as f: stopwords = [line.strip().lower() for line in f] stopwords = set(stopwords) print "Processing" allterms = [] # Skip down to the grandchildren of the cancer term and then find all their descendents (recursive children) count = 0 for term in ont: # Get the CUIDs for this term cuids = getCUIDs(term)
def setUp(self):
    """Create an empty ontology with three terms and one relationship.

    Stores the ontology as ``self.ont``, the terms TST:001..TST:003 as
    ``self.t1`` .. ``self.t3`` and the ``has_part`` relationship as
    ``self.has_part``.
    """
    ontology = pronto.Ontology()
    self.ont = ontology
    for attribute, identifier in (
        ("t1", "TST:001"),
        ("t2", "TST:002"),
        ("t3", "TST:003"),
    ):
        setattr(self, attribute, ontology.create_term(identifier))
    self.has_part = ontology.create_relationship("has_part")
(count, IdList) = getCountAndIdList(searchExpression) print("Search for ", termId, ":", searchExpression, " returned ", count, "hits") return ((termId, name, count, IdList)) def getCountForSubBranch(onto, rootTermId, results={}): term = onto[rootTermId] subclasses = [sc for sc in term.subclasses(distance=1)] for sc in subclasses: if sc.id == rootTermId: continue elif sc.id in results.keys(): continue else: time.sleep( 0.2 ) # Be sure not to hit the Entrez limit of queries per second results[sc.id] = getCountForTermId(onto, sc.id) results = getCountForSubBranch(onto, sc.id, results) return (results) if __name__ == '__main__': onto = pronto.Ontology("addicto.obo") termId = "ADDICTO:0000279" # root -- product results = getCountForSubBranch(onto, termId)
parser = argparse.ArgumentParser(description='Generate term list from Disease Ontology and UMLS Metathesarus for cancer-specific terms') parser.add_argument('--diseaseOntologyFile', required=True, type=str, help='Path to the Disease Ontology OBO file') parser.add_argument('--cancerStopwords',required=True,type=str,help='File containing cancer terms to ignore') parser.add_argument('--umlsConceptFile', required=True, type=str, help='Path on the MRCONSO.RRF file in UMLS metathesaurus') parser.add_argument('--customAdditions', required=False, type=str, help='Some custom additions to the wordlist') parser.add_argument('--customDeletions', required=False, type=str, help='Some custom deletions from the wordlist') parser.add_argument('--outFile', required=True, type=str, help='Path to output wordlist file') args = parser.parse_args() print("Loading metathesaurus...") metathesaurus = loadMetathesaurus(args.umlsConceptFile) metathesaurusMainTerm = { terms[0].lower():cuid for cuid,terms in metathesaurus.items() } print("Loading disease ontology...") ont = pronto.Ontology(args.diseaseOntologyFile) cancerRoot = ont.get('DOID:162') print("Loading cancer stopwords...") with codecs.open(args.cancerStopwords,'r','utf8') as f: cancerstopwords = [ line.strip().lower() for line in f ] cancerstopwords = set(cancerstopwords) customAdditions = defaultdict(list) if args.customAdditions: print("Loading additions...") with codecs.open(args.customAdditions,'r','utf-8') as f: for line in f: termid,singleterm,terms = line.strip().split('\t') customAdditions[termid] += terms.split('|') customDeletions = defaultdict(list)
# Report how many gene identifiers listed in notes.txt can be matched to a
# term name or synonym of the ARO ontology (aro.owl).
import pronto
from glob import glob

aro = pronto.Ontology('aro.owl')

# Map every term name and synonym description to the id of its term.
name2id = {}
for t in aro.terms():
    # Terms without a name cannot be looked up by name and would break the
    # lower-casing below.
    if t.name is not None:
        name2id[t.name] = t.id
    for syn in t.synonyms:
        assert syn.scope == 'EXACT'
        name2id[syn.description] = t.id
# Lookups are case-insensitive.
name2id = {k.lower(): v for k, v in name2id.items()}

matched = []
unmatched = []
for fname in glob('notes.txt'):
    # Close the file deterministically instead of leaving the handle open.
    with open(fname) as notes:
        for line in notes:
            gene = line.split(':')[0]
            if gene.lower() in name2id:
                matched.append(gene)
                continue
            if gene[:3] == 'bla':
                # NOTE(review): the prefix test looks at 3 characters but 4
                # are stripped - presumably the names carry a separator after
                # 'bla'; confirm against notes.txt.
                gene = gene[4:]
                if gene.lower() in name2id:
                    matched.append(gene)
                    continue
            # Every identifier that matched neither way is counted, so the
            # reported total covers all lines read.
            unmatched.append(gene)

total = len(matched) + len(unmatched)
# Avoid ZeroDivisionError when notes.txt is missing or empty.
frac = len(matched) / total if total else 0.0
print(
    f'Matched {len(matched)} of {len(matched)+len(unmatched)} ({frac:.2%}) of identifiers'
)
def test_nucleus(self):
    """Load the ``equivNodeSetTest`` fixture; the test passes if loading does not raise."""
    fixture = self.get_path("equivNodeSetTest")
    pronto.Ontology(fixture)
#!/usr/bin/env python3 import sys import pronto GO_GENES = sys.argv[1] MY_GO_ID = sys.argv[2] # create an object of class Ontology with the GO ontology ont = pronto.Ontology('/Users/smr/Desktop/PFB2017/pfb2017/files/go.owl') # get the term name of the provided GO ID term_obj = ont[MY_GO_ID] term_name = term_obj.name print("These genes have all been annotated with" , MY_GO_ID + ', "' + term_name + '" or any of its child terms' ) # add the parent GO ID to dictionary of IDs to search for all_children={} all_children[MY_GO_ID] = term_name # add all children of the parent term to dictionary for child in ont[MY_GO_ID].rchildren(): all_children[child.id] = child.name # open genes file and add gene names and their annotatoted GO terms to a dictionary genes = {} file = open(GO_GENES , "r") for line in file: line = line.rstrip() columns = line.split("\t")
# Exercise outline:
#   1. Create an 'Ontology' object from the gene ontology owl file.
#   2. Take a Gene Ontology accession (e.g. GO:0006355) from the command line
#      and get its term name.
#   3. Use pronto's rchildren() to retrieve all child terms and store them in
#      a dictionary: term id -> term name (the 'id' attribute of each term
#      returned by rchildren() gives the key).
#   4. Also add the parent term to the dictionary.
#   5. Open a file generated from biomart with the columns
#      "Gene or transcript ID", "GO TERM ID", "GO TERM Name".
#   6. Check whether each gene's GO TERM is the given parent GO TERM ID or one
#      of its children.
# Steps 5 and 6 are not carried out in the code below.
import pronto
import sys

GO_term = sys.argv[1]

ont = pronto.Ontology('/Users/admin/go.owl')
term_obj = ont[GO_term]
term_name = term_obj.name

headline_tail = GO_term + ', "' + term_name + '" or any of its child terms'
print("These genes have all been annotated with", headline_tail)

# Children first, then the parent itself.
all_children = {child.id: child.name for child in ont[GO_term].rchildren()}
all_children[GO_term] = term_name

print(len(all_children))
def test_length(self):
    """Term HP:0009882 of ``hp.obo`` is expected to have ten alternate ids."""
    hp = pronto.Ontology(
        os.path.join(utils.DATADIR, "hp.obo"),
        import_depth=0,
        threads=1,
    )
    alternate_ids = hp["HP:0009882"].alternate_ids
    self.assertEqual(len(alternate_ids), 10)
def test_repr_new(self):
    """An ontology created without arguments has the repr ``Ontology()``."""
    actual = repr(pronto.Ontology())
    self.assertEqual(actual, "Ontology()")
print('duplicate items for ChEBI:{}: {} {}'.format( ch, it, items.get(ch)), file=sys.stderr) dups.add(ch) continue items[ch] = it if taut is None: continue t = tauts.get(ch) if t is not None: t.add(taut) else: tauts[ch] = set([taut]) print('reading ChEBI...', file=sys.stderr) ont = pronto.Ontology('chebi.obo') ref = {STATED_IN: CHEBI_RELEASE} for ch in items.keys(): term = ont.get('CHEBI:' + ch) if term is None: print('Obsolete CHEBI:{} on {}'.format(ch, items.get(ch)), file=sys.stderr) continue if ch in dups: print('duplicate requested ChEBI subject: {}'.format(ch), file=sys.stderr) continue tt = tauts.get(ch) for rel in term.relationships: if rel.name == 'is tautomer of': tset = term.relationships.get(rel)
def test_repr_path(self):
    """The repr of an ontology loaded from a path shows that path."""
    pato_path = os.path.join(DATADIR, "pato.obo")
    expected = "Ontology({!r})".format(pato_path)
    actual = repr(pronto.Ontology(pato_path))
    self.assertEqual(actual, expected)
def AddSoftwareEngineerTerms(self):
    """Add a fixed software-engineering glossary to the example ontology.

    Loads ``ExampleOntology/nmrCVTest.owl`` below ``APP_ROOT`` (creating the
    folder when it is missing), builds one ``Term`` per glossary entry with
    ids ``ONT:000001``, ``ONT:000002``, ... and includes them in the
    ontology. The ontology is then printed in OBO and JSON form, followed by
    the values yielded by ``Relationship.topdown()``.

    A glossary value is either a description string or a dict describing a
    group: the dict entry named like the group holds the group's own
    description, and every other entry becomes a child term that is
    ``part_of`` the group term.

    Returns:
        The ontology with the glossary terms included.
    """
    print(
        "********************************* Adding SoftwareEngineer Terms to the ontology **************************************"
    )
    # Build the path from components so it does not depend on a hard-coded
    # Windows separator.
    folder = os.path.join(APP_ROOT, 'ExampleOntology')
    if not os.path.isdir(folder):
        os.mkdir(folder)
    target = os.path.join(folder, "nmrCVTest.owl")
    ontSoftwareEngineer = pronto.Ontology(target)

    # Glossary: term name -> description, or term name -> group dict.
    termList = {
        'Software engineering': 'Software engineering is an engineering branch associated with software system development',
        'Computer software': 'Computer software is a complete package, which includes software program, its documentation and user guide on how to use the software',
        'computer program': 'A computer program is piece of programming code which performs a well defined task',
        'Software Development Life Cycle(SDLC)': 'Software Development Life Cycle, or software process is the systematic development of software by following every stage in the development process namely, Requirement Gathering, System Analysis, Design, Coding, Testing, Maintenance and Documentation in that order',
        'Software project management': 'Software project management is process of managing all activities like time, cost and quality management involved in software development',
        'software project manager': 'A software project manager is a person who undertakes the responsibility of carrying out the software project',
        'Software scope': 'Software scope is a well-defined boundary, which encompasses all the activities that are done to develop and deliver the software product. The software scope clearly defines all functionalities and artifacts to be delivered as a part of the software. The scope identifies what the product will do and what it will not do, what the end product will contain and what it will not contain',
        'Baseline': 'Baseline is a measurement that defines completeness of a phase. After all activities associated with a particular phase are accomplished, the phase is complete and acts as a baseline for next phase',
        'Software Configuration management': 'Software Configuration management is a process of tracking and controlling the changes in software in terms of the requirements, design, functions and development of the product',
        'measure project execution': 'We can measure project execution by means of Activity Monitoring, Status Reports and Milestone Checklists',
        'feasibility study': {
            'feasibility study': 'It is a measure to assess how practical and beneficial the software project development will be for an organization. The software analyzer conducts a thorough study to understand economic, technical and operational feasibility of the project',
            'Economic': 'Resource transportation, cost for training, cost of additional utilities and tools and overall estimation of costs and benefits of the project',
            'Technical': 'Checking whether Is it possible to develop this system ? Assessing suitability of machine(s) and operating system(s) on which software will execute, existing developers’ knowledge and skills, training, utilities or tools for project',
            'Operational': 'Checking whether Can the organization adjust smoothly to the changes done as per the demand of project ? Is the problem worth solving ?',
        },
        'gather requirements': 'Requirements can be gathered from users via interviews, surveys, task analysis, brainstorming, domain analysis, prototyping, studying existing usable version of software, and by observation',
        'SRS or Software Requirement Specification': 'SRS or Software Requirement Specification is a document produced at the time of requirement gathering process. It can be also seen as a process of refining requirements and documenting them',
        'functional requirements': ' Functional requirements are functional features and specifications expected by users from the proposed software product',
        'non-functional requirements': 'Non-functional requirements are implicit and are related to security, performance, look and feel of user interface, interoperability, cost etc',
        'concurrency': 'Concurrency is the tendency of events or actions to happen simultaneously. In software, when two or more processes execute simultaneously, they are called concurrent processes',
        'cohesion': 'Cohesion is a measure that defines the degree of intra-dependability among the elements of the module',
        'Coupling ': 'Coupling is a measure that defines the level of inter-dependability among modules of a program',
        'Data dictionary': 'Data dictionary is referred to as meta-data. Meaning, it is a repository of data about data. Data dictionary is used to organize the names and their references used in system such as objects and files along with their naming conventions',
        'Structured design': 'Structured design is a conceptualization of problem into several well-organized elements of solution. It is concern with the solution design and based on ‘divide and conquer’ strategy',
        'top-down and bottom-up design model': 'Top-down model starts with generalized view of system and decomposes it to more specific ones, whereas bottom-up model starts with most specific and basic components first and keeps composing the components to get higher level of abstraction',
        'functional programming': 'Functional programming is style of programming language, which uses the concepts of mathematical function. It provides means of computation as mathematical functions, which produces results irrespective of program state',
        'acid properties': {
            'acid properties': 'ACID is an acronym for atomicity, consistency, isolation, and durability',
            'Atomicity ': 'This property states that a transaction must be treated as an atomic unit, that is, either all of its operations are executed or none. There must be no state in a database where a transaction is left partially completed. States should be defined either before the execution of the transaction or after the execution/abortion/failure of the transaction',
            'Consistency ': 'The database must remain in a consistent state after any transaction. No transaction should have any adverse effect on the data residing in the database. If the database was in a consistent state before the execution of a transaction, it must remain consistent after the execution of the transaction as well',
            'Durability ': 'The database should be durable enough to hold all its latest updates even if the system fails or restarts. If a transaction updates a chunk of data in a database and commits, then the database will hold the modified data. If a transaction commits but the system fails before the data could be written on to the disk, then that data will be updated once the system springs back into action',
            'Isolation ': 'In a database system where more than one transaction are being executed simultaneously and in parallel, the property of isolation states that all the transactions will be carried out and executed as if it is the only transaction in the system. No transaction will affect the existence of any other transaction',
        },
        'JVM': 'JVM is an acronym for Java Virtual Machine, it is an abstract machine which provides the runtime environment in which java bytecode can be executed. It is a specification. JVMs are available for many hardware and software platforms (so JVM is platform dependent)',
        'JRE': 'JRE stands for Java Runtime Environment. It is the implementation of JVM',
        'JDK': 'JDK is an acronym for Java Development Kit. It physically exists. It contains JRE + development tools',
        'Just-In-Time(JIT) compiler': 'It is used to improve the performance. JIT compiles parts of the byte code that have similar functionality at the same time, and hence reduces the amount of time needed for compilation.Here the term “compiler” refers to a translator from the instruction set of a Java virtual machine (JVM) to the instruction set of a specific CPU',
        'platform': 'A platform is basically the hardware or software environment in which a program runs. There are two types of platforms software-based and hardware-based. Java provides software-based platform',
        'classloader': 'The classloader is a subsystem of JVM that is used to load classes and interfaces.There are many types of classloaders e.g. Bootstrap classloader, Extension classloader, System classloader, Plugin classloader etc',
        'difference between object oriented programming language and object based programming language': 'Object based programming languages follow all the features of OOPs except Inheritance. Examples of object based programming languages are JavaScript, VBScript etc',
        'constructor': 'Constructor is just like a method that is used to initialize the state of an object. It is invoked at the time of object creation',
    }

    # Turn the glossary into Term objects keyed by their generated id.
    x = 0
    termListObject = {}
    for key, value in termList.items():
        x = x + 1
        ontVal = 'ONT:' + str(x).zfill(6)
        # The value decides the shape of the entry; the key is always a
        # string, so testing the key never reached the group handling.
        if isinstance(value, str):
            termListObject[ontVal] = Term(ontVal, key, value)
            continue

        # Group: the parent term takes the description stored under its own
        # name, the remaining entries become its parts.
        termListObject[ontVal] = Term(ontVal, key, value.get(key, ''))
        for innerKey, innerValue in value.items():
            if innerKey == key:
                continue
            x = x + 1
            ontValInner = 'ONT:' + str(x).zfill(6)
            termListObject[ontValInner] = Term(
                ontValInner, innerKey, innerValue,
                {Relationship('part_of'): [ontVal]})

    # Add the prepared terms to the ontology.
    for term in termListObject.values():
        ontSoftwareEngineer.include(term)

    print(ontSoftwareEngineer.obo)
    print(
        "********************************* The Json Obj **************************************"
    )
    print(ontSoftwareEngineer.json)
    print(
        "********************************* The Json Obj **************************************"
    )
    for r in Relationship.topdown():
        print(r)
    return ontSoftwareEngineer
def test_repr_path_with_import_depth(self):
    """The repr shows both the path and an explicitly given ``import_depth``."""
    pato_path = os.path.join(DATADIR, "pato.obo")
    expected = "Ontology({!r}, import_depth=1)".format(pato_path)
    actual = repr(pronto.Ontology(pato_path, import_depth=1))
    self.assertEqual(actual, expected)
# Check that a SDGIO term becomes visible in ENVO after merging SDGIO into it.
import pronto

# Other ontologies previously tried here (kept for reference):
#   http://nmrml.org/cv/v1.1.0/nmrCV.owl
#   https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo
#   http://ontologies.berkeleybop.org/gaz.owl
#   http://ontologies.berkeleybop.org/gaz.obo
# NOTE: json should be a method for a term just as it is a method for the
# whole ontology.

SDGIO_URL = 'https://raw.githubusercontent.com/SDG-InterfaceOntology/sdgio/master/sdgio.owl'
ENVO_URL = 'https://raw.githubusercontent.com/EnvironmentOntology/envo/master/envo.obo'
PROBE_ID = 'SDGIO:00000061'

sd = pronto.Ontology(SDGIO_URL)
print(PROBE_ID in sd)

envo = pronto.Ontology(ENVO_URL)
print(PROBE_ID in envo)  # membership before the merge

envo.merge(sd)
print(PROBE_ID in envo)  # membership after the merge
def test_repr_file(self):
    """An ontology loaded from an open file reports the file's path in its repr."""
    pato_path = os.path.join(DATADIR, "pato.obo")
    expected = "Ontology({!r})".format(pato_path)
    with open(pato_path, "rb") as src:
        ont = pronto.Ontology(src)
    self.assertEqual(repr(ont), expected)
import json import pronto from fairshake_assessments.core import metric from fairshake_assessments.utils.jsonld_frame import jsonld_frame from fairshake_assessments.utils.force_list import force_list from fairshake_assessments.utils.IRI_to_NS import IRI_to_NS from fairshake_assessments.utils.fetch_and_cache import fetch_and_cache EDAM = pronto.Ontology(fetch_and_cache('http://edamontology.org/EDAM.owl', '.cache/EDAM.owl')) EDAM_reversed = { node.name: node.id for node in map(EDAM.get, EDAM) if node } EDAM_reversed_synonyms = { synonym: node.id for node in map(EDAM.get, EDAM) if node for synonym in node.synonyms } @metric({ '@id': 142, 'name': 'File type', 'description': 'A file type is present with a valid EDAM identifier', 'principle': 'Interoperable', }) def metric_142_edam(doc): filetypes = list(map(json.loads,set( json.dumps({ 'value': information['value'], 'valueIRI': information['valueIRI'], }) for node in jsonld_frame(doc, { '@type': 'Dataset', 'types': { 'information': { 'value': { '@default': '' }, 'valueIRI': { '@default': '' }
def test_repr_file_handle(self):
    """An ontology loaded from an in-memory handle shows that handle in its repr."""
    pato_path = os.path.join(DATADIR, "pato.obo")
    with open(pato_path, "rb") as src:
        payload = src.read()
    handle = io.BytesIO(payload)
    ont = pronto.Ontology(handle)
    expected = "Ontology({!r})".format(handle)
    self.assertEqual(repr(ont), expected)
addicto_files = [] for root, dirs_list, files_list in os.walk(path): for file_name in files_list: full_file_name = os.path.join(root, file_name) addicto_files.append(full_file_name) entries = {} bad_entries = [] revisionmsg = "February 2022 ADDICTO release" # All the external content + hierarchy is in the file addicto_external.obo # Must be parsed and sent to AOVocab. externalonto = pronto.Ontology("addicto_external.obo") for term in externalonto.terms(): label_id_map[term.name] = term.id # get the "ready" input data, indexed by ID for filename in addicto_files: with open(filename, 'r') as csvfile: csvreader = csv.reader(csvfile) header = next(csvreader) for row in csvreader: rowdata = row id = rowdata[0] entries[id] = (header, rowdata) ### Update entries with new data
def setUpClass(cls):
    """Parse ``ms.obo`` once and keep the ontology as ``cls.ms``.

    Warnings are turned into errors for the tests, except
    ``UnicodeWarning`` which is ignored.
    """
    warnings.simplefilter('error')
    warnings.simplefilter('ignore', category=UnicodeWarning)
    with open(os.path.join(DATADIR, "ms.obo"), "rb") as f:
        # Parse from the handle opened here. The previous code passed
        # ``cls.file``, which this method never assigns, and left ``f`` unused.
        # NOTE(review): if other code in the class reads ``cls.file``, it needs
        # its own handle - confirm.
        cls.ms = pronto.Ontology(f)
process.append(name + ' ' +'('+x+')') except: pass unique_terms = np.unique(process) return(unique_terms) ############################# ^ FOR GO ontology only ^ ############################# ############################## Upload ontology files ############################### # fly anatomy ontology file: http://www.obofoundry.org/ontology/fbbt.html fly_anatomy_ont = pronto.Ontology('/Users/maayanlab/Downloads/fbbt.obo.txt') # fly phenotype ontology: http://www.obofoundry.org/ontology/dpo.html fly_phenotype_ont = pronto.Ontology('/Users/maayanlab/Downloads/fbcv.obo.txt') # worm anatomy ontology file: http://www.obofoundry.org/ontology/wbbt.html worm_anatomy_ont = pronto.Ontology('/Users/maayanlab/Downloads/wbbt.owl') # worm phenptype file: http://www.obofoundry.org/ontology/wbphenotype.html worm_phenotype_ont = pronto.Ontology('/Users/maayanlab/Downloads/wbphenotype.obo.txt') # zebrafish anatomy ontology file: http://www.obofoundry.org/ontology/zfa.html zebrafish_anatomy_ont = pronto.Ontology('/Users/maayanlab/Downloads/zfa.obo.txt') # zebrafish phenotype ontology file: https://zfin.org/downloads/gene_expression_phenotype.txt zebrafish_phenotype_ont = pd.DataFrame.from_csv('/Users/maayanlab/Downloads/phenotype_fish_2018.10.19.txt',sep= '\t', header=1).reset_index() # yeast cellular component ontology : http://www.geneontology.org/ontology/subsets/goslim_yeast.obo yeast_anatomy_ont = pronto.Ontology('/Users/maayanlab/Downloads/goslim_yeast.obo.txt')
def test_remote_owl_imports(self):
    """Load the remote ``xao.owl`` ontology, imports included, and check it."""
    remote_url = "http://purl.obolibrary.org/obo/xao.owl"
    loaded = pronto.Ontology(remote_url)
    self.check_ontology(loaded)