Example #1
0
 def test_abox(self):
     """Load the ``abox`` fixture and check that it exposes exactly three terms."""
     with warnings.catch_warnings():
         # Only NotImplementedWarning is silenced, and only while loading.
         warnings.simplefilter(
             action="ignore",
             category=pronto.warnings.NotImplementedWarning,
         )
         abox_path = self.get_path("abox")
         abox = pronto.Ontology(abox_path)
     term_count = len(abox.terms())
     # Person (implicit), Male and Female
     self.assertEqual(term_count, 3)
import json
import pronto
from fairshake_assessments.core import metric
from fairshake_assessments.utils.jsonld_frame import jsonld_frame
from fairshake_assessments.utils.force_list import force_list
from fairshake_assessments.utils.IRI_to_NS import IRI_to_NS
from fairshake_assessments.utils.fetch_and_cache import fetch_and_cache

# Load the NCBITaxon ontology at import time. fetch_and_cache is given the
# source URL and a cache path; presumably it downloads once and returns the
# local path -- confirm against fetch_and_cache.
NCBITaxon = pronto.Ontology(
    fetch_and_cache('http://purl.obolibrary.org/obo/ncbitaxon.obo',
                    '.cache/ncbitaxon.obo'))
# Reverse lookup: node name -> node id, skipping falsy results of
# NCBITaxon.get. If two nodes share a name, the one iterated last wins.
NCBITaxon_reversed = {
    node.name: node.id
    for node in map(NCBITaxon.get, NCBITaxon) if node
}
# Reverse lookup: synonym -> node id, one entry per synonym of every node.
# NOTE(review): this walks the whole ontology a second time.
NCBITaxon_reversed_synonyms = {
    synonym: node.id
    for node in map(NCBITaxon.get, NCBITaxon) if node
    for synonym in node.synonyms
}


@metric({
    '@id': 143,
    'name': 'Taxonomy',
    'description': 'A taxonomy is present with a valid NCBITaxon identifier',
    'principle': 'Interoperable',
})
def metric_143_ncbitaxon(doc):
    taxonomies = list(
        map(
import pandas
import pronto
import csv
import re
from pronto import Relationship

# Load the three ontologies whose paths are supplied through the snakemake
# rule parameters.
aro = pronto.Ontology(snakemake.params["ontology_aro"])
ro = pronto.Ontology(snakemake.params["ontology_ro"])
mo = pronto.Ontology(snakemake.params["ontology_mo"])
# Tab-separated table of ARO categories.
cat = snakemake.params["aro_categories"]
categories = pandas.read_csv(cat, sep="\t")

# Keep only the rows whose 'ARO Category' is 'Resistance Mechanism'.
resistance_mechanism = categories.loc[categories['ARO Category'] ==
                                      'Resistance Mechanism']

# Dictionary mapping 'ARO Accession' -> 'ARO Name' for those rows.
obo2resistance_mechanism = pandas.Series(
    resistance_mechanism['ARO Name'].values,
    index=resistance_mechanism['ARO Accession']).to_dict()

# Fold the other two ontologies into the ARO ontology object.
aro.merge(ro)
aro.merge(mo)

ontology_results = []

# Tab-separated RGI results table from the rule's 'rgi' input.
rgi_results = pandas.read_csv(snakemake.input['rgi'], sep="\t")


def get_all_path(obo_list, all_obo):
    new_obo = []
    for obo in obo_list:
        new_obo += obo.parents
Example #4
0
    '198346': '41688',
    '22473': '32988',
    '3669': '16822',
    '23008': '16646',
    '3736': '48947',
    '578003': '65172'
}
"""has_input
has_intermediate
has_output
has_participant
has_primary_input
has_primary_input_or_output
has_primary_output"""

ont = pronto.Ontology('/home/ralf/go-ontology/src/ontology/go-edit.obo')
check = True

for term in ont.terms.values():
    goid = term.id
    if goid[:3] != 'GO:':
        continue
    goit = efs.get(goid)
    if goit is None:
        continue
    ns = term.other.get('namespace')
    if ns is None:
        continue
    if ns[0] != 'biological_process' and ns[0] != 'molecular_function':
        continue
    l = []
Example #5
0
# Definition xref prefixes (upper-cased) that are written under
# <externalDbXrefs>, mapped to the dbName emitted in the XML.
_PSIMI_EXTERNAL_DB_NAMES = {"PUBMED": "PubMed", "DOI": "DOI", "RESID": "RESID"}

# Typedef names mapped to the relationship label emitted in the XML.
_PSIMI_RELATIONSHIP_LABELS = {"is a": "is_a", "part of": "part_of"}


def _psimi_cdata(text):
    """Wrap *text* in the literal ``![CDATA[...]]`` marker used in the output."""
    return "![CDATA[" + text + "]]"


def _psimi_append_cv_xref(parent, accession_text, db_name):
    """Append one ``CvXref`` element (accession + dbName) under *parent*."""
    xref_elt = etree.SubElement(parent,
                                "com.genebio.nextprot.dataloader.cv.CvXref")
    accession = etree.SubElement(xref_elt, "accession")
    dbname = etree.SubElement(xref_elt, "dbName")
    accession.text = _psimi_cdata(accession_text)
    dbname.text = db_name


def _psimi_append_synonyms(wrappedbean, method):
    """Append the ``cvTermSynonyms`` element for *method* under *wrappedbean*."""
    cvtermsynonyms = etree.SubElement(wrappedbean, "cvTermSynonyms")
    print("=======================\t" + method.id)
    for synonym in method.synonyms:
        # Read the synonym's attributes directly. Scraping repr() truncated
        # any description containing "(" and depended on the repr quoting.
        synonym_type = synonym.type
        is_alternate = (synonym_type is not None
                        and "alternate" in synonym_type.id)

        cvtermsynonym_data = etree.SubElement(
            cvtermsynonyms, "com.genebio.nextprot.datamodel.cv.CvTermSynonym")
        synonymname = etree.SubElement(cvtermsynonym_data, "synonymName")
        synonymname.text = synonym.description
        synonymtype = etree.SubElement(cvtermsynonym_data, "synonymType")
        synonymtype.text = "NAME"  # only value found in nextprot xml files
        ismain = etree.SubElement(cvtermsynonym_data, "isMain")
        # A synonym is "main" unless its type id marks it as alternate;
        # synonyms without a type are main as well.
        ismain.text = "false" if is_alternate else "true"
        etree.SubElement(cvtermsynonym_data, "cvTerm", reference="../../..")


def _psimi_append_relationships(cvtermwrapper_data, method):
    """Append the ``relationships`` element for *method*."""
    relationships = etree.SubElement(cvtermwrapper_data, "relationships")
    for typedef, targets in method.relationships.items():
        # Typedefs other than "is a" / "part of" leave the label empty.
        label = _PSIMI_RELATIONSHIP_LABELS.get(typedef.name)
        for target in targets:
            relationship_data = etree.SubElement(
                relationships,
                "com.genebio.nextprot.dataloader.cv.Relationship")
            relationship_elt = etree.SubElement(relationship_data,
                                                "relationship")
            if label is not None:
                relationship_elt.text = label
            accession = etree.SubElement(relationship_data, "accession")
            accession.text = _psimi_cdata(target.id)
            termcategory = etree.SubElement(relationship_data, "termCategory")
            termcategory.text = "![CDATA[]]"


def _psimi_append_xrefs(cvtermwrapper_data, definition):
    """Append ``cvXrefs`` and ``externalDbXrefs`` built from *definition*'s xrefs."""
    cvxrefs = etree.SubElement(cvtermwrapper_data, "cvXrefs")
    externaldbxrefs = etree.SubElement(cvtermwrapper_data, "externalDbXrefs")

    # A term without a definition simply has no xrefs to emit.
    xrefs = definition.xrefs if definition is not None else ()
    for xref in xrefs:
        # Split on the first colon only, so accessions that contain ":"
        # themselves are kept whole; ids without a prefix are skipped.
        db, sep, ac = xref.id.partition(":")
        if not sep:
            continue
        db = db.upper()
        if db in _PSIMI_EXTERNAL_DB_NAMES:
            _psimi_append_cv_xref(externaldbxrefs, ac,
                                  _PSIMI_EXTERNAL_DB_NAMES[db])
        elif db == "GO":  # does not exist in dbxref but cvxref
            _psimi_append_cv_xref(cvxrefs, xref.id, "GO")


def _psimi_append_term(root, method):
    """Append the full ``Cvtermwrapper_Data`` element for one non-obsolete term."""
    cvtermwrapper_data = etree.SubElement(
        root, "com.genebio.nextprot.dataloader.cv.Cvtermwrapper_Data")
    psimi = etree.SubElement(cvtermwrapper_data, "termCategory")
    psimi.text = "PSI-MI"
    wrappedbean = etree.SubElement(cvtermwrapper_data, "wrappedBean")

    cvname = etree.SubElement(wrappedbean, "cvname")
    cvname.text = _psimi_cdata(method.name or "")

    # A missing definition is written as an empty CDATA marker.
    definition = method.definition
    description = etree.SubElement(wrappedbean, "description")
    description.text = _psimi_cdata(
        definition if definition is not None else "")

    status = etree.SubElement(wrappedbean, "status")
    status.text = "VALID"

    dbxref = etree.SubElement(wrappedbean, "dbXref")
    resourcetype = etree.SubElement(dbxref, "resourceType")
    resourcetype.text = "DATABASE"
    accession = etree.SubElement(dbxref, "accession")
    accession.text = _psimi_cdata(method.id)
    cvdatabase = etree.SubElement(dbxref, "cvDatabase")
    cvdb_name = etree.SubElement(cvdatabase, "cvName")
    cvdb_name.text = "PSI_MI"

    _psimi_append_synonyms(wrappedbean, method)
    _psimi_append_relationships(cvtermwrapper_data, method)

    secondaryacs = etree.SubElement(cvtermwrapper_data, "secondaryAcs")
    for secondaryac_obj in method.alternate_ids:
        string_ac = etree.SubElement(secondaryacs, "string")
        # NOTE(review): repr() is kept from the original; if alternate ids are
        # plain strings this keeps the surrounding quotes -- confirm that the
        # consumer of the XML expects them.
        string_ac.text = _psimi_cdata(repr(secondaryac_obj))

    _psimi_append_xrefs(cvtermwrapper_data, definition)


def psimi_loader(property_file, step):
    """Download the PSI-MI OBO file and export its terms to ``cv-psi-mi.xml``.

    The output folder is obtained from ``config_output(property_file)`` and the
    OBO file from ``wget_obo_url(output_folder)``. Every non-obsolete term of
    the ontology is written as one ``Cvtermwrapper_Data`` element holding its
    name, definition, synonyms, relationships, alternate ids and definition
    xrefs.

    Args:
        property_file: passed to ``config_output`` to obtain the output folder.
        step: currently unused by this function.

    NOTE(review): the XML is written to ``cv-psi-mi.xml`` in the current
    working directory, not into ``output_folder`` -- confirm this is intended.
    """
    logging.info("property file loading starts")
    output_folder = config_output(property_file)
    logging.info("property file loading ends")

    logging.info("psimi obo file downloading starts")
    obo_file = wget_obo_url(output_folder)
    logging.info("psimi obo file downloading ends")

    logging.info("obo file loading starts")
    mi = pronto.Ontology(obo_file)
    #mi = pronto.Ontology.from_obo_library("mi.obo") #can do it online also

    root = etree.Element("object-stream")
    for method in mi.terms():
        if method.obsolete:
            continue  # do not write obsolete methods
        _psimi_append_term(root, method)

    xmlstr = etree.tostring(root,
                            xml_declaration=True,
                            encoding="UTF-8",
                            pretty_print=True).decode()

    # Open the file only once the document is complete, so a failure while
    # building it does not leave an empty file behind. The encoding is given
    # explicitly so it matches the UTF-8 XML declaration on every platform.
    with open("cv-psi-mi.xml", 'w', encoding="utf-8") as outputf:
        outputf.write(xmlstr)

    logging.info("obo file loading ends")
Example #6
0
 def setUp(self):
     """Create an empty ontology and four entities ``TST:001``..``TST:004``."""
     ontology = pronto.Ontology()
     self.ont = ontology
     for index in range(1, 5):
         entity = self.create_entity(ontology, "TST:{:03d}".format(index))
         # Stored as self.t1 .. self.t4, in creation order.
         setattr(self, "t{}".format(index), entity)
Example #7
0
#!/usr/bin/env python3

import pronto

# Load the ontology from 'go.owl' (path relative to the working directory).
ont = pronto.Ontology('go.owl')

# Look up one term by its identifier and read its name.
term_obj = ont['GO:0006355']
term_name = term_obj.name


# Print the term object, then its name.
print(term_obj)
print(term_name)





Example #8
0
 def test_whole_ontology(self):
     """Load the ``iao.owl`` data file and check that it yields 245 terms."""
     path = os.path.join(__file__, "..", "..", "data", "iao.owl")
     # Scope the filter change with catch_warnings so that "ignore" does not
     # leak into the warning state seen by tests that run afterwards.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         iao = pronto.Ontology(os.path.realpath(path))
         self.assertEqual(len(iao.terms()), 245)
Example #9
0
    parser.add_argument(
        '--umlsConceptFile',
        required=True,
        type=str,
        help='Path on the MRCONSO.RRF file in UMLS metathesaurus')
    parser.add_argument('--outFile',
                        required=True,
                        type=str,
                        help='Path to output wordlist file')
    args = parser.parse_args()

    print "Loading metathesaurus..."
    metathesaurus = loadMetathesaurus(args.umlsConceptFile)

    print "Loading disease ontology..."
    ont = pronto.Ontology(args.ontologyFile)
    #cancerTerm = findTerm(ont,'cancer')

    print "Loading stopwords..."
    with codecs.open(args.stopwordsFile, 'r', 'utf8') as f:
        stopwords = [line.strip().lower() for line in f]
        stopwords = set(stopwords)

    print "Processing"
    allterms = []
    # Skip down to the grandchildren of the cancer term and then find all their descendents (recursive children)
    count = 0
    for term in ont:
        # Get the CUIDs for this term
        cuids = getCUIDs(term)
Example #10
0
 def setUp(self):
     """Create an empty ontology with three terms and a ``has_part`` relationship."""
     ontology = pronto.Ontology()
     self.ont = ontology
     for index in range(1, 4):
         created = ontology.create_term("TST:{:03d}".format(index))
         # Stored as self.t1 .. self.t3, in creation order.
         setattr(self, "t{}".format(index), created)
     self.has_part = ontology.create_relationship("has_part")
Example #11
0
    (count, IdList) = getCountAndIdList(searchExpression)
    print("Search for ", termId, ":", searchExpression, " returned ", count,
          "hits")
    return ((termId, name, count, IdList))


def getCountForSubBranch(onto, rootTermId, results=None):
    """Collect ``getCountForTermId`` results for every term below *rootTermId*.

    Walks the direct subclasses of ``onto[rootTermId]`` recursively. Each
    subclass id not yet present in *results* is queried once and stored under
    its id.

    Args:
        onto: ontology indexable by term id.
        rootTermId: id of the term whose sub-branch is visited; the root
            itself is not queried.
        results: optional dict to accumulate into. It is updated in place and
            returned. When omitted, a fresh dict is created for this call.

    Returns:
        The dict mapping term id to the value of ``getCountForTermId``.
    """
    # A fresh dict per call: a ``{}`` default would be shared between calls
    # and silently carry results over from one invocation to the next.
    if results is None:
        results = {}

    term = onto[rootTermId]
    for sc in list(term.subclasses(distance=1)):
        # Skip the root itself and anything already queried.
        if sc.id == rootTermId or sc.id in results:
            continue
        time.sleep(
            0.2
        )  # Be sure not to hit the Entrez limit of queries per second
        results[sc.id] = getCountForTermId(onto, sc.id)
        getCountForSubBranch(onto, sc.id, results)
    return results


# Script entry point: load the ontology from "addicto.obo" and collect the
# results for every term below the chosen root term.
if __name__ == '__main__':

    onto = pronto.Ontology("addicto.obo")

    termId = "ADDICTO:0000279"  # root -- product

    # No results dict is passed, so the function's default is used.
    results = getCountForSubBranch(onto, termId)
	parser = argparse.ArgumentParser(description='Generate term list from Disease Ontology and UMLS Metathesarus for cancer-specific terms')
	parser.add_argument('--diseaseOntologyFile', required=True, type=str, help='Path to the Disease Ontology OBO file')
	parser.add_argument('--cancerStopwords',required=True,type=str,help='File containing cancer terms to ignore')
	parser.add_argument('--umlsConceptFile', required=True, type=str, help='Path on the MRCONSO.RRF file in UMLS metathesaurus')
	parser.add_argument('--customAdditions', required=False, type=str, help='Some custom additions to the wordlist')
	parser.add_argument('--customDeletions', required=False, type=str, help='Some custom deletions from the wordlist')
	parser.add_argument('--outFile', required=True, type=str, help='Path to output wordlist file')
	args = parser.parse_args()

	print("Loading metathesaurus...")
	metathesaurus = loadMetathesaurus(args.umlsConceptFile)
	metathesaurusMainTerm = { terms[0].lower():cuid for cuid,terms in metathesaurus.items() }

	print("Loading disease ontology...")
	ont = pronto.Ontology(args.diseaseOntologyFile)
	cancerRoot = ont.get('DOID:162')

	print("Loading cancer stopwords...")
	with codecs.open(args.cancerStopwords,'r','utf8') as f:
		cancerstopwords = [ line.strip().lower() for line in f ]
		cancerstopwords = set(cancerstopwords)

	customAdditions = defaultdict(list)
	if args.customAdditions:
		print("Loading additions...")
		with codecs.open(args.customAdditions,'r','utf-8') as f:
			for line in f:
				termid,singleterm,terms = line.strip().split('\t')
				customAdditions[termid] += terms.split('|')
	customDeletions = defaultdict(list)
import pronto
from glob import glob
aro = pronto.Ontology('aro.owl')

# Term names and synonym descriptions -> term id.
name2id = {}
for t in aro.terms():
    name2id[t.name] = t.id
    for syn in t.synonyms:
        assert syn.scope == 'EXACT'
        name2id[syn.description] = t.id

# Lower-case the keys so the lookups below are case-insensitive.
name2id = {k.lower(): v for k, v in name2id.items()}

matched = []
unmatched = []
for fname in glob('notes.txt'):
    # Context manager so the file handle is closed deterministically.
    with open(fname) as notes:
        for line in notes:
            # The identifier is everything before the first ':' on the line.
            gene = line.split(':')[0]
            if gene.lower() in name2id:
                matched.append(gene)
                continue
            if gene[:3] == 'bla':
                # NOTE(review): four characters are dropped although the
                # tested prefix is three long -- confirm the fourth is a
                # separator.
                stripped = gene[4:]
                if stripped.lower() in name2id:
                    matched.append(stripped)
                    continue
            # Includes 'bla'-prefixed identifiers that still fail to match.
            # These used to be dropped from both lists, which inflated the
            # reported fraction.
            unmatched.append(gene)

total = len(matched) + len(unmatched)
# Guard against an empty or missing notes file (would divide by zero).
frac = len(matched) / total if total else 0.0
print(
    f'Matched {len(matched)} of {total} ({frac:.2%}) of identifiers'
)
Example #14
0
 def test_nucleus(self):
     ont = pronto.Ontology(self.get_path("equivNodeSetTest"))
#!/usr/bin/env python3

import sys
import pronto

GO_GENES = sys.argv[1]
MY_GO_ID = sys.argv[2]

# create an object of class Ontology with the GO ontology 
ont = pronto.Ontology('/Users/smr/Desktop/PFB2017/pfb2017/files/go.owl')

# get the term name of the provided GO ID
term_obj = ont[MY_GO_ID]
term_name = term_obj.name
print("These genes have all been annotated with" , MY_GO_ID + ', "' + term_name + '" or any of its child terms' )


# add the parent GO ID to dictionary of IDs to search for
all_children={}
all_children[MY_GO_ID] = term_name

# add all children of the parent term to dictionary
for child in ont[MY_GO_ID].rchildren():
  all_children[child.id] = child.name

# open genes file and add gene names and their annotated GO terms to a dictionary
genes = {}
file = open(GO_GENES , "r")
for line in file:
  line = line.rstrip()
  columns = line.split("\t")
# Create an object of class type 'Ontology' with the gene ontology owl file.
# Get the term name for a specific Gene ontology accession (i.e., GO:0006355) from the command line
# Use 'pronto rchildren()' to retrieve all children terms of your term and to store the ID of each term in a dictionary with the term name as the value
# [use the 'id' method to retrieve the id of each term object that is returned by 'rchildren()']
# Also add the parent term to the dictionary

# Open a file generated from biomart with these columns:
#    Gene or transcript ID
#    GO TERM ID
#    GO TERM Name
# Check to see if each gene's GO TERM is the provided parent GO TERM ID or one of its children.    


import pronto
import sys

# Term identifier to expand, taken from the first command-line argument.
GO_term = sys.argv[1]

ont = pronto.Ontology('/Users/admin/go.owl')
term_obj = ont[GO_term]
term_name = term_obj.name
_banner_tail = GO_term + ', "' + term_name + '" or any of its child terms'
print("These genes have all been annotated with", _banner_tail)


# id -> name for every term returned by rchildren(); the requested term
# itself is added afterwards.
all_children = {
    descendant.id: descendant.name
    for descendant in ont[GO_term].rchildren()
}
all_children[GO_term] = term_name

print(len(all_children))
Example #17
0
 def test_length(self):
     """``HP:0009882`` in ``hp.obo`` is expected to carry ten alternate ids."""
     hp_path = os.path.join(utils.DATADIR, "hp.obo")
     ontology = pronto.Ontology(hp_path, import_depth=0, threads=1)
     alternate_ids = ontology["HP:0009882"].alternate_ids
     self.assertEqual(len(alternate_ids), 10)
Example #18
0
 def test_repr_new(self):
     """An ontology built without arguments has the bare ``Ontology()`` repr."""
     empty = pronto.Ontology()
     actual = repr(empty)
     self.assertEqual(actual, "Ontology()")
        print('duplicate items for ChEBI:{}: {} {}'.format(
            ch, it, items.get(ch)),
              file=sys.stderr)
        dups.add(ch)
        continue
    items[ch] = it
    if taut is None:
        continue
    t = tauts.get(ch)
    if t is not None:
        t.add(taut)
    else:
        tauts[ch] = set([taut])

print('reading ChEBI...', file=sys.stderr)
ont = pronto.Ontology('chebi.obo')
ref = {STATED_IN: CHEBI_RELEASE}
for ch in items.keys():
    term = ont.get('CHEBI:' + ch)
    if term is None:
        print('Obsolete CHEBI:{} on {}'.format(ch, items.get(ch)),
              file=sys.stderr)
        continue
    if ch in dups:
        print('duplicate requested ChEBI subject: {}'.format(ch),
              file=sys.stderr)
        continue
    tt = tauts.get(ch)
    for rel in term.relationships:
        if rel.name == 'is tautomer of':
            tset = term.relationships.get(rel)
Example #20
0
 def test_repr_path(self):
     """The repr of an ontology loaded from a path shows that path."""
     pato_path = os.path.join(DATADIR, "pato.obo")
     loaded = pronto.Ontology(pato_path)
     actual = repr(loaded)
     expected = "Ontology({!r})".format(pato_path)
     self.assertEqual(actual, expected)
Example #21
0
    def AddSoftwareEngineerTerms(self):
        """Load the example ontology, add the software-engineering terms and return it.

        The glossary is held in ``termList``. A plain string value is the
        description of one term. A dict value is a group: the entry stored
        under the group's own name describes the group term itself, and every
        other entry becomes a child term linked to the group by ``part_of``.
        Ids are assigned sequentially as ``ONT:000001``, ``ONT:000002``, ...

        Returns:
            The ontology loaded from ``ExampleOntology/nmrCVTest.owl`` under
            ``APP_ROOT`` with all glossary terms included.
        """

        print(
            "********************************* Adding SoftwareEngineer Terms to the ontology **************************************"
        )

        # Build the path with os.path.join only. The hard-coded backslash
        # yielded the same path on Windows but, on POSIX, created a directory
        # literally named "ExampleOntology\".
        target_dir = os.path.join(APP_ROOT, 'ExampleOntology')
        if not os.path.isdir(target_dir):
            os.mkdir(target_dir)

        target = os.path.join(target_dir, "nmrCVTest.owl")

        ontSoftwareEngineer = pronto.Ontology(target)

        # t1 = Term('ONT:001', 'my 1st term', 'this is my first term')
        # t2 = Term('ONT:0002', 'my 2nd term', 'this is my second term', {Relationship('part_of'): ['ONT:001']})

        #Here we prepare key value dataset to enter to the ontology
        #START
        termList = {}
        termList[
            'Software engineering'] = 'Software engineering is an engineering branch associated with software system development'
        termList[
            'Computer software'] = 'Computer software is a complete package, which includes software program, its documentation and user guide on how to use the software'
        termList[
            'computer program'] = 'A computer program is piece of programming code which performs a well defined task'
        termList[
            'Software Development Life Cycle(SDLC)'] = 'Software Development Life Cycle, or software process is the systematic development of software by following every stage in the development process namely, Requirement Gathering, System Analysis, Design, Coding, Testing, Maintenance and Documentation in that order'
        termList[
            'Software project management'] = 'Software project management is process of managing all activities like time, cost and quality management involved in software development'
        termList[
            'software project manager'] = 'A software project manager is a person who undertakes the responsibility of carrying out the software project'
        termList[
            'Software scope'] = 'Software scope is a well-defined boundary, which encompasses all the activities that are done to develop and deliver the software product. The software scope clearly defines all functionalities and artifacts to be delivered as a part of the software. The scope identifies what the product will do and what it will not do, what the end product will contain and what it will not contain'
        termList[
            'Baseline'] = 'Baseline is a measurement that defines completeness of a phase. After all activities associated with a particular phase are accomplished, the phase is complete and acts as a baseline for next phase'
        termList[
            'Software Configuration management'] = 'Software Configuration management is a process of tracking and controlling the changes in software in terms of the requirements, design, functions and development of the product'
        termList[
            'measure project execution'] = 'We can measure project execution by means of Activity Monitoring, Status Reports and Milestone Checklists'
        termList['feasibility study'] = {}
        termList['feasibility study'][
            'feasibility study'] = 'It is a measure to assess how practical and beneficial the software project development will be for an organization. The software analyzer conducts a thorough study to understand economic, technical and operational feasibility of the project'
        termList['feasibility study'][
            'Economic'] = 'Resource transportation, cost for training, cost of additional utilities and tools and overall estimation of costs and benefits of the project'
        termList['feasibility study'][
            'Technical'] = 'Checking whether Is it possible to develop this system ? Assessing suitability of machine(s) and operating system(s) on which software will execute, existing developers’ knowledge and skills, training, utilities or tools for project'
        termList['feasibility study'][
            'Operational'] = 'Checking whether Can the organization adjust smoothly to the changes done as per the demand of project ? Is the problem worth solving ?'
        termList[
            'gather requirements'] = 'Requirements can be gathered from users via interviews, surveys, task analysis, brainstorming, domain analysis, prototyping, studying existing usable version of software, and by observation'
        termList[
            'SRS or Software Requirement Specification'] = 'SRS or Software Requirement Specification is a document produced at the time of requirement gathering process. It can be also seen as a process of refining requirements and documenting them'
        termList[
            'functional requirements'] = ' Functional requirements are functional features and specifications expected by users from the proposed software product'
        termList[
            'non-functional requirements'] = 'Non-functional requirements are implicit and are related to security, performance, look and feel of user interface, interoperability, cost etc'
        termList[
            'concurrency'] = 'Concurrency is the tendency of events or actions to happen simultaneously. In software, when two or more processes execute simultaneously, they are called concurrent processes'
        termList[
            'cohesion'] = 'Cohesion is a measure that defines the degree of intra-dependability among the elements of the module'
        termList[
            'Coupling '] = 'Coupling is a measure that defines the level of inter-dependability among modules of a program'
        termList[
            'Data dictionary'] = 'Data dictionary is referred to as meta-data. Meaning, it is a repository of data about data. Data dictionary is used to organize the names and their references used in system such as objects and files along with their naming conventions'
        termList[
            'Structured design'] = 'Structured design is a conceptualization of problem into several well-organized elements of solution. It is concern with the solution design and based on ‘divide and conquer’ strategy'
        termList[
            'top-down and bottom-up design model'] = 'Top-down model starts with generalized view of system and decomposes it to more specific ones, whereas bottom-up model starts with most specific and basic components first and keeps composing the components to get higher level of abstraction'
        termList[
            'functional programming'] = 'Functional programming is style of programming language, which uses the concepts of mathematical function. It provides means of computation as mathematical functions, which produces results irrespective of program state'
        termList['acid properties'] = {}
        termList['acid properties'][
            'acid properties'] = 'ACID is an acronym for atomicity, consistency, isolation, and durability'
        termList['acid properties'][
            'Atomicity '] = 'This property states that a transaction must be treated as an atomic unit, that is, either all of its operations are executed or none. There must be no state in a database where a transaction is left partially completed. States should be defined either before the execution of the transaction or after the execution/abortion/failure of the transaction'
        termList['acid properties'][
            'Consistency '] = 'The database must remain in a consistent state after any transaction. No transaction should have any adverse effect on the data residing in the database. If the database was in a consistent state before the execution of a transaction, it must remain consistent after the execution of the transaction as well'
        termList['acid properties'][
            'Durability '] = 'The database should be durable enough to hold all its latest updates even if the system fails or restarts. If a transaction updates a chunk of data in a database and commits, then the database will hold the modified data. If a transaction commits but the system fails before the data could be written on to the disk, then that data will be updated once the system springs back into action'
        termList['acid properties'][
            'Isolation '] = 'In a database system where more than one transaction are being executed simultaneously and in parallel, the property of isolation states that all the transactions will be carried out and executed as if it is the only transaction in the system. No transaction will affect the existence of any other transaction'
        termList[
            'JVM'] = 'JVM is an acronym for Java Virtual Machine, it is an abstract machine which provides the runtime environment in which java bytecode can be executed. It is a specification. JVMs are available for many hardware and software platforms (so JVM is platform dependent)'
        termList[
            'JRE'] = 'JRE stands for Java Runtime Environment. It is the implementation of JVM'
        termList[
            'JDK'] = 'JDK is an acronym for Java Development Kit. It physically exists. It contains JRE + development tools'
        termList[
            'Just-In-Time(JIT) compiler'] = 'It is used to improve the performance. JIT compiles parts of the byte code that have similar functionality at the same time, and hence reduces the amount of time needed for compilation.Here the term “compiler” refers to a translator from the instruction set of a Java virtual machine (JVM) to the instruction set of a specific CPU'
        termList[
            'platform'] = 'A platform is basically the hardware or software environment in which a program runs. There are two types of platforms software-based and hardware-based. Java provides software-based platform'
        termList[
            'classloader'] = 'The classloader is a subsystem of JVM that is used to load classes and interfaces.There are many types of classloaders e.g. Bootstrap classloader, Extension classloader, System classloader, Plugin classloader etc'
        termList[
            'difference between object oriented programming language and object based programming language'] = 'Object based programming languages follow all the features of OOPs except Inheritance. Examples of object based programming languages are JavaScript, VBScript etc'
        termList[
            'constructor'] = 'Constructor is just like a method that is used to initialize the state of an object. It is invoked at the time of object creation'
        #END

        #Here we add above prepared key value pairs to the ontology
        #START
        x = 0
        termListObject = {}
        for key, value in termList.items():
            x = x + 1
            ontVal = 'ONT:' + str(x).zfill(6)

            # Branch on the VALUE: every key is a string, so testing the key
            # never reached the group handling and passed the whole dict as
            # the term description.
            if isinstance(value, str):
                termListObject[ontVal] = Term(ontVal, key, value)
                continue

            # Group: its own description is stored under the group's name.
            termListObject[ontVal] = Term(ontVal, key, value.get(key, ''))
            for innerKey, innerValue in value.items():
                if innerKey == key:
                    continue  # already used as the group description
                x = x + 1
                ontValInner = 'ONT:' + str(x).zfill(6)
                # Each child uses its own description and points at its group.
                termListObject[ontValInner] = Term(
                    ontValInner, innerKey, innerValue,
                    {Relationship('part_of'): [ontVal]})

        #END

        #Here we add terms to the ontology using pronto include term
        #START
        for term in termListObject.values():
            ontSoftwareEngineer.include(term)  #ontSoftwareEngineer.include(t1, t2)
        #END

        print(ontSoftwareEngineer.obo)
        print(
            "********************************* The Json Obj **************************************"
        )
        print(ontSoftwareEngineer.json)
        print(
            "********************************* The Json Obj **************************************"
        )
        for r in Relationship.topdown():
            print(r)

        return ontSoftwareEngineer
Example #22
0
 def test_repr_path_with_import_depth(self):
     """A non-default ``import_depth`` is shown in the ontology's repr."""
     pato_path = os.path.join(DATADIR, "pato.obo")
     loaded = pronto.Ontology(pato_path, import_depth=1)
     actual = repr(loaded)
     expected = "Ontology({!r}, import_depth=1)".format(pato_path)
     self.assertEqual(actual, expected)
Example #23
0
import pronto

#nmr = pronto.Ontology('http://nmrml.org/cv/v1.1.0/nmrCV.owl')
#ms  = pronto.Ontology('https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo')
#ms.merge(nmr)
#print(nmr.json)
#gaz = pronto.Ontology('http://ontologies.berkeleybop.org/gaz.owl')
#print "printing thar last thing"
#print 'NMR:1400302' in ms
#print ms.json
#thing= ms['UO:0010039']
#print thing.json
#print thing.obo #json should be a method for a term just as it is a method for the whole ontology

# Identifier probed in each ontology below, before and after the merge.
probe_term = 'SDGIO:00000061'

# Load the SDGIO ontology (OWL) and report whether it contains the probe term.
sdgio_url = 'https://raw.githubusercontent.com/SDG-InterfaceOntology/sdgio/master/sdgio.owl'
sd = pronto.Ontology(sdgio_url)
print(probe_term in sd)

#gaz = pronto.Ontology('http://ontologies.berkeleybop.org/gaz.obo')

# Load ENVO (OBO) and report membership of the same term before merging.
envo_url = 'https://raw.githubusercontent.com/EnvironmentOntology/envo/master/envo.obo'
envo = pronto.Ontology(envo_url)
print(probe_term in envo)

# Merge SDGIO into ENVO, then report membership again.
envo.merge(sd)
print(probe_term in envo)
Example #24
0
 def test_repr_file(self):
     """Check the ``repr`` of an ontology loaded from an open binary file.

     The expected representation shows the path the file was opened from.
     """
     obo_path = os.path.join(DATADIR, "pato.obo")
     with open(obo_path, "rb") as stream:
         loaded = pronto.Ontology(stream)
     expected = "Ontology({!r})".format(obo_path)
     self.assertEqual(repr(loaded), expected)
Example #25
0
import json
import pronto
from fairshake_assessments.core import metric
from fairshake_assessments.utils.jsonld_frame import jsonld_frame
from fairshake_assessments.utils.force_list import force_list
from fairshake_assessments.utils.IRI_to_NS import IRI_to_NS
from fairshake_assessments.utils.fetch_and_cache import fetch_and_cache


# Parse EDAM from whatever fetch_and_cache returns for this URL / cache path
# (presumably a local cached copy -- confirm against fetch_and_cache).
EDAM = pronto.Ontology(
    fetch_and_cache('http://edamontology.org/EDAM.owl', '.cache/EDAM.owl'))

# Reverse lookup: term name -> term id. Keys whose EDAM.get() result is falsy
# are skipped.
EDAM_reversed = {
    term.name: term.id
    for term in (EDAM.get(key) for key in EDAM)
    if term
}

# Reverse lookup: each synonym of a term -> that term's id.
EDAM_reversed_synonyms = {
    alias: term.id
    for term in (EDAM.get(key) for key in EDAM)
    if term
    for alias in term.synonyms
}

@metric({
  '@id': 142,
  'name': 'File type',
  'description': 'A file type is present with a valid EDAM identifier',
  'principle': 'Interoperable',
})
def metric_142_edam(doc):
  filetypes = list(map(json.loads,set(
    json.dumps({
      'value': information['value'],
      'valueIRI': information['valueIRI'],
    })
    for node in jsonld_frame(doc, {
      '@type': 'Dataset',
      'types': {
        'information': {
          'value': { '@default': '' },
          'valueIRI': { '@default': '' }
Example #26
0
 def test_repr_file_handle(self):
     """Check the ``repr`` of an ontology loaded from an in-memory handle.

     The file contents are copied into a ``BytesIO`` buffer, and the expected
     representation embeds the ``repr`` of that buffer object.
     """
     with open(os.path.join(DATADIR, "pato.obo"), "rb") as src:
         buffer = io.BytesIO(src.read())
     self.assertEqual(
         repr(pronto.Ontology(buffer)),
         "Ontology({!r})".format(buffer),
     )
# Collect the full path of every file found anywhere under `path`.
addicto_files = []
for root, dirs_list, files_list in os.walk(path):
    addicto_files.extend(
        os.path.join(root, file_name) for file_name in files_list)

# Accumulators filled further down, plus the message used for this revision.
entries, bad_entries = {}, []
revisionmsg = "February 2022 ADDICTO release"

# All the external content + hierarchy is in the file addicto_external.obo
# Must be parsed and sent to AOVocab.

externalonto = pronto.Ontology("addicto_external.obo")
# Record name -> id for every term of the external ontology; later terms with
# the same name overwrite earlier ones, exactly as item assignment would.
label_id_map.update(
    (term.name, term.id) for term in externalonto.terms())

# get the "ready" input data, indexed by ID
for filename in addicto_files:
    # The csv module requires files to be opened with newline='' so that
    # newlines embedded inside quoted fields are parsed correctly.
    with open(filename, 'r', newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        header = next(csvreader, None)
        if header is None:
            # Completely empty file: there is no header row and nothing to
            # index, so move on instead of raising StopIteration.
            continue

        for row in csvreader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no ID and
                # would otherwise fail on the index lookup below.
                continue
            rowdata = row
            # NOTE: `id` shadows the builtin; the name is kept because code
            # outside this loop may still read it.
            id = rowdata[0]
            # Index each row by its first column, keeping the file's header
            # alongside it. A later row with the same ID replaces an earlier
            # one.
            entries[id] = (header, rowdata)

### Update entries with new data
Example #28
0
 def setUpClass(cls):
     """Load the ``ms.obo`` fixture once and store it on the class as ``cls.ms``.

     Warnings are turned into errors for the tests, except ``UnicodeWarning``
     which is ignored.
     """
     warnings.simplefilter('error')
     warnings.simplefilter('ignore', category=UnicodeWarning)
     with open(os.path.join(DATADIR, "ms.obo"), "rb") as f:
         # Parse the handle opened just above. Passing ``cls.file`` here left
         # the opened file unused and relied on an attribute that this method
         # never sets.
         cls.ms = pronto.Ontology(f)
            process.append(name + ' ' +'('+x+')')
        except: 
            pass
    unique_terms = np.unique(process)
    return(unique_terms)

############################# ^ FOR GO ontology only ^ #############################





############################## Upload ontology files ###############################

# fly anatomy ontology file: http://www.obofoundry.org/ontology/fbbt.html
fly_anatomy_ont = pronto.Ontology('/Users/maayanlab/Downloads/fbbt.obo.txt')
# fly phenotype ontology: http://www.obofoundry.org/ontology/dpo.html
fly_phenotype_ont = pronto.Ontology('/Users/maayanlab/Downloads/fbcv.obo.txt')

# worm anatomy ontology file: http://www.obofoundry.org/ontology/wbbt.html
worm_anatomy_ont = pronto.Ontology('/Users/maayanlab/Downloads/wbbt.owl')
# worm phenotype file: http://www.obofoundry.org/ontology/wbphenotype.html
worm_phenotype_ont = pronto.Ontology('/Users/maayanlab/Downloads/wbphenotype.obo.txt')

# zebrafish anatomy ontology file: http://www.obofoundry.org/ontology/zfa.html
zebrafish_anatomy_ont = pronto.Ontology('/Users/maayanlab/Downloads/zfa.obo.txt')
# zebrafish phenotype ontology file: https://zfin.org/downloads/gene_expression_phenotype.txt
# This one is a tab-separated table, not an ontology file, so it is loaded as
# a DataFrame. DataFrame.from_csv was removed in pandas 1.0; read_csv with
# index_col=0 and parse_dates=True reproduces its defaults. The first column
# is then turned back into a regular column by reset_index().
zebrafish_phenotype_ont = pd.read_csv(
    '/Users/maayanlab/Downloads/phenotype_fish_2018.10.19.txt',
    sep='\t',
    header=1,
    index_col=0,
    parse_dates=True,
).reset_index()

# yeast cellular component ontology : http://www.geneontology.org/ontology/subsets/goslim_yeast.obo
yeast_anatomy_ont = pronto.Ontology('/Users/maayanlab/Downloads/goslim_yeast.obo.txt')
Example #30
0
 def test_remote_owl_imports(self):
     """Try to import a remote OWL ontology together with its imports.

     Loads ``xao.owl`` from its URL and passes the resulting ontology to
     ``self.check_ontology``.
     """
     owl = pronto.Ontology("http://purl.obolibrary.org/obo/xao.owl")
     self.check_ontology(owl)