def parse_pato(ontology_file):
    """Parse a PATO ontology file and return filtered, lowercased terms.

    Collects each term's name plus every quoted synonym string, then drops
    blacklisted words (including their stemmed forms) and single-character
    entries.

    Args:
        ontology_file: path to the ontology file in OBO format.

    Returns:
        List of lowercased term/synonym strings not on the blacklist.
    """
    terms = []
    for elt in parseGOOBO(ontology_file):
        terms.append(elt["name"])
        # 'synonym' may be a single string or a list of strings; each one
        # should contain the synonym text between double quotes.
        synonyms = elt.get('synonym', [])
        if not isinstance(synonyms, list):
            synonyms = [synonyms]
        for syn in synonyms:
            try:
                terms.append(syn.split('"')[1])
            except (IndexError, AttributeError):
                # Malformed synonym entry without a quoted section.
                print('error parsing ontology synonym')

    blacklist = read_blacklist()
    # Also block the stemmed form of every blacklisted word.
    blacklist.extend([stem(b) for b in blacklist])
    blocked = set(blacklist)  # set for O(1) membership tests in the filter
    return [
        pheno.lower() for pheno in terms
        if pheno.lower() not in blocked and len(pheno) > 1
    ]
def parse_pato(ontology_file):
    """Parse a PATO ontology file into unique, filtered, lowercase words.

    Each term name and quoted synonym has parenthesized segments removed
    and is split into individual words. Blacklisted words, single
    characters, and tokens with no alphabetic character are dropped.

    Args:
        ontology_file: path to the ontology file in OBO format.

    Returns:
        List of unique lowercased words from term names and synonyms.
    """
    # Compile once instead of re-compiling inside the loop.
    paren_re = re.compile(r'\([^)]*\)')
    alpha_re = re.compile(r'[a-zA-Z]')

    terms = []
    for elt in parseGOOBO(ontology_file):
        terms.extend(paren_re.sub(' ', elt["name"]).split())
        # 'synonym' may be a single string or a list of strings.
        synonyms = elt.get('synonym', [])
        if not isinstance(synonyms, list):
            synonyms = [synonyms]
        for syn in synonyms:
            try:
                text = syn.split('"')[1]
            except (IndexError, AttributeError):
                # Malformed synonym entry without a quoted section.
                print('error parsing ontology synonym')
                continue
            terms.extend(paren_re.sub(' ', text).split())

    terms = list(set(terms))
    blacklist = set(read_blacklist())  # set for O(1) membership tests
    return [
        pheno.lower() for pheno in terms
        if pheno.lower() not in blacklist
        and len(pheno) > 1 and alpha_re.search(pheno)
    ]
def parse_ontology(ontology_file):
    """Parse an OBO ontology file into a list of unique words.

    Each term name and quoted synonym has parenthesized segments removed
    and is split into individual words. Unlike parse_pato, no blacklist
    or character filtering is applied.

    Args:
        ontology_file: path to the ontology file in OBO format.

    Returns:
        List of unique words from term names and synonyms.
    """
    # Compile once instead of re-compiling inside the loop.
    paren_re = re.compile(r'\([^)]*\)')

    terms = []
    for elt in parseGOOBO(ontology_file):
        terms.extend(paren_re.sub(' ', elt["name"]).split())
        # 'synonym' may be a single string or a list of strings.
        synonyms = elt.get('synonym', [])
        if not isinstance(synonyms, list):
            synonyms = [synonyms]
        for syn in synonyms:
            try:
                text = syn.split('"')[1]
            except (IndexError, AttributeError):
                # Malformed synonym entry without a quoted section.
                print('error parsing ontology synonym')
                continue
            terms.extend(paren_re.sub(' ', text).split())

    return list(set(terms))
Beispiel #4
0
def parse_pato(ontology_file):
    """Parse a PATO ontology file and return raw term names and synonyms.

    Args:
        ontology_file: path to the ontology file in OBO format.

    Returns:
        List of term name strings plus, for each synonym entry, the text
        between the first pair of double quotes.
    """
    terms = []
    for elt in parseGOOBO(ontology_file):
        terms.append(elt["name"])
        # 'synonym' may be a single string or a list of strings.
        synonyms = elt.get('synonym', [])
        if not isinstance(synonyms, list):
            synonyms = [synonyms]
        for syn in synonyms:
            try:
                terms.append(syn.split('"')[1])
            except (IndexError, AttributeError):
                # Malformed synonym entry without a quoted section.
                print('error parsing ontology synonym')
    return terms
Beispiel #5
0
"""
Output fields:
id, name, synonyms, related terms, alt IDs, parent, MeSh terms
"""
import argparse

from obo_parser import parseGOOBO

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('infile', help='Input HPO file in OBO v1.2 format.')
    parser.add_argument('outfile', help='Output TSV file name.')
    args = parser.parse_args()

    with open(args.outfile, 'w') as out:
        for term in parseGOOBO(args.infile):
            id = term['id'][0]
            name = term['name'][0]
            alt_ids = '|'.join(term['alt_id']) if 'alt_id' in term else ''
            is_a = '|'.join(x.partition(' ')[0]
                            for x in term['is_a']) if 'is_a' in term else ''
            synonyms = set()
            related = set()
            mesh = set()
            for s in term.get('synonym', []):
                if ' EXACT [' in s:
                    synonyms.add(s.split(' EXACT [')[0].strip('" '))
                else:
                    # RELATED, BROAD, etc.
                    related.add(s.split('" ')[0].strip('"'))
            for n in term.get('xref', []):
Beispiel #6
0
import argparse

from obo_parser import parseGOOBO


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('infile', help='Input HPO file in OBO v1.2 format.')
    parser.add_argument('outfile', help='Output TSV file name.')
    args = parser.parse_args()

    with open(args.outfile, 'w') as out:
        for term in parseGOOBO(args.infile):
            id = term['id'][0]
            name = term['name'][0]
            alt_ids = '|'.join(term['alt_id']) if 'alt_id' in term else ''
            is_a = '|'.join(x.partition(' ')[0] for x in term['is_a']) if 'is_a' in term else ''
            synonyms = set()
            related = set()
            for s in term.get('synonym', []):
                if ' EXACT [' in s:
                    synonyms.add(s.split(' EXACT [')[0].strip('" '))
                else:
                    # RELATED, BROAD, etc.
                    related.add(s.split('" ')[0].strip('"'))
            for n in term.get('xref', []):
                if ' ' in n:
                    synonyms.add(n.partition(' ')[-1].strip('" '))
            synonyms.discard(name)
            related.discard(name)
            synonyms = '|'.join(sorted(synonyms)) if synonyms else ''