Exemple #1
0
 def process_ontology(self, obo_file):
     """Build disease/ontology lookup tables from an OBO file.

     Loads ``obo_file`` into a ``GODag`` and, for every term whose name
     contains one of ``self.diseases``, walks the first path returned by
     ``paths_to_top`` for that term.

     Sets on ``self``:
         category_map: ``defaultdict(list)`` mapping each ancestor name on
             that path to the matched diseases (duplicates are kept).
         dag: the loaded ``GODag``.
         disease_class_map: disease -> ``{level: ancestor name}`` for the
             matching term with the most levels; on a tie the term seen
             later wins.
     """
     dag = GODag(obo_file)
     dag.populate_terms()
     known_diseases = self.diseases
     best_levels = {}
     self.category_map = defaultdict(list)
     for _, term in dag.items():
         # A disease matches when its name occurs inside the term's name.
         matches = [name for name in known_diseases if name in term.name]
         if not matches:
             continue
         # Only the first path to the top is considered.
         levels = {}
         for ancestor in dag.paths_to_top(term.id)[0]:
             levels[ancestor.level] = ancestor.name
             self.category_map[ancestor.name].extend(matches)
         for name in matches:
             # Keep an existing mapping only when it has strictly more
             # levels, i.e. it is the more specific one.
             if name not in best_levels or len(best_levels[name]) <= len(levels):
                 best_levels[name] = levels
     self.dag = dag
     self.disease_class_map = best_levels
class Data(object):
    """Holds data used in test."""

    def __init__(self, fin_obo):
        """Load the ontology and collect the IDs of its non-obsolete terms.

        Args:
            fin_obo: Path of the OBO file, relative to the parent of the
                directory that contains this module.
        """
        self.repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../")
        self.fin_obo = os.path.join(self.repo, fin_obo)
        self.dag = GODag(self.fin_obo)
        self.go2obj = {go: o for go, o in self.dag.items() if not o.is_obsolete}
        # Must be a real list: on Python 3 ``dict.keys()`` is a view, which
        # random.shuffle() cannot reorder and which cannot be sliced.
        self.goids_all = list(self.go2obj)

    def get_goids(self, num):
        """Return N randomly chosen GO IDs.

        Shuffles ``self.goids_all`` in place and returns its first ``num``
        entries as a set; fewer are returned when fewer IDs exist.
        """
        shuffle(self.goids_all)
        return set(self.goids_all[:num])
Exemple #3
0
def get_pathway_mapping(organism=9606, ontology='basic', exclude=None, force=False):
    """Return a dict mapping GO term ID to term name.

    Args:
        organism: Currently unused by this function.
        ontology: Any value containing 'slim' selects 'goslim_generic.obo';
            everything else selects 'go-basic.obo'.
        exclude: Passed to ``get_namespace_filter`` to build the predicate
            that decides which term namespaces are kept.
        force: When truthy, an existing local copy of the OBO file is
            deleted before the download helper is called.

    Returns:
        ``{term_id: term.name}`` for every term whose namespace passes the
        namespace filter.
    """
    obo = 'goslim_generic.obo' if 'slim' in ontology else 'go-basic.obo'

    namespace_filter = get_namespace_filter(exclude)

    # Logical ``and`` rather than bitwise ``&``: with ``&`` a truthy
    # non-bool such as 2 evaluated to 0 and the stale file was kept.
    if force and os.path.isfile(obo):
        os.remove(obo)

    obo_fname = goatools.base.download_go_basic_obo(obo)

    obodag = GODag(obo_fname)

    return {
        term_id: term.name
        for term_id, term in obodag.items()
        if namespace_filter(term.namespace)
    }
Exemple #4
0
class Data(object):
    """Holds data used in test."""
    def __init__(self, fin_obo):
        """Load the ontology and collect the IDs of its non-obsolete terms.

        Args:
            fin_obo: Path of the OBO file, relative to the parent of the
                directory that contains this module.
        """
        self.repo = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 "../")
        self.fin_obo = os.path.join(self.repo, fin_obo)
        self.dag = GODag(self.fin_obo)
        self.go2obj = {
            go: o
            for go, o in self.dag.items() if not o.is_obsolete
        }
        # Must be a real list: on Python 3 ``dict.keys()`` is a view, which
        # random.shuffle() cannot reorder and which cannot be sliced.
        self.goids_all = list(self.go2obj)

    def get_goids(self, num):
        """Return N randomly chosen GO IDs.

        Shuffles ``self.goids_all`` in place and returns its first ``num``
        entries as a set; fewer are returned when fewer IDs exist.
        """
        shuffle(self.goids_all)
        return set(self.goids_all[:num])
Exemple #5
0
def main():
    """Write "microarray.assoc": one line per gene with its GO terms.

    Reads the tab-separated "gene_association.tair" file from the current
    directory and keeps, for rows whose first column is "TAIR", the GO IDs
    that belong to the "biological_process" namespace at level 1 or deeper.
    The output is a "#gene,go_terms" header followed by
    "<gene>,<go1;go2;...>" lines, sorted by gene and by GO ID.
    """
    g = GODag()
    # GO terms of interest: biological_process, excluding level-0 terms.
    selection = {
        rec.id
        for rec in g.values()
        if rec.namespace == "biological_process" and rec.level >= 1
    }

    data = collections.defaultdict(set)
    # ``with`` guarantees the handles are closed; the bare file() calls
    # leaked them (and file() does not exist on Python 3).
    with open("gene_association.tair") as fp:
        for row in fp:
            if row[0] == "!":
                # Header/comment line of the association file.
                continue
            atoms = row.split("\t")
            #['TAIR', 'locus:2185485', 'AT5G14850', '', 'GO:0000030', 'TAIR:Communication:501714663', 'ISS', 'NCBI_gi:1552169|NCBI_gi:7634741', 'F', 'AT5G14850', 'AT5G14850|T9L3.150|T9L3_150', 'protein', 'taxon:3702', '20021003', 'TIGR', '', 'TAIR:locus:2185485\n']
            domain, name, go = atoms[0], atoms[10], atoms[4]
            # Column 10 is a "|"-separated list; only its first entry is used.
            name = name.split("|", 1)[0]
            if go in selection and domain == "TAIR":
                data[name].add(go)

    with open("microarray.assoc", "w") as fw:
        fw.write("#gene,go_terms\n")
        for key, val in sorted(data.items()):
            fw.write("%s,%s\n" % (key, ";".join(sorted(val))))
Exemple #6
0
#                 syn = TermSynonym(term=dbTerm, synonym=synonym[0][:255])
#                 syn.save()
#             except:
#                 pass
#
#         for synonym in term.alt_ids:
#             try:
#                 syn = TermSynonym(term=dbTerm, synonym=synonym[0][:255])
#                 syn.save()
#             except:
#                 pass

# Term names that have already been handled; the loop below skips any term
# whose name is already present here.
names = {}
# Existing Term objects of this ontology, keyed by their identifier
# (presumably an ORM query -- confirm against the Term model).
cache = {x.identifier:x for x in Term.objects.filter(ontology=ontology)}
# Parse the OBO file, requesting the optional 'relationship' attribute.
# NOTE(review): load_obsolete=False presumably leaves obsolete terms out --
# confirm against the GODag documentation.
go_dag = GODag("/data/databases/so/so.obo", optional_attrs=['relationship'], load_obsolete=False)
for go, term in tqdm(go_dag.items()):

    if term.name in names:
        continue


    names[term.name] = 1


    for child in term.children:
        if go in cache and child.id in cache:
            r = TermRelationship(
                subject_term=cache[go],  # parent
                predicate_term=is_a,
                object_term=cache[child.id],  # child
                ontology=ontology
Exemple #7
0
    ' wget http://www.geneontology.org/ontology/subsets/goslim_generic.obo')

# Obtain the ontology and the NCBI gene-to-GO association files.
obo_fname = download_go_basic_obo()

from goatools.base import download_ncbi_associations

gene2go = download_ncbi_associations()

# Pick the ontology file according to the requested GO set.
if goset == 'goslim':
    obodag = GODag("goslim_generic.obo")
else:
    obodag = GODag("go-basic.obo")

geneid2gos = read_ncbi_gene2go("gene2go", taxids=[9606])

# Number of terms at each level from 1 to 11 (expression shown for inspection).
levels = [r.level for go, r in obodag.items()]
[(i, levels.count(i)) for i in range(1, 12)]

# IDs of the terms that lie deeper than the cut-off level.
# .items() instead of the Python-2-only .iteritems(), matching the other
# loops over obodag in this script.
bad_go = {go for go, r in obodag.items() if r.level > cutlvl}

len(bad_go)

for go, r in obodag.items():
    nps = set()
    for p in r._parents:
        if p in bad_go:
            nps |= set([p2 for p2 in obodag[p]._parents if not p2 in bad_go])