def process_ontology(self, obo_file):
    """Build disease-to-ontology lookups from the OBO file ``obo_file``.

    A disease from ``self.diseases`` is considered related to an ontology
    term when the disease string occurs inside the term's name.

    Sets three attributes on ``self``:

    * ``category_map`` -- ancestor term name -> list of related diseases
      (a disease is appended once per matching term, so repeats can occur).
    * ``disease_class_map`` -- disease -> ``{level: ancestor name}`` taken
      from the first path returned by ``paths_to_top`` for a matching term.
    * ``dag`` -- the loaded ``GODag``.
    """
    dag = GODag(obo_file)
    dag.populate_terms()
    known_diseases = self.diseases
    class_map = {}
    categories = defaultdict(list)
    self.category_map = categories

    for _term_id, term in dag.items():
        # A disease matches when its name is a substring of the term name.
        matches = [name for name in known_diseases if name in term.name]
        if not matches:
            continue

        # Only the first path to the root is used to describe the lineage.
        lineage = {}
        for ancestor in dag.paths_to_top(term.id)[0]:
            lineage[ancestor.level] = ancestor.name
            for name in matches:
                categories[ancestor.name].append(name)

        # Keep the most specific mapping: an existing entry survives only
        # when it is strictly longer than the new lineage.
        for name in matches:
            if name not in class_map or len(class_map[name]) <= len(lineage):
                class_map[name] = lineage

    self.dag = dag
    self.disease_class_map = class_map
class Data(object):
    """Holds data used in test."""

    def __init__(self, fin_obo):
        """Load the ontology at ``fin_obo`` and index its non-obsolete terms.

        ``fin_obo`` is resolved relative to the parent of this file's
        directory.
        """
        self.repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../")
        self.fin_obo = os.path.join(self.repo, fin_obo)
        self.dag = GODag(self.fin_obo)
        self.go2obj = {go: o for go, o in self.dag.items() if not o.is_obsolete}
        # Materialise a real list: on Python 3 ``dict.keys()`` is a view that
        # supports neither ``shuffle()`` nor slicing, both of which
        # ``get_goids`` relies on.
        self.goids_all = list(self.go2obj)

    def get_goids(self, num):
        """Return N randomly chosen GO IDs.

        ``self.goids_all`` is shuffled in place; the first ``num`` entries
        are returned as a set (all of them when ``num`` exceeds the total).
        """
        shuffle(self.goids_all)
        return set(self.goids_all[:num])
def get_pathway_mapping(organism=9606, ontology='basic', exclude=None, force=False):
    """Return a ``{term id: term name}`` mapping for a Gene Ontology file.

    Parameters:
        organism: accepted for interface compatibility; not used here.
        ontology: when it contains ``'slim'`` the file ``goslim_generic.obo``
            is used, otherwise ``go-basic.obo``.
        exclude: forwarded to ``get_namespace_filter`` to build the predicate
            that decides which term namespaces are kept.
        force: when truthy, an existing local copy of the OBO file is removed
            before ``download_go_basic_obo`` is called.
            NOTE(review): presumably that call re-downloads a missing file --
            confirm against the installed goatools version.
    """
    obo = 'goslim_generic.obo' if 'slim' in ontology else 'go-basic.obo'
    namespace_filter = get_namespace_filter(exclude)
    # Logical ``and`` rather than bitwise ``&``: with ``&`` a truthy non-bool
    # such as ``force=2`` evaluates to 0 and the stale file is silently kept.
    if force and os.path.isfile(obo):
        os.remove(obo)
    obo_fname = goatools.base.download_go_basic_obo(obo)
    obodag = GODag(obo_fname)
    return {
        term_id: term.name
        for term_id, term in obodag.items()
        if namespace_filter(term.namespace)
    }
class Data(object):
    """Holds data used in test."""

    def __init__(self, fin_obo):
        """Load the ontology at ``fin_obo`` and index its non-obsolete terms.

        ``fin_obo`` is resolved relative to the parent of this file's
        directory.
        """
        self.repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../")
        self.fin_obo = os.path.join(self.repo, fin_obo)
        self.dag = GODag(self.fin_obo)
        self.go2obj = {
            go: o for go, o in self.dag.items() if not o.is_obsolete
        }
        # ``dict.keys()`` is a non-indexable view on Python 3, which breaks
        # the ``shuffle()`` call and the slice in ``get_goids``; store a list.
        self.goids_all = list(self.go2obj)

    def get_goids(self, num):
        """Return N randomly chosen GO IDs.

        ``self.goids_all`` is shuffled in place; the first ``num`` entries
        are returned as a set (all of them when ``num`` exceeds the total).
        """
        shuffle(self.goids_all)
        return set(self.goids_all[:num])
def main():
    """Write ``microarray.assoc``: one ``gene,go_terms`` line per gene.

    Reads ``gene_association.tair`` from the current directory, keeps
    annotations whose database column is ``TAIR`` and whose GO term is a
    ``biological_process`` term with ``level >= 1`` in the default ``GODag``,
    and writes the genes in sorted order with their GO IDs sorted and joined
    by ``;``.
    """
    data = collections.defaultdict(set)
    g = GODag()

    # GO IDs eligible for output: biological-process terms below the root.
    selection = set()
    for _key, rec in g.items():
        if rec.namespace != "biological_process" or rec.level < 1:
            continue
        selection.add(rec.id)

    # ``with`` guarantees the handle is closed even if a row fails to parse;
    # ``open`` replaces the Python-2-only ``file`` builtin.
    with open("gene_association.tair") as fp:
        for row in fp:
            if row[0] == "!":
                # Header / comment line of the association file.
                continue
            atoms = row.split("\t")
            # Example row after splitting:
            # ['TAIR', 'locus:2185485', 'AT5G14850', '', 'GO:0000030',
            #  'TAIR:Communication:501714663', 'ISS',
            #  'NCBI_gi:1552169|NCBI_gi:7634741', 'F', 'AT5G14850',
            #  'AT5G14850|T9L3.150|T9L3_150', 'protein', 'taxon:3702',
            #  '20021003', 'TIGR', '', 'TAIR:locus:2185485\n']
            domain, name, go = atoms[0], atoms[10], atoms[4]
            # Column 10 lists synonyms separated by '|'; keep the first.
            name = name.split("|", 1)[0]
            if go in selection and domain == "TAIR":
                data[name].add(go)

    # ``write`` with an explicit newline emits the same bytes as the former
    # ``print >>fw`` statements and works on Python 2 and 3 alike.
    with open("microarray.assoc", "w") as fw:
        fw.write("#gene,go_terms\n")
        for key, val in sorted(data.items()):
            fw.write("%s,%s\n" % (key, ";".join(sorted(val))))
# syn = TermSynonym(term=dbTerm, synonym=synonym[0][:255]) # syn.save() # except: # pass # # for synonym in term.alt_ids: # try: # syn = TermSynonym(term=dbTerm, synonym=synonym[0][:255]) # syn.save() # except: # pass names = {} cache = {x.identifier:x for x in Term.objects.filter(ontology=ontology)} go_dag = GODag("/data/databases/so/so.obo", optional_attrs=['relationship'], load_obsolete=False) for go, term in tqdm(go_dag.items()): if term.name in names: continue names[term.name] = 1 for child in term.children: if go in cache and child.id in cache: r = TermRelationship( subject_term=cache[go], # parent predicate_term=is_a, object_term=cache[child.id], # child ontology=ontology
' wget http://www.geneontology.org/ontology/subsets/goslim_generic.obo') obo_fname = download_go_basic_obo() from goatools.base import download_ncbi_associations gene2go = download_ncbi_associations() if goset == 'goslim': obodag = GODag("goslim_generic.obo") else: obodag = GODag("go-basic.obo") geneid2gos = read_ncbi_gene2go("gene2go", taxids=[9606]) levels = [r.level for go, r in obodag.items()] [(i, levels.count(i)) for i in range(1, 12)] bad_go = [] for go, r in obodag.iteritems(): if r.level > cutlvl: bad_go.append(go) bad_go = set(bad_go) len(bad_go) for go, r in obodag.items(): nps = set() for p in r._parents: if p in bad_go: nps |= set([p2 for p2 in obodag[p]._parents if not p2 in bad_go])