def test01GetDicts(self):
    """
    ensure we can create, save and retrieve gene2go and go2gene dictionaries

    Any existing ``terms.pickle`` in the working directory is deleted first,
    then the dictionaries are created, loaded back from the same path and
    their sizes are printed.
    """

    termsPath = 'terms.pickle'

    # remove a leftover file from an earlier run before creating a new one
    if os.path.exists(termsPath):
        os.remove(termsPath)

    go = GeneOntology([self.taxId], upass=UPASS, idType='ncbi', useIea=True,
                      aspect='biological_process')
    go.create_dicts(termsPath)
    gene2go, go2gene = go.load_dicts(termsPath)

    print("there are %s genes" % (len(gene2go)))
    print("there are %s terms" % (len(go2gene)))
from htsint.blast import BlastMapper

# specify main variables (biological_process, molecular_function, cellular_component)
homeDir = os.path.join(".", "demo")
if not os.path.isdir(homeDir):
    os.mkdir(homeDir)

_aspect = 'bp'
aspect = 'biological_process'

# Create a term graph
go = GeneOntology(["8364", "8355"], useIea=False, aspect=aspect)
termsPath = os.path.join(homeDir, "go-terms.pickle")
graphPath = os.path.join(homeDir, "go-graph.pickle")

# the term dictionaries are only (re)built when no saved file is present
if not os.path.exists(termsPath):
    go.create_dicts(termsPath)

gene2go, go2gene = go.load_dicts(termsPath)
G = go.create_gograph(termsPath=termsPath, graphPath=graphPath)
print("%s genes have at least one annotation" % (len(gene2go)))
print("Term graph for with %s nodes successfully created." % (len(G.nodes())))

# Calculate term distances (most time consuming step)
termDistancePath = os.path.join(homeDir, "term-distances.npy")
if not os.path.exists(termDistancePath):
    td = TermDistances(termsPath, graphPath)
    print("total distances to evaluate: %s" % td.totalDistances)
    timeStart = time.time()
    td.run_with_multiprocessing(termDistancePath, cpus=30)
    # print is called as a function (like every other print in this script)
    # so the line is valid on both Python 2 and Python 3.
    # NOTE: the elapsed time is shown as HH:MM:SS and wraps after 24 hours.
    print(time.strftime('%H:%M:%S', time.gmtime(time.time() - timeStart)))

# Calculate gene distances
# make sure the output directory for the gene set analysis exists
if not os.path.exists(gsaDir):
    os.mkdir(gsaDir)

## make imports and specify variables
from htsint import GeneOntology, TermDistances

useIea = True
aspect = "biological_process"
_aspect = 'bp'
taxaList = ['9606']

go = GeneOntology(taxaList, useIea=useIea, aspect=aspect)

# every saved file carries the short aspect name in its file name
termsPath = os.path.join(gsaDir, "go-terms-%s.pickle" % _aspect)
graphPath = os.path.join(gsaDir, "go-graph-%s.pickle" % _aspect)

# only the genes present in geneList are passed on as accepted ids
geneIds = geneList.keys()
if not os.path.exists(termsPath):
    go.create_dicts(termsPath, accepted=geneIds)

gene2go, go2gene = go.load_dicts(termsPath)
print("pathway genes with terms: %s/%s" % (len(gene2go), len(geneIds)))

# the graph is created (and reported on) only when no saved graph exists
if not os.path.exists(graphPath):
    G = go.create_gograph(termsPath=termsPath, graphPath=graphPath)
    print("Term graph for with %s nodes successfully created." % len(G.nodes()))

# Calculate term distances
termDistancePath = os.path.join(gsaDir, "term-distances-%s.npy" % _aspect)
if not os.path.exists(termDistancePath):
    td = TermDistances(termsPath, graphPath)
    print("total distances to evaluate: %s" % td.totalDistances)
    td.run_with_multiprocessing(termDistancePath, cpus=7)
# create the gene set analysis directory on first use
if not os.path.exists(gsaDir):
    os.mkdir(gsaDir)

## make imports and specify variables
from htsint import GeneOntology, TermDistances

useIea = True
aspect = "biological_process"
_aspect = 'bp'
taxaList = ['9606']
go = GeneOntology(taxaList, useIea=useIea, aspect=aspect)

# locations of the saved term dictionaries and term graph
termsPath = os.path.join(gsaDir, "go-terms-%s.pickle" % (_aspect,))
graphPath = os.path.join(gsaDir, "go-graph-%s.pickle" % (_aspect,))

# build the term dictionaries, restricted to the ids in geneList, when missing
geneIds = geneList.keys()
if not os.path.exists(termsPath):
    go.create_dicts(termsPath, accepted=geneIds)
gene2go, go2gene = go.load_dicts(termsPath)
print("pathway genes with terms: %s/%s" % (len(gene2go), len(geneIds)))

# build the term graph when it has not been saved yet
if not os.path.exists(graphPath):
    G = go.create_gograph(termsPath=termsPath, graphPath=graphPath)
    print("Term graph for with %s nodes successfully created." % (len(G.nodes()),))

# Calculate term distances
termDistancePath = os.path.join(gsaDir, "term-distances-%s.npy" % (_aspect,))
if not os.path.exists(termDistancePath):
    td = TermDistances(termsPath, graphPath)
    print("total distances to evaluate: %s" % (td.totalDistances,))
    td.run_with_multiprocessing(termDistancePath, cpus=7)

# Calculate gene distances
from htsint.blast import BlastMapper

# specify main variables (biological_process, molecular_function, cellular_component)
homeDir = os.path.join(".", "demo")
if not os.path.isdir(homeDir):
    os.mkdir(homeDir)
_aspect = 'bp'
aspect = 'biological_process'

# Create a term graph
go = GeneOntology(["8364", "8355"], useIea=False, aspect=aspect)
termsPath = os.path.join(homeDir, "go-terms.pickle")
graphPath = os.path.join(homeDir, "go-graph.pickle")

# skip dictionary creation when a saved terms file already exists
if not os.path.exists(termsPath):
    go.create_dicts(termsPath)
gene2go, go2gene = go.load_dicts(termsPath)

G = go.create_gograph(termsPath=termsPath, graphPath=graphPath)
print("%s genes have at least one annotation" % (len(gene2go)))
print("Term graph for with %s nodes successfully created." % (len(G.nodes())))

# Calculate term distances (most time consuming step)
termDistancePath = os.path.join(homeDir, "term-distances.npy")
if not os.path.exists(termDistancePath):
    td = TermDistances(termsPath, graphPath)
    print("total distances to evaluate: %s" % td.totalDistances)
    timeStart = time.time()
    td.run_with_multiprocessing(termDistancePath, cpus=30)
    # report the wall-clock run time as HH:MM:SS; print is used in its
    # call form so the script parses under Python 3 as well as Python 2
    elapsed = time.time() - timeStart
    print(time.strftime('%H:%M:%S', time.gmtime(elapsed)))

# Calculate gene distances