def get_tree():
    """Return the stored taxonomy tree of one sample as a JSON response.

    Reads the keys ``pid``, ``uid``, ``sid``, ``value``, ``pip`` and ``rid``
    from the JSON request body, loads the sample row from the main database,
    reads the tree from the sample's result JSON file (``rf.json``) and
    answers with ``jsonify(tree=[tree], range=..., pip=...)``.

    Returns:
        The ``jsonify`` response on success. Any exception raised along the
        way is reported as the plain string ``"ERROR: <message>"`` rather
        than propagated.
    """
    try:
        payload = request.get_json()
        pid = payload["pid"]
        uid = payload["uid"]  # unused, but a missing key is still an error
        sid = payload["sid"]
        selval = payload["value"]
        pipeline = payload["pip"]
        dbname = payload["rid"]
        analysis = "taxonomy"

        rf = rootvar.result_files(pid, analysis, pipeline, sid, dbname)

        x = sql.SQL(main_db)
        try:
            # NOTE(review): the query is assembled from request values by
            # string concatenation; switch to bound parameters if sql.SQL.exe
            # supports them.
            samples = x.exe('select * from samples where project_id="' +
                            pid + '" and sample_id="' + sid + '"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
        finally:
            # Release the handle even when the lookup fails (e.g. unknown
            # sample -> IndexError); previously it stayed open on errors.
            x.close()

        with open(rf.json) as data_file:
            tree = json.load(data_file)

        view = rootvar.ViewSampleResults(sample, pipeline, dbname, analysis,
                                         rf)
        return jsonify(tree=[tree], range=view.range(selval)[0],
                       pip=pipeline)
    except Exception as inst:
        return "ERROR: " + str(inst)
def get_taxo_by_name():
    """Return the taxonomy matrix of one sample at a given level.

    Reads ``pid``, ``uid``, ``sid``, ``tid``, ``lid``, ``pip`` and ``rid``
    from the JSON request body, loads the sample from the main database and
    answers with ``jsonify(matrix=view.level(lid, type="matches"))``.

    Returns:
        The ``jsonify`` response, or the string ``"ERROR: <message>"`` when
        building the matrix fails. Errors raised while reading the request
        or loading the sample propagate to the caller, as before.
    """
    payload = request.get_json()
    pid = payload["pid"]
    uid = payload["uid"]  # unused, but the key is still required
    sid = payload["sid"]
    tid = payload["tid"]  # unused, but the key is still required
    lid = payload["lid"]
    pipeline = payload["pip"]
    dbname = payload["rid"]
    analysis = "taxonomy"

    x = sql.SQL(main_db)
    try:
        rf = rootvar.result_files(pid, analysis, pipeline, sid, dbname)
        # NOTE(review): the query is assembled from request values by string
        # concatenation; switch to bound parameters if sql.SQL.exe supports
        # them.
        samples = x.exe('select * from samples where project_id="' + pid +
                        '" and sample_id="' + sid + '"')
        xpath = x.project(pid)[0][4]
        sample = rootvar.samples(samples[0], xpath)
    finally:
        # Close the handle on every path; it used to leak when a lookup
        # raised before reaching close().
        x.close()

    view = rootvar.ViewSampleResults(sample, pipeline, dbname, analysis, rf)
    try:
        return jsonify(matrix=view.level(lid, type="matches"))
    except Exception as inst:
        return "ERROR: " + str(inst)
def functional(data):
    """Gather the functional-annotation rows of every requested sample.

    Args:
        data: mapping with the keys ``"pid"`` (project id), ``"uid"``,
            ``"sid"`` (iterable of sample ids), ``"pip"`` (pipeline name)
            and ``"rid"`` (reference id).

    Returns:
        A three-item list: the literal ``"function"``, the concatenation of
        ``view.all_func()`` for each sample, and the path of the project's
        ``<rid>.all_samples_tree.pk.json`` file (the path is only built here,
        the file is not read or written).
    """
    pid = data["pid"]
    uid = data["uid"]  # unused, but the key is still required
    sids = data["sid"]
    pipeline = data["pip"]
    rid = data["rid"]
    analysis = "function"

    results_dir = (rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline +
                   "/RESULTS/")
    # Make sure the results directory exists before the log is opened in it,
    # the same way taxonomy() prepares this directory. This replaces a
    # hand-built shell "mkdir" command line.
    rootvar.mkdir(results_dir)
    all_samples_tree_file = results_dir + rid + ".all_samples_tree.pk"

    # The log is truncated on every call; the context manager closes the
    # handle, which was previously left open.
    with open(results_dir + rid + ".log", "w") as log:
        log.write("echo #LOG for Project ID: " + str(pid) + "\n")

    full_matrix = []
    x = sql.SQL(main_db)
    try:
        for sid in sids:
            # NOTE(review): query built by string concatenation; use bound
            # parameters if sql.SQL.exe supports them.
            samples = x.exe('select * from samples where project_id="' +
                            pid + '" and sample_id="' + sid + '"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
            rf = rootvar.result_files(pid, analysis, pipeline, sample.id,
                                      rid)
            view = rootvar.ViewSampleResults(sample, pipeline, rid, analysis,
                                             rf)
            full_matrix += view.all_func()
    finally:
        # Close the handle even if one of the samples cannot be loaded.
        x.close()

    return ["function", full_matrix, all_samples_tree_file + ".json"]
def get_childs_of_taxonomy():
    """Return the children of a taxonomy node for one or for all samples.

    Reads ``pid``, ``uid``, ``sid``, ``tid``, ``lid``, ``cond``, ``pip`` and
    ``rid`` from the JSON request body.

    * ``cond == "one"``: loads the sample from the main database and answers
      with ``jsonify(matrix=view.childs_of(tid, 'rpkm'))``.
    * otherwise: loads the project-wide tree pickle, collects the successors
      of ``tid`` and answers with
      ``jsonify(data=matrix[0], heatmap='none')``.

    Errors are not caught here; they propagate to the caller.
    """
    payload = request.get_json()
    pid = payload["pid"]
    uid = payload["uid"]  # unused, but the key is still required
    sid = payload["sid"]
    tid = payload["tid"]
    lid = payload["lid"]  # unused, but the key is still required
    cond = payload["cond"]
    pip = payload["pip"]
    rid = payload["rid"]
    dbname = rid
    analysis = "taxonomy"

    if cond == "one":
        # Single-sample view.
        rf = rootvar.result_files(pid, analysis, pip, sid, dbname)
        x = sql.SQL(main_db)
        try:
            # NOTE(review): query built by string concatenation from request
            # values; use bound parameters if sql.SQL.exe supports them.
            samples = x.exe('select * from samples where project_id="' +
                            pid + '" and sample_id="' + sid + '"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
        finally:
            x.close()
        view = rootvar.ViewSampleResults(sample, pip, dbname, analysis, rf)
        return jsonify(matrix=view.childs_of(tid, 'rpkm'))

    # Multi-sample view: work on the merged tree of all samples.
    all_samples_tree_file = (rootvar.__ROOTPRO__ + "/" + pid + "/" + pip +
                             "/RESULTS/" + rid + ".all_samples_tree.pk")
    G = nx.read_gpickle(all_samples_tree_file)
    # NOTE(review): the result is not used; the call is kept only because it
    # raises for a graph that cannot be rendered as a tree rooted at 'R'.
    json_graph.tree_data(G, root='R')
    children = list(G.successors(tid))

    x = sql.SQL(all_samples_tree_file + ".db")
    try:
        matrix = rootvar.get_matrix_level_childs(x, children)
        samples_sel = list({str(row[0]) for row in matrix})
        # NOTE(review): the returned values are unused (the heatmap is
        # disabled); kept because side effects of v2m are not visible here.
        rootvar.v2m(matrix, samples_sel, 0, 0)
        return jsonify(data=matrix[0], heatmap='none')
    finally:
        # This handle was never closed before.
        x.close()
def get_statistics():
    """Return per-program statistics of one sample as a JSON response.

    Reads ``pid``, ``uid``, ``sid`` and ``value`` (an iterable of program
    names) from the JSON request body, loads the sample and the project name
    from the main database and calls ``statsP`` once per program.

    Returns:
        ``jsonify(stats=[...], sname=<sample name>, pname=<project name>)``.
        Errors are not caught here; they propagate to the caller.
    """
    payload = request.get_json()
    pid = payload["pid"]
    uid = payload["uid"]  # unused, but the key is still required
    sid = payload["sid"]
    programs = payload["value"]
    dbname = "isuezrouja"
    pipeline = "matches"

    x = sql.SQL(main_db)
    try:
        # NOTE(review): query built by string concatenation from request
        # values; use bound parameters if sql.SQL.exe supports them.
        samples = x.exe('select * from samples where project_id="' + pid +
                        '" and sample_id="' + sid + '"')
        # One project lookup serves both the path (column 4) and the name
        # (column 1); it used to be queried twice.
        project = x.project(pid)[0]
        xpath = project[4]
        pname = str(project[1])
        sample = rootvar.samples(samples[0], xpath)
    finally:
        # Close the handle even when the sample or project is missing.
        x.close()

    stats = [statsP(sample, program, sid, dbname, pipeline)
             for program in programs]
    return jsonify(stats=stats, sname=sample.name, pname=pname)
def get_functional_counts():
    """Return the functional-annotation counts of one sample.

    Reads ``pid``, ``uid``, ``sid``, ``pip`` and ``rid`` from the JSON
    request body, loads the sample from the main database and answers with
    ``jsonify(matrix=view.func_one_sample(),
    m2=view.func_structure(pid, pip, sid))``.

    Returns:
        The ``jsonify`` response on success; any exception is reported as
        the plain string ``"ERROR: <message>"``.
    """
    try:
        payload = request.get_json()
        pid = payload["pid"]
        uid = payload["uid"]  # unused, but a missing key is still an error
        sid = payload["sid"]
        pip = payload["pip"]
        rid = payload["rid"]
        analysis = "function"

        x = sql.SQL(main_db)
        try:
            rf = rootvar.result_files(pid, analysis, pip, sid, rid)
            # NOTE(review): query built by string concatenation from request
            # values; use bound parameters if sql.SQL.exe supports them.
            samples = x.exe('select * from samples where project_id="' +
                            pid + '" and sample_id="' + sid + '"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
        finally:
            # The handle used to stay open whenever a lookup raised.
            x.close()

        view = rootvar.ViewSampleResults(sample, pip, rid, analysis, rf)
        return jsonify(matrix=view.func_one_sample(),
                       m2=view.func_structure(pid, pip, sid))
    except Exception as inst:
        return "ERROR: " + str(inst)
def idbaud(projectid,sampleid,db,protocol,reads1, reads2, good_reads):
    """Run the assembly-based pipeline for one sample.

    Steps, in order:
      1. Store ``reads1``/``reads2`` on the sample row and load the sample.
      2. Create ``sample.assemblyDir`` and run fq2fa, idba_ud and prodigal;
         each run is skipped when ``root.isdir`` reports its output path.
      3. Run the annotation branches selected by ``db``: the
         ``"abcdefghij"`` branch (MetaPhlAn), the ``'MyTaxa'`` branch, the
         taxonomy branch (``db.taxo`` not ``"none"``) and the functional
         branch (``db.func`` not ``"none"``).
    Progress is reported through ``update_status`` / ``root.updateStatus``.

    Args:
        projectid, sampleid: ids concatenated into the SQL statements below
            (so they must be strings).
        db: reference dataset; the attributes read here are ``id``, ``name``,
            ``taxo``, ``taxodb``, ``func``, ``funcdb`` and ``len``.
        protocol: pipeline name forwarded to ``update_status`` and to the
            result objects.
        reads1, reads2: values written to the ``reads1``/``reads2`` columns.
        good_reads: forwarded unchanged to ``pb``.

    Returns:
        None.

    NOTE(review): ``x`` is never closed in this function.
    NOTE(review): the block nesting below was reconstructed from data flow
    and should be confirmed, in particular the cleanup ``os.system`` call
    after ``idba_ud.run()``.
    NOTE(review): every SQL statement is built by string concatenation.
    """
    #db=root.dataset(db)
    #1 get project path
    x=sql.SQL(root.filedb())
    xpath=x.project(projectid)[0][4]
    #print 'here------------'
    ###########################################################################
    #2 update the reads on the sql dataset, do it anyway, so if the sample is re-run it just takes the new input, which could have been modified.
    ###########################################################################
    val=x.exe('update samples set reads1="'+reads1+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    val=x.exe('update samples set reads2="'+reads2+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    ###########################################################################
    #3 get the sample full information - load the class samples
    ###########################################################################
    samples=x.exe('select * from samples where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    sample=samples[0]
    sample=root.samples(sample,xpath)
    root.mkdir(sample.assemblyDir)
    ###########################################################################
    # 4.1 Run fq2fa - its output is used by the idba_ud program
    ###########################################################################
    idba_ud=root.program('idba_ud',sample,db)
    update_status(x,sampleid,db.id,protocol,"Preprocessing")
    fq2fa=root.program('fq2fa', sample,db)
    # make sure that there is a scaffold.fa file. If not, it computes again
    # the fastq to fasta conversion and the assembly
    if not root.isdir(idba_ud.out):
        fq2fa.run()
    ###########################################################################
    # 4.2 Run idba_ud - assemble the sample
    ###########################################################################
    update_status(x,sampleid,db.id,protocol,"Assembling")
    idba_ud=root.program('idba_ud',sample,db)
    if not root.isdir(idba_ud.out):
        idba_ud.run();
        # remove the assembler's intermediate files from its working directory
        os.system(' cd ' + idba_ud.path + ' && rm kmer contig-* align-* graph-* local-contig-* reads.fa')
    ###########################################################################
    # 4.2 Run gene finder - look at the genes over the scaffolds
    ###########################################################################
    prodigal=root.program("prodigal", sample,db)
    update_status(x,sampleid,db.id,protocol,"Finding Genes")
    if not root.isdir(prodigal.output+".gff"):
        prodigal.run()
    # MetaPhlAn branch: selected by this specific dataset name
    if db.name=="abcdefghij":
        print "MetaPlAn2"
        update_status(x,sampleid,db.id,protocol,"Processing")
        metaphlan=root.program('MetaPhlAn',sample,db)
        if not root.isdir(metaphlan.out):
            metaphlan.run()
        #print "Here 2"
        G=txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn=root.SampleResults(sample,G,protocol, db.name, "taxonomy", metaphlan.out)
        abn.start()
        update_status(x,sampleid,db.id,protocol,"Done")
    # MyTaxa branch: each stage is skipped when its output path is reported
    # by root.isdir
    if db.name=='MyTaxa':
        #print "MyTaxa"
        taxa=root.mytaxa(sample,db)
        update_status(x,sampleid,db.id,protocol,"Screening")
        if not root.isdir(taxa.output+".prot.mytaxa.fa"):
            taxa.pre()
        if not root.isdir(taxa.output+".MyTaxa.matches.daa"):
            taxa.align()
        if not root.isdir(taxa.output+".MyTaxa.align"):
            taxa.postd()
        if not root.isdir(taxa.output+".MyTaxa.input"):
            taxa.mpre()
        if not root.isdir(taxa.output+".MyTaxa.out"):
            taxa.run()
        update_status(x,sampleid,db.id,protocol,"Quantification")
        data=taxa.postM()
        G=txp.mytaxa_taxonomy_tree(data,taxa.output+".MyTaxa.matches.taxonomy.abundance")
        abn=root.SampleResults(sample,G,protocol, "MyTaxa", "taxonomy", taxa.output+".MyTaxa.matches")
        abn.start()
        update_status(x,sampleid,db.id,protocol,"Done")
    if not db.taxo=="none":
        print "taxonomy"
        ###########################################################################
        # 4.3 Run the aligner (diamond_blastp for one dataset, blastn otherwise)
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Screening")
        if db.name=="ryaetguxun":
            blastn=root.program('diamond_blastp',sample,db)
        else:
            blastn=root.program('blastn',sample,db)
        blastn.run()
        #blastn.run()
        ###########################################################################
        # 4.4 taxonomy abundance
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Quantification")
        abundance=pb(blastn.out, db.taxo, db.len, db.taxodb, "taxonomy", db.name, "none",good_reads)
        ###########################################################################
        # 4.5 processing Visualization
        ###########################################################################
        G=txp.taxonomy_tree(abundance,blastn.out, protocol, "taxonomy", db.name )
        abn=root.SampleResults(sample,G,protocol, db.name, "taxonomy", blastn.out)
        abn.start()
        root.updateStatus(x,projectid,sampleid,"done")
        update_status(x,sampleid,db.id,protocol,"Done")
    if not db.func=="none":
        print "functional annotation"
        update_status(x,sampleid,db.id,protocol,"Screening")
        fileso=root.result_files(projectid, "function", protocol, sampleid, db.name)
        ###########################################################################
        # 4.3 Run diamond_blastp to find matches
        ###########################################################################
        root.updateStatus(x,projectid,sampleid,"functional annotation")
        blastn=root.program('diamond_blastp',sample,db)
        blastn.run()
        ###########################################################################
        # 4.4 functional abundance
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Quantification")
        abundance=pb(blastn.out, db.func, db.len, db.funcdb, "function", db.name, fileso.GGenes+".rpkm", good_reads)
        ###########################################################################
        # 4.5 processing Visualization
        ###########################################################################
        abn=root.SampleResults(sample,'none',protocol, db.name, "function", blastn.out)
        abn.createFuncDb(abundance)
        update_status(x,sampleid,db.id,protocol,"Done")
def process(projectid, sampleid, db, protocol, reads1, reads2, good_reads):
    """Run the read-matching pipeline for one sample.

    Stores ``reads1``/``reads2`` on the sample row, loads the sample, creates
    ``sample.matchesDir`` and then runs the branches selected by ``db``:

    * ``db.name == "abcdefghij"``: MetaPhlAn taxonomy.
    * ``db.taxo`` not ``"none"``: bowtie2 alignment of the paired reads,
      SAM parsing and taxonomy tree; returns ``'success'``.
    * ``db.func`` not ``"none"``: merge the read pairs, convert to FASTA,
      diamond blastx, functional abundance; returns ``'success'``.

    External commands are skipped when ``root.isdir`` reports their output
    path, and their shell output is appended to ``root.log``.

    Args:
        projectid, sampleid: ids concatenated into the SQL statements below
            (so they must be strings).
        db: reference dataset; the attributes read here are ``id``, ``name``,
            ``taxo``, ``func``, ``funcdb``, ``len``, ``bowtie`` and
            ``diamond``.
        protocol: pipeline name forwarded to ``update_status`` and to the
            result objects.
        reads1, reads2: values written to the ``reads1``/``reads2`` columns.
        good_reads: forwarded to ``parse_sam`` / ``pb``.

    Returns:
        ``'success'`` after the taxonomy or the functional branch ran,
        otherwise None.

    NOTE(review): ``x`` is never closed in this function.
    NOTE(review): ``p`` is not defined in this function; presumably a
    module-level value passed to the tools' ``-p`` option - confirm.
    NOTE(review): the block nesting below was reconstructed from data flow
    and should be confirmed.
    """
    #db=root.dataset(db)
    x = sql.SQL(root.filedb())
    xpath = x.project(projectid)[0][4]
    # Always store the read paths, so a re-run picks up the new input.
    val = x.exe('update samples set reads1="' + reads1 + '" where project_id="' + projectid + '" and sample_id="' + sampleid + '"')
    val = x.exe('update samples set reads2="' + reads2 + '" where project_id="' + projectid + '" and sample_id="' + sampleid + '"')
    samples = x.exe('select * from samples where project_id="' + projectid + '" and sample_id="' + sampleid + '"')
    sample = samples[0]
    sample = root.samples(sample, xpath)
    root.mkdir(sample.matchesDir)
    rdir = root.__ROOTPRO__ + "/" + projectid + "/READS/"
    if db.name == "abcdefghij":
        #print "MetaPhlAnn"
        update_status(x, sampleid, db.id, protocol, "Processing")
        metaphlan = root.program('MetaPhlAnR', sample, db)
        if not root.isdir(metaphlan.out):
            metaphlan.run()
        G = txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy", metaphlan.out)
        abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")
    if not db.taxo == "none":
        # run bowtie using the paired end reads
        # NOTE(review): this status update passes 'matches' where every other
        # call in this function passes protocol - confirm it is intended.
        update_status(x, sampleid, db.id, 'matches', "Screening")
        cmd = " ".join([
            root.__ROOTEXEDIR__ + 'bowtie2',
            '--very-fast-local -p ' + p + ' --no-unal --no-hd --no-sq -x',
            db.bowtie, '-1', sample.reads1, '-2', sample.reads2, '-S',
            sample.matchesDir + '/alignment.' + db.id + '.matches >>',
            root.log, '2>&1'
        ])
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id + '.matches'):
            os.system(cmd)
        #process output in sam format to get genes and number of reads per gene.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id + '.matches.taxonomy.abundance.results.sqlite3.db'):
            abundance = parse_sam(
                sample.matchesDir + '/alignment.' + db.id + '.matches', db,
                good_reads)
            G = txp.taxonomy_tree(
                abundance, sample.matchesDir + '/alignment.' + db.id + '.matches',
                protocol, "taxonomy", db.id)
            abn = root.SampleResults(sample, G, protocol, db.id, "taxonomy", sample.matchesDir + '/alignment.' + db.id + '.matches')
            # Store data in the sql TABLE
            abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")
        return 'success'
    if not db.func == "none":
        fileso = root.result_files(projectid, "function", protocol, sampleid, db.name)
        #Merge paired ends
        update_status(x, sampleid, db.id, protocol, "Merge")
        cmd = " ".join([
            'python',
            root.__ROOTEXEDIR__ + "pairend_join.py -s -p " + p + " -m 8 -o ",
            sample.matchesDir + '/merged.reads.fastq', sample.reads1,
            sample.reads2
        ])
        #print cmd
        root.flog(cmd)
        #print cmd
        if not root.isdir(sample.matchesDir + '/merged.reads.fastq'):
            os.system(cmd)
        #Get fasta files
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/seqtk seq -a',
            sample.matchesDir + '/merged.reads.fastq >',
            sample.matchesDir + '/merged.reads.fasta'
        ])
        if not root.isdir(sample.matchesDir + '/merged.reads.fasta'):
            os.system(cmd)
        #BlastX from diamond
        update_status(x, sampleid, db.id, protocol, "Screening")
        # NOTE(review): unlike every other path in this function, dout is
        # built without a '/' between matchesDir and 'alignment.'; the
        # quantification guard below uses '/alignment.' - confirm which one
        # is intended.
        dout = sample.matchesDir + 'alignment.' + db.id
        din = sample.matchesDir + '/merged.reads.fasta'
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond blastx --id 60 -p ' + p + ' -k 1 -e 1e-5 -d',
            db.diamond, '-a', dout + '.pre', '-q', din, '>>', root.log,
            "2>&1"
        ])
        # NOTE(review): the guard tests dout + '.daa', while the next command
        # reads dout + '.pre.daa' - confirm the guard path.
        if not root.isdir(dout + '.daa'):
            os.system(cmd)
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond view -a', dout + '.pre.daa', '-o',
            dout + '.matches -f tab', ">>", root.log, "2>&1"
        ])
        if not root.isdir(dout + '.matches'):
            os.system(cmd)
        # parse diamond output
        update_status(x, sampleid, db.id, protocol, "Quantification")
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id + '.matches.function.abundance.results.sqlite3.db'):
            abundance = pb(dout + '.matches', db.func, db.len, db.funcdb, "function", db.name, fileso.GGenes + ".rpkm", good_reads)
            #abundance=pdx(dout+'.matches', db, good_reads)
            abn = root.SampleResults(sample, 'none', protocol, db.name, "function", dout + '.matches')
            abn.createFuncDb(abundance)
        update_status(x, sampleid, db.id, protocol, "Done")
        # remove the merged-read intermediates
        os.system('rm ' + sample.matchesDir + '/merged.reads.fastq >> ' + root.log + " 2>&1")
        os.system('rm ' + sample.matchesDir + '/merged.reads.fasta >> ' + root.log + " 2>&1")
        return 'success'
def taxonomy(data):
    """Build (or reload) the merged taxonomy tree of a project's samples.

    The samples are those whose status is "Done" for the given pipeline and
    reference; ``data["sid"]`` is required but its value is not used.

    If the tree pickle already exists and its companion ``.db`` holds as many
    distinct samples as are currently finished, the stored tree is returned.
    Otherwise every finished sample's rows and tree are loaded, the rows are
    written with ``rootvar.full_matrix_sql``, the per-sample trees are merged
    into one ``DiGraph`` (each node carrying ``samples`` and ``level``), and
    the merged graph is pickled.

    Args:
        data: mapping with the keys ``"pid"``, ``"uid"``, ``"sid"``,
            ``"pip"`` and ``"rid"``.

    Returns:
        ``["taxonomy", tree]`` when the stored tree is reused, otherwise
        ``["taxonomy", tree, "<tree file>.json"]``.
    """
    pid = data["pid"]
    uid = data["uid"]  # unused, but the key is still required
    sids = data["sid"]  # replaced below by the finished samples from the db
    pipeline = data["pip"]
    rid = data["rid"]
    analysis = "taxonomy"

    results_dir = (rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline +
                   "/RESULTS/")
    rootvar.mkdir(results_dir)
    all_samples_tree_file = results_dir + rid + ".all_samples_tree.pk"
    tree_exists = os.path.isfile(all_samples_tree_file)

    # How many samples does the stored matrix already cover?
    stored_samples = 0
    if tree_exists:
        cache_db = sql.SQL(all_samples_tree_file + ".db")
        try:
            stored_samples = len(cache_db.exe(
                "select distinct sample_name from full_matrix"))
        finally:
            # This handle used to be dropped without being closed.
            cache_db.close()

    full_matrix = []
    edges = []
    nodes = {}  # node -> taxonomic level, first occurrence wins

    x = sql.SQL(main_db)
    try:
        # NOTE(review): query built by string concatenation; use bound
        # parameters if sql.SQL.exe supports them.
        sids = x.exe(
            'select c.sample_id from (select * from samples a inner join '
            '(select * from sample_status where pip="' + str(pipeline) +
            '") b on a.sample_id==b.sid) c where c.project_id=="' + pid +
            '" and c.status="Done" and c.rid="' + rid + '"')

        if tree_exists and stored_samples == len(sids):
            # Nothing new since the tree was stored: reuse it. The finally
            # clause closes x, which used to leak on this early return.
            G = nx.read_gpickle(all_samples_tree_file)
            return ["taxonomy", json_graph.tree_data(G, root='R')]

        for sid in sids:
            samples = x.exe('select * from samples where project_id="' +
                            pid + '" and sample_id="' + sid[0] + '"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
            rf = rootvar.result_files(pid, analysis, pipeline, sample.id,
                                      rid)
            view = rootvar.ViewSampleResults(sample, pipeline, rid, analysis,
                                             rf)
            full_matrix += view.all()
            Gp = nx.read_gpickle(rf.pk)
            edges += Gp.edges()
            for node in Gp.nodes():
                if node not in nodes:
                    nodes[node] = Gp.node[node]['level']
    finally:
        x.close()

    rootvar.full_matrix_sql(all_samples_tree_file + ".db", full_matrix)

    # Merge the per-sample edges; 'samples' counts how often a node was seen.
    # NOTE(review): the increment for a repeated edge is attached to the
    # "edge already present" test - confirm against the intended nesting.
    G = nx.DiGraph()
    for edge in edges:
        parent, child = edge[0], edge[1]
        if G.has_edge(parent, child):  # O(1) instead of scanning G.edges()
            G.node[parent]['samples'] += 1
            G.node[child]['samples'] += 1
            continue
        # Child first, then parent: keeps the original node insertion order.
        for name in (child, parent):
            if name in G:
                G.node[name]['samples'] += 1
            else:
                G.add_node(name, samples=1, level=nodes[name])
        # Keep the result a tree: link only children without a parent yet.
        # list() makes the emptiness test valid whether predecessors()
        # returns a list or an iterator.
        if not list(G.predecessors(child)):
            G.add_edge(parent, child)

    tree = json_graph.tree_data(G, root='R')
    nx.write_gpickle(G, all_samples_tree_file)
    return ["taxonomy", tree, all_samples_tree_file + ".json"]