def register_dataset():
    """Create a new reference-dataset record from the JSON request body.

    Expects the keys "rname", "rtype", "rdesc" and "uid" in the posted
    JSON.  On success returns the freshly generated reference id as JSON;
    on any failure (bad JSON, missing key, SQL error) returns an error
    string instead.
    """
    try:
        # Expected payload shape:
        # {'rname':'test', 'rtype':'prot', 'rdesc':'test', 'uid':'test'}
        payload = request.get_json()
        ref_name = payload["rname"]
        ref_type = payload["rtype"]
        ref_desc = payload["rdesc"]
        user_id = payload["uid"]
        # This specific account is explicitly blocked from registering.
        if user_id == "TesREPDooc73Ohw":
            return 'User not allowed'
        ref_id = rootvar.get_alphabet(10)  # random 10-character identifier
        ref_dir = rootvar.__ROOTDBS__ + ref_id
        format_dir = rootvar.__ROOTDBS__ + ref_id  # same location for now
        rootvar.mkdir(ref_dir)
        # Parameterized insert: reference row starts in the "created" state
        # with the taxonomy/function/length columns unset ("none").
        g.db.execute(
            'INSERT INTO reference VALUES (?,?,?,?,?,?,?,?,?,?,?)',
            (ref_id, ref_name, ref_type, ref_desc, ref_dir, user_id,
             "none", "none", "none", "created", format_dir))
        g.db.commit()
        return jsonify(rid=ref_id)
    except Exception as inst:
        # Surface the failure to the client as plain text.
        return "ERROR: " + str(inst)
def upload(file, dir):
    """Save an uploaded *file* into directory *dir*.

    Always points the app's UPLOAD_FOLDER at *dir* and ensures the
    directory exists; the file itself is only written when one was
    actually provided.
    """
    app.config['UPLOAD_FOLDER'] = dir
    rootvar.mkdir(dir)
    if not file:
        return
    # Sanitize the client-supplied filename before touching the filesystem.
    safe_name = secure_filename(file.filename)
    file.save(os.path.join(app.config['UPLOAD_FOLDER'], safe_name))
def process(projectid, sampleid, db, protocol, reads1, reads2, good_reads):
    """Run the read-based (unassembled) analysis pipeline for one sample.

    Updates the sample's read paths in the project database, then —
    depending on the reference dataset *db* — runs MetaPhlAn, a bowtie2
    taxonomy screen, and/or a diamond blastx functional screen, storing
    abundance results and pushing status updates along the way.

    NOTE(review): `reads1`/`reads2`/`projectid`/`sampleid` are spliced
    into SQL strings by concatenation — SQL-injection prone; should use
    parameterized queries.  TODO confirm callers only pass trusted ids.
    NOTE(review): `p` is read but never defined here — presumably a
    module-level thread/CPU count string; verify it exists at import time.
    """
    #db=root.dataset(db)
    x = sql.SQL(root.filedb())
    # Column 4 of the project row is its filesystem path.
    xpath = x.project(projectid)[0][4]
    # Record the (possibly re-supplied) input reads; `val` is unused.
    val = x.exe('update samples set reads1="' + reads1 + '" where project_id="' + projectid + '" and sample_id="' + sampleid + '"')
    val = x.exe('update samples set reads2="' + reads2 + '" where project_id="' + projectid + '" and sample_id="' + sampleid + '"')
    # Reload the full sample row and wrap it in the samples helper class.
    samples = x.exe('select * from samples where project_id="' + projectid + '" and sample_id="' + sampleid + '"')
    sample = samples[0]
    sample = root.samples(sample, xpath)
    root.mkdir(sample.matchesDir)
    rdir = root.__ROOTPRO__ + "/" + projectid + "/READS/"  # unused here
    # --- MetaPhlAn branch (this dataset id is handled by MetaPhlAn) ---
    if db.name == "abcdefghij":
        #print "MetaPhlAnn"
        update_status(x, sampleid, db.id, protocol, "Processing")
        metaphlan = root.program('MetaPhlAnR', sample, db)
        # Skip the run if its output already exists (resume support).
        if not root.isdir(metaphlan.out):
            metaphlan.run()
        G = txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy", metaphlan.out)
        abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")
    # --- Taxonomy branch: bowtie2 screen against db.bowtie ---
    if not db.taxo == "none":
        # run bowtie using the paired end reads
        update_status(x, sampleid, db.id, 'matches', "Screening")
        # NOTE(review): command built by string join and run via os.system
        # (shell=True semantics); paths with spaces/metacharacters break it.
        cmd = " ".join([
            root.__ROOTEXEDIR__ + 'bowtie2',
            '--very-fast-local -p ' + p + ' --no-unal --no-hd --no-sq -x',
            db.bowtie, '-1', sample.reads1, '-2', sample.reads2, '-S',
            sample.matchesDir + '/alignment.' + db.id + '.matches >>',
            root.log, '2>&1'
        ])
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id + '.matches'):
            os.system(cmd)
        #process output in sam format to get genes and number of reads per gene.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        # Only (re)compute abundance if the results db is not already there.
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id + '.matches.taxonomy.abundance.results.sqlite3.db'):
            abundance = parse_sam(
                sample.matchesDir + '/alignment.' + db.id + '.matches',
                db, good_reads)
            G = txp.taxonomy_tree(
                abundance,
                sample.matchesDir + '/alignment.' + db.id + '.matches',
                protocol, "taxonomy", db.id)
            abn = root.SampleResults(sample, G, protocol, db.id, "taxonomy",
                                     sample.matchesDir + '/alignment.' + db.id + '.matches')
            # Store data in the sql TABLE
            abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")
        # Taxonomy and function branches are mutually exclusive per call.
        return 'success'
    # --- Function branch: merge pairs, diamond blastx, quantify ---
    if not db.func == "none":
        fileso = root.result_files(projectid, "function", protocol, sampleid, db.name)
        #Merge paired ends
        update_status(x, sampleid, db.id, protocol, "Merge")
        cmd = " ".join([
            'python',
            root.__ROOTEXEDIR__ + "pairend_join.py -s -p " + p + " -m 8 -o ",
            sample.matchesDir + '/merged.reads.fastq',
            sample.reads1, sample.reads2
        ])
        #print cmd
        root.flog(cmd)
        #print cmd
        if not root.isdir(sample.matchesDir + '/merged.reads.fastq'):
            os.system(cmd)
        #Get fasta files
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/seqtk seq -a',
            sample.matchesDir + '/merged.reads.fastq >',
            sample.matchesDir + '/merged.reads.fasta'
        ])
        if not root.isdir(sample.matchesDir + '/merged.reads.fasta'):
            os.system(cmd)
        #BlastX from diamond
        update_status(x, sampleid, db.id, protocol, "Screening")
        dout = sample.matchesDir + 'alignment.' + db.id
        din = sample.matchesDir + '/merged.reads.fasta'
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond blastx --id 60 -p ' + p + ' -k 1 -e 1e-5 -d',
            db.diamond, '-a', dout + '.pre', '-q', din,
            '>>', root.log, "2>&1"
        ])
        if not root.isdir(dout + '.daa'):
            os.system(cmd)
        # Convert diamond's binary .daa output to tabular matches.
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond view -a', dout + '.pre.daa',
            '-o', dout + '.matches -f tab', ">>", root.log, "2>&1"
        ])
        if not root.isdir(dout + '.matches'):
            os.system(cmd)
        # parse diamond output
        update_status(x, sampleid, db.id, protocol, "Quantification")
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id + '.matches.function.abundance.results.sqlite3.db'):
            abundance = pb(dout + '.matches', db.func, db.len, db.funcdb,
                           "function", db.name, fileso.GGenes + ".rpkm", good_reads)
            #abundance=pdx(dout+'.matches', db, good_reads)
            abn = root.SampleResults(sample, 'none', protocol, db.name, "function", dout + '.matches')
            abn.createFuncDb(abundance)
        update_status(x, sampleid, db.id, protocol, "Done")
        # Clean up the large intermediate merged-read files.
        os.system('rm ' + sample.matchesDir + '/merged.reads.fastq >> ' + root.log + " 2>&1")
        os.system('rm ' + sample.matchesDir + '/merged.reads.fasta >> ' + root.log + " 2>&1")
        return 'success'
def idbaud(projectid,sampleid,db,protocol,reads1, reads2, good_reads):
    """Run the assembly-based pipeline (idba_ud) for one sample.

    Steps: refresh read paths in the project db, convert fastq to fasta,
    assemble with idba_ud, find genes with prodigal, then run whichever
    annotation branch the dataset *db* selects (MetaPhlAn, MyTaxa,
    taxonomy blast, or functional diamond blastp).

    NOTE(review): SQL statements are built by string concatenation from
    the arguments — injection-prone; parameterized queries would be safer.
    """
    #db=root.dataset(db)
    #1 get project path
    x=sql.SQL(root.filedb())
    # Column 4 of the project row is its filesystem path.
    xpath=x.project(projectid)[0][4]
    #print 'here------------'
    ###########################################################################
    #2 update the reads on the sql dataset, doit anyway, so if the sample is re run just take the new input, it could be modified.
    ###########################################################################
    val=x.exe('update samples set reads1="'+reads1+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    val=x.exe('update samples set reads2="'+reads2+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    ###########################################################################
    #3 get the sample full information - load the class samples
    ###########################################################################
    samples=x.exe('select * from samples where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    sample=samples[0]
    sample=root.samples(sample,xpath)
    root.mkdir(sample.assemblyDir)
    ###########################################################################
    # 4.1 Run fq2fa - this is used by udba_ud program
    ###########################################################################
    idba_ud=root.program('idba_ud',sample,db)
    update_status(x,sampleid,db.id,protocol,"Preprocessing")
    fq2fa=root.program('fq2fa', sample,db)
    # Only convert if the assembler's output is not already present.
    if not root.isdir(idba_ud.out):
        fq2fa.run()
    #make sure that there is a scaffold.fa file. If not, it computes again the fastq to fasta and the assembly
    ###########################################################################
    # 4.2 Run idba_ud - assembly the samples
    ###########################################################################
    update_status(x,sampleid,db.id,protocol,"Assembling")
    idba_ud=root.program('idba_ud',sample,db)
    if not root.isdir(idba_ud.out):
        # Assemble, then delete the assembler's large intermediate files.
        idba_ud.run(); os.system(' cd ' + idba_ud.path + ' && rm kmer contig-* align-* graph-* local-contig-* reads.fa')
    ###########################################################################
    # 4.2 Run gene finder - look at the genes over the scaffolds
    ###########################################################################
    prodigal=root.program("prodigal", sample,db)
    update_status(x,sampleid,db.id,protocol,"Finding Genes")
    if not root.isdir(prodigal.output+".gff"):
        prodigal.run()
    # --- MetaPhlAn branch (this dataset id is handled by MetaPhlAn) ---
    if db.name=="abcdefghij":
        print "MetaPlAn2"
        update_status(x,sampleid,db.id,protocol,"Processing")
        metaphlan=root.program('MetaPhlAn',sample,db)
        if not root.isdir(metaphlan.out):
            metaphlan.run()
        #print "Here 2"
        G=txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn=root.SampleResults(sample,G,protocol, db.name, "taxonomy", metaphlan.out)
        abn.start()
        update_status(x,sampleid,db.id,protocol,"Done")
    # --- MyTaxa branch: staged pipeline, each stage resumable ---
    if db.name=='MyTaxa':
        #print "MyTaxa"
        taxa=root.mytaxa(sample,db)
        update_status(x,sampleid,db.id,protocol,"Screening")
        # Each stage is skipped when its output file already exists.
        if not root.isdir(taxa.output+".prot.mytaxa.fa"):
            taxa.pre()
        if not root.isdir(taxa.output+".MyTaxa.matches.daa"):
            taxa.align()
        if not root.isdir(taxa.output+".MyTaxa.align"):
            taxa.postd()
        if not root.isdir(taxa.output+".MyTaxa.input"):
            taxa.mpre()
        if not root.isdir(taxa.output+".MyTaxa.out"):
            taxa.run()
        update_status(x,sampleid,db.id,protocol,"Quantification")
        data=taxa.postM()
        G=txp.mytaxa_taxonomy_tree(data,taxa.output+".MyTaxa.matches.taxonomy.abundance")
        abn=root.SampleResults(sample,G,protocol, "MyTaxa", "taxonomy", taxa.output+".MyTaxa.matches")
        abn.start()
        update_status(x,sampleid,db.id,protocol,"Done")
    # --- Generic taxonomy branch: blast against db.taxo ---
    if not db.taxo=="none":
        print "taxonomy"
        ###########################################################################
        # 4.3 Run bowtie to find matches
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Screening")
        # This particular dataset id is a protein db -> diamond blastp.
        if db.name=="ryaetguxun":
            blastn=root.program('diamond_blastp',sample,db)
        else:
            blastn=root.program('blastn',sample,db)
        blastn.run()
        #blastn.run()
        ###########################################################################
        # 4.4 taxonomy abundance
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Quantification")
        abundance=pb(blastn.out, db.taxo, db.len, db.taxodb, "taxonomy", db.name, "none",good_reads)
        ###########################################################################
        # 4.5 processing Visualization
        ###########################################################################
        G=txp.taxonomy_tree(abundance,blastn.out, protocol, "taxonomy", db.name )
        abn=root.SampleResults(sample,G,protocol, db.name, "taxonomy", blastn.out)
        abn.start()
        root.updateStatus(x,projectid,sampleid,"done")
        update_status(x,sampleid,db.id,protocol,"Done")
    # --- Functional annotation branch: diamond blastp against db.func ---
    if not db.func=="none":
        print "functional annotation"
        update_status(x,sampleid,db.id,protocol,"Screening")
        fileso=root.result_files(projectid, "function", protocol, sampleid, db.name)
        ###########################################################################
        # 4.3 Run bowtie to find matches
        ###########################################################################
        root.updateStatus(x,projectid,sampleid,"functional annotation")
        blastn=root.program('diamond_blastp',sample,db)
        blastn.run()
        ###########################################################################
        # 4.4 taxonomy abundance
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Quantification")
        abundance=pb(blastn.out, db.func, db.len, db.funcdb, "function", db.name, fileso.GGenes+".rpkm", good_reads)
        ###########################################################################
        # 4.5 processing Visualization
        ###########################################################################
        abn=root.SampleResults(sample,'none',protocol, db.name, "function", blastn.out)
        abn.createFuncDb(abundance)
        update_status(x,sampleid,db.id,protocol,"Done")
def taxonomy(data):
    """Build (or reuse) the merged all-samples taxonomy tree for a project.

    *data* carries "pid" (project), "uid" (user), "sid" (sample ids),
    "pip" (pipeline) and "rid" (reference id).  Returns
    ["taxonomy", tree] when a cached tree covers all finished samples,
    otherwise rebuilds the merged tree from each sample's pickled graph
    and returns ["taxonomy", tree, <json path>].

    NOTE(review): `pid`/`rid`/`pipeline` are concatenated into SQL —
    injection-prone; should be parameterized.
    """
    pid = data["pid"]
    uid = data["uid"]  # read but not used below
    sids = data["sid"]
    pipeline = data["pip"]
    rid = data["rid"]
    edges = []
    FULL_MATRIX = []
    #first see if the data set contains taxonomy, function or both annotations
    analysis = "taxonomy"
    #print '\n\n\n here we goo!!!\n\n\n', pipeline
    rootvar.mkdir(rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline + "/RESULTS/")
    # Pickled merged tree; a sibling ".db" sqlite file caches the matrix.
    all_samples_tree_file = rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline + "/RESULTS/" + rid + ".all_samples_tree.pk"
    #
    stored_samples = 0
    if os.path.isfile(all_samples_tree_file):
        #print '\n\n\n if the X file has been created'
        # Count how many samples the cached matrix already covers.
        x = sql.SQL(all_samples_tree_file + ".db")
        val = x.exe("select distinct sample_name from full_matrix")
        stored_samples = len(val)
    #
    # NOTE(review): `main_db` is not defined in this view — presumably a
    # module-level path to the main project database; confirm.
    x = sql.SQL(main_db)
    # All samples of this project/reference that finished this pipeline.
    sids = x.exe(
        'select c.sample_id from (select * from samples a inner join (select * from sample_status where pip="'
        + str(pipeline) + '") b on a.sample_id==b.sid) c where c.project_id=="'
        + pid + '" and c.status="Done" and c.rid="' + rid + '"')
    #
    #
    #print '\n\n\n\n super important \n', sids
    #print '\n\n\n get all the samples from the X file'
    #
    # Cache hit: the stored tree covers every finished sample -> reuse it.
    if os.path.isfile(all_samples_tree_file) and stored_samples == len(sids):
        #print 'both have the same length'
        G = nx.read_gpickle(all_samples_tree_file)
        tree = json_graph.tree_data(G, root='R')
        return ["taxonomy", tree]
    #section
    #print '\n\n\n SO the file has not been created, because the matrix of abundances has not been fetched for those samples'
    nodes = {}
    #print '\n\n\n So for each sample I get all the information from the sql tables. And create the tree \n\n\n\n\n\n'
    #
    # Gather each sample's abundance rows and its per-sample graph.
    for sid in sids:
        samples = x.exe('select * from samples where project_id="' + pid + '" and sample_id="' + sid[0] + '"')
        xpath = x.project(pid)[0][4]
        sample = rootvar.samples(samples[0], xpath)
        rf = rootvar.result_files(pid, analysis, pipeline, sample.id, rid)
        view = rootvar.ViewSampleResults(sample, pipeline, rid, analysis, rf)
        FULL_MATRIX += view.all()
        Gp = nx.read_gpickle(rf.pk)
        edges += Gp.edges()
        # Remember each node's taxonomic level for the merged graph.
        for node in Gp.nodes():
            if not node in nodes:
                nodes[node] = Gp.node[node]['level']
    x.close()
    # Persist the combined abundance matrix next to the tree pickle.
    rootvar.full_matrix_sql(all_samples_tree_file + ".db", FULL_MATRIX)
    # Merge all per-sample edges into one tree, counting in how many
    # samples each node was seen.
    G = nx.DiGraph()
    for i in edges:
        if not i in G.edges():
            if not i[1] in G.nodes():
                G.add_node(i[1], samples=1, level=nodes[i[1]])
            else:
                G.node[i[1]]['samples'] += 1
            if not i[0] in G.nodes():
                G.add_node(i[0], samples=1, level=nodes[i[0]])
            else:
                G.node[i[0]]['samples'] += 1
            # Keep the graph a tree: only link if the child has no parent yet.
            if not G.predecessors(i[1]):
                G.add_edge(i[0], i[1])
        else:
            G.node[i[0]]['samples'] += 1
            G.node[i[1]]['samples'] += 1
    tree = json_graph.tree_data(G, root='R')
    nx.write_gpickle(G, all_samples_tree_file)
    return ["taxonomy", tree, all_samples_tree_file + ".json"]