def on_background_change(self, base_path):
    """Apply the colour palette derived from the current wallpaper to GNOME Terminal.

    Reads the pre-generated palette string for *base_path* (via
    self.get_path) and writes it into the default GNOME Terminal
    profile using gconftool-2.
    NOTE(review): reconstructed from a whitespace-mangled source; the
    `execute` call is placed after the `with` block (behaviourally
    equivalent either way since the file is only read).
    """
    # get_path presumably maps the wallpaper path to its palette file -- TODO confirm
    with open(self.get_path(base_path)) as config_file:
        colours = config_file.read()
    # gconftool-2 targets the legacy GConf key of the Default profile
    execute(["gconftool-2", "--set", "/apps/gnome-terminal/profiles/Default/palette", "--type", "string", colours])
def Classify(input,output):
    """Taxonomically classify assembled contigs (and, optionally, unaligned reads).

    Driven almost entirely by module globals rather than its parameters:
    _settings (paths/threads/options), _cls (classifier name, e.g. "phymm",
    "fcp", "phylosift"), _skipsteps and _readlibs.  Work products land in
    <rundir>/Classify/out/<PREFIX>.hits and .annots, which are finally
    hard-linked into Postprocess/{in,out}.  For fcp/phymm the input FASTA is
    split into parts and classified in parallel (local Pool or, if _USE_GRID,
    a generated grid shell script).

    NOTE(review): this body was re-indented from a whitespace-mangled source;
    the block nesting was inferred from the logic and should be verified
    against the original metAMOS sources.
    """
    setFailFast(False)
    # Skip path: touch marker/output files so downstream steps see empty results.
    if "Classify" in _skipsteps or _cls == None:
        run_process(_settings, "touch %s/Logs/annotate.skip"%(_settings.rundir), "Classify")
        run_process(_settings, "touch %s/Classify/out/%s.hits"%(_settings.rundir, _settings.PREFIX), "Classify")
        run_process(_settings, "touch %s/Classify/out/%s.annots"%(_settings.rundir, _settings.PREFIX), "Classify")
        return 0
    # colon-separated list of inputs, later handed to the Krona importer
    listOfFiles = "%s/Classify/in/%s.asm.contig"%(_settings.rundir, _settings.PREFIX)
    # clean up any existing files
    run_process(_settings, "touch %s/Classify/out/%s.annots"%(_settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "unlink %s/Classify/in/%s.asm.contig"%(_settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "ln %s/Assemble/out/%s.asm.contig %s/Classify/in/%s.asm.contig"%(_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "unlink %s/Classify/out/%s.hits"%(_settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "rm -f %s/Classify/out/*.hits"%(_settings.rundir), "Classify")
    run_process(_settings, "rm -f %s/Classify/out/*.epsilon-nb_results.txt"%(_settings.rundir), "Classify")
    run_process(_settings, "rm -f %s/Classify/out/*.phymm.out"%(_settings.rundir), "Classify")
    pool = Pool(processes=_settings.threads)
    tasks = []
    if "fcp" in _cls or "phymm" in _cls:
        # hack to use gridX
        if _USE_GRID:
            # Grid mode: split contigs into ~200-byte chunks and emit a shell
            # script the grid engine runs once per part.
            size = sizeFastaFile("%s/Classify/in/%s.asm.contig"%(_settings.rundir, _settings.PREFIX))
            perThread = ceil(float(size) / 200)
            #print "The size of the contigs is %d per thread %d\n"%(size, perThread)
            #run_process(_settings, "python %s/python/splitfasta.py %s/Classify/in/%s.asm.contig %d %s/Classify/in/%s %d"%(_settings.METAMOS_UTILS, _settings.rundir, _settings.PREFIX, perThread, _settings.rundir, _settings.PREFIX, 1), "Classify")
            splitfasta("%s/Classify/in/%s.asm.contig"%(_settings.rundir,_settings.PREFIX),"%d"%(perThread),"%s/Classify/in/%s"%(_settings.rundir,_settings.PREFIX),"%d"%(1))
            totalJobs = 0
            for partFile in os.listdir("%s/Classify/in/"%(_settings.rundir)):
                if "_part" in partFile and "%s_part"%(_settings.PREFIX) in partFile:
                    print "A file I have to process is %s\n"%(partFile)
                    totalJobs += 1
            for lib in _readlibs:
                listOfFiles += ":%s/Assemble/out/lib%d.unaligned.fasta"%(_settings.rundir, lib.id)
                run_process(_settings, "ln %s/Assemble/out/lib%d.unaligned.fasta %s/Classify/in/lib%d.unaligned.fasta"%(_settings.rundir, lib.id, _settings.rundir, lib.id), "Classify")
                size = sizeFastaFile("%s/Classify/in/lib%d.unaligned.fasta"%(_settings.rundir, lib.id))
                perThread = ceil(float(size) / 200)
                #print "The size of the lib %d is %d per one %d\n"%(lib.id, size, perThread)
                #run_process(_settings, "python %s/python/splitfasta.py %s/Classify/in/lib%d.unaligned.fasta %d %s/Classify/in/%s %d"%(_settings.METAMOS_UTILS, _settings.rundir, lib.id, perThread, _settings.rundir, _settings.PREFIX, totalJobs+1), "Classify")
                #splitfasta("%s/Classify/in/%s.asm.contig,%d,%s/Classify/in/%s,%d"%(_settings.rundir,_settings.PREFIX,perThread,_settings.rundir,_settings.PREFIX,totalJobs+1))
                splitfasta("%s/Classify/in/lib%d.unaligned.fasta"%(_settings.rundir,lib.id),"%d"%(perThread),"%s/Classify/in/%s"%(_settings.rundir,_settings.PREFIX),"%d"%(totalJobs+1))
                # recount parts so the next library's part numbering starts after ours
                # NOTE(review): recount placement inside the lib loop inferred from
                # the totalJobs+1 offset above -- confirm against original source.
                totalJobs = 0
                for partFile in os.listdir("%s/Classify/in/"%(_settings.rundir)):
                    if "_part" in partFile and "%s_part"%(_settings.PREFIX) in partFile:
                        #print "A file I have to process is %s\n"%(partFile)
                        totalJobs += 1
            # Emit the per-part grid job script: each invocation scores one
            # <PREFIX>_part$jobid.fa with Phymm's scoreReads.pl and leaves a
            # $jobid.success marker so reruns can skip completed parts.
            cmdfile = open("%s/Classify/out/runAnnot.sh"%(_settings.rundir), "w")
            cmdfile.write("#!/bin/sh\n")
            cmdfile.write("\n")
            cmdfile.write("jobid=$GRID_TASK\n")
            cmdfile.write("if [ x$jobid = x -o x$jobid = xundefined -o x$jobid = 0 ]; then\n")
            cmdfile.write(" jobid=$1\n")
            cmdfile.write("fi\n")
            cmdfile.write("if test x$jobid = x; then\n")
            cmdfile.write(" echo Error: I need a job index on the command line\n")
            cmdfile.write(" exit 1\n")
            cmdfile.write("fi\n")
            cmdfile.write("if [ $jobid -gt %d ]; then\n"%(totalJobs))
            cmdfile.write(" echo Job id $jobid is out of range %d\n"%(totalJobs))
            cmdfile.write(" exit 0\n")
            cmdfile.write("fi\n")
            cmdfile.write("if test -e %s/Classify/out/$jobid.success ; then\n"%(_settings.rundir))
            cmdfile.write(" echo Job previously completed successfully.\n")
            cmdfile.write("else\n")
            # Phymm expects its data dirs in the working directory; symlink them in.
            cmdfile.write("ln -s %s/.blastData\n"%(_settings.PHYMM))
            cmdfile.write("ln -s %s/.genomeData\n"%(_settings.PHYMM))
            cmdfile.write("ln -s %s/.scripts\n"%(_settings.PHYMM))
            cmdfile.write("ln -s %s/.taxonomyData\n"%(_settings.PHYMM))
            cmdfile.write("mkdir .logs\n")
            cmdfile.write("perl %s/scoreReads.pl %s/Classify/in/%s_part$jobid.fa"%(_settings.PHYMM,_settings.rundir,_settings.PREFIX))
            cmdfile.write(" && touch %s/Classify/out/$jobid.success\n"%(_settings.rundir))
            cmdfile.write("fi\n")
            cmdfile.close()
            run_process(_settings, "chmod u+x %s/Classify/out/runAnnot.sh"%(_settings.rundir), "Classify")
            #run_process(_settings, "gridx -p %d -r %d -T -c %s/Classify/out/runAnnot.sh"%(min(totalJobs+1, 200), totalJobs+1, _settings.rundir), "Classify")
            # Merge the per-part Phymm results (dropping repeated headers) into one .phymm.out / .hits
            run_process(_settings, "cat %s/Classify/out/gridx-ibissub00*/wrk_*/results.03.phymmBL_%s_Annotate_in_*%s* | grep -v \"QUERY_ID\" > %s/Classify/out/%s.phymm.out"%(_settings.rundir, _settings.rundir.replace(os.sep, "_").replace(".", "_"), _settings.PREFIX, _settings.rundir, _settings.PREFIX))
            run_process(_settings, "ln %s/Classify/out/%s.phymm.out %s/Classify/out/%s.hits"%(_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX))
            # for now we only work as phymm
            # generate Krona output ImportPhymmBL.pl
            importPhymm = "%s%sperl%sImportPhymmBL.pl"%(_settings.METAMOS_UTILS, os.sep, os.sep)
            if not os.path.exists(importPhymm):
                print "Error: Krona importer for Phymm not found in %s. Please check your path and try again.\n"%(importPhymm)
                raise(JobSignalledBreak)
            run_process(_settings, "perl %s %s -f %s %s/Classify/out/%s.phymm.out:%s/Assemble/out/%s.contig.cnt:%s"%(importPhymm, "-l" if _settings.local_krona else "", listOfFiles,_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.taxa_level),"Classify")
            # TODO: local url (after next KronaTools release)
            # generate taxonomic-level annots
            # Map contig annotations back to the individual reads via the
            # per-library contig.reads files (read -> contig membership).
            readctg_dict = {}
            for lib in _readlibs:
                ctgfile = open("%s/Assemble/out/%s.lib%dcontig.reads"%(_settings.rundir, _settings.PREFIX, lib.id), 'r')
                for line in ctgfile.xreadlines():
                    line = line.replace("\n","")
                    read, ctg = line.split()
                    if ctg in readctg_dict:
                        readctg_dict[ctg].append(read)
                    else:
                        readctg_dict[ctg] = [read,]
                ctgfile.close()
            annotsfile = open("%s/Classify/out/%s.annots"%(_settings.rundir, _settings.PREFIX), 'r')
            annotreads = open("%s/Classify/out/%s.reads.annots"%(_settings.rundir, _settings.PREFIX), 'w')
            for line in annotsfile.xreadlines():
                line = line.replace("\n", "")
                ctg, annot = line.split()
                if ctg in readctg_dict:
                    for x in readctg_dict[ctg]:
                        annotreads.write("%s\t%s\n"%(x, annot))
                else:
                    # entry is not a contig (presumably an unassembled read); pass it through
                    annotreads.write("%s\t%s\n"%(ctg, annot))
            annotsfile.close()
            annotreads.close()
            readctg_dict.clear()
            return
        # we should also split the fna and faa file but for now this is good enough
        # Local (non-grid) mode: split contigs into one chunk per thread and
        # queue a classification task per part for the worker pool.
        size = sizeFastaFile("%s/Classify/in/%s.asm.contig"%(_settings.rundir, _settings.PREFIX))
        perThread = max(ceil(float(size) / _settings.threads), _MIN_SEQ_LENGTH)
        #print "The size of the contigs is %d per thread %d\n"%(size, perThread)
        #run_process(_settings, "python %s/python/splitfasta.py %s/Classify/in/%s.asm.contig %d"%(_settings.METAMOS_UTILS, _settings.rundir, _settings.PREFIX, perThread), "Classify")
        #splitfasta("%s/Classify/in/%s.asm.contig,%d,%s/Classify/in/%s,%d"%(_settings.rundir,_settings.PREFIX,perThread,_settings.rundir,_settings.PREFIX,1))
        splitfasta("%s/Classify/in/%s.asm.contig"%(_settings.rundir,_settings.PREFIX),"%d"%(perThread))
        for partFile in os.listdir("%s/Classify/in/"%(_settings.rundir)):
            if "_part" in partFile and "%s.asm.contig"%(_settings.PREFIX) in partFile:
                # part number is embedded in the file name: <...>_part<N>.fa
                partStart = partFile.find("_part")+5
                partEnd = partFile.find(".fa", partStart, len(partFile))
                partNumber = int(partFile[partStart:partEnd])
                params = {}
                params["jobID"] = len(tasks)
                params["cls"] = _cls
                params["contigs"] = "%s/Classify/in/%s"%(_settings.rundir, partFile)
                params["orfAA"] = ""
                params["orfFA"] = ""
                params["out"] = "%s.ctg_%d"%(_settings.PREFIX, partNumber)
                tasks.append(params)
    else:
        # non-parallel classifiers run on the whole contig file at once
        annotateSeq(_cls, "%s/Classify/in/%s.asm.contig"%(_settings.rundir, _settings.PREFIX), "%s/Classify/in/%s.faa"%(_settings.rundir, _settings.PREFIX), "%s/Classify/in/%s.fna"%(_settings.rundir, _settings.PREFIX), "%s.ctg"%(_settings.PREFIX))
    # annotate all the unmapped sequences using FCP
    if _cls == "blast" or _cls == "phmmer" or _cls == "metaphyler" or not _settings.classify_unmapped:
        #print "Warning: blast, PHMMER, and metaphyler is not supported for annotating unmapped sequences"
        #print "Warning: unmapped/unaligned sequences will not be annotated!"
        pass
    else:
        for lib in _readlibs:
            listOfFiles += ":%s/Assemble/out/lib%d.unaligned.fasta"%(_settings.rundir, lib.id)
            run_process(_settings, "ln %s/Assemble/out/lib%d.unaligned.fasta %s/Classify/in/lib%d.unaligned.fasta"%(_settings.rundir, lib.id, _settings.rundir, lib.id), "Classify")
            if "fcp" in _cls or "phymm" in _cls:
                # split unaligned reads per library and queue pool tasks, as for contigs
                size = sizeFastaFile("%s/Classify/in/lib%d.unaligned.fasta"%(_settings.rundir, lib.id))
                perThread = max(ceil(float(size) / _settings.threads), _MIN_SEQ_LENGTH)
                #run_process(_settings, "python %s/python/splitfasta.py %s/Classify/in/lib%d.unaligned.fasta %d"%(_settings.METAMOS_UTILS, _settings.rundir, lib.id, perThread), "Classify")
                splitfasta("%s/Classify/in/lib%d.unaligned.fasta"%(_settings.rundir,lib.id),"%d"%(perThread))
                for partFile in os.listdir("%s/Classify/in/"%(_settings.rundir)):
                    if "_part" in partFile and "lib%d.unaligned.fasta"%(lib.id) in partFile:
                        partStart = partFile.find("_part")+5
                        partEnd = partFile.find(".fa", partStart, len(partFile))
                        partNumber = int(partFile[partStart:partEnd])
                        params = {}
                        params["jobID"] = len(tasks)
                        params["cls"] = _cls
                        params["contigs"] = "%s/Classify/in/%s"%(_settings.rundir, partFile)
                        params["orfAA"] = ""
                        params["orfFA"] = ""
                        params["out"] = "%s.lib%d_%d"%(_settings.PREFIX, lib.id, partNumber)
                        tasks.append(params)
            else:
                annotateSeq(_cls, "%s/Assemble/out/lib%d.unaligned.fasta"%(_settings.rundir, lib.id), "", "", "%s.lib%d"%(_settings.PREFIX, lib.id))
    if "fcp" in _cls or "phymm" in _cls:
        # run all queued part-classification tasks in the worker pool;
        # .get(sys.maxint) blocks interruptibly (Python 2 Pool/KeyboardInterrupt workaround)
        result = pool.map_async(parallelWrapper, tasks).get(sys.maxint)
        for i in result:
            if (i["status"] == 1):
                # success: remove the temporary part file
                run_process(_settings, "rm %s"%(tasks[i["jobID"]]["contigs"]), "Classify")
            else:
                print "Error: parallel annotation job %d failed\n"%(i["jobID"])
                raise(JobSignalledBreak)
        pool.close()
        pool.join()
    if generic.checkIfExists(STEP_NAMES.ANNOTATE, _cls.lower()):
        # user-supplied (generic) classifier plugin handles its own merging
        generic.execute(STEP_NAMES.ANNOTATE, _cls.lower(), _settings)
    else:
        # merge results
        run_process(_settings, "cat %s/Classify/out/*.intermediate.hits > %s/Classify/out/%s.hits"%(_settings.rundir, _settings.rundir, _settings.PREFIX), "Classify")
    # Krona visualization: pick the importer matching the classifier.
    if _cls == "phylosift":
        importPS = "%s%sperl%sImportPhyloSift.pl"%(_settings.METAMOS_UTILS, os.sep, os.sep)
        if not os.path.exists(importPS):
            print "Error: Krona importer for PhyloSift not found in %s. Please check your path and try again.\n"%(_settings.KRONA)
            raise(JobSignalledBreak)
        run_process(_settings, "perl %s %s -c -i -f %s %s/Classify/out/%s.hits:%s/Assemble/out/%s.contig.cnt:%s"%(importPS,"-l" if _settings.local_krona else "",listOfFiles,_settings.rundir,_settings.PREFIX,_settings.rundir,_settings.PREFIX, _settings.taxa_level), "Classify")
    elif _cls == "fcp":
        # generate Krona output
        importFCP = "%s%sperl%sImportFCP.pl"%(_settings.METAMOS_UTILS, os.sep, os.sep)
        if not os.path.exists(importFCP):
            print "Error: Krona importer for FCP not found in %s. Please check your path and try again.\n"%(importFCP)
            raise(JobSignalledBreak)
        run_process(_settings, "cat %s/Classify/out/*.intermediate.epsilon-nb_results.txt | grep -v 'Fragment Id' > %s/Classify/out/%s.epsilon-nb_results.txt"%(_settings.rundir, _settings.rundir, _settings.PREFIX), "Classify")
        run_process(_settings, "perl %s %s -c -i -f %s %s/Classify/out/%s.epsilon-nb_results.txt:%s/Assemble/out/%s.contig.cnt:%s"%(importFCP, "-l" if _settings.local_krona else "", listOfFiles, _settings.rundir,_settings.PREFIX,_settings.rundir, _settings.PREFIX, _settings.taxa_level),"Classify")
        # TODO: local url (after next KronaTools release)
    elif _cls == "phymm":
        # generate Krona output ImportPhymmBL.pl
        importPhymm = "%s%sperl%sImportPhymmBL.pl"%(_settings.METAMOS_UTILS, os.sep, os.sep)
        if not os.path.exists(importPhymm):
            print "Error: Krona importer for Phymm not found in %s. Please check your path and try again.\n"%(importPhymm)
            raise(JobSignalledBreak)
        run_process(_settings, "cat %s/Classify/out/*.intermediate.phymm.out > %s/Classify/out/%s.phymm.out"%(_settings.rundir, _settings.rundir, _settings.PREFIX), "Classify")
        run_process(_settings, "perl %s %s -f %s %s/Classify/out/%s.phymm.out:%s/Assemble/out/%s.contig.cnt:%s"%(importPhymm, "-l" if _settings.local_krona else "", listOfFiles,_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.taxa_level),"Classify")
        # TODO: local url (after next KronaTools release)
    elif generic.checkIfExists(STEP_NAMES.ANNOTATE, _cls.lower()):
        # plugin classifier: use its dedicated importer if one is shipped
        genericImport = "%s%sperl%sImport%s.pl"%(_settings.METAMOS_UTILS, os.sep, os.sep, _cls.title())
        if os.path.exists(genericImport):
            run_process(_settings, "perl %s %s -c -i -f %s %s/Classify/out/%s.hits:%s/Assemble/out/%s.contig.cnt:%s"%(genericImport, "-l" if _settings.local_krona else "", listOfFiles, _settings.rundir,_settings.PREFIX,_settings.rundir, _settings.PREFIX, _settings.taxa_level),"Classify")
            # TODO: local url (after next KronaTools release)
    else:
        genericImport = "%s%sperl%sImportGeneric.pl"%(_settings.METAMOS_UTILS, os.sep, os.sep)
        if not os.path.exists(genericImport):
            print "Error: Krona importer for generic classifier not found in %s. Please check your path and try again.\n"%(genericImport)
            raise(JobSignalledBreak)
        run_process(_settings, "perl %s %s -c -i -f %s %s/Classify/out/%s.hits:%s/Assemble/out/%s.contig.cnt:%s"%(genericImport, "-l" if _settings.local_krona else "", listOfFiles, _settings.rundir,_settings.PREFIX,_settings.rundir, _settings.PREFIX, _settings.taxa_level),"Classify")
        # TODO: local url (after next KronaTools release)
    # expose the hits to the Postprocess step via hard links
    run_process(_settings, "unlink %s/Postprocess/in/%s.hits"%(_settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "unlink %s/Postprocess/out/%s.hits"%(_settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "ln %s/Classify/out/%s.hits %s/Postprocess/in/%s.hits"%(_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "ln %s/Classify/out/%s.hits %s/Postprocess/out/%s.hits"%(_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX), "Classify")
    # generate taxonomic-level annots
    # (same read-level annotation propagation as in the grid branch above)
    readctg_dict = {}
    for lib in _readlibs:
        ctgfile = open("%s/Assemble/out/%s.lib%dcontig.reads"%(_settings.rundir, _settings.PREFIX, lib.id), 'r')
        for line in ctgfile.xreadlines():
            line = line.replace("\n","")
            read, ctg = line.split()
            if ctg in readctg_dict:
                readctg_dict[ctg].append(read)
            else:
                readctg_dict[ctg] = [read,]
        ctgfile.close()
    annotsfile = open("%s/Classify/out/%s.annots"%(_settings.rundir, _settings.PREFIX), 'r')
    annotreads = open("%s/Classify/out/%s.reads.annots"%(_settings.rundir, _settings.PREFIX), 'w')
    for line in annotsfile.xreadlines():
        line = line.replace("\n", "")
        ctg, annot = line.split()
        if ctg in readctg_dict:
            for x in readctg_dict[ctg]:
                annotreads.write("%s\t%s\n"%(x, annot))
        else:
            annotreads.write("%s\t%s\n"%(ctg, annot))
    annotsfile.close()
    annotreads.close()
    readctg_dict.clear()
def Assemble(input,output):
    """Run the configured assembler on the preprocessed read libraries.

    The assembler name (and optional k-mer size, as "name.kmer") is decoded
    from *input*; the output prefix is decoded from *output*.  Dispatches to
    one of: pre-assembled contigs, soapdenovo/soapdenovo2, metaidba, newbler,
    amos, CA (runCA), velvet, velvet-sc, metavelvet, SparseAssembler, or a
    generic plugin.  Guarantees <rundir>/Assemble/out/<PREFIX>.asm.contig
    exists on return (touched empty on failure) and restores the global
    _settings.PREFIX / _settings.kmer it temporarily overrides.

    NOTE(review): re-indented from a whitespace-mangled source; nesting was
    inferred from the logic -- verify against the original metAMOS sources.
    """
    # turn off fail fast option
    setFailFast(False)
    # save globals we override so they can be restored before returning
    originalPrefix = _settings.PREFIX
    originalKmer = _settings.kmer
    asmPrefix = output.replace("%s/Assemble/out/"%(_settings.rundir), "")
    asmPrefix = asmPrefix.replace(".asm.contig", "")
    asmName = input.replace("%s/Assemble/out/"%(_settings.rundir), "")
    asmName = asmName.replace(".run", "")
    # a name like "velvet.31" encodes assembler + kmer, unless it is actually
    # a pre-assembled contig file sitting in Preprocess/out
    isContig = os.path.exists("%s/Preprocess/out/%s.asm.contig"%(_settings.rundir, asmName))
    if (len(asmName.split(".")) > 1) and not isContig:
        (asmName, kmer) = asmName.split(".")
        _settings.kmer = int(kmer)
    # NOTE(review): PREFIX assignment placed outside the if (always applies) -- confirm
    _settings.PREFIX = asmPrefix
    mated = False
    for lib in _readlibs:
        if lib.mated:
            mated = True
            break
    if "Assemble" in _skipsteps or "assemble" in _skipsteps:
        run_process(_settings, "touch %s/Logs/assemble.skip"%(_settings.rundir), "Assemble")
        return 0
    if os.path.exists("%s/Preprocess/out/%s.asm.contig"%(_settings.rundir, asmName)):
        # we had contigs input
        run_process(_settings, "unlink %s/Assemble/out/%s.asm.contig"%(_settings.rundir, asmName), "Assemble")
        run_process(_settings, "ln %s/Preprocess/out/%s.asm.contig %s/Assemble/out/%s.asm.contig"%(_settings.rundir, asmName, _settings.rundir, asmName), "Assemble")
    #pick assembler
    elif asmName == "none" or asmName == None:
        pass
    elif asmName == "soapdenovo" or asmName == "soapdenovo2":
        #open & update config
        # fill the LIB<n>Q{1,2}REPLACE placeholders in the config template
        soapf = open("%s/config.txt"%(_settings.rundir),'r')
        soapd = soapf.read()
        soapf.close()
        cnt = 1
        libno = 1
        #print libs
        for lib in _readlibs:
            if (lib.format == "fastq" or lib.format == "fasta") and lib.mated and not lib.interleaved:
                soapd = soapd.replace("LIB%dQ1REPLACE"%(lib.id),"%s/Preprocess/out/lib%d.1.fastq"%(_settings.rundir,lib.id))
                soapd = soapd.replace("LIB%dQ2REPLACE"%(lib.id),"%s/Preprocess/out/lib%d.2.fastq"%(_settings.rundir,lib.id))
            elif lib.format == "fastq" and lib.mated and lib.interleaved:
                #this is NOT supported by SOAP, make sure files are split into two..
                #need to update lib.f2 path
                run_process(_settings, "perl %s/perl/split_fastq.pl %s/Preprocess/out/%s %s/Assemble/in/%s %s/Assemble/in/%s.f2"%(_settings.METAMOS_UTILS,_settings.rundir,lib.f1.fname,_settings.rundir,lib.f1.fname,_settings.rundir,lib.f1.fname),"Assemble")
                soapd = soapd.replace("LIB%dQ1REPLACE"%(lib.id),"%s/Assemble/in/%s"%(_settings.rundir,lib.f1.fname))
                soapd = soapd.replace("LIB%dQ2REPLACE"%(lib.id),"%s/Assemble/in/%s"%(_settings.rundir,lib.f1.fname+".f2"))
            elif lib.format == "fasta" and lib.mated:
                soapd = soapd.replace("LIB%dQ1REPLACE"%(lib.id),"%s/Preprocess/out/lib%d.1.fastq"%(_settings.rundir,lib.id))
                soapd = soapd.replace("LIB%dQ2REPLACE"%(lib.id),"%s/Preprocess/out/lib%d.2.fastq"%(_settings.rundir,lib.id))
            else:
                soapd = soapd.replace("LIB%dQ1REPLACE"%(lib.id),"%s/Preprocess/out/lib%d.fastq"%(_settings.rundir,lib.id))
            #cnt +=1
        soapw = open("%s/soapconfig.txt"%(_settings.rundir),'w')
        soapw.write(soapd)
        soapw.close()
        specName = "soap.spec"
        configName = "%s/soapconfig.txt"%(_settings.rundir)
        # SOAPdenovo2 rejects max_rd_len lines; strip them into a second config
        run_process(_settings, "cat %s |grep -v max_rd_len > %s/soap2config.txt"%(configName, _settings.rundir), "Assemble")
        binPath = _settings.SOAPDENOVO
        if asmName == "soapdenovo2":
            configName = "%s/soap2config.txt"%(_settings.rundir)
            binPath = _settings.SOAPDENOVO2
            specName = "soap2.spec"
        if not os.path.exists(binPath + os.sep + "SOAPdenovo-63mer"):
            # binary missing: leave an empty contig file, restore globals, bail out
            print "Error: %s not found in %s. Please check your path and try again.\n"%(asmName.title(), binPath)
            run_process(_settings, "touch %s/Assemble/out/%s.asm.contig"%(_settings.rundir, _settings.PREFIX), "Assemble")
            _settings.kmer = originalKmer
            _settings.PREFIX = originalPrefix
            setFailFast(True)
            return
        soapOptions = getProgramParams(_settings.METAMOS_UTILS, specName, "pregraph", "-")
        soapContigOptions = getProgramParams(_settings.METAMOS_UTILS, specName, "contig", "-")
        soapMapOptions = getProgramParams(_settings.METAMOS_UTILS, specName, "map", "-")
        soapScaffOptions = getProgramParams(_settings.METAMOS_UTILS, specName, "scaff", "-")
        #start stopwatch
        # the 63mer binary only supports k <= 63
        soapEXE="SOAPdenovo-63mer"
        if _settings.kmer > 63:
            soapEXE="SOAPdenovo-127mer"
        run_process(_settings, "%s/%s pregraph -p %d -K %d %s -s %s -o %s/Assemble/out/%s.asm"%(binPath, soapEXE, _settings.threads, _settings.kmer, soapOptions, configName,_settings.rundir,_settings.PREFIX),"Assemble")#SOAPdenovo config.txt
        run_process(_settings, "%s/%s contig -g %s/Assemble/out/%s.asm %s"%(binPath, soapEXE, _settings.rundir,_settings.PREFIX, soapContigOptions),"Assemble")#SOAPdenovo config.txt
        if _settings.doscaffolding and mated:
            run_process(_settings, "%s/%s map -g %s/Assemble/out/%s.asm -p %d %s -s %s"%(binPath, soapEXE, _settings.rundir,_settings.PREFIX, _settings.threads, soapMapOptions, configName),"Assemble")#SOAPdenovo config.txt
            run_process(_settings, "%s/%s scaff -g %s/Assemble/out/%s.asm -p %d %s"%(binPath, soapEXE, _settings.rundir,_settings.PREFIX, _settings.threads, soapScaffOptions),"Assemble")#SOAPdenovo config.txt
        if os.path.exists("%s/Assemble/out/%s.asm.scafSeq"%(_settings.rundir, _settings.PREFIX)):
            # close gaps if GapCloser is available, else just link the scaffolds
            if os.path.exists("%s/GapCloser"%(binPath)):
                run_process(_settings, "%s/GapCloser -b %s -o %s/Assemble/out/%s.linearize.scaffolds.final -a %s/Assemble/out/%s.asm.scafSeq -t %d"%(binPath, configName, _settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.threads), "Assemble")
            else:
                run_process(_settings, "ln %s/Assemble/out/%s.asm.scafSeq %s/Assemble/out/%s.linearize.scaffolds.final"%(_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX), "Assemble")
            # break scaffolds at NNN runs to produce the contig file
            run_process(_settings, "java -cp %s SplitFastaByLetter %s/Assemble/out/%s.linearize.scaffolds.final NNN > %s/Assemble/out/%s.asm.contig"%(_settings.METAMOS_JAVA, _settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX), "Assemble")
        #if OK, convert output to AMOS
    elif asmName == "metaidba":
        bowtie_mapping = 1
        for lib in _readlibs:
            if lib.format != "fasta" or (lib.mated and not lib.interleaved):
                print "Warning: meta-IDBA requires reads to be in (interleaved) fasta format, converting library"
            #apparently connect = scaffold? need to convert fastq to interleaved fasta to run, one lib per run??
            #print "%s/metaidba --read %s/Preprocess/out/lib%d.fasta --output %s/Assemble/out/%s.asm --mink 21 --maxk %d --cover 1 --connect"%(_settings.METAIDBA,_settings.rundir,lib.id,_settings.rundir,_settings.PREFIX,_settings.kmer)
            metaidbaOptions = getProgramParams(_settings.METAMOS_UTILS, "metaidba.spec", "", "--")
            run_process(_settings, "%s/metaidba --read %s/Preprocess/out/lib%d.fasta --output %s/Assemble/out/%s.asm %s --maxk %d"%(_settings.METAIDBA,_settings.rundir,lib.id,_settings.rundir,_settings.PREFIX,metaidbaOptions,_settings.kmer),"Assemble")
        # NOTE(review): rename placed after the lib loop (uses no per-lib vars) -- confirm
        run_process(_settings, "mv %s/Assemble/out/%s.asm-contig.fa %s/Assemble/out/%s.asm.contig"%(_settings.rundir,_settings.PREFIX,_settings.rundir,_settings.PREFIX),"Assemble")
    elif asmName == "newbler":
        if not os.path.exists(_settings.NEWBLER + os.sep + "newAssembly"):
            print "Error: Newbler not found in %s. Please check your path and try again.\n"%(_settings.NEWBLER)
            run_process(_settings, "touch %s/Assemble/out/%s.asm.contig"%(_settings.rundir, _settings.PREFIX), "Assemble")
            _settings.kmer = originalKmer
            _settings.PREFIX = originalPrefix
            setFailFast(True)
            return
        run_process(_settings, "%s/newAssembly -force %s/Assemble/out"%(_settings.NEWBLER, _settings.rundir),"Assemble")
        # probe the installed Newbler version (needed for the FASTQ check below)
        NEWBLER_VERSION = 0.0;
        p = subprocess.Popen("%s/newAssembly --version | head -n 1"%(_settings.NEWBLER), shell=True, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        (checkStdout, checkStderr) = p.communicate()
        if checkStderr != "":
            print "Warning: Cannot determine Newbler version"
        else:
            mymatch = re.findall('\d+\.\d+', checkStdout.strip())
            if (len(mymatch) == 1 and mymatch[0] != None):
                NEWBLER_VERSION = float(mymatch[0])
        for lib in _readlibs:
            if lib.format == "fasta":
                run_process(_settings, "%s/addRun %s/Assemble/out %s/Preprocess/out/lib%d.seq"%(_settings.NEWBLER, _settings.rundir, _settings.rundir,lib.id),"Assemble")
            elif lib.format == "sff":
                run_process(_settings, "%s/addRun %s %s/Assemble/out %s/Preprocess/out/lib%d.sff"%(_settings.NEWBLER, ("-p" if lib.mated else ""), _settings.rundir, _settings.rundir, lib.id), "Assemble")
            elif lib.format == "fastq":
                if (NEWBLER_VERSION < 2.6):
                    print "Error: FASTQ + Newbler only supported in Newbler version 2.6+. You are using version %s."%(_settings.NEWBLER_VERSION)
                    run_process(_settings, "touch %s/Assemble/out/%s.asm.contig"%(_settings.rundir, _settings.PREFIX), "Assemble")
                    _settings.kmer = originalKmer
                    _settings.PREFIX = originalPrefix
                    setFailFast(True)
                    return
                run_process(_settings, "%s/addRun %s/Assemble/out %s/Preprocess/out/lib%d.fastq"%(_settings.NEWBLER, _settings.rundir, _settings.rundir, lib.id),"Assemble")
        newblerCmd = "%s%srunProject "%(_settings.NEWBLER, os.sep)
        # read spec file to input to newbler parameters
        newblerCmd += getProgramParams(_settings.METAMOS_UTILS, "newbler.spec", "", "-")
        run_process(_settings, "%s -cpu %d %s/Assemble/out"%(newblerCmd,_settings.threads,_settings.rundir),"Assemble")
        # unlike other assemblers, we can only get the preprocess info for newbler after assembly (since it has to split sff files by mates)
        extractNewblerReads()
        # convert to AMOS
        run_process(_settings, "cat %s/Assemble/out/assembly/454Contigs.ace |awk '{if (match($2, \"\\\\.\")) {STR= $1\" \"substr($2, 1, index($2, \".\")-1); for (i = 3; i <=NF; i++) STR= STR\" \"$i; print STR} else { print $0} }' > %s/Assemble/out/%s.ace"%(_settings.rundir, _settings.rundir,_settings.PREFIX), "Assemble")
        run_process(_settings, "%s/toAmos -o %s/Assemble/out/%s.mates.afg -m %s/Preprocess/out/all.seq.mates -ace %s/Assemble/out/%s.ace"%(_settings.AMOS,_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.rundir, _settings.PREFIX),"Assemble")
        # get info on EID/IIDs for contigs
        run_process(_settings, "cat %s/Assemble/out/%s.mates.afg | grep -A 3 \"{CTG\" |awk '{if (match($1, \"iid\") != 0) {IID = $1} else if (match($1, \"eid\") != 0) {print $1\" \"IID; } }'|sed s/eid://g |sed s/iid://g > %s/Assemble/out/454eidToIID"%(_settings.rundir, _settings.PREFIX, _settings.rundir),"Assemble")
        run_process(_settings, "java -cp %s convert454GraphToCTL %s/Assemble/out/454eidToIID %s/Assemble/out/assembly/454ContigGraph.txt > %s/Assemble/out/%s.graph.cte"%(_settings.METAMOS_JAVA, _settings.rundir, _settings.rundir, _settings.rundir, _settings.PREFIX),"Assemble")
        run_process(_settings, "cat %s/Assemble/out/%s.mates.afg %s/Assemble/out/%s.graph.cte > %s/Assemble/out/%s.afg"%(_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX),"Assemble")
        # make symlink for subsequent steps
        run_process(_settings, "rm %s/Assemble/out/%s.asm.contig"%(_settings.rundir, _settings.PREFIX),"Assemble")
        run_process(_settings, "ln %s/Assemble/out/assembly/454AllContigs.fna %s/Assemble/out/%s.asm.contig"%(_settings.rundir, _settings.rundir, _settings.PREFIX),"Assemble")
        if _settings.doscaffolding and mated == True:
            run_process(_settings, "ln %s/Assemble/out/assembly/454Scaffolds.fna %s/Assemble/out/%s.linearize.scaffolds.final"%(_settings.rundir, _settings.rundir, _settings.PREFIX),"Assemble")
    elif asmName == "amos":
        # minimus-style AMOS pipeline: bank, overlap, layout, consensus, export
        run_process(_settings, "rm -rf %s/Assemble/in/%s.bnk"%(_settings.rundir, _settings.PREFIX), "Assemble")
        for lib in _readlibs:
            if lib.format == "fasta":
                run_process(_settings, "%s/toAmos_new -s %s/Preprocess/out/lib%d.seq -b %s/Assemble/in/%s.bnk "%(_settings.AMOS,_settings.rundir,lib.id,_settings.rundir, _settings.PREFIX),"Assemble")
            elif lib.format == "fastq":
                run_process(_settings, "%s/toAmos_new -Q %s/Preprocess/out/lib%d.seq -i --libname lib%d --min %d --max %d -b %s/Assemble/in/%s.bnk "%(_settings.AMOS,_settings.rundir,lib.id,lib.id,lib.mean,lib.stdev,_settings.rundir,_settings.PREFIX),"Assemble")
        run_process(_settings, "%s/hash-overlap -B %s/Assemble/in/%s.bnk"%(_settings.AMOS, _settings.rundir, _settings.PREFIX), "Assemble")
        run_process(_settings, "%s/tigger -b %s/Assemble/in/%s.bnk"%(_settings.AMOS, _settings.rundir, _settings.PREFIX), "Assemble")
        run_process(_settings, "%s/make-consensus -B -b %s/Assemble/in/%s.bnk"%(_settings.AMOS, _settings.rundir, _settings.PREFIX), "Assemble")
        run_process(_settings, "%s/bank2fasta -b %s/Assemble/in/%s.bnk > %s.asm.contig"%(_settings.AMOS, _settings.rundir, _settings.PREFIX, _settings.PREFIX), "Assemble")
    elif asmName.lower() == "ca":
        #runCA script
        # build .frg inputs per library (skipping ones that already exist)
        frglist = ""
        matedString = ""
        for lib in _readlibs:
            if not os.path.exists("%s/Preprocess/out/lib%d.frg"%(_settings.rundir, lib.id)):
                if lib.format == "fastq":
                    if lib.mated:
                        matedString = "-insertsize %d %d -%s -mates"%(lib.mean, lib.stdev, "innie" if lib.innie else "outtie")
                    else:
                        matedString = "-reads"
                    run_process(_settings, "%s/fastqToCA -libraryname %s -technology illumina-long %s %s/Preprocess/out/lib%d.seq > %s/Preprocess/out/lib%d.frg"%(_settings.CA, lib.sid, matedString, _settings.rundir, lib.id, _settings.rundir, lib.id),"Assemble")
                elif lib.format == "fasta":
                    if lib.mated:
                        matedString = "-mean %d -stddev %d -m %s/Preprocess/out/lib%d.seq.mates"%(lib.mean, lib.stdev, _settings.rundir, lib.id)
                    run_process(_settings, "%s/convert-fasta-to-v2.pl -l %s %s -s %s/Preprocess/out/lib%d.seq -q %s/Preprocess/out/lib%d.seq.qual > %s/Preprocess/out/lib%d.frg"%(_settings.CA, lib.sid, matedString, _settings.rundir, lib.id, _settings.rundir, lib.id, _settings.rundir, lib.id),"Assemble")
            frglist += "%s/Preprocess/out/lib%d.frg "%(_settings.rundir, lib.id)
        # a run-local asm.spec overrides the shipped default
        specFile="%s/config/asm.spec"%(_settings.METAMOS_UTILS)
        if os.path.exists("%s/Assemble/out/asm.spec"%(_settings.rundir)):
            specFile="%s/Assemble/out/asm.spec"%(_settings.rundir)
        run_process(_settings, "%s/runCA -p %s -d %s/Assemble/out/ -s %s %s %s"%(_settings.CA,_settings.PREFIX,_settings.rundir,specFile,"" if _settings.doscaffolding else "stopAfter=utgcns", frglist),"Assemble")
        #convert CA to AMOS
        run_process(_settings, "%s/gatekeeper -dumpfrg -allreads %s.gkpStore > %s.frg"%(_settings.CA, _settings.PREFIX, _settings.PREFIX),"Assemble")
        if _settings.doscaffolding:
            # full run produced terminator output; link contigs and scaffolds
            run_process(_settings, "ln 9-terminator/%s.ctg.fasta %s.asm.contig"%(_settings.PREFIX, _settings.PREFIX), "Assemble")
            run_process(_settings, "ln 9-terminator/%s.scf.fasta %s.linearize.scaffolds.final"%(_settings.PREFIX, _settings.PREFIX), "Assemble")
        else:
            # stopped after unitig consensus: run terminator manually and export unitigs
            run_process(_settings, "%s/terminator -g %s.gkpStore -t %s.tigStore/ 2 -o %s"%(_settings.CA, _settings.PREFIX, _settings.PREFIX, _settings.PREFIX),"Assemble")
            run_process(_settings, "%s/asmOutputFasta -p %s < %s.asm"%(_settings.CA, _settings.PREFIX, _settings.PREFIX), "Assemble")
            run_process(_settings, "ln %s.utg.fasta %s.asm.contig"%(_settings.PREFIX, _settings.PREFIX), "Assemble")
    elif asmName == "velvet":
        runVelvet(_settings.VELVET, "velvet")
    elif asmName == "velvet-sc":
        runVelvet(_settings.VELVET_SC, "velvet-sc")
    elif asmName == "metavelvet":
        runMetaVelvet(_settings.VELVET, _settings.METAVELVET, "metavelvet")
    elif asmName.lower() == "sparseassembler":
        runSparseAssembler(_settings.SPARSEASSEMBLER, "SparseAssembler");
    elif generic.checkIfExists(STEP_NAMES.ASSEMBLE, asmName.lower()):
        # user-supplied assembler plugin
        generic.execute(STEP_NAMES.ASSEMBLE, asmName.lower(), _settings)
    else:
        print "Error: %s is an unknown assembler. No valid assembler specified."%(asmName)
    # ensure the expected output exists even on failure, then restore globals
    if not os.path.exists("%s/Assemble/out/%s.asm.contig"%(_settings.rundir, _settings.PREFIX)):
        run_process(_settings, "touch %s/Assemble/out/%s.asm.contig"%(_settings.rundir, _settings.PREFIX), "Assemble")
    _settings.kmer = originalKmer
    _settings.PREFIX = originalPrefix
    setFailFast(True)
def Classify(input, output):
    """Run the Classify pipeline step: taxonomically annotate assembled contigs.

    Links the assembly output into Classify/in, splits the FASTA input into
    per-worker chunks, runs the configured classifier (_cls) either through
    gridX (phymm path, when _USE_GRID is set) or through a local
    multiprocessing Pool, merges per-chunk results, generates Krona import
    output, and finally propagates per-contig annotations down to reads.

    Depends on module globals defined elsewhere in this file: _settings,
    _cls, _skipsteps, _readlibs, _USE_GRID, _MIN_SEQ_LENGTH, and helpers
    run_process / splitfasta / sizeFastaFile / annotateSeq / parallelWrapper.

    Returns 0 when the step is skipped; otherwise returns None.

    NOTE(review): the parameter names shadow the builtins input/output; both
    are unused in the body — presumably kept for a uniform step signature
    across the pipeline. Confirm against the step-dispatch caller.
    """
    # Don't abort the whole pipeline if an individual subcommand fails here.
    setFailFast(False)
    # Step skipped (or no classifier configured): touch the expected output
    # files so downstream steps find (empty) inputs, then bail out.
    if "Classify" in _skipsteps or _cls == None:
        run_process(_settings, "touch %s/Logs/annotate.skip" % (_settings.rundir), "Classify")
        run_process(_settings, "touch %s/Classify/out/%s.hits" % (_settings.rundir, _settings.PREFIX), "Classify")
        run_process(_settings, "touch %s/Classify/out/%s.annots" % (_settings.rundir, _settings.PREFIX), "Classify")
        return 0

    # Colon-separated list of FASTA inputs handed to the Krona importers below.
    listOfFiles = "%s/Classify/in/%s.asm.contig" % (_settings.rundir, _settings.PREFIX)

    # clean up any existing files
    run_process(_settings, "touch %s/Classify/out/%s.annots" % (_settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "unlink %s/Classify/in/%s.asm.contig" % (_settings.rundir, _settings.PREFIX), "Classify")
    # Hard-link the assembled contigs into this step's input directory.
    run_process(_settings, "ln %s/Assemble/out/%s.asm.contig %s/Classify/in/%s.asm.contig" % (_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "unlink %s/Classify/out/%s.hits" % (_settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "rm -f %s/Classify/out/*.hits" % (_settings.rundir), "Classify")
    run_process(_settings, "rm -f %s/Classify/out/*.epsilon-nb_results.txt" % (_settings.rundir), "Classify")
    run_process(_settings, "rm -f %s/Classify/out/*.phymm.out" % (_settings.rundir), "Classify")

    # Worker pool for the local (non-grid) classification path.
    pool = Pool(processes=_settings.threads)
    tasks = []

    if "fcp" in _cls or "phymm" in _cls:
        # hack to use gridX
        if _USE_GRID:
            # Split contigs into ~200 chunks for grid jobs (divisor 200 is a
            # hard-coded chunk-count target, not a byte size — see perThread).
            size = sizeFastaFile("%s/Classify/in/%s.asm.contig" % (_settings.rundir, _settings.PREFIX))
            perThread = ceil(float(size) / 200)
            #print "The size of the contigs is %d per thread %d\n"%(size, perThread)
            #run_process(_settings, "python %s/python/splitfasta.py %s/Classify/in/%s.asm.contig %d %s/Classify/in/%s %d"%(_settings.METAMOS_UTILS, _settings.rundir, _settings.PREFIX, perThread, _settings.rundir, _settings.PREFIX, 1), "Classify")
            # NOTE(review): splitfasta appears to take string-typed arguments
            # (hence the "%d" wrapping) — confirm against its definition.
            splitfasta("%s/Classify/in/%s.asm.contig" % (_settings.rundir, _settings.PREFIX),
                       "%d" % (perThread),
                       "%s/Classify/in/%s" % (_settings.rundir, _settings.PREFIX),
                       "%d" % (1))
            # Count how many *_part files the split produced so far.
            totalJobs = 0
            for partFile in os.listdir("%s/Classify/in/" % (_settings.rundir)):
                if "_part" in partFile and "%s_part" % (_settings.PREFIX) in partFile:
                    print "A file I have to process is %s\n" % (partFile)
                    totalJobs += 1
            # Also split each library's unaligned reads; their parts are
            # numbered starting after the contig parts (totalJobs+1).
            for lib in _readlibs:
                listOfFiles += ":%s/Assemble/out/lib%d.unaligned.fasta" % (_settings.rundir, lib.id)
                run_process(_settings, "ln %s/Assemble/out/lib%d.unaligned.fasta %s/Classify/in/lib%d.unaligned.fasta" % (_settings.rundir, lib.id, _settings.rundir, lib.id), "Classify")
                size = sizeFastaFile("%s/Classify/in/lib%d.unaligned.fasta" % (_settings.rundir, lib.id))
                perThread = ceil(float(size) / 200)
                #print "The size of the lib %d is %d per one %d\n"%(lib.id, size, perThread)
                #run_process(_settings, "python %s/python/splitfasta.py %s/Classify/in/lib%d.unaligned.fasta %d %s/Classify/in/%s %d"%(_settings.METAMOS_UTILS, _settings.rundir, lib.id, perThread, _settings.rundir, _settings.PREFIX, totalJobs+1), "Classify")
                #splitfasta("%s/Classify/in/%s.asm.contig,%d,%s/Classify/in/%s,%d"%(_settings.rundir,_settings.PREFIX,perThread,_settings.rundir,_settings.PREFIX,totalJobs+1))
                splitfasta("%s/Classify/in/lib%d.unaligned.fasta" % (_settings.rundir, lib.id),
                           "%d" % (perThread),
                           "%s/Classify/in/%s" % (_settings.rundir, _settings.PREFIX),
                           "%d" % (totalJobs + 1))
            # Re-count parts now that the unaligned reads were also split.
            totalJobs = 0
            for partFile in os.listdir("%s/Classify/in/" % (_settings.rundir)):
                if "_part" in partFile and "%s_part" % (_settings.PREFIX) in partFile:
                    #print "A file I have to process is %s\n"%(partFile)
                    totalJobs += 1
            # Emit the per-job grid script: each array task scores one part
            # file with phymm's scoreReads.pl and drops a $jobid.success
            # marker so re-runs can skip completed jobs.
            cmdfile = open("%s/Classify/out/runAnnot.sh" % (_settings.rundir), "w")
            cmdfile.write("#!/bin/sh\n")
            cmdfile.write("\n")
            cmdfile.write("jobid=$GRID_TASK\n")
            cmdfile.write("if [ x$jobid = x -o x$jobid = xundefined -o x$jobid = 0 ]; then\n")
            cmdfile.write(" jobid=$1\n")
            cmdfile.write("fi\n")
            cmdfile.write("if test x$jobid = x; then\n")
            cmdfile.write(" echo Error: I need a job index on the command line\n")
            cmdfile.write(" exit 1\n")
            cmdfile.write("fi\n")
            cmdfile.write("if [ $jobid -gt %d ]; then\n" % (totalJobs))
            cmdfile.write(" echo Job id $jobid is out of range %d\n" % (totalJobs))
            cmdfile.write(" exit 0\n")
            cmdfile.write("fi\n")
            cmdfile.write("if test -e %s/Classify/out/$jobid.success ; then\n" % (_settings.rundir))
            cmdfile.write(" echo Job previously completed successfully.\n")
            cmdfile.write("else\n")
            # phymm expects its data directories in the working directory.
            cmdfile.write("ln -s %s/.blastData\n" % (_settings.PHYMM))
            cmdfile.write("ln -s %s/.genomeData\n" % (_settings.PHYMM))
            cmdfile.write("ln -s %s/.scripts\n" % (_settings.PHYMM))
            cmdfile.write("ln -s %s/.taxonomyData\n" % (_settings.PHYMM))
            cmdfile.write("mkdir .logs\n")
            cmdfile.write("perl %s/scoreReads.pl %s/Classify/in/%s_part$jobid.fa" % (_settings.PHYMM, _settings.rundir, _settings.PREFIX))
            cmdfile.write(" && touch %s/Classify/out/$jobid.success\n" % (_settings.rundir))
            cmdfile.write("fi\n")
            cmdfile.close()
            run_process(_settings, "chmod u+x %s/Classify/out/runAnnot.sh" % (_settings.rundir), "Classify")
            #run_process(_settings, "gridx -p %d -r %d -T -c %s/Classify/out/runAnnot.sh"%(min(totalJobs+1, 200), totalJobs+1, _settings.rundir), "Classify")
            # Gather the per-job phymmBL result files (path pattern encodes
            # the run dir with separators replaced) into one .phymm.out,
            # dropping repeated header lines.
            run_process(_settings, "cat %s/Classify/out/gridx-ibissub00*/wrk_*/results.03.phymmBL_%s_Annotate_in_*%s* | grep -v \"QUERY_ID\" > %s/Classify/out/%s.phymm.out" % (_settings.rundir, _settings.rundir.replace(os.sep, "_").replace(".", "_"), _settings.PREFIX, _settings.rundir, _settings.PREFIX))
            run_process(_settings, "ln %s/Classify/out/%s.phymm.out %s/Classify/out/%s.hits" % (_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX))
            # for now we only work as phymm
            # generate Krona output ImportPhymmBL.pl
            importPhymm = "%s%sperl%sImportPhymmBL.pl" % (_settings.METAMOS_UTILS, os.sep, os.sep)
            if not os.path.exists(importPhymm):
                print "Error: Krona importer for Phymm not found in %s. Please check your path and try again.\n" % (importPhymm)
                raise (JobSignalledBreak)
            run_process(_settings, "perl %s %s -f %s %s/Classify/out/%s.phymm.out:%s/Assemble/out/%s.contig.cnt:%s" % (importPhymm, "-l" if _settings.local_krona else "", listOfFiles, _settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.taxa_level), "Classify")
            # TODO: local url (after next KronaTools release)
            # generate taxonomic-level annots
            # Map contig -> [reads] from each library's contig.reads file,
            # then expand contig-level annotations to per-read annotations.
            readctg_dict = {}
            for lib in _readlibs:
                ctgfile = open("%s/Assemble/out/%s.lib%dcontig.reads" % (_settings.rundir, _settings.PREFIX, lib.id), 'r')
                for line in ctgfile.xreadlines():
                    line = line.replace("\n", "")
                    read, ctg = line.split()
                    if ctg in readctg_dict:
                        readctg_dict[ctg].append(read)
                    else:
                        readctg_dict[ctg] = [read, ]
                ctgfile.close()
            annotsfile = open("%s/Classify/out/%s.annots" % (_settings.rundir, _settings.PREFIX), 'r')
            annotreads = open("%s/Classify/out/%s.reads.annots" % (_settings.rundir, _settings.PREFIX), 'w')
            for line in annotsfile.xreadlines():
                line = line.replace("\n", "")
                ctg, annot = line.split()
                if ctg in readctg_dict:
                    for x in readctg_dict[ctg]:
                        annotreads.write("%s\t%s\n" % (x, annot))
                else:
                    # IDs with no read mapping (e.g. unaligned reads) pass
                    # straight through.
                    annotreads.write("%s\t%s\n" % (ctg, annot))
            annotsfile.close()
            annotreads.close()
            readctg_dict.clear()
            # Grid path is fully handled; skip the local-pool machinery.
            return

        # Local path: split contigs into one chunk per thread, bounded below
        # by _MIN_SEQ_LENGTH so chunks aren't pathologically small.
        # we should also split the fna and faa file but for now this is good enough
        size = sizeFastaFile("%s/Classify/in/%s.asm.contig" % (_settings.rundir, _settings.PREFIX))
        perThread = max(ceil(float(size) / _settings.threads), _MIN_SEQ_LENGTH)
        #print "The size of the contigs is %d per thread %d\n"%(size, perThread)
        #run_process(_settings, "python %s/python/splitfasta.py %s/Classify/in/%s.asm.contig %d"%(_settings.METAMOS_UTILS, _settings.rundir, _settings.PREFIX, perThread), "Classify")
        #splitfasta("%s/Classify/in/%s.asm.contig,%d,%s/Classify/in/%s,%d"%(_settings.rundir,_settings.PREFIX,perThread,_settings.rundir,_settings.PREFIX,1))
        splitfasta("%s/Classify/in/%s.asm.contig" % (_settings.rundir, _settings.PREFIX),
                   "%d" % (perThread))
        # Build one pool task per produced part file; the part number is
        # parsed from the "..._part<N>.fa" filename.
        for partFile in os.listdir("%s/Classify/in/" % (_settings.rundir)):
            if "_part" in partFile and "%s.asm.contig" % (_settings.PREFIX) in partFile:
                partStart = partFile.find("_part") + 5
                partEnd = partFile.find(".fa", partStart, len(partFile))
                partNumber = int(partFile[partStart:partEnd])
                params = {}
                params["jobID"] = len(tasks)
                params["cls"] = _cls
                params["contigs"] = "%s/Classify/in/%s" % (_settings.rundir, partFile)
                params["orfAA"] = ""
                params["orfFA"] = ""
                params["out"] = "%s.ctg_%d" % (_settings.PREFIX, partNumber)
                tasks.append(params)
    else:
        # Non-fcp/phymm classifiers annotate the whole contig file in one go.
        annotateSeq(_cls,
                    "%s/Classify/in/%s.asm.contig" % (_settings.rundir, _settings.PREFIX),
                    "%s/Classify/in/%s.faa" % (_settings.rundir, _settings.PREFIX),
                    "%s/Classify/in/%s.fna" % (_settings.rundir, _settings.PREFIX),
                    "%s.ctg" % (_settings.PREFIX))

    # annotate all the unmapped sequences using FCP
    if _cls == "blast" or _cls == "phmmer" or _cls == "metaphyler" or not _settings.classify_unmapped:
        #print "Warning: blast, PHMMER, and metaphyler is not supported for annotating unmapped sequences"
        #print "Warning: unmapped/unaligned sequences will not be annotated!"
        pass
    else:
        for lib in _readlibs:
            listOfFiles += ":%s/Assemble/out/lib%d.unaligned.fasta" % (_settings.rundir, lib.id)
            run_process(_settings, "ln %s/Assemble/out/lib%d.unaligned.fasta %s/Classify/in/lib%d.unaligned.fasta" % (_settings.rundir, lib.id, _settings.rundir, lib.id), "Classify")
            if "fcp" in _cls or "phymm" in _cls:
                # Same split-and-enqueue scheme as the contigs above, keyed
                # on the library's unaligned-read part files.
                size = sizeFastaFile("%s/Classify/in/lib%d.unaligned.fasta" % (_settings.rundir, lib.id))
                perThread = max(ceil(float(size) / _settings.threads), _MIN_SEQ_LENGTH)
                #run_process(_settings, "python %s/python/splitfasta.py %s/Classify/in/lib%d.unaligned.fasta %d"%(_settings.METAMOS_UTILS, _settings.rundir, lib.id, perThread), "Classify")
                splitfasta("%s/Classify/in/lib%d.unaligned.fasta" % (_settings.rundir, lib.id),
                           "%d" % (perThread))
                for partFile in os.listdir("%s/Classify/in/" % (_settings.rundir)):
                    if "_part" in partFile and "lib%d.unaligned.fasta" % (lib.id) in partFile:
                        partStart = partFile.find("_part") + 5
                        partEnd = partFile.find(".fa", partStart, len(partFile))
                        partNumber = int(partFile[partStart:partEnd])
                        params = {}
                        params["jobID"] = len(tasks)
                        params["cls"] = _cls
                        params["contigs"] = "%s/Classify/in/%s" % (_settings.rundir, partFile)
                        params["orfAA"] = ""
                        params["orfFA"] = ""
                        params["out"] = "%s.lib%d_%d" % (_settings.PREFIX, lib.id, partNumber)
                        tasks.append(params)
            else:
                annotateSeq(_cls,
                            "%s/Assemble/out/lib%d.unaligned.fasta" % (_settings.rundir, lib.id),
                            "", "",
                            "%s.lib%d" % (_settings.PREFIX, lib.id))

    if "fcp" in _cls or "phymm" in _cls:
        # Run all queued chunk jobs on the pool. The sys.maxint timeout on
        # .get() is the Python 2 idiom that keeps KeyboardInterrupt working.
        result = pool.map_async(parallelWrapper, tasks).get(sys.maxint)
        for i in result:
            if (i["status"] == 1):
                # Successful chunk: remove its temporary part file.
                run_process(_settings, "rm %s" % (tasks[i["jobID"]]["contigs"]), "Classify")
            else:
                print "Error: parallel annotation job %d failed\n" % (i["jobID"])
                raise (JobSignalledBreak)
    pool.close()
    pool.join()

    if generic.checkIfExists(STEP_NAMES.ANNOTATE, _cls.lower()):
        # A generic (plugin) annotator handles its own merging.
        generic.execute(STEP_NAMES.ANNOTATE, _cls.lower(), _settings)
    else:
        # merge results
        run_process(_settings, "cat %s/Classify/out/*.intermediate.hits > %s/Classify/out/%s.hits" % (_settings.rundir, _settings.rundir, _settings.PREFIX), "Classify")

    # Dispatch to the classifier-specific Krona importer script.
    if _cls == "phylosift":
        importPS = "%s%sperl%sImportPhyloSift.pl" % (_settings.METAMOS_UTILS, os.sep, os.sep)
        if not os.path.exists(importPS):
            # NOTE(review): message interpolates _settings.KRONA rather than
            # importPS, unlike the sibling branches — possibly intentional,
            # verify.
            print "Error: Krona importer for PhyloSift not found in %s. Please check your path and try again.\n" % (_settings.KRONA)
            raise (JobSignalledBreak)
        run_process(_settings, "perl %s %s -c -i -f %s %s/Classify/out/%s.hits:%s/Assemble/out/%s.contig.cnt:%s" % (importPS, "-l" if _settings.local_krona else "", listOfFiles, _settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.taxa_level), "Classify")
    elif _cls == "fcp":
        # generate Krona output
        importFCP = "%s%sperl%sImportFCP.pl" % (_settings.METAMOS_UTILS, os.sep, os.sep)
        if not os.path.exists(importFCP):
            print "Error: Krona importer for FCP not found in %s. Please check your path and try again.\n" % (importFCP)
            raise (JobSignalledBreak)
        run_process(_settings, "cat %s/Classify/out/*.intermediate.epsilon-nb_results.txt | grep -v 'Fragment Id' > %s/Classify/out/%s.epsilon-nb_results.txt" % (_settings.rundir, _settings.rundir, _settings.PREFIX), "Classify")
        run_process(_settings, "perl %s %s -c -i -f %s %s/Classify/out/%s.epsilon-nb_results.txt:%s/Assemble/out/%s.contig.cnt:%s" % (importFCP, "-l" if _settings.local_krona else "", listOfFiles, _settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.taxa_level), "Classify")
        # TODO: local url (after next KronaTools release)
    elif _cls == "phymm":
        # generate Krona output ImportPhymmBL.pl
        importPhymm = "%s%sperl%sImportPhymmBL.pl" % (_settings.METAMOS_UTILS, os.sep, os.sep)
        if not os.path.exists(importPhymm):
            print "Error: Krona importer for Phymm not found in %s. Please check your path and try again.\n" % (importPhymm)
            raise (JobSignalledBreak)
        run_process(_settings, "cat %s/Classify/out/*.intermediate.phymm.out > %s/Classify/out/%s.phymm.out" % (_settings.rundir, _settings.rundir, _settings.PREFIX), "Classify")
        run_process(_settings, "perl %s %s -f %s %s/Classify/out/%s.phymm.out:%s/Assemble/out/%s.contig.cnt:%s" % (importPhymm, "-l" if _settings.local_krona else "", listOfFiles, _settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.taxa_level), "Classify")
        # TODO: local url (after next KronaTools release)
    elif generic.checkIfExists(STEP_NAMES.ANNOTATE, _cls.lower()):
        # Plugin annotator: use its Import<Name>.pl if one is shipped;
        # silently skip Krona output otherwise.
        genericImport = "%s%sperl%sImport%s.pl" % (_settings.METAMOS_UTILS, os.sep, os.sep, _cls.title())
        if os.path.exists(genericImport):
            run_process(_settings, "perl %s %s -c -i -f %s %s/Classify/out/%s.hits:%s/Assemble/out/%s.contig.cnt:%s" % (genericImport, "-l" if _settings.local_krona else "", listOfFiles, _settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.taxa_level), "Classify")
            # TODO: local url (after next KronaTools release)
    else:
        genericImport = "%s%sperl%sImportGeneric.pl" % (_settings.METAMOS_UTILS, os.sep, os.sep)
        if not os.path.exists(genericImport):
            print "Error: Krona importer for generic classifier not found in %s. Please check your path and try again.\n" % (genericImport)
            raise (JobSignalledBreak)
        run_process(_settings, "perl %s %s -c -i -f %s %s/Classify/out/%s.hits:%s/Assemble/out/%s.contig.cnt:%s" % (genericImport, "-l" if _settings.local_krona else "", listOfFiles, _settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX, _settings.taxa_level), "Classify")
        # TODO: local url (after next KronaTools release)

    # Expose the merged hits to the Postprocess step via hard links.
    run_process(_settings, "unlink %s/Postprocess/in/%s.hits" % (_settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "unlink %s/Postprocess/out/%s.hits" % (_settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "ln %s/Classify/out/%s.hits %s/Postprocess/in/%s.hits" % (_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX), "Classify")
    run_process(_settings, "ln %s/Classify/out/%s.hits %s/Postprocess/out/%s.hits" % (_settings.rundir, _settings.PREFIX, _settings.rundir, _settings.PREFIX), "Classify")

    # generate taxonomic-level annots
    # Same contig->reads expansion as in the grid branch above.
    readctg_dict = {}
    for lib in _readlibs:
        ctgfile = open("%s/Assemble/out/%s.lib%dcontig.reads" % (_settings.rundir, _settings.PREFIX, lib.id), 'r')
        for line in ctgfile.xreadlines():
            line = line.replace("\n", "")
            read, ctg = line.split()
            if ctg in readctg_dict:
                readctg_dict[ctg].append(read)
            else:
                readctg_dict[ctg] = [read, ]
        ctgfile.close()
    annotsfile = open("%s/Classify/out/%s.annots" % (_settings.rundir, _settings.PREFIX), 'r')
    annotreads = open("%s/Classify/out/%s.reads.annots" % (_settings.rundir, _settings.PREFIX), 'w')
    for line in annotsfile.xreadlines():
        line = line.replace("\n", "")
        ctg, annot = line.split()
        if ctg in readctg_dict:
            for x in readctg_dict[ctg]:
                annotreads.write("%s\t%s\n" % (x, annot))
        else:
            annotreads.write("%s\t%s\n" % (ctg, annot))
    annotsfile.close()
    annotreads.close()
    readctg_dict.clear()
def change_background(self, path):
    """Point the GNOME desktop wallpaper at the image file *path*.

    Writes the picture-uri key of org.gnome.desktop.background through the
    gsettings CLI. The value is wrapped in literal single quotes — presumably
    so gsettings parses it as a GVariant string.
    """
    uri = "'file://" + path + "'"
    command = ["gsettings", "set",
               "org.gnome.desktop.background", "picture-uri",
               uri]
    execute(command)
def change_background(self, path):
    """Apply the image at *path* as the wallpaper via feh.

    Uses feh's --bg-fill mode, which scales the image to fill the screen.
    """
    command = ["feh", "--bg-fill", path]
    execute(command)