def _classify(self, mode, inputFastaFile, outLog=None):
    """
        Run mothur classify.seqs on the extracted rRNA regions of one kind and convert the
        mothur prediction file to a tab separated prediction file.

        @param mode: which rRNA to classify, one of 16, 23, 5
        @param inputFastaFile: input fasta file; only its base name is used to derive the names of
            the regions file (<name>.<16S|23S|5S>_rRNA.fna) and of the prediction file
            (<name>.<16|23|5>P), both located in self._workingDir
        @param outLog: file to which the stdout of mothur is written (overwritten);
            if None, mothur inherits the stdout of this process
        @raise Exception: if the mode is not supported or if mothur returns a non-zero status
    """
    mothur = os.path.join(os.path.normpath(self._config.get('mothurInstallDir')), 'mothur')

    # mode -> (gene name used in self._refDict keys and in the regions file name, prediction file suffix)
    modeToNames = {16: ('16S_rRNA', '.16P'), 23: ('23S_rRNA', '.23P'), 5: ('5S_rRNA', '.5P')}
    if mode not in modeToNames:
        raise Exception('Wrong branch')
    geneName, predSuffix = modeToNames[mode]

    inputBaseName = os.path.basename(inputFastaFile)
    extractedRegionsFasta = os.path.join(self._workingDir, str(inputBaseName + '.' + geneName + '.fna'))
    taxonomyFile = os.path.join(self._refDir, os.path.normpath(self._refDict[(geneName, 'taxonomyDNA')][0]))
    templateFile = os.path.join(self._refDir, os.path.normpath(self._refDict[(geneName, 'templateDNA')][0]))
    predFileName = os.path.join(self._workingDir, str(inputBaseName + predSuffix))

    param = self._config.get('mothurClassifyParamOther')
    # NOTE(review): the command is a shell string built from configuration values and paths
    cmd = str('time ' + mothur + ' "#classify.seqs(fasta=' + extractedRegionsFasta + ', template=' + templateFile
              + ', taxonomy=' + taxonomyFile + ', ' + param + ')"')

    if os.name == 'posix':
        # subprocess.STDOUT is only meaningful as a stderr value; None makes the child inherit stdout
        stdoutLog = open(outLog, 'w') if outLog is not None else None
        try:
            mothurProc = subprocess.Popen(cmd, shell=True, bufsize=-1, cwd=self._workingDir, stdout=stdoutLog)
            print('run cmd: %s' % cmd)
            mothurProc.wait()
        finally:
            # close the log even if the process could not be started
            if stdoutLog is not None:
                stdoutLog.close()
        print('mothur return code: %s' % mothurProc.returncode)
        if mothurProc.returncode != 0:
            raise Exception("Command returned with non-zero %s status: %s" % (mothurProc.returncode, cmd))
    else:
        print('Cannot run mothur since your system is not "posix" but "%s" \n%s' % (os.name, cmd))

    # The name of the mothur output file is resolved only after the run, since it depends on which
    # file mothur actually wrote (default name first, '.bayesian.taxonomy' as the fallback).
    mothurPredFileName = common.getMothurOutputFilePath(extractedRegionsFasta, taxonomyFile)
    if not os.path.isfile(mothurPredFileName):
        mothurPredFileName = common.getMothurOutputFilePath(extractedRegionsFasta, taxonomyFile,
                                                            suffix='.bayesian.taxonomy')

    # transform mothur prediction files to the tab separated files
    self.mothurPredToTabSepPred(mothurPredFileName, predFileName)
def runMarkerGeneAnalysis(self, fastaFileDNA, outLog=None):
    """
        Run hmmer HMM and mothur classify (bayesian), same param as for the 16S analysis.

        For each marker gene: hmmsearch is run with the primary and (if available) secondary protein
        profile against the translated sequences, the DNA regions corresponding to the hits of the
        primary profile are written to <gene>_dna.gff (FASTA formatted records), mothur classify.seqs
        is run on them and its output is parsed into <fasta name>_<gene>.mP. The text of all genes is
        also collected in <fasta name>_all.mP. All files are located in self.markerGeneWorkingDir.

        @param fastaFileDNA: DNA fasta file to be analysed
        @param outLog: file to which the stdout of the external tools is written (overwritten);
            if None, the tools inherit the stdout of this process
        @raise Exception: if hmmsearch or mothur returns a non-zero status
    """
    # read list of marker genes
    mgFiles = forEachLine(self.markerGeneListFile, _MgFiles(self.markerGeneListFileDir))

    # translate DNA to protein sequences
    fastaFileProt = os.path.join(self.markerGeneWorkingDir, str(os.path.basename(fastaFileDNA) + '.PROT'))
    dnaToProt(fastaFileDNA, fastaFileProt)

    # read DNA fasta file
    try:
        handle = open(fastaFileDNA, "rU")
        try:
            dnaSeqDict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
        finally:
            handle.close()  # closed also if the parsing fails
    except Exception:
        sys.stderr.write(str('Cannot read file: ' + str(fastaFileDNA)))
        raise

    # to output all predictions in one file
    outPredAllFileName = os.path.join(self.markerGeneWorkingDir,
                                      str(os.path.basename(fastaFileDNA) + '_all.mP'))
    outAllBuffer = OutFileBuffer(outPredAllFileName)

    mgList = mgFiles.getGeneNameList()

    # subprocess.STDOUT is only meaningful as a stderr value; None makes the child inherit stdout
    stdoutLog = open(outLog, 'w') if outLog is not None else None
    try:
        # for each gene perform the analysis separately
        for geneName in mgList:
            domFileArray = [os.path.join(self.markerGeneWorkingDir, str(geneName + '_1.dom')),
                            os.path.join(self.markerGeneWorkingDir, str(geneName + '_2.dom'))]
            outFileArray = [os.path.join(self.markerGeneWorkingDir, str(geneName + '_1.out')),
                            os.path.join(self.markerGeneWorkingDir, str(geneName + '_2.out'))]
            hmmFileArray = [mgFiles.getFilePath(geneName, 'hmmPROTPrim'),
                            mgFiles.getFilePath(geneName, 'hmmPROTSec')]

            # define cmd (None where no HMM profile is available)
            cmdArray = []
            for domFile, outFile, hmmFile in zip(domFileArray, outFileArray, hmmFileArray):
                if hmmFile is None:
                    cmdArray.append(None)
                else:
                    cmdArray.append(str(os.path.join(self.hmmerBinDir, 'hmmsearch') + ' --domtblout ' + domFile
                                        + ' -E 0.01' + ' -o ' + outFile + ' ' + hmmFile + ' ' + fastaFileProt))

            # run cmd
            for cmd in cmdArray:
                if cmd is None:
                    continue  # no profile for this slot, this is unrelated to the platform
                if os.name != 'posix':
                    print('Marker genes analysis, doesn`t run (no posix):  %s' % cmd)
                    continue
                hmmProc = subprocess.Popen(cmd, shell=True, bufsize=-1, cwd=self.hmmInstallDir, stdout=stdoutLog)
                print('run cmd: %s' % cmd)
                hmmProc.wait()
                print('HMM return code: %s' % hmmProc.returncode)
                if hmmProc.returncode != 0:
                    raise Exception("Command returned with non-zero %s status: %s" % (hmmProc.returncode, cmd))

            # get regions that match to the HMM profile
            entryDictList = []
            for domFile, cmd in zip(domFileArray, cmdArray):
                if cmd is not None:
                    entryDictList.append(forEachLine(domFile, _MgRegions()).getEntryDict())
                else:
                    entryDictList.append(None)
            # only the hits of the primary profile are used; the secondary result is parsed but not evaluated
            entryDict1 = entryDictList[0]
            if entryDict1 is None:
                sys.stderr.write('No primary HMM profile for marker gene: ' + geneName + ' \n')
                continue

            # extract regions found in the protein sequences that were found by the HMM
            # and generate corresponding DNA sequences
            regionDnaFasta = os.path.join(self.markerGeneWorkingDir, str(geneName + '_dna.gff'))
            outFileBuffer = OutFileBuffer(regionDnaFasta)
            for seqName in entryDict1:
                # name of the whole sequence (the "_p<frame>" or "_pr<frame>" suffix is removed)
                dnaSeqName = re.sub(r'([0-9]+_[0-9]+)_[pr]+[012]', r'\1', seqName)
                # reading frame shift (0, 1, 2) encoded as the last character of the suffix
                frameMatch = re.match(r'[0-9]+_[0-9]+_[pr]+([012])', seqName)
                # reverse complement (contains "pr")
                isReverse = re.match(r'[0-9]+_[0-9]+_pr[012]', seqName) is not None

                for region in entryDict1[seqName]:
                    from1 = region[0]
                    to1 = region[1]
                    assert (from1 is not None) and (to1 is not None)

                    if frameMatch is None:
                        # a region cannot be cut out without a reading frame, skip the entry
                        sys.stderr.write('Wrong seq name: ' + seqName + ' \n')
                        continue

                    # whole DNA sequence
                    dnaSeq = dnaSeqDict[dnaSeqName].seq
                    tagRev = 'p'
                    if isReverse:
                        dnaSeq = dnaSeq.reverse_complement()
                        tagRev = 'pr'

                    # protein positions -> DNA positions; presumably from1/to1 are 1-based inclusive
                    # positions in the translated sequence - TODO confirm in _MgRegions
                    shift = int(frameMatch.group(1))
                    tagFrom = ((from1 - 1) * 3) + shift
                    tagTo = (to1 * 3) + shift
                    tagRev += frameMatch.group(1)
                    dnaSeq = dnaSeq[tagFrom:tagTo]

                    tag = str(tagFrom) + '_' + str(tagTo) + '_' + tagRev
                    outFileBuffer.writeText(str('>' + dnaSeqName + '_' + tag + '\n' + str(dnaSeq) + '\n'))
            outFileBuffer.close()

            # if no marker gene found
            if outFileBuffer.isEmpty():
                continue

            # run mothur classify (the same as for the 16S analysis)
            templateFile = mgFiles.getFilePath(geneName, 'templateDNA')
            taxonomyFile = mgFiles.getFilePath(geneName, 'taxonomyDNA')
            assert (templateFile is not None) and (taxonomyFile is not None)
            cmd = str('time ' + self.mothur + ' "#classify.seqs(fasta=' + regionDnaFasta + ', template='
                      + templateFile + ', taxonomy=' + taxonomyFile + ', ' + self.mothurParam + ')"')
            if os.name == 'posix':
                mothurProc = subprocess.Popen(cmd, shell=True, bufsize=-1, cwd=self.markerGeneWorkingDir,
                                              stdout=stdoutLog)
                print('run cmd: %s' % cmd)
                mothurProc.wait()
                print('mothur return code: %s' % mothurProc.returncode)
                if mothurProc.returncode != 0:
                    raise Exception("Command returned with non-zero %s status: %s" % (mothurProc.returncode, cmd))
            else:
                print('Cannot run mothur since your system is not "posix" but "%s" \n%s' % (os.name, cmd))

            # transform the mothur output to a simple output (name, ncbid, weight);
            # the default output name is tried first, then the '.bayesian.taxonomy' one
            mothurPredFileName = common.getMothurOutputFilePath(regionDnaFasta, taxonomyFile)
            if not os.path.isfile(mothurPredFileName):
                mothurPredFileName = common.getMothurOutputFilePath(regionDnaFasta, taxonomyFile,
                                                                    suffix='.bayesian.taxonomy')
                if not os.path.isfile(mothurPredFileName):
                    print("Can't open file: %s" % mothurPredFileName)

            outPredFileName = os.path.join(self.markerGeneWorkingDir,
                                           str(os.path.basename(fastaFileDNA) + '_' + geneName + '.mP'))
            outBuffer = OutFileBuffer(outPredFileName, bufferText=True)
            forEachLine(mothurPredFileName, _MothurOutFileParser(outBuffer, geneName))

            # the entries of different genes are separated by a new line in the common file
            if not outAllBuffer.isEmpty():
                outAllBuffer.writeText('\n')
            outAllBuffer.writeText(outBuffer.getTextBuffer())
    finally:
        # close the log even if one of the tools failed
        if stdoutLog is not None:
            stdoutLog.close()

    outAllBuffer.close()
def runMarkerGeneAnalysis(self, fastaFileDNA, outLog=None):
    """
        Run hmmer HMM and mothur classify (bayesian), same param as for the 16S analysis.

        For each marker gene: hmmsearch is run with the primary protein profile against the translated
        sequences, the DNA regions corresponding to the hits are written to <gene>_dna.gff (FASTA
        formatted records), mothur classify.seqs is run on them and its output is parsed into
        <fasta name>_<gene>.mP. The text of all genes is also collected in <fasta name>_all.mP.
        All files are located in self.markerGeneWorkingDir.

        The process exits with status -1 if hmmsearch or mothur is reported as failed.

        @param fastaFileDNA: DNA fasta file to be analysed
        @param outLog: file that is passed as stdout to the mothur tasks (overwritten);
            if None, no stdout is passed to the tasks
    """
    # read list of marker genes
    mgFiles = forEachLine(self.markerGeneListFile, _MgFiles(self.markerGeneListFileDir))

    # translate DNA to protein sequences
    fastaFileProt = os.path.join(self.markerGeneWorkingDir, str(os.path.basename(fastaFileDNA) + '.PROT'))
    dnaToProt(fastaFileDNA, fastaFileProt)

    # read DNA fasta file
    try:
        handle = open(fastaFileDNA, "rU")
        try:
            dnaSeqDict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))
        finally:
            handle.close()  # closed also if the parsing fails
    except Exception:
        sys.stderr.write(str('Cannot read file: ' + str(fastaFileDNA)))
        raise

    # to output all predictions in one file
    outPredAllFileName = os.path.join(self.markerGeneWorkingDir,
                                      str(os.path.basename(fastaFileDNA) + '_all.mP'))
    outAllBuffer = OutFileBuffer(outPredAllFileName)

    mgList = mgFiles.getGeneNameList()

    stdoutLog = open(outLog, 'w') if outLog is not None else None
    try:
        # for each gene perform the analysis separately
        for geneName in mgList:
            domFile = os.path.join(self.markerGeneWorkingDir, str(geneName + '_1.dom'))
            outFile = os.path.join(self.markerGeneWorkingDir, str(geneName + '_1.out'))
            hmmFile = mgFiles.getFilePath(geneName, 'hmmPROTPrim')

            if hmmFile is None:
                # nothing can be searched for without the primary profile
                sys.stderr.write('No primary HMM profile for marker gene: ' + geneName + ' \n')
                continue

            # define cmd (self.processorsHmm is appended as it is, no separator is added here)
            hmmCmd = str(os.path.join(self.hmmerBinDir, 'hmmsearch') + ' --domtblout ' + domFile + ' -E 0.01'
                         + self.processorsHmm + ' -o ' + outFile + ' ' + hmmFile + ' ' + fastaFileProt)

            # run cmd
            if os.name == 'posix':
                cwd = self.hmmInstallDir
                if parallel.reportFailedCmd(parallel.runCmdSerial([parallel.TaskCmd(hmmCmd, cwd)])) is not None:
                    sys.exit(-1)
            else:
                print('Marker genes analysis, doesn`t run (no posix):  %s' % hmmCmd)

            # get regions that match to the HMM profile
            entryDict1 = forEachLine(domFile, _MgRegions()).getEntryDict()

            # extract regions found in the protein sequences that were found by the HMM
            # and generate corresponding DNA sequences
            regionDnaFasta = os.path.join(self.markerGeneWorkingDir, str(geneName + '_dna.gff'))
            outFileBuffer = OutFileBuffer(regionDnaFasta)
            for seqName in entryDict1:
                # name of the whole sequence (the "_p<frame>" or "_pr<frame>" suffix is removed)
                dnaSeqName = re.sub(r'([0-9]+_[0-9]+)_[pr]+[012]', r'\1', seqName)
                # reading frame shift (0, 1, 2) encoded as the last character of the suffix
                frameMatch = re.match(r'[0-9]+_[0-9]+_[pr]+([012])', seqName)
                # reverse complement (contains "pr")
                isReverse = re.match(r'[0-9]+_[0-9]+_pr[012]', seqName) is not None

                for region in entryDict1[seqName]:
                    from1 = region[0]
                    to1 = region[1]
                    assert (from1 is not None) and (to1 is not None)

                    if frameMatch is None:
                        # a region cannot be cut out without a reading frame, skip the entry
                        sys.stderr.write('Wrong seq name: ' + seqName + ' \n')
                        continue

                    # whole DNA sequence
                    dnaSeq = dnaSeqDict[dnaSeqName].seq
                    tagRev = 'p'
                    if isReverse:
                        dnaSeq = dnaSeq.reverse_complement()
                        tagRev = 'pr'

                    # protein positions -> DNA positions; presumably from1/to1 are 1-based inclusive
                    # positions in the translated sequence - TODO confirm in _MgRegions
                    shift = int(frameMatch.group(1))
                    tagFrom = ((from1 - 1) * 3) + shift
                    tagTo = (to1 * 3) + shift
                    tagRev += frameMatch.group(1)
                    dnaSeq = dnaSeq[tagFrom:tagTo]

                    tag = str(tagFrom) + '_' + str(tagTo) + '_' + tagRev
                    outFileBuffer.writeText(str('>' + dnaSeqName + '_' + tag + '\n' + str(dnaSeq) + '\n'))
            outFileBuffer.close()

            # if no marker gene found
            if outFileBuffer.isEmpty():
                continue

            # run mothur classify (the same as for the 16S analysis)
            templateFile = mgFiles.getFilePath(geneName, 'templateDNA')
            taxonomyFile = mgFiles.getFilePath(geneName, 'taxonomyDNA')
            assert (templateFile is not None) and (taxonomyFile is not None)
            cmd = str(self.mothur + ' "#classify.seqs(fasta=' + regionDnaFasta + ', template=' + templateFile
                      + ', taxonomy=' + taxonomyFile + ', ' + self.mothurParam + ')"')
            if os.name == 'posix':
                print('Mothur processing: %s' % os.path.basename(templateFile).split('_', 1)[0])
                cwd = self.markerGeneWorkingDir
                if stdoutLog is None:
                    # subprocess.STDOUT is only meaningful as a stderr value, therefore no stdout is
                    # passed at all (as for the hmmsearch task above)
                    # NOTE(review): assumes TaskCmd then leaves the stdout of the command untouched - confirm
                    task = parallel.TaskCmd(cmd, cwd)
                else:
                    task = parallel.TaskCmd(cmd, cwd, stdout=stdoutLog)
                if parallel.reportFailedCmd(parallel.runCmdSerial([task])) is not None:
                    sys.exit(-1)
            else:
                print('Cannot run mothur since your system is not "posix" but "%s" \n%s' % (os.name, cmd))

            # transform the mothur output to a simple output (name, ncbid, weight);
            # the default output name is tried first, then the '.bayesian.taxonomy' one
            mothurPredFileName = common.getMothurOutputFilePath(regionDnaFasta, taxonomyFile)
            if not os.path.isfile(mothurPredFileName):
                mothurPredFileName = common.getMothurOutputFilePath(regionDnaFasta, taxonomyFile,
                                                                    suffix='.bayesian.taxonomy')
                if not os.path.isfile(mothurPredFileName):
                    print("Can't open file: %s" % mothurPredFileName)

            outPredFileName = os.path.join(self.markerGeneWorkingDir,
                                           str(os.path.basename(fastaFileDNA) + '_' + geneName + '.mP'))
            outBuffer = OutFileBuffer(outPredFileName, bufferText=True)
            forEachLine(mothurPredFileName, _MothurOutFileParser(outBuffer, geneName))

            # the entries of different genes are separated by a new line in the common file
            if not outAllBuffer.isEmpty():
                outAllBuffer.writeText('\n')
            outAllBuffer.writeText(outBuffer.getTextBuffer())
    finally:
        # close the log also when sys.exit or an exception ends the analysis
        if stdoutLog is not None:
            stdoutLog.close()

    outAllBuffer.close()
def _classify(self, mode, inputFastaFile, outLog=None):
    """
        Run mothur classify.seqs on the extracted rRNA regions of one kind and convert the
        mothur prediction file to a tab separated prediction file.

        The process exits with status -1 if the mothur command is reported as failed.

        @param mode: which rRNA to classify, one of 16, 23, 5
        @param inputFastaFile: input fasta file; only its base name is used to derive the names of
            the regions file (<name>.<16S|23S|5S>_rRNA.fna) and of the prediction file
            (<name>.<16|23|5>P), both located in self._workingDir
        @param outLog: file that is passed as stdout to the mothur task (overwritten);
            if None, no stdout is passed to the task
        @raise Exception: if the mode is not supported
    """
    mothur = os.path.join(os.path.normpath(self._config.get('mothurInstallDir')), 'mothur')

    # mode -> (gene name used in self._refDict keys and in the regions file name, prediction file suffix)
    modeToNames = {16: ('16S_rRNA', '.16P'), 23: ('23S_rRNA', '.23P'), 5: ('5S_rRNA', '.5P')}
    if mode not in modeToNames:
        raise Exception('Wrong branch')
    geneName, predSuffix = modeToNames[mode]

    inputBaseName = os.path.basename(inputFastaFile)
    extractedRegionsFasta = os.path.join(self._workingDir, str(inputBaseName + '.' + geneName + '.fna'))
    taxonomyFile = os.path.join(self._refDir, os.path.normpath(self._refDict[(geneName, 'taxonomyDNA')][0]))
    templateFile = os.path.join(self._refDir, os.path.normpath(self._refDict[(geneName, 'templateDNA')][0]))
    predFileName = os.path.join(self._workingDir, str(inputBaseName + predSuffix))

    param = self._config.get('mothurClassifyParamOther')
    cmd = str(mothur + ' "#classify.seqs(fasta=' + extractedRegionsFasta + ', template=' + templateFile
              + ', taxonomy=' + taxonomyFile + ', ' + param + ')"')

    if os.name == 'posix':
        print('Mothur processing: %s' % os.path.basename(templateFile).split('_', 1)[0])
        cwd = self._workingDir
        stdoutLog = open(outLog, 'w') if outLog is not None else None
        try:
            if stdoutLog is None:
                # subprocess.STDOUT is only meaningful as a stderr value, therefore no stdout is passed
                # NOTE(review): assumes TaskCmd then leaves the stdout of the command untouched - confirm
                task = parallel.TaskCmd(cmd, cwd)
            else:
                task = parallel.TaskCmd(cmd, cwd, stdout=stdoutLog)
            if parallel.reportFailedCmd(parallel.runCmdSerial([task])) is not None:
                sys.exit(-1)
        finally:
            # close the log also when sys.exit or an exception ends the run
            if stdoutLog is not None:
                stdoutLog.close()
    else:
        print('Cannot run mothur since your system is not "posix" but "%s" \n%s' % (os.name, cmd))

    # The name of the mothur output file is resolved only after the run, since it depends on which
    # file mothur actually wrote (default name first, '.bayesian.taxonomy' as the fallback).
    mothurPredFileName = common.getMothurOutputFilePath(extractedRegionsFasta, taxonomyFile)
    if not os.path.isfile(mothurPredFileName):
        mothurPredFileName = common.getMothurOutputFilePath(extractedRegionsFasta, taxonomyFile,
                                                            suffix='.bayesian.taxonomy')

    # transform mothur prediction files to the tab separated files
    self.mothurPredToTabSepPred(mothurPredFileName, predFileName)