def createHmmModels(self, outDir, binIds, markerFile):
    """Create HMM models for each bin's marker set.

    Args:
        outDir: Not read by this method; kept so existing callers keep working.
        binIds: Dict-like collection keyed by bin identifier. Only the keys
            are read here; the object is handed unchanged to the lineage
            helper for tree marker sets.
        markerFile: Marker set file (or HMM file) to build the models from.

    Returns:
        Dictionary mapping each bin identifier to its HMM models. For
        taxonomic marker sets and plain HMM files every bin maps to the
        same models object.
    """
    # determine type of marker set file
    markerFileType = self.markerFileType(markerFile)

    # get HMM models for each bin
    binIdToModels = {}
    if markerFileType == BinMarkerSets.TAXONOMIC_MARKER_SET:
        # All bins share one model set, so the model file is built once
        # using the first bin identifier. With no bins there is nothing to
        # build and the empty mapping is returned.
        if binIds:
            # next(iter(...)) works on both Python 2 and 3, whereas
            # indexing into dict.keys() fails on Python 3.
            firstBinId = next(iter(binIds))
            hmmModelFile = self.createHmmModelFile(firstBinId, markerFile)
            try:
                models = HmmModelParser(hmmModelFile).models()
            finally:
                # the model file is temporary; remove it even if parsing fails
                os.remove(hmmModelFile)

            binIdToModels = dict.fromkeys(binIds, models)
    elif markerFileType == BinMarkerSets.TREE_MARKER_SET:
        binIdToModels = self.__createLineageHmmModels(binIds, markerFile)
    else:
        # markerFile is parsed directly and its models are shared by all bins
        models = HmmModelParser(markerFile).models()
        binIdToModels = dict.fromkeys(binIds, models)

    return binIdToModels
def createHmmModels(self, outDir, binIds, markerFile):
    """Build the mapping from bin identifier to HMM models.

    The marker file type decides how models are obtained:
      * taxonomic marker set: one model file is generated from the first
        bin identifier, parsed, shared by every bin, and then deleted;
      * tree marker set: delegated to the lineage-specific helper;
      * anything else: markerFile itself is parsed and shared by every bin.

    outDir is accepted but not read by this method.
    """
    kind = self.markerFileType(markerFile)

    if kind == BinMarkerSets.TAXONOMIC_MARKER_SET:
        firstBinId = list(binIds.keys())[0]
        tmpModelFile = self.createHmmModelFile(firstBinId, markerFile)
        sharedModels = HmmModelParser(tmpModelFile).models()

        modelsByBin = {binId: sharedModels for binId in binIds}

        # the generated model file is no longer needed once parsed
        os.remove(tmpModelFile)
        return modelsByBin

    if kind == BinMarkerSets.TREE_MARKER_SET:
        return self.__createLineageHmmModels(binIds, markerFile)

    # fall back to parsing the marker file directly
    sharedModels = HmmModelParser(markerFile).models()
    return {binId: sharedModels for binId in binIds}
def __reportProgress(self, numBins, binIdToModels, queueIn):
    """Report number of processed bins.

    Reads (binId, hmmModelFile) pairs from queueIn until a pair whose binId
    is None arrives. For each pair the HMM file is parsed, the models are
    stored in binIdToModels under binId, and the HMM file is deleted. While
    the logger's effective level is INFO or lower, a status line is
    rewritten on stderr after every bin.

    Args:
        numBins: Total number of bins, used only for the status line.
        binIdToModels: Dict-like object filled in place with the models.
        queueIn: Queue supplying (binId, hmmModelFile) pairs.
    """

    def writeStatus(numProcessed):
        """Overwrite the current stderr line with the progress summary."""
        # numBins may be 0; avoid dividing by zero in that case
        percent = float(numProcessed) * 100 / numBins if numBins else 0.0
        statusStr = ' Finished processing %d of %d (%.2f%%) bins.' % (
            numProcessed, numBins, percent)
        sys.stderr.write('%s\r' % statusStr)
        sys.stderr.flush()

    numProcessedBins = 0

    if self.logger.getEffectiveLevel() <= logging.INFO:
        writeStatus(numProcessedBins)

    while True:
        binId, hmmModelFile = queueIn.get(block=True, timeout=None)
        if binId is None:
            # sentinel: no more bins to process
            break

        # parse HMM file
        # (This is done here as pushing the models onto the shared queue
        # is too memory intensive)
        modelParser = HmmModelParser(hmmModelFile)
        binIdToModels[binId] = modelParser.models()

        os.remove(hmmModelFile)

        if self.logger.getEffectiveLevel() <= logging.INFO:
            numProcessedBins += 1
            writeStatus(numProcessedBins)

    if self.logger.getEffectiveLevel() <= logging.INFO:
        # finish the status line that was being rewritten with '\r'
        sys.stderr.write('\n')
def __fetchModelInfo(self, binIdToModels, markerFile, queueIn, queueOut):
    """Fetch HMM models for every bin identifier read from queueIn.

    Bin identifiers are taken from queueIn until None is received. For each
    one, a model file is created from markerFile, parsed, stored in
    binIdToModels under that identifier, and deleted; the identifier is then
    put on queueOut to signal completion.

    Args:
        binIdToModels: Dict-like object filled in place with the models.
        markerFile: Marker file passed to createHmmModelFile.
        queueIn: Queue supplying bin identifiers, terminated by None.
        queueOut: Queue receiving each bin identifier once it is processed.
    """
    while True:
        binId = queueIn.get(block=True, timeout=None)
        if binId is None:
            # sentinel: no more bins to process
            break

        hmmModelFile = self.createHmmModelFile(binId, markerFile)
        try:
            modelParser = HmmModelParser(hmmModelFile)
            binIdToModels[binId] = modelParser.models()
        finally:
            # the model file is temporary; remove it even if parsing fails
            os.remove(hmmModelFile)

        queueOut.put(binId)
def __reportProgress(self, numBins, binIdToModels, queueIn):
    """Report number of processed bins.

    Reads (binId, hmmModelFile) pairs from queueIn until a pair whose binId
    is None arrives. For each pair the HMM file is parsed and the models are
    stored in binIdToModels under binId; the HMM file and its '.ssi' index
    file are then deleted if they exist. While the logger's effective level
    is INFO or lower, a status line is rewritten on stderr after every bin.

    Args:
        numBins: Total number of bins, used only for the status line.
        binIdToModels: Dict-like object filled in place with the models.
        queueIn: Queue supplying (binId, hmmModelFile) pairs.
    """

    def writeStatus(numProcessed):
        """Overwrite the current stderr line with the progress summary."""
        # numBins may be 0; avoid dividing by zero in that case
        percent = float(numProcessed) * 100 / numBins if numBins else 0.0
        statusStr = ' Finished processing %d of %d (%.2f%%) bins.' % (
            numProcessed, numBins, percent)
        sys.stderr.write('%s\r' % statusStr)
        sys.stderr.flush()

    numProcessedBins = 0

    if self.logger.getEffectiveLevel() <= logging.INFO:
        writeStatus(numProcessedBins)

    while True:
        binId, hmmModelFile = queueIn.get(block=True, timeout=None)
        if binId is None:
            # sentinel: no more bins to process
            break

        # parse HMM file
        # (This is done here as pushing the models onto the shared queue
        # is too memory intensive)
        modelParser = HmmModelParser(hmmModelFile)
        binIdToModels[binId] = modelParser.models()

        # remove the model file and its index, tolerating either being absent
        if os.path.exists(hmmModelFile):
            os.remove(hmmModelFile)

        indexFile = hmmModelFile + '.ssi'
        if os.path.exists(indexFile):
            os.remove(indexFile)

        if self.logger.getEffectiveLevel() <= logging.INFO:
            numProcessedBins += 1
            writeStatus(numProcessedBins)

    if self.logger.getEffectiveLevel() <= logging.INFO:
        # finish the status line that was being rewritten with '\r'
        sys.stderr.write('\n')