def __reportProgress(self, numBins, binIdToModels, queueIn):
    """Report number of processed bins.

    Reads (binId, hmmModelFile) pairs from queueIn until a pair whose binId
    is None is received. For every other pair the HMM file is parsed, the
    parsed models are stored in binIdToModels under binId, and the HMM file
    (plus an '.ssi' file next to it, if one exists) is deleted.

    A carriage-return terminated status line is written to stderr before the
    first bin and after every bin, but only when the logger's effective
    level is INFO or lower.

    Parameters
    ----------
    numBins : total number of bins, used as denominator of the status line
    binIdToModels : mapping filled in place with binId -> parsed models
    queueIn : queue yielding (binId, hmmModelFile) pairs; (None, ...) ends the loop
    """
    showProgress = self.logger.getEffectiveLevel() <= logging.INFO

    def writeStatus(numProcessed):
        # guard against an empty bin list, which would otherwise divide by zero
        if numBins:
            percent = float(numProcessed) * 100 / numBins
        else:
            percent = 0.0

        statusStr = ' Finished processing %d of %d (%.2f%%) bins.' % (numProcessed, numBins, percent)
        sys.stderr.write('%s\r' % statusStr)
        sys.stderr.flush()

    numProcessedBins = 0
    if showProgress:
        writeStatus(numProcessedBins)

    while True:
        binId, hmmModelFile = queueIn.get(block=True, timeout=None)
        if binId is None:
            break

        # parse HMM file
        # (This is done here as pushing the models onto the shared queue is too memory intensive)
        modelParser = HmmModelParser(hmmModelFile)
        models = modelParser.models()
        binIdToModels[binId] = models

        # remove the model file and any index file sitting next to it;
        # a file that is already gone is not treated as an error
        for tmpFile in (hmmModelFile, hmmModelFile + '.ssi'):
            if os.path.exists(tmpFile):
                os.remove(tmpFile)

        if showProgress:
            numProcessedBins += 1
            writeStatus(numProcessedBins)

    if showProgress:
        # finish the '\r'-terminated status line
        sys.stderr.write('\n')
def createHmmModels(self, outDir, binIds, markerFile):
    """Create HMM model for each bins marker set.

    The type reported by self.markerFileType(markerFile) decides how the
    models are obtained:

    * taxonomic marker set: one HMM model file is created (using the first
      bin id), parsed once, shared by every bin, and then deleted.
    * tree marker set: delegated to self.__createLineageHmmModels.
    * anything else: markerFile itself is parsed as an HMM file and the
      resulting models are shared by every bin.

    Parameters
    ----------
    outDir : not used by this method
    binIds : mapping whose keys are bin ids (only the keys are used here)
    markerFile : path to the marker file

    Returns
    -------
    dict mapping bin id -> models
    """

    # determine type of marker set file
    markerFileType = self.markerFileType(markerFile)

    # get HMM file for each bin
    binIdToModels = {}
    if markerFileType == BinMarkerSets.TAXONOMIC_MARKER_SET:
        # dict views cannot be indexed on Python 3, so materialise the keys
        # before taking the first one (works on Python 2 as well)
        firstBinId = list(binIds.keys())[0]
        hmmModelFile = self.createHmmModelFile(firstBinId, markerFile)

        modelParser = HmmModelParser(hmmModelFile)
        models = modelParser.models()
        for binId in binIds:
            binIdToModels[binId] = models

        os.remove(hmmModelFile)
    elif markerFileType == BinMarkerSets.TREE_MARKER_SET:
        binIdToModels = self.__createLineageHmmModels(binIds, markerFile)
    else:
        modelParser = HmmModelParser(markerFile)
        models = modelParser.models()
        for binId in binIds:
            binIdToModels[binId] = models

    return binIdToModels
def __fetchModelInfo(self, binIdToModels, markerFile, queueIn, queueOut):
    """Fetch HMM.

    Takes bin ids from queueIn until None is received. For each bin id an
    HMM model file is created with self.createHmmModelFile, parsed, and the
    parsed models are stored in binIdToModels under that bin id. The model
    file is deleted afterwards and the bin id is put on queueOut.

    Parameters
    ----------
    binIdToModels : mapping filled in place with binId -> parsed models
    markerFile : marker file handed to self.createHmmModelFile
    queueIn : queue of bin ids; None ends the loop
    queueOut : queue receiving each bin id once it has been handled
    """
    while True:
        binId = queueIn.get(block=True, timeout=None)
        if binId is None:
            break

        hmmModelFile = self.createHmmModelFile(binId, markerFile)
        try:
            modelParser = HmmModelParser(hmmModelFile)
            binIdToModels[binId] = modelParser.models()
        finally:
            # the model file is only needed for parsing; remove it even
            # when parsing raises so it is not left behind
            os.remove(hmmModelFile)

        queueOut.put(binId)
def getMarkerSets(self, outDir, binIds, markerFile, excludeMarkersFile=None):
    """Determine marker set for each bin.

    How the per-bin marker sets are built depends on the type reported by
    self.markerFileType(markerFile):

    * taxonomic marker set: the file is parsed once and the same result is
      assigned to every bin id.
    * tree marker set: the result of self.parseLineageMarkerSetFile is used
      as returned; binIds is not consulted in this case.
    * anything else: markerFile is parsed as an HMM file and the accession
      of every model forms a single marker set given to each bin.

    Afterwards the markers read from excludeMarkersFile (when given) together
    with DefaultValues.MARKERS_TO_EXCLUDE are removed from every marker set.

    outDir is not used by this method.
    """
    fileType = self.markerFileType(markerFile)

    if fileType == BinMarkerSets.TAXONOMIC_MARKER_SET:
        sharedMarkerSets = self.parseTaxonomicMarkerSetFile(markerFile)
        markerSetsByBin = {binId: sharedMarkerSets for binId in binIds}
    elif fileType == BinMarkerSets.TREE_MARKER_SET:
        markerSetsByBin = self.parseLineageMarkerSetFile(markerFile)
    else:
        # plain HMM file: collect the accession of every model into one set
        accessions = {model.acc for model in HmmModelParser(markerFile).parse()}
        markerSet = MarkerSet(0, "N/A", -1, [accessions])

        markerSetsByBin = {}
        for binId in binIds:
            perBinSets = BinMarkerSets(binId, BinMarkerSets.HMM_MODELS_SET)
            perBinSets.addMarkerSet(markerSet)
            markerSetsByBin[binId] = perBinSets

    # remove marker genes specified by user or marked for exclusion
    if excludeMarkersFile:
        excluded = self.readExcludeMarkersFile(excludeMarkersFile)
    else:
        excluded = set()
    excluded.update(DefaultValues.MARKERS_TO_EXCLUDE)

    for _binId, binMarkerSet in markerSetsByBin.items():
        binMarkerSet.removeMarkers(excluded)

    return markerSetsByBin
def createHmmModels(self, outDir, binIds, markerFile):
    """Create HMM model for each bins marker set.

    Returns a dict mapping bin id -> models. For a taxonomic marker set a
    single HMM model file is created from the first bin id, parsed, shared
    by all bins and then deleted. For a tree marker set the work is handed
    to self.__createLineageHmmModels. For any other type markerFile itself
    is parsed and its models are shared by all bins.

    outDir is not used by this method.
    """
    fileType = self.markerFileType(markerFile)

    if fileType == BinMarkerSets.TAXONOMIC_MARKER_SET:
        firstBinId = list(binIds.keys())[0]
        tmpModelFile = self.createHmmModelFile(firstBinId, markerFile)

        sharedModels = HmmModelParser(tmpModelFile).models()
        modelsByBin = {binId: sharedModels for binId in binIds}

        # the generated file is only needed for parsing
        os.remove(tmpModelFile)
        return modelsByBin

    if fileType == BinMarkerSets.TREE_MARKER_SET:
        return self.__createLineageHmmModels(binIds, markerFile)

    # marker file is itself an HMM file
    sharedModels = HmmModelParser(markerFile).models()
    return {binId: sharedModels for binId in binIds}
def __reportProgress(self, numBins, binIdToModels, queueIn):
    """Report number of processed bins.

    Pulls (binId, hmmModelFile) pairs off queueIn until the bin id is None.
    Each HMM file is parsed and its models are stored in binIdToModels under
    the bin id; the HMM file and its '.ssi' companion are then deleted if
    they exist.

    When the logger's effective level is INFO or lower, a status line ending
    in a carriage return is written to stderr before the loop and after each
    bin, followed by a newline once the loop ends.

    Parameters
    ----------
    numBins : total number of bins, used as denominator of the status line
    binIdToModels : mapping filled in place with binId -> parsed models
    queueIn : queue yielding (binId, hmmModelFile) pairs; (None, ...) ends the loop
    """
    reportToUser = self.logger.getEffectiveLevel() <= logging.INFO

    def emitStatus(processed):
        # numBins may be zero when there is nothing to process; report 0%
        # instead of raising ZeroDivisionError
        fraction = float(processed) * 100 / numBins if numBins else 0.0
        statusStr = ' Finished processing %d of %d (%.2f%%) bins.' % (
            processed, numBins, fraction)
        sys.stderr.write('%s\r' % statusStr)
        sys.stderr.flush()

    numProcessedBins = 0
    if reportToUser:
        emitStatus(numProcessedBins)

    while True:
        binId, hmmModelFile = queueIn.get(block=True, timeout=None)
        if binId is None:
            break

        # parse HMM file
        # (This is done here as pushing the models onto the shared queue is too memory intensive)
        modelParser = HmmModelParser(hmmModelFile)
        models = modelParser.models()
        binIdToModels[binId] = models

        if os.path.exists(hmmModelFile):
            os.remove(hmmModelFile)

        indexFile = hmmModelFile + '.ssi'
        if os.path.exists(indexFile):
            os.remove(indexFile)

        if reportToUser:
            numProcessedBins += 1
            emitStatus(numProcessedBins)

    if reportToUser:
        # terminate the '\r'-terminated status line
        sys.stderr.write('\n')
def getMarkerSets(self, outDir, binIds, markerFile, excludeMarkersFile=None):
    """Determine marker set for each bin.

    The type reported by self.markerFileType(markerFile) decides how the
    marker sets are built:

    * taxonomic marker set: parsed once, the same result is assigned to
      every bin id.
    * tree marker set: the result of self.parseLineageMarkerSetFile is used
      as returned; binIds is not consulted in this case.
    * anything else: markerFile is parsed as an HMM file and the accessions
      of its models form one marker set that is added to each bin.

    Markers listed in excludeMarkersFile (when given) and in
    DefaultValues.MARKERS_TO_EXCLUDE are then removed from every marker set.

    Parameters
    ----------
    outDir : not used by this method
    binIds : iterable of bin ids
    markerFile : path to the marker file
    excludeMarkersFile : optional file read with self.readExcludeMarkersFile

    Returns
    -------
    mapping of bin id -> marker sets for that bin
    """

    # determine type of marker set file
    markerFileType = self.markerFileType(markerFile)

    # get marker set for each bin
    binIdToBinMarkerSets = {}
    if markerFileType == BinMarkerSets.TAXONOMIC_MARKER_SET:
        binMarkerSets = self.parseTaxonomicMarkerSetFile(markerFile)
        for binId in binIds:
            binIdToBinMarkerSets[binId] = binMarkerSets
    elif markerFileType == BinMarkerSets.TREE_MARKER_SET:
        binIdToBinMarkerSets = self.parseLineageMarkerSetFile(markerFile)
    else:
        markers = [set()]
        modelParser = HmmModelParser(markerFile)
        for model in modelParser.parse():
            markers[0].add(model.acc)
        markerSet = MarkerSet(0, "N/A", -1, markers)

        for binId in binIds:
            binMarkerSets = BinMarkerSets(binId, BinMarkerSets.HMM_MODELS_SET)
            binMarkerSets.addMarkerSet(markerSet)
            binIdToBinMarkerSets[binId] = binMarkerSets

    # remove marker genes specified by user or marked for exclusion
    markersToExclude = set()
    if excludeMarkersFile:
        markersToExclude = self.readExcludeMarkersFile(excludeMarkersFile)

    markersToExclude.update(DefaultValues.MARKERS_TO_EXCLUDE)

    # only the values are needed; values() exists on both Python 2 and 3,
    # whereas iteritems() was removed in Python 3
    for binMarkerSet in binIdToBinMarkerSets.values():
        binMarkerSet.removeMarkers(markersToExclude)

    return binIdToBinMarkerSets