Example #1
0
    def __reportProgress(self, numBins, binIdToModels, queueIn):
        """Collect parsed HMM models per bin and report progress on stderr.

        Reads (binId, hmmModelFile) pairs from queueIn until a pair whose
        binId is None arrives. For every other pair the HMM file is parsed,
        the models are stored in binIdToModels[binId], and the HMM file is
        deleted. A status line is written to stderr only when the logger's
        effective level is INFO or lower.

        Parameters:
            numBins: total number of bins, used only for the status line.
            binIdToModels: mapping filled in place with binId -> models.
            queueIn: queue yielding (binId, hmmModelFile) pairs.
        """

        def writeStatus(numProcessed):
            # avoid a ZeroDivisionError when there are no bins to process
            percent = float(numProcessed)*100/numBins if numBins else 0.0
            statusStr = '    Finished processing %d of %d (%.2f%%) bins.' % (numProcessed, numBins, percent)
            # '\r' keeps the status on a single, continuously updated line
            sys.stderr.write('%s\r' % statusStr)
            sys.stderr.flush()

        numProcessedBins = 0
        if self.logger.getEffectiveLevel() <= logging.INFO:
            writeStatus(numProcessedBins)

        while True:
            binId, hmmModelFile = queueIn.get(block=True, timeout=None)
            if binId is None:
                # sentinel: no more bins will be queued
                break

            # parse HMM file
            # (This is done here as pushing the models onto the shared queue is too memory intensive)
            modelParser = HmmModelParser(hmmModelFile)
            binIdToModels[binId] = modelParser.models()

            os.remove(hmmModelFile)

            if self.logger.getEffectiveLevel() <= logging.INFO:
                numProcessedBins += 1
                writeStatus(numProcessedBins)

        if self.logger.getEffectiveLevel() <= logging.INFO:
            # finish the status line
            sys.stderr.write('\n')
Example #2
0
    def createHmmModels(self, outDir, binIds, markerFile):
        """Create HMM models for each bin's marker set.

        Parameters:
            outDir: not used by this method.
            binIds: collection of bin ids (iterated to obtain each id).
            markerFile: marker set file, or an HMM file when it is neither a
                taxonomic nor a tree marker set.

        Returns:
            Dictionary mapping each bin id to its HMM models. For taxonomic
            marker sets and plain HMM files every bin maps to the same
            models object.
        """

        # determine type of marker set file
        markerFileType = self.markerFileType(markerFile)

        # get HMM models for each bin
        binIdToModels = {}
        if markerFileType == BinMarkerSets.TAXONOMIC_MARKER_SET:
            if not binIds:
                # nothing to build models for
                return binIdToModels

            # All bins share the same models, so the model file is created
            # once using the first bin id. next(iter(...)) works on both
            # Python 2 and 3, whereas dict.keys()[0] fails on Python 3.
            firstBinId = next(iter(binIds))
            hmmModelFile = self.createHmmModelFile(firstBinId, markerFile)

            try:
                modelParser = HmmModelParser(hmmModelFile)
                models = modelParser.models()
                for binId in binIds:
                    binIdToModels[binId] = models
            finally:
                # remove the created model file even if parsing fails
                os.remove(hmmModelFile)
        elif markerFileType == BinMarkerSets.TREE_MARKER_SET:
            binIdToModels = self.__createLineageHmmModels(binIds, markerFile)
        else:
            # marker file is parsed directly as an HMM file
            modelParser = HmmModelParser(markerFile)
            models = modelParser.models()
            for binId in binIds:
                binIdToModels[binId] = models

        return binIdToModels
Example #3
0
    def __fetchModelInfo(self, binIdToModels, markerFile, queueIn, queueOut):
        """Create and parse the HMM model file for each queued bin.

        Bin ids are read from queueIn until None is received. For each bin
        the HMM model file is created, parsed into binIdToModels[binId],
        and then deleted. The bin id is put on queueOut once its models
        have been stored.

        Parameters:
            binIdToModels: mapping filled in place with binId -> models.
            markerFile: marker file passed through to createHmmModelFile.
            queueIn: queue of bin ids; None marks the end of the work.
            queueOut: queue receiving each bin id after it is processed.
        """
        while True:
            binId = queueIn.get(block=True, timeout=None)
            if binId is None:
                # sentinel: no more bins to process
                break

            hmmModelFile = self.createHmmModelFile(binId, markerFile)

            try:
                modelParser = HmmModelParser(hmmModelFile)
                binIdToModels[binId] = modelParser.models()
            finally:
                # remove the created model file even if parsing fails
                os.remove(hmmModelFile)

            queueOut.put(binId)
Example #4
0
    def __fetchModelInfo(self, binIdToModels, markerFile, queueIn, queueOut):
        """Create and parse the HMM model file for each queued bin.

        Bin ids are read from queueIn until None is received. For each bin
        the HMM model file is created, parsed into binIdToModels[binId],
        and then deleted. The bin id is put on queueOut once its models
        have been stored.

        Parameters:
            binIdToModels: mapping filled in place with binId -> models.
            markerFile: marker file passed through to createHmmModelFile.
            queueIn: queue of bin ids; None marks the end of the work.
            queueOut: queue receiving each bin id after it is processed.
        """
        while True:
            binId = queueIn.get(block=True, timeout=None)
            if binId is None:
                # sentinel: no more bins to process
                break

            hmmModelFile = self.createHmmModelFile(binId, markerFile)

            try:
                modelParser = HmmModelParser(hmmModelFile)
                binIdToModels[binId] = modelParser.models()
            finally:
                # remove the created model file even if parsing fails
                os.remove(hmmModelFile)

            queueOut.put(binId)
Example #5
0
    def getMarkerSets(self,
                      outDir,
                      binIds,
                      markerFile,
                      excludeMarkersFile=None):
        """Build the mapping from bin id to its marker sets.

        How the marker sets are obtained depends on the marker file type:
        a taxonomic marker set file is parsed once and shared by all bins,
        a tree marker set file is parsed into the mapping directly, and
        anything else is read as an HMM file whose model accessions form a
        single marker set given to every bin. Markers listed in
        excludeMarkersFile (if provided) and in
        DefaultValues.MARKERS_TO_EXCLUDE are then removed from each entry.

        Returns the dictionary of bin id -> marker sets.
        """

        fileType = self.markerFileType(markerFile)

        if fileType == BinMarkerSets.TAXONOMIC_MARKER_SET:
            # one parsed object is shared by every bin
            sharedSets = self.parseTaxonomicMarkerSetFile(markerFile)
            binIdToBinMarkerSets = dict.fromkeys(binIds, sharedSets)
        elif fileType == BinMarkerSets.TREE_MARKER_SET:
            binIdToBinMarkerSets = self.parseLineageMarkerSetFile(markerFile)
        else:
            # treat the marker file as an HMM file: all model accessions
            # form one marker set
            accessions = {model.acc
                          for model in HmmModelParser(markerFile).parse()}
            markerSet = MarkerSet(0, "N/A", -1, [accessions])

            binIdToBinMarkerSets = {}
            for binId in binIds:
                perBinSets = BinMarkerSets(binId,
                                           BinMarkerSets.HMM_MODELS_SET)
                perBinSets.addMarkerSet(markerSet)
                binIdToBinMarkerSets[binId] = perBinSets

        # gather user-specified and default markers to exclude
        if excludeMarkersFile:
            excluded = self.readExcludeMarkersFile(excludeMarkersFile)
        else:
            excluded = set()
        excluded.update(DefaultValues.MARKERS_TO_EXCLUDE)

        for binMarkerSet in binIdToBinMarkerSets.values():
            binMarkerSet.removeMarkers(excluded)

        return binIdToBinMarkerSets
Example #6
0
    def createHmmModels(self, outDir, binIds, markerFile):
        """Return a dictionary mapping each bin id to its HMM models.

        For a taxonomic marker set, one HMM model file is created using the
        first bin id, parsed, shared by all bins, and then deleted. For a
        tree marker set the work is delegated to __createLineageHmmModels.
        Otherwise markerFile itself is parsed as an HMM file and its models
        are shared by all bins.
        """

        fileType = self.markerFileType(markerFile)

        if fileType == BinMarkerSets.TAXONOMIC_MARKER_SET:
            firstBinId = list(binIds.keys())[0]
            modelFile = self.createHmmModelFile(firstBinId, markerFile)

            sharedModels = HmmModelParser(modelFile).models()
            binIdToModels = dict.fromkeys(binIds, sharedModels)

            # the created model file is no longer needed once parsed
            os.remove(modelFile)
            return binIdToModels

        if fileType == BinMarkerSets.TREE_MARKER_SET:
            return self.__createLineageHmmModels(binIds, markerFile)

        # marker file is parsed directly as an HMM file
        sharedModels = HmmModelParser(markerFile).models()
        return dict.fromkeys(binIds, sharedModels)
    def __reportProgress(self, numBins, binIdToModels, queueIn):
        """Collect parsed HMM models per bin and report progress on stderr.

        Reads (binId, hmmModelFile) pairs from queueIn until a pair whose
        binId is None arrives. For every other pair the HMM file is parsed,
        the models are stored in binIdToModels[binId], and the HMM file and
        its '.ssi' index file are deleted if they exist. A status line is
        written to stderr only when the logger's effective level is INFO
        or lower.

        Parameters:
            numBins: total number of bins, used only for the status line.
            binIdToModels: mapping filled in place with binId -> models.
            queueIn: queue yielding (binId, hmmModelFile) pairs.
        """

        def writeStatus(numProcessed):
            # avoid a ZeroDivisionError when there are no bins to process
            percent = float(numProcessed) * 100 / numBins if numBins else 0.0
            statusStr = '    Finished processing %d of %d (%.2f%%) bins.' % (
                numProcessed, numBins, percent)
            # '\r' keeps the status on a single, continuously updated line
            sys.stderr.write('%s\r' % statusStr)
            sys.stderr.flush()

        numProcessedBins = 0
        if self.logger.getEffectiveLevel() <= logging.INFO:
            writeStatus(numProcessedBins)

        while True:
            binId, hmmModelFile = queueIn.get(block=True, timeout=None)
            if binId is None:
                # sentinel: no more bins will be queued
                break

            # parse HMM file
            # (This is done here as pushing the models onto the shared queue is too memory intensive)
            modelParser = HmmModelParser(hmmModelFile)
            binIdToModels[binId] = modelParser.models()

            if os.path.exists(hmmModelFile):
                os.remove(hmmModelFile)

            # also clean up the accompanying index file, if one was created
            indexFile = hmmModelFile + '.ssi'
            if os.path.exists(indexFile):
                os.remove(indexFile)

            if self.logger.getEffectiveLevel() <= logging.INFO:
                numProcessedBins += 1
                writeStatus(numProcessedBins)

        if self.logger.getEffectiveLevel() <= logging.INFO:
            # finish the status line
            sys.stderr.write('\n')
Example #8
0
    def getMarkerSets(self, outDir, binIds, markerFile, excludeMarkersFile=None):
        """Determine marker set for each bin.

        Parameters:
            outDir: not used by this method.
            binIds: collection of bin ids (iterated to obtain each id).
            markerFile: taxonomic marker set file, tree marker set file, or
                an HMM file (any other type is parsed as an HMM file).
            excludeMarkersFile: optional file of markers to exclude in
                addition to DefaultValues.MARKERS_TO_EXCLUDE.

        Returns:
            Dictionary mapping bin id to its marker sets, with excluded
            markers removed.
        """

        # determine type of marker set file
        markerFileType = self.markerFileType(markerFile)

        # get marker set for each bin
        binIdToBinMarkerSets = {}

        if markerFileType == BinMarkerSets.TAXONOMIC_MARKER_SET:
            # the same parsed object is shared by every bin
            binMarkerSets = self.parseTaxonomicMarkerSetFile(markerFile)

            for binId in binIds:
                binIdToBinMarkerSets[binId] = binMarkerSets
        elif markerFileType == BinMarkerSets.TREE_MARKER_SET:
            binIdToBinMarkerSets = self.parseLineageMarkerSetFile(markerFile)
        else:
            # marker file is an HMM file: all model accessions form a
            # single marker set
            markers = [set()]
            modelParser = HmmModelParser(markerFile)
            for model in modelParser.parse():
                markers[0].add(model.acc)
            markerSet = MarkerSet(0, "N/A", -1, markers)

            for binId in binIds:
                binMarkerSets = BinMarkerSets(binId, BinMarkerSets.HMM_MODELS_SET)
                binMarkerSets.addMarkerSet(markerSet)
                binIdToBinMarkerSets[binId] = binMarkerSets

        # remove marker genes specified by user or marked for exclusion
        markersToExclude = set()
        if excludeMarkersFile:
            markersToExclude = self.readExcludeMarkersFile(excludeMarkersFile)

        markersToExclude.update(DefaultValues.MARKERS_TO_EXCLUDE)
        # values() works on both Python 2 and 3; iteritems() is Python 2 only
        for binMarkerSet in binIdToBinMarkerSets.values():
            binMarkerSet.removeMarkers(markersToExclude)

        return binIdToBinMarkerSets