コード例 #1
0
ファイル: hmmerAligner.py プロジェクト: Ecogenomics/CheckM
    def __createMSA(self, resultsParser, binIdToBinMarkerSets, hmmModelFile, outDir, alignOutputDir, queueIn, queueOut):
        """Create multiple sequence alignments for markers with multiple hits in a bin.

        Worker loop: bin ids are taken from queueIn until a None sentinel is
        received. For every bin, the markers with multiple hits are extracted
        and each one is aligned against its own HMM, which is fetched from
        hmmModelFile into a temporary file.

        Parameters:
            resultsParser: passed through to __extractMarkersWithMultipleHits.
            binIdToBinMarkerSets: mapping indexed by bin id; the entry for the
                current bin is passed to __extractMarkersWithMultipleHits.
            hmmModelFile: HMM file the per-marker models are fetched from.
            outDir: passed through to __extractMarkersWithMultipleHits.
            alignOutputDir: directory under which a per-bin subdirectory
                (named after the bin id) is created for the alignments.
            queueIn: queue yielding bin ids; None terminates the loop.
            queueOut: queue that receives each bin id once it is processed.
        """

        HF = HMMERRunner(mode='fetch')

        while True:
            binId = queueIn.get(block=True, timeout=None)
            if binId is None:
                # sentinel: no more bins to process
                break

            markersWithMultipleHits = self.__extractMarkersWithMultipleHits(outDir, binId, resultsParser, binIdToBinMarkerSets[binId])

            if markersWithMultipleHits:
                # create multiple sequence alignments for markers with multiple hits
                binAlignOutputDir = os.path.join(alignOutputDir, binId)
                makeSurePathExists(binAlignOutputDir)
                for markerId in markersWithMultipleHits:
                    tempModelFile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
                    try:
                        HF.fetch(hmmModelFile, markerId, tempModelFile)

                        self.__alignMarker(markerId, markersWithMultipleHits[markerId], None, False, binAlignOutputDir, tempModelFile, bKeepUnmaskedAlign=False)
                    finally:
                        # always clean up the per-marker model, even when
                        # fetching or aligning fails
                        if os.path.exists(tempModelFile):
                            os.remove(tempModelFile)

            # report the bin as done, whether or not anything was aligned
            queueOut.put(binId)
コード例 #2
0
    def __createMSA(self, resultsParser, binIdToBinMarkerSets, hmmModelFile,
                    outDir, alignOutputDir, queueIn, queueOut):
        """Create multiple sequence alignment for markers with multiple hits in a bin.

        Runs as a queue consumer: each item read from queueIn is a bin id, and
        a None item ends the loop. Every processed bin id is put on queueOut,
        including bins that have no markers with multiple hits.

        Parameters:
            resultsParser: forwarded to __extractMarkersWithMultipleHits.
            binIdToBinMarkerSets: mapping from bin id to the value forwarded
                to __extractMarkersWithMultipleHits for that bin.
            hmmModelFile: HMM file from which single-marker models are fetched.
            outDir: forwarded to __extractMarkersWithMultipleHits.
            alignOutputDir: parent directory; alignments for a bin are written
                to the subdirectory named after its bin id.
            queueIn: input queue of bin ids (None is the stop sentinel).
            queueOut: output queue of finished bin ids.
        """

        HF = HMMERRunner(mode='fetch')

        while True:
            binId = queueIn.get(block=True, timeout=None)
            if binId is None:
                break

            markersWithMultipleHits = self.__extractMarkersWithMultipleHits(
                outDir, binId, resultsParser, binIdToBinMarkerSets[binId])

            if markersWithMultipleHits:
                # create multiple sequence alignments for markers with multiple hits
                binAlignOutputDir = os.path.join(alignOutputDir, binId)
                makeSurePathExists(binAlignOutputDir)
                for markerId in markersWithMultipleHits:
                    tempModelFile = os.path.join(tempfile.gettempdir(),
                                                 str(uuid.uuid4()))
                    try:
                        HF.fetch(hmmModelFile, markerId, tempModelFile)

                        self.__alignMarker(markerId,
                                           markersWithMultipleHits[markerId],
                                           None,
                                           False,
                                           binAlignOutputDir,
                                           tempModelFile,
                                           bKeepUnmaskedAlign=False)
                    finally:
                        # the temporary model must not outlive this iteration,
                        # even if fetch or alignment raised
                        if os.path.exists(tempModelFile):
                            os.remove(tempModelFile)

            queueOut.put(binId)
コード例 #3
0
    def __createMarkerHMMs(self, binMarkerSet, outputFile, bReportProgress=True):
        """Create an indexed HMM file for a marker set.

        The file contains the models of the marker genes reported by
        binMarkerSet together with all genes from the same PFAM clan as any
        of those markers. The models are fetched from
        DefaultValues.HMM_MODELS via a temporary key file listing one model
        accession per line.

        Parameters:
            binMarkerSet: object whose getMarkerGenes() returns the set of
                marker gene accessions.
            outputFile: path of the HMM file to write; it is indexed
                afterwards, replacing any existing '<outputFile>.ssi'.
            bReportProgress: when true, log the number of marker genes and
                same-clan genes.
        """

        # get list of marker genes
        markerGenes = binMarkerSet.getMarkerGenes()

        # get all genes from the same clan as any marker gene
        pfam = PFAM(DefaultValues.PFAM_CLAN_FILE)
        genesInSameClan = pfam.genesInSameClan(markerGenes)

        # extract marker genes along with all genes from the same clan
        allMarkers = markerGenes | genesInSameClan

        if bReportProgress:
            self.logger.info("  There are %d genes in the marker set and %d genes from the same PFAM clan." % (len(markerGenes), len(genesInSameClan)))

        keyFile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        try:
            # create file with all model accession numbers
            with open(keyFile, 'w') as fout:
                for modelAcc in allMarkers:
                    fout.write(modelAcc + '\n')

            # fetch specified models
            HF = HMMERRunner(mode='fetch')
            HF.fetch(DefaultValues.HMM_MODELS, keyFile, outputFile, bKeyFile=True)

            # index the HMM file, discarding any index left from an earlier run
            if os.path.exists(outputFile + '.ssi'):
                os.remove(outputFile + '.ssi')
            HF.index(outputFile)
        finally:
            # remove key file, even if fetching or indexing failed
            if os.path.exists(keyFile):
                os.remove(keyFile)
コード例 #4
0
ファイル: markerSets.py プロジェクト: Ecogenomics/CheckM
    def __createMarkerHMMs(self, binMarkerSet, outputFile, bReportProgress=True):
        """Create HMM file for markers.

        Collects the marker genes of binMarkerSet plus every gene belonging
        to the same PFAM clan as one of them, fetches the corresponding
        models from DefaultValues.HMM_MODELS into outputFile, and indexes
        the result.

        Parameters:
            binMarkerSet: provides the marker gene set via getMarkerGenes().
            outputFile: destination HMM file; an existing
                '<outputFile>.ssi' file is deleted before indexing.
            bReportProgress: log gene counts when true (default).
        """

        # get list of marker genes
        markerGenes = binMarkerSet.getMarkerGenes()

        # get all genes from the same clan as any marker gene
        pfam = PFAM(DefaultValues.PFAM_CLAN_FILE)
        genesInSameClan = pfam.genesInSameClan(markerGenes)

        # extract marker genes along with all genes from the same clan
        allMarkers = markerGenes | genesInSameClan

        if bReportProgress:
            self.logger.info("  There are %d genes in the marker set and %d genes from the same PFAM clan." % (len(markerGenes), len(genesInSameClan)))

        # the key file lists one model accession per line and only needs to
        # exist for the duration of the fetch
        keyFile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        try:
            with open(keyFile, 'w') as fout:
                for modelAcc in allMarkers:
                    fout.write(modelAcc + '\n')

            # fetch specified models
            HF = HMMERRunner(mode='fetch')
            HF.fetch(DefaultValues.HMM_MODELS, keyFile, outputFile, bKeyFile=True)

            # index the HMM file
            if os.path.exists(outputFile + '.ssi'):
                os.remove(outputFile + '.ssi')
            HF.index(outputFile)
        finally:
            # guarantee the temporary key file is removed on every exit path
            if os.path.exists(keyFile):
                os.remove(keyFile)
コード例 #5
0
    def __extractModel(self, hmmModelFile, queueIn, queueOut):
        """Extract individual HMMs requested through a queue.

        Each item read from queueIn is a (modelId, fetchFilename) pair; the
        model modelId is fetched from hmmModelFile into fetchFilename and the
        model id is then put on queueOut. A pair whose modelId is None ends
        the loop.

        Parameters:
            hmmModelFile: HMM file the models are fetched from.
            queueIn: queue of (modelId, fetchFilename) pairs.
            queueOut: queue receiving the id of each fetched model.
        """
        HF = HMMERRunner(mode='fetch')

        while True:
            modelId, fetchFilename = queueIn.get(block=True, timeout=None)
            if modelId is None:
                # sentinel pair: nothing left to extract
                break

            HF.fetch(hmmModelFile, modelId, fetchFilename)

            queueOut.put(modelId)
コード例 #6
0
ファイル: hmmerAligner.py プロジェクト: Ecogenomics/CheckM
    def __extractModel(self, hmmModelFile, queueIn, queueOut):
        """Extract HMM.

        Queue consumer: repeatedly takes a (modelId, fetchFilename) pair from
        queueIn, fetches that model from hmmModelFile into fetchFilename, and
        reports the model id on queueOut. Stops when the received modelId is
        None.

        Parameters:
            hmmModelFile: source HMM file.
            queueIn: input queue of (modelId, fetchFilename) pairs.
            queueOut: output queue of processed model ids.
        """
        HF = HMMERRunner(mode='fetch')

        while True:
            modelId, fetchFilename = queueIn.get(block=True, timeout=None)
            if modelId is None:
                break

            HF.fetch(hmmModelFile, modelId, fetchFilename)

            # signal completion of this model to the producer side
            queueOut.put(modelId)
コード例 #7
0
    def run(self):
        """Build an indexed HMM file covering all marker genes and their clans.

        Gathers the taxonomic-specific marker genes (from TaxonParser) and
        the lineage-specific marker genes (from TreeParser node metadata),
        adds all genes from the same PFAM clan, fetches the corresponding
        models from self.hmms into self.outputHMMs and indexes that file.
        Progress is printed to standard output.
        """
        # read all taxonomic-specific marker genes
        print('Reading taxonomic-specific marker genes.')
        taxonomicMarkers = set()
        taxonParser = TaxonParser()
        taxonMarkerSets = taxonParser.readMarkerSets()
        for taxa in taxonMarkerSets.values():
            for markerSet in taxa.values():
                taxonomicMarkers.update(markerSet.getMarkerGenes())

        print('  Taxonomic-specific marker genes: %d' % len(taxonomicMarkers))

        # read all lineage-specific marker genes
        print('Reading lineage-specific marker genes.')
        lineageMarkers = set()
        treeParser = TreeParser()
        uniqueIdToLineageStatistics = treeParser.readNodeMetadata()
        for uniqueId, d in uniqueIdToLineageStatistics.items():
            # NOTE(review): eval() executes whatever expression the metadata
            # holds; this is only safe while that data is trusted. Consider
            # ast.literal_eval if the stored value is a plain literal - confirm
            # the format before switching.
            markerSet = MarkerSet(uniqueId, 'NA', int(d['# genomes']),
                                  eval(d['marker set']))
            lineageMarkers.update(markerSet.getMarkerGenes())

        print('  Lineage-specific marker genes: %d' % len(lineageMarkers))

        # gather all marker genes
        markerGenes = taxonomicMarkers.union(lineageMarkers)
        print('  Total marker genes: %d' % len(markerGenes))

        # get genes from same clan as marker genes
        print('Gathering HMMs from the same clan as marker genes.')
        pfam = PFAM()
        genesInSameClan = pfam.genesInSameClan(markerGenes)
        allMarkers = markerGenes.union(genesInSameClan)

        keyFile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        try:
            # create file with all model accession numbers
            with open(keyFile, 'w') as fout:
                for modelAcc in allMarkers:
                    fout.write(modelAcc + '\n')

            # fetch specified models
            HF = HMMERRunner(mode='fetch')
            HF.fetch(self.hmms, keyFile, self.outputHMMs, bKeyFile=True)

            # index the HMM file, removing any existing index first
            if os.path.exists(self.outputHMMs + '.ssi'):
                os.remove(self.outputHMMs + '.ssi')
            HF.index(self.outputHMMs)
        finally:
            # remove key file, even if fetching or indexing failed
            if os.path.exists(keyFile):
                os.remove(keyFile)
コード例 #8
0
 def run(self):
     """Build an indexed HMM file covering all marker genes and their clans.

     Collects taxonomic-specific marker genes (TaxonParser) and
     lineage-specific marker genes (TreeParser node metadata), extends them
     with all genes from the same PFAM clan, fetches those models from
     self.hmms into self.outputHMMs and indexes the result. Progress is
     printed to standard output.

     Written so that it runs unchanged on Python 2 and Python 3:
     single-argument print calls and dict.values()/items() are used in
     place of print statements and iteritems().
     """
     # read all taxonomic-specific marker genes
     print('Reading taxonomic-specific marker genes.')
     taxonomicMarkers = set()
     taxonParser = TaxonParser()
     taxonMarkerSets = taxonParser.readMarkerSets()
     for taxa in taxonMarkerSets.values():
         for markerSet in taxa.values():
             taxonomicMarkers.update(markerSet.getMarkerGenes())

     print('  Taxonomic-specific marker genes: %d' % len(taxonomicMarkers))

     # read all lineage-specific marker genes
     print('Reading lineage-specific marker genes.')
     lineageMarkers = set()
     treeParser = TreeParser()
     uniqueIdToLineageStatistics = treeParser.readNodeMetadata()
     for uniqueId, d in uniqueIdToLineageStatistics.items():
         # NOTE(review): eval() executes whatever expression the metadata
         # holds; only safe while that data is trusted. Consider
         # ast.literal_eval if the value is a plain literal - confirm first.
         markerSet = MarkerSet(uniqueId, 'NA', int(d['# genomes']), eval(d['marker set']))
         lineageMarkers.update(markerSet.getMarkerGenes())

     print('  Lineage-specific marker genes: %d' % len(lineageMarkers))

     # gather all marker genes
     markerGenes = taxonomicMarkers.union(lineageMarkers)
     print('  Total marker genes: %d' % len(markerGenes))

     # get genes from same clan as marker genes
     print('Gathering HMMs from the same clan as marker genes.')
     pfam = PFAM()
     genesInSameClan = pfam.genesInSameClan(markerGenes)
     allMarkers = markerGenes.union(genesInSameClan)

     keyFile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
     try:
         # create file with all model accession numbers
         with open(keyFile, 'w') as fout:
             for modelAcc in allMarkers:
                 fout.write(modelAcc + '\n')

         # fetch specified models
         HF = HMMERRunner(mode='fetch')
         HF.fetch(self.hmms, keyFile, self.outputHMMs, bKeyFile=True)

         # index the HMM file, removing any existing index first
         if os.path.exists(self.outputHMMs + '.ssi'):
             os.remove(self.outputHMMs + '.ssi')
         HF.index(self.outputHMMs)
     finally:
         # remove key file, even if fetching or indexing failed
         if os.path.exists(keyFile):
             os.remove(keyFile)