예제 #1
0
    def run(self):
        """Process every finished genome and log a comparison of the results.

        Prints the number of PFAM and TIGR marker genes returned by
        ``MarkerSet.getCalculatedMarkerGenes()``. Then, for each genome id
        returned by ``img.genomeIds('Finished')``, calls ``runProdigal``,
        ``runGeneMark``, ``runPFAM``, ``runTIGRFAM`` and ``compareResults``
        in that order.

        Side effects:
            Truncates and writes ``./data/evaluate_prodigal.txt``. A header
            line is written for each genome id, and the open handle is
            passed to ``compareResults``.
        """
        img = IMG()

        # Third argument (buffering=1) requests line buffering. The
        # with-block guarantees the file is closed even when one of the
        # per-genome steps raises.
        with open('./data/evaluate_prodigal.txt', 'w', 1) as fout:
            # get list of all marker genes
            markerset = MarkerSet()
            pfamMarkers, tigrMarkers = markerset.getCalculatedMarkerGenes()

            # Report each count under its own label (they were previously
            # swapped: the TIGR count was printed as PFAM and vice versa).
            print('PFAM marker genes: ' + str(len(pfamMarkers)))
            print('TIGR marker genes: ' + str(len(tigrMarkers)))
            print('')

            # process each of the finished genomes
            for genomeId in img.genomeIds('Finished'):
                print(genomeId + ':')
                fout.write(genomeId + ':\n')

                # gene calling
                self.runProdigal(genomeId)
                self.runGeneMark(genomeId)

                # HMM searches
                self.runPFAM(genomeId)
                self.runTIGRFAM(genomeId)

                self.compareResults(genomeId, pfamMarkers, tigrMarkers, fout)
예제 #2
0
    def run(self):
        """Run the per-genome pipeline on all finished genomes and log it.

        Obtains the PFAM and TIGR marker genes from
        ``MarkerSet.getCalculatedMarkerGenes()`` and prints how many of each
        there are. For every genome id from ``img.genomeIds('Finished')`` it
        then calls, in order, ``runProdigal``, ``runGeneMark``, ``runPFAM``,
        ``runTIGRFAM`` and ``compareResults``.

        Side effects:
            Truncates ``./data/evaluate_prodigal.txt``, writes one
            ``<genomeId>:`` header line per genome, and hands the open file
            to ``compareResults``.
        """
        img = IMG()

        # buffering=1 selects line buffering; using a context manager closes
        # the file even if a step for some genome raises.
        with open('./data/evaluate_prodigal.txt', 'w', 1) as fout:
            # get list of all marker genes
            markerset = MarkerSet()
            pfamMarkers, tigrMarkers = markerset.getCalculatedMarkerGenes()

            # The labels now match the collections they count; the original
            # printed len(tigrMarkers) as PFAM and len(pfamMarkers) as TIGR.
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('PFAM marker genes: ' + str(len(pfamMarkers)))
            print('TIGR marker genes: ' + str(len(tigrMarkers)))
            print('')

            # process each of the finished genomes
            genomeIds = img.genomeIds('Finished')
            for genomeId in genomeIds:
                print(genomeId + ':')
                fout.write(genomeId + ':\n')

                self.runProdigal(genomeId)
                self.runGeneMark(genomeId)

                self.runPFAM(genomeId)
                self.runTIGRFAM(genomeId)

                self.compareResults(genomeId, pfamMarkers, tigrMarkers, fout)
예제 #3
0
파일: getHMMs.py 프로젝트: zjyzjjzmt/CheckM
    def run(self, minGenomes, minMarkerSets):
        """Write HMM key files for the marker genes and extract the HMMs.

        The PFAM id collection from ``MarkerSet.getCalculatedMarkerGenes()``
        is extended (in place) with every PFAM id that shares a clan with
        one of the marker genes. The names of the matching models in
        ``img.pfamHMMs`` and the TIGR ids are written to key files, and
        ``hmmfetch -f`` is invoked through the shell for each key file.

        Args:
            minGenomes: Not used by this method.
            minMarkerSets: Not used by this method.

        Side effects:
            Writes ``./hmm/pfam.keyfile.txt`` and ``./hmm/tigr.keyfile.txt``;
            the shell commands redirect their output to
            ``./hmm/pfam_markers.hmm`` and ``./hmm/tigr_markers.hmm``.
        """
        img = IMG()
        pfam = PFAM()

        # get list of all marker genes
        markerset = MarkerSet()
        pfamIds, tigrIds = markerset.getCalculatedMarkerGenes()

        print('TIGR marker genes: ' + str(len(tigrIds)))
        print('PFAM marker genes: ' + str(len(pfamIds)))

        # get all PFAM HMMs that are in the same clan
        # as any of the marker genes
        pfamIdToClanId = pfam.pfamIdToClanId()
        clans = set()
        for pfamId in pfamIds:
            # Look up with the same converted key that is tested for
            # membership; indexing with the unconverted id raised KeyError
            # whenever the two forms differed.
            clanKey = pfamId.replace('PF', 'pfam')
            if clanKey in pfamIdToClanId:
                clans.add(pfamIdToClanId[clanKey])

        # items() is available on both Python 2 and Python 3 dicts
        for pfamId, clanId in pfamIdToClanId.items():
            if clanId in clans:
                pfamIds.add(pfamId)

        print('  PFAM HMMs require to cover marker gene clans: ' + str(
            len(pfamIds)))

        # get name of each PFAM HMM; both files are closed by the with-block
        # even if parsing fails part-way through
        pfamNames = []
        with open('./hmm/pfam.keyfile.txt', 'w') as fout, \
                open(img.pfamHMMs) as hmmFile:
            # NOTE(review): tags are matched as substrings anywhere in the
            # line, and an ACC line is assumed to follow its NAME line --
            # confirm against the HMM file format.
            for line in hmmFile:
                if 'NAME' in line:
                    name = line[line.find(' '):].strip()
                elif 'ACC' in line:
                    # text between the first space and the last '.'
                    # (presumably drops a version suffix -- TODO confirm)
                    acc = line[line.find(' '):line.rfind('.')].strip()
                    if acc.replace('PF', 'pfam') in pfamIds:
                        pfamNames.append(name)
                        fout.write(name + '\n')

        print('PFAM names: ' + str(len(pfamNames)))

        # extract each PFAM HMM
        # NOTE(review): the exit status of the shell command is not checked
        os.system('hmmfetch -f ' + img.pfamHMMs +
                  ' ./hmm/pfam.keyfile.txt > ./hmm/pfam_markers.hmm')

        # write the id of each TIGR marker gene to its key file
        with open('./hmm/tigr.keyfile.txt', 'w') as fout:
            for tigrId in tigrIds:
                fout.write(tigrId + '\n')

        # extract each TIGR HMM
        os.system('hmmfetch -f ' + img.tigrHMMs +
                  ' ./hmm/tigr.keyfile.txt > ./hmm/tigr_markers.hmm')
예제 #4
0
    def run(self, minGenomes, minMarkerSets):
        """Build PFAM and TIGR key files and fetch the corresponding HMMs.

        Starts from the PFAM and TIGR ids returned by
        ``MarkerSet.getCalculatedMarkerGenes()``, adds (in place) every PFAM
        id belonging to a clan that contains a marker gene, writes the names
        of the matching models found in ``img.pfamHMMs`` and the TIGR ids to
        key files, and runs ``hmmfetch -f`` via the shell on each key file.

        Args:
            minGenomes: Not used by this method.
            minMarkerSets: Not used by this method.

        Side effects:
            Writes ``./hmm/pfam.keyfile.txt`` and ``./hmm/tigr.keyfile.txt``;
            the shell commands redirect their output to
            ``./hmm/pfam_markers.hmm`` and ``./hmm/tigr_markers.hmm``.
        """
        img = IMG()
        pfam = PFAM()

        # get list of all marker genes
        markerset = MarkerSet()
        pfamIds, tigrIds = markerset.getCalculatedMarkerGenes()

        print('TIGR marker genes: ' + str(len(tigrIds)))
        print('PFAM marker genes: ' + str(len(pfamIds)))

        # get all PFAM HMMs that are in the same clan
        # as any of the marker genes
        pfamIdToClanId = pfam.pfamIdToClanId()
        clans = set()
        for pfamId in pfamIds:
            # The membership test and the lookup must use the same key; the
            # original indexed with the unconverted id and could raise
            # KeyError when the converted form differed from it.
            convertedId = pfamId.replace('PF', 'pfam')
            if convertedId in pfamIdToClanId:
                clans.add(pfamIdToClanId[convertedId])

        # items() works on both Python 2 and Python 3 dicts
        for pfamId, clanId in pfamIdToClanId.items():
            if clanId in clans:
                pfamIds.add(pfamId)

        print('  PFAM HMMs require to cover marker gene clans: ' + str(len(pfamIds)))

        # get name of each PFAM HMM; the with-block closes both the key file
        # and the HMM file, which was previously left open
        pfamNames = []
        with open('./hmm/pfam.keyfile.txt', 'w') as fout, \
                open(img.pfamHMMs) as hmmFile:
            # NOTE(review): 'NAME'/'ACC' are matched anywhere in the line and
            # each ACC line is assumed to come after its NAME line -- confirm
            # against the HMM file format.
            for line in hmmFile:
                if 'NAME' in line:
                    name = line[line.find(' '):].strip()
                elif 'ACC' in line:
                    # text between the first space and the last '.'
                    # (presumably drops a version suffix -- TODO confirm)
                    acc = line[line.find(' '):line.rfind('.')].strip()
                    if acc.replace('PF', 'pfam') in pfamIds:
                        pfamNames.append(name)
                        fout.write(name + '\n')

        print('PFAM names: ' + str(len(pfamNames)))

        # extract each PFAM HMM
        # NOTE(review): the exit status of the shell command is not checked
        os.system('hmmfetch -f ' + img.pfamHMMs + ' ./hmm/pfam.keyfile.txt > ./hmm/pfam_markers.hmm')

        # write the id of each TIGR marker gene to its key file
        with open('./hmm/tigr.keyfile.txt', 'w') as fout:
            for tigrId in tigrIds:
                fout.write(tigrId + '\n')

        # extract each TIGR HMM
        os.system('hmmfetch -f ' + img.tigrHMMs + ' ./hmm/tigr.keyfile.txt > ./hmm/tigr_markers.hmm')