Python IMG.IMG Examples, checkm.util.img.IMG.IMG Python Examples

Example #1

0

Show file

File: simComparePlots.py Project: sufforest/SolidBin

 def __init__(self):
     """Set up hard-coded file paths, the IMG handle and plotting parameters.

     The active paths refer to the ``draft`` simulation results. The
     ``scaffolds.draft`` and ``random_scaffolds`` result sets follow the
     same naming scheme with ``draft`` replaced by the respective name.
     """
     imgMetadataFile = '/srv/whitlam/bio/db/checkm/img/img_metadata.tsv'
     tigrfamToPfamFile = '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv'

     # plot prefix and the simulation comparison tables
     self.plotPrefix = './simulations/simulation.draft.w_refinement_50'
     self.simCompareFile = './simulations/simCompare.draft.w_refinement_50.full.tsv'
     self.simCompareMarkerSetOut = './simulations/simCompare.draft.marker_set_table.w_refinement_50.tsv'
     self.simCompareConditionOut = './simulations/simCompare.draft.condition_table.w_refinement_50.tsv'
     self.simCompareTaxonomyTableOut = './simulations/simCompare.draft.taxonomy_table.w_refinement_50.tsv'
     self.simCompareRefinementTableOut = './simulations/simCompare.draft.refinment_table.w_refinement_50.tsv'

     self.img = IMG(imgMetadataFile, tigrfamToPfamFile)

     # presumably the completeness and contamination levels to consider
     # (verify against the plotting methods of this class)
     self.compsToConsider = [0.5, 0.7, 0.8, 0.9]
     self.contsToConsider = [0.05, 0.1, 0.15]

     # presumably the resolution (dots per inch) of saved figures
     self.dpi = 1200

Example #2

0

Show file

File: simulation.py Project: HadrienG/CheckM

    def __init__(self):
        """Create the marker set builder and IMG handle and set the simulation grid."""
        imgMetadataFile = '/srv/whitlam/bio/db/checkm/img/img_metadata.tsv'
        tigrfamToPfamFile = '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv'

        self.markerSetBuilder = MarkerSetBuilder()
        self.img = IMG(imgMetadataFile, tigrfamToPfamFile)

        # parameter values to simulate; presumably contig lengths (bp) and
        # fractions of completeness / contamination (confirm against the
        # methods that consume them)
        self.contigLens = [1000, 2000, 5000, 10000, 20000, 50000]
        self.percentComps = [0.5, 0.7, 0.8, 0.9, 0.95, 1.0]
        self.percentConts = [0.0, 0.05, 0.1, 0.15, 0.2]

Example #3

0

Show file

    def __workerThread(self, ubiquityThreshold, singleCopyThreshold,
                       minGenomes, colocatedDistThreshold,
                       colocatedGenomeThreshold, metadata, queueIn, queueOut):
        """Build marker sets for lineages taken from a queue.

        Lineage names are read from ``queueIn`` until a ``None`` sentinel is
        received. For every lineage a tuple
        ``(lineage, colocatedSets, numGenomes)`` is put on ``queueOut``;
        ``colocatedSets`` is ``None`` when the lineage has fewer than
        ``minGenomes`` genomes.

        Args:
            ubiquityThreshold: passed through to ``buildMarkerSet``.
            singleCopyThreshold: passed through to ``buildMarkerSet``.
            minGenomes: minimum number of genomes required to build a set.
            colocatedDistThreshold: passed through to ``buildMarkerSet``.
            colocatedGenomeThreshold: not used by this method; kept so the
                signature matches the argument tuple built by the caller.
            metadata: genome metadata handed to ``IMG.genomeIdsByTaxonomy``.
            queueIn: queue of lineage names terminated by ``None``.
            queueOut: queue receiving one result tuple per lineage.
        """

        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        markerSetBuilder = MarkerSetBuilder()

        while True:
            lineage = queueIn.get(block=True, timeout=None)
            if lineage is None:
                # sentinel: no more work for this worker
                break

            # the pseudo-lineage 'Universal' is looked up as 'prokaryotes'
            taxonomy = 'prokaryotes' if lineage == 'Universal' else lineage
            genomeIds = img.genomeIdsByTaxonomy(taxonomy, metadata)

            colocatedSets = None
            if len(genomeIds) >= minGenomes:
                markerSet = markerSetBuilder.buildMarkerSet(
                    genomeIds, ubiquityThreshold, singleCopyThreshold,
                    colocatedDistThreshold)
                colocatedSets = markerSet.markerSet

            # allow results to be processed or written to file
            queueOut.put((lineage, colocatedSets, len(genomeIds)))

Example #4

0

Show file

File: markerSetBuilder.py Project: zjyzjjzmt/CheckM

 def __init__(self):
     """Create the IMG handle and load the duplicate-sequence and node metadata tables."""
     imgMetadataFile = '/srv/whitlam/bio/db/checkm/img/img_metadata.tsv'
     tigrfamToPfamFile = '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv'

     self.img = IMG(imgMetadataFile, tigrfamToPfamFile)
     self.colocatedFile = './data/colocated.tsv'

     # both loaders are methods of this class; they are called in this
     # order after self.img and self.colocatedFile have been set
     self.duplicateSeqs = self.readDuplicateSeqs()
     self.uniqueIdToLineageStatistics = self.__readNodeMetadata()

     # starts empty; presumably filled lazily on first use (verify)
     self.cachedGeneCountTable = None

Example #5

0

Show file

    def __init__(self, outputDir):
        """Check for FastTree, derive output paths and load the IMG genome metadata.

        Args:
            outputDir: directory that holds the dereplicated concatenated
                alignment and the final genome tree.
        """
        self.__checkForFastTree()

        alignFileName = 'genome_tree.concatenated.derep.fasta'
        treeFileName = 'genome_tree.final.tre'
        self.derepConcatenatedAlignFile = os.path.join(outputDir, alignFileName)
        self.tree = os.path.join(outputDir, treeFileName)

        imgHandle = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                        '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        self.img = imgHandle
        self.metadata = imgHandle.genomeMetadata()

Example #6

0

Show file

    def __getUniversalMarkerGenes(self, phyloUbiquityThreshold,
                                  phyloSingleCopyThreshold, outputGeneDir):
        """Determine marker genes shared by the archaeal and bacterial lineages.

        Marker genes are built separately for 'Archaea' and 'Bacteria' and
        then intersected. The resulting set is written, as its ``str()``
        representation, to ``phylo_marker_set.txt`` in ``outputGeneDir``.

        Args:
            phyloUbiquityThreshold: passed through to ``buildMarkerGenes``.
            phyloSingleCopyThreshold: passed through to ``buildMarkerGenes``.
            outputGeneDir: directory receiving ``phylo_marker_set.txt``.

        Returns:
            Tuple ``(allTrustedGenomeIds, universalMarkerGenes)``: the union
            of genome ids over all processed lineages, and the marker genes
            common to every processed lineage (an empty set when no lineage
            had any genomes).
        """
        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        markerSetBuilder = MarkerSetBuilder()

        metadata = img.genomeMetadata()

        allTrustedGenomeIds = set()
        phyloMarkerGenes = {}
        for lineage in ['Archaea', 'Bacteria']:
            # get all genomes in lineage
            print('\nIdentifying all ' + lineage + ' genomes.')
            trustedGenomeIds = img.genomeIdsByTaxonomy(lineage, metadata)
            print('  Trusted genomes in lineage: ' +
                  str(len(trustedGenomeIds)))
            if len(trustedGenomeIds) < 1:
                print(
                    '  Skipping lineage due to insufficient number of genomes.'
                )
                continue

            allTrustedGenomeIds.update(trustedGenomeIds)

            print('  Building marker set.')
            markerGenes = markerSetBuilder.buildMarkerGenes(
                trustedGenomeIds, phyloUbiquityThreshold,
                phyloSingleCopyThreshold)
            phyloMarkerGenes[lineage] = markerGenes

        # universal marker genes: intersection over all processed lineages.
        # NOTE: the first lineage's set is reused and narrowed in place, so
        # phyloMarkerGenes must not be relied on after this loop.
        universalMarkerGenes = None
        for markerGenes in phyloMarkerGenes.values():
            if universalMarkerGenes is None:
                universalMarkerGenes = markerGenes
            else:
                universalMarkerGenes.intersection_update(markerGenes)

        if universalMarkerGenes is None:
            # every lineage was skipped; report an empty set instead of
            # failing on len(None) below
            universalMarkerGenes = set()

        # context manager guarantees the file is closed even if write fails
        with open(os.path.join(outputGeneDir, 'phylo_marker_set.txt'),
                  'w') as fout:
            fout.write(str(universalMarkerGenes))

        print('')
        print('  Universal marker genes: ' + str(len(universalMarkerGenes)))

        return allTrustedGenomeIds, universalMarkerGenes

Example #7

0

Show file

    def run(self, outputDir, ubiquityThreshold, singleCopyThreshold,
            minGenomes, colocatedDistThreshold, colocatedGenomeThreshold,
            threads):
        """Process every IMG lineage in parallel worker processes.

        Creates ``outputDir`` if needed, queues every lineage returned by
        ``IMG.lineagesSorted`` plus the pseudo-lineage 'Universal', and
        starts ``threads`` worker processes together with a single writer
        process. Returns once all processes have finished.

        Args:
            outputDir: directory handed to the writer process.
            ubiquityThreshold: forwarded to the workers and the writer.
            singleCopyThreshold: forwarded to the workers and the writer.
            minGenomes: forwarded to the workers.
            colocatedDistThreshold: forwarded to the workers and the writer.
            colocatedGenomeThreshold: forwarded to the workers and the writer.
            threads: number of worker processes to start.
        """
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        # determine lineages to process
        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        metadata = img.genomeMetadata()
        lineages = img.lineagesSorted(metadata)
        lineages.append('Universal')

        # determine HMM model accession numbers
        pfamIdToPfamAcc = self.__pfamIdToPfamAcc(img)

        # populate worker queue with data to process
        workerQueue = mp.Queue()
        writerQueue = mp.Queue()

        for lineage in lineages:
            workerQueue.put(lineage)

        # one None sentinel per worker so that every worker terminates
        for _ in range(threads):
            workerQueue.put(None)

        workerProc = [
            mp.Process(target=self.__workerThread,
                       args=(ubiquityThreshold, singleCopyThreshold,
                             minGenomes, colocatedDistThreshold,
                             colocatedGenomeThreshold, metadata, workerQueue,
                             writerQueue)) for _ in range(threads)
        ]
        writeProc = mp.Process(
            target=self.__writerThread,
            args=(pfamIdToPfamAcc, ubiquityThreshold, singleCopyThreshold,
                  colocatedDistThreshold, colocatedGenomeThreshold, outputDir,
                  len(lineages), writerQueue))

        # start the writer first so results are consumed while workers run
        writeProc.start()

        for p in workerProc:
            p.start()

        for p in workerProc:
            p.join()

        # all workers are done: send the writer its terminating sentinel
        writerQueue.put((None, None, None))
        writeProc.join()

Example #8

0

Show file

File: identifyGeneLossAndDuplication.py Project: zjyzjjzmt/CheckM

 def __init__(self):
     """Create the marker set builder and the IMG database handle."""
     imgMetadataFile = '/srv/whitlam/bio/db/checkm/img/img_metadata.tsv'
     tigrfamToPfamFile = '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv'

     self.markerSetBuilder = MarkerSetBuilder()
     self.img = IMG(imgMetadataFile, tigrfamToPfamFile)

Example #9

0

Show file

File: paralogTest.py Project: HadrienG/CheckM

    def run(self, geneTreeDir, acceptPer, extension, outputDir):
        """Retain gene trees whose paralogous genes are conspecific.

        Every file in ``geneTreeDir`` ending in ``extension`` is read as an
        unrooted newick tree and rerooted. Genes from the same genome that
        are separated by a non-zero patristic distance are treated as
        paralogs. A genome is flagged as non-conspecific when the clade
        below the most recent common ancestor of its paralogs contains more
        than one classified species. Trees with at most
        ``acceptPer * number_of_leaves`` flagged genomes are copied to
        ``outputDir``.

        Args:
            geneTreeDir: directory containing the gene trees.
            acceptPer: accepted fraction of non-conspecific genomes relative
                to the number of leaves in a tree.
            extension: file name suffix identifying gene tree files.
            outputDir: directory receiving retained trees; existing files in
                it are deleted first.
        """
        # local import: the file-level imports are not visible from here
        import shutil

        # make sure output directory is empty (sub-directories are left
        # alone; os.remove cannot delete them)
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        for f in os.listdir(outputDir):
            staleFile = os.path.join(outputDir, f)
            if os.path.isfile(staleFile):
                os.remove(staleFile)

        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        metadata = img.genomeMetadata()

        files = os.listdir(geneTreeDir)
        print('Identifying gene trees with only conspecific paralogous genes:')
        filteredGeneTrees = 0
        retainedGeneTrees = 0
        for f in files:
            if not f.endswith(extension):
                continue

            geneId = f[0:f.find('.')]
            print('  Testing gene tree: ' + geneId)

            treeFile = os.path.join(geneTreeDir, f)
            tree = dendropy.Tree.get_from_path(treeFile,
                                               schema='newick',
                                               as_rooted=False,
                                               preserve_underscores=True)

            taxa = tree.leaf_nodes()
            numTaxa = len(taxa)
            print('  Genes in tree: ' + str(numTaxa))

            # root tree with archaeal genomes
            rerootTree = RerootTree()
            rerootTree.reroot(tree)

            # leaf labels have the form '<genome id>|...'
            genomeIdOfTaxon = [t.taxon.label.split('|')[0] for t in taxa]

            # get species name of each taxa
            # NOTE(review): this mapping is not read again in this method;
            # it is kept because the lookups fail early with a KeyError when
            # a leaf has no metadata.
            leafNodeToSpeciesName = {}
            for t, genomeId in zip(taxa, genomeIdOfTaxon):
                genus = metadata[genomeId]['taxonomy'][5]
                sp = metadata[genomeId]['taxonomy'][6].lower()

                leafNodeToSpeciesName[t.taxon.label] = genus + ' ' + sp

            # find all paralogous genes
            print('  Finding paralogous genes.')

            paralogs = defaultdict(set)
            for i in range(numTaxa):
                genomeId = genomeIdOfTaxon[i]
                for j in range(i + 1, numTaxa):
                    # genes from the same genome are paralogs, but we filter out
                    # those that are identical (distance of 0 on the tree) to
                    # speed up computation and because these clearly do not
                    # adversely affect phylogenetic inference
                    if (genomeId == genomeIdOfTaxon[j]
                            and self.__patristicDist(tree, taxa[i],
                                                     taxa[j]) > 0):
                        paralogs[genomeId].add(taxa[i].taxon.label)
                        paralogs[genomeId].add(taxa[j].taxon.label)

            # number of genomes that have paralogous genes in this tree
            print('    Paralogous genes: ' + str(len(paralogs)))

            # check if paralogous genes are conspecific
            print('  Determining if paralogous genes are conspecific.')
            nonConspecificGenomes = []
            for genomeId, taxaLabels in paralogs.items():
                lcaNode = tree.mrca(taxon_labels=taxaLabels)

                # classified species found below the paralogs' common ancestor
                species = set()
                for child in lcaNode.leaf_nodes():
                    childGenomeId = child.taxon.label.split('|')[0]

                    genus = metadata[childGenomeId]['taxonomy'][5]
                    sp = metadata[childGenomeId]['taxonomy'][6].lower()
                    if sp != '' and sp != 'unclassified' and genus != 'unclassified':
                        species.add(genus + ' ' + sp)

                if len(species) > 1:
                    nonConspecificGenomes.append(genomeId)

            if len(nonConspecificGenomes) > acceptPer * numTaxa:
                filteredGeneTrees += 1
                print('  Tree is not conspecific for the following genome: ' +
                      str(nonConspecificGenomes))
            else:
                retainedGeneTrees += 1

                # NOTE(review): a single non-conspecific genome is reported
                # as 'Tree is conspecific.'; confirm whether '> 0' was meant.
                if len(nonConspecificGenomes) > 1:
                    print(
                        '  An acceptable number of genomes are not conspecific: '
                        + str(nonConspecificGenomes))
                else:
                    print('  Tree is conspecific.')

                # copy without a shell so paths containing spaces or shell
                # metacharacters work and failures raise instead of being
                # silently ignored
                shutil.copy(treeFile, os.path.join(outputDir, f))

            print('')

        print('Filtered gene trees: ' + str(filteredGeneTrees))
        print('Retained gene trees: ' + str(retainedGeneTrees))

Example #10

0

Show file

File: rerootTree.py Project: sufforest/SolidBin

 def __init__(self):
     """Load the IMG genome metadata; the IMG handle itself is not kept."""
     imgMetadataFile = '/srv/whitlam/bio/db/checkm/img/img_metadata.tsv'
     tigrfamToPfamFile = '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv'

     self.metadata = IMG(imgMetadataFile, tigrfamToPfamFile).genomeMetadata()

Example #11

0

Show file

File: plotScaffoldLenVsMarkers.py Project: sufforest/SolidBin

 def __init__(self):
     """Create the IMG database handle from the hard-coded CheckM data files."""
     imgMetadataFile = '/srv/whitlam/bio/db/checkm/img/img_metadata.tsv'
     tigrfamToPfamFile = '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv'

     self.img = IMG(imgMetadataFile, tigrfamToPfamFile)

Example #12

0

Show file

    def run(self, geneTreeDir, treeExtension, consistencyThreshold,
            minTaxaForAverage, outputFile, outputDir):
        """Score the taxonomic consistency of gene trees and keep the good ones.

        For every tree in ``geneTreeDir`` ending in ``treeExtension`` the
        best consistency of each taxon at the first three taxonomic ranks
        (reported as domain, phylum and class) is computed over all internal
        nodes as
        ``congruent taxa / (total taxa + incongruent descendant taxa)``,
        considering both sides of each node. Per-tree results are written to
        ``<outputDir>/consistency/<tree>.results.tsv``, a combined table is
        written to ``outputFile``, and trees whose average consistency is at
        least ``consistencyThreshold`` are copied to ``outputDir``.

        The block is written to run under both Python 2 and Python 3.

        Args:
            geneTreeDir: directory containing rooted newick gene trees.
            treeExtension: file name suffix identifying tree files.
            consistencyThreshold: minimum average consistency for a tree to
                be retained.
            minTaxaForAverage: a taxon contributes to the per-rank average
                only if more than this many leaves belong to it.
            outputFile: path of the combined results table.
            outputDir: directory receiving retained trees; existing files in
                it are deleted first.
        """
        # local import: the file-level imports are not visible from here
        import shutil

        # make sure output directory is empty
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        for f in os.listdir(outputDir):
            staleFile = os.path.join(outputDir, f)
            if os.path.isfile(staleFile):
                os.remove(staleFile)

        # get TIGRFam info
        descDict = {}
        tigrfamInfoDir = '/srv/db/tigrfam/13.0/TIGRFAMs_13.0_INFO'
        for f in os.listdir(tigrfamInfoDir):
            # reset per file so an INFO file without an AC line can neither
            # overwrite the previous entry nor hit an unbound name
            acc = None
            shortDesc = longDesc = ''
            with open(os.path.join(tigrfamInfoDir, f)) as fin:
                for line in fin:
                    lineSplit = line.split('  ')
                    if lineSplit[0] == 'AC':
                        acc = lineSplit[1].strip()
                    elif lineSplit[0] == 'DE':
                        shortDesc = lineSplit[1].strip()
                    elif lineSplit[0] == 'CC':
                        longDesc = lineSplit[1].strip()

            if acc is not None:
                descDict[acc] = [shortDesc, longDesc]

        # get PFam info
        with open('/srv/db/pfam/27/Pfam-A.clans.tsv') as fin:
            for line in fin:
                lineSplit = line.split('\t')
                acc = lineSplit[0]
                shortDesc = lineSplit[3]
                longDesc = lineSplit[4].strip()

                descDict[acc] = [shortDesc, longDesc]

        # get IMG taxonomy
        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        metadata = img.genomeMetadata()
        genomeIdToTaxonomy = {}
        for genomeId, m in metadata.items():
            genomeIdToTaxonomy[genomeId] = m['taxonomy']

        # perform analysis for each tree
        numRanks = 3  # the first three taxonomy entries are evaluated
        allResults = {}
        allTaxa = [set(), set(), set()]
        taxaCounts = {}
        avgConsistency = {}
        for treeFile in os.listdir(geneTreeDir):
            if not treeFile.endswith(treeExtension):
                continue

            print(treeFile)
            tree = dendropy.Tree.get_from_path(os.path.join(
                geneTreeDir, treeFile),
                                               schema='newick',
                                               as_rooted=True,
                                               preserve_underscores=True)

            domainConsistency = {}
            phylaConsistency = {}
            classConsistency = {}
            consistencyDict = [
                domainConsistency, phylaConsistency, classConsistency
            ]

            # get abundance of taxa at different taxonomic ranks
            totals = [{}, {}, {}]
            leaves = tree.leaf_nodes()
            print('  Number of leaves: ' + str(len(leaves)))
            totalValidLeaves = 0

            for leaf in leaves:
                genomeId = self.__genomeId(leaf.taxon.label)

                if genomeId not in metadata:
                    print('[Error] Genome is missing metadata: ' + genomeId)
                    sys.exit()

                totalValidLeaves += 1
                taxonomy = genomeIdToTaxonomy[genomeId]
                for r in range(numRanks):
                    totals[r][taxonomy[r]] = totals[r].get(taxonomy[r], 0) + 1
                    consistencyDict[r][taxonomy[r]] = 0
                    allTaxa[r].add(taxonomy[r])

            taxaCounts[treeFile] = [
                totalValidLeaves, totals[0].get('Bacteria', 0),
                totals[0].get('Archaea', 0)
            ]

            # find highest consistency nodes (congruent descendant taxa / (total taxa + incongruent descendant taxa))
            for node in tree.internal_nodes():
                leaves = node.leaf_nodes()

                # taxonomy of every leaf below this node, resolved once
                # instead of once per rank
                leafTaxonomies = [
                    genomeIdToTaxonomy[self.__genomeId(leaf.taxon.label)]
                    for leaf in leaves
                ]

                for r in range(numRanks):
                    leafCounts = {}
                    for taxonomy in leafTaxonomies:
                        leafCounts[taxonomy[r]] = leafCounts.get(
                            taxonomy[r], 0) + 1

                    # calculate consistency for node
                    for taxa in consistencyDict[r]:
                        totalTaxaCount = totals[r][taxa]
                        if totalTaxaCount <= 1 or taxa == 'unclassified':
                            consistencyDict[r][taxa] = 'N/A'
                            continue

                        taxaCount = leafCounts.get(taxa, 0)
                        incongruentTaxa = len(leaves) - taxaCount
                        c = float(taxaCount) / (totalTaxaCount +
                                                incongruentTaxa)
                        if c > consistencyDict[r][taxa]:
                            consistencyDict[r][taxa] = c

                        # consider clan in other direction since the trees are unrooted
                        taxaCount = totalTaxaCount - leafCounts.get(taxa, 0)
                        incongruentTaxa = totalValidLeaves - len(
                            leaves) - taxaCount
                        c = float(taxaCount) / (totalTaxaCount +
                                                incongruentTaxa)
                        if c > consistencyDict[r][taxa]:
                            consistencyDict[r][taxa] = c

            # write results
            consistencyDir = os.path.join(outputDir, 'consistency')
            if not os.path.exists(consistencyDir):
                os.makedirs(consistencyDir)
            resultsFile = os.path.join(consistencyDir,
                                       treeFile + '.results.tsv')
            with open(resultsFile, 'w') as fout:
                fout.write('Tree')
                for r in range(numRanks):
                    for taxa in sorted(consistencyDict[r].keys()):
                        fout.write('\t' + taxa)
                fout.write('\n')

                fout.write(treeFile)
                for r in range(numRanks):
                    for taxa in sorted(consistencyDict[r].keys()):
                        if consistencyDict[r][taxa] != 'N/A':
                            fout.write('\t%.2f' %
                                       (consistencyDict[r][taxa] * 100))
                        else:
                            fout.write('\tN/A')

            # calculate average consistency at each taxonomic rank
            average = []
            for r in range(numRanks):
                sumConsistency = []
                for taxa in consistencyDict[r]:
                    if totals[r][taxa] > minTaxaForAverage and consistencyDict[
                            r][taxa] != 'N/A':
                        sumConsistency.append(consistencyDict[r][taxa])

                if len(sumConsistency) > 0:
                    average.append(sum(sumConsistency) / len(sumConsistency))
                else:
                    average.append(0)
            avgConsistency[treeFile] = average
            allResults[treeFile] = consistencyDict

            print('  Average consistency: ' + str(average) +
                  ', mean = %.2f' % (sum(average) / len(average)))
            print('')

        # print out combined results
        filteredGeneTrees = 0
        retainedGeneTrees = 0
        with open(outputFile, 'w') as fout:
            # The header previously listed an 'Alignment Length' column that
            # no row ever filled, shifting every later value one column to
            # the left of its heading; the heading has been dropped so that
            # header and rows line up.
            fout.write(
                'Tree\tShort Desc.\tLong Desc.\t# Taxa\t# Bacteria\t# Archaea\tAvg. Consistency\tAvg. Domain Consistency\tAvg. Phylum Consistency\tAvg. Class Consistency'
            )
            for r in range(numRanks):
                for t in sorted(allTaxa[r]):
                    fout.write('\t' + t)
            fout.write('\n')

            for treeFile in sorted(allResults.keys()):
                consistencyDict = allResults[treeFile]
                treeId = treeFile[0:treeFile.find('.')].replace('pfam', 'PF')

                fout.write(treeId + '\t' + descDict[treeId][0] + '\t' +
                           descDict[treeId][1])

                # Taxa count
                fout.write('\t' + str(taxaCounts[treeFile][0]) + '\t' +
                           str(taxaCounts[treeFile][1]) + '\t' +
                           str(taxaCounts[treeFile][2]))

                # mean of the three per-rank averages
                avgCon = sum(avgConsistency[treeFile]) / 3
                fout.write('\t' + str(avgCon))

                if avgCon >= consistencyThreshold:
                    retainedGeneTrees += 1
                    # copy without a shell so unusual path characters work
                    # and failures raise instead of being silently ignored
                    shutil.copy(os.path.join(geneTreeDir, treeFile),
                                os.path.join(outputDir, treeFile))
                else:
                    filteredGeneTrees += 1
                    print('Filtered % s with an average consistency of %.4f.' %
                          (treeFile, avgCon))

                for r in range(numRanks):
                    fout.write('\t' + str(avgConsistency[treeFile][r]))

                for r in range(numRanks):
                    for t in sorted(allTaxa[r]):
                        # taxa absent from this tree are reported as N/A too
                        value = consistencyDict[r].get(t, 'N/A')
                        if value != 'N/A':
                            fout.write('\t%.2f' % (value * 100))
                        else:
                            fout.write('\tN/A')
                fout.write('\n')

        print('Retained gene trees: ' + str(retainedGeneTrees))
        print('Filtered gene trees: ' + str(filteredGeneTrees))

Example #13

0

Show file

File: inferGenomeTree.py Project: zjyzjjzmt/CheckM

    def run(self,
            geneTreeDir,
            alignmentDir,
            extension,
            outputAlignFile,
            outputTree,
            outputTaxonomy,
            bSupportValues=False):
        """Concatenate gene alignments and infer a genome tree with FastTreeMP.

        Gene ids are taken from the ``.tre`` files in ``geneTreeDir``. For
        each of these genes the alignment in ``alignmentDir`` ending in
        ``extension`` is read and passed through the class's paralog filter.
        The filtered alignments are concatenated per genome in sorted gene
        order, with gaps standing in for genes a genome lacks. The
        concatenated alignment is written to ``outputAlignFile`` and
        FastTreeMP is run on it.

        The block is written to run under both Python 2 and Python 3.

        Args:
            geneTreeDir: directory containing the gene trees (``*.tre``).
            alignmentDir: directory containing the gene alignments.
            extension: file name suffix identifying alignment files.
            outputAlignFile: path of the concatenated alignment to write.
            outputTree: path of the tree written by FastTreeMP; the log is
                written next to it with a ``.log`` suffix.
            outputTaxonomy: path handed to the taxonomy writer.
            bSupportValues: when False, FastTreeMP is run with
                ``-nosupport``.
        """
        # read gene trees
        print('Reading gene trees.')
        geneIds = set()
        for f in os.listdir(geneTreeDir):
            if f.endswith('.tre'):
                geneIds.add(f[0:f.find('.')])

        # write out genome tree taxonomy
        print('Reading trusted genomes.')
        img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
                  '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
        # materialise the ids so the helpers receive a list on Python 3 too
        genomeIds = list(img.genomeMetadata().keys())
        self.__taxonomy(img, genomeIds, outputTaxonomy)

        print('  There are %d trusted genomes.' % len(genomeIds))

        # get genes in genomes
        print('Reading all PFAM and TIGRFAM hits in trusted genomes.')
        genesInGenomes = self.__genesInGenomes(genomeIds)

        # read alignment files
        print('Reading alignment files.')
        alignments = {}
        genomeIds = set()  # from here on: genomes present in any alignment
        for f in os.listdir(alignmentDir):
            geneId = f[0:f.find('.')]
            if f.endswith(extension) and geneId in geneIds:
                seqs = readFasta(os.path.join(alignmentDir, f))

                # alignment files are named with 'PF' ids; the filter is
                # given the 'pfam' spelling of the same id
                imgGeneId = geneId
                if imgGeneId.startswith('PF'):
                    imgGeneId = imgGeneId.replace('PF', 'pfam')
                seqs = self.__filterParalogs(seqs, imgGeneId, genesInGenomes)

                genomeIds.update(seqs.keys())
                alignments[geneId] = seqs

        # create concatenated alignment
        print('Concatenating alignments:')
        concatenatedSeqs = {}
        totalAlignLen = 0
        for geneId in sorted(alignments.keys()):
            seqs = alignments[geneId]
            if not seqs:
                # nothing left after paralog filtering: the gene contributes
                # no columns (indexing the first sequence would fail here)
                print('  ' + str(geneId) + ',0 (no sequences; skipped)')
                continue

            # all sequences of an alignment are assumed to have equal length
            alignLen = len(next(iter(seqs.values())))
            print('  ' + str(geneId) + ',' + str(alignLen))
            totalAlignLen += alignLen

            gapSeq = '-' * alignLen  # stands in for a missing gene
            for genomeId in genomeIds:
                seqId = 'IMG_' + genomeId
                concatenatedSeqs[seqId] = (concatenatedSeqs.get(seqId, '') +
                                           seqs.get(genomeId, gapSeq))

        print('  Total alignment length: ' + str(totalAlignLen))

        # save concatenated alignment
        writeFasta(concatenatedSeqs, outputAlignFile)

        # infer genome tree
        print('Inferring genome tree.')
        outputLog = outputTree[0:outputTree.rfind('.')] + '.log'

        supportStr = ' '
        if not bSupportValues:
            supportStr = ' -nosupport '

        # NOTE: the command is run through the shell (needed for the output
        # redirection), so the paths must not contain spaces or shell
        # metacharacters
        cmd = 'FastTreeMP' + supportStr + '-wag -gamma -log ' + outputLog + ' ' + outputAlignFile + ' > ' + outputTree
        os.system(cmd)