def run(self, numThreads, outgroupSize):
        """Infer the genome tree and write a copy relabelled with ACE identifiers.

        Stages are run in order: gene identification (self.inferGeneTrees),
        gene tree inference, paralog test, taxonomic consistency test,
        gathering of HMMs, and genome tree inference. The tree written to
        self.treeOut is then read back, every 'IMG_<genome id>' label whose
        genome id has an entry in the IMG-to-ACE mapping is replaced by the
        ACE identifier, and the result is written to self.treeOutAce.

        Parameters:
            numThreads   -- forwarded to self.inferGeneTrees and MakeTrees.run
            outgroupSize -- forwarded to self.inferGeneTrees

        Progress messages use single-argument print(...), which produces the
        same output whether print is a statement (Python 2) or a function.
        """
        # Function-scope import so this method does not depend on the
        # file-level import block.
        import re

        # identify genes suitable for phylogenetic inference
        print('--- Identifying genes suitable for phylogenetic inference ---')
        genomeIds = self.inferGeneTrees(self.phyloUbiquity, self.phyloSingleCopy, numThreads, self.alignmentDir, self.hmmDir, outgroupSize)

        # infer gene trees
        print('')
        print('--- Inferring gene trees ---')
        makeTrees = MakeTrees()
        makeTrees.run(self.alignmentDir, self.geneTreeDir, '.aln.masked.faa', numThreads)

        # test gene trees for paralogs
        print('')
        print('--- Testing for paralogs in gene trees ---')
        paralogTest = ParalogTest()
        paralogTest.run(self.geneTreeDir, self.paralogAcceptPer, '.tre', self.conspecificGeneTreeDir)

        # test gene trees for consistency with IMG taxonomy
        print('')
        print('--- Testing taxonomic consistency of gene trees ---')
        consistencyTest = ConsistencyTest()
        consistencyTest.run(self.conspecificGeneTreeDir, '.tre', self.consistencyAcceptPer, self.consistencyMinTaxa, self.consistencyOut, self.finalGeneTreeDir)

        # gather phylogenetically informative HMMs into a single model file
        print('')
        print('--- Gathering phylogenetically informative HMMs ---')
        getPhylogeneticHMMs = GetPhylogeneticHMMs()
        getPhylogeneticHMMs.run(self.hmmDir, self.finalGeneTreeDir, self.phyloHMMsOut)

        # infer genome tree
        print('')
        print('--- Inferring full genome tree ---')
        inferGenomeTree = InferGenomeTree()
        inferGenomeTree.run(self.finalGeneTreeDir, self.alignmentDir, '.aln.masked.faa', self.concatenatedAlignFile, self.treeOut, self.taxonomyOut, bSupportValues = True)

        # replace IMG identifiers with ACE identifiers
        imgIdToAceId = self.imgIdsToAceIds(genomeIds)
        with open(self.treeOut) as f:
            tree = f.read()

        # All labels are rewritten in one pass over the tree. Longer ids come
        # first in the alternation so that an id which is a prefix of another
        # (e.g. '123' and '1234') can never match inside the longer label and
        # corrupt it, which repeated str.replace calls would do.
        idsLongestFirst = sorted(set(genomeIds), key=len, reverse=True)
        if idsLongestFirst:
            labelRegex = re.compile('IMG_(' + '|'.join(re.escape(genomeId) for genomeId in idsLongestFirst) + ')')

            def toAceLabel(match):
                genomeId = match.group(1)
                if genomeId in imgIdToAceId:
                    return imgIdToAceId[genomeId]
                # no ACE identifier for this genome: keep the IMG label
                return match.group(0)

            tree = labelRegex.sub(toAceLabel, tree)

        # 'with' guarantees the output file is closed even if the write fails
        with open(self.treeOutAce, 'w') as fout:
            fout.write(tree)
Example #2
0
    def run(self, numThreads):
        """Decorate the genome tree with taxonomy and lineage-specific data.

        Only three steps are live:
          1. ``t2t decorate`` on self.treeRootedOut / self.taxonomyOut,
             writing self.treeTaxonomyOut;
          2. copying self.treeDerepRootedOut to self.treeDerepFinalOut;
          3. DecorateTree.decorate on the resulting files.

        All other stages sit inside ``if False:`` blocks and never execute.
        They are kept unchanged as a record of how the input files are
        produced, so those files are expected to exist before this is called.

        Args:
            numThreads: forwarded to DecorateTree.decorate (and to the
                disabled stages).

        Raises:
            subprocess.CalledProcessError: if ``t2t decorate`` exits non-zero.
            OSError / IOError: if ``t2t`` cannot be started or the tree copy
                fails.
        """
        # Function-scope imports so this method does not depend on the
        # file-level import block.
        import shutil
        import subprocess

        # identify genes suitable for phylogenetic inference
        # NOTE: disabled stage, never executed.
        if False:
            print(
                '--- Identifying genes suitable for phylogenetic inference ---'
            )
            phylogeneticInferenceGenes = PhylogeneticInferenceGenes()
            phylogeneticInferenceGenes.run(self.phyloUbiquity,
                                           self.phyloSingleCopy, numThreads,
                                           self.alignmentDir, self.hmmDir)

            # infer gene trees
            print('')
            print('--- Inferring gene trees ---')
            makeTrees = MakeTrees()
            makeTrees.run(self.alignmentDir, self.geneTreeDir,
                          '.aln.masked.faa', numThreads)

            # test gene trees for paralogs
            print('')
            print('--- Testing for paralogs in gene trees ---')
            paralogTest = ParalogTest()
            paralogTest.run(self.geneTreeDir, self.paralogAcceptPer, '.tre',
                            self.conspecificGeneTreeDir)

            # NOTE: even if this block is re-enabled, nothing below this
            # exit would run.
            sys.exit()

            # test gene trees for consistency with IMG taxonomy
            print('')
            print('--- Testing taxonomic consistency of gene trees ---')
            consistencyTest = ConsistencyTest()
            consistencyTest.run(self.conspecificGeneTreeDir, '.tre',
                                self.consistencyAcceptPer,
                                self.consistencyMinTaxa, self.consistencyOut,
                                self.finalGeneTreeDir)

            # gather phylogenetically informative HMMs into a single model file
            print('')
            print('--- Gathering phylogenetically informative HMMs ---')
            getPhylogeneticHMMs = GetPhylogeneticHMMs()
            getPhylogeneticHMMs.run(self.hmmDir, self.finalGeneTreeDir,
                                    self.phyloHMMsOut)

            # infer genome tree
            print('')
            print('--- Inferring full genome tree ---')
            inferGenomeTree = InferGenomeTree()
            inferGenomeTree.run(self.finalGeneTreeDir, self.alignmentDir,
                                '.aln.masked.faa', self.concatenatedAlignFile,
                                self.treeOut, self.taxonomyOut)

            # root genome tree between archaea and bacteria
            print('')
            print('--- Rooting full genome tree ---')
            rerootTree = RerootTree()
            rerootTree.run(self.treeOut, self.treeRootedOut)

        # decorate genome tree with taxonomy using nlevel from tax2tree
        print('')
        print(
            '--- Decorating full genome tree with taxonomic information using tax2tree ---'
        )
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact; check_call raises if t2t fails instead of
        # letting later steps run on a missing or stale file.
        subprocess.check_call([
            't2t', 'decorate',
            '-t', self.treeRootedOut,
            '-m', self.taxonomyOut,
            '-o', self.treeTaxonomyOut,
        ])

        # NOTE: disabled stage, never executed.
        if False:
            # dereplicate identical sequences
            print('')
            print('--- Identifying duplicate sequences ---')
            os.system(
                'seqmagick convert --deduplicate-sequences --deduplicated-sequences-file '
                + self.derepSeqFile + ' ' + self.concatenatedAlignFile + ' ' +
                self.derepConcatenatedAlignFile)

            # infer dereplicated genome tree
            print('')
            print('--- Inferring dereplicated genome tree ---')
            outputLog = self.treeDerepOut[0:self.treeDerepOut.rfind('.'
                                                                    )] + '.log'
            # cmd = 'FastTreeMP -nosupport -wag -gamma -log ' + outputLog + ' ' + self.derepConcatenatedAlignFile + ' > ' + self.treeDerepOut
            cmd = 'FastTreeMP -wag -gamma -log ' + outputLog + ' ' + self.derepConcatenatedAlignFile + ' > ' + self.treeDerepOut
            os.system(cmd)

            # root genome tree between archaea and bacteria
            print('')
            print('--- Rooting dereplicated genome tree ---')
            rerootTree = RerootTree()
            rerootTree.run(self.treeDerepOut, self.treeDerepRootedOut)

            # calculate bootstraps for genome tree
            print('')
            print('--- Calculating bootstrap support ---')
            # bootstrapTree = BootstrapTree()
            # bootstrapTree.run(self.bootstrapDir, self.treeDerepRootedOut, self.concatenatedAlignFile, 100, numThreads, self.treeDerepBootstrapOut)

            # os.system('cp ' + self.treeDerepBootstrapOut + ' ' + self.treeDerepFinalOut)

        # just use FastTree support values
        # shutil.copy replaces the shelled-out 'cp': no quoting problems, and
        # a failed copy raises instead of being silently ignored.
        shutil.copy(self.treeDerepRootedOut, self.treeDerepFinalOut)

        # decorate dereplicated tree with unique IDs and a complementary file indicating properties of each internal node
        print('')
        print(
            '--- Decorating final tree with lineage-specific statistics and marker set information ---'
        )
        decorateTree = DecorateTree()
        decorateTree.decorate(self.treeTaxonomyOut, self.derepSeqFile,
                              self.treeDerepFinalOut, self.treeMetadata,
                              numThreads)
Example #3
0
    def run(self, numThreads, outgroupSize):
        """Infer the genome tree and write a copy relabelled with ACE identifiers.

        Stages are run in order: gene identification (self.inferGeneTrees),
        gene tree inference, paralog test, taxonomic consistency test,
        gathering of HMMs, and genome tree inference. The tree written to
        self.treeOut is then read back, every 'IMG_<genome id>' label whose
        genome id has an entry in the IMG-to-ACE mapping is replaced by the
        ACE identifier, and the result is written to self.treeOutAce.

        Parameters:
            numThreads   -- forwarded to self.inferGeneTrees and MakeTrees.run
            outgroupSize -- forwarded to self.inferGeneTrees

        Progress messages use single-argument print(...), which produces the
        same output whether print is a statement (Python 2) or a function.
        """
        # Function-scope import so this method does not depend on the
        # file-level import block.
        import re

        # identify genes suitable for phylogenetic inference
        print('--- Identifying genes suitable for phylogenetic inference ---')
        genomeIds = self.inferGeneTrees(self.phyloUbiquity,
                                        self.phyloSingleCopy, numThreads,
                                        self.alignmentDir, self.hmmDir,
                                        outgroupSize)

        # infer gene trees
        print('')
        print('--- Inferring gene trees ---')
        makeTrees = MakeTrees()
        makeTrees.run(self.alignmentDir, self.geneTreeDir, '.aln.masked.faa',
                      numThreads)

        # test gene trees for paralogs
        print('')
        print('--- Testing for paralogs in gene trees ---')
        paralogTest = ParalogTest()
        paralogTest.run(self.geneTreeDir, self.paralogAcceptPer, '.tre',
                        self.conspecificGeneTreeDir)

        # test gene trees for consistency with IMG taxonomy
        print('')
        print('--- Testing taxonomic consistency of gene trees ---')
        consistencyTest = ConsistencyTest()
        consistencyTest.run(self.conspecificGeneTreeDir, '.tre',
                            self.consistencyAcceptPer, self.consistencyMinTaxa,
                            self.consistencyOut, self.finalGeneTreeDir)

        # gather phylogenetically informative HMMs into a single model file
        print('')
        print('--- Gathering phylogenetically informative HMMs ---')
        getPhylogeneticHMMs = GetPhylogeneticHMMs()
        getPhylogeneticHMMs.run(self.hmmDir, self.finalGeneTreeDir,
                                self.phyloHMMsOut)

        # infer genome tree
        print('')
        print('--- Inferring full genome tree ---')
        inferGenomeTree = InferGenomeTree()
        inferGenomeTree.run(self.finalGeneTreeDir,
                            self.alignmentDir,
                            '.aln.masked.faa',
                            self.concatenatedAlignFile,
                            self.treeOut,
                            self.taxonomyOut,
                            bSupportValues=True)

        # replace IMG identifiers with ACE identifiers
        imgIdToAceId = self.imgIdsToAceIds(genomeIds)
        with open(self.treeOut) as f:
            tree = f.read()

        # All labels are rewritten in one pass over the tree. Longer ids come
        # first in the alternation so that an id which is a prefix of another
        # (e.g. '123' and '1234') can never match inside the longer label and
        # corrupt it, which repeated str.replace calls would do.
        idsLongestFirst = sorted(set(genomeIds), key=len, reverse=True)
        if idsLongestFirst:
            labelRegex = re.compile(
                'IMG_(' +
                '|'.join(re.escape(genomeId) for genomeId in idsLongestFirst) +
                ')')

            def toAceLabel(match):
                genomeId = match.group(1)
                if genomeId in imgIdToAceId:
                    return imgIdToAceId[genomeId]
                # no ACE identifier for this genome: keep the IMG label
                return match.group(0)

            tree = labelRegex.sub(toAceLabel, tree)

        # 'with' guarantees the output file is closed even if the write fails
        with open(self.treeOutAce, 'w') as fout:
            fout.write(tree)
Example #4
0
    def run(self, numThreads):
        """Decorate the genome tree with taxonomy and lineage-specific data.

        Only three steps are live:
          1. ``t2t decorate`` on self.treeRootedOut / self.taxonomyOut,
             writing self.treeTaxonomyOut;
          2. copying self.treeDerepRootedOut to self.treeDerepFinalOut;
          3. DecorateTree.decorate on the resulting files.

        All other stages sit inside ``if False:`` blocks and never execute.
        They are kept as a record of how the input files are produced, so
        those files are expected to exist before this is called.

        Progress messages use single-argument print(...), which produces the
        same output whether print is a statement (Python 2) or a function.

        Args:
            numThreads: forwarded to DecorateTree.decorate (and to the
                disabled stages).

        Raises:
            subprocess.CalledProcessError: if ``t2t decorate`` exits non-zero.
            OSError / IOError: if ``t2t`` cannot be started or the tree copy
                fails.
        """
        # Function-scope imports so this method does not depend on the
        # file-level import block.
        import shutil
        import subprocess

        # identify genes suitable for phylogenetic inference
        # NOTE: disabled stage, never executed.
        if False:
            print('--- Identifying genes suitable for phylogenetic inference ---')
            phylogeneticInferenceGenes = PhylogeneticInferenceGenes()
            phylogeneticInferenceGenes.run(self.phyloUbiquity, self.phyloSingleCopy, numThreads, self.alignmentDir, self.hmmDir)

            # infer gene trees
            print('')
            print('--- Inferring gene trees ---')
            makeTrees = MakeTrees()
            makeTrees.run(self.alignmentDir, self.geneTreeDir, '.aln.masked.faa', numThreads)

            # test gene trees for paralogs
            print('')
            print('--- Testing for paralogs in gene trees ---')
            paralogTest = ParalogTest()
            paralogTest.run(self.geneTreeDir, self.paralogAcceptPer, '.tre', self.conspecificGeneTreeDir)

            # NOTE: even if this block is re-enabled, nothing below this
            # exit would run.
            sys.exit()

            # test gene trees for consistency with IMG taxonomy
            print('')
            print('--- Testing taxonomic consistency of gene trees ---')
            consistencyTest = ConsistencyTest()
            consistencyTest.run(self.conspecificGeneTreeDir, '.tre', self.consistencyAcceptPer, self.consistencyMinTaxa, self.consistencyOut, self.finalGeneTreeDir)

            # gather phylogenetically informative HMMs into a single model file
            print('')
            print('--- Gathering phylogenetically informative HMMs ---')
            getPhylogeneticHMMs = GetPhylogeneticHMMs()
            getPhylogeneticHMMs.run(self.hmmDir, self.finalGeneTreeDir, self.phyloHMMsOut)

            # infer genome tree
            print('')
            print('--- Inferring full genome tree ---')
            inferGenomeTree = InferGenomeTree()
            inferGenomeTree.run(self.finalGeneTreeDir, self.alignmentDir, '.aln.masked.faa', self.concatenatedAlignFile, self.treeOut, self.taxonomyOut)

            # root genome tree between archaea and bacteria
            print('')
            print('--- Rooting full genome tree ---')
            rerootTree = RerootTree()
            rerootTree.run(self.treeOut, self.treeRootedOut)

        # decorate genome tree with taxonomy using nlevel from tax2tree
        print('')
        print('--- Decorating full genome tree with taxonomic information using tax2tree ---')
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact; check_call raises if t2t fails instead of
        # letting later steps run on a missing or stale file.
        subprocess.check_call(['t2t', 'decorate', '-t', self.treeRootedOut, '-m', self.taxonomyOut, '-o', self.treeTaxonomyOut])

        # NOTE: disabled stage, never executed.
        if False:
            # dereplicate identical sequences
            print('')
            print('--- Identifying duplicate sequences ---')
            os.system('seqmagick convert --deduplicate-sequences --deduplicated-sequences-file ' + self.derepSeqFile + ' ' + self.concatenatedAlignFile + ' ' + self.derepConcatenatedAlignFile)

            # infer dereplicated genome tree
            print('')
            print('--- Inferring dereplicated genome tree ---')
            outputLog = self.treeDerepOut[0:self.treeDerepOut.rfind('.')] + '.log'
            # cmd = 'FastTreeMP -nosupport -wag -gamma -log ' + outputLog + ' ' + self.derepConcatenatedAlignFile + ' > ' + self.treeDerepOut
            cmd = 'FastTreeMP -wag -gamma -log ' + outputLog + ' ' + self.derepConcatenatedAlignFile + ' > ' + self.treeDerepOut
            os.system(cmd)

            # root genome tree between archaea and bacteria
            print('')
            print('--- Rooting dereplicated genome tree ---')
            rerootTree = RerootTree()
            rerootTree.run(self.treeDerepOut, self.treeDerepRootedOut)

            # calculate bootstraps for genome tree
            print('')
            print('--- Calculating bootstrap support ---')
            # bootstrapTree = BootstrapTree()
            # bootstrapTree.run(self.bootstrapDir, self.treeDerepRootedOut, self.concatenatedAlignFile, 100, numThreads, self.treeDerepBootstrapOut)

            # os.system('cp ' + self.treeDerepBootstrapOut + ' ' + self.treeDerepFinalOut)

        # just use FastTree support values
        # shutil.copy replaces the shelled-out 'cp': no quoting problems, and
        # a failed copy raises instead of being silently ignored.
        shutil.copy(self.treeDerepRootedOut, self.treeDerepFinalOut)

        # decorate dereplicated tree with unique IDs and a complementary file indicating properties of each internal node
        print('')
        print('--- Decorating final tree with lineage-specific statistics and marker set information ---')
        decorateTree = DecorateTree()
        decorateTree.decorate(self.treeTaxonomyOut, self.derepSeqFile, self.treeDerepFinalOut, self.treeMetadata, numThreads)