def run(self, numThreads, outgroupSize):
        """Run the genome-tree workflow and write a tree labelled with ACE ids.

        Stages run in this order: identify genes suitable for phylogenetic
        inference, infer gene trees, test the gene trees for paralogs, test
        them for taxonomic consistency, gather the retained HMMs into one
        model file, infer the full genome tree, and finally write a copy of
        that tree (self.treeOutAce) in which 'IMG_<genomeId>' labels are
        replaced by the identifiers returned from self.imgIdsToAceIds.

        Args:
            numThreads: forwarded to self.inferGeneTrees and MakeTrees.run.
            outgroupSize: forwarded to self.inferGeneTrees.
        """
        # identify genes suitable for phylogenetic inference
        print('--- Identifying genes suitable for phylogenetic inference ---')
        genomeIds = self.inferGeneTrees(self.phyloUbiquity, self.phyloSingleCopy, numThreads, self.alignmentDir, self.hmmDir, outgroupSize)

        # infer gene trees
        print('')
        print('--- Inferring gene trees ---')
        makeTrees = MakeTrees()
        makeTrees.run(self.alignmentDir, self.geneTreeDir, '.aln.masked.faa', numThreads)

        # test gene trees for paralogs
        print('')
        print('--- Testing for paralogs in gene trees ---')
        paralogTest = ParalogTest()
        paralogTest.run(self.geneTreeDir, self.paralogAcceptPer, '.tre', self.conspecificGeneTreeDir)

        # test gene trees for consistency with IMG taxonomy
        print('')
        print('--- Testing taxonomic consistency of gene trees ---')
        consistencyTest = ConsistencyTest()
        consistencyTest.run(self.conspecificGeneTreeDir, '.tre', self.consistencyAcceptPer, self.consistencyMinTaxa, self.consistencyOut, self.finalGeneTreeDir)

        # gather phylogenetically informative HMMs into a single model file
        print('')
        print('--- Gathering phylogenetically informative HMMs ---')
        getPhylogeneticHMMs = GetPhylogeneticHMMs()
        getPhylogeneticHMMs.run(self.hmmDir, self.finalGeneTreeDir, self.phyloHMMsOut)

        # infer genome tree
        print('')
        print('--- Inferring full genome tree ---')
        inferGenomeTree = InferGenomeTree()
        inferGenomeTree.run(self.finalGeneTreeDir, self.alignmentDir, '.aln.masked.faa', self.concatenatedAlignFile, self.treeOut, self.taxonomyOut, bSupportValues = True)

        # replace IMG identifiers with ACE identifiers
        imgIdToAceId = self.imgIdsToAceIds(genomeIds)
        with open(self.treeOut) as f:
            tree = f.read()

        # Plain substring replacement is order-sensitive: if one genome id is
        # a prefix of another (e.g. '123' and '1234'), replacing the shorter
        # label first would corrupt the longer one. Handling the longest ids
        # first avoids that whenever the longer id has a mapping.
        for genomeId in sorted(genomeIds, key=len, reverse=True):
            if genomeId in imgIdToAceId:
                tree = tree.replace('IMG_' + genomeId, imgIdToAceId[genomeId])

        # The context manager closes the output file even if the write fails.
        with open(self.treeOutAce, 'w') as fout:
            fout.write(tree)
Esempio n. 2
0
    def run(self, numThreads):
        """Decorate the genome trees with taxonomy and lineage-specific metadata.

        Only three steps execute as written:
          1. 't2t decorate' is run on self.treeRootedOut with
             self.taxonomyOut, writing self.treeTaxonomyOut.
          2. self.treeDerepRootedOut is copied to self.treeDerepFinalOut.
          3. DecorateTree.decorate is called on the results.

        The tree-inference and dereplication stages are retained but guarded
        by constant-false switches, so they never run.
        NOTE(review): the live steps read files that the disabled stages
        write (treeRootedOut, taxonomyOut, treeDerepRootedOut, derepSeqFile);
        presumably these must already exist from an earlier run - confirm.
        NOTE(review): exit codes of the shell commands are not checked.

        Args:
            numThreads: forwarded to the stage runners and to
                DecorateTree.decorate.
        """
        # Stage switches; both are hard-wired off.
        inferTrees = False
        dereplicate = False

        if inferTrees:
            # identify genes suitable for phylogenetic inference
            print('--- Identifying genes suitable for phylogenetic inference ---')
            geneSelector = PhylogeneticInferenceGenes()
            geneSelector.run(self.phyloUbiquity, self.phyloSingleCopy, numThreads, self.alignmentDir, self.hmmDir)

            # infer gene trees
            print('')
            print('--- Inferring gene trees ---')
            treeBuilder = MakeTrees()
            treeBuilder.run(self.alignmentDir, self.geneTreeDir, '.aln.masked.faa', numThreads)

            # test gene trees for paralogs
            print('')
            print('--- Testing for paralogs in gene trees ---')
            paralogFilter = ParalogTest()
            paralogFilter.run(self.geneTreeDir, self.paralogAcceptPer, '.tre', self.conspecificGeneTreeDir)

            # NOTE(review): this exit makes the rest of this branch
            # unreachable even when the switch is enabled; it looks like a
            # debugging leftover - confirm before re-enabling the stage.
            sys.exit()

            # test gene trees for consistency with IMG taxonomy
            print('')
            print('--- Testing taxonomic consistency of gene trees ---')
            consistencyFilter = ConsistencyTest()
            consistencyFilter.run(self.conspecificGeneTreeDir, '.tre', self.consistencyAcceptPer, self.consistencyMinTaxa, self.consistencyOut, self.finalGeneTreeDir)

            # gather phylogenetically informative HMMs into a single model file
            print('')
            print('--- Gathering phylogenetically informative HMMs ---')
            hmmCollector = GetPhylogeneticHMMs()
            hmmCollector.run(self.hmmDir, self.finalGeneTreeDir, self.phyloHMMsOut)

            # infer genome tree
            print('')
            print('--- Inferring full genome tree ---')
            genomeTreeBuilder = InferGenomeTree()
            genomeTreeBuilder.run(self.finalGeneTreeDir, self.alignmentDir, '.aln.masked.faa', self.concatenatedAlignFile, self.treeOut, self.taxonomyOut)

            # root genome tree between archaea and bacteria
            print('')
            print('--- Rooting full genome tree ---')
            fullTreeRooter = RerootTree()
            fullTreeRooter.run(self.treeOut, self.treeRootedOut)

        # decorate genome tree with taxonomy using nlevel from tax2tree
        print('')
        print('--- Decorating full genome tree with taxonomic information using tax2tree ---')
        decorateCmd = 't2t decorate -t %s -m %s -o %s' % (self.treeRootedOut, self.taxonomyOut, self.treeTaxonomyOut)
        os.system(decorateCmd)

        if dereplicate:
            # dereplicate identical sequences
            print('')
            print('--- Identifying duplicate sequences ---')
            dedupCmd = 'seqmagick convert --deduplicate-sequences --deduplicated-sequences-file ' + self.derepSeqFile + ' ' + self.concatenatedAlignFile + ' ' + self.derepConcatenatedAlignFile
            os.system(dedupCmd)

            # infer dereplicated genome tree; the log file sits beside the
            # tree, with the text after the last '.' swapped for 'log'
            print('')
            print('--- Inferring dereplicated genome tree ---')
            lastDot = self.treeDerepOut.rfind('.')
            logPath = self.treeDerepOut[0:lastDot] + '.log'
            # (a '-nosupport' variant of this command was previously used)
            fastTreeCmd = 'FastTreeMP -wag -gamma -log ' + logPath + ' ' + self.derepConcatenatedAlignFile + ' > ' + self.treeDerepOut
            os.system(fastTreeCmd)

            # root genome tree between archaea and bacteria
            print('')
            print('--- Rooting dereplicated genome tree ---')
            derepTreeRooter = RerootTree()
            derepTreeRooter.run(self.treeDerepOut, self.treeDerepRootedOut)

            # calculate bootstraps for genome tree: only the banner is
            # printed; the BootstrapTree call itself is not made here
            print('')
            print('--- Calculating bootstrap support ---')

        # just use FastTree support values
        copyCmd = 'cp ' + self.treeDerepRootedOut + ' ' + self.treeDerepFinalOut
        os.system(copyCmd)

        # decorate dereplicated tree with unique IDs and a complementary file indicating properties of each internal node
        print('')
        print('--- Decorating final tree with lineage-specific statistics and marker set information ---')
        treeDecorator = DecorateTree()
        treeDecorator.decorate(self.treeTaxonomyOut, self.derepSeqFile, self.treeDerepFinalOut, self.treeMetadata, numThreads)