def run(self, numThreads, outgroupSize):
    """Run the genome tree workflow and write a tree labelled with ACE ids.

    Steps, in order:
      1. select genes via ``self.inferGeneTrees`` (returns the genome ids),
      2. build gene trees with ``MakeTrees``,
      3. filter gene trees with ``ParalogTest`` and ``ConsistencyTest``,
      4. gather the HMMs with ``GetPhylogeneticHMMs``,
      5. infer the genome tree with ``InferGenomeTree``,
      6. copy ``self.treeOut`` to ``self.treeOutAce`` with every
         ``IMG_<genomeId>`` label replaced by the identifier returned by
         ``self.imgIdsToAceIds``.

    Args:
        numThreads: forwarded to ``self.inferGeneTrees`` and
            ``MakeTrees.run``.
        outgroupSize: forwarded to ``self.inferGeneTrees``.
    """
    # NOTE: single-argument print(...) emits the same text whether print is
    # a statement (Python 2) or a function (Python 3).

    # identify genes suitable for phylogenetic inference
    print('--- Identifying genes suitable for phylogenetic inference ---')
    genomeIds = self.inferGeneTrees(self.phyloUbiquity, self.phyloSingleCopy,
                                    numThreads, self.alignmentDir,
                                    self.hmmDir, outgroupSize)

    # infer gene trees
    print('')
    print('--- Inferring gene trees ---')
    makeTrees = MakeTrees()
    makeTrees.run(self.alignmentDir, self.geneTreeDir, '.aln.masked.faa',
                  numThreads)

    # test gene trees for paralogs
    print('')
    print('--- Testing for paralogs in gene trees ---')
    paralogTest = ParalogTest()
    paralogTest.run(self.geneTreeDir, self.paralogAcceptPer, '.tre',
                    self.conspecificGeneTreeDir)

    # test gene trees for consistency with IMG taxonomy
    print('')
    print('--- Testing taxonomic consistency of gene trees ---')
    consistencyTest = ConsistencyTest()
    consistencyTest.run(self.conspecificGeneTreeDir, '.tre',
                        self.consistencyAcceptPer, self.consistencyMinTaxa,
                        self.consistencyOut, self.finalGeneTreeDir)

    # gather phylogenetically informative HMMs into a single model file
    print('')
    print('--- Gathering phylogenetically informative HMMs ---')
    getPhylogeneticHMMs = GetPhylogeneticHMMs()
    getPhylogeneticHMMs.run(self.hmmDir, self.finalGeneTreeDir,
                            self.phyloHMMsOut)

    # infer genome tree
    print('')
    print('--- Inferring full genome tree ---')
    inferGenomeTree = InferGenomeTree()
    inferGenomeTree.run(self.finalGeneTreeDir, self.alignmentDir,
                        '.aln.masked.faa', self.concatenatedAlignFile,
                        self.treeOut, self.taxonomyOut, bSupportValues=True)

    # replace IMG identifiers with ACE identifiers
    imgIdToAceId = self.imgIdsToAceIds(genomeIds)
    with open(self.treeOut) as fin:
        tree = fin.read()

    # Replace the longest ids first: str.replace matches substrings, so an id
    # that is a prefix of a longer id would otherwise rewrite part of the
    # longer label before that label gets its own replacement.
    for genomeId in sorted(genomeIds, key=len, reverse=True):
        if genomeId in imgIdToAceId:
            tree = tree.replace('IMG_' + genomeId, imgIdToAceId[genomeId])

    # the context manager closes the output file even if the write fails
    with open(self.treeOutAce, 'w') as fout:
        fout.write(tree)
def run(self, numThreads):
    """Run the genome tree workflow; as written it stops after the paralog test.

    The unconditional ``sys.exit()`` that follows the paralog test ends the
    process, so every statement after it is currently unreachable.

    NOTE(review): the indentation of this method was lost in the text this
    was reconstructed from, so the extent of the two ``if False:`` blocks
    below is an assumption -- confirm against the upstream file.

    Args:
        numThreads: forwarded to ``PhylogeneticInferenceGenes.run``,
            ``MakeTrees.run`` and ``DecorateTree.decorate``.
    """
    # identify genes suitable for phylogenetic inference
    # (`if False:` keeps the step in the file without executing it)
    if False:
        print('--- Identifying genes suitable for phylogenetic inference ---')
        phylogeneticInferenceGenes = PhylogeneticInferenceGenes()
        phylogeneticInferenceGenes.run(self.phyloUbiquity,
                                       self.phyloSingleCopy, numThreads,
                                       self.alignmentDir, self.hmmDir)

        # infer gene trees
        # NOTE(review): assumed to belong to the disabled block -- confirm
        print('')
        print('--- Inferring gene trees ---')
        makeTrees = MakeTrees()
        makeTrees.run(self.alignmentDir, self.geneTreeDir, '.aln.masked.faa',
                      numThreads)

    # test gene trees for paralogs
    print('')
    print('--- Testing for paralogs in gene trees ---')
    paralogTest = ParalogTest()
    paralogTest.run(self.geneTreeDir, self.paralogAcceptPer, '.tre',
                    self.conspecificGeneTreeDir)

    # NOTE(review): unconditional exit; everything below never runs.
    # Looks like a debugging leftover -- confirm before removing.
    sys.exit()

    # test gene trees for consistency with IMG taxonomy
    print('')
    print('--- Testing taxonomic consistency of gene trees ---')
    consistencyTest = ConsistencyTest()
    consistencyTest.run(self.conspecificGeneTreeDir, '.tre',
                        self.consistencyAcceptPer, self.consistencyMinTaxa,
                        self.consistencyOut, self.finalGeneTreeDir)

    # gather phylogenetically informative HMMs into a single model file
    print('')
    print('--- Gathering phylogenetically informative HMMs ---')
    getPhylogeneticHMMs = GetPhylogeneticHMMs()
    getPhylogeneticHMMs.run(self.hmmDir, self.finalGeneTreeDir,
                            self.phyloHMMsOut)

    # infer genome tree
    print('')
    print('--- Inferring full genome tree ---')
    inferGenomeTree = InferGenomeTree()
    inferGenomeTree.run(self.finalGeneTreeDir, self.alignmentDir,
                        '.aln.masked.faa', self.concatenatedAlignFile,
                        self.treeOut, self.taxonomyOut)

    # root genome tree between archaea and bacteria
    print('')
    print('--- Rooting full genome tree ---')
    rerootTree = RerootTree()
    rerootTree.run(self.treeOut, self.treeRootedOut)

    # decorate genome tree with taxonomy using nlevel from tax2tree
    # (runs the external `t2t` command through the shell; its exit status
    # is not checked)
    print('')
    print('--- Decorating full genome tree with taxonomic information using tax2tree ---')
    os.system('t2t decorate -t %s -m %s -o %s' %
              (self.treeRootedOut, self.taxonomyOut, self.treeTaxonomyOut))

    # NOTE(review): disabled block; assumed to extend through the final `cp`
    # below -- confirm against the upstream file.
    if False:
        # dereplicate identical sequences
        print('')
        print('--- Identifying duplicate sequences ---')
        os.system('seqmagick convert --deduplicate-sequences --deduplicated-sequences-file ' +
                  self.derepSeqFile + ' ' + self.concatenatedAlignFile + ' ' +
                  self.derepConcatenatedAlignFile)

        # infer dereplicated genome tree
        print('')
        print('--- Inferring dereplicated genome tree ---')
        # log file path: the tree path with everything from its last '.'
        # replaced by '.log'
        outputLog = self.treeDerepOut[0:self.treeDerepOut.rfind('.')] + '.log'
        # cmd = 'FastTreeMP -nosupport -wag -gamma -log ' + outputLog + ' ' + self.derepConcatenatedAlignFile + ' > ' + self.treeDerepOut
        cmd = ('FastTreeMP -wag -gamma -log ' + outputLog + ' ' +
               self.derepConcatenatedAlignFile + ' > ' + self.treeDerepOut)
        os.system(cmd)

        # root genome tree between archaea and bacteria
        print('')
        print('--- Rooting dereplicated genome tree ---')
        rerootTree = RerootTree()
        rerootTree.run(self.treeDerepOut, self.treeDerepRootedOut)

        # calculate bootstraps for genome tree
        print('')
        print('--- Calculating bootstrap support ---')
        # bootstrapTree = BootstrapTree()
        # bootstrapTree.run(self.bootstrapDir, self.treeDerepRootedOut, self.concatenatedAlignFile, 100, numThreads, self.treeDerepBootstrapOut)
        # os.system('cp ' + self.treeDerepBootstrapOut + ' ' + self.treeDerepFinalOut)

        # just use FastTree support values
        # (the rooted tree is copied unchanged to the final tree path)
        os.system('cp ' + self.treeDerepRootedOut + ' ' + self.treeDerepFinalOut)

    # decorate dereplicated tree with unique IDs and a complementary file indicating properties of each internal node
    print('')
    print('--- Decorating final tree with lineage-specific statistics and marker set information ---')
    decorateTree = DecorateTree()
    decorateTree.decorate(self.treeTaxonomyOut, self.derepSeqFile,
                          self.treeDerepFinalOut, self.treeMetadata,
                          numThreads)
def run(self, numThreads, outgroupSize):
    """Run the genome tree workflow and write a tree labelled with ACE ids.

    Steps, in order:
      1. select genes via ``self.inferGeneTrees`` (returns the genome ids),
      2. build gene trees with ``MakeTrees``,
      3. filter gene trees with ``ParalogTest`` and ``ConsistencyTest``,
      4. gather the HMMs with ``GetPhylogeneticHMMs``,
      5. infer the genome tree with ``InferGenomeTree``,
      6. copy ``self.treeOut`` to ``self.treeOutAce`` with every
         ``IMG_<genomeId>`` label replaced by the identifier returned by
         ``self.imgIdsToAceIds``.

    Args:
        numThreads: forwarded to ``self.inferGeneTrees`` and
            ``MakeTrees.run``.
        outgroupSize: forwarded to ``self.inferGeneTrees``.
    """
    # NOTE: single-argument print(...) emits the same text whether print is
    # a statement (Python 2) or a function (Python 3).

    # identify genes suitable for phylogenetic inference
    print('--- Identifying genes suitable for phylogenetic inference ---')
    genomeIds = self.inferGeneTrees(self.phyloUbiquity, self.phyloSingleCopy,
                                    numThreads, self.alignmentDir,
                                    self.hmmDir, outgroupSize)

    # infer gene trees
    print('')
    print('--- Inferring gene trees ---')
    makeTrees = MakeTrees()
    makeTrees.run(self.alignmentDir, self.geneTreeDir, '.aln.masked.faa',
                  numThreads)

    # test gene trees for paralogs
    print('')
    print('--- Testing for paralogs in gene trees ---')
    paralogTest = ParalogTest()
    paralogTest.run(self.geneTreeDir, self.paralogAcceptPer, '.tre',
                    self.conspecificGeneTreeDir)

    # test gene trees for consistency with IMG taxonomy
    print('')
    print('--- Testing taxonomic consistency of gene trees ---')
    consistencyTest = ConsistencyTest()
    consistencyTest.run(self.conspecificGeneTreeDir, '.tre',
                        self.consistencyAcceptPer, self.consistencyMinTaxa,
                        self.consistencyOut, self.finalGeneTreeDir)

    # gather phylogenetically informative HMMs into a single model file
    print('')
    print('--- Gathering phylogenetically informative HMMs ---')
    getPhylogeneticHMMs = GetPhylogeneticHMMs()
    getPhylogeneticHMMs.run(self.hmmDir, self.finalGeneTreeDir,
                            self.phyloHMMsOut)

    # infer genome tree
    print('')
    print('--- Inferring full genome tree ---')
    inferGenomeTree = InferGenomeTree()
    inferGenomeTree.run(self.finalGeneTreeDir, self.alignmentDir,
                        '.aln.masked.faa', self.concatenatedAlignFile,
                        self.treeOut, self.taxonomyOut, bSupportValues=True)

    # replace IMG identifiers with ACE identifiers
    imgIdToAceId = self.imgIdsToAceIds(genomeIds)
    with open(self.treeOut) as fin:
        tree = fin.read()

    # Replace the longest ids first: str.replace matches substrings, so an id
    # that is a prefix of a longer id would otherwise rewrite part of the
    # longer label before that label gets its own replacement.
    for genomeId in sorted(genomeIds, key=len, reverse=True):
        if genomeId in imgIdToAceId:
            tree = tree.replace('IMG_' + genomeId, imgIdToAceId[genomeId])

    # the context manager closes the output file even if the write fails
    with open(self.treeOutAce, 'w') as fout:
        fout.write(tree)
def run(self, numThreads):
    """Run the genome tree workflow; as written it stops after the paralog test.

    The unconditional ``sys.exit()`` that follows the paralog test ends the
    process, so every statement after it is currently unreachable.

    NOTE(review): the indentation of this method was lost in the text this
    was reconstructed from, so the extent of the two ``if False:`` blocks
    below is an assumption -- confirm against the upstream file.

    Args:
        numThreads: forwarded to ``PhylogeneticInferenceGenes.run``,
            ``MakeTrees.run`` and ``DecorateTree.decorate``.
    """
    # identify genes suitable for phylogenetic inference
    # (`if False:` keeps the step in the file without executing it)
    if False:
        print '--- Identifying genes suitable for phylogenetic inference ---'
        phylogeneticInferenceGenes = PhylogeneticInferenceGenes()
        phylogeneticInferenceGenes.run(self.phyloUbiquity,
                                       self.phyloSingleCopy, numThreads,
                                       self.alignmentDir, self.hmmDir)

        # infer gene trees
        # NOTE(review): assumed to belong to the disabled block -- confirm
        print ''
        print '--- Inferring gene trees ---'
        makeTrees = MakeTrees()
        makeTrees.run(self.alignmentDir, self.geneTreeDir, '.aln.masked.faa',
                      numThreads)

    # test gene trees for paralogs
    print ''
    print '--- Testing for paralogs in gene trees ---'
    paralogTest = ParalogTest()
    paralogTest.run(self.geneTreeDir, self.paralogAcceptPer, '.tre',
                    self.conspecificGeneTreeDir)

    # NOTE(review): unconditional exit; everything below never runs.
    # Looks like a debugging leftover -- confirm before removing.
    sys.exit()

    # test gene trees for consistency with IMG taxonomy
    print ''
    print '--- Testing taxonomic consistency of gene trees ---'
    consistencyTest = ConsistencyTest()
    consistencyTest.run(self.conspecificGeneTreeDir, '.tre',
                        self.consistencyAcceptPer, self.consistencyMinTaxa,
                        self.consistencyOut, self.finalGeneTreeDir)

    # gather phylogenetically informative HMMs into a single model file
    print ''
    print '--- Gathering phylogenetically informative HMMs ---'
    getPhylogeneticHMMs = GetPhylogeneticHMMs()
    getPhylogeneticHMMs.run(self.hmmDir, self.finalGeneTreeDir,
                            self.phyloHMMsOut)

    # infer genome tree
    print ''
    print '--- Inferring full genome tree ---'
    inferGenomeTree = InferGenomeTree()
    inferGenomeTree.run(self.finalGeneTreeDir, self.alignmentDir,
                        '.aln.masked.faa', self.concatenatedAlignFile,
                        self.treeOut, self.taxonomyOut)

    # root genome tree between archaea and bacteria
    print ''
    print '--- Rooting full genome tree ---'
    rerootTree = RerootTree()
    rerootTree.run(self.treeOut, self.treeRootedOut)

    # decorate genome tree with taxonomy using nlevel from tax2tree
    # (runs the external `t2t` command through the shell; its exit status
    # is not checked)
    print ''
    print '--- Decorating full genome tree with taxonomic information using tax2tree ---'
    os.system('t2t decorate -t %s -m %s -o %s' %
              (self.treeRootedOut, self.taxonomyOut, self.treeTaxonomyOut))

    # NOTE(review): disabled block; assumed to extend through the final `cp`
    # below -- confirm against the upstream file.
    if False:
        # dereplicate identical sequences
        print ''
        print '--- Identifying duplicate sequences ---'
        os.system('seqmagick convert --deduplicate-sequences --deduplicated-sequences-file ' +
                  self.derepSeqFile + ' ' + self.concatenatedAlignFile + ' ' +
                  self.derepConcatenatedAlignFile)

        # infer dereplicated genome tree
        print ''
        print '--- Inferring dereplicated genome tree ---'
        # log file path: the tree path with everything from its last '.'
        # replaced by '.log'
        outputLog = self.treeDerepOut[0:self.treeDerepOut.rfind('.')] + '.log'
        # cmd = 'FastTreeMP -nosupport -wag -gamma -log ' + outputLog + ' ' + self.derepConcatenatedAlignFile + ' > ' + self.treeDerepOut
        cmd = ('FastTreeMP -wag -gamma -log ' + outputLog + ' ' +
               self.derepConcatenatedAlignFile + ' > ' + self.treeDerepOut)
        os.system(cmd)

        # root genome tree between archaea and bacteria
        print ''
        print '--- Rooting dereplicated genome tree ---'
        rerootTree = RerootTree()
        rerootTree.run(self.treeDerepOut, self.treeDerepRootedOut)

        # calculate bootstraps for genome tree
        print ''
        print '--- Calculating bootstrap support ---'
        # bootstrapTree = BootstrapTree()
        # bootstrapTree.run(self.bootstrapDir, self.treeDerepRootedOut, self.concatenatedAlignFile, 100, numThreads, self.treeDerepBootstrapOut)
        # os.system('cp ' + self.treeDerepBootstrapOut + ' ' + self.treeDerepFinalOut)

        # just use FastTree support values
        # (the rooted tree is copied unchanged to the final tree path)
        os.system('cp ' + self.treeDerepRootedOut + ' ' + self.treeDerepFinalOut)

    # decorate dereplicated tree with unique IDs and a complementary file indicating properties of each internal node
    print ''
    print '--- Decorating final tree with lineage-specific statistics and marker set information ---'
    decorateTree = DecorateTree()
    decorateTree.decorate(self.treeTaxonomyOut, self.derepSeqFile,
                          self.treeDerepFinalOut, self.treeMetadata,
                          numThreads)