def test_majority_consensus(self):
    """Compare majority-consensus trees with the stored reference topologies.

    Reference trees are read lazily from ``majority_ref.tre``. The first one
    is checked against the consensus built with the default arguments, the
    second against the consensus built with ``1`` as the second positional
    argument (presumably the cutoff).
    """
    references = Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick')
    # Extra positional arguments per round: none first, then a value of 1.
    for extra_args in ((), (1,)):
        expected = next(references)
        consensus = Consensus.majority_consensus(self.trees, *extra_args)
        self.assertTrue(Consensus._equal_topology(consensus, expected))
def test_majority_consensus(self):
    """Verify ``Consensus.majority_consensus`` against two reference trees.

    The reference file is consumed one tree at a time: its first tree is the
    expected topology for the default call, its second tree the expected
    topology when ``1`` is passed as the second positional argument.
    """
    reference_iter = Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick')

    def topology_matches(built):
        # Pull the next reference tree and compare topologies only.
        return Consensus._equal_topology(built, next(reference_iter))

    # Round 1: default arguments.
    expected_default = next(reference_iter)
    built_default = Consensus.majority_consensus(self.trees)
    self.assertTrue(Consensus._equal_topology(built_default, expected_default))

    # Round 2: second positional argument set to 1.
    expected_strict = next(reference_iter)
    built_strict = Consensus.majority_consensus(self.trees, 1)
    self.assertTrue(Consensus._equal_topology(built_strict, expected_strict))
def test_majority_consensus(self):
    """Check majority-consensus output against the reference file.

    All reference trees are parsed up front into a list. Entry 0 is the
    expected topology for the default call and entry 1 the expected topology
    when ``1`` is passed as the second positional argument.
    """
    references = list(Phylo.parse("./TreeConstruction/majority_ref.tre", "newick"))
    # Each round pairs a reference index with the extra positional arguments.
    for position, extra_args in enumerate(((), (1,))):
        consensus = Consensus.majority_consensus(self.trees, *extra_args)
        self.assertTrue(Consensus._equal_topology(consensus, references[position]))
def test_majority_consensus(self):
    """Check majority-consensus trees, including the MCMC variant.

    The first reference tree is compared with the default consensus of
    ``self.trees``. The second reference tree is compared with two results:
    the consensus of ``self.trees`` with ``1`` as second positional argument,
    and the consensus of ``self.mcmc_trees`` built with the same value and
    ``mcmc=True``.
    """
    references = Phylo.parse("./TreeConstruction/majority_ref.tre", "newick")

    expected_default = next(references)
    default_consensus = Consensus.majority_consensus(self.trees)
    self.assertTrue(Consensus._equal_topology(default_consensus, expected_default))

    # NOTE: disabled check, kept for reference (it consumed its own
    # reference tree before the one used below):
    # ref_tree = next(ref_trees)
    # consensus_tree_mcmc = Consensus.majority_consensus(self.mcmc_trees, mcmc=True)
    # self.assertTrue(Consensus._equal_topology(consensus_tree_mcmc, ref_tree))

    expected_strict = next(references)
    strict_consensus = Consensus.majority_consensus(self.trees, 1)
    self.assertTrue(Consensus._equal_topology(strict_consensus, expected_strict))

    # The MCMC run is expected to reproduce the same reference topology.
    mcmc_consensus = Consensus.majority_consensus(self.mcmc_trees, 1, mcmc=True)
    self.assertTrue(Consensus._equal_topology(mcmc_consensus, expected_strict))
def _calculate_gsi(self):
    """Build the GSI-annotated consensus tree from the gene trees and save it.

    Counts the ``.bcg`` files in the configured BCG directory, reads the
    Newick trees from ``all_gene.trees`` in the alignment output directory,
    passes them to ``Consensus.majority_consensus`` with a cutoff computed
    from ``config.gsi_threshold`` and that count, prints the resulting tree
    as ASCII art and writes it to a Newick file in the same directory.

    :return: None
    """
    LOGGER.info("Calculating Gene Support Indices (GSIs) from the gene trees..")

    # One '.bcg' file in the BCG directory is counted as one genome.
    bcg_path = os.path.join(self._dirpath, self.config.bcg_dir)
    genome_total = sum(1 for entry in os.listdir(bcg_path) if entry.endswith('.bcg'))

    gene_trees_path = os.path.join(self._align_output_dir, "all_gene.trees")
    gene_trees = Phylo.parse(gene_trees_path, 'newick')

    # NOTE(review): Biopython documents `cutoff` as a fraction between 0 and 1;
    # this expression scales with the genome count - confirm that is intended.
    support_cutoff = (100 - self.config.gsi_threshold) * genome_total / 100
    consensus = Consensus.majority_consensus(gene_trees, cutoff=support_cutoff)
    Phylo.draw_ascii(consensus)

    output_name = f'UBCG_gsi({self._bcg_num}){self.config.postfixes.align_tree_const}'
    ubcg_gsi_file = os.path.join(self._align_output_dir, output_name)
    with open(ubcg_gsi_file, 'w') as handle:
        Phylo.write(consensus, handle, 'newick')

    LOGGER.info("The final tree marked with GSI was written to %s", ubcg_gsi_file)
def bootstrap(self, afbased=True, basename='majorityTree', treebuilder='nj',
              bootstraps=1000, outgroup=None, useAllLoci=False):
    """Build a majority-consensus tree from bootstrapped distance trees.

    Each round resamples the loci with replacement (unless ``useAllLoci`` is
    set), asks every population for its bootstrap values on those loci,
    turns the resulting table into a distance matrix via ``neiDF`` and builds
    a tree from it. The consensus of all rounds is stored in
    ``self.majorityTree``, written to a Newick file under ``resultDir`` and
    drawn as ASCII art. ``self.dmNei`` holds the matrix of the last round.

    :param afbased: True reads each population's ``allpolySites``, False its
        ``allpolySitesVCF``; the flag is also forwarded to ``pop.bootstrap``.
    :param basename: prefix of the output file name.
    :param treebuilder: name of the ``DistanceTreeConstructor`` method to
        call; could be nj/upgma.
    :param bootstraps: number of bootstrap rounds; must be at least 1.
    :param outgroup: None keeps the tree as built, 'midpoint' roots it at the
        midpoint, any other value is used as the outgroup population name.
    :param useAllLoci: if true, every round uses all loci without resampling.
    :raises ValueError: if ``bootstraps`` is smaller than 1.
    """
    if bootstraps < 1:
        # Without at least one tree there is nothing to build a consensus from.
        raise ValueError("bootstraps must be at least 1")

    # Attribute holding a population's polymorphic sites. The dict lookup
    # (rather than a truthiness test) keeps the KeyError for non-boolean flags.
    poly_sites_attr = {True: 'allpolySites', False: 'allpolySitesVCF'}[afbased]

    ## allLoci: all loci that are variable in at least one population
    allLoci = list(set().union(*(getattr(pop, poly_sites_attr) for pop in self.populations)))
    ## sort it? Reduce to independent sites (www.pnas.org/content/93/23/13429, run LD?)
    sites = len(allLoci)

    constructor = DistanceTreeConstructor()  # could've passed treebuilder here too
    build_tree = getattr(constructor, treebuilder)

    trees = []
    print("Bootstrapping, rounds:", end=' ')
    for round_no in range(bootstraps):  ## see also parallelized version
        print(round_no, end=' ')
        if useAllLoci:
            selectedLoci = allLoci
        else:
            # Draw as many locus indices as there are loci, with replacement.
            picked = np.random.choice(range(sites), sites, replace=True)
            selectedLoci = [allLoci[idx] for idx in picked]

        df = pd.DataFrame([pop.bootstrap(selectedLoci, afbased) for pop in self.populations],
                          index=self.popnames)
        # NOTE(review): meaning of the constant 5 and of sites-1 is defined by neiDF - confirm.
        self.dmNei = neiDF(df, [5] * (sites - 1))

        ## annoying conversion, BioPython couldnt be just more compatible with scipy/pdist?
        # _DistanceMatrix is given the lower triangle, diagonal included.
        dmTriangular = [list(self.dmNei[row, :(row + 1)]) for row in range(len(self.dmNei))]
        matrix = _DistanceMatrix(self.popnames, dmTriangular)

        tree = build_tree(matrix)  ## use nj!
        if outgroup == 'midpoint':
            tree.root_at_midpoint()
        elif outgroup is not None:
            tree.root_with_outgroup({'name': outgroup})
        trees.append(tree)

    ## debug info: print(f'selectedLoci: {selectedLoci[:30]}')
    ## see https://biopython.org/wiki/Phylo, turned out to be more suitable than dendropy/sumtrees
    ## also consider strict_consensus and adam_consensus (but they don't have bootstrap support values)
    self.majorityTree = Consensus.majority_consensus(trees)

    treefile = '%s/%s_%s_%s_%s.nwk' % (resultDir, basename, bootstraps, treebuilder,
                                       len(self.populations))
    Phylo.write(self.majorityTree, treefile, format='newick')
    print(f'wrote {treefile}')
    Phylo.draw_ascii(self.majorityTree)
the script parallelBootstrap.py, which dumps a list of trees that were created with BioPython's DistanceTreeConstructor. Alternative consensus construction that BioPython provides: * strict_consensus * adam_consensus They don't provide bootstrap support values, though. #run on HPC #source activate bio3 """ datadir = "/research/btc_bioinformatic/operations/HLA/FreqRT/Data" treebase = "maj_ABC_1_nj_92" if len(sys.path) > 2: treebase = sys.argv[-1] #maj_AB_min95_1_nj_246_0001.pcl treefiles = glob.glob("%s/%s*.pcl" % (datadir, treebase)) trees = [] for treefile in treefiles: with open(treefile, "rb") as tf: trees += pickle.load(tf) majorityTree = Consensus.majority_consensus(trees) Phylo.write(majorityTree, '../Data/%s.nwk' % treebase, format='newick') Phylo.draw_ascii(majorityTree)