def test_majority_consensus(self):
     """Compare majority consensus trees with the stored reference trees.

     The reference file is read into a list up front so the ``Phylo.parse``
     generator is fully consumed instead of being left half-iterated.
     The first reference is checked against the consensus built with the
     default arguments, the second against the one built with a second
     argument of ``1``.
     """
     ref_trees = list(Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick'))
     consensus_tree = Consensus.majority_consensus(self.trees)
     self.assertTrue(Consensus._equal_topology(consensus_tree, ref_trees[0]))
     consensus_tree = Consensus.majority_consensus(self.trees, 1)
     self.assertTrue(Consensus._equal_topology(consensus_tree, ref_trees[1]))
Example #2
0
 def test_majority_consensus(self):
     """Check majority consensus output against two reference topologies.

     ``Phylo.parse`` yields trees lazily; materialising it with ``list``
     consumes the whole generator before any assertion runs, rather than
     abandoning it after two ``next`` calls.
     """
     ref_trees = list(Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick'))
     # Default arguments are compared with the first reference tree.
     default_consensus = Consensus.majority_consensus(self.trees)
     self.assertTrue(Consensus._equal_topology(default_consensus, ref_trees[0]))
     # A second argument of 1 is compared with the second reference tree.
     cutoff_consensus = Consensus.majority_consensus(self.trees, 1)
     self.assertTrue(Consensus._equal_topology(cutoff_consensus, ref_trees[1]))
Example #3
0
 def test_majority_consensus(self):
     """Match majority consensus trees to the references in majority_ref.tre.

     The i-th reference tree is compared with the consensus produced by the
     i-th set of extra positional arguments (none, then ``1``).
     """
     references = list(Phylo.parse("./TreeConstruction/majority_ref.tre", "newick"))
     extra_args_per_case = ((), (1,))
     for position, extra_args in enumerate(extra_args_per_case):
         consensus_tree = Consensus.majority_consensus(self.trees, *extra_args)
         self.assertTrue(
             Consensus._equal_topology(consensus_tree, references[position])
         )
Example #4
0
 def test_upgma(self):
     """Build a UPGMA tree from ``self.dm`` and compare it with upgma.tre.

     The result must be a ``BaseTree.Tree`` whose topology equals the
     reference tree read from the Newick file.
     """
     tree = self.constructor.upgma(self.dm)
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(tree, BaseTree.Tree)
     ref_tree = Phylo.read("./TreeConstruction/upgma.tre", "newick")
     self.assertTrue(Consensus._equal_topology(tree, ref_tree))
 def test_built_tree(self):
     """Build a tree from ``self.aln`` and compare it with nj.tre.

     The result must be a ``BaseTree.Tree`` whose topology equals the
     reference tree read from the Newick file.
     """
     tree = self.constructor.build_tree(self.aln)
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(tree, BaseTree.Tree)
     ref_tree = Phylo.read('./TreeConstruction/nj.tre', 'newick')
     self.assertTrue(Consensus._equal_topology(tree, ref_tree))
 def test_built_tree(self):
     """Check that ``build_tree`` on ``self.aln`` reproduces the nj.tre topology."""
     built = self.constructor.build_tree(self.aln)
     # A dedicated type assertion gives a descriptive failure message.
     self.assertIsInstance(built, BaseTree.Tree)
     reference = Phylo.read('./TreeConstruction/nj.tre', 'newick')
     self.assertTrue(Consensus._equal_topology(built, reference))
Example #7
0
    def _calculate_gsi(self):
        """
        Build the majority consensus of the gene trees and save it.

        Counts the ``.bcg`` files in the configured BCG directory, uses that
        count together with ``config.gsi_threshold`` to derive the consensus
        cutoff, prints the consensus tree as ASCII art and writes it in
        Newick format into the alignment output directory.
        :return:
        """
        LOGGER.info("Calculating Gene Support Indices (GSIs)"
                    " from the gene trees..")
        bcg_dir = os.path.join(self._dirpath, self.config.bcg_dir)
        # Number of genomes == number of *.bcg files found in bcg_dir.
        genome_num = sum(
            1 for entry in os.listdir(bcg_dir) if entry.endswith('.bcg')
        )

        gene_trees = Phylo.parse(
            os.path.join(self._align_output_dir, "all_gene.trees"), 'newick'
        )
        # NOTE(review): the cutoff grows with genome_num -- confirm this is
        # the scale majority_consensus expects.
        consensus = Consensus.majority_consensus(
            gene_trees,
            cutoff=(100-self.config.gsi_threshold) * genome_num/100,
        )
        Phylo.draw_ascii(consensus)

        ubcg_gsi_file = os.path.join(self._align_output_dir,
                                     f'UBCG_gsi({self._bcg_num}'
                                     f'){self.config.postfixes.align_tree_const}')
        with open(ubcg_gsi_file, 'w') as out_handle:
            Phylo.write(consensus, out_handle, 'newick')

        LOGGER.info("The final tree marked with GSI was written"
                    " to %s", ubcg_gsi_file)
 def test_get_support(self):
     """Verify the confidence values ``get_support`` assigns to three clades.

     Each clade is located as the common ancestor of two elements found by
     name in the tree returned for ``self.trees[0]`` scored against
     ``self.trees``.
     """
     support_tree = Consensus.get_support(self.trees[0], self.trees)
     expectations = (
         ("Beta", "Gamma", 2 * 100.0 / 3),
         ("Alpha", "Beta", 3 * 100.0 / 3),
         ("Delta", "Epsilon", 2 * 100.0 / 3),
     )
     for first_name, second_name, expected_confidence in expectations:
         targets = [support_tree.find_any(name=first_name),
                    support_tree.find_any(name=second_name)]
         clade = support_tree.common_ancestor(targets)
         self.assertEqual(clade.confidence, expected_confidence)
Example #9
0
 def test_bootstrap_consensus(self):
     """Run a 100-replicate bootstrap consensus and write the result.

     The consensus is built with an NJ ``DistanceTreeConstructor`` using the
     blosum62 ``DistanceCalculator`` and ``Consensus.majority_consensus``;
     it must be a ``BaseTree.Tree`` and is written as Newick into
     ``temp_dir``.
     """
     calculator = DistanceCalculator("blosum62")
     constructor = DistanceTreeConstructor(calculator, "nj")
     tree = Consensus.bootstrap_consensus(self.msa, 100, constructor,
                                          Consensus.majority_consensus)
     # assertIsInstance reports the actual type when the check fails.
     self.assertIsInstance(tree, BaseTree.Tree)
     Phylo.write(tree, os.path.join(temp_dir, "bootstrap_consensus.tre"),
                 "newick")
Example #10
0
 def test_get_support(self):
     """Check clade confidences computed by ``Consensus.get_support``.

     For every pair of names, the common ancestor of the two matching
     elements must carry the expected confidence value.
     """
     support_tree = Consensus.get_support(self.trees[0], self.trees)
     cases = [
         (("Beta", "Gamma"), 2 * 100.0 / 3),
         (("Alpha", "Beta"), 3 * 100.0 / 3),
         (("Delta", "Epsilon"), 2 * 100.0 / 3),
     ]
     for name_pair, wanted in cases:
         members = [support_tree.find_any(name=label) for label in name_pair]
         ancestor = support_tree.common_ancestor(members)
         self.assertEqual(ancestor.confidence, wanted)
Example #11
0
 def test_bootstrap_consensus(self):
     """Run a 100-replicate bootstrap consensus and check it can be written.

     The consensus is built with an NJ ``DistanceTreeConstructor`` using the
     blosum62 ``DistanceCalculator`` and ``Consensus.majority_consensus``;
     it must be a ``BaseTree.Tree`` and must be writable as Newick.
     """
     import os
     import tempfile

     calculator = DistanceCalculator('blosum62')
     constructor = DistanceTreeConstructor(calculator, 'nj')
     tree = Consensus.bootstrap_consensus(self.msa, 100, constructor,
                                          Consensus.majority_consensus)
     self.assertIsInstance(tree, BaseTree.Tree)
     # Write into a throwaway directory so the test does not leave a
     # generated file inside ./TreeConstruction, the folder the reference
     # trees are read from.
     with tempfile.TemporaryDirectory() as scratch_dir:
         Phylo.write(tree, os.path.join(scratch_dir, 'bootstrap_consensus.tre'),
                     'newick')
 def test_count_clades(self):
     """Check the per-clade counts ``_count_clades`` reports for ``self.trees``.

     Six bitstrings are expected; the first element of each entry is
     compared with the expected count.
     """
     bitstr_counts = Consensus._count_clades(self.trees)
     self.assertEqual(len(bitstr_counts), 6)
     expected_counts = (
         ('11111', 3),
         ('11000', 2),
         ('00111', 3),
         ('00110', 2),
         ('00011', 1),
         ('01111', 1),
     )
     for bits, count in expected_counts:
         self.assertEqual(bitstr_counts[_BitString(bits)][0], count)
Example #13
0
 def test_count_clades(self):
     """Verify how often each clade bitstring occurs in ``self.trees``."""
     counts_by_clade = Consensus._count_clades(self.trees)
     self.assertEqual(len(counts_by_clade), 6)
     # (bitstring, expected first element of the stored entry), checked in
     # this order.
     table = [
         ('11111', 3),
         ('11000', 2),
         ('00111', 3),
         ('00110', 2),
         ('00011', 1),
         ('01111', 1),
     ]
     for pattern, occurrences in table:
         entry = counts_by_clade[_BitString(pattern)]
         self.assertEqual(entry[0], occurrences)
Example #14
0
 def test_count_clades(self):
     """Check both values returned by ``_count_clades`` for ``self.trees``.

     The second return value must equal ``len(self.trees)``; the first must
     hold six bitstrings with the expected counts.
     """
     bitstr_counts, len_trees = Consensus._count_clades(self.trees)
     self.assertEqual(len_trees, len(self.trees))
     self.assertEqual(len(bitstr_counts), 6)
     expected = (
         ("11111", 3),
         ("11000", 2),
         ("00111", 3),
         ("00110", 2),
         ("00011", 1),
         ("01111", 1),
     )
     for bit_pattern, expected_count in expected:
         self.assertEqual(bitstr_counts[_BitString(bit_pattern)][0], expected_count)
    def test_nj(self):
        """Build NJ trees from the full matrix and from one trimmed to two entries.

        Both results must be ``BaseTree.Tree`` objects whose topology matches
        the corresponding reference Newick file.
        """
        tree = self.constructor.nj(self.dm)
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(tree, BaseTree.Tree)
        ref_tree = Phylo.read('./TreeConstruction/nj.tre', 'newick')
        self.assertTrue(Consensus._equal_topology(tree, ref_tree))

        # create a matrix of length 2
        calculator = DistanceCalculator('blosum62')
        self.min_dm = calculator.get_distance(self.aln)
        # Remove the last entry once for every entry beyond the first two.
        for _ in range(len(self.min_dm) - 2):
            del self.min_dm[len(self.min_dm) - 1]

        min_tree = self.constructor.nj(self.min_dm)
        self.assertIsInstance(min_tree, BaseTree.Tree)

        ref_min_tree = Phylo.read('./TreeConstruction/nj_min.tre', 'newick')
        self.assertTrue(Consensus._equal_topology(min_tree, ref_min_tree))
Example #16
0
    def test_nj(self):
        """Check NJ construction on the full and on a minimal distance matrix.

        The tree built from ``self.dm`` is compared with nj.tre; a second
        matrix computed from ``self.aln`` is cut down to two entries and its
        tree is compared with nj_min.tre.
        """
        tree = self.constructor.nj(self.dm)
        # A dedicated type assertion gives a descriptive failure message.
        self.assertIsInstance(tree, BaseTree.Tree)
        ref_tree = Phylo.read("./TreeConstruction/nj.tre", "newick")
        self.assertTrue(Consensus._equal_topology(tree, ref_tree))

        # create a matrix of length 2
        calculator = DistanceCalculator("blosum62")
        self.min_dm = calculator.get_distance(self.aln)
        # One deletion of the last entry per surplus entry.
        for _ in range(len(self.min_dm) - 2):
            del self.min_dm[len(self.min_dm) - 1]

        min_tree = self.constructor.nj(self.min_dm)
        self.assertIsInstance(min_tree, BaseTree.Tree)

        ref_min_tree = Phylo.read("./TreeConstruction/nj_min.tre", "newick")
        self.assertTrue(Consensus._equal_topology(min_tree, ref_min_tree))
Example #17
0
 def test_adam_consensus(self):
     """Compare Adam consensus trees with the references in adam_refs.tre.

     Covers all of ``self.trees``, the first two trees and every second
     tree, plus the ``mcmc=True`` variant on ``self.mcmc_trees``.
     """
     references = list(Phylo.parse("./TreeConstruction/adam_refs.tre", "newick"))

     # All trees: plain and mcmc input are both held to the first reference.
     full_consensus = Consensus.adam_consensus(self.trees)
     self.assertTrue(Consensus._equal_topology(full_consensus, references[0]))
     full_consensus_mcmc = Consensus.adam_consensus(self.mcmc_trees, mcmc=True)
     self.assertTrue(Consensus._equal_topology(full_consensus_mcmc, references[0]))

     # Trees 1 and 2.
     pair_consensus = Consensus.adam_consensus(self.trees[:2])
     self.assertTrue(Consensus._equal_topology(pair_consensus, references[1]))
     # NOTE(review): this result is only computed, never compared with a
     # reference -- confirm whether an assertion is missing here.
     Consensus.adam_consensus(self.mcmc_trees[:2], mcmc=True)

     # Trees 1 and 3.
     alternating_consensus = Consensus.adam_consensus(self.trees[::2])
     self.assertTrue(Consensus._equal_topology(alternating_consensus, references[2]))
Example #18
0
 def test_majority_consensus(self):
     """Compare majority consensus trees with the stored reference trees.

     The reference file is read into a list so the ``Phylo.parse``
     generator is fully consumed instead of being abandoned after two
     ``next`` calls. The first reference is checked against the default
     consensus; the second against the consensus built with a second
     argument of ``1``, for both ``self.trees`` and, with ``mcmc=True``,
     ``self.mcmc_trees``.
     """
     ref_trees = list(Phylo.parse("./TreeConstruction/majority_ref.tre", "newick"))
     consensus_tree = Consensus.majority_consensus(self.trees)
     self.assertTrue(Consensus._equal_topology(consensus_tree, ref_trees[0]))
     # NOTE: the default-argument check on self.mcmc_trees was already
     # disabled (commented out) in this test and is still not run.
     consensus_tree = Consensus.majority_consensus(self.trees, 1)
     self.assertTrue(Consensus._equal_topology(consensus_tree, ref_trees[1]))
     consensus_tree_mcmc = Consensus.majority_consensus(self.mcmc_trees, 1, mcmc=True)
     self.assertTrue(Consensus._equal_topology(consensus_tree_mcmc, ref_trees[1]))
Example #19
0
    def bootstrap(self, afbased=True, basename='majorityTree', treebuilder='nj', bootstraps=1000, outgroup=None, useAllLoci=False):
        """Build a majority-consensus tree from bootstrap-resampled loci.

        ``treebuilder`` names the DistanceTreeConstructor method to call
        (nj/upgma); ``outgroup`` is a population name or 'midpoint'.
        The consensus is stored on ``self.majorityTree``, written as Newick
        under ``resultDir`` and drawn as ASCII art.
        """
        # Attribute names differ between the two variants selected by afbased.
        attr_names = {True:  ['allpolySites', 'pwm'],
                      False: ['allpolySitesVCF', 'pwmVCF']}
        sites_attr, _pwm_attr = attr_names[afbased]

        # Pool of all loci that are variable in at least one population.
        locus_pool = set()
        for population in self.populations:
            locus_pool = locus_pool.union(getattr(population, sites_attr))
        locus_pool = list(locus_pool)

        sites = len(locus_pool)
        trees = []
        print ("Bootstrapping, rounds:", end=' ')
        for round_no in range(bootstraps):
            print(round_no, end=' ')
            if useAllLoci:
                selectedLoci = locus_pool
            else:
                # Draw as many locus indices as there are loci, with replacement.
                drawn = np.random.choice(range(len(locus_pool)), sites, replace=True)
                selectedLoci = [locus_pool[idx] for idx in drawn]
            rows = [pop.bootstrap(selectedLoci, afbased) for pop in self.populations]
            df = pd.DataFrame(rows, index=self.popnames)
            self.dmNei = neiDF(df, [5]*(sites-1))

            # Row i keeps columns 0..i: the triangular list-of-lists handed
            # to _DistanceMatrix.
            lower_rows = [list(self.dmNei[row, :(row + 1)])
                          for row in range(len(self.dmNei))]
            matrix = _DistanceMatrix(self.popnames, lower_rows)
            build_tree = getattr(DistanceTreeConstructor(), treebuilder)
            tree = build_tree(matrix)

            if outgroup == 'midpoint':
                tree.root_at_midpoint()
            elif outgroup is not None:
                tree.root_with_outgroup({'name': outgroup})
            trees.append(tree)

        # Debug output: loci used in the last round.
        print(f'selectedLoci: {selectedLoci[:30]}')
        self.majorityTree = Consensus.majority_consensus(trees)
        treefile = '%s/%s_%s_%s_%s.nwk' %(resultDir, basename, bootstraps,treebuilder, len(self.populations))
        Phylo.write(self.majorityTree, treefile, format='newick')
        print(f'wrote {treefile}')
        Phylo.draw_ascii(self.majorityTree)
 def test_strict_consensus(self):
     """Compare strict consensus trees with the references in strict_refs.tre.

     The i-th reference is matched against the consensus of: all trees,
     the first two trees, and every second tree of ``self.trees``.
     """
     ref_trees = list(Phylo.parse('./TreeConstruction/strict_refs.tre', 'newick'))
     tree_subsets = (
         self.trees,       # three trees
         self.trees[:2],   # tree 1 and tree 2
         self.trees[::2],  # tree 1 and tree 3
     )
     for position, subset in enumerate(tree_subsets):
         consensus_tree = Consensus.strict_consensus(subset)
         self.assertTrue(
             Consensus._equal_topology(consensus_tree, ref_trees[position])
         )
Example #21
0
 def test_strict_consensus(self):
     """Check strict consensus topologies for three selections of trees."""
     references = list(Phylo.parse('./TreeConstruction/strict_refs.tre', 'newick'))
     # Selections in reference order: all trees, trees 1+2, trees 1+3.
     selections = [self.trees, self.trees[:2], self.trees[::2]]
     for ref_index, selection in enumerate(selections):
         result = Consensus.strict_consensus(selection)
         matches = Consensus._equal_topology(result, references[ref_index])
         self.assertTrue(matches)
Example #22
0
 def test_adam_consensus(self):
     """Compare Adam consensus trees with the references in adam_refs.tre.

     The i-th reference is matched against the consensus of: all trees,
     the first two trees, and every second tree of ``self.trees``.
     """
     ref_trees = list(Phylo.parse("./TreeConstruction/adam_refs.tre", "newick"))
     tree_subsets = (
         self.trees,       # three trees
         self.trees[:2],   # tree 1 and tree 2
         self.trees[::2],  # tree 1 and tree 3
     )
     for position, subset in enumerate(tree_subsets):
         consensus_tree = Consensus.adam_consensus(subset)
         self.assertTrue(
             Consensus._equal_topology(consensus_tree, ref_trees[position])
         )
Example #23
0
 def test_bootstrap_consensus(self):
     """Build a 100-replicate bootstrap consensus and write it to ``temp_dir``.

     Uses an NJ ``DistanceTreeConstructor`` with a blosum62
     ``DistanceCalculator`` and ``Consensus.majority_consensus``; the result
     must be a ``BaseTree.Tree``.
     """
     calculator = DistanceCalculator('blosum62')
     constructor = DistanceTreeConstructor(calculator, 'nj')
     tree = Consensus.bootstrap_consensus(self.msa, 100, constructor, Consensus.majority_consensus)
     # assertIsInstance reports the actual type when the check fails.
     self.assertIsInstance(tree, BaseTree.Tree)
     Phylo.write(tree, os.path.join(temp_dir, 'bootstrap_consensus.tre'), 'newick')
Example #24
0
 def test_bootstrap_trees(self):
     """Check that ``bootstrap_trees`` yields 100 ``BaseTree.Tree`` replicates.

     Only the first tree's type is checked, after the count has been
     verified.
     """
     calculator = DistanceCalculator('blosum62')
     constructor = DistanceTreeConstructor(calculator)
     trees = list(Consensus.bootstrap_trees(self.msa, 100, constructor))
     self.assertEqual(len(trees), 100)
     # assertIsInstance reports the actual type when the check fails.
     self.assertIsInstance(trees[0], BaseTree.Tree)
Example #25
0
 def test_bootstrap(self):
     """Check count and dimensions of the alignments from ``bootstrap``.

     100 replicates are expected; the first one must have the same length
     as ``self.msa`` and its first element the same length as ``self.msa[0]``.
     """
     replicates = list(Consensus.bootstrap(self.msa, 100))
     self.assertEqual(len(replicates), 100)
     first_replicate = replicates[0]
     self.assertEqual(len(first_replicate), len(self.msa))
     self.assertEqual(len(first_replicate[0]), len(self.msa[0]))
Example #26
0
 def test_maximum_clade_probability_consensus(self):
     """Compare the maximum clade probability consensus with its reference.

     The first element of each result (plain trees, and ``mcmc_trees`` with
     ``mcmc=True``) must match the first reference topology.
     """
     references = list(
         Phylo.parse("./TreeConstruction/maximum_clade_probability_ref.tre", "newick")
     )
     plain_result = Consensus.maximum_clade_probability_consensus(self.trees)
     consensus_tree = plain_result[0]
     self.assertTrue(Consensus._equal_topology(consensus_tree, references[0]))

     mcmc_result = Consensus.maximum_clade_probability_consensus(self.mcmc_trees, mcmc=True)
     consensus_tree_mcmc = mcmc_result[0]
     self.assertTrue(Consensus._equal_topology(consensus_tree_mcmc, references[0]))
Example #27
0
the script parallelBootstrap.py, which dumps a list of trees that were created with BioPython's
DistanceTreeConstructor. 

Alternative consensus methods that Biopython provides:
 * strict_consensus
 * adam_consensus
Neither of them provides bootstrap support values, though.

#run on HPC
#source activate bio3
"""

# Directory holding the pickled tree lists.
datadir = "/research/btc_bioinformatic/operations/HLA/FreqRT/Data"

# Default prefix of the tree files; the last command-line argument, if any,
# overrides it.
treebase = "maj_ABC_1_nj_92"
# Command-line arguments live in sys.argv (index 0 is the script itself);
# sys.path is the module search path and says nothing about arguments.
if len(sys.argv) > 1:
    treebase = sys.argv[-1]
#maj_AB_min95_1_nj_246_0001.pcl

# Collect the trees from every pickle file that matches the prefix.
treefiles = glob.glob("%s/%s*.pcl" % (datadir, treebase))
trees = []
for treefile in treefiles:
    with open(treefile, "rb") as tf:
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # files produced by a trusted run.
        trees += pickle.load(tf)

# Fail with a clear message instead of an opaque error from the consensus call.
if not trees:
    raise SystemExit("No trees loaded from %s/%s*.pcl" % (datadir, treebase))

majorityTree = Consensus.majority_consensus(trees)

Phylo.write(majorityTree, '../Data/%s.nwk' % treebase, format='newick')

Phylo.draw_ascii(majorityTree)
 def test_bootstrap_trees(self):
     """Check the number and type of trees produced by ``bootstrap_trees``."""
     calculator = DistanceCalculator('blosum62')
     constructor = DistanceTreeConstructor(calculator)
     replicate_trees = list(Consensus.bootstrap_trees(self.msa, 100, constructor))
     self.assertEqual(len(replicate_trees), 100)
     # A dedicated type assertion gives a descriptive failure message.
     self.assertIsInstance(replicate_trees[0], BaseTree.Tree)
 def test_bootstrap(self):
     """Verify that ``bootstrap`` yields 100 alignments shaped like ``self.msa``."""
     resampled = list(Consensus.bootstrap(self.msa, 100))
     self.assertEqual(len(resampled), 100)
     # Only the first replicate is inspected.
     sample = resampled[0]
     self.assertEqual(len(sample), len(self.msa))
     self.assertEqual(len(sample[0]), len(self.msa[0]))
Example #30
0
 def test_bootstrap_consensus(self):
     """Build a 100-replicate bootstrap consensus and check it can be written.

     Uses an NJ ``DistanceTreeConstructor`` with a blosum62
     ``DistanceCalculator`` and ``Consensus.majority_consensus``; the result
     must be a ``BaseTree.Tree`` and must be writable as Newick.
     """
     import os
     import tempfile

     calculator = DistanceCalculator("blosum62")
     constructor = DistanceTreeConstructor(calculator, "nj")
     tree = Consensus.bootstrap_consensus(self.msa, 100, constructor, Consensus.majority_consensus)
     self.assertIsInstance(tree, BaseTree.Tree)
     # Use a throwaway directory so no generated file is left inside
     # ./TreeConstruction, the folder the reference trees are read from.
     with tempfile.TemporaryDirectory() as scratch_dir:
         Phylo.write(tree, os.path.join(scratch_dir, "bootstrap_consensus.tre"), "newick")