def test_majority_consensus(self):
    """Compare majority consensus trees with the stored reference trees.

    Reference trees are pulled one at a time from
    ``majority_ref.tre``: the first is compared with the consensus built
    with no explicit cutoff, the second with the consensus built with a
    cutoff of 1.
    """
    references = Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick')
    # One tuple of extra positional arguments per reference tree, in the
    # order the references appear in the file.
    for cutoff_args in ((), (1,)):
        expected = next(references)
        observed = Consensus.majority_consensus(self.trees, *cutoff_args)
        self.assertTrue(Consensus._equal_topology(observed, expected))
Esempio n. 2
0
 def test_majority_consensus(self):
     """Verify majority consensus topology at two cutoff settings.

     Each consensus tree built from ``self.trees`` must have the same
     topology as the matching tree read from ``majority_ref.tre``.
     """
     parsed_refs = Phylo.parse('./TreeConstruction/majority_ref.tre', 'newick')

     # First reference tree: consensus built without an explicit cutoff.
     expected_default = next(parsed_refs)
     built_default = Consensus.majority_consensus(self.trees)
     matches_default = Consensus._equal_topology(built_default, expected_default)
     self.assertTrue(matches_default)

     # Second reference tree: consensus built with a cutoff of 1.
     expected_cutoff = next(parsed_refs)
     built_cutoff = Consensus.majority_consensus(self.trees, 1)
     matches_cutoff = Consensus._equal_topology(built_cutoff, expected_cutoff)
     self.assertTrue(matches_cutoff)
Esempio n. 3
0
 def test_majority_consensus(self):
     """Check majority consensus trees against the reference file.

     All reference trees are loaded up front; the tree at index 0 is
     compared with the consensus built without an explicit cutoff and the
     tree at index 1 with the consensus built with a cutoff of 1.
     """
     # Original author's note: "three trees" (what it counts is not
     # visible from this test alone).
     references = list(Phylo.parse("./TreeConstruction/majority_ref.tre", "newick"))
     for position, cutoff_args in enumerate(((), (1,))):
         candidate = Consensus.majority_consensus(self.trees, *cutoff_args)
         self.assertTrue(Consensus._equal_topology(candidate, references[position]))
Esempio n. 4
0
 def test_majority_consensus(self):
     """Check plain and MCMC majority consensus trees against references.

     The first reference tree is compared with the consensus of
     ``self.trees`` built without an explicit cutoff. The next reference
     tree is compared with both the cutoff-1 consensus of ``self.trees``
     and the cutoff-1 MCMC consensus of ``self.mcmc_trees``.
     """
     def assert_same_topology(built, reference):
         self.assertTrue(Consensus._equal_topology(built, reference))

     references = Phylo.parse("./TreeConstruction/majority_ref.tre", "newick")

     default_ref = next(references)
     assert_same_topology(Consensus.majority_consensus(self.trees), default_ref)

     # NOTE: an MCMC comparison without an explicit cutoff used to sit here
     # and is currently disabled; it read one more reference tree from the
     # file before the cutoff-1 checks below.
     cutoff_ref = next(references)
     assert_same_topology(Consensus.majority_consensus(self.trees, 1), cutoff_ref)
     assert_same_topology(
         Consensus.majority_consensus(self.mcmc_trees, 1, mcmc=True),
         cutoff_ref,
     )
Esempio n. 5
0
    def _calculate_gsi(self):
        """
        Build the majority consensus of the gene trees and save it as the
        GSI-marked tree.

        The number of ``.bcg`` files in the configured BCG directory is
        combined with ``config.gsi_threshold`` to form the consensus cutoff.
        The resulting tree is drawn as ASCII art and written in Newick
        format to the alignment output directory.
        :return: None
        """
        LOGGER.info("Calculating Gene Support Indices (GSIs)"
                    " from the gene trees..")

        # Every *.bcg file in the BCG directory counts as one genome.
        bcg_path = os.path.join(self._dirpath, self.config.bcg_dir)
        genome_total = sum(
            1 for entry in os.listdir(bcg_path) if entry.endswith('.bcg')
        )

        gene_tree_path = os.path.join(self._align_output_dir, "all_gene.trees")
        gene_trees = Phylo.parse(gene_tree_path, 'newick')
        # NOTE(review): Biopython documents `cutoff` as a fraction in the
        # 0-1 range, while this value scales with the genome count --
        # confirm this is the intended threshold.
        support_cutoff = (100-self.config.gsi_threshold) * genome_total/100
        consensus = Consensus.majority_consensus(gene_trees,
                                                 cutoff=support_cutoff)
        Phylo.draw_ascii(consensus)

        gsi_name = f'UBCG_gsi({self._bcg_num}){self.config.postfixes.align_tree_const}'
        ubcg_gsi_file = os.path.join(self._align_output_dir, gsi_name)
        with open(ubcg_gsi_file, 'w') as out_handle:
            Phylo.write(consensus, out_handle, 'newick')

        LOGGER.info("The final tree marked with GSI was written"
                    " to %s", ubcg_gsi_file)
Esempio n. 6
0
    def bootstrap(self, afbased=True, basename='majorityTree', treebuilder='nj', bootstraps=1000, outgroup=None, useAllLoci=False):
        """Build a bootstrap majority-consensus tree and write it as Newick.

        Each round resamples the variable loci with replacement (unless
        ``useAllLoci`` is set), asks every population for its bootstrap
        values, converts them to a distance matrix via ``neiDF`` and builds
        a tree with BioPython's ``DistanceTreeConstructor``. The trees of all
        rounds are combined with ``Consensus.majority_consensus``. The result
        is stored in ``self.majorityTree``, written to a ``.nwk`` file under
        ``resultDir`` and drawn as ASCII art. ``self.dmNei`` is left holding
        the distance matrix of the last round.

        :param afbased: True reads each population's ``allpolySites``, False
            reads ``allpolySitesVCF``; the flag is also forwarded to
            ``pop.bootstrap``.
        :param basename: leading part of the output file name.
        :param treebuilder: name of the ``DistanceTreeConstructor`` method to
            call; could be nj/upgma.
        :param bootstraps: number of bootstrap rounds; must be at least 1.
        :param outgroup: a population name or 'midpoint'; None leaves each
            tree as returned by the tree builder.
        :param useAllLoci: if true, every round uses all loci unchanged
            instead of a resampled selection.
        :raises ValueError: if ``bootstraps`` is smaller than 1.
        """
        # Without at least one round there is no tree to build a consensus
        # from (and no selection to report below), so fail early and clearly.
        if bootstraps < 1:
            raise ValueError(f'bootstraps must be at least 1, got {bootstraps}')

        ## allLoci: all loci that are variable in at least one population
        allpolySites = {True: 'allpolySites',
                        False: 'allpolySitesVCF'}[afbased]
        allLoci = set()
        for pop in self.populations:
            allLoci = allLoci.union(getattr(pop, allpolySites))
        allLoci = list(allLoci) ## sort it? Reduce to independent sites (www.pnas.org/content/93/23/13429, run LD?)

        sites = len(allLoci)
        # The tree-building method does not depend on the round, so it is
        # resolved once up front.
        constructor = DistanceTreeConstructor() # could've passed treebuilder here too
        build_tree = getattr(constructor, treebuilder)

        trees = []
        print ("Bootstrapping, rounds:", end=' ')
        for round_no in range(bootstraps): ## see also parallelized version
            print(round_no, end=' ')
            if useAllLoci:
                selectedLoci = allLoci
            else:
                # Draw as many loci as there are, with replacement.
                selectedIdx = np.random.choice(range(len(allLoci)), sites, replace=True)
                selectedLoci = [allLoci[idx] for idx in selectedIdx]
            df = pd.DataFrame([pop.bootstrap(selectedLoci, afbased) for pop in self.populations], index=self.popnames)
            self.dmNei = neiDF(df, [5]*(sites-1))
            ## annoying conversion, BioPython couldnt be just more compatible with scipy/pdist?
            # _DistanceMatrix is given the lower triangle including the
            # diagonal: row i carries its first i + 1 entries.
            dmTriangular = [list(self.dmNei[i, :(i + 1)]) for i in range(len(self.dmNei))]

            m = _DistanceMatrix(self.popnames, dmTriangular)
            tree = build_tree(m)
            if outgroup == 'midpoint':
                tree.root_at_midpoint()
            elif outgroup is not None:
                tree.root_with_outgroup({'name': outgroup})
            trees.append(tree) ## use nj!
        ## debug info: loci selected in the last round
        print(f'selectedLoci: {selectedLoci[:30]}')
        ## see https://biopython.org/wiki/Phylo, turned out to be more suitable than dendropy/sumtrees
        self.majorityTree = Consensus.majority_consensus(trees) ## also consider strict_consensus and adam_consensus (but they don't have bootstrap support values)
        # NOTE(review): resultDir is not defined in this method; presumably a
        # module-level name -- confirm.
        treefile = '%s/%s_%s_%s_%s.nwk' %(resultDir, basename, bootstraps,treebuilder, len(self.populations))
        Phylo.write(self.majorityTree, treefile, format='newick')
        print(f'wrote {treefile}')
        Phylo.draw_ascii(self.majorityTree)
Esempio n. 7
0
the script parallelBootstrap.py, which dumps a list of trees that were created with BioPython's
DistanceTreeConstructor. 

Alternative consensus construction that BioPython provides:
 * strict_consensus
 * adam_consensus
They don't provide bootstrap support values, though.

#run on HPC
#source activate bio3
"""

# Directory holding the pickled tree lists produced by the bootstrap runs.
datadir = "/research/btc_bioinformatic/operations/HLA/FreqRT/Data"

# Default basename of the pickled tree files; a command-line argument
# overrides it.
treebase = "maj_ABC_1_nj_92"
# Test the argument list, not sys.path: the module search path has no bearing
# on whether an argument was supplied, and checking it would replace the
# default with the script's own name when none is given.
if len(sys.argv) > 1:
    treebase = sys.argv[-1]
#maj_AB_min95_1_nj_246_0001.pcl

# Collect the trees from every pickle file that starts with the basename.
treefiles = glob.glob("%s/%s*.pcl" % (datadir, treebase))
trees = []
for treefile in treefiles:
    with open(treefile, "rb") as tf:
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # files produced by trusted runs.
        trees.extend(pickle.load(tf))

# A consensus needs at least one tree; stop with a clear message otherwise.
if not trees:
    raise SystemExit("No trees found in %s/%s*.pcl" % (datadir, treebase))

majorityTree = Consensus.majority_consensus(trees)

Phylo.write(majorityTree, '../Data/%s.nwk' % treebase, format='newick')

Phylo.draw_ascii(majorityTree)