Esempio n. 1
0
    def get_dn_ds_tree(self,
                       dn_ds_method="NG86",
                       tree_method="UPGMA",
                       codon_table=default_codon_table):
        """Construct a dn tree and a ds tree from the dn/ds distance matrices.

        Arguments:

            - dn_ds_method - Available methods include NG86, LWL85, YN00 and ML.
            - tree_method  - Available methods include UPGMA and NJ.
            - codon_table  - Codon table forwarded to ``get_dn_ds_matrix``.

        Returns a ``(dn_tree, ds_tree)`` tuple.

        Raises RuntimeError if ``tree_method`` is neither UPGMA nor NJ.
        """
        # Reject a bad tree method up front so that a typo is reported before
        # the dn/ds matrices are computed rather than after.
        if tree_method not in ("UPGMA", "NJ"):
            raise RuntimeError("Unknown tree method ({0}). Only NJ and UPGMA "
                               "are accepted.".format(tree_method))

        from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

        dn_dm, ds_dm = self.get_dn_ds_matrix(method=dn_ds_method,
                                             codon_table=codon_table)
        dn_constructor = DistanceTreeConstructor()
        ds_constructor = DistanceTreeConstructor()
        if tree_method == "UPGMA":
            dn_tree = dn_constructor.upgma(dn_dm)
            ds_tree = ds_constructor.upgma(ds_dm)
        else:
            dn_tree = dn_constructor.nj(dn_dm)
            ds_tree = ds_constructor.nj(ds_dm)
        return dn_tree, ds_tree
Esempio n. 2
0
    def get_dn_ds_tree(self,
                       dn_ds_method="NG86",
                       tree_method="UPGMA",
                       codon_table=None):
        """Construct dn tree and ds tree.

        Argument:
         - dn_ds_method - Available methods include NG86, LWL85, YN00 and ML.
         - tree_method  - Available methods include UPGMA and NJ.
         - codon_table  - Codon table forwarded to ``get_dn_ds_matrix``;
           ``CodonTable.generic_by_id[1]`` is used when this is None.

        Returns a ``(dn_tree, ds_tree)`` tuple.

        Raises RuntimeError if ``tree_method`` is neither UPGMA nor NJ.

        """
        # Reject a bad tree method up front so that a typo is reported before
        # the dn/ds matrices are computed rather than after.
        if tree_method not in ("UPGMA", "NJ"):
            raise RuntimeError(f"Unknown tree method ({tree_method})."
                               " Only NJ and UPGMA are accepted.")

        from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

        if codon_table is None:
            codon_table = CodonTable.generic_by_id[1]
        dn_dm, ds_dm = self.get_dn_ds_matrix(method=dn_ds_method,
                                             codon_table=codon_table)
        dn_constructor = DistanceTreeConstructor()
        ds_constructor = DistanceTreeConstructor()
        if tree_method == "UPGMA":
            dn_tree = dn_constructor.upgma(dn_dm)
            ds_tree = ds_constructor.upgma(ds_dm)
        else:
            dn_tree = dn_constructor.nj(dn_dm)
            ds_tree = ds_constructor.nj(ds_dm)
        return dn_tree, ds_tree
def upgma_tree_constructor(x):
    """Print and ASCII-draw a UPGMA tree built from identity distances of ``x``.

    ``x`` is handed to ``DistanceCalculator("identity").get_distance``.
    Nothing is returned; the tree is only printed and drawn.
    """
    distances = DistanceCalculator("identity").get_distance(x)
    tree = DistanceTreeConstructor().upgma(distances)
    print(tree)
    Phylo.draw_ascii(tree)
def nj_tree_constructor(x):
    """Print and ASCII-draw a neighbor-joining tree built from identity distances of ``x``.

    ``x`` is handed to ``DistanceCalculator("identity").get_distance``.
    Nothing is returned; the tree is only printed and drawn.
    """
    distances = DistanceCalculator("identity").get_distance(x)
    tree = DistanceTreeConstructor().nj(distances)
    print(tree)
    Phylo.draw_ascii(tree)
Esempio n. 5
0
def fastaToNJTree(fastaFile, outputFile):
    """Build a neighbor-joining tree from a FASTA alignment and save it as Newick.

    Args:
        fastaFile: FASTA alignment source passed to ``AlignIO.read``.
        outputFile: destination passed to ``Phylo.write``.
    """
    aln = AlignIO.read(fastaFile, 'fasta')
    # build_tree() derives the distances from the calculator it was given, so
    # a separate get_distance() pass here would only repeat that work.
    constructor = DistanceTreeConstructor(DistanceCalculator('identity'), 'nj')
    tree = constructor.build_tree(aln)
    Phylo.write(tree, outputFile, 'newick')
Esempio n. 6
0
def dna(file_path, file_format, algorithm):
    """Read an alignment, print its distance matrix and draw a distance tree.

    Args:
        file_path: alignment source passed to ``AlignIO.read``.
        file_format: alignment format name passed to ``AlignIO.read``.
        algorithm: ``'upgma'`` or ``'nj'`` (case-insensitive). Any other value
            is reported with ``click.echo`` and no tree is drawn.
    """
    # Read the alignment
    aln = AlignIO.read(file_path, file_format)

    # Print the alignment
    print(aln)

    # Calculate the distance matrix
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)

    # Print the distance matrix itself (not the calculator that produced it)
    print('\nDistance Matrix\n===================')
    print(dm)

    # Construct the phylogenetic tree using the chosen algorithm
    constructor = DistanceTreeConstructor()
    chosen = algorithm.lower()
    if chosen == 'upgma':
        tree = constructor.upgma(dm)
    elif chosen == 'nj':
        tree = constructor.nj(dm)
    else:
        click.echo('Invalid algorithm!')
        # No tree exists for an unknown algorithm, so stop instead of falling
        # through to the drawing code with an unbound name.
        return

    # Draw the phylogenetic tree
    Phylo.draw(tree)

    # Print the phylogenetic tree in the terminal
    print('\nPhylogenetic Tree\n===================')
    Phylo.draw_ascii(tree)
def build_trees(filename, tree_name):
    """Align ``<filename>.fa`` with ClustalW and show UPGMA and NJ trees.

    The alignment is read back from ``<filename>.aln``, distances are
    computed with the ``blosum62`` model, and each tree is drawn in turn
    with a title derived from ``tree_name``.
    """
    def label_func(clade):
        # Blank label for nodes named "Inner..."; the clade itself otherwise.
        return "" if clade.name.startswith("Inner") else clade

    # Run the ClustalW command line, then load the alignment it produced.
    run_clustalw = ClustalwCommandline("clustalw",
                                       infile="{}.fa".format(filename))
    run_clustalw()
    alignment = AlignIO.read("{}.aln".format(filename), format="clustal")

    dist_matrix = DistanceCalculator('blosum62').get_distance(alignment)

    # Both trees are built before anything is drawn.
    constructor = DistanceTreeConstructor()
    titled_trees = (
        (constructor.upgma(dist_matrix), "{} × upgma"),
        (constructor.nj(dist_matrix), "{} × nj"),
    )

    for tree, title_template in titled_trees:
        Phylo.draw(tree, label_func=label_func, do_show=False)
        plt.title(title_template.format(tree_name))
        plt.show()
Esempio n. 8
0
def make_newick_tree(dm, outgroup="KE136308.1"):
    """Build UPGMA and NJ trees from ``dm`` and root both on one outgroup.

    Args:
        dm: distance matrix passed to the tree constructor.
        outgroup: name of the clade used as outgroup for both trees; the
            default keeps the previously hard-coded value.

    Returns:
        ``(upgmatree, njtree)``, both rooted with the outgroup.
    """
    constructor = DistanceTreeConstructor()
    upgmatree = constructor.upgma(dm)
    njtree = constructor.nj(dm)
    for tree in (upgmatree, njtree):
        tree.root_with_outgroup({'name': outgroup})
    return upgmatree, njtree
Esempio n. 9
0
    def get_tree(self,
                 chrom,
                 start=1,
                 end=None,
                 samples=None,
                 return_format="tree_obj"):
        """Build a neighbor-joining tree for a region from ``self.get_matrix``.

        The request is echoed to stdout, the names and matrix are fetched
        with ``return_format="Phylo"`` and wrapped in a ``_DistanceMatrix``.

        Returns the tree object when ``return_format`` is ``"tree_obj"``, the
        stripped Newick text when it is ``"newick"``, and None for any other
        value.
        """
        print("chrom: {} start: {} end: {} samples: {}".format(
            chrom, start, end, samples))

        names, matrix = self.get_matrix(
            chrom, start=start, end=end, samples=samples,
            return_format="Phylo")

        nj_tree = DistanceTreeConstructor().nj(_DistanceMatrix(names, matrix))

        if return_format == "tree_obj":
            return nj_tree
        if return_format == "newick":
            buffer = StringIO()
            Phylo.write(nj_tree, buffer, "newick")
            return buffer.getvalue().strip()
        return None
Esempio n. 10
0
def consensus(msa):
    """Print a neighbor-joining tree built from identity distances of ``msa``.

    ``msa`` is wrapped in a ``MultipleSeqAlignment`` before the tree is built.
    Nothing is returned.
    """
    alignment = MultipleSeqAlignment(msa)
    # build_tree() derives the distances from the calculator it was given, so
    # a separate get_distance() pass here would only repeat that work.
    constructor = DistanceTreeConstructor(DistanceCalculator('identity'), 'nj')
    tree = constructor.build_tree(alignment)
    # Parenthesised so the statement parses under Python 3 as well as Python 2.
    print(tree)
Esempio n. 11
0
 def printGeneTree(self):
     """
     Show a UPGMA gene tree for the alignment stored in ``self.newPhylip``.

     The PHYLIP alignment is read, an identity-based distance matrix is
     computed and printed, a UPGMA tree is built from that matrix, and the
     tree is rendered with ``Phylo.draw`` and as ASCII art in the terminal.
     input: ``self.newPhylip``, a PHYLIP alignment readable by AlignIO
     output: nothing is returned; the matrix and the tree are displayed
     """
     alignment = AlignIO.read(self.newPhylip, 'phylip')
     distances = DistanceCalculator('identity').get_distance(alignment)

     print(
         '\n======================================== DISTANCE MATRIX =======================================\n'
     )
     print(distances, "\n\n")

     gene_tree = DistanceTreeConstructor().upgma(distances)

     print(
         '\n========================================= GENE TREE ===========================================\n'
     )
     # Phylo.draw needs matplotlib; draw_ascii writes to the terminal.
     Phylo.draw(gene_tree)
     Phylo.draw_ascii(gene_tree)
Esempio n. 12
0
    def summarise_dist(self, rf_results: RfResults, dir_out):
        """Write an NJ tree and a clustered heatmap of Robinson-Foulds distances.

        Two passes are made over ``rf_results.data`` (a mapping of
        ``(tid_a, tid_b)`` to ``(rf, norm_rf)``): first with the normalised
        value, then with the raw one. Each pass writes a Newick tree and an
        SVG heatmap into ``dir_out``.
        """
        variants = (
            (True, 'rf_normed.tree', 'rf_normed_heatmap.svg',
             'Normalised Robinson-Foulds Distance'),
            (False, 'rf_un_normed.tree', 'rf_un_normed_heatmap.svg',
             '(un)Normalised Robinson-Foulds Distance'),
        )

        for use_norm, tree_file, heatmap_file, plt_title in variants:
            path_out = os.path.join(dir_out, tree_file)
            path_hm = os.path.join(dir_out, heatmap_file)

            # Symmetric lookup of the chosen distance for every pair of ids.
            pairwise = defaultdict(dict)
            seen_ids = set()
            for (tid_a, tid_b), (rf, norm_rf) in rf_results.data.items():
                distance = norm_rf if use_norm else rf
                pairwise[tid_a][tid_b] = distance
                pairwise[tid_b][tid_a] = distance
                seen_ids.update((tid_a, tid_b))

            labels = sorted(seen_ids)
            size = len(labels)
            square = np.zeros((size, size))
            lower_rows = []
            for i, tid_a in enumerate(labels):
                row = []
                for j, tid_b in enumerate(labels[:i + 1]):
                    if tid_a == tid_b:
                        row.append(0.0)
                        continue
                    distance = pairwise[tid_a][tid_b]
                    row.append(distance)
                    square[i, j] = distance
                lower_rows.append(row)
            # Only the lower triangle was filled; mirror it into the upper one.
            square = square + square.T

            # Newick
            dm = DistanceMatrix(names=labels, matrix=lower_rows)
            nj_tree = DistanceTreeConstructor().nj(dm)
            Phylo.write(nj_tree, path_out, 'newick')

            # Heatmap
            cmap = sns.cubehelix_palette(100, reverse=True)
            sns.set(font_scale=1)

            frame = pd.DataFrame(square, columns=labels, index=labels)
            grid = sns.clustermap(frame,
                                  annot=True,
                                  fmt='.3f',
                                  cmap=cmap,
                                  figsize=(15, 15))
            grid.fig.suptitle(plt_title)
            plt.savefig(path_hm)
Esempio n. 13
0
def plot_phylo_tree(align: MultipleSeqAlignment, accession_numbers: dict):
    """
    Plots a phylogenetic tree
    :param align: MultipleSeqAlignment with the alignment result to be plotted
    :param accession_numbers: dict of accession numbers and their translation to human-understandable names
    :return: figure-handle of the plotted phylogenetic tree
    :raises KeyError: if a terminal's accession is missing from accession_numbers
    """
    # calculate distance - https://biopython.org/wiki/Phylo
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(align)

    # construct a tree
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(dm)

    # remove the names for the non-terminals for better visual appeal
    for non_terminal in tree.get_nonterminals():
        non_terminal.name = ''

    # change accession numbers into more human-understandable names
    for terminal in tree.get_terminals():
        # Raw string: "\S" and "\." are regex escapes, not string escapes.
        versioned = re.match(r"(^\S*)(?=\.)", terminal.name)
        # A name without a ".<suffix>" part has no match; look it up as-is
        # instead of failing on a None match object.
        accession = versioned[0] if versioned else terminal.name
        terminal.name = accession_numbers[accession]

    # draw_ascii() writes the tree itself; wrapping it in print() would only
    # add a stray "None" line after it.
    Phylo.draw_ascii(tree)

    # plot the tree
    fig, ax = plt.subplots(1, 1)
    # draw the resulting tree
    Phylo.draw(tree, show_confidence=False, axes=ax, do_show=False)
    ax.set_xlim(right=0.8)
    return fig
Esempio n. 14
0
def construct_tree(gene_name, with_marburg=1, algorithm='UPGMA'):
    """Build a tree (UPGMA unless ``algorithm`` is ``'NJ'``) from a stored edit matrix.

    The edit matrix is read from ``./Output/edit_matrices/<gene_name>.csv``,
    converted with ``convert_tu_lower_triangular`` and wrapped in a
    ``DistanceMatrix``. The tree is passed to ``save_tree`` and returned.
    """
    # NOTE(review): the flag reads inverted - ``with_marburg == 1`` selects the
    # five-virus set WITHOUT Marburg, any other value adds Marburg. Behaviour
    # is kept as found; confirm the intended meaning with the callers.
    names = ['Bundibugyo', 'Reston', 'Sudan', 'TaiForest', 'Zaire']
    if with_marburg != 1:
        print('Constructing {0}\'s Tree with All Viruses with Marburg'.format(gene_name))
        filename = algorithm + '_' + gene_name + '_with_Marburg'
        names.append('Marburg')
        marburg_genome = SeqIO.read("./Data/Marburg_genome.fasta", "fasta")
        Alignment.read_data()
        print('Aligning Genes for marburg_genome')
        # The matrix file for this variant carries a lower-case suffix.
        gene_name += '_with_marburg'
        Alignment.read_genes(marburg_genome)
    else:
        print('Constructing Tree with All Viruses without Marburg')
        filename = algorithm + '_' + gene_name

    print('Reading edit matrix and construct tree')
    csv_path = "./Output/edit_matrices/" + gene_name + ".csv"
    lower_triangle = convert_tu_lower_triangular(
        pd.read_csv(csv_path, header=None))
    distance_matrix = DistanceMatrix(names=names, matrix=lower_triangle)

    constructor = DistanceTreeConstructor()
    build = constructor.nj if algorithm == 'NJ' else constructor.upgma
    tree = build(distance_matrix)
    save_tree(tree, filename)
    return tree
Esempio n. 15
0
def get_tree(aln, kind='nj'):
    """Build a distance tree from an alignment using identity distances.

    Args:
        aln: alignment passed to ``DistanceCalculator.get_distance``.
        kind: ``'nj'`` (default) or ``'upgma'``, case-insensitive. Previously
            this argument was ignored and NJ was always used.

    Returns:
        ``(dm, tree)``: the distance matrix and the tree built from it.

    Raises:
        ValueError: if ``kind`` names neither supported method.
    """
    method = str(kind).lower()
    # Checked before the Biopython import and the distance computation so an
    # unsupported kind is reported immediately.
    if method not in ('nj', 'upgma'):
        raise ValueError("Unknown tree kind ({0}). Only nj and upgma "
                         "are accepted.".format(kind))

    from Bio.Phylo.TreeConstruction import DistanceCalculator,DistanceTreeConstructor
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(dm) if method == 'upgma' else constructor.nj(dm)
    return dm, tree
def buildTree(FASTAFile):
    """Build a midpoint-rooted NJ tree from a FASTA alignment and return it via ``NewicktoRLR``.

    The tree is drawn with ``Phylo.draw``, written as plain Newick into a
    buffer, stripped of ``Inner<n>`` labels and the trailing ``;``, parsed
    with ``literal_eval`` and finally converted by ``NewicktoRLR``.
    """
    alignment = AlignIO.read(FASTAFile, "fasta")

    # Record id -> sequence text (not consulted again inside this function).
    tip_mapping = {record.id: str(record.seq) for record in alignment}

    # Identity distances -> neighbor-joining tree, rooted at its midpoint.
    distances = DistanceCalculator("identity").get_distance(alignment)
    nj_tree = DistanceTreeConstructor().nj(distances)
    nj_tree.root_at_midpoint()
    Phylo.draw(nj_tree)

    # Quote every tip name so the Newick text can be parsed by literal_eval
    # once the inner labels and the ";" terminator are gone.
    for tip in nj_tree.get_terminals():
        tip.name = "\"" + tip.name + "\""
    newick_buffer = cStringIO.StringIO()
    Phylo.write(nj_tree, newick_buffer, 'newick', plain = True)
    newick_text = newick_buffer.getvalue()
    newick_text = re.sub(r'Inner\d*', '', newick_text).replace(";", "")
    nested = literal_eval(newick_text)

    # RLR tree required for maxParsimony function
    return NewicktoRLR(nested)
Esempio n. 17
0
def tree_reconstruction(phy_file, method, model, phyformat):
    '''Construct tree with given method and model.

    phy_file  - PHYLIP alignment, read with format 'phylip-' + phyformat.
    method    - 'upgma' or 'nj'.
    model     - distance model name passed to DistanceCalculator.
    phyformat - suffix appended to 'phylip-' to form the AlignIO format.

    Writes tree.nwk and tree.svg under args.output.
    Raises ValueError if method is neither 'upgma' nor 'nj'.
    '''
    # Checked first: an unknown method used to surface only after the file was
    # read, as an UnboundLocalError on ``tree``.
    if method not in ('upgma', 'nj'):
        raise ValueError("Unknown tree method ({0}). Only upgma and nj "
                         "are accepted.".format(method))

    aln = AlignIO.read(phy_file, 'phylip-' + phyformat)

    constructor = DistanceTreeConstructor()
    calculator = DistanceCalculator(model)
    dm = calculator.get_distance(aln)

    if method == 'upgma':
        tree = constructor.upgma(dm)
    else:
        tree = constructor.nj(dm)

    tree.ladderize()

    # Blank the "Inner..." labels; a clade with no name is left untouched.
    for c in tree.find_clades():
        if c.name and 'Inner' in c.name:
            c.name = ''

    Phylo.write(tree, args.output + '/tree.nwk', 'newick')

    plt.rcParams['font.style'] = 'italic'
    plt.rc('font', size=8)
    plt.rc('axes', titlesize=14)
    plt.rc('xtick', labelsize=10)
    plt.rc('ytick', labelsize=10)
    plt.rc('figure', titlesize=18)

    draw(tree, do_show=False)
    plt.savefig(args.output + "/tree.svg", format='svg', dpi=1200)
Esempio n. 18
0
def build_phylogeny_trees():
    """Align every file in the homologous-gene directory and draw a UPGMA tree for each.

    For each entry of ``out/homologous_gene_sequences/`` the Clustal Omega
    command line is run, the aligned FASTA is read back, an identity
    distance matrix is computed, and the resulting UPGMA tree is drawn,
    printed as ASCII and written as Nexus.
    """
    source_dir = "out/homologous_gene_sequences/"
    aligned_dir = "out/aligned_homologous_gene_sequences/"

    for homologous_gene_sequence in os.listdir(source_dir):
        unaligned_file = source_dir + homologous_gene_sequence
        aligned_file = aligned_dir + homologous_gene_sequence

        clustal_omega = ClustalOmegaCommandline(infile=unaligned_file, outfile=aligned_file, verbose=True, auto=True)
        # NOTE(review): the exit status of the command is not inspected, so a
        # failed run is only noticed if the read below fails.
        os.system(str(clustal_omega))

        multi_seq_align = AlignIO.read(aligned_file, 'fasta')

        # Distance matrix -> UPGMA tree
        dist_mat = DistanceCalculator('identity').get_distance(multi_seq_align)
        phylo_tree = DistanceTreeConstructor().upgma(dist_mat)

        Phylo.draw(phylo_tree)

        print('\nPhylogenetic Tree\n', homologous_gene_sequence)
        Phylo.draw_ascii(phylo_tree)
        Phylo.write([phylo_tree], 'out/phylogenetic_trees/{}_tree.nex'.format(homologous_gene_sequence), 'nexus')
Esempio n. 19
0
def NJ(thatdm):
    """Return an ``XTree`` wrapping the neighbor-joining tree of ``thatdm``.

    Each terminal clade is mapped to a one-element set holding its name.
    """
    nj_tree = DistanceTreeConstructor().nj(thatdm)
    leaf_labels = {leaf: {leaf.name} for leaf in nj_tree.get_terminals()}
    return XTree(nj_tree, leaf_labels)
Esempio n. 20
0
def construct_tree(matrix, nj=True):
    """Build a tree from a distance matrix

    Can either use neighbor-joining (nj) or UPGMA.

    ``matrix`` must be a non-empty list; it is wrapped in a
    ``_DistanceMatrix`` whose names are the row indices as strings.
    Returns the tree with its inner-node names blanked, or None (after
    printing a message) when ``matrix`` is not a non-empty list.
    """

    # The type is checked before truthiness so objects whose truth value is
    # ambiguous are rejected cleanly instead of raising.
    if not (isinstance(matrix, list) and matrix):
        # Parenthesised so the statement parses under Python 2 and Python 3.
        print("matrix has invalid value")
        return None

    dm = _DistanceMatrix(names=[str(i) for i in range(len(matrix))],
                         matrix=matrix)

    constructor = DistanceTreeConstructor()
    if nj:
        tree = constructor.nj(dm)
    else:
        tree = constructor.upgma(dm)

    # this will remove the names from the inner nodes
    # this is critical for seq-gen to read in the tree
    for clade in tree.get_nonterminals():
        clade.name = ''

    return tree
Esempio n. 21
0
def main(argv):
    """Draw a UPGMA tree from the square distance table stored in ``argv[1]``.

    ``argv[1]`` is a comma-delimited numeric file; only the part below the
    diagonal is used, one row per entry of ``M_labels``.
    """
    # Test table data and corresponding labels
    M_labels = [
        'Wuttagoonaspis', 'Romundina', 'Brindabellaspis', 'Eurycaraspis',
        'Entelognathus'
    ]
    print(M_labels)
    # Passing the path lets numpy open and close the file itself.
    M = np.loadtxt(argv[1], delimiter=",")

    # Lower-triangular rows with a zero diagonal, sliced directly. The
    # previous approach marked the upper triangle with 1s and filtered them
    # out, which also discarded any genuine distance equal to 1.
    lower_rows = [M[i, :i].tolist() + [0.0] for i in range(len(M_labels))]

    m = _Matrix(M_labels, lower_rows)
    print(type(m))

    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(m)
    Phylo.draw(tree)
Esempio n. 22
0
def main():
    """Time and draw NJ and UPGMA trees for the sequences in ``data/coding.fa``.

    The FASTA records are collected into a gapped-DNA alignment, one identity
    distance matrix is computed, and each tree-building call is timed and
    then drawn with ``get_label`` as the label function.
    """
    def timed(build, matrix):
        # Return the built tree together with the seconds the call took.
        began = time.time()
        built = build(matrix)
        return built, time.time() - began

    # Alternative input kept from the original: "data/cons_noncode.fa"
    file_name = "data/coding.fa"

    alignment = MultipleSeqAlignment([], Gapped(IUPAC.unambiguous_dna, "-"))
    for seq_record in SeqIO.parse(file_name, "fasta"):
        alignment.extend([seq_record])

    print("Number of characters in alignment:", len(alignment[0]))

    dm = DistanceCalculator('identity').get_distance(alignment)
    constructor = DistanceTreeConstructor()

    # Neighbor joining
    tree, seconds = timed(constructor.nj, dm)
    print("Neighbor joining ran in {} seconds.".format(seconds))
    Phylo.draw(tree, label_func=get_label)

    # UPGMA
    tree, seconds = timed(constructor.upgma, dm)
    print("UPGMA ran in {} seconds.".format(seconds))
    Phylo.draw(tree, label_func=get_label)
Esempio n. 23
0
def nj_wordlist(
        wordlist,
        column="Value",
        method=DistanceTreeConstructor.nj):
    """Build a distance tree from Hamming distances between languages.

    ``wordlist`` is read as a tab-separated table and grouped by
    ``Language_ID``. Each language is represented by the set of values it
    has in ``column``, and the distance between two languages is the size
    of the symmetric difference of their sets. ``method`` is called with a
    fresh ``DistanceTreeConstructor`` and the distance matrix (neighbor
    joining by default).

    """
    table = pandas.read_csv(wordlist, sep="\t")
    per_language = [
        (language, set(rows[column]))
        for language, rows in table.groupby("Language_ID")]
    languages = [language for language, _ in per_language]
    cogids = [values for _, values in per_language]

    # Lower-triangular rows, diagonal included.
    lower_triangle = []
    for i, own_values in enumerate(cogids):
        lower_triangle.append(
            [len(own_values ^ other) for other in cogids[:i + 1]])

    dm = _DistanceMatrix(languages, lower_triangle)
    return method(DistanceTreeConstructor(), dm)
Esempio n. 24
0
def main():
    """Draw a ladderized UPGMA tree for the alignment in ``protein.fasta``."""
    # Passing the path lets AlignIO open and close the file itself; the
    # previous bare open() handle was never closed.
    alignment = AlignIO.read("protein.fasta", "fasta")
    # build_tree() derives the distances from the calculator it was given, so
    # a separate get_distance() pass here would only repeat that work.
    constructor = DistanceTreeConstructor(DistanceCalculator('identity'), 'upgma')
    tree = constructor.build_tree(alignment)
    tree.ladderize()
    Phylo.draw(tree)
Esempio n. 25
0
 def test_bootstrap_consensus(self):
     """Bootstrap consensus over ``self.msa`` yields a Tree that can be written as Newick."""
     calculator = DistanceCalculator("blosum62")
     constructor = DistanceTreeConstructor(calculator, "nj")
     tree = Consensus.bootstrap_consensus(self.msa, 100, constructor,
                                          Consensus.majority_consensus)
     # assertIsInstance reports the actual type when the check fails.
     self.assertIsInstance(tree, BaseTree.Tree)
     Phylo.write(tree, os.path.join(temp_dir, "bootstrap_consensus.tre"),
                 "newick")
Esempio n. 26
0
 def test_bootstrap_consensus(self):
     """Bootstrap consensus over ``self.msa`` yields a Tree that can be written as Newick."""
     calculator = DistanceCalculator('blosum62')
     constructor = DistanceTreeConstructor(calculator, 'nj')
     tree = Consensus.bootstrap_consensus(self.msa, 100, constructor,
                                          Consensus.majority_consensus)
     # assertIsInstance reports the actual type when the check fails.
     self.assertIsInstance(tree, BaseTree.Tree)
     Phylo.write(tree, './TreeConstruction/bootstrap_consensus.tre',
                 'newick')
Esempio n. 27
0
def create_tree_distance_impl(msa, algorithm):
    """Build a distance tree for ``msa`` and save it as Nexus text and a PNG image.

    ``algorithm`` is passed as the constructor's ``method`` and is also part
    of the image file name. The Nexus file name carries a random integer.
    Returns the path of the saved PNG.
    """
    builder = DistanceTreeConstructor(
        distance_calculator=DistanceCalculator('identity'),
        method=algorithm)
    built_tree = builder.build_tree(msa)

    nexus_path = "../../data/created/tree" + str(random.randint(0,10000000)) + ".nex"
    Phylo.write(built_tree, nexus_path, "nexus")

    Phylo.draw(built_tree,do_show=False)
    image_path = "../../data/created/createdTree"+algorithm+".png"
    plt.savefig(image_path)
    return image_path
Esempio n. 28
0
def get_tree():
    """Return the neighbor-joining tree for the Clustal alignment in ``agc.aln``.

    Distances are computed with the ``identity`` model.
    """
    alignment = AlignIO.read('agc.aln', 'clustal')
    distances = DistanceCalculator('identity').get_distance(alignment)
    return DistanceTreeConstructor().nj(distances)
Esempio n. 29
0
 def build_nj_tree(self):
     """Return the neighbor-joining tree of ``self.distance_matrix()`` as Newick text."""
     distances = self.distance_matrix()
     nj_tree = DistanceTreeConstructor().nj(distances)

     # Serialise through an in-memory buffer, then release it.
     newick_buffer = StringIO.StringIO()
     Phylo.write(nj_tree, newick_buffer, 'newick')
     newick_text = newick_buffer.getvalue()
     newick_buffer.close()
     return newick_text
Esempio n. 30
0
def build_tree(aln, kind='nj'):
    """Build a tree with bio.phylo module

    Args:
        aln: alignment passed to ``DistanceCalculator.get_distance``.
        kind: ``'nj'`` (default) or ``'upgma'``, case-insensitive. Previously
            this argument was ignored and NJ was always used.

    Returns:
        ``(dm, tree)``: the identity distance matrix and the tree built
        from it.

    Raises:
        ValueError: if ``kind`` names neither supported method.
    """
    method = str(kind).lower()
    # Checked before the Biopython import and the distance computation so an
    # unsupported kind is reported immediately.
    if method not in ('nj', 'upgma'):
        raise ValueError("Unknown tree kind ({0}). Only nj and upgma "
                         "are accepted.".format(kind))

    from Bio.Phylo.TreeConstruction import DistanceCalculator,DistanceTreeConstructor
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(dm) if method == 'upgma' else constructor.nj(dm)
    return dm, tree