Esempio n. 1
0
    def get_dn_ds_tree(self,
                       dn_ds_method="NG86",
                       tree_method="UPGMA",
                       codon_table=None):
        """Build a dn tree and a ds tree from the pairwise dn/ds matrices.

        Arguments:
         - dn_ds_method - forwarded to ``get_dn_ds_matrix`` as ``method``;
           available methods include NG86, LWL85, YN00 and ML.
         - tree_method  - clustering algorithm, either "UPGMA" or "NJ".
         - codon_table  - codon table forwarded to ``get_dn_ds_matrix``;
           ``CodonTable.generic_by_id[1]`` is used when it is None.

        Returns the tuple ``(dn_tree, ds_tree)``.  Any other ``tree_method``
        raises RuntimeError after the matrices have been computed.

        """
        from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

        table = (CodonTable.generic_by_id[1]
                 if codon_table is None else codon_table)
        dn_matrix, ds_matrix = self.get_dn_ds_matrix(method=dn_ds_method,
                                                     codon_table=table)
        dn_builder = DistanceTreeConstructor()
        ds_builder = DistanceTreeConstructor()

        if tree_method == "UPGMA":
            return dn_builder.upgma(dn_matrix), ds_builder.upgma(ds_matrix)
        if tree_method == "NJ":
            return dn_builder.nj(dn_matrix), ds_builder.nj(ds_matrix)
        raise RuntimeError(f"Unknown tree method ({tree_method})."
                           " Only NJ and UPGMA are accepted.")
Esempio n. 2
0
    def get_dn_ds_tree(self,
                       dn_ds_method="NG86",
                       tree_method="UPGMA",
                       codon_table=default_codon_table):
        """Construct a dn tree and a ds tree.

        Argument:

            - dn_ds_method - passed to ``get_dn_ds_matrix`` as ``method``;
              available methods include NG86, LWL85, YN00 and ML.
            - tree_method  - "UPGMA" or "NJ".
            - codon_table  - codon table passed through to
              ``get_dn_ds_matrix``.

        Returns the pair ``(dn_tree, ds_tree)``.  A ``tree_method`` other
        than the two above raises RuntimeError once the matrices exist.
        """
        from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

        matrices = self.get_dn_ds_matrix(method=dn_ds_method,
                                         codon_table=codon_table)
        dn_matrix, ds_matrix = matrices
        dn_builder = DistanceTreeConstructor()
        ds_builder = DistanceTreeConstructor()

        if tree_method == "NJ":
            trees = (dn_builder.nj(dn_matrix), ds_builder.nj(ds_matrix))
        elif tree_method == "UPGMA":
            trees = (dn_builder.upgma(dn_matrix), ds_builder.upgma(ds_matrix))
        else:
            message = ("Unknown tree method ({0}). Only NJ and UPGMA "
                       "are accepted.").format(tree_method)
            raise RuntimeError(message)
        return trees
Esempio n. 3
0
def make_newick_tree(dm, outgroup="KE136308.1"):
    """Build a UPGMA tree and an NJ tree from ``dm`` and root both of them.

    Args:
        dm: distance matrix handed to ``DistanceTreeConstructor.upgma`` and
            ``DistanceTreeConstructor.nj``.
        outgroup: name of the taxon used to root both trees.  Defaults to
            "KE136308.1", the value that used to be hard-coded, so existing
            one-argument calls behave as before.

    Returns:
        The tuple ``(upgma_tree, nj_tree)``, each rooted with ``outgroup``.
    """
    constructor = DistanceTreeConstructor()
    upgmatree = constructor.upgma(dm)
    njtree = constructor.nj(dm)
    for tree in (upgmatree, njtree):
        tree.root_with_outgroup({'name': outgroup})
    return upgmatree, njtree
Esempio n. 4
0
 def printGeneTree(self):
     """
     Print the identity distance matrix and the UPGMA tree of an alignment.

     Reads the PHYLIP alignment stored at ``self.newPhylip``, computes its
     pairwise 'identity' distance matrix, prints that matrix, builds a
     UPGMA tree from it and renders the tree twice: with ``Phylo.draw``
     (needs matplotlib) and as ASCII art in the terminal.

     input: none besides ``self.newPhylip``
     output: nothing is returned; everything is printed or drawn
     """
     alignment = AlignIO.read(self.newPhylip, 'phylip')
     distance_matrix = DistanceCalculator('identity').get_distance(alignment)

     matrix_banner = '\n======================================== DISTANCE MATRIX =======================================\n'
     print(matrix_banner)
     print(distance_matrix, "\n\n")

     upgma_tree = DistanceTreeConstructor().upgma(distance_matrix)

     tree_banner = '\n========================================= GENE TREE ===========================================\n'
     print(tree_banner)
     # Graphical rendering first, then the plain-text one.
     Phylo.draw(upgma_tree)
     Phylo.draw_ascii(upgma_tree)
Esempio n. 5
0
def construct_tree(matrix, nj=True):
    """Build a tree from a distance matrix.

    Args:
        matrix: non-empty list of rows, passed unchanged to
            ``_DistanceMatrix``.  Taxa are named "0", "1", ... after the
            row positions.
        nj: use neighbor-joining when true, UPGMA otherwise.

    Returns:
        The constructed tree with the names of all non-terminal clades
        blanked, or None (after printing a message) when ``matrix`` is not
        a non-empty list.
    """
    # The type is tested before truthiness so that array-like objects with
    # ambiguous truth values are rejected instead of raising.
    if not (isinstance(matrix, list) and matrix):
        print("matrix has invalid value")
        return None

    dm = _DistanceMatrix(names=[str(i) for i in range(len(matrix))],
                         matrix=matrix)

    constructor = DistanceTreeConstructor()
    tree = constructor.nj(dm) if nj else constructor.upgma(dm)

    # this will remove the names from the inner nodes
    # this is critical for seq-gen to read in the tree
    for clade in tree.get_nonterminals():
        clade.name = ''

    return tree
Esempio n. 6
0
def dna(file_path, file_format, algorithm):
    """Read an alignment, print its distance matrix and draw a tree.

    Args:
        file_path: alignment file handed to ``AlignIO.read``.
        file_format: format name handed to ``AlignIO.read``.
        algorithm: 'upgma' or 'nj' (case-insensitive).  Any other value is
            reported with ``click.echo`` and no tree is drawn.
    """
    # Read the sequences and align
    aln = AlignIO.read(file_path, file_format)

    # Print the alignment
    print(aln)

    # Calculate the distance matrix
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)

    # Print the distance matrix itself (not the calculator object)
    print('\nDistance Matrix\n===================')
    print(dm)

    # Construct the phylogenetic tree using the chosen algorithm
    constructor = DistanceTreeConstructor()
    method = algorithm.lower()
    if method == 'upgma':
        tree = constructor.upgma(dm)
    elif method == 'nj':
        tree = constructor.nj(dm)
    else:
        click.echo('Invalid algorithm!')
        # Without a tree there is nothing to draw; stop here instead of
        # failing on an unbound variable below.
        return

    # Draw the phylogenetic tree
    Phylo.draw(tree)

    # Print the phylogenetic tree in the terminal
    print('\nPhylogenetic Tree\n===================')
    Phylo.draw_ascii(tree)
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor"""

    def setUp(self):
        # The context manager closes the alignment file even if parsing
        # raises.
        with open('TreeConstruction/msa.phy') as handle:
            self.aln = AlignIO.read(handle, 'phylip')
        calculator = DistanceCalculator('blosum62')
        self.dm = calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(calculator)

    def _assert_newick_matches(self, tree, reference_path):
        """Check ``tree`` is a Tree whose newick text equals the first line
        of the reference file."""
        self.assertIsInstance(tree, BaseTree.Tree)
        tree_file = StringIO.StringIO()
        Phylo.write(tree, tree_file, 'newick')
        # Closed by the with-block even when the assertion fails.
        with open(reference_path) as ref_tree:
            self.assertEqual(tree_file.getvalue(), ref_tree.readline())

    def test_upgma(self):
        tree = self.constructor.upgma(self.dm)
        self._assert_newick_matches(tree, './TreeConstruction/upgma.tre')

    def test_nj(self):
        tree = self.constructor.nj(self.dm)
        self._assert_newick_matches(tree, './TreeConstruction/nj.tre')

    def test_built_tree(self):
        # build_tree is compared against the NJ reference file.
        tree = self.constructor.build_tree(self.aln)
        self._assert_newick_matches(tree, './TreeConstruction/nj.tre')
def build_trees(filename, tree_name):
    """Align ``<filename>.fa`` with ClustalW and show UPGMA and NJ trees.

    Args:
        filename: path without extension; ``<filename>.fa`` is the ClustalW
            input and ``<filename>.aln`` is read back as the alignment.
        tree_name: text placed in front of the method name in each plot
            title.
    """

    def label_func(clade):
        # Clades whose name starts with "Inner" get an empty label; every
        # other clade is returned as-is and formatted by the drawing code.
        if clade.name.startswith("Inner"):
            return ""
        return clade

    # Compute alignment with ClustalW algorithm
    run_clustalw = ClustalwCommandline("clustalw",
                                       infile="{}.fa".format(filename))
    run_clustalw()
    alignment = AlignIO.read("{}.aln".format(filename), format="clustal")

    # Distance matrix from the blosum62 model
    dist_matrix = DistanceCalculator('blosum62').get_distance(alignment)

    # Both trees are built before anything is drawn
    constructor = DistanceTreeConstructor()
    titled_trees = (
        ("{} × upgma", constructor.upgma(dist_matrix)),
        ("{} × nj", constructor.nj(dist_matrix)),
    )

    for title_template, tree in titled_trees:
        Phylo.draw(tree, label_func=label_func, do_show=False)
        plt.title(title_template.format(tree_name))
        plt.show()
Esempio n. 9
0
def main(argv):
    """Draw a UPGMA tree from a comma-separated distance table.

    Args:
        argv: command-line arguments; ``argv[1]`` is the path of a CSV file
            holding a square numeric matrix whose rows and columns follow
            the order of the hard-coded taxon labels below.
    """
    # Taxon labels, one per matrix row
    M_labels = [
        'Wuttagoonaspis', 'Romundina', 'Brindabellaspis', 'Eurycaraspis',
        'Entelognathus'
    ]
    print(M_labels)

    # Passing the path lets numpy open and close the file itself.
    full = np.loadtxt(argv[1], delimiter=",")

    # Row i keeps the entries left of the diagonal plus a zero diagonal.
    # Slicing (rather than filtering out a marker value) keeps genuine
    # distances that happen to equal 1.
    M = [full[i, :i].tolist() + [0.0] for i in range(len(M_labels))]

    m = _Matrix(M_labels, M)
    print(type(m))

    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(m)
    Phylo.draw(tree)
Esempio n. 10
0
def build_phylogeny_trees():
    """Align each homologous gene file and build a UPGMA tree from it.

    Every file in ``out/homologous_gene_sequences/`` is aligned with
    Clustal Omega into ``out/aligned_homologous_gene_sequences/`` under the
    same name.  The alignment is read back as FASTA, turned into an
    'identity' distance matrix and clustered with UPGMA.  Each tree is
    drawn, printed as ASCII art and written in Nexus format to
    ``out/phylogenetic_trees/<file name>_tree.nex``.
    """
    source_dir = "out/homologous_gene_sequences/"
    aligned_dir = "out/aligned_homologous_gene_sequences/"

    for file_name in os.listdir(source_dir):
        unaligned_path = source_dir + file_name
        aligned_path = aligned_dir + file_name

        command = ClustalOmegaCommandline(infile=unaligned_path, outfile=aligned_path, verbose=True, auto=True)
        # The command line is run through the shell; its exit status is
        # not inspected.
        os.system(str(command))

        alignment = AlignIO.read(aligned_path, 'fasta')

        # Distance matrix and tree
        distances = DistanceCalculator('identity').get_distance(alignment)
        upgma_tree = DistanceTreeConstructor().upgma(distances)

        Phylo.draw(upgma_tree)

        print('\nPhylogenetic Tree\n', file_name)
        Phylo.draw_ascii(upgma_tree)
        nexus_path = 'out/phylogenetic_trees/{}_tree.nex'.format(file_name)
        Phylo.write([upgma_tree], nexus_path, 'nexus')
Esempio n. 11
0
def main():
    """Time neighbor joining and UPGMA on the sequences in data/coding.fa.

    The FASTA records are collected into one alignment, an 'identity'
    distance matrix is computed once, and each algorithm is timed with
    ``time.time()`` before its tree is drawn with ``get_label`` as the
    label function.
    """
    file_name = "data/coding.fa"
    # alternative input: "data/cons_noncode.fa"

    alignment = MultipleSeqAlignment([], Gapped(IUPAC.unambiguous_dna, "-"))
    for record in SeqIO.parse(file_name, "fasta"):
        alignment.extend([record])

    print("Number of characters in alignment:", len(alignment[0]))

    dm = DistanceCalculator('identity').get_distance(alignment)
    constructor = DistanceTreeConstructor()

    # Neighbor joining runs first, then UPGMA, on the same matrix.
    timed_runs = (
        (constructor.nj, "Neighbor joining ran in {} seconds."),
        (constructor.upgma, "UPGMA ran in {} seconds."),
    )
    for build, report in timed_runs:
        started = time.time()
        tree = build(dm)
        finished = time.time()
        print(report.format(finished - started))
        Phylo.draw(tree, label_func=get_label)
Esempio n. 12
0
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor"""

    def setUp(self):
        self.aln = AlignIO.read('TreeConstruction/msa.phy', 'phylip')
        calculator = DistanceCalculator('blosum62')
        self.dm = calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(calculator)

    def _assert_matches_reference(self, tree, reference_path):
        """Check ``tree`` is a Tree with the same topology as the newick
        tree stored at ``reference_path``."""
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(tree, BaseTree.Tree)
        ref_tree = Phylo.read(reference_path, 'newick')
        self.assertTrue(Consensus._equal_topology(tree, ref_tree))

    def test_upgma(self):
        tree = self.constructor.upgma(self.dm)
        self._assert_matches_reference(tree, './TreeConstruction/upgma.tre')

    def test_nj(self):
        tree = self.constructor.nj(self.dm)
        self._assert_matches_reference(tree, './TreeConstruction/nj.tre')

    def test_built_tree(self):
        # build_tree is compared against the NJ reference tree.
        tree = self.constructor.build_tree(self.aln)
        self._assert_matches_reference(tree, './TreeConstruction/nj.tre')
Esempio n. 13
0
def plot_phylo_tree(align: MultipleSeqAlignment, accession_numbers: dict):
    """
    Plots a phylogenetic tree
    :param align: MultipleSeqAlignment with the alignment result to be plotted
    :param accession_numbers: dict mapping the part of each terminal name
        before its last dot (within the leading run of non-whitespace
        characters) to the label that should be shown instead
    :return: figure-handle of the plotted phylogenetic tree
    """
    # calculate distance - https://biopython.org/wiki/Phylo
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(align)

    # construct a tree
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(dm)

    # remove the names for the non-terminals for better visual appeal
    for non_terminal in tree.get_nonterminals():
        non_terminal.name = ''

    # Raw string: \S and \. must reach the regex engine untouched instead
    # of being treated as (invalid) string escapes.
    accession_pattern = re.compile(r"(^\S*)(?=\.)")

    # change accession numbers into more human-understandable names
    for terminal in tree.get_terminals():
        accession = accession_pattern.match(terminal.name)[0]
        terminal.name = accession_numbers[accession]

    # draw_ascii writes the tree itself; wrapping it in print() only added
    # a stray "None" line.
    Phylo.draw_ascii(tree)

    # plot the tree
    fig, ax = plt.subplots(1, 1)
    # draw the resulting tree
    Phylo.draw(tree, show_confidence=False, axes=ax, do_show=False)
    ax.set_xlim(right=0.8)
    return fig
Esempio n. 14
0
def measure_D_net(G,qmod,qcon):
    """Build the network distance matrix of ``G`` and write its UPGMA tree.

    For every pair of nodes ``u >= v`` (taken in sorted order) the distance
    is ``1.0 - G.dmc_likelihood(u, v, qmod, qcon)``.  The rows are printed
    as they are computed, wrapped in a ``_DistanceMatrix`` whose taxa are
    named "Taxon<node>", clustered with UPGMA and written to 'ph_dmc.nre'
    in newick format.

    Args:
        G: iterable of sortable nodes that provides ``dmc_likelihood``.
        qmod, qcon: passed unchanged to ``G.dmc_likelihood``.

    Returns:
        The ``_DistanceMatrix`` that was built.
    """
    # One ordering is used for both rows and names so that row i always
    # belongs to names[i], whatever order G iterates in.
    nodes = sorted(G)

    D_net = []
    for u in nodes:
        row = []
        for v in nodes:
            if u < v: continue
            distance = 1.0 - G.dmc_likelihood(u,v,qmod,qcon)
            row.append(distance)
            print(distance, end=" ")
        D_net.append(row)
        print('\n')

    names = ['Taxon' + str(u) for u in nodes]
    print(names)
    print(D_net)
    D_net_final = _DistanceMatrix(names,D_net)

    constructor = DistanceTreeConstructor()
    tree_dmc = constructor.upgma(D_net_final)
    Phylo.write(tree_dmc,'ph_dmc.nre','newick')

    return D_net_final
def upgma_tree_constructor(x):
    """Print the UPGMA tree of alignment ``x``.

    The tree is built from the 'identity' distance matrix of ``x`` and is
    printed twice: once via ``print`` and once as ASCII art.  Nothing is
    returned.
    """
    tree_builder = DistanceTreeConstructor()
    distances = DistanceCalculator("identity").get_distance(x)
    tree = tree_builder.upgma(distances)
    print(tree)
    Phylo.draw_ascii(tree)
Esempio n. 16
0
def construct_tree(gene_name, with_marburg=1, algorithm='UPGMA'):
    """Build a tree from a stored edit-distance matrix and save it.

    Args:
        gene_name: base name of the CSV file read from
            ``./Output/edit_matrices/``.
        with_marburg: when equal to 1 the five-taxon name list is used.
            Any other value adds 'Marburg', reads
            ``./Data/Marburg_genome.fasta``, calls ``Alignment.read_data``
            and ``Alignment.read_genes`` and appends '_with_marburg' to the
            CSV base name.
            NOTE(review): the value 1 selects the branch that prints
            "without Marburg", which reads inverted relative to the
            parameter name - confirm against callers.
        algorithm: 'NJ' selects neighbor joining; every other value uses
            UPGMA.  Also used as the prefix of the saved file name.

    Returns:
        The constructed tree, after it was passed to ``save_tree``.
    """
    five_taxa_only = with_marburg == 1
    if five_taxa_only:
        print('Constructing Tree with All Viruses without Marburg')
        filename = algorithm + '_' + gene_name
        names = ['Bundibugyo', 'Reston', 'Sudan', 'TaiForest', 'Zaire']
    else:
        print("Constructing {0}'s Tree with All Viruses with Marburg".format(gene_name))
        filename = algorithm + '_' + gene_name + '_with_Marburg'
        names = ['Bundibugyo', 'Reston', 'Sudan', 'TaiForest', 'Zaire', 'Marburg']
        marburg_genome = SeqIO.read("./Data/Marburg_genome.fasta", "fasta")
        Alignment.read_data()
        print('Aligning Genes for marburg_genome')
        gene_name += '_with_marburg'
        Alignment.read_genes(marburg_genome)

    print('Reading edit matrix and construct tree')
    csv_path = "./Output/edit_matrices/" + gene_name + ".csv"
    raw_matrix = pd.read_csv(csv_path, header=None)
    tree_builder = DistanceTreeConstructor()
    # The helper converts the edit distances to lower-triangular form.
    lower_triangular = convert_tu_lower_triangular(raw_matrix)
    distance_matrix = DistanceMatrix(names=names, matrix=lower_triangular)

    build = tree_builder.nj if algorithm == 'NJ' else tree_builder.upgma
    tree = build(distance_matrix)
    save_tree(tree, filename)
    return tree
Esempio n. 17
0
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor"""

    def setUp(self):
        self.aln = AlignIO.read('TreeConstruction/msa.phy', 'phylip')
        calculator = DistanceCalculator('blosum62')
        self.dm = calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(calculator)

    def test_upgma(self):
        """UPGMA tree has the topology stored in upgma.tre."""
        tree = self.constructor.upgma(self.dm)
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(tree, BaseTree.Tree)
        ref_tree = Phylo.read('./TreeConstruction/upgma.tre', 'newick')
        self.assertTrue(Consensus._equal_topology(tree, ref_tree))

    def test_nj(self):
        """NJ tree has the topology stored in nj.tre."""
        tree = self.constructor.nj(self.dm)
        self.assertIsInstance(tree, BaseTree.Tree)
        ref_tree = Phylo.read('./TreeConstruction/nj.tre', 'newick')
        self.assertTrue(Consensus._equal_topology(tree, ref_tree))

    def test_built_tree(self):
        """build_tree on the alignment is compared against nj.tre."""
        tree = self.constructor.build_tree(self.aln)
        self.assertIsInstance(tree, BaseTree.Tree)
        ref_tree = Phylo.read('./TreeConstruction/nj.tre', 'newick')
        self.assertTrue(Consensus._equal_topology(tree, ref_tree))
Esempio n. 18
0
def tree_reconstruction(phy_file, method, model, phyformat):
    '''Construct a tree with the given method and model and save it.

    Args:
        phy_file: alignment file read with the 'phylip-<phyformat>' format.
        method: 'upgma' or 'nj'.
        model: model name handed to ``DistanceCalculator``.
        phyformat: suffix appended to 'phylip-' to form the format name.

    The ladderized tree is written to ``<args.output>/tree.nwk`` (newick)
    and drawn to ``<args.output>/tree.svg``; ``args`` is a module-level
    object that must already exist.

    Raises:
        ValueError: if ``method`` is neither 'upgma' nor 'nj'.
    '''
    # Reject an unknown method before any work is done; previously it
    # surfaced later as an unbound ``tree`` variable.
    if method not in ('upgma', 'nj'):
        raise ValueError(
            "Unknown tree method {!r}; expected 'upgma' or 'nj'.".format(method))

    aln = AlignIO.read(phy_file, 'phylip-' + phyformat)

    constructor = DistanceTreeConstructor()
    calculator = DistanceCalculator(model)
    dm = calculator.get_distance(aln)

    if method == 'upgma':
        tree = constructor.upgma(dm)
    else:
        tree = constructor.nj(dm)

    tree.ladderize()

    # Blank the names of clades whose name contains 'Inner'.
    for c in tree.find_clades():
        if 'Inner' in c.name:
            c.name = ''

    Phylo.write(tree, args.output + '/tree.nwk', 'newick')

    plt.rcParams['font.style'] = 'italic'
    plt.rc('font', size=8)
    plt.rc('axes', titlesize=14)
    plt.rc('xtick', labelsize=10)
    plt.rc('ytick', labelsize=10)
    plt.rc('figure', titlesize=18)

    draw(tree, do_show=False)
    plt.savefig(args.output + "/tree.svg", format='svg', dpi=1200)
Esempio n. 19
0
def D_seq_matrix(fasta_file):
    """Compute the identity distance matrix of a FASTA alignment.

    As a side effect the UPGMA tree built from the matrix is written to
    'ph_seq.nre' in newick format and the matrix names are printed.

    Args:
        fasta_file: alignment file read with the 'fasta' format.

    Returns:
        The distance matrix.
    """
    aln = AlignIO.read(fasta_file, 'fasta')
    calculator = DistanceCalculator('identity')
    dm = calculator.get_distance(aln)
    constructor = DistanceTreeConstructor()
    tree_seq = constructor.upgma(dm)
    Phylo.write(tree_seq,'ph_seq.nre','newick')
    # The parenthesised single-argument form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print(dm.names)
    return dm
def createTree(file):
    """Write the UPGMA tree of a PHYLIP alignment to 'new.xml'.

    The tree is built from the 'identity' distance matrix of the alignment
    read from ``file`` and saved in phyloxml format.  Nothing is returned.
    """
    alignment = AlignIO.read(file, 'phylip')
    distances = DistanceCalculator('identity').get_distance(alignment)
    upgma_tree = DistanceTreeConstructor().upgma(distances)
    Phylo.write(upgma_tree, 'new.xml', 'phyloxml')
Esempio n. 21
0
def print_trees(country, position_table):
    """Draw NJ and UPGMA trees for the ten rows farthest from the consensus.

    Args:
        country: label printed in upper case above the trees.
        position_table: DataFrame with a 'seqid' column; every other column
            is compared value by value.

    The consensus is the per-column mode of the non-'seqid' columns.  The
    ten rows that differ from it in the most columns are selected, their
    pairwise mismatch counts form the distance matrix, and both trees are
    ladderized and passed through ``display(Phylo.draw(...))``.  The UPGMA
    tree is only built when the matrix has more than one entry.
    """
    # Consensus sequence: most frequent value of every position column
    consensus = position_table.drop('seqid', axis=1).mode(axis=0).T[0]
    by_seqid = position_table.set_index('seqid')

    # Rank the samples by their number of mismatches with the consensus
    mismatch_counts = by_seqid.apply(lambda row: sum(row != consensus),
                                     axis=1)
    ranked = mismatch_counts.sort_values(ascending=False)

    # Keep the 10 most divergent sequences
    chosen_ids = ranked[:10].index

    # Symmetric pairwise mismatch counts, zero on the diagonal
    pair_counts = {}
    for pos, first_id in enumerate(chosen_ids):
        pair_counts[first_id, first_id] = 0
        for second_id in chosen_ids[pos + 1:]:
            count = sum(by_seqid.loc[first_id] != by_seqid.loc[second_id])
            pair_counts[first_id, second_id] = count
            pair_counts[second_id, first_id] = pair_counts[first_id,
                                                           second_id]
    square = pd.Series(pair_counts).unstack()

    # Row i keeps its first i + 1 entries (lower triangle with diagonal)
    lower_rows = [
        row[:n + 1] for n, row in enumerate(np.tril(square.values).tolist())
    ]
    dm = DistanceMatrix(list(square.index), lower_rows)

    # Neighbor joining tree
    constructor = DistanceTreeConstructor()
    nj_tree = constructor.nj(dm)
    print(country.upper())
    print("Neighbor Joining Tree")
    nj_tree.ladderize()  # deeper clades are displayed at the top
    display(Phylo.draw(nj_tree))

    # UPGMA tree, skipped for a single-entry matrix
    if len(dm) > 1:
        upgma_tree = constructor.upgma(dm)
        print("UPGMA Tree")
        upgma_tree.ladderize()  # deeper clades are displayed at the top
        display(Phylo.draw(upgma_tree))
Esempio n. 22
0
def phyloxml_from_msa(msa, phyloxml):
    """Write the UPGMA tree of a FASTA alignment as phyloxml.

    Args:
        msa: alignment file read with the 'fasta' format.
        phyloxml: destination handed to ``Phylo.write``.

    The distance matrix is computed with the "ident" model.
    """
    from Bio import AlignIO, Phylo
    from Bio.Phylo.TreeConstruction import (DistanceCalculator,
                                            DistanceTreeConstructor)

    alignment = AlignIO.read(msa, "fasta")
    distances = DistanceCalculator("ident").get_distance(alignment)
    upgma_tree = DistanceTreeConstructor().upgma(distances)
    Phylo.write(upgma_tree, phyloxml, "phyloxml")
Esempio n. 23
0
    def draw(self):
        """
        Print an ASCII rendering of the UPGMA tree built from self.distMat.

        Every row of ``self.distMat`` is reduced to its strictly positive
        entries before it is handed to ``DistanceMatrix`` together with
        ``self.names``.
        NOTE(review): the positive-only filter also removes zero-valued
        entries, including any zero diagonal - confirm this matches the
        row layout ``DistanceMatrix`` expects.
        """
        positive_rows = [[value for value in row if value > 0]
                         for row in self.distMat.tolist()]
        tree_builder = DistanceTreeConstructor()
        distance_matrix = DistanceMatrix(self.names, positive_rows)
        Phylo.draw_ascii(tree_builder.upgma(distance_matrix))
Esempio n. 24
0
def build_tree_UPGMA(msa, distanceMatrix=None):
    """Return the UPGMA tree of an alignment as a newick string.

    Args:
        msa: alignment used to compute an 'identity' distance matrix when
            no matrix is supplied.
        distanceMatrix: optional precomputed distance matrix.  Only None
            triggers the computation from ``msa``.

    Returns:
        The stripped newick text of the tree prefixed with "[&R] "
        (presumably the rooted-tree marker - confirm with consumers).
    """
    # Identity check instead of truthiness: a supplied matrix is used
    # as-is even if it evaluates as false, and objects with ambiguous
    # truth values no longer raise here.
    if distanceMatrix is None:
        distCalculator = DistanceCalculator("identity")
        distanceMatrix = distCalculator.get_distance(msa)
    # Construct the tree with the distance matrix
    constructor = DistanceTreeConstructor()
    tree = constructor.upgma(distanceMatrix)
    return "[&R] " + tree.format("newick").strip()
Esempio n. 25
0
 def get_dn_ds_tree(self, dn_ds_method="NG86", tree_method="UPGMA"):
     """Construct a dn tree and a ds tree.

     Argument:
         -   dn_ds_method - passed to ``get_dn_ds_matrix`` as ``method``;
                            available methods include NG86, LWL85, YN00
                            and ML.
         -   tree_method  - "UPGMA" or "NJ".

     Returns the tuple ``(dn_tree, ds_tree)``.  Raises RuntimeError for any
     other ``tree_method``.
     """
     # Validate first so that a bad tree_method fails before the dn/ds
     # matrices are computed.
     if tree_method not in ("UPGMA", "NJ"):
         raise RuntimeError("Unknown tree method ({0}). Only NJ and UPGMA "
                            "are accepted.".format(tree_method))

     from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
     dn_dm, ds_dm = self.get_dn_ds_matrix(method=dn_ds_method)
     dn_constructor = DistanceTreeConstructor()
     ds_constructor = DistanceTreeConstructor()
     if tree_method == "UPGMA":
         dn_tree = dn_constructor.upgma(dn_dm)
         ds_tree = ds_constructor.upgma(ds_dm)
     else:
         dn_tree = dn_constructor.nj(dn_dm)
         ds_tree = ds_constructor.nj(ds_dm)
     return dn_tree, ds_tree
Esempio n. 26
0
def UPGMA_tree_reconstruction(dm):
    '''
    Build a UPGMA tree from a distance matrix, print it and return it.

    input:
        dm: distance matrix

    output:
        tree: reconstructed tree
    '''
    upgma_tree = DistanceTreeConstructor().upgma(dm)
    print(upgma_tree)
    return upgma_tree
Esempio n. 27
0
    def display(self):
        """Print ``self.distance_matrix`` and draw its UPGMA tree.

        The description labels are created first; the finished tree is
        handed to ``self.draw_tree``.
        """
        # Labels shown on the tree
        self.create_description_labels()

        # Distance matrix, preceded by a heading
        print('\nDistance Matrix\n===================')
        print(self.distance_matrix)

        # UPGMA tree of the same matrix
        upgma_tree = DistanceTreeConstructor().upgma(self.distance_matrix)
        self.draw_tree(upgma_tree)
Esempio n. 28
0
def construct_tree(align, ssr_regions, motifs, weights=(1, 0.1)):
    """
    Construct an upgma tree based on a pairwise Levenshtein distance matrix.
    For each pairwise comparison, the Levenshtein distances are calculated
    for sequences of non-SSR and SSR regions separately, and the weighted
    sum of them is used as the distance to construct an upgma tree. By
    default, weights for non-SSR and SSR regions are 1 and 0.1,
    respectively. In SSR regions every occurrence of the repeat motif is
    collapsed to a single character, so one repeat difference counts as one
    edit.

    Parameters
    ----------
    align: Bio.AlignIO.MultipleSeqAlignment
        input sequence alignment
    ssr_regions: list of tuple
        start and end positions of SSR regions in the alignment
    motifs: list
        repeat motifs, one per entry of ``ssr_regions``
    weights: sequence of two numbers
        weights for non-SSR and SSR regions to calculate pairwise distances
        (default: (1, 0.1))

    Returns
    -------
    The UPGMA tree; taxa are named "seq0", "seq1", ... in alignment order.
    """
    # Alignment columns covered by any SSR region.  They do not depend on
    # the sequence, so the set is built once.
    ssr_positions = set(chain.from_iterable(range(*region)
                                            for region in ssr_regions))

    non_ssr_seqs = []
    ssr_seqs = []
    for record in align:
        seq = str(record.seq.upper())

        # Walking the sequence in order keeps the remaining columns in
        # their original order regardless of set iteration order.
        non_ssr_seqs.append("".join(
            base for pos, base in enumerate(seq)
            if pos not in ssr_positions))

        # Gaps are dropped and every motif occurrence becomes one "x".
        ssr_seqs.append("".join(
            seq[region[0]:region[1]].replace("-", "").replace(motif, "x")
            for region, motif in zip(ssr_regions, motifs)))

    mat1 = pairwise_dist_Levenstein(non_ssr_seqs)
    mat2 = pairwise_dist_Levenstein(ssr_seqs)

    mat = [
        list(np.array(i) * weights[0] + np.array(j) * weights[1])
        for i, j in zip(mat1, mat2)
    ]
    names = ["seq{}".format(i) for i in range(len(align))]
    dmat = _DistanceMatrix(names, mat)

    constructor = DistanceTreeConstructor()
    return constructor.upgma(dmat)
Esempio n. 29
0
def ex01():
    """Draw the UPGMA and NJ trees of every alignment from get_alignments.

    Each ``(alignment, name)`` pair gets a blosum62 distance matrix; both
    trees are built from it, the name is printed after a ">>>" marker and
    the trees are drawn, UPGMA first.
    """
    named_alignments = get_alignments()
    distance_calculator = DistanceCalculator('blosum62')
    tree_builder = DistanceTreeConstructor()

    for alignment, name in named_alignments:
        distances = distance_calculator.get_distance(alignment)
        # Both trees are built before the heading is printed.
        trees = [tree_builder.upgma(distances), tree_builder.nj(distances)]

        print("\n\n>>> {}".format(name))
        for tree in trees:
            draw(tree)
Esempio n. 30
0
    def onClick5(self):
        """Show the UPGMA tree of the alignment in Conjunto_fasta.aln.

        The tree is printed as ASCII art and then drawn with Phylo.draw.
        """
        # Read the alignment file in 'clustal' format
        with open("Conjunto_fasta.aln", "r") as handle:
            alignment = align.read(handle, "clustal")

        # Compute the 'identity' distance matrix of the alignment
        distance_calculator = DistanceCalculator('identity')
        distances = distance_calculator.get_distance(alignment)

        # Build the phylogenetic tree from the distances
        tree_builder = DistanceTreeConstructor(distance_calculator)
        upgma_tree = tree_builder.upgma(distances)

        Phylo.draw_ascii(upgma_tree)
        Phylo.draw(upgma_tree)
def run_optimization():
    '''
    Compute pairwise curve distances and print NJ and UPGMA trees.

    The parameter sets come from ``get_data()``.  For every pair of the 16
    samples the absolute value of ``compare_curves`` (with 200 vertices) is
    stored symmetrically in a square matrix.  The matrix is shown with
    matplotlib, converted to lower-triangular rows and clustered with both
    algorithms; the trees are printed as ASCII art, NJ first.

    Returns the square numpy distance matrix.
    '''
    params = get_data()

    num_samples = 16
    NUM_OF_VERTICES = 200

    distances = np.zeros((num_samples, num_samples))

    for i in range(num_samples):
        for j in range(i + 1, num_samples):
            print("working on the pair", (i, j))
            distances[i, j] = np.abs(compare_curves(params[i], params[j], num_of_verts=NUM_OF_VERTICES))
            distances[j, i] = distances[i, j]

    # Plot the distance matrix.  show() has to be called; the bare
    # attribute reference did nothing.
    plt.matshow(distances)
    plt.colorbar()
    plt.show()

    # Row i keeps columns 0..i (lower triangle including the diagonal).
    lower_rows = [list(distances[i, :i + 1]) for i in range(num_samples)]

    distance_matrix = DistanceMatrix(names=['a1', 'a2', 'a3', 'a4', 'b1', 'b2', 'b3', 'b4', 'c1', 'c2', 'c3', 'c4', 'd1', 'd2', 'd3', 'd4'],
                                     matrix=lower_rows)

    constructor = DistanceTreeConstructor()

    tree_up = constructor.upgma(distance_matrix)
    tree_nj = constructor.nj(distance_matrix)

    Phylo.draw_ascii(tree_nj)
    Phylo.draw_ascii(tree_up)

    return distances
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor."""

    def setUp(self):
        self.aln = AlignIO.read("TreeConstruction/msa.phy", "phylip")
        calculator = DistanceCalculator("blosum62")
        self.dm = calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(calculator)

    def _check_against(self, tree, reference_path):
        """Assert ``tree`` is a Tree with the topology of the newick file
        at ``reference_path``."""
        self.assertIsInstance(tree, BaseTree.Tree)
        expected = Phylo.read(reference_path, "newick")
        self.assertTrue(Consensus._equal_topology(tree, expected))

    def test_upgma(self):
        self._check_against(self.constructor.upgma(self.dm),
                            "./TreeConstruction/upgma.tre")

    def test_nj(self):
        self._check_against(self.constructor.nj(self.dm),
                            "./TreeConstruction/nj.tre")

        # Shrink a fresh matrix to two entries by repeatedly deleting the
        # last one, then check NJ on that minimal matrix.
        calculator = DistanceCalculator("blosum62")
        self.min_dm = calculator.get_distance(self.aln)
        while len(self.min_dm) > 2:
            del self.min_dm[len(self.min_dm) - 1]

        self._check_against(self.constructor.nj(self.min_dm),
                            "./TreeConstruction/nj_min.tre")

    def test_built_tree(self):
        # build_tree is compared against the NJ reference tree.
        self._check_against(self.constructor.build_tree(self.aln),
                            "./TreeConstruction/nj.tre")
Esempio n. 33
0
def get_phylogenetic_tree(max_str_len=1,
                          norm="JSD",
                          cpc_function="Square25",
                          joining_alg="nj"):
    """Build a tree from the gene files and write it in newick format.

    Args:
        max_str_len: forwarded to ``pd_matrix``.
        norm: forwarded to ``pd_matrix``.
        cpc_function: forwarded to ``pd_matrix`` (it used to be ignored in
            favour of a hard-coded "Square25", which is still the default).
        joining_alg: "nj" or "upgma".

    The tree is written to 'phylo-tree/result.xml'; nothing is returned.

    Raises:
        ValueError: if ``joining_alg`` is neither "nj" nor "upgma".
    """
    # Fail before any work is done instead of hitting an unbound ``tree``
    # after the matrix has been computed.
    if joining_alg not in ("nj", "upgma"):
        raise ValueError(
            "Unknown joining algorithm {!r}; expected 'nj' or 'upgma'."
            .format(joining_alg))

    desc, genes = iter_over_files()
    pm = pd_matrix(genes,
                   max_str_len=max_str_len,
                   norm=norm,
                   cpc_function=cpc_function)
    pm = convert_triangle(pm)
    dm = DistanceMatrix(names=desc, matrix=pm)
    constructor = DistanceTreeConstructor()
    if joining_alg == "nj":
        tree = constructor.nj(dm)
    else:
        tree = constructor.upgma(dm)
    Phylo.write(tree, 'phylo-tree/result.xml', 'newick')
Esempio n. 34
0
def main():
    """Build a tree from the distance matrix named on the command line.

    Reads the module-level ``args``: ``DISTMAT`` (input matrix), ``distout``
    (echo the matrix), ``method`` ('nj' or 'upgma'), ``draw`` (display the
    tree), ``out`` and ``outfmt`` (output file stem and Phylo format).

    Raises:
        ValueError: if ``args.method`` is neither 'nj' nor 'upgma'.
    """
    dist_mat = ParseMatrix(args.DISTMAT)

    if args.distout is True:
        # Single-argument print calls behave the same on Python 2 and 3.
        print('Distance Matrix:')
        print(dist_mat)

    tree_constructor = DistanceTreeConstructor()
    if args.method == 'nj':
        tree = tree_constructor.nj(dist_mat)
    elif args.method == 'upgma':
        tree = tree_constructor.upgma(dist_mat)
    else:
        # Without this branch `tree` stays unbound and the failure surfaces
        # later as a confusing unbound-variable error.
        raise ValueError("Unknown tree construction method %r; "
                         "expected 'nj' or 'upgma'" % (args.method,))

    if args.draw is True:
        Phylo.draw(tree)

    # Write the tree in the requested output format
    Phylo.write(tree, args.out + '.tree', args.outfmt)
Esempio n. 35
0
    def tree_from_scores(list_with_scores):
        """Build a Guide_tree from pairwise graph-matching scores.

        Parameters
        ----------
        list_with_scores : list
            Scores from the pairwise alignments of the graphs. Example for
            three graphs a, b, c:
            [["a", "b", 2], ["a", "c", 4], ["b", "c", 3]]

        Returns
        -------
        Guide_tree
            Wraps the networkx form of the UPGMA tree built from the scores.
        """
        score_matrix = Guide_tree_Generator.score_to_matrix(list_with_scores)
        upgma_tree = DistanceTreeConstructor().upgma(score_matrix)
        return Guide_tree(Phylo.to_networkx(upgma_tree))
Esempio n. 36
0
    def tree_from_random(list_of_scores):
        """Build a random guide tree for MGA.

        Parameters
        ----------
        list_of_scores : list
            Scores from the pairwise alignments of the graphs; used here only
            to obtain the graph names. Example for three graphs a, b, c:
            [["a", "b", 2], ["a", "c", 4], ["b", "c", 3]]

        Returns
        -------
        Guide_tree
            Wraps the networkx form of a UPGMA tree built from a random
            score matrix over those names.
        """
        graph_names = Guide_tree_Generator.make_graph_list(list_of_scores)
        random_matrix = Guide_tree_Generator.random_score_matrix(graph_names)
        upgma_tree = DistanceTreeConstructor().upgma(random_matrix)
        return Guide_tree(Phylo.to_networkx(upgma_tree))
Esempio n. 37
0
def build_tree(dist_matrix, names_list, clust):
    """Cluster a distance matrix into a ``Tree`` with NJ or UPGMA.

    Args:
        dist_matrix: pairwise distances; passed as-is to ``DistanceMatrix``
            for 'nj' and through ``condense_matrix`` for 'upgma'.
        names_list: labels for the rows/columns of ``dist_matrix``.
        clust: clustering method, 'nj' or 'upgma'.

    Returns:
        A ``Tree`` built from the Newick text of the clustering result.

    Raises:
        SystemExit: with a non-zero status if ``clust`` is not recognised.
    """
    if clust == 'nj':
        dm = DistanceMatrix(dist_matrix, names_list)
        # result_constructor=str makes nj() hand back the Newick string.
        tree_scikit = nj(dm, result_constructor=str)
        return Tree(tree_scikit)

    if clust == 'upgma':
        dm = _DistanceMatrix(names=names_list,
                             matrix=condense_matrix(dist_matrix))
        constructor = DistanceTreeConstructor()
        tree_biopython = constructor.upgma(dm)
        # remove InnerNode names so they do not end up in the Newick output
        for inner in tree_biopython.get_nonterminals():
            inner.name = None
        output = StringIO()
        Phylo.write(tree_biopython, output, "newick")
        return Tree(output.getvalue())

    # sys.exit(message) prints to stderr and exits with status 1; the bare
    # sys.exit() used before reported success (status 0) on this error path.
    sys.exit("Unknown tree clustering method ! Aborting")
Esempio n. 38
0
def D_F_matrix(D_Seq,D_net,final_tree):
    """Average two distance matrices, build a UPGMA tree and write it out.

    Each combined entry is ``0.5 * D_net[a, b] + 0.5 * D_Seq[a, b]``. Only
    names present in both matrices are kept, in the order of ``D_net.names``.

    Args:
        D_Seq: distance matrix exposing ``.names`` and ``[name, name]`` lookup.
        D_net: second distance matrix with the same interface.
        final_tree: file (path or handle) the Newick tree is written to.

    Returns:
        The combined ``_DistanceMatrix``.
    """
    names_Seq = D_Seq.names
    names_Net = D_net.names

    # Restrict to the shared names first. The old guard `key1 in names_Net`
    # was always true, so a name missing from D_Seq either crashed the lookup
    # or left an empty row with no matching label.
    D_F_names = [name for name in names_Net if name in names_Seq]

    # Lower-triangular layout: row i holds the distances to names 0..i.
    D_F = [
        [0.5 * D_net[key1, key2] + 0.5 * D_Seq[key1, key2]
         for key2 in D_F_names[:i + 1]]
        for i, key1 in enumerate(D_F_names)
    ]

    # Single-argument call form prints the same on Python 2 and 3.
    print(D_F)

    D_F_final = _DistanceMatrix(D_F_names, D_F)

    constructor = DistanceTreeConstructor()
    tree_D_F = constructor.upgma(D_F_final)
    Phylo.write(tree_D_F, final_tree, 'newick')
    return D_F_final
Esempio n. 39
0
def D_F_matrix(D_Seq,D_net,final_tree, alpha):
    """Blend two distance matrices, build a UPGMA tree and write it out.

    Each combined entry is ``(1 - alpha) * D_net[a, b] + alpha * D_Seq[a, b]``.
    Only names present in both matrices are kept, in the order of
    ``D_net.names``.

    Args:
        D_Seq: distance matrix exposing ``.names`` and ``[name, name]`` lookup.
        D_net: second distance matrix with the same interface.
        final_tree: file (path or handle) the Newick tree is written to.
        alpha: weight given to ``D_Seq``; ``D_net`` gets ``1 - alpha``.
            Intended to lie between 0 and 1 (not enforced).

    Returns:
        The combined ``_DistanceMatrix``.
    """
    names_Seq = D_Seq.names
    names_Net = D_net.names

    # Restrict to the shared names first. The old guard `key1 in names_Net`
    # was always true, so a name missing from D_Seq either crashed the lookup
    # or left an empty row with no matching label.
    D_F_names = [name for name in names_Net if name in names_Seq]

    # Lower-triangular layout: row i holds the distances to names 0..i.
    D_F = [
        [((1-alpha) * D_net[key1, key2]) + (alpha * D_Seq[key1, key2])
         for key2 in D_F_names[:i + 1]]
        for i, key1 in enumerate(D_F_names)
    ]

    # Single-argument call form prints the same on Python 2 and 3.
    print(D_F)

    D_F_final = _DistanceMatrix(D_F_names, D_F)

    constructor = DistanceTreeConstructor()
    tree_D_F = constructor.upgma(D_F_final)
    Phylo.write(tree_D_F, final_tree, 'newick')
    return D_F_final
def NNIheuristic(FASTAFile, sampleSize, threshold, outputDir):
    """Search NNI neighbours of a UPGMA tree for a feasible, low-parsimony tree.

    Reads the FASTA alignment in FASTAFile, builds a UPGMA tree from identity
    distances, converts it to RLR form and then loops: sample NNI neighbours,
    split them into feasible and infeasible, and move to a better neighbour.
    Progress is logged to a file in outputDir named after FASTAFile with
    ".align" replaced by ".out".

    Args:
        FASTAFile: path of the FASTA alignment to read.
        sampleSize: number of NNI neighbours sampled per iteration (lowered
            to len(NNIs) - 1 when fewer are available).
        threshold: number of consecutive iterations without any feasible
            neighbour after which the search gives up.
        outputDir: directory the log file is written into.

    Returns:
        None. All results are written to the log file.
    """
    # Fixed seed so random.sample() below is reproducible between runs.
    random.seed(0)
    outputFile = FASTAFile.replace(".align", ".out")
    if "/" in outputFile:
        # Keeps everything from the last "/" on (the slash itself included).
        outputFile = outputFile[outputFile.rfind("/"):]
    # NOTE(review): this handle is never closed on any of the exit paths below.
    output = open(outputDir + "/" + outputFile, 'w')
    output.write("*****************RUN STARTS HERE!*****************")
    # start time
    startTime = time.clock()
    output.write("\n" + "Filename: " + FASTAFile + "\n")
    output.write("Program Start: {:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now()) + "\n")
    output.write("Sample Size: " + str(sampleSize) + "\nThreshold: " + str(threshold) + "\n\n")
    # Import fasta alignment file
    myAlignment = AlignIO.read(FASTAFile, "fasta")

    # Create a tip mapping from the fasta file: record id -> sequence string
    tipMapping = {}
    for record in myAlignment:
        tipMapping[record.id] = str(record.seq)

    # Compute a distance matrix and construct tree
    calculator = DistanceCalculator("identity")
    myMatrix = calculator.get_distance(myAlignment)
    output.write("matrix constructed here")
    constructor = DistanceTreeConstructor()
    upgmaTree = constructor.upgma(myMatrix)

    output.write("constructed upgma tree")

    # Turn the tree into a nested Python literal: quote the tip names, dump
    # plain Newick into a buffer, strip the "Inner<N>" labels and the trailing
    # ";", then let literal_eval parse what is left.
    for clade in upgmaTree.get_terminals():
        clade.name = "\"" + clade.name + "\""
    buf = cStringIO.StringIO()
    Phylo.write(upgmaTree, buf, 'newick', plain = True)
    tree = buf.getvalue()
    tree = re.sub(r'Inner\d*', '', tree)
    tree = tree.replace(";", "")
    tree = literal_eval(tree)    # newick format
    output.write("created the original tree into newick format")

    # RLR tree required for maxParsimony function
    tree = NewicktoRLR(tree)
    score = maxParsimony(tree, tipMapping)
    graph = nx.Graph()
    makeGraph(graph, tree)
    output.write("made a graph")
    leaves = getLeaves(tree)
    currentFeasible = isFeasible(graph,leaves)

    output.write("tested isFeasible")

    # Perform NNI heuristic
    counter = 0      # consecutive iterations without a feasible neighbour
    loopCounter = 0
    while True:
        output.write("in the while loop")
        loopCounter += 1
        output.write("Loop Iteration: " + str(loopCounter) + "\n")
        output.write("Loop Start Time: {:%H:%M:%S}".format(datetime.datetime.now()) + "\n")
        output.write("Current Tree\nFeasibility: " + str(currentFeasible) + "\nScore: " + str(score) + "\nTree:\n" + str(tree) + "\n\n")
        NNIs = allNNIs(tree)
        if len(NNIs)-1 < sampleSize:
            # Permanently lowers sampleSize for the remaining iterations too.
            sampleSize = len(NNIs)-1
        toScore = random.sample(NNIs, sampleSize)

        # add feasibility test
        output.write("starting feasibility test")
        feasible = []
        infeasible = []
        # NOTE(review): this loop rebinds `tree`, so after it `tree` is the
        # last sampled neighbour rather than the current tree; the log
        # messages further down print that value.
        for tree in toScore:
            graph = nx.Graph()
            makeGraph(graph, tree)
            leaves = getLeaves(tree)
            if isFeasible(graph, leaves): #if this tree is possible
                feasible.append(tree)
            else:
                infeasible.append(tree) #if this tree is not possible
        output.write("Number of Feasible Neighbor Trees: " + str(len(feasible)) + "\n")
        output.write("Number of Infeasible Neighbor Trees: " + str(len(infeasible)) + "\n")
        # NOTE(review): graph/leaves/tree still refer to the last sampled
        # neighbour here, so this re-tests it and appends it a second time.
        # Looks like leftover code; confirm before relying on it.
        if len(feasible) != 0: #if feasible trees were found
            if isFeasible(graph, leaves): #if this NNI is possible
                feasible.append(tree)
            else:
                infeasible.append(tree) #if this NNI is not possible
        if len(feasible) != 0: #if feasible NNIs were found
            # Pair every feasible tree with its parsimony score; sorting the
            # (score, tree) pairs puts the lowest score first.
            scoredList = map(lambda x: (maxParsimony(x, tipMapping), x), feasible)
            sortedList = sorted(scoredList)
            counter = 0
            if not currentFeasible or sortedList[0][0] < score:
                score = sortedList[0][0]
                tree = sortedList[0][1]
                currentFeasible = True
                output.write("Found a New Feasible Tree!\n\n")
            else:
                output.write("Best Possible Feasible Tree Found\n" + str(tree) + "\n" + "Score: " + str(score) + "\n\n")
                break
        else: #if no possible trees were found
            if currentFeasible: #checks if the original tree was feasible
                output.write("No Feasible Neighbors, Best Possible Feasible Tree\n" + str(tree) + "\n\n")
                break
            counter += 1
            output.write("Threshold counter: " + str(counter) + "\n\n")
            if counter >= threshold:
                output.write("Threshold Met: No Feasible Tree Found\n")
                stopTime = (time.clock() - startTime)
                output.write("Program Stop: " + str(stopTime) + " seconds\n\n")
                return
            output.write("Searching Infeasible Space\n")
            scoredList = map(lambda x: (maxParsimony(x, tipMapping), x), infeasible)
            sortedList = sorted(scoredList)
            choseNeighbor = False
            # The current tree is infeasible and no feasible neighbour was
            # found: move to the first infeasible neighbour (in ascending
            # score order) whose score is strictly greater than the current
            # score, then run again.
            for neighbor in sortedList:
                if neighbor[0] > score:
                    score = neighbor[0]
                    tree = neighbor[1]
                    choseNeighbor = True
                    break
            if not choseNeighbor:
                # Fall back to the highest-scored infeasible neighbour.
                # NOTE(review): sortedList[-1] assumes at least one neighbour
                # was sampled -- confirm sampleSize cannot reach 0 here.
                score = sortedList[-1][0]
                tree = sortedList[-1][1]
            currentFeasible = False
            output.write("Next Best Infeasible Tree\n\n")
    endTime = (time.clock() - startTime)
    output.write("Program End: " + str(endTime) + " seconds\n\n")

    #outputTree = RLRtoNewick(tree)
    #print "Final score", score
    return
Esempio n. 41
0
## pad sequences so that they all have the same length
#for record in records:
#    if len(record.seq) != maxlen:
#        sequence = str(record.seq).ljust(maxlen, '.')
#        record.seq = Seq.Seq(sequence)
#assert all(len(record.seq) == maxlen for record in records)

## write to temporary file and do alignment
#output_file = '{}_padded.fasta'.format(os.path.splitext(input_file)[0])
#with open(output_file, 'w') as f:
#    SeqIO.write(records, f, 'fasta')
#alignment = AlignIO.read(output_file, "fasta")

#cline = ClustalwCommandline("clustalw2", infile=input_file)
#print(cline)
#print type(cline)

# Align the sequences in input_file with MUSCLE; calling the command-line
# wrapper returns (stdout, stderr) and stdout is parsed as a FASTA alignment.
muscle_cline = MuscleCommandline(input=input_file)
stdout, stderr = muscle_cline()
alignment = AlignIO.read(StringIO(stdout), "fasta")
print(alignment)

#alignment = AlignIO.read('../data/ls_orchid.fasta', 'fasta')
#print alignment
# Pairwise distances under the 'ident' model, then a UPGMA tree written to
# 'phyloxml.xml' in phyloXML format.
calculator = DistanceCalculator('ident')
dm = calculator.get_distance(alignment)
constructor = DistanceTreeConstructor()
tree = constructor.upgma(dm)
Phylo.write(tree, 'phyloxml.xml', 'phyloxml')
def noFeasibleTest(FASTAFile, sampleSize, outputDir):
    """Greedy NNI search for a low-parsimony tree, with no feasibility check.

    Reads the FASTA alignment in FASTAFile, builds a UPGMA tree from identity
    distances, converts it to RLR form and then repeatedly moves to the
    lowest-scoring tree among a random sample of NNI neighbours, stopping as
    soon as no sampled neighbour scores lower than the current tree. Progress
    is logged to a file in outputDir named after FASTAFile with ".align"
    replaced by ".out".

    Args:
        FASTAFile: path of the FASTA alignment to read.
        sampleSize: number of NNI neighbours sampled per iteration (lowered
            to len(NNIs) - 1 when fewer are available).
        outputDir: directory the log file is written into.

    Returns:
        None. All results are written to the log file.
    """
    # Fixed seed so random.sample() below is reproducible between runs.
    random.seed(0)
    outputFile = FASTAFile.replace(".align", ".out")
    if "/" in outputFile:
        # Keeps everything from the last "/" on (the slash itself included).
        outputFile = outputFile[outputFile.rfind("/"):]
    # The with-block closes (and flushes) the log even if a step below raises;
    # the handle used to be left open.
    with open(outputDir + "/" + outputFile, 'w') as output:
        output.write("*****************RUN STARTS HERE!*****************")
        # start time
        startTime = time.clock()
        output.write("\n" + "Filename: " + FASTAFile + "\n")
        output.write("Program Start: {:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now()) + "\n")
        output.write("Sample Size: " + str(sampleSize) + "\n\n")
        # Import fasta alignment file
        myAlignment = AlignIO.read(FASTAFile, "fasta")

        # Create a tip mapping from the fasta file: record id -> sequence
        tipMapping = {}
        for record in myAlignment:
            tipMapping[record.id] = str(record.seq)

        # Compute a distance matrix and construct tree
        calculator = DistanceCalculator("identity")
        myMatrix = calculator.get_distance(myAlignment)
        constructor = DistanceTreeConstructor()
        upgmaTree = constructor.upgma(myMatrix)

        # Turn the tree into a nested Python literal: quote the tip names,
        # dump plain Newick into a buffer, strip the "Inner<N>" labels and
        # the trailing ";", then let literal_eval parse what is left.
        for clade in upgmaTree.get_terminals():
            clade.name = "\"" + clade.name + "\""
        buf = cStringIO.StringIO()
        Phylo.write(upgmaTree, buf, 'newick', plain = True)
        tree = buf.getvalue()
        tree = re.sub(r'Inner\d*', '', tree)
        tree = tree.replace(";", "")
        tree = literal_eval(tree)    # newick format

        # RLR tree required for maxParsimony function
        tree = NNI.NewicktoRLR(tree)
        score = NNI.maxParsimony(tree, tipMapping)

        # Perform NNI heuristic
        loopCounter = 0
        while True:
            loopCounter += 1
            output.write("Loop Iteration: " + str(loopCounter) + "\n")
            output.write("Loop Start Time: {:%H:%M:%S}".format(datetime.datetime.now()) + "\n")
            output.write("Current Tree\nScore: " + str(score) + "\nTree:\n" + str(tree) + "\n\n")
            NNIs = NNI.allNNIs(tree)
            if len(NNIs)-1 < sampleSize:
                # Permanently lowers sampleSize for later iterations too.
                sampleSize = len(NNIs)-1
            toScore = random.sample(NNIs, sampleSize)

            # Sorting the (score, tree) pairs puts the lowest score first.
            scoredList = map(lambda x: (NNI.maxParsimony(x, tipMapping), x), toScore)
            sortedlist = sorted(scoredList)
            if sortedlist[0][0] < score:
                score = sortedlist[0][0]
                tree = sortedlist[0][1]
                output.write("Found A More Parsimonious Tree!\n\n")
            else:
                # Log before leaving the loop; this write used to sit after
                # the break and was never executed.
                output.write("No Neighbors With Better Scores Found\n\n")
                break
        output.write("Final Tree:\n" + str(tree) + "\nScore: " + str(score) + "\n\n")
        endTime = (time.clock() - startTime)
        output.write("Program End: " + str(endTime) + " seconds\n\n")
    return
Esempio n. 43
0
import copy

# Creates the distance matrix
calculator = DistanceCalculator('ident')
dm_ape = calculator.get_distance(alignApe)
dm_hiv = calculator.get_distance(alignHIV)


def _jukes_cantor_corrected(dm):
    """Return a Jukes-Cantor corrected deep copy of ``dm``.

    Every entry x becomes -3/4 * ln(1 - 4/3 * x). Working on a copy keeps the
    uncorrected matrix intact; plain assignment only aliased it, so the
    "uncorrected" and "corrected" trees were built from the same values.
    Float literals avoid integer division of -3/4 and 4/3 under Python 2.
    """
    corrected = copy.deepcopy(dm)
    for row in corrected.matrix:
        row[:] = [-0.75 * np.log(1 - 4.0 / 3.0 * x) for x in row]
    return corrected


# Jukes Cantor corrections
dm_ape_corrected = _jukes_cantor_corrected(dm_ape)
dm_hiv_corrected = _jukes_cantor_corrected(dm_hiv)


# Constructs the tree using the upgma algorithm
constructor = DistanceTreeConstructor()

tree_ape = constructor.upgma(dm_ape)
tree_ape_corrected = constructor.upgma(dm_ape_corrected)

tree_hiv = constructor.upgma(dm_hiv)
tree_hiv_corrected = constructor.upgma(dm_hiv_corrected)

# Outputs the trees as a xml
Phylo.write(tree_ape, 'treeApe.xml', 'phyloxml')
Phylo.write(tree_ape_corrected, 'treeApe_corrected.xml', 'phyloxml')

Phylo.write(tree_hiv, 'treeHIV.xml', 'phyloxml')
Phylo.write(tree_hiv_corrected, 'treeHIV_corrected.xml', 'phyloxml')
Esempio n. 44
0
def compute_tree(options, mat, names):
    """Run UPGMA clustering on ``mat`` and export the resulting tree.

    ``mat`` is read as ``mat[name_a][name_b]`` for the labels in ``names``.
    The tree is written as Newick to ``tree_path(options)``; a PNG drawing and
    a graphviz dot file are written next to it (same path with "newick"
    replaced by "png" / "dot").
    """
    def _nudge(raw):
        # tree constructor writes 0-distances as 1s for some reason, so exact
        # 0 and 1 are moved off those values here and mapped back below
        dist = float(raw)
        if dist == 0.:
            return 1e-10
        if dist == 1.:
            return 1.1
        return dist

    # biopython wants a lower-triangular matrix: row i covers names 0..i
    matrix = [[_nudge(mat[row_name][col_name]) for col_name in names[:pos + 1]]
              for pos, row_name in enumerate(names)]
    dm = _DistanceMatrix(names, matrix)

    # upgma tree
    tree = DistanceTreeConstructor().upgma(dm)
    robust_makedirs(os.path.dirname(tree_path(options)))
    Phylo.write(tree, tree_path(options), "newick")

    # png tree -- note : doesn't work in toil
    def _label(clade):
        # hide the labels of the generated "Inner" nodes
        return "" if "Inner" in str(clade) else clade

    Phylo.draw_graphviz(tree, label_func = _label, node_size=1000, node_shape="s", font_size=10)
    pylab.savefig(tree_path(options).replace("newick", "png"))

    # graphviz: undirected networkx copy with the clade names kept as "label"
    nxgraph = nx.convert_node_labels_to_integers(
        nx.Graph(Phylo.to_networkx(tree)), label_attribute="label")

    for node_id in nxgraph.nodes():
        attrs = nxgraph.node[node_id]
        if "Inner" not in str(attrs["label"]):
            attrs["fontsize"] = 18
            continue
        # inner nodes: empty label, drawn as tiny as possible
        attrs["label"] = "\"\""
        attrs["width"] = 0.001
        attrs["height"] = 0.001

    for src, dst in nxgraph.edges():
        attrs = nxgraph.edge[src][dst]
        # in graphviz, weight means something else, so make it a label
        weight = float(attrs["weight"])
        # undo hack from above: tiny values and anything at or above 1 are
        # reported as 0.
        # NOTE(review): values above 1 also end up as 0 (they are first
        # clamped to 1, then 1 maps to 0) -- confirm that this is intended.
        if weight <= 1e-10 or weight >= 1.:
            weight = 0.
        attrs["weight"] = None
        attrs["label"] = "{0:.3g}".format(float(weight) * 100.)
        attrs["fontsize"] = 14
        attrs["len"] = draw_len(weight)
    nx.write_dot(nxgraph, tree_path(options).replace("newick", "dot"))