Esempio n. 1
0
    def test_linkage_matrix(self):
        # Ensure matches: http://www.southampton.ac.uk/~re1u06/teaching/upgma/
        id_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
        linkage = np.asarray([[1.0, 5.0, 1.0, 2.0], [0.0, 3.0, 8.0, 2.0],
                              [6.0, 7.0, 12.5, 3.0], [8.0, 9.0, 16.5, 5.0],
                              [2.0, 10.0, 29.0, 6.0], [4.0, 11.0, 34.0, 7.0]])

        tree = TreeNode.from_linkage_matrix(linkage, id_list)
        self.assertEquals(
            "(E:17.0,(C:14.5,((A:4.0,D:4.0):4.25,(G:6.25,(B:0.5,"
            "F:0.5):5.75):2.0):6.25):2.5);",
            tree.to_newick(with_distances=True))
Esempio n. 2
0
def get_clusters(x_original, axis=['row', 'column'][0]):
    """Performs UPGMA clustering using euclidean distances"""
    x = x_original.copy()
    if axis == 'column':
        x = x.T
    nr = x.shape[0]
    metric_f = get_nonphylogenetic_metric('euclidean')
    row_dissims = DistanceMatrix(metric_f(x), map(str, range(nr)))
    # do upgma - rows
    # Average in SciPy's cluster.heirarchy.linkage is UPGMA
    linkage_matrix = linkage(row_dissims.condensed_form(), method='average')
    tree = TreeNode.from_linkage_matrix(linkage_matrix, row_dissims.ids)
    row_order = [int(tip.name) for tip in tree.tips()]
    return row_order
Esempio n. 3
0
    def test_linkage_matrix(self):
        # Ensure matches: http://www.southampton.ac.uk/~re1u06/teaching/upgma/
        id_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
        linkage = np.asarray([[1.0,  5.0,  1.0,  2.0],
                              [0.0,  3.0,  8.0,  2.0],
                              [6.0,  7.0, 12.5,  3.0],
                              [8.0,  9.0, 16.5,  5.0],
                              [2.0, 10.0, 29.0,  6.0],
                              [4.0, 11.0, 34.0,  7.0]])

        tree = TreeNode.from_linkage_matrix(linkage, id_list)
        self.assertEqual("(E:17.0,(C:14.5,((A:4.0,D:4.0):4.25,(G:6.25,(B:0.5,"
                         "F:0.5):5.75):2.0):6.25):2.5);",
                         tree.to_newick(with_distances=True))
Esempio n. 4
0
def get_clusters(x_original, axis=['row', 'column'][0]):
    """Performs UPGMA clustering using euclidean distances"""
    x = x_original.copy()
    if axis == 'column':
        x = x.T
    nr = x.shape[0]
    metric_f = get_nonphylogenetic_metric('euclidean')
    row_dissims = DistanceMatrix(metric_f(x), map(str, range(nr)))
    # do upgma - rows
    # Average in SciPy's cluster.heirarchy.linkage is UPGMA
    linkage_matrix = linkage(row_dissims.condensed_form(), method='average')
    tree = TreeNode.from_linkage_matrix(linkage_matrix, row_dissims.ids)
    row_order = [int(tip.name) for tip in tree.tips()]
    return row_order
Esempio n. 5
0
def single_file_upgma(input_file, output_file):
    # read in dist matrix
    dist_mat = DistanceMatrix.from_file(input_file)

    # SciPy uses average as UPGMA:
    # http://docs.scipy.org/doc/scipy/reference/generated/
    #    scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage
    linkage_matrix = linkage(dist_mat.condensed_form(), method='average')

    tree = TreeNode.from_linkage_matrix(linkage_matrix, dist_mat.ids)

    # write output
    f = open(output_file, 'w')
    try:
        f.write(tree.to_newick(with_distances=True))
    except AttributeError:
        if c is None:
            raise RuntimeError("""input file %s did not make a UPGMA tree.
 Ensure it has more than one sample present""" % (str(input_file), ))
        raise
    f.close()
def single_file_upgma(input_file, output_file):
    # read in dist matrix
    dist_mat = DistanceMatrix.from_file(input_file)

    # SciPy uses average as UPGMA:
    # http://docs.scipy.org/doc/scipy/reference/generated/
    #    scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage
    linkage_matrix = linkage(dist_mat.condensed_form(), method='average')

    tree = TreeNode.from_linkage_matrix(linkage_matrix, dist_mat.ids)

    # write output
    f = open(output_file, 'w')
    try:
        f.write(tree.to_newick(with_distances=True))
    except AttributeError:
        if c is None:
            raise RuntimeError("""input file %s did not make a UPGMA tree.
 Ensure it has more than one sample present""" % (str(input_file),))
        raise
    f.close()