def unifrac_tasks_from_matrix(u, env_names, modes=UNIFRAC_DEFAULT_MODES):
    """Returns the UniFrac matrix, PCoA, and/or cluster from the matrix.

    u: distance matrix. It is copied with .copy() and indexed as u[i, i],
        so presumably a square numpy array -- confirm against callers.
    env_names: environment names, passed alongside u to every task
        (presumably one name per row/column of u).
    modes: collection of UNIFRAC_* mode constants selecting what to compute.

    Returns a dict with one entry per requested mode:
        UNIFRAC_DIST_MATRIX -> the tuple (u, env_names)
        UNIFRAC_PCOA        -> output_pca(...) of the PCoA of u
        UNIFRAC_CLUST_ENVS  -> whatever UPGMA_cluster returns
        UNIFRAC_NJ_ENVS     -> whatever nj returns
    Modes that were not requested are absent from the dict.
    """
    result = {}

    if UNIFRAC_DIST_MATRIX in modes:
        result[UNIFRAC_DIST_MATRIX] = (u, env_names)

    if UNIFRAC_PCOA in modes:
        point_matrix, eigvals = principal_coordinates_analysis(u)
        result[UNIFRAC_PCOA] = output_pca(point_matrix, eigvals, env_names)

    if UNIFRAC_CLUST_ENVS in modes:
        # Materialize a real list: on Python 3 map() would hand
        # UPGMA_cluster a lazy, one-shot iterator instead of a sequence.
        nodes = [PhyloNode(name) for name in env_names]
        BIG = 1e305
        # Work on a copy so the caller's matrix (also returned above under
        # UNIFRAC_DIST_MATRIX) keeps its original diagonal.
        U = u.copy()
        # Presumably the diagonal is pushed to BIG so that self-distances
        # are never chosen as the closest pair.
        for i in range(len(U)):
            U[i, i] = BIG
        result[UNIFRAC_CLUST_ENVS] = UPGMA_cluster(U, nodes, BIG)

    if UNIFRAC_NJ_ENVS in modes:
        result[UNIFRAC_NJ_ENVS] = nj(dists_to_nj(u, env_names))

    return result
def get_clusters(x_original, axis='row'):
    """Performs UPGMA clustering using euclidean distances

    x_original: 2-D array-like exposing .copy(), .T and .shape. Only a copy
        is worked on.
    axis: 'row' (default) clusters the rows; 'column' clusters the columns
        by transposing first. Any other value behaves like 'row'.

    Returns a list of int indices (into the clustered axis) in the order
    the tips are yielded by the UPGMA tree's iterTips().
    """
    x = x_original.copy()
    if axis == 'column':
        x = x.T
    nr = x.shape[0]
    metric_f = get_nonphylogenetic_metric('euclidean')
    row_dissims = metric_f(x)

    # do upgma - rows
    BIG = 1e305
    # A list (not map()) so the node collection is a real sequence on
    # Python 3 as well as Python 2. Tip names are the row indices as str.
    row_nodes = [PhyloNode(str(i)) for i in range(nr)]
    # Presumably the diagonal is pushed to BIG so self-distances are never
    # picked as the closest pair.
    for i in range(len(row_dissims)):
        row_dissims[i, i] = BIG
    row_tree = UPGMA_cluster(row_dissims, row_nodes, BIG)

    if row_tree is None:
        # UPGMA_cluster can come back with None (seemingly when there are
        # fewer than two items to join); fall back to the original order
        # instead of failing on None.iterTips().
        return list(range(nr))

    return [int(tip.Name) for tip in row_tree.iterTips()]
Esempio n. 3
0
def get_clusters(x_original, axis='row'):
    """Performs UPGMA clustering using euclidean distances

    x_original: 2-D array-like providing .copy(), .T and .shape; the
        function operates on a copy of it.
    axis: 'row' (the default) to cluster rows, 'column' to cluster columns
        (the copy is transposed first). Other values behave like 'row'.

    Returns the int indices of the clustered axis, listed in the order in
    which the resulting tree's iterTips() yields its tips.
    """
    x = x_original.copy()
    if axis == 'column':
        x = x.T
    nr = x.shape[0]
    metric_f = get_nonphylogenetic_metric('euclidean')
    row_dissims = metric_f(x)

    # do upgma - rows
    BIG = 1e305
    # Build an actual list of nodes named "0".."nr-1"; map() would be a
    # lazy iterator on Python 3.
    row_nodes = [PhyloNode(str(idx)) for idx in range(nr)]
    # Presumably BIG on the diagonal keeps an item from being paired with
    # itself.
    for i in range(len(row_dissims)):
        row_dissims[i, i] = BIG
    row_tree = UPGMA_cluster(row_dissims, row_nodes, BIG)

    if row_tree is None:
        # No tree was produced (seemingly the case when there are fewer
        # than two items): keep the input order rather than raising
        # AttributeError on None.
        return list(range(nr))

    return [int(tip.Name) for tip in row_tree.iterTips()]
Esempio n. 4
0
 def test_upgma_cluster(self):
     """UPGMA_cluster clusters nodes based on info in a matrix with UPGMA

     Runs UPGMA_cluster on the fixture matrix and node order and compares
     the string form of the returned tree with the expected Newick text.
     """
     expected_newick = \
         '(((a:0.5,b:0.5):1.75,c:2.25):5.875,(d:1.0,e:1.0):7.125);'
     # third argument is UPGMA_cluster's "large number" value
     clustered = UPGMA_cluster(self.matrix, self.node_order, 9999999999)
     self.assertEqual(str(clustered), expected_newick)
Esempio n. 5
0
def single_file_upgma(input_file, output_file):
    """Builds a UPGMA tree from a distance matrix file, writes it as Newick.

    input_file: path of a distance matrix file readable by parse_distmat.
    output_file: path the Newick string (with distances) is written to;
        opened in 'w' mode, so existing content is replaced.

    Returns None. Both files are closed even if parsing or writing fails.
    """
    # read in dist matrix
    # NOTE(review): 'U' (universal newlines) is kept from the original; it
    # is deprecated on Python 3 and removed in 3.11 -- use 'r' there.
    with open(input_file, 'U') as f:
        headers, data = parse_distmat(f)

    # do upgma
    # A list rather than map(), so this is a real sequence on Python 3 too.
    nodes = [PhyloNode(name) for name in headers]
    BIG = 1e305
    # Copy so the parsed matrix keeps its own diagonal; presumably BIG on
    # the diagonal stops self-distances from being picked as closest pair.
    U = data.copy()
    for i in range(len(U)):
        U[i, i] = BIG
    c = UPGMA_cluster(U, nodes, BIG)

    # write output
    with open(output_file, 'w') as f:
        f.write(c.getNewick(with_distances=True))
Esempio n. 6
0
 def test_UPGMA_cluster_diag(self):
     """UPGMA_cluster works when the diagonal has intermediate values

     Uses the matrix_five fixture and expects the same Newick text that
     test_upgma_cluster expects for its own matrix.
     """
     # test that checking the diagonal works
     expected_newick = \
         '(((a:0.5,b:0.5):1.75,c:2.25):5.875,(d:1.0,e:1.0):7.125);'
     # third argument is UPGMA_cluster's "large number" value
     clustered = UPGMA_cluster(self.matrix_five, self.node_order, 9999999999)
     self.assertEqual(str(clustered), expected_newick)
def single_file_upgma(input_file, output_file):
    """Builds a UPGMA tree from a distance matrix file, writes it as Newick.

    input_file: path of a distance matrix file readable by parse_distmat.
    output_file: path the Newick string (with distances) is written to;
        opened in 'w' mode, so existing content is replaced.

    Raises RuntimeError when UPGMA_cluster returns None (no tree was made).
    The check happens before output_file is opened, so a failed run does
    not leave an empty output file behind. Returns None.
    """
    # read in dist matrix
    # NOTE(review): 'U' (universal newlines) is kept from the original; it
    # is deprecated on Python 3 and removed in 3.11 -- use 'r' there.
    with open(input_file, 'U') as f:
        headers, data = parse_distmat(f)

    # do upgma
    # A list rather than map(), so this is a real sequence on Python 3 too.
    nodes = [PhyloNode(name) for name in headers]
    BIG = 1e305
    # Copy so the parsed matrix keeps its own diagonal; presumably BIG on
    # the diagonal stops self-distances from being picked as closest pair.
    U = data.copy()
    for i in range(len(U)):
        U[i, i] = BIG
    c = UPGMA_cluster(U, nodes, BIG)

    # Identity check: "== None" would go through any custom __eq__ on c.
    if c is None:
        raise RuntimeError("""input file %s did not make a UPGMA tree.
 Ensure it has more than one sample present""" % (str(input_file),))

    # write output
    with open(output_file, 'w') as f:
        f.write(c.getNewick(with_distances=True))
def single_file_upgma(input_file, output_file):
    """Reads a distance matrix, clusters it with UPGMA, writes Newick.

    input_file: path of a distance matrix file readable by parse_distmat.
    output_file: destination path for the Newick string (with distances);
        opened in 'w' mode, so any existing content is replaced.

    Raises RuntimeError if UPGMA_cluster returns None, i.e. no tree was
    built. This is detected before output_file is opened, so no empty
    output file is created on that failure. Returns None.
    """
    # read in dist matrix
    # NOTE(review): mode 'U' (universal newlines) is preserved as-is; it is
    # deprecated on Python 3 and gone in 3.11, where plain 'r' is needed.
    with open(input_file, 'U') as f:
        headers, data = parse_distmat(f)

    # do upgma
    # List comprehension instead of map(): map() is lazy on Python 3.
    nodes = [PhyloNode(header) for header in headers]
    BIG = 1e305
    # Cluster on a copy; presumably the BIG diagonal prevents an item from
    # being matched with itself.
    U = data.copy()
    for i in range(len(U)):
        U[i, i] = BIG
    c = UPGMA_cluster(U, nodes, BIG)

    # "is None" avoids invoking any custom equality defined on the tree.
    if c is None:
        raise RuntimeError("""input file %s did not make a UPGMA tree.
 Ensure it has more than one sample present""" % (str(input_file), ))

    # write output
    with open(output_file, 'w') as f:
        f.write(c.getNewick(with_distances=True))