Example #1
0
    def test_procrustes2(self):
        """procrustes disparity should not depend on order of matrices"""
        m1, m3, disp13 = procrustes(self.data1, self.data3)
        m3_2, m1_2, disp31 = procrustes(self.data3, self.data1)
        np.testing.assert_almost_equal(disp13, disp31)

        # try with 3d, 8 pts per
        rand1 = np.array([[2.61955202, 0.30522265, 0.55515826],
                          [0.41124708, -0.03966978, -0.31854548],
                          [0.91910318, 1.39451809, -0.15295084],
                          [2.00452023, 0.50150048, 0.29485268],
                          [0.09453595, 0.67528885, 0.03283872],
                          [0.07015232, 2.18892599, -1.67266852],
                          [0.65029688, 1.60551637, 0.80013549],
                          [-0.6607528, 0.53644208, 0.17033891]])

        rand3 = np.array([[0.0809969, 0.09731461, -0.173442],
                          [-1.84888465, -0.92589646, -1.29335743],
                          [0.67031855, -1.35957463, 0.41938621],
                          [0.73967209, -0.20230757, 0.52418027],
                          [0.17752796, 0.09065607, 0.29827466],
                          [0.47999368, -0.88455717, -0.57547934],
                          [-0.11486344, -0.12608506, -0.3395779],
                          [-0.86106154, -0.28687488, 0.9644429]])
        res1, res3, disp13 = procrustes(rand1, rand3)
        res3_2, res1_2, disp31 = procrustes(rand3, rand1)
        np.testing.assert_almost_equal(disp13, disp31)
Example #2
0
    def test_procrustes2(self):
        """procrustes disparity should not depend on order of matrices"""
        m1, m3, disp13 = procrustes(self.data1, self.data3)
        m3_2, m1_2, disp31 = procrustes(self.data3, self.data1)
        np.testing.assert_almost_equal(disp13, disp31)

        # try with 3d, 8 pts per
        rand1 = np.array([[2.61955202,  0.30522265,  0.55515826],
                         [0.41124708, -0.03966978, -0.31854548],
                         [0.91910318,  1.39451809, -0.15295084],
                         [2.00452023,  0.50150048,  0.29485268],
                         [0.09453595,  0.67528885,  0.03283872],
                         [0.07015232,  2.18892599, -1.67266852],
                         [0.65029688,  1.60551637,  0.80013549],
                         [-0.6607528,  0.53644208,  0.17033891]])

        rand3 = np.array([[0.0809969,  0.09731461, -0.173442],
                         [-1.84888465, -0.92589646, -1.29335743],
                         [0.67031855, -1.35957463,  0.41938621],
                         [0.73967209, -0.20230757,  0.52418027],
                         [0.17752796,  0.09065607,  0.29827466],
                         [0.47999368, -0.88455717, -0.57547934],
                         [-0.11486344, -0.12608506, -0.3395779],
                         [-0.86106154, -0.28687488,  0.9644429]])
        res1, res3, disp13 = procrustes(rand1, rand3)
        res3_2, res1_2, disp31 = procrustes(rand3, rand1)
        np.testing.assert_almost_equal(disp13, disp31)
Example #3
0
    def test_summarize_pcoas(self):
        """summarize_pcoas works
        """
        master_pcoa = [['1', '2', '3'], \
            array([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]]), \
            array([.76, .24])]
        jn1 = [['1', '2', '3'], \
            array([[1.2, 0.1, -1.2],[-2.5, -4.0, 4.5]]), \
            array([0.80, .20])]
        jn2 = [['1', '2', '3'], \
            array([[-1.4, 0.05, 1.3],[2.6, 4.1, -4.7]]), \
            array([0.76, .24])]
        jn3 = [['1', '2', '3'], \
            array([[-1.5, 0.05, 1.6],[2.4, 4.0, -4.8]]), \
            array([0.84, .16])]
        jn4 = [['1', '2', '3'], \
            array([[-1.5, 0.05, 1.6],[2.4, 4.0, -4.8]]), \
            array([0.84, .16])]
        support_pcoas = [jn1, jn2, jn3, jn4]
        #test with the ideal_fourths option
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, 'ideal_fourths',
                            apply_procrustes=False)
        self.assertEqual(m_names, ['1', '2', '3'])
        assert_almost_equal(matrix_average[(0, 0)], -1.4)
        assert_almost_equal(matrix_average[(0, 1)], 0.0125)
        assert_almost_equal(matrix_low[(0, 0)], -1.5)
        assert_almost_equal(matrix_high[(0, 0)], -1.28333333)
        assert_almost_equal(matrix_low[(0, 1)], -0.0375)
        assert_almost_equal(matrix_high[(0, 1)], 0.05)
        assert_almost_equal(eigval_average[0], 0.81)
        assert_almost_equal(eigval_average[1], 0.19)
        #test with the IQR option
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, method='IQR',
                            apply_procrustes=False)
        assert_almost_equal(matrix_low[(0, 0)], -1.5)
        assert_almost_equal(matrix_high[(0, 0)], -1.3)

        #test with procrustes option followed by sdev
        m, m1, msq = procrustes(master_pcoa[1], jn1[1])
        m, m2, msq = procrustes(master_pcoa[1], jn2[1])
        m, m3, msq = procrustes(master_pcoa[1], jn3[1])
        m, m4, msq = procrustes(master_pcoa[1], jn4[1])
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, method='sdev',
                            apply_procrustes=True)

        x = array([m1[0, 0], m2[0, 0], m3[0, 0], m4[0, 0]])
        self.assertEqual(x.mean(), matrix_average[0, 0])
        self.assertEqual(-x.std(ddof=1) / 2, matrix_low[0, 0])
        self.assertEqual(x.std(ddof=1) / 2, matrix_high[0, 0])
    def test_summarize_pcoas(self):
        """summarize_pcoas works
        """
        master_pcoa = [['1', '2', '3'], \
            array([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]]), \
            array([.76, .24])]
        jn1 = [['1', '2', '3'], \
            array([[1.2, 0.1, -1.2],[-2.5, -4.0, 4.5]]), \
            array([0.80, .20])]
        jn2 = [['1', '2', '3'], \
            array([[-1.4, 0.05, 1.3],[2.6, 4.1, -4.7]]), \
            array([0.76, .24])]
        jn3 = [['1', '2', '3'], \
            array([[-1.5, 0.05, 1.6],[2.4, 4.0, -4.8]]), \
            array([0.84, .16])]
        jn4 = [['1', '2', '3'], \
            array([[-1.5, 0.05, 1.6],[2.4, 4.0, -4.8]]), \
            array([0.84, .16])]
        support_pcoas = [jn1, jn2, jn3, jn4]
        #test with the ideal_fourths option
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, 'ideal_fourths',
                            apply_procrustes=False)
        self.assertEqual(m_names, ['1', '2', '3'])
        assert_almost_equal(matrix_average[(0,0)], -1.4)
        assert_almost_equal(matrix_average[(0,1)], 0.0125)
        assert_almost_equal(matrix_low[(0,0)], -1.5)
        assert_almost_equal(matrix_high[(0,0)], -1.28333333)
        assert_almost_equal(matrix_low[(0,1)], -0.0375)
        assert_almost_equal(matrix_high[(0,1)], 0.05)
        assert_almost_equal(eigval_average[0], 0.81)
        assert_almost_equal(eigval_average[1], 0.19)
        #test with the IQR option
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, method='IQR',
                            apply_procrustes=False)
        assert_almost_equal(matrix_low[(0,0)], -1.5)
        assert_almost_equal(matrix_high[(0,0)], -1.3)

        #test with procrustes option followed by sdev
        m, m1, msq = procrustes(master_pcoa[1],jn1[1])
        m, m2, msq = procrustes(master_pcoa[1],jn2[1])
        m, m3, msq = procrustes(master_pcoa[1],jn3[1])
        m, m4, msq = procrustes(master_pcoa[1],jn4[1])
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, method='sdev',
                            apply_procrustes=True)

        x = array([m1[0,0],m2[0,0],m3[0,0],m4[0,0]])
        self.assertEqual(x.mean(),matrix_average[0,0])
        self.assertEqual(-x.std(ddof=1)/2,matrix_low[0,0])
        self.assertEqual(x.std(ddof=1)/2,matrix_high[0,0])
Example #5
0
def summarize_pcoas(master_pcoa, support_pcoas, method='IQR', apply_procrustes=True):
    """returns the average PCoA vector values for the support pcoas

    Also returns the ranges as calculated with the specified method.
    The choices are:
        IQR: the Interquartile Range
        ideal fourths: Ideal fourths method as implemented in scipy
    """
    if apply_procrustes:
        # perform procrustes before averaging
        support_pcoas = [list(sp) for sp in support_pcoas]
        master_pcoa = list(master_pcoa)
        for i, pcoa in enumerate(support_pcoas):
            master_std, pcoa_std, m_squared = procrustes(master_pcoa[1],pcoa[1])
            support_pcoas[i][1] = pcoa_std
        master_pcoa[1] = master_std

    m_matrix = master_pcoa[1]
    m_eigvals = master_pcoa[2]
    m_names = master_pcoa[0]
    jn_flipped_matrices = []
    all_eigvals = []
    for rep in support_pcoas:
        matrix = rep[1]
        eigvals = rep[2]
        all_eigvals.append(eigvals)
        jn_flipped_matrices.append(_flip_vectors(matrix, m_matrix))
    matrix_average, matrix_low, matrix_high = _compute_jn_pcoa_avg_ranges(\
            jn_flipped_matrices, method)
    #compute average eigvals
    all_eigvals_stack = vstack(all_eigvals)
    eigval_sum = numpy_sum(all_eigvals_stack, axis=0)
    eigval_average = eigval_sum / float(len(all_eigvals))
    return matrix_average, matrix_low, matrix_high, eigval_average, m_names
Example #6
0
    def test_procrustes(self):
        """tests procrustes' ability to match two matrices.

        the second matrix is a rotated, shifted, scaled, and mirrored version
        of the first, in two dimensions only
        """
        # can shift, mirror, and scale an 'L'?
        a, b, disparity = procrustes(self.data1, self.data2)
        np.testing.assert_allclose(b, a)
        np.testing.assert_almost_equal(disparity, 0.)

        # if first mtx is standardized, leaves first mtx unchanged?
        m4, m5, disp45 = procrustes(self.data4, self.data5)
        np.testing.assert_equal(m4, self.data4)

        # at worst, data3 is an 'L' with one point off by .5
        m1, m3, disp13 = procrustes(self.data1, self.data3)
        self.assertTrue(disp13 < 0.5 ** 2)
Example #7
0
    def test_procrustes(self):
        """tests procrustes' ability to match two matrices.

        the second matrix is a rotated, shifted, scaled, and mirrored version
        of the first, in two dimensions only
        """
        # can shift, mirror, and scale an 'L'?
        a, b, disparity = procrustes(self.data1, self.data2)
        np.testing.assert_allclose(b, a)
        np.testing.assert_almost_equal(disparity, 0.)

        # if first mtx is standardized, leaves first mtx unchanged?
        m4, m5, disp45 = procrustes(self.data4, self.data5)
        np.testing.assert_equal(m4, self.data4)

        # at worst, data3 is an 'L' with one point off by .5
        m1, m3, disp13 = procrustes(self.data1, self.data3)
        self.assertTrue(disp13 < 0.5**2)
Example #8
0
def summarize_pcoas(master_pcoa,
                    support_pcoas,
                    method='IQR',
                    apply_procrustes=True):
    """returns the average PCoA vector values for the support pcoas

    Also returns the ranges as calculated with the specified method.
    The choices are:
        IQR: the Interquartile Range
        ideal fourths: Ideal fourths method as implemented in scipy
    """
    if apply_procrustes:
        # perform procrustes before averaging
        support_pcoas = [list(sp) for sp in support_pcoas]
        master_pcoa = list(master_pcoa)
        for i, pcoa in enumerate(support_pcoas):
            master_std, pcoa_std, m_squared = procrustes(
                master_pcoa[1], pcoa[1])
            support_pcoas[i][1] = pcoa_std
        master_pcoa[1] = master_std

    m_matrix = master_pcoa[1]
    m_eigvals = master_pcoa[2]
    m_names = master_pcoa[0]
    jn_flipped_matrices = []
    all_eigvals = []
    for rep in support_pcoas:
        matrix = rep[1]
        eigvals = rep[2]
        all_eigvals.append(eigvals)
        jn_flipped_matrices.append(_flip_vectors(matrix, m_matrix))
    matrix_average, matrix_low, matrix_high = _compute_jn_pcoa_avg_ranges(\
            jn_flipped_matrices, method)
    #compute average eigvals
    all_eigvals_stack = vstack(all_eigvals)
    eigval_sum = numpy_sum(all_eigvals_stack, axis=0)
    eigval_average = eigval_sum / float(len(all_eigvals))
    return matrix_average, matrix_low, matrix_high, eigval_average, m_names
Example #9
0
 def test_procrustes_shape_mismatch(self):
     with self.assertRaises(ValueError):
         procrustes(np.array([[1, 2], [3, 4]]),
                    np.array([[5, 6, 7], [8, 9, 10]]))
Example #10
0
from skbio.stats.distance import mantel
r, p_value, n = mantel(j_dm, bc_dm)
print(r)
# -0.209362157621
print(p_value < 0.05)
# False

# Compute PCoA for both distance matrices, and then find the Procrustes
# M-squared value that results from comparing the coordinate matrices.

from skbio.stats.ordination import PCoA
bc_pc = PCoA(bc_dm).scores()
j_pc = PCoA(j_dm).scores()
from skbio.stats.spatial import procrustes
print(procrustes(bc_pc.site, j_pc.site)[2])
# 0.466134984787

# All of this only gets interesting in the context of sample metadata, so
# let's define some:

import pandas as pd
try:
    # not necessary for normal use
    pd.set_option('show_dimensions', True)
except KeyError:
    pass
sample_md = {
    'A': {
        'body_site': 'gut',
        'subject': 's1'
Example #11
0
 def test_procrustes_no_variation(self):
     with self.assertRaises(ValueError):
         procrustes(np.array([[42, 42], [42, 42]]),
                    np.array([[45, 45], [45, 45]]))
Example #12
0
 def test_procrustes_empty_rows_or_cols(self):
     empty = np.array([[]])
     with self.assertRaises(ValueError):
         procrustes(empty, empty)
def get_procrustes_results(coords_f1, coords_f2, sample_id_map=None,
                           randomize=None, max_dimensions=None,
                           get_eigenvalues=get_mean_eigenvalues,
                           get_percent_variation_explained=get_mean_percent_variation):
    """ """
    # Parse the PCoA files
    ord_res_1 = OrdinationResults.read(coords_f1)
    ord_res_2 = OrdinationResults.read(coords_f2)

    sample_ids1 = ord_res_1.site_ids
    coords1 = ord_res_1.site
    eigvals1 = ord_res_1.eigvals
    pct_var1 = ord_res_1.proportion_explained

    sample_ids2 = ord_res_2.site_ids
    coords2 = ord_res_2.site
    eigvals2 = ord_res_2.eigvals
    pct_var2 = ord_res_2.proportion_explained

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)
    # rearrange the order of coords in coords2 to correspond to
    # the order of coords in coords1
    order = list(set(sample_ids1) & set(sample_ids2))
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)
    if len(order) == 0:
        raise ValueError('No overlapping samples in the two files')

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)
        randomized_coords2 = OrdinationResults(eigvals=eigvals2,
                                               proportion_explained=pct_var2,
                                               site=coords2,
                                               site_ids=order)
    else:
        randomized_coords2 = None

    coords1, coords2 = pad_coords_matrices(coords1, coords2)
    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]
    else:
        if len(pct_var1) > len(pct_var2):
            pct_var2 = append(pct_var2, zeros(len(pct_var1) - len(pct_var2)))
            eigvals2 = append(eigvals2, zeros(len(eigvals1) - len(eigvals2)))
        elif len(pct_var1) < len(pct_var2):
            pct_var1 = append(pct_var1, zeros(len(pct_var2) - len(pct_var1)))
            eigvals1 = append(eigvals1, zeros(len(eigvals2) - len(eigvals1)))

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared =\
        procrustes(coords1, coords2)
    # print coords2
    # print transformed_coords_m2

    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = OrdinationResults(eigvals=asarray(eigvals),
                                            proportion_explained=asarray(pct_var),
                                            site=asarray(transformed_coords_m1),
                                            site_ids=order)
    transformed_coords2 = OrdinationResults(eigvals=asarray(eigvals),
                                            proportion_explained=asarray(pct_var),
                                            site=asarray(transformed_coords_m2),
                                            site_ids=order)

    # Return the results
    return (transformed_coords1, transformed_coords2,
            m_squared, randomized_coords2)
Example #14
0
 def test_procrustes_shape_mismatch(self):
     with self.assertRaises(ValueError):
         procrustes(np.array([[1, 2], [3, 4]]),
                    np.array([[5, 6, 7], [8, 9, 10]]))
Example #15
0
def get_procrustes_results(coords_f1, coords_f2, sample_id_map=None,
                           randomize=None, max_dimensions=None,
                           get_eigenvalues=get_mean_eigenvalues,
                           get_percent_variation_explained=get_mean_percent_variation):
    """ """
    # Parse the PCoA files
    ord_res_1 = OrdinationResults.read(coords_f1)
    ord_res_2 = OrdinationResults.read(coords_f2)

    sample_ids1 = ord_res_1.site_ids
    coords1 = ord_res_1.site
    eigvals1 = ord_res_1.eigvals
    pct_var1 = ord_res_1.proportion_explained

    sample_ids2 = ord_res_2.site_ids
    coords2 = ord_res_2.site
    eigvals2 = ord_res_2.eigvals
    pct_var2 = ord_res_2.proportion_explained

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)
    # rearrange the order of coords in coords2 to correspond to
    # the order of coords in coords1
    order = list(set(sample_ids1) & set(sample_ids2))
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)
    if len(order) == 0:
        raise ValueError('No overlapping samples in the two files')

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)
        randomized_coords2 = OrdinationResults(eigvals=eigvals2,
                                               proportion_explained=pct_var2,
                                               site=coords2,
                                               site_ids=order)
    else:
        randomized_coords2 = None

    coords1, coords2 = pad_coords_matrices(coords1, coords2)
    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]
    else:
        if len(pct_var1) > len(pct_var2):
            pct_var2 = append(pct_var2, zeros(len(pct_var1) - len(pct_var2)))
            eigvals2 = append(eigvals2, zeros(len(eigvals1) - len(eigvals2)))
        elif len(pct_var1) < len(pct_var2):
            pct_var1 = append(pct_var1, zeros(len(pct_var2) - len(pct_var1)))
            eigvals1 = append(eigvals1, zeros(len(eigvals2) - len(eigvals1)))

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared =\
        procrustes(coords1, coords2)
    # print coords2
    # print transformed_coords_m2

    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = OrdinationResults(eigvals=asarray(eigvals),
                                            proportion_explained=asarray(pct_var),
                                            site=asarray(transformed_coords_m1),
                                            site_ids=order)
    transformed_coords2 = OrdinationResults(eigvals=asarray(eigvals),
                                            proportion_explained=asarray(pct_var),
                                            site=asarray(transformed_coords_m2),
                                            site_ids=order)

    # Return the results
    return (transformed_coords1, transformed_coords2,
            m_squared, randomized_coords2)
Example #16
0
 def test_procrustes_no_variation(self):
     with self.assertRaises(ValueError):
         procrustes(np.array([[42, 42], [42, 42]]),
                    np.array([[45, 45], [45, 45]]))
Example #17
0
 def test_procrustes_empty_rows_or_cols(self):
     empty = np.array([[]])
     with self.assertRaises(ValueError):
         procrustes(empty, empty)