Beispiel #1
0
    def test_procrustes2(self):
        """The disparity from procrustes is the same for either input order."""
        # 3d case, 8 points per matrix
        rand1 = array([
            [2.61955202, 0.30522265, 0.55515826],
            [0.41124708, -0.03966978, -0.31854548],
            [0.91910318, 1.39451809, -0.15295084],
            [2.00452023, 0.50150048, 0.29485268],
            [0.09453595, 0.67528885, 0.03283872],
            [0.07015232, 2.18892599, -1.67266852],
            [0.65029688, 1.60551637, 0.80013549],
            [-0.6607528, 0.53644208, 0.17033891],
        ])
        rand3 = array([
            [0.0809969, 0.09731461, -0.173442],
            [-1.84888465, -0.92589646, -1.29335743],
            [0.67031855, -1.35957463, 0.41938621],
            [0.73967209, -0.20230757, 0.52418027],
            [0.17752796, 0.09065607, 0.29827466],
            [0.47999368, -0.88455717, -0.57547934],
            [-0.11486344, -0.12608506, -0.3395779],
            [-0.86106154, -0.28687488, 0.9644429],
        ])

        # fixture matrices first, then the 3d matrices built above
        for first, second in ((self.data1, self.data3), (rand1, rand3)):
            # only the third return value (the disparity) is compared
            _, _, forward = procrustes(first, second)
            _, _, backward = procrustes(second, first)
            self.assertFloatEqual(forward, backward)
Beispiel #2
0
 def test_procrustes2(self):
     """Swapping the inputs of procrustes must not change the disparity."""

     def assert_order_independent(mtx_a, mtx_b):
         # only the third return value (the disparity) is compared
         _, _, disp_ab = procrustes(mtx_a, mtx_b)
         _, _, disp_ba = procrustes(mtx_b, mtx_a)
         self.assertFloatEqual(disp_ab, disp_ba)

     assert_order_independent(self.data1, self.data3)

     # try with 3d, 8 pts per
     rand1 = array([
         [2.61955202, 0.30522265, 0.55515826],
         [0.41124708, -0.03966978, -0.31854548],
         [0.91910318, 1.39451809, -0.15295084],
         [2.00452023, 0.50150048, 0.29485268],
         [0.09453595, 0.67528885, 0.03283872],
         [0.07015232, 2.18892599, -1.67266852],
         [0.65029688, 1.60551637, 0.80013549],
         [-0.6607528, 0.53644208, 0.17033891],
     ])
     rand3 = array([
         [0.0809969, 0.09731461, -0.173442],
         [-1.84888465, -0.92589646, -1.29335743],
         [0.67031855, -1.35957463, 0.41938621],
         [0.73967209, -0.20230757, 0.52418027],
         [0.17752796, 0.09065607, 0.29827466],
         [0.47999368, -0.88455717, -0.57547934],
         [-0.11486344, -0.12608506, -0.3395779],
         [-0.86106154, -0.28687488, 0.9644429],
     ])
     assert_order_independent(rand1, rand3)
Beispiel #3
0
    def test_summarize_pcoas(self):
        """summarize_pcoas returns averages and ranges for each range method.

        Exercises 'ideal_fourths' and 'IQR' without procrustes, then 'sdev'
        with procrustes applied to every support matrix first.
        """
        # each pcoa is [sample names, coordinate matrix, eigenvalues]
        master_pcoa = [['1', '2', '3'],
                       array([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]]),
                       array([.76, .24])]
        jn1 = [['1', '2', '3'],
               array([[1.2, 0.1, -1.2], [-2.5, -4.0, 4.5]]),
               array([0.80, .20])]
        jn2 = [['1', '2', '3'],
               array([[-1.4, 0.05, 1.3], [2.6, 4.1, -4.7]]),
               array([0.76, .24])]
        jn3 = [['1', '2', '3'],
               array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
               array([0.84, .16])]
        # jn4 carries the same values as jn3
        jn4 = [['1', '2', '3'],
               array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
               array([0.84, .16])]
        support_pcoas = [jn1, jn2, jn3, jn4]

        # test with the ideal_fourths option
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, 'ideal_fourths',
                            apply_procrustes=False)
        self.assertEqual(m_names, ['1', '2', '3'])
        self.assertFloatEqual(matrix_average[(0, 0)], -1.4)
        self.assertFloatEqual(matrix_average[(0, 1)], 0.0125)
        self.assertFloatEqual(matrix_low[(0, 0)], -1.5)
        self.assertFloatEqual(matrix_high[(0, 0)], -1.28333333)
        self.assertFloatEqual(matrix_low[(0, 1)], -0.0375)
        self.assertFloatEqual(matrix_high[(0, 1)], 0.05)
        self.assertFloatEqual(eigval_average[0], 0.81)
        self.assertFloatEqual(eigval_average[1], 0.19)

        # test with the IQR option
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, method='IQR',
                            apply_procrustes=False)
        self.assertFloatEqual(matrix_low[(0, 0)], -1.5)
        self.assertFloatEqual(matrix_high[(0, 0)], -1.3)

        # test with procrustes option followed by sdev: the expected values
        # come from fitting each support matrix to the master ourselves
        fitted = []
        for jn in support_pcoas:
            _, jn_fitted, _ = procrustes(master_pcoa[1], jn[1])
            fitted.append(jn_fitted)
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, method='sdev',
                            apply_procrustes=True)

        x = array([mtx[0, 0] for mtx in fitted])
        # These floats are computed along two different code paths, so
        # compare with a tolerance rather than bit-for-bit equality.
        self.assertFloatEqual(matrix_average[0, 0], x.mean())
        self.assertFloatEqual(matrix_low[0, 0], -x.std(ddof=1) / 2)
        self.assertFloatEqual(matrix_high[0, 0], x.std(ddof=1) / 2)
Beispiel #4
0
    def test_summarize_pcoas(self):
        """summarize_pcoas works for the ideal_fourths, IQR and sdev methods.

        The first two methods run without procrustes; 'sdev' runs with
        procrustes applied and is checked against values fitted here.
        """
        # each pcoa is [sample names, coordinate matrix, eigenvalues]
        master_pcoa = [
            ['1', '2', '3'],
            array([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]]),
            array([.76, .24]),
        ]
        jn1 = [
            ['1', '2', '3'],
            array([[1.2, 0.1, -1.2], [-2.5, -4.0, 4.5]]),
            array([0.80, .20]),
        ]
        jn2 = [
            ['1', '2', '3'],
            array([[-1.4, 0.05, 1.3], [2.6, 4.1, -4.7]]),
            array([0.76, .24]),
        ]
        jn3 = [
            ['1', '2', '3'],
            array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
            array([0.84, .16]),
        ]
        # jn4 carries the same values as jn3
        jn4 = [
            ['1', '2', '3'],
            array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
            array([0.84, .16]),
        ]
        support_pcoas = [jn1, jn2, jn3, jn4]

        # test with the ideal_fourths option
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, 'ideal_fourths',
                            apply_procrustes=False)
        self.assertEqual(m_names, ['1', '2', '3'])
        self.assertFloatEqual(matrix_average[(0, 0)], -1.4)
        self.assertFloatEqual(matrix_average[(0, 1)], 0.0125)
        self.assertFloatEqual(matrix_low[(0, 0)], -1.5)
        self.assertFloatEqual(matrix_high[(0, 0)], -1.28333333)
        self.assertFloatEqual(matrix_low[(0, 1)], -0.0375)
        self.assertFloatEqual(matrix_high[(0, 1)], 0.05)
        self.assertFloatEqual(eigval_average[0], 0.81)
        self.assertFloatEqual(eigval_average[1], 0.19)

        # test with the IQR option
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, method='IQR',
                            apply_procrustes=False)
        self.assertFloatEqual(matrix_low[(0, 0)], -1.5)
        self.assertFloatEqual(matrix_high[(0, 0)], -1.3)

        # test with procrustes option followed by sdev
        # only the fitted second matrix of each procrustes call is needed
        _, m1, _ = procrustes(master_pcoa[1], jn1[1])
        _, m2, _ = procrustes(master_pcoa[1], jn2[1])
        _, m3, _ = procrustes(master_pcoa[1], jn3[1])
        _, m4, _ = procrustes(master_pcoa[1], jn4[1])
        matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
            summarize_pcoas(master_pcoa, support_pcoas, method='sdev',
                            apply_procrustes=True)

        x = array([m1[0, 0], m2[0, 0], m3[0, 0], m4[0, 0]])
        half_sdev = x.std(ddof=1) / 2
        # Floating-point results from two separate computations: use the
        # tolerance-based assertion instead of exact equality.
        self.assertFloatEqual(matrix_average[0, 0], x.mean())
        self.assertFloatEqual(matrix_low[0, 0], -half_sdev)
        self.assertFloatEqual(matrix_high[0, 0], half_sdev)
def _zero_pad_to_match(first, second):
    """Return (first, second) with the shorter one zero-padded to equal length."""
    shortfall = len(first) - len(second)
    if shortfall > 0:
        second = append(second, zeros(shortfall))
    elif shortfall < 0:
        first = append(first, zeros(-shortfall))
    return first, second


def get_procrustes_results(
        coords_f1,
        coords_f2,
        sample_id_map=None,
        randomize=None,
        max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Fit two PCoA coordinate sets to each other with procrustes.

    coords_f1, coords_f2: PCoA coordinate inputs handed to parse_coords.
    sample_id_map: optional mapping passed to map_sample_ids to translate
        the sample IDs of both inputs before they are matched up.
    randomize: optional callable applied to the second coordinate matrix
        before fitting (used for random trials).
    max_dimensions: optional number of leading axes to keep; a falsy value
        keeps all of them.
    get_eigenvalues, get_percent_variation_explained: callables that
        combine the eigenvalue / percent-variation vectors of the two
        inputs into the single vector attached to both outputs.

    Returns (transformed_coords1, transformed_coords2, m_squared): the two
    format_coords results for the fitted matrices and the third value
    returned by procrustes.
    """
    # Parse the PCoA files
    sample_ids1, coords1, eigvals1, pct_var1 = parse_coords(coords_f1)
    sample_ids2, coords2, eigvals2, pct_var2 = parse_coords(coords_f2)

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Restrict both coordinate sets to the sample IDs they share and put
    # them in one common order (set iteration order, so it is not tied to
    # the order of either input).
    order = list(set(sample_ids1) & set(sample_ids2))
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)

    coords1, coords2 = pad_coords_matrices(coords1, coords2)
    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]

    # Truncation alone does not equalise the lengths when one input has
    # fewer than max_dimensions axes, so pad in both cases. Each pair is
    # padded on its own lengths; equal-length pairs are left untouched.
    pct_var1, pct_var2 = _zero_pad_to_match(pct_var1, pct_var2)
    eigvals1, eigvals2 = _zero_pad_to_match(eigvals1, eigvals2)

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    # Both outputs carry the same combined eigenvalues / percent variation
    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=transformed_coords_m1,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=transformed_coords_m2,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    # Return the results
    return transformed_coords1, transformed_coords2, m_squared
Beispiel #6
0
    def test_procrustes(self):
        """procrustes should superimpose two matching matrices.

        data2 is data1 after rotation, shifting, scaling and mirroring,
        in two dimensions only.
        """
        # a shifted, mirrored and scaled 'L' must be recovered exactly
        fit_first, fit_second, fit_disparity = procrustes(self.data1,
                                                          self.data2)
        self.assertFloatEqual(fit_second, fit_first)
        self.assertFloatEqual(fit_disparity, 0.)

        # an already standardized first matrix must come back unchanged
        std_first, _, _ = procrustes(self.data4, self.data5)
        self.assertFloatEqual(std_first, self.data4)

        # at worst, data3 is an 'L' with one point off by .5,
        # so the disparity stays below .5 squared
        _, _, off_disparity = procrustes(self.data1, self.data3)
        self.assertLessThan(off_disparity, 0.25)
Beispiel #7
0
    def test_procrustes(self):
        """Check that procrustes can match two matrices.

        The second matrix is the first one rotated, shifted, scaled and
        mirrored, in two dimensions only.
        """
        # can shift, mirror, and scale an 'L'?
        matched = procrustes(self.data1, self.data2)
        mtx_a, mtx_b, disparity = matched
        self.assertFloatEqual(mtx_b, mtx_a)
        self.assertFloatEqual(disparity, 0.)

        # if first mtx is standardized, leaves first mtx unchanged?
        standardized, _fitted, _disp = procrustes(self.data4, self.data5)
        self.assertFloatEqual(standardized, self.data4)

        # at worst, data3 is an 'L' with one point off by .5
        worst_case = .5 ** 2
        _first, _second, disp13 = procrustes(self.data1, self.data3)
        self.assertLessThan(disp13, worst_case)
Beispiel #8
0
def _zero_pad_to_match(first, second):
    """Return (first, second) with the shorter one zero-padded to equal length."""
    # Imported here so the helper does not rely on module-level numpy names.
    from numpy import append, zeros

    shortfall = len(first) - len(second)
    if shortfall > 0:
        second = append(second, zeros(shortfall))
    elif shortfall < 0:
        first = append(first, zeros(-shortfall))
    return first, second


def get_procrustes_results(
        coords_f1,
        coords_f2,
        sample_id_map=None,
        randomize=None,
        max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Fit two PCoA coordinate sets to each other with procrustes.

    coords_f1, coords_f2: PCoA coordinate inputs handed to parse_coords.
    sample_id_map: optional mapping passed to map_sample_ids to translate
        the sample IDs of both inputs before they are matched up.
    randomize: optional callable applied to the second coordinate matrix
        before fitting (used for random trials).
    max_dimensions: optional number of leading axes to keep; a falsy value
        keeps all of them.
    get_eigenvalues, get_percent_variation_explained: callables that
        combine the eigenvalue / percent-variation vectors of the two
        inputs into the single vector attached to both outputs.

    Returns (transformed_coords1, transformed_coords2, m_squared): the two
    format_coords results for the fitted matrices and the third value
    returned by procrustes.
    """
    # Parse the PCoA files
    sample_ids1, coords1, eigvals1, pct_var1 = parse_coords(coords_f1)
    sample_ids2, coords2, eigvals2, pct_var2 = parse_coords(coords_f2)

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Restrict both coordinate sets to the sample IDs they share and put
    # them in one common order (set iteration order, so it is not tied to
    # the order of either input).
    order = list(set(sample_ids1) & set(sample_ids2))
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)

    coords1, coords2 = pad_coords_matrices(coords1, coords2)
    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]

    # The coordinate matrices were padded to a common width above; give the
    # per-axis vectors the same treatment so the two inputs can be combined
    # even when they report different numbers of axes.
    pct_var1, pct_var2 = _zero_pad_to_match(pct_var1, pct_var2)
    eigvals1, eigvals2 = _zero_pad_to_match(eigvals1, eigvals2)

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    # Both outputs carry the same combined eigenvalues / percent variation
    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=transformed_coords_m1,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=transformed_coords_m2,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    # Return the results
    return transformed_coords1, transformed_coords2, m_squared
Beispiel #9
0
def summarize_pcoas(master_pcoa,
                    support_pcoas,
                    method='IQR',
                    apply_procrustes=True):
    """returns the average PCoA vector values for the support pcoas

    Also returns the ranges as calculated with the specified method.
    The choices are:
        IQR: the Interquartile Range
        ideal_fourths: Ideal fourths method as implemented in scipy
    The method string is passed on unchanged to
    _compute_jn_pcoa_avg_ranges; any other value (e.g. 'sdev') works only
    if that helper supports it.

    master_pcoa: sequence of [names, coordinate matrix, eigenvalues].
    support_pcoas: non-empty sequence of pcoas in the same layout.
    apply_procrustes: when true, every support matrix is fitted to the
        master matrix with procrustes before averaging; the inputs are
        copied first, so the caller's lists are not modified.

    Returns (matrix_average, matrix_low, matrix_high, eigval_average,
    m_names), where m_names is the master's names entry.

    Raises ValueError if support_pcoas is empty.
    """
    # Materialise once so emptiness can be checked up front; without this
    # an empty input hit an unbound master_std in the procrustes branch.
    support_pcoas = list(support_pcoas)
    if not support_pcoas:
        raise ValueError('summarize_pcoas needs at least one support pcoa')

    if apply_procrustes:
        # perform procrustes before averaging
        support_pcoas = [list(sp) for sp in support_pcoas]
        master_pcoa = list(master_pcoa)
        for i, pcoa in enumerate(support_pcoas):
            # every fit uses the original master matrix; it is only
            # replaced by its standardized form after the loop
            master_std, pcoa_std, _ = procrustes(master_pcoa[1], pcoa[1])
            support_pcoas[i][1] = pcoa_std
        master_pcoa[1] = master_std

    m_names = master_pcoa[0]
    m_matrix = master_pcoa[1]
    jn_flipped_matrices = []
    all_eigvals = []
    for rep in support_pcoas:
        all_eigvals.append(rep[2])
        jn_flipped_matrices.append(_flip_vectors(rep[1], m_matrix))
    matrix_average, matrix_low, matrix_high = _compute_jn_pcoa_avg_ranges(
        jn_flipped_matrices, method)

    # compute average eigvals
    all_eigvals_stack = vstack(all_eigvals)
    eigval_sum = numpy.sum(all_eigvals_stack, axis=0)
    eigval_average = eigval_sum / float(len(all_eigvals))
    return matrix_average, matrix_low, matrix_high, eigval_average, m_names
def _zero_pad_to_match(first, second):
    """Return (first, second) with the shorter one zero-padded to equal length."""
    shortfall = len(first) - len(second)
    if shortfall > 0:
        second = append(second, zeros(shortfall))
    elif shortfall < 0:
        first = append(first, zeros(-shortfall))
    return first, second


def get_procrustes_results(
        coords_f1,
        coords_f2,
        sample_id_map=None,
        randomize=None,
        max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Fit two PCoA coordinate sets to each other with procrustes.

    coords_f1, coords_f2: PCoA coordinate inputs handed to parse_coords.
    sample_id_map: optional mapping passed to map_sample_ids to translate
        the sample IDs of both inputs before they are matched up.
    randomize: optional callable applied to the second coordinate matrix
        before fitting (used for random trials).
    max_dimensions: optional number of leading axes to keep; a falsy value
        keeps all of them.
    get_eigenvalues, get_percent_variation_explained: callables that
        combine the eigenvalue / percent-variation vectors of the two
        inputs into the single vector attached to both fitted outputs.

    Returns (transformed_coords1, transformed_coords2, m_squared,
    randomized_coords2): the two format_coords results for the fitted
    matrices, the third value returned by procrustes, and the
    format_coords result for the shuffled second matrix (None when
    randomize is not given).

    Raises ValueError if the two inputs share no sample IDs.
    """
    # Parse the PCoA files
    sample_ids1, coords1, eigvals1, pct_var1 = parse_coords(coords_f1)
    sample_ids2, coords2, eigvals2, pct_var2 = parse_coords(coords_f2)
    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Restrict both coordinate sets to the sample IDs they share and put
    # them in one common order (set iteration order, so it is not tied to
    # the order of either input).
    order = list(set(sample_ids1) & set(sample_ids2))
    # Fail before doing any reordering work on an empty selection
    if not order:
        raise ValueError('No overlapping samples in the two files')
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)
        # formatted with the second input's own, unpadded eigenvalues and
        # percent variation
        randomized_coords2 = format_coords(coord_header=order,
                                           coords=coords2,
                                           eigvals=eigvals2,
                                           pct_var=pct_var2)
    else:
        randomized_coords2 = None

    coords1, coords2 = pad_coords_matrices(coords1, coords2)
    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]

    # Truncation alone does not equalise the lengths when one input has
    # fewer than max_dimensions axes, so pad in both cases. Each pair is
    # padded on its own lengths; equal-length pairs are left untouched.
    pct_var1, pct_var2 = _zero_pad_to_match(pct_var1, pct_var2)
    eigvals1, eigvals2 = _zero_pad_to_match(eigvals1, eigvals2)

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    # Both fitted outputs carry the same combined eigenvalues / percent
    # variation
    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=transformed_coords_m1,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=transformed_coords_m2,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    # Return the results
    return (transformed_coords1, transformed_coords2, m_squared,
            randomized_coords2)