def test_procrustes2(self):
    """procrustes disparity should not depend on order of matrices

    Checked on the 2-D class fixtures (data1/data3) and on a hard-coded
    3-D pair with 8 points per matrix.
    """
    rand1 = array([[2.61955202, 0.30522265, 0.55515826],
                   [0.41124708, -0.03966978, -0.31854548],
                   [0.91910318, 1.39451809, -0.15295084],
                   [2.00452023, 0.50150048, 0.29485268],
                   [0.09453595, 0.67528885, 0.03283872],
                   [0.07015232, 2.18892599, -1.67266852],
                   [0.65029688, 1.60551637, 0.80013549],
                   [-0.6607528, 0.53644208, 0.17033891]])
    rand3 = array([[0.0809969, 0.09731461, -0.173442],
                   [-1.84888465, -0.92589646, -1.29335743],
                   [0.67031855, -1.35957463, 0.41938621],
                   [0.73967209, -0.20230757, 0.52418027],
                   [0.17752796, 0.09065607, 0.29827466],
                   [0.47999368, -0.88455717, -0.57547934],
                   [-0.11486344, -0.12608506, -0.3395779],
                   [-0.86106154, -0.28687488, 0.9644429]])

    # 2-D fixtures first, then the 3-D pair; only the disparity (third
    # return value) is compared between the two argument orders.
    for first, second in ((self.data1, self.data3), (rand1, rand3)):
        _, _, forward = procrustes(first, second)
        _, _, backward = procrustes(second, first)
        self.assertFloatEqual(forward, backward)
def test_procrustes2(self):
    """procrustes disparity should not depend on order of matrices

    The disparity returned for (A, B) must match the one for (B, A).
    """
    # 2-D case using the class fixtures
    _, _, disp_fwd = procrustes(self.data1, self.data3)
    _, _, disp_rev = procrustes(self.data3, self.data1)
    self.assertFloatEqual(disp_fwd, disp_rev)

    # 3-D case, 8 points per matrix
    pts_a = array([
        [2.61955202, 0.30522265, 0.55515826],
        [0.41124708, -0.03966978, -0.31854548],
        [0.91910318, 1.39451809, -0.15295084],
        [2.00452023, 0.50150048, 0.29485268],
        [0.09453595, 0.67528885, 0.03283872],
        [0.07015232, 2.18892599, -1.67266852],
        [0.65029688, 1.60551637, 0.80013549],
        [-0.6607528, 0.53644208, 0.17033891],
    ])
    pts_b = array([
        [0.0809969, 0.09731461, -0.173442],
        [-1.84888465, -0.92589646, -1.29335743],
        [0.67031855, -1.35957463, 0.41938621],
        [0.73967209, -0.20230757, 0.52418027],
        [0.17752796, 0.09065607, 0.29827466],
        [0.47999368, -0.88455717, -0.57547934],
        [-0.11486344, -0.12608506, -0.3395779],
        [-0.86106154, -0.28687488, 0.9644429],
    ])
    _, _, disp_fwd = procrustes(pts_a, pts_b)
    _, _, disp_rev = procrustes(pts_b, pts_a)
    self.assertFloatEqual(disp_fwd, disp_rev)
def test_summarize_pcoas(self):
    """summarize_pcoas works

    Exercises the 'ideal_fourths' and 'IQR' range methods without
    procrustes, then the 'sdev' method with procrustes enabled.
    """
    master_pcoa = [['1', '2', '3'],
                   array([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]]),
                   array([.76, .24])]
    jn1 = [['1', '2', '3'],
           array([[1.2, 0.1, -1.2], [-2.5, -4.0, 4.5]]),
           array([0.80, .20])]
    jn2 = [['1', '2', '3'],
           array([[-1.4, 0.05, 1.3], [2.6, 4.1, -4.7]]),
           array([0.76, .24])]
    jn3 = [['1', '2', '3'],
           array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
           array([0.84, .16])]
    jn4 = [['1', '2', '3'],
           array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
           array([0.84, .16])]
    support_pcoas = [jn1, jn2, jn3, jn4]

    # test with the ideal_fourths option
    matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
        summarize_pcoas(master_pcoa, support_pcoas, 'ideal_fourths',
                        apply_procrustes=False)
    self.assertEqual(m_names, ['1', '2', '3'])
    self.assertFloatEqual(matrix_average[(0, 0)], -1.4)
    self.assertFloatEqual(matrix_average[(0, 1)], 0.0125)
    self.assertFloatEqual(matrix_low[(0, 0)], -1.5)
    self.assertFloatEqual(matrix_high[(0, 0)], -1.28333333)
    self.assertFloatEqual(matrix_low[(0, 1)], -0.0375)
    self.assertFloatEqual(matrix_high[(0, 1)], 0.05)
    self.assertFloatEqual(eigval_average[0], 0.81)
    self.assertFloatEqual(eigval_average[1], 0.19)

    # test with the IQR option
    matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
        summarize_pcoas(master_pcoa, support_pcoas, method='IQR',
                        apply_procrustes=False)
    self.assertFloatEqual(matrix_low[(0, 0)], -1.5)
    self.assertFloatEqual(matrix_high[(0, 0)], -1.3)

    # test with procrustes option followed by sdev: fit every support
    # matrix to the master ourselves and keep only the fitted matrix
    # (second return value) to build the expected numbers.
    fitted = [procrustes(master_pcoa[1], jn[1])[1] for jn in support_pcoas]
    matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
        summarize_pcoas(master_pcoa, support_pcoas, method='sdev',
                        apply_procrustes=True)
    x = array([mtx[0, 0] for mtx in fitted])
    # Tolerant comparison: the expected values are recomputed here with a
    # possibly different order of floating-point operations, so exact
    # equality would be brittle.
    self.assertFloatEqual(matrix_average[0, 0], x.mean())
    self.assertFloatEqual(matrix_low[0, 0], -x.std(ddof=1) / 2)
    self.assertFloatEqual(matrix_high[0, 0], x.std(ddof=1) / 2)
def test_summarize_pcoas(self):
    """summarize_pcoas works

    Covers three configurations: 'ideal_fourths' and 'IQR' ranges with
    apply_procrustes=False, and 'sdev' ranges with apply_procrustes=True.
    """
    names = ['1', '2', '3']
    master_pcoa = [list(names),
                   array([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]]),
                   array([.76, .24])]
    jn1 = [list(names),
           array([[1.2, 0.1, -1.2], [-2.5, -4.0, 4.5]]),
           array([0.80, .20])]
    jn2 = [list(names),
           array([[-1.4, 0.05, 1.3], [2.6, 4.1, -4.7]]),
           array([0.76, .24])]
    jn3 = [list(names),
           array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
           array([0.84, .16])]
    jn4 = [list(names),
           array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
           array([0.84, .16])]
    support_pcoas = [jn1, jn2, jn3, jn4]

    # test with the ideal_fourths option
    matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
        summarize_pcoas(master_pcoa, support_pcoas, 'ideal_fourths',
                        apply_procrustes=False)
    self.assertEqual(m_names, ['1', '2', '3'])
    self.assertFloatEqual(matrix_average[(0, 0)], -1.4)
    self.assertFloatEqual(matrix_average[(0, 1)], 0.0125)
    self.assertFloatEqual(matrix_low[(0, 0)], -1.5)
    self.assertFloatEqual(matrix_high[(0, 0)], -1.28333333)
    self.assertFloatEqual(matrix_low[(0, 1)], -0.0375)
    self.assertFloatEqual(matrix_high[(0, 1)], 0.05)
    self.assertFloatEqual(eigval_average[0], 0.81)
    self.assertFloatEqual(eigval_average[1], 0.19)

    # test with the IQR option
    matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
        summarize_pcoas(master_pcoa, support_pcoas, method='IQR',
                        apply_procrustes=False)
    self.assertFloatEqual(matrix_low[(0, 0)], -1.5)
    self.assertFloatEqual(matrix_high[(0, 0)], -1.3)

    # test with procrustes option followed by sdev
    # Expected values come from fitting each support matrix to the master
    # directly; only the fitted (second) matrix of each result is needed.
    corner_values = []
    for jn in support_pcoas:
        _, fitted, _ = procrustes(master_pcoa[1], jn[1])
        corner_values.append(fitted[0, 0])
    matrix_average, matrix_low, matrix_high, eigval_average, m_names = \
        summarize_pcoas(master_pcoa, support_pcoas, method='sdev',
                        apply_procrustes=True)
    x = array(corner_values)
    # Computed floats are compared with a tolerance rather than exactly,
    # matching every other numeric check in this test.
    self.assertFloatEqual(matrix_average[0, 0], x.mean())
    self.assertFloatEqual(matrix_low[0, 0], -x.std(ddof=1) / 2)
    self.assertFloatEqual(matrix_high[0, 0], x.std(ddof=1) / 2)
def get_procrustes_results(
        coords_f1,
        coords_f2,
        sample_id_map=None,
        randomize=None,
        max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Run a Procrustes analysis on two coordinate files.

    coords_f1, coords_f2: inputs handed to parse_coords (presumably open
        PCoA coordinate files or their lines -- confirm against
        parse_coords).
    sample_id_map: if truthy, passed with each list of sample ids to
        map_sample_ids before the two id sets are intersected.
    randomize: optional callable applied to the second coordinate matrix
        (after reordering) for random trials.
    max_dimensions: if truthy, the coordinates, eigenvalues and percent
        variation are all cut down to this many leading dimensions.
    get_eigenvalues, get_percent_variation_explained: callables that
        combine the two eigenvalue / percent-variation vectors into the
        single vector written to both outputs.

    Returns (transformed_coords1, transformed_coords2, m_squared), where
    the first two are the format_coords output for each transformed
    matrix and m_squared is the third value returned by procrustes.

    Raises ValueError if the two inputs share no sample ids.
    """
    # Parse the PCoA files
    sample_ids1, coords1, eigvals1, pct_var1 = parse_coords(coords_f1)
    sample_ids2, coords2, eigvals2, pct_var2 = parse_coords(coords_f2)

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Restrict both matrices to the shared samples and put their rows in
    # the same order.
    order = list(set(sample_ids1) & set(sample_ids2))
    if not order:
        raise ValueError('No overlapping samples in the two files')
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)

    coords1, coords2 = pad_coords_matrices(coords1, coords2)

    # Zero-pad the shorter vector of each pair so both inputs describe the
    # same number of axes. This is done before any truncation: the
    # coordinate matrices are always padded, and slicing alone can leave
    # vectors of different lengths when one input has fewer than
    # max_dimensions axes.
    pct_gap = len(pct_var1) - len(pct_var2)
    if pct_gap > 0:
        pct_var2 = append(pct_var2, zeros(pct_gap))
    elif pct_gap < 0:
        pct_var1 = append(pct_var1, zeros(-pct_gap))
    eig_gap = len(eigvals1) - len(eigvals2)
    if eig_gap > 0:
        eigvals2 = append(eigvals2, zeros(eig_gap))
    elif eig_gap < 0:
        eigvals1 = append(eigvals1, zeros(-eig_gap))

    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    # Both outputs carry the same combined eigenvalues / percent variation
    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=transformed_coords_m1,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=transformed_coords_m2,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    # Return the results
    return transformed_coords1, transformed_coords2, m_squared
def test_procrustes(self):
    """tests procrustes' ability to match two matrices.

    the second matrix is a rotated, shifted, scaled, and mirrored version
    of the first, in two dimensions only
    """
    # An 'L' and its shifted / mirrored / scaled copy should superimpose
    # with zero disparity.
    std_first, fitted_second, disparity = procrustes(self.data1, self.data2)
    self.assertFloatEqual(fitted_second, std_first)
    self.assertFloatEqual(disparity, 0.)

    # A first matrix that is already standardized should come back
    # unchanged.
    std_fourth, _, _ = procrustes(self.data4, self.data5)
    self.assertFloatEqual(std_fourth, self.data4)

    # at worst, data3 is an 'L' with one point off by .5
    _, _, disp13 = procrustes(self.data1, self.data3)
    self.assertLessThan(disp13, 0.5 ** 2)
def test_procrustes(self):
    """tests procrustes' ability to match two matrices.

    the second matrix is a rotated, shifted, scaled, and mirrored version
    of the first, in two dimensions only
    """
    # can shift, mirror, and scale an 'L'?
    result = procrustes(self.data1, self.data2)
    mtx_a, mtx_b, disparity = result
    self.assertFloatEqual(mtx_b, mtx_a)
    self.assertFloatEqual(disparity, 0.)

    # if first mtx is standardized, leaves first mtx unchanged?
    mtx_4, _unused_mtx, _unused_disp = procrustes(self.data4, self.data5)
    self.assertFloatEqual(mtx_4, self.data4)

    # at worst, data3 is an 'L' with one point off by .5
    worst_case = .5 ** 2
    _unused_a, _unused_b, disp13 = procrustes(self.data1, self.data3)
    self.assertLessThan(disp13, worst_case)
def get_procrustes_results(
        coords_f1,
        coords_f2,
        sample_id_map=None,
        randomize=None,
        max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Run a Procrustes analysis on two coordinate files.

    coords_f1, coords_f2: inputs handed to parse_coords (presumably open
        PCoA coordinate files or their lines -- confirm against
        parse_coords).
    sample_id_map: if truthy, passed with each list of sample ids to
        map_sample_ids before the two id sets are intersected.
    randomize: optional callable applied to the second coordinate matrix
        (after reordering) for random trials.
    max_dimensions: if truthy, the coordinates, eigenvalues and percent
        variation are all cut down to this many leading dimensions.
    get_eigenvalues, get_percent_variation_explained: callables that
        combine the two eigenvalue / percent-variation vectors into the
        single vector written to both outputs.

    Returns (transformed_coords1, transformed_coords2, m_squared), where
    the first two are the format_coords output for each transformed
    matrix and m_squared is the third value returned by procrustes.

    Raises ValueError if the two inputs share no sample ids.
    """
    # Imported locally so this function does not rely on the module's
    # top-level imports for the padding step below.
    from numpy import append, zeros

    # Parse the PCoA files
    sample_ids1, coords1, eigvals1, pct_var1 = parse_coords(coords_f1)
    sample_ids2, coords2, eigvals2, pct_var2 = parse_coords(coords_f2)

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Restrict both matrices to the shared samples and put their rows in
    # the same order.
    order = list(set(sample_ids1) & set(sample_ids2))
    if not order:
        raise ValueError('No overlapping samples in the two files')
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)

    coords1, coords2 = pad_coords_matrices(coords1, coords2)

    # The coordinate matrices were just padded to a common number of
    # axes; give the eigenvalue and percent-variation vectors the same
    # treatment so the combining callables receive equal-length inputs.
    pct_gap = len(pct_var1) - len(pct_var2)
    if pct_gap > 0:
        pct_var2 = append(pct_var2, zeros(pct_gap))
    elif pct_gap < 0:
        pct_var1 = append(pct_var1, zeros(-pct_gap))
    eig_gap = len(eigvals1) - len(eigvals2)
    if eig_gap > 0:
        eigvals2 = append(eigvals2, zeros(eig_gap))
    elif eig_gap < 0:
        eigvals1 = append(eigvals1, zeros(-eig_gap))

    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    # Both outputs carry the same combined eigenvalues / percent variation
    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=transformed_coords_m1,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=transformed_coords_m2,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    # Return the results
    return transformed_coords1, transformed_coords2, m_squared
def summarize_pcoas(master_pcoa, support_pcoas, method='IQR',
                    apply_procrustes=True):
    """returns the average PCoA vector values for the support pcoas

    Also returns the ranges as calculated with the specified method.

    master_pcoa: sequence indexed as [names, coordinate matrix,
        eigenvalues].
    support_pcoas: non-empty iterable of sequences with the same layout.
    method: name of the range calculation, passed unchanged to
        _compute_jn_pcoa_avg_ranges. The choices are:
            IQR: the Interquartile Range
            ideal fourths: Ideal fourths method as implemented in scipy
                (callers pass the string 'ideal_fourths')
    apply_procrustes: when true, every support matrix is first fitted to
        the master matrix with procrustes, and the standardized master
        matrix is used as the reference. The caller's lists are copied
        first and are not modified.

    Returns (matrix_average, matrix_low, matrix_high, eigval_average,
    m_names), where m_names is master_pcoa[0].

    Raises ValueError if support_pcoas is empty.
    """
    support_pcoas = list(support_pcoas)
    if not support_pcoas:
        # Fail with a clear message; there is nothing to average.
        raise ValueError('support_pcoas must contain at least one PCoA')

    if apply_procrustes:
        # perform procrustes before averaging; work on shallow copies so
        # the caller's sequences are left untouched
        support_pcoas = [list(sp) for sp in support_pcoas]
        master_pcoa = list(master_pcoa)
        for i, pcoa in enumerate(support_pcoas):
            master_std, pcoa_std, _ = procrustes(master_pcoa[1], pcoa[1])
            support_pcoas[i][1] = pcoa_std
        # Every fit above used the original master matrix; only now is it
        # replaced by its standardized form.
        master_pcoa[1] = master_std

    m_names = master_pcoa[0]
    m_matrix = master_pcoa[1]

    jn_flipped_matrices = []
    all_eigvals = []
    for rep in support_pcoas:
        all_eigvals.append(rep[2])
        jn_flipped_matrices.append(_flip_vectors(rep[1], m_matrix))

    matrix_average, matrix_low, matrix_high = _compute_jn_pcoa_avg_ranges(
        jn_flipped_matrices, method)

    # compute average eigvals: stack one row per support PCoA and average
    # down the columns
    all_eigvals_stack = vstack(all_eigvals)
    eigval_sum = numpy.sum(all_eigvals_stack, axis=0)
    eigval_average = eigval_sum / float(len(all_eigvals))

    return matrix_average, matrix_low, matrix_high, eigval_average, m_names
def get_procrustes_results(
        coords_f1,
        coords_f2,
        sample_id_map=None,
        randomize=None,
        max_dimensions=None,
        get_eigenvalues=get_mean_eigenvalues,
        get_percent_variation_explained=get_mean_percent_variation):
    """Run a Procrustes analysis on two coordinate files.

    coords_f1, coords_f2: inputs handed to parse_coords (presumably open
        PCoA coordinate files or their lines -- confirm against
        parse_coords).
    sample_id_map: if truthy, passed with each list of sample ids to
        map_sample_ids before the two id sets are intersected.
    randomize: optional callable applied to the second coordinate matrix
        (after reordering) for random trials.
    max_dimensions: if truthy, the coordinates, eigenvalues and percent
        variation are all cut down to this many leading dimensions.
    get_eigenvalues, get_percent_variation_explained: callables that
        combine the two eigenvalue / percent-variation vectors into the
        single vector written to both transformed outputs.

    Returns (transformed_coords1, transformed_coords2, m_squared,
    randomized_coords2). The first two are the format_coords output for
    each transformed matrix and m_squared is the third value returned by
    procrustes. randomized_coords2 is the format_coords output for the
    shuffled second matrix (with the second input's own eigenvalues and
    percent variation, before padding or truncation), or None when
    randomize is not given.

    Raises ValueError if the two inputs share no sample ids.
    """
    # Parse the PCoA files
    sample_ids1, coords1, eigvals1, pct_var1 = parse_coords(coords_f1)
    sample_ids2, coords2, eigvals2, pct_var2 = parse_coords(coords_f2)

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Restrict both matrices to the shared samples and put their rows in
    # the same order. Check for an empty overlap before touching the
    # coordinates so the caller gets this message rather than whatever a
    # downstream helper does with an empty order.
    order = list(set(sample_ids1) & set(sample_ids2))
    if not order:
        raise ValueError('No overlapping samples in the two files')
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)
        randomized_coords2 = format_coords(coord_header=order,
                                           coords=coords2,
                                           eigvals=eigvals2,
                                           pct_var=pct_var2)
    else:
        randomized_coords2 = None

    coords1, coords2 = pad_coords_matrices(coords1, coords2)

    # Zero-pad the shorter vector of each pair so both inputs describe the
    # same number of axes. This is done before any truncation: the
    # coordinate matrices are always padded, and slicing alone can leave
    # vectors of different lengths when one input has fewer than
    # max_dimensions axes.
    pct_gap = len(pct_var1) - len(pct_var2)
    if pct_gap > 0:
        pct_var2 = append(pct_var2, zeros(pct_gap))
    elif pct_gap < 0:
        pct_var1 = append(pct_var1, zeros(-pct_gap))
    eig_gap = len(eigvals1) - len(eigvals2)
    if eig_gap > 0:
        eigvals2 = append(eigvals2, zeros(eig_gap))
    elif eig_gap < 0:
        eigvals1 = append(eigvals1, zeros(-eig_gap))

    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared = \
        procrustes(coords1, coords2)

    # Both outputs carry the same combined eigenvalues / percent variation
    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = format_coords(coord_header=order,
                                        coords=transformed_coords_m1,
                                        eigvals=eigvals,
                                        pct_var=pct_var)
    transformed_coords2 = format_coords(coord_header=order,
                                        coords=transformed_coords_m2,
                                        eigvals=eigvals,
                                        pct_var=pct_var)

    # Return the results
    return (transformed_coords1, transformed_coords2,
            m_squared, randomized_coords2)