def dm_to_pcoa(dm, sample_md, category):
    """Plot a PCoA ordination of ``dm`` with samples colored by ``category``.

    Parameters
    ----------
    dm
        Distance matrix handed directly to ``PCoA``.
    sample_md
        Sample metadata forwarded to the plot call as ``df``.
    category
        Metadata column used for coloring; also interpolated into the title.

    Returns
    -------
    None
        The object returned by ``plot`` is discarded.
    """
    plot_title = "Samples colored by %s." % category
    ordination = PCoA(dm).scores()
    # The plot call's return value is deliberately not kept.
    ordination.plot(
        df=sample_md,
        column=category,
        axis_labels=['PC 1', 'PC 2', 'PC 3'],
        title=plot_title,
        s=35
    )
def setup(self):
    """Build the PCoA fixture from the ``PCoA_sample_data_3`` data file."""
    data_path = get_data_path('PCoA_sample_data_3')
    self.ordination = PCoA(DistanceMatrix.read(data_path))
    # Sample identifiers stored alongside the ordination.
    self.ids = [
        'PC.636', 'PC.635', 'PC.356',
        'PC.481', 'PC.354', 'PC.593',
        'PC.355', 'PC.607', 'PC.634',
    ]
def pcoa(lines):
    """Compute PCoA scores for a serialized distance matrix.

    Parameters
    ----------
    lines
        Input passed unchanged to ``DistanceMatrix.read``.

    Returns
    -------
    The result of calling ``scores()`` on the ``PCoA`` object built from
    the parsed distance matrix.
    """
    # Parse, ordinate and extract the scores in one pass.
    return PCoA(DistanceMatrix.read(lines)).scores()
def setup(self):
    """Build the PCoA fixture from the ``PCoA_sample_data_3`` data file.

    The file is opened in default text mode, parsed with
    ``DistanceMatrix.from_file`` and the resulting matrix is wrapped in a
    ``PCoA`` object stored on ``self.ordination``.
    """
    # The 'U' mode flag was deprecated in Python 3 and removed in 3.11,
    # where it raises ValueError.  Default text mode already performs
    # universal-newline translation, so the flag is simply dropped.
    with open(get_data_path('PCoA_sample_data_3')) as lines:
        dist_matrix = DistanceMatrix.from_file(lines)

    self.ordination = PCoA(dist_matrix)

    # Sample identifiers stored alongside the ordination.
    self.ids = [
        'PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
        'PC.355', 'PC.607', 'PC.634'
    ]
def test_values(self):
    """Compare PCoA scores for ``self.dist_matrix`` with reference values.

    Adapted from cogent's `test_principal_coordinate_analysis`, whose
    author noted: "I took the example in the book (see intro info), and
    did the principal coordinates analysis, plotted the data and it
    looked right".
    """
    # RuntimeWarnings raised while computing the ordination are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        ordination = PCoA(self.dist_matrix)
        scores = ordination.scores()

    # Six non-zero leading values followed by eight zeros (14 in total).
    exp_eigvals = np.array(
        [0.73599103, 0.26260032, 0.14926222,
         0.06990457, 0.02956972, 0.01931184] + [0.0] * 8
    )
    exp_prop_expl = np.array(
        [0.58105792, 0.20732046, 0.1178411,
         0.05518899, 0.02334502, 0.01524651] + [0.0] * 8
    )
    exp_site = np.loadtxt(get_data_path("exp_PCoAzeros_site"))
    exp_site_ids = [str(i) for i in range(14)]

    npt.assert_almost_equal(scores.eigvals, exp_eigvals)
    # Note the absolute value because column can have signs swapped
    npt.assert_almost_equal(np.abs(scores.site), exp_site)
    npt.assert_almost_equal(scores.proportion_explained, exp_prop_expl)
    npt.assert_equal(scores.site_ids, exp_site_ids)
class TestPCoAEigenResults(object):
    """Check PCoA output for ``PCoA_sample_data_3`` against stored values."""

    def setup(self):
        """Load the distance matrix and build the ordination under test."""
        data_path = get_data_path('PCoA_sample_data_3')
        self.ordination = PCoA(DistanceMatrix.read(data_path))
        self.ids = [
            'PC.636', 'PC.635', 'PC.356',
            'PC.481', 'PC.354', 'PC.593',
            'PC.355', 'PC.607', 'PC.634',
        ]

    def test_values(self):
        """Site coordinates, eigenvalues, proportions and IDs must match."""
        scores = self.ordination.scores()

        # The number of eigenvalues equals the width of a site row.
        npt.assert_almost_equal(len(scores.eigvals), len(scores.site[0]))

        # Site coordinates are compared after sign normalization.
        exp_site = np.loadtxt(get_data_path('exp_PCoAEigenResults_site'))
        npt.assert_almost_equal(*normalize_signs(exp_site, scores.site))

        exp_eigvals = np.array([
            0.51236726, 0.30071909, 0.26791207,
            0.20898868, 0.19169895, 0.16054235,
            0.15017696, 0.12245775, 0.0,
        ])
        npt.assert_almost_equal(scores.eigvals, exp_eigvals)

        exp_prop_expl = np.array([
            0.2675738328, 0.157044696, 0.1399118638,
            0.1091402725, 0.1001110485, 0.0838401162,
            0.0784269939, 0.0639511764, 0.0,
        ])
        npt.assert_almost_equal(scores.proportion_explained, exp_prop_expl)

        npt.assert_equal(scores.site_ids, self.ids)
class TestPCoAResultsExtensive(object):
    """Check PCoA output for ``PCoA_sample_data_2`` against stored values."""

    def setup(self):
        """Load the raw matrix, label rows '0'..'n-1' and build the PCoA."""
        raw = np.loadtxt(get_data_path('PCoA_sample_data_2'))
        n_rows = raw.shape[0]
        self.ids = [str(index) for index in range(n_rows)]
        self.ordination = PCoA(DistanceMatrix(raw, self.ids))

    def test_values(self):
        """Site coordinates, eigenvalues, proportions and IDs must match."""
        scores = self.ordination.scores()

        # The number of eigenvalues equals the width of a site row.
        npt.assert_equal(len(scores.eigvals), len(scores.site[0]))

        exp_site = np.array([
            [-0.028597, 0.22903853, 0.07055272,
             0.26163576, 0.28398669, 0.0],
            [0.37494056, 0.22334055, -0.20892914,
             0.05057395, -0.18710366, 0.0],
            [-0.33517593, -0.23855979, -0.3099887,
             0.11521787, -0.05021553, 0.0],
            [0.25412394, -0.4123464, 0.23343642,
             0.06403168, -0.00482608, 0.0],
            [-0.28256844, 0.18606911, 0.28875631,
             -0.06455635, -0.21141632, 0.0],
            [0.01727687, 0.012458, -0.07382761,
             -0.42690292, 0.1695749, 0.0],
        ])
        # Site coordinates are compared after sign normalization.
        npt.assert_almost_equal(*normalize_signs(exp_site, scores.site))

        exp_eigvals = np.array([
            0.3984635, 0.36405689, 0.28804535,
            0.27479983, 0.19165361, 0.0,
        ])
        npt.assert_almost_equal(scores.eigvals, exp_eigvals)

        exp_prop_expl = np.array([
            0.2626621381, 0.2399817314, 0.1898758748,
            0.1811445992, 0.1263356565, 0.0,
        ])
        npt.assert_almost_equal(scores.proportion_explained, exp_prop_expl)

        npt.assert_equal(scores.site_ids, self.ids)
class TestPCoAEigenResults(object):
    """Compare PCoA results for ``PCoA_sample_data_3`` with known values."""

    def setup(self):
        """Read the distance matrix file and create the ordination."""
        matrix_path = get_data_path('PCoA_sample_data_3')
        parsed = DistanceMatrix.read(matrix_path)
        self.ordination = PCoA(parsed)
        self.ids = [
            'PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
            'PC.593', 'PC.355', 'PC.607', 'PC.634',
        ]

    def test_values(self):
        """Verify shape agreement, coordinates, eigenvalues and IDs."""
        res = self.ordination.scores()

        n_eigvals = len(res.eigvals)
        n_axes = len(res.site[0])
        npt.assert_almost_equal(n_eigvals, n_axes)

        # Both coordinate matrices go through normalize_signs before the
        # element-wise comparison.
        site_expected = np.loadtxt(
            get_data_path('exp_PCoAEigenResults_site'))
        normalized = normalize_signs(site_expected, res.site)
        npt.assert_almost_equal(*normalized)

        eigvals_expected = np.array(
            [0.51236726, 0.30071909, 0.26791207, 0.20898868, 0.19169895,
             0.16054235, 0.15017696, 0.12245775, 0.0])
        npt.assert_almost_equal(res.eigvals, eigvals_expected)

        proportion_expected = np.array(
            [0.2675738328, 0.157044696, 0.1399118638, 0.1091402725,
             0.1001110485, 0.0838401162, 0.0784269939, 0.0639511764, 0.0])
        npt.assert_almost_equal(res.proportion_explained,
                                proportion_expected)

        npt.assert_equal(res.site_ids, self.ids)
def setup(self):
    """Create the PCoA fixture from the ``PCoA_sample_data_3`` data file.

    The file is read in default text mode through
    ``DistanceMatrix.from_file``; the parsed matrix is then used to build
    the ``PCoA`` object kept on ``self.ordination``.
    """
    path = get_data_path('PCoA_sample_data_3')
    # No 'U' flag: it was deprecated in Python 3 and removed in 3.11
    # (ValueError).  Plain text mode already translates all newline styles.
    with open(path) as lines:
        dist_matrix = DistanceMatrix.from_file(lines)

    self.ordination = PCoA(dist_matrix)

    # Sample identifiers stored alongside the ordination.
    self.ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                'PC.593', 'PC.355', 'PC.607', 'PC.634']
def test_values(self):
    """Check PCoA scores of ``self.dist_matrix`` against stored values.

    Adapted from cogent's `test_principal_coordinate_analysis`, where the
    original author wrote: "I took the example in the book (see intro
    info), and did the principal coordinates analysis, plotted the data
    and it looked right".
    """
    n_sites = 14
    n_trailing_zeros = 8
    zero_tail = [0.] * n_trailing_zeros

    exp_eigvals = np.array([0.73599103, 0.26260032, 0.14926222,
                            0.06990457, 0.02956972, 0.01931184]
                           + zero_tail)
    exp_prop_expl = np.array([0.58105792, 0.20732046, 0.1178411,
                              0.05518899, 0.02334502, 0.01524651]
                             + zero_tail)
    exp_site_ids = [str(site) for site in range(n_sites)]

    # RuntimeWarnings emitted during the ordination are ignored.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=RuntimeWarning)
        scores = PCoA(self.dist_matrix).scores()

    exp_site = np.loadtxt(get_data_path('exp_PCoAzeros_site'))

    npt.assert_almost_equal(scores.eigvals, exp_eigvals)
    # Note the absolute value because column can have signs swapped
    npt.assert_almost_equal(np.abs(scores.site), exp_site)
    npt.assert_almost_equal(scores.proportion_explained, exp_prop_expl)
    npt.assert_equal(scores.site_ids, exp_site_ids)
def js_PCoA(distributions):
    """Reduce distributions to two PCoA axes using ``_jensen_shannon``.

    Pairwise distances between the rows of ``distributions.values`` are
    computed with ``_jensen_shannon`` as the metric, expanded to a square
    matrix and ordinated with PCoA.

    Parameters
    ----------
    distributions : array-like, shape (`n_dists`, `k`)
        Matrix of distribution probabilities.  The object must expose a
        ``.values`` attribute, which is what gets passed to ``pdist``.

    Returns
    -------
    pcoa : array, shape (`n_dists`, 2)
        The first two columns of the PCoA site coordinates.
    """
    condensed = dist.pdist(distributions.values, _jensen_shannon)
    square = dist.squareform(condensed)
    scores = PCoA(DistanceMatrix(square)).scores()
    # Keep only the first two coordinate axes.
    return scores.site[:, :2]
def setup(self):
    """Read ``PCoA_sample_data_3`` and store the ordination and its IDs."""
    # Sample identifiers stored alongside the ordination.
    self.ids = [
        'PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
        'PC.593', 'PC.355', 'PC.607', 'PC.634',
    ]
    source = get_data_path('PCoA_sample_data_3')
    parsed_matrix = DistanceMatrix.read(source)
    self.ordination = PCoA(parsed_matrix)
def test_E_matrix(self):
    """Check ``PCoA._E_matrix(self.matrix)`` against fixed values."""
    expected_E = np.array([
        [-0.5, -2., -4.5],
        [-8., -12.5, -18.],
    ])
    npt.assert_almost_equal(PCoA._E_matrix(self.matrix), expected_E)
def setup(self):
    """Build the PCoA fixture from the ``PCoA_sample_data_2`` data file.

    The raw numeric matrix is loaded with ``np.loadtxt`` and its rows are
    labelled with the string form of their index.
    """
    raw = np.loadtxt(get_data_path('PCoA_sample_data_2'))
    n_rows = raw.shape[0]
    self.ids = [str(row) for row in range(n_rows)]
    self.ordination = PCoA(DistanceMatrix(raw, self.ids))
def setup(self):
    """Load ``PCoA_sample_data_3`` and prepare the ordination fixture."""
    matrix_file = get_data_path("PCoA_sample_data_3")
    self.ordination = PCoA(DistanceMatrix.read(matrix_file))
    # Sample identifiers stored alongside the ordination.
    self.ids = [
        "PC.636",
        "PC.635",
        "PC.356",
        "PC.481",
        "PC.354",
        "PC.593",
        "PC.355",
        "PC.607",
        "PC.634",
    ]
def test_input(self):
    """Constructing ``PCoA`` from a plain nested list raises TypeError."""
    plain_nested_list = [[1, 2], [3, 4]]
    with npt.assert_raises(TypeError):
        PCoA(plain_nested_list)
# Determine if the resulting distance matrices are significantly correlated # by computing the Mantel correlation between them. Then determine if the # p-value is significant based on an alpha of 0.05: from skbio.stats.distance import mantel r, p_value, n = mantel(j_dm, bc_dm) print(r) # -0.209362157621 print(p_value < 0.05) # False # Compute PCoA for both distance matrices, and then find the Procrustes # M-squared value that results from comparing the coordinate matrices. from skbio.stats.ordination import PCoA bc_pc = PCoA(bc_dm).scores() j_pc = PCoA(j_dm).scores() from skbio.stats.spatial import procrustes print(procrustes(bc_pc.site, j_pc.site)[2]) # 0.466134984787 # All of this only gets interesting in the context of sample metadata, so # let's define some: import pandas as pd try: # not necessary for normal use pd.set_option('show_dimensions', True) except KeyError: pass sample_md = {
def test_F_matrix(self):
    """``PCoA._F_matrix(self.matrix2)`` must be a 3x3 matrix of zeros."""
    # Note that `test_make_F_matrix` in cogent is wrong
    all_zeros = np.zeros((3, 3))
    npt.assert_almost_equal(PCoA._F_matrix(self.matrix2), all_zeros)
}, 'B': { 'Méthode': 's2' }, 'C': { 'Méthode': 's3' }, 'D': { 'Méthode': 's4' }, 'E': { 'Méthode': 's5' } } df = pd.DataFrame.from_dict(metadata, orient='index') pcoa_results = PCoA(dm).scores() print(pcoa_results) fig = pcoa_results.plot( df=df, column='Méthode', title='Estimation methods projected on 3 first principal components', cmap='Set1', s=500) plt.show() """ digits = datasets.load_digits() X = np.array([[ 0. ,35.57933426 ,17.75168991 ,32.03273392 ,33.87740707],[35.57933426 , 0. ,17.86463547 , 7.161726 , 5.87323952], [17.75168991 ,17.86463547 , 0. ,14.88137054 ,16.6187191 ], [32.03273392 , 7.161726 ,14.88137054 , 0. ,3.63054395], [33.87740707 , 5.87323952 ,16.6187191 , 3.63054395 ,0. ]] ) print(type(X) ) y = np.array( [1, 2, 3, 4, 5]) print(y) print(type(y) )
def pcoa(adist, cluster_members=None):
    """Run PCoA on ``adist`` after wrapping it in a ``DistanceMatrix``.

    Parameters
    ----------
    adist
        Data passed directly to the ``DistanceMatrix`` constructor.
    cluster_members : optional
        Not referenced by the statements visible here.

    NOTE(review): the visible statements bind ``pcoa_results`` and do not
    return it -- confirm whether more of the function follows.
    """
    # scikit-bio is imported lazily, inside the function.
    from skbio import DistanceMatrix
    from skbio.stats.ordination import PCoA

    distance_matrix = DistanceMatrix(adist)
    pcoa_results = PCoA(distance_matrix).scores()
import pandas as pd

# Per-sample metadata: each sample ID maps to its body-site label.
metadata = {
    'A': {'body_site': 'skin'},
    'B': {'body_site': 'gut'},
    'C': {'body_site': 'gut'},
    'D': {'body_site': 'skin'},
}
# One row per sample ID, one column per metadata field.
df = pd.DataFrame.from_dict(metadata, orient='index')

# Run principal coordinate analysis (PCoA) on the distance matrix `dm`
# (defined outside this snippet):
from skbio.stats.ordination import PCoA
pcoa_results = PCoA(dm).scores()

# Plot the ordination results, where each site is colored by body site
# (a categorical variable):
fig = pcoa_results.plot(
    df=df,
    column='body_site',
    title='Sites colored by body site',
    cmap='Set1',
    s=50
)