Esempio n. 1
0
def dm_to_pcoa(dm, sample_md, category):
    """Plot the PCoA ordination of ``dm`` colored by a metadata column.

    Parameters
    ----------
    dm
        Distance matrix handed to ``PCoA``.
    sample_md
        Sample metadata forwarded to the plot call as ``df``.
    category
        Metadata column used for coloring (``column``); it is also
        interpolated into the plot title.

    Returns
    -------
    None
        The object returned by ``plot`` is discarded.
    """
    plot_title = "Samples colored by %s." % category
    ordination = PCoA(dm).scores()
    plot_options = {
        'df': sample_md,
        'column': category,
        'axis_labels': ['PC 1', 'PC 2', 'PC 3'],
        'title': plot_title,
        's': 35,
    }
    ordination.plot(**plot_options)
Esempio n. 2
0
    def setup(self):
        """Prepare the PCoA fixture used by the tests.

        Reads the distance matrix stored under 'PCoA_sample_data_3', wraps
        it in a ``PCoA`` object and stores a fixed list of nine identifiers
        in ``self.ids``.
        """
        data_file = get_data_path('PCoA_sample_data_3')
        self.ordination = PCoA(DistanceMatrix.read(data_file))

        self.ids = [
            'PC.636', 'PC.635', 'PC.356',
            'PC.481', 'PC.354', 'PC.593',
            'PC.355', 'PC.607', 'PC.634',
        ]
def pcoa(lines):
    """Compute PCoA scores for the distance matrix read from ``lines``.

    ``lines`` is passed unchanged to ``DistanceMatrix.read``; the parsed
    matrix is ordinated with ``PCoA`` and the value of ``scores()`` is
    returned to the caller.
    """
    return PCoA(DistanceMatrix.read(lines)).scores()
Esempio n. 4
0
def pcoa(lines):
    """Return the PCoA scores of the distance matrix found in ``lines``.

    The input is parsed with ``DistanceMatrix.read`` and the resulting
    matrix is fed to ``PCoA``; the return value is whatever ``scores()``
    yields.
    """
    parsed_matrix = DistanceMatrix.read(lines)
    ordination = PCoA(parsed_matrix)
    return ordination.scores()
Esempio n. 5
0
    def setup(self):
        """Prepare the PCoA fixture used by the tests.

        Opens the 'PCoA_sample_data_3' data file, parses it with
        ``DistanceMatrix.from_file``, wraps the matrix in a ``PCoA`` object
        and stores a fixed list of nine identifiers in ``self.ids``.
        """
        # Plain text mode replaces the old 'U' flag: in Python 3 text mode
        # already applies universal newlines, 'U' was deprecated, and
        # Python 3.11+ rejects it with a ValueError.
        with open(get_data_path('PCoA_sample_data_3'), 'r') as lines:
            dist_matrix = DistanceMatrix.from_file(lines)

        self.ordination = PCoA(dist_matrix)

        self.ids = [
            'PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
            'PC.355', 'PC.607', 'PC.634'
        ]
Esempio n. 6
0
    def test_values(self):
        """Compare the PCoA scores of ``self.dist_matrix`` to stored values.

        Adapted from cogent's `test_principal_coordinate_analysis`:
        "I took the example in the book (see intro info), and did the
        principal coordinates analysis, plotted the data and it looked
        right".
        """
        # RuntimeWarnings emitted while constructing the ordination are
        # silenced for the duration of the ``with`` block only.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            pcoa = PCoA(self.dist_matrix)
        results = pcoa.scores()

        # Both reference vectors hold six leading values followed by
        # eight zeros (fourteen entries in total).
        zero_tail = [0.0] * 8
        exp_eigvals = np.array(
            [0.73599103, 0.26260032, 0.14926222,
             0.06990457, 0.02956972, 0.01931184] + zero_tail
        )
        exp_site = np.loadtxt(get_data_path("exp_PCoAzeros_site"))
        exp_prop_expl = np.array(
            [0.58105792, 0.20732046, 0.1178411,
             0.05518899, 0.02334502, 0.01524651] + zero_tail
        )
        exp_site_ids = [str(index) for index in range(14)]

        npt.assert_almost_equal(results.eigvals, exp_eigvals)
        # Note the absolute value because column can have signs swapped
        npt.assert_almost_equal(np.abs(results.site), exp_site)
        npt.assert_almost_equal(results.proportion_explained, exp_prop_expl)
        npt.assert_equal(results.site_ids, exp_site_ids)
Esempio n. 7
0
class TestPCoAEigenResults(object):
    """Regression test for PCoA on the 'PCoA_sample_data_3' matrix."""

    def setup(self):
        """Build the ordination fixture and the expected site ids."""
        data_file = get_data_path('PCoA_sample_data_3')
        self.ordination = PCoA(DistanceMatrix.read(data_file))

        self.ids = [
            'PC.636', 'PC.635', 'PC.356',
            'PC.481', 'PC.354', 'PC.593',
            'PC.355', 'PC.607', 'PC.634',
        ]

    def test_values(self):
        """Check site coordinates, eigenvalues, proportions and ids."""
        scores = self.ordination.scores()

        # The eigenvalue count must match the length of the first row of
        # the site coordinates.
        n_eigvals = len(scores.eigvals)
        n_columns = len(scores.site[0])
        npt.assert_almost_equal(n_eigvals, n_columns)

        # Site coordinates are compared after passing both arrays through
        # ``normalize_signs``.
        exp_site = np.loadtxt(get_data_path('exp_PCoAEigenResults_site'))
        npt.assert_almost_equal(*normalize_signs(exp_site, scores.site))

        exp_eigvals = np.array([
            0.51236726, 0.30071909, 0.26791207,
            0.20898868, 0.19169895, 0.16054235,
            0.15017696, 0.12245775, 0.0,
        ])
        npt.assert_almost_equal(scores.eigvals, exp_eigvals)

        exp_proportions = np.array([
            0.2675738328, 0.157044696, 0.1399118638,
            0.1091402725, 0.1001110485, 0.0838401162,
            0.0784269939, 0.0639511764, 0.0,
        ])
        npt.assert_almost_equal(scores.proportion_explained, exp_proportions)

        npt.assert_equal(scores.site_ids, self.ids)
Esempio n. 8
0
class TestPCoAResultsExtensive(object):
    """Regression test for PCoA on the 'PCoA_sample_data_2' matrix."""

    def setup(self):
        """Load the raw matrix, derive string ids and build the fixture."""
        raw = np.loadtxt(get_data_path('PCoA_sample_data_2'))
        # Ids are the row indices of the loaded matrix rendered as strings.
        self.ids = list(map(str, range(raw.shape[0])))
        self.ordination = PCoA(DistanceMatrix(raw, self.ids))

    def test_values(self):
        """Check site coordinates, eigenvalues, proportions and ids."""
        scores = self.ordination.scores()

        # The eigenvalue count must match the length of the first row of
        # the site coordinates.
        npt.assert_equal(len(scores.eigvals), len(scores.site[0]))

        exp_site = np.array([
            [-0.028597, 0.22903853, 0.07055272,
             0.26163576, 0.28398669, 0.0],
            [0.37494056, 0.22334055, -0.20892914,
             0.05057395, -0.18710366, 0.0],
            [-0.33517593, -0.23855979, -0.3099887,
             0.11521787, -0.05021553, 0.0],
            [0.25412394, -0.4123464, 0.23343642,
             0.06403168, -0.00482608, 0.0],
            [-0.28256844, 0.18606911, 0.28875631,
             -0.06455635, -0.21141632, 0.0],
            [0.01727687, 0.012458, -0.07382761,
             -0.42690292, 0.1695749, 0.0],
        ])
        # Both arrays go through ``normalize_signs`` before comparison.
        npt.assert_almost_equal(*normalize_signs(exp_site, scores.site))

        exp_eigvals = np.array([
            0.3984635, 0.36405689, 0.28804535,
            0.27479983, 0.19165361, 0.0,
        ])
        npt.assert_almost_equal(scores.eigvals, exp_eigvals)

        exp_proportions = np.array([
            0.2626621381, 0.2399817314, 0.1898758748,
            0.1811445992, 0.1263356565, 0.0,
        ])
        npt.assert_almost_equal(scores.proportion_explained, exp_proportions)

        npt.assert_equal(scores.site_ids, self.ids)
Esempio n. 9
0
class TestPCoAResultsExtensive(object):
    """PCoA checks against stored values for 'PCoA_sample_data_2'."""

    def setup(self):
        """Create the ordination from the loaded matrix and its row ids."""
        loaded = np.loadtxt(get_data_path('PCoA_sample_data_2'))
        n_rows = loaded.shape[0]
        # One string id per matrix row: '0', '1', ...
        self.ids = [str(row) for row in range(n_rows)]
        self.ordination = PCoA(DistanceMatrix(loaded, self.ids))

    def test_values(self):
        """Compare every field of the scores with its reference value."""
        computed = self.ordination.scores()

        # Number of eigenvalues versus length of the first site row.
        n_eigvals = len(computed.eigvals)
        row_length = len(computed.site[0])
        npt.assert_equal(n_eigvals, row_length)

        reference_site = np.array([
            [-0.028597, 0.22903853, 0.07055272,
             0.26163576, 0.28398669, 0.0],
            [0.37494056, 0.22334055, -0.20892914,
             0.05057395, -0.18710366, 0.0],
            [-0.33517593, -0.23855979, -0.3099887,
             0.11521787, -0.05021553, 0.0],
            [0.25412394, -0.4123464, 0.23343642,
             0.06403168, -0.00482608, 0.0],
            [-0.28256844, 0.18606911, 0.28875631,
             -0.06455635, -0.21141632, 0.0],
            [0.01727687, 0.012458, -0.07382761,
             -0.42690292, 0.1695749, 0.0],
        ])
        normalized = normalize_signs(reference_site, computed.site)
        npt.assert_almost_equal(*normalized)

        reference_eigvals = np.array([
            0.3984635, 0.36405689, 0.28804535,
            0.27479983, 0.19165361, 0.0,
        ])
        npt.assert_almost_equal(computed.eigvals, reference_eigvals)

        reference_proportions = np.array([
            0.2626621381, 0.2399817314, 0.1898758748,
            0.1811445992, 0.1263356565, 0.0,
        ])
        npt.assert_almost_equal(computed.proportion_explained,
                                reference_proportions)

        npt.assert_equal(computed.site_ids, self.ids)
Esempio n. 10
0
class TestPCoAEigenResults(object):
    """PCoA checks against stored values for 'PCoA_sample_data_3'."""

    def setup(self):
        """Read the distance matrix and keep the ids expected in results."""
        matrix_path = get_data_path('PCoA_sample_data_3')
        self.ordination = PCoA(DistanceMatrix.read(matrix_path))

        self.ids = [
            'PC.636', 'PC.635', 'PC.356',
            'PC.481', 'PC.354', 'PC.593',
            'PC.355', 'PC.607', 'PC.634',
        ]

    def test_values(self):
        """Compare every field of the scores with its reference value."""
        computed = self.ordination.scores()

        # Number of eigenvalues versus length of the first site row.
        n_eigvals = len(computed.eigvals)
        row_length = len(computed.site[0])
        npt.assert_almost_equal(n_eigvals, row_length)

        reference_site = np.loadtxt(
            get_data_path('exp_PCoAEigenResults_site'))
        normalized = normalize_signs(reference_site, computed.site)
        npt.assert_almost_equal(*normalized)

        reference_eigvals = np.array([
            0.51236726, 0.30071909, 0.26791207,
            0.20898868, 0.19169895, 0.16054235,
            0.15017696, 0.12245775, 0.0,
        ])
        npt.assert_almost_equal(computed.eigvals, reference_eigvals)

        reference_proportions = np.array([
            0.2675738328, 0.157044696, 0.1399118638,
            0.1091402725, 0.1001110485, 0.0838401162,
            0.0784269939, 0.0639511764, 0.0,
        ])
        npt.assert_almost_equal(computed.proportion_explained,
                                reference_proportions)

        npt.assert_equal(computed.site_ids, self.ids)
Esempio n. 11
0
    def setup(self):
        """Prepare the PCoA fixture used by the tests.

        Opens the 'PCoA_sample_data_3' data file, parses it with
        ``DistanceMatrix.from_file``, wraps the matrix in a ``PCoA`` object
        and stores a fixed list of nine identifiers in ``self.ids``.
        """
        # The 'U' open flag is gone: Python 3 text mode already translates
        # newlines, the flag was deprecated, and Python 3.11+ raises
        # ValueError when it is passed.
        with open(get_data_path('PCoA_sample_data_3'), 'r') as lines:
            dist_matrix = DistanceMatrix.from_file(lines)

        self.ordination = PCoA(dist_matrix)

        self.ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
                    'PC.355', 'PC.607', 'PC.634']
Esempio n. 12
0
    def test_values(self):
        """Compare the PCoA scores of ``self.dist_matrix`` to stored values.

        Adapted from cogent's `test_principal_coordinate_analysis`:
        "I took the example in the book (see intro info), and did the
        principal coordinates analysis, plotted the data and it looked
        right".
        """
        # RuntimeWarnings are ignored only while the ordination is built.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            pcoa = PCoA(self.dist_matrix)
        results = pcoa.scores()

        # Fourteen-entry reference vectors: six leading values, the rest
        # stay at zero.
        exp_eigvals = np.zeros(14)
        exp_eigvals[:6] = [0.73599103, 0.26260032, 0.14926222,
                           0.06990457, 0.02956972, 0.01931184]

        exp_site = np.loadtxt(get_data_path('exp_PCoAzeros_site'))

        exp_prop_expl = np.zeros(14)
        exp_prop_expl[:6] = [0.58105792, 0.20732046, 0.1178411,
                             0.05518899, 0.02334502, 0.01524651]

        exp_site_ids = list(map(str, range(14)))

        npt.assert_almost_equal(results.eigvals, exp_eigvals)
        # Note the absolute value because column can have signs swapped
        npt.assert_almost_equal(np.abs(results.site), exp_site)
        npt.assert_almost_equal(results.proportion_explained, exp_prop_expl)
        npt.assert_equal(results.site_ids, exp_site_ids)
Esempio n. 13
0
def js_PCoA(distributions):
    """Dimension reduction via Jensen-Shannon Divergence & Principal
    Coordinates Analysis (PCoA).

    Parameters
    ----------
    distributions : array-like, shape (`n_dists`, `k`)
        Matrix of distributions probabilities. Objects exposing a
        ``.values`` attribute (e.g. a pandas DataFrame) are unwrapped
        through it; any other input is handed to ``dist.pdist`` as-is.

    Returns
    -------
    pcoa : array, shape (`n_dists`, 2)
        The first two columns of the PCoA site coordinates.
    """
    # The contract is "array-like", so ``.values`` is only used when the
    # input actually provides it; plain arrays and nested lists no longer
    # fail with AttributeError.
    observations = getattr(distributions, 'values', distributions)

    # Pairwise distances using the module's ``_jensen_shannon`` metric,
    # expanded from condensed to square form for ``DistanceMatrix``.
    # NOTE(review): ``dist`` is presumably scipy.spatial.distance - confirm.
    condensed = dist.pdist(observations, _jensen_shannon)
    dist_matrix = DistanceMatrix(dist.squareform(condensed))

    pcoa = PCoA(dist_matrix).scores()
    return pcoa.site[:, 0:2]
Esempio n. 14
0
    def setup(self):
        """Create the PCoA fixture from the 'PCoA_sample_data_3' file.

        Also stores a fixed list of nine identifiers in ``self.ids``.
        """
        source_path = get_data_path('PCoA_sample_data_3')
        self.ordination = PCoA(DistanceMatrix.read(source_path))

        self.ids = [
            'PC.636', 'PC.635', 'PC.356',
            'PC.481', 'PC.354', 'PC.593',
            'PC.355', 'PC.607', 'PC.634',
        ]
Esempio n. 15
0
 def test_E_matrix(self):
     """Check ``PCoA._E_matrix(self.matrix)`` against fixed values."""
     reference = np.array([
         [-0.5, -2., -4.5],
         [-8., -12.5, -18.],
     ])
     npt.assert_almost_equal(PCoA._E_matrix(self.matrix), reference)
Esempio n. 16
0
 def setup(self):
     """Build the PCoA fixture from the 'PCoA_sample_data_2' matrix.

     The ids are the row indices of the loaded matrix as strings.
     """
     raw = np.loadtxt(get_data_path('PCoA_sample_data_2'))
     n_rows = raw.shape[0]
     self.ids = list(map(str, range(n_rows)))
     self.ordination = PCoA(DistanceMatrix(raw, self.ids))
Esempio n. 17
0
    def setup(self):
        """Create the PCoA fixture from the "PCoA_sample_data_3" file.

        Also stores a fixed list of nine identifiers in ``self.ids``.
        """
        matrix_path = get_data_path("PCoA_sample_data_3")
        self.ordination = PCoA(DistanceMatrix.read(matrix_path))

        self.ids = [
            "PC.636", "PC.635", "PC.356",
            "PC.481", "PC.354", "PC.593",
            "PC.355", "PC.607", "PC.634",
        ]
Esempio n. 18
0
 def test_input(self):
     """Constructing ``PCoA`` from a nested list must raise TypeError."""
     nested_list = [[1, 2], [3, 4]]
     with npt.assert_raises(TypeError):
         PCoA(nested_list)
Esempio n. 19
0
# Determine if the resulting distance matrices are significantly correlated
# by computing the Mantel correlation between them. Then determine if the
# p-value is significant based on an alpha of 0.05.
# NOTE(review): `j_dm` and `bc_dm` are not defined in this excerpt;
# presumably they are distance matrices built earlier - confirm.

from skbio.stats.distance import mantel
r, p_value, n = mantel(j_dm, bc_dm)
print(r)
# Recorded output: -0.209362157621
print(p_value < 0.05)
# Recorded output: False

# Compute PCoA for both distance matrices, and then find the Procrustes
# M-squared value that results from comparing the coordinate matrices.
# Index [2] selects the third element of the value returned by `procrustes`.

from skbio.stats.ordination import PCoA
bc_pc = PCoA(bc_dm).scores()
j_pc = PCoA(j_dm).scores()
from skbio.stats.spatial import procrustes
print(procrustes(bc_pc.site, j_pc.site)[2])
# Recorded output: 0.466134984787

# All of this only gets interesting in the context of sample metadata, so
# let's define some:

import pandas as pd
try:
    # Display tweak only; not necessary for normal use. A KeyError from
    # `set_option` is deliberately ignored.
    pd.set_option('show_dimensions', True)
except KeyError:
    pass
sample_md = {
Esempio n. 20
0
 def test_F_matrix(self):
     """``PCoA._F_matrix(self.matrix2)`` must equal a 3x3 zero matrix."""
     all_zeros = np.zeros((3, 3))
     # Note that `test_make_F_matrix` in cogent is wrong
     npt.assert_almost_equal(PCoA._F_matrix(self.matrix2), all_zeros)
Esempio n. 21
0
 def test_E_matrix(self):
     """Compare ``PCoA._E_matrix(self.matrix)`` with the stored 2x3 array."""
     first_row = [-0.5, -2., -4.5]
     second_row = [-8., -12.5, -18.]
     computed = PCoA._E_matrix(self.matrix)
     npt.assert_almost_equal(computed, np.array([first_row, second_row]))
Esempio n. 22
0
    },
    'B': {
        'Méthode': 's2'
    },
    'C': {
        'Méthode': 's3'
    },
    'D': {
        'Méthode': 's4'
    },
    'E': {
        'Méthode': 's5'
    }
}
df = pd.DataFrame.from_dict(metadata, orient='index')
pcoa_results = PCoA(dm).scores()
print(pcoa_results)
fig = pcoa_results.plot(
    df=df,
    column='Méthode',
    title='Estimation methods projected on 3 first principal components',
    cmap='Set1',
    s=500)
plt.show()
"""
# NOTE(review): `datasets` is presumably sklearn.datasets - confirm; the
# loaded `digits` object is not used in the lines visible here.
digits = datasets.load_digits()
# Hard-coded 5x5 symmetric matrix with a zero diagonal.
X = np.array([[ 0.         ,35.57933426 ,17.75168991 ,32.03273392 ,33.87740707],[35.57933426 , 0.         ,17.86463547 , 7.161726   , 5.87323952], [17.75168991 ,17.86463547 , 0.         ,14.88137054 ,16.6187191 ], [32.03273392 , 7.161726   ,14.88137054 , 0.          ,3.63054395], [33.87740707 , 5.87323952 ,16.6187191  , 3.63054395  ,0.        ]] )
print(type(X) )
# One integer label per row of X.
y = np.array( [1, 2, 3, 4, 5])
print(y)
print(type(y) )
Esempio n. 23
0
def pcoa(adist, cluster_members=None):
    """Run scikit-bio PCoA on ``adist``.

    ``adist`` is wrapped in a ``DistanceMatrix`` and ordinated with
    ``PCoA``. In the code visible here the scores are computed but not
    returned, and ``cluster_members`` is not used.
    """
    # Imports are local, so skbio is only needed when this is called.
    from skbio import DistanceMatrix
    from skbio.stats.ordination import PCoA

    distance_matrix = DistanceMatrix(adist)
    ordination = PCoA(distance_matrix)
    pcoa_results = ordination.scores()
Esempio n. 24
0
 def test_F_matrix(self):
     """Check that ``PCoA._F_matrix(self.matrix2)`` is all zeros (3x3)."""
     computed = PCoA._F_matrix(self.matrix2)
     # Note that `test_make_F_matrix` in cogent is wrong
     npt.assert_almost_equal(computed, np.zeros((3, 3)))
Esempio n. 25
0
 def setup(self):
     """Load 'PCoA_sample_data_2' and create the PCoA fixture.

     ``self.ids`` holds one string id per row of the loaded matrix.
     """
     loaded = np.loadtxt(get_data_path('PCoA_sample_data_2'))
     self.ids = [str(row) for row in range(loaded.shape[0])]
     self.ordination = PCoA(DistanceMatrix(loaded, self.ids))
import pandas as pd
# Per-sample metadata: outer keys are sample ids, each mapping to a single
# 'body_site' value.
metadata = {
    'A': {
        'body_site': 'skin'
    },
    'B': {
        'body_site': 'gut'
    },
    'C': {
        'body_site': 'gut'
    },
    'D': {
        'body_site': 'skin'
    }
}
# orient='index' turns the outer keys into the DataFrame's row index.
df = pd.DataFrame.from_dict(metadata, orient='index')

# Run principal coordinate analysis (PCoA) on the distance matrix.
# NOTE(review): `dm` is not defined in this excerpt; presumably it is a
# distance matrix created earlier - confirm.

from skbio.stats.ordination import PCoA
pcoa_results = PCoA(dm).scores()

# Plot the ordination results, where each site is colored by body site
# (a categorical variable):

fig = pcoa_results.plot(df=df,
                        column='body_site',
                        title='Sites colored by body site',
                        cmap='Set1',
                        s=50)