예제 #1
0
    def test_pw_distances_braycurtis(self):
        """Check Bray-Curtis output of ``pw_distances`` on both fixtures.

        The first fixture (``self.t1`` / ``self.ids1``) is verified pair by
        pair against literal values; the second (``self.t2`` / ``self.ids2``)
        is compared cell by cell with a reference ``DistanceMatrix`` to 6
        decimal places.
        """
        small_dm = pw_distances(self.t1, self.ids1, 'braycurtis')
        self.assertEqual(small_dm.shape, (3, 3))
        # Diagonal first, then every off-diagonal pair in both orientations.
        expected_small = [
            (('A', 'A'), 0.0),
            (('B', 'B'), 0.0),
            (('C', 'C'), 0.0),
            (('A', 'B'), 0.27272727),
            (('B', 'A'), 0.27272727),
            (('A', 'C'), 0.71428571),
            (('C', 'A'), 0.71428571),
            (('B', 'C'), 0.66666667),
            (('C', 'B'), 0.66666667),
        ]
        for pair, distance in expected_small:
            npt.assert_almost_equal(small_dm[pair], distance)

        large_dm = pw_distances(self.t2, self.ids2, 'braycurtis')
        reference = DistanceMatrix([
            [0., 0.78787879, 0.86666667, 0.30927835, 0.85714286, 0.81521739],
            [0.78787879, 0., 0.78142077, 0.86813187, 0.75, 0.1627907],
            [0.86666667, 0.78142077, 0., 0.87709497, 0.09392265, 0.71597633],
            [0.30927835, 0.86813187, 0.87709497, 0., 0.87777778, 0.89285714],
            [0.85714286, 0.75, 0.09392265, 0.87777778, 0., 0.68235294],
            [0.81521739, 0.1627907, 0.71597633, 0.89285714, 0.68235294, 0.],
        ], self.ids2)
        for row_id in self.ids2:
            for col_id in self.ids2:
                npt.assert_almost_equal(large_dm[row_id, col_id],
                                        reference[row_id, col_id],
                                        decimal=6)
예제 #2
0
    def test_pw_distances_braycurtis(self):
        """Bray-Curtis distances from ``pw_distances`` match known values.

        Covers the 3x3 result for ``self.t1`` with literal expectations and
        the result for ``self.t2`` against a reference ``DistanceMatrix``
        (6 decimal places).
        """
        dm_small = pw_distances(self.t1, self.ids1, 'braycurtis')
        self.assertEqual(dm_small.shape, (3, 3))
        # Self-distances are zero.
        for sample in ('A', 'B', 'C'):
            npt.assert_almost_equal(dm_small[sample, sample], 0.0)
        # Each off-diagonal value is checked in both orientations.
        for left, right, distance in (('A', 'B', 0.27272727),
                                      ('A', 'C', 0.71428571),
                                      ('B', 'C', 0.66666667)):
            npt.assert_almost_equal(dm_small[left, right], distance)
            npt.assert_almost_equal(dm_small[right, left], distance)

        dm_large = pw_distances(self.t2, self.ids2, 'braycurtis')
        reference_rows = [
            [0., 0.78787879, 0.86666667, 0.30927835, 0.85714286, 0.81521739],
            [0.78787879, 0., 0.78142077, 0.86813187, 0.75, 0.1627907],
            [0.86666667, 0.78142077, 0., 0.87709497, 0.09392265, 0.71597633],
            [0.30927835, 0.86813187, 0.87709497, 0., 0.87777778, 0.89285714],
            [0.85714286, 0.75, 0.09392265, 0.87777778, 0., 0.68235294],
            [0.81521739, 0.1627907, 0.71597633, 0.89285714, 0.68235294, 0.]]
        reference = DistanceMatrix(reference_rows, self.ids2)
        id_pairs = [(a, b) for a in self.ids2 for b in self.ids2]
        for a, b in id_pairs:
            npt.assert_almost_equal(dm_large[a, b], reference[a, b], 6)
예제 #3
0
    def test_pw_distances_euclidean(self):
        """Euclidean distances from ``pw_distances`` match known values.

        Covers the 3x3 result for ``self.t1`` with literal expectations and
        the result for ``self.t2`` against a reference ``DistanceMatrix``
        (6 decimal places).
        """
        dm_small = pw_distances(self.t1, self.ids1, 'euclidean')
        self.assertEqual(dm_small.shape, (3, 3))
        # Self-distances are zero.
        for sample in ('A', 'B', 'C'):
            npt.assert_almost_equal(dm_small[sample, sample], 0.0)
        # Each off-diagonal value is checked in both orientations.
        for left, right, distance in (('A', 'B', 2.23606798),
                                      ('A', 'C', 4.12310563),
                                      ('B', 'C', 2.82842712)):
            npt.assert_almost_equal(dm_small[left, right], distance)
            npt.assert_almost_equal(dm_small[right, left], distance)

        dm_large = pw_distances(self.t2, self.ids2, 'euclidean')
        reference_rows = [
            [0., 80.8455317, 84.0297566, 36.3042697, 86.0116271, 78.9176786],
            [80.8455317, 0., 71.0844568, 74.4714710, 69.3397433, 14.422205],
            [84.0297566, 71.0844568, 0., 77.2851861, 8.3066238, 60.7536007],
            [36.3042697, 74.4714710, 77.2851861, 0., 78.7908624, 70.7389567],
            [86.0116271, 69.3397433, 8.3066238, 78.7908624, 0., 58.4807660],
            [78.9176786, 14.422205, 60.7536007, 70.7389567, 58.4807660, 0.]]
        reference = DistanceMatrix(reference_rows, self.ids2)
        id_pairs = [(a, b) for a in self.ids2 for b in self.ids2]
        for a, b in id_pairs:
            npt.assert_almost_equal(dm_large[a, b], reference[a, b], 6)
예제 #4
0
    def test_pw_distances_euclidean(self):
        """Check Euclidean output of ``pw_distances`` on both fixtures.

        The first fixture (``self.t1`` / ``self.ids1``) is verified pair by
        pair against literal values; the second (``self.t2`` / ``self.ids2``)
        is compared cell by cell with a reference ``DistanceMatrix`` to 6
        decimal places.
        """
        small_dm = pw_distances(self.t1, self.ids1, 'euclidean')
        self.assertEqual(small_dm.shape, (3, 3))
        # Diagonal first, then every off-diagonal pair in both orientations.
        expected_small = [
            (('A', 'A'), 0.0),
            (('B', 'B'), 0.0),
            (('C', 'C'), 0.0),
            (('A', 'B'), 2.23606798),
            (('B', 'A'), 2.23606798),
            (('A', 'C'), 4.12310563),
            (('C', 'A'), 4.12310563),
            (('B', 'C'), 2.82842712),
            (('C', 'B'), 2.82842712),
        ]
        for pair, distance in expected_small:
            npt.assert_almost_equal(small_dm[pair], distance)

        large_dm = pw_distances(self.t2, self.ids2, 'euclidean')
        reference = DistanceMatrix([
            [0., 80.8455317, 84.0297566, 36.3042697, 86.0116271, 78.9176786],
            [80.8455317, 0., 71.0844568, 74.4714710, 69.3397433, 14.422205],
            [84.0297566, 71.0844568, 0., 77.2851861, 8.3066238, 60.7536007],
            [36.3042697, 74.4714710, 77.2851861, 0., 78.7908624, 70.7389567],
            [86.0116271, 69.3397433, 8.3066238, 78.7908624, 0., 58.4807660],
            [78.9176786, 14.422205, 60.7536007, 70.7389567, 58.4807660, 0.],
        ], self.ids2)
        for row_id in self.ids2:
            for col_id in self.ids2:
                npt.assert_almost_equal(large_dm[row_id, col_id],
                                        reference[row_id, col_id],
                                        decimal=6)
예제 #5
0
    def test_pw_distances_from_table_euclidean(self):
        """Euclidean distances agree between matrix input and Table input.

        For each fixture, ``pw_distances_from_table`` is required to emit a
        ``DeprecationWarning`` and to return the same distances as
        ``pw_distances`` for every pair of IDs.
        """
        fixtures = (
            (self.t1, self.ids1, self.table1),
            (self.t2, self.ids2, self.table2),
        )
        for counts, sample_ids, table in fixtures:
            from_matrix = pw_distances(counts, sample_ids, 'euclidean')
            # assert_warns hands back the wrapped call's return value.
            from_table = npt.assert_warns(
                DeprecationWarning, pw_distances_from_table, table,
                'euclidean')
            for row_id in sample_ids:
                for col_id in sample_ids:
                    npt.assert_almost_equal(from_matrix[row_id, col_id],
                                            from_table[row_id, col_id])
예제 #6
0
    def test_pw_distances_from_table_braycurtis(self):
        """Bray-Curtis distances agree between matrix input and Table input.

        ``pw_distances_from_table`` must emit a ``UserWarning`` and return
        the same distances as ``pw_distances`` for both fixtures.
        """
        def check_fixture(counts, sample_ids, table):
            # Compare the two code paths cell by cell for one fixture.
            from_matrix = pw_distances(counts, sample_ids, 'braycurtis')
            from_table = npt.assert_warns(
                UserWarning, pw_distances_from_table, table, 'braycurtis')
            for row_id in sample_ids:
                for col_id in sample_ids:
                    npt.assert_almost_equal(from_matrix[row_id, col_id],
                                            from_table[row_id, col_id])

        check_fixture(self.t1, self.ids1, self.table1)
        check_fixture(self.t2, self.ids2, self.table2)
예제 #7
0
    def test_pw_distances_from_table_braycurtis(self):
        """Bray-Curtis distances agree between matrix input and Table input.

        Any warnings raised by ``pw_distances_from_table`` are silenced for
        the duration of the call; only the returned distances are compared
        with those of ``pw_distances``.
        """
        fixtures = (
            (self.t1, self.ids1, self.table1),
            (self.t2, self.ids2, self.table2),
        )
        for counts, sample_ids, table in fixtures:
            from_matrix = pw_distances(counts, sample_ids, 'braycurtis')
            # Warning filters are restored when the context manager exits.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                from_table = pw_distances_from_table(table, 'braycurtis')
            for row_id in sample_ids:
                for col_id in sample_ids:
                    npt.assert_almost_equal(from_matrix[row_id, col_id],
                                            from_table[row_id, col_id])
예제 #8
0
    def test_pw_distances_from_table_braycurtis(self):
        """Table input and matrix input give the same Bray-Curtis distances.

        ``pw_distances_from_table`` is called with all warnings ignored, and
        its result is compared with ``pw_distances`` for both fixtures.
        """
        def check_fixture(counts, sample_ids, table):
            # Compare the two code paths cell by cell for one fixture.
            from_matrix = pw_distances(counts, sample_ids, 'braycurtis')
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                from_table = pw_distances_from_table(table, 'braycurtis')
            id_pairs = [(a, b) for a in sample_ids for b in sample_ids]
            for a, b in id_pairs:
                npt.assert_almost_equal(from_matrix[a, b], from_table[a, b])

        check_fixture(self.t1, self.ids1, self.table1)
        check_fixture(self.t2, self.ids2, self.table2)
예제 #9
0
 def test_pw_distances_weighted_unifrac(self):
     """Weighted UniFrac via ``pw_distances``, by metric name and callable.

     Both spellings of the metric must produce equal 3x3 matrices, and the
     result must match the reference values to 6 decimal places.
     """
     # Keyword arguments shared by both calls.
     phylo_kwargs = dict(otu_ids=self.otu_ids1, tree=self.tree1)
     by_name = pw_distances('weighted_unifrac', self.t1, self.ids1,
                            **phylo_kwargs)
     by_callable = pw_distances(weighted_unifrac, self.t1, self.ids1,
                                **phylo_kwargs)
     self.assertEqual(by_name.shape, (3, 3))
     self.assertEqual(by_name, by_callable)
     # expected values calculated by hand
     reference = DistanceMatrix(
         [[0.0, 0.1750000, 0.12499999],
          [0.1750000, 0.0, 0.3000000],
          [0.12499999, 0.3000000, 0.0]],
         ids=self.ids1)
     for row_id in self.ids1:
         for col_id in self.ids1:
             npt.assert_almost_equal(by_name[row_id, col_id],
                                     reference[row_id, col_id],
                                     decimal=6)
예제 #10
0
    def test_pw_distances_from_table_euclidean(self):
        """Table input and matrix input give the same Euclidean distances.

        ``pw_distances_from_table`` must emit a ``DeprecationWarning`` and
        return the same distances as ``pw_distances`` for both fixtures.
        """
        def check_fixture(counts, sample_ids, table):
            # Compare the two code paths cell by cell for one fixture.
            from_matrix = pw_distances(counts, sample_ids, 'euclidean')
            from_table = npt.assert_warns(DeprecationWarning,
                                          pw_distances_from_table,
                                          table,
                                          'euclidean')
            for row_id in sample_ids:
                for col_id in sample_ids:
                    npt.assert_almost_equal(from_matrix[row_id, col_id],
                                            from_table[row_id, col_id])

        check_fixture(self.t1, self.ids1, self.table1)
        check_fixture(self.t2, self.ids2, self.table2)
예제 #11
0
 def test_pw_distances_weighted_unifrac_normalized(self):
     """Normalized weighted UniFrac via ``pw_distances``.

     The metric is passed once by name and once as a callable, both with
     ``normalized=True``; the two 3x3 results must be equal and must match
     the reference values to 6 decimal places.
     """
     by_name = pw_distances(
         'weighted_unifrac', self.t1, self.ids1,
         otu_ids=self.otu_ids1, tree=self.tree1, normalized=True)
     by_callable = pw_distances(
         weighted_unifrac, self.t1, self.ids1,
         otu_ids=self.otu_ids1, tree=self.tree1, normalized=True)
     self.assertEqual(by_name.shape, (3, 3))
     self.assertEqual(by_name, by_callable)
     # expected values calculated by hand
     reference_rows = [
         [0.0, 0.128834, 0.085714],
         [0.128834, 0.0, 0.2142857],
         [0.085714, 0.2142857, 0.0]]
     reference = DistanceMatrix(reference_rows, ids=self.ids1)
     id_pairs = [(a, b) for a in self.ids1 for b in self.ids1]
     for a, b in id_pairs:
         npt.assert_almost_equal(by_name[a, b], reference[a, b], 6)
예제 #12
0
def get_clusters(x_original, axis='row'):
    """Return the UPGMA tip order for the rows (or columns) of a matrix.

    Euclidean distances are computed between the rows of ``x_original`` (or
    between its columns when ``axis == 'column'``), average-linkage
    clustering is run on them, and the integer positions of those
    rows/columns are returned in the order of the resulting tree's tips.

    Parameters
    ----------
    x_original : object providing ``copy()``, ``T`` and ``shape``
        Presumably a 2-D numpy array -- confirm against callers. It is
        copied before use and never modified.
    axis : str, optional
        ``'column'`` clusters columns; any other value clusters rows.

    Returns
    -------
    list of int
        Row (or column) positions in tree-tip order.
    """
    x = x_original.copy()
    if axis == 'column':
        x = x.T
    nr = x.shape[0]
    # Build a concrete list of IDs. On Python 3 ``map(str, range(nr))`` is a
    # one-shot iterator with no ``len()``, which is unsuitable wherever a
    # sized, re-iterable sequence of IDs is expected.
    row_ids = [str(position) for position in range(nr)]
    row_dissims = pw_distances(x, ids=row_ids, metric='euclidean')
    # do upgma - rows
    # Average in SciPy's cluster.hierarchy.linkage is UPGMA
    linkage_matrix = linkage(row_dissims.condensed_form(), method='average')
    tree = TreeNode.from_linkage_matrix(linkage_matrix, row_dissims.ids)
    # Tip names are the stringified positions assigned above.
    return [int(tip.name) for tip in tree.tips()]
예제 #13
0
def get_clusters(x_original, axis='row'):
    """Cluster rows (or columns) with UPGMA and return their tip order.

    Pairwise Euclidean distances are computed between the rows of
    ``x_original`` -- or its columns when ``axis == 'column'`` -- and fed to
    average-linkage clustering. The result is the list of integer row/column
    positions in the order they appear as tips of the resulting tree.

    Parameters
    ----------
    x_original : object providing ``copy()``, ``T`` and ``shape``
        Presumably a 2-D numpy array -- confirm against callers. A copy is
        used, so the input is left untouched.
    axis : str, optional
        ``'column'`` clusters columns; any other value clusters rows.

    Returns
    -------
    list of int
        Row (or column) positions in tree-tip order.
    """
    x = x_original.copy()
    if axis == 'column':
        x = x.T
    nr = x.shape[0]
    # Materialize the IDs: a bare ``map`` object on Python 3 is lazy, has no
    # ``len()`` and can be consumed only once, so it must not be passed on
    # as an ID sequence.
    row_ids = list(map(str, range(nr)))
    row_dissims = pw_distances(x, ids=row_ids, metric='euclidean')
    # do upgma - rows
    # Average in SciPy's cluster.hierarchy.linkage is UPGMA
    linkage_matrix = linkage(row_dissims.condensed_form(), method='average')
    tree = TreeNode.from_linkage_matrix(linkage_matrix, row_dissims.ids)
    # Tip names are the stringified positions assigned above.
    return [int(tip.name) for tip in tree.tips()]
예제 #14
0
 def test_pw_distances_unweighted_unifrac(self):
     """Unweighted UniFrac via ``pw_distances``, by metric name and callable.

     Both spellings of the metric must produce equal 3x3 matrices, and the
     result must match the reference values to 6 decimal places.
     """
     # Keyword arguments shared by both calls.
     phylo_kwargs = dict(otu_ids=self.otu_ids1, tree=self.tree1)
     by_name = pw_distances('unweighted_unifrac', self.t1, self.ids1,
                            **phylo_kwargs)
     by_callable = pw_distances(unweighted_unifrac, self.t1, self.ids1,
                                **phylo_kwargs)
     self.assertEqual(by_name.shape, (3, 3))
     self.assertEqual(by_name, by_callable)
     # expected values calculated by hand
     reference_rows = [
         [0.0, 0.0, 0.25 / 1.0],
         [0.0, 0.0, 0.25 / 1.0],
         [0.25 / 1.0, 0.25 / 1.0, 0.0],
     ]
     reference = DistanceMatrix(reference_rows, ids=self.ids1)
     for row_id in self.ids1:
         for col_id in self.ids1:
             npt.assert_almost_equal(by_name[row_id, col_id],
                                     reference[row_id, col_id],
                                     decimal=6)
예제 #15
0
 def test_pw_distances_weighted_unifrac_normalized(self):
     """Normalized weighted UniFrac via ``pw_distances``.

     The metric is given once by name and once as a callable, each time with
     ``normalized=True``. The two 3x3 matrices must be equal and must match
     the reference values to 6 decimal places.
     """
     # Keyword arguments shared by both calls.
     phylo_kwargs = dict(otu_ids=self.otu_ids1,
                         tree=self.tree1,
                         normalized=True)
     by_name = pw_distances('weighted_unifrac', self.t1, self.ids1,
                            **phylo_kwargs)
     by_callable = pw_distances(weighted_unifrac, self.t1, self.ids1,
                                **phylo_kwargs)
     self.assertEqual(by_name.shape, (3, 3))
     self.assertEqual(by_name, by_callable)
     # expected values calculated by hand
     reference = DistanceMatrix(
         [[0.0, 0.128834, 0.085714],
          [0.128834, 0.0, 0.2142857],
          [0.085714, 0.2142857, 0.0]],
         ids=self.ids1)
     for row_id in self.ids1:
         for col_id in self.ids1:
             npt.assert_almost_equal(by_name[row_id, col_id],
                                     reference[row_id, col_id],
                                     decimal=6)
예제 #16
0
파일: __init__.py 프로젝트: jairideout/q2d2
def biom_to_dm(metric, biom):
    """Compute pairwise distances between the columns of ``biom``.

    ``biom`` is transposed so that each of its columns becomes one row of
    the counts handed to ``pw_distances``, and its column labels are used
    as the IDs of the result.

    NOTE(review): ``biom`` is presumably a pandas DataFrame with one column
    per sample -- only ``.T`` and ``.columns`` are used here; confirm
    against callers.
    """
    counts_by_column = biom.T
    column_ids = biom.columns
    return pw_distances(metric=metric,
                        counts=counts_by_column,
                        ids=column_ids)
예제 #17
0
#!/usr/bin/env python3
import numpy as np
import csv
from numpy import genfromtxt
from skbio.diversity.beta import pw_distances
# Input table; the same file is read twice (numeric body, then header row),
# so its name is kept in one place.
INPUT_CSV = "BAM_9ancient_hmp_protein.csv"

# Numeric body of the table: the header row and column 0 are skipped.
# NOTE(review): the column range 1..156 is hard-coded; confirm it matches the
# number of IDs in the header row of the input file.
taxonomy_tbl = np.genfromtxt(INPUT_CSV,
                             delimiter=',',
                             dtype=float,
                             skip_header=1,
                             usecols=range(1, 157))
# The IDs come from the header row (i.e. they label columns), so transpose
# to get one row per ID before computing distances.
taxonomy_trans = taxonomy_tbl.transpose()

# The csv module documentation requires files to be opened with newline=''
# so that the reader handles line endings and quoted newlines itself.
with open(INPUT_CSV, newline='') as csvfile:
    csv_reader = csv.reader(csvfile)
    # Drop the first header cell, which labels the skipped column 0.
    sample_ID = next(csv_reader)[1:]
print(sample_ID[1])

#jaccard_distance
j_dm = pw_distances(taxonomy_trans, sample_ID, "jaccard")
print(j_dm[0:3])
j_dm.write('jaccard__9ancient_hmp_protein.csv')
예제 #18
0
from skbio.diversity.beta import pw_distances
import numpy as np
# Example input: six rows of seven values each, one row per ID in ``ids``.
data = [[23, 64, 14, 0, 0, 3, 1], [0, 3, 35, 42, 0, 12, 1],
        [0, 5, 5, 0, 40, 40, 0], [44, 35, 9, 0, 1, 0, 0],
        [0, 2, 8, 0, 35, 45, 1], [0, 0, 25, 35, 0, 19, 0]]
ids = list('ABCDEF')

# Compute Bray-Curtis distances between all pairs of rows in ``data`` and
# return a ``DistanceMatrix`` object labelled with ``ids``:

bc_dm = pw_distances(data, ids, "braycurtis")
print(bc_dm)
# Expected output:
# 6x6 distance matrix
# IDs:
# 'A', 'B', 'C', 'D', 'E', 'F'
# Data:
# [[ 0.          0.78787879  0.86666667  0.30927835  0.85714286  0.81521739]
#  [ 0.78787879  0.          0.78142077  0.86813187  0.75        0.1627907 ]
#  [ 0.86666667  0.78142077  0.          0.87709497  0.09392265  0.71597633]
#  [ 0.30927835  0.86813187  0.87709497  0.          0.87777778  0.89285714]
#  [ 0.85714286  0.75        0.09392265  0.87777778  0.          0.68235294]
#  [ 0.81521739  0.1627907   0.71597633  0.89285714  0.68235294  0.        ]]

# Compute Jaccard distances between all pairs of rows in ``data`` and
# return a ``DistanceMatrix`` object labelled with ``ids``:

j_dm = pw_distances(data, ids, "jaccard")
print(j_dm)
# 6x6 distance matrix
# IDs:
# 'A', 'B', 'C', 'D', 'E', 'F'