def test_pw_distances_braycurtis(self):
    # Bray-Curtis distances on the small fixture (t1/ids1) are checked
    # entry by entry; the larger fixture (t2/ids2) is compared against a
    # full expected DistanceMatrix.
    small_dm = pw_distances(self.t1, self.ids1, 'braycurtis')
    self.assertEqual(small_dm.shape, (3, 3))

    # (id pair, expected distance): the diagonal first, then each
    # off-diagonal pair in both orientations.
    expected_small = [
        (('A', 'A'), 0.0),
        (('B', 'B'), 0.0),
        (('C', 'C'), 0.0),
        (('A', 'B'), 0.27272727),
        (('B', 'A'), 0.27272727),
        (('A', 'C'), 0.71428571),
        (('C', 'A'), 0.71428571),
        (('B', 'C'), 0.66666667),
        (('C', 'B'), 0.66666667),
    ]
    for id_pair, distance in expected_small:
        npt.assert_almost_equal(small_dm[id_pair], distance)

    large_dm = pw_distances(self.t2, self.ids2, 'braycurtis')
    expected_dm = DistanceMatrix(
        [[0., 0.78787879, 0.86666667, 0.30927835, 0.85714286, 0.81521739],
         [0.78787879, 0., 0.78142077, 0.86813187, 0.75, 0.1627907],
         [0.86666667, 0.78142077, 0., 0.87709497, 0.09392265, 0.71597633],
         [0.30927835, 0.86813187, 0.87709497, 0., 0.87777778, 0.89285714],
         [0.85714286, 0.75, 0.09392265, 0.87777778, 0., 0.68235294],
         [0.81521739, 0.1627907, 0.71597633, 0.89285714, 0.68235294, 0.]],
        self.ids2)
    # Compare every cell to 6 decimal places.
    for row_id in self.ids2:
        for col_id in self.ids2:
            npt.assert_almost_equal(large_dm[row_id, col_id],
                                    expected_dm[row_id, col_id], 6)
def test_pw_distances_euclidean(self):
    # Euclidean distances on the small fixture (t1/ids1) are checked
    # entry by entry; the larger fixture (t2/ids2) is compared against a
    # full expected DistanceMatrix.
    small_dm = pw_distances(self.t1, self.ids1, 'euclidean')
    self.assertEqual(small_dm.shape, (3, 3))

    # (id pair, expected distance): the diagonal first, then each
    # off-diagonal pair in both orientations.
    expected_small = [
        (('A', 'A'), 0.0),
        (('B', 'B'), 0.0),
        (('C', 'C'), 0.0),
        (('A', 'B'), 2.23606798),
        (('B', 'A'), 2.23606798),
        (('A', 'C'), 4.12310563),
        (('C', 'A'), 4.12310563),
        (('B', 'C'), 2.82842712),
        (('C', 'B'), 2.82842712),
    ]
    for id_pair, distance in expected_small:
        npt.assert_almost_equal(small_dm[id_pair], distance)

    large_dm = pw_distances(self.t2, self.ids2, 'euclidean')
    expected_dm = DistanceMatrix(
        [[0., 80.8455317, 84.0297566, 36.3042697, 86.0116271, 78.9176786],
         [80.8455317, 0., 71.0844568, 74.4714710, 69.3397433, 14.422205],
         [84.0297566, 71.0844568, 0., 77.2851861, 8.3066238, 60.7536007],
         [36.3042697, 74.4714710, 77.2851861, 0., 78.7908624, 70.7389567],
         [86.0116271, 69.3397433, 8.3066238, 78.7908624, 0., 58.4807660],
         [78.9176786, 14.422205, 60.7536007, 70.7389567, 58.4807660, 0.]],
        self.ids2)
    # Compare every cell to 6 decimal places.
    for row_id in self.ids2:
        for col_id in self.ids2:
            npt.assert_almost_equal(large_dm[row_id, col_id],
                                    expected_dm[row_id, col_id], 6)
def test_pw_distances_from_table_euclidean(self):
    # results are equal when passed as Table or matrix
    def assert_table_matches_matrix(counts, sample_ids, table):
        matrix_dm = pw_distances(counts, sample_ids, 'euclidean')
        # assert_warns checks that the call emits a DeprecationWarning and
        # hands back the callable's return value.
        table_dm = npt.assert_warns(DeprecationWarning,
                                    pw_distances_from_table,
                                    table, 'euclidean')
        for row_id in sample_ids:
            for col_id in sample_ids:
                npt.assert_almost_equal(matrix_dm[row_id, col_id],
                                        table_dm[row_id, col_id])

    assert_table_matches_matrix(self.t1, self.ids1, self.table1)
    assert_table_matches_matrix(self.t2, self.ids2, self.table2)
def test_pw_distances_from_table_braycurtis(self):
    # results are equal when passed as Table or matrix
    def assert_table_matches_matrix(counts, sample_ids, table):
        matrix_dm = pw_distances(counts, sample_ids, 'braycurtis')
        # assert_warns checks that the call emits a UserWarning and hands
        # back the callable's return value.
        table_dm = npt.assert_warns(UserWarning, pw_distances_from_table,
                                    table, 'braycurtis')
        for row_id in sample_ids:
            for col_id in sample_ids:
                npt.assert_almost_equal(matrix_dm[row_id, col_id],
                                        table_dm[row_id, col_id])

    assert_table_matches_matrix(self.t1, self.ids1, self.table1)
    assert_table_matches_matrix(self.t2, self.ids2, self.table2)
def test_pw_distances_from_table_braycurtis(self):
    # results are equal when passed as Table or matrix
    fixtures = (
        (self.t1, self.ids1, self.table1),
        (self.t2, self.ids2, self.table2),
    )
    for counts, sample_ids, table in fixtures:
        matrix_dm = pw_distances(counts, sample_ids, 'braycurtis')
        # Silence any warning raised by the table-based entry point; only
        # the resulting distances are under test here.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            table_dm = pw_distances_from_table(table, 'braycurtis')
        for row_id in sample_ids:
            for col_id in sample_ids:
                npt.assert_almost_equal(matrix_dm[row_id, col_id],
                                        table_dm[row_id, col_id])
def test_pw_distances_weighted_unifrac(self):
    # expected values calculated by hand
    unifrac_kwargs = dict(otu_ids=self.otu_ids1, tree=self.tree1)
    # The metric is given once by name and once as the function object;
    # both spellings must produce the same matrix.
    by_name = pw_distances('weighted_unifrac', self.t1, self.ids1,
                           **unifrac_kwargs)
    by_function = pw_distances(weighted_unifrac, self.t1, self.ids1,
                               **unifrac_kwargs)
    self.assertEqual(by_name.shape, (3, 3))
    self.assertEqual(by_name, by_function)

    expected_dm = DistanceMatrix(
        [[0.0, 0.1750000, 0.12499999],
         [0.1750000, 0.0, 0.3000000],
         [0.12499999, 0.3000000, 0.0]],
        ids=self.ids1)
    # Compare every cell to 6 decimal places.
    for row_id in self.ids1:
        for col_id in self.ids1:
            npt.assert_almost_equal(by_name[row_id, col_id],
                                    expected_dm[row_id, col_id], 6)
def test_pw_distances_from_table_euclidean(self):
    # results are equal when passed as Table or matrix
    fixtures = (
        (self.t1, self.ids1, self.table1),
        (self.t2, self.ids2, self.table2),
    )
    for counts, sample_ids, table in fixtures:
        matrix_dm = pw_distances(counts, sample_ids, 'euclidean')
        # assert_warns checks that the call emits a DeprecationWarning and
        # hands back the callable's return value.
        table_dm = npt.assert_warns(
            DeprecationWarning, pw_distances_from_table, table,
            'euclidean')
        for row_id in sample_ids:
            for col_id in sample_ids:
                npt.assert_almost_equal(matrix_dm[row_id, col_id],
                                        table_dm[row_id, col_id])
def test_pw_distances_weighted_unifrac_normalized(self):
    # expected values calculated by hand
    unifrac_kwargs = dict(otu_ids=self.otu_ids1, tree=self.tree1,
                          normalized=True)
    # The metric is given once by name and once as the function object;
    # both spellings must produce the same matrix.
    by_name = pw_distances('weighted_unifrac', self.t1, self.ids1,
                           **unifrac_kwargs)
    by_function = pw_distances(weighted_unifrac, self.t1, self.ids1,
                               **unifrac_kwargs)
    self.assertEqual(by_name.shape, (3, 3))
    self.assertEqual(by_name, by_function)

    expected_dm = DistanceMatrix(
        [[0.0, 0.128834, 0.085714],
         [0.128834, 0.0, 0.2142857],
         [0.085714, 0.2142857, 0.0]],
        ids=self.ids1)
    # Compare every cell to 6 decimal places.
    for row_id in self.ids1:
        for col_id in self.ids1:
            npt.assert_almost_equal(by_name[row_id, col_id],
                                    expected_dm[row_id, col_id], 6)
def get_clusters(x_original, axis='row'):
    """Performs UPGMA clustering using euclidean distances.

    Parameters
    ----------
    x_original : array-like
        Data to cluster. Must provide ``copy()``, ``.T`` and ``.shape``
        (presumably a 2-D numpy array -- confirm against callers). The
        input is copied, not modified.
    axis : str, optional
        ``'column'`` clusters the columns (the data is transposed first);
        any other value clusters the rows.

    Returns
    -------
    list of int
        Indices of the clustered rows (or columns), in the order in which
        the tips of the UPGMA tree are visited.
    """
    x = x_original.copy()
    if axis == 'column':
        x = x.T
    nr = x.shape[0]

    # Build a real list of ids: on Python 3 ``map`` returns a lazy iterator
    # that has no ``len()`` and can only be consumed once, which breaks id
    # validation downstream.
    row_ids = [str(index) for index in range(nr)]
    row_dissims = pw_distances(x, ids=row_ids, metric='euclidean')

    # do upgma - rows
    # Average in SciPy's cluster.hierarchy.linkage is UPGMA
    linkage_matrix = linkage(row_dissims.condensed_form(), method='average')
    tree = TreeNode.from_linkage_matrix(linkage_matrix, row_dissims.ids)
    # Tip names are the stringified indices assigned above.
    return [int(tip.name) for tip in tree.tips()]
def test_pw_distances_unweighted_unifrac(self):
    # expected values calculated by hand
    unifrac_kwargs = dict(otu_ids=self.otu_ids1, tree=self.tree1)
    # The metric is given once by name and once as the function object;
    # both spellings must produce the same matrix.
    by_name = pw_distances('unweighted_unifrac', self.t1, self.ids1,
                           **unifrac_kwargs)
    by_function = pw_distances(unweighted_unifrac, self.t1, self.ids1,
                               **unifrac_kwargs)
    self.assertEqual(by_name.shape, (3, 3))
    self.assertEqual(by_name, by_function)

    # The third sample is this far from each of the first two; the first
    # two are at distance 0 from each other.
    third_sample_distance = 0.25 / 1.0
    expected_dm = DistanceMatrix(
        [[0.0, 0.0, third_sample_distance],
         [0.0, 0.0, third_sample_distance],
         [third_sample_distance, third_sample_distance, 0.0]],
        ids=self.ids1)
    # Compare every cell to 6 decimal places.
    for row_id in self.ids1:
        for col_id in self.ids1:
            npt.assert_almost_equal(by_name[row_id, col_id],
                                    expected_dm[row_id, col_id], 6)
def test_pw_distances_weighted_unifrac_normalized(self):
    # expected values calculated by hand
    def normalized_weighted_unifrac_dm(metric):
        # ``metric`` may be the registered name or the function itself.
        return pw_distances(metric, self.t1, self.ids1,
                            otu_ids=self.otu_ids1, tree=self.tree1,
                            normalized=True)

    by_name = normalized_weighted_unifrac_dm('weighted_unifrac')
    by_function = normalized_weighted_unifrac_dm(weighted_unifrac)
    self.assertEqual(by_name.shape, (3, 3))
    self.assertEqual(by_name, by_function)

    expected_rows = [
        [0.0, 0.128834, 0.085714],
        [0.128834, 0.0, 0.2142857],
        [0.085714, 0.2142857, 0.0],
    ]
    expected_dm = DistanceMatrix(expected_rows, ids=self.ids1)
    # Compare every cell to 6 decimal places.
    for row_id in self.ids1:
        for col_id in self.ids1:
            npt.assert_almost_equal(by_name[row_id, col_id],
                                    expected_dm[row_id, col_id], 6)
def biom_to_dm(metric, biom):
    """Compute pairwise distances between the columns of ``biom``.

    ``biom`` is transposed so that each of its columns becomes one row of
    counts, and its column labels are used as the ids of the result.
    (Presumably ``biom`` is a pandas DataFrame -- it must expose ``.T``
    and ``.columns``; confirm against callers.)

    Parameters
    ----------
    metric
        Distance metric, forwarded unchanged to ``pw_distances``.
    biom
        Table whose columns are the items to compare.

    Returns
    -------
    Whatever ``pw_distances`` returns for the transposed table.
    """
    counts_by_column = biom.T
    column_ids = biom.columns
    return pw_distances(metric=metric, counts=counts_by_column,
                        ids=column_ids)
#!/usr/bin/env python3
"""Compute pairwise Jaccard distances between the columns of a CSV table.

The first row of the input holds the sample ids and the first column holds
row labels; every other cell is parsed as a float. The resulting distance
matrix is written next to the script.
"""
import csv

import numpy as np
from numpy import genfromtxt
from skbio.diversity.beta import pw_distances

# Single source of truth for the paths (the input is read twice below).
INPUT_CSV = "BAM_9ancient_hmp_protein.csv"
OUTPUT_CSV = 'jaccard__9ancient_hmp_protein.csv'

# Read the header first so the data columns can be derived from it.
# ``newline=''`` is what the csv module requires for files it parses.
with open(INPUT_CSV, newline='') as csvfile:
    csv_reader = csv.reader(csvfile)
    sample_ID = next(csv_reader)
# Drop the label of the first (row-name) column; the rest are sample ids.
sample_ID.pop(0)
print(sample_ID[1])

# One data column per sample id, skipping column 0. For a 157-column file
# this is the same span as the previously hard-coded ``range(1, 157)``, but
# it can no longer drift out of step with the header.
taxonomy_tbl = np.genfromtxt(INPUT_CSV, delimiter=',', dtype=float,
                             skip_header=1,
                             usecols=range(1, len(sample_ID) + 1))
# Transpose so each sample becomes one row, matching ``sample_ID``.
taxonomy_trans = taxonomy_tbl.transpose()

# jaccard_distance
j_dm = pw_distances(taxonomy_trans, sample_ID, "jaccard")
print(j_dm[0:3])
j_dm.write(OUTPUT_CSV)
import numpy as np
from skbio.diversity.beta import pw_distances

# Example counts: one row per sample, labelled by the matching entry of
# ``ids``.
data = [
    [23, 64, 14, 0, 0, 3, 1],
    [0, 3, 35, 42, 0, 12, 1],
    [0, 5, 5, 0, 40, 40, 0],
    [44, 35, 9, 0, 1, 0, 0],
    [0, 2, 8, 0, 35, 45, 1],
    [0, 0, 25, 35, 0, 19, 0],
]
ids = ['A', 'B', 'C', 'D', 'E', 'F']

# Compute Bray-Curtis distances between all pairs of samples and return a
# ``DistanceMatrix`` object:
bc_dm = pw_distances(data, ids=ids, metric="braycurtis")
print(bc_dm)
# 6x6 distance matrix
# IDs:
# 'A', 'B', 'C', 'D', 'E', 'F'
# Data:
# [[ 0.          0.78787879  0.86666667  0.30927835  0.85714286  0.81521739]
#  [ 0.78787879  0.          0.78142077  0.86813187  0.75        0.1627907 ]
#  [ 0.86666667  0.78142077  0.          0.87709497  0.09392265  0.71597633]
#  [ 0.30927835  0.86813187  0.87709497  0.          0.87777778  0.89285714]
#  [ 0.85714286  0.75        0.09392265  0.87777778  0.          0.68235294]
#  [ 0.81521739  0.1627907   0.71597633  0.89285714  0.68235294  0.        ]]

# Compute Jaccard distances between all pairs of samples and return a
# ``DistanceMatrix`` object:
j_dm = pw_distances(data, ids=ids, metric="jaccard")
print(j_dm)
# 6x6 distance matrix
# IDs:
# 'A', 'B', 'C', 'D', 'E', 'F'