def test_weighted_unifrac_symmetry(self):
    # Swapping the two samples must not change the reported distance.
    samples = self.table1
    for a in range(len(samples)):
        for b in range(len(samples)):
            forward = weighted_unifrac(
                samples[a], samples[b], self.oids1, self.t1)
            backward = weighted_unifrac(
                samples[b], samples[a], self.oids1, self.t1)
            self.assertAlmostEqual(forward, backward)
def test_weighted_unifrac_symmetry(self):
    # d(u, v) and d(v, u) are compared for every ordered pair of rows.
    counts = self.b1
    for row in range(len(counts)):
        for col in range(len(counts)):
            u_vs_v = weighted_unifrac(
                counts[row], counts[col], self.oids1, self.t1)
            v_vs_u = weighted_unifrac(
                counts[col], counts[row], self.oids1, self.t1)
            self.assertAlmostEqual(u_vs_v, v_vs_u)
def test_weighted_extra_tips(self):
    # Tips that exist in the tree but are never observed must leave the
    # UniFrac value untouched.
    counts = self.b1
    for row in range(len(counts)):
        for col in range(len(counts)):
            with_extra_tips = weighted_unifrac(
                counts[row], counts[col], self.oids1,
                self.t1_w_extra_tips)
            baseline = weighted_unifrac(
                counts[row], counts[col], self.oids1, self.t1)
            self.assertAlmostEqual(with_extra_tips, baseline)
def test_weighted_extra_tips(self):
    # The tree with additional, unobserved tips is expected to give the
    # same distance as the plain tree for every pair of samples.
    rows = self.b1
    for first in range(len(rows)):
        for second in range(len(rows)):
            padded_tree_value = weighted_unifrac(
                rows[first], rows[second], self.oids1,
                self.t1_w_extra_tips)
            plain_tree_value = weighted_unifrac(
                rows[first], rows[second], self.oids1, self.t1)
            self.assertAlmostEqual(padded_tree_value, plain_tree_value)
def test_weighted_unifrac_symmetry_normalized(self):
    # The normalized distance must be identical in both argument orders.
    rows = self.b1
    for left in range(len(rows)):
        for right in range(len(rows)):
            one_way = weighted_unifrac(
                rows[left], rows[right], self.oids1, self.t1,
                normalized=True)
            other_way = weighted_unifrac(
                rows[right], rows[left], self.oids1, self.t1,
                normalized=True)
            self.assertAlmostEqual(one_way, other_way)
def test_weighted_unifrac_symmetry_normalized(self):
    # Every ordered pair of samples is evaluated twice, with the
    # arguments swapped, and the two normalized values are compared.
    samples = self.table1
    for a in range(len(samples)):
        for b in range(len(samples)):
            forward = weighted_unifrac(
                samples[a], samples[b], self.oids1, self.t1,
                normalized=True)
            backward = weighted_unifrac(
                samples[b], samples[a], self.oids1, self.t1,
                normalized=True)
            self.assertAlmostEqual(forward, backward)
def test_weighted_unifrac_non_overlapping_normalized(self):
    # these communities only share the root node, so the normalized
    # distance is expected to be 1.0 for both pairs
    disjoint_pairs = (
        (self.b1[4], self.b1[5]),
        ([1, 1, 1, 0, 0], [0, 0, 0, 1, 1]),
    )
    for u_counts, v_counts in disjoint_pairs:
        observed = weighted_unifrac(
            u_counts, v_counts, self.oids1, self.t1, normalized=True)
        self.assertAlmostEqual(observed, 1.0)
def test_weighted_minimal_trees(self):
    # expected values computed by hand

    # zero tips
    root_only = TreeNode.read(StringIO(u'root;'))
    self.assertEqual(weighted_unifrac([], [], [], root_only), 0.0)

    # two tips
    two_tips = TreeNode.read(StringIO(u'(OTU1:0.25, OTU2:0.25)root;'))
    observed = weighted_unifrac(
        [1, 0], [0, 0], ['OTU1', 'OTU2'], two_tips)
    self.assertEqual(observed, 0.25)
def test_weighted_unifrac_zero_counts(self):
    # two all-zero samples
    both_empty = weighted_unifrac(
        [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1)
    self.assertAlmostEqual(both_empty, 0.0)

    # calculated the following by hand, as QIIME 1.9.1 tells the user
    # that values involving empty vectors will be uninformative, and
    # returns 1.0
    one_empty = weighted_unifrac(
        [1, 1, 1, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1)
    self.assertAlmostEqual(one_empty, 2.0)

    # no observations and no OTU ids at all
    nothing_observed = weighted_unifrac([], [], [], self.t1)
    self.assertAlmostEqual(nothing_observed, 0.0)
def test_weighted_normalized_root_not_observed(self):
    # expected values computed by hand, these disagree with QIIME 1.9.1
    hand_computed = (
        # root node not observed, but branch between (OTU1, OTU2) and
        # root is considered shared
        ([1, 0, 0, 0], [1, 1, 0, 0], 0.1764705882),
        # root node not observed, but branch between (OTU3, OTU4) and
        # root is considered shared
        ([0, 0, 1, 1], [0, 0, 1, 0], 0.1818181818),
    )
    for u_counts, v_counts, by_hand in hand_computed:
        observed = weighted_unifrac(
            u_counts, v_counts, self.oids2, self.t2, normalized=True)
        self.assertAlmostEqual(observed, by_hand)
def test_weighted_root_not_observed(self):
    # expected values computed by hand, these disagree with QIIME 1.9.1

    # root node not observed, but branch between (OTU1, OTU2) and root
    # is considered shared
    left_clade = weighted_unifrac(
        [1, 0, 0, 0], [1, 1, 0, 0], self.oids2, self.t2)
    self.assertAlmostEqual(left_clade, 0.15)

    # root node not observed, but branch between (OTU3, OTU4) and root
    # is considered shared
    right_clade = weighted_unifrac(
        [0, 0, 1, 1], [0, 0, 1, 0], self.oids2, self.t2)
    self.assertAlmostEqual(right_clade, 0.6)
def test_weighted_otus_out_of_order(self):
    # UniFrac API does not assert the observations are in tip order of
    # the input tree: swapping the first and last OTU in both the id
    # list and the count columns must give the same distances.
    swapped_ids = self.oids1[:]
    swapped_ids[0], swapped_ids[-1] = swapped_ids[-1], swapped_ids[0]

    swapped_counts = self.b1.copy()
    swapped_counts[:, [0, -1]] = swapped_counts[:, [-1, 0]]

    n_samples = len(self.b1)
    for row in range(n_samples):
        for col in range(n_samples):
            in_order = weighted_unifrac(
                self.b1[row], self.b1[col], self.oids1, self.t1)
            out_of_order = weighted_unifrac(
                swapped_counts[row], swapped_counts[col], swapped_ids,
                self.t1)
            self.assertAlmostEqual(in_order, out_of_order)
def test_weighted_unifrac_identity_normalized(self):
    # A sample compared with itself must have a normalized distance of 0.
    samples = self.table1
    for idx in range(len(samples)):
        self_distance = weighted_unifrac(
            samples[idx], samples[idx], self.oids1, self.t1,
            normalized=True)
        self.assertAlmostEqual(self_distance, 0.0)
def test_weighted_unifrac_zero_counts(self):
    scenarios = (
        # both samples empty
        ([0, 0, 0, 0, 0], [0, 0, 0, 0, 0], self.oids1, 0.0),
        # calculated the following by hand, as QIIME 1.9.1 tells the
        # user that values involving empty vectors will be
        # uninformative, and returns 1.0
        ([1, 1, 1, 0, 0], [0, 0, 0, 0, 0], self.oids1, 2.0),
        # no counts and no OTU ids
        ([], [], [], 0.0),
    )
    for u_counts, v_counts, otu_ids, wanted in scenarios:
        observed = weighted_unifrac(u_counts, v_counts, otu_ids, self.t1)
        self.assertAlmostEqual(observed, wanted)
def test_weighted_otus_out_of_order(self):
    # UniFrac API does not assert the observations are in tip order of
    # the input tree, so a consistently permuted (ids, counts) pair has
    # to reproduce the unpermuted result.
    permuted_ids = self.oids1[:]
    permuted_table = self.b1.copy()
    # exchange the first and the last OTU in the ids and in the columns
    permuted_ids[0], permuted_ids[-1] = permuted_ids[-1], permuted_ids[0]
    permuted_table[:, [0, -1]] = permuted_table[:, [-1, 0]]

    original = self.b1
    for a in range(len(original)):
        for b in range(len(original)):
            reference = weighted_unifrac(
                original[a], original[b], self.oids1, self.t1)
            permuted = weighted_unifrac(
                permuted_table[a], permuted_table[b], permuted_ids,
                self.t1)
            self.assertAlmostEqual(reference, permuted)
def compute_beta_unifraq(df, m):
    """Build a symmetric matrix of pairwise UniFrac distances.

    Parameters
    ----------
    df
        Table whose index holds the OTU ids and whose columns are the
        samples; ``df.values`` is cast to ``int64`` counts.
    m : str
        ``"unifraq"`` selects ``unweighted_unifrac`` and ``"wunifraq"``
        selects ``weighted_unifrac``.

    Returns
    -------
    numpy.ndarray
        Square ``(n_samples, n_samples)`` array with a zero diagonal,
        ordered like the columns of ``df`` after unmatched OTUs have
        been dropped.

    Raises
    ------
    ValueError
        If ``m`` is not one of the two supported metric names.
    """
    # Reject unknown metric names up front.  Without this check the
    # distance variable was never assigned, which surfaced as an
    # UnboundLocalError inside the loop (or silently produced an all-zero
    # matrix when there were fewer than two samples).
    if m not in ("unifraq", "wunifraq"):
        raise ValueError(
            "unknown UniFrac metric %r; expected 'unifraq' or 'wunifraq'"
            % (m,))

    from skbio.diversity.beta import unweighted_unifrac, weighted_unifrac

    # The metric does not change between pairs, so pick it once.
    metric = unweighted_unifrac if m == "unifraq" else weighted_unifrac

    # get the phylogenetic tree and drop OTUs that are not NCBI annotated
    # (the ids reported back as not found by load_taxonomy_tree)
    tree, notfound = load_taxonomy_tree(list(df.index.values))
    df = df.drop(list(notfound))

    # The numpy matrix of counts, in which the rows are sample counts
    mt = df.values.T.astype(np.dtype('int64'))
    # OTU list, re-read because rows may have been dropped above
    otu_ids = list(df.index.values)

    # The beta diversity matrix
    nsamples = len(df.columns)
    bm = np.zeros((nsamples, nsamples))

    # Compute each unordered pair once and mirror it across the diagonal
    for i in range(nsamples):
        for j in range(i):
            distance = metric(mt[i], mt[j], otu_ids, tree)
            bm[i, j] = distance
            bm[j, i] = distance
    return bm
def test_weighted_unifrac_non_overlapping(self):
    # expected results derived from QIIME 1.9.1, an implementation that
    # is completely independent of skbio's initial weighted unifrac
    # implementation
    # these communities only share the root node
    fifth_sample, sixth_sample = self.b1[4], self.b1[5]
    observed = weighted_unifrac(
        fifth_sample, sixth_sample, self.oids1, self.t1)
    self.assertAlmostEqual(observed, 4.0)
def test_weighted_unifrac_zero_counts_normalized(self):
    # expected results derived from QIIME 1.9.1, an implementation that
    # is completely independent of skbio's initial weighted unifrac
    # implementation
    scenarios = (
        # both samples empty
        ([0, 0, 0, 0, 0], [0, 0, 0, 0, 0], self.oids1, 0.0),
        # one sample empty
        ([1, 1, 1, 0, 0], [0, 0, 0, 0, 0], self.oids1, 1.0),
        # no counts and no OTU ids
        ([], [], [], 0.0),
    )
    for u_counts, v_counts, otu_ids, wanted in scenarios:
        observed = weighted_unifrac(
            u_counts, v_counts, otu_ids, self.t1, normalized=True)
        self.assertAlmostEqual(observed, wanted)
def test_weighted_unifrac_non_overlapping(self):
    # expected results derived from QIIME 1.9.1, an implementation that
    # is completely independent of skbio's initial weighted unifrac
    # implementation
    # these communities only share the root node
    samples = self.table1
    self.assertAlmostEqual(
        weighted_unifrac(samples[4], samples[5], self.oids1, self.t1),
        4.0)
def __call__(self, a, b):
    """
    Return the non-normalized weighted UniFrac value for two samples.

    The counts of each sample are looked up by name in ``self.otu_table``
    and restricted with ``self.id_mask`` before being compared.

    :param a: Sample
    :param b: Sample
    :return: float
    """
    table = self.otu_table
    mask = self.id_mask
    counts_a = table.data(a.name)[mask]
    counts_b = table.data(b.name)[mask]
    return weighted_unifrac(
        counts_a, counts_b, self.masked_ids, self.tree_index,
        normalized=False)
def test_weighted_unifrac_identity_normalized(self):
    # The normalized distance between a sample and itself is zero.
    rows = self.b1
    for position in range(len(rows)):
        observed = weighted_unifrac(
            rows[position], rows[position], self.oids1, self.t1,
            normalized=True)
        self.assertAlmostEqual(observed, 0.0)
def test_weighted_unifrac_qiime_tiny_test(self):
    # Every pair of samples in q_table is checked against the stored
    # distance matrix from the 'qiime-191-tt' data directory.
    relative_path = os.path.join('qiime-191-tt', 'weighted_unifrac_dm.txt')
    reference = DistanceMatrix.read(get_data_path(relative_path, 'data'))

    table = self.q_table
    for first_id in table.columns:
        for second_id in table.columns:
            observed = weighted_unifrac(
                table[first_id], table[second_id],
                otu_ids=table.index, tree=self.q_tree)
            # the message names the failing pair of sample ids
            self.assertAlmostEqual(
                observed, reference[first_id, second_id],
                msg="%s, %s" % (first_id, second_id))
def get_unifrac(
    otu_file_1: pathlib.Path,
    otu_file_2: pathlib.Path,
    tree_file: pathlib.Path,
    weighted: bool,
    threshold: int,
):
    """Compute one UniFrac value per entry yielded by ``get_vectors``.

    Both OTU tables are loaded with ``load_table`` and converted to dense
    dataframes, and the tree is read with ``TreeNode.read``.

    Parameters
    ----------
    otu_file_1, otu_file_2
        Paths of the two OTU tables to compare.
    tree_file
        Path of the tree file.
    weighted
        If true, use normalized ``weighted_unifrac``; otherwise use
        ``unweighted_unifrac``.  Both are called with ``validate=True``.
    threshold
        Forwarded unchanged to ``get_vectors``.

    Returns
    -------
    tuple
        ``(values, n_rows_1, n_rows_2)``: a ``pd.Series`` of UniFrac
        values keyed by the ``col`` item of each ``get_vectors`` tuple,
        followed by the row counts of the first and second table.
    """
    first_table = load_table(str(otu_file_1)).to_dataframe(dense=True)
    second_table = load_table(str(otu_file_2)).to_dataframe(dense=True)
    phylogeny = TreeNode.read(str(tree_file))

    distances = {}
    vectors = get_vectors(first_table, second_table, threshold)
    for u, v, otu_ids, col in vectors:
        distances[col] = (
            weighted_unifrac(
                u, v, otu_ids, phylogeny, normalized=True, validate=True
            )
            if weighted
            else unweighted_unifrac(u, v, otu_ids, phylogeny, validate=True)
        )
    return pd.Series(distances), first_table.shape[0], second_table.shape[0]
def test_weighted_unifrac(self):
    # expected results derived from QIIME 1.9.1, an implementation that
    # is completely independent of skbio's initial weighted unifrac
    # implementation
    # Each entry is (row index of u, row index of v, expected distance).
    qiime_values = (
        # sample A versus remaining
        (0, 1, 2.4),
        (0, 2, 1.86666666667),
        (0, 3, 2.53333333333),
        (0, 4, 1.35384615385),
        (0, 5, 3.2),
        # sample B versus remaining
        (1, 2, 2.26666666667),
        (1, 3, 0.933333333333),
        (1, 4, 3.2),
        (1, 5, 0.8375),
        # sample C versus remaining
        (2, 3, 1.33333333333),
        (2, 4, 1.89743589744),
        (2, 5, 2.66666666667),
        # sample D versus remaining
        (3, 4, 2.66666666667),
        (3, 5, 1.33333333333),
        # sample E versus remaining
        (4, 5, 4.0),
    )
    for u_row, v_row, wanted in qiime_values:
        observed = weighted_unifrac(
            self.table1[u_row], self.table1[v_row], self.oids1, self.t1)
        self.assertAlmostEqual(observed, wanted)
def test_weighted_unifrac_identity(self):
    # Comparing any sample with itself must yield a distance of zero.
    rows = self.b1
    for position in range(len(rows)):
        self_distance = weighted_unifrac(
            rows[position], rows[position], self.oids1, self.t1)
        self.assertAlmostEqual(self_distance, 0.0)
def test_weighted_unifrac_normalized(self):
    # expected results derived from QIIME 1.9.1, an implementation that
    # is completely independent of skbio's initial weighted unifrac
    # implementation
    # Each entry is (row index of u, row index of v, expected distance).
    reference = (
        # sample A versus remaining
        (0, 1, 0.6),
        (0, 2, 0.466666666667),
        (0, 3, 0.633333333333),
        (0, 4, 0.338461538462),
        (0, 5, 0.8),
        # sample B versus remaining
        (1, 2, 0.566666666667),
        (1, 3, 0.233333333333),
        (1, 4, 0.8),
        (1, 5, 0.209375),
        # sample C versus remaining
        (2, 3, 0.333333333333),
        (2, 4, 0.474358974359),
        (2, 5, 0.666666666667),
        # sample D versus remaining
        (3, 4, 0.666666666667),
        (3, 5, 0.333333333333),
        # sample E versus remaining
        (4, 5, 1.0),
    )
    for first, second, wanted in reference:
        observed = weighted_unifrac(
            self.b1[first], self.b1[second], self.oids1, self.t1,
            normalized=True)
        self.assertAlmostEqual(observed, wanted)
def test_weighted_unifrac(self):
    # expected results derived from QIIME 1.9.1, an implementation that
    # is completely independent of skbio's initial weighted unifrac
    # implementation
    # Each entry is (row index of u, row index of v, expected distance).
    reference = (
        # sample A versus remaining
        (0, 1, 2.4),
        (0, 2, 1.86666666667),
        (0, 3, 2.53333333333),
        (0, 4, 1.35384615385),
        (0, 5, 3.2),
        # sample B versus remaining
        (1, 2, 2.26666666667),
        (1, 3, 0.933333333333),
        (1, 4, 3.2),
        (1, 5, 0.8375),
        # sample C versus remaining
        (2, 3, 1.33333333333),
        (2, 4, 1.89743589744),
        (2, 5, 2.66666666667),
        # sample D versus remaining
        (3, 4, 2.66666666667),
        (3, 5, 1.33333333333),
        # sample E versus remaining
        (4, 5, 4.0),
    )
    for first, second, wanted in reference:
        observed = weighted_unifrac(
            self.b1[first], self.b1[second], self.oids1, self.t1)
        self.assertAlmostEqual(observed, wanted)
def test_weighted_unifrac_kwargs(self):
    # confirm that **kwargs can be passed: an unrecognised keyword is
    # supplied and the self-distance is still expected to be zero
    unknown_option = {'not_a_known_parameter': 42}
    observed = weighted_unifrac(
        self.table1[0], self.table1[0], self.oids1, self.t1,
        **unknown_option)
    self.assertAlmostEqual(observed, 0.0)
def test_weighted_unifrac_identity(self):
    # The distance from each sample to itself is expected to be 0.
    samples = self.table1
    for idx in range(len(samples)):
        observed = weighted_unifrac(
            samples[idx], samples[idx], self.oids1, self.t1)
        self.assertAlmostEqual(observed, 0.0)
def test_weighted_minimal_trees(self):
    # two tips: only OTU1 is observed, and only in the first sample
    newick = StringIO('(OTU1:0.25, OTU2:0.25)root;')
    two_tip_tree = TreeNode.read(newick)
    observed = weighted_unifrac(
        [1, 0], [0, 0], ['OTU1', 'OTU2'], two_tip_tree)
    self.assertEqual(observed, 0.25)
def test_weighted_unifrac_normalized(self):
    # expected results derived from QIIME 1.9.1, an implementation that
    # is completely independent of skbio's initial weighted unifrac
    # implementation
    # Each entry is (row index of u, row index of v, expected distance).
    qiime_values = (
        # sample A versus remaining
        (0, 1, 0.6),
        (0, 2, 0.466666666667),
        (0, 3, 0.633333333333),
        (0, 4, 0.338461538462),
        (0, 5, 0.8),
        # sample B versus remaining
        (1, 2, 0.566666666667),
        (1, 3, 0.233333333333),
        (1, 4, 0.8),
        (1, 5, 0.209375),
        # sample C versus remaining
        (2, 3, 0.333333333333),
        (2, 4, 0.474358974359),
        (2, 5, 0.666666666667),
        # sample D versus remaining
        (3, 4, 0.666666666667),
        (3, 5, 0.333333333333),
        # sample E versus remaining
        (4, 5, 1.0),
    )
    for u_row, v_row, wanted in qiime_values:
        observed = weighted_unifrac(
            self.table1[u_row], self.table1[v_row], self.oids1, self.t1,
            normalized=True)
        self.assertAlmostEqual(observed, wanted)