def test_compare_tip_distances(self):
    """Check compare_tip_distances on two trees sharing three tips.

    The expected value is ``(1 - r) / 2``, where ``r`` is the first
    element returned by ``correlation_t`` for the flattened tip-to-tip
    distance matrices of the shared taxa.
    """
    t = TreeNode.from_newick('((H:1,G:1):2,(R:0.5,M:0.7):3);')
    t2 = TreeNode.from_newick('(((H:1,G:1,O:1):2,R:3):1,X:4);')
    obs = t.compare_tip_distances(t2)

    # note: common taxa are H, G, R (only)
    # Hand-computed tip-to-tip distances (rows/columns ordered H, G, R).
    m1 = np.array([[0, 2, 6.5], [2, 0, 6.5], [6.5, 6.5, 0]])
    m2 = np.array([[0, 2, 6], [2, 0, 6], [6, 6, 0]])
    r = correlation_t(m1.flat, m2.flat)[0]

    # obs and the expected value are floats produced by separate calls,
    # so compare with a tolerance instead of demanding bit-identical
    # results.
    self.assertAlmostEqual(obs, (1 - r) / 2)
def test_correlation_test_no_permutations(self):
    """Test correlation_t with no permutations.

    With ``permutations=0`` the expected result carries an empty list at
    index 2 and ``None`` at index 3.
    """
    # These results were verified with R.
    expected = (
        -0.2581988897471611,
        0.7418011102528389,
        [],
        None,
        (-0.97687328610475876, 0.93488023560400879),
    )
    observed = correlation_t([1, 2, 3, 4], [1, 2, 1, 1], permutations=0)

    # Numeric entries are compared with a tolerance; the None entry at
    # index 3 is compared exactly.
    for idx in (0, 1, 2):
        np.testing.assert_allclose(observed[idx], expected[idx])
    self.assertEqual(observed[3], expected[3])
    np.testing.assert_allclose(observed[4], expected[4])
def test_correlation_test_small_obs(self):
    """Test correlation_t with a small number of observations.

    Runs the same three-point input once with the default arguments and
    once with ``method='spearman'``; both runs are expected to report
    ``(None, None)`` at index 4.
    """
    # These results were verified with R.

    def check_coefficient_and_permutations(result):
        # First two entries, then the 999 permuted values and their range.
        np.testing.assert_allclose(result[:2], (1.0, 0))
        self.assertEqual(len(result[2]), 999)
        for coeff in result[2]:
            self.assertTrue(-1.0 <= coeff <= 1.0)

    # Default arguments.
    default_result = correlation_t([1, 2, 3], [1, 2, 3])
    check_coefficient_and_permutations(default_result)
    self.assertCorrectPValue(0.3, 0.4, correlation_t,
                             ([1, 2, 3], [1, 2, 3]),
                             p_val_idx=3)
    self.assertEqual(default_result[4], (None, None))

    # Spearman variant.
    spearman_result = correlation_t([1, 2, 3], [1, 2, 3],
                                    method='spearman')
    check_coefficient_and_permutations(spearman_result)
    self.assertCorrectPValue(0.3, 0.4, correlation_t,
                             ([1, 2, 3], [1, 2, 3]),
                             {'method': 'spearman'},
                             p_val_idx=3)
    self.assertEqual(spearman_result[4], (None, None))
def test_correlation_test_perfect_correlation(self):
    """Test correlation_t with perfectly-correlated input vectors."""
    # These results were verified with R.
    expected_head = (0.99999999999999978, 2.2204460492503131e-16)
    expected_tail = (0.99999999999998879, 1.0)

    result = correlation_t([1, 2, 3, 4], [1, 2, 3, 4])

    np.testing.assert_allclose(result[:2], expected_head)

    # One value per permutation, each within [-1, 1].
    permuted = result[2]
    self.assertEqual(len(permuted), 999)
    for coeff in permuted:
        self.assertTrue(-1.0 <= coeff <= 1.0)

    self.assertCorrectPValue(0.06, 0.09, correlation_t,
                             ([1, 2, 3, 4], [1, 2, 3, 4]),
                             p_val_idx=3)
    np.testing.assert_allclose(result[4], expected_tail)
def test_correlation_test_spearman(self):
    """Test correlation_t using spearman on valid input.

    Exercises ``tails='high'`` first and then ``tails=None`` on the same
    fixture data.
    """
    # This example taken from Wikipedia page:
    # http://en.wikipedia.org/wiki/Spearman's_rank_correlation_coefficient

    def check_permuted_values(values):
        # 999 permuted values are expected, each within [-1, 1].
        self.assertEqual(len(values), 999)
        for rho in values:
            self.assertTrue(-1.0 <= rho <= 1.0)

    expected_interval = (-0.7251388558041697, 0.51034422964834503)

    # tails='high'
    high_result = correlation_t(self.data1, self.data2,
                                method='spearman', tails='high')
    np.testing.assert_allclose(high_result[:2],
                               (-0.17575757575757578, 0.686405827612))
    check_permuted_values(high_result[2])
    self.assertCorrectPValue(0.67, 0.7, correlation_t,
                             (self.data1, self.data2),
                             {'method': 'spearman', 'tails': 'high'},
                             p_val_idx=3)
    np.testing.assert_allclose(high_result[4], expected_interval)

    # The p-value is off because the example uses a one-tailed test, while
    # we use a two-tailed test. Someone confirms the answer that we get
    # here for a two-tailed test:
    # http://stats.stackexchange.com/questions/22816/calculating-p-value-
    #     for-spearmans-rank-correlation-coefficient-example-on-wikip
    default_tail_result = correlation_t(self.data1, self.data2,
                                        method='spearman', tails=None)
    np.testing.assert_allclose(default_tail_result[:2],
                               (-0.17575757575757578,
                                0.62718834477648433))
    check_permuted_values(default_tail_result[2])
    self.assertCorrectPValue(0.60, 0.64, correlation_t,
                             (self.data1, self.data2),
                             {'method': 'spearman', 'tails': None},
                             p_val_idx=3)
    np.testing.assert_allclose(default_tail_result[4], expected_interval)
def test_correlation_test_pearson(self):
    """Test correlation_t using pearson on valid input.

    Covers a non-default confidence level and permutation count, then the
    same options again with ``tails='low'``.
    """
    # These results were verified with R.
    base_opts = {'method': 'pearson',
                 'confidence_level': 0.90,
                 'permutations': 990}
    low_tail_opts = dict(base_opts, tails='low')
    expected_interval = (-0.5779077, 0.5256224)

    def check_permuted_values(values):
        # One value per requested permutation, each within [-1, 1].
        self.assertEqual(len(values), 990)
        for coeff in values:
            self.assertTrue(-1.0 <= coeff <= 1.0)

    # Test with non-default confidence level and permutations.
    result = correlation_t(self.data1, self.data2, **base_opts)
    np.testing.assert_allclose(result[:2],
                               (-0.03760147, 0.91786297277172868),
                               atol=10e-7)
    check_permuted_values(result[2])
    self.assertCorrectPValue(0.9, 0.93, correlation_t,
                             (self.data1, self.data2),
                             base_opts,
                             p_val_idx=3)
    np.testing.assert_allclose(result[4], expected_interval)

    # Test with non-default tail type.
    result = correlation_t(self.data1, self.data2, **low_tail_opts)
    np.testing.assert_allclose(result[:2],
                               (-0.03760147, 0.45893148638586434),
                               atol=10e-7)
    check_permuted_values(result[2])
    self.assertCorrectPValue(0.41, 0.46, correlation_t,
                             (self.data1, self.data2),
                             low_tail_opts,
                             p_val_idx=3)
    np.testing.assert_allclose(result[4], expected_interval)
def test_compare_tip_distances_sample(self):
    """Check compare_tip_distances when sampling a subset of tips.

    ``sample=3`` with ``shuffle_f=sorted`` is passed so the call does not
    use a random shuffle. The expected value is ``(1 - r) / 2``, where
    ``r`` is the first element returned by ``correlation_t`` for the
    flattened tip-to-tip distance matrices of the shared taxa.
    """
    t = TreeNode.from_newick('((H:1,G:1):2,(R:0.5,M:0.7):3);')
    t2 = TreeNode.from_newick('(((H:1,G:1,O:1):2,R:3):1,X:4);')
    obs = t.compare_tip_distances(t2, sample=3, shuffle_f=sorted)

    # note: common taxa are H, G, R (only)
    # Hand-computed tip-to-tip distances (rows/columns ordered H, G, R).
    m1 = np.array([[0, 2, 6.5], [2, 0, 6.5], [6.5, 6.5, 0]])
    m2 = np.array([[0, 2, 6], [2, 0, 6], [6, 6, 0]])
    r = correlation_t(m1.flat, m2.flat)[0]

    # obs and the expected value are floats produced by separate calls,
    # so compare with a tolerance instead of demanding bit-identical
    # results.
    self.assertAlmostEqual(obs, (1 - r) / 2)

    # 4 common taxa, still picking H, G, R
    s = '((H:1,G:1):2,(R:0.5,M:0.7,Q:5):3);'
    t = TreeNode.from_newick(s, TreeNode)
    s3 = '(((H:1,G:1,O:1):2,R:3,Q:10):1,X:4);'
    t3 = TreeNode.from_newick(s3, TreeNode)
    # NOTE(review): this result is never asserted, so the call only
    # checks that sampling with four shared tips does not raise. Which
    # three tips get sampled is not visible from this test -- confirm
    # against compare_tip_distances before adding an expected value.
    obs = t.compare_tip_distances(t3, sample=3, shuffle_f=sorted)