def test_to_series_4x4(self):
    # The expected series holds the entries above the diagonal of the
    # matrix, read row by row, keyed by the corresponding (row, column)
    # ID pair.
    ids = ['a', 'b', 'c', 'd']
    distances = [[0.0, 0.2, 0.3, 0.4],
                 [0.2, 0.0, 0.5, 0.6],
                 [0.3, 0.5, 0.0, 0.7],
                 [0.4, 0.6, 0.7, 0.0]]
    observed = DistanceMatrix(distances, ids).to_series()

    id_pairs = pd.Index([('a', 'b'), ('a', 'c'), ('a', 'd'),
                         ('b', 'c'), ('b', 'd'),
                         ('c', 'd')])
    expected = pd.Series([0.2, 0.3, 0.4, 0.5, 0.6, 0.7], index=id_pairs)

    assert_series_almost_equal(observed, expected)
def test_to_series_4x4(self):
    # Symmetric, hollow 4x4 matrix in which 'c' and 'd' are at distance 0.0
    # from each other.
    dm = DistanceMatrix([
        [0, 0.25, 0.75, 0.75],
        [0.25, 0.0, 0.5, 0.5],
        [0.75, 0.5, 0.0, 0.0],
        [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd'])

    series = dm.to_series()

    # Expect exactly one entry per unordered ID pair, taken from above the
    # diagonal row by row. The zero distance between 'c' and 'd' is a
    # regular entry, and mirrored pairs such as ('b', 'a') must not appear.
    exp = pd.Series([0.25, 0.75, 0.75, 0.5, 0.5, 0.0],
                    index=pd.Index([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                    ('b', 'c'), ('b', 'd'), ('c', 'd')]))
    assert_series_almost_equal(series, exp)
def test_to_series_4x4(self):
    # Build the matrix under test; the IDs label rows and columns in order.
    matrix = DistanceMatrix(
        [
            [0.0, 0.2, 0.3, 0.4],
            [0.2, 0.0, 0.5, 0.6],
            [0.3, 0.5, 0.0, 0.7],
            [0.4, 0.6, 0.7, 0.0],
        ],
        ['a', 'b', 'c', 'd'],
    )

    # Expected output: the six above-diagonal distances, each keyed by its
    # (row ID, column ID) pair.
    expected_values = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
    expected_index = pd.Index([
        ('a', 'b'), ('a', 'c'), ('a', 'd'),
        ('b', 'c'), ('b', 'd'), ('c', 'd'),
    ])
    expected_series = pd.Series(expected_values, index=expected_index)

    assert_series_almost_equal(matrix.to_series(), expected_series)
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests for DistanceMatrix.

    Covers construction and validation, ``from_iterable``, condensed form,
    permutation, equality with DissimilarityMatrix, conversion to a pandas
    Series, and the symmetry/hollowness helpers.
    """

    def setUp(self):
        # The ``dm_*_data`` fixtures are presumably provided by the
        # superclass setUp -- confirm against DissimilarityMatrixTestData.
        super().setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        # Kept in the same order so the two lists can be zipped together.
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        self.dm_condensed_forms = [
            np.array([]),
            np.array([0.123]),
            np.array([0.01, 4.2, 12.0]),
        ]

    def test_init_from_condensed_form(self):
        data = [1, 2, 3]
        exp = DistanceMatrix([[0, 1, 2],
                              [1, 0, 3],
                              [2, 3, 0]], ['0', '1', '2'])
        res = DistanceMatrix(data)
        self.assertEqual(exp, res)

    def test_init_invalid_input(self):
        # Asymmetric.
        data = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Non-hollow
        data = [[1.0, 2.0], [2.0, 1.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_init_nans(self):
        with self.assertRaisesRegex(DistanceMatrixError, r'NaNs'):
            DistanceMatrix([[0.0, np.nan], [np.nan, 0.0]], ['a', 'b'])

    def test_from_iterable_no_key(self):
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a))
        self.assertEqual(res, exp)

    def test_from_iterable_validate_equal_valid_data(self):
        # For valid input, the result must not depend on `validate`.
        validate_true = DistanceMatrix.from_iterable(
            (x for x in range(4)), lambda a, b: abs(b - a), validate=True)
        validate_false = DistanceMatrix.from_iterable(
            (x for x in range(4)), lambda a, b: abs(b - a), validate=False)
        self.assertEqual(validate_true, validate_false)

    def test_from_iterable_validate_false(self):
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           validate=False)
        self.assertEqual(res, exp)

    def test_from_iterable_validate_non_hollow(self):
        iterable = (x for x in range(4))
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: 1)

    def test_from_iterable_validate_false_non_symmetric(self):
        # The metric is not symmetric, yet with validate=False the result is
        # expected to equal the symmetric matrix below.
        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable((x for x in range(4)),
                                           lambda a, b: a - b,
                                           validate=False)
        self.assertEqual(res, exp)

    def test_from_iterable_validate_asym(self):
        iterable = (x for x in range(4))
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: b - a)

    def test_from_iterable_with_key(self):
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]], ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           key=lambda x: str(x**2))
        self.assertEqual(res, exp)

    def test_from_iterable_empty(self):
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix.from_iterable([], lambda x: x)

    def test_from_iterable_single(self):
        exp = DistanceMatrix([[0]])
        res = DistanceMatrix.from_iterable(["boo"], lambda a, b: 0)
        self.assertEqual(res, exp)

    def test_from_iterable_with_keys(self):
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]], ['0', '1', '4', '9'])
        # `keys` is passed as an iterator rather than a list.
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           keys=iter(['0', '1', '4', '9']))
        self.assertEqual(res, exp)

    def test_from_iterable_with_key_and_keys(self):
        # Supplying both `key` and `keys` is rejected.
        iterable = (x for x in range(4))
        with self.assertRaises(ValueError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                         key=str, keys=['1', '2', '3', '4'])

    def _check_from_iterable_hamming(self, metric):
        # Shared body of the two hamming-metric tests below: builds a matrix
        # from sequences that carry metadata / positional metadata, using the
        # given `metric`, and compares it with the expected distances.
        seqs = [
            Sequence('ACGT'),
            Sequence('ACGA', metadata={'id': 'seq1'}),
            Sequence('AAAA', metadata={'id': 'seq2'}),
            Sequence('AAAA', positional_metadata={'qual': range(4)})
        ]

        exp = DistanceMatrix([
            [0, 0.25, 0.75, 0.75],
            [0.25, 0.0, 0.5, 0.5],
            [0.75, 0.5, 0.0, 0.0],
            [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd'])

        dm = DistanceMatrix.from_iterable(
            seqs,
            metric=metric,
            keys=['a', 'b', 'c', 'd'])

        self.assertEqual(dm, exp)

    def test_from_iterable_scipy_hamming_metric_with_metadata(self):
        # test for #1254
        self._check_from_iterable_hamming(scipy.spatial.distance.hamming)

    def test_from_iterable_skbio_hamming_metric_with_metadata(self):
        # test for #1254
        self._check_from_iterable_hamming(skbio.sequence.distance.hamming)

    def test_condensed_form(self):
        for dm, condensed in zip(self.dms, self.dm_condensed_forms):
            obs = dm.condensed_form()
            self.assertTrue(np.array_equal(obs, condensed))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2...
        for _ in range(2):
            obs = self.dm_1x1.permute(condensed=True)
            npt.assert_equal(obs, np.array([]))

        for _ in range(2):
            obs = self.dm_2x2.permute(condensed=True)
            npt.assert_equal(obs, np.array([0.123]))

        dm_copy = self.dm_3x3.copy()

        # Seeded so the two permutations asserted below are reproducible
        # (permute() presumably draws from NumPy's global RNG -- confirm).
        np.random.seed(0)

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([12.0, 4.2, 0.01]))

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([4.2, 12.0, 0.01]))

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, dm_copy)

    def test_permute_not_condensed(self):
        # For 1x1 and 2x2 the result equals the input but must be a new
        # object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2],
                              [12, 0, 0.01],
                              [4.2, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12],
                              [4.2, 0, 0.01],
                              [12, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs.
        eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        self.assertTrue(self.dm_3x3 == eq_dm)
        self.assertTrue(eq_dm == self.dm_3x3)

    def test_to_series_1x1(self):
        series = self.dm_1x1.to_series()

        # Pin the dtype: the default dtype pandas infers for an empty Series
        # differs between pandas versions.
        exp = pd.Series([], index=[], dtype='float64')
        assert_series_almost_equal(series, exp)

    def test_to_series_2x2(self):
        series = self.dm_2x2.to_series()

        exp = pd.Series([0.123], index=pd.Index([('a', 'b')]))
        assert_series_almost_equal(series, exp)

    def test_to_series_4x4(self):
        dm = DistanceMatrix([
            [0.0, 0.2, 0.3, 0.4],
            [0.2, 0.0, 0.5, 0.6],
            [0.3, 0.5, 0.0, 0.7],
            [0.4, 0.6, 0.7, 0.0]], ['a', 'b', 'c', 'd'])

        series = dm.to_series()

        # The above-diagonal entries, row by row, keyed by ID pair.
        exp = pd.Series([0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
                        index=pd.Index([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                        ('b', 'c'), ('b', 'd'), ('c', 'd')]))
        assert_series_almost_equal(series, exp)

    def test_to_series_default_ids(self):
        series = DistanceMatrix(self.dm_2x2_data).to_series()

        exp = pd.Series([0.123], index=pd.Index([('0', '1')]))
        assert_series_almost_equal(series, exp)

    def test_validate_asym_shape(self):
        # Each case: (label, data, expected symmetric, expected hollow).
        cases = [
            # first check it actually likes good matrices
            ('good', np.array([[0., 42.], [42., 0.]]), True, True),
            ('bad', np.array([[1., 2.], [3., 4.]]), False, False),
            # re-try with partially bad data
            ('partially bad', np.array([[0., 2.], [3., 0.]]), False, True),
        ]

        for label, data, exp_sym, exp_hollow in cases:
            with self.subTest(case=label):
                # The combined helper and the two individual helpers must
                # agree on every case.
                data_sym, data_hollow = is_symmetric_and_hollow(data)
                self.assertEqual(data_sym, exp_sym)
                self.assertEqual(data_hollow, exp_hollow)

                data_sym = skbio.stats.distance._utils.is_symmetric(data)
                self.assertEqual(data_sym, exp_sym)

                data_hollow = skbio.stats.distance._utils.is_hollow(data)
                self.assertEqual(data_hollow, exp_hollow)

                # _validate_shape checks just the shape, not the content, so
                # it must accept every 2x2 case without raising.
                self.dm_3x3._validate_shape(data)
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests for DistanceMatrix.

    Covers construction and validation, ``from_iterable``, condensed form,
    permutation, equality with DissimilarityMatrix, and conversion to a
    pandas Series.
    """

    def setUp(self):
        # The ``dm_*_data`` fixtures are presumably provided by the
        # superclass setUp -- confirm against DissimilarityMatrixTestData.
        super().setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        # Kept in the same order so the two lists can be zipped together.
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        self.dm_condensed_forms = [np.array([]), np.array([0.123]),
                                   np.array([0.01, 4.2, 12.0])]

    def test_init_from_condensed_form(self):
        data = [1, 2, 3]
        exp = DistanceMatrix([[0, 1, 2],
                              [1, 0, 3],
                              [2, 3, 0]], ['0', '1', '2'])
        res = DistanceMatrix(data)
        self.assertEqual(exp, res)

    def test_init_invalid_input(self):
        # Asymmetric.
        data = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Non-hollow
        data = [[1.0, 2.0], [2.0, 1.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_init_nans(self):
        with self.assertRaisesRegex(DistanceMatrixError, r'NaNs'):
            DistanceMatrix([[0.0, np.nan], [np.nan, 0.0]], ['a', 'b'])

    def test_from_iterable_no_key(self):
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a))
        self.assertEqual(res, exp)

    def test_from_iterable_validate_equal_valid_data(self):
        # For valid input, the result must not depend on `validate`.
        validate_true = DistanceMatrix.from_iterable(
            (x for x in range(4)), lambda a, b: abs(b - a), validate=True)
        validate_false = DistanceMatrix.from_iterable(
            (x for x in range(4)), lambda a, b: abs(b - a), validate=False)
        self.assertEqual(validate_true, validate_false)

    def test_from_iterable_validate_false(self):
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           validate=False)
        self.assertEqual(res, exp)

    def test_from_iterable_validate_non_hollow(self):
        iterable = (x for x in range(4))
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: 1)

    def test_from_iterable_validate_false_non_symmetric(self):
        # The metric is not symmetric, yet with validate=False the result is
        # expected to equal the symmetric matrix below.
        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable((x for x in range(4)),
                                           lambda a, b: a - b,
                                           validate=False)
        self.assertEqual(res, exp)

    def test_from_iterable_validate_asym(self):
        iterable = (x for x in range(4))
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: b - a)

    def test_from_iterable_with_key(self):
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]], ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           key=lambda x: str(x**2))
        self.assertEqual(res, exp)

    def test_from_iterable_empty(self):
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix.from_iterable([], lambda x: x)

    def test_from_iterable_single(self):
        exp = DistanceMatrix([[0]])
        res = DistanceMatrix.from_iterable(["boo"], lambda a, b: 0)
        self.assertEqual(res, exp)

    def test_from_iterable_with_keys(self):
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]], ['0', '1', '4', '9'])
        # `keys` is passed as an iterator rather than a list.
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           keys=iter(['0', '1', '4', '9']))
        self.assertEqual(res, exp)

    def test_from_iterable_with_key_and_keys(self):
        # Supplying both `key` and `keys` is rejected.
        iterable = (x for x in range(4))
        with self.assertRaises(ValueError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                         key=str, keys=['1', '2', '3', '4'])

    def _check_from_iterable_hamming(self, metric):
        # Shared body of the two hamming-metric tests below: builds a matrix
        # from sequences that carry metadata / positional metadata, using the
        # given `metric`, and compares it with the expected distances.
        seqs = [
            Sequence('ACGT'),
            Sequence('ACGA', metadata={'id': 'seq1'}),
            Sequence('AAAA', metadata={'id': 'seq2'}),
            Sequence('AAAA', positional_metadata={'qual': range(4)})
        ]

        exp = DistanceMatrix([
            [0, 0.25, 0.75, 0.75],
            [0.25, 0.0, 0.5, 0.5],
            [0.75, 0.5, 0.0, 0.0],
            [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd'])

        dm = DistanceMatrix.from_iterable(
            seqs,
            metric=metric,
            keys=['a', 'b', 'c', 'd'])

        self.assertEqual(dm, exp)

    def test_from_iterable_scipy_hamming_metric_with_metadata(self):
        # test for #1254
        self._check_from_iterable_hamming(scipy.spatial.distance.hamming)

    def test_from_iterable_skbio_hamming_metric_with_metadata(self):
        # test for #1254
        self._check_from_iterable_hamming(skbio.sequence.distance.hamming)

    def test_condensed_form(self):
        for dm, condensed in zip(self.dms, self.dm_condensed_forms):
            obs = dm.condensed_form()
            self.assertTrue(np.array_equal(obs, condensed))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2...
        for _ in range(2):
            obs = self.dm_1x1.permute(condensed=True)
            npt.assert_equal(obs, np.array([]))

        for _ in range(2):
            obs = self.dm_2x2.permute(condensed=True)
            npt.assert_equal(obs, np.array([0.123]))

        dm_copy = self.dm_3x3.copy()

        # Seeded so the two permutations asserted below are reproducible
        # (permute() presumably draws from NumPy's global RNG -- confirm).
        np.random.seed(0)

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([12.0, 4.2, 0.01]))

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([4.2, 12.0, 0.01]))

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, dm_copy)

    def test_permute_not_condensed(self):
        # For 1x1 and 2x2 the result equals the input but must be a new
        # object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2],
                              [12, 0, 0.01],
                              [4.2, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12],
                              [4.2, 0, 0.01],
                              [12, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs.
        eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        self.assertTrue(self.dm_3x3 == eq_dm)
        self.assertTrue(eq_dm == self.dm_3x3)

    def test_to_series_1x1(self):
        series = self.dm_1x1.to_series()

        # Pin the dtype: the default dtype pandas infers for an empty Series
        # differs between pandas versions.
        exp = pd.Series([], index=[], dtype='float64')
        assert_series_almost_equal(series, exp)

    def test_to_series_2x2(self):
        series = self.dm_2x2.to_series()

        exp = pd.Series([0.123], index=pd.Index([('a', 'b')]))
        assert_series_almost_equal(series, exp)

    def test_to_series_4x4(self):
        dm = DistanceMatrix([
            [0.0, 0.2, 0.3, 0.4],
            [0.2, 0.0, 0.5, 0.6],
            [0.3, 0.5, 0.0, 0.7],
            [0.4, 0.6, 0.7, 0.0]], ['a', 'b', 'c', 'd'])

        series = dm.to_series()

        # The above-diagonal entries, row by row, keyed by ID pair.
        exp = pd.Series([0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
                        index=pd.Index([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                        ('b', 'c'), ('b', 'd'), ('c', 'd')]))
        assert_series_almost_equal(series, exp)

    def test_to_series_default_ids(self):
        series = DistanceMatrix(self.dm_2x2_data).to_series()

        exp = pd.Series([0.123], index=pd.Index([('0', '1')]))
        assert_series_almost_equal(series, exp)