class DissimilarityMatrixTests(DissimilarityMatrixTestData):
    """Tests for ``DissimilarityMatrix``: construction, accessors, filtering,
    plotting, rich display, comparison and indexing.

    The raw ``self.dm_*_data`` fixtures are not defined here; they are
    expected to be set up by the parent class before ``setUp`` wraps them.
    """

    # NOTE(review): this file defines ``DissimilarityMatrixTests`` again
    # further down, which rebinds the module-level name -- confirm that this
    # definition is still meant to be collected by the test runner.

    def setUp(self):
        # Explicit two-argument form kept: this version of the tests still
        # relies on ``binary_type``/``text_type`` compatibility aliases.
        super(DissimilarityMatrixTests, self).setUp()

        self.dm_1x1 = DissimilarityMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DissimilarityMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_2x2_asym = DissimilarityMatrix(self.dm_2x2_asym_data,
                                               ['a', 'b'])
        self.dm_3x3 = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        # The lists below are parallel to ``self.dms`` (same order).
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_2x2_asym, self.dm_3x3]
        self.dm_shapes = [(1, 1), (2, 2), (2, 2), (3, 3)]
        self.dm_sizes = [1, 4, 4, 9]
        self.dm_transposes = [
            self.dm_1x1,
            self.dm_2x2,
            DissimilarityMatrix([[0, -2], [1, 0]], ['a', 'b']),
            self.dm_3x3,
        ]
        self.dm_redundant_forms = [
            np.array(self.dm_1x1_data),
            np.array(self.dm_2x2_data),
            np.array(self.dm_2x2_asym_data),
            np.array(self.dm_3x3_data),
        ]

    def test_deprecated_io(self):
        # ``to_file``/``from_file`` must warn, yet still round-trip the data.
        fh = StringIO()
        npt.assert_warns(UserWarning, self.dm_3x3.to_file, fh)
        fh.seek(0)
        deserialized = npt.assert_warns(UserWarning,
                                        DissimilarityMatrix.from_file, fh)
        self.assertEqual(deserialized, self.dm_3x3)
        # Exact type is required here, so a subclass instance would fail.
        self.assertIs(type(deserialized), DissimilarityMatrix)

    def test_init_from_dm(self):
        ids = ['foo', 'bar', 'baz']

        # DissimilarityMatrix -> DissimilarityMatrix
        exp = DissimilarityMatrix(self.dm_3x3_data, ids)
        obs = DissimilarityMatrix(self.dm_3x3, ids)
        self.assertEqual(obs, exp)
        # Test that copy of data is not made.
        self.assertIs(obs.data, self.dm_3x3.data)
        obs.data[0, 1] = 424242
        self.assertTrue(np.array_equal(obs.data, self.dm_3x3.data))

        # DistanceMatrix -> DissimilarityMatrix
        exp = DissimilarityMatrix(self.dm_3x3_data, ids)
        obs = DissimilarityMatrix(
            DistanceMatrix(self.dm_3x3_data, ('a', 'b', 'c')), ids)
        self.assertEqual(obs, exp)

        # DissimilarityMatrix -> DistanceMatrix
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(self.dm_2x2_asym, ['foo', 'bar'])

    def test_init_no_ids(self):
        # Omitting IDs must be equivalent to passing '0', '1', '2'.
        exp = DissimilarityMatrix(self.dm_3x3_data, ('0', '1', '2'))
        obs = DissimilarityMatrix(self.dm_3x3_data)
        self.assertEqual(obs, exp)
        self.assertEqual(obs['1', '2'], 12.0)

    def test_init_invalid_input(self):
        # Empty data.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix([], [])

        # Another type of empty data.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(np.empty((0, 0)), [])

        # Invalid number of dimensions.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix([1, 2, 3], ['a'])

        # Dimensions don't match.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix([[1, 2, 3]], ['a'])

        data = [[0, 1], [1, 0]]

        # Duplicate IDs.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(data, ['a', 'a'])

        # Number of IDs don't match dimensions.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(data, ['a', 'b', 'c'])

        # Non-hollow.
        data = [[0.0, 1.0], [1.0, 0.01]]
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(data, ['a', 'b'])

    def test_data(self):
        for dm, exp in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(dm.data, exp))

        # ``data`` must not be assignable.
        with self.assertRaises(AttributeError):
            self.dm_3x3.data = 'foo'

    def test_ids(self):
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

        # Test that we overwrite the existing IDs and that the ID index is
        # correctly rebuilt.
        new_ids = ['foo', 'bar', 'baz']
        self.dm_3x3.ids = new_ids
        self.assertEqual(self.dm_3x3.ids, tuple(new_ids))
        self.assertTrue(np.array_equal(self.dm_3x3['bar'],
                                       np.array([0.01, 0.0, 12.0])))
        with self.assertRaises(MissingIDError):
            self.dm_3x3['b']

    def test_ids_invalid_input(self):
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.ids = ['foo', 'bar']
        # Make sure that we can still use the dissimilarity matrix after trying
        # to be evil.
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

    def test_dtype(self):
        for dm in self.dms:
            self.assertEqual(dm.dtype, np.float64)

    def test_shape(self):
        for dm, shape in zip(self.dms, self.dm_shapes):
            self.assertEqual(dm.shape, shape)

    def test_size(self):
        for dm, size in zip(self.dms, self.dm_sizes):
            self.assertEqual(dm.size, size)

    def test_transpose(self):
        for dm, transpose in zip(self.dms, self.dm_transposes):
            self.assertEqual(dm.T, transpose)
            self.assertEqual(dm.transpose(), transpose)
            # We should get a reference to a different object back, even if the
            # transpose is the same as the original.
            self.assertIsNot(dm.transpose(), dm)

    def test_index(self):
        for position, id_ in enumerate(('a', 'b', 'c')):
            self.assertEqual(self.dm_3x3.index(id_), position)

        # Unknown ID, and a non-string that is not an ID either.
        with self.assertRaises(MissingIDError):
            self.dm_3x3.index('d')
        with self.assertRaises(MissingIDError):
            self.dm_3x3.index(1)

    def test_redundant_form(self):
        for dm, redundant in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(dm.redundant_form(), redundant))

    def test_copy(self):
        duplicate = self.dm_2x2.copy()
        self.assertEqual(duplicate, self.dm_2x2)
        self.assertIsNot(duplicate.data, self.dm_2x2.data)
        # deepcopy doesn't actually create a copy of the IDs because it is a
        # tuple of strings, which is fully immutable.
        self.assertIs(duplicate.ids, self.dm_2x2.ids)

        # Reassigning IDs on the copy must leave the original untouched.
        duplicate.ids = ['hello', 'world']
        self.assertNotEqual(duplicate.ids, self.dm_2x2.ids)

        # Same for the underlying data.
        duplicate = self.dm_2x2.copy()
        duplicate.data[0, 1] = 0.0001
        self.assertFalse(np.array_equal(duplicate.data, self.dm_2x2.data))

    def test_filter_no_filtering(self):
        # Don't actually filter anything -- ensure we get back a different
        # object.
        obs = self.dm_3x3.filter(['a', 'b', 'c'])
        self.assertEqual(obs, self.dm_3x3)
        self.assertIsNot(obs, self.dm_3x3)

    def test_filter_reorder(self):
        # Don't filter anything, but reorder the distance matrix.
        order = ['c', 'a', 'b']
        exp = DissimilarityMatrix(
            [[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]], order)
        self.assertEqual(self.dm_3x3.filter(order), exp)

    def test_filter_single_id(self):
        ids = ['b']
        exp = DissimilarityMatrix([[0]], ids)
        self.assertEqual(self.dm_2x2_asym.filter(ids), exp)

    def test_filter_asymmetric(self):
        # 2x2
        ids = ['b', 'a']
        exp = DissimilarityMatrix([[0, -2], [1, 0]], ids)
        self.assertEqual(self.dm_2x2_asym.filter(ids), exp)

        # 3x3
        dm = DissimilarityMatrix([[0, 10, 53], [42, 0, 22.5], [53, 1, 0]],
                                 ('bro', 'brah', 'breh'))
        ids = ['breh', 'brah']
        exp = DissimilarityMatrix([[0, 1], [22.5, 0]], ids)
        self.assertEqual(dm.filter(ids), exp)

    def test_filter_subset(self):
        ids = ('c', 'a')
        exp = DissimilarityMatrix([[0, 4.2], [4.2, 0]], ids)
        self.assertEqual(self.dm_3x3.filter(ids), exp)

        ids = ('b', 'a')
        exp = DissimilarityMatrix([[0, 0.01], [0.01, 0]], ids)
        self.assertEqual(self.dm_3x3.filter(ids), exp)

        # 4x4
        dm = DissimilarityMatrix([[0, 1, 55, 7], [1, 0, 16, 1],
                                  [55, 16, 0, 23], [7, 1, 23, 0]])
        ids = np.asarray(['3', '0', '1'])
        exp = DissimilarityMatrix([[0, 7, 1], [7, 0, 1], [1, 1, 0]], ids)
        self.assertEqual(dm.filter(ids), exp)

    def test_filter_duplicate_ids(self):
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter(['c', 'a', 'c'])

    def test_filter_missing_ids(self):
        with self.assertRaises(MissingIDError):
            self.dm_3x3.filter(['c', 'bro'])

    def test_filter_missing_ids_strict_false(self):
        # no exception should be raised
        ids = ('c', 'a')
        exp = DissimilarityMatrix([[0, 4.2], [4.2, 0]], ids)
        obs = self.dm_3x3.filter(['c', 'a', 'not found'], strict=False)
        self.assertEqual(obs, exp)

    def test_filter_empty_ids(self):
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter([])

    def test_plot_default(self):
        fig = self.dm_1x1.plot()
        self.assertIsInstance(fig, mpl.figure.Figure)

        axes = fig.get_axes()
        self.assertEqual(len(axes), 2)

        # Only the first of the two axes is inspected.
        ax = axes[0]
        self.assertEqual(ax.get_title(), '')
        xticks = [tick.get_text() for tick in ax.get_xticklabels()]
        self.assertEqual(xticks, ['a'])
        yticks = [tick.get_text() for tick in ax.get_yticklabels()]
        self.assertEqual(yticks, ['a'])

    def test_plot_no_default(self):
        ids = ['0', 'one', '2', 'three', '4.000']
        data = ([0, 1, 2, 3, 4], [1, 0, 1, 2, 3], [2, 1, 0, 1, 2],
                [3, 2, 1, 0, 1], [4, 3, 2, 1, 0])
        dm = DissimilarityMatrix(data, ids)
        fig = dm.plot(cmap='Reds', title='Testplot')
        self.assertIsInstance(fig, mpl.figure.Figure)

        axes = fig.get_axes()
        self.assertEqual(len(axes), 2)

        # Only the first of the two axes is inspected.
        ax = axes[0]
        self.assertEqual(ax.get_title(), 'Testplot')
        xticks = [tick.get_text() for tick in ax.get_xticklabels()]
        self.assertEqual(xticks, ['0', 'one', '2', 'three', '4.000'])
        yticks = [tick.get_text() for tick in ax.get_yticklabels()]
        self.assertEqual(yticks, ['0', 'one', '2', 'three', '4.000'])

    def test_repr_png(self):
        obs = self.dm_1x1._repr_png_()
        self.assertIsInstance(obs, binary_type)
        self.assertGreater(len(obs), 0)

    def test_repr_svg(self):
        obs = self.dm_1x1._repr_svg_()
        self.assertIsInstance(obs, text_type)
        self.assertGreater(len(obs), 0)

    def test_png(self):
        self.assertIsInstance(self.dm_1x1.png, Image)

    def test_svg(self):
        self.assertIsInstance(self.dm_1x1.svg, SVG)

    def test_str(self):
        for dm in self.dms:
            obs = str(dm)
            # Do some very light testing here to make sure we're getting a
            # non-empty string back. We don't want to test the exact
            # formatting.
            self.assertTrue(obs)

    def test_eq(self):
        # ``==`` is spelled out on purpose: the operator itself is under test.
        for dm in self.dms:
            duplicate = dm.copy()
            self.assertTrue(dm == dm)
            self.assertTrue(duplicate == duplicate)
            self.assertTrue(dm == duplicate)
            self.assertTrue(duplicate == dm)

        self.assertFalse(self.dm_1x1 == self.dm_3x3)

    def test_ne(self):
        # ``!=`` is spelled out on purpose: the operator itself is under test.

        # Wrong class.
        self.assertTrue(self.dm_3x3 != 'foo')

        # Wrong shape.
        self.assertTrue(self.dm_3x3 != self.dm_1x1)

        # Wrong IDs.
        other = self.dm_3x3.copy()
        other.ids = ['foo', 'bar', 'baz']
        self.assertTrue(self.dm_3x3 != other)

        # Wrong data.
        other = self.dm_3x3.copy()
        other.data[1, 0] = 42.42
        self.assertTrue(self.dm_3x3 != other)

        self.assertFalse(self.dm_2x2 != self.dm_2x2)

    def test_contains(self):
        for present in ('a', 'b', 'c'):
            self.assertIn(present, self.dm_3x3)
        self.assertNotIn('d', self.dm_3x3)

    def test_getslice(self):
        # Slice of first dimension only. Test that __getslice__ defers to
        # __getitem__.
        obs = self.dm_2x2[1:]
        self.assertTrue(np.array_equal(obs, np.array([[0.123, 0.0]])))
        self.assertIs(type(obs), np.ndarray)

    def test_getitem_by_id(self):
        obs = self.dm_1x1['a']
        self.assertTrue(np.array_equal(obs, np.array([0.0])))

        obs = self.dm_2x2_asym['b']
        self.assertTrue(np.array_equal(obs, np.array([-2.0, 0.0])))

        obs = self.dm_3x3['c']
        self.assertTrue(np.array_equal(obs, np.array([4.2, 12.0, 0.0])))

        with self.assertRaises(MissingIDError):
            self.dm_2x2['c']

    def test_getitem_by_id_pair(self):
        # Same object.
        self.assertEqual(self.dm_1x1['a', 'a'], 0.0)

        # Different objects (symmetric).
        self.assertEqual(self.dm_3x3['b', 'c'], 12.0)
        self.assertEqual(self.dm_3x3['c', 'b'], 12.0)

        # Different objects (asymmetric).
        self.assertEqual(self.dm_2x2_asym['a', 'b'], 1.0)
        self.assertEqual(self.dm_2x2_asym['b', 'a'], -2.0)

        with self.assertRaises(MissingIDError):
            self.dm_2x2['a', 'c']

    def test_getitem_ndarray_indexing(self):
        # Single element access.
        self.assertEqual(self.dm_3x3[0, 1], 0.01)

        # Single element access (via two __getitem__ calls).
        self.assertEqual(self.dm_3x3[0][1], 0.01)

        # Row access.
        obs = self.dm_3x3[1]
        self.assertTrue(np.array_equal(obs, np.array([0.01, 0.0, 12.0])))
        self.assertIs(type(obs), np.ndarray)

        # Grab all data.
        obs = self.dm_3x3[:, :]
        self.assertTrue(np.array_equal(obs, self.dm_3x3.data))
        self.assertIs(type(obs), np.ndarray)

        with self.assertRaises(IndexError):
            self.dm_3x3[:, 3]

    def test_validate_invalid_dtype(self):
        # The array literal holds only Python ints, so it is not floating
        # point; ``_validate`` is expected to reject it.
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate(np.array([[0, 42], [42, 0]]), ['a', 'b'])

    def test_pprint_ids(self):
        # No truncation.
        self.assertEqual(self.dm_3x3._pprint_ids(), 'a, b, c')

        # Truncation.
        self.assertEqual(self.dm_3x3._pprint_ids(max_chars=5), 'a, b, ...')
class DissimilarityMatrixTests(DissimilarityMatrixTestData):
    """Tests for ``DissimilarityMatrix``: construction (including
    ``from_iterable``), ``within``/``between`` subsetting, accessors,
    filtering, plotting, data-frame export, comparison, indexing and the
    private validation helpers.

    The raw ``self.dm_*_data`` fixtures are not defined here; they are
    expected to be set up by the parent class before ``setUp`` wraps them.
    """

    # NOTE(review): ``DissimilarityMatrixTests`` is defined more than once in
    # this file, so the module-level name ends up bound to the last
    # definition -- confirm that this one is still meant to be collected.

    def setUp(self):
        # Zero-argument super(): the two-argument form looks the class name
        # up at call time, which is fragile while the name is rebound later
        # in the module.
        super().setUp()

        self.dm_1x1 = DissimilarityMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DissimilarityMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_2x2_asym = DissimilarityMatrix(self.dm_2x2_asym_data,
                                               ['a', 'b'])
        self.dm_3x3 = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        self.dm_5x5 = DissimilarityMatrix(self.dm_5x5_data, list('abcde'))

        # The lists below are parallel to ``self.dms`` (same order); note
        # that ``dm_5x5`` is deliberately not part of them.
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_2x2_asym, self.dm_3x3]
        self.dm_shapes = [(1, 1), (2, 2), (2, 2), (3, 3)]
        self.dm_sizes = [1, 4, 4, 9]
        self.dm_transposes = [
            self.dm_1x1,
            self.dm_2x2,
            DissimilarityMatrix([[0, -2], [1, 0]], ['a', 'b']),
            self.dm_3x3,
        ]
        self.dm_redundant_forms = [
            np.array(self.dm_1x1_data),
            np.array(self.dm_2x2_data),
            np.array(self.dm_2x2_asym_data),
            np.array(self.dm_3x3_data),
        ]

    def test_avoid_copy_on_construction(self):
        # Each case is (data, expect_copy). ``expect_copy`` is True when the
        # matrix's ``data`` must be a different object from the input and
        # False when the very same object must be kept.
        cases = (
            ([[0, 1], [1, 0]], True),
            ([(0, 1), (1, 0)], True),
            (((0, 1), (1, 0)), True),
            (np.array([[0, 1], [1, 0]], dtype='int'), True),
            (np.array([[0, 1], [1, 0]], dtype='float'), False),
            (np.array([[0, 1], [1, 0]], dtype=np.float32), False),
            (np.array([[0, 1], [1, 0]], dtype=np.float64), False),
            (np.array([[0, 1], [1, 0]], dtype='double'), False),
        )

        for data, expect_copy in cases:
            obj = DissimilarityMatrix(data)
            self.assertEqual(obj.data is not data, expect_copy)

    def test_within(self):
        exp = pd.DataFrame([['a', 'a', 0.0],
                            ['a', 'c', 4.2],
                            ['c', 'a', 4.2],
                            ['c', 'c', 0.0]],
                           columns=['i', 'j', 'value'])
        obs = self.dm_3x3.within(['a', 'c'])
        pdt.assert_frame_equal(obs, exp)

    def test_within_order_stability(self):
        exp = pd.DataFrame([['a', 'a', 0.0],
                            ['a', 'c', 4.2],
                            ['c', 'a', 4.2],
                            ['c', 'c', 0.0]],
                           columns=['i', 'j', 'value'])

        # NOTE: order was changed from ['a', 'c'] to ['c', 'a']
        # but the output order in exp is consistent with
        # test_within
        obs = self.dm_3x3.within(['c', 'a'])
        pdt.assert_frame_equal(obs, exp)
        obs = self.dm_3x3.within(['a', 'c'])
        pdt.assert_frame_equal(obs, exp)

    def test_within_missing_id(self):
        with self.assertRaisesRegex(MissingIDError, "not found."):
            self.dm_3x3.within(['x', 'a'])

    def test_between(self):
        exp = pd.DataFrame([['b', 'a', 5.],
                            ['b', 'c', 6.],
                            ['b', 'e', 8.],
                            ['d', 'a', 4.],
                            ['d', 'c', 6.],
                            ['d', 'e', 7.]],
                           columns=['i', 'j', 'value'])

        obs = self.dm_5x5.between(['b', 'd'], ['a', 'c', 'e'])
        pdt.assert_frame_equal(obs, exp)

    def test_between_order_stability(self):
        exp = pd.DataFrame([['b', 'a', 5.],
                            ['b', 'c', 6.],
                            ['b', 'e', 8.],
                            ['d', 'a', 4.],
                            ['d', 'c', 6.],
                            ['d', 'e', 7.]],
                           columns=['i', 'j', 'value'])

        # varying the order of the "i" values, result remains consistent
        # with the test_between result
        obs = self.dm_5x5.between(['d', 'b'], ['a', 'c', 'e'])
        pdt.assert_frame_equal(obs, exp)

        # varying the order of the "j" values, result remains consistent
        # with the test_between result
        obs = self.dm_5x5.between(['b', 'd'], ['a', 'e', 'c'])
        pdt.assert_frame_equal(obs, exp)

        # varying the order of the "i" and "j" values, result remains
        # consistent with the test_between result
        obs = self.dm_5x5.between(['d', 'b'], ['a', 'e', 'c'])
        pdt.assert_frame_equal(obs, exp)

    def test_between_overlap(self):
        exp = pd.DataFrame([['b', 'a', 5.],
                            ['b', 'd', 7.],
                            ['b', 'e', 8.],
                            ['d', 'a', 4.],
                            ['d', 'd', 0.],
                            ['d', 'e', 7.]],
                           columns=['i', 'j', 'value'])

        # 'd' in i and j overlap
        with self.assertRaisesRegex(KeyError, ("This constraint can "
                                               "removed with "
                                               "allow_overlap=True.")):
            self.dm_5x5.between(['b', 'd'], ['a', 'd', 'e'])

        obs = self.dm_5x5.between(['b', 'd'], ['a', 'd', 'e'],
                                  allow_overlap=True)
        pdt.assert_frame_equal(obs, exp)

    def test_between_missing_id(self):
        # Missing ID in the first group, in the second group, and in both.
        with self.assertRaisesRegex(MissingIDError, "not found."):
            self.dm_3x3.between(['x', 'a'], ['a', 'b', 'c'])

        with self.assertRaisesRegex(MissingIDError, "not found."):
            self.dm_3x3.between(['a', 'b'], ['a', 'x', 'c'])

        with self.assertRaisesRegex(MissingIDError, "not found."):
            self.dm_3x3.between(['a', 'y'], ['a', 'x', 'c'])

    def test_stable_order(self):
        # IDs given as d, e, b must come back as ascending indices 1, 3, 4.
        exp = np.array([1, 3, 4], dtype=int)
        obs = self.dm_5x5._stable_order(['d', 'e', 'b'])
        npt.assert_equal(obs, exp)

    def test_subset_to_dataframe(self):
        exp = pd.DataFrame([['b', 'a', 5.],
                            ['b', 'd', 7.],
                            ['b', 'e', 8.],
                            ['d', 'a', 4.],
                            ['d', 'd', 0.],
                            ['d', 'e', 7.]],
                           columns=['i', 'j', 'value'])

        obs = self.dm_5x5._subset_to_dataframe(['b', 'd'], ['a', 'd', 'e'])
        pdt.assert_frame_equal(obs, exp)

        # and the empty edge cases
        exp = pd.DataFrame([],
                           columns=['i', 'j', 'value'],
                           index=pd.RangeIndex(start=0, stop=0))

        obs = self.dm_5x5._subset_to_dataframe([], ['a', 'd', 'e'])
        pdt.assert_frame_equal(obs, exp, check_dtype=False)
        obs = self.dm_5x5._subset_to_dataframe(['b', 'd'], [])
        pdt.assert_frame_equal(obs, exp, check_dtype=False)
        obs = self.dm_5x5._subset_to_dataframe([], [])
        pdt.assert_frame_equal(obs, exp, check_dtype=False)

    def test_init_from_dm(self):
        ids = ['foo', 'bar', 'baz']

        # DissimilarityMatrix -> DissimilarityMatrix
        exp = DissimilarityMatrix(self.dm_3x3_data, ids)
        obs = DissimilarityMatrix(self.dm_3x3, ids)
        self.assertEqual(obs, exp)
        # Test that copy of data is not made.
        self.assertIs(obs.data, self.dm_3x3.data)
        obs.data[0, 1] = 424242
        self.assertTrue(np.array_equal(obs.data, self.dm_3x3.data))

        # DistanceMatrix -> DissimilarityMatrix
        exp = DissimilarityMatrix(self.dm_3x3_data, ids)
        obs = DissimilarityMatrix(
            DistanceMatrix(self.dm_3x3_data, ('a', 'b', 'c')), ids)
        self.assertEqual(obs, exp)

        # DissimilarityMatrix -> DistanceMatrix
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(self.dm_2x2_asym, ['foo', 'bar'])

    def test_init_non_hollow_dm(self):
        # A non-zero diagonal must be accepted and stored unchanged.
        data = [[1, 1], [1, 1]]
        obs = DissimilarityMatrix(data, ['a', 'b'])
        self.assertTrue(np.array_equal(obs.data, data))
        data_hollow = skbio.stats.distance._utils.is_hollow(obs.data)
        self.assertEqual(data_hollow, False)

    def test_init_no_ids(self):
        # Omitting IDs must be equivalent to passing '0', '1', '2'.
        exp = DissimilarityMatrix(self.dm_3x3_data, ('0', '1', '2'))
        obs = DissimilarityMatrix(self.dm_3x3_data)
        self.assertEqual(obs, exp)
        self.assertEqual(obs['1', '2'], 12.0)

    def test_init_invalid_input(self):
        # Empty data.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix([], [])

        # Another type of empty data.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(np.empty((0, 0)), [])

        # Invalid number of dimensions.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix([1, 2, 3], ['a'])

        # Dimensions don't match.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix([[1, 2, 3]], ['a'])

        data = [[0, 1], [1, 0]]

        # Duplicate IDs.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(data, ['a', 'a'])

        # Number of IDs don't match dimensions.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(data, ['a', 'b', 'c'])
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(data, [])

    def test_from_iterable_non_hollow_data(self):
        # A constant metric fills every cell, diagonal included.
        iterable = (x for x in range(4))

        exp = DissimilarityMatrix([[1, 1, 1, 1],
                                   [1, 1, 1, 1],
                                   [1, 1, 1, 1],
                                   [1, 1, 1, 1]])
        res = DissimilarityMatrix.from_iterable(iterable, lambda a, b: 1)
        self.assertEqual(res, exp)

    def test_from_iterable_asymmetric_data(self):
        # ``b - a`` is not symmetric, so both triangles must be computed.
        iterable = (x for x in range(4))

        exp = DissimilarityMatrix([[0, 1, 2, 3],
                                   [-1, 0, 1, 2],
                                   [-2, -1, 0, 1],
                                   [-3, -2, -1, 0]])
        res = DissimilarityMatrix.from_iterable(iterable, lambda a, b: b - a)
        self.assertEqual(res, exp)

    def test_from_iterable_no_key(self):
        iterable = (x for x in range(4))

        exp = DissimilarityMatrix([[0, 1, 2, 3],
                                   [1, 0, 1, 2],
                                   [2, 1, 0, 1],
                                   [3, 2, 1, 0]])
        res = DissimilarityMatrix.from_iterable(iterable,
                                                lambda a, b: abs(b - a))
        self.assertEqual(res, exp)

    def test_from_iterable_with_key(self):
        # ``key`` maps each item to its ID: 0, 1, 2, 3 -> '0', '1', '4', '9'.
        iterable = (x for x in range(4))

        exp = DissimilarityMatrix([[0, 1, 2, 3],
                                   [1, 0, 1, 2],
                                   [2, 1, 0, 1],
                                   [3, 2, 1, 0]], ['0', '1', '4', '9'])
        res = DissimilarityMatrix.from_iterable(iterable,
                                                lambda a, b: abs(b - a),
                                                key=lambda x: str(x**2))
        self.assertEqual(res, exp)

    def test_from_iterable_empty(self):
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix.from_iterable([], lambda x: x)

    def test_from_iterable_single(self):
        exp = DissimilarityMatrix([[100]])
        res = DissimilarityMatrix.from_iterable(["boo"], lambda a, b: 100)
        self.assertEqual(res, exp)

    def test_from_iterable_with_keys(self):
        # ``keys`` supplies the IDs directly; a one-shot iterator is passed.
        iterable = (x for x in range(4))

        exp = DissimilarityMatrix([[0, 1, 2, 3],
                                   [1, 0, 1, 2],
                                   [2, 1, 0, 1],
                                   [3, 2, 1, 0]], ['0', '1', '4', '9'])
        res = DissimilarityMatrix.from_iterable(iterable,
                                                lambda a, b: abs(b - a),
                                                keys=iter(['0', '1', '4', '9'])
                                                )
        self.assertEqual(res, exp)

    def test_from_iterable_with_key_and_keys(self):
        # Passing both ``key`` and ``keys`` must be rejected.
        iterable = (x for x in range(4))
        with self.assertRaises(ValueError):
            DissimilarityMatrix.from_iterable(iterable,
                                              lambda a, b: abs(b - a),
                                              key=str,
                                              keys=['1', '2', '3', '4'])

    def test_from_iterable_scipy_hamming_metric_with_metadata(self):
        # test for #1254
        seqs = [
            Sequence('ACGT'),
            Sequence('ACGA', metadata={'id': 'seq1'}),
            Sequence('AAAA', metadata={'id': 'seq2'}),
            Sequence('AAAA', positional_metadata={'qual': range(4)})
        ]

        exp = DissimilarityMatrix([
            [0, 0.25, 0.75, 0.75],
            [0.25, 0.0, 0.5, 0.5],
            [0.75, 0.5, 0.0, 0.0],
            [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd'])

        dm = DissimilarityMatrix.from_iterable(
            seqs,
            metric=scipy.spatial.distance.hamming,
            keys=['a', 'b', 'c', 'd'])

        self.assertEqual(dm, exp)

    def test_from_iterable_skbio_hamming_metric_with_metadata(self):
        # test for #1254
        seqs = [
            Sequence('ACGT'),
            Sequence('ACGA', metadata={'id': 'seq1'}),
            Sequence('AAAA', metadata={'id': 'seq2'}),
            Sequence('AAAA', positional_metadata={'qual': range(4)})
        ]

        exp = DissimilarityMatrix([
            [0, 0.25, 0.75, 0.75],
            [0.25, 0.0, 0.5, 0.5],
            [0.75, 0.5, 0.0, 0.0],
            [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd'])

        dm = DissimilarityMatrix.from_iterable(
            seqs,
            metric=skbio.sequence.distance.hamming,
            keys=['a', 'b', 'c', 'd'])

        self.assertEqual(dm, exp)

    def test_data(self):
        for dm, exp in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(dm.data, exp))

        # ``data`` must not be assignable.
        with self.assertRaises(AttributeError):
            self.dm_3x3.data = 'foo'

    def test_ids(self):
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

        # Test that we overwrite the existing IDs and that the ID index is
        # correctly rebuilt.
        new_ids = ['foo', 'bar', 'baz']
        self.dm_3x3.ids = new_ids
        self.assertEqual(self.dm_3x3.ids, tuple(new_ids))
        self.assertTrue(
            np.array_equal(self.dm_3x3['bar'], np.array([0.01, 0.0, 12.0])))
        with self.assertRaises(MissingIDError):
            self.dm_3x3['b']

    def test_ids_invalid_input(self):
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.ids = ['foo', 'bar']
        # Make sure that we can still use the dissimilarity matrix after trying
        # to be evil.
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

    def test_dtype(self):
        for dm in self.dms:
            self.assertEqual(dm.dtype, np.float64)

    def test_shape(self):
        for dm, shape in zip(self.dms, self.dm_shapes):
            self.assertEqual(dm.shape, shape)

    def test_size(self):
        for dm, size in zip(self.dms, self.dm_sizes):
            self.assertEqual(dm.size, size)

    def test_transpose(self):
        for dm, transpose in zip(self.dms, self.dm_transposes):
            self.assertEqual(dm.T, transpose)
            self.assertEqual(dm.transpose(), transpose)
            # We should get a reference to a different object back, even if the
            # transpose is the same as the original.
            self.assertIsNot(dm.transpose(), dm)

    def test_index(self):
        for position, id_ in enumerate(('a', 'b', 'c')):
            self.assertEqual(self.dm_3x3.index(id_), position)

        # Unknown ID, and a non-string that is not an ID either.
        with self.assertRaises(MissingIDError):
            self.dm_3x3.index('d')
        with self.assertRaises(MissingIDError):
            self.dm_3x3.index(1)

    def test_redundant_form(self):
        for dm, redundant in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(dm.redundant_form(), redundant))

    def test_copy(self):
        duplicate = self.dm_2x2.copy()
        self.assertEqual(duplicate, self.dm_2x2)
        self.assertIsNot(duplicate.data, self.dm_2x2.data)
        # deepcopy doesn't actually create a copy of the IDs because it is a
        # tuple of strings, which is fully immutable.
        self.assertIs(duplicate.ids, self.dm_2x2.ids)

        # Reassigning IDs on the copy must leave the original untouched.
        duplicate.ids = ['hello', 'world']
        self.assertNotEqual(duplicate.ids, self.dm_2x2.ids)

        # Same for the underlying data.
        duplicate = self.dm_2x2.copy()
        duplicate.data[0, 1] = 0.0001
        self.assertFalse(np.array_equal(duplicate.data, self.dm_2x2.data))

    def test_filter_no_filtering(self):
        # Don't actually filter anything -- ensure we get back a different
        # object.
        obs = self.dm_3x3.filter(['a', 'b', 'c'])
        self.assertEqual(obs, self.dm_3x3)
        self.assertIsNot(obs, self.dm_3x3)

    def test_filter_reorder(self):
        # Don't filter anything, but reorder the distance matrix.
        order = ['c', 'a', 'b']
        exp = DissimilarityMatrix(
            [[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]], order)
        self.assertEqual(self.dm_3x3.filter(order), exp)

    def test_filter_single_id(self):
        ids = ['b']
        exp = DissimilarityMatrix([[0]], ids)
        self.assertEqual(self.dm_2x2_asym.filter(ids), exp)

    def test_filter_asymmetric(self):
        # 2x2
        ids = ['b', 'a']
        exp = DissimilarityMatrix([[0, -2], [1, 0]], ids)
        self.assertEqual(self.dm_2x2_asym.filter(ids), exp)

        # 3x3
        dm = DissimilarityMatrix([[0, 10, 53], [42, 0, 22.5], [53, 1, 0]],
                                 ('bro', 'brah', 'breh'))
        ids = ['breh', 'brah']
        exp = DissimilarityMatrix([[0, 1], [22.5, 0]], ids)
        self.assertEqual(dm.filter(ids), exp)

    def test_filter_subset(self):
        ids = ('c', 'a')
        exp = DissimilarityMatrix([[0, 4.2], [4.2, 0]], ids)
        self.assertEqual(self.dm_3x3.filter(ids), exp)

        ids = ('b', 'a')
        exp = DissimilarityMatrix([[0, 0.01], [0.01, 0]], ids)
        self.assertEqual(self.dm_3x3.filter(ids), exp)

        # 4x4
        dm = DissimilarityMatrix([[0, 1, 55, 7], [1, 0, 16, 1],
                                  [55, 16, 0, 23], [7, 1, 23, 0]])
        ids = np.asarray(['3', '0', '1'])
        exp = DissimilarityMatrix([[0, 7, 1], [7, 0, 1], [1, 1, 0]], ids)
        self.assertEqual(dm.filter(ids), exp)

    def test_filter_duplicate_ids(self):
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter(['c', 'a', 'c'])

    def test_filter_missing_ids(self):
        with self.assertRaises(MissingIDError):
            self.dm_3x3.filter(['c', 'bro'])

    def test_filter_missing_ids_strict_false(self):
        # no exception should be raised
        ids = ('c', 'a')
        exp = DissimilarityMatrix([[0, 4.2], [4.2, 0]], ids)
        obs = self.dm_3x3.filter(['c', 'a', 'not found'], strict=False)
        self.assertEqual(obs, exp)

    def test_filter_empty_ids(self):
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter([])

    def test_plot_default(self):
        fig = self.dm_1x1.plot()
        self.assertIsInstance(fig, mpl.figure.Figure)

        axes = fig.get_axes()
        self.assertEqual(len(axes), 2)

        # Only the first of the two axes is inspected.
        ax = axes[0]
        self.assertEqual(ax.get_title(), '')
        xticks = [tick.get_text() for tick in ax.get_xticklabels()]
        self.assertEqual(xticks, ['a'])
        yticks = [tick.get_text() for tick in ax.get_yticklabels()]
        self.assertEqual(yticks, ['a'])

    def test_plot_no_default(self):
        ids = ['0', 'one', '2', 'three', '4.000']
        data = ([0, 1, 2, 3, 4], [1, 0, 1, 2, 3], [2, 1, 0, 1, 2],
                [3, 2, 1, 0, 1], [4, 3, 2, 1, 0])
        dm = DissimilarityMatrix(data, ids)
        fig = dm.plot(cmap='Reds', title='Testplot')
        self.assertIsInstance(fig, mpl.figure.Figure)

        axes = fig.get_axes()
        self.assertEqual(len(axes), 2)

        # Only the first of the two axes is inspected.
        ax = axes[0]
        self.assertEqual(ax.get_title(), 'Testplot')
        xticks = [tick.get_text() for tick in ax.get_xticklabels()]
        self.assertEqual(xticks, ['0', 'one', '2', 'three', '4.000'])
        yticks = [tick.get_text() for tick in ax.get_yticklabels()]
        self.assertEqual(yticks, ['0', 'one', '2', 'three', '4.000'])

    def test_repr_png(self):
        obs = self.dm_1x1._repr_png_()
        self.assertIsInstance(obs, bytes)
        self.assertGreater(len(obs), 0)

    def test_repr_svg(self):
        obs = self.dm_1x1._repr_svg_()
        self.assertIsInstance(obs, str)
        self.assertGreater(len(obs), 0)

    def test_png(self):
        self.assertIsInstance(self.dm_1x1.png, Image)

    def test_svg(self):
        self.assertIsInstance(self.dm_1x1.svg, SVG)

    def test_to_data_frame_1x1(self):
        df = self.dm_1x1.to_data_frame()
        exp = pd.DataFrame([[0.0]], index=['a'], columns=['a'])
        assert_data_frame_almost_equal(df, exp)

    def test_to_data_frame_3x3(self):
        df = self.dm_3x3.to_data_frame()
        exp = pd.DataFrame([[0.0, 0.01, 4.2],
                            [0.01, 0.0, 12.0],
                            [4.2, 12.0, 0.0]],
                           index=['a', 'b', 'c'], columns=['a', 'b', 'c'])
        assert_data_frame_almost_equal(df, exp)

    def test_to_data_frame_default_ids(self):
        # Without explicit IDs the frame is labelled '0', '1' on both axes.
        df = DissimilarityMatrix(self.dm_2x2_data).to_data_frame()
        exp = pd.DataFrame([[0.0, 0.123],
                            [0.123, 0.0]],
                           index=['0', '1'], columns=['0', '1'])
        assert_data_frame_almost_equal(df, exp)

    def test_str(self):
        for dm in self.dms:
            obs = str(dm)
            # Do some very light testing here to make sure we're getting a
            # non-empty string back. We don't want to test the exact
            # formatting.
            self.assertTrue(obs)

    def test_eq(self):
        # ``==`` is spelled out on purpose: the operator itself is under test.
        for dm in self.dms:
            duplicate = dm.copy()
            self.assertTrue(dm == dm)
            self.assertTrue(duplicate == duplicate)
            self.assertTrue(dm == duplicate)
            self.assertTrue(duplicate == dm)

        self.assertFalse(self.dm_1x1 == self.dm_3x3)

    def test_ne(self):
        # ``!=`` is spelled out on purpose: the operator itself is under test.

        # Wrong class.
        self.assertTrue(self.dm_3x3 != 'foo')

        # Wrong shape.
        self.assertTrue(self.dm_3x3 != self.dm_1x1)

        # Wrong IDs.
        other = self.dm_3x3.copy()
        other.ids = ['foo', 'bar', 'baz']
        self.assertTrue(self.dm_3x3 != other)

        # Wrong data.
        other = self.dm_3x3.copy()
        other.data[1, 0] = 42.42
        self.assertTrue(self.dm_3x3 != other)

        self.assertFalse(self.dm_2x2 != self.dm_2x2)

    def test_contains(self):
        for present in ('a', 'b', 'c'):
            self.assertIn(present, self.dm_3x3)
        self.assertNotIn('d', self.dm_3x3)

    def test_getslice(self):
        # Slice of first dimension only. Test that __getslice__ defers to
        # __getitem__.
        obs = self.dm_2x2[1:]
        self.assertTrue(np.array_equal(obs, np.array([[0.123, 0.0]])))
        self.assertIs(type(obs), np.ndarray)

    def test_getitem_by_id(self):
        obs = self.dm_1x1['a']
        self.assertTrue(np.array_equal(obs, np.array([0.0])))

        obs = self.dm_2x2_asym['b']
        self.assertTrue(np.array_equal(obs, np.array([-2.0, 0.0])))

        obs = self.dm_3x3['c']
        self.assertTrue(np.array_equal(obs, np.array([4.2, 12.0, 0.0])))

        with self.assertRaises(MissingIDError):
            self.dm_2x2['c']

    def test_getitem_by_id_pair(self):
        # Same object.
        self.assertEqual(self.dm_1x1['a', 'a'], 0.0)

        # Different objects (symmetric).
        self.assertEqual(self.dm_3x3['b', 'c'], 12.0)
        self.assertEqual(self.dm_3x3['c', 'b'], 12.0)

        # Different objects (asymmetric).
        self.assertEqual(self.dm_2x2_asym['a', 'b'], 1.0)
        self.assertEqual(self.dm_2x2_asym['b', 'a'], -2.0)

        with self.assertRaises(MissingIDError):
            self.dm_2x2['a', 'c']

    def test_getitem_ndarray_indexing(self):
        # Single element access.
        self.assertEqual(self.dm_3x3[0, 1], 0.01)

        # Single element access (via two __getitem__ calls).
        self.assertEqual(self.dm_3x3[0][1], 0.01)

        # Row access.
        obs = self.dm_3x3[1]
        self.assertTrue(np.array_equal(obs, np.array([0.01, 0.0, 12.0])))
        self.assertIs(type(obs), np.ndarray)

        # Grab all data.
        obs = self.dm_3x3[:, :]
        self.assertTrue(np.array_equal(obs, self.dm_3x3.data))
        self.assertIs(type(obs), np.ndarray)

        with self.assertRaises(IndexError):
            self.dm_3x3[:, 3]

    def test_validate_invalid_dtype(self):
        # The array literal holds only Python ints, so it is not floating
        # point; ``_validate`` is expected to reject it.
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate(np.array([[0, 42], [42, 0]]), ['a', 'b'])

    def test_validate_invalid_shape(self):
        # first check it actually likes good matrices
        self.dm_3x3._validate_shape(np.array([[0., 42.], [42., 0.]]))
        # it checks just the shape, not the content
        self.dm_3x3._validate_shape(np.array([[1., 2.], [3., 4.]]))
        # empty array
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate_shape(np.array([]))
        # invalid shape
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate_shape(
                np.array([[0., 42.], [42., 0.], [22., 22.]]))
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate_shape(
                np.array([[[0., 42.], [42., 0.]],
                          [[0., 24.], [24., 0.]]]))

    def test_validate_invalid_ids(self):
        # repeated ids
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate_ids(self.dm_3x3.data, ['a', 'a'])
        # empty ids
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate_ids(self.dm_3x3.data, [])
        # invalid shape
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate_ids(self.dm_3x3.data, ['a', 'b', 'c', 'd'])
class DissimilarityMatrixTests(DissimilarityMatrixTestData):
    """Exercise DissimilarityMatrix construction, file I/O and accessors."""

    def setUp(self):
        """Build the matrices and expected values shared by the tests."""
        super(DissimilarityMatrixTests, self).setUp()

        self.dm_1x1 = DissimilarityMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DissimilarityMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_2x2_asym = DissimilarityMatrix(self.dm_2x2_asym_data,
                                               ['a', 'b'])
        self.dm_3x3 = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        # The tests zip self.dms with each of the lists below, so all of
        # them are kept in the same order.
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_2x2_asym, self.dm_3x3]
        self.dm_f_lines = [DM_1x1_F, DM_2x2_F, self.dm_2x2_asym_lines,
                           self.dm_3x3_lines]
        self.dm_fs = [self.dm_1x1_f, self.dm_2x2_f, self.dm_2x2_asym_f,
                      self.dm_3x3_f]
        self.dm_shapes = [(1, 1), (2, 2), (2, 2), (3, 3)]
        self.dm_sizes = [1, 4, 4, 9]

        asym_transposed = DissimilarityMatrix([[0, -2], [1, 0]], ['a', 'b'])
        self.dm_transposes = [self.dm_1x1, self.dm_2x2, asym_transposed,
                              self.dm_3x3]

        raw_matrices = (self.dm_1x1_data, self.dm_2x2_data,
                        self.dm_2x2_asym_data, self.dm_3x3_data)
        self.dm_redundant_forms = [np.array(raw) for raw in raw_matrices]

    def test_round_trip_read_write(self):
        """Reading, writing and re-reading yields an equal matrix."""
        for dm_f in self.dm_fs:
            # Read.
            first = DissimilarityMatrix.from_file(dm_f)

            # Write.
            buffer = StringIO()
            first.to_file(buffer)
            buffer.seek(0)

            # Read.
            second = DissimilarityMatrix.from_file(buffer)
            self.assertEqual(first, second)

    def test_from_file(self):
        """Each fixture file parses to the matching fixture matrix."""
        for dm_f, expected in zip(self.dm_fs, self.dms):
            self.assertEqual(DissimilarityMatrix.from_file(dm_f), expected)

    def test_from_file_with_file_path(self):
        """``from_file`` accepts a file path as well as a file handle."""
        # should fail with the expected exception
        with self.assertRaises(DissimilarityMatrixFormatError):
            DissimilarityMatrix.from_file(self.bad_dm_fp)

        parsed = DissimilarityMatrix.from_file(self.dm_2x2_asym_fp)
        self.assertEqual(self.dm_2x2_asym, parsed)
        self.assertIsInstance(parsed, DissimilarityMatrix)

        parsed = DissimilarityMatrix.from_file(self.dm_3x3_fp)
        self.assertEqual(self.dm_3x3, parsed)
        self.assertIsInstance(parsed, DissimilarityMatrix)

    def test_from_file_extra_junk(self):
        """Extra whitespace and comments in the file are tolerated."""
        parsed = DissimilarityMatrix.from_file(self.dm_3x3_whitespace_f)
        self.assertEqual(parsed, self.dm_3x3)

    def test_from_file_list_of_strings(self):
        """A list of strings is accepted in place of a file."""
        parsed = DissimilarityMatrix.from_file(DM_3x3_WHITESPACE_F)
        self.assertEqual(parsed, self.dm_3x3)

    def test_from_file_real_file(self):
        """A real temporary file is parsed correctly."""
        contents = '\n'.join(DM_3x3_WHITESPACE_F)
        with tempfile.TemporaryFile(mode='r+',
                                    prefix='skbio.tests.test_distance',
                                    suffix='.txt') as fh:
            fh.write(contents)
            fh.seek(0)
            parsed = DissimilarityMatrix.from_file(fh)
            self.assertEqual(parsed, self.dm_3x3)

    def test_from_file_invalid_input(self):
        """Ill-formatted dissimilarity matrix files raise an error."""
        malformed = (
            [],  # Empty dm.
            self.bad_dm_f1,  # Number of values don't match number of IDs.
            self.bad_dm_f2,  # Mismatched IDs.
            self.bad_dm_f3,  # Extra data at end.
            self.bad_dm_f4,  # Missing data.
            self.bad_dm_f5,  # Header, but no data.
        )
        for bad_source in malformed:
            with self.assertRaises(DissimilarityMatrixFormatError):
                DissimilarityMatrix.from_file(bad_source)

        # Non-hollow.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix.from_file(self.bad_dm_f6)

    def test_to_file(self):
        """Serialization matches the expected text for both target kinds."""
        for expected_text, matrix in zip(self.dm_f_lines, self.dms):
            # Target given as a file-like object.
            buffer = StringIO()
            matrix.to_file(buffer)
            written = buffer.getvalue()
            buffer.close()
            self.assertEqual(written, expected_text)

            # Target given as a file name.
            with tempfile.NamedTemporaryFile('r+') as temp_file:
                matrix.to_file(temp_file.name)
                temp_file.flush()
                temp_file.seek(0)
                written = temp_file.read()
            self.assertEqual(written, expected_text)

    def test_init_from_dm(self):
        """A matrix can be constructed from another matrix."""
        new_ids = ['foo', 'bar', 'baz']

        # DissimilarityMatrix -> DissimilarityMatrix
        expected = DissimilarityMatrix(self.dm_3x3_data, new_ids)
        rebuilt = DissimilarityMatrix(self.dm_3x3, new_ids)
        self.assertEqual(rebuilt, expected)
        # Test that copy of data is not made.
        self.assertIs(rebuilt.data, self.dm_3x3.data)
        rebuilt.data[0, 1] = 424242
        self.assertTrue(np.array_equal(rebuilt.data, self.dm_3x3.data))

        # DistanceMatrix -> DissimilarityMatrix
        expected = DissimilarityMatrix(self.dm_3x3_data, new_ids)
        source = DistanceMatrix(self.dm_3x3_data, ('a', 'b', 'c'))
        rebuilt = DissimilarityMatrix(source, new_ids)
        self.assertEqual(rebuilt, expected)

        # DissimilarityMatrix -> DistanceMatrix
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(self.dm_2x2_asym, ['foo', 'bar'])

    def test_init_no_ids(self):
        """Omitting IDs is equivalent to passing '0', '1', '2'."""
        explicit = DissimilarityMatrix(self.dm_3x3_data, ('0', '1', '2'))
        defaulted = DissimilarityMatrix(self.dm_3x3_data)
        self.assertEqual(defaulted, explicit)
        self.assertEqual(defaulted['1', '2'], 12.0)

    def test_init_invalid_input(self):
        """Invalid data / ID combinations are rejected by the constructor."""
        square = [[0, 1], [1, 0]]
        rejected = (
            # Empty data.
            ([], []),
            # Another type of empty data.
            (np.empty((0, 0)), []),
            # Invalid number of dimensions.
            ([1, 2, 3], ['a']),
            # Dimensions don't match.
            ([[1, 2, 3]], ['a']),
            # Duplicate IDs.
            (square, ['a', 'a']),
            # Number of IDs don't match dimensions.
            (square, ['a', 'b', 'c']),
            # Non-hollow.
            ([[0.0, 1.0], [1.0, 0.01]], ['a', 'b']),
        )
        for data, ids in rejected:
            with self.assertRaises(DissimilarityMatrixError):
                DissimilarityMatrix(data, ids)

    def test_data(self):
        """``data`` matches the source array and cannot be reassigned."""
        for matrix, expected in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(matrix.data, expected))

        with self.assertRaises(AttributeError):
            self.dm_3x3.data = 'foo'

    def test_ids(self):
        """``ids`` can be read and replaced."""
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

        # Test that we overwrite the existing IDs and that the ID index is
        # correctly rebuilt.
        replacement = ['foo', 'bar', 'baz']
        self.dm_3x3.ids = replacement
        self.assertEqual(self.dm_3x3.ids, tuple(replacement))
        row = self.dm_3x3['bar']
        self.assertTrue(np.array_equal(row, np.array([0.01, 0.0, 12.0])))
        with self.assertRaises(MissingIDError):
            self.dm_3x3['b']

    def test_ids_invalid_input(self):
        """Assigning too few IDs raises and leaves the old IDs in place."""
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.ids = ['foo', 'bar']
        # Make sure that we can still use the dissimilarity matrix after
        # trying to be evil.
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

    def test_dtype(self):
        """Every fixture matrix reports a float64 dtype."""
        for matrix in self.dms:
            self.assertEqual(matrix.dtype, np.float64)

    def test_shape(self):
        """``shape`` matches the expected tuple for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_shapes):
            self.assertEqual(matrix.shape, expected)

    def test_size(self):
        """``size`` matches the expected element count for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_sizes):
            self.assertEqual(matrix.size, expected)

    def test_transpose(self):
        """``T`` and ``transpose()`` both give the expected transpose."""
        for matrix, expected in zip(self.dms, self.dm_transposes):
            self.assertEqual(matrix.T, expected)
            self.assertEqual(matrix.transpose(), expected)
            # We should get a reference to a different object back, even if
            # the transpose is the same as the original.
            self.assertIsNot(matrix.transpose(), matrix)

    def test_index(self):
        """``index`` maps IDs to positions and rejects unknown IDs."""
        for position, identifier in enumerate(('a', 'b', 'c')):
            self.assertEqual(self.dm_3x3.index(identifier), position)

        for unknown in ('d', 1):
            with self.assertRaises(MissingIDError):
                self.dm_3x3.index(unknown)

    def test_redundant_form(self):
        """``redundant_form()`` equals the source array for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(matrix.redundant_form(), expected))

    def test_copy(self):
        """``copy()`` duplicates the data array but shares the ID tuple."""
        duplicate = self.dm_2x2.copy()
        self.assertEqual(duplicate, self.dm_2x2)
        self.assertIsNot(duplicate.data, self.dm_2x2.data)
        # deepcopy doesn't actually create a copy of the IDs because it is a
        # tuple of strings, which is fully immutable.
        self.assertIs(duplicate.ids, self.dm_2x2.ids)

        duplicate.ids = ['hello', 'world']
        self.assertNotEqual(duplicate.ids, self.dm_2x2.ids)

        duplicate = self.dm_2x2.copy()
        duplicate.data[0, 1] = 0.0001
        self.assertFalse(np.array_equal(duplicate.data, self.dm_2x2.data))

    def test_filter_no_filtering(self):
        """Filtering on all IDs returns an equal but distinct object."""
        # Don't actually filter anything -- ensure we get back a different
        # object.
        filtered = self.dm_3x3.filter(['a', 'b', 'c'])
        self.assertEqual(filtered, self.dm_3x3)
        self.assertIsNot(filtered, self.dm_3x3)

    def test_filter_reorder(self):
        """Filtering on all IDs in a new order reorders the matrix."""
        # Don't filter anything, but reorder the distance matrix.
        new_order = ['c', 'a', 'b']
        expected = DissimilarityMatrix(
            [[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]], new_order)
        self.assertEqual(self.dm_3x3.filter(new_order), expected)

    def test_filter_single_id(self):
        """Filtering down to one ID gives a 1x1 matrix."""
        keep = ['b']
        expected = DissimilarityMatrix([[0]], keep)
        self.assertEqual(self.dm_2x2_asym.filter(keep), expected)

    def test_filter_asymmetric(self):
        """Filtering keeps asymmetric values in their row/column slots."""
        # 2x2
        keep = ['b', 'a']
        expected = DissimilarityMatrix([[0, -2], [1, 0]], keep)
        self.assertEqual(self.dm_2x2_asym.filter(keep), expected)

        # 3x3
        asym_3x3 = DissimilarityMatrix(
            [[0, 10, 53], [42, 0, 22.5], [53, 1, 0]],
            ('bro', 'brah', 'breh'))
        keep = ['breh', 'brah']
        expected = DissimilarityMatrix([[0, 1], [22.5, 0]], keep)
        self.assertEqual(asym_3x3.filter(keep), expected)

    def test_filter_subset(self):
        """Filtering on a subset of IDs keeps only those rows/columns."""
        subsets = (
            (('c', 'a'), [[0, 4.2], [4.2, 0]]),
            (('b', 'a'), [[0, 0.01], [0.01, 0]]),
        )
        for keep, values in subsets:
            expected = DissimilarityMatrix(values, keep)
            self.assertEqual(self.dm_3x3.filter(keep), expected)

        # 4x4
        dm_4x4 = DissimilarityMatrix([[0, 1, 55, 7], [1, 0, 16, 1],
                                      [55, 16, 0, 23], [7, 1, 23, 0]])
        keep = np.asarray(['3', '0', '1'])
        expected = DissimilarityMatrix([[0, 7, 1], [7, 0, 1], [1, 1, 0]],
                                       keep)
        self.assertEqual(dm_4x4.filter(keep), expected)

    def test_filter_duplicate_ids(self):
        """Repeating an ID in the filter list raises an error."""
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter(['c', 'a', 'c'])

    def test_filter_missing_ids(self):
        """An unknown ID in the filter list raises MissingIDError."""
        with self.assertRaises(MissingIDError):
            self.dm_3x3.filter(['c', 'bro'])

    def test_filter_missing_ids_strict_false(self):
        """With ``strict=False`` unknown IDs are dropped silently."""
        # no exception should be raised
        keep = ('c', 'a')
        expected = DissimilarityMatrix([[0, 4.2], [4.2, 0]], keep)
        filtered = self.dm_3x3.filter(['c', 'a', 'not found'], strict=False)
        self.assertEqual(filtered, expected)

    def test_filter_empty_ids(self):
        """Filtering on an empty ID list raises an error."""
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter([])

    def test_str(self):
        """``str()`` of each fixture is a non-empty string."""
        for matrix in self.dms:
            # Do some very light testing here to make sure we're getting a
            # non-empty string back. We don't want to test the exact
            # formatting.
            self.assertTrue(str(matrix))

    def test_eq(self):
        """``==`` holds between a matrix and its copy, in both directions."""
        for matrix in self.dms:
            twin = matrix.copy()
            self.assertTrue(matrix == matrix)
            self.assertTrue(twin == twin)
            self.assertTrue(matrix == twin)
            self.assertTrue(twin == matrix)

        self.assertFalse(self.dm_1x1 == self.dm_3x3)

    def test_ne(self):
        """``!=`` holds for a differing class, shape, IDs or data."""
        # Wrong class.
        self.assertTrue(self.dm_3x3 != 'foo')

        # Wrong shape.
        self.assertTrue(self.dm_3x3 != self.dm_1x1)

        # Wrong IDs.
        relabelled = self.dm_3x3.copy()
        relabelled.ids = ['foo', 'bar', 'baz']
        self.assertTrue(self.dm_3x3 != relabelled)

        # Wrong data.
        altered = self.dm_3x3.copy()
        altered.data[1, 0] = 42.42
        self.assertTrue(self.dm_3x3 != altered)

        self.assertFalse(self.dm_2x2 != self.dm_2x2)

    def test_contains(self):
        """``in`` reports membership of IDs."""
        for present in ('a', 'b', 'c'):
            self.assertIn(present, self.dm_3x3)
        self.assertNotIn('d', self.dm_3x3)

    def test_getslice(self):
        """Slicing the first dimension returns an ndarray of rows."""
        # Slice of first dimension only. Test that __getslice__ defers to
        # __getitem__.
        rows = self.dm_2x2[1:]
        self.assertTrue(np.array_equal(rows, np.array([[0.123, 0.0]])))
        self.assertIs(type(rows), np.ndarray)

    def test_getitem_by_id(self):
        """Indexing with a single ID yields that ID's row as an array."""
        cases = (
            (self.dm_1x1, 'a', [0.0]),
            (self.dm_2x2_asym, 'b', [-2.0, 0.0]),
            (self.dm_3x3, 'c', [4.2, 12.0, 0.0]),
        )
        for matrix, identifier, expected_row in cases:
            row = matrix[identifier]
            self.assertTrue(np.array_equal(row, np.array(expected_row)))

        with self.assertRaises(MissingIDError):
            self.dm_2x2['c']

    def test_getitem_by_id_pair(self):
        """Indexing with an ID pair yields a single element."""
        # Same object.
        self.assertEqual(self.dm_1x1['a', 'a'], 0.0)

        # Different objects (symmetric).
        for pair in (('b', 'c'), ('c', 'b')):
            self.assertEqual(self.dm_3x3[pair], 12.0)

        # Different objects (asymmetric).
        self.assertEqual(self.dm_2x2_asym['a', 'b'], 1.0)
        self.assertEqual(self.dm_2x2_asym['b', 'a'], -2.0)

        with self.assertRaises(MissingIDError):
            self.dm_2x2['a', 'c']

    def test_getitem_ndarray_indexing(self):
        """Integer and slice indexing returns plain ndarray results."""
        # Single element access.
        self.assertEqual(self.dm_3x3[0, 1], 0.01)

        # Single element access (via two __getitem__ calls).
        self.assertEqual(self.dm_3x3[0][1], 0.01)

        # Row access.
        row = self.dm_3x3[1]
        self.assertTrue(np.array_equal(row, np.array([0.01, 0.0, 12.0])))
        self.assertIs(type(row), np.ndarray)

        # Grab all data.
        everything = self.dm_3x3[:, :]
        self.assertTrue(np.array_equal(everything, self.dm_3x3.data))
        self.assertIs(type(everything), np.ndarray)

        with self.assertRaises(IndexError):
            self.dm_3x3[:, 3]

    def test_parse_ids(self):
        """Placeholder: ``_parse_ids`` has no assertions in this class."""

    def test_validate_invalid_dtype(self):
        """``_validate`` rejects an array built from integer literals."""
        integer_data = np.array([[0, 42], [42, 0]])
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate(integer_data, ['a', 'b'])

    def test_index_list(self):
        """Placeholder: ``_index_list`` has no assertions in this class."""

    def test_is_id_pair(self):
        """Placeholder: ``_is_id_pair`` has no assertions in this class."""

    def test_format_ids(self):
        """Placeholder: ``_format_ids`` has no assertions in this class."""

    def test_pprint_ids(self):
        """``_pprint_ids`` joins IDs and truncates with an ellipsis."""
        # No truncation.
        self.assertEqual(self.dm_3x3._pprint_ids(), 'a, b, c')

        # Truncation.
        self.assertEqual(self.dm_3x3._pprint_ids(max_chars=5), 'a, b, ...')
class DissimilarityMatrixTests(DissimilarityMatrixTestData):
    """Exercise DissimilarityMatrix construction, plotting and accessors."""

    def setUp(self):
        """Build the matrices and expected values shared by the tests."""
        super(DissimilarityMatrixTests, self).setUp()

        self.dm_1x1 = DissimilarityMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DissimilarityMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_2x2_asym = DissimilarityMatrix(self.dm_2x2_asym_data,
                                               ['a', 'b'])
        self.dm_3x3 = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        # The tests zip self.dms with each of the lists below, so all of
        # them are kept in the same order.
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_2x2_asym, self.dm_3x3]
        self.dm_shapes = [(1, 1), (2, 2), (2, 2), (3, 3)]
        self.dm_sizes = [1, 4, 4, 9]

        asym_transposed = DissimilarityMatrix([[0, -2], [1, 0]], ['a', 'b'])
        self.dm_transposes = [self.dm_1x1, self.dm_2x2, asym_transposed,
                              self.dm_3x3]

        raw_matrices = (self.dm_1x1_data, self.dm_2x2_data,
                        self.dm_2x2_asym_data, self.dm_3x3_data)
        self.dm_redundant_forms = [np.array(raw) for raw in raw_matrices]

    def test_init_from_dm(self):
        """A matrix can be constructed from another matrix."""
        new_ids = ['foo', 'bar', 'baz']

        # DissimilarityMatrix -> DissimilarityMatrix
        expected = DissimilarityMatrix(self.dm_3x3_data, new_ids)
        rebuilt = DissimilarityMatrix(self.dm_3x3, new_ids)
        self.assertEqual(rebuilt, expected)
        # Test that copy of data is not made.
        self.assertIs(rebuilt.data, self.dm_3x3.data)
        rebuilt.data[0, 1] = 424242
        self.assertTrue(np.array_equal(rebuilt.data, self.dm_3x3.data))

        # DistanceMatrix -> DissimilarityMatrix
        expected = DissimilarityMatrix(self.dm_3x3_data, new_ids)
        source = DistanceMatrix(self.dm_3x3_data, ('a', 'b', 'c'))
        rebuilt = DissimilarityMatrix(source, new_ids)
        self.assertEqual(rebuilt, expected)

        # DissimilarityMatrix -> DistanceMatrix
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(self.dm_2x2_asym, ['foo', 'bar'])

    def test_init_no_ids(self):
        """Omitting IDs is equivalent to passing '0', '1', '2'."""
        explicit = DissimilarityMatrix(self.dm_3x3_data, ('0', '1', '2'))
        defaulted = DissimilarityMatrix(self.dm_3x3_data)
        self.assertEqual(defaulted, explicit)
        self.assertEqual(defaulted['1', '2'], 12.0)

    def test_init_invalid_input(self):
        """Invalid data / ID combinations are rejected by the constructor."""
        square = [[0, 1], [1, 0]]
        rejected = (
            # Empty data.
            ([], []),
            # Another type of empty data.
            (np.empty((0, 0)), []),
            # Invalid number of dimensions.
            ([1, 2, 3], ['a']),
            # Dimensions don't match.
            ([[1, 2, 3]], ['a']),
            # Duplicate IDs.
            (square, ['a', 'a']),
            # Number of IDs don't match dimensions.
            (square, ['a', 'b', 'c']),
            (square, []),
            # Non-hollow.
            ([[0.0, 1.0], [1.0, 0.01]], ['a', 'b']),
        )
        for data, ids in rejected:
            with self.assertRaises(DissimilarityMatrixError):
                DissimilarityMatrix(data, ids)

    def test_data(self):
        """``data`` matches the source array and cannot be reassigned."""
        for matrix, expected in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(matrix.data, expected))

        with self.assertRaises(AttributeError):
            self.dm_3x3.data = 'foo'

    def test_ids(self):
        """``ids`` can be read and replaced."""
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

        # Test that we overwrite the existing IDs and that the ID index is
        # correctly rebuilt.
        replacement = ['foo', 'bar', 'baz']
        self.dm_3x3.ids = replacement
        self.assertEqual(self.dm_3x3.ids, tuple(replacement))
        row = self.dm_3x3['bar']
        self.assertTrue(np.array_equal(row, np.array([0.01, 0.0, 12.0])))
        with self.assertRaises(MissingIDError):
            self.dm_3x3['b']

    def test_ids_invalid_input(self):
        """Assigning too few IDs raises and leaves the old IDs in place."""
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.ids = ['foo', 'bar']
        # Make sure that we can still use the dissimilarity matrix after
        # trying to be evil.
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

    def test_dtype(self):
        """Every fixture matrix reports a float64 dtype."""
        for matrix in self.dms:
            self.assertEqual(matrix.dtype, np.float64)

    def test_shape(self):
        """``shape`` matches the expected tuple for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_shapes):
            self.assertEqual(matrix.shape, expected)

    def test_size(self):
        """``size`` matches the expected element count for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_sizes):
            self.assertEqual(matrix.size, expected)

    def test_transpose(self):
        """``T`` and ``transpose()`` both give the expected transpose."""
        for matrix, expected in zip(self.dms, self.dm_transposes):
            self.assertEqual(matrix.T, expected)
            self.assertEqual(matrix.transpose(), expected)
            # We should get a reference to a different object back, even if
            # the transpose is the same as the original.
            self.assertIsNot(matrix.transpose(), matrix)

    def test_index(self):
        """``index`` maps IDs to positions and rejects unknown IDs."""
        for position, identifier in enumerate(('a', 'b', 'c')):
            self.assertEqual(self.dm_3x3.index(identifier), position)

        for unknown in ('d', 1):
            with self.assertRaises(MissingIDError):
                self.dm_3x3.index(unknown)

    def test_redundant_form(self):
        """``redundant_form()`` equals the source array for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(matrix.redundant_form(), expected))

    def test_copy(self):
        """``copy()`` duplicates the data array but shares the ID tuple."""
        duplicate = self.dm_2x2.copy()
        self.assertEqual(duplicate, self.dm_2x2)
        self.assertIsNot(duplicate.data, self.dm_2x2.data)
        # deepcopy doesn't actually create a copy of the IDs because it is a
        # tuple of strings, which is fully immutable.
        self.assertIs(duplicate.ids, self.dm_2x2.ids)

        duplicate.ids = ['hello', 'world']
        self.assertNotEqual(duplicate.ids, self.dm_2x2.ids)

        duplicate = self.dm_2x2.copy()
        duplicate.data[0, 1] = 0.0001
        self.assertFalse(np.array_equal(duplicate.data, self.dm_2x2.data))

    def test_filter_no_filtering(self):
        """Filtering on all IDs returns an equal but distinct object."""
        # Don't actually filter anything -- ensure we get back a different
        # object.
        filtered = self.dm_3x3.filter(['a', 'b', 'c'])
        self.assertEqual(filtered, self.dm_3x3)
        self.assertIsNot(filtered, self.dm_3x3)

    def test_filter_reorder(self):
        """Filtering on all IDs in a new order reorders the matrix."""
        # Don't filter anything, but reorder the distance matrix.
        new_order = ['c', 'a', 'b']
        expected = DissimilarityMatrix(
            [[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]], new_order)
        self.assertEqual(self.dm_3x3.filter(new_order), expected)

    def test_filter_single_id(self):
        """Filtering down to one ID gives a 1x1 matrix."""
        keep = ['b']
        expected = DissimilarityMatrix([[0]], keep)
        self.assertEqual(self.dm_2x2_asym.filter(keep), expected)

    def test_filter_asymmetric(self):
        """Filtering keeps asymmetric values in their row/column slots."""
        # 2x2
        keep = ['b', 'a']
        expected = DissimilarityMatrix([[0, -2], [1, 0]], keep)
        self.assertEqual(self.dm_2x2_asym.filter(keep), expected)

        # 3x3
        asym_3x3 = DissimilarityMatrix(
            [[0, 10, 53], [42, 0, 22.5], [53, 1, 0]],
            ('bro', 'brah', 'breh'))
        keep = ['breh', 'brah']
        expected = DissimilarityMatrix([[0, 1], [22.5, 0]], keep)
        self.assertEqual(asym_3x3.filter(keep), expected)

    def test_filter_subset(self):
        """Filtering on a subset of IDs keeps only those rows/columns."""
        subsets = (
            (('c', 'a'), [[0, 4.2], [4.2, 0]]),
            (('b', 'a'), [[0, 0.01], [0.01, 0]]),
        )
        for keep, values in subsets:
            expected = DissimilarityMatrix(values, keep)
            self.assertEqual(self.dm_3x3.filter(keep), expected)

        # 4x4
        dm_4x4 = DissimilarityMatrix([[0, 1, 55, 7], [1, 0, 16, 1],
                                      [55, 16, 0, 23], [7, 1, 23, 0]])
        keep = np.asarray(['3', '0', '1'])
        expected = DissimilarityMatrix([[0, 7, 1], [7, 0, 1], [1, 1, 0]],
                                       keep)
        self.assertEqual(dm_4x4.filter(keep), expected)

    def test_filter_duplicate_ids(self):
        """Repeating an ID in the filter list raises an error."""
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter(['c', 'a', 'c'])

    def test_filter_missing_ids(self):
        """An unknown ID in the filter list raises MissingIDError."""
        with self.assertRaises(MissingIDError):
            self.dm_3x3.filter(['c', 'bro'])

    def test_filter_missing_ids_strict_false(self):
        """With ``strict=False`` unknown IDs are dropped silently."""
        # no exception should be raised
        keep = ('c', 'a')
        expected = DissimilarityMatrix([[0, 4.2], [4.2, 0]], keep)
        filtered = self.dm_3x3.filter(['c', 'a', 'not found'], strict=False)
        self.assertEqual(filtered, expected)

    def test_filter_empty_ids(self):
        """Filtering on an empty ID list raises an error."""
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter([])

    def test_plot_default(self):
        """``plot()`` with no arguments labels ticks with the IDs."""
        fig = self.dm_1x1.plot()
        self.assertIsInstance(fig, mpl.figure.Figure)

        axes = fig.get_axes()
        self.assertEqual(len(axes), 2)

        ax = axes[0]
        self.assertEqual(ax.get_title(), '')
        x_labels = [tick.get_text() for tick in ax.get_xticklabels()]
        self.assertEqual(x_labels, ['a'])
        y_labels = [tick.get_text() for tick in ax.get_yticklabels()]
        self.assertEqual(y_labels, ['a'])

    def test_plot_no_default(self):
        """``plot()`` honours an explicit colormap and title."""
        ids = ['0', 'one', '2', 'three', '4.000']
        data = ([0, 1, 2, 3, 4], [1, 0, 1, 2, 3], [2, 1, 0, 1, 2],
                [3, 2, 1, 0, 1], [4, 3, 2, 1, 0])
        fig = DissimilarityMatrix(data, ids).plot(cmap='Reds',
                                                  title='Testplot')
        self.assertIsInstance(fig, mpl.figure.Figure)

        axes = fig.get_axes()
        self.assertEqual(len(axes), 2)

        ax = axes[0]
        self.assertEqual(ax.get_title(), 'Testplot')
        x_labels = [tick.get_text() for tick in ax.get_xticklabels()]
        self.assertEqual(x_labels, ['0', 'one', '2', 'three', '4.000'])
        y_labels = [tick.get_text() for tick in ax.get_yticklabels()]
        self.assertEqual(y_labels, ['0', 'one', '2', 'three', '4.000'])

    def test_repr_png(self):
        """``_repr_png_`` returns non-empty binary data."""
        rendered = self.dm_1x1._repr_png_()
        self.assertIsInstance(rendered, binary_type)
        self.assertGreater(len(rendered), 0)

    def test_repr_svg(self):
        """``_repr_svg_`` returns non-empty text or binary data."""
        rendered = self.dm_1x1._repr_svg_()
        # print_figure(format='svg') can return text or bytes depending on
        # the version of IPython
        self.assertIsInstance(rendered, (text_type, binary_type))
        self.assertGreater(len(rendered), 0)

    def test_png(self):
        """The ``png`` attribute is an ``Image`` instance."""
        self.assertIsInstance(self.dm_1x1.png, Image)

    def test_svg(self):
        """The ``svg`` attribute is an ``SVG`` instance."""
        self.assertIsInstance(self.dm_1x1.svg, SVG)

    def test_to_data_frame_1x1(self):
        """A 1x1 matrix converts to a 1x1 data frame labelled by ID."""
        expected = pd.DataFrame([[0.0]], index=['a'], columns=['a'])
        frame = self.dm_1x1.to_data_frame()
        assert_data_frame_almost_equal(frame, expected)

    def test_to_data_frame_3x3(self):
        """A 3x3 matrix converts to a data frame labelled by ID."""
        labels = ['a', 'b', 'c']
        expected = pd.DataFrame(
            [[0.0, 0.01, 4.2], [0.01, 0.0, 12.0], [4.2, 12.0, 0.0]],
            index=labels, columns=labels)
        frame = self.dm_3x3.to_data_frame()
        assert_data_frame_almost_equal(frame, expected)

    def test_to_data_frame_default_ids(self):
        """Default IDs '0' and '1' label the converted data frame."""
        expected = pd.DataFrame([[0.0, 0.123], [0.123, 0.0]],
                                index=['0', '1'], columns=['0', '1'])
        frame = DissimilarityMatrix(self.dm_2x2_data).to_data_frame()
        assert_data_frame_almost_equal(frame, expected)

    def test_str(self):
        """``str()`` of each fixture is a non-empty string."""
        for matrix in self.dms:
            # Do some very light testing here to make sure we're getting a
            # non-empty string back. We don't want to test the exact
            # formatting.
            self.assertTrue(str(matrix))

    def test_eq(self):
        """``==`` holds between a matrix and its copy, in both directions."""
        for matrix in self.dms:
            twin = matrix.copy()
            self.assertTrue(matrix == matrix)
            self.assertTrue(twin == twin)
            self.assertTrue(matrix == twin)
            self.assertTrue(twin == matrix)

        self.assertFalse(self.dm_1x1 == self.dm_3x3)

    def test_ne(self):
        """``!=`` holds for a differing class, shape, IDs or data."""
        # Wrong class.
        self.assertTrue(self.dm_3x3 != 'foo')

        # Wrong shape.
        self.assertTrue(self.dm_3x3 != self.dm_1x1)

        # Wrong IDs.
        relabelled = self.dm_3x3.copy()
        relabelled.ids = ['foo', 'bar', 'baz']
        self.assertTrue(self.dm_3x3 != relabelled)

        # Wrong data.
        altered = self.dm_3x3.copy()
        altered.data[1, 0] = 42.42
        self.assertTrue(self.dm_3x3 != altered)

        self.assertFalse(self.dm_2x2 != self.dm_2x2)

    def test_contains(self):
        """``in`` reports membership of IDs."""
        for present in ('a', 'b', 'c'):
            self.assertIn(present, self.dm_3x3)
        self.assertNotIn('d', self.dm_3x3)

    def test_getslice(self):
        """Slicing the first dimension returns an ndarray of rows."""
        # Slice of first dimension only. Test that __getslice__ defers to
        # __getitem__.
        rows = self.dm_2x2[1:]
        self.assertTrue(np.array_equal(rows, np.array([[0.123, 0.0]])))
        self.assertIs(type(rows), np.ndarray)

    def test_getitem_by_id(self):
        """Indexing with a single ID yields that ID's row as an array."""
        cases = (
            (self.dm_1x1, 'a', [0.0]),
            (self.dm_2x2_asym, 'b', [-2.0, 0.0]),
            (self.dm_3x3, 'c', [4.2, 12.0, 0.0]),
        )
        for matrix, identifier, expected_row in cases:
            row = matrix[identifier]
            self.assertTrue(np.array_equal(row, np.array(expected_row)))

        with self.assertRaises(MissingIDError):
            self.dm_2x2['c']

    def test_getitem_by_id_pair(self):
        """Indexing with an ID pair yields a single element."""
        # Same object.
        self.assertEqual(self.dm_1x1['a', 'a'], 0.0)

        # Different objects (symmetric).
        for pair in (('b', 'c'), ('c', 'b')):
            self.assertEqual(self.dm_3x3[pair], 12.0)

        # Different objects (asymmetric).
        self.assertEqual(self.dm_2x2_asym['a', 'b'], 1.0)
        self.assertEqual(self.dm_2x2_asym['b', 'a'], -2.0)

        with self.assertRaises(MissingIDError):
            self.dm_2x2['a', 'c']

    def test_getitem_ndarray_indexing(self):
        """Integer and slice indexing returns plain ndarray results."""
        # Single element access.
        self.assertEqual(self.dm_3x3[0, 1], 0.01)

        # Single element access (via two __getitem__ calls).
        self.assertEqual(self.dm_3x3[0][1], 0.01)

        # Row access.
        row = self.dm_3x3[1]
        self.assertTrue(np.array_equal(row, np.array([0.01, 0.0, 12.0])))
        self.assertIs(type(row), np.ndarray)

        # Grab all data.
        everything = self.dm_3x3[:, :]
        self.assertTrue(np.array_equal(everything, self.dm_3x3.data))
        self.assertIs(type(everything), np.ndarray)

        with self.assertRaises(IndexError):
            self.dm_3x3[:, 3]

    def test_validate_invalid_dtype(self):
        """``_validate`` rejects an array built from integer literals."""
        integer_data = np.array([[0, 42], [42, 0]])
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate(integer_data, ['a', 'b'])
class DissimilarityMatrixTests(DissimilarityMatrixTestData):
    """Exercise DissimilarityMatrix construction, accessors and filtering."""

    def setUp(self):
        """Build the matrices and expected values shared by the tests."""
        super(DissimilarityMatrixTests, self).setUp()

        self.dm_1x1 = DissimilarityMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DissimilarityMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_2x2_asym = DissimilarityMatrix(self.dm_2x2_asym_data,
                                               ['a', 'b'])
        self.dm_3x3 = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        # The tests zip self.dms with each of the lists below, so all of
        # them are kept in the same order.
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_2x2_asym, self.dm_3x3]
        self.dm_shapes = [(1, 1), (2, 2), (2, 2), (3, 3)]
        self.dm_sizes = [1, 4, 4, 9]

        asym_transposed = DissimilarityMatrix([[0, -2], [1, 0]], ['a', 'b'])
        self.dm_transposes = [self.dm_1x1, self.dm_2x2, asym_transposed,
                              self.dm_3x3]

        raw_matrices = (self.dm_1x1_data, self.dm_2x2_data,
                        self.dm_2x2_asym_data, self.dm_3x3_data)
        self.dm_redundant_forms = [np.array(raw) for raw in raw_matrices]

    def test_deprecated_io(self):
        """``to_file`` / ``from_file`` warn but still round-trip a matrix."""
        fh = StringIO()
        npt.assert_warns(UserWarning, self.dm_3x3.to_file, fh)
        fh.seek(0)
        restored = npt.assert_warns(UserWarning,
                                    DissimilarityMatrix.from_file, fh)
        self.assertEqual(restored, self.dm_3x3)
        # The exact class is required here, not merely a subclass.
        self.assertIs(type(restored), DissimilarityMatrix)

    def test_init_from_dm(self):
        """A matrix can be constructed from another matrix."""
        new_ids = ['foo', 'bar', 'baz']

        # DissimilarityMatrix -> DissimilarityMatrix
        expected = DissimilarityMatrix(self.dm_3x3_data, new_ids)
        rebuilt = DissimilarityMatrix(self.dm_3x3, new_ids)
        self.assertEqual(rebuilt, expected)
        # Test that copy of data is not made.
        self.assertIs(rebuilt.data, self.dm_3x3.data)
        rebuilt.data[0, 1] = 424242
        self.assertTrue(np.array_equal(rebuilt.data, self.dm_3x3.data))

        # DistanceMatrix -> DissimilarityMatrix
        expected = DissimilarityMatrix(self.dm_3x3_data, new_ids)
        source = DistanceMatrix(self.dm_3x3_data, ('a', 'b', 'c'))
        rebuilt = DissimilarityMatrix(source, new_ids)
        self.assertEqual(rebuilt, expected)

        # DissimilarityMatrix -> DistanceMatrix
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(self.dm_2x2_asym, ['foo', 'bar'])

    def test_init_no_ids(self):
        """Omitting IDs is equivalent to passing '0', '1', '2'."""
        explicit = DissimilarityMatrix(self.dm_3x3_data, ('0', '1', '2'))
        defaulted = DissimilarityMatrix(self.dm_3x3_data)
        self.assertEqual(defaulted, explicit)
        self.assertEqual(defaulted['1', '2'], 12.0)

    def test_init_invalid_input(self):
        """Invalid data / ID combinations are rejected by the constructor."""
        square = [[0, 1], [1, 0]]
        rejected = (
            # Empty data.
            ([], []),
            # Another type of empty data.
            (np.empty((0, 0)), []),
            # Invalid number of dimensions.
            ([1, 2, 3], ['a']),
            # Dimensions don't match.
            ([[1, 2, 3]], ['a']),
            # Duplicate IDs.
            (square, ['a', 'a']),
            # Number of IDs don't match dimensions.
            (square, ['a', 'b', 'c']),
            # Non-hollow.
            ([[0.0, 1.0], [1.0, 0.01]], ['a', 'b']),
        )
        for data, ids in rejected:
            with self.assertRaises(DissimilarityMatrixError):
                DissimilarityMatrix(data, ids)

    def test_data(self):
        """``data`` matches the source array and cannot be reassigned."""
        for matrix, expected in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(matrix.data, expected))

        with self.assertRaises(AttributeError):
            self.dm_3x3.data = 'foo'

    def test_ids(self):
        """``ids`` can be read and replaced."""
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

        # Test that we overwrite the existing IDs and that the ID index is
        # correctly rebuilt.
        replacement = ['foo', 'bar', 'baz']
        self.dm_3x3.ids = replacement
        self.assertEqual(self.dm_3x3.ids, tuple(replacement))
        row = self.dm_3x3['bar']
        self.assertTrue(np.array_equal(row, np.array([0.01, 0.0, 12.0])))
        with self.assertRaises(MissingIDError):
            self.dm_3x3['b']

    def test_ids_invalid_input(self):
        """Assigning too few IDs raises and leaves the old IDs in place."""
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.ids = ['foo', 'bar']
        # Make sure that we can still use the dissimilarity matrix after
        # trying to be evil.
        self.assertEqual(self.dm_3x3.ids, ('a', 'b', 'c'))

    def test_dtype(self):
        """Every fixture matrix reports a float64 dtype."""
        for matrix in self.dms:
            self.assertEqual(matrix.dtype, np.float64)

    def test_shape(self):
        """``shape`` matches the expected tuple for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_shapes):
            self.assertEqual(matrix.shape, expected)

    def test_size(self):
        """``size`` matches the expected element count for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_sizes):
            self.assertEqual(matrix.size, expected)

    def test_transpose(self):
        """``T`` and ``transpose()`` both give the expected transpose."""
        for matrix, expected in zip(self.dms, self.dm_transposes):
            self.assertEqual(matrix.T, expected)
            self.assertEqual(matrix.transpose(), expected)
            # We should get a reference to a different object back, even if
            # the transpose is the same as the original.
            self.assertIsNot(matrix.transpose(), matrix)

    def test_index(self):
        """``index`` maps IDs to positions and rejects unknown IDs."""
        for position, identifier in enumerate(('a', 'b', 'c')):
            self.assertEqual(self.dm_3x3.index(identifier), position)

        for unknown in ('d', 1):
            with self.assertRaises(MissingIDError):
                self.dm_3x3.index(unknown)

    def test_redundant_form(self):
        """``redundant_form()`` equals the source array for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_redundant_forms):
            self.assertTrue(np.array_equal(matrix.redundant_form(), expected))

    def test_copy(self):
        """``copy()`` duplicates the data array but shares the ID tuple."""
        duplicate = self.dm_2x2.copy()
        self.assertEqual(duplicate, self.dm_2x2)
        self.assertIsNot(duplicate.data, self.dm_2x2.data)
        # deepcopy doesn't actually create a copy of the IDs because it is a
        # tuple of strings, which is fully immutable.
        self.assertIs(duplicate.ids, self.dm_2x2.ids)

        duplicate.ids = ['hello', 'world']
        self.assertNotEqual(duplicate.ids, self.dm_2x2.ids)

        duplicate = self.dm_2x2.copy()
        duplicate.data[0, 1] = 0.0001
        self.assertFalse(np.array_equal(duplicate.data, self.dm_2x2.data))

    def test_filter_no_filtering(self):
        """Filtering on all IDs returns an equal but distinct object."""
        # Don't actually filter anything -- ensure we get back a different
        # object.
        filtered = self.dm_3x3.filter(['a', 'b', 'c'])
        self.assertEqual(filtered, self.dm_3x3)
        self.assertIsNot(filtered, self.dm_3x3)

    def test_filter_reorder(self):
        """Filtering on all IDs in a new order reorders the matrix."""
        # Don't filter anything, but reorder the distance matrix.
        new_order = ['c', 'a', 'b']
        expected = DissimilarityMatrix(
            [[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]], new_order)
        self.assertEqual(self.dm_3x3.filter(new_order), expected)

    def test_filter_single_id(self):
        """Filtering down to one ID gives a 1x1 matrix."""
        keep = ['b']
        expected = DissimilarityMatrix([[0]], keep)
        self.assertEqual(self.dm_2x2_asym.filter(keep), expected)

    def test_filter_asymmetric(self):
        """Filtering keeps asymmetric values in their row/column slots."""
        # 2x2
        keep = ['b', 'a']
        expected = DissimilarityMatrix([[0, -2], [1, 0]], keep)
        self.assertEqual(self.dm_2x2_asym.filter(keep), expected)

        # 3x3
        asym_3x3 = DissimilarityMatrix(
            [[0, 10, 53], [42, 0, 22.5], [53, 1, 0]],
            ('bro', 'brah', 'breh'))
        keep = ['breh', 'brah']
        expected = DissimilarityMatrix([[0, 1], [22.5, 0]], keep)
        self.assertEqual(asym_3x3.filter(keep), expected)

    def test_filter_subset(self):
        """Filtering on a subset of IDs keeps only those rows/columns."""
        subsets = (
            (('c', 'a'), [[0, 4.2], [4.2, 0]]),
            (('b', 'a'), [[0, 0.01], [0.01, 0]]),
        )
        for keep, values in subsets:
            expected = DissimilarityMatrix(values, keep)
            self.assertEqual(self.dm_3x3.filter(keep), expected)

        # 4x4
        dm_4x4 = DissimilarityMatrix([[0, 1, 55, 7], [1, 0, 16, 1],
                                      [55, 16, 0, 23], [7, 1, 23, 0]])
        keep = np.asarray(['3', '0', '1'])
        expected = DissimilarityMatrix([[0, 7, 1], [7, 0, 1], [1, 1, 0]],
                                       keep)
        self.assertEqual(dm_4x4.filter(keep), expected)

    def test_filter_duplicate_ids(self):
        """Repeating an ID in the filter list raises an error."""
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter(['c', 'a', 'c'])

    def test_filter_missing_ids(self):
        """An unknown ID in the filter list raises MissingIDError."""
        with self.assertRaises(MissingIDError):
            self.dm_3x3.filter(['c', 'bro'])

    def test_filter_missing_ids_strict_false(self):
        """With ``strict=False`` unknown IDs are dropped silently."""
        # no exception should be raised
        keep = ('c', 'a')
        expected = DissimilarityMatrix([[0, 4.2], [4.2, 0]], keep)
        filtered = self.dm_3x3.filter(['c', 'a', 'not found'], strict=False)
        self.assertEqual(filtered, expected)

    def test_filter_empty_ids(self):
        """Filtering on an empty ID list raises an error."""
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter([])

    def test_str(self):
        """``str()`` of each fixture is a non-empty string."""
        for matrix in self.dms:
            # Do some very light testing here to make sure we're getting a
            # non-empty string back. We don't want to test the exact
            # formatting.
            self.assertTrue(str(matrix))

    def test_eq(self):
        """``==`` holds between a matrix and its copy, in both directions."""
        for matrix in self.dms:
            twin = matrix.copy()
            self.assertTrue(matrix == matrix)
            self.assertTrue(twin == twin)
            self.assertTrue(matrix == twin)
            self.assertTrue(twin == matrix)

        self.assertFalse(self.dm_1x1 == self.dm_3x3)

    def test_ne(self):
        """``!=`` holds for a differing class, shape, IDs or data."""
        # Wrong class.
        self.assertTrue(self.dm_3x3 != 'foo')

        # Wrong shape.
        self.assertTrue(self.dm_3x3 != self.dm_1x1)

        # Wrong IDs.
        relabelled = self.dm_3x3.copy()
        relabelled.ids = ['foo', 'bar', 'baz']
        self.assertTrue(self.dm_3x3 != relabelled)

        # Wrong data.
        altered = self.dm_3x3.copy()
        altered.data[1, 0] = 42.42
        self.assertTrue(self.dm_3x3 != altered)

        self.assertFalse(self.dm_2x2 != self.dm_2x2)

    def test_contains(self):
        """``in`` reports membership of IDs."""
        for present in ('a', 'b', 'c'):
            self.assertIn(present, self.dm_3x3)
        self.assertNotIn('d', self.dm_3x3)

    def test_getslice(self):
        """Slicing the first dimension returns an ndarray of rows."""
        # Slice of first dimension only. Test that __getslice__ defers to
        # __getitem__.
        rows = self.dm_2x2[1:]
        self.assertTrue(np.array_equal(rows, np.array([[0.123, 0.0]])))
        self.assertIs(type(rows), np.ndarray)

    def test_getitem_by_id(self):
        """Indexing with a single ID yields that ID's row as an array."""
        cases = (
            (self.dm_1x1, 'a', [0.0]),
            (self.dm_2x2_asym, 'b', [-2.0, 0.0]),
            (self.dm_3x3, 'c', [4.2, 12.0, 0.0]),
        )
        for matrix, identifier, expected_row in cases:
            row = matrix[identifier]
            self.assertTrue(np.array_equal(row, np.array(expected_row)))

        with self.assertRaises(MissingIDError):
            self.dm_2x2['c']

    def test_getitem_by_id_pair(self):
        """Indexing with an ID pair yields a single element."""
        # Same object.
        self.assertEqual(self.dm_1x1['a', 'a'], 0.0)

        # Different objects (symmetric).
        for pair in (('b', 'c'), ('c', 'b')):
            self.assertEqual(self.dm_3x3[pair], 12.0)

        # Different objects (asymmetric).
        self.assertEqual(self.dm_2x2_asym['a', 'b'], 1.0)
        self.assertEqual(self.dm_2x2_asym['b', 'a'], -2.0)

        with self.assertRaises(MissingIDError):
            self.dm_2x2['a', 'c']

    def test_getitem_ndarray_indexing(self):
        """Integer and slice indexing returns plain ndarray results."""
        # Single element access.
        self.assertEqual(self.dm_3x3[0, 1], 0.01)

        # Single element access (via two __getitem__ calls).
        self.assertEqual(self.dm_3x3[0][1], 0.01)

        # Row access.
        row = self.dm_3x3[1]
        self.assertTrue(np.array_equal(row, np.array([0.01, 0.0, 12.0])))
        self.assertIs(type(row), np.ndarray)

        # Grab all data.
        everything = self.dm_3x3[:, :]
        self.assertTrue(np.array_equal(everything, self.dm_3x3.data))
        self.assertIs(type(everything), np.ndarray)

        with self.assertRaises(IndexError):
            self.dm_3x3[:, 3]

    def test_validate_invalid_dtype(self):
        """``_validate`` rejects an array built from integer literals."""
        integer_data = np.array([[0, 42], [42, 0]])
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate(integer_data, ['a', 'b'])

    def test_pprint_ids(self):
        """``_pprint_ids`` joins IDs and truncates with an ellipsis."""
        # No truncation.
        self.assertEqual(self.dm_3x3._pprint_ids(), 'a, b, c')

        # Truncation.
        self.assertEqual(self.dm_3x3._pprint_ids(max_chars=5), 'a, b, ...')