Esempio n. 1
0
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests of DistanceMatrix using small 1x1, 2x2 and 3x3 fixtures."""

    def setUp(self):
        """Build DistanceMatrix fixtures and their expected condensed forms."""
        # The dm_*_data attributes are not assigned in this class;
        # presumably the parent setUp() provides them.
        super(DistanceMatrixTests, self).setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        # Expected condensed vectors, index-aligned with self.dms.
        self.dm_condensed_forms = [np.array([]), np.array([0.123]),
                                   np.array([0.01, 4.2, 12.0])]

    def test_deprecated_io(self):
        # to_file() and from_file() must each emit a UserWarning while still
        # round-tripping the matrix through the file handle.
        fh = StringIO()
        npt.assert_warns(UserWarning, self.dm_3x3.to_file, fh)
        fh.seek(0)
        deserialized = npt.assert_warns(UserWarning,
                                        DistanceMatrix.from_file, fh)
        self.assertEqual(deserialized, self.dm_3x3)
        # Exact type check: the result must be a DistanceMatrix itself.
        self.assertIs(type(deserialized), DistanceMatrix)

    def test_init_invalid_input(self):
        # Asymmetric.
        data = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_condensed_form(self):
        # Each fixture must yield its index-aligned expected vector.
        for dm, condensed in zip(self.dms, self.dm_condensed_forms):
            obs = dm.condensed_form()
            self.assertTrue(np.array_equal(obs, condensed))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2...
        for _ in range(2):
            obs = self.dm_1x1.permute(condensed=True)
            npt.assert_equal(obs, np.array([]))

        for _ in range(2):
            obs = self.dm_2x2.permute(condensed=True)
            npt.assert_equal(obs, np.array([0.123]))

        dm_copy = self.dm_3x3.copy()

        # Seed the global NumPy RNG so the two permutations below are
        # reproducible; the order of the permute() calls matters.
        np.random.seed(0)

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([12.0, 4.2, 0.01]))

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([4.2, 12.0, 0.01]))

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, dm_copy)

    def test_permute_not_condensed(self):
        # For 1x1 and 2x2 the permuted matrix equals the original but must be
        # a distinct object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        # Fixed seed: the expected matrices below depend on the RNG state and
        # on the order of the permute() calls.
        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2],
                              [12, 0, 0.01],
                              [4.2, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12],
                              [4.2, 0, 0.01],
                              [12, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs.
        eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        # `==` is spelled out with both operand orders on purpose.
        self.assertTrue(self.dm_3x3 == eq_dm)
        self.assertTrue(eq_dm == self.dm_3x3)
Esempio n. 2
0
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests of DistanceMatrix using small 1x1, 2x2 and 3x3 fixtures."""

    def setUp(self):
        """Build DistanceMatrix fixtures and their expected condensed forms."""
        # The dm_*_data attributes are not assigned in this class;
        # presumably the parent setUp() provides them.
        super(DistanceMatrixTests, self).setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        # Expected condensed vectors, index-aligned with self.dms.
        self.dm_condensed_forms = [
            np.array([]),
            np.array([0.123]),
            np.array([0.01, 4.2, 12.0])
        ]

    def test_init_from_condensed_form(self):
        # A flat list of three values and no IDs must equal the 3x3 matrix
        # below with IDs '0', '1', '2'.
        data = [1, 2, 3]
        exp = DistanceMatrix([[0, 1, 2], [1, 0, 3], [2, 3, 0]],
                             ['0', '1', '2'])
        res = DistanceMatrix(data)
        self.assertEqual(exp, res)

    def test_init_invalid_input(self):
        # Asymmetric.
        data = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Non-hollow
        data = [[1.0, 2.0], [2.0, 1.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_init_nans(self):
        # NaN entries must be rejected with a message mentioning 'NaNs'.
        with self.assertRaisesRegex(DistanceMatrixError, r'NaNs'):
            DistanceMatrix([[0.0, np.nan], [np.nan, 0.0]], ['a', 'b'])

    def test_from_iterable_no_key(self):
        # The input is deliberately a generator, not a list.
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a))
        self.assertEqual(res, exp)

    def test_from_iterable_validate_equal_valid_data(self):
        # For valid data, validate=True and validate=False must agree.
        validate_true = DistanceMatrix.from_iterable((x for x in range(4)),
                                                     lambda a, b: abs(b - a),
                                                     validate=True)
        validate_false = DistanceMatrix.from_iterable((x for x in range(4)),
                                                      lambda a, b: abs(b - a),
                                                      validate=False)
        self.assertEqual(validate_true, validate_false)

    def test_from_iterable_validate_false(self):
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable,
                                           lambda a, b: abs(b - a),
                                           validate=False)
        self.assertEqual(res, exp)

    def test_from_iterable_validate_non_hollow(self):
        # A constant metric of 1 gives a non-zero diagonal, which must be
        # rejected under the default validation.
        iterable = (x for x in range(4))
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: 1)

    def test_from_iterable_validate_false_non_symmetric(self):
        # With validate=False the asymmetric metric `a - b` is accepted and
        # the result must equal the symmetric matrix below.
        exp = DistanceMatrix([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable((x for x in range(4)),
                                           lambda a, b: a - b,
                                           validate=False)
        self.assertEqual(res, exp)

    def test_from_iterable_validate_asym(self):
        # The signed metric `b - a` must be rejected under the default
        # validation.
        iterable = (x for x in range(4))
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: b - a)

    def test_from_iterable_with_key(self):
        # `key` maps each element to its ID (here the squared value as str).
        iterable = (x for x in range(4))

        exp = DistanceMatrix(
            [[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]],
            ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable,
                                           lambda a, b: abs(b - a),
                                           key=lambda x: str(x**2))
        self.assertEqual(res, exp)

    def test_from_iterable_empty(self):
        # An empty iterable must be rejected.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix.from_iterable([], lambda x: x)

    def test_from_iterable_single(self):
        # A single element must produce the 1x1 zero matrix.
        exp = DistanceMatrix([[0]])
        res = DistanceMatrix.from_iterable(["boo"], lambda a, b: 0)
        self.assertEqual(res, exp)

    def test_from_iterable_with_keys(self):
        # `keys` supplies the IDs directly; an iterator is passed on purpose.
        iterable = (x for x in range(4))

        exp = DistanceMatrix(
            [[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]],
            ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable,
                                           lambda a, b: abs(b - a),
                                           keys=iter(['0', '1', '4', '9']))
        self.assertEqual(res, exp)

    def test_from_iterable_with_key_and_keys(self):
        # Passing both `key` and `keys` must raise ValueError.
        iterable = (x for x in range(4))
        with self.assertRaises(ValueError):
            DistanceMatrix.from_iterable(iterable,
                                         lambda a, b: abs(b - a),
                                         key=str,
                                         keys=['1', '2', '3', '4'])

    def test_from_iterable_scipy_hamming_metric_with_metadata(self):
        # test for #1254
        # The sequences differ in the metadata they carry; the last two share
        # the same characters and are expected to be at distance 0.
        seqs = [
            Sequence('ACGT'),
            Sequence('ACGA', metadata={'id': 'seq1'}),
            Sequence('AAAA', metadata={'id': 'seq2'}),
            Sequence('AAAA', positional_metadata={'qual': range(4)})
        ]

        exp = DistanceMatrix([[0, 0.25, 0.75, 0.75], [0.25, 0.0, 0.5, 0.5],
                              [0.75, 0.5, 0.0, 0.0], [0.75, 0.5, 0.0, 0.0]],
                             ['a', 'b', 'c', 'd'])

        dm = DistanceMatrix.from_iterable(
            seqs,
            metric=scipy.spatial.distance.hamming,
            keys=['a', 'b', 'c', 'd'])

        self.assertEqual(dm, exp)

    def test_from_iterable_skbio_hamming_metric_with_metadata(self):
        # test for #1254
        # Same inputs and expectation as the scipy variant, but using the
        # skbio hamming metric.
        seqs = [
            Sequence('ACGT'),
            Sequence('ACGA', metadata={'id': 'seq1'}),
            Sequence('AAAA', metadata={'id': 'seq2'}),
            Sequence('AAAA', positional_metadata={'qual': range(4)})
        ]

        exp = DistanceMatrix([[0, 0.25, 0.75, 0.75], [0.25, 0.0, 0.5, 0.5],
                              [0.75, 0.5, 0.0, 0.0], [0.75, 0.5, 0.0, 0.0]],
                             ['a', 'b', 'c', 'd'])

        dm = DistanceMatrix.from_iterable(
            seqs,
            metric=skbio.sequence.distance.hamming,
            keys=['a', 'b', 'c', 'd'])

        self.assertEqual(dm, exp)

    def test_condensed_form(self):
        # Each fixture must yield its index-aligned expected vector.
        for dm, condensed in zip(self.dms, self.dm_condensed_forms):
            obs = dm.condensed_form()
            self.assertTrue(np.array_equal(obs, condensed))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2...
        for _ in range(2):
            obs = self.dm_1x1.permute(condensed=True)
            npt.assert_equal(obs, np.array([]))

        for _ in range(2):
            obs = self.dm_2x2.permute(condensed=True)
            npt.assert_equal(obs, np.array([0.123]))

        dm_copy = self.dm_3x3.copy()

        # Seed the global NumPy RNG so the two permutations below are
        # reproducible; the order of the permute() calls matters.
        np.random.seed(0)

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([12.0, 4.2, 0.01]))

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([4.2, 12.0, 0.01]))

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, dm_copy)

    def test_permute_not_condensed(self):
        # For 1x1 and 2x2 the permuted matrix equals the original but must be
        # a distinct object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        # Fixed seed: the expected matrices below depend on the RNG state and
        # on the order of the permute() calls.
        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2], [12, 0, 0.01], [4.2, 0.01, 0]],
                             self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]],
                             self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs.
        eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        # `==` is spelled out with both operand orders on purpose.
        self.assertTrue(self.dm_3x3 == eq_dm)
        self.assertTrue(eq_dm == self.dm_3x3)

    def test_to_series_1x1(self):
        # A 1x1 matrix has no pairs, so the expected series is empty.
        series = self.dm_1x1.to_series()

        exp = pd.Series([], index=[])
        assert_series_almost_equal(series, exp)

    def test_to_series_2x2(self):
        series = self.dm_2x2.to_series()

        exp = pd.Series([0.123], index=pd.Index([('a', 'b')]))
        assert_series_almost_equal(series, exp)

    def test_to_series_4x4(self):
        dm = DistanceMatrix([[0.0, 0.2, 0.3, 0.4], [0.2, 0.0, 0.5, 0.6],
                             [0.3, 0.5, 0.0, 0.7], [0.4, 0.6, 0.7, 0.0]],
                            ['a', 'b', 'c', 'd'])

        series = dm.to_series()

        # One entry per ID pair from the upper triangle, in row-major order.
        exp = pd.Series([0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
                        index=pd.Index([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                        ('b', 'c'), ('b', 'd'), ('c', 'd')]))
        assert_series_almost_equal(series, exp)

    def test_to_series_default_ids(self):
        # Without explicit IDs the index is expected to use '0' and '1'.
        series = DistanceMatrix(self.dm_2x2_data).to_series()

        exp = pd.Series([0.123], index=pd.Index([('0', '1')]))
        assert_series_almost_equal(series, exp)

    def test_validate_asym_shape(self):
        # _validate_shape checks just the shape, not the content, so it must
        # accept every 2x2 array below, even those that the symmetry and
        # hollowness helpers report as invalid.
        cases = [
            # (data, expected symmetric, expected hollow)
            (np.array([[0., 42.], [42., 0.]]), True, True),  # good matrix
            (np.array([[1., 2.], [3., 4.]]), False, False),  # bad data
            (np.array([[0., 2.], [3., 0.]]), False, True),   # partially bad
        ]
        for data, exp_sym, exp_hollow in cases:
            # Combined helper first, then each standalone helper.
            obs_sym, obs_hollow = is_symmetric_and_hollow(data)
            self.assertEqual(obs_sym, exp_sym)
            self.assertEqual(obs_hollow, exp_hollow)

            obs_sym = skbio.stats.distance._utils.is_symmetric(data)
            self.assertEqual(obs_sym, exp_sym)

            obs_hollow = skbio.stats.distance._utils.is_hollow(data)
            self.assertEqual(obs_hollow, exp_hollow)

            # Must not raise, whatever the content of the array.
            self.dm_3x3._validate_shape(data)
Esempio n. 3
0
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests of DistanceMatrix using small 1x1, 2x2 and 3x3 fixtures."""

    def setUp(self):
        """Build DistanceMatrix fixtures and their expected condensed forms."""
        # The dm_*_data attributes are not assigned in this class;
        # presumably the parent setUp() provides them.
        super(DistanceMatrixTests, self).setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        # Expected condensed vectors, index-aligned with self.dms.
        self.dm_condensed_forms = [np.array([]), np.array([0.123]),
                                   np.array([0.01, 4.2, 12.0])]

    def test_init_invalid_input(self):
        # Asymmetric.
        data = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_from_iterable_no_key(self):
        # The input is deliberately a generator, not a list.
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a))
        self.assertEqual(res, exp)

    def test_from_iterable_with_key(self):
        # `key` maps each element to its ID (here the squared value as str).
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]], ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           key=lambda x: str(x**2))
        self.assertEqual(res, exp)

    def test_from_iterable_empty(self):
        # An empty iterable must be rejected.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix.from_iterable([], lambda x: x)

    def test_from_iterable_single(self):
        # A single element must produce the 1x1 zero matrix.
        # NOTE(review): the one-argument metric returning 100 is presumably
        # never invoked for a single element -- confirm against
        # from_iterable().
        exp = DistanceMatrix([[0]])
        res = DistanceMatrix.from_iterable(["boo"], lambda _: 100)
        self.assertEqual(res, exp)

    def test_from_iterable_with_keys(self):
        # `keys` supplies the IDs directly; an iterator is passed on purpose.
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]], ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           keys=iter(['0', '1', '4', '9']))
        self.assertEqual(res, exp)

    def test_from_iterable_with_key_and_keys(self):
        # Passing both `key` and `keys` must raise ValueError.
        iterable = (x for x in range(4))
        with self.assertRaises(ValueError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                         key=str, keys=['1', '2', '3', '4'])

    def test_condensed_form(self):
        # Each fixture must yield its index-aligned expected vector.
        for dm, condensed in zip(self.dms, self.dm_condensed_forms):
            obs = dm.condensed_form()
            self.assertTrue(np.array_equal(obs, condensed))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2...
        for _ in range(2):
            obs = self.dm_1x1.permute(condensed=True)
            npt.assert_equal(obs, np.array([]))

        for _ in range(2):
            obs = self.dm_2x2.permute(condensed=True)
            npt.assert_equal(obs, np.array([0.123]))

        dm_copy = self.dm_3x3.copy()

        # Seed the global NumPy RNG so the two permutations below are
        # reproducible; the order of the permute() calls matters.
        np.random.seed(0)

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([12.0, 4.2, 0.01]))

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([4.2, 12.0, 0.01]))

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, dm_copy)

    def test_permute_not_condensed(self):
        # For 1x1 and 2x2 the permuted matrix equals the original but must be
        # a distinct object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        # Fixed seed: the expected matrices below depend on the RNG state and
        # on the order of the permute() calls.
        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2],
                              [12, 0, 0.01],
                              [4.2, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12],
                              [4.2, 0, 0.01],
                              [12, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs.
        eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        # `==` is spelled out with both operand orders on purpose.
        self.assertTrue(self.dm_3x3 == eq_dm)
        self.assertTrue(eq_dm == self.dm_3x3)
Esempio n. 4
0
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests of DistanceMatrix using small 1x1, 2x2 and 3x3 fixtures."""

    def setUp(self):
        """Build DistanceMatrix fixtures and their expected condensed forms."""
        # The dm_*_data attributes are not assigned in this class;
        # presumably the parent setUp() provides them.
        super(DistanceMatrixTests, self).setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        # Expected condensed vectors, index-aligned with self.dms.
        self.dm_condensed_forms = [
            np.array([]),
            np.array([0.123]),
            np.array([0.01, 4.2, 12.0])
        ]

    def test_init_from_condensed_form(self):
        # A flat list of three values and no IDs must equal the 3x3 matrix
        # below with IDs '0', '1', '2'.
        data = [1, 2, 3]
        exp = DistanceMatrix([[0, 1, 2], [1, 0, 3], [2, 3, 0]],
                             ['0', '1', '2'])
        res = DistanceMatrix(data)
        self.assertEqual(exp, res)

    def test_init_invalid_input(self):
        # Asymmetric.
        data = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_from_iterable_no_key(self):
        # The input is deliberately a generator, not a list.
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a))
        self.assertEqual(res, exp)

    def test_from_iterable_with_key(self):
        # `key` maps each element to its ID (here the squared value as str).
        iterable = (x for x in range(4))

        exp = DistanceMatrix(
            [[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]],
            ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable,
                                           lambda a, b: abs(b - a),
                                           key=lambda x: str(x**2))
        self.assertEqual(res, exp)

    def test_from_iterable_empty(self):
        # An empty iterable must be rejected.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix.from_iterable([], lambda x: x)

    def test_from_iterable_single(self):
        # A single element must produce the 1x1 zero matrix.
        # NOTE(review): the one-argument metric returning 100 is presumably
        # never invoked for a single element -- confirm against
        # from_iterable().
        exp = DistanceMatrix([[0]])
        res = DistanceMatrix.from_iterable(["boo"], lambda _: 100)
        self.assertEqual(res, exp)

    def test_from_iterable_with_keys(self):
        # `keys` supplies the IDs directly; an iterator is passed on purpose.
        iterable = (x for x in range(4))

        exp = DistanceMatrix(
            [[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]],
            ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable,
                                           lambda a, b: abs(b - a),
                                           keys=iter(['0', '1', '4', '9']))
        self.assertEqual(res, exp)

    def test_from_iterable_with_key_and_keys(self):
        # Passing both `key` and `keys` must raise ValueError.
        iterable = (x for x in range(4))
        with self.assertRaises(ValueError):
            DistanceMatrix.from_iterable(iterable,
                                         lambda a, b: abs(b - a),
                                         key=str,
                                         keys=['1', '2', '3', '4'])

    def test_condensed_form(self):
        # Each fixture must yield its index-aligned expected vector.
        for dm, condensed in zip(self.dms, self.dm_condensed_forms):
            obs = dm.condensed_form()
            self.assertTrue(np.array_equal(obs, condensed))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2...
        for _ in range(2):
            obs = self.dm_1x1.permute(condensed=True)
            npt.assert_equal(obs, np.array([]))

        for _ in range(2):
            obs = self.dm_2x2.permute(condensed=True)
            npt.assert_equal(obs, np.array([0.123]))

        dm_copy = self.dm_3x3.copy()

        # Seed the global NumPy RNG so the two permutations below are
        # reproducible; the order of the permute() calls matters.
        np.random.seed(0)

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([12.0, 4.2, 0.01]))

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([4.2, 12.0, 0.01]))

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, dm_copy)

    def test_permute_not_condensed(self):
        # For 1x1 and 2x2 the permuted matrix equals the original but must be
        # a distinct object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        # Fixed seed: the expected matrices below depend on the RNG state and
        # on the order of the permute() calls.
        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2], [12, 0, 0.01], [4.2, 0.01, 0]],
                             self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]],
                             self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs.
        eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        # `==` is spelled out with both operand orders on purpose.
        self.assertTrue(self.dm_3x3 == eq_dm)
        self.assertTrue(eq_dm == self.dm_3x3)
Esempio n. 5
0
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests of DistanceMatrix using small 1x1, 2x2 and 3x3 fixtures."""

    def setUp(self):
        """Build DistanceMatrix fixtures and their expected condensed forms."""
        # The dm_*_data attributes (and the file fixtures used by the
        # from_file tests) are not assigned in this class; presumably the
        # parent setUp() provides them.
        super(DistanceMatrixTests, self).setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        # Expected condensed vectors, index-aligned with self.dms.
        self.dm_condensed_forms = [
            np.array([]),
            np.array([0.123]),
            np.array([0.01, 4.2, 12.0])
        ]

    def test_from_file_with_file_path(self):
        """Should identify the filepath correctly and parse from it."""

        # should fail with the expected exception
        with self.assertRaises(DissimilarityMatrixFormatError):
            DistanceMatrix.from_file(self.bad_dm_fp)

        obs = DistanceMatrix.from_file(self.dm_3x3_fp)
        self.assertEqual(self.dm_3x3, obs)
        self.assertIsInstance(obs, DistanceMatrix)

    def test_from_file_invalid_input(self):
        """Raises error on invalid distance matrix file."""
        # Asymmetric.
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_file(self.dm_2x2_asym_f)

    def test_init_invalid_input(self):
        """Raises error on invalid distance matrix data / IDs."""
        # Asymmetric.
        data = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_condensed_form(self):
        """Test retrieving the data matrix in condensed form."""
        for dm, condensed in zip(self.dms, self.dm_condensed_forms):
            obs = dm.condensed_form()
            self.assertTrue(np.array_equal(obs, condensed))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2...
        for _ in range(2):
            obs = self.dm_1x1.permute(condensed=True)
            npt.assert_equal(obs, np.array([]))

        for _ in range(2):
            obs = self.dm_2x2.permute(condensed=True)
            npt.assert_equal(obs, np.array([0.123]))

        dm_copy = self.dm_3x3.copy()

        # Seed the global NumPy RNG so the two permutations below are
        # reproducible; the order of the permute() calls matters.
        np.random.seed(0)

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([12.0, 4.2, 0.01]))

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([4.2, 12.0, 0.01]))

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, dm_copy)

    def test_permute_not_condensed(self):
        # For 1x1 and 2x2 the permuted matrix equals the original but must be
        # a distinct object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        # Fixed seed: the expected matrices below depend on the RNG state and
        # on the order of the permute() calls.
        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2], [12, 0, 0.01], [4.2, 0.01, 0]],
                             self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]],
                             self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        """Test data equality between different matrix types."""
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs.
        eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        # `==` is spelled out with both operand orders on purpose.
        self.assertTrue(self.dm_3x3 == eq_dm)
        self.assertTrue(eq_dm == self.dm_3x3)

    def test_validate(self):
        """Empty stub: DistanceMatrix._validate tested elsewhere."""
Esempio n. 6
0
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests of DistanceMatrix using small 1x1, 2x2 and 3x3 fixtures."""

    def setUp(self):
        """Build DistanceMatrix fixtures and their expected condensed forms."""
        # The dm_*_data attributes (and the file fixtures used by the
        # from_file tests) are not assigned in this class; presumably the
        # parent setUp() provides them.
        super(DistanceMatrixTests, self).setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        # Expected condensed vectors, index-aligned with self.dms.
        self.dm_condensed_forms = [np.array([]), np.array([0.123]),
                                   np.array([0.01, 4.2, 12.0])]

    def test_from_file_with_file_path(self):
        """Should identify the filepath correctly and parse from it."""

        # should fail with the expected exception
        with self.assertRaises(DissimilarityMatrixFormatError):
            DistanceMatrix.from_file(self.bad_dm_fp)

        obs = DistanceMatrix.from_file(self.dm_3x3_fp)
        self.assertEqual(self.dm_3x3, obs)
        self.assertIsInstance(obs, DistanceMatrix)

    def test_from_file_invalid_input(self):
        """Raises error on invalid distance matrix file."""
        # Asymmetric.
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_file(self.dm_2x2_asym_f)

    def test_init_invalid_input(self):
        """Raises error on invalid distance matrix data / IDs."""
        # Asymmetric.
        data = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_condensed_form(self):
        """Test retrieving the data matrix in condensed form."""
        for dm, condensed in zip(self.dms, self.dm_condensed_forms):
            obs = dm.condensed_form()
            self.assertTrue(np.array_equal(obs, condensed))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2...
        for _ in range(2):
            obs = self.dm_1x1.permute(condensed=True)
            npt.assert_equal(obs, np.array([]))

        for _ in range(2):
            obs = self.dm_2x2.permute(condensed=True)
            npt.assert_equal(obs, np.array([0.123]))

        dm_copy = self.dm_3x3.copy()

        # Seed the global NumPy RNG so the two permutations below are
        # reproducible; the order of the permute() calls matters.
        np.random.seed(0)

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([12.0, 4.2, 0.01]))

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([4.2, 12.0, 0.01]))

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, dm_copy)

    def test_permute_not_condensed(self):
        # For 1x1 and 2x2 the permuted matrix equals the original but must be
        # a distinct object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        # Fixed seed: the expected matrices below depend on the RNG state and
        # on the order of the permute() calls.
        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2],
                              [12, 0, 0.01],
                              [4.2, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12],
                              [4.2, 0, 0.01],
                              [12, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        """Test data equality between different matrix types."""
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs.
        eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        # `==` is spelled out with both operand orders on purpose.
        self.assertTrue(self.dm_3x3 == eq_dm)
        self.assertTrue(eq_dm == self.dm_3x3)

    def test_validate(self):
        """Empty stub: DistanceMatrix._validate tested elsewhere."""
Esempio n. 7
0
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests for DistanceMatrix: construction, from_iterable and permute.

    The ``dm_*_data`` attributes read in ``setUp`` are presumably provided by
    the base class's ``setUp`` -- verify against DissimilarityMatrixTestData.
    """

    def setUp(self):
        # Zero-argument super() resolves through this class object itself
        # instead of looking up the module-level name, so it keeps working
        # even if the name DistanceMatrixTests is rebound later in the module.
        super().setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        # Expected condensed vectors, index-aligned with self.dms.
        self.dm_condensed_forms = [np.array([]), np.array([0.123]),
                                   np.array([0.01, 4.2, 12.0])]

    def _assert_hamming_from_iterable(self, metric):
        # Shared body of the two hamming tests (regression check for #1254):
        # sequences with and without metadata / positional metadata are fed
        # to from_iterable using the given metric callable.
        seqs = [
            Sequence('ACGT'),
            Sequence('ACGA', metadata={'id': 'seq1'}),
            Sequence('AAAA', metadata={'id': 'seq2'}),
            Sequence('AAAA', positional_metadata={'qual': range(4)})
        ]

        exp = DistanceMatrix([
            [0, 0.25, 0.75, 0.75],
            [0.25, 0.0, 0.5, 0.5],
            [0.75, 0.5, 0.0, 0.0],
            [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd'])

        dm = DistanceMatrix.from_iterable(
            seqs,
            metric=metric,
            keys=['a', 'b', 'c', 'd'])

        self.assertEqual(dm, exp)

    def test_init_from_condensed_form(self):
        # A flat 3-element vector passed without IDs must equal the 3x3
        # matrix below, whose IDs are '0', '1', '2'.
        data = [1, 2, 3]
        exp = DistanceMatrix([[0, 1, 2],
                              [1, 0, 3],
                              [2, 3, 0]], ['0', '1', '2'])
        res = DistanceMatrix(data)
        self.assertEqual(exp, res)

    def test_init_invalid_input(self):
        # Asymmetric.
        data = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Non-hollow
        data = [[1.0, 2.0], [2.0, 1.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(data, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_init_nans(self):
        # NaN entries must be rejected with an error message matching 'NaNs'.
        with self.assertRaisesRegex(DistanceMatrixError, 'NaNs'):
            DistanceMatrix([[0.0, np.nan], [np.nan, 0.0]], ['a', 'b'])

    def test_from_iterable_no_key(self):
        # Neither key nor keys is supplied; the result is compared with a
        # matrix that was also built without explicit IDs.
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a))
        self.assertEqual(res, exp)

    def test_from_iterable_validate_equal_valid_data(self):
        # For a valid (symmetric, hollow) metric the validate flag must not
        # change the resulting matrix.
        validate_true = DistanceMatrix.from_iterable((x for x in range(4)),
                                                     lambda a, b: abs(b - a),
                                                     validate=True)
        validate_false = DistanceMatrix.from_iterable((x for x in range(4)),
                                                      lambda a, b: abs(b - a),
                                                      validate=False)
        self.assertEqual(validate_true, validate_false)

    def test_from_iterable_validate_false(self):
        # validate=False with a valid metric yields the expected matrix.
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           validate=False)
        self.assertEqual(res, exp)

    def test_from_iterable_validate_non_hollow(self):
        # A constant metric of 1 gives a non-zero diagonal, which must be
        # rejected when validation is left at its default.
        iterable = (x for x in range(4))
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: 1)

    def test_from_iterable_validate_false_non_symmetric(self):
        # With validate=False an asymmetric metric (a - b) is still expected
        # to produce the symmetric matrix below.
        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]])
        res = DistanceMatrix.from_iterable((x for x in range(4)),
                                           lambda a, b: a - b,
                                           validate=False)
        self.assertEqual(res, exp)

    def test_from_iterable_validate_asym(self):
        # The same kind of asymmetric metric must be rejected when
        # validation is left at its default.
        iterable = (x for x in range(4))
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: b - a)

    def test_from_iterable_with_key(self):
        # IDs are derived from each element through the key callable.
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]], ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           key=lambda x: str(x**2))
        self.assertEqual(res, exp)

    def test_from_iterable_empty(self):
        # An empty iterable must raise the base-class error type.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix.from_iterable([], lambda x: x)

    def test_from_iterable_single(self):
        # A single element yields a 1x1 matrix.
        exp = DistanceMatrix([[0]])
        res = DistanceMatrix.from_iterable(["boo"], lambda a, b: 0)
        self.assertEqual(res, exp)

    def test_from_iterable_with_keys(self):
        # IDs are supplied directly through keys, here as an iterator.
        iterable = (x for x in range(4))

        exp = DistanceMatrix([[0, 1, 2, 3],
                              [1, 0, 1, 2],
                              [2, 1, 0, 1],
                              [3, 2, 1, 0]], ['0', '1', '4', '9'])
        res = DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                           keys=iter(['0', '1', '4', '9']))
        self.assertEqual(res, exp)

    def test_from_iterable_with_key_and_keys(self):
        # Passing both key and keys must raise ValueError.
        iterable = (x for x in range(4))
        with self.assertRaises(ValueError):
            DistanceMatrix.from_iterable(iterable, lambda a, b: abs(b - a),
                                         key=str, keys=['1', '2', '3', '4'])

    def test_from_iterable_scipy_hamming_metric_with_metadata(self):
        # test for #1254
        self._assert_hamming_from_iterable(scipy.spatial.distance.hamming)

    def test_from_iterable_skbio_hamming_metric_with_metadata(self):
        # test for #1254
        self._assert_hamming_from_iterable(skbio.sequence.distance.hamming)

    def test_condensed_form(self):
        # Each fixture's condensed form must match its expected vector.
        for dm, condensed in zip(self.dms, self.dm_condensed_forms):
            obs = dm.condensed_form()
            self.assertTrue(np.array_equal(obs, condensed))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2...
        for _ in range(2):
            obs = self.dm_1x1.permute(condensed=True)
            npt.assert_equal(obs, np.array([]))

        for _ in range(2):
            obs = self.dm_2x2.permute(condensed=True)
            npt.assert_equal(obs, np.array([0.123]))

        dm_copy = self.dm_3x3.copy()

        # Seed NumPy's global RNG so the two permutations below are
        # reproducible.
        np.random.seed(0)

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([12.0, 4.2, 0.01]))

        obs = self.dm_3x3.permute(condensed=True)
        npt.assert_equal(obs, np.array([4.2, 12.0, 0.01]))

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, dm_copy)

    def test_permute_not_condensed(self):
        # For the 1x1 and 2x2 fixtures the result must equal the original
        # matrix while being a distinct object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        # assertIsNot is the dedicated identity assertion and names the
        # offending object on failure, unlike assertFalse(a is b).
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        # Seed NumPy's global RNG so the two permutations below are
        # reproducible; the expected matrices keep the original IDs.
        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2],
                              [12, 0, 0.01],
                              [4.2, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12],
                              [4.2, 0, 0.01],
                              [12, 0.01, 0]], self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs. Both operand orders are checked explicitly.
        eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        self.assertTrue(self.dm_3x3 == eq_dm)
        self.assertTrue(eq_dm == self.dm_3x3)
# Example no. 8
# 0
class DistanceMatrixTests(DissimilarityMatrixTestData):
    def setUp(self):
        # Run the base-class setUp first; the dm_*_data attributes read below
        # are presumably created there -- TODO confirm.
        super(DistanceMatrixTests, self).setUp()

        # Each fixture takes as many IDs as it has rows; every slice is a
        # fresh list, so no ID list is shared between matrices.
        ids = ['a', 'b', 'c']
        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ids[:1])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ids[:2])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ids[:3])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]

        # Expected condensed vectors, in the same order as self.dms.
        condensed_values = ([], [0.123], [0.01, 4.2, 12.0])
        self.dm_condensed_forms = [np.array(v) for v in condensed_values]

    def test_init_invalid_input(self):
        # A matrix whose (0, 1) and (1, 0) entries differ must be rejected.
        asymmetric = [[0.0, 2.0], [1.0, 0.0]]
        self.assertRaises(DistanceMatrixError, DistanceMatrix, asymmetric,
                          ['a', 'b'])

        # A 1x3 input must raise the base-class error type, showing that the
        # superclass validation is still being performed.
        self.assertRaises(DissimilarityMatrixError, DistanceMatrix,
                          [[1, 2, 3]], ['a'])

    def test_condensed_form(self):
        # Pair every fixture matrix with its expected condensed vector and
        # require an exact element-wise match.
        pairs = zip(self.dms, self.dm_condensed_forms)
        for matrix, expected in pairs:
            self.assertTrue(np.array_equal(matrix.condensed_form(), expected))

    def test_permute_condensed(self):
        # The 1x1 and 2x2 fixtures are expected to give the same condensed
        # vector on every call, so each is permuted twice.
        for matrix, values in ((self.dm_1x1, []), (self.dm_2x2, [0.123])):
            for _ in range(2):
                result = matrix.permute(condensed=True)
                npt.assert_equal(result, np.array(values))

        original = self.dm_3x3.copy()

        # Seed NumPy's global RNG, then pin the outcome of two successive
        # permutations of the 3x3 fixture.
        np.random.seed(0)
        expected_sequence = ([12.0, 4.2, 0.01], [4.2, 12.0, 0.01])
        for values in expected_sequence:
            result = self.dm_3x3.permute(condensed=True)
            npt.assert_equal(result, np.array(values))

        # permute() must not have modified the matrix it was called on.
        self.assertEqual(self.dm_3x3, original)

    def test_permute_not_condensed(self):
        # For the 1x1 and 2x2 fixtures the result must equal the original
        # matrix while being a distinct object.
        obs = self.dm_1x1.permute()
        self.assertEqual(obs, self.dm_1x1)
        # assertIsNot is the dedicated identity assertion and names the
        # offending object on failure, unlike assertFalse(a is b).
        self.assertIsNot(obs, self.dm_1x1)

        obs = self.dm_2x2.permute()
        self.assertEqual(obs, self.dm_2x2)
        self.assertIsNot(obs, self.dm_2x2)

        # Seed NumPy's global RNG so the two 3x3 permutations below are
        # reproducible; the expected matrices keep the original IDs.
        np.random.seed(0)

        exp = DistanceMatrix([[0, 12, 4.2], [12, 0, 0.01], [4.2, 0.01, 0]],
                             self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

        exp = DistanceMatrix([[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]],
                             self.dm_3x3.ids)
        obs = self.dm_3x3.permute()
        self.assertEqual(obs, exp)

    def test_eq(self):
        # A DissimilarityMatrix built from the same data and IDs as the 3x3
        # DistanceMatrix fixture must compare equal to it, whichever object
        # is the left operand.
        other = DissimilarityMatrix(self.dm_3x3_data, list('abc'))
        for lhs, rhs in ((self.dm_3x3, other), (other, self.dm_3x3)):
            self.assertTrue(lhs == rhs)