コード例 #1
0
    def test_combines_two_empty(self):
        """Merge one never-updated accumulator into another.

        Both the mean and the covariance of the merge target are expected
        to be None afterwards.
        """
        target = variance_util.MeanCovAccumulator()
        other = variance_util.MeanCovAccumulator()

        target.merge(other)

        # No data was ever supplied to either accumulator.
        self.assertIsNone(target.mean)
        self.assertIsNone(target.covariance)
コード例 #2
0
    def test_single_observations(self):
        """Drive update() and merge() with one-row batches.

        After every step the mean and the flattened covariance are compared
        exactly against hand-written expected values.
        """
        first_row = np.array([[1, 2, 3]])
        second_row = np.array([[4, 5, 6]])
        third_row = np.array([[7, 8, 9]])
        primary = variance_util.MeanCovAccumulator()
        relay = variance_util.MeanCovAccumulator()
        source = variance_util.MeanCovAccumulator()

        def check(accumulator, expected_mean, expected_cov_entry):
            # Every one of the 9 flattened covariance entries is expected to
            # hold the same value in each scenario below.
            self.assertListEqual(expected_mean, list(accumulator.mean))
            self.assertListEqual([expected_cov_entry] * 9,
                                 list(accumulator.covariance.ravel()))

        # One observation.
        primary.update(first_row)
        check(primary, [1, 2, 3], 0)

        # Two observations via a second update().
        primary.update(second_row)
        check(primary, [2.5, 3.5, 4.5], 4.5)

        # A never-updated accumulator absorbs a single-observation one.
        source.update(third_row)
        relay.merge(source)
        check(relay, [7, 8, 9], 0)

        # Merge the single observation into the two-observation accumulator.
        primary.merge(relay)
        check(primary, [4, 5, 6], 9)
コード例 #3
0
    def test_merges_random_array(self, array_size, distribution_mean,
                                 distribution_variance, num_vectors):
        """Check that merging two partial accumulators matches NumPy.

        Random vectors are generated from a fixed seed, the rows are split at
        several positions, each part is fed to its own accumulator, and the
        merged mean/covariance are compared with `np.mean` / `np.cov`
        computed over all rows.

        Args:
            array_size: Size passed to `rng.standard_normal` for each vector.
            distribution_mean: Offset added to every standard-normal draw.
            distribution_variance: Variance of the draws; each draw is scaled
                by its square root.
            num_vectors: Number of vectors (rows) to generate.
        """
        rng = np.random.default_rng(4444444)
        vectors = np.asarray([
            rng.standard_normal(array_size) * np.sqrt(distribution_variance) +
            distribution_mean for _ in range(num_vectors)
        ])

        expected_mean = np.mean(vectors, axis=0)
        expected_covariance = np.cov(vectors, rowvar=False).ravel()

        # Check a variety of splits of the data. The split index slices rows,
        # so it is bounded by the row count rather than by `vectors.size`
        # (the total element count), which would repeat the same
        # "everything in the first half" split many times.
        num_rows = vectors.shape[0]
        step = 1 + num_rows // 100
        split_points = list(range(0, num_rows, step))
        if num_rows:
            # Keep exactly one split whose second half is empty.
            split_points.append(num_rows)

        for split in split_points:
            accumulator1 = variance_util.MeanCovAccumulator()
            accumulator1.update(vectors[:split])
            accumulator2 = variance_util.MeanCovAccumulator()
            accumulator2.update(vectors[split:])
            accumulator1.merge(accumulator2)
            actual_mean = accumulator1.mean
            actual_covariance = accumulator1.covariance.ravel()

            self.assertEqual(expected_mean.size, actual_mean.size)
            self.assertEqual(expected_covariance.size, actual_covariance.size)
            for expected, actual in zip(expected_mean, actual_mean):
                self.assertAlmostEqual(expected, actual)
            for expected, actual in zip(expected_covariance,
                                        actual_covariance):
                self.assertAlmostEqual(expected, actual)
コード例 #4
0
    def test_combines_non_empty_empty(self):
        """Merge a never-updated accumulator into a populated one.

        The populated accumulator's mean and covariance are compared exactly
        with `np.mean` / `np.cov` computed over the same rows.
        """
        data = np.array([[-1, 3, 6], [2, -5, 8], [4, 7, -9]])
        empty = variance_util.MeanCovAccumulator()
        populated = variance_util.MeanCovAccumulator()
        populated.update(data)

        populated.merge(empty)

        # Rows are observations, hence axis=0 and rowvar=False.
        self.assertListEqual(list(np.mean(data, axis=0)),
                             list(populated.mean))
        self.assertListEqual(list(np.cov(data, rowvar=False).ravel()),
                             list(populated.covariance.ravel()))
コード例 #5
0
    def test_update_random_array(self, array_size, distribution_mean,
                                 distribution_variance, num_vectors):
        """Check that batched update() calls match NumPy on the full data.

        Random vectors are generated from a fixed seed and fed to a single
        accumulator in batches of rows; the resulting mean/covariance are
        compared with `np.mean` / `np.cov` computed over all rows.

        Args:
            array_size: Size passed to `rng.standard_normal` for each vector.
            distribution_mean: Offset added to every standard-normal draw.
            distribution_variance: Variance of the draws; each draw is scaled
                by its square root.
            num_vectors: Number of vectors (rows) to generate.
        """
        rng = np.random.default_rng(4444444)
        vectors = np.asarray([
            rng.standard_normal(array_size) * np.sqrt(distribution_variance) +
            distribution_mean for _ in range(num_vectors)
        ])
        accumulator = variance_util.MeanCovAccumulator()

        # Feed the rows in batches. The loop is bounded by the row count, not
        # `vectors.size` (total element count), which would issue many
        # trailing updates with empty slices. Slicing simply yields a shorter
        # final batch when the row count is not a multiple of batch_size.
        batch_size = 10
        for start in range(0, vectors.shape[0], batch_size):
            accumulator.update(vectors[start:start + batch_size])

        expected_mean = np.mean(vectors, axis=0)
        expected_covariance = np.cov(vectors, rowvar=False).ravel()
        actual_mean = accumulator.mean
        actual_covariance = accumulator.covariance.ravel()

        self.assertEqual(expected_mean.size, actual_mean.size)
        self.assertEqual(expected_covariance.size, actual_covariance.size)
        for expected, actual in zip(expected_mean, actual_mean):
            self.assertAlmostEqual(expected, actual)
        for expected, actual in zip(expected_covariance, actual_covariance):
            self.assertAlmostEqual(expected, actual)
コード例 #6
0
    def test_initialize_from_array(self, vectors):
        """Check a single update() call against NumPy's mean and covariance.

        Args:
            vectors: Observations passed to `update`, `np.mean(axis=0)` and
                `np.cov(rowvar=False)`.
        """
        accumulator = variance_util.MeanCovAccumulator()
        accumulator.update(vectors)

        # (expected, actual) pairs: mean first, then flattened covariance.
        comparisons = (
            (np.mean(vectors, axis=0), accumulator.mean),
            (np.cov(vectors, rowvar=False).ravel(),
             accumulator.covariance.ravel()),
        )

        # Sizes are checked first because zip() below would silently
        # truncate to the shorter sequence.
        for reference, observed in comparisons:
            self.assertEqual(reference.size, observed.size)
        for reference, observed in comparisons:
            for expected, actual in zip(reference, observed):
                self.assertAlmostEqual(expected, actual)