def test_combines_two_empty(self):
    """Merging two accumulators that never received data yields no statistics.

    Neither accumulator is updated before the merge, so both `mean` and
    `covariance` of the merge target are expected to be None.
    """
    target = variance_util.MeanCovAccumulator()
    target.merge(variance_util.MeanCovAccumulator())
    for attribute in ("mean", "covariance"):
        self.assertIsNone(getattr(target, attribute))
def test_single_observations(self):
    """Step single-row inputs through update() and merge(), checking each stage.

    Three one-row arrays are pushed through a mix of updates and merges; after
    every step the mean and the nine flattened covariance entries are compared
    against hand-computed values.
    """
    first_row = np.array([[1, 2, 3]])
    second_row = np.array([[4, 5, 6]])
    third_row = np.array([[7, 8, 9]])

    def check(accumulator, mean, covariance_entry):
        # In every case below all nine covariance entries share one value,
        # so a single number describes the whole flattened matrix.
        self.assertListEqual(mean, list(accumulator.mean))
        self.assertListEqual([covariance_entry] * 9,
                             list(accumulator.covariance.ravel()))

    running = variance_util.MeanCovAccumulator()
    relay = variance_util.MeanCovAccumulator()
    single = variance_util.MeanCovAccumulator()

    # One observation: the mean is the row itself and the covariance is zero.
    running.update(first_row)
    check(running, [1, 2, 3], 0)

    # A second observation arriving through update().
    running.update(second_row)
    check(running, [2.5, 3.5, 4.5], 4.5)

    # An empty accumulator that merges a one-observation accumulator.
    single.update(third_row)
    relay.merge(single)
    check(relay, [7, 8, 9], 0)

    # Merging that result into the two-observation accumulator.
    running.merge(relay)
    check(running, [4, 5, 6], 9)
def test_merges_random_array(self, array_size, distribution_mean,
                             distribution_variance, num_vectors):
    """Check that merging two partial accumulators matches NumPy's statistics.

    Generates `num_vectors` normally distributed vectors from a fixed seed,
    splits them at several row positions, accumulates each part in its own
    accumulator, merges the two, and compares the merged mean and flattened
    covariance against `np.mean` / `np.cov` of the complete data.

    Args:
        array_size: Shape argument handed to `rng.standard_normal` for each
            generated vector.
        distribution_mean: Offset added to every sample.
        distribution_variance: Variance of the samples; its square root
            scales the standard-normal draws.
        num_vectors: Number of vectors to generate.

    NOTE(review): the arguments are presumably supplied by a parameterized
    test decorator outside this view - confirm.
    """
    rng = np.random.default_rng(4444444)
    scale = np.sqrt(distribution_variance)
    vectors = np.asarray([
        rng.standard_normal(array_size) * scale + distribution_mean
        for _ in range(num_vectors)
    ])

    expected_mean = np.mean(vectors, axis=0)
    expected_covariance = np.cov(vectors, rowvar=False).ravel()

    # Split positions are row indices, so they must be derived from the row
    # count. `vectors.size` is the total element count; stepping over it
    # yields mostly positions past the last row, which all collapse to the
    # same "everything / nothing" split, and a step that can skip every
    # interior split.
    num_rows = vectors.shape[0]
    step = 1 + num_rows // 100
    splits = list(range(0, num_rows, step))
    if splits:
        # Also cover the case where the second part is empty.
        splits.append(num_rows)

    # Check a variety of splits of the data.
    for split in splits:
        head = variance_util.MeanCovAccumulator()
        head.update(vectors[:split])
        tail = variance_util.MeanCovAccumulator()
        tail.update(vectors[split:])
        head.merge(tail)

        actual_mean = head.mean
        actual_covariance = head.covariance.ravel()
        self.assertEqual(expected_mean.size, actual_mean.size)
        self.assertEqual(expected_covariance.size, actual_covariance.size)
        for expected, actual in zip(expected_mean, actual_mean):
            self.assertAlmostEqual(expected, actual)
        for expected, actual in zip(expected_covariance, actual_covariance):
            self.assertAlmostEqual(expected, actual)
def test_combines_non_empty_empty(self):
    """Merging an empty accumulator into a populated one keeps its statistics.

    After the merge, the populated accumulator's mean and flattened covariance
    must equal `np.mean` / `np.cov` of the data it was updated with, compared
    element by element with exact list equality.
    """
    data = np.array([[-1, 3, 6], [2, -5, 8], [4, 7, -9]])
    empty = variance_util.MeanCovAccumulator()
    populated = variance_util.MeanCovAccumulator()
    populated.update(data)
    populated.merge(empty)

    reference = (np.mean(data, axis=0), np.cov(data, rowvar=False).ravel())
    observed = (populated.mean, populated.covariance.ravel())
    for want, got in zip(reference, observed):
        self.assertListEqual(list(want), list(got))
def test_update_random_array(self, array_size, distribution_mean,
                             distribution_variance, num_vectors):
    """Check that batched update() calls match NumPy's statistics.

    Generates `num_vectors` normally distributed vectors from a fixed seed,
    feeds them to a single accumulator in batches of ten rows, and compares
    the resulting mean and flattened covariance against `np.mean` / `np.cov`
    of the complete data.

    Args:
        array_size: Shape argument handed to `rng.standard_normal` for each
            generated vector.
        distribution_mean: Offset added to every sample.
        distribution_variance: Variance of the samples; its square root
            scales the standard-normal draws.
        num_vectors: Number of vectors to generate.

    NOTE(review): the arguments are presumably supplied by a parameterized
    test decorator outside this view - confirm.
    """
    rng = np.random.default_rng(4444444)
    scale = np.sqrt(distribution_variance)
    vectors = np.asarray([
        rng.standard_normal(array_size) * scale + distribution_mean
        for _ in range(num_vectors)
    ])

    accumulator = variance_util.MeanCovAccumulator()
    # Feed the rows in batches. The bound is the row count, not
    # `vectors.size` (the total element count), which would append a long run
    # of empty batches after the data is exhausted. Slicing clamps at the
    # end, so the final batch is simply shorter when the row count is not a
    # multiple of the batch size.
    batch_size = 10
    for start in range(0, vectors.shape[0], batch_size):
        accumulator.update(vectors[start:start + batch_size])

    expected_mean = np.mean(vectors, axis=0)
    expected_covariance = np.cov(vectors, rowvar=False).ravel()
    actual_mean = accumulator.mean
    actual_covariance = accumulator.covariance.ravel()

    self.assertEqual(expected_mean.size, actual_mean.size)
    self.assertEqual(expected_covariance.size, actual_covariance.size)
    for expected, actual in zip(expected_mean, actual_mean):
        self.assertAlmostEqual(expected, actual)
    for expected, actual in zip(expected_covariance, actual_covariance):
        self.assertAlmostEqual(expected, actual)
def test_initialize_from_array(self, vectors):
    """A single update() on a fresh accumulator reproduces NumPy's statistics.

    Args:
        vectors: Observations passed to update(). The reference values are
            `np.mean` along axis 0 and `np.cov` with `rowvar=False`.

    NOTE(review): `vectors` is presumably supplied by a parameterized test
    decorator outside this view - confirm.
    """
    accumulator = variance_util.MeanCovAccumulator()
    accumulator.update(vectors)

    want_mean = np.mean(vectors, axis=0)
    want_covariance = np.cov(vectors, rowvar=False).ravel()
    got_mean = accumulator.mean
    got_covariance = accumulator.covariance.ravel()
    pairs = ((want_mean, got_mean), (want_covariance, got_covariance))

    # Sizes first, so a length mismatch is reported before zip() could
    # silently truncate the element-wise comparison.
    for want, got in pairs:
        self.assertEqual(want.size, got.size)
    for want, got in pairs:
        for want_item, got_item in zip(want, got):
            self.assertAlmostEqual(want_item, got_item)