def test_std(self):
    """Check that biggus.std is fully lazy: no source reads occur while
    building or slicing the expression, each source value is read at most
    once on resolution, and the result matches NumPy's std."""
    # Sequence of tests, defined as:
    #   1. Original array shape.
    #   2. Sequence of indexing operations to apply.
    tests = [
        [(30, 40), []],
        [(30, 40), [5]],
        [(500, 30, 40), [slice(3, 6)]],
        [(500, 30, 40), [(slice(None), slice(3, 6))]],
    ]
    axis = 0
    ddof = 0
    for shape, cuts in tests:
        # Define some test data
        size = np.prod(shape)
        raw_data = np.linspace(0, 1, num=size).reshape(shape)

        # Wrap the data in an access-counting adapter so we can verify
        # exactly when (and how often) the source is read.
        data = _AccessCounter(raw_data)
        array = biggus.ArrayAdapter(data)

        # Compute the NumPy standard deviation, and then wrap the
        # result as an array so we can apply biggus-style indexing.
        np_std_data = np.std(raw_data, axis=axis, ddof=ddof)
        np_std_array = biggus.ArrayAdapter(np_std_data)

        # Check the `std` operation doesn't actually read any data.
        # NB. The original computed an unused second std with the same
        # arguments; a single call honouring `axis`/`ddof` suffices.
        std_array = biggus.std(array, axis=axis, ddof=ddof)
        self.assertIsInstance(std_array, biggus.Array)
        self.assertTrue((data.counts == 0).all())

        for keys in cuts:
            # Check slicing doesn't actually read any data.
            std_array = std_array[keys]
            self.assertIsInstance(std_array, biggus.Array)
            self.assertTrue((data.counts == 0).all())
            # Update the NumPy result to match
            np_std_array = np_std_array[keys]

        # Check resolving `std_array` to a NumPy array only reads
        # each relevant source value once.
        std = std_array.ndarray()
        self.assertTrue((data.counts <= 1).all())

        # Check the NumPy and biggus numeric values match.
        np_std = np_std_array.ndarray()
        np.testing.assert_array_almost_equal(std, np_std)
def test_sd_and_mean_of_difference(self):
    """Evaluate MEAN(A - B) and SD(A - B) together and check both the
    numeric results and the number of source reads."""
    shape = (500, 30, 40)
    n = np.prod(shape)
    raw_data = np.linspace(0, 1, num=n).reshape(shape)

    counter_a = AccessCounter(raw_data * 3)
    counter_b = AccessCounter(raw_data)
    array_a = biggus.NumpyArrayAdapter(counter_a)
    array_b = biggus.NumpyArrayAdapter(counter_b)

    difference = biggus.sub(array_a, array_b)
    mean_array = biggus.mean(difference, axis=0)
    std_array = biggus.std(difference, axis=0)
    mean, std = biggus.ndarrays([mean_array, std_array])

    # Are the resulting numbers equivalent?
    np.testing.assert_array_almost_equal(mean,
                                         np.mean(raw_data * 2, axis=0))
    np.testing.assert_array_almost_equal(std,
                                         np.std(raw_data * 2, axis=0))

    # Was the source data read the minimal number of times?
    # (The first slice of A and B may each be read twice, because both
    # the `mean` and `std` operations use it to bootstrap their
    # calculations.)
    self.assert_counts(counter_a.counts[0], [2])
    self.assert_counts(counter_a.counts[1:], [1])
    self.assert_counts(counter_b.counts[0], [2])
    self.assert_counts(counter_b.counts[1:], [1])
def _check(self, data):
    """Assert that biggus' masked std of `data` (axis 0, ddof 0) agrees
    with numpy.ma.std in both fill values and mask."""
    adapter = biggus.NumpyArrayAdapter(data)
    actual = std(adapter, axis=0, ddof=0).masked_array()
    expected = ma.std(data, axis=0, ddof=0)
    # Promote a scalar result so `.filled()` and `.mask` are available.
    if expected.ndim == 0:
        expected = ma.asarray(expected)
    np.testing.assert_array_equal(actual.filled(), expected.filled())
    np.testing.assert_array_equal(actual.mask, expected.mask)
def test_dual_aggregation(self):
    """Deferred mean+std: nothing is read while the expressions are
    built, and resolving both together reads the source exactly once."""
    shape = (500, 30, 40)
    raw_data = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)
    counter = AccessCounter(raw_data)
    array = biggus.NumpyArrayAdapter(counter)

    mean_array = biggus.mean(array, axis=0)
    std_array = biggus.std(array, axis=0)

    # Constructing the aggregations must not touch the source data.
    self.assertIsInstance(mean_array, biggus.Array)
    self.assertIsInstance(std_array, biggus.Array)
    self.assertTrue((counter.counts == 0).all())

    mean, std_dev = biggus.ndarrays([mean_array, std_array])
    # Was the source data read just once?
    self.assert_counts(counter.counts, [1])
def test_dual_aggregation(self):
    """Deferred mean+std share a single pass over the source: no reads
    during construction, and only the bootstrap slice is read twice."""
    shape = (500, 30, 40)
    raw_data = numpy.linspace(0, 1, num=numpy.prod(shape)).reshape(shape)
    counter = _AccessCounter(raw_data)
    array = biggus.ArrayAdapter(counter)

    mean_array = biggus.mean(array, axis=0)
    std_array = biggus.std(array, axis=0)

    # Defining the aggregations must not read any source data.
    self.assertIsInstance(mean_array, biggus.Array)
    self.assertIsInstance(std_array, biggus.Array)
    self.assertTrue((counter.counts == 0).all())

    mean, std_dev = biggus.ndarrays([mean_array, std_array])
    # The first slice is read twice because both `mean` and `std`
    # use it to bootstrap their rolling calculations.
    self.assertTrue((counter.counts[0] == 2).all())
    self.assertTrue((counter.counts[1:] == 1).all())
def test_dual_aggregation(self):
    """Resolving mean and std together reads the source minimally:
    zero reads up front, then one pass with a doubly-read first slice."""
    shape = (500, 30, 40)
    raw_data = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)
    counter = AccessCounter(raw_data)
    array = biggus.NumpyArrayAdapter(counter)

    mean_array = biggus.mean(array, axis=0)
    std_array = biggus.std(array, axis=0)

    # Creating the aggregations must not read any source data.
    self.assertIsInstance(mean_array, biggus.Array)
    self.assertIsInstance(std_array, biggus.Array)
    self.assertTrue((counter.counts == 0).all())

    mean, std_dev = biggus.ndarrays([mean_array, std_array])
    # Was the source data read the minimal number of times?
    # (The first slice may be read twice because both the `mean` and
    # `std` operations use it to bootstrap their calculations.)
    self.assert_counts(counter.counts[0], [2])
    self.assert_counts(counter.counts[1:], [1])
def test_sd_and_mean_of_difference(self):
    """Evaluate MEAN(A - B) and SD(A - B) together, checking the values
    against NumPy and that each source array is read exactly once."""
    shape = (500, 30, 40)
    raw_data = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)

    counter_a = AccessCounter(raw_data * 3)
    counter_b = AccessCounter(raw_data)
    difference = biggus.sub(biggus.NumpyArrayAdapter(counter_a),
                            biggus.NumpyArrayAdapter(counter_b))

    mean_array = biggus.mean(difference, axis=0)
    std_array = biggus.std(difference, axis=0)
    mean, std = biggus.ndarrays([mean_array, std_array])

    # Are the resulting numbers equivalent?
    np.testing.assert_array_almost_equal(mean,
                                         np.mean(raw_data * 2, axis=0))
    np.testing.assert_array_almost_equal(std,
                                         np.std(raw_data * 2, axis=0))

    # Was the source data read just once?
    self.assert_counts(counter_a.counts, [1])
    self.assert_counts(counter_b.counts, [1])
def _compare(self, data, axis=0, ddof=0):
    """Assert that biggus.std of `data` matches numpy.std for the given
    `axis` and `ddof`."""
    lazy_std = biggus.std(biggus.ArrayAdapter(data), axis=axis, ddof=ddof)
    expected = np.std(data, axis=axis, ddof=ddof)
    np.testing.assert_array_almost_equal(expected, lazy_std.ndarray())