def test_dual_mean_of_difference(self):
    # Evaluate MEAN(B - A) and MEAN(C - A) in a single shared pass.
    shape = (500, 30, 40)
    raw_data = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)

    # Wrap three scaled copies of the data so we can count accesses.
    a_counter = AccessCounter(raw_data)
    b_counter = AccessCounter(raw_data * 3)
    c_counter = AccessCounter(raw_data * 5)
    a_array = biggus.NumpyArrayAdapter(a_counter)
    b_array = biggus.NumpyArrayAdapter(b_counter)
    c_array = biggus.NumpyArrayAdapter(c_counter)

    # Build both deferred expressions, then resolve them together.
    mean_b_sub_a_array = biggus.mean(biggus.sub(b_array, a_array), axis=0)
    mean_c_sub_a_array = biggus.mean(biggus.sub(c_array, a_array), axis=0)
    mean_b_sub_a, mean_c_sub_a = biggus.ndarrays(
        [mean_b_sub_a_array, mean_c_sub_a_array])

    # The results must match the analytically known answers.
    np.testing.assert_array_almost_equal(mean_b_sub_a,
                                         np.mean(raw_data * 2, axis=0))
    np.testing.assert_array_almost_equal(mean_c_sub_a,
                                         np.mean(raw_data * 4, axis=0))

    # Each source array should have been read exactly once.
    for counter in (a_counter, b_counter, c_counter):
        self.assert_counts(counter.counts, [1])
def _test_elementwise(self, biggus_op, numpy_op):
    """Check a biggus elementwise operation against its NumPy equivalent.

    For each test case the deferred biggus result must read no source
    data until resolved, and its resolved values must match NumPy's.

    Each test case is defined as:
      1. Original array shape1.
      2. Original array shape2.
      3. Sequence of indexing operations to apply to the result.
    """
    tests = [
        [(10, ), (10, ), []],
        [(30, 40), (30, 40), []],
        [(30, 40), (30, 40), (5, )],
        [(10, 30, 1), (1, 40), []],
        [(2, 3, 1), (1, 4), [slice(1, 2)]],
        [(500, 30, 40), (500, 30, 40), [slice(3, 6)]],
        [(500, 30, 40), (500, 30, 40), [(slice(None), slice(3, 6))]],
    ]
    # NB: the previous version defined unused `axis` and `ddof` locals
    # (left over from the aggregation tests); they have been removed.
    for shape1, shape2, cuts in tests:
        # Define some test data.
        raw_data1 = np.linspace(0.0, 1.0, np.prod(shape1)).reshape(shape1)
        raw_data2 = np.linspace(0.2, 1.2, np.prod(shape2)).reshape(shape2)

        # Check the elementwise operation doesn't actually read any
        # data.
        data1 = AccessCounter(raw_data1)
        data2 = AccessCounter(raw_data2)
        array1 = biggus.NumpyArrayAdapter(data1)
        array2 = biggus.NumpyArrayAdapter(data2)
        op_array = biggus_op(array1, array2)
        self.assertIsInstance(op_array, biggus.Array)
        self.assertTrue((data1.counts == 0).all())
        self.assertTrue((data2.counts == 0).all())

        # Compute the NumPy elementwise operation, and then wrap the
        # result as an array so we can apply biggus-style indexing.
        numpy_op_data = numpy_op(raw_data1, raw_data2)
        numpy_op_array = biggus.NumpyArrayAdapter(numpy_op_data)

        for keys in cuts:
            # Check slicing doesn't actually read any data.
            op_array = op_array[keys]
            self.assertIsInstance(op_array, biggus.Array)
            self.assertTrue((data1.counts == 0).all())
            self.assertTrue((data2.counts == 0).all())
            # Update the NumPy result to match.
            numpy_op_array = numpy_op_array[keys]

        # Check the NumPy and biggus numeric values match.
        op_result = op_array.ndarray()
        numpy_result = numpy_op_array.ndarray()
        np.testing.assert_array_equal(op_result, numpy_result)
def test_mean_of_difference(self):
    # Resolve MEAN(A - B) and compare with the known answer.
    shape = (500, 30, 40)
    raw_data = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)

    # A holds 3x the data, B holds 1x; A - B is therefore 2x.
    a_array = biggus.NumpyArrayAdapter(AccessCounter(raw_data * 3))
    b_array = biggus.NumpyArrayAdapter(AccessCounter(raw_data))

    difference = biggus.sub(a_array, b_array)
    mean = biggus.mean(difference, axis=0).ndarray()
    np.testing.assert_array_almost_equal(mean,
                                         np.mean(raw_data * 2, axis=0))
def test_sd_and_mean_of_difference(self):
    # Evaluate MEAN(A - B) and SD(A - B) from a single shared
    # sub-expression.
    shape = (500, 30, 40)
    raw_data = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)

    a_counter = AccessCounter(raw_data * 3)
    b_counter = AccessCounter(raw_data)
    a_array = biggus.NumpyArrayAdapter(a_counter)
    b_array = biggus.NumpyArrayAdapter(b_counter)

    # Both aggregations hang off the same deferred subtraction.
    difference = biggus.sub(a_array, b_array)
    mean_array = biggus.mean(difference, axis=0)
    std_array = biggus.std(difference, axis=0)
    mean, std = biggus.ndarrays([mean_array, std_array])

    # The results must match the analytically known answers.
    np.testing.assert_array_almost_equal(mean,
                                         np.mean(raw_data * 2, axis=0))
    np.testing.assert_array_almost_equal(std,
                                         np.std(raw_data * 2, axis=0))

    # Each source array should have been read exactly once.
    for counter in (a_counter, b_counter):
        self.assert_counts(counter.counts, [1])
def test_dual_aggregation(self):
    # Building two aggregations over the same source must be lazy,
    # and resolving them together must read the source only once.
    shape = (500, 30, 40)
    raw_data = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)
    counter = AccessCounter(raw_data)
    array = biggus.NumpyArrayAdapter(counter)

    mean_array = biggus.mean(array, axis=0)
    std_array = biggus.std(array, axis=0)

    # Constructing the expressions must not touch the data.
    self.assertIsInstance(mean_array, biggus.Array)
    self.assertIsInstance(std_array, biggus.Array)
    self.assertTrue((counter.counts == 0).all())

    # Resolve both results in one pass.
    mean, std_dev = biggus.ndarrays([mean_array, std_array])

    # Was the source data read just once?
    self.assert_counts(counter.counts, [1])
def _test_aggregation(self, biggus_op, numpy_op, **kwargs):
    """Check a biggus aggregation against its NumPy equivalent.

    Each test case is defined as:
      1. Original array shape.
      2. Sequence of indexing operations to apply to the result.

    The deferred biggus result must read no source data until
    resolved, must read each relevant value at most once when
    resolved, and must match the NumPy aggregation numerically.
    """
    tests = [
        [(10, ), []],
        [(30, 40), []],
        [(30, 40), [5]],
        [(500, 30, 40), [slice(3, 6)]],
        [(500, 30, 40), [(slice(None), slice(3, 6))]],
    ]
    for shape, cuts in tests:
        # Define some test data.
        raw_data = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)

        # Exercise the aggregation over every axis of the shape.
        for axis in range(len(shape)):
            # Constructing the expression must not read any data.
            data = AccessCounter(raw_data)
            array = biggus.NumpyArrayAdapter(data)
            op_array = biggus_op(array, axis=axis, **kwargs)
            self.assertIsInstance(op_array, biggus.Array)
            self.assertTrue((data.counts == 0).all())

            # Compute the NumPy aggregation, wrapped as an array so
            # the same biggus-style indexing can be applied to it.
            numpy_op_array = biggus.NumpyArrayAdapter(
                numpy_op(raw_data, axis=axis, **kwargs))

            for keys in cuts:
                # Slicing the deferred result must also be lazy.
                op_array = op_array[keys]
                self.assertIsInstance(op_array, biggus.Array)
                self.assertTrue((data.counts == 0).all())
                # Keep the NumPy reference result in step.
                numpy_op_array = numpy_op_array[keys]

            # Resolving must read each relevant source value at most
            # once.
            op_result = op_array.ndarray()
            self.assertTrue((data.counts <= 1).all())

            # The NumPy and biggus numeric values must match.
            np.testing.assert_array_almost_equal(op_result,
                                                 numpy_op_array.ndarray())
def test_means_across_different_axes(self):
    # Evaluate MEAN(A, axis=0) and MEAN(A, axis=1) in one pass over A.
    shape = (500, 30, 40)
    raw_data = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)
    a_counter = AccessCounter(raw_data * 3)
    a_array = biggus.NumpyArrayAdapter(a_counter)

    # Two aggregations over different axes of the same source.
    mean_0_array = biggus.mean(a_array, axis=0)
    mean_1_array = biggus.mean(a_array, axis=1)
    mean_0, mean_1 = biggus.ndarrays([mean_0_array, mean_1_array])

    # The results must match the directly-computed NumPy means.
    for result, axis in ((mean_0, 0), (mean_1, 1)):
        np.testing.assert_array_almost_equal(
            result, np.mean(raw_data * 3, axis=axis))

    # The shared source should have been read only once.
    self.assert_counts(a_counter.counts, [1])
def _test_mean_with_mdtol(self, data, axis, numpy_result, mdtol=None):
    """Check biggus.mean with a missing-data tolerance against an
    expected masked result.

    When `mdtol` is None the default tolerance is exercised.
    """
    data = AccessCounter(data)
    array = biggus.NumpyArrayAdapter(data)

    # Build the deferred aggregation, with or without an explicit
    # mdtol so the default value also gets coverage.
    if mdtol is None:
        biggus_aggregation = biggus.mean(array, axis=axis)
    else:
        biggus_aggregation = biggus.mean(array, axis=axis, mdtol=mdtol)

    # Constructing the expression must not read any data.
    self.assertTrue((data.counts == 0).all())

    # Resolving must read each relevant source value at most once.
    biggus_result = biggus_aggregation.masked_array()
    self.assertTrue((data.counts <= 1).all())

    # The masks must agree exactly, and the unmasked values must
    # agree exactly.
    numpy_mask = np.ma.getmaskarray(numpy_result)
    biggus_mask = np.ma.getmaskarray(biggus_result)
    np.testing.assert_array_equal(biggus_mask, numpy_mask)
    np.testing.assert_array_equal(biggus_result[~biggus_mask].data,
                                  numpy_result[~numpy_mask].data)