Beispiel #1
0
    def test_dual_mean_of_difference(self):
        # Evaluate MEAN(B - A) and MEAN(C - A) in a single `ndarrays` call,
        # where A, B and C wrap `raw`, `raw * 3` and `raw * 5`.
        dims = (500, 30, 40)
        raw = np.linspace(0, 1, num=np.prod(dims)).reshape(dims)

        # Each source goes through an AccessCounter so that its reads can be
        # checked afterwards.
        counter_a = AccessCounter(raw)
        counter_b = AccessCounter(raw * 3)
        counter_c = AccessCounter(raw * 5)
        lazy_a = biggus.NumpyArrayAdapter(counter_a)
        lazy_b = biggus.NumpyArrayAdapter(counter_b)
        lazy_c = biggus.NumpyArrayAdapter(counter_c)

        # Both means share the same `A` operand.
        lazy_means = [
            biggus.mean(biggus.sub(lazy_b, lazy_a), axis=0),
            biggus.mean(biggus.sub(lazy_c, lazy_a), axis=0),
        ]
        mean_b_minus_a, mean_c_minus_a = biggus.ndarrays(lazy_means)

        # Are the resulting numbers equivalent? (B - A is `raw * 2` and
        # C - A is `raw * 4`, up to floating point rounding.)
        np.testing.assert_array_almost_equal(
            mean_b_minus_a, np.mean(raw * 2, axis=0))
        np.testing.assert_array_almost_equal(
            mean_c_minus_a, np.mean(raw * 4, axis=0))

        # Was the source data read just once?
        for counter in (counter_a, counter_b, counter_c):
            self.assert_counts(counter.counts, [1])
Beispiel #2
0
    def _test_elementwise(self, biggus_op, numpy_op):
        """Check a lazy elementwise operation against its NumPy equivalent.

        For each pair of shapes, this verifies that building (and slicing)
        the biggus result reads no source data, and that the realised values
        equal those of the NumPy operation under the same indexing.

        Args:
            biggus_op: callable taking two biggus arrays; its result is
                asserted to be a ``biggus.Array``.
            numpy_op: callable taking the two corresponding NumPy arrays;
                its result provides the expected values.

        """
        # Sequence of tests, defined as:
        #   1. Original array shape1.
        #   2. Original array shape2.
        #   3. Sequence of indexing operations to apply.
        tests = [
            [(10, ), (10, ), []],
            [(30, 40), (30, 40), []],
            [(30, 40), (30, 40), [5]],
            [(10, 30, 1), (1, 40), []],
            [(2, 3, 1), (1, 4), [slice(1, 2)]],
            [(500, 30, 40), (500, 30, 40), [slice(3, 6)]],
            [(500, 30, 40), (500, 30, 40), [(slice(None), slice(3, 6))]],
        ]
        for shape1, shape2, cuts in tests:
            # Define some test data
            raw_data1 = np.linspace(0.0, 1.0, np.prod(shape1)).reshape(shape1)
            raw_data2 = np.linspace(0.2, 1.2, np.prod(shape2)).reshape(shape2)

            # Check the elementwise operation doesn't actually read any
            # data.
            data1 = AccessCounter(raw_data1)
            data2 = AccessCounter(raw_data2)
            array1 = biggus.NumpyArrayAdapter(data1)
            array2 = biggus.NumpyArrayAdapter(data2)
            op_array = biggus_op(array1, array2)
            self.assertIsInstance(op_array, biggus.Array)
            self.assertTrue((data1.counts == 0).all())
            self.assertTrue((data2.counts == 0).all())

            # Compute the NumPy elementwise operation, and then wrap the
            # result as an array so we can apply biggus-style indexing.
            numpy_op_data = numpy_op(raw_data1, raw_data2)
            numpy_op_array = biggus.NumpyArrayAdapter(numpy_op_data)

            for keys in cuts:
                # Check slicing doesn't actually read any data.
                op_array = op_array[keys]
                self.assertIsInstance(op_array, biggus.Array)
                self.assertTrue((data1.counts == 0).all())
                self.assertTrue((data2.counts == 0).all())
                # Update the NumPy result to match
                numpy_op_array = numpy_op_array[keys]

            # Check the NumPy and biggus numeric values match.
            op_result = op_array.ndarray()
            numpy_result = numpy_op_array.ndarray()
            np.testing.assert_array_equal(op_result, numpy_result)
Beispiel #3
0
    def test_mean_of_difference(self):
        # MEAN(A - B), with A wrapping `raw * 3` and B wrapping `raw`.
        dims = (500, 30, 40)
        raw = np.linspace(0, 1, num=np.prod(dims)).reshape(dims)
        minuend = biggus.NumpyArrayAdapter(AccessCounter(raw * 3))
        subtrahend = biggus.NumpyArrayAdapter(AccessCounter(raw))

        difference = biggus.sub(minuend, subtrahend)
        lazy_mean = biggus.mean(difference, axis=0)

        # Realise the lazy mean and compare it with the mean of `raw * 2`,
        # which is what A - B amounts to (up to rounding).
        actual = lazy_mean.ndarray()
        expected = np.mean(raw * 2, axis=0)
        np.testing.assert_array_almost_equal(actual, expected)
Beispiel #4
0
    def test_sd_and_mean_of_difference(self):
        # MEAN(A - B) and SD(A - B) from one `ndarrays` call, with A wrapping
        # `raw * 3` and B wrapping `raw`.
        dims = (500, 30, 40)
        raw = np.linspace(0, 1, num=np.prod(dims)).reshape(dims)
        counter_a = AccessCounter(raw * 3)
        lazy_a = biggus.NumpyArrayAdapter(counter_a)
        counter_b = AccessCounter(raw)
        lazy_b = biggus.NumpyArrayAdapter(counter_b)

        # Both statistics are built on the same lazy difference.
        difference = biggus.sub(lazy_a, lazy_b)
        lazy_stats = [biggus.mean(difference, axis=0),
                      biggus.std(difference, axis=0)]
        mean, std = biggus.ndarrays(lazy_stats)

        # Are the resulting numbers equivalent?
        expected_difference = raw * 2
        np.testing.assert_array_almost_equal(
            mean, np.mean(expected_difference, axis=0))
        np.testing.assert_array_almost_equal(
            std, np.std(expected_difference, axis=0))

        # Was the source data read just once?
        for counter in (counter_a, counter_b):
            self.assert_counts(counter.counts, [1])
Beispiel #5
0
    def test_dual_aggregation(self):
        # MEAN(A) and SD(A) over axis 0, evaluated together through a single
        # `ndarrays` call.
        shape = (500, 30, 40)
        size = np.prod(shape)
        raw_data = np.linspace(0, 1, num=size).reshape(shape)
        counter = AccessCounter(raw_data)
        array = biggus.NumpyArrayAdapter(counter)

        # Check the aggregation operations don't actually read any data.
        mean_array = biggus.mean(array, axis=0)
        std_array = biggus.std(array, axis=0)
        self.assertIsInstance(mean_array, biggus.Array)
        self.assertIsInstance(std_array, biggus.Array)
        self.assertTrue((counter.counts == 0).all())

        mean, std_dev = biggus.ndarrays([mean_array, std_array])

        # Are the resulting numbers equivalent? Without this the realised
        # values would go unchecked and a wrong result would still pass.
        # The expected values are computed from `raw_data` directly, not
        # through `counter`.
        np.testing.assert_array_almost_equal(mean, np.mean(raw_data, axis=0))
        np.testing.assert_array_almost_equal(std_dev,
                                             np.std(raw_data, axis=0))

        # Was the source data read just once?
        self.assert_counts(counter.counts, [1])
Beispiel #6
0
    def _test_aggregation(self, biggus_op, numpy_op, **kwargs):
        """Check a lazy aggregation against its NumPy equivalent.

        For every test shape and every axis of that shape, this verifies
        that building and slicing the biggus aggregation reads no source
        data, that realising it reads no source value more than once, and
        that the values agree with the NumPy aggregation under the same
        indexing.

        Args:
            biggus_op: callable invoked as ``biggus_op(array, axis=axis,
                **kwargs)``; its result is asserted to be a ``biggus.Array``.
            numpy_op: callable invoked as ``numpy_op(raw, axis=axis,
                **kwargs)`` to produce the expected values.
            **kwargs: extra keyword arguments passed to both callables.

        """
        # Each case is: (source array shape, indexing operations to apply).
        cases = [
            [(10, ), []],
            [(30, 40), []],
            [(30, 40), [5]],
            [(500, 30, 40), [slice(3, 6)]],
            [(500, 30, 40), [(slice(None), slice(3, 6))]],
        ]
        for dims, index_ops in cases:
            # Build the test data for this shape.
            raw = np.linspace(0, 1, num=np.prod(dims)).reshape(dims)

            for axis in range(raw.ndim):
                # Creating the lazy aggregation must not read anything.
                counter = AccessCounter(raw)
                lazy = biggus_op(biggus.NumpyArrayAdapter(counter),
                                 axis=axis, **kwargs)
                self.assertIsInstance(lazy, biggus.Array)
                self.assertTrue((counter.counts == 0).all())

                # The expected values come from NumPy; wrapping them in an
                # adapter lets the same biggus-style indexing be applied.
                expected = biggus.NumpyArrayAdapter(
                    numpy_op(raw, axis=axis, **kwargs))

                for key in index_ops:
                    # Slicing must not read anything either.
                    lazy = lazy[key]
                    self.assertIsInstance(lazy, biggus.Array)
                    self.assertTrue((counter.counts == 0).all())
                    # Keep the expected result in step.
                    expected = expected[key]

                # Realising the result may read each source value at most
                # once.
                actual = lazy.ndarray()
                self.assertTrue((counter.counts <= 1).all())

                # The biggus and NumPy values must agree.
                np.testing.assert_array_almost_equal(actual,
                                                     expected.ndarray())
Beispiel #7
0
    def test_means_across_different_axes(self):
        # MEAN(A, axis=0) and MEAN(A, axis=1) from one `ndarrays` call, with
        # A wrapping `raw * 3`.
        dims = (500, 30, 40)
        raw = np.linspace(0, 1, num=np.prod(dims)).reshape(dims)
        counter = AccessCounter(raw * 3)
        lazy = biggus.NumpyArrayAdapter(counter)

        lazy_means = [biggus.mean(lazy, axis=0), biggus.mean(lazy, axis=1)]
        mean_axis_0, mean_axis_1 = biggus.ndarrays(lazy_means)

        # Are the resulting numbers equivalent?
        np.testing.assert_array_almost_equal(
            mean_axis_0, np.mean(raw * 3, axis=0))
        np.testing.assert_array_almost_equal(
            mean_axis_1, np.mean(raw * 3, axis=1))

        # Was the source data read the minimal number of times?
        self.assert_counts(counter.counts, [1])
Beispiel #8
0
    def _test_mean_with_mdtol(self, data, axis, numpy_result, mdtol=None):
        """Check a lazy masked mean against an expected masked result.

        Args:
            data: source array, wrapped in an ``AccessCounter`` before use.
            axis: axis passed to ``biggus.mean``.
            numpy_result: expected result; its mask and its unmasked values
                are both compared for exact equality.
            mdtol: forwarded to ``biggus.mean`` when not None; when None the
                argument is omitted so the default is exercised.

        """
        counter = AccessCounter(data)
        lazy_source = biggus.NumpyArrayAdapter(counter)

        # Only pass `mdtol` through when one was supplied, so that the
        # default behaviour can be tested as well.
        optional = {} if mdtol is None else {'mdtol': mdtol}
        lazy_mean = biggus.mean(lazy_source, axis=axis, **optional)

        # Building the aggregation must not read any data.
        self.assertTrue((counter.counts == 0).all())

        # Realise the result; no source value may be read more than once.
        actual = lazy_mean.masked_array()
        self.assertTrue((counter.counts <= 1).all())

        # Masks must match exactly, as must the values that are not masked.
        expected_mask = np.ma.getmaskarray(numpy_result)
        actual_mask = np.ma.getmaskarray(actual)
        np.testing.assert_array_equal(actual_mask, expected_mask)
        np.testing.assert_array_equal(actual[~actual_mask].data,
                                      numpy_result[~expected_mask].data)