Ejemplo n.º 1
0
class TestUnit(TestCase):
    """Property-based tests of ``util`` functions, fed by hypothesis strategies."""

    @given(get_array_1d2d())  # type: ignore
    def test_mloc(self, array: np.ndarray) -> None:
        """Check that ``util.mloc`` gives an ``int`` for a generated array."""
        self.assertIsInstance(util.mloc(array), int)

    @given(get_array_1d2d())  # type: ignore
    def test_shape_filter(self, shape: np.ndarray) -> None:
        """Check that ``util.shape_filter`` gives a result of length two.

        Note that ``shape`` is a generated array, despite its name.
        """
        # assertEqual is required here: assertTrue(len(...), 2) would treat
        # the 2 as the failure message and only test truthiness of the length.
        self.assertEqual(len(util.shape_filter(shape)), 2)

    @given(get_dtype_pairs())  # type: ignore
    def test_resolve_dtype(self, dtype_pair: tp.Tuple[np.dtype,
                                                      np.dtype]) -> None:
        """Check that resolving a pair of dtypes gives an ``np.dtype``."""
        self.assertIsInstance(util.resolve_dtype(*dtype_pair), np.dtype)

    @given(get_dtypes(min_size=1))  # type: ignore
    def test_resolve_dtype_iter(self, dtypes: tp.Iterable[np.dtype]) -> None:
        """Check that resolving an iterable of dtypes gives an ``np.dtype``."""
        self.assertIsInstance(util.resolve_dtype_iter(dtypes), np.dtype)

    @given(get_labels(min_size=1))  # type: ignore
    def test_resolve_type_iter(self, objects: tp.Iterable[object]) -> None:
        """Check that the resolved type is one of a fixed set of known types."""
        known_types = {
            None, type(None), bool, str, object, int, float, complex,
            datetime.date, datetime.datetime, fractions.Fraction,
        }
        # the function returns three values; only the first is checked here
        resolved, _, _ = util.resolve_type_iter(objects)
        self.assertIn(resolved, known_types)

    @given(get_arrays_2d_aligned_columns())  # type: ignore
    def test_concat_resolved_axis_0(self, arrays: tp.List[np.ndarray]) -> None:
        """Check ndim and resolved dtype after concatenating on axis 0."""
        array = util.concat_resolved(arrays, axis=0)
        self.assertEqual(array.ndim, 2)
        self.assertEqual(array.dtype,
                         util.resolve_dtype_iter(x.dtype for x in arrays))

    @given(get_arrays_2d_aligned_rows())  # type: ignore
    def test_concat_resolved_axis_1(self, arrays: tp.List[np.ndarray]) -> None:
        """Check ndim and resolved dtype after concatenating on axis 1."""
        array = util.concat_resolved(arrays, axis=1)
        self.assertEqual(array.ndim, 2)
        self.assertEqual(array.dtype,
                         util.resolve_dtype_iter(x.dtype for x in arrays))

    @given(get_dtype(), get_shape_1d2d(), get_value())  # type: ignore
    def test_full_or_fill(self, dtype: np.dtype,
                          shape: tp.Union[tp.Tuple[int], tp.Tuple[int, int]],
                          value: object) -> None:
        """Check the shape of ``util.full_for_fill`` and that it holds the fill value."""
        array = util.full_for_fill(dtype, shape, fill_value=value)
        self.assertEqual(array.shape, shape)
        # membership is not tested for NaN fill values
        value_is_nan = isinstance(value, (float, complex)) and np.isnan(value)
        if not value_is_nan:
            self.assertIn(value, array)

    @given(get_dtype())  # type: ignore
    def test_dtype_to_na(self, dtype: util.DtypeSpecifier) -> None:
        """Check that ``util.dtype_to_na`` gives one of the expected NA values."""
        post = util.dtype_to_na(dtype)
        self.assertIn(post, {0, False, None, '', np.nan, util.NAT})

    @given(get_array_1d2d(dtype_group=DTGroup.NUMERIC))  # type: ignore
    def test_ufunc_axis_skipna(self, array: np.ndarray) -> None:
        """Run every entry of ``UFUNC_AXIS_SKIPNA`` on axis 0 with ``skipna=True``.

        For 2D input, the result is checked to be 1D.
        """
        for nt in UFUNC_AXIS_SKIPNA.values():
            # overflow, underflow and divide warnings are silenced for generated values
            with np.errstate(over='ignore', under='ignore', divide='ignore'):
                post = util.ufunc_axis_skipna(array=array,
                                              skipna=True,
                                              axis=0,
                                              ufunc=nt.ufunc,
                                              ufunc_skipna=nt.ufunc_skipna)
                if array.ndim == 2:
                    self.assertEqual(post.ndim, 1)

    @given(get_array_1d2d())  # type: ignore
    def test_ufunc_unique(self, array: np.ndarray) -> None:
        """Check that the unique result is no longer than the first axis."""
        post = util.ufunc_unique(array, axis=0)
        self.assertLessEqual(len(post), array.shape[0])

    @given(get_array_1d(min_size=1), st.integers())  # type: ignore
    def test_roll_1d(self, array: np.ndarray, shift: int) -> None:
        """Check length and the first element after ``util.roll_1d``."""
        post = util.roll_1d(array, shift)
        self.assertEqual(len(post), len(array))
        # the element expected at position zero after rolling
        self.assertEqualWithNaN(array[-(shift % len(array))], post[0])

    @given(get_array_2d(min_rows=1, min_columns=1),
           st.integers())  # type: ignore
    def test_roll_2d(self, array: np.ndarray, shift: int) -> None:
        """Check shape and the first row/column after ``util.roll_2d`` per axis."""
        for axis in (0, 1):
            post = util.roll_2d(array, shift=shift, axis=axis)
            self.assertEqual(post.shape, array.shape)

            # index of the row/column expected at position zero after rolling
            start = -(shift % array.shape[axis])

            if axis == 0:
                a = array[start]
                b = post[0]
            else:
                a = array[:, start]
                b = post[:, 0]

            self.assertAlmostEqualValues(a, b)

    @given(get_array_1d(dtype_group=DTGroup.OBJECT))  # type: ignore
    def test_iterable_to_array_a(self, array: np.ndarray) -> None:
        """Check list values survive ``util.iterable_to_array``, with and without dtype."""
        values = array.tolist()
        post, _ = util.iterable_to_array(values)
        self.assertAlmostEqualValues(post, values)

        # explicitly giving object dtype
        post, _ = util.iterable_to_array(values, dtype=util.DTYPE_OBJECT)
        self.assertAlmostEqualValues(post, values)

    @given(get_labels())  # type: ignore
    def test_iterable_to_array_b(self, labels: tp.Iterable[tp.Any]) -> None:
        """Check generated labels convert to an ``np.ndarray`` with matching values."""
        post, _ = util.iterable_to_array(labels)
        self.assertAlmostEqualValues(post, labels)
        self.assertIsInstance(post, np.ndarray)

    @given(st.slices(10))  # type: ignore #pylint: disable=E1120
    def test_slice_to_ascending_slice(self, key: slice) -> None:
        """Check the ascending slice selects the same positions as the original."""
        post_key = util.slice_to_ascending_slice(key, size=10)
        self.assertEqual(set(range(*key.indices(10))),
                         set(range(*post_key.indices(10))))

    # NOTE(review): presumably placeholders for tests not yet written:
    # to_datetime64
    # to_timedelta64
    # key_to_datetime_key

    @given(get_array_1d2d())  # type: ignore
    def test_array_to_groups_and_locations(self, array: np.ndarray) -> None:
        """Check groups and locations from ``util.array_to_groups_and_locations``."""
        groups, locations = util.array_to_groups_and_locations(array, 0)

        if len(array) > 0:
            self.assertGreaterEqual(len(groups), 1)

        # locations are always 1D
        self.assertEqual(locations.ndim, 1)
        self.assertEqual(len(np.unique(locations)), len(groups))

    @given(get_array_1d2d())  # type: ignore
    def test_isna_array(self, array: np.ndarray) -> None:
        """Check ``util.isna_array`` is boolean and agrees with ``util.isna_element``."""
        post = util.isna_array(array)
        self.assertEqual(post.dtype, bool)

        # count NA values one element at a time as the reference
        count_na = sum(util.isna_element(x) for x in np.ravel(array))
        self.assertEqual(np.ravel(post).sum(), count_na)

    @given(get_array_1d(dtype_group=DTGroup.BOOL))  # type: ignore
    def test_binary_transition(self, array: np.ndarray) -> None:
        """Check dtype and selected positions of ``util.binary_transition``."""
        post = util.binary_transition(array)

        # could be 32 via result of np.nonzero
        self.assertIn(post.dtype, (np.int32, np.int64))

        true_count = array.sum()
        if true_count == 0 or true_count == len(array):
            # with no True values, or with all True values, the result is empty
            self.assertEqual(len(post), 0)
        else:
            # the post selection should always be indices that are False
            self.assertEqual(array[post].sum(), 0)

    @given(get_array_1d2d())  # type: ignore
    def test_array_to_duplicated(self, array: np.ndarray) -> None:
        """Check duplicates are flagged whenever rows, columns or values repeat."""
        if array.ndim == 2:
            for axis in (0, 1):
                post = util.array_to_duplicated(array, axis=axis)
                if axis == 0:
                    unique_count = len({tuple(row) for row in array})
                else:
                    unique_count = len(
                        {tuple(array[:, i]) for i in range(array.shape[1])})
                if unique_count < array.shape[axis]:
                    self.assertGreater(post.sum(), 0)
        else:
            post = util.array_to_duplicated(array)
            # if not all values are unique, we must have some duplicated
            if len(set(array)) < len(array):
                self.assertGreater(post.sum(), 0)

        self.assertEqual(post.dtype, bool)

    @given(get_array_1d2d())  # type: ignore
    def test_array_shift(self, array: np.ndarray) -> None:
        """Check shape (and dtype when wrapping) after ``util.array_shift``."""
        for shift in (-1, 1):
            for wrap in (True, False):
                shifted = [
                    util.array_shift(array=array,
                                     shift=shift,
                                     axis=0,
                                     wrap=wrap)
                ]
                # axis 1 is only exercised for 2D input
                if array.ndim == 2:
                    shifted.append(
                        util.array_shift(array=array,
                                         shift=shift,
                                         axis=1,
                                         wrap=wrap))

                for post in shifted:
                    self.assertEqual(array.shape, post.shape)

                    # type is only always maintained if we are wrapping
                    if wrap:
                        self.assertEqual(array.dtype, post.dtype)

    @given(st.lists(get_array_1d(), min_size=2, max_size=2))  # type: ignore
    def test_union1d(self, arrays: tp.Sequence[np.ndarray]) -> None:
        """Check ``util.union1d`` against a union of Python sets."""
        left, right = arrays[0], arrays[1]
        post = util.union1d(left, right, assume_unique=False)
        self.assertEqual(post.ndim, 1)

        expected = set(left) | set(right)
        # nan values in complex numbers make direct comparison tricky
        self.assertEqual(len(post), len(expected))

        # complex results are tricky to compare after forming sets
        if (post.dtype.kind not in ('O', 'M', 'm', 'c', 'f')
                and not np.isnan(post).any()):
            self.assertEqual(set(post), expected)

    @given(st.lists(get_array_1d(), min_size=2, max_size=2))  # type: ignore
    def test_intersect1d(self, arrays: tp.Sequence[np.ndarray]) -> None:
        """Check ``util.intersect1d`` against an intersection of Python sets."""
        left, right = arrays[0], arrays[1]
        post = util.intersect1d(left, right, assume_unique=False)
        self.assertEqual(post.ndim, 1)

        expected = set(left) & set(right)
        # nan values in complex numbers make direct comparison tricky
        self.assertEqual(len(post), len(expected))

        if (post.dtype.kind not in ('O', 'M', 'm', 'c', 'f')
                and not np.isnan(post).any()):
            self.assertEqual(set(post), expected)

    @given(get_arrays_2d_aligned_columns(min_size=2,
                                         max_size=2))  # type: ignore
    def test_union2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
        """Check ndim and row count of ``util.union2d`` against sets of row tuples."""
        post = util.union2d(arrays[0], arrays[1], assume_unique=False)
        # object results are expected to be 1D; all others 2D
        self.assertEqual(post.ndim, 1 if post.dtype == object else 2)

        expected = (set(util.array2d_to_tuples(arrays[0]))
                    | set(util.array2d_to_tuples(arrays[1])))
        self.assertEqual(len(post), len(expected))

    @given(get_arrays_2d_aligned_columns(min_size=2,
                                         max_size=2))  # type: ignore
    def test_intersect2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
        """Check ndim and row count of ``util.intersect2d`` against sets of row tuples."""
        post = util.intersect2d(arrays[0], arrays[1], assume_unique=False)
        # object results are expected to be 1D; all others 2D
        self.assertEqual(post.ndim, 1 if post.dtype == object else 2)

        expected = (set(util.array2d_to_tuples(arrays[0]))
                    & set(util.array2d_to_tuples(arrays[1])))
        self.assertEqual(len(post), len(expected))

    @given(get_arrays_2d_aligned_columns())  # type: ignore
    def test_array_set_ufunc_many(self,
                                  arrays: tp.Sequence[np.ndarray]) -> None:
        """Check ndim of ``util.ufunc_set_iter`` for both union and intersection."""
        for union in (True, False):
            post = util.ufunc_set_iter(arrays, union=union)
            if post.dtype == object:
                # returned object arrays might be 2D or 1D of tuples
                self.assertIn(post.ndim, (1, 2))
            else:
                self.assertEqual(post.ndim, 2)
Ejemplo n.º 2
0
class TestUnit(TestCase):
    """Property tests comparing ``rank_1d`` / ``rank_2d`` with scipy's ``rankdata``."""

    @given(sfst.get_array_1d(dtype_group=sfst.DTGroup.NUMERIC, max_size=100))
    def test_rank_1d_ordinal(self, value: np.ndarray) -> None:
        """Ordinal ranks with ``start=1`` match scipy; default ranks have minimum zero."""
        expected = rankdata(value, method='ordinal')
        actual = rank_1d(value, method='ordinal', start=1)
        self.assertEqual(expected.tolist(), actual.tolist())

        if not len(value):
            return

        # without an explicit start the lowest rank is zero, in either direction
        ranks_asc = rank_1d(value, method='ordinal')
        self.assertEqual(ranks_asc.min(), 0)

        ranks_desc = rank_1d(value, method='ordinal', ascending=False)
        self.assertEqual(ranks_desc.min(), 0)

    @given(sfst.get_array_1d(dtype_group=sfst.DTGroup.NUMERIC, max_size=100))
    def test_rank_1d_dense(self, value: np.ndarray) -> None:
        """Dense ranks with ``start=1`` match scipy; default ranks have minimum zero."""
        # cannot compare values with NaN as scipy uses quicksort
        if np.isnan(value).any():
            return

        expected = rankdata(value, method='dense')
        actual = rank_1d(value, method='dense', start=1)
        self.assertEqual(expected.tolist(), actual.tolist())

        if not len(value):
            return

        # without an explicit start the lowest rank is zero, in either direction
        ranks_asc = rank_1d(value, method='dense')
        self.assertEqual(ranks_asc.min(), 0)

        ranks_desc = rank_1d(value, method='dense', ascending=False)
        self.assertEqual(ranks_desc.min(), 0)

    @given(sfst.get_array_1d(dtype_group=sfst.DTGroup.NUMERIC, max_size=100))
    def test_rank_1d_min(self, value: np.ndarray) -> None:
        """Min ranks with ``start=1`` match scipy; default ranks have minimum zero."""
        # cannot compare values with NaN as scipy uses quicksort
        if np.isnan(value).any():
            return

        expected = rankdata(value, method='min')
        actual = rank_1d(value, method='min', start=1)
        self.assertEqual(expected.tolist(), actual.tolist())

        if not len(value):
            return

        # without an explicit start the lowest rank is zero, in either direction
        ranks_asc = rank_1d(value, method='min')
        self.assertEqual(ranks_asc.min(), 0)

        ranks_desc = rank_1d(value, method='min', ascending=False)
        self.assertEqual(ranks_desc.min(), 0)

    @given(sfst.get_array_1d(dtype_group=sfst.DTGroup.NUMERIC, max_size=100))
    def test_rank_1d_max(self, value: np.ndarray) -> None:
        """Max ranks with ``start=1`` match scipy's ``rankdata``."""
        # cannot compare values with NaN as scipy uses quicksort
        if np.isnan(value).any():
            return

        expected = rankdata(value, method='max')
        actual = rank_1d(value, method='max', start=1)
        self.assertEqual(expected.tolist(), actual.tolist())

    @given(sfst.get_array_1d(dtype_group=sfst.DTGroup.NUMERIC, max_size=100))
    def test_rank_1d_average(self, value: np.ndarray) -> None:
        """``rank_1d`` 'mean' ranks with ``start=1`` match scipy's 'average' ranks."""
        # cannot compare values with NaN as scipy uses quicksort
        if np.isnan(value).any():
            return

        expected = rankdata(value, method='average')
        actual = rank_1d(value, method='mean', start=1)
        self.assertEqual(expected.tolist(), actual.tolist())

    @given(sfst.get_array_2d(dtype_group=sfst.DTGroup.NUMERIC, max_rows=20, max_columns=20))
    def test_rank_2d_ordinal(self, value: np.ndarray) -> None:
        """Ordinal 2D ranks with ``start=1`` match scipy along both axes."""
        for axis in range(2):
            expected = rankdata(value, method='ordinal', axis=axis)
            actual = rank_2d(value, method='ordinal', start=1, axis=axis)
            self.assertEqual(expected.tolist(), actual.tolist())

    @given(sfst.get_array_2d(dtype_group=sfst.DTGroup.NUMERIC, max_rows=20, max_columns=20))
    def test_rank_2d_dense(self, value: np.ndarray) -> None:
        """Dense 2D ranks with ``start=1`` match scipy along both axes."""
        # cannot compare values with NaN as scipy uses quicksort
        if np.isnan(value).any():
            return

        for axis in range(2):
            expected = rankdata(value, method='dense', axis=axis)
            actual = rank_2d(value, method='dense', start=1, axis=axis)
            self.assertEqual(expected.tolist(), actual.tolist())

    @given(sfst.get_array_2d(dtype_group=sfst.DTGroup.NUMERIC, max_rows=20, max_columns=20))
    def test_rank_2d_min(self, value: np.ndarray) -> None:
        """Min 2D ranks with ``start=1`` match scipy along both axes."""
        # cannot compare values with NaN as scipy uses quicksort
        if np.isnan(value).any():
            return

        for axis in range(2):
            expected = rankdata(value, method='min', axis=axis)
            actual = rank_2d(value, method='min', start=1, axis=axis)
            self.assertEqual(expected.tolist(), actual.tolist())

    @given(sfst.get_array_2d(dtype_group=sfst.DTGroup.NUMERIC, max_rows=20, max_columns=20))
    def test_rank_2d_max(self, value: np.ndarray) -> None:
        """Max 2D ranks with ``start=1`` match scipy along both axes."""
        # cannot compare values with NaN as scipy uses quicksort
        if np.isnan(value).any():
            return

        for axis in range(2):
            expected = rankdata(value, method='max', axis=axis)
            actual = rank_2d(value, method='max', start=1, axis=axis)
            self.assertEqual(expected.tolist(), actual.tolist())

    @given(sfst.get_array_2d(dtype_group=sfst.DTGroup.NUMERIC, max_rows=20, max_columns=20))
    def test_rank_2d_average(self, value: np.ndarray) -> None:
        """``rank_2d`` 'mean' ranks with ``start=1`` match scipy's 'average' ranks."""
        # cannot compare values with NaN as scipy uses quicksort
        if np.isnan(value).any():
            return

        for axis in range(2):
            expected = rankdata(value, method='average', axis=axis)
            actual = rank_2d(value, method='mean', start=1, axis=axis)
            self.assertEqual(expected.tolist(), actual.tolist())
Ejemplo n.º 3
0
class TestUnit(TestCase):
    """Property-based tests of ``util`` functions, fed by hypothesis strategies."""

    @given(get_array_1d2d())  # type: ignore
    def test_mloc(self, array: np.ndarray) -> None:
        """Check that ``util.mloc`` gives an ``int`` for a generated array."""
        location = util.mloc(array)
        self.assertTrue(isinstance(location, int))

    @given(get_dtype_pairs())  # type: ignore
    def test_resolve_dtype(self, dtype_pair: tp.Tuple[np.dtype, np.dtype]) -> None:
        """Check that resolving a pair of dtypes gives an ``np.dtype``."""
        resolved = util.resolve_dtype(*dtype_pair)
        self.assertTrue(isinstance(resolved, np.dtype))

    @given(get_dtypes(min_size=1))  # type: ignore
    def test_resolve_dtype_iter(self, dtypes: tp.Iterable[np.dtype]) -> None:
        """Check that resolving an iterable of dtypes gives an ``np.dtype``."""
        resolved = util.resolve_dtype_iter(dtypes)
        self.assertTrue(isinstance(resolved, np.dtype))

    @given(get_labels(min_size=1))  # type: ignore
    def test_resolve_type_iter(self, objects: tp.Iterable[object]) -> None:
        """Check that the resolved type is one of a fixed set of known types."""
        known_types = {
            None,
            type(None),
            bool,
            str,
            object,
            int,
            float,
            complex,
            datetime.date,
            datetime.datetime,
            fractions.Fraction,
        }
        # three values are returned; only the first is checked here
        resolved, _has_tuple, _values_post = util.resolve_type_iter(objects)
        self.assertTrue(resolved in known_types)

    @given(get_arrays_2d_aligned_columns())  # type: ignore
    def test_concat_resolved_axis_0(self, arrays: tp.List[np.ndarray]) -> None:
        """Check ndim and resolved dtype after concatenating on axis 0."""
        joined = util.concat_resolved(arrays, axis=0)
        self.assertEqual(joined.ndim, 2)
        dtype_expected = util.resolve_dtype_iter((a.dtype for a in arrays))
        self.assertEqual(joined.dtype, dtype_expected)

    @given(get_arrays_2d_aligned_rows())  # type: ignore
    def test_concat_resolved_axis_1(self, arrays: tp.List[np.ndarray]) -> None:
        """Check ndim and resolved dtype after concatenating on axis 1."""
        joined = util.concat_resolved(arrays, axis=1)
        self.assertEqual(joined.ndim, 2)
        dtype_expected = util.resolve_dtype_iter((a.dtype for a in arrays))
        self.assertEqual(joined.dtype, dtype_expected)

    @given(get_dtype(), get_shape_1d2d(), get_value())  # type: ignore
    def test_full_or_fill(self,
            dtype: np.dtype,
            shape: tp.Union[tp.Tuple[int], tp.Tuple[int, int]],
            value: object) -> None:
        """Check the shape of ``util.full_for_fill`` and that it holds the fill value."""
        filled = util.full_for_fill(dtype, shape, fill_value=value)
        self.assertTrue(filled.shape == shape)
        # membership is not tested for NaN fill values
        value_is_nan = isinstance(value, (float, complex)) and np.isnan(value)
        if not value_is_nan:
            self.assertTrue(value in filled)

    @given(get_dtype())  # type: ignore
    def test_dtype_to_na(self, dtype: util.DtypeSpecifier) -> None:
        """Check that ``util.dtype_to_na`` gives one of the expected NA values."""
        na_value = util.dtype_to_na(dtype)
        self.assertTrue(na_value in {0, False, None, '', np.nan, util.NAT})

    @given(get_array_1d(min_size=1, dtype_group=DTGroup.NUMERIC)) # type: ignore
    def test_ufunc_skipna_1d(self, array: np.ndarray) -> None:
        """Check each ``UFUNC_AXIS_SKIPNA`` function pair reduces 1D input to a non-array."""
        has_na = util.isna_array(array).any()

        for ufunc, ufunc_skipna, _dtype in UFUNC_AXIS_SKIPNA.values():
            with np.errstate(over='ignore', under='ignore'):
                # this should return a single value
                reduced_skipna = ufunc_skipna(array)
                self.assertFalse(isinstance(reduced_skipna, np.ndarray))

                # the plain ufunc is only exercised when NA values are present
                if has_na:
                    reduced = ufunc(array)
                    self.assertFalse(isinstance(reduced, np.ndarray))

    @given(get_array_1d2d()) # type: ignore
    def test_ufunc_unique(self, array: np.ndarray) -> None:
        """Check that the unique result is no longer than the first axis."""
        uniques = util.ufunc_unique(array, axis=0)
        self.assertTrue(len(uniques) <= array.shape[0])

    @given(get_array_1d(min_size=1), st.integers()) # type: ignore
    def test_roll_1d(self, array: np.ndarray, shift: int) -> None:
        """Check length and the first element after ``util.roll_1d``."""
        rolled = util.roll_1d(array, shift)
        self.assertEqual(len(rolled), len(array))
        # index of the element expected at position zero after rolling
        source_index = -(shift % len(array))
        self.assertEqualWithNaN(array[source_index], rolled[0])

    @given(get_array_2d(min_rows=1, min_columns=1), st.integers()) # type: ignore
    def test_roll_2d(self, array: np.ndarray, shift: int) -> None:
        """Check shape and the first row/column after ``util.roll_2d`` per axis."""
        for axis in (0, 1):
            rolled = util.roll_2d(array, shift=shift, axis=axis)
            self.assertEqual(rolled.shape, array.shape)

            # index of the row/column expected at position zero after rolling
            source_index = -(shift % array.shape[axis])

            if axis == 0:
                expected, actual = array[source_index], rolled[0]
            else:
                expected, actual = array[:, source_index], rolled[:, 0]

            self.assertAlmostEqualValues(expected, actual)

    @given(get_array_1d(dtype_group=DTGroup.OBJECT)) # type: ignore
    def test_iterable_to_array_a(self, array: np.ndarray) -> None:
        """Check list values survive ``util.iterable_to_array``, with and without dtype."""
        values = array.tolist()

        converted, _ = util.iterable_to_array(values)
        self.assertAlmostEqualValues(converted, values)

        # explicitly giving object dtype
        converted, _ = util.iterable_to_array(values, dtype=util.DTYPE_OBJECT)
        self.assertAlmostEqualValues(converted, values)

    @given(get_labels()) # type: ignore
    def test_iterable_to_array_b(self, labels: tp.Iterable[tp.Any]) -> None:
        """Check generated labels convert to an ``np.ndarray`` with matching values."""
        converted, _ = util.iterable_to_array(labels)
        self.assertAlmostEqualValues(converted, labels)
        self.assertTrue(isinstance(converted, np.ndarray))

    @given(st.slices(10)) # type: ignore
    def test_slice_to_ascending_slice(self, key: slice) -> None:
        """Check the ascending slice selects the same positions as the original."""
        ascending_key = util.slice_to_ascending_slice(key, size=10)
        positions_original = set(range(*key.indices(10)))
        positions_ascending = set(range(*ascending_key.indices(10)))
        self.assertEqual(positions_original, positions_ascending)

    # NOTE(review): presumably placeholders for tests not yet written:
    # to_datetime64
    # to_timedelta64
    # key_to_datetime_key

    @given(get_array_1d2d()) # type: ignore
    def test_array_to_groups_and_locations(self, array: np.ndarray) -> None:
        """Check groups and locations from ``util.array_to_groups_and_locations``."""
        groups, locations = util.array_to_groups_and_locations(array, 0)

        if len(array) > 0:
            self.assertTrue(len(groups) >= 1)

        # locations are always 1D
        self.assertTrue(locations.ndim == 1)
        self.assertTrue(len(np.unique(locations)) == len(groups))

    @given(get_array_1d2d()) # type: ignore
    def test_isna_array(self, array: np.ndarray) -> None:
        """Check ``util.isna_array`` is boolean and agrees with ``util.isna_element``."""
        na_mask = util.isna_array(array)
        self.assertTrue(na_mask.dtype == bool)

        # count NA values one element at a time as the reference
        flat_values = np.ravel(array)
        count_na = sum(util.isna_element(element) for element in flat_values)

        self.assertTrue(np.ravel(na_mask).sum() == count_na)

    @given(get_array_1d(dtype_group=DTGroup.BOOL)) # type: ignore
    def test_binary_transition(self, array: np.ndarray) -> None:
        """Check dtype and selected positions of ``util.binary_transition``."""
        transitions = util.binary_transition(array)

        # could be 32 via result of np.nonzero
        self.assertTrue(transitions.dtype in (np.int32, np.int64))

        if array.sum() == 0 or array.sum() == len(array):
            # with no True values, or with all True values, the result is empty
            self.assertTrue(len(transitions) == 0)
        else:
            # the selection should always be indices that are False
            self.assertTrue(array[transitions].sum() == 0)