Ejemplo n.º 1
0
class TestUnit(TestCase):
    """Property tests for ``Frame`` / ``FrameGO`` fed by ``sfst`` strategies.

    Every test receives its inputs through ``@given`` and asserts an
    invariant that is expected to hold for each generated value.
    """

    @given(sfst.get_frame_or_frame_go())
    def test_basic_attributes(self, f1: Frame) -> None:
        """Check that shape-derived attributes agree with ``f1.shape``."""
        self.assertEqual(len(f1.dtypes), f1.shape[1])
        # self.assertEqual(f1.shape, f1.shape)
        self.assertEqual(f1.ndim, 2)
        # self.assertEqual(f1.unified, len(f1.mloc) <= 1)

        if f1.shape[0] > 0 and f1.shape[1] > 0:
            self.assertGreater(f1.size, 0)
            self.assertGreater(f1.nbytes, 0)
        else:
            # a frame with no rows or no columns holds no data
            self.assertEqual(f1.size, 0)
            self.assertEqual(f1.nbytes, 0)

    @given(sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.NUMERIC))
    def test_unary_operators_numeric(self, f1: Frame) -> None:
        """Compare each unary operator on the frame with the same operator on its array."""
        for op in UFUNC_UNARY_OPERATORS:
            if op == '__invert__':  # invalid on non Boolean
                continue
            func = getattr(operator, op)
            values = f1.values
            # must coerce all blocks to same type to compare to what NP does
            a = func(f1.astype(values.dtype)).values
            b = func(values)
            self.assertAlmostEqualArray(a, b)

    @given(sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.BOOL))
    def test_unary_operators_boolean(self, f1: Frame) -> None:
        """Compare ``__invert__`` on a Boolean frame with ``__invert__`` on its array."""
        for op in UFUNC_UNARY_OPERATORS:
            if op != '__invert__':  # valid on Boolean
                continue
            func = getattr(operator, op)
            a = func(f1).values
            b = func(f1.values)
            self.assertAlmostEqualArray(a, b)

    @given(sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.NUMERIC))
    def test_binary_operators_numeric(self, f1: Frame) -> None:
        """Compare binary operators applied frame-to-frame with array-to-array results.

        The operators listed in the skip set are not exercised by this test.
        """
        for op in UFUNC_BINARY_OPERATORS:
            if op in {
                    '__matmul__',
                    '__pow__',
                    '__lshift__',
                    '__rshift__',
                    '__and__',
                    '__xor__',
                    '__or__',
                    '__mod__',
                    '__floordiv__',
            }:
                continue
            func = getattr(operator, op)
            values = f1.values
            # must coerce all blocks to same type to compare to what NP does
            f2 = f1.astype(values.dtype)
            a = func(f2, f2).values
            b = func(values, values)
            self.assertAlmostEqualArray(a, b)

    @given(sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.BOOL))
    def test_binary_operators_boolean(self, f1: Frame) -> None:
        """Compare ``__and__`` / ``__xor__`` / ``__or__`` on a Boolean frame with the array result."""
        for op in UFUNC_BINARY_OPERATORS:
            if op not in {
                    '__and__',
                    '__xor__',
                    '__or__',
            }:
                continue
            func = getattr(operator, op)
            a = func(f1, f1).values
            values = f1.values
            b = func(values, values)
            self.assertAlmostEqualArray(a, b)

    @given(
        sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.NUMERIC,
                                   index_dtype_group=sfst.DTGroup.STRING,
                                   min_rows=3,
                                   max_rows=3,
                                   min_columns=3,
                                   max_columns=3))
    def test_binary_operators_matmul(
        self,
        f1: Frame,
    ) -> None:
        """Compare ``@`` between two 3x3 frames with ``@`` between their arrays."""
        # relabel the columns with the index labels so the two operands align
        f2 = f1.relabel(columns=f1.index)
        f3 = f2 @ f1
        self.assertAlmostEqualArray(f3.values, f2.values @ f1.values)

    # from hypothesis import reproduce_failure
    # NOTE: was able to improve many of these, but continued to get complicated type cases, and complications
    @given(
        sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.NUMERIC_REAL,
                                   min_rows=1,
                                   min_columns=1))
    def test_ufunc_axis(self, f1: Frame) -> None:
        """Call each axis-reducing method and its skipna ufunc on both axes.

        NOTE(review): the two results are computed but not compared (the
        comparison below is disabled), so this currently only checks that the
        calls do not raise.
        """
        for attr, attrs in UFUNC_AXIS_SKIPNA.items():

            if attr in ('std', 'var'):
                continue

            for axis in (0, 1):
                values = f1.values
                # must coerce all blocks to same type to compare to what NP does
                # f2 = f1.astype(values.dtype)

                a = getattr(f1, attr)(axis=axis).values  # call the method
                b = attrs.ufunc_skipna(values, axis=axis)

    #             if a.dtype != b.dtype:
    #                 continue
    #             try:
    #                 self.assertAlmostEqualArray(a, b)
    #             except:
    #                 import ipdb; ipdb.set_trace()
    #                 raise

    @given(sfst.get_frame())
    def test_frame_isin(self, f1: Frame) -> None:
        """Check that ``isin`` reports the first element as contained in the frame.

        NA values and datetime64 / timedelta64 elements are skipped.
        """
        value = f1.iloc[0, 0]
        if (not isna_element(value)
                and not isinstance(value, (np.datetime64, np.timedelta64))):
            self.assertTrue(f1.isin((value, )).iloc[0, 0])

    # # TODO: integer tests with pow, mod

    #---------------------------------------------------------------------------

    @given(sfst.get_frame_go(), sfst.get_label())
    def test_frame_go_setitem(self, f1: Frame, label: tp.Hashable) -> None:
        """Check that assigning a new column grows the column count by one."""
        shape = f1.shape
        f1['foo'] = label  # type: ignore
        self.assertEqual(shape[1] + 1, f1.shape[1])

    @given(sfst.get_arrays_2d_aligned_rows(min_size=2, max_size=2))
    def test_frame_go_extend(self, arrays: tp.Sequence[np.ndarray]) -> None:
        """Check that ``extend`` adds as many columns as the extending frame has."""
        f1 = FrameGO(arrays[0], columns=self.get_letters(arrays[0].shape[1]))
        shape = f1.shape
        f2 = Frame(arrays[1])
        f1.extend(f2)
        self.assertEqual(f1.shape[1], shape[1] + f2.shape[1])

    @given(sfst.get_arrays_2d_aligned_rows(min_size=3))
    def test_frame_go_extend_items(self,
                                   arrays: tp.Sequence[np.ndarray]) -> None:
        """Check that ``extend_items`` adds one column per supplied (label, Series) pair."""
        frame_array = arrays[0]
        # just take the first column from the remaining 2D arrays
        series_arrays = [a[:, 0] for a in arrays[1:]]

        f1 = FrameGO(frame_array)
        shape = f1.shape

        letters = self.get_letters(len(series_arrays))

        def items() -> tp.Iterator[tp.Tuple[tp.Hashable, Series]]:
            # pair each label with its column array; every Series shares f1's index
            for label, array in zip(letters, series_arrays):
                yield label, Series(array, index=f1.index)

        f1.extend_items(items())

        self.assertEqual(f1.shape[1], shape[1] + len(series_arrays))

    #---------------------------------------------------------------------------
    # exporters

    @given(sfst.get_frame_or_frame_go())
    def test_frame_to_pairs(self, f1: Frame) -> None:
        """Check the structure of ``to_pairs`` along axis 0.

        NOTE(review): only axis 0 is exercised. An axis-1 check used to sit in
        a branch that ``range(0, 1)`` could never reach; confirm the expected
        length of ``to_pairs(1)`` before adding axis-1 coverage.
        """
        post = f1.to_pairs(0)
        # the inner pairs of the first entry hold one item per row
        self.assertEqual(len(post[0][1]), f1.shape[0])  # type: ignore
        self.assertIsInstance(post, tuple)

    @given(
        sfst.get_frame_or_frame_go(
            dtype_group=sfst.DTGroup.BASIC,
            index_dtype_group=sfst.DTGroup.BASIC,
        ))
    def test_frame_to_pandas(self, f1: Frame) -> None:
        """Check that ``to_pandas`` keeps the shape and, when there are no NA values, the values."""
        post = f1.to_pandas()
        self.assertEqual(post.shape, f1.shape)
        # element-wise equality is only checked when no NA values are present
        if not f1.isna().any().any():
            self.assertTrue((post.values == f1.values).all())

    @given(
        sfst.get_frame_or_frame_go(
            dtype_group=sfst.DTGroup.BASIC,
            index_dtype_group=sfst.DTGroup.BASIC,
        ))
    def test_frame_to_parquet(self, f1: Frame) -> None:
        """Check that ``to_parquet`` writes a non-empty file.

        ``ArrowNotImplementedError`` is tolerated for inputs pyarrow cannot encode.
        """
        import pyarrow
        with temp_file('.parquet') as fp:
            try:
                f1.to_parquet(fp)
                self.assertGreater(os.stat(fp).st_size, 0)
            except pyarrow.lib.ArrowNotImplementedError:
                # could be Byte-swapped arrays not supported
                pass

    @given(
        sfst.get_frame_or_frame_go(
            dtype_group=sfst.DTGroup.CORE,
            index_dtype_group=sfst.DTGroup.CORE,
        ))
    def test_frame_to_msgpack(self, f1: Frame) -> None:
        """Check that a msgpack round trip reproduces the frame, name, dtypes and class.

        The round trip is performed twice: once from a stored message and once
        from a freshly encoded one.
        """
        msg = f1.to_msgpack()

        f2 = Frame.from_msgpack(msg)
        # use TestCase assertions rather than bare ``assert`` so the check
        # is not stripped when Python runs with -O
        self.assertTrue(
            f1.equals(f2,
                      compare_name=True,
                      compare_dtype=True,
                      compare_class=True))

        f2 = Frame.from_msgpack(f1.to_msgpack())
        self.assertTrue(
            f1.equals(f2,
                      compare_name=True,
                      compare_dtype=True,
                      compare_class=True))

    @given(
        sfst.get_frame_or_frame_go(
            dtype_group=sfst.DTGroup.BASIC,
            index_dtype_group=sfst.DTGroup.BASIC,
        ))
    def test_frame_to_xarray(self, f1: Frame) -> None:
        """Check that the keys of the ``to_xarray`` result match the frame's columns."""
        xa = f1.to_xarray()
        self.assertTrue(tuple(xa.keys()) == tuple(f1.columns))

    @given(
        sfst.get_frame(
            dtype_group=sfst.DTGroup.BASIC,
            index_dtype_group=sfst.DTGroup.BASIC,
        ))
    def test_frame_to_frame_go(self, f1: Frame) -> None:
        """Check that the result of ``to_frame_go`` accepts a new column."""
        f2 = f1.to_frame_go()
        f2['__new__'] = 10
        self.assertEqual(len(f2.columns), len(f1.columns) + 1)

    @skip_win  # type: ignore # get UnicodeEncodeError: 'charmap' codec can't encode character '\u0162' in position 0: character maps to <undefined>
    @given(sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.BASIC, ))
    def test_frame_to_csv(self, f1: Frame) -> None:
        """Check that ``to_csv`` writes a non-empty file."""
        with temp_file('.csv') as fp:
            f1.to_csv(fp)
            self.assertGreater(os.stat(fp).st_size, 0)

            # not yet validating result, as edge cases with unusual unicode and non-unique indices are a problem
            # f2 = Frame.from_csv(fp,
            #         index_depth=f1.index.depth,
            #         columns_depth=f1.columns.depth)

    @skip_win  # type: ignore # UnicodeEncodeError
    @given(sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.BASIC, ))
    def test_frame_to_tsv(self, f1: Frame) -> None:
        """Check that ``to_tsv`` writes a non-empty file."""
        with temp_file('.txt') as fp:
            f1.to_tsv(fp)
            self.assertGreater(os.stat(fp).st_size, 0)

    @given(sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.BASIC, ))
    def test_frame_to_xlsx(self, f1: Frame) -> None:
        """Check that ``to_xlsx`` writes a non-empty file."""
        with temp_file('.xlsx') as fp:
            f1.to_xlsx(fp)
            self.assertGreater(os.stat(fp).st_size, 0)

    @given(sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.BASIC, ))
    def test_frame_to_sqlite(self, f1: Frame) -> None:
        """Check that ``to_sqlite`` writes a non-empty file.

        Integrity, operational and overflow errors are tolerated; see the
        comments in the handler for the known causes.
        """
        with temp_file('.sqlite') as fp:

            try:
                f1.to_sqlite(fp)
                self.assertGreater(os.stat(fp).st_size, 0)
            except (sqlite3.IntegrityError, sqlite3.OperationalError,
                    OverflowError):
                # some indices, after translation, are not unique
                # SQLite is not case sensitive, and does not support unicode
                # OverflowError: Python int too large to convert to SQLite INTEGER
                pass

    @given(
        sfst.get_frame_or_frame_go(dtype_group=sfst.DTGroup.BASIC,
                                   columns_dtype_group=sfst.DTGroup.STRING,
                                   index_dtype_group=sfst.DTGroup.STRING))
    def test_frame_to_hdf5(self, f1: Frame) -> None:
        """Check that ``to_hdf5`` writes a non-empty file for a named frame.

        ``ValueError`` is tolerated for inputs HDF5 cannot store.
        """
        f1 = f1.rename('f1')
        with temp_file('.hdf5') as fp:

            try:
                f1.to_hdf5(fp)
                self.assertGreater(os.stat(fp).st_size, 0)
            except ValueError:
                # will happen for empty strings and unicode that cannot be handled by HDF5
                pass

    @given(sfst.get_frame_or_frame_go())
    def test_frame_to_html(self, f1: Frame) -> None:
        """Check that ``to_html`` returns non-empty output."""
        post = f1.to_html()
        self.assertGreater(len(post), 0)

    @skip_win  # type: ignore # UnicodeEncodeError
    @given(sfst.get_frame_or_frame_go())
    def test_frame_to_html_datatables(self, f1: Frame) -> None:
        """Check that ``to_html_datatables(show=False)`` returns non-empty output."""
        post = f1.to_html_datatables(show=False)
        self.assertGreater(len(post), 0)

    @given(sfst.get_frame_or_frame_go())
    def test_frame_to_rst(self, f1: Frame) -> None:
        """Check that ``to_rst`` returns non-empty output."""
        post = f1.to_rst()
        self.assertGreater(len(post), 0)

    @given(sfst.get_frame_or_frame_go())
    def test_frame_to_markdown(self, f1: Frame) -> None:
        """Check that ``to_markdown`` returns non-empty output."""
        post = f1.to_markdown()
        self.assertGreater(len(post), 0)

    @given(sfst.get_frame_or_frame_go())
    def test_frame_to_latex(self, f1: Frame) -> None:
        """Check that ``to_latex`` returns non-empty output."""
        post = f1.to_latex()
        self.assertGreater(len(post), 0)

    @given(sfst.get_frame_or_frame_go())
    def test_frame_blocks_dont_have_reference_cycles(self, f1: Frame) -> None:
        """Check that the frame is the only object the GC reports as referring to its blocks."""
        self.assertEqual([f1], gc.get_referrers(f1._blocks))
Ejemplo n.º 2
0
class TestUnit(TestCase):
    """Sanity tests for the ``sfst`` strategies themselves.

    Each test draws a value from one strategy via ``@given`` and checks that
    the value has the expected type and internally consistent dimensions.
    """

    @given(sfst.get_labels())  # type: ignore
    def test_get_labels(self, values: tp.Iterable[tp.Hashable]) -> None:
        """Check that every generated label is hashable to an ``int``."""
        for value in values:
            self.assertIsInstance(hash(value), int)

    @given(sfst.get_dtypes())  # type: ignore
    def test_get_dtypes(self, dtypes: tp.Iterable[np.dtype]) -> None:
        """Check that every generated item is a ``np.dtype``."""
        for dt in dtypes:
            self.assertIsInstance(dt, np.dtype)

    @given(sfst.get_spacing(10))  # type: ignore
    def test_get_spacing_10(self, spacing: tp.Iterable[int]) -> None:
        """Check that the generated spacing values sum to the requested total of 10."""
        self.assertEqual(sum(spacing), 10)

    @hypo_settings(max_examples=10)  # type: ignore
    @given(sfst.get_shape_1d2d())  # type: ignore
    def test_get_shape_1d2d(self, shape: tp.Tuple[int, ...]) -> None:
        """Check that the generated shape is a tuple of length 1 or 2."""
        self.assertIsInstance(shape, tuple)
        self.assertIn(len(shape), (1, 2))

    @hypo_settings(max_examples=10)  # type: ignore
    @given(sfst.get_array_1d2d())  # type: ignore
    def test_get_array_1d2d(self, array: np.ndarray) -> None:
        """Check that the generated array is an ``ndarray`` with 1 or 2 dimensions."""
        self.assertIsInstance(array, np.ndarray)
        self.assertIn(array.ndim, (1, 2))

    @hypo_settings(max_examples=10)  # type: ignore
    @given(sfst.get_arrays_2d_aligned_columns(min_size=2))  # type: ignore
    def test_get_arrays_2s_aligned_columns(self, arrays: tp.Iterable[np.ndarray]) -> None:
        """Check that all generated arrays share the column count of the first one."""
        array_iter = iter(arrays)
        first = next(array_iter)
        expected_columns = first.shape[1]
        for array in array_iter:
            self.assertEqual(array.shape[1], expected_columns)

    @given(sfst.get_arrays_2d_aligned_rows(min_size=2))  # type: ignore
    def test_get_arrays_2s_aligned_rows(self, arrays: tp.Iterable[np.ndarray]) -> None:
        """Check that all generated arrays share the row count of the first one."""
        array_iter = iter(arrays)
        first = next(array_iter)
        expected_rows = first.shape[0]
        for array in array_iter:
            self.assertEqual(array.shape[0], expected_rows)

    @hypo_settings(max_examples=10)  # type: ignore
    @given(sfst.get_blocks())  # type: ignore
    def test_get_blocks(self, blocks: tp.Tuple[np.ndarray, ...]) -> None:
        """Check that the blocks are a tuple of 1D or 2D ``ndarray`` objects."""
        self.assertIsInstance(blocks, tuple)
        for b in blocks:
            self.assertIsInstance(b, np.ndarray)
            self.assertIn(b.ndim, (1, 2))

    @hypo_settings(max_examples=10)  # type: ignore
    @given(sfst.get_type_blocks())  # type: ignore
    def test_get_type_blocks(self, tb: TypeBlocks) -> None:
        """Check that the blocks of a ``TypeBlocks`` add up to its reported shape."""
        self.assertIsInstance(tb, TypeBlocks)
        rows, cols = tb.shape
        col_count = 0
        for b in tb._blocks:
            if b.ndim == 1:
                # a 1D block contributes a single column
                self.assertEqual(len(b), rows)
                col_count += 1
            else:
                self.assertEqual(b.ndim, 2)
                self.assertEqual(b.shape[0], rows)
                col_count += b.shape[1]

        self.assertEqual(col_count, cols)

    @hypo_settings(max_examples=10) # type: ignore
    @given(sfst.get_index()) # type: ignore
    def test_get_index(self, idx: Index) -> None:
        """Check that the generated index is an ``Index`` whose length matches its values."""
        self.assertIsInstance(idx, Index)
        self.assertEqual(len(idx), len(idx.values))

    @hypo_settings(max_examples=10)  # type: ignore
    @given(sfst.get_index_hierarchy()) # type: ignore
    def test_get_index_hierarchy(self, idx: IndexHierarchy) -> None:
        """Check that the generated hierarchy has depth above 1 and consistent length."""
        self.assertIsInstance(idx, IndexHierarchy)
        self.assertGreater(idx.depth, 1)
        self.assertEqual(len(idx), len(idx.values))

    @hypo_settings(max_examples=10)  # type: ignore
    @given(sfst.get_series())  # type: ignore
    def test_get_series(self, series: Series) -> None:
        """Check that the generated object is a ``Series`` whose length matches its values."""
        self.assertIsInstance(series, Series)
        self.assertEqual(len(series), len(series.values))

    @hypo_settings(max_examples=10)  # type: ignore
    @given(sfst.get_frame())  # type: ignore
    def test_get_frame(self, frame: Frame) -> None:
        """Check that the generated object is a ``Frame`` whose shape matches its values."""
        self.assertIsInstance(frame, Frame)
        self.assertEqual(frame.shape, frame.values.shape)

    @hypo_settings(max_examples=10)  # type: ignore
    @given(sfst.get_frame(index_cls=IndexHierarchy, columns_cls=IndexHierarchy))  # type: ignore
    def test_get_frame_hierarchy(self, frame: Frame) -> None:
        """Check that a frame requested with hierarchical axes has depth above 1 on both."""
        self.assertIsInstance(frame, Frame)
        self.assertGreater(frame.index.depth, 1)
        self.assertGreater(frame.columns.depth, 1)
        self.assertEqual(frame.shape, frame.values.shape)
Ejemplo n.º 3
0
class TestUnit(TestCase):
    """Property tests for NPZ (frame) and NPY (array) round trips."""

    def _assert_npz_round_trip(self, f1: Frame) -> None:
        """Write ``f1`` to a temporary NPZ file, read it back and require equality.

        Equality includes name, dtypes and class.
        """
        with temp_file('.npz') as fp:
            f1.to_npz(fp)
            f2 = Frame.from_npz(fp)
            self.assertTrue(
                f1.equals(f2,
                          compare_name=True,
                          compare_dtype=True,
                          compare_class=True))

    def _assert_array_matches(self,
                              expected: np.ndarray,
                              actual: np.ndarray) -> None:
        """Require ``actual`` to match ``expected`` in values and shape.

        Arrays whose dtype kind is in ``DTYPE_INEXACT_KINDS`` are compared
        with ``assertAlmostEqualArray``; all others must be exactly equal.
        """
        if actual.dtype.kind in DTYPE_INEXACT_KINDS:
            self.assertAlmostEqualArray(expected, actual)
        else:
            self.assertTrue((expected == actual).all())
        self.assertEqual(expected.shape, actual.shape)

    @given(
        sfst.get_frame(
            dtype_group=sfst.DTGroup.ALL_NO_OBJECT,
            index_dtype_group=sfst.DTGroup.BASIC,
            columns_dtype_group=sfst.DTGroup.BASIC,
        ))
    def test_frame_to_npz_a(self, f1: Frame) -> None:
        """Round-trip a frame with BASIC index and columns through NPZ."""
        self._assert_npz_round_trip(f1)

    @given(
        sfst.get_frame(
            dtype_group=sfst.DTGroup.ALL_NO_OBJECT,
            index_cls=IndexDate,
            index_dtype_group=sfst.DTGroup.DATE,
            columns_cls=IndexDate,
            columns_dtype_group=sfst.DTGroup.DATE,
        ))
    def test_frame_to_npz_b(self, f1: Frame) -> None:
        """Round-trip a frame with ``IndexDate`` index and columns through NPZ."""
        self._assert_npz_round_trip(f1)

    @given(sfst.get_array_1d2d(dtype_group=sfst.DTGroup.ALL_NO_OBJECT))
    def test_frame_to_npy_a(self, a1: np.ndarray) -> None:
        """Write an array with ``NPYConverter`` and read it back two ways.

        The file is read once with ``np.load`` and once with
        ``NPYConverter.from_npy``; both results must match the input.
        """
        header_decode_cache: HeaderDecodeCacheType = {}

        with temp_file('.npy') as fp:
            with open(fp, 'wb') as f:
                NPYConverter.to_npy(f, a1)

            # check compatibility with built-in NPY reading
            a2 = np.load(fp)
            self._assert_array_matches(a1, a2)

            with open(fp, 'rb') as f:
                a3, _ = NPYConverter.from_npy(f, header_decode_cache)
                self._assert_array_matches(a1, a3)

    @given(sfst.get_array_1d2d(dtype_group=sfst.DTGroup.ALL_NO_OBJECT))
    def test_frame_to_npy_b(self, a1: np.ndarray) -> None:
        """Write an array with ``NPYConverter`` and read it back with ``memory_map=True``."""
        header_decode_cache: HeaderDecodeCacheType = {}

        with temp_file('.npy') as fp:
            with open(fp, 'wb') as f:
                NPYConverter.to_npy(f, a1)

            with open(fp, 'rb') as f:
                # NOTE(review): ``mm`` is kept bound while ``a2`` is compared;
                # presumably it is the memory map backing ``a2`` — confirm
                # whether it needs an explicit close.
                a2, mm = NPYConverter.from_npy(
                    f,
                    header_decode_cache,
                    memory_map=True,
                )
                self._assert_array_matches(a1, a2)