Example #1
0
    def test_store_config_map_init_b(self) -> None:
        # Every per-label config and the explicit default use `str` as the
        # label encoder; check that the map exposes that default's encoder.
        per_label = {
            label: StoreConfig(index_depth=depth, label_encoder=str)
            for label, depth in (('a', 2), ('b', 3))
        }
        fallback = StoreConfig(label_encoder=str)

        config_map = StoreConfigMap(per_label, default=fallback)
        self.assertEqual(config_map.default.label_encoder, str)
    def test_store_xlsx_write_b(self) -> None:
        # Round-trip a frame with hierarchical columns and None / NaN / inf
        # values through an XLSX store.
        records = (
                (None, np.nan, 50, 'a'),
                (None, -np.inf, -50, 'b'),
                (None, 60.4, -50, 'c'),
                )
        columns = IndexHierarchy.from_product(('I', 'II'), ('a', 'b'))
        f1 = Frame.from_records(records, index=('p', 'q', 'r'), columns=columns)

        write_config = StoreConfigMap.from_config(
                StoreConfig(include_index=True, include_columns=True))

        with temp_file('.xlsx') as fp:
            st = StoreXLSX(fp)
            st.write(((STORE_LABEL_DEFAULT, f1),), config=write_config)

            # read back using the depths of the frame that was written
            read_config = StoreConfig(
                    index_depth=f1.index.depth,
                    columns_depth=f1.columns.depth,
                    )
            f2 = st.read(STORE_LABEL_DEFAULT, config=read_config)

            # spot-check a single column before the full comparison
            key = HLoc[('II', 'a')]
            written = f1[key].values.tolist()
            loaded = f2[key].values.tolist()
            self.assertEqual(written, loaded)

            self.assertEqualFrames(f1, f2)
Example #3
0
    def test_store_xlsx_read_d(self) -> None:
        # Write a frame without its index, then read it once with no config
        # and once with both depths set to zero.
        source = Frame.from_records(
                ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
                index=('p', 'q'),
                columns=('a', 'b', 'c', 'd'),
                name='f1',
                )

        write_config = StoreConfig(include_index=False, include_columns=True)
        raw_config = StoreConfig(columns_depth=0, index_depth=0)

        expected_default = (
                ('a', ((0, 10), (1, 50))),
                ('b', ((0, 20.0), (1, 60.4))),
                ('c', ((0, 50), (1, -50))),
                ('d', ((0, 60), (1, -60))),
                )
        # with zero depth on both axes the written header row is expected
        # back as the first row of data
        expected_raw = (
                (0, ((0, 'a'), (1, 10), (2, 50))),
                (1, ((0, 'b'), (1, 20), (2, 60.4))),
                (2, ((0, 'c'), (1, 50), (2, -50))),
                (3, ((0, 'd'), (1, 60), (2, -60))),
                )

        with temp_file('.xlsx') as fp:
            st = StoreXLSX(fp)
            st.write(((STORE_LABEL_DEFAULT, source),), config=write_config)

            # no config passed: the store falls back to its default
            default_read = st.read(STORE_LABEL_DEFAULT)
            self.assertEqual(default_read.to_pairs(0), expected_default)

            raw_read = st.read(STORE_LABEL_DEFAULT, config=raw_config)
            self.assertEqual(raw_read.to_pairs(0), expected_raw)
Example #4
0
    def test_store_config_map_get_default_a(self) -> None:
        # A map built from a plain dict, with no explicit default, should
        # report StoreConfigMap._DEFAULT as its default.
        per_label = dict(
            a=StoreConfig(index_depth=2),
            b=StoreConfig(index_depth=3),
        )

        config_map = StoreConfigMap.from_initializer(per_label)
        self.assertTrue(config_map.default == StoreConfigMap._DEFAULT)
Example #5
0
    def test_store_config_map_init_a(self) -> None:
        # Only 'b' sets a label_encoder; building a map from these configs
        # is expected to raise ErrorInitStoreConfig.
        mixed = dict(
            a=StoreConfig(index_depth=2),
            b=StoreConfig(index_depth=3, label_encoder=str),
        )

        with self.assertRaises(ErrorInitStoreConfig):
            StoreConfigMap.from_initializer(mixed)
Example #6
0
    def test_store_config_map_b(self) -> None:
        # Known labels return their own config; an unknown label ('c')
        # returns a config whose index_depth is 0.
        per_label = dict(
            a=StoreConfig(index_depth=2),
            b=StoreConfig(index_depth=3),
        )
        config_map = StoreConfigMap(per_label)

        for label, depth in (('a', 2), ('b', 3), ('c', 0)):
            self.assertEqual(config_map[label].index_depth, depth)
    def test_store_sqlite_read_many_a(self) -> None:
        # Write four frames of differing index / columns depth to SQLite and
        # read them back in one read_many call using per-label configs.
        f1 = Frame.from_dict(
            {'x': (1, 2, -5, 200), 'y': (3, 4, -5, -3000)},
            index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f1',
        )
        f2 = Frame.from_dict(
            {'a': (1, 2, 3), 'b': (4, 5, 6)},
            index=('x', 'y', 'z'),
            name='f2',
        )
        f3 = Frame.from_records(
            ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
            index=('p', 'q'),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f3',
        )

        # each record is a four-value row repeated twice, and the set of
        # four records is itself repeated twice (8 rows x 8 columns)
        half_rows = (
            (10, 20, 50, False),
            (50.0, 60.4, -50, True),
            (234, 44452, 0, False),
            (4, -4, 2000, True),
        )
        f4_records = tuple(row * 2 for row in half_rows) * 2
        f4 = Frame.from_records(
            f4_records,
            index=IndexHierarchy.from_product(
                ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(
                ('I', 'II'), ('a', 'b'), (1, 2)),
            name='f4',
        )

        frames = (f1, f2, f3, f4)
        config_map_write = StoreConfigMap.from_config(
            StoreConfig(include_index=True, include_columns=True))

        with temp_file('.sqlite') as fp:
            st1 = StoreSQLite(fp)
            st1.write(((f.name, f) for f in frames), config=config_map_write)

            # labels come from the file on disk, not from memory
            labels = tuple(st1.labels())
            self.assertEqual(tuple(f.name for f in frames), labels)

            # labels and frames are aligned (asserted above), so pair them
            config_map_read: tp.Dict[tp.Hashable, StoreConfig] = {
                name: StoreConfig(index_depth=f_src.index.depth,
                                  columns_depth=f_src.columns.depth)
                for name, f_src in zip(labels, frames)
            }

            loaded = st1.read_many(labels, config=config_map_read)
            for f_src, f_loaded in zip(frames, loaded):
                self.assertEqualFrames(f_src, f_loaded, compare_dtype=False)
Example #8
0
    def test_store_config_map_a(self) -> None:
        # A map built from a single config returns that config's settings
        # for any label requested.
        labels = ('a', 'b')

        depth_map = StoreConfigMap.from_config(
            StoreConfig(index_depth=3, columns_depth=3))
        for label in labels:
            self.assertEqual(depth_map[label].index_depth, 3)

        no_index_map = StoreConfigMap.from_config(
            StoreConfig(include_index=False))
        for label in labels:
            self.assertEqual(no_index_map[label].include_index, False)
    def test_store_xlsx_write_a(self) -> None:
        # Write four frames of differing index / columns depth to one XLSX
        # workbook, then read each sheet back with a depth-matched config.

        f1 = Frame.from_dict(dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
                             index=IndexHierarchy.from_product(('I', 'II'),
                                                               ('a', 'b')),
                             name='f1')
        f2 = Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)),
                             index=('x', 'y', 'z'),
                             name='f2')
        f3 = Frame.from_records(((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
                                index=('p', 'q'),
                                columns=IndexHierarchy.from_product(
                                    ('I', 'II'), ('a', 'b')),
                                name='f3')
        f4 = Frame.from_records(
            (
                (10, 20, 50, False, 10, 20, 50, False),
                (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
                (234, 44452, 0, False, 234, 44452, 0, False),
                (4, -4, 2000, True, 4, -4, 2000, True),
                (10, 20, 50, False, 10, 20, 50, False),
                (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
                (234, 44452, 0, False, 234, 44452, 0, False),
                (4, -4, 2000, True, 4, -4, 2000, True),
            ),
            index=IndexHierarchy.from_product(
                ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b'),
                                                (1, 2)),
            name='f4')

        frames = (f1, f2, f3, f4)
        config_map = StoreConfigMap.from_config(
            StoreConfig(include_index=True, include_columns=True))

        with temp_file('.xlsx') as fp:

            st1 = StoreXLSX(fp)
            st1.write(((f.name, f) for f in frames), config=config_map)

            sheet_names = tuple(
                st1.labels())  # this will read from file, not in memory
            self.assertEqual(tuple(f.name for f in frames), sheet_names)

            for i, name in enumerate(sheet_names):
                f_src = frames[i]
                c = StoreConfig(index_depth=f_src.index.depth,
                                columns_depth=f_src.columns.depth)
                f_loaded = st1.read(name, config=c)
                # keyword matches the other assertEqualFrames call sites;
                # dtypes are deliberately not compared after the round trip
                self.assertEqualFrames(f_src, f_loaded, compare_dtype=False)
Example #10
0
    def test_store_config_he_a(self) -> None:
        # For every combination of depth-level, column-selection and dtypes
        # arguments: a StoreConfigHE is not equal to the corresponding
        # StoreConfig, equals its to_store_config_he() conversion, and hashes
        # to an int.
        he_kwargs = {
            'index_depth': 1,
            'columns_depth': 1,
            'consolidate_blocks': True,
            'skip_header': 1,
            'skip_footer': 1,
            'trim_nadir': True,
            'include_index': True,
            'include_index_name': True,
            'include_columns': True,
            'include_columns_name': True,
            'merge_hierarchical_labels': True,
            'read_max_workers': 1,
            'read_chunksize': 1,
            'write_max_workers': 1,
            'write_chunksize': 1,
        }

        # the label encoder / decoder are only given to StoreConfig
        kwargs = {
            **he_kwargs,
            'label_encoder': lambda x: x,
            'label_decoder': lambda x: x,
        }

        depth_level_options = (None, 1, [1, 2], (1, 2))
        columns_select_options = (None, ['a'], ('a', ))
        dtypes_options = (None, 'int', int, np.int64, [int], (int, ),
                          {'a': int})

        for depth_levels, columns_select, dtypes in product(
                depth_level_options,
                columns_select_options,
                dtypes_options,
        ):
            varying = {
                'index_name_depth_level': depth_levels,
                'columns_name_depth_level': depth_levels,
                'columns_select': columns_select,
                'dtypes': dtypes,
            }

            config = StoreConfig(**kwargs, **varying)  # type: ignore [arg-type]
            config_he = StoreConfigHE(**he_kwargs, **varying)  # type: ignore [arg-type]

            self.assertNotEqual(config_he, config)
            self.assertEqual(config_he, config.to_store_config_he())
            self.assertTrue(isinstance(hash(config_he), int))
Example #11
0
    def test_bus_to_xlsx_f(self) -> None:
        # Round-trip a frame holding datetime64 values, with date column
        # labels and datetime index labels, through an XLSX-backed Bus.
        f = Frame.from_records([
                [np.datetime64('1983-02-20 05:34:18.763'), np.datetime64('2020-08-01')],
                [np.datetime64('1975-03-20 05:20:18.001'), np.datetime64('2020-07-31')]
                ],
                columns=(date(2020, 7, 31), date(2020, 8, 1)),
                index=(datetime(2020, 7, 31, 14, 20, 8), datetime(2017, 4, 28, 2, 30, 2)),
                name='frame')
        b1 = Bus.from_frames([f])

        with temp_file('.xlsx') as fp:
            b1.to_xlsx(fp)

            config = StoreConfig(include_index=True, index_depth=1)
            b2 = Bus.from_xlsx(fp, config=config)
            tuple(b2.items()) # force loading all

        # index labels come back as datetime objects
        self.assertEqual(b2['frame'].index.values.tolist(),
                [datetime(2020, 7, 31, 14, 20, 8),
                datetime(2017, 4, 28, 2, 30, 2)])

        # NOTE(review): column labels are not asserted here; consider adding
        # a check once the expected round-trip type for date labels is confirmed.

        # values come back as datetime objects, keeping sub-second precision
        self.assertEqual(b2['frame'].values.tolist(),
                [[datetime(1983, 2, 20, 5, 34, 18, 763000), datetime(2020, 8, 1, 0, 0)],
                [datetime(1975, 3, 20, 5, 20, 18, 1000), datetime(2020, 7, 31, 0, 0)]]
                )
Example #12
0
    def test_bus_init_a(self) -> None:
        # Build a Bus from two frames, write it to a zipped-TSV archive,
        # then read one frame back directly through StoreZipTSV.
        foo = Frame.from_dict(
            {'a': (1, 2), 'b': (3, 4)},
            index=('x', 'y'),
            name='foo',
        )
        bar = Frame.from_dict(
            {'a': (1, 2, 3), 'b': (4, 5, 6)},
            index=('x', 'y', 'z'),
            name='bar',
        )

        config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
        b1 = Bus.from_frames((foo, bar), config=config)

        self.assertEqual(b1.keys().values.tolist(), ['foo', 'bar'])

        with temp_file('.zip') as fp:
            b1.to_zip_tsv(fp)
            b2 = Bus.from_zip_tsv(fp)

            # access both labels through the Bus read from the archive;
            # the results themselves are not inspected
            _loaded_bar = b2['bar']
            _loaded_foo = b2['foo']

            zs = StoreZipTSV(fp)
            zs.write(b1.items())

            # open question kept from the author: how to show that this
            # derived getitem has a derived type?
            reread = zs.read('foo', config=config['foo'])
            expected = (
                ('a', (('x', 1), ('y', 2))),
                ('b', (('x', 3), ('y', 4))),
            )
            self.assertEqual(reread.to_pairs(0), expected)
Example #13
0
    def test_bus_to_xlsx_a(self) -> None:
        # Write a three-frame Bus to XLSX and confirm each frame round-trips.
        f1 = Frame.from_dict(
            {'a': (1, 2), 'b': (3, 4)},
            index=('x', 'y'),
            name='f1',
        )
        f2 = Frame.from_dict(
            {'c': (1, 2, 3), 'b': (4, 5, 6)},
            index=('x', 'y', 'z'),
            name='f2',
        )
        f3 = Frame.from_dict(
            {'d': (10, 20), 'b': (50, 60)},
            index=('p', 'q'),
            name='f3',
        )
        sources = (f1, f2, f3)

        base_config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
        )
        config = StoreConfigMap.from_config(base_config)
        b1 = Bus.from_frames(sources, config=config)

        with temp_file('.xlsx') as fp:
            b1.to_xlsx(fp)

            b2 = Bus.from_xlsx(fp, config=config)
            # force every frame to load while the file still exists
            tuple(b2.items())

        for source in sources:
            self.assertEqualFrames(source, b2[source.name])
    def test_store_zip_tsv_a(self) -> None:
        # Write three frames to a zipped-TSV store and read each back with
        # several read_max_workers settings and both container types.
        sources = get_test_framesA()
        f1, f2, f3 = sources
        worker_options = (None, 1, 2)

        with temp_file('.zip') as fp:
            st = StoreZipTSV(fp)
            st.write((f.name, f) for f in (f1, f2, f3))

            # with strip_ext=False the archive member names keep '.txt'
            labels = tuple(st.labels(strip_ext=False))
            self.assertEqual(labels, ('foo.txt', 'bar.txt', 'baz.txt'))

            for frame in (f1, f2, f3):
                label = frame.name
                for read_max_workers in worker_options:
                    config = StoreConfig(
                        index_depth=1,
                        read_max_workers=read_max_workers,
                    )

                    frame_stored = st.read(label, config=config)
                    self.assertEqual(frame_stored.shape, frame.shape)
                    self.assertTrue((frame_stored == frame).all().all())
                    self.assertEqual(frame.to_pairs(0),
                                     frame_stored.to_pairs(0))

                    # the same read with an explicit container type
                    frame_go = st.read(
                        label,
                        config=config,
                        container_type=FrameGO,
                    )
                    self.assertEqual(frame_go.__class__, FrameGO)
                    self.assertEqual(frame_go.shape, frame.shape)
    def test_store_read_many_single_thread_weak_cache(self) -> None:
        # Checks that `st._weak_cache` only contains an entry while the test
        # itself holds a reference to the frame that was read.
        # NOTE(review): the statement order and which results are bound to a
        # name are the point of this test; do not introduce extra references.

        f1, f2, f3 = get_test_framesA()

        with temp_file('.zip') as fp:

            st = StoreZipTSV(fp)
            st.write((f.name, f) for f in (f1, f2, f3))

            # keyword arguments passed to the private single-thread reader
            kwargs = dict(config_map=StoreConfigMap.from_initializer(
                StoreConfig(index_depth=1)),
                          constructor=st._container_type_to_constructor(Frame),
                          container_type=Frame)

            labels = tuple(st.labels(strip_ext=False))
            self.assertEqual(labels, ('foo.txt', 'bar.txt', 'baz.txt'))

            # nothing has been read yet, so the cache starts empty
            self.assertEqual(0, len(list(st._weak_cache)))

            # Result is not held onto!
            next(st._read_many_single_thread(('foo', ), **kwargs))

            self.assertEqual(0, len(list(st._weak_cache)))

            # Result IS held onto!
            frame = next(st._read_many_single_thread(('foo', ), **kwargs))

            self.assertEqual(1, len(list(st._weak_cache)))

            # Reference in our weak_cache _is_ `frame`
            self.assertIs(frame, st._weak_cache['foo'])
            del frame

            # Reference is gone now!
            self.assertEqual(0, len(list(st._weak_cache)))
    def test_store_zip_parquet_c(self) -> None:
        # With index_constructors=IndexDate in the config, frames read back
        # from a zipped-parquet store should carry an IndexDate index.
        f1, f2 = get_test_framesB()

        config = StoreConfig(
            index_depth=1,
            include_index=True,
            index_constructors=IndexDate,
            columns_depth=1,
            include_columns=True,
        )

        with temp_file('.zip') as fp:
            st = StoreZipParquet(fp)
            st.write(((f.name, f) for f in (f1, f2)), config=config)

            loaded = st.read_many(
                ('a', 'b'),
                container_type=Frame,
                config=config,
            )
            post = tuple(loaded)

            for position in (0, 1):
                self.assertIs(post[position].index.__class__, IndexDate)
    def test_store_zip_parquet_a(self) -> None:
        # Write three frames to a zipped-parquet store and read each back,
        # once per read_max_workers setting, comparing name and class too.
        f1, f2, f3 = get_test_framesA()
        expected_by_label = (('foo', f1), ('bar', f2), ('baz', f3))

        with temp_file('.zip') as fp:
            for read_max_workers in (1, 2):
                config = StoreConfig(
                    index_depth=1,
                    include_index=True,
                    columns_depth=1,
                    read_max_workers=read_max_workers,
                )

                # the store is rebuilt and rewritten on each iteration
                st = StoreZipParquet(fp)
                st.write((f.name, f) for f in (f1, f2, f3))

                for label, expected in expected_by_label:
                    loaded = st.read(label, config=config)
                    self.assertTrue(
                        expected.equals(
                            loaded,
                            compare_name=True,
                            compare_class=True,
                        ))
Example #18
0
    def test_bus_max_persist_3(self) -> None:
        # With max_persist equal to the number of frames, everything ends up
        # loaded; later selections only reorder the last-accessed record.
        def items() -> tp.Iterator[tp.Tuple[str, Frame]]:
            for i in range(4):
                yield str(i), Frame(np.arange(i, i+10).reshape(2, 5))

        b1 = Bus(Series.from_items(items(), dtype=object))

        config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True,
                )

        with temp_file('.zip') as fp:
            b1.to_zip_pickle(fp)
            b2 = Bus.from_zip_pickle(fp, config=config, max_persist=4)

            # load all four frames, two at a time
            for positions in ([0, 1], [2, 3]):
                _ = b2.iloc[positions]
            self.assertTrue(b2._loaded_all)

            # each selection, paired with the key order of _last_accessed
            # expected immediately afterwards
            steps = (
                    ([1, 0], ['2', '3', '1', '0']),
                    (3, ['2', '1', '0', '3']),
                    (slice(None, 3), ['3', '0', '1', '2']),
                    )
            for selector, expected_order in steps:
                _ = b2.iloc[selector]
                self.assertEqual(list(b2._last_accessed.keys()),
                        expected_order)
Example #19
0
    def test_bus_max_persist_b(self) -> None:
        # With max_persist=1, a sliced Bus keeps only the most recently
        # accessed frame flagged as loaded.
        def items() -> tp.Iterator[tp.Tuple[str, Frame]]:
            for i in range(20):
                yield str(i), Frame(np.arange(i, i+10).reshape(2, 5))

        def only_loaded(position: int) -> tp.List[bool]:
            # expected `_loaded` flags for the 10-element slice
            return [i == position for i in range(10)]

        b1 = Bus(Series.from_items(items(), dtype=object))

        config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True,
                )

        with temp_file('.zip') as fp:
            b1.to_zip_pickle(fp)

            b2 = Bus.from_zip_pickle(fp, config=config, max_persist=1)
            b3 = b2.iloc[10:]
            self.assertEqual(b3._loaded.sum(), 1)
            # only the last one is loaded
            self.assertEqual(b3._loaded.tolist(), only_loaded(9))

            self.assertEqual(b3.iloc[0].sum().sum(), 145)
            self.assertEqual(b3._loaded.tolist(), only_loaded(0))

            self.assertEqual(b3.iloc[4].sum().sum(), 185)
            self.assertEqual(b3._loaded.tolist(), only_loaded(4))
    def test_store_sqlite_write_d(self) -> None:
        # Write a frame to SQLite without its index; reading with the same
        # config or with config=None gives an integer-indexed frame.
        source = Frame.from_dict(
            {'a': (1, 2, 3), 'b': (4, 5, 6)},
            index=('x', 'y', 'z'),
            name='f2',
        )
        frames = (source, )

        expected = (
            ('a', ((0, 1), (1, 2), (2, 3))),
            ('b', ((0, 4), (1, 5), (2, 6))),
        )

        with temp_file('.sqlite') as fp:
            config = StoreConfig(include_index=False)

            st1 = StoreSQLite(fp)
            st1.write(((f.name, f) for f in frames), config=config)

            with_config = st1.read(source.name, config=config)
            self.assertEqual(with_config.to_pairs(0), expected)

            # config=None: the store uses its default config
            with_default = st1.read(source.name, config=None)
            self.assertEqual(with_default.to_pairs(0), expected)
Example #21
0
    def test_bus_max_persist_a(self) -> None:
        # With max_persist=3, never more than three frames are loaded while
        # every label is visited in order.
        def items() -> tp.Iterator[tp.Tuple[str, Frame]]:
            for i in range(20):
                yield str(i), Frame(np.arange(i, i+10).reshape(2, 5))

        b1 = Bus(Series.from_items(items(), dtype=object))

        config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True,
                )

        with temp_file('.zip') as fp:
            b1.to_zip_pickle(fp)

            b2 = Bus.from_zip_pickle(fp, config=config, max_persist=3)
            for label in b2.index:
                _ = b2[label]
                self.assertTrue(b2._loaded.sum() <= 3)

            # after iteration only the last three are loaded
            expected_loaded = [False] * 17 + [True] * 3
            self.assertEqual(b2._loaded.tolist(), expected_loaded)
Example #22
0
    def test_store_xlsx_read_many_f(self) -> None:
        # Read a sheet that has trailing all-None rows and an all-None column
        # using trim_nadir=True, a depth-3 index and no column labels.
        all_none = (None, None, None, None, None)
        records = (
            (2, 2, 'a', False, None),
            (30, 73, 'd', True, None),
            all_none,
            all_none,
        )
        f1 = Frame.from_records(records, columns=('p', 'q', 'r', 's', 't'))

        with temp_file('.xlsx') as fp:
            f1.to_xlsx(
                fp,
                label='f1',
                include_index=False,
                include_columns=False,
            )

            st1 = StoreXLSX(fp)
            c = StoreConfig(
                index_depth=3,  # force coverage
                columns_depth=0,
                trim_nadir=True,
            )
            f2 = next(st1.read_many(('f1', ), config=c))

            expected = (
                (0, (((2, 2, 'a'), False), ((30, 73, 'd'), True))),
            )
            self.assertEqual(f2.shape, (2, 1))
            self.assertEqual(f2.to_pairs(), expected)
Example #23
0
    def test_store_xlsx_read_e(self) -> None:
        # Infinite values read back as strings when store_filter=None and as
        # floats when a StoreFilter is supplied.
        source = Frame.from_records(
            ((np.inf, np.inf), (-np.inf, -np.inf)),
            index=('p', 'q'),
            columns=('a', 'b'),
            name='f1',
        )

        sc1 = StoreConfig(columns_depth=1, index_depth=1)

        expected_unfiltered = (
            ('a', (('p', 'inf'), ('q', '-inf'))),
            ('b', (('p', 'inf'), ('q', '-inf'))),
        )
        expected_filtered = (
            ('a', (('p', np.inf), ('q', -np.inf))),
            ('b', (('p', np.inf), ('q', -np.inf))),
        )

        with temp_file('.xlsx') as fp:
            st = StoreXLSX(fp)
            st.write(((STORE_LABEL_DEFAULT, source), ))

            unfiltered = st.read(
                STORE_LABEL_DEFAULT,
                config=sc1,
                store_filter=None,
            )
            self.assertEqual(unfiltered.to_pairs(0), expected_unfiltered)

            filtered = st.read(
                STORE_LABEL_DEFAULT,
                config=sc1,
                store_filter=StoreFilter(),
            )
            self.assertEqual(filtered.to_pairs(0), expected_filtered)
Example #24
0
    def read(
        self,
        label: tp.Optional[str] = None,
        *,
        config: tp.Optional[StoreConfig] = None,
        container_type: tp.Type[Frame] = Frame,
    ) -> Frame:
        '''
        Args:
            {dtypes}
        '''
        # Reads the table stored at node `/{label}` of the HDF5 file at
        # `self._fp` and builds a `container_type` from its columns.
        # Raises NotImplementedError if `config.dtypes` is truthy.
        # NOTE(review): the docstring's `{dtypes}` looks like a template
        # placeholder filled in elsewhere; left untouched — confirm.

        # imported lazily, inside the method
        import tables

        if config is None:
            config = StoreConfig()  # get default
        if config.dtypes:
            raise NotImplementedError(
                'using config.dtypes on HDF5 not yet supported')

        index_depth = config.index_depth
        columns_depth = config.columns_depth

        # both lists are filled as a side effect of consuming `blocks()` below
        index_arrays = []
        columns_labels = []

        with tables.open_file(self._fp, mode='r') as file:
            # NOTE(review): `label` defaults to None, which would format to
            # the node path '/None' — presumably callers always pass a label.
            table = file.get_node(f'/{label}')
            colnames = table.cols._v_colnames

            def blocks() -> tp.Iterator[np.ndarray]:
                # Yields one array per non-index column, in `colnames` order.
                for col_idx, colname in enumerate(colnames):

                    # can also do: table.read(field=colname)
                    array = table.col(colname)

                    # arrays whose dtype kind is in DTYPE_STR_KIND are
                    # converted with astype(str)
                    if array.dtype.kind in DTYPE_STR_KIND:
                        array = array.astype(str)
                    # mark read-only before handing the array on
                    array.flags.writeable = False

                    # the first `index_depth` columns are set aside as index
                    # arrays and are not yielded as data
                    if col_idx < index_depth:
                        index_arrays.append(array)
                        continue
                    # only store column labels for those yielded
                    columns_labels.append(colname)
                    yield array

            # the generator is passed to TypeBlocks inside the `with`, i.e.
            # while the file is still open
            if config.consolidate_blocks:
                data = TypeBlocks.from_blocks(
                    TypeBlocks.consolidate_blocks(blocks()))
            else:
                data = TypeBlocks.from_blocks(blocks())

        return container_type._from_data_index_arrays_column_labels(
            data=data,
            index_depth=index_depth,
            index_arrays=index_arrays,
            columns_depth=columns_depth,
            columns_labels=columns_labels,
            name=tp.cast(tp.Hashable, label)  # not sure why this is necessary
        )
Example #25
0
    def test_store_config_map_c(self) -> None:
        # from_initializer accepts a single StoreConfig, an existing
        # StoreConfigMap, or a plain dict of configs.
        single = StoreConfig(index_depth=3, columns_depth=3)
        per_label = {
            'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3),
        }
        existing_map = StoreConfigMap(per_label)

        from_single = StoreConfigMap.from_initializer(single)
        self.assertEqual(from_single['a'].index_depth, 3)

        from_map = StoreConfigMap.from_initializer(existing_map)
        for label, depth in (('a', 2), ('b', 3)):
            self.assertEqual(from_map[label].index_depth, depth)

        from_dict = StoreConfigMap.from_initializer(per_label)
        for label, depth in (('a', 2), ('b', 3)):
            self.assertEqual(from_dict[label].index_depth, depth)
Example #26
0
    def test_bus_init_c(self) -> None:
        # Round-trip a two-frame Bus through a zipped-CSV archive.
        foo = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='foo',
                )
        bar = Frame.from_dict(
                {'a': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='bar',
                )

        config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
        b1 = Bus.from_frames((foo, bar), config=config)

        labels = b1.keys().values.tolist()
        self.assertEqual(labels, ['foo', 'bar'])

        with temp_file('.zip') as fp:
            b1.to_zip_csv(fp)
            b2 = Bus.from_zip_csv(fp, config=config)

            foo_loaded = b2['foo']
            bar_loaded = b2['bar']

            self.assertEqualFrames(foo, foo_loaded)
            self.assertEqualFrames(bar, bar_loaded)
Example #27
0
    def test_store_sqlite_write_b(self) -> None:
        # Fraction values written to SQLite are read back as strings.
        fractions = (
                Fraction(3, 2),
                Fraction(1, 2),
                Fraction(2, 3),
                Fraction(3, 7),
                )
        source = Frame.from_dict(
                {'x': fractions, 'y': (3, 4, -5, -3000)},
                index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f1-dash',
                )
        frames = (source,)

        with temp_file('.sqlite') as fp:
            st1 = StoreSQLite(fp)
            st1.write((f.name, f) for f in frames)

            # derive the read config from the frame that was written
            config = StoreConfig.from_frame(source)
            f_loaded = st1.read(source.name, config=config)

            # for now, Fractions come back as strings
            expected = (
                    (('I', 'a'), '3/2'),
                    (('I', 'b'), '1/2'),
                    (('II', 'a'), '2/3'),
                    (('II', 'b'), '3/7'),
                    )
            self.assertEqual(f_loaded['x'].to_pairs(), expected)
Example #28
0
    def test_batch_to_zip_pickle_a(self) -> None:
        # Round-trip a three-frame Batch through a zipped-pickle archive.
        f1 = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1',
                )
        f2 = Frame.from_dict(
                {'a': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2',
                )
        f3 = Frame.from_dict(
                {'a': (10, 20), 'b': (50, 60)},
                index=('p', 'q'),
                name='f3',
                )
        sources = (f1, f2, f3)

        config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True,
                )

        b1 = Batch.from_frames(sources)

        with temp_file('.zip') as fp:
            b1.to_zip_pickle(fp, config=config)
            b2 = Batch.from_zip_pickle(fp, config=config)
            # materialise every frame while the file still exists
            loaded_by_name = dict(b2.items())

        for source in sources:
            # dtypes are deliberately left out of the comparison
            self.assertEqualFrames(
                    source,
                    loaded_by_name[source.name],
                    compare_dtype=False,
                    )
    def test_store_xlsx_read_many_d(self) -> None:
        # Read a sheet with two-level column labels, trailing all-None rows
        # and an all-None column, using trim_nadir=True.
        all_none = (None, None, None, None, None)
        records = (
                (2, 2, 'a', False, None),
                (30, 73, 'd', True, None),
                all_none,
                all_none,
                )
        column_labels = (
                ('a', 1),
                ('a', 2),
                ('b', 1),
                ('b', 2),
                (None, None),
                )
        columns = IndexHierarchy.from_labels(column_labels)
        f1 = Frame.from_records(records, columns=columns)

        with temp_file('.xlsx') as fp:
            f1.to_xlsx(fp, label='f1', include_index=False, include_columns=True)

            st1 = StoreXLSX(fp)
            c = StoreConfig(
                    index_depth=0,
                    columns_depth=2,
                    trim_nadir=True,
                    )
            f2 = next(st1.read_many(('f1',), config=c))

            expected = (
                    (('a', 1), ((0, 2), (1, 30))),
                    (('a', 2), ((0, 2), (1, 73))),
                    (('b', 1), ((0, 'a'), (1, 'd'))),
                    (('b', 2), ((0, False), (1, True))),
                    )
            self.assertEqual(f2.shape, (2, 4))
            self.assertEqual(f2.to_pairs(), expected)
Example #30
0
    def test_store_zip_csv_a(self) -> None:
        # Write three frames to a zipped-CSV store and read each one back.
        foo = Frame.from_dict(
            {'a': (1, 2), 'b': (3, 4)},
            index=('x', 'y'),
            name='foo',
        )
        bar = Frame.from_dict(
            {'a': (1, 2, 3), 'b': (4, 5, 6)},
            index=('x', 'y', 'z'),
            name='bar',
        )
        baz = Frame.from_dict(
            {'a': (10, 20), 'b': (50, 60)},
            index=('p', 'q'),
            name='baz',
        )
        sources = (foo, bar, baz)

        with temp_file('.zip') as fp:
            st = StoreZipCSV(fp)
            st.write((f.name, f) for f in sources)

            # with strip_ext=False the archive member names keep '.csv'
            labels = tuple(st.labels(strip_ext=False))
            self.assertEqual(labels, ('foo.csv', 'bar.csv', 'baz.csv'))

            config = StoreConfig(index_depth=1)

            for frame in sources:
                frame_stored = st.read(frame.name, config=config)
                self.assertEqual(frame_stored.shape, frame.shape)
                self.assertTrue((frame_stored == frame).all().all())
                self.assertEqual(frame.to_pairs(0), frame_stored.to_pairs(0))