def test_store_config_map_init_b(self) -> None:
    '''A StoreConfigMap accepts an explicit default whose label_encoder agrees with the members.'''
    configs = {
            'a': StoreConfig(index_depth=2, label_encoder=str),
            'b': StoreConfig(index_depth=3, label_encoder=str),
            }
    sc1m = StoreConfigMap(configs, default=StoreConfig(label_encoder=str))
    self.assertEqual(sc1m.default.label_encoder, str)
def test_store_xlsx_write_b(self) -> None:
    '''Round-trip a Frame with hierarchical columns through StoreXLSX under the default label.'''
    records = (
            (None, np.nan, 50, 'a'),
            (None, -np.inf, -50, 'b'),
            (None, 60.4, -50, 'c'),
            )
    f1 = Frame.from_records(records,
            index=('p', 'q', 'r'),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            )
    config_map = StoreConfigMap.from_config(
            StoreConfig(include_index=True, include_columns=True))
    with temp_file('.xlsx') as fp:
        st = StoreXLSX(fp)
        st.write(((STORE_LABEL_DEFAULT, f1),), config=config_map)
        read_config = StoreConfig(
                index_depth=f1.index.depth,
                columns_depth=f1.columns.depth,
                )
        f2 = st.read(STORE_LABEL_DEFAULT, config=read_config)
        # just a sample column for now
        self.assertEqual(
                f1[HLoc[('II', 'a')]].values.tolist(),
                f2[HLoc[('II', 'a')]].values.tolist(),
                )
        self.assertEqualFrames(f1, f2)
def test_store_xlsx_read_d(self) -> None:
    '''Reading back with the default config vs. an index/columns-free config.'''
    f1 = Frame.from_records(
            ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
            index=('p', 'q'),
            columns=('a', 'b', 'c', 'd'),
            name='f1')
    config_write = StoreConfig(include_index=False, include_columns=True)
    config_bare = StoreConfig(columns_depth=0, index_depth=0)
    with temp_file('.xlsx') as fp:
        st = StoreXLSX(fp)
        st.write(((STORE_LABEL_DEFAULT, f1),), config=config_write)
        # the default config recovers the stored column labels
        f2 = st.read(STORE_LABEL_DEFAULT)
        self.assertEqual(f2.to_pairs(0),
                (('a', ((0, 10), (1, 50))),
                ('b', ((0, 20.0), (1, 60.4))),
                ('c', ((0, 50), (1, -50))),
                ('d', ((0, 60), (1, -60)))))
        # with depth zero on both axes the header row becomes data
        f3 = st.read(STORE_LABEL_DEFAULT, config=config_bare)
        self.assertEqual(f3.to_pairs(0),
                ((0, ((0, 'a'), (1, 10), (2, 50))),
                (1, ((0, 'b'), (1, 20), (2, 60.4))),
                (2, ((0, 'c'), (1, 50), (2, -50))),
                (3, ((0, 'd'), (1, 60), (2, -60)))))
def test_store_config_map_get_default_a(self) -> None:
    '''Without an explicit default, the map falls back to the class-level default.'''
    sc1m = StoreConfigMap.from_initializer({
            'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3),
            })
    self.assertTrue(sc1m.default == StoreConfigMap._DEFAULT)
def test_store_config_map_init_a(self) -> None:
    '''Members with inconsistent label_encoder settings raise on construction.'''
    maps = {
            'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3, label_encoder=str),
            }
    with self.assertRaises(ErrorInitStoreConfig):
        _ = StoreConfigMap.from_initializer(maps)
def test_store_config_map_b(self) -> None:
    '''Known labels return their own config; unknown labels get the default.'''
    sc1m = StoreConfigMap({
            'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3),
            })
    self.assertEqual(sc1m['a'].index_depth, 2)
    self.assertEqual(sc1m['b'].index_depth, 3)
    # 'c' is not a member, so the default index_depth of 0 applies
    self.assertEqual(sc1m['c'].index_depth, 0)
def test_store_sqlite_read_many_a(self) -> None:
    '''read_many on StoreSQLite returns frames in label order with per-label configs.'''
    f1 = Frame.from_dict(
            dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
            index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f1')
    f2 = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='f2')
    f3 = Frame.from_records(
            ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
            index=('p', 'q'),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f3')
    f4 = Frame.from_records((
            (10, 20, 50, False, 10, 20, 50, False),
            (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
            (234, 44452, 0, False, 234, 44452, 0, False),
            (4, -4, 2000, True, 4, -4, 2000, True),
            (10, 20, 50, False, 10, 20, 50, False),
            (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
            (234, 44452, 0, False, 234, 44452, 0, False),
            (4, -4, 2000, True, 4, -4, 2000, True),
            ),
            index=IndexHierarchy.from_product(
                    ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b'), (1, 2)),
            name='f4')
    frames = (f1, f2, f3, f4)
    config_map_write = StoreConfigMap.from_config(
            StoreConfig(include_index=True, include_columns=True))
    with temp_file('.sqlite') as fp:
        st1 = StoreSQLite(fp)
        st1.write(((f.name, f) for f in frames), config=config_map_write)
        labels = tuple(st1.labels())  # this will read from file, not in memory
        self.assertEqual(tuple(f.name for f in frames), labels)
        # per-label read configs matching each source frame's depths
        config_map_read: tp.Dict[tp.Hashable, StoreConfig] = {
                label: StoreConfig(
                        index_depth=frames[i].index.depth,
                        columns_depth=frames[i].columns.depth)
                for i, label in enumerate(labels)
                }
        for f_src, f_loaded in zip(
                frames,
                st1.read_many(labels, config=config_map_read),
                ):
            self.assertEqualFrames(f_src, f_loaded, compare_dtype=False)
def test_store_config_map_a(self) -> None:
    '''from_config applies a single config uniformly to every label.'''
    sc1m = StoreConfigMap.from_config(
            StoreConfig(index_depth=3, columns_depth=3))
    self.assertEqual(sc1m['a'].index_depth, 3)
    self.assertEqual(sc1m['b'].index_depth, 3)
    sc2m = StoreConfigMap.from_config(StoreConfig(include_index=False))
    self.assertEqual(sc2m['a'].include_index, False)
    self.assertEqual(sc2m['b'].include_index, False)
def test_store_xlsx_write_a(self) -> None:
    '''Round-trip several frames (flat and hierarchical axes) through StoreXLSX.

    Fix: the dtype-comparison keyword used by the sibling store tests in this
    module is ``compare_dtype``, not ``check_dtypes``; also removed a
    commented-out debugger invocation.
    '''
    f1 = Frame.from_dict(
            dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
            index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f1')
    f2 = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='f2')
    f3 = Frame.from_records(
            ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
            index=('p', 'q'),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f3')
    f4 = Frame.from_records((
            (10, 20, 50, False, 10, 20, 50, False),
            (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
            (234, 44452, 0, False, 234, 44452, 0, False),
            (4, -4, 2000, True, 4, -4, 2000, True),
            (10, 20, 50, False, 10, 20, 50, False),
            (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
            (234, 44452, 0, False, 234, 44452, 0, False),
            (4, -4, 2000, True, 4, -4, 2000, True),
            ),
            index=IndexHierarchy.from_product(
                    ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b'), (1, 2)),
            name='f4')
    frames = (f1, f2, f3, f4)
    config_map = StoreConfigMap.from_config(
            StoreConfig(include_index=True, include_columns=True))
    with temp_file('.xlsx') as fp:
        st1 = StoreXLSX(fp)
        st1.write(((f.name, f) for f in frames), config=config_map)
        sheet_names = tuple(st1.labels())  # this will read from file, not in memory
        self.assertEqual(tuple(f.name for f in frames), sheet_names)
        for i, name in enumerate(sheet_names):
            f_src = frames[i]
            c = StoreConfig(
                    index_depth=f_src.index.depth,
                    columns_depth=f_src.columns.depth)
            f_loaded = st1.read(name, config=c)
            # was check_dtypes=False: not the keyword the helper takes elsewhere
            self.assertEqualFrames(f_src, f_loaded, compare_dtype=False)
def test_store_config_he_a(self) -> None:
    '''StoreConfigHE compares equal to the HE projection of an equivalent
    StoreConfig, but never to the StoreConfig itself, and is hashable.'''
    # keyword arguments shared by StoreConfig and StoreConfigHE
    he_kwargs = dict(
            index_depth=1,
            columns_depth=1,
            consolidate_blocks=True,
            skip_header=1,
            skip_footer=1,
            trim_nadir=True,
            include_index=True,
            include_index_name=True,
            include_columns=True,
            include_columns_name=True,
            merge_hierarchical_labels=True,
            read_max_workers=1,
            read_chunksize=1,
            write_max_workers=1,
            write_chunksize=1,
            )
    # StoreConfig additionally accepts encoder/decoder callables
    kwargs = dict(
            **he_kwargs,
            label_encoder=lambda x: x,
            label_decoder=lambda x: x,
            )
    # sweep representative combinations of the remaining parameters
    for (depth_levels, columns_select, dtypes) in product(
            (None, 1, [1, 2], (1, 2)),
            (None, ['a'], ('a', )),
            (None, 'int', int, np.int64, [int], (int, ), {'a': int}),
            ):
        config = StoreConfig(
                **kwargs,  # type: ignore [arg-type]
                index_name_depth_level=depth_levels,
                columns_name_depth_level=depth_levels,
                columns_select=columns_select,
                dtypes=dtypes,
                )
        config_he = StoreConfigHE(
                **he_kwargs,  # type: ignore [arg-type]
                index_name_depth_level=depth_levels,
                columns_name_depth_level=depth_levels,
                columns_select=columns_select,
                dtypes=dtypes,
                )
        self.assertNotEqual(config_he, config)
        self.assertEqual(config_he, config.to_store_config_he())
        self.assertTrue(isinstance(hash(config_he), int))
def test_bus_to_xlsx_f(self) -> None:
    '''Datetime index, columns, and values survive a Bus XLSX round trip.

    Fix: the index assertion was duplicated verbatim; the copy-paste
    duplicate is removed.
    '''
    f = Frame.from_records([
            [np.datetime64('1983-02-20 05:34:18.763'), np.datetime64('2020-08-01')],
            [np.datetime64('1975-03-20 05:20:18.001'), np.datetime64('2020-07-31')]
            ],
            columns=(date(2020, 7, 31), date(2020, 8, 1)),
            index=(datetime(2020, 7, 31, 14, 20, 8), datetime(2017, 4, 28, 2, 30, 2)),
            name='frame')
    b1 = Bus.from_frames([f])
    with temp_file('.xlsx') as fp:
        b1.to_xlsx(fp)
        config = StoreConfig(include_index=True, index_depth=1)
        b2 = Bus.from_xlsx(fp, config=config)
        tuple(b2.items())  # force loading all
        self.assertEqual(b2['frame'].index.values.tolist(),
                [datetime(2020, 7, 31, 14, 20, 8), datetime(2017, 4, 28, 2, 30, 2)])
        self.assertEqual(b2['frame'].values.tolist(),
                [[datetime(1983, 2, 20, 5, 34, 18, 763000), datetime(2020, 8, 1, 0, 0)],
                [datetime(1975, 3, 20, 5, 20, 18, 1000), datetime(2020, 7, 31, 0, 0)]]
                )
def test_bus_init_a(self) -> None:
    '''Bus round trip through a zip-of-TSV store, plus a direct StoreZipTSV read.

    Fix: removed a commented-out debugger invocation; the two Bus reads were
    bound to ``f3``/``f4`` but ``f3`` was immediately shadowed and ``f4``
    never used — the reads are kept (they exercise Bus loading) but unbound.
    '''
    f1 = Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='foo')
    f2 = Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='bar')
    config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
    b1 = Bus.from_frames((f1, f2), config=config)
    self.assertEqual(b1.keys().values.tolist(), ['foo', 'bar'])
    with temp_file('.zip') as fp:
        b1.to_zip_tsv(fp)
        b2 = Bus.from_zip_tsv(fp)
        _ = b2['bar']  # force loading through the Bus
        _ = b2['foo']
        zs = StoreZipTSV(fp)
        zs.write(b1.items())
        # how to show that this derived getitem has derived type?
        f3 = zs.read('foo', config=config['foo'])
        self.assertEqual(f3.to_pairs(0),
                (('a', (('x', 1), ('y', 2))), ('b', (('x', 3), ('y', 4)))))
def test_bus_to_xlsx_a(self) -> None:
    '''Three simple frames survive a Bus XLSX round trip.'''
    f1 = Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1')
    f2 = Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2')
    f3 = Frame.from_dict(dict(d=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3')
    config = StoreConfigMap.from_config(StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
            ))
    b1 = Bus.from_frames((f1, f2, f3), config=config)
    with temp_file('.xlsx') as fp:
        b1.to_xlsx(fp)
        b2 = Bus.from_xlsx(fp, config=config)
        tuple(b2.items())  # force loading all
        for frame in (f1, f2, f3):
            self.assertEqualFrames(frame, b2[frame.name])
def test_store_zip_tsv_a(self) -> None:
    '''StoreZipTSV writes labeled TSVs and reads them back with various worker counts.'''
    f1, f2, f3 = get_test_framesA()
    with temp_file('.zip') as fp:
        st = StoreZipTSV(fp)
        st.write((f.name, f) for f in (f1, f2, f3))
        self.assertEqual(
                tuple(st.labels(strip_ext=False)),
                ('foo.txt', 'bar.txt', 'baz.txt'))
        for frame in (f1, f2, f3):
            label = frame.name
            for read_max_workers in (None, 1, 2):
                config = StoreConfig(
                        index_depth=1,
                        read_max_workers=read_max_workers)
                frame_stored = st.read(label, config=config)
                self.assertEqual(frame_stored.shape, frame.shape)
                self.assertTrue((frame_stored == frame).all().all())
                self.assertEqual(frame.to_pairs(0), frame_stored.to_pairs(0))
                # alternate container types can be requested at read time
                frame_stored_2 = st.read(label,
                        config=config,
                        container_type=FrameGO)
                self.assertEqual(frame_stored_2.__class__, FrameGO)
                self.assertEqual(frame_stored_2.shape, frame.shape)
def test_store_read_many_single_thread_weak_cache(self) -> None:
    '''The store's weak cache holds a read result only while a strong
    reference to it exists elsewhere.'''
    f1, f2, f3 = get_test_framesA()
    with temp_file('.zip') as fp:
        st = StoreZipTSV(fp)
        st.write((f.name, f) for f in (f1, f2, f3))
        kwargs = dict(
                config_map=StoreConfigMap.from_initializer(
                        StoreConfig(index_depth=1)),
                constructor=st._container_type_to_constructor(Frame),
                container_type=Frame)
        labels = tuple(st.labels(strip_ext=False))
        self.assertEqual(labels, ('foo.txt', 'bar.txt', 'baz.txt'))
        # cache starts empty
        self.assertEqual(0, len(list(st._weak_cache)))
        # Result is not held onto!
        next(st._read_many_single_thread(('foo', ), **kwargs))
        self.assertEqual(0, len(list(st._weak_cache)))
        # Result IS held onto!
        frame = next(st._read_many_single_thread(('foo', ), **kwargs))
        self.assertEqual(1, len(list(st._weak_cache)))
        # Reference in our weak_cache _is_ `frame`
        self.assertIs(frame, st._weak_cache['foo'])
        del frame
        # Reference is gone now!
        self.assertEqual(0, len(list(st._weak_cache)))
def test_store_zip_parquet_c(self) -> None:
    '''index_constructors in the config is honored by read_many on a parquet zip store.'''
    f1, f2 = get_test_framesB()
    config = StoreConfig(
            index_depth=1,
            include_index=True,
            index_constructors=IndexDate,
            columns_depth=1,
            include_columns=True,
            )
    with temp_file('.zip') as fp:
        st = StoreZipParquet(fp)
        st.write(((f.name, f) for f in (f1, f2)), config=config)
        loaded = tuple(st.read_many(
                ('a', 'b'),
                container_type=Frame,
                config=config,
                ))
        for f_post in loaded:
            self.assertIs(f_post.index.__class__, IndexDate)
def test_store_zip_parquet_a(self) -> None:
    '''Parquet zip store round trip preserves name and class for each frame.'''
    f1, f2, f3 = get_test_framesA()
    with temp_file('.zip') as fp:
        for read_max_workers in (1, 2):
            config = StoreConfig(
                    index_depth=1,
                    include_index=True,
                    columns_depth=1,
                    read_max_workers=read_max_workers)
            st = StoreZipParquet(fp)
            st.write((f.name, f) for f in (f1, f2, f3))
            for label, f_src in (('foo', f1), ('bar', f2), ('baz', f3)):
                f_post = st.read(label, config=config)
                self.assertTrue(f_src.equals(f_post,
                        compare_name=True,
                        compare_class=True))
def test_bus_max_persist_3(self) -> None:
    '''Last-accessed ordering when max_persist equals the number of frames.'''
    def items() -> tp.Iterator[tp.Tuple[str, Frame]]:
        for i in range(4):
            yield str(i), Frame(np.arange(i, i+10).reshape(2, 5))
    s = Series.from_items(items(), dtype=object)
    b1 = Bus(s)
    config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True
            )
    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp, config=config, max_persist=4)
        # load everything in two batches
        _ = b2.iloc[[0, 1]]
        _ = b2.iloc[[2, 3]]
        self.assertTrue(b2._loaded_all)
        # each access moves the touched labels to the end of _last_accessed
        _ = b2.iloc[[1, 0]]
        self.assertEqual(list(b2._last_accessed.keys()),
                ['2', '3', '1', '0'])
        _ = b2.iloc[3]
        self.assertEqual(list(b2._last_accessed.keys()),
                ['2', '1', '0', '3'])
        _ = b2.iloc[:3]
        self.assertEqual(list(b2._last_accessed.keys()),
                ['3', '0', '1', '2'])
def test_bus_max_persist_b(self) -> None:
    '''With max_persist=1, at most one frame is resident at any time.'''
    def items() -> tp.Iterator[tp.Tuple[str, Frame]]:
        for i in range(20):
            yield str(i), Frame(np.arange(i, i+10).reshape(2, 5))
    s = Series.from_items(items(), dtype=object)
    b1 = Bus(s)
    config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True
            )
    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp, config=config, max_persist=1)
        b3 = b2.iloc[10:]
        self.assertEqual(b3._loaded.sum(), 1)
        # only the last one is loaded
        self.assertEqual(b3._loaded.tolist(),
                [False, False, False, False, False, False, False, False, False, True]
                )
        # each element access loads that frame and evicts the previous one
        self.assertEqual(b3.iloc[0].sum().sum(), 145)
        self.assertEqual(b3._loaded.tolist(),
                [True, False, False, False, False, False, False, False, False, False]
                )
        self.assertEqual(b3.iloc[4].sum().sum(), 185)
        self.assertEqual(b3._loaded.tolist(),
                [False, False, False, False, True, False, False, False, False, False]
                )
def test_store_sqlite_write_d(self) -> None:
    '''Reading with an explicit config and with config=None yield the same result.'''
    f1 = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='f2')
    with temp_file('.sqlite') as fp:
        config = StoreConfig(include_index=False)
        st1 = StoreSQLite(fp)
        st1.write(((f.name, f) for f in (f1,)), config=config)
        expected = (('a', ((0, 1), (1, 2), (2, 3))),
                ('b', ((0, 4), (1, 5), (2, 6))))
        f2 = st1.read(f1.name, config=config)
        self.assertEqual(f2.to_pairs(0), expected)
        # getting the default config
        f3 = st1.read(f1.name, config=None)
        self.assertEqual(f3.to_pairs(0), expected)
def test_bus_max_persist_a(self) -> None:
    '''Iterating a Bus with max_persist=3 keeps at most the last three frames loaded.'''
    def items() -> tp.Iterator[tp.Tuple[str, Frame]]:
        for i in range(20):
            yield str(i), Frame(np.arange(i, i+10).reshape(2, 5))
    b1 = Bus(Series.from_items(items(), dtype=object))
    config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
            )
    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp, config=config, max_persist=3)
        for label in b2.index:
            _ = b2[label]
        self.assertTrue(b2._loaded.sum() <= 3)
        # after iteration only the last three are loaded
        self.assertEqual(b2._loaded.tolist(),
                [False] * 17 + [True, True, True])
def test_store_xlsx_read_many_f(self) -> None:
    '''trim_nadir with a deep index collapses trailing all-None rows/columns.'''
    records = (
            (2, 2, 'a', False, None),
            (30, 73, 'd', True, None),
            (None, None, None, None, None),
            (None, None, None, None, None),
            )
    f1 = Frame.from_records(records, columns=('p', 'q', 'r', 's', 't'))
    with temp_file('.xlsx') as fp:
        f1.to_xlsx(fp, label='f1', include_index=False, include_columns=False)
        st1 = StoreXLSX(fp)
        config = StoreConfig(
                index_depth=3,  # force coverage
                columns_depth=0,
                trim_nadir=True,
                )
        f2 = next(st1.read_many(('f1',), config=config))
        self.assertEqual(f2.shape, (2, 1))
        self.assertEqual(f2.to_pairs(),
                ((0, (((2, 2, 'a'), False), ((30, 73, 'd'), True))),)
                )
def test_store_xlsx_read_e(self) -> None:
    '''Infinities round trip as strings without a store filter, as floats with one.'''
    f1 = Frame.from_records(
            ((np.inf, np.inf), (-np.inf, -np.inf)),
            index=('p', 'q'),
            columns=('a', 'b'),
            name='f1')
    sc1 = StoreConfig(columns_depth=1, index_depth=1)
    with temp_file('.xlsx') as fp:
        st = StoreXLSX(fp)
        st.write(((STORE_LABEL_DEFAULT, f1),))
        # with store_filter=None, inf values come back as strings
        f_raw = st.read(STORE_LABEL_DEFAULT, config=sc1, store_filter=None)
        self.assertEqual(f_raw.to_pairs(0),
                (('a', (('p', 'inf'), ('q', '-inf'))),
                ('b', (('p', 'inf'), ('q', '-inf')))))
        # with a StoreFilter, they are restored to floats
        f_filtered = st.read(STORE_LABEL_DEFAULT, config=sc1, store_filter=StoreFilter())
        self.assertEqual(f_filtered.to_pairs(0),
                (('a', (('p', np.inf), ('q', -np.inf))),
                ('b', (('p', np.inf), ('q', -np.inf)))))
def read(
        self,
        label: tp.Optional[str] = None,
        *,
        config: tp.Optional[StoreConfig] = None,
        container_type: tp.Type[Frame] = Frame,
        ) -> Frame:
    '''
    Args:
        {dtypes}
    '''
    # NOTE: tables (PyTables) is imported lazily so the dependency is only
    # required when an HDF5 store is actually read
    import tables
    if config is None:
        config = StoreConfig()  # get default
    if config.dtypes:
        raise NotImplementedError(
            'using config.dtypes on HDF5 not yet supported')
    index_depth = config.index_depth
    columns_depth = config.columns_depth
    # populated as side effects of iterating blocks() below
    index_arrays = []
    columns_labels = []
    with tables.open_file(self._fp, mode='r') as file:
        table = file.get_node(f'/{label}')
        colnames = table.cols._v_colnames

        def blocks() -> tp.Iterator[np.ndarray]:
            # yield one immutable column array per data column; the first
            # index_depth columns are diverted into index_arrays instead
            for col_idx, colname in enumerate(colnames):
                # can also do: table.read(field=colname)
                array = table.col(colname)
                if array.dtype.kind in DTYPE_STR_KIND:
                    array = array.astype(str)
                array.flags.writeable = False
                if col_idx < index_depth:
                    index_arrays.append(array)
                    continue
                # only store column labels for those yielded
                columns_labels.append(colname)
                yield array

        # blocks() must be fully consumed while the file is open
        if config.consolidate_blocks:
            data = TypeBlocks.from_blocks(
                TypeBlocks.consolidate_blocks(blocks()))
        else:
            data = TypeBlocks.from_blocks(blocks())

    return container_type._from_data_index_arrays_column_labels(
            data=data,
            index_depth=index_depth,
            index_arrays=index_arrays,
            columns_depth=columns_depth,
            columns_labels=columns_labels,
            name=tp.cast(tp.Hashable, label)  # not sure why this is necessary
            )
def test_store_config_map_c(self) -> None:
    '''from_initializer accepts a StoreConfig, a StoreConfigMap, or a dict.'''
    sc1 = StoreConfig(index_depth=3, columns_depth=3)
    maps = {'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3)}
    sc1m = StoreConfigMap(maps)
    # from a single StoreConfig: every label gets that config
    sc2m = StoreConfigMap.from_initializer(sc1)
    self.assertEqual(sc2m['a'].index_depth, 3)
    # from an existing StoreConfigMap: member configs pass through
    sc3m = StoreConfigMap.from_initializer(sc1m)
    self.assertEqual(sc3m['a'].index_depth, 2)
    self.assertEqual(sc3m['b'].index_depth, 3)
    # from a plain dict of configs
    sc4m = StoreConfigMap.from_initializer(maps)
    self.assertEqual(sc4m['a'].index_depth, 2)
    self.assertEqual(sc4m['b'].index_depth, 3)
def test_bus_init_c(self) -> None:
    '''Bus round trip through a zip-of-CSV store.'''
    f1 = Frame.from_dict(
            dict(a=(1, 2), b=(3, 4)),
            index=('x', 'y'),
            name='foo')
    f2 = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='bar')
    config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
    b1 = Bus.from_frames((f1, f2), config=config)
    self.assertEqual(b1.keys().values.tolist(), ['foo', 'bar'])
    with temp_file('.zip') as fp:
        b1.to_zip_csv(fp)
        b2 = Bus.from_zip_csv(fp, config=config)
        for original, label in ((f1, 'foo'), (f2, 'bar')):
            self.assertEqualFrames(original, b2[label])
def test_store_sqlite_write_b(self) -> None:
    '''Fraction values are stored by SQLite and read back as strings.'''
    f1 = Frame.from_dict(
            dict(
                x=(Fraction(3, 2), Fraction(1, 2), Fraction(2, 3), Fraction(3, 7)),
                y=(3, 4, -5, -3000)),
            index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f1-dash')
    with temp_file('.sqlite') as fp:
        st1 = StoreSQLite(fp)
        st1.write((f.name, f) for f in (f1,))
        f_loaded = st1.read(f1.name, config=StoreConfig.from_frame(f1))
        # for now, Fractions come back as strings
        self.assertEqual(f_loaded['x'].to_pairs(),
                ((('I', 'a'), '3/2'), (('I', 'b'), '1/2'),
                (('II', 'a'), '2/3'), (('II', 'b'), '3/7'))
                )
def test_batch_to_zip_pickle_a(self) -> None:
    '''A Batch of Frames survives a zip-pickle round trip.'''
    f1 = Frame.from_dict(
            dict(a=(1,2), b=(3,4)),
            index=('x', 'y'),
            name='f1')
    f2 = Frame.from_dict(
            dict(a=(1,2,3), b=(4,5,6)),
            index=('x', 'y', 'z'),
            name='f2')
    f3 = Frame.from_dict(
            dict(a=(10,20), b=(50,60)),
            index=('p', 'q'),
            name='f3')
    config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True
            )
    b1 = Batch.from_frames((f1, f2, f3))
    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp, config=config)
        b2 = Batch.from_zip_pickle(fp, config=config)
        frames = dict(b2.items())
        for frame in (f1, f2, f3):
            # NOTE(review): the original comment blamed parquet for dtype
            # differences, but this is a pickle store; confirm whether
            # compare_dtype=False is still required here
            self.assertEqualFrames(frame, frames[frame.name], compare_dtype=False)
def test_store_xlsx_read_many_d(self) -> None:
    '''trim_nadir with hierarchical columns drops trailing all-None rows and columns.'''
    records = (
            (2, 2, 'a', False, None),
            (30, 73, 'd', True, None),
            (None, None, None, None, None),
            (None, None, None, None, None),
            )
    columns = IndexHierarchy.from_labels(
            (('a', 1), ('a', 2), ('b', 1), ('b', 2), (None, None)))
    f1 = Frame.from_records(records, columns=columns)
    with temp_file('.xlsx') as fp:
        f1.to_xlsx(fp, label='f1', include_index=False, include_columns=True)
        st1 = StoreXLSX(fp)
        config = StoreConfig(
                index_depth=0,
                columns_depth=2,
                trim_nadir=True,
                )
        f2 = next(st1.read_many(('f1',), config=config))
        self.assertEqual(f2.shape, (2, 4))
        self.assertEqual(f2.to_pairs(),
                ((('a', 1), ((0, 2), (1, 30))),
                (('a', 2), ((0, 2), (1, 73))),
                (('b', 1), ((0, 'a'), (1, 'd'))),
                (('b', 2), ((0, False), (1, True)))))
def test_store_zip_csv_a(self) -> None:
    '''StoreZipCSV writes labeled CSVs and reads each frame back intact.'''
    f1 = Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='foo')
    f2 = Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='bar')
    f3 = Frame.from_dict(dict(a=(10, 20), b=(50, 60)), index=('p', 'q'), name='baz')
    with temp_file('.zip') as fp:
        st = StoreZipCSV(fp)
        st.write((f.name, f) for f in (f1, f2, f3))
        self.assertEqual(
                tuple(st.labels(strip_ext=False)),
                ('foo.csv', 'bar.csv', 'baz.csv'))
        config = StoreConfig(index_depth=1)
        for frame in (f1, f2, f3):
            frame_stored = st.read(frame.name, config=config)
            self.assertEqual(frame_stored.shape, frame.shape)
            self.assertTrue((frame_stored == frame).all().all())
            self.assertEqual(frame.to_pairs(0), frame_stored.to_pairs(0))