def test_store_config_map_a(self) -> None:
    '''A single StoreConfig given to from_config is returned for every label.'''
    depth_config = StoreConfig(index_depth=3, columns_depth=3)
    depth_map = StoreConfigMap.from_config(depth_config)
    for label in ('a', 'b'):
        self.assertEqual(depth_map[label].index_depth, 3)

    no_index_config = StoreConfig(include_index=False)
    no_index_map = StoreConfigMap.from_config(no_index_config)
    for label in ('a', 'b'):
        self.assertEqual(no_index_map[label].include_index, False)
def test_store_read_many_single_thread_weak_cache(self) -> None:
    '''Verify that _read_many_single_thread only retains a Frame in the weak
    cache while a strong reference to that Frame exists elsewhere.
    '''
    f1, f2, f3 = get_test_framesA()
    with temp_file('.zip') as fp:
        st = StoreZipTSV(fp)
        st.write((f.name, f) for f in (f1, f2, f3))
        # common keyword arguments for every _read_many_single_thread call
        kwargs = dict(config_map=StoreConfigMap.from_initializer(
                StoreConfig(index_depth=1)),
                constructor=st._container_type_to_constructor(Frame),
                container_type=Frame)
        labels = tuple(st.labels(strip_ext=False))
        self.assertEqual(labels, ('foo.txt', 'bar.txt', 'baz.txt'))
        # cache starts empty
        self.assertEqual(0, len(list(st._weak_cache)))
        # Result is not held onto!
        next(st._read_many_single_thread(('foo', ), **kwargs))
        self.assertEqual(0, len(list(st._weak_cache)))
        # Result IS held onto!
        frame = next(st._read_many_single_thread(('foo', ), **kwargs))
        self.assertEqual(1, len(list(st._weak_cache)))
        # Reference in our weak_cache _is_ `frame`
        self.assertIs(frame, st._weak_cache['foo'])
        # dropping the last strong reference empties the weak cache
        del frame
        # Reference is gone now!
        self.assertEqual(0, len(list(st._weak_cache)))
def write(self,
        items: tp.Iterable[tp.Tuple[tp.Hashable, Frame]],
        *,
        config: StoreConfigMapInitializer = None
        ) -> None:
    '''Write (label, Frame) pairs into the zip archive at ``self._fp``,
    serializing Frames in parallel processes when ``write_max_workers``
    is configured on the default config.

    Args:
        items: iterable of (label, Frame) pairs to store.
        config: StoreConfig / StoreConfigMap initializer controlling export.
    '''
    config_map = StoreConfigMap.from_initializer(config)
    # use a process pool only when a worker count greater than 1 is configured
    multiprocess = (config_map.default.write_max_workers is not None
            and config_map.default.write_max_workers > 1)

    def gen() -> tp.Iterable[PayloadFrameToBytes]:
        # pair each Frame with its per-label config so serialization can
        # happen in a worker process without access to this Store
        for label, frame in items:
            yield PayloadFrameToBytes( # pylint: disable=no-value-for-parameter
                    name=label,
                    config=config_map[label].to_store_config_he(),
                    frame=frame,
                    exporter=self.__class__._EXPORTER,
                    )

    if multiprocess:
        def label_and_bytes() -> tp.Iterator[LabelAndBytes]:
            # serialize Frames to bytes across processes, preserving order
            with ProcessPoolExecutor(max_workers=config_map.default.write_max_workers) as executor:
                yield from executor.map(self._payload_to_bytes,
                        gen(),
                        chunksize=config_map.default.write_chunksize)
    else:
        # single-threaded path: serialize lazily in this process
        label_and_bytes = lambda: (self._payload_to_bytes(x) for x in gen())

    with zipfile.ZipFile(self._fp, 'w', zipfile.ZIP_DEFLATED) as zf:
        for label, frame_bytes in label_and_bytes():
            label_encoded = config_map.default.label_encode(label)
            # this will write it without a container
            zf.writestr(label_encoded + self._EXT_CONTAINED, frame_bytes)
def __init__(self,
        series: Series,
        *,
        store: tp.Optional[Store] = None,
        config: StoreConfigMapInitializer = None
        ):
    '''
    Args:
        config: StoreConfig for handling ``Frame`` construction and exporting from Store.
    '''
    if series.dtype != DTYPE_OBJECT:
        raise ErrorInitBus(
                f'Series passed to initializer must have dtype object, not {series.dtype}')

    def loaded_flags() -> tp.Iterator[bool]:
        # one pass over the Series: validate labels and values, and flag
        # which positions already hold realized Frames
        for label, value in series.items():
            if not isinstance(label, str):
                raise ErrorInitBus(f'supplied label {label} is not a string.')
            if isinstance(value, Frame):
                yield True
            elif value is FrameDeferred:
                yield False
            else:
                raise ErrorInitBus(f'supplied {value.__class__} is not a Frame or FrameDeferred.')

    self._loaded = np.fromiter(loaded_flags(), dtype=DTYPE_BOOL, count=len(series))
    self._loaded_all = self._loaded.all()
    self._series = series
    self._store = store
    # None produces the default map; a StoreConfig or StoreConfigMap is
    # normalized to an appropriate StoreConfigMap
    self._config = StoreConfigMap.from_initializer(config)
def test_bus_init_c(self) -> None:
    '''Round-trip a Bus through a zip-of-CSV archive.'''
    frame_foo = Frame.from_dict(
            dict(a=(1, 2), b=(3, 4)),
            index=('x', 'y'),
            name='foo')
    frame_bar = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='bar')
    config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
    bus = Bus.from_frames((frame_foo, frame_bar), config=config)
    self.assertEqual(bus.keys().values.tolist(), ['foo', 'bar'])

    with temp_file('.zip') as fp:
        bus.to_zip_csv(fp)
        bus_round_trip = Bus.from_zip_csv(fp, config=config)
        # loaded Frames must match the originals exactly
        self.assertEqualFrames(frame_foo, bus_round_trip['foo'])
        self.assertEqualFrames(frame_bar, bus_round_trip['bar'])
def test_bus_to_hdf5_a(self) -> None:
    '''Round-trip a Bus through an HDF5 store.'''
    f1 = Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1')
    f2 = Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2')
    f3 = Frame.from_dict(dict(d=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3')
    frames = (f1, f2, f3)
    config = StoreConfigMap.from_frames(frames)
    bus_out = Bus.from_frames(frames, config=config)

    with temp_file('.h5') as fp:
        bus_out.to_hdf5(fp)
        bus_in = Bus.from_hdf5(fp, config=config)
        tuple(bus_in.items())  # force loading all
        for frame in frames:
            self.assertEqualFrames(frame, bus_in[frame.name])
def test_bus_to_xlsx_a(self) -> None:
    '''Round-trip a Bus through an XLSX workbook.'''
    f1 = Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1')
    f2 = Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2')
    f3 = Frame.from_dict(dict(d=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3')
    config = StoreConfigMap.from_config(StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
            ))
    bus_out = Bus.from_frames((f1, f2, f3), config=config)

    with temp_file('.xlsx') as fp:
        bus_out.to_xlsx(fp)
        bus_in = Bus.from_xlsx(fp, config=config)
        tuple(bus_in.items())  # force loading all
        for frame in (f1, f2, f3):
            self.assertEqualFrames(frame, bus_in[frame.name])
def test_bus_init_a(self) -> None:
    '''Round-trip a Bus through a zip-of-TSV archive, then read the same
    archive directly with StoreZipTSV.'''
    frame_foo = Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='foo')
    frame_bar = Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='bar')
    config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
    b1 = Bus.from_frames((frame_foo, frame_bar), config=config)
    self.assertEqual(b1.keys().values.tolist(), ['foo', 'bar'])

    with temp_file('.zip') as fp:
        b1.to_zip_tsv(fp)
        b2 = Bus.from_zip_tsv(fp)
        # force loading of both Frames from the archive
        _loaded_bar = b2['bar']
        _loaded_foo = b2['foo']

        store = StoreZipTSV(fp)
        store.write(b1.items())
        # how to show that this derived getitem has derived type?
        frame_read = store.read('foo', config=config['foo'])
        self.assertEqual(frame_read.to_pairs(0),
                (('a', (('x', 1), ('y', 2))), ('b', (('x', 3), ('y', 4)))))
def test_store_config_map_c(self) -> None:
    '''from_initializer accepts a StoreConfig, a StoreConfigMap, or a dict.'''
    base_config = StoreConfig(index_depth=3, columns_depth=3)
    per_label = {'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3)}
    existing_map = StoreConfigMap(per_label)

    # a single StoreConfig becomes the fallback for any label
    from_config = StoreConfigMap.from_initializer(base_config)
    self.assertEqual(from_config['a'].index_depth, 3)

    # an existing map passes through with its per-label entries intact
    from_map = StoreConfigMap.from_initializer(existing_map)
    self.assertEqual(from_map['a'].index_depth, 2)
    self.assertEqual(from_map['b'].index_depth, 3)

    # a plain dict of configs is converted to a map
    from_dict = StoreConfigMap.from_initializer(per_label)
    self.assertEqual(from_dict['a'].index_depth, 2)
    self.assertEqual(from_dict['b'].index_depth, 3)
def test_store_config_map_init_b(self) -> None:
    '''A label_encoder shared by per-label configs and the default is accepted.'''
    per_label = {
            'a': StoreConfig(index_depth=2, label_encoder=str),
            'b': StoreConfig(index_depth=3, label_encoder=str),
            }
    default = StoreConfig(label_encoder=str)
    config_map = StoreConfigMap(per_label, default=default)
    self.assertEqual(config_map.default.label_encoder, str)
def test_store_xlsx_write_b(self) -> None:
    '''Round-trip a Frame with None/nan/-inf values and hierarchical columns
    through an XLSX store under the default label.'''
    records = (
            (None, np.nan, 50, 'a'),
            (None, -np.inf, -50, 'b'),
            (None, 60.4, -50, 'c'),
            )
    f1 = Frame.from_records(records,
            index=('p', 'q', 'r'),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            )
    config_map = StoreConfigMap.from_config(
            StoreConfig(include_index=True, include_columns=True))

    with temp_file('.xlsx') as fp:
        st = StoreXLSX(fp)
        st.write(((STORE_LABEL_DEFAULT, f1),), config=config_map)

        read_config = StoreConfig(
                index_depth=f1.index.depth,
                columns_depth=f1.columns.depth,
                )
        f2 = st.read(STORE_LABEL_DEFAULT, config=read_config)
        # just a sample column for now
        self.assertEqual(
                f1[HLoc[('II', 'a')]].values.tolist(),
                f2[HLoc[('II', 'a')]].values.tolist(),
                )
        self.assertEqualFrames(f1, f2)
def write(self,
        items: tp.Iterable[tp.Tuple[tp.Optional[str], Frame]],
        *,
        config: StoreConfigMapInitializer = None,
        # store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT,
        ) -> None:
    '''Write (label, Frame) pairs as tables in the SQLite database at
    ``self._fp``, one table per Frame.

    Args:
        items: iterable of (label, Frame) pairs to store.
        config: StoreConfig / StoreConfigMap initializer controlling export.
    '''
    config_map = StoreConfigMap.from_initializer(config)
    # NOTE: register adapters for NP types:
    # numpy scalars are stored as blobs unless individually converted to Python types
    sqlite3.register_adapter(np.int64, int)
    sqlite3.register_adapter(np.int32, int)
    sqlite3.register_adapter(np.int16, int)
    # common python types without native SQLite representations
    sqlite3.register_adapter(Fraction, str)
    sqlite3.register_adapter(complex, lambda x: f'{x.real}:{x.imag}')
    # hierarchical columns might be stored as tuples
    with sqlite3.connect(self._fp, detect_types=sqlite3.PARSE_DECLTYPES) as conn:
        cursor = conn.cursor()
        for label, frame in items:
            # per-label config determines whether index/columns are written
            c = config_map[label]
            self._frame_to_table(frame=frame,
                    label=label,
                    cursor=cursor,
                    include_columns=c.include_columns,
                    include_index=c.include_index,
                    # store_filter=store_filter
                    )
        conn.commit()
def read_many(self,
        labels: tp.Iterable[tp.Hashable],
        *,
        config: StoreConfigMapInitializer = None,
        container_type: tp.Type[Frame] = Frame,
        ) -> tp.Iterator[Frame]:
    '''Read one Frame per label from the HDF5 file, yielding instances of
    ``container_type``.

    Args:
        labels: labels of tables to read.
        config: per-label StoreConfig initializer.
        container_type: Frame (sub)class used to construct results.

    Raises:
        NotImplementedError: if a config specifies ``dtypes``.
    '''
    import tables
    config_map = StoreConfigMap.from_initializer(config)

    with tables.open_file(self._fp, mode='r') as file:
        for label in labels:
            c = config_map[label]
            label_encoded = config_map.default.label_encode(label)

            index_depth = c.index_depth
            index_constructors = c.index_constructors
            columns_depth = c.columns_depth
            columns_constructors = c.columns_constructors
            consolidate_blocks = c.consolidate_blocks
            if c.dtypes:
                raise NotImplementedError('using config.dtypes on HDF5 not yet supported')

            # populated as side effects of consuming blocks() below
            index_arrays = []
            columns_labels = []

            table = file.get_node(f'/{label_encoded}')
            colnames = table.cols._v_colnames

            def blocks() -> tp.Iterator[np.ndarray]:
                # the first index_depth columns are diverted to index_arrays;
                # the remainder are yielded as immutable data blocks
                for col_idx, colname in enumerate(colnames):
                    # can also do: table.read(field=colname)
                    array = table.col(colname)
                    if array.dtype.kind in DTYPE_STR_KINDS:
                        array = array.astype(str)
                    array.flags.writeable = False
                    if col_idx < index_depth:
                        index_arrays.append(array)
                        continue
                    # only store column labels for those yielded
                    columns_labels.append(colname)
                    yield array

            if consolidate_blocks:
                data = TypeBlocks.from_blocks(TypeBlocks.consolidate_blocks(blocks()))
            else:
                data = TypeBlocks.from_blocks(blocks())

            # this will own_data in subsequent constructor call
            yield container_type._from_data_index_arrays_column_labels(
                    data=data,
                    index_depth=index_depth,
                    index_arrays=index_arrays,
                    index_constructors=index_constructors,
                    columns_depth=columns_depth,
                    columns_labels=columns_labels,
                    columns_constructors=columns_constructors,
                    name=label,
                    )
def test_store_config_map_init_d(self) -> None:
    '''read_chunksize must agree between per-label configs and the default.'''
    conflicting = {'a': StoreConfig(read_chunksize=2),
            'b': StoreConfig(read_chunksize=3)}
    default = StoreConfig(read_chunksize=2)
    with self.assertRaises(ErrorInitStoreConfig):
        StoreConfigMap(conflicting, default=default)  # Config has conflicting info

    matching = {'a': StoreConfig(read_chunksize=2),
            'b': StoreConfig(read_chunksize=2)}
    with self.assertRaises(ErrorInitStoreConfig):
        StoreConfigMap(matching)  # Default is 1

    config_map = StoreConfigMap(matching, default=default)
    self.assertEqual(config_map.default.read_chunksize, 2)
def test_store_config_map_init_e(self) -> None:
    '''write_max_workers must agree between per-label configs and the default.'''
    conflicting = {'a': StoreConfig(write_max_workers=2),
            'b': StoreConfig(write_max_workers=3)}
    default = StoreConfig(write_max_workers=2)
    with self.assertRaises(ErrorInitStoreConfig):
        StoreConfigMap(conflicting, default=default)  # Config has conflicting info

    matching = {'a': StoreConfig(write_max_workers=2),
            'b': StoreConfig(write_max_workers=2)}
    with self.assertRaises(ErrorInitStoreConfig):
        StoreConfigMap(matching)  # Default is None

    config_map = StoreConfigMap(matching, default=default)
    self.assertEqual(config_map.default.write_max_workers, 2)
def test_store_config_map_init_a(self) -> None:
    '''A per-label label_encoder that disagrees with the default is rejected.'''
    per_label = {
            'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3, label_encoder=str),
            }
    with self.assertRaises(ErrorInitStoreConfig):
        _ = StoreConfigMap.from_initializer(per_label)
def test_store_config_map_b(self) -> None:
    '''Known labels return their config; unknown labels fall back to the default.'''
    per_label = {'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3)}
    config_map = StoreConfigMap(per_label)
    self.assertEqual(config_map['a'].index_depth, 2)
    self.assertEqual(config_map['b'].index_depth, 3)
    # 'c' is not in the map: the default config (index_depth 0) is returned
    self.assertEqual(config_map['c'].index_depth, 0)
def test_store_config_map_get_default_a(self) -> None:
    '''With no explicit default supplied, the class-level default is used.'''
    per_label = {
            'a': StoreConfig(index_depth=2),
            'b': StoreConfig(index_depth=3),
            }
    config_map = StoreConfigMap.from_initializer(per_label)
    self.assertTrue(config_map.default == StoreConfigMap._DEFAULT)
def _from_store(cls,
        store: Store,
        config: StoreConfigMapInitializer = None,
        ) -> 'Batch':
    '''Construct a :obj:`Batch` from all labels in ``store``, reading each
    Frame lazily with its per-label config.'''
    config_map = StoreConfigMap.from_initializer(config)
    # generator: Frames are only read as the Batch consumes them
    items = (
            (label, store.read(label, config=config_map[label]))
            for label in store.labels()
            )
    return cls(items, config=config)
def _from_store(cls,
        store: Store,
        config: StoreConfigMapInitializer = None,
        max_persist: tp.Optional[int] = None, # not used
        ) -> 'Batch':
    '''Construct a :obj:`Batch` from all labels in ``store``, reading each
    Frame lazily with its per-label config. ``max_persist`` is accepted for
    interface compatibility but ignored.'''
    config_map = StoreConfigMap.from_initializer(config)
    # generator: Frames are only read as the Batch consumes them
    items = (
            (label, store.read(label, config=config_map[label]))
            for label in store.labels()
            )
    return cls(items, config=config)
def test_store_sqlite_read_many_a(self) -> None:
    '''Round-trip several Frames, including hierarchical indices and columns,
    through a SQLite store using read_many.'''
    f1 = Frame.from_dict(
            dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
            index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f1')
    f2 = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='f2')
    f3 = Frame.from_records(
            ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
            index=('p', 'q'),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f3')
    # four distinct rows, repeated to fill an 8-row Frame
    row_a = (10, 20, 50, False, 10, 20, 50, False)
    row_b = (50.0, 60.4, -50, True, 50.0, 60.4, -50, True)
    row_c = (234, 44452, 0, False, 234, 44452, 0, False)
    row_d = (4, -4, 2000, True, 4, -4, 2000, True)
    f4 = Frame.from_records(
            (row_a, row_b, row_c, row_d, row_a, row_b, row_c, row_d),
            index=IndexHierarchy.from_product(
                    ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b'), (1, 2)),
            name='f4')
    frames = (f1, f2, f3, f4)
    config_map_write = StoreConfigMap.from_config(
            StoreConfig(include_index=True, include_columns=True))

    with temp_file('.sqlite') as fp:
        store = StoreSQLite(fp)
        store.write(((f.name, f) for f in frames), config=config_map_write)

        # this will read from file, not in memory
        labels = tuple(store.labels())
        self.assertEqual(tuple(f.name for f in frames), labels)

        # per-label read config matching each source Frame's depths
        config_map_read: tp.Dict[tp.Hashable, StoreConfig] = {}
        for frame, label in zip(frames, labels):
            config_map_read[label] = StoreConfig(
                    index_depth=frame.index.depth,
                    columns_depth=frame.columns.depth)

        for f_src, f_loaded in zip(
                frames, store.read_many(labels, config=config_map_read)):
            self.assertEqualFrames(f_src, f_loaded, compare_dtype=False)
def __init__(self,
        series: Series,
        *,
        store: tp.Optional[Store] = None,
        config: StoreConfigMapInitializer = None,
        max_persist: tp.Optional[int] = None,
        own_data: bool = False,
        ):
    '''
    Default Bus constructor.

    {args}
    '''
    if series.dtype != DTYPE_OBJECT:
        raise ErrorInitBus(
                f'Series passed to initializer must have dtype object, not {series.dtype}')

    if max_persist is not None:
        # an (ordered) dictionary serves as an ordered set: all keys map to None
        self._last_accessed: tp.Dict[str, None] = {}

    # do a one time iteration of series
    def gen() -> tp.Iterator[bool]:
        # flag which positions hold realized Frames; record access order as a
        # side effect when max_persist is active
        for label, value in series.items():
            if isinstance(value, Frame):
                if max_persist is not None:
                    self._last_accessed[label] = None
                yield True
            elif value is FrameDeferred:
                yield False
            else:
                raise ErrorInitBus(f'supplied {value.__class__} is not a Frame or FrameDeferred.')

    self._loaded = np.fromiter(gen(), dtype=DTYPE_BOOL, count=len(series))
    self._loaded_all = self._loaded.all()

    if own_data:
        # caller transfers ownership: mutate the array in place
        self._values_mutable = series.values
        self._values_mutable.flags.writeable = True
    else:
        self._values_mutable = series.values.copy()

    self._index = series._index
    self._name = series._name
    self._store = store

    # Not handling cases of max_persist being greater than the length of the Series (might floor to length)
    if max_persist is not None and max_persist < self._loaded.sum():
        raise ErrorInitBus('max_persist cannot be less than the number of already loaded Frames')
    self._max_persist = max_persist

    # providing None will result in default; providing a StoreConfig or StoreConfigMap will return an appropriate map
    self._config = StoreConfigMap.from_initializer(config)
def test_store_xlsx_write_a(self) -> None:
    '''Round-trip several Frames, including hierarchical indices and columns,
    through an XLSX store.'''
    f1 = Frame.from_dict(
            dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
            index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f1')
    f2 = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='f2')
    f3 = Frame.from_records(
            ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
            index=('p', 'q'),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f3')
    f4 = Frame.from_records(
            (
                    (10, 20, 50, False, 10, 20, 50, False),
                    (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
                    (234, 44452, 0, False, 234, 44452, 0, False),
                    (4, -4, 2000, True, 4, -4, 2000, True),
                    (10, 20, 50, False, 10, 20, 50, False),
                    (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
                    (234, 44452, 0, False, 234, 44452, 0, False),
                    (4, -4, 2000, True, 4, -4, 2000, True),
            ),
            index=IndexHierarchy.from_product(
                    ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b'), (1, 2)),
            name='f4')
    frames = (f1, f2, f3, f4)
    config_map = StoreConfigMap.from_config(
            StoreConfig(include_index=True, include_columns=True))

    with temp_file('.xlsx') as fp:
        st1 = StoreXLSX(fp)
        st1.write(((f.name, f) for f in frames), config=config_map)

        # this will read from file, not in memory
        sheet_names = tuple(st1.labels())
        self.assertEqual(tuple(f.name for f in frames), sheet_names)

        for i, name in enumerate(sheet_names):
            f_src = frames[i]
            c = StoreConfig(index_depth=f_src.index.depth,
                    columns_depth=f_src.columns.depth)
            f_loaded = st1.read(name, config=c)
            # FIX: the keyword is compare_dtype (as used elsewhere in this
            # module), not check_dtypes, which would raise a TypeError
            self.assertEqualFrames(f_src, f_loaded, compare_dtype=False)
def labels(self,
        *,
        config: StoreConfigMapInitializer = None,
        strip_ext: bool = True,
        ) -> tp.Iterator[tp.Hashable]:
    '''Iterate decoded labels of all members in the zip archive.

    Args:
        strip_ext: if True, remove the contained-file extension from each name.
    '''
    config_map = StoreConfigMap.from_initializer(config)
    with zipfile.ZipFile(self._fp) as zf:
        for name in zf.namelist():
            label = name.replace(self._EXT_CONTAINED, '') if strip_ext else name
            # decoding always uses the default config's decoder
            yield config_map.default.label_decode(label)
def labels(self,
        *,
        config: StoreConfigMapInitializer = None,
        strip_ext: bool = True,
        ) -> tp.Iterator[tp.Hashable]:
    '''Iterate table names in the SQLite file as decoded labels.'''
    config_map = StoreConfigMap.from_initializer(config)
    query = "SELECT name FROM sqlite_master WHERE type='table';"
    with sqlite3.connect(self._fp) as conn:
        # decoding always uses the default config's decoder
        for row in conn.cursor().execute(query):
            yield config_map.default.label_decode(row[0])
def test_bus_update_series_cache_iloc(self) -> None:
    '''Loading a FrameDeferred without a backing Store must fail.'''
    frame = Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='foo')
    config = StoreConfigMap.from_config(StoreConfig(index_depth=1))

    # simulating a Bus with a FrameDefferred but no Store, just for testing
    s1 = Series((frame, FrameDeferred), index=('p', 'q'))
    b1 = Bus(s1, config=config)
    self.assertFalse(b1._loaded_all)

    with self.assertRaises(RuntimeError):
        b1._update_series_cache_iloc(1)
def labels(self,
        *,
        config: StoreConfigMapInitializer = None,
        strip_ext: bool = True,
        ) -> tp.Iterator[tp.Hashable]:
    '''Iterate workbook sheet names as decoded labels.'''
    config_map = StoreConfigMap.from_initializer(config)
    wb = self._load_workbook(self._fp)
    # materialize all names before closing the workbook
    names = tuple(wb.sheetnames)
    wb.close()
    yield from (config_map.default.label_decode(name) for name in names)
def __init__(self,
        series: Series,
        *,
        store: tp.Optional[Store] = None,
        config: StoreConfigMapInitializer = None,
        max_persist: tp.Optional[int] = None,
        ):
    '''
    Args:
        config: StoreConfig for handling :obj:`Frame` construction and exporting from Store.
        max_persist: When loading :obj:`Frame` from a :obj:`Store`, optionally define the maximum number of :obj:`Frame` to remain in the :obj:`Bus`, regardless of the size of the :obj:`Bus`. If more than ``max_persist`` number of :obj:`Frame` are loaded, least-recently loaded :obj:`Frame` will be replaced by ``FrameDeferred``. A ``max_persist`` of 1, for example, permits reading one :obj:`Frame` at a time without ever holding in memory more than 1 :obj:`Frame`.
    '''
    if series.dtype != DTYPE_OBJECT:
        raise ErrorInitBus(
                f'Series passed to initializer must have dtype object, not {series.dtype}')

    if max_persist is not None:
        # an (ordered) dictionary serves as an ordered set: all keys map to None
        self._last_accessed: tp.Dict[str, None] = {}

    # do a one time iteration of series
    def gen() -> tp.Iterator[bool]:
        # validate labels/values and flag realized Frames; record access order
        # as a side effect when max_persist is active
        for label, value in series.items():
            if not isinstance(label, str):
                raise ErrorInitBus(f'supplied label {label} is not a string.')
            if isinstance(value, Frame):
                if max_persist is not None:
                    self._last_accessed[label] = None
                yield True
            elif value is FrameDeferred:
                yield False
            else:
                raise ErrorInitBus(f'supplied {value.__class__} is not a Frame or FrameDeferred.')

    self._loaded = np.fromiter(gen(), dtype=DTYPE_BOOL, count=len(series))
    self._loaded_all = self._loaded.all()
    self._series = series
    self._store = store

    # max_persist might be less than the number of Frames already loaded
    if max_persist is not None:
        self._max_persist = max(max_persist, self._loaded.sum())
    else:
        self._max_persist = None

    # providing None will result in default; providing a StoreConfig or StoreConfigMap will return an appropriate map
    self._config = StoreConfigMap.from_initializer(config)
def test_store_hdf5_write_a(self) -> None:
    '''Round-trip several Frames, including hierarchical indices and columns,
    through an HDF5 store.'''
    f1 = Frame.from_dict(
            dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
            index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f1')
    f2 = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='f2')
    f3 = Frame.from_records(
            ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
            index=('p', 'q'),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
            name='f3')
    # four distinct rows, repeated to fill an 8-row Frame
    row_a = (10, 20, 50, False, 10, 20, 50, False)
    row_b = (50.0, 60.4, -50, True, 50.0, 60.4, -50, True)
    row_c = (234, 44452, 0, False, 234, 44452, 0, False)
    row_d = (4, -4, 2000, True, 4, -4, 2000, True)
    f4 = Frame.from_records(
            (row_a, row_b, row_c, row_d, row_a, row_b, row_c, row_d),
            index=IndexHierarchy.from_product(
                    ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b'), (1, 2)),
            name='f4')
    frames = (f1, f2, f3, f4)
    config = StoreConfigMap.from_frames(frames)

    with temp_file('.hdf5') as fp:
        store = StoreHDF5(fp)
        store.write(((f.name, f) for f in frames), config=config)

        # this will read from file, not in memory
        labels = tuple(store.labels())
        self.assertEqual(tuple(f.name for f in frames), labels)

        for f_src, label in zip(frames, labels):
            f_loaded = store.read(label, config=config[f_src.name])
            self.assertEqualFrames(f_src, f_loaded)
def labels(self,
        *,
        config: StoreConfigMapInitializer = None,
        strip_ext: bool = True,
        ) -> tp.Iterator[tp.Hashable]:
    '''
    Iterator of labels.
    '''
    import tables
    config_map = StoreConfigMap.from_initializer(config)
    with tables.open_file(self._fp, mode='r') as h5_file:
        table_nodes = h5_file.iter_nodes(where='/', classname=tables.Table.__name__)
        for node in table_nodes:
            # NOTE: this is not the complete path
            yield config_map.default.label_decode(node.name)