def read( self, label: tp.Optional[str] = None, *, config: tp.Optional[StoreConfig] = None, container_type: tp.Type[Frame] = Frame, ) -> Frame: ''' Args: {dtypes} ''' import tables if config is None: config = StoreConfig() # get default if config.dtypes: raise NotImplementedError( 'using config.dtypes on HDF5 not yet supported') index_depth = config.index_depth columns_depth = config.columns_depth index_arrays = [] columns_labels = [] with tables.open_file(self._fp, mode='r') as file: table = file.get_node(f'/{label}') colnames = table.cols._v_colnames def blocks() -> tp.Iterator[np.ndarray]: for col_idx, colname in enumerate(colnames): # can also do: table.read(field=colname) array = table.col(colname) if array.dtype.kind in DTYPE_STR_KIND: array = array.astype(str) array.flags.writeable = False if col_idx < index_depth: index_arrays.append(array) continue # only store column labels for those yielded columns_labels.append(colname) yield array if config.consolidate_blocks: data = TypeBlocks.from_blocks( TypeBlocks.consolidate_blocks(blocks())) else: data = TypeBlocks.from_blocks(blocks()) return container_type._from_data_index_arrays_column_labels( data=data, index_depth=index_depth, index_arrays=index_arrays, columns_depth=columns_depth, columns_labels=columns_labels, name=tp.cast(tp.Hashable, label) # not sure why this is necessary )
def read_many(self, labels: tp.Iterable[tp.Hashable], *, config: StoreConfigMapInitializer = None, container_type: tp.Type[Frame] = Frame, ) -> tp.Iterator[Frame]: import tables config_map = StoreConfigMap.from_initializer(config) with tables.open_file(self._fp, mode='r') as file: for label in labels: c = config_map[label] label_encoded = config_map.default.label_encode(label) index_depth = c.index_depth index_constructors = c.index_constructors columns_depth = c.columns_depth columns_constructors = c.columns_constructors consolidate_blocks = c.consolidate_blocks if c.dtypes: raise NotImplementedError('using config.dtypes on HDF5 not yet supported') index_arrays = [] columns_labels = [] table = file.get_node(f'/{label_encoded}') colnames = table.cols._v_colnames def blocks() -> tp.Iterator[np.ndarray]: for col_idx, colname in enumerate(colnames): # can also do: table.read(field=colname) array = table.col(colname) if array.dtype.kind in DTYPE_STR_KINDS: array = array.astype(str) array.flags.writeable = False if col_idx < index_depth: index_arrays.append(array) continue # only store column labels for those yielded columns_labels.append(colname) yield array if consolidate_blocks: data = TypeBlocks.from_blocks(TypeBlocks.consolidate_blocks(blocks())) else: data = TypeBlocks.from_blocks(blocks()) # this will own_data in subsequent constructor call yield container_type._from_data_index_arrays_column_labels( data=data, index_depth=index_depth, index_arrays=index_arrays, index_constructors=index_constructors, columns_depth=columns_depth, columns_labels=columns_labels, columns_constructors=columns_constructors, name=label, )