class Bus(ContainerBase): __slots__ = ('_series', '_store') _series: Series _store: tp.Optional[Store] @staticmethod def _deferred_series(labels: tp.Iterable[str]) -> Series: # make an object dtype return Series(FrameDeferred, index=labels, dtype=object) @classmethod def from_frames(cls, frames: tp.Iterable[Frame]) -> 'Bus': '''Return a ``Bus`` from an iterable of ``Frame``; labels will be drawn from :obj:`Frame.name`. ''' series = Series.from_items(((f.name, f) for f in frames), dtype=object) return cls(series) #--------------------------------------------------------------------------- # constructors by data format @classmethod def from_zip_tsv(cls, fp: PathSpecifier) -> 'Bus': store = StoreZipTSV(fp) return cls(cls._deferred_series(store.labels()), store=store) @classmethod def from_zip_csv(cls, fp: PathSpecifier) -> 'Bus': store = StoreZipCSV(fp) return cls(cls._deferred_series(store.labels()), store=store) @classmethod def from_zip_pickle(cls, fp: PathSpecifier) -> 'Bus': store = StoreZipPickle(fp) return cls(cls._deferred_series(store.labels()), store=store) @classmethod def from_xlsx(cls, fp: PathSpecifier) -> 'Bus': store = StoreXLSX(fp) return cls(cls._deferred_series(store.labels()), store=store) @classmethod def from_sqlite(cls, fp: PathSpecifier) -> 'Bus': store = StoreSQLite(fp) return cls(cls._deferred_series(store.labels()), store=store) @classmethod def from_hdf5(cls, fp: PathSpecifier) -> 'Bus': store = StoreHDF5(fp) return cls(cls._deferred_series(store.labels()), store=store) #--------------------------------------------------------------------------- def __init__(self, series: Series, *, store: tp.Optional[Store] = None): if series.dtype != DTYPE_OBJECT: raise ErrorInitBus( f'Series passed to initializer must have dtype object, not {series.dtype}' ) # do a one time iteration of series def gen() -> tp.Iterator[bool]: for label, value in series.items(): if not isinstance(label, str): raise ErrorInitBus( f'supplied label {label} is not a string.') if isinstance(value, Frame): yield True elif value is FrameDeferred: yield False else: raise ErrorInitBus( f'supplied {value.__class__} is not a Frame or FrameDeferred.' ) self._loaded = np.fromiter(gen(), dtype=DTYPE_BOOL, count=len(series)) self._loaded_all = self._loaded.all() self._series = series self._store = store #--------------------------------------------------------------------------- # delegation def __getattr__(self, name: str) -> tp.Any: if name == 'interface': return getattr(self.__class__, 'interface') try: return getattr(self._series, name) except AttributeError: # fix the attribute error to reference the Bus raise AttributeError( f"'{self.__class__.__name__}' object has no attribute '{name}'" ) #--------------------------------------------------------------------------- # cache management def _iloc_to_labels(self, key: GetItemKeyType) -> np.ndarray: ''' Given a get-item key, translate to an iterator of loc positions. ''' if isinstance(key, int): return [ self.index.values[key], ] # needs to be a list for usage in loc assignment return self.index.values[key] def _update_series_cache_iloc(self, key: GetItemKeyType) -> None: ''' Update the Series cache with the key specified, where key can be any iloc GetItemKeyType. ''' # do nothing if all loaded, or if the requested keys are already loadsed if not self._loaded_all and not self._loaded[key].all(): if self._store is None: raise RuntimeError('no store defined') labels = set(self._iloc_to_labels(key)) array = np.empty(shape=len(self._series._index), dtype=object) for idx, (label, frame) in enumerate(self._series.items()): if frame is FrameDeferred and label in labels: frame = self._store.read(label) self._loaded[idx] = True # update loaded status array[idx] = frame array.flags.writeable = False self._series = Series(array, index=self._series._index, dtype=object) self._loaded_all = self._loaded.all() def _update_series_cache_all(self) -> None: '''Load all Tables contained in this Bus. ''' if not self._loaded_all: self._update_series_cache_iloc(NULL_SLICE) #--------------------------------------------------------------------------- # extraction def _extract_iloc(self, key: GetItemKeyType) -> 'Bus': self._update_series_cache_iloc(key=key) # iterable selection should be handled by NP values = self._series.values[key] if not isinstance(values, np.ndarray): # if we have a single element return values series = Series( values, index=self._series._index.iloc[key], # type: ignore name=self._name) return self.__class__(series=series, store=self._store) def _extract_loc(self, key: GetItemKeyType) -> 'Bus': iloc_key = self._series._index.loc_to_iloc(key) #type: ignore # NOTE: if we update before slicing, we change the local and the object handed back self._update_series_cache_iloc(key=iloc_key) values = self._series.values[iloc_key] if not isinstance(values, np.ndarray): # if we have a single element if isinstance(key, HLoc) and key.has_key_multiple(): # must return a Series, even though we do not have an array values = np.array(values) values.flags.writeable = False else: return values series = Series( values, index=self._series._index.iloc[iloc_key], #type: ignore own_index=True, name=self._name) return self.__class__(series=series, store=self._store) @doc_inject(selector='selector') def __getitem__(self, key: GetItemKeyType) -> 'Bus': '''Selector of values by label. Args: key: {key_loc} ''' return self._extract_loc(key) #--------------------------------------------------------------------------- # interfaces @property def loc(self) -> InterfaceGetItem[TContainer]: return InterfaceGetItem(self._extract_loc) # type: ignore @property def iloc(self) -> InterfaceGetItem[TContainer]: return InterfaceGetItem(self._extract_iloc) # type: ignore # --------------------------------------------------------------------------- def __reversed__(self) -> tp.Iterator[tp.Hashable]: return reversed(self._series._index) def __len__(self) -> int: return self._series.__len__() #--------------------------------------------------------------------------- # dictionary-like interface def items(self) -> tp.Iterator[tp.Tuple[tp.Any, tp.Any]]: '''Iterator of pairs of index label and value. ''' self._update_series_cache_all() yield from self._series.items() @property def values(self) -> np.ndarray: '''A 1D array of values. ''' self._update_series_cache_all() return self._seires.values #--------------------------------------------------------------------------- def display(self, config: tp.Optional[DisplayConfig] = None) -> Display: '''Return a Display of the Bus. ''' # NOTE: the key change is providing the Bus as the displayed class config = config or DisplayActive.get() d = Display([], config=config, outermost=True, index_depth=1, header_depth=2) # series and index header display_index = self._index.display(config=config) d.extend_display(display_index) d.extend_display( Display.from_values( self._series.values, # do not force loading with self.values header='', config=config)) display_cls = Display.from_values( (), header=DisplayHeader(self.__class__, self._series._name), config=config) d.insert_displays(display_cls.flatten()) return d #--------------------------------------------------------------------------- # extended disciptors @property def mloc(self) -> Series: '''Returns a Series of tuples of dtypes, one for each loaded Frame. ''' if not self._loaded.any(): return Series(None, index=self._series._index) def gen() -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Optional[tp.Tuple[ int, ...]]]]: for label, f in zip(self._series._index, self._series.values): if f is FrameDeferred: yield label, None else: yield label, tuple(f.mloc) return Series.from_items(gen()) @property def dtypes(self) -> Frame: '''Returns a Frame of dtypes for all loaded Frames. ''' if not self._loaded.any(): return Frame(index=self._series.index) f = Frame.from_concat( frames=(f.dtypes for f in self._series.values if f is not FrameDeferred), fill_value=None, ).reindex(index=self._series.index, fill_value=None) return tp.cast(Frame, f) @property def shapes(self) -> Series: '''A :obj:`Series` describing the shape of each loaded :obj:`Frame`. Returns: :obj:`tp.Tuple[int]` ''' values = (f.shape if f is not FrameDeferred else None for f in self._series.values) return Series(values, index=self._series._index, dtype=object, name='shape') @property def nbytes(self) -> int: '''Total bytes of data currently loaded in the Bus. ''' return sum(f.nbytes if f is not FrameDeferred else 0 for f in self._series.values) @property def status(self) -> Frame: ''' Return a ''' def gen() -> tp.Iterator[Series]: yield Series(self._loaded, index=self._series._index, dtype=DTYPE_BOOL, name='loaded') for attr, dtype, missing in (('size', DTYPE_FLOAT_DEFAULT, np.nan), ('nbytes', DTYPE_FLOAT_DEFAULT, np.nan), ('shape', DTYPE_OBJECT, None)): values = (getattr(f, attr) if f is not FrameDeferred else missing for f in self._series.values) yield Series(values, index=self._series._index, dtype=dtype, name=attr) return tp.cast(Frame, Frame.from_concat(gen(), axis=1)) #--------------------------------------------------------------------------- # exporters def to_zip_tsv(self, fp: PathSpecifier) -> None: store = StoreZipTSV(fp) store.write(self.items()) def to_zip_csv(self, fp: PathSpecifier) -> None: store = StoreZipCSV(fp) store.write(self.items()) def to_zip_pickle(self, fp: PathSpecifier) -> None: store = StoreZipPickle(fp) store.write(self.items()) def to_xlsx(self, fp: PathSpecifier) -> None: store = StoreXLSX(fp) store.write(self.items()) def to_sqlite(self, fp: PathSpecifier) -> None: store = StoreSQLite(fp) store.write(self.items()) def to_hdf5(self, fp: PathSpecifier) -> None: store = StoreHDF5(fp) store.write(self.items())
class Bus(ContainerBase, StoreClientMixin): # not a ContainerOperand ''' A lazy, randomly-accessible container of :obj:`Frame`. ''' __slots__ = ( '_loaded', '_loaded_all', '_series', '_store', '_config', ) _series: Series _store: tp.Optional[Store] _config: StoreConfigMap STATIC = False @staticmethod def _deferred_series(labels: tp.Iterable[str]) -> Series: ''' Return an object ``Series`` of ``FrameDeferred`` objects, based on the passed in ``labels``. ''' return Series.from_element(FrameDeferred, index=labels, dtype=object) @classmethod def from_frames(cls, frames: tp.Iterable[Frame], *, config: StoreConfigMapInitializer = None, name: NameType = None, ) -> 'Bus': '''Return a :obj:`Bus` from an iterable of :obj:`Frame`; labels will be drawn from :obj:`Frame.name`. ''' # could take a StoreConfigMap series = Series.from_items( ((f.name, f) for f in frames), dtype=object, name=name, ) return cls(series, config=config) @classmethod def _from_store(cls, store: Store, config: StoreConfigMapInitializer = None ) -> 'Bus': return cls(cls._deferred_series(store.labels()), store=store, config=config ) #--------------------------------------------------------------------------- def __init__(self, series: Series, *, store: tp.Optional[Store] = None, config: StoreConfigMapInitializer = None ): ''' Args: config: StoreConfig for handling ``Frame`` construction and exporting from Store. ''' if series.dtype != DTYPE_OBJECT: raise ErrorInitBus( f'Series passed to initializer must have dtype object, not {series.dtype}') # do a one time iteration of series def gen() -> tp.Iterator[bool]: for label, value in series.items(): if not isinstance(label, str): raise ErrorInitBus(f'supplied label {label} is not a string.') if isinstance(value, Frame): yield True elif value is FrameDeferred: yield False else: raise ErrorInitBus(f'supplied {value.__class__} is not a Frame or FrameDeferred.') self._loaded = np.fromiter(gen(), dtype=DTYPE_BOOL, count=len(series)) self._loaded_all = self._loaded.all() self._series = series self._store = store # providing None will result in default; providing a StoreConfig or StoreConfigMap will return an appropriate map self._config = StoreConfigMap.from_initializer(config) #--------------------------------------------------------------------------- # delegation def __getattr__(self, name: str) -> tp.Any: try: return getattr(self._series, name) except AttributeError: # fix the attribute error to reference the Bus raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") #--------------------------------------------------------------------------- # cache management def _iloc_to_labels(self, key: GetItemKeyType ) -> np.ndarray: ''' Given a get-item key, translate to an iterator of loc positions. ''' if isinstance(key, int): return [self.index.values[key],] # needs to be a list for usage in loc assignment return self.index.values[key] def _update_series_cache_iloc(self, key: GetItemKeyType) -> None: ''' Update the Series cache with the key specified, where key can be any iloc GetItemKeyType. ''' # do nothing if all loaded, or if the requested keys are already loadsed if not self._loaded_all and not self._loaded[key].all(): if self._store is None: raise RuntimeError('no store defined') labels = set(self._iloc_to_labels(key)) array = np.empty(shape=len(self._series._index), dtype=object) for idx, (label, frame) in enumerate(self._series.items()): if frame is FrameDeferred and label in labels: frame = self._store.read(label, config=self._config[label]) self._loaded[idx] = True # update loaded status array[idx] = frame array.flags.writeable = False self._series = Series(array, index=self._series._index, dtype=object) self._loaded_all = self._loaded.all() def _update_series_cache_all(self) -> None: '''Load all Tables contained in this Bus. ''' if not self._loaded_all: self._update_series_cache_iloc(NULL_SLICE) #--------------------------------------------------------------------------- # extraction def _extract_iloc(self, key: GetItemKeyType) -> 'Bus': self._update_series_cache_iloc(key=key) # iterable selection should be handled by NP values = self._series.values[key] if not isinstance(values, np.ndarray): # if we have a single element return values #type: ignore series = Series( values, index=self._series._index.iloc[key], name=self._series._name) return self.__class__(series=series, store=self._store, config=self._config, ) def _extract_loc(self, key: GetItemKeyType) -> 'Bus': iloc_key = self._series._index.loc_to_iloc(key) # NOTE: if we update before slicing, we change the local and the object handed back self._update_series_cache_iloc(key=iloc_key) values = self._series.values[iloc_key] if not isinstance(values, np.ndarray): # if we have a single element # NOTE: only support str labels, not IndexHierarchy # if isinstance(key, HLoc) and key.has_key_multiple(): # values = np.array(values) # values.flags.writeable = False return values #type: ignore series = Series(values, index=self._series._index.iloc[iloc_key], own_index=True, name=self._series._name) return self.__class__(series=series, store=self._store, config=self._config, ) @doc_inject(selector='selector') def __getitem__(self, key: GetItemKeyType) -> 'Bus': '''Selector of values by label. Args: key: {key_loc} ''' return self._extract_loc(key) #--------------------------------------------------------------------------- # interfaces @property def loc(self) -> InterfaceGetItem['Bus']: return InterfaceGetItem(self._extract_loc) @property def iloc(self) -> InterfaceGetItem['Bus']: return InterfaceGetItem(self._extract_iloc) # --------------------------------------------------------------------------- def __reversed__(self) -> tp.Iterator[tp.Hashable]: return reversed(self._series._index) #type: ignore def __len__(self) -> int: return self._series.__len__() #--------------------------------------------------------------------------- # dictionary-like interface def items(self) -> tp.Iterator[tp.Tuple[str, tp.Any]]: '''Iterator of pairs of index label and value. ''' self._update_series_cache_all() yield from self._series.items() @property def values(self) -> np.ndarray: '''A 1D array of values. ''' self._update_series_cache_all() return self._series.values #--------------------------------------------------------------------------- @doc_inject() def display(self, config: tp.Optional[DisplayConfig] = None ) -> Display: '''{doc} Args: {config} ''' # NOTE: the key change over serires is providing the Bus as the displayed class config = config or DisplayActive.get() display_cls = Display.from_values((), header=DisplayHeader(self.__class__, self._series._name), config=config) return self._series._display(config, display_cls) #--------------------------------------------------------------------------- # extended disciptors @property def mloc(self) -> Series: '''Returns a Series of tuples of dtypes, one for each loaded Frame. ''' if not self._loaded.any(): return Series.from_element(None, index=self._series._index) def gen() -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Optional[tp.Tuple[int, ...]]]]: for label, f in zip(self._series._index, self._series.values): if f is FrameDeferred: yield label, None else: yield label, tuple(f.mloc) return Series.from_items(gen()) @property def dtypes(self) -> Frame: '''Returns a Frame of dtypes for all loaded Frames. ''' if not self._loaded.any(): return Frame(index=self._series.index) f = Frame.from_concat( frames=(f.dtypes for f in self._series.values if f is not FrameDeferred), fill_value=None, ).reindex(index=self._series.index, fill_value=None) return tp.cast(Frame, f) @property def shapes(self) -> Series: '''A :obj:`Series` describing the shape of each loaded :obj:`Frame`. Returns: :obj:`tp.Tuple[int]` ''' values = (f.shape if f is not FrameDeferred else None for f in self._series.values) return Series(values, index=self._series._index, dtype=object, name='shape') @property def nbytes(self) -> int: '''Total bytes of data currently loaded in the Bus. ''' return sum(f.nbytes if f is not FrameDeferred else 0 for f in self._series.values) @property def status(self) -> Frame: ''' Return a ''' def gen() -> tp.Iterator[Series]: yield Series(self._loaded, index=self._series._index, dtype=DTYPE_BOOL, name='loaded') for attr, dtype, missing in ( ('size', DTYPE_FLOAT_DEFAULT, np.nan), ('nbytes', DTYPE_FLOAT_DEFAULT, np.nan), ('shape', DTYPE_OBJECT, None) ): values = (getattr(f, attr) if f is not FrameDeferred else missing for f in self._series.values) yield Series(values, index=self._series._index, dtype=dtype, name=attr) return tp.cast(Frame, Frame.from_concat(gen(), axis=1)) #--------------------------------------------------------------------------- @doc_inject() def equals(self, other: tp.Any, *, compare_name: bool = False, compare_dtype: bool = False, compare_class: bool = False, skipna: bool = True, ) -> bool: ''' {doc} Args: {compare_name} {compare_dtype} {compare_class} {skipna} ''' if id(other) == id(self): return True if compare_class and self.__class__ != other.__class__: return False elif not isinstance(other, Bus): return False # defer updating cache self._update_series_cache_all() if len(self._series) != len(other._series): return False if compare_name and self._series._name != other._series._name: return False # NOTE: dtype self._series is always object if not self._series.index.equals( other._series.index, compare_name=compare_name, compare_dtype=compare_dtype, compare_class=compare_class, skipna=skipna, ): return False # can zip because length of Series already match for (frame_self, frame_other) in zip( self._series.values, other._series.values): if not frame_self.equals(frame_other, compare_name=compare_name, compare_dtype=compare_dtype, compare_class=compare_class, skipna=skipna, ): return False return True