def _axis_array(self, axis: int) -> tp.Iterator[np.ndarray]:
    '''Generator of arrays across an axis, drawn from each Bus component in turn.

    Args:
        axis: 0 iterates over columns, 1 iterates over rows

    Raises:
        NotImplementedAxis: if ``axis`` is 1 while ``self._axis`` is not 0, or ``axis`` is 0 while ``self._axis`` is not 1.
        AxisInvalid: if ``axis`` is neither 0 nor 1.
    '''
    extract = get_extractor(
            self._deepcopy_from_bus,
            is_array=True,
            memo_active=False,
            )

    # Rows can only be streamed when components are aligned vertically (self._axis == 0); columns only when components are aligned horizontally (self._axis == 1).
    if axis == 1:
        supported = self._axis == 0
    elif axis == 0:
        supported = self._axis == 1
    else:
        raise AxisInvalid(f'no support for axis {axis}')

    if not supported:
        raise NotImplementedAxis()

    for _, frame in self._bus.items():
        for values in frame._blocks.axis_values(axis):
            yield extract(values)
def _axis_tuple(self, *,
        axis: int,
        constructor: tp.Optional[tp.Type[tp.NamedTuple]] = None,
        ) -> tp.Iterator[tp.NamedTuple]:
    '''Generator of named tuples across an axis.

    Args:
        axis: 0 iterates over columns (index axis), 1 iterates over rows (column axis)
        constructor: optional callable applied to each array of axis values. If it is a tuple subclass exposing ``_make``, ``_make`` is used so the values are supplied as a single iterable. If None, a constructor is derived from the labels of the opposite axis.

    Raises:
        AxisInvalid: if no constructor is given and ``axis`` is neither 0 nor 1.
    '''
    if constructor is not None:
        is_tuple_cls = isinstance(constructor, type) and issubclass(constructor, tuple)
        if is_tuple_cls and hasattr(constructor, '_make'):
            constructor = constructor._make #type: ignore
    else:
        if axis == 1:
            labels = self._columns.values
        elif axis == 0:
            labels = self._index.values
        else:
            raise AxisInvalid(f'no support for axis {axis}')
        # the derived constructor is called with a single iterable of values
        constructor = get_tuple_constructor(labels) #type: ignore

    assert constructor is not None

    make = constructor
    for values in self._axis_array(axis):
        yield make(values)
def bus_to_hierarchy(
        bus: tp.Union[Bus, 'Yarn'],
        axis: int,
        deepcopy_from_bus: bool,
        init_exception_cls: tp.Type[Exception],
        ) -> tp.Tuple[IndexHierarchy, IndexBase]:
    '''
    Given a :obj:`Bus` and an axis, derive a :obj:`IndexHierarchy` keyed by the Bus labels; also return and validate the :obj:`Index` of the opposite axis.

    Args:
        bus: container whose ``items()`` yields pairs of label and :obj:`Frame`.
        axis: 0 builds the hierarchy from each Frame's index (columns are the opposite axis); 1 builds it from each Frame's columns (index is the opposite axis).
        deepcopy_from_bus: forwarded to ``get_extractor``.
        init_exception_cls: exception class raised if the opposite-axis index of any Frame does not equal that of the first Frame.

    Returns:
        A pair of the derived :obj:`IndexHierarchy` and the extracted opposite-axis index of the first Frame.

    Raises:
        AxisInvalid: if ``axis`` is neither 0 nor 1; this is checked per item, while iterating.
    '''
    # NOTE: need to extract just axis labels, not the full Frame; need new Store/Bus loaders just for label data
    extractor = get_extractor(deepcopy_from_bus, is_array=False, memo_active=False)

    def to_node(index: IndexBase) -> tp.Union[IndexBase, TreeNodeT]:
        # hierarchical indices become nested tree nodes; flat indices are leaves
        extracted = extractor(index)
        return extracted.to_tree() if isinstance(extracted, IndexHierarchy) else extracted

    tree: TreeNodeT = {}
    opposite: tp.Optional[IndexBase] = None

    for label, frame in bus.items():
        if axis == 0:
            primary, secondary = frame.index, frame.columns
        elif axis == 1:
            primary, secondary = frame.columns, frame.index
        else:
            raise AxisInvalid(f'invalid axis {axis}')

        tree[label] = to_node(primary)

        if opposite is None:
            # the first Frame defines the reference for the opposite axis
            opposite = extractor(secondary)
        elif not opposite.equals(secondary):
            raise init_exception_cls('opposite axis must have equivalent indices')

    # NOTE: we could try to collect index constructors by using the index of the Bus and observing the indices of the contained Frames, but it is not clear that will be better than using IndexAutoConstructorFactory
    hierarchy = IndexHierarchy.from_tree(
            tree,
            index_constructors=IndexAutoConstructorFactory,
            )
    return hierarchy, opposite # type: ignore
def values() -> tp.Iterator[Frame]: nonlocal opposite for start, end in zip_longest(starts, ends, fillvalue=vector_len): if axis == 0: # along rows f = frame.iloc[start:end] label = label_extractor(f.index) #type: ignore axis_map_components[label] = f.index if opposite is None: opposite = f.columns elif axis == 1: # along columns f = frame.iloc[:, start:end] label = label_extractor(f.columns) #type: ignore axis_map_components[label] = f.columns if opposite is None: opposite = f.index else: raise AxisInvalid(f'invalid axis {axis}') yield f.rename(label)
def apex_to_name(
        rows: tp.Sequence[tp.Sequence[tp.Hashable]],
        depth_level: tp.Optional[DepthLevelSpecifier],
        axis: int, # 0 is by row (for index), 1 is by column (for columns)
        axis_depth: int,
        ) -> NameType:
    '''
    Utility for translating apex values (the upper left corner created by index/columns) into the appropriate name.

    Args:
        rows: the apex values, as a sequence of rows.
        depth_level: None to return no name; an integer to select one level; otherwise an iterable of integer levels.
        axis: for 0, levels select entire rows; for 1, levels select positions within each row.
        axis_depth: if 1, a single label (or a single tuple, for a list selection) is returned; otherwise a tuple with one entry per selected element.

    Returns:
        None if ``depth_level`` is None, or if a single selected label is the empty string; otherwise a label or tuple as described above.

    Raises:
        AxisInvalid: if ``depth_level`` is not None and ``axis`` is neither 0 nor 1.
    '''
    if depth_level is None:
        return None
    if not (axis == 0 or axis == 1):
        raise AxisInvalid(f'invalid axis: {axis}')

    if isinstance(depth_level, INT_TYPES):
        if axis == 0:
            labels = rows[depth_level]
        else: # depth_level refers to position in inner row
            labels = [r[depth_level] for r in rows]

        if axis_depth == 1: # return a single label
            head = labels[0]
            return head if head != '' else None
        return tuple(labels)

    # depth_level is a list selection: combine the selected levels into tuples
    if axis == 0:
        combined = zip(*[rows[level] for level in depth_level])
    else:
        combined = (tuple(row[level] for level in depth_level) for row in rows) #type: ignore

    if axis_depth == 1:
        return next(combined) #type: ignore
    return tuple(combined)
def from_frames(self,
        frames: tp.Iterable['Frame'],
        *,
        include_index: bool = True,
        include_columns: bool = True,
        axis: int = 0,
        union: bool = True,
        name: NameType = None,
        fill_value: object = np.nan,
        ) -> None:
    '''Given an iterable of Frames, write out an NPZ or NPY directly, without building up an intermediary Frame. If axis 0, the Frames must be block compatible; if axis 1, the Frames must have the same number of rows. For both axis, if included, concatenated indices must be unique or aligned.

    Args:
        frames: the containers to write; any item that is not a :obj:`Frame` is converted with ``to_frame(axis)``.
        *
        include_index: for axis 0, if True the concatenated index is written, otherwise no index is written; for axis 1, must be True.
        include_columns: for axis 1, if True the concatenated columns are written, otherwise no columns are written; for axis 0, must be True.
        axis: 0 stacks rows (extends columns vertically); 1 stacks columns (extends rows horizontally).
        union: forwarded to ``index_many_set`` when aligning the axis that is not concatenated.
        name: forwarded to ``from_arrays``.
        fill_value: used when a Frame must be reindexed to the aligned axis.

    Raises:
        UnsupportedOperation: if this instance is not writeable.
        RuntimeError: if concatenated labels are not unique, or if the labels required for alignment are excluded.
        AxisInvalid: if ``axis`` is neither 0 nor 1.
    '''
    if not self._writeable:
        raise UnsupportedOperation('Open with mode "w" to write.')

    from static_frame.core.type_blocks import TypeBlocks
    from static_frame.core.frame import Frame

    # materialize, as the frames are iterated more than once below
    frames = [
            item if isinstance(item, Frame) else item.to_frame(axis)
            for item in frames
            ] # type: ignore

    # NOTE: based on Frame.from_concat
    if axis == 1: # stacks columns (extends rows horizontally)
        if not include_columns:
            columns = None
        else:
            try:
                columns = index_many_concat(
                        (f._columns for f in frames),
                        Index,
                        )
            except ErrorInitIndexNonUnique:
                raise RuntimeError('Column names after horizontal concatenation are not unique; set include_columns to None to ignore.')

        if not include_index:
            raise RuntimeError('Must include index for horizontal alignment.')
        index = index_many_set(
                (f._index for f in frames),
                Index,
                union=union,
                )

        def blocks_horizontal() -> tp.Iterator[np.ndarray]:
            for f in frames:
                # reindex only those Frames whose index differs from the aligned index
                if len(f.index) != len(index) or (f.index != index).any():
                    f = f.reindex(index=index, fill_value=fill_value)
                yield from f._blocks._blocks

        arrays = blocks_horizontal()

    elif axis == 0: # stacks rows (extends columns vertically)
        if not include_index:
            index = None
        else:
            try:
                index = index_many_concat((f._index for f in frames), Index)
            except ErrorInitIndexNonUnique:
                raise RuntimeError('Index names after vertical concatenation are not unique; set include_index to None to ignore')

        if not include_columns:
            raise RuntimeError('Must include columns for vertical alignment.')
        columns = index_many_set(
                (f._columns for f in frames),
                Index,
                union=union,
                )

        def blocks_vertical() -> tp.Iterator[np.ndarray]:
            collected = []
            prior_blocks = None
            block_compatible = True
            reblock_compatible = True

            for f in frames:
                if len(f.columns) != len(columns) or (f.columns != columns).any():
                    f = f.reindex(columns=columns, fill_value=fill_value)
                current_blocks = f._blocks
                collected.append(current_blocks)

                # column size is all the same by this point
                if prior_blocks is not None: # after the first
                    # once a flag is False there is no need to compare again
                    if block_compatible:
                        block_compatible &= current_blocks.block_compatible(
                                prior_blocks,
                                axis=1) # only compare columns
                    if reblock_compatible:
                        reblock_compatible &= current_blocks.reblock_compatible(
                                prior_blocks)
                prior_blocks = current_blocks

            yield from TypeBlocks.vstack_blocks_to_blocks(
                    type_blocks=collected,
                    block_compatible=block_compatible,
                    reblock_compatible=reblock_compatible,
                    )

        arrays = blocks_vertical()

    else:
        raise AxisInvalid(f'no support for {axis}')

    self.from_arrays(
            blocks=arrays,
            index=index,
            columns=columns,
            name=name,
            axis=1, # blocks are normalized for horizontal concat
            )
def from_arrays(self,
        blocks: tp.Iterable[np.ndarray],
        *,
        index: tp.Optional[IndexInitializer] = None,
        columns: tp.Optional[IndexInitializer] = None,
        name: NameType = None,
        axis: int = 0,
        ) -> None:
    '''
    Given an iterable of arrays, write out an NPZ or NPY directly, without building up intermediary :obj:`Frame`. If axis 0, the arrays are vertically stacked; if axis 1, they are horizontally stacked. For both axis, if included, indices must be of appropriate length.

    Args:
        blocks: the arrays to write.
        *,
        index: An array, :obj:`Index`, or :obj:`IndexHierarchy`.
        columns: An array, :obj:`Index`, or :obj:`IndexHierarchy`.
        name: stored as the first entry of the names metadata.
        axis: 0 concatenates all blocks vertically and writes a single block; 1 writes each block separately and requires every block to have the same number of rows.

    Raises:
        UnsupportedOperation: if this instance is not writeable.
        AxisInvalid: if ``axis`` is neither 0 nor 1.
        RuntimeError: if ``index`` or ``columns`` is neither an index object nor an ``np.ndarray``, or if, for axis 1, blocks disagree on their number of rows.
    '''
    if not self._writeable:
        raise UnsupportedOperation('Open with mode "w" to write.')
    # validate the axis before anything is written, so an invalid axis does not leave index or columns data in the archive
    if axis not in (0, 1):
        raise AxisInvalid(f'invalid axis {axis}')

    metadata: tp.Dict[str, tp.Any] = {}

    if isinstance(index, IndexBase):
        depth_index = index.depth
        name_index = index.name
        cls_index = index.__class__
        ArchiveIndexConverter.index_encode(
                metadata=metadata,
                archive=self._archive,
                index=index,
                key_template_values=Label.FILE_TEMPLATE_VALUES_INDEX,
                key_types=Label.KEY_TYPES_INDEX,
                depth=depth_index,
                include=True,
                )
    elif index is not None:
        if index.__class__ is not np.ndarray:
            raise RuntimeError('index argument must be an Index, IndexHierarchy, or 1D np.ndarray')
        depth_index = 1
        name_index = None
        cls_index = dtype_to_index_cls(True, index.dtype) #type: ignore
        ArchiveIndexConverter.array_encode(
                metadata=metadata,
                archive=self._archive,
                array=index,
                key_template_values=Label.FILE_TEMPLATE_VALUES_INDEX,
                )
    else:
        depth_index = 1
        name_index = None
        cls_index = Index

    if isinstance(columns, IndexBase):
        depth_columns = columns.depth
        name_columns = columns.name
        cls_columns = columns.__class__
        ArchiveIndexConverter.index_encode(
                metadata=metadata,
                archive=self._archive,
                index=columns,
                key_template_values=Label.FILE_TEMPLATE_VALUES_COLUMNS,
                key_types=Label.KEY_TYPES_COLUMNS,
                depth=depth_columns,
                include=True,
                )
    elif columns is not None:
        if columns.__class__ is not np.ndarray:
            # name the offending argument correctly (previously reported as "index")
            raise RuntimeError('columns argument must be an Index, IndexHierarchy, or 1D np.ndarray')
        depth_columns = 1 # only support 1D
        name_columns = None
        cls_columns = dtype_to_index_cls(True, columns.dtype) #type: ignore
        ArchiveIndexConverter.array_encode(
                metadata=metadata,
                archive=self._archive,
                array=columns,
                key_template_values=Label.FILE_TEMPLATE_VALUES_COLUMNS,
                )
    else:
        depth_columns = 1 # only support 1D
        name_columns = None
        cls_columns = Index

    metadata[Label.KEY_NAMES] = [
            name,
            name_index,
            name_columns,
            ]
    # do not store Frame class as caller will determine
    metadata[Label.KEY_TYPES] = [
            cls_index.__name__,
            cls_columns.__name__,
            ]

    if axis == 1:
        # count explicitly rather than reading the loop variable afterwards, which is unbound if ``blocks`` is empty
        block_count = 0
        # None, not 0, marks "no block seen yet", so that a zero-row first block still constrains the blocks that follow
        rows: tp.Optional[int] = None
        for array in blocks:
            if rows is None:
                rows = array.shape[0]
            elif array.shape[0] != rows:
                raise RuntimeError('incompatible block shapes')
            self._archive.write_array(Label.FILE_TEMPLATE_BLOCKS.format(block_count), array)
            block_count += 1
    else: # axis == 0
        # for now, just vertically concat and write, though this has a 2X memory requirement
        resolved = concat_resolved(blocks, axis=0)
        # if this results in an object array, an exception will be raised
        self._archive.write_array(Label.FILE_TEMPLATE_BLOCKS.format(0), resolved)
        block_count = 1

    metadata[Label.KEY_DEPTHS] = [
            block_count,
            depth_index,
            depth_columns]
    self._archive.write_metadata(metadata)