def wrap_broadcasted(old_arg, new_arg, is_pd=False, new_index=None, new_columns=None):
    """Re-wrap a freshly broadcast array as a pandas object when requested.

    If `is_pd` is True, the broadcast array `new_arg` is wrapped via
    `array_wrapper.ArrayWrapper` using `new_index`/`new_columns`; any of the two
    that is None is derived from `old_arg` when `old_arg` is a pandas object
    (repeating its index/columns if the shape grew). Otherwise `new_arg` is
    returned unchanged.
    """
    if not is_pd:
        return new_arg
    if checks.is_pandas(old_arg):
        if new_index is None:
            # Reuse the original index, repeating it if broadcasting grew axis 0
            src_index = index_fns.get_index(old_arg, 0)
            n_rows = new_arg.shape[0]
            if old_arg.shape[0] == n_rows:
                new_index = src_index
            else:
                new_index = index_fns.repeat_index(src_index, n_rows)
        if new_columns is None:
            # Reuse the original columns, repeating them if broadcasting grew axis 1
            src_columns = index_fns.get_index(old_arg, 1)
            n_cols = new_arg.shape[1] if new_arg.ndim == 2 else 1
            if len(src_columns) == n_cols:
                new_columns = src_columns
            else:
                new_columns = index_fns.repeat_index(src_columns, n_cols)
    return array_wrapper.ArrayWrapper(index=new_index, columns=new_columns, ndim=new_arg.ndim).wrap(new_arg)
def wrap_broadcasted(old_arg, new_arg, is_pd=False, new_index=None, new_columns=None):
    """Re-wrap a freshly broadcast array as a pandas object when requested.

    If `is_pd` is True, return `new_arg` as a `pd.DataFrame`/`pd.Series` with
    `new_index`/`new_columns`; any of the two that is None is derived from
    `old_arg` when `old_arg` is a pandas object (repeating its index/columns if
    the shape grew). Otherwise `new_arg` is returned unchanged.
    """
    if not is_pd:
        return new_arg
    if checks.is_pandas(old_arg):
        if new_index is None:
            # Reuse the original index, repeating it if broadcasting grew axis 0
            src_index = index_fns.get_index(old_arg, 0)
            n_rows = new_arg.shape[0]
            if old_arg.shape[0] == n_rows:
                new_index = src_index
            else:
                new_index = index_fns.repeat_index(src_index, n_rows)
        if new_columns is None:
            # Reuse the original columns, repeating them if broadcasting grew axis 1
            src_columns = index_fns.get_index(old_arg, 1)
            n_cols = new_arg.shape[1] if new_arg.ndim == 2 else 1
            if len(src_columns) == n_cols:
                new_columns = src_columns
            else:
                new_columns = index_fns.repeat_index(src_columns, n_cols)
    if new_arg.ndim == 2:
        return pd.DataFrame(new_arg, index=new_index, columns=new_columns)
    # 1-dim result becomes a Series; a single column label supplies its name,
    # except the default label 0, which carries no information
    name = None
    if new_columns is not None and len(new_columns) == 1:
        name = new_columns[0]
        if name == 0:
            name = None
    return pd.Series(new_arg, index=new_index, name=name)
def from_obj(cls: tp.Type[ArrayWrapperT], obj: tp.ArrayLike, *args, **kwargs) -> ArrayWrapperT:
    """Derive metadata from an object.

    Converts `obj` to a pandas array and builds the wrapper from its
    index, columns, and number of dimensions."""
    pd_arr = to_pd_array(obj)
    return cls(
        index_fns.get_index(pd_arr, 0),
        index_fns.get_index(pd_arr, 1),
        pd_arr.ndim,
        *args,
        **kwargs
    )
def broadcast_to(arg1, arg2, to_pd=None, index_from=None, columns_from=None, **kwargs):
    """Broadcast `arg1` against the shape (and, optionally, metadata) of `arg2`.

    Keyword arguments `**kwargs` are passed to `broadcast`.

    ## Example

    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast_to

    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([4, 5, 6], index=pd.Index(['x', 'y', 'z']), name='a')

    >>> broadcast_to(a, sr)
    x    1
    y    2
    z    3
    Name: a, dtype: int64

    >>> broadcast_to(sr, a)
    array([4, 5, 6])
    ```
    """
    # Coerce scalars/sequences to arrays; pandas objects pass through untouched
    arg1 = arg1 if checks.is_array(arg1) else np.asarray(arg1)
    arg2 = arg2 if checks.is_array(arg2) else np.asarray(arg2)
    if to_pd is None:
        to_pd = checks.is_pandas(arg2)
    # When producing a pandas result, default the metadata to that of arg2
    if to_pd and index_from is None:
        index_from = index_fns.get_index(arg2, 0)
    if to_pd and columns_from is None:
        columns_from = index_fns.get_index(arg2, 1)
    return broadcast(arg1, to_shape=arg2.shape, to_pd=to_pd,
                     index_from=index_from, columns_from=columns_from, **kwargs)
def broadcast_to(arg1: tp.ArrayLike,
                 arg2: tp.ArrayLike,
                 to_pd: tp.Optional[bool] = None,
                 index_from: tp.Optional[IndexFromLike] = None,
                 columns_from: tp.Optional[IndexFromLike] = None,
                 **kwargs) -> BCRT:
    """Broadcast `arg1` against the shape (and, optionally, metadata) of `arg2`.

    Pass None to `index_from`/`columns_from` to use index/columns of the second argument.

    Keyword arguments `**kwargs` are passed to `broadcast`.

    ## Example

    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast_to

    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([4, 5, 6], index=pd.Index(['x', 'y', 'z']), name='a')

    >>> broadcast_to(a, sr)
    x    1
    y    2
    z    3
    Name: a, dtype: int64

    >>> broadcast_to(sr, a)
    array([4, 5, 6])
    ```
    """
    arg1, arg2 = to_any_array(arg1), to_any_array(arg2)
    if to_pd is None:
        to_pd = checks.is_pandas(arg2)
    # When producing a pandas result, default the metadata to that of arg2
    if to_pd and index_from is None:
        index_from = index_fns.get_index(arg2, 0)
    if to_pd and columns_from is None:
        columns_from = index_fns.get_index(arg2, 1)
    return broadcast(arg1, to_shape=arg2.shape, to_pd=to_pd,
                     index_from=index_from, columns_from=columns_from, **kwargs)
def broadcast_index(args, to_shape, index_from=None, axis=0, ignore_sr_names=None, **kwargs):
    """Produce a broadcast index/columns.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * 'default' - take the value from `vectorbt.settings.broadcasting`
            * 'strict' - ensure that all pandas objects have the same index/columns
            * 'stack' - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * 'ignore' - ignore any index/columns
            * integer - use the index/columns of the i-nth object in `args`
            * None - use the original index/columns of the objects in `args`
            * everything else will be converted to `pd.Index`

        axis (int): Set to 0 for index and 1 for columns.
        ignore_sr_names (bool): Whether to ignore Series names if they are in conflict.

            Conflicting Series names are those that are different but not None.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    For defaults, see `vectorbt.settings.broadcasting`.

    !!! note
        Series names are treated as columns with a single element but without a name.
        If a column level without a name loses its meaning, better to convert Series to DataFrames
        with one column prior to broadcasting. If the name of a Series is not that important,
        better to drop it altogether by setting it to None.
    """
    from vectorbt import settings

    # Resolve the default for ignore_sr_names from global settings
    if ignore_sr_names is None:
        ignore_sr_names = settings.broadcasting['ignore_sr_names']
    index_str = 'columns' if axis == 1 else 'index'  # used in error messages
    # Treat a 1-dim target shape as a single column
    to_shape_2d = (to_shape[0], 1) if len(to_shape) == 1 else to_shape
    # maxlen stores the length of the longest index
    maxlen = to_shape_2d[1] if axis == 1 else to_shape_2d[0]
    new_index = None

    if index_from is not None:
        if isinstance(index_from, int):
            # Take index/columns of the object indexed by index_from
            if not checks.is_pandas(args[index_from]):
                raise TypeError(f"Argument under index {index_from} must be a pandas object")
            new_index = index_fns.get_index(args[index_from], axis)
        elif isinstance(index_from, str):
            if index_from == 'ignore':
                # Ignore index/columns
                new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
            elif index_from in ('stack', 'strict'):
                # Check whether all indexes/columns are equal
                last_index = None  # of type pd.Index
                index_conflict = False
                for arg in args:
                    if checks.is_pandas(arg):
                        index = index_fns.get_index(arg, axis)
                        if last_index is not None:
                            if not pd.Index.equals(index, last_index):
                                index_conflict = True
                        last_index = index
                        continue
                if not index_conflict:
                    # All pandas indexes agree (or there was at most one) - reuse as-is
                    new_index = last_index
                else:
                    # If pandas objects have different index/columns, stack them together
                    for arg in args:
                        if checks.is_pandas(arg):
                            index = index_fns.get_index(arg, axis)
                            if axis == 1 and checks.is_series(arg) and ignore_sr_names:
                                # ignore Series name
                                continue
                            if checks.is_default_index(index):
                                # ignore simple ranges without name
                                continue
                            if new_index is None:
                                new_index = index
                            else:
                                if index_from == 'strict':
                                    # If pandas objects have different index/columns, raise an exception
                                    if not pd.Index.equals(index, new_index):
                                        raise ValueError(
                                            f"Broadcasting {index_str} is not allowed when {index_str}_from=strict")
                                # Broadcasting index must follow the rules of a regular broadcasting operation
                                # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                                # 1. rule: if indexes are of the same length, they are simply stacked
                                # 2. rule: if index has one element, it gets repeated and then stacked
                                if pd.Index.equals(index, new_index):
                                    continue
                                if len(index) != len(new_index):
                                    if len(index) > 1 and len(new_index) > 1:
                                        raise ValueError("Indexes could not be broadcast together")
                                    if len(index) > len(new_index):
                                        new_index = index_fns.repeat_index(new_index, len(index))
                                    elif len(index) < len(new_index):
                                        index = index_fns.repeat_index(index, len(new_index))
                                new_index = index_fns.stack_indexes(new_index, index, **kwargs)
            else:
                raise ValueError(f"Invalid value {index_from} for {'columns' if axis == 1 else 'index'}_from")
        else:
            # Any other value is treated as the index/columns itself
            new_index = index_from
    if new_index is not None:
        if maxlen > len(new_index):
            if index_from == 'strict':
                raise ValueError(f"Broadcasting {index_str} is not allowed when {index_str}_from=strict")
            # This happens only when some numpy object is longer than the new pandas index
            # In this case, new pandas index (one element) should be repeated to match this length.
            if maxlen > 1 and len(new_index) > 1:
                raise ValueError("Indexes could not be broadcast together")
            new_index = index_fns.repeat_index(new_index, maxlen)
    elif index_from is not None:
        # new_index=None can mean two things: 1) take original metadata or 2) reset index/columns
        # In case when index_from is not None, we choose 2)
        new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
    return new_index
def _indexing_func_meta(self, pd_indexing_func, index=None, columns=None,
                        column_only_select=None, group_select=None, group_by=None):
    """Perform indexing on `ArrayWrapper` and also return indexing metadata.

    Takes into account column grouping.

    Set `column_only_select` to True to index the array wrapper as a Series of columns.
    This way, selection of index (axis 0) can be avoided. Set `group_select` to True
    to select groups rather than columns. Takes effect only if grouping is enabled.

    Returns a tuple of the new (copied) wrapper, the row indices, the (group or
    column) indices, and the ungrouped column indices.

    !!! note
        If `column_only_select` is True, make sure to index the array wrapper
        as a Series of columns rather than a DataFrame. For example, the operation
        `.iloc[:, :2]` should become `.iloc[:2]`. Operations are not allowed if the
        object is already a Series and thus has only one column/group."""
    from vectorbt import settings

    # Resolve defaults: instance attribute first, then global settings
    if column_only_select is None:
        column_only_select = self.column_only_select
    if column_only_select is None:
        column_only_select = settings.array_wrapper['column_only_select']
    if group_select is None:
        group_select = self.group_select
    if group_select is None:
        group_select = settings.array_wrapper['group_select']
    _self = self.regroup(group_by)
    # Group selection only makes sense when grouping is actually enabled
    group_select = group_select and _self.grouper.is_grouped()
    if index is None:
        index = _self.index
    if columns is None:
        if group_select:
            columns = _self.grouper.get_columns()
        else:
            columns = _self.columns
    if group_select:
        # Groups as columns
        i_wrapper = ArrayWrapper(index, columns, _self.get_ndim())
    else:
        # Columns as columns
        i_wrapper = ArrayWrapper(index, columns, _self.ndim)
    n_rows = len(index)
    n_cols = len(columns)

    if column_only_select:
        # Index a Series of column positions; axis 0 is never touched
        if i_wrapper.ndim == 1:
            raise IndexingError("Columns only: Attempting to select a column on a Series")
        col_mapper = i_wrapper.wrap_reduced(np.arange(n_cols), columns=columns)
        try:
            col_mapper = pd_indexing_func(col_mapper)
        except pd.core.indexing.IndexingError as e:
            warnings.warn("Columns only: Make sure to treat this object "
                          "as a Series of columns rather than a DataFrame", stacklevel=2)
            raise e
        if checks.is_series(col_mapper):
            new_columns = col_mapper.index
            col_idxs = col_mapper.values
            new_ndim = 2
        else:
            # A scalar came back -> a single column was selected
            new_columns = columns[[col_mapper]]
            col_idxs = col_mapper
            new_ndim = 1
        new_index = index
        idx_idxs = np.arange(len(index))
    else:
        # Run the indexing function on a mapper of row positions to learn
        # which rows (and, for 2-dim, which columns) were selected
        idx_mapper = i_wrapper.wrap(
            np.broadcast_to(np.arange(n_rows)[:, None], (n_rows, n_cols)),
            index=index, columns=columns)
        idx_mapper = pd_indexing_func(idx_mapper)
        if i_wrapper.ndim == 1:
            if not checks.is_series(idx_mapper):
                raise IndexingError("Selection of a scalar is not allowed")
            idx_idxs = idx_mapper.values
            col_idxs = 0
        else:
            # Same indexing applied to a mapper of column positions
            col_mapper = i_wrapper.wrap(
                np.broadcast_to(np.arange(n_cols), (n_rows, n_cols)),
                index=index, columns=columns)
            col_mapper = pd_indexing_func(col_mapper)
            if checks.is_frame(idx_mapper):
                idx_idxs = idx_mapper.values[:, 0]
                col_idxs = col_mapper.values[0]
            elif checks.is_series(idx_mapper):
                one_col = np.all(col_mapper.values == col_mapper.values.item(0))
                one_idx = np.all(idx_mapper.values == idx_mapper.values.item(0))
                if one_col and one_idx:
                    # One index and one column selected, multiple times
                    raise IndexingError("Must select at least two unique indices in one of both axes")
                elif one_col:
                    # One column selected
                    idx_idxs = idx_mapper.values
                    col_idxs = col_mapper.values[0]
                elif one_idx:
                    # One index selected
                    idx_idxs = idx_mapper.values[0]
                    col_idxs = col_mapper.values
                else:
                    raise IndexingError
            else:
                raise IndexingError("Selection of a scalar is not allowed")
        new_index = index_fns.get_index(idx_mapper, 0)
        if not isinstance(idx_idxs, np.ndarray):
            # One index selected: the result flips axes, so the selected row
            # label becomes the (single) column
            new_columns = index[[idx_idxs]]
        elif not isinstance(col_idxs, np.ndarray):
            # One column selected
            new_columns = columns[[col_idxs]]
        else:
            new_columns = index_fns.get_index(idx_mapper, 1)
        new_ndim = idx_mapper.ndim

    if _self.grouper.is_grouped():
        # Grouping enabled
        if np.asarray(idx_idxs).ndim == 0:
            raise IndexingError("Flipping index and columns is not allowed")

        if group_select:
            # Selection based on groups
            # Get indices of columns corresponding to selected groups
            group_idxs = col_idxs
            group_idxs_arr = reshape_fns.to_1d(group_idxs)
            group_start_idxs = _self.grouper.get_group_start_idxs()[group_idxs_arr]
            group_end_idxs = _self.grouper.get_group_end_idxs()[group_idxs_arr]
            ungrouped_col_idxs = get_ranges_arr(group_start_idxs, group_end_idxs)
            ungrouped_columns = _self.columns[ungrouped_col_idxs]
            if new_ndim == 1 and len(ungrouped_columns) == 1:
                ungrouped_ndim = 1
                ungrouped_col_idxs = ungrouped_col_idxs[0]
            else:
                ungrouped_ndim = 2

            # Get indices of selected groups corresponding to the new columns
            # We could do _self.group_by[ungrouped_col_idxs] but indexing operation may have changed the labels
            group_lens = _self.grouper.get_group_lens()[group_idxs_arr]
            ungrouped_group_idxs = np.full(len(ungrouped_columns), 0)
            # BUGFIX: group boundaries sit at cumulative group lengths, not at raw
            # lengths. The previous `group_lens[:-1]` happened to coincide only for
            # <= 2 groups; for e.g. lens [1, 1, 1] it produced [0, 1, 1] instead of
            # [0, 1, 2].
            ungrouped_group_idxs[np.cumsum(group_lens)[:-1]] = 1
            ungrouped_group_idxs = np.cumsum(ungrouped_group_idxs)

            return _self.copy(
                index=new_index,
                columns=ungrouped_columns,
                ndim=ungrouped_ndim,
                grouped_ndim=new_ndim,
                group_by=new_columns[ungrouped_group_idxs]
            ), idx_idxs, group_idxs, ungrouped_col_idxs

        # Selection based on columns
        col_idxs_arr = reshape_fns.to_1d(col_idxs)
        return _self.copy(
            index=new_index,
            columns=new_columns,
            ndim=new_ndim,
            grouped_ndim=None,
            group_by=_self.grouper.group_by[col_idxs_arr]
        ), idx_idxs, col_idxs, col_idxs

    # Grouping disabled
    return _self.copy(
        index=new_index,
        columns=new_columns,
        ndim=new_ndim,
        grouped_ndim=None,
        group_by=None
    ), idx_idxs, col_idxs, col_idxs
def from_obj(cls, obj, *args, **kwargs):
    """Derive metadata from an object.

    Builds the wrapper from the index, columns, and number of
    dimensions of the pandas object `obj`."""
    return cls(
        index_fns.get_index(obj, 0),
        index_fns.get_index(obj, 1),
        obj.ndim,
        *args,
        **kwargs
    )
def broadcast_index(args, to_shape, index_from=None, axis=0, **kwargs):
    """Produce a broadcasted index/columns.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * `'default'` - take the value from `vectorbt.defaults.broadcasting`
            * `None` - use the original index/columns of the objects in `args`
            * `int` - use the index/columns of the i-nth object in `args`
            * `'strict'` - ensure that all pandas objects have the same index/columns
            * `'stack'` - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * everything else will be converted to `pd.Index`

        axis (int): Set to 0 for index and 1 for columns.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    For defaults, see `vectorbt.defaults.broadcasting`.
    """
    index_str = 'columns' if axis == 1 else 'index'  # used in error messages
    new_index = None
    # Treat a 1-dim target shape as a single column when working on axis 1
    if axis == 1 and len(to_shape) == 1:
        to_shape = (to_shape[0], 1)
    maxlen = to_shape[1] if axis == 1 else to_shape[0]

    if index_from is not None:
        if isinstance(index_from, int):
            # Take index/columns of the object indexed by index_from
            if not checks.is_pandas(args[index_from]):
                raise TypeError(f"Argument under index {index_from} must be a pandas object")
            new_index = index_fns.get_index(args[index_from], axis)
        elif isinstance(index_from, str):
            if index_from in ('stack', 'strict'):
                # If pandas objects have different index/columns, stack them together
                # maxlen stores the length of the longest index
                for arg in args:
                    if checks.is_pandas(arg):
                        index = index_fns.get_index(arg, axis)
                        if checks.is_default_index(index):
                            # ignore simple ranges without name
                            continue
                        if new_index is None:
                            new_index = index
                        else:
                            if index_from == 'strict':
                                # If pandas objects have different index/columns, raise an exception
                                if not pd.Index.equals(index, new_index):
                                    raise ValueError(
                                        f"Broadcasting {index_str} is not allowed for {index_str}_from=strict")
                            # Broadcasting index must follow the rules of a regular broadcasting operation
                            # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                            # 1. rule: if indexes are of the same length, they are simply stacked
                            # 2. rule: if index has one element, it gets repeated and then stacked
                            if pd.Index.equals(index, new_index):
                                continue
                            if len(index) != len(new_index):
                                if len(index) > 1 and len(new_index) > 1:
                                    raise ValueError("Indexes could not be broadcast together")
                                if len(index) > len(new_index):
                                    new_index = index_fns.repeat_index(new_index, len(index))
                                elif len(index) < len(new_index):
                                    index = index_fns.repeat_index(index, len(new_index))
                            new_index = index_fns.stack_indexes(new_index, index, **kwargs)
            else:
                raise ValueError(f"Invalid value {index_from} for {'columns' if axis == 1 else 'index'}_from")
        else:
            # Any other value is treated as the index/columns itself
            new_index = index_from
        if new_index is not None:
            if maxlen > len(new_index):
                if index_from == 'strict':
                    raise ValueError(f"Broadcasting {index_str} is not allowed for {index_str}_from=strict")
                # This happens only when some numpy object is longer than the new pandas index
                # In this case, new pandas index (one element) should be repeated to match this length.
                if maxlen > 1 and len(new_index) > 1:
                    raise ValueError("Indexes could not be broadcast together")
                new_index = index_fns.repeat_index(new_index, maxlen)
    return new_index