def repeat_index(index, n):
    """Repeat every element of `index` `n` times, keeping equal elements adjacent.

    Anything that is not already a `pd.Index` is wrapped into one first.
    A default index (a simple range without a name) is not repeated element-wise:
    a fresh `pd.RangeIndex` of length `len(index) * n` is returned instead.
    """
    idx = index if isinstance(index, pd.Index) else pd.Index(index)
    if not checks.is_default_index(idx):
        return np.repeat(idx, n)
    # Simple unnamed ranges carry no information, so just emit a longer range
    return pd.RangeIndex(start=0, stop=len(idx) * n, step=1)
def tile_index(index, n):
    """Concatenate `n` copies of the whole `index` one after another.

    Anything that is not already a `pd.Index` is wrapped into one first.
    A default index (a simple range without a name) is replaced by a fresh
    `pd.RangeIndex` of length `len(index) * n`. Names of a regular index and
    level names of a `pd.MultiIndex` are carried over to the result.
    """
    idx = index if isinstance(index, pd.Index) else pd.Index(index)
    if checks.is_default_index(idx):
        # Simple unnamed ranges carry no information, so just emit a longer range
        return pd.RangeIndex(start=0, stop=len(idx) * n, step=1)
    tiled = np.tile(idx, n)
    if isinstance(idx, pd.MultiIndex):
        return pd.MultiIndex.from_tuples(tiled, names=idx.names)
    return pd.Index(tiled, name=idx.name)
def repeat_index(index, n, ignore_default=None):
    """Repeat every element of `index` `n` times, keeping equal elements adjacent.

    Args:
        index: Index or anything `pd.Index` accepts.
        n (int): Number of repetitions per element.
        ignore_default (bool): Whether a default index (a simple range without a name)
            should be replaced by a fresh `pd.RangeIndex` of length `len(index) * n`.
            If None, the value is read from `vectorbt.settings.broadcasting['ignore_default']`.
    """
    from vectorbt import settings

    skip_default = ignore_default
    if skip_default is None:
        skip_default = settings.broadcasting['ignore_default']

    idx = index if isinstance(index, pd.Index) else pd.Index(index)
    if checks.is_default_index(idx) and skip_default:
        # Simple unnamed ranges carry no information, so just emit a longer range
        return pd.RangeIndex(start=0, stop=len(idx) * n, step=1)
    return np.repeat(idx, n)
def tile_index(index, n, ignore_default=None):
    """Concatenate `n` copies of the whole `index` one after another.

    Args:
        index: Index or anything `pd.Index` accepts.
        n (int): Number of copies.
        ignore_default (bool): Whether a default index (a simple range without a name)
            should be replaced by a fresh `pd.RangeIndex` of length `len(index) * n`.
            If None, the value is read from `vectorbt.settings.broadcasting['ignore_default']`.

    Names of a regular index and level names of a `pd.MultiIndex` are carried over.
    """
    from vectorbt import settings

    skip_default = ignore_default
    if skip_default is None:
        skip_default = settings.broadcasting['ignore_default']

    idx = index if isinstance(index, pd.Index) else pd.Index(index)
    if checks.is_default_index(idx) and skip_default:
        # Simple unnamed ranges carry no information, so just emit a longer range
        return pd.RangeIndex(start=0, stop=len(idx) * n, step=1)
    tiled = np.tile(idx, n)
    if isinstance(idx, pd.MultiIndex):
        return pd.MultiIndex.from_tuples(tiled, names=idx.names)
    return pd.Index(tiled, name=idx.name)
def drop_redundant_levels(index):
    """Remove levels of a `pd.MultiIndex` that carry no information.

    A level is considered redundant if it either

    * holds a single value and has no name (only checked when `index` has more than one row), or
    * is a default index, i.e. a simple range without a name.

    Anything that is not a `pd.MultiIndex` is returned untouched. If every level turns
    out to be redundant, nothing is dropped and `index` is returned as is.
    """
    if not isinstance(index, pd.MultiIndex):
        return index

    def _is_redundant(level_pos):
        level = index.levels[level_pos]
        if len(index) > 1 and len(level) == 1 and level.name is None:
            return True
        return checks.is_default_index(index.get_level_values(level_pos))

    redundant = [pos for pos in range(index.nlevels) if _is_redundant(pos)]
    # Drop only if at least one meaningful level would remain
    if len(redundant) >= index.nlevels:
        return index
    return index.droplevel(redundant)
def repeat_index(index: tp.IndexLike, n: int, ignore_default: tp.Optional[bool] = None) -> tp.Index:
    """Repeat every element of `index` `n` times, keeping equal elements adjacent.

    `index` is first passed through `to_any_index`. If `ignore_default` is None,
    it is read from `vectorbt.settings.broadcasting['ignore_default']`. When it is
    truthy and `index` is a default index (a simple range without a name), a fresh
    `pd.RangeIndex` of length `len(index) * n` is returned instead.
    """
    from vectorbt import settings

    skip_default = ignore_default
    if skip_default is None:
        skip_default = settings.broadcasting['ignore_default']

    idx = to_any_index(index)
    if checks.is_default_index(idx) and skip_default:
        # Simple unnamed ranges carry no information, so just emit a longer range
        return pd.RangeIndex(start=0, stop=len(idx) * n, step=1)
    return idx.repeat(n)
def tile_index(index: tp.IndexLike, n: int, ignore_default: tp.Optional[bool] = None) -> tp.Index:
    """Concatenate `n` copies of the whole `index` one after another.

    `index` is first passed through `to_any_index`. If `ignore_default` is None,
    it is read from `vectorbt.settings.broadcasting['ignore_default']`. When it is
    truthy and `index` is a default index (a simple range without a name), a fresh
    `pd.RangeIndex` of length `len(index) * n` is returned instead.

    Names of a regular index and level names of a `pd.MultiIndex` are carried over.
    """
    from vectorbt import settings

    skip_default = ignore_default
    if skip_default is None:
        skip_default = settings.broadcasting['ignore_default']

    idx = to_any_index(index)
    if checks.is_default_index(idx) and skip_default:
        # Simple unnamed ranges carry no information, so just emit a longer range
        return pd.RangeIndex(start=0, stop=len(idx) * n, step=1)
    tiled = np.tile(idx, n)
    if isinstance(idx, pd.MultiIndex):
        return pd.MultiIndex.from_tuples(tiled, names=idx.names)
    return pd.Index(tiled, name=idx.name)
def broadcast_index(args, to_shape, index_from=None, axis=0, ignore_sr_names=None, **kwargs):
    """Produce a broadcast index/columns.

    Args:
        args (sequence of array_like): Array-like objects.
        to_shape (tuple): Target shape. A 1-dim shape is treated as having a single column.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * 'strict' - ensure that all pandas objects have the same index/columns
            * 'stack' - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * 'ignore' - ignore any index/columns
            * integer - use the index/columns of the i-nth object in `args`
            * None - use the original index/columns of the objects in `args` (returns None)
            * any other non-string value is used as the new index/columns as is

            Any other string raises `ValueError`. This includes 'default', which is not
            resolved in this function (presumably the caller replaces it with the value
            from `vectorbt.settings.broadcasting` beforehand - verify against the caller).
        axis (int): Set to 0 for index and 1 for columns.
        ignore_sr_names (bool): Whether to ignore Series names if they are in conflict.

            Conflicting Series names are those that are different but not None.
            If None, read from `vectorbt.settings.broadcasting['ignore_sr_names']`.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    Returns:
        The new index/columns, or None if `index_from` is None. If `index_from` is not None
        but no index could be derived from `args`, a simple range of the target length.

    Raises:
        TypeError: If `index_from` is an integer and the referenced argument is not a pandas object.
        ValueError: If `index_from` is an unknown string, if indexes cannot be broadcast
            together, or if broadcasting would be required while `index_from='strict'`.

    !!! note
        Series names are treated as columns with a single element but without a name.
        If a column level without a name loses its meaning, better to convert Series to DataFrames
        with one column prior to broadcasting. If the name of a Series is not that important,
        better to drop it altogether by setting it to None.
    """
    if ignore_sr_names is None:
        # Settings are only consulted when the caller did not decide explicitly
        from vectorbt import settings

        ignore_sr_names = settings.broadcasting['ignore_sr_names']
    index_str = 'columns' if axis == 1 else 'index'
    to_shape_2d = (to_shape[0], 1) if len(to_shape) == 1 else to_shape
    # maxlen stores the length of the longest index
    maxlen = to_shape_2d[1] if axis == 1 else to_shape_2d[0]
    # index_from may be an array-like: comparing it to a string with == would be
    # element-wise, so the type has to be checked first
    is_strict = isinstance(index_from, str) and index_from == 'strict'
    new_index = None

    if index_from is not None:
        if isinstance(index_from, int):
            # Take index/columns of the object indexed by index_from
            if not checks.is_pandas(args[index_from]):
                raise TypeError(f"Argument under index {index_from} must be a pandas object")
            new_index = index_fns.get_index(args[index_from], axis)
        elif isinstance(index_from, str):
            if index_from == 'ignore':
                # Ignore index/columns
                new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
            elif index_from in ('stack', 'strict'):
                # Check whether all indexes/columns are equal
                last_index = None  # of type pd.Index
                index_conflict = False
                for arg in args:
                    if checks.is_pandas(arg):
                        index = index_fns.get_index(arg, axis)
                        if last_index is not None and not pd.Index.equals(index, last_index):
                            index_conflict = True
                        last_index = index
                if not index_conflict:
                    new_index = last_index
                else:
                    # If pandas objects have different index/columns, stack them together
                    for arg in args:
                        if not checks.is_pandas(arg):
                            continue
                        index = index_fns.get_index(arg, axis)
                        if axis == 1 and checks.is_series(arg) and ignore_sr_names:
                            # ignore Series name
                            continue
                        if checks.is_default_index(index):
                            # ignore simple ranges without name
                            continue
                        if new_index is None:
                            new_index = index
                            continue
                        if pd.Index.equals(index, new_index):
                            continue
                        if is_strict:
                            # If pandas objects have different index/columns, raise an exception
                            raise ValueError(
                                f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                            )
                        # Broadcasting index must follow the rules of a regular broadcasting operation
                        # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                        # 1. rule: if indexes are of the same length, they are simply stacked
                        # 2. rule: if index has one element, it gets repeated and then stacked
                        if len(index) != len(new_index):
                            if len(index) > 1 and len(new_index) > 1:
                                raise ValueError("Indexes could not be broadcast together")
                            if len(index) > len(new_index):
                                new_index = index_fns.repeat_index(new_index, len(index))
                            elif len(index) < len(new_index):
                                index = index_fns.repeat_index(index, len(new_index))
                        new_index = index_fns.stack_indexes(new_index, index, **kwargs)
            else:
                raise ValueError(f"Invalid value {index_from} for {index_str}_from")
        else:
            new_index = index_from

    if new_index is not None:
        if maxlen > len(new_index):
            if is_strict:
                raise ValueError(
                    f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                )
            # This happens only when some numpy object is longer than the new pandas index
            # In this case, new pandas index (one element) should be repeated to match this length.
            if maxlen > 1 and len(new_index) > 1:
                raise ValueError("Indexes could not be broadcast together")
            new_index = index_fns.repeat_index(new_index, maxlen)
    elif index_from is not None:
        # new_index=None can mean two things: 1) take original metadata or 2) reset index/columns
        # In case when index_from is not None, we choose 2)
        new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
    return new_index
def broadcast_index(args, to_shape, index_from=None, axis=0, **kwargs):
    """Produce a broadcasted index/columns.

    Args:
        args (sequence of array_like): Array-like objects.
        to_shape (tuple): Target shape. For columns, a 1-dim shape is treated as having a single column.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * `None` - use the original index/columns of the objects in `args` (returns None)
            * `int` - use the index/columns of the i-nth object in `args`
            * `'strict'` - ensure that all pandas objects have the same index/columns
            * `'stack'` - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * any other non-string value is used as the new index/columns as is

            Any other string raises `ValueError`. This includes `'default'`, which is not
            resolved in this function (presumably the caller replaces it with the value
            from `vectorbt.defaults.broadcasting` beforehand - verify against the caller).
        axis (int): Set to 0 for index and 1 for columns.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    Returns:
        The new index/columns, or None if no index was requested or none could be
        derived from `args`.

    Raises:
        TypeError: If `index_from` is an integer and the referenced argument is not a pandas object.
        ValueError: If `index_from` is an unknown string, if indexes cannot be broadcast
            together, or if broadcasting would be required while `index_from='strict'`.
    """
    index_str = 'columns' if axis == 1 else 'index'
    new_index = None
    if axis == 1 and len(to_shape) == 1:
        to_shape = (to_shape[0], 1)
    # maxlen stores the length of the longest index
    maxlen = to_shape[1] if axis == 1 else to_shape[0]
    # index_from may be an array-like: comparing it to a string with == would be
    # element-wise, so the type has to be checked first
    is_strict = isinstance(index_from, str) and index_from == 'strict'

    if index_from is not None:
        if isinstance(index_from, int):
            # Take index/columns of the object indexed by index_from
            if not checks.is_pandas(args[index_from]):
                raise TypeError(f"Argument under index {index_from} must be a pandas object")
            new_index = index_fns.get_index(args[index_from], axis)
        elif isinstance(index_from, str):
            if index_from in ('stack', 'strict'):
                # If pandas objects have different index/columns, stack them together
                for arg in args:
                    if not checks.is_pandas(arg):
                        continue
                    index = index_fns.get_index(arg, axis)
                    if checks.is_default_index(index):
                        # ignore simple ranges without name
                        continue
                    if new_index is None:
                        new_index = index
                        continue
                    if pd.Index.equals(index, new_index):
                        continue
                    if is_strict:
                        # If pandas objects have different index/columns, raise an exception
                        raise ValueError(
                            f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
                        )
                    # Broadcasting index must follow the rules of a regular broadcasting operation
                    # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                    # 1. rule: if indexes are of the same length, they are simply stacked
                    # 2. rule: if index has one element, it gets repeated and then stacked
                    if len(index) != len(new_index):
                        if len(index) > 1 and len(new_index) > 1:
                            raise ValueError("Indexes could not be broadcast together")
                        if len(index) > len(new_index):
                            new_index = index_fns.repeat_index(new_index, len(index))
                        elif len(index) < len(new_index):
                            index = index_fns.repeat_index(index, len(new_index))
                    new_index = index_fns.stack_indexes(new_index, index, **kwargs)
            else:
                raise ValueError(f"Invalid value {index_from} for {index_str}_from")
        else:
            new_index = index_from

    if new_index is not None:
        if maxlen > len(new_index):
            if is_strict:
                raise ValueError(
                    f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
                )
            # This happens only when some numpy object is longer than the new pandas index
            # In this case, new pandas index (one element) should be repeated to match this length.
            if maxlen > 1 and len(new_index) > 1:
                raise ValueError("Indexes could not be broadcast together")
            new_index = index_fns.repeat_index(new_index, maxlen)
    return new_index
def test_is_default_index(self):
    """Unnamed 0..n-1 integer indexes must count as default, a named one must not."""
    expected_default = (
        pd.DataFrame([[1, 2, 3]]).columns,
        pd.Series([1, 2, 3]).to_frame().columns,
        pd.Index([0, 1, 2]),
    )
    for candidate in expected_default:
        assert checks.is_default_index(candidate)
    # Giving the same values a name makes the index meaningful
    assert not checks.is_default_index(pd.Index([0, 1, 2], name='name'))