Exemple #1
0
 def wrap_metric(self, a):
     """Wrap the metric `a` according to the layout of `self.ts`.

     If `checks.is_frame(self.ts)` holds, `a` is returned as a Series indexed by the
     columns of `self.ts`. Otherwise a single value is returned: the first element of
     `a` if it is an array, else `a` itself.
     """
     if not checks.is_frame(self.ts):
         # Single value
         return a[0] if checks.is_array(a) else a
     return pd.Series(a, index=self.ts.columns)
Exemple #2
0
def unstack_to_df(arg,
                  index_levels=None,
                  column_levels=None,
                  symmetric=False,
                  sort=True):
    """Reshape `arg` based on its multi-index into a DataFrame.

    `index_levels` selects the index levels that form the new index and `column_levels`
    the levels that form the new columns. Both are required if the multi-index has more
    than two levels; otherwise they are ignored and levels 0 and 1 are used.
    Set `symmetric` to True to pass the result through `make_symmetric`
    (`sort` is forwarded to it).

    A DataFrame is accepted only as a container of a single row or a single column,
    which is extracted before reshaping.

    ## Example

    ```python-repl
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import unstack_to_df

    >>> index = pd.MultiIndex.from_arrays(
    ...     [[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']],
    ...     names=['x', 'y', 'z'])
    >>> sr = pd.Series([1, 2, 3, 4], index=index)

    >>> unstack_to_df(sr, index_levels=(0, 1), column_levels=2)
    z      a    b    c    d
    x y
    1 3  1.0  NaN  NaN  NaN
    1 4  NaN  2.0  NaN  NaN
    2 3  NaN  NaN  3.0  NaN
    2 4  NaN  NaN  NaN  4.0
    ```
    """
    # Validate the input and squeeze a one-row/one-column DataFrame
    checks.assert_type(arg, (pd.Series, pd.DataFrame))
    if checks.is_frame(arg):
        n_rows, n_cols = arg.shape
        if n_rows == 1:
            arg = arg.iloc[0, :]
        elif n_cols == 1:
            arg = arg.iloc[:, 0]
    checks.assert_type(arg.index, pd.MultiIndex)
    sr = to_1d(arg)

    if len(sr.index.levels) <= 2:
        # With at most two levels the split is unambiguous
        index_levels, column_levels = 0, 1
    elif index_levels is None:
        raise ValueError("index_levels must be specified")
    elif column_levels is None:
        raise ValueError("column_levels must be specified")

    # Labels of the resulting frame: unique combinations of the selected levels
    source_index = arg.index
    new_index = index_fns.select_levels(source_index, index_levels).unique()
    new_columns = index_fns.select_levels(source_index, column_levels).unique()

    # Unstack the values and attach the labels
    values = unstack_to_array(sr, levels=(index_levels, column_levels))
    df = pd.DataFrame(values, index=new_index, columns=new_columns)
    return make_symmetric(df, sort=sort) if symmetric else df
Exemple #3
0
 def __call__(self, trace_names=None, **kwargs):
     """Create a `widgets.Histogram` from the values of the wrapped object.

     If `trace_names` is not given, it is taken from the columns of the 2-dim version
     of the object, but only for a frame or a series with a name. Remaining keyword
     arguments are forwarded to `widgets.Histogram`.
     """
     obj = self._obj
     if trace_names is None:
         has_names = checks.is_frame(obj) or (
             checks.is_series(obj) and obj.name is not None)
         if has_names:
             trace_names = reshape_fns.to_2d(obj).columns
     return widgets.Histogram(
         trace_names=trace_names, data=obj.values, **kwargs)
Exemple #4
0
    def __init__(self,
                 main_price,
                 init_capital,
                 orders,
                 cash,
                 shares,
                 freq=None,
                 year_freq=None,
                 levy_alpha=None,
                 risk_free=None,
                 required_return=None,
                 cutoff=None,
                 factor_returns=None,
                 incl_unrealized_stats=False):
        """Validate and store the portfolio inputs and resolve frequencies and parameters.

        Args:
            main_price (pd.Series or pd.DataFrame): Main price. Frequencies and the
                array wrapper are derived from this object.
            init_capital: Initial capital. If `main_price` is a DataFrame, must be a
                `pd.Series` whose index is the same as the columns of `main_price`;
                otherwise must be zero-dimensional.
            orders: Orders object, stored as is.
            cash: Checked with `checks.assert_same_meta` against `main_price`.
            shares: Checked with `checks.assert_same_meta` against `main_price`.
            freq: Index frequency, resolved via `main_price.vbt(freq=freq).freq`.
            year_freq: Year frequency, resolved via
                `main_price.vbt.returns(year_freq=year_freq).year_freq`.
            levy_alpha: Falls back to `defaults.portfolio['levy_alpha']` if None.
            risk_free: Falls back to `defaults.portfolio['risk_free']` if None.
            required_return: Falls back to `defaults.portfolio['required_return']` if None.
            cutoff: Falls back to `defaults.portfolio['cutoff']` if None.
            factor_returns: Falls back to `defaults.portfolio['factor_returns']` if None.
            incl_unrealized_stats (bool): Stored as is.

        Raises:
            ValueError: If `freq` or `year_freq` resolves to None.
        """
        # Perform checks
        checks.assert_type(main_price, (pd.Series, pd.DataFrame))
        if checks.is_frame(main_price):
            checks.assert_type(init_capital, pd.Series)
            checks.assert_same(main_price.columns, init_capital.index)
        else:
            checks.assert_ndim(init_capital, 0)
        checks.assert_same_meta(main_price, cash)
        checks.assert_same_meta(main_price, shares)

        # Store passed arguments
        self._main_price = main_price
        self._init_capital = init_capital
        self._orders = orders
        self._cash = cash
        self._shares = shares
        self._incl_unrealized_stats = incl_unrealized_stats

        freq = main_price.vbt(freq=freq).freq
        if freq is None:
            raise ValueError(
                "Couldn't parse the frequency of index. You must set `freq`.")
        self._freq = freq

        year_freq = main_price.vbt.returns(year_freq=year_freq).year_freq
        # Validate the resolved `year_freq` itself (`freq` was already checked above)
        if year_freq is None:
            raise ValueError("You must set `year_freq`.")
        self._year_freq = year_freq

        # Parameters: explicit values take precedence over the global defaults
        portfolio_defaults = defaults.portfolio
        self._levy_alpha = portfolio_defaults[
            'levy_alpha'] if levy_alpha is None else levy_alpha
        self._risk_free = portfolio_defaults[
            'risk_free'] if risk_free is None else risk_free
        self._required_return = portfolio_defaults[
            'required_return'] if required_return is None else required_return
        self._cutoff = portfolio_defaults[
            'cutoff'] if cutoff is None else cutoff
        self._factor_returns = portfolio_defaults[
            'factor_returns'] if factor_returns is None else factor_returns

        # Supercharge
        PandasIndexer.__init__(self, _indexing_func)
        self.wrapper = ArrayWrapper.from_obj(main_price, freq=freq)
Exemple #5
0
 def from_obj(cls, obj):
     """Derive metadata (index, columns, number of dimensions) from an object.

     For a frame the columns are taken as is; otherwise a one-element list holding
     the object's name is used as columns.
     """
     columns = obj.columns if checks.is_frame(obj) else [obj.name]
     return cls(index=obj.index, columns=columns, ndim=obj.ndim)
Exemple #6
0
 def pd_indexing_func(obj):
     """Select the columns at `indices` from `obj`.

     `indices`, `is_multiple` and `level_name` come from the enclosing scope.
     """
     new_obj = obj.iloc[:, indices]
     # If we selected only one param, then remove its column level to keep it clean
     should_drop_level = (
         not is_multiple
         and level_name is not None
         and checks.is_frame(new_obj)
         and isinstance(new_obj.columns, pd.MultiIndex)
     )
     if should_drop_level:
         new_obj.columns = index_fns.drop_levels(new_obj.columns, level_name)
     return new_obj
Exemple #7
0
 def __call__(self, x_labels=None, trace_names=None, **kwargs):
     """Create a `widgets.Scatter` from the values of the wrapped object.

     `x_labels` defaults to the index of the object. If `trace_names` is not given, it
     is taken from the columns of the 2-dim version of the object, but only for a
     frame or a series with a name. Remaining keyword arguments are forwarded to
     `widgets.Scatter`.
     """
     obj = self._obj
     if x_labels is None:
         x_labels = obj.index
     if trace_names is None:
         has_names = checks.is_frame(obj) or (
             checks.is_series(obj) and obj.name is not None)
         if has_names:
             trace_names = reshape_fns.to_2d(obj).columns
     return widgets.Scatter(
         x_labels, trace_names=trace_names, data=obj.values, **kwargs)
Exemple #8
0
def wrap_array_as(arg1, arg2, **kwargs):
    """Wrap array `arg1` to be as `arg2`.

    Index, columns and number of dimensions of `arg2` are passed to `wrap_array` as
    `default_index`, `default_columns` and `to_ndim`. If `arg2` is not a frame, its
    name is used as the only column. Other keyword arguments are forwarded.
    """
    columns = arg2.columns if checks.is_frame(arg2) else [arg2.name]
    return wrap_array(
        arg1,
        default_index=arg2.index,
        default_columns=columns,
        to_ndim=arg2.ndim,
        **kwargs)
Exemple #9
0
def mapper_indexing_func(mapper, ref_obj, pd_indexing_func):
    """Broadcast `mapper` Series to `ref_obj` and perform pandas indexing using `pd_indexing_func`.

    The positions of `mapper` are broadcast to `ref_obj`, indexed with
    `pd_indexing_func`, and the first row of the result is used to pick the
    surviving entries of `mapper`.

    Returns a new mapper Series, or None if the indexing result is neither a frame
    nor a series.
    """
    checks.assert_type(mapper, pd.Series)
    checks.assert_type(ref_obj, (pd.Series, pd.DataFrame))

    positions = np.arange(len(mapper.index))
    selected = pd_indexing_func(reshape_fns.broadcast_to(positions, ref_obj))
    new_mapper = mapper.iloc[selected.values[0]]
    if checks.is_frame(selected):
        return pd.Series(
            new_mapper.values, index=selected.columns, name=mapper.name)
    if checks.is_series(selected):
        # A single column was selected: `new_mapper` is used as the only element
        return pd.Series([new_mapper], index=[selected.name], name=mapper.name)
    return None
Exemple #10
0
def indexing_on_mapper(mapper: tp.Series, ref_obj: tp.SeriesFrame,
                       pd_indexing_func: tp.Callable) -> tp.Optional[tp.Series]:
    """Broadcast `mapper` Series to `ref_obj` and perform pandas indexing using `pd_indexing_func`.

    The positions of `mapper` are broadcast to `ref_obj`, indexed with
    `pd_indexing_func`, and the first row of the result is used to pick the
    surviving entries of `mapper`.

    Returns a new mapper Series, or None if the indexing result is neither a frame
    nor a series.
    """
    checks.assert_instance_of(mapper, pd.Series)
    checks.assert_instance_of(ref_obj, (pd.Series, pd.DataFrame))

    positions = np.arange(len(mapper.index))
    selected = pd_indexing_func(reshape_fns.broadcast_to(positions, ref_obj))
    new_mapper = mapper.iloc[selected.values[0]]

    result = None
    if checks.is_frame(selected):
        result = pd.Series(
            new_mapper.values, index=selected.columns, name=mapper.name)
    elif checks.is_series(selected):
        # A single column was selected: `new_mapper` is used as the only element
        result = pd.Series(
            [new_mapper], index=[selected.name], name=mapper.name)
    return result
Exemple #11
0
def get_multiindex_series(arg: tp.SeriesFrame) -> tp.Series:
    """Get Series with a multi-index.

    A DataFrame is accepted only if it has exactly one row or one column; that row
    (checked first) or column is extracted.

    Raises:
        ValueError: If a DataFrame with more than one row and more than one column
            has been passed.
    """
    checks.assert_instance_of(arg, (pd.Series, pd.DataFrame))
    if checks.is_frame(arg):
        n_rows, n_cols = arg.shape
        if n_rows != 1 and n_cols != 1:
            raise ValueError(
                "Supported are either Series or DataFrame with one column/row")
        # A single row takes precedence over a single column
        arg = arg.iloc[0, :] if n_rows == 1 else arg.iloc[:, 0]
    checks.assert_instance_of(arg.index, pd.MultiIndex)
    return arg
Exemple #12
0
def soft_to_ndim(arg, ndim):
    """Try to softly bring `arg` to the specified number of dimensions `ndim` (max 2).

    A 2-dim object is downgraded to 1-dim only if it has exactly one column;
    a 1-dim object is upgraded to 2-dim by adding a column axis. In any other
    case `arg` is returned as is (converted to an array if it was not one).
    """
    if not checks.is_array(arg):
        arg = np.asarray(arg)
    if ndim == 1 and arg.ndim == 2 and arg.shape[1] == 1:
        # downgrade
        return arg.iloc[:, 0] if checks.is_frame(arg) else arg[:, 0]
    if ndim == 2 and arg.ndim == 1:
        # upgrade
        return arg.to_frame() if checks.is_series(arg) else arg[:, None]
    return arg  # do nothing
Exemple #13
0
def to_1d(arg, raw=False):
    """Reshape argument to one dimension.

    If `raw` is True, `arg` is first converted to a NumPy array. A 2-dim object with
    one column is collapsed along axis 1; a 0-dim array becomes a one-element array.

    Raises:
        ValueError: If `arg` cannot be brought to one dimension.
    """
    if raw:
        arg = np.asarray(arg)
    if not checks.is_array_like(arg):
        arg = np.asarray(arg)

    n_dims = arg.ndim
    if n_dims == 2 and arg.shape[1] == 1:
        return arg.iloc[:, 0] if checks.is_frame(arg) else arg[:, 0]
    if n_dims == 1:
        return arg
    if n_dims == 0:
        return arg.reshape((1, ))
    raise ValueError(
        f"Cannot reshape a {arg.ndim}-dimensional array to 1 dimension")
Exemple #14
0
def soft_to_ndim(arg: tp.ArrayLike,
                 ndim: int,
                 raw: bool = False) -> tp.AnyArray:
    """Try to softly bring `arg` to the specified number of dimensions `ndim` (max 2).

    `arg` is first passed through `to_any_array` together with `raw`. A 2-dim object
    is downgraded to 1-dim only if it has exactly one column; a 1-dim object is
    upgraded to 2-dim by adding a column axis. Otherwise the array is returned as is.
    """
    arr = to_any_array(arg, raw=raw)
    if ndim == 1 and arr.ndim == 2 and arr.shape[1] == 1:
        # downgrade
        return arr.iloc[:, 0] if checks.is_frame(arr) else arr[:, 0]
    if ndim == 2 and arr.ndim == 1:
        # upgrade
        return arr.to_frame() if checks.is_series(arr) else arr[:, None]
    return arr  # do nothing
Exemple #15
0
def to_1d(arg: tp.ArrayLike, raw: bool = False) -> tp.AnyArray1d:
    """Reshape argument to one dimension.

    If `raw` is True, returns NumPy array.
    If 2-dim, will collapse along axis 1 (i.e., DataFrame with one column to Series).
    A 0-dim array becomes a one-element array.

    Raises:
        ValueError: If `arg` cannot be brought to one dimension.
    """
    arr = to_any_array(arg, raw=raw)
    n_dims = arr.ndim
    if n_dims == 2 and arr.shape[1] == 1:
        return arr.iloc[:, 0] if checks.is_frame(arr) else arr[:, 0]
    if n_dims == 1:
        return arr
    if n_dims == 0:
        return arr.reshape((1, ))
    raise ValueError(
        f"Cannot reshape a {arr.ndim}-dimensional array to 1 dimension")
Exemple #16
0
def to_1d(arg, raw=False):
    """Reshape argument to one dimension.

    If `raw` is `True`, returns NumPy array.
    If 2-dim, will collapse along axis 1 (i.e., DataFrame with one column to Series).
    A 0-dim array becomes a one-element array.

    Raises:
        ValueError: If `arg` cannot be brought to one dimension.
    """
    if raw or not checks.is_array(arg):
        arg = np.asarray(arg)

    n_dims = arg.ndim
    if n_dims == 2 and arg.shape[1] == 1:
        return arg.iloc[:, 0] if checks.is_frame(arg) else arg[:, 0]
    if n_dims == 1:
        return arg
    if n_dims == 0:
        return arg.reshape((1, ))
    raise ValueError(
        f"Cannot reshape a {arg.ndim}-dimensional array to 1 dimension")
Exemple #17
0
def _indexing_func(obj, pd_indexing_func):
    """Perform indexing on `Portfolio`.

    Only column selection is supported: a helper frame holding column positions is
    indexed with `pd_indexing_func` to find out which columns survive, and a new
    instance of the same class is built from the indexed components.

    Raises:
        TypeError: If the portfolio wraps a one-dimensional object.
        NotImplementedError: If the indexing operation changes the index.
    """
    wrapper = obj.wrapper
    if wrapper.ndim == 1:
        raise TypeError("Indexing on Series is not supported")

    # Frame where every row lists the column positions 0..n_cols-1
    shape = (len(wrapper.index), len(wrapper.columns))
    positions = np.broadcast_to(np.arange(shape[1]), shape)
    col_mapper = pd_indexing_func(wrapper.wrap(positions))
    if not pd.Index.equals(col_mapper.index, wrapper.index):
        raise NotImplementedError(
            "Changing index (time axis) is not supported")
    new_cols = col_mapper.values[0]

    def index_arraylike_param(param):
        # Scalars pass through untouched; array-like params are selected per column
        if np.asarray(param).ndim > 0:
            per_column = reshape_fns.broadcast_to_axis_of(
                param, obj.main_price, 1)
            return per_column[new_cols]
        return param

    factor_returns = obj.factor_returns
    if factor_returns is not None and checks.is_frame(factor_returns):
        factor_returns = pd_indexing_func(
            reshape_fns.broadcast_to(factor_returns, obj.main_price))

    # Create new Portfolio instance
    portfolio_cls = obj.__class__
    return portfolio_cls(
        pd_indexing_func(obj.main_price),
        obj.init_capital.iloc[new_cols],
        pd_indexing_func(obj.orders),  # Orders class supports indexing
        pd_indexing_func(obj.cash),
        pd_indexing_func(obj.shares),
        freq=obj.freq,
        year_freq=obj.year_freq,
        levy_alpha=index_arraylike_param(obj.levy_alpha),
        risk_free=index_arraylike_param(obj.risk_free),
        required_return=index_arraylike_param(obj.required_return),
        cutoff=index_arraylike_param(obj.cutoff),
        factor_returns=factor_returns,
        incl_unrealized_stats=obj.incl_unrealized_stats)
Exemple #18
0
def unstack_to_df(arg, index_levels=None, column_levels=None, symmetric=False):
    """Reshape object based on multi-index into dataframe.

    `index_levels` and `column_levels` select which levels of the multi-index form
    the new index and the new columns. Both are required if there are more than two
    levels; otherwise levels 0 and 1 are used. A DataFrame is accepted only as a
    container of a single row or column. Set `symmetric` to True to pass the result
    through `make_symmetric`.
    """
    # Validate the input and squeeze a one-row/one-column DataFrame
    checks.assert_type(arg, (pd.Series, pd.DataFrame))
    if checks.is_frame(arg):
        n_rows, n_cols = arg.shape
        if n_rows == 1:
            arg = arg.iloc[0, :]
        elif n_cols == 1:
            arg = arg.iloc[:, 0]
    checks.assert_type(arg.index, pd.MultiIndex)
    sr = to_1d(arg)

    if len(sr.index.levels) <= 2:
        index_levels, column_levels = 0, 1
    else:
        checks.assert_not_none(index_levels)
        checks.assert_not_none(column_levels)

    def labels_to_index(labels, levels):
        # Several levels produce tuples -> MultiIndex; a single level -> flat Index
        if isinstance(levels, (list, tuple)):
            return pd.MultiIndex.from_tuples(labels, names=levels)
        return pd.Index(labels, name=levels)

    # Build new index and column hierarchies from the unique labels
    index_labels = np.unique(index_fns.select_levels(arg.index, index_levels))
    column_labels = np.unique(
        index_fns.select_levels(arg.index, column_levels))
    new_index = labels_to_index(index_labels, index_levels)
    new_columns = labels_to_index(column_labels, column_levels)

    # Unstack and post-process
    values = unstack_to_array(sr, levels=(index_levels, column_levels))
    df = pd.DataFrame(values, index=new_index, columns=new_columns)
    return make_symmetric(df) if symmetric else df
Exemple #19
0
def from_params_pipeline(ts_list,
                         param_list,
                         level_names,
                         num_outputs,
                         custom_func,
                         *args,
                         pass_lists=False,
                         pass_2d=True,
                         param_product=False,
                         broadcast_kwargs=None,
                         return_raw=False,
                         **kwargs):
    """A pipeline for calculating an indicator, used by `IndicatorFactory`.

    Args:
        ts_list (list of array_like): A list of time series objects. At least one must be a pandas object.
        param_list (list of array_like): A list of parameters. Each element is either an array-like object
            or a single value of any type.
        level_names (list of str): A list of column level names corresponding to each parameter.
        num_outputs (int): The number of output arrays.
        custom_func (function): A custom calculation function. See `IndicatorFactory.from_custom_func`.
        *args: Arguments passed to the `custom_func`.
        pass_lists (bool): If `True`, arguments are passed to the `custom_func` as lists.
        pass_2d (bool): If `True`, time series arrays will be passed as two-dimensional, otherwise as is.
        param_product (bool): If `True`, builds a Cartesian product out of all parameters.
        broadcast_kwargs (dict): Keyword arguments passed to the `vectorbt.base.reshape_fns.broadcast`
            on time series objects. If None, no additional arguments are passed.
        return_raw (bool): If `True`, returns the raw output without post-processing.
        **kwargs: Keyword arguments passed to the `custom_func`.

            Some common arguments include `return_cache` to return cache and `cache` to pass cache.
            Those are only applicable to `custom_func` that supports it (`custom_func` created using
            `IndicatorFactory.from_apply_func` are supported by default).

    Returns:
        A list of transformed inputs (`pandas_like`), a list of generated outputs (`pandas_like`),
        a list of parameter arrays (`np.ndarray`), a list of parameter mappers (`pd.Series`),
        a list of other generated outputs that are outside of  `num_outputs`.

        If `return_raw` is `True` or a truthy `return_cache` is passed in `kwargs`, the output
        of `custom_func` is returned as is instead.

    Explanation:

        Does the following:

        * Takes one or multiple time series objects in `ts_list` and broadcasts them. For example:

        ```python-repl
        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])
        >>> ts_list = [sr, df]

        >>> ts_list = vbt.base.reshape_fns.broadcast(*ts_list)
        >>> print(ts_list[0])
        a  b
        x  1  1
        y  2  2
        >>> print(ts_list[1])
        a  b
        x  3  4
        y  5  6
        ```

        * Takes one or multiple parameters in `param_list`, converts them to NumPy arrays and
            broadcasts them. For example:

        ```python-repl
        >>> p1, p2, p3 = 1, [2, 3, 4], [False]
        >>> param_list = [p1, p2, p3]

        >>> param_list = vbt.base.reshape_fns.broadcast(*param_list)
        >>> print(param_list[0])
        array([1, 1, 1])
        >>> print(param_list[1])
        array([2, 3, 4])
        >>> print(param_list[2])
        array([False, False, False])
        ```

        * Performs calculation using `custom_func` to build output arrays (`output_list`) and
            other objects (`other_list`, optionally). For example:

        ```python-repl
        >>> def custom_func(ts1, ts2, p1, p2, p3, *args, **kwargs):
        ...     return np.hstack((
        ...         ts1 + ts2 + p1[0] * p2[0],
        ...         ts1 + ts2 + p1[1] * p2[1],
        ...         ts1 + ts2 + p1[2] * p2[2],
        ...     ))

        >>> output = custom_func(*ts_list, *param_list)
        >>> print(output)
        array([[ 6,  7,  7,  8,  8,  9],
               [ 9, 10, 10, 11, 11, 12]])
        ```

        * Creates new column hierarchy based on parameters and level names. For example:

        ```python-repl
        >>> p1_columns = pd.Index(param_list[0], name='p1')
        >>> p2_columns = pd.Index(param_list[1], name='p2')
        >>> p3_columns = pd.Index(param_list[2], name='p3')
        >>> p_columns = vbt.base.index_fns.stack_indexes(p1_columns, p2_columns, p3_columns)
        >>> new_columns = vbt.base.index_fns.combine_indexes(p_columns, ts_list[0].columns)

        >>> output_df = pd.DataFrame(output, columns=new_columns)
        >>> print(output_df)
        p1                                         1
        p2             2             3             4
        p3  False  False  False  False  False  False
                a      b      a      b      a      b
        0       6      7      7      8      8      9
        1       9     10     10     11     11     12
        ```

        * Broadcasts objects in `ts_list` to match the shape of objects in `output_list` through tiling.
            This is done to be able to compare them and generate signals, since you cannot compare NumPy
            arrays that have totally different shapes, such as (2, 2) and (2, 6). For example:

        ```python-repl
        >>> new_ts_list = [
        ...     ts_list[0].vbt.tile(len(param_list[0]), keys=p_columns),
        ...     ts_list[1].vbt.tile(len(param_list[0]), keys=p_columns)
        ... ]
        >>> print(new_ts_list[0])
        p1                                         1
        p2             2             3             4
        p3  False  False  False  False  False  False
                a      b      a      b      a      b
        0       1      1      1      1      1      1
        1       2      2      2      2      2      2
        ```

        * Builds parameter mappers that will link parameters from `param_list` to columns in
            `ts_list` and `output_list`. This is done to enable column indexing using parameter values.
    """
    # A None sentinel replaces the former mutable `{}` default
    if broadcast_kwargs is None:
        broadcast_kwargs = {}
    if len(ts_list) > 1:
        # Broadcast time series
        ts_list = reshape_fns.broadcast(*ts_list,
                                        **broadcast_kwargs,
                                        writeable=True)
    # Check time series objects
    checks.assert_type(ts_list[0], (pd.Series, pd.DataFrame))
    # Convert params to 1-dim arrays
    param_list = list(map(reshape_fns.to_1d, param_list))
    if len(param_list) > 1:
        # Check level names
        checks.assert_type(level_names, (list, tuple))
        checks.assert_same_len(param_list, level_names)
        for ts in ts_list:
            # Every time series object should be free of the specified level names in its columns
            if not checks.is_frame(ts):
                continue
            for level_name in level_names:
                if level_name is not None:
                    checks.assert_level_not_exists(ts.columns, level_name)
        if param_product:
            # Make Cartesian product out of all params
            param_list = create_param_product(param_list)
        else:
            # Broadcast such that each array has the same length
            param_list = reshape_fns.broadcast(*param_list, writeable=True)
    # Perform main calculation
    if pass_2d:
        array_list = tuple(reshape_fns.to_2d(np.asarray(ts)) for ts in ts_list)
    else:
        array_list = tuple(np.asarray(ts) for ts in ts_list)
    if pass_lists:
        output_list = custom_func(array_list, param_list, *args, **kwargs)
    else:
        output_list = custom_func(*array_list, *param_list, *args, **kwargs)
    if return_raw or kwargs.get('return_cache', False):
        return output_list  # return raw cache outputs
    if not isinstance(output_list, (tuple, list, List)):
        output_list = [output_list]
    else:
        output_list = list(output_list)
    # Other outputs should be returned without post-processing (for example cache_dict)
    if len(output_list) > num_outputs:
        other_list = output_list[num_outputs:]
    else:
        other_list = []
    # Process only the num_outputs outputs
    output_list = output_list[:num_outputs]
    if len(param_list) > 0:
        # Build new column levels on top of time series levels
        new_columns = build_column_hierarchy(param_list, level_names,
                                             ts_list[0].vbt.columns)
        # Wrap into new pandas objects both time series and output objects
        new_ts_list = [
            broadcast_ts(ts, param_list[0].shape[0], new_columns)
            for ts in ts_list
        ]
        # Build mappers to easily map between parameters and columns
        mapper_list = [
            build_mapper(param, ts_list[0], new_columns, level_names[i])
            for i, param in enumerate(param_list)
        ]
    else:
        # Some indicators don't have any params
        new_columns = ts_list[0].vbt.columns
        new_ts_list = list(ts_list)
        mapper_list = []
    output_list = [
        wrap_output(output, ts_list[0], new_columns) for output in output_list
    ]
    if len(mapper_list) > 1:
        # Tuple object is a mapper that accepts tuples of parameters
        tuple_mapper = build_tuple_mapper(mapper_list, new_columns,
                                          tuple(level_names))
        mapper_list.append(tuple_mapper)
    return new_ts_list, output_list, param_list, mapper_list, other_list
Exemple #20
0
def broadcast(*args,
              to_shape=None,
              to_pd=None,
              to_frame=None,
              align_index=None,
              align_columns=None,
              index_from='default',
              columns_from='default',
              require_kwargs=None,
              keep_raw=False,
              return_meta=False,
              **kwargs):
    """Bring any array-like object in `args` to the same shape by using NumPy broadcasting.

    See [Broadcasting](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).

    Can broadcast pandas objects by broadcasting their index/columns with `broadcast_index`.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape. If set, will broadcast every element in `args` to `to_shape`.
        to_pd (bool, tuple or list): Whether to convert all output arrays to pandas, otherwise returns
            raw NumPy arrays. If None, converts only if there is at least one pandas object among them.
        to_frame (bool): Whether to convert all Series to DataFrames.
        align_index (bool): Whether to align index of pandas objects using multi-index.
        align_columns (bool): Whether to align columns of pandas objects using multi-index.
        index_from (any): Broadcasting rule for index.
        columns_from (any): Broadcasting rule for columns.
        require_kwargs (dict or list of dict): Keyword arguments passed to `np.require`.
        keep_raw (bool, tuple or list): Whether to keep the unbroadcasted version of the array.

            Only makes sure that the array can be broadcast to the target shape.
        return_meta (bool): If True, will also return new shape, index and columns.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    For defaults, see `vectorbt.settings.broadcasting`.

    ## Example

    Without broadcasting index and columns:
    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast

    >>> v = 0
    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     index=pd.Index(['x2', 'y2', 'z2']),
    ...     columns=pd.Index(['a2', 'b2', 'c2']))

    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=None,
    ...     columns_from=None,
    ... ): print(i)
       0  1  2
    0  0  0  0
    1  0  0  0
    2  0  0  0
       0  1  2
    0  1  2  3
    1  1  2  3
    2  1  2  3
       a  a  a
    x  1  1  1
    y  2  2  2
    z  3  3  3
        a2  b2  c2
    x2   1   2   3
    y2   4   5   6
    z2   7   8   9
    ```

    Taking new index and columns from position:
    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=2,
    ...     columns_from=3
    ... ): print(i)
       a2  b2  c2
    x   0   0   0
    y   0   0   0
    z   0   0   0
       a2  b2  c2
    x   1   2   3
    y   1   2   3
    z   1   2   3
       a2  b2  c2
    x   1   1   1
    y   2   2   2
    z   3   3   3
       a2  b2  c2
    x   1   2   3
    y   4   5   6
    z   7   8   9
    ```

    Broadcasting index and columns through stacking:
    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from='stack',
    ...     columns_from='stack'
    ... ): print(i)
          a2  b2  c2
    x x2   0   0   0
    y y2   0   0   0
    z z2   0   0   0
          a2  b2  c2
    x x2   1   2   3
    y y2   1   2   3
    z z2   1   2   3
          a2  b2  c2
    x x2   1   1   1
    y y2   2   2   2
    z z2   3   3   3
          a2  b2  c2
    x x2   1   2   3
    y y2   4   5   6
    z z2   7   8   9
    ```

    Setting index and columns manually:
    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=['a', 'b', 'c'],
    ...     columns_from=['d', 'e', 'f']
    ... ): print(i)
       d  e  f
    a  0  0  0
    b  0  0  0
    c  0  0  0
       d  e  f
    a  1  2  3
    b  1  2  3
    c  1  2  3
       d  e  f
    a  1  1  1
    b  2  2  2
    c  3  3  3
       d  e  f
    a  1  2  3
    b  4  5  6
    c  7  8  9
    ```
    """
    from vectorbt import settings

    is_pd = False
    is_2d = False
    args = list(args)
    if require_kwargs is None:
        require_kwargs = {}
    if align_index is None:
        align_index = settings.broadcasting['align_index']
    if align_columns is None:
        align_columns = settings.broadcasting['align_columns']
    if isinstance(index_from, str) and index_from == 'default':
        index_from = settings.broadcasting['index_from']
    if isinstance(columns_from, str) and columns_from == 'default':
        columns_from = settings.broadcasting['columns_from']

    # Convert to np.ndarray object if not numpy or pandas
    # Also check whether we broadcast to pandas and whether work on 2-dim data
    for i in range(len(args)):
        if not checks.is_array(args[i]):
            args[i] = np.asarray(args[i])
        if args[i].ndim > 1:
            is_2d = True
        if checks.is_pandas(args[i]):
            is_pd = True

    # If target shape specified, check again if we work on 2-dim data
    if to_shape is not None:
        if isinstance(to_shape, int):
            to_shape = (to_shape, )
        checks.assert_type(to_shape, tuple)
        if len(to_shape) > 1:
            is_2d = True

    if to_frame is not None:
        # force either keeping Series or converting them to DataFrames
        is_2d = to_frame

    if to_pd is not None:
        # force either raw or pandas
        if isinstance(to_pd, (tuple, list)):
            is_pd = any(to_pd)
        else:
            is_pd = to_pd

    # Align pandas objects
    if align_index:
        index_to_align = []
        for i in range(len(args)):
            if checks.is_pandas(args[i]) and len(args[i].index) > 1:
                index_to_align.append(i)
        if len(index_to_align) > 1:
            indexes = [args[i].index for i in index_to_align]
            if len(set(map(len, indexes))) > 1:
                index_indices = index_fns.align_indexes(*indexes)
                for i in range(len(args)):
                    if i in index_to_align:
                        args[i] = args[i].iloc[index_indices[
                            index_to_align.index(i)]]
    if align_columns:
        cols_to_align = []
        for i in range(len(args)):
            if checks.is_frame(args[i]) and len(args[i].columns) > 1:
                cols_to_align.append(i)
        if len(cols_to_align) > 1:
            indexes = [args[i].columns for i in cols_to_align]
            if len(set(map(len, indexes))) > 1:
                col_indices = index_fns.align_indexes(*indexes)
                for i in range(len(args)):
                    if i in cols_to_align:
                        args[i] = args[i].iloc[:, col_indices[cols_to_align.
                                                              index(i)]]

    # Convert all pd.Series objects to pd.DataFrame if we work on 2-dim data
    args_2d = [
        arg.to_frame() if is_2d and checks.is_series(arg) else arg
        for arg in args
    ]

    # Get final shape
    if to_shape is None:
        to_shape = np.lib.stride_tricks._broadcast_shape(*args_2d)

    # Perform broadcasting
    new_args = []
    for i, arg in enumerate(args_2d):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        bc_arg = np.broadcast_to(arg, to_shape)
        if _keep_raw:
            new_args.append(arg)
            continue
        new_args.append(bc_arg)

    # Force to match requirements
    for i in range(len(new_args)):
        if isinstance(require_kwargs, (tuple, list)):
            _require_kwargs = require_kwargs[i]
        else:
            _require_kwargs = require_kwargs
        new_args[i] = np.require(new_args[i], **_require_kwargs)

    if is_pd:
        # Decide on index and columns
        # NOTE: Important to pass args, not args_2d, to preserve original shape info
        new_index = broadcast_index(args,
                                    to_shape,
                                    index_from=index_from,
                                    axis=0,
                                    **kwargs)
        new_columns = broadcast_index(args,
                                      to_shape,
                                      index_from=columns_from,
                                      axis=1,
                                      **kwargs)
    else:
        new_index, new_columns = None, None

    # Bring arrays to their old types (e.g. array -> pandas)
    for i in range(len(new_args)):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        if _keep_raw:
            continue
        if isinstance(to_pd, (tuple, list)):
            _is_pd = to_pd[i]
        else:
            _is_pd = is_pd
        new_args[i] = wrap_broadcasted(args[i],
                                       new_args[i],
                                       is_pd=_is_pd,
                                       new_index=new_index,
                                       new_columns=new_columns)

    if len(new_args) > 1:
        if return_meta:
            return tuple(new_args), to_shape, new_index, new_columns
        return tuple(new_args)
    if return_meta:
        return new_args[0], to_shape, new_index, new_columns
    return new_args[0]
Exemple #21
0
    def _indexing_func_meta(self,
                            pd_indexing_func,
                            index=None,
                            columns=None,
                            column_only_select=None,
                            group_select=None,
                            group_by=None):
        """Perform indexing on `ArrayWrapper` and also return indexing metadata.

        Takes into account column grouping.

        The pandas indexing operation `pd_indexing_func` is not applied to real data. Instead, it is
        applied to helper objects filled with integer positions (row positions and column positions),
        so that the positions that survive the operation can be read back from the result.

        Set `column_only_select` to True to index the array wrapper as a Series of columns.
        This way, selection of index (axis 0) can be avoided. Set `group_select` to True
        to select groups rather than columns. Takes effect only if grouping is enabled.

        `index` and `columns` default to those of the (regrouped) wrapper; if `group_select` is
        in effect, `columns` defaults to the group labels. `column_only_select` and `group_select`
        fall back first to the instance attributes and then to `settings.array_wrapper`.

        Returns a tuple of four elements:

        * the new wrapper (a copy of the regrouped wrapper with the new index, columns,
            number of dimensions and grouping),
        * the selected row positions,
        * the selected column positions (group positions if groups were selected),
        * the selected ungrouped column positions (same as the previous element unless
            groups were selected).

        Raises `IndexingError` if the operation selects a scalar, selects a column on a Series
        in column-only mode, or flips index and columns while grouping is enabled.

        !!! note
            If `column_only_select` is True, make sure to index the array wrapper
            as a Series of columns rather than a DataFrame. For example, the operation
            `.iloc[:, :2]` should become `.iloc[:2]`. Operations are not allowed if the
            object is already a Series and thus has only one column/group."""
        from vectorbt import settings

        # Resolve flags: explicit argument -> instance attribute -> global settings
        if column_only_select is None:
            column_only_select = self.column_only_select
        if column_only_select is None:
            column_only_select = settings.array_wrapper['column_only_select']
        if group_select is None:
            group_select = self.group_select
        if group_select is None:
            group_select = settings.array_wrapper['group_select']
        _self = self.regroup(group_by)
        # Group selection only makes sense if there is any grouping
        group_select = group_select and _self.grouper.is_grouped()
        if index is None:
            index = _self.index
        if columns is None:
            if group_select:
                columns = _self.grouper.get_columns()
            else:
                columns = _self.columns
        if group_select:
            # Groups as columns
            i_wrapper = ArrayWrapper(index, columns, _self.get_ndim())
        else:
            # Columns as columns
            i_wrapper = ArrayWrapper(index, columns, _self.ndim)
        n_rows = len(index)
        n_cols = len(columns)

        if column_only_select:
            if i_wrapper.ndim == 1:
                raise IndexingError(
                    "Columns only: Attempting to select a column on a Series")
            # Series of column positions labeled by columns: indexing it reveals the selected columns
            col_mapper = i_wrapper.wrap_reduced(np.arange(n_cols),
                                                columns=columns)
            try:
                col_mapper = pd_indexing_func(col_mapper)
            except pd.core.indexing.IndexingError as e:
                warnings.warn(
                    "Columns only: Make sure to treat this object "
                    "as a Series of columns rather than a DataFrame",
                    stacklevel=2)
                raise e
            if checks.is_series(col_mapper):
                # Multiple columns selected
                new_columns = col_mapper.index
                col_idxs = col_mapper.values
                new_ndim = 2
            else:
                # Single column selected: the result is its position
                new_columns = columns[[col_mapper]]
                col_idxs = col_mapper
                new_ndim = 1
            # Rows are left untouched in column-only mode
            new_index = index
            idx_idxs = np.arange(len(index))
        else:
            # Each cell holds its row position
            idx_mapper = i_wrapper.wrap(np.broadcast_to(
                np.arange(n_rows)[:, None], (n_rows, n_cols)),
                                        index=index,
                                        columns=columns)
            idx_mapper = pd_indexing_func(idx_mapper)
            if i_wrapper.ndim == 1:
                if not checks.is_series(idx_mapper):
                    raise IndexingError("Selection of a scalar is not allowed")
                idx_idxs = idx_mapper.values
                col_idxs = 0
            else:
                # Each cell holds its column position
                col_mapper = i_wrapper.wrap(np.broadcast_to(
                    np.arange(n_cols), (n_rows, n_cols)),
                                            index=index,
                                            columns=columns)
                col_mapper = pd_indexing_func(col_mapper)
                if checks.is_frame(idx_mapper):
                    # Row positions are read from the first column, column positions from the first row
                    idx_idxs = idx_mapper.values[:, 0]
                    col_idxs = col_mapper.values[0]
                elif checks.is_series(idx_mapper):
                    # A Series came out of a DataFrame: either a single column or a single row was selected
                    one_col = np.all(
                        col_mapper.values == col_mapper.values.item(0))
                    one_idx = np.all(
                        idx_mapper.values == idx_mapper.values.item(0))
                    if one_col and one_idx:
                        # One index and one column selected, multiple times
                        raise IndexingError(
                            "Must select at least two unique indices in one of both axes"
                        )
                    elif one_col:
                        # One column selected
                        idx_idxs = idx_mapper.values
                        col_idxs = col_mapper.values[0]
                    elif one_idx:
                        # One index selected
                        idx_idxs = idx_mapper.values[0]
                        col_idxs = col_mapper.values
                    else:
                        # Neither rows nor columns are constant in the resulting Series
                        raise IndexingError
                else:
                    raise IndexingError("Selection of a scalar is not allowed")
            new_index = index_fns.get_index(idx_mapper, 0)
            if not isinstance(idx_idxs, np.ndarray):
                # One index selected
                new_columns = index[[idx_idxs]]
            elif not isinstance(col_idxs, np.ndarray):
                # One column selected
                new_columns = columns[[col_idxs]]
            else:
                new_columns = index_fns.get_index(idx_mapper, 1)
            new_ndim = idx_mapper.ndim

        if _self.grouper.is_grouped():
            # Grouping enabled
            if np.asarray(idx_idxs).ndim == 0:
                # A scalar row position means a single row was selected
                raise IndexingError(
                    "Flipping index and columns is not allowed")

            if group_select:
                # Selection based on groups
                # Get indices of columns corresponding to selected groups
                group_idxs = col_idxs
                group_idxs_arr = reshape_fns.to_1d(group_idxs)
                group_start_idxs = _self.grouper.get_group_start_idxs(
                )[group_idxs_arr]
                group_end_idxs = _self.grouper.get_group_end_idxs(
                )[group_idxs_arr]
                ungrouped_col_idxs = get_ranges_arr(group_start_idxs,
                                                    group_end_idxs)
                ungrouped_columns = _self.columns[ungrouped_col_idxs]
                if new_ndim == 1 and len(ungrouped_columns) == 1:
                    ungrouped_ndim = 1
                    ungrouped_col_idxs = ungrouped_col_idxs[0]
                else:
                    ungrouped_ndim = 2

                # Get indices of selected groups corresponding to the new columns
                # We could do _self.group_by[ungrouped_col_idxs] but indexing operation may have changed the labels
                group_lens = _self.grouper.get_group_lens()[group_idxs_arr]
                # Repeat the position of each selected group once per column it contains.
                # Group lengths must not be used directly as boundary positions: that only
                # happens to work for up to two selected groups.
                ungrouped_group_idxs = np.repeat(np.arange(len(group_lens)),
                                                 group_lens)

                return _self.copy(index=new_index,
                                  columns=ungrouped_columns,
                                  ndim=ungrouped_ndim,
                                  grouped_ndim=new_ndim,
                                  group_by=new_columns[ungrouped_group_idxs]
                                  ), idx_idxs, group_idxs, ungrouped_col_idxs

            # Selection based on columns
            col_idxs_arr = reshape_fns.to_1d(col_idxs)
            return _self.copy(index=new_index,
                              columns=new_columns,
                              ndim=new_ndim,
                              grouped_ndim=None,
                              group_by=_self.grouper.group_by[col_idxs_arr]
                              ), idx_idxs, col_idxs, col_idxs

        # Grouping disabled
        return _self.copy(index=new_index,
                          columns=new_columns,
                          ndim=new_ndim,
                          grouped_ndim=None,
                          group_by=None), idx_idxs, col_idxs, col_idxs
Exemple #22
0
    def wrap_reduced(
            self,
            arr: tp.ArrayLike,
            name_or_index: tp.NameIndex = None,
            columns: tp.Optional[tp.IndexLike] = None,
            fillna: tp.Optional[tp.Scalar] = None,
            dtype: tp.Optional[tp.PandasDTypeLike] = None,
            group_by: tp.GroupByLike = None,
            to_timedelta: bool = False,
            to_index: bool = False,
            silence_warnings: tp.Optional[bool] = None) -> tp.MaybeSeriesFrame:
        """Wrap result of reduction.

        `name_or_index` can be the name of the resulting series if reducing to a scalar per column,
        or the index of the resulting series/dataframe if reducing to an array per column.
        `columns` can be set to override object's default columns.

        If `fillna` is set, null values in `arr` are replaced by it. The replacement is done
        on a copy, so the array passed by the caller is never modified.

        If `to_index` is True, values are treated as integer positions in the wrapper's index and
        replaced by the corresponding index labels; -1 becomes NaN. In this mode, `dtype` defaults
        to `np.int_` and `fillna` to -1.

        If `to_timedelta` is True, the result is passed through `ArrayWrapper.to_timedelta`.
        `silence_warnings` defaults to the value in the `array_wrapper` settings.

        Returns a scalar, a Series, or a DataFrame, depending on the number of dimensions
        of `arr` and of the wrapper. Raises `ValueError` if `arr` has more than two dimensions.

        See `ArrayWrapper.wrap` for the pipeline."""
        from vectorbt._settings import settings
        array_wrapper_cfg = settings['array_wrapper']

        if silence_warnings is None:
            silence_warnings = array_wrapper_cfg['silence_warnings']

        checks.assert_not_none(self.ndim)
        _self = self.resolve(group_by=group_by)

        if columns is None:
            columns = _self.columns
        if not isinstance(columns, pd.Index):
            columns = pd.Index(columns)

        if to_index:
            # Positions are integers and -1 marks a missing position
            if dtype is None:
                dtype = np.int_
            if fillna is None:
                fillna = -1

        def _resolve_index():
            # A string can only be a name, never an index
            if isinstance(name_or_index, str):
                return None
            return name_or_index

        def _wrap_array_per_series(arr_1d):
            # Array per Series
            sr_name = columns[0]
            if sr_name == 0:  # was arr Series before
                sr_name = None
            return pd.Series(arr_1d,
                             index=_resolve_index(),
                             name=sr_name,
                             dtype=dtype)

        def _wrap_reduced(arr):
            arr = np.asarray(arr)
            if fillna is not None:
                null_mask = pd.isnull(arr)
                if np.any(null_mask):
                    # np.asarray does not copy an existing array: copy before writing
                    # to avoid modifying (or failing on a read-only) caller's array
                    arr = arr.copy()
                    arr[null_mask] = fillna
            if arr.ndim == 0:
                # Scalar per Series/DataFrame
                return pd.Series(arr, dtype=dtype)[0]
            if arr.ndim == 1:
                if _self.ndim == 1:
                    if arr.shape[0] == 1:
                        # Scalar per Series/DataFrame with one column
                        return pd.Series(arr, dtype=dtype)[0]
                    return _wrap_array_per_series(arr)
                # Scalar per column in arr DataFrame
                return pd.Series(arr,
                                 index=columns,
                                 name=name_or_index,
                                 dtype=dtype)
            if arr.ndim == 2:
                if arr.shape[1] == 1 and _self.ndim == 1:
                    return _wrap_array_per_series(
                        reshape_fns.soft_to_ndim(arr, 1))
                # Array per column in DataFrame
                return pd.DataFrame(arr,
                                    index=_resolve_index(),
                                    columns=columns,
                                    dtype=dtype)
            raise ValueError(f"{arr.ndim}-d input is not supported")

        out = _wrap_reduced(arr)
        if to_index:
            # Convert to index
            if checks.is_series(out):
                out = out.map(lambda x: self.index[x] if x != -1 else np.nan)
            elif checks.is_frame(out):
                out = out.applymap(lambda x: self.index[x]
                                   if x != -1 else np.nan)
            else:
                out = self.index[out] if out != -1 else np.nan
        if to_timedelta:
            # Convert to timedelta
            out = self.to_timedelta(out, silence_warnings=silence_warnings)
        return out
Exemple #23
0
 def test_is_frame(self):
     """Check that `checks.is_frame` accepts only DataFrames."""
     # A scalar, a NumPy array and a Series must all be rejected
     non_frames = (0, np.array([0]), pd.Series([1, 2, 3]))
     for candidate in non_frames:
         assert not checks.is_frame(candidate)
     frame = pd.DataFrame([1, 2, 3])
     assert checks.is_frame(frame)