Example #1
0
def test_broadcast_shape():
    """Call `_broadcast_shape` directly on a handful of operand lists.

    `broadcast_arrays` already exercises the helper indirectly; the cases
    below pin its results explicitly.
    """
    # With no operands at all the helper is expected to raise.
    assert_raises(ValueError, _broadcast_shape)

    row = np.ones((1, 2))
    # Each entry pairs the positional operands with the expected shape.
    cases = [
        (([1, 2],), (2,)),
        ((np.ones((1, 1)),), (1, 1)),
        ((np.ones((1, 1)), np.ones((3, 4))), (3, 4)),
        ((row,) * 32, (1, 2)),
        ((row,) * 100, (1, 2)),
    ]
    for operands, expected in cases:
        assert_equal(_broadcast_shape(*operands), expected)
Example #2
0
def test_broadcast_shape():
    """Direct checks of `_broadcast_shape`.

    The helper is also covered indirectly through `broadcast_arrays`.
    """
    # Calling it without any operand is expected to raise.
    assert_raises(ValueError, _broadcast_shape)

    ones_1x1 = np.ones((1, 1))
    ones_1x2 = np.ones((1, 2))
    ones_3x4 = np.ones((3, 4))

    assert_equal(_broadcast_shape([1, 2]), (2,))
    assert_equal(_broadcast_shape(ones_1x1), (1, 1))
    assert_equal(_broadcast_shape(ones_1x1, ones_3x4), (3, 4))

    # The same operand repeated many times still yields its own shape.
    for repeats in (32, 100):
        assert_equal(_broadcast_shape(*([ones_1x2] * repeats)), (1, 2))
Example #3
0
def test_broadcast_shape():
    """Call `_broadcast_shape` directly on a handful of operand lists.

    `broadcast_arrays` already exercises the helper indirectly; the cases
    below pin its results explicitly, including the gh-5862 regressions.
    """
    row = np.ones((1, 2))
    # Each entry pairs the positional operands with the expected shape.
    expectations = [
        ((), ()),
        (([1, 2],), (2,)),
        ((np.ones((1, 1)),), (1, 1)),
        ((np.ones((1, 1)), np.ones((3, 4))), (3, 4)),
        ((row,) * 32, (1, 2)),
        ((row,) * 100, (1, 2)),
        # regression test for gh-5862
        ((np.ones(2),) * 32 + (1,), (2,)),
    ]
    for operands, shape in expectations:
        assert_equal(_broadcast_shape(*operands), shape)

    # regression test for gh-5862: incompatible operands must be rejected
    mismatched = [np.ones(2)] * 32 + [np.ones(3)] * 32
    assert_raises(ValueError, _broadcast_shape, *mismatched)
def test_broadcast_shape():
    """Direct checks of `_broadcast_shape`.

    The helper is also covered indirectly through `broadcast_arrays`.
    """
    def expect(expected, *operands):
        # Compare the helper's result for `operands` with `expected`.
        assert_equal(_broadcast_shape(*operands), expected)

    expect(())
    expect((2,), [1, 2])
    expect((1, 1), np.ones((1, 1)))
    expect((3, 4), np.ones((1, 1)), np.ones((3, 4)))
    expect((1, 2), *([np.ones((1, 2))] * 32))
    expect((1, 2), *([np.ones((1, 2))] * 100))

    # regression tests for gh-5862
    expect((2,), *([np.ones(2)] * 32 + [1]))
    incompatible = [np.ones(2)] * 32 + [np.ones(3)] * 32
    assert_raises(ValueError, _broadcast_shape, *incompatible)
Example #5
0
 def broadcast_shapes(*shapes):
     """Return what `_broadcast_shape` reports for arrays of the given shapes.

     Args:
         *shapes: Shapes, each in a form accepted by `np.empty`.
     """
     # dtype=[] is an empty structured dtype, so each placeholder only
     # carries a shape for `_broadcast_shape` to inspect.
     placeholders = [np.empty(shape, dtype=[]) for shape in shapes]
     return _broadcast_shape(*placeholders)
Example #6
0
def broadcast(*args: tp.ArrayLike,
              to_shape: tp.Optional[tp.RelaxedShape] = None,
              to_pd: tp.Optional[tp.MaybeSequence[bool]] = None,
              to_frame: tp.Optional[bool] = None,
              align_index: tp.Optional[bool] = None,
              align_columns: tp.Optional[bool] = None,
              index_from: tp.Optional[IndexFromLike] = None,
              columns_from: tp.Optional[IndexFromLike] = None,
              require_kwargs: tp.KwargsLikeSequence = None,
              keep_raw: tp.Optional[tp.MaybeSequence[bool]] = False,
              return_meta: bool = False,
              **kwargs) -> BCRT:
    """Bring every array-like object in `args` to a common shape using NumPy broadcasting.

    See [Broadcasting](https://numpy.org/doc/stable/user/basics.broadcasting.html).

    Pandas objects are supported: their index/columns are broadcast with `broadcast_index`.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple of int): Target shape. If set, every element in `args` is broadcast to `to_shape`.

            A single integer is treated as a one-element tuple.
        to_pd (bool or list of bool): Whether to convert all output arrays to pandas, otherwise
            raw NumPy arrays are returned. If None, converts only if at least one argument is a pandas object.

            If sequence, applies to each argument.
        to_frame (bool): Whether to convert all Series to DataFrames.
        align_index (bool): Whether to align index of pandas objects using multi-index.

            Pass None to use the default.
        align_columns (bool): Whether to align columns of pandas objects using multi-index.

            Pass None to use the default.
        index_from (any): Broadcasting rule for index.

            Pass None to use the default.
        columns_from (any): Broadcasting rule for columns.

            Pass None to use the default.
        require_kwargs (dict or list of dict): Keyword arguments passed to `np.require`.

            If sequence, applies to each argument.
        keep_raw (bool or list of bool): Whether to keep the unbroadcasted version of the array.

            The array is only checked for being broadcastable to the target shape.

            If sequence, applies to each argument.
        return_meta (bool): Whether to also return new shape, index and columns.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        A tuple of broadcast objects if more than one argument was passed, otherwise
        the single broadcast object. With `return_meta`, that result is followed by
        the target shape, the new index and the new columns.

    For defaults, see `broadcasting` in `vectorbt._settings.settings`.

    Usage:
        * Without broadcasting index and columns:

        ```pycon
        >>> import numpy as np
        >>> import pandas as pd
        >>> from vectorbt.base.reshape_fns import broadcast

        >>> v = 0
        >>> a = np.array([1, 2, 3])
        >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
        >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        ...     index=pd.Index(['x2', 'y2', 'z2']),
        ...     columns=pd.Index(['a2', 'b2', 'c2']))

        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from='keep',
        ...     columns_from='keep',
        ... ): print(i)
           0  1  2
        0  0  0  0
        1  0  0  0
        2  0  0  0
           0  1  2
        0  1  2  3
        1  1  2  3
        2  1  2  3
           a  a  a
        x  1  1  1
        y  2  2  2
        z  3  3  3
            a2  b2  c2
        x2   1   2   3
        y2   4   5   6
        z2   7   8   9
        ```

        * Taking new index and columns from position:

        ```pycon
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=2,
        ...     columns_from=3
        ... ): print(i)
           a2  b2  c2
        x   0   0   0
        y   0   0   0
        z   0   0   0
           a2  b2  c2
        x   1   2   3
        y   1   2   3
        z   1   2   3
           a2  b2  c2
        x   1   1   1
        y   2   2   2
        z   3   3   3
           a2  b2  c2
        x   1   2   3
        y   4   5   6
        z   7   8   9
        ```

        * Broadcasting index and columns through stacking:

        ```pycon
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from='stack',
        ...     columns_from='stack'
        ... ): print(i)
              a2  b2  c2
        x x2   0   0   0
        y y2   0   0   0
        z z2   0   0   0
              a2  b2  c2
        x x2   1   2   3
        y y2   1   2   3
        z z2   1   2   3
              a2  b2  c2
        x x2   1   1   1
        y y2   2   2   2
        z z2   3   3   3
              a2  b2  c2
        x x2   1   2   3
        y y2   4   5   6
        z z2   7   8   9
        ```

        * Setting index and columns manually:

        ```pycon
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=['a', 'b', 'c'],
        ...     columns_from=['d', 'e', 'f']
        ... ): print(i)
           d  e  f
        a  0  0  0
        b  0  0  0
        c  0  0  0
           d  e  f
        a  1  2  3
        b  1  2  3
        c  1  2  3
           d  e  f
        a  1  1  1
        b  2  2  2
        c  3  3  3
           d  e  f
        a  1  2  3
        b  4  5  6
        c  7  8  9
        ```
    """
    from vectorbt._settings import settings
    broadcasting_cfg = settings['broadcasting']

    # Fall back to the global broadcasting settings for everything left unset
    if require_kwargs is None:
        require_kwargs = {}
    if align_index is None:
        align_index = broadcasting_cfg['align_index']
    if align_columns is None:
        align_columns = broadcasting_cfg['align_columns']
    if index_from is None:
        index_from = broadcasting_cfg['index_from']
    if columns_from is None:
        columns_from = broadcasting_cfg['columns_from']

    def _keeps_raw(pos):
        # Resolve the (possibly per-argument) keep_raw flag for the argument at `pos`
        return keep_raw[pos] if isinstance(keep_raw, Sequence) else keep_raw

    # Normalize every input to an array or pandas object, noting along the way
    # whether any of them is pandas and whether any has more than one dimension
    is_pd = False
    is_2d = False
    arr_args = []
    for raw in args:
        converted = to_any_array(raw)
        if converted.ndim > 1:
            is_2d = True
        if checks.is_pandas(converted):
            is_pd = True
        arr_args.append(converted)

    # An explicit target shape with more than one dimension also means 2-dim data
    if to_shape is not None:
        if isinstance(to_shape, int):
            to_shape = (to_shape,)
        checks.assert_instance_of(to_shape, tuple)
        if len(to_shape) > 1:
            is_2d = True

    # to_frame overrides the detected dimensionality in either direction
    if to_frame is not None:
        is_2d = to_frame

    # to_pd overrides the detected output type (raw vs. pandas)
    if to_pd is not None:
        is_pd = any(to_pd) if isinstance(to_pd, Sequence) else to_pd

    # Align rows of pandas objects whose indexes differ in length
    if align_index:
        row_positions = [
            pos for pos, obj in enumerate(arr_args)
            if checks.is_pandas(obj) and len(obj.index) > 1
        ]
        if len(row_positions) > 1:
            row_indexes = [arr_args[pos].index for pos in row_positions]
            if len({len(index) for index in row_indexes}) > 1:
                row_indexers = index_fns.align_indexes(row_indexes)
                for rank, pos in enumerate(row_positions):
                    arr_args[pos] = arr_args[pos].iloc[row_indexers[rank]]

    # Align columns of DataFrames whose columns differ in length
    if align_columns:
        col_positions = [
            pos for pos, obj in enumerate(arr_args)
            if checks.is_frame(obj) and len(obj.columns) > 1
        ]
        if len(col_positions) > 1:
            col_indexes = [arr_args[pos].columns for pos in col_positions]
            if len({len(index) for index in col_indexes}) > 1:
                col_indexers = index_fns.align_indexes(col_indexes)
                for rank, pos in enumerate(col_positions):
                    arr_args[pos] = arr_args[pos].iloc[:, col_indexers[rank]]

    # When working on 2-dim data, promote every Series to a DataFrame
    if is_2d:
        arr_args_2d = [
            obj.to_frame() if checks.is_series(obj) else obj
            for obj in arr_args
        ]
    else:
        arr_args_2d = list(arr_args)

    # Derive the target shape from the inputs unless it was given
    if to_shape is None:
        to_shape = _broadcast_shape(*[np.asarray(obj) for obj in arr_args_2d])

    # Broadcast each input; this runs even for raw-kept inputs so that
    # an incompatible shape is still detected
    new_args = []
    for pos, obj in enumerate(arr_args_2d):
        raw_wanted = _keeps_raw(pos)
        expanded = np.broadcast_to(obj, to_shape)
        new_args.append(obj if raw_wanted else expanded)

    # Enforce the array requirements on every output, raw-kept ones included
    new_args = [
        np.require(obj, **resolve_dict(require_kwargs, i=pos))
        for pos, obj in enumerate(new_args)
    ]

    # Decide on index and columns
    # NOTE: Important to pass arr_args, not arr_args_2d, to preserve original shape info
    new_index = new_columns = None
    if is_pd:
        new_index = broadcast_index(arr_args,
                                    to_shape,
                                    index_from=index_from,
                                    axis=0,
                                    **kwargs)
        new_columns = broadcast_index(arr_args,
                                      to_shape,
                                      index_from=columns_from,
                                      axis=1,
                                      **kwargs)

    # Bring arrays back to their old types (e.g. array -> pandas), skipping raw-kept ones
    for pos, obj in enumerate(new_args):
        if not _keeps_raw(pos):
            wrap_as_pd = to_pd[pos] if isinstance(to_pd, Sequence) else is_pd
            new_args[pos] = wrap_broadcasted(arr_args[pos],
                                             obj,
                                             is_pd=wrap_as_pd,
                                             new_index=new_index,
                                             new_columns=new_columns)

    # Several inputs come back as a tuple, a single input comes back on its own
    result = tuple(new_args) if len(new_args) > 1 else new_args[0]
    if return_meta:
        return result, to_shape, new_index, new_columns
    return result