def __init__(self,
             wrapper: ArrayWrapper,
             data: tp.Data,
             tz_localize: tp.Optional[tp.TimezoneLike],
             tz_convert: tp.Optional[tp.TimezoneLike],
             missing_index: str,
             missing_columns: str,
             download_kwargs: dict,
             **kwargs) -> None:
    Wrapping.__init__(
        self,
        wrapper,
        data=data,
        tz_localize=tz_localize,
        tz_convert=tz_convert,
        missing_index=missing_index,
        missing_columns=missing_columns,
        download_kwargs=download_kwargs,
        **kwargs
    )
    StatsBuilderMixin.__init__(self)
    PlotsBuilderMixin.__init__(self)

    checks.assert_instance_of(data, dict)
    first_data = data[list(data.keys())[0]]
    for k, v in data.items():
        checks.assert_meta_equal(v, first_data)

    self._data = data
    self._tz_localize = tz_localize
    self._tz_convert = tz_convert
    self._missing_index = missing_index
    self._missing_columns = missing_columns
    self._download_kwargs = download_kwargs
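# A minimal sketch (pure pandas, hypothetical data) of the invariant the loop
# above enforces via checks.assert_meta_equal: every value in `data` must be
# the same pandas type and shape and share the same index and columns.
import pandas as pd

idx = pd.date_range('2020-01-01', periods=3)
data = {
    'BTC': pd.DataFrame({'Open': [1., 2., 3.], 'Close': [2., 3., 4.]}, index=idx),
    'ETH': pd.DataFrame({'Open': [4., 5., 6.], 'Close': [5., 6., 7.]}, index=idx),
}

first = data[list(data.keys())[0]]
for v in data.values():
    assert type(v) is type(first)           # same pandas type
    assert v.shape == first.shape           # same shape
    assert v.index.equals(first.index)      # same index
    assert v.columns.equals(first.columns)  # same columns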
def combine(self,
            other: tp.MaybeTupleList[tp.Union[tp.ArrayLike, "BaseAccessor"]],
            *args,
            allow_multiple: bool = True,
            combine_func: tp.Optional[tp.Callable] = None,
            keep_pd: bool = False,
            to_2d: bool = False,
            concat: bool = False,
            numba_loop: bool = False,
            use_ray: bool = False,
            broadcast: bool = True,
            broadcast_kwargs: tp.KwargsLike = None,
            keys: tp.Optional[tp.IndexLike] = None,
            wrap_kwargs: tp.KwargsLike = None,
            **kwargs) -> tp.SeriesFrame:
    """Combine with `other` using `combine_func`.

    Args:
        other (array_like): Object to combine this array with.
        *args: Variable arguments passed to `combine_func`.
        allow_multiple (bool): Whether a tuple/list is considered as multiple objects in `other`.
        combine_func (callable): Function to combine two arrays.

            Can be Numba-compiled.
        keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
        to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
        concat (bool): Whether to concatenate the results along the column axis.
            Otherwise, pairwise combine into a Series/DataFrame of the same shape.

            If True, see `vectorbt.base.combine_fns.combine_and_concat`.
            If False, see `vectorbt.base.combine_fns.combine_multiple`.
        numba_loop (bool): Whether to loop using Numba.

            Set to True when iterating a large number of times over small input,
            but note that Numba doesn't support variable keyword arguments.
        use_ray (bool): Whether to use Ray to execute `combine_func` in parallel.

            Only works with `numba_loop` set to False and `concat` set to True.
            See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
        broadcast (bool): Whether to broadcast all inputs.
        broadcast_kwargs (dict): Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.
        keys (index_like): Outermost column level.
        wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
        **kwargs: Keyword arguments passed to `combine_func`.

    !!! note
        If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
        flags, which can lead to an expensive computation overhead if passed objects are large
        and have different shape/memory order. You also must ensure that all objects have
        the same data type. Also remember to bring each in `*args` to a Numba-compatible format.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine(df, combine_func=lambda x, y: x + y)
       a  b
    x  4  5
    y  7  8

    >>> sr.vbt.combine([df, df*2], combine_func=lambda x, y: x + y)
        a   b
    x  10  13
    y  17  20

    >>> sr.vbt.combine([df, df*2], combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
          c       d
       a  b   a   b
    x  4  5   7   9
    y  7  8  12  14
    ```

    Use Ray for small inputs and large processing times:

    ```python-repl
    >>> import time

    >>> def combine_func(a, b):
    ...     time.sleep(1)
    ...     return a + b

    >>> sr = pd.Series([1, 2, 3])

    >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func)
    3.01 s ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

    >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func, concat=True, use_ray=True)
    1.02 s ± 2.32 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    ```
    """
    if not allow_multiple or not isinstance(other, (tuple, list)):
        others = (other,)
    else:
        others = other
    others = tuple(map(lambda x: x.obj if isinstance(x, BaseAccessor) else x, others))
    checks.assert_not_none(combine_func)

    # Broadcast arguments
    if broadcast:
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        if checks.is_numba_func(combine_func):
            # Numba requires writeable arrays
            # Plus all of our arrays must be in the same order
            broadcast_kwargs = merge_dicts(
                dict(require_kwargs=dict(requirements=['W', 'C'])),
                broadcast_kwargs
            )
        new_obj, *new_others = reshape_fns.broadcast(self.obj, *others, **broadcast_kwargs)
    else:
        new_obj, new_others = self.obj, others
    if not checks.is_pandas(new_obj):
        new_obj = ArrayWrapper.from_shape(new_obj.shape).wrap(new_obj)

    # Optionally cast to 2-dim arrays
    if to_2d:
        inputs = tuple(map(lambda x: reshape_fns.to_2d(x, raw=not keep_pd), (new_obj, *new_others)))
    else:
        if not keep_pd:
            inputs = tuple(map(lambda x: np.asarray(x), (new_obj, *new_others)))
        else:
            inputs = (new_obj, *new_others)

    if len(inputs) == 2:
        # Only one pair to combine
        result = combine_func(inputs[0], inputs[1], *args, **kwargs)
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts({}, wrap_kwargs))

    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func) and numba_loop:
            if use_ray:
                raise ValueError("Ray cannot be used within Numba")
            for i in range(1, len(inputs)):
                checks.assert_meta_equal(inputs[i - 1], inputs[i])
            result = combine_fns.combine_and_concat_nb(inputs[0], inputs[1:], combine_func, *args, **kwargs)
        else:
            if use_ray:
                result = combine_fns.combine_and_concat_ray(inputs[0], inputs[1:], combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_and_concat(inputs[0], inputs[1:], combine_func, *args, **kwargs)
        columns = ArrayWrapper.from_obj(new_obj).columns
        if keys is not None:
            new_columns = index_fns.combine_indexes([keys, columns])
        else:
            top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
            new_columns = index_fns.combine_indexes([top_columns, columns])
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
    else:
        # Combine arguments pairwise into one object
        if use_ray:
            raise ValueError("Ray cannot be used with concat=False")
        if checks.is_numba_func(combine_func) and numba_loop:
            for i in range(1, len(inputs)):
                checks.assert_dtype_equal(inputs[i - 1], inputs[i])
            result = combine_fns.combine_multiple_nb(inputs, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(inputs, combine_func, *args, **kwargs)
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts({}, wrap_kwargs))
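# A hedged usage sketch for the Numba branch documented above: with a
# Numba-compiled combine_func and numba_loop=True, all inputs must share the
# same dtype (float64 here) so the meta/dtype asserts pass. This relies on the
# sr.vbt.combine accessor shown in this section; treat it as an illustration,
# not verified output.
import pandas as pd
import vectorbt as vbt  # registers the .vbt accessor
from numba import njit

@njit
def add_nb(a, b):  # no keyword arguments: Numba loops don't support them
    return a + b

sr = pd.Series([1., 2.], index=['x', 'y'])
df = pd.DataFrame([[3., 4.], [5., 6.]], index=['x', 'y'], columns=['a', 'b'])

# Loops over the pairs inside Numba and concatenates results column-wise
out = sr.vbt.combine([df, df * 2], combine_func=add_nb, concat=True, numba_loop=True)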
def combine_with_multiple(self, others, *args, combine_func=None, to_2d=False,
                          concat=False, broadcast_kwargs=None, keys=None, **kwargs):
    """Combine with `others` using `combine_func`.

    All arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast`
    with `broadcast_kwargs`.

    If `concat` is True, concatenate the results along columns,
    see `vectorbt.base.combine_fns.combine_and_concat`.
    Otherwise, pairwise combine into a Series/DataFrame of the same shape,
    see `vectorbt.base.combine_fns.combine_multiple`.

    Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
    If `to_2d` is True, 2-dimensional NumPy arrays will be passed, otherwise as is.

    Use `keys` as the outermost column level.

    !!! note
        If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
        flags, which can lead to an expensive computation overhead if passed objects are large
        and have different shape/memory order. You also must ensure that all objects have
        the same data type. Also remember to bring each in `*args` to a Numba-compatible format.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine_with_multiple([df, df*2],
    ...     combine_func=lambda x, y: x + y)
        a   b
    x  10  13
    y  17  20

    >>> sr.vbt.combine_with_multiple([df, df*2],
    ...     combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
          c       d
       a  b   a   b
    x  4  5   7   9
    y  7  8  12  14
    ```
    """
    checks.assert_not_none(combine_func)
    checks.assert_type(others, Iterable)
    others = tuple(map(lambda x: x._obj if isinstance(x, Base_Accessor) else x, others))
    if broadcast_kwargs is None:
        broadcast_kwargs = {}

    # Broadcast arguments
    if checks.is_numba_func(combine_func):
        # Numba requires writeable arrays
        # Plus all of our arrays must be in the same order
        broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements=['W', 'C'])), broadcast_kwargs)
    new_obj, *new_others = reshape_fns.broadcast(self._obj, *others, **broadcast_kwargs)

    # Optionally cast to 2-dim arrays
    if to_2d:
        bc_arrays = tuple(map(lambda x: reshape_fns.to_2d(x, raw=True), (new_obj, *new_others)))
    else:
        bc_arrays = tuple(map(lambda x: np.asarray(x), (new_obj, *new_others)))

    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_meta_equal(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_and_concat_nb(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_and_concat(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        columns = new_obj.vbt.wrapper.columns
        if keys is not None:
            new_columns = index_fns.combine_indexes(keys, columns)
        else:
            top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
            new_columns = index_fns.combine_indexes(top_columns, columns)
        return new_obj.vbt.wrapper.wrap(result, columns=new_columns)
    else:
        # Combine arguments pairwise into one object
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_dtype_equal(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_multiple_nb(bc_arrays, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(bc_arrays, combine_func, *args, **kwargs)
        return new_obj.vbt.wrapper.wrap(result)
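# A sketch of the pairwise (concat=False) semantics shared by both methods
# above: judging from the docstring example (sr + df + df*2), combine_multiple
# folds the inputs left to right with combine_func. This is a pure-NumPy
# stand-in for illustration, not vectorbt's implementation.
from functools import reduce
import numpy as np

def combine_multiple_sketch(arrays, combine_func):
    # ((a0 op a1) op a2) op ...
    return reduce(combine_func, arrays)

a = np.array([1, 2])
combine_multiple_sketch([a, a * 2, a * 3], np.add)  # -> array([ 6, 12])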
def test_assert_meta_equal(self):
    index = ['x', 'y', 'z']
    columns = ['a', 'b', 'c']
    checks.assert_meta_equal(np.array([1, 2, 3]), np.array([1, 2, 3]))
    checks.assert_meta_equal(pd.Series([1, 2, 3], index=index), pd.Series([1, 2, 3], index=index))
    checks.assert_meta_equal(pd.DataFrame([[1, 2, 3]], columns=columns), pd.DataFrame([[1, 2, 3]], columns=columns))
    # Series vs DataFrame
    with pytest.raises(Exception):
        checks.assert_meta_equal(pd.Series([1, 2]), pd.DataFrame([1, 2]))
    # Different shape
    with pytest.raises(Exception):
        checks.assert_meta_equal(pd.DataFrame([1, 2]), pd.DataFrame([1, 2, 3]))
    # Different index
    with pytest.raises(Exception):
        checks.assert_meta_equal(pd.DataFrame([1, 2, 3]), pd.DataFrame([1, 2, 3], index=index))
    # Different columns
    with pytest.raises(Exception):
        checks.assert_meta_equal(pd.DataFrame([[1, 2, 3]]), pd.DataFrame([[1, 2, 3]], columns=columns))