Esempio n. 1
0
    def groupby_apply(self, by, apply_func_nb, *args, on_matrix=False, **kwargs):
        """See `vectorbt.generic.nb.groupby_apply_nb` and
        `vectorbt.generic.nb.groupby_apply_matrix_nb` for `on_matrix=True`.

        For `by`, see `pd.DataFrame.groupby`.

        Example:
            ```python-repl
            >>> mean_nb = njit(lambda col, i, a: np.nanmean(a))
            >>> df.vbt.groupby_apply([1, 1, 2, 2, 3], mean_nb)
                 a    b    c
            1  1.5  4.5  1.5
            2  3.5  2.5  2.5
            3  5.0  1.0  1.0
            >>> mean_matrix_nb = njit(lambda i, a: np.nanmean(a))
            >>> df.vbt.groupby_apply([1, 1, 2, 2, 3], mean_matrix_nb, on_matrix=True)
                      a         b         c
            1  2.500000  2.500000  2.500000
            2  2.833333  2.833333  2.833333
            3  2.333333  2.333333  2.333333
            ```"""
        checks.assert_numba_func(apply_func_nb)

        regrouped = self._obj.groupby(by, axis=0, **kwargs)
        groups = Dict()
        for i, (k, v) in enumerate(regrouped.indices.items()):
            groups[i] = np.asarray(v)
        if on_matrix:
            result = nb.groupby_apply_matrix_nb(self.to_2d_array(), groups, apply_func_nb, *args)
        else:
            result = nb.groupby_apply_nb(self.to_2d_array(), groups, apply_func_nb, *args)
        return self.wrap_reduced(result, index=list(regrouped.indices.keys()))
Esempio n. 2
0
    def resample_apply(self,
                       freq,
                       apply_func_nb,
                       *args,
                       on_matrix=False,
                       **kwargs):
        """See `vectorbt.generic.nb.groupby_apply_nb` and
        `vectorbt.generic.nb.groupby_apply_matrix_nb` for `on_matrix=True`.

        For `freq`, see `pd.DataFrame.resample`.

        Example:
            ```python-repl
            >>> mean_nb = njit(lambda col, i, a: np.nanmean(a))
            >>> print(df.vbt.resample_apply('2d', mean_nb))
                          a    b    c
            2020-01-01  1.5  4.5  1.5
            2020-01-03  3.5  2.5  2.5
            2020-01-05  5.0  1.0  1.0
            >>> mean_matrix_nb = njit(lambda i, a: np.nanmean(a))
            >>> print(df.vbt.resample_apply('2d', mean_matrix_nb, on_matrix=True))
                               a         b         c
            2020-01-01  2.500000  2.500000  2.500000
            2020-01-03  2.833333  2.833333  2.833333
            2020-01-05  2.333333  2.333333  2.333333
            ```"""
        checks.assert_numba_func(apply_func_nb)

        resampled = self._obj.resample(freq, axis=0, **kwargs)
        groups = Dict()
        for i, (k, v) in enumerate(resampled.indices.items()):
            groups[i] = np.asarray(v)
        if on_matrix:
            result = nb.groupby_apply_matrix_nb(self.to_2d_array(), groups,
                                                apply_func_nb, *args)
        else:
            result = nb.groupby_apply_nb(self.to_2d_array(), groups,
                                         apply_func_nb, *args)
        result_obj = self.wrap(result, index=list(resampled.indices.keys()))
        resampled_arr = np.full(
            (resampled.ngroups, self.to_2d_array().shape[1]), np.nan)
        resampled_obj = self.wrap(resampled_arr,
                                  index=pd.Index(list(resampled.groups.keys()),
                                                 freq=freq))
        resampled_obj.loc[result_obj.index] = result_obj.values
        return resampled_obj