Esempio n. 1
0
    def _col_idxs_meta(self, col_idxs: tp.Array1d) -> tp.Tuple[tp.Array1d, tp.Array1d]:
        """Resolve column indices into element indices and a new column array.

        The selection routine is picked automatically: the column range is used
        when `is_sorted()` reports True (faster), the column map otherwise."""
        if self.is_sorted():
            # Sorted layout: the range-based selector is the faster path
            select_nb, col_lookup = nb.col_range_select_nb, self.col_range
        else:
            select_nb, col_lookup = nb.col_map_select_nb, self.col_map
        elem_idxs, col_arr = select_nb(col_lookup, to_1d_array(col_idxs))
        return elem_idxs, col_arr
Esempio n. 2
0
    def get_by_col_idxs(self, col_idxs: tp.Array1d) -> tp.RecordArray:
        """Select the records that belong to the given column indices.

        Uses the column range of the column mapper when it reports being sorted
        (faster), and its column map otherwise. Returns a new records array."""
        mapper = self.col_mapper
        if mapper.is_sorted():
            # Sorted layout: the range-based selector is the faster path
            return nb.record_col_range_select_nb(
                self.values, mapper.col_range, to_1d_array(col_idxs))
        return nb.record_col_map_select_nb(
            self.values, mapper.col_map, to_1d_array(col_idxs))
Esempio n. 3
0
 def describe(self,
              percentiles: tp.Optional[tp.ArrayLike] = None,
              ddof: int = 1,
              group_by: tp.GroupByLike = None,
              wrap_kwargs: tp.KwargsLike = None,
              **kwargs) -> tp.SeriesFrame:
     """Return statistics by column/group.

     Args:
         percentiles: Percentiles to include. Defaults to `[0.25, 0.5, 0.75]`.
             The median (0.5) is always added, and the values are
             de-duplicated and sorted via `np.unique`.
         ddof: Passed through to `generic_nb.describe_reduce_nb`.
         group_by: Passed through to `reduce`.
         wrap_kwargs: Merged over the default `name_or_index` and passed to `reduce`.
         **kwargs: Passed through to `reduce`.

     Returns:
         The output of `reduce`, labeled with 'count', 'mean', 'std', 'min',
         the formatted percentiles, and 'max'. A NaN 'count' is replaced with 0.
     """
     if percentiles is not None:
         percentiles = to_1d_array(percentiles)
     else:
         percentiles = np.array([0.25, 0.5, 0.75])
     percentiles = percentiles.tolist()
     if 0.5 not in percentiles:
         percentiles.append(0.5)
     percentiles = np.unique(percentiles)
     perc_formatted = pd.io.formats.format.format_percentiles(percentiles)
     index = pd.Index(
         ['count', 'mean', 'std', 'min', *perc_formatted, 'max'])
     wrap_kwargs = merge_dicts(dict(name_or_index=index), wrap_kwargs)
     out = self.reduce(generic_nb.describe_reduce_nb,
                       percentiles,
                       ddof,
                       returns_array=True,
                       returns_idx=False,
                       group_by=group_by,
                       wrap_kwargs=wrap_kwargs,
                       **kwargs)
     if isinstance(out, pd.DataFrame):
         # Assign the filled row back explicitly: an in-place fillna on the
         # object returned by `.loc` may operate on a copy (always the case
         # under pandas Copy-on-Write) and would then leave `out` unchanged.
         out.loc['count'] = out.loc['count'].fillna(0.)
     elif np.isnan(out.loc['count']):
         out.loc['count'] = 0.
     return out
Esempio n. 4
0
    def to_1d_array(self) -> tp.Array1d:
        """Return the wrapped object as a 1-dim NumPy array.

        Delegates to `reshape_fns.to_1d_array`; see `vectorbt.base.reshape_fns.to_1d`."""
        flattened = reshape_fns.to_1d_array(self.obj)
        return flattened
Esempio n. 5
0
    def indexing_func_meta(self: ArrayWrapperT,
                           pd_indexing_func: tp.PandasIndexingFunc,
                           index: tp.Optional[tp.IndexLike] = None,
                           columns: tp.Optional[tp.IndexLike] = None,
                           column_only_select: tp.Optional[bool] = None,
                           group_select: tp.Optional[bool] = None,
                           group_by: tp.GroupByLike = None) -> IndexingMetaT:
        """Perform indexing on `ArrayWrapper` and also return indexing metadata.

        Takes into account column grouping.

        Set `column_only_select` to True to index the array wrapper as a Series of columns.
        This way, selection of index (axis 0) can be avoided. Set `group_select` to True
        to select groups rather than columns. Takes effect only if grouping is enabled.

        `column_only_select` and `group_select` fall back first to the attributes of the
        same name on this instance and then to the `array_wrapper` settings.

        Returns a tuple of four elements: the new wrapper, the selected row indices,
        the selected column indices (group indices if groups were selected), and the
        selected ungrouped column indices (same as the third element unless groups
        were selected).

        Raises `IndexingError` if a scalar would be selected, if the selection cannot be
        mapped onto rows and columns, if `column_only_select` is used on an object with
        a single column, or if index and columns would be flipped while grouping is enabled.

        !!! note
            If `column_only_select` is True, make sure to index the array wrapper
            as a Series of columns rather than a DataFrame. For example, the operation
            `.iloc[:, :2]` should become `.iloc[:2]`. Operations are not allowed if the
            object is already a Series and thus has only one column/group."""
        from vectorbt._settings import settings
        array_wrapper_cfg = settings['array_wrapper']

        # Resolve flags: explicit argument > instance attribute > global settings
        if column_only_select is None:
            column_only_select = self.column_only_select
        if column_only_select is None:
            column_only_select = array_wrapper_cfg['column_only_select']
        if group_select is None:
            group_select = self.group_select
        if group_select is None:
            group_select = array_wrapper_cfg['group_select']
        _self = self.regroup(group_by)
        # Group selection only makes sense if there are groups
        group_select = group_select and _self.grouper.is_grouped()
        if index is None:
            index = _self.index
        if not isinstance(index, pd.Index):
            index = pd.Index(index)
        if columns is None:
            if group_select:
                columns = _self.grouper.get_columns()
            else:
                columns = _self.columns
        if not isinstance(columns, pd.Index):
            columns = pd.Index(columns)
        if group_select:
            # Groups as columns
            i_wrapper = ArrayWrapper(index, columns, _self.get_ndim())
        else:
            # Columns as columns
            i_wrapper = ArrayWrapper(index, columns, _self.ndim)
        n_rows = len(index)
        n_cols = len(columns)

        if column_only_select:
            if i_wrapper.ndim == 1:
                raise IndexingError(
                    "Columns only: This object already contains one column of data"
                )
            try:
                # Index a Series of column positions to learn which columns got selected
                col_mapper = pd_indexing_func(
                    i_wrapper.wrap_reduced(np.arange(n_cols), columns=columns))
            except pd.core.indexing.IndexingError as e:
                warnings.warn(
                    "Columns only: Make sure to treat this object "
                    "as a Series of columns rather than a DataFrame",
                    stacklevel=2)
                raise e
            if checks.is_series(col_mapper):
                new_columns = col_mapper.index
                col_idxs = col_mapper.values
                new_ndim = 2
            else:
                # A single column position was selected
                new_columns = columns[[col_mapper]]
                col_idxs = col_mapper
                new_ndim = 1
            new_index = index
            idx_idxs = np.arange(len(index))
        else:
            # Index an array where each element holds its row position
            idx_mapper = pd_indexing_func(
                i_wrapper.wrap(np.broadcast_to(
                    np.arange(n_rows)[:, None], (n_rows, n_cols)),
                               index=index,
                               columns=columns))
            if i_wrapper.ndim == 1:
                if not checks.is_series(idx_mapper):
                    raise IndexingError("Selection of a scalar is not allowed")
                idx_idxs = idx_mapper.values
                col_idxs = 0
            else:
                # Index an array where each element holds its column position
                col_mapper = pd_indexing_func(
                    i_wrapper.wrap(np.broadcast_to(np.arange(n_cols),
                                                   (n_rows, n_cols)),
                                   index=index,
                                   columns=columns))
                if checks.is_frame(idx_mapper):
                    idx_idxs = idx_mapper.values[:, 0]
                    col_idxs = col_mapper.values[0]
                elif checks.is_series(idx_mapper):
                    one_col = np.all(
                        col_mapper.values == col_mapper.values.item(0))
                    one_idx = np.all(
                        idx_mapper.values == idx_mapper.values.item(0))
                    if one_col and one_idx:
                        # One index and one column selected, multiple times
                        raise IndexingError(
                            "Must select at least two unique indices in one of both axes"
                        )
                    elif one_col:
                        # One column selected
                        idx_idxs = idx_mapper.values
                        col_idxs = col_mapper.values[0]
                    elif one_idx:
                        # One index selected
                        idx_idxs = idx_mapper.values[0]
                        col_idxs = col_mapper.values
                    else:
                        raise IndexingError
                else:
                    raise IndexingError("Selection of a scalar is not allowed")
            new_index = index_fns.get_index(idx_mapper, 0)
            if not isinstance(idx_idxs, np.ndarray):
                # One index selected
                new_columns = index[[idx_idxs]]
            elif not isinstance(col_idxs, np.ndarray):
                # One column selected
                new_columns = columns[[col_idxs]]
            else:
                new_columns = index_fns.get_index(idx_mapper, 1)
            new_ndim = idx_mapper.ndim

        if _self.grouper.is_grouped():
            # Grouping enabled
            if np.asarray(idx_idxs).ndim == 0:
                raise IndexingError(
                    "Flipping index and columns is not allowed")

            if group_select:
                # Selection based on groups
                # Get indices of columns corresponding to selected groups
                group_idxs = col_idxs
                group_idxs_arr = reshape_fns.to_1d_array(group_idxs)
                group_start_idxs = _self.grouper.get_group_start_idxs(
                )[group_idxs_arr]
                group_end_idxs = _self.grouper.get_group_end_idxs(
                )[group_idxs_arr]
                ungrouped_col_idxs = get_ranges_arr(group_start_idxs,
                                                    group_end_idxs)
                ungrouped_columns = _self.columns[ungrouped_col_idxs]
                if new_ndim == 1 and len(ungrouped_columns) == 1:
                    ungrouped_ndim = 1
                    ungrouped_col_idxs = ungrouped_col_idxs[0]
                else:
                    ungrouped_ndim = 2

                # Get indices of selected groups corresponding to the new columns
                # We could do _self.group_by[ungrouped_col_idxs] but indexing operation may have changed the labels
                group_lens = _self.grouper.get_group_lens()[group_idxs_arr]
                # Repeat the position of each selected group once per column in it.
                # Using the raw lengths as positions of group boundaries is only
                # correct for up to two selected groups.
                ungrouped_group_idxs = np.repeat(
                    np.arange(len(group_lens)), group_lens)

                return _self.replace(
                    index=new_index,
                    columns=ungrouped_columns,
                    ndim=ungrouped_ndim,
                    grouped_ndim=new_ndim,
                    group_by=new_columns[ungrouped_group_idxs]
                ), idx_idxs, group_idxs, ungrouped_col_idxs

            # Selection based on columns
            col_idxs_arr = reshape_fns.to_1d_array(col_idxs)
            return _self.replace(index=new_index,
                                 columns=new_columns,
                                 ndim=new_ndim,
                                 grouped_ndim=None,
                                 group_by=_self.grouper.group_by[col_idxs_arr]
                                 ), idx_idxs, col_idxs, col_idxs

        # Grouping disabled
        return _self.replace(index=new_index,
                             columns=new_columns,
                             ndim=new_ndim,
                             grouped_ndim=None,
                             group_by=None), idx_idxs, col_idxs, col_idxs