Python to_any_array Exemples, vectorbt.base.reshape_fns.to_any_array Python Exemples

Exemple #1

0

Afficher le fichier

    def split(self,
              X: tp.ArrayLike,
              n: tp.Optional[int] = None,
              window_len: tp.Optional[float] = None,
              min_len: int = 1,
              **kwargs) -> RangesT:
        """Split by rolling a fixed-length window.

        Every position at which a full window fits produces one candidate range
        (inclusive start and end positions). Candidates shorter than `min_len` are dropped
        and, if `n` is set, `n` of the remaining candidates are selected at evenly spaced positions.

        Args:
            X: Array-like object to split. It is passed through `to_any_array`; only its
                index (pandas objects) or the length of its first axis is used here.
            n: Number of windows to select. If `window_len` is None, the window length
                defaults to `len(index) // n`.
            window_len: Window length in rows. A value strictly between 0 and 1 is treated
                as a fraction of the total length (floored). Any other value is truncated
                to an integer so that the produced positions are always integers.
            min_len: Minimum length a range must have to be kept.
            **kwargs: Passed to `split_ranges_into_sets`.

        Returns:
            The result of `split_ranges_into_sets` for the selected ranges.

        Raises:
            ValueError: If neither `n` nor `window_len` is set, if `n` is not positive while
                the window length has to be derived from it, if no range is at least
                `min_len` long, or if `n` exceeds the number of available windows."""
        X = to_any_array(X)
        if isinstance(X, (pd.Series, pd.DataFrame)):
            index = X.index
        else:
            index = pd.Index(np.arange(X.shape[0]))
        total_len = len(index)

        # Resolve start_idxs and end_idxs
        if window_len is None:
            if n is None:
                raise ValueError("At least n or window_len must be set")
            if n <= 0:
                # Guard the division below: n=0 would otherwise surface as ZeroDivisionError
                raise ValueError("n must be a positive integer")
            window_len = total_len // n
        elif 0 < window_len < 1:
            # Fraction of the total length
            window_len = math.floor(window_len * total_len)
        else:
            # A float such as 30.0 would make np.arange return float positions
            window_len = int(window_len)
        start_idxs = np.arange(total_len - window_len + 1)
        end_idxs = np.arange(window_len - 1, total_len)

        # Filter out short ranges
        window_lens = end_idxs - start_idxs + 1
        min_len_mask = window_lens >= min_len
        if not np.any(min_len_mask):
            raise ValueError(
                f"There are no ranges that meet window_len>={min_len}")
        start_idxs = start_idxs[min_len_mask]
        end_idxs = end_idxs[min_len_mask]

        # Evenly select n ranges
        if n is not None:
            if n > len(start_idxs):
                raise ValueError(
                    f"n cannot be bigger than the maximum number of windows {len(start_idxs)}"
                )
            # Rounded, evenly spaced positions into the candidate list
            idxs = np.round(np.linspace(0, len(start_idxs) - 1, n)).astype(int)
            start_idxs = start_idxs[idxs]
            end_idxs = end_idxs[idxs]

        return split_ranges_into_sets(start_idxs, end_idxs, **kwargs)

Exemple #2

0

Afficher le fichier

    def split(self,
              X: tp.ArrayLike,
              n: tp.Optional[int] = None,
              min_len: int = 1,
              **kwargs) -> RangesT:
        """Split into expanding ranges that all begin at the first row.

        One candidate range `[0, i]` (end inclusive) is built for every row position `i`.
        Candidates shorter than `min_len` are discarded and, if `n` is set, `n` of the
        remaining candidates are picked at evenly spaced positions.

        `**kwargs` are passed to `split_ranges_into_sets`.

        Raises `ValueError` if no candidate is at least `min_len` long or if `n` is
        larger than the number of remaining candidates."""
        arr = to_any_array(X)
        is_pandas = isinstance(arr, (pd.Series, pd.DataFrame))
        index = arr.index if is_pandas else pd.Index(np.arange(arr.shape[0]))

        # Every candidate starts at position 0 and ends at a different row
        stops = np.arange(len(index))
        starts = np.zeros_like(stops)

        # Drop candidates that are too short
        long_enough = (stops - starts + 1) >= min_len
        if not long_enough.any():
            raise ValueError(
                f"There are no ranges that meet window_len>={min_len}")
        starts, stops = starts[long_enough], stops[long_enough]

        # Without n, every remaining candidate is used
        if n is None:
            return split_ranges_into_sets(starts, stops, **kwargs)

        n_candidates = len(starts)
        if n > n_candidates:
            raise ValueError(
                f"n cannot be bigger than the maximum number of windows {n_candidates}"
            )
        # Rounded, evenly spaced positions into the candidate list
        picks = np.linspace(0, n_candidates - 1, n).round().astype(int)
        return split_ranges_into_sets(starts[picks], stops[picks], **kwargs)

Exemple #3

0

Afficher le fichier

    def split(self,
              X: tp.ArrayLike,
              n: tp.Optional[int] = None,
              range_len: tp.Optional[float] = None,
              min_len: int = 1,
              start_idxs: tp.Optional[tp.ArrayLike] = None,
              end_idxs: tp.Optional[tp.ArrayLike] = None,
              **kwargs) -> RangesT:
        """Either split into `n` ranges each `range_len` long, or split into ranges between
        `start_idxs` and `end_idxs`.

        At least one of `range_len`, `n`, or `start_idxs` and `end_idxs` must be set:

        * If `range_len` is None, the range length defaults to `len(index) // n` and `n` ranges
        of that length are selected at evenly spaced positions.
        * If `n` is None, returns the maximum number of ranges of length `range_len`
        (one for every position at which a full range fits).
        * If `start_idxs` and `end_idxs` are set, splits into ranges between both arrays.
        Both index arrays should be either NumPy arrays (or array-likes) with absolute positions or
        pandas indexes with labels; labels are resolved with `find_first_occurrence`.
        The last index should be inclusive. The two arrays are broadcast against each other, and
        the distance between each start and end index can be different.

        `range_len` can be a floating number strictly between 0 and 1 to indicate a fraction of
        the total range (floored). Any other value is truncated to an integer so that the
        produced positions are always integers.

        Ranges shorter than `min_len` are dropped. If `n` is set, `n` of the remaining ranges are
        selected at evenly spaced positions; this also applies to ranges given by
        `start_idxs` and `end_idxs`.

        !!! note
            The original documentation states that the ranges are concatenated along the column
            axis and that smaller ranges are filled with NaNs. Neither is performed in this method.

        `**kwargs` are passed to `split_ranges_into_sets`.

        Raises `ValueError` if no splitting argument is set, if only one of `start_idxs` and
        `end_idxs` is set, if `n` is not positive while the range length has to be derived from it,
        if no range is at least `min_len` long, or if `n` exceeds the number of available ranges."""
        X = to_any_array(X)
        if isinstance(X, (pd.Series, pd.DataFrame)):
            index = X.index
        else:
            index = pd.Index(np.arange(X.shape[0]))
        total_len = len(index)

        # Resolve start_idxs and end_idxs
        if start_idxs is None and end_idxs is None:
            if range_len is None:
                if n is None:
                    raise ValueError(
                        "At least n, range_len, or start_idxs and end_idxs must be set"
                    )
                if n <= 0:
                    # Guard the division below: n=0 would otherwise surface as ZeroDivisionError
                    raise ValueError("n must be a positive integer")
                range_len = total_len // n
            elif 0 < range_len < 1:
                # Fraction of the total length
                range_len = math.floor(range_len * total_len)
            else:
                # A float such as 30.0 would make np.arange return float positions
                range_len = int(range_len)
            start_idxs = np.arange(total_len - range_len + 1)
            end_idxs = np.arange(range_len - 1, total_len)
        elif start_idxs is None or end_idxs is None:
            raise ValueError("Both start_idxs and end_idxs must be set")
        else:
            # pandas indexes hold labels that are looked up in the index of X;
            # anything else is taken as absolute positions
            if isinstance(start_idxs, pd.Index):
                start_idxs = np.asarray(
                    [find_first_occurrence(idx, index) for idx in start_idxs])
            else:
                start_idxs = np.asarray(start_idxs)
            if isinstance(end_idxs, pd.Index):
                end_idxs = np.asarray(
                    [find_first_occurrence(idx, index) for idx in end_idxs])
            else:
                end_idxs = np.asarray(end_idxs)

        # Filter out short ranges
        start_idxs, end_idxs = np.broadcast_arrays(start_idxs, end_idxs)
        range_lens = end_idxs - start_idxs + 1
        min_len_mask = range_lens >= min_len
        if not np.any(min_len_mask):
            raise ValueError(
                f"There are no ranges that meet range_len>={min_len}")
        start_idxs = start_idxs[min_len_mask]
        end_idxs = end_idxs[min_len_mask]

        # Evenly select n ranges
        if n is not None:
            if n > len(start_idxs):
                raise ValueError(
                    f"n cannot be bigger than the maximum number of ranges {len(start_idxs)}"
                )
            # Rounded, evenly spaced positions into the candidate list
            idxs = np.round(np.linspace(0, len(start_idxs) - 1, n)).astype(int)
            start_idxs = start_idxs[idxs]
            end_idxs = end_idxs[idxs]

        return split_ranges_into_sets(start_idxs, end_idxs, **kwargs)