Esempio n. 1
0
    def align_index(cls, data: tp.Data, missing: str = 'nan') -> tp.Data:
        """Align data to have the same index.

        The argument `missing` accepts the following values:

        * 'nan': set missing data points to NaN
        * 'drop': remove missing data points
        * 'raise': raise an error"""
        if len(data) == 1:
            return data

        index = None
        for k, v in data.items():
            if index is None:
                index = v.index
            else:
                if len(index.intersection(v.index)) != len(index.union(v.index)):
                    if missing == 'nan':
                        warnings.warn("Symbols have mismatching index. "
                                      "Setting missing data points to NaN.", stacklevel=2)
                        index = index.union(v.index)
                    elif missing == 'drop':
                        warnings.warn("Symbols have mismatching index. "
                                      "Dropping missing data points.", stacklevel=2)
                        index = index.intersection(v.index)
                    elif missing == 'raise':
                        raise ValueError("Symbols have mismatching index")
                    else:
                        raise ValueError(f"missing='{missing}' is not recognized")

        # reindex
        new_data = {k: v.reindex(index=index) for k, v in data.items()}
        return new_data
Esempio n. 2
0
    def align_columns(cls, data: tp.Data, missing: str = 'raise') -> tp.Data:
        """Align data to have the same columns.

        See `Data.align_index` for `missing`."""
        if len(data) == 1:
            return data

        columns = None
        multiple_columns = False
        name_is_none = False
        for k, v in data.items():
            if isinstance(v, pd.Series):
                if v.name is None:
                    name_is_none = True
                v = v.to_frame()
            else:
                multiple_columns = True
            if columns is None:
                columns = v.columns
            else:
                if len(columns.intersection(v.columns)) != len(
                        columns.union(v.columns)):
                    if missing == 'nan':
                        warnings.warn(
                            "Symbols have mismatching columns. "
                            "Setting missing data points to NaN.",
                            stacklevel=2)
                        columns = columns.union(v.columns)
                    elif missing == 'drop':
                        warnings.warn(
                            "Symbols have mismatching columns. "
                            "Dropping missing data points.",
                            stacklevel=2)
                        columns = columns.intersection(v.columns)
                    elif missing == 'raise':
                        raise ValueError("Symbols have mismatching columns")
                    else:
                        raise ValueError(
                            f"missing='{missing}' is not recognized")

        # reindex
        new_data = {}
        for k, v in data.items():
            if isinstance(v, pd.Series):
                v = v.to_frame(name=v.name)
            v = v.reindex(columns=columns)
            if not multiple_columns:
                v = v[columns[0]]
                if name_is_none:
                    v = v.rename(None)
            new_data[k] = v
        return new_data
Esempio n. 3
0
    def __init__(self,
                 wrapper: ArrayWrapper,
                 data: tp.Data,
                 tz_localize: tp.Optional[tp.TimezoneLike],
                 tz_convert: tp.Optional[tp.TimezoneLike],
                 missing_index: str,
                 missing_columns: str,
                 download_kwargs: dict,
                 **kwargs) -> None:
        Wrapping.__init__(
            self,
            wrapper,
            data=data,
            tz_localize=tz_localize,
            tz_convert=tz_convert,
            missing_index=missing_index,
            missing_columns=missing_columns,
            download_kwargs=download_kwargs,
            **kwargs
        )
        StatsBuilderMixin.__init__(self)
        PlotsBuilderMixin.__init__(self)

        checks.assert_instance_of(data, dict)
        for k, v in data.items():
            checks.assert_meta_equal(v, data[list(data.keys())[0]])
        self._data = data
        self._tz_localize = tz_localize
        self._tz_convert = tz_convert
        self._missing_index = missing_index
        self._missing_columns = missing_columns
        self._download_kwargs = download_kwargs
Esempio n. 4
0
    def from_data(cls: tp.Type[DataT],
                  data: tp.Data,
                  tz_localize: tp.Optional[tp.TimezoneLike] = None,
                  tz_convert: tp.Optional[tp.TimezoneLike] = None,
                  missing_index: tp.Optional[str] = None,
                  missing_columns: tp.Optional[str] = None,
                  wrapper_kwargs: tp.KwargsLike = None,
                  **kwargs) -> DataT:
        """Create a new `Data` instance from (aligned) data.

        Args:
            data (dict): Dictionary of array-like objects keyed by symbol.
            tz_localize (timezone_like): If the index is tz-naive, convert to a timezone.

                See `vectorbt.utils.datetime_.to_timezone`.
            tz_convert (timezone_like): Convert the index from one timezone to another.

                See `vectorbt.utils.datetime_.to_timezone`.
            missing_index (str): See `Data.align_index`.
            missing_columns (str): See `Data.align_columns`.
            wrapper_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper`.
            **kwargs: Keyword arguments passed to the `__init__` method.

        For defaults, see `data` in `vectorbt._settings.settings`."""
        from vectorbt._settings import settings
        data_cfg = settings['data']

        # Get global defaults
        if tz_localize is None:
            tz_localize = data_cfg['tz_localize']
        if tz_convert is None:
            tz_convert = data_cfg['tz_convert']
        if missing_index is None:
            missing_index = data_cfg['missing_index']
        if missing_columns is None:
            missing_columns = data_cfg['missing_columns']
        if wrapper_kwargs is None:
            wrapper_kwargs = {}

        data = data.copy()
        for k, v in data.items():
            # Convert array to pandas
            if not isinstance(v, (pd.Series, pd.DataFrame)):
                v = np.asarray(v)
                if v.ndim == 1:
                    v = pd.Series(v)
                else:
                    v = pd.DataFrame(v)

            # Perform operations with datetime-like index
            if isinstance(v.index, pd.DatetimeIndex):
                if tz_localize is not None:
                    if not is_tz_aware(v.index):
                        v = v.tz_localize(to_timezone(tz_localize))
                if tz_convert is not None:
                    v = v.tz_convert(to_timezone(tz_convert))
                v.index.freq = v.index.inferred_freq
            data[k] = v

        # Align index and columns
        data = cls.align_index(data, missing=missing_index)
        data = cls.align_columns(data, missing=missing_columns)

        # Create new instance
        symbols = list(data.keys())
        wrapper = ArrayWrapper.from_obj(data[symbols[0]], **wrapper_kwargs)
        return cls(
            wrapper,
            data,
            tz_localize=tz_localize,
            tz_convert=tz_convert,
            missing_index=missing_index,
            missing_columns=missing_columns,
            **kwargs
        )