def align_index(cls, data: tp.Data, missing: str = 'nan') -> tp.Data:
    """Bring every object in `data` onto one shared index.

    The argument `missing` decides what happens when indexes differ:

    * 'nan': use the union of the indexes, so absent data points become NaN
    * 'drop': use the intersection of the indexes
    * 'raise': raise an error

    A dictionary that holds exactly one symbol is returned as is.
    Any other value of `missing` raises a `ValueError` as soon as
    a mismatch is encountered.
    """
    if len(data) == 1:
        return data

    target = None
    for obj in data.values():
        if target is None:
            # The first object seeds the index that the rest is compared to
            target = obj.index
            continue

        shared = target.intersection(obj.index)
        combined = target.union(obj.index)
        if len(shared) == len(combined):
            # Intersection and union have equal length: treated as matching
            continue

        if missing == 'nan':
            warnings.warn("Symbols have mismatching index. "
                          "Setting missing data points to NaN.", stacklevel=2)
            target = combined
        elif missing == 'drop':
            warnings.warn("Symbols have mismatching index. "
                          "Dropping missing data points.", stacklevel=2)
            target = shared
        elif missing == 'raise':
            raise ValueError("Symbols have mismatching index")
        else:
            raise ValueError(f"missing='{missing}' is not recognized")

    # Reindex each object to the resolved index
    aligned = {}
    for symbol, obj in data.items():
        aligned[symbol] = obj.reindex(index=target)
    return aligned
def align_columns(cls, data: tp.Data, missing: str = 'raise') -> tp.Data:
    """Align data to have the same columns.

    Series are treated as single-column frames while the common columns
    are determined. If every object in `data` is a Series, Series are
    returned; their name is reset to None if any input Series was unnamed.

    Args:
        data (dict): Dictionary of pandas objects keyed by symbol.
        missing (str): How to resolve mismatching columns.
            See `Data.align_index` for `missing`.

    Returns:
        `data` itself if it holds exactly one symbol, otherwise a new
        dictionary with every object reindexed to the common columns.

    Raises:
        ValueError: If columns mismatch and `missing` is 'raise'
            or is not recognized.
    """
    if len(data) == 1:
        return data

    columns = None
    multiple_columns = False
    name_is_none = False
    for v in data.values():
        if isinstance(v, pd.Series):
            if v.name is None:
                name_is_none = True
            v = v.to_frame()
        else:
            multiple_columns = True

        if columns is None:
            columns = v.columns
            continue
        if len(columns.intersection(v.columns)) == len(columns.union(v.columns)):
            continue

        if missing == 'nan':
            warnings.warn(
                "Symbols have mismatching columns. "
                "Setting missing data points to NaN.", stacklevel=2)
            columns = columns.union(v.columns)
        elif missing == 'drop':
            warnings.warn(
                "Symbols have mismatching columns. "
                "Dropping missing data points.", stacklevel=2)
            columns = columns.intersection(v.columns)
        elif missing == 'raise':
            raise ValueError("Symbols have mismatching columns")
        else:
            raise ValueError(f"missing='{missing}' is not recognized")

    # reindex
    new_data = {}
    for k, v in data.items():
        if isinstance(v, pd.Series):
            # Convert exactly as in the scan above. Passing `name=None`
            # explicitly labels the column None on pandas >= 2.0 (instead of
            # the default 0), which made the reindex below return only NaN
            # for unnamed Series.
            v = v.to_frame()
        v = v.reindex(columns=columns)
        if not multiple_columns:
            # Only Series were passed: hand Series back
            v = v[columns[0]]
            if name_is_none:
                v = v.rename(None)
        new_data[k] = v
    return new_data
def __init__(self,
             wrapper: ArrayWrapper,
             data: tp.Data,
             tz_localize: tp.Optional[tp.TimezoneLike],
             tz_convert: tp.Optional[tp.TimezoneLike],
             missing_index: str,
             missing_columns: str,
             download_kwargs: dict,
             **kwargs) -> None:
    """Initialize the instance from a wrapper and a dictionary of data.

    All named arguments are forwarded to `Wrapping.__init__` together with
    `**kwargs` and then stored on the instance under underscore-prefixed
    attributes. `data` must be a dict, and every value is compared against
    the first value using `checks.assert_meta_equal`.
    """
    # Everything except the wrapper is handed to the base class by keyword
    config = dict(
        data=data,
        tz_localize=tz_localize,
        tz_convert=tz_convert,
        missing_index=missing_index,
        missing_columns=missing_columns,
        download_kwargs=download_kwargs
    )
    Wrapping.__init__(self, wrapper, **config, **kwargs)
    StatsBuilderMixin.__init__(self)
    PlotsBuilderMixin.__init__(self)

    # Validate the data: a dict whose values all match the first value
    checks.assert_instance_of(data, dict)
    symbols = list(data.keys())
    for obj in data.values():
        checks.assert_meta_equal(obj, data[symbols[0]])

    self._data = data
    self._tz_localize = tz_localize
    self._tz_convert = tz_convert
    self._missing_index = missing_index
    self._missing_columns = missing_columns
    self._download_kwargs = download_kwargs
def from_data(cls: tp.Type[DataT],
              data: tp.Data,
              tz_localize: tp.Optional[tp.TimezoneLike] = None,
              tz_convert: tp.Optional[tp.TimezoneLike] = None,
              missing_index: tp.Optional[str] = None,
              missing_columns: tp.Optional[str] = None,
              wrapper_kwargs: tp.KwargsLike = None,
              **kwargs) -> DataT:
    """Build a new `Data` instance out of a dictionary of data.

    Each value is converted to a pandas object if needed, has its
    datetime index localized/converted, and is then aligned with the
    other values by index and by columns.

    Args:
        data (dict): Dictionary of array-like objects keyed by symbol.
        tz_localize (timezone_like): If the index is tz-naive, convert to a timezone.

            See `vectorbt.utils.datetime_.to_timezone`.
        tz_convert (timezone_like): Convert the index from one timezone to another.

            See `vectorbt.utils.datetime_.to_timezone`.
        missing_index (str): See `Data.align_index`.
        missing_columns (str): See `Data.align_columns`.
        wrapper_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper`.
        **kwargs: Keyword arguments passed to the `__init__` method.

    For defaults, see `data` in `vectorbt._settings.settings`."""
    from vectorbt._settings import settings
    data_cfg = settings['data']

    # Arguments left as None fall back to the global settings
    tz_localize = data_cfg['tz_localize'] if tz_localize is None else tz_localize
    tz_convert = data_cfg['tz_convert'] if tz_convert is None else tz_convert
    missing_index = data_cfg['missing_index'] if missing_index is None else missing_index
    missing_columns = data_cfg['missing_columns'] if missing_columns is None else missing_columns
    wrapper_kwargs = {} if wrapper_kwargs is None else wrapper_kwargs

    # Shallow copy: the caller's dictionary keeps its original values
    prepared = data.copy()
    for symbol in list(prepared.keys()):
        obj = prepared[symbol]

        # Anything that is not a pandas object goes through NumPy first
        if not isinstance(obj, (pd.Series, pd.DataFrame)):
            arr = np.asarray(obj)
            obj = pd.Series(arr) if arr.ndim == 1 else pd.DataFrame(arr)

        # Timezone handling and frequency only apply to a datetime index
        if isinstance(obj.index, pd.DatetimeIndex):
            if tz_localize is not None and not is_tz_aware(obj.index):
                obj = obj.tz_localize(to_timezone(tz_localize))
            if tz_convert is not None:
                obj = obj.tz_convert(to_timezone(tz_convert))
            obj.index.freq = obj.index.inferred_freq

        prepared[symbol] = obj

    # Align index first, then columns
    aligned = cls.align_index(prepared, missing=missing_index)
    aligned = cls.align_columns(aligned, missing=missing_columns)

    # The wrapper is derived from the first symbol's object
    first_symbol = list(aligned.keys())[0]
    wrapper = ArrayWrapper.from_obj(aligned[first_symbol], **wrapper_kwargs)
    return cls(
        wrapper,
        aligned,
        tz_localize=tz_localize,
        tz_convert=tz_convert,
        missing_index=missing_index,
        missing_columns=missing_columns,
        **kwargs
    )