def read_text(path: str) -> Any:
    """Read a single TimeSeries previously written by to_text()

    The layout found at `path` is identified by check_directory_structure().
    The values file is always loaded; when the layout also contains
    metadata, the metadata file is loaded too and, if it holds a class
    label, that label is copied onto the returned TimeSeries.

    Args:
        path: a String representing the path of the directory to read

    Returns:
        TimeSeries

    Raises:
        IOError: if no recognizable files are found, or if the detected
            layout is not a single TimeSeries (with or without metadata)
    """
    dir_type = check_directory_structure(path)

    if dir_type is None:
        raise IOError("The path doesn't' include any recognizable files")
    if dir_type not in ("timeseries", "timeseries with metadata"):
        # Any other layout used to fall through and return None silently,
        # which only surfaced later as an AttributeError in the caller.
        raise IOError(
            "The path includes a '{}' structure, which can't be read as a "
            "single TimeSeries".format(dir_type))

    # Both supported layouts share the same values file
    data = "{}/{}.{}".format(path, TIME_SERIES_FILENAME, TIME_SERIES_EXT)
    series = csv_to_dataframe(data)

    if dir_type == "timeseries":
        return TimeSeries(series)

    meta = "{}/{}.{}".format(path, METADATA_FILENAME, METADATA_EXT)
    metadata = json_to_metadata(meta)
    ts = TimeSeries(series, metadata)
    if METADATA_CLASS_LABEL in ts.metadata:
        ts.class_label = ts.metadata[METADATA_CLASS_LABEL]
    return ts
def create(length: int, start: str, end: str,
           freq: Union[str, 'TimeSeries'] = None) \
        -> 'TimeSeriesDataset':
    """
    Create an empty TimeSeriesDataset object with a defined index and period

    Every entry of the dataset is built by its own call to
    TimeSeries.create(), so the entries never share the same object.

    Args:
        length: int representing the number of TimeSeries to include in the
            TimeSeriesDataset
        start: str of the start of the DatetimeIndex
            (as in Pandas.date_range())
        end: the end of the DatetimeIndex (as in Pandas.date_range())
        freq: the optional frequency it can be a str or a TimeSeries
            (to copy its frequency)

    Returns:
        TimeSeriesDataset

    Raises:
        AssertionError: if length is lower than 1
    """
    # Check length parameter. Raised explicitly (rather than with `assert`)
    # so the check is not stripped when Python runs with -O; the exception
    # type is kept for callers that already expect AssertionError.
    if not length >= 1:
        raise AssertionError('Length must be >= 1')

    # One TimeSeries per slot: appending the same instance `length` times
    # would make every entry of the dataset an alias of a single object.
    data = [TimeSeries.create(start, end, freq) for _ in range(length)]
    return TimeSeriesDataset(data)
def __prepare_time_series_for_prophet(ts: TimeSeries):
    """Build the DataFrame handed to Prophet from a TimeSeries

    Works on a copy of ts.to_df(), so the given TimeSeries is not modified:
      - the index is exposed as a "ds" column
      - the index itself is replaced by a default integer index
      - the "values" column is renamed to "y"
      - all the column labels are cast to str

    Args:
        ts: TimeSeries to convert

    Returns:
        DataFrame
    """
    frame = ts.to_df().copy()
    frame["ds"] = frame.index
    frame = frame.reset_index(drop=True).rename(columns={"values": "y"})
    frame.columns = frame.columns.astype(str)
    return frame
def minmax(ts: 'TimeSeries') -> 'TimeSeries':
    r"""Rescale the values of a TimeSeries into the [0,1] range (min max
    scaling)

    .. math::
        x_{scaled} = \frac{x - x_{min}}{x_{max} - x_{min}}

    The metadata of the given TimeSeries is passed on to the result.

    Args:
        ts: TimeSeries to scale

    Returns:
        TimeSeries
    """
    values = ts.series
    lowest = values.min()
    span = values.max() - lowest
    return TimeSeries((values - lowest) / span, ts.metadata)
def predict(self, horizon: Union[str, TimeSeries], freq: str = None) \
        -> TimeSeries:
    """
    Predict a TimeSeries given a horizon

    Args:
        horizon: either a str as in
            https://pandas.pydata.org/pandas-docs/stable/user_guide/timedeltas.html
            or a TimeSeries, in which case the prediction is made on the
            index of that TimeSeries
        freq: frequency in DateOffset string
            https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
            (accepted for interface compatibility; not used by this
            implementation)

    Returns:
        TimeSeries

    Raises:
        ValueError: if horizon is neither a str nor a TimeSeries
    """
    super().predict(horizon)

    if isinstance(horizon, str):
        future, index = self.make_future_arrays(horizon)
    elif isinstance(horizon, TimeSeries):
        # Only the first returned array is needed to predict; the second
        # one is ignored.
        future, _ = self.__prepare_series_for_sklearn(horizon)
        index = horizon.series.index
    else:
        # Without this branch `future` and `index` stay unbound and the
        # call below fails with an unrelated UnboundLocalError.
        raise ValueError("horizon argument type isn't recognized")

    forecast = self.model.predict(future)
    return TimeSeries(Series(data=forecast, index=index))
def predict(self, horizon: Union[str, TimeSeries, TimeSeriesDataset],
            freq: str = None) \
        -> TimeSeries:
    """
    Predict a TimeSeries given a horizon

    Args:
        horizon: what to predict on, depending on the type of the model:
            - univariate model: a str (passed with freq to
              make_future_dataframe()) or a TimeSeries, whose metadata is
              copied onto the prediction
            - multivariate model: a TimeSeriesDataset. Note that the
              component selected by self.y is replaced in place by its
              empty() version, so the dataset given by the caller is
              modified.
        freq: frequency passed to make_future_dataframe(); only used when
            horizon is a str

    Returns:
        TimeSeries holding the predicted values and the lower and upper
        bounds returned by the model, indexed by the "ds" column of the
        forecast

    Raises:
        ValueError: if horizon isn't of a type supported by the type of
            the model
    """
    super().predict(horizon)

    # Prepare the data
    future = None
    metadata = None
    if self.type == MODEL_TYPE_UNIVARIATE:
        if isinstance(horizon, str):
            future = self.make_future_dataframe(horizon, freq)
        elif isinstance(horizon, TimeSeries):
            future = self.__prepare_time_series_for_prophet(
                horizon.empty())
            metadata = horizon.metadata
    elif self.type == MODEL_TYPE_MULTIVARIATE:
        if isinstance(horizon, TimeSeriesDataset):
            horizon[:, self.y] = horizon[:, self.y].empty()
            future = self.__prepare_time_series_dataset_for_prophet(
                horizon, self.y)
            metadata = horizon[:, self.y].data[self.y].metadata

    if future is None:
        # The exception used to be built without being raised, so the code
        # carried on and crashed on the unbound `future` variable instead.
        raise ValueError("horizon argument type isn't recognized")

    # Predict
    forecast = self.model.predict(future)
    forecast.rename(columns={
        "yhat": TIME_SERIES_VALUES,
        "yhat_lower": TIME_SERIES_CI_LOWER,
        "yhat_upper": TIME_SERIES_CI_UPPER
    }, inplace=True)

    # Keep only the prediction and its bounds, indexed by timestamp
    df = forecast[[
        TIME_SERIES_VALUES,
        TIME_SERIES_CI_LOWER,
        TIME_SERIES_CI_UPPER
    ]]
    df.index = forecast["ds"]

    ts = TimeSeries(df, metadata)
    return ts
def zscore(ts: 'TimeSeries') -> 'TimeSeries':
    r"""Standardize the values of a TimeSeries: subtract the mean, then
    divide by the standard deviation

    .. math::

        z = \frac{x - \mu}{\sigma}

    where :

    - :math:`z` is the scaled value
    - :math:`x` is the value
    - :math:`\mu` is the mean of the time series
    - :math:`\sigma` is the standard deviation of the time series

    The metadata of the given TimeSeries is passed on to the result.

    Args:
        ts: TimeSeries to scale

    Returns:
        TimeSeries
    """
    values = ts.series
    centered = values - values.mean()
    return TimeSeries(centered / values.std(), ts.metadata)
def csv_to_tsd(path: str) -> 'TimeSeriesDataset':
    """Load a csv file as a TimeSeriesDataset

    The first column of the file is used as the index and converted to
    datetimes. The other columns are expected to be named "<i>_<name>",
    where <i> is the 0-based position of the TimeSeries they belong to.
    The number of TimeSeries is deduced from the prefix of the last column.

    Args:
        path: the path to the csv file

    Returns:
        TimeSeriesDataset

    Raises:
        ValueError: if the last column name doesn't start with an integer
            prefix
    """
    df = pd.read_csv(path, index_col=0)
    df.index = pd.to_datetime(df.index)

    # Read the whole prefix before the first "_": looking at the first
    # character only breaks as soon as there are 10 TimeSeries or more
    # (e.g. "12_values" would be read as 1).
    last_column_name = df.columns[-1]
    last_position = int(last_column_name.partition("_")[0])

    tsd = []
    for i in range(last_position + 1):
        prefix = f"{i}_"
        tmp = df.filter(regex=f"^{prefix}")
        # Strip the leading prefix only; splitting on it would also cut
        # names that contain the same text again (e.g. "0_temp0_max").
        tmp.columns = [col[len(prefix):] for col in tmp.columns]
        tsd.append(TimeSeries(tmp))

    return TimeSeriesDataset(data=tsd)