예제 #1
0
def read_text(path: str) -> 'TimeSeries':
    """Read a single TimeSeries from a directory written by to_text()

    The layout label returned by check_directory_structure() decides what is
    loaded: the time series file alone for "timeseries", or the time series
    file plus the metadata file for "timeseries with metadata".

    Args:
        path: a String representing the path to the directory to read

    Returns:
        TimeSeries

    Raises:
        IOError: if the directory layout is not one of the two layouts above

    """
    dir_type = check_directory_structure(path)

    # Reject None as well as any other label up front, so the function can
    # never fall off the end and silently return None.
    if dir_type not in ("timeseries", "timeseries with metadata"):
        raise IOError("The path doesn't include any recognizable files")

    # The time series file is needed for both supported layouts.
    series = csv_to_dataframe(
        f"{path}/{TIME_SERIES_FILENAME}.{TIME_SERIES_EXT}")
    if dir_type == "timeseries":
        return TimeSeries(series)

    metadata = json_to_metadata(f"{path}/{METADATA_FILENAME}.{METADATA_EXT}")
    ts = TimeSeries(series, metadata)
    # Expose the class label as an attribute when the metadata carries one.
    if METADATA_CLASS_LABEL in ts.metadata:
        ts.class_label = ts.metadata[METADATA_CLASS_LABEL]
    return ts
    def create(length: int, start: str, end: str,
            freq: Union[str, 'TimeSeries', None] = None) \
            -> 'TimeSeriesDataset':
        """
        Create an empty TimeSeriesDataset object with a defined index and period

        Each of the `length` entries is built by its own call to
        TimeSeries.create(start, end, freq), so the entries are distinct
        objects and do not alias one another.

        Args:
            length: int representing the number of TimeSeries to include in the
                TimeSeriesDataset
            start: str of the start of the DatetimeIndex
                (as in Pandas.date_range())
            end: the end of the DatetimeIndex (as in Pandas.date_range())
            freq: the optional frequency it can be a str or a TimeSeries
                (to copy its frequency)

        Returns:
            TimeSeriesDataset

        Raises:
            AssertionError: if length is not >= 1
        """
        # Raised explicitly instead of through an `assert` statement so the
        # check still runs under `python -O`; the exception type is unchanged.
        if not (length >= 1):
            raise AssertionError('Length must be >= 1')
        data = [TimeSeries.create(start, end, freq) for _ in range(length)]
        return TimeSeriesDataset(data)
예제 #3
0
 def __prepare_time_series_for_prophet(ts: TimeSeries):
     """Build a DataFrame with "ds" and "y" columns from a TimeSeries

     Works on a copy of ts.to_df(): the index is stored in a "ds" column,
     the "values" column is renamed to "y", the index is replaced by a
     default integer index and all column labels are cast to str.
     The "ds"/"y" names are presumably the ones Prophet expects - verify
     against the Prophet documentation.

     Args:
         ts: TimeSeries to convert

     Returns:
         DataFrame
     """
     frame = ts.to_df().copy()
     frame["ds"] = frame.index
     frame = frame.rename(columns={"values": "y"}).reset_index(drop=True)
     frame.columns = frame.columns.astype(str)
     return frame
예제 #4
0
    def minmax(ts: 'TimeSeries') -> 'TimeSeries':
        r"""Apply min-max scaling to a TimeSeries (values within a [0,1] range)

        .. math::
            x_{scaled} = \frac{x - x_{min}}{x_{max} - x_{min}}

        Args:
            ts: the TimeSeries whose values are rescaled

        Returns:
            TimeSeries built from the rescaled values and the metadata of ts
        """
        values = ts.series
        lowest = values.min()
        value_range = values.max() - lowest
        return TimeSeries((values - lowest) / value_range, ts.metadata)
예제 #5
0
    def predict(self, horizon: Union[str, TimeSeries], freq: str = None) \
            -> TimeSeries:
        """
        Predict a TimeSeries given a horizon

        Args:
            horizon: either a str as in https://pandas.pydata.org/pandas-docs/stable/user_guide/timedeltas.html
                or a TimeSeries, in which case the prediction is indexed by
                the index of its series
            freq: frequency in DateOffset string https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
                NOTE(review): accepted but not used in this method - confirm
                whether it should be forwarded to make_future_arrays()

        Returns:
            TimeSeries

        Raises:
            ValueError: if horizon is neither a str nor a TimeSeries
        """
        super().predict(horizon)
        if isinstance(horizon, str):
            future, index = self.make_future_arrays(horizon)
        elif isinstance(horizon, TimeSeries):
            # Only the first returned array is fed to the model; the second
            # one is not needed for prediction.
            future, _ = self.__prepare_series_for_sklearn(horizon)
            index = horizon.series.index
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # `future` further down.
            raise ValueError("horizon argument type isn't recognized")
        forecast = self.model.predict(future)
        return TimeSeries(Series(data=forecast, index=index))
예제 #6
0
    def predict(self, horizon: Union[str, TimeSeries, TimeSeriesDataset],
            freq: str = None) \
            -> TimeSeries:
        """
        Predict a TimeSeries given a horizon

        Args:
            horizon: for a univariate model, a str (forwarded together with
                freq to make_future_dataframe()) or a TimeSeries; for a
                multivariate model, a TimeSeriesDataset. In the multivariate
                case the component of horizon selected by self.y is
                overwritten in place with its empty() version.
            freq: forwarded to make_future_dataframe() when horizon is a str

        Returns:
            TimeSeries holding the TIME_SERIES_VALUES, TIME_SERIES_CI_LOWER
            and TIME_SERIES_CI_UPPER columns, indexed by the forecast's "ds"
            column

        Raises:
            ValueError: if the type of horizon is not supported by the type
                of the model, or if the type of the model isn't recognized
        """
        super().predict(horizon)

        # Prepare the data
        if self.type == MODEL_TYPE_UNIVARIATE:
            if isinstance(horizon, str):
                future = self.make_future_dataframe(horizon, freq)
                metadata = None
            elif isinstance(horizon, TimeSeries):
                future = self.__prepare_time_series_for_prophet(
                    horizon.empty())
                metadata = horizon.metadata
            else:
                raise ValueError("horizon argument type isn't recognized")

        elif self.type == MODEL_TYPE_MULTIVARIATE:
            if not isinstance(horizon, TimeSeriesDataset):
                raise ValueError("horizon argument type isn't recognized")
            horizon[:, self.y] = horizon[:, self.y].empty()
            future = self.__prepare_time_series_dataset_for_prophet(
                horizon, self.y)
            metadata = horizon[:, self.y].data[self.y].metadata

        else:
            # Reached when self.type is neither univariate nor multivariate.
            # The exception used to be constructed here without being raised.
            raise ValueError("horizon argument type isn't recognized")

        # Predict
        forecast = self.model.predict(future)
        forecast = forecast.rename(columns={
            "yhat": TIME_SERIES_VALUES,
            "yhat_lower": TIME_SERIES_CI_LOWER,
            "yhat_upper": TIME_SERIES_CI_UPPER
        })
        df = forecast[[
            TIME_SERIES_VALUES, TIME_SERIES_CI_LOWER, TIME_SERIES_CI_UPPER
        ]]
        df.index = forecast["ds"]

        # Wrap the selected forecast columns in a TimeSeries
        ts = TimeSeries(df, metadata)
        return ts
예제 #7
0
    def zscore(ts: 'TimeSeries') -> 'TimeSeries':
        r"""Standardize the values of a TimeSeries: subtract the mean, then
        divide by the standard deviation

        .. math::
            z = \frac{x - \mu}{\sigma}

        where :
            - :math:`z` is the scaled value
            - :math:`x` is the value
            - :math:`\mu` is the mean of the time series
            - :math:`\sigma` is the standard deviation of the time series

        Args:
            ts: the TimeSeries whose values are standardized

        Returns:
            TimeSeries built from the standardized values and the metadata
            of ts
        """
        values = ts.series
        centered = values - values.mean()
        return TimeSeries(centered / values.std(), ts.metadata)
예제 #8
0
def csv_to_tsd(path: str) -> 'TimeSeriesDataset':
    """Load a csv file as a TimeSeriesDataset

    The first csv column is used as the index and parsed as datetimes. Every
    other column is expected to be named "<i>_<name>", where <i> is the
    integer position of the TimeSeries the column belongs to. One TimeSeries
    is created for each position from 0 up to the position found in the name
    of the last column, with the "<i>_" prefix removed from its column names.

    Args:
        path: the path to the csv file

    Returns: TimeSeriesDataset

    """
    df = pd.read_csv(path, index_col=0)
    df.index = pd.to_datetime(df.index)

    # Parse everything before the first "_" of the last column name, not just
    # its first character, so that files with 10 or more series load fully.
    last_column_name = df.columns[-1]
    last_position = int(last_column_name.partition("_")[0])

    tsd = []
    for i in range(last_position + 1):
        prefix = f"{i}_"
        tmp = df.filter(regex=f"^{prefix}")
        # Every selected column starts with the prefix; cut off exactly that
        # prefix so a later occurrence of the same text in the name survives.
        tmp.columns = [col[len(prefix):] for col in tmp.columns]
        tsd.append(TimeSeries(tmp))

    return TimeSeriesDataset(data=tsd)