Beispiel #1
0
def _auto_fill(series: TimeSeries, **interpolate_kwargs) -> TimeSeries:
    """
    Fill the missing values of `series` by interpolation.

    The interpolation is delegated to `pandas.DataFrame.interpolate()`, which is applied
    in place to the dataframe obtained from `series.pd_dataframe()`.

    Parameters
    ----------
    series
        The time series whose missing values are filled.
    interpolate_kwargs
        Keyword arguments forwarded to `pandas.DataFrame.interpolate()`.
        See `the documentation
        <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.interpolate.html>`_
        for the list of supported parameters.
        `limit_direction` defaults to 'both' when it is not provided, and `inplace` is
        always forced to True.

    Returns
    -------
    TimeSeries
        A TimeSeries built from the time index and frequency of `series` and the
        interpolated values.
    """

    # Caller-supplied options win over the 'both' default; `inplace` comes last so that
    # it overrides whatever the caller may have passed.
    options = {'limit_direction': 'both', **interpolate_kwargs, 'inplace': True}

    frame = series.pd_dataframe()
    frame.interpolate(**options)

    return TimeSeries.from_times_and_values(series.time_index(), frame.values, series.freq())
Beispiel #2
0
def extract_subseries(series: TimeSeries, min_gap_size: Optional[int] = 1) -> List[TimeSeries]:
    """
    Split `series` into sub-series delimited by its significant gaps of missing values.

    Parameters
    ----------
    series
        The TimeSeries to partition into sub-series.

    min_gap_size
        The minimum number of contiguous missing values for a gap to be considered
        significant. Defaults to 1.

    Returns
    -------
    subseries
        A list of TimeSeries, one per stretch of `series` lying between two significant gaps.
        When the stripped series contains no missing value, the list holds that single series.
    """

    # Strip first, so that missing values at either extreme are not treated as gaps
    series = series.strip()
    step = series.freq

    total_missing = series.pd_dataframe().isna().sum().sum()
    if total_missing == 0:
        return [series]

    # Each sub-series starts one step after a significant gap ends and stops one step
    # before the next one starts; the final bound is one step past the end of the series.
    significant_gaps = series.gaps().query(f'gap_size>={min_gap_size}')
    starts = [series.start_time(), *(significant_gaps['gap_end'] + step).to_list()]
    ends = [*(significant_gaps['gap_start'] - step).to_list(), series.end_time() + step]

    return [series[begin:stop] for begin, stop in zip(starts, ends)]
Beispiel #3
0
    def filter(self, series: TimeSeries):
        """
        Smooth `series` with a moving average and return the result as a new TimeSeries.

        The average is computed with :func:`pandas.DataFrame.rolling()` on the underlying DataFrame, using
        `self.window` as window size and `self.centered` to decide whether the windows are centered.
        Because `min_periods=1` is used, a value is produced as soon as a single observation falls in the
        window, so the result has the same length and time axis as `series` (which may create border effects).

        Parameters
        ----------
        series
            The deterministic series to average

        Returns
        -------
        TimeSeries
            A time series containing the averaged values, carrying over the static covariates and the
            hierarchy of `series`
        """
        rolling_windows = series.pd_dataframe(copy=False).rolling(
            window=self.window,
            min_periods=1,
            center=self.centered,
        )
        averaged = rolling_windows.mean()

        return TimeSeries.from_dataframe(
            averaged,
            static_covariates=series.static_covariates,
            hierarchy=series.hierarchy,
        )
Beispiel #4
0
    def ts_inverse_transform(
            series: TimeSeries,
            lmbda: Union[Sequence[float],
                         pd.core.series.Series]) -> TimeSeries:
        """
        Apply `inv_boxcox` to every column of `series`, each with its own lambda.

        Parameters
        ----------
        series
            The series to transform back.
        lmbda
            One lambda per column of `series`, matched to the columns by position.
            Either a plain sequence or a pandas Series.

        Returns
        -------
        TimeSeries
            A TimeSeries built from the dataframe of inverse-transformed columns.
        """
        # Fetch the dataframe once instead of once per column inside the wrapper.
        df = series.pd_dataframe(copy=False)

        # Lambdas are looked up by column position. Integer keys on a pandas Series are
        # label-based (or rely on a deprecated positional fallback), so a Series is
        # converted to a plain array to make the lookup strictly positional.
        if isinstance(lmbda, pd.Series):
            lmbda_by_position = lmbda.to_numpy()
        else:
            lmbda_by_position = lmbda

        def _inv_boxcox_wrapper(col):
            idx = df.columns.get_loc(col.name)  # position of the column from its name
            return inv_boxcox(col, lmbda_by_position[idx])

        return TimeSeries.from_dataframe(df.apply(_inv_boxcox_wrapper))
Beispiel #5
0
def missing_values_ratio(series: TimeSeries) -> float:
    """
    Compute the ratio of missing values in `series`.

    Missing values are counted per column of `series.pd_dataframe()`; the counts are
    averaged over the columns and the result is divided by `len(series)`.

    Parameters
    ----------
    series
        The time series to compute the ratio on

    Returns
    -------
    float
        The ratio of missing values
    """

    missing_per_column = series.pd_dataframe().isnull().sum()
    average_missing = missing_per_column.mean()

    return average_missing / len(series)
Beispiel #6
0
    def _fit(self,
             series: TimeSeries,
             future_covariates: Optional[TimeSeries] = None) -> None:
        """
        Fit a `staVARMA` model and store the fitted result in `self.model`.

        The parent `_fit()` is called first; the model is then fitted on
        `self.training_series` (presumably set by the parent call -- confirm against the
        base class), using the values of `future_covariates` as exogenous data when given.
        """
        super()._fit(series, future_covariates)
        target = self.training_series

        # The model takes the exogenous data as raw values, or None when there is none
        if future_covariates:
            exog_values = future_covariates.values()
        else:
            exog_values = None

        varma = staVARMA(
            endog=target.pd_dataframe(copy=False),
            exog=exog_values,
            order=(self.p, self.q),
            trend=self.trend,
        )

        self.model = varma.fit(disp=0)
Beispiel #7
0
    def fit(self,
            series: TimeSeries,
            future_covariates: Optional[TimeSeries] = None):
        """
        Difference the target `self.d` times, then delegate to the parent `fit()`.

        The last values of the original (undifferenced) `series` are stored in
        `self._last_values` beforehand. Returns `self`.
        """
        # Keep the last values of the untransformed target: they are needed for
        # back-transformation when d=1
        self._last_values = series.last_values()

        # The target has to be differenced before the parent fit() is called; each pass
        # drops the rows left undefined by diff() and keeps the series' metadata.
        for _ in range(self.d):
            differenced = series.pd_dataframe(copy=False).diff().dropna()
            series = TimeSeries.from_dataframe(
                df=differenced,
                static_covariates=series.static_covariates,
                hierarchy=series.hierarchy,
            )

        super().fit(series, future_covariates)

        return self
Beispiel #8
0
    def ts_fit(series: TimeSeries, lmbda: Optional[Union[float,
                                                         Sequence[float]]],
               method) -> Union[Sequence[float], pd.core.series.Series]:
        """
        Resolve the lambda value(s) to use, one per dimension of `series`.

        - `lmbda` is None: a lambda is computed for each column with `boxcox_normmax`
          (using `method`).
        - `lmbda` is a sequence: it is returned as is, after checking that it has
          `series.width` entries.
        - otherwise: the single value is replicated `series.width` times.
        """
        if lmbda is None:
            # One lambda is computed per dimension of the time series. The result is a
            # pd.core.series.Series, which does not inherit from collections.abc.Sequence
            return series.pd_dataframe(copy=False).apply(boxcox_normmax,
                                                         method=method)

        if not isinstance(lmbda, Sequence):
            # A single value: replicate it to match the dimensions of the time series
            return [lmbda] * series.width

        raise_if(
            len(lmbda) != series.width,
            "lmbda should have one value per dimension (ie. column or variable) of the time series",
            logger)
        return lmbda
Beispiel #9
0
def _const_fill(series: TimeSeries, fill: float = 0) -> TimeSeries:
    """
    Replace every missing value of `series` with the constant `fill` (zero by default).

    Parameters
    ----------
    series
        The TimeSeries whose missing values are replaced.
    fill
        The value used to replace the missing values.

    Returns
    -------
    TimeSeries
        A TimeSeries built from the time index and frequency of `series`, with all missing
        values set to `fill`.
    """

    times = series.time_index()
    filled = series.pd_dataframe().fillna(value=fill)

    return TimeSeries.from_times_and_values(times, filled, series.freq())
Beispiel #10
0
    def fit(
        self,
        series: TimeSeries,
        covariates: Optional[TimeSeries] = None,
        num_block_rows: Optional[int] = None,
    ) -> "KalmanFilter":
        """
        Initializes the Kalman filter using the N4SID algorithm.

        The state space model is identified with `NFourSID` from the observations (and the inputs, if any),
        and the resulting `Kalman` object is stored in `self.kf`.

        Parameters
        ----------
        series : TimeSeries
            The series of outputs (observations) used to infer the underlying state space model.
            This must be a deterministic series (containing one sample).
        covariates : Optional[TimeSeries]
            An optional series of inputs (control signal) that will also be used to infer the underlying state space
            model. This must be a deterministic series (containing one sample).
            It is first restricted to its intersection with `series`, and must then share the same time
            axis as `series`.
        num_block_rows : Optional[int]
            The number of block rows to use in the block Hankel matrices used in the N4SID algorithm.
            See the documentation of nfoursid.nfoursid.NFourSID for more information.
            If not provided, `max(10, self.dim_x)` is used, i.e. the dimensionality of the state space model
            but never fewer than 10.

        Returns
        -------
        self
            Fitted Kalman filter.
        """
        with_inputs = covariates is not None

        if with_inputs:
            self._expect_covariates = True
            covariates = covariates.slice_intersect(series)
            raise_if_not(
                series.has_same_time_as(covariates),
                "The number of timesteps in the series and the covariates must match.",
            )

        # TODO: Handle multiple timeseries. Needs reimplementation of NFourSID?
        # Observation columns are prefixed with "y_" and input columns with "u_", so that
        # both can be told apart once they sit in the same measurement frame.
        self.dim_y = series.width
        observed = series.pd_dataframe(copy=False)
        observed.columns = [f"y_{i}" for i in observed.columns]
        output_names = list(observed.columns)

        if not with_inputs:
            control_names = None
            measurements = observed
        else:
            self.dim_u = covariates.width
            control = covariates.pd_dataframe(copy=False)
            control.columns = [f"u_{i}" for i in control.columns]
            control_names = list(control.columns)
            measurements = pd.concat([observed, control], axis=1)

        block_rows = max(10, self.dim_x) if num_block_rows is None else num_block_rows

        identifier = NFourSID(
            measurements,
            output_columns=output_names,
            input_columns=control_names,
            num_block_rows=block_rows,
        )
        identifier.subspace_identification()
        identified_model, noise_covariance = identifier.system_identification(
            rank=self.dim_x)

        self.kf = Kalman(identified_model, noise_covariance)

        return self