Example #1
def smape_loss(y_test, y_pred, calculate_per_column: bool = False):
    """Symmetric mean absolute percentage error

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.
    calculate_per_column : bool, optional (default=False)
        Whether the loss should be calculated per column.

    Returns
    -------
    loss : float
        sMAPE loss
    """
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)

    if calculate_per_column:
        y_test = y_test.stack().reset_index(drop=True)
        y_pred = y_pred.stack().reset_index(drop=True)

    check_equal_time_index(y_test, y_pred)
    numerator = np.abs(y_test - y_pred)
    denominator = np.abs(y_test) + np.abs(y_pred)
    return np.mean(2.0 * numerator / denominator)
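A minimal standalone sketch of the same sMAPE computation on made-up values, with the sktime input checks (check_y, check_equal_time_index) omitted for brevity:

import numpy as np
import pandas as pd

y_test = pd.Series([100.0, 110.0, 120.0])
y_pred = pd.Series([90.0, 105.0, 130.0])

# elementwise 2|y - y_hat| / (|y| + |y_hat|), averaged over the horizon
numerator = np.abs(y_test - y_pred)
denominator = np.abs(y_test) + np.abs(y_pred)
smape = np.mean(2.0 * numerator / denominator)  # a fraction in [0, 2]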
Example #2
def mase_loss(y_test, y_pred, y_train, sp=1):
    """Mean absolute scaled error.

    This scale-free error metric can be used to compare forecast methods on
    a single series and also to compare forecast accuracy between series.
    This metric is well suited to intermittent-demand series because it
    never gives infinite or undefined values.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.
    y_train : pandas Series of shape = (n_obs,)
        Observed training values.
    sp : int
        Seasonal periodicity of training data.

    Returns
    -------
    loss : float
        MASE loss

    References
    ----------
    ..[1]   Hyndman, R. J. (2006). "Another look at measures of forecast
            accuracy", Foresight, Issue 4.
    """
    # input checks
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    y_train = check_y(y_train)
    check_equal_time_index(y_test, y_pred)

    # check if training set is prior to test set
    if y_train is not None:
        check_time_index(y_train.index)
        if y_train.index.max() >= y_test.index.min():
            raise ValueError("Found `y_train` with time index which is not "
                             "before time index of `y_pred`")

    #  naive seasonal prediction
    y_train = np.asarray(y_train)
    y_pred_naive = y_train[:-sp]

    # mean absolute error of naive seasonal prediction
    mae_naive = np.mean(np.abs(y_train[sp:] - y_pred_naive))

    # if training data is flat, mae may be zero,
    # return np.nan to avoid divide by zero error
    # and np.inf values
    if mae_naive == 0:
        return np.nan
    else:
        return np.mean(np.abs(y_test - y_pred)) / mae_naive
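The scaling can be pictured with a small standalone sketch on made-up values (sp=1, input checks omitted): the forecast MAE is divided by the in-sample MAE of the naive seasonal forecast.

import numpy as np
import pandas as pd

y_train = pd.Series([10.0, 12.0, 11.0, 13.0, 12.0], index=range(5))
y_test = pd.Series([14.0, 13.0], index=[5, 6])
y_pred = pd.Series([13.0, 14.0], index=[5, 6])

sp = 1
y_train_arr = np.asarray(y_train)
# in-sample MAE of the naive seasonal forecast y_t = y_{t-sp}
mae_naive = np.mean(np.abs(y_train_arr[sp:] - y_train_arr[:-sp]))
mase = np.mean(np.abs(y_test - y_pred)) / mae_naive  # values below 1 beat the naive forecast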
Example #3
File: _pipeline.py  Project: wh28325/sktime
    def fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored.

        Returns
        -------
        self : returns an instance of self.
        """
        self.steps_ = self._check_steps()
        self._set_y_X(y, X)
        self._set_fh(fh)

        # transform
        yt = check_y(y)
        for step_idx, name, transformer in self._iter_transformers():
            t = clone(transformer)
            yt = t.fit_transform(yt)
            self.steps_[step_idx] = (name, t)

        # fit forecaster
        name, forecaster = self.steps[-1]
        f = clone(forecaster)
        f.fit(yt, X, fh)
        self.steps_[-1] = (name, f)

        self._is_fitted = True
        return self
Example #4
def seasonality_test_R(y, sp):
    """Seasonality test used in M4 competition
    R and Python versions were inconsistent [2], this is the Python
    translation of the R version [1].
    References
    ----------
    ..[1]  https://github.com/Mcompetitions/M4-methods/blob/master
    /Benchmarks%20and%20Evaluation.R
    ..[2]  https://github.com/Mcompetitions/M4-methods/issues/25
    """
    y = check_y(y)
    y = np.asarray(y)
    n_timepoints = len(y)

    sp = check_sp(sp)
    if sp == 1:
        return False

    if n_timepoints < 3 * sp:
        warn("Did not perform seasonality test, as `y`` is too short for the "
             "given `sp`, returned: False")
        return False

    else:
        coefs = acf(y, nlags=sp, fft=False)  # acf coefficients
        coef = coefs[sp]  # coefficient to check

        tcrit = 1.645  # 90% confidence level
        limits = tcrit / np.sqrt(n_timepoints) * np.sqrt(
            np.cumsum(np.append(1, 2 * coefs[1:]**2)))
        limit = limits[sp - 1]  # zero-based indexing
        return np.abs(coef) > limit
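A standalone sketch of the same check on a toy seasonal series (sp=12), using statsmodels' acf directly; the data is made up:

import numpy as np
from statsmodels.tsa.stattools import acf

rng = np.random.default_rng(0)
sp = 12
n_timepoints = 10 * sp
y = (10 + 5 * np.sin(2 * np.pi * np.arange(n_timepoints) / sp)
     + rng.normal(scale=0.5, size=n_timepoints))

coefs = acf(y, nlags=sp, fft=False)
tcrit = 1.645  # 90% confidence level
limits = tcrit / np.sqrt(n_timepoints) * np.sqrt(
    np.cumsum(np.append(1, 2 * coefs[1:] ** 2)))
is_seasonal = np.abs(coefs[sp]) > limits[sp - 1]  # True for this toy series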
Example #5
    def fit(self, y, **fit_params):
        """Fit to data.

        Parameters
        ----------
        y : pd.Series
        fit_params : dict

        Returns
        -------
        self : an instance of self
        """

        y = check_y(y)
        self._set_oh_index(y)
        sp = check_sp(self.sp)
        self.seasonal_ = seasonal_decompose(
            y,
            model=self.model,
            period=sp,
            filt=None,
            two_sided=True,
            extrapolate_trend=0).seasonal.iloc[:sp]
        self._is_fitted = True
        return self
Example #6
    def inverse_transform(self, y):
        self.check_is_fitted()
        yt = check_y(y)
        for step_idx, name, transformer in self._iter_transformers(
                reverse=True):
            yt = transformer.inverse_transform(yt)
        return yt
Example #7
File: theta.py  Project: wh28325/sktime
    def fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored.

        Returns
        -------
        self : returns an instance of self.
        """
        y = check_y(y)
        sp = check_sp(self.sp)
        if sp > 1 and not self.deseasonalize:
            warn("`sp` is ignored when `deseasonalise`=False")

        if self.deseasonalize:
            self.deseasonalizer_ = Deseasonalizer(sp=self.sp,
                                                  model="multiplicative")
            y = self.deseasonalizer_.fit_transform(y)

        # fit exponential smoothing forecaster
        # find theta lines: Theta lines are just SES + drift
        super(ThetaForecaster, self).fit(y, fh=fh)
        self.smoothing_level_ = self._fitted_forecaster.params[
            "smoothing_level"]

        # compute trend
        self.trend_ = self._compute_trend(y)
        self._is_fitted = True
        return self
Example #8
    def inverse_transform(self, y, **transform_params):
        """Inverse transform data.

        Parameters
        ----------
        y : pd.Series

        Returns
        -------
        yt : pd.Series
            Inverse-transformed time series.
        """
        self.check_is_fitted()
        check_y(y)

        x = self._tabularise(y)
        xt = self.transformer_.inverse_transform(x)
        return self._detabularise(xt, index=y.index)
Example #9
    def transform(self, y, **transform_params):
        """Transform data.
        Returns a transformed version of y.

        Parameters
        ----------
        y : pd.Series

        Returns
        -------
        yt : pd.Series
            Transformed time series.
        """
        self.check_is_fitted()
        check_y(y)

        x = self._tabularise(y)
        xt = self.transformer_.transform(x)
        return self._detabularise(xt, index=y.index)
Example #10
def plot_ys(*ys, labels=None):
    """Plot time series

    Parameters
    ----------
    ys : pd.Series
        One or more time series
    labels : list, optional (default=None)
        Names of time series displayed in figure legend

    Returns
    -------
    fig : plt.Figure
    ax : plt.Axes
    """
    import matplotlib.pyplot as plt

    if labels is not None:
        if len(ys) != len(labels):
            raise ValueError("There must be one label for each time series, "
                             "but found inconsistent numbers of series and "
                             "labels.")
        labels_ = labels
    else:
        labels_ = ["" for _ in range(len(ys))]

    fig, ax = plt.subplots(1, figsize=plt.figaspect(.25))

    for y, label in zip(ys, labels_):
        check_y(y)

        # scatter if only a few points are available
        continuous_index = np.arange(y.index.min(), y.index.max() + 1)
        if len(y) < 3 or not np.array_equal(y.index.values, continuous_index):
            ax.scatter(y.index.values, y.values, label=label)
        # otherwise use line plot
        else:
            ax.plot(y.index.values, y.values, label=label)

    if labels is not None:
        plt.legend()

    return fig, ax
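A usage sketch, assuming plot_ys and its module-level imports (numpy as np, check_y) are in scope; the series and labels here are made up:

import pandas as pd

y_obs = pd.Series([1.0, 2.0, 3.0, 2.5, 3.5], index=pd.RangeIndex(5))
y_fc = pd.Series([0.5, 1.5, 2.0, 2.0, 3.0], index=pd.RangeIndex(5))

# with a continuous integer index and more than a few points, a line plot is used
fig, ax = plot_ys(y_obs, y_fc, labels=["observed", "forecast"])
fig.savefig("series.png")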
Example #11
    def fit(self, y_train, **fit_params):
        """Fit.

        Parameters
        ----------
        y_train : pd.Series
        fit_params : dict

        Returns
        -------
        self
        """
        check_y(y_train)

        x_train = self._tabularise(y_train)
        transformer = clone(self.transformer)
        self.transformer_ = transformer.fit(x_train)
        self._is_fitted = True
        return self
Example #12
def smape_loss(y_test, y_pred):
    """Symmetric mean absolute percentage error

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.

    Returns
    -------
    loss : float
        sMAPE loss
    """
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    check_equal_time_index(y_test, y_pred)

    numerator = np.abs(y_test - y_pred)
    denominator = np.abs(y_test) + np.abs(y_pred)
    return np.mean(2.0 * numerator / denominator)
Example #13
def mape_loss(y_test, y_pred):
    """Mean absolute percentage error (MAPE)
        MAPE output is non-negative floating point where the best value is 0.0.
        There is no limit on how large the error can be, particulalrly when `y_test`
        values are close to zero. In such cases the function returns a large value
        instead of `inf`.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.

    Returns
    -------
    loss : float
        MAPE loss expressed as a fraction rather than a percentage.


    Examples
    --------
    >>> import pandas as pd
    >>> y_test = pd.Series([1, -1, 2])
    >>> y_pred = pd.Series([2, -2, 4])
    >>> mape_loss(y_test, y_pred)
    1.0
    """

    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    check_equal_time_index(y_test, y_pred)

    eps = np.finfo(np.float64).eps

    return np.mean(np.abs(y_test - y_pred) / np.maximum(np.abs(y_test), eps))
Example #14
def autocorrelation_seasonality_test(y, sp):
    """Seasonality test used in M4 competition

    Parameters
    ----------
    y : pd.Series
        Time series to test for seasonality.
    sp : int
        Seasonal periodicity

    Returns
    -------
    is_seasonal : bool
        Test result

    References
    ----------
    ..[1]  https://github.com/Mcompetitions/M4-methods/blob/master
    /Benchmarks%20and%20Evaluation.R
    """
    y = check_y(y)
    sp = check_sp(sp)

    y = np.asarray(y)
    n_timepoints = len(y)

    if sp == 1:
        return False

    if n_timepoints < 3 * sp:
        warn(
            "Did not perform seasonality test, as `y`` is too short for the "
            "given `sp`, returned: False"
        )
        return False

    else:
        coefs = acf(y, nlags=sp, fft=False)  # acf coefficients
        coef = coefs[sp]  # coefficient to check

        tcrit = 1.645  # 90% confidence level
        limits = (
            tcrit
            / np.sqrt(n_timepoints)
            * np.sqrt(np.cumsum(np.append(1, 2 * coefs[1:] ** 2)))
        )
        limit = limits[sp - 1]  #  zero-based indexing
        return np.abs(coef) > limit
Example #15
    def transform(self, y, **transform_params):
        """Transform data.
        Returns a transformed version of y.

        Parameters
        ----------
        y : pd.Series

        Returns
        -------
        yt : pd.Series
            Transformed time series.
        """
        self.check_is_fitted()
        y = check_y(y)
        seasonal = self._align_seasonal(y)
        return self._detrend(y, seasonal)
Example #16
    def update(self, y_new, update_params=False):
        """Update fitted parameters

         Parameters
         ----------
         y_new : pd.Series
         X_new : pd.DataFrame
         update_params : bool, optional (default=False)

         Returns
         -------
         self : an instance of self
         """
        self.check_is_fitted()
        y_new = check_y(y_new)
        self._set_oh_index(y_new)
        return self
Example #17
    def update_predict(
        self,
        y_test,
        cv=None,
        X_test=None,
        update_params=False,
        return_pred_int=False,
        alpha=DEFAULT_ALPHA,
    ):
        """Make and update predictions iteratively over the test set.

        Parameters
        ----------
        y_test : pd.Series
        cv : temporal cross-validation generator, optional (default=None)
        X_test : pd.DataFrame, optional (default=None)
        update_params : bool, optional (default=False)
        return_pred_int : bool, optional (default=False)
        alpha : float or list of floats, optional (default=DEFAULT_ALPHA)

        Returns
        -------
        y_pred : pd.Series
            Point predictions
        y_pred_int : pd.DataFrame
            Prediction intervals
        """
        if return_pred_int:
            raise NotImplementedError()
        y_test = check_y(y_test)
        if cv is not None:
            cv = check_cv(cv)
        else:
            cv = SlidingWindowSplitter(start_with_window=True,
                                       window_length=1,
                                       fh=1)

        return self._predict_moving_cutoff(
            y_test,
            X=X_test,
            update_params=update_params,
            return_pred_int=return_pred_int,
            alpha=alpha,
            cv=cv,
        )
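The moving-cutoff behaviour can be pictured with a standalone toy sketch; a naive last-value rule stands in for the fitted forecaster, so this is illustrative only, not the sktime implementation:

import pandas as pd

y_train = pd.Series([10.0, 11.0, 12.0], index=[0, 1, 2])
y_test = pd.Series([13.0, 14.0, 15.0], index=[3, 4, 5])

history = y_train.copy()
preds = {}
for t, y_new in y_test.items():
    preds[t] = history.iloc[-1]  # 1-step-ahead prediction from the current cutoff
    history.loc[t] = y_new       # advance the cutoff with the newly observed value
y_pred = pd.Series(preds)        # point predictions indexed like y_test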
Example #18
    def fit(self, y_train, **fit_params):
        """Fit to data.

        Parameters
        ----------
        y_train : pd.Series
        fit_params : dict

        Returns
        -------
        self : an instance of self
        """

        y_train = check_y(y_train)
        self._set_y_index(y_train)
        sp = check_sp(self.sp)

        # set default condition
        if self.seasonality_test is None:
            self.seasonality_test_ = autocorrelation_seasonality_test
        else:
            self.seasonality_test_ = self.seasonality_test

        # check if data meets condition
        self.is_seasonal_ = self._check_condition(y_train)

        if self.is_seasonal_:
            # if condition is met, apply de-seasonalisation
            self.seasonal_ = seasonal_decompose(
                y_train,
                model=self.model,
                period=sp,
                filt=None,
                two_sided=True,
                extrapolate_trend=0,
            ).seasonal.iloc[:sp]
        else:
            # otherwise, set idempotent seasonal components
            self.seasonal_ = (np.zeros(self.sp) if self.model == "additive"
                              else np.ones(self.sp))

        self._is_fitted = True
        return self
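A standalone sketch of the seasonal_decompose call on toy quarterly-like data (sp=4), extracting one seasonal cycle as above; the series is made up:

import numpy as np
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose

sp = 4
n = 8 * sp
y = pd.Series(10 + np.tile([0.0, 2.0, -1.0, -1.0], n // sp) + 0.1 * np.arange(n))

seasonal_ = seasonal_decompose(
    y,
    model="additive",
    period=sp,
    filt=None,
    two_sided=True,
    extrapolate_trend=0,
).seasonal.iloc[:sp]  # one estimated seasonal cycle of length sp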
Example #19
    def update_predict(
        self,
        y,
        cv=None,
        X=None,
        update_params=True,
        return_pred_int=False,
        alpha=DEFAULT_ALPHA,
    ):
        """Make and update predictions iteratively over the test set.

        Parameters
        ----------
        y : pd.Series
        cv : temporal cross-validation generator, optional (default=None)
        X : pd.DataFrame, optional (default=None)
        update_params : bool, optional (default=True)
        return_pred_int : bool, optional (default=False)
        alpha : float or list of floats, optional (default=DEFAULT_ALPHA)

        Returns
        -------
        y_pred : pd.Series
            Point predictions
        y_pred_int : pd.DataFrame
            Prediction intervals
        """
        self.check_is_fitted()

        if return_pred_int:
            raise NotImplementedError()
        y = check_y(y)
        cv = check_cv(cv)

        return self._predict_moving_cutoff(
            y,
            cv,
            X,
            update_params=update_params,
            return_pred_int=return_pred_int,
            alpha=alpha,
        )
Example #20
    def transform(self, y, X=None):
        """
        Remove trend from the data.

        Parameters
        ----------
        y : pd.Series, list
            Time series to be detrended
        X : pd.DataFrame, optional (default=None)
            Exogenous variables

        Returns
        -------
        y_hat : pd.Series
            De-trended series
        """
        self.check_is_fitted()
        y = check_y(y)
        fh = self._get_relative_fh(y)
        y_pred = self.forecaster_.predict(fh=fh, X=X)
        return y - y_pred
Example #21
    def inverse_transform(self, y, X=None):
        """
        Add trend back to a time series

        Parameters
        ----------
        y : pd.Series, list
            Detrended time series to revert
        X : pd.DataFrame, optional (default=None)
            Exogenous variables

        Returns
        -------
        y_hat : pd.Series
            Series with the trend
        """
        self.check_is_fitted()
        y = check_y(y)
        fh = self._get_relative_fh(y)
        y_pred = self.forecaster_.predict(fh=fh, X=X)
        return y + y_pred
Example #22
    def _set_oh(self, y):
        """Set and update the observation horizon

        Parameters
        ----------
        y : pd.Series
        """
        y = check_y(y, allow_empty=True)

        # update only for non-empty data
        if len(y) > 0:
            # for fitting: since no previous observation horizon is present,
            # set new one
            if self._oh is None:
                self._oh = y

            # for updating: append observation horizon to previous one
            else:
                self._oh = y.combine_first(self.oh)

            # set cutoff to the end of the observation horizon
            self._set_cutoff(y.index[-1])
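The append step relies on pandas' combine_first, which keeps values from the new data where indices overlap; a toy sketch with made-up values:

import pandas as pd

oh = pd.Series([1.0, 2.0], index=[0, 1])     # previous observation horizon
y_new = pd.Series([2.5, 3.0], index=[1, 2])  # newly observed data

updated = y_new.combine_first(oh)
# index [0, 1, 2]; the overlapping point at index 1 is taken from y_new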
Example #23
def compute_expected_index_from_update_predict(y, fh, step_length):
    """Helper function to compute expected time index from `update_predict`"""
    # time points at which to make predictions
    fh = check_fh(fh)
    y = check_y(y)
    index = y.index.values

    start = index[0] - 1  # initial cutoff
    end = index[-1]  #  last point to predict
    cutoffs = np.arange(start, end, step_length)

    # only predict at time points if all steps in fh can be predicted before
    # the end of y_test
    cutoffs = cutoffs[cutoffs + max(fh) <= max(index)]
    n_cutoffs = len(cutoffs)

    # all time points predicted, including duplicates from overlapping fhs
    fh_broadcasted = np.repeat(fh, n_cutoffs).reshape(len(fh), n_cutoffs)
    pred_index = cutoffs + fh_broadcasted

    # return only unique time points
    return np.unique(pred_index)
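A worked toy run of the arithmetic above, with made-up numbers (integer index 0..9, fh=[1, 2], step_length=2):

import numpy as np

index = np.arange(10)
fh = np.array([1, 2])
step_length = 2

cutoffs = np.arange(index[0] - 1, index[-1], step_length)      # [-1, 1, 3, 5, 7]
cutoffs = cutoffs[cutoffs + fh.max() <= index.max()]           # all kept here
fh_broadcasted = np.repeat(fh, len(cutoffs)).reshape(len(fh), len(cutoffs))
expected = np.unique(cutoffs + fh_broadcasted)                 # [0 1 2 ... 9]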
Example #24
File: _reduce.py  Project: wh28325/sktime
    def _transform(self, y_train, X_train=None):
        """Transform data using rolling window approach"""
        if X_train is not None:
            raise NotImplementedError()
        y_train = check_y(y_train)

        # get integer time index
        cv = self._cv

        # Transform target series into tabular format using
        # rolling window tabularisation
        x_windows = []
        y_windows = []
        for x_index, y_index in cv.split(y_train):
            x_window = y_train.iloc[x_index]
            y_window = y_train.iloc[y_index]

            x_windows.append(x_window)
            y_windows.append(y_window)

        # Put into required input format for regression
        X_train, y_train = self._format_windows(x_windows, y_windows)
        return X_train, y_train
Example #25
    def _transform(self, y, X=None):
        """Transform data using rolling window approach"""
        if X is not None:
            raise NotImplementedError(
                "Exogenous variables `X` are not yet supported.")
        y = check_y(y)

        # get integer time index
        cv = self._cv

        # Transform target series into tabular format using
        # rolling window tabularisation
        x_windows = []
        y_windows = []
        for x_index, y_index in cv.split(y):
            x_window = y.iloc[x_index]
            y_window = y.iloc[y_index]

            x_windows.append(x_window)
            y_windows.append(y_window)

        # Put into required input format for regression
        X, y = self._format_windows(x_windows, y_windows)
        return X, y
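A standalone sketch of what the rolling-window tabularisation produces, with made-up settings (window_length=3, one step ahead): each row of X is a window of past values and y holds the value to predict after that window.

import numpy as np
import pandas as pd

y = pd.Series(np.arange(10.0))
window_length, fh = 3, 1

x_rows, y_rows = [], []
for start in range(len(y) - window_length - fh + 1):
    x_rows.append(y.iloc[start:start + window_length].to_numpy())
    y_rows.append(y.iloc[start + window_length + fh - 1])

X_tab = np.vstack(x_rows)   # shape (n_windows, window_length)
y_tab = np.array(y_rows)    # shape (n_windows,)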
Example #26
File: _tune.py  Project: zerefwayne/sktime
    def fit(self, y_train, fh=None, X_train=None, **fit_params):
        """Fit to training data.

        Parameters
        ----------
        y_train : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.
        X_train : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored.

        Returns
        -------
        self : returns an instance of self.
        """
        y_train = check_y(y_train)

        # validate cross-validator
        cv = check_cv(self.cv)
        base_forecaster = clone(self.forecaster)

        scoring = check_scoring(self.scoring)
        scorers = {scoring.name: scoring}
        refit_metric = scoring.name

        fit_and_score_kwargs = dict(
            scorer=scorers,
            fit_params=fit_params,
            return_train_score=self.return_train_score,
            return_times=True,
            return_parameters=False,
            error_score=self.error_score,
            verbose=self.verbose,
        )

        results = {}
        all_candidate_params = []
        all_out = []

        def evaluate_candidates(candidate_params):
            candidate_params = list(candidate_params)
            n_candidates = len(candidate_params)

            if self.verbose > 0:
                n_splits = cv.get_n_splits(y_train)
                print(  # noqa
                    "Fitting {0} folds for each of {1} candidates,"
                    " totalling {2} fits".format(n_splits, n_candidates,
                                                 n_candidates * n_splits))

            out = []
            for parameters in candidate_params:
                r = _fit_and_score(clone(base_forecaster),
                                   cv,
                                   y_train,
                                   X_train,
                                   parameters=parameters,
                                   **fit_and_score_kwargs)
                out.append(r)

            n_splits = cv.get_n_splits(y_train)

            if len(out) < 1:
                raise ValueError("No fits were performed. "
                                 "Was the CV iterator empty? "
                                 "Were there no candidates?")

            all_candidate_params.extend(candidate_params)
            all_out.extend(out)

            nonlocal results
            results = self._format_results(all_candidate_params, scorers,
                                           all_out)
            return results

        self._run_search(evaluate_candidates)

        self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
        self.best_score_ = results["mean_test_%s" %
                                   refit_metric][self.best_index_]
        self.best_params_ = results["params"][self.best_index_]

        self.best_forecaster_ = clone(base_forecaster).set_params(
            **self.best_params_)

        if self.refit:
            refit_start_time = time.time()
            self.best_forecaster_.fit(y_train,
                                      fh=fh,
                                      X_train=X_train,
                                      **fit_params)
            self.refit_time_ = time.time() - refit_start_time

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers[scoring.name]

        self.cv_results_ = results
        self.n_splits_ = cv.get_n_splits(y_train)

        self._is_fitted = True
        return self
Example #27
    def inverse_transform(self, y, X=None):
        self.check_is_fitted()
        y = check_y(y)
        fh = self._get_relative_fh(y)
        y_pred = self.forecaster_.predict(fh=fh, X=X)
        return y + y_pred
Example #28
def plot_series(*series,
                labels=None,
                markers=None,
                x_label=None,
                y_label=None,
                ax=None):
    """Plot one or more time series.

    Parameters
    ----------
    series : pd.Series or iterable of pd.Series
        One or more time series
    labels : list, default = None
        Names of series, will be displayed in figure legend
    markers : list, default = None
        Markers of data points; if None, the marker "o" is used by default.
        The length of the list has to match the number of series.
    x_label : str, default = None
        Label of the x-axis.
    y_label : str, default = None
        Label of the y-axis. If None, the name of the first series is used.
    ax : matplotlib axes, default = None
        Axes on which to plot. If None, a new figure and axes are created.

    Returns
    -------
    fig : plt.Figure
    ax : plt.Axes
    """
    _check_soft_dependencies("matplotlib", "seaborn")
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FuncFormatter, MaxNLocator
    from matplotlib.cbook import flatten
    import seaborn as sns

    for y in series:
        check_y(y)

    series = list(series)
    series = [convert_to(y, "pd.Series", "Series") for y in series]

    n_series = len(series)
    _ax_kwarg_is_none = ax is None
    # labels
    if labels is not None:
        if n_series != len(labels):
            raise ValueError("""There must be one label for each time series,
                but found inconsistent numbers of series and
                labels.""")
        legend = True
    else:
        labels = ["" for _ in range(n_series)]
        legend = False

    # markers
    if markers is not None:
        if n_series != len(markers):
            raise ValueError("""There must be one marker for each time series,
                but found inconsistent numbers of series and
                markers.""")
    else:
        markers = ["o" for _ in range(n_series)]

    # create combined index
    index = series[0].index
    for y in series[1:]:
        # check index types
        check_consistent_index_type(index, y.index)
        index = index.union(y.index)

    # generate integer x-values
    xs = [np.argwhere(index.isin(y.index)).ravel() for y in series]

    # create figure if no Axes object is provided for plotting
    if _ax_kwarg_is_none:
        fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25))

    colors = sns.color_palette("colorblind", n_colors=n_series)

    # plot series
    for x, y, color, label, marker in zip(xs, series, colors, labels, markers):

        # scatter if little data is available or index is not complete
        if len(x) <= 3 or not np.array_equal(np.arange(x[0], x[-1] + 1), x):
            plot_func = sns.scatterplot
        else:
            plot_func = sns.lineplot

        plot_func(x=x, y=y, ax=ax, marker=marker, label=label, color=color)

    # combine data points for all series
    xs_flat = list(flatten(xs))

    # set x label of data point to the matching index
    def format_fn(tick_val, tick_pos):
        if int(tick_val) in xs_flat:
            return index[int(tick_val)]
        else:
            return ""

    # dynamically set x label ticks and spacing from index labels
    ax.xaxis.set_major_formatter(FuncFormatter(format_fn))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Label the x and y axes
    if x_label is not None:
        ax.set_xlabel(x_label)

    _y_label = y_label if y_label is not None else series[0].name
    ax.set_ylabel(_y_label)

    if legend:
        ax.legend()
    if _ax_kwarg_is_none:
        return fig, ax
    else:
        return ax
Example #29
def plot_correlations(
    series,
    lags=24,
    alpha=0.05,
    zero_lag=True,
    acf_fft=False,
    acf_adjusted=True,
    pacf_method="ywadjusted",
    suptitle=None,
    series_title=None,
    acf_title="Autocorrelation",
    pacf_title="Partial Autocorrelation",
):
    """Plot series and its ACF and PACF values.

    Parameters
    ----------
    series : pd.Series
        A time series.

    lags : int, default = 24
        Number of lags to include in ACF and PACF plots

    alpha : float, default = 0.05
        Alpha value used to set confidence intervals. Alpha = 0.05 results in
        95% confidence interval with standard deviation calculated via
        Bartlett's formula.

    zero_lag : bool, default = True
        If True, start ACF and PACF plots at 0th lag

    acf_fft : bool, default = False
        Whether to compute ACF via FFT.

    acf_adjusted : bool, default = True
        If True, denominator of ACF calculations uses n-k instead of n, where
        n is number of observations and k is the lag.

    pacf_method : str, default = 'ywadjusted'
        Method to use in calculation of PACF.

    suptitle : str, default = None
        The text to use as the Figure's suptitle.

    series_title : str, default = None
        Used to set the title of the series plot if provided. Otherwise, series
        plot has no title.

    acf_title : str, default = 'Autocorrelation'
        Used to set title of ACF plot.

    pacf_title : str, default = 'Partial Autocorrelation'
        Used to set title of PACF plot.

    Returns
    -------
    fig : matplotlib.figure.Figure

    axes : np.ndarray
        Array of the figure's Axes objects
    """
    _check_soft_dependencies("matplotlib")
    import matplotlib.pyplot as plt

    series = check_y(series)
    series = convert_to(series, "pd.Series", "Series")

    # Setup figure for plotting
    fig = plt.figure(constrained_layout=True, figsize=(12, 8))
    gs = fig.add_gridspec(2, 2)
    f_ax1 = fig.add_subplot(gs[0, :])
    if series_title is not None:
        f_ax1.set_title(series_title)
    f_ax2 = fig.add_subplot(gs[1, 0])
    f_ax3 = fig.add_subplot(gs[1, 1])

    # Create expected plots on their respective Axes
    plot_series(series, ax=f_ax1)
    plot_acf(
        series,
        ax=f_ax2,
        lags=lags,
        zero=zero_lag,
        alpha=alpha,
        title=acf_title,
        adjusted=acf_adjusted,
        fft=acf_fft,
    )
    plot_pacf(
        series,
        ax=f_ax3,
        lags=lags,
        zero=zero_lag,
        alpha=alpha,
        title=pacf_title,
        method=pacf_method,
    )
    if suptitle is not None:
        fig.suptitle(suptitle, size="xx-large")

    return fig, np.array(fig.get_axes())
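A usage sketch, assuming plot_correlations as defined above is in scope together with its module-level dependencies (matplotlib, seaborn via plot_series, and statsmodels' plot_acf/plot_pacf); the series here is made up:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
series = pd.Series(np.cumsum(rng.normal(size=200)), name="toy series")

fig, axes = plot_correlations(series, lags=36, suptitle="Toy series diagnostics")
fig.savefig("correlations.png")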
Example #30
def plot_series(*series, labels=None):
    """Plot one or more time series

    Parameters
    ----------
    series : pd.Series
        One or more time series
    labels : list, optional (default=None)
        Names of series, will be displayed in figure legend

    Returns
    -------
    fig : plt.Figure
    ax : plt.Axis
    """
    # lazy imports to avoid hard dependency
    import seaborn as sns
    import matplotlib.pyplot as plt

    n_series = len(series)
    if labels is not None:
        if n_series != len(labels):
            raise ValueError("There must be one label for each time series, "
                             "but found inconsistent numbers of series and "
                             "labels.")
        legend = True
    else:
        labels = ["" for _ in range(n_series)]
        legend = False

    for y in series:
        check_y(y)

    # create combined index
    index = series[0].index
    for y in series[1:]:
        # check types, note that isinstance() does not work here because index
        # types inherit from each other, hence we check for type equality
        if type(index) is not type(y.index):  # noqa
            raise TypeError("Found series with different index types.")
        index = index.union(y.index)

    # generate integer x-values
    xs = [np.argwhere(index.isin(y.index)).ravel() for y in series]

    # create figure
    fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25))
    colors = sns.color_palette("colorblind", n_colors=n_series)

    # plot series
    for x, y, color, label in zip(xs, series, colors, labels):

        # scatter if little data is available or index is not complete
        if len(x) <= 3 or not np.array_equal(np.arange(x[0], x[-1] + 1), x):
            plot_func = sns.scatterplot
        else:
            plot_func = sns.lineplot

        plot_func(x, y, ax=ax, marker="o", label=label, color=color)

    # set combined index as xticklabels, suppress matplotlib warning
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        ax.set(xticklabels=index)

    if legend:
        ax.legend()

    return fig, ax