def smape_loss(y_test, y_pred, calculater_per_column: bool = False):
    """Compute the symmetric mean absolute percentage error (sMAPE).

    The loss is the mean of ``2 * |y_test - y_pred| / (|y_test| + |y_pred|)``.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.
    calculater_per_column : bool, optional (default=False)
        If True, both inputs are stacked and re-indexed with a fresh
        default index before the loss is computed.

    Returns
    -------
    loss : float
        sMAPE loss
    """
    actual = check_y(y_test)
    forecast = check_y(y_pred)

    if calculater_per_column:
        # collapse the columns into a single series with a fresh index
        actual = actual.stack().reset_index(drop=True)
        forecast = forecast.stack().reset_index(drop=True)

    check_equal_time_index(actual, forecast)

    abs_error = np.abs(actual - forecast)
    scale = np.abs(actual) + np.abs(forecast)
    return np.mean(2.0 * abs_error / scale)
def mase_loss(y_test, y_pred, y_train, sp=1):
    """Mean absolute scaled error.

    This scale-free error metric can be used to compare forecast methods
    on a single series and also to compare forecast accuracy between
    series. The forecast error is scaled by the in-sample mean absolute
    error of the naive seasonal forecast computed on `y_train`.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.
    y_train : pandas Series of shape = (n_obs,)
        Observed training values.
    sp : int
        Seasonal periodicity of training data.

    Returns
    -------
    loss : float
        MASE loss, or ``np.nan`` if the naive seasonal forecast has zero
        mean absolute error on the training data (e.g. flat series).

    Raises
    ------
    ValueError
        If the time index of `y_train` is not strictly before the time
        index of `y_test`.

    References
    ----------
    ..[1]   Hyndman, R. J. (2006). "Another look at measures of forecast
            accuracy", Foresight, Issue 4.
    """
    # input checks
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    y_train = check_y(y_train)
    check_equal_time_index(y_test, y_pred)

    # check if training set is prior to test set
    if y_train is not None:
        check_time_index(y_train.index)
        # compare time points with time points: the smallest test *value*
        # says nothing about where the test period starts
        if y_train.index.max() >= y_test.index.min():
            raise ValueError("Found `y_train` with time index which is not "
                             "before time index of `y_pred`")

    # naive seasonal prediction
    y_train = np.asarray(y_train)
    y_pred_naive = y_train[:-sp]

    # mean absolute error of naive seasonal prediction
    mae_naive = np.mean(np.abs(y_train[sp:] - y_pred_naive))

    # if training data is flat, mae may be zero,
    # return np.nan to avoid divide by zero error
    # and np.inf values
    if mae_naive == 0:
        return np.nan

    return np.mean(np.abs(y_test - y_pred)) / mae_naive
def fit(self, y, X=None, fh=None):
    """Fit the transformer steps and the final forecaster to training data.

    Each transformer is cloned and fit-transformed on the target series in
    turn; a clone of the last step is then fitted on the transformed series.

    Parameters
    ----------
    y : pd.Series
        Target time series to which to fit the forecaster.
    fh : int, list or np.array, optional (default=None)
        The forecasters horizon with the steps ahead to to predict.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables; not used by the transformers, but forwarded
        to the final forecaster's ``fit``.

    Returns
    -------
    self : returns an instance of self.
    """
    self.steps_ = self._check_steps()
    self._set_y_X(y, X)
    self._set_fh(fh)

    # run the target through a fresh clone of every transformer
    transformed = check_y(y)
    for idx, step_name, step_transformer in self._iter_transformers():
        fitted_transformer = clone(step_transformer)
        transformed = fitted_transformer.fit_transform(transformed)
        self.steps_[idx] = (step_name, fitted_transformer)

    # fit a clone of the last step on the transformed series
    final_name, final_step = self.steps[-1]
    fitted_forecaster = clone(final_step)
    fitted_forecaster.fit(transformed, X, fh)
    self.steps_[-1] = (final_name, fitted_forecaster)

    self._is_fitted = True
    return self
def seasonality_test_R(y, sp):
    """Seasonality test used in M4 competition.

    R and Python versions were inconsistent [2], this is the Python
    translation of the R version [1].

    Parameters
    ----------
    y : time series, validated with `check_y`
    sp : int
        Seasonal periodicity, validated with `check_sp`.

    Returns
    -------
    is_seasonal : bool
        False if `sp` is 1 or `y` has fewer than ``3 * sp`` observations;
        otherwise whether the absolute autocorrelation at lag `sp` exceeds
        the computed limit.

    References
    ----------
    ..[1]  https://github.com/Mcompetitions/M4-methods/blob/master
    /Benchmarks%20and%20Evaluation.R
    ..[2]  https://github.com/Mcompetitions/M4-methods/issues/25
    """
    y = np.asarray(check_y(y))
    n_timepoints = len(y)
    sp = check_sp(sp)

    # a periodicity of one means there is nothing to test
    if sp == 1:
        return False

    if n_timepoints < 3 * sp:
        warn("Did not perform seasonality test, as `y`` is too short for the "
             "given `sp`, returned: False")
        return False

    acf_values = acf(y, nlags=sp, fft=False)  # acf coefficients
    tcrit = 1.645  # 90% confidence level
    cumulative = np.cumsum(np.append(1, 2 * acf_values[1:]**2))
    limits = tcrit / np.sqrt(n_timepoints) * np.sqrt(cumulative)

    # limits is zero-based, hence the shift by one for lag `sp`
    return np.abs(acf_values[sp]) > limits[sp - 1]
def fit(self, y, **fit_params):
    """Estimate the seasonal component of the series.

    The series is decomposed with ``seasonal_decompose`` and the first
    `sp` values of its seasonal component are stored in ``seasonal_``.

    Parameters
    ----------
    y : pd.Series
    fit_params : dict
        Not used by this method.

    Returns
    -------
    self : an instance of self
    """
    y = check_y(y)
    self._set_oh_index(y)
    sp = check_sp(self.sp)

    decomposition = seasonal_decompose(
        y,
        model=self.model,
        period=sp,
        filt=None,
        two_sided=True,
        extrapolate_trend=0,
    )
    # one full period is enough to describe the seasonal pattern
    self.seasonal_ = decomposition.seasonal.iloc[:sp]

    self._is_fitted = True
    return self
def inverse_transform(self, y):
    """Undo the transformer steps on a series.

    The transformers are iterated with ``reverse=True`` and each one's
    ``inverse_transform`` is applied to the result of the previous one.

    Parameters
    ----------
    y : pd.Series

    Returns
    -------
    yt : result of the last ``inverse_transform`` call
    """
    self.check_is_fitted()
    restored = check_y(y)
    for _, _, step_transformer in self._iter_transformers(reverse=True):
        restored = step_transformer.inverse_transform(restored)
    return restored
def fit(self, y, X=None, fh=None):
    """Fit the theta forecaster to training data.

    Optionally deseasonalises the series with a multiplicative
    `Deseasonalizer`, fits the parent forecaster, then stores the fitted
    smoothing level and the computed trend.

    Parameters
    ----------
    y : pd.Series
        Target time series to which to fit the forecaster.
    fh : int, list or np.array, optional (default=None)
        The forecasters horizon with the steps ahead to to predict.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables are ignored

    Returns
    -------
    self : returns an instance of self.
    """
    y = check_y(y)
    sp = check_sp(self.sp)

    if sp > 1 and not self.deseasonalize:
        warn("`sp` is ignored when `deseasonalise`=False")

    if self.deseasonalize:
        deseasonalizer = Deseasonalizer(sp=self.sp, model="multiplicative")
        self.deseasonalizer_ = deseasonalizer
        y = self.deseasonalizer_.fit_transform(y)

    # fit exponential smoothing forecaster
    # find theta lines: Theta lines are just SES + drift
    super(ThetaForecaster, self).fit(y, fh=fh)
    fitted_params = self._fitted_forecaster.params
    self.smoothing_level_ = fitted_params["smoothing_level"]

    # compute trend
    self.trend_ = self._compute_trend(y)

    self._is_fitted = True
    return self
def inverse_transform(self, y, **transform_params):
    """Inverse transform data.

    The series is tabularised, passed through the fitted transformer's
    ``inverse_transform`` and detabularised onto the index of `y`.

    Parameters
    ----------
    y : pd.Series
    transform_params : dict
        Not used by this method.

    Returns
    -------
    yt : pd.Series
        Inverse-transformed time series.
    """
    self.check_is_fitted()
    check_y(y)

    tabular = self._tabularise(y)
    restored = self.transformer_.inverse_transform(tabular)
    return self._detabularise(restored, index=y.index)
def transform(self, y, **transform_params):
    """Transform data.

    The series is tabularised, passed through the fitted transformer's
    ``transform`` and detabularised onto the index of `y`.

    Parameters
    ----------
    y : pd.Series
    transform_params : dict
        Not used by this method.

    Returns
    -------
    yt : pd.Series
        Transformed time series.
    """
    self.check_is_fitted()
    check_y(y)

    tabular = self._tabularise(y)
    transformed = self.transformer_.transform(tabular)
    return self._detabularise(transformed, index=y.index)
def plot_ys(*ys, labels=None):
    """Plot one or more time series on a single axis.

    A series is drawn as a line when it has at least three points and its
    index equals the consecutive integer range between its minimum and
    maximum; otherwise it is drawn as a scatter plot.

    Parameters
    ----------
    ys : pd.Series
        One or more time series
    labels : list, optional (default=None)
        Names of time series displayed in figure legend

    Returns
    -------
    fig : plt.Figure
    ax : plt.Axis

    Raises
    ------
    ValueError
        If `labels` is given and its length differs from the number of
        series.
    """
    import matplotlib.pyplot as plt

    if labels is None:
        labels_ = [""] * len(ys)
    else:
        if len(ys) != len(labels):
            raise ValueError("There must be one label for each time series, "
                             "but found inconsistent numbers of series and "
                             "labels.")
        labels_ = labels

    fig, ax = plt.subplots(1, figsize=plt.figaspect(.25))

    for y, label in zip(ys, labels_):
        check_y(y)

        gapless_index = np.arange(y.index.min(), y.index.max() + 1)
        # line plot only for enough points on a gap-free index
        if len(y) >= 3 and np.array_equal(y.index.values, gapless_index):
            ax.plot(y.index.values, y.values, label=label)
        # otherwise scatter the individual points
        else:
            ax.scatter(y.index.values, y.values, label=label)

    if labels is not None:
        plt.legend()

    return fig, ax
def fit(self, y_train, **fit_params):
    """Fit a clone of the wrapped transformer on the tabularised series.

    Parameters
    ----------
    y_train : pd.Series
    fit_params : dict
        Not used by this method.

    Returns
    -------
    self
    """
    check_y(y_train)

    tabular = self._tabularise(y_train)
    fresh_transformer = clone(self.transformer)
    self.transformer_ = fresh_transformer.fit(tabular)

    self._is_fitted = True
    return self
def smape_loss(y_test, y_pred):
    """Compute the symmetric mean absolute percentage error (sMAPE).

    The loss is the mean of ``2 * |y_test - y_pred| / (|y_test| + |y_pred|)``.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.

    Returns
    -------
    loss : float
        sMAPE loss
    """
    actual = check_y(y_test)
    forecast = check_y(y_pred)
    check_equal_time_index(actual, forecast)

    abs_error = np.abs(actual - forecast)
    scale = np.abs(actual) + np.abs(forecast)
    return np.mean(2.0 * abs_error / scale)
def mape_loss(y_test, y_pred):
    """Mean absolute percentage error (MAPE).

    MAPE output is non-negative floating point where the best value is 0.0.
    There is no limit on how large the error can be, particularly when
    `y_test` values are close to zero. The denominator is floored at the
    float64 machine epsilon, so in such cases the function returns a large
    value instead of `inf`.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.

    Returns
    -------
    loss : float
        MAPE loss expressed as a fractional number rather than percentage
        point.

    Examples
    --------
    >>> import pandas as pd
    >>> y_test = pd.Series([1, -1, 2])
    >>> y_pred = pd.Series([2, -2, 4])
    >>> mape_loss(y_test, y_pred)
    1.0
    """
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    check_equal_time_index(y_test, y_pred)

    eps = np.finfo(np.float64).eps
    abs_error = np.abs(y_test - y_pred)
    # floor the scale at eps so near-zero targets do not divide by zero
    scale = np.maximum(np.abs(y_test), eps)
    return np.mean(abs_error / scale)
def autocorrelation_seasonality_test(y, sp):
    """Seasonality test used in M4 competition.

    Parameters
    ----------
    y : time series, validated with `check_y`
    sp : int
        Seasonal periodicity

    Returns
    -------
    is_seasonal : bool
        Test result. False if `sp` is 1 or `y` has fewer than ``3 * sp``
        observations; otherwise whether the absolute autocorrelation at
        lag `sp` exceeds the computed limit.

    References
    ----------
    ..[1]  https://github.com/Mcompetitions/M4-methods/blob/master
    /Benchmarks%20and%20Evaluation.R
    """
    y = check_y(y)
    sp = check_sp(sp)

    y = np.asarray(y)
    n_timepoints = len(y)

    # a periodicity of one means there is nothing to test
    if sp == 1:
        return False

    if n_timepoints < 3 * sp:
        warn(
            "Did not perform seasonality test, as `y`` is too short for the "
            "given `sp`, returned: False"
        )
        return False

    acf_values = acf(y, nlags=sp, fft=False)  # acf coefficients
    tcrit = 1.645  # 90% confidence level
    cumulative = np.cumsum(np.append(1, 2 * acf_values[1:] ** 2))
    limits = tcrit / np.sqrt(n_timepoints) * np.sqrt(cumulative)

    # limits is zero-based, hence the shift by one for lag `sp`
    return np.abs(acf_values[sp]) > limits[sp - 1]
def transform(self, y, **transform_params):
    """Transform data.

    Aligns the fitted seasonal component with `y` and passes both to
    ``_detrend``.

    Parameters
    ----------
    y : pd.Series
    transform_params : dict
        Not used by this method.

    Returns
    -------
    yt : pd.Series
        Transformed time series.
    """
    self.check_is_fitted()
    y = check_y(y)

    aligned_seasonal = self._align_seasonal(y)
    yt = self._detrend(y, aligned_seasonal)
    return yt
def update(self, y_new, update_params=False):
    """Update the stored observation-horizon index with new data.

    Parameters
    ----------
    y_new : pd.Series
    update_params : bool, optional (default=False)
        Not used by this method.

    Returns
    -------
    self : an instance of self
    """
    self.check_is_fitted()

    validated = check_y(y_new)
    self._set_oh_index(validated)

    return self
def update_predict(
    self,
    y_test,
    cv=None,
    X_test=None,
    update_params=False,
    return_pred_int=False,
    alpha=DEFAULT_ALPHA,
):
    """Make and update predictions iteratively over the test set.

    Parameters
    ----------
    y_test : pd.Series
    cv : temporal cross-validation generator, optional (default=None)
        If None, a `SlidingWindowSplitter` with ``start_with_window=True``,
        ``window_length=1`` and ``fh=1`` is used.
    X_test : pd.DataFrame, optional (default=None)
    update_params : bool, optional (default=False)
    return_pred_int : bool, optional (default=False)
        Prediction intervals are not implemented; passing True raises.
    alpha : int or list of ints, optional (default=DEFAULT_ALPHA)

    Returns
    -------
    y_pred : result of ``_predict_moving_cutoff``

    Raises
    ------
    NotImplementedError
        If `return_pred_int` is True.
    """
    if return_pred_int:
        raise NotImplementedError()

    y_test = check_y(y_test)

    if cv is None:
        # default: slide over the test set one step at a time
        cv = SlidingWindowSplitter(
            start_with_window=True, window_length=1, fh=1
        )
    else:
        cv = check_cv(cv)

    return self._predict_moving_cutoff(
        y_test,
        X=X_test,
        update_params=update_params,
        return_pred_int=return_pred_int,
        alpha=alpha,
        cv=cv,
    )
def fit(self, y_train, **fit_params):
    """Fit to data, deseasonalising only if the seasonality check passes.

    Parameters
    ----------
    y_train : pd.Series
    fit_params : dict
        Not used by this method.

    Returns
    -------
    self : an instance of self
    """
    y_train = check_y(y_train)
    self._set_y_index(y_train)
    sp = check_sp(self.sp)

    # fall back to the autocorrelation test when none was supplied
    self.seasonality_test_ = (
        autocorrelation_seasonality_test
        if self.seasonality_test is None
        else self.seasonality_test
    )

    # check if data meets condition
    self.is_seasonal_ = self._check_condition(y_train)

    if self.is_seasonal_:
        # condition met: keep one period of the estimated seasonal component
        decomposition = seasonal_decompose(
            y_train,
            model=self.model,
            period=sp,
            filt=None,
            two_sided=True,
            extrapolate_trend=0,
        )
        self.seasonal_ = decomposition.seasonal.iloc[:sp]
    elif self.model == "additive":
        # neutral seasonal component for an additive model
        self.seasonal_ = np.zeros(self.sp)
    else:
        # neutral seasonal component otherwise
        self.seasonal_ = np.ones(self.sp)

    self._is_fitted = True
    return self
def update_predict(
    self,
    y,
    cv=None,
    X=None,
    update_params=True,
    return_pred_int=False,
    alpha=DEFAULT_ALPHA,
):
    """Make and update predictions iteratively over the test set.

    Parameters
    ----------
    y : pd.Series
    cv : temporal cross-validation generator, optional (default=None)
        Validated with `check_cv`.
    X : pd.DataFrame, optional (default=None)
    update_params : bool, optional (default=True)
    return_pred_int : bool, optional (default=False)
        Prediction intervals are not implemented; passing True raises.
    alpha : int or list of ints, optional (default=DEFAULT_ALPHA)

    Returns
    -------
    y_pred : result of ``_predict_moving_cutoff``

    Raises
    ------
    NotImplementedError
        If `return_pred_int` is True.
    """
    self.check_is_fitted()

    if return_pred_int:
        raise NotImplementedError()

    validated_y = check_y(y)
    validated_cv = check_cv(cv)

    return self._predict_moving_cutoff(
        validated_y,
        validated_cv,
        X,
        update_params=update_params,
        return_pred_int=return_pred_int,
        alpha=alpha,
    )
def transform(self, y, X=None):
    """Remove trend from the data.

    The fitted forecaster predicts at the horizon of `y` relative to the
    training data; the prediction is subtracted from `y`.

    Parameters
    ----------
    y : pd.Series, list
        Time series to be detrended
    X : pd.DataFrame, optional (default=None)
        Exogenous variables, forwarded to the forecaster's ``predict``

    Returns
    -------
    y_hat : pd.Series
        De-trended series
    """
    self.check_is_fitted()
    y = check_y(y)

    relative_fh = self._get_relative_fh(y)
    trend = self.forecaster_.predict(fh=relative_fh, X=X)
    return y - trend
def inverse_transform(self, y, X=None):
    """Add trend back to a time series.

    The fitted forecaster predicts at the horizon of `y` relative to the
    training data; the prediction is added to `y`.

    Parameters
    ----------
    y : pd.Series, list
        Detrended time series to revert
    X : pd.DataFrame, optional (default=None)
        Exogenous variables, forwarded to the forecaster's ``predict``

    Returns
    -------
    y_hat : pd.Series
        Series with the trend
    """
    self.check_is_fitted()
    y = check_y(y)

    relative_fh = self._get_relative_fh(y)
    trend = self.forecaster_.predict(fh=relative_fh, X=X)
    return y + trend
def _set_oh(self, y):
    """Set and update the observation horizon.

    Empty input leaves the observation horizon and the cutoff untouched.

    Parameters
    ----------
    y : pd.Series
    """
    y = check_y(y, allow_empty=True)

    # nothing to update for empty data
    if len(y) == 0:
        return

    if self._oh is None:
        # fitting: no previous observation horizon, so start a new one
        self._oh = y
    else:
        # updating: merge the new data with the previous observation horizon
        self._oh = y.combine_first(self.oh)

    # the cutoff is the last time point of the newly passed data
    self._set_cutoff(y.index[-1])
def compute_expected_index_from_update_predict(y, fh, step_length):
    """Helper function to compute expected time index from `update_predict`.

    Parameters
    ----------
    y : time series, validated with `check_y`
    fh : forecasting horizon, validated with `check_fh`
    step_length : step between consecutive cutoffs

    Returns
    -------
    pred_index : np.ndarray
        Sorted unique time points that are predicted.
    """
    fh = check_fh(fh)
    y = check_y(y)

    time_points = y.index.values
    first_cutoff = time_points[0] - 1  # initial cutoff
    last_point = time_points[-1]  # last point to predict

    # time points at which to make predictions
    candidates = np.arange(first_cutoff, last_point, step_length)

    # keep a cutoff only if every step in fh still falls inside y
    reachable = candidates + max(fh) <= max(time_points)
    cutoffs = candidates[reachable]
    n_cutoffs = len(cutoffs)

    # one row per fh step, one column per cutoff; overlapping horizons
    # produce duplicates which are removed below
    fh_grid = np.repeat(fh, n_cutoffs).reshape(len(fh), n_cutoffs)
    all_points = cutoffs + fh_grid

    return np.unique(all_points)
def _transform(self, y_train, X_train=None):
    """Tabularise the target series with a rolling window.

    Each split of ``self._cv`` yields one feature window and one target
    window; the collected windows are handed to ``_format_windows``.

    Raises
    ------
    NotImplementedError
        If `X_train` is given.
    """
    if X_train is not None:
        raise NotImplementedError()

    y_train = check_y(y_train)
    splitter = self._cv

    # collect feature and target windows by integer position
    x_windows, y_windows = [], []
    for x_index, y_index in splitter.split(y_train):
        x_windows.append(y_train.iloc[x_index])
        y_windows.append(y_train.iloc[y_index])

    # put into required input format for regression
    X_train, y_train = self._format_windows(x_windows, y_windows)
    return X_train, y_train
def _transform(self, y, X=None):
    """Tabularise the target series with a rolling window.

    Each split of ``self._cv`` yields one feature window and one target
    window; the collected windows are handed to ``_format_windows``.

    Raises
    ------
    NotImplementedError
        If `X` is given.
    """
    if X is not None:
        raise NotImplementedError(
            "Exogenous variables `X` are not yet supported.")

    y = check_y(y)
    splitter = self._cv

    # collect feature and target windows by integer position
    x_windows, y_windows = [], []
    for x_index, y_index in splitter.split(y):
        x_windows.append(y.iloc[x_index])
        y_windows.append(y.iloc[y_index])

    # put into required input format for regression
    X, y = self._format_windows(x_windows, y_windows)
    return X, y
def fit(self, y_train, fh=None, X_train=None, **fit_params):
    """Run the parameter search and store its results on the estimator.

    Every candidate handed over by ``_run_search`` is scored with
    ``_fit_and_score`` on a clone of the base forecaster. The best
    candidate is selected by the rank of the test score and, if
    ``self.refit`` is set, refitted on the training data.

    Parameters
    ----------
    y_train : pd.Series
        Target time series to which to fit the forecaster.
    fh : int, list or np.array, optional (default=None)
        The forecasters horizon with the steps ahead to to predict;
        forwarded to the refit of the best forecaster.
    X_train : pd.DataFrame, optional (default=None)
        Exogenous variables, forwarded to ``_fit_and_score`` and to the
        refit of the best forecaster.
    fit_params : dict
        Forwarded to ``_fit_and_score`` and to the refit.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    ValueError
        If an evaluation of candidates produced no fit results.
    """
    y_train = check_y(y_train)

    # validate cross-validator
    cv = check_cv(self.cv)
    base_forecaster = clone(self.forecaster)

    scoring = check_scoring(self.scoring)
    scorers = {scoring.name: scoring}
    refit_metric = scoring.name

    fit_and_score_kwargs = {
        "scorer": scorers,
        "fit_params": fit_params,
        "return_train_score": self.return_train_score,
        "return_times": True,
        "return_parameters": False,
        "error_score": self.error_score,
        "verbose": self.verbose,
    }

    # accumulated across all calls made by `_run_search`
    results = {}
    all_candidate_params = []
    all_out = []

    def evaluate_candidates(candidate_params):
        candidate_params = list(candidate_params)
        n_candidates = len(candidate_params)

        if self.verbose > 0:
            n_splits = cv.get_n_splits(y_train)
            print(  # noqa
                "Fitting {0} folds for each of {1} candidates,"
                " totalling {2} fits".format(n_splits, n_candidates,
                                             n_candidates * n_splits))

        # score each candidate on its own clone of the base forecaster
        out = [
            _fit_and_score(clone(base_forecaster), cv, y_train, X_train,
                           parameters=parameters, **fit_and_score_kwargs)
            for parameters in candidate_params
        ]

        n_splits = cv.get_n_splits(y_train)

        if not out:
            raise ValueError("No fits were performed. "
                             "Was the CV iterator empty? "
                             "Were there no candidates?")

        all_candidate_params.extend(candidate_params)
        all_out.extend(out)

        nonlocal results
        results = self._format_results(all_candidate_params, scorers,
                                       all_out)
        return results

    self._run_search(evaluate_candidates)

    rank_key = "rank_test_%s" % refit_metric
    mean_key = "mean_test_%s" % refit_metric
    self.best_index_ = results[rank_key].argmin()
    self.best_score_ = results[mean_key][self.best_index_]
    self.best_params_ = results["params"][self.best_index_]

    self.best_forecaster_ = clone(base_forecaster).set_params(
        **self.best_params_)

    if self.refit:
        refit_start_time = time.time()
        self.best_forecaster_.fit(y_train, fh=fh, X_train=X_train,
                                  **fit_params)
        self.refit_time_ = time.time() - refit_start_time

    # Store the only scorer not as a dict for single metric evaluation
    self.scorer_ = scorers[scoring.name]

    self.cv_results_ = results
    self.n_splits_ = cv.get_n_splits(y_train)

    self._is_fitted = True
    return self
def inverse_transform(self, y, X=None):
    """Add the forecaster's prediction back onto a series.

    The fitted forecaster predicts at the horizon of `y` relative to the
    training data; the prediction is added to `y`.

    Parameters
    ----------
    y : pd.Series
        Series to revert
    X : pd.DataFrame, optional (default=None)
        Exogenous variables, forwarded to the forecaster's ``predict``

    Returns
    -------
    y_hat : sum of `y` and the prediction
    """
    self.check_is_fitted()
    y = check_y(y)

    relative_fh = self._get_relative_fh(y)
    trend = self.forecaster_.predict(fh=relative_fh, X=X)
    return y + trend
def plot_series(*series, labels=None, markers=None, x_label=None,
                y_label=None, ax=None):
    """Plot one or more time series.

    All series are drawn against the integer positions of their time
    points in the combined index. A series is drawn as a line when it has
    more than three points without gaps, otherwise as a scatter plot.

    Parameters
    ----------
    series : pd.Series or iterable of pd.Series
        One or more time series
    labels : list, default = None
        Names of series, will be displayed in figure legend
    markers: list, default = None
        Markers of data points, if None the marker "o" is used by default.
        The length of the list has to match with the number of series.
    x_label : str, default = None
        Label of the x axis; the axis label is left untouched if None.
    y_label : str, default = None
        Label of the y axis; if None, the name of the first series is used.
    ax : matplotlib axes, default = None
        Axes to plot on; if None, a new figure and axes are created.

    Returns
    -------
    fig : plt.Figure
        Only returned if `ax` was not passed.
    ax : plt.Axis

    Raises
    ------
    ValueError
        If the number of labels or markers differs from the number of
        series.
    """
    _check_soft_dependencies("matplotlib", "seaborn")
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FuncFormatter, MaxNLocator
    from matplotlib.cbook import flatten
    import seaborn as sns

    for y in series:
        check_y(y)

    series = [convert_to(y, "pd.Series", "Series") for y in series]
    n_series = len(series)
    _ax_kwarg_is_none = ax is None

    # labels
    if labels is None:
        labels = [""] * n_series
        legend = False
    else:
        if n_series != len(labels):
            raise ValueError(
                """There must be one label for each time series, but found inconsistent numbers of series and labels."""  # noqa
            )
        legend = True

    # markers
    if markers is None:
        markers = ["o"] * n_series
    elif n_series != len(markers):
        raise ValueError(
            """There must be one marker for each time series, but found inconsistent numbers of series and markers."""  # noqa
        )

    # create combined index
    index = series[0].index
    for y in series[1:]:
        # check index types
        check_consistent_index_type(index, y.index)
        index = index.union(y.index)

    # integer x-values: position of each time point in the combined index
    xs = [np.argwhere(index.isin(y.index)).ravel() for y in series]

    # create figure if no Axe provided for plotting
    if _ax_kwarg_is_none:
        fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25))

    colors = sns.color_palette("colorblind", n_colors=n_series)

    # plot series
    for x, y, color, label, marker in zip(xs, series, colors, labels,
                                          markers):
        # line plot only if enough data is available and index is complete
        if len(x) > 3 and np.array_equal(np.arange(x[0], x[-1] + 1), x):
            plot_func = sns.lineplot
        else:
            plot_func = sns.scatterplot

        plot_func(x=x, y=y, ax=ax, marker=marker, label=label, color=color)

    # combine data points for all series
    xs_flat = list(flatten(xs))

    # set x label of data point to the matching index
    def format_fn(tick_val, tick_pos):
        position = int(tick_val)
        return index[position] if position in xs_flat else ""

    # dynamically set x label ticks and spacing from index labels
    ax.xaxis.set_major_formatter(FuncFormatter(format_fn))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Label the x and y axes
    if x_label is not None:
        ax.set_xlabel(x_label)

    _y_label = series[0].name if y_label is None else y_label
    ax.set_ylabel(_y_label)

    if legend:
        ax.legend()

    if _ax_kwarg_is_none:
        return fig, ax
    return ax
def plot_correlations(
    series,
    lags=24,
    alpha=0.05,
    zero_lag=True,
    acf_fft=False,
    acf_adjusted=True,
    pacf_method="ywadjusted",
    suptitle=None,
    series_title=None,
    acf_title="Autocorrelation",
    pacf_title="Partial Autocorrelation",
):
    """Plot series and its ACF and PACF values.

    The figure has the series plot spanning the top row and the ACF and
    PACF plots side by side in the bottom row.

    Parameters
    ----------
    series : pd.Series
        A time series.
    lags : int, default = 24
        Number of lags to include in ACF and PACF plots
    alpha : float, default = 0.05
        Alpha value used to set confidence intervals. Alpha = 0.05 results
        in 95% confidence interval with standard deviation calculated via
        Bartlett's formula.
    zero_lag : bool, default = True
        If True, start ACF and PACF plots at 0th lag
    acf_fft : bool, default = False
        Whether to compute ACF via FFT.
    acf_adjusted : bool, default = True
        If True, denominator of ACF calculations uses n-k instead of n,
        where n is number of observations and k is the lag.
    pacf_method : str, default = 'ywadjusted'
        Method to use in calculation of PACF.
    suptitle : str, default = None
        The text to use as the Figure's suptitle.
    series_title : str, default = None
        Used to set the title of the series plot if provided. Otherwise,
        series plot has no title.
    acf_title : str, default = 'Autocorrelation'
        Used to set title of ACF plot.
    pacf_title : str, default = 'Partial Autocorrelation'
        Used to set title of PACF plot.

    Returns
    -------
    fig : matplotlib.figure.Figure
    axes : np.ndarray
        Array of the figure's Axe objects
    """
    _check_soft_dependencies("matplotlib")
    import matplotlib.pyplot as plt

    series = check_y(series)
    series = convert_to(series, "pd.Series", "Series")

    # Setup figure for plotting: series on top, ACF and PACF below
    fig = plt.figure(constrained_layout=True, figsize=(12, 8))
    grid = fig.add_gridspec(2, 2)

    series_ax = fig.add_subplot(grid[0, :])
    if series_title is not None:
        series_ax.set_title(series_title)
    acf_ax = fig.add_subplot(grid[1, 0])
    pacf_ax = fig.add_subplot(grid[1, 1])

    # Create expected plots on their respective Axes
    plot_series(series, ax=series_ax)
    plot_acf(
        series,
        ax=acf_ax,
        lags=lags,
        zero=zero_lag,
        alpha=alpha,
        title=acf_title,
        adjusted=acf_adjusted,
        fft=acf_fft,
    )
    plot_pacf(
        series,
        ax=pacf_ax,
        lags=lags,
        zero=zero_lag,
        alpha=alpha,
        title=pacf_title,
        method=pacf_method,
    )

    if suptitle is not None:
        fig.suptitle(suptitle, size="xx-large")

    axes = np.array(fig.get_axes())
    return fig, axes
def plot_series(*series, labels=None):
    """Plot one or more time series

    All series are drawn against the integer positions of their time
    points in the combined index. A series is drawn as a line when it has
    more than three points without gaps, otherwise as a scatter plot.

    Parameters
    ----------
    series : pd.Series
        One or more time series
    labels : list, optional (default=None)
        Names of series, will be displayed in figure legend

    Returns
    -------
    fig : plt.Figure
    ax : plt.Axis

    Raises
    ------
    ValueError
        If `labels` is given and its length differs from the number of
        series.
    TypeError
        If the series do not all have the same index type.
    """
    # lazy imports to avoid hard dependency
    import seaborn as sns
    import matplotlib.pyplot as plt

    n_series = len(series)
    if labels is not None:
        if n_series != len(labels):
            raise ValueError("There must be one label for each time series, "
                             "but found inconsistent numbers of series and "
                             "labels.")
        legend = True
    else:
        labels = ["" for _ in range(n_series)]
        legend = False

    for y in series:
        check_y(y)

    # create combined index
    index = series[0].index
    for y in series[1:]:
        # check types, note that isinstance() does not work here because index
        # types inherit from each other, hence we check for type equality
        if type(index) is not type(y.index):  # noqa
            raise TypeError("Found series with different index types.")
        index = index.union(y.index)

    # generate integer x-values
    xs = [np.argwhere(index.isin(y.index)).ravel() for y in series]

    # create figure
    fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25))
    colors = sns.color_palette("colorblind", n_colors=n_series)

    # plot series
    for x, y, color, label in zip(xs, series, colors, labels):

        # scatter if little data is available or index is not complete
        if len(x) <= 3 or not np.array_equal(np.arange(x[0], x[-1] + 1), x):
            plot_func = sns.scatterplot
        else:
            plot_func = sns.lineplot

        # x and y must be passed by keyword: seaborn deprecated positional
        # data arguments in 0.11 and rejects them from 0.12 onwards
        plot_func(x=x, y=y, ax=ax, marker="o", label=label, color=color)

    # set combined index as xticklabels, suppress matplotlib warning
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        ax.set(xticklabels=index)

    if legend:
        ax.legend()

    return fig, ax