def _vectorized_transform(self, X, X_input_mtype=None, y=None, inverse=False): """Vectorized application of transform or inverse, and convert back.""" if X_input_mtype is None: X_input_mtype = mtype(X, as_scitype=["Series", "Panel"]) if y is not None: ValueError( "no default behaviour if _fit does not support Panel, " " but X is Panel and y is not None" ) X = convert_to( X, to_type="df-list", as_scitype="Panel", store=self._converter_store_X ) # depending on whether fitting happens, apply fitted or unfitted instances if not self.get_tag("fit-in-transform"): # these are the transformers-per-instanced, fitted in fit transformers = self.transformers_ if len(transformers) != len(X): raise RuntimeError( "found different number of instances in transform than in fit" ) if inverse: Xt = [transformers[i].inverse_transform(X[i]) for i in range(len(X))] else: Xt = [transformers[i].transform(X[i]) for i in range(len(X))] # now we have a list of transformed instances else: # if no fitting happens, just apply transform multiple times if inverse: Xt = [self.inverse_transform(X[i]) for i in range(len(X))] else: Xt = [self.transform(X[i]) for i in range(len(X))] # convert to expected output format ################################### if inverse: output_scitype = self.get_tag("scitype:transform-input") else: output_scitype = self.get_tag("scitype:transform-output") # if the output is Series, Xt is a Panel and we convert back if output_scitype == "Series": Xt = convert_to( Xt, to_type=X_input_mtype, as_scitype="Panel", store=self._converter_store_X, ) # if the output is Primitives, we have a list of one-row dataframes # we concatenate those and overwrite the index with that of X elif output_scitype == "Primitives": Xt = pd.concat(Xt) Xt = Xt.reset_index(drop=True) return Xt
def _convert_output(self, X, X_input_mtype=None, X_was_Series=False, inverse=False):
    """Convert the output of transform or inverse_transform to the expected format.

    Parameters
    ----------
    X : output of the inner transform / inverse_transform
    X_input_mtype : str, optional (default=None)
        mtype of the data originally passed by the user.
    X_was_Series : bool, optional (default=False)
        Whether a Series input had been turned into a one-instance Panel.
    inverse : bool, optional (default=False)
        If True, the output scitype is read from the "scitype:transform-input"
        tag, otherwise from "scitype:transform-output".

    Returns
    -------
    Converted output: of the input mtype for "Series" output (pd.DataFrame if a
    pd.Series input became multivariate), a pd.DataFrame for "Primitives"
    output, and the unchanged object in all other cases.
    """
    input_scitype = mtype_to_scitype(X_input_mtype)

    # inverse_transform produces what transform consumes
    scitype_tag = "scitype:transform-input" if inverse else "scitype:transform-output"
    out_scitype = self.get_tag(scitype_tag)

    result = X
    returns_series = out_scitype == "Series"

    # undo the Series -> one-instance-Panel coercion, if it took place
    if X_was_Series and returns_series:
        result = convert_to(
            result, to_type=["pd-multiindex", "numpy3D", "df-list"], as_scitype="Panel"
        )
        result = convert_Panel_to_Series(result)

    if returns_series:
        # by default, hand back the mtype that was passed in
        target_mtype = X_input_mtype
        if input_scitype == "Series":
            # a multivariate result cannot be stored in a pd.Series,
            # in that case a pd.DataFrame is returned instead
            _, _, metadata = check_is_mtype(
                result,
                ["pd.DataFrame", "pd.Series", "np.ndarray"],
                return_metadata=True,
            )
            if not metadata["is_univariate"] and X_input_mtype == "pd.Series":
                target_mtype = "pd.DataFrame"
        return convert_to(
            result,
            to_type=target_mtype,
            as_scitype=input_scitype,
            store=self._converter_store_X,
        )

    if out_scitype == "Primitives":
        # rows are instances, so the index is replaced by a plain integer range
        if isinstance(result, (pd.DataFrame, pd.Series)):
            result = result.reset_index(drop=True)
        # the Series converter is used only to guarantee a pd.DataFrame;
        # no converter store, as this is not a 1:1 back-conversion
        return convert_to(result, to_type="pd.DataFrame", as_scitype="Series")

    # remaining case is Panel output, which needs no conversion
    return result
def _convert_X_y(self, X, y):
    """Convert X, y to inner type.

    Parameters
    ----------
    X : Series or Panel data container
    y : Series or Panel data container, or None

    Returns
    -------
    X_inner : X converted to one of the mtypes in the "X_inner_mtype" tag
        that has the same scitype as X
    y_inner : y converted to one of the mtypes in the "y_inner_mtype" tag
        that has the same scitype as y;
        None if y is None or if the "y_inner_mtype" tag is "None"

    Raises
    ------
    RuntimeError
        If the scitype of X is not among the scitypes of the inner mtypes.
    """
    X_inner_mtype = _coerce_to_list(self.get_tag("X_inner_mtype"))
    X_inner_scitypes = mtype_to_scitype(X_inner_mtype, return_unique=True)

    y_inner_mtype = _coerce_to_list(self.get_tag("y_inner_mtype"))

    X_mtype = mtype(X, as_scitype=["Series", "Panel"])
    X_scitype = mtype_to_scitype(X_mtype)

    # for debugging, exception if the conversion fails (this should never happen)
    if X_scitype not in X_inner_scitypes:
        raise RuntimeError("conversion of X to X_inner unsuccessful, unexpected")

    # subset to the mtypes that are of the same scitype as X
    X_inner_mtype = [mt for mt in X_inner_mtype if mtype_to_scitype(mt) == X_scitype]

    # if the X type is already supported, no conversion takes place
    X_inner = convert_to(
        X,
        to_type=X_inner_mtype,
        as_scitype=X_scitype,
        store=self._converter_store_X,
    )

    # y is inspected only if it is actually passed on: inferring the mtype of
    # an absent or ignored y is unnecessary and must not be able to fail the call
    if y is None or y_inner_mtype == ["None"]:
        return X_inner, None

    y_mtype = mtype(y, as_scitype=["Series", "Panel"])
    y_scitype = mtype_to_scitype(y_mtype)

    # subset to the mtypes that are of the same scitype as y
    y_inner_mtype = [mt for mt in y_inner_mtype if mtype_to_scitype(mt) == y_scitype]
    y_inner = convert_to(
        y,
        to_type=y_inner_mtype,
        as_scitype=y_scitype,
    )

    return X_inner, y_inner
def _pairwise_table_x_check(self, X, var_name="X"):
    """Check that X is of Table scitype and coerce it to the inner mtype.

    Parameters
    ----------
    X : pd.DataFrame, pd.Series, numpy 1D or 2D, list of dicts
        sktime data container compliant with the Table scitype
        The value to be checked and coerced
    var_name : str, variable name to print in error messages

    Returns
    -------
    X_coerced : Table data container, of the mtype in the "X_inner_mtype" tag

    Raises
    ------
    TypeError
        If X is not a valid data container of scitype Table.
    """
    is_table = check_is_scitype(X, "Table", return_metadata=False, var_name=var_name)

    if is_table:
        inner_mtype = self.get_tag("X_inner_mtype")
        return convert_to(X, to_type=inner_mtype, as_scitype="Table")

    raise TypeError(
        "X and X2 must be in an sktime compatible format, of scitype Table, "
        "for instance a pandas.DataFrame or a 2D numpy.ndarray. "
        "See the data format tutorial examples/AA_datatypes_and_datasets.ipynb"
    )
def plot_series(X: TimeSeriesInstances):
    """Plot every series of X in its own subplot, stacked vertically.

    Parameters
    ----------
    X : TimeSeriesInstances
        Series to plot. A pd.DataFrame is converted to "numpy3D" first; for each
        instance ``X[i]``, only ``X[i][0]`` is drawn.
    """
    _check_soft_dependencies("matplotlib")
    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt

    if isinstance(X, pd.DataFrame):
        X = convert_to(X, "numpy3D")

    plt.rcParams["figure.dpi"] = 100

    # figsize is handed to subplots directly: a separate plt.figure call would
    # only leave an empty extra figure behind, since subplots opens its own.
    # squeeze=False keeps axes a 2D array, also if there is just one series.
    _, axes = plt.subplots(nrows=len(X), ncols=1, figsize=(5, 10), squeeze=False)
    for i, curr_axes in enumerate(axes[:, 0]):
        curr_axes.plot(X[i][0], color="b")

    blue_patch = mpatches.Patch(color="blue", label="Series that belong to the cluster")
    plt.legend(
        handles=[blue_patch],
        loc="upper center",
        bbox_to_anchor=(0.5, -0.40),
        fancybox=True,
        shadow=True,
        ncol=5,
    )
    plt.tight_layout()
    plt.show()
def create_test_distance_numpy(
    n_instance: int,
    n_columns: int = None,
    n_timepoints: int = None,
    random_state: int = 1,
):
    """Create a numpy array of test data for distances.

    Parameters
    ----------
    n_instance: int
        Number of instances to create.
    n_columns: int, defaults = None
        Number of columns to create. Treated as 1 if None.
    n_timepoints: int, defaults = None
        Number of timepoints to create in each column. Treated as 1 if None.
    random_state: int, defaults = 1
        Random state to initialise with.

    Returns
    -------
    np.ndarray
        The generated data converted to "numpy3D", from which one trailing
        axis is sliced off per argument of n_columns, n_timepoints left as None:
        3D if both are given, 2D if exactly one is None, 1D if both are None.
    """
    n_unset = 0
    if n_timepoints is None:
        n_timepoints = 1
        n_unset += 1
    if n_columns is None:
        n_columns = 1
        n_unset += 1

    panel = _create_test_distances(
        n_instance=n_instance,
        n_columns=n_columns,
        n_timepoints=n_timepoints,
        random_state=random_state,
    )
    arr = convert_to(panel, to_type="numpy3D")

    if n_unset == 0:
        return arr
    if n_unset == 1:
        return arr[:, :, 0]
    return arr[:, 0, 0]
def test_center_init(center_init_callable: Callable[[np.ndarray], np.ndarray]):
    """Test center initialisation algorithms.

    Checks that the initialiser returns exactly k centers, and that the
    returned centers are pairwise distinct.
    """
    k = 5
    X, y = load_arrow_head(return_X_y=True)
    X_train, _, _, _ = train_test_split(X, y)
    X_train = convert_to(X_train, "numpy3D")
    random_state = check_random_state(1)

    test_centers = center_init_callable(X_train, k, random_state)

    assert len(test_centers) == k
    # axis=0 compares whole centers with each other; along axis=1 the first
    # dimension is never reduced, so the length check would hold trivially
    assert len(np.unique(test_centers, axis=0)) == k
def _check_clusterer_input(
    self, X: TimeSeriesInstances, enforce_min_instances: int = 1
) -> TimeSeriesInstances:
    """Validate the input and prepare for _fit.

    Parameters
    ----------
    X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
        (n_instances,n_dimensions,series_length)) or nested pd.DataFrame (
        n_instances,n_dimensions).
        Training time series instances to cluster.
    enforce_min_instances : int, default = 1
        Minimum number of instances that X must contain.

    Returns
    -------
    X : Panel data container of the mtype in the "X_inner_mtype" tag
        Converted X ready for _fit.

    Raises
    ------
    TypeError
        If X, after the initial conversion, is not a valid Panel container.
    ValueError
        If X has fewer than ``enforce_min_instances`` instances.
    """
    X = self._initial_conversion(X)

    X_valid, _, X_metadata = check_is_scitype(
        X, scitype="Panel", return_metadata=True
    )
    if not X_valid:
        # sentences are separated explicitly, adjacent f-strings are joined as-is
        raise TypeError(
            f"X is not of a supported input data type. "
            f"X must be of type np.ndarray or pd.DataFrame, found {type(X)}. "
            f"Use datatypes.check_is_mtype to check conformance with "
            f"specifications."
        )

    n_cases = X_metadata["n_instances"]
    if n_cases < enforce_min_instances:
        raise ValueError(
            f"Minimum number of cases required is {enforce_min_instances} but X "
            f"has : {n_cases}"
        )

    # compare properties of the data against what the estimator can handle
    missing = X_metadata["has_nans"]
    multivariate = not X_metadata["is_univariate"]
    unequal = not X_metadata["is_equal_length"]
    self._check_capabilities(missing, multivariate, unequal)

    return convert_to(
        X,
        to_type=self.get_tag("X_inner_mtype"),
        as_scitype="Panel",
    )
def _pairwise_panel_x_check(self, X, var_name="X"):
    """Check and coerce input data.

    Method used to check the input and convert Series/Panel input
    to internally used format, as defined in X_inner_mtype tag

    Parameters
    ----------
    X: List of dfs, Numpy of dfs, 3d numpy
        sktime data container compliant with the Series or Panel scitype
        The value to be checked
    var_name: str, variable name to print in error messages

    Returns
    -------
    X: Panel data container of a supported format in X_inner_mtype
        usually df-list, list of pd.DataFrame, unless overridden

    Raises
    ------
    TypeError
        If X is not a valid data container of scitype Series or Panel.
    RuntimeError
        If the check passes but reports a scitype other than Series or Panel.
    """
    X_valid, _, metadata = check_is_scitype(
        X, ["Series", "Panel"], return_metadata=True, var_name=var_name
    )

    # validity is checked before metadata is touched, so that invalid input
    # always produces the informative message below
    if not X_valid:
        msg = (
            "X and X2 must be in an sktime compatible format, "
            "of scitype Series or Panel, "
            "for instance a pandas.DataFrame with sktime compatible time indices, "
            "or with MultiIndex and lowest level a sktime compatible time index. "
            "See the data format tutorial examples/AA_datatypes_and_datasets.ipynb"
        )
        raise TypeError(msg)

    X_scitype = metadata["scitype"]

    # if the input is a single series, convert it to a Panel
    if X_scitype == "Series":
        X = convert_Series_to_Panel(X)
    # can't be anything else if check_is_scitype is working properly
    elif X_scitype != "Panel":
        raise RuntimeError("Unexpected error in check_is_scitype, check validity")

    X_inner_mtype = self.get_tag("X_inner_mtype")
    X_coerced = convert_to(X, to_type=X_inner_mtype, as_scitype="Panel")

    return X_coerced
def _transform(self, X, y=None):
    """Convert a panel of time series to the "numpyflat" mtype.

    Parameters
    ----------
    X : pandas DataFrame or 3D np.ndarray
        panel of time series to transform
    y : ignored argument for interface compatibility

    Returns
    -------
    Xt : X converted to mtype "numpyflat", as scitype Panel
    """
    return convert_to(X, to_type="numpyflat", as_scitype="Panel")
def plot_cluster_algorithm(model: TimeSeriesLloyds, X: TimeSeriesInstances, k: int):
    """Plot the results from a univariate partitioning algorithm.

    One subplot is drawn per cluster, showing the series assigned to the
    cluster together with the cluster center.

    Parameters
    ----------
    model: BaseClusterer
        Clustering model to plot, must provide ``predict`` and ``cluster_centers_``
    X: np.ndarray or pd.Dataframe or List[pd.Dataframe]
        The series to predict the values for.
        A pd.DataFrame is converted to "numpy3D", anything else is used as is.
    k: int
        Number of centers
    """
    _check_soft_dependencies("matplotlib")
    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt

    # predict_series must be bound for every input type, not only for DataFrames
    if isinstance(X, pd.DataFrame):
        predict_series = convert_to(X, "numpy3D")
    else:
        predict_series = X

    plt.rcParams["figure.dpi"] = 100
    indexes = model.predict(predict_series)
    centers = model.cluster_centers_
    series_values = _get_cluster_values(indexes, predict_series, k)

    # figsize is handed to subplots directly: a separate plt.figure call would
    # only leave an empty extra figure behind, since subplots opens its own.
    # squeeze=False keeps axes a 2D array, also for k == 1.
    _, axes = plt.subplots(nrows=k, ncols=1, figsize=(5, 10), squeeze=False)
    for i in range(k):
        _plot(series_values[i], centers[i], axes[i, 0])

    blue_patch = mpatches.Patch(color="blue", label="Series that belong to the cluster")
    red_patch = mpatches.Patch(color="red", label="Cluster centers")
    plt.legend(
        handles=[red_patch, blue_patch],
        loc="upper center",
        bbox_to_anchor=(0.5, -0.40),
        fancybox=True,
        shadow=True,
        ncol=5,
    )
    plt.tight_layout()
    plt.show()
def _pairwise_panel_x_check(self, X, var_name="X"):
    """Check and coerce input data.

    Method used to check the input and convert Series/Panel input
    to internally used format, as defined in X_inner_mtype tag

    Parameters
    ----------
    X: List of dfs, Numpy of dfs, 3d numpy
        The value to be checked
    var_name: str, variable name to print in error messages

    Returns
    -------
    X: Panel data container of a supported format in X_inner_mtype
        usually df-list, list of pd.DataFrame, unless overridden

    Raises
    ------
    TypeError
        If X is not a valid data container of scitype Series or Panel.
    RuntimeError
        If the check passes but reports a scitype other than Series or Panel.
    """
    X_valid, _, metadata = check_is_scitype(
        X, ["Series", "Panel"], return_metadata=True, var_name=var_name
    )

    # validity is checked before metadata is touched, so that invalid input
    # always produces the intended TypeError below
    if not X_valid:
        raise TypeError("X/X2 must be of Series or Panel scitype")

    X_scitype = metadata["scitype"]

    # if the input is a single series, convert it to a Panel
    if X_scitype == "Series":
        X = convert_Series_to_Panel(X)
    # can't be anything else if check_is_scitype is working properly
    elif X_scitype != "Panel":
        raise RuntimeError("Unexpected error in check_is_scitype, check validity")

    X_inner_mtype = self.get_tag("X_inner_mtype")
    X_coerced = convert_to(X, to_type=X_inner_mtype, as_scitype="Panel")

    return X_coerced
def _convert_X(self, X):
    """Convert X to the mtype given by the "X_inner_mtype" tag.

    Parameters
    ----------
    self : this classifier
    X : pd.DataFrame or np.ndarray. Input attribute data

    Returns
    -------
    X : Panel data container of the inner mtype
        Unchanged or converted input data
    """
    return convert_to(
        X,
        to_type=self.get_tag("X_inner_mtype"),
        as_scitype="Panel",
    )
def make_clustering_problem(
    n_instances=20,
    n_columns=1,
    n_timepoints=20,
    return_numpy=False,
    random_state=None,
):
    """Make Clustering Problem.

    All arguments are handed on to ``_make_panel_X``. If ``return_numpy`` is
    True, the generated panel is additionally converted to "numpy3D".

    Returns
    -------
    X : the generated panel, converted to "numpy3D" if return_numpy is True
    """
    panel = _make_panel_X(
        n_instances=n_instances,
        n_columns=n_columns,
        n_timepoints=n_timepoints,
        return_numpy=return_numpy,
        random_state=random_state,
    )
    return convert_to(panel, "numpy3D") if return_numpy else panel
def plot_dba_example():
    """Plot DBA barycenters of the arrow head training set for four distances.

    Draws four vertically stacked subplots sharing their x-axis. Each shows all
    training series in faint black and the barycenter computed by ``dba`` in red.
    """
    import matplotlib.pyplot as plt

    X_train, _ = load_arrow_head(split="train")
    X_train = convert_to(X_train, "numpy3D")

    # subplot title and dba keyword arguments, in plotting order
    panels = (
        (
            "Sktime DBA (using dtw)",
            {"distance_metric": "dtw", "medoids_distance_metric": "dtw"},
        ),
        (
            "Sktime DBA (using wdtw)",
            {"distance_metric": "wdtw", "medoids_distance_metric": "wdtw"},
        ),
        (
            "Sktime DBA (using lcss)",
            {"distance_metric": "lcss", "medoids_distance_metric": "lcss"},
        ),
        ("Sktime DBA (using msm)", {"distance_metric": "msm"}),
    )

    shared_ax = plt.subplot()
    for row, (title, dba_kwargs) in enumerate(panels, start=1):
        plt.subplot(4, 1, row, sharex=shared_ax)
        plt.title(title)
        barycenter = dba(X_train, **dba_kwargs)
        for series in X_train:
            plt.plot(series.ravel(), "k-", alpha=0.2)
        plt.plot(barycenter.ravel(), "r-", linewidth=2)

    # x-axis spans the length of the series
    shared_ax.set_xlim([0, X_train.shape[2]])

    # show the plot(s)
    plt.tight_layout()
    plt.show()
def convert_Hierarchical_to_Panel(obj, store=None):
    """Convert single-panel hierarchical object to a panel.

    Removes index levels to obtain a panel: only the last two levels
    of the MultiIndex are kept, all levels above them are dropped.

    Assumes input is conformant with Hierarchical mtype.
    This method does not perform full mtype checks, use mtype or check_is_mtype for
    checks.

    Parameters
    ----------
    obj: an object of scitype Hierarchical.
    store: not used by this function

    Returns
    -------
    returns a data container of mtype pd-multiindex, of scitype Panel
    """
    obj_df = convert_to(obj, to_type="pd_multiindex_hier", as_scitype="Hierarchical")
    obj_df = obj_df.copy()

    # get_level_values only accepts a single level, not a list of levels,
    # hence the levels above the last two are dropped instead
    n_levels_to_drop = obj_df.index.nlevels - 2
    if n_levels_to_drop > 0:
        obj_df.index = obj_df.index.droplevel(list(range(n_levels_to_drop)))

    return obj_df
def convert_Panel_to_Hierarchical(obj, store=None):
    """Convert panel to a single-panel hierarchical object.

    Prepends one constant index level, named "__level2" and with value 0, to the
    2-level MultiIndex of the panel, which yields a 3-level MultiIndex.

    Assumes input is conformant with one of the Panel mtypes.
    This method does not perform full mtype checks, use mtype or check_is_mtype for
    checks.

    Parameters
    ----------
    obj: an object of scitype Panel.
    store: not used by this function

    Returns
    -------
    returns a data container of mtype pd_multiindex_hier
    """
    frame = convert_to(obj, to_type="pd-multiindex", as_scitype="Panel").copy()

    # the new level is appended as innermost level first, then moved to the front
    frame["__level2"] = 0
    return frame.set_index(["__level2"], append=True).reorder_levels([2, 0, 1])
def convert_Series_to_Hierarchical(obj, store=None):
    """Convert series to a single-series hierarchical object.

    Prepends two constant index levels, named "__level1" and "__level2" and both
    with value 0, to the index of the series, which yields a 3-level MultiIndex.

    Assumes input is conformant with one of the three Series mtypes.
    This method does not perform full mtype checks, use mtype or check_is_mtype for
    checks.

    Parameters
    ----------
    obj: an object of scitype Series, of mtype pd.DataFrame, pd.Series, or np.ndarray.
    store: not used by this function

    Returns
    -------
    returns a data container of mtype pd_multiindex_hier
    """
    frame = convert_to(obj, to_type="pd.DataFrame", as_scitype="Series").copy()

    added_levels = ["__level1", "__level2"]
    for level_name in added_levels:
        frame[level_name] = 0

    # the new levels are appended after the time index, then moved to the front
    return frame.set_index(added_levels, append=True).reorder_levels([1, 2, 0])
def update(self, X, y=None, Z=None, update_params=True):
    """Update transformer with X, optionally y.

    State required:
        Requires state to be "fitted".

    Accesses in self:
        Fitted model attributes ending in "_".
        self._is_fitted

    Writes to self:
        May update fitted model attributes ending in "_".

    Parameters
    ----------
    X : Series or Panel, any supported mtype
        Data to fit transform to, of python type as follows:
            Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
            Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                nested pd.DataFrame, or pd.DataFrame in long/wide format
            subject to sktime mtype format specifications, for further details see
                examples/AA_datatypes_and_datasets.ipynb
    y : Series or Panel, default=None
        Additional data, e.g., labels for transformation
    Z : possible alias for X; should not be passed when X is passed
        alias Z will be deprecated in version 0.10.0
    update_params : bool, default=True
        whether the model is updated. Yes if true, if false, simply skips call.
        argument exists for compatibility with forecasting module.

    Returns
    -------
    self : a fitted instance of the estimator

    Raises
    ------
    ValueError
        If X is not of an allowed mtype, if X is multivariate although the
        "univariate-only" tag is set, or if X is a Panel that has to be
        vectorized over while y is passed.
    """
    X = _handle_alias(X, Z)

    # nothing to do if updating is switched off, or if fitting happens in transform
    if not update_params or self.get_tag("fit-in-transform"):
        return self

    # input checks on X
    ###################
    is_valid, err_msg, X_meta = check_is_mtype(
        X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
    )
    if not is_valid:
        raise ValueError(err_msg)

    if self.get_tag("univariate-only") and not X_meta["is_univariate"]:
        raise ValueError("X must be univariate but is not")

    # scitype of the input vs scitypes supported by the inner methods
    #################################################################
    input_scitype = X_meta["scitype"]
    inner_mtypes = _coerce_to_list(self.get_tag("X_inner_mtype"))
    inner_scitypes = mtype_to_scitype(inner_mtypes, return_unique=True)

    # Series vs Panel handling, three cases:
    # 1. the scitype of X is supported internally: straight to mtype conversion
    # 2. only Panel is supported, X is Series: X is treated as one-instance Panel
    # 3. only Series is supported, X is Panel: vectorize over the instances,
    #    which is not available if y is passed
    if input_scitype == "Series" and "Series" not in inner_scitypes:
        X = convert_Series_to_Panel(X)
    elif input_scitype == "Panel" and "Panel" not in inner_scitypes:
        if y is not None:
            raise ValueError(
                "no default behaviour if _fit does not support Panel, "
                " but X is Panel and y is not None"
            )
        instances = convert_to(
            X, to_type="df-list", as_scitype="Panel", store=self._converter_store_X
        )
        # one clone of self is fitted per instance
        self.transformers_ = [clone(self).fit(Xi) for Xi in instances]
        # the function is left here, so the fitted flag is set right away
        self._is_fitted = True
        return self

    X_inner, y_inner = self._convert_X_y(X, y)

    # todo: uncomment this once Z is completely gone
    # self._update(X=X_inner, y=y_inner)
    # less robust workaround until then
    self._update(X_inner, y_inner)

    return self
def _check_X_y(self, X=None, y=None):
    """Check and coerce X/y for fit/predict/update functions.

    Parameters
    ----------
    y : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D), optional (default=None)
        Time series to check.
    X : pd.DataFrame, or 2D np.array, optional (default=None)
        Exogeneous time series.

    Returns
    -------
    X_inner : Series compatible with self.get_tag("X_inner_mtype") format
        converted/coerced version of X, mtype determined by "X_inner_mtype" tag
    y_inner : Series compatible with self.get_tag("y_inner_mtype") format
        converted/coerced version of y, mtype determined by "y_inner_mtype" tag

    Writes to self
    --------------
    _y_mtype_last_seen : str, mtype of y, written only if y is not None
    _converter_store_y : passed as store to the conversion of y
    """
    # tags steering the checks
    univariate_only = self.get_tag("scitype:y") == "univariate"
    multivariate_only = self.get_tag("scitype:y") == "multivariate"
    index_type = self.get_tag("enforce_index_type")

    # checks on y; the mtype seen is remembered on self
    if y is not None:
        y = check_series(
            y,
            enforce_univariate=univariate_only,
            enforce_multivariate=multivariate_only,
            enforce_index_type=index_type,
            allow_None=False,
            allow_empty=True,
            var_name="y",
        )
        self._y_mtype_last_seen = mtype(y, as_scitype="Series")

    # checks on X; optionally also of its index against the index of y
    if X is not None:
        X = check_series(X, enforce_index_type=index_type, var_name="X")
        if self.get_tag("X-y-must-have-same-index"):
            check_equal_time_index(X, y)

    # conversion of y and X to an inner mtype, both are treated as Series
    y_inner = convert_to(
        y,
        to_type=self.get_tag("y_inner_mtype"),
        as_scitype="Series",
        store=self._converter_store_y,
    )
    X_inner = convert_to(
        X,
        to_type=self.get_tag("X_inner_mtype"),
        as_scitype="Series",
    )

    return X_inner, y_inner
def predict(
    self,
    fh=None,
    X=None,
    return_pred_int=False,
    alpha=DEFAULT_ALPHA,
    keep_old_return_type=True,
):
    """Forecast time series at future horizon.

    State required:
        Requires state to be "fitted".

    Accesses in self:
        Fitted model attributes ending in "_".
        self.cutoff, self._is_fitted

    Writes to self:
        Stores fh to self.fh if fh is passed and has not been passed previously.

    Parameters
    ----------
    fh : int, list, np.ndarray or ForecastingHorizon
        Forecasting horizon
    X : pd.DataFrame, or 2D np.ndarray, optional (default=None)
        Exogeneous time series to predict from
        if self.get_tag("X-y-must-have-same-index"), X.index must contain fh.index
    return_pred_int : bool, optional (default=False)
        If True, returns prediction intervals for given alpha values.
    alpha : float or list, optional (default=DEFAULT_ALPHA)
    keep_old_return_type : bool, optional (default=True)
        Only used if return_pred_int is True and intervals are obtained from
        predict_interval: if True, these are converted to the old return format.

    Returns
    -------
    y_pred : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
        Point forecasts at fh, with same index as fh
        y_pred has same type as y passed in fit (most recently)
    y_pred_int : pd.DataFrame - only if return_pred_int=True
        in this case, return is 2-tuple (otherwise a single y_pred)
        Prediction intervals

    Raises
    ------
    NotImplementedError
        If return_pred_int is True but the "capability:pred_int" tag is not set.
    """
    # handle inputs
    self.check_is_fitted()
    self._set_fh(fh)

    # todo deprecate NotImplementedError in v 10.0.1
    if return_pred_int and not self.get_tag("capability:pred_int"):
        raise NotImplementedError(
            f"{self.__class__.__name__} does not have the capability to return "
            "prediction intervals. Please set return_pred_int=False. If you "
            "think this estimator should have the capability, please open "
            "an issue on sktime."
        )

    # input check and conversion for X
    X_inner = self._check_X(X=X)

    # this is how it is supposed to be after the refactor is complete and effective
    if not return_pred_int:
        y_pred = self._predict(self.fh, X=X_inner)

        # convert to output mtype, identical with last y mtype seen
        return convert_to(
            y_pred,
            self._y_mtype_last_seen,
            as_scitype="Series",
            store=self._converter_store_y,
        )

    # keep following code for downward compatibility,
    # todo: can be deleted once refactor is completed and effective,
    # todo: deprecate in v 10
    warn(
        "return_pred_int in predict() will be deprecated; "
        "please use predict_interval() instead to generate "
        "prediction intervals.",
        FutureWarning,
    )

    if not self._has_predict_quantiles_been_refactored():
        # this means the method is not refactored:
        # _predict returns point forecasts and intervals together
        old_return = self._predict(
            self.fh,
            X=X_inner,
            return_pred_int=return_pred_int,
            alpha=alpha,
        )
        y_pred = old_return[0]
        pred_int = old_return[1]
    else:
        # it's already refactored: point forecasts and intervals are obtained
        # separately, so the point forecasts have to be computed here as well
        y_pred = self._predict(self.fh, X=X_inner)

        # a scalar alpha is wrapped, so that both documented forms can be iterated
        alphas = [alpha] if isinstance(alpha, (int, float)) else alpha
        # opposite definition previously vs. now
        coverage = [1 - a for a in alphas]
        pred_int = self.predict_interval(fh=fh, X=X_inner, coverage=coverage)

        if keep_old_return_type:
            pred_int = _convert_new_to_old_pred_int(pred_int, alpha)

    # convert to output mtype, identical with last y mtype seen
    y_out = convert_to(
        y_pred,
        self._y_mtype_last_seen,
        as_scitype="Series",
        store=self._converter_store_y,
    )

    return (y_out, pred_int)
np.argmax, np.any, ] X1_list_df = make_transformer_problem(n_instances=4, n_columns=4, n_timepoints=5, random_state=1, return_numpy=False) X2_list_df = make_transformer_problem(n_instances=5, n_columns=4, n_timepoints=5, random_state=2, return_numpy=False) X1_num_pan = convert_to(X1_list_df, to_type="numpy3D") X2_num_pan = convert_to(X2_list_df, to_type="numpy3D") def test_aggr(): """Test that AggrDist produces expected pre-computed result on fixtures.""" # test 3d numpy _run_aggr_dist_test(X1_num_pan, X2_num_pan) # test list of df _run_aggr_dist_test(X1_list_df, X2_list_df) def _run_aggr_dist_test(x, y): # default parametersc default_params = AggrDist(transformer=ScipyDist())
def plot_correlations(
    series,
    lags=24,
    alpha=0.05,
    zero_lag=True,
    acf_fft=False,
    acf_adjusted=True,
    pacf_method="ywadjusted",
    suptitle=None,
    series_title=None,
    acf_title="Autocorrelation",
    pacf_title="Partial Autocorrelation",
):
    """Plot series and its ACF and PACF values.

    The series is drawn across the top row of the figure, the ACF plot
    bottom left and the PACF plot bottom right.

    Parameters
    ----------
    series : pd.Series
        A time series.
    lags : int, default = 24
        Number of lags to include in ACF and PACF plots
    alpha : float, default = 0.05
        Alpha value used to set confidence intervals. Alpha = 0.05 results in
        95% confidence interval with standard deviation calculated via
        Bartlett's formula.
    zero_lag : bool, default = True
        If True, start ACF and PACF plots at 0th lag
    acf_fft : bool, default = False
        Whether to compute ACF via FFT.
    acf_adjusted : bool, default = True
        If True, denominator of ACF calculations uses n-k instead of n, where
        n is number of observations and k is the lag.
    pacf_method : str, default = 'ywadjusted'
        Method to use in calculation of PACF.
    suptitle : str, default = None
        The text to use as the Figure's suptitle.
    series_title : str, default = None
        Used to set the title of the series plot if provided. Otherwise, series
        plot has no title.
    acf_title : str, default = 'Autocorrelation'
        Used to set title of ACF plot.
    pacf_title : str, default = 'Partial Autocorrelation'
        Used to set title of PACF plot.

    Returns
    -------
    fig : matplotlib.figure.Figure

    axes : np.ndarray
        Array of the figure's Axes objects
    """
    _check_soft_dependencies("matplotlib")
    import matplotlib.pyplot as plt

    series = convert_to(check_y(series), "pd.Series", "Series")

    # figure layout: 2 x 2 grid, with the series spanning the whole top row
    fig = plt.figure(constrained_layout=True, figsize=(12, 8))
    grid = fig.add_gridspec(2, 2)

    series_ax = fig.add_subplot(grid[0, :])
    if series_title is not None:
        series_ax.set_title(series_title)
    acf_ax = fig.add_subplot(grid[1, 0])
    pacf_ax = fig.add_subplot(grid[1, 1])

    # arguments common to the ACF and the PACF plot
    shared_kwargs = {"lags": lags, "zero": zero_lag, "alpha": alpha}

    plot_series(series, ax=series_ax)
    plot_acf(
        series,
        ax=acf_ax,
        title=acf_title,
        adjusted=acf_adjusted,
        fft=acf_fft,
        **shared_kwargs,
    )
    plot_pacf(
        series,
        ax=pacf_ax,
        title=pacf_title,
        method=pacf_method,
        **shared_kwargs,
    )

    if suptitle is not None:
        fig.suptitle(suptitle, size="xx-large")

    return fig, np.array(fig.get_axes())
def transform(self, X, y=None, Z=None):
    """Transform X and return a transformed version.

    State required:
        Requires state to be "fitted".

    Accesses in self:
        Fitted model attributes ending in "_".
        self._is_fitted

    Parameters
    ----------
    X : Series or Panel, any supported mtype
        Data to be transformed, of python type as follows:
            Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
            Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                nested pd.DataFrame, or pd.DataFrame in long/wide format
            subject to sktime mtype format specifications, for further details see
                examples/AA_datatypes_and_datasets.ipynb
    y : Series or Panel, default=None
        Additional data, e.g., labels for transformation
    Z : possible alias for X; should not be passed when X is passed
        alias Z will be deprecated in version 0.10.0

    Returns
    -------
    transformed version of X
    type depends on type of X and scitype:transform-output tag:
        |          | `transform`  |                        |
        |   `X`    |  `-output`   |     type of return     |
        |----------|--------------|------------------------|
        | `Series` | `Primitives` | `pd.DataFrame` (1-row) |
        | `Panel`  | `Primitives` | `pd.DataFrame`         |
        | `Series` | `Series`     | `Series`               |
        | `Panel`  | `Series`     | `Panel`                |
        | `Series` | `Panel`      | `Panel`                |
    instances in return correspond to instances in `X`
    combinations not in the table are currently not supported

    Explicitly, with examples:
        if `X` is `Series` (e.g., `pd.DataFrame`) and `transform-output` is `Series`
            then the return is a single `Series` of the same mtype
            Example: detrending a single series
        if `X` is `Panel` (e.g., `pd-multiindex`) and `transform-output` is `Series`
            then the return is `Panel` with same number of instances as `X`
                (the transformer is applied to each input Series instance)
            Example: all series in the panel are detrended individually
        if `X` is `Series` or `Panel` and `transform-output` is `Primitives`
            then the return is `pd.DataFrame` with as many rows as instances in `X`
            Example: i-th row of the return has mean and variance of the i-th series
        if `X` is `Series` and `transform-output` is `Panel`
            then the return is a `Panel` object of type `pd-multiindex`
            Example: i-th instance of the output is the i-th window running over `X`

    Raises
    ------
    ValueError
        If X is not of an allowed mtype, if X is not univariate while the
        "univariate-only" tag is set, or if X is a Panel that has to be
        processed instance by instance and y is not None.
    RuntimeError
        If X is processed instance by instance and the number of instances
        differs from the number of transformers fitted in `fit`.
    """
    X = _handle_alias(X, Z)

    # check whether is fitted, unless fit-in-transform is true
    if self.get_tag("fit-in-transform"):
        # X is already the resolved input; X and Z are aliases and must not
        # both be passed, so Z is not forwarded
        self.fit(X=X, y=y)
    else:
        self.check_is_fitted()

    # input checks and minor coercions on X, y
    ###########################################

    valid, msg, metadata = check_is(
        X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
    )
    if not valid:
        raise ValueError(msg)

    # checking X
    enforce_univariate = self.get_tag("univariate-only")
    if enforce_univariate and not metadata["is_univariate"]:
        raise ValueError("X must be univariate but is not")

    # retrieve mtypes/scitypes of all objects
    #########################################

    X_input_mtype = mtype(X)
    X_input_scitype = mtype_to_scitype(X_input_mtype)
    y_input_mtype = mtype(y)
    y_input_scitype = mtype_to_scitype(y_input_mtype)

    output_scitype = self.get_tag("scitype:transform-output")

    X_inner_mtype = self.get_tag("X_inner_mtype")
    if not isinstance(X_inner_mtype, list):
        X_inner_mtype = [X_inner_mtype]
    X_inner_scitypes = list({mtype_to_scitype(mt) for mt in X_inner_mtype})

    y_inner_mtype = self.get_tag("y_inner_mtype")
    if not isinstance(y_inner_mtype, list):
        y_inner_mtype = [y_inner_mtype]

    # treating Series vs Panel conversion for X
    ###########################################

    # there are three cases to treat:
    # 1. if the internal _fit supports X's scitype, move on to mtype conversion
    # 2. internal only has Panel but X is Series: consider X as one-instance Panel
    # 3. internal only has Series but X is Panel: loop over instances
    #     currently this is enabled by conversion to df-list mtype
    #     and this does not support y (unclear what should happen here)

    # 1. nothing to do - simply don't enter any of the ifs below
    #   the "ifs" for case 2 and 3 below are skipped under the condition
    #       X_input_scitype in X_inner_scitypes
    #   case 2 has an "else" which remembers that it wasn't entered

    # 2. internal only has Panel but X is Series: consider X as one-instance Panel
    if (
        X_input_scitype == "Series"
        and "Series" not in X_inner_scitypes
        and "Panel" in X_inner_scitypes
    ):
        # convert the Series X to a one-element Panel
        X = convert_Series_to_Panel(X)
        # remember that we converted the Series to a one-element Panel
        X_was_Series = True
    else:
        # remember that we didn't convert a Series to a one-element Panel
        X_was_Series = False

    # 3. internal only has Series but X is Panel: loop over instances
    if (
        X_input_scitype == "Panel"
        and "Panel" not in X_inner_scitypes
        and "Series" in X_inner_scitypes
    ):
        if y is not None:
            raise ValueError(
                "no default behaviour if _fit does not support Panel, "
                " but X is Panel and y is not None"
            )

        X = convert_to(X, to_type="df-list", as_scitype="Panel")

        if self.get_tag("fit-in-transform"):
            # no fitted state needed: apply a fresh clone to every instance
            Xt = [clone(self).transform(Xi) for Xi in X]
        else:
            # one transformer per instance, fitted in fit
            transformers = self.transformers_
            if len(transformers) != len(X):
                raise RuntimeError(
                    "found different number of instances in transform than in fit"
                )
            Xt = [
                transformer.transform(Xi) for transformer, Xi in zip(transformers, X)
            ]
        # now we have a list of transformed instances

        # if the output is Series, Xt is a Panel and we convert back
        if output_scitype == "Series":
            Xt = convert_to(Xt, to_type=X_input_mtype, as_scitype="Panel")

        # if the output is Primitives, we have a list of one-row dataframes
        # we concatenate those and number the rows by instance;
        # X is a plain list here, so it has no index that could be copied over
        elif output_scitype == "Primitives":
            Xt = pd.concat(Xt)
            Xt = Xt.reset_index(drop=True)
        return Xt

    # convert X/y to supported inner type, if necessary
    ###################################################

    # variables for the scitype of the current X (possibly converted)
    #     y wasn't converted so we can use y_input_scitype
    X_mtype = mtype(X)
    X_scitype = mtype_to_scitype(X_mtype)

    # subset to the mtypes that are of the same scitype as X/y
    X_inner_mtype = [mt for mt in X_inner_mtype if mtype_to_scitype(mt) == X_scitype]

    y_inner_mtype = [
        mt for mt in y_inner_mtype if mtype_to_scitype(mt) == y_input_scitype
    ]

    # convert X and y to a supported internal type
    #  if X/y type is already supported, no conversion takes place
    X_inner = convert_to(
        X,
        to_type=X_inner_mtype,
        as_scitype=X_scitype,
    )
    y_inner = convert_to(
        y,
        to_type=y_inner_mtype,
        as_scitype=y_input_scitype,
    )

    # carry out the transformation
    ###################################################

    # todo: uncomment this once Z is completely gone
    # Xt = self._transform(X=X_inner, y=y_inner)
    # less robust workaround until then
    Xt = self._transform(X_inner, y_inner)

    # convert transformed X back to input mtype
    ###########################################

    # if we converted Series to "one-instance-Panel", revert that
    if X_was_Series and output_scitype == "Series":
        Xt = convert_Panel_to_Series(Xt)

    if output_scitype == "Series":
        Xt = convert_to(
            Xt,
            to_type=X_input_mtype,
            as_scitype=X_input_scitype,
        )
    elif output_scitype == "Primitives":
        # we "abuse" the Series converter to ensure df output
        Xt = convert_to(
            Xt,
            to_type="pd.DataFrame",
            as_scitype="Series",
        )
    # else: output_scitype is "Panel" and no conversion is needed

    return Xt
def fit(self, X, y=None, Z=None):
    """Fit transformer to X, optionally to y.

    State change:
        Changes state to "fitted".

    Writes to self:
        Sets is_fitted flag to True.
        Sets fitted model attributes ending in "_".

    Parameters
    ----------
    X : Series or Panel, any supported mtype
        Data to fit transform to, of python type as follows:
            Series: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
            Panel: pd.DataFrame with 2-level MultiIndex, list of pd.DataFrame,
                nested pd.DataFrame, or pd.DataFrame in long/wide format
            subject to sktime mtype format specifications, for further details see
                examples/AA_datatypes_and_datasets.ipynb
    y : Series or Panel, default=None
        Additional data, e.g., labels for transformation
    Z : possible alias for X; should not be passed when X is passed
        alias Z will be deprecated in version 0.10.0

    Returns
    -------
    self : a fitted instance of the estimator

    Raises
    ------
    ValueError
        If X is not of an allowed mtype, if X is not univariate while the
        "univariate-only" tag is set, or if X is a Panel that has to be
        fitted instance by instance and y is not None.
    RuntimeError
        If, after Series/Panel coercion, the scitype of X is still not among
        the scitypes of the "X_inner_mtype" tag.
    """
    X = _handle_alias(X, Z)

    self._is_fitted = False

    # transformers that fit inside transform have nothing to do here
    if self.get_tag("fit-in-transform"):
        self._is_fitted = True
        return self

    # input checks on X
    ###################

    valid, msg, metadata = check_is(
        X, mtype=self.ALLOWED_INPUT_MTYPES, return_metadata=True, var_name="X"
    )
    if not valid:
        raise ValueError(msg)

    if self.get_tag("univariate-only") and not metadata["is_univariate"]:
        raise ValueError("X must be univariate but is not")

    # scitypes of the inputs and of the supported inner types
    #########################################################

    x_in_scitype = mtype_to_scitype(mtype(X))
    y_in_scitype = mtype_to_scitype(mtype(y))

    x_inner_mtypes = self.get_tag("X_inner_mtype")
    if not isinstance(x_inner_mtypes, list):
        x_inner_mtypes = [x_inner_mtypes]
    x_inner_scitypes = {mtype_to_scitype(mt) for mt in x_inner_mtypes}

    y_inner_mtypes = self.get_tag("y_inner_mtype")
    if not isinstance(y_inner_mtypes, list):
        y_inner_mtypes = [y_inner_mtypes]

    # Series vs Panel handling for X
    ################################
    # three cases:
    # 1. the inner _fit supports the scitype of X: nothing to do
    # 2. inner supports only Panel, X is Series: treat X as one-instance Panel
    # 3. inner supports only Series, X is Panel: vectorize over instances,
    #    via the df-list mtype; not supported if y is passed - estimators
    #    that vectorize over y must implement this individually

    wrap_as_panel = x_in_scitype == "Series" and "Series" not in x_inner_scitypes
    vectorize = x_in_scitype == "Panel" and "Panel" not in x_inner_scitypes

    if wrap_as_panel:
        X = convert_Series_to_Panel(X)
    elif vectorize:
        if y is not None:
            raise ValueError(
                "no default behaviour if _fit does not support Panel, "
                " but X is Panel and y is not None"
            )
        instances = convert_to(X, to_type="df-list", as_scitype="Panel")
        # one clone per instance; each recursive fit does its own checks
        self.transformers_ = [clone(self).fit(instance) for instance in instances]
        # we leave the function here, so the fitted flag is set now
        self._is_fitted = True
        return self

    x_scitype = mtype_to_scitype(mtype(X))

    # for debugging, exception if the conversion fails (this should never happen)
    if x_scitype not in x_inner_scitypes:
        raise RuntimeError("conversion of X to X_inner unsuccessful, unexpected")

    # convert X/y to a supported inner type, if necessary
    #####################################################

    # only inner mtypes of the same scitype as X/y are conversion targets
    x_targets = [mt for mt in x_inner_mtypes if mtype_to_scitype(mt) == x_scitype]
    y_targets = [mt for mt in y_inner_mtypes if mtype_to_scitype(mt) == y_in_scitype]

    # if the type of X/y is already supported, no conversion takes place
    X_inner = convert_to(X, to_type=x_targets, as_scitype=x_scitype)
    y_inner = convert_to(y, to_type=y_targets, as_scitype=y_in_scitype)

    # todo: uncomment this once Z is completely gone
    # self._fit(X=X_inner, y=y_inner)
    # less robust workaround until then
    self._fit(X_inner, y_inner)

    self._is_fitted = True
    return self
def get_window(obj, window_length=None, lag=0):
    """Slice obj to the time index window with given length and lag.

    Returns time series or time series panel with time indices
        strictly greater than cutoff - lag - window_length, and
        equal or less than cutoff - lag.
    Cutoff is that of obj, as determined by get_cutoff.

    Parameters
    ----------
    obj : sktime compatible time series data container or None
        if not None, must be of one of the following mtypes:
            pd.Series, pd.DataFrame, np.ndarray, of Series scitype
            pd.multiindex, numpy3D, nested_univ, df-list, of Panel scitype
            pd_multiindex_hier, of Hierarchical scitype
    window_length : int or timedelta, optional, default=None
        must be int if obj is int indexed, timedelta if datetime indexed
        length of the window to slice to. None = window of infinite size,
        in which case obj is returned unchanged
    lag : int or timedelta, optional, default = 0
        must be int if obj is int indexed, timedelta if datetime indexed
        lag of the latest time in the window, with respect to cutoff of obj

    Returns
    -------
    obj sub-set to time indices in the semi-open interval
        (cutoff - window_length - lag, cutoff - lag]
        obj itself if window_length is None
        None if obj was None

    Raises
    ------
    ValueError
        If obj is not of Series, Panel, or Hierarchical scitype.
    """
    # nothing to slice: return before doing any import or check
    if window_length is None or obj is None:
        return obj

    from sktime.datatypes import check_is_scitype, convert_to

    valid, _, metadata = check_is_scitype(
        obj, scitype=["Series", "Panel", "Hierarchical"], return_metadata=True
    )
    if not valid:
        raise ValueError("obj must be of Series, Panel, or Hierarchical scitype")
    obj_in_mtype = metadata["mtype"]

    obj = convert_to(obj, GET_LATEST_WINDOW_SUPPORTED_MTYPES)

    # numpy3D (Panel) or np.ndarray (Series)
    if isinstance(obj, np.ndarray):
        # NOTE(review): a 3D array is taken to be numpy3D with layout
        # (instance, variable, time), so the window is cut on the last axis;
        # 1D/2D arrays are cut on the first axis, as before - confirm against
        # the mtype specification
        is_panel = obj.ndim == 3
        n_timepoints = obj.shape[-1] if is_panel else len(obj)
        # negative positions counted from the end, clipped to the array length
        window_start = max(-window_length - lag, -n_timepoints)
        window_end = max(-lag, -n_timepoints)
        # a stop of 0 would give an empty slice, "until the end" is None
        stop = None if window_end == 0 else window_end
        if is_panel:
            return obj[..., window_start:stop]
        return obj[window_start:stop]

    # pd.DataFrame(Series), pd-multiindex (Panel) and pd_multiindex_hier (Hierarchical)
    if isinstance(obj, pd.DataFrame):
        cutoff = get_cutoff(obj)
        win_start_excl = cutoff - window_length - lag
        win_end_incl = cutoff - lag

        # the time index is the last level of a MultiIndex
        if not isinstance(obj.index, pd.MultiIndex):
            time_indices = obj.index
        else:
            time_indices = obj.index.get_level_values(-1)

        win_select = (time_indices > win_start_excl) & (time_indices <= win_end_incl)
        obj_subset = obj.iloc[win_select]

        return convert_to(obj_subset, obj_in_mtype)

    raise ValueError(
        "bug in get_window, unreachable condition, ifs should be exhaustive"
    )
def _transform(self, X, y=None):
    """Transform X.

    Transform X, segments time-series in each column into random
    intervals using interval indices generated during `fit` and extracts
    features from each interval.

    Parameters
    ----------
    X : nested pandas.DataFrame of shape [n_instances, n_features]
        Nested dataframe with time-series in cells.
    y : ignored, present for interface compatibility

    Returns
    -------
    Xt : pandas.DataFrame
      Transformed pandas DataFrame with n_instances rows and one column
      for each (feature function, interval) pair, named
      "{start}_{end}_{function name}".

    Raises
    ------
    ValueError
        If the second dimension of X differs from the one seen in `fit`.
    TypeError
        Re-raised from a feature function if it is not caused by the
        function rejecting the `axis` keyword.
    """
    # Check input of feature calculators, i.e list of functions to be
    # applied to time-series
    features = _check_features(self.features)
    X = convert_to(X, "numpy3D")

    # Check that the input is of the same shape as the one passed
    # during fit.
    if X.shape[1] != self.input_shape_[1]:
        raise ValueError(
            "Number of columns of input is different from what was seen in `fit`"
        )
    # NOTE: equality of the time indices with those seen in `fit` is
    # currently not validated.

    n_instances, _, _ = X.shape
    intervals = self.intervals_

    # Allocate output array for transformed data, one column per
    # (feature function, interval) pair.
    Xt = np.zeros((n_instances, len(features) * len(intervals)))
    columns = []

    # The interpreter prefixes this with the callable's name; since
    # Python 3.10 that prefix is the qualified name, so only the stable
    # tail of the message is matched.
    axis_not_supported = "got an unexpected keyword argument 'axis'"

    col = 0
    for func in features:
        # TODO generalise to series-to-series functions and function kwargs
        for start, end in intervals:
            interval = X[:, :, start:end]

            # Try to use optimised computations over axis if possible,
            # otherwise iterate over rows.
            try:
                Xt[:, col] = func(interval, axis=-1).squeeze()
            except TypeError as e:
                if axis_not_supported not in str(e):
                    raise
                Xt[:, col] = np.apply_along_axis(
                    func, axis=2, arr=interval
                ).squeeze()

            col += 1
            columns.append(f"{start}_{end}_{func.__name__}")

    Xt = pd.DataFrame(Xt)
    Xt.columns = columns
    return Xt
def transform_single_feature(self, X, feature, case_id=None):
    """Transform data into a specified catch22 feature.

    Parameters
    ----------
    X : np.ndarray, 3D, in numpy3D mtype format
        or other sktime data container of Panel scitype
        A 2D array is read as (n_instances, series_length).
    feature : int, catch22 feature id or
              String, catch22 feature name.
        Integral floats (e.g. 3.0) are accepted and converted to int.
    case_id : int, identifier for the current set of cases. If the case_id is not
              None and the same as the previously used case_id, calculations from
              previous features will be reused.

    Returns
    -------
    Numpy array containing a catch22 feature for each input series.

    Raises
    ------
    ValueError
        If feature is not a valid catch22 id (integer in 0..21) or name,
        if X has more than one dimension per instance, or if case_id equals
        the previous case_id but the shape of X does not match the one
        stored for it.
    """
    if isinstance(feature, (int, np.integer, float)):
        if isinstance(feature, float):
            # an id is an index: reject 3.5, nan and inf rather than letting
            # them slip through the range check below
            if not feature.is_integer():
                raise ValueError("Invalid catch22 feature ID")
            feature = int(feature)
        if feature > 21 or feature < 0:
            raise ValueError("Invalid catch22 feature ID")
    elif isinstance(feature, str):
        if feature in feature_names:
            feature = feature_names.index(feature)
        else:
            raise ValueError("Invalid catch22 feature name")
    else:
        raise ValueError("catch22 feature name or ID required")

    if isinstance(X, pd.DataFrame):
        X = convert_to(X, "numpy3D")

    if len(X.shape) > 2:
        n_instances, n_dims, series_length = X.shape

        if n_dims > 1:
            raise ValueError(
                "transform_single_feature can only handle univariate series "
                "currently."
            )

        # drop the singleton variable axis
        X = np.reshape(X, (n_instances, -1))
    else:
        n_instances, series_length = X.shape

    if case_id is not None:
        if case_id != self._case_id:
            # new set of cases: reset the per-instance caches
            self._case_id = case_id
            self._st_n_instances = n_instances
            self._st_series_length = series_length
            self._outlier_series = [None] * n_instances
            self._smin = [None] * n_instances
            self._smax = [None] * n_instances
            self._smean = [None] * n_instances
            self._fft = [None] * n_instances
            self._ac = [None] * n_instances
            self._acfz = [None] * n_instances
        elif (
            n_instances != self._st_n_instances
            or series_length != self._st_series_length
        ):
            # cached values belong to data of a different shape
            raise ValueError(
                "Catch22: case_id is the same, but n_instances and "
                "series_length do not match last seen for single "
                "feature transform."
            )

    c22_list = Parallel(n_jobs=self.n_jobs)(
        delayed(self._transform_case_single)(
            X[i],
            feature,
            case_id,
            i,
        )
        for i in range(n_instances)
    )

    if self.replace_nans:
        # nan and both infinities are all mapped to 0
        c22_list = np.nan_to_num(c22_list, copy=False, nan=0, posinf=0, neginf=0)

    return np.asarray(c22_list)
def plot_series(
    *series, labels=None, markers=None, x_label=None, y_label=None, ax=None
):
    """Plot one or more time series.

    Parameters
    ----------
    series : pd.Series or iterable of pd.Series
        One or more time series
    labels : list, default = None
        Names of series, will be displayed in figure legend
    markers: list, default = None
        Markers of data points, if None the marker "o" is used by default.
        The length of the list has to match with the number of series.
    x_label : str, default = None
        Label of the x axis; if None, the x axis label is not set.
    y_label : str, default = None
        Label of the y axis; if None, the name of the first series is used.
    ax : matplotlib Axes, default = None
        Axes to plot on; if None, a new figure and Axes are created.

    Returns
    -------
    fig : plt.Figure
        Only returned if `ax` was None, i.e. if the figure was created here.
    ax : plt.Axis
        The Axes plotted on; returned on its own if `ax` was passed.

    Raises
    ------
    ValueError
        If the number of labels or markers differs from the number of series.
    """
    _check_soft_dependencies("matplotlib", "seaborn")
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FuncFormatter, MaxNLocator
    from matplotlib.cbook import flatten
    import seaborn as sns

    for y in series:
        check_y(y)

    series = [convert_to(y, "pd.Series", "Series") for y in series]

    n_series = len(series)
    _ax_kwarg_is_none = ax is None

    # labels
    if labels is not None:
        if n_series != len(labels):
            raise ValueError(
                "There must be one label for each time series, "
                "but found inconsistent numbers of series and labels."
            )
        legend = True
    else:
        labels = ["" for _ in range(n_series)]
        legend = False

    # markers
    if markers is not None:
        if n_series != len(markers):
            raise ValueError(
                "There must be one marker for each time series, "
                "but found inconsistent numbers of series and markers."
            )
    else:
        markers = ["o" for _ in range(n_series)]

    # create combined index
    index = series[0].index
    for y in series[1:]:
        # check index types
        check_consistent_index_type(index, y.index)
        index = index.union(y.index)

    # generate integer x-values: positions of each series in the combined index
    xs = [np.argwhere(index.isin(y.index)).ravel() for y in series]

    # create figure if no Axe provided for plotting
    if _ax_kwarg_is_none:
        fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25))

    colors = sns.color_palette("colorblind", n_colors=n_series)

    # plot series
    for x, y, color, label, marker in zip(xs, series, colors, labels, markers):

        # scatter if little data is available or index is not complete
        if len(x) <= 3 or not np.array_equal(np.arange(x[0], x[-1] + 1), x):
            plot_func = sns.scatterplot
        else:
            plot_func = sns.lineplot

        plot_func(x=x, y=y, ax=ax, marker=marker, label=label, color=color)

    # positions that carry a data point in any series; a set, since the
    # formatter below is called once per tick
    xs_flat = {int(pos) for pos in flatten(xs)}

    # set x label of data point to the matching index
    def format_fn(tick_val, tick_pos):
        if int(tick_val) in xs_flat:
            return index[int(tick_val)]
        else:
            return ""

    # dynamically set x label ticks and spacing from index labels
    ax.xaxis.set_major_formatter(FuncFormatter(format_fn))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Label the x and y axes
    if x_label is not None:
        ax.set_xlabel(x_label)

    _y_label = y_label if y_label is not None else series[0].name
    ax.set_ylabel(_y_label)

    if legend:
        ax.legend()
    if _ax_kwarg_is_none:
        return fig, ax
    else:
        return ax
def predict(self, fh=None, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA):
    """Forecast time series at future horizon.

    State required:
        Requires state to be "fitted".

    Accesses in self:
        Fitted model attributes ending in "_".
        self.cutoff, self._is_fitted

    Writes to self:
        Stores fh to self.fh if fh is passed and has not been passed in _fit.

    Parameters
    ----------
    fh : int, list, np.ndarray or ForecastingHorizon
        Forecasting horizon
    X : pd.DataFrame, or 2D np.ndarray, optional (default=None)
        Exogeneous time series to predict from
        if self.get_tag("X-y-must-have-same-index"), X.index must contain fh.index
    return_pred_int : bool, optional (default=False)
        If True, returns prediction intervals for given alpha values.
    alpha : float or list, optional (default=DEFAULT_ALPHA)
        Passed on unchanged to `_predict`.

    Returns
    -------
    y_pred : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
        Point forecasts at fh, with same index as fh
        y_pred has same type as y passed in fit (most recently)
    y_pred_int : pd.DataFrame - only if return_pred_int=True
        in this case, return is 2-tuple (otherwise a single y_pred)
        Prediction intervals

    Raises
    ------
    NotImplementedError
        If return_pred_int is True but the "capability:pred_int" tag is not set.
    """
    # handle inputs
    self.check_is_fitted()
    self._set_fh(fh)

    intervals_supported = self.get_tag("capability:pred_int")
    if return_pred_int and not intervals_supported:
        raise NotImplementedError(
            f"{self.__class__.__name__} does not have the capability to return "
            "prediction intervals. Please set return_pred_int=False. If you "
            "think this estimator should have the capability, please open "
            "an issue on sktime."
        )

    # input check and conversion for X
    X_inner = self._check_X(X=X)

    # this should be here, but it breaks the ARIMA forecasters
    #  that is because check_alpha converts to list, but ARIMA forecaster
    #  doesn't do the check, and needs it as a float or it breaks
    # todo: needs fixing in ARIMA and AutoARIMA
    # alpha = check_alpha(alpha)

    inner_result = self._predict(
        self.fh,
        X=X_inner,
        return_pred_int=return_pred_int,
        alpha=alpha,
    )

    # todo: clean this up, predictive intervals should be returned by other method
    if return_pred_int:
        # _predict returns (point forecast, intervals) in this case
        pred_int = inner_result[1]
        point_forecast = inner_result[0]
    else:
        point_forecast = inner_result

    # convert to output mtype, identical with last y mtype seen
    y_out = convert_to(
        point_forecast,
        self._y_mtype_last_seen,
        as_scitype="Series",
        store=self._converter_store_y,
    )

    if return_pred_int:
        return (y_out, pred_int)
    return y_out