def mase_loss(y_test, y_pred, y_train, sp=1):
    """Mean absolute scaled error (MASE).

    The forecast's mean absolute error is divided by the in-sample mean
    absolute error of a naive seasonal forecast on the training series,
    i.e. a forecast that repeats the value observed `sp` steps earlier.
    The resulting metric is scale-free, so it can be used to compare
    forecast methods on one series as well as accuracy across series.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.
    y_train : pandas Series of shape = (n_obs,)
        Observed training values.
    sp : int
        Seasonal periodicity of training data.

    Returns
    -------
    loss : float
        MASE loss, or ``np.nan`` when the naive seasonal forecast has zero
        mean absolute error on the training data (e.g. a flat series).

    Raises
    ------
    ValueError
        If the latest time point of `y_train` is not strictly before the
        earliest time point of `y_test`.

    References
    ----------
    ..[1]   Hyndman, R. J. (2006). "Another look at measures of forecast
            accuracy", Foresight, Issue 4.
    """
    # validate all three inputs and make sure test/pred share a time index
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    y_train = check_y(y_train)
    check_equal_time_index(y_test, y_pred)

    # the training window has to end before the test window begins
    if y_train is not None:
        check_time_index(y_train.index)
        if y_train.index.max() >= y_test.index.min():
            raise ValueError("Found `y_train` with time index which is not "
                             "before time index of `y_test`")

    # in-sample naive seasonal forecast: each value is predicted by the
    # observation `sp` steps before it
    train_values = np.asarray(y_train)
    naive_forecast = train_values[:-sp]
    naive_abs_errors = np.abs(train_values[sp:] - naive_forecast)
    mae_naive = np.mean(naive_abs_errors)

    # a flat training series makes the scaling term zero; return np.nan
    # rather than dividing by zero and producing np.inf
    if mae_naive == 0:
        return np.nan

    return np.mean(np.abs(y_test - y_pred)) / mae_naive
def _get_duration(x, y=None, coerce_to_int=False, unit=None): """Compute duration between the time indices. Parameters ---------- x : pd.Index, pd.Timestamp, pd.Period, int y : pd.Timestamp, pd.Period, int, optional (default=None) coerce_to_int : bool If True, duration is returned as integer value for given unit unit : str Time unit Returns ------- ret : duration type Duration """ if y is None: x = check_time_index(x) duration = x[-1] - x[0] else: assert isinstance(x, (int, np.integer, pd.Period, pd.Timestamp)) # check types allowing (np.integer, int) combinations to pass assert type(x) is type(y) or (isinstance(x, (np.integer, int)) and isinstance(x, (np.integer, int))) duration = x - y # coerce to integer result for given time unit if coerce_to_int and isinstance( x, (pd.PeriodIndex, pd.DatetimeIndex, pd.Period, pd.Timestamp)): if unit is None: # try to get the unit from the data if not given unit = _get_freq(x) duration = _coerce_duration_to_int(duration, freq=unit) return duration
def _check_y(y: ACCEPTED_Y_TYPES) -> pd.Index:
    """Extract a time index from the input to `split` and validate it.

    Parameters
    ----------
    y : pd.Series, pd.DataFrame, np.ndarray, or pd.Index
        Object from which the time index is taken.

    Returns
    -------
    check_time_index(y_index), where y_index is chosen as follows:
        if y is pd.Series or pd.DataFrame, y_index = y.index
        if y is np.ndarray, y_index = pd.Index(y.flatten())
        if y is pd.Index, y_index = y itself

    Raises
    ------
    TypeError
        if y is not of one of the expected types
    NotImplementedError
        propagated from `check_time_index` if y_index is not a supported
        sktime index type
    ValueError
        propagated from `check_time_index` if y_index is not monotonic
    """
    # pandas containers carry their own index
    if isinstance(y, (pd.Series, pd.DataFrame)):
        return check_time_index(index=y.index)

    # raw arrays are flattened to 1D and interpreted as the index values
    if isinstance(y, np.ndarray):
        return check_time_index(index=pd.Index(y.flatten()))

    # an index is validated as-is
    if isinstance(y, pd.Index):
        return check_time_index(index=y)

    raise TypeError(
        "Input to _check_y must be pd.Series, pd.DataFrame, np.ndarray, or pd.Index"
    )
def _check_y(y): """Check input to `split` function.""" if isinstance(y, (pd.Series, pd.DataFrame)): y_index = y.index if isinstance(y, np.ndarray): y_index = pd.Index(y) return check_time_index(y_index)
def _check_y(y):
    """Validate the time index of `y`.

    A pd.Series is reduced to its index; any other input is passed to
    `check_time_index` unchanged. The result of `check_time_index` is
    returned.
    """
    # allow for pd.Series by validating its index rather than the series
    index = y.index if isinstance(y, pd.Series) else y
    return check_time_index(index)
def _check_y(y):
    """Check input to `split` function.

    If `y` is a pd.Series its index is checked; otherwise `y` itself is
    handed to `check_time_index`. Returns whatever `check_time_index`
    returns.
    """
    if not isinstance(y, pd.Series):
        # non-Series input is assumed to already be index-like
        return check_time_index(y)
    return check_time_index(y.index)