def test_mase(self):
    """Check MASE against known M4 reference values for all four prediction sets."""
    cases = [
        (self.smyl_predictions, 1.536, "Should be 1.536"),
        (self.montero_predictions, 1.551, "Should be 1.551"),
        (self.naive_predictions, 2.044, "Should be 2.044"),
        (self.naive2_predictions, 1.912, "Should be 1.912"),
    ]
    for preds, expected, msg in cases:
        score = round(MASE(self.test_data, preds, scale), 3)
        self.assertEqual(score, expected, msg)
def test_naive2(self):
    """Check SMAPE, MASE and OWA for the naive2 benchmark (OWA reference = 1.000)."""
    smape = SMAPE(self.test_data, self.naive2_predictions)
    mase = MASE(self.test_data, self.naive2_predictions, scale)
    self.assertEqual(round(smape, 3), 13.564, "Should be 13.564")
    self.assertEqual(round(mase, 3), 1.912, "Should be 1.912")
    self.assertEqual(round(OWA(mase, smape), 3), 1.000, "Should be 1.000")
def test_naive(self):
    """Check SMAPE, MASE and OWA for the naive benchmark predictions."""
    smape = SMAPE(self.test_data, self.naive_predictions)
    mase = MASE(self.test_data, self.naive_predictions, scale)
    self.assertEqual(round(smape, 3), 14.208, "Should be 14.208")
    self.assertEqual(round(mase, 3), 2.044, "Should be 2.044")
    self.assertEqual(round(OWA(mase, smape), 3), 1.058, "Should be 1.058")
def test_montero(self):
    """Check SMAPE, MASE and OWA for the Montero(-Manso) M4 submission."""
    smape = SMAPE(self.test_data, self.montero_predictions)
    mase = MASE(self.test_data, self.montero_predictions, scale)
    self.assertEqual(round(smape, 3), 11.720, "Should be 11.720")
    self.assertEqual(round(mase, 3), 1.551, "Should be 1.551")
    self.assertEqual(round(OWA(mase, smape), 3), 0.838, "Should be 0.838")
def test_smyl(self):
    """Check SMAPE, MASE and OWA for the Smyl (M4 winner) submission."""
    smape = SMAPE(self.test_data, self.smyl_predictions)
    mase = MASE(self.test_data, self.smyl_predictions, scale)
    self.assertEqual(round(smape, 3), 11.374, "Should be 11.374")
    self.assertEqual(round(mase, 3), 1.536, "Should be 1.536")
    self.assertEqual(round(OWA(mase, smape), 3), 0.821, "Should be 0.821")
def test_owa(self):
    """Check OWA across all four prediction sets in a single table-driven pass."""
    cases = [
        (self.smyl_predictions, 0.821, "Should be 0.821"),
        (self.montero_predictions, 0.838, "Should be 0.838"),
        (self.naive_predictions, 1.058, "Should be 1.058"),
        (self.naive2_predictions, 1.000, "Should be 1.000"),
    ]
    for preds, expected, msg in cases:
        smape = SMAPE(self.test_data, preds)
        mase = MASE(self.test_data, preds, scale)
        self.assertEqual(round(OWA(mase, smape), 3), expected, msg)
def score_M4(
    predictions: np.array,
    df_results_name: str = "GPTime/results/M4/test.csv",
    val: bool = False,
) -> Dict:
    """Score M4 predictions per frequency and globally.

    For each (train, test) file pair the MASE scale is fitted on the training
    series, then MASE, SMAPE and OWA are computed on the test horizon.
    Per-frequency results plus an observation-weighted "GLOBAL" row are
    written to ``df_results_name`` and returned.

    :param predictions: array of forecasts, rows aligned with the sorted test
        files, columns covering at least each frequency's horizon.
    :param df_results_name: path of the CSV the metric table is written to.
    :param val: if True, score against the validation split paths instead of
        the test split paths.
    :return: dict mapping frequency name (and "GLOBAL") to
        {"MASE": ..., "SMAPE": ..., "OWA": ...}.
    """
    per_freq: Dict[str, Dict[str, float]] = {}

    # Pick the split; sorting keeps train/test pairs aligned in the zip below.
    if val:
        test_files = glob.glob(cfg.path.m4_val_test + "*")
        train_files = glob.glob(cfg.path.m4_val_train + "*")
    else:
        test_files = glob.glob(cfg.path.m4_test + "*")
        train_files = glob.glob(cfg.path.m4_train + "*")
    test_files.sort()
    train_files.sort()

    offset = 0          # row offset into `predictions` for the current frequency
    mase_total = 0.0    # observation-weighted MASE accumulator
    smape_total = 0.0   # observation-weighted SMAPE accumulator
    for train_fname, test_fname in zip(train_files, test_files):
        train_df = pd.read_csv(train_fname, index_col=0)
        test_df = pd.read_csv(test_fname, index_col=0)
        period_num, period_str = period_from_fname(
            fname=train_fname, period_dict=cfg.scoring.m4.periods
        )
        horizon = cfg.scoring.m4.horizons[period_str]

        Y = test_df.values[:, :horizon]
        end = offset + Y.shape[0]
        predicted = predictions[offset:end, :horizon]

        assert np.sum(np.isnan(Y)) == 0, "NaNs in Y"
        assert np.sum(np.isnan(predicted)) == 0, f"NaNs in predictions: {np.where(np.isnan(predicted))}"
        assert Y.shape == predicted.shape, "Y and predicted have different shapes"

        # Scale is fitted on the training series only, never on the test data.
        freq_scale = MASEScaler().fit(train_df.values, freq=period_num).scale_.flatten()
        mase_freq = MASE(Y, predicted, freq_scale)
        smape_freq = SMAPE(Y, predicted)
        owa_freq = OWA(mase=mase_freq, smape=smape_freq, freq=period_str)

        # Weight by series count so the global score averages over observations.
        mase_total += mase_freq * Y.shape[0]
        smape_total += smape_freq * Y.shape[0]

        per_freq[period_str] = {
            "MASE": mase_freq,
            "SMAPE": smape_freq,
            "OWA": owa_freq,
        }
        offset += Y.shape[0]

    global_mase = mase_total / offset
    global_smape = smape_total / offset
    per_freq["GLOBAL"] = {
        "MASE": global_mase,
        "SMAPE": global_smape,
        "OWA": OWA(global_mase, global_smape, freq="global"),
    }

    pd.DataFrame(per_freq).T.to_csv(df_results_name)
    return per_freq
def score_M4(predictions: np.array, df_results_name: str = "GPTime/results/M4/test.csv") -> Dict:
    """Score M4 test-set predictions per frequency and globally.

    For each (train, test) file pair the seasonal-naive MASE scale is computed
    from the training series, then MASE, SMAPE and OWA are computed on the
    test horizon. Per-frequency results plus an observation-weighted "GLOBAL"
    row are written to ``df_results_name`` and returned.

    NOTE(review): this duplicates the other ``score_M4`` definition in this
    file; consider consolidating — TODO confirm which one callers use.

    :param predictions: array of forecasts, rows aligned with the sorted test
        files, columns covering at least each frequency's horizon.
    :param df_results_name: path of the CSV the metric table is written to.
    :return: dict mapping frequency name (and "GLOBAL") to
        {"MASE": ..., "SMAPE": ..., "OWA": ...}.
    """
    frequency_metrics: Dict[str, Dict[str, float]] = {}

    # Read in and prepare the data.
    # BUG FIX: training files were globbed from the TEST path (copy-paste
    # error), so the MASE scale was computed from test data. Glob the train
    # path, and sort both lists so zip pairs the same frequency together
    # (glob returns files in arbitrary order).
    all_test_files = glob.glob(cfg.path.m4_test + "*")
    all_train_files = glob.glob(cfg.path.m4_train + "*")
    all_test_files.sort()
    all_train_files.sort()

    crt_pred_index = 0  # row offset into `predictions` for the current frequency
    tot_mase = 0.0
    tot_smape = 0.0
    for fname_train, fname_test in zip(all_train_files, all_test_files):
        df_train = pd.read_csv(fname_train, index_col=0)
        df_test = pd.read_csv(fname_test, index_col=0)
        period_num, period_str = period_num_str_file(
            fname=fname_train, period_dict=cfg.scoring.m4.periods)
        horizon = cfg.scoring.m4.horizons[period_str]

        # Seasonal-naive in-sample scale: mean absolute seasonal difference
        # of each training series (computed from TRAIN data only).
        scale = (df_train.diff(
            periods=period_num, axis=1).abs().mean(axis=1).reset_index(drop=True)).values

        Y = df_test.values[:, :horizon]
        index = crt_pred_index + Y.shape[0]
        predicted = predictions[crt_pred_index:index, :horizon]

        assert np.sum(np.isnan(Y)) == 0, "NaNs in Y"
        assert np.sum(np.isnan(predicted)) == 0, "NaNs in predictions"
        assert Y.shape == predicted.shape, "Y and predicted have different shapes"

        mase_freq = MASE(Y, predicted, scale)
        smape_freq = SMAPE(Y, predicted)
        owa_freq = OWA(mase=mase_freq, smape=smape_freq, freq=period_str)

        # Weight by series count so the global score averages over observations.
        tot_mase += mase_freq * Y.shape[0]
        tot_smape += smape_freq * Y.shape[0]

        frequency_metrics[period_str] = {}
        frequency_metrics[period_str]["MASE"] = mase_freq
        frequency_metrics[period_str]["SMAPE"] = smape_freq
        frequency_metrics[period_str]["OWA"] = owa_freq

        crt_pred_index += Y.shape[0]

    tot_mase = tot_mase / crt_pred_index
    tot_smape = tot_smape / crt_pred_index
    tot_owa = OWA(tot_mase, tot_smape, freq="global")

    frequency_metrics["GLOBAL"] = {}
    frequency_metrics["GLOBAL"]["MASE"] = tot_mase
    frequency_metrics["GLOBAL"]["SMAPE"] = tot_smape
    frequency_metrics["GLOBAL"]["OWA"] = tot_owa

    df = pd.DataFrame(frequency_metrics).T
    df.to_csv(df_results_name)
    return frequency_metrics