def test_MP_class(self):
    """Integration test: every metric stored by a ModelPerformance instance
    must equal the corresponding module-level MP metric function applied to
    the same dataframes (MASE, sMAPE, MAPE, R2, RMSE, OWA, Theil U1/U2)."""
    import torch
    # ModelESRNN accepts a torch device string; use the GPU when available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    path_monthly = os.path.join('test','Data','Monthly')
    dic_monthly = DP.read_file(path_monthly)
    # Build a one-asset DataCollection from the first monthly series.
    n_assets = 1
    time_series_group = []
    for i in range(n_assets):
        df = dic_monthly[list(dic_monthly.keys())[i]]
        ds = DataSeries('ETF', 'monthly', df[0])
        time_series_group.append(ds)
    input_dc = DataCollection('test1', time_series_group)
    m = ModelESRNN(seasonality = [12], input_size = 4, output_size = 12, device=device)
    # Hold out the last 12 observations as the test window.
    train_dc, test_dc = input_dc.split(numTest = 12)
    m.train(train_dc)
    forecast_dc = m.predict(test_dc)
    # train_dc.to_df().to_csv('insample.csv')
    # NOTE(review): writes 'test.csv'/'naive.csv' into the working directory
    # with no cleanup — consider a tempdir if these artifacts matter.
    test_dc.to_df().to_csv('test.csv')
    # forecast_dc.to_df().to_csv('forecast.csv')
    # Naive2 benchmark forecast, needed as the OWA reference.
    mn = MN.ModelNaive2(2, train_dc)
    naive2_dc = mn.fit_and_generate_prediction(12, 'MS')
    naive2_dc.to_df().to_csv('naive.csv')
    mp = MP.ModelPerformance("test model performance", 2, test_dc, forecast_dc, train_dc, naive2_dc)
    # Expected values: standalone metric functions on the same frames.
    mase = MP.MASE(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(), 2)
    smape = MP.sMAPE(test_dc.to_df(), forecast_dc.to_df())
    mape = MP.MAPE(mp.y_df, mp.y_hat_df)
    r2 = MP.R2(test_dc.to_df(), forecast_dc.to_df())
    rmse = MP.RMSE(test_dc.to_df(), forecast_dc.to_df())
    owa = MP.OWA(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(), naive2_dc.to_df(), 2)
    u1 = MP.Theil_U1(test_dc.to_df(), forecast_dc.to_df())
    u2 = MP.Theil_U2(test_dc.to_df(), forecast_dc.to_df())
    # Populate mp.metrics via the instance methods, then compare.
    mp.MASE()
    mp.sMAPE()
    mp.MAPE()
    mp.R2()
    mp.RMSE()
    mp.OWA()
    mp.Theil_U1()
    mp.Theil_U2()
    self.assertAlmostEqual(mp.metrics['sMAPE'], smape)
    self.assertAlmostEqual(mp.metrics['MAPE'], mape)
    self.assertAlmostEqual(mp.metrics['R2'], r2)
    self.assertAlmostEqual(mp.metrics['RMSE'], rmse)
    self.assertAlmostEqual(mp.metrics['MASE'], mase)
    self.assertAlmostEqual(mp.metrics['OWA'], owa)
    self.assertAlmostEqual(mp.metrics['Theil_U1'], u1)
    self.assertAlmostEqual(mp.metrics['Theil_U2'], u2)
def test_RMSE(self):
    """MP.RMSE must equal the hand-computed root-mean-squared error."""
    n = len(self.actual)
    squared_error_sum = ((self.actual - self.predict) ** 2).sum()
    expected = np.sqrt(squared_error_sum / n).item()
    ans = MP.RMSE(self.actual, self.predict)
    self.assertAlmostEqual(ans, expected)
def test_R2(self):
    """MP.R2 must match the coefficient of determination 1 - SSR/SST."""
    residual_ss = ((self.actual - self.predict) ** 2).sum().item()
    total_ss = ((self.actual - self.actual.mean()) ** 2).sum().item()
    expected = 1 - residual_ss / total_ss
    ans = MP.R2(self.actual, self.predict)
    self.assertAlmostEqual(ans, expected)
def test_sMAPE(self):
    """MP.sMAPE must equal mean(|a - p| / (|a| + |p|)) over all points."""
    absolute_error = abs(self.actual - self.predict)
    scale = abs(self.actual) + abs(self.predict)
    expected = (absolute_error / scale).mean().item()
    ans = MP.sMAPE(self.actual, self.predict)
    self.assertAlmostEqual(ans, expected)
def test_MAPE(self):
    """MP.MAPE must equal mean(|(a - p) / a|); also pin the fixture value."""
    relative_error = abs((self.actual - self.predict) / self.actual)
    expected = relative_error.mean().item()
    ans = MP.MAPE(self.actual, self.predict)
    # Regression guard: the fixture's known MAPE.
    self.assertAlmostEqual(expected, 0.03431801341798592)
    self.assertAlmostEqual(ans, expected)
def test_check_input_df_tickers(self):
    """check_input_df_tickers is True while all frames refer to the same
    tickers (a 'Naive2 <ticker>' column still matches) and turns False
    once a frame with an unrelated ticker is appended."""
    frames = [self.actual, self.predict]
    self.assertEqual(MP.check_input_df_tickers(frames), True)
    # A Naive2-style index label still counts as the same ticker.
    y_naive2_hat_df = pd.DataFrame([False], index=['Naive2 None'])
    frames.append(y_naive2_hat_df)
    self.assertEqual(MP.check_input_df_tickers(frames), True)
    # A frame with a different ticker column must break the check.
    mismatched = pd.DataFrame(
        data={'bug': [75.0, 72, 69, 70, 76, 36, 74, 78, 72, 62]},
        index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-03',
                              '2020-01-04', '2020-01-05', '2020-01-06',
                              '2020-01-07', '2020-01-10', '2020-01-11',
                              '2020-01-12']))
    frames.append(mismatched)
    self.assertEqual(MP.check_input_df_tickers(frames), False)
def test_U1(self):
    """MP.Theil_U1: RMSE divided by the sum of the quadratic means of
    actual and predicted values."""
    n = len(self.actual)
    rmse_part = np.sqrt(((self.actual - self.predict) ** 2).sum() / n)
    scale = (np.sqrt((self.actual ** 2).sum() / n)
             + np.sqrt((self.predict ** 2).sum() / len(self.predict)))
    expected = (rmse_part / scale).item()
    ans = MP.Theil_U1(self.actual, self.predict)
    self.assertAlmostEqual(ans, expected)
def test_OWA(self):
    """MP.OWA must equal the average of the MASE and sMAPE ratios of the
    model forecast against a naive benchmark forecast."""
    in_sample = pd.DataFrame(
        data={'test': [64, 66.0, 62, 69, 70, 73, 71, 74, 71, 72, 72]},
        index=pd.to_datetime(['2019-12-15', '2019-12-16', '2019-12-17',
                              '2019-12-18', '2019-12-19', '2019-12-20',
                              '2019-12-21', '2019-12-22', '2019-12-23',
                              '2019-12-24', '2019-12-25']))
    naive = pd.DataFrame(
        data={'test': [71, 70.5, 66, 71, 74.5, 72, 76, 71, 65, 64]},
        index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-03',
                              '2020-01-04', '2020-01-05', '2020-01-06',
                              '2020-01-07', '2020-01-10', '2020-01-11',
                              '2020-01-12']))
    mase_model = MP.MASE(self.actual, self.predict, in_sample, 2)
    mase_naive = MP.MASE(self.actual, naive, in_sample, 2)
    smape_model = MP.sMAPE(self.actual, self.predict)
    smape_naive = MP.sMAPE(self.actual, naive)
    expected = ((mase_model / mase_naive) + (smape_model / smape_naive)) / 2
    ans = MP.OWA(self.actual, self.predict, in_sample, naive, 2)
    self.assertAlmostEqual(ans, expected)
def test_U2(self):
    """MP.Theil_U2: sqrt(sum((err_t / y_{t-1})^2)) over
    sqrt(sum((Δy_t / y_{t-1})^2))."""
    previous = self.actual.shift(1)
    scaled_error = (self.actual - self.predict) / previous
    scaled_change = self.actual.diff(1) / previous
    numerator = np.sqrt((scaled_error ** 2).sum())
    denominator = np.sqrt((scaled_change ** 2).sum())
    expected = (numerator / denominator).item()
    ans = MP.Theil_U2(self.actual, self.predict)
    self.assertAlmostEqual(ans, expected)
def test_MASE(self):
    """MP.MASE with seasonal period 4: forecast MAE divided by the MAE of
    the in-sample seasonal-naive forecast."""
    in_sample = pd.DataFrame(
        data={'test': [64, 66.0, 62, 69, 70, 73, 71, 74, 71, 72, 72]},
        index=pd.to_datetime(['2019-12-15', '2019-12-16', '2019-12-17',
                              '2019-12-18', '2019-12-19', '2019-12-20',
                              '2019-12-21', '2019-12-22', '2019-12-23',
                              '2019-12-24', '2019-12-25']))
    forecast_mae = (abs(self.actual - self.predict).sum() / len(self.actual)).item()
    seasonal_naive_mae = (abs(in_sample.diff(periods=4).dropna()).sum()
                          / (len(in_sample) - 4)).item()
    expected = forecast_mae / seasonal_naive_mae
    ans = MP.MASE(self.actual, self.predict, in_sample, 4)
    self.assertAlmostEqual(ans, expected)
def validation_rolling(input_dc: DataCollection, num_split: int, numTest: int,
                       max_epochs=15, batch_size=1, batch_size_test=128,
                       freq_of_test=-1, learning_rate=1e-3,
                       lr_scheduler_step_size=9, lr_decay=0.9,
                       per_series_lr_multip=1.0, gradient_eps=1e-8,
                       gradient_clipping_threshold=20, rnn_weight_decay=0,
                       noise_std=0.001, level_variability_penalty=80,
                       testing_percentile=50, training_percentile=50,
                       ensemble=False, cell_type='LSTM', state_hsize=40,
                       dilations=None, add_nl_layer=False, seasonality=None,
                       input_size=4, output_size=8, frequency=None,
                       max_periods=20, random_seed=1):
    """Rolling-origin cross-validation for ModelESRNN.

    Repeatedly peels the most recent ``numTest`` observations off
    ``input_dc`` to build ``num_split`` train/validation folds, trains a
    fresh ModelESRNN per fold (oldest fold first), and scores each
    forecast with MP.MAPE.

    Returns:
        tuple: (mean MAPE across folds, list of per-fold MAPEs, mean
        train+predict wall time per fold in seconds,
        (max_epochs, batch_size, input_size, output_size)).
    """
    import time
    # Fix: the previous defaults dilations=[[1, 2], [4, 8]] and
    # seasonality=[4] were mutable objects shared across calls; use a
    # None sentinel and build a fresh list per invocation.
    if dilations is None:
        dilations = [[1, 2], [4, 8]]
    if seasonality is None:
        seasonality = [4]
    scores_list = []
    train_val_dic = {}
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Split off one validation window per fold; each fold's training set
    # is everything preceding its validation window.
    for i in range(num_split):
        train, validation = input_dc.split(numTest=numTest)
        train_val_dic[i] = [train, validation]
        input_dc = train
    total_score = 0
    elapse = 0
    # Evaluate oldest-first: the last split produced the earliest fold.
    for i in range(num_split - 1, -1, -1):
        train_dc = train_val_dic[i][0]
        validation_dc = train_val_dic[i][1]
        validation_df = validation_dc.to_df()
        start_time = time.time()
        m = ModelESRNN(max_epochs=max_epochs, batch_size=batch_size,
                       batch_size_test=batch_size_test,
                       freq_of_test=freq_of_test,
                       learning_rate=learning_rate,
                       lr_scheduler_step_size=lr_scheduler_step_size,
                       lr_decay=lr_decay,
                       per_series_lr_multip=per_series_lr_multip,
                       gradient_eps=gradient_eps,
                       gradient_clipping_threshold=gradient_clipping_threshold,
                       rnn_weight_decay=rnn_weight_decay,
                       noise_std=noise_std,
                       level_variability_penalty=level_variability_penalty,
                       testing_percentile=testing_percentile,
                       training_percentile=training_percentile,
                       ensemble=ensemble, cell_type=cell_type,
                       state_hsize=state_hsize, dilations=dilations,
                       add_nl_layer=add_nl_layer, seasonality=seasonality,
                       input_size=input_size, output_size=output_size,
                       frequency=frequency, max_periods=max_periods,
                       random_seed=random_seed, device=device)
        m.train(train_dc)
        y_predict = m.predict(validation_dc)
        y_predict_df = y_predict.to_df()
        score = MP.MAPE(validation_df, y_predict_df)
        elapse += time.time() - start_time
        scores_list.append(score)
        total_score += score
    score = total_score / num_split
    return score, scores_list, elapse / num_split, (max_epochs, batch_size, input_size, output_size)
def validation_simple(
        input_dc: DataCollection, numTest: int, max_epochs=15, batch_size=1,
        batch_size_test=128, freq_of_test=-1, learning_rate=1e-3,
        lr_scheduler_step_size=9, lr_decay=0.9, per_series_lr_multip=1.0,
        gradient_eps=1e-8, gradient_clipping_threshold=20,
        rnn_weight_decay=0, noise_std=0.001, level_variability_penalty=80,
        testing_percentile=50, training_percentile=50, ensemble=False,
        cell_type='LSTM', state_hsize=40, dilations=None,
        add_nl_layer=False, seasonality=None, input_size=4, output_size=8,
        frequency=None, max_periods=20, random_seed=1,
):
    """Single hold-out validation for ModelESRNN.

    Splits ``input_dc`` into one train set and one validation window of
    ``numTest`` points, trains a ModelESRNN on the train set, and scores
    the forecast of the validation window with MP.MAPE.

    Returns:
        tuple: (MAPE score, (max_epochs, batch_size, input_size,
        output_size)).
    """
    # Fix: replace mutable default arguments (dilations=[[1, 2], [4, 8]],
    # seasonality=[4]) with None sentinels so each call gets fresh lists.
    if dilations is None:
        dilations = [[1, 2], [4, 8]]
    if seasonality is None:
        seasonality = [4]
    train_dc, validation_dc = input_dc.split(numTest=numTest)
    validation_df = validation_dc.to_df()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    m = ModelESRNN(max_epochs=max_epochs, batch_size=batch_size,
                   batch_size_test=batch_size_test,
                   freq_of_test=freq_of_test,
                   learning_rate=learning_rate,
                   lr_scheduler_step_size=lr_scheduler_step_size,
                   lr_decay=lr_decay,
                   per_series_lr_multip=per_series_lr_multip,
                   gradient_eps=gradient_eps,
                   gradient_clipping_threshold=gradient_clipping_threshold,
                   rnn_weight_decay=rnn_weight_decay,
                   noise_std=noise_std,
                   level_variability_penalty=level_variability_penalty,
                   testing_percentile=testing_percentile,
                   training_percentile=training_percentile,
                   ensemble=ensemble, cell_type=cell_type,
                   state_hsize=state_hsize, dilations=dilations,
                   add_nl_layer=add_nl_layer, seasonality=seasonality,
                   input_size=input_size, output_size=output_size,
                   frequency=frequency, max_periods=max_periods,
                   random_seed=random_seed, device=device)
    m.train(train_dc)
    y_predict = m.predict(validation_dc)
    y_predict_df = y_predict.to_df()
    score = MP.MAPE(validation_df, y_predict_df)
    return score, (max_epochs, batch_size, input_size, output_size)