def test_MP_class(self):
        """End-to-end check of MP.ModelPerformance against standalone metrics.

        Trains a ModelESRNN on one monthly series, forecasts the last 12
        observations, builds a Naive2 benchmark, then verifies that every
        metric cached in ``mp.metrics`` equals the corresponding standalone
        ``MP.*`` function applied to the same dataframes.

        Side effects: writes 'test.csv' and 'naive.csv' to the working
        directory.
        """
        import torch
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
     
        # Fixture data shipped under test/Data/Monthly.
        # NOTE(review): dic_monthly's structure comes from DP.read_file —
        # presumably a dict of {name: [DataFrame, ...]}, since df[0] is
        # indexed below; confirm against DP.
        path_monthly = os.path.join('test','Data','Monthly') 
        dic_monthly = DP.read_file(path_monthly)

        # Wrap the first n_assets series as DataSeries objects.
        n_assets = 1
        time_series_group = []
        for i in range(n_assets):
            df = dic_monthly[list(dic_monthly.keys())[i]]
            ds = DataSeries('ETF', 'monthly', df[0])
            time_series_group.append(ds)

        # Hold out the last 12 observations as the test window; the model
        # forecasts exactly that horizon (output_size=12).
        input_dc = DataCollection('test1', time_series_group)
        m = ModelESRNN(seasonality = [12], input_size = 4, output_size = 12, device=device)
        train_dc, test_dc = input_dc.split(numTest = 12)

        m.train(train_dc)

        forecast_dc = m.predict(test_dc) 

        # train_dc.to_df().to_csv('insample.csv')
        test_dc.to_df().to_csv('test.csv')
        # forecast_dc.to_df().to_csv('forecast.csv')
        # Naive2 benchmark used by MASE/OWA; 'MS' is presumably the pandas
        # month-start frequency alias — confirm against ModelNaive2.
        mn = MN.ModelNaive2(2, train_dc)
        naive2_dc = mn.fit_and_generate_prediction(12, 'MS')
        naive2_dc.to_df().to_csv('naive.csv')

        mp = MP.ModelPerformance("test model performance", 2, test_dc, forecast_dc, train_dc, naive2_dc)
        
        # Reference values computed directly with the standalone functions.
        mase = MP.MASE(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(), 2)
        smape = MP.sMAPE(test_dc.to_df(), forecast_dc.to_df())
        mape = MP.MAPE(mp.y_df, mp.y_hat_df)
        r2 = MP.R2(test_dc.to_df(), forecast_dc.to_df())
        rmse = MP.RMSE(test_dc.to_df(), forecast_dc.to_df())
        owa = MP.OWA(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(), naive2_dc.to_df(), 2)
        u1 = MP.Theil_U1(test_dc.to_df(), forecast_dc.to_df())
        u2 = MP.Theil_U2(test_dc.to_df(), forecast_dc.to_df())

        # Populate mp.metrics via the instance methods.
        mp.MASE()
        mp.sMAPE()
        mp.MAPE()
        mp.R2()
        mp.RMSE()
        mp.OWA()
        mp.Theil_U1()
        mp.Theil_U2()

        # Each cached metric must agree with its standalone counterpart.
        self.assertAlmostEqual(mp.metrics['sMAPE'], smape)
        self.assertAlmostEqual(mp.metrics['MAPE'], mape)
        self.assertAlmostEqual(mp.metrics['R2'], r2)
        self.assertAlmostEqual(mp.metrics['RMSE'], rmse)
        self.assertAlmostEqual(mp.metrics['MASE'], mase)
        self.assertAlmostEqual(mp.metrics['OWA'], owa)
        self.assertAlmostEqual(mp.metrics['Theil_U1'], u1)
        self.assertAlmostEqual(mp.metrics['Theil_U2'], u2)
    def test_RMSE(self):
        """Hand-computed root-mean-square error must match MP.RMSE."""
        residuals = self.actual - self.predict
        expected = np.sqrt((residuals ** 2).sum() / len(self.actual)).item()

        ans = MP.RMSE(self.actual, self.predict)
        self.assertAlmostEqual(ans, expected)
 def test_R2(self):
     SSR = ((self.actual - self.predict)**2).sum().item()
     SST = ((self.actual - self.actual.mean())**2).sum().item()
     R2 = 1 - SSR/SST
     
     ans = MP.R2(self.actual, self.predict)
     self.assertAlmostEqual(ans, R2)
    def test_sMAPE(self):
        """Hand-computed symmetric MAPE must match MP.sMAPE."""
        abs_err = abs(self.actual - self.predict)
        scale = abs(self.actual) + abs(self.predict)
        expected = (abs_err / scale).mean().item()

        ans = MP.sMAPE(self.actual, self.predict)
        self.assertAlmostEqual(ans, expected)
    def test_MAPE(self):
        """Hand-computed MAPE must match a pinned value and MP.MAPE."""
        rel_err = abs((self.actual - self.predict) / self.actual)
        expected = rel_err.mean().item()

        ans = MP.MAPE(self.actual, self.predict)
        # Regression pin: precomputed MAPE for the fixture data.
        self.assertAlmostEqual(expected, 0.03431801341798592)
        self.assertAlmostEqual(ans, expected)
    def test_check_input_df_tickers(self):
        """MP.check_input_df_tickers accepts matching frames (including a
        'Naive2 ...' indexed frame) and rejects a mismatched ticker."""
        frames = [self.actual, self.predict]
        self.assertEqual(MP.check_input_df_tickers(frames), True)

        # A frame indexed 'Naive2 None' is still considered compatible.
        frames.append(pd.DataFrame([False], index=['Naive2 None']))
        self.assertEqual(MP.check_input_df_tickers(frames), True)

        # A frame carrying an unrelated ticker column must be rejected.
        mismatched = pd.DataFrame(
            {'bug': [75.0, 72, 69, 70, 76, 36, 74, 78, 72, 62]},
            index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-03',
                                  '2020-01-04', '2020-01-05', '2020-01-06',
                                  '2020-01-07', '2020-01-10', '2020-01-11',
                                  '2020-01-12']))
        frames.append(mismatched)
        self.assertEqual(MP.check_input_df_tickers(frames), False)
    def test_U1(self):
        """Hand-computed Theil's U1 must match MP.Theil_U1."""
        # U1 = RMSE(actual, predict) / (RMS(actual) + RMS(predict)).
        rmse_err = np.sqrt(((self.actual - self.predict) ** 2).sum()
                           / len(self.actual))
        rms_actual = np.sqrt((self.actual ** 2).sum() / len(self.actual))
        rms_predict = np.sqrt((self.predict ** 2).sum() / len(self.predict))
        expected = (rmse_err / (rms_actual + rms_predict)).item()

        ans = MP.Theil_U1(self.actual, self.predict)
        self.assertAlmostEqual(ans, expected)
    def test_OWA(self):
        """Hand-computed OWA (average of MASE and sMAPE ratios against a
        naive forecast) must match MP.OWA."""
        in_sample = pd.DataFrame(
            {'test': [64, 66.0, 62, 69, 70, 73, 71, 74, 71, 72, 72]},
            index=pd.to_datetime(['2019-12-15', '2019-12-16', '2019-12-17',
                                  '2019-12-18', '2019-12-19', '2019-12-20',
                                  '2019-12-21', '2019-12-22', '2019-12-23',
                                  '2019-12-24', '2019-12-25']))

        naive = pd.DataFrame(
            {'test': [71, 70.5, 66, 71, 74.5, 72, 76, 71, 65, 64]},
            index=pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-03',
                                  '2020-01-04', '2020-01-05', '2020-01-06',
                                  '2020-01-07', '2020-01-10', '2020-01-11',
                                  '2020-01-12']))

        # OWA averages the model-vs-naive ratios of MASE and sMAPE.
        mase_ratio = (MP.MASE(self.actual, self.predict, in_sample, 2)
                      / MP.MASE(self.actual, naive, in_sample, 2))
        smape_ratio = (MP.sMAPE(self.actual, self.predict)
                       / MP.sMAPE(self.actual, naive))
        expected = (mase_ratio + smape_ratio) / 2

        ans = MP.OWA(self.actual, self.predict, in_sample, naive, 2)
        self.assertAlmostEqual(ans, expected)
    def test_U2(self):
        """Hand-computed Theil's U2 must match MP.Theil_U2."""
        # Both numerator and denominator scale by the lagged actual value.
        lagged = self.actual.shift(1)
        scaled_error = (self.actual - self.predict) / lagged
        scaled_change = self.actual.diff(1) / lagged
        expected = (np.sqrt((scaled_error ** 2).sum())
                    / np.sqrt((scaled_change ** 2).sum())).item()

        ans = MP.Theil_U2(self.actual, self.predict)
        self.assertAlmostEqual(ans, expected)
    def test_MASE(self):
        """Hand-computed MASE with seasonal period 4 must match MP.MASE."""
        in_sample = pd.DataFrame(
            {'test': [64, 66.0, 62, 69, 70, 73, 71, 74, 71, 72, 72]},
            index=pd.to_datetime(['2019-12-15', '2019-12-16', '2019-12-17',
                                  '2019-12-18', '2019-12-19', '2019-12-20',
                                  '2019-12-21', '2019-12-22', '2019-12-23',
                                  '2019-12-24', '2019-12-25']))

        # MAE of the forecast over the test window.
        mae_forecast = (abs(self.actual - self.predict).sum()
                        / len(self.actual)).item()
        # MAE of the in-sample seasonal-naive forecast (lag 4).
        seasonal_diff = abs(in_sample.diff(periods=4).dropna())
        mae_naive = (seasonal_diff.sum() / (len(in_sample) - 4)).item()
        expected = mae_forecast / mae_naive

        ans = MP.MASE(self.actual, self.predict, in_sample, 4)
        self.assertAlmostEqual(ans, expected)
def validation_rolling(input_dc: DataCollection,
                       num_split: int,
                       numTest: int,
                       max_epochs=15,
                       batch_size=1,
                       batch_size_test=128,
                       freq_of_test=-1,
                       learning_rate=1e-3,
                       lr_scheduler_step_size=9,
                       lr_decay=0.9,
                       per_series_lr_multip=1.0,
                       gradient_eps=1e-8,
                       gradient_clipping_threshold=20,
                       rnn_weight_decay=0,
                       noise_std=0.001,
                       level_variability_penalty=80,
                       testing_percentile=50,
                       training_percentile=50,
                       ensemble=False,
                       cell_type='LSTM',
                       state_hsize=40,
                       dilations=None,
                       add_nl_layer=False,
                       seasonality=None,
                       input_size=4,
                       output_size=8,
                       frequency=None,
                       max_periods=20,
                       random_seed=1):
    """Rolling-origin cross-validation of ModelESRNN hyperparameters.

    Splits `input_dc` `num_split` times, each time holding out the last
    `numTest` points as a validation fold and re-splitting the remaining
    training data, then trains a fresh ModelESRNN on each fold (oldest
    first) and scores it with MP.MAPE.

    Fix: `dilations` and `seasonality` previously used mutable list
    defaults (shared across calls); they now default to None and are
    resolved to the same values inside the function.

    Returns:
        (mean MAPE over folds, per-fold MAPE list, mean seconds per fold,
         (max_epochs, batch_size, input_size, output_size))
    """
    import time

    # Resolve the former mutable defaults without changing behavior.
    dilations = [[1, 2], [4, 8]] if dilations is None else dilations
    seasonality = [4] if seasonality is None else seasonality

    scores_list = []
    train_val_dic = {}
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Build the folds newest-first: each split peels the last numTest
    # points off as validation and keeps shrinking the training set.
    for i in range(num_split):
        train, validation = input_dc.split(numTest=numTest)
        train_val_dic[i] = [train, validation]
        input_dc = train

    # Train/score the folds oldest-first, accumulating MAPE and wall time.
    total_score = 0
    elapse = 0
    for i in range(num_split - 1, -1, -1):
        train_dc = train_val_dic[i][0]
        validation_dc = train_val_dic[i][1]

        validation_df = validation_dc.to_df()
        start_time = time.time()
        m = ModelESRNN(max_epochs=max_epochs,
                       batch_size=batch_size,
                       batch_size_test=batch_size_test,
                       freq_of_test=freq_of_test,
                       learning_rate=learning_rate,
                       lr_scheduler_step_size=lr_scheduler_step_size,
                       lr_decay=lr_decay,
                       per_series_lr_multip=per_series_lr_multip,
                       gradient_eps=gradient_eps,
                       gradient_clipping_threshold=gradient_clipping_threshold,
                       rnn_weight_decay=rnn_weight_decay,
                       noise_std=noise_std,
                       level_variability_penalty=level_variability_penalty,
                       testing_percentile=testing_percentile,
                       training_percentile=training_percentile,
                       ensemble=ensemble,
                       cell_type=cell_type,
                       state_hsize=state_hsize,
                       dilations=dilations,
                       add_nl_layer=add_nl_layer,
                       seasonality=seasonality,
                       input_size=input_size,
                       output_size=output_size,
                       frequency=frequency,
                       max_periods=max_periods,
                       random_seed=random_seed,
                       device=device)
        m.train(train_dc)
        y_predict = m.predict(validation_dc)
        y_predict_df = y_predict.to_df()

        score = MP.MAPE(validation_df, y_predict_df)
        elapse += time.time() - start_time
        scores_list.append(score)
        total_score += score

    score = total_score / num_split

    return score, scores_list, elapse / num_split, (max_epochs, batch_size,
                                                    input_size, output_size)
def validation_simple(
    input_dc: DataCollection,
    numTest: int,
    max_epochs=15,
    batch_size=1,
    batch_size_test=128,
    freq_of_test=-1,
    learning_rate=1e-3,
    lr_scheduler_step_size=9,
    lr_decay=0.9,
    per_series_lr_multip=1.0,
    gradient_eps=1e-8,
    gradient_clipping_threshold=20,
    rnn_weight_decay=0,
    noise_std=0.001,
    level_variability_penalty=80,
    testing_percentile=50,
    training_percentile=50,
    ensemble=False,
    cell_type='LSTM',
    state_hsize=40,
    dilations=None,
    add_nl_layer=False,
    seasonality=None,
    input_size=4,
    output_size=8,
    frequency=None,
    max_periods=20,
    random_seed=1,
):
    """Single holdout validation of ModelESRNN hyperparameters.

    Splits `input_dc` once (last `numTest` points as validation), trains a
    ModelESRNN on the remainder, and scores the forecast with MP.MAPE.

    Fix: `dilations` and `seasonality` previously used mutable list
    defaults (shared across calls); they now default to None and are
    resolved to the same values inside the function.

    Returns:
        (MAPE score, (max_epochs, batch_size, input_size, output_size))
    """
    # Resolve the former mutable defaults without changing behavior.
    dilations = [[1, 2], [4, 8]] if dilations is None else dilations
    seasonality = [4] if seasonality is None else seasonality

    train_dc, validation_dc = input_dc.split(numTest=numTest)

    validation_df = validation_dc.to_df()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    m = ModelESRNN(max_epochs=max_epochs,
                   batch_size=batch_size,
                   batch_size_test=batch_size_test,
                   freq_of_test=freq_of_test,
                   learning_rate=learning_rate,
                   lr_scheduler_step_size=lr_scheduler_step_size,
                   lr_decay=lr_decay,
                   per_series_lr_multip=per_series_lr_multip,
                   gradient_eps=gradient_eps,
                   gradient_clipping_threshold=gradient_clipping_threshold,
                   rnn_weight_decay=rnn_weight_decay,
                   noise_std=noise_std,
                   level_variability_penalty=level_variability_penalty,
                   testing_percentile=testing_percentile,
                   training_percentile=training_percentile,
                   ensemble=ensemble,
                   cell_type=cell_type,
                   state_hsize=state_hsize,
                   dilations=dilations,
                   add_nl_layer=add_nl_layer,
                   seasonality=seasonality,
                   input_size=input_size,
                   output_size=output_size,
                   frequency=frequency,
                   max_periods=max_periods,
                   random_seed=random_seed,
                   device=device)
    m.train(train_dc)
    y_predict = m.predict(validation_dc)
    y_predict_df = y_predict.to_df()

    score = MP.MAPE(validation_df, y_predict_df)

    return score, (max_epochs, batch_size, input_size, output_size)