def run_ESRNN():
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Build a DataCollection of daily series from the local data folder.
    path_daily = r'C:\Users\xxxli\Desktop\Daily'
    dic_daily = preprocess.read_file(path_daily)
    series_list = []
    for k, v in dic_daily.items():
        ticker_name = k
        df, cat = v
        df = preprocess.single_price(df, ticker_name)  # column = [ticker]
        series_list.append(DataSeries(cat, 'daily', df))
    collect = DataCollection('universe daily', series_list)
    train_dc, test_dc = collect.split(numTest=24)

    m = ModelESRNN(max_epochs=15, batch_size=32, dilations=[[1, 3], [7, 14]],
                   input_size=12, output_size=24, device=device)
    m.train(train_dc)
    y_test = m.predict(test_dc)
    y_test_df = y_test.to_df()
    y_test_df.to_csv('hyper_ESRNN_1.csv')
def dc_generator(path: str, frequency: str):
    # Read every price file under `path` and wrap each series in a DataSeries,
    # then bundle them into a single DataCollection.
    dic, recover_list, ticker_list = DataPreprocessing.read_file(path)
    series_list = []
    for k, v in dic.items():
        df, cat = v
        df = DataPreprocessing.single_price(df, k)
        series_list.append(DataSeries(cat, frequency, df))
    collect = DataCollection(frequency + ' Collection', series_list)
    return collect, recover_list, ticker_list
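# A minimal usage sketch for dc_generator. The directory path and the
# numTest value below are illustrative assumptions, not part of the
# original code.
def example_dc_generator():
    import os
    monthly_dc, recover_list, ticker_list = dc_generator(
        os.path.join('test', 'Data', 'Monthly'), 'monthly')
    # Hold out the last 12 observations of each series for testing.
    train_dc, test_dc = monthly_dc.split(numTest=12)
    return train_dc, test_dc, recover_list, ticker_list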
def test_MP_class(self):
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = DP.read_file(path_monthly)
    n_assets = 1
    time_series_group = []
    for i in range(n_assets):
        df = dic_monthly[list(dic_monthly.keys())[i]]
        ds = DataSeries('ETF', 'monthly', df[0])
        time_series_group.append(ds)
    input_dc = DataCollection('test1', time_series_group)

    m = ModelESRNN(seasonality=[12], input_size=4, output_size=12,
                   device=device)
    train_dc, test_dc = input_dc.split(numTest=12)
    m.train(train_dc)
    forecast_dc = m.predict(test_dc)
    # train_dc.to_df().to_csv('insample.csv')
    test_dc.to_df().to_csv('test.csv')
    # forecast_dc.to_df().to_csv('forecast.csv')

    mn = MN.ModelNaive2(2, train_dc)
    naive2_dc = mn.fit_and_generate_prediction(12, 'MS')
    naive2_dc.to_df().to_csv('naive.csv')

    # Compute each metric once via the module-level functions...
    mp = MP.ModelPerformance("test model performance", 2, test_dc,
                             forecast_dc, train_dc, naive2_dc)
    mase = MP.MASE(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(), 2)
    smape = MP.sMAPE(test_dc.to_df(), forecast_dc.to_df())
    mape = MP.MAPE(mp.y_df, mp.y_hat_df)
    r2 = MP.R2(test_dc.to_df(), forecast_dc.to_df())
    rmse = MP.RMSE(test_dc.to_df(), forecast_dc.to_df())
    owa = MP.OWA(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(),
                 naive2_dc.to_df(), 2)
    u1 = MP.Theil_U1(test_dc.to_df(), forecast_dc.to_df())
    u2 = MP.Theil_U2(test_dc.to_df(), forecast_dc.to_df())

    # ...then via the instance methods, which populate mp.metrics.
    mp.MASE()
    mp.sMAPE()
    mp.MAPE()
    mp.R2()
    mp.RMSE()
    mp.OWA()
    mp.Theil_U1()
    mp.Theil_U2()

    # The two code paths must agree.
    self.assertAlmostEqual(mp.metrics['sMAPE'], smape)
    self.assertAlmostEqual(mp.metrics['MAPE'], mape)
    self.assertAlmostEqual(mp.metrics['R2'], r2)
    self.assertAlmostEqual(mp.metrics['RMSE'], rmse)
    self.assertAlmostEqual(mp.metrics['MASE'], mase)
    self.assertAlmostEqual(mp.metrics['OWA'], owa)
    self.assertAlmostEqual(mp.metrics['Theil_U1'], u1)
    self.assertAlmostEqual(mp.metrics['Theil_U2'], u2)
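# Reference definitions of the main M4-competition metrics exercised above.
# This is a sketch for orientation, not the repo's MP implementation: it
# assumes flat numpy arrays y (test actuals), y_hat (forecasts), y_train
# (in-sample actuals), y_naive2 (Naive2 forecasts), and seasonality m.
import numpy as np

def smape_ref(y, y_hat):
    # Symmetric MAPE: mean of 2|y - y_hat| / (|y| + |y_hat|).
    return np.mean(2.0 * np.abs(y - y_hat) / (np.abs(y) + np.abs(y_hat)))

def mase_ref(y, y_hat, y_train, m):
    # Scale the forecast error by the in-sample seasonal-naive error.
    scale = np.mean(np.abs(y_train[m:] - y_train[:-m]))
    return np.mean(np.abs(y - y_hat)) / scale

def owa_ref(y, y_hat, y_naive2, y_train, m):
    # Overall Weighted Average: mean of sMAPE and MASE, each expressed
    # relative to the Naive2 benchmark.
    return 0.5 * (smape_ref(y, y_hat) / smape_ref(y, y_naive2)
                  + mase_ref(y, y_hat, y_train, m)
                  / mase_ref(y, y_naive2, y_train, m))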
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # An example of how to use the Telescope model: build a monthly
    # DataCollection from the test data directory.
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = preprocess.read_file(path_monthly)
    series_list = []
    for k, v in dic_monthly.items():
        df, cat = v
        df = preprocess.single_price(df, k)
        series_list.append(DataSeries(cat, 'monthly', df))
    self.collect = DataCollection('test1', series_list)
def test_read_file(self):
    path_daily = os.path.join('test', 'Data', 'Daily')
    # Not the whole dataset; the folder holds only 3 files:
    # AGG.csv, AA.csv, and SP MidCap 400.xls.
    dic_daily = DataPreprocessing.read_file(path_daily)
    self.assertIsInstance(dic_daily, dict)
    self.assertIsInstance(dic_daily['AGG'][0], pd.DataFrame)
    self.assertEqual(dic_daily['AGG'][1], 'ETF')
    self.assertEqual(dic_daily['AA'][1], 'Stock')
    self.assertEqual(len(dic_daily), 3)

    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = DataPreprocessing.read_file(path_monthly)
    self.assertIsInstance(dic_monthly, dict)
    self.assertIsInstance(dic_monthly['AGG'][0], pd.DataFrame)
    self.assertEqual(dic_monthly['AGG'][1], 'ETF')
    self.assertEqual(dic_monthly['AA'][1], 'Stock')
    self.assertNotEqual(len(dic_monthly), 0)
def test_Naive2(self):
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = preprocess.read_file(path_monthly)
    series_list = []
    for k, v in dic_monthly.items():
        df, cat = v
        df = preprocess.single_price(df, k)
        series_list.append(DataSeries(cat, 'monthly', df))
    collect = DataCollection('test1', series_list)
    train_dc, test_dc = collect.split(numTest=12)

    # Seasonality 12 for monthly data; forecast 12 steps at month-start
    # frequency ('MS').
    m = ModelNaive2(12, train_dc, test_dc)
    y_hat_Naive2_dc = m.fit_and_generate_prediction(12, freq='MS')
    y_hat_Naive2_dc.to_df().to_csv('test_Naive2_result.csv')
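# A sketch of what Naive2 computes, assumed from the M4 benchmark definition
# rather than taken from ModelNaive2's source: deseasonalize with a crude
# multiplicative decomposition, repeat the last deseasonalized value, then
# restore the seasonal pattern over the horizon. y_train is a 1-D numpy
# array, m the seasonality, h the forecast horizon.
import numpy as np

def naive2_ref(y_train, m, h):
    n = len(y_train)
    # Crude multiplicative seasonal indices: mean of each calendar position
    # relative to the overall level (a simplification of the classical
    # decomposition used by the M4 benchmark).
    level = y_train.mean()
    season = np.array([y_train[i::m].mean() / level for i in range(m)])
    deseason = y_train / season[np.arange(n) % m]
    # Naive step on the deseasonalized series, reseasonalized per horizon step.
    return deseason[-1] * season[np.arange(n, n + h) % m]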
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # Fake daily data by ZZ.
    self.a_series = DataSeries(
        'ETF', 'daily',
        pd.DataFrame([10.0, 15.0, 20.0, 30.0], columns=['ABC'],
                     index=pd.to_datetime(['2020-01-01', '2020-01-02',
                                           '2020-01-03', '2020-01-04'])))
    self.b_series = DataSeries(
        'Bond', 'daily',
        pd.DataFrame([1.0, 3.5, 4.5], columns=['KKK'],
                     index=pd.to_datetime(['2020-01-01', '2020-01-02',
                                           '2020-01-03'])))
    self.collect = DataCollection('trial', [self.a_series, self.b_series])

    d = {'Initial weights': [0.6, 0.4]}
    self.weights = pd.DataFrame(data=d).T
    self.weights = self.weights.rename(columns={0: 'ABC', 1: 'KKK'})
    self.p = port.EqualPort("test equal port")
    self.p.calculate_initial_weight(self.collect)

    # Monthly test data from disk.
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = DataPreprocessing.read_file(path_monthly)
    n_assets = 4
    time_series_group = []
    for i in range(n_assets):
        df = dic_monthly[list(dic_monthly.keys())[i]]
        ds = DataSeries(df[1], 'monthly', df[0])
        time_series_group.append(ds)
    input_dc_test = DataCollection(label='Test Collection',
                                   time_series_group=time_series_group)
    self.input_dc = input_dc_test
    self.input_freq = input_dc_test.get_freq()
    self.input_df = self.input_dc.to_df()
    self.n_asset = len(self.input_df.columns)
    # Equal initial weights across the n_asset columns.
    input_weights = [[1 / self.n_asset] * self.n_asset]
    self.input_weights_df = pd.DataFrame(input_weights,
                                         columns=self.input_df.columns,
                                         index=['Initial weights'])
def test_ESRNN(self):
    # An example of how to use ESRNN.
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    path_daily = os.path.join('test', 'Data', 'Daily')
    dic_daily = preprocess.read_file(path_daily)
    series_list = []
    for k, v in dic_daily.items():
        df, cat = v
        df = preprocess.single_price(df, k)
        series_list.append(DataSeries(cat, 'daily', df))
    collect = DataCollection('test1', series_list)

    m = ModelESRNN(max_epochs=5, seasonality=[], batch_size=64,
                   input_size=12, output_size=12, device=device)
    train_dc, test_dc = collect.split(numTest=12)
    m.train(train_dc)
    y_test = m.predict(test_dc)
    self.assertIsInstance(y_test, DataCollection)
    y_test_df = y_test.to_df()
    y_test_df.to_csv('predict_result.csv')
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = DataPreprocessing.read_file(path_monthly)
    n_assets = 4
    time_series_group = []
    for i in range(n_assets):
        df = dic_monthly[list(dic_monthly.keys())[i]]
        ds = DataSeries('ETF', 'monthly', df[0])
        time_series_group.append(ds)
    input_dc_test = DataCollection(label='Test Collection',
                                   time_series_group=time_series_group)
    self.input_dc = input_dc_test
    self.input_freq = input_dc_test.get_freq()
    self.input_df = self.input_dc.to_df().dropna()

    self.a = pd.DataFrame([10, 12, 32, 9, 11, 9], columns=['fakeSPY'],
                          index=pd.to_datetime(['2020-01-01', '2020-02-01',
                                                '2020-03-01', '2020-04-01',
                                                '2020-05-01', '2020-06-01']))
    self.a_series = DataSeries('ETF', self.input_freq, self.a)
    self.b = pd.DataFrame([1, 1.2, 3.2, 0.9], columns=['fakeTreasury'],
                          index=pd.to_datetime(['2019-12-01', '2020-02-01',
                                                '2020-03-01', '2020-04-01']))
    self.b_series = DataSeries('Bond', self.input_freq, self.b)
    self.c_collection = DataCollection('trial',
                                       [self.a_series, self.b_series])
    # The two fake series have misaligned indices; align them by linear
    # interpolation down each column.
    self.c_df = self.c_collection.to_df().interpolate(method='linear', axis=0)
# Hyperparameters for a rolling-validation run.
numTest = output_size = 30
input_size = 30
max_epochs = 15
batch_size = 64
learning_rate = 1e-2
lr_scheduler_step_size = 9
lr_decay = 0.9
noise_std = 0.001
level_variability_penalty = 80
state_hsize = 40
dilation = [[1]]
add_nl_layer = False
seasonality = [5]

# Action: build the daily collection, hold out 2 * numTest observations,
# and score this configuration on the validation window.
path = os.path.join('test', 'Data', 'Daily')
dic = preprocess.read_file(path)
series_list = []
for k, v in dic.items():
    df, cat = v
    df = preprocess.single_price(df, k)
    series_list.append(DataSeries(cat, 'daily', df))
collect = DataCollection('RollingValidation', series_list)
input_dc, _ = collect.split(numTest=2 * numTest)

score, _ = validation_simple(
    input_dc, numTest=numTest, max_epochs=max_epochs, batch_size=batch_size,
    learning_rate=learning_rate,
    lr_scheduler_step_size=lr_scheduler_step_size,
    # The original snippet is truncated after this point; forwarding the
    # remaining hyperparameters defined above is an assumption.
    lr_decay=lr_decay, noise_std=noise_std,
    level_variability_penalty=level_variability_penalty,
    state_hsize=state_hsize, dilation=dilation, add_nl_layer=add_nl_layer,
    seasonality=seasonality, input_size=input_size, output_size=output_size)
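# A sketch of what a simple holdout scorer like validation_simple might do
# (assumed shape; the repo's actual signature and return values may differ):
# split the collection, train ESRNN on the head, forecast the held-out tail,
# and score with sMAPE. esrnn_kwargs should not include output_size.
def validation_simple_ref(dc, numTest, **esrnn_kwargs):
    train_dc, test_dc = dc.split(numTest=numTest)
    model = ModelESRNN(output_size=numTest, **esrnn_kwargs)
    model.train(train_dc)
    forecast_df = model.predict(test_dc).to_df()
    test_df = test_dc.to_df()
    # sMAPE averaged over horizon and series.
    smape = (2.0 * (test_df - forecast_df).abs()
             / (test_df.abs() + forecast_df.abs())).mean().mean()
    return smape, forecast_df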