def train_initial_model(train_func, df, timesteps, model_name, save_path):
    """Train one model on the in-sample window (20090000 to 20151001) with the given training function."""
    print("============Start Training Initial Model============")
    train = data_split(df, start=20090000, end=20151001)
    env_train = DummyVecEnv([lambda: StockEnvTrain(train)])
    return train_func(env_train, model_name, timesteps=timesteps, save_path=save_path)
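# Usage sketch for train_initial_model (illustrative only): any of the train_*
# helpers in this module can be passed in, provided it accepts the save_path
# keyword that train_initial_model forwards. The data-loading call and the
# paths below are assumptions, not part of the original code.
#
#   data = preprocess_data()  # hypothetical data-loading helper
#   model = train_initial_model(
#       train_func=train_A2C,
#       df=data,
#       timesteps=30000,
#       model_name="A2C_initial",
#       save_path="trained_models/A2C_initial",
#   )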
def run_ensemble_strategy(df, unique_trade_date, rebalance_window, validation_window) -> None:
    """Ensemble Strategy that combines PPO, A2C and DDPG"""
    print("============Start Ensemble Strategy============")
    # for the ensemble model, it's necessary to feed the last state
    # of the previous model to the current model as the initial state
    last_state_ensemble = []

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []

    model_use = []

    # based on the analysis of the in-sample data
    # turbulence_threshold = 140
    insample_turbulence = df[(df.datadate < 20151000) & (df.datadate >= 20090000)]
    insample_turbulence = insample_turbulence.drop_duplicates(subset=['datadate'])
    insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)

    start = time.time()
    for i in range(rebalance_window + validation_window, len(unique_trade_date), rebalance_window):
        print("============================================")
        ## initial state is empty
        if i - rebalance_window - validation_window == 0:
            # initial state
            initial = True
        else:
            # previous state
            initial = False

        # Tuning turbulence index based on historical data
        # Turbulence lookback window is one quarter
        historical_turbulence = df[(df.datadate < unique_trade_date[i - rebalance_window - validation_window]) &
                                   (df.datadate >= unique_trade_date[i - rebalance_window - validation_window - 63])]
        historical_turbulence = historical_turbulence.drop_duplicates(subset=['datadate'])
        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
            # then we assume that the current market is volatile,
            # therefore we set the 90% quantile of insample turbulence data as the turbulence threshold
            # meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
            turbulence_threshold = insample_turbulence_threshold
        else:
            # if the mean of the historical data is less than the 90% quantile of insample turbulence data
            # then we tune up the turbulence_threshold, meaning we lower the risk
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)
        print("turbulence_threshold: ", turbulence_threshold)

        ############## Environment Setup starts ##############
        ## training env
        train = data_split(df, start=20090000, end=unique_trade_date[i - rebalance_window - validation_window])
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        ## validation env
        validation = data_split(df, start=unique_trade_date[i - rebalance_window - validation_window],
                                end=unique_trade_date[i - rebalance_window])
        env_val = DummyVecEnv([lambda: StockEnvValidation(validation,
                                                          turbulence_threshold=turbulence_threshold,
                                                          iteration=i)])
        obs_val = env_val.reset()
        ############## Environment Setup ends ##############

        ############## Training and Validation starts ##############
        print("======Model training from: ", 20090000, "to ",
              unique_trade_date[i - rebalance_window - validation_window])
        # print("training: ", len(data_split(df, start=20090000, end=test.datadate.unique()[i-rebalance_window])))
        # print("==============Model Training===========")
        print("======A2C Training========")
        model_a2c = train_A2C(env_train, model_name="A2C_30k_dow_{}".format(i), timesteps=30000)
        print("======A2C Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_a2c, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_a2c = get_validation_sharpe(i)
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        print("======PPO Training========")
        model_ppo = train_PPO(env_train, model_name="PPO_100k_dow_{}".format(i), timesteps=100000)
        print("======PPO Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_ppo, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_ppo = get_validation_sharpe(i)
        print("PPO Sharpe Ratio: ", sharpe_ppo)

        print("======DDPG Training========")
        model_ddpg = train_DDPG(env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=10000)
        # model_ddpg = train_TD3(env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=20000)
        print("======DDPG Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_ddpg, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_ddpg = get_validation_sharpe(i)

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        ddpg_sharpe_list.append(sharpe_ddpg)

        # Model selection based on Sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_ensemble = model_ppo
            model_use.append('PPO')
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_ensemble = model_a2c
            model_use.append('A2C')
        else:
            model_ensemble = model_ddpg
            model_use.append('DDPG')
        ############## Training and Validation ends ##############

        ############## Trading starts ##############
        print("======Trading from: ", unique_trade_date[i - rebalance_window], "to ", unique_trade_date[i])
        # print("Used Model: ", model_ensemble)
        last_state_ensemble = DRL_prediction(df=df, model=model_ensemble, name="ensemble",
                                             last_state=last_state_ensemble, iter_num=i,
                                             unique_trade_date=unique_trade_date,
                                             rebalance_window=rebalance_window,
                                             turbulence_threshold=turbulence_threshold,
                                             initial=initial)
        # print("============Trading Done============")
        ############## Trading ends ##############

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")
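# A minimal sketch of the get_validation_sharpe helper used above, under the
# assumption that the validation environment writes one account value per day to
# results/account_value_validation_{iteration}.csv; the file path, column layout
# and the 4 ** 0.5 scaling factor are assumptions, not taken from this file.
def get_validation_sharpe_sketch(iteration):
    import pandas as pd  # local import to keep this sketch self-contained

    df_total_value = pd.read_csv(
        "results/account_value_validation_{}.csv".format(iteration), index_col=0
    )
    df_total_value.columns = ["account_value"]
    df_total_value["daily_return"] = df_total_value["account_value"].pct_change(1)
    # Sharpe ratio of daily returns over the validation window,
    # scaled by 4 ** 0.5 (treating the window as one quarter)
    sharpe = (
        (4 ** 0.5)
        * df_total_value["daily_return"].mean()
        / df_total_value["daily_return"].std()
    )
    return sharpe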
def run_ensemble_strategy(df, unique_trade_date, rebalance_window, validation_window) -> None:
    """Ensemble Strategy that combines PPO, A2C and DDPG"""
    print("============Start Ensemble Strategy============")
    # for the ensemble model, it's necessary to feed the last state
    # of the previous model to the current model as the initial state
    last_state_ensemble = []

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []

    model_use = []

    # based on the analysis of the in-sample data
    turbulence_threshold = 140

    start = time.time()
    for i in range(rebalance_window + validation_window, len(unique_trade_date), rebalance_window):
        print("============================================")
        ## initial state is empty
        if i - rebalance_window - validation_window == 0:
            # initial state
            initial = True
        else:
            # previous state
            initial = False

        # Tuning turbulence index based on historical data
        # TODO: need a more dynamic model
        # 2018-2019
        if (i >= 692) & (i < 1090):
            turbulence_threshold = 100
        # 2020
        if i >= 1090:
            turbulence_threshold = 90

        ############## Environment Setup starts ##############
        ## training env
        train = data_split(df, start=20090000, end=unique_trade_date[i - rebalance_window - validation_window])
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        ## validation env
        validation = data_split(df, start=unique_trade_date[i - rebalance_window - validation_window],
                                end=unique_trade_date[i - rebalance_window])
        env_val = DummyVecEnv([lambda: StockEnvValidation(validation,
                                                          turbulence_threshold=turbulence_threshold,
                                                          iteration=i)])
        obs_val = env_val.reset()
        ############## Environment Setup ends ##############

        ############## Training and Validation starts ##############
        print("======Model training from: ", 20090000, "to ",
              unique_trade_date[i - rebalance_window - validation_window])
        # print("training: ", len(data_split(df, start=20090000, end=test.datadate.unique()[i-rebalance_window])))
        # print("==============Model Training===========")
        print("======A2C Training========")
        model_a2c = train_A2C(env_train, model_name="A2C_10k_dow_{}".format(i), timesteps=20000)
        print("======A2C Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_a2c, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_a2c = get_validation_sharpe(i)
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        print("======PPO Training========")
        model_ppo = train_PPO(env_train, model_name="PPO_100k_dow_{}".format(i), timesteps=50000)
        print("======PPO Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_ppo, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_ppo = get_validation_sharpe(i)
        print("PPO Sharpe Ratio: ", sharpe_ppo)

        print("======DDPG Training========")
        model_ddpg = train_DDPG(env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=10000)
        print("======DDPG Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_ddpg, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_ddpg = get_validation_sharpe(i)

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        ddpg_sharpe_list.append(sharpe_ddpg)

        # Model selection based on Sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_ensemble = model_ppo
            model_use.append('PPO')
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_ensemble = model_a2c
            model_use.append('A2C')
        else:
            model_ensemble = model_ddpg
            model_use.append('DDPG')
        ############## Training and Validation ends ##############

        ############## Trading starts ##############
        print("======Trading from: ", unique_trade_date[i - rebalance_window], "to ", unique_trade_date[i])
        print("Used Model: ", model_ensemble)
        last_state_ensemble = DRL_prediction(df=df, model=model_ensemble, name="ensemble",
                                             last_state=last_state_ensemble, iter_num=i,
                                             unique_trade_date=unique_trade_date,
                                             rebalance_window=rebalance_window,
                                             turbulence_threshold=turbulence_threshold,
                                             initial=initial)
        # print("============Trading Done============")
        ############## Trading ends ##############

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")
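# Usage sketch (illustrative): the strategy retrains on a rolling schedule, and a
# natural choice for both windows is 63 trading days (one quarter), matching the
# turbulence lookback used above. The data-loading call and the trade-date bounds
# are assumptions, not part of this file.
#
#   data = preprocess_data()  # hypothetical data-loading helper
#   unique_trade_date = data[data.datadate > 20151001].datadate.unique()
#   run_ensemble_strategy(
#       df=data,
#       unique_trade_date=unique_trade_date,
#       rebalance_window=63,
#       validation_window=63,
#   )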
def run_ensemble_strategy(
    df, unique_trade_date, rebalance_window, validation_window
) -> None:
    """Ensemble Strategy that combines PPO, A2C and DDPG"""
    # for the ensemble model, it's necessary to feed the last state
    # of the previous model to the current model as the initial state
    last_state_ensemble = []

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []

    model_used = []

    # based on the analysis of the in-sample data
    # turbulence_threshold = 140
    insample_turbulence = df[
        (df.datadate < config.VALIDATION_START_DATE - 1)
        & (df.datadate >= config.TRAINING_START_DATE)
    ]
    insample_turbulence = insample_turbulence.drop_duplicates(subset=["datadate"])
    insample_turbulence_threshold = np.quantile(
        insample_turbulence.turbulence.values, 0.90
    )

    start = time.time()
    for i in range(
        rebalance_window + validation_window, len(unique_trade_date), rebalance_window
    ):
        ## initial state is empty
        if i - rebalance_window - validation_window == 0:
            # initial state
            initial = True
        else:
            # previous state
            initial = False

        # Tuning turbulence index based on historical data
        # Turbulence lookback window is one quarter
        end_date_index = df.index[
            df["datadate"]
            == unique_trade_date[i - rebalance_window - validation_window]
        ].to_list()[-1]
        start_date_index = end_date_index - validation_window * 30 + 1

        historical_turbulence = df.iloc[start_date_index : (end_date_index + 1), :]
        # historical_turbulence = df[(df.datadate < unique_trade_date[i - rebalance_window - validation_window]) & (df.datadate >= (unique_trade_date[i - rebalance_window - validation_window - 63]))]

        historical_turbulence = historical_turbulence.drop_duplicates(
            subset=["datadate"]
        )
        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
            # then we assume that the current market is volatile,
            # therefore we set the 90% quantile of insample turbulence data as the turbulence threshold
            # meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
            turbulence_threshold = insample_turbulence_threshold
        else:
            # if the mean of the historical data is less than the 90% quantile of insample turbulence data
            # then we tune up the turbulence_threshold, meaning we lower the risk
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)

        style = "[bold #31DDCF]"
        rprint(
            Align(
                f"{style}Turbulence Threshold:[/] {str(turbulence_threshold)}", "center"
            )
        )

        ############## Environment Setup starts ##############
        ## training env
        train = data_split(
            df,
            start=config.TRAINING_START_DATE,
            end=unique_trade_date[i - rebalance_window - validation_window],
        )
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        ## validation env
        validation = data_split(
            df,
            start=unique_trade_date[i - rebalance_window - validation_window],
            end=unique_trade_date[i - rebalance_window],
        )
        env_val = DummyVecEnv(
            [
                lambda: StockEnvValidation(
                    validation, turbulence_threshold=turbulence_threshold, iteration=i
                )
            ]
        )
        obs_val = env_val.reset()
        ############## Environment Setup ends ##############

        ############## Training and Validation starts ##############
        table = Table(
            title=f"Training from 20090000 to {unique_trade_date[i - rebalance_window - validation_window]}",
            expand=True,
        )
        table.add_column("Model Name", justify="center")
        table.add_column("Sharpe Ratio")
        table.add_column("Training Time")
        with Live(table, auto_refresh=False) as live:
            model_a2c, a2c_training_time = train_A2C(
                env_train, model_name="A2C_30k_dow_{}".format(i), timesteps=30000
            )
            DRL_validation(
                model=model_a2c,
                test_data=validation,
                test_env=env_val,
                test_obs=obs_val,
            )
            sharpe_a2c = get_validation_sharpe(i)
            table.add_row("A2C", str(sharpe_a2c), f"{a2c_training_time} minutes")
            live.update(table, refresh=True)

            model_ppo, ppo_training_time = train_PPO(
                env_train, model_name="PPO_100k_dow_{}".format(i), timesteps=100000
            )
            DRL_validation(
                model=model_ppo,
                test_data=validation,
                test_env=env_val,
                test_obs=obs_val,
            )
            sharpe_ppo = get_validation_sharpe(i)
            table.add_row("PPO", str(sharpe_ppo), f"{ppo_training_time} minutes")
            live.update(table, refresh=True)

            model_ddpg, ddpg_training_time = train_DDPG(
                env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=10000
            )
            # model_ddpg = train_TD3(env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=20000)
            DRL_validation(
                model=model_ddpg,
                test_data=validation,
                test_env=env_val,
                test_obs=obs_val,
            )
            sharpe_ddpg = get_validation_sharpe(i)
            table.add_row("DDPG", str(sharpe_ddpg), f"{ddpg_training_time} minutes")
            live.update(table, refresh=True)

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        ddpg_sharpe_list.append(sharpe_ddpg)

        # Model selection based on Sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_ensemble = model_ppo
            model_used.append("PPO")
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_ensemble = model_a2c
            model_used.append("A2C")
        else:
            model_ensemble = model_ddpg
            model_used.append("DDPG")
        ############## Training and Validation ends ##############

        ############## Trading starts ##############
        # print("Used Model: ", model_ensemble)
        last_state_ensemble = DRL_prediction(
            df=df,
            model=model_ensemble,
            name="ensemble",
            last_state=last_state_ensemble,
            iter_num=i,
            unique_trade_date=unique_trade_date,
            rebalance_window=rebalance_window,
            turbulence_threshold=turbulence_threshold,
            initial=initial,
        )
        print("\n\n")
        # print("============Trading Done============")
        ############## Trading ends ##############

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")
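# A minimal sketch of the data_split helper relied on throughout this module,
# assuming datadate is an integer in YYYYMMDD form and the frame holds one row
# per (date, ticker) pair; the exact implementation in this repo may differ.
def data_split_sketch(df, start, end):
    data = df[(df.datadate >= start) & (df.datadate < end)]
    data = data.sort_values(["datadate", "tic"], ignore_index=True)
    # give every row of the same trading day the same index value (assumed to be
    # how the Stock* environments step through the data one day at a time)
    data.index = data.datadate.factorize()[0]
    return data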