def train_one():
    env_kwargs, processed = prepare_data()

    # Training & trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    model = agent.get_model(config.CURRENT_MODEL)
    model = agent.train_model(model=model, total_timesteps=80000)
    path = f"{config.TRAINED_MODEL_DIR}/model"
    model.save(path)
    model = model.load(path)  # reload to verify the checkpoint round-trips

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
def main():
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers

    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)

    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)

    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(indicator_list) * stock_dimension

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    # The online environment is seeded with the first trading day only;
    # subsequent days are streamed in below via add_data().
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)

    training_agent = DRLAgent(env=env_train)
    model_a2c = training_agent.get_model("a2c")
    # trained_a2c = training_agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=10000)

    feature_engineer = FeatureEngineer()
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)
    # range() excludes the stop value, so add 1 to include the final trading day
    for i in range(1, trade_data.index.unique().max() + 1):
        print(trade_data.loc[i])
        online_stock_pred.add_data(trade_data.loc[i])
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States:", states)
        print("Next observation:", next_obs)
        print("Rewards:", rewards)
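# Hedged sketch (not part of the original): generate_sentiment_scores is assumed
# here to return one sentiment value per (date, tic) pair so that get_initial_data
# can merge it with the Yahoo price frame. A hypothetical stub with that shape:
def generate_sentiment_scores_stub(start_date, end_date):
    import pandas as pd
    dates = pd.date_range(start_date, end_date).astype(str)
    rows = [(d, t, 0.0) for d in dates for t in stock_tickers]  # neutral scores
    return pd.DataFrame(rows, columns=['date', 'tic', 'sentiment'])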
def DRL_prediction(self, model, name, last_state, iter_num,
                   turbulence_threshold, initial):
    """Make a prediction based on the trained model and return the final state."""
    ## trading env
    trade_data = data_split(self.df,
                            start=self.unique_trade_date[iter_num - self.rebalance_window],
                            end=self.unique_trade_date[iter_num])
    trade_env = DummyVecEnv([lambda: StockTradingEnv(
        trade_data, self.stock_dim, self.hmax, self.initial_amount,
        self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
        self.state_space, self.action_space, self.tech_indicator_list,
        turbulence_threshold=turbulence_threshold,
        initial=initial,
        previous_state=last_state,
        model_name=name,
        mode='trade',
        iteration=iter_num,
        print_verbosity=self.print_verbosity)])
    trade_obs = trade_env.reset()

    for i in range(len(trade_data.index.unique())):
        action, _states = model.predict(trade_obs)
        trade_obs, rewards, dones, info = trade_env.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            # capture the state one step before the episode terminates
            last_state = trade_env.render()

    df_last_state = pd.DataFrame({'last_state': last_state})
    df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i), index=False)
    return last_state
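# Usage note: the value returned here is fed back in as last_state on the next
# rebalance iteration (see run_ensemble_strategy later in this section), so each
# trading window resumes from the portfolio state reached at the end of the
# previous one rather than restarting from the initial cash position.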
def predict():
    env_kwargs, processed = prepare_data()
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)
    e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)

    path = config.TRAINED_MODEL_DIR + "/model"
    # map the configured name to its algorithm class instead of building a
    # string for eval()
    model_cls = {"a2c": A2C, "ddpg": DDPG, "ppo": PPO,
                 "sac": SAC, "td3": TD3}[config.CURRENT_MODEL.lower()]
    trained_model = model_cls.load(path)

    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
def new_test():
    processed = pd.read_csv(
        os.path.abspath('./me/datasets/new_data_with_techs_turb.csv'),
        index_col=0)
    train = data_split(processed, '2009-01-01', '2018-01-01')
    trade = data_split(processed, '2018-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)
    env_train.envs[0] = Monitor(env_train.envs[0], log_dir)

    agent = DRLAgent(env=env_train)
    model_a2c = agent.get_model("a2c", verbose=0)
    trained_a2c = agent.train_model(model=model_a2c,
                                    tb_log_name='a2c',
                                    total_timesteps=100000)

    data_turbulence = processed[(processed.date < '2018-01-01')
                                & (processed.date >= '2009-01-01')]
    insample_turbulence = data_turbulence.drop_duplicates(subset=['date'])
    # computed but currently unused: the trade env below hard-codes 380 instead
    turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)

    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=380, **env_kwargs)
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    print("BEGIN PREDICTION")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_a2c,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    print(df_account_value)
    print("END PREDICTION")
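# Hedged usage sketch (not part of the original script, assuming the
# stable_baselines3 Monitor used above): episode stats are appended to
# monitor.csv under log_dir and can be read back with the results_plotter
# helpers. print_episode_returns is a hypothetical helper name.
from stable_baselines3.common.results_plotter import load_results, ts2xy

def print_episode_returns(log_dir="me/tmp/"):
    # x: cumulative timesteps at each episode end; y: per-episode rewards
    x, y = ts2xy(load_results(log_dir), 'timesteps')
    for steps, ep_reward in zip(x, y):
        print(f"{int(steps):>8} steps -> episode reward {ep_reward:.2f}")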
def create_training_environment(data, env_kwargs=None):
    # avoid a mutable dict as a default argument; build the default per call
    if env_kwargs is None:
        env_kwargs = {
            "hmax": 100,
            "initial_amount": 1000000,
            "buy_cost_pct": 0.001,
            "sell_cost_pct": 0.001,
            "state_space": 331,
            "stock_dim": 30,
            "tech_indicator_list": ['sentiment'] + config.TECHNICAL_INDICATORS_LIST,
            "action_space": 30,
            "reward_scaling": 1e-4
        }
    e_train_gym = StockTradingEnv(df=data, **env_kwargs)
    return e_train_gym
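# Where the hard-coded state_space of 331 comes from (a worked check, assuming
# config.TECHNICAL_INDICATORS_LIST holds FinRL's 8 default indicators):
#   state_space = 1 (cash) + 2 * stock_dim (prices + holdings)
#                 + len(tech_indicator_list) * stock_dim
#               = 1 + 2 * 30 + (1 sentiment + 8 technical) * 30
#               = 1 + 60 + 270 = 331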
def DRL_prediction(model: PPO, environment: StockTradingEnv) -> object:
    """Make a prediction over the whole trade window with a trained model."""
    test_env, test_obs = environment.get_sb_env()
    account_memory = []
    actions_memory = []
    test_env.reset()
    for i in range(len(environment.df.index.unique())):
        action, _ = model.predict(test_obs, deterministic=True)
        test_obs, rewards, dones, info = test_env.step(action)
        if i == (len(environment.df.index.unique()) - 2):
            # collect the memories one step before the episode terminates
            account_memory = test_env.env_method(method_name="save_asset_memory")
            actions_memory = test_env.env_method(method_name="save_action_memory")
        if dones[0]:
            print("hit end!")
            break
    return account_memory[0], actions_memory[0]
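# Hedged usage sketch (names illustrative; assumes e_trade_gym is the trade-window
# StockTradingEnv built as in the other snippets here): load a checkpoint saved
# earlier and run it through DRL_prediction.
trained_model = PPO.load(f"{config.TRAINED_MODEL_DIR}/model")
df_account_value, df_actions = DRL_prediction(model=trained_model,
                                              environment=e_trade_gym)
print(df_account_value.tail())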
state_space = 1 + 2 * stock_dimension + len(indicators) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
    "hmax": 500,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": indicators,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}
test_gym_env = StockTradingEnv(df=df, turbulence_threshold=329, **env_kwargs)
agent = DRLAgent(env=test_gym_env)

if model == 'ensemble':
    trained_model = EnsembleModel(test_gym_env, model_paths, 'binaverage')
else:
    model_params = config.__dict__[f"{model.upper()}_PARAMS"]
    trained_model = agent.get_model(model,
                                    model_kwargs=model_params,
                                    verbose=0).load(model_paths)

print('Testing...')
df_account_value, df_actions = DRLAgent.average_predict(
env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

# In[25]:

env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

# <a id='5'></a>
# # Part 6: Implement DRL Algorithms
# * The implementation of the DRL algorithms is based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines with a major structural refactoring and code cleanups.
# * The FinRL library includes fine-tuned standard DRL algorithms such as DQN, DDPG, Multi-Agent DDPG, PPO, SAC, A2C and TD3. Users can also design their own DRL algorithms by adapting these implementations.
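# A minimal continuation sketch (not part of the original notebook cell),
# following the pattern used elsewhere in this repo: wrap the vectorized
# training env in a DRLAgent and instantiate one of the supported algorithms
# by name.
agent = DRLAgent(env=env_train)
model_ppo = agent.get_model("ppo")  # any of: "a2c", "ddpg", "ppo", "sac", "td3"
trained_ppo = agent.train_model(model=model_ppo,
                                tb_log_name="ppo",
                                total_timesteps=50000)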
def train_one(fetch=False):
    """
    train an agent
    """
    if fetch:
        df = fetch_and_store()
    else:
        df = load()

    # every date must have a row for every ticker
    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & trading data split
    start_date, trade_date, end_date = calculate_split(df, start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)
    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(trained_sac, e_trade_gym)
    df_account_value.to_csv(f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] + config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'], tz=pytz.utc)).drop(columns=['date'])
    # cast all position columns (cash included) to float
    df_positions = df_positions.astype(
        {col: np.float64 for col in df_positions.columns})
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")

    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)
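# calculate_split is not defined in this excerpt. A hypothetical sketch with the
# same return shape (start, trade, end), assuming a simple 80/20 chronological
# train/trade split over the available dates:
def calculate_split_stub(df, start):
    dates = sorted(df[df.date >= start].date.unique())
    trade_date = dates[int(len(dates) * 0.8)]  # assumed split point
    return dates[0], trade_date, dates[-1]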
state_space = 1 + 2 * stock_dimension + len(
    config.TECHNICAL_INDICATORS_LIST) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

'''Set Env Parameters'''
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

'''Create Train Env'''
e_train_gym = StockTradingEnv(df=train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

############# BUILD AGENT #############
agent = DRLAgent(env=env_train)

'''Set Agent Parameters'''
A2C_params = {
    "n_steps": 2048,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
}
model_a2c = agent.get_model("a2c", model_kwargs=A2C_params)

'''Train Agent'''
trained_a2c = agent.train_model(model=model_a2c,
def main():
    # Basic setup: disable warnings
    warnings.filterwarnings('ignore')
    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST

    # create the following folders if they do not exist
    if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
        os.makedirs("./" + config.TRAINED_MODEL_DIR)
    if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
        os.makedirs("./" + config.TENSORBOARD_LOG_DIR)

    print()
    print("==============Load Training Data===========")
    path_training = "./" + config.DATA_SAVE_DIR + "/training.txt"
    with open(path_training, "rb") as f:
        dump = pickle.load(f)

    stock_dimension = len(dump.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        tech_indicator_list) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": tech_indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_training_gym = StockTradingEnv(df=dump, **env_kwargs)

    print("Get Environment for Training")
    env_training, _ = e_training_gym.get_sb_env()
    print(type(env_training))

    # Implement DRL Algorithms
    #
    # The implementation of the DRL algorithms is based on OpenAI Baselines and
    # Stable Baselines. Stable Baselines is a fork of OpenAI Baselines with a
    # major structural refactoring and code cleanups. The FinRL library includes
    # fine-tuned standard DRL algorithms such as DQN, DDPG, Multi-Agent DDPG,
    # PPO, SAC, A2C and TD3, and lets users design their own DRL algorithms by
    # adapting them. Instead of installing the FinRL lib I have included the
    # source code and created my own version.
    agent = Agent(env=env_training)

    print("======================================================")
    print("Please select which training you want me to perform.")
    print("1. A2C  - Advantage Actor-Critic algorithm")
    print("2. DDPG - Deep Deterministic Policy Gradient algorithm")
    print("3. PPO  - Proximal Policy Optimization algorithm")
    print("4. TD3  - Twin Delayed Deep Deterministic Policy Gradient algorithm")
    print("5. SAC  - Soft Actor-Critic algorithm")
    print("6. All Algorithms")
    print("7. Exit")
    print("-------------------------------------------------------")
    selection = int(input("Select what you want me to do: "))

    if selection == 1:
        train_a2c(agent)
    elif selection == 2:
        train_ddpg(agent)
    elif selection == 3:
        train_ppo(agent)
    elif selection == 4:
        train_td3(agent)
    elif selection == 5:
        train_sac(agent)
    elif selection == 6:
        train_a2c(agent)
        train_ddpg(agent)
        train_ppo(agent)
        train_td3(agent)
        train_sac(agent)
    elif selection == 7:
        print("exit")
    else:
        print("Invalid option selected!")
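# A hedged refactoring sketch (not in the original): the if/elif ladder above
# can be expressed as a dispatch table, which keeps the menu entries and the
# trainer functions in sync.
TRAINERS = {
    1: [train_a2c],
    2: [train_ddpg],
    3: [train_ppo],
    4: [train_td3],
    5: [train_sac],
    6: [train_a2c, train_ddpg, train_ppo, train_td3, train_sac],
}

def dispatch(selection, agent):
    if selection == 7:
        print("exit")
    elif selection in TRAINERS:
        for train in TRAINERS[selection]:
            train(agent)
    else:
        print("Invalid option selected!")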
def run_ensemble_strategy(self, A2C_model_kwargs, PPO_model_kwargs,
                          DDPG_model_kwargs, timesteps_dict):
    """Ensemble strategy that combines PPO, A2C and DDPG."""
    print("============Start Ensemble Strategy============")
    # For the ensemble model, the last state of the previous model must be fed
    # to the current model as its initial state.
    last_state_ensemble = []

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []
    model_use = []
    validation_start_date_list = []
    validation_end_date_list = []
    iteration_list = []

    insample_turbulence = self.df[(self.df.date < self.train_period[1])
                                  & (self.df.date >= self.train_period[0])]
    insample_turbulence_threshold = np.quantile(
        insample_turbulence.turbulence.values, .90)

    start = time.time()
    for i in range(self.rebalance_window + self.validation_window,
                   len(self.unique_trade_date), self.rebalance_window):
        validation_start_date = self.unique_trade_date[
            i - self.rebalance_window - self.validation_window]
        validation_end_date = self.unique_trade_date[i - self.rebalance_window]

        validation_start_date_list.append(validation_start_date)
        validation_end_date_list.append(validation_end_date)
        iteration_list.append(i)

        print("============================================")
        # the initial state is empty only on the first rebalance
        initial = (i - self.rebalance_window - self.validation_window == 0)

        # Tune the turbulence threshold based on historical data;
        # the turbulence lookback window is one quarter (63 trading days)
        end_date_index = self.df.index[
            self.df["date"] == self.unique_trade_date[
                i - self.rebalance_window - self.validation_window]].to_list()[-1]
        start_date_index = end_date_index - 63 + 1
        historical_turbulence = self.df.iloc[start_date_index:(end_date_index + 1), :]
        historical_turbulence = historical_turbulence.drop_duplicates(subset=['date'])
        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # If the recent mean exceeds the 90% quantile of in-sample turbulence,
            # treat the current market as volatile and cap the threshold at that
            # quantile: current turbulence may not exceed the 90% in-sample level.
            turbulence_threshold = insample_turbulence_threshold
        else:
            # Otherwise raise the threshold to the 99% quantile, i.e. tolerate
            # more turbulence because recent conditions look calm.
            turbulence_threshold = np.quantile(
                insample_turbulence.turbulence.values, 0.99)
        print("turbulence_threshold: ", turbulence_threshold)

        ############## Environment setup starts ##############
        ## training env
        train = data_split(self.df,
                           start=self.train_period[0],
                           end=self.unique_trade_date[
                               i - self.rebalance_window - self.validation_window])
        self.train_env = DummyVecEnv([lambda: StockTradingEnv(
            train, self.stock_dim, self.hmax, self.initial_amount,
            self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
            self.state_space, self.action_space, self.tech_indicator_list,
            print_verbosity=self.print_verbosity)])

        validation = data_split(self.df,
                                start=self.unique_trade_date[
                                    i - self.rebalance_window - self.validation_window],
                                end=self.unique_trade_date[i - self.rebalance_window])
        ############## Environment setup ends ##############

        ############## Training and validation starts ##############
        print("======Model training from: ", self.train_period[0], "to ",
              self.unique_trade_date[i - self.rebalance_window - self.validation_window])

        print("======A2C Training========")
        model_a2c = self.get_model("a2c", self.train_env, policy="MlpPolicy",
                                   model_kwargs=A2C_model_kwargs)
        model_a2c = self.train_model(model_a2c, "a2c",
                                     tb_log_name="a2c_{}".format(i),
                                     iter_num=i,
                                     total_timesteps=timesteps_dict['a2c'])
        print("======A2C Validation from: ", validation_start_date,
              "to ", validation_end_date)
        val_env_a2c = DummyVecEnv([lambda: StockTradingEnv(
            validation, self.stock_dim, self.hmax, self.initial_amount,
            self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
            self.state_space, self.action_space, self.tech_indicator_list,
            turbulence_threshold=turbulence_threshold,
            iteration=i, model_name='A2C', mode='validation',
            print_verbosity=self.print_verbosity)])
        val_obs_a2c = val_env_a2c.reset()
        self.DRL_validation(model=model_a2c, test_data=validation,
                            test_env=val_env_a2c, test_obs=val_obs_a2c)
        sharpe_a2c = self.get_validation_sharpe(i, model_name="A2C")
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        print("======PPO Training========")
        model_ppo = self.get_model("ppo", self.train_env, policy="MlpPolicy",
                                   model_kwargs=PPO_model_kwargs)
        model_ppo = self.train_model(model_ppo, "ppo",
                                     tb_log_name="ppo_{}".format(i),
                                     iter_num=i,
                                     total_timesteps=timesteps_dict['ppo'])
        print("======PPO Validation from: ", validation_start_date,
              "to ", validation_end_date)
        val_env_ppo = DummyVecEnv([lambda: StockTradingEnv(
            validation, self.stock_dim, self.hmax, self.initial_amount,
            self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
            self.state_space, self.action_space, self.tech_indicator_list,
            turbulence_threshold=turbulence_threshold,
            iteration=i, model_name='PPO', mode='validation',
            print_verbosity=self.print_verbosity)])
        val_obs_ppo = val_env_ppo.reset()
        self.DRL_validation(model=model_ppo, test_data=validation,
                            test_env=val_env_ppo, test_obs=val_obs_ppo)
        sharpe_ppo = self.get_validation_sharpe(i, model_name="PPO")
        print("PPO Sharpe Ratio: ", sharpe_ppo)

        print("======DDPG Training========")
        model_ddpg = self.get_model("ddpg", self.train_env, policy="MlpPolicy",
                                    model_kwargs=DDPG_model_kwargs)
        model_ddpg = self.train_model(model_ddpg, "ddpg",
                                      tb_log_name="ddpg_{}".format(i),
                                      iter_num=i,
                                      total_timesteps=timesteps_dict['ddpg'])
        print("======DDPG Validation from: ", validation_start_date,
              "to ", validation_end_date)
        val_env_ddpg = DummyVecEnv([lambda: StockTradingEnv(
            validation, self.stock_dim, self.hmax, self.initial_amount,
            self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
            self.state_space, self.action_space, self.tech_indicator_list,
            turbulence_threshold=turbulence_threshold,
            iteration=i, model_name='DDPG', mode='validation',
            print_verbosity=self.print_verbosity)])
        val_obs_ddpg = val_env_ddpg.reset()
        self.DRL_validation(model=model_ddpg, test_data=validation,
                            test_env=val_env_ddpg, test_obs=val_obs_ddpg)
        sharpe_ddpg = self.get_validation_sharpe(i, model_name="DDPG")

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        ddpg_sharpe_list.append(sharpe_ddpg)

        print("======Best Model Retraining from: ", self.train_period[0],
              "to ", self.unique_trade_date[i - self.rebalance_window])
        # Optional: retrain the selected model on the full window up to the
        # first trade date (kept commented out):
        # train_full = data_split(self.df, start=self.train_period[0],
        #                         end=self.unique_trade_date[i - self.rebalance_window])
        # self.train_full_env = DummyVecEnv([lambda: StockTradingEnv(
        #     train_full, self.stock_dim, self.hmax, self.initial_amount,
        #     self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
        #     self.state_space, self.action_space, self.tech_indicator_list,
        #     print_verbosity=self.print_verbosity)])

        # Model selection based on validation Sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_use.append('PPO')
            model_ensemble = model_ppo
            # model_ensemble = self.get_model("ppo", self.train_full_env, policy="MlpPolicy", model_kwargs=PPO_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num=i, total_timesteps=timesteps_dict['ppo'])
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_use.append('A2C')
            model_ensemble = model_a2c
            # model_ensemble = self.get_model("a2c", self.train_full_env, policy="MlpPolicy", model_kwargs=A2C_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num=i, total_timesteps=timesteps_dict['a2c'])
        else:
            model_use.append('DDPG')
            model_ensemble = model_ddpg
            # model_ensemble = self.get_model("ddpg", self.train_full_env, policy="MlpPolicy", model_kwargs=DDPG_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num=i, total_timesteps=timesteps_dict['ddpg'])
        ############## Training and validation ends ##############

        ############## Trading starts ##############
        print("======Trading from: ", self.unique_trade_date[i - self.rebalance_window],
              "to ", self.unique_trade_date[i])
        last_state_ensemble = self.DRL_prediction(
            model=model_ensemble,
            name="ensemble",
            last_state=last_state_ensemble,
            iter_num=i,
            turbulence_threshold=turbulence_threshold,
            initial=initial)
        ############## Trading ends ##############

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")

    df_summary = pd.DataFrame([
        iteration_list, validation_start_date_list, validation_end_date_list,
        model_use, a2c_sharpe_list, ppo_sharpe_list, ddpg_sharpe_list
    ]).T
    df_summary.columns = ['Iter', 'Val Start', 'Val End', 'Model Used',
                          'A2C Sharpe', 'PPO Sharpe', 'DDPG Sharpe']
    return df_summary
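# A worked example of the rolling-window index arithmetic above (hedged; the
# window sizes are illustrative, e.g. rebalance_window = validation_window = 63
# trading days, roughly one quarter each):
#   first iteration:  i = 126
#     validation: unique_trade_date[0]  .. unique_trade_date[63]
#     trading:    unique_trade_date[63] .. unique_trade_date[126]
#   next iteration:   i = 189, and both windows slide forward by 63 days.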
def train_one(): """ train an agent """ print("==============Start Fetching Data===========") df = YahooDownloader( start_date=config.START_DATE, end_date=config.END_DATE, ticker_list=config.DOW_30_TICKER, ).fetch_data() print("==============Start Feature Engineering===========") fe = FeatureEngineer( use_technical_indicator=True, tech_indicator_list=config.TECHNICAL_INDICATORS_LIST, use_turbulence=True, user_defined_feature=False, ) processed = fe.preprocess_data(df) list_ticker = processed["tic"].unique().tolist() list_date = list( pd.date_range(processed['date'].min(), processed['date'].max()).astype(str)) combination = list(itertools.product(list_date, list_ticker)) processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(processed, on=["date", "tic"], how="left") processed_full = processed_full[processed_full['date'].isin( processed['date'])] processed_full = processed_full.sort_values(['date', 'tic']) processed_full = processed_full.fillna(0) # Training & Trading data split train = data_split(processed_full, config.START_DATE, config.START_TRADE_DATE) trade = data_split(processed_full, config.START_TRADE_DATE, config.END_DATE) # calculate state action space stock_dimension = len(train.tic.unique()) state_space = (1 + 2 * stock_dimension + len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension) env_kwargs = { "hmax": 100, "initial_amount": 1000000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, "action_space": stock_dimension, "reward_scaling": 1e-4 } e_train_gym = StockTradingEnv(df=train, **env_kwargs) env_train, _ = e_train_gym.get_sb_env() agent = DRLAgent(env=env_train) print("==============Model Training===========") now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M") model_sac = agent.get_model("sac") trained_sac = agent.train_model(model=model_sac, tb_log_name="sac", total_timesteps=80000) print("==============Start Trading===========") e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs) df_account_value, df_actions = DRLAgent.DRL_prediction( model=trained_sac, environment=e_trade_gym) df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + ".csv") df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv") print("==============Get Backtest Results===========") perf_stats_all = backtest_stats(df_account_value) perf_stats_all = pd.DataFrame(perf_stats_all) perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv")
def train_one(): """ train an agent """ print("==============Start Fetching Data===========") df = YahooDownloader( start_date=config.START_DATE, end_date=config.END_DATE, ticker_list=['FXAIX'], ).fetch_data() print("==============Start Feature Engineering===========") fe = FeatureEngineer( use_technical_indicator=True, tech_indicator_list=config.TECHNICAL_INDICATORS_LIST, use_turbulence=True, user_defined_feature=False, ) processed = fe.preprocess_data(df) # Training & Trading data split train = data_split(processed, config.START_DATE, config.START_TRADE_DATE) trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE) # calculate state action space stock_dimension = len(train.tic.unique()) state_space = (1 + 2 * stock_dimension + len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension) env_kwargs = { "hmax": 100, "initial_amount": 1000000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, "action_space": stock_dimension, "reward_scaling": 1e-4 } e_train_gym = StockTradingEnv(df=train, **env_kwargs) e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs) env_train, _ = e_train_gym.get_sb_env() env_trade, obs_trade = e_trade_gym.get_sb_env() agent = DRLAgent(env=env_train) print("==============Model Training===========") now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M") user_input = input('train model? 1 train 0 don\'t train') if user_input == 1: model_sac = agent.get_model("sac") trained_sac = agent.train_model(model=model_sac, tb_log_name="sac", total_timesteps=8000) trained_sac.save("../models/sac_8k" + df.tic[0] + "_frl") else: trained_sac = SAC.load('../models/sac_80k_msft_working') print("==============Start Trading===========") df_account_value, df_actions = DRLAgent.DRL_prediction( trained_sac, e_trade_gym) df_account_value.to_csv("../" + config.RESULTS_DIR + "/SAC_df_account_value_" + df.tic[0] + "_" + now + ".csv") df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" + df.tic[0] + "_" + now + ".csv") # print("==============Get Backtest Results===========") perf_stats_all = backtest_stats(df_account_value) perf_stats_all = pd.DataFrame(perf_stats_all) perf_stats_all.to_csv("../" + config.RESULTS_DIR + "/SAC_perf_stats_all_" + df.tic[0] + "_" + now + ".csv") #plot acc value actions = df_actions['actions'] x = np.arange(0, df_account_value['account_value'].shape[0]) y = df_account_value['account_value'] points = np.array([x, y]).T.reshape(-1, 1, 2) segments = np.concatenate([points[:-1], points[1:]], axis=1) fig, axs = plt.subplots(2, 1, sharex=True, sharey=False) # plt.plot(x, y) # Use a boundary norm instead cmap = ListedColormap(['r', 'g', 'b']) norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N) lc = LineCollection(segments, cmap=cmap, norm=norm) lc.set_array(actions) lc.set_linewidth(2) line = axs[0].add_collection(lc) # fig.colorbar(line, ax=axs) axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE + " to " + config.END_DATE + ')') axs[0].set_ylabel('Account Value (10000 of USD)') axs[0].set_title("Trading Test on " + df.tic[0]) axs[0].set_xlim(x.min(), x.max()) axs[0].set_ylim(y.min(), y.max()) custom_lines = [ Line2D([0], [0], color=cmap(0.), lw=4), Line2D([0], [0], color=cmap(.5), lw=4), Line2D([0], [0], color=cmap(1.), lw=4) ] # lines = ax.plot(data) axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy']) #plot stock value tx = np.arange(0, df_account_value['account_value'].shape[0]) ty = 
trade['close'] plt.ylabel('Price (USD)') plt.title(df.tic[0] + " Closing Price") plt.plot(tx, ty) plt.savefig("../" + config.RESULTS_DIR + "/plots/" "SAC_plot_" + df.tic[0] + "_" + now + ".png")
def train_one(): """ train an agent """ print("==============Start Fetching Data===========") df = YahooDownloader( start_date=config.START_DATE, end_date=config.END_DATE, ticker_list=config.DOW_30_TICKER, ).fetch_data() print("==============Start Feature Engineering===========") fe = FeatureEngineer( use_technical_indicator=True, tech_indicator_list=config.TECHNICAL_INDICATORS_LIST, use_turbulence=True, user_defined_feature=False, ) processed = fe.preprocess_data(df) # Training & Trading data split train = data_split(processed, config.START_DATE, config.START_TRADE_DATE) trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE) # calculate state action space stock_dimension = len(train.tic.unique()) state_space = (1 + 2 * stock_dimension + len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension) env_kwargs = { "hmax": 100, "initial_amount": 1000000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, "action_space": stock_dimension, "reward_scaling": 1e-4 } e_train_gym = StockTradingEnv(df=train, **env_kwargs) e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs) env_train, _ = e_train_gym.get_sb_env() env_trade, obs_trade = e_trade_gym.get_sb_env() agent = DRLAgent(env=env_train) print("==============Model Training===========") now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M") model_sac = agent.get_model("sac") trained_sac = agent.train_model(model=model_sac, tb_log_name="sac", total_timesteps=80000) print("==============Start Trading===========") df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac, test_data=trade, test_env=env_trade, test_obs=obs_trade) df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + ".csv") df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv") print("==============Get Backtest Results===========") perf_stats_all = BackTestStats(df_account_value) perf_stats_all = pd.DataFrame(perf_stats_all) perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv")
def generate_data(rollouts, data_dir, noise_type):  # pylint: disable=R0914
    """ Generates data """
    assert exists(data_dir), "The data directory does not exist..."

    df = YahooDownloader(start_date='2009-01-01',
                         end_date='2021-01-01',
                         ticker_list=['AAPL']).fetch_data()
    df = df.sort_values(['date', 'tic'], ignore_index=True)

    fe = FeatureEngineer(use_technical_indicator=True,
                         tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                         use_turbulence=True,
                         user_defined_feature=False)
    processed = fe.preprocess_data(df)

    # reindex to the full (date, ticker) grid
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(
        pd.date_range(processed['date'].min(),
                      processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))
    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        processed, on=["date", "tic"], how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'], ignore_index=True)
    processed_full = processed_full.fillna(0)

    train = data_split(processed_full, '2009-01-01', '2019-01-01')
    trade = data_split(processed_full, '2019-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        # this env build expects a single combined transaction cost rather
        # than separate buy_cost_pct / sell_cost_pct
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env = env_train

    seq_len = 10000
    for i in range(rollouts):
        env.reset()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space, seq_len, 1. / 50)

        s_rollout = []
        r_rollout = []
        d_rollout = []
        t = 0
        while True:
            action = a_rollout[t]
            t += 1
            s, r, done, _ = env.step(action)
            s_rollout += [s]
            r_rollout += [r]
            d_rollout += [done]
            if done:
                print("> End of rollout {}, {} frames...".format(i, len(s_rollout)))
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break
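# sample_continuous_policy is not defined in this excerpt. A minimal sketch of a
# Brownian ("brown") action-noise sampler consistent with the call above, i.e.
# sample_continuous_policy(action_space, seq_len, dt) — hedged, assumptions only:
import numpy as np

def sample_continuous_policy(action_space, seq_len, dt):
    """Random walk in action space: integrate white noise, clip to the bounds."""
    actions = [action_space.sample()]
    for _ in range(seq_len):
        daction_dt = np.random.randn(*actions[-1].shape)
        actions.append(np.clip(actions[-1] + np.sqrt(dt) * daction_dt,
                               action_space.low, action_space.high))
    return actions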
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}") env_kwargs = { "hmax": 100, "initial_amount": 1000000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": features, "action_space": stock_dimension, "reward_scaling": 1e-4, "model_name": model_name } e_train_gym = StockTradingEnv(df = train, **env_kwargs) e_train_gym.seed(42) e_train_gym.action_space.seed(42) env_train, _ = e_train_gym.get_sb_env() env_train.seed(seed) env_train.action_space.seed(seed) print(type(env_train)) agent = DRLAgent(env = env_train) model_ddpg = agent.get_model("ddpg", model_kwargs={"batch_size": batch_size, "buffer_size": 50000, "learning_rate": lr} ) trained_ddpg = agent.train_model(model=model_ddpg,
state_space = 1 + 2 * stock_dimension + len(indicators) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
    "hmax": 500,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": indicators,
    "action_space": stock_dimension,
    "reward_scaling": 0.6
}
e_trade_gym = StockTradingEnv(df=df_test, **env_kwargs)
e_train_gym = StockTradingEnv(df=df_train, **env_kwargs)
env_trade, _ = e_trade_gym.get_sb_env()
env_train, _ = e_train_gym.get_sb_env()

agent = DRLAgent(env=env_train)
model_params = config.__dict__[f"{args.model.upper()}_PARAMS"]
model = agent.get_model(args.model, model_kwargs=model_params, verbose=1)

print('Training model')
trained_model = model.learn(tb_log_name='{}_{}'.format(
    modelName, datetime.datetime.now()),
    total_timesteps=train_steps,