def new_test():
    processed = pd.read_csv(
        os.path.abspath('./me/datasets/new_data_with_techs_turb.csv'),
        index_col=0)
    train = data_split(processed, '2009-01-01', '2018-01-01')
    trade = data_split(processed, '2018-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)
    env_train.envs[0] = Monitor(env_train.envs[0], log_dir)

    agent = DRLAgent(env=env_train)
    model_a2c = agent.get_model("a2c", verbose=0)
    trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c',
                                    total_timesteps=100000)

    data_turbulence = processed[(processed.date < '2018-01-01')
                                & (processed.date >= '2009-01-01')]
    insample_turbulence = data_turbulence.drop_duplicates(subset=['date'])
    turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)

    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=380, **env_kwargs)
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    print("BEGIN PREDICTION")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_a2c,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    print(df_account_value)
    print("END PREDICTION")
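# The state_space formula used throughout these scripts counts one slot for the
# cash balance, a close price and a holdings count per stock, and one slot per
# technical indicator per stock. A minimal worked example of the arithmetic;
# the 30 tickers and 8 indicators below are illustrative numbers only, not
# values taken from the dataset above.
example_stock_dimension = 30
example_n_indicators = 8
example_state_space = (1                                     # cash balance
                       + 2 * example_stock_dimension         # close price + shares held, per stock
                       + example_n_indicators * example_stock_dimension)  # one slot per indicator per stock
print(example_state_space)  # 1 + 60 + 240 = 301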
def train_one():
    env_kwargs, processed = prepare_data()

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    model = agent.get_model(config.CURRENT_MODEL)
    model = agent.train_model(model=model, total_timesteps=80000)
    path = f"{config.TRAINED_MODEL_DIR}/model"
    model.save(path)
    model = model.load(path)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
def train(self, origin_data):
    self.build_model()
    train = data_split(origin_data, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(origin_data, config.START_TRADE_DATE, config.END_DATE)

    y_train = self.cal_result(train)
    y_trade = self.cal_result(trade)
    x_train_tech = self.cal_feature(train, 5)
    x_trade_tech = self.cal_feature(trade, 5)
    x_train_idx = self.cal_index(train, 5)
    x_trade_idx = self.cal_index(trade, 5)
    print(x_train_tech.shape)

    self.model.fit([x_train_idx, x_train_tech], y_train, epochs=100, batch_size=128)
    score = self.model.evaluate([x_trade_idx, x_trade_tech], y_trade, return_dict=True)
    print(score)
    self.model.save(self.model_path)
def main():
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers
    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)

    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # print(train_data.index)
    # print(trade_data.index)
    # print(trade_data.loc[0])
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)

    training_agent = DRLAgent(env=env_train)
    model_a2c = training_agent.get_model("a2c")
    # trained_a2c = training_agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=10000)

    feature_engineer = FeatureEngineer()
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)
    for i in range(1, trade_data.index.unique().max()):
        print(trade_data.loc[i])
        online_stock_pred.add_data(trade_data.loc[i])
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
def DRL_prediction(self, model, name, last_state, iter_num, turbulence_threshold, initial):
    ### make a prediction based on trained model ###
    ## trading env
    trade_data = data_split(self.df,
                            start=self.unique_trade_date[iter_num - self.rebalance_window],
                            end=self.unique_trade_date[iter_num])
    trade_env = DummyVecEnv([lambda: StockTradingEnv(
        trade_data, self.stock_dim, self.hmax, self.initial_amount,
        self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
        self.state_space, self.action_space, self.tech_indicator_list,
        turbulence_threshold=turbulence_threshold,
        initial=initial,
        previous_state=last_state,
        model_name=name,
        mode='trade',
        iteration=iter_num,
        print_verbosity=self.print_verbosity)])
    trade_obs = trade_env.reset()

    for i in range(len(trade_data.index.unique())):
        action, _states = model.predict(trade_obs)
        trade_obs, rewards, dones, info = trade_env.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            # print(env_test.render())
            last_state = trade_env.render()

    df_last_state = pd.DataFrame({'last_state': last_state})
    df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i), index=False)
    return last_state
def train_one():
    """train an agent"""
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.DOW_30_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df,
                         feature_number=5,
                         use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)
    env_setup = EnvSetup(stock_dim=len(train.tic.unique()))
    env_train = env_setup.create_env_training(data=train, env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 100000
    }
    model_a2c = agent.train_A2C(model_name="A2C_{}".format(now),
                                model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value = DRLAgent.DRL_prediction(model=model_a2c,
                                               test_data=trade,
                                               test_env=env_trade,
                                               test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + '.csv')
def load_stock_trading_data():
    from finrl.config import config

    cwd = './env/FinRL'
    raw_data_path = f'{cwd}/StockTradingEnv_raw_data.df'
    processed_data_path = f'{cwd}/StockTradingEnv_processed_data.df'
    os.makedirs(cwd, exist_ok=True)

    print("==============Start Fetching Data===========")
    if os.path.exists(raw_data_path):
        raw_df = pd.read_pickle(raw_data_path)  # DataFrame of Pandas
        print('| raw_df.columns.values:', raw_df.columns.values)
    else:
        from finrl.marketdata.yahoodownloader import YahooDownloader
        raw_df = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.DOW_30_TICKER,
        ).fetch_data()
        raw_df.to_pickle(raw_data_path)

    print("==============Start Feature Engineering===========")
    if os.path.exists(processed_data_path):
        processed_df = pd.read_pickle(processed_data_path)  # DataFrame of Pandas
        print('| processed_df.columns.values:', processed_df.columns.values)
    else:
        from finrl.preprocessing.preprocessors import FeatureEngineer
        fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=True,
            user_defined_feature=False,
        )
        processed_df = fe.preprocess_data(raw_df)
        processed_df.to_pickle(processed_data_path)

    # Training & Trading data split
    from finrl.preprocessing.data import data_split
    train_df = data_split(processed_df, '2008-03-19', '2016-01-01')  # 1963/3223
    eval_df = data_split(processed_df, '2016-01-01', '2021-01-01')   # 1260/3223
    return train_df, eval_df
def predict():
    env_kwargs, processed = prepare_data()
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)
    e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)

    path = config.TRAINED_MODEL_DIR + "/model"
    # resolve the configured model class name (e.g. "SAC") and load the saved weights
    trained_sac = eval(config.CURRENT_MODEL.upper() + ".load(path)")

    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
def test_process_data():
    start_date = '2020-11-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers
    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    trade_data = data_split(initial_data, start_date, '2020-12-01')

    numerical_feed_data = numerical_df[numerical_df.date > '2020-12-01']
    sentiment_feed_data = sentiment_df[sentiment_df.date > '2020-12-01']
    data_processor = DataProcessor(FeatureEngineer(), trade_data)
    for date in numerical_feed_data.date.unique():
        new_numerical = numerical_feed_data[numerical_feed_data.date == date]
        new_sentiment = sentiment_feed_data.loc[sentiment_feed_data.date == date]
        new_df = data_processor.process_data(new_numerical, new_sentiment)
        print(new_df)
def check_finrl():
    from FinRL import StockTradingEnv
    from numpy import random as rd
    from finrl.config import config
    from finrl.preprocessing.data import data_split
    import pandas as pd

    # df = pd.read_pickle('finrl_data.df')  # DataFrame of Pandas
    #
    # from finrl.preprocessing.preprocessors import FeatureEngineer
    # fe = FeatureEngineer(
    #     use_technical_indicator=True,
    #     tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    #     use_turbulence=True,
    #     user_defined_feature=False,
    # )
    #
    # processed_df = fe.preprocess_data(df)
    # processed_df.to_pickle('finrl_processed_data.df')  # DataFrame of Pandas

    processed_data_path = 'StockTradingEnv_processed_data.df'
    processed_df = pd.read_pickle(processed_data_path)  # DataFrame of Pandas
    print(processed_df.columns.values)

    split_df = data_split(processed_df, start='2008-03-19', end='2021-01-01')  # `start`
    env = StockTradingEnv(df=split_df,
                          tech_indicator_list=config.TECHNICAL_INDICATORS_LIST)

    action_dim = env.action_dim
    state = env.reset()
    print('state_dim', len(state))

    done = False
    step = 1
    from time import time
    timer = time()
    while not done:
        action = rd.rand(action_dim) * 2 - 1
        next_state, reward, done, _ = env.step(action)
        print(';', step, len(next_state), env.day, reward)
        step += 1
    print(';;', step, int(time() - timer))  # 44 seconds
def test():
    trade = data_split(origin_data, config.START_TRADE_DATE, config.END_DATE)
    action_list = model.test(trade)
    # print(action_list)
    hold_map = {}
    asset = 10000
    hold = 0
    price = 0
    for index, vec in enumerate(action_list):
        if index == 0 or index == len(action_list) - 1:
            continue
        vec = vec[0]
        # print("%.2f" % trade.loc[index]["close"], vec)
        last_vec = action_list[index - 1][0]
        price = trade.iloc[index + 1]["open"]
        date = trade.iloc[index + 1]["date"]
        tic = trade.iloc[index + 1]["tic"]
        hold = hold_map.get(tic, 0)
        # print(date, tic, vec, last_vec)
        if vec[0] > last_vec[0] + 0.1 and vec[0] > 0.3 and hold <= 0:
            print(f"buy {tic} at {date}, price = {price}")
            if hold == 0:
                hold_map[tic] = price
            else:
                hold_map.pop(tic)
            asset -= price * 100
        elif vec[2] > last_vec[2] + 0.1 and vec[2] > 0.3 and hold >= 0:
            print(f"sell {tic} at {date}, price = {price}")
            if hold == 0:
                hold_map[tic] = -price
            else:
                hold_map.pop(tic)
            asset += price * 100
    print(asset)
    print(hold_map)
import pandas as pd

df = plotdf = processed[processed['tic'] == 'JPM']
df.plot(x="date", y=["turbulence", "close"])
plt.show()

# In[10]:

processed['log_volume'] = np.log(processed.volume * processed.close)
processed['change'] = (processed.close - processed.open) / processed.close
processed['daily_variance'] = (processed.high - processed.low) / processed.close
processed.head()

# ## Training data split: 2009-01-01 to 2018-12-31
# ## Trade data split: 2019-01-01 to 2020-12-31

# In[11]:

train = data_split(processed, '2009-01-01', '2019-01-01')
trade = data_split(processed, '2019-01-01', '2021-01-01')
print(len(train))
print(len(trade))

# In[12]:

import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
from copy import deepcopy

matplotlib.use("Agg")
import matplotlib.pyplot as plt
def train_one():
    """train an agent"""
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.SP_500_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df,
                         use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    # Training & Trade data split
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # data normalization (optional)
    # features_list = list(train.columns)
    # features_list.remove('date')
    # features_list.remove('tic')
    # features_list.remove('close')
    # print(features_list)
    # data_normaliser = preprocessing.StandardScaler()
    # train[features_list] = data_normaliser.fit_transform(train[features_list])
    # trade[features_list] = data_normaliser.fit_transform(trade[features_list])

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=3000,
                         transaction_cost_pct=0.001)
    env_train = env_setup.create_env_training(data=train, env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 80000
    }
    model = agent.train_A2C(model_name="A2C_{}".format(now),
                            model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + '.csv')
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + '.csv')
# # Our trading environments, based on the OpenAI Gym framework, simulate live stock markets with real market data according to the principle of time-driven simulation.
#
# # The action space describes the allowed actions through which the agent interacts with the environment. Normally, an action includes three values: {-1, 0, 1}, where -1, 0, 1 represent selling, holding, and buying one share. An action can also be carried out on multiple shares. We use an action space {-k, ..., -1, 0, 1, ..., k}, where k denotes the number of shares to buy and -k denotes the number of shares to sell. For example, "Buy 10 shares of AAPL" or "Sell 10 shares of AAPL" are encoded as 10 or -10, respectively. The continuous action space needs to be normalized to [-1, 1], since the policy is defined on a Gaussian distribution, which needs to be normalized and symmetric.

# <a id='4.1'></a>
# ## 5.1 Training & Trade data split
# * Training: 2009-01-01 to 2018-12-31
# * Trade: 2019-01-01 to 2020-12-31

# In[16]:

# train = data_split(data_df, start=config.START_DATE, end=config.START_TRADE_DATE)
# trade = data_split(data_df, start=config.START_TRADE_DATE, end=config.END_DATE)
train = data_split(data_df, start='2009-01-01', end='2019-01-01')
trade = data_split(data_df, start='2019-01-01', end='2021-01-01')

# In[17]:

## data normalization; this part is optional and has little impact
# features_list = list(train.columns)
# features_list.remove('date')
# features_list.remove('tic')
# features_list.remove('close')
# print(features_list)
# from sklearn import preprocessing
# data_normaliser = preprocessing.StandardScaler()
# train[features_list] = data_normaliser.fit_transform(train[features_list])
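# A minimal sketch of how a normalized action in [-1, 1] maps back to a signed
# share count. hmax=100 mirrors the env_kwargs used in these scripts; the
# rounding rule below is an assumption for illustration, not the exact
# StockTradingEnv implementation.
import numpy as np

hmax = 100  # maximum shares traded per stock per step

def action_to_shares(normalized_action):
    # scale the policy output from [-1, 1] to [-hmax, hmax] and round to whole shares
    return int(np.rint(normalized_action * hmax))

print(action_to_shares(0.1))   # buy 10 shares
print(action_to_shares(-0.1))  # sell 10 shares
print(action_to_shares(1.0))   # buy the maximum of 100 shares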
def train_one(fetch=False):
    """train an agent"""
    if fetch:
        df = fetch_and_store()
    else:
        df = load()

    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        # use_turbulence=False,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(df, start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)
    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250,
                                  make_plots=True, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name="sac",
        # total_timesteps=100
        total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        # model=trained_sac, test_data=trade, test_env=env_trade, test_obs=obs_trade
        trained_sac, e_trade_gym)
    df_account_value.to_csv(f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] + config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'], tz=pytz.utc)).drop(columns=['date'])
    df_positions = df_positions.astype(
        {col: np.float64 for col in df_positions.columns})
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")

    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)
def generate_data(rollouts, data_dir, noise_type):  # pylint: disable=R0914
    """Generates data"""
    assert exists(data_dir), "The data directory does not exist..."

    df = YahooDownloader(start_date='2009-01-01',
                         end_date='2021-01-01',
                         ticker_list=['AAPL']).fetch_data()
    df.sort_values(['date', 'tic'], ignore_index=True)

    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False)
    processed = fe.preprocess_data(df)

    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),
                                   processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))
    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        processed, on=["date", "tic"], how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])
    processed_full = processed_full.fillna(0)
    processed_full.sort_values(['date', 'tic'], ignore_index=True)

    train = data_split(processed_full, '2009-01-01', '2019-01-01')
    trade = data_split(processed_full, '2019-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        # "buy_cost_pct": 0.001,
        # "sell_cost_pct": 0.001,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env = env_train
    # env = gym.make("CarRacing-v0")

    seq_len = 10000
    for i in range(rollouts):
        env.reset()
        # env.env.viewer.window.dispatch_events()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        elif noise_type == 'brown':
            a_rollout = sample_continuous_policy(env.action_space, seq_len, 1. / 50)

        s_rollout = []
        r_rollout = []
        d_rollout = []
        t = 0
        while True:
            action = a_rollout[t]
            t += 1
            s, r, done, _ = env.step(action)
            # env.env.viewer.window.dispatch_events()
            s_rollout += [s]
            r_rollout += [r]
            d_rollout += [done]
            if done:
                print("> End of rollout {}, {} frames...".format(i, len(s_rollout)))
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break
def train_one():
    """train an agent"""
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    list_ticker = processed["tic"].unique().tolist()
    list_date = list(
        pd.date_range(processed['date'].min(), processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))
    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        processed, on=["date", "tic"], how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])
    processed_full = processed_full.fillna(0)

    # Training & Trading data split
    train = data_split(processed_full, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv")
def load_and_save():
    stocks_tradable = configs["stocks_tradable"]
    dataset = dict()
    for root, dirs, files in os.walk("./data/sentiments", topdown=False):
        for name in files:
            if name.split("_")[0] in stocks_tradable:
                dataset[name.split("_")[0]] = pd.read_csv(
                    os.path.join(root, name), index_col=0).reset_index(drop=True)
                dataset[name.split("_")[0]]["date"] = pd.to_datetime(
                    dataset[name.split("_")[0]]["date"], format="%Y-%m-%d")

    df = get_stock_data(
        configs["train"]["start_date"],
        configs["test"]["end_date"],
        configs["stocks_tradable"]
    )
    df = add_sentiments(configs["sentiments"]["days"], dataset, df)

    train = data_split(df, configs["train"]["start_date"], configs["train"]["end_date"])
    validation = data_split(df, configs["validation"]["start_date"], configs["validation"]["end_date"])
    train_for_test = data_split(df, configs["train"]["start_date"], configs["validation"]["end_date"])
    test = data_split(df, configs["test"]["start_date"], configs["test"]["end_date"])
    testing_days = pd.Series(test.date.unique())

    train.to_csv("./data/train_data.csv", index=False)
    validation.to_csv("./data/validation_data.csv", index=False)
    train_for_test.to_csv("./data/train_for_test.csv", index=False)
    test.to_csv("./data/test_data.csv", index=False)
    testing_days.to_csv("./data/testing_days.csv", index=False)
    print("train, validation, train_for_test, test files saved")


def train():
    train = pd.read_csv("./data/train_data.csv")
    validation = pd.read_csv("./data/validation_data.csv")
    features = [
        ["open", "high", "low", "close", "volume"],
        ["open", "high", "low", "close", "volume"] + ["sentiment_mean", "sentiment_std"],
        config.TECHNICAL_INDICATORS,
        config.TECHNICAL_INDICATORS + ["sentiment_mean", "sentiment_std"]
    ]
    model_names = [
        "OHLCV",
        "OHLCV_sentiments",
        "MACD",
        "MACD_sentiments"
    ]
    batch_sizes = [32, 64, 128]
    learning_rates = [0.0001, 0.001, 0.005, 0.01]
    repetition = 3

    for model_name, feature_set in zip(model_names, features):
        for rep in range(repetition):
            perf_results = dict()
            ctime = time.time()
            for batch_size in batch_sizes:
                for lr in learning_rates:
                    perf_stats_all, _ = train_configuration(
                        f"{model_name}_{rep}", train, validation, feature_set,
                        batch_size, lr, 42
                    )
                    perf_results[f"result_{batch_size}_{lr}"] = perf_stats_all.to_json()
            save_fname = f"{model_name}_{rep}"
            open(f"{save_fname}.json", "w").write(json.dumps(perf_results))
            print(f"Results saved to {save_fname}.json")
            print(f"Time taken {(time.time() - ctime) / 60}")


def test():
    daily_risk_free_rates = fill_missing_daily_rf_rates(
        date_to_daily_risk_free_rate,
        [datetime.datetime.strptime(date, "%Y-%m-%d") for date in dates["0"]]
    )
    train = pd.read_csv("./data/train_for_test.csv")
    test = pd.read_csv("./data/test_data.csv")


if __name__ == "__main__":
def train_one():
    """train an agent"""
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    user_input = input('train model? 1 train 0 don\'t train')
    if user_input == '1':
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + df.tic[0] + "_frl")
    else:
        trained_sac = SAC.load('../models/sac_80k_msft_working')

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(trained_sac, e_trade_gym)
    df_account_value.to_csv("../" + config.RESULTS_DIR + "/SAC_df_account_value_" +
                            df.tic[0] + "_" + now + ".csv")
    df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" +
                      df.tic[0] + "_" + now + ".csv")

    # print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("../" + config.RESULTS_DIR + "/SAC_perf_stats_all_" +
                          df.tic[0] + "_" + now + ".csv")

    # plot account value, coloured by the action taken each day
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)
    # plt.plot(x, y)
    # Use a boundary norm instead
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    line = axs[0].add_collection(lc)
    # fig.colorbar(line, ax=axs)
    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + df.tic[0])
    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())
    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]
    # lines = ax.plot(data)
    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    # plot the stock's closing price
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(df.tic[0] + " Closing Price")
    plt.plot(tx, ty)
    plt.savefig("../" + config.RESULTS_DIR + "/plots/" + "SAC_plot_" +
                df.tic[0] + "_" + now + ".png")
def run_ensemble_strategy(self, A2C_model_kwargs, PPO_model_kwargs, DDPG_model_kwargs, timesteps_dict):
    """Ensemble Strategy that combines PPO, A2C and DDPG"""
    print("============Start Ensemble Strategy============")
    # for the ensemble model, it's necessary to feed the last state
    # of the previous model to the current model as the initial state
    last_state_ensemble = []
    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []
    model_use = []
    validation_start_date_list = []
    validation_end_date_list = []
    iteration_list = []

    insample_turbulence = self.df[(self.df.date < self.train_period[1]) &
                                  (self.df.date >= self.train_period[0])]
    insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)

    start = time.time()
    for i in range(self.rebalance_window + self.validation_window,
                   len(self.unique_trade_date), self.rebalance_window):
        validation_start_date = self.unique_trade_date[i - self.rebalance_window - self.validation_window]
        validation_end_date = self.unique_trade_date[i - self.rebalance_window]
        validation_start_date_list.append(validation_start_date)
        validation_end_date_list.append(validation_end_date)
        iteration_list.append(i)

        print("============================================")
        ## initial state is empty
        if i - self.rebalance_window - self.validation_window == 0:
            # initial state
            initial = True
        else:
            # previous state
            initial = False

        # Tuning turbulence index based on historical data
        # Turbulence lookback window is one quarter (63 days)
        end_date_index = self.df.index[self.df["date"] == self.unique_trade_date[
            i - self.rebalance_window - self.validation_window]].to_list()[-1]
        start_date_index = end_date_index - 63 + 1
        historical_turbulence = self.df.iloc[start_date_index:(end_date_index + 1), :]
        historical_turbulence = historical_turbulence.drop_duplicates(subset=['date'])
        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)
        # print(historical_turbulence_mean)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # if the mean of the historical data is greater than the 90% quantile of insample turbulence data,
            # we assume that the current market is volatile and set the 90% quantile of insample turbulence
            # data as the turbulence threshold, meaning the current turbulence can't exceed that quantile
            turbulence_threshold = insample_turbulence_threshold
        else:
            # if the mean of the historical data is less than the 90% quantile of insample turbulence data,
            # we tune up the turbulence_threshold, meaning we lower the risk
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)
        # note: the line below overrides the value chosen above and caps the threshold at the 99% quantile
        turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 0.99)
        print("turbulence_threshold: ", turbulence_threshold)

        ############## Environment Setup starts ##############
        ## training env
        train = data_split(self.df, start=self.train_period[0],
                           end=self.unique_trade_date[i - self.rebalance_window - self.validation_window])
        self.train_env = DummyVecEnv([lambda: StockTradingEnv(
            train, self.stock_dim, self.hmax, self.initial_amount,
            self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
            self.state_space, self.action_space, self.tech_indicator_list,
            print_verbosity=self.print_verbosity)])
        validation = data_split(self.df,
                                start=self.unique_trade_date[i - self.rebalance_window - self.validation_window],
                                end=self.unique_trade_date[i - self.rebalance_window])
        ############## Environment Setup ends ##############

        ############## Training and Validation starts ##############
        print("======Model training from: ", self.train_period[0], "to ",
              self.unique_trade_date[i - self.rebalance_window - self.validation_window])
        # print("training: ", len(data_split(df, start=20090000, end=test.datadate.unique()[i-rebalance_window])))
        # print("==============Model Training===========")

        print("======A2C Training========")
        model_a2c = self.get_model("a2c", self.train_env, policy="MlpPolicy", model_kwargs=A2C_model_kwargs)
        model_a2c = self.train_model(model_a2c, "a2c", tb_log_name="a2c_{}".format(i),
                                     iter_num=i, total_timesteps=timesteps_dict['a2c'])  # 100_000
        print("======A2C Validation from: ", validation_start_date, "to ", validation_end_date)
        val_env_a2c = DummyVecEnv([lambda: StockTradingEnv(
            validation, self.stock_dim, self.hmax, self.initial_amount,
            self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
            self.state_space, self.action_space, self.tech_indicator_list,
            turbulence_threshold=turbulence_threshold,
            iteration=i, model_name='A2C', mode='validation',
            print_verbosity=self.print_verbosity)])
        val_obs_a2c = val_env_a2c.reset()
        self.DRL_validation(model=model_a2c, test_data=validation,
                            test_env=val_env_a2c, test_obs=val_obs_a2c)
        sharpe_a2c = self.get_validation_sharpe(i, model_name="A2C")
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        print("======PPO Training========")
        model_ppo = self.get_model("ppo", self.train_env, policy="MlpPolicy", model_kwargs=PPO_model_kwargs)
        model_ppo = self.train_model(model_ppo, "ppo", tb_log_name="ppo_{}".format(i),
                                     iter_num=i, total_timesteps=timesteps_dict['ppo'])  # 100_000
        print("======PPO Validation from: ", validation_start_date, "to ", validation_end_date)
        val_env_ppo = DummyVecEnv([lambda: StockTradingEnv(
            validation, self.stock_dim, self.hmax, self.initial_amount,
            self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
            self.state_space, self.action_space, self.tech_indicator_list,
            turbulence_threshold=turbulence_threshold,
            iteration=i, model_name='PPO', mode='validation',
            print_verbosity=self.print_verbosity)])
        val_obs_ppo = val_env_ppo.reset()
        self.DRL_validation(model=model_ppo, test_data=validation,
                            test_env=val_env_ppo, test_obs=val_obs_ppo)
        sharpe_ppo = self.get_validation_sharpe(i, model_name="PPO")
        print("PPO Sharpe Ratio: ", sharpe_ppo)

        print("======DDPG Training========")
        model_ddpg = self.get_model("ddpg", self.train_env, policy="MlpPolicy", model_kwargs=DDPG_model_kwargs)
        model_ddpg = self.train_model(model_ddpg, "ddpg", tb_log_name="ddpg_{}".format(i),
                                      iter_num=i, total_timesteps=timesteps_dict['ddpg'])  # 50_000
        print("======DDPG Validation from: ", validation_start_date, "to ", validation_end_date)
        val_env_ddpg = DummyVecEnv([lambda: StockTradingEnv(
            validation, self.stock_dim, self.hmax, self.initial_amount,
            self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
            self.state_space, self.action_space, self.tech_indicator_list,
            turbulence_threshold=turbulence_threshold,
            iteration=i, model_name='DDPG', mode='validation',
            print_verbosity=self.print_verbosity)])
        val_obs_ddpg = val_env_ddpg.reset()
        self.DRL_validation(model=model_ddpg, test_data=validation,
                            test_env=val_env_ddpg, test_obs=val_obs_ddpg)
        sharpe_ddpg = self.get_validation_sharpe(i, model_name="DDPG")

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        ddpg_sharpe_list.append(sharpe_ddpg)

        print("======Best Model Retraining from: ", self.train_period[0], "to ",
              self.unique_trade_date[i - self.rebalance_window])
        # Environment setup for model retraining up to first trade date
        # train_full = data_split(self.df, start=self.train_period[0],
        #                         end=self.unique_trade_date[i - self.rebalance_window])
        # self.train_full_env = DummyVecEnv([lambda: StockTradingEnv(
        #     train_full, self.stock_dim, self.hmax, self.initial_amount,
        #     self.buy_cost_pct, self.sell_cost_pct, self.reward_scaling,
        #     self.state_space, self.action_space, self.tech_indicator_list,
        #     print_verbosity=self.print_verbosity)])

        # Model selection based on validation Sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_use.append('PPO')
            model_ensemble = model_ppo
            # model_ensemble = self.get_model("ppo", self.train_full_env, policy="MlpPolicy", model_kwargs=PPO_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num=i, total_timesteps=timesteps_dict['ppo'])  # 100_000
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_use.append('A2C')
            model_ensemble = model_a2c
            # model_ensemble = self.get_model("a2c", self.train_full_env, policy="MlpPolicy", model_kwargs=A2C_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num=i, total_timesteps=timesteps_dict['a2c'])  # 100_000
        else:
            model_use.append('DDPG')
            model_ensemble = model_ddpg
            # model_ensemble = self.get_model("ddpg", self.train_full_env, policy="MlpPolicy", model_kwargs=DDPG_model_kwargs)
            # model_ensemble = self.train_model(model_ensemble, "ensemble", tb_log_name="ensemble_{}".format(i), iter_num=i, total_timesteps=timesteps_dict['ddpg'])  # 50_000
        ############## Training and Validation ends ##############

        ############## Trading starts ##############
        print("======Trading from: ", self.unique_trade_date[i - self.rebalance_window],
              "to ", self.unique_trade_date[i])
        # print("Used Model: ", model_ensemble)
        last_state_ensemble = self.DRL_prediction(model=model_ensemble,
                                                  name="ensemble",
                                                  last_state=last_state_ensemble,
                                                  iter_num=i,
                                                  turbulence_threshold=turbulence_threshold,
                                                  initial=initial)
        ############## Trading ends ##############

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")

    df_summary = pd.DataFrame([iteration_list, validation_start_date_list,
                               validation_end_date_list, model_use,
                               a2c_sharpe_list, ppo_sharpe_list, ddpg_sharpe_list]).T
    df_summary.columns = ['Iter', 'Val Start', 'Val End', 'Model Used',
                          'A2C Sharpe', 'PPO Sharpe', 'DDPG Sharpe']
    return df_summary
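# The model selection above hinges on get_validation_sharpe, which is not shown
# in this section. A minimal sketch of how a validation Sharpe ratio could be
# computed from the account-value CSVs the validation environments write out;
# the file path and the sqrt(252) daily-return annualization are assumptions
# for illustration, not the confirmed implementation.
import pandas as pd

def get_validation_sharpe_sketch(iteration, model_name):
    # read the account values recorded during the validation window
    df_total_value = pd.read_csv(
        f"results/account_value_validation_{model_name}_{iteration}.csv")
    df_total_value["daily_return"] = df_total_value["account_value"].pct_change(1)
    # annualized mean/std ratio of daily returns
    return ((252 ** 0.5) * df_total_value["daily_return"].mean()
            / df_total_value["daily_return"].std())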
processed = fe.preprocess_data(df)

list_ticker = processed["tic"].unique().tolist()
list_date = list(pd.date_range(processed['date'].min(), processed['date'].max()).astype(str))
combination = list(itertools.product(list_date, list_ticker))

processed["date"] = processed["date"].astype(str)
processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
    processed, on=["date", "tic"], how="left")
processed_full = processed_full[processed_full['date'].isin(processed['date'])]
processed_full = processed_full.sort_values(['date', 'tic'])
processed_full = processed_full.fillna(0)
processed_full.sort_values(['date', 'tic'], ignore_index=True).tail()

train = data_split(processed_full, '2018-05-16', '2020-05-16')
trade = data_split(processed_full, '2020-05-17', '2020-10-31')
print(len(train))
print(len(trade))
train.head()
trade.head()

config["TECHNICAL_INDICATORS_LIST"]

stock_dimension = len(train.tic.unique())
state_space = 1 + 2 * stock_dimension + len(config["TECHNICAL_INDICATORS_LIST"]) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
def train_one():
    """train an agent"""
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv")
def main():
    parser = build_parser()
    options = parser.parse_args()

    # Basic setup
    # Disable warnings
    warnings.filterwarnings('ignore')

    # Load the saved data in a pandas DataFrame:
    data_frame = pd.read_csv("./" + config.DATA_SAVE_DIR + "/" + options.name + ".csv")
    print("Data Frame shape is: ", data_frame.shape)
    print("Data Frame format is following: \n\n", data_frame.head())

    ## we store the stockstats technical indicator column names in config.py
    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST
    print("Technical Indicators that are going to be calculated: ", tech_indicator_list)

    feature_engineering = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=tech_indicator_list,
        use_turbulence=True,
        user_defined_feature=False)
    processed = feature_engineering.preprocess_data(data_frame)
    print(processed.sort_values(['date', 'tic'], ignore_index=True).head(10))

    training_set = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    testing_set = data_split(processed, config.START_TRADE_DATE, config.END_DATE)
    print("Size of training set: ", len(training_set))
    print("Size of testing set: ", len(testing_set))
    print("Training set format:\n\n", training_set.head())
    print("Testing set format: \n\n", testing_set.head())

    stock_dimension = len(training_set.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(tech_indicator_list) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    ##
    ## Save data to file, both training and trading
    ##
    if os.path.exists("./" + config.DATA_SAVE_DIR + "/training.txt"):
        os.remove("./" + config.DATA_SAVE_DIR + "/training.txt")
        print("The training data file deleted")
    else:
        print("The training data file does not exist")
    if os.path.exists("./" + config.DATA_SAVE_DIR + "/testing.txt"):
        os.remove("./" + config.DATA_SAVE_DIR + "/testing.txt")
        print("The testing data file deleted")
    else:
        print("The testing data file does not exist")

    path_training = "./" + config.DATA_SAVE_DIR + "/training.txt"
    path_testing = "./" + config.DATA_SAVE_DIR + "/testing.txt"
    with open(path_training, "wb") as f:
        pickle.dump(training_set, f, pickle.HIGHEST_PROTOCOL)
    with open(path_testing, "wb") as f:
        pickle.dump(testing_set, f, pickle.HIGHEST_PROTOCOL)
    print("Successfully completed the task of creating the test and training data files.")
def objective(trial: optuna.Trial):
    """ test """
    df = pd.read_csv(os.path.abspath('./me/datasets/data_with_techs_turb.csv'), index_col=0)
    df.sort_values(['date', 'tic'], ignore_index=True)
    train_data = data_split(df, '2009-01-01', '2016-01-01')
    trade_data = data_split(df, '2019-01-01', '2020-12-01')

    # params
    stock_dimension = len(train_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    hmax = 100
    starting_capital = 1000000
    transaction_cost_pct = 0.001
    reward_scaling = 1e-4
    technical_indicator_list = config.TECHNICAL_INDICATORS_LIST

    # test
    eval_env = gym.make(ENV_ID,
                        df=train_data,
                        stock_dim=stock_dimension,
                        hmax=hmax,
                        initial_amount=starting_capital,
                        transaction_cost_pct=transaction_cost_pct,
                        reward_scaling=reward_scaling,
                        state_space=state_space,
                        action_space=stock_dimension,
                        tech_indicator_list=technical_indicator_list,
                        turbulence_threshold=250,
                        day=0)

    # kwargs = DEFAULT_HYPERPARAMS.copy()
    # Sample hyperparameters
    kwargs = sample_a2c_params(trial)
    # Create the RL model
    model = A2C('MlpPolicy', eval_env, **kwargs)

    # Create env used for evaluation
    # Create the callback that will periodically evaluate
    # and report the performance
    eval_callback = TrialEvalCallback(eval_env,
                                      trial,
                                      n_eval_episodes=N_EVAL_EPISODES,
                                      eval_freq=EVAL_FREQ,
                                      deterministic=True)

    nan_encountered = False
    try:
        model.learn(N_TIMESTEPS, callback=eval_callback)
    except AssertionError as e:
        # Sometimes, random hyperparams can generate NaN
        print(e)
        nan_encountered = True
    finally:
        # Free memory
        model.env.close()
        eval_env.close()

    # Tell the optimizer that the trial failed
    if nan_encountered:
        return float("nan")
    if eval_callback.is_pruned:
        raise optuna.exceptions.TrialPruned()
    return eval_callback.last_mean_reward
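# The objective above calls sample_a2c_params(trial), which is not shown in this
# section. A minimal sketch of what such an Optuna sampler could look like; the
# parameter names and ranges below are illustrative assumptions, not the search
# space actually used.
import optuna

def sample_a2c_params_sketch(trial: optuna.Trial) -> dict:
    # sample a small A2C search space; every range here is an assumption
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        "n_steps": trial.suggest_categorical("n_steps", [5, 16, 64, 256]),
        "gamma": trial.suggest_categorical("gamma", [0.95, 0.99, 0.999]),
        "ent_coef": trial.suggest_float("ent_coef", 1e-8, 1e-1, log=True),
    }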
import os

import pandas as pd

from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
from finrl.trade.backtest import backtest_stats

os.environ["CUDA_VISIBLE_DEVICES"] = "2"

stock_id = "600570"
stock_handle_dir = "dataset/stock/stock_handle/"
df = pd.read_csv(stock_handle_dir + stock_id + ".csv")
df["tic"] = stock_id
# print(df)

train = data_split(df, "2004-02-13", "2018-09-11")
trade = data_split(df, "2018-09-11", "2021-05-25")

technical_indications_list = [
    "open", "close", "high", "low", "volume",
    "ma5", "ma10", "ema12", "ema26", "MACD", "DEA"
]
stock_dimension = len(train.tic.unique())
state_space = 1 + 2 * stock_dimension + len(technical_indications_list) * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,