def setup_model(initial_data, model_type='a2c', load_path=''):
    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(initial_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    cur_date = initial_data.date.unique()[-1]
    trade_data = initial_data[initial_data.date == cur_date]
    e_trade_gym = OnlineStockTradingEnv(trade_data, **env_kwargs)
    env_trade, _ = e_trade_gym.get_sb_env()
    trading_agent = DRLAgent(env=env_trade)
    model = trading_agent.get_model(model_type)
    if load_path:
        print("LOADING MODEL PARAMETERS")
        model = model.load(load_path)  # fixed: previously referenced the undefined name `load_model_path`
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model)
    print(online_stock_pred.predict())
    return online_stock_pred
def train_one():
    env_kwargs, processed = prepare_data()

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    model = agent.get_model(config.CURRENT_MODEL)
    model = agent.train_model(model=model, total_timesteps=80000)
    path = f"{config.TRAINED_MODEL_DIR}/model"
    model.save(path)
    model = model.load(path)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           environment=e_trade_gym)
    log_account(df_account_value, df_actions)
def new_test():
    processed = pd.read_csv(
        os.path.abspath('./me/datasets/new_data_with_techs_turb.csv'),
        index_col=0)
    train = data_split(processed, '2009-01-01', '2018-01-01')
    trade = data_split(processed, '2018-01-01', '2021-01-01')
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)
    env_train.envs[0] = Monitor(env_train.envs[0], log_dir)

    agent = DRLAgent(env=env_train)
    model_a2c = agent.get_model("a2c", verbose=0)
    trained_a2c = agent.train_model(model=model_a2c,
                                    tb_log_name='a2c',
                                    total_timesteps=100000)

    # In-sample turbulence statistics (computed but not used below;
    # the trading env uses a hard-coded threshold of 380 instead)
    data_turbulence = processed[(processed.date < '2018-01-01')
                                & (processed.date >= '2009-01-01')]
    insample_turbulence = data_turbulence.drop_duplicates(subset=['date'])
    turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)

    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=380, **env_kwargs)
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    print("BEGIN PREDICTION")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_a2c,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    print(df_account_value)
    print("END PREDICTION")
def main():
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers
    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)

    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }
    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # print(train_data.index)
    # print(trade_data.index)
    # print(trade_data.loc[0])
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)

    training_agent = DRLAgent(env=env_train)
    model_a2c = training_agent.get_model("a2c")
    # print(train_data.index)
    # print(trade_data.index)
    # trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=10000)

    feature_engineer = FeatureEngineer()
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)
    for i in range(1, trade_data.index.unique().max()):
        print(trade_data.loc[i])
        online_stock_pred.add_data(trade_data.loc[i])
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.DOW_30_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df,
                         feature_number=5,
                         use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    env_setup = EnvSetup(stock_dim=len(train.tic.unique()))
    env_train = env_setup.create_env_training(data=train, env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 100000
    }
    model_a2c = agent.train_A2C(model_name="A2C_{}".format(now),
                                model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value = DRLAgent.DRL_prediction(model=model_a2c,
                                               test_data=trade,
                                               test_env=env_trade,
                                               test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + '.csv')
def train_model(e_train_gym,
                tb_log_name,
                model_type="a2c",
                load_model_path='',
                save_model_path='',
                train_timesteps=80000):
    training_env, _ = e_train_gym.get_sb_env()
    training_agent = DRLAgent(training_env)
    model = training_agent.get_model(model_type)
    if load_model_path:
        print("LOADING MODEL PARAMETERS")
        model = model.load(load_model_path)
    print("=======TRAINING MODEL========")
    trained_model = training_agent.train_model(model,
                                               tb_log_name=tb_log_name,
                                               total_timesteps=train_timesteps)
    trained_model.save(save_model_path)
    return trained_model
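# A minimal usage sketch for the train_model() helper above; it is an assumption
# added for illustration, not part of the original code. `train`, `env_kwargs`,
# and the save path are placeholders defined elsewhere in this project.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)
trained = train_model(e_train_gym,
                      tb_log_name='a2c_run',
                      model_type='a2c',
                      save_model_path=f"{config.TRAINED_MODEL_DIR}/model",
                      train_timesteps=80000)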
def predict():
    env_kwargs, processed = prepare_data()
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)
    e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)
    path = config.TRAINED_MODEL_DIR + "/model"
    # Resolve the model class named in config (e.g. "sac" -> SAC) and load the
    # weights saved at `path`.
    trained_sac = eval(config.CURRENT_MODEL.upper() + ".load(path)")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)
    log_account(df_account_value, df_actions)
def get_trade_results(env_setup, model):
    # --------------- Trading
    # NOTE: `trading_data` is not a parameter; it is expected to be available at
    # module level when this function is called.
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trading_data, env_class=StockEnvTrade, turbulence_threshold=230)
    # --------------- Predict
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           test_data=trading_data,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    return df_account_value, df_actions
env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "transaction_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}
e_train_gym = StockTradingEnv(df=train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

agent = DRLAgent(env=env_train)
model_wm = agent.get_model("wm")
print('START TRAIN')
trained_wm = agent.train_model(model=model_wm,
                               tb_log_name='wm',
                               total_timesteps=30000)

trade = data_split(data_df, start='2019-01-01', end='2021-01-01')
e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()
df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_wm,
                                                       test_data=trade,
                                                       test_env=env_trade,
                                                       test_obs=obs_trade)
"initial_amount": 1000000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, "action_space": stock_dimension, "reward_scaling": 1e-4 } '''Create Train Env''' e_train_gym = StockTradingEnv(df=train, **env_kwargs) env_train, _ = e_train_gym.get_sb_env() ############# BUILD AGENT ############# agent = DRLAgent(env=env_train) '''Set Agent Parameters''' A2C_params = { "n_steps": 2048, "ent_coef": 0.01, "learning_rate": 0.00025, } model_a2c = agent.get_model("a2c", model_kwargs=A2C_params) '''Train Agent''' trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=1000) ############# BUILD TRADING ENV ############# '''Set Turbulence Threshold'''
def test(training_data, trading_data):
    # params
    stock_dimension = len(training_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    hmax = 100
    starting_capital = 1000000
    transaction_cost_pct = 0.001
    reward_scaling = 1e-4
    technical_indicator_list = config.TECHNICAL_INDICATORS_LIST

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=1000000,
                         transaction_cost_pct=0.001)

    # pre-make training environment
    # env_train = StockEnvTrain(params)
    # transition to make
    # https://medium.com/@apoddar573/making-your-own-custom-environment-in-gym-c3b65ff8cdaa
    env_train = gym.make('multi-stock-train-v0',
                         df=training_data,
                         stock_dim=stock_dimension,
                         hmax=hmax,
                         initial_amount=starting_capital,
                         transaction_cost_pct=transaction_cost_pct,
                         reward_scaling=reward_scaling,
                         state_space=state_space,
                         action_space=stock_dimension,
                         tech_indicator_list=technical_indicator_list,
                         turbulence_threshold=250,
                         day=0)

    # --------------- Training
    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)
    env_train = Monitor(env_train, log_dir)
    callback = SaveOnBestTrainingRewardCallback(check_freq=5000, log_dir=log_dir)

    agent = DRLAgent(env=env_train)
    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    # a2c_params_tuning = {'n_steps': 512,
    #                      'ent_coef': 0.005,
    #                      'learning_rate': 0.0002,
    #                      'verbose': 0,
    #                      'timesteps': 150000}
    a2c_params_tuning = {
        "n_steps": 32,
        "gamma": 0.999304473794672,
        "gae_lambda": 0.994452346235796,
        "learning_rate": 0.00010054610987642753,
        "ent_coef": 0.00215496380633495,
        "max_grad_norm": 2.217146296318495,
        'verbose': 0,
        'timesteps': 2e5,  # 2e5
        "policy_kwargs": {
            "net_arch": 'tiny',
            "activation_fn": 'tanh',
            "ortho_init": False,
        }
    }
    model_a2c = agent.train_A2C(
        model_name="A2C_full_train_tuned{}".format(now),
        model_params=a2c_params_tuning,
        save=True,
        callback=callback)
    print("============End Model Training=========")
    # model_a2c = A2C.load(os.path.abspath('./me/tmp/best_model.zip'))

    account_value, actions = get_trade_results(env_setup, model_a2c)
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)
    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    user_input = input('train model? 1 train 0 don\'t train')
    if user_input == '1':  # input() returns a string; comparing to the int 1 was always False
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + df.tic[0] + "_frl")
    else:
        trained_sac = SAC.load('../models/sac_80k_msft_working')

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(trained_sac, e_trade_gym)
    df_account_value.to_csv("../" + config.RESULTS_DIR + "/SAC_df_account_value_" +
                            df.tic[0] + "_" + now + ".csv")
    df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" +
                      df.tic[0] + "_" + now + ".csv")

    # print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("../" + config.RESULTS_DIR + "/SAC_perf_stats_all_" +
                          df.tic[0] + "_" + now + ".csv")

    # plot account value, coloring each segment by the action taken
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)
    # plt.plot(x, y)
    # Use a boundary norm instead
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    line = axs[0].add_collection(lc)
    # fig.colorbar(line, ax=axs)
    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + df.tic[0])
    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())
    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]
    # lines = ax.plot(data)
    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    # plot stock closing price
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(df.tic[0] + " Closing Price")
    plt.plot(tx, ty)
    plt.savefig("../" + config.RESULTS_DIR + "/plots/" "SAC_plot_" + df.tic[0] +
                "_" + now + ".png")
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # fill every (date, ticker) combination so all tickers share the same calendar
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(
        pd.date_range(processed['date'].min(),
                      processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))
    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        processed, on=["date", "tic"], how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])
    processed_full = processed_full.fillna(0)

    # Training & Trading data split
    train = data_split(processed_full, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    agent = DRLAgent(env=env_train)
    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,
                                                           environment=e_trade_gym)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv")
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.SP_500_TICKER).fetch_data()
    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df,
                         use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    # Training & Trade data split
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # data normalization
    # features_list = list(train.columns)
    # features_list.remove('date')
    # features_list.remove('tic')
    # features_list.remove('close')
    # print(features_list)
    # data_normaliser = preprocessing.StandardScaler()
    # train[features_list] = data_normaliser.fit_transform(train[features_list])
    # trade[features_list] = data_normaliser.fit_transform(trade[features_list])

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=3000,
                         transaction_cost_pct=0.001)
    env_train = env_setup.create_env_training(data=train, env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 80000
    }
    model = agent.train_A2C(model_name="A2C_{}".format(now),
                            model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + '.csv')
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + '.csv')
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

# <a id='5'></a>
# # Part 6: Implement DRL Algorithms
# * The implementation of the DRL algorithms is based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines, with a major structural refactoring and code cleanups.
# * The FinRL library includes fine-tuned standard DRL algorithms such as DQN, DDPG,
#   Multi-Agent DDPG, PPO, SAC, A2C and TD3. Users can also design their own DRL
#   algorithms by adapting these implementations.

# In[26]:

agent = DRLAgent(env=env_train)

# ### Model Training: 5 models, A2C, DDPG, PPO, TD3, SAC
#
# ### Model 1: A2C

# In[27]:

agent = DRLAgent(env=env_train)
A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}
model_a2c = agent.get_model(model_name="a2c", model_kwargs=A2C_PARAMS)
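# A minimal sketch, not from the original notebook, of how the A2C model built
# above would typically be trained next, using the same DRLAgent.train_model API
# seen in the other snippets; the log name and timestep budget are placeholders.
trained_a2c = agent.train_model(model=model_a2c,
                                tb_log_name='a2c',
                                total_timesteps=50000)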
"tech_indicator_list": features, "action_space": stock_dimension, "reward_scaling": 1e-4, "model_name": model_name } e_train_gym = StockTradingEnv(df = train, **env_kwargs) e_train_gym.seed(42) e_train_gym.action_space.seed(42) env_train, _ = e_train_gym.get_sb_env() env_train.seed(seed) env_train.action_space.seed(seed) print(type(env_train)) agent = DRLAgent(env = env_train) model_ddpg = agent.get_model("ddpg", model_kwargs={"batch_size": batch_size, "buffer_size": 50000, "learning_rate": lr} ) trained_ddpg = agent.train_model(model=model_ddpg, tb_log_name='ddpg', total_timesteps=50000) e_trade_gym = StockTradingEnv(df = validation, **env_kwargs) e_trade_gym.seed(seed) e_trade_gym.action_space.seed(seed) df_account_value, df_actions = DRLAgent.DRL_prediction( model=trained_ddpg, environment = e_trade_gym
env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "transaction_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}
e_train_gym = StockTradingEnv(df=train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

agent = DRLAgent(env=env_train)
model_wm = agent.get_model("wm")

trade = data_split(data_df, start='2019-01-01', end='2021-01-01')
e_trade_gym = StockTradingEnv(df=trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()
df_account_value, df_actions = DRLAgent.DRL_prediction(model=model_wm,
                                                       test_data=trade,
                                                       test_env=env_trade,
                                                       test_obs=obs_trade)

print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
env = default.create(
    portfolio=portfolio,
    action_scheme="simple",  # "managed-risk", simpleBuy
    reward_scheme="simple",  # "risk-adjusted"
    feed=feed,
    renderer="screen-log",  # ScreenLogger used with default settings
    window_size=20)

# if agent is None:
#     agent = DQNAgent(env)
# else:
#     agent = DQNAgent(env, policy_network=agent.policy_network)
# agent.train(n_episodes=1, n_steps=720, render_interval=10)

# if agent is None:
agent = DRLAgent(env=env)
PPO_PARAMS = {
    "n_steps": 1440,
    "ent_coef": 0.01,
    "learning_rate": 0.00025,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)
trained_ppo = agent.train_model(model=model_ppo,
                                tb_log_name='ppo',
                                total_timesteps=50000)
pd.DataFrame(portfolio.performance).transpose()[['net_worth']].to_pickle(
    './tmp/result3_' + str(i) + '.pkl')
"initial_amount": 1000000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": indicators, "action_space": stock_dimension, "reward_scaling": 0.6 } e_trade_gym = StockTradingEnv(df=df_test, **env_kwargs) e_train_gym = StockTradingEnv(df=df_train, **env_kwargs) env_trade, _ = e_trade_gym.get_sb_env() env_train, _ = e_train_gym.get_sb_env() agent = DRLAgent(env=env_train) model_params = config.__dict__[f"{args.model.upper()}_PARAMS"] model = agent.get_model(args.model, model_kwargs=model_params, verbose=1) print('Training model') trained_model = model.learn(tb_log_name='{}_{}'.format( modelName, datetime.datetime.now()), total_timesteps=train_steps, eval_env=e_trade_gym, n_eval_episodes=10) trained_model.save(os.path.join(args.modeldir, modelName))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}") env_kwargs = { "hmax": 1, "initial_amount": 100000, "buy_cost_pct": 0, "sell_cost_pct": 0, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": tech_indicator_list, "action_space": stock_dimension, "reward_scaling": 1e-5 } e_train_gym = StockTradingEnv(df=train, **env_kwargs) env_train, _ = e_train_gym.get_sb_env() agent = DRLAgent(env=env_train) #A2C ''' A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002} model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS) trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=50000) ''' #PPO PPO_PARAMS = { "n_steps": 2048, "ent_coef": 0.005, "learning_rate": 0.0001, "batch_size": 128,
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)
    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv")
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}") env_kwargs = { "hmax": 500, "initial_amount": 1000000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": indicators, "action_space": stock_dimension, "reward_scaling": 1e-4 } test_gym_env = StockTradingEnv(df = df,turbulence_threshold = 329, **env_kwargs) agent = DRLAgent(env = test_gym_env) if model == 'ensemble': trained_model = EnsembleModel(test_gym_env,model_paths,'binaverage') else: model_params = config.__dict__[f"{model.upper()}_PARAMS"] trained_model = agent.get_model(model, model_kwargs = model_params, verbose = 0).load(model_paths) print('Testing...') df_account_value, df_actions = DRLAgent.average_predict( model=trained_model,
"initial_amount": 1000000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": config["TECHNICAL_INDICATORS_LIST"], "action_space": stock_dimension, "reward_scaling": 1e-4 } e_train_gym = StockTradingEnv(df = train, **env_kwargs) env_train, _ = e_train_gym.get_sb_env() print(type(env_train)) agent = DRLAgent(env = env_train) agent = DRLAgent(env = env_train) model_a2c = agent.get_model("a2c") trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=100000) ### Model 2: DDPG agent = DRLAgent(env = env_train) model_ddpg = agent.get_model("ddpg") trained_ddpg = agent.train_model(model=model_ddpg, tb_log_name='ddpg',
# this is our training env. It allows multiprocessing
# env_train, _ = e_train_gym.get_multiproc_env(n=n_cores)
env_train, _ = e_train_gym.get_sb_env()

# this is our observation environment. It allows full diagnostics
env_trade, _ = e_trade_gym.get_sb_env()

# # Part 6: Implement DRL Algorithms
# * The implementation of the DRL algorithms is based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines, with a major structural refactoring and code cleanups.
# * The FinRL library includes fine-tuned standard DRL algorithms such as DQN, DDPG,
#   Multi-Agent DDPG, PPO, SAC, A2C and TD3. Users can also design their own DRL
#   algorithms by adapting these implementations.

# In[53]:

agent = DRLAgent(env=env_train)

# ### Model PPO

# In[54]:

# from torch.nn import Softsign, ReLU
ppo_params = {
    'n_steps': 256,
    'ent_coef': 0.01,
    'learning_rate': 0.00009,
    'batch_size': 1024,
    'gamma': 0.99
}

policy_kwargs = {
    # "activation_fn": ReLU,
    "net_arch": [1024, 1024, 1024, 1024, 1024],
    # "squash_output": True
}
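# A minimal sketch, added here as an assumption rather than taken from the original
# notebook, of how the ppo_params and policy_kwargs above would typically be handed
# to the agent with the same get_model/train_model calls used in the other snippets;
# the log name and timestep budget are placeholders.
model_ppo = agent.get_model("ppo",
                            model_kwargs=ppo_params,
                            policy_kwargs=policy_kwargs)
trained_ppo = agent.train_model(model=model_ppo,
                                tb_log_name='ppo',
                                total_timesteps=50000)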
def train_one(fetch=False):
    """
    train an agent
    """
    if fetch:
        df = fetch_and_store()
    else:
        df = load()

    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        # use_turbulence=False,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(df, start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)
    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)
    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name="sac",
        # total_timesteps=100
        total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        # model=trained_sac, test_data=trade, test_env=env_trade, test_obs=obs_trade
        trained_sac,
        e_trade_gym)
    df_account_value.to_csv(f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] + config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'], tz=pytz.utc)).drop(columns=['date'])
    # cast all position columns to float (the original assigned the astype result,
    # a whole DataFrame, to df_positions['cash'])
    df_positions = df_positions.astype(
        {col: np.float64 for col in df_positions.columns})
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")
    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)