# NOTE: the FinRL import paths below are assumptions based on the project
# layout this script was written against; adjust them to your checkout.
import datetime
import os

import gym
import pandas as pd

# Monitor assumes Stable-Baselines3; on stable-baselines v2 the equivalent is
# `from stable_baselines.bench import Monitor`.
from stable_baselines3.common.monitor import Monitor

from finrl.config import config
from finrl.marketdata.yahoodownloader import YahooDownloader
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.env.environment import EnvSetup
from finrl.env.EnvMultipleStock_train import StockEnvTrain
from finrl.env.EnvMultipleStock_trade import StockEnvTrade
from finrl.model.models import DRLAgent
from finrl.trade.backtest import BackTestStats


# Dow 30 variant of the training script; the S&P 500 variant below keeps the
# train_one name, so this one carries a distinct name to avoid shadowing.
def train_one_dow30():
    """Train one A2C agent on the Dow 30 and backtest it."""
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()

    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(
        df,
        feature_number=5,
        use_technical_indicator=True,
        use_turbulence=True,
    ).preprocess_data()

    # Training & trade data split (see the data_split sketch below)
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    env_setup = EnvSetup(stock_dim=len(train.tic.unique()))
    env_train = env_setup.create_env_training(data=train, env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    a2c_params_tuning = {
        "n_steps": 5,
        "ent_coef": 0.005,
        "learning_rate": 0.0007,
        "verbose": 0,
        "timesteps": 100000,
    }
    model_a2c = agent.train_A2C(
        model_name="A2C_{}".format(now), model_params=a2c_params_tuning
    )

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250
    )
    df_account_value = DRLAgent.DRL_prediction(
        model=model_a2c, test_data=trade, test_env=env_trade, test_obs=obs_trade
    )
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/" + now + ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(
        "./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv"
    )
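# Both training functions in this script rely on FinRL's data_split helper.
# For readers without the FinRL sources handy, here is a minimal sketch of
# what it is assumed to do: slice the long-format DataFrame to [start, end)
# by date and reindex so all rows of one trading day share one index value.
# The name data_split_sketch is illustrative, not part of FinRL.
def data_split_sketch(df, start, end):
    """Return rows with start <= date < end, indexed by trading day."""
    data = df[(df.date >= start) & (df.date < end)]
    data = data.sort_values(["date", "tic"], ignore_index=True)
    data.index = data.date.factorize()[0]  # same integer index per date
    return data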
def train_one():
    """Train one A2C agent on the S&P 500 and backtest it."""
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.SP_500_TICKER,
    ).fetch_data()

    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(
        df, use_technical_indicator=True, use_turbulence=True
    ).preprocess_data()

    # Training & trade data split
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # Optional data normalization of the feature columns (everything except
    # date/tic/close). Fit the scaler on train only and reuse it on trade to
    # avoid look-ahead leakage -- see the normalize_features sketch after
    # this function.

    # Calculate state/action space: 1 cash balance entry, plus price and
    # holdings per stock, plus one entry per technical indicator per stock.
    stock_dimension = len(train.tic.unique())
    state_space = (
        1
        + 2 * stock_dimension
        + len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    )

    env_setup = EnvSetup(
        stock_dim=stock_dimension,
        state_space=state_space,
        hmax=100,
        initial_amount=3000,
        transaction_cost_pct=0.001,
    )
    env_train = env_setup.create_env_training(data=train, env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    a2c_params_tuning = {
        "n_steps": 5,
        "ent_coef": 0.005,
        "learning_rate": 0.0007,
        "verbose": 0,
        "timesteps": 80000,
    }
    model = agent.train_A2C(
        model_name="A2C_{}".format(now), model_params=a2c_params_tuning
    )

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250
    )
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model, test_data=trade, test_env=env_trade, test_obs=obs_trade
    )
    df_account_value.to_csv(
        "./" + config.RESULTS_DIR + "/df_account_value_" + now + ".csv"
    )
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(
        "./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv"
    )
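# A sketch of the optional normalization step noted in train_one above. It is
# an illustration rather than part of the FinRL pipeline: the helper name
# normalize_features is made up here, and unlike the original commented-out
# draft it fits the StandardScaler on the training window only, then merely
# applies it to the trade window, avoiding look-ahead leakage.
from sklearn import preprocessing


def normalize_features(train, trade):
    """Standardize feature columns in place; fit on train, transform trade."""
    features_list = [c for c in train.columns if c not in ("date", "tic", "close")]
    scaler = preprocessing.StandardScaler()
    train[features_list] = scaler.fit_transform(train[features_list])
    trade[features_list] = scaler.transform(trade[features_list])
    return train, trade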
def test(training_data, trading_data):
    """Train a tuned A2C agent on a registered gym env and trade with it."""
    # params (trading_data is currently unused in this excerpt)
    stock_dimension = len(training_data.tic.unique())
    state_space = (
        1
        + 2 * stock_dimension
        + len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    )
    hmax = 100
    starting_capital = 1000000
    transaction_cost_pct = 0.001
    reward_scaling = 1e-4
    technical_indicator_list = config.TECHNICAL_INDICATORS_LIST

    env_setup = EnvSetup(
        stock_dim=stock_dimension,
        state_space=state_space,
        hmax=hmax,
        initial_amount=starting_capital,
        transaction_cost_pct=transaction_cost_pct,
    )

    # Previously the training environment was built directly:
    #   env_train = StockEnvTrain(params)
    # Here it is created through gym.make instead, which requires the
    # environment to be registered first (see the registration sketch after
    # this function). The approach follows
    # https://medium.com/@apoddar573/making-your-own-custom-environment-in-gym-c3b65ff8cdaa
    env_train = gym.make(
        "multi-stock-train-v0",
        df=training_data,
        stock_dim=stock_dimension,
        hmax=hmax,
        initial_amount=starting_capital,
        transaction_cost_pct=transaction_cost_pct,
        reward_scaling=reward_scaling,
        state_space=state_space,
        action_space=stock_dimension,
        tech_indicator_list=technical_indicator_list,
        turbulence_threshold=250,
        day=0,
    )

    # --------------- Training
    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)
    env_train = Monitor(env_train, log_dir)
    # Saves the best model seen so far; see the callback sketch below.
    callback = SaveOnBestTrainingRewardCallback(check_freq=5000, log_dir=log_dir)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    # a2c_params_tuning = {'n_steps': 512,
    #                      'ent_coef': 0.005,
    #                      'learning_rate': 0.0002,
    #                      'verbose': 0,
    #                      'timesteps': 150000}
    a2c_params_tuning = {
        "n_steps": 32,
        "gamma": 0.999304473794672,
        "gae_lambda": 0.994452346235796,
        "learning_rate": 0.00010054610987642753,
        "ent_coef": 0.00215496380633495,
        "max_grad_norm": 2.217146296318495,
        "verbose": 0,
        "timesteps": 2e5,
        "policy_kwargs": {
            "net_arch": "tiny",
            "activation_fn": "tanh",
            "ortho_init": False,
        },
    }
    model_a2c = agent.train_A2C(
        model_name="A2C_full_train_tuned_{}".format(now),
        model_params=a2c_params_tuning,
        save=True,
        callback=callback,
    )
    print("============End Model Training=========")

    # model_a2c = A2C.load(os.path.abspath('./me/tmp/best_model.zip'))
    account_value, actions = get_trade_results(env_setup, model_a2c)
    return account_value, actions
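# Two pieces assumed by test() above, sketched here under stated assumptions.
#
# 1) gym.make("multi-stock-train-v0", ...) only works if the environment was
#    registered first, as described in the Medium article linked above. A
#    minimal registration sketch -- the entry_point path is an assumption
#    about where StockEnvTrain lives; adjust it to your layout:
from gym.envs.registration import register

register(
    id="multi-stock-train-v0",
    entry_point="finrl.env.EnvMultipleStock_train:StockEnvTrain",
)

# 2) SaveOnBestTrainingRewardCallback is not a library import; the version
#    used here is assumed to follow the custom-callback example from the
#    Stable-Baselines3 docs, which reads the Monitor logs every check_freq
#    steps and saves the model whenever the mean episode reward improves:
import numpy as np
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy


class SaveOnBestTrainingRewardCallback(BaseCallback):
    def __init__(self, check_freq, log_dir, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, "best_model")
        self.best_mean_reward = -np.inf

    def _init_callback(self):
        os.makedirs(self.log_dir, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            # ts2xy/load_results read the episode rewards Monitor wrote to disk
            x, y = ts2xy(load_results(self.log_dir), "timesteps")
            if len(x) > 0:
                mean_reward = np.mean(y[-100:])  # mean of last 100 episodes
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    self.model.save(self.save_path)
        return True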