def get_stock_data(start_date: str, end_date: str, stocks_tradable: List[str], tech_indicator_list: List[str]):
    """Fetch data, add technical indicators and pad to a full date/ticker grid.

    ``start_date`` and ``end_date`` cover the whole period, from training
    through validation to test.

    Args:
        start_date: Passed to ``YahooDownloader`` as ``start_date``.
        end_date: Passed to ``YahooDownloader`` as ``end_date``.
        stocks_tradable: Ticker list passed to ``YahooDownloader``.
        tech_indicator_list: Indicator names passed to ``FeatureEngineer``.

    Returns:
        A tuple ``(frame, list_date)``. ``frame`` holds one row per
        (date, ticker) for every date present in the preprocessed data, sorted
        by date then ticker, with missing values replaced by 0. ``list_date``
        is every calendar day (as strings) between the first and last
        preprocessed date.
    """
    raw_prices = YahooDownloader(
        start_date=start_date,
        end_date=end_date,
        ticker_list=stocks_tradable,
    ).fetch_data()

    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=tech_indicator_list,
        use_turbulence=False,
        user_defined_feature=False,
    )
    processed = engineer.preprocess_data(raw_prices)

    tickers = processed["tic"].unique().tolist()
    calendar = pd.date_range(processed['date'].min(), processed['date'].max())
    list_date = list(calendar.astype(str))

    # Cartesian product of calendar days and tickers, left-joined to the data
    # so that absent (date, ticker) pairs show up as NaN rows.
    grid = pd.DataFrame(list(itertools.product(list_date, tickers)), columns=["date", "tic"])
    padded = grid.merge(processed, on=["date", "tic"], how="left")

    # Drop calendar days that never occur in the preprocessed data.
    observed = padded['date'].isin(processed['date'])
    padded = padded[observed]
    padded = padded.sort_values(['date', 'tic'])
    padded = padded.fillna(0)
    return padded, list_date
def raw_data_preprocess(
        prp_data_path,
        df,
        beg_date,
        end_date,
        tech_id_list,
):
    """Return the preprocessed frame, loading it from a pickle cache if present.

    Args:
        prp_data_path: Path of the pickle cache. Read when it exists,
            written otherwise.
        df: Raw frame handed to ``FeatureEngineer.preprocess_data``. Ignored
            when the cache exists.
        beg_date: Inclusive lower bound applied to ``df.date``.
        end_date: Exclusive upper bound applied to ``df.date``.
        tech_id_list: Indicator names passed to ``FeatureEngineer``.

    Returns:
        The preprocessed DataFrame.

    Raises:
        AssertionError: If the columns are not exactly the expected 17, in order.
    """
    expected_columns = [
        'date', 'open', 'high', 'low', 'close', 'volume', 'tic', 'day',
        'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30',
        'close_30_sma', 'close_60_sma', 'turbulence',
    ]

    if os.path.exists(prp_data_path):
        import pandas as pd

        # NOTE: unpickling executes arbitrary code; only point this at cache
        # files this program wrote itself.
        df = pd.read_pickle(prp_data_path)  # DataFrame of Pandas
    else:
        from finrl.preprocessing.preprocessors import FeatureEngineer

        fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=tech_id_list,
            use_turbulence=True,
            user_defined_feature=False,
        )
        df = fe.preprocess_data(df)  # preprocess raw_df

        df = df[(df.date >= beg_date) & (df.date < end_date)]
        df = df.sort_values(["date", "tic"], ignore_index=True)
        # Rows sharing a date share an index value (0, 1, 2, ... per date).
        df.index = df.date.factorize()[0]

        df.to_pickle(prp_data_path)

    print('| df.columns.values:', df.columns.values)
    # Compare as plain lists: an element-wise NumPy comparison against a list
    # of a different length does not yield an iterable of booleans, so the
    # old `all(... == [...])` crashed instead of failing the assertion.
    actual_columns = list(df.columns.values)
    assert actual_columns == expected_columns, (
        'unexpected columns: {}'.format(actual_columns))
    return df
def get_feature_engineered_df(df):
    """Run ``FeatureEngineer`` over a copy of ``df`` and return its output.

    The engineer is built with technical indicators from
    ``config.TECHNICAL_INDICATORS_LIST`` and turbulence enabled; the caller's
    frame is copied before being handed over.
    """
    engineer_options = dict(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    engineer = FeatureEngineer(df.copy(), **engineer_options)
    return engineer.preprocess_data()
def get_yahoo_data(start, end):
    """Fetch DOW 30 data, keep tickers present on every date, add features.

    Args:
        start: Passed to ``YahooDownloader`` as ``start_date``.
        end: Passed to ``YahooDownloader`` as ``end_date``.

    Returns:
        The frame produced by ``FeatureEngineer.preprocess_data``, sorted by
        date then ticker with a fresh 0..n-1 index.

    Raises:
        AssertionError: If some date does not hold exactly one row per
            retained ticker.
    """
    df = YahooDownloader(start_date=start,
                         end_date=end,
                         ticker_list=config.DOW_30_TICKER).fetch_data()
    # sort_values returns a new frame; the result has to be kept.
    df = df.sort_values(['date', 'tic'], ignore_index=True)

    # Intersection of the tickers seen on each date. A None sentinel (rather
    # than "is the list empty?") keeps an empty intersection empty instead of
    # silently re-seeding it from the next date.
    common = None
    for _, group in df.groupby('date'):
        day_tickers = set(group.tic.unique())
        common = day_tickers if common is None else common & day_tickers
    if common is None:
        common = set()

    # One vectorised filter replaces the per-date merge + DataFrame.append
    # loop (append is quadratic and no longer exists in pandas 2).
    df = df[df.tic.isin(common)].sort_values(['date', 'tic'], ignore_index=True)
    rows_per_date = df.groupby('date').size()
    assert (rows_per_date == len(common)).all(), \
        'every date must hold exactly one row per retained ticker'

    fe = FeatureEngineer(use_technical_indicator=True,
                         tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                         use_turbulence=True,
                         user_defined_feature=False)
    processed = fe.preprocess_data(df)
    processed = processed.sort_values(['date', 'tic'], ignore_index=True)
    return processed
def load_stock_trading_data():
    """Load (or build and cache) the DOW 30 data and split it for train/eval.

    Raw and processed frames are cached as pickles under ``./env/FinRL``.
    Whichever cache file is missing is regenerated and written back.

    Returns:
        ``(train_df, eval_df)``: ``data_split`` of the processed frame over
        2008-03-19..2016-01-01 and 2016-01-01..2021-01-01.
    """
    from finrl.config import config

    cache_dir = './env/FinRL'
    raw_cache = f'{cache_dir}/StockTradingEnv_raw_data.df'
    processed_cache = f'{cache_dir}/StockTradingEnv_processed_data.df'
    os.makedirs(cache_dir, exist_ok=True)

    print("==============Start Fetching Data===========")
    if not os.path.exists(raw_cache):
        from finrl.marketdata.yahoodownloader import YahooDownloader

        downloader = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.DOW_30_TICKER,
        )
        raw_df = downloader.fetch_data()
        raw_df.to_pickle(raw_cache)
    else:
        raw_df = pd.read_pickle(raw_cache)  # DataFrame of Pandas
        print('| raw_df.columns.values:', raw_df.columns.values)

    print("==============Start Feature Engineering===========")
    if not os.path.exists(processed_cache):
        from finrl.preprocessing.preprocessors import FeatureEngineer

        engineer = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=True,
            user_defined_feature=False,
        )
        processed_df = engineer.preprocess_data(raw_df)
        processed_df.to_pickle(processed_cache)
    else:
        processed_df = pd.read_pickle(processed_cache)  # DataFrame of Pandas
        print('| processed_df.columns.values:', processed_df.columns.values)

    # Training & Trading data split
    from finrl.preprocessing.data import data_split

    train_df = data_split(processed_df, '2008-03-19', '2016-01-01')  # 1963/3223
    eval_df = data_split(processed_df, '2016-01-01', '2021-01-01')  # 1260/3223
    return train_df, eval_df
def main():
    """Feed December 2020 trade data, one index step at a time, to an A2C model.

    Price data and sentiment scores for 2020 are combined, split at
    2020-12-01, and the trade part is replayed through
    ``OnlineStockPrediction``, printing each prediction.

    NOTE: the model comes straight from ``DRLAgent.get_model("a2c")``; the
    ``train_model`` call is disabled in this function.
    """
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers

    downloader = YahooDownloader(start_date=start_date,
                                 end_date=end_date,
                                 ticker_list=ticker_list)
    numerical_df = downloader.fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)

    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)

    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    # 1 + two values per stock + one value per (indicator, stock).
    per_stock_features = 2 + len(indicator_list)
    state_space = 1 + per_stock_features * stock_dimension

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=indicator_list,
        action_space=stock_dimension,
        reward_scaling=1e-4,
        print_verbosity=5,
    )

    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    # The online environment is seeded with the rows at index 0.
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)
    training_agent = DRLAgent(env=env_train)
    model_a2c = training_agent.get_model("a2c")
    # trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=10000)

    feature_engineer = FeatureEngineer()
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)

    last_index = trade_data.index.unique().max()
    for step in range(1, last_index):
        step_rows = trade_data.loc[step]
        print(step_rows)
        online_stock_pred.add_data(trade_data.loc[step])

        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
def prepare_data() -> (dict, pd.DataFrame):
    """Return environment keyword arguments and the processed data frame.

    The processed frame comes from ``load_from_cache()``. When that returns
    ``None`` the data is fetched, feature-engineered and passed to ``save``.

    Returns:
        ``(env_kwargs, processed)``.
    """
    processed = load_from_cache()
    if processed is None:
        print("==============Start Fetching Data===========")
        downloader = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.CURRENT_TICKER,
        )
        df = downloader.fetch_data()

        print("==============Start Feature Engineering===========")
        engineer = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=False,
            user_defined_feature=False,
        )
        processed = engineer.preprocess_data(df)
        save(processed)

    # calculate state action space
    stock_dimension = len(processed.tic.unique())
    indicator_terms = len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension * 2
    state_space = 2 * stock_dimension + indicator_terms

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.01,
        sell_cost_pct=0.01,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        action_space=stock_dimension,
        reward_scaling=1e-4,
    )
    return env_kwargs, processed
def main():
    """Command-line entry point: train an agent, or download and preprocess data.

    * ``options.mode == "train"`` runs ``finrl.autotrain.training.train_one``.
    * ``options.mode == "download_data"`` fetches price history for every
      ticker listed in ``data/tom_dow_done_data.csv``, runs
      ``FeatureEngineer`` over it and writes a timestamped CSV under
      ``config.DATA_SAVE_DIR``.

    Any other mode does nothing.
    """
    initialize()
    parser = build_parser()
    options = parser.parse_args()

    if options.mode == "train":
        import finrl.autotrain.training

        finrl.autotrain.training.train_one()

    elif options.mode == "download_data":
        print('Download Data Begin')

        # NOTE(review): this list is replaced by the CSV tickers right below.
        dow_30 = si.tickers_dow()

        # ETF
        #dftmp = pd.read_csv('data/etf_tom.csv',index_col=0)
        #dow_30 = dftmp.tic.unique()

        # DOW30
        dftmp = pd.read_csv('data/tom_dow_done_data.csv', index_col=0)
        dow_30 = dftmp.tic.unique()

        #dow_30 = ['DSS','AAPL','INFY']
        #dow_30 = ['^DJI']

        price_data = {ticker: si.get_data(ticker) for ticker in dow_30}
        # Concatenate once: pairwise DataFrame.append copied the accumulated
        # frame on every step and is no longer available in pandas 2.
        df = pd.concat(list(price_data.values()))
        df.reset_index(inplace=True)
        df = df.rename(columns={'index': 'date', 'ticker': 'tic'})

        fe = FeatureEngineer(use_technical_indicator=True,
                             use_turbulence=False,
                             user_defined_feature=False)
        df = fe.preprocess_data(df)

        now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        df.to_csv(config.DATA_SAVE_DIR + "/" + "dow30_" + now + ".csv",
                  index=False)
        print('Download Complete')
def train_one():
    """Train an A2C agent on DOW 30 data, trade with it, and save the results.

    The account values and the backtest statistics are written as CSV files
    under ``./<config.RESULTS_DIR>/``, named with the run's timestamp.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(start_date=config.START_DATE,
                                 end_date=config.END_DATE,
                                 ticker_list=config.DOW_30_TICKER)
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(df,
                               feature_number=5,
                               use_technical_indicator=True,
                               use_turbulence=True)
    df = engineer.preprocess_data()

    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    n_stocks = len(train.tic.unique())
    env_setup = EnvSetup(stock_dim=n_stocks)
    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = dict(
        n_steps=5,
        ent_coef=0.005,
        learning_rate=0.0007,
        verbose=0,
        timesteps=100000,
    )
    model_a2c = agent.train_A2C(model_name="A2C_{}".format(now),
                                model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value = DRLAgent.DRL_prediction(model=model_a2c,
                                               test_data=trade,
                                               test_env=env_trade,
                                               test_obs=obs_trade)
    account_csv = "./" + config.RESULTS_DIR + "/" + now + '.csv'
    df_account_value.to_csv(account_csv)

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(BackTestStats(df_account_value))
    stats_csv = "./" + config.RESULTS_DIR + "/perf_stats_all_" + now + '.csv'
    perf_stats_all.to_csv(stats_csv)
def test_process_data():
    """Replay data dated after 2020-12-01 through ``DataProcessor``, one date at a time.

    The processor is seeded with the November 2020 split of the combined
    price/sentiment data; each later date's price and sentiment rows are then
    processed and the result printed.
    """
    start_date = '2020-11-01'
    end_date = '2021-01-01'
    feed_start = '2020-12-01'
    ticker_list = stock_tickers

    downloader = YahooDownloader(start_date=start_date,
                                 end_date=end_date,
                                 ticker_list=ticker_list)
    numerical_df = downloader.fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)

    initial_data = get_initial_data(numerical_df, sentiment_df)
    trade_data = data_split(initial_data, start_date, '2020-12-01')

    # Rows strictly after the split date form the "live" feed.
    numerical_feed_data = numerical_df[numerical_df.date > feed_start]
    sentiment_feed_data = sentiment_df[sentiment_df.date > feed_start]

    data_processor = DataProcessor(FeatureEngineer(), trade_data)
    for feed_date in numerical_feed_data.date.unique():
        same_day_prices = numerical_feed_data.date == feed_date
        new_numerical = numerical_feed_data[same_day_prices]
        same_day_sentiment = sentiment_feed_data.date == feed_date
        new_sentiment = sentiment_feed_data.loc[same_day_sentiment]
        new_df = data_processor.process_data(new_numerical, new_sentiment)
        print(new_df)
def get_initial_data(numerical_df, sentiment_df, use_turbulence=False):
    """Preprocess price data and left-join sentiment scores onto it.

    Args:
        numerical_df: Frame handed to ``FeatureEngineer.preprocess_data``.
        sentiment_df: Frame merged in on the ``date`` and ``tic`` columns.
        use_turbulence: Forwarded to ``FeatureEngineer``.

    Returns:
        The merged frame with every missing value replaced by 0. Rows with no
        matching sentiment entry therefore get 0 in the sentiment columns.
    """
    fe = FeatureEngineer(use_turbulence=use_turbulence)
    numerical_df = fe.preprocess_data(numerical_df)
    df = numerical_df.merge(sentiment_df, on=["date", "tic"], how="left")
    # fillna returns a new frame; the result was previously thrown away, so
    # the NaNs introduced by the left join leaked out to the caller.
    df = df.fillna(0)
    return df
def main():
    """Build the training and testing sets from a saved CSV and pickle them.

    Reads ``./<config.DATA_SAVE_DIR>/<options.name>.csv``, runs
    ``FeatureEngineer`` over it, splits the result at
    ``config.START_TRADE_DATE`` and writes the two parts to ``training.txt``
    and ``testing.txt`` in the same directory (as pickles), replacing any
    existing files.
    """
    parser = build_parser()
    options = parser.parse_args()

    # Basic setup: disable warnings.
    warnings.filterwarnings('ignore')

    # Load the saved data in a pandas DataFrame:
    csv_path = "./" + config.DATA_SAVE_DIR + "/" + options.name + ".csv"
    data_frame = pd.read_csv(csv_path)
    print("Data Frame shape is: ", data_frame.shape)
    print("Data Frame format is following: \n\n", data_frame.head())

    ## we store the stockstats technical indicator column names in config.py
    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST
    print("Technical Indicators that are going to be calculated: ",
          tech_indicator_list)

    feature_engineering = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=tech_indicator_list,
        use_turbulence=True,
        user_defined_feature=False)
    processed = feature_engineering.preprocess_data(data_frame)
    preview = processed.sort_values(['date', 'tic'], ignore_index=True)
    print(preview.head(10))

    training_set = data_split(processed, config.START_DATE,
                              config.START_TRADE_DATE)
    testing_set = data_split(processed, config.START_TRADE_DATE,
                             config.END_DATE)
    print("Size of training set: ", len(training_set))
    print("Size of testing set: ", len(testing_set))
    print("Training set format:\n\n", training_set.head())
    print("Testing set format: \n\n", testing_set.head())

    stock_dimension = len(training_set.tic.unique())
    state_space = (1 + 2 * stock_dimension
                   + len(tech_indicator_list) * stock_dimension)
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    ##
    ## Save data to file, both training and trading
    ##
    path_training = "./" + config.DATA_SAVE_DIR + "/training.txt"
    path_testing = "./" + config.DATA_SAVE_DIR + "/testing.txt"

    # Remove leftovers from an earlier run, reporting what was found.
    stale_files = (
        (path_training,
         "The training data file deleted",
         "The training data file does not exist"),
        (path_testing,
         "The testing data file deleted",
         "The testing data file does not exist"),
    )
    for stale_path, deleted_msg, missing_msg in stale_files:
        if os.path.exists(stale_path):
            os.remove(stale_path)
            print(deleted_msg)
        else:
            print(missing_msg)

    for out_path, data_set in ((path_training, training_set),
                               (path_testing, testing_set)):
        with open(out_path, "wb") as f:
            pickle.dump(data_set, f, pickle.HIGHEST_PROTOCOL)

    print(
        "Successfuly completed the task of creation of test and training data files."
    )
## Users can add more technical indicators here.
## See https://github.com/jealous/stockstats for the available names.
extra_indicators = [
    'kdjk',
    'open_2_sma',
    'boll',
    'close_10.0_le_5_c',
    'wr_10',
    'dma',
    'trix',
]
tech_indicator_list = tech_indicator_list + extra_indicators
print(tech_indicator_list)


# <a id='3.2'></a>
# ## 4.2 Perform Feature Engineering

# In[14]:


fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=tech_indicator_list,
    use_turbulence=False,
    user_defined_feature=False,
)

data_df = fe.preprocess_data(data_df)


# In[15]:


data_df.head()


# <a id='4'></a>
# # Part 5. Build Environment
# Considering the stochastic and interactive nature of automated stock trading tasks, a financial task is modeled as a **Markov Decision Process (MDP)** problem. The training process involves observing stock price changes, taking an action, and calculating the reward so that the agent adjusts its strategy accordingly. By interacting with the environment, the trading agent derives a trading strategy that maximizes rewards over time.
def generate_data(rollouts, data_dir, noise_type): # pylint: disable=R0914
    """Generate rollouts of random actions in a stock-trading environment.

    AAPL data for 2009-01-01..2021-01-01 is fetched and feature-engineered,
    and a training environment is built from the 2009..2019 split. Each
    rollout steps that environment with pre-sampled actions until it reports
    ``done`` and is saved to ``<data_dir>/rollout_<i>.npz``.

    Args:
        rollouts: Number of rollouts to generate.
        data_dir: Existing directory that receives the ``.npz`` files.
        noise_type: ``'white'`` (independent ``action_space.sample()`` draws)
            or ``'brown'`` (``sample_continuous_policy``).

    Raises:
        AssertionError: If ``data_dir`` does not exist.
        ValueError: If ``noise_type`` is neither ``'white'`` nor ``'brown'``.
    """
    assert exists(data_dir), "The data directory does not exist..."
    # Fail before the download: an unknown noise type used to surface only
    # inside the rollout loop, as a NameError on the unassigned action list.
    if noise_type not in ('white', 'brown'):
        raise ValueError(
            "noise_type must be 'white' or 'brown', got {!r}".format(noise_type))

    df = YahooDownloader(start_date = '2009-01-01',
                         end_date = '2021-01-01',
                         ticker_list = ['AAPL']).fetch_data()
    # sort_values returns a new frame; keep it.
    df = df.sort_values(['date','tic'],ignore_index=True)

    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list = config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature = False)
    processed = fe.preprocess_data(df)

    # Pad to a full (date, ticker) grid, restricted to dates that occur in
    # the data; missing entries become 0.
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date,list_ticker))
    processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date','tic'])
    processed_full = processed_full.fillna(0)

    train = data_split(processed_full, '2009-01-01','2019-01-01')
    trade = data_split(processed_full, '2019-01-01','2021-01-01')

    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2*stock_dimension + len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df = train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env = env_train

    seq_len = 10000

    for i in range(rollouts):
        env.reset()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        else:  # 'brown', validated above
            a_rollout = sample_continuous_policy(env.action_space, seq_len, 1. / 50)

        s_rollout = []
        r_rollout = []
        d_rollout = []

        t = 0
        while True:
            action = a_rollout[t]
            t += 1

            s, r, done, _ = env.step(action)
            s_rollout += [s]
            r_rollout += [r]
            d_rollout += [done]
            if done:
                print("> End of rollout {}, {} frames...".format(i, len(s_rollout)))
                # NOTE(review): `actions` holds all seq_len sampled actions,
                # not only the ones taken before `done` - confirm that the
                # consumer of these files expects that.
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break
def train_one(fetch=False):
    """Train a SAC agent, trade with it, and write results and backtest stats.

    Args:
        fetch: When true the data comes from ``fetch_and_store()``, otherwise
            from ``load()``.

    Account values, actions, transactions, positions and performance
    statistics are written as CSV files under ``./<config.RESULTS_DIR>/``,
    then ``backtest_plot`` is called.

    Raises:
        AssertionError: If the dates do not all hold the same number of rows.
    """
    if fetch:
        df = fetch_and_store()
    else:
        df = load()

    # Every date must carry the same number of ticker rows.
    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        # use_turbulence=False,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(df,
                                                       start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)

    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)

    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(
        model=model_sac,
        tb_log_name="sac",
        # total_timesteps=100
        total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv(
        f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] +
                                config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'],
                         tz=pytz.utc)).drop(columns=['date'])
    # Cast every remaining column to float64. The previous code assigned the
    # whole converted frame to the single 'cash' column, which pandas cannot
    # do once the frame has more than one column.
    df_positions = df_positions.astype(np.float64)
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")

    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)
def train_one():
    """Train a SAC agent on DOW 30 data, trade with it, and save the results.

    Account values, actions and backtest statistics are written as CSV files
    under ``./<config.RESULTS_DIR>/``, suffixed with the run's timestamp.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    )
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = engineer.preprocess_data(df)

    # Pad to a full (date, ticker) grid; keep only dates present in the data
    # and replace the resulting gaps with 0.
    tickers = processed["tic"].unique().tolist()
    calendar = pd.date_range(processed['date'].min(), processed['date'].max())
    days = list(calendar.astype(str))
    grid = pd.DataFrame(list(itertools.product(days, tickers)),
                        columns=["date", "tic"])
    processed_full = grid.merge(processed, on=["date", "tic"], how="left")
    on_observed_day = processed_full['date'].isin(processed['date'])
    processed_full = processed_full[on_observed_day]
    processed_full = processed_full.sort_values(['date', 'tic'])
    processed_full = processed_full.fillna(0)

    # Training & Trading data split
    train = data_split(processed_full, config.START_DATE,
                       config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE,
                       config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    indicator_terms = len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    state_space = 1 + 2 * stock_dimension + indicator_terms

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        action_space=stock_dimension,
        reward_scaling=1e-4,
    )

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)

    account_csv = "./" + config.RESULTS_DIR + "/df_account_value_" + now + ".csv"
    df_account_value.to_csv(account_csv)
    actions_csv = "./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv"
    df_actions.to_csv(actions_csv)

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(backtest_stats(df_account_value))
    stats_csv = "./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv"
    perf_stats_all.to_csv(stats_csv)
def train_one():
    """Train an A2C agent on S&P 500 data, trade with it, and save the results.

    Account values, actions and backtest statistics are written as CSV files
    under ``./<config.RESULTS_DIR>/``, suffixed with the run's timestamp.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(start_date=config.START_DATE,
                                 end_date=config.END_DATE,
                                 ticker_list=config.SP_500_TICKER)
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(df,
                               use_technical_indicator=True,
                               use_turbulence=True)
    df = engineer.preprocess_data()

    # Training & Trade data split
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # NOTE: no feature normalisation is applied to train/trade here.

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    indicator_terms = len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension
    state_space = 1 + 2 * stock_dimension + indicator_terms

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=3000,
                         transaction_cost_pct=0.001)
    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = dict(
        n_steps=5,
        ent_coef=0.005,
        learning_rate=0.0007,
        verbose=0,
        timesteps=80000,
    )
    model = agent.train_A2C(model_name="A2C_{}".format(now),
                            model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)

    account_csv = "./" + config.RESULTS_DIR + "/df_account_value_" + now + '.csv'
    df_account_value.to_csv(account_csv)
    actions_csv = "./" + config.RESULTS_DIR + "/df_actions_" + now + '.csv'
    df_actions.to_csv(actions_csv)

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(BackTestStats(df_account_value))
    stats_csv = "./" + config.RESULTS_DIR + "/perf_stats_all_" + now + '.csv'
    perf_stats_all.to_csv(stats_csv)
def _download_and_process(stock_tickers, data_path):
    """Fetch 2000-2021 data for ``stock_tickers``, add indicators, pad, save as CSV."""
    indicators = config.TECHNICAL_INDICATORS_LIST
    print('Getting Data: ')
    df = YahooDownloader(start_date = '2000-01-01',
                         end_date = '2021-01-01',
                         ticker_list = stock_tickers).fetch_data()
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list = indicators,
        use_turbulence=True,
        user_defined_feature = False)
    print('Adding Indicators')
    processed = fe.preprocess_data(df)

    # Pad to a full (date, ticker) grid on the dates present in the data and
    # replace the gaps with 0.
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date,list_ticker))
    processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date','tic'])
    processed_full = processed_full.fillna(0)

    # Save the padded frame. The unpadded `processed` used to be written
    # here, which left all of the padding above without effect.
    processed_full.to_csv(data_path,index = False)


def get_dataset(datadir,data_type,start_date,end_date):
    """Return the ``data_type`` dataset restricted to ``start_date``..``end_date``.

    The data is read from ``<datadir>/<data_type>.csv``. When that file is
    missing, ``'dow29'`` and ``'nas29'`` are downloaded, processed and saved
    there first; other types must be supplied by the user.

    Args:
        datadir: Directory holding (or receiving) the CSV file.
        data_type: One of ``config.SUPPORTED_DATA``.
        start_date: Start date passed to ``data_split``.
        end_date: End date passed to ``data_split``.

    Returns:
        The result of ``data_split`` on the loaded frame.

    Raises:
        ValueError: If ``data_type`` is unsupported, or its CSV is missing
            and cannot be downloaded.
    """
    if data_type not in config.SUPPORTED_DATA:
        raise ValueError('Market type not supported')

    data_path = os.path.join(datadir,data_type + '.csv')

    if not os.path.exists(data_path):
        # If we don't have the data, dow29 / nas29 can be downloaded from
        # yahoo finance.
        if data_type == 'dow29':
            _download_and_process(config.DOW_30_TICKER_MINUS_VISA, data_path)
        elif data_type == 'nas29':
            _download_and_process(config.NAS_29_TICKER, data_path)
        elif data_type == 'dow290':
            raise ValueError('Need to add dow29 with zeros crossings to data directory')
        elif data_type == 'dow29w0':
            raise ValueError('Need to add dow29 with OUT zeros crossings to data directory')
        else:
            raise ValueError('Need to add crypto data to data directory')

    # Load and subset data
    full_df = pd.read_csv(data_path)
    max_date = full_df['date'].max()
    min_date = full_df['date'].min()

    # Requested range reaches outside the available data: warn, then let
    # data_split return whatever overlaps.
    if min_date > start_date:
        warnings.warn('Earliest possible start date is {}: You have chosen {}. The later date will be used'.format(min_date,start_date))
    if max_date < end_date:
        warnings.warn('Latest possible end date is {}: You have chosen {}. The earlier date will be used'.format(max_date,end_date))

    to_return = data_split(full_df,start_date,end_date)

    return to_return
def train_one():
    """
    Train (or load) a SAC agent on a single ticker, trade with it, and plot the run.

    Steps: download 'FXAIX' prices for config.START_DATE..config.END_DATE, add
    technical indicators, split into train/trade periods at
    config.START_TRADE_DATE, build the trading environments, then either train
    a new SAC model (8000 timesteps) or load a previously saved one depending
    on the interactive prompt. The account values, actions, performance stats
    and a plot are written under "../<config.RESULTS_DIR>/".
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # Return value is not used below; the call is retained from the original
    # in case it has side effects on the trade environment.
    e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    ticker = df.tic[0]  # used in every output file name and in plot titles
    results_prefix = "../" + config.RESULTS_DIR + "/"
    run_suffix = ticker + "_" + now

    user_input = input('train model? 1 train 0 don\'t train')
    # input() returns a str, so compare against the text "1"; comparing with
    # the int 1 is always False and would make the training branch unreachable.
    if user_input.strip() == "1":
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + ticker + "_frl")
    else:
        trained_sac = SAC.load('../models/sac_80k_msft_working')

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv(results_prefix + "SAC_df_account_value_" +
                            run_suffix + ".csv")
    df_actions.to_csv(results_prefix + "SAC_df_actions_" + run_suffix + ".csv")

    # Backtest statistics
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(results_prefix + "SAC_perf_stats_all_" +
                          run_suffix + ".csv")

    # plot acc value: the account-value curve is drawn as line segments that
    # are coloured by the action values through the boundary norm below.
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']

    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)

    # Use a boundary norm: (-100, -0.1] -> 'r', (-0.1, 0.1] -> 'g', (0.1, 100] -> 'b'
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    axs[0].add_collection(lc)

    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + ticker)
    # add_collection does not autoscale the axes, so set the limits by hand.
    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())

    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]
    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    # plot stock value (pyplot calls target the current axes)
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(ticker + " Closing Price")
    plt.plot(tx, ty)

    plt.savefig(results_prefix + "plots/SAC_plot_" + run_suffix + ".png")
# NOTE(review): truncated script fragment -- the opening of the first
# statement (apparently a `parser.add_argument(...)` call) is not visible
# here. The visible code parses the command-line args, creates a Kafka
# consumer on `args.topic` and a Kafka producer (both on `args.hosts`),
# downloads price data and sentiment scores for args.start_date..args.end_date
# to build `initial_data`, fetches a second batch for yesterday..today
# (the sentiment call passes yesterday as both start and end), builds a model
# from `initial_data`, and creates an Event. `load_path` is assigned but not
# used in the visible code. The code below is kept exactly as found.
metavar='<trade_date>', help='trading start date') args = parser.parse_args() consumer = KafkaConsumer(args.topic, auto_offset_reset='latest', \ bootstrap_servers=args.hosts, api_version=(0, 10), consumer_timeout_ms=1000) producer = KafkaProducer(bootstrap_servers=args.hosts, api_version=(0, 10)) # data initialization tday = datetime.date.today() yday = tday - datetime.timedelta(days=1) fmt = "%Y-%m-%d" numerical_df = YahooDownloader(args.start_date, args.end_date, config.stock_tickers).fetch_data() sentiment_df = generate_sentiment_scores(args.start_date, args.end_date) initial_data = get_initial_data(numerical_df, sentiment_df) data_processor = DataProcessor(FeatureEngineer(), initial_data) new_numerical = YahooDownloader(datetime.datetime.strftime(yday, fmt), datetime.datetime.strftime(tday, fmt), config.stock_tickers).fetch_data() new_sentiment = generate_sentiment_scores( datetime.datetime.strftime(yday, fmt), datetime.datetime.strftime(yday, fmt)) # set up model to train on initial data load_path = "./trained_models/a2c_2019-2020_80k.zip" model = setup_model(initial_data) while consumer is None: sleep(20) event = Event()
# In[8]:

# Fetch price data for the Dow 30 tickers over the configured date range.
downloader = YahooDownloader(
    start_date=config.START_DATE,
    end_date=config.END_DATE,
    ticker_list=config.DOW_30_TICKER,
)
df = downloader.fetch_data()


# # Part 4: Preprocess Data

# In[9]:

# Add the configured technical indicators plus the turbulence column.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    use_turbulence=True,
    user_defined_feature=False,
)
processed = fe.preprocess_data(df)

# %% Show turbulence
# if error open VSCode Settings (JSON) and change
# "terminal.integrated.inheritEnv" to true
import matplotlib.pyplot as plt
import pandas as pd

# Keep only the JPM rows; both `df` and `plotdf` are bound to that subset
# (note this rebinds `df`, which previously held the raw download).
jpm_rows = processed['tic'] == 'JPM'
plotdf = processed[jpm_rows]
df = plotdf

# Turbulence and closing price against date on one chart.
df.plot(x="date", y=["turbulence", "close"])
plt.show()
def train_one():
    """
    Train a SAC agent on the Dow 30 tickers, trade with it, and save the results.

    Downloads prices for config.START_DATE..config.END_DATE, adds technical
    indicators and turbulence, splits the data at config.START_TRADE_DATE,
    trains SAC for 80000 timesteps on the training period, runs the trained
    model over the trading period, and writes account values, actions and
    backtest statistics as CSV files under "./<config.RESULTS_DIR>/".
    """
    indicators = config.TECHNICAL_INDICATORS_LIST

    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    )
    raw_prices = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    feature_engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=indicators,
        use_turbulence=True,
        user_defined_feature=False,
    )
    features = feature_engineer.preprocess_data(raw_prices)

    # Split into the training period and the trading (evaluation) period.
    train_df = data_split(features, config.START_DATE, config.START_TRADE_DATE)
    trade_df = data_split(features, config.START_TRADE_DATE, config.END_DATE)

    # State size: 1 + 2 entries per stock + one entry per (indicator, stock).
    n_stocks = len(train_df.tic.unique())
    n_state = 1 + n_stocks * (2 + len(indicators))

    shared_env_args = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=n_state,
        stock_dim=n_stocks,
        tech_indicator_list=indicators,
        action_space=n_stocks,
        reward_scaling=1e-4,
    )

    train_gym = StockTradingEnv(df=train_df, **shared_env_args)
    trade_gym = StockTradingEnv(df=trade_df,
                                turbulence_threshold=250,
                                **shared_env_args)
    train_env, _ = train_gym.get_sb_env()
    trade_env, trade_obs = trade_gym.get_sb_env()

    agent = DRLAgent(env=train_env)

    print("==============Model Training===========")
    # Timestamp that tags every output file of this run.
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    sac_model = agent.get_model("sac")
    sac_trained = agent.train_model(model=sac_model,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    account_values, actions_taken = DRLAgent.DRL_prediction(
        model=sac_trained,
        test_data=trade_df,
        test_env=trade_env,
        test_obs=trade_obs,
    )

    results_prefix = "./" + config.RESULTS_DIR + "/"
    csv_suffix = now + ".csv"
    account_values.to_csv(results_prefix + "df_account_value_" + csv_suffix)
    actions_taken.to_csv(results_prefix + "df_actions_" + csv_suffix)

    print("==============Get Backtest Results===========")
    stats_frame = pd.DataFrame(BackTestStats(account_values))
    stats_frame.to_csv(results_prefix + "perf_stats_all_" + csv_suffix)