def new_test():
    """Train an A2C agent on pre-processed data and print its trading results.

    Loads ``./me/datasets/new_data_with_techs_turb.csv``, splits it with
    ``data_split`` into a training window ('2009-01-01' .. '2018-01-01') and
    a trading window ('2018-01-01' .. '2021-01-01'), trains A2C for 100000
    timesteps on a ``Monitor``-wrapped training environment and finally
    prints the account values returned by ``DRLAgent.DRL_prediction``.
    """
    csv_path = os.path.abspath('./me/datasets/new_data_with_techs_turb.csv')
    processed = pd.read_csv(csv_path, index_col=0)

    train = data_split(processed, '2009-01-01', '2018-01-01')
    trade = data_split(processed, '2018-01-01', '2021-01-01')

    # State vector size: 1 slot, plus 2 slots and one slot per indicator
    # for every ticker.
    stock_dimension = len(train.tic.unique())
    indicator_count = len(config.TECHNICAL_INDICATORS_LIST)
    state_space = 1 + 2 * stock_dimension + indicator_count * stock_dimension
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        transaction_cost_pct=0.001,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        action_space=stock_dimension,
        reward_scaling=1e-4,
    )

    # Training environment, with the first sub-environment wrapped in a
    # Monitor that is given the log directory.
    env_train, _ = StockTradingEnv(df=train, **env_kwargs).get_sb_env()
    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)
    env_train.envs[0] = Monitor(env_train.envs[0], log_dir)

    agent = DRLAgent(env=env_train)
    trained_a2c = agent.train_model(
        model=agent.get_model("a2c", verbose=0),
        tb_log_name='a2c',
        total_timesteps=100000,
    )

    # In-sample turbulence: one row per date inside the training window.
    in_window = (processed.date < '2018-01-01') & (processed.date >= '2009-01-01')
    insample_turbulence = processed[in_window].drop_duplicates(subset=['date'])
    # NOTE(review): this quantile is computed but never used; the trading
    # environment below receives the literal 380 instead -- confirm intent.
    turbulence_threshold = np.quantile(
        insample_turbulence.turbulence.values, 1)

    trade_gym = StockTradingEnv(df=trade, turbulence_threshold=380,
                                **env_kwargs)
    env_trade, obs_trade = trade_gym.get_sb_env()

    print("BEGIN PREDICTION")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_a2c,
        test_data=trade,
        test_env=env_trade,
        test_obs=obs_trade,
    )
    print(df_account_value)
    print("END PREDICTION")
def train_one():
    """Train the configured model, round-trip it through disk, then trade.

    Data and environment keyword arguments come from ``prepare_data()``.
    The model named by ``config.CURRENT_MODEL`` is trained for 80000
    timesteps, saved under ``config.TRAINED_MODEL_DIR``, reloaded from that
    path, and evaluated with ``DRLAgent.DRL_prediction``; the resulting
    account values and actions are handed to ``log_account``.
    """
    env_kwargs, processed = prepare_data()

    # Training & trading data split
    train_df = data_split(processed, config.START_DATE,
                          config.START_TRADE_DATE)
    trade_df = data_split(processed, config.START_TRADE_DATE,
                          config.END_DATE)

    train_gym = StockTradingEnv(df=train_df, **env_kwargs)
    trade_gym = StockTradingEnv(df=trade_df, turbulence_threshold=250,
                                **env_kwargs)

    sb_train_env, _ = train_gym.get_sb_env()
    agent = DRLAgent(env=sb_train_env)

    print("==============Model Training===========")
    untrained = agent.get_model(config.CURRENT_MODEL)
    model = agent.train_model(model=untrained, total_timesteps=80000)

    # Save and immediately reload so trading runs on the persisted model.
    path = f"{config.TRAINED_MODEL_DIR}/model"
    model.save(path)
    model = model.load(path)

    print("==============Start Trading===========")
    account_values, actions = DRLAgent.DRL_prediction(
        model=model, environment=trade_gym)
    log_account(account_values, actions)
def main():
    """Feed trading data to an online A2C predictor one index step at a time.

    Downloads prices for ``stock_tickers``, combines them with sentiment
    scores via ``get_initial_data``, builds a training environment and an
    ``OnlineStockTradingEnv`` seeded with the rows at index 0 of the trading
    split, then loops over the remaining index values, printing the
    predictor's action, states, next observation and rewards for each.
    """
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers

    downloader = YahooDownloader(start_date=start_date,
                                 end_date=end_date,
                                 ticker_list=ticker_list)
    numerical_df = downloader.fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)

    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)

    # The sentiment column is used as an extra indicator.
    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = (1 + 2 * stock_dimension
                   + len(indicator_list) * stock_dimension)

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=indicator_list,
        action_space=stock_dimension,
        reward_scaling=1e-4,
        print_verbosity=5,
    )

    env_train, _ = StockTradingEnv(df=train_data, **env_kwargs).get_sb_env()
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)

    training_agent = DRLAgent(env=env_train)
    # NOTE(review): the model is used as returned by get_model; the
    # train_model call is commented out in the original -- confirm intent.
    model_a2c = training_agent.get_model("a2c")

    feature_engineer = FeatureEngineer()  # constructed but not used below
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)

    # range() stops before the largest index value, so the rows at that
    # last index are never fed to the predictor.
    last_index = trade_data.index.unique().max()
    for step in range(1, last_index):
        step_rows = trade_data.loc[step]
        print(step_rows)
        online_stock_pred.add_data(step_rows)
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
def DRL_prediction(model: PPO, environment: StockTradingEnv) -> object:
    """Run ``model`` over ``environment`` and return its recorded memories.

    The vectorised environment and its first observation are obtained from
    ``environment.get_sb_env()``. The model is then stepped deterministically
    once per unique index value of ``environment.df``. The asset and action
    memories are fetched on the second-to-last step (index ``n - 2``) via
    ``env_method("save_asset_memory")`` / ``env_method("save_action_memory")``.

    Args:
        model: object exposing ``predict(obs, deterministic=True)``.
        environment: trading environment exposing ``get_sb_env()`` and ``df``.

    Returns:
        Tuple ``(account_memory[0], actions_memory[0])``, i.e. the first
        element of each ``env_method`` result.

    Raises:
        IndexError: if ``environment.df`` has fewer than two unique index
            values, or the episode ends before step ``n - 2``, because the
            memories are then never captured.
    """
    test_env, test_obs = environment.get_sb_env()

    account_memory = []
    actions_memory = []

    # The reset observation is intentionally not used; prediction starts
    # from the observation returned by get_sb_env(), as before.
    test_env.reset()

    # Loop-invariant: the number of unique index values never changes while
    # stepping, so compute it once instead of twice per iteration.
    n_steps = len(environment.df.index.unique())
    capture_step = n_steps - 2

    for i in range(n_steps):
        action, _ = model.predict(test_obs, deterministic=True)
        test_obs, rewards, dones, info = test_env.step(action)
        if i == capture_step:
            account_memory = test_env.env_method(
                method_name="save_asset_memory")
            actions_memory = test_env.env_method(
                method_name="save_action_memory")
        if dones[0]:
            print("hit end!")
            break

    return account_memory[0], actions_memory[0]
"sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, "action_space": stock_dimension, "reward_scaling": 1e-4 } e_train_gym = StockTradingEnv(df = train, **env_kwargs) # In[25]: env_train, _ = e_train_gym.get_sb_env() print(type(env_train)) # <a id='5'></a> # # Part 6: Implement DRL Algorithms # * The implementation of the DRL algorithms are based on **OpenAI Baselines** and **Stable Baselines**. Stable Baselines is a fork of OpenAI Baselines, with a major structural refactoring, and code cleanups. # * FinRL library includes fine-tuned standard DRL algorithms, such as DQN, DDPG, # Multi-Agent DDPG, PPO, SAC, A2C and TD3. We also allow users to # design their own DRL algorithms by adapting these DRL algorithms. # In[26]: agent = DRLAgent(env = env_train)
def main():
    """Load the pickled training data and run the algorithm chosen by the user.

    Creates the trained-model and tensorboard directories if needed, loads
    the training frame from ``<DATA_SAVE_DIR>/training.txt``, builds the
    training environment and an ``Agent`` around it, then shows a menu and
    calls the matching ``train_*`` function(s). Non-numeric or unknown
    menu input prints "Invalid option selected!" instead of raising.
    """
    # Basic setup: silence warnings for the interactive session.
    warnings.filterwarnings('ignore')
    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST

    # Create the output folders; exist_ok replaces the separate
    # exists()-then-makedirs() check.
    os.makedirs("./" + config.TRAINED_MODEL_DIR, exist_ok=True)
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR, exist_ok=True)

    print()
    print("==============Load Training Data===========")
    path_training = "./" + config.DATA_SAVE_DIR + "/training.txt"
    # SECURITY NOTE(review): pickle.load can execute arbitrary code from the
    # file it reads. Only point this at files produced by this project.
    with open(path_training, "rb") as f:
        dump = pickle.load(f)

    stock_dimension = len(dump.tic.unique())
    state_space = (1 + 2 * stock_dimension
                   + len(tech_indicator_list) * stock_dimension)
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": tech_indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
    }
    e_training_gym = StockTradingEnv(df=dump, **env_kwargs)

    print("Get Environment for Training")
    env_training, _ = e_training_gym.get_sb_env()
    print(type(env_training))

    # Implement DRL algorithms.
    # The agent is the project's own copy of the FinRL agent code rather
    # than the installed FinRL library.
    agent = Agent(env=env_training)

    print("======================================================")
    print("Please select which training you want me to perform.")
    print("1. A2C - Advantage Actor-Critic algorithm")
    print("2. DDPG - Deep Deterministic Policy Gradient algorithm")
    print("3. PPO - Proximal Policy Optimization algorithm")
    print("4. TD3 - Twin Delayed Deep Deterministic Policy Gradient algorithm")
    print("5. SAC - Soft Actor-Critic algorithm")
    print("6. All Algorithms")
    print("7. Exit")
    print("-------------------------------------------------------")

    # Menu number -> trainers to run, in order.
    all_trainers = (train_a2c, train_ddpg, train_ppo, train_td3, train_sac)
    menu = {number: (trainer,)
            for number, trainer in enumerate(all_trainers, start=1)}
    menu[6] = all_trainers

    try:
        selection = int(input("Select what you want me to do: "))
    except ValueError:
        # Non-numeric input is treated like any other invalid option.
        selection = None

    if selection == 7:
        print("exit")
    elif selection in menu:
        for trainer in menu[selection]:
            trainer(agent)
    else:
        print("Invalid option selected!")
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}") '''Set Env Parameters''' env_kwargs = { "hmax": 100, "initial_amount": 1000000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, "action_space": stock_dimension, "reward_scaling": 1e-4 } '''Create Train Env''' e_train_gym = StockTradingEnv(df=train, **env_kwargs) env_train, _ = e_train_gym.get_sb_env() ############# BUILD AGENT ############# agent = DRLAgent(env=env_train) '''Set Agent Parameters''' A2C_params = { "n_steps": 2048, "ent_coef": 0.01, "learning_rate": 0.00025, } model_a2c = agent.get_model("a2c", model_kwargs=A2C_params) '''Train Agent''' trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c',
def train_one():
    """Train a SAC agent on the Dow 30 tickers and write backtest CSVs.

    Downloads prices for ``config.DOW_30_TICKER``, adds technical indicators
    and turbulence, trains SAC for 80000 timesteps on the training split,
    trades on the trading split, and writes account values, actions and
    performance statistics under ``config.RESULTS_DIR``, each file name
    suffixed with the run timestamp.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    )
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = engineer.preprocess_data(df)

    # Training & trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # State/action space sizes derived from the number of tickers.
    stock_dimension = len(train.tic.unique())
    indicator_count = len(config.TECHNICAL_INDICATORS_LIST)
    state_space = 1 + 2 * stock_dimension + indicator_count * stock_dimension

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        action_space=stock_dimension,
        reward_scaling=1e-4,
    )

    train_gym = StockTradingEnv(df=train, **env_kwargs)
    trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250,
                                **env_kwargs)
    env_train, _ = train_gym.get_sb_env()
    env_trade, obs_trade = trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    # Timestamp shared by every output file of this run.
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    trained_sac = agent.train_model(
        model=agent.get_model("sac"),
        tb_log_name="sac",
        total_timesteps=80000,
    )

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac,
        test_data=trade,
        test_env=env_trade,
        test_obs=obs_trade,
    )
    results_root = "./" + config.RESULTS_DIR
    df_account_value.to_csv(results_root + "/df_account_value_" + now + ".csv")
    df_actions.to_csv(results_root + "/df_actions_" + now + ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(BackTestStats(df_account_value))
    perf_stats_all.to_csv(results_root + "/perf_stats_all_" + now + ".csv")
def train_one():
    """Train or load a SAC agent for FXAIX, trade, and save results and a plot.

    Downloads FXAIX prices, adds technical indicators and turbulence, and
    builds training and trading environments. The user is asked whether to
    train: answering ``1`` trains SAC for 8000 timesteps and saves it under
    ``../models``; any other answer loads
    ``../models/sac_80k_msft_working``. The model is then run on the trading
    split and account values, actions, performance statistics (CSV) and a
    two-panel plot (PNG) are written under ``../<RESULTS_DIR>``.
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension
                   + len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    # Ticker label used in every output file name and plot title.
    ticker = df.tic[0]

    user_input = input('train model? 1 train 0 don\'t train')
    # input() returns a string, so compare against "1"; the previous
    # comparison with the integer 1 was always False and never trained.
    if user_input.strip() == "1":
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + ticker + "_frl")
    else:
        # NOTE(review): this loads a fixed, differently named model rather
        # than one saved by the branch above -- confirm this is intended.
        trained_sac = SAC.load('../models/sac_80k_msft_working')

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)

    results_dir = "../" + config.RESULTS_DIR
    file_suffix = ticker + "_" + now
    df_account_value.to_csv(
        results_dir + "/SAC_df_account_value_" + file_suffix + ".csv")
    df_actions.to_csv(
        results_dir + "/SAC_df_actions_" + file_suffix + ".csv")

    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(
        results_dir + "/SAC_perf_stats_all_" + file_suffix + ".csv")

    # Plot account value as a line coloured by the action taken.
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']
    # Turn the (x, y) series into consecutive two-point segments so each
    # segment can be coloured independently.
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)

    # Three colour bands with boundaries at -0.1 and 0.1; the legend below
    # labels them Sell, Hold and Buy.
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    axs[0].add_collection(lc)

    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE
                      + " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + ticker)
    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())

    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4),
    ]
    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    # Plot the closing price. The pyplot calls act on the current axes
    # (presumably the lower panel, axs[1] -- TODO confirm).
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(ticker + " Closing Price")
    plt.plot(tx, ty)
    plt.savefig(results_dir + "/plots/" "SAC_plot_" + file_suffix + ".png")
def train_one():
    """Train a SAC agent on a gap-filled Dow 30 frame and write backtest CSVs.

    After feature engineering, the data is expanded to every (date, ticker)
    pair for dates present in the processed frame; missing values are
    filled with 0. SAC is trained for 80000 timesteps on the training split
    and evaluated on the trading split; account values, actions and
    performance statistics are written under ``config.RESULTS_DIR``.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    )
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = engineer.preprocess_data(df)

    # Build the full date x ticker grid, left-join the real rows onto it,
    # then keep only dates that actually occur in the processed data.
    list_ticker = processed["tic"].unique().tolist()
    calendar = pd.date_range(processed['date'].min(), processed['date'].max())
    list_date = list(calendar.astype(str))
    combination = list(itertools.product(list_date, list_ticker))

    grid = pd.DataFrame(combination, columns=["date", "tic"])
    processed_full = grid.merge(processed, on=["date", "tic"], how="left")
    known_dates = processed_full['date'].isin(processed['date'])
    processed_full = processed_full[known_dates]
    processed_full = processed_full.sort_values(['date', 'tic'])
    processed_full = processed_full.fillna(0)

    # Training & trading data split
    train = data_split(processed_full, config.START_DATE,
                       config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE,
                       config.END_DATE)

    # State/action space sizes derived from the number of tickers.
    stock_dimension = len(train.tic.unique())
    indicator_count = len(config.TECHNICAL_INDICATORS_LIST)
    state_space = 1 + 2 * stock_dimension + indicator_count * stock_dimension

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        action_space=stock_dimension,
        reward_scaling=1e-4,
    )

    env_train, _ = StockTradingEnv(df=train, **env_kwargs).get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    # Timestamp shared by every output file of this run.
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    trained_sac = agent.train_model(
        model=agent.get_model("sac"),
        tb_log_name="sac",
        total_timesteps=80000,
    )

    print("==============Start Trading===========")
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250,
                                  **env_kwargs)
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)

    results_root = "./" + config.RESULTS_DIR
    df_account_value.to_csv(results_root + "/df_account_value_" + now + ".csv")
    df_actions.to_csv(results_root + "/df_actions_" + now + ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(backtest_stats(df_account_value))
    perf_stats_all.to_csv(results_root + "/perf_stats_all_" + now + ".csv")
def train_one(fetch=False):
    """Train a SAC agent on crypto data, trade, and write backtest artefacts.

    Args:
        fetch: when true the data comes from ``fetch_and_store()``,
            otherwise from ``load()``.

    The frame must have the same number of rows for every date. After
    feature engineering the split dates come from ``calculate_split``. SAC
    is trained for 80000 timesteps, run on the trading split, and account
    values, actions, transactions, positions and performance statistics are
    written as CSV under ``config.RESULTS_DIR`` before ``backtest_plot`` is
    called.

    Raises:
        AssertionError: if the per-date row counts differ.
    """
    df = fetch_and_store() if fetch else load()

    # Every date must carry the same number of tickers.
    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max(), (
        "every date must have the same number of tickers")

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(
        df, start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)
    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension)
                   + (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv(
        f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    # Transactions recorded by the trading environment, indexed by UTC date.
    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(
        pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    # Positions: cash plus one column per ticker, indexed by UTC date.
    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] + config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'],
                         tz=pytz.utc)).drop(columns=['date'])
    # Cast every column to float64. The previous code assigned the whole
    # cast frame to the single 'cash' column, which pandas rejects when the
    # frame has more than one column.
    df_positions = df_positions.astype(np.float64)
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")

    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)
def generate_data(rollouts, data_dir, noise_type):  # pylint: disable=R0914
    """Generate random-policy rollouts on the AAPL trading environment.

    Downloads AAPL prices for 2009-2021, adds technical indicators and
    turbulence, builds the training environment, and for each rollout steps
    it with a pre-sampled action sequence until the environment reports
    done. Each rollout is saved to ``<data_dir>/rollout_<i>.npz``.

    Args:
        rollouts: number of rollouts to generate.
        data_dir: existing directory that receives the ``.npz`` files.
        noise_type: ``'white'`` for independent ``action_space.sample()``
            draws, ``'brown'`` for ``sample_continuous_policy``.

    Raises:
        AssertionError: if ``data_dir`` does not exist.
        ValueError: if ``noise_type`` is neither ``'white'`` nor ``'brown'``.
    """
    assert exists(data_dir), "The data directory does not exist..."
    # Reject unknown noise types up front; previously this surfaced later
    # as an unbound ``a_rollout`` inside the rollout loop.
    if noise_type not in ('white', 'brown'):
        raise ValueError(
            "noise_type must be 'white' or 'brown', got {!r}".format(
                noise_type))

    df = YahooDownloader(start_date='2009-01-01',
                         end_date='2021-01-01',
                         ticker_list=['AAPL']).fetch_data()
    # sort_values returns a new frame; keep the result (it was discarded).
    df = df.sort_values(['date', 'tic'], ignore_index=True)

    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False)
    processed = fe.preprocess_data(df)

    # Expand to the full date x ticker grid, keep only dates present in the
    # processed data, and fill the gaps with 0.
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(
        pd.date_range(processed['date'].min(),
                      processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))
    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        processed, on=["date", "tic"], how="left")
    processed_full = processed_full[processed_full['date'].isin(
        processed['date'])]
    processed_full = processed_full.fillna(0)
    # Single sort that is actually kept; the original sorted once and then
    # issued a second sort whose result was thrown away.
    processed_full = processed_full.sort_values(['date', 'tic'],
                                                ignore_index=True)

    train = data_split(processed_full, '2009-01-01', '2019-01-01')
    trade = data_split(processed_full, '2019-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension
                   + len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env, _ = e_train_gym.get_sb_env()

    # Number of actions pre-sampled per rollout.
    # NOTE(review): an episode longer than this would index past the end of
    # the action list -- confirm episodes are always shorter.
    seq_len = 10000

    for i in range(rollouts):
        env.reset()
        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        else:  # 'brown'
            a_rollout = sample_continuous_policy(env.action_space, seq_len,
                                                 1. / 50)

        s_rollout = []
        r_rollout = []
        d_rollout = []

        t = 0
        while True:
            action = a_rollout[t]
            t += 1

            s, r, done, _ = env.step(action)
            s_rollout.append(s)
            r_rollout.append(r)
            d_rollout.append(done)
            if done:
                print("> End of rollout {}, {} frames...".format(
                    i, len(s_rollout)))
                # The full pre-sampled action list is stored, not only the
                # actions that were actually taken.
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break