def main():
    """Drive the ``AsxGym-v0`` environment with a ``RandomAgent``.

    Removes the local ``images`` directory (best effort), creates the
    environment for a fixed start date and company list, then steps it for
    ``200000 * 24`` iterations. Every non-``None`` observation is printed as
    a JSON object followed by the step's ``info``. When an episode finishes,
    summary images are inserted and both the environment and the agent are
    reset. The environment is always closed, even if a step raises.
    """
    # Best-effort removal of the 'images' directory; a failure is reported
    # on stdout but does not stop the run.
    try:
        shutil.rmtree('images')
    except OSError as e:
        print(f"Error: images : {e.strerror}")
    else:
        print("delete images directory")

    gym.logger.set_level(INFO)
    start_date = date(2019, 5, 1)
    simulate_company_list = [2, 3, 4, 5, 6, 44, 300, 67, 100, 200]
    # simulate_company_list = [3]
    env = gym.make(
        "AsxGym-v0",
        start_date=start_date,
        simulate_company_list=simulate_company_list,
    )
    try:
        stock_agent = RandomAgent(env)
        # stock_agent = RandomAgent(env, min_volume=100, max_volume=500)
        # stock_agent = BuyAndKeepAgent(env, 3)
        observation = env.reset()
        for _ in range(200000 * 24):
            env.render()
            observation, _reward, done, info = env.step(stock_agent.action())
            if done:
                # Episode finished: emit the summary images, then start a
                # fresh episode for both the environment and the agent.
                env.insert_summary_images(30)
                observation = env.reset()
                stock_agent.reset()
            if observation is not None:
                asx_observation = AsxObservation(observation)
                print(asx_observation.to_json_obj())
                print(info)
    finally:
        # Release the environment even when the loop is interrupted.
        env.close()
def run(env_name, agent_name, nb_episodes, render_freq, render_mode):
    """Run ``nb_episodes`` episodes of a gym environment with a named agent.

    Args:
        env_name: Environment id passed to ``gym.make``.
        agent_name: One of ``'RandomAgent'``, ``'EpsilonGreedyAgent'``,
            ``'GradientBanditAgent'``, ``'ucb'`` or ``'ThompsonSampling'``.
        nb_episodes: Number of episodes to run.
        render_freq: Call ``env.render()`` whenever the global step counter
            (which keeps counting across episodes) is a multiple of this
            value. A falsy value such as ``0`` disables periodic rendering.
        render_mode: Mode passed to the initial ``env.render`` call.

    Raises:
        ValueError: If ``agent_name`` is not one of the supported names.
    """
    logger.set_level(logger.INFO)

    env = gym.make(env_name)

    # You provide the directory to write to (can be an existing
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    # outdir = '/tmp/random-agent-results'
    # video_callable = None if render_mode == 'human' else False
    # env = wrappers.Monitor(env, directory=outdir, force=True, video_callable=video_callable)
    # env = DynamicMonitor(env, directory=outdir, force=True, video_callable=video_callable)

    try:
        env.render(mode=render_mode)
        env.seed(0)

        action_space = env.env.action_space
        if agent_name == 'RandomAgent':
            agent = RandomAgent(action_space)
        elif agent_name == 'EpsilonGreedyAgent':
            agent = EpsilonGreedy(action_space)
        elif agent_name == 'GradientBanditAgent':
            agent = GradientBandit(action_space)
        elif agent_name == 'ucb':
            agent = ucb(action_space)
        elif agent_name == 'ThompsonSampling':
            agent = ThompsonSampling(action_space)
        else:
            # Fail with a clear message instead of an unbound-variable
            # error further down.
            raise ValueError(f'Unknown agent name: {agent_name!r}')

        step = 0
        # reward and done are initialised once and deliberately carried over
        # between episodes, so the first act() of a new episode still sees
        # the values from the last step of the previous one.
        reward = 0
        done = False

        for episode in range(nb_episodes):
            print(f'--------- Episode {episode} ---------')
            ob = env.reset()
            # NOTE(review): the agent is rebound to whatever reset() returns;
            # this assumes reset() returns the agent to use -- confirm.
            agent = agent.reset()
            while True:
                step += 1

                # action space may have changed
                # agent = EpsilonGreedy(env.env.action_space)

                action = agent.act(ob, reward, done)
                ob, reward, done, _ = env.step(action)
                if done:
                    break
                # Periodic rendering; the terminal step of an episode is
                # never rendered because of the break above.
                if render_freq and step % render_freq == 0:
                    env.render()
    finally:
        # Close the wrapped environment (env.env) even if an episode raised.
        env.env.close()
# from agents.buy_and_keep_agent import BuyAndKeepAgent
from agents.random_agent import RandomAgent
from asx_gym.envs import AsxObservation

# Script: drive the AsxGym-v0 environment with a RandomAgent for
# 200000 * 24 steps, printing every non-None observation as a JSON object
# together with the step's info.
# NOTE(review): gym, date and INFO are used below but not imported in this
# snippet; presumably they are imported elsewhere in the file -- confirm.
gym.logger.set_level(INFO)
start_date = date(2019, 5, 1)
simulate_company_list = [2, 3, 4, 5, 6, 44, 300, 67, 100, 200]
# simulate_company_list = [3]
env = gym.make(
    "AsxGym-v0",
    start_date=start_date,
    simulate_company_list=simulate_company_list,
)
try:
    stock_agent = RandomAgent(env)
    # stock_agent = RandomAgent(env, min_volume=100, max_volume=500)
    # stock_agent = BuyAndKeepAgent(env, 3)
    observation = env.reset()
    for _ in range(200000 * 24):
        env.render()
        observation, reward, done, info = env.step(stock_agent.action())
        if done:
            # Episode finished: emit the summary images, then restart both
            # the environment and the agent.
            env.insert_summary_images(30)
            observation = env.reset()
            stock_agent.reset()
        if observation is not None:
            asx_observation = AsxObservation(observation)
            print(asx_observation.to_json_obj())
            print(info)
finally:
    # Release the environment even when the loop is interrupted.
    env.close()