def setUp(self) -> None:
    """Run a full RandomAgent rollout on the Sigmoid benchmark while logging.

    Creates a temporary output directory, wires a Logger to the benchmark
    environment, then plays `episodes` episodes for each seed while logging
    step index, episode index, reward and done flag on every step. The
    resulting log file path is stored in `self.log_file` for the test
    methods to inspect.
    """
    # Temp dir holds the log output; kept on self so tearDown can clean it up.
    self.temp_dir = tempfile.TemporaryDirectory()
    episodes = 80
    seeds = [0, 1, 3, 4, 5]
    experiment_name = "test_env"
    # NOTE(review): write frequencies of None presumably mean "flush only on
    # episode boundaries / close" — confirm against Logger's contract.
    logger = Logger(
        output_path=Path(self.temp_dir.name),
        experiment_name=experiment_name,
        step_write_frequency=None,
        episode_write_frequency=None,
    )
    benchmark = SigmoidBenchmark()
    env = benchmark.get_benchmark()
    agent = RandomAgent(env)
    logger.set_env(env)
    # Module-scoped logger used for the custom per-step fields below.
    env_logger = logger.add_module(env)
    for seed in seeds:
        env.seed(seed)
        # Tag every subsequent record with the current seed.
        logger.set_additional_info(seed=seed)
        logger.reset_episode()
        for episode in range(episodes):
            state = env.reset()
            done = False
            reward = 0
            step = 0
            while not done:
                action = agent.act(state, reward)
                # Log the step/episode indices before stepping the env,
                # then the resulting reward/done after.
                env_logger.log(
                    "logged_step",
                    step,
                )
                env_logger.log(
                    "logged_episode",
                    episode,
                )
                next_state, reward, done, _ = env.step(action)
                env_logger.log(
                    "reward",
                    reward,
                )
                env_logger.log(
                    "done",
                    done,
                )
                agent.train(next_state, reward)
                state = next_state
                # Advance the logger's step counter after all fields for
                # this step have been logged.
                logger.next_step()
                step += 1
            agent.end_episode(state, reward)
            logger.next_episode()
    env.close()
    logger.close()
    # Path of the written log file; assertions live in the test methods.
    self.log_file = env_logger.log_file.name
def train_chainer(
    agent, env, num_episodes=10, flatten_state=False, logger: "Logger | None" = None
):
    """Train a chainer-style agent on ``env`` for ``num_episodes`` episodes.

    Args:
        agent: chainerrl-style agent exposing ``act_and_train`` and
            ``stop_episode_and_train``.
        env: gym-style environment (``reset`` / ``step``).
        num_episodes: number of episodes to run.
        flatten_state: if True, dict observations are flattened into a
            flat float32 numpy vector before being passed to the agent.
        logger: optional Logger whose step/episode counters are advanced
            alongside the environment. (Annotation is a string so the
            module imports even when ``Logger`` is only a type-check name.)
    """

    def _prepare(obs):
        # Flatten a dict observation into a flat float32 vector.
        return np.array(flatten([obs[k] for k in obs.keys()])).astype(np.float32)

    for i in range(num_episodes):
        state = env.reset()
        if flatten_state:
            state = _prepare(state)
        done = False
        r = 0  # cumulative episode reward (for the progress message)
        reward = 0  # last per-step reward fed back to the agent
        while not done:
            action = agent.act_and_train(state, reward)
            next_state, reward, done, _ = env.step(action)
            r += reward
            state = _prepare(next_state) if flatten_state else next_state
            if logger is not None:
                logger.next_step()
        # Final update with the terminal state/reward.
        agent.stop_episode_and_train(state, reward, done=done)
        if logger is not None:
            logger.next_episode()
        # 1-based episode count so the final line reads N/N (was i/num_episodes,
        # which never reached N/N and disagreed with the sibling script).
        print(
            f"Episode {i+1}/{num_episodes}...........................................Reward: {r}"
        )
# Make chainer agent obs_size = env.observation_space.low.size action_size = env.action_space.low.size agent = make_chainer_a3c(obs_size, action_size) # Training num_episodes = 3 for i in range(num_episodes): # Reset environment to begin episode state = env.reset() # Initialize episode done = False r = 0 reward = 0 while not done: # Select action action = agent.act_and_train(state, reward) # Execute action next_state, reward, done, _ = env.step(action) r += reward logger.next_step() state = next_state logger.next_episode() # Train agent after episode has ended agent.stop_episode_and_train(state, reward, done=done) # Log episode print( f"Episode {i+1}/{num_episodes}...........................................Reward: {r}" )