def init_agent(self):
    # DDPG: separate actor/critic learning rates, plus tau for soft target updates.
    observation_space, action_space = utils.get_gym_spaces(self.runner.make_env)
    agent = BaseDDPGAgent(
        observation_space,
        action_space,
        actor_lr=self.hparams.actor_lr,
        critic_lr=self.hparams.critic_lr,
        gamma=self.hparams.gamma,
        tau=self.hparams.tau)
    return agent
def init_agent(self):
    # A2C: a single optimizer learning rate (reusing the actor_lr hyperparameter).
    observation_space, action_space = utils.get_gym_spaces(self.runner.make_env)
    agent = BaseA2CAgent(
        observation_space,
        action_space,
        lr=self.hparams.actor_lr,
        gamma=self.hparams.gamma,
        lmbda=self.hparams.lmbda,
        alpha=self.hparams.alpha,
        beta=self.hparams.beta)
    return agent
def init_agent(self):
    # DQN: epsilon-greedy exploration annealed over num_eps_steps, periodic
    # target-network updates, and optional Double DQN.
    observation_space, action_space = utils.get_gym_spaces(self.runner.make_env)
    agent = BaseDQNAgent(
        observation_space,
        action_space,
        double_dqn=self.hparams.double_dqn,
        lr=self.hparams.actor_lr,
        gamma=self.hparams.gamma,
        num_eps_steps=self.hparams.num_eps_steps,
        target_update_interval=self.hparams.target_update_interval)
    return agent
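# Each init_agent above derives the spaces from an environment factory rather
# than a live environment. A minimal sketch of such a helper, assuming a
# Gym-style API; the name matches utils.get_gym_spaces, but this body is an
# illustration, not the library's actual implementation:

def get_gym_spaces(make_env):
    # Instantiate a throwaway environment just to read its space definitions.
    env = make_env()
    observation_space = env.observation_space
    action_space = env.action_space
    env.close()
    return observation_space, action_space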
def test_gym_runner(env_id: str):
    runner = GymRunner(env_id)
    observation_space, action_space = get_gym_spaces(runner.make_env)
    agent = GymRandomAgent(observation_space, action_space)
    trajectory_list = runner.rollout(agent)
    runner.close()

    # One trajectory per parallel environment; every field is a 2-D batch.
    assert len(trajectory_list) == runner.n_envs
    assert trajectory_list[0].obs.ndim == 2
    assert trajectory_list[0].action.ndim == 2
    assert trajectory_list[0].reward.ndim == 2
    assert trajectory_list[0].next_obs.ndim == 2
    assert trajectory_list[0].done.ndim == 2
def test_gym_runner_smoke(env_id: str):
    # Minimal smoke test (renamed to avoid clashing with the test above):
    # a rollout completes end to end without raising.
    runner = GymRunner(env_id)
    agent = GymRandomAgent(*get_gym_spaces(runner.make_env))
    runner.rollout(agent)
    runner.close()
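# Both tests take env_id as an argument, presumably injected by pytest. One
# possible wiring, assuming a parametrized fixture; the environment ids here
# are placeholders, not the project's actual test matrix:

import pytest

@pytest.fixture(params=['CartPole-v1', 'Pendulum-v1'])
def env_id(request) -> str:
    # Each test above runs once per environment id.
    return request.param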
def init_agent(self):
    observation_space, action_space = utils.get_gym_spaces(self.runner.make_env)
    return GymRandomAgent(observation_space, action_space)
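# The random agent used above only needs to map observations to sampled
# actions. A minimal sketch of a compatible interface, assuming the runner
# calls an act() method; the class name matches GymRandomAgent, but this body
# is illustrative and the real class may expose a different method:

class GymRandomAgent:
    def __init__(self, observation_space, action_space):
        self.observation_space = observation_space
        self.action_space = action_space

    def act(self, obs):
        # Ignore the observation and sample uniformly from the action space.
        return self.action_space.sample()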