def instantiate_adapt_agent(self) -> agents.Agent:
    """Builds, caches, and returns a minimal agent around a fake actor/learner.

    The agent is configured to learn from every observation
    (min_observations=1, observations_per_step=1).
    """
    fake_actor = fakes.Actor(self._spec)
    self.agent = agents.Agent(
        actor=fake_actor,
        learner=FakeLearner(),
        min_observations=1,
        observations_per_step=1,
    )
    return self.agent
def test_environment_loop(self):
    """Runs the environment loop and checks the actor's update count."""
    # Build a fake environment/actor pair and wire them into a loop.
    env = fakes.DiscreteEnvironment(episode_length=10)
    fake_actor = fakes.Actor(specs.make_environment_spec(env))
    loop = environment_loop.EnvironmentLoop(env, fake_actor)

    # Run 10 episodes of length 10. The assertion expects 100 total updates,
    # i.e. episode_length updates per episode.
    # NOTE(review): an earlier comment here claimed episode_length+1 updates
    # per episode (which would be 110) — the asserted value of 100 is kept.
    loop.run(num_episodes=10)
    self.assertEqual(fake_actor.num_updates, 100)
def _parameterized_setup(discount_spec: Optional[types.NestedSpec] = None,
                         reward_spec: Optional[types.NestedSpec] = None):
    """Common setup code that, unlike self.setUp, takes arguments.

    Args:
      discount_spec: None, or a (nested) specs.BoundedArray.
      reward_spec: None, or a (nested) specs.Array.

    Returns:
      A (actor, loop) tuple.
    """
    env_kwargs = {'episode_length': EPISODE_LENGTH}
    # Only forward the specs when provided; a falsy value is treated as
    # "use the environment's default spec".
    if discount_spec:
        env_kwargs['discount_spec'] = discount_spec
    if reward_spec:
        env_kwargs['reward_spec'] = reward_spec

    environment = fakes.DiscreteEnvironment(**env_kwargs)
    actor = fakes.Actor(specs.make_environment_spec(environment))
    loop = environment_loop.EnvironmentLoop(environment, actor)
    return actor, loop
def instantiate_test_actor(self) -> acme.Actor:
    """Creates a fake actor for the stored spec, caches it, and returns it."""
    actor = fakes.Actor(self._spec)
    self.test_actor = actor
    return actor
def setUp(self):
    """Builds a fake environment/actor pair wired into an environment loop."""
    super().setUp()
    env = fakes.DiscreteEnvironment(episode_length=EPISODE_LENGTH)
    self.actor = fakes.Actor(specs.make_environment_spec(env))
    self.loop = environment_loop.EnvironmentLoop(env, self.actor)