def main(_):
    """Build a two-actor distributed D4PG program and launch it.

    The environment factory is bound to the task read from ``FLAGS.task``;
    the network factory wraps ``helpers.make_networks`` with no extra
    arguments. The program is launched with XM docker resources derived
    from the built program.

    Args:
        _: Unused positional argument (presumably the leftover argv handed
            over by the application runner -- confirm against the caller).
    """
    make_env = lp_utils.partial_kwargs(helpers.make_environment, task=FLAGS.task)
    make_nets = lp_utils.partial_kwargs(helpers.make_networks)

    agent = d4pg.DistributedD4PG(
        environment_factory=make_env,
        network_factory=make_nets,
        num_actors=2,
    )
    program = agent.build()

    # Resource requirements are computed from the fully built program.
    resources = lp_utils.make_xm_docker_resources(program)
    lp.launch(program, xm_resources=resources)
def main(_):
    """Build a two-actor distributed D4PG program and run it locally.

    The environment factory is bound to the task read from ``FLAGS.task``;
    the network factory wraps ``helpers.make_networks`` with no extra
    arguments. The built program is launched with
    ``lp.LaunchType.LOCAL_MULTI_PROCESSING``.

    Args:
        _: Unused positional argument (presumably the leftover argv handed
            over by the application runner -- confirm against the caller).
    """
    make_env = lp_utils.partial_kwargs(helpers.make_environment, task=FLAGS.task)
    make_nets = lp_utils.partial_kwargs(helpers.make_networks)

    agent = d4pg.DistributedD4PG(
        environment_factory=make_env,
        network_factory=make_nets,
        num_actors=2,
    )

    lp.launch(agent.build(), lp.LaunchType.LOCAL_MULTI_PROCESSING)
def main(_):
    """Build a four-actor distributed D4PG program and launch it.

    The environment factory is ``helpers.make_environment`` with its
    ``domain_name`` and ``task_name`` bound from ``_DOMAIN.value`` and
    ``_TASK.value``. The actor step budget comes from
    ``_MAX_ACTOR_STEPS.value``.

    Args:
        _: Unused positional argument (presumably the leftover argv handed
            over by the application runner -- confirm against the caller).
    """
    # Pin the requested domain/task onto the environment constructor.
    environment_factory = functools.partial(
        helpers.make_environment,
        domain_name=_DOMAIN.value,
        task_name=_TASK.value,
    )

    # Assemble the distributed agent; the two factories are passed
    # positionally, exactly as the constructor receives them today.
    agent = d4pg.DistributedD4PG(
        environment_factory,
        make_networks,
        max_actor_steps=_MAX_ACTOR_STEPS.value,
        num_actors=4,
    )

    # Build the program graph and hand it to Launchpad.
    lp.launch(programs=agent.build())
def test_control_suite(self):
    """Smoke-test distributed D4PG on a fake bounded continuous environment.

    Builds the program with two actors and a small replay configuration,
    disables the learner node's automatic run, launches with the
    ``'test_mt'`` launch type, and then steps the learner by hand five
    times. The test passes if none of this raises.
    """

    def environment_factory(x):
        # The single argument is ignored; every call yields a fresh fake env.
        return fakes.ContinuousEnvironment(bounded=True)

    program = d4pg.DistributedD4PG(
        environment_factory=environment_factory,
        network_factory=make_networks,
        num_actors=2,
        batch_size=32,
        min_replay_size=32,
        max_replay_size=1000,
    ).build()

    # Exactly one learner node is expected; unpacking fails otherwise.
    [learner_node] = program.groups['learner']
    # Keep the learner from running on launch so it can be stepped manually.
    learner_node.disable_run()

    lp.launch(program, launch_type='test_mt')

    handle = learner_node.create_handle()
    learner: acme.Learner = handle.dereference()

    for _step in range(5):
        learner.step()