예제 #1
0
def main(_):
    """Assemble a two-actor distributed D4PG program and launch it.

    The environment factory is `helpers.make_environment` with `task` bound
    to `FLAGS.task`; the network factory is `helpers.make_networks` with no
    extra keyword arguments bound. The built program is handed to `lp.launch`
    together with the XM docker resources derived from that same program.

    Args:
        _: Unused positional argument (presumably the leftover argv supplied
            by the application runner -- confirm against the caller).
    """
    # Factories are wrapped up front so the agent constructor call stays flat.
    env_factory = lp_utils.partial_kwargs(
        helpers.make_environment, task=FLAGS.task)
    net_factory = lp_utils.partial_kwargs(helpers.make_networks)

    agent = d4pg.DistributedD4PG(
        environment_factory=env_factory,
        network_factory=net_factory,
        num_actors=2,
    )
    program = agent.build()

    # Resource requirements are computed from the built program itself.
    resources = lp_utils.make_xm_docker_resources(program)
    lp.launch(program, xm_resources=resources)
예제 #2
0
def main(_):
    """Assemble a two-actor distributed D4PG program and launch it locally.

    The environment factory is `helpers.make_environment` with `task` bound
    to `FLAGS.task`; the network factory is `helpers.make_networks` with no
    extra keyword arguments bound. The built program is launched with
    `lp.LaunchType.LOCAL_MULTI_PROCESSING`.

    Args:
        _: Unused positional argument (presumably the leftover argv supplied
            by the application runner -- confirm against the caller).
    """
    # Factories are wrapped up front so the agent constructor call stays flat.
    env_factory = lp_utils.partial_kwargs(
        helpers.make_environment, task=FLAGS.task)
    net_factory = lp_utils.partial_kwargs(helpers.make_networks)

    agent = d4pg.DistributedD4PG(
        environment_factory=env_factory,
        network_factory=net_factory,
        num_actors=2,
    )
    program = agent.build()

    # The launch type is passed positionally, exactly as the second argument.
    lp.launch(program, lp.LaunchType.LOCAL_MULTI_PROCESSING)
예제 #3
0
def main(_):
    """Build a four-actor distributed D4PG program and launch it.

    The environment factory is `helpers.make_environment` with `domain_name`
    and `task_name` bound from the `_DOMAIN` and `_TASK` values; the network
    factory is the module-level `make_networks`. The actor step budget comes
    from `_MAX_ACTOR_STEPS`.

    Args:
        _: Unused positional argument (presumably the leftover argv supplied
            by the application runner -- confirm against the caller).
    """
    # Bind the requested domain/task onto the environment constructor.
    environment_kwargs = dict(
        domain_name=_DOMAIN.value,
        task_name=_TASK.value,
    )
    environment_factory = functools.partial(
        helpers.make_environment, **environment_kwargs)

    # Environment and network factories are passed positionally, in that order.
    agent = d4pg.DistributedD4PG(
        environment_factory,
        make_networks,
        max_actor_steps=_MAX_ACTOR_STEPS.value,
        num_actors=4,
    )
    program = agent.build()

    # Hand the built program to the launcher by keyword.
    lp.launch(programs=program)
예제 #4
0
    def test_control_suite(self):
        """Smoke-test the distributed D4PG agent on a fake continuous env.

        Builds a two-actor program backed by `fakes.ContinuousEnvironment`
        (bounded), disables the learner node's own run, launches the program
        with the `'test_mt'` launch type, and then steps the learner by hand
        five times. The test passes if nothing raises.
        """

        def make_fake_environment(x):
            # The factory's single argument is ignored, as in a bare lambda.
            return fakes.ContinuousEnvironment(bounded=True)

        replay_settings = dict(
            batch_size=32,
            min_replay_size=32,
            max_replay_size=1000,
        )
        agent = d4pg.DistributedD4PG(
            environment_factory=make_fake_environment,
            network_factory=make_networks,
            num_actors=2,
            **replay_settings,
        )
        program = agent.build()

        # Single-element unpacking fails unless the group holds exactly one
        # learner node.
        [learner_node] = program.groups['learner']
        # The learner must not run on its own; it is stepped manually below.
        learner_node.disable_run()

        lp.launch(program, launch_type='test_mt')

        learner_handle = learner_node.create_handle()
        learner: acme.Learner = learner_handle.dereference()

        num_manual_steps = 5
        for _ in range(num_manual_steps):
            learner.step()