Example 1
def main(_):
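    # Environment factory bound to the domain/task command-line flags.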
    environment_factory = lp_utils.partial_kwargs(helpers.make_environment,
                                                  domain_name=FLAGS.domain,
                                                  task_name=FLAGS.task)

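    # Rate parameters: with one gradient step per actor step, Reverb's
    # samples-per-insert target works out to 1.0 * 32 * 20 = 640.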
    batch_size = 32
    sequence_length = 20
    gradient_steps_per_actor_step = 1.0
    samples_per_insert = (gradient_steps_per_actor_step * batch_size *
                          sequence_length)
    num_actors = 1

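    # Assemble the distributed SVG0-with-prior agent as a Launchpad program.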
    program = svg0_prior.DistributedSVG0(
        environment_factory=environment_factory,
        network_factory=lp_utils.partial_kwargs(
            svg0_prior.make_default_networks),
        batch_size=batch_size,
        sequence_length=sequence_length,
        samples_per_insert=samples_per_insert,
        entropy_regularizer_cost=1e-4,
        max_replay_size=int(2e6),
        target_update_period=250,
        num_actors=num_actors).build()

    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example 2
def main(_):
    environment_factory = lp_utils.partial_kwargs(helpers.make_environment,
                                                  task=FLAGS.task)

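    # Distributed D4PG with two actor processes; .build() returns the
    # Launchpad program topology.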
    program = d4pg.DistributedD4PG(environment_factory=environment_factory,
                                   network_factory=lp_utils.partial_kwargs(
                                       helpers.make_networks),
                                   num_actors=2).build()

    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example 3
def main(_):
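  # build_experiment_config() is assumed to be defined earlier in this script
  # and to return the experiments.ExperimentConfig consumed below.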
  config = build_experiment_config()
  # Evaluation is disabled for performance reasons. Set `num_eval_episodes` to
  # a positive number and remove `evaluator_factories=[]` to enable it.
  if FLAGS.run_distributed:
    program = experiments.make_distributed_experiment(
        experiment=config, num_actors=4)
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
  else:
    experiments.run_experiment(experiment=config, num_eval_episodes=0)
Example 4
def main(_):
  config = build_experiment_config()
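  # Either launch a multi-process Launchpad program or run everything in a
  # single process with periodic evaluation.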
  if FLAGS.run_distributed:
    program = experiments.make_distributed_experiment(
        experiment=config, num_actors=4)
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
  else:
    experiments.run_experiment(
        experiment=config,
        eval_every=FLAGS.eval_every,
        num_eval_episodes=FLAGS.evaluation_episodes)
Example 5
def main(_):
    task = FLAGS.task
    environment_factory = lambda seed: helpers.make_environment(task)
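    # The factory ignores its seed argument, so every actor builds the same
    # environment.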
    program = sac.DistributedSAC(
        environment_factory=environment_factory,
        network_factory=sac.make_networks,
        config=sac.SACConfig(num_sgd_steps_per_step=64),
        num_actors=4,
        seed=1,
        max_number_of_steps=100).build()

    # Launch experiment.
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example 6
def main(_):
    task = FLAGS.task
    environment_factory = lambda seed: helpers.make_environment(task)
    sac_config = sac.SACConfig(num_sgd_steps_per_step=64)
    sac_builder = sac.SACBuilder(sac_config)

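    # Match the discriminator's batch size to SAC's effective batch
    # (batch_size * num_sgd_steps_per_step) so both consume data at the same
    # rate.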
    ail_config = ail.AILConfig(direct_rl_batch_size=sac_config.batch_size *
                               sac_config.num_sgd_steps_per_step)

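    # AIL networks: a discriminator over (observation, action, next
    # observation) plus the underlying SAC networks used for direct RL.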
    def network_factory(spec: specs.EnvironmentSpec) -> ail.AILNetworks:
        def discriminator(*args, **kwargs) -> networks_lib.Logits:
            return ail.DiscriminatorModule(
                environment_spec=spec,
                use_action=True,
                use_next_obs=True,
                network_core=ail.DiscriminatorMLP([4, 4]))(*args, **kwargs)

        discriminator_transformed = hk.without_apply_rng(
            hk.transform_with_state(discriminator))

        return ail.AILNetworks(ail.make_discriminator(
            spec, discriminator_transformed),
                               imitation_reward_fn=ail.rewards.gail_reward(),
                               direct_rl_networks=sac.make_networks(spec))

    def policy_network(
            network: ail.AILNetworks,
            eval_mode: bool = False) -> actor_core_lib.FeedForwardPolicy:
        return sac.apply_policy_and_sample(network.direct_rl_networks,
                                           eval_mode=eval_mode)

    program = ail.DistributedAIL(
        environment_factory=environment_factory,
        rl_agent=sac_builder,
        config=ail_config,
        network_factory=network_factory,
        seed=0,
        batch_size=sac_config.batch_size * sac_config.num_sgd_steps_per_step,
        make_demonstrations=functools.partial(
            helpers.make_demonstration_iterator,
            dataset_name=FLAGS.dataset_name),
        policy_network=policy_network,
        evaluator_policy_network=(lambda n: policy_network(n, eval_mode=True)),
        num_actors=4,
        max_number_of_steps=100,
        discriminator_loss=ail.losses.gail_loss()).build()

    # Launch experiment.
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example 7
def main(_):
  task = FLAGS.task
  env_factory = lambda seed: helpers.make_environment(task)

  # Build the spec from a throwaway environment instance; the factory ignores
  # its seed argument, so 0 is passed purely as a placeholder.
  environment_spec = specs.make_environment_spec(env_factory(0))
  program = td3.DistributedTD3(
      environment_factory=env_factory,
      environment_spec=environment_spec,
      network_factory=td3.make_networks,
      config=td3.TD3Config(),
      num_actors=4,
      seed=1,
      max_number_of_steps=100).build()

  lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example 8
def main(_):
    task = FLAGS.task
    environment_factory = lambda seed: helpers.make_environment(task)
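    # 2048 // 16 = 128 unrolls of 16 steps each, i.e. 2048 transitions per
    # learner batch, split into 32 minibatches for 10 epochs per update.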
    config = ppo.PPOConfig(unroll_length=16,
                           num_minibatches=32,
                           num_epochs=10,
                           batch_size=2048 // 16)
    program = ppo.DistributedPPO(environment_factory=environment_factory,
                                 network_factory=ppo.make_continuous_networks,
                                 config=config,
                                 seed=FLAGS.seed,
                                 num_actors=4,
                                 max_number_of_steps=100).build()

    # Launch experiment.
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example 9
def main(_):
    task = FLAGS.env_name
    environment_factory = lambda seed: helpers.make_environment(task)
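    # ValueDice is an imitation-learning agent; demonstrations are supplied
    # below via make_demonstrations.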
    config = value_dice.ValueDiceConfig(num_sgd_steps_per_step=64)
    agent = value_dice.DistributedValueDice(
        environment_factory=environment_factory,
        network_factory=value_dice.make_networks,
        config=config,
        num_actors=4,
        log_to_bigtable=True,
        max_number_of_steps=100,
        seed=1,
        make_demonstrations=functools.partial(
            helpers.make_demonstration_iterator,
            dataset_name=FLAGS.dataset_name))
    program = agent.build()

    # Launch experiment.
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))