Example #1

    # Assumes: from ray import tune; trainer_class, experiment_name, data_dir,
    # and get_trainer_logger_creator come from the surrounding project.
    hyperparams = {
        # ... (preceding config keys omitted in this excerpt)

        # Step size of SGD (learning rate).
        "lr": 0.0005,
        # PPO clip parameter.
        "clip_param": 0.03,
        # Clip param for the value function. Note that this is sensitive to the
        # scale of the rewards. If your expected V is large, increase this.
        "vf_clip_param": 10.0,
        # If specified, clip the global norm of gradients by this amount.
        "grad_clip": None,
        # Target value for KL divergence.
        "kl_target": 0.001,
    }

    tune.run(
        run_or_experiment=trainer_class,
        name=experiment_name,
        metric="br_reward_mean",
        config=hyperparams,
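        # search_alg=None: no hyperparameter search here; num_samples=2 just
        # launches two trials of this config.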
        num_samples=2,
        search_alg=None,
        mode="max",
        local_dir=data_dir(),
        stop={"timesteps_total": int(3e6)},
        loggers=[
            get_trainer_logger_creator(base_dir=data_dir(),
                                       scenario_name=experiment_name,
                                       should_log_result_fn=lambda result:
                                       result["training_iteration"] % 20 == 0)
        ],
    )

Example #2

    # Assumes the same project helpers as Example #1, plus:
    #   from ray.tune import choice
    #   from ray.tune.suggest.hyperopt import HyperOptSearch  (Ray 1.x path)
    hyperparams = {
        # ... (preceding config keys omitted in this excerpt)

        # Size of a batch sampled from the replay buffer for training. If
        # async_updates is set, each worker instead returns gradients for a
        # batch of this size.
        "train_batch_size": choice([4096, 2048, 1024]),

        # Whether to compute priorities on workers.
        "worker_side_prioritization": False,
        # Prevent train iterations from taking less than this many seconds.
        "min_iter_time_s": 0,
        # Minimum env steps to optimize for per train call. This value does
        # not affect learning (JB: this is a lie!), only the length of train iterations.
        "timesteps_per_iteration": 0,
    }

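    # HyperOpt's TPE sampler drives the search; the first n_initial_points
    # trials are sampled at random before the TPE model starts suggesting.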
    search = HyperOptSearch(metric="br_reward_mean", mode="max", n_initial_points=20)

    tune.run(run_or_experiment=trainer_class,
             name=experiment_name,
             metric="br_reward_mean",
             config=hyperparams,
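             # Effectively unlimited trials: HyperOpt keeps proposing configs
             # until the run is stopped externally (each trial still stops at
             # the timesteps_total criterion below).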
             num_samples=200000000,
             search_alg=search,
             mode="max",
             local_dir=data_dir(),
             stop={"timesteps_total": int(3e6)},
             loggers=[get_trainer_logger_creator(
                 base_dir=data_dir(),
                 scenario_name=experiment_name,
                 should_log_result_fn=lambda result: result["training_iteration"] % 20 == 0)],
             )
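
Not shown above: in Ray 1.x, tune.run returns an ExperimentAnalysis object.
A minimal sketch of reading the best trial back out of the HyperOpt sweep
(keeping the return value in a variable named analysis and trimming the
argument list are assumptions, not part of the original example):

    analysis = tune.run(run_or_experiment=trainer_class,
                        name=experiment_name,
                        metric="br_reward_mean",
                        mode="max",
                        config=hyperparams,
                        search_alg=search,
                        num_samples=200000000,
                        local_dir=data_dir(),
                        stop={"timesteps_total": int(3e6)})
    # Because metric/mode were passed to tune.run, best_config and best_result
    # refer to the trial with the highest br_reward_mean.
    print(analysis.best_config)
    print(analysis.best_result["br_reward_mean"])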