Example #1
import os
import shutil

from ray import tune
from ray.tune import run, Experiment


def aug_opt(
        name,
        train_spec,
        scheduler,
        inner_config_dir,
        resume=None,
        fresh=False,
        search_alg=None,
):
    # Point the trainable at the nested config directory.
    train_spec['config']['config_dir'] = inner_config_dir
    # Default behaviour: resume unless a fresh run was explicitly requested.
    if resume is None:
        resume = not fresh

    experiment = Experiment.from_json(name=name, spec=train_spec)
    # A fresh run wipes any previous results for this experiment.
    if fresh and os.path.exists(experiment.local_dir):
        shutil.rmtree(experiment.local_dir)

    run(experiment,
        name=name,
        scheduler=scheduler,
        reuse_actors=True,
        verbose=True,
        resume=resume,
        search_alg=search_alg,
        trial_name_creator=tune.function(trial_str_creator))  # trial_str_creator: helper defined elsewhere in the source module
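
# Hypothetical usage sketch, not part of the original example: how aug_opt might
# be invoked under the older Ray Tune API this code targets. The trainable name,
# hyperparameters, and paths below are illustrative assumptions.
#
#     from ray.tune.schedulers import AsyncHyperBandScheduler
#
#     spec = {
#         'run': 'PPO',                     # assumed trainable
#         'config': {'lr': 1e-4},           # assumed hyperparameters
#         'local_dir': '/tmp/ray_results',  # assumed output directory
#     }
#     asha = AsyncHyperBandScheduler(reward_attr='episode_reward_mean')
#     aug_opt('aug_opt_demo', spec, asha, inner_config_dir='/tmp/cfg', fresh=True)
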
def run_experiment(args, parser):
    # args.ray_object_store_memory = int(1e10)
    args.ray_redis_max_memory = int(2e9)  # cap Redis memory at ~2 GB

    if args.config_file:
        with open(args.config_file) as f:
            exp = yaml.safe_load(f)
    else:
        raise Exception('No config file!')

    exp = merge_dicts(exp, args.config)
    log.info('Num workers: %d, num_envs_per_worker: %d',
             exp['config']['num_workers'],
             exp['config']['num_envs_per_worker'])

    if args.cfg_mixins is not None:
        for cfg_mixin_file in args.cfg_mixins:
            with open(cfg_mixin_file, 'r') as f:
                override_cfg = yaml.safe_load(f)
                log.info('Overriding parameters from %s: %r', cfg_mixin_file,
                         override_cfg)
                exp = merge_dicts(exp, override_cfg)

    if not exp.get("run"):
        parser.error("the following arguments are required: --run")
    if not exp.get("env") and not exp.get("config", {}).get("env"):
        parser.error("the following arguments are required: --env")

    if args.ray_num_nodes:
        cluster = Cluster()
        for _ in range(args.ray_num_nodes):
            cluster.add_node(
                num_cpus=args.ray_num_cpus or 1,
                num_gpus=args.ray_num_gpus or 0,
                object_store_memory=args.ray_object_store_memory,
                redis_max_memory=args.ray_redis_max_memory,
            )
        ray.init(redis_address=cluster.redis_address,
                 local_mode=args.local_mode)
    else:
        ray.init(
            redis_address=args.redis_address,
            object_store_memory=args.ray_object_store_memory,
            redis_max_memory=args.ray_redis_max_memory,
            num_cpus=args.ray_num_cpus,
            num_gpus=args.ray_num_gpus,
            local_mode=args.local_mode,
        )

    exp = Experiment.from_json(args.experiment_name, exp)
    exp.spec['checkpoint_freq'] = 20
    if args.pbt:
        # PBT clones trials from checkpoints, so checkpoint more frequently.
        exp.spec['checkpoint_freq'] = 3

    exp.spec['checkpoint_at_end'] = True
    # exp.spec['checkpoint_score_attr'] = 'episode_reward_mean'
    exp.spec['keep_checkpoints_num'] = 5

    if args.stop_seconds > 0:
        exp.spec['stop'] = {'time_total_s': args.stop_seconds}

    # if 'multiagent' in exp.spec['config']:
    #     # noinspection PyProtectedMember
    #     make_env = ray.tune.registry._global_registry.get(ENV_CREATOR, exp.spec['config']['env'])
    #     temp_env = make_env(None)
    #     obs_space, action_space = temp_env.observation_space, temp_env.action_space
    #     temp_env.close()
    #     del temp_env
    #
    #     policies = dict(
    #         main=(None, obs_space, action_space, {}),
    #         dummy=(None, obs_space, action_space, {}),
    #     )
    #
    #     exp.spec['config']['multiagent'] = {
    #         'policies': policies,
    #         'policy_mapping_fn': function(lambda agent_id: 'main'),
    #         'policies_to_train': ['main'],
    #     }
    #
    # if args.dbg:
    #     exp.spec['config']['num_workers'] = 1
    #     exp.spec['config']['num_gpus'] = 1
    #     exp.spec['config']['num_envs_per_worker'] = 1
    #
    # if 'callbacks' not in exp.spec['config']:
    #     exp.spec['config']['callbacks'] = {}
    #
    # fps_helper = FpsHelper()
    #
    # def on_train_result(info):
    #     if 'APPO' in exp.spec['run']:
    #         samples = info['result']['info']['num_steps_sampled']
    #     else:
    #         samples = info['trainer'].optimizer.num_steps_trained
    #
    #     fps_helper.record(samples)
    #     fps = fps_helper.get_fps()
    #     info['result']['custom_metrics']['fps'] = fps
    #
    #     # remove this as currently
    #     skip_frames = exp.spec['config']['env_config']['skip_frames']
    #     info['result']['custom_metrics']['fps_frameskip'] = fps * skip_frames
    #
    # exp.spec['config']['callbacks']['on_train_result'] = function(on_train_result)
    #
    # def on_episode_end(info):
    #     episode = info['episode']
    #     stats = {
    #         'DEATHCOUNT': 0,
    #         'FRAGCOUNT': 0,
    #         'HITCOUNT': 0,
    #         'DAMAGECOUNT': 0,
    #         'KDR': 0,
    #         'FINAL_PLACE': 0,
    #         'LEADER_GAP': 0,
    #         'PLAYER_COUNT': 0,
    #         'BOT_DIFFICULTY': 0,
    #     }
    #
    #     # noinspection PyProtectedMember
    #     agent_to_last_info = episode._agent_to_last_info
    #     for agent in agent_to_last_info.keys():
    #         agent_info = agent_to_last_info[agent]
    #         for stats_key in stats.keys():
    #             stats[stats_key] += agent_info.get(stats_key, 0.0)
    #
    #     for stats_key in stats.keys():
    #         stats[stats_key] /= len(agent_to_last_info.keys())
    #
    #     episode.custom_metrics.update(stats)
    #
    # exp.spec['config']['callbacks']['on_episode_end'] = function(on_episode_end)

    extra_kwargs = {}
    if args.pbt:
        # Actor reuse can misbehave with PBT, which mutates trial configs in place.
        extra_kwargs['reuse_actors'] = False

    run(exp,
        name=args.experiment_name,
        scheduler=make_custom_scheduler(args),
        resume=args.resume,
        queue_trials=args.queue_trials,
        **extra_kwargs)
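
The excerpt calls make_custom_scheduler(args) without defining it. A plausible
sketch, assuming --pbt selects Population Based Training under the same older
reward_attr-style Tune API, with a purely illustrative mutation space:

from ray.tune.schedulers import FIFOScheduler, PopulationBasedTraining

def make_custom_scheduler(args):
    if args.pbt:
        # PBT periodically clones top trials and perturbs their hyperparameters.
        return PopulationBasedTraining(
            time_attr='time_total_s',
            reward_attr='episode_reward_mean',
            perturbation_interval=300,
            hyperparam_mutations={
                'lr': [1e-3, 1e-4, 1e-5],  # assumed search space
            },
        )
    # Otherwise fall back to plain first-in-first-out trial scheduling.
    return FIFOScheduler()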
Example #3
        config["multiagent"] = {
            'policy_graphs': policy_graph,
            'policy_mapping_fn': tune.function(lambda agent_id: POLICY_ID),
            'policies_to_train': [POLICY_ID]
        }

        env_name_list.append(env_name)
        config_list.append(config)
        # Register as rllib env
        register_env(env_name, create_env)

    exp_list = []
    for config, env_name in zip(config_list, env_name_list):
        exp_tag = {
            "run": alg_run,
            "env": env_name,
            "config": {
                **config
            },
            "checkpoint_freq": 10,
            "max_failures": 999,
            "stop": {
                "training_iteration": 50
            },
            "num_samples": 6,
        }
        exp_list.append(Experiment.from_json(args.exp_tag, exp_tag))

    trials = run_experiments(experiments=exp_list)
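
Example #3 assumes create_env, POLICY_ID, policy_graph, alg_run, and args were
defined earlier in the script. A minimal sketch of that setup, assuming the
older RLlib release (with policy graphs) this code targets; the import path and
names are assumptions based on that era's API:

from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

alg_run = 'PPO'
POLICY_ID = 'shared'  # every agent maps onto one shared policy

# Probe a temporary env for its spaces, mirroring the commented-out
# multiagent setup in Example #1.
temp_env = create_env(None)
obs_space, act_space = temp_env.observation_space, temp_env.action_space
temp_env.close()

# policy id -> (policy_graph_cls, obs_space, act_space, extra_config)
policy_graph = {
    POLICY_ID: (PPOPolicyGraph, obs_space, act_space, {}),
}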