Exemple #1
0
def run_training(
    sub_id: int, run_seed: int, run_options: Dict[str, Any], process_queue: Queue
) -> None:
    """
    Set up and run a single training session.

    Builds the environment manager, the trainers and the trainer controller
    from the command line options, puts ``True`` on ``process_queue`` once
    they exist, and then starts learning.

    :param sub_id: Unique id for training session; appended to the run id and
        used to offset the base port.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training, keyed by option name.
    :param process_queue: Queue used to send signal back to main.
    """

    def option_or_none(key):
        # The literal string "None" stands for "option not given".
        value = run_options[key]
        if value != "None":
            return value
        return None

    # Docker volume name (None when no docker target was passed).
    docker_target_name = option_or_none("--docker-target-name")

    # General parameters, converted to the types used below.
    env_path = option_or_none("--env")
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = option_or_none("--curriculum")
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    multi_gpu = run_options["--multi-gpu"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = option_or_none("--sampler")

    # Resolve every path, rooting it in the docker volume when one is given.
    session_name = "{}-{}".format(run_id, sub_id)
    if docker_target_name:
        volume_root = "/{}".format(docker_target_name)
        trainer_config_path = "{}/{}".format(volume_root, trainer_config_path)
        if curriculum_folder is not None:
            curriculum_folder = "{}/{}".format(volume_root, curriculum_folder)
        model_path = "{}/models/{}".format(volume_root, session_name)
        summaries_dir = "{}/summaries".format(volume_root)
    else:
        model_path = "./models/" + session_name
        summaries_dir = "./summaries"

    trainer_config = load_config(trainer_config_path)

    # Each session gets its own block of num_envs ports above the base port.
    session_port = base_port + (sub_id * num_envs)
    env_factory = create_environment_factory(
        env_path, docker_target_name, no_graphics, run_seed, session_port
    )
    env = SubprocessEnvManager(env_factory, num_envs)

    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters, run_seed
    )

    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        run_id,
        model_path,
        keep_checkpoints,
        train_model,
        load_model,
        run_seed,
        maybe_meta_curriculum,
        multi_gpu,
    )

    # Create the controller that drives the training loop.
    controller = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        train_model,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    controller.start_learning(env)
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Set up and run a single training session from parsed options.

    Builds the environment manager, the trainers and the trainer controller,
    puts ``True`` on ``process_queue`` once they exist, and then starts
    learning.

    :param sub_id: Unique id for training session; appended to the run id and
        used to offset the base port.
    :param run_seed: Random seed used for training.
    :param options: parsed command line arguments
    :param process_queue: Queue used to send signal back to main.
    """
    docker_target_name = options.docker_target_name
    run_id = options.run_id
    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder

    # Resolve every path, rooting it in the docker volume when one is given.
    session_name = "{}-{}".format(run_id, sub_id)
    if docker_target_name:
        volume_root = "/{}".format(docker_target_name)
        trainer_config_path = "{}/{}".format(volume_root, trainer_config_path)
        if curriculum_folder is not None:
            curriculum_folder = "{}/{}".format(volume_root, curriculum_folder)
        model_path = "{}/models/{}".format(volume_root, session_name)
        summaries_dir = "{}/summaries".format(volume_root)
    else:
        model_path = "./train/" + session_name
        summaries_dir = "./summaries"

    trainer_config = load_config(trainer_config_path)

    # Each session gets its own block of num_envs ports above the base port.
    session_port = options.base_port + (sub_id * options.num_envs)
    env_factory = create_environment_factory(
        options.env_path,
        docker_target_name,
        options.no_graphics,
        run_seed,
        session_port,
        options.env_args,
    )
    env = SubprocessEnvManager(env_factory, options.num_envs)

    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, env.reset_parameters, run_seed
    )

    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )

    # Create the controller that drives the training loop.
    controller = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        options.fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    controller.start_learning(env)
def run_training(
    sub_id: int,
    run_seed: int,
    run_options: Dict[str, Any],
    process_queue: Queue,
    inject_create_environment_factory: Any = None,
) -> None:
    """
    Launches training session.

    Builds the environment manager, the trainers and the trainer controller
    from the command line options, puts ``True`` on ``process_queue`` once
    they exist, and then starts learning.

    :param sub_id: Unique id for training session; appended to the run id and
        used to offset the base port.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    :param process_queue: Queue used to send signal back to main.
    :param inject_create_environment_factory: Optional callable used in place
        of ``create_environment_factory``. It is called with the same five
        positional arguments (env path, docker target name, no-graphics flag,
        seed, port) and must return an environment factory that accepts a
        worker id. When ``None`` (the default) the standard
        ``create_environment_factory`` is used.
    """

    def option_or_none(key):
        # The literal string "None" stands for "option not given".
        value = run_options[key]
        return value if value != "None" else None

    # Docker Parameters
    docker_target_name = option_or_none("--docker-target-name")

    # General parameters
    env_path = option_or_none("--env")
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = option_or_none("--curriculum")
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    multi_gpu = run_options["--multi-gpu"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = option_or_none("--sampler")

    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
        model_path = "./train/{run_id}-{sub_id}".format(run_id=run_id, sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_target_name, run_id=run_id, sub_id=sub_id
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_target_name
        )

    trainer_config = load_config(trainer_config_path)

    # Pick the factory builder once so both cases share a single call site.
    build_env_factory = (
        create_environment_factory
        if inject_create_environment_factory is None
        else inject_create_environment_factory
    )
    env_factory = build_env_factory(
        env_path,
        docker_target_name,
        no_graphics,
        run_seed,
        base_port + (sub_id * num_envs),
    )

    # HACK: a single environment runs in-process through SimpleEnvManager
    # (useful for debugging); several environments use SubprocessEnvManager.
    if num_envs > 1:
        # Build a throwaway environment (worker id 9999) before starting the
        # subprocess workers, then shut down whatever it wraps.
        # NOTE(review): the original comment says this mock env exists so that
        # examples get parsed -- confirm it is still required.
        mock_env = env_factory(9999)
        try:
            # Close the inner environments when the wrapper exposes them.
            for inner_envs in mock_env._envs.values():
                for inner_env in inner_envs:
                    inner_env.unwrapped.close()
        except AttributeError:
            # No such attributes on this wrapper: close the wrapper itself.
            mock_env.close()
        env = SubprocessEnvManager(env_factory, num_envs)
    else:
        env = SimpleEnvManager(env_factory(0))

    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters, run_seed
    )

    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        run_id,
        model_path,
        keep_checkpoints,
        train_model,
        load_model,
        run_seed,
        maybe_meta_curriculum,
        multi_gpu,
    )

    # Create controller and begin training.
    tc = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        train_model,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    tc.start_learning(env)
Exemple #4
0
def _check_environment_trains(env):
    """
    Train on ``env`` with a small fixed PPO config and assert that it learns.

    Wraps ``env`` in a ``SimpleEnvManager``, runs a full ``TrainerController``
    learning session with models and summaries written to a temporary
    directory, and then asserts that every measure value reported by the
    controller is a number greater than 0.99.

    :param env: Environment to train on; passed straight to ``SimpleEnvManager``.
    """
    ppo_yaml = """
        default:
            trainer: ppo
            batch_size: 16
            beta: 5.0e-3
            buffer_size: 64
            epsilon: 0.2
            hidden_units: 128
            lambd: 0.95
            learning_rate: 5.0e-3
            max_steps: 2500
            memory_size: 256
            normalize: false
            num_epoch: 3
            num_layers: 2
            time_horizon: 64
            sequence_length: 64
            summary_freq: 500
            use_recurrent: false
            reward_signals:
                extrinsic:
                    strength: 1.0
                    gamma: 0.99
    """
    run_name = "id"
    training_seed = 1337
    checkpoint_interval = 99999

    # A single scratch directory serves as both model and summary location.
    with tempfile.TemporaryDirectory() as scratch_dir:
        parsed_config = yaml.safe_load(ppo_yaml)
        manager = SimpleEnvManager(env)

        trainers = initialize_trainers(
            trainer_config=parsed_config,
            external_brains=manager.external_brains,
            summaries_dir=scratch_dir,
            run_id=run_name,
            model_path=scratch_dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=training_seed,
            meta_curriculum=None,
            multi_gpu=False,
        )
        print(trainers)

        controller = TrainerController(
            trainers=trainers,
            summaries_dir=scratch_dir,
            model_path=scratch_dir,
            run_id=run_name,
            meta_curriculum=None,
            train=True,
            training_seed=training_seed,
            fast_simulation=True,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=checkpoint_interval,
        )

        # Run the whole training session, then inspect the reported measures.
        controller.start_learning(manager)
        print(controller._get_measure_vals())
        for _brain_name, reward in controller._get_measure_vals().items():
            assert not math.isnan(reward)
            assert reward > 0.99