예제 #1
0
def _check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    meta_curriculum=None,
    success_threshold=0.9,
    env_manager=None,
):
    """Run a full training session in `env` and assert it learns.

    :param env: Simple test environment providing `final_rewards`.
    :param trainer_config: Trainer configuration dict passed to TrainerFactory.
    :param reward_processor: Callable reducing one brain's reward list to a scalar.
    :param meta_curriculum: Optional curriculum forwarded to factory and controller.
    :param success_threshold: Minimum processed reward; pass None to only check
        that setup/training runs without verifying rewards.
    :param env_manager: Optional pre-built env manager; a SimpleEnvManager
        wrapping `env` is created when omitted.
    """
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as output_dir:  # renamed: `dir` shadowed the builtin
        run_id = "id"
        save_freq = 99999
        seed = 1337
        # Clear StatsReporters so we don't write to file.
        StatsReporter.writers.clear()
        debug_writer = DebugWriter()
        StatsReporter.add_writer(debug_writer)
        if env_manager is None:
            env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            run_id=run_id,
            output_path=output_dir,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )

        tc = TrainerController(
            trainer_factory=trainer_factory,
            output_path=output_dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )

        # Begin training
        tc.start_learning(env_manager)
        if success_threshold is not None:
            # For tests where we are just checking setup and not reward.
            processed_rewards = [
                reward_processor(rewards)
                for rewards in env.final_rewards.values()
            ]
            assert all(not math.isnan(reward) for reward in processed_rewards)
            assert all(reward > success_threshold for reward in processed_rewards)
예제 #2
0
파일: learn.py 프로젝트: hubz00/AvatarMaker
def run_training(sub_id: int, run_seed: int, run_options, dispatcher_pipe):
    """
    Launches training session.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    :param dispatcher_pipe: Pipe to communicate with the dispatcher
    """
    logger = logging.getLogger("anha")

    # General parameters.
    # docopt passes the literal string 'None' when a flag is absent.
    env_path = (run_options['--env']
                if run_options['--env'] != 'None' else None)
    run_id = run_options['--run-id']
    load_model = run_options['--load']
    train_model = run_options['--train']
    save_freq = int(run_options['--save-freq'])
    keep_checkpoints = int(run_options['--keep-checkpoints'])
    worker_id = int(run_options['--worker-id'])
    curriculum_folder = (run_options['--curriculum']
                         if run_options['--curriculum'] != 'None' else None)
    lesson = int(run_options['--lesson'])
    no_graphics = run_options['--no-graphics']
    trainer_config_path = run_options['<trainer-config-path>']

    model_path = './models/{run_id}'.format(run_id=run_id)
    summaries_dir = './summaries'

    trainer_config = load_config(trainer_config_path)
    # Offset the worker id by sub_id so parallel sessions use distinct ports.
    env = init_environment(env_path, no_graphics, worker_id + sub_id, run_seed)

    # Lazy %-style args avoid building the message when INFO is disabled.
    logger.info("Initialised Environment [%s]", run_id)

    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)

    # Map brain name -> brain parameters (dict comprehension over manual loop).
    external_brains = {
        brain_name: env.brains[brain_name]
        for brain_name in env.external_brain_names
    }

    # snake_case per PEP 8 (were newRunId / newModelPath).
    sub_run_id = run_id + '-' + str(sub_id)
    sub_model_path = model_path + '-' + str(sub_id)
    # Create controller and begin training.
    tc = TrainerController(sub_model_path, summaries_dir, sub_run_id,
                           save_freq, maybe_meta_curriculum,
                           load_model, train_model,
                           keep_checkpoints, lesson, external_brains, run_seed, dispatcher_pipe)

    # Signal that environment has been launched.
    dispatcher_pipe.send(True)

    # Begin training
    tc.start_learning(env, trainer_config)
def test_load_config(mock_communicator, mock_launcher, dummy_config):
    """_load_config should parse the (mocked) YAML file into a one-brain config."""
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as _:
            # Any yaml.load inside the controller yields dummy_config.
            # (The original set this twice; the duplicate assignment is removed.)
            mock_load.return_value = dummy_config
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=1)
            tc = TrainerController(' ', ' ', 1, None, True, True, False, 1, 1,
                                   1, 1, '', '', False)
            config = tc._load_config()
            assert len(config) == 1
            assert config['default']['trainer'] == "ppo"
def basic_trainer_controller():
    """Build a TrainerController wired to a mocked trainer factory for tests."""
    factory = MagicMock()
    factory.ghost_controller = GhostController()
    controller = TrainerController(
        trainer_factory=factory,
        output_path="test_model_path",
        run_id="test_run_id",
        meta_curriculum=None,
        train=True,
        training_seed=99,
    )
    return controller
예제 #5
0
def basic_trainer_controller():
    """Return a test TrainerController backed by a mocked factory and a fresh parameter manager."""
    mocked_factory = MagicMock()
    mocked_factory.ghost_controller = GhostController()
    settings = {
        "trainer_factory": mocked_factory,
        "output_path": "test_model_path",
        "run_id": "test_run_id",
        "param_manager": EnvironmentParameterManager(),
        "train": True,
        "training_seed": 99,
    }
    return TrainerController(**settings)
def run_training(sub_id, run_seed, run_options, process_queue):
    """
    Launches training session.
    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    # Docker Parameters.
    # docopt passes the literal string 'None' when a flag is absent.
    docker_target_name = (run_options['--docker-target-name']
                          if run_options['--docker-target-name'] != 'None' else
                          None)

    # General parameters
    env_path = (run_options['--env']
                if run_options['--env'] != 'None' else None)
    run_id = run_options['--run-id']
    load_model = run_options['--load']
    train_model = run_options['--train']
    save_freq = int(run_options['--save-freq'])
    keep_checkpoints = int(run_options['--keep-checkpoints'])
    worker_id = int(run_options['--worker-id'])
    curriculum_file = (run_options['--curriculum']
                       if run_options['--curriculum'] != 'None' else None)
    lesson = int(run_options['--lesson'])
    # `not x` negates truthiness directly; the original's bool() wrapper was redundant.
    fast_simulation = not run_options['--slow']
    no_graphics = run_options['--no-graphics']
    trainer_config_path = run_options['<trainer-config-path>']

    # Create controller and launch environment.
    # NOTE: positional order must match this ml-agents version's TrainerController.
    tc = TrainerController(env_path, run_id + '-' + str(sub_id), save_freq,
                           curriculum_file, fast_simulation, load_model,
                           train_model, worker_id + sub_id, keep_checkpoints,
                           lesson, run_seed, docker_target_name,
                           trainer_config_path, no_graphics)

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    tc.start_learning()
예제 #7
0
def basic_trainer_controller(brain_info):
    """Construct a TrainerController with fixed test defaults for one brain.

    :param brain_info: Brain parameters registered under the name 'testbrain'.
    """
    controller_kwargs = dict(
        model_path='test_model_path',
        summaries_dir='test_summaries_dir',
        run_id='test_run_id',
        save_freq=100,
        meta_curriculum=None,
        load=True,
        train=True,
        keep_checkpoints=False,
        lesson=None,
        external_brains={'testbrain': brain_info},
        training_seed=99,
    )
    return TrainerController(**controller_kwargs)
예제 #8
0
def basic_trainer_controller():
    """Create a TrainerController with canned test settings and no trainer factory."""
    sampler = SamplerManager({})
    controller = TrainerController(
        trainer_factory=None,
        model_path="test_model_path",
        summaries_dir="test_summaries_dir",
        run_id="test_run_id",
        save_freq=100,
        meta_curriculum=None,
        train=True,
        training_seed=99,
        sampler_manager=sampler,
        resampling_interval=None,
    )
    return controller
def test_initialize_offline_trainers(mock_communicator, mock_launcher,
                                     dummy_config, dummy_offline_bc_config,
                                     dummy_online_bc_config, dummy_bad_config):
    """_initialize_trainers should build an OfflineBCTrainer for an offline BC config."""
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as _:
            # Fake a single-brain, vector-observation-only environment.
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False,
                stack=False,
                visual_inputs=0,
                brain_name="Ball3DBrain",
                vec_obs_size=8)
            # Positional signature from this ml-agents version; the path string
            # is the trainer-config file the controller will "open" (mocked).
            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
                                   1, 1, '', "tests/test_mlagents.trainers.py",
                                   False)

            # Test for Offline Behavior Cloning Trainer
            mock_load.return_value = dummy_offline_bc_config
            config = tc._load_config()
            # Reset the TF graph so trainer construction starts from a clean slate.
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert (isinstance(tc.trainers['Ball3DBrain'], OfflineBCTrainer))
예제 #10
0
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
    """Constructing a controller must seed numpy and tensorflow with training_seed."""
    chosen_seed = 27
    factory = MagicMock()
    factory.ghost_controller = GhostController()
    TrainerController(
        trainer_factory=factory,
        output_path="",
        run_id="1",
        param_manager=None,
        train=True,
        training_seed=chosen_seed,
    )
    numpy_random_seed.assert_called_with(chosen_seed)
    tensorflow_set_seed.assert_called_with(chosen_seed)
def basic_trainer_controller():
    """Assemble a TrainerController test fixture around a mocked trainer factory."""
    fake_factory = MagicMock()
    fake_factory.ghost_controller = GhostController()
    empty_sampler = SamplerManager({})
    return TrainerController(
        trainer_factory=fake_factory,
        output_path="test_model_path",
        run_id="test_run_id",
        save_freq=100,
        meta_curriculum=None,
        train=True,
        training_seed=99,
        sampler_manager=empty_sampler,
        resampling_interval=None,
    )
예제 #12
0
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
    """The controller constructor must forward its training seed to numpy and tensorflow."""
    wanted_seed = 27
    stub_factory = MagicMock()
    stub_factory.ghost_controller = GhostController()
    TrainerController(
        trainer_factory=stub_factory,
        output_path="",
        run_id="1",
        meta_curriculum=None,
        train=True,
        training_seed=wanted_seed,
        sampler_manager=SamplerManager({}),
        resampling_interval=None,
    )
    numpy_random_seed.assert_called_with(wanted_seed)
    tensorflow_set_seed.assert_called_with(wanted_seed)
예제 #13
0
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
    """Creating a TrainerController should seed numpy and tensorflow RNGs."""
    expected_seed = 27
    controller_args = dict(
        trainer_factory=None,
        model_path="",
        summaries_dir="",
        run_id="1",
        save_freq=1,
        meta_curriculum=None,
        train=True,
        training_seed=expected_seed,
        sampler_manager=SamplerManager({}),
        resampling_interval=None,
    )
    TrainerController(**controller_args)
    numpy_random_seed.assert_called_with(expected_seed)
    tensorflow_set_seed.assert_called_with(expected_seed)
예제 #14
0
def _check_environment_trains(env,
                              config,
                              meta_curriculum=None,
                              success_threshold=0.99):
    """Train in `env` and assert every brain's mean reward beats the threshold.

    :param env: Simple test environment.
    :param config: Trainer configuration as a YAML string.
    :param meta_curriculum: Optional curriculum forwarded to factory/controller.
    :param success_threshold: Minimum mean reward; pass None to only check that
        setup/training runs without verifying rewards.
    """
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as output_dir:  # renamed: `dir` shadowed the builtin
        run_id = "id"
        save_freq = 99999
        seed = 1337
        # Clear StatsReporters so we don't write to file.
        StatsReporter.writers.clear()
        trainer_config = yaml.safe_load(config)
        env_manager = SimpleEnvManager(env, FloatPropertiesChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=output_dir,
            run_id=run_id,
            model_path=output_dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )

        tc = TrainerController(
            trainer_factory=trainer_factory,
            summaries_dir=output_dir,
            model_path=output_dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )

        # Begin training
        tc.start_learning(env_manager)
        print(tc._get_measure_vals())
        if success_threshold is not None:
            # For tests where we are just checking setup and not reward.
            for mean_reward in tc._get_measure_vals().values():
                assert not math.isnan(mean_reward)
                assert mean_reward > success_threshold
예제 #15
0
def _check_environment_trains(env, config):
    """Train in `env` with the given YAML config and assert mean rewards exceed 0.99.

    :param env: Simple test environment.
    :param config: Trainer configuration as a YAML string.
    """
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as output_dir:  # renamed: `dir` shadowed the builtin
        run_id = "id"
        save_freq = 99999
        seed = 1337

        trainer_config = yaml.safe_load(config)
        env_manager = SimpleEnvManager(env)
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=output_dir,
            run_id=run_id,
            model_path=output_dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=None,
            multi_gpu=False,
        )

        tc = TrainerController(
            trainer_factory=trainer_factory,
            summaries_dir=output_dir,
            model_path=output_dir,
            run_id=run_id,
            meta_curriculum=None,
            train=True,
            training_seed=seed,
            fast_simulation=True,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )

        # Begin training
        tc.start_learning(env_manager)
        print(tc._get_measure_vals())
        for brain_name, mean_reward in tc._get_measure_vals().items():
            assert not math.isnan(mean_reward)
            assert mean_reward > 0.99
예제 #16
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        model_path = f"./models/{options.run_id}"
        # Optional checkpoint to initialize weights from, if requested.
        maybe_init_path = (
            f"./models/{options.initialize_from}" if options.initialize_from else None
        )
        summaries_dir = "./summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            summaries_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        # Fails fast when output dirs conflict with resume/force/init settings.
        handle_existing_directories(
            model_path, summaries_dir, options.resume, options.force, maybe_init_path
        )
        tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        # No env executable means we attach to the Unity editor on its fixed port.
        if options.env_path is None:
            port = UnityEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory(
            options.env_path, options.no_graphics, run_seed, port, options.env_args
        )
        engine_config = EngineConfig(
            width=options.width,
            height=options.height,
            quality_level=options.quality_level,
            time_scale=options.time_scale,
            target_frame_rate=options.target_frame_rate,
            capture_frame_rate=options.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed
        )
        # NOTE: positional order must match this ml-agents version's TrainerFactory.
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            not options.inference,
            options.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            options.multi_gpu,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            not options.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        # Always release env subprocesses and flush timing info, even on failure.
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)
예제 #17
0
def run_training(sub_id: int, run_seed: int, run_options: Dict[str, Any],
                 process_queue: Queue) -> None:
    """
    Launches training session.
    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    # Docker Parameters
    # docopt passes the literal string "None" when the flag is absent.
    docker_target_name = (run_options["--docker-target-name"]
                          if run_options["--docker-target-name"] != "None" else
                          None)

    # General parameters
    env_path = run_options["--env"] if run_options["--env"] != "None" else None
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = (run_options["--curriculum"]
                         if run_options["--curriculum"] != "None" else None)
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = (run_options["--sampler"]
                         if run_options["--sampler"] != "None" else None)

    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(run_id=run_id,
                                                         sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        # Rebase all paths onto the mounted docker volume.
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_target_name,
            run_id=run_id,
            sub_id=sub_id)
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_target_name)

    trainer_config = load_config(trainer_config_path)
    # Each sub-session gets its own port range: base + sub_id * num_envs.
    env_factory = create_environment_factory(
        env_path,
        docker_target_name,
        no_graphics,
        run_seed,
        base_port + (sub_id * num_envs),
        list([str(x) for t in run_options.items()
              for x in t]),  # NOTE passes all arguments to Unity
    )
    env = SubprocessEnvManager(env_factory, num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters)

    # Create controller and begin training.
    # NOTE: positional order must match this ml-agents version's TrainerController.
    tc = TrainerController(
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        load_model,
        train_model,
        keep_checkpoints,
        lesson,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    tc.start_learning(env, trainer_config)
예제 #18
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.

    NOTE(review): this is an experimental meta-learning driver. It hardcodes
    the run id, absolute Windows env paths, and a 'Brain' behavior name, and
    mutates `options` in place — clearly machine-specific research code.
    """

    # NOTE(review): overrides whatever run id the user supplied.
    options.checkpoint_settings.run_id = "test8"

    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (os.path.join(base_path,
                                        checkpoint_settings.initialize_from)
                           if checkpoint_settings.initialize_from else None)
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json"))
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

    engine_config = EngineConfig(
        width=engine_settings.width,
        height=engine_settings.height,
        quality_level=engine_settings.quality_level,
        time_scale=engine_settings.time_scale,
        target_frame_rate=engine_settings.target_frame_rate,
        capture_frame_rate=engine_settings.capture_frame_rate,
    )
    if env_settings.env_path is None:
        port = None
    # Begin training

    # NOTE(review): hardcoded machine-specific executable path.
    env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
    env_factory = create_environment_factory(
        env_settings.env_path,
        engine_settings.no_graphics,
        run_seed,
        port,
        env_settings.env_args,
        os.path.abspath(
            run_logs_dir),  # Unity environment requires absolute path
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config,
                                       env_settings.num_envs)

    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum, env_manager, restore=checkpoint_settings.resume)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.parameter_randomization, run_seed)
    # Temporarily shrink max_steps so the first session only fetches weights;
    # the real value is restored after the initial-weights run below.
    max_steps = options.behaviors['Brain'].max_steps
    options.behaviors['Brain'].max_steps = 10

    trainer_factory = TrainerFactory(options,
                                     write_path,
                                     not checkpoint_settings.inference,
                                     checkpoint_settings.resume,
                                     run_seed,
                                     maybe_init_path,
                                     maybe_meta_curriculum,
                                     False,
                                     total_steps=0)
    trainer_factory.trainer_config[
        'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT

    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        write_path,
        checkpoint_settings.run_id,
        maybe_meta_curriculum,
        not checkpoint_settings.inference,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    try:
        # Get inital weights
        tc.init_weights(env_manager)
        inital_weights = deepcopy(tc.weights)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)

    # Restore the user-configured step budget for the meta-learning loop.
    options.behaviors['Brain'].max_steps = max_steps
    step = 0
    counter = 0
    max_meta_updates = 200
    while counter < max_meta_updates:
        sample = np.random.random_sample()
        # NOTE(review): random_sample() returns values in [0, 1), so this
        # branch can never trigger — the Carry Object stage is effectively
        # disabled. Confirm whether the threshold is intentional.
        if (sample > 1):
            print("Performing Meta-learning on Carry Object stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mCarryObject_new/RLProject.exe"
        else:
            print("Performing Meta-learning on Find Target stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(
                run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, engine_config,
                                           env_settings.num_envs)

        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum,
            env_manager,
            restore=checkpoint_settings.resume)
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed)

        # Fresh factory per meta-update; total_steps carries the cumulative count.
        trainer_factory = TrainerFactory(options,
                                         write_path,
                                         not checkpoint_settings.inference,
                                         checkpoint_settings.resume,
                                         run_seed,
                                         maybe_init_path,
                                         maybe_meta_curriculum,
                                         False,
                                         total_steps=step)

        # Linearly anneal lr / beta / epsilon toward 0 over max_meta_updates.
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate = 0.0005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.beta = 0.005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.epsilon = 0.2 * (
                1 - counter / max_meta_updates)
        print("Current lr: {}\nCurrent beta: {}\nCurrent epsilon: {}".format(
            trainer_factory.trainer_config['Brain'].hyperparameters.
            learning_rate,
            trainer_factory.trainer_config['Brain'].hyperparameters.beta,
            trainer_factory.trainer_config['Brain'].hyperparameters.epsilon))

        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
        try:
            # Get inital weights
            print("Start learning at step: " + str(step) + " meta_step: " +
                  str(counter))
            print("Inital weights: " + str(inital_weights[8]))
            weights_after_train = tc.start_learning(env_manager,
                                                    inital_weights)

            print(tc.trainers['Brain'].optimizer)

            # weights_after_train = tc.weights
            # print("Trained weights: " + str(weights_after_train[8]))
            step += options.behaviors['Brain'].max_steps
            print("meta step:" + str(step))
            # print(weights_after_train)
            # equal = []
            # for i, weight in enumerate(tc.weights):
            #     equal.append(np.array_equal(inital_weights[i], weights_after_train[i]))
            # print(all(equal))
        finally:
            # Carry the trained weights forward as the next update's init.
            # NOTE(review): if start_learning raised, weights_after_train may
            # be unbound here and this finally block will itself fail.
            print(len(weights_after_train), len(inital_weights))
            for i, weight in enumerate(weights_after_train):
                inital_weights[i] = weights_after_train[i]
            env_manager.close()
            write_run_options(write_path, options)
            write_timing_tree(run_logs_dir)
            write_training_status(run_logs_dir)
        counter += 1
예제 #19
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = f"./models/{options.run_id}"
        summaries_dir = "./summaries"
    else:
        model_path = f"/{options.docker_target_name}/models/{options.run_id}"
        summaries_dir = f"/{options.docker_target_name}/summaries"
    port = options.base_port

    # Configure CSV, Tensorboard Writers and StatsReporter
    # We assume reward and episode length are needed in the CSV.
    csv_writer = CSVWriter(
        summaries_dir,
        required_fields=[
            "Environment/Cumulative Reward", "Environment/Episode Length"
        ],
    )
    tb_writer = TensorboardWriter(summaries_dir)
    StatsReporter.add_writer(tb_writer)
    StatsReporter.add_writer(csv_writer)

    if options.env_path is None:
        port = UnityEnvironment.DEFAULT_EDITOR_PORT
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        port,
        options.env_args,
    )
    engine_config = EngineConfig(
        options.width,
        options.height,
        options.quality_level,
        options.time_scale,
        options.target_frame_rate,
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config,
                                       options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum_config, env_manager, options.lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_config, run_seed)
    trainer_factory = TrainerFactory(
        options.trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        model_path,
        summaries_dir,
        options.run_id,
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
예제 #20
0
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
    """Constructing a TrainerController must seed both numpy and tensorflow
    with the seed passed to it."""
    expected_seed = 27
    TrainerController(
        '', '', '1', 1, None, True, False, False, None, {}, expected_seed
    )
    numpy_random_seed.assert_called_with(expected_seed)
    tensorflow_set_seed.assert_called_with(expected_seed)
예제 #21
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Set up and run a single training session.

    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        results_root = "results"
        # All artifacts for this run live under results/<run_id>.
        write_path = os.path.join(results_root, checkpoint_settings.run_id)
        if checkpoint_settings.initialize_from is not None:
            init_path = os.path.join(
                results_root, checkpoint_settings.initialize_from
            )
        else:
            init_path = None
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Fail early if the output directory state conflicts with the
        # resume/force flags.
        validate_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            init_path,
        )
        os.makedirs(run_logs_dir, exist_ok=True)
        # When resuming, restore the persisted training status of the
        # previous run.
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )

        # Register stats writers; registration order is preserved.
        for writer in (
            TensorboardWriter(
                write_path, clear_past_data=not checkpoint_settings.resume
            ),
            GaugeWriter(),
            ConsoleWriter(),
        ):
            StatsReporter.add_writer(writer)

        # No executable means in-editor training: no explicit port.
        if env_settings.env_path is None:
            port = None
        environment_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            environment_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=init_path,
            multi_gpu=False,
        )
        controller = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Run the learning loop, then always tear down the environment and
    # flush run artifacts (options, timing tree, training status).
    try:
        controller.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
예제 #22
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches a training session: wires up stats writers, the environment
    manager, curriculum/sampler configuration and the trainer controller,
    then runs the learning loop.

    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        # All artifacts for this run live under results/<run_id>.
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Fail early if the output directory state conflicts with resume/force.
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # On resume, reload the persisted training status of the previous run.
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        # No executable means in-editor training; clear the explicit port.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)
        trainer_factory = TrainerFactory(
            options.behaviors,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,  # presumably multi_gpu (matches sibling keyword call) — TODO confirm
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        # Always shut the environment down and persist run metadata/timing.
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches a training session: configures stats writers, builds the
    environment manager, curriculum and sampler, then runs the learning loop.

    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments.
    """
    # Output locations are relative to the current working directory.
    model_path = f"./models/{options.run_id}"
    summaries_dir = "./summaries"
    port = options.base_port
    # Configure CSV, Tensorboard Writers and StatsReporter
    # We assume reward and episode length are needed in the CSV.
    csv_writer = CSVWriter(
        summaries_dir,
        required_fields=[
            "Environment/Cumulative Reward", "Environment/Episode Length"
        ],
    )
    tb_writer = TensorboardWriter(summaries_dir)
    StatsReporter.add_writer(tb_writer)
    StatsReporter.add_writer(csv_writer)

    # No executable given: train against the Unity editor on its fixed port.
    if options.env_path is None:
        port = 5004  # In-editor training port
    env_factory = create_environment_factory(options.env_path,
                                             options.no_graphics, run_seed,
                                             port, options.env_args,
                                             options.env_id, options.n_steps)
    env_manager = SubprocessEnvManager(env_factory=env_factory,
                                       n_env=options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum_config, env_manager, options.lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_config, run_seed)
    trainer_factory = TrainerFactory(
        options.trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )

    # Create controller and begin training.
    tc = TrainerController(trainer_factory=trainer_factory,
                           model_path=model_path,
                           summaries_dir=summaries_dir,
                           run_id=options.run_id,
                           save_freq=options.save_freq,
                           meta_curriculum=maybe_meta_curriculum,
                           train=options.train_model,
                           training_seed=run_seed,
                           sampler_manager=sampler_manager,
                           resampling_interval=resampling_interval,
                           n_steps=options.n_steps)
    # Begin training; always close the subprocess environment afterwards.
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
예제 #24
0
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches a training session, signalling the parent process once the
    environment has been launched.

    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param options: Parsed command line arguments.
    :param run_seed: Random seed used for training.
    """
    # Docker Parameters

    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder

    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = "./train/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"
    else:
        # Remap all paths into the mounted docker volume.
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name
        )

    trainer_config = load_config(trainer_config_path)
    # Each concurrent session gets its own port range, offset by sub_id.
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        options.base_port + (sub_id * options.num_envs),
        options.env_args,
    )
    env = SubprocessEnvManager(env_factory, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, env.reset_parameters, run_seed
    )

    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )

    # Create controller and begin training.
    tc = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        options.fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    # NOTE(review): unlike sibling variants there is no try/finally env.close()
    # here — confirm the controller is responsible for environment shutdown.
    tc.start_learning(env)
예제 #25
0
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches a training session, signalling the parent process once the
    environment has been launched.

    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param options: Parsed command line arguments.
    :param run_seed: Random seed used for training.
    """
    # Docker Parameters
    trainer_config_path = options.trainer_config_path
    curriculum_folder = options.curriculum_folder
    # Recognize and use docker volume if one is passed as an argument
    if not options.docker_target_name:
        model_path = "./models/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"
    else:
        # Remap all paths into the mounted docker volume.
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=options.docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=options.docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=options.docker_target_name,
            run_id=options.run_id,
            sub_id=sub_id,
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=options.docker_target_name
        )
    trainer_config = load_config(trainer_config_path)
    # Each concurrent session gets its own port range, offset by sub_id.
    port = options.base_port + (sub_id * options.num_envs)
    if options.env_path is None:
        port = 5004  # In-editor training port
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        port,
        options.env_args,
    )
    engine_config = EngineConfig(
        options.width,
        options.height,
        options.quality_level,
        options.time_scale,
        options.target_frame_rate,
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_folder, env_manager, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, run_seed
    )
    trainer_factory = TrainerFactory(
        trainer_config,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )
    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        model_path,
        summaries_dir,
        options.run_id + "-" + str(sub_id),
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    # Signal that environment has been launched.
    process_queue.put(True)
    # Begin training; always close the subprocess environment afterwards.
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
def run_training(
    sub_id: int, run_seed: int, run_options: Dict[str, Any], process_queue: Queue, inject_create_environment_factory: Optional[Any]
) -> None:
    """
    Launches a training session from docopt-style command line arguments,
    signalling the parent process once the environment has been launched.

    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    :param inject_create_environment_factory: Optional replacement for
        create_environment_factory — presumably a test/debug hook; called with
        the same arguments when not None. TODO confirm expected callable type.
    """
    # Docker Parameters
    docker_target_name = (
        run_options["--docker-target-name"]
        if run_options["--docker-target-name"] != "None"
        else None
    )

    # General parameters (docopt passes all values as strings; "None" means unset)
    env_path = run_options["--env"] if run_options["--env"] != "None" else None
    run_id = run_options["--run-id"]
    load_model = run_options["--load"]
    train_model = run_options["--train"]
    save_freq = int(run_options["--save-freq"])
    keep_checkpoints = int(run_options["--keep-checkpoints"])
    base_port = int(run_options["--base-port"])
    num_envs = int(run_options["--num-envs"])
    curriculum_folder = (
        run_options["--curriculum"] if run_options["--curriculum"] != "None" else None
    )
    lesson = int(run_options["--lesson"])
    fast_simulation = not bool(run_options["--slow"])
    no_graphics = run_options["--no-graphics"]
    multi_gpu = run_options["--multi-gpu"]
    trainer_config_path = run_options["<trainer-config-path>"]
    sampler_file_path = (
        run_options["--sampler"] if run_options["--sampler"] != "None" else None
    )

    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
        model_path = "./train/{run_id}-{sub_id}".format(run_id=run_id, sub_id=sub_id)
        summaries_dir = "./summaries"
    else:
        # Remap all paths into the mounted docker volume.
        trainer_config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_target_name,
            trainer_config_path=trainer_config_path,
        )
        if curriculum_folder is not None:
            curriculum_folder = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_target_name,
                curriculum_folder=curriculum_folder,
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_target_name, run_id=run_id, sub_id=sub_id
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_target_name
        )

    trainer_config = load_config(trainer_config_path)
    if inject_create_environment_factory is None:
        env_factory = create_environment_factory(
            env_path,
            docker_target_name,
            no_graphics,
            run_seed,
            base_port + (sub_id * num_envs),
        )
    else:
        env_factory = inject_create_environment_factory(
            env_path,
            docker_target_name,
            no_graphics,
            run_seed,
            base_port + (sub_id * num_envs),
        )

    # HACK for debug use SimpleEnvManager
    if num_envs > 1:
        # create a mock env for parsing examples (kill internal)
        # NOTE(review): instantiates one throwaway environment on port 9999 and
        # tears down its inner envs — presumably to trigger one-time setup
        # before the subprocess workers start; confirm intent.
        mock_env = env_factory(9999)
        # from minerl_to_mlagent_wrapper import MineRLToMLAgentWrapper
        # MineRLToMLAgentWrapper.set_wrappers_for_pretraining(mock_env.brain_names[0], mock_env)
        # close inner minerl environment
        try:
            for k, v in mock_env._envs.items():
                for e in v:
                        e.unwrapped.close()
        except AttributeError:
            mock_env.close()
        env = SubprocessEnvManager(env_factory, num_envs)
    else:
        # Single env: run it in-process instead of spawning a subprocess.
        env = env_factory(0)
        env = SimpleEnvManager(env)

    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
    sampler_manager, resampling_interval = create_sampler_manager(
        sampler_file_path, env.reset_parameters, run_seed
    )

    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        run_id,
        model_path,
        keep_checkpoints,
        train_model,
        load_model,
        run_seed,
        maybe_meta_curriculum,
        multi_gpu,
    )

    # Create controller and begin training.
    tc = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        run_id + "-" + str(sub_id),
        save_freq,
        maybe_meta_curriculum,
        train_model,
        run_seed,
        fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    # NOTE(review): no try/finally env.close() here — confirm the controller
    # owns environment shutdown in this variant.
    tc.start_learning(env)
예제 #27
0
파일: learn.py 프로젝트: JavierNgNYP/FYPJ2
def run_training(sub_id: int, run_seed: int, run_options: Dict[str, Any], process_queue: Queue) -> None:
    """
    Launches a training session from docopt-style command line arguments,
    signalling the parent process once the environment has been launched.

    :param process_queue: Queue used to send signal back to main.
    :param sub_id: Unique id for training session.
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    # Docker Parameters (docopt passes all values as strings; 'None' means unset)
    docker_target_name = (run_options['--docker-target-name']
                          if run_options['--docker-target-name'] != 'None' else None)

    # General parameters
    env_path = (run_options['--env']
                if run_options['--env'] != 'None' else None)
    run_id = run_options['--run-id']
    load_model = run_options['--load']
    train_model = run_options['--train']
    save_freq = int(run_options['--save-freq'])
    keep_checkpoints = int(run_options['--keep-checkpoints'])
    base_port = int(run_options['--base-port'])
    num_envs = int(run_options['--num-envs'])
    curriculum_folder = (run_options['--curriculum']
                         if run_options['--curriculum'] != 'None' else None)
    lesson = int(run_options['--lesson'])
    fast_simulation = not bool(run_options['--slow'])
    no_graphics = run_options['--no-graphics']
    trainer_config_path = run_options['<trainer-config-path>']
    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
        model_path = './models/{run_id}-{sub_id}'.format(run_id=run_id, sub_id=sub_id)
        summaries_dir = './summaries'
    else:
        # Remap all paths into the mounted docker volume.
        trainer_config_path = \
            '/{docker_target_name}/{trainer_config_path}'.format(
                docker_target_name=docker_target_name,
                trainer_config_path=trainer_config_path)
        if curriculum_folder is not None:
            curriculum_folder = \
                '/{docker_target_name}/{curriculum_folder}'.format(
                    docker_target_name=docker_target_name,
                    curriculum_folder=curriculum_folder)
        model_path = '/{docker_target_name}/models/{run_id}-{sub_id}'.format(
            docker_target_name=docker_target_name,
            run_id=run_id,
            sub_id=sub_id)
        summaries_dir = '/{docker_target_name}/summaries'.format(
            docker_target_name=docker_target_name)

    trainer_config = load_config(trainer_config_path)
    # Each concurrent session gets its own port range, offset by sub_id.
    env_factory = create_environment_factory(
        env_path,
        docker_target_name,
        no_graphics,
        run_seed,
        base_port + (sub_id * num_envs)
    )
    env = SubprocessUnityEnvironment(env_factory, num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)

    # Create controller and begin training.
    tc = TrainerController(model_path, summaries_dir, run_id + '-' + str(sub_id),
                           save_freq, maybe_meta_curriculum,
                           load_model, train_model,
                           keep_checkpoints, lesson, env.external_brains,
                           run_seed, fast_simulation)

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    tc.start_learning(env, trainer_config)
def test_initialization(mock_communicator, mock_launcher):
    """The controller should start an environment whose first brain is the
    mocked 'RealFakeBrain'."""
    mock_communicator.return_value = MockCommunicator(
        discrete_action=True, visual_inputs=1
    )
    controller = TrainerController(
        ' ', ' ', 1, None, True, True, False, 1, 1, 1, 1,
        '', "tests/test_mlagents.trainers.py", False
    )
    assert controller.env.brain_names[0] == 'RealFakeBrain'
예제 #29
0
파일: learn.py 프로젝트: donlee90/ml-agents
def run_training(run_seed: int, options: RunOptions, num_areas: int) -> None:
    """
    Launches a training session: configures torch, stats writer plugins, the
    environment manager and parameter manager, then runs the learning loop.

    :param run_seed: Random seed used for training.
    :param num_areas: Number of training areas to instantiate
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        torch_utils.set_torch_config(options.torch_settings)
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        run_logs_dir = checkpoint_settings.run_logs_dir
        port: Optional[int] = env_settings.base_port
        # Fail early if the output directory state conflicts with resume/force.
        validate_existing_directories(
            checkpoint_settings.write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            checkpoint_settings.maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states in case of resume
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # In case of initialization, set full init_path for all behaviors
        elif checkpoint_settings.maybe_init_path is not None:
            setup_init_path(options.behaviors, checkpoint_settings.maybe_init_path)

        # Configure Tensorboard Writers and StatsReporter
        stats_writers = register_stats_writer_plugins(options)
        for sw in stats_writers:
            StatsReporter.add_writer(sw)

        # No executable means in-editor training; clear the explicit port.
        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            num_areas,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, options, env_settings.num_envs)
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=checkpoint_settings.write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=checkpoint_settings.maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            checkpoint_settings.write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        # Always shut the environment down and persist run metadata/timing.
        env_manager.close()
        write_run_options(checkpoint_settings.write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
def test_initialize_trainers(mock_communicator, mock_launcher, dummy_config,
                             dummy_offline_bc_config, dummy_online_bc_config,
                             dummy_bad_config):
    """PPO and online-BC configs must each produce the expected trainer type
    for 'RealFakeBrain'; an unknown trainer name must raise
    UnityEnvironmentException."""
    # Patch yaml.load and the module's open() so no real files are read.
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as _:
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=1)
            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
                                   1, 1, '', "tests/test_mlagents.trainers.py",
                                   False)

            # Test for PPO trainer
            mock_load.return_value = dummy_config
            config = tc._load_config()
            tf.reset_default_graph()  # fresh TF graph per trainer init
            tc._initialize_trainers(config)
            assert (len(tc.trainers) == 1)
            assert (isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))

            # Test for Online Behavior Cloning Trainer
            mock_load.return_value = dummy_online_bc_config
            config = tc._load_config()
            tf.reset_default_graph()
            tc._initialize_trainers(config)
            assert (isinstance(tc.trainers['RealFakeBrain'], OnlineBCTrainer))

            # Test for proper exception when trainer name is incorrect
            mock_load.return_value = dummy_bad_config
            config = tc._load_config()
            tf.reset_default_graph()
            with pytest.raises(UnityEnvironmentException):
                tc._initialize_trainers(config)