Exemple #1
0
def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray:
    """
    Converts byte array observation image into numpy array, re-sizes it,
    and optionally converts it to grey scale
    :param gray_scale: Whether to convert the image to grayscale.
    :param image_bytes: input byte array corresponding to image
    :return: processed numpy array of observation from environment
    """
    with hierarchical_timer("image_decompress"):
        image_bytearray = bytearray(image_bytes)
        image = Image.open(io.BytesIO(image_bytearray))
        # Normally Image loads lazily, this forces it to do loading in the timer scope.
        image.load()
    s = np.array(image) / 255.0
    if gray_scale:
        s = np.mean(s, axis=2)
        s = np.reshape(s, [s.shape[0], s.shape[1], 1])
    return s
Exemple #2
0
    def advance(self, env: EnvManager) -> int:
        # Get steps
        num_steps = self._get_and_process_experiences(env)

        # Report current lesson
        if self.meta_curriculum:
            for brain_name, curr in self.meta_curriculum.brains_to_curricula.items(
            ):
                if brain_name in self.trainers:
                    self.trainers[brain_name].stats_reporter.set_stat(
                        "Environment/Lesson", curr.lesson_num)

        # Advance trainers. This can be done in a separate loop in the future.
        with hierarchical_timer("trainer_advance"):
            for trainer in self.trainers.values():
                trainer.advance()

        return num_steps
Exemple #3
0
 def step(self) -> None:
     if self._is_first_message:
         return self.reset()
     if not self._loaded:
         raise UnityEnvironmentException("No Unity environment is loaded.")
     # fill the blanks for missing actions
     for group_name in self._env_specs:
         if group_name not in self._env_actions:
             n_agents = 0
             if group_name in self._env_state:
                 n_agents = len(self._env_state[group_name][0])
             self._env_actions[group_name] = self._env_specs[
                 group_name].create_empty_action(n_agents)
     step_input = self._generate_step_input(self._env_actions)
     with hierarchical_timer("communicator.exchange"):
         outputs = self.communicator.exchange(step_input)
     if outputs is None:
         raise UnityCommunicationException("Communicator has stopped.")
     self._update_behavior_specs(outputs)
     rl_output = outputs.rl_output
     self._update_state(rl_output)
     self._env_actions.clear()
    def join_threads(self, timeout_seconds: float = 1.0) -> None:
        """
        Wait for threads to finish, and merge their timer information into the main thread.
        :param timeout_seconds:
        :return:
        """
        self.kill_trainers = True
        for t in self.trainer_threads:
            try:
                t.join(timeout_seconds)
            except Exception:
                pass

        with hierarchical_timer("trainer_threads") as main_timer_node:
            for trainer_thread in self.trainer_threads:
                thread_timer_stack = get_timer_stack_for_thread(trainer_thread)
                if thread_timer_stack:
                    main_timer_node.merge(
                        thread_timer_stack.root,
                        root_name="thread_root",
                        is_parallel=True,
                    )
                    merge_gauges(thread_timer_stack.gauges)
Exemple #5
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        model_path = f"./models/{options.run_id}"
        maybe_init_path = (
            f"./models/{options.initialize_from}" if options.initialize_from else None
        )
        summaries_dir = "./summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            summaries_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        handle_existing_directories(
            model_path, summaries_dir, options.resume, options.force, maybe_init_path
        )
        tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if options.env_path is None:
            port = UnityEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory(
            options.env_path, options.no_graphics, run_seed, port, options.env_args
        )
        engine_config = EngineConfig(
            width=options.width,
            height=options.height,
            quality_level=options.quality_level,
            time_scale=options.time_scale,
            target_frame_rate=options.target_frame_rate,
            capture_frame_rate=options.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed
        )
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            not options.inference,
            options.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            options.multi_gpu,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            not options.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """

    options.checkpoint_settings.run_id = "test8"

    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (os.path.join(base_path,
                                        checkpoint_settings.initialize_from)
                           if checkpoint_settings.initialize_from else None)
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json"))
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

    engine_config = EngineConfig(
        width=engine_settings.width,
        height=engine_settings.height,
        quality_level=engine_settings.quality_level,
        time_scale=engine_settings.time_scale,
        target_frame_rate=engine_settings.target_frame_rate,
        capture_frame_rate=engine_settings.capture_frame_rate,
    )
    if env_settings.env_path is None:
        port = None
    # Begin training

    env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
    env_factory = create_environment_factory(
        env_settings.env_path,
        engine_settings.no_graphics,
        run_seed,
        port,
        env_settings.env_args,
        os.path.abspath(
            run_logs_dir),  # Unity environment requires absolute path
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config,
                                       env_settings.num_envs)

    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum, env_manager, restore=checkpoint_settings.resume)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.parameter_randomization, run_seed)
    max_steps = options.behaviors['Brain'].max_steps
    options.behaviors['Brain'].max_steps = 10

    trainer_factory = TrainerFactory(options,
                                     write_path,
                                     not checkpoint_settings.inference,
                                     checkpoint_settings.resume,
                                     run_seed,
                                     maybe_init_path,
                                     maybe_meta_curriculum,
                                     False,
                                     total_steps=0)
    trainer_factory.trainer_config[
        'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT

    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        write_path,
        checkpoint_settings.run_id,
        maybe_meta_curriculum,
        not checkpoint_settings.inference,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    try:
        # Get inital weights
        tc.init_weights(env_manager)
        inital_weights = deepcopy(tc.weights)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)

    options.behaviors['Brain'].max_steps = max_steps
    step = 0
    counter = 0
    max_meta_updates = 200
    while counter < max_meta_updates:
        sample = np.random.random_sample()
        if (sample > 1):
            print("Performing Meta-learning on Carry Object stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mCarryObject_new/RLProject.exe"
        else:
            print("Performing Meta-learning on Find Target stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(
                run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, engine_config,
                                           env_settings.num_envs)

        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum,
            env_manager,
            restore=checkpoint_settings.resume)
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed)

        trainer_factory = TrainerFactory(options,
                                         write_path,
                                         not checkpoint_settings.inference,
                                         checkpoint_settings.resume,
                                         run_seed,
                                         maybe_init_path,
                                         maybe_meta_curriculum,
                                         False,
                                         total_steps=step)

        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate = 0.0005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.beta = 0.005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.epsilon = 0.2 * (
                1 - counter / max_meta_updates)
        print("Current lr: {}\nCurrent beta: {}\nCurrent epsilon: {}".format(
            trainer_factory.trainer_config['Brain'].hyperparameters.
            learning_rate,
            trainer_factory.trainer_config['Brain'].hyperparameters.beta,
            trainer_factory.trainer_config['Brain'].hyperparameters.epsilon))

        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
        try:
            # Get inital weights
            print("Start learning at step: " + str(step) + " meta_step: " +
                  str(counter))
            print("Inital weights: " + str(inital_weights[8]))
            weights_after_train = tc.start_learning(env_manager,
                                                    inital_weights)

            print(tc.trainers['Brain'].optimizer)

            # weights_after_train = tc.weights
            # print("Trained weights: " + str(weights_after_train[8]))
            step += options.behaviors['Brain'].max_steps
            print("meta step:" + str(step))
            # print(weights_after_train)
            # equal = []
            # for i, weight in enumerate(tc.weights):
            #     equal.append(np.array_equal(inital_weights[i], weights_after_train[i]))
            # print(all(equal))
        finally:
            print(len(weights_after_train), len(inital_weights))
            for i, weight in enumerate(weights_after_train):
                inital_weights[i] = weights_after_train[i]
            env_manager.close()
            write_run_options(write_path, options)
            write_timing_tree(run_logs_dir)
            write_training_status(run_logs_dir)
        counter += 1
Exemple #7
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from is not None
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )

        # Configure Tensorboard Writers and StatsReporter
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
Exemple #8
0
def test_timers() -> None:
    test_timer = timers.TimerStack()
    with mock.patch("mlagents_envs.timers._get_thread_timer",
                    return_value=test_timer):
        # First, run some simple code
        with timers.hierarchical_timer("top_level"):
            for i in range(3):
                with timers.hierarchical_timer("multiple"):
                    decorated_func(i, i)

            raised = False
            try:
                with timers.hierarchical_timer("raises"):
                    raise RuntimeError("timeout!")
            except RuntimeError:
                raised = True

            with timers.hierarchical_timer("post_raise"):
                assert raised
                pass

        # We expect the hierarchy to look like
        #   (root)
        #       top_level
        #           multiple
        #               decorated_func
        #           raises
        #           post_raise
        root = test_timer.root
        assert root.children.keys() == {"top_level"}

        top_level = root.children["top_level"]
        assert top_level.children.keys() == {
            "multiple", "raises", "post_raise"
        }

        # make sure the scope was closed properly when the exception was raised
        raises = top_level.children["raises"]
        assert raises.count == 1

        multiple = top_level.children["multiple"]
        assert multiple.count == 3

        timer_tree = test_timer.get_timing_tree()

        expected_tree = {
            "name": "root",
            "total": mock.ANY,
            "count": 1,
            "self": mock.ANY,
            "children": {
                "top_level": {
                    "total": mock.ANY,
                    "count": 1,
                    "self": mock.ANY,
                    "children": {
                        "multiple": {
                            "total": mock.ANY,
                            "count": 3,
                            "self": mock.ANY,
                            "children": {
                                "decorated_func": {
                                    "total": mock.ANY,
                                    "count": 3,
                                    "self": mock.ANY,
                                }
                            },
                        },
                        "raises": {
                            "total": mock.ANY,
                            "count": 1,
                            "self": mock.ANY
                        },
                        "post_raise": {
                            "total": mock.ANY,
                            "count": 1,
                            "self": mock.ANY
                        },
                    },
                }
            },
            "gauges": {
                "my_gauge": {
                    "value": 4.0,
                    "max": 4.0,
                    "min": 0.0,
                    "count": 3
                }
            },
            "metadata": {
                "timer_format_version": timers.TIMER_FORMAT_VERSION,
                "start_time_seconds": mock.ANY,
                "end_time_seconds": mock.ANY,
                "python_version": mock.ANY,
                "command_line_arguments": mock.ANY,
            },
        }

        assert timer_tree == expected_tree
Exemple #9
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)
        trainer_factory = TrainerFactory(
            options.behaviors,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
 def trainer_update_func(self, trainer: Trainer) -> None:
     while not self.kill_trainers:
         with hierarchical_timer("trainer_advance"):
             trainer.advance()
Exemple #11
0
def run_training_aai(run_seed: int, options: RunOptionsAAI) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: training parameters
    """
    with hierarchical_timer("run_training.setup"):
        # Recognize and use docker volume if one is passed as an argument
        # if not options.docker_target_name:
        model_path = f"./models/{options.run_id}"
        summaries_dir = "./summaries"
        # else:
        #     model_path = f"/{options.docker_target_name}/models/{options.run_id}"
        #     summaries_dir = f"/{options.docker_target_name}/summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            summaries_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(summaries_dir)
        gauge_write = GaugeWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)

        if options.env_path is None:
            port = AnimalAIEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory_aai(
            options.env_path,
            # options.docker_target_name,
            run_seed,
            port,
            options.n_arenas_per_env,
            options.arena_config,
            options.resolution,
        )
        if options.train_model:
            engine_config = EngineConfig(
                options.width,
                options.height,
                AnimalAIEnvironment.QUALITY_LEVEL.train,
                AnimalAIEnvironment.TIMESCALE.train,
                AnimalAIEnvironment.TARGET_FRAME_RATE.train,
            )
        else:
            engine_config = EngineConfig(
                AnimalAIEnvironment.WINDOW_WIDTH.play,
                AnimalAIEnvironment.WINDOW_HEIGHT.play,
                AnimalAIEnvironment.QUALITY_LEVEL.play,
                AnimalAIEnvironment.TIMESCALE.play,
                AnimalAIEnvironment.TARGET_FRAME_RATE.play,
            )
        env_manager = SubprocessEnvManagerAAI(env_factory, engine_config,
                                              options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson)
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            options.train_model,
            options.load_model,
            run_seed,
            maybe_meta_curriculum,
            # options.multi_gpu,
        )
        # Create controller and begin training.
        tc = TrainerControllerAAI(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            options.train_model,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_timing_tree(summaries_dir, options.run_id)
Exemple #12
0
def run_training(run_seed: int, options: RunOptions, num_areas: int) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param num_areas: Number of training areas to instantiate
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        torch_utils.set_torch_config(options.torch_settings)
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        run_logs_dir = checkpoint_settings.run_logs_dir
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            checkpoint_settings.write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            checkpoint_settings.maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states in case of resume
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # In case of initialization, set full init_path for all behaviors
        elif checkpoint_settings.maybe_init_path is not None:
            setup_init_path(options.behaviors, checkpoint_settings.maybe_init_path)

        # Configure Tensorboard Writers and StatsReporter
        stats_writers = register_stats_writer_plugins(options)
        for sw in stats_writers:
            StatsReporter.add_writer(sw)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            num_areas,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, options, env_settings.num_envs)
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=checkpoint_settings.write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=checkpoint_settings.maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            checkpoint_settings.write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(checkpoint_settings.write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
Exemple #13
0
def load_demonstration(
    file_path: str
) -> Tuple[BrainParameters, List[AgentInfoActionPairProto], int]:
    """
    Loads and parses a demonstration file.
    :param file_path: Location of demonstration file (.demo).
    :return: BrainParameter and list of AgentInfoActionPairProto containing demonstration data.
    """

    # First 32 bytes of file dedicated to meta-data.
    INITIAL_POS = 33
    file_paths = []
    if os.path.isdir(file_path):
        all_files = os.listdir(file_path)
        for _file in all_files:
            if _file.endswith(".demo"):
                file_paths.append(os.path.join(file_path, _file))
        if not all_files:
            raise ValueError("There are no '.demo' files in the provided directory.")
    elif os.path.isfile(file_path):
        file_paths.append(file_path)
        file_extension = pathlib.Path(file_path).suffix
        if file_extension != ".demo":
            raise ValueError(
                "The file is not a '.demo' file. Please provide a file with the "
                "correct extension."
            )
    else:
        raise FileNotFoundError(
            "The demonstration file or directory {} does not exist.".format(file_path)
        )

    brain_params = None
    brain_param_proto = None
    info_action_pairs = []
    total_expected = 0
    for _file_path in file_paths:
        with open(_file_path, "rb") as fp:
            with hierarchical_timer("read_file"):
                data = fp.read()
            next_pos, pos, obs_decoded = 0, 0, 0
            while pos < len(data):
                next_pos, pos = _DecodeVarint32(data, pos)
                if obs_decoded == 0:
                    meta_data_proto = DemonstrationMetaProto()
                    meta_data_proto.ParseFromString(data[pos : pos + next_pos])
                    total_expected += meta_data_proto.number_steps
                    pos = INITIAL_POS
                if obs_decoded == 1:
                    brain_param_proto = BrainParametersProto()
                    brain_param_proto.ParseFromString(data[pos : pos + next_pos])
                    pos += next_pos
                if obs_decoded > 1:
                    agent_info_action = AgentInfoActionPairProto()
                    agent_info_action.ParseFromString(data[pos : pos + next_pos])
                    if brain_params is None:
                        brain_params = BrainParameters.from_proto(
                            brain_param_proto, agent_info_action.agent_info
                        )
                    info_action_pairs.append(agent_info_action)
                    if len(info_action_pairs) == total_expected:
                        break
                    pos += next_pos
                obs_decoded += 1
    if not brain_params:
        raise RuntimeError(
            f"No BrainParameters found in demonstration file at {file_path}."
        )
    return brain_params, info_action_pairs, total_expected
Exemple #14
0
def test_timers() -> None:
    with mock.patch("mlagents_envs.timers._global_timer_stack",
                    new_callable=timers.TimerStack) as test_timer:
        # First, run some simple code
        with timers.hierarchical_timer("top_level"):
            for i in range(3):
                with timers.hierarchical_timer("multiple"):
                    decorated_func(i, i)

            raised = False
            try:
                with timers.hierarchical_timer("raises"):
                    raise RuntimeError("timeout!")
            except RuntimeError:
                raised = True

            with timers.hierarchical_timer("post_raise"):
                assert raised
                pass

        # We expect the hierarchy to look like
        #   (root)
        #       top_level
        #           multiple
        #               decorated_func
        #           raises
        #           post_raise
        root = test_timer.root
        assert root.children.keys() == {"top_level"}

        top_level = root.children["top_level"]
        assert top_level.children.keys() == {
            "multiple", "raises", "post_raise"
        }

        # make sure the scope was closed properly when the exception was raised
        raises = top_level.children["raises"]
        assert raises.count == 1

        multiple = top_level.children["multiple"]
        assert multiple.count == 3

        timer_tree = test_timer.get_timing_tree()

        expected_tree = {
            "name": "root",
            "total": mock.ANY,
            "count": 1,
            "self": mock.ANY,
            "children": {
                "top_level": {
                    "total": mock.ANY,
                    "count": 1,
                    "self": mock.ANY,
                    "children": {
                        "multiple": {
                            "total": mock.ANY,
                            "count": 3,
                            "self": mock.ANY,
                            "children": {
                                "decorated_func": {
                                    "total": mock.ANY,
                                    "count": 3,
                                    "self": mock.ANY,
                                }
                            },
                        },
                        "raises": {
                            "total": mock.ANY,
                            "count": 1,
                            "self": mock.ANY
                        },
                        "post_raise": {
                            "total": mock.ANY,
                            "count": 1,
                            "self": mock.ANY
                        },
                    },
                }
            },
            "gauges": {
                "my_gauge": {
                    "value": 4.0,
                    "max": 4.0,
                    "min": 0.0,
                    "count": 3
                }
            },
        }

        assert timer_tree == expected_tree
Exemple #15
0
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        # Recognize and use docker volume if one is passed as an argument
        if not options.docker_target_name:
            model_path = f"./models/{options.run_id}"
            summaries_dir = "./summaries"
        else:
            model_path = f"/{options.docker_target_name}/models/{options.run_id}"
            summaries_dir = f"/{options.docker_target_name}/summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            summaries_dir,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(summaries_dir)
        gauge_write = GaugeWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)

        if options.env_path is None:
            port = UnityEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory(
            options.env_path,
            options.docker_target_name,
            options.no_graphics,
            run_seed,
            port,
            options.env_args,
        )
        engine_config = EngineConfig(
            options.width,
            options.height,
            options.quality_level,
            options.time_scale,
            options.target_frame_rate,
        )
        env_manager = SubprocessEnvManager(env_factory, engine_config,
                                           options.num_envs)
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum_config, env_manager, options.lesson)
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed)
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            options.run_id,
            model_path,
            options.keep_checkpoints,
            options.train_model,
            options.load_model,
            run_seed,
            maybe_meta_curriculum,
            options.multi_gpu,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            model_path,
            summaries_dir,
            options.run_id,
            options.save_freq,
            maybe_meta_curriculum,
            options.train_model,
            run_seed,
            sampler_manager,
            resampling_interval,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()