Code example #1
def make_chickAI_unity_env(options):
    """
    Build ChickAI UnityEnvironment from command line options.
    """
    engine_config = EngineConfig(
        width=options.width,
        height=options.height,
        quality_level=options.quality_level,
        time_scale=options.time_scale,
        target_frame_rate=options.target_frame_rate,
        capture_frame_rate=options.capture_frame_rate,
    )
    env_args = _build_chickAI_env_args(
        input_resolution=options.input_resolution,
        episode_steps=options.episode_steps,
        video_1_path=options.video1,
        video_2_path=options.video2,
        log_dir=options.log_dir,
        test_mode=options.test_mode)
    # Set up FloatPropertiesChannel to receive auxiliary agent information.
    agent_info_channel = FloatPropertiesChannel()
    unity_env = make_unity_env(env_path=options.env_path,
                               port=options.base_port,
                               seed=options.seed,
                               env_args=env_args,
                               engine_config=engine_config,
                               side_channels=[agent_info_channel])
    return unity_env, agent_info_channel
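
A minimal sketch of how the returned pair might be consumed. The options object and the "episode_done" key are assumptions for illustration; get_property returns None for any key the Unity side has not yet sent.

# Hypothetical usage; options would come from the project's argument parser.
unity_env, agent_info_channel = make_chickAI_unity_env(options)
unity_env.reset()
# Auxiliary values published by the Unity side arrive through the channel.
done_flag = agent_info_channel.get_property("episode_done")  # key is an assumption
unity_env.close()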
Code example #2
File: test_simple_rl.py  Project: iapetos45/ml-agents
def _check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    meta_curriculum=None,
    success_threshold=0.9,
    env_manager=None,
):
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as dir:
        run_id = "id"
        save_freq = 99999
        seed = 1337
        StatsReporter.writers.clear()  # Clear StatsReporters so we don't write to file
        debug_writer = DebugWriter()
        StatsReporter.add_writer(debug_writer)
        # Make sure threading is turned off for determinism
        trainer_config["threading"] = False
        if env_manager is None:
            env_manager = SimpleEnvManager(env, FloatPropertiesChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=dir,
            run_id=run_id,
            model_path=dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )

        tc = TrainerController(
            trainer_factory=trainer_factory,
            summaries_dir=dir,
            model_path=dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )

        # Begin training
        tc.start_learning(env_manager)
        # For tests where we are just checking setup and not reward.
        if success_threshold is not None:
            processed_rewards = [
                reward_processor(rewards)
                for rewards in env.final_rewards.values()
            ]
            assert all(not math.isnan(reward) for reward in processed_rewards)
            assert all(reward > success_threshold
                       for reward in processed_rewards)
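
For context, a sketch of how a helper like this tends to be invoked; SimpleEnvironment is a scripted test environment from the same suite, and its name and signature are assumptions here. Note that trainer_config must already be a parsed dict, since the function assigns trainer_config["threading"] directly.

# Hypothetical call with an already-parsed trainer config dict.
env = SimpleEnvironment(["Simple"], use_discrete=True)  # name/signature assumed
_check_environment_trains(env, trainer_config)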
Code example #3
    def initialise_environment(self):
        """Initialise and reset unity environment"""
        engine_configuration_channel = EngineConfigurationChannel()
        self.float_properties_channel = FloatPropertiesChannel()
        self.env = UnityEnvironment(file_name=self.env_path,
                                    base_port=5004,
                                    side_channels=[
                                        engine_configuration_channel,
                                        self.float_properties_channel
                                    ])

        # Reset the environment
        self.env.reset()

        # Set the default brain to work with
        self.group_name = self.env.get_agent_groups()[0]
        self.group_spec = self.env.get_agent_group_spec(self.group_name)

        # Set the time scale of the engine
        engine_configuration_channel.set_configuration_parameters(
            time_scale=self.time_scale)
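
After initialise_environment returns, the stored channel can push parameters back to Unity from other methods of the same class. A minimal sketch; the "condition" key is borrowed from example #9 below, where this method reappears.

# Inside another method of the same class; values queued with set_property
# are delivered to Unity with the next reset or step.
self.float_properties_channel.set_property("condition", 1.0)
self.env.reset()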
Code example #4
File: test_simple_rl.py  Project: stonerey/ml-agents
def _check_environment_trains(env,
                              config,
                              meta_curriculum=None,
                              success_threshold=0.99):
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as dir:
        run_id = "id"
        save_freq = 99999
        seed = 1337
        StatsReporter.writers.clear()  # Clear StatsReporters so we don't write to file
        trainer_config = yaml.safe_load(config)
        env_manager = SimpleEnvManager(env, FloatPropertiesChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=dir,
            run_id=run_id,
            model_path=dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )

        tc = TrainerController(
            trainer_factory=trainer_factory,
            summaries_dir=dir,
            model_path=dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )

        # Begin training
        tc.start_learning(env_manager)
        print(tc._get_measure_vals())
        # For tests where we are just checking setup and not reward.
        if success_threshold is not None:
            for mean_reward in tc._get_measure_vals().values():
                assert not math.isnan(mean_reward)
                assert mean_reward > success_threshold
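
Unlike example #2, this variant takes the trainer configuration as a raw YAML string and parses it with yaml.safe_load. A sketch of the kind of string that might be passed; the keys follow the ml-agents trainer schema of that era, but the exact values are illustrative assumptions.

# Hypothetical YAML config string for a short smoke-test run.
config = """
default:
    trainer: ppo
    batch_size: 16
    buffer_size: 64
    max_steps: 2500
"""
_check_environment_trains(env, config)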
Code example #5
File: raycast_env.py  Project: denizhanpak/BBE_Code
    def __init__(
        self,
        env_path: Optional[str] = None,
        imprint_video: Optional[str] = None,
        test_video: Optional[str] = None,
        log_dir: Optional[str] = None,
        input_resolution: int = 64,
        episode_steps: int = 1000,
        seed: int = 0,
        test_mode: bool = False,
        base_port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
        time_scale: int = 20,
        capture_frame_rate: int = 60,
        width: int = 80,
        height: int = 80,
        use_visual: bool = True,
        **kwargs,
    ):
        engine_config = EngineConfig(
            width=width,
            height=height,
            quality_level=5,
            time_scale=time_scale,
            target_frame_rate=-1,
            capture_frame_rate=capture_frame_rate,
        )
        env_args = _build_chickAI_env_args(
            input_resolution=input_resolution,
            episode_steps=episode_steps,
            imprint_video=imprint_video,
            test_video=test_video,
            log_dir=log_dir,
            test_mode=test_mode,
        )
        agent_info_channel = FloatPropertiesChannel()
        unity_env = _make_unity_env(
            env_path=env_path,
            port=base_port,
            seed=seed,
            env_args=env_args,
            engine_config=engine_config,
            side_channels=[agent_info_channel],
        )
        env = UnityToGymWrapper(
            unity_env, flatten_branched=True, use_visual=use_visual
        )
        super().__init__(env)
        self.env = env
        self.agent_info_channel = agent_info_channel
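
The enclosing class is not shown in this excerpt, but since __init__ ends by wrapping the Unity build in UnityToGymWrapper and calling super().__init__(env), instances should behave like a Gym environment. A sketch, with the class name and paths as placeholders:

# Hypothetical; "RaycastEnv" and both paths are placeholders.
env = RaycastEnv(env_path="builds/chickai", imprint_video="videos/imprint.mp4")
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
aux = env.agent_info_channel.get_property("some_key")  # key is an assumption
env.close()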
Code example #6
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    engine_configuration: EngineConfig,
) -> None:
    env_factory: Callable[[int, List[SideChannel]],
                          UnityEnvironment] = cloudpickle.loads(
                              pickled_env_factory)
    shared_float_properties = FloatPropertiesChannel()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_configuration)
    stats_channel = StatsSideChannel()
    env: BaseEnv = env_factory(
        worker_id,
        [shared_float_properties, engine_configuration_channel, stats_channel],
    )

    def _send_response(cmd_name, payload):
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.get_agent_groups():
            all_step_result[brain_name] = env.get_step_result(brain_name)
        return all_step_result

    def external_brains():
        result = {}
        for brain_name in env.get_agent_groups():
            result[brain_name] = group_spec_to_brain_parameters(
                brain_name, env.get_agent_group_spec(brain_name))
        return result

    try:
        while True:
            cmd: EnvironmentCommand = parent_conn.recv()
            if cmd.name == "step":
                all_action_info = cmd.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.action) != 0:
                        env.set_actions(brain_name, action_info.action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent of those in all other
                # processes and the "main" process, so after we send back the root
                # timer we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(all_step_result, get_timer_root(),
                                             env_stats)
                step_queue.put(
                    EnvironmentResponse("step", worker_id, step_response))
                reset_timers()
            elif cmd.name == "external_brains":
                _send_response("external_brains", external_brains())
            elif cmd.name == "get_properties":
                reset_params = shared_float_properties.get_property_dict_copy()
                _send_response("get_properties", reset_params)
            elif cmd.name == "reset":
                for k, v in cmd.payload.items():
                    shared_float_properties.set_property(k, v)
                env.reset()
                all_step_result = _generate_all_results()
                _send_response("reset", all_step_result)
            elif cmd.name == "close":
                break
    except (KeyboardInterrupt, UnityCommunicationException,
            UnityTimeOutException):
        logger.info(
            f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(EnvironmentResponse("env_close", worker_id, None))
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
        # will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()
        # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for
        # more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
Code example #7
def test_float_properties():
    sender = FloatPropertiesChannel()
    receiver = FloatPropertiesChannel()

    sender.set_property("prop1", 1.0)

    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_id: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_id: receiver}, data)

    val = receiver.get_property("prop1")
    assert val == 1.0
    val = receiver.get_property("prop2")
    assert val is None
    sender.set_property("prop2", 2.0)

    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_id: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_id: receiver}, data)

    val = receiver.get_property("prop1")
    assert val == 1.0
    val = receiver.get_property("prop2")
    assert val == 2.0
    assert len(receiver.list_properties()) == 2
    assert "prop1" in receiver.list_properties()
    assert "prop2" in receiver.list_properties()
    val = sender.get_property("prop1")
    assert val == 1.0

    assert receiver.get_property_dict_copy() == {"prop1": 1.0, "prop2": 2.0}
    assert receiver.get_property_dict_copy() == sender.get_property_dict_copy()
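
The encode/decode pair above is repeated twice; for tests like this one it can be factored into a helper built only from the private UnityEnvironment hooks already shown. A sketch:

# Round-trip helper using only the calls exercised in the test above.
def round_trip(sender, receiver):
    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_id: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_id: receiver}, data)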
Code example #8
def test_float_properties():
    sender = FloatPropertiesChannel()
    receiver = FloatPropertiesChannel()

    sender.set_property("prop1", 1.0)

    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    val = receiver.get_property("prop1")
    assert val == 1.0
    val = receiver.get_property("prop2")
    assert val is None
    sender.set_property("prop2", 2.0)

    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    val = receiver.get_property("prop1")
    assert val == 1.0
    val = receiver.get_property("prop2")
    assert val == 2.0
    assert len(receiver.list_properties()) == 2
    assert "prop1" in receiver.list_properties()
    assert "prop2" in receiver.list_properties()
    val = sender.get_property("prop1")
    assert val == 1.0

    assert receiver.get_property_dict_copy() == {"prop1": 1.0, "prop2": 2.0}
    assert receiver.get_property_dict_copy() == sender.get_property_dict_copy()
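
In normal use this round trip is handled by UnityEnvironment itself rather than by a hand-built SideChannelManager. A sketch of the production wiring, with the build path as a placeholder:

# Hypothetical wiring; "MyBuild" is a placeholder path.
channel = FloatPropertiesChannel()
env = UnityEnvironment(file_name="MyBuild", side_channels=[channel])
channel.set_property("gravity_scale", 1.5)  # queued locally; key is an assumption
env.reset()  # pending side channel messages are exchanged on reset/step
print(channel.get_property_dict_copy())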
Code example #9
class UnityContainer:
    """
    Class encapsulating the ML-Agents connection with Unity, allowing easy interaction with the simulated environment.
    """
    VISUAL_OBSERVATION_INDEX = 0
    VECTOR_OBSERVATIONS_INDEX = 1

    # Path of the pre-built environment
    BUILD_PATH = os.path.join(
        os.path.dirname(__file__),
        "../../Unity Environment/build/deep active inference agent environment"
    )

    def __init__(self, use_editor, time_scale=1):
        """
        Set up the Unity environment
        :param use_editor: Set to true to connect directly to the Unity editor, set to false to use the pre-built
        environment at BUILD_PATH
        :param time_scale: time_scale of the environment (1 is normal time)
        """
        if use_editor:
            self.env_path = None
        else:
            self.env_path = self.BUILD_PATH
        self.time_scale = time_scale
        self.env = None
        self.float_properties_channel = None
        self.group_name = None
        self.group_spec = None

    def initialise_environment(self):
        """Initialise and reset unity environment"""
        engine_configuration_channel = EngineConfigurationChannel()
        self.float_properties_channel = FloatPropertiesChannel()
        self.env = UnityEnvironment(file_name=self.env_path,
                                    base_port=5004,
                                    side_channels=[
                                        engine_configuration_channel,
                                        self.float_properties_channel
                                    ])

        # Reset the environment
        self.env.reset()

        # Set the default brain to work with
        self.group_name = self.env.get_agent_groups()[0]
        self.group_spec = self.env.get_agent_group_spec(self.group_name)

        # Set the time scale of the engine
        engine_configuration_channel.set_configuration_parameters(
            time_scale=self.time_scale)

    def set_condition(self, condition: Condition):
        """Sets the experimental condition setting"""
        self.float_properties_channel.set_property("condition",
                                                   condition.value)

    def set_visible_arm(self, visible_arm: VisibleArm):
        """Sets the visible arm setting"""
        self.float_properties_channel.set_property("visiblearm",
                                                   visible_arm.value)

    def set_stimulation(self, stimulation: Stimulation):
        """Sets the stimulation setting"""
        self.float_properties_channel.set_property("stimulation",
                                                   stimulation.value)

    def get_joint_observation(self):
        """:returns joint angles of the agent"""
        return self.env.get_step_result(
            self.group_name).obs[self.VECTOR_OBSERVATIONS_INDEX][:, :2]

    def get_touch_observation(self):
        """:returns last visual and tactile touch events"""
        return self.env.get_step_result(
            self.group_name).obs[self.VECTOR_OBSERVATIONS_INDEX][:, 2:4]

    def get_current_env_time(self):
        """:returns current env time"""
        return self.env.get_step_result(
            self.group_name).obs[self.VECTOR_OBSERVATIONS_INDEX][:, 4]

    def get_cartesian_distance(self):
        """:returns cartesian euclidean (absolute) distance between real hand and rubber hand"""
        return self.env.get_step_result(
            self.group_name).obs[self.VECTOR_OBSERVATIONS_INDEX][0, 5]

    def get_horizontal_distance(self):
        """:returns horizontal euclidean distance between real hand and rubber hand"""
        return self.env.get_step_result(
            self.group_name).obs[self.VECTOR_OBSERVATIONS_INDEX][0, 6]

    def get_rubber_joint_observation(self):
        """:returns joint angles of the rubber arm"""
        return self.env.get_step_result(
            self.group_name).obs[self.VECTOR_OBSERVATIONS_INDEX][:, 7:9]

    def get_visual_observation(self):
        """:returns visual perception of the agent"""
        return self.env.get_step_result(
            self.group_name).obs[self.VISUAL_OBSERVATION_INDEX][0]

    def act(self, action):
        """Make the agent perform an action (velocity) in the environment"""
        self.env.set_actions(self.group_name, np.append([[0]], action, axis=1))
        self.env.step()

    def set_rotation(self, rotation):
        """Manually set the joint angles to a particular rotation"""
        self.env.set_actions(self.group_name, np.append([[1]],
                                                        rotation,
                                                        axis=1))
        self.env.step()

    def set_rubber_arm_rotation(self, rotation):
        """Manually set the joint angles of the rubber arm to a particular rotation"""
        self.env.set_actions(self.group_name, np.append([[2]],
                                                        rotation,
                                                        axis=1))
        self.env.step()

    def reset(self):
        """Reset the environment"""
        self.env.reset()

    def close(self):
        """Gracefully close the environment"""
        self.env.close()
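
A hypothetical end-to-end session with UnityContainer; the Condition enum member and the action shape are assumptions, since those definitions are not part of this snippet.

# Hypothetical session; enum members and action shapes are assumptions.
container = UnityContainer(use_editor=False, time_scale=10)
container.initialise_environment()
container.set_condition(Condition.SYNCHRONOUS)  # member name assumed
container.act(np.array([[0.1, -0.2]]))  # two joint velocities, per the obs layout
joints = container.get_joint_observation()
image = container.get_visual_observation()
container.close()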