コード例 #1
    def __init__(self, environment_filename=None, docker_training=False, worker_id=0, retro=True,
                 timeout_wait=30, realtime_mode=False):
        """
        Launch the Unity environment and configure the gym action/observation spaces.

        Arguments:
          environment_filename: The file path to the Unity executable.  Does not require the extension.
          docker_training: Whether this is running within a docker environment and should use a virtual
            frame buffer (xvfb).
          worker_id: The index of the worker in the case where multiple environments are running.  Each
            environment reserves port (5005 + worker_id) for communication with the Unity executable.
          retro: Resize visual observation to 84x84 (uint8) and flattens action space.
          timeout_wait: Time for python interface to wait for environment to connect.
          realtime_mode: Whether to render the environment window image and run environment at realtime.

        Raises:
          UnityGymException: if the academy name is not "ObstacleTower-v<version>", the
            version is not in ``ALLOWED_VERSIONS``, the environment does not expose exactly
            one brain, the brain has no visual observation, or more than one agent is found.
        """
        if self.is_grading():
            # Under evaluation the caller's launch settings are overridden.
            environment_filename = None
            docker_training = True

        # NOTE(review): worker_id is passed positionally and the remaining options by
        # keyword -- confirm against the installed UnityEnvironment signature.
        self._env = UnityEnvironment(environment_filename,
                                     worker_id,
                                     docker_training=docker_training,
                                     timeout_wait=timeout_wait)

        # The academy name is expected to look like "ObstacleTower-v<version>".
        split_name = self._env.academy_name.split('-v')
        if len(split_name) == 2 and split_name[0] == "ObstacleTower":
            self.name, self.version = split_name
        else:
            raise UnityGymException(
                "Attempting to launch non-Obstacle Tower environment"
            )

        if self.version not in self.ALLOWED_VERSIONS:
            raise UnityGymException(
                "Invalid Obstacle Tower version.  Your build is v" + self.version +
                " but only the following versions are compatible with this gym: " +
                str(self.ALLOWED_VERSIONS)
            )

        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._done_grading = False
        self._flattener = None
        self._seed = None
        self._floor = None
        self.realtime_mode = realtime_mode
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.retro = retro

        # Retro mode drives both the flattened action space and uint8 images.
        flatten_branched = self.retro
        uint8_visual = self.retro

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if brain.number_visual_observations == 0:
            raise UnityGymException("Environment provides no visual observations.")

        self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1:
            logger.warning("The environment contains more than one visual observation. "
                           "Please note that only the first will be provided in the observation.")

        # Check for number of agents in scene.
        initial_info = self._env.reset(train_mode=not self.realtime_mode)[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        branch_sizes = brain.vector_action_space_size
        if len(branch_sizes) == 1:
            self._action_space = spaces.Discrete(branch_sizes[0])
        elif flatten_branched:
            # Several action branches are collapsed into one discrete space; the
            # flattener is kept so step() can translate actions back.
            self._flattener = ActionFlattener(branch_sizes)
            self._action_space = self._flattener.action_space
        else:
            self._action_space = spaces.MultiDiscrete(branch_sizes)

        self.action_meanings = brain.vector_action_descriptions

        depth = 3
        image_space_max = 1.0
        image_space_dtype = np.float32
        camera_height = brain.camera_resolutions[0]["height"]
        camera_width = brain.camera_resolutions[0]["width"]
        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

        image_space = spaces.Box(
            0, image_space_max,
            dtype=image_space_dtype,
            shape=(camera_height, camera_width, depth)
        )
        if self.retro:
            # Retro observations are the image alone.
            self._observation_space = image_space
        else:
            # Otherwise the observation is (image, keys held, time remaining).
            max_float = np.finfo(np.float32).max
            keys_space = spaces.Discrete(5)
            time_remaining_space = spaces.Box(low=0.0, high=max_float, shape=(1,), dtype=np.float32)
            self._observation_space = spaces.Tuple(
                (image_space, keys_space, time_remaining_space)
            )

    def done_grading(self):
        """Return the flag that ``step`` sets once evaluation has completed."""
        finished = self._done_grading
        return finished

    def is_grading(self):
        """Return the ``OTC_EVALUATION_ENABLED`` environment variable, or False when unset.

        The raw string is returned, so any non-empty value is truthy.
        """
        return os.environ.get('OTC_EVALUATION_ENABLED', False)

    def reset(self):
        """Reset the environment and return the initial observation.

        A starting floor and tower seed previously chosen through ``floor()`` and
        ``seed()`` are forwarded to the environment as reset parameters.

        Returns:
            observation (object): the first element of what ``_single_step``
            produces for the freshly reset environment.

        Raises:
            UnityGymException: if ``_check_agents`` rejects the agent count.
        """
        reset_params = {}
        if self._floor is not None:
            reset_params['floor-number'] = self._floor
        if self._seed is not None:
            reset_params['tower-seed'] = self._seed

        info = self._env.reset(config=reset_params,
                               train_mode=not self.realtime_mode)[self.brain_name]
        # Validate the agent count instead of computing it and discarding it.
        self._check_agents(len(info.agents))
        self.game_over = False

        obs, _reward, _done, _info = self._single_step(info)
        return obs


    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.

        Args:
            action (object): the action to apply; when an action flattener is
                configured it is first translated into a per-branch action list.

        Returns:
            observation (object): agent's observation of the current environment
            reward (float): amount of reward returned after previous action
            done (boolean): whether the episode has ended; forced to True when the
                text observation reports 'evaluation_complete'.
            info (dict): contains auxiliary diagnostic information, including BrainInfo.

        Raises:
            UnityGymException: if ``_check_agents`` rejects the agent count.
        """
        if self._flattener is not None:
            # Translate action into list
            action = self._flattener.lookup_action(action)

        brain_info = self._env.step(action)[self.brain_name]
        # Validate the agent count instead of computing it and discarding it.
        self._check_agents(len(brain_info.agents))
        self._current_state = brain_info

        obs, reward, done, info = self._single_step(brain_info)
        # game_over mirrors the environment's own done flag, before the
        # evaluation override below is applied.
        self.game_over = done

        if info.get('text_observation') == 'evaluation_complete':
            done = True
            self._done_grading = True

        return obs, reward, done, info


    def _single_step(self, info):
        """Convert one BrainInfo-like result into a gym ``(obs, reward, done, info)`` tuple.

        Only the first visual observation of the first agent is used. In retro
        mode the observation is the resized image with the key/time stats drawn
        onto it; otherwise it is the tuple built by ``_prepare_tuple_observation``.

        Args:
            info: object exposing ``visual_observations``, ``vector_observations``,
                ``rewards``, ``local_done`` and ``text_observations``.

        Returns:
            tuple: (observation, reward, done, dict with 'text_observation' and 'brain_info').
        """
        self.visual_obs = self._preprocess_single(info.visual_observations[0][0, :, :, :])

        if self.retro:
            self.visual_obs = self._resize_observation(self.visual_obs)
            self.visual_obs = self._add_stats_to_image(
                self.visual_obs, info.vector_observations[0])
            default_observation = self.visual_obs
        else:
            # Without this branch the tuple replaced the retro image and the
            # non-retro path left the observation unassigned.
            default_observation = self._prepare_tuple_observation(
                self.visual_obs, info.vector_observations[0])

        return default_observation, info.rewards[0], info.local_done[0], {
            "text_observation": info.text_observations[0],
            "brain_info": info}


    def _preprocess_single(self, single_visual_obs):
        """Return the visual observation in the configured pixel format.

        When ``self.uint8_visual`` is set the values are scaled by 255 and cast
        to ``np.uint8``; otherwise the array is returned unchanged.
        """
        if self.uint8_visual:
            return (255.0 * single_visual_obs).astype(np.uint8)
        # Previously unreachable, which made this path return None.
        return single_visual_obs


    def render(self, mode='rgb_array'):
        """Return the most recently stored visual observation.

        ``mode`` is accepted but not consulted.
        """
        latest_frame = self.visual_obs
        return latest_frame

    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.

        While grading, this call never returns: it idles in a sleep loop.
        Otherwise the wrapped Unity environment is closed.
        """
        if self.is_grading():
            import time
            # NOTE(review): presumably keeps the process alive until the
            # evaluator terminates it; the sleep interval is arbitrary -- confirm.
            while True:
                time.sleep(10)

        self._env.close()


    def get_action_meanings(self):
        """Return the action descriptions stored in ``self.action_meanings``."""
        meanings = self.action_meanings
        return meanings

    def seed(self, seed=None):
        """Sets a fixed seed for this env's random number generator(s).
        The valid range for seeds is [0, 100). By default a random seed
        will be chosen.

        Args:
            seed: value convertible with ``int()``, or None to clear the fixed seed.

        The seed is stored and only forwarded on the next ``reset()``. An
        out-of-range seed is still stored, but a warning is logged.
        """
        if seed is None:
            self._seed = seed
            # Nothing to validate; falling through would call int(None).
            return

        seed = int(seed)
        if seed < 0 or seed >= 100:
            logger.warning(
                "Seed outside of valid range [0, 100). A random seed "
                "within the valid range will be used on next reset."
            )
        logger.warning("New seed " + str(seed) + " will apply on next reset.")
        self._seed = seed


    def floor(self, floor=None):
        """Sets the starting floor to a fixed floor number on subsequent environment
        resets. The valid range for the starting floor is [0, 25).

        Args:
            floor: value convertible with ``int()``, or None to clear the fixed floor.

        The floor is stored and only forwarded on the next ``reset()``. An
        out-of-range floor is still stored, but a warning is logged.
        """
        if floor is None:
            self._floor = floor
            # Nothing to validate; falling through would call int(None).
            return

        floor = int(floor)
        if floor < 0 or floor >= 25:
            logger.warning(
                "Starting floor outside of valid range [0, 25). Floor 0 will be used "
                "on next reset."
            )
        logger.warning("New starting floor " + str(floor) + " will apply on next reset.")
        self._floor = floor


    @staticmethod
    def _resize_observation(observation):
        """
        Re-sizes visual observation to 84x84

        The array is converted to a PIL image, resized with nearest-neighbour
        sampling, and converted back to a numpy array. Declared static because
        it takes no ``self`` yet is invoked through the instance.
        """
        obs_image = Image.fromarray(observation)
        obs_image = obs_image.resize((84, 84), Image.NEAREST)
        return np.array(obs_image)


    @staticmethod
    def _prepare_tuple_observation(vis_obs, vector_obs):
        """
        Converts separate visual and vector observation into prepared tuple

        Entries 0-5 of ``vector_obs`` are reduced with ``argmax`` to a key count
        and entry 6 is passed through as the time value. Declared static because
        it takes no ``self`` yet is invoked through the instance.

        Returns:
            tuple: (vis_obs, key_num, time)
        """
        key = vector_obs[0:6]
        time_left = vector_obs[6]
        key_num = np.argmax(key, axis=0)
        return vis_obs, key_num, time_left


    @staticmethod
    def _add_stats_to_image(vis_obs, vector_obs):
        """
        Displays time left and number of keys on visual observation

        The top 10 rows of ``vis_obs`` are cleared, one marker per key is drawn
        in rows 1-4 (channels 0 and 1), and a bar whose length is proportional
        to the time value (capped at 10000) is drawn in rows 6-9 on channel 1.
        ``vis_obs`` is modified in place and also returned. Declared static
        because it takes no ``self`` yet is invoked through the instance.
        """
        key = vector_obs[0:6]
        time_left = vector_obs[6]
        key_num = np.argmax(key, axis=0)
        time_num = min(time_left, 10000) / 10000

        vis_obs[0:10, :, :] = 0
        for i in range(key_num):
            # Markers are 10 px wide and spaced 16.8 px apart (84 / 5).
            start = int(i * 16.8) + 4
            end = start + 10
            vis_obs[1:5, start:end, 0:2] = 255
        vis_obs[6:10, 0:int(time_num * 84), 1] = 255
        return vis_obs


    def _check_agents(self, n_agents):
        """Validate the agent count and remember it on first use.

        Args:
            n_agents: number of agents reported by the environment.

        Raises:
            UnityGymException: if more than one agent is present, or the count
                differs from the one recorded by an earlier call.
        """
        if n_agents > 1:
            raise UnityGymException(
                "The environment was launched as a single-agent environment, however "
                "there is more than one agent in the scene.")
        if self._n_agents is None:
            self._n_agents = n_agents
            logger.info("{} agents within environment.".format(n_agents))
        elif self._n_agents != n_agents:
            raise UnityGymException("The number of agents in the environment has changed since "
                                    "initialization. This is not supported.")


    def metadata(self):
        """Return the render metadata; 'rgb_array' is the only listed mode.

        NOTE(review): gym normally reads this as an attribute -- confirm whether
        a ``@property`` decorator is expected here.
        """
        supported_modes = ['rgb_array']
        return {'render.modes': supported_modes}

    def reward_range(self):
        """Return the unbounded reward interval ``(-inf, inf)``.

        NOTE(review): gym normally reads this as an attribute -- confirm whether
        a ``@property`` decorator is expected here.
        """
        infinity = float('inf')
        return (-infinity, infinity)

    def spec(self):
        """Return None; no spec is provided.

        NOTE(review): gym normally reads this as an attribute -- confirm whether
        a ``@property`` decorator is expected here.
        """
        return None

    def action_space(self):
        """Return the action space stored in ``self._action_space``.

        NOTE(review): gym normally reads this as an attribute -- confirm whether
        a ``@property`` decorator is expected here.
        """
        space = self._action_space
        return space

    def observation_space(self):
        """Return the observation space stored in ``self._observation_space``.

        NOTE(review): gym normally reads this as an attribute -- confirm whether
        a ``@property`` decorator is expected here.
        """
        space = self._observation_space
        return space

    def number_agents(self):
        """Return the agent count recorded in ``self._n_agents`` (None until recorded).

        NOTE(review): possibly meant to be a ``@property`` -- confirm against callers.
        """
        count = self._n_agents
        return count