コード例 #1
class IndoorEnvironment(environment.Environment):
  """Environment that drives a RoomSimulator instance.

  The methods below reset and step the simulator and cache the most recent
  state, action and reward on the instance.
  """


  def get_action_size(env_name):
    return len(IndoorEnvironment.ACTION_LIST)

  @staticmethod
  def get_objective_size(env_name):
    """Return the configured objective size for an environment.

    Declared as a static method so that an instance call does not pass the
    instance to ``sim_config.get`` in place of the environment name.

    Args:
      env_name: Key passed to ``sim_config.get`` to fetch the simulator
        arguments.

    Returns:
      The ``'objective_size'`` entry of the simulator arguments, or 0 when
      that entry is absent.
    """
    simargs = sim_config.get(env_name)
    return simargs.get('objective_size', 0)

  def __init__(self, env_name, env_args, thread_index):
    """Create the simulator for one worker.

    Args:
      env_name: Key used to look up simulator arguments in ``sim_config``.
      env_args: Optional extra arguments (see the review note below).
      thread_index: Worker index; used to build the simulator id.
    """
    # Nothing has been observed or done yet.
    self.last_state = None
    self.last_action = 0
    self.last_reward = 0

    # NOTE(review): the dict returned by sim_config.get() is modified in
    # place below; confirm it is not shared between environments.
    simargs = sim_config.get(env_name)
    simargs['id'] = 'sim%02d' % thread_index
    # Each simulator logs into its own sub-directory named after its id.
    simargs['logdir'] = os.path.join(IndoorEnvironment.get_log_dir(), simargs['id'])

    # Merge in extra env args
    # NOTE(review): the body of this `if` is missing from this copy of the
    # code; presumably it merges env_args into simargs. Restore before use.
    if env_args is not None:

    self._sim = RoomSimulator(simargs)
    # Observation space for the outputs requested in the simulator arguments.
    self._sim_obs_space = self._sim.get_observation_space(simargs['outputs'])

  def reset(self):
    result = self._sim.reset()
    self._episode_info = result.get('episode_info')
    self._last_full_state = result.get('observation')
    obs = self._last_full_state['observation']['sensors']['color']['data']
    objective = self._last_full_state.get('measurements')
    state = { 'image': self._preprocess_frame(obs), 'objective': objective }
    self.last_state = state
    self.last_action = 0
    self.last_reward = 0

  def stop(self):
    """Stop the environment; only acts when a simulator is present."""
    # NOTE(review): the body of this `if` is missing from this copy of the
    # code; presumably it shuts the simulator down. Restore before use.
    if self._sim is not None:

  def _preprocess_frame(self, image):
    if len(image.shape) == 2:  # assume gray
        image = np.dstack([image, image, image])
    else:  # assume rgba
        image = image[:, :, :-1]
    image = image.astype(np.float32)
    image = image / 255.0
    return image

  def process(self, action):
    real_action = IndoorEnvironment.ACTION_LIST[action]

    full_state = self._sim.step(real_action)
    self._last_full_state = full_state  # Last observed state
    obs = full_state['observation']['sensors']['color']['data']
    reward = full_state['rewards']
    terminal = full_state['terminals']
    objective = full_state.get('measurements')

    if not terminal:
      state = { 'image': self._preprocess_frame(obs), 'objective': objective }
      state = self.last_state

    pixel_change = self._calc_pixel_change(state['image'], self.last_state['image'])
    self.last_state = state
    self.last_action = action
    self.last_reward = reward
    return state, reward, terminal, pixel_change

  def is_all_scheduled_episodes_done(self):
    return self._sim.is_all_scheduled_episodes_done()
コード例 #2
class IndoorEnvironment(environment.Environment):
    """Environment that drives a RoomSimulator instance.

    Besides the most recent state, action and reward, this variant also
    keeps the ones from the step before (``prev_*`` attributes).
    """

    # Action vectors handed to the simulator; process() indexes this list
    # with the discrete action id it receives.
    ACTION_LIST = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]

    def get_action_size(env_name):
        return len(IndoorEnvironment.ACTION_LIST)

    @staticmethod
    def get_objective_size(env_name):
        """Return the configured objective size for an environment.

        Declared as a static method so that an instance call does not pass
        the instance to ``sim_config.get`` in place of the environment name.

        Args:
            env_name: Key passed to ``sim_config.get`` to fetch the
                simulator arguments.

        Returns:
            The ``'objective_size'`` entry of the simulator arguments, or 0
            when that entry is absent.
        """
        simargs = sim_config.get(env_name)
        return simargs.get('objective_size', 0)

    def __init__(self, env_name, env_args, termination_time, thread_index):
            """Create the simulator for one worker.

            Args:
                env_name: Key used to look up simulator arguments in
                    ``sim_config``.
                env_args: Optional extra arguments (see review notes below).
                termination_time: Stored on the instance and on the
                    simulator's ``measure_fun``; process() divides rewards
                    by it.
                thread_index: Worker index; used to build the simulator id.
            """
            # NOTE(review): this copy of the method is incomplete - several
            # statements below are cut off (unclosed calls, an empty `if`,
            # and an `except` whose `try:` is only present as a comment).
            # Restore the missing lines before use.
            self.last_state = None
            self.last_action = 0
            self.last_reward = 0

            # Values from the step before the last one.
            self.prev_state = None
            self.prev_action = 0
            self.prev_reward = 0

            simargs = sim_config.get(env_name)
            simargs['id'] = 'sim%02d' % thread_index
            # NOTE(review): the second argument of this join is missing.
            simargs['logdir'] = os.path.join(IndoorEnvironment.get_log_dir(),

            # Merge in extra env args
            # NOTE(review): the body of this `if` is missing.
            if env_args is not None:

            simargs["measure_fun"].termination_time = termination_time

            self.termination_time = termination_time

            # try:
            self._sim = RoomSimulator(simargs)
            # NOTE(review): the argument list of this call is missing.
            self._sim_obs_space = self._sim.get_observation_space(
        except Exception as e:
            print("Error in indoor_env init():", str(e))  #, flush=True)
            # NOTE(review): raising a bare Exception discards the type of the
            # original error; consider a plain `raise` instead.
            raise Exception

    def reset(self):
        """Reset the simulator and cache the first state of the new episode.

        Builds the state from the preprocessed color frame and the
        ``'measurements'`` entry, then clears the last/previous action,
        reward and previous state. Returns nothing.
        """
        result = self._sim.reset()

        self._episode_info = result.get('episode_info')
        self._last_full_state = result.get('observation')
        img = self._last_full_state['observation']['sensors']['color']['data']
        objective = self._last_full_state.get(
            'measurements')  # with measure function!
        state = {'image': self._preprocess_frame(img), 'objective': objective}
        # The objectType sensor is optional; None when the simulator does not
        # provide it.
        object_type = self._last_full_state["observation"]["sensors"].get(
            "objectType", None)
        if object_type is not None:
            # Only channel index 2 of the objectType data is used.
            object_type = object_type["data"][:, :, 2]
            # NOTE(review): the start of the next statement is missing from
            # this copy; presumably it adds the 'objectType' entry to
            # `state`. Restore before use.
                {'objectType': self._preprocess_frame(object_type, "segm")})

        # print(object_type.shape)
        self.last_state = state
        self.last_action = 0
        self.last_reward = 0

        self.prev_state = None
        self.prev_action = 0
        self.prev_reward = 0

    def stop(self):
        """Stop the environment; only acts when a simulator is present."""
        # NOTE(review): the body of this `if` is missing from this copy of
        # the code; presumably it shuts the simulator down. Restore before use.
        if self._sim is not None:

    def _preprocess_frame(self, image, mode="segm"):
        if len(image.shape) == 2:  # assume object_type or depth
            image = image.reshape((image.shape[1], image.shape[0]))
            if "segm" in mode:
                image[image == 255] = 0
                return image.astype(np.int32)
            #image = np.dstack([image, image, image])
        else:  # assume rgba
            image = image[:, :, :-1]
        image = image.reshape((image.shape[1], image.shape[0], image.shape[2]))
        #Reshape is essential, when non-square image from simulator!
        image = image.astype(np.float32)
        image = image / 255.0
        return image

    def process(self, action, flag=1):
        """Apply one discrete action and return the resulting transition.

        Args:
            action: Index into ``IndoorEnvironment.ACTION_LIST``.
            flag: Forwarded unchanged to ``self._sim.step``.

        Returns:
            Tuple ``(state, reward, terminal, pixel_change)``.
        """
        # NOTE(review): this copy of the method is incomplete - the dict
        # literal and two calls below are not closed, and the branch that
        # reuses self.last_state has lost its `else:`. Restore the missing
        # lines before use.
        real_action = IndoorEnvironment.ACTION_LIST[action]

        full_state = self._sim.step(real_action, flag=flag)
        #print("Step made")
        self._last_full_state = full_state  # Last observed state
        obs = full_state['observation']['sensors']['color']['data']
        reward = full_state[
            'rewards'] / self.termination_time  # reward divided by termination_time (originally labelled "reward clipping")
        terminal = full_state['terminals']
        objective = full_state.get('measurements')
        # The objectType sensor is optional; None when absent.
        object_type = self._last_full_state["observation"]["sensors"].get(
            "objectType", None)

        if not terminal:
            state = {
                'image': self._preprocess_frame(obs),
                'objective': objective
            if object_type is not None:
                # Only channel index 2 of the objectType data is used.
                object_type = object_type["data"][:, :, 2]
                    self._preprocess_frame(object_type, "segm")

            state = self.last_state

        # pixel_change is only computed when no objectType sensor is present.
        pixel_change = None
        if object_type is None:
            pixel_change = self._calc_pixel_change(state['image'],

        # Shift the cached "last" values into the "prev" slots before they
        # are overwritten.
        self.prev_state = self.last_state
        self.prev_action = self.last_action
        self.prev_reward = self.last_reward

        self.last_state = state
        self.last_action = action
        self.last_reward = reward
        return state, reward, terminal, pixel_change

    def is_all_scheduled_episodes_done(self):
        return self._sim.is_all_scheduled_episodes_done()
コード例 #3
class IndoorEnv(gym.Env):
    """gym.Env wrapper around a RoomSimulator.

    The simulator is not created in the constructor; configure() builds it
    and defines the action and observation spaces.
    """

    # Render modes this environment declares to gym.
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self):
        self._last_state = None
        self._sim = None
        self.viewer = None

    def configure(self, sim_args):
        """Create the simulator and derive the gym spaces from it.

        Args:
            sim_args: Simulator arguments; passed to ``RoomSimulator`` and
                read for its ``'outputs'`` entry.
        """
        simulator = RoomSimulator(sim_args)
        self._sim = simulator
        #signal.signal(signal.SIGINT, self.signal_handler)
        obs_space = simulator.get_observation_space(sim_args['outputs'])
        self._sim_obs_space = obs_space

        # One binary entry per simulator button.
        #self.action_space = spaces.Discrete(self._sim.num_buttons)
        self.action_space = spaces.MultiBinary(simulator.num_buttons)

        # Height is read from index 1 and width from index 0 of the color
        # sensor's shape.
        color_shape = obs_space['color'].shape
        self.screen_height = color_shape[1]
        self.screen_width = color_shape[0]
        frame_shape = (self.screen_height, self.screen_width, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)
        # TODO: have more complex observation space with additional modalities and measurements
        # obs_space = self._sim.get_observation_space
        #self.observation_space = spaces.Dict({"images": ..., "depth": ...})

    def simulator(self):
        return self._sim.sim

    def _seed(self, seed=0xA3C):
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Args:
            seed: Seed passed to ``seeding.np_random``.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        # TODO: generate another seed for use in simulator?
        # What happens to this seed?
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _reset(self):
        """Resets the state of the environment and returns an initial observation.
        Returns: observation (object): the initial observation of the
        res = self._sim.reset()
        return res.get('observation')

    def _step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
            action (object): an action provided by the environment
            observation (object): agent's observation of the current environment
            reward (float) : amount of reward returned after previous action
            done (boolean): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        ## a = [0]*self._sim.num_buttons
        ## a[action] = 1
        state = self._sim.step(action)
        self._last_state = state  # Last observed state
        observation = {k:v for k,v in state.items() if k not in ['rewards','terminals']}
        info = state['info']
        return observation, state['rewards'], state['terminals'], info

    def _render(self, mode='human', close=False):
        """Renders the environment.
        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.) By convention,
        if mode is:
        - human: render to the current display or terminal and
          return nothing. Usually for human consumption.
        - rgb_array: Return an numpy.ndarray with shape (x, y, 3),
          representing RGB values for an x-by-y pixel image, suitable
          for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines
          and ANSI escape sequences (e.g. for colors).
            Make sure that your class's metadata 'render.modes' key includes
              the list of supported modes. It's recommended to call super()
              in implementations to use the functionality of this method.
            mode (str): the mode to render with
            close (bool): close all open renderings
        if close:
            if self.viewer is not None:
                self.viewer = None      # If we don't None out this reference pyglet becomes unhappy
        if self._last_state is not None:
            img = self._last_state['observation']['sensors']['color']['data']
            if len(img.shape) == 2:  # assume gray
                img = np.dstack([img, img, img])
            else:  # assume rgba
                img = img[:, :, :-1]
            img = img.reshape((img.shape[1], img.shape[0], img.shape[2]))
            if mode == 'human':
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    if self.viewer is None:
                        self.viewer = rendering.SimpleImageViewer()
            elif mode == 'rgb_array':
                return img

    def _close(self):
        """Close the environment; only acts when a simulator is present."""
        # NOTE(review): the body of this `if` is missing from this copy of
        # the code; presumably it shuts the simulator down. Restore before use.
        if self._sim is not None:
コード例 #4
class IndoorEnvironment(environment.Environment):
    """Environment that drives a RoomSimulator instance.

    This variant also reports the simulator's ``'success'`` entry from
    process() and owns an image viewer for debugging frames.
    """

    # Action vectors handed to the simulator; process() indexes this list
    # with the discrete action id it receives.
    ACTION_LIST = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]

    def get_action_size(env_name):
        return len(IndoorEnvironment.ACTION_LIST)

    @staticmethod
    def get_objective_size(env_name):
        """Return the configured objective size for an environment.

        Declared as a static method so that an instance call does not pass
        the instance to ``sim_config.get`` in place of the environment name.

        Args:
            env_name: Key passed to ``sim_config.get`` to fetch the
                simulator arguments.

        Returns:
            The ``'objective_size'`` entry of the simulator arguments, or 0
            when that entry is absent.
        """
        simargs = sim_config.get(env_name)
        return simargs.get('objective_size', 0)

    def __init__(self, env_name, env_args, thread_index):
        """Create the simulator and an image viewer for one worker.

        Args:
            env_name: Key used to look up simulator arguments in
                ``sim_config``.
            env_args: Optional extra arguments (see review notes below).
            thread_index: Worker index; used to build the simulator id.
        """
        # NOTE(review): this copy of the method is incomplete - two calls
        # are not closed and one `if` has no body. Restore the missing lines
        # before use.
        self.i_episode = 0

        self.last_state = None
        self.last_action = 0
        self.last_reward = 0

        simargs = sim_config.get(env_name)
        simargs['id'] = 'sim%02d' % thread_index
        # NOTE(review): the second argument of this join is missing.
        simargs['logdir'] = os.path.join(IndoorEnvironment.get_log_dir(),
        self.viewer = rendering.SimpleImageViewer()

        # Merge in extra env args
        # NOTE(review): the body of this `if` is missing.
        if env_args is not None:

        self._sim = RoomSimulator(simargs)
        # NOTE(review): the argument list of this call is missing.
        self._sim_obs_space = self._sim.get_observation_space(

    def render(self, img):
        """Show a simulator color frame in the viewer, enlarged to 512x512.

        Args:
            img: numpy array holding the frame; its last channel is dropped
                before display.
        """
        img = img[:, :, :-1]
        img = img.reshape((img.shape[1], img.shape[0], img.shape[2]))
        # The third positional parameter of cv2.resize is `dst`, so the
        # interpolation flag has to be passed by keyword to take effect.
        img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_CUBIC)
        # Previously the resized frame was computed and then discarded.
        self.viewer.imshow(img)

    def reset(self):
        result = self._sim.reset()

        self._episode_info = result.get('episode_info')
        self._last_full_state = result.get('observation')
        obs = self._last_full_state['observation']['sensors']['color']['data']
        # self.render(obs)
        objective = self._last_full_state.get('measurements')
        state = {'image': self._preprocess_frame(obs), 'objective': objective}
        self.last_state = state
        self.last_action = 0
        self.last_reward = 0
        # self.i_episode = self.i_episode + 1
        # print("Saving episode {}".format(self.i_episode))
        # self.directory = "./{}".format(self.i_episode)
        # os.mkdir(self.directory)
        # with open(os.path.join(self.directory, "episode_info.txt"), "w") as outfile:
        #     json.dump(self._episode_info, outfile, indent=4, cls=NumpyEncoder)
        # self.i = 0

    def stop(self):
        """Stop the environment; only acts when a simulator is present."""
        # NOTE(review): the body of this `if` is missing from this copy of
        # the code; presumably it shuts the simulator down. Restore before use.
        if self._sim is not None:

    def _preprocess_frame(self, image):
        if len(image.shape) == 2:  # assume gray
            image = np.dstack([image, image, image])
        else:  # assume rgba
            image = image[:, :, :-1]
        image = image.astype(np.float32)
        image = image / 255.0
        return image

    def process(self, action):
        """Apply one discrete action and return the resulting transition.

        Args:
            action: Index into ``IndoorEnvironment.ACTION_LIST``.

        Returns:
            Tuple ``(state, reward, terminal, pixel_change, success)``.
        """
        # NOTE(review): this copy of the method is incomplete - the dict
        # literal and the _calc_pixel_change call below are not closed, and
        # the branch that reuses self.last_state has lost its `else:`.
        # Restore the missing lines before use.
        real_action = IndoorEnvironment.ACTION_LIST[action]

        full_state = self._sim.step(real_action)
        self._last_full_state = full_state  # Last observed state
        obs = full_state['observation']['sensors']['color']['data']
        # self.render(obs)
        # depth = full_state['observation']['sensors']['depth']['data']
        # Image.fromarray(obs.astype('uint8')).save(os.path.join(self.directory, 'color{}.png'.format(self.i)))
        # Image.fromarray(depth, 'L').save(os.path.join(self.directory, 'depth{}.png'.format(self.i)))
        # self.i+=1
        reward = full_state['rewards']
        terminal = full_state['terminals']
        # Unlike 'measurements', 'success' is read with [] and must be present.
        success = full_state['success']
        objective = full_state.get('measurements')

        if not terminal:
            state = {
                'image': self._preprocess_frame(obs),
                'objective': objective
            state = self.last_state

        pixel_change = self._calc_pixel_change(state['image'],
        self.last_state = state
        self.last_action = action
        self.last_reward = reward
        return state, reward, terminal, pixel_change, success

    def is_all_scheduled_episodes_done(self):
        return self._sim.is_all_scheduled_episodes_done()