コード例 #1
0
class IndoorEnvironment(environment.Environment):
    """Environment wrapper that drives a ``RoomSimulator`` instance.

    The most recent state, action index and reward are kept on the instance
    as ``last_state``, ``last_action`` and ``last_reward``.
    """

    # One row per discrete action index; the selected row is what gets
    # passed to the simulator's ``step``.
    ACTION_LIST = [
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ]

    @staticmethod
    def get_action_size(env_name):
        """Return the number of discrete actions (``env_name`` is unused)."""
        return len(IndoorEnvironment.ACTION_LIST)

    @staticmethod
    def get_objective_size(env_name):
        """Return the ``'objective_size'`` entry of the config for
        ``env_name``, or 0 when the entry is absent."""
        return sim_config.get(env_name).get('objective_size', 0)

    def __init__(self, env_name, env_args, thread_index):
        """Create the simulator and perform an initial reset.

        Args:
            env_name: key used to look up the simulator config.
            env_args: optional mapping merged over the looked-up config;
                ``None`` means no overrides.
            thread_index: integer used to build the simulator id
                (``'simNN'``) and its log directory name.
        """
        environment.Environment.__init__(self)

        self.last_state = None
        self.last_action = 0
        self.last_reward = 0

        simargs = sim_config.get(env_name)
        simargs['id'] = 'sim%02d' % thread_index
        log_root = IndoorEnvironment.get_log_dir()
        simargs['logdir'] = os.path.join(log_root, simargs['id'])

        # Caller-supplied overrides win over the looked-up configuration.
        if env_args is not None:
            simargs.update(env_args)

        self._sim = RoomSimulator(simargs)
        self._sim_obs_space = self._sim.get_observation_space(simargs['outputs'])
        self.reset()

    def reset(self):
        """Reset the simulator and rebuild ``last_state`` from its result.

        Also clears ``last_action`` and ``last_reward`` back to 0.
        """
        reset_result = self._sim.reset()

        self._episode_info = reset_result.get('episode_info')
        full_state = reset_result.get('observation')
        self._last_full_state = full_state

        frame = full_state['observation']['sensors']['color']['data']
        objective = full_state.get('measurements')
        self.last_state = {
            'image': self._preprocess_frame(frame),
            'objective': objective,
        }
        self.last_action = 0
        self.last_reward = 0

    def stop(self):
        """Close the simulator if one is attached."""
        if self._sim is None:
            return
        self._sim.close_game()

    def _preprocess_frame(self, image):
        """Convert a raw frame into a 3-channel float32 array scaled by 1/255.

        A 2-D input is treated as grayscale and stacked three times along a
        new last axis; any other input is treated as RGBA and has its last
        channel dropped.
        """
        if len(image.shape) != 2:
            # assume rgba: discard the trailing (alpha) channel
            channels = image[:, :, :-1]
        else:
            # assume gray: replicate into three identical channels
            channels = np.dstack([image, image, image])
        return channels.astype(np.float32) / 255.0

    def process(self, action):
        """Advance the simulator by one step.

        Args:
            action: index into ``ACTION_LIST``.

        Returns:
            Tuple ``(state, reward, terminal, pixel_change)``. On a terminal
            step the previous ``last_state`` is returned as the state.
        """
        sim_action = IndoorEnvironment.ACTION_LIST[action]

        step_result = self._sim.step(sim_action)
        self._last_full_state = step_result  # Last observed state

        frame = step_result['observation']['sensors']['color']['data']
        reward = step_result['rewards']
        terminal = step_result['terminals']
        objective = step_result.get('measurements')

        if terminal:
            # Keep the previous state on the terminal step.
            state = self.last_state
        else:
            state = {
                'image': self._preprocess_frame(frame),
                'objective': objective,
            }

        pixel_change = self._calc_pixel_change(state['image'],
                                               self.last_state['image'])

        self.last_state, self.last_action, self.last_reward = state, action, reward
        return state, reward, terminal, pixel_change

    def is_all_scheduled_episodes_done(self):
        """Forward the simulator's ``is_all_scheduled_episodes_done`` result."""
        return self._sim.is_all_scheduled_episodes_done()
コード例 #2
0
class IndoorEnvironment(environment.Environment):
    """Environment wrapper around ``RoomSimulator`` with optional
    ``objectType`` (segmentation) observations.

    Besides ``last_state`` / ``last_action`` / ``last_reward`` the instance
    also tracks the step before that in ``prev_state`` / ``prev_action`` /
    ``prev_reward``.
    """

    # One row per discrete action index; the selected row is passed to the
    # simulator's ``step``.
    ACTION_LIST = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]

    @staticmethod
    def get_action_size(env_name):
        """Return the number of discrete actions (``env_name`` is unused)."""
        return len(IndoorEnvironment.ACTION_LIST)

    @staticmethod
    def get_objective_size(env_name):
        """Return the ``'objective_size'`` config entry for ``env_name``
        (0 when the entry is absent)."""
        simargs = sim_config.get(env_name)
        return simargs.get('objective_size', 0)

    def __init__(self, env_name, env_args, termination_time, thread_index):
        """Create the simulator and perform an initial reset.

        Args:
            env_name: key used to look up the simulator config.
            env_args: optional mapping merged over the looked-up config.
            termination_time: stored on the instance, assigned to
                ``simargs["measure_fun"].termination_time`` and used as the
                divisor for rewards in ``process``.
            thread_index: integer used to build the simulator id
                (``'simNN'``) and its log directory name.

        Raises:
            Exception: whatever the setup raised is logged to stdout and
                re-raised unchanged.
        """
        environment.Environment.__init__(self)
        try:
            self.last_state = None
            self.last_action = 0
            self.last_reward = 0

            self.prev_state = None
            self.prev_action = 0
            self.prev_reward = 0

            simargs = sim_config.get(env_name)
            simargs['id'] = 'sim%02d' % thread_index
            simargs['logdir'] = os.path.join(IndoorEnvironment.get_log_dir(),
                                             simargs['id'])

            # Merge in extra env args
            if env_args is not None:
                simargs.update(env_args)

            simargs["measure_fun"].termination_time = termination_time

            self.termination_time = termination_time

            self._sim = RoomSimulator(simargs)
            self._sim_obs_space = self._sim.get_observation_space(
                simargs['outputs'])
            self.reset()
        except Exception as e:
            print("Error in indoor_env init():", str(e))  #, flush=True)
            # Re-raise the original exception so its type, message and
            # traceback are preserved (previously a bare ``Exception`` with
            # no information replaced it).
            raise

    def reset(self):
        """Reset the simulator and rebuild ``last_state`` from its result.

        ``last_state`` gets an ``'objectType'`` entry only when the
        simulator reports an ``objectType`` sensor. The ``last_*`` and
        ``prev_*`` action/reward trackers are cleared.
        """
        result = self._sim.reset()

        self._episode_info = result.get('episode_info')
        self._last_full_state = result.get('observation')
        img = self._last_full_state['observation']['sensors']['color']['data']
        objective = self._last_full_state.get(
            'measurements')  # with measure function!
        state = {'image': self._preprocess_frame(img), 'objective': objective}
        object_type = self._last_full_state["observation"]["sensors"].get(
            "objectType", None)
        if object_type is not None:
            # Only channel 2 of the objectType sensor data is used.
            object_type = object_type["data"][:, :, 2]
            state.update(
                {'objectType': self._preprocess_frame(object_type, "segm")})

        self.last_state = state
        self.last_action = 0
        self.last_reward = 0

        self.prev_state = None
        self.prev_action = 0
        self.prev_reward = 0

    def stop(self):
        """Close the simulator if one is attached."""
        if self._sim is not None:
            self._sim.close_game()

    def _preprocess_frame(self, image, mode="segm"):
        """Convert a raw simulator frame into a network-ready array.

        Args:
            image: array with a ``shape``; 2-D inputs are treated as
                object-type/depth maps, anything else as RGBA.
            mode: when it contains ``"segm"`` a 2-D input is returned as an
                int32 label map with the value 255 replaced by 0. Ignored
                for non-2-D inputs.

        Returns:
            int32 2-D array for segmentation input; otherwise a float32
            array divided by 255 (2-D for other 2-D input, 3-channel for
            RGBA input).
        """
        # The first two dimension sizes are swapped with ``reshape`` (not a
        # transpose); per the original author's note this is essential for
        # non-square images coming from the simulator.
        if len(image.shape) == 2:  # assume object_type or depth
            image = image.reshape((image.shape[1], image.shape[0]))
            if "segm" in mode:
                # Build a new array instead of assigning in place: the
                # reshape may return a view, and an in-place write would
                # then modify the caller's (simulator's) buffer.
                return np.where(image == 255, 0, image).astype(np.int32)
            # 2-D non-segmentation input: already reshaped, so it must skip
            # the 3-D reshape below (which used to raise IndexError here).
        else:  # assume rgba
            image = image[:, :, :-1]
            image = image.reshape(
                (image.shape[1], image.shape[0], image.shape[2]))
        image = image.astype(np.float32)
        image = image / 255.0
        return image

    def process(self, action, flag=1):
        """Advance the simulator by one step.

        Args:
            action: index into ``ACTION_LIST``.
            flag: forwarded unchanged to the simulator's ``step``.

        Returns:
            Tuple ``(state, reward, terminal, pixel_change)``. ``reward`` is
            the simulator reward divided by ``termination_time``. On a
            terminal step the previous ``last_state`` is returned as the
            state. ``pixel_change`` is ``None`` whenever the simulator
            reports an ``objectType`` sensor.
        """
        real_action = IndoorEnvironment.ACTION_LIST[action]

        full_state = self._sim.step(real_action, flag=flag)
        self._last_full_state = full_state  # Last observed state
        obs = full_state['observation']['sensors']['color']['data']
        # Reward scaling (division by the termination time), not clipping.
        reward = full_state['rewards'] / self.termination_time
        terminal = full_state['terminals']
        objective = full_state.get('measurements')
        object_type = self._last_full_state["observation"]["sensors"].get(
            "objectType", None)

        if not terminal:
            state = {
                'image': self._preprocess_frame(obs),
                'objective': objective
            }
            if object_type is not None:
                # Only channel 2 of the objectType sensor data is used.
                object_type = object_type["data"][:, :, 2]
                state.update({
                    'objectType':
                    self._preprocess_frame(object_type, "segm")
                })

        else:
            state = self.last_state

        # Pixel change is only computed when no objectType sensor exists.
        pixel_change = None
        if object_type is None:
            pixel_change = self._calc_pixel_change(state['image'],
                                                   self.last_state['image'])

        # Shift the current "last" values into the "prev" slots before
        # overwriting them with this step's results.
        self.prev_state = self.last_state
        self.prev_action = self.last_action
        self.prev_reward = self.last_reward

        self.last_state = state
        self.last_action = action
        self.last_reward = reward
        return state, reward, terminal, pixel_change

    def is_all_scheduled_episodes_done(self):
        """Forward the simulator's ``is_all_scheduled_episodes_done`` result."""
        return self._sim.is_all_scheduled_episodes_done()
コード例 #3
0
class IndoorEnv(gym.Env):
    """Gym environment backed by a ``RoomSimulator``.

    The simulator is not created in ``__init__``; call ``configure`` with
    the simulator arguments before resetting or stepping.
    """

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self):
        self._last_state = None  # most recent simulator state, used by _render
        self._sim = None
        self.viewer = None

    def configure(self, sim_args):
        """Create the simulator and derive action/observation spaces.

        Args:
            sim_args: mapping passed to ``RoomSimulator``; its ``'outputs'``
                entry is used to query the simulator's observation space.
        """
        self._sim = RoomSimulator(sim_args)
        #signal.signal(signal.SIGINT, self.signal_handler)
        self._sim_obs_space = self._sim.get_observation_space(sim_args['outputs'])
        #self.action_space = spaces.Discrete(self._sim.num_buttons)
        self.action_space = spaces.MultiBinary(self._sim.num_buttons)
        # The simulator's color space shape is read as (width, height, ...).
        self.screen_height = self._sim_obs_space['color'].shape[1]
        self.screen_width = self._sim_obs_space['color'].shape[0]
        self.observation_space = spaces.Box(low=0, high=255,
            shape=(self.screen_height, self.screen_width, 3))
        # TODO: have more complex observation space with additional modalities and measurements
        # obs_space = self._sim.get_observation_space
        #self.observation_space = spaces.Dict({"images": ..., "depth": ...})

    @property
    def simulator(self):
        """The ``sim`` attribute of the wrapped ``RoomSimulator``."""
        return self._sim.sim

    def _seed(self, seed=0xA3C):
        """Sets the seed for this env's random number generator(s).
        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.
        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        # TODO: generate another seed for use in simulator?
        # What happens to this seed?
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _reset(self):
        """Resets the state of the environment and returns an initial observation.
        Returns: observation (object): the initial observation of the
            space.
        """
        res = self._sim.reset()
        observation = res.get('observation')
        # Remember the reset observation so a render issued right after a
        # reset does not show the last frame of the previous episode.
        self._last_state = observation
        return observation

    def _step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        Args:
            action (object): an action provided by the environment
        Returns:
            observation (object): agent's observation of the current environment
            reward (float) : amount of reward returned after previous action
            done (boolean): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
        """
        ## a = [0]*self._sim.num_buttons
        ## a[action] = 1
        state = self._sim.step(action)
        self._last_state = state  # Last observed state
        # Everything except the reward and terminal flag is the observation.
        observation = {k:v for k,v in state.items() if k not in ['rewards','terminals']}
        info = state['info']
        return observation, state['rewards'], state['terminals'], info

    def _render(self, mode='human', close=False):
        """Renders the environment.
        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.) By convention,
        if mode is:
        - human: render to the current display or terminal and
          return nothing. Usually for human consumption.
        - rgb_array: Return an numpy.ndarray with shape (x, y, 3),
          representing RGB values for an x-by-y pixel image, suitable
          for turning into a video.
        - ansi: Return a string (str) or StringIO.StringIO containing a
          terminal-style text representation. The text can include newlines
          and ANSI escape sequences (e.g. for colors).
        Note:
            Make sure that your class's metadata 'render.modes' key includes
              the list of supported modes. It's recommended to call super()
              in implementations to use the functionality of this method.
        Args:
            mode (str): the mode to render with
            close (bool): close all open renderings
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None      # If we don't None out this reference pyglet becomes unhappy
            return
        if self._last_state is not None:
            img = self._last_state['observation']['sensors']['color']['data']
            if len(img.shape) == 2:  # assume gray
                img = np.dstack([img, img, img])
            else:  # assume rgba
                img = img[:, :, :-1]
            # Swap the first two dimension sizes via reshape (not transpose).
            img = img.reshape((img.shape[1], img.shape[0], img.shape[2]))
            if mode == 'human':
                # Imported lazily so the rendering backend is only needed
                # when a human-mode render is actually requested.
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
            elif mode == 'rgb_array':
                return img

    def _close(self):
        """Close the simulator if one has been configured."""
        if self._sim is not None:
            self._sim.close_game()
コード例 #4
0
class IndoorEnvironment(environment.Environment):
    """Environment wrapper around ``RoomSimulator`` that also reports the
    simulator's ``success`` flag and can display frames in a viewer."""

    # One row per discrete action index; the selected row is passed to the
    # simulator's ``step``.
    ACTION_LIST = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]

    @staticmethod
    def get_action_size(env_name):
        """Return the number of discrete actions (``env_name`` is unused)."""
        return len(IndoorEnvironment.ACTION_LIST)

    @staticmethod
    def get_objective_size(env_name):
        """Return the ``'objective_size'`` config entry for ``env_name``
        (0 when the entry is absent)."""
        simargs = sim_config.get(env_name)
        return simargs.get('objective_size', 0)

    def __init__(self, env_name, env_args, thread_index):
        """Create the viewer and simulator, then perform an initial reset.

        Args:
            env_name: key used to look up the simulator config.
            env_args: optional mapping merged over the looked-up config.
            thread_index: integer used to build the simulator id
                (``'simNN'``) and its log directory name.
        """
        environment.Environment.__init__(self)
        self.i_episode = 0

        self.last_state = None
        self.last_action = 0
        self.last_reward = 0

        simargs = sim_config.get(env_name)
        simargs['id'] = 'sim%02d' % thread_index
        simargs['logdir'] = os.path.join(IndoorEnvironment.get_log_dir(),
                                         simargs['id'])
        self.viewer = rendering.SimpleImageViewer()

        # Merge in extra env args
        if env_args is not None:
            simargs.update(env_args)

        print(simargs)
        self._sim = RoomSimulator(simargs)
        self._sim_obs_space = self._sim.get_observation_space(
            simargs['outputs'])
        self.reset()

    def render(self, img):
        """Show a frame in the viewer, upscaled to 512x512.

        Args:
            img: 3-D array whose last channel is dropped before display
                (presumably the simulator's RGBA color frame -- confirm
                against callers).
        """
        img = img[:, :, :-1]
        # Swap the first two dimension sizes via reshape (not transpose).
        img = img.reshape((img.shape[1], img.shape[0], img.shape[2]))
        # The third positional parameter of cv2.resize is ``dst``, so the
        # interpolation flag has to be passed by keyword to take effect.
        img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_CUBIC)
        self.viewer.imshow(img)
        time.sleep(.1)

    def reset(self):
        """Reset the simulator and rebuild ``last_state`` from its result.

        Also clears ``last_action`` and ``last_reward`` back to 0.
        """
        result = self._sim.reset()

        self._episode_info = result.get('episode_info')
        self._last_full_state = result.get('observation')
        obs = self._last_full_state['observation']['sensors']['color']['data']
        # self.render(obs)
        objective = self._last_full_state.get('measurements')
        state = {'image': self._preprocess_frame(obs), 'objective': objective}
        self.last_state = state
        self.last_action = 0
        self.last_reward = 0
        # Disabled debugging aid: dump per-episode info to a numbered folder.
        # self.i_episode = self.i_episode + 1
        # print("Saving episode {}".format(self.i_episode))
        # self.directory = "./{}".format(self.i_episode)
        # os.mkdir(self.directory)
        # with open(os.path.join(self.directory, "episode_info.txt"), "w") as outfile:
        #     json.dump(self._episode_info, outfile, indent=4, cls=NumpyEncoder)
        # self.i = 0

    def stop(self):
        """Close the simulator if one is attached."""
        if self._sim is not None:
            self._sim.close_game()

    def _preprocess_frame(self, image):
        """Convert a raw frame into a 3-channel float32 array scaled by 1/255.

        A 2-D input is treated as grayscale and stacked three times; any
        other input is treated as RGBA and has its last channel dropped.
        """
        if len(image.shape) == 2:  # assume gray
            image = np.dstack([image, image, image])
        else:  # assume rgba
            image = image[:, :, :-1]
        image = image.astype(np.float32)
        image = image / 255.0
        return image

    def process(self, action):
        """Advance the simulator by one step.

        Args:
            action: index into ``ACTION_LIST``.

        Returns:
            Tuple ``(state, reward, terminal, pixel_change, success)``. On a
            terminal step the previous ``last_state`` is returned as the
            state. ``success`` is the simulator's ``'success'`` entry.
        """
        real_action = IndoorEnvironment.ACTION_LIST[action]

        full_state = self._sim.step(real_action)
        self._last_full_state = full_state  # Last observed state
        obs = full_state['observation']['sensors']['color']['data']
        # Disabled debugging aid: show / save the color and depth frames.
        # self.render(obs)
        # depth = full_state['observation']['sensors']['depth']['data']
        # Image.fromarray(obs.astype('uint8')).save(os.path.join(self.directory, 'color{}.png'.format(self.i)))
        # Image.fromarray(depth, 'L').save(os.path.join(self.directory, 'depth{}.png'.format(self.i)))
        # self.i+=1
        reward = full_state['rewards']
        terminal = full_state['terminals']
        success = full_state['success']
        objective = full_state.get('measurements')

        if not terminal:
            state = {
                'image': self._preprocess_frame(obs),
                'objective': objective
            }
        else:
            # Keep the previous state on the terminal step.
            state = self.last_state

        pixel_change = self._calc_pixel_change(state['image'],
                                               self.last_state['image'])
        self.last_state = state
        self.last_action = action
        self.last_reward = reward
        return state, reward, terminal, pixel_change, success

    def is_all_scheduled_episodes_done(self):
        """Forward the simulator's ``is_all_scheduled_episodes_done`` result."""
        return self._sim.is_all_scheduled_episodes_done()