class IndoorEnvironment(environment.Environment):
    """Discrete-action wrapper around RoomSimulator for the agent framework."""

    # One-hot button encodings for the three discrete actions.
    ACTION_LIST = [
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ]

    @staticmethod
    def get_action_size(env_name):
        """Number of discrete actions (independent of env_name)."""
        return len(IndoorEnvironment.ACTION_LIST)

    @staticmethod
    def get_objective_size(env_name):
        """Objective vector size configured for this environment (0 if unset)."""
        return sim_config.get(env_name).get('objective_size', 0)

    def __init__(self, env_name, env_args, thread_index):
        """Build a per-worker simulator instance and reset it once.

        Args:
            env_name: key into sim_config.
            env_args: optional dict of overrides merged into the static config.
            thread_index: worker index, used to build a unique sim id/logdir.
        """
        environment.Environment.__init__(self)
        self.last_state = None
        self.last_action = 0
        self.last_reward = 0

        cfg = sim_config.get(env_name)
        cfg['id'] = 'sim%02d' % thread_index
        cfg['logdir'] = os.path.join(IndoorEnvironment.get_log_dir(), cfg['id'])
        # Per-run arguments override the static config.
        if env_args is not None:
            cfg.update(env_args)

        self._sim = RoomSimulator(cfg)
        self._sim_obs_space = self._sim.get_observation_space(cfg['outputs'])
        self.reset()

    def reset(self):
        """Start a new episode and cache its first preprocessed state."""
        result = self._sim.reset()
        self._episode_info = result.get('episode_info')
        self._last_full_state = result.get('observation')
        frame = self._last_full_state['observation']['sensors']['color']['data']
        state = {
            'image': self._preprocess_frame(frame),
            'objective': self._last_full_state.get('measurements'),
        }
        self.last_state = state
        self.last_action = 0
        self.last_reward = 0

    def stop(self):
        """Shut the simulator down (no-op if it was never created)."""
        if self._sim is not None:
            self._sim.close_game()

    def _preprocess_frame(self, image):
        """Convert a raw sensor frame to a float32 RGB image scaled to [0, 1]."""
        if len(image.shape) == 2:
            # Grayscale: replicate into three channels.
            image = np.dstack([image, image, image])
        else:
            # Assume RGBA: drop the alpha channel.
            image = image[:, :, :-1]
        return image.astype(np.float32) / 255.0

    def process(self, action):
        """Apply one discrete action.

        Returns:
            (state, reward, terminal, pixel_change) where state is a dict with
            'image' and 'objective' keys and pixel_change compares the new
            image against the previous one.
        """
        full_state = self._sim.step(IndoorEnvironment.ACTION_LIST[action])
        self._last_full_state = full_state  # keep the raw step result around
        frame = full_state['observation']['sensors']['color']['data']
        reward = full_state['rewards']
        terminal = full_state['terminals']

        if terminal:
            # Episode over: carry the previous state forward instead of
            # preprocessing a frame for a finished episode.
            state = self.last_state
        else:
            state = {
                'image': self._preprocess_frame(frame),
                'objective': full_state.get('measurements'),
            }
        pixel_change = self._calc_pixel_change(state['image'],
                                               self.last_state['image'])

        self.last_state = state
        self.last_action = action
        self.last_reward = reward
        return state, reward, terminal, pixel_change

    def is_all_scheduled_episodes_done(self):
        """Proxy to the simulator's episode-schedule bookkeeping."""
        return self._sim.is_all_scheduled_episodes_done()
class IndoorEnvironment(environment.Environment):
    """RoomSimulator environment variant with optional object-type
    (segmentation) observations and rewards scaled by termination time."""

    # One-hot button encodings for the three discrete actions.
    ACTION_LIST = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]

    @staticmethod
    def get_action_size(env_name):
        """Number of discrete actions (independent of env_name)."""
        return len(IndoorEnvironment.ACTION_LIST)

    @staticmethod
    def get_objective_size(env_name):
        """Objective vector size from the sim config (0 if unset)."""
        simargs = sim_config.get(env_name)
        return simargs.get('objective_size', 0)

    def __init__(self, env_name, env_args, termination_time, thread_index):
        """Create and reset a simulator instance.

        Args:
            env_name: key into sim_config.
            env_args: optional dict of overrides merged into the config.
            termination_time: episode length; written onto the config's
                measure function and used as the reward scaling divisor.
            thread_index: worker index, used to build a unique sim id/logdir.
        """
        environment.Environment.__init__(self)
        try:
            self.last_state = None
            self.last_action = 0
            self.last_reward = 0
            self.prev_state = None
            self.prev_action = 0
            self.prev_reward = 0

            simargs = sim_config.get(env_name)
            simargs['id'] = 'sim%02d' % thread_index
            simargs['logdir'] = os.path.join(IndoorEnvironment.get_log_dir(),
                                             simargs['id'])
            # Merge in extra env args.
            if env_args is not None:
                simargs.update(env_args)
            simargs["measure_fun"].termination_time = termination_time
            self.termination_time = termination_time

            self._sim = RoomSimulator(simargs)
            self._sim_obs_space = self._sim.get_observation_space(
                simargs['outputs'])
            self.reset()
        except Exception as e:
            print("Error in indoor_env init():", str(e))
            # Re-raise the ORIGINAL exception so the caller sees the real
            # error type and traceback (previously this raised a bare
            # `Exception`, discarding both).
            raise

    def reset(self):
        """Start a new episode and cache its first preprocessed state."""
        result = self._sim.reset()
        self._episode_info = result.get('episode_info')
        self._last_full_state = result.get('observation')
        img = self._last_full_state['observation']['sensors']['color']['data']
        objective = self._last_full_state.get(
            'measurements')  # with measure function!
        state = {'image': self._preprocess_frame(img), 'objective': objective}

        # Optional per-pixel object-type (segmentation) channel.
        object_type = self._last_full_state["observation"]["sensors"].get(
            "objectType", None)
        if object_type is not None:
            object_type = object_type["data"][:, :, 2]
            state.update(
                {'objectType': self._preprocess_frame(object_type, "segm")})

        self.last_state = state
        self.last_action = 0
        self.last_reward = 0
        self.prev_state = None
        self.prev_action = 0
        self.prev_reward = 0

    def stop(self):
        """Shut the simulator down (no-op if it was never created)."""
        if self._sim is not None:
            self._sim.close_game()

    def _preprocess_frame(self, image, mode="segm"):
        """Preprocess a raw sensor frame.

        2-D input is treated as a segmentation/depth map: for "segm" mode the
        255 "unknown" label is zeroed and int32 labels are returned. 3-D input
        is treated as RGBA and converted to float32 RGB in [0, 1].
        """
        if len(image.shape) == 2:  # assume object_type or depth
            # Reshape is essential when the simulator returns a non-square
            # frame (dims arrive swapped) -- TODO confirm against simulator.
            image = image.reshape((image.shape[1], image.shape[0]))
            if "segm" in mode:
                image[image == 255] = 0
                return image.astype(np.int32)
        else:  # assume rgba
            image = image[:, :, :-1]
            image = image.reshape((image.shape[1], image.shape[0],
                                   image.shape[2]))
        image = image.astype(np.float32)
        image = image / 255.0
        return image

    def process(self, action, flag=1):
        """Apply one discrete action.

        Returns:
            (state, reward, terminal, pixel_change); reward is scaled by
            termination_time, pixel_change is only computed when no
            objectType sensor is present (otherwise None).
        """
        real_action = IndoorEnvironment.ACTION_LIST[action]
        full_state = self._sim.step(real_action, flag=flag)
        self._last_full_state = full_state  # Last observed state

        obs = full_state['observation']['sensors']['color']['data']
        # Scale (not clip) the reward by episode length so magnitudes are
        # comparable across different termination times.
        reward = full_state['rewards'] / self.termination_time
        terminal = full_state['terminals']
        objective = full_state.get('measurements')
        object_type = self._last_full_state["observation"]["sensors"].get(
            "objectType", None)

        if not terminal:
            state = {
                'image': self._preprocess_frame(obs),
                'objective': objective
            }
            if object_type is not None:
                object_type = object_type["data"][:, :, 2]
                state.update({
                    'objectType': self._preprocess_frame(object_type, "segm")
                })
        else:
            # Episode over: carry the previous state forward.
            state = self.last_state

        pixel_change = None
        if object_type is None:
            pixel_change = self._calc_pixel_change(state['image'],
                                                   self.last_state['image'])

        # Shift the one-step history before overwriting "last".
        self.prev_state = self.last_state
        self.prev_action = self.last_action
        self.prev_reward = self.last_reward
        self.last_state = state
        self.last_action = action
        self.last_reward = reward
        return state, reward, terminal, pixel_change

    def is_all_scheduled_episodes_done(self):
        """Proxy to the simulator's episode-schedule bookkeeping."""
        return self._sim.is_all_scheduled_episodes_done()
class IndoorEnv(gym.Env):
    """OpenAI Gym wrapper around RoomSimulator (old gym `_method` API)."""

    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self):
        self._last_state = None
        self._sim = None
        self.viewer = None

    def configure(self, sim_args):
        """Create the simulator and derive action/observation spaces."""
        self._sim = RoomSimulator(sim_args)
        self._sim_obs_space = self._sim.get_observation_space(
            sim_args['outputs'])
        # MultiBinary: several buttons may be pressed simultaneously.
        self.action_space = spaces.MultiBinary(self._sim.num_buttons)
        self.screen_height = self._sim_obs_space['color'].shape[1]
        self.screen_width = self._sim_obs_space['color'].shape[0]
        self.observation_space = spaces.Box(
            low=0, high=255,
            shape=(self.screen_height, self.screen_width, 3))
        # TODO: have more complex observation space with additional
        # modalities and measurements.

    @property
    def simulator(self):
        """Underlying simulator object."""
        return self._sim.sim

    def _seed(self, seed=0xA3C):
        """Sets the seed for this env's random number generator(s).

        Returns:
            list<bigint>: the list of seeds used by this env's generators;
            the first value is the "main" seed a reproducer should pass in.
        """
        # TODO: generate another seed for use in simulator?
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _reset(self):
        """Reset the simulator and return the initial observation."""
        res = self._sim.reset()
        return res.get('observation')

    def _step(self, action):
        """Run one timestep.

        Args:
            action: button vector forwarded to the simulator.

        Returns:
            (observation, reward, done, info) in the classic gym contract.
        """
        state = self._sim.step(action)
        self._last_state = state  # Last observed state
        # Everything except rewards/terminals is exposed as the observation.
        observation = {k: v for k, v in state.items()
                       if k not in ['rewards', 'terminals']}
        info = state['info']
        return observation, state['rewards'], state['terminals'], info

    def _render(self, mode='human', close=False):
        """Render the last observed color frame.

        Args:
            mode: 'human' shows a window; 'rgb_array' returns the image.
            close: when True, tear down any open viewer and return.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                # None out the reference or pyglet becomes unhappy.
                self.viewer = None
            return
        if self._last_state is not None:
            img = self._last_state['observation']['sensors']['color']['data']
            if len(img.shape) == 2:  # assume gray
                img = np.dstack([img, img, img])
            else:  # assume rgba
                img = img[:, :, :-1]
                img = img.reshape((img.shape[1], img.shape[0], img.shape[2]))
            if mode == 'human':
                from gym.envs.classic_control import rendering
                # Single guard (original had the same check nested twice).
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
            elif mode == 'rgb_array':
                return img

    def _close(self):
        """Close the underlying simulator if one was configured."""
        if self._sim is not None:
            self._sim.close_game()
class IndoorEnvironment(environment.Environment):
    """RoomSimulator environment variant with an on-screen viewer and a
    per-step success flag in the process() return."""

    # One-hot button encodings for the three discrete actions.
    ACTION_LIST = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]

    @staticmethod
    def get_action_size(env_name):
        """Number of discrete actions (independent of env_name)."""
        return len(IndoorEnvironment.ACTION_LIST)

    @staticmethod
    def get_objective_size(env_name):
        """Objective vector size from the sim config (0 if unset)."""
        simargs = sim_config.get(env_name)
        return simargs.get('objective_size', 0)

    def __init__(self, env_name, env_args, thread_index):
        """Build a per-worker simulator instance plus a debug viewer.

        Args:
            env_name: key into sim_config.
            env_args: optional dict of overrides merged into the config.
            thread_index: worker index, used to build a unique sim id/logdir.
        """
        environment.Environment.__init__(self)
        self.i_episode = 0
        self.last_state = None
        self.last_action = 0
        self.last_reward = 0
        simargs = sim_config.get(env_name)
        simargs['id'] = 'sim%02d' % thread_index
        simargs['logdir'] = os.path.join(IndoorEnvironment.get_log_dir(),
                                         simargs['id'])
        self.viewer = rendering.SimpleImageViewer()
        # Merge in extra env args.
        if env_args is not None:
            simargs.update(env_args)
        print(simargs)
        self._sim = RoomSimulator(simargs)
        self._sim_obs_space = self._sim.get_observation_space(
            simargs['outputs'])
        self.reset()

    def render(self, img):
        """Show a raw RGBA frame, upscaled to 512x512, in the debug viewer."""
        img = img[:, :, :-1]  # drop alpha
        img = img.reshape((img.shape[1], img.shape[0], img.shape[2]))
        # BUG FIX: the third positional argument of cv2.resize is `dst`, not
        # the interpolation flag; pass it by keyword.
        img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_CUBIC)
        self.viewer.imshow(img)
        time.sleep(.1)  # slow down so the frame is visible

    def reset(self):
        """Start a new episode and cache its first preprocessed state."""
        result = self._sim.reset()
        self._episode_info = result.get('episode_info')
        self._last_full_state = result.get('observation')
        obs = self._last_full_state['observation']['sensors']['color']['data']
        objective = self._last_full_state.get('measurements')
        state = {'image': self._preprocess_frame(obs), 'objective': objective}
        self.last_state = state
        self.last_action = 0
        self.last_reward = 0

    def stop(self):
        """Shut the simulator down (no-op if it was never created)."""
        if self._sim is not None:
            self._sim.close_game()

    def _preprocess_frame(self, image):
        """Convert a raw sensor frame to a float32 RGB image in [0, 1]."""
        if len(image.shape) == 2:  # assume gray
            image = np.dstack([image, image, image])
        else:  # assume rgba
            image = image[:, :, :-1]
        image = image.astype(np.float32)
        image = image / 255.0
        return image

    def process(self, action):
        """Apply one discrete action.

        Returns:
            (state, reward, terminal, pixel_change, success) where success is
            the simulator's per-step success flag.
        """
        real_action = IndoorEnvironment.ACTION_LIST[action]
        full_state = self._sim.step(real_action)
        self._last_full_state = full_state  # Last observed state
        obs = full_state['observation']['sensors']['color']['data']
        reward = full_state['rewards']
        terminal = full_state['terminals']
        success = full_state['success']
        objective = full_state.get('measurements')
        if not terminal:
            state = {
                'image': self._preprocess_frame(obs),
                'objective': objective
            }
        else:
            # Episode over: carry the previous state forward.
            state = self.last_state
        pixel_change = self._calc_pixel_change(state['image'],
                                               self.last_state['image'])
        self.last_state = state
        self.last_action = action
        self.last_reward = reward
        return state, reward, terminal, pixel_change, success

    def is_all_scheduled_episodes_done(self):
        """Proxy to the simulator's episode-schedule bookkeeping."""
        return self._sim.is_all_scheduled_episodes_done()