def __init__(self, env):
    super().__init__(env)

    # Replace the image observation space with a full-grid encoding:
    # one uint8 object index per cell, shaped (width, height).
    self.observation_space.spaces["image"] = spaces.Box(
        low=min(OBJECT_TO_IDX.values()),
        high=max(OBJECT_TO_IDX.values()),
        shape=(self.env.width, self.env.height),
        dtype="uint8",
    )
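# A minimal usage sketch, assuming the __init__ above belongs to a
# gym.ObservationWrapper subclass (called FullGridObsWrapper here purely for
# illustration) whose observation() override, not shown in this excerpt,
# produces the matching full-grid encoding; gym-minigrid registers the
# MiniGrid-* environments used below.
import gym
import gym_minigrid  # noqa: F401  (registers the MiniGrid-* environments)

env = FullGridObsWrapper(gym.make('MiniGrid-Empty-8x8-v0'))  # hypothetical class name
print(env.observation_space.spaces['image'].shape)  # (width, height), one object index per cell
obs = env.reset()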
def __init__(self, env):
    super().__init__(env)

    # Tighten the per-channel upper bounds of the image observation space:
    # channel 0 holds object indices, channel 1 (if present) colour indices,
    # and channel 2 (if present) door states.
    obs_space = self.observation_space.spaces['image']
    obs_space.high[:, :, 0] = max(OBJECT_TO_IDX.values())
    if obs_space.high.shape[2] >= 2:
        obs_space.high[:, :, 1] = max(COLOR_TO_IDX.values())
    if obs_space.high.shape[2] >= 3:
        obs_space.high[:, :, 2] = 3
    self.observation_space.spaces['image'] = obs_space
def __init__(self, env):
    super().__init__(env)

    # Build one Discrete space per (cell, channel): channel 0 is the object
    # index, channel 1 the colour index, channel 2 the state (4 values).
    shape = self.observation_space.spaces['image'].shape
    npo = np.zeros(shape, dtype=object)
    for i in range(shape[0]):
        for j in range(shape[1]):
            for k in range(shape[2]):
                if k == 0:
                    n = max(OBJECT_TO_IDX.values()) + 1
                elif k == 1:
                    n = max(COLOR_TO_IDX.values()) + 1
                elif k == 2:
                    n = 4
                else:
                    raise Exception("Bad k")
                npo[i, j, k] = Discrete(n)
    ospace = tuple(npo.flat)

    # Cumulative sizes of the flattened Discrete spaces, shifted so the first
    # entry is zero; used later to index into a flattened encoding.
    sz = np.cumsum([o.n for o in ospace])
    sz = sz - sz[0]
    self.sz = sz
    self.observation_space = ospace
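# A small worked example of the construction above, assuming the default 7x7x3
# MiniGrid agent view and that the index tables come from gym_minigrid.minigrid;
# it mirrors the loop body without needing an environment instance.
import numpy as np
from gym.spaces import Discrete
from gym_minigrid.minigrid import COLOR_TO_IDX, OBJECT_TO_IDX

shape = (7, 7, 3)
channel_sizes = [max(OBJECT_TO_IDX.values()) + 1, max(COLOR_TO_IDX.values()) + 1, 4]
ospace = tuple(
    Discrete(channel_sizes[k])
    for _ in range(shape[0])
    for _ in range(shape[1])
    for k in range(shape[2])
)
print(len(ospace))  # 7 * 7 * 3 = 147 Discrete spaces, one per (cell, channel)
sz = np.cumsum([o.n for o in ospace])
print((sz - sz[0])[:4])  # cumulative sizes, shifted so the first entry is zero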
# Mapping from direction vectors (dx, dy) to direction indices and back.
DIR_TO_NUM = {
    (1, 0): 0,
    (0, -1): 3,
    (-1, 0): 2,
    (0, 1): 1,
}
NUM_TO_DIR = {v: k for k, v in DIR_TO_NUM.items()}

# Subset of MiniGrid actions used by the agent, keyed by name.
ACTION_TO_NUM_MAPPING = {
    'left': 0,
    'right': 1,
    'forward': 2,
    'toggle': 5,
    'done': 6,
}

IDX_TO_OBJECT = {v: k for k, v in OBJECT_TO_IDX.items()}


class PlanAgent:
    # Agent that has a plan! :D

    def __init__(self, env):
        # Given the env, read off parameters such as grid size and agent details.
        # The env is expected to be wrapped so that it exposes the agent's
        # location and direction.
        self.env = env.env
        self.agent_view_size = self.env.agent_view_size
        self.width = self.env.width
        self.height = self.env.height
        self.epsilon = 0
        self.numobjects = len(OBJECT_TO_IDX) - 1
        self.numcolors = len(COLOR_TO_IDX)
        self.numstates = len(STATE_TO_IDX)
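# A small illustration of how the direction tables above are meant to be used:
# the cell in front of the agent is its position plus the direction vector.
# agent_pos and agent_dir below are hypothetical values in the format the
# position/direction wrapper is assumed to expose.
import numpy as np

agent_pos = np.array([3, 5])
agent_dir = 0                                     # DIR_TO_NUM[(1, 0)], i.e. facing +x
front_cell = agent_pos + np.array(NUM_TO_DIR[agent_dir])
print(front_cell)                                 # [4 5]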
def __init__(
    self,
    grid_size: Optional[int] = None,
    width: Optional[int] = None,
    height: Optional[int] = None,
    max_steps: int = 100,
    see_through_walls: bool = False,
    seed: int = 1337,
):
    # A square grid size may be given instead of explicit width/height.
    if grid_size:
        assert width is None and height is None
        width = grid_size
        height = grid_size

    # Override MiniGridEnv actions with cardinal movement actions
    self.actions = MiniGridSimple.CardinalActions
    self.move_actions = [
        self.actions.right,
        self.actions.down,
        self.actions.left,
        self.actions.up,
    ]
    self.action_space = spaces.Discrete(len(self.actions))

    # self.encoding_range = len(MINIMAL_OBJECT_TO_IDX.keys())
    self.encoding_range = len(OBJECT_TO_IDX)

    self.agent_pos_observation = spaces.Tuple(
        [spaces.Discrete(width), spaces.Discrete(height)])
    self.observation_space = spaces.Dict({
        'agent_pos': self.agent_pos_observation,
    })

    # Renderer object used to render the whole grid (full-scale)
    self.grid_render = None

    # Renderer used to render observations (small-scale agent view)
    self.obs_render = None

    # Environment configuration
    self.width = width
    self.height = height
    self.max_steps = max_steps
    self.see_through_walls = see_through_walls

    # Starting position and direction for the agent
    self.start_pos = None
    self.start_dir = None

    self._done = False

    # Initialize the RNG
    self.seed(seed=seed)

    # Rendering
    self.render_rgb = True
    self.CELL_PIXELS = CELL_PIXELS
    self.render_shape = (self.width * self.CELL_PIXELS,
                         self.height * self.CELL_PIXELS, 3)

    # Initialize the state
    self.reset()
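# CardinalActions is referenced above but not defined in this excerpt; a minimal
# sketch of what it could look like, offered only as an assumption, is an IntEnum
# with one entry per cardinal move (the real enum may well contain more entries,
# e.g. a 'done' action), which is what len(self.actions) above would count.
from enum import IntEnum

class CardinalActions(IntEnum):
    right = 0
    down = 1
    left = 2
    up = 3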
def one_hot(self, obs):
    one_hot_obs = {}

    # One-hotify direction
    NUM_DIRECTIONS = 4
    direction = np.zeros(NUM_DIRECTIONS)
    direction[obs["direction"]] = 1
    one_hot_obs["direction"] = direction

    # One-hotify mission
    # TODO: This is tricky! Missions can be arbitrary lengths, so we could either
    # (a) one-hot encode each token sequentially and return a variable-length array,
    # (b) use a fixed-size mission vector where shorter missions are zero-padded, or
    # (c) assume we'll only use levels with a particular mission structure.
    # The code currently does (c): we assume there's at most one object mentioned.
    # At some point we should probably switch to the more generalizable version
    # using the babyai repo's InstructionsPreprocessor in utils/format.py.
    obj_id = np.zeros(len(OBJECT_TO_IDX))
    colors_id = np.zeros(len(COLOR_TO_IDX) + 1)  # extra slot for "no colour mentioned"
    found_color = False
    for key, index in COLOR_TO_IDX.items():
        if key in obs["mission"]:
            colors_id[index] = 1
            found_color = True
    if not found_color:
        colors_id[-1] = 1
    for key, index in OBJECT_TO_IDX.items():
        if key in obs["mission"]:
            obj_id[index] = 1
    one_hot_obs["mission"] = np.concatenate([obj_id, colors_id], axis=0)

    # One-hotify grid observation
    num_objects = len(OBJECT_TO_IDX)
    num_colors = len(COLOR_TO_IDX)
    num_states = 3  # open, closed or locked (cf. STATE_TO_IDX)

    # The image observation is [height, width, 3].
    image = obs["image"]
    height, width, _ = image.shape
    height_index = np.repeat(np.arange(height), width).reshape(height, width)
    width_index = np.tile(np.arange(width), height).reshape(height, width)

    # First layer has object IDs
    obj_ids = np.zeros((height, width, num_objects))
    obj_ids[height_index, width_index, image[:, :, 0]] = 1

    # Second layer has color IDs
    color_ids = np.zeros((height, width, num_colors))
    color_ids[height_index, width_index, image[:, :, 1]] = 1

    # Third layer has state IDs
    state_ids = np.zeros((height, width, num_states))
    state_ids[height_index, width_index, image[:, :, 2]] = 1

    image = np.concatenate([obj_ids, color_ids, state_ids], axis=2).transpose((2, 0, 1))

    # Zero-pad the spatial dimensions up to the nearest power of two.
    channels, height, width = image.shape
    pow_2_height = int(2 ** np.ceil(np.log2(height)))
    pow_2_width = int(2 ** np.ceil(np.log2(width)))
    pow_2_image = np.zeros((channels, pow_2_height, pow_2_width))
    pow_2_image[:, :height, :width] = image
    one_hot_obs["image"] = pow_2_image

    return one_hot_obs
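# A usage sketch of one_hot, assuming gym_minigrid's standard tables (11 object
# types, 6 colours) and the default 7x7 agent view. `agent` stands for an
# instance of the (unshown) class that defines one_hot above, and the obs dict
# is shaped like what a MiniGrid env returns.
import numpy as np

obs = {
    "direction": 2,
    "mission": "get to the green goal square",
    "image": np.zeros((7, 7, 3), dtype=np.uint8),
}
one_hot_obs = agent.one_hot(obs)          # `agent`: hypothetical instance, see note above
print(one_hot_obs["direction"].shape)     # (4,)
print(one_hot_obs["mission"].shape)       # (len(OBJECT_TO_IDX) + len(COLOR_TO_IDX) + 1,) = (18,)
print(one_hot_obs["image"].shape)         # (20, 8, 8): 11 + 6 + 3 channels, 7 padded up to 8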