def get_actions(self, obss):
    """
    Choose one action per observation in ``obss`` using the agent's model.

    The observations go through ``self.preprocess_obss`` and the model is
    evaluated under ``torch.no_grad()``.  A recurrent model is called with
    ``self.memories`` and the memories it returns are stored back on the
    agent; a non-recurrent model is called with ``introspect=True``.

    When ``self.argmax`` is truthy the index of the largest probability is
    taken for every row (``keepdim=True``, so one column per row); otherwise
    the actions are sampled from the distribution.

    Returns:
        The actions as a NumPy array.  The conversion is done
        unconditionally so the return type no longer depends on whether
        CUDA happens to be available on the host.
    """
    preprocessed_obss = self.preprocess_obss(obss)

    with torch.no_grad():
        if self.model.recurrent:
            dist, _, self.memories = self.model(preprocessed_obss, self.memories)
        else:
            dist, _, x, y, z = self.model(preprocessed_obss, introspect=True)

            # Debug introspection, triggered on the call where the counter is 7.
            # NOTE(review): the nesting of the prints and of the counter
            # increment was reconstructed from flattened source - confirm.
            if self.number == 7:
                # .cpu() keeps the conversion valid when the model runs on a GPU
                Grid.decode(y[0][0].round().cpu().numpy()).render_human()
                print(len(x), len(y), len(z))
                print(y[0].shape, z[0].shape)
                print(x, y, z)
            self.number += 1

    if self.argmax:
        actions = dist.probs.max(1, keepdim=True)[1]
    else:
        actions = dist.sample()

    # Always hand back a NumPy array; .cpu() is a no-op for CPU tensors.
    return actions.cpu().numpy()
def _save_obs(obs, out_dir, fname, tile_size=12):
    """
    Decode an agent observation, render it and write the image to
    ``out_dir/fname``.
    """
    from gym_minigrid.minigrid import Grid

    view_size = obs.shape[0]
    decoded_grid, visible = Grid.decode(obs)

    # The agent is placed on the middle cell of the last row of its own
    # view, with direction index 3; decoded visibility is used as highlight.
    rendered = decoded_grid.render(
        tile_size,
        agent_pos=(view_size // 2, view_size - 1),
        agent_dir=3,
        highlight_mask=visible,
    )

    plt.imsave(os.path.join(out_dir, fname), rendered)
    plt.clf()
def get_obs_render(self, obs, tile_pixels=CELL_PIXELS // 2, mode='rgb_array'):
    """
    Draw an encoded agent observation, plus an agent marker, for visualization.

    The renderer is created on first use and cached on ``self.obs_render``.
    With ``mode == 'rgb_array'`` the result of ``get_array_from_pixmap`` is
    returned; for every other mode the renderer's pixmap is returned.
    """
    if self.obs_render is None:
        view_pixels = self.agent_view_size * tile_pixels
        self.obs_render = Renderer(view_pixels, view_pixels, self._render)
    renderer = self.obs_render

    renderer.beginFrame()

    # Decode the observation and paint every cell of it
    Grid.decode(obs).render(renderer, tile_pixels)

    # Agent marker: a triangle in colour (255, 0, 0), drawn in CELL_PIXELS
    # units and scaled down to the requested tile size
    shrink = tile_pixels / CELL_PIXELS
    renderer.push()
    renderer.scale(shrink, shrink)
    renderer.translate(
        CELL_PIXELS * (0.5 + self.agent_view_size // 2),
        CELL_PIXELS * (self.agent_view_size - 0.5),
    )
    renderer.rotate(270)
    renderer.setLineColor(255, 0, 0)
    renderer.setColor(255, 0, 0)
    renderer.drawPolygon([(-12, 10), (12, 0), (-12, -10)])
    renderer.pop()

    renderer.endFrame()

    if mode == 'rgb_array':
        return get_array_from_pixmap(renderer)
    # 'pixmap' and any unrecognised mode both yield the pixmap
    return renderer.getPixmap()
# Drive the environment with random actions until five episodes have ended
num_episodes = 0
while num_episodes < 5:
    # Draw an action index from the discrete action space
    action = random.randint(0, env.action_space.n - 1)
    obs, reward, done, info = env.step(action)

    # The agent must stay below the grid's width and height
    assert env.agent_pos[0] < env.width
    assert env.agent_pos[1] < env.height

    # Decoding an observation image and re-encoding it with the decoded
    # visibility mask must give back the same array
    img = obs['image']
    decoded, mask = Grid.decode(img)
    assert np.array_equal(img, decoded.encode(vis_mask=mask))

    # Converting the env to a string must not raise
    str(env)

    # The reward has to lie inside the declared reward range
    assert env.reward_range[0] <= reward <= env.reward_range[1], reward

    if done:
        num_episodes += 1
        env.reset()

    env.render('rgb_array')
# Drive the environment with random actions until five episodes have ended
num_episodes = 0
while num_episodes < 5:
    # Draw an action index from the discrete action space
    action = random.randint(0, env.action_space.n - 1)
    obs, reward, done, info = env.step(action)

    # The agent must stay below the grid's width and height
    assert env.agent_pos[0] < env.width
    assert env.agent_pos[1] < env.height

    # Encode/decode roundtrip: the mask is rebuilt from channel 0 of the
    # image by treating every cell that is not 'unseen' as visible (hackish)
    img = obs['image']
    seen = img[:, :, 0] != OBJECT_TO_IDX['unseen']
    decoded = Grid.decode(img)
    assert np.array_equal(img, decoded.encode(vis_mask=seen))

    # Converting the env to a string must not raise
    str(env)

    # The reward has to lie inside the declared reward range
    assert env.reward_range[0] <= reward <= env.reward_range[1], reward

    if done:
        num_episodes += 1
        env.reset()

    env.render('rgb_array')
# NOTE(review): this fragment begins mid-scope - `seed` and `grid1` are bound
# before the visible code, and the indentation below was reconstructed from
# flattened source. Confirm against the original layout.
env.seed(seed)
grid2 = env.grid.encode()
# The grid encoded after seeding must equal the previously captured `grid1`
# (presumably taken after an earlier seed() with the same value - verify).
assert np.array_equal(grid2, grid1)

env.reset()

# Take 5 * env.maxSteps random steps, resetting whenever an episode ends
for i in range(5 * env.maxSteps):
    # Pick a random action
    action = random.randint(0, env.action_space.n - 1)

    obs, reward, done, info = env.step(action)

    # Test observation encode/decode roundtrip
    img = obs['image']
    grid = Grid.decode(img)
    img2 = grid.encode()
    assert np.array_equal(img2, img)

    # Check that the reward is within the specified range
    assert reward >= env.reward_range[0], reward
    assert reward <= env.reward_range[1], reward

    if done:
        env.reset()

        # Check that the agent doesn't overlap with an object
        assert env.grid.get(*env.agentPos) is None

    env.render('rgb_array')
# Drive the environment with random actions until five episodes have ended
num_episodes = 0
while num_episodes < 5:
    # Draw an action index from the discrete action space
    action = random.randint(0, env.action_space.n - 1)
    obs, reward, done, info = env.step(action)

    # Both agent coordinates must stay below the grid size
    assert env.agent_pos[0] < env.grid_size
    assert env.agent_pos[1] < env.grid_size

    # Decoding an observation image and re-encoding it must give back
    # the same array
    img = obs['image']
    decoded = Grid.decode(img)
    assert np.array_equal(img, decoded.encode())

    # The reward has to lie inside the declared reward range
    assert env.reward_range[0] <= reward <= env.reward_range[1], reward

    if done:
        num_episodes += 1
        env.reset()

    env.render('rgb_array')

env.close()
def render(self, agent_pos, agent_dir, state, mode='rgb_array', tile_pixels=CELL_PIXELS // 2, close=False):
    """
    Draw the whole grid decoded from ``state`` together with an agent marker.

    The renderer is created on first use and cached on ``self.grid_render``.
    ``tile_pixels`` and ``close`` are accepted but not read by this method.

    Returns ``get_array_from_pixmap(renderer)`` for ``mode == 'rgb_array'``,
    the renderer's pixmap for ``mode == 'pixmap'``, and the renderer itself
    for any other mode.
    """
    if self.grid_render is None:
        self.grid_render = Renderer(
            self.width * CELL_PIXELS,
            self.height * CELL_PIXELS,
            self._render,
        )
    renderer = self.grid_render

    renderer.beginFrame()

    # The grid is rebuilt from the encoded state rather than read from
    # self.grid, then painted at full cell resolution
    Grid.decode(state).render(renderer, CELL_PIXELS)

    # Agent marker: a triangle in colour (255, 0, 0), centred on the agent's
    # cell and rotated by 90 degrees per direction step
    renderer.push()
    renderer.translate(
        CELL_PIXELS * (agent_pos[0] + 0.5),
        CELL_PIXELS * (agent_pos[1] + 0.5),
    )
    renderer.rotate(agent_dir * 90)
    renderer.setLineColor(255, 0, 0)
    renderer.setColor(255, 0, 0)
    renderer.drawPolygon([(-12, 10), (12, 0), (-12, -10)])
    renderer.pop()

    # Cells visible to the agent are not highlighted in this renderer.

    renderer.endFrame()

    if mode == 'rgb_array':
        return get_array_from_pixmap(renderer)
    if mode == 'pixmap':
        return renderer.getPixmap()
    return renderer