def __init__(self, maze_str, solve_thresh=0.1, p_gain=10.0, d_gain=-1.0):
    """Set up controller state and build the grid environment for a maze.

    Args:
        maze_str: Maze layout string; parsed by ``grid_spec.spec_from_string``.
        solve_thresh: Stored as ``self.solve_thresh`` (not used in this method).
        p_gain: Stored as ``self.p_gain``
            (presumably a proportional gain -- confirm against the caller).
        d_gain: Stored as ``self.d_gain``
            (presumably a derivative gain -- confirm against the caller).
    """
    # Configuration supplied by the caller.
    self.maze_str = maze_str
    self.solve_thresh = solve_thresh
    self.p_gain = p_gain
    self.d_gain = d_gain

    # Fixed velocity threshold; unlike solve_thresh it is not configurable.
    self.vel_thresh = 0.1

    # Waypoint bookkeeping starts empty. The initial target is a sentinel
    # built from the module-level ONES constant.
    self._target = -1000 * ONES
    self._waypoints = []
    self._waypoint_idx = 0
    # NOTE(review): this stores the module-level ZEROS object itself, not a
    # copy -- confirm nothing mutates it in place.
    self._waypoint_prev_loc = ZEROS

    # Grid environment built from the parsed maze specification.
    maze_spec = grid_spec.spec_from_string(maze_str)
    self.env = grid_env.GridEnv(maze_spec)
def __init__(self):
    """Build the grid environment for ``MAZE`` and list its EMPTY cells.

    After construction, ``self.env`` holds the ``GridEnv`` and
    ``self.reset_locations`` is a list of index tuples, one per cell of
    ``self.env.gs.spec`` that equals ``grid_spec.EMPTY``.
    """
    maze_spec = grid_spec.spec_from_string(MAZE)
    self.env = grid_env.GridEnv(maze_spec)

    # np.where yields one index array per axis; zipping them pairs the
    # per-axis indices into one coordinate tuple per matching cell.
    empty_indices = np.where(self.env.gs.spec == grid_spec.EMPTY)
    self.reset_locations = list(zip(*empty_indices))
def step(self, action):
    """Advance the environment by one agent action.

    Args:
        action: One of the members of ``self.actions`` (``left``, ``right``,
            ``forward``, ``pickup``, ``drop``, ``toggle``, ``done``).

    Returns:
        A tuple ``(obs, reward, done, info)`` where ``obs`` is
        ``self.gen_obs()``, ``reward`` is ``self._reward()`` (evaluated on
        every step), ``done`` is True when a forward action faced a lava
        cell or ``self.step_count`` reached ``self.max_steps``, and ``info``
        is an empty dict.

    Raises:
        AssertionError: If ``action`` matches none of the known actions.
    """
    self.step_count += 1
    done = False

    # Position of, and contents of, the cell in front of the agent.
    fwd_pos = self.front_pos
    fwd_cell = self.grid.get(*fwd_pos)

    # Rotate left
    if action == self.actions.left:
        self.agent_dir -= 1
        if self.agent_dir < 0:
            self.agent_dir += 4

    # Rotate right
    elif action == self.actions.right:
        self.agent_dir = (self.agent_dir + 1) % 4

    # Move forward
    elif action == self.actions.forward:
        if fwd_cell is None or fwd_cell.can_overlap():
            self.agent_pos = fwd_pos
        if fwd_cell is not None and fwd_cell.type == 'goal':
            # Reaching the goal deliberately does NOT end the episode and
            # does not reset step_count. Instead the agent's current pose is
            # recorded as the default, a new goal position is drawn, and the
            # grid is regenerated.
            self._agent_default_pos = self.agent_pos
            self._agent_dir = self.agent_dir
            env = grid_env.GridEnv(grid_spec.spec_from_string(
                MAZE))  # note that the maze will be blocked
            reset_locations = list(
                zip(*np.where(env.gs.spec == grid_spec.EMPTY)))
            random_loc = random.choice(reset_locations)
            # reset goal...
            self._goal_default_pos = random_loc
            self.seed(self.seed_n)
            self._gen_grid(self.width, self.height)
            # Restore the heading saved above (presumably _gen_grid may
            # change agent_dir -- confirm).
            self.agent_dir = self._agent_dir
        if fwd_cell is not None and fwd_cell.type == 'lava':
            done = True

    # Pick up an object
    elif action == self.actions.pickup:
        if fwd_cell and fwd_cell.can_pickup():
            if self.carrying is None:
                self.carrying = fwd_cell
                # A carried object is given the off-grid position (-1, -1).
                self.carrying.cur_pos = np.array([-1, -1])
                self.grid.set(*fwd_pos, None)

    # Drop an object
    elif action == self.actions.drop:
        if not fwd_cell and self.carrying:
            self.grid.set(*fwd_pos, self.carrying)
            self.carrying.cur_pos = fwd_pos
            self.carrying = None

    # Toggle/activate an object
    elif action == self.actions.toggle:
        if fwd_cell:
            fwd_cell.toggle(self, fwd_pos)

    # Done action (not used by default)
    elif action == self.actions.done:
        pass

    else:
        # Raised explicitly (rather than `assert False`) so the check is
        # not stripped when Python runs with -O; the exception type is
        # unchanged for callers.
        raise AssertionError("unknown action")

    if self.step_count >= self.max_steps:
        done = True

    obs = self.gen_obs()
    reward = self._reward()
    return obs, reward, done, {}