def set_target(self, target):
    """Record ``target`` and recompute the Q-values used to reach it.

    The target cell of ``self.env.gs`` is temporarily marked as
    ``grid_spec.REWARD`` while Q-iteration runs, then reset to
    ``grid_spec.EMPTY``. The result is stored on ``self.q_values``.

    Args:
        target: Key used to index ``self.env.gs`` (presumably a grid
            cell coordinate -- confirm against callers).
    """
    self.target = target
    self.env.gs[target] = grid_spec.REWARD
    try:
        self.q_values = q_iteration.q_iteration(
            env=self.env, num_itrs=32, discount=0.99)
    finally:
        # Always remove the temporary reward marker so a failure inside
        # Q-iteration cannot leave a stray reward cell in the shared grid.
        # NOTE(review): the cell is reset to EMPTY regardless of what it
        # held before this call -- confirm targets are always empty cells.
        self.env.gs[target] = grid_spec.EMPTY
def _new_target(self, start, target):
    """Plan a waypoint path from ``start`` to ``target`` on the grid.

    Both states are passed through ``self.gridify_state`` and converted
    to state indices. The target cell is temporarily marked as
    ``grid_spec.REWARD``, Q-iteration is run, and a greedy rollout
    (argmax over the Q-values) of at most 100 steps is performed from the
    start index. The cell is reset to ``grid_spec.EMPTY`` afterwards.

    Every visited cell other than the target is stored with a random
    offset in ``[0, 0.2)`` subtracted from each coordinate; the target
    cell itself is stored unperturbed.

    Side effects:
        Sets ``self._waypoint_idx`` (to 0), ``self._waypoints``,
        ``self._waypoint_prev_loc`` (gridified start) and
        ``self._target`` (gridified target).

    Args:
        start: State to plan from, in whatever form
            ``self.gridify_state`` accepts.
        target: State to plan to, in the same form as ``start``.
    """
    start = self.gridify_state(start)
    start_idx = self.env.gs.xy_to_idx(start)
    target = self.gridify_state(target)
    target_idx = self.env.gs.xy_to_idx(target)
    self._waypoint_idx = 0

    max_ts = 100  # upper bound on rollout length
    waypoints = []

    self.env.gs[target] = grid_spec.REWARD
    try:
        q_values = q_iteration.q_iteration(
            env=self.env, num_itrs=50, discount=0.99)

        # Compute waypoints by performing a greedy rollout in the grid.
        s = start_idx
        for _ in range(max_ts):
            a = np.argmax(q_values[s])
            new_s, _reward = self.env.step_stateless(s, a)
            waypoint = self.env.gs.idx_to_xy(new_s)
            reached_target = new_s == target_idx
            if not reached_target:
                # Jitter intermediate waypoints; the final one stays exact.
                waypoint = waypoint - np.random.uniform(size=(2, )) * 0.2
            waypoints.append(waypoint)
            s = new_s
            if reached_target:
                break
    finally:
        # Always remove the temporary reward marker so a failure during
        # planning cannot leave a stray reward cell in the shared grid.
        # NOTE(review): the cell is reset to EMPTY regardless of what it
        # held before this call -- confirm targets are always empty cells.
        self.env.gs[target] = grid_spec.EMPTY

    self._waypoints = waypoints
    self._waypoint_prev_loc = start
    self._target = target