class PomdpInterface:

    """Unreal must be running before this is called for now..."""
    # POMDP-style wrapper around the Unreal/AirSim drone simulator: exposes
    # set_environment / reset / step and computes path-following rewards.
    # NOTE(review): this appears to be the OLDER of two PomdpInterface
    # versions in this file — see the second class definition below.

    def __init__(self, instance_id=0, cv_mode=False):
        """Connect to a running simulator instance.

        :param instance_id: which simulator instance to attach to
        :param cv_mode: if True, the interface is used for collecting
            landmark images (no path/reward) instead of path following
        """
        self.instance_id = instance_id
        self.env_id = None
        # All tunables come from the global parameter server under "PomdpInterface".
        self.params = get_current_parameters()["PomdpInterface"]
        step_interval = self.params["step_interval"]
        flight_height = self.params["flight_height"]
        self.scale = self.params["scale"]
        self.drone = DroneController(instance=instance_id, flight_height=flight_height)
        # Scale the wall-clock step interval by the simulator's clock speed so
        # one env step corresponds to a fixed amount of simulated time.
        rate = step_interval / self.drone.clock_speed
        self.rate = Rate(rate)
        print("Adjusted rate: " + str(rate), "Step interval: ", step_interval)
        # Episode state, populated by set_environment()/reset().
        self.path = None                 # ground-truth path loaded for the current env
        self.reward = None               # FollowPathReward over self.path
        self.reward_shaping = None       # ImitationReward shaping term
        self.instruction_set = None      # list of segment dicts (instructions + path indices)
        self.current_segment = None      # index into instruction_set
        self.done = None                 # True once the final segment is complete
        self.cv_mode = cv_mode
        self.seg_start = 0               # path index where the current segment starts
        self.seg_end = 0                 # path index where the current segment ends

    def _get_reward(self, state, drone_action):
        """Return (reward, done) for one step and advance segment bookkeeping.

        Side effects: on segment completion, advances self.current_segment and
        updates the reward function's segment bounds; sets self.done after the
        last segment.
        """
        # If we don't have an instruction set, we can't provide a reward
        if self.instruction_set is None:
            return 0, False
        # If the agent already executed all the actions, return 0 error and done thereafter
        if self.done:
            return 0, self.done
        # Obtain the reward from the reward function
        reward, seg_complete = self.reward.get_reward(state, drone_action)
        reward += self.reward_shaping.get_reward(state, drone_action)
        # If the segment was completed, tell the reward function that we will be following the next segment
        if seg_complete:
            self.current_segment += 1
            if self.current_segment < len(self.instruction_set):
                self.seg_start = self.seg_end
                self.seg_end = self.instruction_set[self.current_segment]["end_idx"]
                self.reward.set_current_segment(self.seg_start, self.seg_end)
            # Unless this was the last segment, in which case we set this pomdp as done and return accordingly
            else:
                self.done = True
        return reward, self.done

    def set_environment(self, env_id, instruction_set=None, fast=False):
        """
        Switch the simulation to env_id.
        Causes the environment configuration from
        configs/configs/random_config_<env_id>.json to be loaded and landmarks arranged in the simulator
        :param env_id: integer ID
        :param instruction_set: Instruction set to follow for displaying instructions
        :param fast: Set to True to skip a delay at a risk of environment not loading before subsequent function calls
        :return:
        """
        self.env_id = env_id
        self.drone.set_current_env_id(env_id, self.instance_id)
        self.drone.reset_environment()
        # This is necessary to allow the new frame to be rendered with the new pomdp, so that the drone doesn't
        # accidentally see the old pomdp at the start
        if not fast:
            time.sleep(0.1)
        self.current_segment = 0
        self.seg_start = 0
        self.seg_end = 0
        self.path = load_path(env_id)
        if self.path is not None:
            self.reward = FollowPathReward(self.path)
            self.reward_shaping = ImitationReward(self.path)
            # Default: the "segment" is the whole path until instructions say otherwise.
            self.seg_end = len(self.path) - 1
        self.instruction_set = instruction_set
        if instruction_set is not None:
            # NOTE(review): if load_path() returned None, self.reward may be None
            # (or stale from a previous env) here — confirm load_path never
            # returns None when an instruction set is supplied.
            self.reward.set_current_segment(self.seg_start, self.seg_end)
            if len(instruction_set) == 0:
                print("OOOPS! Instruction set of length 0!" + str(env_id))
                return
            self.seg_end = instruction_set[self.current_segment]["end_idx"]
            # Clamp an out-of-bounds segment end index to the last path point.
            if self.seg_end >= len(self.path):
                print("OOOPS! For env " + str(env_id) + " segment " + str(self.current_segment) + " end oob: " + str(self.seg_end))
                self.seg_end = len(self.path) - 1

    def reset(self, seg_idx=0, landmark_pos=None, random_yaw=0):
        """Teleport the drone to the start of segment seg_idx and return the
        initial DroneState.

        :param seg_idx: segment whose start pose to use (non-CV mode)
        :param landmark_pos: 2D landmark position to face (CV mode only)
        :param random_yaw: unused here — presumably kept for API compatibility; verify against callers
        :return: DroneState with the first image and drone state
        """
        self.rate.reset()
        self.drone.reset_environment()
        start_pos, start_angle = self.get_start_pos(seg_idx, landmark_pos)
        if self.cv_mode:
            # In CV mode get_start_pos returns a full roll/pitch/yaw triple.
            start_rpy = start_angle
            self.drone.teleport_3d(start_pos, start_rpy, pos_in_airsim=False)
        else:
            start_yaw = start_angle
            if self.params["randomize_init_pos"]:
                # Perturb the starting pose with Gaussian noise for data diversity.
                yaw_offset = float(np.random.normal(0, self.params["init_yaw_variance"], 1))
                pos_offset = np.random.normal(0, self.params["init_pos_variance"], 2)
                print("offset:", pos_offset, yaw_offset)
                start_pos = np.asarray(start_pos) + pos_offset
                start_yaw = start_yaw + yaw_offset
            self.drone.teleport_to(start_pos, start_yaw)
        self.rate.sleep(quiet=True)
        # Give the simulator time to render the new pose before grabbing a frame.
        time.sleep(0.2)
        drone_state, image = self.drone.get_state()
        self.done = False
        return DroneState(image, drone_state)

    def get_start_pos(self, seg_idx=0, landmark_pos=None):
        """Compute the start pose for an episode.

        Returns (start_pos, start_angle) where start_angle is a yaw scalar in
        path-following mode and an [roll, pitch, yaw] list in CV mode.
        """
        # If we are not in CV mode, then we have a path to follow and track reward for following it closely
        if not self.cv_mode:
            if self.instruction_set:
                start_pos = self.instruction_set[seg_idx]["start_pos"]
                start_angle = self.instruction_set[seg_idx]["start_yaw"]
            else:
                start_pos = [0, 0, 0]
                start_angle = 0
        # If we are in CV mode, there is no path to be followed. Instead we are collecting images of landmarks.
        # Initialize the drone to face the position provided.
        # TODO: Generalize this to other CV uses
        else:
            # Pick a random direction and distance from the landmark...
            drone_angle = random.uniform(0, 2 * math.pi)
            drone_dist_mult = random.uniform(0, 1)
            drone_dist = 60 + drone_dist_mult * 300
            drone_pos_dir = np.asarray([math.cos(drone_angle), math.sin(drone_angle)])
            start_pos = landmark_pos + drone_pos_dir * drone_dist
            start_height = random.uniform(-1.5, -2.5)
            start_pos = [start_pos[0], start_pos[1], start_height]
            # ...and face back towards the landmark.
            drone_dir = -drone_pos_dir
            start_yaw = vec_to_yaw(drone_dir)
            start_roll = 0
            start_pitch = 0
            start_angle = [start_roll, start_pitch, start_yaw]
        return start_pos, start_angle

    def reset_to_random_cv_env(self, landmark_name=None):
        """Build a random environment containing the given landmark, switch the
        simulator to it, enable CV mode, and reset facing the landmark."""
        config, pos_x, pos_z = make_config_with_landmark(landmark_name)
        self.drone.set_current_env_from_config(config, instance_id=self.instance_id)
        # Allow the simulator to finish loading the new config before reset.
        time.sleep(0.2)
        landmark_pos_2d = np.asarray([pos_x, pos_z])
        self.cv_mode = True
        return self.reset(landmark_pos=landmark_pos_2d)

    def snap_birdseye(self, fast=False, small_env=False):
        """Teleport to an overhead pose and return a single top-down image."""
        self.drone.reset_environment()
        # TODO: Check environment dimensions
        if small_env:
            pos_birdseye_as = [2.25, 2.35, -3.92]
            rpy_birdseye_as = [-1.3089, 0, 0]  # For 15 deg camera
        else:
            pos_birdseye_as = [15, 15, -25]
            rpy_birdseye_as = [-1.3089, 0, 0]  # For 15 deg camera
        self.drone.teleport_3d(pos_birdseye_as, rpy_birdseye_as, fast=fast)
        _, image = self.drone.get_state(depth=False)
        return image

    def snap_cv(self, pos, quat):
        """Teleport to an exact pose (position + quaternion) and return an image.

        The teleport is issued twice with a delay — presumably to work around
        the simulator not settling after a single teleport; verify.
        """
        self.drone.reset_environment()
        pos_birdseye_as = pos
        rpy_birdseye_as = euler.quat2euler(quat)
        self.drone.teleport_3d(pos_birdseye_as, rpy_birdseye_as, pos_in_airsim=True, fast=True)
        time.sleep(0.3)
        self.drone.teleport_3d(pos_birdseye_as, rpy_birdseye_as, pos_in_airsim=True, fast=True)
        time.sleep(0.3)
        _, image = self.drone.get_state(depth=False)
        return image

    def get_current_nl_command(self):
        """Return the natural-language instruction for the current segment,
        or "FINISHED!" once all segments are done."""
        if self.current_segment < len(self.instruction_set):
            return self.instruction_set[self.current_segment]["instruction"]
        return "FINISHED!"

    def step(self, action):
        """
        Takes an action, executes it in the simulation and returns the state, reward and done indicator
        :param action: array of length 4: [forward velocity, left velocity, yaw rate, stop probability]
        :return: DroneState object, reward (float), done (bool)
        """
        #Action
        drone_action = action[0:3]
        drone_stop = action[3]
        #print("Action: ", action)
        # Treat a near-certain stop probability as an explicit stop command.
        if drone_stop > 0.99:
            drone_action = np.array([0, 0, 0])
        self.drone.send_local_velocity_command(unnormalize_action(drone_action))
        self.rate.sleep(quiet=True)
        drone_state, image = self.drone.get_state()
        state = DroneState(image, drone_state)
        reward, done = self._get_reward(state, action)
        return state, reward, done
class PomdpInterface:
    # Refactored POMDP wrapper around the drone simulator (or the real drone
    # when is_real=True). Splits step() into act/await_env/observe and wraps
    # low-level RolloutException failures into EnvException so callers can
    # retry a rollout.
    # NOTE(review): this is the SECOND definition of PomdpInterface in this
    # file; at import time it shadows the first one above.

    class EnvException(Exception):
        # Raised to signal that the current rollout failed at the simulator /
        # drone level and should be retried by the caller.
        def __init__(self, message):
            super(PomdpInterface.EnvException, self).__init__(message)

    # NOTE(review): because this string follows the nested class, it is a
    # bare no-op statement, NOT the class docstring (a docstring must be the
    # first statement in the class body).
    """Unreal must be running before this is called for now..."""

    def __init__(self, instance_id=0, cv_mode=False, is_real=False):
        """Connect to a simulator instance or to the real drone.

        :param instance_id: which simulator instance to attach to
        :param cv_mode: if True, used for collecting landmark images instead of path following
        :param is_real: if True, control the real drone via the controller factory
        """
        self.instance_id = instance_id
        self.env_id = None
        # All tunables come from the global parameter server under "PomdpInterface".
        self.params = get_current_parameters()["PomdpInterface"]
        step_interval = self.params["step_interval"]
        flight_height = self.params["flight_height"]
        self.max_horizon = self.params["max_horizon"]
        self.scale = self.params["scale"]
        self.is_real = is_real
        # Factory picks the simulated or real controller implementation.
        self.drone = drone_controller_factory(simulator=not is_real)(instance=instance_id, flight_height=flight_height)
        # Scale the wall-clock step interval by the controller's real-time rate
        # so one env step corresponds to a fixed amount of (simulated) time.
        rate = step_interval / self.drone.get_real_time_rate()
        self.rate = Rate(rate)
        print("Adjusted rate: " + str(rate), "Step interval: ", step_interval)
        # Episode state, populated by set_environment()/set_current_segment().
        self.segment_path = None            # path points for the current segment only
        self.rewards = []                   # list of reward callables
        self.reward_weights = []            # per-reward scalar weights (zipped with self.rewards)
        self.instruction_set = None         # list of segment dicts (instructions + path indices)
        self.current_segment_ordinal = None # position of the current segment within instruction_set
        self.current_segment_idx = None     # the segment's own seg_idx identifier
        self.cv_mode = cv_mode
        self.seg_start = 0
        self.seg_end = 0
        self.stepcount = 0                  # steps taken this episode, compared to max_horizon
        self.instruction_override = None    # manual instruction string, see override_instruction()

    def _get_reward(self, state, drone_action, done_now):
        """Return the weighted sum of all reward components for one step.

        :raises ValueError: if no instruction set has been provided
        """
        # If we don't have an instruction set, we can't provide a reward
        if self.instruction_set is None:
            raise ValueError("No instruction set: Can't provide a reward.")
        # Obtain the reward from the reward function
        rewards = [w * r(state, drone_action, done_now) for r, w in zip(self.rewards, self.reward_weights)]
        total_reward = sum(rewards)
        return total_reward

    def _is_out_of_bounds(self, state):
        """Return True if the drone strayed farther than PATH_MAX_DISTANCE
        from every point of the current segment path."""
        curr_pos = state.get_pos_2d()
        # An empty segment path means something upstream went wrong; treat as out of bounds.
        if len(self.segment_path) == 0:
            print("OH NO OH NO OH NO!")
            return True
        distances = np.asarray([np.linalg.norm(p - curr_pos) for p in self.segment_path])
        minidx = np.argmin(distances)
        mindist = distances[minidx]
        #print(f"Idx: {minidx} / {len(self.segment_path)}. Dst to path: {mindist}")
        #done = (mindist > END_DISTANCE and minidx >= len(self.segment_path) - 1) or mindist > PATH_MAX_DISTANCE
        done = mindist > PATH_MAX_DISTANCE
        return done

    def land(self):
        """
        If using the real drone, this causes it to land and disarm
        :return:
        """
        try:
            self.drone.land()
        except RolloutException as e:
            raise PomdpInterface.EnvException("Retry rollout")

    def set_environment(self, env_id, instruction_set=None, fast=False):
        """
        Switch the simulation to env_id. Causes the environment configuration from
        configs/configs/random_config_<env_id>.json to be loaded and landmarks arranged in the simulator
        :param env_id: integer ID
        :param instruction_set: Instruction set to follow for displaying instructions
        :param fast: Set to True to skip a delay at a risk of environment not loading before subsequent function calls
        :return:
        """
        self.env_id = env_id
        try:
            self.drone.set_current_env_id(env_id, self.instance_id)
            self.drone.reset_environment()
        except RolloutException as e:
            raise PomdpInterface.EnvException("Retry rollout")
        # This is necessary to allow the new frame to be rendered with the new pomdp, so that the drone doesn't
        # accidentally see the old pomdp at the start
        if not fast:
            time.sleep(0.1)
        self.instruction_set = instruction_set
        self.stepcount = 0
        self.instruction_override = None

    def set_current_segment(self, seg_idx):
        """Select the segment identified by seg_idx, load its slice of the
        env path, and (re)build the reward functions for it.

        :param seg_idx: segment identifier (matched against "seg_idx" in the instruction set)
        :return: False if the segment is empty (start_idx == end_idx), True otherwise
        :raises EnvException: if the drone/simulator call fails (retry the rollout)
        """
        # Map the external seg_idx to its position within the instruction set.
        self.current_segment_ordinal = None
        for i, seg in enumerate(self.instruction_set):
            if seg["seg_idx"] == seg_idx:
                self.current_segment_ordinal = i
        assert self.current_segment_ordinal is not None, f"Requested segment {seg_idx} not found in provided instruction data"
        self.current_segment_idx = seg_idx
        try:
            self.drone.set_current_seg_idx(seg_idx, self.instance_id)
        except RolloutException as e:
            raise PomdpInterface.EnvException("Retry rollout")
        end_idx = self.instruction_set[self.current_segment_ordinal]["end_idx"]
        start_idx = self.instruction_set[self.current_segment_ordinal]["start_idx"]
        full_path = load_and_convert_path(self.env_id)
        self.segment_path = full_path[start_idx:end_idx]
        self.instruction_override = None
        self.stepcount = 0
        # Empty segment: nothing to follow.
        if start_idx == end_idx:
            return False
        if self.segment_path is not None:
            # Two equally-weighted reward terms: stay on the path, and stop at the right place.
            self.rewards = [FollowPathFieldReward(self.segment_path), StopCorrectlyReward(self.segment_path)]
            self.reward_weights = [1.0, 1.0]
        return True

    def reset(self, seg_idx=0, landmark_pos=None, random_yaw=0):
        """Teleport the drone to the start of segment seg_idx and return the
        initial DroneState.

        :param seg_idx: segment whose start pose to use (non-CV mode)
        :param landmark_pos: 2D landmark position to face (CV mode only)
        :param random_yaw: unused here — presumably kept for API compatibility; verify against callers
        :raises EnvException: if the drone/simulator call fails (retry the rollout)
        """
        try:
            self.rate.reset()
            self.drone.reset_environment()
            self.stepcount = 0
            start_pos, start_angle = self.get_start_pos(seg_idx, landmark_pos)
            if self.cv_mode:
                # In CV mode get_start_pos returns a full roll/pitch/yaw triple.
                start_rpy = start_angle
                self.drone.teleport_3d(start_pos, start_rpy, pos_in_airsim=False)
            else:
                start_yaw = start_angle
                if self.params["randomize_init_pos"]:
                    # Re-seed so forked workers don't share an RNG stream.
                    np.random.seed()
                    yaw_offset = float(np.random.normal(0, self.params["init_yaw_variance"], 1))
                    pos_offset = np.random.normal(0, self.params["init_pos_variance"], 2)
                    #print("offset:", pos_offset, yaw_offset)
                    start_pos = np.asarray(start_pos) + pos_offset
                    start_yaw = start_yaw + yaw_offset
                self.drone.teleport_to(start_pos, start_yaw)
            # Optionally speak the instruction aloud.
            if self.params.get("voice"):
                cmd = self.get_current_nl_command()
                say(cmd)
            self.drone.rollout_begin(self.get_current_nl_command())
            self.rate.sleep(quiet=True)
            #time.sleep(0.2)
            drone_state, image = self.drone.get_state()
            return DroneState(image, drone_state)
        except RolloutException as e:
            raise PomdpInterface.EnvException("Retry rollout")

    def get_end_pos(self, seg_idx=0, landmark_pos=None):
        """Return (end_pos, end_angle) of the given segment in world units.

        NOTE(review): in CV mode neither end_pos nor end_angle is assigned
        before the return — this would raise UnboundLocalError; presumably
        never called with cv_mode=True.
        """
        seg_ordinal = self.seg_idx_to_ordinal(seg_idx)
        if not self.cv_mode:
            if self.instruction_set:
                end_pos = convert_pos_from_config(self.instruction_set[seg_ordinal]["end_pos"])
                end_angle = convert_yaw_from_config(self.instruction_set[seg_ordinal]["end_yaw"])
            else:
                end_pos = [0, 0, 0]
                end_angle = 0
        return end_pos, end_angle

    def seg_idx_to_ordinal(self, seg_idx):
        """Return the position of the segment with the given seg_idx within
        the instruction set, or None if not found."""
        for i, instr in enumerate(self.instruction_set):
            if instr["seg_idx"] == seg_idx:
                return i

    def get_start_pos(self, seg_idx=0, landmark_pos=None):
        """Compute the start pose for an episode.

        Returns (start_pos, start_angle) where start_angle is a yaw scalar in
        path-following mode and an [roll, pitch, yaw] list in CV mode.
        """
        seg_ordinal = self.seg_idx_to_ordinal(seg_idx)
        # If we are not in CV mode, then we have a path to follow and track reward for following it closely
        if not self.cv_mode:
            if self.instruction_set:
                start_pos = convert_pos_from_config(self.instruction_set[seg_ordinal]["start_pos"])
                start_angle = convert_yaw_from_config(self.instruction_set[seg_ordinal]["start_yaw"])
            else:
                start_pos = [0, 0, 0]
                start_angle = 0
        # If we are in CV mode, there is no path to be followed. Instead we are collecting images of landmarks.
        # Initialize the drone to face the position provided. TODO: Generalize this to other CV uses
        else:
            # Pick a random direction and distance from the landmark...
            drone_angle = random.uniform(0, 2 * math.pi)
            drone_dist_mult = random.uniform(0, 1)
            drone_dist = 60 + drone_dist_mult * 300
            drone_pos_dir = np.asarray([math.cos(drone_angle), math.sin(drone_angle)])
            start_pos = landmark_pos + drone_pos_dir * drone_dist
            start_height = random.uniform(-1.5, -2.5)
            start_pos = [start_pos[0], start_pos[1], start_height]
            # ...and face back towards the landmark.
            drone_dir = -drone_pos_dir
            start_yaw = vec_to_yaw(drone_dir)
            start_roll = 0
            start_pitch = 0
            start_angle = [start_roll, start_pitch, start_yaw]
        return start_pos, start_angle

    def get_current_nl_command(self):
        """Return the instruction for the current segment; an override set via
        override_instruction() takes precedence; "FINISHED!" after the end."""
        if self.instruction_override:
            return self.instruction_override
        if self.current_segment_ordinal < len(self.instruction_set):
            return self.instruction_set[self.current_segment_ordinal]["instruction"]
        return "FINISHED!"

    def override_instruction(self, instr_str):
        """Replace the current natural-language instruction with instr_str."""
        self.instruction_override = instr_str

    def act(self, action):
        """Send the velocity part of the action to the drone (non-blocking).

        :param action: array of length 4: [forward velocity, left velocity, yaw rate, stop probability]
        :raises EnvException: if the drone/simulator call fails (retry the rollout)
        """
        # Action
        # Copy so unnormalization can't mutate the caller's array.
        action = deepcopy(action)
        drone_action = action[0:3]
        # print("Action: ", action)
        raw_action = unnormalize_action(drone_action)
        # print(drone_action, raw_action)
        try:
            self.drone.send_local_velocity_command(raw_action)
        except RolloutException as e:
            raise PomdpInterface.EnvException("Retry rollout")

    def await_env(self):
        """Block until the fixed per-step interval has elapsed."""
        self.rate.sleep(quiet=True)

    def observe(self, prev_action):
        """Read the drone state and compute the step outcome.

        :param prev_action: the action sent by the preceding act() call;
            element [3] is the stop probability
        :return: (state, reward, done, expired, out_of_bounds)
        :raises EnvException: if the drone/simulator call fails (retry the rollout)
        """
        try:
            drone_state, image = self.drone.get_state()
            state = DroneState(image, drone_state)
            out_of_bounds = self._is_out_of_bounds(state)
            reward = self._get_reward(state, prev_action, out_of_bounds)
            self.stepcount += 1
            expired = self.stepcount > self.max_horizon
            drone_stop = prev_action[3]
            # Episode ends on out-of-bounds, an explicit stop, or horizon expiry.
            done = out_of_bounds or drone_stop > 0.5 or expired
            # Actually stop the drone (execute stop-action)
            if done:
                self.drone.send_local_velocity_command([0, 0, 0, 1])
                self.drone.rollout_end()
            # TODO: Respect the done
            return state, reward, done, expired, out_of_bounds
        except RolloutException as e:
            raise PomdpInterface.EnvException("Retry rollout")

    def step(self, action):
        """
        Takes an action, executes it in the simulation and returns the state, reward and done indicator
        :param action: array of length 4: [forward velocity, left velocity, yaw rate, stop probability]
        :return: DroneState object, reward (float), done (bool)
        """
        self.act(action)
        self.await_env()
        return self.observe(action)

    # -----------------------------------------------------------------------------------------------------------------
    # CRAP:

    def reset_to_random_cv_env(self, landmark_name=None):
        """Build a random environment containing the given landmark, switch the
        simulator to it, enable CV mode, and reset facing the landmark."""
        config, pos_x, pos_z = make_config_with_landmark(landmark_name)
        self.drone.set_current_env_from_config(config, instance_id=self.instance_id)
        # Allow the simulator to finish loading the new config before reset.
        time.sleep(0.2)
        landmark_pos_2d = np.asarray([pos_x, pos_z])
        self.cv_mode = True
        return self.reset(landmark_pos=landmark_pos_2d)

    def snap_birdseye(self, fast=False, small_env=False):
        """Teleport to an overhead pose and return a single top-down image."""
        self.drone.reset_environment()
        # TODO: Check environment dimensions
        if small_env:
            pos_birdseye_as = [2.25, 2.35, -4.92*2 - 0.06 - 0.12]
            rpy_birdseye_as = [-1.3089, 0, 0]  # For 15 deg camera
        else:
            pos_birdseye_as = [15, 15, -25]
            rpy_birdseye_as = [-1.3089, 0, 0]  # For 15 deg camera
        self.drone.teleport_3d(pos_birdseye_as, rpy_birdseye_as, fast=fast)
        _, image = self.drone.get_state(depth=False)
        return image

    def snap_cv(self, pos, quat):
        """Teleport to an exact pose (position + quaternion) and return an image.

        The teleport is issued twice with a delay — presumably to work around
        the simulator not settling after a single teleport; verify.
        """
        self.drone.reset_environment()
        pos_birdseye_as = pos
        rpy_birdseye_as = euler.quat2euler(quat)
        self.drone.teleport_3d(pos_birdseye_as, rpy_birdseye_as, pos_in_airsim=True, fast=True)
        time.sleep(0.3)
        self.drone.teleport_3d(pos_birdseye_as, rpy_birdseye_as, pos_in_airsim=True, fast=True)
        time.sleep(0.3)
        _, image = self.drone.get_state(depth=False)
        return image