def gen_lm_aux_labels(self, env_id, instruction, affine):
    """
    Build the landmark auxiliary labels for one instruction.

    Landmark positions from the environment config are converted to map
    pixels with ``pos_m_to_px``, passed through ``apply_affine_on_pts`` and
    kept only when both resulting coordinates are strictly greater than 0
    and strictly less than ``self.img_w``.

    :param env_id: environment whose config is loaded.
    :param instruction: instruction handed to ``get_mentioned_landmarks``.
    :param affine: transform handed to ``apply_affine_on_pts``.
    :return: dict with the keys "lm_pos", "lm_indices", "lm_mentioned" and
        "lm_visible". All four values are empty lists when no landmark
        passes the bounds check.
    """
    config = load_env_config(env_id)
    _, lm_indices, lm_positions = get_landmark_locations_airsim(config)

    lm_px_map = pos_m_to_px(
        np.asarray(lm_positions)[:, 0:2],
        np.array([self.map_w, self.map_h]))
    lm_px_seg = apply_affine_on_pts(lm_px_map, affine)

    # Debug visualisation, deliberately disabled. Flip to True to inspect
    # the landmark positions on the debug images.
    if False:
        plot_path_on_img(self.latest_img_dbg, lm_px_map)
        plot_path_on_img(self.latest_rot_img_dbg, lm_px_seg)
        cv2.imshow("img", self.latest_img_dbg)
        cv2.imshow("rot_img", self.latest_rot_img_dbg)
        cv2.waitKey(0)

    pos_t = torch.from_numpy(lm_px_seg).unsqueeze(0)
    idx_t = torch.LongTensor(lm_indices).unsqueeze(0)

    # A landmark counts as visible only if BOTH of its coordinates are
    # inside the open interval (0, self.img_w).
    in_bounds = torch.gt(pos_t, 0) * torch.lt(pos_t, self.img_w)
    visible = in_bounds[:, :, 0] * in_bounds[:, :, 1]

    keep_coords = visible.unsqueeze(2).expand_as(pos_t)
    pos_t = torch.masked_select(pos_t, keep_coords).view([-1, 2])
    idx_t = torch.masked_select(idx_t, visible).view([-1])

    _, mentioned = get_mentioned_landmarks(self.thesaurus, instruction)
    mentioned_t = empty_float_tensor(list(idx_t.size())).long()
    for slot, lm_idx in enumerate(idx_t):
        if lm_idx in mentioned:
            mentioned_t[slot] = 1

    if len(idx_t) == 0:
        return {
            "lm_pos": [],
            "lm_indices": [],
            "lm_mentioned": [],
            "lm_visible": [],
        }

    return {
        "lm_pos": pos_t,
        "lm_indices": idx_t,
        "lm_mentioned": mentioned_t,
        "lm_visible": visible,
    }
def landmarks_in_env(env_id):
    """
    Return ``get_landmark_stage_name`` applied to every landmark name that
    ``get_landmark_locations_airsim`` reports for ``env_id``.

    :param env_id: environment to query.
    :return: list with one stage name per landmark, in the reported order.
    """
    names, _indices, _positions = get_landmark_locations_airsim(env_id=env_id)
    return list(map(get_landmark_stage_name, names))
def __getitem__(self, idx):
    """
    Build the example(s) for dataset entry ``idx``.

    For every instruction segment the top-down image is cropped/rotated
    with the affine returned by ``self.get_affine_matrix``, a blurred
    path-label image is produced the same way, the instruction is
    tokenized, and landmark auxiliary labels are computed. Segments for
    which no affine is returned are skipped.

    Side effect: ``self.pos_rand_image`` is overwritten on every call.

    :param idx: index into ``self.seg_list`` when ``self.seg_level`` is
        set, otherwise index into ``self.env_list``.
    :return: ``[input_images, input_instructions, label_images, aux_labels]``
        - four parallel lists with one entry per processed segment. Each
        ``aux_labels`` entry is a dict with the keys "landmark_pos",
        "landmark_indices", "landmark_mentioned", "visible_mask" and, when
        ``self.include_instr_negatives`` is set, "negative_instruction".
    """
    if self.seg_level:
        env_id = self.seg_list[idx][0]
        set_idx = self.seg_list[idx][1]
        seg_idx = self.seg_list[idx][2]
    else:
        env_id = self.env_list[idx]

    env_conf_json = load_env_config(env_id)
    _, landmark_indices, landmark_positions = get_landmark_locations_airsim(
        env_conf_json)
    top_down_image = load_env_img(env_id)
    path = load_path(env_id)

    img_x = top_down_image.shape[0]
    img_y = top_down_image.shape[1]

    path_in_img_coords = self.cf_to_img(img_x, path)
    landmark_pos_in_img = self.as_to_img(
        img_x, np.asarray(landmark_positions)[:, 0:2])
    self.pos_rand_image = self.pos_rand_range * img_x

    input_images = []
    input_instructions = []
    label_images = []
    aux_labels = []

    # Pair every segment with its TRUE segment index. Enumerating the
    # one-element list used in segment-level mode would reset the index to
    # 0 and make the similar-instruction lookup below hit the wrong segment.
    if self.seg_level:
        indexed_segments = [
            (seg_idx,
             self.all_instr[env_id][set_idx]["instructions"][seg_idx])
        ]
    else:
        indexed_segments = enumerate(
            self.all_instr[env_id][0]["instructions"])

    for seg_idx, seg in indexed_segments:
        start_idx = seg["start_idx"]
        end_idx = seg["end_idx"]
        instruction = seg["instruction"]

        # TODO: Check for overflow
        seg_path = path_in_img_coords[start_idx:end_idx]
        seg_img = top_down_image.copy()

        # TODO: Validate the 0.5 choice, should it be 2?
        affine, cropsize = self.get_affine_matrix(
            seg_path, 0, [int(img_x / 2), int(img_y / 2)], 0.5)
        if affine is None:
            continue

        seg_img_rot = self.apply_affine(seg_img, affine, cropsize)

        # Label image: the path drawn on a blank single-channel canvas,
        # blurred, then transformed exactly like the input image.
        seg_labels = np.zeros_like(seg_img[:, :, 0:1]).astype(float)
        seg_labels = self.plot_path_on_img(seg_labels, seg_path)
        seg_labels = gaussian_filter(seg_labels, 4)
        seg_labels_rot = self.apply_affine(seg_labels, affine, cropsize)
        seg_labels_rot = self.normalize_0_1(seg_labels_rot)

        # Change to True to visualize the paths / labels
        if False:
            cv2.imshow("rot_img", seg_img_rot)
            cv2.imshow("seg_labels", seg_labels_rot)
            rot_viz = seg_img_rot.astype(np.float64) / 512
            rot_viz[:, :, 0] += seg_labels_rot.squeeze()
            cv2.imshow("rot_viz", rot_viz)
            cv2.waitKey(0)

        tok_instruction = tokenize_instruction(instruction, self.word2token)
        instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)

        # Get landmark classification labels
        landmark_pos_in_seg_img = self.apply_affine_on_pts(
            landmark_pos_in_img, affine)

        # Down-size images and labels if requested by the model
        if self.img_scale != 1.0:
            seg_img_rot = transform.resize(
                seg_img_rot,
                [seg_img_rot.shape[0] * self.img_scale,
                 seg_img_rot.shape[1] * self.img_scale],
                mode="constant")
            seg_labels_rot = transform.resize(
                seg_labels_rot,
                [seg_labels_rot.shape[0] * self.img_scale,
                 seg_labels_rot.shape[1] * self.img_scale],
                mode="constant")
            landmark_pos_in_seg_img = landmark_pos_in_seg_img * self.img_scale

        seg_img_rot = standardize_image(seg_img_rot)
        seg_labels_rot = standardize_image(seg_labels_rot)
        seg_img_t = torch.from_numpy(seg_img_rot).unsqueeze(0).float()
        seg_labels_t = torch.from_numpy(seg_labels_rot).unsqueeze(0).float()

        landmark_pos_t = torch.from_numpy(landmark_pos_in_seg_img).unsqueeze(0)
        landmark_indices_t = torch.LongTensor(landmark_indices).unsqueeze(0)

        # A landmark is visible only if BOTH coordinates lie strictly
        # between 0 and seg_img_t.size(2).
        mask1 = torch.gt(landmark_pos_t, 0)
        mask2 = torch.lt(landmark_pos_t, seg_img_t.size(2))
        mask = mask1 * mask2
        mask = mask[:, :, 0] * mask[:, :, 1]

        landmark_pos_t = torch.masked_select(
            landmark_pos_t,
            mask.unsqueeze(2).expand_as(landmark_pos_t)).view([-1, 2])
        landmark_indices_t = torch.masked_select(
            landmark_indices_t, mask).view([-1])

        _, mentioned_indices = get_mentioned_landmarks(
            self.thesaurus, instruction)
        mentioned_labels_t = empty_float_tensor(
            list(landmark_indices_t.size())).long()
        for i, landmark_idx_present in enumerate(landmark_indices_t):
            if landmark_idx_present in mentioned_indices:
                mentioned_labels_t[i] = 1

        aux_label = {
            "landmark_pos": landmark_pos_t,
            "landmark_indices": landmark_indices_t,
            "landmark_mentioned": mentioned_labels_t,
            "visible_mask": mask,
        }

        if self.include_instr_negatives:
            # Start from the instructions listed as similar to this segment
            # when requested; otherwise start empty so that a random
            # instruction is drawn below.
            if self.instr_negatives_similar_only:
                choices = self.similar_instruction_map[str(env_id)][
                    str(seg_idx)]
            else:
                choices = []

            # No similar instructions: keep drawing a random environment /
            # segment until one with a non-empty candidate list is found.
            while len(choices) == 0:
                env_options = list(self.similar_instruction_map.keys())
                random_env = random.choice(env_options)
                seg_options = list(
                    self.similar_instruction_map[random_env].keys())
                if len(seg_options) == 0:
                    continue
                random_seg = random.choice(seg_options)
                choices = self.similar_instruction_map[random_env][random_seg]

            pick = random.choice(choices)
            picked_env = pick["env_id"]
            picked_seg = pick["seg_idx"]
            picked_set = pick["set_idx"]
            picked_instruction = self.all_instr[picked_env][picked_set][
                "instructions"][picked_seg]["instruction"]
            tok_fake_instruction = tokenize_instruction(
                picked_instruction, self.word2token)
            aux_label["negative_instruction"] = torch.LongTensor(
                tok_fake_instruction).unsqueeze(0)

        input_images.append(seg_img_t)
        input_instructions.append(instruction_t)
        label_images.append(seg_labels_t)
        aux_labels.append(aux_label)

    return [input_images, input_instructions, label_images, aux_labels]
def __call__(self, images, states, segment_data, mask):
    """
    Compute per-timestep landmark and goal auxiliary labels.

    Layout of the last axis of the returned tensor:
        0      landmark present in image
        1-2    landmark pix_x | pix_y
        3-5    landmark world coordinates
        6      landmark_mentioned index   (never written by this method)
        7      mentioned_side index       (never written by this method)
        8      landmark mentioned         (never written by this method)
        9-10   goal_x_pix | goal_y_pix
        11-12  goal_x | goal_y (world)
        13     goal visible

    :param images: tensor whose ``size(0)`` is the number of timesteps and
        whose ``size(2)`` / ``size(3)`` give the projector's img_y / img_x.
    :param states: indexed as ``states[t, 9:12]`` for the camera position
        and ``states[t, 12:16]`` for the camera rotation.
    :param segment_data: only ``segment_data.metadata[0]["env_id"]`` is read.
    :param mask: per-timestep flags; timesteps where ``mask[t] == 0`` are
        skipped and keep only the values written before the timestep loop.
    :return: float tensor of shape
        ``(images.size(0), len(get_landmark_name_to_index()), 14)``.
    """
    projector = PinholeProjector(img_x=images.size(3), img_y=images.size(2))

    env_id = segment_data.metadata[0]["env_id"]
    conf_json = load_env_config(env_id)
    all_landmark_indices = get_landmark_name_to_index()
    _, landmark_indices, landmark_pos = get_landmark_locations_airsim(conf_json)

    path_array = load_path(env_id)
    goal_loc = self.__get_goal_location_airsim(path_array)

    num_timesteps = images.size(0)
    aux_labels = torch.zeros((num_timesteps, len(all_landmark_indices), 14))

    # Store goal location in airsim coordinates
    goal_world_slot = aux_labels[:, :, 11:13]
    aux_labels[:, :, 11:13] = torch.from_numpy(
        goal_loc[0:2]).unsqueeze(0).unsqueeze(0).expand_as(goal_world_slot)

    for i, idx in enumerate(landmark_indices):
        # The value must have exactly the shape of the target slice,
        # (num_timesteps, 3). A (num_timesteps, 1, 3) value is rejected by
        # setitem broadcasting as soon as num_timesteps > 1.
        world_pos = torch.from_numpy(landmark_pos[i]).contiguous().view(1, -1)
        aux_labels[:, idx, 3:6] = world_pos.repeat(num_timesteps, 1)

    for timestep in range(num_timesteps):
        if mask[timestep] == 0:
            continue

        cam_pos = states[timestep, 9:12]
        cam_rot = states[timestep, 12:16]

        goal_in_img, _, _ = projector.world_point_to_image(
            cam_pos, cam_rot, goal_loc)
        if goal_in_img is not None:
            goal_px_slot = aux_labels[timestep, :, 9:11]
            aux_labels[timestep, :, 9:11] = torch.from_numpy(
                goal_in_img[0:2]).unsqueeze(0).expand_as(goal_px_slot)
            aux_labels[timestep, :, 13] = 1.0

        for i, landmark_world in enumerate(landmark_pos):
            landmark_idx = landmark_indices[i]
            landmark_in_img, _, _ = projector.world_point_to_image(
                cam_pos, cam_rot, landmark_world)
            # This is None if the landmark is behind the camera.
            if landmark_in_img is not None:
                aux_labels[timestep, landmark_idx, 0] = 1.0
                aux_labels[timestep, landmark_idx, 1:3] = torch.from_numpy(
                    landmark_in_img[0:2])

    return aux_labels
def provider_lm_pos_lm_indices_fpv(self, env_ids, add_null=0):
    """
    Data provider that gives the positions and indices of all landmarks
    visible in the FPV image, for every observation in ``self.poses``.

    :param env_ids: environment id of each observation; ``env_ids[i]`` is
        paired with ``self.poses[i]``.
    :param add_null: number of extra "0Null" placeholder objects appended
        to every loaded config before landmarks are read. Each one is
        placed at a random position more than 1.2 away from every existing
        landmark; after 100 failed attempts the last sample is used anyway.
    :return: ``(lm_pos_fpv, lm_indices, lm_pos_map)`` with one entry per
        observation:
        lm_pos_fpv - ``np.array`` over observations of float tensors holding
            the image pixel coordinates of the landmarks in front of the
            camera;
        lm_indices - ``np.array`` over observations of long tensors holding
            the matching landmark indices (the landmark classifier labels);
        lm_pos_map - list over observations of float tensors holding the
            first two world coordinates of the same landmarks.
        An entry is None when the pose is None or no landmark projects
        into the image.
    """
    # Sorting/deduplicating the ids is loop-invariant: do it exactly once.
    unique_env_ids = np.unique(env_ids)
    list_of_conf = load_config_files(unique_env_ids)

    # Add `add_null` empty objects to each config.
    if add_null > 0:
        for conf in list_of_conf:
            zpos = conf["zPos"]
            xpos = conf["xPos"]
            # Snapshot of the ORIGINAL landmarks only; null objects added
            # below are not taken into account for later null objects.
            lm_positions = np.stack([xpos, zpos], 1)
            for _ in range(add_null):
                for _attempt in range(100):
                    # NOTE(review): 4.7 is presumably the usable world
                    # extent - confirm against the environment definition.
                    xnull = np.random.rand() * 4.7
                    znull = np.random.rand() * 4.7
                    distances_to_lm = np.linalg.norm(
                        lm_positions - np.array([xnull, znull]), axis=1)
                    # With no landmarks there is nothing to keep away from
                    # (and np.min would raise on the empty array).
                    if distances_to_lm.size == 0 or np.min(distances_to_lm) > 1.2:
                        break
                conf["xPos"].append(xnull)
                conf["zPos"].append(znull)
                conf["landmarkName"].append("0Null")
                conf["radius"].append("100")

    landmark_indices_list = []
    landmark_pos_list = []
    for conf_json in list_of_conf:
        _, landmark_indices, landmark_pos = get_landmark_locations_airsim(
            conf_json, add_empty=True)
        landmark_indices_list.append(landmark_indices)
        landmark_pos_list.append(landmark_pos)

    # TODO: Grab image size from segment_data
    # TODO: recode CAM_FOV in parameters instead of hardcoding
    projector = PinholeCameraProjection(
        map_size_px=None,
        world_size_px=None,
        world_size_m=None,
        img_x=self.load_img_w,
        img_y=self.load_img_h,
        cam_fov=self.cam_h_fov,
        use_depth=False,
        start_height_offset=0.0)

    lm_pos_fpv = []
    lm_indices = []
    lm_pos_map = []

    for i_obs, pose in enumerate(self.poses):
        # index of the environment in the list of unique environments
        env_id = env_ids[i_obs]
        i_env_id = np.where(unique_env_ids == env_id)[0][0]

        t_lm_pos_fpv = []
        t_lm_indices = []
        t_lm_pos_map = []

        if pose is not None:
            cam_pos = pose['position']
            cam_rot = pose['orientation']
            # convert xyzw to wxyz (airsim convention)
            cam_rot_airsim = [cam_rot[-1]] + cam_rot[:-1]

            env_landmark_indices = landmark_indices_list[i_env_id]
            for i_lm, landmark_in_world in enumerate(landmark_pos_list[i_env_id]):
                landmark_in_img, _, _ = projector.world_point_to_image(
                    cam_pos, cam_rot_airsim, landmark_in_world)
                # This is None if the landmark is behind the camera.
                if landmark_in_img is not None:
                    t_lm_pos_fpv.append(landmark_in_img[0:2])
                    t_lm_pos_map.append(landmark_in_world[0:2])
                    t_lm_indices.append(env_landmark_indices[i_lm])

        if len(t_lm_pos_fpv) > 0:
            t_lm_pos_fpv = torch.from_numpy(np.asarray(t_lm_pos_fpv)).float()
            t_lm_pos_map = torch.from_numpy(np.asarray(t_lm_pos_map)).float()
            t_lm_indices = torch.from_numpy(np.asarray(t_lm_indices)).long()
        else:
            t_lm_pos_fpv = None
            t_lm_pos_map = None
            t_lm_indices = None

        lm_pos_fpv.append(t_lm_pos_fpv)
        lm_pos_map.append(t_lm_pos_map)
        lm_indices.append(t_lm_indices)

    # NOTE(review): np.array() over a list mixing tensors of different
    # lengths and None is kept as-is for callers; confirm it still yields
    # the intended container on the NumPy version in use.
    return np.array(lm_pos_fpv), np.array(lm_indices), lm_pos_map
def _draw_landmarks(self, image, env_id):
    """
    Pass ``image`` through ``self.presenter.draw_landmarks`` using the
    landmark names and positions reported for ``env_id``.

    :param image: image handed to the presenter.
    :param env_id: environment whose landmarks are looked up.
    :return: whatever ``self.presenter.draw_landmarks`` returns.
    """
    names, _indices, positions = get_landmark_locations_airsim(env_id=env_id)
    return self.presenter.draw_landmarks(
        image, names, positions, self.world_size_m)