Esempio n. 1
0
    def gen_lm_aux_labels(self, env_id, instruction, affine):
        """
        Build the landmark auxiliary labels for one environment / instruction.

        Landmark positions are converted to pixel coordinates with
        ``pos_m_to_px``, moved into the transformed image frame with
        ``apply_affine_on_pts`` and then filtered to those that fall inside
        the image.

        :param env_id: environment whose config is loaded with ``load_env_config``
        :param instruction: instruction passed to ``get_mentioned_landmarks``
        :param affine: transform handed to ``apply_affine_on_pts``
        :return: dict with keys ``lm_pos``, ``lm_indices``, ``lm_mentioned`` and
            ``lm_visible``. All four values are empty lists when no landmark
            passes the visibility test.
        """
        config = load_env_config(env_id)
        _, lm_indices, lm_positions = get_landmark_locations_airsim(config)

        lm_px = pos_m_to_px(
            np.asarray(lm_positions)[:, 0:2],
            np.array([self.map_w, self.map_h]))
        lm_px_seg = apply_affine_on_pts(lm_px, affine)

        # Flip to True to inspect the projected landmark positions by eye.
        if False:
            plot_path_on_img(self.latest_img_dbg, lm_px)
            plot_path_on_img(self.latest_rot_img_dbg, lm_px_seg)
            cv2.imshow("img", self.latest_img_dbg)
            cv2.imshow("rot_img", self.latest_rot_img_dbg)
            cv2.waitKey(0)

        pos_t = torch.from_numpy(lm_px_seg).unsqueeze(0)
        idx_t = torch.LongTensor(lm_indices).unsqueeze(0)

        # A landmark is visible when both of its coordinates lie strictly
        # between 0 and self.img_w.
        # NOTE(review): the same bound is applied to both axes - confirm the
        # image is square.
        in_bounds = torch.gt(pos_t, 0) * torch.lt(pos_t, self.img_w)
        mask = in_bounds[:, :, 0] * in_bounds[:, :, 1]

        # Keep only the rows belonging to visible landmarks.
        coord_mask = mask.unsqueeze(2).expand_as(pos_t)
        pos_t = torch.masked_select(pos_t, coord_mask).view([-1, 2])
        idx_t = torch.masked_select(idx_t, mask).view([-1])

        _, mentioned_indices = get_mentioned_landmarks(
            self.thesaurus, instruction)
        # presumably empty_float_tensor returns a zero-filled tensor - TODO confirm
        mentioned_t = empty_float_tensor(list(idx_t.size())).long()
        for row, present_idx in enumerate(idx_t):
            if present_idx in mentioned_indices:
                mentioned_t[row] = 1

        if len(idx_t) == 0:
            return {
                "lm_pos": [],
                "lm_indices": [],
                "lm_mentioned": [],
                "lm_visible": []
            }

        return {
            "lm_pos": pos_t,
            "lm_indices": idx_t,
            "lm_mentioned": mentioned_t,
            "lm_visible": mask,
        }
Esempio n. 2
0
def landmarks_in_env(env_id):
    """
    Return one stage name per landmark in the given environment.

    The landmark names come from ``get_landmark_locations_airsim`` and each is
    mapped through ``get_landmark_stage_name``; indices and positions are
    ignored.
    """
    names, _indices, _positions = get_landmark_locations_airsim(env_id=env_id)
    return list(map(get_landmark_stage_name, names))
Esempio n. 3
0
    def __getitem__(self, idx):
        """
        Build the training examples for one dataset entry.

        When ``self.seg_level`` is set, ``idx`` selects a single
        ``(env_id, set_idx, seg_idx)`` triple from ``self.seg_list``. Otherwise
        it selects an environment from ``self.env_list`` and every segment of
        that environment's first instruction set is processed.

        Side effect: ``self.pos_rand_image`` is overwritten on every call.

        :param idx: index into ``self.seg_list`` or ``self.env_list``
        :return: ``[input_images, input_instructions, label_images, aux_labels]``,
            four parallel lists with one entry per processed segment. Segments
            for which ``self.get_affine_matrix`` returns no transform are
            skipped. Each aux label is a dict with keys ``landmark_pos``,
            ``landmark_indices``, ``landmark_mentioned``, ``visible_mask`` and,
            when ``self.include_instr_negatives`` is set, ``negative_instruction``.
        """
        if self.seg_level:
            env_id = self.seg_list[idx][0]
            set_idx = self.seg_list[idx][1]
            seg_idx = self.seg_list[idx][2]
        else:
            env_id = self.env_list[idx]

        env_conf_json = load_env_config(env_id)
        _, landmark_indices, landmark_positions = get_landmark_locations_airsim(
            env_conf_json)

        top_down_image = load_env_img(env_id)

        path = load_path(env_id)

        img_x = top_down_image.shape[0]
        img_y = top_down_image.shape[1]

        path_in_img_coords = self.cf_to_img(img_x, path)
        landmark_pos_in_img = self.as_to_img(
            img_x,
            np.asarray(landmark_positions)[:, 0:2])
        self.pos_rand_image = self.pos_rand_range * img_x

        input_images = []
        input_instructions = []
        label_images = []
        aux_labels = []

        # Collect the instruction segments together with their real segment
        # indices. In seg_level mode there is exactly one segment and its index
        # is the one read from seg_list; numbering it from zero would make the
        # similar-instruction lookup below use the wrong key.
        if self.seg_level:
            instruction_segments = [
                self.all_instr[env_id][set_idx]["instructions"][seg_idx]
            ]
            segment_indices = [seg_idx]
        else:
            instruction_segments = self.all_instr[env_id][0]["instructions"]
            segment_indices = range(len(instruction_segments))

        for seg_idx, seg in zip(segment_indices, instruction_segments):
            start_idx = seg["start_idx"]
            end_idx = seg["end_idx"]
            instruction = seg["instruction"]

            # TODO: check that start_idx / end_idx are within the path
            seg_path = path_in_img_coords[start_idx:end_idx]
            seg_img = top_down_image.copy()

            # TODO: Validate the 0.5 choice, should it be 2?
            affine, cropsize = self.get_affine_matrix(
                seg_path, 0, [int(img_x / 2), int(img_y / 2)], 0.5)
            if affine is None:
                continue
            seg_img_rot = self.apply_affine(seg_img, affine, cropsize)

            # Label image: the segment path drawn on a blank single-channel
            # canvas, blurred, then transformed the same way as the input image.
            seg_labels = np.zeros_like(seg_img[:, :, 0:1]).astype(float)
            seg_labels = self.plot_path_on_img(seg_labels, seg_path)
            seg_labels = gaussian_filter(seg_labels, 4)
            seg_labels_rot = self.apply_affine(seg_labels, affine, cropsize)

            seg_labels_rot = self.normalize_0_1(seg_labels_rot)

            # Change to True to visualize the paths / labels
            if False:
                cv2.imshow("rot_img", seg_img_rot)
                cv2.imshow("seg_labels", seg_labels_rot)
                rot_viz = seg_img_rot.astype(np.float64) / 512
                rot_viz[:, :, 0] += seg_labels_rot.squeeze()
                cv2.imshow("rot_viz", rot_viz)
                cv2.waitKey(0)

            tok_instruction = tokenize_instruction(instruction,
                                                   self.word2token)
            instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)

            # Get landmark classification labels
            landmark_pos_in_seg_img = self.apply_affine_on_pts(
                landmark_pos_in_img, affine)

            # Down-size images and labels if requested by the model
            if self.img_scale != 1.0:
                seg_img_rot = transform.resize(seg_img_rot, [
                    seg_img_rot.shape[0] * self.img_scale,
                    seg_img_rot.shape[1] * self.img_scale
                ],
                                               mode="constant")
                seg_labels_rot = transform.resize(seg_labels_rot, [
                    seg_labels_rot.shape[0] * self.img_scale,
                    seg_labels_rot.shape[1] * self.img_scale
                ],
                                                  mode="constant")
                landmark_pos_in_seg_img = landmark_pos_in_seg_img * self.img_scale

            seg_img_rot = standardize_image(seg_img_rot)
            seg_labels_rot = standardize_image(seg_labels_rot)
            seg_img_t = torch.from_numpy(seg_img_rot).unsqueeze(0).float()
            seg_labels_t = torch.from_numpy(seg_labels_rot).unsqueeze(
                0).float()

            landmark_pos_t = torch.from_numpy(
                landmark_pos_in_seg_img).unsqueeze(0)
            landmark_indices_t = torch.LongTensor(landmark_indices).unsqueeze(
                0)

            # A landmark counts as visible when both coordinates lie strictly
            # between 0 and seg_img_t.size(2).
            # NOTE(review): the same bound is used for both axes - confirm the
            # transformed image is square.
            mask1 = torch.gt(landmark_pos_t, 0)
            mask2 = torch.lt(landmark_pos_t, seg_img_t.size(2))
            mask = mask1 * mask2
            mask = mask[:, :, 0] * mask[:, :, 1]

            landmark_pos_t = torch.masked_select(
                landmark_pos_t,
                mask.unsqueeze(2).expand_as(landmark_pos_t)).view([-1, 2])
            landmark_indices_t = torch.masked_select(landmark_indices_t,
                                                     mask).view([-1])

            _, mentioned_indices = get_mentioned_landmarks(
                self.thesaurus, instruction)
            # presumably empty_float_tensor returns a zero-filled tensor - TODO confirm
            mentioned_labels_t = empty_float_tensor(
                list(landmark_indices_t.size())).long()
            for i, landmark_idx_present in enumerate(landmark_indices_t):
                if landmark_idx_present in mentioned_indices:
                    mentioned_labels_t[i] = 1

            aux_label = {
                "landmark_pos": landmark_pos_t,
                "landmark_indices": landmark_indices_t,
                "landmark_mentioned": mentioned_labels_t,
                "visible_mask": mask,
            }

            if self.include_instr_negatives:
                # If we are to be using similar instructions according to the json file, then
                # initialize choices with similar instructions. Otherwise let choices be empty, and they will
                # be filled in the following lines.
                if self.instr_negatives_similar_only:
                    choices = self.similar_instruction_map[str(env_id)][str(
                        seg_idx)]
                else:
                    choices = []
                # If there are no similar instructions to this instruction, pick a completely random instruction.
                # NOTE(review): this never terminates if every entry of the map is empty.
                while len(choices) == 0:
                    env_options = list(self.similar_instruction_map.keys())
                    random_env = random.choice(env_options)
                    seg_options = list(
                        self.similar_instruction_map[random_env].keys())
                    if len(seg_options) == 0:
                        continue
                    random_seg = random.choice(seg_options)
                    choices = self.similar_instruction_map[random_env][
                        random_seg]

                pick = random.choice(choices)
                picked_env = pick["env_id"]
                picked_seg = pick["seg_idx"]
                picked_set = pick["set_idx"]
                picked_instruction = self.all_instr[picked_env][picked_set][
                    "instructions"][picked_seg]["instruction"]
                tok_fake_instruction = tokenize_instruction(
                    picked_instruction, self.word2token)
                aux_label["negative_instruction"] = torch.LongTensor(
                    tok_fake_instruction).unsqueeze(0)

            input_images.append(seg_img_t)
            input_instructions.append(instruction_t)
            label_images.append(seg_labels_t)
            aux_labels.append(aux_label)

        return [input_images, input_instructions, label_images, aux_labels]
Esempio n. 4
0
    def __call__(self, images, states, segment_data, mask):
        """
        Compute per-timestep landmark and goal auxiliary labels.

        :param images: tensor whose ``size(0)`` is the number of timesteps and
            whose ``size(2)`` / ``size(3)`` are passed to the projector as
            ``img_y`` / ``img_x``
        :param states: per-timestep state; columns 9:12 are read as the camera
            position and 12:16 as the camera rotation
        :param segment_data: object whose ``metadata[0]["env_id"]`` names the
            environment
        :param mask: per-timestep flags; timesteps where it equals 0 get no
            projection-based labels
        :return: float tensor of shape
            ``(images.size(0), len(get_landmark_name_to_index()), 14)``
        """
        projector = PinholeProjector(img_x=images.size(3), img_y=images.size(2))

        env_id = segment_data.metadata[0]["env_id"]

        conf_json = load_env_config(env_id)
        all_landmark_indices = get_landmark_name_to_index()
        _, landmark_indices, landmark_pos = get_landmark_locations_airsim(conf_json)

        path_array = load_path(env_id)
        goal_loc = self.__get_goal_location_airsim(path_array)

        # Label layout: timesteps x all landmarks x 14 channels
        # 0-5: Present landmarks data
        #   0    - landmark present in img
        #   1-2  - landmark pix_x | pix_y
        #   3-5  - landmark world coordinates
        # 6-8: Template data (never written here, stays zero)
        #   6    - landmark_mentioned index
        #   7    - mentioned_side index
        #   8    - landmark mentioned
        # 9-13: Goal data
        #   9-10  - goal_x_pix | goal_y_pix
        #   11-12 - goal_x | goal_y (world)
        #   13    - goal visible
        aux_labels = torch.zeros((images.size(0), len(all_landmark_indices), 14))

        # Store goal location in airsim coordinates
        aux_labels[:, :, 11:13] = torch.from_numpy(goal_loc[0:2]).unsqueeze(0).unsqueeze(0).expand_as(
            aux_labels[:, :, 11:13])

        # World position of every landmark in this environment, identical for
        # all timesteps. The value is broadcast over the time axis by the
        # assignment itself; an explicitly repeated (T, 1, 3) tensor does not
        # fit the (T, 3) slot when there is more than one timestep.
        for i, idx in enumerate(landmark_indices):
            aux_labels[:, idx, 3:6] = torch.from_numpy(landmark_pos[i])

        for timestep in range(images.size(0)):
            if mask[timestep] == 0:
                continue

            cam_pos = states[timestep, 9:12]
            cam_rot = states[timestep, 12:16]

            goal_in_img, _, _ = projector.world_point_to_image(cam_pos, cam_rot, goal_loc)
            if goal_in_img is not None:
                aux_labels[timestep, :, 9:11] = torch.from_numpy(goal_in_img[0:2]).unsqueeze(0).expand_as(
                    aux_labels[timestep, :, 9:11])
                aux_labels[timestep, :, 13] = 1.0

            for landmark_idx, landmark_world in zip(landmark_indices, landmark_pos):
                landmark_in_img, _, _ = projector.world_point_to_image(cam_pos, cam_rot,
                                                                       landmark_world)
                # This is None if the landmark is behind the camera.
                if landmark_in_img is not None:
                    aux_labels[timestep, landmark_idx, 0] = 1.0
                    aux_labels[timestep, landmark_idx, 1:3] = torch.from_numpy(landmark_in_img[0:2])

        return aux_labels
Esempio n. 5
0
    def provider_lm_pos_lm_indices_fpv(self, env_ids, add_null=0):
        """
        Data provider that gives the positions and indices of all landmarks visible in the FPV image.

        Camera poses are read from ``self.poses``; each non-None pose is a
        mapping with a ``'position'`` entry and an ``'orientation'`` entry, the
        latter a list in x, y, z, w order.

        :param env_ids: environment id of every observation, indexed in step
            with ``self.poses``
        :param add_null: number of extra "0Null" objects appended to every
            environment config before landmarks are extracted
        :return: tuple ``(lm_pos_fpv, lm_indices, lm_pos_map)`` with one entry
            per observation:
            ``lm_pos_fpv`` - ``np.array`` over the per-observation float tensors
            of landmark pixel coordinates in the image;
            ``lm_indices`` - ``np.array`` over the per-observation long tensors
            of landmark indices matching ``lm_pos_fpv`` (the landmark
            classifier labels);
            ``lm_pos_map`` - list of per-observation float tensors holding the
            first two world coordinates of the same landmarks.
            An entry is ``None`` when the pose is ``None`` or no landmark
            projects into the image.
        """
        # Unique environments, computed once: used both to load the configs and
        # to map each observation onto its config below.
        unique_env_ids = np.unique(env_ids)
        list_of_conf = load_config_files(unique_env_ids)

        # Append add_null empty objects to each config.
        if add_null > 0:
            for conf in list_of_conf:
                zpos = conf["zPos"]
                xpos = conf["xPos"]
                # Snapshot of the real landmark positions; null objects added
                # below are not checked against each other.
                lm_positions = np.stack([xpos, zpos], 1)
                for _ in range(add_null):
                    # Sample a position more than 1.2 away from every landmark.
                    # After 100 failed attempts the last sample is used anyway.
                    i_null = 0
                    while i_null < 100:
                        xnull = np.random.rand() * 4.7
                        znull = np.random.rand() * 4.7
                        distances_to_lm = np.linalg.norm(lm_positions - np.array([xnull, znull]), axis=1)
                        min_dist_to_lm = np.min(distances_to_lm)
                        if min_dist_to_lm > 1.2:
                            break
                        i_null += 1

                    conf["xPos"].append(xnull)
                    conf["zPos"].append(znull)
                    conf["landmarkName"].append("0Null")
                    conf["radius"].append("100")

        landmark_indices_list = []
        landmark_pos_list = []
        for conf_json in list_of_conf:
            _, landmark_indices, landmark_pos = get_landmark_locations_airsim(conf_json, add_empty=True)
            landmark_indices_list.append(landmark_indices)
            landmark_pos_list.append(landmark_pos)

        # TODO: Grab image size from segment_data

        # TODO: recode CAM_FOV in parameters instead of hardcoding
        projector = PinholeCameraProjection(
            map_size_px=None,
            world_size_px=None,
            world_size_m=None,
            img_x=self.load_img_w,
            img_y=self.load_img_h,
            cam_fov=self.cam_h_fov,
            use_depth=False,
            start_height_offset=0.0)
        n_obs = len(self.poses)

        lm_pos_fpv = []
        lm_indices = []
        lm_pos_map = []

        for i_obs in range(n_obs):

            # index of the environment in the list of unique environments
            env_id = env_ids[i_obs]
            i_env_id = np.where(unique_env_ids == env_id)[0][0]

            t_lm_pos_fpv = []
            t_lm_indices = []
            t_lm_pos_map = []

            if self.poses[i_obs] is not None:
                cam_pos = self.poses[i_obs]['position']
                cam_rot = self.poses[i_obs]['orientation']
                # convert xyzw to wxyz (airsim convention)
                cam_rot_airsim = [cam_rot[-1]] + cam_rot[:-1]

                for i_lm, landmark_in_world in enumerate(landmark_pos_list[i_env_id]):
                    landmark_idx = landmark_indices_list[i_env_id][i_lm]

                    landmark_in_img, _, _ = projector.world_point_to_image(cam_pos, cam_rot_airsim,
                                                                           landmark_in_world)
                    # This is None if the landmark is behind the camera.
                    if landmark_in_img is not None:
                        t_lm_pos_fpv.append(landmark_in_img[0:2])
                        t_lm_pos_map.append(landmark_in_world[0:2])
                        t_lm_indices.append(landmark_idx)

            if len(t_lm_pos_fpv) > 0:
                t_lm_pos_fpv = torch.from_numpy(np.asarray(t_lm_pos_fpv)).float()
                t_lm_pos_map = torch.from_numpy(np.asarray(t_lm_pos_map)).float()
                t_lm_indices = torch.from_numpy(np.asarray(t_lm_indices)).long()
            else:
                t_lm_pos_fpv = None
                t_lm_pos_map = None
                t_lm_indices = None

            lm_pos_fpv.append(t_lm_pos_fpv)
            lm_pos_map.append(t_lm_pos_map)
            lm_indices.append(t_lm_indices)

        # NOTE(review): np.array over a list of differently-shaped tensors and
        # None depends on the installed NumPy's ragged-input handling - confirm
        # this still yields the container callers expect.
        return np.array(lm_pos_fpv), np.array(lm_indices), lm_pos_map
Esempio n. 6
0
 def _draw_landmarks(self, image, env_id):
     """
     Draw the landmarks of environment ``env_id`` onto ``image``.

     Landmark names and positions come from ``get_landmark_locations_airsim``;
     the drawing itself is delegated to ``self.presenter.draw_landmarks``, whose
     result is returned.
     """
     names, _indices, positions = get_landmark_locations_airsim(env_id=env_id)
     return self.presenter.draw_landmarks(
         image, names, positions, self.world_size_m)