Example #1
    def load_samples_from_file(self, image_path, lidar_path, calib_dir):
        """ Loads input-output data for a single sample from explicit file
            paths. Should only be called when a particular sample dict is
            required. Otherwise, samples should be provided by the
            next_batch function

        Args:
            image_path: path to the RGB image file
            lidar_path: path to the lidar point cloud file
            calib_dir: directory containing the calibration files

        Return:
            samples: a list containing a single data sample dict
        """
        sample_dicts = []
        sample = self.sample_list[0]
        sample_name = sample.name

        obj_labels = None

        anchors_info = []

        label_anchors = np.zeros((1, 6))
        label_boxes_3d = np.zeros((1, 7))
        label_classes = np.zeros(1)

        # Load image (BGR -> RGB)
        cv_bgr_image = cv2.imread(image_path)
        rgb_image = cv_bgr_image[..., ::-1]
        image_shape = rgb_image.shape[0:2]
        image_input = rgb_image

        # Get ground plane
        ground_plane = obj_utils.get_road_plane_from_file(calib_dir)

        # Get calibration
        stereo_calib = calib_utils.read_raw_calibration(calib_dir)
        stereo_calib_p2 = stereo_calib.p2
        point_cloud = self.kitti_utils.get_point_cloud_from_file(
            self.bev_source, stereo_calib, lidar_path, image_shape)

        # Augmentation (Flipping)
        if kitti_aug.AUG_FLIPPING in sample.augs:
            image_input = kitti_aug.flip_image(image_input)
            point_cloud = kitti_aug.flip_point_cloud(point_cloud)
            if obj_labels is not None:
                obj_labels = [
                    kitti_aug.flip_label_in_3d_only(obj)
                    for obj in obj_labels
                ]
            ground_plane = kitti_aug.flip_ground_plane(ground_plane)
            stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                stereo_calib_p2, image_shape)

        # Augmentation (Image Jitter)
        if kitti_aug.AUG_PCA_JITTER in sample.augs:
            image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                image_input[:, :, 0:3])

        if obj_labels is not None:
            label_boxes_3d = np.asarray([
                box_3d_encoder.object_label_to_box_3d(obj_label)
                for obj_label in obj_labels
            ])

            label_classes = [
                self.kitti_utils.class_str_to_index(obj_label.type)
                for obj_label in obj_labels
            ]
            label_classes = np.asarray(label_classes, dtype=np.int32)

            # Return empty anchors_info if no ground truth after filtering
            if len(label_boxes_3d) == 0:
                anchors_info = []
                if self.train_on_all_samples:
                    # If training without any positive labels, we cannot
                    # set these to zeros, because later on the offset calc
                    # uses log on these anchors. So setting any arbitrary
                    # number here that does not break the offset calculation
                    # should work, since the negative samples won't be
                    # regressed in any case.
                    dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                    label_anchors = np.asarray(dummy_anchors)
                    dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                    label_boxes_3d = np.asarray(dummy_boxes)
                else:
                    label_anchors = np.zeros((1, 6))
                    label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)
            else:
                label_anchors = box_3d_encoder.box_3d_to_anchor(
                    label_boxes_3d, ortho_rotate=True)

        # Create BEV maps
        bev_images = self.kitti_utils.create_bev_maps(point_cloud,
                                                      ground_plane)

        height_maps = bev_images.get('height_maps')
        density_map = bev_images.get('density_map')
        bev_input = np.dstack((*height_maps, density_map))

        sample_dict = {
            constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
            constants.KEY_LABEL_ANCHORS: label_anchors,
            constants.KEY_LABEL_CLASSES: label_classes,
            constants.KEY_IMAGE_INPUT: image_input,
            constants.KEY_BEV_INPUT: bev_input,
            constants.KEY_ANCHORS_INFO: anchors_info,
            constants.KEY_POINT_CLOUD: point_cloud,
            constants.KEY_GROUND_PLANE: ground_plane,
            constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
            constants.KEY_SAMPLE_NAME: sample_name,
            constants.KEY_SAMPLE_AUGS: sample.augs
        }
        sample_dicts.append(sample_dict)

        return sample_dicts
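A note on the dummy anchors used in these examples: the anchor-offset encoding takes a log of the anchor dimensions, so all-zero placeholders would produce inf/nan in the loss even though negative samples are never regressed. A minimal sketch of why, assuming a log-ratio size encoding (the exact offset formula lives in the model code and may differ):

import numpy as np

# Anchor format used above: [x, y, z, dim_x, dim_y, dim_z]
dummy_anchor = np.array([-1000., -1000., -1000., 1., 1., 1.])
zero_anchor = np.zeros(6)
gt_anchor = np.array([10., 1., 20., 3.9, 1.6, 1.5])

# Size residuals are log-ratios, so anchor dims must be strictly positive
print(np.log(gt_anchor[3:] / dummy_anchor[3:]))  # finite values
with np.errstate(divide='ignore'):
    print(np.log(gt_anchor[3:] / zero_anchor[3:]))  # inf: poisons the loss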
Example #2
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Return:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib = calib_utils.read_calibration(
                self.calib_dir, int(sample_name))
            stereo_calib_p2 = stereo_calib.p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

            # Augmentation (Flipping)
            # WZN: the flipping augmentation flips the image (camera frame),
            # the point cloud (lidar frame), and the calibration matrix
            # (between camera and lidar), so the correspondence still holds.
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                if obj_labels is not None:
                    obj_labels = [
                        kitti_aug.flip_label_in_3d_only(obj)
                        for obj in obj_labels
                    ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane, output_indices=self.output_indices)
            # WZN: produce input for sparse pooling
            if self.output_indices:
                voxel_indices = bev_images[1]
                pts_in_voxel = bev_images[2]
                bev_images = bev_images[0]

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            # WZN: produce input for sparse pooling
            if self.output_indices:
                sparse_pooling_input1 = produce_sparse_pooling_input(
                    gen_sparse_pooling_input_avod(
                        pts_in_voxel, voxel_indices, stereo_calib,
                        [image_shape[1], image_shape[0]],
                        bev_input.shape[0:2]),
                    stride=[1, 1])
                # WZN: AVOD pads the VGG input height by 4, so match it here
                bev_input_padded = np.array(bev_input.shape[0:2])
                bev_input_padded[0] += 4
                sparse_pooling_input2 = produce_sparse_pooling_input(
                    gen_sparse_pooling_input_avod(
                        pts_in_voxel, voxel_indices, stereo_calib,
                        [image_shape[1], image_shape[0]], bev_input_padded),
                    stride=[8, 8])
                sparse_pooling_input = [
                    sparse_pooling_input1, sparse_pooling_input2
                ]
            else:
                sparse_pooling_input = None

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                # WZN: for sparse pooling
                constants.KEY_SPARSE_POOLING_INPUT: sparse_pooling_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
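The WZN comment above captures the key invariant: a horizontal flip only keeps the image, point cloud, labels, and projection consistent if the camera matrix is mirrored as well. A minimal sketch of what flip_stereo_calib_p2 has to do, assuming a 3x4 KITTI P2 matrix (details may differ from the actual kitti_aug implementation):

import numpy as np

def flip_p2_sketch(p2, image_shape):
    """Mirror a 3x4 camera projection matrix for a horizontal image flip.

    After flipping, pixel u maps to (width - u), so the principal point
    cx and the baseline term in the first row must be mirrored too.
    """
    flipped = np.copy(p2)
    flipped[0, 2] = image_shape[1] - p2[0, 2]  # mirror principal point cx
    flipped[0, 3] = -p2[0, 3]                  # negate fx * baseline term
    return flipped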
Example #3
    def load_samples(self, indices):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Return:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for sample_idx in indices:
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

            point_cloud = self.kitti_utils.get_point_cloud(
                self.bev_source, img_idx, image_shape)

            # Check if the run is training and if the train augmentation is set
            if self.train_val_test == 'train' and self.is_train_aug:
                # Apply an augmentation with 50% probability
                if np.random.uniform(0, 1) > 0.5:
                    # Make a random choice from the list of available augs
                    random_aug = random.choice(self.augs)
                    # Apply the corresponding aug method to the image
                    aug_fn = getattr(kitti_aug, random_aug)
                    image_input[:, :, 0:3] = aug_fn(image_input[:, :, 0:3])

            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                if obj_labels is not None:
                    obj_labels = [
                        kitti_aug.flip_label_in_3d_only(obj)
                        for obj in obj_labels
                    ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3])

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
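The random-augmentation block in this variant dispatches by function name with getattr; the same pattern, reduced to a self-contained sketch (maybe_augment and its arguments are illustrative, not part of the original code):

import random
import numpy as np

def maybe_augment(image, aug_module, aug_names, prob=0.5):
    """With probability `prob`, apply one randomly chosen augmentation.

    `aug_names` are names of callables on `aug_module` that map an RGB
    image to an RGB image, e.g. 'apply_pca_jitter' in kitti_aug.
    """
    if np.random.uniform(0, 1) < prob:
        aug_fn = getattr(aug_module, random.choice(aug_names))
        image = image.copy()
        image[:, :, 0:3] = aug_fn(image[:, :, 0:3])
    return image

Used in place of the inline block, this would read: image_input = maybe_augment(image_input, kitti_aug, self.augs).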
Example #4
    def load_samples(self,
                     indices,
                     sin_type=None,
                     sin_level=None,
                     sin_input_name=None,
                     gen_all_sin_inputs=False,
                     list_mask_2d=None):
        """ Loads input-output data for a set of samples. Should only be
            called when a particular sample dict is required. Otherwise,
            samples should be provided by the next_batch function

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Return:
            samples: a list of data sample dicts
        """
        sample_dicts = []
        for idx, sample_idx in enumerate(indices):
            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            if list_mask_2d:
                mask_2d = list_mask_2d[idx]
            else:
                mask_2d = None

            # Only read labels if they exist
            if self.has_labels:
                # Read mini batch first to see if it is empty
                anchors_info = self.get_anchors_info(sample_name)

                if (not anchors_info) and self.train_val_test == 'train' \
                        and (not self.train_on_all_samples):
                    empty_sample_dict = {
                        constants.KEY_SAMPLE_NAME: sample_name,
                        constants.KEY_ANCHORS_INFO: anchors_info
                    }
                    return [empty_sample_dict]

                obj_labels = obj_utils.read_labels(self.label_dir,
                                                   int(sample_name))

                # Only use objects that match dataset classes
                obj_labels = self.kitti_utils.filter_labels(obj_labels)

            else:
                obj_labels = None

                anchors_info = []

                label_anchors = np.zeros((1, 6))
                label_boxes_3d = np.zeros((1, 7))
                label_classes = np.zeros(1)

            img_idx = int(sample_name)

            # Load image (BGR -> RGB)
            cv_bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = cv_bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get ground plane
            ground_plane = obj_utils.get_road_plane(int(sample_name),
                                                    self.planes_dir)

            # Get calibration
            stereo_calib_p2 = calib_utils.read_calibration(
                self.calib_dir, int(sample_name)).p2

            # Read lidar with subsampling (handled before other preprocessing)
            if sin_type == 'lowres' and (sin_input_name == 'lidar'
                                         or gen_all_sin_inputs):
                stride_sub = get_stride_sub(sin_level)
                point_cloud = get_point_cloud_sub(img_idx, self.calib_dir,
                                                  self.velo_dir, image_shape,
                                                  stride_sub)
            else:
                point_cloud = self.kitti_utils.get_point_cloud(
                    self.bev_source, img_idx, image_shape)

            # Augmentation (Flipping)
            if kitti_aug.AUG_FLIPPING in sample.augs:
                image_input = kitti_aug.flip_image(image_input)
                point_cloud = kitti_aug.flip_point_cloud(point_cloud)
                if obj_labels is not None:
                    obj_labels = [
                        kitti_aug.flip_label_in_3d_only(obj)
                        for obj in obj_labels
                    ]
                ground_plane = kitti_aug.flip_ground_plane(ground_plane)
                stereo_calib_p2 = kitti_aug.flip_stereo_calib_p2(
                    stereo_calib_p2, image_shape)

            # Augmentation (Image Jitter)
            if kitti_aug.AUG_PCA_JITTER in sample.augs:
                image_input[:, :, 0:3] = kitti_aug.apply_pca_jitter(
                    image_input[:, :, 0:3])

            # Add Single Input Noise
            if (sin_input_name in SINFields.SIN_INPUT_NAMES) and (
                    sin_type in SINFields.VALID_SIN_TYPES):
                image_input, point_cloud = genSINtoInputs(
                    image_input,
                    point_cloud,
                    sin_type=sin_type,
                    sin_level=sin_level,
                    sin_input_name=sin_input_name,
                    mask_2d=mask_2d,
                    frame_calib_p2=stereo_calib_p2)
            # Add Input Noise to all
            if gen_all_sin_inputs:
                image_input, point_cloud = genSINtoAllInputs(
                    image_input,
                    point_cloud,
                    sin_type=sin_type,
                    sin_level=sin_level,
                    mask_2d=mask_2d,
                    frame_calib_p2=stereo_calib_p2)

            if obj_labels is not None:
                label_boxes_3d = np.asarray([
                    box_3d_encoder.object_label_to_box_3d(obj_label)
                    for obj_label in obj_labels
                ])

                label_classes = [
                    self.kitti_utils.class_str_to_index(obj_label.type)
                    for obj_label in obj_labels
                ]
                label_classes = np.asarray(label_classes, dtype=np.int32)

                # Return empty anchors_info if no ground truth after filtering
                if len(label_boxes_3d) == 0:
                    anchors_info = []
                    if self.train_on_all_samples:
                        # If training without any positive labels, we cannot
                        # set these to zeros, because later on the offset calc
                        # uses log on these anchors. So setting any arbitrary
                        # number here that does not break the offset calculation
                        # should work, since the negative samples won't be
                        # regressed in any case.
                        dummy_anchors = [[-1000, -1000, -1000, 1, 1, 1]]
                        label_anchors = np.asarray(dummy_anchors)
                        dummy_boxes = [[-1000, -1000, -1000, 1, 1, 1, 0]]
                        label_boxes_3d = np.asarray(dummy_boxes)
                    else:
                        label_anchors = np.zeros((1, 6))
                        label_boxes_3d = np.zeros((1, 7))
                    label_classes = np.zeros(1)
                else:
                    label_anchors = box_3d_encoder.box_3d_to_anchor(
                        label_boxes_3d, ortho_rotate=True)

            # Create BEV maps
            bev_images = self.kitti_utils.create_bev_maps(
                point_cloud, ground_plane)

            height_maps = bev_images.get('height_maps')
            density_map = bev_images.get('density_map')
            bev_input = np.dstack((*height_maps, density_map))

            sample_dict = {
                constants.KEY_LABEL_BOXES_3D: label_boxes_3d,
                constants.KEY_LABEL_ANCHORS: label_anchors,
                constants.KEY_LABEL_CLASSES: label_classes,
                constants.KEY_IMAGE_INPUT: image_input,
                constants.KEY_BEV_INPUT: bev_input,
                constants.KEY_ANCHORS_INFO: anchors_info,
                constants.KEY_POINT_CLOUD: point_cloud,
                constants.KEY_GROUND_PLANE: ground_plane,
                constants.KEY_STEREO_CALIB_P2: stereo_calib_p2,
                constants.KEY_SAMPLE_NAME: sample_name,
                constants.KEY_SAMPLE_AUGS: sample.augs
            }
            sample_dicts.append(sample_dict)

        return sample_dicts
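For reference, the bev_input assembled at the end of every example above is just a channels-last stack of the BEV height slices and the density map. A minimal shape check, with illustrative sizes rather than the dataset's actual configuration:

import numpy as np

bev_h, bev_w, num_slices = 700, 800, 5  # illustrative, not the real config
height_maps = [np.zeros((bev_h, bev_w), np.float32)
               for _ in range(num_slices)]
density_map = np.zeros((bev_h, bev_w), np.float32)

# Same call as in the examples: one channel per height slice plus density
bev_input = np.dstack((*height_maps, density_map))
assert bev_input.shape == (bev_h, bev_w, num_slices + 1)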