import os

import matplotlib.pyplot as plt

# Project-specific utilities; the import path below is an assumption and
# may differ depending on the package layout, e.g.:
# from <project>.datasets.kitti import calib_utils, obj_utils, vis_utils


def main():

    # Paths
    kitti_dir = os.path.expanduser('~/Kitti/object/')
    data_split_dir = 'training'

    image_dir = os.path.join(kitti_dir, data_split_dir, 'image_2')
    label_dir = os.path.join(kitti_dir, data_split_dir, 'label_2')
    calib_dir = os.path.join(kitti_dir, data_split_dir, 'calib')

    sample_name = '000050'

    frame_calib = calib_utils.get_frame_calib(calib_dir, sample_name)
    cam_p = frame_calib.p2

    f, axes = vis_utils.plots_from_sample_name(image_dir, sample_name, 2, 1)

    # Load labels
    obj_labels = obj_utils.read_labels(label_dir, sample_name)
    for obj in obj_labels:

        # Draw 2D and 3D boxes
        vis_utils.draw_obj_as_box_2d(axes[0], obj)
        vis_utils.draw_obj_as_box_3d(axes[1], obj, cam_p)

    plt.show(block=True)
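For reference, drawing a 3D box with cam_p boils down to projecting the eight box corners through the 3x4 camera matrix. Below is a minimal sketch of that projection, assuming a KITTI-style box with size (h, w, l), bottom-center location t, and rotation ry around the camera y-axis; the helper name is hypothetical, not part of vis_utils.

import numpy as np

def project_box_3d_corners(h, w, l, t, ry, cam_p):
    """Project the 8 corners of a KITTI-style 3D box into the image (sketch)."""
    # Corners in the object frame: origin at the bottom-center of the box,
    # x along length, y down (so the top face is at y = -h), z along width
    x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
    y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
    z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
    corners = np.array([x_corners, y_corners, z_corners])  # (3, 8)

    # Rotate around the camera y-axis and translate to the box location
    rot = np.array([[np.cos(ry), 0, np.sin(ry)],
                    [0, 1, 0],
                    [-np.sin(ry), 0, np.cos(ry)]])
    corners = rot @ corners + np.asarray(t).reshape(3, 1)

    # Project with the 3x4 camera matrix and normalize by depth
    # (assumes all corners lie in front of the camera)
    pts_2d = cam_p @ np.vstack([corners, np.ones((1, 8))])
    return pts_2d[0:2] / pts_2d[2]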
Example #2
    def test_filter_obj_labels(self):

        sample_name = '000050'
        obj_labels = obj_utils.read_labels(self.dataset.kitti_label_dir,
                                           sample_name)

        obj_labels_filt, obj_mask = obj_utils.filter_labels(
            obj_labels, classes=['Car'], depth_range=[5, 45])

        self.assertEqual(len(obj_labels_filt), 3)
        np.testing.assert_equal(obj_mask, [True, True, False, True, False])
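The test above pins down the semantics of filter_labels: a label survives only if its class is in classes and its depth lies inside depth_range. A minimal illustrative sketch of such a filter, assuming each label has a type string and a camera-frame location t whose z component is the depth (this is not the library's actual implementation):

import numpy as np

def filter_labels_sketch(obj_labels, classes, depth_range):
    """Illustrative class + depth filter returning (filtered_labels, mask)."""
    mask = np.array([obj.type in classes
                     and depth_range[0] <= obj.t[2] <= depth_range[1]
                     for obj in obj_labels])
    filtered = [obj for obj, keep in zip(obj_labels, mask) if keep]
    return filtered, mask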
Example #3
def main():
    ##############################
    # Options
    ##############################
    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    num_jittered_boxes = 5
    iou_thresh = 0.7

    classes = ['Car']

    sample_name = '000050'

    ##############################

    # Get filtered labels
    dataset.classes = classes
    obj_labels = obj_utils.read_labels(dataset.kitti_label_dir, sample_name)
    obj_labels, class_filter = obj_utils.filter_labels(obj_labels,
                                                       classes=dataset.classes)

    # Image shape
    bgr_image = cv2.imread(dataset.get_rgb_image_path(sample_name))
    rgb_image = bgr_image[..., ::-1]
    image_shape = rgb_image.shape[0:2]

    # Generate jittered boxes
    aug_labels = []
    for label in obj_labels:
        for _ in range(num_jittered_boxes):
            aug_label = kitti_aug.jitter_obj_boxes_2d([label], iou_thresh,
                                                      image_shape)
            aug_labels.append(aug_label[0])

    # Visualize boxes
    fig, axes = vis_utils.plots_from_image(rgb_image, display=False)

    # Draw non-augmented boxes in red
    for obj in obj_labels:
        vis_utils.draw_obj_as_box_2d(axes, obj, color='r')

    # Draw augmented boxes in cyan
    for obj in aug_labels:
        vis_utils.draw_obj_as_box_2d(axes, obj, color='c', linewidth=1)

    plt.show(block=True)
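kitti_aug.jitter_obj_boxes_2d is used as a black box above. One plausible way to jitter a 2D box while keeping it close to the original is to resample random offsets until the IoU with the original box stays above the threshold. The sketch below assumes [y1, x1, y2, x2] box order; the offset scale and resampling loop are assumptions, not the library's actual scheme.

import numpy as np

def iou_2d(box_a, box_b):
    """Intersection-over-union of two [y1, x1, y2, x2] boxes."""
    y1 = max(box_a[0], box_b[0])
    x1 = max(box_a[1], box_b[1])
    y2 = min(box_a[2], box_b[2])
    x2 = min(box_a[3], box_b[3])
    inter = max(0.0, y2 - y1) * max(0.0, x2 - x1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)

def jitter_box_2d_sketch(box, iou_thresh, image_shape,
                         scale=0.1, max_tries=100):
    """Randomly shift/scale a box, resampling until IoU >= iou_thresh."""
    box = np.asarray(box, dtype=np.float64)
    h = box[2] - box[0]
    w = box[3] - box[1]
    limits = [image_shape[0], image_shape[1], image_shape[0], image_shape[1]]
    for _ in range(max_tries):
        noise = np.random.uniform(-scale, scale, 4) * [h, w, h, w]
        new_box = np.clip(box + noise, 0, limits)
        if iou_2d(box, new_box) >= iou_thresh:
            return new_box
    return box  # fall back to the original box if no jitter passes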
Example #4
def get_gts_based_on_difficulty(dataset, sample_name):
    """Returns lists of ground-truth based on difficulty.
    """
    # Get all ground truth labels and filter to dataset classes
    all_gt_objs = obj_utils.read_labels(dataset.kitti_label_dir, sample_name)
    gt_objs, _ = obj_utils.filter_labels_by_class(all_gt_objs, dataset.classes)

    # Filter objects to desired difficulty
    easy_gt_objs, _ = obj_utils.filter_labels_by_difficulty(
        copy.deepcopy(gt_objs), difficulty=Difficulty.EASY)
    medium_gt_objs, _ = obj_utils.filter_labels_by_difficulty(
        copy.deepcopy(gt_objs), difficulty=Difficulty.MODERATE)
    hard_gt_objs, _ = obj_utils.filter_labels_by_difficulty(
        copy.deepcopy(gt_objs), difficulty=Difficulty.HARD)

    for gt_obj in easy_gt_objs:
        gt_obj.type = 'Easy GT'
    for gt_obj in medium_gt_objs:
        gt_obj.type = 'Medium GT'
    for gt_obj in hard_gt_objs:
        gt_obj.type = 'Hard GT'

    return easy_gt_objs, medium_gt_objs, hard_gt_objs, all_gt_objs
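filter_labels_by_difficulty is conventionally based on the KITTI benchmark criteria: a minimum 2D box height and maximum occlusion and truncation per difficulty. A sketch of such a check using the standard KITTI thresholds; the label field names (y1, y2, occlusion, truncation) are assumptions about the label class.

# Standard KITTI thresholds: (min box height px, max occlusion, max truncation)
DIFFICULTY_THRESHOLDS = {
    'EASY': (40, 0, 0.15),
    'MODERATE': (25, 1, 0.30),
    'HARD': (25, 2, 0.50),
}

def matches_difficulty_sketch(obj, difficulty):
    """Check whether an object meets a KITTI difficulty level (sketch)."""
    min_height, max_occ, max_trunc = DIFFICULTY_THRESHOLDS[difficulty]
    box_height = obj.y2 - obj.y1
    return (box_height >= min_height
            and obj.occlusion <= max_occ
            and obj.truncation <= max_trunc)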
Example #5
def main():

    ##############################
    # Options
    ##############################

    point_cloud_source = 'depth_2_multiscale'

    samples_to_use = None  # all samples

    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL)

    out_instance_dir = 'outputs/instance_2_{}'.format(point_cloud_source)

    required_classes = [
        'Car',
        'Pedestrian',
        'Cyclist',
        'Van',
        'Truck',
        'Person_sitting',
        'Tram',
        'Misc',
    ]

    ##############################
    # End of Options
    ##############################

    # Create instance folder
    os.makedirs(out_instance_dir, exist_ok=True)

    # Get frame ids to process
    if samples_to_use is None:
        samples_to_use = dataset.get_sample_names()

    # Begin instance mask generation
    for sample_idx, sample_name in enumerate(samples_to_use):

        sys.stdout.write(
            '\r{} / {} Generating {} instances for sample {}'.format(
                sample_idx + 1, len(samples_to_use), point_cloud_source,
                sample_name))
        sys.stdout.flush()

        # Get image
        image = obj_utils.get_image(sample_name, dataset.image_2_dir)
        image_shape = image.shape[0:2]

        # Get calibration
        frame_calib = calib_utils.get_frame_calib(dataset.calib_dir,
                                                  sample_name)

        # Get point cloud
        if point_cloud_source.startswith('depth'):
            point_cloud = obj_utils.get_depth_map_point_cloud(
                sample_name, frame_calib, dataset.depth_dir)

        elif point_cloud_source == 'velo':
            point_cloud = obj_utils.get_lidar_point_cloud_for_cam(
                sample_name, frame_calib, dataset.velo_dir, image_shape)
        else:
            raise ValueError('Invalid point cloud source', point_cloud_source)

        # Filter according to classes
        obj_labels = obj_utils.read_labels(dataset.kitti_label_dir,
                                           sample_name)
        obj_labels, _ = obj_utils.filter_labels_by_class(
            obj_labels, required_classes)

        # Get 2D and 3D bounding boxes from labels
        gt_boxes_2d = [
            box_3d_encoder.object_label_to_box_2d(obj_label)
            for obj_label in obj_labels
        ]
        gt_boxes_3d = [
            box_3d_encoder.object_label_to_box_3d(obj_label)
            for obj_label in obj_labels
        ]

        instance_image = np.full(image_shape, 255, dtype=np.uint8)

        # Start instance index at 0 and generate instance masks for all boxes
        inst_idx = 0
        for obj_label, box_2d, box_3d in zip(obj_labels, gt_boxes_2d,
                                             gt_boxes_3d):

            # Apply inflation and offset to box_3d
            modified_box_3d = modify_box_3d(box_3d, obj_label)

            # Get points in 3D box
            box_points, mask = obj_utils.points_in_box_3d(
                modified_box_3d, point_cloud.T)

            # Get points in 2D box
            points_in_im = calib_utils.project_pc_to_image(
                box_points.T, cam_p=frame_calib.p2)
            mask_2d = (
                (points_in_im[0] >= box_2d[1]) &
                (points_in_im[0] <= box_2d[3]) &
                (points_in_im[1] >= box_2d[0]) &
                (points_in_im[1] <= box_2d[2]))

            if point_cloud_source.startswith('depth'):
                mask_points_in_im = np.where(mask.reshape(image_shape))
                mask_points_in_im = [
                    mask_points_in_im[0][mask_2d],
                    mask_points_in_im[1][mask_2d]
                ]
                instance_pixels = np.asarray(
                    [mask_points_in_im[1], mask_points_in_im[0]])
            elif point_cloud_source == 'velo':
                # Velodyne instance masks are not implemented; without this
                # raise, instance_pixels would be undefined below
                # image_points = box_utils.project_to_image(
                #     box_points.T, frame.p_left).astype(np.int32)
                raise NotImplementedError(
                    'velo instance masks are not implemented')

            # Clip indices so they don't exceed image dimensions
            instance_pixels[0, :] = np.clip(instance_pixels[0, :], 0,
                                            image_shape[1] - 1)
            instance_pixels[1, :] = np.clip(instance_pixels[1, :], 0,
                                            image_shape[0] - 1)

            instance_image[instance_pixels[1, :],
                           instance_pixels[0, :]] = np.uint8(inst_idx)

            inst_idx += 1

        # Write image to directory
        cv2.imwrite(os.path.join(out_instance_dir,
                                 '{}.png'.format(sample_name)),
                    instance_image, [cv2.IMWRITE_PNG_COMPRESSION, 1])
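Since the instance images above use 255 as the background value and the instance index as the pixel value, per-object masks can be recovered with a simple equality test, which appears to be what instance_utils.get_instance_mask_list does in the next example. A minimal decoding sketch (helper name hypothetical; assumes fewer than 255 objects per image):

import cv2
import numpy as np

def read_instance_masks_sketch(png_path, num_instances):
    """Recover per-object boolean masks from an instance image (sketch)."""
    instance_image = cv2.imread(png_path, cv2.IMREAD_GRAYSCALE)
    return np.asarray(
        [instance_image == idx for idx in range(num_instances)])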
Example #6
    def get_sample_dict(self, indices):
        """Loads input-output data for a set of samples. Should only be
        called when a particular sample dict is required; otherwise,
        samples should be provided by the next_batch function.

        Args:
            indices: A list of sample indices from the dataset.sample_list
                to be loaded

        Returns:
            sample_dicts: A list of sample dicts (entries are None for
                samples with no valid objects)
        """
        sample_dicts = []
        for sample_idx in indices:

            sample = self.sample_list[sample_idx]
            sample_name = sample.name

            # Load image (BGR -> RGB)
            bgr_image = cv2.imread(self.get_rgb_image_path(sample_name))
            rgb_image = bgr_image[..., ::-1]
            image_shape = rgb_image.shape[0:2]
            image_input = rgb_image

            # Get calibration
            frame_calib = calib_utils.get_frame_calib(self.calib_dir,
                                                      sample_name)
            cam_p = frame_calib.p2

            # Only read labels if they exist
            if self.train_val_test in ['train', 'val']:

                # Read KITTI object labels
                kitti_obj_labels = obj_utils.read_labels(
                    self.kitti_label_dir, sample_name)

                if self.use_mscnn_detections and self.train_val_test == 'val':
                    # Read mscnn obj labels and replace the KITTI obj
                    # label box coords and scores
                    mscnn_obj_labels = obj_utils.read_labels(
                        self.mscnn_label_dir, sample_name)

                    obj_labels = obj_utils.merge_kitti_and_mscnn_obj_labels(
                        kitti_obj_labels,
                        mscnn_obj_labels,
                        min_iou=self.mscnn_merge_min_iou,
                        default_score_type='distance')
                else:
                    obj_labels = kitti_obj_labels

                num_all_objs = len(obj_labels)

                # Filter labels
                obj_labels, obj_mask = obj_utils.apply_obj_filter(
                    obj_labels, self.obj_filter)
                num_objs = len(obj_labels)
                if num_objs < 1:
                    sample_dicts.append(None)
                    continue

                if self.use_mscnn_detections:
                    # Get filtered original kitti_obj_labels
                    kitti_obj_labels, kitti_obj_mask = obj_utils.apply_obj_filter(
                        kitti_obj_labels, self.obj_filter)
                    num_kitti_objs = len(kitti_obj_labels)
                    if num_kitti_objs < 1:
                        sample_dicts.append(None)
                        continue

                # Load instance masks
                instance_image = instance_utils.get_instance_image(
                    sample_name, self.instance_dir)
                instance_masks = instance_utils.get_instance_mask_list(
                    instance_image, num_all_objs)
                instance_masks = instance_masks[obj_mask]

                if self.oversample:
                    # Oversample to required number of boxes
                    num_to_oversample = self.num_boxes - num_objs

                    oversample_indices = np.random.choice(num_objs,
                                                          num_to_oversample,
                                                          replace=True)
                    oversample_indices = np.hstack(
                        [np.arange(0, num_objs), oversample_indices])
                    obj_labels = obj_labels[oversample_indices]
                    instance_masks = instance_masks[oversample_indices]

                # Augmentation if in train mode
                if self.train_val_test == 'train':

                    # Image augmentation
                    use_image_aug = self.aug_config.use_image_aug
                    if use_image_aug:
                        image_input = kitti_aug.apply_image_noise(rgb_image)

                    # Box jittering
                    box_jitter_type = self.aug_config.box_jitter_type
                    if box_jitter_type is None:
                        pass
                    elif box_jitter_type == 'oversample':
                        # Replace oversampled boxes with jittered boxes
                        if not self.oversample:
                            raise ValueError(
                                'Must oversample object labels to use {} '
                                'box jitter type'.format(box_jitter_type))
                        aug_labels = kitti_aug.jitter_obj_boxes_2d(
                            obj_labels[num_objs:], 0.7, image_shape)
                        obj_labels[num_objs:] = aug_labels
                    elif box_jitter_type == 'oversample_gt':
                        # Replace oversampled boxes with jittered gt boxes
                        if not self.oversample:
                            raise ValueError(
                                'Must oversample object labels to use {} '
                                'box jitter type'.format(box_jitter_type))

                        # Get enough gt boxes to jitter
                        gt_num_to_oversample = self.num_boxes - num_objs
                        gt_oversample_indices = np.random.choice(
                            num_kitti_objs, gt_num_to_oversample, replace=True)
                        kitti_obj_labels = kitti_obj_labels[
                            gt_oversample_indices]

                        aug_labels = kitti_aug.jitter_obj_boxes_2d(
                            kitti_obj_labels, 0.7, image_shape)
                        obj_labels[num_objs:] = aug_labels
                    elif box_jitter_type == 'all':
                        # Apply data augmentation on all labels
                        obj_labels = kitti_aug.jitter_obj_boxes_2d(
                            obj_labels, 0.7, image_shape)
                    else:
                        raise ValueError('Invalid box_jitter_type',
                                         box_jitter_type)

                # TODO: Do this some other way
                # Get 2D and 3D boxes
                label_boxes_2d = obj_utils.boxes_2d_from_obj_labels(obj_labels)
                label_boxes_3d = obj_utils.boxes_3d_from_obj_labels(obj_labels)
                label_alphas = np.asarray(
                    [obj_label.alpha for obj_label in obj_labels],
                    dtype=np.float32)

                label_alpha_bins, label_alpha_regs, label_valid_alpha_bins = \
                    zip(*[orientation_encoder.np_orientation_to_angle_bin(
                        obj_label.alpha, self.num_alpha_bins,
                        self.alpha_bin_overlap)
                        for obj_label in obj_labels])

                # Get viewing angles
                label_viewing_angles_2d = np.asarray(
                    [obj_utils.get_viewing_angle_box_2d(box_2d, cam_p)
                     for box_2d in label_boxes_2d],
                    dtype=np.float32)
                label_viewing_angles_3d = np.asarray(
                    [obj_utils.get_viewing_angle_box_3d(box_3d, cam_p)
                     for box_3d in label_boxes_3d],
                    dtype=np.float32)

                # Parse class indices
                label_class_indices = [
                    obj_utils.class_str_to_index(obj_label.type, self.classes)
                    for obj_label in obj_labels
                ]
                label_class_indices = np.expand_dims(
                    np.asarray(label_class_indices, dtype=np.int32), axis=1)
                label_class_strs = [obj_label.type for obj_label in obj_labels]

                # Get proposal z centroid offset
                prop_cen_z_offset_list = np.asarray([
                    instance_utils.get_prop_cen_z_offset(class_str)
                    for class_str in label_class_strs
                ])

                # Get xyz map in cam_N frame
                depth_map = obj_utils.get_depth_map(sample_name,
                                                    self.depth_dir)

                # Get scores
                label_scores = np.asarray(
                    [obj_label.score for obj_label in obj_labels], np.float32)

                # Get lwh average
                lwh_means = np.asarray([
                    obj_utils.get_mean_lwh_and_std_dev(class_str)[0]
                    for class_str in label_class_strs
                ])

            elif self.train_val_test == 'test':
                # Read object test labels
                obj_labels = obj_utils.read_labels(self.mscnn_label_dir,
                                                   sample_name)
                num_objs = len(obj_labels)
                if num_objs < 1:
                    sample_dicts.append(None)
                    continue

                # Apply the object filter
                obj_labels, obj_mask = obj_utils.apply_obj_filter(
                    obj_labels, self.obj_filter)
                num_objs = len(obj_labels)
                if num_objs < 1:
                    sample_dicts.append(None)
                    continue

                # Oversample to required number of boxes
                num_to_oversample = self.num_boxes - num_objs
                oversample_indices = np.random.choice(num_objs,
                                                      num_to_oversample,
                                                      replace=True)
                oversample_indices = np.hstack(
                    [np.arange(0, num_objs), oversample_indices])
                obj_labels = obj_labels[oversample_indices]

                # Get 2D boxes
                label_boxes_2d = obj_utils.boxes_2d_from_obj_labels(obj_labels)

                # Get score
                label_scores = np.asarray(
                    [obj_label.score for obj_label in obj_labels], np.float32)

                # Calculate viewing angles
                label_viewing_angles_2d = np.asarray(
                    [obj_utils.get_viewing_angle_box_2d(box_2d, cam_p)
                     for box_2d in label_boxes_2d],
                    dtype=np.float32)

                label_class_indices = [
                    obj_utils.class_str_to_index(obj_label.type, self.classes)
                    for obj_label in obj_labels
                ]
                label_class_indices = np.expand_dims(
                    np.asarray(label_class_indices, dtype=np.int32), axis=1)
                label_class_strs = [obj_label.type for obj_label in obj_labels]

                # Get lwh average
                lwh_means = np.asarray([
                    obj_utils.get_mean_lwh_and_std_dev(class_str)[0]
                    for class_str in label_class_strs
                ])

                # Get proposal z centroid offset
                prop_cen_z_offset_list = np.asarray([
                    instance_utils.get_prop_cen_z_offset(class_str)
                    for class_str in label_class_strs
                ])

            else:
                raise ValueError('Invalid run mode', self.train_val_test)

            # Common inputs for all train_val_test modes
            # Normalize 2D boxes
            label_boxes_2d_norm = label_boxes_2d / np.tile(image_shape, 2)

            sample_dict = {
                constants.SAMPLE_NUM_OBJS: num_objs,
                constants.SAMPLE_IMAGE_INPUT: image_input,
                constants.SAMPLE_CAM_P: cam_p,
                constants.SAMPLE_NAME: sample_name,
                constants.SAMPLE_LABEL_BOXES_2D_NORM: label_boxes_2d_norm,
                constants.SAMPLE_LABEL_BOXES_2D: label_boxes_2d,
                constants.SAMPLE_LABEL_SCORES: label_scores,
                constants.SAMPLE_LABEL_CLASS_STRS:
                    np.expand_dims(label_class_strs, 1),
                constants.SAMPLE_LABEL_CLASS_INDICES: label_class_indices,
                constants.SAMPLE_MEAN_LWH: lwh_means,
                constants.SAMPLE_PROP_CEN_Z_OFFSET: prop_cen_z_offset_list,
                constants.SAMPLE_VIEWING_ANGLES_2D: label_viewing_angles_2d,
            }

            if self.train_val_test in ['train', 'val']:

                sample_dict.update({
                    constants.SAMPLE_LABEL_BOXES_3D: label_boxes_3d,
                    constants.SAMPLE_ALPHAS: label_alphas,
                    constants.SAMPLE_ALPHA_BINS: np.asarray(label_alpha_bins),
                    constants.SAMPLE_ALPHA_REGS: np.asarray(label_alpha_regs),
                    constants.SAMPLE_ALPHA_VALID_BINS:
                        np.asarray(label_valid_alpha_bins),
                    constants.SAMPLE_VIEWING_ANGLES_3D:
                        label_viewing_angles_3d,
                    constants.SAMPLE_INSTANCE_MASKS: instance_masks,
                    constants.SAMPLE_DEPTH_MAP: depth_map,
                })

            elif self.train_val_test == 'test':
                # No additional labels for test mode
                pass

            sample_dicts.append(sample_dict)

        return sample_dicts
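np_orientation_to_angle_bin above converts an alpha angle into a classification bin plus a regression residual. A minimal sketch of that bin/residual idea, ignoring the bin-overlap handling and assuming bins evenly divide [-pi, pi); this is an illustration, not the encoder's actual implementation.

import numpy as np

def orientation_to_bin_sketch(alpha, num_bins):
    """Classify alpha into a bin and regress the residual from bin center."""
    bin_width = 2 * np.pi / num_bins
    # Shift so bin 0 starts at -pi
    shifted = (alpha + np.pi) % (2 * np.pi)
    bin_idx = int(shifted // bin_width)
    bin_center = bin_idx * bin_width + bin_width / 2 - np.pi
    # Wrap the residual back into [-pi, pi)
    residual = (alpha - bin_center + np.pi) % (2 * np.pi) - np.pi
    return bin_idx, residual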
Example #7
    def get_clusters(self):
        """
        Calculates clusters for each class

        Returns:
            all_clusters: list of clusters for each class
            all_std_devs: list of cluster standard deviations for each class
        """

        classes = self._dataset.classes
        num_clusters = self._dataset.num_clusters

        all_clusters = [[] for _ in range(len(classes))]
        all_std_devs = [[] for _ in range(len(classes))]

        classes_not_loaded = []

        # Try to read from file first
        for class_idx in range(len(classes)):
            clusters, std_devs = self._read_clusters_from_file(
                self._dataset, classes[class_idx], num_clusters[class_idx])

            if clusters is not None:
                all_clusters[class_idx].extend(np.asarray(clusters))
                all_std_devs[class_idx].extend(np.asarray(std_devs))
            else:
                classes_not_loaded.append(class_idx)

        # Return early if all clusters were loaded from file
        if len(classes_not_loaded) == 0:
            return all_clusters, all_std_devs

        # Calculate the remaining clusters
        # Load labels corresponding to the sample list for clustering
        sample_list = self._dataset.load_sample_names(self.cluster_split)
        all_labels = [[] for _ in range(len(classes))]

        num_samples = len(sample_list)
        for sample_idx in range(num_samples):

            sys.stdout.write("\rClustering labels {} / {}".format(
                sample_idx + 1, num_samples))
            sys.stdout.flush()

            sample_name = sample_list[sample_idx]

            obj_labels = obj_utils.read_labels(self._dataset.kitti_label_dir,
                                               sample_name)
            filtered_labels = LabelClusterUtils._filter_labels_by_class(
                obj_labels, self._dataset.classes)

            for class_idx in range(len(classes)):
                all_labels[class_idx].extend(filtered_labels[class_idx])

        print("\nFinished reading labels, clustering data...\n")

        # Cluster
        for class_idx in classes_not_loaded:
            labels_for_class = np.array(all_labels[class_idx])

            n_clusters_for_class = num_clusters[class_idx]
            if len(labels_for_class) < n_clusters_for_class:
                raise ValueError(
                    "Number of samples is less than number of clusters "
                    "{} < {}".format(len(labels_for_class),
                                     n_clusters_for_class))

            k_means = KMeans(n_clusters=n_clusters_for_class,
                             random_state=0).fit(labels_for_class)

            clusters_for_class = []
            std_devs_for_class = []

            for cluster_idx in range(len(k_means.cluster_centers_)):
                cluster_centre = k_means.cluster_centers_[cluster_idx]

                labels_in_cluster = labels_for_class[k_means.labels_ ==
                                                     cluster_idx]

                # Calculate std. dev
                std_dev = np.std(labels_in_cluster, axis=0)

                formatted_cluster = [
                    float('%.3f' % value) for value in cluster_centre
                ]
                formatted_std_dev = [
                    float('%.3f' % value) for value in std_dev
                ]

                clusters_for_class.append(formatted_cluster)
                std_devs_for_class.append(formatted_std_dev)

            # Write to files
            file_path = self._get_cluster_file_path(self._dataset,
                                                    classes[class_idx],
                                                    num_clusters[class_idx])

            self._write_clusters_to_file(file_path, clusters_for_class,
                                         std_devs_for_class)

            # Add to full list
            all_clusters[class_idx].extend(np.asarray(clusters_for_class))
            all_std_devs[class_idx].extend(np.asarray(std_devs_for_class))

        # Return the data flattened into N x 3 arrays
        return all_clusters, all_std_devs
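For context, the arrays being clustered are presumably the per-object (l, w, h) box dimensions, so each class ends up with num_clusters representative sizes plus their per-cluster standard deviations. A self-contained usage sketch on synthetic data:

import numpy as np
from sklearn.cluster import KMeans

# Synthetic (l, w, h) dimensions for one class, roughly car-sized
lwh = np.random.normal(loc=[3.9, 1.6, 1.5], scale=0.2, size=(100, 3))

k_means = KMeans(n_clusters=2, random_state=0).fit(lwh)
print(k_means.cluster_centers_)                   # 2 x 3 cluster centres
print(np.std(lwh[k_means.labels_ == 0], axis=0))  # std dev of cluster 0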