Exemple #1
0
def boxes_3d_project_to_image(boxes_3d, stereo_calib_p2):
    boxes_2d = []
    for box in boxes_3d:
        box2d = box_3d_projector.project_to_image_space(box, stereo_calib_p2)
        boxes_2d.append(box2d)
    boxes_2d = np.asarray(boxes_2d, dtype=np.float32)
    return boxes_2d
def main(checkpoint_name):
    """This demo shows RPN proposals and AVOD predictions in 3D
    and 2D in image space. Given certain thresholds for proposals
    and predictions, it selects and draws the bounding boxes on
    the image sample. It goes through the entire proposal and
    prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.
    """

    # checkpoint_name =='pyramid_cars_with_aug_example'
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    rpn_score_threshold = 0.1
    avod_score_threshold = 0.002  #0.1 #0.009 # <== final threshold
    sample_names = [90] + list(range(
        98, 104)) + [138, 224, 270, 290, 310, 330, 520]

    # Convert to string with correct format (don't change)
    sample_names = ['{:06d}'.format(x) for x in sample_names]

    # gt_classes = ['Car']
    gt_classes = ['Pedestrian', 'Cyclist']
    # gt_classes = ['Car', 'Pedestrian', 'Cyclist']

    img_dir = '/home/jhuang/repo/avod/input'

    # Overwrite this to select a specific checkpoint
    global_step = None

    # Drawing Toggles
    draw_proposals_separate = False
    draw_overlaid = False
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = False
    draw_orientations_on_pred = False

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = True

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = True
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = None
    data_split = 'val'

    # Setup Paths
    predictions_dir = avod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            data_split, global_step, rpn_score_threshold)

        if not os.path.exists(prop_out_dir):
            os.makedirs(prop_out_dir)

        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            data_split, global_step, avod_score_threshold)

        if not os.path.exists(overlaid_out_dir):
            os.makedirs(overlaid_out_dir)

        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            data_split, global_step, avod_score_threshold)

        if not os.path.exists(pred_out_dir):
            os.makedirs(pred_out_dir)

        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    num_samples = len(sample_names)
    for sample_idx in range(num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s\n'.format(
                             sample_idx + 1, num_samples, avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step,
                                    sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from files
            predictions_and_scores = np.loadtxt(
                predictions_and_scores_dir +
                "/{}/{}.txt".format(global_step, sample_name))

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            print("scores=", prediction_scores)
            prediction_class_indices = predictions_and_scores[:, 8]

            # process predictions only if we have any predictions left after
            # masking
            if len(prediction_boxes_3d) > 0:

                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                print("len(prediction_boxes_3d)=", len(prediction_boxes_3d))
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

                # # Swap l, w for predictions where w > l
                # swapped_indices = \
                #     prediction_boxes_3d[:, 4] > prediction_boxes_3d[:, 3]
                # prediction_boxes_3d = np.copy(prediction_boxes_3d)
                # prediction_boxes_3d[swapped_indices, 3] = \
                #     prediction_boxes_3d[swapped_indices, 4]
                # prediction_boxes_3d[swapped_indices, 4] = \
                #     prediction_boxes_3d[swapped_indices, 3]

        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        gt_objects = []
        image_path = get_rgb_image_path(img_dir, img_idx, 'img_')
        image = Image.open(image_path)
        image_size = image.size

        # Read the stereo calibration matrix for visualization
        calib_dir = img_dir
        stereo_calib = load_calib(calib_dir, img_idx, 'calib.txt')
        calib_p2 = stereo_calib.p2

        if draw_overlaid or draw_predictions_separate:
            num_of_predictions = 0
            if len(prediction_boxes_3d) > 0:
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for i in range(len(prediction_boxes_3d)):
                    box_3d = prediction_boxes_3d[i, 0:7]
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d,
                        calib_p2,
                        truncate=True,
                        image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score
            # else:
            #     if save_empty_images:
            #         pred_fig, pred_2d_axes, pred_3d_axes = \
            #             vis_utils.visualization(dataset.rgb_image_dir,
            #                                     img_idx,
            #                                     display=False,
            #                                     fig_size=fig_size)
            #         filename = pred_out_dir + '/' + sample_name + '.png'
            #         plt.savefig(filename)
            #         plt.close(pred_fig)
            #     continue

            if draw_predictions_separate and num_of_predictions > 0:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization_jhuang(img_dir,
                                                img_idx, 'img_',
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions([], calib_p2, num_of_predictions,
                                     final_prediction_objs,
                                     final_class_indices, final_boxes_2d,
                                     pred_2d_axes, pred_3d_axes, draw_score,
                                     draw_iou, gt_classes,
                                     draw_orientations_on_pred)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions([], calib_p2, num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices, final_boxes_2d,
                                        pred_3d_axes, draw_score, draw_iou,
                                        gt_classes, draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')
def main():
    """This demo shows RPN proposals and AVOD predictions in 3D
    and 2D in image space. Given certain thresholds for proposals
    and predictions, it selects and draws the bounding boxes on
    the image sample. It goes through the entire proposal and
    prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    rpn_score_threshold = 0.1
    avod_score_threshold = 0.1

    # gt_classes = ['Car']
    gt_classes = ['Pedestrian', 'Cyclist']
    # gt_classes = ['Car', 'Pedestrian', 'Cyclist']

    # Overwrite this to select a specific checkpoint
    global_step = None
    checkpoint_name = sys.argv[1]  #'pyramid_cars_with_aug_example'

    # Drawing Toggles
    draw_proposals_separate = False
    draw_overlaid = False
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = False
    draw_orientations_on_pred = False

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = True

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = True
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # Setup Paths
    predictions_dir = avod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            dataset.data_split, global_step, rpn_score_threshold)

        if not os.path.exists(prop_out_dir):
            os.makedirs(prop_out_dir)

        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            dataset.data_split, global_step, avod_score_threshold)

        if not os.path.exists(overlaid_out_dir):
            os.makedirs(overlaid_out_dir)

        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            dataset.data_split, global_step, avod_score_threshold)

        if not os.path.exists(pred_out_dir):
            os.makedirs(pred_out_dir)

        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    for sample_idx in range(dataset.num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = dataset.num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s'.format(
                             sample_idx + 1, dataset.num_samples, avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Proposals
        ##############################
        if draw_proposals_separate or draw_overlaid:
            # Load proposals from files
            proposals_file_path = proposals_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(proposals_file_path):
                print('Sample {}: No proposals, skipping'.format(sample_name))
                continue
            print('Sample {}: Drawing proposals'.format(sample_name))

            proposals_and_scores = np.loadtxt(proposals_file_path)

            proposal_boxes_3d = proposals_and_scores[:, 0:7]
            proposal_scores = proposals_and_scores[:, 7]

            # Apply score mask to proposals
            score_mask = proposal_scores > rpn_score_threshold
            proposal_boxes_3d = proposal_boxes_3d[score_mask]
            proposal_scores = proposal_scores[score_mask]

            proposal_objs = \
                [box_3d_encoder.box_3d_to_object_label(proposal,
                                                       obj_type='Proposal')
                 for proposal in proposal_boxes_3d]

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step,
                                    sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from files
            predictions_and_scores = np.loadtxt(
                predictions_and_scores_dir +
                "/{}/{}.txt".format(global_step, sample_name))

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]

            # process predictions only if we have any predictions left after
            # masking
            if len(prediction_boxes_3d) > 0:

                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

                # # Swap l, w for predictions where w > l
                # swapped_indices = \
                #     prediction_boxes_3d[:, 4] > prediction_boxes_3d[:, 3]
                # prediction_boxes_3d = np.copy(prediction_boxes_3d)
                # prediction_boxes_3d[swapped_indices, 3] = \
                #     prediction_boxes_3d[swapped_indices, 4]
                # prediction_boxes_3d[swapped_indices, 4] = \
                #     prediction_boxes_3d[swapped_indices, 3]

        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        if dataset.has_labels:
            gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
        else:
            gt_objects = []

        # Filter objects to desired difficulty
        filtered_gt_objs = dataset.kitti_utils.filter_labels(
            gt_objects, classes=gt_classes)

        boxes2d, _, _ = obj_utils.build_bbs_from_objects(
            filtered_gt_objs, class_needed=gt_classes)

        image_path = dataset.get_rgb_image_path(sample_name)
        image = Image.open(image_path)
        image_size = image.size

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(dataset.calib_dir, img_idx)
        calib_p2 = stereo_calib.p2

        ##############################
        # Reformat and prepare to draw
        ##############################
        if draw_proposals_separate or draw_overlaid:
            proposals_as_anchors = box_3d_encoder.box_3d_to_anchor(
                proposal_boxes_3d)

            proposal_boxes, _ = anchor_projector.project_to_image_space(
                proposals_as_anchors, calib_p2, image_size)

            num_of_proposals = proposal_boxes_3d.shape[0]

            prop_fig, prop_2d_axes, prop_3d_axes = \
                vis_utils.visualization(dataset.rgb_image_dir,
                                        img_idx,
                                        display=False)

            draw_proposals(filtered_gt_objs, calib_p2, num_of_proposals,
                           proposal_objs, proposal_boxes, prop_2d_axes,
                           prop_3d_axes, draw_orientations_on_prop)

            if draw_proposals_separate:
                # Save just the proposals
                filename = prop_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                if not draw_overlaid:
                    plt.close(prop_fig)

        if draw_overlaid or draw_predictions_separate:
            if len(prediction_boxes_3d) > 0:
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for i in range(len(prediction_boxes_3d)):
                    box_3d = prediction_boxes_3d[i, 0:7]
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d,
                        calib_p2,
                        truncate=True,
                        image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score
            else:
                if save_empty_images:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)
                    filename = pred_out_dir + '/' + sample_name + '.png'
                    plt.savefig(filename)
                    plt.close(pred_fig)
                continue

            if draw_overlaid:
                # Overlay prediction boxes on image
                draw_predictions(filtered_gt_objs, calib_p2,
                                 num_of_predictions, final_prediction_objs,
                                 final_class_indices, final_boxes_2d,
                                 prop_2d_axes, prop_3d_axes, draw_score,
                                 draw_iou, gt_classes,
                                 draw_orientations_on_pred)
                filename = overlaid_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                plt.close(prop_fig)

            if draw_predictions_separate:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions(filtered_gt_objs, calib_p2,
                                     num_of_predictions, final_prediction_objs,
                                     final_class_indices, final_boxes_2d,
                                     pred_2d_axes, pred_3d_axes, draw_score,
                                     draw_iou, gt_classes,
                                     draw_orientations_on_pred)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions(filtered_gt_objs, calib_p2,
                                        num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices, final_boxes_2d,
                                        pred_3d_axes, draw_score, draw_iou,
                                        gt_classes, draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')
Exemple #4
0
def main():
    """ Converts a set of network predictions into text files required for
    KITTI evaluation.
    """

    ##############################
    # Options
    ##############################
    checkpoint_name = 'fpn_people_dual_NHSP_train'#'fpn_people_dual_SHPL_train'#'pyramid_people_with_NHSP_example_train'#'pyramid_people_with_NHSP_example'

    data_split = 'val'#'test'#'val'

    global_steps = None
    # global_steps = [28000, 19000, 33000, 34000]

    score_threshold = 0.1

    save_2d = False  # Save 2D predictions
    save_3d = True   # Save 2D and 3D predictions together
    save_alphas = True  # Save alphas (observation angles)

    # Checkpoints below this are skipped
    min_step = 20000

    ##############################
    # End of Options
    ##############################

    # Parse experiment config
    pipeline_config_file = \
        avod.root_dir() + '/data/outputs/' + checkpoint_name + \
        '/' + checkpoint_name + '.config'
    _, _, _, dataset_config = \
        config_builder_util.get_configs_from_pipeline_file(
            pipeline_config_file, is_training=False)

    # Overwrite defaults
    dataset_config = config_builder_util.proto_to_obj(dataset_config)
    dataset_config.data_split = data_split
    dataset_config.aug_list = []

    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Get available prediction folders
    predictions_root_dir = avod.root_dir() + '/data/outputs/' + \
        checkpoint_name + '/predictions'

    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    print('Converting detections from', final_predictions_root_dir)

    if not global_steps:
        global_steps = os.listdir(final_predictions_root_dir)
        global_steps.sort(key=int)
        print('Checkpoints found ', global_steps)

    for step_idx in range(len(global_steps)):

        global_step = global_steps[step_idx]

        # Skip first checkpoint
        if int(global_step) < min_step:
            continue

        final_predictions_dir = final_predictions_root_dir + \
            '/' + str(global_step)

        # 2D and 3D prediction directories
        kitti_predictions_2d_dir = predictions_root_dir + \
            '/kitti_predictions_2d/' + \
            dataset.data_split + '/' + \
            str(score_threshold) + '/' + \
            str(global_step) + '/data'
        kitti_predictions_3d_dir = predictions_root_dir + \
            '/kitti_predictions_3d/' + \
            dataset.data_split + '/' + \
            str(score_threshold) + '/' + \
            str(global_step) + '/data'

        if save_2d and not os.path.exists(kitti_predictions_2d_dir):
            os.makedirs(kitti_predictions_2d_dir)
        if save_3d and not os.path.exists(kitti_predictions_3d_dir):
            os.makedirs(kitti_predictions_3d_dir)

        # Do conversion
        num_samples = dataset.num_samples
        num_valid_samples = 0

        print('\nGlobal step:', global_step)
        print('Converting detections from:', final_predictions_dir)

        if save_2d:
            print('2D Detections saved to:', kitti_predictions_2d_dir)
        if save_3d:
            print('3D Detections saved to:', kitti_predictions_3d_dir)

        for sample_idx in range(num_samples):

            # Print progress
            sys.stdout.write('\rConverting {} / {}'.format(
                sample_idx + 1, num_samples))
            sys.stdout.flush()

            sample_name = dataset.sample_names[sample_idx]

            prediction_file = sample_name + '.txt'

            kitti_predictions_2d_file_path = kitti_predictions_2d_dir + \
                '/' + prediction_file
            kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
                '/' + prediction_file

            predictions_file_path = final_predictions_dir + \
                '/' + prediction_file

            # If no predictions, skip to next file
            if not os.path.exists(predictions_file_path):
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            all_predictions = np.loadtxt(predictions_file_path)

            # # Swap l, w for predictions where w > l
            # swapped_indices = all_predictions[:, 4] > all_predictions[:, 3]
            # fixed_predictions = np.copy(all_predictions)
            # fixed_predictions[swapped_indices, 3] = all_predictions[
            #     swapped_indices, 4]
            # fixed_predictions[swapped_indices, 4] = all_predictions[
            #     swapped_indices, 3]

            score_filter = all_predictions[:, 7] >= score_threshold
            all_predictions = all_predictions[score_filter]

            # If no predictions, skip to next file
            if len(all_predictions) == 0:
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            # Project to image space
            sample_name = prediction_file.split('.')[0]
            img_idx = int(sample_name)

            # Load image for truncation
            image = Image.open(dataset.get_rgb_image_path(sample_name))

            stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                           img_idx).p2

            boxes = []
            image_filter = []
            for i in range(len(all_predictions)):
                box_3d = all_predictions[i, 0:7]
                img_box = box_3d_projector.project_to_image_space(
                    box_3d, stereo_calib_p2,
                    truncate=True, image_size=image.size)

                # Skip invalid boxes (outside image space)
                if img_box is None:
                    image_filter.append(False)
                else:
                    image_filter.append(True)
                    boxes.append(img_box)

            boxes = np.asarray(boxes)
            all_predictions = all_predictions[image_filter]

            # If no predictions, skip to next file
            if len(boxes) == 0:
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            num_valid_samples += 1

            # To keep each value in its appropriate position, an array of zeros
            # (N, 16) is allocated but only values [4:16] are used
            kitti_predictions = np.zeros([len(boxes), 16])

            # Get object types
            all_pred_classes = all_predictions[:, 8].astype(np.int32)
            obj_types = [dataset.classes[class_idx]
                         for class_idx in all_pred_classes]

            # Truncation and Occlusion are always empty (see below)

            # Alpha
            if not save_alphas:
                kitti_predictions[:, 3] = -10 * \
                    np.ones((len(kitti_predictions)), dtype=np.int32)
            else:
                alphas = all_predictions[:, 6] - \
                    np.arctan2(all_predictions[:, 0], all_predictions[:, 2])
                kitti_predictions[:, 3] = alphas

            # 2D predictions
            kitti_predictions[:, 4:8] = boxes[:, 0:4]

            # 3D predictions
            # (l, w, h)
            kitti_predictions[:, 8] = all_predictions[:, 5]
            kitti_predictions[:, 9] = all_predictions[:, 4]
            kitti_predictions[:, 10] = all_predictions[:, 3]
            # (x, y, z)
            kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
            # (ry, score)
            kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

            # Round detections to 3 decimal places
            kitti_predictions = np.round(kitti_predictions, 3)

            # Empty Truncation, Occlusion
            kitti_empty_1 = -1 * np.ones((len(kitti_predictions), 2),
                                         dtype=np.int32)
            # Empty 3D (x, y, z)
            kitti_empty_2 = -1 * np.ones((len(kitti_predictions), 3),
                                         dtype=np.int32)
            # Empty 3D (h, w, l)
            kitti_empty_3 = -1000 * np.ones((len(kitti_predictions), 3),
                                            dtype=np.int32)
            # Empty 3D (ry)
            kitti_empty_4 = -10 * np.ones((len(kitti_predictions), 1),
                                          dtype=np.int32)

            # Stack 2D predictions text
            kitti_text_2d = np.column_stack([obj_types,
                                             kitti_empty_1,
                                             kitti_predictions[:, 3:8],
                                             kitti_empty_2,
                                             kitti_empty_3,
                                             kitti_empty_4,
                                             kitti_predictions[:, 15]])

            # Stack 3D predictions text
            kitti_text_3d = np.column_stack([obj_types,
                                             kitti_empty_1,
                                             kitti_predictions[:, 3:16]])

            # Save to text files
            if save_2d:
                np.savetxt(kitti_predictions_2d_file_path, kitti_text_2d,
                           newline='\r\n', fmt='%s')
            if save_3d:
                np.savetxt(kitti_predictions_3d_file_path, kitti_text_3d,
                           newline='\r\n', fmt='%s')

        print('\nNum valid:', num_valid_samples)
        print('Num samples:', num_samples)
Exemple #5
0
def save_predictions_in_kitti_format(model,
                                     checkpoint_name,
                                     data_split,
                                     score_threshold,
                                     global_step):
    """ Converts a set of network predictions into text files required for
    KITTI evaluation.
    """

    dataset = model.dataset
    # Round this because protobuf encodes default values as full decimal
    score_threshold = round(score_threshold, 3)

    # Get available prediction folders
    predictions_root_dir = avod.root_dir() + '/data/outputs/' + \
        checkpoint_name + '/predictions'


    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    final_predictions_dir = final_predictions_root_dir + \
        '/' + str(global_step)

    # 3D prediction directories
    kitti_predictions_3d_dir = predictions_root_dir + \
        '/kitti_native_eval/' + \
        str(score_threshold) + '/' + \
        str(global_step) + '/data'

    if not os.path.exists(kitti_predictions_3d_dir):
        os.makedirs(kitti_predictions_3d_dir)

    # Do conversion
    num_samples = dataset.num_samples
    num_valid_samples = 0

    print('\nGlobal step:', global_step)
    print('Converting detections from:', final_predictions_dir)

    print('3D Detections being saved to:', kitti_predictions_3d_dir)

    for sample_idx in range(num_samples):

        # Print progress
        sys.stdout.write('\rConverting {} / {}'.format(
            sample_idx + 1, num_samples))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]

        prediction_file = sample_name + '.txt'

        kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
            '/' + prediction_file

        predictions_file_path = final_predictions_dir + \
            '/' + prediction_file

        # If no predictions, skip to next file
        if not os.path.exists(predictions_file_path):
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        all_predictions = np.loadtxt(predictions_file_path)

        # # Swap l, w for predictions where w > l
        # swapped_indices = all_predictions[:, 4] > all_predictions[:, 3]
        # fixed_predictions = np.copy(all_predictions)
        # fixed_predictions[swapped_indices, 3] = all_predictions[
        #     swapped_indices, 4]
        # fixed_predictions[swapped_indices, 4] = all_predictions[
        #     swapped_indices, 3]

        score_filter = all_predictions[:, 7] >= score_threshold
        all_predictions = all_predictions[score_filter]

        # If no predictions, skip to next file
        if len(all_predictions) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        # Project to image space
        sample_name = prediction_file.split('.')[0]
        img_idx = int(sample_name)

        # Load image for truncation
        image = Image.open(dataset.get_rgb_image_path(sample_name))

        #stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                       #img_idx).p2

        # Get calibration
        calib = moose_load_calibration.load_calibration(dataset.calib_dir)

        if img_idx < 100:

            T_IMG_CAM = np.eye(4);  # identity matrix
            T_IMG_CAM[0:3, 0:3] = np.array(calib['CAM00']['camera_matrix']['data']).reshape(-1,
                                                                                            3)  # camera to image #intrinsic matrix

            # T_IMG_CAM : 4 x 4 matrix
            T_IMG_CAM = T_IMG_CAM[0:3, 0:4];  # remove last row, #choose the first 3 rows and get rid of the last column

            stereo_calib_p2 = T_IMG_CAM

        elif (img_idx >= 100 and img_idx < 200):
            T_IMG_CAM = np.eye(4);  # identity matrix
            T_IMG_CAM[0:3, 0:3] = np.array(calib['CAM01']['camera_matrix']['data']).reshape(-1,
                                                                                            3)  # camera to image #intrinsic matrix

            # T_IMG_CAM : 4 x 4 matrix
            T_IMG_CAM = T_IMG_CAM[0:3, 0:4];  # remove last row, #choose the first 3 rows and get rid of the last column

            stereo_calib_p2 = T_IMG_CAM

        elif (img_idx >= 200 and img_idx < 300):
            T_IMG_CAM = np.eye(4);  # identity matrix
            T_IMG_CAM[0:3, 0:3] = np.array(calib['CAM02']['camera_matrix']['data']).reshape(-1,
                                                                                            3)  # camera to image #intrinsic matrix

            # T_IMG_CAM : 4 x 4 matrix
            T_IMG_CAM = T_IMG_CAM[0:3, 0:4];  # remove last row, #choose the first 3 rows and get rid of the last column

            stereo_calib_p2 = T_IMG_CAM

        elif (img_idx >= 300 and img_idx < 400):
            T_IMG_CAM = np.eye(4);  # identity matrix
            T_IMG_CAM[0:3, 0:3] = np.array(calib['CAM03']['camera_matrix']['data']).reshape(-1,
                                                                                            3)  # camera to image #intrinsic matrix

            # T_IMG_CAM : 4 x 4 matrix
            T_IMG_CAM = T_IMG_CAM[0:3, 0:4];  # remove last row, #choose the first 3 rows and get rid of the last column

            stereo_calib_p2 = T_IMG_CAM

        elif (img_idx >= 400 and img_idx < 500):
            T_IMG_CAM = np.eye(4);  # identity matrix
            T_IMG_CAM[0:3, 0:3] = np.array(calib['CAM04']['camera_matrix']['data']).reshape(-1,
                                                                                            3)  # camera to image #intrinsic matrix

            # T_IMG_CAM : 4 x 4 matrix
            T_IMG_CAM = T_IMG_CAM[0:3, 0:4];  # remove last row, #choose the first 3 rows and get rid of the last column

            stereo_calib_p2 = T_IMG_CAM

        elif (img_idx >= 500 and img_idx < 600):
            T_IMG_CAM = np.eye(4);  # identity matrix
            T_IMG_CAM[0:3, 0:3] = np.array(calib['CAM05']['camera_matrix']['data']).reshape(-1,
                                                                                            3)  # camera to image #intrinsic matrix

            # T_IMG_CAM : 4 x 4 matrix
            T_IMG_CAM = T_IMG_CAM[0:3, 0:4];  # remove last row, #choose the first 3 rows and get rid of the last column

            stereo_calib_p2 = T_IMG_CAM

        elif (img_idx >= 600 and img_idx < 700):
            T_IMG_CAM = np.eye(4);  # identity matrix
            T_IMG_CAM[0:3, 0:3] = np.array(calib['CAM06']['camera_matrix']['data']).reshape(-1,
                                                                                            3)  # camera to image #intrinsic matrix

            # T_IMG_CAM : 4 x 4 matrix
            T_IMG_CAM = T_IMG_CAM[0:3, 0:4];  # remove last row, #choose the first 3 rows and get rid of the last column

            stereo_calib_p2 = T_IMG_CAM

        elif (img_idx >= 700 and img_idx < 800):
            T_IMG_CAM = np.eye(4);  # identity matrix
            T_IMG_CAM[0:3, 0:3] = np.array(calib['CAM07']['camera_matrix']['data']).reshape(-1,
                                                                                            3)  # camera to image #intrinsic matrix

            # T_IMG_CAM : 4 x 4 matrix
            T_IMG_CAM = T_IMG_CAM[0:3, 0:4];  # remove last row, #choose the first 3 rows and get rid of the last column

            stereo_calib_p2 = T_IMG_CAM

        else:
            print("YOLO")

        boxes = []
        image_filter = []
        for i in range(len(all_predictions)):
            box_3d = all_predictions[i, 0:7]
            img_box = box_3d_projector.project_to_image_space(
                box_3d, stereo_calib_p2,
                truncate=True, image_size=image.size)

            # Skip invalid boxes (outside image space)
            if img_box is None:
                image_filter.append(False)
                continue

            image_filter.append(True)
            boxes.append(img_box)

        boxes = np.asarray(boxes)
        all_predictions = all_predictions[image_filter]

        # If no predictions, skip to next file
        if len(boxes) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        num_valid_samples += 1

        # To keep each value in its appropriate position, an array of zeros
        # (N, 16) is allocated but only values [4:16] are used
        kitti_predictions = np.zeros([len(boxes), 16])

        # Get object types
        all_pred_classes = all_predictions[:, 8].astype(np.int32)
        obj_types = [dataset.classes[class_idx]
                     for class_idx in all_pred_classes]

        # Truncation and Occlusion are always empty (see below)

        # Alpha (Not computed)
        kitti_predictions[:, 3] = -10 * np.ones((len(kitti_predictions)),
                                                dtype=np.int32)

        # 2D predictions
        kitti_predictions[:, 4:8] = boxes[:, 0:4]

        # 3D predictions
        # (l, w, h)
        kitti_predictions[:, 8] = all_predictions[:, 5]
        kitti_predictions[:, 9] = all_predictions[:, 4]
        kitti_predictions[:, 10] = all_predictions[:, 3]
        # (x, y, z)
        kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
        # (ry, score)
        kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

        # Round detections to 3 decimal places
        kitti_predictions = np.round(kitti_predictions, 3)

        # Empty Truncation, Occlusion
        kitti_empty_1 = -1 * np.ones((len(kitti_predictions), 2),
                                     dtype=np.int32)

        # Stack 3D predictions text
        kitti_text_3d = np.column_stack([obj_types,
                                         kitti_empty_1,
                                         kitti_predictions[:, 3:16]])

        # Save to text files
        np.savetxt(kitti_predictions_3d_file_path, kitti_text_3d,
                   newline='\r\n', fmt='%s')

    print('\nNum valid:', num_valid_samples)
    print('Num samples:', num_samples)
Exemple #6
0
def test(model_config, eval_config,
              dataset_config, data_split,
              ckpt_indices):

    # Overwrite the defaults
    dataset_config = config_builder.proto_to_obj(dataset_config)

    dataset_config.data_split = data_split
    dataset_config.data_split_dir = 'training'
    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    eval_config.eval_mode = 'test'
    eval_config.evaluate_repeatedly = False

    dataset_config.has_labels = False
    # Enable this to see the actually memory being used
    eval_config.allow_gpu_mem_growth = True

    eval_config = config_builder.proto_to_obj(eval_config)
    # Grab the checkpoint indices to evaluate
    eval_config.ckpt_indices = ckpt_indices

    # Remove augmentation during evaluation in test mode
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Setup the model
    model_name = model_config.model_name
    # Overwrite repeated field
    model_config = config_builder.proto_to_obj(model_config)
    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=eval_config.eval_mode,
                              dataset=dataset)
        elif model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=eval_config.eval_mode,
                             dataset=dataset)
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        #model_evaluator = Evaluator(model, dataset_config, eval_config)
        #model_evaluator.run_latest_checkpoints()

        # Create a variable tensor to hold the global step
        global_step_tensor = tf.Variable(0, trainable=False, name='global_step')

        allow_gpu_mem_growth = eval_config.allow_gpu_mem_growth
        if allow_gpu_mem_growth:
            # GPU memory config
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = allow_gpu_mem_growth
            _sess = tf.Session(config=config)
        else:
            _sess = tf.Session()

        _prediction_dict = model.build()
        _saver = tf.train.Saver()

        trainer_utils.load_checkpoints(model_config.paths_config.checkpoint_dir,
                                       _saver)
        num_checkpoints = len(_saver.last_checkpoints)
        print("test:",num_checkpoints)
        checkpoint_to_restore = _saver.last_checkpoints[num_checkpoints-1]

        _saver.restore(_sess, checkpoint_to_restore)

        num_samples = model.dataset.num_samples
        num_valid_samples = 0

        current_epoch = model.dataset.epochs_completed
        while current_epoch == model.dataset.epochs_completed:
            # Keep track of feed_dict speed
            start_time = time.time()
            feed_dict = model.create_feed_dict()
            feed_dict_time = time.time() - start_time

            # Get sample name from model
            sample_name = model.sample_info['sample_name']

            num_valid_samples += 1
            print("Step: {} / {}, Inference on sample {}".format(
                num_valid_samples, num_samples,
                sample_name))

            print("test mode")
            inference_start_time = time.time()
            # Don't calculate loss or run summaries for test
            predictions = _sess.run(_prediction_dict,
                                         feed_dict=feed_dict)
            inference_time = time.time() - inference_start_time

            print("inference time:", inference_time)

            predictions_and_scores = get_avod_predicted_boxes_3d_and_scores(predictions)

            #print(predictions_and_scores)
            #im_path = os.path.join(dataset_dir, 'training/image_2/{:06d}.png'.format(img_idx))
            #im = cv2.imread(im_path)
            #cv2.imshow('result',im)
            #cv2.waitKey(30)

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]
            gt_classes = ['Car']
            fig_size = (10, 6.1)

            avod_score_threshold = 0.1
            if len(prediction_boxes_3d) > 0:

                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

            if len(prediction_boxes_3d) > 0:

                dataset_dir = model.dataset.dataset_dir
                sample_name = (model.dataset.sample_names[model.dataset._index_in_epoch - 1])
                img_idx = int(sample_name)
                print("frame_index",img_idx)
                image_path = model.dataset.get_rgb_image_path(sample_name)
                image = Image.open(image_path)
                image_size = image.size

                if model.dataset.has_labels:
                    gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
                else:
                    gt_objects = []
                filtered_gt_objs = model.dataset.kitti_utils.filter_labels(
                    gt_objects, classes=gt_classes)

                stereo_calib = calib_utils.read_calibration(dataset.calib_dir,
                                                            img_idx)
                calib_p2 = stereo_calib.p2
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for i in range(len(prediction_boxes_3d)):
                    box_3d = prediction_boxes_3d[i, 0:7]
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d, calib_p2,
                        truncate=True, image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score

                pred_fig, pred_2d_axes, pred_3d_axes = \
                    vis_utils.visualization(dataset.rgb_image_dir,
                                            img_idx,
                                            display=False,
                                            fig_size=fig_size)

                draw_predictions(filtered_gt_objs,
                                 calib_p2,
                                 num_of_predictions,
                                 final_prediction_objs,
                                 final_class_indices,
                                 final_boxes_2d,
                                 pred_2d_axes,
                                 pred_3d_axes,
                                 True,
                                 True,
                                 gt_classes,
                                 False)

                #cv2.imshow('result',pred_fig)
                print(type(pred_fig))
                pred_fig.canvas.draw()
                img = np.fromstring(pred_fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
                img  = img.reshape(pred_fig.canvas.get_width_height()[::-1] + (3,))
                cv2.imshow('result',img)

                #draw bird view
                kitti_utils = model.dataset.kitti_utils
                print(img.shape[0:2])
                point_cloud = kitti_utils.get_point_cloud(
                    'lidar', img_idx, (370, 1242))
                ground_plane = kitti_utils.get_ground_plane(sample_name)
                bev_images = kitti_utils.create_bev_maps(point_cloud, ground_plane)

                density_map = np.array(bev_images.get("density_map"))
                _, box_points_norm = box_3d_projector.project_to_bev(
                    final_prediction_boxes_3d, [[-40, 40], [0, 70]])
                density_map = draw_boxes(density_map, box_points_norm)
                cv2.imshow('lidar',density_map)
                cv2.waitKey(-1)
Exemple #7
0
    def _draw_predictions(self, image, predictions_and_scores, frame_calib):
        """ This code draws the bounding boxes with score threshold above the given threshold onto the image.
            This code is borrowed in part from show_predictions_2d.py
        """
        prediction_boxes_3d = predictions_and_scores[:, 0:7]
        print("Number of predictions boxes 3d: ", len(prediction_boxes_3d))
        prediction_scores = predictions_and_scores[:, 7]
        prediction_class_indices = predictions_and_scores[:, 8]
        image_size = image.shape[:2]
        # process predictions only if we have any predictions left after
        # masking
        if len(prediction_boxes_3d) > 0:
            # Apply score mask
            avod_score_mask = prediction_scores >= self.avod_score_threshold
            prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
            prediction_scores = prediction_scores[avod_score_mask]
            prediction_class_indices = \
                prediction_class_indices[avod_score_mask]
        else:
            return

        image_filter = []
        final_boxes_2d = []
        for i in range(len(prediction_boxes_3d)):
            box_3d = prediction_boxes_3d[i, 0:7]
            img_box = box_3d_projector.project_to_image_space(
                box_3d,
                frame_calib.p2,
                truncate=True,
                image_size=image_size,
                discard_before_truncation=False)
            if img_box is not None:
                image_filter.append(True)
                final_boxes_2d.append(img_box)
            else:
                image_filter.append(False)
        final_boxes_2d = np.asarray(final_boxes_2d)
        final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
        final_scores = prediction_scores[image_filter]
        final_class_indices = prediction_class_indices[image_filter]

        num_of_predictions = final_boxes_2d.shape[0]
        print("Drawing {} predictions".format(num_of_predictions))
        # Convert to objs
        final_prediction_objs = \
            [box_3d_encoder.box_3d_to_object_label(
                prediction, obj_type='Prediction')
                for prediction in final_prediction_boxes_3d]
        for (obj, score) in zip(final_prediction_objs, final_scores):
            obj.score = score
        # Overlay prediction boxes on image
        filtered_gt_objs = []
        draw_orientations_on_pred = True
        fig, ax = self._create_fig(image)
        # Plot the image
        ax.imshow(image)
        # Draw predictions over image
        return draw_3d_predictions(filtered_gt_objs, frame_calib.p2,
                                   num_of_predictions, final_prediction_objs,
                                   final_class_indices, final_boxes_2d, ax,
                                   draw_orientations_on_pred, image)
def convertPredictionsToKitti(dataset, predictions_root_dir, additional_cls):
    """ Converts a set of network predictions into text files required for
    KITTI evaluation.
    """
    open_mode = 'w+'
    if additional_cls:
        open_mode = 'a+'

    ##############################
    # Options
    ##############################
    global_steps = None
    save_to_base = True
    # global_steps = [28000, 19000, 33000, 34000]

    score_threshold = 0.01

    save_2d = False  # Save 2D predictions
    save_3d = True  # Save 2D and 3D predictions together
    save_alphas = True  # Save alphas (observation angles)

    # Checkpoints below this are skipped
    min_step = 20000

    ##############################
    # End of Options
    ##############################

    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    logging.info('Converting detections from %s', final_predictions_root_dir)

    if not global_steps:
        global_steps = os.listdir(final_predictions_root_dir)
        global_steps.sort(key=int)
        logging.debug('Checkpoints found {}'.format(global_steps))

    for step_idx in range(len(global_steps)):

        global_step = global_steps[step_idx]

        # Skip first checkpoint
        if int(global_step) < min_step:
            continue

        final_predictions_dir = final_predictions_root_dir + \
            '/' + str(global_step)

        if save_to_base:
            kitti_predictions_2d_dir = predictions_root_dir
            kitti_predictions_3d_dir = predictions_root_dir
        else:
            # 2D and 3D prediction directories
            kitti_predictions_2d_dir = predictions_root_dir + \
                '/kitti_predictions_2d/' + \
                dataset.data_split + '/' + \
                str(score_threshold) + '/' + \
                str(global_step) + '/data'
            kitti_predictions_3d_dir = predictions_root_dir + \
                '/kitti_predictions_3d/' + \
                dataset.data_split + '/' + \
                str(score_threshold) + '/' + \
                str(global_step) + '/data'

            if save_2d and not os.path.exists(kitti_predictions_2d_dir):
                os.makedirs(kitti_predictions_2d_dir)
            if save_3d and not os.path.exists(kitti_predictions_3d_dir):
                os.makedirs(kitti_predictions_3d_dir)

        # Do conversion
        num_samples = dataset.num_samples
        num_valid_samples = 0

        logging.info('\nGlobal step: %d', int(global_step))
        logging.info('Converting detections from: %s', final_predictions_dir)

        if save_2d:
            logging.info('2D Detections saved to: %s',
                         kitti_predictions_2d_dir)
        if save_3d:
            logging.info('3D Detections saved to: %s',
                         kitti_predictions_3d_dir)

        for sample_idx in range(num_samples):

            # Print progress
            sys.stdout.write('\rConverting {} / {}'.format(
                sample_idx + 1, num_samples))
            sys.stdout.flush()

            sample_name = dataset.sample_names[sample_idx]

            prediction_file = sample_name + '.txt'

            kitti_predictions_2d_file_path = kitti_predictions_2d_dir + \
                '/' + prediction_file
            kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
                '/' + prediction_file

            predictions_file_path = final_predictions_dir + \
                '/' + prediction_file

            # If no predictions, skip to next file
            if not os.path.exists(predictions_file_path):
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            all_predictions = np.loadtxt(predictions_file_path, ndmin=2)

            # # Swap l, w for predictions where w > l
            # swapped_indices = all_predictions[:, 4] > all_predictions[:, 3]
            # fixed_predictions = np.copy(all_predictions)
            # fixed_predictions[swapped_indices, 3] = all_predictions[
            #     swapped_indices, 4]
            # fixed_predictions[swapped_indices, 4] = all_predictions[
            #     swapped_indices, 3]

            score_filter = all_predictions[:, 7] >= score_threshold
            all_predictions = all_predictions[score_filter]

            # If no predictions, skip to next file
            if len(all_predictions) == 0:
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            # Project to image space
            sample_name = prediction_file.split('.')[0]
            img_idx = int(sample_name)

            # Load image for truncation
            image = Image.open(dataset.get_rgb_image_path(sample_name))

            stereo_calib_p2 = calib_utils.read_calibration(
                dataset.calib_dir, img_idx).p2

            boxes = []
            image_filter = []
            for i in range(len(all_predictions)):
                box_3d = all_predictions[i, 0:7]
                img_box = box_3d_projector.project_to_image_space(
                    box_3d,
                    stereo_calib_p2,
                    truncate=True,
                    image_size=image.size)

                # Skip invalid boxes (outside image space)
                if img_box is None:
                    image_filter.append(False)
                else:
                    image_filter.append(True)
                    boxes.append(img_box)

            boxes = np.asarray(boxes)
            all_predictions = all_predictions[image_filter]

            # If no predictions, skip to next file
            if len(boxes) == 0:
                if save_2d:
                    np.savetxt(kitti_predictions_2d_file_path, [])
                if save_3d:
                    np.savetxt(kitti_predictions_3d_file_path, [])
                continue

            num_valid_samples += 1

            # To keep each value in its appropriate position, an array of zeros
            # (N, 16) is allocated but only values [4:16] are used
            kitti_predictions = np.zeros([len(boxes), 16])

            # Get object types
            all_pred_classes = all_predictions[:, 8].astype(np.int32)
            obj_types = [
                dataset.classes[class_idx] for class_idx in all_pred_classes
            ]

            # Truncation and Occlusion are always empty (see below)

            # Alpha
            if not save_alphas:
                kitti_predictions[:, 3] = -10 * \
                    np.ones((len(kitti_predictions)), dtype=np.int32)
            else:
                alphas = all_predictions[:, 6] - \
                    np.arctan2(all_predictions[:, 0], all_predictions[:, 2])
                kitti_predictions[:, 3] = alphas

            # 2D predictions
            kitti_predictions[:, 4:8] = boxes[:, 0:4]

            # 3D predictions
            # (l, w, h)
            kitti_predictions[:, 8] = all_predictions[:, 5]
            kitti_predictions[:, 9] = all_predictions[:, 4]
            kitti_predictions[:, 10] = all_predictions[:, 3]
            # (x, y, z)
            kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
            # (ry, score)
            kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

            # Round detections to 3 decimal places
            kitti_predictions = np.round(kitti_predictions, 3)

            # Empty Truncation, Occlusion
            kitti_empty_1 = -1 * np.ones(
                (len(kitti_predictions), 2), dtype=np.int32)
            # Empty 3D (x, y, z)
            kitti_empty_2 = -1 * np.ones(
                (len(kitti_predictions), 3), dtype=np.int32)
            # Empty 3D (h, w, l)
            kitti_empty_3 = -1000 * np.ones(
                (len(kitti_predictions), 3), dtype=np.int32)
            # Empty 3D (ry)
            kitti_empty_4 = -10 * np.ones(
                (len(kitti_predictions), 1), dtype=np.int32)

            # Stack 2D predictions text
            kitti_text_2d = np.column_stack([
                obj_types, kitti_empty_1, kitti_predictions[:, 3:8],
                kitti_empty_2, kitti_empty_3, kitti_empty_4,
                kitti_predictions[:, 15]
            ])

            # Stack 3D predictions text
            kitti_text_3d = np.column_stack(
                [obj_types, kitti_empty_1, kitti_predictions[:, 3:16]])

            # Save to text files
            if save_2d:
                np.savetxt(kitti_predictions_2d_file_path,
                           kitti_text_2d,
                           newline='\r\n',
                           fmt='%s')
            if save_3d:
                with open(kitti_predictions_3d_file_path, open_mode) as f:
                    np.savetxt(f, kitti_text_3d, newline='\r\n', fmt='%s')

        logging.debug('\nNum valid: %d', num_valid_samples)
        logging.debug('Num samples: %d', num_samples)

    for the_file in os.listdir(predictions_root_dir):
        file_path = os.path.join(predictions_root_dir, the_file)
        try:
            if os.path.isdir(file_path):
                shutil.rmtree(file_path)
                logging.debug("Removing folder: %s", file_path)
        except Exception as e:
            print(e)
            logging.exception(e)
Exemple #9
0
def main():
    """This demo runs through all samples in the trainval set, and checks
    that the 3D box projection of all 'Car', 'Van', 'Pedestrian', and 'Cyclist'
    objects are in the correct flipped 2D location after applying
    modifications to the stereo p2 matrix.
    """

    dataset = DatasetBuilder.build_kitti_dataset(DatasetBuilder.KITTI_TRAINVAL,
                                                 use_defaults=True)

    np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

    all_samples = dataset.sample_names

    all_pixel_errors = []
    all_max_pixel_errors = []

    total_flip_time = 0.0

    for sample_idx in range(dataset.num_samples):

        sys.stdout.write('\r{} / {}'.format(sample_idx,
                                            dataset.num_samples - 1))

        sample_name = all_samples[sample_idx]

        img_idx = int(sample_name)

        # Run the main loop to run throughout the images
        frame_calibration_info = calib_utils.read_calibration(
            dataset.calib_dir,
            img_idx)

        # Load labels
        gt_labels = obj_utils.read_labels(dataset.label_dir, img_idx)
        gt_labels = dataset.kitti_utils.filter_labels(
            gt_labels, ['Car', 'Van', 'Pedestrian', 'Cyclist'])

        image = cv2.imread(dataset.get_rgb_image_path(sample_name))
        image_size = [image.shape[1], image.shape[0]]

        # Flip p2 matrix
        calib_p2 = frame_calibration_info.p2
        flipped_p2 = np.copy(calib_p2)
        flipped_p2[0, 2] = image.shape[1] - flipped_p2[0, 2]
        flipped_p2[0, 3] = -flipped_p2[0, 3]

        for obj_idx in range(len(gt_labels)):

            obj = gt_labels[obj_idx]

            # Get original 2D bounding boxes
            orig_box_3d = box_3d_encoder.object_label_to_box_3d(obj)
            orig_bbox_2d = box_3d_projector.project_to_image_space(
                orig_box_3d, calib_p2, truncate=True, image_size=image_size)

            # Skip boxes outside image
            if orig_bbox_2d is None:
                continue

            orig_bbox_2d_flipped = flip_box_2d(orig_bbox_2d, image_size)

            # Do flipping
            start_time = time.time()
            flipped_obj = kitti_aug.flip_label_in_3d_only(obj)
            flip_time = time.time() - start_time
            total_flip_time += flip_time

            box_3d_flipped = box_3d_encoder.object_label_to_box_3d(flipped_obj)
            new_bbox_2d_flipped = box_3d_projector.project_to_image_space(
                box_3d_flipped, flipped_p2, truncate=True,
                image_size=image_size)

            pixel_errors = new_bbox_2d_flipped - orig_bbox_2d_flipped
            max_pixel_error = np.amax(np.abs(pixel_errors))

            all_pixel_errors.append(pixel_errors)
            all_max_pixel_errors.append(max_pixel_error)

            if max_pixel_error > 5:
                print(' Error > 5px', sample_idx, max_pixel_error)
                print(np.round(orig_bbox_2d_flipped, 3),
                      np.round(new_bbox_2d_flipped, 3))

    print('Avg flip time:', total_flip_time / dataset.num_samples)

    # Convert to ndarrays
    all_pixel_errors = np.asarray(all_pixel_errors)
    all_max_pixel_errors = np.asarray(all_max_pixel_errors)

    # Print max values
    print(np.amax(all_max_pixel_errors))

    # Plot pixel errors
    fig, axes = plt.subplots(nrows=3, ncols=1)
    ax0, ax1, ax2 = axes.flatten()

    ax0.hist(all_pixel_errors[:, 0], 50, histtype='bar', facecolor='green')
    ax1.hist(all_pixel_errors[:, 2], 50, histtype='bar', facecolor='green')
    ax2.hist(all_max_pixel_errors, 50, histtype='bar', facecolor='green')

    plt.show()
def save_predictions_in_kitti_format(model,
                                     checkpoint_name,
                                     data_split,
                                     score_threshold,
                                     global_step,
                                     output_dir=None,
                                     do_eval_sin=False,
                                     do_eval_ain=False,
                                     sin_type='rand',
                                     sin_level=5,
                                     sin_repeat=10,
                                     sin_input_name=None,
                                     idx_repeat=None):
    """ Converts a set of network predictions into text files required for
    KITTI evaluation.
    """

    if output_dir is None:
        output_dir = avod.root_dir() + '/data/outputs/'

    dataset = model.dataset
    # Round this because protobuf encodes default values as full decimal
    score_threshold = round(score_threshold, 3)

    # Get available prediction folders
    predictions_root_dir = os.path.join(output_dir,checkpoint_name) + \
        '/predictions'
    if do_eval_sin:
        predictions_root_dir += '_sin_{}_{}_{}/{}'.format(
            sin_type, sin_level, sin_repeat, sin_input_name)
    elif do_eval_ain:
        predictions_root_dir += '_ain_{}_{}_{}'.format(sin_type, sin_level,
                                                       sin_repeat)

    final_predictions_root_dir = predictions_root_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    final_predictions_dir = final_predictions_root_dir + \
        '/' + str(global_step)

    # 3D prediction directories
    if idx_repeat is None:
        kitti_predictions_3d_dir = predictions_root_dir + \
            '/kitti_native_eval/' + \
            str(score_threshold) + '_' + data_split + '/' + \
            str(global_step)  + '/data'
    else:
        kitti_predictions_3d_dir = predictions_root_dir + \
            '/kitti_native_eval/' + \
            str(score_threshold) + '_' + data_split + '_{}_rep/'.format(idx_repeat) + \
            str(global_step) + '/data'

    if not os.path.exists(kitti_predictions_3d_dir):
        os.makedirs(kitti_predictions_3d_dir)

    # Do conversion
    num_samples = dataset.num_samples
    num_valid_samples = 0

    print('\nGlobal step:', global_step)
    print('Converting detections from:', final_predictions_dir)

    print('3D Detections being saved to:', kitti_predictions_3d_dir)

    for sample_idx in range(num_samples):

        # Print progress
        sys.stdout.write('\rConverting {} / {}'.format(sample_idx + 1,
                                                       num_samples))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]

        prediction_file = sample_name + '.txt'

        kitti_predictions_3d_file_path = kitti_predictions_3d_dir + \
            '/' + prediction_file

        predictions_file_path = final_predictions_dir + \
            '/' + prediction_file

        # If no predictions, skip to next file
        if not os.path.exists(predictions_file_path):
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        all_predictions = np.loadtxt(predictions_file_path)

        # # Swap l, w for predictions where w > l
        # swapped_indices = all_predictions[:, 4] > all_predictions[:, 3]
        # fixed_predictions = np.copy(all_predictions)
        # fixed_predictions[swapped_indices, 3] = all_predictions[
        #     swapped_indices, 4]
        # fixed_predictions[swapped_indices, 4] = all_predictions[
        #     swapped_indices, 3]

        score_filter = all_predictions[:, 7] >= score_threshold
        all_predictions = all_predictions[score_filter]

        # If no predictions, skip to next file
        if len(all_predictions) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        # Project to image space
        sample_name = prediction_file.split('.')[0]
        img_idx = int(sample_name)

        # Load image for truncation
        image = Image.open(dataset.get_rgb_image_path(sample_name))

        stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                       img_idx).p2

        boxes = []
        image_filter = []
        for i in range(len(all_predictions)):
            box_3d = all_predictions[i, 0:7]
            img_box = box_3d_projector.project_to_image_space(
                box_3d, stereo_calib_p2, truncate=True, image_size=image.size)

            # Skip invalid boxes (outside image space)
            if img_box is None:
                image_filter.append(False)
                continue

            image_filter.append(True)
            boxes.append(img_box)

        boxes = np.asarray(boxes)
        all_predictions = all_predictions[image_filter]

        # If no predictions, skip to next file
        if len(boxes) == 0:
            np.savetxt(kitti_predictions_3d_file_path, [])
            continue

        num_valid_samples += 1

        # To keep each value in its appropriate position, an array of zeros
        # (N, 16) is allocated but only values [4:16] are used
        kitti_predictions = np.zeros([len(boxes), 16])

        # Get object types
        all_pred_classes = all_predictions[:, 8].astype(np.int32)
        obj_types = [
            dataset.classes[class_idx] for class_idx in all_pred_classes
        ]

        # Truncation and Occlusion are always empty (see below)

        # Alpha (Not computed)
        kitti_predictions[:, 3] = -10 * np.ones(
            (len(kitti_predictions)), dtype=np.int32)

        # 2D predictions
        kitti_predictions[:, 4:8] = boxes[:, 0:4]

        # 3D predictions
        # (l, w, h)
        kitti_predictions[:, 8] = all_predictions[:, 5]
        kitti_predictions[:, 9] = all_predictions[:, 4]
        kitti_predictions[:, 10] = all_predictions[:, 3]
        # (x, y, z)
        kitti_predictions[:, 11:14] = all_predictions[:, 0:3]
        # (ry, score)
        kitti_predictions[:, 14:16] = all_predictions[:, 6:8]

        # Round detections to 3 decimal places
        kitti_predictions = np.round(kitti_predictions, 3)

        # Empty Truncation, Occlusion
        kitti_empty_1 = -1 * np.ones(
            (len(kitti_predictions), 2), dtype=np.int32)

        # Stack 3D predictions text
        kitti_text_3d = np.column_stack(
            [obj_types, kitti_empty_1, kitti_predictions[:, 3:16]])

        # Save to text files
        np.savetxt(kitti_predictions_3d_file_path,
                   kitti_text_3d,
                   newline='\r\n',
                   fmt='%s')

    print('\nNum valid:', num_valid_samples)
    print('Num samples:', num_samples)