Example #1
def test():
    parser = argparse.ArgumentParser()
    parser.add_argument('--avod_config_path',
                        type=str,
                        dest='avod_config_path',
                        required=True,
                        help='avod_config_path')
    parser.add_argument('--sample_idx',
                        type=str,
                        dest='sample_idx',
                        required=True,
                        help='sample id')
    args = parser.parse_args()
    _, _, _, dataset_config = \
        config_builder.get_configs_from_pipeline_file(
            args.avod_config_path, is_training=False)
    dataset = get_dataset(dataset_config, 'val')

    idx = np.argwhere(dataset.sample_names == args.sample_idx).squeeze()
    # print(idx)
    kitti_samples = dataset.load_samples([idx])
    sample = kitti_samples[0]

    label_mask = np.equal(sample[constants.KEY_LABEL_CLASSES],
                          g_type2onehotclass['Car'] + 1)
    gt_cls = sample[constants.KEY_LABEL_CLASSES][label_mask]
    gt_boxes_3d = sample[constants.KEY_LABEL_BOXES_3D][label_mask]
    gt_boxes_bev = []
    for i in range(len(gt_cls)):
        gt_obj = box_3d_encoder.box_3d_to_object_label(gt_boxes_3d[i], gt_cls[i])
        gt_corner_3d = compute_box_3d(gt_obj)
        gt_boxes_bev.append(gt_corner_3d[:4, [0,2]])
    print(gt_boxes_bev)

    with open("rpn_out/%s" % sample[constants.KEY_SAMPLE_NAME], "rb") as f:
        rpn_out = pickle.load(f)
    pos_prop = []
    for prop in rpn_out['proposals_and_scores']:
        corners = compute_box_3d(box_3d_encoder.box_3d_to_object_label(prop[:7]))
        label_idx, iou = find_match_label(corners[:4, [0,2]], gt_boxes_bev)
        if iou > 0.65:
            pos_prop.append(corners)
    pc = sample[constants.KEY_POINT_CLOUD].T
    import mayavi.mlab as mlab
    from viz_util import draw_lidar, draw_gt_boxes3d
    fig = draw_lidar(pc)
    fig = draw_gt_boxes3d(pos_prop, fig, draw_text=False, color=(1, 1, 1))
    input()

    # visualize_rpn_out(sample, rpn_out['proposals_and_scores'])
    with open("%s" % sample[constants.KEY_SAMPLE_NAME], "rb") as f:
        prediction = pickle.load(f)
    print(prediction)
    visualize(dataset, sample, prediction)
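Example #1 calls a find_match_label helper that is not shown in the snippet. A minimal sketch of what it might look like, assuming each entry is a 4x2 array of BEV (x, z) corners and using shapely for the polygon IoU (both are assumptions, not part of the original):

from shapely.geometry import Polygon

def find_match_label(prop_corners, gt_boxes_bev):
    # Return the index of the best-overlapping ground truth box and its IoU
    prop_poly = Polygon(prop_corners)
    best_idx, best_iou = -1, 0.0
    for idx, gt_corners in enumerate(gt_boxes_bev):
        gt_poly = Polygon(gt_corners)
        union = prop_poly.union(gt_poly).area
        iou = prop_poly.intersection(gt_poly).area / union if union > 0 else 0.0
        if iou > best_iou:
            best_idx, best_iou = idx, iou
    return best_idx, best_iou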
Example #2
def nms_on_bev(boxes_3d, iou_threshold=0.1):
    boxes_3d = np.asarray(boxes_3d)
    scores = boxes_3d[:, 7]
    boxes_3d = boxes_3d[:, 0:7]
    corners = [compute_box_3d(box_3d_encoder.box_3d_to_object_label(box))
               for box in boxes_3d]
    # TODO: use Polygon to do nms
    # Axis-aligned BEV box per proposal: [x_min, z_min, x_max, z_max, score]
    bev_boxes = [[np.amin(c, axis=0)[0], np.amin(c, axis=0)[2],
                  np.amax(c, axis=0)[0], np.amax(c, axis=0)[2], score]
                 for c, score in zip(corners, scores)]
    bev_boxes = np.array(bev_boxes)
    print('final output before nms: {0}'.format(len(bev_boxes)))
    nms_idxs = non_max_suppression(bev_boxes, iou_threshold)
    print('final output after nms: {0}'.format(len(nms_idxs)))
    return nms_idxs
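Example #2 leaves the non_max_suppression helper out of the snippet. A minimal axis-aligned sketch, assuming boxes arrive as (N, 5) rows of [x1, y1, x2, y2, score] (the layout bev_boxes is built with above, where "y" is the BEV z axis) and that the indices of the kept boxes are returned:

import numpy as np

def non_max_suppression(boxes, iou_threshold):
    x1, y1, x2, y2, scores = boxes.T
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(iou <= iou_threshold)[0] + 1]
    return keep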
Example #3
def load_proposals(frame_id):
    rpn_score_threshold = 0.5
    proposals_file_path = './kitti/proposal_120000/{0}.txt'.format(frame_id)
    proposals_and_scores = np.loadtxt(proposals_file_path)
    proposal_boxes_3d = proposals_and_scores[:, 0:7]
    proposal_scores = proposals_and_scores[:, 7]

    # Apply score mask to proposals
    score_mask = proposal_scores > rpn_score_threshold
    # 3D box in the format [x, y, z, l, w, h, ry]
    proposal_boxes_3d = proposal_boxes_3d[score_mask]
    proposal_scores = proposal_scores[score_mask]
    proposal_objs = \
        [box_3d_encoder.box_3d_to_object_label(proposal,
                                               obj_type='Proposal')
         for proposal in proposal_boxes_3d]
    for obj, score in zip(proposal_objs, proposal_scores):
        obj.score = score
    return proposal_objs
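A hypothetical call to load_proposals; the frame id is purely illustrative, and obj.type / obj.score are the ObjectLabel fields set in the loop above:

proposal_objs = load_proposals('000050')  # hypothetical frame id
proposal_objs.sort(key=lambda obj: obj.score, reverse=True)
for obj in proposal_objs[:5]:  # five highest-scoring proposals
    print(obj.type, obj.score)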
Example #4
def draw_boxes(prediction, sample, plot_axes):
    all_corners = []
    for pred in prediction:
        # Each prediction row is [x, y, z, l, w, h, ry, score, class_idx]
        box = np.array(pred[0:7])
        cls_idx = int(pred[8])
        obj = box_3d_encoder.box_3d_to_object_label(
            box, obj_type=type_whitelist[cls_idx])
        obj.score = pred[7]

        vis_utils.draw_box_3d(plot_axes, obj,
                              sample[constants.KEY_STEREO_CALIB_P2],
                              show_orientation=False,
                              color_table=['r', 'y', 'r', 'w'],
                              line_width=2,
                              double_line=False)
        corners = compute_box_3d(obj)
        all_corners.append(corners)

        # draw text info
        projected = calib_utils.project_to_image(corners.T, sample[constants.KEY_STEREO_CALIB_P2])
        x1 = np.amin(projected[0])
        y1 = np.amin(projected[1])
        x2 = np.amax(projected[0])
        y2 = np.amax(projected[1])
        text_x = (x1 + x2) / 2
        text_y = y1
        text = "{}\n{:.2f}".format(obj.type, obj.score)
        plot_axes.text(text_x, text_y - 4,
            text,
            verticalalignment='bottom',
            horizontalalignment='center',
            color=BOX_COLOUR_SCHEME[obj.type],
            fontsize=10,
            fontweight='bold',
            path_effects=[
                patheffects.withStroke(linewidth=2,
                                       foreground='black')])
    return all_corners
Example #5
def main(checkpoint_name):
    """This demo shows RPN proposals and AVOD predictions in 3D
    and 2D in image space. Given certain thresholds for proposals
    and predictions, it selects and draws the bounding boxes on
    the image sample. It goes through the entire proposal and
    prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.
    """

    # e.g. checkpoint_name = 'pyramid_cars_with_aug_example'
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    rpn_score_threshold = 0.1
    avod_score_threshold = 0.002  # final threshold (earlier trials: 0.1, 0.009)
    sample_names = [90] + list(range(98, 104)) + \
        [138, 224, 270, 290, 310, 330, 520]

    # Convert to string with correct format (don't change)
    sample_names = ['{:06d}'.format(x) for x in sample_names]

    # gt_classes = ['Car']
    gt_classes = ['Pedestrian', 'Cyclist']
    # gt_classes = ['Car', 'Pedestrian', 'Cyclist']

    img_dir = '/home/jhuang/repo/avod/input'

    # Overwrite this to select a specific checkpoint
    global_step = None

    # Drawing Toggles
    draw_proposals_separate = False
    draw_overlaid = False
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = False
    draw_orientations_on_pred = False

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = True

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = True
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = None
    data_split = 'val'

    # Setup Paths
    predictions_dir = avod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            data_split, global_step, rpn_score_threshold)

        if not os.path.exists(prop_out_dir):
            os.makedirs(prop_out_dir)

        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            data_split, global_step, avod_score_threshold)

        if not os.path.exists(overlaid_out_dir):
            os.makedirs(overlaid_out_dir)

        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            data_split, global_step, avod_score_threshold)

        if not os.path.exists(pred_out_dir):
            os.makedirs(pred_out_dir)

        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    num_samples = len(sample_names)
    for sample_idx in range(num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s\n'.format(
                             sample_idx + 1, num_samples, avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step,
                                    sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from files
            predictions_and_scores = np.loadtxt(
                predictions_and_scores_dir +
                "/{}/{}.txt".format(global_step, sample_name))

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            print("scores=", prediction_scores)
            prediction_class_indices = predictions_and_scores[:, 8]

            # process predictions only if we have any predictions left after
            # masking
            if len(prediction_boxes_3d) > 0:

                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                print("len(prediction_boxes_3d)=", len(prediction_boxes_3d))
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

                # # Swap l, w for predictions where w > l
                # swapped_indices = \
                #     prediction_boxes_3d[:, 4] > prediction_boxes_3d[:, 3]
                # prediction_boxes_3d = np.copy(prediction_boxes_3d)
                # prediction_boxes_3d[swapped_indices, 3] = \
                #     prediction_boxes_3d[swapped_indices, 4]
                # prediction_boxes_3d[swapped_indices, 4] = \
                #     prediction_boxes_3d[swapped_indices, 3]

        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        gt_objects = []
        image_path = get_rgb_image_path(img_dir, img_idx, 'img_')
        image = Image.open(image_path)
        image_size = image.size

        # Read the stereo calibration matrix for visualization
        calib_dir = img_dir
        stereo_calib = load_calib(calib_dir, img_idx, 'calib.txt')
        calib_p2 = stereo_calib.p2

        if draw_overlaid or draw_predictions_separate:
            num_of_predictions = 0
            if len(prediction_boxes_3d) > 0:
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for i in range(len(prediction_boxes_3d)):
                    box_3d = prediction_boxes_3d[i, 0:7]
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d,
                        calib_p2,
                        truncate=True,
                        image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score
            # else:
            #     if save_empty_images:
            #         pred_fig, pred_2d_axes, pred_3d_axes = \
            #             vis_utils.visualization(dataset.rgb_image_dir,
            #                                     img_idx,
            #                                     display=False,
            #                                     fig_size=fig_size)
            #         filename = pred_out_dir + '/' + sample_name + '.png'
            #         plt.savefig(filename)
            #         plt.close(pred_fig)
            #     continue

            if draw_predictions_separate and num_of_predictions > 0:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization_jhuang(img_dir,
                                                img_idx, 'img_',
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions([], calib_p2, num_of_predictions,
                                     final_prediction_objs,
                                     final_class_indices, final_boxes_2d,
                                     pred_2d_axes, pred_3d_axes, draw_score,
                                     draw_iou, gt_classes,
                                     draw_orientations_on_pred)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions([], calib_p2, num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices, final_boxes_2d,
                                        pred_3d_axes, draw_score, draw_iou,
                                        gt_classes, draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')
Example #6
def main():
    """This demo shows RPN proposals and AVOD predictions in 3D
    and 2D in image space. Given certain thresholds for proposals
    and predictions, it selects and draws the bounding boxes on
    the image sample. It goes through the entire proposal and
    prediction samples for the given dataset split.

    The proposals, overlaid, and prediction images can be toggled on or off
    separately in the options section.
    The prediction score and IoU with ground truth can be toggled on or off
    as well, shown as (score, IoU) above the detection.
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_VAL)

    ##############################
    # Options
    ##############################
    dataset_config.data_split = 'val'

    fig_size = (10, 6.1)

    rpn_score_threshold = 0.1
    avod_score_threshold = 0.1

    # gt_classes = ['Car']
    gt_classes = ['Pedestrian', 'Cyclist']
    # gt_classes = ['Car', 'Pedestrian', 'Cyclist']

    # Overwrite this to select a specific checkpoint
    global_step = None
    checkpoint_name = sys.argv[1]  #'pyramid_cars_with_aug_example'

    # Drawing Toggles
    draw_proposals_separate = False
    draw_overlaid = False
    draw_predictions_separate = True

    # Show orientation for both GT and proposals/predictions
    draw_orientations_on_prop = False
    draw_orientations_on_pred = False

    # Draw 2D bounding boxes
    draw_projected_2d_boxes = True

    # Save images for samples with no detections
    save_empty_images = True

    draw_score = True
    draw_iou = True
    ##############################
    # End of Options
    ##############################

    # Get the dataset
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    # Setup Paths
    predictions_dir = avod.root_dir() + \
        '/data/outputs/' + checkpoint_name + '/predictions'

    proposals_and_scores_dir = predictions_dir + \
        '/proposals_and_scores/' + dataset.data_split

    predictions_and_scores_dir = predictions_dir + \
        '/final_predictions_and_scores/' + dataset.data_split

    # Output images directories
    output_dir_base = predictions_dir + '/images_2d'

    # Get checkpoint step
    steps = os.listdir(proposals_and_scores_dir)
    steps.sort(key=int)
    print('Available steps: {}'.format(steps))

    # Use latest checkpoint if no index provided
    if global_step is None:
        global_step = steps[-1]

    if draw_proposals_separate:
        prop_out_dir = output_dir_base + '/proposals/{}/{}/{}'.format(
            dataset.data_split, global_step, rpn_score_threshold)

        if not os.path.exists(prop_out_dir):
            os.makedirs(prop_out_dir)

        print('Proposal images saved to:', prop_out_dir)

    if draw_overlaid:
        overlaid_out_dir = output_dir_base + '/overlaid/{}/{}/{}'.format(
            dataset.data_split, global_step, avod_score_threshold)

        if not os.path.exists(overlaid_out_dir):
            os.makedirs(overlaid_out_dir)

        print('Overlaid images saved to:', overlaid_out_dir)

    if draw_predictions_separate:
        pred_out_dir = output_dir_base + '/predictions/{}/{}/{}'.format(
            dataset.data_split, global_step, avod_score_threshold)

        if not os.path.exists(pred_out_dir):
            os.makedirs(pred_out_dir)

        print('Prediction images saved to:', pred_out_dir)

    # Rolling average array of times for time estimation
    avg_time_arr_length = 10
    last_times = np.repeat(time.time(), avg_time_arr_length) + \
        np.arange(avg_time_arr_length)

    for sample_idx in range(dataset.num_samples):
        # Estimate time remaining with 5 slowest times
        start_time = time.time()
        last_times = np.roll(last_times, -1)
        last_times[-1] = start_time
        avg_time = np.mean(np.sort(np.diff(last_times))[-5:])
        samples_remaining = dataset.num_samples - sample_idx
        est_time_left = avg_time * samples_remaining

        # Print progress and time remaining estimate
        sys.stdout.write('\rSaving {} / {}, Avg Time: {:.3f}s, '
                         'Time Remaining: {:.2f}s'.format(
                             sample_idx + 1, dataset.num_samples, avg_time,
                             est_time_left))
        sys.stdout.flush()

        sample_name = dataset.sample_names[sample_idx]
        img_idx = int(sample_name)

        ##############################
        # Proposals
        ##############################
        if draw_proposals_separate or draw_overlaid:
            # Load proposals from files
            proposals_file_path = proposals_and_scores_dir + \
                "/{}/{}.txt".format(global_step, sample_name)
            if not os.path.exists(proposals_file_path):
                print('Sample {}: No proposals, skipping'.format(sample_name))
                continue
            print('Sample {}: Drawing proposals'.format(sample_name))

            proposals_and_scores = np.loadtxt(proposals_file_path)

            proposal_boxes_3d = proposals_and_scores[:, 0:7]
            proposal_scores = proposals_and_scores[:, 7]

            # Apply score mask to proposals
            score_mask = proposal_scores > rpn_score_threshold
            proposal_boxes_3d = proposal_boxes_3d[score_mask]
            proposal_scores = proposal_scores[score_mask]

            proposal_objs = \
                [box_3d_encoder.box_3d_to_object_label(proposal,
                                                       obj_type='Proposal')
                 for proposal in proposal_boxes_3d]

        ##############################
        # Predictions
        ##############################
        if draw_predictions_separate or draw_overlaid:
            predictions_file_path = predictions_and_scores_dir + \
                "/{}/{}.txt".format(global_step,
                                    sample_name)
            if not os.path.exists(predictions_file_path):
                continue

            # Load predictions from files
            predictions_and_scores = np.loadtxt(
                predictions_and_scores_dir +
                "/{}/{}.txt".format(global_step, sample_name))

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]

            # process predictions only if we have any predictions left after
            # masking
            if len(prediction_boxes_3d) > 0:

                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

                # # Swap l, w for predictions where w > l
                # swapped_indices = \
                #     prediction_boxes_3d[:, 4] > prediction_boxes_3d[:, 3]
                # prediction_boxes_3d = np.copy(prediction_boxes_3d)
                # prediction_boxes_3d[swapped_indices, 3] = \
                #     prediction_boxes_3d[swapped_indices, 4]
                # prediction_boxes_3d[swapped_indices, 4] = \
                #     prediction_boxes_3d[swapped_indices, 3]

        ##############################
        # Ground Truth
        ##############################

        # Get ground truth labels
        if dataset.has_labels:
            gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
        else:
            gt_objects = []

        # Filter objects to desired difficulty
        filtered_gt_objs = dataset.kitti_utils.filter_labels(
            gt_objects, classes=gt_classes)

        boxes2d, _, _ = obj_utils.build_bbs_from_objects(
            filtered_gt_objs, class_needed=gt_classes)

        image_path = dataset.get_rgb_image_path(sample_name)
        image = Image.open(image_path)
        image_size = image.size

        # Read the stereo calibration matrix for visualization
        stereo_calib = calib_utils.read_calibration(dataset.calib_dir, img_idx)
        calib_p2 = stereo_calib.p2

        ##############################
        # Reformat and prepare to draw
        ##############################
        if draw_proposals_separate or draw_overlaid:
            proposals_as_anchors = box_3d_encoder.box_3d_to_anchor(
                proposal_boxes_3d)

            proposal_boxes, _ = anchor_projector.project_to_image_space(
                proposals_as_anchors, calib_p2, image_size)

            num_of_proposals = proposal_boxes_3d.shape[0]

            prop_fig, prop_2d_axes, prop_3d_axes = \
                vis_utils.visualization(dataset.rgb_image_dir,
                                        img_idx,
                                        display=False)

            draw_proposals(filtered_gt_objs, calib_p2, num_of_proposals,
                           proposal_objs, proposal_boxes, prop_2d_axes,
                           prop_3d_axes, draw_orientations_on_prop)

            if draw_proposals_separate:
                # Save just the proposals
                filename = prop_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                if not draw_overlaid:
                    plt.close(prop_fig)

        if draw_overlaid or draw_predictions_separate:
            if len(prediction_boxes_3d) > 0:
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for i in range(len(prediction_boxes_3d)):
                    box_3d = prediction_boxes_3d[i, 0:7]
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d,
                        calib_p2,
                        truncate=True,
                        image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score
            else:
                if save_empty_images:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)
                    filename = pred_out_dir + '/' + sample_name + '.png'
                    plt.savefig(filename)
                    plt.close(pred_fig)
                continue

            if draw_overlaid:
                # Overlay prediction boxes on image
                draw_predictions(filtered_gt_objs, calib_p2,
                                 num_of_predictions, final_prediction_objs,
                                 final_class_indices, final_boxes_2d,
                                 prop_2d_axes, prop_3d_axes, draw_score,
                                 draw_iou, gt_classes,
                                 draw_orientations_on_pred)
                filename = overlaid_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)

                plt.close(prop_fig)

            if draw_predictions_separate:
                # Now only draw prediction boxes on images
                # on a new figure handler
                if draw_projected_2d_boxes:
                    pred_fig, pred_2d_axes, pred_3d_axes = \
                        vis_utils.visualization(dataset.rgb_image_dir,
                                                img_idx,
                                                display=False,
                                                fig_size=fig_size)

                    draw_predictions(filtered_gt_objs, calib_p2,
                                     num_of_predictions, final_prediction_objs,
                                     final_class_indices, final_boxes_2d,
                                     pred_2d_axes, pred_3d_axes, draw_score,
                                     draw_iou, gt_classes,
                                     draw_orientations_on_pred)
                else:
                    pred_fig, pred_3d_axes = \
                        vis_utils.visualize_single_plot(
                            dataset.rgb_image_dir, img_idx, display=False)

                    draw_3d_predictions(filtered_gt_objs, calib_p2,
                                        num_of_predictions,
                                        final_prediction_objs,
                                        final_class_indices, final_boxes_2d,
                                        pred_3d_axes, draw_score, draw_iou,
                                        gt_classes, draw_orientations_on_pred)
                filename = pred_out_dir + '/' + sample_name + '.png'
                plt.savefig(filename)
                plt.close(pred_fig)

    print('\nDone')
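Both main variants above repeat the same pattern: mask predictions by score, project the survivors to image space, and drop boxes that fall outside the image. A hedged sketch of that pattern factored into one helper (the import path follows the AVOD repository layout but should be treated as an assumption):

import numpy as np
from avod.core import box_3d_projector

def mask_and_project(predictions_and_scores, score_threshold,
                     calib_p2, image_size):
    # Split the (N, 9) prediction array into boxes, scores, class indices
    boxes_3d = predictions_and_scores[:, 0:7]
    scores = predictions_and_scores[:, 7]
    class_indices = predictions_and_scores[:, 8]

    # Drop low-scoring predictions
    mask = scores >= score_threshold
    boxes_3d = boxes_3d[mask]
    scores = scores[mask]
    class_indices = class_indices[mask]

    # Keep only boxes whose projection lands inside the image
    image_filter, boxes_2d = [], []
    for box_3d in boxes_3d:
        img_box = box_3d_projector.project_to_image_space(
            box_3d, calib_p2, truncate=True,
            image_size=image_size, discard_before_truncation=False)
        image_filter.append(img_box is not None)
        if img_box is not None:
            boxes_2d.append(img_box)
    image_filter = np.asarray(image_filter, dtype=bool)
    return (boxes_3d[image_filter], scores[image_filter],
            class_indices[image_filter], np.asarray(boxes_2d))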
Example #7
def main():
    """
    Visualization of 3D grid anchor generation, showing 2D projections
        in BEV and image space, and a 3D display of the anchors
    """
    dataset_config = DatasetBuilder.copy_config(DatasetBuilder.KITTI_TRAIN)
    dataset_config.num_clusters[0] = 1
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config)

    label_cluster_utils = LabelClusterUtils(dataset)
    clusters, _ = label_cluster_utils.get_clusters()

    # Options
    img_idx = 1
    # fake_clusters = np.array([[5, 4, 3], [6, 5, 4]])
    # fake_clusters = np.array([[3, 3, 3], [4, 4, 4]])

    fake_clusters = np.array([[4, 2, 3]])
    fake_anchor_stride = [5.0, 5.0]
    ground_plane = [0, -1, 0, 1.72]

    anchor_3d_generator = grid_anchor_3d_generator.GridAnchor3dGenerator()

    area_extents = np.array([[-40, 40], [-5, 5], [0, 70]])

    # Generate anchors for cars only
    start_time = time.time()
    anchor_boxes_3d = anchor_3d_generator.generate(
        area_3d=dataset.kitti_utils.area_extents,
        anchor_3d_sizes=fake_clusters,
        anchor_stride=fake_anchor_stride,
        ground_plane=ground_plane)
    all_anchors = box_3d_encoder.box_3d_to_anchor(anchor_boxes_3d)
    end_time = time.time()
    print("Anchors generated in {} s".format(end_time - start_time))

    # Project into bev
    bev_boxes, bev_normalized_boxes = \
        anchor_projector.project_to_bev(all_anchors, area_extents[[0, 2]])

    bev_fig, (bev_axes, bev_normalized_axes) = \
        plt.subplots(1, 2, figsize=(16, 7))
    bev_axes.set_xlim(0, 80)
    bev_axes.set_ylim(70, 0)
    bev_normalized_axes.set_xlim(0, 1.0)
    bev_normalized_axes.set_ylim(1, 0.0)

    plt.show(block=False)

    for box in bev_boxes:
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]

        rect = patches.Rectangle((box[0], box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_axes.add_patch(rect)

    for normalized_box in bev_normalized_boxes:
        box_w = normalized_box[2] - normalized_box[0]
        box_h = normalized_box[3] - normalized_box[1]

        rect = patches.Rectangle((normalized_box[0], normalized_box[1]),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        bev_normalized_axes.add_patch(rect)

    rgb_fig, rgb_2d_axes, rgb_3d_axes = \
        vis_utils.visualization(dataset.rgb_image_dir, img_idx)
    plt.show(block=False)

    image_path = dataset.get_rgb_image_path(dataset.sample_names[img_idx])
    image_shape = np.array(Image.open(image_path)).shape

    stereo_calib_p2 = calib_utils.read_calibration(dataset.calib_dir,
                                                   img_idx).p2

    start_time = time.time()
    rgb_boxes, rgb_normalized_boxes = \
        anchor_projector.project_to_image_space(all_anchors,
                                                stereo_calib_p2,
                                                image_shape)
    end_time = time.time()
    print("Anchors projected in {} s".format(end_time - start_time))

    # Read the stereo calibration matrix for visualization
    stereo_calib = calib_utils.read_calibration(dataset.calib_dir, 0)
    p = stereo_calib.p2

    # Overlay boxes on images

    for anchor_idx in range(len(anchor_boxes_3d)):
        anchor_box_3d = anchor_boxes_3d[anchor_idx]

        obj_label = box_3d_encoder.box_3d_to_object_label(anchor_box_3d)

        # Draw 3D boxes
        vis_utils.draw_box_3d(rgb_3d_axes, obj_label, p)

        # Draw 2D boxes
        rgb_box_2d = rgb_boxes[anchor_idx]

        box_x1 = rgb_box_2d[0]
        box_y1 = rgb_box_2d[1]
        box_w = rgb_box_2d[2] - box_x1
        box_h = rgb_box_2d[3] - box_y1

        rect = patches.Rectangle((box_x1, box_y1),
                                 box_w,
                                 box_h,
                                 linewidth=2,
                                 edgecolor='b',
                                 facecolor='none')

        rgb_2d_axes.add_patch(rect)

        if anchor_idx % 32 == 0:
            rgb_fig.canvas.draw()

    plt.show(block=True)
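Example #7 converts each anchor through box_3d_to_object_label before drawing it. A small round-trip sketch of the box_3d encoding ([x, y, z, l, w, h, ry], as the comment in Example #3 notes); the values are illustrative and the import path is assumed from the AVOD repository:

import numpy as np
from avod.core import box_3d_encoder  # assumed import path

box_3d = np.array([0.0, 1.7, 20.0, 4.0, 2.0, 3.0, 0.0])  # [x, y, z, l, w, h, ry]
obj = box_3d_encoder.box_3d_to_object_label(box_3d, obj_type='Car')
recovered = box_3d_encoder.object_label_to_box_3d(obj)
# The two conversions should be lossless for these seven values
assert np.allclose(box_3d, recovered)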
Example #8
def test(model_config, eval_config,
         dataset_config, data_split,
         ckpt_indices):

    # Overwrite the defaults
    dataset_config = config_builder.proto_to_obj(dataset_config)

    dataset_config.data_split = data_split
    dataset_config.data_split_dir = 'training'
    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    eval_config.eval_mode = 'test'
    eval_config.evaluate_repeatedly = False

    dataset_config.has_labels = False
    # Enable this to see the actual memory being used
    eval_config.allow_gpu_mem_growth = True

    eval_config = config_builder.proto_to_obj(eval_config)
    # Grab the checkpoint indices to evaluate
    eval_config.ckpt_indices = ckpt_indices

    # Remove augmentation during evaluation in test mode
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Setup the model
    model_name = model_config.model_name
    # Overwrite repeated field
    model_config = config_builder.proto_to_obj(model_config)
    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=eval_config.eval_mode,
                              dataset=dataset)
        elif model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=eval_config.eval_mode,
                             dataset=dataset)
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        #model_evaluator = Evaluator(model, dataset_config, eval_config)
        #model_evaluator.run_latest_checkpoints()

        # Create a variable tensor to hold the global step
        global_step_tensor = tf.Variable(0, trainable=False, name='global_step')

        allow_gpu_mem_growth = eval_config.allow_gpu_mem_growth
        if allow_gpu_mem_growth:
            # GPU memory config
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = allow_gpu_mem_growth
            _sess = tf.Session(config=config)
        else:
            _sess = tf.Session()

        _prediction_dict = model.build()
        _saver = tf.train.Saver()

        trainer_utils.load_checkpoints(model_config.paths_config.checkpoint_dir,
                                       _saver)
        num_checkpoints = len(_saver.last_checkpoints)
        print("test:",num_checkpoints)
        checkpoint_to_restore = _saver.last_checkpoints[num_checkpoints-1]

        _saver.restore(_sess, checkpoint_to_restore)

        num_samples = model.dataset.num_samples
        num_valid_samples = 0

        current_epoch = model.dataset.epochs_completed
        while current_epoch == model.dataset.epochs_completed:
            # Keep track of feed_dict speed
            start_time = time.time()
            feed_dict = model.create_feed_dict()
            feed_dict_time = time.time() - start_time

            # Get sample name from model
            sample_name = model.sample_info['sample_name']

            num_valid_samples += 1
            print("Step: {} / {}, Inference on sample {}".format(
                num_valid_samples, num_samples,
                sample_name))

            print("test mode")
            inference_start_time = time.time()
            # Don't calculate loss or run summaries for test
            predictions = _sess.run(_prediction_dict,
                                    feed_dict=feed_dict)
            inference_time = time.time() - inference_start_time

            print("inference time:", inference_time)

            predictions_and_scores = get_avod_predicted_boxes_3d_and_scores(predictions)

            #print(predictions_and_scores)
            #im_path = os.path.join(dataset_dir, 'training/image_2/{:06d}.png'.format(img_idx))
            #im = cv2.imread(im_path)
            #cv2.imshow('result',im)
            #cv2.waitKey(30)

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]
            gt_classes = ['Car']
            fig_size = (10, 6.1)

            avod_score_threshold = 0.1
            if len(prediction_boxes_3d) > 0:

                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

            if len(prediction_boxes_3d) > 0:

                dataset_dir = model.dataset.dataset_dir
                sample_name = model.dataset.sample_names[
                    model.dataset._index_in_epoch - 1]
                img_idx = int(sample_name)
                print("frame_index", img_idx)
                image_path = model.dataset.get_rgb_image_path(sample_name)
                image = Image.open(image_path)
                image_size = image.size

                if model.dataset.has_labels:
                    gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
                else:
                    gt_objects = []
                filtered_gt_objs = model.dataset.kitti_utils.filter_labels(
                    gt_objects, classes=gt_classes)

                stereo_calib = calib_utils.read_calibration(dataset.calib_dir,
                                                            img_idx)
                calib_p2 = stereo_calib.p2
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for i in range(len(prediction_boxes_3d)):
                    box_3d = prediction_boxes_3d[i, 0:7]
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d, calib_p2,
                        truncate=True, image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score

                pred_fig, pred_2d_axes, pred_3d_axes = \
                    vis_utils.visualization(dataset.rgb_image_dir,
                                            img_idx,
                                            display=False,
                                            fig_size=fig_size)

                draw_predictions(filtered_gt_objs,
                                 calib_p2,
                                 num_of_predictions,
                                 final_prediction_objs,
                                 final_class_indices,
                                 final_boxes_2d,
                                 pred_2d_axes,
                                 pred_3d_axes,
                                 True,
                                 True,
                                 gt_classes,
                                 False)

                #cv2.imshow('result',pred_fig)
                print(type(pred_fig))
                pred_fig.canvas.draw()
                # np.fromstring is deprecated; frombuffer reads the same bytes
                img = np.frombuffer(pred_fig.canvas.tostring_rgb(),
                                    dtype=np.uint8)
                img = img.reshape(
                    pred_fig.canvas.get_width_height()[::-1] + (3,))
                cv2.imshow('result', img)

                #draw bird view
                kitti_utils = model.dataset.kitti_utils
                print(img.shape[0:2])
                point_cloud = kitti_utils.get_point_cloud(
                    'lidar', img_idx, (370, 1242))
                ground_plane = kitti_utils.get_ground_plane(sample_name)
                bev_images = kitti_utils.create_bev_maps(point_cloud, ground_plane)

                density_map = np.array(bev_images.get("density_map"))
                _, box_points_norm = box_3d_projector.project_to_bev(
                    final_prediction_boxes_3d, [[-40, 40], [0, 70]])
                density_map = draw_boxes(density_map, box_points_norm)
                cv2.imshow('lidar',density_map)
                cv2.waitKey(-1)
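Example #8 converts the Matplotlib canvas to a NumPy array by hand and passes it to cv2.imshow, which expects BGR rather than the RGB bytes Matplotlib produces. A hypothetical helper packaging that conversion (works with Agg-style backends):

import numpy as np

def fig_to_bgr_image(fig):
    # Render the figure, grab its RGB byte buffer, flip channels for OpenCV
    fig.canvas.draw()
    width, height = fig.canvas.get_width_height()
    rgb = np.frombuffer(fig.canvas.tostring_rgb(),
                        dtype=np.uint8).reshape(height, width, 3)
    return rgb[:, :, ::-1]  # RGB -> BGR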
Example #9
def project_to_image_space(box_3d,
                           calib_p2,
                           truncate=False,
                           image_size=None,
                           discard_before_truncation=True):
    """ Projects a box_3d into image space

    Args:
        box_3d: single box_3d to project
        calib_p2: stereo calibration p2 matrix
        truncate: if True, 2D projections are truncated to be inside the image
        image_size: [w, h] must be provided if truncate is True,
            used for truncation
        discard_before_truncation: If True, discard boxes that are larger than
            80% of the image in width OR height BEFORE truncation. If False,
            discard boxes that are larger than 80% of the width AND
            height AFTER truncation.

    Returns:
        Projected box in image space [x1, y1, x2, y2]
            Returns None if box is not inside the image
    """

    format_checker.check_box_3d_format(box_3d)

    obj_label = box_3d_encoder.box_3d_to_object_label(box_3d)
    corners_3d = obj_utils.compute_box_corners_3d(obj_label)

    projected = calib_utils.project_to_image(corners_3d, calib_p2)

    x1 = np.amin(projected[0])
    y1 = np.amin(projected[1])
    x2 = np.amax(projected[0])
    y2 = np.amax(projected[1])

    img_box = np.array([x1, y1, x2, y2])

    if truncate:
        if not image_size:
            raise ValueError('Image size must be provided')

        image_w = image_size[0]
        image_h = image_size[1]

        # Discard invalid boxes (outside image space)
        if img_box[0] > image_w or \
                img_box[1] > image_h or \
                img_box[2] < 0 or \
                img_box[3] < 0:
            return None

        # Discard boxes that are larger than 80% of the image width OR height
        if discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) or img_box_h > (image_h * 0.8):
                return None

        # Truncate remaining boxes into image space
        if img_box[0] < 0:
            img_box[0] = 0
        if img_box[1] < 0:
            img_box[1] = 0
        if img_box[2] > image_w:
            img_box[2] = image_w
        if img_box[3] > image_h:
            img_box[3] = image_h

        # Discard boxes that are covering the whole image after truncation
        if not discard_before_truncation:
            img_box_w = img_box[2] - img_box[0]
            img_box_h = img_box[3] - img_box[1]
            if img_box_w > (image_w * 0.8) and img_box_h > (image_h * 0.8):
                return None

    return img_box
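A hypothetical call to the function above; the box values are illustrative and calib_p2 uses a typical KITTI P2 matrix (normally read via calib_utils.read_calibration):

import numpy as np

calib_p2 = np.array([[721.5377, 0.0, 609.5593, 44.85728],
                     [0.0, 721.5377, 172.854, 0.2163791],
                     [0.0, 0.0, 1.0, 0.002745884]])
box_3d = np.array([1.0, 1.6, 15.0, 3.9, 1.6, 1.5, 0.1])  # [x, y, z, l, w, h, ry]
img_box = project_to_image_space(box_3d, calib_p2,
                                 truncate=True, image_size=[1242, 375])
if img_box is not None:
    x1, y1, x2, y2 = img_box  # clipped 2D box in pixel coordinates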
Example #10
    def _draw_predictions(self, image, predictions_and_scores, frame_calib):
        """ This code draws the bounding boxes with score threshold above the given threshold onto the image.
            This code is borrowed in part from show_predictions_2d.py
        """
        prediction_boxes_3d = predictions_and_scores[:, 0:7]
        print("Number of predictions boxes 3d: ", len(prediction_boxes_3d))
        prediction_scores = predictions_and_scores[:, 7]
        prediction_class_indices = predictions_and_scores[:, 8]
        # project_to_image_space expects (w, h); image.shape[:2] is (h, w)
        image_size = image.shape[1::-1]
        # process predictions only if we have any predictions left after
        # masking
        if len(prediction_boxes_3d) > 0:
            # Apply score mask
            avod_score_mask = prediction_scores >= self.avod_score_threshold
            prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
            prediction_scores = prediction_scores[avod_score_mask]
            prediction_class_indices = \
                prediction_class_indices[avod_score_mask]
        else:
            return

        image_filter = []
        final_boxes_2d = []
        for i in range(len(prediction_boxes_3d)):
            box_3d = prediction_boxes_3d[i, 0:7]
            img_box = box_3d_projector.project_to_image_space(
                box_3d,
                frame_calib.p2,
                truncate=True,
                image_size=image_size,
                discard_before_truncation=False)
            if img_box is not None:
                image_filter.append(True)
                final_boxes_2d.append(img_box)
            else:
                image_filter.append(False)
        final_boxes_2d = np.asarray(final_boxes_2d)
        final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
        final_scores = prediction_scores[image_filter]
        final_class_indices = prediction_class_indices[image_filter]

        num_of_predictions = final_boxes_2d.shape[0]
        print("Drawing {} predictions".format(num_of_predictions))
        # Convert to objs
        final_prediction_objs = \
            [box_3d_encoder.box_3d_to_object_label(
                prediction, obj_type='Prediction')
                for prediction in final_prediction_boxes_3d]
        for (obj, score) in zip(final_prediction_objs, final_scores):
            obj.score = score
        # Overlay prediction boxes on image
        filtered_gt_objs = []
        draw_orientations_on_pred = True
        fig, ax = self._create_fig(image)
        # Plot the image
        ax.imshow(image)
        # Draw predictions over image
        return draw_3d_predictions(filtered_gt_objs, frame_calib.p2,
                                   num_of_predictions, final_prediction_objs,
                                   final_class_indices, final_boxes_2d, ax,
                                   draw_orientations_on_pred, image)
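One convention worth noting across these examples: PIL's Image.size is (width, height), which is what project_to_image_space documents, while a NumPy image's shape[:2] is (height, width) and has to be reversed first (as done above). A small illustration, with the file name hypothetical:

import numpy as np
from PIL import Image

pil_image = Image.open('000050.png')  # hypothetical KITTI frame
print(pil_image.size)                 # (width, height) -- matches image_size=[w, h]
np_image = np.asarray(pil_image)
print(np_image.shape[:2])             # (height, width) -- reversed order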
Example #11
def main():
    """This demo shows RPN proposals and AVOD predictions in the
    3D point cloud.

    Keys:
        F1: Toggle proposals
        F2: Toggle predictions
        F3: Toggle 3D voxel grid
        F4: Toggle point cloud

        F5: Toggle easy ground truth objects (Green)
        F6: Toggle medium ground truth objects (Orange)
        F7: Toggle hard ground truth objects (Red)
        F8: Toggle all ground truth objects (default off)

        F9: Toggle ground slice filter (default off)
        F10: Toggle offset slice filter (default off)
    """

    ##############################
    # Options
    ##############################
    rpn_score_threshold = 0.1
    avod_score_threshold = 0.1

    proposals_line_width = 1.0
    predictions_line_width = 3.0
    show_orientations = True

    point_cloud_source = 'depth'

    # Config file folder, default (<avod_root>/data/outputs/<checkpoint_name>)
    config_dir = None

    checkpoint_name = 'pyramid_cars_with_aug_example'
    global_step = None  # Latest checkpoint
    global_step = 83000

    #data_split = 'val_half'
    data_split = 'val'
    # data_split = 'test'

    # Show 3D iou text
    draw_ious_3d = True

    name_list = []


    #name_file = '/media/wavelab/d3cd89ab-7705-4996-94f3-01da25ba8f50/moosey/val.txt'

    #with open(name_file) as f:
        #for line in f:
            #newline = line.replace("\n","")
            #name_list.append(newline)


    #name_list =['0000000003','0000000009','0000000016','0000000233','0000000234','0000000236','0000000422','0000000473','0000000490','0000000494','0000000547','0000000655',\
                #'0000000679','0000000690','0000000692','0000000781']
    name_list = ['0000000004']

    for names in name_list:

        sample_name = names
        #sample_name = None

        # # # Cars # # #
        # sample_name = '000050'
        # sample_name = '000104'
        # sample_name = '000169'
        # sample_name = '000191'
        # sample_name = '000360'
        # sample_name = '001783'
        # sample_name = '001820'

        # val split
        # sample_name = '000181'
        # sample_name = '000751'
        # sample_name = '000843'
        # sample_name = '000944'
        # sample_name = '006338'

        # # # People # # #
        # val_half split
        # sample_name = '000001'  # Hard, 1 far cyc
        # sample_name = '000005'  # Easy, 1 ped
        # sample_name = '000122'  # Easy, 1 cyc
        # sample_name = '000134'  # Hard, lots of people
        # sample_name = '000167'  # Medium, 1 ped, 2 cycs
        # sample_name = '000187'  # Medium, 1 ped on left
        # sample_name = '000381'  # Easy, 1 ped
        # sample_name = '000398'  # Easy, 1 ped
        # sample_name = '000401'  # Hard, obscured peds
        # sample_name = '000407'  # Easy, 1 ped
        # sample_name = '000448'  # Hard, several far people
        # sample_name = '000486'  # Hard 2 obscured peds
        # sample_name = '000509'  # Easy, 1 ped
        # sample_name = '000718'  # Hard, lots of people
        # sample_name = '002216'  # Easy, 1 cyc

        # val split
        # sample_name = '000015'
        # sample_name = '000048'
        # sample_name = '000058'
        # sample_name = '000076'    # Medium, few ped, 1 cyc
        # sample_name = '000108'
        # sample_name = '000118'
        # sample_name = '000145'
        # sample_name = '000153'
        # sample_name = '000186'
        # sample_name = '000195'
        # sample_name = '000199'
        # sample_name = '000397'
        # sample_name = '004425'
        # sample_name = '004474'    # Hard, many ped, 1 cyc
        # sample_name = '004657'    # Hard, Few cycl, few ped
        # sample_name = '006071'
        # sample_name = '006828'    # Hard, Few cycl, few ped
        # sample_name = '006908'    # Hard, Few cycl, few ped
        # sample_name = '007412'
        # sample_name = '007318'    # Hard, Few cycl, few ped

        ##############################
        # End of Options
        ##############################

        if data_split == 'test':
            draw_ious_3d = False

        if config_dir is None:
            config_dir = avod.root_dir() + '/data/outputs/' + checkpoint_name

        # Parse experiment config
        pipeline_config_file = \
            config_dir + '/' + checkpoint_name + '.config'
        _, _, _, dataset_config = \
            config_builder_util.get_configs_from_pipeline_file(
                pipeline_config_file, is_training=False)

        dataset_config.data_split = data_split

        if data_split == 'test':
            dataset_config.data_split_dir = 'testing'
            dataset_config.has_labels = False

        dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                     use_defaults=False)

        # Random sample
        if sample_name is None:
            sample_idx = np.random.randint(0, dataset.num_samples)
            sample_name = dataset.sample_names[sample_idx]

        ##############################
        # Setup Paths
        ##############################
        img_idx = int(sample_name)

        # Text files directory
        proposals_and_scores_dir = avod.root_dir() + \
            '/data/outputs/' + checkpoint_name + '/predictions' +  \
            '/proposals_and_scores/' + dataset.data_split

        predictions_and_scores_dir = avod.root_dir() + \
            '/data/outputs/' + checkpoint_name + '/predictions' +  \
            '/final_predictions_and_scores/' + dataset.data_split

        # Get checkpoint step
        steps = os.listdir(proposals_and_scores_dir)
        steps.sort(key=int)
        print('Available steps: {}'.format(steps))

        # Use latest checkpoint if no index provided
        if global_step is None:
            global_step = steps[-1]

        # Output images directory
        img_out_dir = avod.root_dir() + '/data/outputs/' + checkpoint_name + \
            '/predictions/images_3d/{}/{}/{}'.format(dataset.data_split,
                                                     global_step,
                                                     rpn_score_threshold)

        if not os.path.exists(img_out_dir):
            os.makedirs(img_out_dir)

        ##############################
        # Proposals
        ##############################
        # Load proposals from files
        proposals_and_scores = np.loadtxt(proposals_and_scores_dir +
                                          "/{}/{}.txt".format(global_step,
                                                              sample_name))

        proposals = proposals_and_scores[:, 0:7]
        proposal_scores = proposals_and_scores[:, 7]

        rpn_score_mask = proposal_scores > rpn_score_threshold

        proposals = proposals[rpn_score_mask]
        proposal_scores = proposal_scores[rpn_score_mask]
        print('Proposals:', len(proposal_scores), proposal_scores)

        proposal_objs = \
            [box_3d_encoder.box_3d_to_object_label(proposal,
                                                   obj_type='Proposal')
             for proposal in proposals]

        ##############################
        # Predictions
        ##############################
        # Load proposals from files
        predictions_and_scores = np.loadtxt(predictions_and_scores_dir +
                                            "/{}/{}.txt".format(
                                                global_step,
                                                sample_name)).reshape(-1, 9)

        prediction_boxes_3d = predictions_and_scores[:, 0:7]
        prediction_scores = predictions_and_scores[:, 7]
        prediction_types = np.asarray(predictions_and_scores[:, 8], dtype=np.int32)

        avod_score_mask = prediction_scores >= avod_score_threshold
        prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
        prediction_scores = prediction_scores[avod_score_mask]
        print('Predictions: ', len(prediction_scores), prediction_scores)

        final_predictions = np.copy(prediction_boxes_3d)

        # # Swap l, w for predictions where w > l
        # swapped_indices = prediction_boxes_3d[:, 4] > prediction_boxes_3d[:, 3]
        # final_predictions[swapped_indices, 3] = prediction_boxes_3d[swapped_indices, 4]
        # final_predictions[swapped_indices, 4] = prediction_boxes_3d[swapped_indices, 3]

        prediction_objs = []
        for pred_idx in range(len(final_predictions)):
            prediction_box_3d = final_predictions[pred_idx]
            prediction_type = dataset.classes[prediction_types[pred_idx]]
            prediction_obj = box_3d_encoder.box_3d_to_object_label(
                prediction_box_3d, obj_type=prediction_type)
            prediction_objs.append(prediction_obj)

        ##############################
        # Ground Truth
        ##############################
        if dataset.has_labels:
            # Get ground truth labels
            easy_gt_objs, medium_gt_objs, \
                hard_gt_objs, all_gt_objs = \
                demo_utils.get_gts_based_on_difficulty(dataset, img_idx)
        else:
            easy_gt_objs = medium_gt_objs = hard_gt_objs = all_gt_objs = []

        ##############################
        # 3D IoU
        ##############################
        if draw_ious_3d:
            # Convert to box_3d
            all_gt_boxes_3d = [box_3d_encoder.object_label_to_box_3d(gt_obj)
                               for gt_obj in all_gt_objs]
            pred_boxes_3d = [box_3d_encoder.object_label_to_box_3d(pred_obj)
                             for pred_obj in prediction_objs]
            max_ious_3d = demo_utils.get_max_ious_3d(all_gt_boxes_3d,
                                                     pred_boxes_3d)
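            # max_ious_3d holds, for each ground-truth box, its best 3D IoU over all predictions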

        ##############################
        # Point Cloud
        ##############################
        image_path = dataset.get_rgb_image_path(sample_name)
        image = cv2.imread(image_path)

        print("***************")
        print(point_cloud_source)
        print(img_idx)
        print(image.shape)

        point_cloud = dataset.kitti_utils.get_point_cloud(point_cloud_source,
                                                          img_idx,
                                                          image_shape=image.shape)

        point_cloud = np.asarray(point_cloud)
        print('Point cloud shape: {}'.format(point_cloud.shape))

        # Filter point cloud to extents
        area_extents = np.asarray([[-40, 40], [-5, 3], [0, 70]])
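        # BEV extents keep the x (lateral) and z (forward) rows of the area extents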
        bev_extents = area_extents[[0, 2]]

        points = point_cloud.T
        point_filter = obj_utils.get_point_filter(point_cloud, area_extents)
        points = points[point_filter]

        point_colours = vis_utils.project_img_to_point_cloud(points,
                                                             image,
                                                             dataset.calib_dir,
                                                             img_idx)

        # Voxelize the point cloud for visualization
        voxel_grid = VoxelGrid()
        voxel_grid.voxelize(points, voxel_size=0.1,
                            create_leaf_layout=False)

        # Ground plane
        ground_plane = obj_utils.get_road_plane(img_idx, dataset.planes_dir)
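        # ground_plane holds the [a, b, c, d] coefficients of ax + by + cz + d = 0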

        ##############################
        # Visualization
        ##############################
        # Create VtkVoxelGrid
        vtk_voxel_grid = VtkVoxelGrid()
        vtk_voxel_grid.set_voxels(voxel_grid)

        vtk_point_cloud = VtkPointCloud()
        vtk_point_cloud.set_points(points, point_colours)

        # Create VtkAxes
        vtk_axes = vtk.vtkAxesActor()
        vtk_axes.SetTotalLength(5, 5, 5)

        # Create VtkBoxes for proposal boxes
        vtk_proposal_boxes = VtkBoxes()
        vtk_proposal_boxes.set_line_width(proposals_line_width)
        vtk_proposal_boxes.set_objects(proposal_objs,
                                       COLOUR_SCHEME_PREDICTIONS)

        # Create VtkPyramidBoxes for prediction boxes
        vtk_prediction_boxes = VtkPyramidBoxes()
        vtk_prediction_boxes.set_line_width(predictions_line_width)
        vtk_prediction_boxes.set_objects(prediction_objs,
                                         COLOUR_SCHEME_PREDICTIONS,
                                         show_orientations)

        # Create VtkBoxes for ground truth
        vtk_hard_gt_boxes = VtkBoxes()
        vtk_medium_gt_boxes = VtkBoxes()
        vtk_easy_gt_boxes = VtkBoxes()
        vtk_all_gt_boxes = VtkBoxes()

        vtk_hard_gt_boxes.set_objects(hard_gt_objs, COLOUR_SCHEME_PREDICTIONS,
                                      show_orientations)
        vtk_medium_gt_boxes.set_objects(medium_gt_objs, COLOUR_SCHEME_PREDICTIONS,
                                        show_orientations)
        vtk_easy_gt_boxes.set_objects(easy_gt_objs, COLOUR_SCHEME_PREDICTIONS,
                                      show_orientations)
        vtk_all_gt_boxes.set_objects(all_gt_objs, VtkBoxes.COLOUR_SCHEME_KITTI,
                                     show_orientations)

        # Create VtkTextLabels for 3D ious
        vtk_text_labels = VtkTextLabels()

        if draw_ious_3d and len(all_gt_boxes_3d) > 0:
            gt_positions_3d = np.asarray(all_gt_boxes_3d)[:, 0:3]
            vtk_text_labels.set_text_labels(
                gt_positions_3d,
                ['{:0.3f}'.format(iou_3d) for iou_3d in max_ious_3d])

        # Create VtkGroundPlane
        vtk_ground_plane = VtkGroundPlane()
        vtk_slice_bot_plane = VtkGroundPlane()
        vtk_slice_top_plane = VtkGroundPlane()

        vtk_ground_plane.set_plane(ground_plane, bev_extents)
        vtk_slice_bot_plane.set_plane(ground_plane + [0, 0, 0, -0.2], bev_extents)
        vtk_slice_top_plane.set_plane(ground_plane + [0, 0, 0, -2.0], bev_extents)
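        # Offsetting the d coefficient shifts each plane along its normal, marking
        # slice boundaries 0.2 m and 2.0 m above the ground plane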

        # Create Voxel Grid Renderer in bottom half
        vtk_renderer = vtk.vtkRenderer()
        vtk_renderer.AddActor(vtk_voxel_grid.vtk_actor)
        vtk_renderer.AddActor(vtk_point_cloud.vtk_actor)

        vtk_renderer.AddActor(vtk_proposal_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_prediction_boxes.vtk_actor)

        vtk_renderer.AddActor(vtk_hard_gt_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_medium_gt_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_easy_gt_boxes.vtk_actor)
        vtk_renderer.AddActor(vtk_all_gt_boxes.vtk_actor)

        vtk_renderer.AddActor(vtk_text_labels.vtk_actor)

        # Add ground plane and slice planes
        vtk_renderer.AddActor(vtk_ground_plane.vtk_actor)
        vtk_renderer.AddActor(vtk_slice_bot_plane.vtk_actor)
        vtk_renderer.AddActor(vtk_slice_top_plane.vtk_actor)

        vtk_renderer.AddActor(vtk_axes)
        vtk_renderer.SetBackground(0.2, 0.3, 0.4)

        # Set initial properties for some actors
        vtk_point_cloud.vtk_actor.GetProperty().SetPointSize(3)
        vtk_proposal_boxes.vtk_actor.SetVisibility(0)
        vtk_voxel_grid.vtk_actor.SetVisibility(0)
        vtk_all_gt_boxes.vtk_actor.SetVisibility(0)

        vtk_ground_plane.vtk_actor.SetVisibility(0)
        vtk_slice_bot_plane.vtk_actor.SetVisibility(0)
        vtk_slice_top_plane.vtk_actor.SetVisibility(0)
        vtk_ground_plane.vtk_actor.GetProperty().SetOpacity(0.9)
        vtk_slice_bot_plane.vtk_actor.GetProperty().SetOpacity(0.9)
        vtk_slice_top_plane.vtk_actor.GetProperty().SetOpacity(0.9)

        # Setup Camera
        current_cam = vtk_renderer.GetActiveCamera()
        current_cam.Pitch(140.0)
        current_cam.Roll(180.0)

        # Zooms out to fit all points on screen
        vtk_renderer.ResetCamera()
        # Zoom in slightly
        current_cam.Zoom(2)

        # Reset the clipping range to show all points
        vtk_renderer.ResetCameraClippingRange()

        # Setup Render Window
        vtk_render_window = vtk.vtkRenderWindow()
        vtk_render_window.SetWindowName(
            "Predictions: Step {}, Sample {}, Min Score {}".format(
                global_step,
                sample_name,
                avod_score_threshold,
            ))
        vtk_render_window.SetSize(900, 600)
        vtk_render_window.AddRenderer(vtk_renderer)

        # Setup custom interactor style, which handles mouse and key events
        vtk_render_window_interactor = vtk.vtkRenderWindowInteractor()
        vtk_render_window_interactor.SetRenderWindow(vtk_render_window)

        # Add custom interactor to toggle actor visibilities
        custom_interactor = vis_utils.CameraInfoInteractorStyle([
            vtk_proposal_boxes.vtk_actor,
            vtk_prediction_boxes.vtk_actor,
            vtk_voxel_grid.vtk_actor,
            vtk_point_cloud.vtk_actor,

            vtk_easy_gt_boxes.vtk_actor,
            vtk_medium_gt_boxes.vtk_actor,
            vtk_hard_gt_boxes.vtk_actor,
            vtk_all_gt_boxes.vtk_actor,

            vtk_ground_plane.vtk_actor,
            vtk_slice_bot_plane.vtk_actor,
            vtk_slice_top_plane.vtk_actor,
            vtk_text_labels.vtk_actor,
        ])

        vtk_render_window_interactor.SetInteractorStyle(custom_interactor)
        # Render in VTK
        vtk_render_window.Render()

        # Take a screenshot
        window_to_image_filter = vtk.vtkWindowToImageFilter()
        window_to_image_filter.SetInput(vtk_render_window)
        window_to_image_filter.Update()

        png_writer = vtk.vtkPNGWriter()
        file_name = img_out_dir + "/{}.png".format(sample_name)
        png_writer.SetFileName(file_name)
        png_writer.SetInputData(window_to_image_filter.GetOutput())
        png_writer.Write()

        print('Screenshot saved to', file_name)

        # vtk_render_window_interactor.Start()  # Start() blocks until the window is closed
        vtk_render_window_interactor.Initialize()  # Initialize() returns immediately (non-blocking)
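To capture the screenshot in a headless batch loop, VTK render windows support off-screen rendering; a minimal tweak before Render(), assuming a VTK build with off-screen support:

vtk_render_window.SetOffScreenRendering(1)  # draw into an off-screen buffer
vtk_render_window.Render()  # the vtkWindowToImageFilter / PNG writer above then works unchanged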
Example No. 12
def inference(rpn_model_path, detect_model_path, avod_config_path):
    model_config, _, eval_config, dataset_config = \
        config_builder.get_configs_from_pipeline_file(
            avod_config_path, is_training=False)

    # Setup the model
    model_name = model_config.model_name
    # Overwrite repeated field
    model_config = config_builder.proto_to_obj(model_config)
    # Turn path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    dataset = get_dataset(dataset_config, 'val')

    # run avod proposal network
    rpn_endpoints, sess1, rpn_model = get_proposal_network(model_config, dataset, rpn_model_path)
    end_points, sess2 = get_detection_network(detect_model_path)

    all_prediction = []
    all_id_list = None
    all_2d_boxes = []
    for idx in range(3769):  # 3769 samples in the KITTI val split
        feed_dict1 = rpn_model.create_feed_dict()
        kitti_samples = dataset.load_samples([idx])
        sample = kitti_samples[0]
        '''
        if sample[constants.KEY_SAMPLE_NAME] < '001100':
            continue
        if sample[constants.KEY_SAMPLE_NAME] > '001200':
            break
        '''
        start_time = time.time()
        rpn_predictions = sess1.run(rpn_endpoints, feed_dict=feed_dict1)
        top_anchors = rpn_predictions[RpnModel.PRED_TOP_ANCHORS]
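        # Convert the top anchors to box_3d format and pair each with its objectness score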
        top_proposals = box_3d_encoder.anchors_to_box_3d(top_anchors)
        softmax_scores = rpn_predictions[RpnModel.PRED_TOP_OBJECTNESS_SOFTMAX]

        proposals_and_scores = np.column_stack((top_proposals,
                                                softmax_scores))
        top_img_roi = rpn_predictions[RpnModel.PRED_TOP_IMG_ROI]
        top_bev_roi = rpn_predictions[RpnModel.PRED_TOP_BEV_ROI]
        roi_num = len(top_img_roi)
        top_img_roi = np.reshape(top_img_roi, (roi_num, -1))
        top_bev_roi = np.reshape(top_bev_roi, (roi_num, -1))
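        # Each proposal's feature vector is its flattened image ROI followed by its flattened BEV ROI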
        roi_features = np.column_stack((top_img_roi, top_bev_roi))
        '''
        # save proposal
        if os.path.exists(os.path.join('/data/ssd/public/jlliu/Kitti/object/training/proposal', '%s.txt'%(sample[constants.KEY_SAMPLE_NAME]))):
            continue
        np.savetxt(os.path.join('./proposals_and_scores/', '%s.txt'%sample[constants.KEY_SAMPLE_NAME]), proposals_and_scores, fmt='%.3f')
        np.savetxt(os.path.join('./roi_features/', '%s_roi.txt'%sample[constants.KEY_SAMPLE_NAME]), roi_features, fmt='%.5f')
        print('save ' + sample[constants.KEY_SAMPLE_NAME])
        '''
        # run frustum_pointnets_v2
        point_clouds, feature_vec, rot_angle_list, prop_cls_labels = get_pointnet_input(sample, proposals_and_scores, roi_features)
        try:
            prediction = detect_batch(sess2, end_points, point_clouds, feature_vec, rot_angle_list, prop_cls_labels)
        except Exception:
            traceback.print_exc()
            continue

        elapsed_time = time.time() - start_time
        print(sample[constants.KEY_SAMPLE_NAME], elapsed_time)
        # concat all predictions for kitti eval
        id_list = np.ones((len(prediction),)) * int(sample[constants.KEY_SAMPLE_NAME])
        if all_id_list is None:
            all_id_list = id_list
        else:
            all_id_list = np.concatenate((all_id_list, id_list), axis=0)
        for pred in prediction:
            obj = box_3d_encoder.box_3d_to_object_label(np.array(pred[0:7]), obj_type=type_whitelist[pred[8]])
            corners = compute_box_3d(obj)
            projected = calib_utils.project_to_image(corners.T, sample[constants.KEY_STEREO_CALIB_P2])
            x1 = np.amin(projected[0])
            y1 = np.amin(projected[1])
            x2 = np.amax(projected[0])
            y2 = np.amax(projected[1])
            all_2d_boxes.append([x1, y1, x2, y2])
        all_prediction += prediction
        # save result
        pickle.dump({'proposals_and_scores': proposals_and_scores, 'roi_features': roi_features}, open("rpn_out/%s"%sample[constants.KEY_SAMPLE_NAME], "wb"))
        pickle.dump(prediction, open('final_out/%s' % sample[constants.KEY_SAMPLE_NAME], 'wb'))
        visualize(dataset, sample, prediction)
    # for kitti eval
    write_detection_results('./detection_results', all_prediction, all_id_list, all_2d_boxes)
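write_detection_results is defined elsewhere in the repository; as a reference for the expected output, a hypothetical minimal version that writes one KITTI-format label file per frame could look like the sketch below. It assumes each prediction row is [x, y, z, l, w, h, ry, score, class_index] with the class order matching type_whitelist; -1/-10 fill the truncation, occlusion, and alpha fields the network does not predict.

import os

def write_detection_results_sketch(result_dir, predictions, id_list, boxes_2d):
    # Hypothetical stand-in for write_detection_results; not the repo's implementation.
    type_names = ['Car', 'Pedestrian', 'Cyclist']  # assumed class order
    os.makedirs(result_dir, exist_ok=True)
    frames = {}
    for pred, frame_id, box_2d in zip(predictions, id_list, boxes_2d):
        x, y, z, l, w, h, ry = pred[0:7]
        # KITTI columns: type, truncated, occluded, alpha, bbox(4), h w l, x y z, ry, score
        line = '%s -1 -1 -10 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.3f' % (
            type_names[int(pred[8])], box_2d[0], box_2d[1], box_2d[2], box_2d[3],
            h, w, l, x, y, z, ry, pred[7])
        frames.setdefault(int(frame_id), []).append(line)
    for frame_id, lines in frames.items():
        with open(os.path.join(result_dir, '%06d.txt' % frame_id), 'w') as f:
            f.write('\n'.join(lines) + '\n')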
Example No. 13
def get_pointnet_input(sample, proposals_and_scores, roi_features, rpn_score_threshold=0.1):
    proposal_boxes_3d = proposals_and_scores[:, 0:7]
    proposal_scores = proposals_and_scores[:, 7]
    score_mask = proposal_scores > rpn_score_threshold
    # 3D box in the format [x, y, z, l, w, h, ry]
    proposal_boxes_3d = proposal_boxes_3d[score_mask]
    proposal_scores = proposal_scores[score_mask]
    roi_features = roi_features[score_mask]

    proposal_objs = list(map(lambda pair: ProposalObject(pair[0], pair[1], None, None),
                             zip(proposal_boxes_3d, proposal_scores)))
    proposal_corners = list(map(compute_box_3d, proposal_objs))

    # get groundtruth cls label
    label_mask = np.equal(sample[constants.KEY_LABEL_CLASSES], g_type2onehotclass['Car']+1)
    gt_cls = sample[constants.KEY_LABEL_CLASSES][label_mask]
    gt_boxes_3d = sample[constants.KEY_LABEL_BOXES_3D][label_mask]
    gt_boxes_bev = []
    for i in range(len(gt_cls)):
        gt_obj = box_3d_encoder.box_3d_to_object_label(gt_boxes_3d[i], gt_cls[i])
        gt_corner_3d = compute_box_3d(gt_obj)
        gt_boxes_bev.append(gt_corner_3d[:4, [0,2]])

    # point cloud of this frame
    pc = sample[constants.KEY_POINT_CLOUD].T
    frame_calib = sample[constants.KEY_STEREO_CALIB]
    #pc = calib_utils.lidar_to_cam_frame(pc.T, frame_calib)
    # point cloud in proposals
    point_clouds = []
    features = []
    rot_angle_list = []
    prop_cls_labels = []
    for obj, corners, feat in zip(proposal_objs, proposal_corners, roi_features):
        _, inds = extract_pc_in_box3d(pc, corners)
        if not np.any(inds):
            # skip proposals with no points in them
            continue
        # get groundtruth cls label for each proposal
        corners_bev = corners[:4, [0,2]]
        label_idx, iou = find_match_label(corners_bev, gt_boxes_bev)
        if iou >= 0.5:
            prop_cls_labels.append(gt_cls[label_idx] - 1)
        else:
            prop_cls_labels.append(g_type2onehotclass['NonObject'])
        # in rect camera coordinates: x -> right, y -> down, z -> front
        center_rect = (np.min(corners, axis=0) + np.max(corners, axis=0)) / 2
        # FIXME: this induces a 90 degree offset when visualizing; should be fixed together with prepare_data.py
        frustum_angle = -1 * np.arctan2(center_rect[2], center_rect[0])
        # rotate to center
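        # (rotating by pi/2 + frustum_angle brings the proposal centre onto the +z axis)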
        pc_rot = rotate_pc_along_y(pc[inds], np.pi/2.0 + frustum_angle)
        rot_angle_list.append(frustum_angle)
        point_set = pc_rot
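        # Resample to a fixed size; replace=True upsamples proposals with fewer than num_point points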
        choice = np.random.choice(point_set.shape[0], num_point, replace=True)
        point_set = point_set[choice, :]
        point_clouds.append(point_set)
        features.append(feat)
        # import mayavi.mlab as mlab
        # from viz_util import draw_lidar, draw_gt_boxes3d
        # fig = draw_lidar(pc)
        # fig = draw_gt_boxes3d([corners], fig, draw_text=False, color=(1, 1, 1))
        # mlab.plot3d([0, center_rect[0]], [0, center_rect[1]], [0, center_rect[2]], color=(1,1,1), tube_radius=None, figure=fig)
        # input()
    return point_clouds, features, rot_angle_list, prop_cls_labels
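find_match_label, used above to assign each proposal the ground-truth box it overlaps best in bird's-eye view, is defined elsewhere; a minimal polygon-IoU sketch with shapely (hypothetical, not the repository's implementation) would be:

from shapely.geometry import Polygon

def find_match_label_sketch(prop_corners_bev, gt_boxes_bev):
    # Return (index, IoU) of the ground-truth BEV polygon that best overlaps
    # the proposal's BEV polygon; corners are (4, 2) arrays of x/z coordinates.
    prop_poly = Polygon(prop_corners_bev)
    best_idx, best_iou = -1, 0.0
    for i, gt_corners in enumerate(gt_boxes_bev):
        gt_poly = Polygon(gt_corners)
        union = prop_poly.union(gt_poly).area
        iou = prop_poly.intersection(gt_poly).area / union if union > 0 else 0.0
        if iou > best_iou:
            best_idx, best_iou = i, iou
    return best_idx, best_iou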