Example #1
def megadb_to_cct(dataset_name, mega_db, output_path, bbox_only):

    mega_db = [i for i in mega_db if i['dataset'] == dataset_name]
    assert len(mega_db) > 0, 'There are no entries from the dataset {}'.format(dataset_name)
    for i in mega_db:
        del i['dataset']  # all remaining fields will be added to the CCT database
    print('Number of entries belonging to dataset {}: {}'.format(dataset_name, len(mega_db)))

    cct_images, cct_annotations = break_into_images_annotations(mega_db, bbox_only)

    # consolidate categories
    category_names = set()
    for anno in cct_annotations:
        category_names.add(anno['category_name'])

    cat_name_to_id = {
        'empty': 0  # always map 'empty' to 0, even for datasets without 'empty'-labeled images
    }

    if bbox_only:
        cat_name_to_id['animal'] = 1
        cat_name_to_id['person'] = 2
        cat_name_to_id['group'] = 3
        cat_name_to_id['vehicle'] = 4

    for cat in category_names:
        if cat not in cat_name_to_id:
            cat_name_to_id[cat] = len(cat_name_to_id)

    for anno in cct_annotations:
        anno['category_id'] = cat_name_to_id[anno['category_name']]
        del anno['category_name']

    cct_categories = []
    for name, num_id in cat_name_to_id.items():
        cct_categories.append({
            'id': num_id,
            'name': name
        })

    print('Final CCT DB has {} image entries, and {} annotation entries.'.format(len(cct_images), len(cct_annotations)))
    cct_db = {
        'info': {
            'version': str(datetime.now()),
            'date_created': str(datetime.today().date()),
            'description': ''  # to be filled by main()
        },
        'images': cct_images,
        'categories': cct_categories,
        'annotations': cct_annotations
    }
    cct_db = CameraTrapJsonUtils.order_db_keys(cct_db)

    cct_db['info']['description'] = 'COCO Camera Traps database converted from sequences in dataset {}'.format(
        dataset_name)
    print('Writing to output file...')
    write_json(output_path, cct_db)
    print('Done!')
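
A minimal usage sketch for the converter above. The file names, the dataset name, and the assumption that the MegaDB query result is stored as a JSON list of entry dicts (each carrying a 'dataset' field) are all hypothetical:

import json

# Hypothetical input: a MegaDB query result saved as a JSON list of entries.
with open('megadb_query_results.json') as f:
    mega_db = json.load(f)

megadb_to_cct(dataset_name='example_dataset',          # hypothetical dataset name
              mega_db=mega_db,
              output_path='example_dataset_cct.json',  # hypothetical output path
              bbox_only=False)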
Example #2
def empty_accuracy_seq_level(gt_db_indexed,
                             detector_output_path,
                             file_to_image_id,
                             threshold=0.5,
                             visualize_wrongly_classified=False,
                             images_dir=''):
    """ Ground truth label is empty if the fine-category label on all images in this sequence are "empty"

    Args:
        gt_db_indexed: an instance of IndexedJsonDb containing the ground truth
        detector_output_path: path to a file containing the detection results in the API output format
        file_to_image_id: see load_api_results.py - a function to convert image_id
        threshold: threshold between 0 and 1 below which an image is considered empty
        visualize_wrongly_classified: True if want to visualize 5 sequences where the predicted
            classes don't agree with gt
        images_dir: directory where the 'file' field in the detector output is rooted at. Relevant only if
           visualize_wrongly_classified is true
    Returns:

    """
    # TODO move detector_output_path specific code out so that this function evaluates only on classification results (confidences)
    gt_seq_id_to_annotations = CameraTrapJsonUtils.annotations_groupby_image_field(
        gt_db_indexed, image_field='seq_id')
    pred_seq_id_to_res = load_api_results.api_results_groupby(
        detector_output_path, gt_db_indexed, file_to_image_id)

    gt_seq_level = []
    pred_seq_level = []

    empty_category_id_in_gt = gt_db_indexed.cat_name_to_id['empty']

    # evaluate on sequences that are present in both gt and the detector output file
    gt_sequences = set(gt_seq_id_to_annotations.keys())
    pred_sequences = set(pred_seq_id_to_res.keys())

    diff = gt_sequences.symmetric_difference(pred_sequences)
    print('Number of sequences not in both gt and pred: {}'.format(len(diff)))

    intersection_sequences = list(gt_sequences.intersection(pred_sequences))

    for seq_id in intersection_sequences:
        gt_seq_level.append(
            is_gt_seq_non_empty(gt_seq_id_to_annotations[seq_id],
                                empty_category_id_in_gt))
        pred_seq_level.append(pred_seq_max_conf(pred_seq_id_to_res[seq_id]))

    pred_class = [
        0 if max_conf < threshold else 1 for max_conf in pred_seq_level
    ]
    accuracy = accuracy_score(gt_seq_level, pred_class)

    if visualize_wrongly_classified:
        show_wrongly_classified_seq(pred_seq_id_to_res, intersection_sequences,
                                    gt_seq_level, pred_class, images_dir)

    return accuracy, gt_seq_level, pred_seq_level, intersection_sequences
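
A hedged usage sketch for the function above. The file names are placeholders, the identity file_to_image_id mapping is an assumption (see load_api_results.py for the real conversion), and IndexedJsonDb is assumed to accept just the ground truth .json path:

gt_db_indexed = IndexedJsonDb('ground_truth_cct.json')       # hypothetical path
accuracy, gt_seq_level, pred_seq_level, seq_ids = empty_accuracy_seq_level(
    gt_db_indexed,
    detector_output_path='detector_output.json',             # hypothetical path
    file_to_image_id=lambda file_name: file_name,            # placeholder mapping
    threshold=0.5)
print('Sequence-level accuracy: {:.2%}'.format(accuracy))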
Example #3
def get_number_empty_seq(gt_db_indexed):
    gt_seq_id_to_annotations = CameraTrapJsonUtils.annotations_groupby_image_field(
        gt_db_indexed, image_field='seq_id')
    empty_category_id_in_gt = gt_db_indexed.cat_name_to_id['empty']
    gt_seq_level = []
    for seq_id, seq_annotations in gt_seq_id_to_annotations.items():
        gt_seq_level.append(
            is_gt_seq_non_empty(seq_annotations, empty_category_id_in_gt))

    total = len(gt_seq_level)
    num_empty = total - sum(gt_seq_level)
    print('There are {} sequences, {} are empty, which is {}%'.format(
        total, num_empty, 100 * num_empty / total))
Example #4
        def render_image_with_gt(file_info):

            image_relative_path = file_info[0]
            max_conf = file_info[1]
            detections = file_info[2]

            # This should already have been normalized to either '/' or '\'

            image_id = ground_truth_indexed_db.filename_to_id.get(image_relative_path, None)
            if image_id is None:
                print('Warning: could not find ground truth for image {}'.format(image_relative_path))
                return None

            image = ground_truth_indexed_db.image_id_to_image[image_id]
            annotations = ground_truth_indexed_db.image_id_to_annotations[image_id]

            gt_status = image['_detection_status']

            gt_presence = bool(gt_status)

            gt_classes = CameraTrapJsonUtils.annotations_to_classnames(
                    annotations,ground_truth_indexed_db.cat_id_to_name)
            gt_class_summary = ','.join(gt_classes)

            if gt_status > DetectionStatus.DS_MAX_DEFINITIVE_VALUE:
                print('Skipping image {}, does not have a definitive ground truth status (status: {}, classes: {})'.format(
                        image_id, gt_status, gt_class_summary))
                return None

            detected = max_conf > options.confidence_threshold

            if gt_presence and detected:
                if '_classification_accuracy' not in image.keys():
                    res = 'tp'
                elif np.isclose(1, image['_classification_accuracy']):
                    res = 'tpc'
                else:
                    res = 'tpi'
            elif not gt_presence and detected:
                res = 'fp'
            elif gt_presence and not detected:
                res = 'fn'
            else:
                res = 'tn'

            display_name = '<b>Result type</b>: {}, <b>Presence</b>: {}, <b>Class</b>: {}, <b>Max conf</b>: {:0.3f}%, <b>Image</b>: {}'.format(
                res.upper(), str(gt_presence), gt_class_summary,
                max_conf * 100, image_relative_path)

            rendered_image_html_info = render_bounding_boxes(options.image_base_dir,
                                                                image_relative_path,
                                                                display_name,
                                                                detections,
                                                                res,
                                                                detection_categories_map,
                                                                classification_categories_map,
                                                                options)

            image_result = None
            if len(rendered_image_html_info) > 0:
                image_result = [[res,rendered_image_html_info]]
                for gt_class in gt_classes:
                    image_result.append(['class_{}'.format(gt_class),rendered_image_html_info])
            
            return image_result
Example #5
def process_batch_results(options):

    ##%% Expand some options for convenience

    output_dir = options.output_dir
    confidence_threshold = options.confidence_threshold

    ##%% Prepare output dir

    os.makedirs(output_dir, exist_ok=True)

    ##%% Load ground truth if available

    ground_truth_indexed_db = None

    if len(options.ground_truth_json_file) > 0:

        ground_truth_indexed_db = IndexedJsonDb(
            options.ground_truth_json_file,
            b_normalize_paths=True,
            filename_replacements=options.ground_truth_filename_replacements)

        # Mark images in the ground truth as positive or negative
        (nNegative, nPositive, nUnknown, nAmbiguous) = mark_detection_status(
            ground_truth_indexed_db,
            negative_classes=options.negative_classes,
            unknown_classes=options.unlabeled_classes)
        print(
            'Finished loading and indexing ground truth: {} negative, {} positive, {} unknown, {} ambiguous'
            .format(nNegative, nPositive, nUnknown, nAmbiguous))

    ##%% Load detection results

    detection_results = load_api_results(
        options.detector_output_file,
        normalize_paths=True,
        filename_replacements=options.detector_output_filename_replacements)

    # Add a column (pred_detection_label) to indicate predicted detection status
    import numpy as np
    detection_results['pred_detection_label'] = \
        np.where(detection_results['max_confidence'] >= options.confidence_threshold,
                 DetectionStatus.DS_POSITIVE, DetectionStatus.DS_NEGATIVE)

    nPositives = sum(detection_results['pred_detection_label'] ==
                     DetectionStatus.DS_POSITIVE)
    print(
        'Finished loading and preprocessing {} rows from detector output, predicted {} positives'
        .format(len(detection_results), nPositives))

    ##%% If we have ground truth, remove images we can't match to ground truth

    # ground_truth_indexed_db.db['images'][0]
    if ground_truth_indexed_db is not None:

        b_match = [False] * len(detection_results)

        detector_files = detection_results['image_path'].to_list()

        for iFn, fn in enumerate(detector_files):

            # assert fn in ground_truth_indexed_db.filename_to_id, 'Could not find ground truth for row {} ({})'.format(iFn,fn)
            if fn in ground_truth_indexed_db.filename_to_id:
                b_match[iFn] = True

        print('Confirmed filename matches to ground truth for {} of {} files'.
              format(sum(b_match), len(detector_files)))

        detection_results = detection_results[b_match]
        detector_files = detection_results['image_path'].to_list()

        print('Trimmed detection results to {} files'.format(
            len(detector_files)))

    ##%% Sample images for visualization

    images_to_visualize = detection_results

    if options.num_images_to_sample > 0 and options.num_images_to_sample < len(
            detection_results):

        images_to_visualize = images_to_visualize.sample(
            options.num_images_to_sample, random_state=options.sample_seed)

    ##%% Fork here depending on whether or not ground truth is available

    # If we have ground truth, we'll compute precision/recall and sample tp/fp/tn/fn.
    #
    # Otherwise we'll just visualize detections/non-detections.

    if ground_truth_indexed_db is not None:

        ##%% Compute precision/recall

        # numpy array of detection probabilities
        p_detection = detection_results['max_confidence'].values
        n_detections = len(p_detection)

        # numpy array of ground truth labels (0.0/1.0; -1.0 for ambiguous/unknown)
        gt_detections = np.zeros(n_detections, dtype=float)

        for iDetection, fn in enumerate(detector_files):
            image_id = ground_truth_indexed_db.filename_to_id[fn]
            image = ground_truth_indexed_db.image_id_to_image[image_id]
            detection_status = image['_detection_status']

            if detection_status == DetectionStatus.DS_NEGATIVE:
                gt_detections[iDetection] = 0.0
            elif detection_status == DetectionStatus.DS_POSITIVE:
                gt_detections[iDetection] = 1.0
            else:
                gt_detections[iDetection] = -1.0

        # Don't include ambiguous/unknown ground truth in precision/recall analysis
        b_valid_ground_truth = gt_detections >= 0.0

        p_detection_pr = p_detection[b_valid_ground_truth]
        gt_detections_pr = gt_detections[b_valid_ground_truth]

        print('Including {} of {} values in p/r analysis'.format(
            np.sum(b_valid_ground_truth), len(b_valid_ground_truth)))

        precisions, recalls, thresholds = precision_recall_curve(
            gt_detections_pr, p_detection_pr)

        # For completeness, include the result at a confidence threshold of 1.0
        thresholds = np.append(thresholds, [1.0])

        precisions_recalls = pd.DataFrame(
            data={
                'confidence_threshold': thresholds,
                'precision': precisions,
                'recall': recalls
            })

        # Compute and print summary statistics
        average_precision = average_precision_score(gt_detections_pr,
                                                    p_detection_pr)
        print('Average precision: {:.2f}'.format(average_precision))

        # Thresholds go up throughout precisions/recalls/thresholds; find the last
        # value where recall is at or above target.  That's our precision @ target recall.
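        # For illustration (hypothetical numbers): if recalls = [1.0, 0.95, 0.88, ...]
        # in decreasing order and target_recall = 0.9, the last index still at or above
        # the target is 1, so we report precisions[1] as precision @ 0.9 recall.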
        target_recall = 0.9
        b_above_target_recall = recalls >= target_recall
        if not np.any(b_above_target_recall):
            precision_at_target_recall = 0.0
        else:
            # recalls are in decreasing order, so take the last index at or above the target
            i_target_recall = np.where(b_above_target_recall)[0][-1]
            precision_at_target_recall = precisions[i_target_recall]
        print('Precision at {:.2f} recall: {:.2f}'.format(
            target_recall, precision_at_target_recall))

        cm = confusion_matrix(gt_detections_pr,
                              np.array(p_detection_pr) > confidence_threshold)

        # Flatten the confusion matrix
        tn, fp, fn, tp = cm.ravel()

        precision_at_confidence_threshold = tp / (tp + fp)
        recall_at_confidence_threshold = tp / (tp + fn)
        f1 = 2.0 * (precision_at_confidence_threshold * recall_at_confidence_threshold) / \
            (precision_at_confidence_threshold + recall_at_confidence_threshold)

        print(
            'At a confidence threshold of {:.2f}, precision={:.2f}, recall={:.2f}, f1={:.2f}'
            .format(confidence_threshold, precision_at_confidence_threshold,
                    recall_at_confidence_threshold, f1))

        ##%% Render output

        # Write p/r table to .csv file in output directory
        pr_table_filename = os.path.join(output_dir, 'prec_recall.csv')
        precisions_recalls.to_csv(pr_table_filename, index=False)

        # Write precision/recall plot to .png file in output directory
        step_kwargs = {'step': 'post'}
        fig = plt.figure()
        plt.step(recalls, precisions, color='b', alpha=0.2, where='post')
        plt.fill_between(recalls,
                         precisions,
                         alpha=0.2,
                         color='b',
                         **step_kwargs)

        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.05])
        t = 'Precision-Recall curve: AP={:0.2f}, P@{:0.2f}={:0.2f}'.format(
            average_precision, target_recall, precision_at_target_recall)
        plt.title(t)
        pr_figure_relative_filename = 'prec_recall.png'
        pr_figure_filename = os.path.join(output_dir,
                                          pr_figure_relative_filename)
        plt.savefig(pr_figure_filename)
        # plt.show(block=False)
        plt.close(fig)

        ##%% Sample true/false positives/negatives and render to html

        os.makedirs(os.path.join(output_dir, 'tp'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'fp'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'tn'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'fn'), exist_ok=True)

        # Accumulate html image structs (in the format expected by write_html_image_lists)
        # for each category
        images_html = {'tp': [], 'fp': [], 'tn': [], 'fn': []}

        count = 0

        # i_row = 0; row = images_to_visualize.iloc[0]
        for i_row, row in tqdm(images_to_visualize.iterrows(),
                               total=len(images_to_visualize)):

            image_relative_path = row['image_path']

            # This should already have been normalized to either '/' or '\'

            image_id = ground_truth_indexed_db.filename_to_id.get(
                image_relative_path, None)
            if image_id is None:
                print('Warning: could not find ground truth for image {}'.format(
                    image_relative_path))
                continue

            image_info = ground_truth_indexed_db.image_id_to_image[image_id]
            annotations = ground_truth_indexed_db.image_id_to_annotations[
                image_id]

            gt_status = image_info['_detection_status']

            if gt_status > DetectionStatus.DS_MAX_DEFINITIVE_VALUE:
                print(
                    'Skipping image {}, does not have a definitive ground truth status (status: {})'
                    .format(i_row, gt_status))
                continue

            gt_presence = bool(gt_status)

            gt_class_name = CameraTrapJsonUtils.annotationsToString(
                annotations, ground_truth_indexed_db.cat_id_to_name)

            max_conf = row['max_confidence']
            boxes_and_scores = row['detections']

            detected = max_conf > confidence_threshold

            if gt_presence and detected:
                res = 'tp'
            elif not gt_presence and detected:
                res = 'fp'
            elif gt_presence and not detected:
                res = 'fn'
            else:
                res = 'tn'

            display_name = '<b>Result type</b>: {}, <b>Presence</b>: {}, <b>Class</b>: {}, <b>Max conf</b>: {:0.2f}%, <b>Image</b>: {}'.format(
                res.upper(), str(gt_presence), gt_class_name, max_conf * 100,
                image_relative_path)

            rendered_image_html_info = render_bounding_boxes(
                options.image_base_dir, image_relative_path, display_name,
                boxes_and_scores, res, options)

            if len(rendered_image_html_info) > 0:
                images_html[res].append(rendered_image_html_info)

            count += 1

        # ...for each image in our sample

        print('{} images rendered'.format(count))

        # Prepare the individual html image files
        image_counts = prepare_html_subpages(images_html, output_dir)

        # Write index.HTML
        index_page = """<html><body>
        <p><strong>A sample of {} images, annotated with detections above {:.1f}% confidence.</strong></p>
        
        <a href="tp.html">True positives (tp)</a> ({})<br/>
        <a href="tn.html">True negatives (tn)</a> ({})<br/>
        <a href="fp.html">False positives (fp)</a> ({})<br/>
        <a href="fn.html">False negatives (fn)</a> ({})<br/>
        <p>At a confidence threshold of {:0.1f}%, precision={:0.2f}, recall={:0.2f}</p>
        <p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
        </body></html>""".format(
            count, confidence_threshold * 100, image_counts['tp'],
            image_counts['tn'], image_counts['fp'], image_counts['fn'],
            confidence_threshold * 100,
            precision_at_confidence_threshold, recall_at_confidence_threshold,
            len(detection_results), pr_figure_relative_filename)
        output_html_file = os.path.join(output_dir, 'index.html')
        with open(output_html_file, 'w') as f:
            f.write(index_page)

        print('Finished writing html to {}'.format(output_html_file))

    ##%% Otherwise, if we don't have ground truth...

    else:

        ##%% Sample detections/non-detections

        os.makedirs(os.path.join(output_dir, 'detections'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'non_detections'), exist_ok=True)

        # Accumulate html image structs (in the format expected by write_html_image_lists)
        # for each category
        images_html = {
            'detections': [],
            'non_detections': [],
        }

        count = 0

        # i_row = 0; row = images_to_visualize.iloc[0]
        for i_row, row in tqdm(images_to_visualize.iterrows(),
                               total=len(images_to_visualize)):

            image_relative_path = row['image_path']

            # This should already have been normalized to either '/' or '\'
            max_conf = row['max_confidence']
            boxes_and_scores = row['detections']
            detected = max_conf > confidence_threshold

            if detected:
                res = 'detections'
            else:
                res = 'non_detections'

            display_name = '<b>Result type</b>: {}, <b>Image</b>: {}'.format(
                res.upper(), image_relative_path)

            rendered_image_html_info = render_bounding_boxes(
                options.image_base_dir, image_relative_path, display_name,
                boxes_and_scores, res, options)
            if len(rendered_image_html_info) > 0:
                images_html[res].append(rendered_image_html_info)

            count += 1

        # ...for each image in our sample

        print('{} images rendered'.format(count))

        # Prepare the individual html image files
        image_counts = prepare_html_subpages(images_html, output_dir)

        # Write index.HTML
        index_page = """<html><body>
        <p><strong>A sample of {} images, annotated with detections above {:.1f}% confidence.</strong></p>
        
        <a href="detections.html">Detections</a> ({})<br/>
        <a href="non_detections.html">Non-detections</a> ({})<br/>
        </body></html>""".format(count, confidence_threshold * 100,
                                 image_counts['detections'],
                                 image_counts['non_detections'])
        output_html_file = os.path.join(output_dir, 'index.html')
        with open(output_html_file, 'w') as f:
            f.write(index_page)

        print('Finished writing html to {}'.format(output_html_file))
Example #6
def process_batch_results(options):

    ##%% Expand some options for convenience

    output_dir = options.output_dir
    confidence_threshold = options.confidence_threshold

    ##%% Prepare output dir

    os.makedirs(output_dir, exist_ok=True)

    ##%% Load ground truth if available

    ground_truth_indexed_db = None

    if options.ground_truth_json_file and len(
            options.ground_truth_json_file) > 0:

        ground_truth_indexed_db = IndexedJsonDb(
            options.ground_truth_json_file,
            b_normalize_paths=True,
            filename_replacements=options.ground_truth_filename_replacements)

        # Mark images in the ground truth as positive or negative
        n_negative, n_positive, n_unknown, n_ambiguous = mark_detection_status(
            ground_truth_indexed_db,
            negative_classes=options.negative_classes,
            unknown_classes=options.unlabeled_classes)
        print(
            'Finished loading and indexing ground truth: {} negative, {} positive, {} unknown, {} ambiguous'
            .format(n_negative, n_positive, n_unknown, n_ambiguous))

    ##%% Load detection results

    detection_results, other_fields = load_api_results(
        options.api_output_file,
        normalize_paths=True,
        filename_replacements=options.api_output_filename_replacements)
    detection_categories_map = other_fields['detection_categories']
    if 'classification_categories' in other_fields:
        classification_categories_map = other_fields[
            'classification_categories']
    else:
        classification_categories_map = {}

    # Add a column (pred_detection_label) to indicate predicted detection status, not separating out the classes
    detection_results['pred_detection_label'] = \
        np.where(detection_results['max_detection_conf'] >= options.confidence_threshold,
                 DetectionStatus.DS_POSITIVE, DetectionStatus.DS_NEGATIVE)

    n_positives = sum(detection_results['pred_detection_label'] ==
                      DetectionStatus.DS_POSITIVE)
    print(
        'Finished loading and preprocessing {} rows from detector output, predicted {} positives'
        .format(len(detection_results), n_positives))

    ##%% If we have ground truth, remove images we can't match to ground truth

    # ground_truth_indexed_db.db['images'][0]
    if ground_truth_indexed_db is not None:

        b_match = [False] * len(detection_results)

        detector_files = detection_results['file'].tolist()

        for i_fn, fn in enumerate(detector_files):

            # assert fn in ground_truth_indexed_db.filename_to_id, 'Could not find ground truth for row {} ({})'.format(i_fn,fn)
            if fn in ground_truth_indexed_db.filename_to_id:
                b_match[i_fn] = True

        print('Confirmed filename matches to ground truth for {} of {} files'.
              format(sum(b_match), len(detector_files)))

        detection_results = detection_results[b_match]
        detector_files = detection_results['file'].tolist()

        print('Trimmed detection results to {} files'.format(
            len(detector_files)))

    ##%% Sample images for visualization

    images_to_visualize = detection_results

    if options.num_images_to_sample > 0 and options.num_images_to_sample <= len(
            detection_results):

        images_to_visualize = images_to_visualize.sample(
            options.num_images_to_sample, random_state=options.sample_seed)

    ##%% Fork here depending on whether or not ground truth is available

    output_html_file = ''

    # If we have ground truth, we'll compute precision/recall and sample tp/fp/tn/fn.
    #
    # Otherwise we'll just visualize detections/non-detections.

    if ground_truth_indexed_db is not None:

        ##%% DETECTION EVALUATION: Compute precision/recall

        # numpy array of detection probabilities
        p_detection = detection_results['max_detection_conf'].values
        n_detections = len(p_detection)

        # numpy array of bools (0.0/1.0), and -1 as null value
        gt_detections = np.zeros(n_detections, dtype=float)

        for i_detection, fn in enumerate(detector_files):
            image_id = ground_truth_indexed_db.filename_to_id[fn]
            image = ground_truth_indexed_db.image_id_to_image[image_id]
            detection_status = image['_detection_status']

            if detection_status == DetectionStatus.DS_NEGATIVE:
                gt_detections[i_detection] = 0.0
            elif detection_status == DetectionStatus.DS_POSITIVE:
                gt_detections[i_detection] = 1.0
            else:
                gt_detections[i_detection] = -1.0

        # Don't include ambiguous/unknown ground truth in precision/recall analysis
        b_valid_ground_truth = gt_detections >= 0.0

        p_detection_pr = p_detection[b_valid_ground_truth]
        gt_detections_pr = gt_detections[b_valid_ground_truth]

        print('Including {} of {} values in p/r analysis'.format(
            np.sum(b_valid_ground_truth), len(b_valid_ground_truth)))

        precisions, recalls, thresholds = precision_recall_curve(
            gt_detections_pr, p_detection_pr)

        # For completeness, include the result at a confidence threshold of 1.0
        thresholds = np.append(thresholds, [1.0])

        precisions_recalls = pd.DataFrame(
            data={
                'confidence_threshold': thresholds,
                'precision': precisions,
                'recall': recalls
            })

        # Compute and print summary statistics
        average_precision = average_precision_score(gt_detections_pr,
                                                    p_detection_pr)
        print('Average precision: {:.1%}'.format(average_precision))

        # Thresholds go up throughout precisions/recalls/thresholds; find the last
        # value where recall is at or above target.  That's our precision @ target recall.
        target_recall = 0.9
        b_above_target_recall = recalls >= target_recall
        if not np.any(b_above_target_recall):
            precision_at_target_recall = 0.0
        else:
            # recalls are in decreasing order, so take the last index at or above the target
            i_target_recall = np.where(b_above_target_recall)[0][-1]
            precision_at_target_recall = precisions[i_target_recall]
        print('Precision at {:.1%} recall: {:.1%}'.format(
            target_recall, precision_at_target_recall))

        cm = confusion_matrix(gt_detections_pr,
                              np.array(p_detection_pr) > confidence_threshold)

        # Flatten the confusion matrix
        tn, fp, fn, tp = cm.ravel()

        precision_at_confidence_threshold = tp / (tp + fp)
        recall_at_confidence_threshold = tp / (tp + fn)
        f1 = 2.0 * (precision_at_confidence_threshold * recall_at_confidence_threshold) / \
            (precision_at_confidence_threshold + recall_at_confidence_threshold)

        print(
            'At a confidence threshold of {:.1%}, precision={:.1%}, recall={:.1%}, f1={:.1%}'
            .format(confidence_threshold, precision_at_confidence_threshold,
                    recall_at_confidence_threshold, f1))

        ##%% CLASSIFICATION evaluation
        classifier_accuracies = []
        # Mapping of classnames to idx for the confusion matrix.
        # The lambda is actually kind of a hack, because we assume that
        # the following code does not reassign classname_to_idx
        classname_to_idx = collections.defaultdict(
            lambda: len(classname_to_idx))
        # Confusion matrix as defaultdict of defaultdict
        # Rows / first index is ground truth, columns / second index is predicted category
        classifier_cm = collections.defaultdict(
            lambda: collections.defaultdict(lambda: 0))
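        # Illustration of the defaultdict trick above: the first class name looked up in
        # classname_to_idx is assigned index 0, the next new name index 1, and so on;
        # classifier_cm[gt_idx][pred_idx] then counts how often each predicted class
        # co-occurs with each ground truth class.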
        for iDetection, fn in enumerate(detector_files):
            image_id = ground_truth_indexed_db.filename_to_id[fn]
            image = ground_truth_indexed_db.image_id_to_image[image_id]
            pred_class_ids = [det['classifications'][0][0]
                for det in detection_results['detections'].iloc[iDetection] if 'classifications' in det.keys()]
            pred_classnames = [
                classification_categories_map[pd] for pd in pred_class_ids
            ]

            # If this image has classification predictions, and an unambiguous class
            # annotated, and is a positive image
            if len(pred_classnames) > 0 \
                    and '_unambiguous_category' in image.keys() \
                    and image['_detection_status'] == DetectionStatus.DS_POSITIVE:
                # The unambiguous category; we make it a set for easier handling afterward
                # TODO: actually we can replace the unambiguous category by all annotated
                # categories. However, then the confusion matrix doesn't make sense anymore
                # TODO: make sure we are using the class names as strings in both, not IDs
                gt_categories = set([image['_unambiguous_category']])
                pred_categories = set(pred_classnames)
                # Compute the accuracy as intersection over union,
                # i.e. (# of categories in both prediction and GT)
                #      divided by (# of categories in either prediction or GT).
                # If there is only one GT category, the result is 1.0 when the
                # prediction consists of exactly that category.
                # It is 1.0/(# of predicted top-1 categories) if the GT category is
                # one of the predicted top-1 categories.
                # It is 0.0 if none of the predicted categories is correct.
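                # Hypothetical illustration: GT = {'deer'} and predicted top-1 classes
                # across boxes = {'deer', 'elk'} give accuracy = 1/2 = 0.5, so this image
                # later lands in 'tpi' rather than 'tpc'.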
                classifier_accuracies.append(
                    len(gt_categories & pred_categories) /
                    len(gt_categories | pred_categories))
                image['_classification_accuracy'] = classifier_accuracies[-1]
                # Distribute this accuracy across all predicted categories in the
                # confusion matrix
                assert len(gt_categories) == 1
                gt_class_idx = classname_to_idx[list(gt_categories)[0]]
                for pred_category in pred_categories:
                    pred_class_idx = classname_to_idx[pred_category]
                    classifier_cm[gt_class_idx][pred_class_idx] += 1

        # If we have classification results
        if len(classifier_accuracies) > 0:
            # Build confusion matrix as array from classifier_cm
            all_class_ids = sorted(classname_to_idx.values())
            classifier_cm_array = np.array(
                [[classifier_cm[r_idx][c_idx] for c_idx in all_class_ids]
                 for r_idx in all_class_ids],
                dtype=float)
            classifier_cm_array /= (
                classifier_cm_array.sum(axis=1, keepdims=True) + 1e-7)
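            # Each row (one ground truth class) now sums to ~1, i.e. it holds the fraction
            # of that class's predictions assigned to each predicted class; the 1e-7 term
            # only guards against division by zero for empty rows.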

            # Print some statistics
            print("Finished computation of {} classification results".format(
                len(classifier_accuracies)))
            print("Mean accuracy: {}".format(np.mean(classifier_accuracies)))

            # Prepare confusion matrix output
            # Get CM matrix as string
            sio = io.StringIO()
            np.savetxt(sio, classifier_cm_array * 100, fmt='%5.1f')
            cm_str = sio.getvalue()
            # Get fixed-size classname for each idx
            idx_to_classname = {v: k for k, v in classname_to_idx.items()}
            classname_list = [
                idx_to_classname[idx]
                for idx in sorted(classname_to_idx.values())
            ]
            classname_headers = [
                '{:<5}'.format(cname[:5]) for cname in classname_list
            ]

            # Prepend class name on each line and add to the top
            cm_str_lines = [' ' * 16 + ' '.join(classname_headers)]
            cm_str_lines += [
                '{:>15}'.format(cn[:15]) + ' ' + cm_line
                for cn, cm_line in zip(classname_list, cm_str.splitlines())
            ]

            # print formatted confusion matrix
            print("Confusion matrix: ")
            print(*cm_str_lines, sep='\n')

            # Plot confusion matrix
            # To manually add more space at bottom: plt.rcParams['figure.subplot.bottom'] = 0.1
            # Add 0.5 to figsize for every class. For two classes, this will result in
            # fig = plt.figure(figsize=[4,4])
            fig = vis_utils.plot_confusion_matrix(classifier_cm_array,
                                                  classname_list,
                                                  normalize=False,
                                                  title='Confusion matrix',
                                                  cmap=plt.cm.Blues,
                                                  vmax=1.0,
                                                  use_colorbar=True,
                                                  y_label=True)
            cm_figure_relative_filename = 'confusion_matrix.png'
            cm_figure_filename = os.path.join(output_dir,
                                              cm_figure_relative_filename)
            plt.savefig(cm_figure_filename)
            plt.close(fig)

        ##%% Render output

        # Write p/r table to .csv file in output directory
        pr_table_filename = os.path.join(output_dir, 'prec_recall.csv')
        precisions_recalls.to_csv(pr_table_filename, index=False)

        # Write precision/recall plot to .png file in output directory
        t = 'Precision-Recall curve: AP={:0.1%}, P@{:0.1%}={:0.1%}'.format(
            average_precision, target_recall, precision_at_target_recall)
        fig = vis_utils.plot_precision_recall_curve(precisions, recalls, t)
        pr_figure_relative_filename = 'prec_recall.png'
        pr_figure_filename = os.path.join(output_dir,
                                          pr_figure_relative_filename)
        plt.savefig(pr_figure_filename)
        # plt.show(block=False)
        plt.close(fig)

        ##%% Sample true/false positives/negatives with correct/incorrect top-1
        # classification and render to html

        # Accumulate html image structs (in the format expected by write_html_image_lists)
        # for each category, e.g. 'tp', 'fp', ..., 'class_bird', ...
        images_html = collections.defaultdict(lambda: [])
        # Add default entries by accessing them for the first time
        [images_html[res] for res in ['tp', 'tpc', 'tpi', 'fp', 'tn', 'fn']]
        for res in images_html.keys():
            os.makedirs(os.path.join(output_dir, res), exist_ok=True)

        count = 0

        # i_row = 0; row = images_to_visualize.iloc[0]
        for i_row, row in tqdm(images_to_visualize.iterrows(),
                               total=len(images_to_visualize)):

            image_relative_path = row['file']

            # This should already have been normalized to either '/' or '\'

            image_id = ground_truth_indexed_db.filename_to_id.get(
                image_relative_path, None)
            if image_id is None:
                print('Warning: could not find ground truth for image {}'.format(
                    image_relative_path))
                continue

            image = ground_truth_indexed_db.image_id_to_image[image_id]
            annotations = ground_truth_indexed_db.image_id_to_annotations[
                image_id]

            gt_status = image['_detection_status']

            if gt_status > DetectionStatus.DS_MAX_DEFINITIVE_VALUE:
                print(
                    'Skipping image {}, does not have a definitive ground truth status (status: {})'
                    .format(i_row, gt_status))
                continue

            gt_presence = bool(gt_status)

            gt_classes = CameraTrapJsonUtils.annotations_to_classnames(
                annotations, ground_truth_indexed_db.cat_id_to_name)
            gt_class_summary = ','.join(gt_classes)

            max_conf = row['max_detection_conf']
            detections = row['detections']

            detected = max_conf > confidence_threshold

            if gt_presence and detected:
                if '_classification_accuracy' not in image.keys():
                    res = 'tp'
                elif np.isclose(1, image['_classification_accuracy']):
                    res = 'tpc'
                else:
                    res = 'tpi'
            elif not gt_presence and detected:
                res = 'fp'
            elif gt_presence and not detected:
                res = 'fn'
            else:
                res = 'tn'

            display_name = '<b>Result type</b>: {}, <b>Presence</b>: {}, <b>Class</b>: {}, <b>Max conf</b>: {:0.2f}%, <b>Image</b>: {}'.format(
                res.upper(), str(gt_presence), gt_class_summary,
                max_conf * 100, image_relative_path)

            rendered_image_html_info = render_bounding_boxes(
                options.image_base_dir, image_relative_path, display_name,
                detections, res, detection_categories_map,
                classification_categories_map, options)

            if len(rendered_image_html_info) > 0:
                images_html[res].append(rendered_image_html_info)
                for gt_class in gt_classes:
                    images_html['class_{}'.format(gt_class)].append(
                        rendered_image_html_info)

            count += 1

        # ...for each image in our sample

        print('{} images rendered'.format(count))

        # Prepare the individual html image files
        image_counts = prepare_html_subpages(images_html, output_dir)

        # Write index.HTML
        all_tp_count = image_counts['tp'] + image_counts['tpc'] + image_counts[
            'tpi']
        total_count = all_tp_count + image_counts['tn'] + image_counts[
            'fp'] + image_counts['fn']
        index_page = """<html><body>
        <h2>Evaluation</h2>

        <h3>Sample images</h3>
        <p>A sample of {} images, annotated with detections above {:.1%} confidence.</p>
        True positives (TP) ({} or {:0.1%})<br/>
        -- <a href="tpc.html">with all correct top-1 predictions (TPC)</a> ({})<br/>
        -- <a href="tpi.html">with one or more incorrect top-1 prediction (TPI)</a> ({})<br/>
        -- <a href="tp.html">without classification evaluation</a> (*) ({})<br/>
        <a href="tn.html">True negatives (TN)</a> ({} or {:0.1%})<br/>
        <a href="fp.html">False positives (FP)</a> ({} or {:0.1%})<br/>
        <a href="fn.html">False negatives (FN)</a> ({} or {:0.1%})<br/>
        <p>(*) We do not evaluate the classification results of an image if the classification
        information is missing, if the image contains
        categories like 'empty' or 'human', or if the image has multiple classification
        labels.</p>""".format(
            count, confidence_threshold, all_tp_count,
            all_tp_count / total_count, image_counts['tpc'],
            image_counts['tpi'], image_counts['tp'], image_counts['tn'],
            image_counts['tn'] / total_count, image_counts['fp'],
            image_counts['fp'] / total_count, image_counts['fn'],
            image_counts['fn'] / total_count)
        index_page += """
            <h3>Detection results</h3>
            <p>At a confidence threshold of {:0.1%}, precision={:0.1%}, recall={:0.1%}</p>
            <p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
            """.format(confidence_threshold, precision_at_confidence_threshold,
                       recall_at_confidence_threshold, len(detection_results),
                       pr_figure_relative_filename)
        if len(classifier_accuracies) > 0:
            index_page += """
                <h3>Classification results</h3>
                <p>Classification accuracy: {:.2%}<br>
                The accuracy is computed only for images with exactly one classification label.
                The accuracy of an image is computed as 1/(number of unique detected top-1 classes),
                i.e. if the model detects multiple boxes with different top-1 classes, then the accuracy
                decreases and the image is put into 'TPI'.</p>
                <p>Confusion matrix:</p>
                <p><img src="{}"></p>
                <div style='font-family:monospace;display:block;'>{}</div>
                """.format(np.mean(classifier_accuracies),
                           cm_figure_relative_filename,
                           "<br>".join(cm_str_lines).replace(' ', '&nbsp;'))
        # Show links to each GT class
        index_page += "<h3>Images of specific classes:</h3>"
        # Add links to all available classes
        for cname in sorted(classname_to_idx.keys()):
            index_page += "<a href='class_{0}.html'>{0}</a> ({1})<br>".format(
                cname, len(images_html['class_{}'.format(cname)]))
        # Close body and html tag
        index_page += "</body></html>"
        output_html_file = os.path.join(output_dir, 'index.html')
        with open(output_html_file, 'w') as f:
            f.write(index_page)

        print('Finished writing html to {}'.format(output_html_file))

    ##%% Otherwise, if we don't have ground truth...

    else:

        ##%% Sample detections/non-detections

        os.makedirs(os.path.join(output_dir, 'detections'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'non_detections'), exist_ok=True)

        # Accumulate html image structs (in the format expected by write_html_image_lists)
        # for each category
        images_html = collections.defaultdict(lambda: [])
        # Add default entries by accessing them for the first time
        [images_html[res] for res in ['detections', 'non_detections']]
        for res in images_html.keys():
            os.makedirs(os.path.join(output_dir, res), exist_ok=True)

        count = 0

        has_classification_info = False
        # i_row = 0; row = images_to_visualize.iloc[0]
        for i_row, row in tqdm(images_to_visualize.iterrows(),
                               total=len(images_to_visualize)):

            image_relative_path = row['file']

            # This should already have been normalized to either '/' or '\'
            max_conf = row['max_detection_conf']
            detections = row['detections']
            detected = max_conf > confidence_threshold

            if detected:
                res = 'detections'
            else:
                res = 'non_detections'

            display_name = '<b>Result type</b>: {}, <b>Image</b>: {}, <b>Max conf</b>: {}'.format(
                res, image_relative_path, max_conf)

            rendered_image_html_info = render_bounding_boxes(
                options.image_base_dir, image_relative_path, display_name,
                detections, res, detection_categories_map,
                classification_categories_map, options)
            if len(rendered_image_html_info) > 0:
                images_html[res].append(rendered_image_html_info)
                for det in detections:
                    if 'classifications' in det:
                        has_classification_info = True
                        top1_class = classification_categories_map[
                            det['classifications'][0][0]]
                        images_html['class_{}'.format(top1_class)].append(
                            rendered_image_html_info)

            count += 1

        # ...for each image in our sample

        print('{} images rendered'.format(count))

        # Prepare the individual html image files
        image_counts = prepare_html_subpages(images_html, output_dir)

        # Write index.HTML
        total_images = image_counts['detections'] + image_counts[
            'non_detections']
        index_page = """<html><body>
        <h2>Visualization of results</h2>
        <p>A sample of {} images, annotated with detections above {:.1%} confidence.</p>
        <h3>Sample images</h3>

        <a href="detections.html">Detections</a> ({}, {:.1%})<br/>
        <a href="non_detections.html">Non-detections</a> ({}, {:.1%})<br/>""".format(
            count, confidence_threshold, image_counts['detections'],
            image_counts['detections'] / total_images,
            image_counts['non_detections'],
            image_counts['non_detections'] / total_images)

        if has_classification_info:
            index_page += "<h3>Images of detected classes</h3>"
            index_page += "<p>The same image might appear under multiple classes if multiple species were detected.</p>"
        # Add links to all available classes
        for cname in sorted(classification_categories_map.values()):
            ccount = len(images_html['class_{}'.format(cname)])
            if ccount > 0:
                index_page += "<a href='class_{0}.html'>{0}</a> ({1})<br>".format(
                    cname, ccount)

        index_page += "</body></html>"
        output_html_file = os.path.join(output_dir, 'index.html')
        with open(output_html_file, 'w') as f:
            f.write(index_page)

        print('Finished writing html to {}'.format(output_html_file))

    # ...if we do/don't have ground truth

    ppresults = PostProcessingResults()
    ppresults.output_html_file = output_html_file
    return ppresults
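
A hedged end-to-end usage sketch for the second process_batch_results above. The PostProcessingOptions class name is an assumption (only PostProcessingResults appears in the code), as is the idea that unset attributes fall back to sensible defaults; all paths are placeholders and only attributes the function visibly reads are set:

options = PostProcessingOptions()                        # assumed options container
options.api_output_file = 'detector_output.json'         # hypothetical path
options.api_output_filename_replacements = {}
options.image_base_dir = '/data/camera_trap_images'      # hypothetical path
options.output_dir = '/tmp/postprocessing'               # hypothetical path
options.confidence_threshold = 0.8
options.ground_truth_json_file = ''                      # empty => no ground truth branch
options.num_images_to_sample = 500
options.sample_seed = 0

ppresults = process_batch_results(options)
print('Open {} to browse the sampled results'.format(ppresults.output_html_file))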