Example #1
    def test_greedy_assignment(self):
        gt_big_mask = self.square_mask.copy()
        gt_small_mask = np.zeros(gt_big_mask.shape, dtype=gt_big_mask.dtype)
        gt_small_mask[900:1101, 900:1101] = True

        gt_big = GroundTruthInstance(gt_big_mask, 0)
        gt_small = GroundTruthInstance(gt_small_mask, 0)
        gts = [gt_big, gt_small]
        # Big Det
        det1 = PBoxDetInst(
            self.square_label_list, [700, 700, 1300, 1300],
            [[[10000, 0], [0, 10000]], [[10000, 0], [0, 10000]]])
        # Small Det
        det2 = PBoxDetInst(
            self.square_label_list, [800, 800, 1200, 1200],
            [[[10000, 0], [0, 10000]], [[10000, 0], [0, 10000]]])

        dets = [det1, det2]

        evaluator = PDQ(greedy_mode=True)
        evaluator.score([(gts, dets)])
        det_evals = evaluator._det_evals
        for img_det_evals in det_evals:
            for det_eval in img_det_evals:
                # With greedy assignment, the big detection should be matched to the small gt and the small detection to the big gt
                self.assertNotEqual(det_eval['det_id'], det_eval['gt_id'])
Example #2
    def test_detection_no_gt(self):
        detections = [BBoxDetInst(self.default_label_list, self.gt_box)]
        gts = []
        evaluator = PDQ()
        score = evaluator.score([(gts, detections)])

        self.assertEqual(score, 0)
Example #3
    def test_half_position_confidence(self):
        detections = [
            BBoxDetInst(self.square_label_list, self.square_gt_box, 0.5)
        ]
        evaluator = PDQ()
        score = evaluator.score([(self.square_gt, detections)])

        self.assertAlmostEqual(np.sqrt(0.5), score, 4)
Example #4
    def test_correct_second_detection(self):
        gts = [val for val in self.square_gt]
        detections = [BBoxDetInst(self.default_label_list, [0, 0, 10, 10]),
                      BBoxDetInst(self.default_label_list, self.gt_box)]

        evaluator = PDQ()
        score = evaluator.score([(gts, detections)])

        self.assertAlmostEqual(score, 0.5)
Example #5
    def test_multiple_detections(self):
        ten_detections = [
            BBoxDetInst(self.square_label_list, self.square_gt_box)
            for _ in range(10)
        ]
        evaluator = PDQ()
        score = evaluator.score([(self.square_gt, ten_detections)])

        self.assertAlmostEqual(score, 0.1)
Example #6
    def test_no_detections_for_image(self):
        gts1 = [val for val in self.square_gt]
        gts2 = [GroundTruthInstance(self.square_mask, 0, 1, 1)]
        dets1 = [BBoxDetInst(self.default_label_list, self.gt_box)]
        dets2 = []
        evaluator = PDQ()
        score = evaluator.score([(gts1, dets1), (gts2, dets2)])

        self.assertAlmostEqual(score, 0.5)
Example #7
    def test_run_score_multiple_times_on_single_pdq_instance(self):
        two_detections = [BBoxDetInst(self.default_label_list, self.gt_box),
                          BBoxDetInst(self.default_label_list, self.gt_box)]
        one_detection = [BBoxDetInst(self.default_label_list, self.gt_box)]

        evaluator = PDQ()
        score_two = evaluator.score([(self.square_gt, two_detections)])
        score_one = evaluator.score([(self.square_gt, one_detection)])

        self.assertAlmostEqual(score_two, 0.5)
        self.assertAlmostEqual(score_one, 1.0)
Example #8
    def test_no_detections_for_image_with_too_small_gt(self):
        gts1 = [val for val in self.square_gt]
        small_mask = np.zeros(self.img_size, dtype=bool)
        small_mask[500:504, 500:501] = True
        gts2 = [GroundTruthInstance(small_mask, 0, 1, 1)]
        dets1 = [BBoxDetInst(self.default_label_list, self.gt_box)]
        dets2 = []
        evaluator = PDQ()
        score = evaluator.score([(gts1, dets1), (gts2, dets2)])

        self.assertAlmostEqual(score, 1.0)
Example #9
    def test_detect_500_extra_pixels(self):
        det_box = [val for val in self.gt_box]
        det_box[2] += 1
        detections = [BBoxDetInst(self.default_label_list, det_box)]
        evaluator = PDQ()
        score = evaluator.score([(self.square_gt, detections)])

        expected_spatial_quality = np.exp((_MAX_LOSS * 500) / (500 * 500))

        expected_gmean = np.sqrt(expected_spatial_quality)

        self.assertAlmostEqual(expected_gmean, score, 4)
Example #10
def main(method, n_classes):

    if method == 1:
        print("Extracting GT and Detections")
        param_sequence, len_sequences = gen_param_sequence()

        print("Calculating PDQ")

        # Get summary statistics (PDQ, avg_qualities)
        evaluator = PDQ(filter_gts=True, segment_mode=False, greedy_mode=False)
        pdq = evaluator.score(param_sequence)
        TP, FP, FN = evaluator.get_assignment_counts()
        avg_spatial_quality = evaluator.get_avg_spatial_score()
        avg_label_quality = evaluator.get_avg_label_score()
        avg_overall_quality = evaluator.get_avg_overall_quality_score()
        avg_fg_quality = evaluator.get_avg_fg_quality_score()
        avg_bg_quality = evaluator.get_avg_bg_quality_score()

        # Get the detection-wise and ground-truth-wise qualities and matches for PDQ
        all_gt_eval_dicts = evaluator._gt_evals
        all_det_eval_dicts = evaluator._det_evals

        result = {
            "PDQ": pdq,
            "avg_pPDQ": avg_overall_quality,
            "avg_spatial": avg_spatial_quality,
            'avg_fg': avg_fg_quality,
            'avg_bg': avg_bg_quality,
            "avg_label": avg_label_quality,
            "TP": TP,
            "FP": FP,
            "FN": FN
        }

        return result

    # Calculate mAP
    if method == 0:
        print("Calculating mAP")
        param_sequence, len_sequences = gen_param_sequence()
        mAP = coco_mAP(param_sequence, n_classes, use_heatmap=False)
        return mAP
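As a rough illustration of how this two-mode entry point might be driven, the sketch below assumes main, gen_param_sequence and coco_mAP are defined in the same script as above; the method values and n_classes=80 are illustrative only.
# Hypothetical driver; method=1 returns the PDQ summary dict, method=0 returns mAP.
if __name__ == '__main__':
    pdq_stats = main(method=1, n_classes=80)
    print('PDQ: {0:.4f}'.format(pdq_stats['PDQ']))

    map_score = main(method=0, n_classes=80)
    print('mAP: {0:.4f}'.format(map_score))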
Example #11
    def test_multiple_missed_gts_too_small(self):

        gts = [val for val in self.square_gt]
        for i in range(9):
            # Create small 2x2 boxes along the top edge of the image (2-pixel buffer); these are too small to count as missed gts
            new_gt_mask = np.zeros(gts[0].segmentation_mask.shape, gts[0].segmentation_mask.dtype)
            new_gt_mask[2:4, 2 + i * 4:4 + i * 4] = np.amax(gts[0].segmentation_mask)
            gts.append(GroundTruthInstance(new_gt_mask, 0, 0, i+1))
        detections = [BBoxDetInst(self.default_label_list, self.gt_box)]
        evaluator = PDQ()
        score = evaluator.score([(gts, detections)])

        self.assertAlmostEqual(score, 1.0)
Example #12
    def test_cross_gt_detected_by_perfect_box_in_segment_mode(self):
        detections = [BBoxDetInst(self.cross_label_list, self.cross_gt_box)]
        evaluator = PDQ(segment_mode=True)
        score = evaluator.score([(self.cross_gt, detections)])

        expected_num_missed_pixels = np.sum(
            np.logical_xor(self.square_mask, self.cross_mask))
        expected_spatial_quality = np.exp(
            (_MAX_LOSS * expected_num_missed_pixels) / np.sum(self.cross_mask))

        expected_gmean = np.sqrt(expected_spatial_quality)

        self.assertAlmostEqual(score, expected_gmean, 4)
Example #13
    def test_no_detections_for_image_with_small_and_big_gt_with_filtering(
            self):
        gts1 = [val for val in self.square_gt]
        small_mask = np.zeros(self.img_size, dtype=bool)
        small_mask[500:504, 500:501] = True
        gts2 = [
            GroundTruthInstance(self.square_mask, 0),
            GroundTruthInstance(small_mask, 0)
        ]
        dets1 = [BBoxDetInst(self.square_label_list, self.square_gt_box)]
        dets2 = []
        evaluator = PDQ(filter_gts=True)
        score = evaluator.score([(gts1, dets1), (gts2, dets2)])

        self.assertAlmostEqual(score, 0.5)
Example #14
    def test_no_overlap_box(self):
        det_box = [val for val in self.gt_box]
        box_width = (self.gt_box[2]+1) - self.gt_box[0]
        det_box[0] += box_width
        det_box[2] += box_width

        detections = [BBoxDetInst(self.default_label_list, det_box)]
        evaluator = PDQ()
        score = evaluator.score([(self.square_gt, detections)])

        expected_spatial_quality = 0

        expected_gmean = np.sqrt(expected_spatial_quality)

        self.assertAlmostEqual(expected_gmean, score, 4)
Example #15
def do_evaluation(submission_dir, ground_truth_dir):
    """
    Evaluate a particular image sequence
    :param submission_dir: location of the detections .json files (one for each sequence)
    :param ground_truth_dir: location of the ground-truth folders (one for each sequence)
    :return: Dictionary summarising the evaluation (score, average spatial, label, and
    overall (avg_pPDQ) qualities, true positives, false positives, and false negatives)
    """
    ground_truth = gt_loader.read_ground_truth(ground_truth_dir)
    detections = submission_loader.read_submission(submission_dir,
                                                   expected_sequence_names=set(
                                                       ground_truth.keys()))
    matches = gt_loader.match_sequences(ground_truth, detections)
    evaluator = PDQ()
    score = evaluator.score(matches)
    TP, FP, FN = evaluator.get_assignment_counts()
    avg_spatial_quality = evaluator.get_avg_spatial_score()
    avg_label_quality = evaluator.get_avg_label_score()
    avg_overall_quality = evaluator.get_avg_overall_quality_score()
    return {
        'score': score * 100,
        'avg_spatial': avg_spatial_quality,
        'avg_label': avg_label_quality,
        'avg_pPDQ': avg_overall_quality,
        'TPs': TP,
        'FPs': FP,
        'FNs': FN
    }
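A minimal usage sketch for the helper above, assuming the two paths are placeholders and that gt_loader, submission_loader and PDQ are importable exactly as in the example:
# Hypothetical paths, for illustration only; point them at a real submission
# directory and ground-truth directory laid out as the loaders expect.
results = do_evaluation('/path/to/submission', '/path/to/ground_truth')
print('PDQ score (x100): {0:.2f}'.format(results['score']))
print('TP/FP/FN: {0}/{1}/{2}'.format(results['TPs'], results['FPs'], results['FNs']))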
Example #16
    def test_missed_gts_and_unmatched_detections(self):
        gts = [val for val in self.square_gt]
        for i in range(10):
            # Create small 12x12 boxes along the top edge of the image (2-pixel buffer); these gts remain undetected
            new_gt_mask = np.zeros(gts[0].segmentation_mask.shape,
                                   gts[0].segmentation_mask.dtype)
            new_gt_mask[2:14, 2 + i * 14:14 + i * 14] = np.amax(
                gts[0].segmentation_mask)
            gts.append(GroundTruthInstance(new_gt_mask, 0))

        detections = [
            BBoxDetInst(self.square_label_list, self.square_gt_box)
            for _ in range(10)
        ]

        evaluator = PDQ()
        score = evaluator.score([(gts, detections)])

        self.assertAlmostEqual(score, 1 / 20.)
Example #17
def do_evaluation(submission_dir,
                  ground_truth_dir,
                  sequences=None,
                  num_frames=-1,
                  start_frame=0):
    """
    Evaluate a particular image sequence
    :param submission_dir: location of the detections .json files (one for each sequence)
    :param ground_truth_dir: location of the ground-truth folders (one for each sequence).
    Each ground-truth folder must contain mask images (.png format) and a matching labels.json file.
    :param sequences: A whitelist of sequence ids to include, as integers
    :param num_frames: The number of frames to read from each sequence, default is all available.
    :param start_frame: The index of the first frame to read
    :return: Dictionary containing summary of all metrics used in competition leaderboard
    (score, average spatial quality, average label quality, average overall quality (avg_pPDQ),
    average false-positive quality, true positives, false positives, and false negatives)
    """
    ground_truth = gt_loader.read_ground_truth(ground_truth_dir,
                                               sequences,
                                               start_index=start_frame,
                                               end_index=start_frame +
                                               num_frames)
    detections = submission_loader.read_submission(
        submission_dir,
        expected_sequence_names=set(ground_truth.keys()),
        start_index=start_frame,
        end_index=start_frame + num_frames)
    matches = gt_loader.match_sequences(ground_truth, detections)
    evaluator = PDQ()
    score = evaluator.score(matches)
    TP, FP, FN = evaluator.get_assignment_counts()
    avg_spatial_quality = evaluator.get_avg_spatial_score()
    avg_label_quality = evaluator.get_avg_label_score()
    avg_overall_quality = evaluator.get_avg_overall_quality_score()
    avg_fp_quality = evaluator.get_avg_fp_score()
    return {
        'score': score * 100,
        'avg_spatial': avg_spatial_quality,
        'avg_label': avg_label_quality,
        'avg_pPDQ': avg_overall_quality,
        'avg_fp_quality': avg_fp_quality,
        'TPs': TP,
        'FPs': FP,
        'FNs': FN
    }
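A short usage sketch of the extended signature, with hypothetical paths and sequence ids; it scores only the first 100 frames of sequences 0 and 1, as the docstring describes:
# Hypothetical paths and sequence ids, for illustration only.
results = do_evaluation('/path/to/submission',
                        '/path/to/ground_truth',
                        sequences=[0, 1],   # whitelist of sequence ids (integers)
                        num_frames=100,     # frames read from each sequence
                        start_frame=0)      # index of the first frame to read
print('avg_pPDQ: {0:.4f}'.format(results['avg_pPDQ']))
print('avg_fp_quality: {0:.4f}'.format(results['avg_fp_quality']))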
Example #18
def main():
    if not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    print("Extracting GT and Detections")
    param_sequence, len_sequences = gen_param_sequence()

    print("Calculating PDQ")

    # Get summary statistics (PDQ, avg_qualities)
    evaluator = PDQ(filter_gts=(args.test_set == 'rvc1'),
                    segment_mode=args.segment_mode,
                    greedy_mode=args.greedy_mode)
    pdq = evaluator.score(param_sequence)
    TP, FP, FN = evaluator.get_assignment_counts()
    avg_spatial_quality = evaluator.get_avg_spatial_score()
    avg_label_quality = evaluator.get_avg_label_score()
    avg_overall_quality = evaluator.get_avg_overall_quality_score()
    avg_fg_quality = evaluator.get_avg_fg_quality_score()
    avg_bg_quality = evaluator.get_avg_bg_quality_score()

    # Get the detection-wise and ground-truth-wise qualities and matches for PDQ and save them to file
    all_gt_eval_dicts = evaluator._gt_evals
    all_det_eval_dicts = evaluator._det_evals

    # Calculate mAP
    print("Calculating mAP")
    # generate the parameter sequence again for new tests (generator does not hold onto data once used)
    print("Extracting GT and Detections")
    param_sequence, len_sequences = gen_param_sequence()
    mAP = coco_mAP(param_sequence, use_heatmap=args.mAP_heatmap)
    print('mAP: {0}'.format(mAP))

    # Calculate LRP
    print("Calculating LRP")
    # generate the parameter sequence again for new tests (generator does not hold onto data once used)
    print("Extracting GT and Detections")
    param_sequence, len_sequences = gen_param_sequence()
    # Use same BBox definition as would be used for mAP
    # Extract all moLRP statistics
    LRP_dict = coco_LRP(param_sequence, use_heatmap=args.mAP_heatmap, full=True)

    # Compile evaluation statistics into a single dictionary
    result = {
        "PDQ": pdq,
        "avg_pPDQ": avg_overall_quality,
        "avg_spatial": avg_spatial_quality,
        'avg_fg': avg_fg_quality,
        'avg_bg': avg_bg_quality,
        "avg_label": avg_label_quality,
        "TP": TP,
        "FP": FP,
        "FN": FN,
        'mAP': mAP,
        'moLRP': LRP_dict['moLRP'],
        'moLRPLoc': LRP_dict['moLRPLoc'],
        'moLRPFP': LRP_dict['moLRPFP'],
        'moLRPFN': LRP_dict['moLRPFN']
    }
    print("PDQ: {0:4f}\n"
          "mAP: {1:4f}\n"
          "avg_pPDQ:{2:4f}\n"
          "avg_spatial:{3:4f}\n"
          "avg_label:{4:4f}\n"
          "avg_foreground:{5:4f}\n"
          "avg_background:{6:4f}\n"
          "TP:{7}\nFP:{8}\nFN:{9}\n"
          "moLRP:{10:4f}\n"
          "moLRPLoc:{11:4f}\n"
          "moLRPFP:{12:4f}\n"
          "moLRPFN:{13:4f}\n".format(pdq, mAP, avg_overall_quality,
                                     avg_spatial_quality, avg_label_quality,
                                     avg_fg_quality, avg_bg_quality, TP, FP,
                                     FN, LRP_dict['moLRP'],
                                     LRP_dict['moLRPLoc'], LRP_dict['moLRPFP'],
                                     LRP_dict['moLRPFN']))

    # Save evaluation statistics to file
    with open(os.path.join(args.save_folder, 'scores.txt'),
              'w') as output_file:
        output_file.write("\n".join("{0}:{1}".format(k, v)
                                    for k, v in sorted(result.items())))

    # Save pairwise PDQ statistics to file for use in visualisation code (separate file for each sequence)
    prev_idx = 0
    for idx, len_sequence in enumerate(len_sequences):
        seq_gt_eval_dicts = all_gt_eval_dicts[prev_idx:prev_idx + len_sequence]
        seq_det_eval_dicts = all_det_eval_dicts[prev_idx:prev_idx +
                                                len_sequence]
        prev_idx += len_sequence

        with open(
                os.path.join(args.save_folder,
                             'gt_eval_stats_{:02d}.json'.format(idx)),
                'w') as f:
            json.dump(seq_gt_eval_dicts, f)
        with open(
                os.path.join(args.save_folder,
                             'det_eval_stats_{:02d}.json'.format(idx)),
                'w') as f:
            json.dump(seq_det_eval_dicts, f)
Example #19
    def test_half_position_and_label_confidences(self):
        detections = [BBoxDetInst([0.5, 0.5], self.gt_box, 0.5)]
        evaluator = PDQ()
        score = evaluator.score([(self.square_gt, detections)])

        self.assertAlmostEqual(0.5, score, 4)
Example #20
    def test_no_detection(self):
        detections = []
        evaluator = PDQ()
        score = evaluator.score([(self.square_gt, detections)])

        self.assertEqual(score, 0)
Example #21
    def test_cross_gt_detected_by_perfect_box_in_non_segment_mode(self):
        detections = [BBoxDetInst(self.cross_label_list, self.cross_gt_box)]
        evaluator = PDQ()
        score = evaluator.score([(self.cross_gt, detections)])

        self.assertAlmostEqual(score, 1, 4)
Example #22
    def test_perfect_bbox(self):
        detections = [BBoxDetInst(self.default_label_list, self.gt_box)]
        evaluator = PDQ()
        score = evaluator.score([(self.square_gt, detections)])

        self.assertAlmostEqual(score, 1, 4)