def do_evaluation(submission_dir, ground_truth_dir):
    """
    Evaluate a submission against the ground truth for a set of image sequences
    :param submission_dir: location of the detections .json files (one for each sequence)
    :param ground_truth_dir: location of the ground-truth folders (one for each sequence)
    :return: Dictionary summarising the evaluation metrics (score, average spatial quality,
    average label quality, average overall quality (avg_pPDQ), true positives, false positives,
    and false negatives)
    """
    ground_truth = gt_loader.read_ground_truth(ground_truth_dir)
    detections = submission_loader.read_submission(submission_dir,
                                                   expected_sequence_names=set(ground_truth.keys()))
    matches = gt_loader.match_sequences(ground_truth, detections)
    evaluator = PDQ()
    score = evaluator.score(matches)
    TP, FP, FN = evaluator.get_assignment_counts()
    avg_spatial_quality = evaluator.get_avg_spatial_score()
    avg_label_quality = evaluator.get_avg_label_score()
    avg_overall_quality = evaluator.get_avg_overall_quality_score()
    return {
        'score': score * 100,
        'avg_spatial': avg_spatial_quality,
        'avg_label': avg_label_quality,
        'avg_pPDQ': avg_overall_quality,
        'TPs': TP,
        'FPs': FP,
        'FNs': FN
    }
def do_evaluation(submission_dir, ground_truth_dir, sequences=None, num_frames=-1, start_frame=0):
    """
    Evaluate a submission against the ground truth for a set of image sequences
    :param submission_dir: location of the detections .json files (one for each sequence)
    :param ground_truth_dir: location of the ground-truth folders (one for each sequence).
    Each ground-truth folder must contain mask images (.png format) and a matching labels.json file.
    :param sequences: A whitelist of sequence ids to include, as integers
    :param num_frames: The number of frames to read from each sequence, default is all available.
    :param start_frame: The index of the first frame to read
    :return: Dictionary containing summary of all metrics used in competition leaderboard
    (score, average spatial quality, average label quality, average overall quality (avg_pPDQ),
    true positives, false positives, and false negatives)
    """
    ground_truth = gt_loader.read_ground_truth(ground_truth_dir, sequences,
                                               start_index=start_frame,
                                               end_index=start_frame + num_frames)
    detections = submission_loader.read_submission(
        submission_dir,
        expected_sequence_names=set(ground_truth.keys()),
        start_index=start_frame,
        end_index=start_frame + num_frames)
    matches = gt_loader.match_sequences(ground_truth, detections)
    evaluator = PDQ()
    score = evaluator.score(matches)
    TP, FP, FN = evaluator.get_assignment_counts()
    avg_spatial_quality = evaluator.get_avg_spatial_score()
    avg_label_quality = evaluator.get_avg_label_score()
    avg_overall_quality = evaluator.get_avg_overall_quality_score()
    avg_fp_quality = evaluator.get_avg_fp_score()
    return {
        'score': score * 100,
        'avg_spatial': avg_spatial_quality,
        'avg_label': avg_label_quality,
        'avg_pPDQ': avg_overall_quality,
        'avg_fp_quality': avg_fp_quality,
        'TPs': TP,
        'FPs': FP,
        'FNs': FN
    }
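# A minimal, hypothetical sketch of driving do_evaluation from the command line and
# saving its summary dictionary. The flag names (--gt_loc, --det_loc, --save_file) are
# illustrative assumptions, not the repository's actual CLI.
import argparse
import json

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Score a submission with PDQ')
    parser.add_argument('--gt_loc', required=True, help='folder of per-sequence ground-truth folders')
    parser.add_argument('--det_loc', required=True, help='folder of per-sequence detection .json files')
    parser.add_argument('--save_file', default='scores.json', help='where to write the result summary')
    args = parser.parse_args()

    # Evaluate every available frame of every sequence found in the ground-truth folder.
    results = do_evaluation(args.det_loc, args.gt_loc, num_frames=-1, start_frame=0)
    with open(args.save_file, 'w') as f:
        json.dump(results, f, indent=2)
    print('PDQ score: {0:.3f}'.format(results['score']))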
def test_errors_if_missing_sequence(self):
    ground_truth = {
        '000000': make_generator([(1, )]),
        '000001': make_generator([(2, ), (3, )]),
        '000002': make_generator([(4, 5)]),
        '000003': make_generator([(6, )]),
        '000004': make_generator([(7, )])
    }
    submission = {
        '000000': make_generator([['1']]),
        # '000001': make_generator([['2'], ['3']]),
        '000002': make_generator([['4', '5']]),
        # '000003': make_generator([['6']]),
        '000004': make_generator([['7']])
    }
    with self.assertRaises(ValueError) as cm:
        next(gt_loader.match_sequences(ground_truth, submission))
    msg = str(cm.exception)
    self.assertIn('000001', msg)
    self.assertIn('000003', msg)
def test_warns_if_extra_sequence(self):
    ground_truth = {
        '000000': make_generator([(1, )]),
        '000001': make_generator([(2, ), (3, )]),
        '000002': make_generator([(4, 5)]),
        '000003': make_generator([(6, )]),
        '000004': make_generator([(7, )])
    }
    submission = {
        '000000': make_generator([['1']]),
        '000001': make_generator([['2'], ['3']]),
        '000002': make_generator([['4', '5']]),
        '000003': make_generator([['6']]),
        '000004': make_generator([['7']]),
        '000005': make_generator([['8']])
    }
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        next(gt_loader.match_sequences(ground_truth, submission))
    self.assertEqual(1, len(w))
    self.assertTrue(issubclass(w[-1].category, UserWarning))
    self.assertIn('000005', str(w[-1].message))
def test_returns_iterable_over_image_detections_from_sequences(self):
    ground_truth = {
        '000000': make_generator([(1, )]),
        '000001': make_generator([(2, ), (3, )]),
        '000002': make_generator([(4, 5)]),
        '000003': make_generator([(6, )]),
        '000004': make_generator([(7, )])
    }
    submission = {
        '000000': make_generator([['1']]),
        '000001': make_generator([['2'], ['3']]),
        '000002': make_generator([['4', '5']]),
        '000003': make_generator([['6']]),
        '000004': make_generator([['7']])
    }
    matches = gt_loader.match_sequences(ground_truth, submission)
    for gt, detect in matches:
        self.assertIsInstance(gt, list)
        self.assertIsInstance(detect, list)
        self.assertEqual(len(gt), len(detect))
        for idx in range(len(gt)):
            self.assertEqual(gt[idx], int(detect[idx]))
def test_errors_if_sequences_arent_same_length(self):
    ground_truth = {
        '000000': make_generator([(1, )]),
        '000001': make_generator([(2, ), (3, )]),
        '000002': make_generator([(4, 5)]),
        '000003': make_generator([(6, )]),
        '000004': make_generator([(7, ), (99, )])
    }
    submission = {
        '000000': make_generator([['1']]),
        '000001': make_generator([['2'], ['3']]),
        '000002': make_generator([['4', '5']]),
        '000003': make_generator([['6']]),
        '000004': make_generator([['7']])
    }
    gen = gt_loader.match_sequences(ground_truth, submission)
    # first 5 images are ok
    for _ in range(5):
        next(gen)
    with self.assertRaises(ValueError) as cm:
        next(gen)
    msg = str(cm.exception)
    self.assertIn('000004', msg)
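# The tests above build every sequence with a make_generator helper that is not shown in
# this section. A minimal sketch consistent with how it is used here (an assumption, not
# the repository's actual definition): it simply yields the given per-image entries one
# at a time, so each dict value behaves like a lazily loaded sequence of frames.
def make_generator(items):
    """Yield each per-image entry in order, mimicking a loaded ground-truth or detection sequence."""
    for item in items:
        yield item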