def do_evaluation(submission_dir, ground_truth_dir):
    """
    Evaluate a particular image sequence
    :param submission_dir:
    :param ground_truth_dir:
    :return:
    """
    ground_truth = gt_loader.read_ground_truth(ground_truth_dir)
    detections = submission_loader.read_submission(submission_dir,
                                                   expected_sequence_names=set(
                                                       ground_truth.keys()))
    matches = gt_loader.match_sequences(ground_truth, detections)
    evaluator = PDQ()
    score = evaluator.score(matches)
    TP, FP, FN = evaluator.get_assignment_counts()
    avg_spatial_quality = evaluator.get_avg_spatial_score()
    avg_label_quality = evaluator.get_avg_label_score()
    avg_overall_quality = evaluator.get_avg_overall_quality_score()
    return {
        'score': score * 100,
        'avg_spatial': avg_spatial_quality,
        'avg_label': avg_label_quality,
        'avg_pPDQ': avg_overall_quality,
        'TPs': TP,
        'FPs': FP,
        'FNs': FN
    }
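
# Example usage (a minimal sketch; the directory paths below are hypothetical
# placeholders, not part of the original code):
if __name__ == '__main__':
    results = do_evaluation('detections/', 'ground_truth/')
    for name, value in sorted(results.items()):
        print('{0}: {1}'.format(name, value))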
Example #2
def main(method, n_classes):
    """
    Run a single evaluation.
    :param method: 1 to compute PDQ summary statistics, 0 to compute mAP
    :param n_classes: number of classes used for the mAP calculation
    :return: dictionary of PDQ statistics when method == 1, or the mAP score when method == 0
    """
    if method == 1:
        print("Extracting GT and Detections")
        param_sequence, len_sequences = gen_param_sequence()

        print("Calculating PDQ")

        # Get summary statistics (PDQ, avg_qualities)
        evaluator = PDQ(filter_gts=True, segment_mode=False, greedy_mode=False)
        pdq = evaluator.score(param_sequence)
        TP, FP, FN = evaluator.get_assignment_counts()
        avg_spatial_quality = evaluator.get_avg_spatial_score()
        avg_label_quality = evaluator.get_avg_label_score()
        avg_overall_quality = evaluator.get_avg_overall_quality_score()
        avg_fg_quality = evaluator.get_avg_fg_quality_score()
        avg_bg_quality = evaluator.get_avg_bg_quality_score()

        # Get the detection-wise and ground-truth-wise qualities and matches for PDQ and save them to file
        all_gt_eval_dicts = evaluator._gt_evals
        all_det_eval_dicts = evaluator._det_evals

        result = {
            "PDQ": pdq,
            "avg_pPDQ": avg_overall_quality,
            "avg_spatial": avg_spatial_quality,
            'avg_fg': avg_fg_quality,
            'avg_bg': avg_bg_quality,
            "avg_label": avg_label_quality,
            "TP": TP,
            "FP": FP,
            "FN": FN
        }

        return result

    # Calculate mAP
    if method == 0:
        print("Calculating mAP")
        param_sequence, len_sequences = gen_param_sequence()
        mAP = coco_mAP(param_sequence, n_classes, use_heatmap=False)
        return mAP
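
# Example usage (a minimal sketch; n_classes=80 is a hypothetical value, not
# taken from the original code):
if __name__ == '__main__':
    pdq_stats = main(method=1, n_classes=80)   # PDQ summary statistics
    print(pdq_stats)
    map_score = main(method=0, n_classes=80)   # COCO-style mAP
    print('mAP: {0}'.format(map_score))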
Example #3
def do_evaluation(submission_dir,
                  ground_truth_dir,
                  sequences=None,
                  num_frames=-1,
                  start_frame=0):
    """
    Evaluate a particular image sequence
    :param submission_dir: location of the detections .json files (one for each sequence)
    :param ground_truth_dir: location of the ground-truth folders (one for each sequence).
    Each ground-truth folder must contain mask images (.png format) and a matching labels.json file.
    :param sequences: A whitelist of sequence ids to include, as integers
    :param num_frames: The number of frames to read from each sequence, default is all available.
    :param start_frame: The index of the first frame to read
    :return: Dictionary containing summary of all metrics used in competition leaderboard
    (score, average spatial quality, average label quality, average overall quality (avg_pPDQ),
    true positives, false positives, and false negatives)
    """
    ground_truth = gt_loader.read_ground_truth(ground_truth_dir,
                                               sequences,
                                               start_index=start_frame,
                                               end_index=start_frame +
                                               num_frames)
    detections = submission_loader.read_submission(
        submission_dir,
        expected_sequence_names=set(ground_truth.keys()),
        start_index=start_frame,
        end_index=start_frame + num_frames)
    matches = gt_loader.match_sequences(ground_truth, detections)
    evaluator = PDQ()
    score = evaluator.score(matches)
    TP, FP, FN = evaluator.get_assignment_counts()
    avg_spatial_quality = evaluator.get_avg_spatial_score()
    avg_label_quality = evaluator.get_avg_label_score()
    avg_overall_quality = evaluator.get_avg_overall_quality_score()
    avg_fp_quality = evaluator.get_avg_fp_score()
    return {
        'score': score * 100,
        'avg_spatial': avg_spatial_quality,
        'avg_label': avg_label_quality,
        'avg_pPDQ': avg_overall_quality,
        'avg_fp_quality': avg_fp_quality,
        'TPs': TP,
        'FPs': FP,
        'FNs': FN
    }
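
# Example usage (a minimal sketch; the paths, sequence ids, and frame range
# below are hypothetical placeholders):
if __name__ == '__main__':
    results = do_evaluation('detections/', 'ground_truth/',
                            sequences=[0, 1, 2],
                            num_frames=100,
                            start_frame=0)
    print('score: {0:.2f}'.format(results['score']))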
Example #4
def main():
    """
    Run the full evaluation (PDQ, mAP, and moLRP) and write all statistics to args.save_folder.
    """
    if not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    print("Extracting GT and Detections")
    param_sequence, len_sequences = gen_param_sequence()

    print("Calculating PDQ")

    # Get summary statistics (PDQ, avg_qualities)
    evaluator = PDQ(filter_gts=(args.test_set == 'rvc1'),
                    segment_mode=args.segment_mode,
                    greedy_mode=args.greedy_mode)
    pdq = evaluator.score(param_sequence)
    TP, FP, FN = evaluator.get_assignment_counts()
    avg_spatial_quality = evaluator.get_avg_spatial_score()
    avg_label_quality = evaluator.get_avg_label_score()
    avg_overall_quality = evaluator.get_avg_overall_quality_score()
    avg_fg_quality = evaluator.get_avg_fg_quality_score()
    avg_bg_quality = evaluator.get_avg_bg_quality_score()

    # Get the detection-wise and ground-truth-wise qualities and matches for PDQ and save them to file
    all_gt_eval_dicts = evaluator._gt_evals
    all_det_eval_dicts = evaluator._det_evals

    # Calculate mAP
    print("Calculating mAP")
    # generate the parameter sequence again for new tests (generator does not hold onto data once used)
    print("Extracting GT and Detections")
    param_sequence, len_sequences = gen_param_sequence()
    mAP = coco_mAP(param_sequence, use_heatmap=args.mAP_heatmap)
    print('mAP: {0}'.format(mAP))

    # Calculate LRP
    print("Calculating LRP")
    # generate the parameter sequence again for new tests (generator does not hold onto data once used)
    print("Extracting GT and Detections")
    param_sequence, len_sequences = gen_param_sequence()
    # Use the same bounding-box definition as used for mAP
    # Extract all moLRP statistics
    LRP_dict = coco_LRP(param_sequence, use_heatmap=args.mAP_heatmap, full=True)

    # Compile evaluation statistics into a single dictionary
    result = {
        "PDQ": pdq,
        "avg_pPDQ": avg_overall_quality,
        "avg_spatial": avg_spatial_quality,
        'avg_fg': avg_fg_quality,
        'avg_bg': avg_bg_quality,
        "avg_label": avg_label_quality,
        "TP": TP,
        "FP": FP,
        "FN": FN,
        'mAP': mAP,
        'moLRP': LRP_dict['moLRP'],
        'moLRPLoc': LRP_dict['moLRPLoc'],
        'moLRPFP': LRP_dict['moLRPFP'],
        'moLRPFN': LRP_dict['moLRPFN']
    }
    print("PDQ: {0:4f}\n"
          "mAP: {1:4f}\n"
          "avg_pPDQ:{2:4f}\n"
          "avg_spatial:{3:4f}\n"
          "avg_label:{4:4f}\n"
          "avg_foreground:{5:4f}\n"
          "avg_background:{6:4f}\n"
          "TP:{7}\nFP:{8}\nFN:{9}\n"
          "moLRP:{10:4f}\n"
          "moLRPLoc:{11:4f}\n"
          "moLRPFP:{12:4f}\n"
          "moLRPFN:{13:4f}\n".format(pdq, mAP, avg_overall_quality,
                                     avg_spatial_quality, avg_label_quality,
                                     avg_fg_quality, avg_bg_quality, TP, FP,
                                     FN, LRP_dict['moLRP'],
                                     LRP_dict['moLRPLoc'], LRP_dict['moLRPFP'],
                                     LRP_dict['moLRPFN']))

    # Save evaluation statistics to file
    with open(os.path.join(args.save_folder, 'scores.txt'),
              'w') as output_file:
        output_file.write("\n".join("{0}:{1}".format(k, v)
                                    for k, v in sorted(result.items())))

    # Save pairwise PDQ statistics to file for use in visualisation code (separate file for each sequence)
    prev_idx = 0
    for idx, len_sequence in enumerate(len_sequences):
        seq_gt_eval_dicts = all_gt_eval_dicts[prev_idx:prev_idx + len_sequence]
        seq_det_eval_dicts = all_det_eval_dicts[prev_idx:prev_idx +
                                                len_sequence]
        prev_idx += len_sequence

        with open(
                os.path.join(args.save_folder,
                             'gt_eval_stats_{:02d}.json'.format(idx)),
                'w') as f:
            json.dump(seq_gt_eval_dicts, f)
        with open(
                os.path.join(args.save_folder,
                             'det_eval_stats_{:02d}.json'.format(idx)),
                'w') as f:
            json.dump(seq_det_eval_dicts, f)
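
# main() above reads a module-level `args` namespace. A minimal sketch of an
# argparse setup providing the attributes it uses is given below; the flag
# names, defaults, and help strings are assumptions, not taken from the
# original code.
import argparse

parser = argparse.ArgumentParser(description='Evaluate detections with PDQ, mAP, and moLRP')
parser.add_argument('--save_folder', required=True,
                    help='folder where scores.txt and per-sequence eval stats are written')
parser.add_argument('--test_set', default='rvc1',
                    help="dataset name; 'rvc1' enables ground-truth filtering in PDQ")
parser.add_argument('--segment_mode', action='store_true',
                    help='evaluate PDQ in segment mode')
parser.add_argument('--greedy_mode', action='store_true',
                    help='use greedy matching when associating detections in PDQ')
parser.add_argument('--mAP_heatmap', action='store_true',
                    help='use probability heatmaps rather than boxes for mAP and LRP')
args = parser.parse_args()

if __name__ == '__main__':
    main()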