Example #1
import itertools

import numpy as np

# angular_distance and localization_precision_recall (Example #2) are assumed
# to be defined elsewhere in the same module.


def find_best_permutation_prec_recall(gt,
                                      output,
                                      acceptable_window=np.pi / 18):
    """
    Finds the best permutation for evaluation, then uses it to compute
    precision and recall.

    Inputs:
        gt, output: lists of sources; their lengths may differ.

    Returns: the permutation that matches outputs to gt, along with
        (tp, fn, fp).
    """
    n = max(len(gt), len(output))

    # Pad the shorter list with np.inf so both lists have length n; padded
    # entries act as unmatched slots and never count as inliers.
    if len(gt) > len(output):
        output += [np.inf] * (n - len(output))
    elif len(output) > len(gt):
        gt += [np.inf] * (n - len(gt))

    # Exhaustively try every permutation and keep the one that matches the
    # most gt/output pairs within the acceptable window.
    best_perm = None
    best_inliers = -1
    for perm in itertools.permutations(range(n)):
        curr_inliers = 0
        for idx1, idx2 in enumerate(perm):
            if angular_distance(gt[idx1], output[idx2]) < acceptable_window:
                curr_inliers += 1

        if curr_inliers > best_inliers:
            best_inliers = curr_inliers
            best_perm = list(perm)

    return localization_precision_recall(best_perm, gt, output,
                                         acceptable_window)
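As a rough usage sketch (not part of the original code): the angular_distance below is an illustrative stand-in for the project's real helper, and localization_precision_recall from Example #2 is assumed to be in scope.

# Illustrative stand-in: absolute angular difference in radians, with np.inf
# treated as "no match". The real helper is defined elsewhere in the codebase.
import numpy as np


def angular_distance(a, b):
    if np.isinf(a) or np.isinf(b):
        return np.inf
    return np.abs(np.arctan2(np.sin(a - b), np.cos(a - b)))


gt_angles = [0.0, np.pi / 2]                  # two ground-truth sources
out_angles = [np.pi / 2 + 0.05, 0.0, np.pi]   # one extra (spurious) output
perm, (tp, fn, fp) = find_best_permutation_prec_recall(gt_angles, out_angles)
print(perm, tp, fn, fp)  # expect 2 true positives and 1 false positive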
Example #2
def localization_precision_recall(permutation,
                                  gt,
                                  output,
                                  acceptable_window=np.pi / 18):
    """
    Counts true positives, false negatives and false positives for a given
    matching between ground-truth and output sources; np.inf entries mark
    padded (unmatched) slots.
    """
    tp, fn, fp = 0, 0, 0
    for idx1, idx2 in enumerate(permutation):
        if angular_distance(gt[idx1], output[idx2]) < acceptable_window:
            tp += 1
        elif gt[idx1] == np.inf:
            fp += 1
        elif output[idx2] == np.inf:
            fn += 1
        else:
            fn += 1
            fp += 1

    return permutation, (tp, fn, fp)
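The counts returned above can be turned into precision and recall in the usual way; the helper below is a minimal sketch and not part of the original code.

# Hypothetical helper, not part of the original code.
def precision_recall(tp, fn, fp):
    # Guard against empty denominators when there are no detections / sources.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    return precision, recall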
Example #3
def nms(candidate_voices, nms_cutoff):
    """
    Runs non-max suppression on the candidate voices: repeatedly keeps the
    loudest remaining candidate and drops candidates that are both close in
    angle and similar in content (SI-SDR at or above nms_cutoff).
    """
    final_proposals = []
    initial_proposals = candidate_voices

    while len(initial_proposals) > 0:
        new_initial_proposals = []
        sorted_candidates = sorted(initial_proposals,
                                   key=lambda x: x[1],
                                   reverse=True)

        # Choose the loudest voice and keep it in the final set
        best_candidate_voice = sorted_candidates.pop(0)
        final_proposals.append(best_candidate_voice)

        # See if any of the rest should be removed
        for candidate_voice in sorted_candidates:
            different_locations = utils.angular_distance(
                candidate_voice.angle, best_candidate_voice.angle) > NMS_RADIUS

            # different_content = abs(
            #     candidate_voice.data -
            #     best_candidate_voice.data).mean() > nms_cutoff

            # Candidates count as different speakers when the SI-SDR between
            # their separated signals falls below the cutoff
            different_content = si_sdr(
                candidate_voice.data[0],
                best_candidate_voice.data[0]) < nms_cutoff

            if different_locations or different_content:
                new_initial_proposals.append(candidate_voice)

        initial_proposals = new_initial_proposals

    return final_proposals
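A minimal, self-contained sketch of calling nms is shown below, assuming everything lives in one module. CandidateVoice, si_sdr, utils.angular_distance and NMS_RADIUS come from the surrounding codebase; the versions here are illustrative stand-ins only, and the namedtuple field order is an assumption chosen so that x[1] is the loudness used for sorting.

# Stand-ins for objects defined elsewhere in the codebase (illustrative only).
import collections
import types

import numpy as np

# Field order assumed so that x[1] is the energy used as the sort key.
CandidateVoice = collections.namedtuple("CandidateVoice",
                                        ["angle", "energy", "data"])
NMS_RADIUS = np.pi / 18  # illustrative value
utils = types.SimpleNamespace(
    angular_distance=lambda a, b: np.abs(np.arctan2(np.sin(a - b),
                                                    np.cos(a - b))))


def si_sdr(est, ref):
    # Crude scale-invariant SDR stand-in, in dB.
    est, ref = np.asarray(est, dtype=float), np.asarray(ref, dtype=float)
    alpha = np.dot(est, ref) / (np.dot(ref, ref) + 1e-8)
    noise = est - alpha * ref
    return 10 * np.log10((np.sum((alpha * ref) ** 2) + 1e-8) /
                         (np.sum(noise ** 2) + 1e-8))


base = np.random.randn(1, 16000)
voices = [
    CandidateVoice(angle=0.00, energy=1.0, data=base),
    CandidateVoice(angle=0.02, energy=0.8, data=0.8 * base),  # near-duplicate
    CandidateVoice(angle=1.50, energy=0.5, data=np.random.randn(1, 16000)),
]
kept = nms(voices, nms_cutoff=10.0)
# kept holds the loudest voice near angle 0 plus the distinct voice at 1.50 rad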
Example #4
    def evaluate_dir(idx):
        if args.debug:
            curr_writing_dir = "{:05d}".format(idx)
            if not os.path.exists(curr_writing_dir):
                os.makedirs(curr_writing_dir)
            args.writing_dir = curr_writing_dir

        curr_dir = all_dirs[idx]

        # Loads the data
        mixed_data, gt = get_items(curr_dir, args)

        # Serialize GPU work to prevent CUDA out-of-memory errors
        gpu_lock.acquire()
        if args.prec_recall:
            # Case where we don't know the number of sources
            candidate_voices = run_separation(mixed_data, model, args)

        # Case where we know the number of sources
        else:
            # Normal run
            if not args.oracle_position:
                candidate_voices = run_separation(mixed_data, model, args,
                                                  0.005)
            # To compute SDR or angle error, the number of outputs must match
            # the number of GT sources, so we rerun with a very low threshold
            # to make sure enough outputs are produced
            if args.oracle_position or len(candidate_voices) < len(gt):
                print("Had to go again\n")
                candidate_voices = run_separation(mixed_data, model, args,
                                                  0.000001)

            # Use the GT positions to find the best sources
            if args.oracle_position:
                trimmed_voices = []
                for gt_idx in range(args.n_voices):
                    best_idx = np.argmin(
                        np.array([
                            angular_distance(x.angle, gt[gt_idx].angle)
                            for x in candidate_voices
                        ]))
                    trimmed_voices.append(candidate_voices[best_idx])
                candidate_voices = trimmed_voices

            # Take the top N voices
            else:
                candidate_voices = candidate_voices[:args.n_voices]
            if len(candidate_voices) != len(gt):
                print(
                    f"Not enough outputs for dir {curr_dir}. Lower the threshold to evaluate."
                )
                gpu_lock.release()
                return

        if args.debug:
            sf.write(os.path.join(args.writing_dir, "mixed.wav"),
                     mixed_data[0], args.sr)
            for voice in candidate_voices:
                fname = "out_angle{:.2f}.wav".format(voice.angle * 180 / np.pi)
                sf.write(os.path.join(args.writing_dir, fname), voice.data[0],
                         args.sr)

        gpu_lock.release()
        curr_angle_errors = []
        curr_input_sdr = []
        curr_output_sdr = []

        best_permutation, (tp, fn, fp) = find_best_permutation_prec_recall(
            [x.angle for x in gt], [x.angle for x in candidate_voices])

        if args.prec_recall:
            all_tp.append(tp)
            all_fn.append(fn)
            all_fp.append(fp)

        # Evaluate SDR and Angular Error
        else:
            for gt_idx, output_idx in enumerate(best_permutation):
                angle_error = angular_distance(
                    candidate_voices[output_idx].angle, gt[gt_idx].angle)
                # print(angle_error)
                curr_angle_errors.append(angle_error)

                # To speed things up we evaluate only channel 0. For rigorous
                # results, set single_channel=False
                input_sdr = compute_sdr(gt[gt_idx].data,
                                        mixed_data,
                                        single_channel=True)
                output_sdr = compute_sdr(gt[gt_idx].data,
                                         candidate_voices[output_idx].data,
                                         single_channel=True)

                curr_input_sdr.append(input_sdr)
                curr_output_sdr.append(output_sdr)

            # print(curr_input_sdr)
            # print(curr_output_sdr)

            all_angle_errors[idx] = curr_angle_errors
            all_input_sdr[idx] = curr_input_sdr
            all_output_sdr[idx] = curr_output_sdr
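When the precision/recall path is used, the per-directory counts collected in all_tp, all_fn and all_fp can be pooled after every directory has been evaluated; a minimal, micro-averaged sketch (not part of the original code):

    # Pooled (micro-averaged) precision/recall over all evaluated directories.
    total_tp, total_fn, total_fp = sum(all_tp), sum(all_fn), sum(all_fp)
    precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) else 0.0
    recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) else 0.0
    print("precision: {:.3f}  recall: {:.3f}".format(precision, recall))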