def perf(devel_scores, test_scores, threshold_func):
    """Summarises FAR and FRR on the development and test sets as a dictionary.

    The decision threshold is obtained from the development scores only and
    is then applied unchanged to both sets.

    Keyword parameters:

    devel_scores - indexable holding the real-access scores at position 0 and
      the attack scores at position 1 for the development set; both are
      indexed with ``[:, 0]``, so only their first column is used

    test_scores - same layout as ``devel_scores``, for the test set

    threshold_func - callable invoked as ``threshold_func(attacks, reals)`` on
      the development scores; its result is used as the threshold

    Returns a tuple ``(retval, thres)``. ``retval`` is a dictionary with the
    threshold formatted to 4 decimals under ``'threshold'``, updated with
    whatever ``make_dict`` returns for the ``'devel-'`` and ``'test-'``
    prefixes. ``thres`` is the unformatted threshold.
    """
    from bob.measure import farfrr

    # Only the first column of each score array is considered.
    dev_attack, dev_real = devel_scores[1][:, 0], devel_scores[0][:, 0]
    tst_attack, tst_real = test_scores[1][:, 0], test_scores[0][:, 0]

    # Sample counts are handed to make_dict next to the matching rate.
    n_dev_real, n_dev_attack = dev_real.shape[0], dev_attack.shape[0]
    n_tst_real, n_tst_attack = tst_real.shape[0], tst_attack.shape[0]

    thres = threshold_func(dev_attack, dev_real)
    dev_far, dev_frr = farfrr(dev_attack, dev_real, thres)
    tst_far, tst_frr = farfrr(tst_attack, tst_real, thres)

    summary = {'threshold': '%.4f' % thres}
    summary.update(
        make_dict('devel-', dev_far, n_dev_attack, dev_frr, n_dev_real))
    summary.update(
        make_dict('test-', tst_far, n_tst_attack, tst_frr, n_tst_real))
    return summary, thres
def perf_hter(test_scores, devel_scores, threshold_func):
    """Computes the HTER for the test and development sets, together with a
    formatted text line for each set and the threshold that was used.

    The threshold is obtained from the development scores only and is then
    applied unchanged to both sets.

    Keyword parameters:

    test_scores - the scores of the samples in the test set: real-access
      scores at position 0, attack scores at position 1; both are indexed
      with ``[:, 0]``, so only their first column is used

    devel_scores - the scores of the samples in the development set, same
      layout as ``test_scores``

    threshold_func - callable invoked as ``threshold_func(attacks, reals)`` on
      the development scores; its result is used as the threshold

    Returns a tuple ``((test_hter, devel_hter), (test_text, devel_text),
    thres)`` where the HTER values are expressed in percent.
    """
    from bob.measure import farfrr

    devel_attack_scores = devel_scores[1][:, 0]
    devel_real_scores = devel_scores[0][:, 0]
    test_attack_scores = test_scores[1][:, 0]
    test_real_scores = test_scores[0][:, 0]

    thres = threshold_func(devel_attack_scores, devel_real_scores)

    devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores,
                                  thres)
    test_far, test_frr = farfrr(test_attack_scores, test_real_scores, thres)

    # FAR and FRR are scaled by 100 for display below, so 50 * (FAR + FRR)
    # is their average expressed on the same percent scale.
    devel_hter = 50 * (devel_far + devel_frr)
    test_hter = 50 * (test_far + test_frr)

    devel_text = " d: FAR %.2f%% / FRR %.2f%% / HTER %.2f%% " % (
        100 * devel_far, 100 * devel_frr, devel_hter)
    test_text = " t: FAR %.2f%% / FRR %.2f%% / HTER %.2f%% " % (
        100 * test_far, 100 * test_frr, test_hter)

    return (test_hter, devel_hter), (test_text, devel_text), thres
def perf(devel_scores, test_scores, threshold_func):
    """Builds a text report of FAR and FRR on the development and test sets.

    The decision threshold is obtained from the development scores only and
    is then applied unchanged to both sets.

    Keyword parameters:

    devel_scores - indexable holding the real-access scores at position 0 and
      the attack scores at position 1 for the development set; both are
      indexed with ``[:, 0]``, so only their first column is used

    test_scores - same layout as ``devel_scores``, for the test set

    threshold_func - callable invoked as ``threshold_func(attacks, reals)`` on
      the development scores; its result is used as the threshold

    Returns a tuple ``(retval, thres)``. ``retval`` is a list of three items:
    the threshold line, then the values returned by ``pline`` for the
    development and the test set. ``thres`` is the unformatted threshold.
    """
    from bob.measure import farfrr

    # Only the first column of each score array is considered.
    dev_attack, dev_real = devel_scores[1][:, 0], devel_scores[0][:, 0]
    tst_attack, tst_real = test_scores[1][:, 0], test_scores[0][:, 0]

    # Sample counts are handed to pline next to the matching rate.
    n_dev_real, n_dev_attack = dev_real.shape[0], dev_attack.shape[0]
    n_tst_real, n_tst_attack = tst_real.shape[0], tst_attack.shape[0]

    thres = threshold_func(dev_attack, dev_real)
    dev_far, dev_frr = farfrr(dev_attack, dev_real, thres)
    tst_far, tst_frr = farfrr(tst_attack, tst_real, thres)

    report = [
        " threshold: %.4f" % thres,
        pline("dev ", dev_far, n_dev_attack, dev_frr, n_dev_real),
        pline("test", tst_far, n_tst_attack, tst_frr, n_tst_real),
    ]
    return report, thres
def perf_hter(test_scores, devel_scores, threshold_func):
    """Computes the HTER for the test and development sets, together with a
    formatted text line for each set and the threshold that was used.

    The threshold is obtained from the development scores only and is then
    applied unchanged to both sets.

    Keyword parameters:

    test_scores - the scores of the samples in the test set: real-access
      scores at position 0, attack scores at position 1; both are indexed
      with ``[:, 0]``, so only their first column is used

    devel_scores - the scores of the samples in the development set, same
      layout as ``test_scores``

    threshold_func - callable invoked as ``threshold_func(attacks, reals)`` on
      the development scores; its result is used as the threshold

    Returns a tuple ``((test_hter, devel_hter), (test_text, devel_text),
    thres)`` where the HTER values are expressed in percent.
    """
    from bob.measure import farfrr

    devel_attack_scores = devel_scores[1][:, 0]
    devel_real_scores = devel_scores[0][:, 0]
    test_attack_scores = test_scores[1][:, 0]
    test_real_scores = test_scores[0][:, 0]

    thres = threshold_func(devel_attack_scores, devel_real_scores)

    devel_far, devel_frr = farfrr(devel_attack_scores, devel_real_scores,
                                  thres)
    test_far, test_frr = farfrr(test_attack_scores, test_real_scores, thres)

    # FAR and FRR are scaled by 100 for display below, so 50 * (FAR + FRR)
    # is their average expressed on the same percent scale.
    devel_hter = 50 * (devel_far + devel_frr)
    test_hter = 50 * (test_far + test_frr)

    devel_text = " d: FAR %.2f%% / FRR %.2f%% / HTER %.2f%% " % (
        100 * devel_far, 100 * devel_frr, devel_hter)
    test_text = " t: FAR %.2f%% / FRR %.2f%% / HTER %.2f%% " % (
        100 * test_far, 100 * test_frr, test_hter)

    return (test_hter, devel_hter), (test_text, devel_text), thres
def perf(devel_scores, test_scores, threshold_func):
    """Summarises FAR and FRR on the development and test sets as a dictionary.

    The decision threshold is obtained from the development scores only and
    is then applied unchanged to both sets.

    Keyword parameters:

    devel_scores - indexable holding the real-access scores at position 0 and
      the attack scores at position 1 for the development set; both are
      indexed with ``[:, 0]``, so only their first column is used

    test_scores - same layout as ``devel_scores``, for the test set

    threshold_func - callable invoked as ``threshold_func(attacks, reals)`` on
      the development scores; its result is used as the threshold

    Returns a tuple ``(retval, thres)``. ``retval`` is a dictionary with the
    threshold formatted to 4 decimals under ``'threshold'``, updated with
    whatever ``make_dict`` returns for the ``'devel-'`` and ``'test-'``
    prefixes. ``thres`` is the unformatted threshold.
    """
    from bob.measure import farfrr

    # Only the first column of each score array is considered.
    dev_attack, dev_real = devel_scores[1][:, 0], devel_scores[0][:, 0]
    tst_attack, tst_real = test_scores[1][:, 0], test_scores[0][:, 0]

    # Sample counts are handed to make_dict next to the matching rate.
    n_dev_real, n_dev_attack = dev_real.shape[0], dev_attack.shape[0]
    n_tst_real, n_tst_attack = tst_real.shape[0], tst_attack.shape[0]

    thres = threshold_func(dev_attack, dev_real)
    dev_far, dev_frr = farfrr(dev_attack, dev_real, thres)
    tst_far, tst_frr = farfrr(tst_attack, tst_real, thres)

    summary = {'threshold': '%.4f' % thres}
    summary.update(
        make_dict('devel-', dev_far, n_dev_attack, dev_frr, n_dev_real))
    summary.update(
        make_dict('test-', tst_far, n_tst_attack, tst_frr, n_tst_real))
    return summary, thres
def perf_hter_thorough(test_scores, devel_scores, threshold_func):
    """Computes FAR and FRR on the development and test sets at a threshold
    chosen on the development set.

    Keyword parameters:

    test_scores - the scores of the samples in the test set (tuple): real
      scores at position 0, attack scores at position 1

    devel_scores - the scores of the samples in the development set (tuple),
      same layout as ``test_scores``

    threshold_func - callable invoked as ``threshold_func(attacks, reals)`` on
      the development scores; its result is used as the threshold

    Returns ``((devel_far, devel_frr), (test_far, test_frr))``. Neither the
    HTER nor the threshold itself is returned.
    """
    from bob.measure import farfrr

    def as_vector(scores):
        # all the scores should be arrays with shape (n,)
        return scores.reshape([len(scores)])

    dev_attack = as_vector(devel_scores[1])
    dev_real = as_vector(devel_scores[0])
    tst_attack = as_vector(test_scores[1])
    tst_real = as_vector(test_scores[0])

    thres = threshold_func(dev_attack, dev_real)
    dev_rates = farfrr(dev_attack, dev_real, thres)
    tst_rates = farfrr(tst_attack, tst_real, thres)

    # Unpack so that each returned element is a plain 2-tuple (far, frr).
    dev_far, dev_frr = dev_rates
    tst_far, tst_frr = tst_rates
    return (dev_far, dev_frr), (tst_far, tst_frr)
def perf_hter_thorough(test_scores, devel_scores, threshold_func):
    """Computes FAR and FRR on the development and test sets at a threshold
    chosen on the development set.

    Keyword parameters:

    test_scores - the scores of the samples in the test set (tuple): real
      scores at position 0, attack scores at position 1

    devel_scores - the scores of the samples in the development set (tuple),
      same layout as ``test_scores``

    threshold_func - callable invoked as ``threshold_func(attacks, reals)`` on
      the development scores; its result is used as the threshold

    Returns ``((devel_far, devel_frr), (test_far, test_frr))``. Neither the
    HTER nor the threshold itself is returned.
    """
    from bob.measure import farfrr

    def as_vector(scores):
        # all the scores should be arrays with shape (n,)
        return scores.reshape([len(scores)])

    dev_attack = as_vector(devel_scores[1])
    dev_real = as_vector(devel_scores[0])
    tst_attack = as_vector(test_scores[1])
    tst_real = as_vector(test_scores[0])

    thres = threshold_func(dev_attack, dev_real)
    dev_rates = farfrr(dev_attack, dev_real, thres)
    tst_rates = farfrr(tst_attack, tst_real, thres)

    # Unpack so that each returned element is a plain 2-tuple (far, frr).
    dev_far, dev_frr = dev_rates
    tst_far, tst_frr = tst_rates
    return (dev_far, dev_frr), (tst_far, tst_frr)
def test_basic_ratios():
    """Checks FAR/FRR, precision/recall and F-score at three thresholds on
    the linearly separable example data."""
    from . import farfrr, precision_recall, f_score

    # We test the basic functionality on FAR and FRR calculation. The example
    # is separable, with a separation threshold of about 3.0
    positives = bob.io.base.load(F('linsep-positives.hdf5'))
    negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

    below_all = min(positives.min(), negatives.min()) - 0.1
    above_all = max(positives.max(), negatives.max()) + 0.1
    separating = 3.0

    # One row per threshold: (threshold, FAR, FRR, precision, recall).
    #  - just below the minimum score: FAR is 1.0 and FRR is 0.0; precision
    #    is 0.5 and recall is 1.0
    #  - just above the maximum score: FRR is 1.0 and FAR is 0.0; precision
    #    and recall are both 0.0
    #  - at the separating threshold: FAR and FRR are 0.0; precision and
    #    recall are both 1.0
    expectations = (
        (below_all, 1.0, 0.0, 0.5, 1.0),
        (above_all, 0.0, 1.0, 0.0, 0.0),
        (separating, 0.0, 0.0, 1.0, 1.0),
    )
    for threshold, want_far, want_frr, want_prec, want_recall in expectations:
        far, frr = farfrr(negatives, positives, threshold)
        nose.tools.eq_(far, want_far)
        nose.tools.eq_(frr, want_frr)
        prec, recall = precision_recall(negatives, positives, threshold)
        nose.tools.eq_(prec, want_prec)
        nose.tools.eq_(recall, want_recall)

    # Testing the values of F-score depending on different choices of the
    # threshold, with the default weight and with a weight of 2
    nose.tools.assert_almost_equal(
        f_score(negatives, positives, below_all), 0.66666667)
    nose.tools.assert_almost_equal(
        f_score(negatives, positives, below_all, 2), 0.83333333)
    for threshold, expected in ((above_all, 0.0), (separating, 1.0)):
        nose.tools.eq_(f_score(negatives, positives, threshold), expected)
        nose.tools.eq_(f_score(negatives, positives, threshold, 2), expected)
def weighted_neg_error_rate_criteria(data, weight, thres, beta=0.5,
                                     criteria="eer"):
    """Given a single value for the weight parameter balancing between
    impostors and spoofing attacks and a threshold, calculates the error rates
    and combines them according to the selected criteria.

    Keyword parameters:

      - data - the development data used to determine the threshold. List of
      4 numpy.arrays containing: negatives (licit), positives (licit),
      negatives (spoof), positives (spoof)

      - weight - the weight parameter balancing between impostors and spoofing
      attacks

      - thres - the given threshold

      - beta - the weight parameter balancing between real accesses and all
      the negative samples (impostors and spoofing attacks). Note that this
      parameter is not considered if the selected criteria is 'min-hter'.

      - criteria - 'eer', 'wer' or 'min-hter' criteria for decision threshold.
      Any value other than 'eer' and 'min-hter' is handled as 'wer'.

    Returns, with ``far_w = (1 - weight) * far_licit + weight * far_spoof``:

      - 'eer': ``abs(far_w - frr)`` when ``beta`` is 0.5, otherwise
      ``abs((1 - beta) * frr - beta * far_w)``

      - 'min-hter': ``(far_w + frr) / 2``

      - anything else: ``(1 - beta) * frr + beta * far_w``
    """
    licit_neg, licit_pos = data[0], data[1]
    spoof_neg, spoof_pos = data[2], data[3]

    licit_rates = farfrr(licit_neg, licit_pos, thres)
    spoof_rates = farfrr(spoof_neg, spoof_pos, thres)

    # The FRR is read from the licit scenario; the spoof scenario is
    # expected to give the same value.
    far_i, frr = licit_rates[0], licit_rates[1]
    far_s = spoof_rates[0]
    far_w = (1 - weight) * far_i + weight * far_s

    if criteria == "min-hter":
        return (far_w + frr) / 2
    if criteria != "eer":
        return (1 - beta) * frr + beta * far_w

    # 'eer' criteria: the balanced case keeps the plain difference.
    if beta == 0.5:
        return abs(far_w - frr)
    return abs((1 - beta) * frr - beta * far_w)
def _numbers(self, neg, pos, spoof, threshold, fta):
    """Computes each metric value at the given threshold.

    Returns a dictionary with the keys ``fta``, ``fmr``, ``fnmr``, ``hter``,
    ``far``, ``frr``, ``fm``, ``ni``, ``fnm``, ``nc``, ``precision``,
    ``recall``, ``f1_score``, ``auc``, ``auc_log``, ``iapmr``,
    ``spoof_match`` and ``spoof_total``, in that order.
    """
    # fmr and fnmr are the two values returned by farfrr on the licit scores
    fmr, fnmr = farfrr(neg, pos, threshold)
    n_impostors = neg.shape[0]
    n_clients = pos.shape[0]

    precision, recall = precision_recall(neg, pos, threshold)

    metrics = {
        "fta": fta,
        "fmr": fmr,
        "fnmr": fnmr,
        "hter": (fmr + fnmr) / 2.0,
        # far and frr are fmr and fnmr adjusted by the fta value
        "far": fmr * (1 - fta),
        "frr": fta + fnmr * (1 - fta),
        # rates converted back to sample counts
        "fm": int(round(fmr * n_impostors)),  # number of false accepts
        "ni": n_impostors,  # number of impostors
        "fnm": int(round(fnmr * n_clients)),  # number of false rejects
        "nc": n_clients,  # number of clients
        "precision": precision,
        "recall": recall,
        "f1_score": f_score(neg, pos, threshold, 1),
        "auc": roc_auc_score(neg, pos),
        "auc_log": roc_auc_score(neg, pos, log_scale=True),
    }

    # IAPMR at threshold: the spoof scores are passed as negatives and only
    # the first returned rate is kept; [0.0] merely fills the positives slot.
    iapmr, _ = farfrr(spoof, [0.0], threshold)
    spoof_total = len(spoof)
    metrics["iapmr"] = iapmr
    metrics["spoof_match"] = int(round(iapmr * spoof_total))
    metrics["spoof_total"] = spoof_total
    return metrics
def test_obvious_thresholds():
    """Checks far_threshold and frr_threshold on two non-overlapping score
    ranges, for requested rates going from 0 to 1 in steps of 1 / (2 * M)."""
    from . import far_threshold, frr_threshold, farfrr

    M = 10
    neg = numpy.arange(M, dtype=float)
    pos = numpy.arange(M, 2 * M, dtype=float)

    # The same sequence of requested rates is used for both FAR and FRR.
    requested_rates = numpy.arange(0, 2 * M + 1, dtype=float) / M / 2

    for raw_rate in requested_rates:
        # The expectation is derived from the unrounded value: the obtained
        # rate is the request floored to one decimal.
        expected = math.floor(raw_rate * 10) / 10
        target = round(raw_rate, 2)

        thr_for_far = far_threshold(neg, pos, target)
        pred_far, _ = farfrr(neg, pos, thr_for_far)

        thr_for_frr = frr_threshold(neg, pos, target)
        _, pred_frr = farfrr(neg, pos, thr_for_frr)

        assert pred_far <= target, (pred_far, target, thr_for_far)
        assert pred_far == expected, (pred_far, target, thr_for_far)
        assert pred_frr <= target, (pred_frr, target, thr_for_frr)
        assert pred_frr == expected, (pred_frr, target, thr_for_frr)
def error_rates_at_weight(licit_neg, licit_pos, spoof_neg, spoof_pos, omega,
                          threshold, beta=0.5):
    """Calculates several error rates: FRR, FAR (zero-effort impostors), SFAR,
    FAR_w, WER_wb and HTER_w for a given value of w. The given threshold is
    passed through as the last element of the result.

    Keyword arguments:

      - licit_neg - numpy.array of scores for the negatives (licit scenario)
      - licit_pos - numpy.array of scores for the positives (licit scenario)
      - spoof_neg - numpy.array of scores for the negatives (spoof scenario)
      - spoof_pos - numpy.array of scores for the positives (spoof scenario)
      - omega - the omega parameter balancing between impostors and spoofing
      attacks
      - threshold - the given threshold
      - beta - the weight parameter balancing between real accesses and all
      the negative samples (impostors and spoofing attacks).

    Returns the tuple ``(frr, far, sfar, far_w, wer_wb, hter_w, threshold)``.
    """
    # error rates @ threshold in the licit and in the spoof scenario
    licit_rates = farfrr(licit_neg, licit_pos, threshold)
    spoof_rates = farfrr(spoof_neg, spoof_pos, threshold)

    # The FRR could be taken from the spoof scenario as well, it doesn't
    # matter.
    far, frr = licit_rates[0], licit_rates[1]
    sfar = spoof_rates[0]

    far_w = weighted_err(far, sfar, omega)
    hter_w = (far_w + frr) / 2
    wer_wb = weighted_err(frr, far_w, beta)

    return (frr, far, sfar, far_w, wer_wb, hter_w, threshold)
def test_mindcf():
    """ Test outlier scores in negative set """
    from bob.measure import min_weighted_error_rate_threshold, farfrr

    cost = 0.99
    # A single negative score (4) lies above every positive score.
    negatives = [-3, -2, -1, -0.5, 4]
    positives = [0.5, 3]

    # NOTE(review): the fourth positional argument is presumably the
    # "is_sorted" flag - confirm against the bob.measure signature.
    threshold = min_weighted_error_rate_threshold(
        negatives, positives, cost, True)
    far, frr = farfrr(negatives, positives, threshold)

    # cost-weighted combination of both rates, scaled by 100
    mindcf = (cost * far + (1 - cost) * frr) * 100
    assert mindcf < 1.0 + 1e-8
def compute(self, idx, input_scores, input_names):
    """Implements plots: draws the IAPMR against the FMR for one system.

    For 100 FMR values evenly spaced in [0, 1], a threshold is obtained with
    ``far_threshold`` on the licit development scores. That threshold is then
    applied to the evaluation scores to get both the IAPMR (from the
    ``"spoof"`` scores) and the FMR actually reached (from the
    ``"licit_neg"`` scores).

    Parameters
    ----------
    idx
        Index of the system; selects the legend and, if no legends are set,
        is used to build the default ``"system N"`` label.
    input_scores
        Indexable whose item 0 holds the development scores and item 1 the
        evaluation scores; each is passed through ``clean_scores``.
    input_names
        Indexable of names; item 1 is reported in the log message.

    Raises
    ------
    ValueError
        If ``self._eval`` is false. The curve is computed on evaluation
        scores, so without them the function previously failed part-way
        through with an unbound-local error.
    """
    if not self._eval:
        raise ValueError(
            "FmrIapmr needs evaluation scores but evaluation is disabled")

    dev_scores = clean_scores(input_scores[0])
    eval_scores = clean_scores(input_scores[1])

    fmr_list = np.linspace(0, 1, 100)
    iapmr_list = []
    for i, fmr in enumerate(fmr_list):
        thr = far_threshold(dev_scores["licit_neg"], dev_scores["licit_pos"],
                            fmr)
        # [0.0] only fills the positives slot; the first returned rate is
        # the one computed on the scores given as negatives.
        iapmr_list.append(farfrr(eval_scores["spoof"], [0.0], thr)[0])
        # re-calculate fmr since threshold might give a different result
        # for fmr.
        fmr_list[i], _ = farfrr(eval_scores["licit_neg"], [0.0], thr)

    if self._legends is not None:
        label = self._legends[idx]
    else:
        label = f"system {idx+1}"

    logger.info(f"Plot FmrIapmr using: {input_names[1]}")
    draw = mpl.semilogx if self._semilogx else mpl.plot
    draw(fmr_list, iapmr_list, label=label)
def perf(devel_scores, test_scores, threshold_func):
    """Builds a text report of FAR and FRR on the development and test sets.

    The decision threshold is obtained from the development scores only and
    is then applied unchanged to both sets.

    Keyword parameters:

    devel_scores - indexable holding the real-access scores at position 0 and
      the attack scores at position 1 for the development set; both are
      indexed with ``[:, 0]``, so only their first column is used

    test_scores - same layout as ``devel_scores``, for the test set

    threshold_func - callable invoked as ``threshold_func(attacks, reals)`` on
      the development scores; its result is used as the threshold

    Returns a tuple ``(retval, thres)``. ``retval`` is a list of three items:
    the threshold line, then the values returned by ``pline`` for the
    development and the test set. ``thres`` is the unformatted threshold.
    """
    from bob.measure import farfrr

    # Only the first column of each score array is considered.
    dev_attack, dev_real = devel_scores[1][:, 0], devel_scores[0][:, 0]
    tst_attack, tst_real = test_scores[1][:, 0], test_scores[0][:, 0]

    # Sample counts are handed to pline next to the matching rate.
    n_dev_real, n_dev_attack = dev_real.shape[0], dev_attack.shape[0]
    n_tst_real, n_tst_attack = tst_real.shape[0], tst_attack.shape[0]

    thres = threshold_func(dev_attack, dev_real)
    dev_far, dev_frr = farfrr(dev_attack, dev_real, thres)
    tst_far, tst_frr = farfrr(tst_attack, tst_real, thres)

    report = [
        " threshold: %.4f" % thres,
        pline("dev ", dev_far, n_dev_attack, dev_frr, n_dev_real),
        pline("test", tst_far, n_tst_attack, tst_frr, n_tst_real),
    ]
    return report, thres
def _lines(self, threshold, label, neg, pos, idx, **kwargs):
    """Draws the threshold line of the parent class and, unless disabled
    through the ``iapmr_line`` entry of the context meta, the IAPMR curve on
    a second y axis.

    ``neg`` is expected to hold the licit negatives at position 0 and the
    spoof scores at position 1; ``pos`` holds the positives at position 0.
    """
    spoof_scores = neg[1]
    licit_neg = neg[0]
    licit_pos = pos[0]

    # plot EER threshold vertical line
    super(HistVuln, self)._lines(
        threshold, label, licit_neg, licit_pos, idx, **kwargs)

    # The IAPMR line is drawn when the option is absent or set to a true
    # value; leave early only when it is explicitly switched off.
    meta = self._ctx.meta
    if "iapmr_line" in meta and not meta["iapmr_line"]:
        return

    # Plot iapmr_line (accepted PA vs threshold)
    iapmr, _ = farfrr(spoof_scores, [0.0], threshold)
    ax2 = mpl.twinx()
    # we never want grid lines on axis 2
    ax2.grid(False)
    real_data = self._ctx.meta.get("real_data", True)
    _iapmr_plot(spoof_scores, threshold, iapmr, real_data=real_data)

    # Position of this system on the current page of plots.
    n = idx % self._step_print
    col = n % self._ncols
    pages_done = int(idx / self._step_print)
    rest_print = self.n_systems - pages_done * self._step_print

    # Label the second axis only on the last column or on the last plot.
    on_last_column = col == self._ncols - 1
    on_last_plot = n == rest_print - 1
    if on_last_column or on_last_plot:
        ax2.set_ylabel("IAPMR (%)", color="C3")
    ax2.tick_params(axis="y", colors="C3")
    ax2.yaxis.label.set_color("C3")
    ax2.spines["right"].set_color("C3")
def _get_farfrr(self, x, y, thres):
    """Returns the ``farfrr`` result at ``thres`` together with the pair
    ``(first rate, 1 - second rate)`` derived from it."""
    rates = farfrr(x, y, thres)
    first_rate = rates[0]
    second_rate_complement = 1 - rates[1]
    return rates, (first_rate, second_rate_complement)
def test_thresholding():
    """Checks eer_threshold, far_threshold, frr_threshold and
    min_hter_threshold on a non-separable and on a separable data set."""
    from . import eer_threshold, far_threshold, frr_threshold, farfrr, \
        correctly_classified_positives, correctly_classified_negatives, \
        min_hter_threshold

    def count(array, value=True):
        """Counts occurrences of a certain value in an array"""
        return list(array == value).count(True)

    def correct_counts(neg_scores, pos_scores, thr):
        """Number of correctly classified positives and negatives at thr"""
        n_pos = count(correctly_classified_positives(pos_scores, thr))
        n_neg = count(correctly_classified_negatives(neg_scores, thr))
        return n_pos, n_neg

    # This example will demonstrate and check the use of eer_threshold() to
    # calculate the threshold that minimizes the EER.

    # This test set is not separable.
    positives = bob.io.base.load(F('nonsep-positives.hdf5'))
    negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
    threshold = eer_threshold(negatives, positives)

    sorted_positives = numpy.sort(positives)
    sorted_negatives = numpy.sort(negatives)

    # Of course we have to make sure that it sets the EER correctly:
    ccp, ccn = correct_counts(negatives, positives, threshold)
    assert (ccp - ccn) <= 1

    for t in (0, 0.001, 0.1, 0.5, 0.9, 0.999, 1):
        # Let's also test the far_threshold and the frr_threshold functions
        threshold_far = far_threshold(sorted_negatives, [], t, is_sorted=True)
        threshold_frr = frr_threshold([], sorted_positives, t, is_sorted=True)

        # Check that the obtained FAR and FRR values are not larger than the
        # requested ones
        far = farfrr(negatives, positives, threshold_far)[0]
        frr = farfrr(negatives, positives, threshold_frr)[1]

        far_is_defined = not math.isnan(threshold_far)
        if far_is_defined:
            assert far <= t, (far, t)
            assert t - far <= 0.1

        frr_is_defined = not math.isnan(threshold_frr)
        if frr_is_defined:
            assert frr <= t, (frr, t)
            # test that the values are at least somewhere in the range
            assert t - frr <= 0.1

    # If the set is separable, the calculation of the threshold is a little
    # bit trickier, as you have no points in the middle of the range to
    # compare things to. This is where the currently used recursive algorithm
    # seems to do better. Let's verify
    positives = bob.io.base.load(F('linsep-positives.hdf5'))
    negatives = bob.io.base.load(F('linsep-negatives.hdf5'))
    threshold = eer_threshold(negatives, positives)
    # the result here is 3.2 (which is what is expected)
    assert threshold == 3.2

    # Of course we have to make sure that it sets the EER correctly:
    ccp, ccn = correct_counts(negatives, positives, threshold)
    nose.tools.eq_(ccp, ccn)

    # The second option for the calculation of the threshold is to use the
    # minimum HTER.
    threshold2 = min_hter_threshold(negatives, positives)
    assert threshold2 == 3.2
    nose.tools.eq_(threshold, threshold2)  # in this particular case

    # Both classes must be equally well classified at this threshold too:
    ccp, ccn = correct_counts(negatives, positives, threshold2)
    nose.tools.eq_(ccp, ccn)
def _get_farfrr(self, x, y, thres):
    """Returns the ``farfrr`` result at ``thres`` together with a list
    holding ``ppndf`` applied to each of its values."""
    rates = farfrr(x, y, thres)
    deviates = list(map(ppndf, rates))
    return rates, deviates
def compute(self, idx, input_scores, input_names):
    """Implements plots: draws the development curve and, when evaluation is
    enabled, the evaluation curve of one system.

    The development curve is always drawn on figure 1. When ``self._eval``
    is true the evaluation curve follows, on figure 2 if ``self._split`` is
    set and otherwise on the same figure with a dashed line style.

    For each curve, FNMR lines are drawn through ``self._draw_fnmrs``. The
    values come from ``self._fnmrs_at`` when it is set; otherwise the FNMR of
    the plotted set at the development EER threshold is used.

    Parameters
    ----------
    idx
        Index of the system; selects color, line style and label.
    input_scores
        Indexable whose item 0 holds the development scores and, when
        evaluation is enabled, item 1 the evaluation scores; each is passed
        through ``clean_scores``.
    input_names
        Indexable of names reported in the log messages (item 0 for the
        development set, item 1 for the evaluation set).
    """
    dev_scores = clean_scores(input_scores[0])
    # Evaluation scores are cleaned up-front so that a problem with them
    # shows up before anything is drawn.
    eval_scores = clean_scores(input_scores[1]) if self._eval else None

    # --- development curve (drawn whether or not evaluation is enabled)
    mpl.figure(1)
    logger.info(f"dev curve using {input_names[0]}")
    self._plot(
        dev_scores["licit_neg"],
        dev_scores["licit_pos"],
        dev_scores["spoof"],
        npoints=self._points,
        tpr=self._tpr,
        min_far=self._min_dig,
        color=self._colors[idx],
        linestyle=self._linestyles[idx],
        label=self._label("dev", idx),
        alpha=self._alpha,
    )
    if self._fnmrs_at:
        fnmrs_dev = self._fnmrs_at
    else:
        logger.info("Plotting fnmr line at dev eer threshold for dev")
        dev_threshold = get_thres(
            criter="eer",
            neg=dev_scores["licit_neg"],
            pos=dev_scores["licit_pos"],
        )
        # [0.0] only fills the negatives slot; the FNMR is the second
        # returned value, computed on the positives.
        _, fnmr_at_dev_threshold = farfrr(
            [0.0], dev_scores["licit_pos"], dev_threshold)
        fnmrs_dev = [fnmr_at_dev_threshold]
    self._draw_fnmrs(idx, dev_scores, fnmrs_dev)

    # Only dev scores available
    if not self._eval:
        return

    # --- evaluation curve
    if self._split:
        mpl.figure(2)

    # Add the eval plot
    linestyle = "--" if not self._split else self._linestyles[idx]
    logger.info(f"eval curve using {input_names[1]}")
    self._plot(
        eval_scores["licit_neg"],
        eval_scores["licit_pos"],
        eval_scores["spoof"],
        linestyle=linestyle,
        npoints=self._points,
        tpr=self._tpr,
        min_far=self._min_dig,
        color=self._colors[idx],
        label=self._label("eval", idx),
        alpha=self._alpha,
    )
    if self._fnmrs_at:
        fnmrs_dev = self._fnmrs_at
    else:
        logger.info("printing fnmr at dev eer threshold for eval")
        # The threshold is the one found on the development set above.
        _, fnmr_at_dev_threshold = farfrr(
            [0.0], eval_scores["licit_pos"], dev_threshold)
        fnmrs_dev = [fnmr_at_dev_threshold]
    self._draw_fnmrs(idx, eval_scores, fnmrs_dev, True)