def save_csv_scores(output_path: str):
    """Export the GRANDTEST test-subset scores of each approach as CSV files.

    For every approach and every demographic grouping (sex, age, skin tone
    and the attacks), the directory
    ``{output_path}/csv/{approach_folder}/{demographic}`` is created and
    handed to ``scores_to_csv`` together with the grouped scores.

    Args:
        output_path: Root directory under which the ``csv`` tree is written.
    """
    print("Saving scores to csv...")

    # The label names the output folder and is forwarded to scores_to_csv, so
    # it has to describe the approach that is actually loaded.
    approaches = {
        "Quality RBF": ScoresProvider.get(
            approach=Approach.QUALITY_RBF,
            protocol=Protocol.GRANDTEST,
            subset=Subset.TEST,
        ),
        # "Quality Linear": ScoresProvider.get(
        #     approach=Approach.QUALITY_LINEAR,
        #     protocol=Protocol.GRANDTEST,
        #     subset=Subset.TEST,
        # ),
        "Auxiliary": ScoresProvider.get(
            approach=Approach.AUXILIARY,
            protocol=Protocol.GRANDTEST,
            subset=Subset.TEST,
        ),
    }

    for approach, scores in approaches.items():
        folder = approach.lower().replace(" ", "_")
        demographic_scores = {
            "sex": scores.get_fair_sex_subset(),
            "age": scores.get_fair_age_subset(),
            "skin_tone": scores.get_fair_skin_tone_subset(),
            # Wrapped in a one-entry dict so it has the same
            # {group_name: scores} shape as the demographic subsets.
            "attacks": {"attacks": scores.get_attacks_with_ids()},
        }
        for demographic, fair_scores in demographic_scores.items():
            csv_output_path = f"{output_path}/csv/{folder}/{demographic}"
            os.makedirs(csv_output_path, exist_ok=True)
            scores_to_csv(fair_scores, approach, csv_output_path)
def calculate_demographic_bpcer_bar_chart(output_path: str):
    """Write BPCER comparison bar charts for sex, age and skin tone.

    Charts are saved under ``{output_path}/demographic/bar_charts``. For the
    skin-tone demographic a second chart is written from the dataframe
    regrouped with ``SKIN_TONE_GROUP_POLICY``.

    Args:
        output_path: Root directory for the generated charts.
    """
    print("> Demographic | Calculating BPCER Bar Charts...")

    charts_dir = f"{output_path}/demographic/bar_charts"
    os.makedirs(charts_dir, exist_ok=True)

    subsets_by_approach = {
        # "Quality SVM Linear": ScoresProvider.get_subsets(
        #     approach=Approach.QUALITY_LINEAR, protocol=Protocol.GRANDTEST
        # ),
        "Quality SVM RBF": ScoresProvider.get_subsets(
            approach=Approach.QUALITY_RBF, protocol=Protocol.GRANDTEST
        ),
        "Auxiliary": ScoresProvider.get_subsets(
            approach=Approach.AUXILIARY, protocol=Protocol.GRANDTEST
        ),
    }

    for demographic in (Demographic.SEX, Demographic.AGE, Demographic.SKIN_TONE):
        frame = create_demographic_dataframe_comparison(
            Metric.BPCER, demographic, subsets_by_approach
        )
        create_metric_bar_chart_comparison(
            frame,
            f"{charts_dir}/{demographic.value}_bpcer_comparision_bar_chart.png",
        )

        if demographic != Demographic.SKIN_TONE:
            continue

        # Skin tone additionally gets a chart of the regrouped dataframe.
        grouped_frame = group_dataframe(frame, SKIN_TONE_GROUP_POLICY)
        create_metric_bar_chart_comparison(
            grouped_frame,
            f"{charts_dir}/{demographic.value}_grouped_bpcer_comparision_bar_chart.png",
        )
def calculate_hists_and_curves(output_path: str, only_grandtest: bool = False):
    """Save DET curves and score histograms per approach, protocol and subset.

    Output goes to
    ``{output_path}/hists_and_curves/{approach}/{protocol}``. Histograms are
    written twice (normalized and not) and carry a vertical line at the EER
    threshold obtained from the devel subset. For the GRANDTEST protocol,
    ``calculate_hists_and_curves_pai_types`` is invoked as well.

    Args:
        output_path: Root directory for the generated figures.
        only_grandtest: When true, every protocol other than GRANDTEST is
            skipped.
    """
    print("> Protocols | Calculating Hists and Curves...")

    os.makedirs(f"{output_path}/hists_and_curves", exist_ok=True)

    scores_by_approach = {
        "Quality SVM RBF": ScoresProvider.all(Approach.QUALITY_RBF),
        # "Quality SVM LINEAR": ScoresProvider.all(Approach.QUALITY_LINEAR),
        "Auxiliary": ScoresProvider.all(Approach.AUXILIARY),
    }
    grandtest_name = Protocol.GRANDTEST.value

    for approach, scores_by_protocol in scores_by_approach.items():
        # Keep only the protocols that have to be processed.
        selected = {
            name: subsets
            for name, subsets in scores_by_protocol.items()
            if not only_grandtest or name == grandtest_name
        }
        metrics_by_protocol = {
            name: Metrics(
                devel_scores=subsets.get("devel"),
                test_scores=subsets.get("test"),
            )
            for name, subsets in selected.items()
        }

        for protocol_name, subsets in selected.items():
            approach_dir = approach.replace(" ", "_").lower()
            protocol_dir = (
                f"{output_path}/hists_and_curves/{approach_dir}/{protocol_name}"
            )
            os.makedirs(protocol_dir, exist_ok=True)

            eer_th = metrics_by_protocol[protocol_name].get_eer_th(Subset.DEVEL)

            for subset, scores in subsets.items():
                det_path = f"{protocol_dir}/{subset}_det.png"
                det_plotter = DetPlotter(title=f"Det Curve ({subset})")
                det_plotter.save(det_path, scores)

                for normalize_hist in (True, False):
                    hist_path = get_filename(protocol_dir, subset, normalize_hist)
                    hist_plotter = HistogramPlotter(
                        genuine_label=0,
                        plot_vertical_line_on_value=eer_th,
                        legend_vertical_line="EER @ Devel",
                        normalize=normalize_hist,
                    )
                    hist_plotter.save(hist_path, scores)

                if protocol_name == grandtest_name:
                    calculate_hists_and_curves_pai_types(
                        protocol_dir, subset, scores, eer_th
                    )
def test_should_compute_eer_correctly_from_scores():
    """Check the EER computed from the Quality Linear GRANDTEST devel scores."""
    scores = ScoresProvider.get(
        approach=Approach.QUALITY_LINEAR,
        protocol=Protocol.GRANDTEST,
        subset=Subset.DEVEL,
    )

    # The threshold returned alongside the EER is not checked here.
    eer_value, _ = eer(scores.get_numpy_scores(), scores.get_numpy_labels())

    # approx() wraps the expected reference, so the 1% relative tolerance is
    # derived from 0.269 rather than from the value under test.
    assert eer_value == pytest.approx(0.269, rel=0.01)
def grandtest_fair_demographic_bpcer(
    approach: Approach, demographic: Demographic
) -> Dict:
    """Return the GRANDTEST BPCER of an approach for one demographic.

    Scores are loaded for every subset listed by ``Subset.options()`` and
    passed to ``MetricsDemographics.from_subset_scores``.

    Args:
        approach: Approach whose scores are loaded.
        demographic: Demographic to evaluate. Values ``"sex"`` and ``"age"``
            select the matching BPCER getter; any other value falls back to
            the skin-tone getter.

    Returns:
        A dict with the single key ``"bpcer"``.
    """
    scores_subsets = {
        subset.value: ScoresProvider.get(
            approach=approach, protocol=Protocol.GRANDTEST, subset=subset
        )
        for subset in Subset.options()
    }
    metrics = MetricsDemographics.from_subset_scores(scores_subsets)

    demographic_name = demographic.value
    if demographic_name == "sex":
        return {"bpcer": metrics.get_bpcer_sex()}
    if demographic_name == "age":
        return {"bpcer": metrics.get_bpcer_age()}
    return {"bpcer": metrics.get_bpcer_skin_tone()}
def get(
    approach: Approach,
    protocol: Protocol,
    dataset: Dataset = None,
    device: Device = None,
    pai: CoarseGrainedPai = None,
) -> Dict:
    """Load the scores of every subset and return their metrics analysis.

    Args:
        approach: Approach whose scores are loaded.
        protocol: Protocol whose scores are loaded.
        dataset: Forwarded unchanged to ``ScoresProvider.get``.
        device: Forwarded unchanged to ``ScoresProvider.get``.
        pai: Forwarded unchanged to ``ScoresProvider.get``.

    Returns:
        The result of ``get_analysis_from_metrics`` for the devel/test
        metrics.
    """
    scores_subsets = {}
    for subset in Subset.options():
        scores_subsets[subset.value] = ScoresProvider.get(
            approach=approach,
            protocol=protocol,
            subset=subset,
            dataset=dataset,
            device=device,
            pai=pai,
        )

    devel_scores = scores_subsets.get(Subset.DEVEL.value)
    test_scores = scores_subsets.get(Subset.TEST.value)
    metrics = Metrics(devel_scores=devel_scores, test_scores=test_scores)
    return get_analysis_from_metrics(metrics)
def calculate_demographic_percentile_comparison(
    output_path: str,
    protocol_left: Protocol,
    protocol_right: Protocol,
    title_left: str,
    title_right: str,
):
    """Plot percentile curves of the Auxiliary approach for two protocols.

    For each protocol the Auxiliary test scores are split with
    ``get_fair_sex_subset`` and the score percentiles of every group, plus
    those of the attacks, are computed. The two protocols are drawn side by
    side (left/right subplot) and saved as
    ``percentile_comparison_{demographic}.png`` under
    ``{output_path}/demographic/percentiles/comparison_{left}_and_{right}``.
    ``calculate_demographic_bias_metric`` is also run on the percentiles.

    Args:
        output_path: Root directory for the generated figures.
        protocol_left: Protocol drawn in the first subplot.
        protocol_right: Protocol drawn in the second subplot.
        title_left: Title of the first subplot (also used as dict key).
        title_right: Title of the second subplot (also used as dict key).
    """
    print("> Demographic | Calculating Percentile Comparison Graphs...")

    output_path_percentiles = f"{output_path}/demographic/percentiles/comparison_{protocol_left.value}_and_{protocol_right.value}"
    os.makedirs(output_path_percentiles, exist_ok=True)

    def load_auxiliary(protocol):
        """Return (test scores, FRR threshold @ 0.05, FRR threshold @ 0.15)."""
        devel_scores = ScoresProvider.get(
            approach=Approach.AUXILIARY, protocol=protocol, subset=Subset.DEVEL
        )
        test_scores = ScoresProvider.get(
            approach=Approach.AUXILIARY, protocol=protocol, subset=Subset.TEST
        )
        metrics = Metrics(devel_scores=devel_scores, test_scores=test_scores)
        # Both thresholds are taken on the TEST subset.
        return (
            test_scores,
            metrics.get_frr_th(Subset.TEST, 0.05),
            metrics.get_frr_th(Subset.TEST, 0.15),
        )

    approaches = {
        title_left: load_auxiliary(protocol_left),
        title_right: load_auxiliary(protocol_right),
    }

    p = np.linspace(0, 100, 6001)
    # NOTE(review): NumPy 1.22 renamed this keyword to `method`; the old
    # spelling is kept so older NumPy versions keep working.
    interpolation = "linear"

    demographic_percentiles = {}
    for approach, (scores, lower_frr_th, higher_frr_th) in approaches.items():
        # Only the sex demographic is compared here (the bias metric below is
        # called with calculate_age=False and calculate_skin_tone=False).
        demographic_scores = {"sex": scores.get_fair_sex_subset()}

        # Attack percentiles do not depend on the demographic: compute once.
        attack_values = np.array(list(scores.get_attacks_with_ids().values()))
        attack_percentiles = np.percentile(
            attack_values, p, interpolation=interpolation
        )

        for demographic, fair_scores in demographic_scores.items():
            percentiles = {
                sub_demographic: np.percentile(
                    np.array(list(scores_demographic.values())),
                    p,
                    interpolation=interpolation,
                )
                for sub_demographic, scores_demographic in fair_scores.items()
            }
            percentiles["ATTACKS"] = attack_percentiles
            demographic_percentiles.setdefault(demographic, {})[approach] = (
                percentiles,
                lower_frr_th,
                higher_frr_th,
            )

    calculate_demographic_bias_metric(
        demographic_percentiles,
        approaches.keys(),
        output_path_percentiles,
        calculate_age=False,
        calculate_skin_tone=False,
        verbose_text="> Demographic | Calculating Demographic Bias Metric (DBM) for comparison...",
    )

    for demographic, approaches_percentiles in demographic_percentiles.items():
        fig, axlist = plt.subplots(1, 2)

        for subplot_index, (
            approach,
            (percentiles, lower_frr_th, higher_frr_th),
        ) in enumerate(approaches_percentiles.items()):
            ax = axlist[subplot_index]

            for sub_demographic, percentil in percentiles.items():
                # Attacks are plotted against p, every other group against
                # 100 - p.
                values = p if sub_demographic == "ATTACKS" else 100 - p
                ax.plot(
                    percentil,
                    values,
                    label=sub_demographic.title().replace("_", " "),
                    linestyle="-",
                    color=COLORS.get(sub_demographic),
                    marker=MARKERS.get(sub_demographic),
                    markevery=300,
                )

            ax.yaxis.set_major_formatter(mtick.PercentFormatter())

            # Shade the score range between the two FRR thresholds.
            ax.fill_between(
                [lower_frr_th, higher_frr_th],
                100,
                facecolor="gray",
                alpha=0.2,
                label="Working Points",
            )

            ax.set_title(approach)
            # Visibility is passed positionally: the keyword was renamed from
            # `b` to `visible` in Matplotlib, the position works with both.
            ax.grid(True, which="major", color="#CCCCCC", linestyle="--")
            ax.grid(True, which="minor", color="#CCCCCC", linestyle=":")
            ax.yaxis.set_ticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])

            if subplot_index == 1:
                ax.yaxis.tick_right()

            for tick in ax.get_yticklabels():
                tick.set_rotation(55)
            for tick in ax.get_xticklabels():
                tick.set_rotation(45)

        fig.text(0.01, 0.59, "BPCER", va="center", rotation="vertical")
        fig.text(0.97, 0.59, "APCER", va="center", rotation="vertical", color="red")
        # Margins are set explicitly; tight_layout is intentionally not used.
        fig.subplots_adjust(top=0.9, left=0.1, right=0.9, bottom=0.15)
        legend_without_duplicate_labels(axlist.flatten()[-2])

        filename = f"{output_path_percentiles}/percentile_comparison_{demographic}.png"
        fig.savefig(filename)
        plt.close("all")
import pytest

from gradgpad.foundations.metrics.metrics import Metrics
from gradgpad.foundations.scores.approach import Approach
from gradgpad.foundations.scores.protocol import Protocol
from gradgpad.foundations.scores.scores_provider import ScoresProvider
from gradgpad.foundations.scores.subset import Subset


@pytest.mark.unit
@pytest.mark.parametrize(
    "devel_scores,test_scores",
    [
        (
            ScoresProvider.get(
                approach=Approach.QUALITY_LINEAR,
                protocol=Protocol.GRANDTEST,
                subset=Subset.DEVEL,
            ),
            ScoresProvider.get(
                approach=Approach.QUALITY_LINEAR,
                protocol=Protocol.GRANDTEST,
                subset=Subset.TEST,
            ),
        )
    ],
)
def test_should_calculate_eer_for_devel_and_test(devel_scores, test_scores):
    """Check the devel and test EER of the Quality Linear GRANDTEST scores."""
    metrics = Metrics(devel_scores, test_scores)

    # approx() wraps the expected reference, so the 1% relative tolerance is
    # derived from the reference rather than from the value under test.
    assert metrics.get_eer(Subset.DEVEL) == pytest.approx(0.269, rel=0.01)
    assert metrics.get_eer(Subset.TEST) == pytest.approx(0.246, rel=0.01)
def calculate_demographic_percentile(
    output_path: str, protocol: Protocol = Protocol.GRANDTEST
):
    """Plot per-demographic score percentile curves for each approach.

    For every approach the test scores are split by sex, age, skin tone and
    grouped skin tone; the score percentiles of each group, plus those of the
    attacks, are computed. One figure per demographic is saved as
    ``percentile_comparison_{demographic}.png`` under
    ``{output_path}/demographic/percentiles/{protocol.value}``, with one
    subplot per approach and dashed vertical lines at the devel and test EER
    thresholds.

    Args:
        output_path: Root directory for the generated figures.
        protocol: Protocol whose scores are loaded. For
            ``Protocol.GRANDTEST_SEX_50_50`` only the Auxiliary approach is
            plotted; otherwise Quality (RBF) and Auxiliary are.
    """
    print("Calculating Demographic Percentile Graphs...")

    output_path_percentiles = f"{output_path}/demographic/percentiles/{protocol.value}"
    os.makedirs(output_path_percentiles, exist_ok=True)

    def load(approach):
        """Return (test scores, EER threshold @ devel, EER threshold @ test)."""
        devel_scores = ScoresProvider.get(
            approach=approach, protocol=protocol, subset=Subset.DEVEL
        )
        test_scores = ScoresProvider.get(
            approach=approach, protocol=protocol, subset=Subset.TEST
        )
        metrics = Metrics(devel_scores=devel_scores, test_scores=test_scores)
        return (
            test_scores,
            metrics.get_eer_th(Subset.DEVEL),
            metrics.get_eer_th(Subset.TEST),
        )

    # Quality scores are only loaded when they are going to be plotted.
    approaches = {}
    if protocol != Protocol.GRANDTEST_SEX_50_50:
        approaches["Quality"] = load(Approach.QUALITY_RBF)
    approaches["Auxiliary"] = load(Approach.AUXILIARY)

    p = np.linspace(0, 100, 6001)
    # NOTE(review): NumPy 1.22 renamed this keyword to `method`; the old
    # spelling is kept so older NumPy versions keep working.
    interpolation = "linear"

    demographic_percentiles = {}
    for approach, (scores, eer_th_devel, eer_th_test) in approaches.items():
        demographic_scores = {
            "sex": scores.get_fair_sex_subset(),
            "age": scores.get_fair_age_subset(),
            "skin_tone": scores.get_fair_skin_tone_subset(),
            "grouped_skin_tone": scores.get_fair_grouped_skin_tone_subset(),
        }

        # Attack percentiles do not depend on the demographic: compute once.
        attack_values = np.array(list(scores.get_attacks_with_ids().values()))
        attack_percentiles = np.percentile(
            attack_values, p, interpolation=interpolation
        )

        for demographic, fair_scores in demographic_scores.items():
            percentiles = {
                sub_demographic: np.percentile(
                    np.array(list(scores_demographic.values())),
                    p,
                    interpolation=interpolation,
                )
                for sub_demographic, scores_demographic in fair_scores.items()
            }
            percentiles["ATTACKS"] = attack_percentiles
            demographic_percentiles.setdefault(demographic, {})[approach] = (
                percentiles,
                eer_th_devel,
                eer_th_test,
            )

    for demographic, approaches_percentiles in demographic_percentiles.items():
        fig, axlist = plt.subplots(1, 2)

        for subplot_index, (
            approach,
            (percentiles, eer_th_devel, eer_th_test),
        ) in enumerate(approaches_percentiles.items()):
            ax = axlist[subplot_index]

            for sub_demographic, percentil in percentiles.items():
                # Attacks are plotted against p, every other group against
                # 100 - p.
                values = p if sub_demographic == "ATTACKS" else 100 - p
                ax.plot(
                    percentil,
                    values,
                    label=sub_demographic.title().replace("_", " "),
                    linestyle="-",
                    color=COLORS.get(sub_demographic),
                    marker=MARKERS.get(sub_demographic),
                    markevery=300,
                )

            ax.yaxis.set_major_formatter(mtick.PercentFormatter())

            # Vertical dashed lines at the EER thresholds. The colour is given
            # only through the keyword; combining it with a "b--" format
            # string defines the colour twice.
            ax.plot(
                [eer_th_devel, eer_th_devel],
                [0, 100],
                linestyle="--",
                color="orange",
                label="EER @ Devel",
            )
            ax.plot(
                [eer_th_test, eer_th_test],
                [0, 100],
                linestyle="--",
                color="magenta",
                label="EER @ Test",
            )

            ax.set_title(approach)
            # Visibility is passed positionally: the keyword was renamed from
            # `b` to `visible` in Matplotlib, the position works with both.
            ax.grid(True, which="major", color="#CCCCCC", linestyle="--")
            ax.grid(True, which="minor", color="#CCCCCC", linestyle=":")
            ax.yaxis.set_ticks([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])

            if subplot_index == 1:
                ax.yaxis.tick_right()

            for tick in ax.get_yticklabels():
                tick.set_rotation(55)
            for tick in ax.get_xticklabels():
                tick.set_rotation(45)

        fig.text(0.01, 0.59, "BPCER", va="center", rotation="vertical")
        fig.text(0.97, 0.59, "APCER", va="center", rotation="vertical", color="red")
        # Margins are set explicitly; tight_layout is intentionally not used.
        fig.subplots_adjust(top=0.9, left=0.1, right=0.9, bottom=0.25)
        legend_without_duplicate_labels(axlist.flatten()[-2])

        filename = f"{output_path_percentiles}/percentile_comparison_{demographic}.png"
        fig.savefig(filename)
        plt.close("all")