def get_cllr_df(df_lrs):
    """
    Builds a table of Cllr results per rater, plus one pooled row per year.

    Every column of `df_lrs` other than 'Groundtruth', 'pictures', 'pair_id'
    and 'res_pair_id' is treated as one rater. Rows where the rater has no
    value are dropped for that rater, and raters without any value are
    skipped. The remaining values are used as exponents of 10 before the
    Cllr is calculated, so they are presumably 10log LRs (TODO confirm
    against the caller).

    Raters whose column name starts with '2011', '2012', '2013' or '2017'
    get that year as their group; their LRs are additionally pooled per year
    and reported in an extra row with group '<year>-all'. All other raters
    form their own group.

    :param df_lrs: DataFrame with a 'Groundtruth' column and one column per
        rater
    :return: DataFrame with columns 'rater', 'group', 'cllr' and 'cllr_min',
        the latter two rounded to 4 decimals; per-rater rows come first,
        followed by the pooled per-year rows
    """
    non_rater_columns = ('Groundtruth', 'pictures', 'pair_id', 'res_pair_id')
    pooled_years = ('2011', '2012', '2013', '2017')

    cllrs = []
    # Per year, the pooled LRs of each of the two classes returned by
    # Xy_to_Xn. The classes are kept in separate lists because they generally
    # differ in size; pairing them up (e.g. with zip) would silently drop the
    # surplus LRs of the larger class.
    lrs_per_year = defaultdict(lambda: ([], []))

    for rater in df_lrs.columns:
        if rater in non_rater_columns:
            continue
        df_lr_y = df_lrs.loc[df_lrs[rater].notna(), [rater, 'Groundtruth']]
        if len(df_lr_y) == 0:
            continue

        X1, X2 = Xy_to_Xn(10**df_lr_y[rater], df_lr_y['Groundtruth'])
        X1, X2 = list(X1), list(X2)

        year = rater[:4]
        if year in pooled_years:
            group = year
            pooled_1, pooled_2 = lrs_per_year[year]
            pooled_1.extend(X1)
            pooled_2.extend(X2)
        else:
            group = rater

        cllr_results = calculate_cllr(X1, X2)
        cllrs.append([
            rater, group,
            round(cllr_results.cllr, 4),
            round(cllr_results.cllr_min, 4)
        ])

    for year, (pooled_1, pooled_2) in lrs_per_year.items():
        cllr_results = calculate_cllr(pooled_1, pooled_2)
        cllrs.append([
            year, year + '-all',
            round(cllr_results.cllr, 4),
            round(cllr_results.cllr_min, 4)
        ])
    return pd.DataFrame(cllrs, columns=['rater', 'group', 'cllr', 'cllr_min'])
def plot_score_distribution_and_calibrator_fit(calibrator,
                                               scores,
                                               y,
                                               savefig=None,
                                               show=None):
    """
    Plots the distribution of the scores together with the score
    distributions fitted by the calibrator.

    The scores are drawn as density histograms: one per class if `y` holds
    exactly two distinct values, otherwise a single histogram of all scores.
    The fitted curves are read from `calibrator.p0` and `calibrator.p1` after
    transforming a grid of scores from 0 up to (not including) 1.
    (Note - for an ELUB bounder, pass its first step calibrator.)

    :param calibrator: fitted calibrator exposing `transform`, `p0` and `p1`
    :param scores: scores to draw the histogram(s) of
    :param y: class labels belonging to `scores`
    :param savefig: if not None, the figure is saved to this location
    :param show: if truthy, or if `savefig` is None, the figure is shown
    """
    fig = plt.figure(figsize=(10, 10), dpi=100)
    x = np.arange(0, 1, .01)
    # Called for its side effect only: p0 and p1 are read below and are
    # presumably set by transform for the scores it was given - TODO confirm.
    calibrator.transform(x)
    if len(set(y)) == 2:
        points0, points1 = Xy_to_Xn(scores, y)
        plt.hist(points0, bins=20, alpha=.25, density=True, label='class 0')
        plt.hist(points1, bins=20, alpha=.25, density=True, label='class 1')
    else:
        plt.hist(scores, bins=20, alpha=.25, density=True, label='class x')
    plt.plot(x, calibrator.p1, label='fit class 1')
    plt.plot(x, calibrator.p0, label='fit class 0')
    # Without a legend the labels set above are never displayed.
    plt.legend()

    if savefig is not None:
        plt.savefig(savefig)
    if show or savefig is None:
        plt.show()
    if savefig is not None:
        # Close only after showing: a closed figure can no longer be shown.
        plt.close(fig)
Example #3
0
def calculate_metrics_dict(scores, y, lr_predicted, label):
    """
    Computes Cllr, AUC and accuracy for an lr system.

    The Cllr is calculated from the predicted LRs (split with `Xy_to_Xn`)
    and rounded to 4 decimals. AUC and accuracy are calculated from the
    scores, where the accuracy uses `scores > .5` as the predicted class.

    :param scores: scores of the system; must support elementwise `> .5`
    :param y: true class labels
    :param lr_predicted: predicted LRs belonging to `y`
    :param label: suffix appended to every key of the result
    :return: dict with keys 'cllr', 'auc' and 'accuracy', each followed by
        `label`
    """
    lrs_first, lrs_second = Xy_to_Xn(lr_predicted, y)

    cllr = round(calculate_cllr(lrs_first, lrs_second).cllr, 4)
    auc = roc_auc_score(y, scores)
    accuracy = accuracy_score(y, scores > .5)

    metrics = {}
    metrics['cllr' + label] = cllr
    metrics['auc' + label] = auc
    metrics['accuracy' + label] = accuracy
    return metrics
Example #4
0
def plot_lr_distributions(predicted_log_lrs, y, savefig=None, show=None):
    """
    Plots the 10log LRs generated for the two hypotheses by the fitted system.

    The LRs are split by class with `Xy_to_Xn` and drawn as two overlapping
    density histograms.

    :param predicted_log_lrs: 10log LRs to plot
    :param y: class labels belonging to `predicted_log_lrs`
    :param savefig: if not None, the figure is saved to this location
    :param show: if truthy, or if `savefig` is None, the figure is shown
    """
    fig = plt.figure(figsize=(10, 10), dpi=100)
    points0, points1 = Xy_to_Xn(predicted_log_lrs, y)
    # Labelled so the legend tells the two histograms apart.
    plt.hist(points0, bins=20, alpha=.25, density=True, label='class 0')
    plt.hist(points1, bins=20, alpha=.25, density=True, label='class 1')
    plt.xlabel('10log LR')
    plt.legend()

    if savefig is not None:
        plt.savefig(savefig)
    if show or savefig is None:
        plt.show()
    if savefig is not None:
        # Close only after showing: a closed figure can no longer be shown.
        plt.close(fig)
def calculate_metrics_dict(number_of_scores, scores, y, lr_predicted,
                           cal_fraction_valid, label):
    """
    Computes Cllr, AUC, accuracy and valid-fraction metrics for an lr system.

    The Cllr is calculated from the predicted LRs (split with `Xy_to_Xn`)
    and rounded to 4 decimals. AUC and accuracy are calculated from the
    scores, where the accuracy uses `scores > .5` as the predicted class.

    :param number_of_scores: number that `len(scores)` is divided by to get
        the 'test_fraction_valid' entry
    :param scores: scores of the system; must support elementwise `> .5`
    :param y: true class labels
    :param lr_predicted: predicted LRs belonging to `y`
    :param cal_fraction_valid: dict of fractions; its mean is reported, and
        each entry is also copied to the result as 'cal_fraction_<key>'
        (without `label`)
    :param label: suffix appended to the keys of the five summary metrics
    :return: dict with the metrics
    """
    lrs_first, lrs_second = Xy_to_Xn(lr_predicted, y)

    cllr = round(calculate_cllr(lrs_first, lrs_second).cllr, 4)
    auc = roc_auc_score(y, scores)
    accuracy = accuracy_score(y, scores > .5)
    mean_cal_fraction = np.mean(list(cal_fraction_valid.values()))
    test_fraction = len(scores) / number_of_scores

    metrics = {
        'cllr' + label: cllr,
        'auc' + label: auc,
        'accuracy' + label: accuracy,
        'cal_fraction_valid' + label: mean_cal_fraction,
        'test_fraction_valid' + label: test_fraction,
    }
    # The individual calibration fractions are added without the label suffix.
    metrics.update({
        f'cal_fraction_{name}': fraction
        for name, fraction in cal_fraction_valid.items()
    })
    return metrics
Example #6
0
def plot_tippett(predicted_log_lrs, y, savefig=None, show=None):
    """
    Plots the 10log LRs in a Tippett plot.

    For 100 evenly spaced values between the smallest and the largest LR,
    the plot shows per class the percentage of LRs that is strictly greater
    than that value.

    :param predicted_log_lrs: 10log LRs to plot
    :param y: class labels belonging to `predicted_log_lrs`
    :param savefig: if not None, the figure is saved to this location
    :param show: if truthy, or if `savefig` is None, the figure is shown
    """
    xplot = np.linspace(start=np.min(predicted_log_lrs),
                        stop=np.max(predicted_log_lrs),
                        num=100)
    lr_0, lr_1 = Xy_to_Xn(predicted_log_lrs, y)
    # Each comparison yields a boolean array over xplot; summing them counts,
    # per xplot value, the LRs that exceed it.
    perc0 = (sum(i > xplot for i in lr_0) / len(lr_0)) * 100
    perc1 = (sum(i > xplot for i in lr_1) / len(lr_1)) * 100

    fig = plt.figure(figsize=(10, 10), dpi=100)
    plt.plot(xplot, perc1, color='b', label=r'LRs given $\mathregular{H_1}$')
    plt.plot(xplot, perc0, color='r', label=r'LRs given $\mathregular{H_2}$')
    plt.axvline(x=0, color='k', linestyle='--')
    plt.xlabel('Log likelihood ratio')
    plt.ylabel('Cumulative proportion')
    plt.title('Tippett plot')
    plt.legend()

    if savefig is not None:
        plt.savefig(savefig)
    if show or savefig is None:
        plt.show()
    if savefig is not None:
        # Close only after showing: a closed figure can no longer be shown.
        plt.close(fig)