# Assumed imports for these snippets: Xy_to_Xn and calculate_cllr are taken
# here from the `lir` likelihood-ratio library; the exact module paths may
# differ between versions and projects.
from collections import defaultdict

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, roc_auc_score

from lir import calculate_cllr
from lir.util import Xy_to_Xn


def get_cllr_df(df_lrs):
    cllrs = []
    # per year group, collect the two classes of LRs as two separate lists
    all_lrs_per_year = defaultdict(lambda: ([], []))
    for rater in df_lrs.columns:
        if rater not in ['Groundtruth', 'pictures', 'pair_id', 'res_pair_id']:
            # keep only the rows this rater actually scored
            df_lr_y = df_lrs[df_lrs[rater].notna()][[rater, 'Groundtruth']]
            if len(df_lr_y) > 0:
                # the stored values are log10 LRs; Xy_to_Xn splits them by
                # ground-truth class
                X1, X2 = Xy_to_Xn(10**df_lr_y[rater], df_lr_y['Groundtruth'])
                if rater[:4] in ['2011', '2012', '2013', '2017']:
                    group = rater[:4]
                    # extend the two lists separately; zipping X1 and X2
                    # would silently truncate to the shorter of the two
                    all_lrs_per_year[group][0].extend(X1)
                    all_lrs_per_year[group][1].extend(X2)
                else:
                    group = rater
                cllr_results = calculate_cllr(list(X1), list(X2))
                cllrs.append([
                    rater, group,
                    round(cllr_results.cllr, 4),
                    round(cllr_results.cllr_min, 4)
                ])
    # pooled Cllr per year group
    for group, (lrs1, lrs2) in all_lrs_per_year.items():
        cllr_results = calculate_cllr(lrs1, lrs2)
        cllrs.append([
            group, group + '-all',
            round(cllr_results.cllr, 4),
            round(cllr_results.cllr_min, 4)
        ])
    return pd.DataFrame(cllrs, columns=['rater', 'group', 'cllr', 'cllr_min'])
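A minimal usage sketch for get_cllr_df on hypothetical data (the column names '2011-A', '2011-B' and 'expert-X' are invented for illustration): rater columns hold log10 LRs with NaN where a rater skipped a pair, and 'Groundtruth' holds the 0/1 labels.

# Hypothetical input: log10 LRs per rater, NaN where a rater did not judge
# the pair; column names are invented for this sketch.
df_lrs_example = pd.DataFrame({
    '2011-A': [1.2, -0.5, np.nan, 2.0],
    '2011-B': [0.3, np.nan, -1.1, 0.8],
    'expert-X': [2.1, -2.0, 0.4, np.nan],
    'Groundtruth': [1, 0, 0, 1],
})

df_cllr = get_cllr_df(df_lrs_example)
# one row per rater, plus one pooled row per year group whose 'group'
# column reads e.g. '2011-all'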
def calculate_metrics_dict(scores, y, lr_predicted, label):
    """
    Calculates metrics for an lr system given the predicted LRs.
    """
    X1, X2 = Xy_to_Xn(lr_predicted, y)

    return {
        'cllr' + label: round(calculate_cllr(X1, X2).cllr, 4),
        'auc' + label: roc_auc_score(y, scores),
        # hard predictions: threshold the scores at 0.5
        'accuracy' + label: accuracy_score(y, scores > .5)
    }
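A minimal usage sketch, assuming the lir-style calculate_cllr imported above is available; the scores, labels and LRs are made up for illustration.

# Hypothetical scores, labels and predicted LRs for four samples.
scores = np.array([0.9, 0.2, 0.7, 0.1])
y = np.array([1, 0, 1, 0])
lr_predicted = np.array([8.0, 0.25, 3.0, 0.1])

metrics = calculate_metrics_dict(scores, y, lr_predicted, label='_test')
# -> {'cllr_test': ..., 'auc_test': 1.0, 'accuracy_test': 1.0}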
def calculate_metrics_dict(number_of_scores, scores, y, lr_predicted,
                           cal_fraction_valid, label):
    """
    Calculates metrics for an LR system given the predicted LRs, plus the
    fractions of valid calibration and test scores.
    """
    X1, X2 = Xy_to_Xn(lr_predicted, y)
    results = {
        'cllr' + label: round(calculate_cllr(X1, X2).cllr, 4),
        'auc' + label: roc_auc_score(y, scores),
        'accuracy' + label: accuracy_score(y, scores > .5),
        # mean fraction of valid calibration scores over all entries
        'cal_fraction_valid' + label: np.mean(list(cal_fraction_valid.values())),
        # fraction of the original scores that reached this evaluation
        'test_fraction_valid' + label: len(scores) / number_of_scores
    }
    # also report each calibration fraction separately (no label suffix here)
    for key, value in cal_fraction_valid.items():
        results[f'cal_fraction_{key}'] = value
    return results
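A minimal usage sketch for the extended variant; the keys of cal_fraction_valid ('same_source', 'different_source') are hypothetical, as the source does not show what this dict is keyed by.

# Hypothetical: 4 of 5 original scores survived preprocessing; the dict
# keys below are invented for this sketch.
scores = np.array([0.9, 0.2, 0.7, 0.1])
y = np.array([1, 0, 1, 0])
lr_predicted = np.array([8.0, 0.25, 3.0, 0.1])
cal_fraction_valid = {'same_source': 0.95, 'different_source': 0.90}

metrics = calculate_metrics_dict(5, scores, y, lr_predicted,
                                 cal_fraction_valid, label='_test')
# adds 'cal_fraction_valid_test' (mean of the dict values),
# 'test_fraction_valid_test' (4 / 5 = 0.8) and one unlabelled
# 'cal_fraction_<key>' entry per dict key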
def cllr(lrs, y_nhot, target_class):
    """
    Computes the Cllr (log-likelihood ratio cost) for one target class.

    :param lrs: numpy array: N_samples with the LRs from the method
    :param y_nhot: N_samples x N_single_cell_type n_hot encoding of the labels
    :param target_class: vector of length n_single_cell_types with at least one 1
    :return: float: the log-likehood ratio cost
    """

    # a sample belongs to the target class if its n-hot label overlaps the
    # target_class vector anywhere
    is_target = np.max(np.multiply(y_nhot, target_class), axis=1) == 1
    lrs1 = lrs[is_target].flatten()
    lrs2 = lrs[~is_target].flatten()

    if len(lrs1) > 0 and len(lrs2) > 0:
        # non-target LRs first, target LRs second, matching the argument
        # order used in get_cllr_df above
        return calculate_cllr(lrs2, lrs1).cllr
    else:
        # no ground-truth labels for the cell type, so the cllr cannot be
        # calculated; return a sentinel value
        return 9999.0000
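A minimal usage sketch on hypothetical n-hot data. For reference, the metric computed here is the standard Cllr = 0.5 * (mean over target samples of log2(1 + 1/LR) + mean over non-target samples of log2(1 + LR)).

# Hypothetical: 3 samples over 4 single cell types; the target class is the
# third cell type.
lrs = np.array([10.0, 0.1, 5.0])
y_nhot = np.array([[0, 0, 1, 0],
                   [1, 0, 0, 0],
                   [0, 1, 1, 0]])
target_class = np.array([0, 0, 1, 0])

print(cllr(lrs, y_nhot, target_class))
# samples 0 and 2 contain the target cell type (lrs1 = [10.0, 5.0]);
# sample 1 does not (lrs2 = [0.1])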