def score_sig_1A(sim, est_distrib):
    """
    Score an estimated profile against the empirical trinucleotide profile.

    The empirical profile is a length-96 vector: each distinct value found
    in ``sim.T`` (cast to int) is used as an index, the slot receives the
    number of times that value occurs, and the whole vector is divided by
    ``sim.N``. Slots whose index never appears in ``sim.T`` stay at zero.

    The comparison itself is delegated to ``score_sig_1A_base`` (described
    by the original author as the euclidean norm between the normalized
    empirical counts and the reconstituted profile); its return value is
    passed back unchanged.
    """
    contexts, occurrences = np.unique(sim.T, return_counts=True)
    empirical_profile = np.zeros(96)
    # np.unique yields each value once, so every slot is written at most once.
    empirical_profile[contexts.astype(int)] = occurrences
    return score_sig_1A_base(empirical_profile / sim.N, est_distrib)
     else:
         row_list.append(np.nan)
     ordered_table = pd.merge(pred_subclonal,
                              true_subclonal,
                              on='mutation_id',
                              how='inner')
     auc, accuracy, sensitivity, specificity, precision = \
         score2C_base(ordered_table.true_subclonal,
                      ordered_table.pred_subclonal)
     for v in (auc, accuracy, sensitivity, specificity, precision):
         row_list.append(v)
 else:
     for i in range(6):
         row_list.append(np.nan)
 if pred_profile is not None:
     row_list.append(score_sig_1A_base(sig_profile_1A, pred_profile))
     row_list.append(score_sig_1B_base(sig_profile_1B, pred_profile))
     auc, accuracy, sensitivity, specificity, precision = \
         score_sig_1C_base(true_signatures_1C, pred_signatures)
     for v in (auc, accuracy, sensitivity, specificity, precision):
         row_list.append(v)
     if method == 'deconstructsigs':
         nb_rows = min(est_dist.shape[0], true_profile_1E.shape[0])
         score_sig_1D = score_sig_1D_base(
             true_signatures_1D[0:nb_rows],
             pred_signatures_mut[0:nb_rows])
         (min_diff_distrib_mut, max_diff_distrib_mut, std_diff_distrib_mut,
          median_diff_distrib_mut, perc_dist_5, perc_dist_10) = \
             score_sig_1E_base(true_profile_1E[0:nb_rows, :].astype(float),
                               est_dist[0:nb_rows, :].astype(float))
     else:
Beispiel #3
0
        subclonal_sigs.iloc[0, :][final_cols_used] = \
            new_est.pi[largest_subclonal_idx, :]
    nb_mut_largest_subclonal = \
        sum(new_est.qun.argmax(axis=1) == largest_subclonal_idx)
    clonal_largest_sub_pidist = sq_dist[clonal_idx, largest_subclonal_idx]
else:
    (largest_subclonal_phi, largest_subclonal_xi, nb_mut_largest_subclonal,
     clonal_largest_sub_pidist) = [np.nan] * 4
largest_pi_dist = np.max(sq_dist)

raw_data_distrib = np.zeros(96)
val, c = np.unique(input_table_nona.trinucleotide, return_counts=True)
raw_data_distrib[val.astype(int)] = c
raw_data_distrib = raw_data_distrib / len(input_table_nona)
est_distrib = new_est.xi.dot(new_est.pi).dot(new_est.mu_matrix)
overall_profile_dist = score_sig_1A_base(raw_data_distrib, est_distrib)

metrics_list.append([
    new_est.J, lr, pval, clonal_phi, clonal_xi, nb_mut_clonal,
    largest_subclonal_phi, largest_subclonal_xi, nb_mut_largest_subclonal,
    clonal_largest_sub_pidist, largest_pi_dist, overall_profile_dist,
    end - start
])

id_cols = [
    'patient_id', 'mutation_set', 'prop_diploid', 'ploidy', 'purity', 'nb_mut',
    'nb_sigs', 'nb_sigs_prefit', 'major_cn_mean', 'total_cn_mean', 'method',
    'prefit_bool', 'sigprofiler_bool', 'dof'
]
metrics_cols = [
    'nb_clones', 'lr', 'pval', 'clonal_phi', 'clonal_xi', 'nb_mut_clonal',