def score_sig_1A(sim, est_distrib):
    """
    Score an estimated profile against the empirical context distribution.

    The empirical distribution is a 96-long vector: each distinct value found
    in ``sim.T`` (cast to int) indexes a bin that receives its occurrence
    count, and the vector is then divided by ``sim.N``. The comparison with
    ``est_distrib`` is delegated to ``score_sig_1A_base`` (described in the
    original docstring as a euclidean norm between the normalized
    trinucleotide context counts and the reconstituted profile).

    NOTE(review): presumably ``sim.T`` holds trinucleotide context indices in
    the range 0..95 and ``sim.N`` is the number of mutations -- confirm
    against the simulation object.
    """
    # Distinct context values and how many times each one occurs.
    contexts, occurrences = np.unique(sim.T, return_counts=True)

    # Bins for contexts that never occur stay at zero.
    empirical = np.zeros(96)
    empirical[contexts.astype(int)] = occurrences

    return score_sig_1A_base(empirical / sim.N, est_distrib)
else: row_list.append(np.nan) ordered_table = pd.merge(pred_subclonal, true_subclonal, on='mutation_id', how='inner') auc, accuracy, sensitivity, specificity, precision = \ score2C_base(ordered_table.true_subclonal, ordered_table.pred_subclonal) for v in (auc, accuracy, sensitivity, specificity, precision): row_list.append(v) else: for i in range(6): row_list.append(np.nan) if pred_profile is not None: row_list.append(score_sig_1A_base(sig_profile_1A, pred_profile)) row_list.append(score_sig_1B_base(sig_profile_1B, pred_profile)) auc, accuracy, sensitivity, specificity, precision = \ score_sig_1C_base(true_signatures_1C, pred_signatures) for v in (auc, accuracy, sensitivity, specificity, precision): row_list.append(v) if method == 'deconstructsigs': nb_rows = min(est_dist.shape[0], true_profile_1E.shape[0]) score_sig_1D = score_sig_1D_base( true_signatures_1D[0:nb_rows], pred_signatures_mut[0:nb_rows]) (min_diff_distrib_mut, max_diff_distrib_mut, std_diff_distrib_mut, median_diff_distrib_mut, perc_dist_5, perc_dist_10) = \ score_sig_1E_base(true_profile_1E[0:nb_rows, :].astype(float), est_dist[0:nb_rows, :].astype(float)) else:
subclonal_sigs.iloc[0, :][final_cols_used] = \ new_est.pi[largest_subclonal_idx, :] nb_mut_largest_subclonal = \ sum(new_est.qun.argmax(axis=1) == largest_subclonal_idx) clonal_largest_sub_pidist = sq_dist[clonal_idx, largest_subclonal_idx] else: (largest_subclonal_phi, largest_subclonal_xi, nb_mut_largest_subclonal, clonal_largest_sub_pidist) = [np.nan] * 4 largest_pi_dist = np.max(sq_dist) raw_data_distrib = np.zeros(96) val, c = np.unique(input_table_nona.trinucleotide, return_counts=True) raw_data_distrib[val.astype(int)] = c raw_data_distrib = raw_data_distrib / len(input_table_nona) est_distrib = new_est.xi.dot(new_est.pi).dot(new_est.mu_matrix) overall_profile_dist = score_sig_1A_base(raw_data_distrib, est_distrib) metrics_list.append([ new_est.J, lr, pval, clonal_phi, clonal_xi, nb_mut_clonal, largest_subclonal_phi, largest_subclonal_xi, nb_mut_largest_subclonal, clonal_largest_sub_pidist, largest_pi_dist, overall_profile_dist, end - start ]) id_cols = [ 'patient_id', 'mutation_set', 'prop_diploid', 'ploidy', 'purity', 'nb_mut', 'nb_sigs', 'nb_sigs_prefit', 'major_cn_mean', 'total_cn_mean', 'method', 'prefit_bool', 'sigprofiler_bool', 'dof' ] metrics_cols = [ 'nb_clones', 'lr', 'pval', 'clonal_phi', 'clonal_xi', 'nb_mut_clonal',