コード例 #1
0
def ccd_analysis_of_spikeins(adata_spikeins, perms):
    '''
    ERCC spikeins were used as an internal control. We can use them to get an idea of the noise for this analysis.

    For each spike-in, the variance of its moving average over FUCCI
    pseudotime is divided by its total variance ("percent variance CCD").
    The same ratio is computed for every ordering in `perms` to form a random
    baseline, and the mean difference from that baseline is reported.

    Parameters
    ----------
    adata_spikeins : object exposing `.X` and `.obs["fucci_time"]`
        Spike-in expression; axis 0 of `.X` is reordered using indices derived
        from `.obs["fucci_time"]`, so rows are observations.
    perms : iterable of index arrays
        Row orderings applied along axis 0 to build the random baseline.

    Returns
    -------
    None. Summary statistics are printed and a boxplot is written to
    figures/RNASpikeinVarianceBoxplot.png via utils.general_boxplot.
    '''
    expression_data_spike = adata_spikeins.X # log normalized
    # Scale every column by its own maximum
    normalized_exp_data_spike = (expression_data_spike.T / np.max(expression_data_spike, axis=0)[:,None]).T
    fucci_time_inds_spike = np.argsort(adata_spikeins.obs["fucci_time"])
    norm_exp_sort_spike = np.take(normalized_exp_data_spike, fucci_time_inds_spike, axis=0)

    # Use the same moving-average window as the permutation baseline below;
    # otherwise the observed and random ratios are not comparable.
    moving_averages_spike = np.apply_along_axis(MovingAverages.mvavg, 0, norm_exp_sort_spike, WINDOW)
    cell_cycle_variance_spike = np.var(moving_averages_spike, axis=0)
    total_variance_spike = np.var(norm_exp_sort_spike, axis=0)
    percent_ccd_variance_spike = cell_cycle_variance_spike / total_variance_spike
    print("Percent variance of spike-in:")
    print(f"mean +/- stdev of spike-in variance explained by cell cycle: {np.mean(percent_ccd_variance_spike)} +/- {np.std(percent_ccd_variance_spike)}")
    print(f"median of spike-in variance explained by cell cycle: {np.median(percent_ccd_variance_spike)}")

    # Random baseline: the same ratio after reordering the rows by each permutation
    percent_ccd_variance_rng_spike = []
    for iii, perm in enumerate(perms):
        if iii % 1000 == 0: print(f"permutation {iii}")
        norm_exp_sort_perm_spike = np.take(normalized_exp_data_spike, perm, axis=0)
        moving_averages_perm_spike = np.apply_along_axis(MovingAverages.mvavg, 0, norm_exp_sort_perm_spike, WINDOW)
        percent_ccd_variance_rng_spike.append(
                np.var(moving_averages_perm_spike, axis=0) / np.var(norm_exp_sort_perm_spike, axis=0))
    percent_ccd_variance_rng_spike = np.asarray(percent_ccd_variance_rng_spike)

    # Average over permutations of (observed - random) for each spike-in
    mean_diff_from_rng_spike = np.mean((percent_ccd_variance_spike - percent_ccd_variance_rng_spike).T, 1)
    print("Percent additional variance CCD than random of spike-in")
    print(f"mean +/- stdev of spike-in mean additional percent variance from random: {np.mean(mean_diff_from_rng_spike)} +/- {np.std(mean_diff_from_rng_spike)}")
    print(f"median of spike-in addtional variance explained by cell cycle than random: {np.median(mean_diff_from_rng_spike)}")

    utils.general_boxplot((percent_ccd_variance_spike, mean_diff_from_rng_spike), ("Percent Variance\nCCD Spike-In", "Percent Additional\nCCD Variance Spike-In"), "", "Percent Variance CCD", "", True, "figures/RNASpikeinVarianceBoxplot.png")
コード例 #2
0
def analyze_cnv_calls(adata, ccdtranscript):
    '''
    Take results from cnvkit calls to analyze effects of copy number variation.

    Reads input/RNAData/CnsCallSummary.tsv, whose "gene" column holds
    comma-separated gene names per row and whose remaining columns are matched
    against adata.obs["Well_Plate"]. A value of -5 in a cell column is treated
    as missing data.

    Steps:
      1. For CCD transcripts, cluster the binned CNV calls per cell and save
         figures/CnvConsistency.pdf and figures/CnvConsistencyPhases.pdf.
      2. Regress the mean CNV per cell on FUCCI pseudotime
         (figures/CnvCorrelation.pdf) and print regression diagnostics.
      3. For G1 cells and all genes, compare expression between amplified,
         neutral and deleted CNV states (figures/CNVStateBoxplot.pdf).

    Parameters
    ----------
    adata : AnnData-like object with .X, .obs ("Well_Plate", "phase",
        "fucci_time") and .var_names.
    ccdtranscript : boolean mask over adata.var_names selecting CCD transcripts.

    Returns
    -------
    None. Results are printed and figures are saved.
    '''
    cnsresults = pd.read_csv("input/RNAData/CnsCallSummary.tsv", sep="\t")
    cnsresults_gene = cnsresults["gene"]
    cnsresults_allgenes = np.concatenate([g.split(',') for g in cnsresults_gene])
    genenamedict = utils.getGeneNameDict()
    cellHasCnvCalls = np.isin(adata.obs["Well_Plate"], cnsresults.columns)

    # Restrict to CCD transcripts that are named in some CNV row
    adata_names = np.array(utils.ccd_gene_names_gapped(adata.var_names[ccdtranscript], genenamedict))
    adata_ccd_isInCns = adata[cellHasCnvCalls,
                              np.arange(len(ccdtranscript))[ccdtranscript][np.isin(adata_names, cnsresults_allgenes)]]
    adata_ccd_isInCns_names = utils.ccd_gene_names_gapped(adata_ccd_isInCns.var_names, genenamedict)
    cnsresultIdx = _cnv_row_membership(adata_ccd_isInCns_names, cnsresults_gene)
    geneInJustOneList = cnsresultIdx.sum(axis=1) == 1
    adata_ccd_isInCns_inJustOneList = adata_ccd_isInCns[:, geneInJustOneList]
    cnsResultsCellData = np.array(cnsresults)[:, np.isin(cnsresults.columns, adata_ccd_isInCns_inJustOneList.obs["Well_Plate"])]

    # evaluate consistency of CNVs: bin the calls into missing (-1), <1 (0), 1, 2 and >2 (3)
    cnvByCell = cnsResultsCellData.T
    heatmap = np.zeros(cnvByCell.shape)
    heatmap[cnvByCell == -5] = -1
    heatmap[(cnvByCell > -5) & (cnvByCell < 1)] = 0
    heatmap[cnvByCell == 1] = 1
    heatmap[cnvByCell == 2] = 2
    heatmap[cnvByCell > 2] = 3
    clustergrid = sbn.clustermap(heatmap[:,:-8], col_cluster=False)
    plt.savefig("figures/CnvConsistency.pdf")
    plt.close()

    # heatmaps for phases, in the row order chosen by the clustering above
    # NOTE(review): obs is in adata order while the clustered rows follow the
    # column order of the CNV table; confirm that the two orders agree.
    rowOrder = np.asarray(clustergrid.dendrogram_row.reordered_ind)
    orderedPhases = adata_ccd_isInCns.obs["phase"][rowOrder]
    sbn.heatmap([orderedPhases == "G1",
                 orderedPhases == "S-ph",
                 orderedPhases == "G2M"],
                yticklabels=["G1", "S", "G2"])
    plt.savefig("figures/CnvConsistencyPhases.pdf")
    plt.close()

    # is there enrichment for phase in the highly amplified genes?
    # yes, so is there correlation?
    # NOTE(review): x is in adata order and y in CNV-table column order; same caveat as above.
    x = adata_ccd_isInCns.obs["fucci_time"]
    y = np.mean(cnsResultsCellData, axis=0)
    linearModel = scipy.stats.linregress(np.asarray(x).astype(float), np.asarray(y).astype(float))
    fitted = linearModel.intercept + x * linearModel.slope
    plt.scatter(x * fucci.TOT_LEN, y)
    plt.scatter(x * fucci.TOT_LEN, fitted)
    plt.xlabel("Cell Division Time, hrs")
    plt.ylabel("Mean CNV of All Chromosome Arms")
    plt.savefig("figures/CnvCorrelation.pdf")
    plt.close()

    print(f"{linearModel.pvalue}: p-value for nonzero slope by two-sided t test")
    residuals = np.asarray(y - fitted).astype(float)
    residualLinearModel = scipy.stats.linregress(np.asarray(x).astype(float), residuals)
    residualNormality = scipy.stats.normaltest(residuals)
    print(f"{residualLinearModel.pvalue}: p-value for nonzero slope of residuals by two-sided t-test")
    print(f"{residualNormality[1]}: p-value for normality of residuals")

    # what if we only look at one phase? G1 before doubling? for all genes?
    adata_names = np.array(utils.ccd_gene_names_gapped(adata.var_names, genenamedict))
    adata_ccd_isInCns = adata[cellHasCnvCalls & (adata.obs["phase"] == "G1"),
                              np.arange(len(adata_names))[np.isin(adata_names, cnsresults_allgenes)]]
    adata_ccd_isInCns_names = utils.ccd_gene_names_gapped(adata_ccd_isInCns.var_names, genenamedict)
    cnsresultIdx = _cnv_row_membership(adata_ccd_isInCns_names, cnsresults_gene)
    geneInJustOneList = cnsresultIdx.sum(axis=1) == 1
    adata_ccd_isInCns_inJustOneList = adata_ccd_isInCns[:, geneInJustOneList]
    cnsresultIdx_inJustOneList = cnsresultIdx[geneInJustOneList]
    cnsResultsCellData = np.array(cnsresults)[:, np.isin(cnsresults.columns, adata_ccd_isInCns_inJustOneList.obs["Well_Plate"])]

    cnvAmplified, cnvPvalOneSided = [],[]
    cnvDeleted, cnvPvalOneSidedDeleted = [],[]
    amplifiedTpmsAll, neutralTpmsAll, deletionTpmsAll = [],[],[]
    for ii, tpm in enumerate(adata_ccd_isInCns.X.T[geneInJustOneList]):
        # The single CNV row that lists this gene, flattened to one value per cell
        cnv = np.concatenate(cnsResultsCellData[cnsresultIdx_inJustOneList[ii],:])
        missingData = cnv == -5
        amplifiedTpms = tpm[~missingData & (cnv > 1)]
        neutralTpms = tpm[~missingData & (cnv == 1)]
        deletionTpms = tpm[~missingData & (cnv < 1)]
        medianAllTpms = np.median(tpm[~missingData])
        # NOTE(review): Kruskal-Wallis returns a two-sided p-value; doubling it
        # is conservative and can exceed 1. A one-sided p-value is normally
        # half the two-sided one when the direction matches. Left unchanged to
        # keep reported numbers stable; confirm intent.
        cnvAmplified.append(np.median(amplifiedTpms) > medianAllTpms)
        cnvPvalOneSided.append(scipy.stats.kruskal(amplifiedTpms, neutralTpms)[1] * 2)
        cnvDeleted.append(np.median(deletionTpms) < medianAllTpms)
        cnvPvalOneSidedDeleted.append(scipy.stats.kruskal(deletionTpms, neutralTpms)[1] * 2)
        amplifiedTpmsAll.extend(amplifiedTpms)
        neutralTpmsAll.extend(neutralTpms)
        deletionTpmsAll.extend(deletionTpms)
    cnvAmplified = np.asarray(cnvAmplified)
    cnvDeleted = np.asarray(cnvDeleted)
    cnvTestPvals_BH, cnvTestPvals_rejectBH = utils.benji_hoch(0.01, cnvPvalOneSided)
    cnvTestPvalsDel_BH, cnvTestPvalsDel_rejectBH = utils.benji_hoch(0.01, cnvPvalOneSidedDeleted)
    print(f"{sum(cnvAmplified & cnvTestPvals_rejectBH)}: number of novel CCD with significantly higher expression with amplified CNVs than neutral")
    print(f"{sum(cnvDeleted & cnvTestPvalsDel_rejectBH)}: number of novel CCD with significantly lower expression with deleted CNVs than neutral")
    utils.general_boxplot([amplifiedTpmsAll, neutralTpmsAll, deletionTpmsAll],
                          ["amplified", "neutral", "deletion"], "", "logTPMs", "", False, "figures/CNVStateBoxplot.pdf")
    print(f"Of {len(cnvAmplified)} genes:")
    print(f"{scipy.stats.kruskal(amplifiedTpmsAll, neutralTpmsAll, deletionTpmsAll)[1]}: kruskal two sided pval that there's a difference between the three")
    print(f"{scipy.stats.kruskal(amplifiedTpmsAll, neutralTpmsAll)[1]}: kruskal two sided pval that there's a difference between amplified/neutral")


def _cnv_row_membership(gene_names, row_gene_lists):
    '''
    Return a (genes x rows) boolean array that is True where a gene is listed in a CNV row.

    Each entry of row_gene_lists is a comma-separated string of gene names.
    The strings are split first so that membership is an exact name match; a
    plain `name in string` test would also accept substrings of longer names.
    '''
    row_gene_sets = [set(genes.split(',')) for genes in row_gene_lists]
    membership = np.array([[name in genes for genes in row_gene_sets] for name in gene_names], dtype=bool)
    # Keep the result two-dimensional even when there are no genes or no rows
    return membership.reshape(len(gene_names), len(row_gene_sets))
コード例 #3
0
def calculate_variation(use_log, u_well_plates, wp_iscell, wp_isnuc, wp_iscyto,
                        pol_sort_well_plate, pol_sort_ab_cell, pol_sort_ab_nuc,
                        pol_sort_ab_cyto, pol_sort_mt_cell,
                        pol_sort_well_plate_imgnb):
    '''
    Calculate overall variation of protein staining intensity in single cells.

    For every well in `u_well_plates`, the variance, coefficient of variation,
    Gini index and mean of the cell, nucleus, cytoplasm and microtubule
    intensities are computed (on log10 values when `use_log` is true). The
    variance, CV and Gini are also computed per image within each well for the
    cell, nucleus and cytoplasm intensities. Summary plots are saved under
    figures/ and the main arrays are saved under output/pickles/.

    Parameters
    ----------
    use_log : whether to log10-transform intensities before computing metrics.
    u_well_plates : the wells to process, one result per entry.
    wp_iscell, wp_isnuc, wp_iscyto : passed to utils.values_comp and the batch
        plot to choose the compartment used for each well.
    pol_sort_well_plate : per-cell well identifier, compared against each well.
    pol_sort_ab_cell, pol_sort_ab_nuc, pol_sort_ab_cyto : per-cell antibody
        intensities for the three compartments.
    pol_sort_mt_cell : per-cell microtubule intensities.
    pol_sort_well_plate_imgnb : per-cell image identifier.

    Returns
    -------
    mean_mean_comp, var_comp, gini_comp, cv_comp, var_cell, gini_cell, cv_cell,
    var_mt, gini_mt, cv_mt
    '''
    # Channel order used throughout: cell, nucleus, cytoplasm, microtubules
    channels = (pol_sort_ab_cell, pol_sort_ab_nuc, pol_sort_ab_cyto, pol_sort_mt_cell)
    # Per-image metrics are only used for the three antibody channels
    protein_channels = channels[:3]

    # One list per channel, one entry per well
    var_well = [[] for _ in channels]
    cv_well = [[] for _ in channels]
    gini_well = [[] for _ in channels]
    mean_well = [[] for _ in channels]

    # One list per channel, one entry per well, each entry a list over images
    wpi_img = []
    var_img = [[] for _ in protein_channels]
    gini_img = [[] for _ in protein_channels]
    cv_img = [[] for _ in protein_channels]

    # The variance needs to be calculated separately for each well because they all have different numbers of cells
    for well in u_well_plates:
        curr_well_inds = pol_sort_well_plate == well
        for k, channel in enumerate(channels):
            values = _intensity_values(channel, curr_well_inds, use_log)
            var_well[k].append(np.var(values))
            cv_well[k].append(scipy.stats.variation(values))
            gini_well[k].append(utils.gini(values))
            mean_well[k].append(np.mean(values))

        # Same metrics for each image (field of view) belonging to this well
        curr_wpi_img = list(np.unique(pol_sort_well_plate_imgnb[curr_well_inds]))
        curr_var_img = [[] for _ in protein_channels]
        curr_gini_img = [[] for _ in protein_channels]
        curr_cv_img = [[] for _ in protein_channels]
        for wpi in curr_wpi_img:
            curr_wpis = pol_sort_well_plate_imgnb == wpi
            for k, channel in enumerate(protein_channels):
                values = _intensity_values(channel, curr_wpis, use_log)
                curr_var_img[k].append(np.var(values))
                curr_gini_img[k].append(utils.gini(values))
                curr_cv_img[k].append(scipy.stats.variation(values))

        wpi_img.append(curr_wpi_img)
        for k in range(len(protein_channels)):
            var_img[k].append(curr_var_img[k])
            gini_img[k].append(curr_gini_img[k])
            cv_img[k].append(curr_cv_img[k])

    mean_mean_cell, mean_mean_nuc, mean_mean_cyto, mean_mean_mt = mean_well
    var_cell_img, var_nuc_img, var_cyto_img = var_img
    gini_cell_img, gini_nuc_img, gini_cyto_img = gini_img
    cv_cell_img, cv_nuc_img, cv_cyto_img = cv_img

    print(
        "Plotting average intensities of proteins and microtubules by batch.")
    plot_average_intensities_by_batch(u_well_plates, mean_mean_cell,
                                      mean_mean_nuc, mean_mean_cyto,
                                      mean_mean_mt, wp_iscell, wp_isnuc,
                                      wp_iscyto)

    print("Making general plots for variance, CV, and gini by compartment")
    var_cell, var_nuc, var_cyto, var_mt = (np.array(v) for v in var_well)
    gini_cell, gini_nuc, gini_cyto, gini_mt = (np.array(v) for v in gini_well)
    cv_cell, cv_nuc, cv_cyto, cv_mt = (np.array(v) for v in cv_well)
    utils.general_boxplot(
        (var_cell, var_cyto, var_nuc, var_mt),
        ("var_cell", "var_cyto", "var_nuc", "var_mt"), "Metacompartment",
        f"Variance using {'log' if use_log else 'natural'} intensity values",
        "", True, "figures/VarianceBoxplot.png")
    utils.general_boxplot(
        (cv_cell, cv_cyto, cv_nuc, cv_mt),
        ("cv_cell", "cv_cyto", "cv_nuc", "cv_mt"), "Metacompartment",
        f"Coeff. of Var. using {'log' if use_log else 'natural'} intensity values",
        "", True, "figures/CVBoxplot.png")
    utils.general_boxplot(
        (gini_cell, gini_cyto, gini_nuc, gini_mt),
        ("gini_cell", "gini_cyto", "gini_nuc", "gini_mt"), "Metacompartment",
        f"Gini using {'log' if use_log else 'natural'} intensity values", "",
        True, "figures/GiniBoxplot.png")

    print(
        "Making general plots for variance, CV, and gini in the compartment the protein localizes to"
    )
    mean_mean_comp = utils.values_comp(mean_mean_cell, mean_mean_nuc,
                                       mean_mean_cyto, wp_iscell, wp_isnuc,
                                       wp_iscyto)
    cv_comp = utils.values_comp(cv_cell, cv_nuc, cv_cyto, wp_iscell, wp_isnuc,
                                wp_iscyto)
    gini_comp = utils.values_comp(gini_cell, gini_nuc, gini_cyto, wp_iscell,
                                  wp_isnuc, wp_iscyto)
    var_comp = utils.values_comp(var_cell, var_nuc, var_cyto, wp_iscell,
                                 wp_isnuc, wp_iscyto)
    utils.general_scatter(var_comp, var_mt, "var_comp", "var_mt",
                          "figures/var_comp_mt.png")
    utils.general_scatter(cv_comp, cv_mt, "cv_comp", "cv_mt",
                          "figures/cv_comp_mt.png")
    utils.general_scatter(gini_comp, gini_mt, "gini_comp", "gini_mt",
                          "figures/gini_comp_mt.png")
    utils.general_scatter(var_comp, mean_mean_comp, "var_comp",
                          f"{'log10' if use_log else 'natural'} intensity",
                          "figures/VarianceVsIntensityComp.png")

    print("Comparing image to sample variance")
    var_comp_img = utils.values_comp(var_cell_img, var_nuc_img, var_cyto_img,
                                     wp_iscell, wp_isnuc, wp_iscyto)
    gini_comp_img = utils.values_comp(gini_cell_img, gini_nuc_img,
                                      gini_cyto_img, wp_iscell, wp_isnuc,
                                      wp_iscyto)
    cv_comp_img = utils.values_comp(cv_cell_img, cv_nuc_img, cv_cyto_img,
                                    wp_iscell, wp_isnuc, wp_iscyto)
    # x: the whole-well value repeated once per image; y: the per-image values
    utils.general_scatter(
        np.concatenate([[var_comp[i]] * len(vvv)
                        for i, vvv in enumerate(var_comp_img)]),
        np.concatenate(var_comp_img), "variance within compartment",
        "variance for each image", "figures/VarianceByImage.png")
    utils.general_scatter(
        np.concatenate([[gini_comp[i]] * len(vvv)
                        for i, vvv in enumerate(gini_comp_img)]),
        np.concatenate(gini_comp_img), "gini within compartment",
        "gini for each image", "figures/GiniByImage.png")
    utils.general_scatter(
        np.concatenate([[cv_comp[i]] * len(vvv)
                        for i, vvv in enumerate(cv_comp_img)]),
        np.concatenate(cv_comp_img), "cv within compartment",
        "cv for each image", "figures/CVByImage.png")
    all_wpi_img = np.concatenate(wpi_img)
    print(
        all_wpi_img[np.argmax(np.concatenate(var_comp_img))] +
        ": the image with the max variance")

    # Histogram is drawn but neither shown nor saved (kept for interactive use)
    plt.hist(
        np.concatenate(
            [vvv / var_comp[i] for i, vvv in enumerate(var_comp_img)]))
    # plt.show()
    plt.close()
    high_var_img = all_wpi_img[np.concatenate(
        [vvv > 4 * var_comp[i] for i, vvv in enumerate(var_comp_img)])]
    print(
        f"{high_var_img}: the images with greater than 4x the variance of the whole sample"
    )

    # Per-image CV relative to the whole-well CV; flag outliers above mean + 3 SD
    norm_cv_img = np.concatenate(
        [vvv / cv_comp[i] for i, vvv in enumerate(cv_comp_img)])
    plt.hist(norm_cv_img)
    # plt.show()
    plt.close()
    cutoff = np.mean(norm_cv_img) + 3 * np.std(norm_cv_img)
    high_cv_img = all_wpi_img[norm_cv_img > cutoff]
    print(
        f"{high_cv_img}: the images with normalized CV greater than 3 standard deviations above the mean"
    )

    # Pickle and return main results
    utils.np_save_overwriting("output/pickles/mean_mean_comp.npy",
                              mean_mean_comp)
    utils.np_save_overwriting("output/pickles/cv_comp.npy", cv_comp)
    utils.np_save_overwriting("output/pickles/gini_comp.npy", gini_comp)
    utils.np_save_overwriting("output/pickles/var_comp.npy", var_comp)
    utils.np_save_overwriting("output/pickles/cv_cell.npy", cv_cell)
    utils.np_save_overwriting("output/pickles/gini_cell.npy", gini_cell)
    utils.np_save_overwriting("output/pickles/var_cell.npy", var_cell)

    return mean_mean_comp, var_comp, gini_comp, cv_comp, var_cell, gini_cell, cv_cell, var_mt, gini_mt, cv_mt


def _intensity_values(intensities, selection, use_log):
    '''Return intensities[selection], log10-transformed when use_log is true'''
    selected = intensities[selection]
    return np.log10(selected) if use_log else selected
コード例 #4
0
def compare_peak_expression_prot_v_rna(adata, wp_ensg, ccd_comp, ccdtranscript,
                                       wp_max_pol, wp_max_pol_ccd,
                                       sorted_maxpol_array, max_moving_avg_pol,
                                       sorted_max_moving_avg_pol_ccd):
    '''
    Compare the time of peak expression of protein and RNA.

    Genes that are CCD at both the protein level (wp_ensg[ccd_comp]) and the
    RNA level (adata.var_names[ccdtranscript]) are matched by identifier, and
    the difference between their protein and RNA peak pseudotimes is plotted,
    tested and written out.

    Parameters
    ----------
    adata : AnnData-like object; only .var_names is read here.
    wp_ensg : gene identifier per protein sample.
    ccd_comp : mask over wp_ensg selecting CCD proteins.
    ccdtranscript : mask over adata.var_names selecting CCD transcripts.
    wp_max_pol : peak pseudotime per protein sample (aligned with wp_ensg).
    wp_max_pol_ccd : peak pseudotime for CCD proteins (indexed like wp_ensg[ccd_comp]).
    sorted_maxpol_array : peak pseudotimes used for the "peaked after G1" figure of merit.
    max_moving_avg_pol : peak pseudotime per transcript (aligned with adata.var_names).
    sorted_max_moving_avg_pol_ccd : peak pseudotime for CCD transcripts
        (indexed like adata.var_names[ccdtranscript]).

    Returns
    -------
    None. Writes figures, three CSV tables under output/, and appends to
    output/figuresofmerit.txt.
    '''
    prot_ccd_ensg = list(wp_ensg[ccd_comp])
    rna_ccd_ensg = list(adata.var_names[ccdtranscript])
    both_ccd_ensg = np.intersect1d(prot_ccd_ensg, rna_ccd_ensg)

    # Position of the first occurrence of each identifier (what list.index would return)
    prot_first_idx, rna_first_idx = {}, {}
    for position, ensg in enumerate(prot_ccd_ensg):
        prot_first_idx.setdefault(ensg, position)
    for position, ensg in enumerate(rna_ccd_ensg):
        rna_first_idx.setdefault(ensg, position)
    # Integer dtype keeps these usable as indices even when the intersection is empty
    both_prot_ccd_idx = np.array(
        [prot_first_idx[ensg] for ensg in both_ccd_ensg], dtype=int)
    both_rna_ccd_idx = np.array(
        [rna_first_idx[ensg] for ensg in both_ccd_ensg], dtype=int)
    insct_prot_max_pol_ccd = wp_max_pol_ccd[both_prot_ccd_idx]
    insct_rna_max_pol_ccd = sorted_max_moving_avg_pol_ccd[both_rna_ccd_idx]
    diff_max_pol = insct_prot_max_pol_ccd - insct_rna_max_pol_ccd

    #% Sanity check: double check that the names line up
    prot_names = np.array(prot_ccd_ensg)[both_prot_ccd_idx]
    rna_names = np.array(rna_ccd_ensg)[both_rna_ccd_idx]
    print(f"The name arrays are the same: {all(prot_names == rna_names)}")

    # Alluvial plot showing RNA-protein phase of peak expression for each genes
    peak_expression_alluvial(diff_max_pol, insct_rna_max_pol_ccd,
                             insct_prot_max_pol_ccd)
    # Histogram for peak expression
    utils.general_histogram(
        diff_max_pol * fucci.TOT_LEN,
        "Delay in peak protein expression from peak RNA expression, hrs",
        "Count of CCD Proteins", 0.5, "figures/DelayPeakProteinRNA.pdf")
    # Scatter for peak expression with colorbar for the delay
    peak_expression_delay_scatter(insct_rna_max_pol_ccd,
                                  insct_prot_max_pol_ccd, diff_max_pol)
    # Boxplot for delay of peak expression
    utils.general_boxplot((insct_prot_max_pol_ccd * fucci.TOT_LEN,
                           insct_rna_max_pol_ccd * fucci.TOT_LEN),
                          ("Protein", "RNA"), "", "Peak Expression, hrs", "",
                          True, "figures/DelayPeakProteinRNA_boxplot.png")

    median_delay_hrs = fucci.TOT_LEN * np.median(diff_max_pol)
    print(f"Count of prot CCD genes: {len(prot_ccd_ensg)}")
    print(f"Count of CCD RNA genes: {len(rna_ccd_ensg)}")
    print(
        f"Count of intersection betweeen CCD prot and CCD RNA: {len(both_ccd_ensg)}"
    )
    print(
        f"Median delay of RNA and protein expression time for CCD proteins: {median_delay_hrs}"
    )
    print(
        f"Median RNA expression time for CCD proteins: {fucci.TOT_LEN * np.median(insct_rna_max_pol_ccd)}"
    )
    print(
        f"Median protein expression time for CCD proteins: {fucci.TOT_LEN * np.median(insct_prot_max_pol_ccd)}"
    )
    # NOTE(review): both tests below return two-sided p-values; a one-sided
    # p-value is normally half of that (when the effect is in the tested
    # direction), not double. The reported values are left unchanged; confirm
    # the intended convention.
    _, p = scipy.stats.kruskal(insct_rna_max_pol_ccd, insct_prot_max_pol_ccd)
    print(
        f"One-sided kruskal for median protein expression time higher than median RNA expression time: {2*p}"
    )
    _, p = scipy.stats.ttest_1samp(diff_max_pol, 0)
    print(
        f"One-sided, one-sample t-test for mean delay in protein expression larger than zero: {2*p}"
    )

    #% Output tables
    pd.DataFrame({
        "gene": wp_ensg,
        "max_pol_protein": wp_max_pol,
        "max_time_protein": wp_max_pol * fucci.TOT_LEN
    }).to_csv("output/max_pol_protein.csv", index=False)
    pd.DataFrame({
        "gene": adata.var_names,
        "max_pol_rna": max_moving_avg_pol,
        "max_time_rna": max_moving_avg_pol * fucci.TOT_LEN
    }).to_csv("output/max_pol_rna.csv", index=False)
    pd.DataFrame({
        "gene": both_ccd_ensg,
        "insct_prot_max_pol_ccd": insct_prot_max_pol_ccd,
        "insct_rna_max_pol_ccd": insct_rna_max_pol_ccd,
        "diff_max_pol": diff_max_pol
    }).to_csv("output/diff_max_pol.csv", index=False)

    #% Figures of merit
    peaked_after_g1_prot = sorted_maxpol_array * fucci.TOT_LEN > fucci.G1_LEN
    pct_rna_peak_in_g1 = 100 * sum(sorted_max_moving_avg_pol_ccd * fucci.TOT_LEN <= fucci.G1_LEN) / len(sorted_max_moving_avg_pol_ccd)
    pct_prot_peak_after_g1 = 100 * sum(peaked_after_g1_prot) / len(np.unique(wp_ensg[ccd_comp]))
    fom = "--- temporal delay\n\n"
    fom += f"significant delay in peak protein expression compared to transcript expression, {median_delay_hrs} hours on average" + "\n\n"
    fom += f"G1 is the longest period of the cell cycle, in which the majority of RNAs ({pct_rna_peak_in_g1}%) peak in expression" + "\n\n"
    fom += f"However, the majority ({pct_prot_peak_after_g1}%) of the proteins peaked towards the end of the cell cycle corresponding to the S&G2 phases" + "\n\n"
    # Report the actual size of the intersection rather than a fixed count
    fom += f"The delay between peak RNA and protein expression for the {len(both_ccd_ensg)} CCD proteins that also had CCD transcripts was {median_delay_hrs} hrs on average " + "\n\n"
    fom += f"this delay indicates that it may take a little less than the same amount of time ({12 - median_delay_hrs} hrs) to produce a target metabolite after peak expression of an enzyme." + "\n\n"
    # Three blank paragraphs follow the section, as before
    fom += "\n\n" * 3
    print(fom)
    with open("output/figuresofmerit.txt", "a") as fom_file:
        fom_file.write(fom)