Esempio n. 1
0
def compute_degree_of_clustering(genes_list, analysis_repo, molecule_type):
    '''
    Computes log-transformed degree-of-clustering statistics for each gene.

    For every gene an ImageSet is built from the "<molecule_type>/<gene>/"
    path of analysis_repo, and the natural log of its degree-of-clustering
    values is summarised.

    Returns a tuple of four dictionaries, all keyed by gene:
      - the log-transformed degree-of-clustering values,
      - their median,
      - their standard error (helpers.sem called with factor=0),
      - the [lower, higher] bounds given by helpers.median_confidence_interval.
    '''
    # Phase 1: load the raw values of every gene before computing anything.
    raw_values = [
        np.array(
            ImageSet(
                analysis_repo,
                ['{0}/{1}/'.format(molecule_type, name)],
            ).compute_degree_of_clustering()
        )
        for name in genes_list
    ]

    log_values_by_gene = {}
    median_by_gene = {}
    sem_by_gene = {}
    ci_by_gene = {}

    # Phase 2: derive the statistics from the log-transformed values.
    for name, raw in zip(genes_list, raw_values):
        logged = np.log(raw)
        log_values_by_gene[name] = logged
        median_by_gene[name] = np.median(logged)
        # Standard error, then confidence interval around the median.
        sem_by_gene[name] = helpers.sem(logged, factor=0)
        ci_low, ci_high = helpers.median_confidence_interval(logged)
        ci_by_gene[name] = [ci_low, ci_high]

    return log_values_by_gene, median_by_gene, sem_by_gene, ci_by_gene
Esempio n. 2
0
def plot_dynamic_barplot(analysis_repo):
    '''
    Formats the data and calls the plotting function

    For every gene listed in constants.analysis_config['PROTEINS'], a table
    with one row per (molecule, timepoint) pair is built, where molecule is
    "mrna" or "protein" and the timepoints are the sorted union of the mRNA
    and protein timepoints. Timepoints that are not defined for a molecule
    get a zero-filled placeholder row. For the others, the row holds the
    log-transformed degree of clustering, its standard error
    (helpers.sem with factor=6) and the [lower, higher] bounds returned by
    helpers.median_confidence_interval.

    The table is sorted by timepoint, reduced per molecule with mean_column
    and passed to plot.bar_profile_median_timepoints, which receives the
    target figure path built from the analysis configuration.

    analysis_repo is forwarded unchanged to ImageSet. Nothing is returned.
    '''
    plot_colors = constants.analysis_config['PLOT_COLORS']
    columns = ["Molecule", "Timepoint", "d_of_c", "error", "CI"]

    # paired mRNA-protein barplots, so we go through proteins (we have less proteins than mRNA)
    tp_mrna = constants.dataset_config['TIMEPOINTS_MRNA']
    tp_proteins = constants.dataset_config['TIMEPOINTS_PROTEIN']
    all_timepoints = np.sort(list(set(tp_mrna) | set(tp_proteins)))
    for i, gene in enumerate(constants.analysis_config['PROTEINS']):
        # Rows are collected in a plain list and turned into a DataFrame once:
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # frame on every call.
        rows = []
        for molecule, timepoints in zip(["mrna", "protein"],
                                        [tp_mrna, tp_proteins]):
            for tp in all_timepoints:
                if tp not in timepoints:
                    # Placeholder row for a timepoint this molecule lacks.
                    rows.append({
                        "Molecule": molecule,
                        "Timepoint": tp,
                        "error": 0,
                        "CI": [0, 0],
                        "d_of_c": 0,
                    })
                    continue
                image_set = ImageSet(
                    analysis_repo, ["{0}/{1}/{2}/".format(molecule, gene, tp)])
                degree_of_clustering = np.log(
                    image_set.compute_degree_of_clustering())
                err = helpers.sem(degree_of_clustering, factor=6)
                lower, higher = helpers.median_confidence_interval(
                    degree_of_clustering)
                rows.append({
                    "Molecule": molecule,
                    "Timepoint": tp,
                    "error": err,
                    "CI": [lower, higher],
                    "d_of_c": degree_of_clustering,
                })
        # Passing `columns` keeps the column order (and the empty-table
        # layout) identical to the previous append-based construction.
        df = pd.DataFrame(rows, columns=columns)
        df = df.sort_values('Timepoint')
        df = df.groupby('Molecule').apply(mean_column)
        my_pal = {
            "mrna": str(plot_colors[i]),
            "protein": str(color_variant(plot_colors[i], +80))
        }
        tgt_image_name = constants.analysis_config[
            'DYNAMIC_FIGURE_NAME_FORMAT'].format(gene=gene)
        tgt_fp = pathlib.Path(
            constants.analysis_config['FIGURE_OUTPUT_PATH'].format(
                root_dir=global_root_dir), tgt_image_name)
        plot.bar_profile_median_timepoints(df,
                                           palette=my_pal,
                                           figname=tgt_fp,
                                           fixed_yscale=15)