コード例 #1
0
ファイル: cytoplasmic_spread.py プロジェクト: cbib/dypfish
def build_cytoplasmic_statistics(analysis_repo, statistics_type, molecule_type,
                                 genes, keyorder):
    """Compute per-gene cytoplasmic centrality or spread statistics.

    For every gene an ``ImageSet`` is built from the
    ``'<molecule_type>/<gene>/'`` path and the statistic selected by
    ``statistics_type`` is computed on it. Spread values are divided by the
    largest value observed over all genes before being summarised.

    Args:
        analysis_repo: Repository object handed to ``ImageSet``.
        statistics_type: ``'centrality'`` or ``'spread'``. Any other value
            computes nothing and yields four empty mappings.
        molecule_type: ``'mrna'`` selects the spot-based computation; any
            other value selects the intensity-based one.
        genes: Iterable of gene names to analyse. May be empty, in which
            case four empty mappings are returned.
        keyorder: Sequence defining the order of the returned mappings; every
            analysed gene is looked up in it with ``keyorder.index``.

    Returns:
        A tuple ``(gene2median, gene2stat, gene2error,
        gene2confidence_interval)`` of ``collections.OrderedDict`` objects
        keyed by gene and ordered according to ``keyorder``. Confidence
        intervals are stored as ``[lower, higher]`` lists.
    """
    gene2stat, gene2median, gene2error, gene2confidence_interval = {}, {}, {}, {}

    def record_summary(gene, center):
        # Fill the three summary mappings from the current gene2stat[gene].
        values = gene2stat[gene]
        gene2median[gene] = center(values)
        gene2error[gene] = helpers.sem(values, factor=0)
        lower, higher = helpers.median_confidence_interval(values)
        gene2confidence_interval[gene] = [lower, higher]

    def ordered_by_keyorder(mapping):
        # Re-order a gene-keyed mapping so it follows keyorder.
        return collections.OrderedDict(
            sorted(mapping.items(), key=lambda item: keyorder.index(item[0])))

    for gene in genes:
        logger.info("Running {} cytoplasmic {} analysis for {}", molecule_type,
                    statistics_type, gene)
        image_set = ImageSet(analysis_repo,
                             ['{0}/{1}/'.format(molecule_type, gene)])
        if statistics_type == 'centrality':
            if molecule_type == 'mrna':
                gene2stat[gene] = image_set.compute_cytoplasmic_spots_centrality()
            else:
                gene2stat[gene] = (
                    image_set.compute_cytoplasmic_intensities_centrality())
            # NOTE(review): the centrality summary stored in gene2median is
            # the mean, not the median; kept unchanged -- confirm it is
            # intentional.
            record_summary(gene, np.mean)
        elif statistics_type == 'spread':
            if molecule_type == 'mrna':
                gene2stat[gene] = image_set.compute_cytoplasmic_spots_spread()
            else:
                gene2stat[gene] = (
                    image_set.compute_intensities_cytoplasmic_spread())

    # Spread needs the maximum over all genes, so it can only be summarised
    # once every gene has been processed. The emptiness guard matters because
    # np.max raises ValueError on an empty sequence.
    if statistics_type == 'spread' and gene2stat:
        max_entropy = np.max([np.max(values) for values in gene2stat.values()])
        for gene in gene2stat:
            gene2stat[gene] = gene2stat[gene] / max_entropy
            record_summary(gene, np.median)

    gene2stat = ordered_by_keyorder(gene2stat)
    gene2median = ordered_by_keyorder(gene2median)
    gene2error = ordered_by_keyorder(gene2error)
    gene2confidence_interval = ordered_by_keyorder(gene2confidence_interval)
    return gene2median, gene2stat, gene2error, gene2confidence_interval
コード例 #2
0
ファイル: degree_of_clustering.py プロジェクト: cbib/dypfish
def compute_degree_of_clustering(genes_list, analysis_repo, molecule_type):
    """Compute the log degree of clustering and its summaries for each gene.

    Args:
        genes_list: Gene names; each one is resolved to the
            ``'<molecule_type>/<gene>/'`` path of ``analysis_repo``.
        analysis_repo: Repository object handed to ``ImageSet``.
        molecule_type: Path prefix selecting the molecule type.

    Returns:
        A tuple of four dicts keyed by gene: the natural log of the degree of
        clustering values, their median, the value of
        ``helpers.sem(..., factor=0)`` and the ``[lower, higher]`` pair
        returned by ``helpers.median_confidence_interval``.
    """
    # First pass: fetch the raw values of every gene before any statistics
    # are computed.
    raw_values = [
        np.array(
            ImageSet(analysis_repo, ['{0}/{1}/'.format(molecule_type, gene)])
            .compute_degree_of_clustering())
        for gene in genes_list
    ]

    log_values_by_gene = {}
    median_by_gene = {}
    error_by_gene = {}
    interval_by_gene = {}

    # Second pass: log-transform, then summarise.
    for gene, raw in zip(genes_list, raw_values):
        logged = np.log(raw)
        log_values_by_gene[gene] = logged
        median_by_gene[gene] = np.median(logged)
        error_by_gene[gene] = helpers.sem(logged, factor=0)
        low, high = helpers.median_confidence_interval(logged)
        interval_by_gene[gene] = [low, high]

    return (log_values_by_gene, median_by_gene, error_by_gene,
            interval_by_gene)
コード例 #3
0
def plot_dynamic_barplot(analysis_repo):
    """Format the per-timepoint degree-of-clustering data and plot it.

    For every gene listed in ``constants.analysis_config['PROTEINS']`` a table
    with one row per (molecule, timepoint) pair is assembled, where molecule
    is ``"mrna"`` or ``"protein"`` and the timepoints are the sorted union of
    the mRNA and protein timepoints. Timepoints that are not configured for a
    molecule get a zero-filled placeholder row. The table is sorted by
    timepoint, passed through ``mean_column`` per molecule and handed to
    ``plot.bar_profile_median_timepoints``.

    Args:
        analysis_repo: Repository object handed to ``ImageSet``.
    """
    plot_colors = constants.analysis_config['PLOT_COLORS']
    columns = ["Molecule", "Timepoint", "d_of_c", "error", "CI"]

    # paired mRNA-protein barplots, so we go through proteins (we have less proteins than mRNA)
    tp_mrna = constants.dataset_config['TIMEPOINTS_MRNA']
    tp_proteins = constants.dataset_config['TIMEPOINTS_PROTEIN']
    all_timepoints = np.sort(list(set(tp_mrna) | set(tp_proteins)))
    for i, gene in enumerate(constants.analysis_config['PROTEINS']):
        # Rows are collected in a plain list and turned into a DataFrame
        # once: DataFrame.append was removed in pandas 2.0 and copied the
        # whole frame on every call.
        rows = []
        for molecule, timepoints in zip(["mrna", "protein"],
                                        [tp_mrna, tp_proteins]):
            for tp in all_timepoints:
                if tp not in timepoints:
                    # Placeholder so both molecules cover every timepoint.
                    rows.append({
                        "Molecule": molecule,
                        "Timepoint": tp,
                        "error": 0,
                        "CI": [0, 0],
                        "d_of_c": 0,
                    })
                    continue
                image_set = ImageSet(
                    analysis_repo, ["{0}/{1}/{2}/".format(molecule, gene, tp)])
                degree_of_clustering = np.log(
                    image_set.compute_degree_of_clustering())
                err = helpers.sem(degree_of_clustering, factor=6)
                lower, higher = helpers.median_confidence_interval(
                    degree_of_clustering)
                rows.append({
                    "Molecule": molecule,
                    "Timepoint": tp,
                    "error": err,
                    "CI": [lower, higher],
                    "d_of_c": degree_of_clustering,
                })
        # dtype=object keeps every cell exactly as collected; the columns mix
        # scalars with lists and arrays.
        df = pd.DataFrame(rows, columns=columns, dtype=object)
        df = df.sort_values('Timepoint')
        df = df.groupby('Molecule').apply(mean_column)
        my_pal = {
            "mrna": str(plot_colors[i]),
            "protein": str(color_variant(plot_colors[i], +80))
        }
        tgt_image_name = constants.analysis_config[
            'DYNAMIC_FIGURE_NAME_FORMAT'].format(gene=gene)
        tgt_fp = pathlib.Path(
            constants.analysis_config['FIGURE_OUTPUT_PATH'].format(
                root_dir=global_root_dir), tgt_image_name)
        plot.bar_profile_median_timepoints(df,
                                           palette=my_pal,
                                           figname=tgt_fp,
                                           fixed_yscale=15)
コード例 #4
0
def mrna_cytoplasmic_total_count(analysis_repo, keyorder):
    """Plot cytoplasmic mRNA spot counts per gene as a bar and a violin plot.

    For every gene in ``constants.analysis_config['MRNA_GENES']`` the
    cytoplasmic spot counts are computed from the ``'mrna/<gene>/'`` image
    set, together with their median, the value of
    ``helpers.sem(..., factor=0)`` and the pair returned by
    ``helpers.median_confidence_interval``. The summaries are ordered by
    ``keyorder`` before plotting; the raw counts are passed in the order the
    genes were processed.

    Args:
        analysis_repo: Repository object handed to ``ImageSet``.
        keyorder: Sequence defining the order of the summary mappings; every
            gene is looked up in it with ``keyorder.index``.
    """
    image_sets = {}
    counts_by_gene = {}
    median_by_gene = {}
    error_by_gene = {}
    interval_by_gene = {}

    def in_keyorder(mapping):
        # Re-order a gene-keyed mapping so it follows keyorder.
        return collections.OrderedDict(
            sorted(mapping.items(), key=lambda item: keyorder.index(item[0])))

    def figure_path(name_format_key):
        # Resolve the target file of a figure from its name-format config key.
        image_name = constants.analysis_config[name_format_key].format(
            molecule_type="mrna")
        output_dir = constants.analysis_config['FIGURE_OUTPUT_PATH'].format(
            root_dir=global_root_dir)
        return pathlib.Path(output_dir, image_name)

    for gene in constants.analysis_config['MRNA_GENES']:
        logger.info("Running mrna cytoplasmic total count analysis for {}", gene)
        image_sets[gene] = ImageSet(analysis_repo, ['mrna/%s/' % gene])
        counts = image_sets[gene].compute_cytoplasmic_spots_counts()
        counts_by_gene[gene] = counts
        median_by_gene[gene] = np.median(counts)
        error_by_gene[gene] = helpers.sem(counts, factor=0)
        low, high = helpers.median_confidence_interval(counts)
        interval_by_gene[gene] = [low, high]

    median_by_gene = in_keyorder(median_by_gene)
    error_by_gene = in_keyorder(error_by_gene)
    interval_by_gene = in_keyorder(interval_by_gene)
    xlabels = constants.analysis_config['MRNA_GENES_LABEL']

    # Bar plot of the medians.
    bar_target = figure_path('FIGURE_NAME_FORMAT')
    plot.bar_profile_median(
        median_by_gene,
        error_by_gene.values(),
        'mrna',
        xlabels,
        bar_target,
        interval_by_gene,
        annot=False,
        data_to_annot=counts_by_gene,
    )

    # Violin plot of the raw counts.
    violin_target = figure_path('FIGURE_NAME_VIOLIN_FORMAT')
    plot.violin_profile(counts_by_gene, violin_target, xlabels,
                        rotation=0, annot=False)
コード例 #5
0
ファイル: test_helpers.py プロジェクト: cbib/dypfish
 def test_median_confidence_interval(self):
     """Check the interval bounds returned for a fixed sample at cutoff 0.8."""
     samples = np.array([24, 38, 61, 22, 16, 57, 31, 29, 35])
     lower_bound, upper_bound = helpers.median_confidence_interval(
         samples, cutoff=0.8)
     self.assertEqual(lower_bound, 29)
     self.assertEqual(upper_bound, 57)