def compute_degree_of_clustering(genes_list, analysis_repo, molecule_type):
    """Gather log-transformed degree-of-clustering statistics for each gene.

    For every gene in ``genes_list`` an ``ImageSet`` is built over the
    ``<molecule_type>/<gene>/`` path of ``analysis_repo`` and its
    ``compute_degree_of_clustering()`` result is converted to a NumPy array.
    The natural log of that array is then summarised.

    Returns:
        A 4-tuple of dicts, each keyed by gene:
        the log-transformed values, their median, the value returned by
        ``helpers.sem(..., factor=0)``, and the ``[lower, higher]`` pair
        returned by ``helpers.median_confidence_interval``.
    """
    # Phase 1: load the raw values for all genes before computing anything.
    raw_per_gene = [
        np.array(
            ImageSet(
                analysis_repo, ['{0}/{1}/'.format(molecule_type, gene)]
            ).compute_degree_of_clustering()
        )
        for gene in genes_list
    ]

    log_values_by_gene = {}
    median_by_gene = {}
    error_by_gene = {}
    interval_by_gene = {}

    # Phase 2: log-transform and summarise each gene's values.
    for gene, raw_values in zip(genes_list, raw_per_gene):
        log_values = np.log(raw_values)
        log_values_by_gene[gene] = log_values
        median_by_gene[gene] = np.median(log_values)

        # Standard error and confidence interval computation.
        error_by_gene[gene] = helpers.sem(log_values, factor=0)
        ci_low, ci_high = helpers.median_confidence_interval(log_values)
        interval_by_gene[gene] = [ci_low, ci_high]

    return (
        log_values_by_gene,
        median_by_gene,
        error_by_gene,
        interval_by_gene,
    )
def plot_dynamic_barplot(analysis_repo):
    """Format the per-timepoint clustering data and call the plotting function.

    For every gene listed under ``constants.analysis_config['PROTEINS']`` a
    table with one row per (molecule, timepoint) pair is built, where molecule
    is ``"mrna"`` or ``"protein"`` and the timepoints are the sorted union of
    the mRNA and protein timepoints from ``constants.dataset_config``.
    Timepoints a molecule was not acquired at get an all-zero placeholder row,
    so both molecules have a row for every timepoint. The table is sorted by
    timepoint, reduced per molecule with ``mean_column`` and passed to
    ``plot.bar_profile_median_timepoints``, which receives the target figure
    path.

    Args:
        analysis_repo: repository object handed to ``ImageSet`` to load the
            images under ``<molecule>/<gene>/<timepoint>/``.

    Returns:
        None. One call to ``plot.bar_profile_median_timepoints`` is made per
        gene.
    """
    plot_colors = constants.analysis_config['PLOT_COLORS']

    # Paired mRNA-protein barplots, so we go through proteins
    # (we have fewer proteins than mRNAs).
    tp_mrna = constants.dataset_config['TIMEPOINTS_MRNA']
    tp_proteins = constants.dataset_config['TIMEPOINTS_PROTEIN']
    all_timepoints = np.sort(list(set(tp_mrna) | set(tp_proteins)))
    columns = ["Molecule", "Timepoint", "d_of_c", "error", "CI"]

    for i, gene in enumerate(constants.analysis_config['PROTEINS']):
        # Rows are accumulated in a plain list and turned into a DataFrame
        # once: DataFrame.append was removed in pandas 2.0 and copied the
        # whole frame on every call in earlier versions.
        rows = []
        for molecule, timepoints in zip(["mrna", "protein"],
                                        [tp_mrna, tp_proteins]):
            for tp in all_timepoints:
                if tp not in timepoints:
                    # Placeholder row for a timepoint this molecule lacks.
                    rows.append({
                        "Molecule": molecule,
                        "Timepoint": tp,
                        "error": 0,
                        "CI": [0, 0],
                        "d_of_c": 0,
                    })
                    continue

                image_set = ImageSet(
                    analysis_repo,
                    ["{0}/{1}/{2}/".format(molecule, gene, tp)])
                degree_of_clustering = np.log(
                    image_set.compute_degree_of_clustering())
                err = helpers.sem(degree_of_clustering, factor=6)
                lower, higher = helpers.median_confidence_interval(
                    degree_of_clustering)
                rows.append({
                    "Molecule": molecule,
                    "Timepoint": tp,
                    "error": err,
                    "CI": [lower, higher],
                    "d_of_c": degree_of_clustering,
                })

        # NOTE(review): column dtypes are now inferred from the rows rather
        # than inherited from an empty frame; confirm mean_column does not
        # depend on object-dtype columns.
        df = pd.DataFrame(rows, columns=columns)
        df = df.sort_values('Timepoint')
        df = df.groupby('Molecule').apply(mean_column)

        # The protein bar uses a variant of the gene's mRNA colour.
        my_pal = {
            "mrna": str(plot_colors[i]),
            "protein": str(color_variant(plot_colors[i], +80)),
        }
        tgt_image_name = constants.analysis_config[
            'DYNAMIC_FIGURE_NAME_FORMAT'].format(gene=gene)
        tgt_fp = pathlib.Path(
            constants.analysis_config['FIGURE_OUTPUT_PATH'].format(
                root_dir=global_root_dir),
            tgt_image_name)
        plot.bar_profile_median_timepoints(df,
                                           palette=my_pal,
                                           figname=tgt_fp,
                                           fixed_yscale=15)