def plot_hypothesis(hypothesis, file_name):
    """Plot hypothesis-test results and save every chart into one PDF.

    For each (bin type, score) combination two bar charts are appended to
    the output: the mean score of both compared datasets per bin, and the
    p-value per bin coloured by significance at the 95% level.

    Parameters
    ----------
    hypothesis : mapping
        ``hypothesis[bin_type][score]`` must be an iterable of single-entry
        mappings ``{bin_name: result}``. Each ``result`` is read through the
        attributes ``p1``, ``p2``, ``mean1``, ``mean2``, ``t`` and ``p``.
        The score names are taken from the first bin type only.
    file_name : str
        Forwarded unchanged to ``save_as_pdf_pages``.

    Returns
    -------
    None
    """
    bin_types = list(hypothesis)
    scores = list(hypothesis[bin_types[0]])
    plots = []

    for bin_type, score in product(bin_types, scores):
        mean_name = "Mean: " + score

        # Rows are gathered in plain lists and turned into DataFrames once:
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # frame on every call.
        mean_rows = []
        test_rows = []
        for bin_ in hypothesis[bin_type][score]:
            # Each entry is expected to hold exactly one bin -> result pair.
            bin_name, h = list(bin_.items())[0]

            # NOTE(review): numeric values are stored as strings, exactly as
            # before; plotnine will presumably treat them as discrete values
            # on the y axis -- confirm this is intended.
            mean_rows.append({
                "Bin": bin_name,
                'Dataset': h.p1,
                mean_name: str(round(float(h.mean1), 3)),
            })
            mean_rows.append({
                "Bin": bin_name,
                'Dataset': h.p2,
                mean_name: str(round(float(h.mean2), 3)),
            })

            p_value = h.p
            test_rows.append({
                "Bin": bin_name,
                't-statistic': str(round(h.t, 3)),
                'p-value': str(p_value),
                '95% Confidence':
                    "Significant" if p_value <= 0.05 else "Not Significant",
            })

        df = pd.DataFrame(mean_rows, columns=["Bin", "Dataset", mean_name])
        # '95% Confidence' is declared explicitly so the column exists even
        # when there are no bins (the plot below maps ``fill`` to it).
        df2 = pd.DataFrame(
            test_rows,
            columns=["Bin", "t-statistic", 'p-value', '95% Confidence'])

        plots.append(
            (ggplot(df, aes(x='Bin', y=mean_name, fill='Dataset')) +
             geom_col(stat='identity', position='dodge') +
             ggtitle("{0} bin distribution| {1}\nBin's Average Scores".format(
                 bin_type, score))))
        plots.append(
            (ggplot(df2, aes(x='Bin', y='p-value', fill='95% Confidence')) +
             geom_col(stat='identity', width=0.2) +
             ggtitle(
                 "{0} bin distribution| {1}\nBin's 95% Confidence Level Test".
                 format(bin_type, score)) +
             scale_fill_manual(values={
                 'Significant': "#214517",
                 'Not Significant': '#c62f2d'
             })))

    save_as_pdf_pages(plots, file_name)
def plots_by_site(self, as_pdf=True, filename="plot/figs/All_sites_by_plot.pdf"):
    """Build one figure per site and optionally bundle them into a PDF.

    The grouping option ``plt_group_by`` is set on ``self.opts``, the plot
    data is created on demand, and ``self.plot_plots`` is called once for
    every distinct value of the ``site`` column with plot options whose
    ``colour`` and ``facet_by`` entries are both ``"plot"``. Sites for
    which ``plot_plots`` returns ``None`` are left out.

    Parameters
    ----------
    as_pdf : bool
        Forwarded to ``plot_plots``; when true the collected figures are
        also written with ``save_as_pdf_pages``.
    filename : str
        Destination handed to ``save_as_pdf_pages``.
    """
    self.opts["plt_group_by"] = ["site", "plot", "julian", "type"]

    # Lazily build the plotting table the first time it is needed.
    if self.plot_data is None:
        self.create_plot_data()
    site_names = self.plot_data["site"].unique()

    # Colour and facet every site figure by plot.
    options = self.get_plot_options()
    for option_key in ("colour", "facet_by"):
        options[option_key] = "plot"

    site_figures = []
    for site_name in site_names:
        figure = self.plot_plots(site_name, options, as_pdf)
        if figure is None:
            continue
        site_figures.append(figure)

    if not as_pdf:
        return
    save_as_pdf_pages(site_figures, filename)
def save_as_pdf(plot: p9.ggplot, filename: str = None, path: str = None,
                dpi: int = None, verbose: bool = False) -> None:
    """Save a plotnine ggplot as pdf

    Parameters
    ----------
    plot : p9.ggplot
        The plot to save

    filename : str, optional (default: None)
        Filename to write to. It is used exactly as given (``path`` is not
        prepended). If None, a timestamped name is generated inside
        ``path``.

    path : str, optional (default: None)
        Directory used for the generated filename. If None, "out" is used.
        Ignored when ``filename`` is given.

    dpi : int, optional (default: None)
        DPI of saved plot. If None, set to 300.

    verbose : bool, optional (default: False)
        Print progress report.

    Returns
    -------
    None
    """
    if path is None:
        path = "out"

    if filename is None:
        timestamp = datetime.now().strftime("%y%m%d_%H%M%S")
        filename = "{}/cellex_plot_{}.pdf".format(path, timestamp)

    if dpi is None:
        dpi = 300

    # Create the directory the file is actually written to. Previously only
    # ``path`` was created, so an explicit filename pointing into a missing
    # directory failed while an unrelated "out" directory appeared.
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    p9.save_as_pdf_pages(plots=[plot], filename=filename, dpi=dpi)

    if verbose:
        print("Saved: {}".format(filename))
def ologram_merge_stats(inputfiles=None,
                        pdf_width=None,
                        pdf_height=None,
                        output=None,
                        labels=None):
    """Merge several tab-separated result tables into one heatmap PDF.

    Every input table contributes one column ("dataset") of the heatmap and
    every distinct ``feature_type`` value one row. Tiles are filled by
    ``summed_bp_overlaps_log2_fold_change``; a diamond coloured by
    -log10(p-value) is drawn on top of each tile whose
    ``summed_bp_overlaps_pvalue`` is greater than 0.

    Parameters
    ----------
    inputfiles : list of file objects
        Open tab-separated files with a header row. Each object must expose
        ``.name``; the columns ``feature_type``,
        ``summed_bp_overlaps_pvalue`` and
        ``summed_bp_overlaps_log2_fold_change`` are read.
    pdf_width, pdf_height : float, optional
        Page size. When None, 0.6 per dataset (width, capped at 100) or
        0.6 per feature (height, capped at 500) is used.
    output : file object
        Only ``output.name`` is used, as the destination path.
    labels : str, optional
        Comma-separated dataset labels, one per input file. When None, the
        name of the directory enclosing each input file is used.

    Returns
    -------
    None
    """

    # -------------------------------------------------------------------------
    # Check user provided labels
    # NOTE(review): message(..., type="ERROR") presumably aborts the program;
    # the code below relies on that -- confirm.
    # -------------------------------------------------------------------------

    if labels is not None:

        labels = labels.split(",")

        for elmt in labels:
            if not re.search("^[A-Za-z0-9_]+$", elmt):
                # The option of this command is --labels (the message used to
                # name --more-bed-labels, which this function does not take).
                message(
                    "Only alphanumeric characters and '_' allowed for --labels",
                    type="ERROR")

        if len(labels) != len(inputfiles):
            message("--labels: the number of labels should be"
                    " the same as the number of input files ", type="ERROR")

        if len(labels) != len(set(labels)):
            message("Redundant labels not allowed.", type="ERROR")

    # -------------------------------------------------------------------------
    # Loop over input files
    # -------------------------------------------------------------------------

    df_list = []
    df_label = []

    for pos, infile in enumerate(inputfiles):

        message("Reading file : " + infile.name)

        # Read the dataset into a temporary dataframe
        df_tmp = pd.read_csv(infile, sep='\t', header=0, index_col=None)

        # Change name of 'feature_type' column (index labels become strings).
        df_tmp = df_tmp.rename(index=str, columns={"feature_type": "Feature"})

        # Assign the name of the dataset to a new column
        if labels is None:
            file_short_name = os.path.basename(
                os.path.normpath(os.path.dirname(infile.name)))
        else:
            file_short_name = labels[pos]
        df_label.append(file_short_name)

        df_tmp = df_tmp.assign(dataset=file_short_name)

        # Pval set to 0 or -1 are changed to 1e-320 and NaN respectively
        pval = 'summed_bp_overlaps_pvalue'
        df_tmp.loc[df_tmp[pval] == 0, pval] = 1e-320
        df_tmp.loc[df_tmp[pval] == -1, pval] = np.nan

        # Compute -log10(pval)
        df_tmp = df_tmp.assign(**{"-log_10(pval)": -np.log10(df_tmp[pval])})

        # Which p-values are significant ?
        # TODO: For now, draws all p-values. Add Benjamini-Hochberg
        # correction, and distinguish between NaN and 0.
        df_tmp = df_tmp.assign(pval_signif=df_tmp[pval] > 0)

        # Add the df to the list to be subsequently merged
        df_list.append(df_tmp)

    if len(set(df_label)) < len(df_label):
        message('Enclosing directories are ambiguous and cannot be used as labels. You may use "--labels".',
                type="ERROR")

    # -------------------------------------------------------------------------
    # Concatenate dataframes (row bind)
    # -------------------------------------------------------------------------

    message("Merging dataframes.")
    df_merged = pd.concat(df_list, axis=0)

    # -------------------------------------------------------------------------
    # Plotting
    # -------------------------------------------------------------------------

    message("Plotting")
    my_plot = ggplot(data=df_merged, mapping=aes(y='Feature', x='dataset'))
    my_plot += geom_tile(aes(fill='summed_bp_overlaps_log2_fold_change'))
    my_plot += scale_fill_gradient2()
    my_plot += labs(fill="log2(fold change) for summed bp overlaps")

    # Points for p-val. Must be after geom_tile()
    my_plot += geom_point(data=df_merged.loc[df_merged['pval_signif']],
                          mapping=aes(x='dataset', y='Feature',
                                      color='-log_10(pval)'),
                          size=4, shape='D', inherit_aes=False)
    my_plot += scale_color_gradientn(colors=["#160E00", "#FFB025", "#FFE7BD"])
    my_plot += labs(color="-log10(p-value)")

    # Theming
    my_plot += theme_bw()
    my_plot += theme(panel_grid_major=element_blank(),
                     axis_text_x=element_text(rotation=90),
                     panel_border=element_blank(),
                     axis_ticks=element_blank())

    # -------------------------------------------------------------------------
    # Page size
    # -------------------------------------------------------------------------

    message("Saving")

    nb_ft = len(df_merged['Feature'].unique())
    nb_datasets = len(df_merged['dataset'].unique())

    # The limits only apply to automatically computed sizes; values supplied
    # by the caller are used as they are.
    if pdf_width is None:
        panel_width = 0.6
        pdf_width = panel_width * nb_datasets

        if pdf_width > 100:
            pdf_width = 100
            message("Setting --pdf-width to 100 (limit)")

    if pdf_height is None:
        panel_height = 0.6
        pdf_height = panel_height * nb_ft

        if pdf_height > 500:
            pdf_height = 500
            message("Setting --pdf-height to 500 (limit)")

    message("Page width set to " + str(pdf_width))
    message("Page height set to " + str(pdf_height))
    figsize = (pdf_width, pdf_height)

    # -------------------------------------------------------------------------
    # Saving
    # Warnings are silenced while saving: the original author notes that both
    # pandas and plotnine emit deprecation warnings here.
    # -------------------------------------------------------------------------

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        message("Saving diagram to file : " + output.name)
        message("Be patient. This may be long for large datasets.")

        # NOTE : We must manually specify figure size with save_as_pdf_pages
        save_as_pdf_pages(filename=output.name,
                          plots=[my_plot + theme(figure_size=figsize)],
                          width=pdf_width,
                          height=pdf_height)
}).reset_index().astype({"tag": "category"})) plt = ggplot( data=tags_summary, mapping=aes( x="tag", y="n_tags", ), # "factor(species, ordered=False)", ) plt = ( plt + geom_bar( stat="identity", show_legend=True, position=position_dodge()) + xlab("Species") + ylab("Number of annotations") + geom_text(mapping=aes(label="n_tags"), position=position_dodge(width=0.9)) + theme_classic() + theme( axis_text_x=element_text( angle=90, vjust=1, hjust=1, margin={"r": -30}), figure_size=(20, 8), ) + ggtitle("_".join([database["name"], db_type, "tag_species.png"]) + "(n = " + str(tag_df.shape[0]) + ")") # + scale_x_discrete(limits=SPECIES_LIST, labels=xlabels) ) plots.append(plt) # plt.save( # ), width=10, height=8 # ) # print(tags_summary) save_as_pdf_pages(plots, "tag_summaries3_" + opts["class_type"] + ".pdf")
+p9.geom_abline(p9.aes(intercept=wyn['Intercept'],slope=wyn["btystdave"]))) print(fig1) figures.append(fig1) df2=beauty df2['y_pred']=results.predict() df2['residuals']=df2['courseevaluation']-df2['y_pred'] fig2_res=(p9.ggplot(p9.aes(x='btystdave',y='residuals'),data=beauty) +p9.geom_point()) print(fig2_res) figures.append(fig2_res) results = smf.ols("courseevaluation" +"~btystdavepos + btystdave", data=dane).fit() wyn=results.params fig2=(p9.ggplot(p9.aes(x="btystdave",y="courseevaluation"),data=dane) +p9.geom_jitter(width=0.1) +p9.geom_abline(p9.aes(intercept=wyn['Intercept'],slope=wyn["btystdave"]))) print(fig2) figures.append(fig2) df2=beauty df2['y_pred']=results.predict() df2['residuals']=df2['courseevaluation']-df2['y_pred'] fig2_res=(p9.ggplot(p9.aes(x='btystdave',y='residuals'),data=beauty) +p9.geom_point()) print(fig2_res) figures.append(fig2_res) save_as_pdf_pages(figures, filename="./zad2results/figures.pdf")
max_avg_dist_df, aes(x="Metric", y="Distance", fill="Dataset") ) + geom_bar(stat='identity', position='dodge') + labs( title= "Feature Selection {} | Maximum Classifier Score | Average Error Distance" .format(fs_method), ) + ylab("Average Error"))) plots.append((ggplot( max_max_dist_df, aes(x="Metric", y="Distance", fill="Dataset") ) + geom_bar(stat='identity', position='dodge') + labs( title= "Feature Selection {} | Maximum Classifier Score | Maximum Error Distance" .format(fs_method), ) + ylab("Max Error"))) plots.append((ggplot( mean_avg_dist_df, aes(x="Metric", y="Distance", fill="Dataset") ) + geom_bar(stat='identity', position='dodge') + labs( title= "Feature Selection {} | Average Classifier Score | Average Error Distance" .format(fs_method), ) + ylab("Avg Error"))) plots.append((ggplot( mean_max_dist_df, aes(x="Metric", y="Distance", fill="Dataset") ) + geom_bar(stat='identity', position='dodge') + labs( title= "Feature Selection {} | Average Classifier Score | Maximum Error Distance" .format(fs_method), ) + ylab("Max Error"))) path = Config.get_work_dir_path( os.path.join("paper", "hypothesis", "robustness.pdf")) save_as_pdf_pages(plots, path=path)
+ gg.ylab("UMAP Y") + gg.theme( legend_position="none", strip_text=gg.element_text(size=5), strip_background=gg.element_rect(colour="black", fill="#fdfff4"), axis_text=gg.element_text(size=6), axis_title=gg.element_text(size=7), title=gg.element_text(size=7), figure_size=(5.5, 3) ) ) plotlist.append(embedding_gg) output_file = pathlib.Path(output_dir[batch], f"{batch}_UMAPs.pdf") output_file.parent.mkdir(exist_ok=True) gg.save_as_pdf_pages(plotlist, output_file) # In[6]: batch = "2017_12_05_Batch2" plotlist = [] for norm_method in norm_methods: for color_type in [ "Metadata_broad_sample", "Metadata_Plate", "Metadata_cell_line", "Metadata_time_point" ]: output_file = pathlib.Path(output_dir[batch], f"{batch}_{norm_method}_colorby{color_type}.png") output_file.parent.mkdir(exist_ok=True)
# Scatter of y against x2 (jittered horizontally) with a straight line whose
# intercept and slope are read from ``wyn``.
# NOTE(review): ``wyn`` and ``results`` are defined earlier in the script,
# outside this excerpt -- presumably the parameters / fit of a regression.
fig2 = (p9.ggplot(p9.aes(x='x2', y='y'), data=excersise) +
        p9.geom_jitter(width=0.1) +
        p9.geom_abline(p9.aes(intercept=wyn['Intercept'], slope=wyn['x2'])))
plots.append(fig2)
print(fig2)

# ``df`` is another name for ``excersise`` (no copy is made), so the two
# columns added here are also visible through ``excersise``, which the
# residual plots below read from.
df = excersise
df['excersise_predict'] = results.predict()
df['residuals'] = df['y'] - df['excersise_predict']

# Residuals against each of the two predictors.
fig1_res = (p9.ggplot(p9.aes(x='x1', y='residuals'), data=excersise) +
            p9.geom_point())
print(fig1_res)
plots.append(fig1_res)
fig2_res = (p9.ggplot(p9.aes(x='x2', y='residuals'), data=excersise) +
            p9.geom_point())
print(fig2_res)
plots.append(fig2_res)

# Write every figure collected in ``plots`` into a single PDF.
save_as_pdf_pages(plots, filename="./zad1results/figures.pdf")

# Reload the data from disk, fit y ~ x1 + x2 on all rows, then predict for
# rows 40..59 using every column except 'y'.
# NOTE(review): ``smf`` is presumably statsmodels.formula.api -- confirm.
exercise_v2 = pd.read_csv("exercise.csv")
results_v2 = smf.ols('y ~ x1 + x2', data=exercise_v2).fit()
X = exercise_v2[40:60].drop('y', axis=1)
Y_pred = results_v2.predict(X)

# The figure is saved before ``plt.show()`` is called.
plt.plot(Y_pred)
plt.savefig('./zad1results/prediction.png')
plt.show()