categories=( site_stat_df.groupby("site")["num_assigned_cells"] .sum() .sort_values(ascending=False) .index.tolist() ), ) unique_pert_count_gg = ( gg.ggplot(num_unique_pert_df, gg.aes(x="site", y="pert_count")) + gg.geom_bar(gg.aes(fill="pert_class"), stat="identity") + gg.theme_bw() + gg.theme(axis_text_x=gg.element_text(rotation=90, size=5)) + gg.xlab("Sites") + gg.ylab("Perturbation Count") + gg.scale_fill_discrete(name="Perturbation Class") ) output_file = pathlib.Path( output_figuresdir, "all_cellpainting_unique_perturbations_across_sites.png" ) if check_if_write(output_file, force, throw_warning=True): unique_pert_count_gg.save(output_file, dpi=300, width=10, height=7, verbose=False) # Process overall perturbation counts per batch pert_count_df = pd.concat(pert_counts_list, axis="rows").reset_index() # Output a full count of perturbations per site output_file = pathlib.Path( output_resultsdir, "complete_perturbation_count_per_site.tsv.gz" )
strip_background=gg.element_rect(colour="black", fill="#fdfff4"))) file = pathlib.Path("figures", "predictions", "wt_parental_single_cell_proba.png") wt_gg.save(file, height=3, width=6, dpi=400) wt_gg # In[10]: resistant_clone_gg = ( gg.ggplot(scores_df.query("Metadata_clone_number == 'WT parental'"), gg.aes(y="Clone A", x="Clone E")) + gg.geom_point(gg.aes(fill="Metadata_clone_number"), size=0.01, alpha=0.2) + gg.facet_grid("data_fit~shuffle_label") + gg.xlab("Clone E") + gg.ylab("Clone A") + gg.theme_bw() + gg.scale_fill_discrete(guide=False) + gg.theme(strip_text=gg.element_text(size=6, color="black"), strip_background=gg.element_rect(colour="black", fill="#fdfff4"))) file = pathlib.Path("figures", "predictions", "resistant_clone_scatter.png") resistant_clone_gg.save(file, height=6, width=4, dpi=400) resistant_clone_gg # In[11]: other_treatment_gg = ( gg.ggplot(scores_df.query("data_fit == 'othertreatment'"), gg.aes(y="Clone A", x="treatment_label")) + gg.geom_boxplot(gg.aes(fill="Metadata_clone_number")) + gg.facet_grid("data_fit~shuffle_label") + gg.xlab("Bortezomib dose") +
# In[19]:


# Visualize UMAP results: one panel per clone, with the point fill mapped to
# the plate and the point shape mapped to the treatment.
umap_points = gg.geom_point(
    gg.aes(fill='factor(Metadata_Plate)', shape="Metadata_treatment"),
    alpha=0.6,
)

# Legend key and facet strip styling, applied after theme_bw() so that these
# settings take precedence over the base theme.
facet_strip_theme = gg.theme(
    legend_key=gg.element_rect(color="black", fill="white"),
    strip_text=gg.element_text(size=6, color="black"),
    strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
)

clone_facet_gg = (
    gg.ggplot(embedding_df, gg.aes('x', 'y'))
    + umap_points
    + gg.theme_bw()
    + gg.xlab("UMAP X")
    + gg.ylab("UMAP Y")
    + gg.scale_shape_manual(name="Treatment", values=[".", "+"])
    + gg.scale_fill_discrete(name="Plate")
    + gg.facet_wrap("~Metadata_clone_number")
    + gg.ggtitle("Four Clone Dataset - Merged")
    + facet_strip_theme
)

# Write the figure once per entry of save_file_extensions; each entry is
# appended verbatim to the extension-less base path.
file = os.path.join("figures", "umap", "four_clone_umap_facet_clone_sample")
for extension in save_file_extensions:
    clone_facet_gg.save(
        filename='{}{}'.format(file, extension),
        height=4,
        width=4.5,
        dpi=400,
    )

clone_facet_gg
document=full_paper_dataset.document.tolist(), ) print(full_dataset.shape) full_dataset.to_csv( Path("output/paper_dataset") / Path("paper_dataset_full_tsne.tsv"), sep="\t", index=False, ) full_dataset.head() # - g = (p9.ggplot(full_dataset.sample(10000, random_state=100)) + p9.aes(x="dim1", y="dim2", fill="journal") + p9.geom_point() + p9.scale_fill_discrete(guide=False)) print(g) # # Generate Bin plots # ## Square Plot data_df = pd.read_csv( Path("output") / Path("paper_dataset") / Path("paper_dataset_full_tsne.tsv"), sep="\t", ) print(data_df.shape) data_df.head() data_df.describe()
def make_bar_chart(survey_data, topic, facet_by=None, proportional=False):
    """Make a bar chart of the number of respondents selecting each option of a topic.

    Every column that starts with ``topic`` is treated as one option; the
    rows counted are those whose ``rating`` equals 1, for a single year.
    If ``facet_by`` is not empty, the resulting plot is faceted into
    subplots by the variables given.

    The ``facet_by`` argument is never modified: the function works on a
    private copy.

    Args:
        survey_data (pandas.DataFrame): Raw data read in from Kubernetes Survey
        topic (str): String that all questions of interest start with
        facet_by (list, optional): List of columns to use for grouping. May
            contain the entry "."; it is excluded from the groupby
            columns and handed to ``facet_grid`` as the last entry.
            Defaults to no faceting.
        proportional (bool, optional): Defaults to False. If True (and
            ``facet_by`` is given), the bar heights are divided by the
            total number of responses in that facet.

    Returns:
        (plotnine.ggplot): Plot object which can be displayed in a notebook or
        saved out to a file
    """
    # Copy so that neither the caller's list nor a shared default is mutated.
    facet_by = [] if facet_by is None else list(facet_by)
    show_legend = bool(facet_by)

    topic_data_long = get_single_year_data_subset(survey_data, topic, facet_by)

    # "." is not a data column, so it must be left out of every groupby.
    # The facet specification keeps it, moved to the end, which is the
    # same specification this function has always handed to facet_grid.
    # NOTE(review): a caller passing [".", col] therefore gets [col, "."];
    # confirm whether that layout is the intended one.
    group_cols = list(facet_by)
    facet_spec = list(facet_by)
    if "." in group_cols:
        group_cols.remove(".")
        facet_spec = group_cols + ["."]

    # Only the selected answers (rating == 1) without missing values count.
    selected = topic_data_long[topic_data_long.rating == 1].dropna()

    if facet_by:
        aggregate_data = (
            selected.groupby(["level_0"] + group_cols).count().reset_index()
        )
    else:
        aggregate_data = selected.groupby("level_0").count().reset_index()

    if proportional and facet_by:
        # Divide each bar by the number of selected answers in its facet.
        facet_sums = selected.groupby(group_cols).count().reset_index()
        aggregate_data = aggregate_data.merge(facet_sums, on=group_cols).rename(
            columns={"level_0_x": "level_0"}
        )
        aggregate_data = aggregate_data.assign(
            rating=aggregate_data.rating_x / aggregate_data.rating_y
        )

    # Keep the options in their order of first appearance in the long data.
    level_order = topic_data_long["level_0"].unique().tolist()

    br = (
        p9.ggplot(aggregate_data, p9.aes(x="level_0", fill="level_0", y="rating"))
        + p9.geom_bar(show_legend=show_legend, stat="identity")
        + p9.theme(
            axis_text_x=p9.element_text(angle=45, ha="right"),
            strip_text_y=p9.element_text(angle=0, ha="left"),
        )
        + p9.scale_x_discrete(
            limits=level_order,
            # Drop the topic prefix and show at most two wrapped lines.
            labels=[
                "\n".join(
                    textwrap.wrap(
                        level.replace(topic, "").replace("_", " "), width=35
                    )[0:2]
                )
                for level in level_order
            ],
        )
    )

    if facet_by:
        br = (
            br
            + p9.facet_grid(
                facet_spec,
                shrink=False,
                labeller=lambda x: "\n".join(textwrap.wrap(x, 15)),
            )
            + p9.theme(
                # The legend identifies the bars, so the x tick labels are hidden.
                axis_text_x=p9.element_blank(),
                strip_text_x=p9.element_text(
                    wrap=True, va="bottom", margin={"b": -0.5}
                ),
            )
            + p9.scale_fill_discrete(
                limits=level_order,
                labels=[
                    "\n".join(
                        textwrap.wrap(
                            level.replace(topic, "")
                            .replace("_", " ")
                            .replace("/", "/ ")
                            .strip(),
                            30,
                        )
                    )
                    for level in level_order
                ],
            )
        )
    return br
gg_rep_act.save(os.path.join(dir_output, 'gg_rep_act.png'), width=8, height=4)

# Display labels for the values of the `notes` column
di_notes = {
    'chi2': 'χ2-correction',
    'insig': 'Erroneous',
    'specification': 'Specification',
    'non-replicable': 'Inconsistent'
}

# (ii) Breakdown of counts
# Attach the number of res_fisher rows per `tt` value as `n_lit`.
# rename_axis/reset_index(name=...) names the two columns explicitly, so the
# result is ['tt', 'n_lit'] regardless of how the installed pandas version
# labels the output of value_counts().
# The merge has no explicit key, so it joins on the shared column names
# (presumably just `tt` -- verify against acc_tt).
tmp = acc_tt.merge(
    res_fisher.tt.value_counts().rename_axis('tt').reset_index(name='n_lit'))
tmp = tmp.assign(tt=lambda x: x.tt.map(di_tt),
                 notes=lambda x: x.notes.map(di_notes),
                 share=lambda x: x.n / x.n_lit)

# Dodged bars of `share` per `notes` label, filled by `tt`
gg_acc_notes = (
    pn.ggplot(tmp, pn.aes(x='notes', y='share', fill='tt')) + pn.theme_bw() +
    pn.scale_y_continuous(labels=percent_format(), limits=[0, 0.1]) +
    pn.scale_fill_discrete(name='Literature') +
    pn.geom_col(color='black', position=pn.position_dodge(0.5), width=0.5) +
    pn.labs(y='Percent', x='Investigation') +
    pn.theme(axis_text_x=pn.element_text(angle=45),
             axis_title_x=pn.element_blank()))
gg_acc_notes.save(os.path.join(dir_output, 'gg_acc_notes.png'),
                  width=7,
                  height=3)

print('~~~ End of 4_results_insig.py ~~~')
var_ordered = df[var_col][df[val_col].sort_values( ascending=ascending).index.tolist()] df[var_col] = pd.Categorical(df[var_col], categories=list(reversed(list(var_ordered))), ordered=True) return (df) skills_summary_df = sort_df(skills_summary_df, var_col="variable") skills_summary_df["type"] = pd.Categorical(skills_summary_df["type"]) skills_summary_df["type"] = skills_summary_df["type"].cat.reorder_categories( ["Requirements", "Assets"]) (p9.ggplot(skills_summary_df, p9.aes('attribute', 'value', fill='variable')) + p9.geom_col() + p9.coord_flip() + p9.scale_fill_discrete(guide=False)) #skills_summary_df["type"] #Languages languages = ["R", "sql", "python", "java", "scala", "C", "sas"] lang_clean = { "sql": "SQL", "python": "Python", "R": "R", "java": "Java", "scala": "Scala", "C": "C", "sas": "SAS" }
def main():
    """Build every figure of the analysis and hand each one to ``save_both``.

    Sets the matplotlib mathtext font set to 'cm', silences three known
    warning patterns, then builds one data frame per figure and passes the
    assembled plot, together with its output name, to ``save_both``.
    """
    mpl.rc('mathtext', fontset='cm')

    # Expected noise while building and saving the plots.
    warnings.filterwarnings('ignore', r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    dashed_diagonal = dict(slope=1, intercept=0, linetype='dashed', color='grey')

    # --- P_f ---------------------------------------------------------------
    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'P_f')
        + titles('P_f(O_b, O_l)')
        + limits((1, 10))
        + gg.geom_abline(**dashed_diagonal)
        + gg.geom_line(),
        'Pf_Ob_Ol',
    )

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(
        my_plot(df, 'O_b', 'σ', 'P_f')
        + titles('P_f(O_b, σ)')
        + limits((1, 10), (0, 5))
        + gg.geom_line(),
        'Pf_Ob_σ',
    )

    # --- P_q ---------------------------------------------------------------
    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'P_q')
        + titles('P_q(O_b, O_l)')
        + limits((1, 10))
        + gg.geom_abline(**dashed_diagonal)
        + gg.geom_line(),
        'Pq_Ob_Ol',
    )

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(
        my_plot(df, 'O_b', 'σ', 'P_q')
        + titles('P_q(O_b, σ)')
        + limits((1, 10), (0, 5))
        + gg.geom_line(),
        'Pq_Ob_σ',
    )

    # --- O' ----------------------------------------------------------------
    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'Opr')
        + titles("O'(O_b, O_l)")
        + limits((1, 10), (1, 10))
        + gg.geom_line()
        + gg.geom_abline(**dashed_diagonal),
        'Opr_Ob_Ol',
    )

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(
        my_plot(df, 'O_b', 'σ', 'Opr')
        + titles("O'(O_b, σ)")
        + limits((1, 10), (0, 5))
        + gg.geom_line(),
        'Opr_Ob_σ',
    )

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
          .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(
        my_plot(df, 'Opr', 'Pf')
        + titles("P_f(O')")
        + labs("O'", 'P_f')
        + limits((1, 20), (0, 1),
                 xbreaks=np.linspace(2, 20, 10),
                 ybreaks=np.linspace(0, 1, 11))
        + gg.geom_line()
        + gg.geom_hline(yintercept=C, linetype='dashed', color='grey'),
        'Pf_Opr',
    )

    # --- σ' ----------------------------------------------------------------
    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(
        my_plot(df, 'O_b', 'σ', 'σpr')
        + titles("σ'(O_b, σ)")
        + limits((1, 10), (0, 5))
        + gg.geom_line(),
        'σpr_Ob_σ',
    )

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
          .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(
        my_plot(df, 'σpr', 'Pq')
        + titles("P_q(σ')")
        + labs("σ'", 'P_q')
        + limits((0, 20), (-1, 0),
                 xbreaks=np.linspace(0, 20, 11),
                 ybreaks=np.linspace(-1, 0, 11))
        + gg.geom_line(),
        'Pq_σpr',
    )

    # --- Liability, free bet -----------------------------------------------
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
        + titles("-R_{bl}(O_b, O_l)",
                 "S_b = 1, C_b = 0, C_l = 0.02",
                 mathrm('Free bet', dollars=False))
        + limits((1,20), (1, 10))
        + gg.geom_line()
        + gg.geom_abline(**dashed_diagonal),
        'liab_Ob_Ol_free',
    )

    # NOTE(review): the σ plot is built from the same liab_Ob_Ol_free data and
    # the same (1, 10) y-limits as the O_l plot above -- confirm this is
    # intended and not a copy-paste leftover.
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(
        my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
        + titles("-R_{bl}(O_b, σ)",
                 "S_b = 1, C_b = 0, C_l = 0.02",
                 mathrm('Free bet', dollars=False))
        + limits((1,20), (1, 10))
        + gg.geom_line(),
        'liab_Ob_σ_free',
    )

    # --- Liability, qualifying bet -----------------------------------------
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
        + titles("-R_{bl}(O_b, O_l)",
                 "S_b = 1, C_b = 0, C_l = 0.02",
                 mathrm('Qualifying bet', dollars=False))
        + limits((1,20), (1, 10))
        + gg.geom_line()
        + gg.geom_abline(**dashed_diagonal),
        'liab_Ob_Ol_qual',
    )

    # NOTE(review): same remark as for the free-bet σ plot -- this reuses
    # liab_Ob_Ol_qual and the O_l limits; confirm.
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(
        my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
        + titles("-R_{bl}(O_b, σ)",
                 "S_b = 1, C_b = 0, C_l = 0.02",
                 mathrm('Qualifying bet', dollars=False))
        + limits((1,20), (1, 10))
        + gg.geom_line(),
        'liab_Ob_σ_qual',
    )

    # --- Shape of the "more profitable" space ------------------------------
    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    # Positions of the dashed guide lines: O_b where σ == 0 on the P_f frame
    # and σ where O_b == 1 on the P_q frame.
    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8],
        'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    # `labels` has the default 0..2 integer index, so label-based .loc slices
    # (inclusive on both ends) pick exactly one row each. DataFrame.ix, used
    # here previously, no longer exists in pandas >= 1.0.
    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')
        # text alignment can't be specified in an aes
        + gg.geom_text(lab_aes, data=labels.loc[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.loc[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.loc[2:], ha='right', va='bottom')
        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ'),
        'Px_shapes',
    )