how="inner", ) null_spot_df = complete_foci_df.loc[( complete_foci_df.loc[:, spot_parent_cols] == 0).squeeze(), :] cell_spot_df = complete_foci_df.loc[( complete_foci_df.loc[:, spot_parent_cols] != 0).squeeze(), :] num_assigned_cells = len( cell_spot_df.loc[:, spot_parent_cols].squeeze().unique()) num_unassigned_spots = null_spot_df.shape[0] num_assigned_spots = cell_spot_df.shape[0] # Figure 1 - histogram of barcode counts per cell fig_file = pathlib.Path(output_dir, "num_spots_per_cell_histogram.png") if check_if_write(fig_file, force): spot_counts_per_cell_histogram(cell_spot_df, spot_parent_cols, fig_file) # Figure 2 - histogram of barcode scores per spot fig_file = pathlib.Path(output_dir, "barcode_scores_per_spot_histogram.png") if check_if_write(fig_file, force): spot_score_histogram(cell_spot_df, spot_score_cols, fig_file) # Figure 3 - Joint plot of relationship of barcode counts per cell and mean score fig_file = pathlib.Path( output_dir, "per_cell_barcode_count_by_mean_score_jointplot.png") if check_if_write(fig_file, force): spot_count_score_jointplot(cell_spot_df, spot_parent_cols[0], spot_score_cols[0], fig_file)
columns={
    quality_col: "cell_count"
}).assign(
    site=site,
    plate=plate,
    well=well,
    site_location=site_location,
))

# Per-site output directory under output_paintdir. If it already exists, warn
# about whether existing files are about to be overwritten (force) or kept.
output_folder = pathlib.Path(output_paintdir, site)
if output_folder.exists():
    if force:
        warnings.warn("Output files likely exist, now overwriting...")
    else:
        warnings.warn(
            "Output files likely exist. If they do, NOT overwriting...")
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs(output_folder, exist_ok=True)

# Output targets for this site: a metadata table (file name carries a .gz
# suffix) and a cell-count table.
meta_output_file = pathlib.Path(output_folder, f"metadata_{site}.tsv.gz")
count_output_file = pathlib.Path(output_folder, f"cell_counts_{site}.tsv")

# Save files
# Both tables are written tab-separated, each gated by check_if_write()
# (presumably True when the file is absent or force is set -- confirm).
if check_if_write(meta_output_file, force):
    metadata_df.to_csv(meta_output_file, sep="\t", index=False)
if check_if_write(count_output_file, force):
    # The index is written out under the column label "Cell_Quality".
    cell_count_df.to_csv(count_output_file, sep="\t", index_label="Cell_Quality")

# NOTE(review): presumably printed once after a per-site loop whose header is
# outside this excerpt -- confirm the intended nesting level.
print("All sites complete.")
# Make sure the figure directory exists before anything is saved into it.
os.makedirs(output_figuresdir, exist_ok=True)

# Plate-layout plot: one point per row of cell_count_totalcells_df at
# (x_loc, y_loc), filled by total_cell_count, labelled with site_location,
# with one facet per well. Axis text and titles are blanked out.
# NOTE(review): newer plotnine releases appear to call the colormap argument
# of scale_fill_cmap `cmap_name` rather than `name` -- confirm against the
# plotnine version this project pins.
by_well_gg = (
    gg.ggplot(cell_count_totalcells_df, gg.aes(x="x_loc", y="y_loc"))
    + gg.geom_point(gg.aes(fill="total_cell_count"), size=10)
    + gg.geom_text(gg.aes(label="site_location"), color="lightgrey")
    + gg.facet_wrap("~well")
    + gg.coord_fixed()
    + gg.theme_bw()
    + gg.ggtitle(f"Total Cells/Well\n{plate}")
    + gg.theme(
        axis_text=gg.element_blank(),
        axis_title=gg.element_blank(),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )
    + gg.labs(fill="Cells")
    + gg.scale_fill_cmap(name="magma"))

# Save the figure at 300 dpi, gated by check_if_write() (called here with
# throw_warning=True; the helper's exact semantics are defined elsewhere).
output_file = pathlib.Path(output_figuresdir,
                           "plate_layout_cells_count_per_well.png")
if check_if_write(output_file, force, throw_warning=True):
    by_well_gg.save(output_file, dpi=300, verbose=False)

# Plot cell category ratios per well
# Pivot cell_count to one row per (site, plate, well, site_location, x_loc,
# y_loc) with one column per Cell_Quality value; aggfunc is left at the
# pandas default.
ratio_df = pd.pivot_table(
    cell_count_df,
    values="cell_count",
    index=["site", "plate", "well", "site_location", "x_loc", "y_loc"],
    columns=["Cell_Quality"],
)
# Sum: row total over all quality columns.
# Pass_Filter: row total over the columns listed in cell_filter.
ratio_df = ratio_df.assign(Sum=ratio_df.sum(axis=1),
                           Pass_Filter=ratio_df[cell_filter].sum(axis=1))

# Categories not in cell_filter, with and without the "Empty" category.
fail_filter = [cat for cat in cell_category_order if cat not in cell_filter]
# NOTE: the list below is still open here; its closing bracket lies beyond
# the end of this excerpt.
fail_filter_noempty = [
    cat for cat in cell_category_order if cat not in cell_filter
    if cat != "Empty"
# Cast Cell_Quality to a categorical whose category order is
# cell_category_order, and cast site to a categorical whose categories are the
# sites sorted by total cell count, largest first.
# The columns are replaced through assign() instead of being written with
# `.loc[:, col] = ...`: pandas >= 2.0 performs that .loc write in place and
# keeps the column's existing dtype, so the categorical dtype (and with it the
# category order, which presumably drives the bar and legend order in the plot
# below) would be silently dropped.
cell_count_df = cell_count_df.assign(
    Cell_Quality=pd.Categorical(
        cell_count_df.Cell_Quality, categories=cell_category_order
    ),
    # Assigns the site column to the category datatype
    site=pd.Categorical(
        cell_count_df.site,
        categories=(
            cell_count_df.groupby("site")["cell_count"]
            .sum()
            .sort_values(ascending=False)
            .index.tolist()
        ),
    ),
)

# Write the cell-count table tab-separated without the index, gated by
# check_if_write() (helper defined elsewhere; called with throw_warning=True).
if check_if_write(cell_count_output_file, force, throw_warning=True):
    cell_count_df.to_csv(cell_count_output_file, sep="\t", index=False)

# Graph: Cell count with all wells in same graph
# Bars of cell_count per site, filled by Cell_Quality, with x tick labels
# rotated 90 degrees at size 5 and a manual fill scale for the categories.
cell_count_gg = (
    gg.ggplot(cell_count_df, gg.aes(x="site", y="cell_count"))
    + gg.geom_bar(gg.aes(fill="Cell_Quality"), stat="identity")
    + gg.theme_bw()
    + gg.theme(axis_text_x=gg.element_text(rotation=90, size=5))
    + gg.xlab("Sites")
    + gg.ylab("Cell Count")
    + gg.scale_fill_manual(
        name="Cell Quality", labels=cell_category_list, values=cell_category_colors
    )
)
Also check 'perform: true' is set in the config. (Note that 'perform: false' will still output a file lacking prefiltered features.) """

# Message used when the prefilter file exists and force is set.
force_warning = """ Warning, prefilter file already exists! Overwriting file. This may be intended. """

# If the prefilter file is already on disk, warn and log whether it will be
# kept (force unset) or overwritten (force set).
if prefilter_file.exists():
    if not force:
        warnings.warn(file_exist_warning)
        logging.warning("Prefilter file exists. NOT overwriting")
    else:
        warnings.warn(force_warning)
        logging.warning("Prefilter file exists. Overwriting")

# Perform prefiltering and output file
print("Starting 0.prefilter-features")
logging.info("0.prefilter-features started")

# With `perform` set, the feature table comes from prefilter_features();
# otherwise the features are only loaded and every row gets
# prefilter_column=False.
# NOTE(review): the features are built before check_if_write() is consulted,
# so this work is done even when nothing ends up being written -- confirm
# that is intended.
if perform:
    features_df = prefilter_features(core_option_args, example_site_dir,
                                     flag_cols)
else:
    features_df = load_features(core_option_args, example_site_dir)
    features_df = features_df.assign(prefilter_column=False)

# Write the table tab-separated without the index, gated by check_if_write()
# (presumably True when the file is absent or force is set -- confirm).
if check_if_write(prefilter_file, force):
    features_df.to_csv(prefilter_file, sep="\t", index=False)

print("Finished 0.prefilter-features")
logging.info("0.prefilter-features finished")