def project_score_data(sgrnas, subset=None):
    """Assemble scaled, per-model sgRNA fold-changes from Project Score.

    Fold-changes are computed separately for the ``Yusa_v1`` and
    ``Yusa_v1.1`` datasets, averaged per ``model_id``, scaled with
    ``ReadCounts.scale`` and finally concatenated column-wise.

    Parameters
    ----------
    sgrnas
        Row labels (sgRNA identifiers) selected with ``.loc`` from both
        fold-change matrices.
    subset
        Optional collection of ``model_id`` values. When given, only
        samples whose model is in this collection are kept in the sample
        map used to average replicates.

    Returns
    -------
    pandas.DataFrame
        One column per model from both library versions, restricted to
        ``sgrnas``; rows containing any missing value are dropped.

    Notes
    -----
    ``.loc[sgrnas]`` raises ``KeyError`` on recent pandas releases when a
    requested label is absent from either matrix; older releases returned
    NaN rows instead (which the final ``dropna`` would remove).
    """
    ddir = pkg_resources.resource_filename("crispy", "data/")

    score_manifest = pd.read_csv(
        f"{ddir}/crispr_manifests/project_score_manifest.csv.gz"
    )

    # Expand each manifest row into one record per comma-separated entry.
    # Iterating rows directly avoids mixing index labels with positional
    # ``.iloc`` access, which was only correct for a default RangeIndex.
    s_map = pd.concat(
        [
            pd.DataFrame(
                dict(
                    model_id=row["model_id"],
                    s_ids=row["library"].split(", "),
                    s_lib=row["experiment_identifier"].split(", "),
                )
            )
            for _, row in score_manifest.iterrows()
        ]
    ).set_index("s_lib")

    if subset is not None:
        s_map = s_map[s_map["model_id"].isin(subset)]

    # NOTE(review): ``groupby(..., axis=1)`` is deprecated in recent pandas;
    # kept as-is to preserve the exact grouping behaviour of the original.
    score_v1 = CRISPRDataSet("Yusa_v1")
    score_v1_fc = score_v1.counts.norm_rpm().foldchange(score_v1.plasmids)
    score_v1_fc = score_v1_fc.groupby(s_map["model_id"], axis=1).mean()

    score_v11 = CRISPRDataSet("Yusa_v1.1")
    score_v11_fc = score_v11.counts.norm_rpm().foldchange(score_v11.plasmids)
    score_v11_fc = score_v11_fc.groupby(s_map["model_id"], axis=1).mean()

    # Reference guide sets are taken from the v1 library annotation and are
    # reused for scaling both library versions.
    ess = set(
        score_v1.lib[
            score_v1.lib["Gene"].isin(Utils.get_essential_genes())
        ].index
    )
    ness = set(
        score_v1.lib[
            score_v1.lib["Gene"].isin(Utils.get_non_essential_genes())
        ].index
    )

    score_v1_fc = ReadCounts(score_v1_fc).scale(essential=ess, non_essential=ness)
    score_v11_fc = ReadCounts(score_v11_fc).scale(essential=ess, non_essential=ness)

    score_fc = pd.concat(
        [score_v1_fc.loc[sgrnas], score_v11_fc.loc[sgrnas]], axis=1
    ).dropna()

    return score_fc
"EGAN00002143462.sample", "EGAN00002143463.sample", "EGAN00002143464.sample", "EGAN00002143465.sample", "EGAN00002143466.sample", ], ) # - Imports ddir = pkg_resources.resource_filename("data", "organoids/bme2/") dreports = pkg_resources.resource_filename("notebooks", "bme/reports/") ss = pd.read_excel(f"{ddir}/{organoids['samplesheet']}", index_col=1).query("organoid == 'COLO-027'") counts = CRISPRDataSet(organoids, ddir=ddir) # - samples = list(set(ss.index).intersection(ss.index)) palette = ss.set_index("name")["palette"] # - Fold-changes fc = (counts.counts.remove_low_counts( counts.plasmids).norm_rpm().foldchange(counts.plasmids)) fc_gene = fc.groupby(counts.lib.reindex(fc.index)["Gene"]).mean() fc_gene_scaled = ReadCounts(fc_gene).scale() # - fc_gene_scaled.rename(columns=ss["name"]).round(5).to_excel(
import pandas as pd
import pkg_resources
import matplotlib.pyplot as plt
from crispy.CRISPRData import CRISPRDataSet
from minlib.Utils import project_score_sample_map, density_interpolate, downsample_sgrnas


LOG = logging.getLogger("Crispy")
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("notebooks", "minlib/reports/")


# Load the "Yusa_v1.1" dataset, the Project Score sample map and the read
# counts after `remove_low_counts` has been applied against the plasmids.

ky = CRISPRDataSet("Yusa_v1.1")
ky_smap = project_score_sample_map()
ky_counts = ky.counts.remove_low_counts(ky.plasmids)


# Run the sgRNA downsampling and write the resulting table to the reports
# directory as an Excel sheet (row index not written).
# NOTE(review): the list is presumably the numbers of sgRNAs per gene to
# evaluate - confirm against `downsample_sgrnas`.

_ds_guide_settings = [1, 2, 3, 4, 5, 100]
ds_scores = downsample_sgrnas(
    ky_counts,
    ky.lib,
    ky_smap,
    _ds_guide_settings,
    n_iters=10,
)

_ds_report_file = f"{RPATH}/YusaDownsample_AROCs.xlsx"
ds_scores.to_excel(_ds_report_file, index=False)


# Plot
#
import logging
import numpy as np
import pandas as pd
import pkg_resources
from crispy.CRISPRData import CRISPRDataSet, Library
from minlib.Utils import define_sgrnas_sets, estimate_ks


LOG = logging.getLogger("Crispy")
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("notebooks", "minlib/reports/")


# Project Score, KY v1.1 library: sgRNA fold-changes against the plasmids,
# guide sets (controls included) and the statistic returned by
# `estimate_ks` relative to the non-targeting guides' fold-changes.

ky = CRISPRDataSet("Yusa_v1.1")

_ky_filtered = ky.counts.remove_low_counts(ky.plasmids)
ky_fc = _ky_filtered.norm_rpm().foldchange(ky.plasmids)

ky_gsets = define_sgrnas_sets(ky.lib, ky_fc, add_controls=True)

_ky_nontargeting_fc = ky_gsets["nontargeting"]["fc"]
ky_ks = estimate_ks(ky_fc, _ky_nontargeting_fc)


# DepMap 19Q2, Avana library: same fold-change pipeline and guide sets.

avana = CRISPRDataSet("Avana_DepMap19Q2")

_avana_filtered = avana.counts.remove_low_counts(avana.plasmids)
avana_fc = _avana_filtered.norm_rpm().foldchange(avana.plasmids)

avana_gsets = define_sgrnas_sets(
    avana.lib,
    avana_fc,
    dataset_name="Avana_DepMap19Q2",
    add_controls=True,
)
from natsort import natsorted
from crispy.QCPlot import QCplot
from scipy.stats import spearmanr
from minlib.Utils import replicates_correlation
from crispy.CRISPRData import CRISPRDataSet, Library


LOG = logging.getLogger("Crispy")
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("notebooks", "minlib/reports/")


# "KM12_coverage" dataset: read counts after `remove_low_counts`, plus the
# sample sheet indexed by its "sample" column.

ky = CRISPRDataSet("KM12_coverage")
ky_counts = ky.counts.remove_low_counts(ky.plasmids)

_ky_ss_file = f"{DPATH}/crispr_manifests/KM12_coverage_samplesheet.xlsx"
ky_ss = pd.read_excel(_ky_ss_file, index_col="sample")

# Display labels: "<coverage>x (<experiment>)" per sample, and the same
# label suffixed with the replicate number.
ky_ss["name"] = [
    f"{coverage}x ({experiment})"
    for experiment, coverage in ky_ss[["experiment", "coverage"]].values
]
ky_ss["rep_name"] = [
    f"{label} (Rep{replicate})"
    for label, replicate in ky_ss[["name", "replicate"]].values
]


# KosukeYusa entries of the master library, restricted to guides present in
# the filtered counts; fold-changes are then averaged per gene symbol and
# per sample label.

_master_lib = Library.load_library("MasterLib_v1.csv.gz")
ky_lib = _master_lib.query("Library == 'KosukeYusa'")
ky_lib = ky_lib[ky_lib.index.isin(ky_counts.index)]

_ky_lib_counts = ky_counts.loc[ky_lib.index]
ky_lib_fc = _ky_lib_counts.norm_rpm().foldchange(ky.plasmids)

ky_lib_gene = ky_lib_fc.groupby(ky_lib["Approved_Symbol"]).mean()
ky_lib_gene_avg = ky_lib_gene.groupby(ky_ss["name"], axis=1).mean()
# Minimal library: KosukeYusa entries only, without guides whose identifier
# starts with "CTRL0".
ml_lib = Library.load_library(ml_lib_name).query("Library == 'KosukeYusa'")
_ml_kept_guides = [
    guide for guide in ml_lib.index if not guide.startswith("CTRL0")
]
ml_lib = ml_lib.loc[_ml_kept_guides]

# Libraries compared downstream: the full KosukeYusa library and the
# minimal one.
_ky_full_lib = Library.load_library("MasterLib_v1.csv.gz").query(
    "Library == 'KosukeYusa'"
)
libraries = {
    "All": {"name": "All", "lib": _ky_full_lib},
    "Minimal": {"name": "Minimal", "lib": ml_lib},
}


# HT-29 CRISPR-Cas9 + Dabrafenib timecourse (Day 8, 10, 14, 18 and 21):
# filtered counts and the sample sheet indexed by "sample".

dabraf_data = CRISPRDataSet("HT29_Dabraf")
dabraf_count = dabraf_data.counts.remove_low_counts(dabraf_data.plasmids)

_dabraf_ss_file = f"{DPATH}/crispr_manifests/HT29_Dabraf_samplesheet.csv.gz"
dabraf_ss = pd.read_csv(_dabraf_ss_file, index_col="sample")


# Export table: a copy of the unfiltered counts with a leading boolean
# column marking guides that are present in the minimal library.

minlibcas9 = Library.load_library(ml_lib_name)
kylib = Library.load_library("MasterLib_v1.csv.gz").query(
    "Library == 'KosukeYusa'"
)

data_export = dabraf_data.counts.copy()
_in_minlib = data_export.index.isin(minlibcas9.index)
data_export.insert(0, "MinLibCas9_guide", _in_minlib)
], axis=1, sort=False, ) rawcounts = rawcounts.loc[:, ~rawcounts.columns.duplicated(keep="last")] rawcounts.to_csv(f"{SPATH}/MinLibCas9_rawcounts.csv.gz", compression="gzip") rawcounts.to_excel(f"{RPATH}/MinLibCas9_rawcounts.xlsx") mlib_dataset = dict( name="MinLibCas9_Screens", read_counts="MinLibCas9_rawcounts.csv.gz", library="MinLibCas9.csv.gz", plasmids=["MHG_library_v1"], ) mlib_data = CRISPRDataSet(mlib_dataset, ddir=SPATH) mlib_count = mlib_data.counts.remove_low_counts(mlib_data.plasmids) mlib_count.index = [ i if i.startswith("CTRL") else i.split(".")[0] for i in mlib_count.index ] mlib_fc_sgrna = mlib_count.norm_rpm().foldchange(mlib_data.plasmids) mlib_fc_gene = mlib_fc_sgrna.groupby(mlib["Approved_Symbol"]).mean() # Project Score samples acquired with Kosuke_Yusa v1.1 library # ky = CRISPRDataSet("Yusa_v1.1") ky_smap = project_score_sample_map() ky_smap = ky_smap[[i.startswith("HT29") for i in ky_smap.index]]
import seaborn as sns import matplotlib.pyplot as plt from math import sqrt from crispy.QCPlot import QCplot from scipy.stats import spearmanr from sklearn.metrics import mean_squared_error from crispy.CRISPRData import CRISPRDataSet, Library LOG = logging.getLogger("Crispy") DPATH = pkg_resources.resource_filename("crispy", "data/") RPATH = pkg_resources.resource_filename("notebooks", "minlib/reports/") # CRISPR-Cas9 screens in 2 colorectal cancer organoids # org_data = CRISPRDataSet("Organoids") org_count = org_data.counts.remove_low_counts(org_data.plasmids) # Libraries # NGUIDES, REMOVE_DISCORDANT = 2, True ml_lib_name = ( f"MinimalLib_top{NGUIDES}{'_disconcordant' if REMOVE_DISCORDANT else ''}.csv.gz" ) ml_lib = Library.load_library(ml_lib_name).query("Library == 'KosukeYusa'") ml_lib = ml_lib.loc[[i for i in ml_lib.index if not i.startswith("CTRL0")]] libraries = dict( All=dict( name="All",