import logging

import pandas as pd
import pkg_resources

from crispy.CRISPRData import CRISPRDataSet, Library
from minlib.Utils import define_sgrnas_sets, estimate_ks

# `logging` must be imported here: getLogger is called at module import time.
LOG = logging.getLogger("Crispy")

# Filesystem locations of the data shipped with the "crispy" package and of
# the report directory inside the "notebooks" package.
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("notebooks", "minlib/reports/")


# Project Score KY v1.1
#

ky = CRISPRDataSet("Yusa_v1.1")
# Fold-changes: filter low counts, RPM-normalise, then compare against
# ky.plasmids.
ky_fc = ky.counts.remove_low_counts(ky.plasmids).norm_rpm().foldchange(ky.plasmids)
ky_gsets = define_sgrnas_sets(ky.lib, ky_fc, add_controls=True)
# estimate_ks receives the fold-changes of the "nontargeting" guide set as its
# second argument.
ky_ks = estimate_ks(ky_fc, ky_gsets["nontargeting"]["fc"])


# DepMap 19Q2 Avana
#

avana = CRISPRDataSet("Avana_DepMap19Q2")
# Same pipeline as for the KY data set above.
avana_fc = (
    avana.counts.remove_low_counts(avana.plasmids)
    .norm_rpm()
    .foldchange(avana.plasmids)
)
avana_gsets = define_sgrnas_sets(
    avana.lib,
    avana_fc,
    dataset_name="Avana_DepMap19Q2",
    add_controls=True,
)
avana_ks = estimate_ks(avana_fc, avana_gsets["nontargeting"]["fc"])


# CRISPR-Cas9 libraries
# Master library (KosukeYusa v1.1 + Avana + Brunello)
#

master_lib = Library.load_library("MasterLib_v1.csv.gz", set_index=False)
# Keep only the rows whose "Library" column is 'KosukeYusa'.
master_lib = master_lib.query("Library == 'KosukeYusa'")


# Project Score samples acquired with Kosuke_Yusa v1.1 library
#

ky = CRISPRDataSet("Yusa_v1.1")
ky_counts = ky.counts.remove_low_counts(ky.plasmids)
# RPM-normalise once, then compute fold-changes against ky.plasmids.
# NOTE(review): assumes norm_rpm() is a pure rescaling, so a single call is
# sufficient — confirm against crispy.CRISPRData.
ky_fc = ky_counts.norm_rpm().foldchange(ky.plasmids)
ky_gsets = define_sgrnas_sets(ky.lib, ky_fc, add_controls=True)

# Per-guide metrics table: the axis-1 (row-wise) median of the fold-changes,
# named "Median", placed side by side with the library annotation columns.
# The library frame is re-indexed by "sgRNA_ID" so the two pieces align on
# their index (presumably ky_fc is indexed by sgRNA id as well — confirm).
ky_gmetrics = pd.concat(
    [
        ky_fc.median(1).rename("Median"),
        master_lib.set_index("sgRNA_ID")[["JACKS", "RuleSet2", "FORECAST", "KS"]],
    ],
    axis=1,
    sort=False,
)


# sgRNA metrics scatter plots
#