axis=1) # Y matrices # gexp_obj = GeneExpression() gexp = gexp_obj.filter() LOG.info(f"Gexp: {gexp.shape}") prot_obj = Proteomics() prot = prot_obj.filter() prot = prot[prot.count(1) > 300] LOG.info(f"Prot: {prot.shape}") # X matrices # crispr_obj = CRISPR() crispr = crispr_obj.filter(dtype="merged") LOG.info(f"CRISPR: {crispr.shape}") drespo_obj = DrugResponse() drespo = drespo_obj.filter() drespo = drespo[drespo.count(1) > 300] drespo = drespo[["+" not in i for i in drespo.index]] drespo.index = [";".join(map(str, i)) for i in drespo.index] dtargets = drespo_obj.drugresponse.groupby( ["drug_id", "drug_name", "dataset"])["putative_gene_target"].first() dtargets.index = [";".join(map(str, i)) for i in dtargets.index] LOG.info(f"Drug: {drespo.shape}") # Covariates
return "intergenic" elif gene in ESS_GENES: return "essential" elif gene in NESS_GENES: return "non-essential" else: return "unclassified" if __name__ == "__main__": # Project score # cscore_obj = CRISPR() cscore = cscore_obj.filter(dtype="merged") cscore_ht29 = cscore["SIDM00136"] # Samplesheet # lib_name = "2gCRISPR_Pilot_library_v2.0.0.xlsx" lib_ss = pd.read_excel(f"{DPATH}/gi_samplesheet.xlsx") lib_ss = lib_ss.query(f"library == '{lib_name}'") lib = read_gi_library(lib_name) lib["sgRNA1_class"] = [classify_gene(g, c) for g, c in lib[["sgRNA1_Approved_Symbol", "sgRNA1_Chr"]].values] lib["sgRNA2_class"] = [classify_gene(g, c) for g, c in lib[["sgRNA2_Approved_Symbol", "sgRNA2_Chr"]].values] lib["vector_class"] = lib["sgRNA1_class"] + " + " + lib["sgRNA2_class"]
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from statsmodels.stats.multitest import multipletests
from crispy.DataImporter import Proteomics, CRISPR, GeneExpression, DrugResponse, Sample


LOG = logging.getLogger("Crispy")
RPATH = pkg_resources.resource_filename("reports", "eg/")


if __name__ == "__main__":
    # Data-set importers
    gexp_obj = GeneExpression()
    prot_obj = Proteomics()
    crispr_obj = CRISPR()
    drespo_obj = DrugResponse()

    # Labels shared by the proteomics data and the merged CRISPR data.
    # NOTE(review): set() keeps whatever iterating get_data() yields,
    # presumably sample (column) labels - confirm against DataImporter.
    samples_crispr = set(prot_obj.get_data()) & set(
        crispr_obj.get_data(dtype="merged")
    )
    LOG.info(f"CRISPR samples: {len(samples_crispr)}")

    # Labels shared by the proteomics data and the drug-response data.
    samples_drug = set(prot_obj.get_data()) & set(drespo_obj.get_data())
    LOG.info(f"Drug samples: {len(samples_drug)}")

    # Filter data-sets
DPATH = pkg_resources.resource_filename("crispy", "data/")
RPATH = pkg_resources.resource_filename("reports", "eg/")


if __name__ == "__main__":
    # Data-set importers
    wes_obj = WES()
    mobem_obj = Mobem()
    cn_obj = CopyNumber()
    prot_obj = Proteomics()
    gexp_obj = GeneExpression()
    crispr_obj = CRISPR()
    drug_obj = DrugResponse()

    # Sample universe: the labels obtained by iterating the proteomics data.
    # NOTE(review): presumably the sample (column) labels - confirm against
    # DataImporter.
    samples = set(prot_obj.get_data())
    LOG.info(f"Samples: {len(samples)}")

    # Restrict each data-set to that sample universe.
    prot = prot_obj.filter(subset=samples)
    LOG.info(f"Proteomics: {prot.shape}")

    gexp = gexp_obj.filter(subset=samples)
    LOG.info(f"Transcriptomics: {gexp.shape}")
RPATH = pkg_resources.resource_filename("reports", "eg/") TPATH = pkg_resources.resource_filename("tables", "/") if __name__ == "__main__": # Data-sets # prot_obj = Proteomics() prot = prot_obj.filter() LOG.info(f"Proteomics: {prot.shape}") gexp_obj = GeneExpression() gexp = gexp_obj.filter(subset=list(prot)) LOG.info(f"Transcriptomics: {gexp.shape}") crispr_obj = CRISPR() crispr = crispr_obj.filter(subset=list(prot)) LOG.info(f"CRISPR: {crispr.shape}") drespo_obj = DrugResponse() drespo = drespo_obj.filter() drespo.index = [";".join(map(str, i)) for i in drespo.index] dmax = drespo_obj.drugresponse.groupby(["drug_id", "drug_name", "dataset"])[ "max_screening_conc" ].first() dmax = (dmax * 0.5).pipe(np.log) dmax.index = [";".join(map(str, i)) for i in dmax.index] dtargets = drespo_obj.drugresponse.groupby(["drug_id", "drug_name", "dataset"])[