def run(self):
    """Run the gRNA design pipeline and write the results under ``<root>/temp``.

    The method performs, in order:

    1. Load the global gRNA table (optionally narrowed to the genes listed in
       ``self.gene_mask_dict['genes']``) and the gene multifasta.
    2. Split the gRNAs into ``candidates``, ``backup`` and ``dropped`` via
       ``RefineCripri.cripr_interference`` and annotate each frame.
    3. Load the off-targets for ``self.mismatch``, keep those that have an
       annotation, are not on the ``'+'`` strand and whose annotation differs
       from their gene.
    4. Apply the off-target handling to candidates, then to backup.
    5. Rank/re-balance the two sets, compute primer length and GC content,
       and design primers.
    6. Write ``candidates.txt``, ``backup.txt``, ``dropped.txt`` and
       ``offtargets.txt`` (comma separated, with header, without index).

    Returns:
        None. The results are only written to disk.
    """
    sqlrunner = SQL(database=os.path.join(self.root, "databases", self.database))
    # This is a rate limiting step
    gRNA_db = sqlrunner.get_global_gRNA(mismatch=str(self.mismatch))
    if self.gene_mask_dict['genes']:
        query_data = self.get_targeted_data(dataframe=gRNA_db,
                                            gene_mask_dict=self.gene_mask_dict)
    else:
        query_data = gRNA_db

    multifasta = sqlrunner.get_gene_multifasta()
    gRNA_runner = RefineCripri(
        grna_dataframe=query_data,
        strand=self.strand,
        fasta_dataframe=multifasta,
        cas9=self.cas9_organism,
        offtarget_ids=sqlrunner.custom_sql("SELECT name, strand FROM global_offtarget"))
    candidates, backup, dropped = gRNA_runner.cripr_interference()
    candidates, backup, dropped = map(self.utils.annotate_dataframe,
                                      [candidates, backup, dropped])

    offtargets = sqlrunner.get_offtargets_by_mismatch(mismatch=self.mismatch)
    offtargets.dropna(subset=['annotation'], inplace=True)
    # Explicit copy: a column is assigned on the filtered frame right below,
    # which would otherwise emit SettingWithCopyWarning.
    offtargets = offtargets[offtargets['strand'] != '+'].copy()
    offtargets['annotation'] = offtargets['annotation'].apply(
        lambda x: x.replace("_", "") if isinstance(x, str) else x)
    offtargets = offtargets.query("gene != annotation")
    offtargets.reset_index(drop=True, inplace=True)
    offtarget_ids = list(set(offtargets['name']))

    # Both checks are evaluated before either set is processed, as before.
    candidates_has_offtargets = self.list_comparison(list1=candidates['names'],
                                                     list2=offtarget_ids)
    backup_has_offtargets = self.list_comparison(list1=backup['names'],
                                                 list2=offtarget_ids)

    # Candidates go first so that ``dropped`` already contains the gRNAs
    # moved out of candidates when the backup set is processed.
    candidates, dropped, candidates_offtargets = self._collect_offtargets(
        grna_dataframe=candidates,
        dropped_dataframe=dropped,
        offtargets=offtargets,
        offtarget_ids=offtarget_ids,
        has_offtargets=candidates_has_offtargets)
    backup, dropped, backup_offtargets = self._collect_offtargets(
        grna_dataframe=backup,
        dropped_dataframe=dropped,
        offtargets=offtargets,
        offtarget_ids=offtarget_ids,
        has_offtargets=backup_has_offtargets)

    # Add ranking to PAM; move gRNAs between the frames if the ranking is off.
    candidates, backup = self.scan_maxmismatches(candidates=candidates, backup=backup)
    candidates, backup = self.force_max_grna_in_candidates(candidates=candidates,
                                                           backup=backup,
                                                           max_grna=self.max_grna)
    candidates = self.force_ag_base(dataframe=candidates,
                                    max_primer_size=self.max_primer_size)
    backup = self.force_ag_base(dataframe=backup,
                                max_primer_size=self.max_primer_size)
    candidates, backup, dropped = map(self.calculate_primer_len,
                                      [candidates, backup, dropped])
    candidates, backup, dropped = map(self.calculate_gc_content,
                                      [candidates, backup, dropped])
    candidates = self.design_primers(dataframe=candidates,
                                     cas9=self.cas9_organism,
                                     fiveprime=self.fiveprime,
                                     threeprime=self.threeprime)
    backup = self.design_primers(dataframe=backup,
                                 cas9=self.cas9_organism,
                                 fiveprime=self.fiveprime,
                                 threeprime=self.threeprime)
    candidates, backup, dropped = map(pd.DataFrame, [candidates, backup, dropped])

    if candidates_offtargets.empty and backup_offtargets.empty:
        # Nothing to report: emit a header-only file with the off-target columns.
        final_offtargets = pd.DataFrame(columns=offtargets.columns)
    else:
        # Tag every off-target with the set it belongs to, then stack them.
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        candidates_offtargets['from'] = "candidates"
        backup_offtargets['from'] = "backup"
        final_offtargets = pd.concat([candidates_offtargets, backup_offtargets],
                                     ignore_index=True)

    outputs = (
        (candidates, "candidates.txt"),
        (backup, "backup.txt"),
        (dropped, "dropped.txt"),
        (final_offtargets, "offtargets.txt"),
    )
    for frame, filename in outputs:
        frame.to_csv(os.path.join(self.root, "temp", filename),
                     header=True, index=False, sep=",")


def _collect_offtargets(self, grna_dataframe, dropped_dataframe, offtargets,
                        offtarget_ids, has_offtargets):
    """Apply the off-target handling to one gRNA set (candidates or backup).

    Args:
        grna_dataframe: gRNA set to process; must have a ``names`` column.
        dropped_dataframe: current dropped set, passed on to
            ``move_grna_by_offtargets``.
        offtargets: filtered off-target frame.
        offtarget_ids: names of the off-targets in ``offtargets``.
        has_offtargets: result of ``list_comparison`` for this gRNA set.

    Returns:
        Tuple ``(grna_dataframe, dropped_dataframe, offtargets_found)`` where
        ``offtargets_found`` is a DataFrame. When ``has_offtargets`` is falsy
        the two input frames are returned untouched and ``offtargets_found``
        is an empty frame carrying the columns of ``offtargets``.
    """
    if not has_offtargets:
        empty = pd.DataFrame({column: [] for column in offtargets})
        return grna_dataframe, dropped_dataframe, empty

    shared_ids = list(set(grna_dataframe['names']) & set(offtarget_ids))
    found = self.grab_offtargets(query=grna_dataframe,
                                 offtargets=offtargets,
                                 offtarget_ids=offtarget_ids)
    grna_dataframe = self.negate_pam_mismatch(grna_dataframe=grna_dataframe,
                                              offtarget_dataframe=found,
                                              target_ids=shared_ids)
    grna_dataframe, dropped_dataframe = self.move_grna_by_offtargets(
        grna_dataframe=grna_dataframe,
        dropped_dataframe=dropped_dataframe,
        offtarget_dataframe=found,
        masks=self.gene_mask_dict['masks'])
    return grna_dataframe, dropped_dataframe, pd.DataFrame(found)
class CrisprFuncHelpers:
    """Helper harness used by the unit tests for crispr.py.

    Every ``*_test`` method runs a part of the pipeline and returns what it
    observed; the matching ``*_result`` method returns the expected value.
    """

    def __init__(self, database: str, strand: str, mismatch: int, cas9: str):
        """Switch to the project root and open the database.

        Args:
            database: passed unchanged to ``SQL(database=...)``.
            strand: strand handed to ``RefineCripri``; also compared with the
                last character of the dropped gRNA names.
            mismatch: mismatch value passed to ``get_global_gRNA``.
            cas9: cas9 identifier handed to ``RefineCripri``.
        """
        # NOTE: the root is the parent of the current working directory
        # ("../main.py" is resolved against the cwd), and the process cwd is
        # changed as a side effect before the database is opened.
        self.root = os.path.dirname(os.path.abspath("../main.py"))
        os.chdir(self.root)
        self.sql = SQL(database=database)
        self.strand = strand
        self.mismatch = mismatch
        self.cas9 = cas9

    @staticmethod
    def _rows_for_genes(data, query):
        """Return the rows of *data* that belong to the genes in *query*.

        The gene of a row is the part of its ``names`` value before the first
        underscore; it is added as a ``genes`` column. A queried gene is used
        only when it equals one of those gene ids exactly; its rows are then
        all rows whose gene id contains the queried id. Rows are stacked in
        query order with a fresh index. An empty DataFrame is returned when
        nothing matches. *data* itself is not modified.
        """
        genes = [name.split("_")[0] for name in data['names']]
        data = data.assign(genes=genes)
        known_genes = set(genes)
        selected = []
        for gene_id in query:
            if gene_id not in known_genes:
                continue
            mask = [gene_id in gene for gene in genes]
            selected.append(data.loc[mask, :])
        if not selected:
            return pd.DataFrame()
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        return pd.concat(selected, ignore_index=True)

    def _build_runner(self, query):
        """Create a ``RefineCripri`` runner restricted to the genes in *query*."""
        data = self.sql.get_global_gRNA(mismatch=self.mismatch)
        subset = self._rows_for_genes(data, query)
        return RefineCripri(grna_dataframe=subset,
                            strand=self.strand,
                            fasta_dataframe=None,
                            cas9=self.cas9)

    def initial_filter_test(self) -> list:
        """Check the three frames produced by ``RefineCripri.initial_filter``.

        Returns:
            ``[candidates_ok, backup_ok, dropped_ok]`` where each flag is True
            when every row of the frame satisfies its rule: candidates have
            ``score < 2``, backup has ``score >= 2`` and the last character of
            each dropped name differs from ``self.strand``. An empty frame
            counts as satisfying its rule (it used to raise ``IndexError``).
        """
        runner = self._build_runner(["Rv0899", "Rv0934"])
        candidates, backup, dropped = map(pd.DataFrame, runner.initial_filter())
        candidates_ok = all(row['score'] < 2 for _, row in candidates.iterrows())
        backup_ok = all(row['score'] >= 2 for _, row in backup.iterrows())
        dropped_ok = all(row['names'][-1] != self.strand
                         for _, row in dropped.iterrows())
        return [candidates_ok, backup_ok, dropped_ok]

    def initial_filter_result(self) -> list:
        """Return the value expected from ``initial_filter_test``."""
        return [True, True, True]

    def has_offtarget_test(self) -> bool:
        """Run ``initial_filter`` followed by ``has_offtarget``.

        Returns:
            Always True; the method only verifies that the calls and the
            DataFrame conversion complete without raising.
        """
        runner = self._build_runner(["Rv0899", "Rv0934", "Rv0051"])
        candidates, backup, dropped = runner.initial_filter()
        candidates, backup, dropped = runner.has_offtarget(candidates=candidates,
                                                           backup=backup,
                                                           dropped_gRNA=dropped)
        candidates, backup, dropped = map(pd.DataFrame,
                                          [candidates, backup, dropped])
        return True

    def has_offtarget_result(self) -> bool:
        """Return the value expected from ``has_offtarget_test``."""
        return True