def run(self):
    """Run the stored custom SQL statement and dump its result to ``temp/query.txt``.

    Reads ``self.database``, ``self.sql_query`` and ``self.root``. The query
    result is written as a comma-separated file with a header row and no
    index column. The ``temp`` directory is not created here.
    """
    query_result = SQL(database=self.database).custom_sql(statement=self.sql_query)
    destination = os.path.join(self.root, "temp", "query.txt")
    query_result.to_csv(destination, header=True, index=False, sep=",")
def grna_display_runner(self):
    """Collect guide RNA search options from the user, run the search and display the results.

    Workflow:
      1. Show the ``DisplayGuideRNA`` dialog; do nothing if it is dismissed.
      2. Blank the four result tables and create ``<root>/temp``.
      3. Check that every requested gene and mask exists in the ``genes``
         table of the selected database, and that a mismatch value was given.
      4. Start a ``CrisprInterference_worker`` on ``self.threadingPool`` and
         poll until the pool is idle, advancing the progress bar meanwhile.
      5. Load ``candidates.txt``, ``backup.txt``, ``dropped.txt`` and
         ``offtargets.txt`` from the temp directory into the result tables.
      6. Show an ``EOSpopup`` listing requested genes with no candidate.

    The temp directory is removed on every exit path (success, validation
    failure or exception) so stale result files never leak into a later run.

    Returns:
        None
    """

    def abort(message):
        # Shared failure path: tell the user what went wrong and rewind the progress bar.
        QtWidgets.QMessageBox.about(self, "Error", message)
        self.main_progressbar_value = 0
        self.main_progressbar.setValue(self.main_progressbar_value)

    display_grna = DisplayGuideRNA(database_list=self.availible_databases, cas9_list=self.availible_cas9)
    if not display_grna.exec_():
        return None

    # Replace whatever the tables currently show with an empty placeholder model.
    holder = PandasModel(pd.DataFrame({'': []}))
    for view in (self.display_candidates, self.display_backup,
                 self.display_dropped, self.display_offtargets):
        view.setModel(holder)
    self.database_querried = False

    self.statusBar().showMessage("Preparing ...")
    self.main_progressbar_value += 1
    self.main_progressbar.setValue(self.main_progressbar_value)

    user_options = display_grna.out()

    tempdir = os.path.join(self.root, "temp")
    # exist_ok avoids the check-then-create race of listing the directory first.
    os.makedirs(tempdir, exist_ok=True)

    try:
        database = str(user_options['organism']).replace(" ", "_")
        mismatch = user_options['max_mismatch']
        max_grna = user_options['max_grna_count']
        max_primer_len = user_options['max_primer_len']
        user_cas9 = user_options['cas9']
        user_pam_tolerance = user_options['pam_tolerance']
        user_fiveprime = user_options['nucleotides_5']
        user_threeprime = user_options['nucleotides_3']

        # Names are compared lower-cased and with underscores stripped.
        gene_mask_dictionary = {
            'genes': [name.replace("_", "").lower() for name in user_options['genes']],
            'masks': [name.replace("_", "").lower() for name in user_options['masks']],
        }

        sqlrunner = SQL(database=database)
        headers = sqlrunner.custom_sql("SELECT header FROM genes").to_dict('list')
        # A set gives O(1) membership tests for the validation below.
        known_headers = set(headers['header'])
        db = database.replace("_", " ")

        # Genes are validated before masks; the first unknown name aborts the run.
        for group in ('genes', 'masks'):
            for name in gene_mask_dictionary[group]:
                if name not in known_headers:
                    abort(f"{name} was not found in {db}")
                    return None

        if mismatch == "":
            abort("First search guide RNA's")
            return None

        # Strand is r for reverse
        worker = CrisprInterference_worker(database=database,
                                           mismatch=mismatch,
                                           strand='r',
                                           max_grna=max_grna,
                                           genes_masks=gene_mask_dictionary,
                                           max_primer_size=max_primer_len,
                                           cas9_organism=user_cas9,
                                           pam_tolerance=user_pam_tolerance,
                                           fiveprime_nucleotides=user_fiveprime,
                                           threeprime_nucleotides=user_threeprime)
        self.threadingPool.start(worker)

        # Poll the pool in 0.8 s slices. waitForDone(msecs) returns as soon as
        # the pool is idle, so the loop neither depends on the exact number of
        # active threads nor sleeps a full slice after the worker has finished.
        while True:
            self.statusBar().showMessage("Gathering guide RNA's...")
            QtWidgets.QApplication.processEvents()
            if self.main_progressbar_value < 90:
                self.main_progressbar_value += 1
                self.main_progressbar.setValue(self.main_progressbar_value)
            if self.threadingPool.waitForDone(800):
                break

        self.statusBar().showMessage("Gathering data ...")

        def read_result(filename):
            # The result files are read back from the temp directory as CSV.
            return pd.read_csv(filepath_or_buffer=os.path.join(tempdir, filename), sep=",")

        self.candidate_gRNA_df = read_result("candidates.txt")
        self.backup_gRNA_df = read_result("backup.txt")
        self.dropped_gRNA_df = read_result("dropped.txt")
        self.offtarget_df = read_result("offtargets.txt")

        cand_model, backup_model, dropped_model, offtargets_model = map(
            PandasModel,
            [self.candidate_gRNA_df, self.backup_gRNA_df, self.dropped_gRNA_df, self.offtarget_df])

        # Animate the remaining progress up to 100 %.
        while self.main_progressbar_value < 100:
            self.main_progressbar_value += 1
            self.statusBar().showMessage("Formatting for display...")
            self.main_progressbar.setValue(self.main_progressbar_value)
            time.sleep(0.01)

        self.display_candidates.setModel(cand_model)
        self.display_backup.setModel(backup_model)
        self.display_dropped.setModel(dropped_model)
        self.display_offtargets.setModel(offtargets_model)
        self.database_querried = True

        self.main_progressbar_value = 0
        self.main_progressbar.setValue(self.main_progressbar_value)
        self.statusBar().showMessage("Ready")

        # Requested genes that do not appear in the candidates table.
        missed = list(set(gene_mask_dictionary['genes']) - set(self.candidate_gRNA_df['genes']))
        EOSpopup(missed_genes=missed).exec_()
    finally:
        # Always discard the scratch directory, including on early returns.
        shutil.rmtree(tempdir)
def run(self):
    """Build the CRISPRi guide RNA tables and write them to ``<root>/temp`` as CSV.

    Steps, in order:
      1. Load the global gRNA table for ``self.mismatch`` and, when genes were
         requested, narrow it with ``self.get_targeted_data``.
      2. Split it into candidates / backup / dropped via ``RefineCripri`` and
         annotate each frame.
      3. Load the off-targets for ``self.mismatch``; drop rows without an
         annotation, rows on the ``+`` strand and rows whose annotation
         (underscores removed) equals their gene.
      4. For candidates, then backup: when any gRNA name has off-targets,
         collect them, apply ``negate_pam_mismatch`` and
         ``move_grna_by_offtargets`` (which may move rows to dropped).
      5. Rebalance candidates/backup, enforce base and primer constraints,
         compute primer length and GC content, design primers.
      6. Write ``candidates.txt``, ``backup.txt``, ``dropped.txt`` and
         ``offtargets.txt``. The off-target file carries a ``from`` column
         (``"candidates"`` / ``"backup"``) unless no off-targets were found.

    The ``temp`` directory must already exist; it is not created here.
    """
    sqlrunner = SQL(database=os.path.join(self.root, "databases", self.database))
    gRNA_db = sqlrunner.get_global_gRNA(mismatch=str(self.mismatch))

    # Original author's note: this is a rate limiting step
    if self.gene_mask_dict['genes']:
        query_data = self.get_targeted_data(dataframe=gRNA_db, gene_mask_dict=self.gene_mask_dict)
    else:
        query_data = gRNA_db

    multifasta = sqlrunner.get_gene_multifasta()
    gRNA_runner = RefineCripri(grna_dataframe=query_data,
                               strand=self.strand,
                               fasta_dataframe=multifasta,
                               cas9=self.cas9_organism,
                               offtarget_ids=sqlrunner.custom_sql("SELECT name, strand FROM global_offtarget"))
    candidates, backup, dropped = gRNA_runner.cripr_interference()
    candidates, backup, dropped = map(self.utils.annotate_dataframe, [candidates, backup, dropped])

    offtargets = sqlrunner.get_offtargets_by_mismatch(mismatch=self.mismatch)
    offtargets = offtargets.dropna(subset=['annotation'])
    # .copy() so the column assignment below writes to an owned frame rather
    # than a filtered view (avoids pandas' SettingWithCopyWarning).
    offtargets = offtargets[offtargets['strand'] != '+'].copy()
    offtargets['annotation'] = offtargets['annotation'].apply(
        lambda x: x.replace("_", "") if isinstance(x, str) else x)
    offtargets = offtargets.query("gene != annotation").reset_index(drop=True)
    offtarget_ids = list(set(offtargets['name']))

    # Both flags are evaluated up front, before either frame is modified.
    candidates_has_offtargets = self.list_comparison(list1=candidates['names'], list2=offtarget_ids)
    backup_has_offtargets = self.list_comparison(list1=backup['names'], list2=offtarget_ids)

    def screen_offtargets(grna_frame, dropped_frame, has_offtargets):
        # Apply the off-target handling to one gRNA frame; returns the updated
        # frame, the updated dropped frame and that frame's off-target table.
        if not has_offtargets:
            return grna_frame, dropped_frame, pd.DataFrame(columns=offtargets.columns)
        shared_ids = list(set(grna_frame['names']) & set(offtarget_ids))
        hits = self.grab_offtargets(query=grna_frame,
                                    offtargets=offtargets,
                                    offtarget_ids=offtarget_ids)
        grna_frame = self.negate_pam_mismatch(grna_dataframe=grna_frame,
                                              offtarget_dataframe=hits,
                                              target_ids=shared_ids)
        grna_frame, dropped_frame = self.move_grna_by_offtargets(grna_dataframe=grna_frame,
                                                                 dropped_dataframe=dropped_frame,
                                                                 offtarget_dataframe=hits,
                                                                 masks=self.gene_mask_dict['masks'])
        return grna_frame, dropped_frame, pd.DataFrame(hits)

    # Candidates first: the dropped frame they produce is fed into the backup pass.
    candidates, dropped, candidates_offtargets = screen_offtargets(
        candidates, dropped, candidates_has_offtargets)
    backup, dropped, backup_offtargets = screen_offtargets(
        backup, dropped, backup_has_offtargets)

    # TODO: add ranking to PAM; move gRNAs between frames when the ranking is poor
    candidates, backup = self.scan_maxmismatches(candidates=candidates, backup=backup)
    candidates, backup = self.force_max_grna_in_candidates(candidates=candidates,
                                                           backup=backup,
                                                           max_grna=self.max_grna)
    candidates = self.force_ag_base(dataframe=candidates, max_primer_size=self.max_primer_size)
    backup = self.force_ag_base(dataframe=backup, max_primer_size=self.max_primer_size)
    candidates, backup, dropped = map(self.calculate_primer_len, [candidates, backup, dropped])
    candidates, backup, dropped = map(self.calculate_gc_content, [candidates, backup, dropped])
    candidates = self.design_primers(dataframe=candidates,
                                     cas9=self.cas9_organism,
                                     fiveprime=self.fiveprime,
                                     threeprime=self.threeprime)
    backup = self.design_primers(dataframe=backup,
                                 cas9=self.cas9_organism,
                                 fiveprime=self.fiveprime,
                                 threeprime=self.threeprime)
    candidates, backup, dropped = map(pd.DataFrame, [candidates, backup, dropped])

    if candidates_offtargets.empty and backup_offtargets.empty:
        # Nothing found: emit a header-only table with the off-target columns.
        final_offtargets = pd.DataFrame(columns=offtargets.columns)
    else:
        # Tag each row with the table it came from, then stack the two tables.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        candidates_offtargets['from'] = "candidates"
        backup_offtargets['from'] = "backup"
        final_offtargets = pd.concat([candidates_offtargets, backup_offtargets], ignore_index=True)

    outputs = {
        "candidates.txt": candidates,
        "backup.txt": backup,
        "dropped.txt": dropped,
        "offtargets.txt": final_offtargets,
    }
    for filename, frame in outputs.items():
        frame.to_csv(os.path.join(self.root, "temp", filename), header=True, index=False, sep=",")