Example #1
0
    def run(self):
        """Run the gRNA selection pipeline and write the results as CSV files.

        Steps, in order:

        1. Load the global gRNA table and, when ``self.gene_mask_dict['genes']``
           is non-empty, restrict it with ``self.get_targeted_data``.
        2. Split the gRNAs into candidates / backup / dropped with
           ``RefineCripri.cripr_interference`` and annotate each group.
        3. Load the off-targets for ``self.mismatch``, drop rows without an
           annotation, drop ``'+'`` strand rows, strip underscores from the
           annotation and keep only rows whose gene differs from it.
        4. For candidates and backup separately, apply the off-target helpers
           (``grab_offtargets``, ``negate_pam_mismatch``,
           ``move_grna_by_offtargets``) when the group shares names with the
           off-target table.
        5. Post-process the groups (mismatch scan, max-gRNA enforcement, A/G
           base forcing, primer length, GC content, primer design).
        6. Write ``candidates.txt``, ``backup.txt``, ``dropped.txt`` and
           ``offtargets.txt`` (comma separated, with header, without index)
           into ``<self.root>/temp``.

        Returns:
            None. The output is the four files written to disk.
        """
        sqlrunner = SQL(database=os.path.join(self.root, "databases", self.database))
        gRNA_db = sqlrunner.get_global_gRNA(mismatch=str(self.mismatch))

        # Flagged by the original author as the rate limiting step; it is only
        # performed when specific genes were requested.
        if bool(self.gene_mask_dict['genes']):
            query_data = self.get_targeted_data(dataframe=gRNA_db, gene_mask_dict=self.gene_mask_dict)
        else:
            query_data = gRNA_db

        multifasta = sqlrunner.get_gene_multifasta()

        gRNA_runner = RefineCripri(grna_dataframe=query_data,
                                   strand=self.strand,
                                   fasta_dataframe=multifasta,
                                   cas9=self.cas9_organism,
                                   offtarget_ids=sqlrunner.custom_sql("SELECT name, strand FROM global_offtarget"))

        candidates, backup, dropped = gRNA_runner.cripr_interference()

        candidates, backup, dropped = map(self.utils.annotate_dataframe,
                                          [candidates, backup, dropped])

        offtargets = sqlrunner.get_offtargets_by_mismatch(mismatch=self.mismatch)
        offtargets.dropna(subset=['annotation'], inplace=True)
        # Explicit copy: the column assignment below must write to an
        # independent frame rather than to a filtered view of the original.
        offtargets = offtargets[offtargets['strand'] != '+'].copy()
        offtargets['annotation'] = offtargets['annotation'].apply(
            lambda x: x.replace("_", "") if isinstance(x, str) else x)

        offtargets = offtargets.query("gene != annotation").reset_index(drop=True)

        offtarget_ids = list(set(offtargets['name']))
        # Both membership checks are evaluated up front, before either group
        # is modified.
        candidates_has_offtargets = self.list_comparison(list1=candidates['names'], list2=offtarget_ids)
        backup_has_offtargets = self.list_comparison(list1=backup['names'], list2=offtarget_ids)

        def apply_offtargets(grna_frame, dropped_frame, has_offtargets):
            """Apply the off-target helpers to one gRNA group.

            Returns the (possibly updated) group, the (possibly updated)
            dropped table and the group's off-targets as a DataFrame.
            """
            if not has_offtargets:
                # Iterating the off-target frame yields its column labels, so
                # this builds an empty frame with the same columns.
                return grna_frame, dropped_frame, pd.DataFrame(dict.fromkeys(offtargets, []))

            shared_ids = list(set(grna_frame['names']) & set(offtarget_ids))

            group_offtargets = self.grab_offtargets(query=grna_frame,
                                                    offtargets=offtargets,
                                                    offtarget_ids=offtarget_ids)

            grna_frame = self.negate_pam_mismatch(grna_dataframe=grna_frame,
                                                  offtarget_dataframe=group_offtargets,
                                                  target_ids=shared_ids)

            grna_frame, dropped_frame = self.move_grna_by_offtargets(grna_dataframe=grna_frame,
                                                                     dropped_dataframe=dropped_frame,
                                                                     offtarget_dataframe=group_offtargets,
                                                                     masks=self.gene_mask_dict['masks'])

            return grna_frame, dropped_frame, pd.DataFrame(group_offtargets)

        # Candidates are processed first; the dropped table they produce is
        # the one handed to the backup pass.
        candidates, dropped, candidates_offtargets = apply_offtargets(
            candidates, dropped, candidates_has_offtargets)
        backup, dropped, backup_offtargets = apply_offtargets(
            backup, dropped, backup_has_offtargets)

        # Add ranking to PAM; gRNAs are moved between the two tables when the
        # ranking is not acceptable.
        candidates, backup = self.scan_maxmismatches(candidates=candidates, backup=backup)
        candidates, backup = self.force_max_grna_in_candidates(candidates=candidates, backup=backup,
                                                               max_grna=self.max_grna)

        candidates = self.force_ag_base(dataframe=candidates, max_primer_size=self.max_primer_size)

        backup = self.force_ag_base(dataframe=backup, max_primer_size=self.max_primer_size)

        candidates, backup, dropped = map(self.calculate_primer_len, [candidates, backup, dropped])

        candidates, backup, dropped = map(self.calculate_gc_content, [candidates, backup, dropped])

        candidates = self.design_primers(dataframe=candidates, cas9=self.cas9_organism,
                                         fiveprime=self.fiveprime, threeprime=self.threeprime)

        backup = self.design_primers(dataframe=backup, cas9=self.cas9_organism,
                                     fiveprime=self.fiveprime, threeprime=self.threeprime)

        candidates, backup, dropped = map(pd.DataFrame,
                                          [candidates, backup, dropped])

        final_offtargets = pd.DataFrame()

        # As soon as either group has off-targets, both tables are labelled
        # with their origin and stacked. pd.concat replaces DataFrame.append,
        # which no longer exists in pandas >= 2.0.
        if not (candidates_offtargets.empty and backup_offtargets.empty):
            candidates_offtargets['from'] = "candidates"
            backup_offtargets['from'] = "backup"
            final_offtargets = pd.concat([candidates_offtargets, backup_offtargets],
                                         ignore_index=True)

        # No off-targets at all: still emit a file carrying the expected header.
        if final_offtargets.empty:
            final_offtargets = pd.DataFrame(columns=offtargets.columns)

        temp_dir = os.path.join(self.root, "temp")
        outputs = (
            ("candidates.txt", candidates),
            ("backup.txt", backup),
            ("dropped.txt", dropped),
            ("offtargets.txt", final_offtargets),
        )
        for filename, frame in outputs:
            frame.to_csv(os.path.join(temp_dir, filename), header=True, index=False, sep=",")
Example #2
0
class CrisprFuncHelpers:
    """
    Helper routines backing the unit tests for crispr.py.

    Every ``*_test`` method computes a value from the gRNA database and the
    matching ``*_result`` method returns the value the test is expected to
    produce.
    """

    def __init__(self, database: str, strand: str, mismatch: int, cas9: str):
        """Store the test settings and open the SQL wrapper for *database*.

        NOTE: this changes the process working directory to the directory
        resolved from ``"../main.py"``.
        """
        self.root = os.path.dirname(os.path.abspath("../main.py"))
        os.chdir(self.root)
        self.sql = SQL(database=database)
        self.strand = strand
        self.mismatch = mismatch
        self.cas9 = cas9

    @staticmethod
    def _rows_for_genes(data, query):
        """Return the rows of *data* that belong to the genes in *query*.

        The gene of a row is the part of its ``names`` value before the first
        underscore; it is added to the result as a ``genes`` column. A query
        gene is only used when it equals at least one gene exactly; it then
        selects every row whose gene contains it as a substring. Matches are
        stacked in query order with a fresh index. *data* itself is not
        modified. An empty DataFrame is returned when nothing matches.
        """
        genes = [name.split("_")[0] for name in data['names']]
        data = data.assign(genes=genes)
        known_genes = set(genes)

        matches = []
        for gene in query:
            if gene in known_genes:
                mask = [gene in candidate for candidate in genes]
                matches.append(data.loc[mask, :])

        if not matches:
            return pd.DataFrame()
        # pd.concat replaces DataFrame.append, removed in pandas 2.0.
        return pd.concat(matches, ignore_index=True)

    @staticmethod
    def _all_rows(frame, predicate) -> bool:
        """Return True when *predicate* holds for every row of *frame*.

        A frame without rows yields True instead of raising.
        """
        return all(predicate(row) for _, row in frame.iterrows())

    def _build_runner(self, query):
        """Create a ``RefineCripri`` runner restricted to the genes in *query*."""
        data = self.sql.get_global_gRNA(mismatch=self.mismatch)
        return RefineCripri(grna_dataframe=self._rows_for_genes(data, query),
                            strand=self.strand,
                            fasta_dataframe=None,
                            cas9=self.cas9)

    def initial_filter_test(self) -> list:
        """Check the three groups produced by ``RefineCripri.initial_filter``.

        Returns:
            ``[candidates_ok, backup_ok, dropped_ok]`` where the flags state
            that every candidate has ``score < 2``, every backup has
            ``score >= 2`` and no dropped name ends with ``self.strand``.
        """
        runner = self._build_runner(["Rv0899", "Rv0934"])

        candidates, backup, dropped = map(pd.DataFrame, runner.initial_filter())

        return [
            self._all_rows(candidates, lambda row: row['score'] < 2),
            self._all_rows(backup, lambda row: row['score'] >= 2),
            self._all_rows(dropped, lambda row: row['names'][-1] != self.strand),
        ]

    def initial_filter_result(self):
        """Return the expected outcome of :meth:`initial_filter_test`."""
        return [True, True, True]

    def has_offtarget_test(self):
        """Smoke-test ``RefineCripri.has_offtarget`` on three query genes.

        Returns:
            True when the pipeline runs through and each of its three outputs
            can be turned into a DataFrame; any failure surfaces as an
            exception.
        """
        runner = self._build_runner(["Rv0899", "Rv0934", "Rv0051"])

        candidates, backup, dropped = runner.initial_filter()
        candidates, backup, dropped = runner.has_offtarget(
            candidates=candidates, backup=backup, dropped_gRNA=dropped)

        # Only the ability to build the frames is checked; the frames
        # themselves are not inspected.
        for frame in (candidates, backup, dropped):
            pd.DataFrame(frame)
        return True

    def has_offtarget_result(self):
        """Return the expected outcome of :meth:`has_offtarget_test`."""
        return True