Ejemplo n.º 1
0
    def _update_ORF_frame(self, i, nuc, j, frame, d_vars):
        """Feed one nucleotide to slot ``j`` and record any ORF it closes.

        ``nuc`` is appended to the codon buffer ``d_vars["codon"][j]``. Until
        that buffer holds three characters nothing else happens. Once it does,
        the codon is tested against the forward and reverse start/stop codon
        collections of the instance, matching ORFs are appended to
        ``self._ORF_pos`` and the buffer is emptied.

        Each entry appended to ``self._ORF_pos`` is the list
        ``[begin, end, signed_frame, start_position, len_ORF, len_CDS]`` where
        ``signed_frame`` is ``frame + 1`` for a forward hit and
        ``-(frame + 1)`` for a reverse hit.

        Parameters
        ----------
        i : int
            Position of ``nuc``, i.e. the last position of a completed codon
            (presumably the 0-based index in the scanned sequence -- confirm
            against the caller).
        nuc : str
            Character appended to the codon buffer.
        j : int
            Index of the slot updated in every list of ``d_vars``.
        frame : int
            Frame number used to label recorded entries.
        d_vars : dict
            Scan state with keys ``"codon"``, ``"begin_f"``, ``"end_f"``,
            ``"pos_ATG_f"``, ``"begin_r"``, ``"end_r"`` and ``"pos_ATG_r"``,
            each indexable by ``j``. It is modified in place.

        Returns
        -------
        dict
            The same ``d_vars`` object that was passed in.
        """
        codon = d_vars["codon"][j] + nuc
        d_vars["codon"][j] = codon

        # Nothing to test until the codon is complete.
        if len(codon) != 3:
            return d_vars

        # --- forward strand -------------------------------------------------
        # Only the first start codon since the previous stop is remembered;
        # NaN marks "no start seen yet".
        if codon in self._codons_start and np.isnan(d_vars["pos_ATG_f"][j]):
            d_vars["pos_ATG_f"][j] = i - 2

        if codon in self._codons_stop:
            d_vars["end_f"][j] = i
            begin = d_vars["begin_f"][j]
            end = d_vars["end_f"][j]
            start = d_vars["pos_ATG_f"][j]
            len_ORF = end - begin
            len_CDS = end - start  # NaN when no start codon was seen
            # The length used for filtering depends on the filter type; a NaN
            # length never exceeds the threshold, so it is never recorded
            # when filtering on CDS.
            len_to_filter = len_CDS if self._type_filter == "CDS" else len_ORF
            if len_to_filter > self._threshold:
                self._ORF_pos.append(
                    [begin, end, frame + 1, start, len_ORF, len_CDS]
                )
            # The next forward ORF starts right after this stop codon.
            d_vars["begin_f"][j] = i + 1
            d_vars["pos_ATG_f"][j] = np.nan

        # --- reverse strand -------------------------------------------------
        # Every reverse start overwrites the previous one, so the last start
        # seen before the reverse stop is the one kept.
        if codon in self._codons_start_rev:
            d_vars["pos_ATG_r"][j] = i

        if codon in self._codons_stop_rev:
            # The reverse ORF ends just before the three positions of this
            # stop codon (i - 2 .. i).
            d_vars["end_r"][j] = i - 3
            begin = d_vars["begin_r"][j]
            end = d_vars["end_r"][j]
            start = d_vars["pos_ATG_r"][j]
            len_ORF = end - begin
            len_CDS = start - begin  # NaN when no reverse start was seen
            len_to_filter = len_CDS if self._type_filter == "CDS" else len_ORF
            if len_to_filter > self._threshold:
                self._ORF_pos.append(
                    [begin, end, -(frame + 1), start, len_ORF, len_CDS]
                )
            # The next reverse ORF begins at the first position of this stop
            # codon.
            d_vars["begin_r"][j] = i - 2
            d_vars["pos_ATG_r"][j] = np.nan

        # Start collecting the next codon.
        d_vars["codon"][j] = ""
        return d_vars
Ejemplo n.º 2
0
    def _update_ORF_frame(self, i, nuc, j, frame, d_vars):
        """Extend the codon of slot ``j`` with ``nuc`` and close ORFs on a stop.

        The character ``nuc`` is added to ``d_vars["codon"][j]``. When the
        codon reaches three characters it is looked up in the instance's
        forward and reverse start/stop codon collections, the bookkeeping
        lists in ``d_vars`` are updated accordingly, and the codon buffer is
        cleared. While the codon is shorter than three characters only the
        buffer changes.

        A closed ORF is appended to ``self._ORF_pos`` as
        ``[begin, end, signed_frame, start_position, len_ORF, len_CDS]`` if
        the length selected by ``self._type_filter`` (``"CDS"`` selects the
        CDS length, any other value the ORF length) is strictly greater than
        ``self._threshold``. ``signed_frame`` is ``frame + 1`` on the forward
        strand and ``-(frame + 1)`` on the reverse strand.

        Parameters
        ----------
        i : int
            Position of ``nuc``; assumed to be its index in the sequence
            being scanned (TODO confirm with the calling loop).
        nuc : str
            Character to append to the codon buffer.
        j : int
            Slot index into each list stored in ``d_vars``.
        frame : int
            Frame number used to tag recorded entries.
        d_vars : dict
            Mutable scan state holding the lists ``"codon"``, ``"begin_f"``,
            ``"end_f"``, ``"pos_ATG_f"``, ``"begin_r"``, ``"end_r"`` and
            ``"pos_ATG_r"``.

        Returns
        -------
        dict
            ``d_vars`` itself, after the in-place update.
        """
        triplet = d_vars["codon"][j] + nuc
        d_vars["codon"][j] = triplet

        if len(triplet) == 3:
            # Forward start: recorded only if none is pending (NaN = none).
            if triplet in self._codons_start and np.isnan(d_vars["pos_ATG_f"][j]):
                d_vars["pos_ATG_f"][j] = i - 2

            # Forward stop: close the ORF running from begin_f to i.
            if triplet in self._codons_stop:
                d_vars["end_f"][j] = i
                orf_begin = d_vars["begin_f"][j]
                orf_end = d_vars["end_f"][j]
                atg = d_vars["pos_ATG_f"][j]
                len_ORF = orf_end - orf_begin
                len_CDS = orf_end - atg  # NaN if no start codon is pending
                if self._type_filter == "CDS":
                    len_to_filter = len_CDS
                else:
                    len_to_filter = len_ORF
                # A NaN length compares False, so it is never recorded here.
                if len_to_filter > self._threshold:
                    self._ORF_pos.append(
                        [orf_begin, orf_end, frame + 1, atg, len_ORF, len_CDS]
                    )
                # Re-arm the forward state just after the stop codon.
                d_vars["begin_f"][j] = i + 1
                d_vars["pos_ATG_f"][j] = np.nan

            # Reverse start: always overwritten, so the latest one wins.
            if triplet in self._codons_start_rev:
                d_vars["pos_ATG_r"][j] = i

            # Reverse stop: close the ORF ending right before this codon.
            if triplet in self._codons_stop_rev:
                d_vars["end_r"][j] = i - 3
                orf_begin = d_vars["begin_r"][j]
                orf_end = d_vars["end_r"][j]
                atg = d_vars["pos_ATG_r"][j]
                len_ORF = orf_end - orf_begin
                len_CDS = atg - orf_begin  # NaN if no reverse start was seen
                if self._type_filter == "CDS":
                    len_to_filter = len_CDS
                else:
                    len_to_filter = len_ORF
                if len_to_filter > self._threshold:
                    self._ORF_pos.append(
                        [orf_begin, orf_end, -(frame + 1), atg, len_ORF, len_CDS]
                    )
                # Re-arm the reverse state at the first position of this
                # stop codon (i - 3 + 1).
                d_vars["begin_r"][j] = i - 2
                d_vars["pos_ATG_r"][j] = np.nan

            # Empty the buffer for the next codon.
            d_vars["codon"][j] = ""

        return d_vars