Beispiel #1
0
 def test_get_code(self):
     """get_code resolves int, str, name and instance identifiers"""
     # each of these identifiers is expected to give the DEFAULT code
     standard_ids = (1, "1", "Standard Nuclear", DEFAULT)
     for identifier in standard_ids:
         self.assertEqual(get_code(identifier), DEFAULT)

     # a different numeric id gives a different, named code
     mito = get_code(2)
     self.assertEqual(mito.name.lower(), "vertebrate mitochondrial")
Beispiel #2
0
def translate_frames(seq, moltype=None, gc=1, allow_rc=False):
    """translates a nucleic acid sequence in multiple reading frames

    Parameters
    ----------
    seq
        the sequence to translate. If moltype is provided, seq is first
        converted using that moltype's make_seq method
    moltype
        molecular type, must be either DNA or RNA. If not provided, seq is
        passed to the genetic code unchanged
    gc
        identifier for a genetic code or a genetic code instance
    allow_rc : bool
        if True, every entry produced by the genetic code's sixframes
        method is returned, otherwise only the first three

    Returns
    -------
    The result of the genetic code's sixframes method, truncated to its
    first three entries when allow_rc is False.
    NOTE(review): presumably the first three entries are the forward strand
    frames and the rest the reverse complement -- confirm against sixframes.
    """
    code = get_code(gc)
    if moltype:
        seq = get_moltype(moltype).make_seq(seq)

    frames = code.sixframes(seq)
    return frames if allow_rc else frames[:3]
Beispiel #3
0
    def __init__(self,
                 moltype="dna",
                 gc=1,
                 allow_rc=False,
                 trim_terminal_stop=True):
        """generates aa sequences

        Parameters
        ----------
        moltype : str
            molecular type, must be either DNA or RNA
        gc
            identifier for a genetic code or a genetic code instance
        allow_rc : bool
            accepted by the signature but not stored or used by this
            constructor
        trim_terminal_stop : bool
            exclude terminal stop codon from seqs

        Returns
        -------
        A sequence collection. Sequences that could not be translated
        are excluded.
        """
        super(translate_seqs, self).__init__(
            input_types=self._input_types,
            output_types=self._output_types,
            data_types=self._data_types,
        )
        # NOTE(review): _formatted_params is defined outside this view; it is
        # called before any argument is rebound, presumably so it can record
        # the constructor arguments as given -- confirm before reordering
        self._formatted_params()

        # resolve the moltype identifier; from here on moltype is the object
        # returned by get_moltype
        moltype = get_moltype(moltype)
        # validation is an assert, so it is skipped when Python runs with -O
        assert moltype.label.lower() in ("dna", "rna"), "Invalid moltype"

        self._moltype = moltype
        # gc is resolved once, at construction
        self._gc = get_code(gc)
        self._trim_terminal_stop = trim_terminal_stop
        # the method invoked when this instance does its work
        self.func = self.get_translated
 def test_repr_html(self):
     """the html repr is a single table that names the genetic code"""
     html = get_code(1)._repr_html_().strip()
     # after stripping whitespace the table tags must bracket the output
     self.assertTrue(html.startswith("<table>"))
     self.assertTrue(html.endswith("</table>"))
     self.assertIn("Standard Nuclear", html)
Beispiel #5
0
    def __init__(
        self,
        *positions,
        fourfold_degenerate=False,
        gc="Standard Nuclear",
        moltype="dna",
    ):
        """selects the indicated codon positions from an alignment

        Parameters
        ----------
        positions
            one or more codon position numbers, each in the range 1-3,
            given as separate positional arguments, e.g. 3 is third
            position, while 1, 2 is first and second codon position.
            Ignored when fourfold_degenerate is True.
        fourfold_degenerate : bool
            if True, returns third positions from four-fold degenerate codons.
            Overrides positions.
        gc
            identifier for a genetic code or a genetic code instance. Only
            resolved when fourfold_degenerate is True.
        moltype : str
            molecular type, must be either DNA or RNA
        """
        super(take_codon_positions, self).__init__(
            input_types=self._input_types,
            output_types=self._output_types,
            data_types=self._data_types,
        )
        # NOTE(review): _formatted_params is defined outside this view; it is
        # called before any argument is rebound, presumably so it can record
        # the constructor arguments as given -- confirm before reordering
        self._formatted_params()
        # validation below uses assert, so it is skipped when Python runs
        # with -O
        assert moltype is not None
        moltype = get_moltype(moltype)

        assert moltype.label.lower() in ("dna", "rna"), "Invalid moltype"

        self._moltype = moltype
        self._four_fold_degen = fourfold_degenerate
        self._fourfold_degen_sets = None

        if fourfold_degenerate:
            # four-fold mode: positions are never inspected and
            # self._positions is not set on this path
            gc = get_code(gc)
            sets = get_fourfold_degenerate_sets(
                gc, alphabet=moltype.alphabet, as_indices=True
            )
            self._fourfold_degen_sets = sets
            self.func = self.take_fourfold_positions
            return

        # min()/max() raise ValueError if no positions were given
        assert (
            1 <= min(positions) <= 3 and 1 <= max(positions) <= 3
        ), "Invalid codon positions"

        # a single position is stored as a bare 0-based int, several
        # positions as a sorted tuple of 0-based ints
        by_index = True if len(positions) == 1 else False
        if by_index:
            positions = positions[0] - 1
            self.func = self.take_codon_position
        else:
            positions = tuple(p - 1 for p in sorted(positions))
            self.func = self.take_codon_positions

        self._positions = positions
Beispiel #6
0
 def test_repr_html(self):
     """the html repr wraps a table in a c3 div and names the code"""
     html = get_code(1)._repr_html_().strip()
     # either wrapper class is acceptable
     wrappers = ('<div class="c3table">', '<div class="c3align">')
     self.assertTrue(any(wrapper in html for wrapper in wrappers))
     # both the opening and closing table tags must be present
     for tag in ("<table>", "</table>"):
         self.assertTrue(tag in html)
     self.assertIn("Standard Nuclear", html)
Beispiel #7
0
def CodonAlphabet(gc=1, include_stop_codons=False):
    """returns a codon alphabet built from a genetic code

    Parameters
    ----------
    gc
        an int or str identifier for a genetic code (resolved with
        get_code), or an object that is used as the genetic code directly
    include_stop_codons : bool
        if True the alphabet is built from gc.codons, otherwise from
        gc.sense_codons

    Returns
    -------
    A _CodonAlphabet with DNA moltype. Its motifs are upper cased with U
    replaced by T, and the genetic code is attached as its _gc attribute.
    """
    # only int / str identifiers are looked up, anything else is used as is
    if isinstance(gc, (int, str)):
        gc = get_code(gc)

    codons = gc.codons if include_stop_codons else gc.sense_codons
    # express every codon in the DNA alphabet
    motifs = [codon.upper().replace("U", "T") for codon in codons]

    alphabet = _CodonAlphabet(motifs, moltype=DNA)
    alphabet._gc = gc
    return alphabet
Beispiel #8
0
def deserialise_moltype(data):
    """returns a cogent3 MolType instance, or a CodonAlphabet

    Parameters
    ----------
    data : dict
        serialised representation. Must contain "moltype" and "type" keys,
        plus "genetic_code" when "type" resolves to the codon alphabet
        class. An optional "version" key is discarded.

    Returns
    -------
    The object returned by get_moltype for the "moltype" entry, or, when
    "type" resolves to _CodonAlphabet, a _CodonAlphabet constructed from the
    remaining entries with the genetic code attached as its _gc attribute.

    Notes
    -----
    Operates on a shallow copy of data, so the caller's dict is left
    unmodified and can be deserialised again.
    """
    # the pops and the reassignment below must not leak back to the caller
    data = dict(data)
    data.pop("version", None)
    data["moltype"] = get_moltype(data["moltype"])
    klass = _get_class(data.pop("type"))
    if klass == _CodonAlphabet:
        gc = get_code(data.pop("genetic_code"))
        # whatever remains is passed through as keyword arguments
        result = _CodonAlphabet(**data)
        result._gc = gc
    else:
        result = data["moltype"]

    return result
Beispiel #9
0
    def __init__(self,
                 moltype="dna",
                 gc=1,
                 allow_rc=False,
                 trim_terminal_stop=True):
        """selects translatable sequences

        Sequences are truncated to modulo 3. seqs.info has a translation_errors
        entry.

        Parameters
        ----------
        moltype : str
            molecular type, must be either DNA or RNA
        gc
            identifier for a genetic code or a genetic code instance
        allow_rc : bool
            If False, forward strand considered only. If True, and
              best frame on rc, it will be negative
        trim_terminal_stop : bool
            exclude terminal stop codon from seqs

        Returns
        -------
        A sequence collection. Sequences that could not be translated
        are excluded.
        """
        super(select_translatable, self).__init__(
            input_types=self._input_types,
            output_types=self._output_types,
            data_types=self._data_types,
        )
        # NOTE(review): _formatted_params is defined outside this view; it is
        # called before any argument is rebound, presumably so it can record
        # the constructor arguments as given -- confirm before reordering
        self._formatted_params()

        # resolve the moltype identifier; from here on moltype is the object
        # returned by get_moltype
        moltype = get_moltype(moltype)
        # validation is an assert, so it is skipped when Python runs with -O
        assert moltype.label.lower() in ("dna", "rna"), "Invalid moltype"

        self._moltype = moltype
        # gc is resolved once, at construction
        self._gc = get_code(gc)
        self._allow_rc = allow_rc
        self._trim_terminal_stop = trim_terminal_stop
        # the method invoked when this instance does its work
        self.func = self.get_translatable
Beispiel #10
0
def best_frame(seq, gc=1, allow_rc=False, require_stop=False):
    """returns reading frame start that has either no stops or a single
    terminal stop codon

    result will be either 1, 2, 3 (or -1, -2, -3)

    Parameters
    ----------
    seq
        the sequence to evaluate. It is passed to the genetic code's
        sixframes method and its name attribute is used in error messages
    gc
        genetic code ID, name or instance
    allow_rc
        If False, only the first three frames produced by sixframes are
        considered. If True, and the best frame is one of the later
        frames, the result will be negative
    require_stop
        a terminal stop must be present

    Returns
    -------
    int
        1, 2, 3 if the best frame is among the first three frames;
        -1, -2, -3 if it is among the last three (only possible when
        allow_rc is True)

    Raises
    ------
    ValueError
        if the minimum number of stop codons across all frames exceeds 1,
        if the stop codon is not at the sequence end, or if require_stop
        is True and no frame has exactly one stop codon
    """
    code = get_code(gc)
    frames = code.sixframes(seq)
    if not allow_rc:
        frames = frames[:3]

    if not require_stop:
        # a trailing stop is not counted against a frame, so drop it
        for index, translation in enumerate(frames):
            if translation.endswith("*"):
                frames[index] = translation[:-1]

    # rank frames by number of stops, ties broken by frame index
    ranked = sorted(
        (translation.count("*"), index) for index, translation in enumerate(frames)
    )
    num_stops, chosen = ranked[0]
    if num_stops > 1:
        # even the best frame has multiple stops
        raise ValueError("%s cannot be robustly translated" % seq.name)

    if num_stops == 0 and require_stop:
        # the stop-free frame does not qualify, use the first ranked frame
        # that has exactly one stop
        with_one_stop = [frame for count, frame in ranked if count == 1]
        if not with_one_stop:
            raise ValueError("%s cannot be robustly translated" % seq.name)
        num_stops, chosen = 1, with_one_stop[0]

    if num_stops == 1 and not frames[chosen].endswith("*"):
        # the single stop is internal
        raise ValueError("%s cannot be robustly translated" % seq.name)

    # convert the 0-based index to a frame number; indices 3, 4, 5 are
    # reported as -1, -2, -3
    number = chosen + 1
    if allow_rc and number > 3:
        number = 3 - number
    return number