def test_get_code(self):
    """get_code resolves ids, names and instances to the expected code"""
    # an int id, its string form, the name and the instance itself must
    # all resolve to DEFAULT
    for identifier in (1, "1", "Standard Nuclear", DEFAULT):
        self.assertEqual(get_code(identifier), DEFAULT)

    second = get_code(2)
    self.assertEqual(second.name.lower(), "vertebrate mitochondrial")
def translate_frames(seq, moltype=None, gc=1, allow_rc=False):
    """translates a nucleic acid sequence in multiple reading frames

    Parameters
    ----------
    seq
        the sequence to translate. If moltype is provided, it is first
        converted using ``moltype.make_seq(seq)``.
    moltype
        molecular type, must be either DNA or RNA. If not provided, seq
        is passed to the genetic code unchanged.
    gc
        identifier for a genetic code or a genetic code instance
    allow_rc : bool
        if True, the reverse complement frames are retained

    Returns
    -------
    The value of ``gc.sixframes(seq)``. When allow_rc is False only its
    first three entries are returned (presumably the forward strand
    frames -- confirm against sixframes).
    """
    code = get_code(gc)
    if moltype:
        seq = get_moltype(moltype).make_seq(seq)

    frames = code.sixframes(seq)
    return frames if allow_rc else frames[:3]
def __init__(self, moltype="dna", gc=1, allow_rc=False, trim_terminal_stop=True):
    """configures an app that generates aa sequences

    Parameters
    ----------
    moltype : str
        molecular type, must be either DNA or RNA
    gc
        identifier for a genetic code or a genetic code instance
    allow_rc : bool
        accepted, but not stored or used by this constructor
    trim_terminal_stop : bool
        exclude terminal stop codon from seqs

    Notes
    -----
    The app function is set to ``self.get_translated``. Per the original
    documentation, the result is a sequence collection from which
    sequences that could not be translated are excluded.
    """
    super(translate_seqs, self).__init__(
        input_types=self._input_types,
        output_types=self._output_types,
        data_types=self._data_types,
    )
    # NOTE(review): kept ahead of any argument processing, matching the
    # original statement order
    self._formatted_params()

    resolved = get_moltype(moltype)
    assert resolved.label.lower() in ("dna", "rna"), "Invalid moltype"

    self._moltype = resolved
    self._gc = get_code(gc)
    self._trim_terminal_stop = trim_terminal_stop
    self.func = self.get_translated
def test_repr_html(self):
    """the html representation is a single table naming the code"""
    html = get_code(1)._repr_html_().strip()

    # after stripping whitespace, the table element spans the whole output
    self.assertTrue(html.startswith("<table>"))
    self.assertTrue(html.endswith("</table>"))
    self.assertIn("Standard Nuclear", html)
def __init__(
    self,
    *positions,
    fourfold_degenerate=False,
    gc="Standard Nuclear",
    moltype="dna",
):
    """selects the indicated codon positions from an alignment

    Parameters
    ----------
    positions
        one or more codon position numbers, each in 1..3, given as
        separate positional arguments, e.g. 3 is third position, 1, 2 is
        first and second codon position
    fourfold_degenerate : bool
        if True, returns third positions from four-fold degenerate
        codons. Overrides positions.
    gc
        identifier for a genetic code or a genetic code instance. Only
        consulted when fourfold_degenerate is True.
    moltype : str
        molecular type, must be either DNA or RNA
    """
    super(take_codon_positions, self).__init__(
        input_types=self._input_types,
        output_types=self._output_types,
        data_types=self._data_types,
    )
    # NOTE(review): kept ahead of any argument processing, matching the
    # original statement order
    self._formatted_params()

    assert moltype is not None
    mtype = get_moltype(moltype)
    assert mtype.label.lower() in ("dna", "rna"), "Invalid moltype"

    self._moltype = mtype
    self._four_fold_degen = fourfold_degenerate
    self._fourfold_degen_sets = None

    if fourfold_degenerate:
        # positions are ignored entirely in this mode
        self._fourfold_degen_sets = get_fourfold_degenerate_sets(
            get_code(gc), alphabet=mtype.alphabet, as_indices=True
        )
        self.func = self.take_fourfold_positions
        return

    assert (
        1 <= min(positions) <= 3 and 1 <= max(positions) <= 3
    ), "Invalid codon positions"

    # positions are stored zero-based: a bare int for a single position,
    # otherwise a sorted tuple
    if len(positions) == 1:
        zero_based, handler = positions[0] - 1, self.take_codon_position
    else:
        zero_based = tuple(p - 1 for p in sorted(positions))
        handler = self.take_codon_positions

    self.func = handler
    self._positions = zero_based
def test_repr_html(self):
    """the html representation wraps a table naming the code in a div"""
    html = get_code(1)._repr_html_().strip()

    # either of the two wrapper classes is acceptable
    wrappers = ('<div class="c3table">', '<div class="c3align">')
    self.assertTrue(any(wrapper in html for wrapper in wrappers))

    for tag in ("<table>", "</table>"):
        self.assertTrue(tag in html)

    self.assertIn("Standard Nuclear", html)
def CodonAlphabet(gc=1, include_stop_codons=False):
    """returns a codon alphabet built from a genetic code

    Parameters
    ----------
    gc
        an int or str is resolved with get_code; any other value is used
        directly and must provide ``codons`` / ``sense_codons``
    include_stop_codons : bool
        if True, motifs are taken from ``gc.codons``, otherwise from
        ``gc.sense_codons``

    Returns
    -------
    A _CodonAlphabet with DNA moltype, whose motifs are upper case with
    "U" replaced by "T". The genetic code is attached as ``_gc``.
    """
    if isinstance(gc, (int, str)):
        gc = get_code(gc)

    source = gc.codons if include_stop_codons else gc.sense_codons
    motifs = [codon.upper().replace("U", "T") for codon in source]

    alphabet = _CodonAlphabet(motifs, moltype=DNA)
    alphabet._gc = gc
    return alphabet
def deserialise_moltype(data):
    """returns a cogent3 MolType instance, or a CodonAlphabet

    Parameters
    ----------
    data : dict
        the serialised object. Must have "moltype" and "type" entries,
        plus "genetic_code" when "type" resolves to the codon alphabet
        class. A "version" entry is discarded if present. For a codon
        alphabet, all remaining entries are passed as keyword arguments
        to the alphabet constructor.

    Returns
    -------
    The moltype named by ``data["moltype"]``, or a codon alphabet with
    its genetic code attached as ``_gc``.

    Notes
    -----
    The input is not modified. The keys are removed from a shallow copy,
    so the same dict can be deserialised more than once.
    """
    # work on a copy so popping keys does not corrupt the caller's dict
    data = dict(data)
    data.pop("version", None)

    moltype = get_moltype(data["moltype"])
    klass = _get_class(data.pop("type"))

    if klass == _CodonAlphabet:
        data["moltype"] = moltype
        gc = get_code(data.pop("genetic_code"))
        result = _CodonAlphabet(**data)
        result._gc = gc
    else:
        result = moltype
    return result
def __init__(self, moltype="dna", gc=1, allow_rc=False, trim_terminal_stop=True):
    """configures an app that selects translatable sequences

    Per the original documentation, sequences are truncated to modulo 3
    and seqs.info has a translation_errors entry.

    Parameters
    ----------
    moltype : str
        molecular type, must be either DNA or RNA
    gc
        identifier for a genetic code or a genetic code instance
    allow_rc : bool
        If False, forward strand considered only. If True, and
        best frame on rc, it will be negative
    trim_terminal_stop : bool
        exclude terminal stop codon from seqs

    Notes
    -----
    The app function is set to ``self.get_translatable``. Per the
    original documentation, the result is a sequence collection from
    which sequences that could not be translated are excluded.
    """
    super(select_translatable, self).__init__(
        input_types=self._input_types,
        output_types=self._output_types,
        data_types=self._data_types,
    )
    # NOTE(review): kept ahead of any argument processing, matching the
    # original statement order
    self._formatted_params()

    resolved = get_moltype(moltype)
    assert resolved.label.lower() in ("dna", "rna"), "Invalid moltype"

    self._moltype = resolved
    self._gc = get_code(gc)
    self._allow_rc = allow_rc
    self._trim_terminal_stop = trim_terminal_stop
    self.func = self.get_translatable
def best_frame(seq, gc=1, allow_rc=False, require_stop=False):
    """returns reading frame start that has either no stops or a single
    terminal stop codon

    Parameters
    ----------
    seq
        the sequence to assess; it is passed to ``gc.sixframes`` and its
        ``name`` is used in error messages
    gc
        genetic code ID, name or instance
    allow_rc
        If False, forward strand considered only. If True, and
        best frame on rc, it will be negative
    require_stop
        a terminal stop must be present

    Returns
    -------
    int
        1, 2, 3 if the best frame is on the + strand;
        -1, -2, -3 if the best frame is on the reverse strand

    Raises
    ------
    ValueError
        if the minimum number of stop codons across all frames exceeds 1,
        if the selected frame's single stop codon is not at the sequence
        end, or if require_stop is True and no frame has exactly one stop
    """

    def _untranslatable():
        # built lazily so seq.name is only read when an error is raised
        return ValueError("%s cannot be robustly translated" % seq.name)

    code = get_code(gc)
    frames = code.sixframes(seq)
    if not allow_rc:
        frames = frames[:3]

    if not require_stop:
        # don't count stops if they're at the end of the aa sequence
        for idx, aa in enumerate(frames):
            if aa.endswith("*"):
                frames[idx] = aa[:-1]

    # sorting (count, index) pairs puts the frame with fewest stops first,
    # ties resolved in favour of the lowest frame index
    stop_counts = sorted((aa.count("*"), idx) for idx, aa in enumerate(frames))
    fewest, frame = stop_counts[0]

    # more than one stop in the best frame, cannot be translated
    if fewest > 1:
        raise _untranslatable()

    if fewest == 0 and require_stop:
        # a stop is mandatory, so switch to the first frame with exactly one
        frame = next((fr for n, fr in stop_counts if n == 1), None)
        if frame is None:
            raise _untranslatable()
        fewest = 1

    # a lone stop is only acceptable as the final codon
    if fewest == 1 and not frames[frame].endswith("*"):
        raise _untranslatable()

    # convert the zero-based index to a frame number, indices 3..5 being
    # the reverse complement frames reported as -1..-3
    number = frame + 1
    if allow_rc and number > 3:
        number = 3 - number
    return number