def __init__(self, _seq, gene_id="unknown", transcript_id=None, orig_transcript=None, vars=None):
    """
    Set up a protein sequence together with its provenance information.

    :param str _seq: String of an IUPACProtein alphabet, representing the
                     protein; it is upper-cased before being stored
    :param str gene_id: ID of the genome the protein originated from
    :param str transcript_id: ID of the transcript the protein originated
                              from; when None, an ID of the form
                              ``Protein_<n>`` is built from ``Protein.newid()``
    :param orig_transcript: Reference to the originating transcript object
    :type orig_transcript: :class:`~Fred2.Core.Transcript.Transcript`
    :param vars: Nonsynonymous variants that are associated with the
                 protein. key=position within protein, value=list of
                 variants at that pos. A new empty dict is used when None
    :type vars: dict(int,list(:class:`~Fred2.Core.Variant.Variant`))
    """
    # Parent initialisers first
    MetadataLogger.__init__(self)
    Seq.__init__(self, _seq.upper(), IUPAC.IUPACProtein)

    # Own members
    # {prot-position: list(variant)}
    self.vars = dict() if vars is None else vars
    self.orig_transcript = orig_transcript
    if transcript_id is None:
        # No ID supplied by the caller: generate one
        transcript_id = "Protein_%i" % Protein.newid()
    self.transcript_id = transcript_id
    self.gene_id = gene_id
def __init__(self, _seq, _gene_id="unknown", _transcript_id=None, _orig_transcript=None, _vars=None):
    """
    Create a protein sequence and attach its origin metadata.

    :param str _seq: String of an IUPACProtein alphabet, representing the
                     protein (stored upper-cased)
    :param str _gene_id: ID of the genome the protein originated from
    :param str _transcript_id: ID of the transcript the protein originated
                               from; if None, ``"Protein_%i"`` is filled
                               with ``Protein.newid()``
    :param Transcript _orig_transcript: Reference to the originating
                                        transcript
    :param dict(int,list(Variant)) _vars: Nonsynonymous variants that are
                                          associated with the protein.
                                          key=position within protein,
                                          value=list of variants at that
                                          pos; None yields an empty dict
    """
    # Initialise both parent types
    MetadataLogger.__init__(self)
    Seq.__init__(self, _seq.upper(), IUPAC.IUPACProtein)

    # Initialise own members
    variants = _vars
    if variants is None:
        variants = dict()
    self.vars = variants  # {prot-position: list(variant)}
    self.orig_transcript = _orig_transcript

    tid = _transcript_id
    if tid is None:
        tid = "Protein_%i" % Protein.newid()
    self.transcript_id = tid
    self.gene_id = _gene_id
def __init__(self, seq, protein_pos=None):
    """
    Create a peptide and record the proteins it originates from.

    :param str seq: Sequence of the peptide in one letter amino acid code
                    (stored upper-cased)
    :param protein_pos: Dict of proteins to the positions of origin within
                        that protein
    :type protein_pos: dict(:class:`~Fred2.Core.Protein.Protein`,list(int))
    :raises TypeError: If a non-empty ``protein_pos`` has a key that is not
                       a Protein or a position that is not an int/long
    """
    MetadataLogger.__init__(self)
    Seq.__init__(self, seq.upper(), IUPAC.IUPACProtein)

    # Enforce dict(Protein, list(int)); skipped for None and empty input
    if protein_pos:
        for prot, positions in protein_pos.iteritems():
            if not isinstance(prot, Protein) or \
                    not all(isinstance(idx, (int, long)) for idx in positions):
                raise TypeError(
                    "The proteins_pos given to a Peptide object should be dict(Protein,list(int))"
                )

    if protein_pos is None:
        self.proteins = dict()
        self.proteinPos = collections.defaultdict(list)
    else:
        # Both lookups are keyed by the transcript ID of the protein
        self.proteins = {prot.transcript_id: prot for prot in protein_pos.iterkeys()}
        self.proteinPos = {
            prot.transcript_id: positions
            for prot, positions in protein_pos.iteritems()
        }
def __init__(self, data="", gap_char="-", rf_table=None):
    """Initialize the class.

    ``rf_table`` should be a tuple or list giving every codon start
    position along the sequence. For example::

        sequence = 'AAATTTGGGCCAAATTT'
        rf_table = (0, 3, 6, 8, 11, 14)

    the translated protein sequence will be::

        AAA TTT GGG GCC AAA TTT
        K   F   G   A   K   F

    Notice: rf_table applies to the ungapped sequence. If there are gaps
    in the sequence, they will be discarded. This keeps the rf_table
    independent of where the codon sequence appears in the alignment.

    Raises ValueError when no rf_table is given and the sequence length is
    not a multiple of three, and TypeError when rf_table is not a
    tuple/list of int.
    """
    Seq.__init__(self, data.upper())
    self.gap_char = gap_char

    if rf_table is not None:
        # Caller-supplied table: only its types are validated
        if not isinstance(rf_table, (tuple, list)):
            raise TypeError("rf_table should be a tuple or list object")
        for position in rf_table:
            if not isinstance(position, int):
                raise TypeError("Elements in rf_table should be int "
                                "that specify the codon positions of "
                                "the sequence")
        self.rf_table = rf_table
        return

    # No table given: the (gapped) length must be a whole number of codons
    length = len(self)
    if length % 3 != 0:
        raise ValueError("Sequence length is not a multiple of "
                         "three (i.e. a whole number of codons)")
    # Codon starts are counted over the sequence with gaps removed
    ungapped_length = length - self.count(gap_char)
    self.rf_table = list(range(0, ungapped_length, 3))
def __init__(
    self, data="", alphabet=default_codon_alphabet, gap_char="-", rf_table=None
):
    """Initialize the class.

    ``rf_table`` should be a tuple or list giving every codon start
    position along the sequence. For example::

        sequence = 'AAATTTGGGCCAAATTT'
        rf_table = (0, 3, 6, 8, 11, 14)

    the translated protein sequence will be::

        AAA TTT GGG GCC AAA TTT
        K   F   G   A   K   F

    Notice: rf_table applies to the ungapped sequence. If there are gaps
    in the sequence, they will be discarded. This keeps the rf_table
    independent of where the codon sequence appears in the alignment.

    Raises TypeError if ``alphabet`` is not a CodonAlphabet or ``rf_table``
    is not a tuple/list of int, and ValueError if the length is not a
    multiple of three (no rf_table given) or a codon is not in
    ``alphabet.letters``.
    """
    Seq.__init__(self, data.upper(), alphabet=alphabet)
    self.gap_char = gap_char

    if not isinstance(alphabet, CodonAlphabet):
        raise TypeError("Input alphabet should be a CodonAlphabet object.")

    if rf_table is None:
        seq_ungapped = self._data.replace(gap_char, "")
        # The alignment length must be a whole number of codons
        if len(self) % 3 != 0:
            raise ValueError(
                "Sequence length is not a multiple of "
                "three (i.e. a whole number of codons)"
            )
        self.rf_table = list(range(0, len(seq_ungapped), 3))
        # Alphabet check is done here codon by codon because
        # Alphabet._verify_alphabet only works for single-letter alphabets
        for start in self.rf_table:
            codon = self._data[start : start + 3]
            if codon not in alphabet.letters:
                raise ValueError(
                    "Sequence contain codon not in the alphabet"
                    f" ({codon})!"
                )
        return

    # Caller-supplied table: validate its types, then every codon it names
    if not isinstance(rf_table, (tuple, list)):
        raise TypeError("rf_table should be a tuple or list object")
    for position in rf_table:
        if not isinstance(position, int):
            raise TypeError(
                "Elements in rf_table should be int "
                "that specify the codon positions of "
                "the sequence"
            )
    seq_ungapped = self._data.replace(gap_char, "")
    for start in rf_table:
        codon = seq_ungapped[start : start + 3]
        if codon not in alphabet.letters:
            raise ValueError(
                "Sequence contain undefined letters from alphabet"
                f" ({codon})!"
            )
    self.rf_table = rf_table
def __init__(self, pred, weights):
    """
    Store a secondary-structure prediction as a sequence plus its weights.

    @type pred: str. Is a string of the SS prediction (eg: CCCHHEEC);
                used as the sequence data and kept as ``self.prediction``
    @type weights: list<int>. Kept unchanged as ``self.weights``
    """
    ss_alphabet = PsipredAlphabet()
    Seq.__init__(self, pred, ss_alphabet)
    self.prediction, self.weights = pred, weights
def __init__(self, _seq, proteins=None, vars=None, transcripts=None):
    """
    Create a peptide with optional protein, variant and transcript maps.

    :param str _seq: sequence of the peptide in one letter amino acid code
    :param proteins: stored as ``self.proteins``; a new empty dict if None
    :param vars: stored as ``self.vars``; a new empty dict if None
    :param transcripts: stored as ``self.transcripts``; a new empty dict
                        if None
    """
    MetadataLogger.__init__(self)
    Seq.__init__(self, _seq, IUPAC.IUPACProtein)

    # Each container defaults to its own fresh dict
    if proteins is None:
        proteins = {}
    self.proteins = proteins

    if vars is None:
        vars = {}
    self.vars = vars

    if transcripts is None:
        transcripts = {}
    self.transcripts = transcripts
def __init__(self, length, alphabet=IUPAC.unambiguous_dna):
    """Initialize a randomized sequence of the given length.

    The letters are obtained from ``self.SampleLetters`` and upper-cased
    before being handed to ``Seq.__init__``.

    Args:
        length: the sequence length.
        alphabet: the alphabet to choose from.
    """
    candidate_letters = alphabet.letters
    sampled = self.SampleLetters(candidate_letters, length)
    Seq.__init__(self, sampled.upper(), alphabet)
def __init__(self, data='', alphabet=default_codon_alphabet,
             gap_char="-", rf_table=None):
    """Initialize the class.

    ``rf_table`` should be a tuple or list giving every codon start
    position along the sequence. For example::

        sequence = 'AAATTTGGGCCAAATTT'
        rf_table = (0, 3, 6, 8, 11, 14)

    the translated protein sequence will be::

        AAA TTT GGG GCC AAA TTT
        K   F   G   A   K   F

    Notice: rf_table applies to the ungapped sequence. If there are gaps
    in the sequence, they will be discarded. This keeps the rf_table
    independent of where the codon sequence appears in the alignment.

    Raises TypeError if ``alphabet`` is not a CodonAlphabet or ``rf_table``
    is not a tuple/list of int, and ValueError if the length is not a
    multiple of three (no rf_table given) or a codon is not in
    ``alphabet.letters``.
    """
    Seq.__init__(self, data.upper(), alphabet=alphabet)
    self.gap_char = gap_char

    if not isinstance(alphabet, CodonAlphabet):
        raise TypeError("Input alphabet should be a CodonAlphabet object.")

    if rf_table is None:
        seq_ungapped = self._data.replace(gap_char, "")
        # The alignment length must be a whole number of codons
        if len(self) % 3 != 0:
            raise ValueError("Sequence length is not a multiple of "
                             "three (i.e. a whole number of codons)")
        self.rf_table = list(range(0, len(seq_ungapped), 3))
        # Alphabet check is done here codon by codon because
        # Alphabet._verify_alphabet only works for single-letter alphabets
        for start in self.rf_table:
            codon = self._data[start:start + 3]
            if codon not in alphabet.letters:
                raise ValueError("Sequence contain codon not in the alphabet "
                                 "({0})! ".format(codon))
        return

    # Caller-supplied table: validate its types, then every codon it names
    if not isinstance(rf_table, (tuple, list)):
        raise TypeError("rf_table should be a tuple or list object")
    for position in rf_table:
        if not isinstance(position, int):
            raise TypeError("Elements in rf_table should be int "
                            "that specify the codon positions of "
                            "the sequence")
    seq_ungapped = self._data.replace(gap_char, "")
    for start in rf_table:
        codon = seq_ungapped[start:start + 3]
        if codon not in alphabet.letters:
            raise ValueError("Sequence contain undefined letters "
                             "from alphabet "
                             "({0})!".format(codon))
    self.rf_table = rf_table
def __init__(self, _seq, _gene_id="unknown", _transcript_id=None, _vars=None):
    """
    Create a transcript sequence with its gene/transcript IDs and variants.

    :param str _gene_id: input genome ID
    :param str _transcript_id: input transcript RefSeqID; when None, the
                               value of ``Transcript.newid()`` is used
    :param str _seq: Transcript RefSeq sequence (stored upper-cased)
    :param dict(int,Variant) _vars: a dict of transcript position to
                                    Variant that is specific to the
                                    transcript; None yields an empty dict
    """
    MetadataLogger.__init__(self)
    Seq.__init__(self, _seq.upper(), generic_rna)

    self.gene_id = _gene_id

    if _transcript_id is None:
        self.transcript_id = Transcript.newid()
    else:
        self.transcript_id = _transcript_id

    # TODO: this is not what the doc string says:
    if _vars is None:
        self.vars = dict()
    else:
        self.vars = _vars
def __init__(self, seq, gene_id="unknown", transcript_id=None, vars=None):
    """
    Create a transcript sequence with its gene/transcript IDs and variants.

    :param str gene_id: Genome ID
    :param str transcript_id: :class:`~Fred2.Core.Transcript.Transcript`
                              RefSeqID; when None, the value of
                              ``Transcript.newid()`` is used
    :param str seq: :class:`~Fred2.Core.Transcript.Transcript` sequence
                    (stored upper-cased)
    :param vars: A dict of :class:`~Fred2.Core.Transcript.Transcript`
                 position to :class:`Fred2.Core.Variant.Variant` that is
                 specific to the
                 :class:`~Fred2.Core.Transcript.Transcript`; None yields
                 an empty dict
    :type vars: dict(int,:class:`Fred2.Core.Variant.Variant`)
    """
    MetadataLogger.__init__(self)
    Seq.__init__(self, seq.upper(), generic_rna)

    self.gene_id = gene_id

    if transcript_id is None:
        # No ID supplied by the caller: generate one
        transcript_id = Transcript.newid()
    self.transcript_id = transcript_id

    if vars is None:
        vars = dict()
    self.vars = vars
def __init__(self, seq, gene_id="unknown", transcript_id=None, vars=None):
    """
    Build a transcript from an RNA sequence and optional metadata.

    :param str gene_id: Genome ID
    :param str transcript_id: :class:`~Fred2.Core.Transcript.Transcript`
                              RefSeqID; ``Transcript.newid()`` supplies one
                              if this is None
    :param str seq: :class:`~Fred2.Core.Transcript.Transcript` sequence;
                    upper-cased before it is passed to ``Seq.__init__``
    :param vars: A dict of :class:`~Fred2.Core.Transcript.Transcript`
                 position to :class:`Fred2.Core.Variant.Variant` that is
                 specific to the
                 :class:`~Fred2.Core.Transcript.Transcript`; a new empty
                 dict is used if this is None
    :type vars: dict(int,:class:`Fred2.Core.Variant.Variant`)
    """
    MetadataLogger.__init__(self)
    upper_seq = seq.upper()
    Seq.__init__(self, upper_seq, generic_rna)

    self.gene_id = gene_id
    self.transcript_id = (
        transcript_id if transcript_id is not None else Transcript.newid()
    )
    self.vars = vars if vars is not None else dict()
def __init__(self, data='', alphabet=default_codon_alphabet,
             gap_char="-", rf_table=None):
    """Initialize the codon sequence and its reading-frame table.

    ``rf_table`` should be a tuple or list giving every codon start
    position along the sequence. For example::

        sequence = 'AAATTTGGGCCAAATTT'
        rf_table = (0, 3, 6, 8, 11, 14)

    the translated protein sequence will be::

        AAA TTT GGG GCC AAA TTT
        K   F   G   A   K   F

    Notice: rf_table applies to the ungapped sequence. If there are gaps
    in the sequence, they will be discarded. This keeps the rf_table
    independent of where the codon sequence appears in the alignment.

    Arguments:
     - data - the sequence string; it is upper-cased before storage.
     - alphabet - alphabet whose ``letters`` list the permitted codons.
     - gap_char - gap character, removed before rf_table positions apply.
     - rf_table - optional tuple/list of int codon start positions.

    Raises:
     - AssertionError - the sequence length is not a multiple of three
       (no rf_table given), or rf_table is not a tuple/list of int.
     - ValueError - a codon is not contained in ``alphabet.letters``.
    """
    # NOTE: the validation below used to be written as ``assert``
    # statements, which Python removes under ``-O`` so invalid input was
    # silently accepted. The checks are now explicit; AssertionError is
    # kept as the exception type so existing callers are unaffected.
    Seq.__init__(self, data.upper(), alphabet=alphabet)
    self.gap_char = gap_char

    if rf_table is None:
        seq_ungapped = self._data.replace(gap_char, "")
        # The alignment length must be a whole number of codons
        if len(self) % 3 != 0:
            raise AssertionError("Sequence length is not a triple number")
        self.rf_table = list(range(0, len(seq_ungapped), 3))
        # Alphabet check is done here codon by codon because
        # Alphabet._verify_alphabet only works for single-letter alphabets
        for start in self.rf_table:
            codon = self._data[start:start + 3]
            if codon not in alphabet.letters:
                raise ValueError("Sequence contain undefined letters from"
                                 " alphabet "
                                 "({0})! ".format(codon))
    else:
        if not isinstance(rf_table, (tuple, list)):
            raise AssertionError("rf_table should be a tuple or list object")
        if not all(isinstance(i, int) for i in rf_table):
            raise AssertionError(
                "elements in rf_table should be int that specify "
                "the codon positions of the sequence")
        # rf_table positions refer to the sequence with gaps removed
        seq_ungapped = self._data.replace(gap_char, "")
        for start in rf_table:
            codon = seq_ungapped[start:start + 3]
            if codon not in alphabet.letters:
                raise ValueError("Sequence contain undefined letters "
                                 "from alphabet "
                                 "({0})!".format(codon))
        self.rf_table = rf_table
def __init__(self, _seq, protein_pos=None):
    """
    Build a peptide and index its source proteins by transcript ID.

    :param str _seq: sequence of the peptide in one letter amino acid code
                     (stored upper-cased)
    :param dict(Protein,list(int)) protein_pos: dict of proteins to the
                                                positions of origin in
                                                that protein
    :raises TypeError: if a non-empty ``protein_pos`` has a key that is not
                       a Protein or a position that is not an int/long
    """
    MetadataLogger.__init__(self)
    Seq.__init__(self, _seq.upper(), IUPAC.IUPACProtein)

    # Enforce dict storage; None and empty input are not inspected
    if protein_pos:
        malformed = any(
            not (isinstance(prot, Protein)
                 and all(isinstance(idx, (int, long)) for idx in positions))
            for prot, positions in protein_pos.iteritems()
        )
        if malformed:
            raise TypeError("The proteins_pos given to a Peptide object should be dict(Protein,list(int))")

    if protein_pos is not None:
        # Both lookups are keyed by the transcript ID of the protein
        self.proteins = {prot.transcript_id: prot
                         for prot in protein_pos.iterkeys()}
        self.proteinPos = {prot.transcript_id: positions
                           for prot, positions in protein_pos.iteritems()}
    else:
        self.proteins = dict()
        self.proteinPos = collections.defaultdict(list)
def __init__(self, _seq, _gene_id="unknown", _transcript_id=None, _vars=None):
    """
    Create a transcript and index the given variants by transcript position.

    :param str _gene_id: input genome ID
    :param str _transcript_id: input transcript RefSeqID; when None, the
                               value of ``Transcript.newid()`` is stored
    :param str _seq: Transcript RefSeq sequence (stored as given)
    :param _vars: Variants for specific positions in the transcript. The
                  argument is iterated and each element must provide
                  ``get_transcript_position``; the stored dict has
                  key=position, value=Variant
    """
    MetadataLogger.__init__(self)
    Seq.__init__(self, _seq, generic_rna)

    self.gene_id = _gene_id

    if _transcript_id is None:
        self.transcript_id = Transcript.newid()
    else:
        self.transcript_id = _transcript_id

    if _vars is None:
        self.vars = dict()
    else:
        # NOTE(review): positions are looked up with the caller-supplied
        # _transcript_id (possibly None), not with the generated
        # self.transcript_id -- confirm this is intended.
        self.vars = dict(
            (variant.get_transcript_position(_transcript_id), variant)
            for variant in _vars
        )
def __init__(self, _seq, gene_id="unknown", transcript_id=None, orig_transcript=None, vars=None):
    """
    Create a protein sequence and attach its origin metadata.

    :param str _seq: String of an IUPACProtein alphabet, representing the
                     protein (stored upper-cased)
    :param str gene_id: ID of the genome the protein originated from
    :param str transcript_id: ID of the transcript the protein originated
                              from; if None, ``"Protein_%i"`` is filled
                              with ``Protein.newid()``
    :param orig_transcript: Reference to the originating transcript object
    :type orig_transcript: :class:`~Fred2.Core.Transcript.Transcript`
    :param vars: Nonsynonymous variants that are associated with the
                 protein. key=position within protein, value=list of
                 variants at that pos; None yields an empty dict
    :type vars: dict(int,list(:class:`~Fred2.Core.Variant.Variant`))
    """
    # Initialise both parent types
    MetadataLogger.__init__(self)
    upper_seq = _seq.upper()
    Seq.__init__(self, upper_seq, IUPAC.IUPACProtein)

    # Initialise own members
    self.vars = vars if vars is not None else dict()  # {prot-position: list(variant)}
    self.orig_transcript = orig_transcript
    if transcript_id is not None:
        self.transcript_id = transcript_id
    else:
        self.transcript_id = "Protein_%i" % Protein.newid()
    self.gene_id = gene_id
def __init__(self, seq_str):
    """Create the sequence from ``seq_str`` and cache its letters as a list.

    The string is upper-cased and stored with an ``IUPACAmbiguousDNA``
    alphabet; ``self._seq_list`` holds the characters of ``self.tostring()``.
    """
    upper_seq = seq_str.upper()
    Seq.__init__(self, upper_seq, IUPACAmbiguousDNA())
    self._seq_list = [letter for letter in self.tostring()]
def __init__(self, seq_str):
    """Initialise from a sequence string and keep a per-letter list.

    ``seq_str`` is upper-cased and passed to ``Seq.__init__`` together with
    an ``IUPACAmbiguousDNA`` instance. The result of ``self.tostring()`` is
    then split into single characters and stored as ``self._seq_list``.
    """
    normalized = seq_str.upper()
    dna_alphabet = IUPACAmbiguousDNA()
    Seq.__init__(self, normalized, dna_alphabet)

    as_text = self.tostring()
    self._seq_list = list(as_text)
def __init__(self, sequence):
    """Create the sequence from ``sequence`` (upper-cased).

    Also stores ``str(self)`` as ``self.sequence`` and starts
    ``self.qc_msg`` as an empty list.
    """
    normalized = sequence.upper()
    Seq.__init__(self, normalized)
    as_text = str(self)
    self.sequence = as_text
    self.qc_msg = list()
def __init__(self, data, left_end=None, right_end=None, **k):
    """Create the sequence from ``str(data)`` and record both end markers.

    ``left_end`` and ``right_end`` are stored unchanged on the instance;
    any further keyword arguments are forwarded to ``Seq.__init__``.
    """
    text = str(data)
    Seq.__init__(self, text, **k)
    self.left_end, self.right_end = left_end, right_end
def __init__(self, data, strand, **k):
    """Create the sequence from the upper-cased ``str(data)``.

    ``strand`` is stored unchanged as ``self.strand``; any further keyword
    arguments are forwarded to ``Seq.__init__``.
    """
    text = str(data).upper()
    Seq.__init__(self, text, **k)
    self.strand = strand