コード例 #1
0
 def __init__(self,
              _seq,
              gene_id="unknown",
              transcript_id=None,
              orig_transcript=None,
              vars=None):
     """
     Set up a protein from its amino-acid string plus optional provenance.

     :param str _seq: String of an IUPACProtein alphabet, representing the protein;
                      it is upper-cased before being stored
     :param str gene_id: ID of the genome the protein originated from
     :param str transcript_id: ID of the transcript the protein originated from;
                               when None, an ID of the form ``Protein_<n>`` is
                               built from ``Protein.newid()``
     :param orig_transcript: Reference to the originating transcript object
     :type orig_transcript: :class:`~Fred2.Core.Transcript.Transcript`
     :param vars: Nonsynonymous variants that are associated with the protein.
                  key=position within protein, value=list of variants at that pos.
                  The dict is stored as given (not copied); None yields an empty dict
     :type vars: dict(int,list(:class:`~Fred2.Core.Variant.Variant`))
     """
     # Both base classes are initialised explicitly.
     MetadataLogger.__init__(self)
     Seq.__init__(self, _seq.upper(), IUPAC.IUPACProtein)

     # Own members; vars has the shape {prot-position: list(variant)}.
     self.vars = dict() if vars is None else vars
     self.orig_transcript = orig_transcript
     if transcript_id is None:
         # No ID supplied by the caller: derive one from the class-level counter.
         self.transcript_id = "Protein_%i" % Protein.newid()
     else:
         self.transcript_id = transcript_id
     self.gene_id = gene_id
コード例 #2
0
ファイル: Protein.py プロジェクト: SteffenK12/Fred2
 def __init__(self, _seq, _gene_id="unknown", _transcript_id=None, _orig_transcript=None, _vars=None):
     """
     Set up a protein from its amino-acid string plus optional provenance.

     :param str _seq: String of an IUPACProtein alphabet, representing the
                      protein; it is upper-cased before being stored
     :param str _gene_id: ID of the genome the protein originated from
     :param str _transcript_id: ID of the transcript the protein originated
                                from; when None, ``Protein_<n>`` is built
                                from ``Protein.newid()``
     :param Transcript _orig_transcript: Reference to the originating
                                         transcript
     :param dict(int,list(Variant)) _vars: Nonsynonymous variants that are
                                           associated with the protein.
                                           key=position within protein,
                                           value=list of variants at that pos.
                                           Stored as given; None yields an
                                           empty dict
     """
     # Both base classes are initialised explicitly.
     MetadataLogger.__init__(self)
     Seq.__init__(self, _seq.upper(), IUPAC.IUPACProtein)

     # Own members; vars has the shape {prot-position: list(variant)}.
     self.vars = dict() if _vars is None else _vars
     self.orig_transcript = _orig_transcript
     if _transcript_id is None:
         # No ID supplied by the caller: derive one from the class-level counter.
         self.transcript_id = "Protein_%i" % Protein.newid()
     else:
         self.transcript_id = _transcript_id
     self.gene_id = _gene_id
コード例 #3
0
ファイル: Peptide.py プロジェクト: lkuchenb/fred
    def __init__(self, seq, protein_pos=None):
        """
        Create a peptide and index the proteins it originates from.

        :param str seq: Sequence of the peptide in one letter amino acid code;
                        it is upper-cased before being stored
        :param protein_pos: Mapping of each source protein to the positions at
                            which the peptide occurs in it
        :type protein_pos: dict(:class:`~Fred2.Core.Protein.Protein`,list(int))
        :raises TypeError: if a key of ``protein_pos`` is not a Protein or one
                           of its positions is neither int nor long
        """
        MetadataLogger.__init__(self)
        Seq.__init__(self, seq.upper(), IUPAC.IUPACProtein)

        # Enforce dict(Protein, list(int)) storage. An empty mapping is falsy
        # and therefore skips the validation loop altogether.
        if protein_pos:
            for prot, positions in protein_pos.iteritems():
                well_typed = isinstance(prot, Protein) and all(
                    isinstance(idx, (int, long)) for idx in positions)
                if not well_typed:
                    raise TypeError(
                        "The proteins_pos given to a Peptide object should be dict(Protein,list(int))"
                    )

        if protein_pos is None:
            self.proteins = dict()
            # Only the "nothing given" case gets a defaultdict; a supplied
            # mapping is turned into a plain dict below.
            self.proteinPos = collections.defaultdict(list)
        else:
            # Both lookups are keyed by the transcript ID of the protein.
            by_transcript = {}
            for prot in protein_pos.iterkeys():
                by_transcript[prot.transcript_id] = prot
            self.proteins = by_transcript

            positions_by_transcript = {}
            for prot, positions in protein_pos.iteritems():
                positions_by_transcript[prot.transcript_id] = positions
            self.proteinPos = positions_by_transcript
コード例 #4
0
    def __init__(self, data="", gap_char="-", rf_table=None):
        """Initialize the codon sequence and its reading-frame table.

        ``rf_table`` is a tuple or list holding the start of every codon
        along the sequence. For example::

            sequence = 'AAATTTGGGCCAAATTT'
            rf_table = (0, 3, 6, 8, 11, 14)

        translates as::

            AAA TTT GGG GCC AAA TTT
             K   F   G   A   K   F

        Notice: rf_table applies to the ungapped sequence. If there are gaps
        in the sequence, they will be discarded. This keeps the rf_table
        independent of where the codon sequence appears in the alignment.

        When ``rf_table`` is None, one is generated with a step of three over
        the ungapped length; this requires the full (gapped) length to be a
        multiple of three.

        Raises ValueError if no rf_table is given and the sequence length is
        not a multiple of three, and TypeError if rf_table is not a
        tuple/list of int.
        """
        Seq.__init__(self, data.upper())
        self.gap_char = gap_char

        if rf_table is not None:
            # A caller-supplied table is only type-checked; no length check
            # is applied to the sequence in this branch.
            if not isinstance(rf_table, (tuple, list)):
                raise TypeError("rf_table should be a tuple or list object")
            for entry in rf_table:
                if not isinstance(entry, int):
                    raise TypeError("Elements in rf_table should be int "
                                    "that specify the codon positions of "
                                    "the sequence")
            self.rf_table = rf_table
            return

        # No table given: the alignment length has to be a whole number of
        # codons before a default table can be derived.
        total = len(self)
        if total % 3 != 0:
            raise ValueError("Sequence length is not a multiple of "
                             "three (i.e. a whole number of codons)")
        ungapped_length = total - self.count(gap_char)
        self.rf_table = list(range(0, ungapped_length, 3))
コード例 #5
0
ファイル: codonseq.py プロジェクト: ttung/biopython
    def __init__(
        self, data="", alphabet=default_codon_alphabet, gap_char="-", rf_table=None
    ):
        """Initialize the codon sequence, validating codons against the alphabet.

        ``rf_table`` is a tuple or list holding the start of every codon
        along the sequence. For example::

            sequence = 'AAATTTGGGCCAAATTT'
            rf_table = (0, 3, 6, 8, 11, 14)

        translates as::

            AAA TTT GGG GCC AAA TTT
             K   F   G   A   K   F

        Notice: rf_table applies to the ungapped sequence. If there are gaps
        in the sequence, they will be discarded. This keeps the rf_table
        independent of where the codon sequence appears in the alignment.

        Raises TypeError if ``alphabet`` is not a CodonAlphabet or
        ``rf_table`` is not a tuple/list of int, and ValueError if the length
        is not a multiple of three (default table only) or a codon is not in
        ``alphabet.letters``.
        """
        Seq.__init__(self, data.upper(), alphabet=alphabet)
        self.gap_char = gap_char

        if not isinstance(alphabet, CodonAlphabet):
            raise TypeError("Input alphabet should be a CodonAlphabet object.")

        if rf_table is None:
            ungapped = self._data.replace(gap_char, "")
            # The alignment length has to be a whole number of codons.
            if len(self) % 3 != 0:
                raise ValueError(
                    "Sequence length is not a multiple of "
                    "three (i.e. a whole number of codons)"
                )
            self.rf_table = list(range(0, len(ungapped), 3))
            # Alphabet._verify_alphabet is not used because it only works for
            # single-letter alphabets.
            # NOTE(review): codons are sliced from the gapped self._data while
            # the offsets come from the ungapped length -- confirm intended.
            for start in self.rf_table:
                codon = self._data[start : start + 3]
                if codon not in alphabet.letters:
                    raise ValueError(
                        "Sequence contain codon not in the alphabet"
                        f" ({codon})!"
                    )
            return

        # Caller-supplied table: no length check is applied in this branch.
        if not isinstance(rf_table, (tuple, list)):
            raise TypeError("rf_table should be a tuple or list object")
        if not all(isinstance(pos, int) for pos in rf_table):
            raise TypeError(
                "Elements in rf_table should be int "
                "that specify the codon positions of "
                "the sequence"
            )
        ungapped = self._data.replace(gap_char, "")
        for start in rf_table:
            codon = ungapped[start : start + 3]
            if codon not in alphabet.letters:
                raise ValueError(
                    "Sequence contain undefined letters from alphabet"
                    f" ({codon})!"
                )
        self.rf_table = rf_table
コード例 #6
0
ファイル: psipred.py プロジェクト: bsmithers/hpf
 def __init__(self, pred, weights):
     """
     Store a secondary-structure prediction string together with its weights.

     @type pred: str. Is a string of the SS prediction (eg: CCCHHEEC)
     @type weights: list<int>
     """
     ss_alphabet = PsipredAlphabet()
     Seq.__init__(self, pred, ss_alphabet)
     # The raw prediction string is additionally kept as a plain attribute.
     self.prediction, self.weights = pred, weights
コード例 #7
0
    def __init__(self, _seq, proteins=None, vars=None,  transcripts=None):
        """
        Create a peptide with optional protein, variant and transcript lookups.

        :param str _seq: sequence of the peptide in one letter amino acid code
        :param proteins: stored as ``self.proteins``; None yields an empty dict
        :param vars: stored as ``self.vars``; None yields an empty dict
        :param transcripts: stored as ``self.transcripts``; None yields an
                            empty dict
        """
        MetadataLogger.__init__(self)
        Seq.__init__(self, _seq, IUPAC.IUPACProtein)

        # Each optional argument falls back to a fresh empty dict so that
        # instances never share a mutable default.
        if proteins is None:
            proteins = {}
        self.proteins = proteins

        if vars is None:
            vars = {}
        self.vars = vars

        if transcripts is None:
            transcripts = {}
        self.transcripts = transcripts
コード例 #8
0
ファイル: random_seq.py プロジェクト: titus0810/milo-lab
 def __init__(self, length, alphabet=IUPAC.unambiguous_dna):
     """Initialize a randomized sequence of the given length.

     The letters are obtained from ``self.SampleLetters`` and upper-cased
     before being handed to the ``Seq`` initialiser.

     Args:
         length: the sequence length.
         alphabet: the alphabet to choose from.
     """
     letters = alphabet.letters
     sampled = self.SampleLetters(letters, length)
     Seq.__init__(self, sampled.upper(), alphabet)
コード例 #9
0
ファイル: codonseq.py プロジェクト: HuttonICS/biopython
    def __init__(self, data='', alphabet=default_codon_alphabet,
                 gap_char="-", rf_table=None):
        """Initialize the codon sequence, validating codons against the alphabet.

        ``rf_table`` is a tuple or list holding the start of every codon
        along the sequence. For example::

            sequence = 'AAATTTGGGCCAAATTT'
            rf_table = (0, 3, 6, 8, 11, 14)

        translates as::

            AAA TTT GGG GCC AAA TTT
             K   F   G   A   K   F

        Notice: rf_table applies to the ungapped sequence. If there are gaps
        in the sequence, they will be discarded. This keeps the rf_table
        independent of where the codon sequence appears in the alignment.

        Raises TypeError if ``alphabet`` is not a CodonAlphabet or
        ``rf_table`` is not a tuple/list of int, and ValueError if the length
        is not a multiple of three (default table only) or a codon is not in
        ``alphabet.letters``.
        """
        Seq.__init__(self, data.upper(), alphabet=alphabet)
        self.gap_char = gap_char

        if not isinstance(alphabet, CodonAlphabet):
            raise TypeError("Input alphabet should be a CodonAlphabet object.")

        if rf_table is None:
            ungapped = self._data.replace(gap_char, "")
            # The alignment length has to be a whole number of codons.
            if len(self) % 3 != 0:
                raise ValueError("Sequence length is not a multiple of "
                                 "three (i.e. a whole number of codons)")
            self.rf_table = list(range(0, len(ungapped), 3))
            # Alphabet._verify_alphabet is not used because it only works for
            # single-letter alphabets.
            # NOTE(review): codons are sliced from the gapped self._data while
            # the offsets come from the ungapped length -- confirm intended.
            for start in self.rf_table:
                codon = self._data[start:start + 3]
                if codon not in alphabet.letters:
                    raise ValueError("Sequence contain codon not in the alphabet "
                                     "({0})! ".format(codon))
            return

        # Caller-supplied table: no length check is applied in this branch.
        if not isinstance(rf_table, (tuple, list)):
            raise TypeError("rf_table should be a tuple or list object")
        if not all(isinstance(pos, int) for pos in rf_table):
            raise TypeError("Elements in rf_table should be int "
                            "that specify the codon positions of "
                            "the sequence")
        ungapped = self._data.replace(gap_char, "")
        for start in rf_table:
            codon = ungapped[start:start + 3]
            if codon not in alphabet.letters:
                raise ValueError("Sequence contain undefined letters "
                                 "from alphabet "
                                 "({0})!".format(codon))
        self.rf_table = rf_table
コード例 #10
0
ファイル: Transcript.py プロジェクト: SteffenK12/Fred2
 def __init__(self, _seq, _gene_id="unknown", _transcript_id=None, _vars=None):
     """
     Set up a transcript from its sequence plus optional identifiers.

     :param str _seq: Transcript RefSeq sequence; upper-cased before storage
     :param str _gene_id: input genome ID
     :param str _transcript_id: input transcript RefSeqID; when None the
                                value of ``Transcript.newid()`` is used
     :param dict(int,Variant) _vars: a dict of transcript position to Variant
                                     that is specific to the transcript.
                                     Stored as given; None yields an empty dict
     """
     MetadataLogger.__init__(self)
     Seq.__init__(self, _seq.upper(), generic_rna)

     self.gene_id = _gene_id

     if _transcript_id is None:
         self.transcript_id = Transcript.newid()
     else:
         self.transcript_id = _transcript_id

     # TODO: this is not what the doc string says:
     if _vars is None:
         self.vars = dict()
     else:
         self.vars = _vars
コード例 #11
0
ファイル: Transcript.py プロジェクト: Al3n70rn/Fred2
 def __init__(self, seq, gene_id="unknown", transcript_id=None, vars=None):
     """
     Set up a transcript from its sequence plus optional identifiers.

     :param str seq: :class:`~Fred2.Core.Transcript.Transcript` sequence;
                     upper-cased before storage
     :param str gene_id: Genome ID
     :param str transcript_id: :class:`~Fred2.Core.Transcript.Transcript` RefSeqID;
                               when None the value of ``Transcript.newid()`` is used
     :param vars: A dict of :class:`~Fred2.Core.Transcript.Transcript` position to
                  :class:`Fred2.Core.Variant.Variant` that is specific to the
                  :class:`~Fred2.Core.Transcript.Transcript`. Stored as given;
                  None yields an empty dict
     :type vars: dict(int,:class:`Fred2.Core.Variant.Variant`)
     """
     MetadataLogger.__init__(self)
     Seq.__init__(self, seq.upper(), generic_rna)

     self.gene_id = gene_id

     if transcript_id is None:
         self.transcript_id = Transcript.newid()
     else:
         self.transcript_id = transcript_id

     if vars is None:
         self.vars = dict()
     else:
         self.vars = vars
コード例 #12
0
 def __init__(self, seq, gene_id="unknown", transcript_id=None, vars=None):
     """
     Set up a transcript from its sequence plus optional identifiers.

     :param str seq: :class:`~Fred2.Core.Transcript.Transcript` sequence;
                     upper-cased before storage
     :param str gene_id: Genome ID
     :param str transcript_id: :class:`~Fred2.Core.Transcript.Transcript` RefSeqID;
                               when None the value of ``Transcript.newid()`` is used
     :param vars: A dict of :class:`~Fred2.Core.Transcript.Transcript` position to
                  :class:`Fred2.Core.Variant.Variant` that is specific to the
                  :class:`~Fred2.Core.Transcript.Transcript`. Stored as given;
                  None yields an empty dict
     :type vars: dict(int,:class:`Fred2.Core.Variant.Variant`)
     """
     MetadataLogger.__init__(self)
     Seq.__init__(self, seq.upper(), generic_rna)

     self.gene_id = gene_id

     # Fall back to a generated identifier when the caller gave none.
     if transcript_id is None:
         transcript_id = Transcript.newid()
     self.transcript_id = transcript_id

     # A fresh dict per instance avoids sharing a mutable default.
     if vars is None:
         vars = dict()
     self.vars = vars
コード例 #13
0
    def __init__(self, data='', alphabet=default_codon_alphabet,
                 gap_char="-", rf_table=None):
        """Initialize the codon sequence, validating codons against the alphabet.

        ``rf_table`` is a tuple or list holding the start of every codon
        along the sequence. For example::

            sequence = 'AAATTTGGGCCAAATTT'
            rf_table = (0, 3, 6, 8, 11, 14)

        translates as::

            AAA TTT GGG GCC AAA TTT
             K   F   G   A   K   F

        Notice: rf_table applies to the ungapped sequence. If there are gaps
        in the sequence, they will be discarded. This keeps the rf_table
        independent of where the codon sequence appears in the alignment.

        Raises ValueError if a codon is not in ``alphabet.letters``. The
        length and rf_table type checks are plain ``assert`` statements and
        so raise AssertionError.
        """
        Seq.__init__(self, data.upper(), alphabet=alphabet)
        self.gap_char = gap_char

        # NOTE(review): the asserts below vanish under ``python -O``; they are
        # kept so the exception type seen by callers is unchanged.
        if rf_table is None:
            ungapped = self._data.replace(gap_char, "")
            # The alignment length has to be a whole number of codons.
            assert len(self) % 3 == 0, "Sequence length is not a triple number"
            self.rf_table = list(range(0, len(ungapped), 3))
            # Alphabet._verify_alphabet is not used because it only works for
            # single-letter alphabets.
            for start in self.rf_table:
                codon = self._data[start:start + 3]
                if codon not in alphabet.letters:
                    raise ValueError("Sequence contain undefined letters from"
                                     " alphabet "
                                     "({0})! ".format(codon))
            return

        # Caller-supplied table: no length check is applied in this branch.
        assert isinstance(rf_table, (tuple, list)), \
            "rf_table should be a tuple or list object"
        assert all(isinstance(pos, int) for pos in rf_table), (
            "elements in rf_table should be int that specify "
            "the codon positions of the sequence")
        ungapped = self._data.replace(gap_char, "")
        for start in rf_table:
            codon = ungapped[start:start + 3]
            if codon not in alphabet.letters:
                raise ValueError("Sequence contain undefined letters "
                                 "from alphabet "
                                 "({0})!".format(codon))
        self.rf_table = rf_table
コード例 #14
0
ファイル: Peptide.py プロジェクト: SteffenK12/Fred2
    def __init__(self, _seq, protein_pos=None):
        """
        Create a peptide and index the proteins it originates from.

        :param str _seq: sequence of the peptide in one letter amino acid code;
                         it is upper-cased before being stored
        :param dict(Protein,list(int)) protein_pos: mapping of each source
                                                    protein to the positions of
                                                    origin within it
        :raises TypeError: if a key of ``protein_pos`` is not a Protein or one
                           of its positions is neither int nor long
        """
        MetadataLogger.__init__(self)
        Seq.__init__(self, _seq.upper(), IUPAC.IUPACProtein)

        # Enforce dict(Protein, list(int)) storage. An empty mapping is falsy
        # and therefore skips the validation loop altogether.
        if protein_pos:
            for prot, positions in protein_pos.iteritems():
                well_typed = isinstance(prot, Protein) and all(
                    isinstance(idx, (int, long)) for idx in positions)
                if not well_typed:
                    raise TypeError("The proteins_pos given to a Peptide object should be dict(Protein,list(int))")

        if protein_pos is None:
            self.proteins = dict()
            # Only the "nothing given" case gets a defaultdict; a supplied
            # mapping is turned into a plain dict below.
            self.proteinPos = collections.defaultdict(list)
        else:
            # Both lookups are keyed by the transcript ID of the protein.
            self.proteins = {prot.transcript_id: prot
                             for prot in protein_pos.iterkeys()}
            self.proteinPos = {prot.transcript_id: positions
                               for prot, positions in protein_pos.iteritems()}
コード例 #15
0
    def __init__(self, _seq, _gene_id="unknown", _transcript_id=None, _vars=None):
        """
        Set up a transcript and index its variants by transcript position.

        :param str _seq: Transcript RefSeq sequence (stored without case
                         conversion)
        :param str _gene_id: input genome ID
        :param str _transcript_id: input transcript RefSeqID; when None the
                                   value of ``Transcript.newid()`` is used
        :param _vars: Variants for specific positions in the transcript. The
                      argument is iterated directly and every element must
                      offer ``get_transcript_position``; the resulting
                      ``self.vars`` maps key=position to value=Variant.
                      NOTE(review): earlier documentation declared this as
                      dict(int,Variant), but iterating a dict would yield its
                      keys -- confirm the expected type with callers
        """
        MetadataLogger.__init__(self)
        Seq.__init__(self, _seq, generic_rna)

        self.gene_id = _gene_id

        if _transcript_id is None:
            self.transcript_id = Transcript.newid()
        else:
            self.transcript_id = _transcript_id

        by_position = dict()
        if _vars is not None:
            for variant in _vars:
                # The position lookup uses the ID exactly as passed in (which
                # may be None), not a freshly generated self.transcript_id.
                position = variant.get_transcript_position(_transcript_id)
                by_position[position] = variant
        self.vars = by_position
コード例 #16
0
ファイル: Protein.py プロジェクト: Al3n70rn/Fred2
 def __init__(self, _seq, gene_id="unknown", transcript_id=None, orig_transcript=None, vars=None):
     """
     Set up a protein from its amino-acid string plus optional provenance.

     :param str _seq: String of an IUPACProtein alphabet, representing the protein;
                      it is upper-cased before being stored
     :param str gene_id: ID of the genome the protein originated from
     :param str transcript_id: ID of the transcript the protein originated from;
                               when None, an ID of the form ``Protein_<n>`` is
                               built from ``Protein.newid()``
     :param orig_transcript: Reference to the originating transcript object
     :type orig_transcript: :class:`~Fred2.Core.Transcript.Transcript`
     :param vars: Nonsynonymous variants that are associated with the protein.
                  key=position within protein, value=list of variants at that pos.
                  The dict is stored as given (not copied); None yields an empty dict
     :type vars: dict(int,list(:class:`~Fred2.Core.Variant.Variant`))
     """
     # Both base classes are initialised explicitly.
     MetadataLogger.__init__(self)
     Seq.__init__(self, _seq.upper(), IUPAC.IUPACProtein)

     # Own members; vars has the shape {prot-position: list(variant)}.
     self.vars = dict() if vars is None else vars
     self.orig_transcript = orig_transcript
     if transcript_id is None:
         # No ID supplied by the caller: derive one from the class-level counter.
         self.transcript_id = "Protein_%i" % Protein.newid()
     else:
         self.transcript_id = transcript_id
     self.gene_id = gene_id
コード例 #17
0
ファイル: ambiguous_seq.py プロジェクト: titus0810/milo-lab
    def __init__(self, seq_str):
        """Create an ambiguous-DNA sequence and cache its letters as a list.

        :param str seq_str: the sequence text; it is upper-cased before being
                            passed to ``Seq`` with an ``IUPACAmbiguousDNA``
                            alphabet
        """
        Seq.__init__(self, seq_str.upper(), IUPACAmbiguousDNA())

        # str(self) replaces Seq.tostring(), which Biopython deprecated and
        # later removed; both yield the full sequence string.
        self._seq_list = list(str(self))
コード例 #18
0
ファイル: ambiguous_seq.py プロジェクト: issfangks/milo-lab
 def __init__(self, seq_str):
     """Create an ambiguous-DNA sequence and cache its letters as a list.

     :param str seq_str: the sequence text; it is upper-cased before being
                         passed to ``Seq`` with an ``IUPACAmbiguousDNA``
                         alphabet
     """
     Seq.__init__(self, seq_str.upper(), IUPACAmbiguousDNA())

     # str(self) replaces Seq.tostring(), which Biopython deprecated and
     # later removed; both yield the full sequence string.
     self._seq_list = list(str(self))
コード例 #19
0
 def __init__(self, sequence):
     """Create the sequence from upper-cased text and prepare QC bookkeeping.

     :param str sequence: the sequence text; upper-cased before storage
     """
     normalized = sequence.upper()
     Seq.__init__(self, normalized)
     # Plain-string copy of the stored sequence.
     self.sequence = str(self)
     # Starts empty; nothing is appended to it here.
     self.qc_msg = list()
コード例 #20
0
ファイル: StickyEndsSeq.py プロジェクト: sandyg05/DnaCauldron
 def __init__(self, data, left_end=None, right_end=None, **k):
     """Create a sequence that additionally records its two ends.

     :param data: sequence content; converted with ``str()`` before storage
     :param left_end: stored unchanged as ``self.left_end``
     :param right_end: stored unchanged as ``self.right_end``
     :param k: extra keyword arguments forwarded to ``Seq.__init__``
     """
     text = str(data)
     Seq.__init__(self, text, **k)
     self.left_end, self.right_end = left_end, right_end
コード例 #21
0
 def __init__(self, data, strand, **k):
     """Create an upper-cased sequence that additionally records a strand.

     :param data: sequence content; converted with ``str()`` and upper-cased
     :param strand: stored unchanged as ``self.strand``
     :param k: extra keyword arguments forwarded to ``Seq.__init__``
     """
     text = str(data).upper()
     Seq.__init__(self, text, **k)
     self.strand = strand