Beispiel #1
0
    def lookForRes(self, resCode):
        """Return the residue-table entry for an amino acid code.

        The code may be given in one-letter or three-letter form. One-letter
        codes are translated through ONE_TO_THREE before the lookup, because
        ``self.residue_table`` is indexed here by three-letter code.

        Args:
            resCode: One-letter or three-letter amino acid code. The match is
                exact; no case normalisation is applied in this method.

        Returns:
            The value stored in ``self.residue_table`` for the residue
            (presumably a Residue object - confirm against the table's
            construction).

        Raises:
            ResTableException: If ``resCode`` has length 1 but is not a key
                of ONE_TO_THREE, has length 3 but is not a key of
                THREE_TO_ONE, or has any other length.
        """

        code_length = len(resCode)

        # One-letter code: translate to the three-letter key first.
        if code_length == 1 and resCode in ONE_TO_THREE:
            return self.residue_table[ONE_TO_THREE[resCode]]

        # Three-letter code: already in the form the table is keyed by.
        if code_length == 3 and resCode in THREE_TO_ONE:
            return self.residue_table[resCode]

        # Anything that reaches this point is not a recognised AA code.
        raise ResTableException("Invalid amino acid code provided [" +
                                str(resCode) + "]")
Beispiel #2
0
    def lookForRes(self, resCode):
        """Look up a residue by its one-letter or three-letter code.

        ``self.residue_table`` is indexed with three-letter codes, so a
        one-letter code is first mapped through ONE_TO_THREE; a three-letter
        code is used as the key directly once it has been found in
        THREE_TO_ONE.

        Args:
            resCode: Amino acid code of length 1 or 3. It is compared
                exactly as given (no upper/lower-casing happens here).

        Returns:
            The entry of ``self.residue_table`` for that residue
            (presumably a Residue object - confirm against the table's
            construction).

        Raises:
            ResTableException: If the code is unknown or its length is
                neither 1 nor 3.
        """

        # if we're using a one letter code, convert it to three letters
        if len(resCode) == 1:
            if resCode in ONE_TO_THREE:
                return self.residue_table[ONE_TO_THREE[resCode]]

        # if we're using a three letter code, it is the table key already
        elif len(resCode) == 3:
            if resCode in THREE_TO_ONE:
                return self.residue_table[resCode]

        # if we got here we had an invalid AA code
        raise ResTableException(
            "Invalid amino acid code provided [" + str(resCode) + "]")
    def __validSeq(self, sequence):
        """
        Internal function which validates that a [region of] a sequence
        is a valid protein sequence and returns the cleaned sequence.

        Each character is handled as follows:

        - a key of ONE_TO_THREE (one-letter residue code) is kept
        - a space is skipped silently
        - '*' is kept (it can be a stop codon); no further check of '*'
          happens in this function, so any later validation is left to
          the caller
        - a digit 0-9 is skipped with a warning (useful for copy/pasted
          FASTA formats)
        - anything else raises an exception

        Args:
            sequence: the string to validate.

        Returns:
            A string holding the kept characters in their original order.

        Raises:
            SequenceFileParserException: on the first character that falls
                into none of the categories above.
        """

        # Collect kept characters in a list and join once at the end,
        # instead of re-building the string on every iteration.
        kept = []

        for char in sequence:

            # valid one-letter residue codes go straight through
            if char in ONE_TO_THREE:
                kept.append(char)
                continue

            if char == " ":
                # skip spaces
                continue

            if char == "*":
                # keep '*' for now (it can be a stop codon)
                kept.append(char)
                continue

            if char in "1234567890":
                # strip out numbers (useful for copy/pasted FASTA formats)
                warning_message(
                    "Found '" + char + "' in sequence, stripping out and ignoring...")
                continue

            raise SequenceFileParserException(
                "\n\nERROR: Invalid sequence file, found [" +
                char +
                "] in sequence region\n\n" +
                sequence +
                "\n\n")

        return "".join(kept)
Beispiel #4
0
    def __validSeq(self, sequence):
        """
        Internal function which validates that a [region of] a sequence
        is a valid protein sequence and returns the cleaned sequence.

        The validation keeps every character that is a key of ONE_TO_THREE
        (the one-letter residue codes), skips spaces silently, skips the
        digits 0-9 with a warning (useful for copy/pasted FASTA formats),
        and raises an exception on any other character.

        Args:
            sequence: the string to validate.

        Returns:
            A string holding the kept characters in their original order.

        Raises:
            SequenceFileParserException: on the first character that is not
                a residue code, a space or a digit.
        """

        # Collect kept residues in a list and join once at the end, instead
        # of re-building the string on every iteration.
        residues = []

        for char in sequence:

            # valid one-letter residue codes go straight through
            if char in ONE_TO_THREE:
                residues.append(char)
            elif char == " ":
                # skip spaces
                continue
            elif char in "1234567890":
                # strip out numbers (useful for copy/pasted FASTA formats)
                warning_message(
                    "Found '" + char + "' in sequence, stripping out and ignoring...")
            else:
                raise SequenceFileParserException(
                    "\n\nERROR: Invalid sequence file, found [" +
                    char +
                    "] in sequence region\n\n" +
                    sequence +
                    "\n\n")

        return "".join(residues)