def lookForRes(self, resCode):
    """
    Return the entry of ``self.residue_table`` matching an amino acid code.

    ``resCode`` may be either a one letter code (a key of ``ONE_TO_THREE``)
    or a three letter code (a key of ``THREE_TO_ONE``). ``residue_table`` is
    indexed by three letter code, so a one letter code is translated through
    ``ONE_TO_THREE`` before the lookup.

    Raises ResTableException if ``resCode`` is not a recognised code. This
    includes values that have no length at all (e.g. ``None``), which would
    otherwise escape as a bare TypeError from ``len()``.
    """
    try:
        code_length = len(resCode)
    except TypeError:
        # not a sized value, so it cannot be a valid code; fall through to
        # the common error below
        code_length = None

    # one letter code: translate to the three letter key used by the table
    if code_length == 1:
        if resCode in ONE_TO_THREE:
            return self.residue_table[ONE_TO_THREE[resCode]]

    # three letter code: already in the form the table is keyed by
    elif code_length == 3:
        if resCode in THREE_TO_ONE:
            return self.residue_table[resCode]

    # if we got here we had an invalid AA code
    raise ResTableException(
        "Invalid amino acid code provided [" + str(resCode) + "]")
def lookForRes(self, resCode):
    """
    Return the entry of ``self.residue_table`` matching an amino acid code.

    ``resCode`` may be either a one letter code (a key of ``ONE_TO_THREE``)
    or a three letter code (a key of ``THREE_TO_ONE``). ``residue_table`` is
    indexed by three letter code, so a one letter code is translated through
    ``ONE_TO_THREE`` before the lookup.

    Raises ResTableException if ``resCode`` is not a recognised code. This
    includes values that have no length at all (e.g. ``None``), which would
    otherwise escape as a bare TypeError from ``len()``.
    """
    try:
        code_length = len(resCode)
    except TypeError:
        # not a sized value, so it cannot be a valid code; fall through to
        # the common error below
        code_length = None

    # one letter code: translate to the three letter key used by the table
    if code_length == 1:
        if resCode in ONE_TO_THREE:
            return self.residue_table[ONE_TO_THREE[resCode]]

    # three letter code: already in the form the table is keyed by
    elif code_length == 3:
        if resCode in THREE_TO_ONE:
            return self.residue_table[resCode]

    # if we got here we had an invalid AA code
    raise ResTableException(
        "Invalid amino acid code provided [" + str(resCode) + "]")
def __validSeq(self, sequence):
    """
    Internal function which validates that a [region of a] sequence is a
    valid protein sequence and returns the cleaned-up sequence.

    Each character of ``sequence`` is handled as follows:

    * a key of ``ONE_TO_THREE`` (one letter residue code) is kept
    * ``*`` is kept as-is; this function does not check it any further
    * a space is dropped silently
    * a digit is dropped after emitting a warning via ``warning_message``
    * anything else raises SequenceFileParserException

    Returns the kept characters, in their original order, as one string.
    """
    kept = []  # collected pieces; joined once at the end

    for char in sequence:
        # a recognised one letter residue code is kept unchanged
        if char in ONE_TO_THREE:
            kept.append(char)

        elif char == " ":
            # skip spaces
            continue

        elif char == "*":
            # '*' can be a stop codon, so keep it here; any validation of it
            # has to happen outside this function
            kept.append(char)

        elif char in "1234567890":
            # strip out numbers (useful for copy/pasted FASTA formats)
            warning_message(
                "Found '" + char + "' in sequence, stripping out and ignoring...")

        else:
            raise SequenceFileParserException(
                "\n\nERROR: Invalid sequence file, found [" + char +
                "] in sequence region\n\n" + sequence + "\n\n")

    return "".join(kept)
def __validSeq(self, sequence):
    """
    Internal function which validates that a [region of a] sequence is a
    valid protein sequence and returns the cleaned-up sequence.

    Each character of ``sequence`` is handled as follows:

    * a key of ``ONE_TO_THREE`` (one letter residue code) is kept
    * a space is dropped silently
    * a digit is dropped after emitting a warning via ``warning_message``
    * anything else raises SequenceFileParserException

    Returns the kept characters, in their original order, as one string.
    """
    kept = []  # collected residues; joined once at the end

    for char in sequence:
        # a recognised one letter residue code is kept unchanged
        if char in ONE_TO_THREE:
            kept.append(char)

        elif char == " ":
            # skip spaces
            continue

        elif char in "1234567890":
            # strip out numbers (useful for copy/pasted FASTA formats)
            warning_message(
                "Found '" + char + "' in sequence, stripping out and ignoring...")

        else:
            raise SequenceFileParserException(
                "\n\nERROR: Invalid sequence file, found [" + char +
                "] in sequence region\n\n" + sequence + "\n\n")

    return "".join(kept)