def invokeAction(self, inActionName):
    """
    Applies an action on the current sequence displayed in the PM.

    The current sequence is read with self._getSequence(), transformed
    according to inActionName, written back with self.setSequence() and
    returned to the caller.

    @param inActionName: The action name. It is compared against
                         self._action_Complement, self._action_Reverse,
                         self._action_ConvertUnrecognized and
                         self._action_RemoveUnrecognized.
    @type  inActionName: str

    @return: The sequence after the action has been applied. This is an
             empty string when inActionName matches none of the actions
             listed above.
    @rtype:  str
    """
    # Only the sequence itself is needed here; the second item returned by
    # _getSequence() is not used by this method.
    sequence, _allKnown = self._getSequence()
    outResult = ""

    if inActionName == self._action_Complement:
        outResult = getComplementSequence(sequence)
    elif inActionName == self._action_Reverse:
        outResult = getReverseSequence(sequence)
    elif inActionName == self._action_ConvertUnrecognized:
        outResult = replaceUnrecognized(sequence, replaceBase = 'N')
    elif inActionName == self._action_RemoveUnrecognized:
        outResult = replaceUnrecognized(sequence, replaceBase = '')

    # Apply the result exactly once, whichever branch produced it.
    # NOTE(review): for an unrecognized action name outResult is still ""
    # at this point, so the displayed sequence is cleared -- confirm that
    # this is the intended behavior.
    self.setSequence(outResult)

    # Return the new sequence, as the docstring has always promised.
    return outResult
def setStrandSequence(self, sequenceString, complement = True):
    """
    Set the strand sequence i.e. assign the baseNames for the PAM atoms in
    this strand AND (optionally) the complementary baseNames to the PAM
    atoms of the complementary strand ('mate strand').

    All whitespace is removed from sequenceString first. The strand atoms
    are then visited in the order returned by
    self.get_strand_atoms_in_bond_direction() (singlets are skipped) and
    the n-th atom receives the n-th character of the sequence. Atoms beyond
    the end of the sequence receive the unassigned base 'X'.

    Afterwards the display lists of the member DnaStrandChunks shown in the
    diDNACYLINDER display style, and of their complementary chunks shown in
    that style, are invalidated so they get redrawn.

    @param sequenceString: The sequence to be assigned to this strand chunk
    @type  sequenceString: str

    @param complement: If True, also assign the complementary base name to
                       the strand atom mate of each atom (when it has one).
    @type  complement: bool
    """
    #TO BE REVISED SEE A TODO COMMENT AT THE TOP

    #Remove whitespaces and tabs from the sequence string
    sequenceString = re.sub(r'\s', '', str(sequenceString))
    sequenceLength = len(sequenceString)

    #May be we set this beginning with an atom marked by the
    #Dna Atom Marker in dna data model? -- Ninad 2008-01-11
    # [yes, see my longer reply comment above -- Bruce 080117]
    rawAtomList = self.get_strand_atoms_in_bond_direction()
    atomList = [atm for atm in rawAtomList if not atm.is_singlet()]

    # enumerate() supplies each atom's position directly. Looking it up
    # with atomList.index(atm) on every iteration is quadratic and always
    # yields the *first* matching position.
    for atomIndex, atm in enumerate(atomList):
        if atomIndex < sequenceLength:
            baseName = sequenceString[atomIndex]
        else:
            #In this case, set an unassigned base ('X') for the remaining
            #atoms
            baseName = 'X'

        atm.setDnaBaseName(baseName)

        #Also assign the baseNames for the PAM atoms on the complementary
        #('mate') strand.
        if complement:
            strandAtomMate = atm.get_strand_atom_mate()
            if strandAtomMate is not None:
                # The complement is only needed when there is a mate atom
                # to receive it.
                complementBaseName = getComplementSequence(str(baseName))
                strandAtomMate.setDnaBaseName(str(complementBaseName))

    # piotr 080319:
    # Redraw the chunks in DNA display style
    # to reflect the sequence changes.
    from utilities.constants import diDNACYLINDER
    for c in self.members:
        if not isinstance(c, DnaStrandChunk):
            continue
        if c.get_dispdef() != diDNACYLINDER:
            continue
        c.inval_display_list() # redraw the chunk
        # do the same for all complementary chunks
        # prev_cc remembers the last complementary chunk handled so that a
        # run of atoms whose mates share one chunk triggers a single check.
        prev_cc = None
        for atom in c.atoms.itervalues():
            atm_mate = atom.get_strand_atom_mate()
            if not atm_mate:
                continue
            cc = atm_mate.molecule
            if cc != prev_cc and isinstance(cc, DnaStrandChunk):
                prev_cc = cc
                if cc.get_dispdef() == diDNACYLINDER:
                    cc.inval_display_list()
def invokeAction( self, inActionName ):
    """
    Applies an action on the current sequence displayed in the PM.

    Reads the current sequence via self._getSequence(), computes the
    transformed sequence selected by inActionName, stores it with
    self.setSequence() and hands it back to the caller.

    @param inActionName: The action name; one of self._action_Complement,
                         self._action_Reverse,
                         self._action_ConvertUnrecognized or
                         self._action_RemoveUnrecognized.
    @type  inActionName: str

    @return: The sequence after the action has been applied (an empty
             string if inActionName is none of the names listed above).
    @rtype:  str
    """
    # The second item returned by _getSequence() is not needed here.
    sequence, _allKnown = self._getSequence()

    if inActionName == self._action_Complement:
        outResult = getComplementSequence(sequence)
    elif inActionName == self._action_Reverse:
        outResult = getReverseSequence(sequence)
    elif inActionName == self._action_ConvertUnrecognized:
        outResult = replaceUnrecognized(sequence, replaceBase = 'N')
    elif inActionName == self._action_RemoveUnrecognized:
        outResult = replaceUnrecognized(sequence, replaceBase = '')
    else:
        # NOTE(review): an unknown action name results in an empty
        # sequence being set below -- confirm this is intended.
        outResult = ""

    # A single setSequence() call covers every branch above.
    self.setSequence( outResult )

    # Return the result so the behavior matches the documented @return.
    return outResult
def _determine_complementSequence(self, inSequence): """ Determine the complementary sequence based on the main sequence. It does lots of thing than just obtaining the information by using 'getComplementSequence'. Examples of what it does: 1. Suppose you have a duplex with 10 basepairs. You are editing strand A and you lengthen it to create a sticky end. Lets assume that it is lengthened by 5 bases. Since there will be no complementary strand baseatoms for these, the sequence editor will show an asterisk('*'), indicating that its missing the strand mate base atom. Sequence Editor itself doesn't check each time if the strand mate is missing. Rather, it relies on what the caller supplied as the initial complement sequence. (@see: self._setComplementSequence) . The caller determines the sequence of the strand being edited and also its complement. If the complement doesn't exist, it replace the complement with a '*' and passes this information to the sequence editor. Everytime sequence editor is updating its sequnece, it updates the mate sequence and skips the positions marked '*' (by using self._initial_complementSequence), and thus those remain unaltered. If user enters a sequence which has more characters than the original sequence, then it doesn't update the complement portion of that extra portion of the sequence. This gives a visual indication of where the sequence ends. (see NFR bug 2787). Reversing the sequence also reverses the complement (including the '*' positions) @see Bug 2787 for details of the implementation. @see: self._setComplementSequence() @see: self._updateSequenceAndItsComplement() @see: self._setSequence() @see: DnaStrand_PropertyManager.updateSequence() (the caller) @see: Dna_Constants.MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL @see: DnaStrand.getStrandSequenceAndItsComplement() """ if not self._initial_complementSequence: #This is unlikely. Do nothing in this case. 
return '' complementSequence = '' #Make sure that the insequence is a string object inSequence = str(inSequence) #Do the following only when the length of sequence (inSequence) is #greater than or equal to length of original complement sequence #REVIEW This could be SLOW, need to think of a better way to do this. if len(inSequence) >= len(self._initial_complementSequence): for i in range(len(self._initial_complementSequence)): if self._initial_complementSequence[i] == \ MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL: complementSequence += self._initial_complementSequence[i] else: complementSequence += getComplementSequence(inSequence[i]) else: #Use complementSequence as a list object as we will need to modify #some of the charactes within it. We can't do for example #string[i] = 'X' as it is not permitted in python. So we will first #treat the complementSequence as a list, do necessary modifications #to it and then convert is back to a string. #TODO: see if re.sub or re.subn can be used directly to replace #some characters (the reason re.suib is not used here is that #it does find and repalce for all matching patterns, which we don't # want here ) complementSequence = list(self._initial_complementSequence) for i in range(len(inSequence)): if complementSequence[ i] != MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL: complementSequence[i] = getComplementSequence( inSequence[i]) #Now there is additinal complementary sequence (because lenght of the #main sequence provided is less than the 'original complementary #sequence' (or the 'original main strand sequence) . In this case, #for the remaining complementary sequence, we will use unassigned #base symbol 'X' . #Example: If user starts editing a strand, #the initial strand sequence and its complement are shown in the #sequence editor. Now, the user deletes some characters from the #main sequence (using , e.g. 
backspace in sequence text edit to #delete those), here, we will assume that this deletion of a #character is as good as making it an unassigned base 'X', #so its new complement will be of course 'X' --Ninad 2008-04-10 extra_length = len(complementSequence) - len(inSequence) #do the above mentioned replacement count = extra_length while count > 0: if complementSequence[ -count] != MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL: complementSequence[-count] = 'X' count -= 1 #convert the complement sequence back to a string complementSequence = ''.join(complementSequence) return complementSequence
def getStrandSequenceAndItsComplement(self):
    """
    Returns the strand sequence and the sequence of the complementary
    strand(s) for this DnaStrand group.

    The atoms returned by self.get_strand_atoms_in_bond_direction() are
    visited in order. An atom without a base name contributes the
    unassigned base letter 'X', unless its element symbol is 'X' (i.e. it
    is a bondpoint), in which case it contributes nothing. If the
    complementary strand base atom is not found (e.g. a single stranded
    DNA), the corresponding character of the complement sequence is
    MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL, meaning it is missing.

    @return: A tuple (strand sequence, complement sequence).
    @rtype:  tuple of two strings

    @TODO: REFACTOR this. See how to split out the common part of this
           method and self.getStrandSequence(). Keeping
           self.getStrandSequence has the advantage that the complement
           sequence isn't computed there. One possibility is to pass an
           argument 'compute_complement_sequence = True' to this method.
    """
    # TODO: Is there a way to make use of DnaStrandMarkers to get the strand
    # atoms in bond direction for this DnaStrandGroup??
    # [A: they are not needed for that, but they could be used
    #  to define an unambiguous sequence origin for a ring.]
    #
    # OR: does self.members always return DnaStrandChunks in the
    # direction of bond direction? [A. no.]
    #
    # For now this relies on self.get_strand_atoms_in_bond_direction().
    # Will be revised and refactored (need to discuss with Bruce)
    # -- Ninad 2008-03-01

    strandLetters = []
    complementLetters = []

    for atm in self.get_strand_atoms_in_bond_direction():
        # NOTE(review): if getDnaBaseName() can return None, str() turns
        # it into the non-empty text 'None' -- verify against that method.
        baseName = str(atm.getDnaBaseName())
        complementBaseAtom = atm.get_strand_atom_mate()

        #What if baseName is not assigned due to some error?? Example
        #while reading in an mmp file.
        #As a fallback, use the unassigned base letter 'X' for all base
        #atoms that don't have a base letter defined, but make sure that
        #the atom is not a bondpoint.
        if not baseName and atm.element.symbol != 'X':
            baseName = 'X'
        if baseName:
            strandLetters.append(baseName)

        if complementBaseAtom:
            complementBaseName = getComplementSequence(baseName)
        elif atm.element.symbol != 'X':
            #The complementary strand base atom is not present (it's a
            #single stranded dna), so mark the position as missing.
            complementBaseName = MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL
        else:
            complementBaseName = ''

        if complementBaseName:
            complementLetters.append(complementBaseName)

    return (''.join(strandLetters), ''.join(complementLetters))
def _determine_complementSequence(self, inSequence): """ Determine the complementary sequence based on the main sequence. It does lots of thing than just obtaining the information by using 'getComplementSequence'. Examples of what it does: 1. Suppose you have a duplex with 10 basepairs. You are editing strand A and you lengthen it to create a sticky end. Lets assume that it is lengthened by 5 bases. Since there will be no complementary strand baseatoms for these, the sequence editor will show an asterisk('*'), indicating that its missing the strand mate base atom. Sequence Editor itself doesn't check each time if the strand mate is missing. Rather, it relies on what the caller supplied as the initial complement sequence. (@see: self._setComplementSequence) . The caller determines the sequence of the strand being edited and also its complement. If the complement doesn't exist, it replace the complement with a '*' and passes this information to the sequence editor. Everytime sequence editor is updating its sequnece, it updates the mate sequence and skips the positions marked '*' (by using self._initial_complementSequence), and thus those remain unaltered. If user enters a sequence which has more characters than the original sequence, then it doesn't update the complement portion of that extra portion of the sequence. This gives a visual indication of where the sequence ends. (see NFR bug 2787). Reversing the sequence also reverses the complement (including the '*' positions) @see Bug 2787 for details of the implementation. @see: self._setComplementSequence() @see: self._updateSequenceAndItsComplement() @see: self._setSequence() @see: DnaStrand_PropertyManager.updateSequence() (the caller) @see: Dna_Constants.MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL @see: DnaStrand.getStrandSequenceAndItsComplement() """ if not self._initial_complementSequence: #This is unlikely. Do nothing in this case. 
return '' complementSequence = '' #Make sure that the insequence is a string object inSequence = str(inSequence) #Do the following only when the length of sequence (inSequence) is #greater than or equal to length of original complement sequence #REVIEW This could be SLOW, need to think of a better way to do this. if len(inSequence) >= len(self._initial_complementSequence): for i in range(len(self._initial_complementSequence)): if self._initial_complementSequence[i] == \ MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL: complementSequence += self._initial_complementSequence[i] else: complementSequence += getComplementSequence(inSequence[i]) else: #Use complementSequence as a list object as we will need to modify #some of the charactes within it. We can't do for example #string[i] = 'X' as it is not permitted in python. So we will first #treat the complementSequence as a list, do necessary modifications #to it and then convert is back to a string. #TODO: see if re.sub or re.subn can be used directly to replace #some characters (the reason re.suib is not used here is that #it does find and repalce for all matching patterns, which we don't # want here ) complementSequence = list(self._initial_complementSequence) for i in range(len(inSequence)): if complementSequence[i] != MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL: complementSequence[i] = getComplementSequence(inSequence[i]) #Now there is additinal complementary sequence (because lenght of the #main sequence provided is less than the 'original complementary #sequence' (or the 'original main strand sequence) . In this case, #for the remaining complementary sequence, we will use unassigned #base symbol 'X' . #Example: If user starts editing a strand, #the initial strand sequence and its complement are shown in the #sequence editor. Now, the user deletes some characters from the #main sequence (using , e.g. 
backspace in sequence text edit to #delete those), here, we will assume that this deletion of a #character is as good as making it an unassigned base 'X', #so its new complement will be of course 'X' --Ninad 2008-04-10 extra_length = len(complementSequence) - len(inSequence) #do the above mentioned replacement count = extra_length while count > 0: if complementSequence[-count] != MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL: complementSequence[-count] = 'X' count -= 1 #convert the complement sequence back to a string complementSequence = ''.join(complementSequence) return complementSequence
def setStrandSequence(self, sequenceString, complement=True):
    """
    Set the strand sequence i.e. assign the baseNames for the PAM atoms in
    this strand AND (optionally) the complementary baseNames to the PAM
    atoms of the complementary strand ('mate strand').

    Whitespace is stripped from sequenceString, then the non-singlet atoms
    returned by self.get_strand_atoms_in_bond_direction() are assigned the
    characters of the sequence in order. Atoms for which the sequence has
    no character left are assigned the unassigned base 'X'.

    Finally the display lists of member DnaStrandChunks displayed as
    diDNACYLINDER, and of their complementary chunks displayed that way,
    are invalidated so the change becomes visible.

    @param sequenceString: The sequence to be assigned to this strand chunk
    @type  sequenceString: str

    @param complement: If True, the strand atom mate of each atom (if any)
                       is assigned the complementary base name as well.
    @type  complement: bool
    """
    #TO BE REVISED SEE A TODO COMMENT AT THE TOP
    sequenceString = str(sequenceString)
    #Remove whitespaces and tabs from the sequence string
    sequenceString = re.sub(r'\s', '', sequenceString)
    lastSequenceIndex = len(sequenceString) - 1

    #May be we set this beginning with an atom marked by the
    #Dna Atom Marker in dna data model? -- Ninad 2008-01-11
    # [yes, see my longer reply comment above -- Bruce 080117]
    atomList = [atm
                for atm in self.get_strand_atoms_in_bond_direction()
                if not atm.is_singlet()]

    # Take each atom's position from enumerate() rather than searching the
    # list with atomList.index(atm), which costs a linear scan per atom and
    # reports the first match only.
    for atomIndex, atm in enumerate(atomList):
        if atomIndex > lastSequenceIndex:
            #In this case, set an unassigned base ('X') for the remaining
            #atoms
            baseName = 'X'
        else:
            baseName = sequenceString[atomIndex]

        atm.setDnaBaseName(baseName)

        #Also assign the baseNames for the PAM atoms on the complementary
        #('mate') strand.
        if not complement:
            continue
        strandAtomMate = atm.get_strand_atom_mate()
        if strandAtomMate is not None:
            # Compute the complement only when a mate atom will use it.
            complementBaseName = getComplementSequence(str(baseName))
            strandAtomMate.setDnaBaseName(str(complementBaseName))

    # piotr 080319:
    # Redraw the chunks in DNA display style
    # to reflect the sequence changes.
    from utilities.constants import diDNACYLINDER
    for c in self.members:
        if isinstance(c, DnaStrandChunk) and \
           c.get_dispdef() == diDNACYLINDER:
            c.inval_display_list() # redraw the chunk
            # do the same for all complementary chunks
            # (prev_cc avoids re-checking the same complementary chunk for
            # consecutive atoms whose mates belong to it)
            prev_cc = None
            for atom in c.atoms.itervalues():
                atm_mate = atom.get_strand_atom_mate()
                if atm_mate:
                    cc = atm_mate.molecule
                    if cc != prev_cc and isinstance(cc, DnaStrandChunk):
                        prev_cc = cc
                        if cc.get_dispdef() == diDNACYLINDER:
                            cc.inval_display_list()
def getStrandSequenceAndItsComplement(self):
    """
    Returns the strand sequence and the sequence of the complementary
    strand(s) for the DnaStrandChunks within this DnaStrand group.

    Each atom returned by self.get_strand_atoms_in_bond_direction() adds
    at most one entry to each of the two sequences:

      - strand sequence: the atom's base name, or the unassigned base
        letter 'X' if it has none. Atoms whose element symbol is 'X'
        (bondpoints) and that have no base name add nothing.
      - complement sequence: the complement of that base name when the
        atom has a strand atom mate; otherwise
        MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL (the complementary base
        atom is missing, e.g. single stranded DNA), except for bondpoints,
        which add nothing.

    @return: A tuple (strand sequence, complement sequence).
    @rtype:  tuple of two strings

    @TODO: REFACTOR this. See how to split out the common part of this
           method and self.getStrandSequence(). One possibility is to pass
           an argument 'compute_complement_sequence = True' to this
           method.
    """
    # TODO: Is there a way to make use of DnaStrandMarkers to get the strand
    # atoms in bond direction for this DnaStrandGroup??
    # [A: they are not needed for that, but they could be used
    #  to define an unambiguous sequence origin for a ring.]
    #
    # OR: does self.members always return DnaStrandChunks in the
    # direction of bond direction? [A. no.]
    #
    # Until that is settled, self.get_strand_atoms_in_bond_direction() is
    # used. To be revised and refactored (need to discuss with Bruce)
    # -- Ninad 2008-03-01

    sequenceParts = []
    complementParts = []

    for atm in self.get_strand_atoms_in_bond_direction():
        # NOTE(review): str() would turn a None base name into the text
        # 'None' -- presumably getDnaBaseName() always returns a string;
        # verify.
        baseName = str(atm.getDnaBaseName())
        complementBaseAtom = atm.get_strand_atom_mate()

        if not baseName:
            #The base name may be unassigned due to some error (example:
            #while reading in an mmp file). Fall back to the unassigned
            #base letter 'X', but never for a bondpoint.
            if atm.element.symbol != 'X':
                baseName = 'X'
        if baseName:
            sequenceParts.append(baseName)

        complementBaseName = ''
        if complementBaseAtom:
            complementBaseName = getComplementSequence(baseName)
        elif atm.element.symbol != 'X':
            #No complementary strand base atom (single stranded dna):
            #indicate the complement as missing.
            complementBaseName = MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL

        if complementBaseName:
            complementParts.append(complementBaseName)

    sequenceString = ''.join(sequenceParts)
    complementSequenceString = ''.join(complementParts)
    return (sequenceString, complementSequenceString)