Example #1
0
    def invokeAction(self, inActionName):
        """
        Applies an action on the current sequence displayed in the PM.

        The current sequence is read with self._getSequence(), transformed
        according to inActionName and written back with self.setSequence().
        If inActionName is not one of the known actions, the displayed
        sequence is left untouched (it is not replaced by an empty string).

        @param inActionName: The action name. Compared against
                             self._action_Complement, self._action_Reverse,
                             self._action_ConvertUnrecognized and
                             self._action_RemoveUnrecognized.
        @type  inActionName: str

        @return: The sequence after the action has been applied (the
                 unchanged current sequence for an unknown action).
        @rtype:  str
        """
        # _getSequence() returns a pair; only the sequence itself is needed.
        sequence, _allKnown = self._getSequence()

        if inActionName == self._action_Complement:
            outResult = getComplementSequence(sequence)
        elif inActionName == self._action_Reverse:
            outResult = getReverseSequence(sequence)
        elif inActionName == self._action_ConvertUnrecognized:
            outResult = replaceUnrecognized(sequence, replaceBase='N')
        elif inActionName == self._action_RemoveUnrecognized:
            outResult = replaceUnrecognized(sequence, replaceBase='')
        else:
            # Unknown action: there is nothing to apply, so do not overwrite
            # the displayed sequence; report what is currently there.
            return sequence

        # Write the result back exactly once, whichever action produced it.
        self.setSequence(outResult)

        return outResult
Example #2
0
    def setStrandSequence(self, sequenceString, complement = True):
        """
        Set the strand sequence i.e. assign the baseNames for the PAM atoms in
        this strand AND (optionally) the complementary baseNames to the PAM
        atoms of the complementary strand ('mate strand').

        All whitespace is removed from sequenceString first. The atoms
        returned by self.get_strand_atoms_in_bond_direction() are then
        visited in order, skipping singlets: the n-th such atom receives the
        n-th character of the sequence. Atoms beyond the end of the sequence
        receive the unassigned base 'X'.

        Afterwards, the display list of every DnaStrandChunk member shown in
        the diDNACYLINDER display style is invalidated, and likewise for the
        mate chunks of its atoms that use that style.

        @param sequenceString: The sequence to be assigned to this strand chunk
        @type sequenceString: str
        @param complement: If True, each atom's strand mate (when it has one)
                           is given the complement of the assigned base name.
        @type complement: bool
        """
        #TO BE REVISED SEE A TODO COMMENT AT THE TOP

        sequenceString = str(sequenceString)
        #Remove whitespaces and tabs from the sequence string
        sequenceString = re.sub(r'\s', '', sequenceString)
        sequenceLength = len(sequenceString)

        #May be we set this beginning with an atom marked by the
        #Dna Atom Marker in dna data model? -- Ninad 2008-01-11
        # [yes, see my longer reply comment above -- Bruce 080117]
        rawAtomList = self.get_strand_atoms_in_bond_direction()
        atomList = [atm for atm in rawAtomList if not atm.is_singlet()]

        # enumerate() supplies each atom's position directly; looking it up
        # with atomList.index(atm) would rescan the list for every atom.
        for atomIndex, atm in enumerate(atomList):
            if atomIndex < sequenceLength:
                baseName = sequenceString[atomIndex]
            else:
                #In this case, set an unassigned base ('X') for the remaining
                #atoms
                baseName = 'X'

            atm.setDnaBaseName(baseName)

            #Also assign the baseNames for the PAM atoms on the complementary
            #('mate') strand.
            if complement:
                strandAtomMate = atm.get_strand_atom_mate()
                if strandAtomMate is not None:
                    # Only compute the complement when there is a mate atom
                    # to receive it.
                    complementBaseName = getComplementSequence(str(baseName))
                    strandAtomMate.setDnaBaseName(str(complementBaseName))

        # piotr 080319:
        # Redraw the chunks in DNA display style
        # to reflect the sequence changes.
        from utilities.constants import diDNACYLINDER
        for c in self.members:
            if not (isinstance(c, DnaStrandChunk) and
                    c.get_dispdef() == diDNACYLINDER):
                continue
            c.inval_display_list() # redraw the chunk
            # do the same for all complementary chunks
            # prev_cc remembers the last mate chunk handled, so a run of
            # consecutive atoms sharing one mate chunk is processed once.
            prev_cc = None
            for atom in c.atoms.itervalues():
                atm_mate = atom.get_strand_atom_mate()
                if not atm_mate:
                    continue
                cc = atm_mate.molecule
                if cc != prev_cc and isinstance(cc, DnaStrandChunk):
                    prev_cc = cc
                    if cc.get_dispdef() == diDNACYLINDER:
                        cc.inval_display_list()
    def invokeAction(self, inActionName):
        """
        Applies an action on the current sequence displayed in the PM.

        The sequence obtained from self._getSequence() is transformed
        according to inActionName and stored with self.setSequence(). When
        inActionName matches none of the known actions, nothing is stored and
        the current sequence stays as it is (it is not cleared).

        @param inActionName: The action name. Compared against
                             self._action_Complement, self._action_Reverse,
                             self._action_ConvertUnrecognized and
                             self._action_RemoveUnrecognized.
        @type  inActionName: str

        @return: The sequence after the action has been applied (the
                 unchanged current sequence for an unknown action).
        @rtype:  str
        """
        # Second element of the pair returned by _getSequence() is not used.
        sequence, _allKnown = self._getSequence()

        if inActionName == self._action_Complement:
            outResult = getComplementSequence(sequence)
        elif inActionName == self._action_Reverse:
            outResult = getReverseSequence(sequence)
        elif inActionName == self._action_ConvertUnrecognized:
            outResult = replaceUnrecognized(sequence, replaceBase='N')
        elif inActionName == self._action_RemoveUnrecognized:
            outResult = replaceUnrecognized(sequence, replaceBase='')
        else:
            # Not a known action: avoid wiping the displayed sequence by
            # storing an empty result; just return the current sequence.
            return sequence

        # Single write-back for every recognised action.
        self.setSequence(outResult)

        return outResult
Example #4
0
    def _determine_complementSequence(self, inSequence):
        """
        Determine the complementary sequence based on the main sequence.
        It does lots of thing than just obtaining the information by using
        'getComplementSequence'.

        Examples of what it does:

        1. Suppose you have a duplex with 10 basepairs.
        You are editing strand A and you lengthen it to create a sticky end.
        Lets assume that it is lengthened by 5 bases. Since there will be no
        complementary strand baseatoms for these, the sequence editor will show
        an asterisk('*'), indicating that its missing the strand mate base atom.

        Sequence Editor itself doesn't check each time if the strand mate is
        missing. Rather, it relies on what the caller supplied as the initial
        complement sequence. (@see: self._setComplementSequence) . The caller
        determines the sequence of the strand being edited and also its complement.
        If the complement doesn't exist, it replace the complement with a '*'
        and passes this information to the sequence editor. Everytime sequence
        editor is updating its sequnece, it updates the mate sequence and skips
        the positions marked '*' (by using self._initial_complementSequence),
        and thus those remain unaltered.

        If user enters a sequence which has more characters than the original
        sequence, then it doesn't update the complement portion of that
        extra portion of the sequence. This gives a visual indication of
        where the sequence ends. (see NFR bug 2787).

        Reversing the sequence also reverses the complement (including the '*'
        positions)

        @see Bug 2787 for details of the implementation.

        @see: self._setComplementSequence()
        @see: self._updateSequenceAndItsComplement()
        @see: self._setSequence()
        @see: DnaStrand_PropertyManager.updateSequence() (the caller)
        @see: Dna_Constants.MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL
        @see: DnaStrand.getStrandSequenceAndItsComplement()

        """
        if not self._initial_complementSequence:
            #This is unlikely. Do nothing in this case.
            return ''

        complementSequence = ''

        #Make sure that the insequence is a string object
        inSequence = str(inSequence)

        #Do the following only when the length of sequence (inSequence) is
        #greater than or equal to length of original complement sequence

        #REVIEW This could be SLOW, need to think of a better way to do this.
        if len(inSequence) >= len(self._initial_complementSequence):
            for i in range(len(self._initial_complementSequence)):
                if self._initial_complementSequence[i] == \
                   MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL:
                    complementSequence += self._initial_complementSequence[i]
                else:
                    complementSequence += getComplementSequence(inSequence[i])

        else:
            #Use complementSequence as a list object as we will need to modify
            #some of the charactes within it. We can't do for example
            #string[i] = 'X' as it is not permitted in python. So we will first
            #treat the complementSequence as a list, do necessary modifications
            #to it and then convert is back to a string.
            #TODO: see if re.sub or re.subn can be used directly to replace
            #some characters (the reason re.suib is not used here is that
            #it does find and repalce for all matching patterns, which we don't
            # want here )

            complementSequence = list(self._initial_complementSequence)

            for i in range(len(inSequence)):
                if complementSequence[
                        i] != MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL:
                    complementSequence[i] = getComplementSequence(
                        inSequence[i])

            #Now there is additinal complementary sequence (because lenght of the
            #main sequence provided is less than the 'original complementary
            #sequence' (or the 'original main strand sequence) . In this case,
            #for the remaining complementary sequence, we will use unassigned
            #base symbol 'X' .
            #Example: If user starts editing a strand,
            #the initial strand sequence and its complement are shown in the
            #sequence editor. Now, the user deletes some characters from the
            #main sequence (using , e.g. backspace in sequence text edit to
            #delete those), here, we will assume that this deletion of a
            #character is as good as making it an unassigned base 'X',
            #so its new complement will be of course 'X' --Ninad 2008-04-10
            extra_length = len(complementSequence) - len(inSequence)
            #do the above mentioned replacement
            count = extra_length
            while count > 0:
                if complementSequence[
                        -count] != MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL:
                    complementSequence[-count] = 'X'
                count -= 1

            #convert the complement sequence back to a string
            complementSequence = ''.join(complementSequence)

        return complementSequence
Example #5
0
    def getStrandSequenceAndItsComplement(self):
        """
        Return the sequence of this strand together with the sequence of the
        complementary strand(s), for the atoms returned by
        self.get_strand_atoms_in_bond_direction().

        For each atom:
         - its base name is appended to the strand sequence; when the base
           name is empty, 'X' (unassigned base) is used instead, unless the
           atom's element symbol is 'X' (a bondpoint), in which case nothing
           is appended.
         - when the atom has a strand mate, getComplementSequence() of the
           base name is appended to the complement sequence; when it has no
           mate (e.g. single stranded DNA) and is not a bondpoint,
           MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL ('*') is appended.

        @return: (strand sequence, complement sequence)
        @rtype: tuple of (str, str)

        @TODO: REFACTOR this. See how to split out the common part of this
        method and self.getStrandSequence(). Keeping self.getStrandSequence
        has the advantage that it doesn't compute the complement sequence.
        One possibility is to pass an argument
        'compute_complement_sequence = True' to this method.
        """
        # TODO: Is there a way to make use of DnaStrandMarkers to get the strand
        #       atoms in bond direction for this DnaStrandGroup??
        #       [A: they are not needed for that, but they could be used
        #        to define an unambiguous sequence origin for a ring.]
        #       OR: does self.members alway return DnaStrandChunks in the
        #       direction of bond direction? [A. no.]
        #       Until this is revised, self.get_strand_atoms_in_bond_direction
        #       is used. (need to discuss with Bruce) -- Ninad 2008-03-01

        strandLetters = []
        complementLetters = []

        for atm in self.get_strand_atoms_in_bond_direction():
            baseName = str(atm.getDnaBaseName())
            mateAtom = atm.get_strand_atom_mate()

            #The baseName may be unassigned due to some error (example: while
            #reading in an mmp file). Fall back to the unassigned base letter
            #'X', but only if the atom is not a bondpoint.
            if not baseName and atm.element.symbol != 'X':
                baseName = 'X'
            if baseName:
                strandLetters.append(baseName)

            if mateAtom:
                complementBaseName = getComplementSequence(baseName)
            elif atm.element.symbol != 'X':
                #The complementary strand base atom is not present (its a
                #single stranded dna), so mark the position as missing.
                complementBaseName = MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL
            else:
                complementBaseName = ''

            if complementBaseName:
                complementLetters.append(complementBaseName)

        return (''.join(strandLetters), ''.join(complementLetters))
Example #6
0
   def _determine_complementSequence(self, inSequence):
       """
       Determine the complementary sequence based on the main sequence. 
       It does lots of thing than just obtaining the information by using 
       'getComplementSequence'. 
       
       Examples of what it does: 
       
       1. Suppose you have a duplex with 10 basepairs. 
       You are editing strand A and you lengthen it to create a sticky end. 
       Lets assume that it is lengthened by 5 bases. Since there will be no 
       complementary strand baseatoms for these, the sequence editor will show 
       an asterisk('*'), indicating that its missing the strand mate base atom.
       
       Sequence Editor itself doesn't check each time if the strand mate is 
       missing. Rather, it relies on what the caller supplied as the initial 
       complement sequence. (@see: self._setComplementSequence) . The caller 
       determines the sequence of the strand being edited and also its complement.
       If the complement doesn't exist, it replace the complement with a '*' 
       and passes this information to the sequence editor. Everytime sequence
       editor is updating its sequnece, it updates the mate sequence and skips
       the positions marked '*' (by using self._initial_complementSequence), 
       and thus those remain unaltered. 
       
       If user enters a sequence which has more characters than the original
       sequence, then it doesn't update the complement portion of that
       extra portion of the sequence. This gives a visual indication of 
       where the sequence ends. (see NFR bug 2787). 
       
       Reversing the sequence also reverses the complement (including the '*'
       positions)
       
       @see Bug 2787 for details of the implementation. 
       
       @see: self._setComplementSequence()
       @see: self._updateSequenceAndItsComplement()
       @see: self._setSequence()
       @see: DnaStrand_PropertyManager.updateSequence() (the caller)
       @see: Dna_Constants.MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL
       @see: DnaStrand.getStrandSequenceAndItsComplement()
       
       """
       if not self._initial_complementSequence:
           #This is unlikely. Do nothing in this case. 
           return ''
       
       complementSequence = ''
       
       #Make sure that the insequence is a string object
       inSequence = str(inSequence)
      
       #Do the following only when the length of sequence (inSequence) is 
       #greater than or equal to length of original complement sequence
       
       #REVIEW This could be SLOW, need to think of a better way to do this.
       if len(inSequence) >= len(self._initial_complementSequence):                
           for i in range(len(self._initial_complementSequence)):
               if self._initial_complementSequence[i] == \
                  MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL:
                   complementSequence += self._initial_complementSequence[i]
               else:
                   complementSequence += getComplementSequence(inSequence[i])
                   
       else:
           #Use complementSequence as a list object as we will need to modify 
           #some of the charactes within it. We can't do for example
           #string[i] = 'X' as it is not permitted in python. So we will first 
           #treat the complementSequence as a list, do necessary modifications
           #to it and then convert is back to a string. 
           #TODO: see if re.sub or re.subn can be used directly to replace
           #some characters (the reason re.suib is not used here is that 
           #it does find and repalce for all matching patterns, which we don't
           # want here )
           
           complementSequence = list(self._initial_complementSequence)
           
           for i in range(len(inSequence)):
               if complementSequence[i] != MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL:
                   complementSequence[i] = getComplementSequence(inSequence[i])
           
           #Now there is additinal complementary sequence (because lenght of the
           #main sequence provided is less than the 'original complementary
           #sequence' (or the 'original main strand sequence) . In this case, 
           #for the remaining complementary sequence, we will use unassigned
           #base symbol 'X' . 
           #Example: If user starts editing a strand, 
           #the initial strand sequence and its complement are shown in the
           #sequence editor. Now, the user deletes some characters from the 
           #main sequence (using , e.g. backspace in sequence text edit to 
           #delete those), here, we will assume that this deletion of a 
           #character is as good as making it an unassigned base 'X', 
           #so its new complement will be of course 'X' --Ninad 2008-04-10
           extra_length = len(complementSequence) - len(inSequence)
           #do the above mentioned replacement 
           count = extra_length
           while count > 0:
               if complementSequence[-count] != MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL:
                   complementSequence[-count] = 'X'
               count -= 1                           
      
           #convert the complement sequence back to a string
           complementSequence = ''.join(complementSequence)
 
       return complementSequence
Example #7
0
    def setStrandSequence(self, sequenceString, complement=True):
        """
        Set the strand sequence i.e. assign the baseNames for the PAM atoms in
        this strand AND (optionally) the complementary baseNames to the PAM
        atoms of the complementary strand ('mate strand').

        Whitespace is stripped from sequenceString before use. The
        non-singlet atoms from self.get_strand_atoms_in_bond_direction() are
        assigned the characters of the sequence in order; once the sequence
        is exhausted the remaining atoms get the unassigned base 'X'.

        Finally the display lists of the DnaStrandChunk members drawn in the
        diDNACYLINDER display style, and of the mate chunks of their atoms
        drawn in that style, are invalidated.

        @param sequenceString: The sequence to be assigned to this strand chunk
        @type sequenceString: str
        @param complement: If True, each atom's strand mate (when it has one)
                           is given the complement of the assigned base name.
        @type complement: bool
        """
        #TO BE REVISED SEE A TODO COMMENT AT THE TOP

        sequenceString = str(sequenceString)
        #Remove whitespaces and tabs from the sequence string
        sequenceString = re.sub(r'\s', '', sequenceString)
        sequenceLength = len(sequenceString)

        #May be we set this beginning with an atom marked by the
        #Dna Atom Marker in dna data model? -- Ninad 2008-01-11
        # [yes, see my longer reply comment above -- Bruce 080117]
        rawAtomList = self.get_strand_atoms_in_bond_direction()
        atomList = [atm for atm in rawAtomList if not atm.is_singlet()]

        # The position comes straight from enumerate(); calling
        # atomList.index(atm) per atom would rescan the list each time.
        for atomIndex, atm in enumerate(atomList):
            if atomIndex < sequenceLength:
                baseName = sequenceString[atomIndex]
            else:
                #In this case, set an unassigned base ('X') for the remaining
                #atoms
                baseName = 'X'

            atm.setDnaBaseName(baseName)

            #Also assign the baseNames for the PAM atoms on the complementary
            #('mate') strand.
            if complement:
                strandAtomMate = atm.get_strand_atom_mate()
                if strandAtomMate is not None:
                    # The complement is only needed when a mate atom exists.
                    complementBaseName = getComplementSequence(str(baseName))
                    strandAtomMate.setDnaBaseName(str(complementBaseName))

        # piotr 080319:
        # Redraw the chunks in DNA display style
        # to reflect the sequence changes.
        from utilities.constants import diDNACYLINDER
        for c in self.members:
            if not (isinstance(c, DnaStrandChunk) and
                    c.get_dispdef() == diDNACYLINDER):
                continue
            c.inval_display_list()  # redraw the chunk
            # do the same for all complementary chunks
            # prev_cc holds the previously handled mate chunk so that
            # consecutive atoms with the same mate chunk do not repeat it.
            prev_cc = None
            for atom in c.atoms.itervalues():
                atm_mate = atom.get_strand_atom_mate()
                if not atm_mate:
                    continue
                cc = atm_mate.molecule
                if cc != prev_cc and isinstance(cc, DnaStrandChunk):
                    prev_cc = cc
                    if cc.get_dispdef() == diDNACYLINDER:
                        cc.inval_display_list()
Example #8
0
    def getStrandSequenceAndItsComplement(self):
        """
        Returns the strand sequence and the sequence of the complementary
        strands for the atoms given by
        self.get_strand_atoms_in_bond_direction().

        An atom without a base name contributes 'X' (unassigned base) to the
        strand sequence, unless its element symbol is 'X' (a bondpoint), in
        which case it contributes nothing. If the complementary strand base
        atom is not found (e.g. a single stranded DNA), the corresponding
        character of the complement sequence is '*'
        (MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL), meaning it is missing;
        bondpoints without a mate contribute nothing there either.

        @return: a pair (strand sequence, complement sequence)
        @rtype: tuple of (str, str)

        @TODO: REFACTOR this. See how to split out the common part of this
        method and self.getStrandSequence(). self.getStrandSequence is kept
        because it doesn't compute the complement sequence. One possibility
        is to pass an argument 'compute_complement_sequence = True' to this
        method.
        """
        # TODO: Is there a way to make use of DnaStrandMarkers to get the strand
        #       atoms in bond direction for this DnaStrandGroup??
        #       [A: they are not needed for that, but they could be used
        #        to define an unambiguous sequence origin for a ring.]
        #
        #       OR: does self.members alway return DnaStrandChunks in the
        #       direction of bond direction? [A. no.]
        #
        #       While this is unresolved, the atoms come from
        #       self.get_strand_atoms_in_bond_direction. Will be revised and
        #       refactored (need to discuss with Bruce) -- Ninad 2008-03-01

        # One (base letter, complement letter) entry per atom; either part
        # may be empty when the atom contributes nothing to that sequence.
        letterPairs = []

        for atm in self.get_strand_atoms_in_bond_direction():
            baseName = str(atm.getDnaBaseName())
            complementBaseAtom = atm.get_strand_atom_mate()

            if not baseName:
                #What if baseName is not assigned due to some error?? Example
                #while reading in an mmp file. As a fallback, use the
                #unassigned base letter 'X', making sure that the atom is not
                #a bondpoint.
                if atm.element.symbol != 'X':
                    baseName = 'X'

            complementBaseName = ''
            if complementBaseAtom:
                complementBaseName = getComplementSequence(baseName)
            elif atm.element.symbol != 'X':
                #The complementary strand base atom is not present (its a
                #single stranded dna), so indicate the complement as '*'.
                complementBaseName = MISSING_COMPLEMENTARY_STRAND_ATOM_SYMBOL

            letterPairs.append((baseName, complementBaseName))

        # Empty base names add nothing when joined; complements that are
        # empty (or otherwise false) are skipped.
        sequenceString = ''.join(
            [base for base, _mate in letterPairs])
        complementSequenceString = ''.join(
            [mate for _base, mate in letterPairs if mate])

        return (sequenceString, complementSequenceString)