Example #1
0
def tuplecmp(tup1, tup2):
    """
    cmp-style comparison of two
    (chain, pdb_seqres_strt, pdb_seqres_end, type) tuples, used as the
    sort comparison in PTSecStruct.check_validity() and
    PTSecStruct.__str__().

    Tuples are ordered first by element 0 (the chain); when the chains
    compare equal the result is whatever pdb_res_seq_cmp() returns for
    element 1 (the start residue) of each tuple.

    Parameters:
      tup1 - first tuple
      tup2 - second tuple
    Return value:
      -1 if tup1 sorts before tup2 by chain, 1 if it sorts after,
      otherwise the value of pdb_res_seq_cmp(tup1[1], tup2[1]).
    """
    chain1, chain2 = tup1[0], tup2[0]
    if chain1 < chain2:
        return -1
    if chain1 > chain2:
        return 1
    # Same chain: defer to the residue sequence comparison on the starts.
    return pdb_res_seq_cmp(tup1[1], tup2[1])
Example #2
0
def tuplecmp(tup1, tup2):
    """
    Ordering function (cmp protocol) for
    (chain, pdb_seqres_strt, pdb_seqres_end, type) tuples; it is the
    comparison used when sorting in PTSecStruct.check_validity() and
    PTSecStruct.__str__().

    The chain (element 0) is the primary key.  Only when the two chains
    are neither less than nor greater than each other is the start
    residue (element 1) compared, by delegating to pdb_res_seq_cmp().

    Parameters:
      tup1 - left-hand tuple
      tup2 - right-hand tuple
    Return value:
      -1 or 1 when the chains differ, else the result of
      pdb_res_seq_cmp(tup1[1], tup2[1]).
    """
    lhs_chain = tup1[0]
    rhs_chain = tup2[0]
    if lhs_chain < rhs_chain:
        result = -1
    elif lhs_chain > rhs_chain:
        result = 1
    else:
        # chains tie, so the start residue decides the order
        result = pdb_res_seq_cmp(tup1[1], tup2[1])
    return result
Example #3
0
    def check_validity_and_fix(self):
        """
        Check for overlapping secondary structure elements (SSEs) and
        repair them in place where possible.

        Overlaps happen for example in the PDB HELIX records for 1DLC.
        Helices and strands are merged into one list, sorted with
        tuplecmp(), and each SSE is compared with its predecessor.  When
        both are on the same chain and the SSE starts at or before the
        end of the previous one, the overlap is removed by shortening
        the longer of the two (the start of the current SSE is moved
        forward, or the end of the previous SSE is moved back; on a tie
        the previous SSE is shortened).  A warning describing each
        overlap and the change made is written to stderr.

        Parameters:
          None
        Return value:
          True if there were no overlaps or every overlap was fixed up;
          False if at least one overlap could not be fixed because the
          overlapping end/start residue has an insertion code.
        Uses data members (READ/WRITE):
           helix_list, strand_list
            helix_list entries are (chain, start, endchain, end, htype)
            and strand_list entries are (chain, start, endchain, end).
            (start and end in helix and strand tuples may be modified;
            both lists are rebuilt in the order produced by sorting
            with tuplecmp())
        """
        # functools.cmp_to_key lets the cmp-style tuplecmp be used as a
        # sort key; list.sort(cmp=...) only exists in Python 2.
        import functools

        # Internal working tuples: (chain, start, end, endchain, type, htype)
        helices = [(chain, start, end, endchain, 'H', htype)
                   for (chain, start, endchain, end, htype) in self.helix_list]
        strands = [(chain, start, end, endchain, 'E', None)
                   for (chain, start, endchain, end) in self.strand_list]
        sselist = helices + strands
        sselist.sort(key=functools.cmp_to_key(tuplecmp))
        is_valid = True
        for i in range(1, len(sselist)):
            # Read from the list each time so that a fix made in the
            # previous iteration is seen as prevsse here.
            sse = sselist[i]
            prevsse = sselist[i - 1]
            if (prevsse[0] == sse[0]
                    and pdb_res_seq_cmp(sse[1], prevsse[2]) <= 0):
                sys.stderr.write('WARNING: PDB has overlapping SSE definitions'
                                 ' ' + str(prevsse) + ' and ' + str(sse) +
                                 ': ')
                # remove overlap by shortening the longer one
                # FIXME: this is ignoring insertion codes etc., really
                # should convert to proper sequential residue sequence numbers
                # to do this
                (prevsse_start,
                 prevsse_start_icode) = get_int_icode(prevsse[1])
                (prevsse_end, prevsse_end_icode) = get_int_icode(prevsse[2])
                (sse_start, sse_start_icode) = get_int_icode(sse[1])
                (sse_end, sse_end_icode) = get_int_icode(sse[2])
                if (prevsse_end_icode or sse_start_icode):
                    sys.stderr.write('contains insertion codes, giving up\n')
                    is_valid = False
                    continue
                prevsse_len = prevsse_end - prevsse_start + 1
                sse_len = sse_end - sse_start + 1
                overlap = prevsse_end - sse_start + 1
                # Only the endpoint that is actually moved is rewritten.
                # The other endpoints keep their original strings so that
                # an insertion code on them (not checked above) is not
                # silently dropped by an int -> str round trip.
                if sse_len > prevsse_len:
                    sse_start += overlap
                    sselist[i] = (sse[0], str(sse_start), sse[2], sse[3],
                                  sse[4], sse[5])
                else:
                    prevsse_end -= overlap
                    sselist[i - 1] = (prevsse[0], prevsse[1],
                                      str(prevsse_end), prevsse[3],
                                      prevsse[4], prevsse[5])
                sys.stderr.write('changed to ' + str(sselist[i - 1]) +
                                 ' and ' + str(sselist[i]) + '\n')

        # rebuild the helix_list and strand_list with our modified tuples
        self.helix_list = [(chain, start, endchain, end, htype)
                           for (chain, start, end, endchain, ssetype,
                                htype) in sselist if ssetype == 'H']
        self.strand_list = [(chain, start, endchain, end)
                            for (chain, start, end, endchain, ssetype,
                                 htype) in sselist if ssetype == 'E']
        return is_valid
Example #4
0
    def check_validity_and_fix(self):
        """
        Check for overlapping secondary structures and fix them up in
        place where possible.

        This happens for example in the PDB HELIX records for 1DLC.
        The helix and strand lists are combined, sorted with
        tuplecmp(), and every element is checked against the one before
        it.  If both are on the same chain and the element starts at or
        before the end of its predecessor, the longer of the two is
        shortened by the size of the overlap: either the start of the
        current element is moved forward or the end of the previous
        element is moved back (the previous element is shortened when
        the lengths are equal).  Each overlap found, and the change
        made for it, is reported on stderr.

        Parameters:
          None
        Return value:
          True if OK (no overlaps, or all overlaps were fixed up here),
          False if an overlap was left in place because the residue at
          the overlapping end/start has an insertion code.
        Uses data members (READ/WRITE):
           helix_list, strand_list
            helix_list holds (chain, start, endchain, end, htype) tuples
            and strand_list holds (chain, start, endchain, end) tuples.
            (start and end in helix and strand tuples may be modified;
            both lists are rebuilt in the order given by sorting with
            tuplecmp())
        """
        # list.sort(cmp=...) was removed in Python 3; cmp_to_key adapts
        # the cmp-style tuplecmp and is also available in Python 2.7.
        from functools import cmp_to_key

        # working tuples are (chain, start, end, endchain, ssetype, htype)
        helices = [ (chain, start, end, endchain, 'H', htype)
                    for (chain, start, endchain, end, htype)
                    in self.helix_list ]
        strands = [ (chain, start, end, endchain, 'E', None)
                    for (chain, start, endchain, end)
                    in self.strand_list ]
        sselist = helices + strands
        sselist.sort(key=cmp_to_key(tuplecmp))
        is_valid = True
        for i in range(1, len(sselist)):
            # fetched from sselist on every pass so an element fixed up
            # in the previous pass is used in its modified form
            sse = sselist[i]
            prevsse = sselist[i-1]
            if (prevsse[0] == sse[0] and
                pdb_res_seq_cmp(sse[1], prevsse[2]) <= 0):
                sys.stderr.write('WARNING: PDB has overlapping SSE definitions'
                                 ' ' + str(prevsse) + ' and ' + str(sse) + ': ')
                # remove overlap by shortening the longer one
                # FIXME: this is ignoring insertion codes etc., really
                # should convert to proper sequential residue sequence numbers
                # to do this
                (prevsse_start, prevsse_start_icode) = get_int_icode(prevsse[1])
                (prevsse_end, prevsse_end_icode) = get_int_icode(prevsse[2])
                (sse_start, sse_start_icode) = get_int_icode(sse[1])
                (sse_end, sse_end_icode) = get_int_icode(sse[2])
                if (prevsse_end_icode or sse_start_icode):
                    sys.stderr.write('contains insertion codes, giving up\n')
                    is_valid = False
                    continue
                prevsse_len = prevsse_end - prevsse_start + 1
                sse_len = sse_end - sse_start + 1
                overlap = prevsse_end - sse_start + 1
                # Rewrite only the endpoint that moves.  The untouched
                # endpoints are not checked for insertion codes above, so
                # their original strings are kept rather than rebuilt
                # from the integer part, which would drop any such code.
                if sse_len > prevsse_len:
                    sse_start += overlap
                    sselist[i] = (sse[0], str(sse_start), sse[2],
                                  sse[3], sse[4], sse[5])
                else:
                    prevsse_end -= overlap
                    sselist[i-1] = (prevsse[0], prevsse[1], str(prevsse_end),
                                    prevsse[3], prevsse[4], prevsse[5])
                sys.stderr.write('changed to ' + str(sselist[i-1]) + ' and ' +
                                 str(sselist[i]) + '\n')

        # rebuild the helix_list and strand_list with our modified tuples
        self.helix_list = [ (chain, start, endchain, end, htype)
                            for (chain, start, end, endchain, ssetype, htype)
                            in sselist if ssetype == 'H' ]
        self.strand_list = [ (chain, start, endchain, end)
                             for (chain, start, end, endchain, ssetype, htype)
                             in sselist if ssetype == 'E' ]
        return is_valid