Ejemplo n.º 1
0
    def get_nearest_sse_residues(self, sse1, sse2):
        """
        Look up the closest pair of residues between two SSEs and return
        their PDB residue sequence numbers.

        Nothing is computed here: the pair is read from sse_residue_map,
        which calc_sse_dist_matrix() fills in while building the SSE
        distance matrix. The intended use is to find nearby SSEs first
        (e.g. with get_min_distance_sse() or another function that uses
        the SSE distance matrix) and then, if required, call this
        function to recover the particular residues that were used in
        calculating that minimum distance.

        Parameters:
           sse1 - PTNode for helix/strand 1
           sse2 - PTNode for helix/strand 2

        Return value:
           tuple (res_seq_num_1, res_seq_num_2) giving the residue
           sequence numbers, in sse1 and sse2 respectively, of the two
           residues recorded as having minimum distance to each other
           (of all residues in sse1 and sse2)

        Uses data members (readonly):
            sse_residue_map -
              dict of {(ptnode1, ptnode2) : (residue1, residue2)}
              giving, for every pair of SSEs, the residue in each that
              is closest to the other (as used in the distance matrix).
              Both (ptnode1, ptnode2) and (ptnode2, ptnode1) are stored,
              with the residues swapped appropriately, so the argument
              order determines the order of the returned tuple.

        """
        nearest_pair = self.sse_residue_map[(sse1, sse2)]
        first_residue, second_residue = nearest_pair
        # A Bio.PDB residue id is the tuple (hetatm, resseqnum, icode);
        # biopdbresid_to_pdbresseq() turns it into a PDB residue
        # sequence number.
        return (
            biopdbresid_to_pdbresseq(first_residue.get_id()),
            biopdbresid_to_pdbresseq(second_residue.get_id()),
        )
Ejemplo n.º 2
0
    def get_nearest_sse_residues(self, sse1, sse2):
        """
        Return the PDB residue sequence numbers of the two residues, one
        in each of the given SSEs, that are nearest to each other.

        The answer is read from sse_residue_map, which is built by
        calc_sse_dist_matrix() as a by-product of building the SSE
        distance matrix. Nearest SSEs are normally found first with
        get_min_distance_sse() (or other functions using the SSE
        distance matrix); this function is then used, if required, to
        retrieve the particular residues that were used in calculating
        the minimum distance between those SSEs.

        Parameters:
           sse1 - PTNode for helix/strand 1
           sse2 - PTNode for helix/strand 2

        Return value:
           tuple (res_seq_num_1, res_seq_num_2) where res_seq_num_1 and
           res_seq_num_2 are the residue sequence numbers in sse1 and
           sse2 respectively that have minimum distance to each other
           (of all residues in sse1 and sse2)

        Uses data members (readonly):
            sse_residue_map -
              dict of {(ptnode1, ptnode2) : (residue1, residue2)}
              which for every pair of SSEs gives the residue in each
              that is closest to the other (used in the distance
              matrix). Both (ptnode1, ptnode2) and (ptnode2, ptnode1)
              are stored, with the residues swapped appropriately.

        """
        residue_a, residue_b = self.sse_residue_map[(sse1, sse2)]
        # id of a residue in Bio.PDB is the tuple (hetatm, resseqnum, icode)
        resid_a = residue_a.get_id()
        seq_num_a = biopdbresid_to_pdbresseq(resid_a)
        resid_b = residue_b.get_id()
        seq_num_b = biopdbresid_to_pdbresseq(resid_b)
        return (seq_num_a, seq_num_b)
Ejemplo n.º 3
0
    def build_graph_from_secstruct(self,
                                   secstruct,
                                   domain,
                                   chainid=None,
                                   ignore_insertion_codes=False):
        """
        Build the per-chain lists of SSE nodes (helices and strands) from
        the supplied PTSecStruct object, then add hydrogen bond and bridge
        edges and the sequential-number lookup table.

        Parameters:
            secstruct - PTSecStruct (ptsecstruct.py) object to build from
            domain - PTDomain (ptdomain.py) object listing the segment(s)
                     that make up this domain (only one domain processed at a
                     time). An SSE is treated as being in the domain when
                     its first residue is; its last residue is then
                     asserted to be in the domain as well.
                     NOTE(review): previously documented as (in/out), but
                     this method only calls is_in_domain() and reads
                     domainid - confirm whether it can still be modified.
            chainid - chain identifier to build graph for only this chain,
                      or None for all chains (default)
            ignore_insertion_codes - If True, a hack to make it work with
                      PMML (only) which does not report insertion codes
                      unlike DSSP and STRIDE. Passed through to
                      biopdbresid_to_pdbresseq().

        Uses member data (write):
            secstruct - keeps a pointer to the supplied secstruct
            residue_list - residues returned by get_residue_list() for
                          PTDomain(None, None), i.e. ALL residues, not
                          just those in this domain
            pdb_resid_dict - dict of { (chainid, pdb_resseq) : index into
                          residue_list }
            chain_dict - dict of { chainid : node_list } where node_list is
                          list of nodes in order, built in this function
            seqnum2node - dict of { seqnum : PTNode }, numbered from 1 and
                          NOT restarting for each chain

          (readonly):
            pdb_struct - The Bio.PDB parsed PDB struct (atomic co-ordinates)
                         for this protein.
            include_310_helices, include_pi_helices - if true, include
                         these kinds of helices.

        Raises exceptions:
           NoSSE_Exception if no helix or strand starts in this domain
           AssertionError if an SSE starts and ends in different chains,
                          or starts in the domain but ends outside it

        Return value:
            None.

        """

        self.secstruct = secstruct

        helix_num = 1
        strand_num = 1

        num_helices_in_domain = 0
        num_strands_in_domain = 0

        #
        # Build dictionary mapping (chainid, pdb_resid) to index in residue_list
        # for ALL residues, not just those in this domain.
        #
        self.residue_list = self.get_residue_list(self.pdb_struct,
                                                  PTDomain(None, None))
        self.pdb_resid_dict = {}
        for (seq_indx, residue) in enumerate(self.residue_list):
            stride_chainid = ptsecstruct.pdb_chainid_to_stride_chainid(
                residue.get_full_id()[2])
            pdb_resseq = biopdbresid_to_pdbresseq(residue.get_id(),
                                                  ignore_insertion_codes)
            self.pdb_resid_dict[(stride_chainid, pdb_resseq)] = seq_indx

        # Note that now we are only adding elements in the supplied domain,
        # so the so-called 'chains' may really be segments, i.e. subsequences
        # of chains (rest of chain may be in other domain(s))

        self.chain_dict = {}  # dict of {chainid : node_list}

        for (start_chainid, start_resnum, end_chainid, end_resnum, helixtype) \
              in secstruct.helix_list:
            assert (start_chainid == end_chainid)  # helix must be same chain
            if chainid and chainid != start_chainid:
                continue  # chainid specified, skip ones not in that chain
            # will consider structures in domain if first residue is in domain
            if not domain.is_in_domain(start_chainid,
                                       get_int_icode(start_resnum)[0]):
                continue
            # NOTE(review): the helix is counted before the type filter
            # below, so excluded pi/310 helices still suppress
            # NoSSE_Exception. Kept as-is; confirm this is intended.
            num_helices_in_domain += 1
            if helixtype == "H":
                idprefix = "ALPHAHELIX_"
                htype = "ALPHA"
            elif helixtype == "I":
                if not self.include_pi_helices:
                    continue
                idprefix = "PIHELIX_"
                htype = "PI"
            elif helixtype == "G":
                if not self.include_310_helices:
                    continue
                idprefix = "310HELIX_"
                htype = "310"
            else:  # shouldn't happen
                sys.stderr.write("ERROR: bad helix type " + helixtype +
                                 "\n")
                # Skip this helix: htype and idprefix would otherwise be
                # unbound, or left over from the previous helix.
                continue
            # all helix types share a single numbering sequence
            this_helix_num = helix_num
            helix_num += 1
            ah_node = PTNodeHelix(htype,
                                  idprefix + start_chainid + "_" +
                                  str(this_helix_num),
                                  this_helix_num,
                                  start_resnum, end_resnum, start_chainid,
                                  domain.domainid,
                                  self.residue_list, self.pdb_resid_dict)
            self.chain_dict.setdefault(start_chainid, []).append(ah_node)

            # we must already have handled the case of SSEs that cross
            # domain boundaries (by moving whole SSE to one of the domains)
            assert (domain.is_in_domain(end_chainid,
                                        get_int_icode(end_resnum)[0]))

        for (start_chainid, start_resnum, end_chainid, end_resnum) \
                in secstruct.strand_list:
            assert (start_chainid == end_chainid)  # must be in same chain
            if chainid and chainid != start_chainid:
                continue  # chainid specified, skip ones not in that chain
            if not domain.is_in_domain(start_chainid,
                                       get_int_icode(start_resnum)[0]):
                continue
            num_strands_in_domain += 1
            bs_node = PTNodeStrand("STRAND_" + start_chainid + "_" +
                                   str(strand_num),
                                   strand_num,
                                   start_resnum, end_resnum, start_chainid,
                                   domain.domainid,
                                   self.residue_list,
                                   self.pdb_resid_dict)
            strand_num += 1
            chain_nodes = self.chain_dict.setdefault(start_chainid, [])

            # we must already have handled the case of SSEs that cross
            # domain boundaries (by moving whole SSE to one of the domains)
            assert (domain.is_in_domain(end_chainid,
                                        get_int_icode(end_resnum)[0]))
            chain_nodes.append(bs_node)

        # raise an exception if there are no SSEs at all in this domain
        if num_helices_in_domain == 0 and num_strands_in_domain == 0:
            raise NoSSE_Exception

        delete_chainid_list = []  # list of chainids to delete from chain_dict
        # items() rather than the Python 2 only iteritems(); the dict itself
        # is not modified inside this loop (deletion is deferred).
        for (node_chainid, nodelist) in self.chain_dict.items():
            # sort in order of start residue id ascending (all must be disjoint)
            nodelist.sort()

            if not nodelist:
                # There are no SSEs in this chain, get rid of it.
                sys.stderr.write('WARNING: no SSEs in chain ' + node_chainid +
                                 '; chain ignored\n')
                delete_chainid_list.append(
                    node_chainid)  # don't delete while in loop
                continue

            # Check for chain with only SSEs that will not be drawn
            # (i.e. pi or 310 helices), and delete those too
            found_useful_node = False
            for ptnode in nodelist:
                if isinstance(ptnode, PTNodeStrand):
                    found_useful_node = True
                    break
                elif isinstance(ptnode, PTNodeHelix):
                    if ptnode.get_type() == "ALPHA":
                        found_useful_node = True
                        break
                    elif ((ptnode.get_type() == "310"
                           and self.include_310_helices)
                          or (ptnode.get_type() == "PI"
                              and self.include_pi_helices)):
                        found_useful_node = True
                        break
            if not found_useful_node:
                sys.stderr.write(
                    'WARNING: only pi or 310 helices in chain ' +
                    node_chainid + '; chain ignored\n')
                delete_chainid_list.append(node_chainid)

        # delete chains from chain_dict that were marked earlier for deletion
        for node_chainid in delete_chainid_list:
            self.chain_dict.pop(node_chainid)

        # -------------------------------------------------------------------

        # This is needed only for labelling sheets for HH and KK codes
        # (see dfs_strands() etc. below)

        # add edges for hydrogen bonds
        # uses secstruct and chainid member data
        # these are used for determining which side bridge partners are
        # on (and also for drawing a hydrogen bond graph if requested)
        self.add_hbond_edges_from_secstruct()

        # add edges for bridge partners
        # uses secstruct and chainid member data
        self.add_bridge_edges_from_secstruct()

        #---------------------------------------------------------------------

        # for sequential numbering, we'll build this dictionary mapping
        # sequential number (note NOT restarting for each chain)
        # to PTNode
        # so that sequential numbers as used in ptgraph2 -b sequential
        # option.
        # this is a dictionary of { seqnum : PTNode }
        self.seqnum2node = {}
        for (seqnum, node) in \
            enumerate([node for node in self.iter_nodes() if \
                       not ( (isinstance(node, PTNodeTerminus)) or
                              (isinstance(node, PTNodeHelix) and
                               ( (node.get_type() == "310" and
                                  not self.include_310_helices) or
                                 (node.get_type() == "PI" and
                                  not self.include_pi_helices) ) ) ) ]):
            self.seqnum2node[seqnum + 1] = node  # start at 1 not 0
    def build_graph_from_secstruct(self, secstruct, domain, chainid=None, ignore_insertion_codes=False):
        """
        Build the per-chain lists of SSE nodes (helices and strands) from
        the supplied PTSecStruct object, then add hydrogen bond and bridge
        edges and the sequential-number lookup table.

        Parameters:
            secstruct - PTSecStruct (ptsecstruct.py) object to build from
            domain - PTDomain (ptdomain.py) object listing the segment(s)
                     that make up this domain (only one domain processed at a
                     time). An SSE is treated as being in the domain when
                     its first residue is; its last residue is then
                     asserted to be in the domain as well.
                     NOTE(review): previously documented as (in/out), but
                     this method only calls is_in_domain() and reads
                     domainid - confirm whether it can still be modified.
            chainid - chain identifier to build graph for only this chain,
                      or None for all chains (default)
            ignore_insertion_codes - If True, a hack to make it work with
                      PMML (only) which does not report insertion codes
                      unlike DSSP and STRIDE. Passed through to
                      biopdbresid_to_pdbresseq().

        Uses member data (write):
            secstruct - keeps a pointer to the supplied secstruct
            residue_list - residues returned by get_residue_list() for
                          PTDomain(None, None), i.e. ALL residues, not
                          just those in this domain
            pdb_resid_dict - dict of { (chainid, pdb_resseq) : index into
                          residue_list }
            chain_dict - dict of { chainid : node_list } where node_list is
                          list of nodes in order, built in this function
            seqnum2node - dict of { seqnum : PTNode }, numbered from 1 and
                          NOT restarting for each chain

          (readonly):
            pdb_struct - The Bio.PDB parsed PDB struct (atomic co-ordinates)
                         for this protein.
            include_310_helices, include_pi_helices - if true, include
                         these kinds of helices.

        Raises exceptions:
           NoSSE_Exception if no helix or strand starts in this domain
           AssertionError if an SSE starts and ends in different chains,
                          or starts in the domain but ends outside it

        Return value:
            None.

        """

        self.secstruct = secstruct

        helix_num = 1
        strand_num = 1

        num_helices_in_domain = 0
        num_strands_in_domain = 0

        #
        # Build dictionary mapping (chainid, pdb_resid) to index in residue_list
        # for ALL residues, not just those in this domain.
        #
        self.residue_list = self.get_residue_list(self.pdb_struct, PTDomain(None, None))
        self.pdb_resid_dict = {}
        for seq_indx, residue in enumerate(self.residue_list):
            resid_key = (
                ptsecstruct.pdb_chainid_to_stride_chainid(residue.get_full_id()[2]),
                biopdbresid_to_pdbresseq(residue.get_id(), ignore_insertion_codes),
            )
            self.pdb_resid_dict[resid_key] = seq_indx

        # Note that now we are only adding elements in the supplied domain,
        # so the so-called 'chains' may really be segments, i.e. subsequences
        # of chains (rest of chain may be in other domain(s))

        self.chain_dict = {}  # dict of {chainid : node_list}

        for (start_chainid, start_resnum, end_chainid, end_resnum, helixtype) in secstruct.helix_list:
            assert start_chainid == end_chainid  # helix must be same chain
            if chainid and chainid != start_chainid:
                continue  # chainid specified, skip ones not in that chain
            # will consider structures in domain if first residue is in domain
            if not domain.is_in_domain(start_chainid, get_int_icode(start_resnum)[0]):
                continue
            # NOTE(review): the helix is counted before the type filter
            # below, so excluded pi/310 helices still suppress
            # NoSSE_Exception. Kept as-is; confirm this is intended.
            num_helices_in_domain += 1
            if helixtype == "H":
                idprefix = "ALPHAHELIX_"
                htype = "ALPHA"
            elif helixtype == "I":
                if not self.include_pi_helices:
                    continue
                idprefix = "PIHELIX_"
                htype = "PI"
            elif helixtype == "G":
                if not self.include_310_helices:
                    continue
                idprefix = "310HELIX_"
                htype = "310"
            else:  # shouldn't happen
                sys.stderr.write("ERROR: bad helix type " + helixtype + "\n")
                # Skip this helix: htype and idprefix would otherwise be
                # unbound, or left over from the previous helix.
                continue
            # all helix types share a single numbering sequence
            this_helix_num = helix_num
            helix_num += 1
            ah_node = PTNodeHelix(
                htype,
                idprefix + start_chainid + "_" + str(this_helix_num),
                this_helix_num,
                start_resnum,
                end_resnum,
                start_chainid,
                domain.domainid,
                self.residue_list,
                self.pdb_resid_dict,
            )
            self.chain_dict.setdefault(start_chainid, []).append(ah_node)

            # we must already have handled the case of SSEs that cross
            # domain boundaries (by moving whole SSE to one of the domains)
            assert domain.is_in_domain(end_chainid, get_int_icode(end_resnum)[0])

        for (start_chainid, start_resnum, end_chainid, end_resnum) in secstruct.strand_list:
            assert start_chainid == end_chainid  # must be in same chain
            if chainid and chainid != start_chainid:
                continue  # chainid specified, skip ones not in that chain
            if not domain.is_in_domain(start_chainid, get_int_icode(start_resnum)[0]):
                continue
            num_strands_in_domain += 1
            bs_node = PTNodeStrand(
                "STRAND_" + start_chainid + "_" + str(strand_num),
                strand_num,
                start_resnum,
                end_resnum,
                start_chainid,
                domain.domainid,
                self.residue_list,
                self.pdb_resid_dict,
            )
            strand_num += 1
            chain_nodes = self.chain_dict.setdefault(start_chainid, [])

            # we must already have handled the case of SSEs that cross
            # domain boundaries (by moving whole SSE to one of the domains)
            assert domain.is_in_domain(end_chainid, get_int_icode(end_resnum)[0])
            chain_nodes.append(bs_node)

        # raise an exception if there are no SSEs at all in this domain
        if num_helices_in_domain == 0 and num_strands_in_domain == 0:
            raise NoSSE_Exception

        delete_chainid_list = []  # list of chainids to delete from chain_dict
        # items() rather than the Python 2 only iteritems(); the dict itself
        # is not modified inside this loop (deletion is deferred).
        for (node_chainid, nodelist) in self.chain_dict.items():
            # sort in order of start residue id ascending (all must be disjoint)
            nodelist.sort()

            if not nodelist:
                # There are no SSEs in this chain, get rid of it.
                sys.stderr.write("WARNING: no SSEs in chain " + node_chainid + "; chain ignored\n")
                delete_chainid_list.append(node_chainid)  # don't delete while in loop
                continue

            # Check for chain with only SSEs that will not be drawn
            # (i.e. pi or 310 helices), and delete those too
            found_useful_node = False
            for ptnode in nodelist:
                if isinstance(ptnode, PTNodeStrand):
                    found_useful_node = True
                    break
                elif isinstance(ptnode, PTNodeHelix):
                    if ptnode.get_type() == "ALPHA":
                        found_useful_node = True
                        break
                    elif (ptnode.get_type() == "310" and self.include_310_helices) or (
                        ptnode.get_type() == "PI" and self.include_pi_helices
                    ):
                        found_useful_node = True
                        break
            if not found_useful_node:
                sys.stderr.write(
                    "WARNING: only pi or 310 helices in chain " + node_chainid + "; chain ignored\n"
                )
                delete_chainid_list.append(node_chainid)

        # delete chains from chain_dict that were marked earlier for deletion
        for node_chainid in delete_chainid_list:
            self.chain_dict.pop(node_chainid)

        # -------------------------------------------------------------------

        # This is needed only for labelling sheets for HH and KK codes
        # (see dfs_strands() etc. below)

        # add edges for hydrogen bonds
        # uses secstruct and chainid member data
        # these are used for determining which side bridge partners are
        # on (and also for drawing a hydrogen bond graph if requested)
        self.add_hbond_edges_from_secstruct()

        # add edges for bridge partners
        # uses secstruct and chainid member data
        self.add_bridge_edges_from_secstruct()

        # ---------------------------------------------------------------------

        # for sequential numbering, we'll build this dictionary mapping
        # sequential number (note NOT restarting for each chain)
        # to PTNode
        # so that sequential numbers as used in ptgraph2 -b sequential
        # option.
        # this is a dictionary of { seqnum : PTNode }
        self.seqnum2node = {}
        for (seqnum, node) in enumerate(
            [
                node
                for node in self.iter_nodes()
                if not (
                    (isinstance(node, PTNodeTerminus))
                    or (
                        isinstance(node, PTNodeHelix)
                        and (
                            (node.get_type() == "310" and not self.include_310_helices)
                            or (node.get_type() == "PI" and not self.include_pi_helices)
                        )
                    )
                )
            ]
        ):
            self.seqnum2node[seqnum + 1] = node  # start at 1 not 0