def get_residue_list(self, pdb_struct, domain, getchainid=None):
    """
    Return list of Bio.PDB Residue objects in this domain, and optionally
    in the specified chain.

    Parameters:
       pdb_struct - Bio.PDB parsed PDB struct for the protein. If None,
                    self.pdb_struct is used instead.
       domain - PTDomain (ptdomain.py) object listing the segment(s)
                that make up this domain (only one domain processed at a
                time).
       getchainid - chain identifier to get residues in (default None -
                    all chains).

    Return value:
       list of Bio.PDB Residue objects in the domain (and optionally chain).

    Raises exceptions:
       NoSSE_Exception for empty structure (happens eg on d1oayi_.ent)
    """
    # Use the structure that was passed in. Previously the argument was
    # silently ignored in favour of self.pdb_struct; the member is now only
    # a fallback for callers that pass None.
    if pdb_struct is None:
        pdb_struct = self.pdb_struct

    try:
        pdb_model = pdb_struct[0]  # TODO always using model 0 for now
    except KeyError:
        raise NoSSE_Exception

    residue_list = []
    for chain in pdb_model:
        chainid = ptsecstruct.pdb_chainid_to_stride_chainid(chain.get_id())
        if getchainid and getchainid != chainid:
            continue  # this is not the chain we want

        # Build a list of Bio.PDB Residue objects that are in this domain.
        # id of a residue in Bio.PDB is tuple (hetatm, resseqnum, icode)
        # so we choose those where residue PDB number
        # (in the current chain) is in the domain.
        # TODO: maybe should use polypeptide builder for this instead
        # (and indeed should probably use it right from the beginning)
        residue_list += [
            residue
            for residue in chain.get_unpacked_list()
            if is_aa(residue)
            and domain.is_in_domain(chainid, residue.get_id()[1])
        ]
        if getchainid:
            break  # if getchainid specified, we now have it so can quit
    return residue_list
def get_residue_list(self, pdb_struct, domain, getchainid=None):
    """
    Return list of Bio.PDB Residue objects in this domain, and optionally
    in the specified chain.

    Parameters:
       pdb_struct - Bio.PDB parsed PDB struct for the protein. If None,
                    self.pdb_struct is used instead.
       domain - PTDomain (ptdomain.py) object listing the segment(s)
                that make up this domain (only one domain processed at a
                time).
       getchainid - chain identifier to get residues in (default None -
                    all chains).

    Return value:
       list of Bio.PDB Residue objects in the domain (and optionally chain).

    Raises exceptions:
       NoSSE_Exception for empty structure (happens eg on d1oayi_.ent)
    """
    # Use the structure that was passed in. Previously the argument was
    # silently ignored in favour of self.pdb_struct; the member is now only
    # a fallback for callers that pass None.
    if pdb_struct is None:
        pdb_struct = self.pdb_struct

    try:
        pdb_model = pdb_struct[0]  # TODO always using model 0 for now
    except KeyError:
        raise NoSSE_Exception

    residue_list = []
    for chain in pdb_model:
        chainid = ptsecstruct.pdb_chainid_to_stride_chainid(chain.get_id())
        if getchainid and getchainid != chainid:
            continue  # this is not the chain we want

        # Build a list of Bio.PDB Residue objects that are in this domain.
        # id of a residue in Bio.PDB is tuple (hetatm, resseqnum, icode)
        # so we choose those where residue PDB number
        # (in the current chain) is in the domain.
        # TODO: maybe should use polypeptide builder for this instead
        # (and indeed should probably use it right from the beginning)
        residue_list += [
            residue
            for residue in chain.get_unpacked_list()
            if is_aa(residue)
            and domain.is_in_domain(chainid, residue.get_id()[1])
        ]
        if getchainid:
            break  # if getchainid specified, we now have it so can quit
    return residue_list
def accept_residue(self, residue):
    """
    Override of the base accept_residue() function: accept only residues
    that lie in our domain, and reject HETATMs.

    Parameters:
       residue - Bio.PDB Residue object of residue to test

    Return value:
       1 to accept residue, 0 to reject.
    """
    parent_chain = residue.get_parent()
    stride_chainid = pdb_chainid_to_stride_chainid(parent_chain.get_id())

    # id of a residue in Bio.PDB is tuple (hetatm, resseqnum, icode);
    # the residue must be in the domain by chain and PDB residue number.
    pdb_resnum = residue.get_id()[1]
    if not self.domain.is_in_domain(stride_chainid, pdb_resnum):
        return 0

    # Only a blank hetatm field is accepted.
    hetatm_flag = residue.get_id()[0]
    return 1 if hetatm_flag == ' ' else 0
def accept_residue(self, residue):
    """
    Override of the base accept_residue() function: accept only residues
    that lie in our domain, and reject HETATMs.

    Parameters:
       residue - Bio.PDB Residue object of residue to test

    Return value:
       1 to accept residue, 0 to reject.
    """
    parent_chain = residue.get_parent()
    stride_chainid = pdb_chainid_to_stride_chainid(parent_chain.get_id())

    # id of a residue in Bio.PDB is tuple (hetatm, resseqnum, icode);
    # the residue must be in the domain by chain and PDB residue number.
    pdb_resnum = residue.get_id()[1]
    if not self.domain.is_in_domain(stride_chainid, pdb_resnum):
        return 0

    # Only a blank hetatm field is accepted.
    hetatm_flag = residue.get_id()[0]
    return 1 if hetatm_flag == ' ' else 0
def build_graph_from_secstruct(self, secstruct, domain, chainid=None,
                               ignore_insertion_codes=False):
    """
    Build the list of nodes from the supplied PTSecStruct object.

    Parameters:
       secstruct - PTSecStruct (ptsecstruct.py) object to build from
       domain - PTDomain (ptdomain.py) object listing the segment(s)
                that make up this domain (only one domain processed at a
                time). It is only queried here (is_in_domain(), domainid);
                SSEs crossing a domain boundary must already have been
                moved wholly into one domain (this is asserted).
       chainid - chain identifier to build graph for only this chain,
                 or None for all chains (default)
       ignore_insertion_codes - If True, a hack to make it work with
                 PMML (only) which does not report insertion codes
                 unlike DSSP and STRIDE

    Uses member data (write):
       chain_dict - dict of { chainid : node_list } where node_list is
                    list of nodes in order, built in this function
       secstruct - keeps a pointer to the supplied secstruct
       residue_list - list of residues from get_residue_list() for
                    PTDomain(None, None)
       pdb_resid_dict - dict of { (chainid, pdb_resid) : index into
                    residue_list }
       seqnum2node - dict of { seqnum : PTNode }, numbered from 1

      (readonly):
       pdb_struct - The Bio.PDB parsed PDB struct (atomic co-ordinates)
                    for this protein.
       include_310_helices, include_pi_helices - if true, include
                    these kinds of helices.

    Raises exceptions:
       NoSSE_Exception if no helices or strands found

    Return value:
       None.
    """
    self.secstruct = secstruct

    helix_num = 1
    strand_num = 1

    num_helices_in_domain = 0
    num_strands_in_domain = 0

    #
    # Build dictionary mapping (chainid, pdb_resid) to index in residue_list
    # for ALL residues, not just those in this domain.
    #
    self.residue_list = self.get_residue_list(self.pdb_struct,
                                              PTDomain(None, None))
    self.pdb_resid_dict = {}
    for seq_indx, residue in enumerate(self.residue_list):
        resid_key = (
            ptsecstruct.pdb_chainid_to_stride_chainid(
                residue.get_full_id()[2]),
            biopdbresid_to_pdbresseq(residue.get_id(),
                                     ignore_insertion_codes),
        )
        self.pdb_resid_dict[resid_key] = seq_indx

    # Note that now we are only adding elements in the supplied domain,
    # so the so-called 'chains' may really be segments, i.e. subsequences
    # of chains (rest of chain may be in other domain(s)
    self.chain_dict = {}  # dict of {chainid : node_list}

    for (start_chainid, start_resnum, end_chainid, end_resnum, helixtype) \
            in secstruct.helix_list:
        assert start_chainid == end_chainid  # helix must be same chain
        if chainid and chainid != start_chainid:
            continue  # chainid specified, skip ones not in that chain
        # will consider structures in domain if first residue is in domain
        if not domain.is_in_domain(start_chainid,
                                   get_int_icode(start_resnum)[0]):
            continue
        # Counted before the helix type filter below, so excluded pi/310
        # helices still count towards "domain has SSEs".
        num_helices_in_domain += 1
        if helixtype == "H":
            idprefix = "ALPHAHELIX_"
            htype = "ALPHA"
        elif helixtype == "I":
            if not self.include_pi_helices:
                continue
            idprefix = "PIHELIX_"
            htype = "PI"
        elif helixtype == "G":
            if not self.include_310_helices:
                continue
            idprefix = "310HELIX_"
            htype = "310"
        else:  # shouldn't happen
            sys.stderr.write("ERROR: bad helix type " + helixtype + "\n")
            # Skip it: falling through would build a node from unbound
            # (or the previous helix's) type, prefix and number.
            continue
        this_helix_num = helix_num
        helix_num += 1
        ah_node = PTNodeHelix(htype,
                              idprefix + start_chainid + "_" +
                              str(this_helix_num),
                              this_helix_num, start_resnum, end_resnum,
                              start_chainid,
                              domain.domainid,
                              self.residue_list, self.pdb_resid_dict)
        if start_chainid not in self.chain_dict:
            self.chain_dict[start_chainid] = []
        self.chain_dict[start_chainid].append(ah_node)

        # we must already have handled the case of SSEs that cross
        # domain boundaries (by moving whole SSE to one of the domains)
        assert domain.is_in_domain(end_chainid,
                                   get_int_icode(end_resnum)[0])

    for (start_chainid, start_resnum, end_chainid, end_resnum) \
            in secstruct.strand_list:
        assert start_chainid == end_chainid  # must be in same chain
        if chainid and chainid != start_chainid:
            continue  # chainid specified, skip ones not in that chain
        if not domain.is_in_domain(start_chainid,
                                   get_int_icode(start_resnum)[0]):
            continue
        num_strands_in_domain += 1
        bs_node = PTNodeStrand("STRAND_" + start_chainid + "_" +
                               str(strand_num),
                               strand_num, start_resnum, end_resnum,
                               start_chainid,
                               domain.domainid,
                               self.residue_list,
                               self.pdb_resid_dict)
        strand_num += 1
        if start_chainid not in self.chain_dict:
            self.chain_dict[start_chainid] = []

        # we must already have handled the case of SSEs that cross
        # domain boundaries (by moving whole SSE to one of the domains)
        assert domain.is_in_domain(end_chainid,
                                   get_int_icode(end_resnum)[0])
        self.chain_dict[start_chainid].append(bs_node)

    # raise an exception if there are no SSEs at all in this domain
    if num_helices_in_domain == 0 and num_strands_in_domain == 0:
        raise NoSSE_Exception

    delete_chainid_list = []  # list of chainids to delete from chain_dict
    # Loop variable deliberately not called 'chainid' so the parameter of
    # that name is not rebound.
    for (this_chainid, nodelist) in self.chain_dict.items():
        # sort in order of start residue id ascending (all must be disjoint)
        nodelist.sort()

        if not nodelist:
            # There are no SSEs in this chain, get rid of it.
            sys.stderr.write('WARNING: no SSEs in chain ' + this_chainid +
                             '; chain ignored\n')
            delete_chainid_list.append(this_chainid)  # don't delete in loop
            continue

        # Check for chain with only SSEs that will not be drawn
        # (i.e. pi or 310 helices), and delete those too
        found_useful_node = False
        for ptnode in nodelist:
            if isinstance(ptnode, PTNodeStrand):
                found_useful_node = True
                break
            if isinstance(ptnode, PTNodeHelix):
                node_type = ptnode.get_type()
                if (node_type == "ALPHA" or
                        (node_type == "310" and self.include_310_helices) or
                        (node_type == "PI" and self.include_pi_helices)):
                    found_useful_node = True
                    break
        if not found_useful_node:
            sys.stderr.write('WARNING: only pi or 310 helices in chain ' +
                             this_chainid + '; chain ignored\n')
            delete_chainid_list.append(this_chainid)

    # delete chains from chain_dict that were marked earlier for deletion
    for this_chainid in delete_chainid_list:
        self.chain_dict.pop(this_chainid)

    # -------------------------------------------------------------------
    # This is needed only for labelling sheets for HH and KK codes
    # (see dfs_strands() etc. below)

    # add edges for hydrogen bonds
    # uses secstruct and chainid member data
    # these are used for determining which side bridge partners are
    # on (and also for drawing a hydrogen bond graph if requested)
    self.add_hbond_edges_from_secstruct()

    # add edges for bridge partners
    # uses secstruct and chainid member data
    self.add_bridge_edges_from_secstruct()

    # ---------------------------------------------------------------------
    # for sequential numbering, we'll build this dictionary mapping
    # sequential number (note NOT restarting for each chain)
    # to PTNode
    # so that sequential numbers as used in ptgraph2 -b sequential
    # option.
    # this is a dictionary of { seqnum : PTNode }
    numbered_nodes = [
        node for node in self.iter_nodes()
        if not (
            isinstance(node, PTNodeTerminus)
            or (isinstance(node, PTNodeHelix)
                and ((node.get_type() == "310"
                      and not self.include_310_helices)
                     or (node.get_type() == "PI"
                         and not self.include_pi_helices)))
        )
    ]
    self.seqnum2node = {}
    for (seqnum, node) in enumerate(numbered_nodes):
        self.seqnum2node[seqnum + 1] = node  # start at 1 not 0
def build_graph_from_secstruct(self, secstruct, domain, chainid=None,
                               ignore_insertion_codes=False):
    """
    Build the list of nodes from the supplied PTSecStruct object.

    Parameters:
       secstruct - PTSecStruct (ptsecstruct.py) object to build from
       domain - PTDomain (ptdomain.py) object listing the segment(s)
                that make up this domain (only one domain processed at a
                time). It is only queried here (is_in_domain(), domainid);
                SSEs crossing a domain boundary must already have been
                moved wholly into one domain (this is asserted).
       chainid - chain identifier to build graph for only this chain,
                 or None for all chains (default)
       ignore_insertion_codes - If True, a hack to make it work with
                 PMML (only) which does not report insertion codes
                 unlike DSSP and STRIDE

    Uses member data (write):
       chain_dict - dict of { chainid : node_list } where node_list is
                    list of nodes in order, built in this function
       secstruct - keeps a pointer to the supplied secstruct
       residue_list - list of residues from get_residue_list() for
                    PTDomain(None, None)
       pdb_resid_dict - dict of { (chainid, pdb_resid) : index into
                    residue_list }
       seqnum2node - dict of { seqnum : PTNode }, numbered from 1

      (readonly):
       pdb_struct - The Bio.PDB parsed PDB struct (atomic co-ordinates)
                    for this protein.
       include_310_helices, include_pi_helices - if true, include
                    these kinds of helices.

    Raises exceptions:
       NoSSE_Exception if no helices or strands found

    Return value:
       None.
    """
    self.secstruct = secstruct

    helix_num = 1
    strand_num = 1

    num_helices_in_domain = 0
    num_strands_in_domain = 0

    #
    # Build dictionary mapping (chainid, pdb_resid) to index in residue_list
    # for ALL residues, not just those in this domain.
    #
    self.residue_list = self.get_residue_list(self.pdb_struct,
                                              PTDomain(None, None))
    self.pdb_resid_dict = {}
    for seq_indx, residue in enumerate(self.residue_list):
        resid_key = (
            ptsecstruct.pdb_chainid_to_stride_chainid(
                residue.get_full_id()[2]),
            biopdbresid_to_pdbresseq(residue.get_id(),
                                     ignore_insertion_codes),
        )
        self.pdb_resid_dict[resid_key] = seq_indx

    # Note that now we are only adding elements in the supplied domain,
    # so the so-called 'chains' may really be segments, i.e. subsequences
    # of chains (rest of chain may be in other domain(s)
    self.chain_dict = {}  # dict of {chainid : node_list}

    for (start_chainid, start_resnum, end_chainid, end_resnum, helixtype) \
            in secstruct.helix_list:
        assert start_chainid == end_chainid  # helix must be same chain
        if chainid and chainid != start_chainid:
            continue  # chainid specified, skip ones not in that chain
        # will consider structures in domain if first residue is in domain
        if not domain.is_in_domain(start_chainid,
                                   get_int_icode(start_resnum)[0]):
            continue
        # Counted before the helix type filter below, so excluded pi/310
        # helices still count towards "domain has SSEs".
        num_helices_in_domain += 1
        if helixtype == "H":
            idprefix = "ALPHAHELIX_"
            htype = "ALPHA"
        elif helixtype == "I":
            if not self.include_pi_helices:
                continue
            idprefix = "PIHELIX_"
            htype = "PI"
        elif helixtype == "G":
            if not self.include_310_helices:
                continue
            idprefix = "310HELIX_"
            htype = "310"
        else:  # shouldn't happen
            sys.stderr.write("ERROR: bad helix type " + helixtype + "\n")
            # Skip it: falling through would build a node from unbound
            # (or the previous helix's) type, prefix and number.
            continue
        this_helix_num = helix_num
        helix_num += 1
        ah_node = PTNodeHelix(htype,
                              idprefix + start_chainid + "_" +
                              str(this_helix_num),
                              this_helix_num, start_resnum, end_resnum,
                              start_chainid,
                              domain.domainid,
                              self.residue_list, self.pdb_resid_dict)
        if start_chainid not in self.chain_dict:
            self.chain_dict[start_chainid] = []
        self.chain_dict[start_chainid].append(ah_node)

        # we must already have handled the case of SSEs that cross
        # domain boundaries (by moving whole SSE to one of the domains)
        assert domain.is_in_domain(end_chainid,
                                   get_int_icode(end_resnum)[0])

    for (start_chainid, start_resnum, end_chainid, end_resnum) \
            in secstruct.strand_list:
        assert start_chainid == end_chainid  # must be in same chain
        if chainid and chainid != start_chainid:
            continue  # chainid specified, skip ones not in that chain
        if not domain.is_in_domain(start_chainid,
                                   get_int_icode(start_resnum)[0]):
            continue
        num_strands_in_domain += 1
        bs_node = PTNodeStrand("STRAND_" + start_chainid + "_" +
                               str(strand_num),
                               strand_num, start_resnum, end_resnum,
                               start_chainid,
                               domain.domainid,
                               self.residue_list,
                               self.pdb_resid_dict)
        strand_num += 1
        if start_chainid not in self.chain_dict:
            self.chain_dict[start_chainid] = []

        # we must already have handled the case of SSEs that cross
        # domain boundaries (by moving whole SSE to one of the domains)
        assert domain.is_in_domain(end_chainid,
                                   get_int_icode(end_resnum)[0])
        self.chain_dict[start_chainid].append(bs_node)

    # raise an exception if there are no SSEs at all in this domain
    if num_helices_in_domain == 0 and num_strands_in_domain == 0:
        raise NoSSE_Exception

    delete_chainid_list = []  # list of chainids to delete from chain_dict
    # Loop variable deliberately not called 'chainid' so the parameter of
    # that name is not rebound.
    for (this_chainid, nodelist) in self.chain_dict.items():
        # sort in order of start residue id ascending (all must be disjoint)
        nodelist.sort()

        if not nodelist:
            # There are no SSEs in this chain, get rid of it.
            sys.stderr.write("WARNING: no SSEs in chain " + this_chainid +
                             "; chain ignored\n")
            delete_chainid_list.append(this_chainid)  # don't delete in loop
            continue

        # Check for chain with only SSEs that will not be drawn
        # (i.e. pi or 310 helices), and delete those too
        found_useful_node = False
        for ptnode in nodelist:
            if isinstance(ptnode, PTNodeStrand):
                found_useful_node = True
                break
            if isinstance(ptnode, PTNodeHelix):
                node_type = ptnode.get_type()
                if (node_type == "ALPHA" or
                        (node_type == "310" and self.include_310_helices) or
                        (node_type == "PI" and self.include_pi_helices)):
                    found_useful_node = True
                    break
        if not found_useful_node:
            sys.stderr.write("WARNING: only pi or 310 helices in chain " +
                             this_chainid + "; chain ignored\n")
            delete_chainid_list.append(this_chainid)

    # delete chains from chain_dict that were marked earlier for deletion
    for this_chainid in delete_chainid_list:
        self.chain_dict.pop(this_chainid)

    # -------------------------------------------------------------------
    # This is needed only for labelling sheets for HH and KK codes
    # (see dfs_strands() etc. below)

    # add edges for hydrogen bonds
    # uses secstruct and chainid member data
    # these are used for determining which side bridge partners are
    # on (and also for drawing a hydrogen bond graph if requested)
    self.add_hbond_edges_from_secstruct()

    # add edges for bridge partners
    # uses secstruct and chainid member data
    self.add_bridge_edges_from_secstruct()

    # ---------------------------------------------------------------------
    # for sequential numbering, we'll build this dictionary mapping
    # sequential number (note NOT restarting for each chain)
    # to PTNode
    # so that sequential numbers as used in ptgraph2 -b sequential
    # option.
    # this is a dictionary of { seqnum : PTNode }
    numbered_nodes = [
        node for node in self.iter_nodes()
        if not (
            isinstance(node, PTNodeTerminus)
            or (isinstance(node, PTNodeHelix)
                and ((node.get_type() == "310"
                      and not self.include_310_helices)
                     or (node.get_type() == "PI"
                         and not self.include_pi_helices)))
        )
    ]
    self.seqnum2node = {}
    for (seqnum, node) in enumerate(numbered_nodes):
        self.seqnum2node[seqnum + 1] = node  # start at 1 not 0