def build_peptides(self, entity, aa_only=1):
    """Build and return a list of Polypeptide objects.

    Every chain is walked residue by residue. Two consecutive residues
    extend the current peptide only if both are accepted by
    ``self._accept`` and ``self._is_connected`` reports them as joined;
    anything else closes the current peptide. For a Structure only the
    model at index 0 is searched.

    @param entity: polypeptides are searched for in this object
    @type entity: L{Structure}, L{Model} or L{Chain}

    @param aa_only: if 1, the residue needs to be a standard AA
    @type aa_only: int

    @return: the polypeptides found, in the order they were encountered
    @raise PDBException: if the entity level is not "S", "M" or "C"
    """
    is_connected = self._is_connected
    accept = self._accept
    level = entity.get_level()
    # Decide which entity we are dealing with
    if level == "S":
        model = entity[0]
        chain_list = model.get_list()
    elif level == "M":
        chain_list = entity.get_list()
    elif level == "C":
        chain_list = [entity]
    else:
        raise PDBException("Entity should be Structure, Model or Chain.")
    pp_list = []
    for chain in chain_list:
        chain_it = iter(chain)
        try:
            # Use the built-in next(): Python 3 iterators have no .next()
            # method, so the old spelling failed with AttributeError.
            prev_res = next(chain_it)
            while not accept(prev_res, aa_only):
                prev_res = next(chain_it)
        except StopIteration:
            # No interesting residues at all in this chain
            continue
        pp = None
        for next_res in chain_it:
            if (
                accept(prev_res, aa_only)
                and accept(next_res, aa_only)
                and is_connected(prev_res, next_res)
            ):
                if pp is None:
                    # First connected pair: open a new peptide with both ends
                    pp = Polypeptide()
                    pp.append(prev_res)
                    pp_list.append(pp)
                pp.append(next_res)
            else:
                # Either too far apart, or one of the residues is unwanted.
                # End the current peptide
                pp = None
            prev_res = next_res
    return pp_list
def set_atoms(self, fixed, moving):
    """Collect atom coordinates into arrays and pass them to set_coords.

    Both lists must have the same length. The coordinate of every atom is
    read with ``get_coord()`` and stored row by row in an ``(n, 3)`` array;
    the two arrays are then handed to ``self.set_coords``.

    :param fixed: list of reference atoms
    :param moving: list of atoms paired one-to-one with ``fixed``
    :raises PDBException: if the two lists differ in length
    """
    n_atoms = len(fixed)
    if n_atoms != len(moving):
        raise PDBException(
            "Lists of fixed and moving atoms differ in size")

    fixed_xyz = np.zeros((n_atoms, 3))
    moving_xyz = np.zeros((n_atoms, 3))
    for row, (fixed_atom, moving_atom) in enumerate(zip(fixed, moving)):
        fixed_xyz[row] = fixed_atom.get_coord()
        moving_xyz[row] = moving_atom.get_coord()

    self.set_coords(fixed_xyz, moving_xyz)
def run(self):
    """Superimpose the stored coordinate sets.

    Works on copies of ``self.coords`` and ``self.reference_coords``: both
    are centred on their mean, passed to ``qcp`` together with
    ``self._natoms``, and the result is stored in ``self.rms``,
    ``self.rot`` and ``self.lquart``. ``self.tran`` is then derived from
    the two centroids and the rotation.

    :raises PDBException: if either coordinate set is still None
    """
    if self.coords is None or self.reference_coords is None:
        raise PDBException("No coordinates set.")

    # Copies, so centring does not modify the stored arrays
    moving = self.coords.copy()
    reference = self.reference_coords.copy()

    # Centre both sets on their centroid
    moving_centroid = np.mean(moving, axis=0)
    reference_centroid = np.mean(reference, axis=0)
    moving -= moving_centroid
    reference -= reference_centroid

    result = qcp(reference, moving, self._natoms)
    self.rms, self.rot, self.lquart = result
    self.tran = reference_centroid - np.dot(moving_centroid, self.rot)
def build_peptides(self, entity, aa_only=1):
    """Build and return a list of Polypeptide objects.

    Every chain is walked residue by residue; consecutive residues that
    ``self._is_connected`` reports as joined are collected into the same
    Polypeptide. When ``aa_only`` is true, a pair whose first residue is
    rejected by ``self._accept`` is skipped. For a Structure only the model
    at index 0 is searched. Empty chains contribute nothing.

    @param entity: polypeptides are searched for in this object
    @type entity: L{Structure}, L{Model} or L{Chain}

    @param aa_only: if 1, the residue needs to be a standard AA
    @type aa_only: int

    @return: the polypeptides found, in the order they were encountered
    @raise PDBException: if the entity level is not "S", "M" or "C"
    """
    is_connected = self._is_connected
    accept = self._accept
    level = entity.get_level()
    # Decide which entity we are dealing with
    if level == "S":
        model = entity[0]
        chain_list = model.get_list()
    elif level == "M":
        chain_list = entity.get_list()
    elif level == "C":
        chain_list = [entity]
    else:
        raise PDBException("Entity should be Structure, Model or Chain.")
    pp_list = []
    for chain in chain_list:
        chain_it = iter(chain)
        try:
            # Built-in next() instead of the Python 2 only .next() method;
            # the locals are named *_res so the built-in is not shadowed.
            prev_res = next(chain_it)
        except StopIteration:
            # Empty chain: nothing to build
            continue
        pp = None
        for next_res in chain_it:
            if aa_only and not accept(prev_res):
                # NOTE(review): the current peptide is not closed here, so a
                # later connected pair can extend it - confirm this is intended.
                prev_res = next_res
                continue
            if is_connected(prev_res, next_res):
                if pp is None:
                    # First connected pair: open a new peptide with both ends
                    pp = Polypeptide()
                    pp.append(prev_res)
                    pp_list.append(pp)
                pp.append(next_res)
            else:
                pp = None
            prev_res = next_res
    return pp_list
def get_guide_coord_from_structure(self, structure):
    """Return the coordinates of the guide atoms of a structure.

    One guide atom is taken per residue: the "CA" atom if the residue has
    one, otherwise the "C4'" atom if present. Residues with neither are
    skipped. Using guide atoms only is much faster than using all atoms,
    without a significant loss in accuracy.

    :param structure: object providing ``get_chains()`` and ``id``
    :returns: list of coordinates, one ``coord.tolist()`` per guide atom
    :raises PDBException: if no guide atom is found at all
    """
    guide_atom_names = ("CA", "C4'")
    guide_coords = []
    # CE algorithm is sensitive to atom ordering. To reproduce Pymol
    # results, chains are sorted and residues ordered with _RESID_SORTER.
    for chain in sorted(structure.get_chains()):
        for residue in sorted(chain, key=_RESID_SORTER):
            for atom_name in guide_atom_names:
                if atom_name in residue:
                    guide_coords.append(residue[atom_name].coord.tolist())
                    break  # at most one guide atom per residue

    if guide_coords:
        return guide_coords

    msg = f"Structure {structure.id} does not have any guide atoms."
    raise PDBException(msg)
def set_atoms(self, fixed, moving):
    """Superimpose two equally sized atom lists and store the result.

    The coordinates of both lists are copied into ``(n, 3)`` arrays and
    given to an SVDSuperimposer. After it has run, its RMS is stored in
    ``self.rms`` and its rotation/translation in ``self.rotran``.

    :param fixed: list of (fixed) atoms
    :param moving: list of (moving) atoms
    :type fixed,moving: [L{Atom}, L{Atom},...]
    :raises PDBException: if the two lists differ in length
    """
    n_atoms = len(fixed)
    if n_atoms != len(moving):
        raise PDBException("Fixed and moving atom lists differ in size")

    fixed_xyz = numpy.zeros((n_atoms, 3))
    moving_xyz = numpy.zeros((n_atoms, 3))
    for row, (fixed_atom, moving_atom) in enumerate(zip(fixed, moving)):
        fixed_xyz[row] = fixed_atom.get_coord()
        moving_xyz[row] = moving_atom.get_coord()

    superimposer = SVDSuperimposer()
    superimposer.set(fixed_xyz, moving_xyz)
    superimposer.run()
    self.rms = superimposer.get_rms()
    self.rotran = superimposer.get_rotran()
def atom_to_internal_coordinates(entity, allBonds=False):
    """Create/update internal coordinates from Atom X,Y,Z coordinates.

    Internal coordinates are bond length, angle and dihedral angles.
    A residue gets an IC_Residue attached if it has no ``internal_coord``
    attribute yet; a chain is processed directly, and any other level is
    processed chain by chain.

    :param Entity entity: Biopython PDB Entity object
    :param allBonds bool: default False
        include hedra and dihedra to close sidechain rings
    :raises PDBException: if entity is at Atom level
    """
    level = entity.level
    if level == 'A':
        raise PDBException("Cannot generate internal coordinates for "
                           "Atom alone")

    if level == 'R':
        # works better at chain level but leave option here
        if not hasattr(entity, 'internal_coord'):
            entity.internal_coord = IC_Residue(entity)
        entity.internal_coord.dihedra_from_atoms(allBonds)
        return

    chains = [entity] if level == 'C' else entity.get_chains()
    for chain in chains:
        _chn_atic(chain, allBonds)
def _map(self, model):
    """Map (PRIVATE).

    Builds the polypeptides of the model, turns each one into a fragment
    list and classifies those fragments against ``self.reflist``. Residues
    closer than ``self.edge`` positions to either end of a polypeptide get
    no entry. A polypeptide that raises ``PDBException("CHAINBREAK")`` is
    skipped; any other PDBException propagates unchanged.

    :param model: the model that will be mapped
    :type model: L{Model}
    :returns: dict mapping each interior residue to its classified fragment
    """
    ppb = PPBuilder()
    ppl = ppb.build_peptides(model)
    fd = {}
    edge = self.edge
    for pp in ppl:
        try:
            # make fragments
            flist = _make_fragment_list(pp, self.flength)
            # classify fragments
            mflist = _map_fragment_list(flist, self.reflist)
            last = len(pp) - edge
            for i, res in enumerate(pp):
                # Skip the start and end residues; fragment i - edge
                # belongs to residue i.
                if edge <= i < last:
                    fd[res] = mflist[i - edge]
        except PDBException as why:
            # Compare the message, not the exception object: an exception
            # instance never equals a str, so the old test was always false
            # and chain breaks were re-raised instead of skipped.
            if str(why) != "CHAINBREAK":
                raise
            # Funny polypeptide - skip
    return fd
def get_rotran(self):
    """Return right multiplying rotation matrix and translation vector.

    :returns: the tuple ``(self.rot, self.tran)``
    :raises PDBException: if ``self.rot`` is still None
    """
    rotation = self.rot
    if rotation is None:
        raise PDBException("Nothing is superimposed yet.")
    translation = self.tran
    return rotation, translation
def __init__(self, model, in_file, dssp="dssp", acc_array="Sander", file_type=""):
    """Create a DSSP object.

    The DSSP data is either produced by running the dssp executable on a
    PDB/mmCIF file or parsed from an existing DSSP file. Each DSSP entry is
    matched to a residue of ``model``, the DSSP values are stored in that
    residue's ``xtra`` dict, and the collected map, keys and value list are
    handed to ``AbstractResiduePropertyMap.__init__``.

    Parameters
    ----------
    model : Model
        The first model of the structure
    in_file : string
        Either a PDB file or a DSSP file.
    dssp : string
        The dssp executable (ie. the argument to subprocess)
    acc_array : string
        Accessible surface area (ASA) from either Miller et al. (1987),
        Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
        Sander/Wilke/Miller. Defaults to Sander.
    file_type: string
        File type switch: either PDB, MMCIF or DSSP. Inferred from the
        file extension by default.

    Raises
    ------
    AssertionError
        If the (upper-cased) file type is not PDB, MMCIF or DSSP.
    KeyError
        If a DSSP residue cannot be found in the chain, not even among
        the non-water HETATM residues.
    PDBException
        If the one-letter residue code of the structure disagrees with
        the one reported by DSSP.

    """
    self.residue_max_acc = residue_max_acc[acc_array]

    # create DSSP dictionary
    # An empty file_type means: take the file extension (without the dot).
    if file_type == "":
        file_type = os.path.splitext(in_file)[1][1:]
    file_type = file_type.upper()
    if file_type == "CIF":
        file_type = "MMCIF"
    # NOTE(review): assert statements are removed when Python runs with -O,
    # in which case an unknown file type is not rejected here.
    assert file_type in [
        "PDB",
        "MMCIF",
        "DSSP",
    ], "File type must be PDB, mmCIF or DSSP"
    # If the input file is a PDB or mmCIF file run DSSP and parse output:
    if file_type == "PDB" or file_type == "MMCIF":
        # Newer versions of DSSP program call the binary 'mkdssp', so
        # calling 'dssp' will not work in some operating systems
        # (Debian distribution of DSSP includes a symlink for 'dssp' argument)
        try:
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        except FileNotFoundError:
            # Retry once with the other well-known executable name; any
            # custom name is not second-guessed and the error propagates.
            if dssp == "dssp":
                dssp = "mkdssp"
            elif dssp == "mkdssp":
                dssp = "dssp"
            else:
                raise
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
    # If the input file is a DSSP file just parse it directly:
    elif file_type == "DSSP":
        dssp_dict, dssp_keys = make_dssp_dict(in_file)

    dssp_map = {}
    dssp_list = []

    def resid2code(res_id):
        """Serialize a residue's resseq and icode for easy comparison."""
        return f"{res_id[1]}{res_id[2]}"

    # DSSP outputs label_asym_id from the mmCIF file as the chain ID
    # But MMCIFParser reads in the auth_asym_id
    # Here we create a dictionary to map label_asym_id to auth_asym_id
    # using the mmCIF file
    if file_type == "MMCIF":
        mmcif_dict = MMCIF2Dict(in_file)
        mmcif_chain_dict = {}
        # Only the first auth_asym_id seen for each label_asym_id is kept.
        for i, c in enumerate(mmcif_dict["_atom_site.label_asym_id"]):
            if c not in mmcif_chain_dict:
                mmcif_chain_dict[c] = mmcif_dict[
                    "_atom_site.auth_asym_id"][i]
        dssp_mapped_keys = []

    # Now create a dictionary that maps Residue objects to
    # secondary structure and accessibility, and a list of
    # (residue, (secondary structure, accessibility)) tuples
    for key in dssp_keys:
        chain_id, res_id = key
        if file_type == "MMCIF":
            # Translate the DSSP chain id to the one used by the model and
            # remember the translated key for the final key list.
            chain_id = mmcif_chain_dict[chain_id]
            dssp_mapped_keys.append((chain_id, res_id))
        chain = model[chain_id]
        try:
            res = chain[res_id]
        except KeyError:
            # In DSSP, HET field is not considered in residue identifier.
            # Thus HETATM records may cause unnecessary exceptions.
            # (See 3jui chain A res 593.)
            # Try the lookup again with all HETATM other than water
            res_seq_icode = resid2code(res_id)
            for r in chain:
                if r.id[0] not in (" ", "W"):
                    # Compare resseq + icode
                    if resid2code(r.id) == res_seq_icode:
                        # Found a matching residue
                        res = r
                        break
            else:
                # Loop finished without a break: no residue matched.
                raise KeyError(res_id) from None

        # For disordered residues of point mutations, Biopython uses the
        # last one as default, But DSSP takes the first one (alternative
        # location is blank, A or 1). See 1h9h chain E resi 22.
        # Here we select the res in which all atoms have altloc blank, A or
        # 1. If no such residues are found, simply use the first one appears
        # (as DSSP does).
        if res.is_disordered() == 2:
            for rk in res.disordered_get_id_list():
                # All atoms in the disordered residue should have the same
                # altloc, so it suffices to check the altloc of the first
                # atom.
                altloc = res.child_dict[rk].get_list()[0].get_altloc()
                if altloc in tuple("A1 "):
                    res.disordered_select(rk)
                    break
            else:
                # Simply select the first one
                res.disordered_select(res.disordered_get_id_list()[0])

        # Sometimes point mutations are put into HETATM and ATOM with altloc
        # 'A' and 'B'.
        # See 3piu chain A residue 273:
        #   <Residue LLP het=H_LLP resseq=273 icode= >
        #   <Residue LYS het= resseq=273 icode= >
        # DSSP uses the HETATM LLP as it has altloc 'A'
        # We check the altloc code here.
        elif res.is_disordered() == 1:
            # Check altloc of all atoms in the DisorderedResidue. If it
            # contains blank, A or 1, then use it. Otherwise, look for HET
            # residues of the same seq+icode. If not such HET residues are
            # found, just accept the current one.
            altlocs = {a.get_altloc() for a in res.get_unpacked_list()}
            if altlocs.isdisjoint("A1 "):
                # Try again with all HETATM other than water
                res_seq_icode = resid2code(res_id)
                for r in chain:
                    if r.id[0] not in (" ", "W"):
                        if resid2code(
                                r.id) == res_seq_icode and r.get_list(
                                )[0].get_altloc() in tuple("A1 "):
                            res = r
                            break

        # Unpack the 14 values stored for this DSSP key.
        (
            aa,
            ss,
            acc,
            phi,
            psi,
            dssp_index,
            NH_O_1_relidx,
            NH_O_1_energy,
            O_NH_1_relidx,
            O_NH_1_energy,
            NH_O_2_relidx,
            NH_O_2_energy,
            O_NH_2_relidx,
            O_NH_2_energy,
        ) = dssp_dict[key]

        # Attach the raw DSSP values to the residue itself.
        res.xtra["SS_DSSP"] = ss
        res.xtra["EXP_DSSP_ASA"] = acc
        res.xtra["PHI_DSSP"] = phi
        res.xtra["PSI_DSSP"] = psi
        res.xtra["DSSP_INDEX"] = dssp_index
        res.xtra["NH_O_1_RELIDX_DSSP"] = NH_O_1_relidx
        res.xtra["NH_O_1_ENERGY_DSSP"] = NH_O_1_energy
        res.xtra["O_NH_1_RELIDX_DSSP"] = O_NH_1_relidx
        res.xtra["O_NH_1_ENERGY_DSSP"] = O_NH_1_energy
        res.xtra["NH_O_2_RELIDX_DSSP"] = NH_O_2_relidx
        res.xtra["NH_O_2_ENERGY_DSSP"] = NH_O_2_energy
        res.xtra["O_NH_2_RELIDX_DSSP"] = O_NH_2_relidx
        res.xtra["O_NH_2_ENERGY_DSSP"] = O_NH_2_energy

        # Relative accessibility
        resname = res.get_resname()
        try:
            rel_acc = acc / self.residue_max_acc[resname]
        except KeyError:
            # Invalid value for resname
            rel_acc = "NA"
        else:
            # Cap the relative accessibility at 1.0
            if rel_acc > 1.0:
                rel_acc = 1.0
        res.xtra["EXP_DSSP_RASA"] = rel_acc
        # Verify if AA in DSSP == AA in Structure
        # Something went wrong if this is not true!
        # NB: DSSP uses X often
        try:
            resname = three_to_one(resname)
        except KeyError:
            resname = "X"
        if resname == "C":
            # DSSP renames C in C-bridges to a,b,c,d,...
            # - we rename it back to 'C'
            if _dssp_cys.match(aa):
                aa = "C"
        # Take care of HETATM again
        # A mismatch is tolerated only for a hetero residue that DSSP
        # reports as "X".
        if (resname != aa) and (res.id[0] == " " or aa != "X"):
            raise PDBException(f"Structure/DSSP mismatch at {res}")
        dssp_vals = (
            dssp_index,
            aa,
            ss,
            rel_acc,
            phi,
            psi,
            NH_O_1_relidx,
            NH_O_1_energy,
            O_NH_1_relidx,
            O_NH_1_energy,
            NH_O_2_relidx,
            NH_O_2_energy,
            O_NH_2_relidx,
            O_NH_2_energy,
        )
        # The map is keyed by the (possibly translated) chain id.
        dssp_map[(chain_id, res_id)] = dssp_vals
        dssp_list.append(dssp_vals)

    if file_type == "MMCIF":
        # Expose the translated keys so they agree with the map's keys.
        dssp_keys = dssp_mapped_keys
    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
def __init__(self, model, pdb_file, dssp="dssp"):
    """Create a DSSP object from a model and its PDB file.

    ``dssp_dict_from_pdb_file`` is called with the PDB file and the dssp
    executable; each returned entry is matched to a residue of ``model``,
    its values are stored in that residue's ``xtra`` dict, and the
    collected map, keys and value list are handed to
    ``AbstractResiduePropertyMap.__init__``.

    @param model: the first model of the structure
    @type model: L{Model}

    @param pdb_file: a PDB file
    @type pdb_file: string

    @param dssp: the dssp executable (ie. the argument to os.system)
    @type dssp: string

    @raise KeyError: if a DSSP residue cannot be found in the chain, not
        even among the non-water HETATM residues
    @raise PDBException: if the one-letter residue code of the structure
        disagrees with the one reported by DSSP
    """
    # create DSSP dictionary
    dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)
    dssp_map = {}
    dssp_list = []

    def resid2code(res_id):
        """Serialize a residue's resseq and icode for easy comparison."""
        return '%s%s' % (res_id[1], res_id[2])

    # Now create a dictionary that maps Residue objects to
    # secondary structure and accessibility, and a list of
    # (residue, (secondary structure, accessibility)) tuples
    for key in dssp_keys:
        chain_id, res_id = key
        chain = model[chain_id]
        try:
            res = chain[res_id]
        except KeyError:
            # In DSSP, HET field is not considered in residue identifier.
            # Thus HETATM records may cause unnecessary exceptions.
            # (See 3jui chain A res 593.)
            # Try the lookup again with all HETATM other than water
            res_seq_icode = resid2code(res_id)
            for r in chain:
                if r.id[0] not in (' ', 'W'):
                    # Compare resseq + icode
                    if resid2code(r.id) == res_seq_icode:
                        # Found a matching residue
                        res = r
                        break
            else:
                # Loop finished without a break: no residue matched.
                raise KeyError(res_id)

        # For disordered residues of point mutations, BioPython uses the
        # last one as default, But DSSP takes the first one (alternative
        # location is blank, A or 1). See 1h9h chain E resi 22.
        # Here we select the res in which all atoms have altloc blank, A or
        # 1. If no such residues are found, simply use the first one appears
        # (as DSSP does).
        if res.is_disordered() == 2:
            for rk in res.disordered_get_id_list():
                # All atoms in the disordered residue should have the same
                # altloc, so it suffices to check the altloc of the first
                # atom.
                altloc = res.child_dict[rk].get_list()[0].get_altloc()
                if altloc in tuple('A1 '):
                    res.disordered_select(rk)
                    break
            else:
                # Simply select the first one
                res.disordered_select(res.disordered_get_id_list()[0])

        # Sometimes point mutations are put into HETATM and ATOM with altloc
        # 'A' and 'B'.
        # See 3piu chain A residue 273:
        #   <Residue LLP het=H_LLP resseq=273 icode= >
        #   <Residue LYS het= resseq=273 icode= >
        # DSSP uses the HETATM LLP as it has altloc 'A'
        # We check the altloc code here.
        elif res.is_disordered() == 1:
            # Check altloc of all atoms in the DisorderedResidue. If it
            # contains blank, A or 1, then use it. Otherwise, look for HET
            # residues of the same seq+icode. If not such HET residues are
            # found, just accept the current one.
            altlocs = set(a.get_altloc() for a in res.get_unpacked_list())
            if altlocs.isdisjoint('A1 '):
                # Try again with all HETATM other than water
                res_seq_icode = resid2code(res_id)
                for r in chain:
                    if r.id[0] not in (' ', 'W'):
                        if resid2code(r.id) == res_seq_icode and \
                                r.get_list()[0].get_altloc() in tuple('A1 '):
                            res = r
                            break

        # Unpack the 14 values stored for this DSSP key.
        (aa, ss, acc, phi, psi, dssp_index,
         NH_O_1_relidx, NH_O_1_energy,
         O_NH_1_relidx, O_NH_1_energy,
         NH_O_2_relidx, NH_O_2_energy,
         O_NH_2_relidx, O_NH_2_energy) = dssp_dict[key]

        # Attach the raw DSSP values to the residue itself.
        res.xtra["SS_DSSP"] = ss
        res.xtra["EXP_DSSP_ASA"] = acc
        res.xtra["PHI_DSSP"] = phi
        res.xtra["PSI_DSSP"] = psi
        res.xtra["DSSP_INDEX"] = dssp_index
        res.xtra["NH_O_1_RELIDX_DSSP"] = NH_O_1_relidx
        res.xtra["NH_O_1_ENERGY_DSSP"] = NH_O_1_energy
        res.xtra["O_NH_1_RELIDX_DSSP"] = O_NH_1_relidx
        res.xtra["O_NH_1_ENERGY_DSSP"] = O_NH_1_energy
        res.xtra["NH_O_2_RELIDX_DSSP"] = NH_O_2_relidx
        res.xtra["NH_O_2_ENERGY_DSSP"] = NH_O_2_energy
        res.xtra["O_NH_2_RELIDX_DSSP"] = O_NH_2_relidx
        res.xtra["O_NH_2_ENERGY_DSSP"] = O_NH_2_energy

        # Relative accessibility
        resname = res.get_resname()
        try:
            rel_acc = acc / MAX_ACC[resname]
        except KeyError:
            # Invalid value for resname
            rel_acc = 'NA'
        else:
            # Cap the relative accessibility at 1.0
            if rel_acc > 1.0:
                rel_acc = 1.0
        res.xtra["EXP_DSSP_RASA"] = rel_acc
        # Verify if AA in DSSP == AA in Structure
        # Something went wrong if this is not true!
        # NB: DSSP uses X often
        resname = SCOPData.protein_letters_3to1.get(resname, 'X')
        if resname == "C":
            # DSSP renames C in C-bridges to a,b,c,d,...
            # - we rename it back to 'C'
            if _dssp_cys.match(aa):
                aa = 'C'
        # Take care of HETATM again
        # A mismatch is tolerated only for a hetero residue that DSSP
        # reports as 'X'.
        if (resname != aa) and (res.id[0] == ' ' or aa != 'X'):
            raise PDBException("Structure/DSSP mismatch at %s" % res)
        dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi,
                     NH_O_1_relidx, NH_O_1_energy,
                     O_NH_1_relidx, O_NH_1_energy,
                     NH_O_2_relidx, NH_O_2_energy,
                     O_NH_2_relidx, O_NH_2_energy)
        dssp_map[key] = dssp_vals
        dssp_list.append(dssp_vals)
    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
def write_SCAD(
    entity,
    file,
    scale=None,
    pdbid=None,
    backboneOnly=False,
    includeCode=True,
    maxPeptideBond=None,
    handle="protein",
):
    """Write hedron assembly to file as OpenSCAD matrices.

    This routine calls both internal_to_atom_coordinates() and
    atom_to_internal_coordinates() due to requirements for scaling, explicit
    bonds around rings, and setting the coordinate space of the output model.

    Output data format is primarily:

    - matrix for each hedron:
        len1, angle2, len3, atom covalent bond class, flags to indicate
        atom/bond represented in previous hedron (OpenSCAD very slow with
        redundant overlapping elements), flags for bond features
    - transform matrices to assemble each hedron into residue dihedra sets
    - transform matrices for each residue to position in chain

    OpenSCAD software is included in this Python file to process these
    matrices into a model suitable for a 3D printing project.

    The class attributes ``IC_Chain.MaxPeptideBond`` and
    ``IC_Residue.AllBonds`` are overridden while this function runs and are
    restored on exit, also when an exception is raised.

    :param entity: Biopython PDB structure entity
        structure data to export
    :param file: Biopython as_handle filename or open file pointer
        file to write data to
    :param scale: float
        units (usually mm) per angstrom for STL output, written in output
    :param pdbid: str
        PDB idcode, written in output. Defaults to '0PDB' if not supplied
        and no 'idcode' set in entity
    :param backboneOnly: bool default False
        Do not output side chain data past Cbeta if True
    :param includeCode: bool default True
        Include OpenSCAD software (inline below) so output file can be loaded
        into OpenSCAD; if False, output data matrices only
    :param maxPeptideBond: Optional[float] default None
        Override the cut-off in IC_Chain class (default 1.4) for detecting
        chain breaks. If your target has chain breaks, pass a large number
        here to create a very long 'bond' spanning the break.
    :param handle: str, default 'protein'
        name for top level of generated OpenSCAD matrix structure
    :raises PDBException: if entity level is not S, M or C
    """
    # NOTE(review): `handle` is accepted but not used below; the top-level
    # name written to the file is always "protein".
    if maxPeptideBond is not None:
        mpbStash = IC_Chain.MaxPeptideBond
        IC_Chain.MaxPeptideBond = float(maxPeptideBond)

    try:
        # step one need IC_Residue atom_coords loaded in order to scale
        # so if no internal_coords, initialise from Atom coordinates
        added_IC_Atoms = False
        if "S" == entity.level or "M" == entity.level:
            for chn in entity.get_chains():
                if not chn.internal_coord:
                    chn.internal_coord = IC_Chain(chn)
                    added_IC_Atoms = True
        elif "C" == entity.level:
            if not entity.internal_coord:
                entity.internal_coord = IC_Chain(entity)
                added_IC_Atoms = True
        else:
            raise PDBException("level not S, M or C: " + str(entity.level))

        if not added_IC_Atoms and scale is not None:
            # if loaded pic file and need to scale, generate atom coords
            entity.internal_to_atom_coordinates()

        # need to reset rnext and rprev in case MaxPeptideBond changed
        if not added_IC_Atoms:
            if "C" == entity.level:
                chains = [entity]
            else:
                chains = entity.get_chains()
            for chn in chains:
                if chn.internal_coord is not None:
                    chn.internal_coord.clear_ic()
                chn.internal_coord = IC_Chain(chn)
                chn.internal_coord.atom_to_internal_coordinates()

        if scale is not None:
            scaleMtx = homog_scale_mtx(scale)
            for res in entity.get_residues():
                if 2 == res.is_disordered():
                    # scale every alternative residue, not just the selected one
                    for r in res.child_dict.values():
                        _scale_residue(r, scale, scaleMtx)
                else:
                    _scale_residue(res, scale, scaleMtx)

        # generate internal coords for scaled entity
        # (hedron bond lengths have changed if scaled)
        # if not scaling, still need to generate internal coordinate
        # bonds for ring sidechains
        # AllBonds is a class attribute for
        # IC_Residue.atom_to_internal_coordinates to generate explicit hedra
        # covering all bonds
        allBondsStash = IC_Residue.AllBonds
        IC_Residue.AllBonds = True
        try:
            entity.atom_to_internal_coordinates()
        finally:
            # restore even if the conversion fails
            IC_Residue.AllBonds = allBondsStash

        # clear initNCaC - want at origin, not match PDB file
        if "C" == entity.level:
            entity.internal_coord.initNCaC = {}
        else:
            for chn in entity.get_chains():
                chn.internal_coord.initNCaC = {}

        # rebuild atom coordinates now starting at origin: in OpenSCAD code,
        # each residue model is transformed to N-Ca-C start position instead
        # of updating transform matrix along chain
        entity.internal_to_atom_coordinates()

        with as_handle(file, "w") as fp:
            if includeCode:
                fp.write(peptide_scad)

            if not pdbid and hasattr(entity, "header"):
                pdbid = entity.header.get("idcode", None)

            if pdbid is None or "" == pdbid:
                pdbid = "0PDB"
            fp.write('protein = [ "' + pdbid + '", ' + str(scale) +
                     ", // ID, protein_scale\n")

            if "S" == entity.level or "M" == entity.level:
                for chn in entity.get_chains():
                    fp.write(" [\n")
                    chn.internal_coord.write_SCAD(fp, backboneOnly)
                    fp.write(" ]\n")
            elif "C" == entity.level:
                fp.write(" [\n")
                entity.internal_coord.write_SCAD(fp, backboneOnly)
                fp.write(" ]\n")
            elif "R" == entity.level:
                # Not reachable at present: levels other than S, M and C
                # are rejected by the check at the top.
                raise NotImplementedError(
                    "writescad single residue not yet implemented.")

            fp.write("\n];\n")
    finally:
        # Always undo the class-level override so a failure here cannot
        # change chain-break detection for later, unrelated calls.
        if maxPeptideBond is not None:
            IC_Chain.MaxPeptideBond = mpbStash
def __init__(self, model, in_file, dssp="dssp", acc_array="Sander", file_type='PDB'):
    """Create a DSSP object.

    The DSSP data is either produced by running the dssp executable on a
    PDB file or parsed from an existing DSSP file. Each DSSP entry is
    matched to a residue of ``model``, the DSSP values are stored in that
    residue's ``xtra`` dict, and the collected map, keys and value list are
    handed to ``AbstractResiduePropertyMap.__init__``.

    Parameters
    ----------
    model : Model
        The first model of the structure
    in_file : string
        Either a PDB file or a DSSP file.
    dssp : string
        The dssp executable (ie. the argument to os.system)
    acc_array : string
        Accessible surface area (ASA) from either Miller et al. (1987),
        Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
        Sander/Wilke/Miller. Defaults to Sander.
    file_type: string
        File type switch, either PDB or DSSP with PDB as default.

    Raises
    ------
    AssertionError
        If the (upper-cased) file type is neither PDB nor DSSP.
    KeyError
        If a DSSP residue cannot be found in the chain, not even among
        the non-water HETATM residues.
    PDBException
        If the one-letter residue code of the structure disagrees with
        the one reported by DSSP.

    """
    self.residue_max_acc = residue_max_acc[acc_array]

    # create DSSP dictionary
    file_type = file_type.upper()
    # NOTE(review): assert statements are removed when Python runs with -O,
    # in which case an unknown file type is not rejected here.
    assert (file_type in ['PDB', 'DSSP'])
    # If the input file is a PDB file run DSSP and parse output:
    if file_type == 'PDB':
        # Newer versions of DSSP program call the binary 'mkdssp', so
        # calling 'dssp' will not work in some operating systems
        # (Debian distribution of DSSP includes a symlink for 'dssp' argument)
        try:
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        except FileNotFoundError:
            # Retry once with the other well-known executable name; any
            # custom name is not second-guessed and the error propagates.
            if dssp == 'dssp':
                dssp = 'mkdssp'
            elif dssp == 'mkdssp':
                dssp = 'dssp'
            else:
                raise
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
    # If the input file is a DSSP file just parse it directly:
    elif file_type == 'DSSP':
        dssp_dict, dssp_keys = make_dssp_dict(in_file)

    dssp_map = {}
    dssp_list = []

    def resid2code(res_id):
        """Serialize a residue's resseq and icode for easy comparison."""
        return '%s%s' % (res_id[1], res_id[2])

    # Now create a dictionary that maps Residue objects to
    # secondary structure and accessibility, and a list of
    # (residue, (secondary structure, accessibility)) tuples
    for key in dssp_keys:
        chain_id, res_id = key
        chain = model[chain_id]
        try:
            res = chain[res_id]
        except KeyError:
            # In DSSP, HET field is not considered in residue identifier.
            # Thus HETATM records may cause unnecessary exceptions.
            # (See 3jui chain A res 593.)
            # Try the lookup again with all HETATM other than water
            res_seq_icode = resid2code(res_id)
            for r in chain:
                if r.id[0] not in (' ', 'W'):
                    # Compare resseq + icode
                    if resid2code(r.id) == res_seq_icode:
                        # Found a matching residue
                        res = r
                        break
            else:
                # Loop finished without a break: no residue matched.
                raise KeyError(res_id)

        # For disordered residues of point mutations, BioPython uses the
        # last one as default, But DSSP takes the first one (alternative
        # location is blank, A or 1). See 1h9h chain E resi 22.
        # Here we select the res in which all atoms have altloc blank, A or
        # 1. If no such residues are found, simply use the first one appears
        # (as DSSP does).
        if res.is_disordered() == 2:
            for rk in res.disordered_get_id_list():
                # All atoms in the disordered residue should have the same
                # altloc, so it suffices to check the altloc of the first
                # atom.
                altloc = res.child_dict[rk].get_list()[0].get_altloc()
                if altloc in tuple('A1 '):
                    res.disordered_select(rk)
                    break
            else:
                # Simply select the first one
                res.disordered_select(res.disordered_get_id_list()[0])

        # Sometimes point mutations are put into HETATM and ATOM with altloc
        # 'A' and 'B'.
        # See 3piu chain A residue 273:
        #   <Residue LLP het=H_LLP resseq=273 icode= >
        #   <Residue LYS het= resseq=273 icode= >
        # DSSP uses the HETATM LLP as it has altloc 'A'
        # We check the altloc code here.
        elif res.is_disordered() == 1:
            # Check altloc of all atoms in the DisorderedResidue. If it
            # contains blank, A or 1, then use it. Otherwise, look for HET
            # residues of the same seq+icode. If not such HET residues are
            # found, just accept the current one.
            altlocs = set(a.get_altloc() for a in res.get_unpacked_list())
            if altlocs.isdisjoint('A1 '):
                # Try again with all HETATM other than water
                res_seq_icode = resid2code(res_id)
                for r in chain:
                    if r.id[0] not in (' ', 'W'):
                        if resid2code(r.id) == res_seq_icode and \
                                r.get_list()[0].get_altloc() in tuple('A1 '):
                            res = r
                            break

        # Unpack the 14 values stored for this DSSP key.
        (aa, ss, acc, phi, psi, dssp_index,
         NH_O_1_relidx, NH_O_1_energy,
         O_NH_1_relidx, O_NH_1_energy,
         NH_O_2_relidx, NH_O_2_energy,
         O_NH_2_relidx, O_NH_2_energy) = dssp_dict[key]

        # Attach the raw DSSP values to the residue itself.
        res.xtra["SS_DSSP"] = ss
        res.xtra["EXP_DSSP_ASA"] = acc
        res.xtra["PHI_DSSP"] = phi
        res.xtra["PSI_DSSP"] = psi
        res.xtra["DSSP_INDEX"] = dssp_index
        res.xtra["NH_O_1_RELIDX_DSSP"] = NH_O_1_relidx
        res.xtra["NH_O_1_ENERGY_DSSP"] = NH_O_1_energy
        res.xtra["O_NH_1_RELIDX_DSSP"] = O_NH_1_relidx
        res.xtra["O_NH_1_ENERGY_DSSP"] = O_NH_1_energy
        res.xtra["NH_O_2_RELIDX_DSSP"] = NH_O_2_relidx
        res.xtra["NH_O_2_ENERGY_DSSP"] = NH_O_2_energy
        res.xtra["O_NH_2_RELIDX_DSSP"] = O_NH_2_relidx
        res.xtra["O_NH_2_ENERGY_DSSP"] = O_NH_2_energy

        # Relative accessibility
        resname = res.get_resname()
        try:
            rel_acc = acc / self.residue_max_acc[resname]
        except KeyError:
            # Invalid value for resname
            rel_acc = 'NA'
        else:
            # Cap the relative accessibility at 1.0
            if rel_acc > 1.0:
                rel_acc = 1.0
        res.xtra["EXP_DSSP_RASA"] = rel_acc
        # Verify if AA in DSSP == AA in Structure
        # Something went wrong if this is not true!
        # NB: DSSP uses X often
        resname = SCOPData.protein_letters_3to1.get(resname, 'X')
        if resname == "C":
            # DSSP renames C in C-bridges to a,b,c,d,...
            # - we rename it back to 'C'
            if _dssp_cys.match(aa):
                aa = 'C'
        # Take care of HETATM again
        # A mismatch is tolerated only for a hetero residue that DSSP
        # reports as 'X'.
        if (resname != aa) and (res.id[0] == ' ' or aa != 'X'):
            raise PDBException("Structure/DSSP mismatch at %s" % res)
        dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi,
                     NH_O_1_relidx, NH_O_1_energy,
                     O_NH_1_relidx, O_NH_1_energy,
                     NH_O_2_relidx, NH_O_2_energy,
                     O_NH_2_relidx, O_NH_2_energy)
        dssp_map[key] = dssp_vals
        dssp_list.append(dssp_vals)
    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
def report_PIC(entity, reportDict=None, verbose=False):
    """Generate dict with counts of PIC data elements for each entity level.

    The entity is walked recursively (structure -> model -> chain ->
    residue) and the counts are accumulated in a single dict. A residue is
    counted only if it has an ``internal_coord`` that is not None.

    reportDict entries are:
      idcode : PDB ID
      hdr : PDB header lines
      mdl : models
      chn : chains
      res : residue objects
      res_e : residues with dihedra and/or hedra
      dih : dihedra
      hed : hedra

    :param Entity entity: Biopython PDB Entity object: S, M, C or R
    :param dict reportDict: counts to add to; a fresh dict is created
        if None
    :param bool verbose: print a one-line summary of the counts if True
    :raises PDBException: if entity level not S, M, C, or R
    :raises Exception: if entity does not have .level attribute
    :returns: dict with counts of PIC data elements
    """
    if reportDict is None:
        reportDict = {
            'idcode': None,
            'hdr': 0,
            'mdl': 0,
            'chn': 0,
            'res': 0,
            'res_e': 0,
            'dih': 0,
            'hed': 0
        }

    # A missing attribute raises AttributeError, never KeyError, so the
    # documented "not an Entity" error needs its own guard on the lookup.
    try:
        level = entity.level
    except AttributeError:
        raise Exception("write_PIC: argument is not a Biopython PDB Entity " +
                        str(entity)) from None

    try:
        if 'A' == level:
            raise PDBException("No PIC output at Atom level")
        elif 'R' == level:
            # Tolerate both a missing attribute and an explicit None
            ric = getattr(entity, 'internal_coord', None)
            if ric is not None:
                reportDict['res'] += 1
                dlen = len(ric.dihedra)
                hlen = len(ric.hedra)
                if 0 < dlen or 0 < hlen:
                    reportDict['res_e'] += 1
                    reportDict['dih'] += dlen
                    reportDict['hed'] += hlen
        elif 'C' == level:
            reportDict['chn'] += 1
            for res in entity:
                reportDict = report_PIC(res, reportDict)
        elif 'M' == level:
            reportDict['mdl'] += 1
            for chn in entity:
                reportDict = report_PIC(chn, reportDict)
        elif 'S' == level:
            if reportDict['idcode'] is None:
                reportDict['idcode'] = entity.header.get('idcode', None)
            # Each non-empty 'head' / 'name' header entry counts as one line
            hdr = entity.header.get('head', None)
            if hdr:
                reportDict['hdr'] += 1
            nam = entity.header.get('name', None)
            if nam:
                reportDict['hdr'] += 1
            for mdl in entity:
                reportDict = report_PIC(mdl, reportDict)
        else:
            raise PDBException("Cannot identify level: " + str(level))
    except KeyError as err:
        raise Exception("write_PIC: argument is not a Biopython PDB Entity " +
                        str(entity)) from err

    # verbose is not passed to the recursive calls, so the summary is
    # printed once, by the outermost call.
    if verbose:
        print("{} : {} models {} chains {} residue objects "
              "{} residues with {} dihedra {} hedra".format(
                  reportDict['idcode'], reportDict['mdl'], reportDict['chn'],
                  reportDict['res'], reportDict['res_e'], reportDict['dih'],
                  reportDict['hed']))

    return reportDict
def get_rms(self):
    """Return the root mean square deviation of superimposed coordinates.

    :returns: the value stored in ``self.rms``
    :raises PDBException: if ``self.rms`` is still None
    """
    rms = self.rms
    if rms is None:
        raise PDBException("Nothing superimposed yet.")
    return rms
def report_IC(
    entity: Union[Structure, Model, Chain, Residue],
    reportDict: Union[Dict[str, Any], None] = None,
    verbose: bool = False,
) -> Dict[str, Any]:
    """Generate dict with counts of ic data elements for each entity level.

    The entity is walked recursively (structure -> model -> chain ->
    residue) and the counts are accumulated in a single dict. A residue is
    counted only if its ``internal_coord`` is truthy.

    reportDict entries are:
      - idcode : PDB ID
      - hdr : PDB header lines
      - mdl : models
      - chn : chains
      - chn_ids : ids of the chains visited
      - res : residue objects
      - res_e : residues with dihedra and/or hedra
      - dih : dihedra
      - hed : hedra

    :param Entity entity: Biopython PDB Entity object: S, M, C or R
    :param dict reportDict: counts to add to; a fresh dict is created
        if None
    :param bool verbose: print a one-line summary of the counts if True
    :raises PDBException: if entity level not S, M, C, or R
    :raises Exception: if entity does not have .level attribute
    :returns: dict with counts of IC data elements
    """
    if reportDict is None:
        reportDict = {
            "idcode": None,
            "hdr": 0,
            "mdl": 0,
            "chn": 0,
            "chn_ids": [],
            "res": 0,
            "res_e": 0,
            "dih": 0,
            "hed": 0,
        }

    # A missing attribute raises AttributeError, never KeyError, so the
    # documented "not an Entity" error needs its own guard on the lookup.
    try:
        level = entity.level
    except AttributeError:
        raise Exception("write_PIC: argument is not a Biopython PDB Entity " +
                        str(entity)) from None

    try:
        if "A" == level:
            raise PDBException("No IC output at Atom level")
        elif isinstance(entity, (Residue, DisorderedResidue)):
            if entity.internal_coord:
                reportDict["res"] += 1
                dlen = len(entity.internal_coord.dihedra)
                hlen = len(entity.internal_coord.hedra)
                if 0 < dlen or 0 < hlen:
                    reportDict["res_e"] += 1
                    reportDict["dih"] += dlen
                    reportDict["hed"] += hlen
        elif isinstance(entity, Chain):
            reportDict["chn"] += 1
            reportDict["chn_ids"].append(entity.id)
            for res in entity:
                reportDict = report_IC(res, reportDict)
        elif isinstance(entity, Model):
            reportDict["mdl"] += 1
            for chn in entity:
                reportDict = report_IC(chn, reportDict)
        elif isinstance(entity, Structure):
            if hasattr(entity, "header"):
                if reportDict["idcode"] is None:
                    reportDict["idcode"] = entity.header.get("idcode", None)
                # Each non-empty 'head' / 'name' entry counts as one line
                hdr = entity.header.get("head", None)
                if hdr:
                    reportDict["hdr"] += 1
                nam = entity.header.get("name", None)
                if nam:
                    reportDict["hdr"] += 1
            for mdl in entity:
                reportDict = report_IC(mdl, reportDict)
        else:
            raise PDBException("Cannot identify level: " + str(level))
    except KeyError as err:
        raise Exception("write_PIC: argument is not a Biopython PDB Entity " +
                        str(entity)) from err

    # verbose is not passed to the recursive calls, so the summary is
    # printed once, by the outermost call.
    if verbose:
        print("{} : {} models {} chains {} {} residue objects "
              "{} residues with {} dihedra {} hedra".format(
                  reportDict["idcode"],
                  reportDict["mdl"],
                  reportDict["chn"],
                  reportDict["chn_ids"],
                  reportDict["res"],
                  reportDict["res_e"],
                  reportDict["dih"],
                  reportDict["hed"],
              ))

    return reportDict
def write_SCAD(
    entity,
    file,
    scale=None,
    pdbid=None,
    backboneOnly=False,
    includeCode=True,
    maxPeptideBond=None,
    start=None,
    fin=None,
    handle="protein",
):
    """Write hedron assembly to file as OpenSCAD matrices.

    This routine calls both :meth:`.IC_Chain.internal_to_atom_coordinates` and
    :meth:`.IC_Chain.atom_to_internal_coordinates` due to requirements for
    scaling, explicit bonds around rings, and setting the coordinate space of
    the output model.

    Output data format is primarily:

    - matrix for each hedron:
        len1, angle2, len3, atom covalent bond class, flags to indicate
        atom/bond represented in previous hedron (OpenSCAD very slow with
        redundant overlapping elements), flags for bond features
    - transform matrices to assemble each hedron into residue dihedra sets
    - transform matrices for each residue to position in chain

    OpenSCAD software is included in this Python file to process these
    matrices into a model suitable for a 3D printing project.

    :param entity: Biopython PDB :class:`.Structure` entity
        structure data to export
    :param file: Biopython :func:`.as_handle` filename or open file pointer
        file to write data to
    :param float scale:
        units (usually mm) per angstrom for STL output, written in output
    :param str pdbid:
        PDB idcode, written in output. Defaults to '0PDB' if not supplied
        and no 'idcode' set in entity
    :param bool backboneOnly: default False.
        Do not output side chain data past Cbeta if True
    :param bool includeCode: default True.
        Include OpenSCAD software (inline below) so output file can be loaded
        into OpenSCAD; if False, output data matrices only
    :param float maxPeptideBond: Optional default None.
        Override the cut-off in IC_Chain class (default 1.4) for detecting
        chain breaks.  If your target has chain breaks, pass a large number
        here to create a very long 'bond' spanning the break.  The class
        attribute is restored when this function exits, including on error.
    :param int start,fin: default None
        Parameters for internal_to_atom_coords() to limit chain segment.
    :param str handle: default 'protein'
        name for top level of generated OpenSCAD matrix structure.
        NOTE(review): this body never reads ``handle``; the output always
        uses the literal name ``protein`` - confirm whether that is intended.
    :raises PDBException: if entity level is not S, M or C

    See :meth:`.IC_Residue.set_flexible` to set flags for specific residues to
    have rotatable bonds, and :meth:`.IC_Residue.set_hbond` to include cavities
    for small magnets to work as hydrogen bonds.
    See <https://www.thingiverse.com/thing:3957471> for implementation example.

    The OpenSCAD code explicitly creates spheres and cylinders to
    represent atoms and bonds in a 3D model.  Options are available
    to support rotatable bonds and magnetic hydrogen bonds.

    Matrices are written to link, enumerate and describe residues,
    dihedra, hedra, and chains, mirroring contents of the relevant IC_*
    data structures.

    The OpenSCAD matrix of hedra has additional information as follows:

    * the atom and bond state (single, double, resonance) are logged
      so that covalent radii may be used for atom spheres in the 3D models

    * bonds and atoms are tracked so that each is only created once

    * bond options for rotation and magnet holders for hydrogen bonds
      may be specified (see :meth:`.IC_Residue.set_flexible` and
      :meth:`.IC_Residue.set_hbond` )

    Note the application of
    :data:`Bio.PDB.internal_coords.IC_Chain.MaxPeptideBond` : missing
    residues may be linked (joining chain segments with arbitrarily long
    bonds) by setting this to a large value.

    Note this uses the serial assembly per residue, placing each residue at
    the origin and supplying the coordinate space transform to OpenSCAD

    All ALTLOC (disordered) residues and atoms are written to the output
    model.  (see :data:`Bio.PDB.internal_coords.IC_Residue.no_altloc`)
    """
    # NOTE(review): another ``def write_SCAD`` with a different parameter
    # order appears later in this file; if both are module-level, the later
    # definition replaces this one at import time - confirm which is intended.

    override_mpb = maxPeptideBond is not None
    if override_mpb:
        mpbStash = IC_Chain.MaxPeptideBond
        IC_Chain.MaxPeptideBond = float(maxPeptideBond)

    # Everything below may raise; the finally clause guarantees the class-level
    # MaxPeptideBond override never outlives this call.
    try:
        # step one need IC_Residue atom_coords loaded in order to scale
        # so if no internal_coords, initialise from Atom coordinates
        added_IC_Atoms = False
        if "S" == entity.level or "M" == entity.level:
            for chn in entity.get_chains():
                if not chn.internal_coord:
                    chn.internal_coord = IC_Chain(chn)
                    added_IC_Atoms = True
        elif "C" == entity.level:
            if not entity.internal_coord:
                entity.internal_coord = IC_Chain(entity)
                added_IC_Atoms = True
        else:
            raise PDBException("level not S, M or C: " + str(entity.level))

        if added_IC_Atoms:
            # if loaded pdb, need to scale, and asm, gen atomArray
            entity.atom_to_internal_coordinates()
        else:
            # if loaded pic file and need to scale, generate atom coords
            entity.internal_to_atom_coordinates(None)

        if scale is not None:
            scaleMtx = homog_scale_mtx(scale)

            if "C" == entity.level:
                entity.internal_coord.atomArray = np.dot(
                    entity.internal_coord.atomArray[:], scaleMtx)
                entity.internal_coord.hAtoms_needs_update[:] = True
                entity.internal_coord.scale = scale
            else:
                for chn in entity.get_chains():
                    if hasattr(chn.internal_coord, "atomArray"):
                        chn.internal_coord.atomArray = np.dot(
                            chn.internal_coord.atomArray[:], scaleMtx)
                        chn.internal_coord.hAtoms_needs_update[:] = True
                        chn.internal_coord.scale = scale

        # generate internal coords for scaled entity
        # (hedron bond lengths have changed if scaled)
        # if not scaling, still need to generate internal coordinate
        # bonds for ring sidechains
        # AllBonds is a class attribute for
        # IC_Residue.atom_to_internal_coordinates to generate explicit hedra
        # covering all bonds
        allBondsStash = IC_Residue._AllBonds
        IC_Residue._AllBonds = True
        try:
            # trigger rebuild of hedra for AllBonds
            if "C" == entity.level:
                entity.internal_coord.ordered_aa_ic_list[0].hedra = {}
                delattr(entity.internal_coord, "hAtoms_needs_update")
                delattr(entity.internal_coord, "hedraLen")
            else:
                for chn in entity.get_chains():
                    chn.internal_coord.ordered_aa_ic_list[0].hedra = {}
                    delattr(chn.internal_coord, "hAtoms_needs_update")
                    delattr(chn.internal_coord, "hedraLen")
            entity.atom_to_internal_coordinates()
        finally:
            # restore the class-level flag even if the rebuild fails
            IC_Residue._AllBonds = allBondsStash

        # rebuild atom coordinates now with chain starting at origin: in
        # OpenSCAD code, each residue model is transformed to N-Ca-C start
        # position instead of updating transform matrix along chain
        entity.internal_to_atom_coordinates()

        with as_handle(file, "w") as fp:
            if includeCode:
                fp.write(peptide_scad)

            if not pdbid and hasattr(entity, "header"):
                pdbid = entity.header.get("idcode", None)
            if pdbid is None or "" == pdbid:
                pdbid = "0PDB"
            fp.write('protein = [ "' + pdbid + '", ' + str(scale)
                     + ", // ID, protein_scale\n")

            if "S" == entity.level or "M" == entity.level:
                for chn in entity.get_chains():
                    fp.write(" [\n")
                    chn.internal_coord._write_SCAD(
                        fp, backboneOnly=backboneOnly, start=start, fin=fin)
                    fp.write(" ]\n")
            elif "C" == entity.level:
                fp.write(" [\n")
                entity.internal_coord._write_SCAD(
                    fp, backboneOnly=backboneOnly, start=start, fin=fin)
                fp.write(" ]\n")
            elif "R" == entity.level:
                # Not reachable as written: level "R" is already rejected by
                # the PDBException raised in the level check above.
                raise NotImplementedError(
                    "writescad single residue not yet implemented.")

            fp.write("\n];\n")
    finally:
        if override_mpb:
            IC_Chain.MaxPeptideBond = mpbStash
def write_PIC(
    entity,
    file,
    pdbid=None,
    chainid=None,
    picFlags: int = IC_Residue.picFlagsDefault,
    hCut: Optional[float] = None,
    pCut: Optional[float] = None,
):
    """Write Protein Internal Coordinates (PIC) to file.

    See :func:`read_PIC` for file format.
    Recurses to lower entity levels (M, C, R).

    :param Entity entity: Biopython PDB Entity object: S, M, C or R
    :param Bio.File file: :func:`.as_handle` file name or handle
    :param str pdbid: PDB idcode, read from entity if not supplied
    :param char chainid: PDB Chain ID, set from C level entity.id if needed
    :param int picFlags: boolean flags controlling output, defined in
        :data:`Bio.PDB.internal_coords.IC_Residue.pic_flags`

        * "psi",
        * "omg",
        * "phi",
        * "tau",  # tau hedron (N-Ca-C)
        * "chi1",
        * "chi2",
        * "chi3",
        * "chi4",
        * "chi5",
        * "pomg",  # proline omega
        * "chi",   # chi1 through chi5
        * "classic_b",  # psi | phi | tau | pomg
        * "classic",    # classic_b | chi
        * "hedra",      # all hedra including bond lengths
        * "primary",    # all primary dihedra
        * "secondary",  # all secondary dihedra
        * "all",        # hedra | primary | secondary
        * "initAtoms",  # XYZ coordinates of initial Tau (N-Ca-C)
        * "bFactors"

        default is everything::

            picFlagsDefault = (
                pic_flags.all | pic_flags.initAtoms | pic_flags.bFactors
            )

        Usage in your code::

            # just primary dihedra and all hedra
            picFlags = (
                IC_Residue.pic_flags.primary | IC_Residue.pic_flags.hedra
            )

            # no B-factors:
            picFlags = IC_Residue.picFlagsDefault
            picFlags &= ~IC_Residue.pic_flags.bFactors

        :func:`read_PIC` with `(defaults=True)` will use default values for
        anything left out

    :param float hCut: default None
        only write hedra with ref db angle std dev greater than this value
    :param float pCut: default None
        only write primary dihedra with ref db angle std dev greater than this
        value

    **Default values**:

    Data averaged from Sep 2019 Dunbrack cullpdb_pc20_res2.2_R1.0.

    Please see

    `PISCES: A Protein Sequence Culling Server
    <https://dunbrack.fccc.edu/pisces/>`_

    'G. Wang and R. L. Dunbrack, Jr. PISCES: a protein sequence culling
    server. Bioinformatics, 19:1589-1591, 2003.'

    'primary' and 'secondary' dihedra are defined in ic_data.py.  Specifically,
    secondary dihedra can be determined as a fixed rotation from another known
    angle, for example N-Ca-C-O can be estimated from N-Ca-C-N (psi).

    Standard deviations are listed in
    <biopython distribution>/Bio/PDB/ic_data.py for default values, and can be
    used to limit which hedra and dihedra are defaulted vs. output exact
    measurements from structure (see hCut and pCut above).  Default values for
    primary dihedra (psi, phi, omega, chi1, etc.) are chosen as the most common
    integer value, not an average.

    :raises PDBException: if entity level is A (Atom) or is not recognised
    :raises AttributeError: if entity does not have .level attribute
    :raises Exception: if a KeyError occurs while writing
    """
    enumerate_atoms(entity)

    # Output options forwarded unchanged to every recursive call and to _wpr.
    opts = {"picFlags": picFlags, "hCut": hCut, "pCut": pCut}

    with as_handle(file, "w") as fp:
        try:
            if "A" == entity.level:
                raise PDBException("No PIC output at Atom level")
            elif "R" == entity.level:
                if 2 == entity.is_disordered():
                    # write every child residue held in child_dict
                    for r in entity.child_dict.values():
                        _wpr(r, fp, pdbid, chainid, **opts)
                else:
                    _wpr(entity, fp, pdbid, chainid, **opts)
            elif "C" == entity.level:
                if not chainid:
                    chainid = entity.id
                for res in entity:
                    write_PIC(res, fp, pdbid, chainid, **opts)
            elif "M" == entity.level:
                for chn in entity:
                    write_PIC(chn, fp, pdbid, chainid, **opts)
            elif "S" == entity.level:
                # A structure may lack a header attribute; treat that as an
                # empty header (same guard the other writers in this file use)
                # instead of failing with AttributeError.
                header = getattr(entity, "header", None) or {}
                if not pdbid:
                    pdbid = header.get("idcode", None)
                hdr = header.get("head", None)
                dd = pdb_date(header.get("deposition_date", None))
                if hdr:
                    fp.write(("HEADER {:40}{:8} {:4}\n").format(
                        hdr.upper(), (dd or ""), (pdbid or "")))
                nam = header.get("name", None)
                if nam:
                    fp.write("TITLE " + nam.upper() + "\n")
                for mdl in entity:
                    write_PIC(mdl, fp, pdbid, chainid, **opts)
            else:
                raise PDBException("Cannot identify level: "
                                   + str(entity.level))
        except KeyError as err:
            # chain the cause so the offending key stays in the traceback
            raise Exception(
                "write_PIC: argument is not a Biopython PDB Entity "
                + str(entity)) from err
def write_SCAD(entity, file, scale=None, handle='protein', pdbid=None,
               backboneOnly=False, includeCode=True):
    """Export an entity's hedron assembly as OpenSCAD matrices.

    The output consists mainly of:

    - one matrix per hedron: len1, angle2, len3, atom covalent bond class,
      and flags marking atoms/bonds already represented by a previous hedron
      (OpenSCAD is very slow with overlapping elements)
    - matrices per residue to assemble hedra into dihedrons
    - matrices to transform each residue's dihedra set into chain position

    :param entity: Biopython PDB structure entity
        structure data to export
    :param file: Biopython as_handle filename or open file pointer
        file to write data to
    :param scale: float
        units (usually mm) per angstrom for STL output, written in output
    :param handle: str, default 'protein'
        name for top level of generated OpenSCAD matrix structure.
        NOTE(review): not read anywhere in this body - the literal name
        ``protein`` is always written; confirm intent.
    :param pdbid: str
        PDB idcode, written in output. Defaults to '0PDB' if not supplied
        and no 'idcode' set in entity
    :param backboneOnly: bool default False
        Do not output side chain data if True
    :param includeCode: bool default True
        Include Bio/PDB/peptide.scad so output file can be loaded into
        OpenSCAD; if False, output data matrices only
    :raises PDBException: if entity level is not S, M, C or R
    :raises NotImplementedError: at the output stage for an R level entity
    """
    # NOTE(review): an earlier ``def write_SCAD`` with a different signature
    # appears above in this file; if both are module-level this one wins.
    level = entity.level

    # Step one: internal coordinates must exist before anything can be
    # scaled.  Track whether any had to be created here from Atom data.
    created_ic = False
    if level in ('S', 'M'):
        for chain in entity.get_chains():
            if hasattr(chain, 'internal_coord'):
                continue
            chain.internal_coord = IC_Chain(chain)
            created_ic = True
    elif level in ('C', 'R'):
        if not hasattr(entity, 'internal_coord'):
            ic_class = IC_Chain if 'C' == level else IC_Residue
            entity.internal_coord = ic_class(entity)
            created_ic = True
    else:
        raise PDBException("level not S, M. C or R: " + str(entity.level))

    scaling = scale is not None

    if scaling and not created_ic:
        # internal coords were already present (e.g. loaded from a pic file):
        # atom coordinates have to be generated before they can be scaled
        internal_to_atom_coordinates(entity)

    if scaling:
        scale_matrix = homog_scale_mtx(scale)
        for residue in entity.get_residues():
            if not hasattr(residue, 'internal_coord'):
                continue
            ric = residue.internal_coord
            ric.applyMtx(scale_matrix)
            if ric.gly_Cbeta:
                ric.scale = scale

    # Regenerate internal coordinates: hedron bond lengths changed if scaled,
    # and even without scaling the explicit bonds for ring sidechains are
    # needed.
    atom_to_internal_coordinates(entity, allBonds=True)

    # Drop initNCaC so the rebuilt chain sits at the origin rather than
    # matching the PDB file position.
    for chain in entity.get_chains():
        chain.internal_coord.initNCaC = {}

    internal_to_atom_coordinates(entity)

    with as_handle(file, 'w') as fp:
        fp.write('protein_scale=' + str(scale) + ';\n')

        if includeCode:
            fp.write('$fn=20;\nchain(protein);\n')
            fp.write(peptide_scad)

        if not pdbid and hasattr(entity, 'header'):
            pdbid = entity.header.get('idcode', None)
        if pdbid is None or '' == pdbid:
            pdbid = '0PDB'
        fp.write('protein = [ "' + pdbid + '", protein_scale,\n')

        # Collect (lazily) the chain-level internal coordinate objects that
        # each contribute one bracketed section to the output.
        chain_ics = ()
        if level in ('S', 'M'):
            chain_ics = (chain.internal_coord
                         for chain in entity.get_chains())
        elif 'C' == level:
            chain_ics = (entity.internal_coord,)
        elif 'R' == level:
            raise NotImplementedError(
                'writescad single residue not yet implemented.')

        for chain_ic in chain_ics:
            fp.write(' [\n')
            chain_ic.write_SCAD(fp, scale, backboneOnly)
            fp.write(' ]\n')

        fp.write('\n];\n')