def __init__(self, model, pdb_file=None, naccess_binary='naccess',
             tmp_directory='/tmp'):
    """Run NACCESS for a model and attach the results to its residues.

    The residue output of ``run_naccess`` is converted by
    ``process_rsa_data`` into a mapping keyed by ``(chain_id, res_id)``.
    Every residue of ``model`` that has an entry in that mapping gets the
    entry stored under ``res.xtra["EXP_NACCESS"]``; residues without an
    entry are left untouched.

    @param model: entity iterated chain by chain, residue by residue
    @param pdb_file: forwarded to run_naccess
    @param naccess_binary: forwarded to run_naccess as ``naccess``
    @param tmp_directory: forwarded to run_naccess as ``temp_path``
    @raise AssertionError: if the residue name recorded in the NACCESS
        entry differs from the name of the residue in the model
    """
    # Only the per-residue output is used; the per-atom output is ignored.
    res_data, _atm_data = run_naccess(model, pdb_file,
                                      naccess=naccess_binary,
                                      temp_path=tmp_directory)
    rsa_by_residue = process_rsa_data(res_data)

    matched = {}
    matched_keys = []
    matched_pairs = []
    for chain in model:
        chain_id = chain.get_id()
        for residue in chain:
            key = (chain_id, residue.get_id())
            if key not in rsa_by_residue:
                # NACCESS produced nothing for this residue.
                continue
            entry = rsa_by_residue[key]
            assert entry['res_name'] == residue.get_resname()
            matched[key] = entry
            matched_keys.append(key)
            matched_pairs.append((residue, entry))
            residue.xtra["EXP_NACCESS"] = entry

    AbstractResiduePropertyMap.__init__(self, matched, matched_keys,
                                        matched_pairs)
def __init__(self, model, pdb_file=None, naccess_binary='naccess',
             tmp_directory='/tmp'):
    """Build a residue property map from NACCESS output.

    ``run_naccess`` is invoked with the given arguments, and its residue
    data is turned into a ``(chain_id, res_id)``-keyed mapping by
    ``process_rsa_data``.  Each residue of ``model`` found in that mapping
    is annotated via ``res.xtra["EXP_NACCESS"]`` and recorded in the
    dictionary, key list and ``(residue, entry)`` list handed to
    ``AbstractResiduePropertyMap.__init__``.

    @param model: entity iterated chain by chain, residue by residue
    @param pdb_file: forwarded to run_naccess
    @param naccess_binary: forwarded to run_naccess as ``naccess``
    @param tmp_directory: forwarded to run_naccess as ``temp_path``
    @raise AssertionError: if a NACCESS entry names a different residue
        type than the residue it was matched to
    """
    naccess_output = run_naccess(model, pdb_file, naccess=naccess_binary,
                                 temp_path=tmp_directory)
    # run_naccess yields (residue data, atom data); only the former is used.
    res_data = naccess_output[0]
    naccess_dict = process_rsa_data(res_data)

    # Gather (key, residue, entry) triples first, then derive the three
    # containers expected by the base class from them.
    hits = []
    for chain in model:
        chain_id = chain.get_id()
        for res in chain:
            key = (chain_id, res.get_id())
            if key in naccess_dict:
                item = naccess_dict[key]
                assert item['res_name'] == res.get_resname()
                hits.append((key, res, item))
                res.xtra["EXP_NACCESS"] = item

    property_dict = {key: item for key, _res, item in hits}
    property_keys = [key for key, _res, _item in hits]
    property_list = [(res, item) for _key, res, item in hits]
    AbstractResiduePropertyMap.__init__(self, property_dict, property_keys,
                                        property_list)
def __init__(self, structure, dssp="dsspcmbi", id=None):
    """Run DSSP on a structure and annotate the residues of its first model.

    ``dssp_dict_from_structure`` supplies the (possibly replaced)
    structure, a dictionary of ``(aa, ss, acc)`` records and the list of
    its ``(chain_id, res_id)`` keys.  Residues found in the model get
    their ``xtra`` entries ``SS``, ``ASA`` and ``RASA`` filled in; keys
    that match no residue of the model are skipped.

    @param structure: entity handed to dssp_dict_from_structure; the
        top-most ancestor of the returned entity is located through
        get_parent() and its child at index 0 is used as the model
    @param dssp: the dssp executable (ie. the argument to os.system)
    @type dssp: string
    @param id: the structure id to use if parsed
    @type id: string

    @raise KeyError: if a residue name is missing from MAX_ACC or from
        to_one_letter_code
    @raise PDBException: if the amino acid reported by DSSP differs from
        the residue found in the model
    """
    structure, dssp_dict, dssp_keys = dssp_dict_from_structure(
        structure, dssp, id=id)
    self.structure = structure

    # Climb to the root of the entity hierarchy, then take its first child.
    root = structure
    while root.get_parent() is not None:
        root = root.get_parent()
    self.model = root[0]

    dssp_map = {}
    dssp_list = []
    for key in dssp_keys:
        chain_id, res_id = key
        try:
            res = self.model[chain_id][res_id]
        except KeyError:
            # DSSP reported a residue the model does not contain; skip it.
            # Only the lookup failure is swallowed, so interrupts and
            # genuine programming errors still propagate.
            continue

        aa, ss, acc = dssp_dict[key]
        res.xtra[SS] = ss
        res.xtra[ASA] = acc

        # Relative accessibility, capped at 1.0.
        resname = res.get_resname()
        rel_acc = acc / MAX_ACC[resname]
        if rel_acc > 1.0:
            rel_acc = 1.0
        res.xtra[RASA] = rel_acc

        # The amino acid seen by DSSP must agree with the structure.
        one_letter = to_one_letter_code[resname]
        # DSSP renames C in C-bridges to a,b,c,d,... - rename it back.
        if one_letter == "C" and _dssp_cys.match(aa):
            aa = "C"
        if one_letter != aa:
            raise PDBException("Structure/DSSP mismatch at " + str(res))

        record = (res, ss, acc, rel_acc)
        dssp_map[key] = record
        dssp_list.append(record)

    # NOTE(review): dssp_keys is passed on unfiltered, so it may contain
    # keys that were skipped above and are absent from dssp_map - confirm
    # this is what AbstractResiduePropertyMap expects.
    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
def __init__(self, model, pdb_file, dssp="dssp"):
    """Run DSSP on a PDB file and annotate the residues of a model.

    ``dssp_dict_from_pdb_file(pdb_file, dssp)`` supplies a dictionary of
    ``(aa, ss, acc)`` records and the list of its ``(chain_id, res_id)``
    keys.  Each keyed residue of ``model`` gets its ``xtra`` entries
    ``SS_DSSP``, ``EXP_DSSP_ASA`` and ``EXP_DSSP_RASA`` filled in.

    @param model: the first model of the structure
    @type model: L{Model}

    @param pdb_file: a PDB file
    @type pdb_file: string

    @param dssp: the dssp executable (ie. the argument to os.system)
    @type dssp: string

    @raise KeyError: if a key matches no chain/residue of the model, or a
        residue name is missing from MAX_ACC or to_one_letter_code
    @raise PDBException: if the amino acid reported by DSSP differs from
        the residue found in the model
    """
    dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)

    records_by_key = {}
    records = []
    for key in dssp_keys:
        chain_id, res_id = key
        residue = model[chain_id][res_id]
        aa, ss, acc = dssp_dict[key]
        residue.xtra["SS_DSSP"] = ss
        residue.xtra["EXP_DSSP_ASA"] = acc

        # Relative accessibility, capped at 1.0.
        three_letter = residue.get_resname()
        rel_acc = acc / MAX_ACC[three_letter]
        rel_acc = 1.0 if rel_acc > 1.0 else rel_acc
        residue.xtra["EXP_DSSP_RASA"] = rel_acc

        # The amino acid seen by DSSP must agree with the structure.
        one_letter = to_one_letter_code[three_letter]
        # DSSP renames C in C-bridges to a,b,c,d,... - rename it back.
        if one_letter == "C" and _dssp_cys.match(aa):
            aa = 'C'
        if one_letter != aa:
            raise PDBException("Structure/DSSP mismatch at " + str(residue))

        record = (residue, ss, acc, rel_acc)
        records_by_key[key] = record
        records.append(record)

    AbstractResiduePropertyMap.__init__(self, records_by_key, dssp_keys,
                                        records)
def __init__(self, model, pdb_file, dssp="dssp"):
    """Run DSSP on a PDB file and annotate the residues of a model.

    ``dssp_dict_from_pdb_file(pdb_file, dssp)`` supplies a dictionary of
    ``(aa, ss, acc, phi, psi)`` records and the list of its
    ``(chain_id, res_id)`` keys.  Each keyed residue of ``model`` gets its
    ``xtra`` entries ``SS_DSSP``, ``EXP_DSSP_ASA``, ``PHI_DSSP``,
    ``PSI_DSSP`` and ``EXP_DSSP_RASA`` filled in.

    @param model: the first model of the structure
    @type model: L{Model}

    @param pdb_file: a PDB file
    @type pdb_file: string

    @param dssp: the dssp executable (ie. the argument to os.system)
    @type dssp: string

    @raise KeyError: if a DSSP key matches no residue of its chain
    @raise PDBException: if the amino acid reported by DSSP differs from
        the residue found in the model
    """
    dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)
    dssp_map = {}
    dssp_list = []

    # Altloc codes of the conformer DSSP uses (blank, A or 1).
    first_altlocs = ('A', '1', ' ')
    # Hetero flags of ordinary residues and of waters.
    plain_flags = (' ', 'W')

    def resid2code(res_id):
        """Join a residue id's resseq and icode into one string."""
        return '%s%s' % (res_id[1], res_id[2])

    for key in dssp_keys:
        chain_id, res_id = key
        chain = model[chain_id]
        try:
            res = chain[res_id]
        except KeyError:
            # DSSP does not include the HET field in its residue
            # identifier, so a HETATM residue fails the direct lookup
            # (see 3jui chain A res 593).  Retry among the non-water
            # HETATM residues, comparing resseq + icode only.
            wanted = resid2code(res_id)
            for candidate in chain:
                if candidate.id[0] in plain_flags:
                    continue
                if resid2code(candidate.id) == wanted:
                    res = candidate
                    break
            else:
                raise KeyError(res_id)

        if res.is_disordered() == 2:
            # Point mutation: Biopython defaults to the last residue while
            # DSSP takes the first one (see 1h9h chain E resi 22).  Select
            # the residue whose first atom has altloc blank, A or 1; all
            # atoms of one such residue should share the same altloc.
            for child_id in res.disordered_get_id_list():
                first_atom = res.child_dict[child_id].get_list()[0]
                if first_atom.get_altloc() in first_altlocs:
                    res.disordered_select(child_id)
                    break
            else:
                # None qualifies: fall back to the first one, as DSSP does.
                res.disordered_select(res.disordered_get_id_list()[0])
        elif res.is_disordered() == 1:
            # Point mutations are sometimes split into HETATM and ATOM
            # records with altloc 'A' and 'B' (see 3piu chain A residue
            # 273, LLP vs LYS); DSSP uses the one with altloc 'A'.  If no
            # atom here has altloc blank, A or 1, look for a non-water
            # HETATM residue with the same resseq + icode that does; if
            # none is found the current residue is kept.
            seen_altlocs = {atom.get_altloc()
                            for atom in res.get_unpacked_list()}
            if seen_altlocs.isdisjoint(first_altlocs):
                wanted = resid2code(res_id)
                for candidate in chain:
                    if candidate.id[0] in plain_flags:
                        continue
                    if (resid2code(candidate.id) == wanted
                            and candidate.get_list()[0].get_altloc()
                            in first_altlocs):
                        res = candidate
                        break

        aa, ss, acc, phi, psi = dssp_dict[key]
        for label, value in (("SS_DSSP", ss),
                             ("EXP_DSSP_ASA", acc),
                             ("PHI_DSSP", phi),
                             ("PSI_DSSP", psi)):
            res.xtra[label] = value

        # Relative accessibility, capped at 1.0; 'NA' when the residue
        # name has no entry in MAX_ACC.
        three_letter = res.get_resname()
        try:
            rel_acc = acc / MAX_ACC[three_letter]
        except KeyError:
            rel_acc = 'NA'
        else:
            rel_acc = 1.0 if rel_acc > 1.0 else rel_acc
        res.xtra["EXP_DSSP_RASA"] = rel_acc

        # The amino acid seen by DSSP must agree with the structure.
        # Unknown residue names map to 'X', which DSSP uses often.
        one_letter = SCOPData.protein_letters_3to1.get(three_letter, 'X')
        # DSSP renames C in C-bridges to a,b,c,d,... - rename it back.
        if one_letter == "C" and _dssp_cys.match(aa):
            aa = 'C'
        # A HETATM residue that DSSP reports as 'X' is tolerated.
        if (one_letter != aa) and (res.id[0] == ' ' or aa != 'X'):
            raise PDBException("Structure/DSSP mismatch at %s" % res)

        record = (res, ss, acc, rel_acc, phi, psi)
        dssp_map[key] = record
        dssp_list.append(record)

    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
def __init__(self, model, in_file, dssp="dssp", acc_array="Sander",
             file_type='PDB'):
    """Create a DSSP object.

    Parameters
    ----------
    model : Model
        The first model of the structure
    in_file : string
        Either a PDB file or a DSSP file.
    dssp : string
        The dssp executable (ie. the argument to os.system)
    acc_array : string
        Accessible surface area (ASA) from either Miller et al. (1987),
        Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
        Sander/Wilke/Miller. Defaults to Sander.
    file_type: string
        File type switch, either PDB or DSSP with PDB as default.
        Compared case-insensitively.

    Raises
    ------
    KeyError
        If a DSSP key matches no residue of its chain.
    PDBException
        If the amino acid reported by DSSP differs from the residue
        found in the model.

    """
    self.residue_max_acc = residue_max_acc[acc_array]

    file_type = file_type.upper()
    assert(file_type in ['PDB', 'DSSP'])
    if file_type == 'DSSP':
        # Existing DSSP output: parse it directly.
        dssp_dict, dssp_keys = make_dssp_dict(in_file)
    elif file_type == 'PDB':
        # Run DSSP on the PDB file.  Newer DSSP releases name the binary
        # 'mkdssp' (Debian ships a 'dssp' symlink), so when one of the
        # two names fails with OSError the other one is tried once.
        try:
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        except OSError:  # TODO: Use FileNotFoundError once drop Python 2
            if dssp not in ('dssp', 'mkdssp'):
                raise
            dssp = 'mkdssp' if dssp == 'dssp' else 'dssp'
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)

    dssp_map = {}
    dssp_list = []

    # Altloc codes of the conformer DSSP uses (blank, A or 1).
    first_altlocs = ('A', '1', ' ')
    # Hetero flags of ordinary residues and of waters.
    plain_flags = (' ', 'W')

    def resid2code(res_id):
        """Join a residue id's resseq and icode into one string."""
        return '%s%s' % (res_id[1], res_id[2])

    for key in dssp_keys:
        chain_id, res_id = key
        chain = model[chain_id]
        try:
            res = chain[res_id]
        except KeyError:
            # DSSP does not include the HET field in its residue
            # identifier, so a HETATM residue fails the direct lookup
            # (see 3jui chain A res 593).  Retry among the non-water
            # HETATM residues, comparing resseq + icode only.
            wanted = resid2code(res_id)
            for candidate in chain:
                if candidate.id[0] in plain_flags:
                    continue
                if resid2code(candidate.id) == wanted:
                    res = candidate
                    break
            else:
                raise KeyError(res_id)

        if res.is_disordered() == 2:
            # Point mutation: Biopython defaults to the last residue while
            # DSSP takes the first one (see 1h9h chain E resi 22).  Select
            # the residue whose first atom has altloc blank, A or 1; all
            # atoms of one such residue should share the same altloc.
            for child_id in res.disordered_get_id_list():
                first_atom = res.child_dict[child_id].get_list()[0]
                if first_atom.get_altloc() in first_altlocs:
                    res.disordered_select(child_id)
                    break
            else:
                # None qualifies: fall back to the first one, as DSSP does.
                res.disordered_select(res.disordered_get_id_list()[0])
        elif res.is_disordered() == 1:
            # Point mutations are sometimes split into HETATM and ATOM
            # records with altloc 'A' and 'B' (see 3piu chain A residue
            # 273, LLP vs LYS); DSSP uses the one with altloc 'A'.  If no
            # atom here has altloc blank, A or 1, look for a non-water
            # HETATM residue with the same resseq + icode that does; if
            # none is found the current residue is kept.
            seen_altlocs = set(atom.get_altloc()
                               for atom in res.get_unpacked_list())
            if seen_altlocs.isdisjoint(first_altlocs):
                wanted = resid2code(res_id)
                for candidate in chain:
                    if candidate.id[0] in plain_flags:
                        continue
                    if (resid2code(candidate.id) == wanted
                            and candidate.get_list()[0].get_altloc()
                            in first_altlocs):
                        res = candidate
                        break

        (aa, ss, acc, phi, psi, dssp_index,
         nh_o_1_relidx, nh_o_1_energy,
         o_nh_1_relidx, o_nh_1_energy,
         nh_o_2_relidx, nh_o_2_energy,
         o_nh_2_relidx, o_nh_2_energy) = dssp_dict[key]

        annotations = (
            ("SS_DSSP", ss),
            ("EXP_DSSP_ASA", acc),
            ("PHI_DSSP", phi),
            ("PSI_DSSP", psi),
            ("DSSP_INDEX", dssp_index),
            ("NH_O_1_RELIDX_DSSP", nh_o_1_relidx),
            ("NH_O_1_ENERGY_DSSP", nh_o_1_energy),
            ("O_NH_1_RELIDX_DSSP", o_nh_1_relidx),
            ("O_NH_1_ENERGY_DSSP", o_nh_1_energy),
            ("NH_O_2_RELIDX_DSSP", nh_o_2_relidx),
            ("NH_O_2_ENERGY_DSSP", nh_o_2_energy),
            ("O_NH_2_RELIDX_DSSP", o_nh_2_relidx),
            ("O_NH_2_ENERGY_DSSP", o_nh_2_energy),
        )
        for label, value in annotations:
            res.xtra[label] = value

        # Relative accessibility, capped at 1.0; 'NA' when the residue
        # name has no entry in the selected table.
        three_letter = res.get_resname()
        try:
            rel_acc = acc / self.residue_max_acc[three_letter]
        except KeyError:
            rel_acc = 'NA'
        else:
            rel_acc = 1.0 if rel_acc > 1.0 else rel_acc
        res.xtra["EXP_DSSP_RASA"] = rel_acc

        # The amino acid seen by DSSP must agree with the structure.
        # Unknown residue names become 'X', which DSSP uses often.
        try:
            one_letter = three_to_one(three_letter)
        except KeyError:
            one_letter = 'X'
        # DSSP renames C in C-bridges to a,b,c,d,... - rename it back.
        if one_letter == "C" and _dssp_cys.match(aa):
            aa = 'C'
        # A HETATM residue that DSSP reports as 'X' is tolerated.
        if (one_letter != aa) and (res.id[0] == ' ' or aa != 'X'):
            raise PDBException("Structure/DSSP mismatch at %s" % res)

        # The stored record carries the relative accessibility, not the
        # absolute one.
        dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi,
                     nh_o_1_relidx, nh_o_1_energy,
                     o_nh_1_relidx, o_nh_1_energy,
                     nh_o_2_relidx, nh_o_2_energy,
                     o_nh_2_relidx, o_nh_2_energy)
        dssp_map[key] = dssp_vals
        dssp_list.append(dssp_vals)

    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
def __init__(self, model, in_file, dssp="dssp", acc_array="Sander",
             file_type=""):
    """Create a DSSP object.

    Parameters
    ----------
    model : Model
        The first model of the structure
    in_file : string
        Either a PDB file or a DSSP file.
    dssp : string
        The dssp executable (ie. the argument to subprocess)
    acc_array : string
        Accessible surface area (ASA) from either Miller et al. (1987),
        Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
        Sander/Wilke/Miller. Defaults to Sander.
    file_type: string
        File type switch: either PDB, MMCIF or DSSP. Inferred from the
        file extension by default.

    Raises
    ------
    AssertionError
        If the file type is not PDB, mmCIF (or CIF) or DSSP.
    KeyError
        If a DSSP key matches no residue of its chain.
    PDBException
        If the amino acid reported by DSSP differs from the residue
        found in the model.

    """
    self.residue_max_acc = residue_max_acc[acc_array]

    # Work out the input format, falling back to the file extension.
    if file_type == "":
        _root, extension = os.path.splitext(in_file)
        file_type = extension[1:]
    file_type = file_type.upper()
    if file_type == "CIF":
        file_type = "MMCIF"
    assert file_type in [
        "PDB",
        "MMCIF",
        "DSSP",
    ], "File type must be PDB, mmCIF or DSSP"
    from_mmcif = file_type == "MMCIF"

    if file_type == "DSSP":
        # Existing DSSP output: parse it directly.
        dssp_dict, dssp_keys = make_dssp_dict(in_file)
    elif file_type in ("PDB", "MMCIF"):
        # Run DSSP on the coordinate file.  Newer DSSP releases name the
        # binary 'mkdssp' (Debian ships a 'dssp' symlink), so when one of
        # the two names is not found the other one is tried once.
        try:
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        except FileNotFoundError:
            if dssp not in ("dssp", "mkdssp"):
                raise
            dssp = "mkdssp" if dssp == "dssp" else "dssp"
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)

    dssp_map = {}
    dssp_list = []

    # Altloc codes of the conformer DSSP uses (blank, A or 1).
    first_altlocs = ("A", "1", " ")
    # Hetero flags of ordinary residues and of waters.
    plain_flags = (" ", "W")

    def resid2code(res_id):
        """Join a residue id's resseq and icode into one string."""
        return f"{res_id[1]}{res_id[2]}"

    # DSSP outputs label_asym_id from the mmCIF file as the chain ID, but
    # MMCIFParser reads in the auth_asym_id.  Map each label_asym_id to
    # the auth_asym_id of its first atom_site row.
    if from_mmcif:
        mmcif_dict = MMCIF2Dict(in_file)
        label_to_auth = {}
        for row, label_id in enumerate(mmcif_dict["_atom_site.label_asym_id"]):
            if label_id in label_to_auth:
                continue
            label_to_auth[label_id] = mmcif_dict["_atom_site.auth_asym_id"][row]
        dssp_mapped_keys = []

    for key in dssp_keys:
        chain_id, res_id = key
        if from_mmcif:
            chain_id = label_to_auth[chain_id]
            dssp_mapped_keys.append((chain_id, res_id))
        chain = model[chain_id]
        try:
            res = chain[res_id]
        except KeyError:
            # DSSP does not include the HET field in its residue
            # identifier, so a HETATM residue fails the direct lookup
            # (see 3jui chain A res 593).  Retry among the non-water
            # HETATM residues, comparing resseq + icode only.
            wanted = resid2code(res_id)
            for candidate in chain:
                if candidate.id[0] in plain_flags:
                    continue
                if resid2code(candidate.id) == wanted:
                    res = candidate
                    break
            else:
                raise KeyError(res_id) from None

        if res.is_disordered() == 2:
            # Point mutation: Biopython defaults to the last residue while
            # DSSP takes the first one (see 1h9h chain E resi 22).  Select
            # the residue whose first atom has altloc blank, A or 1; all
            # atoms of one such residue should share the same altloc.
            for child_id in res.disordered_get_id_list():
                first_atom = res.child_dict[child_id].get_list()[0]
                if first_atom.get_altloc() in first_altlocs:
                    res.disordered_select(child_id)
                    break
            else:
                # None qualifies: fall back to the first one, as DSSP does.
                res.disordered_select(res.disordered_get_id_list()[0])
        elif res.is_disordered() == 1:
            # Point mutations are sometimes split into HETATM and ATOM
            # records with altloc 'A' and 'B' (see 3piu chain A residue
            # 273, LLP vs LYS); DSSP uses the one with altloc 'A'.  If no
            # atom here has altloc blank, A or 1, look for a non-water
            # HETATM residue with the same resseq + icode that does; if
            # none is found the current residue is kept.
            seen_altlocs = {atom.get_altloc()
                            for atom in res.get_unpacked_list()}
            if seen_altlocs.isdisjoint(first_altlocs):
                wanted = resid2code(res_id)
                for candidate in chain:
                    if candidate.id[0] in plain_flags:
                        continue
                    if (resid2code(candidate.id) == wanted
                            and candidate.get_list()[0].get_altloc()
                            in first_altlocs):
                        res = candidate
                        break

        # The DSSP record is still stored under the original key.
        (
            aa, ss, acc, phi, psi, dssp_index,
            nh_o_1_relidx, nh_o_1_energy,
            o_nh_1_relidx, o_nh_1_energy,
            nh_o_2_relidx, nh_o_2_energy,
            o_nh_2_relidx, o_nh_2_energy,
        ) = dssp_dict[key]

        annotations = (
            ("SS_DSSP", ss),
            ("EXP_DSSP_ASA", acc),
            ("PHI_DSSP", phi),
            ("PSI_DSSP", psi),
            ("DSSP_INDEX", dssp_index),
            ("NH_O_1_RELIDX_DSSP", nh_o_1_relidx),
            ("NH_O_1_ENERGY_DSSP", nh_o_1_energy),
            ("O_NH_1_RELIDX_DSSP", o_nh_1_relidx),
            ("O_NH_1_ENERGY_DSSP", o_nh_1_energy),
            ("NH_O_2_RELIDX_DSSP", nh_o_2_relidx),
            ("NH_O_2_ENERGY_DSSP", nh_o_2_energy),
            ("O_NH_2_RELIDX_DSSP", o_nh_2_relidx),
            ("O_NH_2_ENERGY_DSSP", o_nh_2_energy),
        )
        for label, value in annotations:
            res.xtra[label] = value

        # Relative accessibility, capped at 1.0; "NA" when the residue
        # name has no entry in the selected table.
        three_letter = res.get_resname()
        try:
            rel_acc = acc / self.residue_max_acc[three_letter]
        except KeyError:
            rel_acc = "NA"
        else:
            rel_acc = 1.0 if rel_acc > 1.0 else rel_acc
        res.xtra["EXP_DSSP_RASA"] = rel_acc

        # The amino acid seen by DSSP must agree with the structure.
        # Unknown residue names become "X", which DSSP uses often.
        try:
            one_letter = three_to_one(three_letter)
        except KeyError:
            one_letter = "X"
        # DSSP renames C in C-bridges to a,b,c,d,... - rename it back.
        if one_letter == "C" and _dssp_cys.match(aa):
            aa = "C"
        # A HETATM residue that DSSP reports as "X" is tolerated.
        if (one_letter != aa) and (res.id[0] == " " or aa != "X"):
            raise PDBException(f"Structure/DSSP mismatch at {res}")

        # The stored record carries the relative accessibility, not the
        # absolute one, and is keyed by the (possibly remapped) chain id.
        dssp_vals = (
            dssp_index, aa, ss, rel_acc, phi, psi,
            nh_o_1_relidx, nh_o_1_energy,
            o_nh_1_relidx, o_nh_1_energy,
            nh_o_2_relidx, nh_o_2_energy,
            o_nh_2_relidx, o_nh_2_energy,
        )
        dssp_map[(chain_id, res_id)] = dssp_vals
        dssp_list.append(dssp_vals)

    if from_mmcif:
        # Expose the keys with the chain ids actually used in the map.
        dssp_keys = dssp_mapped_keys
    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
def __init__(self, model, pdb_file, dssp="dssp"):
    """Run DSSP on a PDB file and annotate the residues of a model.

    ``dssp_dict_from_pdb_file(pdb_file, dssp)`` supplies a dictionary of
    14-field records and the list of its ``(chain_id, res_id)`` keys.
    Each keyed residue of ``model`` gets secondary structure,
    accessibility, phi/psi, DSSP index and the NH-O / O-NH relative
    index and energy values written into ``res.xtra``.

    @param model: the first model of the structure
    @type model: L{Model}

    @param pdb_file: a PDB file
    @type pdb_file: string

    @param dssp: the dssp executable (ie. the argument to os.system)
    @type dssp: string

    @raise KeyError: if a DSSP key matches no residue of its chain
    @raise PDBException: if the amino acid reported by DSSP differs from
        the residue found in the model
    """
    dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)
    dssp_map = {}
    dssp_list = []

    # Altloc codes of the conformer DSSP uses (blank, A or 1).
    first_altlocs = ('A', '1', ' ')
    # Hetero flags of ordinary residues and of waters.
    plain_flags = (' ', 'W')

    def resid2code(res_id):
        """Join a residue id's resseq and icode into one string."""
        return '%s%s' % (res_id[1], res_id[2])

    for key in dssp_keys:
        chain_id, res_id = key
        chain = model[chain_id]
        try:
            res = chain[res_id]
        except KeyError:
            # DSSP does not include the HET field in its residue
            # identifier, so a HETATM residue fails the direct lookup
            # (see 3jui chain A res 593).  Retry among the non-water
            # HETATM residues, comparing resseq + icode only.
            wanted = resid2code(res_id)
            for candidate in chain:
                if candidate.id[0] in plain_flags:
                    continue
                if resid2code(candidate.id) == wanted:
                    res = candidate
                    break
            else:
                raise KeyError(res_id)

        if res.is_disordered() == 2:
            # Point mutation: BioPython defaults to the last residue while
            # DSSP takes the first one (see 1h9h chain E resi 22).  Select
            # the residue whose first atom has altloc blank, A or 1; all
            # atoms of one such residue should share the same altloc.
            for child_id in res.disordered_get_id_list():
                first_atom = res.child_dict[child_id].get_list()[0]
                if first_atom.get_altloc() in first_altlocs:
                    res.disordered_select(child_id)
                    break
            else:
                # None qualifies: fall back to the first one, as DSSP does.
                res.disordered_select(res.disordered_get_id_list()[0])
        elif res.is_disordered() == 1:
            # Point mutations are sometimes split into HETATM and ATOM
            # records with altloc 'A' and 'B' (see 3piu chain A residue
            # 273, LLP vs LYS); DSSP uses the one with altloc 'A'.  If no
            # atom here has altloc blank, A or 1, look for a non-water
            # HETATM residue with the same resseq + icode that does; if
            # none is found the current residue is kept.
            seen_altlocs = set(atom.get_altloc()
                               for atom in res.get_unpacked_list())
            if seen_altlocs.isdisjoint(first_altlocs):
                wanted = resid2code(res_id)
                for candidate in chain:
                    if candidate.id[0] in plain_flags:
                        continue
                    if (resid2code(candidate.id) == wanted
                            and candidate.get_list()[0].get_altloc()
                            in first_altlocs):
                        res = candidate
                        break

        (aa, ss, acc, phi, psi, dssp_index,
         nh_o_1_relidx, nh_o_1_energy,
         o_nh_1_relidx, o_nh_1_energy,
         nh_o_2_relidx, nh_o_2_energy,
         o_nh_2_relidx, o_nh_2_energy) = dssp_dict[key]

        annotations = (
            ("SS_DSSP", ss),
            ("EXP_DSSP_ASA", acc),
            ("PHI_DSSP", phi),
            ("PSI_DSSP", psi),
            ("DSSP_INDEX", dssp_index),
            ("NH_O_1_RELIDX_DSSP", nh_o_1_relidx),
            ("NH_O_1_ENERGY_DSSP", nh_o_1_energy),
            ("O_NH_1_RELIDX_DSSP", o_nh_1_relidx),
            ("O_NH_1_ENERGY_DSSP", o_nh_1_energy),
            ("NH_O_2_RELIDX_DSSP", nh_o_2_relidx),
            ("NH_O_2_ENERGY_DSSP", nh_o_2_energy),
            ("O_NH_2_RELIDX_DSSP", o_nh_2_relidx),
            ("O_NH_2_ENERGY_DSSP", o_nh_2_energy),
        )
        for label, value in annotations:
            res.xtra[label] = value

        # Relative accessibility, capped at 1.0; 'NA' when the residue
        # name has no entry in MAX_ACC.
        three_letter = res.get_resname()
        try:
            rel_acc = acc / MAX_ACC[three_letter]
        except KeyError:
            rel_acc = 'NA'
        else:
            rel_acc = 1.0 if rel_acc > 1.0 else rel_acc
        res.xtra["EXP_DSSP_RASA"] = rel_acc

        # The amino acid seen by DSSP must agree with the structure.
        # Unknown residue names map to 'X', which DSSP uses often.
        one_letter = SCOPData.protein_letters_3to1.get(three_letter, 'X')
        # DSSP renames C in C-bridges to a,b,c,d,... - rename it back.
        if one_letter == "C" and _dssp_cys.match(aa):
            aa = 'C'
        # A HETATM residue that DSSP reports as 'X' is tolerated.
        if (one_letter != aa) and (res.id[0] == ' ' or aa != 'X'):
            raise PDBException("Structure/DSSP mismatch at %s" % res)

        # The stored record carries the relative accessibility, not the
        # absolute one.
        dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi,
                     nh_o_1_relidx, nh_o_1_energy,
                     o_nh_1_relidx, o_nh_1_energy,
                     nh_o_2_relidx, nh_o_2_energy,
                     o_nh_2_relidx, o_nh_2_energy)
        dssp_map[key] = dssp_vals
        dssp_list.append(dssp_vals)

    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
def __init__(self, model, in_file, dssp="dssp", acc_array="Sander",
             file_type='PDB'):
    """Create a DSSP object.

    Parameters
    ----------
    model : Model
        The first model of the structure
    in_file : string
        Either a PDB file or a DSSP file.
    dssp : string
        The dssp executable (ie. the argument to os.system)
    acc_array : string
        Accessible surface area (ASA) from either Miller et al. (1987),
        Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
        Sander/Wilke/Miller. Defaults to Sander.
    file_type: string
        File type switch, either PDB or DSSP with PDB as default.
        Compared case-insensitively.

    Raises
    ------
    KeyError
        If a DSSP key matches no residue of its chain.
    PDBException
        If the amino acid reported by DSSP differs from the residue
        found in the model.

    """
    self.residue_max_acc = residue_max_acc[acc_array]

    file_type = file_type.upper()
    assert (file_type in ['PDB', 'DSSP'])
    if file_type == 'DSSP':
        # Existing DSSP output: parse it directly.
        dssp_dict, dssp_keys = make_dssp_dict(in_file)
    elif file_type == 'PDB':
        # Run DSSP on the PDB file.  Newer DSSP releases name the binary
        # 'mkdssp' (Debian ships a 'dssp' symlink), so when one of the
        # two names is not found the other one is tried once.
        try:
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        except FileNotFoundError:
            if dssp not in ('dssp', 'mkdssp'):
                raise
            dssp = 'mkdssp' if dssp == 'dssp' else 'dssp'
            dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)

    dssp_map = {}
    dssp_list = []

    # Altloc codes of the conformer DSSP uses (blank, A or 1).
    first_altlocs = ('A', '1', ' ')
    # Hetero flags of ordinary residues and of waters.
    plain_flags = (' ', 'W')

    def resid2code(res_id):
        """Join a residue id's resseq and icode into one string."""
        return '%s%s' % (res_id[1], res_id[2])

    for key in dssp_keys:
        chain_id, res_id = key
        chain = model[chain_id]
        try:
            res = chain[res_id]
        except KeyError:
            # DSSP does not include the HET field in its residue
            # identifier, so a HETATM residue fails the direct lookup
            # (see 3jui chain A res 593).  Retry among the non-water
            # HETATM residues, comparing resseq + icode only.
            wanted = resid2code(res_id)
            for candidate in chain:
                if candidate.id[0] in plain_flags:
                    continue
                if resid2code(candidate.id) == wanted:
                    res = candidate
                    break
            else:
                raise KeyError(res_id)

        if res.is_disordered() == 2:
            # Point mutation: BioPython defaults to the last residue while
            # DSSP takes the first one (see 1h9h chain E resi 22).  Select
            # the residue whose first atom has altloc blank, A or 1; all
            # atoms of one such residue should share the same altloc.
            for child_id in res.disordered_get_id_list():
                first_atom = res.child_dict[child_id].get_list()[0]
                if first_atom.get_altloc() in first_altlocs:
                    res.disordered_select(child_id)
                    break
            else:
                # None qualifies: fall back to the first one, as DSSP does.
                res.disordered_select(res.disordered_get_id_list()[0])
        elif res.is_disordered() == 1:
            # Point mutations are sometimes split into HETATM and ATOM
            # records with altloc 'A' and 'B' (see 3piu chain A residue
            # 273, LLP vs LYS); DSSP uses the one with altloc 'A'.  If no
            # atom here has altloc blank, A or 1, look for a non-water
            # HETATM residue with the same resseq + icode that does; if
            # none is found the current residue is kept.
            seen_altlocs = set(atom.get_altloc()
                               for atom in res.get_unpacked_list())
            if seen_altlocs.isdisjoint(first_altlocs):
                wanted = resid2code(res_id)
                for candidate in chain:
                    if candidate.id[0] in plain_flags:
                        continue
                    if (resid2code(candidate.id) == wanted
                            and candidate.get_list()[0].get_altloc()
                            in first_altlocs):
                        res = candidate
                        break

        (aa, ss, acc, phi, psi, dssp_index,
         nh_o_1_relidx, nh_o_1_energy,
         o_nh_1_relidx, o_nh_1_energy,
         nh_o_2_relidx, nh_o_2_energy,
         o_nh_2_relidx, o_nh_2_energy) = dssp_dict[key]

        annotations = (
            ("SS_DSSP", ss),
            ("EXP_DSSP_ASA", acc),
            ("PHI_DSSP", phi),
            ("PSI_DSSP", psi),
            ("DSSP_INDEX", dssp_index),
            ("NH_O_1_RELIDX_DSSP", nh_o_1_relidx),
            ("NH_O_1_ENERGY_DSSP", nh_o_1_energy),
            ("O_NH_1_RELIDX_DSSP", o_nh_1_relidx),
            ("O_NH_1_ENERGY_DSSP", o_nh_1_energy),
            ("NH_O_2_RELIDX_DSSP", nh_o_2_relidx),
            ("NH_O_2_ENERGY_DSSP", nh_o_2_energy),
            ("O_NH_2_RELIDX_DSSP", o_nh_2_relidx),
            ("O_NH_2_ENERGY_DSSP", o_nh_2_energy),
        )
        for label, value in annotations:
            res.xtra[label] = value

        # Relative accessibility, capped at 1.0; 'NA' when the residue
        # name has no entry in the selected table.
        three_letter = res.get_resname()
        try:
            rel_acc = acc / self.residue_max_acc[three_letter]
        except KeyError:
            rel_acc = 'NA'
        else:
            rel_acc = 1.0 if rel_acc > 1.0 else rel_acc
        res.xtra["EXP_DSSP_RASA"] = rel_acc

        # The amino acid seen by DSSP must agree with the structure.
        # Unknown residue names map to 'X', which DSSP uses often.
        one_letter = SCOPData.protein_letters_3to1.get(three_letter, 'X')
        # DSSP renames C in C-bridges to a,b,c,d,... - rename it back.
        if one_letter == "C" and _dssp_cys.match(aa):
            aa = 'C'
        # A HETATM residue that DSSP reports as 'X' is tolerated.
        if (one_letter != aa) and (res.id[0] == ' ' or aa != 'X'):
            raise PDBException("Structure/DSSP mismatch at %s" % res)

        # The stored record carries the relative accessibility, not the
        # absolute one.
        dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi,
                     nh_o_1_relidx, nh_o_1_energy,
                     o_nh_1_relidx, o_nh_1_energy,
                     nh_o_2_relidx, nh_o_2_energy,
                     o_nh_2_relidx, o_nh_2_energy)
        dssp_map[key] = dssp_vals
        dssp_list.append(dssp_vals)

    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)