def get_sequence(self):
    """Return the AA sequence as a Seq object.

    Every residue obtained by iterating over C{self} is translated to its
    one-letter code through C{to_one_letter_code}; residue names that are
    missing from that table are written as 'X'.

    @return: polypeptide sequence
    @rtype: L{Seq}
    """
    # Build the string with a single join instead of growing it with +=
    # inside the loop, which re-copies the partial sequence on every step.
    letters = "".join(
        to_one_letter_code.get(res.get_resname(), 'X') for res in self
    )
    return Seq(letters, generic_protein)
def __init__(self, model, pdb_file, dssp="dssp"):
    """Run DSSP on a PDB file and map its output onto the residues of a model.

    For every key returned by C{dssp_dict_from_pdb_file} the matching
    residue is looked up in C{model}, annotated through its C{xtra}
    dictionary (keys SS_DSSP, EXP_DSSP_ASA, PHI_DSSP, PSI_DSSP and
    EXP_DSSP_RASA) and stored as a
    (residue, ss, acc, rel_acc, phi, psi) tuple, both in a dictionary
    keyed like the DSSP output and in a list. These are then handed to
    C{AbstractResiduePropertyMap.__init__}.

    @param model: the first model of the structure
    @type model: L{Model}

    @param pdb_file: a PDB file
    @type pdb_file: string

    @param dssp: the dssp executable (ie. the argument to os.system)
    @type dssp: string

    @raise KeyError: if a residue reported by DSSP cannot be found in the
        chain, not even among its non-water HETATM residues
    @raise PDBException: if the residue type reported by DSSP does not
        agree with the residue found in the structure
    """
    # Run DSSP and collect its per-residue records.
    dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)
    dssp_map = {}
    dssp_list = []

    # Alternative locations that DSSP treats as "the first one":
    # blank, A or 1.
    first_altlocs = tuple('A1 ')

    def resid2code(res_id):
        """Serialize a residue's resseq and icode for easy comparison."""
        return '%s%s' % (res_id[1], res_id[2])

    # Build both the key -> record dictionary and the ordered record list.
    for key in dssp_keys:
        chain_id, res_id = key
        chain = model[chain_id]

        try:
            res = chain[res_id]
        except KeyError:
            # DSSP does not take the HET field into account when it
            # identifies a residue, so a HETATM residue is missed by the
            # direct lookup (see 3jui chain A res 593). Search the
            # non-water HETATM residues for the same resseq + icode.
            wanted = resid2code(res_id)
            for candidate in chain:
                if candidate.id[0] in (' ', 'W'):
                    continue
                if resid2code(candidate.id) == wanted:
                    res = candidate
                    break
            else:
                # Nothing in the chain matches this DSSP residue.
                raise KeyError(res_id)

        disorder = res.is_disordered()
        if disorder == 2:
            # Point mutation stored as a disordered residue: BioPython
            # selects the last alternative by default, whereas DSSP takes
            # the first one (altloc blank, A or 1); see 1h9h chain E
            # resi 22. All atoms of one alternative should share the same
            # altloc, so inspecting its first atom is enough.
            for child_id in res.disordered_get_id_list():
                first_atom = res.child_dict[child_id].get_list()[0]
                if first_atom.get_altloc() in first_altlocs:
                    res.disordered_select(child_id)
                    break
            else:
                # No alternative carries such an altloc: fall back to the
                # first one listed, as DSSP does.
                res.disordered_select(res.disordered_get_id_list()[0])
        elif disorder == 1:
            # Point mutations are sometimes split between a HETATM and an
            # ATOM residue with altloc 'A' and 'B', e.g. 3piu chain A
            # residue 273:
            #   <Residue LLP het=H_LLP resseq=273 icode= >
            #   <Residue LYS het=  resseq=273 icode= >
            # DSSP uses the HETATM LLP because it has altloc 'A'. When
            # none of this residue's atoms has altloc blank, A or 1, look
            # for a non-water HETATM residue with the same resseq + icode
            # whose first atom does; if none exists the current residue
            # is kept.
            seen_altlocs = set(
                atom.get_altloc() for atom in res.get_unpacked_list()
            )
            if seen_altlocs.isdisjoint(first_altlocs):
                wanted = resid2code(res_id)
                for candidate in chain:
                    if candidate.id[0] in (' ', 'W'):
                        continue
                    if resid2code(candidate.id) != wanted:
                        continue
                    if candidate.get_list()[0].get_altloc() in first_altlocs:
                        res = candidate
                        break

        # Attach the raw DSSP values to the residue.
        aa, ss, acc, phi, psi = dssp_dict[key]
        for xtra_key, value in (
            ("SS_DSSP", ss),
            ("EXP_DSSP_ASA", acc),
            ("PHI_DSSP", phi),
            ("PSI_DSSP", psi),
        ):
            res.xtra[xtra_key] = value

        # Relative accessibility, capped at 1.0; 'NA' when the residue
        # name has no entry in MAX_ACC.
        resname = res.get_resname()
        try:
            max_acc = MAX_ACC[resname]
        except KeyError:
            rel_acc = 'NA'
        else:
            rel_acc = acc / max_acc
            if rel_acc > 1.0:
                rel_acc = 1.0
        res.xtra["EXP_DSSP_RASA"] = rel_acc

        # Sanity check: the amino acid seen by DSSP must match the one in
        # the structure. NB: DSSP uses X often.
        one_letter = to_one_letter_code.get(resname, 'X')
        if one_letter == "C" and _dssp_cys.match(aa):
            # DSSP renames C in C-bridges to a,b,c,d,... - rename it back.
            aa = 'C'
        if one_letter != aa:
            # A HETATM residue that DSSP reports as X is tolerated.
            if res.id[0] == ' ' or aa != 'X':
                raise PDBException("Structure/DSSP mismatch at %s" % res)

        record = (res, ss, acc, rel_acc, phi, psi)
        dssp_map[key] = record
        dssp_list.append(record)

    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
def __init__(self, model, pdb_file, dssp="dssp"):
    """Run DSSP on a PDB file and map its output onto the residues of a model.

    For every key returned by C{dssp_dict_from_pdb_file} the matching
    residue is looked up in C{model}, annotated through its C{xtra}
    dictionary (keys SS_DSSP, EXP_DSSP_ASA, PHI_DSSP, PSI_DSSP and
    EXP_DSSP_RASA) and stored as a
    (residue, ss, acc, rel_acc, phi, psi) tuple, both in a dictionary
    keyed like the DSSP output and in a list. These are then handed to
    C{AbstractResiduePropertyMap.__init__}.

    @param model: the first model of the structure
    @type model: L{Model}

    @param pdb_file: a PDB file
    @type pdb_file: string

    @param dssp: the dssp executable (ie. the argument to os.system)
    @type dssp: string

    @raise KeyError: if a residue reported by DSSP cannot be found in the
        chain, not even among its non-water HETATM residues
    @raise PDBException: if the residue type reported by DSSP does not
        agree with the residue found in the structure
    """
    # Run DSSP and collect its per-residue records.
    dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)
    dssp_map = {}
    dssp_list = []

    # Alternative locations that DSSP treats as "the first one":
    # blank, A or 1.
    first_altlocs = tuple('A1 ')

    def resid2code(res_id):
        """Serialize a residue's resseq and icode for easy comparison."""
        return '%s%s' % (res_id[1], res_id[2])

    # Build both the key -> record dictionary and the ordered record list.
    for key in dssp_keys:
        chain_id, res_id = key
        chain = model[chain_id]

        try:
            res = chain[res_id]
        except KeyError:
            # DSSP does not take the HET field into account when it
            # identifies a residue, so a HETATM residue is missed by the
            # direct lookup (see 3jui chain A res 593). Search the
            # non-water HETATM residues for the same resseq + icode.
            wanted = resid2code(res_id)
            for candidate in chain:
                if candidate.id[0] in (' ', 'W'):
                    continue
                if resid2code(candidate.id) == wanted:
                    res = candidate
                    break
            else:
                # Nothing in the chain matches this DSSP residue.
                raise KeyError(res_id)

        disorder = res.is_disordered()
        if disorder == 2:
            # Point mutation stored as a disordered residue: BioPython
            # selects the last alternative by default, whereas DSSP takes
            # the first one (altloc blank, A or 1); see 1h9h chain E
            # resi 22. All atoms of one alternative should share the same
            # altloc, so inspecting its first atom is enough.
            for child_id in res.disordered_get_id_list():
                first_atom = res.child_dict[child_id].get_list()[0]
                if first_atom.get_altloc() in first_altlocs:
                    res.disordered_select(child_id)
                    break
            else:
                # No alternative carries such an altloc: fall back to the
                # first one listed, as DSSP does.
                res.disordered_select(res.disordered_get_id_list()[0])
        elif disorder == 1:
            # Point mutations are sometimes split between a HETATM and an
            # ATOM residue with altloc 'A' and 'B', e.g. 3piu chain A
            # residue 273:
            #   <Residue LLP het=H_LLP resseq=273 icode= >
            #   <Residue LYS het=  resseq=273 icode= >
            # DSSP uses the HETATM LLP because it has altloc 'A'. When
            # none of this residue's atoms has altloc blank, A or 1, look
            # for a non-water HETATM residue with the same resseq + icode
            # whose first atom does; if none exists the current residue
            # is kept.
            seen_altlocs = set(
                atom.get_altloc() for atom in res.get_unpacked_list()
            )
            if seen_altlocs.isdisjoint(first_altlocs):
                wanted = resid2code(res_id)
                for candidate in chain:
                    if candidate.id[0] in (' ', 'W'):
                        continue
                    if resid2code(candidate.id) != wanted:
                        continue
                    if candidate.get_list()[0].get_altloc() in first_altlocs:
                        res = candidate
                        break

        # Attach the raw DSSP values to the residue.
        aa, ss, acc, phi, psi = dssp_dict[key]
        for xtra_key, value in (
            ("SS_DSSP", ss),
            ("EXP_DSSP_ASA", acc),
            ("PHI_DSSP", phi),
            ("PSI_DSSP", psi),
        ):
            res.xtra[xtra_key] = value

        # Relative accessibility, capped at 1.0; 'NA' when the residue
        # name has no entry in MAX_ACC.
        resname = res.get_resname()
        try:
            max_acc = MAX_ACC[resname]
        except KeyError:
            rel_acc = 'NA'
        else:
            rel_acc = acc / max_acc
            if rel_acc > 1.0:
                rel_acc = 1.0
        res.xtra["EXP_DSSP_RASA"] = rel_acc

        # Sanity check: the amino acid seen by DSSP must match the one in
        # the structure. NB: DSSP uses X often.
        one_letter = to_one_letter_code.get(resname, 'X')
        if one_letter == "C" and _dssp_cys.match(aa):
            # DSSP renames C in C-bridges to a,b,c,d,... - rename it back.
            aa = 'C'
        if one_letter != aa:
            # A HETATM residue that DSSP reports as X is tolerated.
            if res.id[0] == ' ' or aa != 'X':
                raise PDBException("Structure/DSSP mismatch at %s" % res)

        record = (res, ss, acc, rel_acc, phi, psi)
        dssp_map[key] = record
        dssp_list.append(record)

    AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)