def __init__(self, pdb_hierarchy, nontrans_only=False, out=sys.stdout,
             quiet=True):
    """Measure and classify the omega dihedral between consecutive residues.

    Every model and chain of ``pdb_hierarchy`` is walked residue group by
    residue group.  For each atom group whose omega (against the preceding
    residue) can be computed, an ``omega_result`` is built and the
    per-type tallies are updated.

    :param pdb_hierarchy: hierarchy to analyse; its atom i_seqs are reset
      when they are all zero.
    :param nontrans_only: when true, only cis/twisted results are stored in
      ``self.results``.
    :param out: not used in this method body.
    :param quiet: not used in this method body.
    """
    validation.__init__(self)
    # Indexed by [OMEGA_GENERAL, OMEGA_PRO].
    self.residue_count = [0, 0]
    # Indexed by [OMEGA_GENERAL, OMEGA_PRO], then by
    # [OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED].
    self.omega_count = [[0, 0, 0], [0, 0, 0]]
    from mmtbx.validation import utils
    from scitbx.array_family import flex
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if all_i_seqs.all_eq(0):
        pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    prev_rezes = next_rezes = prev_resid = None
    cur_resseq = next_resseq = None
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            # Neighbour state never carries over from one chain to the next.
            prev_rezes = next_rezes = prev_resid = None
            cur_resseq = next_resseq = None
            chain_id = (utils.get_segid_as_chainid(chain=chain)
                        if use_segids else chain.id)
            residues = list(chain.residue_groups())
            for i, residue_group in enumerate(residues):
                # Residues are handled as per-altloc atom lists so that an
                # "A" conformer can borrow atoms from the "" conformer when
                # it needs them (original author's note: see 1jxt.pdb).
                prev_atom_list = next_atom_list = atom_list = None
                if cur_resseq is not None:
                    prev_rezes = rezes
                    prev_resseq = cur_resseq
                rezes = construct_residues(residue_group)
                cur_resseq = residue_group.resseq_as_int()
                cur_icode = residue_group.icode.strip()
                if i > 0:
                    before = residues[i - 1]
                    before_resseq = before.resseq_as_int()
                    if cur_resseq == before_resseq:
                        # Same number is only acceptable when an insertion
                        # code tells the two residues apart.
                        if cur_icode == '' and before.icode.strip() == '':
                            continue
                    elif cur_resseq != before_resseq + 1:
                        # Numbering gap: no peptide bond to measure.
                        continue
                for atom_group in residue_group.atom_groups():
                    alt_conf = atom_group.altloc
                    if rezes is not None:
                        atom_list = rezes.get(alt_conf)
                    if prev_rezes is not None:
                        prev_atom_list = prev_rezes.get(alt_conf)
                        if prev_atom_list is None:
                            # No matching altloc in the previous residue:
                            # fall back to its first altloc in sorted order.
                            first_key = sorted(prev_rezes.keys())[0]
                            prev_atom_list = prev_rezes.get(first_key)
                    omega = get_omega(prev_atom_list, atom_list)
                    highest_mc_b = get_highest_mc_b(prev_atom_list, atom_list)
                    if omega is None:
                        continue
                    resname = atom_group.resname[0:3]
                    coords = get_center(atom_group)
                    res_type = OMEGA_PRO if resname == "PRO" else OMEGA_GENERAL
                    self.residue_count[res_type] += 1
                    omega_type = find_omega_type(omega)
                    is_nontrans = omega_type in (OMEGALYZE_CIS,
                                                 OMEGALYZE_TWISTED)
                    if is_nontrans:
                        self.n_outliers += 1
                    self.omega_count[res_type][omega_type] += 1
                    # Kinemage markup slots: [CA(prev), C(prev), N, CA].
                    markup_atoms = [None, None, None, None]
                    if is_nontrans:
                        for source_atoms, slots in (
                                (prev_atom_list, {"CA": 0, "C": 1}),
                                (atom_list, {"N": 2, "CA": 3})):
                            for a in source_atoms:
                                if a is None:
                                    continue
                                a_ = atom(pdb_atom=a)
                                slot = slots.get(a.name.strip())
                                if slot is not None:
                                    markup_atoms[slot] = kin_atom(
                                        id_str=a_.atom_group_id_str(),
                                        xyz=a_.xyz)
                    # Work out which altloc/resname of the previous residue
                    # to report alongside this one.
                    prev_alts = []
                    prev_resnames = {}
                    for ag in residues[i - 1].atom_groups():
                        prev_alts.append(ag.altloc)
                        prev_resnames[ag.altloc] = ag.resname
                    if alt_conf in prev_alts:
                        prev_altloc = alt_conf
                    elif len(prev_alts) > 1:
                        prev_altloc = prev_alts[1]
                    else:
                        prev_altloc = prev_alts[0]
                    prev_resname = prev_resnames[prev_altloc]
                    result = omega_result(
                        chain_id=chain_id,
                        resseq=residue_group.resseq,
                        icode=residue_group.icode,
                        resname=atom_group.resname,
                        altloc=atom_group.altloc,
                        prev_resseq=residues[i - 1].resseq,
                        prev_icode=residues[i - 1].icode,
                        prev_resname=prev_resname,
                        prev_altloc=prev_altloc,
                        segid=None,
                        omega=omega,
                        omega_type=omega_type,
                        res_type=res_type,
                        is_nontrans=is_nontrans,
                        outlier=is_nontrans,
                        highest_mc_b=highest_mc_b,
                        xyz=coords,
                        markup_atoms=markup_atoms)
                    if is_nontrans or not nontrans_only:
                        self.results.append(result)
                    if is_nontrans:
                        i_seqs = atom_group.atoms().extract_i_seq()
                        # This assert was copied from ramalyze.
                        assert (not i_seqs.all_eq(0))
                        self._outlier_i_seqs.extend(i_seqs)
def __init__(self, pdb_hierarchy, data_version="8000", outliers_only=False,
             show_errors=False, out=sys.stdout, quiet=False):
    """Score the side-chain rotamer of every atom group in the hierarchy.

    Chi angles are measured per atom group, evaluated with ``RotamerEval``
    and turned into ``rotamer`` results appended to ``self.results``.

    :param pdb_hierarchy: hierarchy whose models/chains/residue groups are
      walked.
    :param data_version: only ``"8000"`` is accepted.
    :param outliers_only: when true, only results whose ``is_outlier()`` is
      true are stored.
    :param show_errors: passed to ``SidechainAngles``; when true, residues
      whose chi measurement raises ``AttributeError`` are reported on
      ``out`` and stored as incomplete results.
    :param out: stream receiving the "missing sidechain atoms" messages.
    :param quiet: not used in this method body.
    :raises ValueError: if ``data_version`` is not ``"8000"``.
    """
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    from mmtbx.rotamer.sidechain_angles import SidechainAngles
    from mmtbx.rotamer import rotamer_eval
    from mmtbx.rotamer.rotamer_eval import RotamerID
    from mmtbx.validation import utils
    self.data_version = data_version
    if self.data_version != "8000":
        raise ValueError(
            "data_version given to RotamerEval not recognized (%s)."
            % data_version)
    self.outlier_threshold = OUTLIER_THRESHOLD
    sidechain_angles = SidechainAngles(show_errors)
    rotamer_evaluator = rotamer_eval.RotamerEval(data_version=data_version)
    rotamer_id = rotamer_eval.RotamerID()  # loads in the rotamer names
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    current_rotamers = {}
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            chain_id = (utils.get_segid_as_chainid(chain=chain)
                        if use_segids else chain.id)
            for rg in chain.residue_groups():
                all_dict = construct_complete_sidechain(rg)
                for atom_group in rg.atom_groups():
                    coords = get_center(atom_group)
                    resname = atom_group.resname
                    occupancy = get_occupancy(atom_group)
                    kwargs = dict(
                        chain_id=chain_id,
                        resseq=rg.resseq,
                        icode=rg.icode,
                        altloc=atom_group.altloc,
                        resname=resname,
                        xyz=coords,
                        occupancy=occupancy)
                    atom_dict = all_dict.get(atom_group.altloc)
                    res_key = get_residue_key(atom_group=atom_group)
                    try:
                        chis = sidechain_angles.measureChiAngles(
                            atom_group, atom_dict)
                    except AttributeError:
                        # The measurement could not be made; optionally
                        # record the residue as incomplete, then move on.
                        if show_errors:
                            kwargs['incomplete'] = True
                            result = rotamer(**kwargs)
                            print >> out, '%s is missing some sidechain atoms' % \
                                result.id_str()
                            self.results.append(result)
                        continue
                    if chis is None or None in chis:
                        continue
                    cur_res = resname.lower().strip()
                    if cur_res == 'mse':
                        # Selenomethionine is evaluated as methionine.
                        cur_res = 'met'
                    value = rotamer_evaluator.evaluate(cur_res, chis)
                    if value is None:
                        continue
                    self.n_total += 1
                    kwargs['score'] = value * 100
                    wrap_chis = rotamer_id.wrap_chis(
                        resname.strip(), chis, symmetry=False)
                    sym_chis = rotamer_id.wrap_sym(
                        resname.strip(), wrap_chis[:])
                    evaluation = self.evaluateScore(value)
                    kwargs['evaluation'] = evaluation
                    flagged = (evaluation == "OUTLIER")
                    kwargs['outlier'] = flagged
                    if flagged:
                        kwargs['rotamer_name'] = evaluation
                    else:
                        name = rotamer_id.identify(resname, wrap_chis)
                        # An empty name means no named rotamer matched.
                        kwargs['rotamer_name'] = (
                            "UNCLASSIFIED" if name == '' else name)
                    # Always report four chi slots, padding with None.
                    while len(wrap_chis) < 4:
                        wrap_chis.append(None)
                    kwargs['chi_angles'] = wrap_chis
                    result = rotamer(**kwargs)
                    if result.is_outlier() or not outliers_only:
                        self.results.append(result)
    out_count, out_percent = self.get_outliers_count_and_fraction()
    self.out_percent = out_percent * 100.0
def __init__(self, pdb_hierarchy, nontrans_only=False, out=sys.stdout,
             quiet=True):
    """Classify the omega dihedral of every two-residue protein fragment.

    Fragments come from ``generate_protein_fragments`` (length 2,
    non-standard peptides included).  The second residue of each fragment
    is the one the result is reported for.  Results are finally sorted by
    ``model_id + ':' + id_str()``.

    :param pdb_hierarchy: hierarchy to analyse; its atom i_seqs are reset
      when they are all zero.
    :param nontrans_only: when true, only non-trans results are stored in
      ``self.results``.
    :param out: not used in this method body.
    :param quiet: not used in this method body.
    """
    validation.__init__(self)
    # Indexed by [OMEGA_GENERAL, OMEGA_PRO].
    self.residue_count = [0, 0]
    # Indexed by [OMEGA_GENERAL, OMEGA_PRO], then by
    # [OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED].
    self.omega_count = [[0, 0, 0], [0, 0, 0]]
    from mmtbx.validation import utils
    from scitbx.array_family import flex
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    if pdb_atoms.extract_i_seq().all_eq(0):
        pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    first_conf_altloc = None
    prev_chain_id = None
    fragments = generate_protein_fragments(
        pdb_hierarchy,
        length=2,
        geometry=None,
        include_non_standard_peptides=True)
    for twores in fragments:
        prev_residue = twores[0]
        # The second residue is the relevant one for id-ing cis-Pro.
        main_residue = twores[1]
        conf_altloc = get_conformer_altloc(twores)
        prevres_altloc, mainres_altloc = get_local_omega_altlocs(twores)
        # '' evaluates false, so this is '' only when both altlocs are blank.
        twores_altloc = prevres_altloc or mainres_altloc
        chain = main_residue.parent().parent()
        chain_id = (utils.get_segid_as_chainid(chain=chain)
                    if use_segids else chain.id)
        if chain_id != prev_chain_id:
            # Moved to a new chain: reset the reference altloc.
            first_conf_altloc = conf_altloc
            prev_chain_id = chain_id
        if twores_altloc == '' and conf_altloc != first_conf_altloc:
            # Skip non-alternate residues unless this is the first pass
            # through the chain.
            continue
        # [CA1, C1, N2, CA2], with None standing in for missing atoms.
        omega_atoms = get_omega_atoms(twores)
        if None in omega_atoms:
            continue
        omega = get_omega(omega_atoms)
        if omega is None:
            continue
        omega_type = find_omega_type(omega)
        is_nontrans = (omega_type != OMEGALYZE_TRANS)
        if is_nontrans:
            self.n_outliers += 1
        res_type = (OMEGA_PRO if main_residue.resname == "PRO"
                    else OMEGA_GENERAL)
        self.residue_count[res_type] += 1
        self.omega_count[res_type][omega_type] += 1
        highest_mc_b = get_highest_mc_b(prev_residue.atoms(),
                                        main_residue.atoms())
        coords = get_center(main_residue)
        markup_atoms = [
            kin_atom(omega_atom.parent().id_str(), omega_atom.xyz)
            for omega_atom in omega_atoms]
        result = omega_result(
            model_id=prev_residue.parent().parent().parent().id,
            chain_id=chain_id,
            resseq=main_residue.resseq,
            icode=main_residue.icode,
            resname=main_residue.resname,
            altloc=mainres_altloc,
            prev_resseq=prev_residue.resseq,
            prev_icode=prev_residue.icode,
            prev_resname=prev_residue.resname,
            prev_altloc=prevres_altloc,
            segid=None,
            omega=omega,
            omega_type=omega_type,
            res_type=res_type,
            is_nontrans=is_nontrans,
            outlier=is_nontrans,
            highest_mc_b=highest_mc_b,
            xyz=coords,
            markup_atoms=markup_atoms)
        if is_nontrans or not nontrans_only:
            self.results.append(result)
        if is_nontrans:
            i_seqs = main_residue.atoms().extract_i_seq()
            # This assert was copied from ramalyze.
            assert (not i_seqs.all_eq(0))
            self._outlier_i_seqs.extend(i_seqs)
    self.results.sort(key=lambda res: res.model_id + ':' + res.id_str())
def __init__(self, pdb_hierarchy, outliers_only=False, out=sys.stdout,
             collect_ideal=False, quiet=False):
    """Compute the C-beta deviation of every non-glycine residue.

    For each conformer of each residue group, the N, CA, C and CB atoms are
    collected and handed to ``calculate_ideal_and_deviation``.  Residues
    deviating by 0.25 or more are counted as outliers.

    :param pdb_hierarchy: hierarchy whose models/chains/residue groups are
      walked.
    :param outliers_only: when ``False``, every measured residue is stored
      in ``self.results``; otherwise only outliers are.
    :param out: not used in this method body.
    :param collect_ideal: when true, the ideal CB position of each stored
      result is kept in ``self.beta_ideal`` keyed by ``result.id_str()``.
    :param quiet: not used in this method body.
    """
    validation.__init__(self)
    self._outlier_i_seqs = flex.size_t()
    self.beta_ideal = {}
    # Atom names are matched against the four-column PDB name field, so the
    # nitrogen and carbonyl carbon need their trailing padding (" N  ",
    # " C  "); three-character keys would never match and no residue would
    # ever collect all four atoms.
    relevant_atom_names = frozenset((" CA ", " N  ", " C  ", " CB "))
    self.stats = group_args(n_results=0,
                            n_weighted_results=0,
                            n_weighted_outliers=0)
    from mmtbx.validation import utils
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            if use_segids:
                chain_id = utils.get_segid_as_chainid(chain=chain)
            else:
                chain_id = chain.id
            for rg in chain.residue_groups():
                for i_cf, cf in enumerate(rg.conformers()):
                    is_first = (i_cf == 0)
                    for residue in cf.residues():
                        if residue.resname == "GLY":
                            continue
                        is_alt_conf = False
                        relevant_atoms = {}
                        for atom in residue.atoms():
                            if atom.name in relevant_atom_names:
                                relevant_atoms[atom.name] = atom
                                if len(atom.parent().altloc) != 0:
                                    is_alt_conf = True
                        # Later conformers are only measured when one of
                        # the four atoms really is an alternate; all four
                        # atoms must be present.
                        if not (is_first or is_alt_conf):
                            continue
                        if len(relevant_atoms) != 4:
                            continue
                        ideal = calculate_ideal_and_deviation(
                            relevant_atoms=relevant_atoms,
                            resname=residue.resname)
                        dev = ideal.deviation
                        dihedralNABB = ideal.dihedral
                        betaxyz = ideal.ideal
                        if dev is None:
                            continue
                        resCB = relevant_atoms[" CB "]
                        self.stats.n_results += 1
                        self.stats.n_weighted_results += resCB.occ
                        is_outlier = (dev >= 0.25)
                        # Kept as an equality test so that values other
                        # than False (e.g. None) still mean "outliers only".
                        if not (is_outlier or outliers_only == False):
                            continue
                        if is_outlier:
                            self.n_outliers += 1
                            self.stats.n_weighted_outliers += resCB.occ
                            # Record the CB atom itself rather than whatever
                            # atom the collection loop happened to end on.
                            self._outlier_i_seqs.append(resCB.i_seq)
                        altchar = cf.altloc if is_alt_conf else " "
                        result = cbeta(
                            chain_id=chain_id,
                            resname=residue.resname,
                            resseq=residue.resseq,
                            icode=residue.icode,
                            altloc=altchar,
                            xyz=resCB.xyz,
                            occupancy=resCB.occ,
                            deviation=dev,
                            dihedral_NABB=dihedralNABB,
                            ideal_xyz=betaxyz,
                            outlier=is_outlier)
                        self.results.append(result)
                        if collect_ideal:
                            self.beta_ideal[result.id_str()] = betaxyz
def run(args):
    """Run the mp_geo geometry report for a single model file.

    Depending on the ``mp_geo`` phil parameters this writes one of:
    a colon-separated list of bond/angle results (``bonds_and_angles``),
    kinemage markup for them (``kinemage``), or the RNA backbone dihedrals
    (``rna_backbone``).  Output goes to ``out_file`` when given (appended
    to for kinemage output, overwritten otherwise) and to ``sys.stdout``
    otherwise.

    Original author's note: consider adding test cases to
    cctbx_project/mmtbx/validation/regression/tst_mp_geo.py (with just
    .pdb, without arguments, etc.).

    :param args: command-line arguments, processed against the master phil.
    :raises AssertionError: unless exactly one model file is supplied.
    """
    import sys
    import iotbx.phil
    master_phil = get_master_phil()
    input_objects = iotbx.phil.process_command_line_with_files(
        args=args,
        master_phil=master_phil,
        pdb_file_def="mp_geo.pdb")
    work_params = input_objects.work.extract()
    assert len(work_params.mp_geo.pdb) == 1, "Need a model file to run"
    file_name = work_params.mp_geo.pdb[0]
    out_file = work_params.mp_geo.out_file
    do_bonds_and_angles = work_params.mp_geo.bonds_and_angles
    do_kinemage = work_params.mp_geo.kinemage
    do_rna_backbone = work_params.mp_geo.rna_backbone
    outliers_only = work_params.mp_geo.outliers_only
    use_cdl = work_params.mp_geo.cdl
    log = StringIO()
    basename = os.path.basename(file_name)

    # Choose the output stream.  A file is only opened when one of the
    # report modes is active, and only a stream opened here is ever closed:
    # sys.stdout belongs to the caller.
    out = sys.stdout
    owns_out = False
    if out_file is not None:
        if do_bonds_and_angles:
            mode = 'w'
        elif do_kinemage:
            mode = 'a'
        elif do_rna_backbone:
            mode = 'w'
        else:
            mode = None
        if mode is not None:
            out = open(out_file, mode)
            owns_out = True

    try:
        restraints_loading_flags = {"use_neutron_distances": False}
        from mmtbx.validation import utils
        params = pdb_interpretation.master_params.extract()
        params.restraints_library.cdl = use_cdl
        params.clash_guard.nonbonded_distance_threshold = None
        processed_pdb_file = pdb_interpretation.process(
            params=params,
            mon_lib_srv=server.server(),
            ener_lib=server.ener_lib(),
            file_name=file_name,
            strict_conflict_handling=True,
            restraints_loading_flags=restraints_loading_flags,
            force_symmetry=True,
            substitute_non_crystallographic_unit_cell_if_necessary=True,
            log=log)
        grm = processed_pdb_file.geometry_restraints_manager()
        hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy
        use_segids = utils.use_segids_in_place_of_chainids(
            hierarchy=hierarchy)
        if do_bonds_and_angles or do_kinemage:
            rc = get_bond_and_angle_outliers(
                pdb_hierarchy=hierarchy,
                xray_structure=processed_pdb_file.xray_structure(),
                geometry_restraints_manager=grm,
                use_segids=use_segids,
                outliers_only=outliers_only)
            # Classify each chain of the first model; an existing "NA" or
            # "PROTEIN" entry for a chain id is never overwritten.
            chain_types = {}
            for chain in hierarchy.models()[0].chains():
                if use_segids:
                    chain_id = utils.get_segid_as_chainid(chain=chain)
                else:
                    chain_id = chain.id
                main_conf = chain.conformers()[0]
                if chain_types.get(chain_id) not in ["NA", "PROTEIN"]:
                    if main_conf.is_na():
                        chain_types[chain_id] = "NA"
                    elif main_conf.is_protein():
                        chain_types[chain_id] = "PROTEIN"
                    else:
                        chain_types[chain_id] = "UNK"

            def sort_key(result):
                # Order by residue number, then altloc, then atom label.
                return (result.atoms_info[0].resseq,
                        get_altloc(atoms_info=result.atoms_info),
                        get_atoms_str(atoms_info=result.atoms_info))

            # Row layout:
            # chain:number:ins:alt:type:measure:value:sigmas:class
            # Bonds are listed first, then angles, each sorted on its own.
            outliers = []
            for results in (rc.bonds.results, rc.angles.results):
                for result in sorted(results, key=sort_key):
                    atom_info = result.atoms_info[0]
                    outliers.append([
                        atom_info.chain_id,
                        atom_info.resseq,
                        atom_info.icode,
                        get_altloc(atoms_info=result.atoms_info),
                        atom_info.resname,
                        get_atoms_str(atoms_info=result.atoms_info),
                        result.model,
                        result.score,
                        chain_types[atom_info.chain_id],
                    ])
            if do_bonds_and_angles:
                for outlier in outliers:
                    print >> out, "%s:%2s:%s:%s:%s:%s:%s:%.3f:%.3f:%s" % (
                        basename, outlier[0], outlier[1], outlier[2],
                        outlier[3], outlier[4], outlier[5], outlier[6],
                        outlier[7], outlier[8])
            elif do_kinemage:
                print >> out, rc.bonds.kinemage_header
                for result in rc.bonds.results:
                    print >> out, result.as_kinemage()
                print >> out, rc.angles.kinemage_header
                for result in rc.angles.results:
                    print >> out, result.as_kinemage()
        elif do_rna_backbone:
            rna_bb = utils.get_rna_backbone_dihedrals(processed_pdb_file)
            print >> out, rna_bb
    finally:
        # Also runs when processing raises, so the handle never leaks.
        if owns_out:
            out.close()
def __init__(self, pdb_hierarchy, outliers_only=False, out=sys.stdout,
             collect_ideal=False, quiet=False):
    """Compute the C-beta deviation of every non-glycine residue.

    For each conformer of each residue group, the N, CA, C and CB atoms are
    collected and handed to ``calculate_ideal_and_deviation``.  Residues
    deviating by 0.25 or more are counted as outliers.

    :param pdb_hierarchy: hierarchy whose models/chains/residue groups are
      walked.
    :param outliers_only: when ``False``, every measured residue is stored
      in ``self.results``; otherwise only outliers are.
    :param out: not used in this method body.
    :param collect_ideal: when true, the ideal CB position of each stored
      result is kept in ``self.beta_ideal`` keyed by ``result.id_str()``.
    :param quiet: not used in this method body.
    """
    validation.__init__(self)
    self._outlier_i_seqs = flex.size_t()
    self.beta_ideal = {}
    # Atom names are matched against the four-column PDB name field, so the
    # nitrogen and carbonyl carbon need their trailing padding (" N  ",
    # " C  "); three-character keys would never match and no residue would
    # ever collect all four atoms.
    relevant_atom_names = frozenset((" CA ", " N  ", " C  ", " CB "))
    from mmtbx.validation import utils
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            if use_segids:
                chain_id = utils.get_segid_as_chainid(chain=chain)
            else:
                chain_id = chain.id
            for rg in chain.residue_groups():
                for i_cf, cf in enumerate(rg.conformers()):
                    is_first = (i_cf == 0)
                    for residue in cf.residues():
                        if residue.resname == "GLY":
                            continue
                        is_alt_conf = False
                        relevant_atoms = {}
                        for atom in residue.atoms():
                            if atom.name in relevant_atom_names:
                                relevant_atoms[atom.name] = atom
                                if len(atom.parent().altloc) != 0:
                                    is_alt_conf = True
                        # Later conformers are only measured when one of
                        # the four atoms really is an alternate; all four
                        # atoms must be present.
                        if not (is_first or is_alt_conf):
                            continue
                        if len(relevant_atoms) != 4:
                            continue
                        ideal = calculate_ideal_and_deviation(
                            relevant_atoms=relevant_atoms,
                            resname=residue.resname)
                        dev = ideal.deviation
                        dihedralNABB = ideal.dihedral
                        betaxyz = ideal.ideal
                        if dev is None:
                            continue
                        is_outlier = (dev >= 0.25)
                        # Kept as an equality test so that values other
                        # than False (e.g. None) still mean "outliers only".
                        if not (is_outlier or outliers_only == False):
                            continue
                        resCB = relevant_atoms[" CB "]
                        if is_outlier:
                            self.n_outliers += 1
                            # Record the CB atom itself rather than whatever
                            # atom the collection loop happened to end on.
                            self._outlier_i_seqs.append(resCB.i_seq)
                        altchar = cf.altloc if is_alt_conf else " "
                        result = cbeta(
                            chain_id=chain_id,
                            resname=residue.resname,
                            resseq=residue.resseq,
                            icode=residue.icode,
                            altloc=altchar,
                            xyz=resCB.xyz,
                            occupancy=resCB.occ,
                            deviation=dev,
                            dihedral_NABB=dihedralNABB,
                            ideal_xyz=betaxyz,
                            outlier=is_outlier)
                        self.results.append(result)
                        if collect_ideal:
                            self.beta_ideal[result.id_str()] = betaxyz
def __init__(self, pdb_hierarchy, outliers_only=False, out=sys.stdout,
             collect_ideal=False, apply_phi_psi_correction=False,
             display_phi_psi_correction=False, quiet=False):
    """Compute C-beta deviations, optionally corrected for phi/psi.

    For each conformer of each residue group, the atoms named in
    ``relevant_atom_names`` (defined outside this method) are collected and
    handed to ``calculate_ideal_and_deviation``.  Residues deviating by
    0.25 or more are counted as outliers.

    :param pdb_hierarchy: hierarchy whose models/chains/residue groups are
      walked.
    :param outliers_only: when ``False``, every measured residue is stored
      in ``self.results``; otherwise only outliers are.
    :param out: not used in this method body (the correction summary is
      printed to standard output).
    :param collect_ideal: when true, the ideal CB position of each stored
      result is kept in ``self.beta_ideal`` keyed by ``result.id_str()``.
    :param apply_phi_psi_correction: when true, the deviation and dihedral
      of each residue found in ``get_phi_psi_dict(pdb_hierarchy)`` are
      replaced by the values from ``cbd_utils.get_phi_psi_correction`` and
      a summary of the outlier changes is printed at the end.
    :param display_phi_psi_correction: forwarded to
      ``cbd_utils.get_phi_psi_correction``.
    :param quiet: not used in this method body.
    """
    validation.__init__(self)
    self._outlier_i_seqs = flex.size_t()
    self.beta_ideal = {}
    self.stats = group_args(n_results=0,
                            n_weighted_results=0,
                            n_weighted_outliers=0)
    if apply_phi_psi_correction:
        phi_psi_angles = get_phi_psi_dict(pdb_hierarchy)
        new_outliers = 0
        outliers_removed = 0
        total_residues = 0
    from mmtbx.validation import utils
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            if use_segids:
                chain_id = utils.get_segid_as_chainid(chain=chain)
            else:
                chain_id = chain.id
            for rg in chain.residue_groups():
                for i_cf, cf in enumerate(rg.conformers()):
                    is_first = (i_cf == 0)
                    for residue in cf.residues():
                        if residue.resname == "GLY":
                            continue
                        is_alt_conf = False
                        relevant_atoms = {}
                        for atom in residue.atoms():
                            if atom.name in relevant_atom_names:
                                relevant_atoms[atom.name] = atom
                                if len(atom.parent().altloc) != 0:
                                    is_alt_conf = True
                        # Later conformers are only measured when one of
                        # the collected atoms really is an alternate; all
                        # four atoms must be present.
                        if not (is_first or is_alt_conf):
                            continue
                        if len(relevant_atoms) != 4:
                            continue
                        ideal = calculate_ideal_and_deviation(
                            relevant_atoms=relevant_atoms,
                            resname=residue.resname)
                        dev = ideal.deviation
                        dihedralNABB = ideal.dihedral
                        betaxyz = ideal.ideal
                        if dev is None:
                            continue
                        resCB = relevant_atoms[" CB "]
                        self.stats.n_results += 1
                        self.stats.n_weighted_results += resCB.occ
                        altchar = cf.altloc if is_alt_conf else " "
                        if apply_phi_psi_correction:
                            total_residues += 1
                            id_str = '|%s:%s|' % (residue.id_str(), altchar)
                            phi_psi = phi_psi_angles.get(id_str, None)
                            if phi_psi:
                                rc = cbd_utils.get_phi_psi_correction(
                                    ideal,
                                    residue,
                                    phi_psi,
                                    display_phi_psi_correction=
                                    display_phi_psi_correction,
                                )
                                if rc:
                                    # start/finish: outlier status before
                                    # and after the correction, judging by
                                    # the counters they feed.
                                    dev, dihedralNABB, start, finish = rc
                                    if start and not finish:
                                        outliers_removed += 1
                                    elif not start and finish:
                                        new_outliers += 1
                        is_outlier = (dev >= 0.25)
                        # Kept as an equality test so that values other
                        # than False (e.g. None) still mean "outliers only".
                        if not (is_outlier or outliers_only == False):
                            continue
                        if is_outlier:
                            self.n_outliers += 1
                            self.stats.n_weighted_outliers += resCB.occ
                            # Record the CB atom itself rather than whatever
                            # atom the collection loop happened to end on.
                            self._outlier_i_seqs.append(resCB.i_seq)
                        result = cbeta(
                            chain_id=chain_id,
                            resname=residue.resname,
                            resseq=residue.resseq,
                            icode=residue.icode,
                            altloc=altchar,
                            xyz=resCB.xyz,
                            occupancy=resCB.occ,
                            deviation=dev,
                            dihedral_NABB=dihedralNABB,
                            ideal_xyz=betaxyz,
                            outlier=is_outlier)
                        self.results.append(result)
                        if collect_ideal:
                            self.beta_ideal[result.id_str()] = betaxyz
    if apply_phi_psi_correction:
        # One field per line, colons aligned.
        summary = (
            "\n"
            "Outliers removed : %5d\n"
            "New outliers     : %5d\n"
            "Num. of outliers : %5d\n"
            "Num. of residues : %5d\n"
        )
        print(summary % (
            outliers_removed,
            new_outliers,
            self.n_outliers,
            total_residues,
        ))
def __init__(self, pdb_hierarchy, data_version="8000", outliers_only=False,
             show_errors=False, out=sys.stdout, quiet=False):
    """Score the side-chain rotamer of every atom group in the hierarchy.

    Chi angles are measured per atom group, evaluated with ``RotamerEval``
    and turned into ``rotamer`` results appended to ``self.results``.

    :param pdb_hierarchy: hierarchy whose models/chains/residue groups are
      walked.
    :param data_version: only ``"8000"`` is accepted; it selects an outlier
      threshold of 0.003.
    :param outliers_only: when true, only results whose ``is_outlier()`` is
      true are stored.
    :param show_errors: passed to ``SidechainAngles``; when true, residues
      whose chi measurement raises ``AttributeError`` are reported on
      ``out`` and stored as incomplete results.
    :param out: stream receiving the "missing sidechain atoms" messages.
    :param quiet: not used in this method body.
    :raises ValueError: if ``data_version`` is not ``"8000"``.
    """
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    from mmtbx.rotamer.sidechain_angles import SidechainAngles
    from mmtbx.rotamer import rotamer_eval
    from mmtbx.rotamer.rotamer_eval import RotamerID
    from mmtbx.validation import utils
    self.data_version = data_version
    if self.data_version != "8000":
        raise ValueError(
            "data_version given to RotamerEval not recognized (%s)."
            % data_version)
    self.outlier_threshold = 0.003
    sidechain_angles = SidechainAngles(show_errors)
    rotamer_evaluator = rotamer_eval.RotamerEval(data_version=data_version)
    rotamer_id = rotamer_eval.RotamerID()  # loads in the rotamer names
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    current_rotamers = {}
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            chain_id = (utils.get_segid_as_chainid(chain=chain)
                        if use_segids else chain.id)
            for rg in chain.residue_groups():
                all_dict = construct_complete_sidechain(rg)
                for atom_group in rg.atom_groups():
                    coords = get_center(atom_group)
                    resname = atom_group.resname
                    occupancy = get_occupancy(atom_group)
                    kwargs = dict(
                        chain_id=chain_id,
                        resseq=rg.resseq,
                        icode=rg.icode,
                        altloc=atom_group.altloc,
                        resname=resname,
                        xyz=coords,
                        occupancy=occupancy)
                    atom_dict = all_dict.get(atom_group.altloc)
                    res_key = get_residue_key(atom_group=atom_group)
                    try:
                        chis = sidechain_angles.measureChiAngles(
                            atom_group, atom_dict)
                    except AttributeError:
                        # The measurement could not be made; optionally
                        # record the residue as incomplete, then move on.
                        if show_errors:
                            kwargs["incomplete"] = True
                            result = rotamer(**kwargs)
                            print >> out, "%s is missing some sidechain atoms" % \
                                result.id_str()
                            self.results.append(result)
                        continue
                    if chis is None or None in chis:
                        continue
                    cur_res = resname.lower().strip()
                    if cur_res == "mse":
                        # Selenomethionine is evaluated as methionine.
                        cur_res = "met"
                    value = rotamer_evaluator.evaluate(cur_res, chis)
                    if value is None:
                        continue
                    self.n_total += 1
                    kwargs["score"] = value * 100
                    wrap_chis = rotamer_id.wrap_chis(
                        resname.strip(), chis, symmetry=False)
                    sym_chis = rotamer_id.wrap_sym(
                        resname.strip(), wrap_chis[:])
                    evaluation = self.evaluateScore(value)
                    kwargs["evaluation"] = evaluation
                    flagged = (evaluation == "OUTLIER")
                    kwargs["outlier"] = flagged
                    if flagged:
                        kwargs["rotamer_name"] = evaluation
                    else:
                        name = rotamer_id.identify(resname, wrap_chis)
                        # An empty name means no named rotamer matched.
                        kwargs["rotamer_name"] = (
                            "UNCLASSIFIED" if name == "" else name)
                    # Always report four chi slots, padding with None.
                    while len(wrap_chis) < 4:
                        wrap_chis.append(None)
                    kwargs["chi_angles"] = wrap_chis
                    result = rotamer(**kwargs)
                    if result.is_outlier() or not outliers_only:
                        self.results.append(result)
    out_count, out_percent = self.get_outliers_count_and_fraction()
    self.out_percent = out_percent * 100.0
def __init__(self, pdb_hierarchy, outliers_only=False, show_errors=False,
             out=sys.stdout, quiet=False):
    """Evaluate the Ramachandran (phi, psi) score of every residue.

    Every model and chain of ``pdb_hierarchy`` is walked residue group by
    residue group.  For each atom group with both phi and psi available, a
    ``ramachandran`` result is built, the per-type tally ``self.n_type`` is
    updated, and outliers have their atom i_seqs recorded.

    :param pdb_hierarchy: hierarchy to analyse; its atom i_seqs are reset
      when they are all zero.
    :param outliers_only: when true, only outlier results are stored in
      ``self.results``.
    :param show_errors: not used in this method body.
    :param out: not used in this method body.
    :param quiet: not used in this method body.
    """
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    self.n_type = [0] * 6
    from mmtbx.validation import utils
    import mmtbx.rotamer
    from mmtbx.rotamer import ramachandran_eval
    from scitbx.array_family import flex
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if all_i_seqs.all_eq(0):
        pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    r = ramachandran_eval.RamachandranEval()
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            if use_segids:
                chain_id = utils.get_segid_as_chainid(chain=chain)
            else:
                chain_id = chain.id
            residues = list(chain.residue_groups())
            # Start every chain with no remembered residue, so the first
            # residue of a chain never takes the last residue of the
            # preceding chain as its phi neighbour.
            rezes = None
            for i, residue_group in enumerate(residues):
                # Residues are handled as per-altloc atom lists so that an
                # "A" conformer can borrow atoms from the "" conformer when
                # it needs them (original author's note: see 1jxt.pdb).
                prev_atom_list, next_atom_list, atom_list = None, None, None
                # Whatever was built on the previous iteration (None at the
                # start of a chain) becomes the previous residue, even if
                # that iteration was skipped further down.
                prev_rezes = rezes
                rezes = construct_complete_residues(residue_group)
                cur_resseq = residue_group.resseq_as_int()
                cur_icode = residue_group.icode.strip()
                if i > 0:
                    before = residues[i - 1]
                    before_resseq = before.resseq_as_int()
                    if cur_resseq == before_resseq:
                        # Same number is only acceptable when an insertion
                        # code tells the two residues apart.
                        if cur_icode == '' and before.icode.strip() == '':
                            continue
                    elif cur_resseq != before_resseq + 1:
                        continue
                if i < len(residues) - 1:
                    after = residues[i + 1]
                    after_resseq = after.resseq_as_int()
                    if cur_resseq == after_resseq:
                        if cur_icode == '' and after.icode.strip() == '':
                            continue
                    elif cur_resseq != after_resseq - 1:
                        continue
                    next_rezes = construct_complete_residues(after)
                else:
                    next_rezes = None
                for atom_group in residue_group.atom_groups():
                    alt_conf = atom_group.altloc
                    if rezes is not None:
                        atom_list = rezes.get(alt_conf)
                    if prev_rezes is not None:
                        prev_atom_list = prev_rezes.get(alt_conf)
                        if prev_atom_list is None:
                            # No matching altloc: fall back to the first
                            # altloc in sorted order.
                            prev_keys = sorted(prev_rezes.keys())
                            prev_atom_list = prev_rezes.get(prev_keys[0])
                    if next_rezes is not None:
                        next_atom_list = next_rezes.get(alt_conf)
                        if next_atom_list is None:
                            next_keys = sorted(next_rezes.keys())
                            next_atom_list = next_rezes.get(next_keys[0])
                    phi = get_phi(prev_atom_list, atom_list)
                    psi = get_psi(atom_list, next_atom_list)
                    coords = get_center(atom_group)
                    if phi is None or psi is None:
                        continue
                    self.n_total += 1
                    resname3 = atom_group.resname[0:3]
                    res_type = RAMA_GENERAL
                    if resname3 == "GLY":
                        res_type = RAMA_GLYCINE
                    elif resname3 == "PRO":
                        if is_cis_peptide(prev_atom_list, atom_list):
                            res_type = RAMA_CISPRO
                        else:
                            res_type = RAMA_TRANSPRO
                    elif isPrePro(residues, i):
                        res_type = RAMA_PREPRO
                    elif resname3 == "ILE" or resname3 == "VAL":
                        res_type = RAMA_ILE_VAL
                    self.n_type[res_type] += 1
                    value = r.evaluate(res_types[res_type], [phi, psi])
                    ramaType = self.evaluateScore(res_type, value)
                    is_outlier = (ramaType == RAMALYZE_OUTLIER)
                    # Kinemage data (the three CA atoms) is only saved for
                    # outliers.
                    c_alphas = None
                    if is_outlier:
                        c_alphas = []
                        for atoms in [prev_atom_list, atom_list,
                                      next_atom_list]:
                            for a in atoms:
                                if a.name.strip() == "CA":
                                    a_ = atom(pdb_atom=a)
                                    c_alphas.append(c_alpha(
                                        id_str=a_.atom_group_id_str(),
                                        xyz=a_.xyz))
                        assert (len(c_alphas) == 3)
                    result = ramachandran(
                        chain_id=chain_id,
                        resseq=residue_group.resseq,
                        icode=residue_group.icode,
                        resname=atom_group.resname,
                        altloc=atom_group.altloc,
                        segid=None,  # XXX ???
                        phi=phi,
                        psi=psi,
                        rama_type=ramaType,
                        res_type=res_type,
                        score=value * 100,
                        outlier=is_outlier,
                        xyz=coords,
                        c_alphas=c_alphas)
                    if not outliers_only or is_outlier:
                        self.results.append(result)
                    if is_outlier:
                        i_seqs = atom_group.atoms().extract_i_seq()
                        assert (not i_seqs.all_eq(0))
                        self._outlier_i_seqs.extend(i_seqs)
    out_count, out_percent = self.get_outliers_count_and_fraction()
    fav_count, fav_percent = self.get_favored_count_and_fraction()
    self.out_percent = out_percent * 100.0
    self.fav_percent = fav_percent * 100.0