def __init__(self, fmodel, pdb_hierarchy, cc_min=0.8,
    molprobity_map_params=None):
  """
  Initialise the per-component result lists and run the per-residue
  real-space correlation calculation.

  :param fmodel: forwarded to real_space_correlation.simple
  :param pdb_hierarchy: forwarded to real_space_correlation.simple
  :param cc_min: accepted, but not read in the code shown here
  :param molprobity_map_params: optional; when not None, its
    map_file_name, map_coefficients_file_name and map_coefficients_label
    attributes are copied onto the correlation parameters
  :raises: any exception raised while setting up or running the
    correlation propagates to the caller unchanged
  """
  from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
  from mmtbx import real_space_correlation
  validation.__init__(self)
  # arrays for different components
  self.everything = list()
  self.protein = list()
  self.other = list()
  self.water = list()
  aa_codes = one_letter_given_three_letter.keys()
  # TODO: redo real_space_correlation.simple to use map objects instead of
  # filenames
  # Errors are deliberately not caught here: they reach the caller as-is,
  # with their original traceback.
  rsc_params = real_space_correlation.master_params().extract()
  rsc_params.detail = "residue"
  rsc_params.map_1.fill_missing_reflections = False
  rsc_params.map_2.fill_missing_reflections = False
  if molprobity_map_params is not None:
    rsc_params.map_file_name = molprobity_map_params.map_file_name
    rsc_params.map_coefficients_file_name = \
      molprobity_map_params.map_coefficients_file_name
    rsc_params.map_coefficients_label = \
      molprobity_map_params.map_coefficients_label
  rsc = real_space_correlation.simple(
    fmodel=fmodel,
    pdb_hierarchy=pdb_hierarchy,
    params=rsc_params,
    log=null_out())
def __init__(self, fmodel, pdb_hierarchy, cc_min=0.8):
  """
  Initialise the per-component result lists and run the per-residue
  real-space correlation calculation.

  :param fmodel: forwarded to real_space_correlation.simple
  :param pdb_hierarchy: forwarded to real_space_correlation.simple
  :param cc_min: accepted, but not read in the code shown here
  :raises RuntimeError: when setting up or running the correlation fails;
    the message is "Error: " followed by the text of the original error
  """
  from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
  from mmtbx import real_space_correlation
  validation.__init__(self)
  # arrays for different components
  self.everything = list()
  self.protein = list()
  self.other = list()
  self.water = list()
  aa_codes = one_letter_given_three_letter.keys()
  try:
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    rsc = real_space_correlation.simple(
      fmodel=fmodel,
      pdb_hierarchy=pdb_hierarchy,
      params=rsc_params,
      log=null_out())
  except Exception as e:
    # A bare string cannot be raised (that itself fails with TypeError), so
    # wrap the message in a real exception class.
    raise RuntimeError("Error: %s" % str(e))
def __init__(self, fmodel, pdb_hierarchy, cc_min=0.8,
    molprobity_map_params=None):
  """
  Initialise the per-component result lists and run the per-residue
  real-space correlation calculation.

  :param fmodel: forwarded to real_space_correlation.simple
  :param pdb_hierarchy: forwarded to real_space_correlation.simple
  :param cc_min: accepted, but not read in the code shown here
  :param molprobity_map_params: optional; when not None, its
    map_file_name, map_coefficients_file_name and map_coefficients_label
    attributes are copied onto the correlation parameters
  :raises: any exception raised while setting up or running the
    correlation propagates to the caller with its original traceback
  """
  from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
  from mmtbx import real_space_correlation
  validation.__init__(self)
  # arrays for different components
  self.everything = list()
  self.protein = list()
  self.other = list()
  self.water = list()
  aa_codes = one_letter_given_three_letter.keys()
  # TODO: redo real_space_correlation.simple to use map objects instead of
  # filenames
  # No handler on purpose: catching the error only to raise the same object
  # again would add nothing and would lose the traceback on Python 2.
  rsc_params = real_space_correlation.master_params().extract()
  rsc_params.detail = "residue"
  rsc_params.map_1.fill_missing_reflections = False
  rsc_params.map_2.fill_missing_reflections = False
  if molprobity_map_params is not None:
    rsc_params.map_file_name = molprobity_map_params.map_file_name
    rsc_params.map_coefficients_file_name = \
      molprobity_map_params.map_coefficients_file_name
    rsc_params.map_coefficients_label = \
      molprobity_map_params.map_coefficients_label
  rsc = real_space_correlation.simple(
    fmodel=fmodel,
    pdb_hierarchy=pdb_hierarchy,
    params=rsc_params,
    log=null_out())
def __init__(self, pdb_hierarchy):
  """
  Run Reduce on the hierarchy and record every side chain that Reduce
  reports as flipped.

  Each stdout line containing ":FLIP" produces one nqh_flip result flagged
  as an outlier, and n_outliers is incremented once per result.

  :param pdb_hierarchy: its PDB-format text is piped to Reduce, and it is
    used afterwards to look up coordinates for each flip
  :raises Sorry: when the residue identifier of a flip line is neither 15
    nor 17 characters long
  """
  validation.__init__(self)
  flip_marker = re.compile(":FLIP")
  pdb_text = pdb_hierarchy.as_pdb_string()
  reduce_out = run_reduce_with_timeout(
    parameters="-BUILD -",
    stdin_lines=pdb_text)
  check_and_report_reduce_failure(
    fb_object=reduce_out,
    input_lines=pdb_text,
    output_fname="reduce_fail.pdb")
  from mmtbx.validation import utils
  use_segids = utils.use_segids_in_place_of_chainids(
    hierarchy=pdb_hierarchy)
  for line in reduce_out.stdout_lines:
    if not flip_marker.search(line):
      continue
    # The residue identifier is the text between the first and second ":"
    # of a "USER MOD" line.  Reduce writes it with a 2-character chain id
    # (15 characters in total) or a 4-character segid (17 characters).
    resid = line.split(":")[1]
    id_width = len(resid)
    if id_width == 15:
      segid = None
      chain_id = resid[0:2].strip() or ' '
      residue_fields = resid[2:]
    elif id_width == 17:
      chain_id = None
      segid = resid[0:4].strip()
      residue_fields = resid[4:]
    else:
      raise Sorry("unexpected length of residue identifier in reduce USER MODs.")
    # Layout after the chain/segid prefix: [0:4] resseq, [4:5] icode,
    # [5:8] resname, [12:13] altloc.
    resname = residue_fields[5:8]
    assert (resname in ["ASN", "GLN", "HIS"])
    flip = nqh_flip(
      chain_id=chain_id,
      segid=segid,
      resseq=residue_fields[0:4].strip(),
      icode=residue_fields[4:5],
      altloc=residue_fields[12:13],
      resname=resname,
      outlier=True)
    flip.set_coordinates_from_hierarchy(pdb_hierarchy)
    self.results.append(flip)
    self.n_outliers += 1
def __init__(self, pdb_hierarchy):
  """
  Run "phenix.reduce -BUILD -" on the hierarchy and record every side
  chain that Reduce reports as flipped on its standard error stream.

  Each stderr line containing ":FLIP" produces one nqh_flip result, and
  n_outliers is incremented once per result.

  :param pdb_hierarchy: its PDB-format text is piped to Reduce, and it is
    used afterwards to look up coordinates for each flip
  """
  validation.__init__(self)
  flip_marker = re.compile(":FLIP")
  reduce_out = easy_run.fully_buffered(
    "phenix.reduce -BUILD -",
    stdin_lines=pdb_hierarchy.as_pdb_string())
  for line in reduce_out.stderr_lines:
    if flip_marker.search(line) is None:
      continue
    # Fields are read from fixed columns of the "orientation" line:
    # [15:17] chain id, [17:21] resseq, [21] icode, [22:25] resname,
    # [29] altloc.
    resname = line[22:25]
    assert (resname in ["ASN", "GLN", "HIS"])
    flip = nqh_flip(
      chain_id=line[15:17].strip(),
      resseq=line[17:21].strip(),
      icode=line[21],
      altloc=line[29],
      resname=resname)
    flip.set_coordinates_from_hierarchy(pdb_hierarchy)
    self.results.append(flip)
    self.n_outliers += 1
def __init__(self, pdb_atoms, sites_cart, energies_sites, restraint_proxies,
    unit_cell, ignore_hd=True, sigma_cutoff=4.0, outliers_only=True,
    use_segids_in_place_of_chainids=False):
  """
  Gather summary statistics and outliers for one kind of restraint.

  The attributes read from energies_sites are named after
  self.restraint_type: "<type>_deviations", "<type>_residual_sum",
  "n_<type>_proxies" and, when it exists, "<type>_deviations_z".

  :param pdb_atoms: forwarded to self.get_outliers
  :param sites_cart: forwarded to self.get_outliers
  :param energies_sites: object providing the attributes listed above
  :param restraint_proxies: forwarded to self.get_outliers as proxies
  :param unit_cell: forwarded to self.get_outliers
  :param ignore_hd: accepted, but not read by this method
  :param sigma_cutoff: forwarded to self.get_outliers
  :param outliers_only: forwarded to self.get_outliers
  :param use_segids_in_place_of_chainids: forwarded to self.get_outliers
  """
  validation.__init__(self)
  self.z_min = self.z_max = self.z_mean = None
  restraint_type = self.restraint_type
  deviations_method = getattr(energies_sites,
    "%s_deviations" % restraint_type)
  self.min, self.max, self.mean = deviations_method()
  target = getattr(energies_sites, "%s_residual_sum" % restraint_type)
  self.n_total = getattr(energies_sites, "n_%s_proxies" % restraint_type)
  # Average residual per proxy; guard against division by zero.
  if self.n_total > 0:
    self.target = target / self.n_total
  else:
    self.target = 0
  # Z-score deviations are optional; the z_* attributes stay None when
  # energies_sites does not provide them.
  deviations_z_method = getattr(energies_sites,
    "%s_deviations_z" % restraint_type, None)
  if deviations_z_method is not None:
    self.z_min, self.z_max, self.z_mean = deviations_z_method()
  self.results = sorted(self.get_outliers(
    proxies=restraint_proxies,
    unit_cell=unit_cell,
    sites_cart=sites_cart,
    pdb_atoms=pdb_atoms,
    sigma_cutoff=sigma_cutoff,
    outliers_only=outliers_only,
    use_segids_in_place_of_chainids=use_segids_in_place_of_chainids))
  self.n_outliers = len(self.results)
def __init__(self, pdb_atoms, sites_cart, energies_sites, restraint_proxies,
    unit_cell, ignore_hd=True, sigma_cutoff=4.0, outliers_only=True,
    use_segids_in_place_of_chainids=False):
  """
  Gather summary statistics and outliers for one kind of restraint.

  The attributes read from energies_sites are named after
  self.restraint_type: "<type>_deviations", "<type>_residual_sum",
  "n_<type>_proxies" and, when it exists, "<type>_deviations_z".

  :param pdb_atoms: forwarded to self.get_outliers
  :param sites_cart: forwarded to self.get_outliers
  :param energies_sites: object providing the attributes listed above
  :param restraint_proxies: forwarded to self.get_outliers as proxies
  :param unit_cell: forwarded to self.get_outliers
  :param ignore_hd: accepted, but not read by this method
  :param sigma_cutoff: forwarded to self.get_outliers
  :param outliers_only: forwarded to self.get_outliers
  :param use_segids_in_place_of_chainids: forwarded to self.get_outliers
  """
  validation.__init__(self)
  self.z_min = self.z_max = self.z_mean = None
  prefix = self.restraint_type
  self.min, self.max, self.mean = getattr(
    energies_sites, "%s_deviations" % prefix)()
  target = getattr(energies_sites, "%s_residual_sum" % prefix)
  self.n_total = getattr(energies_sites, "n_%s_proxies" % prefix)
  # Average residual per proxy; guard against division by zero.
  if self.n_total > 0:
    self.target = target / self.n_total
  else:
    self.target = 0
  # Z-score deviations are optional; evaluate them exactly once when
  # energies_sites provides them, otherwise leave the z_* attributes None.
  deviations_z_method = getattr(
    energies_sites, "%s_deviations_z" % prefix, None)
  if deviations_z_method is not None:
    self.z_min, self.z_max, self.z_mean = deviations_z_method()
  self.results = sorted(self.get_outliers(
    proxies=restraint_proxies,
    unit_cell=unit_cell,
    sites_cart=sites_cart,
    pdb_atoms=pdb_atoms,
    sigma_cutoff=sigma_cutoff,
    outliers_only=outliers_only,
    use_segids_in_place_of_chainids=use_segids_in_place_of_chainids))
  self.n_outliers = len(self.results)
def __init__(self, pdb_hierarchy):
  """
  Run "phenix.reduce -BUILD -" on the hierarchy and record every side
  chain that Reduce reports as flipped on its standard output.

  Each stdout line containing ":FLIP" produces one nqh_flip result flagged
  as an outlier, and n_outliers is incremented once per result.

  :param pdb_hierarchy: its PDB-format text is piped to Reduce, and it is
    used afterwards to look up coordinates for each flip
  """
  validation.__init__(self)
  flip_marker = re.compile(":FLIP")
  reduce_out = easy_run.fully_buffered(
    "phenix.reduce -BUILD -",
    stdin_lines=pdb_hierarchy.as_pdb_string())
  for line in reduce_out.stdout_lines:
    if not flip_marker.search(line):
      continue
    # The residue identifier is the text between the first and second ":"
    # of a "USER MOD" line.  Layout: [0:2] chain id, [2:6] resseq,
    # [6:7] icode, [7:10] resname, [14:15] altloc.
    resid = line.split(":")[1]
    # A blank chain id is kept as a single space rather than "".
    chain_id = resid[0:2].strip() or ' '
    resname = resid[7:10]
    assert (resname in ["ASN", "GLN", "HIS"])
    flip = nqh_flip(
      chain_id=chain_id,
      resseq=resid[2:6].strip(),
      icode=resid[6:7],
      altloc=resid[14:15],
      resname=resname,
      outlier=True)
    flip.set_coordinates_from_hierarchy(pdb_hierarchy)
    self.results.append(flip)
    self.n_outliers += 1
def __init__(self, pdb_hierarchy):
  """
  Pipe the hierarchy through "phenix.reduce -BUILD -" and store one
  nqh_flip outlier for every ":FLIP" line found on Reduce's stdout.

  n_outliers is incremented once per stored flip.

  :param pdb_hierarchy: source of the PDB-format text sent to Reduce and of
    the coordinates attached to each flip
  """
  flip_pattern = re.compile(":FLIP")
  validation.__init__(self)
  pdb_text = pdb_hierarchy.as_pdb_string()
  reduce_out = easy_run.fully_buffered(
    "phenix.reduce -BUILD -", stdin_lines=pdb_text)
  flip_lines = (
    line for line in reduce_out.stdout_lines if flip_pattern.search(line))
  for line in flip_lines:
    # Second ":"-separated field of the "USER MOD" line, read by column:
    # [0:2] chain id, [2:6] resseq, [6:7] icode, [7:10] resname,
    # [14:15] altloc.
    fields = line.split(":")[1]
    chain_id = fields[0:2].strip()
    if not chain_id:
      # keep a blank chain id as a single space
      chain_id = ' '
    resname = fields[7:10]
    assert (resname in ["ASN", "GLN", "HIS"])
    flip = nqh_flip(
      chain_id=chain_id,
      resseq=fields[2:6].strip(),
      icode=fields[6:7],
      altloc=fields[14:15],
      resname=resname,
      outlier=True)
    flip.set_coordinates_from_hierarchy(pdb_hierarchy)
    self.results.append(flip)
    self.n_outliers += 1
def __init__(self, pdb_hierarchy, xray_structure, ignore_hd=True,
    collect_outliers=True):
  """
  Compute B-factor and occupancy statistics and collect related outliers.

  Summary values (b_mean/b_min/b_max, o_mean/o_min/o_max and the n_*
  counters other than n_total, n_all, n_hd and n_aniso_h) are computed on
  the structure with hydrogens removed when ignore_hd is true and the
  structure contains both hydrogen and non-hydrogen scatterers.  Outlier
  collection always indexes the full structure.

  Outliers are stored in four lists: zero_occ, bad_adps, partial_occ and
  different_occ.  n_outliers is incremented for each entry added to any
  of them, starting from n_aniso_h + n_npd.

  :param pdb_hierarchy: model hierarchy; its atom i_seqs are reset here
  :param xray_structure: must contain at least one scatterer
  :param ignore_hd: exclude hydrogens from the summary statistics and skip
    them during per-atom outlier collection
  :param collect_outliers: when false, the per-atom zero-occupancy and
    B-factor scan is skipped
  """
  for name in self.__slots__:
    setattr(self, name, None)
  validation.__init__(self)
  assert len(xray_structure.scatterers()) != 0
  from cctbx import adptbx
  from scitbx.array_family import flex
  xrs = xray_structure
  self.n_total = xrs.scatterers().size() # always include H/D
  self.results = None
  pdb_atoms = pdb_hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  hd_selection = xrs.hd_selection()
  self.n_all = hd_selection.size()
  self.n_hd = hd_selection.count(True)
  if (ignore_hd) and (0 < self.n_hd < self.n_all):
    xrs = xrs.select(~hd_selection)
  # Arrays for the (possibly hydrogen-free) selection; used only for the
  # summary statistics below.
  summary_u_isos = xrs.extract_u_iso_or_u_equiv()
  summary_occ = xrs.scatterers().extract_occupancies()
  self.n_atoms = xrs.scatterers().size()
  self.n_non_hd = self.n_all - self.n_hd
  self.n_aniso = xrs.use_u_aniso().count(True)
  self.n_aniso_h = (xray_structure.use_u_aniso() & hd_selection).count(True)
  self.n_npd = xrs.is_positive_definite_u().count(False)
  self.n_zero_b = (summary_u_isos == 0).count(True)
  self.n_zero_occ = (summary_occ == 0).count(True)
  # B-factor outlier cutoffs: mean +/- 4 sigma of the non-zero values.
  u_cutoff_high = sys.maxsize
  u_cutoff_low = 0
  u_non_zero = summary_u_isos.select(summary_u_isos > 0)
  if (len(u_non_zero) > 1):
    mv = flex.mean_and_variance(u_non_zero)
    sigma = mv.unweighted_sample_standard_deviation()
    u_cutoff_high = mv.mean() + (4.0 * sigma)
    u_cutoff_low = mv.mean() - (4.0 * sigma)
  self.b_mean = adptbx.u_as_b(flex.mean(summary_u_isos))
  self.b_min = adptbx.u_as_b(flex.min(summary_u_isos))
  self.b_max = adptbx.u_as_b(flex.max(summary_u_isos))
  self.o_mean = flex.mean(summary_occ)
  self.o_min = flex.min(summary_occ)
  self.o_max = flex.max(summary_occ)
  self.n_outliers = self.n_aniso_h + self.n_npd
  self.zero_occ = []
  self.partial_occ = []
  self.different_occ = []
  self.bad_adps = []
  self.b_histogram = None # TODO

  def is_u_iso_outlier(u):
    return (u < u_cutoff_low) or (u > u_cutoff_high) or (u <= 0)

  # these statistics cover all atoms!
  occupancies = xray_structure.scatterers().extract_occupancies()
  u_isos = xray_structure.extract_u_iso_or_u_equiv()
  # Marks atoms already reported as part of a whole-residue outlier.
  collected = flex.bool(occupancies.size(), False)
  if (collect_outliers):
    for i_seq, occ in enumerate(occupancies):
      if (hd_selection[i_seq] and ignore_hd) or collected[i_seq]:
        continue
      pdb_atom = pdb_atoms[i_seq]
      parent = pdb_atom.parent()
      if (occ <= 0):
        group_atoms = parent.atoms()
        labels = pdb_atom.fetch_labels()
        if (len(group_atoms) > 1) and (group_atoms.extract_occ().all_eq(0)):
          # every atom of the group has zero occupancy: report it once
          i_seqs = group_atoms.extract_i_seq()
          b_mean = adptbx.u_as_b(flex.mean(u_isos.select(i_seqs)))
          outlier = residue_occupancy(
            chain_id=labels.chain_id,
            resseq=labels.resseq,
            icode=labels.icode,
            altloc=labels.altloc,
            resname=labels.resname,
            occupancy=occ,
            outlier=True,
            xyz=group_atoms.extract_xyz().mean(),
            b_iso=b_mean)
          self.zero_occ.append(outlier)
          self.n_outliers += 1
          collected.set_selected(i_seqs, True)
        else:
          assert (pdb_atom.occ == occ), "%s: %s <--> %s" % (
            pdb_atom.id_str(), pdb_atom.occ, occ)
          outlier = atom_occupancy(
            pdb_atom=pdb_atom,
            occupancy=occ,
            b_iso=adptbx.u_as_b(u_isos[i_seq]),
            xyz=pdb_atom.xyz,
            outlier=True)
          self.zero_occ.append(outlier)
          self.n_outliers += 1
      elif is_u_iso_outlier(u_isos[i_seq]):
        # zero displacements will always be recorded on a per-atom basis
        if (u_isos[i_seq] <= 0):
          outlier = atom_bfactor(
            pdb_atom=pdb_atom,
            occupancy=occ,
            b_iso=adptbx.u_as_b(u_isos[i_seq]),
            xyz=pdb_atom.xyz,
            outlier=True)
          self.bad_adps.append(outlier)
          self.n_outliers += 1
        else:
          # if the average displacement for the entire residue falls outside
          # the cutoffs, save as a single residue outlier
          group_atoms = parent.atoms()
          i_seqs = group_atoms.extract_i_seq()
          u_mean = flex.mean(u_isos.select(i_seqs))
          if is_u_iso_outlier(u_mean):
            labels = pdb_atom.fetch_labels()
            outlier = residue_bfactor(
              chain_id=labels.chain_id,
              resseq=labels.resseq,
              icode=labels.icode,
              altloc=labels.altloc,
              resname=labels.resname,
              occupancy=occ,
              outlier=True,
              xyz=group_atoms.extract_xyz().mean(),
              b_iso=adptbx.u_as_b(u_mean))
            self.bad_adps.append(outlier)
            self.n_outliers += 1
            collected.set_selected(i_seqs, True)
          # otherwise, just save this atom
          else:
            outlier = atom_bfactor(
              pdb_atom=pdb_atom,
              occupancy=occ,
              b_iso=adptbx.u_as_b(u_isos[i_seq]),
              xyz=pdb_atom.xyz,
              outlier=True)
            self.bad_adps.append(outlier)
            self.n_outliers += 1

  # analyze occupancies for first model
  model = pdb_hierarchy.models()[0]
  for chain in model.chains():
    for residue_group in chain.residue_groups():
      # get unique set of atom names
      atom_names = set()
      for atom in residue_group.atoms():
        atom_names.add(atom.name.strip())
      # check total occupancy for each atom
      for atom_name in atom_names:
        occupancy = 0.0
        atoms = list()
        for atom_group in residue_group.atom_groups():
          atom = atom_group.get_atom(atom_name)
          if (atom is not None):
            occupancy += atom.occ
            atoms.append(atom)
        if (not approx_equal(occupancy, 1.0, out=None, eps=1.0e-3)):
          for atom in atoms:
            outlier = atom_occupancy(
              pdb_atom=atom,
              occupancy=atom.occ,
              b_iso=adptbx.u_as_b(atom.b),
              xyz=atom.xyz,
              outlier=True)
            self.partial_occ.append(outlier)
            self.n_outliers += 1
      # check that atoms in an atom group have the same occupancy
      for atom_group in residue_group.atom_groups():
        residue_is_okay = True
        base_occupancy = atom_group.atoms()[0].occ
        for atom in atom_group.atoms():
          if (not approx_equal(base_occupancy, atom.occ, out=None,
                               eps=1.0e-3)):
            labels = atom.fetch_labels()
            i_seqs = atom_group.atoms().extract_i_seq()
            b_mean = adptbx.u_as_b(flex.mean(u_isos.select(i_seqs)))
            # Report the occupancy of the first atom found to differ from
            # the group's first atom.
            outlier = residue_occupancy(
              chain_id=labels.chain_id,
              resseq=labels.resseq,
              icode=labels.icode,
              altloc=labels.altloc,
              resname=labels.resname,
              occupancy=atom.occ,
              outlier=True,
              xyz=atom_group.atoms().extract_xyz().mean(),
              b_iso=b_mean)
            self.different_occ.append(outlier)
            self.n_outliers += 1
            residue_is_okay = False
            break
        # one report per residue group is enough
        if (not residue_is_okay):
          break
def __init__(self, pdb_hierarchy, nontrans_only=False, out=sys.stdout,
    quiet=True):
  """
  Measure the omega dihedral between consecutive residues and classify it.

  For every atom group whose omega angle can be computed, an omega_result
  is built.  Cis and twisted peptides are counted in n_outliers and their
  atom i_seqs are accumulated in self._outlier_i_seqs.

  :param pdb_hierarchy: model hierarchy; atom i_seqs are reset when they
    are all zero
  :param nontrans_only: when true, only cis/twisted results are stored in
    self.results
  :param out: accepted, but not read by this method
  :param quiet: accepted, but not read by this method
  """
  validation.__init__(self)
  # indexed by [OMEGA_GENERAL, OMEGA_PRO]
  self.residue_count = [0, 0]
  # indexed by [OMEGA_GENERAL, OMEGA_PRO], then
  # [OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED]
  self.omega_count = [[0, 0, 0], [0, 0, 0]]
  from mmtbx.validation import utils
  from scitbx.array_family import flex
  self._outlier_i_seqs = flex.size_t()
  hierarchy_atoms = pdb_hierarchy.atoms()
  if hierarchy_atoms.extract_i_seq().all_eq(0):
    hierarchy_atoms.reset_i_seq()
  use_segids = utils.use_segids_in_place_of_chainids(
    hierarchy=pdb_hierarchy)

  def fill_markup(markup, pdb_atom_list, slot_by_name):
    # Store a kin_atom in the markup slot that matches each atom's name.
    for pdb_atom in pdb_atom_list:
      if pdb_atom is None:
        continue
      wrapped = atom(pdb_atom=pdb_atom)
      slot = slot_by_name.get(pdb_atom.name.strip())
      if slot is not None:
        markup[slot] = kin_atom(
          id_str=wrapped.atom_group_id_str(), xyz=wrapped.xyz)

  for model in pdb_hierarchy.models():
    for chain in model.chains():
      # per-chain state: atoms of the previous residue, and the current
      # residue number (None until the first residue has been seen)
      prev_rezes = None
      cur_resseq = None
      if use_segids:
        chain_id = utils.get_segid_as_chainid(chain=chain)
      else:
        chain_id = chain.id
      residues = list(chain.residue_groups())
      for i, residue_group in enumerate(residues):
        # Lists of atoms keyed by alternate conformation are used because
        # some residues have an A alt conf that needs some atoms from a ""
        # alt conf to get calculated correctly. See 1jxt.pdb for examples.
        # This way both the alt conf atoms and the "" atoms can be
        # searched if necessary.
        prev_atom_list = None
        atom_list = None
        if cur_resseq is not None:
          prev_rezes = rezes
        rezes = construct_residues(residues[i])
        cur_resseq = residue_group.resseq_as_int()
        cur_icode = residue_group.icode.strip()
        if i > 0:
          before = residues[i - 1]
          before_resseq = before.resseq_as_int()
          # check for insertion codes
          if cur_resseq == before_resseq:
            if cur_icode == '' and before.icode.strip() == '':
              continue
          elif cur_resseq != before_resseq + 1:
            # not sequence-adjacent: no peptide bond to measure
            continue
        for atom_group in residue_group.atom_groups():
          alt_conf = atom_group.altloc
          if rezes is not None:
            atom_list = rezes.get(alt_conf)
          if prev_rezes is not None:
            prev_atom_list = prev_rezes.get(alt_conf)
            if prev_atom_list is None:
              # fall back to the first alternate of the previous residue
              first_key = sorted(prev_rezes.keys())[0]
              prev_atom_list = prev_rezes.get(first_key)
          omega = get_omega(prev_atom_list, atom_list)
          highest_mc_b = get_highest_mc_b(prev_atom_list, atom_list)
          if omega is None:
            continue
          coords = get_center(atom_group)
          if atom_group.resname[0:3] == "PRO":
            res_type = OMEGA_PRO
          else:
            res_type = OMEGA_GENERAL
          self.residue_count[res_type] += 1
          omega_type = find_omega_type(omega)
          is_nontrans = (omega_type == OMEGALYZE_CIS
                         or omega_type == OMEGALYZE_TWISTED)
          if is_nontrans:
            self.n_outliers += 1
          self.omega_count[res_type][omega_type] += 1
          # for kinemage markup: previous CA, previous C, current N,
          # current CA
          markup_atoms = [None, None, None, None]
          if is_nontrans:
            fill_markup(markup_atoms, prev_atom_list, {"CA": 0, "C": 1})
            fill_markup(markup_atoms, atom_list, {"N": 2, "CA": 3})
          # find prev res identities for printing
          prev_groups = list(residues[i - 1].atom_groups())
          prev_alts = [ag.altloc for ag in prev_groups]
          prev_resnames = {}
          for ag in prev_groups:
            prev_resnames[ag.altloc] = ag.resname
          if alt_conf in prev_alts:
            prev_altloc = alt_conf
          elif len(prev_alts) > 1:
            prev_altloc = prev_alts[1]
          else:
            prev_altloc = prev_alts[0]
          prev_resname = prev_resnames[prev_altloc]
          # done finding prev res identities
          result = omega_result(
            chain_id=chain_id,
            resseq=residue_group.resseq,
            icode=residue_group.icode,
            resname=atom_group.resname,
            altloc=atom_group.altloc,
            prev_resseq=residues[i - 1].resseq,
            prev_icode=residues[i - 1].icode,
            prev_resname=prev_resname,
            prev_altloc=prev_altloc,
            segid=None,
            omega=omega,
            omega_type=omega_type,
            res_type=res_type,
            is_nontrans=is_nontrans,
            outlier=is_nontrans,
            highest_mc_b=highest_mc_b,
            xyz=coords,
            markup_atoms=markup_atoms)
          if is_nontrans or not nontrans_only:
            self.results.append(result)
          if is_nontrans:
            i_seqs = atom_group.atoms().extract_i_seq()
            assert (not i_seqs.all_eq(0)) # This assert copied from ramalyze
            self._outlier_i_seqs.extend(i_seqs)
def __init__(self, pdb_hierarchy, data_version="8000", outliers_only=False,
    show_errors=False, out=sys.stdout, quiet=False):
  """
  Evaluate the side-chain rotamer of every atom group in the hierarchy.

  For each atom group whose chi angles can be measured and scored, a
  rotamer result is built and n_total is incremented.  Results are stored
  in self.results unless outliers_only is true and the result is not an
  outlier.  self.out_percent is set from
  self.get_outliers_count_and_fraction() at the end.

  :param pdb_hierarchy: model hierarchy to analyse
  :param data_version: only "8000" is accepted
  :param outliers_only: store only outlier results
  :param show_errors: forwarded to SidechainAngles; when true, residues
    whose chi measurement raises AttributeError are reported on `out` and
    stored as incomplete results
  :param out: stream for the "missing some sidechain atoms" messages
  :param quiet: accepted, but not read by this method
  :raises ValueError: when data_version is not "8000"
  """
  validation.__init__(self)
  self.n_allowed = 0
  self.n_favored = 0
  from mmtbx.rotamer.sidechain_angles import SidechainAngles
  from mmtbx.rotamer import rotamer_eval
  from mmtbx.validation import utils
  self.data_version = data_version
  # NOTE: a data_version "500" branch (outlier_threshold 0.01) existed
  # here as commented-out code.
  if self.data_version == "8000":
    self.outlier_threshold = OUTLIER_THRESHOLD
  else:
    raise ValueError(
      "data_version given to RotamerEval not recognized (%s)." %
      data_version)
  sidechain_angles = SidechainAngles(show_errors)
  rotamer_evaluator = rotamer_eval.RotamerEval(data_version=data_version)
  rotamer_id = rotamer_eval.RotamerID() # loads in the rotamer names
  use_segids = utils.use_segids_in_place_of_chainids(
    hierarchy=pdb_hierarchy)
  # `print >> None` wrote to sys.stdout; keep that behaviour for out=None.
  log = out if out is not None else sys.stdout
  for model in pdb_hierarchy.models():
    for chain in model.chains():
      if use_segids:
        chain_id = utils.get_segid_as_chainid(chain=chain)
      else:
        chain_id = chain.id
      for rg in chain.residue_groups():
        all_dict = construct_complete_sidechain(rg)
        for atom_group in rg.atom_groups():
          resname = atom_group.resname
          kwargs = {
            "chain_id": chain_id,
            "resseq": rg.resseq,
            "icode": rg.icode,
            "altloc": atom_group.altloc,
            "resname": resname,
            "xyz": get_center(atom_group),
            "occupancy": get_occupancy(atom_group),
          }
          atom_dict = all_dict.get(atom_group.altloc)
          try:
            chis = sidechain_angles.measureChiAngles(atom_group, atom_dict)
          except AttributeError:
            if show_errors:
              kwargs['incomplete'] = True
              result = rotamer(**kwargs)
              log.write('%s is missing some sidechain atoms\n' %
                        result.id_str())
              self.results.append(result)
            continue
          if chis is None or None in chis:
            continue
          cur_res = resname.lower().strip()
          # selenomethionine is scored with the methionine distribution
          if cur_res == 'mse':
            cur_res = 'met'
          value = rotamer_evaluator.evaluate(cur_res, chis)
          if value is None:
            continue
          self.n_total += 1
          kwargs['score'] = value * 100
          wrap_chis = rotamer_id.wrap_chis(
            resname.strip(), chis, symmetry=False)
          evaluation = self.evaluateScore(value)
          kwargs['evaluation'] = evaluation
          if evaluation == "OUTLIER":
            kwargs['outlier'] = True
            kwargs['rotamer_name'] = evaluation
          else:
            kwargs['outlier'] = False
            kwargs['rotamer_name'] = rotamer_id.identify(resname, wrap_chis)
            # deal with unclassified rotamers
            if kwargs['rotamer_name'] == '':
              kwargs['rotamer_name'] = "UNCLASSIFIED"
          # always report four chi slots, padded with None
          while len(wrap_chis) < 4:
            wrap_chis.append(None)
          kwargs['chi_angles'] = wrap_chis
          result = rotamer(**kwargs)
          if result.is_outlier() or not outliers_only:
            self.results.append(result)
  _, out_percent = self.get_outliers_count_and_fraction()
  self.out_percent = out_percent * 100.0
def __init__(self, pdb_hierarchy, outliers_only=False, out=sys.stdout,
    collect_ideal=False, quiet=False):
  """
  Compute the C-beta deviation of every non-glycine residue.

  A residue is evaluated when all four of N, CA, C and CB are present and
  it belongs to the first conformer of its residue group or carries an
  alternate-location label.  A deviation of 0.25 or more counts as an
  outlier.  self.stats accumulates the number of evaluated residues and
  the CB-occupancy-weighted totals for all residues and for outliers.

  :param pdb_hierarchy: model hierarchy to analyse
  :param outliers_only: store only outlier results in self.results
  :param out: accepted, but not read by this method
  :param collect_ideal: also store the ideal CB position of every stored
    result in self.beta_ideal, keyed by the result's id_str()
  :param quiet: accepted, but not read by this method
  """
  validation.__init__(self)
  self._outlier_i_seqs = flex.size_t()
  self.beta_ideal = {}
  # Atom names are compared in their 4-character padded form, the same
  # width as " CA " and " CB ".
  relevant_atom_names = frozenset([" CA ", " N  ", " C  ", " CB "])
  self.stats = group_args(
    n_results=0,
    n_weighted_results=0,
    n_weighted_outliers=0)
  from mmtbx.validation import utils
  use_segids = utils.use_segids_in_place_of_chainids(
    hierarchy=pdb_hierarchy)
  for model in pdb_hierarchy.models():
    for chain in model.chains():
      if use_segids:
        chain_id = utils.get_segid_as_chainid(chain=chain)
      else:
        chain_id = chain.id
      for rg in chain.residue_groups():
        for i_cf, cf in enumerate(rg.conformers()):
          is_first = (i_cf == 0)
          for residue in cf.residues():
            if residue.resname == "GLY":
              continue
            is_alt_conf = False
            relevant_atoms = {}
            for atom in residue.atoms():
              if atom.name in relevant_atom_names:
                relevant_atoms[atom.name] = atom
                if len(atom.parent().altloc) != 0:
                  is_alt_conf = True
            if not ((is_first or is_alt_conf) and len(relevant_atoms) == 4):
              continue
            ideal = calculate_ideal_and_deviation(
              relevant_atoms=relevant_atoms,
              resname=residue.resname)
            dev = ideal.deviation
            dihedralNABB = ideal.dihedral
            betaxyz = ideal.ideal
            if dev is None:
              continue
            resCB = relevant_atoms[" CB "]
            self.stats.n_results += 1
            self.stats.n_weighted_results += resCB.occ
            is_outlier = (dev >= 0.25)
            if not is_outlier and outliers_only:
              continue
            if is_outlier:
              self.n_outliers += 1
              self.stats.n_weighted_outliers += resCB.occ
              # Record the CB atom itself rather than whichever atom the
              # scan above happened to visit last.
              self._outlier_i_seqs.append(resCB.i_seq)
            if is_alt_conf:
              altchar = cf.altloc
            else:
              altchar = " "
            result = cbeta(
              chain_id=chain_id,
              resname=residue.resname,
              resseq=residue.resseq,
              icode=residue.icode,
              altloc=altchar,
              xyz=resCB.xyz,
              occupancy=resCB.occ,
              deviation=dev,
              dihedral_NABB=dihedralNABB,
              ideal_xyz=betaxyz,
              outlier=is_outlier)
            self.results.append(result)
            if collect_ideal:
              self.beta_ideal[result.id_str()] = betaxyz
def __init__(self, pdb_hierarchy, xray_structure, fmodel,
    distance_cutoff=4.0, collect_all=True):
  """
  Analyse every water oxygen: nearest non-hydrogen contact and map values.

  Atoms matching the selection "resname HOH and name O" are examined.  For
  each one the closest non-hydrogen neighbour (including symmetry copies)
  within the pair table built with distance_cutoff is located, and a water
  result is built from that contact plus the per-atom map statistics.
  Waters flagged by is_bad_water() or is_heavy_atom() are marked as
  outliers and counted in n_bad / n_heavy.

  :param pdb_hierarchy: model hierarchy; when it has more than one atom,
    the atom i_seqs must already be set
  :param xray_structure: source of the pair table, B-factors, occupancies
    and fractional sites
  :param fmodel: forwarded to extract_map_stats_for_single_atoms
  :param distance_cutoff: forwarded to xray_structure.pair_asu_table
  :param collect_all: store every water in self.results, not only outliers
  """
  validation.__init__(self)
  from mmtbx.real_space_correlation import extract_map_stats_for_single_atoms
  from cctbx import adptbx
  from scitbx.matrix import col
  self.n_bad = 0
  self.n_heavy = 0
  pdb_atoms = pdb_hierarchy.atoms()
  if len(pdb_atoms) > 1:
    assert (not pdb_atoms.extract_i_seq().all_eq(0))
  unit_cell = xray_structure.unit_cell()
  pair_asu_table = xray_structure.pair_asu_table(
    distance_cutoff=distance_cutoff)
  asu_mappings = pair_asu_table.asu_mappings()
  asu_table = pair_asu_table.table()
  u_isos = xray_structure.extract_u_iso_or_u_equiv()
  occupancies = xray_structure.scatterers().extract_occupancies()
  sites_frac = xray_structure.sites_frac()
  sel_cache = pdb_hierarchy.atom_selection_cache()
  water_sel = sel_cache.selection("resname HOH and name O")
  map_stats = extract_map_stats_for_single_atoms(
    pdb_atoms=pdb_atoms,
    xray_structure=xray_structure,
    fmodel=fmodel,
    selection=water_sel)
  n_flagged = 0
  for i_seq, atom in enumerate(pdb_atoms):
    if not water_sel[i_seq]:
      continue
    rt_mx_i_inv = asu_mappings.get_rt_mx(i_seq, 0).inverse()
    self.n_total += 1
    vec_i = col(atom.xyz)
    nearest_atom = nearest_contact = None
    for j_seq, j_sym_groups in asu_table[i_seq].items():
      atom_j = pdb_atoms[j_seq]
      # Filter out hydrogens
      if atom_j.element.upper().strip() in ["H", "D"]:
        continue
      site_j = sites_frac[j_seq]
      for j_sym_group in j_sym_groups:
        # symmetry operation that maps atom j next to this water
        rt_mx = rt_mx_i_inv.multiply(
          asu_mappings.get_rt_mx(j_seq, j_sym_group[0]))
        site_ji_cart = unit_cell.orthogonalize(rt_mx * site_j)
        dxyz = abs(vec_i - col(site_ji_cart))
        if (nearest_contact is None) or (dxyz < nearest_contact):
          nearest_contact = dxyz
          nearest_atom = atom_info(pdb_atom=atom_j, symop=rt_mx)
    w = water(
      pdb_atom=atom,
      b_iso=adptbx.u_as_b(u_isos[i_seq]),
      occupancy=occupancies[i_seq],
      nearest_contact=nearest_contact,
      nearest_atom=nearest_atom,
      score=map_stats.two_fofc_ccs[i_seq],
      fmodel=map_stats.fmodel_values[i_seq],
      two_fofc=map_stats.two_fofc_values[i_seq],
      fofc=map_stats.fofc_values[i_seq],
      anom=map_stats.anom_values[i_seq],
      n_hbonds=None) # TODO
    if w.is_bad_water():
      w.outlier = True
      self.n_bad += 1
    elif w.is_heavy_atom():
      w.outlier = True
      self.n_heavy += 1
    if w.outlier:
      n_flagged += 1
    if w.outlier or collect_all:
      self.results.append(w)
  # Count only waters flagged as outliers; with collect_all the results
  # list also holds ordinary waters.
  self.n_outliers = n_flagged
def __init__(self, pdb_hierarchy, outliers_only=False, show_errors=False,
    out=sys.stdout, quiet=False):
  """
  Score the phi/psi angles of every residue triple in the hierarchy.

  Each triple from generate_protein_threes with measurable phi and psi is
  classified (general, glycine, cis-Pro, trans-Pro, pre-Pro or Ile/Val),
  scored, and turned into a ramachandran result.  Totals and per-type
  counts include each residue once, using only the '' or 'A' alternate.
  Results are sorted by model id and id string; out_percent and
  fav_percent are set at the end.

  :param pdb_hierarchy: model hierarchy; atom i_seqs are reset when they
    are all zero
  :param outliers_only: store only outlier results in self.results
  :param show_errors: accepted, but not read by this method
  :param out: accepted, but not read by this method
  :param quiet: accepted, but not read by this method
  """
  # Optimization hint: make it possible to pass
  # ramachandran_eval.RamachandranEval() from outside.
  # Better - convert this to using mmtbx.model.manager where
  # RamachandranEval is already available.
  validation.__init__(self)
  self.n_allowed = 0
  self.n_favored = 0
  self.n_type = [0] * 6
  self._outlier_i_seqs = flex.size_t()
  pdb_atoms = pdb_hierarchy.atoms()
  if pdb_atoms.extract_i_seq().all_eq(0):
    pdb_atoms.reset_i_seq()
  # Sets give constant-time membership tests for the per-residue keys.
  count_keys = set()
  uniqueness_keys = set()
  evaluator = ramachandran_eval.RamachandranEval()
  for three in generate_protein_threes(hierarchy=pdb_hierarchy,
                                       geometry=None):
    main_residue = three[1]
    phi_psi_atoms = three.get_phi_psi_atoms()
    if phi_psi_atoms is None:
      continue
    phi_atoms, psi_atoms = phi_psi_atoms
    phi = get_dihedral(phi_atoms)
    psi = get_dihedral(psi_atoms)
    coords = get_center(main_residue) # should find the CA of the center residue
    if phi is None or psi is None:
      continue
    main_resname = main_residue.resname[0:3]
    res_type = RAMA_GENERAL
    if main_resname == "GLY":
      res_type = RAMA_GLYCINE
    elif main_resname == "PRO":
      if is_cis_peptide(three):
        res_type = RAMA_CISPRO
      else:
        res_type = RAMA_TRANSPRO
    elif three[2].resname == "PRO":
      res_type = RAMA_PREPRO
    elif main_resname == "ILE" or main_resname == "VAL":
      res_type = RAMA_ILE_VAL
    value = evaluator.evaluate(res_types[res_type], [phi, psi])
    ramaType = self.evaluateScore(res_type, value)
    is_outlier = (ramaType == RAMALYZE_OUTLIER)
    # XXX only save kinemage data for outliers
    markup = None
    if is_outlier:
      c_alphas = get_cas_from_three(three)
      assert (len(c_alphas) == 3)
      markup = self.as_markup_for_kinemage(c_alphas)
    result = ramachandran(
      model_id=main_residue.parent().parent().parent().id,
      chain_id=main_residue.parent().parent().id,
      resseq=main_residue.resseq,
      icode=main_residue.icode,
      resname=main_residue.resname,
      altloc=get_altloc_from_three(three),
      segid=None, # XXX ???
      phi=phi,
      psi=psi,
      rama_type=ramaType,
      res_type=res_type,
      score=value*100,
      outlier=is_outlier,
      xyz=coords,
      markup=markup)
    result_key = (result.model_id + result.chain_id + result.resseq +
                  result.icode)
    if result.altloc in ('', 'A') and result_key not in count_keys:
      self.n_total += 1
      self.n_type[res_type] += 1
      self.add_to_validation_counts(ramaType)
      count_keys.add(result_key)
    if (not outliers_only) or is_outlier:
      # the threes/conformers method results in some redundant result
      # calculations in structures with alternates. Using uniqueness_keys
      # prevents redundant results being added to the final list
      if result.altloc != '' or result_key not in uniqueness_keys:
        self.results.append(result)
        uniqueness_keys.add(result_key)
        if is_outlier:
          i_seqs = main_residue.atoms().extract_i_seq()
          assert (not i_seqs.all_eq(0))
          self._outlier_i_seqs.extend(i_seqs)
  self.results.sort(key=lambda res: res.model_id + res.id_str())
  _, out_percent = self.get_outliers_count_and_fraction()
  _, fav_percent = self.get_favored_count_and_fraction()
  self.out_percent = out_percent * 100.0
  self.fav_percent = fav_percent * 100.0
def __init__(self, pdb_hierarchy, data_version="8000", outliers_only=False,
    show_errors=False, out=sys.stdout, quiet=False):
  """
  Evaluate the side-chain rotamer of every atom group in the hierarchy.

  For each atom group whose chi angles can be measured and scored, a
  rotamer result is built and n_total is incremented.  Results are stored
  in self.results unless outliers_only is true and the result is not an
  outlier.  self.out_percent is set from
  self.get_outliers_count_and_fraction() at the end.

  :param pdb_hierarchy: model hierarchy to analyse
  :param data_version: only "8000" is accepted (outlier threshold 0.003)
  :param outliers_only: store only outlier results
  :param show_errors: forwarded to SidechainAngles; when true, residues
    whose chi measurement raises AttributeError are reported on `out` and
    stored as incomplete results
  :param out: stream for the "missing some sidechain atoms" messages
  :param quiet: accepted, but not read by this method
  :raises ValueError: when data_version is not "8000"
  """
  validation.__init__(self)
  self.n_allowed = 0
  self.n_favored = 0
  from mmtbx.rotamer.sidechain_angles import SidechainAngles
  from mmtbx.rotamer import rotamer_eval
  from mmtbx.validation import utils
  self.data_version = data_version
  # NOTE: a data_version "500" branch (outlier_threshold 0.01) existed
  # here as commented-out code.
  if self.data_version == "8000":
    self.outlier_threshold = 0.003
  else:
    raise ValueError("data_version given to RotamerEval not recognized (%s)." % data_version)
  sidechain_angles = SidechainAngles(show_errors)
  rotamer_evaluator = rotamer_eval.RotamerEval(data_version=data_version)
  rotamer_id = rotamer_eval.RotamerID() # loads in the rotamer names
  use_segids = utils.use_segids_in_place_of_chainids(hierarchy=pdb_hierarchy)
  # `print >> None` wrote to sys.stdout; keep that behaviour for out=None.
  log = out if out is not None else sys.stdout
  for model in pdb_hierarchy.models():
    for chain in model.chains():
      if use_segids:
        chain_id = utils.get_segid_as_chainid(chain=chain)
      else:
        chain_id = chain.id
      for rg in chain.residue_groups():
        all_dict = construct_complete_sidechain(rg)
        for atom_group in rg.atom_groups():
          resname = atom_group.resname
          kwargs = {
            "chain_id": chain_id,
            "resseq": rg.resseq,
            "icode": rg.icode,
            "altloc": atom_group.altloc,
            "resname": resname,
            "xyz": get_center(atom_group),
            "occupancy": get_occupancy(atom_group),
          }
          atom_dict = all_dict.get(atom_group.altloc)
          try:
            chis = sidechain_angles.measureChiAngles(atom_group, atom_dict)
          except AttributeError:
            if show_errors:
              kwargs["incomplete"] = True
              result = rotamer(**kwargs)
              log.write("%s is missing some sidechain atoms\n" %
                        result.id_str())
              self.results.append(result)
            continue
          if chis is None or None in chis:
            continue
          cur_res = resname.lower().strip()
          # selenomethionine is scored with the methionine distribution
          if cur_res == "mse":
            cur_res = "met"
          value = rotamer_evaluator.evaluate(cur_res, chis)
          if value is None:
            continue
          self.n_total += 1
          kwargs["score"] = value * 100
          wrap_chis = rotamer_id.wrap_chis(
            resname.strip(), chis, symmetry=False)
          evaluation = self.evaluateScore(value)
          kwargs["evaluation"] = evaluation
          if evaluation == "OUTLIER":
            kwargs["outlier"] = True
            kwargs["rotamer_name"] = evaluation
          else:
            kwargs["outlier"] = False
            kwargs["rotamer_name"] = rotamer_id.identify(resname, wrap_chis)
            # deal with unclassified rotamers
            if kwargs["rotamer_name"] == "":
              kwargs["rotamer_name"] = "UNCLASSIFIED"
          # always report four chi slots, padded with None
          while len(wrap_chis) < 4:
            wrap_chis.append(None)
          kwargs["chi_angles"] = wrap_chis
          result = rotamer(**kwargs)
          if result.is_outlier() or not outliers_only:
            self.results.append(result)
  _, out_percent = self.get_outliers_count_and_fraction()
  self.out_percent = out_percent * 100.0
def __init__(self, pdb_hierarchy, outliers_only=False, out=sys.stdout,
             collect_ideal=False, quiet=False):
    """Compute the C-beta deviation of every non-glycine residue.

    pdb_hierarchy -- hierarchy whose models/chains/conformers are walked.
    outliers_only -- when true, only deviations >= 0.25 are stored.
    out, quiet    -- accepted for interface compatibility; not read here.
    collect_ideal -- when true, the ideal C-beta position of every stored
                     result is kept in self.beta_ideal, keyed by id_str().
    """
    validation.__init__(self)
    self._outlier_i_seqs = flex.size_t()
    self.beta_ideal = {}
    # Atom names are matched against the 4-character atom.name field, so
    # every entry has to be padded to four characters (" N  ", not " N ").
    relevant_atom_names = frozenset([" CA ", " N  ", " C  ", " CB "])
    from mmtbx.validation import utils
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            if use_segids:
                chain_id = utils.get_segid_as_chainid(chain=chain)
            else:
                chain_id = chain.id
            for rg in chain.residue_groups():
                for i_cf, cf in enumerate(rg.conformers()):
                    for residue in cf.residues():
                        if residue.resname == "GLY":
                            continue
                        is_first = (i_cf == 0)
                        is_alt_conf = False
                        relevant_atoms = {}
                        for atom in residue.atoms():
                            if atom.name in relevant_atom_names:
                                relevant_atoms[atom.name] = atom
                                if len(atom.parent().altloc) != 0:
                                    is_alt_conf = True
                        # Only the first conformer, or conformers whose
                        # backbone/CB atoms carry an altloc, are scored,
                        # and only when all four atoms are present.
                        if not ((is_first or is_alt_conf)
                                and len(relevant_atoms) == 4):
                            continue
                        result = calculate_ideal_and_deviation(
                            relevant_atoms=relevant_atoms,
                            resname=residue.resname)
                        dev = result.deviation
                        dihedralNABB = result.dihedral
                        betaxyz = result.ideal
                        if dev is None:
                            continue
                        is_outlier = (dev >= 0.25)
                        # "== False" kept on purpose: a None flag must not
                        # behave like False here.
                        if not (is_outlier or outliers_only == False):
                            continue
                        if is_outlier:
                            self.n_outliers += 1
                            # NOTE(review): `atom` is whatever the loop over
                            # residue.atoms() ended on, not necessarily CB;
                            # confirm that this is the intended i_seq.
                            self._outlier_i_seqs.append(atom.i_seq)
                        if is_alt_conf:
                            altchar = cf.altloc
                        else:
                            altchar = " "
                        resCB = relevant_atoms[" CB "]
                        result = cbeta(
                            chain_id=chain_id,
                            resname=residue.resname,
                            resseq=residue.resseq,
                            icode=residue.icode,
                            altloc=altchar,
                            xyz=resCB.xyz,
                            occupancy=resCB.occ,
                            deviation=dev,
                            dihedral_NABB=dihedralNABB,
                            ideal_xyz=betaxyz,
                            outlier=is_outlier)
                        self.results.append(result)
                        if collect_ideal:
                            self.beta_ideal[result.id_str()] = betaxyz
def __init__(self, pdb_hierarchy, keep_hydrogens=True, nuclear=False,
             force_unique_chain_ids=False, time_limit=120,
             b_factor_cutoff=None, save_probe_unformatted_file=None,
             save_modified_hierarchy=False, verbose=False, out=sys.stdout):
    """Run the Probe-based clash analysis on every model of pdb_hierarchy.

    keep_hydrogens, nuclear, time_limit, verbose and out are forwarded to
    check_and_add_hydrogen (out as its ``log``); nuclear, b_factor_cutoff
    and verbose are forwarded to probe_clashscore_manager.

    force_unique_chain_ids      -- accepted for compatibility; not read here.
    save_probe_unformatted_file -- when given and the hierarchy has exactly
                                   one model, the unformatted Probe output
                                   is written to this path and the path is
                                   stored in self.probe_file.
    save_modified_hierarchy     -- when true, the hierarchy parsed from the
                                   manager's h_pdb_string is kept in
                                   self.pdb_hierarchy.

    Raises RuntimeError when the "probe" module is not available.
    """
    validation.__init__(self)
    self.b_factor_cutoff = b_factor_cutoff
    self.clashscore = None
    self.clashscore_b_cutoff = None
    self.clash_dict = {}
    self.clash_dict_b_cutoff = {}
    self.list_dict = {}
    self.probe_file = None
    if not libtbx.env.has_module(name="probe"):
        raise RuntimeError(
            "Probe could not be detected on your system. Please make sure " +
            "Probe is in your path.\nProbe is available at " +
            "http://kinemage.biochem.duke.edu/")
    if verbose:
        # A single parenthesized argument prints identically on Python 2
        # and Python 3.
        if not nuclear:
            print("\nUsing electron cloud x-H distances and vdW radii")
        else:
            print("\nUsing nuclear cloud x-H distances and vdW radii")
    import iotbx.pdb.hierarchy
    from scitbx.array_family import flex
    from mmtbx.validation import utils
    n_models = len(pdb_hierarchy.models())
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    for i_mod, model in enumerate(pdb_hierarchy.models()):
        input_str, _ = check_and_add_hydrogen(
            pdb_hierarchy=pdb_hierarchy,
            model_number=i_mod,
            nuclear=nuclear,
            verbose=verbose,
            time_limit=time_limit,
            keep_hydrogens=keep_hydrogens,
            log=out)
        # The largest occupancy is taken from a detached copy of this model
        # placed in a fresh root.
        r = iotbx.pdb.hierarchy.root()
        mdc = model.detached_copy()
        r.append_model(mdc)
        occ_max = flex.max(r.atoms().extract_occ())
        pcm = probe_clashscore_manager(
            h_pdb_string=input_str,
            nuclear=nuclear,
            largest_occupancy=occ_max,
            b_factor_cutoff=b_factor_cutoff,
            use_segids=use_segids,
            verbose=verbose)
        if save_modified_hierarchy:
            self.pdb_hierarchy = iotbx.pdb.hierarchy.input(
                pdb_string=pcm.h_pdb_string).hierarchy
        self.clash_dict[model.id] = pcm.clashscore
        self.clash_dict_b_cutoff[model.id] = pcm.clashscore_b_cutoff
        self.list_dict[model.id] = pcm.bad_clashes
        # The headline numbers come from the first model processed.
        if (n_models == 1) or (self.clashscore is None):
            self.results = pcm.bad_clashes
            self.n_outliers = len(self.results)
            self.clashscore = pcm.clashscore
            self.clashscore_b_cutoff = pcm.clashscore_b_cutoff
        if (save_probe_unformatted_file is not None) and (n_models == 1):
            # Context manager closes the handle even if the write fails.
            with open(save_probe_unformatted_file, "w") as probe_out:
                probe_out.write(pcm.probe_unformatted)
            self.probe_file = save_probe_unformatted_file
def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
    """Compute per-residue real-space correlation for a model.

    model                 -- object providing get_hierarchy() and
                             crystal_symmetry().
    fmodel                -- passed to real_space_correlation.simple when no
                             map file is supplied.
    cc_min                -- not read in the portion of the body shown here.
    molprobity_map_params -- optional; supplies map_coefficients_file_name,
                             map_coefficients_label, map_file_name and d_min.

    Any exception raised by the calculation is re-raised unchanged.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation
    validation.__init__(self)
    pdb_hierarchy = model.get_hierarchy()
    crystal_symmetry = model.crystal_symmetry()
    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    aa_codes = one_letter_given_three_letter.keys()
    # redo real_space_corelation.simple to use map objects instead of filenames
    self.overall_rsc = None
    rsc = None
    try:
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.detail = "residue"
        rsc_params.map_1.fill_missing_reflections = False
        rsc_params.map_2.fill_missing_reflections = False
        use_maps = False
        if (molprobity_map_params is not None):
            rsc_params.map_coefficients_file_name = \
                molprobity_map_params.map_coefficients_file_name
            rsc_params.map_coefficients_label = \
                molprobity_map_params.map_coefficients_label
            # a map file switches the calculation to the map_model_cc path
            if (molprobity_map_params.map_file_name is not None):
                use_maps = True
        # use mmtbx/command_line/map_model_cc.py for maps
        self.fsc = None
        if (use_maps):
            from iotbx import map_and_model
            from mmtbx.maps import map_model_cc
            from mmtbx.command_line.map_model_cc import get_fsc
            from iotbx.file_reader import any_file
            params = map_model_cc.master_params().extract()
            params.map_model_cc.resolution = molprobity_map_params.d_min
            map_object = any_file(
                molprobity_map_params.map_file_name).file_object
            # check that model crystal symmetry matches map crystal symmetry
            mmi = map_and_model.input(map_data=map_object.map_data(),
                                      model=model)
            rsc_object = map_model_cc.map_model_cc(
                mmi.map_data(), mmi.model().get_hierarchy(),
                mmi.crystal_symmetry(), params.map_model_cc)
            rsc_object.validate()
            rsc_object.run()
            rsc = rsc_object.get_results()
            # overall correlation is the (cc_mask, cc_volume, cc_peaks) triple
            self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)
            self.fsc = get_fsc(mmi.map_data(), mmi.model(),
                               params.map_model_cc)
            self.fsc.atom_radius = rsc.atom_radius
            # from here on rsc is the per-residue list only
            rsc = rsc.cc_per_residue
        # mmtbx/real_space_correlation.py for X-ray/neutron data and map
        # coefficients
        else:
            self.overall_rsc, rsc = real_space_correlation.simple(
                fmodel=fmodel,
                pdb_hierarchy=pdb_hierarchy,
                params=rsc_params,
                log=null_out())
    except Exception, e:
        # NOTE(review): the handler only re-raises, so the try block has no
        # effect on control flow as shown.
        raise
def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
    """Compute per-residue real-space correlation and classify the results.

    model                 -- object providing get_hierarchy().
    fmodel                -- passed to real_space_correlation.simple when no
                             map file is supplied.
    cc_min                -- residues with a correlation below this value
                             are flagged as outliers.
    molprobity_map_params -- optional; supplies map_coefficients_file_name,
                             map_coefficients_label, map_file_name and d_min.

    After construction self.everything holds every non-water result,
    self.protein those whose residue name is a key of
    one_letter_given_three_letter, self.other the remainder, and
    self.results is self.protein.  Exceptions from the correlation
    calculation propagate unchanged.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation
    validation.__init__(self)
    pdb_hierarchy = model.get_hierarchy()
    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    # redo real_space_corelation.simple to use map objects instead of filenames
    self.overall_rsc = None
    rsc = None
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    use_maps = False
    if molprobity_map_params is not None:
        rsc_params.map_coefficients_file_name = \
            molprobity_map_params.map_coefficients_file_name
        rsc_params.map_coefficients_label = \
            molprobity_map_params.map_coefficients_label
        if molprobity_map_params.map_file_name is not None:
            use_maps = True
    self.fsc = None
    if use_maps:
        # use mmtbx/command_line/map_model_cc.py for maps
        from iotbx import map_and_model
        from mmtbx.maps import map_model_cc
        from mmtbx.command_line.map_model_cc import get_fsc
        from iotbx.file_reader import any_file
        params = map_model_cc.master_params().extract()
        params.map_model_cc.resolution = molprobity_map_params.d_min
        map_object = any_file(
            molprobity_map_params.map_file_name).file_object
        # check that model crystal symmetry matches map crystal symmetry
        mmi = map_and_model.input(map_data=map_object.map_data(),
                                  model=model)
        rsc_object = map_model_cc.map_model_cc(
            mmi.map_data(), mmi.model().get_hierarchy(),
            mmi.crystal_symmetry(), params.map_model_cc)
        rsc_object.validate()
        rsc_object.run()
        rsc = rsc_object.get_results()
        self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)
        self.fsc = get_fsc(mmi.map_data(), mmi.model(), params.map_model_cc)
        self.fsc.atom_radius = rsc.atom_radius
        rsc = rsc.cc_per_residue
    else:
        # mmtbx/real_space_correlation.py for X-ray/neutron data and map
        # coefficients
        self.overall_rsc, rsc = real_space_correlation.simple(
            fmodel=fmodel,
            pdb_hierarchy=pdb_hierarchy,
            params=rsc_params,
            log=null_out())
    assert ((self.overall_rsc is not None) and (rsc is not None))
    for result_ in rsc:
        # The two calculations return differently shaped records, so the
        # residue name is read per branch and reused for the classification
        # below instead of assuming a `.residue` attribute on both.
        if use_maps:
            # new rsc calculation (mmtbx/maps/model_map_cc.py)
            resname = result_.resname
            result = residue_real_space(
                chain_id=result_.chain_id,
                resname=resname,
                resseq=result_.resseq,
                icode=result_.icode,
                altloc="",
                score=result_.cc,
                b_iso=result_.b_iso_mean,
                occupancy=result_.occ_mean,
                outlier=result_.cc < cc_min,
                xyz=result_.xyz_mean)
        else:
            # old rsc calculation (mmtbx/maps/real_space_correlation.py)
            resname = result_.residue.resname
            result = residue_real_space(
                chain_id=result_.chain_id,
                resname=resname,
                resseq=result_.residue.resseq,
                icode=result_.residue.icode,
                altloc="",
                score=result_.cc,
                b_iso=result_.b,
                occupancy=result_.occupancy,
                fmodel=result_.map_value_1,
                two_fofc=result_.map_value_2,
                outlier=result_.cc < cc_min,
                xyz=result_.residue.atoms().extract_xyz().mean())
        if result.is_outlier():
            self.n_outliers += 1
        # XXX unlike other validation metrics, we always save the results for
        # the real-space correlation, since these are used as the basis for
        # the multi-criterion plot in Phenix.  The show() method will only
        # print outliers, however.
        if resname != 'HOH':  # water is handled by waters.py
            self.everything.append(result)
            if resname in one_letter_given_three_letter:
                self.protein.append(result)
            else:
                self.other.append(result)
    self.everything += self.water
    self.results = self.protein
def __init__ (self, fmodel, pdb_hierarchy, crystal_symmetry=None,
              cc_min=0.8, molprobity_map_params=None) :
    """Compute per-residue real-space correlation for a hierarchy.

    fmodel                -- passed to real_space_correlation.simple when no
                             map file is supplied.
    pdb_hierarchy         -- hierarchy to analyse.
    crystal_symmetry      -- symmetry used for the map-based calculation;
                             replaced by the map's own symmetry when the two
                             are not similar.
    cc_min                -- not read in the portion of the body shown here.
    molprobity_map_params -- optional; supplies map_coefficients_file_name,
                             map_coefficients_label, map_file_name and d_min.

    Any exception raised by the calculation is re-raised unchanged.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation
    validation.__init__(self)
    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    aa_codes = one_letter_given_three_letter.keys()
    # redo real_space_corelation.simple to use map objects instead of filenames
    self.overall_rsc = None
    rsc = None
    try :
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.detail="residue"
        rsc_params.map_1.fill_missing_reflections = False
        rsc_params.map_2.fill_missing_reflections = False
        use_maps = False
        if (molprobity_map_params is not None):
            rsc_params.map_coefficients_file_name = \
                molprobity_map_params.map_coefficients_file_name
            rsc_params.map_coefficients_label = \
                molprobity_map_params.map_coefficients_label
            # a map file switches the calculation to the map_model_cc path
            if (molprobity_map_params.map_file_name is not None):
                use_maps = True
        # use mmtbx/command_line/map_model_cc.py for maps
        self.fsc = None
        if (use_maps):
            from scitbx.array_family import flex
            import iotbx.pdb
            from mmtbx.maps import map_model_cc
            from mmtbx.command_line.map_model_cc import get_fsc
            from iotbx.file_reader import any_file
            from cctbx import crystal, sgtbx
            params = map_model_cc.master_params().extract()
            params.map_model_cc.resolution = molprobity_map_params.d_min
            map_object = any_file(molprobity_map_params.map_file_name).file_object
            # -----------------------------------------------------------------
            # check that model crystal symmetry matches map crystal symmetry
            # if inconsistent, map parameters take precedence
            # TODO: centralize data consistency checks prior to running
            # validation
            map_crystal_symmetry = crystal.symmetry(
                unit_cell=map_object.unit_cell(),
                space_group=sgtbx.space_group_info(
                    map_object.space_group_number).group())
            if (not map_crystal_symmetry.is_similar_symmetry(crystal_symmetry)):
                crystal_symmetry = map_crystal_symmetry
            # -----------------------------------------------------------------
            map_data = map_object.map_data()
            rsc_object = map_model_cc.map_model_cc(
                map_data, pdb_hierarchy, crystal_symmetry, params.map_model_cc)
            rsc_object.validate()
            rsc_object.run()
            rsc = rsc_object.get_results()
            # overall correlation is the (cc_mask, cc_volume, cc_peaks) triple
            self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)
            # pdb_hierarchy.as_pdb_input is being phased out since that
            # function just re-processes the file from text and can be lossy
            # this is a placeholder until tools get updated to use the model
            # class
            pdb_input = iotbx.pdb.input(
                source_info='pdb_hierarchy',
                lines=flex.split_lines(pdb_hierarchy.as_pdb_string()))
            # NOTE(review): `mmtbx.model` is not imported in this block;
            # presumably it is imported at module level - confirm.
            model = mmtbx.model.manager(model_input = pdb_input)
            self.fsc = get_fsc(map_data, model, params.map_model_cc)
            #
            self.fsc.atom_radius = rsc.atom_radius
            # from here on rsc is the per-residue list only
            rsc = rsc.cc_per_residue
        # mmtbx/real_space_correlation.py for X-ray/neutron data and map
        # coefficients
        else:
            self.overall_rsc, rsc = real_space_correlation.simple(
                fmodel=fmodel,
                pdb_hierarchy=pdb_hierarchy,
                params=rsc_params,
                log=null_out())
    except Exception, e :
        # NOTE(review): the handler only re-raises, so the try block has no
        # effect on control flow as shown.
        raise
def __init__(self, pdb_hierarchy, xray_structure, ignore_hd=True,
             collect_outliers=True):
    """Collect B-factor and occupancy statistics and flag outlying atoms.

    pdb_hierarchy    -- hierarchy matching xray_structure atom for atom; its
                        i_seq values are reset here.
    xray_structure   -- must contain at least one scatterer.
    ignore_hd        -- when true and the structure is a mix of H/D and
                        other atoms, H/D are left out of the summary
                        statistics and of the per-atom outlier scan.
    collect_outliers -- when true, outliers are gathered into
                        self.zero_occ, self.bad_adps, self.partial_occ and
                        self.different_occ.
    """
    for name in self.__slots__:
        setattr(self, name, None)
    validation.__init__(self)
    assert len(xray_structure.scatterers()) != 0
    from cctbx import adptbx
    from scitbx.array_family import flex
    xrs = xray_structure
    self.n_total = xrs.scatterers().size()  # always include H/D
    self.results = None
    pdb_atoms = pdb_hierarchy.atoms()
    pdb_atoms.reset_i_seq()
    hd_selection = xrs.hd_selection()
    self.n_all = hd_selection.size()
    self.n_hd = hd_selection.count(True)
    if (ignore_hd) and (0 < self.n_hd < self.n_all):
        xrs = xrs.select(~hd_selection)
    u_isos = xrs.extract_u_iso_or_u_equiv()
    occ = xrs.scatterers().extract_occupancies()
    self.n_atoms = xrs.scatterers().size()
    self.n_non_hd = self.n_all - self.n_hd
    self.n_aniso = xrs.use_u_aniso().count(True)
    self.n_aniso_h = (xray_structure.use_u_aniso() & hd_selection).count(True)
    self.n_npd = xrs.is_positive_definite_u().count(False)
    self.n_zero_b = (u_isos == 0).count(True)
    self.n_zero_occ = (occ == 0).count(True)
    # Outlier window: mean +/- 4 sigma of the non-zero displacements; left
    # wide open when there are too few values to estimate a spread.
    # sys.maxsize exists on both Python 2 and 3 (sys.maxint does not).
    u_cutoff_high = sys.maxsize
    u_cutoff_low = 0
    u_non_zero = u_isos.select(u_isos > 0)
    if len(u_non_zero) > 1:
        mv = flex.mean_and_variance(u_non_zero)
        sigma = mv.unweighted_sample_standard_deviation()
        u_cutoff_high = mv.mean() + (4.0 * sigma)
        u_cutoff_low = mv.mean() - (4.0 * sigma)
    self.b_mean = adptbx.u_as_b(flex.mean(u_isos))
    self.b_min = adptbx.u_as_b(flex.min(u_isos))
    self.b_max = adptbx.u_as_b(flex.max(u_isos))
    self.o_mean = flex.mean(occ)
    self.o_min = flex.min(occ)
    self.o_max = flex.max(occ)
    self.n_outliers = self.n_aniso_h + self.n_npd
    self.zero_occ = []
    self.partial_occ = []
    self.different_occ = []
    self.bad_adps = []
    self.b_histogram = None  # TODO

    def is_u_iso_outlier(u):
        return (u < u_cutoff_low) or (u > u_cutoff_high) or (u <= 0)

    # these statistics cover all atoms!
    occupancies = xray_structure.scatterers().extract_occupancies()
    u_isos = xray_structure.extract_u_iso_or_u_equiv()
    # marks atoms already reported as part of a residue-level outlier
    collected = flex.bool(occupancies.size(), False)
    if collect_outliers:
        for i_seq, occ in enumerate(occupancies):
            if (hd_selection[i_seq] and ignore_hd) or collected[i_seq]:
                continue
            pdb_atom = pdb_atoms[i_seq]
            parent = pdb_atom.parent()
            if occ <= 0:
                group_atoms = parent.atoms()
                labels = pdb_atom.fetch_labels()
                if (len(group_atoms) > 1) and \
                        (group_atoms.extract_occ().all_eq(0)):
                    # the whole group has zero occupancy: report it once
                    i_seqs = group_atoms.extract_i_seq()
                    b_mean = adptbx.u_as_b(flex.mean(u_isos.select(i_seqs)))
                    outlier = residue_occupancy(
                        chain_id=labels.chain_id,
                        resseq=labels.resseq,
                        icode=labels.icode,
                        altloc=labels.altloc,
                        resname=labels.resname,
                        occupancy=occ,
                        outlier=True,
                        xyz=group_atoms.extract_xyz().mean(),
                        b_iso=b_mean)
                    self.zero_occ.append(outlier)
                    self.n_outliers += 1
                    collected.set_selected(i_seqs, True)
                else:
                    assert (pdb_atom.occ == occ), "%s: %s <--> %s" % (
                        pdb_atom.id_str(), pdb_atom.occ, occ)
                    outlier = atom_occupancy(
                        pdb_atom=pdb_atom,
                        occupancy=occ,
                        b_iso=adptbx.u_as_b(u_isos[i_seq]),
                        xyz=pdb_atom.xyz,
                        outlier=True)
                    self.zero_occ.append(outlier)
                    self.n_outliers += 1
            elif is_u_iso_outlier(u_isos[i_seq]):
                # zero displacements will always be recorded on a per-atom
                # basis
                if u_isos[i_seq] <= 0:
                    outlier = atom_bfactor(
                        pdb_atom=pdb_atom,
                        occupancy=occ,
                        b_iso=adptbx.u_as_b(u_isos[i_seq]),
                        xyz=pdb_atom.xyz,
                        outlier=True)
                    self.bad_adps.append(outlier)
                    self.n_outliers += 1
                else:
                    # if the average displacement for the entire residue
                    # falls outside the cutoffs, save as a single residue
                    # outlier
                    group_atoms = parent.atoms()
                    i_seqs = group_atoms.extract_i_seq()
                    u_mean = flex.mean(u_isos.select(i_seqs))
                    if is_u_iso_outlier(u_mean):
                        labels = pdb_atom.fetch_labels()
                        outlier = residue_bfactor(
                            chain_id=labels.chain_id,
                            resseq=labels.resseq,
                            icode=labels.icode,
                            altloc=labels.altloc,
                            resname=labels.resname,
                            occupancy=occ,
                            outlier=True,
                            xyz=group_atoms.extract_xyz().mean(),
                            b_iso=adptbx.u_as_b(u_mean))
                        self.bad_adps.append(outlier)
                        self.n_outliers += 1
                        collected.set_selected(i_seqs, True)
                    # otherwise, just save this atom
                    else:
                        outlier = atom_bfactor(
                            pdb_atom=pdb_atom,
                            occupancy=occ,
                            b_iso=adptbx.u_as_b(u_isos[i_seq]),
                            xyz=pdb_atom.xyz,
                            outlier=True)
                        self.bad_adps.append(outlier)
                        self.n_outliers += 1

        # analyze occupancies for first model
        model = pdb_hierarchy.models()[0]
        for chain in model.chains():
            for residue_group in chain.residue_groups():
                # get unique set of atom names
                atom_names = set()
                for atom in residue_group.atoms():
                    atom_names.add(atom.name.strip())

                # check total occupancy for each atom
                for name in atom_names:
                    occupancy = 0.0
                    atoms = list()
                    for atom_group in residue_group.atom_groups():
                        atom = atom_group.get_atom(name)
                        if atom is not None:
                            occupancy += atom.occ
                            atoms.append(atom)
                    if not approx_equal(occupancy, 1.0, out=None, eps=1.0e-3):
                        for atom in atoms:
                            outlier = atom_occupancy(
                                pdb_atom=atom,
                                occupancy=atom.occ,
                                b_iso=adptbx.u_as_b(atom.b),
                                xyz=atom.xyz,
                                outlier=True)
                            self.partial_occ.append(outlier)
                            self.n_outliers += 1

                # check that atoms in an atom group have the same occupancy
                for atom_group in residue_group.atom_groups():
                    residue_is_okay = True
                    base_occupancy = atom_group.atoms()[0].occ
                    for atom in atom_group.atoms():
                        if not approx_equal(base_occupancy, atom.occ,
                                            out=None, eps=1.0e-3):
                            labels = atom.fetch_labels()
                            i_seqs = atom_group.atoms().extract_i_seq()
                            b_mean = adptbx.u_as_b(
                                flex.mean(u_isos.select(i_seqs)))
                            outlier = residue_occupancy(
                                chain_id=labels.chain_id,
                                resseq=labels.resseq,
                                icode=labels.icode,
                                altloc=labels.altloc,
                                resname=labels.resname,
                                # occupancy of the atom that broke the
                                # agreement, not a value left over from the
                                # per-atom scan above
                                occupancy=atom.occ,
                                outlier=True,
                                xyz=atom_group.atoms().extract_xyz().mean(),
                                b_iso=b_mean)
                            self.different_occ.append(outlier)
                            self.n_outliers += 1
                            residue_is_okay = False
                            break
                    # one report per residue group is enough
                    if not residue_is_okay:
                        break
def __init__(self, pdb_hierarchy, outliers_only=False, out=sys.stdout,
             collect_ideal=False, apply_phi_psi_correction=False,
             display_phi_psi_correction=False, quiet=False):
    """Compute C-beta deviations, optionally corrected for phi/psi.

    pdb_hierarchy              -- hierarchy whose conformers are walked.
    outliers_only              -- when true, only deviations >= 0.25 are
                                  stored.
    out, quiet                 -- accepted for interface compatibility; not
                                  read here.
    collect_ideal              -- when true, ideal C-beta positions are kept
                                  in self.beta_ideal, keyed by id_str().
    apply_phi_psi_correction   -- when true, deviations are passed through
                                  cbd_utils.get_phi_psi_correction and a
                                  summary of the effect is printed.
    display_phi_psi_correction -- forwarded to get_phi_psi_correction.

    self.stats accumulates the number of scored residues and the
    occupancy-weighted totals of results and outliers.
    """
    validation.__init__(self)
    self._outlier_i_seqs = flex.size_t()
    self.beta_ideal = {}
    self.stats = group_args(n_results=0,
                            n_weighted_results=0,
                            n_weighted_outliers=0)
    if apply_phi_psi_correction:
        phi_psi_angles = get_phi_psi_dict(pdb_hierarchy)
        new_outliers = 0
        outliers_removed = 0
        total_residues = 0
    from mmtbx.validation import utils
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            if use_segids:
                chain_id = utils.get_segid_as_chainid(chain=chain)
            else:
                chain_id = chain.id
            for rg in chain.residue_groups():
                for i_cf, cf in enumerate(rg.conformers()):
                    for residue in cf.residues():
                        if residue.resname == "GLY":
                            continue
                        is_first = (i_cf == 0)
                        is_alt_conf = False
                        relevant_atoms = {}
                        for atom in residue.atoms():
                            if atom.name in relevant_atom_names:
                                relevant_atoms[atom.name] = atom
                                if len(atom.parent().altloc) != 0:
                                    is_alt_conf = True
                        # Only the first conformer, or conformers whose
                        # backbone/CB atoms carry an altloc, are scored,
                        # and only when all four atoms are present.
                        if not ((is_first or is_alt_conf)
                                and len(relevant_atoms) == 4):
                            continue
                        result = calculate_ideal_and_deviation(
                            relevant_atoms=relevant_atoms,
                            resname=residue.resname)
                        dev = result.deviation
                        dihedralNABB = result.dihedral
                        betaxyz = result.ideal
                        if dev is None:
                            continue
                        resCB = relevant_atoms[" CB "]
                        self.stats.n_results += 1
                        self.stats.n_weighted_results += resCB.occ
                        if is_alt_conf:
                            altchar = cf.altloc
                        else:
                            altchar = " "
                        if apply_phi_psi_correction:
                            total_residues += 1
                            id_str = '|%s:%s|' % (residue.id_str(), altchar)
                            phi_psi = phi_psi_angles.get(id_str, None)
                            if phi_psi:
                                rc = cbd_utils.get_phi_psi_correction(
                                    result,
                                    residue,
                                    phi_psi,
                                    display_phi_psi_correction=
                                    display_phi_psi_correction,
                                )
                                if rc:
                                    # corrected values replace the raw ones;
                                    # start/finish are taken to be the
                                    # outlier state before/after correction
                                    dev, dihedralNABB, start, finish = rc
                                    if start and not finish:
                                        outliers_removed += 1
                                    elif not start and finish:
                                        new_outliers += 1
                        is_outlier = (dev >= 0.25)
                        # "== False" kept on purpose: a None flag must not
                        # behave like False here.
                        if not (is_outlier or outliers_only == False):
                            continue
                        if is_outlier:
                            self.n_outliers += 1
                            self.stats.n_weighted_outliers += resCB.occ
                            # NOTE(review): `atom` is whatever the loop over
                            # residue.atoms() ended on, not necessarily CB;
                            # confirm that this is the intended i_seq.
                            self._outlier_i_seqs.append(atom.i_seq)
                        result = cbeta(
                            chain_id=chain_id,
                            resname=residue.resname,
                            resseq=residue.resseq,
                            icode=residue.icode,
                            altloc=altchar,
                            xyz=resCB.xyz,
                            occupancy=resCB.occ,
                            deviation=dev,
                            dihedral_NABB=dihedralNABB,
                            ideal_xyz=betaxyz,
                            outlier=is_outlier)
                        self.results.append(result)
                        if collect_ideal:
                            self.beta_ideal[result.id_str()] = betaxyz
    if apply_phi_psi_correction:
        # One counter per line so the summary reads as a table.
        print('''
Outliers removed : %5d
New outliers : %5d
Num. of outliers : %5d
Num. of residues : %5d
''' % (
            outliers_removed,
            new_outliers,
            self.n_outliers,
            total_residues,
        ))
def __init__(self, pdb_hierarchy, keep_hydrogens=True, nuclear=False,
             force_unique_chain_ids=False, time_limit=120,
             b_factor_cutoff=None, save_modified_hierarchy=False,
             verbose=False, do_flips=False, out=sys.stdout):
    """Run the Probe-based clash analysis on every model of pdb_hierarchy.

    keep_hydrogens, nuclear, time_limit, verbose, do_flips and out are
    forwarded to check_and_add_hydrogen (out as its ``log``); nuclear,
    b_factor_cutoff and verbose are forwarded to probe_clashscore_manager.
    force_unique_chain_ids is accepted but not read here.  With
    save_modified_hierarchy the hierarchy parsed from the manager's
    h_pdb_string is kept in self.pdb_hierarchy.

    Raises RuntimeError when the "probe" module is not available.
    """
    validation.__init__(self)
    self.b_factor_cutoff = b_factor_cutoff
    self.clashscore = None
    self.clashscore_b_cutoff = None
    self.clash_dict = {}
    self.clash_dict_b_cutoff = {}
    self.list_dict = {}
    self.probe_file = None

    probe_available = libtbx.env.has_module(name="probe")
    if not probe_available:
        raise RuntimeError(
            "Probe could not be detected on your system. Please make sure "
            "Probe is in your path.\nProbe is available at "
            "http://kinemage.biochem.duke.edu/")

    if verbose:
        if nuclear:
            banner = "\nUsing nuclear cloud x-H distances and vdW radii"
        else:
            banner = "\nUsing electron cloud x-H distances and vdW radii"
        print(banner)

    import iotbx.pdb.hierarchy
    from scitbx.array_family import flex
    from mmtbx.validation import utils

    all_models = pdb_hierarchy.models()
    single_model = (len(all_models) == 1)
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)

    for model_index, model in enumerate(pdb_hierarchy.models()):
        input_str, _ = check_and_add_hydrogen(
            pdb_hierarchy=pdb_hierarchy,
            model_number=model_index,
            nuclear=nuclear,
            verbose=verbose,
            time_limit=time_limit,
            keep_hydrogens=keep_hydrogens,
            do_flips=do_flips,
            log=out)

        # largest occupancy of this model, taken from a detached copy
        scratch_root = iotbx.pdb.hierarchy.root()
        scratch_root.append_model(model.detached_copy())
        occ_max = flex.max(scratch_root.atoms().extract_occ())

        manager = probe_clashscore_manager(
            h_pdb_string=input_str,
            nuclear=nuclear,
            largest_occupancy=occ_max,
            b_factor_cutoff=b_factor_cutoff,
            use_segids=use_segids,
            verbose=verbose)
        # the manager of the most recent model stays reachable on self
        self.probe_clashscore_manager = manager

        if save_modified_hierarchy:
            self.pdb_hierarchy = iotbx.pdb.hierarchy.input(
                pdb_string=manager.h_pdb_string).hierarchy

        self.clash_dict[model.id] = manager.clashscore
        self.clash_dict_b_cutoff[model.id] = manager.clashscore_b_cutoff
        self.list_dict[model.id] = manager.bad_clashes

        # headline numbers come from the first model processed
        if single_model or (self.clashscore is None):
            self.results = manager.bad_clashes
            self.n_outliers = len(self.results)
            self.clashscore = manager.clashscore
            self.clashscore_b_cutoff = manager.clashscore_b_cutoff
def __init__(self, pdb_hierarchy, outliers_only=False, show_errors=False,
             out=sys.stdout, quiet=False):
    """Evaluate phi/psi of every residue triplet in pdb_hierarchy.

    pdb_hierarchy -- hierarchy to analyse; i_seq values are reset when they
                     are all zero.
    outliers_only -- when true, only outlier results are stored.
    show_errors, out, quiet -- accepted for interface compatibility; not
                     read here.

    Counts (n_total, n_type, and whatever add_to_validation_counts
    updates) include each chain/resseq/icode once, taken from a result
    whose altloc is '' or 'A'.  Results are stored sorted by id_str().
    """
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    self.n_type = [0] * 6
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if all_i_seqs.all_eq(0):
        pdb_atoms.reset_i_seq()
    # NOTE(review): computed but not consulted below; results use chain.id.
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    # Sets give constant-time membership tests; these were only ever used
    # for "already seen?" checks.
    count_keys = set()
    uniqueness_keys = set()
    r = ramachandran_eval.RamachandranEval()
    for three in generate_protein_threes(hierarchy=pdb_hierarchy,
                                         geometry=None):
        main_residue = three[1]
        phi_psi_atoms = three.get_phi_psi_atoms()
        if phi_psi_atoms is None:
            continue
        phi_atoms, psi_atoms = phi_psi_atoms
        phi = get_dihedral(phi_atoms)
        psi = get_dihedral(psi_atoms)
        coords = get_center(main_residue)  # should find the CA of the center residue
        if phi is None or psi is None:
            continue
        resname3 = main_residue.resname[0:3]
        res_type = RAMA_GENERAL
        if resname3 == "GLY":
            res_type = RAMA_GLYCINE
        elif resname3 == "PRO":
            if is_cis_peptide(three):
                res_type = RAMA_CISPRO
            else:
                res_type = RAMA_TRANSPRO
        elif three[2].resname == "PRO":
            res_type = RAMA_PREPRO
        elif resname3 in ("ILE", "VAL"):
            res_type = RAMA_ILE_VAL
        value = r.evaluate(res_types[res_type], [phi, psi])
        ramaType = self.evaluateScore(res_type, value)
        is_outlier = ramaType == RAMALYZE_OUTLIER
        # XXX only save kinemage data for outliers
        if is_outlier:
            c_alphas = get_cas_from_three(three)
            assert (len(c_alphas) == 3)
            markup = self.as_markup_for_kinemage(c_alphas)
        else:
            markup = None
        result = ramachandran(
            chain_id=main_residue.parent().parent().id,
            resseq=main_residue.resseq,
            icode=main_residue.icode,
            resname=main_residue.resname,
            altloc=get_altloc_from_three(three),
            segid=None,  # XXX ???
            phi=phi,
            psi=psi,
            rama_type=ramaType,
            res_type=res_type,
            score=value * 100,
            outlier=is_outlier,
            xyz=coords,
            markup=markup)
        residue_key = result.chain_id + result.resseq + result.icode
        if result.altloc in ['', 'A'] and residue_key not in count_keys:
            self.n_total += 1
            self.n_type[res_type] += 1
            self.add_to_validation_counts(ramaType)
            count_keys.add(residue_key)
        if not outliers_only or is_outlier:
            # the threes/conformers method results in some redundant result
            # calculations in structures with alternates.  Tracking the
            # residue keys prevents redundant results being added to the
            # final list
            if result.altloc != '' or residue_key not in uniqueness_keys:
                self.results.append(result)
                uniqueness_keys.add(residue_key)
        if is_outlier:
            i_seqs = main_residue.atoms().extract_i_seq()
            assert (not i_seqs.all_eq(0))
            self._outlier_i_seqs.extend(i_seqs)
    self.results.sort(key=lambda res: res.id_str())
    out_count, out_percent = self.get_outliers_count_and_fraction()
    fav_count, fav_percent = self.get_favored_count_and_fraction()
    self.out_percent = out_percent * 100.0
    self.fav_percent = fav_percent * 100.0
def __init__(self, pdb_hierarchy, xray_structure, fmodel,
             distance_cutoff=4.0, collect_all=True,
             molprobity_map_params=None):
    """Analyse every atom matched by the "water" selection.

    pdb_hierarchy         -- hierarchy with valid (not all-zero) i_seq values.
    xray_structure        -- structure used for symmetry-aware contacts;
                             replaced by one extracted from pdb_hierarchy
                             when a map file is supplied.
    fmodel                -- used for the map statistics when
                             molprobity_map_params is None.
    distance_cutoff       -- cutoff passed to pair_asu_table.
    collect_all           -- when true every water is stored, otherwise only
                             those flagged as outliers.
    molprobity_map_params -- optional; supplies map_file_name and d_min, or
                             map_coefficients_file_name and
                             map_coefficients_label.
    """
    validation.__init__(self)
    from mmtbx.real_space_correlation import extract_map_stats_for_single_atoms
    from cctbx import adptbx
    from scitbx.matrix import col
    self.n_bad = 0
    self.n_heavy = 0
    pdb_atoms = pdb_hierarchy.atoms()
    if (len(pdb_atoms) > 1):
        assert (not pdb_atoms.extract_i_seq().all_eq(0))
    unit_cell = xray_structure.unit_cell()
    pair_asu_table = xray_structure.pair_asu_table(
        distance_cutoff=distance_cutoff)
    asu_mappings = pair_asu_table.asu_mappings()
    asu_table = pair_asu_table.table()
    u_isos = xray_structure.extract_u_iso_or_u_equiv()
    occupancies = xray_structure.scatterers().extract_occupancies()
    sites_frac = xray_structure.sites_frac()
    sel_cache = pdb_hierarchy.atom_selection_cache()
    water_sel = sel_cache.selection("water")
    if (molprobity_map_params is not None):
        # assume parameters have been validated (symmetry of pdb and map
        # matches)
        two_fofc_map = None
        fc_map = None
        d_min = None
        crystal_gridding = None
        # read two_fofc_map
        if (molprobity_map_params.map_file_name is not None):
            f = any_file(molprobity_map_params.map_file_name)
            two_fofc_map = f.file_object.map_data()
            d_min = molprobity_map_params.d_min
            crystal_gridding = maptbx.crystal_gridding(
                f.file_object.unit_cell(),
                space_group_info=space_group_info(
                    f.file_object.space_group_number),
                pre_determined_n_real=f.file_object.unit_cell_grid)
            # rebuild the structure with the symmetry reported for the map
            # file; everything derived from it is recomputed below
            pdb_atoms = pdb_hierarchy.atoms()
            xray_structure = pdb_hierarchy.extract_xray_structure(
                crystal_symmetry=f.crystal_symmetry())
            unit_cell = xray_structure.unit_cell()
            # check for origin shift
            # -----------------------------------------------------------------
            soin = maptbx.shift_origin_if_needed(
                map_data=two_fofc_map,
                sites_cart=xray_structure.sites_cart(),
                crystal_symmetry=xray_structure.crystal_symmetry())
            two_fofc_map = soin.map_data
            xray_structure.set_sites_cart(soin.sites_cart)
            # -----------------------------------------------------------------
            pair_asu_table = xray_structure.pair_asu_table(
                distance_cutoff=distance_cutoff)
            asu_mappings = pair_asu_table.asu_mappings()
            asu_table = pair_asu_table.table()
            u_isos = xray_structure.extract_u_iso_or_u_equiv()
            occupancies = xray_structure.scatterers().extract_occupancies()
            sites_frac = xray_structure.sites_frac()
            sel_cache = pdb_hierarchy.atom_selection_cache()
            water_sel = sel_cache.selection("water")
        elif (molprobity_map_params.map_coefficients_file_name is not None):
            f = any_file(molprobity_map_params.map_coefficients_file_name)
            fourier_coefficients = f.file_server.get_miller_array(
                molprobity_map_params.map_coefficients_label)
            crystal_symmetry = fourier_coefficients.crystal_symmetry()
            d_min = fourier_coefficients.d_min()
            crystal_gridding = maptbx.crystal_gridding(
                crystal_symmetry.unit_cell(), d_min, resolution_factor=0.25,
                space_group_info=crystal_symmetry.space_group_info())
            two_fofc_map = miller.fft_map(
                crystal_gridding=crystal_gridding,
                fourier_coefficients=fourier_coefficients).apply_sigma_scaling().\
                real_map_unpadded()
        # calculate fc_map
        # (fails when neither a map file nor map coefficients were given)
        assert ((d_min is not None) and (crystal_gridding is not None))
        f_calc = xray_structure.structure_factors(d_min=d_min).f_calc()
        fc_map = miller.fft_map(crystal_gridding=crystal_gridding,
                                fourier_coefficients=f_calc)
        fc_map = fc_map.apply_sigma_scaling().real_map_unpadded()
        map_stats = extract_map_stats_for_single_atoms(
            pdb_atoms=pdb_atoms,
            xray_structure=xray_structure,
            fmodel=None,
            selection=water_sel,
            fc_map=fc_map,
            two_fofc_map=two_fofc_map)
    else:
        map_stats = extract_map_stats_for_single_atoms(
            pdb_atoms=pdb_atoms,
            xray_structure=xray_structure,
            fmodel=fmodel,
            selection=water_sel)
    waters = []
    for i_seq, atom in enumerate(pdb_atoms):
        if (water_sel[i_seq]):
            rt_mx_i_inv = asu_mappings.get_rt_mx(i_seq, 0).inverse()
            self.n_total += 1
            asu_dict = asu_table[i_seq]
            nearest_atom = nearest_contact = None
            # scan all symmetry-related neighbours for the closest
            # non-hydrogen atom
            for j_seq, j_sym_groups in asu_dict.items():
                atom_j = pdb_atoms[j_seq]
                site_j = sites_frac[j_seq]
                # Filter out hydrogens
                if atom_j.element.upper().strip() in ["H", "D"]:
                    continue
                for j_sym_group in j_sym_groups:
                    rt_mx = rt_mx_i_inv.multiply(
                        asu_mappings.get_rt_mx(j_seq, j_sym_group[0]))
                    site_ji = rt_mx * site_j
                    site_ji_cart = xray_structure.unit_cell(
                    ).orthogonalize(site_ji)
                    vec_i = col(atom.xyz)
                    vec_ji = col(site_ji_cart)
                    dxyz = abs(vec_i - vec_ji)
                    if (nearest_contact is None) or (dxyz < nearest_contact):
                        nearest_contact = dxyz
                        nearest_atom = atom_info(pdb_atom=atom_j, symop=rt_mx)
            w = water(pdb_atom=atom,
                      b_iso=adptbx.u_as_b(u_isos[i_seq]),
                      occupancy=occupancies[i_seq],
                      nearest_contact=nearest_contact,
                      nearest_atom=nearest_atom,
                      score=map_stats.two_fofc_ccs[i_seq],
                      fmodel=map_stats.fmodel_values[i_seq],
                      two_fofc=map_stats.two_fofc_values[i_seq],
                      fofc=map_stats.fofc_values[i_seq],
                      anom=map_stats.anom_values[i_seq],
                      n_hbonds=None)  # TODO
            if (w.is_bad_water()):
                w.outlier = True
                self.n_bad += 1
            elif (w.is_heavy_atom()):
                w.outlier = True
                self.n_heavy += 1
            if (w.outlier) or (collect_all):
                self.results.append(w)
    # NOTE(review): with collect_all true this counts every stored water,
    # not just those flagged above - confirm this is intended.
    self.n_outliers = len(self.results)
def __init__(self, pdb_hierarchy, nontrans_only=False, out=sys.stdout,
             quiet=True):
    """Classify the omega dihedral of every two-residue protein fragment.

    Each length-2 fragment yielded by ``generate_protein_fragments`` has its
    omega angle computed with ``get_omega`` and classified with
    ``find_omega_type``.  Every fragment whose type is not
    ``OMEGALYZE_TRANS`` is counted in ``self.n_outliers`` and the i_seqs of
    the atoms of its second residue are appended to
    ``self._outlier_i_seqs``.

    Parameters:
      pdb_hierarchy: model to analyse.  If all of its atom i_seqs are zero
        they are reset in place before the analysis starts.
      nontrans_only: when true only non-trans results are stored in
        ``self.results``; the counters are updated either way.
      out, quiet: accepted but not read anywhere in this constructor.
    """
    validation.__init__(self)
    from mmtbx.validation import utils
    from scitbx.array_family import flex
    # Indexed by [OMEGA_GENERAL, OMEGA_PRO].
    self.residue_count = [0, 0]
    # Outer index [OMEGA_GENERAL, OMEGA_PRO]; inner index
    # [OMEGALYZE_TRANS, OMEGALYZE_CIS, OMEGALYZE_TWISTED].
    self.omega_count = [[0, 0, 0], [0, 0, 0]]
    self._outlier_i_seqs = flex.size_t()

    hierarchy_atoms = pdb_hierarchy.atoms()
    if hierarchy_atoms.extract_i_seq().all_eq(0):
        hierarchy_atoms.reset_i_seq()
    segids_as_chain_ids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)

    reference_altloc = None
    last_chain_id = None
    fragments = generate_protein_fragments(
        pdb_hierarchy,
        length=2,
        geometry=None,
        include_non_standard_peptides=True)
    for fragment in fragments:
        # The second residue of the pair is the one the result is reported
        # for (it is the one that decides the proline category).
        prev_residue, residue = fragment[0], fragment[1]
        fragment_altloc = get_conformer_altloc(fragment)
        prev_altloc, residue_altloc = get_local_omega_altlocs(fragment)
        # '' is falsy, so this is '' only when neither altloc is set.
        local_altloc = prev_altloc or residue_altloc

        parent_chain = residue.parent().parent()
        chain_id = (utils.get_segid_as_chainid(chain=parent_chain)
                    if segids_as_chain_ids else parent_chain.id)
        if chain_id != last_chain_id:
            # First fragment seen for this chain id: its conformer altloc
            # becomes the reference for the rest of the chain.
            reference_altloc = fragment_altloc
            last_chain_id = chain_id
        if (fragment_altloc != reference_altloc) and local_altloc == '':
            # Residues without alternates are only reported on the first
            # pass through a chain.
            continue

        # [CA1, C1, N2, CA2]; an entry is None when that atom is missing.
        backbone = get_omega_atoms(fragment)
        if None in backbone:
            continue
        omega = get_omega(backbone)
        if omega is None:
            continue

        omega_type = find_omega_type(omega)
        is_nontrans = not (omega_type == OMEGALYZE_TRANS)
        if is_nontrans:
            self.n_outliers += 1
        res_type = OMEGA_PRO if residue.resname == "PRO" else OMEGA_GENERAL
        self.residue_count[res_type] += 1
        self.omega_count[res_type][omega_type] += 1

        highest_mc_b = get_highest_mc_b(prev_residue.atoms(),
                                        residue.atoms())
        coords = get_center(residue)
        markup_atoms = [
            kin_atom(bb_atom.parent().id_str(), bb_atom.xyz)
            for bb_atom in backbone
        ]
        result = omega_result(
            model_id=prev_residue.parent().parent().parent().id,
            chain_id=chain_id,
            resseq=residue.resseq,
            icode=residue.icode,
            resname=residue.resname,
            altloc=residue_altloc,
            prev_resseq=prev_residue.resseq,
            prev_icode=prev_residue.icode,
            prev_resname=prev_residue.resname,
            prev_altloc=prev_altloc,
            segid=None,
            omega=omega,
            omega_type=omega_type,
            res_type=res_type,
            is_nontrans=is_nontrans,
            outlier=is_nontrans,
            highest_mc_b=highest_mc_b,
            xyz=coords,
            markup_atoms=markup_atoms)

        if is_nontrans or not nontrans_only:
            self.results.append(result)
        if is_nontrans:
            outlier_i_seqs = residue.atoms().extract_i_seq()
            # Same sanity check as the Ramachandran validation: i_seqs must
            # have been assigned by now.
            assert (not outlier_i_seqs.all_eq(0))
            self._outlier_i_seqs.extend(outlier_i_seqs)

    self.results.sort(key=lambda res: res.model_id + ':' + res.id_str())
def __init__(self, pdb_hierarchy, xray_structure, fmodel,
             distance_cutoff=4.0, collect_all=True,
             molprobity_map_params=None):
    """Evaluate every atom selected by "resname HOH and name O".

    For each selected atom the constructor looks up per-atom map statistics,
    finds the closest non-hydrogen neighbour (symmetry copies included)
    among the pairs listed by ``xray_structure.pair_asu_table``, builds a
    ``water`` result and flags it when ``is_bad_water()`` or
    ``is_heavy_atom()`` is true.

    Parameters:
      pdb_hierarchy: model; when it has more than one atom its i_seqs must
        not all be zero (asserted).
      xray_structure: supplies unit cell, fractional sites, ADPs,
        occupancies and the pair table.  NOTE(review): atoms are indexed by
        the same i_seq in both objects, so the two are presumably expected
        to be in the same order -- confirm with callers.
      fmodel: passed to ``extract_map_stats_for_single_atoms`` when no
        ``molprobity_map_params`` is given; ignored otherwise.
      distance_cutoff: cutoff handed to ``pair_asu_table``.
      collect_all: when true every water is stored in ``self.results``,
        otherwise only the flagged ones.
      molprobity_map_params: optional object with ``map_file_name``,
        ``d_min``, ``map_coefficients_file_name`` and
        ``map_coefficients_label``; when given, the maps are read or
        computed from those files instead of from ``fmodel``.

    Raises:
      AssertionError: if the i_seqs are unset, or if
        ``molprobity_map_params`` is given but no resolution / gridding
        could be derived from it.
    """
    validation.__init__(self)
    from mmtbx.real_space_correlation import \
        extract_map_stats_for_single_atoms
    from cctbx import adptbx
    from scitbx.matrix import col
    self.n_bad = 0
    self.n_heavy = 0
    pdb_atoms = pdb_hierarchy.atoms()
    if len(pdb_atoms) > 1:
        assert (not pdb_atoms.extract_i_seq().all_eq(0))
    unit_cell = xray_structure.unit_cell()
    pair_asu_table = xray_structure.pair_asu_table(
        distance_cutoff=distance_cutoff)
    asu_mappings = pair_asu_table.asu_mappings()
    asu_table = pair_asu_table.table()
    u_isos = xray_structure.extract_u_iso_or_u_equiv()
    occupancies = xray_structure.scatterers().extract_occupancies()
    sites_frac = xray_structure.sites_frac()
    sel_cache = pdb_hierarchy.atom_selection_cache()
    water_sel = sel_cache.selection("resname HOH and name O")

    if molprobity_map_params is not None:
        # The parameters are assumed to have been validated already
        # (symmetry of model and map match).
        two_fofc_map = None
        d_min = None
        crystal_gridding = None
        if molprobity_map_params.map_file_name is not None:
            # Real-space map file: take the grid from the file and the
            # resolution from the parameters.
            f = any_file(molprobity_map_params.map_file_name)
            two_fofc_map = f.file_object.map_data()
            d_min = molprobity_map_params.d_min
            crystal_gridding = maptbx.crystal_gridding(
                f.file_object.unit_cell(),
                space_group_info=space_group_info(
                    f.file_object.space_group_number),
                pre_determined_n_real=f.file_object.unit_cell_grid)
        elif molprobity_map_params.map_coefficients_file_name is not None:
            # Map coefficients: FFT them into a sigma-scaled map.
            f = any_file(molprobity_map_params.map_coefficients_file_name)
            fourier_coefficients = f.file_server.get_miller_array(
                molprobity_map_params.map_coefficients_label)
            crystal_symmetry = fourier_coefficients.crystal_symmetry()
            d_min = fourier_coefficients.d_min()
            crystal_gridding = maptbx.crystal_gridding(
                crystal_symmetry.unit_cell(),
                d_min,
                resolution_factor=0.25,
                space_group_info=crystal_symmetry.space_group_info())
            two_fofc_map = miller.fft_map(
                crystal_gridding=crystal_gridding,
                fourier_coefficients=fourier_coefficients
            ).apply_sigma_scaling().real_map_unpadded()

        # Model map on the same grid, needed for the correlation.
        assert ((d_min is not None) and (crystal_gridding is not None))
        f_calc = xray_structure.structure_factors(d_min=d_min).f_calc()
        fc_map = miller.fft_map(crystal_gridding=crystal_gridding,
                                fourier_coefficients=f_calc)
        fc_map = fc_map.apply_sigma_scaling().real_map_unpadded()
        map_stats = extract_map_stats_for_single_atoms(
            pdb_atoms=pdb_atoms,
            xray_structure=xray_structure,
            fmodel=None,
            selection=water_sel,
            fc_map=fc_map,
            two_fofc_map=two_fofc_map)
    else:
        map_stats = extract_map_stats_for_single_atoms(
            pdb_atoms=pdb_atoms,
            xray_structure=xray_structure,
            fmodel=fmodel,
            selection=water_sel)

    for i_seq, atom in enumerate(pdb_atoms):
        if not water_sel[i_seq]:
            continue
        rt_mx_i_inv = asu_mappings.get_rt_mx(i_seq, 0).inverse()
        self.n_total += 1
        asu_dict = asu_table[i_seq]
        nearest_atom = nearest_contact = None
        # The water position is the same for every contact, so build the
        # vector once per water.
        vec_i = col(atom.xyz)
        for j_seq, j_sym_groups in asu_dict.items():
            atom_j = pdb_atoms[j_seq]
            # Hydrogens / deuteriums never count as the nearest contact.
            if atom_j.element.upper().strip() in ("H", "D"):
                continue
            site_j = sites_frac[j_seq]
            for j_sym_group in j_sym_groups:
                # Operator taking atom j into the frame of this water.
                rt_mx = rt_mx_i_inv.multiply(
                    asu_mappings.get_rt_mx(j_seq, j_sym_group[0]))
                site_ji_cart = unit_cell.orthogonalize(rt_mx * site_j)
                dxyz = abs(vec_i - col(site_ji_cart))
                if (nearest_contact is None) or (dxyz < nearest_contact):
                    nearest_contact = dxyz
                    nearest_atom = atom_info(pdb_atom=atom_j, symop=rt_mx)
        w = water(
            pdb_atom=atom,
            b_iso=adptbx.u_as_b(u_isos[i_seq]),
            occupancy=occupancies[i_seq],
            nearest_contact=nearest_contact,
            nearest_atom=nearest_atom,
            score=map_stats.two_fofc_ccs[i_seq],
            fmodel=map_stats.fmodel_values[i_seq],
            two_fofc=map_stats.two_fofc_values[i_seq],
            fofc=map_stats.fofc_values[i_seq],
            anom=map_stats.anom_values[i_seq],
            n_hbonds=None)  # TODO
        if w.is_bad_water():
            w.outlier = True
            self.n_bad += 1
        elif w.is_heavy_atom():
            w.outlier = True
            self.n_heavy += 1
        if w.outlier or collect_all:
            self.results.append(w)
    # NOTE(review): with collect_all=True self.results holds every water,
    # so n_outliers is the number of collected waters rather than
    # n_bad + n_heavy -- confirm this is what callers expect.
    self.n_outliers = len(self.results)
def __init__(self, pdb_hierarchy, outliers_only=False, show_errors=False,
             out=sys.stdout, quiet=False):
    """Run the Ramachandran analysis over every residue of the hierarchy.

    For each atom group of each residue that has sequence-adjacent
    neighbours on both sides within its chain, phi and psi are computed
    with ``get_phi`` / ``get_psi``, the residue is assigned one of the
    ``RAMA_*`` categories, scored with ``RamachandranEval.evaluate`` and
    classified with ``self.evaluateScore``.

    Parameters:
      pdb_hierarchy: model to analyse.  If all of its atom i_seqs are zero
        they are reset in place first.
      outliers_only: when true only results classified as
        ``RAMALYZE_OUTLIER`` are stored in ``self.results``; the counters
        are updated either way.
      show_errors, out, quiet: accepted but not read anywhere in this
        constructor.

    Side effects on ``self``: ``n_total`` and ``n_type`` are incremented per
    evaluated atom group, ``_outlier_i_seqs`` collects the atom i_seqs of
    outliers, and ``out_percent`` / ``fav_percent`` are set at the end from
    ``get_outliers_count_and_fraction`` / ``get_favored_count_and_fraction``.
    ``n_allowed`` and ``n_favored`` are only initialised to zero here
    (presumably ``evaluateScore`` updates them -- TODO confirm).
    """
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    self.n_type = [0] * 6
    from mmtbx.validation import utils
    import mmtbx.rotamer
    from mmtbx.rotamer import ramachandran_eval
    from scitbx.array_family import flex
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if all_i_seqs.all_eq(0):
        pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    r = ramachandran_eval.RamachandranEval()
    prev_rezes, next_rezes = None, None
    cur_resseq = None
    for model in pdb_hierarchy.models():
        for chain in model.chains():
            if use_segids:
                chain_id = utils.get_segid_as_chainid(chain=chain)
            else:
                chain_id = chain.id
            residues = list(chain.residue_groups())
            for i, residue_group in enumerate(residues):
                # Lists of atoms (rather than single atom groups) are passed
                # to get_phi / get_psi because some residues have an "A"
                # alternate conformation that needs atoms from the ""
                # conformation to be calculated correctly (see 1jxt.pdb).
                # This way both sets of atoms can be searched if necessary.
                prev_atom_list, next_atom_list, atom_list = None, None, None
                # NOTE(review): rezes and cur_resseq are not reset between
                # chains or models, so for the first residue of a later
                # chain prev_rezes is the last residue of the previous one.
                # Whether get_phi rejects such a pair is not visible here.
                if cur_resseq is not None:
                    prev_rezes = rezes
                # This must happen before any ``continue`` below so that a
                # skipped residue still becomes the "previous" one.
                rezes = construct_complete_residues(residue_group)
                cur_resseq = residue_group.resseq_as_int()
                cur_icode = residue_group.icode.strip()
                if i > 0:
                    # The previous residue must be sequence-adjacent; equal
                    # numbers are accepted only when an insertion code is
                    # involved.
                    before = residues[i - 1]
                    if cur_resseq == before.resseq_as_int():
                        if (cur_icode == '') and (before.icode.strip() == ''):
                            continue
                    elif cur_resseq != before.resseq_as_int() + 1:
                        continue
                if i < len(residues) - 1:
                    # Same adjacency test for the following residue.
                    after = residues[i + 1]
                    if cur_resseq == after.resseq_as_int():
                        if (cur_icode == '') and (after.icode.strip() == ''):
                            continue
                    elif cur_resseq != after.resseq_as_int() - 1:
                        continue
                    next_rezes = construct_complete_residues(after)
                else:
                    next_rezes = None
                for atom_group in residue_group.atom_groups():
                    alt_conf = atom_group.altloc
                    if rezes is not None:
                        atom_list = rezes.get(alt_conf)
                    # For the neighbours fall back to their first altloc (in
                    # sorted order) when they lack this atom group's altloc.
                    if prev_rezes is not None:
                        prev_atom_list = prev_rezes.get(alt_conf)
                        if prev_atom_list is None:
                            prev_keys = sorted(prev_rezes.keys())
                            prev_atom_list = prev_rezes.get(prev_keys[0])
                    if next_rezes is not None:
                        next_atom_list = next_rezes.get(alt_conf)
                        if next_atom_list is None:
                            next_keys = sorted(next_rezes.keys())
                            next_atom_list = next_rezes.get(next_keys[0])
                    phi = get_phi(prev_atom_list, atom_list)
                    psi = get_psi(atom_list, next_atom_list)
                    coords = get_center(atom_group)
                    if phi is None or psi is None:
                        continue
                    res_type = RAMA_GENERAL
                    self.n_total += 1
                    resname3 = atom_group.resname[0:3]
                    if resname3 == "GLY":
                        res_type = RAMA_GLYCINE
                    elif resname3 == "PRO":
                        if is_cis_peptide(prev_atom_list, atom_list):
                            res_type = RAMA_CISPRO
                        else:
                            res_type = RAMA_TRANSPRO
                    elif isPrePro(residues, i):
                        res_type = RAMA_PREPRO
                    elif resname3 in ("ILE", "VAL"):
                        res_type = RAMA_ILE_VAL
                    self.n_type[res_type] += 1
                    value = r.evaluate(res_types[res_type], [phi, psi])
                    ramaType = self.evaluateScore(res_type, value)
                    is_outlier = ramaType == RAMALYZE_OUTLIER
                    c_alphas = None
                    # XXX only save kinemage data for outliers
                    if is_outlier:
                        c_alphas = []
                        for atoms in [prev_atom_list, atom_list,
                                      next_atom_list]:
                            for a in atoms:
                                if a.name.strip() == "CA":
                                    a_ = atom(pdb_atom=a)
                                    c_alphas.append(c_alpha(
                                        id_str=a_.atom_group_id_str(),
                                        xyz=a_.xyz))
                        # One CA each from previous, current and next residue.
                        assert (len(c_alphas) == 3)
                    result = ramachandran(
                        chain_id=chain_id,
                        resseq=residue_group.resseq,
                        icode=residue_group.icode,
                        resname=atom_group.resname,
                        altloc=atom_group.altloc,
                        segid=None,  # XXX ???
                        phi=phi,
                        psi=psi,
                        rama_type=ramaType,
                        res_type=res_type,
                        score=value * 100,
                        outlier=is_outlier,
                        xyz=coords,
                        c_alphas=c_alphas)
                    if (not outliers_only) or is_outlier:
                        self.results.append(result)
                    if is_outlier:
                        i_seqs = atom_group.atoms().extract_i_seq()
                        assert (not i_seqs.all_eq(0))
                        self._outlier_i_seqs.extend(i_seqs)
    out_count, out_percent = self.get_outliers_count_and_fraction()
    fav_count, fav_percent = self.get_favored_count_and_fraction()
    # The helpers return fractions; store them as percentages.
    self.out_percent = out_percent * 100.0
    self.fav_percent = fav_percent * 100.0