def exercise_percentile_based_spread():
    """
    Regression check for libtbx.math_utils.percentile_based_spread.

    Builds 123456 samples drawn from gauss(100, 10) plus 1000 samples drawn
    from gauss(300, 30), then asserts that the percentile-based spread of the
    combined list is above 100 and below the root-mean-square of the same
    list.  Also asserts that a two-element list gives a positive result.

    The samples are not seeded, so the exact values differ between runs.
    """
    from libtbx.math_utils import percentile_based_spread
    import random
    import math
    n_points = 123456
    deltas = []
    for i in range(n_points):
        x = random.gauss(100, 10)
        deltas.append(x)
    for i in range(1000):
        x = random.gauss(300, 30)
        deltas.append(x)
    pbs = percentile_based_spread(deltas)
    # Average over every sample actually present (n_points + 1000), not just
    # n_points, so that this really is the root-mean-square of deltas.
    rmsd = math.sqrt(sum(x ** 2 for x in deltas) / len(deltas))
    assert (pbs > 100) and (pbs < rmsd)
    # Test small list processing
    assert percentile_based_spread([1, 1]) > 0
def exercise_percentile_based_spread():
    """
    Exercise libtbx.math_utils.percentile_based_spread on noisy data.

    The input is 123456 draws from gauss(100, 10) followed by 1000 draws from
    gauss(300, 30).  The test passes when the percentile-based spread of that
    list is greater than 100 and smaller than the list's root-mean-square,
    and when a two-element list yields a positive spread.

    No random seed is set, so the sampled values change from run to run.
    """
    from libtbx.math_utils import percentile_based_spread
    import random
    import math
    n_points = 123456
    deltas = [random.gauss(100, 10) for _ in range(n_points)]
    deltas.extend(random.gauss(300, 30) for _ in range(1000))
    pbs = percentile_based_spread(deltas)
    # The mean must be taken over all samples (including the 1000 appended
    # ones); dividing by n_points alone overstates the RMS.
    mean_square = sum(x ** 2 for x in deltas) / len(deltas)
    rmsd = math.sqrt(mean_square)
    assert (pbs > 100) and (pbs < rmsd)
    # Test small list processing
    assert percentile_based_spread([1, 1]) > 0
def compare_ligands_impl(ligand,
                         reference_ligands,
                         max_distance_between_centers_of_mass=8.0,
                         exclude_hydrogens=True,
                         implicit_matching=False,
                         verbose=False,
                         quiet=False,
                         raise_sorry_if_no_matching_atoms=True,
                         out=sys.stdout):
    """
    Given a target ligand and a list of reference ligands, return the RMSD(s)
    for any ligand determined to be approximately equivalent.  (Usually there
    will be just one of these, but this allows for alternate conformations.)

    A reference ligand is considered only when the mean of its atomic
    coordinates lies closer than max_distance_between_centers_of_mass to the
    mean of the target's coordinates (an unweighted mean of the sites is
    used).  Atoms are then paired by identical atom name.

    :param ligand: target object; must provide atoms() and id_str()
    :param reference_ligands: iterable of objects with the same interface
    :param max_distance_between_centers_of_mass: cutoff for the pre-filter
    :param exclude_hydrogens: skip target atoms whose element is H or D
    :param implicit_matching: when no atom names match, pair each target atom
      with the closest reference atom of the same element instead
    :param verbose: also print the per-atom distances (ignored if quiet)
    :param quiet: suppress the per-ligand summary line
    :param raise_sorry_if_no_matching_atoms: raise Sorry when a candidate
      reference ligand has no atoms that can be paired with the target
    :param out: file-like object that receives all printed output
    :returns: tuple (rmsds, pbss) of parallel lists, one entry per candidate
      reference ligand; None as soon as a candidate has no paired atoms and
      raise_sorry_if_no_matching_atoms is false
    :raises Sorry: see raise_sorry_if_no_matching_atoms
    """
    from scitbx.array_family import flex
    from scitbx.matrix import col
    atoms_1 = ligand.atoms()
    sites_1 = atoms_1.extract_xyz()
    xyz_mean_1 = sites_1.mean()
    # Pre-filter: keep only reference ligands whose mean position is close
    # to that of the target.
    matching = []
    for ligand_2 in reference_ligands:
        sites_2 = ligand_2.atoms().extract_xyz()
        xyz_mean_2 = sites_2.mean()
        dxyz = abs(col(xyz_mean_1) - col(xyz_mean_2))
        if (dxyz < max_distance_between_centers_of_mass):
            matching.append(ligand_2)
    rmsds = []
    pbss = []
    for ligand_2 in matching:
        atoms_2 = ligand_2.atoms()
        # isel_1[k] / isel_2[k] are the indices (into atoms_1 / atoms_2) of
        # the k-th matched atom pair.
        isel_1 = flex.size_t()
        isel_2 = flex.size_t()
        for i_seq, atom_1 in enumerate(atoms_1):
            if (atom_1.element.strip() in ["H", "D"]) and (exclude_hydrogens):
                continue
            for j_seq, atom_2 in enumerate(atoms_2):
                if (atom_1.name == atom_2.name):
                    isel_1.append(i_seq)
                    isel_2.append(j_seq)
                    break
        if (len(isel_1) == 0):
            if (implicit_matching):
                print(
                    " warning: no atom name matches found - will guess equivalence from sites",
                    file=out)
                # XXX this is embarrassing... needs to be much smarter
                for i_seq, atom_1 in enumerate(atoms_1):
                    if ((atom_1.element.strip() in ["H", "D"]) and
                            (exclude_hydrogens)):
                        continue
                    j_seq_best = None
                    name_best = None
                    dxyz_best = sys.maxsize
                    for j_seq, atom_2 in enumerate(atoms_2):
                        if (atom_1.element == atom_2.element):
                            dxyz = abs(col(atom_1.xyz) - col(atom_2.xyz))
                            if (dxyz < dxyz_best):
                                j_seq_best = j_seq
                                name_best = atom_2.name
                                dxyz_best = dxyz
                    if (j_seq_best is not None):
                        print(" '%s' : '%s' (distance = %.2f)" %
                              (atom_1.name, name_best, dxyz_best), file=out)
                        isel_1.append(i_seq)
                        isel_2.append(j_seq_best)
        if (len(isel_1) == 0):
            if (raise_sorry_if_no_matching_atoms):
                raise Sorry("No matching atoms found!")
            else:
                print(" WARNING: no matching atoms found!", file=out)
                return None
        # Select into fresh names: sites_1 must keep the full coordinate
        # array, because isel_1 for the next reference ligand indexes the
        # complete atom list again.
        sel_sites_1 = sites_1.select(isel_1)
        sel_sites_2 = atoms_2.extract_xyz().select(isel_2)
        rmsd = sel_sites_1.rms_difference(sel_sites_2)
        pair_distances = (sel_sites_2 - sel_sites_1).norms()
        pbs = percentile_based_spread(pair_distances)
        if (not quiet):
            print(" '%s' matches '%s': atoms=%d rmsd=%.3f" %
                  (ligand.id_str(), ligand_2.id_str(), sel_sites_1.size(),
                   rmsd), file=out)
        rmsds.append(rmsd)
        pbss.append(pbs)
        if (verbose) and (not quiet):
            # pair_distances is ordered like isel_1, so it is indexed by the
            # pair position k, not by the atom index i_seq.
            for k, i_seq in enumerate(isel_1):
                print(" %s: dxyz=%.2f" %
                      (atoms_1[i_seq].id_str(), pair_distances[k]), file=out)
    return rmsds, pbss
def compare_ligands_impl(ligand, reference_ligands,
                         max_distance_between_centers_of_mass=8.0,
                         exclude_hydrogens=True,
                         implicit_matching=False,
                         verbose=False,
                         quiet=False,
                         raise_sorry_if_no_matching_atoms=True,
                         out=sys.stdout):
    """
    Given a target ligand and a list of reference ligands, return the RMSD(s)
    for any ligand determined to be approximately equivalent.  (Usually there
    will be just one of these, but this allows for alternate conformations.)

    Candidate reference ligands are those whose mean atomic coordinate is
    closer than max_distance_between_centers_of_mass to the target's mean
    atomic coordinate (plain, unweighted mean of the sites).  Target and
    reference atoms are paired by equal atom name; target atoms whose element
    is H or D are skipped when exclude_hydrogens is true.  If no names match
    and implicit_matching is true, each target atom is paired with the
    nearest reference atom of the same element.

    All messages go to out.  quiet suppresses the per-ligand summary line;
    verbose (only without quiet) adds one line per paired atom.

    Returns a tuple (rmsds, pbss) of parallel lists with one entry per
    candidate reference ligand.  If a candidate ends up with no paired atoms,
    Sorry is raised when raise_sorry_if_no_matching_atoms is true; otherwise
    a warning is printed and None is returned immediately.
    """
    from scitbx.array_family import flex
    from scitbx.matrix import col
    matching = []
    atoms_1 = ligand.atoms()
    sites_1 = atoms_1.extract_xyz()
    xyz_mean_1 = sites_1.mean()
    for ligand_2 in reference_ligands:
        sites_2 = ligand_2.atoms().extract_xyz()
        xyz_mean_2 = sites_2.mean()
        dxyz = abs(col(xyz_mean_1) - col(xyz_mean_2))
        if (dxyz < max_distance_between_centers_of_mass):
            matching.append(ligand_2)
    rmsds = []
    pbss = []
    for ligand_2 in matching:
        atoms_2 = ligand_2.atoms()
        # Parallel index lists: entry k of each names one matched atom pair.
        isel_1 = flex.size_t()
        isel_2 = flex.size_t()
        for i_seq, atom_1 in enumerate(atoms_1):
            if (atom_1.element.strip() in ["H", "D"]) and (exclude_hydrogens):
                continue
            for j_seq, atom_2 in enumerate(atoms_2):
                if (atom_1.name == atom_2.name):
                    isel_1.append(i_seq)
                    isel_2.append(j_seq)
                    break
        if (len(isel_1) == 0):
            if (implicit_matching):
                # print() with file= replaces the Python-2-only
                # "print >> out" statement, which fails under Python 3.
                print(
                    " warning: no atom name matches found - will guess equivalence from sites",
                    file=out)
                # XXX this is embarrassing... needs to be much smarter
                for i_seq, atom_1 in enumerate(atoms_1):
                    if ((atom_1.element.strip() in ["H", "D"]) and
                            (exclude_hydrogens)):
                        continue
                    j_seq_best = None
                    name_best = None
                    # sys.maxint no longer exists in Python 3.
                    dxyz_best = sys.maxsize
                    for j_seq, atom_2 in enumerate(atoms_2):
                        if (atom_1.element == atom_2.element):
                            dxyz = abs(col(atom_1.xyz) - col(atom_2.xyz))
                            if (dxyz < dxyz_best):
                                j_seq_best = j_seq
                                name_best = atom_2.name
                                dxyz_best = dxyz
                    if (j_seq_best is not None):
                        print(" '%s' : '%s' (distance = %.2f)" %
                              (atom_1.name, name_best, dxyz_best), file=out)
                        isel_1.append(i_seq)
                        isel_2.append(j_seq_best)
        if (len(isel_1) == 0):
            if (raise_sorry_if_no_matching_atoms):
                raise Sorry("No matching atoms found!")
            else:
                print(" WARNING: no matching atoms found!", file=out)
                return None
        # Do not overwrite sites_1 here: the next candidate ligand needs the
        # complete coordinate array, since its isel_1 refers to the full
        # atom list.
        matched_sites_1 = sites_1.select(isel_1)
        matched_sites_2 = atoms_2.extract_xyz().select(isel_2)
        rmsd = matched_sites_1.rms_difference(matched_sites_2)
        distances = (matched_sites_2 - matched_sites_1).norms()
        pbs = percentile_based_spread(distances)
        if (not quiet):
            print(" '%s' matches '%s': atoms=%d rmsd=%.3f" % (
                ligand.id_str(), ligand_2.id_str(), matched_sites_1.size(),
                rmsd), file=out)
        rmsds.append(rmsd)
        pbss.append(pbs)
        if (verbose) and (not quiet):
            # distances has one entry per matched pair, in isel_1 order, so
            # look it up by pair position rather than by atom index.
            for i_pair, i_seq in enumerate(isel_1):
                print(" %s: dxyz=%.2f" %
                      (atoms_1[i_seq].id_str(), distances[i_pair]), file=out)
    return rmsds, pbss