Beispiel #1
0
def exercise_percentile_based_spread():
    """Exercise percentile_based_spread on a Gaussian sample with outliers.

    The sample consists of 123456 values drawn from gauss(100, 10) plus 1000
    outliers drawn from gauss(300, 30).  The test asserts that the
    percentile-based spread of that sample is greater than 100 and smaller
    than the root-mean-square of the same sample, and that a two-element
    list yields a positive spread.

    The random generator is not seeded here, so the exact values differ
    from run to run.
    """
    from libtbx.math_utils import percentile_based_spread
    import random
    import math
    n_points = 123456
    n_outliers = 1000
    deltas = [random.gauss(100, 10) for _ in range(n_points)]
    deltas.extend(random.gauss(300, 30) for _ in range(n_outliers))
    pbs = percentile_based_spread(deltas)
    # Normalise by the actual sample size: the outliers contribute to the
    # sum of squares, so dividing by n_points alone would overstate the RMS.
    rmsd = math.sqrt(sum(x**2 for x in deltas) / len(deltas))
    assert (pbs > 100) and (pbs < rmsd)
    # Test small list processing
    assert percentile_based_spread([1, 1]) > 0
def exercise_percentile_based_spread() :
  """
  Exercise percentile_based_spread on a Gaussian sample with outliers.

  The sample consists of 123456 values drawn from gauss(100, 10) plus 1000
  outliers drawn from gauss(300, 30).  The test asserts that the
  percentile-based spread of that sample is greater than 100 and smaller
  than the root-mean-square of the same sample, and that a two-element list
  yields a positive spread.

  The random generator is not seeded here, so the exact values differ from
  run to run.
  """
  from libtbx.math_utils import percentile_based_spread
  import random
  import math
  n_points = 123456
  n_outliers = 1000
  deltas = [ random.gauss(100, 10) for _ in range(n_points) ]
  deltas.extend(random.gauss(300, 30) for _ in range(n_outliers))
  pbs = percentile_based_spread(deltas)
  # Normalise by the actual sample size: the outliers contribute to the sum
  # of squares, so dividing by n_points alone would overstate the RMS.
  rmsd = math.sqrt(sum(x**2 for x in deltas) / len(deltas))
  assert (pbs > 100) and (pbs < rmsd)
  # Test small list processing
  assert percentile_based_spread([1,1]) > 0
Beispiel #3
0
def compare_ligands_impl(ligand,
                         reference_ligands,
                         max_distance_between_centers_of_mass=8.0,
                         exclude_hydrogens=True,
                         implicit_matching=False,
                         verbose=False,
                         quiet=False,
                         raise_sorry_if_no_matching_atoms=True,
                         out=sys.stdout):
    """
    Given a target ligand and a list of reference ligands, return the RMSD(s)
    for any ligand determined to be approximately equivalent.  (Usually there
    will be just one of these, but this allows for alternate conformations.)

    A reference ligand is a candidate when the distance between the mean of
    its atomic sites and the mean of the target's sites is below
    max_distance_between_centers_of_mass.  Atoms are paired by identical
    atom name (first match wins).  If no names match at all and
    implicit_matching is true, each target atom is instead paired with the
    closest reference atom of the same element.

    :param ligand: target ligand; must provide atoms() and id_str()
    :param reference_ligands: iterable of ligands offering the same methods
    :param max_distance_between_centers_of_mass: cutoff on the distance
      between the mean sites of target and reference
    :param exclude_hydrogens: skip target atoms whose element is H or D
    :param implicit_matching: fall back to nearest-same-element pairing when
      no atom names match
    :param verbose: also print the per-atom distance (ignored when quiet)
    :param quiet: suppress the per-ligand summary line
    :param raise_sorry_if_no_matching_atoms: raise Sorry instead of
      returning None when a candidate has no paired atoms
    :param out: file-like object receiving all printed output
    :returns: tuple (rmsds, pbss) of parallel lists with one entry per
      candidate reference ligand, or None as soon as a candidate has no
      paired atoms and raise_sorry_if_no_matching_atoms is false (results
      for earlier candidates are discarded in that case)
    :raises Sorry: when a candidate has no paired atoms and
      raise_sorry_if_no_matching_atoms is true
    """
    from scitbx.array_family import flex
    from scitbx.matrix import col
    atoms_1 = ligand.atoms()
    sites_1 = atoms_1.extract_xyz()
    xyz_mean_1 = sites_1.mean()
    matching = []
    for ligand_2 in reference_ligands:
        sites_2 = ligand_2.atoms().extract_xyz()
        xyz_mean_2 = sites_2.mean()
        dxyz = abs(col(xyz_mean_1) - col(xyz_mean_2))
        if (dxyz < max_distance_between_centers_of_mass):
            matching.append(ligand_2)
    rmsds = []
    pbss = []
    for ligand_2 in matching:
        atoms_2 = ligand_2.atoms()
        isel_1 = flex.size_t()
        isel_2 = flex.size_t()
        for i_seq, atom_1 in enumerate(atoms_1):
            if (exclude_hydrogens) and (atom_1.element.strip() in ["H", "D"]):
                continue
            for j_seq, atom_2 in enumerate(atoms_2):
                if (atom_1.name == atom_2.name):
                    isel_1.append(i_seq)
                    isel_2.append(j_seq)
                    break
        if (len(isel_1) == 0):
            if (implicit_matching):
                print(
                    "  warning: no atom name matches found - will guess equivalence from sites",
                    file=out)
                # XXX this is embarrassing... needs to be much smarter
                for i_seq, atom_1 in enumerate(atoms_1):
                    if (exclude_hydrogens) and (atom_1.element.strip()
                                                in ["H", "D"]):
                        continue
                    j_seq_best = None
                    name_best = None
                    dxyz_best = sys.maxsize
                    for j_seq, atom_2 in enumerate(atoms_2):
                        if (atom_1.element == atom_2.element):
                            dxyz = abs(col(atom_1.xyz) - col(atom_2.xyz))
                            if (dxyz < dxyz_best):
                                j_seq_best = j_seq
                                name_best = atom_2.name
                                dxyz_best = dxyz
                    if (j_seq_best is not None):
                        print("    '%s' : '%s' (distance = %.2f)" %
                              (atom_1.name, name_best, dxyz_best),
                              file=out)
                        isel_1.append(i_seq)
                        isel_2.append(j_seq_best)
            if (len(isel_1) == 0):
                if (raise_sorry_if_no_matching_atoms):
                    raise Sorry("No matching atoms found!")
                else:
                    print("  WARNING: no matching atoms found!", file=out)
                    return None
        # Select into fresh names: sites_1 must remain the complete site
        # array, because isel_1 for the next candidate indexes the full
        # atom list again.
        sel_sites_1 = sites_1.select(isel_1)
        sel_sites_2 = atoms_2.extract_xyz().select(isel_2)
        rmsd = sel_sites_1.rms_difference(sel_sites_2)
        pbs = percentile_based_spread((sel_sites_2 - sel_sites_1).norms())
        if (not quiet):
            print("  '%s' matches '%s': atoms=%d rmsd=%.3f" %
                  (ligand.id_str(), ligand_2.id_str(), sel_sites_1.size(),
                   rmsd),
                  file=out)
        rmsds.append(rmsd)
        pbss.append(pbs)
        if (verbose) and (not quiet):
            dxyz = (sel_sites_2 - sel_sites_1).norms()
            # dxyz has one entry per selected pair, so it is indexed by the
            # position within the selection, not by the atom's i_seq.
            for i_pair, i_seq in enumerate(isel_1):
                print("    %s: dxyz=%.2f" %
                      (atoms_1[i_seq].id_str(), dxyz[i_pair]),
                      file=out)
    return rmsds, pbss
Beispiel #4
0
def compare_ligands_impl (ligand,
    reference_ligands,
    max_distance_between_centers_of_mass=8.0,
    exclude_hydrogens=True,
    implicit_matching=False,
    verbose=False,
    quiet=False,
    raise_sorry_if_no_matching_atoms=True,
    out=sys.stdout) :
  """
  Given a target ligand and a list of reference ligands, return the RMSD(s)
  for any ligand determined to be approximately equivalent.  (Usually there
  will be just one of these, but this allows for alternate conformations.)

  A reference ligand is a candidate when the distance between the mean of
  its atomic sites and the mean of the target's sites is below
  max_distance_between_centers_of_mass.  Atoms are paired by identical atom
  name (first match wins).  If no names match at all and implicit_matching
  is true, each target atom is instead paired with the closest reference
  atom of the same element.

  :param ligand: target ligand; must provide atoms() and id_str()
  :param reference_ligands: iterable of ligands offering the same methods
  :param max_distance_between_centers_of_mass: cutoff on the distance
    between the mean sites of target and reference
  :param exclude_hydrogens: skip target atoms whose element is H or D
  :param implicit_matching: fall back to nearest-same-element pairing when
    no atom names match
  :param verbose: also print the per-atom distance (ignored when quiet)
  :param quiet: suppress the per-ligand summary line
  :param raise_sorry_if_no_matching_atoms: raise Sorry instead of returning
    None when a candidate has no paired atoms
  :param out: file-like object receiving all printed output
  :returns: tuple (rmsds, pbss) of parallel lists with one entry per
    candidate reference ligand, or None as soon as a candidate has no paired
    atoms and raise_sorry_if_no_matching_atoms is false (results for earlier
    candidates are discarded in that case)
  :raises Sorry: when a candidate has no paired atoms and
    raise_sorry_if_no_matching_atoms is true
  """
  from scitbx.array_family import flex
  from scitbx.matrix import col
  atoms_1 = ligand.atoms()
  sites_1 = atoms_1.extract_xyz()
  xyz_mean_1 = sites_1.mean()
  matching = []
  for ligand_2 in reference_ligands :
    sites_2 = ligand_2.atoms().extract_xyz()
    xyz_mean_2 = sites_2.mean()
    dxyz = abs(col(xyz_mean_1) - col(xyz_mean_2))
    if (dxyz < max_distance_between_centers_of_mass) :
      matching.append(ligand_2)
  rmsds = []
  pbss = []
  for ligand_2 in matching :
    atoms_2 = ligand_2.atoms()
    isel_1 = flex.size_t()
    isel_2 = flex.size_t()
    for i_seq, atom_1 in enumerate(atoms_1) :
      if (exclude_hydrogens) and (atom_1.element.strip() in ["H","D"]) :
        continue
      for j_seq, atom_2 in enumerate(atoms_2) :
        if (atom_1.name == atom_2.name) :
          isel_1.append(i_seq)
          isel_2.append(j_seq)
          break
    if (len(isel_1) == 0) :
      if (implicit_matching) :
        print >> out, "  warning: no atom name matches found - will guess equivalence from sites"
        # XXX this is embarrassing... needs to be much smarter
        for i_seq, atom_1 in enumerate(atoms_1) :
          if (exclude_hydrogens) and (atom_1.element.strip() in ["H","D"]) :
            continue
          j_seq_best = None
          name_best = None
          dxyz_best = sys.maxint
          for j_seq, atom_2 in enumerate(atoms_2) :
            if (atom_1.element == atom_2.element) :
              dxyz = abs(col(atom_1.xyz) - col(atom_2.xyz))
              if (dxyz < dxyz_best) :
                j_seq_best = j_seq
                name_best = atom_2.name
                dxyz_best = dxyz
          if (j_seq_best is not None) :
            print >> out, "    '%s' : '%s' (distance = %.2f)" % (atom_1.name,
              name_best, dxyz_best)
            isel_1.append(i_seq)
            isel_2.append(j_seq_best)
      if (len(isel_1) == 0) :
        if (raise_sorry_if_no_matching_atoms) :
          raise Sorry("No matching atoms found!")
        else :
          print >> out, "  WARNING: no matching atoms found!"
          return None
    # Select into fresh names: sites_1 must remain the complete site array,
    # because isel_1 for the next candidate indexes the full atom list again.
    sel_sites_1 = sites_1.select(isel_1)
    sel_sites_2 = atoms_2.extract_xyz().select(isel_2)
    rmsd = sel_sites_1.rms_difference(sel_sites_2)
    pbs = percentile_based_spread((sel_sites_2 - sel_sites_1).norms())
    if (not quiet) :
      print >> out, "  '%s' matches '%s': atoms=%d rmsd=%.3f" % (
        ligand.id_str(), ligand_2.id_str(), sel_sites_1.size(), rmsd)
    rmsds.append(rmsd)
    pbss.append(pbs)
    if (verbose) and (not quiet) :
      dxyz = (sel_sites_2 - sel_sites_1).norms()
      # dxyz has one entry per selected pair, so it is indexed by the
      # position within the selection, not by the atom's i_seq.
      for i_pair, i_seq in enumerate(isel_1) :
        print >> out, "    %s: dxyz=%.2f" % (atoms_1[i_seq].id_str(),
          dxyz[i_pair])
  return rmsds, pbss