Example #1
    def zipper(self, initial_rms, level):
        matches = []
        for jj in xrange(1, self.n - 1):
            ii = jj - 1
            kk = jj + 1
            #make triplets of sequence related sites
            xi = self.set_a[ii]
            xpi = self.set_b[ii]
            xj = self.set_a[jj]
            xpj = self.set_b[jj]
            xk = self.set_a[kk]
            xpk = self.set_b[kk]
            #get the lsq matrix
            ref = flex.vec3_double([xi, xj, xk])
            mov = flex.vec3_double([xpi, xpj, xpk])

            lsq = superpose.least_squares_fit(ref, mov)
            #here we have the rotation and translation operators
            r = lsq.r
            t = lsq.t
            rmsd = 10.0
            #we would like to know the rmsd on the coords used for superposition
            new_sites = lsq.other_sites_best_fit()
            deltas = ref - new_sites
            rmsd = deltas.rms_length()
            if rmsd < initial_rms:
                # please apply this rotation to the full set
                converged = False
                count = 0
                match_size = 0
                previous_match_size = 0
                tmp_a = None
                tmp_b = None
                select = flex.bool()
                while not converged:
                    previous_match_size = match_size
                    tmp_a, tmp_b, select = self.pair_sites(r, t, level)
                    #print count, tmp_a.size()
                    match_size = tmp_a.size()
                    if match_size <= previous_match_size:
                        converged = True
                        break
                    if count > self.max_iter:
                        converged = True
                        break
                    if tmp_b.size() > 0:
                        lsq = superpose.least_squares_fit(tmp_a, tmp_b)
                        tmp_sites = lsq.other_sites_best_fit()
                        rmsd = tmp_a.rms_difference(tmp_sites)
                        r = lsq.r
                        t = lsq.t
                        count += 1
                if converged:

                    matches.append([
                        select.deep_copy().iselection(), r, t, rmsd,
                        select.deep_copy().iselection().size()
                    ])
        return matches
Example #2
    def zipper(self, initial_rms, level):
        matches = []
        for jj in xrange(1, self.n - 1):
            ii = jj - 1
            kk = jj + 1
            # make triplets of sequence related sites
            xi = self.set_a[ii]
            xpi = self.set_b[ii]
            xj = self.set_a[jj]
            xpj = self.set_b[jj]
            xk = self.set_a[kk]
            xpk = self.set_b[kk]
            # get the lsq matrix
            ref = flex.vec3_double([xi, xj, xk])
            mov = flex.vec3_double([xpi, xpj, xpk])

            lsq = superpose.least_squares_fit(ref, mov)
            # here we have the rotation and translation operators
            r = lsq.r
            t = lsq.t
            rmsd = 10.0
            # we would like to know the rmsd on the coords used for superposition
            new_sites = lsq.other_sites_best_fit()
            deltas = ref - new_sites
            rmsd = deltas.rms_length()
            if rmsd < initial_rms:
                # please apply this rotation to the full set
                converged = False
                count = 0
                match_size = 0
                previous_match_size = 0
                tmp_a = None
                tmp_b = None
                select = flex.bool()
                while not converged:
                    previous_match_size = match_size
                    tmp_a, tmp_b, select = self.pair_sites(r, t, level)
                    # print count, tmp_a.size()
                    match_size = tmp_a.size()
                    if match_size <= previous_match_size:
                        converged = True
                        break
                    if count > self.max_iter:
                        converged = True
                        break
                    if tmp_b.size() > 0:
                        lsq = superpose.least_squares_fit(tmp_a, tmp_b)
                        tmp_sites = lsq.other_sites_best_fit()
                        rmsd = tmp_a.rms_difference(tmp_sites)
                        r = lsq.r
                        t = lsq.t
                        count += 1
                if converged:

                    matches.append(
                        [select.deep_copy().iselection(), r, t, rmsd, select.deep_copy().iselection().size()]
                    )
        return matches
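The zipper routine above reduces to one core call: build two equal-length flex.vec3_double arrays, fit, and inspect r, t and the RMSD of the superposed sites. A minimal sketch of that pattern, using only the scitbx imports that appear in the later examples (fit_rmsd is an illustrative helper, not part of the original code):

from scitbx.array_family import flex
from scitbx.math import superpose

def fit_rmsd(ref_xyz, mov_xyz):
    # ref_xyz, mov_xyz: equal-length sequences of (x, y, z) tuples
    ref = flex.vec3_double(ref_xyz)
    mov = flex.vec3_double(mov_xyz)
    lsq = superpose.least_squares_fit(reference_sites=ref, other_sites=mov)
    # lsq.r / lsq.t rotate and translate the moving sites onto the reference
    moved = lsq.other_sites_best_fit()
    return lsq.r, lsq.t, ref.rms_difference(moved)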
Example #3
 def __init__(self, pdb_hierarchy, eps = 0.01, add_identity=True):
   self.pdb_hierarchy = pdb_hierarchy
   n_atoms_per_chain = flex.int()
   sites_cart_chain_0 = None
   self.rotation_matrices = []
   self.translation_vectors = []
   self.back_rotation_matrices = []
   self.back_translation_vectors = []
   self.ph_first_chain = None
   #
   for i_chain, chain in enumerate(pdb_hierarchy.chains()):
     n_atoms_per_chain.append(chain.atoms_size())
   #
   outlier_found = False
   if(n_atoms_per_chain.all_eq(n_atoms_per_chain[0])):
     for i_chain, chain in enumerate(pdb_hierarchy.chains()):
       if(chain.is_na() or chain.is_protein()):
         n_atoms_per_chain.append(chain.atoms_size())
         if(sites_cart_chain_0 is None and i_chain==0):
           sites_cart_chain_0 = chain.atoms().extract_xyz()
           sel = flex.size_t(xrange(sites_cart_chain_0.size()))
           self.ph_first_chain = pdb_hierarchy.select(sel)
           if(add_identity):
             um = scitbx.matrix.sqr((
               1,0,0,
               0,1,0,
               0,0,1))
             zv = scitbx.matrix.col((0, 0, 0))
             self.rotation_matrices.append(um)
             self.translation_vectors.append(zv)
             self.back_rotation_matrices.append(um)
             self.back_translation_vectors.append(zv)
         if(i_chain > 0):
           # first copy onto others
           lsq_fit_obj = superpose.least_squares_fit(
             reference_sites = sites_cart_chain_0,
             other_sites     = chain.atoms().extract_xyz())
           self.rotation_matrices.append(lsq_fit_obj.r.transpose())
           self.translation_vectors.append(lsq_fit_obj.t)
           d =  flex.sqrt((sites_cart_chain_0-
             lsq_fit_obj.other_sites_best_fit()).dot()).min_max_mean().as_tuple()
           if(d[1]>2):
             outlier_found=True
           # others onto first copy
           lsq_fit_obj = superpose.least_squares_fit(
             reference_sites = chain.atoms().extract_xyz(),
             other_sites     = sites_cart_chain_0)
           self.back_rotation_matrices.append(lsq_fit_obj.r)
           self.back_translation_vectors.append(lsq_fit_obj.t)
   if(outlier_found): self._init()
Example #4
 def __init__(self, pdb_hierarchy, eps = 0.01, add_identity=True):
   self.pdb_hierarchy = pdb_hierarchy
   n_atoms_per_chain = flex.int()
   sites_cart_chain_0 = None
   self.rotation_matrices = []
   self.translation_vectors = []
   self.back_rotation_matrices = []
   self.back_translation_vectors = []
   self.ph_first_chain = None
   #
   for i_chain, chain in enumerate(pdb_hierarchy.chains()):
     n_atoms_per_chain.append(chain.atoms_size())
   #
   outlier_found = False
   if(n_atoms_per_chain.all_eq(n_atoms_per_chain[0])):
     for i_chain, chain in enumerate(pdb_hierarchy.chains()):
       if(chain.is_na() or chain.is_protein()):
         n_atoms_per_chain.append(chain.atoms_size())
         if(sites_cart_chain_0 is None and i_chain==0):
           sites_cart_chain_0 = chain.atoms().extract_xyz()
           sel = flex.size_t(xrange(sites_cart_chain_0.size()))
           self.ph_first_chain = pdb_hierarchy.select(sel)
           if(add_identity):
             um = scitbx.matrix.sqr((
               1,0,0,
               0,1,0,
               0,0,1))
             zv = scitbx.matrix.col((0, 0, 0))
             self.rotation_matrices.append(um)
             self.translation_vectors.append(zv)
             self.back_rotation_matrices.append(um)
             self.back_translation_vectors.append(zv)
         if(i_chain > 0):
           # first copy onto others
           lsq_fit_obj = superpose.least_squares_fit(
             reference_sites = sites_cart_chain_0,
             other_sites     = chain.atoms().extract_xyz())
           self.rotation_matrices.append(lsq_fit_obj.r.transpose())
           self.translation_vectors.append(lsq_fit_obj.t)
           d =  flex.sqrt((sites_cart_chain_0-
             lsq_fit_obj.other_sites_best_fit()).dot()).min_max_mean().as_tuple()
           if(d[1]>2):
             outlier_found=True
           # others onto first copy
           lsq_fit_obj = superpose.least_squares_fit(
             reference_sites = chain.atoms().extract_xyz(),
             other_sites     = sites_cart_chain_0)
           self.back_rotation_matrices.append(lsq_fit_obj.r)
           self.back_translation_vectors.append(lsq_fit_obj.t)
   if(outlier_found): self._init()
Example #5
def my_get_rot_trans(ph, master_selection, copy_selection, master_chain_id,
                     copy_chain_id):
    """
  Get rotation and translation using superpose.

  This function is used only when phil parameters are provided. In this case
  we require the selection of NCS master and copies to be correct.
  Correct means:
    1) residue sequence in master and copies is exactly the same
    2) the number of atoms in master and copies is exactly the same

  One can get exact selection strings by ncs_object.show(verbose=True)

  Args:
    ph : hierarchy
    master/copy_selection: master and copy iselections
    master/copy_chain_id: chain ids of the master and copy
  """

    other_h = my_selection(ph, master_chain_id, list(master_selection))
    ref_h = my_selection(ph, copy_chain_id, list(copy_selection))
    other_sites = other_h.atoms().extract_xyz()
    ref_sites = ref_h.atoms().extract_xyz()

    assert other_sites.size() == ref_sites.size(), "%d, %d" % (
        other_sites.size(), ref_sites.size())
    if ref_sites.size() > 0:
        lsq_fit_obj = superpose.least_squares_fit(reference_sites=ref_sites,
                                                  other_sites=other_sites)
        r = lsq_fit_obj.r
        t = lsq_fit_obj.t
        rmsd = ref_sites.rms_difference(lsq_fit_obj.other_sites_best_fit())
        return r, t, rmsd
    else:
        return None, None, None
Example #6
def superpose_ideal_ligand_on_poor_ligand(
    ideal_hierarchy,
    poor_hierarchy,
):
    """Superpose an ideal ligand onto the mangled ligand from a
     ligand fitting procedure

  Args:
      ideal_hierarchy (pdb_hierarchy): Ideal ligand
      poor_hierarchy (pdb_hierarchy): Poor ligand with correct c.o.m. and same
        atom names in order. Could become more sophisticated.
  """
    sites_moving = flex.vec3_double()
    sites_fixed = flex.vec3_double()
    for atom1, atom2 in zip(ideal_hierarchy.atoms(), poor_hierarchy.atoms()):
        assert atom1.name == atom2.name, '%s!=%s' % (atom1.quote(),
                                                     atom2.quote())
        sites_moving.append(atom1.xyz)
        sites_fixed.append(atom2.xyz)
    lsq_fit = superpose.least_squares_fit(reference_sites=sites_fixed,
                                          other_sites=sites_moving)
    sites_new = ideal_hierarchy.atoms().extract_xyz()
    sites_new = lsq_fit.r.elems * sites_new + lsq_fit.t.elems
    # rmsd = sites_fixed.rms_difference(lsq_fit.other_sites_best_fit())
    ideal_hierarchy.atoms().set_xyz(sites_new)
    return ideal_hierarchy
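Example #6 applies the fitted operators to the full coordinate set rather than only to the atoms used in the fit. A small helper sketch of that step (apply_fit is an illustrative name), assuming a least_squares_fit result and any flex.vec3_double:

def apply_fit(lsq_fit, sites):
    # Rotate and translate an arbitrary flex.vec3_double with the fitted operators.
    return lsq_fit.r.elems * sites + lsq_fit.t.elems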
Example #7
def superpose_ideal_residue_coordinates(pdb_hierarchy,
                                        resname,
                                        superpose_element=None,
                                        ):
  element_lookup = {'SF4' : 'Fe',
                    'F3S' : 'S',
                    #'F4S' : 'S', # not done yet
                    #'CLF' : 'Fe', # too flexible
                    }
  from iotbx import pdb
  from mmtbx.monomer_library import pdb_interpretation
  t0=time.time()
  rmsd_list = {}
  if superpose_element is None:
    superpose_element = element_lookup.get(resname, None)
  if resname in pdb_interpretation.ideal_ligands:
    ideal_hierarchy = get_pdb_hierarchy_from_restraints(resname)
  else:
    assert 0
  sites_moving = _extract_sites_cart(ideal_hierarchy, superpose_element)
  assert len(sites_moving), 'No atoms %s found' % superpose_element
  for ideal_ag in ideal_hierarchy.atom_groups(): break
  for sites_fixed, ag in generate_sites_fixed(pdb_hierarchy,
                                              resname,
                                              superpose_element,
                                              ):
    assert sites_fixed.size() == sites_moving.size(), '%(resname)s residue is missing atoms' % locals()
    import random
    min_rmsd = 1e9
    min_sites_cart = None
    for i in range(100):
      random.shuffle(sites_moving)
      lsq_fit = superpose.least_squares_fit(
        reference_sites = sites_fixed,
        other_sites     = sites_moving)
      new_atoms = ideal_ag.detached_copy().atoms()
      sites_new = new_atoms.extract_xyz()
      sites_new = lsq_fit.r.elems * sites_new + lsq_fit.t.elems
      rmsd = sites_fixed.rms_difference(lsq_fit.other_sites_best_fit())
      if rmsd<min_rmsd:
        min_rmsd=rmsd
        min_sites_cart = sites_new
    rmsd_list[ag.id_str()] = min_rmsd
    sites_new = min_sites_cart
    new_atoms.set_xyz(sites_new)
    for atom1 in ag.atoms():
      for atom2 in new_atoms:
        if atom1.name.strip()==atom2.name.strip():
          atom1.xyz=atom2.xyz
          break
      else:
        assert 0, 'not all atoms updated - missing %s' % atom1.quote()
  outl = ''
  if rmsd_list:
    outl = '\n  %(resname)s Regularisation' % locals()
    outl+= '\n    residue        rmsd'
    for id_str, rmsd in sorted(rmsd_list.items()):
      outl += '\n    "%s"   %0.1f' % (id_str, rmsd)
    outl += '\n  Time to superpose : %0.2fs\n' % (time.time()-t0)
  return outl
Example #8
 def __init__(self,
              all_sites_cart,
              lsq_fits,
              buffer=10.0,
              move_to_frame_of_reference=True,
              log=sys.stdout):
   fitted_sites = []
   original_sites = []
   minima = flex.vec3_double()
   for sites_cart, lsq_fit in zip(all_sites_cart, lsq_fits):
     fitted_sites.append(sites_cart.deep_copy())
     minima.append(sites_cart.min())
     if lsq_fit is None :
       original_sites.append(sites_cart.deep_copy())
     else :
       old_sites = lsq_fit.r.inverse().elems * (sites_cart - lsq_fit.t.elems)
       original_sites.append(old_sites)
   xyz_min = minima.min()
   if move_to_frame_of_reference:
     dxyz = (buffer - xyz_min[0], buffer - xyz_min[1], buffer - xyz_min[2])
   else:
     dxyz = (0,0,0)
   self.shifted_sites = []
   self.transformation_matrices = []
   for i, sites_cart in enumerate(fitted_sites):
     new_sites_cart = sites_cart + dxyz
     #print new_sites_cart.min()
     self.shifted_sites.append(new_sites_cart)
     lsq_fit = superpose.least_squares_fit(
       reference_sites=new_sites_cart,
       other_sites=original_sites[i])
     self.transformation_matrices.append(lsq_fit.rt())
Example #9
def exercise(method):
  assert method in ["kearsley", "kabsch"]
  # global shifts
  for n_sites in [1,3,7,10,30]:
    reference = flex.vec3_double(flex.random_double(n_sites*3)*10-5)
    other = reference + list(flex.random_double(3)*100-50)
    for i_trial in xrange(10):
      s = least_squares_fit(reference, other, method)
      assert approx_equal(reference, s.other_sites_best_fit())
      c = random_rotation()
      s = least_squares_fit(reference, tuple(c)*other, method)
      if method == "kearsley": # Kabsch fails in special cases
        assert approx_equal(s.r.determinant(), 1)
      assert approx_equal(reference, s.other_sites_best_fit())
      assert approx_equal(s.rt().r, s.r)
      assert approx_equal(s.rt().t, s.t)
      assert approx_equal(reference, s.rt() * s.other_sites)
Example #10
def exercise(method):
    assert method in ["kearsley", "kabsch"]
    # global shifts
    for n_sites in [1, 3, 7, 10, 30]:
        reference = flex.vec3_double(flex.random_double(n_sites * 3) * 10 - 5)
        other = reference + list(flex.random_double(3) * 100 - 50)
        for i_trial in xrange(10):
            s = least_squares_fit(reference, other, method)
            assert approx_equal(reference, s.other_sites_best_fit())
            c = random_rotation()
            s = least_squares_fit(reference, tuple(c) * other, method)
            if method == "kearsley":  # Kabsch fails in special cases
                assert approx_equal(s.r.determinant(), 1)
            assert approx_equal(reference, s.other_sites_best_fit())
            assert approx_equal(s.rt().r, s.r)
            assert approx_equal(s.rt().t, s.t)
            assert approx_equal(reference, s.rt() * s.other_sites)
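The determinant assertion distinguishes a proper rotation (determinant +1) from a reflection (determinant -1); the test only checks it for the Kearsley method because, per the comment, Kabsch can fail in special cases. A standalone illustration of the same check, assuming scitbx.matrix as used elsewhere in these examples:

import scitbx.matrix
r = scitbx.matrix.sqr((1, 0, 0,
                       0, 1, 0,
                       0, 0, 1))
assert abs(r.determinant() - 1.0) < 1e-9  # proper rotation, no reflection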
Example #11
    def _fit_U_from_superposed_points(reference, other):

        # Add the origin to both sets of points
        reference.append((0, 0, 0))
        other.append((0, 0, 0))

        # Find U matrix that takes ideal relps to the reference
        fit = superpose.least_squares_fit(reference, other)
        return fit.r
Example #12
def get_difference_chainsAB(strings):
  pdb_h = iotbx.pdb.input(source_info=None, lines=strings).construct_hierarchy()
  cache = pdb_h.atom_selection_cache()
  ref_sites = pdb_h.select(cache.selection("chain A")).atoms().extract_xyz()
  other_sites = pdb_h.select(cache.selection("chain B")).atoms().extract_xyz()
  lsq_obj = superpose.least_squares_fit(ref_sites, other_sites)
  bf = lsq_obj.other_sites_best_fit()
  dif = ref_sites - bf
  return dif
Example #13
def side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager):
  """
  Works with poly_gly truncated hierarchy.
  Also used in fix_rama_outliers.
  """
  resname = current_reference_ag.resname.upper()
  c = one_three.get(resname, None)

  # seems to work with unusual residues...
  # if c is None:
  #   msg = "Only standard protein residues are currently supported.\n"
  #   msg += "The residue %s (chain %s, resid %s) chain is not standard." % (
  #       resname,
  #       current_reference_ag.parent().parent().id,
  #       current_reference_ag.parent().resid())
  #   raise Sorry(msg)
  ag_to_place.resname = three_one.get(c,resname)
  if c == 'G':
    return

  # align residue from ideal_res_dict to just placed ALA (ag_to_place)
  # or from pdb_hierarchy_template
  fixed_sites = flex.vec3_double()
  moving_sites = flex.vec3_double()
  reper_atoms = ["C","CA", "N"]
  for (ag, arr) in [(ag_to_place, fixed_sites),
                    (current_reference_ag, moving_sites)]:
    for a in ag.atoms():
      if a.name.strip() in reper_atoms:
        arr.append(a.xyz)
  assert len(fixed_sites) == 3
  if len(moving_sites) < 3:
    error_msg = "C, CA or N atoms are absent in secondary structure element." +\
        "\nPlease add them to the model and try again."
    raise Sorry(error_msg)
  assert len(moving_sites) == 3
  lsq_fit_obj = superpose.least_squares_fit(reference_sites = fixed_sites,
                                            other_sites = moving_sites)
  ideal_correct_ag = current_reference_ag.detached_copy()
  ideal_correct_ag.atoms().set_xyz(
      lsq_fit_obj.r.elems*ideal_correct_ag.atoms().extract_xyz()+\
      lsq_fit_obj.t.elems)
  ideal_correct_ag.atoms().set_xyz(
      rotamer_manager.nearest_rotamer_sites_cart(ideal_correct_ag))
  if len(ideal_correct_ag.atoms()) > 4:
    ag_to_place.pre_allocate_atoms(number_of_additional_atoms=\
                                                len(ideal_correct_ag.atoms())-4)
    for a in ideal_correct_ag.atoms():
      if a.name.strip() not in ["N","CA","C","O"]:
        at = a.detached_copy()
        at.uij_erase()
        ag_to_place.append_atom(atom=at)
  else:
    # This means something is wrong with the input model, e.g. only 3 atoms in
    # the residue and they happened to be N, CA, C
    pass
Example #14
    def _fit_U_from_superposed_points(reference, other):

        # Add the origin to both sets of points
        origin = flex.vec3_double(1)
        reference.extend(origin)
        other.extend(origin)

        # Find U matrix that takes ideal relps to the reference
        fit = superpose.least_squares_fit(reference, other)
        return fit.r
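Examples #11 and #14 add the origin in two equivalent ways: appending the tuple (0, 0, 0), or extending with flex.vec3_double(1), a one-element array used as the origin in Example #14. A quick illustration, assuming only scitbx flex:

from scitbx.array_family import flex
pts_a = flex.vec3_double([(1.0, 2.0, 3.0)])
pts_a.append((0.0, 0.0, 0.0))         # Example #11 style
pts_b = flex.vec3_double([(1.0, 2.0, 3.0)])
pts_b.extend(flex.vec3_double(1))     # Example #14 style, same effect
assert pts_a.size() == pts_b.size() == 2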
Example #15
 def compute_operators(self, sites_cart):
     for pair in self.selection_pairs:
         superposition = superpose.least_squares_fit(
             reference_sites=sites_cart.select(pair[0]),
             other_sites=sites_cart.select(pair[1]))
         rtmx = matrix.rt((superposition.r, superposition.t))
         self.matrices.append(rtmx)
         x = sites_cart.select(pair[0])
         y = rtmx * sites_cart.select(pair[1])
         d_sq = (x - y).dot()
         self.rms.append(flex.mean(d_sq)**0.5)
Example #16
def side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager):
  """
  Works with poly_gly truncated hierarchy.
  Also used in fix_rama_outliers.
  """
  resname = current_reference_ag.resname.upper()
  c = one_three.get(resname, None)
  if c is None:
    msg = "Only standard protein residues are currently supported.\n"
    msg += "The residue %s (chain %s, resid %s) chain is not standard." % (
        resname,
        current_reference_ag.parent().parent().id,
        current_reference_ag.parent().resid())
    raise Sorry(msg)
  ag_to_place.resname = three_one[c]
  if c == 'G':
    return

  # align residue from ideal_res_dict to just placed ALA (ag_to_place)
  # or from pdb_hierarchy_template
  fixed_sites = flex.vec3_double()
  moving_sites = flex.vec3_double()
  reper_atoms = ["C","CA", "N"]
  for (ag, arr) in [(ag_to_place, fixed_sites),
                    (current_reference_ag, moving_sites)]:
    for a in ag.atoms():
      if a.name.strip() in reper_atoms:
        arr.append(a.xyz)
  assert len(fixed_sites) == 3
  if len(moving_sites) < 3:
    error_msg = "C, CA or N atoms are absent in secondary structure element." +\
        "\nPlease add them to the model and try again."
    raise Sorry(error_msg)
  assert len(moving_sites) == 3
  lsq_fit_obj = superpose.least_squares_fit(reference_sites = fixed_sites,
                                            other_sites = moving_sites)
  ideal_correct_ag = current_reference_ag.detached_copy()
  ideal_correct_ag.atoms().set_xyz(
      lsq_fit_obj.r.elems*ideal_correct_ag.atoms().extract_xyz()+\
      lsq_fit_obj.t.elems)
  ideal_correct_ag.atoms().set_xyz(
      rotamer_manager.nearest_rotamer_sites_cart(ideal_correct_ag))
  if len(ideal_correct_ag.atoms()) > 4:
    ag_to_place.pre_allocate_atoms(number_of_additional_atoms=\
                                                len(ideal_correct_ag.atoms())-4)
    for a in ideal_correct_ag.atoms():
      if a.name.strip() not in ["N","CA","C","O"]:
        at = a.detached_copy()
        at.uij_erase()
        ag_to_place.append_atom(atom=at)
  else:
    # This means something is wrong with the input model, e.g. only 3 atoms in
    # the residue and they happened to be N, CA, C
    pass
Example #17
def fit_sites (sites_fixed, sites_moving, selection) : # TODO
  """
  Simple least-squares superposition of sites on reference structure
  """
  from scitbx.math import superpose
  sites_fixed_aln = sites_fixed.select(selection)
  sites_moving_aln = sites_moving.select(selection)
  assert (len(sites_fixed_aln) == len(sites_moving_aln) > 0)
  lsq_fit_obj = superpose.least_squares_fit(
    reference_sites=sites_fixed_aln,
    other_sites=sites_moving_aln)
  sites_moved = lsq_fit_obj.r.elems * sites_moving + lsq_fit_obj.t.elems
  return sites_moved
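A hypothetical use of fit_sites, fitting on a C-alpha subset and then moving every atom; the selection comes from an atom_selection_cache as in Example #12, and all names below are illustrative:

ca_sel = pdb_hierarchy.atom_selection_cache().selection("name CA")
sites_moved = fit_sites(sites_fixed=sites_fixed,
                        sites_moving=sites_moving,
                        selection=ca_sel)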
Example #18
def exercise () :
  pdb_file_name_1 = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/misc/1l3r_no_ligand.pdb",
    test=os.path.isfile)
  pdb_in_1 = iotbx.pdb.input(file_name = pdb_file_name_1)
  xrs1 = pdb_in_1.xray_structure_simple()
  ofn = "1l3r_rt.pdb"
  cmd = " ".join([
    "phenix.pdbtools",
    "%s"%pdb_file_name_1,
    "rotate='90 10 20' translate='10 10 10'",
    "output.file_name=%s"%ofn,
    "--quiet"])
  easy_run.call(cmd)
  pdb_in_rt = iotbx.pdb.input(file_name = ofn)
  xrs_rt = pdb_in_rt.xray_structure_simple()
  fft_map_1 = xrs1.structure_factors(d_min=1.5).f_calc().fft_map(
    resolution_factor = 1./3)
  fft_map_1.apply_sigma_scaling()
  map_data_1 = fft_map_1.real_map_unpadded()
  mmtbx.maps.utils.write_xplor_map(sites_cart = xrs1.sites_cart(),
    unit_cell  = xrs1.unit_cell(),
    map_data   = map_data_1,
    n_real     = fft_map_1.n_real(),
    file_name  = "1l3r.xplor")
  lsq_fit_obj = superpose.least_squares_fit(
    reference_sites=xrs_rt.sites_cart(),
    other_sites=xrs1.sites_cart().deep_copy())
  f_o_r = common_frame_of_reference(
    all_sites_cart=[xrs1.sites_cart(),xrs_rt.sites_cart()],
    lsq_fits=[None, lsq_fit_obj])
  hierarchy_rt = pdb_in_rt.construct_hierarchy()
  lsq_fit_obj = f_o_r.transformation_matrices[1]
  hierarchy_rt.atoms().set_xyz(f_o_r.shifted_sites[1])
  open("1l3r_rt.pdb", "w").write(hierarchy_rt.as_pdb_string())
  xrs_rt, map_data_rt = transform_map_by_lsq_fit(
    fft_map=fft_map_1,
    unit_cell=xrs1.unit_cell(),
    lsq_fit_obj=lsq_fit_obj.inverse(),
    pdb_hierarchy=hierarchy_rt,
    d_min=1.5,
    file_name="1l3r_rt.xplor",
    log=null_out())
  f_o_r.inverse_transform_hierarchy(1, hierarchy_rt)
  open("1l3r.pdb", "w").write(hierarchy_rt.as_pdb_string())
  #for sf1, sf2 in zip(xrs1.sites_frac(), xrs_rt.sites_frac()):
  #  e1 = map_data_1.eight_point_interpolation(sf1)
  #  e2 = map_data_rt.eight_point_interpolation(sf2)
  #  print abs(e1-e2)
  #  assert abs(e1-e2) < 1.
  print "OK"
Example #19
def exercise():
  pdb_file_name_1 = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/misc/1l3r_no_ligand.pdb",
    test=os.path.isfile)
  pdb_in_1 = iotbx.pdb.input(file_name = pdb_file_name_1)
  xrs1 = pdb_in_1.xray_structure_simple()
  ofn = "1l3r_rt.pdb"
  cmd = " ".join([
    "phenix.pdbtools",
    "%s"%pdb_file_name_1,
    "rotate='90 10 20' translate='10 10 10'",
    "output.file_name=%s"%ofn,
    "--quiet"])
  easy_run.call(cmd)
  pdb_in_rt = iotbx.pdb.input(file_name = ofn)
  xrs_rt = pdb_in_rt.xray_structure_simple()
  fft_map_1 = xrs1.structure_factors(d_min=1.5).f_calc().fft_map(
    resolution_factor = 1./3)
  fft_map_1.apply_sigma_scaling()
  map_data_1 = fft_map_1.real_map_unpadded()
  mmtbx.maps.utils.write_xplor_map(sites_cart = xrs1.sites_cart(),
    unit_cell  = xrs1.unit_cell(),
    map_data   = map_data_1,
    n_real     = fft_map_1.n_real(),
    file_name  = "1l3r.xplor")
  lsq_fit_obj = superpose.least_squares_fit(
    reference_sites=xrs_rt.sites_cart(),
    other_sites=xrs1.sites_cart().deep_copy())
  f_o_r = common_frame_of_reference(
    all_sites_cart=[xrs1.sites_cart(),xrs_rt.sites_cart()],
    lsq_fits=[None, lsq_fit_obj])
  hierarchy_rt = pdb_in_rt.construct_hierarchy()
  lsq_fit_obj = f_o_r.transformation_matrices[1]
  hierarchy_rt.atoms().set_xyz(f_o_r.shifted_sites[1])
  open("1l3r_rt.pdb", "w").write(hierarchy_rt.as_pdb_string())
  xrs_rt, map_data_rt = transform_map_by_lsq_fit(
    fft_map=fft_map_1,
    unit_cell=xrs1.unit_cell(),
    lsq_fit_obj=lsq_fit_obj.inverse(),
    pdb_hierarchy=hierarchy_rt,
    d_min=1.5,
    file_name="1l3r_rt.xplor",
    log=null_out())
  f_o_r.inverse_transform_hierarchy(1, hierarchy_rt)
  open("1l3r.pdb", "w").write(hierarchy_rt.as_pdb_string())
  #for sf1, sf2 in zip(xrs1.sites_frac(), xrs_rt.sites_frac()):
  #  e1 = map_data_1.eight_point_interpolation(sf1)
  #  e2 = map_data_rt.eight_point_interpolation(sf2)
  #  print abs(e1-e2)
  #  assert abs(e1-e2) < 1.
  print("OK")
Example #20
File: rigid.py Project: dials/cctbx
 def fit(self, fragment, reference_sites, control_point_indices=None):
   """ Fits the given fragment to the given sites. If control_point_indices
   are not given, all points are fit; otherwise only the control points
   are fit and the result is propagated to the rest of the fragment
   coordinates. Returns the coordinates of the transformed fragment.
   """
   if not control_point_indices:
     control_point_indices = range(0, len(fragment))
   to_fit = [
     (fragment[i].x, fragment[i].y, 0) for i in control_point_indices]
   lsf = superpose.least_squares_fit(
     flex.vec3_double(reference_sites), flex.vec3_double(to_fit))
   to_fit = flex.vec3_double([(i.x, i.y, 0) for i in fragment])
   return lsf.r.elems * to_fit + lsf.t.elems
Example #21
 def __init__(self, group, sites_cart):
   self.group = group
   self.matrices = []
   self.rms = []
   for pair in self.group.selection_pairs:
     superposition = superpose.least_squares_fit(
       reference_sites=sites_cart.select(pair[0]),
       other_sites=sites_cart.select(pair[1]))
     rtmx = matrix.rt((superposition.r, superposition.t))
     self.matrices.append(rtmx)
     x = sites_cart.select(pair[0])
     y = rtmx * sites_cart.select(pair[1])
     d_sq = (x-y).dot()
     self.rms.append(flex.mean(d_sq)**0.5)
Example #22
 def fit(self, fragment, reference_sites, control_point_indices=None):
   """ Fits the given fragment to the given sites. If control_point_indices
   are not given, all points are fit; otherwise only the control points
   are fit and the result is propagated to the rest of the fragment
   coordinates. Returns the coordinates of the transformed fragment.
   """
   if not control_point_indices:
     control_point_indices = range(0, len(fragment))
   to_fit = [
     (fragment[i].x, fragment[i].y, 0) for i in control_point_indices]
   lsf = superpose.least_squares_fit(
     flex.vec3_double(reference_sites), flex.vec3_double(to_fit))
   to_fit = flex.vec3_double([(i.x, i.y, 0) for i in fragment])
   return lsf.r.elems * to_fit + lsf.t.elems
Example #23
 def __init__(self, group, sites_cart):
     self.group = group
     self.matrices = []
     self.rms = []
     for pair in self.group.selection_pairs:
         superposition = superpose.least_squares_fit(
             reference_sites=sites_cart.select(pair[0]),
             other_sites=sites_cart.select(pair[1]))
         rtmx = matrix.rt((superposition.r, superposition.t))
         self.matrices.append(rtmx)
         x = sites_cart.select(pair[0])
         y = rtmx * sites_cart.select(pair[1])
         d_sq = (x - y).dot()
         self.rms.append(flex.mean(d_sq)**0.5)
Example #24
def get_match_rmsd(ph, match):
    assert len(ph.models()) == 1
    [ch_a_id, ch_b_id, list_a, list_b, res_list_a, res_list_b,
     similarity] = match
    sel_list_extended_a = [x for y in list_a for x in y]
    sel_list_extended_b = [x for y in list_b for x in y]
    sel_list_extended_a.sort()
    sel_list_extended_b.sort()

    if len(sel_list_extended_a) == 0 or len(sel_list_extended_b) == 0:
        # e.g. 3liy (whole chain in AC)
        return None, None, None, None, None
    #
    # attempt to avoid selection of huge model
    # This is absolutely necessary for models of size > ~ 50 Mb in PDB format.
    # This brings runtime of this function alone for:
    # 3iyw ( 75 Mb)  88 -> 10 seconds. Total runtime  220 -> 160s.
    # 5vu2 (150 Mb) 506 -> 22 seconds. Total runtime 1067 -> 573s.
    # As one can easily see, now runtime of this function is ~N,
    # where N - size of molecule.
    # More shocking results should be expected for
    # even larger molecules (1.2Gb is currently the max).
    # At this point no hierarchy selections left in this module.
    #
    other_h = my_selection(ph, ch_a_id, sel_list_extended_a)
    ref_h = my_selection(ph, ch_b_id, sel_list_extended_b)
    #
    other_atoms = other_h.atoms()
    ref_atoms = ref_h.atoms()
    #
    # Here we want to flip atom names, even before chain alignment, so
    # we will get correct chain RMSD
    flipped_other_selection = make_flips_if_necessary_torsion(
        ref_h.deep_copy(), other_h.deep_copy())
    # if flipped_other_selection is not None:
    other_sites = other_atoms.select(flipped_other_selection).extract_xyz()
    # else:
    #   other_sites = other_atoms.extract_xyz()
    ref_sites = ref_atoms.extract_xyz()
    lsq_fit_obj = superpose.least_squares_fit(reference_sites=ref_sites,
                                              other_sites=other_sites)
    r = lsq_fit_obj.r
    t = lsq_fit_obj.t
    # todo: find r_2*A = r*A + t (where the translation is zero)
    # use B = r*A + t, r_2*A = B , r_2 = B*A.inverse()
    other_sites_best = lsq_fit_obj.other_sites_best_fit()
    rmsd = round(ref_sites.rms_difference(other_sites_best), 4)
    # print "chain rmsd after flip:", rmsd
    return rmsd, ref_sites, other_sites_best, r, t
Example #25
def align_chains_flexible(chn_mov, chn_ref, altlocs=['','A'], cutoff_radius=15):
    """
    Take two chains and perform flexible alignment on them.
    Only the alternate conformations supplied (e.g. altlocs=['','A']) will be used for alignment (at most one conformer).
    Residues that do not contain a full set of backbone atoms (N, CA, C, O) for the selected conformers (e.g. altlocs=['','A']) are removed.
    Chains will be truncated so that the chains contain an "aligned" set of residues (currently sequence-identical)

    returns LocalAlignment
    """

    # Trim both chains to residues with complete backbones
    chn_mov_cb = complete_backbone(chn_mov, altlocs=altlocs)
    chn_ref_cb = complete_backbone(chn_ref, altlocs=altlocs)
    # Trim both chains to the same set of residues
    chn_ref_cr, chn_mov_cr = common_residues(chn_ref_cb, chn_mov_cb)
    # Create new hierarchies to perform most processing
    h_mov = iotbx.pdb.hierarchy.new_hierarchy_from_chain(chn_mov_cr); h_mov.sort_atoms_in_place();
    h_ref = iotbx.pdb.hierarchy.new_hierarchy_from_chain(chn_ref_cr); h_ref.sort_atoms_in_place();
    # Extract new processed chain objects
    c_mov = h_mov.only_chain()
    c_ref = h_ref.only_chain()
    # Check that the chains contain the same atoms
    assert list(c_mov.atoms().extract_element()) == list(c_ref.atoms().extract_element()), 'chn_mov and chn_ref must contain the same atoms'
    assert list(c_mov.atoms().extract_name())    == list(c_ref.atoms().extract_name()),    'chn_mov and chn_ref must contain the same atoms'

    # List of output alignments and alignment sites
    o_rts = []; o_xyz = []; r_xyz = []

    # Extract xyz coords
    xyz_mov = c_mov.atoms().extract_xyz()
    xyz_ref = c_ref.atoms().extract_xyz()

    # Iterate through and create an alignment for each C-alpha
    for rg_mov in c_mov.residue_groups():
        # Find the atoms near the C-alpha
        ca_atm = extract_atom(residue=rg_mov.conformers()[0].only_residue(), atom='CA')
        nr_sel = nearby_coords_bool(query=ca_atm.xyz, coords=xyz_mov, cutoff=cutoff_radius)
        # Select the sites from both chains
        xyz_mov_sel = xyz_mov.select(nr_sel)
        xyz_ref_sel = xyz_ref.select(nr_sel)
        # Calculate the alignment for this residue
        rt_atm = superpose.least_squares_fit(reference_sites=xyz_ref_sel, other_sites=xyz_mov_sel).rt()
        # Save the rotation matrix and the coordinates of the c-alpha
        o_xyz.append(ca_atm.xyz)
        o_rts.append(rt_atm)
        r_xyz.append(xyz_ref_sel.select(((xyz_mov_sel-ca_atm.xyz).dot() == 0.0))[0])
    # Return LocalAlignment object
    return LocalAlignment(alignments=o_rts, alignment_sites=o_xyz, reference_sites=r_xyz)
Example #26
def evaluate_backrub_pair_impl (
    calphas_A,
    calphas_B,
    labels=(),
    max_calpha_sep=5.0,
    rmsd_limit=0.1,
    backrub_angle_limit=10.0) : # FIXME is this an appropriate cutoff?
  assert (len(calphas_A) == len(calphas_B) == 5)
  if (None in calphas_A) or (None in calphas_B) :
    return None
  for k_res in range(0, 4) :
    dist = calphas_A[k_res].distance(calphas_A[k_res+1])
    if (dist > max_calpha_sep) :
      return None
  from scitbx.array_family import flex
  from scitbx.math import superpose
  from scitbx.matrix import col
  import scitbx.math
  sites_A = flex.vec3_double([ calphas_A[k].xyz for k in [0,1,3,4] ])
  sites_B = flex.vec3_double([ calphas_B[k].xyz for k in [0,1,3,4] ])
  lsq_fit = superpose.least_squares_fit(
    reference_sites=sites_A,
    other_sites=sites_B)
  sites_B_new = lsq_fit.other_sites_best_fit()
  rmsd = sites_B_new.rms_difference(sites_A)
  ca2 = (col(sites_A[1]) + col(sites_B_new[1])) / 2
  ca3r = col(calphas_A[2].xyz)
  ca3m = lsq_fit.rt() * calphas_B[2].xyz
  ca4 = (col(sites_A[2]) + col(sites_B_new[2])) / 2
  backrub_angle = scitbx.math.dihedral_angle(
    sites=[ca3r.elems, ca2.elems, ca4.elems, ca3m.elems],
    deg=True)
  if ((rmsd <= rmsd_limit) and
      (abs(backrub_angle) >= backrub_angle_limit)) :
    if (len(labels) == 0) :
      labels = (calphas_A[2].fetch_labels().altloc,
                calphas_B[2].fetch_labels().altloc)
    return backrub_residue(
      calpha=calphas_A[2],
      i_mod=labels[0],
      j_mod=labels[1],
      rmsd=rmsd,
      backrub_angle=backrub_angle)
  return None
Example #27
def align_chains_rigid(mov_chain, ref_chain):
    """Takes two chains and aligns them - returns the rt matrix plus the selected moving and reference sites"""

    mov_seq, mov_sites, mov_flags = extract_sites_for_alignment(mov_chain)
    ref_seq, ref_sites, ref_flags = extract_sites_for_alignment(ref_chain)

    align_obj = mmtbx.alignment.align(
        seq_a=ref_seq,
        seq_b=mov_seq,
        gap_opening_penalty = 20,
        gap_extension_penalty = 2,
        similarity_function = 'blosum50',
        style = 'local')

    # Extract the alignment
    alignment = align_obj.extract_alignment()
    # List of matches - '|' for exact match, '*' for good match
    matches = alignment.matches()
    equal = matches.count("|")
    similar = matches.count("*")
    total = len(alignment.a) - alignment.a.count("-")
    alignment.pretty_print(
        matches=matches,
        block_size=50,
        n_block=1,
        top_name="fixed",
        bottom_name="moving")

    # Create list of selected sites
    ref_sites_sel = flex.vec3_double()
    mov_sites_sel = flex.vec3_double()
    for ia,ib,m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches):
        if (m not in ["|", "*"]): continue
        # Check that the sites are flagged to be used
        if (ref_flags[ia] and mov_flags[ib]):
            # Append sites to list to align
            ref_sites_sel.append(ref_sites[ia])
            mov_sites_sel.append(mov_sites[ib])

    if (ref_sites_sel.size() == 0):
      raise Exception("No matching C-alpha atoms.")

    lsq_rt = superpose.least_squares_fit(reference_sites=ref_sites_sel, other_sites=mov_sites_sel).rt()
    return lsq_rt, mov_sites_sel, ref_sites_sel
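A possible follow-up to align_chains_rigid, applying the returned rt matrix to all atoms of the moving chain; multiplying an rt matrix by a flex.vec3_double is the same operation used in Examples #15 and #21, and the usage here is illustrative:

lsq_rt, mov_sites_sel, ref_sites_sel = align_chains_rigid(mov_chain, ref_chain)
# Move the whole chain onto the reference frame, not just the aligned C-alphas.
moved_xyz = lsq_rt * mov_chain.atoms().extract_xyz()
mov_chain.atoms().set_xyz(moved_xyz)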
Example #28
    def recalculate_ncs_transforms(self, asu_site_cart):
        """
    Re-evaluate the rotation and translation in the NCS groups list, based on
    the NCS group selections and the atom locations.
    Updates self.

    Args:
      asu_site_cart (flex.vec_3): the complete ASU sites cart (coordinates)
    """
        for gr in self:
            m_sel = gr.master_iselection
            for cp in gr.copies:
                c_sel = cp.iselection
                # other_sites are the master, reference_sites are the copies
                lsq_fit_obj = superpose.least_squares_fit(
                    reference_sites=asu_site_cart.select(c_sel),
                    other_sites=asu_site_cart.select(m_sel))
                cp.r = lsq_fit_obj.r
                cp.t = lsq_fit_obj.t
Example #29
def recalculate_ncs_transforms(ncs_restraints_group_list,asu_site_cart):
  """
  Re-evaluate the rotation and translation in the NCS groups list, based on
  the NCS group selections and the atom locations.
  Updates the ncs_restraints_group_list object

  Args:
    ncs_restraints_group_list: list of ncs restraints group objects
    asu_site_cart (flex.vec_3): the complete ASU sites cart (coordinates)
  """
  for gr in ncs_restraints_group_list:
    m_sel = gr.master_iselection
    for cp in gr.copies:
      c_sel = cp.iselection
      # other_sites are the master, reference_sites are the copies
      lsq_fit_obj = superpose.least_squares_fit(
          reference_sites = asu_site_cart.select(c_sel),
          other_sites     = asu_site_cart.select(m_sel))
      cp.r = lsq_fit_obj.r
      cp.t = lsq_fit_obj.t
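Because reference_sites are taken from the copy and other_sites from the master, the stored cp.r and cp.t map master coordinates onto each copy. A short, illustrative sketch of applying the recalculated operators:

for gr in ncs_restraints_group_list:
  master_sites = asu_site_cart.select(gr.master_iselection)
  for cp in gr.copies:
    # Rebuild the copy coordinates (approximately) from the master.
    rebuilt_copy = cp.r.elems * master_sites + cp.t.elems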
Example #30
def evaluate_backrub_pair_impl(
        calphas_A,
        calphas_B,
        labels=(),
        max_calpha_sep=5.0,
        rmsd_limit=0.1,
        backrub_angle_limit=10.0):  # FIXME is this an appropriate cutoff?
    assert (len(calphas_A) == len(calphas_B) == 5)
    if (None in calphas_A) or (None in calphas_B):
        return None
    for k_res in range(0, 4):
        dist = calphas_A[k_res].distance(calphas_A[k_res + 1])
        if (dist > max_calpha_sep):
            return None
    from scitbx.array_family import flex
    from scitbx.math import superpose
    from scitbx.matrix import col
    import scitbx.math
    sites_A = flex.vec3_double([calphas_A[k].xyz for k in [0, 1, 3, 4]])
    sites_B = flex.vec3_double([calphas_B[k].xyz for k in [0, 1, 3, 4]])
    lsq_fit = superpose.least_squares_fit(reference_sites=sites_A,
                                          other_sites=sites_B)
    sites_B_new = lsq_fit.other_sites_best_fit()
    rmsd = sites_B_new.rms_difference(sites_A)
    ca2 = (col(sites_A[1]) + col(sites_B_new[1])) / 2
    ca3r = col(calphas_A[2].xyz)
    ca3m = lsq_fit.rt() * calphas_B[2].xyz
    ca4 = (col(sites_A[2]) + col(sites_B_new[2])) / 2
    backrub_angle = scitbx.math.dihedral_angle(
        sites=[ca3r.elems, ca2.elems, ca4.elems, ca3m.elems], deg=True)
    if ((rmsd <= rmsd_limit) and (abs(backrub_angle) >= backrub_angle_limit)):
        if (len(labels) == 0):
            labels = (calphas_A[2].fetch_labels().altloc,
                      calphas_B[2].fetch_labels().altloc)
        return backrub_residue(calpha=calphas_A[2],
                               i_mod=labels[0],
                               j_mod=labels[1],
                               rmsd=rmsd,
                               backrub_angle=backrub_angle)
    return None
Example #31
def get_match_rmsd(ph, match):
  assert len(ph.models()) == 1
  [ch_a_id,ch_b_id,list_a,list_b,res_list_a,res_list_b,similarity] = match
  # print "Cleaning chains", ch_a_id, ch_b_id, similarity,
  t0 = time()
  sel_a = make_selection_from_lists(list_a)
  sel_b = make_selection_from_lists(list_b)
  # print "debug: lista, listb", list_a, list_b
  if sel_a.size() == 0 or sel_b.size() == 0:
    # e.g. 3liy (whole chain in AC)
    return None, None, None, None, None

  other_h = ph.select(sel_a)
  other_atoms = other_h.atoms()
  ref_h = ph.select(sel_b)
  ref_atoms = ref_h.atoms()
  #
  # Here we want to flip atom names, even before chain alignment, so
  # we will get correct chain RMSD

  # flipped_other_selection = make_flips_if_necessary(ref_h.deep_copy(), other_h.deep_copy())
  flipped_other_selection = make_flips_if_necessary_torsion(
      ref_h.deep_copy(), other_h.deep_copy())
  # if flipped_other_selection is not None:
  other_sites = other_atoms.select(flipped_other_selection).extract_xyz()
  # else:
  #   other_sites = other_atoms.extract_xyz()
  ref_sites = ref_atoms.extract_xyz()
  lsq_fit_obj = superpose.least_squares_fit(
    reference_sites = ref_sites,
    other_sites     = other_sites)
  r = lsq_fit_obj.r
  t = lsq_fit_obj.t
  # todo: find r_2*A = r*A + t (where the translation is zero)
  # use B = r*A + t, r_2*A = B , r_2 = B*A.inverse()
  other_sites_best = lsq_fit_obj.other_sites_best_fit()
  rmsd = round(ref_sites.rms_difference(other_sites_best),4)
  # print "chain rmsd after flip:", rmsd
  return rmsd, ref_sites, other_sites_best, r,t
Example #32
def get_r_t_matrices_from_structure(pdb_str):
  """ Return rotation and translation matrices for the ideal structure.

  The function determines the r and t matrices from the alignment of the 1st
  and 2nd residues of the structure passed in pdb_str.
  """
  pdb_hierarchy = iotbx.pdb.input(source_info=None, lines=pdb_str).\
    construct_hierarchy()
  conformer = pdb_hierarchy.models()[0].chains()[0].conformers()[0]
  residues = conformer.residues()
  fixed_sites = flex.vec3_double()
  moving_sites = flex.vec3_double()
  main_chain_atoms = ["N","CA","C","O"]
  if len(residues)>=2:
    for (r, arr) in [(residues[0], fixed_sites), (residues[1], moving_sites)]:
      for a in r.atoms():
        if a.name.strip() in main_chain_atoms:
          arr.append(a.xyz)
  else:
    raise Sorry('pdb_str should contain at least 2 residues')
  lsq_fit_obj = superpose.least_squares_fit(reference_sites = moving_sites,
                                            other_sites = fixed_sites)
  return lsq_fit_obj.r, lsq_fit_obj.t
Example #33
def get_r_t_matrices_from_structure(pdb_str):
  """ Return rotation and translation matrices for the ideal structure.

  The function determines the r and t matrices from the alignment of the 1st
  and 2nd residues of the structure passed in pdb_str.
  """
  pdb_hierarchy = iotbx.pdb.input(source_info=None, lines=pdb_str).\
    construct_hierarchy()
  conformer = pdb_hierarchy.models()[0].chains()[0].conformers()[0]
  residues = conformer.residues()
  fixed_sites = flex.vec3_double()
  moving_sites = flex.vec3_double()
  main_chain_atoms = ["N","CA","C","O"]
  if len(residues)>=2:
    for (r, arr) in [(residues[0], fixed_sites), (residues[1], moving_sites)]:
      for a in r.atoms():
        if a.name.strip() in main_chain_atoms:
          arr.append(a.xyz)
  else:
    raise Sorry('pdb_str should contain at least 2 residues')
  lsq_fit_obj = superpose.least_squares_fit(reference_sites = moving_sites,
                                            other_sites = fixed_sites)
  return lsq_fit_obj.r, lsq_fit_obj.t
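Since reference_sites come from the second residue and other_sites from the first, the returned r and t map residue i onto residue i+1, so applying them repeatedly propagates the ideal repeating unit. A hedged sketch of that propagation (names are illustrative):

r, t = get_r_t_matrices_from_structure(pdb_str)
sites = first_residue_sites                # flex.vec3_double of one residue
for i in range(n_new_residues):
  sites = r.elems * sites + t.elems        # approximate coordinates of the next residue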
Example #34
def my_get_rot_trans(
    ph,
    master_selection,
    copy_selection):
  """
  Get rotation and translation using superpose.

  This function is used only when phil parameters are provided. In this case
  we require the selection of NCS master and copies to be correct.
  Correct means:
    1) residue sequence in master and copies is exactly the same
    2) the number of atoms in master and copies is exactly the same

  One can get exact selection strings by ncs_object.show(verbose=True)

  Args:
    ph : hierarchy
    master/copy_selection: master and copy iselections
  """

  atoms = ph.atoms()
  # master
  other_sites = atoms.select(master_selection).extract_xyz()
  # copy
  ref_sites = atoms.select(copy_selection).extract_xyz()
  assert other_sites.size() == ref_sites.size(), "%d, %d" % (
      other_sites.size(), ref_sites.size())
  if ref_sites.size() > 0:
    lsq_fit_obj = superpose.least_squares_fit(
        reference_sites = ref_sites,
        other_sites     = other_sites)
    r = lsq_fit_obj.r
    t = lsq_fit_obj.t
    rmsd = ref_sites.rms_difference(lsq_fit_obj.other_sites_best_fit())
    return r,t,rmsd
  else:
    return None, None, None
Example #35
 def align_model(self, i_model):
   from scitbx.array_family import flex
   from scitbx.math import superpose
   hierarchy_moving = self.related_chains[i_model].pdb_hierarchy
   mov_atoms = hierarchy_moving.atoms()
   mov_atoms.reset_i_seq()
   sel_cache = hierarchy_moving.atom_selection_cache()
   mov_atom_selection = sel_cache.selection(self.atom_selection_string)
   mov_chain = hierarchy_moving.only_model().only_chain()
   sel_ref = flex.size_t()
   sel_mov = flex.size_t()
   for residue_group in mov_chain.residue_groups():
     for atom in residue_group.only_atom_group().atoms():
       if (not mov_atom_selection[atom.i_seq]):
         continue
       resid = residue_group.resid()
       ref_name = "%s %s" % (resid, atom.name.strip())
       if (ref_name in self.atoms_ref):
         sel_mov.append(atom.i_seq)
         sel_ref.append(self.atoms_ref.index(ref_name))
   if (len(sel_ref) == 0):
     assert (self.atom_selection_string is not None)
     return None
   assert (len(sel_ref) > 0) and (len(sel_ref) == len(sel_mov))
   xyz_mov = mov_atoms.extract_xyz()
   sites_mov = xyz_mov.select(sel_mov)
   sites_ref = self.reference_sites.select(sel_ref)
   if (self.sieve_fit):
     return superpose.sieve_fit(
       sites_fixed=sites_ref,
       sites_moving=sites_mov,
       frac_discard=self.frac_discard)
   else :
     return superpose.least_squares_fit(
       reference_sites=sites_ref,
       other_sites=sites_mov)
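Both branches return an object exposing r, t and rt(), so a caller can treat a sieve fit and a plain least-squares fit the same way. A minimal sketch following the application pattern of Example #38 (names are illustrative):

fit = superpose.sieve_fit(sites_fixed=sites_ref,
                          sites_moving=sites_mov,
                          frac_discard=0.25)
sites_fit = fit.r.elems * sites_mov + fit.t.elems
rmsd = sites_ref.rms_difference(sites_fit)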
Example #36
 def __init__ (self, all_sites_cart, lsq_fits, buffer=10.0, log=sys.stdout) :
   fitted_sites = []
   original_sites = []
   minima = flex.vec3_double()
   for sites_cart, lsq_fit in zip(all_sites_cart, lsq_fits) :
     fitted_sites.append(sites_cart.deep_copy())
     minima.append(sites_cart.min())
     if lsq_fit is None :
       original_sites.append(sites_cart.deep_copy())
     else :
       old_sites = lsq_fit.r.inverse().elems * (sites_cart - lsq_fit.t.elems)
       original_sites.append(old_sites)
   xyz_min = minima.min()
   dxyz = (buffer - xyz_min[0], buffer - xyz_min[1], buffer - xyz_min[2])
   self.shifted_sites = []
   self.transformation_matrices = []
   for i, sites_cart in enumerate(fitted_sites) :
     new_sites_cart = sites_cart + dxyz
     #print new_sites_cart.min()
     self.shifted_sites.append(new_sites_cart)
     lsq_fit = superpose.least_squares_fit(
       reference_sites=new_sites_cart,
       other_sites=original_sites[i])
     self.transformation_matrices.append(lsq_fit.rt())
Example #37
 def align_model (self, i_model) :
   from scitbx.array_family import flex
   from scitbx.math import superpose
   hierarchy_moving = self.related_chains[i_model].pdb_hierarchy
   mov_atoms = hierarchy_moving.atoms()
   mov_atoms.reset_i_seq()
   sel_cache = hierarchy_moving.atom_selection_cache()
   mov_atom_selection = sel_cache.selection(self.atom_selection_string)
   mov_chain = hierarchy_moving.only_model().only_chain()
   sel_ref = flex.size_t()
   sel_mov = flex.size_t()
   for residue_group in mov_chain.residue_groups() :
     for atom in residue_group.only_atom_group().atoms() :
       if (not mov_atom_selection[atom.i_seq]) :
         continue
       resid = residue_group.resid()
       ref_name = "%s %s" % (resid, atom.name.strip())
       if (ref_name in self.atoms_ref) :
         sel_mov.append(atom.i_seq)
         sel_ref.append(self.atoms_ref.index(ref_name))
   if (len(sel_ref) == 0) :
     assert (self.atom_selection_string is not None)
     return None
   assert (len(sel_ref) > 0) and (len(sel_ref) == len(sel_mov))
   xyz_mov = mov_atoms.extract_xyz()
   sites_mov = xyz_mov.select(sel_mov)
   sites_ref = self.reference_sites.select(sel_ref)
   if (self.sieve_fit) :
     return superpose.sieve_fit(
       sites_fixed=sites_ref,
       sites_moving=sites_mov,
       frac_discard=self.frac_discard)
   else :
     return superpose.least_squares_fit(
       reference_sites=sites_ref,
       other_sites=sites_mov)
Example #38
def find_ncs_operators (pdb_hierarchy, max_rmsd=2.0, try_sieve_fit=True,
    log=None) :
  """
  Determines all possible NCS transformation matrices for the input structure,
  based on sequence alignment and simple C-alpha superposition.  There may be
  multiple sets of operators but these will eventually become a flat list.

  :param max_rmsd: maximum allowable RMSD between NCS-related chains for use
    in ligand superposition
  :param try_sieve_fit: also perform a sieve fit between chains and use the
    resulting operator if the RMSD is lower than the global fit
  :param log: filehandle-like object
  :returns: list of lists of group_operators objects
  """
  import iotbx.ncs
  from scitbx.math import superpose
  from scitbx.array_family import flex
  ncs_obj = iotbx.ncs.input(hierarchy=pdb_hierarchy)
  ncs_groups = []
  for k,v in ncs_obj.ncs_to_asu_selection.iteritems():
    ncs_groups.append([k]+v)
  if (len(ncs_groups) == 0) :
    raise Sorry("No NCS present in the input model.")
  for k, group in enumerate(ncs_groups) :
    print >> log, "Group %d:" % (k+1)
    for sele in group :
      print >> log, "  %s" % sele
  selection_cache = pdb_hierarchy.atom_selection_cache()
  pdb_atoms = pdb_hierarchy.atoms()
  sites_cart = pdb_atoms.extract_xyz()
  operators = []
  def get_selection (sele_str) :
    sele_str = "(%s) and name CA and (altloc ' ' or altloc A)" % sele_str
    return selection_cache.selection(sele_str).iselection()
  for restraint_group in ncs_groups :
    group_ops = []
    assert (len(restraint_group) >= 2)
    # XXX This is currently an all-vs-all loop, which means that each
    # NCS relationship will be calculated (and stored) twice.  Need to figure
    # out whether this actually matters in practice.
    for j, sele_str in enumerate(restraint_group) :
      sele_j = get_selection(sele_str)
      group = group_operators(sele_j, sele_str, sites_cart)
      assert (len(sele_j) > 0)
      calpha_ids = []
      for i_seq in sele_j :
        resid = resid_str(pdb_atoms[i_seq])
        if (not resid in calpha_ids) :
          calpha_ids.append(resid)
      for k, sele_str_k in enumerate(restraint_group) :
        if (k == j) : continue
        sele_k = get_selection(sele_str_k)
        group_sele = flex.size_t()
        group_ids = set([])
        assert (len(sele_k) > 0)
        # poor man's sequence alignment
        for i_seq in sele_k :
          id_str = resid_str(pdb_atoms[i_seq])
          if (id_str in group_ids) :
            continue
          group_ids.add(id_str)
          if (id_str in calpha_ids) :
            group_sele.append(i_seq)
        first_sele_copy = flex.size_t() #first_sele.deep_copy()
        delete_indices = []
        for i_seq, id_str in zip(sele_j, calpha_ids) :
          if (id_str in group_ids) :
            first_sele_copy.append(i_seq)
        assert (len(first_sele_copy) == len(group_sele))
        assert (len(group_sele) > 0)
        sites_ref = sites_cart.select(first_sele_copy)
        sites_group = sites_cart.select(group_sele).deep_copy()
        lsq_fit = superpose.least_squares_fit(
          reference_sites=sites_ref,
          other_sites=sites_group)
        sites_fit = lsq_fit.r.elems * sites_group + lsq_fit.t.elems
        rmsd = sites_ref.rms_difference(sites_fit)
        if (try_sieve_fit) :
          lsq_fit_2 = superpose.sieve_fit(
            sites_fixed=sites_ref,
            sites_moving=sites_group,
            frac_discard=0.25)
          sites_fit_2 = lsq_fit_2.r.elems * sites_group + lsq_fit_2.t.elems
          rmsd_2 = sites_ref.rms_difference(sites_fit_2)
          if (rmsd_2 < rmsd) :
            print >> log, "  using sieve fit (RMSD = %.3f, RMSD(all) = %.3f)" %\
              (rmsd_2, rmsd)
            lsq_fit = lsq_fit_2
            rmsd = rmsd_2
        print >> log, "  %d versus %d RMSD = %.3f" % (j+1, k+1, rmsd)
        if (rmsd <= max_rmsd) :
          group.add_operator(lsq_fit.rt().inverse(), sele_str_k)
        else :
          print >> log, "  exceeds cutoff, will not use this operator"
      group_ops.append(group)
    operators.append(group_ops)
  return operators
Example #39
File: rigid.py Project: dials/cctbx
 def add_to(self, reparametrisation):
   if not self.fix_u and not self.fix_xyz:
     return
   scatterers = reparametrisation.structure.scatterers()
   ref_sites = []
   ref_u_isos = []
   ref_u_stars = []
   ref_adps = []
   src_crds = []
   inv_src_crds = []
   uc = reparametrisation.structure.unit_cell()
   for i in self.groups[0]:
     src_crds.append(uc.orthogonalize(scatterers[i].site))
     if self.fix_xyz:
       ref_sites.append(reparametrisation.add_new_site_parameter(i))
     if self.fix_u:
       if scatterers[i].flags.use_u_iso():
         ref_u_isos.append(
           reparametrisation.add_new_thermal_displacement_parameter(i))
       else:
         ref_u_stars.append(
           reparametrisation.add_new_thermal_displacement_parameter(i))
   for g in self.groups[1:]:
     if len(g) != len(self.groups[0]):
       raise InvalidConstraint("Group size mismatch")
     g_scatterers = []
     g_u_iso_scatterers  =[]
     g_u_star_scatterers = []
     crds = []
     for idx, i in enumerate(g):
       if scatterers[i].flags.use_u_iso() !=\
          scatterers[self.groups[0][idx]].flags.use_u_iso():
         raise InvalidConstraint("Mixing isotropic and anisotropic parameters")
       g_scatterers.append(scatterers[i])
       crds.append(uc.orthogonalize(scatterers[i].site))
       if scatterers[i].flags.use_u_iso():
         g_u_iso_scatterers.append(scatterers[i])
       else:
         g_u_star_scatterers.append(scatterers[i])
     #need to map reference to target
     lsf = superpose.least_squares_fit(
       flex.vec3_double(crds), flex.vec3_double(src_crds))
     #create a list of inverted coordinates if needed
     if len(inv_src_crds) == 0:
       for i in range(0, len(g)):
         inv_src_crds.append(
           2*matrix.col(lsf.other_shift)-matrix.col(src_crds[i]))
     rm = lsf.r
     t = matrix.col(lsf.reference_shift)-matrix.col(lsf.other_shift)
     new_crd = lsf.other_sites_best_fit()
     d = 0
     for i, c in enumerate(new_crd):
       d += matrix.col(matrix.col(c)-matrix.col(crds[i])).length_sq()
     lsf = superpose.least_squares_fit(
       flex.vec3_double(crds), flex.vec3_double(inv_src_crds))
     new_crd = lsf.other_sites_best_fit()
     d_inv = 0
     for i, c in enumerate(new_crd):
       d_inv += matrix.col(matrix.col(c)-matrix.col(crds[i])).length_sq()
     if d_inv < d:
       rm = -lsf.r
     if self.fix_xyz:
       shifts_and_angles =\
         reparametrisation.add(_.independent_small_6_vector_parameter,
                               value=(t[0],t[1],t[2],0,0,0), variable=True)
       if len(ref_u_stars) > 0:
         u_star_param = reparametrisation.add(
           _.same_group_u_star,
           scatterers=g_u_star_scatterers,
           u_stars=ref_u_stars,
           alignment_matrix=rm,
           shifts_and_angles=shifts_and_angles
         )
     elif len(ref_u_stars) > 0:
       angles =\
         reparametrisation.add(_.independent_small_3_vector_parameter,
                               value=self.angles, variable=True)
       u_star_param = reparametrisation.add(
         _.same_group_u_star,
         scatterers=g_u_star_scatterers,
         u_stars=ref_u_stars,
         alignment_matrix=rm,
         angles=angles
       )
     if self.fix_xyz:
       site_param = reparametrisation.add(
         _.same_group_xyz,
         scatterers=g_scatterers,
         sites=ref_sites,
         alignment_matrix=rm,
         shifts_and_angles=shifts_and_angles
       )
     if len(ref_u_isos) > 0:
       u_iso_param = reparametrisation.add(
         _.same_group_u_iso,
         scatterers=g_u_iso_scatterers,
         u_isos=ref_u_isos
       )
     site_proxy_index = 0
     u_star_proxy_index = 0
     u_iso_proxy_index = 0
     for i in g:
       if self.fix_xyz:
         reparametrisation.asu_scatterer_parameters[i].site = site_param
         reparametrisation.add_new_same_group_site_proxy_parameter(
           site_param, site_proxy_index, i)
         site_proxy_index += 1
       if self.fix_u:
         if scatterers[i].flags.use_u_iso():
           reparametrisation.asu_scatterer_parameters[i].u = u_iso_param
           reparametrisation.shared_Us[i] = reparametrisation.add(
             _.same_group_u_iso_proxy,
             parent=u_iso_param,
             index=u_iso_proxy_index
             )
           u_iso_proxy_index += 1
         else:
           reparametrisation.asu_scatterer_parameters[i].u = u_star_param
           reparametrisation.shared_Us[i] = reparametrisation.add(
             _.same_group_u_star_proxy,
             parent=u_star_param,
             index=u_star_proxy_index
             )
           u_star_proxy_index += 1
Example #40
0
def get_rot_trans(ph,
                  master_selection,
                  copy_selection,
                  chain_max_rmsd=0.02):
  """
  Get rotation and translation using superpose.

  This function is used only when phil parameters are provided. In this case
  we require the selection of NCS master and copies to be correct.
  Correct means:
    1) residue sequence in master and copies is exactly the same
    2) the number of atoms in master and copies is exactly the same

  One can get exact selection strings by ncs_object.show(verbose=True)

  Args:
    ph : pdb.hierarchy
    master/copy_selection (str): master and copy selection strings
    chain_max_rmsd (float): limit of rms difference between chains to be considered
      as copies

  Returns:
    r: rotation matrix
    t: translation vector
    rmsd (float): RMSD between master and copy
    msg (str): error messages
  """
  msg = ''
  r_zero = matrix.sqr([0]*9)
  t_zero = matrix.col([0,0,0])
  #
  if ph:
    cache = ph.atom_selection_cache().selection
    master_ncs_ph = ph.select(cache(master_selection))
    ncs_copy_ph = ph.select(cache(copy_selection))
    seq_m,res_ids_m  = get_residue_sequence(master_ncs_ph)
    seq_c,res_ids_c = get_residue_sequence(ncs_copy_ph)
    res_sel_m, res_sel_c, similarity = mmtbx_res_alignment(
        seq_m, seq_c, min_percent=0)
    # res_sel_m, res_sel_c, similarity = res_alignment(
    #   seq_a=seq_m,seq_b=seq_c,
    #   min_contig_length=0,min_percent=0)
    m_atoms = master_ncs_ph.atoms()
    c_atoms = ncs_copy_ph.atoms()
    # Check that master and copy are identical
    if (similarity != 1) or (m_atoms.size() != c_atoms.size()) :
      return r_zero,t_zero,0,'Master and Copy selection do not exactly match'
    # master
    other_sites = m_atoms.extract_xyz()
    # copy
    ref_sites = c_atoms.extract_xyz()
    if ref_sites.size() > 0:
      lsq_fit_obj = superpose.least_squares_fit(
          reference_sites = ref_sites,
          other_sites     = other_sites)
      r = lsq_fit_obj.r
      t = lsq_fit_obj.t
      rmsd = ref_sites.rms_difference(lsq_fit_obj.other_sites_best_fit())
      if rmsd > chain_max_rmsd:
        return r_zero,t_zero,0,msg
    else:
      return r_zero,t_zero,0,'No sites to compare.\n'
    return r,t,round(rmsd,4),msg
  else:
    return r_zero,t_zero,0,msg
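
A hypothetical call to the get_rot_trans function above might look as follows; the file name and the chain selection strings are assumptions for illustration only:

import iotbx.pdb

# assumed input: any model containing two sequence-identical chains A and B
ph = iotbx.pdb.input(file_name="model.pdb").construct_hierarchy()
r, t, rmsd, msg = get_rot_trans(
  ph=ph,
  master_selection="chain A",
  copy_selection="chain B",
  chain_max_rmsd=0.02)
print("rmsd = %s  msg = %r" % (rmsd, msg))
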
Example #41
0
 def __init__(self,
              pdb_hierarchy,
              crystal_symmetry,
              angular_difference_threshold_deg=5.,
              sequence_identity_threshold=90.):
   h = pdb_hierarchy
   superposition_threshold = 2*sequence_identity_threshold - 100.
   n_atoms_all = h.atoms_size()
   s_str = "altloc ' ' and (protein or nucleotide)"
   h = h.select(h.atom_selection_cache().selection(s_str))
   h1 = iotbx.pdb.hierarchy.root()
   h1.append_model(h.models()[0].detached_copy())
   unit_cell = crystal_symmetry.unit_cell()
   result = {}
   print "Find groups of chains related by translational NCS"
   # double loop over chains to find matching pairs related by pure translation
   for c1 in h1.chains():
     c1.parent().remove_chain(c1)
     nchains = len(h1.models()[0].chains())
     if([c1.is_protein(), c1.is_na()].count(True)==0): continue
     r1 = list(c1.residues())
     c1_seq = "".join(c1.as_sequence())
     sc_1_tmp = c1.atoms().extract_xyz()
     h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
     for (ii,c2) in enumerate(h1_p1.chains()):
       orig_c2 = h1.models()[0].chains()[ii%nchains]
       r2 = list(c2.residues())
       c2_seq = "".join(c2.as_sequence())
       sites_cart_1, sites_cart_2 = None,None
       sc_2_tmp = c2.atoms().extract_xyz()
       # chains are identical
       if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
         sites_cart_1 = sc_1_tmp
         sites_cart_2 = sc_2_tmp
         p_identity = 100.
       # chains are not identical, do alignment
       else:
         align_obj = mmtbx.alignment.align(seq_a = c1_seq, seq_b = c2_seq)
         alignment = align_obj.extract_alignment()
         matches = alignment.matches()
         equal = matches.count("|")
         total = len(alignment.a) - alignment.a.count("-")
         p_identity = 100.*equal/max(1,total)
         if(p_identity>superposition_threshold):
           sites_cart_1 = flex.vec3_double()
           sites_cart_2 = flex.vec3_double()
           for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
                                    matches):
             if(i1 is not None and i2 is not None and match=="|"):
               r1i, r2i = r1[i1], r2[i2]
               assert r1i.resname==r2i.resname, [r1i.resname,r2i.resname,i1,i2]
               for a1 in r1i.atoms():
                 for a2 in r2i.atoms():
                   if(a1.name == a2.name):
                     sites_cart_1.append(a1.xyz)
                     sites_cart_2.append(a2.xyz)
                     break
       # superpose two sequence-aligned chains
       if([sites_cart_1,sites_cart_2].count(None)==0):
         lsq_fit_obj = superpose.least_squares_fit(
           reference_sites = sites_cart_1,
           other_sites     = sites_cart_2)
         angle = lsq_fit_obj.r.rotation_angle()
         t_frac = unit_cell.fractionalize((sites_cart_1-sites_cart_2).mean())
         t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
         radius = flex.sum(flex.sqrt((sites_cart_1-
           sites_cart_1.mean()).dot()))/sites_cart_1.size()*4./3.
         fracscat = min(c1.atoms_size(),c2.atoms_size())/n_atoms_all
         result.setdefault( frozenset([c1,orig_c2]), [] ).append( [p_identity,[lsq_fit_obj.r, t_frac, angle, radius, fracscat]] )
       else:
         result.setdefault( frozenset([c1,orig_c2]), [] ).append( [p_identity,None] )
   # Build graph
   g = graph.adjacency_list()
   vertex_handle = {}
   for key in result:
     seqid = result[key][0][0]
     sup = min( result[key],key=lambda s:0 if s[1] is None else s[1][2])[1]
     result[key] = [seqid,sup]
     if ((seqid > sequence_identity_threshold) and (sup[2] < angular_difference_threshold_deg)):
       (c1,c2) = key
       if (c1 not in vertex_handle):
         vertex_handle[c1] = g.add_vertex(label=c1)
       if (c2 not in vertex_handle):
         vertex_handle[c2] = g.add_vertex(label=c2)
       g.add_edge(vertex1=vertex_handle[c1],vertex2=vertex_handle[c2])
   # Do connected component analysis and compose final tNCS pairs object
   components = connected_component_algorithm.connected_components(g)
   import itertools
   self.ncs_pairs = []
   for (i,group) in enumerate(components):
     chains = [g.vertex_label(vertex=v) for v in group]
     fracscats = []
     radii = []
     for pair in itertools.combinations(chains,2):
       sup = result[frozenset(pair)][1]
       fracscats.append(sup[-1])
       radii.append(sup[-2])
     fs = sum(fracscats)/len(fracscats)
     rad = sum(radii)/len(radii)
     for pair in itertools.combinations(chains,2):
       sup = result[frozenset(pair)][1]
       ncs_pair = ext.pair(
         r = sup[0],
         t = sup[1],
         radius = rad,
         radius_estimate = rad,
         fracscat = fs,
         rho_mn = flex.double(), # rho_mn undefined, needs to be set later
         id = i)
       self.ncs_pairs.append(ncs_pair)
       # show tNCS pairs in group
       fmt="group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
       t = ",".join([("%6.3f"%t_).strip() for t_ in sup[1]]).strip()
       print fmt%(i, pair[0].id, pair[1].id, sup[2], t, fs)
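
The translational-NCS test in the class above checks the rotation angle of the superposition and reduces the mean coordinate difference to a fractional translation. A small sketch of just that reduction step, with an invented unit cell and coordinates:

import math
from cctbx import uctbx
from scitbx.array_family import flex

unit_cell = uctbx.unit_cell((50, 60, 70, 90, 90, 90))   # invented cell
sites_cart_1 = flex.vec3_double([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)])
sites_cart_2 = flex.vec3_double([(26.0, 2.5, 3.0), (29.0, 5.5, 6.0)])

# same reduction as in __init__ above: mean Cartesian shift -> fractional,
# then keep only the fractional part of each component
t_frac = unit_cell.fractionalize((sites_cart_1 - sites_cart_2).mean())
t_frac = [math.modf(t)[0] for t in t_frac]
print("fractional translation: (%.3f, %.3f, %.3f)" % tuple(t_frac))
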
Example #42
0
    def run(self):
        ''' Parse the options. '''
        # Parse the command line arguments
        params, options = self.parser.parse_args(show_diff_phil=True)

        reference_experiments = ExperimentListFactory.from_json_file(
            params.reference_experiments, check_format=False)
        if len(reference_experiments.detectors()) != 1:
            raise Sorry("Please ensure reference has only 1 detector model")
        reference = reference_experiments.detectors()[0]

        moving_experiments = ExperimentListFactory.from_json_file(
            params.moving_experiments, check_format=False)
        if len(moving_experiments.detectors()) != 1:
            raise Sorry("Please ensure moving has only 1 detector model")
        moving = moving_experiments.detectors()[0]

        # Get list of panels to compare
        if params.panel_list is None or len(params.panel_list) == 0:
            assert len(reference) == len(moving), "Detectors not same length"
            panel_ids = range(len(reference))
        else:
            max_p_id = max(params.panel_list)
            assert max_p_id < len(
                reference
            ), "Reference detector must be at least %d panels long given the panel list" % (
                max_p_id + 1)
            assert max_p_id < len(
                moving
            ), "Moving detector must be at least %d panels long given the panel list" % (
                max_p_id + 1)
            panel_ids = params.panel_list

        if params.fit_target == "centers":
            assert len(
                panel_ids
            ) >= 3, "When using centers as target for superpose, detector needs at least 3 panels"

        def rmsd_from_centers(a, b):
            assert len(a) == len(b)
            assert len(a) % 4 == len(b) % 4 == 0
            ca = flex.vec3_double()
            cb = flex.vec3_double()
            for i in xrange(len(a) // 4):
                # each panel contributes 4 corner sites; average each group of 4
                ca.append(a[i * 4:(i + 1) * 4].mean())
                cb.append(b[i * 4:(i + 1) * 4].mean())
            return 1000 * math.sqrt((ca - cb).sum_sq() / len(ca))

        cycles = 0
        while True:
            cycles += 1

            # Treat panels as a list of 4 sites (corners) or 1 site (centers) for use with lsq superpose
            reference_sites = flex.vec3_double()
            moving_sites = flex.vec3_double()
            for panel_id in panel_ids:
                for detector, sites in zip([reference, moving],
                                           [reference_sites, moving_sites]):
                    panel = detector[panel_id]
                    size = panel.get_image_size()
                    corners = flex.vec3_double([
                        panel.get_pixel_lab_coord(point)
                        for point in [(0, 0), (0, size[1] -
                                               1), (size[0] - 1, size[1] -
                                                    1), (size[0] - 1, 0)]
                    ])
                    if params.fit_target == "corners":
                        sites.extend(corners)
                    elif params.fit_target == "centers":
                        sites.append(corners.mean())

            # Compute super position
            rmsd = 1000 * math.sqrt((reference_sites - moving_sites).sum_sq() /
                                    len(reference_sites))
            print("RMSD before fit: %.1f microns" % rmsd)
            if params.fit_target == "corners":
                rmsd = rmsd_from_centers(reference_sites, moving_sites)
                print("RMSD of centers before fit: %.1f microns" % rmsd)
            lsq = least_squares_fit(reference_sites, moving_sites)
            rmsd = 1000 * math.sqrt(
                (reference_sites - lsq.other_sites_best_fit()).sum_sq() /
                len(reference_sites))
            print("RMSD of fit: %.1f microns" % rmsd)
            if params.fit_target == "corners":
                rmsd = rmsd_from_centers(reference_sites,
                                         lsq.other_sites_best_fit())
                print("RMSD of fit of centers: %.1f microns" % rmsd)
            angle, axis = lsq.r.r3_rotation_matrix_as_unit_quaternion(
            ).unit_quaternion_as_axis_and_angle(deg=True)
            print(
                "Axis and angle of rotation: (%.3f, %.3f, %.3f), %.2f degrees"
                % (axis[0], axis[1], axis[2], angle))
            print("Translation (x, y, z, in microns): (%.3f, %.3f, %.3f)" %
                  (1000 * lsq.t).elems)

            # Apply the shifts
            if params.apply_at_hierarchy_level == None:
                iterable = moving
            else:
                iterable = iterate_detector_at_level(
                    moving.hierarchy(), level=params.apply_at_hierarchy_level)

            for group in iterable:
                fast = col(group.get_fast_axis())
                slow = col(group.get_slow_axis())
                ori = col(group.get_origin())

                group.set_frame(lsq.r * fast, lsq.r * slow,
                                (lsq.r * ori) + lsq.t)

                fast = col(group.get_fast_axis())
                slow = col(group.get_slow_axis())
                ori = col(group.get_origin())

            if not params.repeat_until_converged:
                break

            if approx_equal(angle, 0.0, out=None) and approx_equal(
                (1000 * lsq.t).length(), 0.0, out=None):
                print("Converged after", cycles, "cycles")
                break
            else:
                print("Movement not close to zero, repeating fit")
                print()

        from dxtbx.serialize import dump
        dump.experiment_list(moving_experiments, params.output_experiments)

        moved_sites = flex.vec3_double()
        for panel_id in panel_ids:
            panel = moving[panel_id]
            size = panel.get_image_size()
            corners = flex.vec3_double([
                panel.get_pixel_lab_coord(point)
                for point in [(0, 0), (0,
                                       size[1] - 1), (size[0] - 1, size[1] -
                                                      1), (size[0] - 1, 0)]
            ])
            if params.fit_target == "corners":
                moved_sites.extend(corners)
            elif params.fit_target == "centers":
                moved_sites.append(corners.mean())

        # Re-compute RMSD after moving detector components
        rmsd = 1000 * math.sqrt(
            (reference_sites - moved_sites).sum_sq() / len(reference_sites))
        print("RMSD of fit after movement: %.1f microns" % rmsd)
        if params.fit_target == "corners":
            rmsd = rmsd_from_centers(reference_sites, moved_sites)
            print("RMSD of fit of centers after movement: %.1f microns" % rmsd)

        if params.panel_list is not None:
            reference_sites = flex.vec3_double()
            moved_sites = flex.vec3_double()
            for panel_id in xrange(len(reference)):
                for detector, sites in zip([reference, moving],
                                           [reference_sites, moved_sites]):
                    panel = detector[panel_id]
                    size = panel.get_image_size()
                    corners = flex.vec3_double([
                        panel.get_pixel_lab_coord(point)
                        for point in [(0, 0), (0, size[1] -
                                               1), (size[0] - 1, size[1] -
                                                    1), (size[0] - 1, 0)]
                    ])
                    if params.fit_target == "corners":
                        sites.extend(corners)
                    elif params.fit_target == "centers":
                        sites.append(corners.mean())
            # Re-compute RMSD for full detector after moving detector components
            rmsd = 1000 * math.sqrt((reference_sites - moved_sites).sum_sq() /
                                    len(reference_sites))
            print("RMSD of whole detector fit after movement: %.1f microns" %
                  rmsd)
            if params.fit_target == "corners":
                rmsd = rmsd_from_centers(reference_sites, moved_sites)
                print(
                    "RMSD of whole detector fit of centers after movement: %.1f microns"
                    % rmsd)
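
The RMSDs reported in microns by the script above are just the root-mean-square site difference (detector coordinates are in mm) scaled by 1000. A tiny sketch with invented sites:

import math
from scitbx.array_family import flex

reference_sites = flex.vec3_double([(0.0, 0.0, 100.0), (10.0, 0.0, 100.0)])
moving_sites    = flex.vec3_double([(0.0, 0.002, 100.0), (10.0, 0.002, 100.0)])

# same formula as in run() above; the factor 1000 converts mm to microns
rmsd_microns = 1000 * math.sqrt(
  (reference_sites - moving_sites).sum_sq() / len(reference_sites))
print("RMSD: %.1f microns" % rmsd_microns)   # 2.0
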
Example #43
0
 def add_to(self, reparametrisation):
   if not self.fix_u and not self.fix_xyz:
     return
   scatterers = reparametrisation.structure.scatterers()
   ref_sites = []
   ref_u_isos = []
   ref_u_stars = []
   ref_adps = []
   src_crds = []
   inv_src_crds = []
   uc = reparametrisation.structure.unit_cell()
   for i in self.groups[0]:
     src_crds.append(uc.orthogonalize(scatterers[i].site))
     if self.fix_xyz:
       ref_sites.append(reparametrisation.add_new_site_parameter(i))
     if self.fix_u:
       if scatterers[i].flags.use_u_iso():
         ref_u_isos.append(
           reparametrisation.add_new_thermal_displacement_parameter(i))
       else:
         ref_u_stars.append(
           reparametrisation.add_new_thermal_displacement_parameter(i))
   for g in self.groups[1:]:
     if len(g) != len(self.groups[0]):
       raise InvalidConstraint("Group size mismatch")
     g_scatterers = []
     g_u_iso_scatterers  =[]
     g_u_star_scatterers = []
     crds = []
     for idx, i in enumerate(g):
       if scatterers[i].flags.use_u_iso() !=\
          scatterers[self.groups[0][idx]].flags.use_u_iso():
         raise InvalidConstraint("Mixing isotropic and anisotropic parameters")
       g_scatterers.append(scatterers[i])
       crds.append(uc.orthogonalize(scatterers[i].site))
       if scatterers[i].flags.use_u_iso():
         g_u_iso_scatterers.append(scatterers[i])
       else:
         g_u_star_scatterers.append(scatterers[i])
     #need to map reference to target
     lsf = superpose.least_squares_fit(
       flex.vec3_double(crds), flex.vec3_double(src_crds))
     #create a list of inverted coordinates if needed
     if len(inv_src_crds) == 0:
       for i in xrange(0, len(g)):
         inv_src_crds.append(
           2*matrix.col(lsf.other_shift)-matrix.col(src_crds[i]))
     rm = lsf.r
     t = matrix.col(lsf.reference_shift)-matrix.col(lsf.other_shift)
     new_crd = lsf.other_sites_best_fit()
     d = 0
     for i, c in enumerate(new_crd):
       d += matrix.col(matrix.col(c)-matrix.col(crds[i])).length_sq()
     lsf = superpose.least_squares_fit(
       flex.vec3_double(crds), flex.vec3_double(inv_src_crds))
     new_crd = lsf.other_sites_best_fit()
     d_inv = 0
     for i, c in enumerate(new_crd):
       d_inv += matrix.col(matrix.col(c)-matrix.col(crds[i])).length_sq()
     if d_inv < d:
       rm = -lsf.r
     if self.fix_xyz:
       shifts_and_angles =\
         reparametrisation.add(_.independent_small_6_vector_parameter,
                               value=(t[0],t[1],t[2],0,0,0), variable=True)
       if len(ref_u_stars) > 0:
         u_star_param = reparametrisation.add(
           _.same_group_u_star,
           scatterers=g_u_star_scatterers,
           u_stars=ref_u_stars,
           alignment_matrix=rm,
           shifts_and_angles=shifts_and_angles
         )
     elif len(ref_u_stars) > 0:
       angles =\
         reparametrisation.add(_.independent_small_3_vector_parameter,
                               value=self.angles, variable=True)
       u_star_param = reparametrisation.add(
         _.same_group_u_star,
         scatterers=g_u_star_scatterers,
         u_stars=ref_u_stars,
         alignment_matrix=rm,
         angles=angles
       )
     if self.fix_xyz:
       site_param = reparametrisation.add(
         _.same_group_xyz,
         scatterers=g_scatterers,
         sites=ref_sites,
         alignment_matrix=rm,
         shifts_and_angles=shifts_and_angles
       )
     if len(ref_u_isos) > 0:
       u_iso_param = reparametrisation.add(
         _.same_group_u_iso,
         scatterers=g_u_iso_scatterers,
         u_isos=ref_u_isos
       )
     site_proxy_index = 0
     u_star_proxy_index = 0
     u_iso_proxy_index = 0
     for i in g:
       if self.fix_xyz:
         reparametrisation.asu_scatterer_parameters[i].site = site_param
         reparametrisation.add_new_same_group_site_proxy_parameter(
           site_param, site_proxy_index, i)
         site_proxy_index += 1
       if self.fix_u:
         if scatterers[i].flags.use_u_iso():
           reparametrisation.asu_scatterer_parameters[i].u = u_iso_param
           reparametrisation.shared_Us[i] = reparametrisation.add(
             _.same_group_u_iso_proxy,
             parent=u_iso_param,
             index=u_iso_proxy_index
             )
           u_iso_proxy_index += 1
         else:
           reparametrisation.asu_scatterer_parameters[i].u = u_star_param
           reparametrisation.shared_Us[i] = reparametrisation.add(
             _.same_group_u_star_proxy,
             parent=u_star_param,
             index=u_star_proxy_index
             )
           u_star_proxy_index += 1
Example #44
0
def run(args, command_name="mmtbx.super"):
  if (len(args) == 0):
    print "usage: %s fixed.pdb moving.pdb [parameter=value ...]" % command_name
    return

  print "#"
  print "#                       ", command_name
  print "#"
  print "# A lightweight sequence-based structure superposition tool."
  print "#"
  print "#"

  phil_objects = []
  argument_interpreter = master_params.command_line_argument_interpreter(
    home_scope="super")
  fixed_pdb_file_name = None
  moving_pdb_file_name = None
  for arg in args:
    if (os.path.isfile(arg)):
      if (fixed_pdb_file_name is None): fixed_pdb_file_name = arg
      elif (moving_pdb_file_name is None): moving_pdb_file_name = arg
      else: raise Sorry("Too many file names.")
    else:
      try: command_line_params = argument_interpreter.process(arg=arg)
      except KeyboardInterrupt: raise
      except Exception: raise Sorry("Unknown file or keyword: %s" % arg)
      else: phil_objects.append(command_line_params)

  working_params = master_params.fetch(sources=phil_objects)
  params = working_params.extract()

  def raise_missing(what):
      raise Sorry("""\
Missing file name for %(what)s structure:
  Please add
    %(what)s=file_name
  to the command line to specify the %(what)s structure.""" % vars())

  if (fixed_pdb_file_name is None):
    if (params.super.fixed is None): raise_missing("fixed")
  else:
    params.super.fixed = fixed_pdb_file_name
  if (moving_pdb_file_name is None):
    if (params.super.moving is None): raise_missing("moving")
  else:
    params.super.moving = moving_pdb_file_name

  print "#Parameters used:"
  print "#phil __ON__"
  print
  working_params = master_params.format(python_object=params)
  working_params.show()
  print
  print "#phil __OFF__"
  print

  print "Reading fixed structure:", params.super.fixed
  fixed_pdb = iotbx.pdb.input(file_name=params.super.fixed)
  print
  print "Reading moving structure:", params.super.moving
  moving_pdb = iotbx.pdb.input(file_name=params.super.moving)
  print

  fixed_seq, fixed_sites, fixed_site_flags = extract_sequence_and_sites(
    pdb_input=fixed_pdb)
  moving_seq, moving_sites, moving_site_flags = extract_sequence_and_sites(
    pdb_input=moving_pdb)

  print "Computing sequence alignment..."
  align_obj = mmtbx.alignment.align(
    seq_a=fixed_seq,
    seq_b=moving_seq,
    gap_opening_penalty=params.super.gap_opening_penalty,
    gap_extension_penalty=params.super.gap_extension_penalty,
    similarity_function=params.super.similarity_matrix,
    style=params.super.alignment_style)
  print "done."
  print

  alignment = align_obj.extract_alignment()
  matches = alignment.matches()
  equal = matches.count("|")
  similar = matches.count("*")
  total = len(alignment.a) - alignment.a.count("-")
  alignment.pretty_print(
    matches=matches,
    block_size=50,
    n_block=1,
    top_name="fixed",
    bottom_name="moving",
    comment="""\
The alignment used in the superposition is shown below.

The sequence identity (fraction of | symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.

The sequence similarity (fraction of | and * symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.
""" % (100.*equal/max(1,total), 100.*(equal+similar)/max(1,total)))

  fixed_sites_sel = flex.vec3_double()
  moving_sites_sel = flex.vec3_double()
  for ia,ib,m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches):
    if (m not in ["|", "*"]): continue
    if (fixed_site_flags[ia] and moving_site_flags[ib]):
      fixed_sites_sel.append(fixed_sites[ia])
      moving_sites_sel.append(moving_sites[ib])

  print "Performing least-squares superposition of C-alpha atom pairs:"
  print "  Number of C-alpha atoms pairs in matching residues"
  print "  indicated by | or * above:", fixed_sites_sel.size()
  if (fixed_sites_sel.size() == 0):
    raise Sorry("No matching C-alpha atoms.")
  lsq_fit = superpose.least_squares_fit(
    reference_sites=fixed_sites_sel,
    other_sites=moving_sites_sel)
  rmsd = fixed_sites_sel.rms_difference(lsq_fit.other_sites_best_fit())
  print "  RMSD between the aligned C-alpha atoms: %.3f" % rmsd
  print

  print "Writing moved pdb to file: %s" % params.super.moved
  pdb_hierarchy = moving_pdb.construct_hierarchy()
  for atom in pdb_hierarchy.atoms():
    atom.xyz = lsq_fit.r * matrix.col(atom.xyz) + lsq_fit.t
  pdb_hierarchy.write_pdb_file(file_name=params.super.moved, append_end=True)
  print
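
The sequence-identity percentage printed by this tool (and recomputed in several of the other examples) comes straight from the alignment's match string. A small sketch with two invented sequences:

import mmtbx.alignment

align_obj = mmtbx.alignment.align(seq_a="ACDEFGHIKL", seq_b="ACDEFGHVKL")
alignment = align_obj.extract_alignment()
matches = alignment.matches()                  # string of "|", "*" and " "
equal = matches.count("|")
total = len(alignment.a) - alignment.a.count("-")
print("sequence identity: %.1f%%" % (100. * equal / max(1, total)))
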
Example #45
0
def clean_chain_matching(chain_match_list,ph,
                         chain_max_rmsd=10.0,
                         residue_match_radius=4.0):
  """
  Remove all bad matches from chain_match_list

  Args:
    ph (object): hierarchy
    chain_match_list (list): list of
      [chain_ID_1, chain_ID_2, sel_1, sel_2,res_m/res_c similarity]
      chain_ID (str), sel_1/2 (list of lists)
      res_m/res_c (lists): indices of the aligned components
      similarity (float): similarity between chains
    chain_max_rmsd (float): limit of rms difference between chains
    residue_match_radius (float): maximum allowed distance difference between
      pairs of matching atoms of two residues

  Returns:
    match_dict(dict): key:(chains_id_a,chains_id_b)
                      val:[selection_a,selection_b,
                           res_list_a,res_list_b,rot,trans,rmsd]
  """
  # remove all non-matching pairs, where similarity == 0
  match_list = [x for x in chain_match_list if x[4] > 0]
  match_dict = {}
  # print "match_list", match_list
  for match in match_list:
    [ch_a_id,ch_b_id,list_a,list_b,res_list_a,res_list_b,similarity] = match
    t0 = time()
    sel_a = make_selection_from_lists(list_a)
    sel_b = make_selection_from_lists(list_b)

    other_h = ph.select(sel_a)
    other_atoms = other_h.atoms()
    ref_h = ph.select(sel_b)
    ref_atoms = ref_h.atoms()
    #
    # Here we want to flip atom names, even before chain alignment, so
    # we will get correct chain RMSD

    # flipped_other_selection = make_flips_if_necessary(ref_h.deep_copy(), other_h.deep_copy())
    flipped_other_selection = make_flips_if_necessary_torsion(
        ref_h.deep_copy(), other_h.deep_copy())
    # if flipped_other_selection is not None:
    other_sites = other_atoms.select(flipped_other_selection).extract_xyz()
    # else:
    #   other_sites = other_atoms.extract_xyz()
    ref_sites = ref_atoms.extract_xyz()
    lsq_fit_obj = superpose.least_squares_fit(
      reference_sites = ref_sites,
      other_sites     = other_sites)
    r = lsq_fit_obj.r
    t = lsq_fit_obj.t
    # todo: find r_2*A = r*A + t (where the translation is zero)
    # use B = r*A + t, r_2*A = B , r_2 = B*A.inverse()
    other_sites_best = lsq_fit_obj.other_sites_best_fit()
    rmsd = round(ref_sites.rms_difference(other_sites_best),4)
    # print "chain rmsd after flip:", rmsd
    if rmsd <= chain_max_rmsd:
      # get the chains atoms and convert selection to flex bool
      sel_aa,sel_bb,res_list_a,res_list_b,ref_sites,other_sites_best = \
        remove_far_atoms(
          list_a, list_b,
          res_list_a,res_list_b,
          ref_sites,lsq_fit_obj.other_sites_best_fit(),
          residue_match_radius=residue_match_radius)
      if sel_a.size() > 0:
        match_dict[ch_a_id,ch_b_id]=[sel_aa,sel_bb,res_list_a,res_list_b,r,t,rmsd]
  return match_dict
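
The rotation/translation pair stored in match_dict can be applied to a flex.vec3_double with the same idiom used elsewhere in these examples (r.elems * sites + t.elems). A minimal sketch with an invented operator:

from scitbx.array_family import flex
from scitbx import matrix

sites = flex.vec3_double([(1.0, 0.0, 0.0), (0.0, 1.0, 0.0)])
r = matrix.sqr((0, -1, 0, 1, 0, 0, 0, 0, 1))   # invented: 90 deg about z
t = matrix.col((5.0, 0.0, 0.0))                # invented translation

moved = r.elems * sites + t.elems              # same idiom as in the examples
print(list(moved))                             # [(5.0, 1.0, 0.0), (4.0, 0.0, 0.0)]
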
Example #46
0
 def __init__(self,
              pdb_hierarchy,
              crystal_symmetry,
              angular_difference_threshold_deg=5.,
              sequence_identity_threshold=90.,
              quiet=False):
     h = pdb_hierarchy
     superposition_threshold = 2 * sequence_identity_threshold - 100.
     n_atoms_all = h.atoms_size()
     s_str = "altloc ' ' and (protein or nucleotide)"
     h = h.select(h.atom_selection_cache().selection(s_str))
     h1 = iotbx.pdb.hierarchy.root()
     h1.append_model(h.models()[0].detached_copy())
     unit_cell = crystal_symmetry.unit_cell()
     result = {}
     if not quiet:
         print("Find groups of chains related by translational NCS")
     # double loop over chains to find matching pairs related by pure translation
     for c1 in h1.chains():
         c1.parent().remove_chain(c1)
         nchains = len(h1.models()[0].chains())
         if ([c1.is_protein(), c1.is_na()].count(True) == 0): continue
         r1 = list(c1.residues())
         c1_seq = "".join(c1.as_sequence())
         sc_1_tmp = c1.atoms().extract_xyz()
         h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
         for (ii, c2) in enumerate(h1_p1.chains()):
             orig_c2 = h1.models()[0].chains()[ii % nchains]
             r2 = list(c2.residues())
             c2_seq = "".join(c2.as_sequence())
             sites_cart_1, sites_cart_2 = None, None
             sc_2_tmp = c2.atoms().extract_xyz()
             # chains are identical
             if (c1_seq == c2_seq and sc_1_tmp.size() == sc_2_tmp.size()):
                 sites_cart_1 = sc_1_tmp
                 sites_cart_2 = sc_2_tmp
                 p_identity = 100.
             # chains are not identical, do alignment
             else:
                 align_obj = mmtbx.alignment.align(seq_a=c1_seq,
                                                   seq_b=c2_seq)
                 alignment = align_obj.extract_alignment()
                 matches = alignment.matches()
                 equal = matches.count("|")
                 total = len(alignment.a) - alignment.a.count("-")
                 p_identity = 100. * equal / max(1, total)
                 if (p_identity > superposition_threshold):
                     sites_cart_1 = flex.vec3_double()
                     sites_cart_2 = flex.vec3_double()
                     for i1, i2, match in zip(alignment.i_seqs_a,
                                              alignment.i_seqs_b, matches):
                         if (i1 is not None and i2 is not None
                                 and match == "|"):
                             r1i, r2i = r1[i1], r2[i2]
                             assert r1i.resname == r2i.resname, [
                                 r1i.resname, r2i.resname, i1, i2
                             ]
                             for a1 in r1i.atoms():
                                 for a2 in r2i.atoms():
                                     if (a1.name == a2.name):
                                         sites_cart_1.append(a1.xyz)
                                         sites_cart_2.append(a2.xyz)
                                         break
             # superpose two sequence-aligned chains
             if ([sites_cart_1, sites_cart_2].count(None) == 0):
                 lsq_fit_obj = superpose.least_squares_fit(
                     reference_sites=sites_cart_1, other_sites=sites_cart_2)
                 angle = lsq_fit_obj.r.rotation_angle()
                 t_frac = unit_cell.fractionalize(
                     (sites_cart_1 - sites_cart_2).mean())
                 t_frac = [math.modf(t)[0]
                           for t in t_frac]  # put into [-1,1]
                 radius = flex.sum(
                     flex.sqrt((sites_cart_1 - sites_cart_1.mean()
                                ).dot())) / sites_cart_1.size() * 4. / 3.
                 fracscat = min(c1.atoms_size(),
                                c2.atoms_size()) / n_atoms_all
                 result.setdefault(frozenset([c1, orig_c2]), []).append([
                     p_identity,
                     [lsq_fit_obj.r, t_frac, angle, radius, fracscat]
                 ])
             else:
                 result.setdefault(frozenset([c1, orig_c2]),
                                   []).append([p_identity, None])
     # Build graph
     g = graph.adjacency_list()
     vertex_handle = {}
     for key in result:
         seqid = result[key][0][0]
         sup = min(result[key],
                   key=lambda s: 0 if s[1] is None else s[1][2])[1]
         result[key] = [seqid, sup]
         if ((seqid > sequence_identity_threshold)
                 and (sup[2] < angular_difference_threshold_deg)):
             (c1, c2) = key
             if (c1 not in vertex_handle):
                 vertex_handle[c1] = g.add_vertex(label=c1)
             if (c2 not in vertex_handle):
                 vertex_handle[c2] = g.add_vertex(label=c2)
             g.add_edge(vertex1=vertex_handle[c1],
                        vertex2=vertex_handle[c2])
     # Do connected component analysis and compose final tNCS pairs object
     components = connected_component_algorithm.connected_components(g)
     import itertools
     self.ncs_pairs = []
     self.tncsresults = [0, "", [], 0.0]
     for (i, group) in enumerate(components):
         chains = [g.vertex_label(vertex=v) for v in group]
         fracscats = []
         radii = []
         for pair in itertools.combinations(chains, 2):
             sup = result[frozenset(pair)][1]
             fracscats.append(sup[-1])
             radii.append(sup[-2])
         fs = sum(fracscats) / len(fracscats)
         self.tncsresults[3] = fs  # store fracscat in array
         rad = sum(radii) / len(radii)
         #import code, traceback; code.interact(local=locals(), banner="".join( traceback.format_stack(limit=10) ) )
         maxorder = 1
         vectors = []
         previous_id = next(itertools.combinations(chains, 2))[0].id
         for pair in itertools.combinations(chains, 2):
             sup = result[frozenset(pair)][1]
             ncs_pair = ext.pair(
                 r=sup[0],
                 t=sup[1],
                 radius=rad,
                 radius_estimate=rad,
                 fracscat=fs,
                 rho_mn=flex.double(
                 ),  # rho_mn undefined, needs to be set later
                 id=i)
             self.ncs_pairs.append(ncs_pair)
             # show tNCS pairs in group
             fmt = "group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
             t = ",".join([("%6.3f" % t_).strip() for t_ in sup[1]]).strip()
             if not quiet:
                 print(fmt % (i, pair[0].id, pair[1].id, sup[2], t, fs))
             if pair[0].id == previous_id:
                 maxorder += 1
                 orthoxyz = unit_cell.orthogonalize(sup[1])
                 vectors.append((sup[1], orthoxyz, sup[2]))
             else:
                 previous_id = pair[0].id
                 maxorder = 1
                 vectors = []
             if maxorder > self.tncsresults[0]:
                 self.tncsresults[0] = maxorder
                 self.tncsresults[1] = previous_id
                 self.tncsresults[2] = vectors
     if not quiet:
         print("Largest TNCS order, peptide chain, fracvector, orthvector, angle, fracscat = ", \
          str(self.tncsresults))
Example #47
0
def run(args, command_name="mmtbx.super"):
    if len(args) == 0:
        print "usage: %s fixed.pdb moving.pdb [parameter=value ...]" % command_name
        return

    print "#"
    print "#                       ", command_name
    print "#"
    print "# A lightweight sequence-based structure superposition tool."
    print "#"
    print "#"

    phil_objects = []
    argument_interpreter = master_params.command_line_argument_interpreter(home_scope="super")
    fixed_pdb_file_name = None
    moving_pdb_file_name = None
    for arg in args:
        if os.path.isfile(arg):
            if fixed_pdb_file_name is None:
                fixed_pdb_file_name = arg
            elif moving_pdb_file_name is None:
                moving_pdb_file_name = arg
            else:
                raise Sorry("Too many file names.")
        else:
            try:
                command_line_params = argument_interpreter.process(arg=arg)
            except KeyboardInterrupt:
                raise
            except Exception:
                raise Sorry("Unknown file or keyword: %s" % arg)
            else:
                phil_objects.append(command_line_params)

    working_params = master_params.fetch(sources=phil_objects)
    params = working_params.extract()

    def raise_missing(what):
        raise Sorry(
            """\
Missing file name for %(what)s structure:
  Please add
    %(what)s=file_name
  to the command line to specify the %(what)s structure."""
            % vars()
        )

    if fixed_pdb_file_name is None:
        if params.super.fixed is None:
            raise_missing("fixed")
    else:
        params.super.fixed = fixed_pdb_file_name
    if moving_pdb_file_name is None:
        if params.super.moving is None:
            raise_missing("moving")
    else:
        params.super.moving = moving_pdb_file_name

    print "#Parameters used:"
    print "#phil __ON__"
    print
    working_params = master_params.format(python_object=params)
    working_params.show()
    print
    print "#phil __OFF__"
    print

    print "Reading fixed structure:", params.super.fixed
    fixed_pdb = iotbx.pdb.input(file_name=params.super.fixed)
    print
    print "Reading moving structure:", params.super.moving
    moving_pdb = iotbx.pdb.input(file_name=params.super.moving)
    print

    fixed_seq, fixed_sites, fixed_site_flags = extract_sequence_and_sites(pdb_input=fixed_pdb)
    moving_seq, moving_sites, moving_site_flags = extract_sequence_and_sites(pdb_input=moving_pdb)

    print "Computing sequence alignment..."
    align_obj = mmtbx.alignment.align(
        seq_a=fixed_seq,
        seq_b=moving_seq,
        gap_opening_penalty=params.super.gap_opening_penalty,
        gap_extension_penalty=params.super.gap_extension_penalty,
        similarity_function=params.super.similarity_matrix,
        style=params.super.alignment_style,
    )
    print "done."
    print

    alignment = align_obj.extract_alignment()
    matches = alignment.matches()
    equal = matches.count("|")
    similar = matches.count("*")
    total = len(alignment.a) - alignment.a.count("-")
    alignment.pretty_print(
        matches=matches,
        block_size=50,
        n_block=1,
        top_name="fixed",
        bottom_name="moving",
        comment="""\
The alignment used in the superposition is shown below.

The sequence identity (fraction of | symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.

The sequence similarity (fraction of | and * symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.
"""
        % (100.0 * equal / max(1, total), 100.0 * (equal + similar) / max(1, total)),
    )

    fixed_sites_sel = flex.vec3_double()
    moving_sites_sel = flex.vec3_double()
    for ia, ib, m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches):
        if m not in ["|", "*"]:
            continue
        if fixed_site_flags[ia] and moving_site_flags[ib]:
            fixed_sites_sel.append(fixed_sites[ia])
            moving_sites_sel.append(moving_sites[ib])

    print "Performing least-squares superposition of C-alpha atom pairs:"
    print "  Number of C-alpha atoms pairs in matching residues"
    print "  indicated by | or * above:", fixed_sites_sel.size()
    if fixed_sites_sel.size() == 0:
        raise Sorry("No matching C-alpha atoms.")
    lsq_fit = superpose.least_squares_fit(reference_sites=fixed_sites_sel, other_sites=moving_sites_sel)
    rmsd = fixed_sites_sel.rms_difference(lsq_fit.other_sites_best_fit())
    print "  RMSD between the aligned C-alpha atoms: %.3f" % rmsd
    print

    print "Writing moved pdb to file: %s" % params.super.moved
    pdb_hierarchy = moving_pdb.construct_hierarchy()
    for atom in pdb_hierarchy.atoms():
        atom.xyz = lsq_fit.r * matrix.col(atom.xyz) + lsq_fit.t
    pdb_hierarchy.write_pdb_file(file_name=params.super.moved, append_end=True)
    print
Example #48
0
def secondary_structure_from_sequence(pdb_str,
      sequence=None,
      pdb_hierarchy_template=None,
      rotamer_manager=None):
  """ Return pdb.hierarchy with secondary structure according to sequence or
  reference hierarcy. If reference hierarchy provided, the resulting hierarchy
  will be rigid body aligned to it. Residue numbers will start from 1.

  pdb_str - "ideal" structure at least 2 residues long.
  sequence - string with sequence (one-letter codes)
  pdb_hierarchy_template - reference hierarchy.
  """
  if rotamer_manager is None:
    rotamer_manager = RotamerEval()
  pht = pdb_hierarchy_template
  assert [sequence, pht].count(None) == 1
  if pht is not None:
    lk = len(pht.altloc_indices().keys())
    if lk ==0:
      raise Sorry(
          "Hierarchy template in secondary_structure_from_sequence is empty")
    else:
      assert len(pht.altloc_indices().keys()) == 1, \
          "Alternative conformations are not supported"
  number_of_residues = len(sequence) if sequence!=None else \
    len(pht.models()[0].chains()[0].conformers()[0].residues())
  if number_of_residues<1:
    raise Sorry('sequence should contain at least one residue.')
  ideal_res_dict = idealized_aa.residue_dict()
  real_res_list = None
  if pht:
    real_res_list = pht.models()[0].chains()[0].residue_groups()
  pdb_hierarchy = iotbx.pdb.input(source_info=None, lines=pdb_str).\
      construct_hierarchy()
  truncate_to_poly_gly(pdb_hierarchy)
  chain = pdb_hierarchy.models()[0].chains()[0]
  current_gly_ag = chain.residue_groups()[0].atom_groups()[0]
  new_chain = iotbx.pdb.hierarchy.chain(id="A")
  new_chain.pre_allocate_residue_groups(number_of_additional_residue_groups=\
                                                            number_of_residues)
  r, t = get_r_t_matrices_from_structure(pdb_str)
  for j in range(number_of_residues):
    # put ALA
    rg = iotbx.pdb.hierarchy.residue_group(icode="")
    rg.resseq = j+1
    new_chain.append_residue_group(residue_group=rg)
    ag_to_place = current_gly_ag.detached_copy()
    rg.append_atom_group(atom_group=ag_to_place)
    current_gly_ag.atoms().set_xyz(
                          r.elems*current_gly_ag.atoms().extract_xyz()+t.elems)
    current_reference_ag = real_res_list[j].atom_groups()[0] if pht else \
        ideal_res_dict[three_one[sequence[j]].lower()].models()[0].chains()[0].\
        residue_groups()[0].atom_groups()[0]
    side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager)
  new_pdb_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(new_chain)
  # align to real
  if pht != None:
    fixed_sites, moving_sites = get_matching_sites_cart_in_both_h(pht, new_pdb_h)
    assert len(fixed_sites) == len(moving_sites)
    lsq_fit_obj = superpose.least_squares_fit(reference_sites = fixed_sites,
                                              other_sites = moving_sites)
    new_pdb_h.atoms().set_xyz(
        lsq_fit_obj.r.elems*new_pdb_h.atoms().extract_xyz()+lsq_fit_obj.t.elems)
  return new_pdb_h
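
A hypothetical call to secondary_structure_from_sequence; alpha_pdb_str is assumed to be an ideal helix template string of at least two residues available in the calling scope (the module defining this function ships such templates, but the exact name here is an assumption):

# hypothetical usage; alpha_pdb_str is an assumed ideal-helix PDB string
helix_h = secondary_structure_from_sequence(
  pdb_str=alpha_pdb_str,
  sequence="ACDEFG")
helix_h.write_pdb_file(file_name="ideal_helix.pdb")
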
Example #49
0
def shortcut_1(hierarchy, chains_info, chain_similarity_threshold,
               chain_max_rmsd, log, residue_match_radius):
    """
  Checking the case when whole hierarchy was produced by multiplication of
  molecule with BIOMT or MTRIX matrices (or both). In this case we are expecting
  to find identical chains with 0 rmsd between them.
  """
    def flatten_list_of_list(lofl):
        return [x for y in lofl for x in y]

    assert chains_info is not None
    assert len(chains_info) > 1
    empty_result = class_ncs_restraints_group_list()

    # new convenience structure: {<n_atoms>:[ch_id, ch_id, ch_id]}
    n_atom_chain_id_dict = {}
    for k, v in six.iteritems(chains_info):
        if v.chains_atom_number not in n_atom_chain_id_dict:
            n_atom_chain_id_dict[v.chains_atom_number] = [k]
        else:
            n_atom_chain_id_dict[v.chains_atom_number].append(k)
    print("n_atom_chain_id_dict", n_atom_chain_id_dict, file=log)
    for k, v in six.iteritems(n_atom_chain_id_dict):
        if len(v) == 1:
            print("No shortcut, there is a chain with unique number of atoms:",
                  v,
                  file=log)
            return empty_result
    # now we start checking atom names, aligning chains, checking rmsd and
    # populating the result. If at any point we are not satisfied with a measure,
    # we return an empty result.
    result = class_ncs_restraints_group_list()
    for n_atoms, chains_list in six.iteritems(n_atom_chain_id_dict):
        # this should make one ncs group
        master_chain_id = chains_list[0]
        master_iselection = flatten_list_of_list(
            chains_info[master_chain_id].atom_selection)
        ncs_gr = NCS_restraint_group(
            master_iselection=flex.size_t(master_iselection),
            str_selection="chain '%s'" % master_chain_id)
        master_xyz = get_chain_xyz(hierarchy, master_chain_id)
        for copy_chain_id in chains_list[1:]:
            # these are copies
            if chains_info[master_chain_id].atom_names != chains_info[
                    copy_chain_id].atom_names:
                print("No shortcut, atom names are not identical", file=log)
                return empty_result
            copy_iselection = flatten_list_of_list(
                chains_info[copy_chain_id].atom_selection)
            copy_xyz = get_chain_xyz(hierarchy, copy_chain_id)
            lsq_fit_obj = superpose.least_squares_fit(reference_sites=copy_xyz,
                                                      other_sites=master_xyz)
            r = lsq_fit_obj.r
            t = lsq_fit_obj.t
            rmsd = copy_xyz.rms_difference(lsq_fit_obj.other_sites_best_fit())
            print("rmsd", master_chain_id, copy_chain_id, rmsd, file=log)
            #
            # XXX should we compare rmsd to chain_max_rmsd to be more relaxed and
            #     process more structures quickly?
            #
            if rmsd is None or rmsd > 0.2:
                print("No shortcut, low rmsd:",
                      rmsd,
                      "for chains",
                      master_chain_id,
                      copy_chain_id,
                      file=log)
                return empty_result
            # seems like a good enough copy
            c = NCS_copy(copy_iselection=flex.size_t(copy_iselection),
                         rot=r,
                         tran=t,
                         str_selection="chain '%s'" % copy_chain_id,
                         rmsd=rmsd)
            ncs_gr.append_copy(c)
        result.append(ncs_gr)
    print("Shortcut complete.", file=log)
    return result
Example #50
0
def secondary_structure_from_sequence(pdb_str,
      sequence=None,
      pdb_hierarchy_template=None,
      rotamer_manager=None):
  """ Return pdb.hierarchy with secondary structure according to sequence or
  reference hierarcy. If reference hierarchy provided, the resulting hierarchy
  will be rigid body aligned to it. Residue numbers will start from 1.

  pdb_str - "ideal" structure at least 2 residues long.
  sequence - string with sequence (one-letter codes)
  pdb_hierarchy_template - reference hierarchy.
  """
  if rotamer_manager is None:
    rotamer_manager = RotamerEval()
  pht = pdb_hierarchy_template
  assert [sequence, pht].count(None) == 1
  if pht is not None:
    lk = len(pht.altloc_indices().keys())
    if lk ==0:
      raise Sorry(
          "Hierarchy template in secondary_structure_from_sequence is empty")
    else:
      assert len(pht.altloc_indices().keys()) == 1, \
          "Alternative conformations are not supported"
  number_of_residues = len(sequence) if sequence!=None else \
    len(pht.models()[0].chains()[0].conformers()[0].residues())
  if number_of_residues<1:
    raise Sorry('sequence should contain at least one residue.')
  ideal_res_dict = idealized_aa.residue_dict()
  real_res_list = None
  if pht:
    real_res_list = pht.models()[0].chains()[0].residue_groups()
  pdb_hierarchy = iotbx.pdb.input(source_info=None, lines=pdb_str).\
      construct_hierarchy()
  truncate_to_poly_gly(pdb_hierarchy)
  chain = pdb_hierarchy.models()[0].chains()[0]
  current_gly_ag = chain.residue_groups()[0].atom_groups()[0]
  new_chain = iotbx.pdb.hierarchy.chain(id="A")
  new_chain.pre_allocate_residue_groups(number_of_additional_residue_groups=\
                                                            number_of_residues)
  r, t = get_r_t_matrices_from_structure(pdb_str)
  for j in range(number_of_residues):
    # put ALA
    rg = iotbx.pdb.hierarchy.residue_group(icode="")
    rg.resseq = j+1
    new_chain.append_residue_group(residue_group=rg)
    ag_to_place = current_gly_ag.detached_copy()
    rg.append_atom_group(atom_group=ag_to_place)
    current_gly_ag.atoms().set_xyz(
                          r.elems*current_gly_ag.atoms().extract_xyz()+t.elems)
    current_reference_ag = real_res_list[j].atom_groups()[0] if pht else \
        ideal_res_dict[three_one[sequence[j]].lower()].models()[0].chains()[0].\
        residue_groups()[0].atom_groups()[0]
    side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager)
  new_pdb_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(new_chain)
  # align to real
  if pht != None:
    fixed_sites, moving_sites = get_matching_sites_cart_in_both_h(pht, new_pdb_h)
    assert len(fixed_sites) == len(moving_sites)
    lsq_fit_obj = superpose.least_squares_fit(reference_sites = fixed_sites,
                                              other_sites = moving_sites)
    new_pdb_h.atoms().set_xyz(
        lsq_fit_obj.r.elems*new_pdb_h.atoms().extract_xyz()+lsq_fit_obj.t.elems)
  return new_pdb_h
Example #51
0
 def __init__(self,
              pdb_hierarchy,
              crystal_symmetry,
              angular_difference_threshold_deg=10.,
              sequence_identity_threshold=90.):
   h = pdb_hierarchy
   n_atoms_all = h.atoms_size()
   s_str = "altloc ' ' and (protein or nucleotide)"
   h = h.select(h.atom_selection_cache().selection(s_str))
   h1 = h.deep_copy()
   unit_cell = crystal_symmetry.unit_cell()
   result = []
   # double loop over chains to find matching pairs related by pure translation
   for c1 in h1.chains():
     c1.parent().remove_chain(c1)
     if([c1.is_protein(), c1.is_na()].count(True)==0): continue
     r1 = list(c1.residues())
     c1_seq = "".join(c1.as_sequence())
     sc_1_tmp = c1.atoms().extract_xyz()
     h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
     for c2 in h1_p1.chains():
       r2 = list(c2.residues())
       c2_seq = "".join(c2.as_sequence())
       sites_cart_1, sites_cart_2 = None,None
       sc_2_tmp = c2.atoms().extract_xyz()
       # chains are identical
       if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
         sites_cart_1 = sc_1_tmp
         sites_cart_2 = sc_2_tmp
       # chains are not identical, do alignment
       else:
         align_obj = mmtbx.alignment.align(seq_a = c1_seq, seq_b = c2_seq)
         alignment = align_obj.extract_alignment()
         matches = alignment.matches()
         equal = matches.count("|")
         total = len(alignment.a) - alignment.a.count("-")
         p_identity = 100.*equal/max(1,total)
         if(p_identity>sequence_identity_threshold):
           sites_cart_1 = flex.vec3_double()
           sites_cart_2 = flex.vec3_double()
           for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
                                    matches):
             if(i1 is not None and i2 is not None and match=="|"):
               r1i, r2i = r1[i1], r2[i2]
               assert r1i.resname==r2i.resname, [r1i.resname,r2i.resname,i1,i2]
               for a1 in r1i.atoms():
                 for a2 in r2i.atoms():
                   if(a1.name == a2.name):
                     sites_cart_1.append(a1.xyz)
                     sites_cart_2.append(a2.xyz)
                     break
       # superpose two sequence-aligned chains
       if([sites_cart_1,sites_cart_2].count(None)==0):
         lsq_fit_obj = superpose.least_squares_fit(
           reference_sites = sites_cart_1,
           other_sites     = sites_cart_2)
         angle = lsq_fit_obj.r.rotation_angle()
         if(angle < angular_difference_threshold_deg):
           t_frac = unit_cell.fractionalize((sites_cart_1-sites_cart_2).mean())
           t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
           radius = flex.sum(flex.sqrt((sites_cart_1-
             sites_cart_1.mean()).dot()))/sites_cart_1.size()*4./3.
           fracscat = c1.atoms_size()/n_atoms_all
           result.append([lsq_fit_obj.r, t_frac, angle, radius, fracscat])
           # show tNCS group
           fmt="chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
           t = ",".join([("%6.3f"%t_).strip() for t_ in t_frac]).strip()
           print fmt%(c1.id, c2.id, angle, t, fracscat)
   # compose final tNCS pairs object
   self.ncs_pairs = []
   for _ in result:
     r, t, angle, rad, fs = _
     ncs_pair = ext.pair(
       r = r,
       t = t,
       radius=rad,
       radius_estimate=rad,
       fracscat=fs,
       rho_mn=flex.double()) # rho_mn undefined, needs to be set later
     self.ncs_pairs.append(ncs_pair)
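
Several examples above (e.g. the one calling group.add_operator(lsq_fit.rt().inverse(), ...)) work with the combined rigid-body operator rather than with r and t separately. A small sketch of that pattern, reusing the toy coordinates from the first sketch:

from scitbx.array_family import flex
from scitbx.math import superpose

reference_sites = flex.vec3_double([(0, 0, 0), (1.5, 0, 0), (1.5, 1.5, 0)])
other_sites     = flex.vec3_double([(10, 0, 0), (10, 1.5, 0), (8.5, 1.5, 0)])
lsq_fit = superpose.least_squares_fit(
  reference_sites=reference_sites,
  other_sites=other_sites)

rt = lsq_fit.rt()        # combined operator; rt.r and rt.t equal lsq_fit.r/.t
rt_inv = rt.inverse()    # maps the reference frame back onto the other sites
moved_back = rt_inv.r.elems * reference_sites + rt_inv.t.elems
print("round-trip rmsd: %.3f" % other_sites.rms_difference(moved_back))
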