def zipper(self, initial_rms, level):
    """Grow superpositions seeded from triplets of consecutive sites.

    For every run of three consecutive indices the corresponding sites of
    ``self.set_a`` and ``self.set_b`` are least-squares superposed.  A seed
    whose rmsd is below ``initial_rms`` is refined: ``self.pair_sites`` is
    called with the current operators and the fit is recomputed on the
    pairs it returns, until the number of pairs stops growing or the
    iteration counter exceeds ``self.max_iter``.

    Returns a list with one ``[iselection, r, t, rmsd, n_selected]`` entry
    per accepted seed.
    """
    matches = []
    for mid in xrange(1, self.n - 1):
        window = (mid - 1, mid, mid + 1)
        # Triplets of sequence related sites from both sets.
        ref = flex.vec3_double([self.set_a[idx] for idx in window])
        mov = flex.vec3_double([self.set_b[idx] for idx in window])
        lsq = superpose.least_squares_fit(ref, mov)
        # Rotation and translation operators of the seed fit.
        r = lsq.r
        t = lsq.t
        # rmsd on the coordinates used for the seed superposition.
        rmsd = (ref - lsq.other_sites_best_fit()).rms_length()
        if not rmsd < initial_rms:
            continue
        # Apply the operators to the full set and iterate.
        count = 0
        match_size = 0
        while True:
            previous_match_size = match_size
            tmp_a, tmp_b, select = self.pair_sites(r, t, level)
            match_size = tmp_a.size()
            if match_size <= previous_match_size:
                break
            if count > self.max_iter:
                break
            if tmp_b.size() > 0:
                lsq = superpose.least_squares_fit(tmp_a, tmp_b)
                rmsd = tmp_a.rms_difference(lsq.other_sites_best_fit())
                r = lsq.r
                t = lsq.t
            count += 1
        picked = select.deep_copy().iselection()
        matches.append([picked, r, t, rmsd, picked.size()])
    return matches
def zipper(self, initial_rms, level):
    """Seed on consecutive site triplets and extend each acceptable seed.

    Each triplet of neighbouring sites in ``self.set_a`` / ``self.set_b``
    is superposed.  When the triplet rmsd is below ``initial_rms`` the
    operators are applied through ``self.pair_sites`` and refitted on the
    returned pairs until the pair count no longer increases or the
    iteration counter exceeds ``self.max_iter``.

    Returns a list of ``[iselection, r, t, rmsd, n_selected]`` lists.
    """
    results = []
    for center in xrange(1, self.n - 1):
        before = center - 1
        after = center + 1
        # make triplets of sequence related sites
        ref = flex.vec3_double(
            [self.set_a[before], self.set_a[center], self.set_a[after]])
        mov = flex.vec3_double(
            [self.set_b[before], self.set_b[center], self.set_b[after]])
        # get the lsq matrix and its rotation / translation operators
        seed_fit = superpose.least_squares_fit(ref, mov)
        rot, trans = seed_fit.r, seed_fit.t
        # rmsd on the coords used for superposition
        seed_deltas = ref - seed_fit.other_sites_best_fit()
        rmsd = seed_deltas.rms_length()
        if rmsd < initial_rms:
            # apply this rotation to the full set
            n_iter = 0
            n_matched = 0
            finished = False
            while not finished:
                n_before = n_matched
                sites_a, sites_b, select = self.pair_sites(rot, trans, level)
                n_matched = sites_a.size()
                finished = (n_matched <= n_before) or (n_iter > self.max_iter)
                if not finished:
                    if sites_b.size() > 0:
                        refit = superpose.least_squares_fit(sites_a, sites_b)
                        fitted = refit.other_sites_best_fit()
                        rmsd = sites_a.rms_difference(fitted)
                        rot, trans = refit.r, refit.t
                    n_iter += 1
            selection = select.deep_copy().iselection()
            results.append([selection, rot, trans, rmsd, selection.size()])
    return results
def __init__(self, pdb_hierarchy, eps = 0.01, add_identity=True):
    """Derive chain-to-chain operators by superposing chains on the first.

    Operators are only computed when every chain has the same number of
    atoms.  For protein / nucleic-acid chains after the first, the forward
    operator (first copy onto the chain) and the back operator (chain onto
    the first copy) are stored.  If any fitted atom ends up more than 2
    away from its partner, ``self._init()`` is called.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source line -- confirm against the original file.
    ``eps`` is not used by this body.
    """
    self.pdb_hierarchy = pdb_hierarchy
    self.rotation_matrices = []
    self.translation_vectors = []
    self.back_rotation_matrices = []
    self.back_translation_vectors = []
    self.ph_first_chain = None
    n_atoms_per_chain = flex.int()
    for chain in pdb_hierarchy.chains():
        n_atoms_per_chain.append(chain.atoms_size())
    sites_cart_chain_0 = None
    outlier_found = False
    if n_atoms_per_chain.all_eq(n_atoms_per_chain[0]):
        for i_chain, chain in enumerate(pdb_hierarchy.chains()):
            if not (chain.is_na() or chain.is_protein()):
                continue
            n_atoms_per_chain.append(chain.atoms_size())
            if sites_cart_chain_0 is None and i_chain == 0:
                sites_cart_chain_0 = chain.atoms().extract_xyz()
                # The first chain occupies the leading atom indices.
                sel = flex.size_t(xrange(sites_cart_chain_0.size()))
                self.ph_first_chain = pdb_hierarchy.select(sel)
                if add_identity:
                    um = scitbx.matrix.sqr((
                        1, 0, 0,
                        0, 1, 0,
                        0, 0, 1))
                    zv = scitbx.matrix.col((0, 0, 0))
                    self.rotation_matrices.append(um)
                    self.translation_vectors.append(zv)
                    self.back_rotation_matrices.append(um)
                    self.back_translation_vectors.append(zv)
            if i_chain > 0:
                # first copy onto others
                forward = superpose.least_squares_fit(
                    reference_sites=sites_cart_chain_0,
                    other_sites=chain.atoms().extract_xyz())
                self.rotation_matrices.append(forward.r.transpose())
                self.translation_vectors.append(forward.t)
                deltas = sites_cart_chain_0 - forward.other_sites_best_fit()
                d = flex.sqrt(deltas.dot()).min_max_mean().as_tuple()
                if d[1] > 2:
                    outlier_found = True
                # others onto first copy
                backward = superpose.least_squares_fit(
                    reference_sites=chain.atoms().extract_xyz(),
                    other_sites=sites_cart_chain_0)
                self.back_rotation_matrices.append(backward.r)
                self.back_translation_vectors.append(backward.t)
    if outlier_found:
        self._init()
def my_get_rot_trans(ph, master_selection, copy_selection,
                     master_chain_id, copy_chain_id):
    """
    Get rotation and translation using superpose.

    This function is used only when phil parameters are provided. In this
    case we require the selection of NCS master and copies to be correct.
    Correct means:
      1) residue sequence in master and copies is exactly the same
      2) the number of atoms in master and copies is exactly the same

    One can get exact selection strings by ncs_object.show(verbose=True)

    Args:
      ph : hierarchy
      master/copy_selection: master and copy iselections

    Returns:
      (r, t, rmsd) of the fit that moves the master onto the copy, or
      (None, None, None) when the selections are empty.
    """
    master_h = my_selection(ph, master_chain_id, list(master_selection))
    copy_h = my_selection(ph, copy_chain_id, list(copy_selection))
    # The master is the moving set, the copy is the reference.
    moving = master_h.atoms().extract_xyz()
    fixed = copy_h.atoms().extract_xyz()
    assert moving.size() == fixed.size(), "%d, %d" % (
        moving.size(), fixed.size())
    if not fixed.size() > 0:
        return None, None, None
    fit = superpose.least_squares_fit(
        reference_sites=fixed, other_sites=moving)
    rmsd = fixed.rms_difference(fit.other_sites_best_fit())
    return fit.r, fit.t, rmsd
def superpose_ideal_ligand_on_poor_ligand(ideal_hierarchy, poor_hierarchy):
    """Superpose an ideal ligand onto the mangled ligand from a ligand
    fitting procedure.

    Atoms are paired positionally and each pair must carry the same atom
    name.  The coordinates of ``ideal_hierarchy`` are overwritten in place
    with the superposed coordinates.

    Args:
        ideal_hierarchy (pdb_hierarchy): Ideal ligand
        poor_hierarchy (pdb_hierarchy): Poor ligand with correct c.o.m. and
            same atom names in order. Could become more sophisticated.

    Returns:
        ``ideal_hierarchy`` (the same object, modified).
    """
    sites_moving = flex.vec3_double()
    sites_fixed = flex.vec3_double()
    pairs = zip(ideal_hierarchy.atoms(), poor_hierarchy.atoms())
    for ideal_atom, poor_atom in pairs:
        assert ideal_atom.name == poor_atom.name, '%s!=%s' % (
            ideal_atom.quote(), poor_atom.quote())
        sites_moving.append(ideal_atom.xyz)
        sites_fixed.append(poor_atom.xyz)
    fit = superpose.least_squares_fit(
        reference_sites=sites_fixed, other_sites=sites_moving)
    # Apply the operators to every atom of the ideal ligand.
    moved = fit.r.elems * ideal_hierarchy.atoms().extract_xyz() + fit.t.elems
    ideal_hierarchy.atoms().set_xyz(moved)
    return ideal_hierarchy
def superpose_ideal_residue_coordinates(pdb_hierarchy,
                                        resname,
                                        superpose_element=None,
                                        ):
    """Replace coordinates of ``resname`` residues with superposed ideal ones.

    The ideal residue is fitted onto each matching atom group using only the
    atoms of ``superpose_element`` (looked up from the residue name when not
    given).  The moving sites are shuffled 100 times and the lowest-rmsd fit
    is tracked.  Returns a text report (empty when no group was processed).

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source line -- confirm against the original file.  ``new_atoms`` always
    comes from the last shuffle iteration and is then given the best
    coordinates.
    """
    element_lookup = {'SF4' : 'Fe',
                      'F3S' : 'S',
                      #'F4S' : 'S', # not done yet
                      #'CLF' : 'Fe', # too flexible
                      }
    from iotbx import pdb
    from mmtbx.monomer_library import pdb_interpretation
    t0 = time.time()
    rmsd_list = {}
    if superpose_element is None:
        superpose_element = element_lookup.get(resname, None)
    if resname in pdb_interpretation.ideal_ligands:
        ideal_hierarchy = get_pdb_hierarchy_from_restraints(resname)
    else:
        assert 0
    sites_moving = _extract_sites_cart(ideal_hierarchy, superpose_element)
    assert len(sites_moving), 'No atoms %s found' % superpose_element
    # Keep the first atom group of the ideal hierarchy.
    for ideal_ag in ideal_hierarchy.atom_groups():
        break
    for sites_fixed, ag in generate_sites_fixed(pdb_hierarchy,
                                                resname,
                                                superpose_element,
                                                ):
        assert sites_fixed.size() == sites_moving.size(), \
            '%(resname)s residue is missing atoms' % locals()
        import random
        min_rmsd = 1e9
        min_sites_cart = None
        for i in range(100):
            # Try a random pairing of the superposition atoms.
            random.shuffle(sites_moving)
            lsq_fit = superpose.least_squares_fit(
                reference_sites=sites_fixed,
                other_sites=sites_moving)
            new_atoms = ideal_ag.detached_copy().atoms()
            sites_new = lsq_fit.r.elems * new_atoms.extract_xyz() \
                + lsq_fit.t.elems
            rmsd = sites_fixed.rms_difference(lsq_fit.other_sites_best_fit())
            if rmsd < min_rmsd:
                min_rmsd = rmsd
                min_sites_cart = sites_new
        rmsd_list[ag.id_str()] = min_rmsd
        sites_new = min_sites_cart
        new_atoms.set_xyz(sites_new)
        # Copy coordinates over by atom name; every atom must be matched.
        for atom1 in ag.atoms():
            for atom2 in new_atoms:
                if atom1.name.strip() == atom2.name.strip():
                    atom1.xyz = atom2.xyz
                    break
            else:
                assert 0, 'not all atoms updated - missing %s' % atom1.quote()
    outl = ''
    if rmsd_list:
        outl = '\n %(resname)s Regularisation' % locals()
        outl += '\n residue rmsd'
        for id_str, rmsd in sorted(rmsd_list.items()):
            outl += '\n "%s" %0.1f' % (id_str, rmsd)
        outl += '\n Time to superpose : %0.2fs\n' % (time.time()-t0)
    return outl
def __init__(self, all_sites_cart, lsq_fits, buffer=10.0,
             move_to_frame_of_reference=True, log=sys.stdout):
    """Shift fitted site sets into one frame and record the operators.

    ``lsq_fits`` runs parallel to ``all_sites_cart``; a ``None`` entry
    means the sites were not transformed.  For every other entry the
    pre-fit coordinates are recovered by inverting the fit.  All sets are
    then shifted by one common vector (so that the overall minimum sits at
    ``buffer`` along each axis, unless ``move_to_frame_of_reference`` is
    false) and the operator mapping original onto shifted sites is stored
    in ``self.transformation_matrices``.  ``log`` is not used here.
    """
    fitted_sites = []
    original_sites = []
    minima = flex.vec3_double()
    for sites_cart, lsq_fit in zip(all_sites_cart, lsq_fits):
        fitted_sites.append(sites_cart.deep_copy())
        minima.append(sites_cart.min())
        if lsq_fit is None:
            pre_fit = sites_cart.deep_copy()
        else:
            # Undo x' = r*x + t to get the coordinates before the fit.
            pre_fit = lsq_fit.r.inverse().elems * (
                sites_cart - lsq_fit.t.elems)
        original_sites.append(pre_fit)
    xyz_min = minima.min()
    if move_to_frame_of_reference:
        dxyz = (buffer - xyz_min[0],
                buffer - xyz_min[1],
                buffer - xyz_min[2])
    else:
        dxyz = (0, 0, 0)
    self.shifted_sites = []
    self.transformation_matrices = []
    for fitted, pre_fit in zip(fitted_sites, original_sites):
        shifted = fitted + dxyz
        self.shifted_sites.append(shifted)
        fit = superpose.least_squares_fit(
            reference_sites=shifted,
            other_sites=pre_fit)
        self.transformation_matrices.append(fit.rt())
def exercise(method):
    """Check ``least_squares_fit`` on randomly shifted and rotated sites.

    For several site counts a random reference set is shifted by a random
    vector; the fit must recover the reference both for the pure shift and
    after an additional random rotation.
    """
    assert method in ["kearsley", "kabsch"]
    # global shifts
    for n_sites in (1, 3, 7, 10, 30):
        reference = flex.vec3_double(flex.random_double(n_sites*3)*10-5)
        shift = list(flex.random_double(3)*100-50)
        other = reference + shift
        for _ in xrange(10):
            fit = least_squares_fit(reference, other, method)
            assert approx_equal(reference, fit.other_sites_best_fit())
            rotation = random_rotation()
            fit = least_squares_fit(reference, tuple(rotation)*other, method)
            if method == "kearsley":  # Kabsch fails in special cases
                assert approx_equal(fit.r.determinant(), 1)
            assert approx_equal(reference, fit.other_sites_best_fit())
            assert approx_equal(fit.rt().r, fit.r)
            assert approx_equal(fit.rt().t, fit.t)
            assert approx_equal(reference, fit.rt() * fit.other_sites)
def exercise(method):
    """Exercise ``least_squares_fit`` with the given algorithm name.

    Random reference sites are displaced by a global shift, optionally
    followed by a random rotation, and the fit is required to map the
    displaced sites back onto the reference.
    """
    assert method in ["kearsley", "kabsch"]
    site_counts = [1, 3, 7, 10, 30]
    # global shifts
    for n_sites in site_counts:
        coords = flex.random_double(n_sites * 3) * 10 - 5
        reference = flex.vec3_double(coords)
        other = reference + list(flex.random_double(3) * 100 - 50)
        for i_trial in xrange(10):
            s = least_squares_fit(reference, other, method)
            assert approx_equal(reference, s.other_sites_best_fit())
            c = random_rotation()
            rotated = tuple(c) * other
            s = least_squares_fit(reference, rotated, method)
            # Kabsch fails in special cases
            if method == "kearsley":
                assert approx_equal(s.r.determinant(), 1)
            assert approx_equal(reference, s.other_sites_best_fit())
            assert approx_equal(s.rt().r, s.r)
            assert approx_equal(s.rt().t, s.t)
            assert approx_equal(reference, s.rt() * s.other_sites)
def _fit_U_from_superposed_points(reference, other):
    """Return the rotation that superposes ``other`` onto ``reference``.

    The origin is appended to both point sets before fitting.  Note that
    the appends modify the arrays passed in by the caller.
    """
    origin = (0, 0, 0)
    # Add the origin to both sets of points
    for points in (reference, other):
        points.append(origin)
    # Find U matrix that takes ideal relps to the reference
    return superpose.least_squares_fit(reference, other).r
def get_difference_chainsAB(strings):
    """Return per-atom differences between chain A and superposed chain B.

    ``strings`` holds PDB-format lines.  Chain B is least-squares fitted
    onto chain A and the difference (chain A sites minus fitted chain B
    sites) is returned.
    """
    pdb_h = iotbx.pdb.input(
        source_info=None, lines=strings).construct_hierarchy()
    cache = pdb_h.atom_selection_cache()
    sel_a = cache.selection("chain A")
    ref_sites = pdb_h.select(sel_a).atoms().extract_xyz()
    sel_b = cache.selection("chain B")
    other_sites = pdb_h.select(sel_b).atoms().extract_xyz()
    fit = superpose.least_squares_fit(ref_sites, other_sites)
    return ref_sites - fit.other_sites_best_fit()
def side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager):
    """
    Works with poly_gly truncated hierarchy. Also used in fix_rama_outliers.

    The reference atom group is superposed onto ``ag_to_place`` using the
    C, CA and N atoms, snapped to the nearest rotamer, and its atoms other
    than N, CA, C, O are appended to ``ag_to_place``.  Residues that map to
    'G' only get their name set.  Residue names absent from the lookup
    table are kept unchanged (this seems to work with unusual residues).
    """
    resname = current_reference_ag.resname.upper()
    c = one_three.get(resname, None)
    ag_to_place.resname = three_one.get(c, resname)
    if c == 'G':
        return

    # align residue from ideal_res_dict to just placed ALA (ag_to_place)
    # or from pdb_hierarchy_template
    fixed_sites = flex.vec3_double()
    moving_sites = flex.vec3_double()
    reper_atoms = ["C", "CA", "N"]
    targets = [(ag_to_place, fixed_sites),
               (current_reference_ag, moving_sites)]
    for atom_group, collected in targets:
        for atom in atom_group.atoms():
            if atom.name.strip() in reper_atoms:
                collected.append(atom.xyz)
    assert len(fixed_sites) == 3
    if len(moving_sites) < 3:
        error_msg = "C, CA or N atoms are absent in secondary structure element." +\
            "\nPlease add them to the model and try again."
        raise Sorry(error_msg)
    assert len(moving_sites) == 3
    fit = superpose.least_squares_fit(reference_sites=fixed_sites,
                                      other_sites=moving_sites)
    ideal_correct_ag = current_reference_ag.detached_copy()
    moved = fit.r.elems * ideal_correct_ag.atoms().extract_xyz() + fit.t.elems
    ideal_correct_ag.atoms().set_xyz(moved)
    ideal_correct_ag.atoms().set_xyz(
        rotamer_manager.nearest_rotamer_sites_cart(ideal_correct_ag))
    n_ideal_atoms = len(ideal_correct_ag.atoms())
    if n_ideal_atoms > 4:
        ag_to_place.pre_allocate_atoms(
            number_of_additional_atoms=n_ideal_atoms - 4)
        for atom in ideal_correct_ag.atoms():
            if atom.name.strip() not in ["N", "CA", "C", "O"]:
                at = atom.detached_copy()
                at.uij_erase()
                ag_to_place.append_atom(atom=at)
    # Otherwise something is wrong with the input model, e.g. only 3 atoms
    # in the residue and they happened to be N, CA, C: nothing to add.
def _fit_U_from_superposed_points(reference, other):
    """Fit the rotation taking ``other`` onto ``reference``.

    A single default-constructed ``vec3_double`` element (the origin,
    per the original comment) is appended to both arrays first; this
    modifies the arrays passed in by the caller.
    """
    # Add the origin to both sets of points
    extra = flex.vec3_double(1)
    for sites in (reference, other):
        sites.extend(extra)
    # Find U matrix that takes ideal relps to the reference
    superposition = superpose.least_squares_fit(reference, other)
    return superposition.r
def compute_operators(self, sites_cart):
    """Append one operator and one rms value per selection pair.

    For each pair in ``self.selection_pairs`` the second selection is
    least-squares fitted onto the first; the resulting rotation /
    translation goes to ``self.matrices`` and the rms distance after
    applying it goes to ``self.rms``.
    """
    for ref_sel, other_sel in ((p[0], p[1]) for p in self.selection_pairs):
        ref_sites = sites_cart.select(ref_sel)
        other_sites = sites_cart.select(other_sel)
        fit = superpose.least_squares_fit(
            reference_sites=ref_sites,
            other_sites=other_sites)
        rtmx = matrix.rt((fit.r, fit.t))
        self.matrices.append(rtmx)
        residual = ref_sites - rtmx * other_sites
        self.rms.append(flex.mean(residual.dot())**0.5)
def side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager):
    """
    Works with poly_gly truncated hierarchy. Also used in fix_rama_outliers.

    Superposes the reference atom group onto ``ag_to_place`` via the C, CA
    and N atoms, moves it to the nearest rotamer and appends its atoms
    other than N, CA, C, O to ``ag_to_place``.  Raises ``Sorry`` for
    residue names missing from the lookup table or when backbone atoms are
    missing from the reference.
    """
    resname = current_reference_ag.resname.upper()
    c = one_three.get(resname, None)
    if c is None:
        residue_group = current_reference_ag.parent()
        msg = "Only standard protein residues are currently supported.\n"
        msg += "The residue %s (chain %s, resid %s) chain is not standard." % (
            resname,
            residue_group.parent().id,
            current_reference_ag.parent().resid())
        raise Sorry(msg)
    ag_to_place.resname = three_one[c]
    if c == 'G':
        return

    # align residue from ideal_res_dict to just placed ALA (ag_to_place)
    # or from pdb_hierarchy_template
    reper_atoms = ["C", "CA", "N"]
    fixed_sites = flex.vec3_double()
    for a in ag_to_place.atoms():
        if a.name.strip() in reper_atoms:
            fixed_sites.append(a.xyz)
    moving_sites = flex.vec3_double()
    for a in current_reference_ag.atoms():
        if a.name.strip() in reper_atoms:
            moving_sites.append(a.xyz)
    assert len(fixed_sites) == 3
    if len(moving_sites) < 3:
        error_msg = "C, CA or N atoms are absent in secondary structure element." +\
            "\nPlease add them to the model and try again."
        raise Sorry(error_msg)
    assert len(moving_sites) == 3
    lsq_fit_obj = superpose.least_squares_fit(
        reference_sites=fixed_sites,
        other_sites=moving_sites)
    ideal_correct_ag = current_reference_ag.detached_copy()
    ideal_correct_ag.atoms().set_xyz(
        lsq_fit_obj.r.elems * ideal_correct_ag.atoms().extract_xyz()
        + lsq_fit_obj.t.elems)
    nearest = rotamer_manager.nearest_rotamer_sites_cart(ideal_correct_ag)
    ideal_correct_ag.atoms().set_xyz(nearest)
    backbone_names = ["N", "CA", "C", "O"]
    if len(ideal_correct_ag.atoms()) > 4:
        n_extra = len(ideal_correct_ag.atoms()) - 4
        ag_to_place.pre_allocate_atoms(number_of_additional_atoms=n_extra)
        for a in ideal_correct_ag.atoms():
            if a.name.strip() in backbone_names:
                continue
            at = a.detached_copy()
            at.uij_erase()
            ag_to_place.append_atom(atom=at)
    else:
        # This means something wrong with input model, e.g. only 3 atoms in
        # the residue and they happened to be N, CA, C
        pass
def fit_sites(sites_fixed, sites_moving, selection):  # TODO
    """
    Simple least-squares superposition of sites on reference structure.

    The fit is computed on the selected sites only, then applied to all of
    ``sites_moving``; the transformed sites are returned.
    """
    from scitbx.math import superpose
    fixed_subset = sites_fixed.select(selection)
    moving_subset = sites_moving.select(selection)
    assert (len(fixed_subset) == len(moving_subset) > 0)
    fit = superpose.least_squares_fit(
        reference_sites=fixed_subset,
        other_sites=moving_subset)
    return fit.r.elems * sites_moving + fit.t.elems
def exercise():
    """Regression exercise: move a map and model into a common frame.

    A model is rotated / translated with ``phenix.pdbtools``, a map is
    computed from the original model, and ``common_frame_of_reference`` /
    ``transform_map_by_lsq_fit`` are used to carry the map over to the
    transformed model.  Output files are written to the current directory.

    Changes from the previous version: output files are written through
    ``with`` so the handles are closed, and the final message uses the
    ``print()`` call form, which also works on Python 3.
    """
    pdb_file_name_1 = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/misc/1l3r_no_ligand.pdb",
        test=os.path.isfile)
    pdb_in_1 = iotbx.pdb.input(file_name=pdb_file_name_1)
    xrs1 = pdb_in_1.xray_structure_simple()
    ofn = "1l3r_rt.pdb"
    cmd = " ".join([
        "phenix.pdbtools",
        "%s"%pdb_file_name_1,
        "rotate='90 10 20' translate='10 10 10'",
        "output.file_name=%s"%ofn,
        "--quiet"])
    easy_run.call(cmd)
    pdb_in_rt = iotbx.pdb.input(file_name=ofn)
    xrs_rt = pdb_in_rt.xray_structure_simple()
    fft_map_1 = xrs1.structure_factors(d_min=1.5).f_calc().fft_map(
        resolution_factor=1./3)
    fft_map_1.apply_sigma_scaling()
    map_data_1 = fft_map_1.real_map_unpadded()
    mmtbx.maps.utils.write_xplor_map(
        sites_cart=xrs1.sites_cart(),
        unit_cell=xrs1.unit_cell(),
        map_data=map_data_1,
        n_real=fft_map_1.n_real(),
        file_name="1l3r.xplor")
    # Fit the original model onto the rotated / translated one.
    lsq_fit_obj = superpose.least_squares_fit(
        reference_sites=xrs_rt.sites_cart(),
        other_sites=xrs1.sites_cart().deep_copy())
    f_o_r = common_frame_of_reference(
        all_sites_cart=[xrs1.sites_cart(), xrs_rt.sites_cart()],
        lsq_fits=[None, lsq_fit_obj])
    hierarchy_rt = pdb_in_rt.construct_hierarchy()
    rt_mx = f_o_r.transformation_matrices[1]
    hierarchy_rt.atoms().set_xyz(f_o_r.shifted_sites[1])
    with open("1l3r_rt.pdb", "w") as pdb_out:
        pdb_out.write(hierarchy_rt.as_pdb_string())
    xrs_rt, map_data_rt = transform_map_by_lsq_fit(
        fft_map=fft_map_1,
        unit_cell=xrs1.unit_cell(),
        lsq_fit_obj=rt_mx.inverse(),
        pdb_hierarchy=hierarchy_rt,
        d_min=1.5,
        file_name="1l3r_rt.xplor",
        log=null_out())
    f_o_r.inverse_transform_hierarchy(1, hierarchy_rt)
    with open("1l3r.pdb", "w") as pdb_out:
        pdb_out.write(hierarchy_rt.as_pdb_string())
    # A per-site comparison of interpolated map values between the two
    # maps (abs(e1-e2) < 1.) used to follow here and is currently disabled.
    print("OK")
def exercise():
    """Regression exercise for transforming a map between two model frames.

    The reference model is rotated / translated by ``phenix.pdbtools``; a
    map calculated from the reference model is then moved with
    ``common_frame_of_reference`` and ``transform_map_by_lsq_fit``.  Files
    are written to the current directory.

    Change from the previous version: the two PDB outputs are written
    inside ``with`` blocks so the file handles are closed.
    """
    pdb_file_name_1 = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/misc/1l3r_no_ligand.pdb",
        test=os.path.isfile)
    pdb_in_1 = iotbx.pdb.input(file_name=pdb_file_name_1)
    xrs1 = pdb_in_1.xray_structure_simple()
    ofn = "1l3r_rt.pdb"
    cmd_parts = [
        "phenix.pdbtools",
        "%s"%pdb_file_name_1,
        "rotate='90 10 20' translate='10 10 10'",
        "output.file_name=%s"%ofn,
        "--quiet",
    ]
    easy_run.call(" ".join(cmd_parts))
    pdb_in_rt = iotbx.pdb.input(file_name=ofn)
    xrs_rt = pdb_in_rt.xray_structure_simple()
    f_calc = xrs1.structure_factors(d_min=1.5).f_calc()
    fft_map_1 = f_calc.fft_map(resolution_factor=1./3)
    fft_map_1.apply_sigma_scaling()
    map_data_1 = fft_map_1.real_map_unpadded()
    mmtbx.maps.utils.write_xplor_map(
        sites_cart=xrs1.sites_cart(),
        unit_cell=xrs1.unit_cell(),
        map_data=map_data_1,
        n_real=fft_map_1.n_real(),
        file_name="1l3r.xplor")
    lsq_fit_obj = superpose.least_squares_fit(
        reference_sites=xrs_rt.sites_cart(),
        other_sites=xrs1.sites_cart().deep_copy())
    f_o_r = common_frame_of_reference(
        all_sites_cart=[xrs1.sites_cart(), xrs_rt.sites_cart()],
        lsq_fits=[None, lsq_fit_obj])
    hierarchy_rt = pdb_in_rt.construct_hierarchy()
    # From here on use the operator computed for the common frame.
    frame_rt = f_o_r.transformation_matrices[1]
    hierarchy_rt.atoms().set_xyz(f_o_r.shifted_sites[1])
    with open("1l3r_rt.pdb", "w") as out:
        out.write(hierarchy_rt.as_pdb_string())
    xrs_rt, map_data_rt = transform_map_by_lsq_fit(
        fft_map=fft_map_1,
        unit_cell=xrs1.unit_cell(),
        lsq_fit_obj=frame_rt.inverse(),
        pdb_hierarchy=hierarchy_rt,
        d_min=1.5,
        file_name="1l3r_rt.xplor",
        log=null_out())
    f_o_r.inverse_transform_hierarchy(1, hierarchy_rt)
    with open("1l3r.pdb", "w") as out:
        out.write(hierarchy_rt.as_pdb_string())
    # Disabled check: compare eight-point-interpolated map values at
    # matching fractional sites of both maps (abs(e1-e2) < 1.).
    print("OK")
def fit(self, fragment, reference_sites, control_point_indices=None):
    """
    Fit the given fragment to the given sites.

    If control point indices are not given, all points are fit; otherwise
    only the control points are fit and the result is propagated to the
    rest of the fragment coordinates.  Fragment points contribute their
    ``x`` and ``y`` with z set to 0.

    Returns the coordinates of the transformed fragment.
    """
    if not control_point_indices:
        control_point_indices = range(0, len(fragment))
    control_xyz = []
    for idx in control_point_indices:
        control_xyz.append((fragment[idx].x, fragment[idx].y, 0))
    lsf = superpose.least_squares_fit(
        flex.vec3_double(reference_sites),
        flex.vec3_double(control_xyz))
    whole_fragment = flex.vec3_double(
        [(point.x, point.y, 0) for point in fragment])
    return lsf.r.elems * whole_fragment + lsf.t.elems
def __init__(self, group, sites_cart):
    """Compute an operator and rms for each selection pair of ``group``.

    The second selection of every pair is fitted onto the first; operators
    are collected in ``self.matrices`` and the rms distances after applying
    them in ``self.rms``.
    """
    self.group = group
    self.matrices = []
    self.rms = []
    for pair in self.group.selection_pairs:
        reference = sites_cart.select(pair[0])
        other = sites_cart.select(pair[1])
        superposition = superpose.least_squares_fit(
            reference_sites=reference,
            other_sites=other)
        rtmx = matrix.rt((superposition.r, superposition.t))
        self.matrices.append(rtmx)
        d_sq = (reference - rtmx * other).dot()
        self.rms.append(flex.mean(d_sq)**0.5)
def __init__(self, group, sites_cart):
    """Store ``group`` and derive its pairwise superposition operators.

    Each entry of ``group.selection_pairs`` yields one rotation /
    translation (second selection onto first) in ``self.matrices`` and the
    corresponding rms deviation in ``self.rms``.
    """
    self.group = group
    self.matrices = []
    self.rms = []
    for first_sel, second_sel in (
            (p[0], p[1]) for p in self.group.selection_pairs):
        x = sites_cart.select(first_sel)
        moving = sites_cart.select(second_sel)
        fit = superpose.least_squares_fit(
            reference_sites=x, other_sites=moving)
        operator = matrix.rt((fit.r, fit.t))
        self.matrices.append(operator)
        y = operator * moving
        mean_sq = flex.mean((x - y).dot())
        self.rms.append(mean_sq**0.5)
def get_match_rmsd(ph, match):
    """Superpose the two matched chains and report the fit.

    ``match`` unpacks to ``[ch_a_id, ch_b_id, list_a, list_b, res_list_a,
    res_list_b, similarity]``.  Chain A is the moving set and chain B the
    reference.

    Returns ``(rmsd, ref_sites, other_sites_best, r, t)`` with the rmsd
    rounded to 4 decimals, or five ``None`` values when either flattened
    selection is empty.
    """
    assert len(ph.models()) == 1
    [ch_a_id, ch_b_id, list_a, list_b,
     res_list_a, res_list_b, similarity] = match
    flat_a = sorted(x for y in list_a for x in y)
    flat_b = sorted(x for y in list_b for x in y)
    if len(flat_a) == 0 or len(flat_b) == 0:
        # e.g. 3liy (whole chain in AC)
        return None, None, None, None, None
    # my_selection is used instead of selecting on the whole hierarchy:
    # per the original notes this avoids selecting on a huge model and is
    # necessary for models larger than ~50 Mb in PDB format (3iyw, 75 Mb:
    # 88 -> 10 s; 5vu2, 150 Mb: 506 -> 22 s for this function).
    other_h = my_selection(ph, ch_a_id, flat_a)
    ref_h = my_selection(ph, ch_b_id, flat_b)
    other_atoms = other_h.atoms()
    ref_atoms = ref_h.atoms()
    # Flip atom names before the chain alignment so that the chain RMSD
    # comes out right.
    flipped_other_selection = make_flips_if_necessary_torsion(
        ref_h.deep_copy(), other_h.deep_copy())
    other_sites = other_atoms.select(flipped_other_selection).extract_xyz()
    ref_sites = ref_atoms.extract_xyz()
    fit = superpose.least_squares_fit(
        reference_sites=ref_sites,
        other_sites=other_sites)
    # todo: find r_2*A = r*A + t (where the translation is zero)
    # use B = r*A + t, r_2*A = B , r_2 = B*A.inverse()
    other_sites_best = fit.other_sites_best_fit()
    rmsd = round(ref_sites.rms_difference(other_sites_best), 4)
    return rmsd, ref_sites, other_sites_best, fit.r, fit.t
def align_chains_flexible(chn_mov, chn_ref, altlocs=['','A'], cutoff_radius=15):
    """
    Take two chains and perform flexible alignment on them.

    Only alternate conformations supplied in (e.g. altlocs=['','A']) will
    be used for alignment (maximum one conformer).
    Residues are removed that do not contain a full set of backbone atoms
    (N,CA,C,O) for the conformers selected (e.g. altlocs=['','A']).
    Chains will be truncated so that the chains contain an "aligned" set of
    residues (currently sequence-identical).

    One local superposition is computed per residue, using all atoms
    within ``cutoff_radius`` of that residue's C-alpha.

    Fix: the two "same atoms" checks were missing the ``assert`` keyword,
    which made them no-op tuple expressions.  They are now real assertions
    and compare plain Python lists, so a length mismatch fails as well.

    returns LocalAlignment
    """
    # Trim both chains to residues with complete backbones
    chn_mov_cb = complete_backbone(chn_mov, altlocs=altlocs)
    chn_ref_cb = complete_backbone(chn_ref, altlocs=altlocs)
    # Trim both chains to the same set of residues
    chn_ref_cr, chn_mov_cr = common_residues(chn_ref_cb, chn_mov_cb)
    # Create new hierarchies to perform most processing
    h_mov = iotbx.pdb.hierarchy.new_hierarchy_from_chain(chn_mov_cr)
    h_mov.sort_atoms_in_place()
    h_ref = iotbx.pdb.hierarchy.new_hierarchy_from_chain(chn_ref_cr)
    h_ref.sort_atoms_in_place()
    # Extract new processed chain objects
    c_mov = h_mov.only_chain()
    c_ref = h_ref.only_chain()
    # Check that the chains contain the same atoms
    assert list(c_mov.atoms().extract_element()) == \
        list(c_ref.atoms().extract_element()), \
        'chn_mov and chn_ref must contain the same atoms'
    assert list(c_mov.atoms().extract_name()) == \
        list(c_ref.atoms().extract_name()), \
        'chn_mov and chn_ref must contain the same atoms'
    # List of output alignments and alignment sites
    o_rts = []
    o_xyz = []
    r_xyz = []
    # Extract xyz coords
    xyz_mov = c_mov.atoms().extract_xyz()
    xyz_ref = c_ref.atoms().extract_xyz()
    # Iterate through and create an alignment for each C-alpha
    for rg_mov in c_mov.residue_groups():
        # Find the atoms near the C-alpha
        ca_atm = extract_atom(
            residue=rg_mov.conformers()[0].only_residue(), atom='CA')
        nr_sel = nearby_coords_bool(
            query=ca_atm.xyz, coords=xyz_mov, cutoff=cutoff_radius)
        # Select the sites from both chains (same mask: atoms correspond)
        xyz_mov_sel = xyz_mov.select(nr_sel)
        xyz_ref_sel = xyz_ref.select(nr_sel)
        # Calculate the alignment for this residue
        rt_atm = superpose.least_squares_fit(
            reference_sites=xyz_ref_sel, other_sites=xyz_mov_sel).rt()
        # Save the rotation matrix and the coordinates of the c-alpha
        o_xyz.append(ca_atm.xyz)
        o_rts.append(rt_atm)
        # Reference site paired with the moving C-alpha (zero distance)
        is_ca = (xyz_mov_sel - ca_atm.xyz).dot() == 0.0
        r_xyz.append(xyz_ref_sel.select(is_ca)[0])
    # Return LocalAlignment object
    return LocalAlignment(
        alignments=o_rts, alignment_sites=o_xyz, reference_sites=r_xyz)
def evaluate_backrub_pair_impl(
        calphas_A,
        calphas_B,
        labels=(),
        max_calpha_sep=5.0,
        rmsd_limit=0.1,
        backrub_angle_limit=10.0):  # FIXME is this an appropriate cutoff?
    """Test two five-residue C-alpha stretches for a backrub relationship.

    The four flanking C-alphas (positions 0, 1, 3, 4) of B are superposed
    onto those of A.  A ``backrub_residue`` is returned when that rmsd is
    at most ``rmsd_limit`` and the dihedral between the two central
    C-alphas about the averaged neighbouring positions is at least
    ``backrub_angle_limit`` degrees in magnitude; otherwise ``None``.
    ``None`` is also returned when any atom is missing or consecutive
    C-alphas of A are more than ``max_calpha_sep`` apart.
    """
    assert (len(calphas_A) == len(calphas_B) == 5)
    if (None in calphas_A) or (None in calphas_B):
        return None
    for k_res in range(0, 4):
        separation = calphas_A[k_res].distance(calphas_A[k_res+1])
        if separation > max_calpha_sep:
            return None
    from scitbx.array_family import flex
    from scitbx.math import superpose
    from scitbx.matrix import col
    import scitbx.math
    flanking = [0, 1, 3, 4]
    sites_A = flex.vec3_double([calphas_A[k].xyz for k in flanking])
    sites_B = flex.vec3_double([calphas_B[k].xyz for k in flanking])
    lsq_fit = superpose.least_squares_fit(
        reference_sites=sites_A,
        other_sites=sites_B)
    sites_B_new = lsq_fit.other_sites_best_fit()
    rmsd = sites_B_new.rms_difference(sites_A)
    # Averaged positions of the residues on either side of the centre.
    ca2 = (col(sites_A[1]) + col(sites_B_new[1])) / 2
    ca4 = (col(sites_A[2]) + col(sites_B_new[2])) / 2
    # Central C-alpha of A and the superposed central C-alpha of B.
    ca3r = col(calphas_A[2].xyz)
    ca3m = lsq_fit.rt() * calphas_B[2].xyz
    backrub_angle = scitbx.math.dihedral_angle(
        sites=[ca3r.elems, ca2.elems, ca4.elems, ca3m.elems],
        deg=True)
    if not ((rmsd <= rmsd_limit) and
            (abs(backrub_angle) >= backrub_angle_limit)):
        return None
    if len(labels) == 0:
        labels = (calphas_A[2].fetch_labels().altloc,
                  calphas_B[2].fetch_labels().altloc)
    return backrub_residue(
        calpha=calphas_A[2],
        i_mod=labels[0],
        j_mod=labels[1],
        rmsd=rmsd,
        backrub_angle=backrub_angle)
def align_chains_rigid(mov_chain, ref_chain):
    """Takes two chains and aligns them - return rt_mx.

    The chain sequences are aligned locally; sites of positions that match
    exactly ('|') or well ('*') and are flagged for use in both chains are
    superposed.  The alignment is pretty-printed as a side effect.

    Returns ``(lsq_rt, mov_sites_sel, ref_sites_sel)``.  Raises
    ``Exception`` when no sites could be paired.
    """
    mov_seq, mov_sites, mov_flags = extract_sites_for_alignment(mov_chain)
    ref_seq, ref_sites, ref_flags = extract_sites_for_alignment(ref_chain)
    align_obj = mmtbx.alignment.align(
        seq_a=ref_seq,
        seq_b=mov_seq,
        gap_opening_penalty=20,
        gap_extension_penalty=2,
        similarity_function='blosum50',
        style='local')
    # Extract the alignment
    alignment = align_obj.extract_alignment()
    # List of matches - '|' for exact match, '*' for good match
    matches = alignment.matches()
    equal = matches.count("|")
    similar = matches.count("*")
    total = len(alignment.a) - alignment.a.count("-")
    alignment.pretty_print(
        matches=matches,
        block_size=50,
        n_block=1,
        top_name="fixed",
        bottom_name="moving")
    # Create list of selected sites
    ref_sites_sel = flex.vec3_double()
    mov_sites_sel = flex.vec3_double()
    accepted = ["|", "*"]
    for i_ref, i_mov, mark in zip(
            alignment.i_seqs_a, alignment.i_seqs_b, matches):
        if mark not in accepted:
            continue
        # Only use sites flagged as usable in both chains
        if ref_flags[i_ref] and mov_flags[i_mov]:
            ref_sites_sel.append(ref_sites[i_ref])
            mov_sites_sel.append(mov_sites[i_mov])
    if ref_sites_sel.size() == 0:
        raise Exception("No matching C-alpha atoms.")
    fit = superpose.least_squares_fit(
        reference_sites=ref_sites_sel, other_sites=mov_sites_sel)
    lsq_rt = fit.rt()
    return lsq_rt, mov_sites_sel, ref_sites_sel
def recalculate_ncs_transforms(self, asu_site_cart):
    """
    Re-evaluate the rotation and translation in the ncs groups list, based
    on the ncs groups selection and the atoms location. Updates self.

    Args:
      asu_site_cart (flex.vec_3): the complete ASU sites cart (coordinates)
    """
    for group in self:
        # The master sites are the moving set for every copy of the group.
        master_sel = group.master_iselection
        for copy in group.copies:
            # other_sites are the master, reference_sites are the copies
            fit = superpose.least_squares_fit(
                reference_sites=asu_site_cart.select(copy.iselection),
                other_sites=asu_site_cart.select(master_sel))
            copy.r = fit.r
            copy.t = fit.t
def recalculate_ncs_transforms(ncs_restraints_group_list, asu_site_cart):
    """
    Re-evaluate the rotation and translation in the ncs groups list, based
    on the ncs groups selection and the atoms location.
    Updates the ncs_restraints_group_list object in place.

    Args:
      ncs_restraints_group_list: list of ncs restraints group objects
      asu_site_cart (flex.vec_3): the complete ASU sites cart (coordinates)
    """
    for gr in ncs_restraints_group_list:
        m_sel = gr.master_iselection
        for cp in gr.copies:
            copy_sites = asu_site_cart.select(cp.iselection)
            master_sites = asu_site_cart.select(m_sel)
            # other_sites are the master, reference_sites are the copies
            lsq_fit_obj = superpose.least_squares_fit(
                reference_sites=copy_sites,
                other_sites=master_sites)
            cp.r, cp.t = lsq_fit_obj.r, lsq_fit_obj.t
def evaluate_backrub_pair_impl(
        calphas_A,
        calphas_B,
        labels=(),
        max_calpha_sep=5.0,
        rmsd_limit=0.1,
        backrub_angle_limit=10.0):  # FIXME is this an appropriate cutoff?
    """Decide whether two C-alpha pentads are related by a backrub motion.

    Positions 0, 1, 3 and 4 of ``calphas_B`` are fitted onto the same
    positions of ``calphas_A``; the central atoms are then compared through
    a dihedral angle.  Returns a ``backrub_residue`` when the flank rmsd is
    within ``rmsd_limit`` and the angle magnitude reaches
    ``backrub_angle_limit`` (degrees), else ``None``.
    """
    assert (len(calphas_A) == len(calphas_B) == 5)
    if (None in calphas_A) or (None in calphas_B):
        return None
    # Reject stretches with a gap between consecutive C-alphas of A.
    for first, second in zip(calphas_A[0:4], calphas_A[1:5]):
        if first.distance(second) > max_calpha_sep:
            return None
    from scitbx.array_family import flex
    from scitbx.math import superpose
    from scitbx.matrix import col
    import scitbx.math
    sites_A = flex.vec3_double()
    sites_B = flex.vec3_double()
    for k in [0, 1, 3, 4]:
        sites_A.append(calphas_A[k].xyz)
    for k in [0, 1, 3, 4]:
        sites_B.append(calphas_B[k].xyz)
    lsq_fit = superpose.least_squares_fit(reference_sites=sites_A,
                                          other_sites=sites_B)
    sites_B_new = lsq_fit.other_sites_best_fit()
    rmsd = sites_B_new.rms_difference(sites_A)
    center_A = calphas_A[2]
    center_B = calphas_B[2]
    ca2 = (col(sites_A[1]) + col(sites_B_new[1])) / 2
    ca3r = col(center_A.xyz)
    ca3m = lsq_fit.rt() * center_B.xyz
    ca4 = (col(sites_A[2]) + col(sites_B_new[2])) / 2
    dihedral_sites = [ca3r.elems, ca2.elems, ca4.elems, ca3m.elems]
    backrub_angle = scitbx.math.dihedral_angle(sites=dihedral_sites, deg=True)
    if (rmsd <= rmsd_limit) and (abs(backrub_angle) >= backrub_angle_limit):
        if len(labels) == 0:
            labels = (center_A.fetch_labels().altloc,
                      center_B.fetch_labels().altloc)
        return backrub_residue(calpha=center_A,
                               i_mod=labels[0],
                               j_mod=labels[1],
                               rmsd=rmsd,
                               backrub_angle=backrub_angle)
    return None
def get_match_rmsd(ph, match):
    """
    Superpose the two chains of a match and report the fit.

    Args:
      ph: hierarchy; must contain exactly one model
      match: seven-item sequence
        [ch_a_id, ch_b_id, list_a, list_b, res_list_a, res_list_b, similarity]
        where list_a / list_b are the selections of the two chains

    Returns:
      (rmsd, ref_sites, other_sites_best, r, t), where rmsd is rounded to
      four decimals, ref_sites are the sites selected by list_b,
      other_sites_best are the list_a sites after the best-fit
      transformation, and r / t are the fitted rotation and translation.
      Returns five ``None`` values when either selection is empty.
    """
    assert len(ph.models()) == 1
    # Unpacking also checks that the match has the expected seven items.
    [_ch_a_id, _ch_b_id, list_a, list_b,
     _res_list_a, _res_list_b, _similarity] = match
    sel_a = make_selection_from_lists(list_a)
    sel_b = make_selection_from_lists(list_b)
    if sel_a.size() == 0 or sel_b.size() == 0:
        # e.g. 3liy (whole chain in AC)
        return None, None, None, None, None
    other_h = ph.select(sel_a)
    other_atoms = other_h.atoms()
    ref_h = ph.select(sel_b)
    ref_atoms = ref_h.atoms()
    # Flip atom names before the chain alignment so that the chain RMSD
    # is computed on correctly paired atoms.
    flipped_other_selection = make_flips_if_necessary_torsion(
        ref_h.deep_copy(), other_h.deep_copy())
    other_sites = other_atoms.select(flipped_other_selection).extract_xyz()
    ref_sites = ref_atoms.extract_xyz()
    lsq_fit_obj = superpose.least_squares_fit(
        reference_sites=ref_sites,
        other_sites=other_sites)
    r = lsq_fit_obj.r
    t = lsq_fit_obj.t
    # todo: find r_2*A = r*A + t (where the translation is zero)
    # use B = r*A + t, r_2*A = B , r_2 = B*A.inverse()
    other_sites_best = lsq_fit_obj.other_sites_best_fit()
    rmsd = round(ref_sites.rms_difference(other_sites_best), 4)
    return rmsd, ref_sites, other_sites_best, r, t
def get_r_t_matrices_from_structure(pdb_str):
    """
    Return rotation and translation matrices for the ideal structure.

    The r and t matrices come from the alignment of the main-chain atoms
    (N, CA, C, O) of the 1st residue onto those of the 2nd residue of the
    first conformer of the first chain in pdb_str.

    Raises:
      Sorry: if the structure contains fewer than 2 residues
    """
    pdb_input = iotbx.pdb.input(source_info=None, lines=pdb_str)
    pdb_hierarchy = pdb_input.construct_hierarchy()
    first_chain = pdb_hierarchy.models()[0].chains()[0]
    residues = first_chain.conformers()[0].residues()
    fixed_sites = flex.vec3_double()
    moving_sites = flex.vec3_double()
    main_chain_atoms = ["N","CA","C","O"]
    if len(residues) < 2:
        raise Sorry('pdb_str should contain at least 2 residues')
    site_targets = ((residues[0], fixed_sites), (residues[1], moving_sites))
    for residue, sites in site_targets:
        for atom in residue.atoms():
            if atom.name.strip() in main_chain_atoms:
                sites.append(atom.xyz)
    # The second residue is the reference; the first residue is moved onto it.
    fit = superpose.least_squares_fit(
        reference_sites=moving_sites,
        other_sites=fixed_sites)
    return fit.r, fit.t
def my_get_rot_trans(
        ph,
        master_selection,
        copy_selection):
    """
    Get rotation and translation using superpose.

    This function is used only when phil parameters are provided. In this case
    we require the selection of NCS master and copies to be correct.
    Correct means:
      1) residue sequence in master and copies is exactly the same
      2) the number of atoms in master and copies is exactly the same

    One can get exact selection strings by ncs_object.show(verbose=True)

    Args:
      ph : hierarchy
      master/copy_selection: master and copy iselections

    Returns:
      (r, t, rmsd) of the fit that moves the master sites onto the copy
      sites, or (None, None, None) when the selections are empty
    """
    atoms = ph.atoms()
    master_xyz = atoms.select(master_selection).extract_xyz()
    copy_xyz = atoms.select(copy_selection).extract_xyz()
    assert master_xyz.size() == copy_xyz.size(), "%d, %d" % (
        master_xyz.size(), copy_xyz.size())
    if copy_xyz.size() <= 0:
        return None, None, None
    # The copy is the reference; the master is moved onto it.
    fit = superpose.least_squares_fit(
        reference_sites=copy_xyz,
        other_sites=master_xyz)
    rotation = fit.r
    translation = fit.t
    rmsd = copy_xyz.rms_difference(fit.other_sites_best_fit())
    return rotation, translation, rmsd
def align_model(self, i_model):
    """
    Superpose one related chain onto the reference sites.

    Atoms of the moving chain that pass ``self.atom_selection_string`` are
    paired with reference atoms by the key "<resid> <atom name>" looked up in
    ``self.atoms_ref``.

    Args:
      i_model: index into ``self.related_chains``

    Returns:
      the result of superpose.sieve_fit (when ``self.sieve_fit`` is true) or
      superpose.least_squares_fit, or None when no atom could be paired
    """
    from scitbx.array_family import flex
    from scitbx.math import superpose
    hierarchy_moving = self.related_chains[i_model].pdb_hierarchy
    mov_atoms = hierarchy_moving.atoms()
    mov_atoms.reset_i_seq()
    selection_cache = hierarchy_moving.atom_selection_cache()
    is_selected = selection_cache.selection(self.atom_selection_string)
    mov_chain = hierarchy_moving.only_model().only_chain()
    ref_indices = flex.size_t()
    mov_indices = flex.size_t()
    for residue_group in mov_chain.residue_groups():
        for atom in residue_group.only_atom_group().atoms():
            if is_selected[atom.i_seq]:
                atom_key = "%s %s" % (residue_group.resid(), atom.name.strip())
                if atom_key in self.atoms_ref:
                    mov_indices.append(atom.i_seq)
                    ref_indices.append(self.atoms_ref.index(atom_key))
    if len(ref_indices) == 0:
        assert (self.atom_selection_string is not None)
        return None
    assert (len(ref_indices) > 0) and (len(ref_indices) == len(mov_indices))
    all_mov_sites = mov_atoms.extract_xyz()
    sites_mov = all_mov_sites.select(mov_indices)
    sites_ref = self.reference_sites.select(ref_indices)
    if not self.sieve_fit:
        return superpose.least_squares_fit(
            reference_sites=sites_ref,
            other_sites=sites_mov)
    return superpose.sieve_fit(
        sites_fixed=sites_ref,
        sites_moving=sites_mov,
        frac_discard=self.frac_discard)
def __init__(self, all_sites_cart, lsq_fits, buffer=10.0, log=sys.stdout):
    """
    Shift a set of fitted models by a common offset and derive, per model,
    the transformation from its original coordinates to the shifted ones.

    The offset on each axis is ``buffer`` minus the corresponding component
    of ``minima.min()``, where ``minima`` collects ``sites_cart.min()`` of
    every model.

    Args:
      all_sites_cart: per-model coordinate arrays (already fitted)
      lsq_fits: per-model fit objects (with ``r`` and ``t``) used to recover
        the pre-fit coordinates, or None for models that were not moved
      buffer: target offset added on every axis
      log: accepted for interface compatibility; not used here

    Sets:
      self.shifted_sites: the shifted copy of each model's sites
      self.transformation_matrices: ``rt()`` of the fit of each model's
        original sites onto its shifted sites
    """
    fitted_sites = []
    original_sites = []
    minima = flex.vec3_double()
    for sites_cart, lsq_fit in zip(all_sites_cart, lsq_fits):
        fitted_sites.append(sites_cart.deep_copy())
        minima.append(sites_cart.min())
        if lsq_fit is not None:
            # Undo the earlier fit to recover the pre-fit coordinates.
            unfitted = lsq_fit.r.inverse().elems * (sites_cart - lsq_fit.t.elems)
            original_sites.append(unfitted)
        else:
            original_sites.append(sites_cart.deep_copy())
    xyz_min = minima.min()
    dxyz = tuple(buffer - xyz_min[axis] for axis in range(3))
    self.shifted_sites = []
    self.transformation_matrices = []
    for fitted, original in zip(fitted_sites, original_sites):
        shifted = fitted + dxyz
        self.shifted_sites.append(shifted)
        fit_to_shifted = superpose.least_squares_fit(
            reference_sites=shifted,
            other_sites=original)
        self.transformation_matrices.append(fit_to_shifted.rt())
def align_model(self, i_model):
    """
    Superpose one related chain onto the reference sites.

    Atoms of the moving chain that pass ``self.atom_selection_string`` are
    paired with reference atoms by the key "<resid> <atom name>" looked up in
    ``self.atoms_ref``.

    Args:
      i_model: index into ``self.related_chains``

    Returns:
      the result of superpose.sieve_fit (when ``self.sieve_fit`` is true) or
      superpose.least_squares_fit, or None when no atom could be paired
    """
    from scitbx.array_family import flex
    from scitbx.math import superpose
    hierarchy_moving = self.related_chains[i_model].pdb_hierarchy
    mov_atoms = hierarchy_moving.atoms()
    mov_atoms.reset_i_seq()
    selection_cache = hierarchy_moving.atom_selection_cache()
    is_selected = selection_cache.selection(self.atom_selection_string)
    mov_chain = hierarchy_moving.only_model().only_chain()
    ref_indices = flex.size_t()
    mov_indices = flex.size_t()
    for residue_group in mov_chain.residue_groups():
        for atom in residue_group.only_atom_group().atoms():
            if is_selected[atom.i_seq]:
                atom_key = "%s %s" % (residue_group.resid(), atom.name.strip())
                if atom_key in self.atoms_ref:
                    mov_indices.append(atom.i_seq)
                    ref_indices.append(self.atoms_ref.index(atom_key))
    if len(ref_indices) == 0:
        assert (self.atom_selection_string is not None)
        return None
    assert (len(ref_indices) > 0) and (len(ref_indices) == len(mov_indices))
    all_mov_sites = mov_atoms.extract_xyz()
    sites_mov = all_mov_sites.select(mov_indices)
    sites_ref = self.reference_sites.select(ref_indices)
    if not self.sieve_fit:
        return superpose.least_squares_fit(
            reference_sites=sites_ref,
            other_sites=sites_mov)
    return superpose.sieve_fit(
        sites_fixed=sites_ref,
        sites_moving=sites_mov,
        frac_discard=self.frac_discard)
def find_ncs_operators(pdb_hierarchy, max_rmsd=2.0, try_sieve_fit=True,
        log=None):
    """
    Determines all possible NCS transformation matrices for the input structure,
    based on sequence alignment and simple C-alpha superposition. There may be
    multiple sets of operators but these will eventually become a flat list.

    :param pdb_hierarchy: hierarchy in which NCS groups are searched
    :param max_rmsd: maximum allowable RMSD between NCS-related chains for use
      in ligand superposition
    :param try_sieve_fit: also perform a sieve fit between chains and use the
      resulting operator if the RMSD is lower than the global fit
    :param log: filehandle-like object
    :returns: list of lists of group_operators objects
    :raises Sorry: if no NCS groups are found in the input model
    """
    import iotbx.ncs
    from scitbx.math import superpose
    from scitbx.array_family import flex
    ncs_obj = iotbx.ncs.input(hierarchy=pdb_hierarchy)
    ncs_groups = []
    for k,v in ncs_obj.ncs_to_asu_selection.iteritems():
        ncs_groups.append([k]+v)
    if (len(ncs_groups) == 0):
        raise Sorry("No NCS present in the input model.")
    for k, group in enumerate(ncs_groups):
        print >> log, "Group %d:" % (k+1)
        for sele in group:
            print >> log, " %s" % sele
    selection_cache = pdb_hierarchy.atom_selection_cache()
    pdb_atoms = pdb_hierarchy.atoms()
    sites_cart = pdb_atoms.extract_xyz()
    operators = []

    def get_selection(sele_str):
        # Restrict a group selection to C-alpha atoms, blank or A altloc only.
        sele_str = "(%s) and name CA and (altloc ' ' or altloc A)" % sele_str
        return selection_cache.selection(sele_str).iselection()

    for restraint_group in ncs_groups:
        group_ops = []
        assert (len(restraint_group) >= 2)
        # XXX This is currently an all-vs-all loop, which means that each
        # NCS relationship will be calculated (and stored) twice. Need to figure
        # out whether this actually matters in practice.
        for j, sele_str in enumerate(restraint_group):
            sele_j = get_selection(sele_str)
            group = group_operators(sele_j, sele_str, sites_cart)
            assert (len(sele_j) > 0)
            calpha_ids = []
            for i_seq in sele_j:
                resid = resid_str(pdb_atoms[i_seq])
                if (not resid in calpha_ids):
                    calpha_ids.append(resid)
            for k, sele_str_k in enumerate(restraint_group):
                if (k == j):
                    continue
                sele_k = get_selection(sele_str_k)
                group_sele = flex.size_t()
                group_ids = set([])
                assert (len(sele_k) > 0)
                # poor man's sequence alignment
                for i_seq in sele_k:
                    id_str = resid_str(pdb_atoms[i_seq])
                    if (id_str in group_ids):
                        continue
                    group_ids.add(id_str)
                    if (id_str in calpha_ids):
                        group_sele.append(i_seq)
                first_sele_copy = flex.size_t()
                for i_seq, id_str in zip(sele_j, calpha_ids):
                    if (id_str in group_ids):
                        first_sele_copy.append(i_seq)
                assert (len(first_sele_copy) == len(group_sele))
                assert (len(group_sele) > 0)
                sites_ref = sites_cart.select(first_sele_copy)
                sites_group = sites_cart.select(group_sele).deep_copy()
                lsq_fit = superpose.least_squares_fit(
                    reference_sites=sites_ref,
                    other_sites=sites_group)
                sites_fit = lsq_fit.r.elems * sites_group + lsq_fit.t.elems
                rmsd = sites_ref.rms_difference(sites_fit)
                if (try_sieve_fit):
                    lsq_fit_2 = superpose.sieve_fit(
                        sites_fixed=sites_ref,
                        sites_moving=sites_group,
                        frac_discard=0.25)
                    sites_fit_2 = (lsq_fit_2.r.elems * sites_group +
                                   lsq_fit_2.t.elems)
                    # Score the sieve-fit operator with the sites it actually
                    # produced (previously the global-fit sites were reused,
                    # so rmsd_2 could never differ from rmsd).
                    rmsd_2 = sites_ref.rms_difference(sites_fit_2)
                    if (rmsd_2 < rmsd):
                        print >> log, " using sieve fit (RMSD = %.3f, RMSD(all) = %.3f)" %\
                            (rmsd_2, rmsd)
                        lsq_fit = lsq_fit_2
                        rmsd = rmsd_2
                print >> log, " %d versus %d RMSD = %.3f" % (j+1, k+1, rmsd)
                if (rmsd <= max_rmsd):
                    # The fit moves copy k onto copy j; the stored operator is
                    # the inverse of that transformation.
                    group.add_operator(lsq_fit.rt().inverse(), sele_str_k)
                else:
                    print >> log, " exceeds cutoff, will not use this operator"
            group_ops.append(group)
        operators.append(group_ops)
    return operators
def add_to(self, reparametrisation):
    """
    Register the "same group" constraint with a reparametrisation.

    ``self.groups[0]`` is the reference group; every further group is tied to
    it through an alignment matrix obtained by least-squares superposition of
    the orthogonalized reference coordinates onto the group's coordinates.
    Does nothing when neither ``self.fix_u`` nor ``self.fix_xyz`` is set.

    Args:
      reparametrisation: object providing ``structure``, ``add``,
        ``add_new_site_parameter``,
        ``add_new_thermal_displacement_parameter``,
        ``add_new_same_group_site_proxy_parameter``,
        ``asu_scatterer_parameters`` and ``shared_Us``

    Raises:
      InvalidConstraint: if a group differs in size from the reference group
        or pairs an isotropic scatterer with an anisotropic one
    """
    if not self.fix_u and not self.fix_xyz:
        return
    scatterers = reparametrisation.structure.scatterers()
    ref_sites = []
    ref_u_isos = []
    ref_u_stars = []
    ref_adps = []  # NOTE(review): never read in this method
    src_crds = []
    inv_src_crds = []
    uc = reparametrisation.structure.unit_cell()
    # Collect Cartesian coordinates and parameters of the reference group.
    for i in self.groups[0]:
        src_crds.append(uc.orthogonalize(scatterers[i].site))
        if self.fix_xyz:
            ref_sites.append(reparametrisation.add_new_site_parameter(i))
        if self.fix_u:
            if scatterers[i].flags.use_u_iso():
                ref_u_isos.append(
                    reparametrisation.add_new_thermal_displacement_parameter(i))
            else:
                ref_u_stars.append(
                    reparametrisation.add_new_thermal_displacement_parameter(i))
    for g in self.groups[1:]:
        if len(g) != len(self.groups[0]):
            raise InvalidConstraint("Group size mismatch")
        g_scatterers = []
        g_u_iso_scatterers =[]
        g_u_star_scatterers = []
        crds = []
        for idx, i in enumerate(g):
            # Scatterers are paired by position; both must be iso or both aniso.
            if scatterers[i].flags.use_u_iso() !=\
               scatterers[self.groups[0][idx]].flags.use_u_iso():
                raise InvalidConstraint("Mixing isotropic and anisotropic parameters")
            g_scatterers.append(scatterers[i])
            crds.append(uc.orthogonalize(scatterers[i].site))
            if scatterers[i].flags.use_u_iso():
                g_u_iso_scatterers.append(scatterers[i])
            else:
                g_u_star_scatterers.append(scatterers[i])
        #need to map reference to target
        lsf = superpose.least_squares_fit(
            flex.vec3_double(crds), flex.vec3_double(src_crds))
        #create a list of inverted coordinates if needed
        # (built once, on the first non-reference group, as
        # 2*lsf.other_shift - x for every reference coordinate x)
        if len(inv_src_crds) == 0:
            for i in range(0, len(g)):
                inv_src_crds.append(
                    2*matrix.col(lsf.other_shift)-matrix.col(src_crds[i]))
        rm = lsf.r
        t = matrix.col(lsf.reference_shift)-matrix.col(lsf.other_shift)
        new_crd = lsf.other_sites_best_fit()
        # d: summed squared deviation of the direct fit.
        d = 0
        for i, c in enumerate(new_crd):
            d += matrix.col(matrix.col(c)-matrix.col(crds[i])).length_sq()
        # Repeat the fit with the inverted reference coordinates.
        lsf = superpose.least_squares_fit(
            flex.vec3_double(crds), flex.vec3_double(inv_src_crds))
        new_crd = lsf.other_sites_best_fit()
        d_inv = 0
        for i, c in enumerate(new_crd):
            d_inv += matrix.col(matrix.col(c)-matrix.col(crds[i])).length_sq()
        # Use the negated rotation when the inverted reference fits better.
        if d_inv < d:
            rm = -lsf.r
        if self.fix_xyz:
            # Six-vector parameter: translation t followed by three zeros.
            shifts_and_angles =\
                reparametrisation.add(_.independent_small_6_vector_parameter,
                                      value=(t[0],t[1],t[2],0,0,0), variable=True)
            if len(ref_u_stars) > 0:
                u_star_param = reparametrisation.add(
                    _.same_group_u_star,
                    scatterers=g_u_star_scatterers,
                    u_stars=ref_u_stars,
                    alignment_matrix=rm,
                    shifts_and_angles=shifts_and_angles
                )
        elif len(ref_u_stars) > 0:
            # Only ADPs are constrained: a three-vector parameter initialised
            # from self.angles is used instead.
            angles =\
                reparametrisation.add(_.independent_small_3_vector_parameter,
                                      value=self.angles, variable=True)
            u_star_param = reparametrisation.add(
                _.same_group_u_star,
                scatterers=g_u_star_scatterers,
                u_stars=ref_u_stars,
                alignment_matrix=rm,
                angles=angles
            )
        if self.fix_xyz:
            site_param = reparametrisation.add(
                _.same_group_xyz,
                scatterers=g_scatterers,
                sites=ref_sites,
                alignment_matrix=rm,
                shifts_and_angles=shifts_and_angles
            )
        if len(ref_u_isos) > 0:
            u_iso_param = reparametrisation.add(
                _.same_group_u_iso,
                scatterers=g_u_iso_scatterers,
                u_isos=ref_u_isos
            )
        # Attach a proxy parameter to every scatterer of this group; each
        # proxy index counts scatterers of its own kind only.
        site_proxy_index = 0
        u_star_proxy_index = 0
        u_iso_proxy_index = 0
        for i in g:
            if self.fix_xyz:
                reparametrisation.asu_scatterer_parameters[i].site = site_param
                reparametrisation.add_new_same_group_site_proxy_parameter(
                    site_param, site_proxy_index, i)
                site_proxy_index += 1
            if self.fix_u:
                if scatterers[i].flags.use_u_iso():
                    reparametrisation.asu_scatterer_parameters[i].u = u_iso_param
                    reparametrisation.shared_Us[i] = reparametrisation.add(
                        _.same_group_u_iso_proxy,
                        parent=u_iso_param,
                        index=u_iso_proxy_index
                    )
                    u_iso_proxy_index += 1
                else:
                    reparametrisation.asu_scatterer_parameters[i].u = u_star_param
                    reparametrisation.shared_Us[i] = reparametrisation.add(
                        _.same_group_u_star_proxy,
                        parent=u_star_param,
                        index=u_star_proxy_index
                    )
                    u_star_proxy_index += 1
def get_rot_trans(ph, master_selection, copy_selection, chain_max_rmsd=0.02):
    """
    Get rotation and translation using superpose.

    This function is used only when phil parameters are provided. In this case
    we require the selection of NCS master and copies to be correct.
    Correct means:
      1) residue sequence in master and copies is exactly the same
      2) the number of atoms in master and copies is exactly the same

    One can get exact selection strings by ncs_object.show(verbose=True)

    Args:
      ph : pdb.hierarchy
      master/copy_selection (str): master and copy selection strings
      chain_max_rmsd (float): limit of rms difference between chains to be
        considered as copies

    Returns:
      r: rotation matrix
      t: translation vector
      rmsd (float): RMSD between master and copy
      msg (str): error messages

      On any failure (no hierarchy, selections that do not match, no sites,
      or RMSD above chain_max_rmsd) r is the zero matrix, t the zero vector
      and rmsd is 0.
    """
    msg = ''
    r_zero = matrix.sqr([0]*9)
    t_zero = matrix.col([0,0,0])
    if not ph:
        return r_zero, t_zero, 0, msg
    select_atoms = ph.atom_selection_cache().selection
    master_ncs_ph = ph.select(select_atoms(master_selection))
    ncs_copy_ph = ph.select(select_atoms(copy_selection))
    seq_m, _res_ids_m = get_residue_sequence(master_ncs_ph)
    seq_c, _res_ids_c = get_residue_sequence(ncs_copy_ph)
    _res_sel_m, _res_sel_c, similarity = mmtbx_res_alignment(
        seq_m, seq_c, min_percent=0)
    m_atoms = master_ncs_ph.atoms()
    c_atoms = ncs_copy_ph.atoms()
    # Check that master and copy are identical
    if (similarity != 1) or (m_atoms.size() != c_atoms.size()):
        return r_zero,t_zero,0,'Master and Copy selection do not exactly match'
    master_sites = m_atoms.extract_xyz()
    copy_sites = c_atoms.extract_xyz()
    if copy_sites.size() <= 0:
        return r_zero,t_zero,0,'No sites to compare.\n'
    # The copy is the reference; the master is moved onto it.
    fit = superpose.least_squares_fit(
        reference_sites=copy_sites,
        other_sites=master_sites)
    r = fit.r
    t = fit.t
    rmsd = copy_sites.rms_difference(fit.other_sites_best_fit())
    if rmsd > chain_max_rmsd:
        return r_zero, t_zero, 0, msg
    return r, t, round(rmsd, 4), msg
def __init__(self, pdb_hierarchy, crystal_symmetry,
        angular_difference_threshold_deg=5.,
        sequence_identity_threshold=90.):
    """
    Find groups of chains related by translational NCS and store the
    resulting pairs in ``self.ncs_pairs``.

    Args:
      pdb_hierarchy: input hierarchy; only atoms matching
        "altloc ' ' and (protein or nucleotide)" of the first model are used
      crystal_symmetry: provides the unit cell and is used to expand the
        model to P1
      angular_difference_threshold_deg: a chain pair is linked only when the
        rotation angle of its superposition is below this value
      sequence_identity_threshold: a chain pair is linked only when its
        sequence identity (percent) is above this value
    """
    h = pdb_hierarchy
    # Chains are superposed only above this (lower) identity.
    superposition_threshold = 2*sequence_identity_threshold - 100.
    n_atoms_all = h.atoms_size()
    s_str = "altloc ' ' and (protein or nucleotide)"
    h = h.select(h.atom_selection_cache().selection(s_str))
    h1 = iotbx.pdb.hierarchy.root()
    h1.append_model(h.models()[0].detached_copy())
    unit_cell = crystal_symmetry.unit_cell()
    # result: frozenset of two chains -> list of [p_identity, superposition];
    # superposition is [r, t_frac, angle, radius, fracscat] or None.
    result = {}
    print "Find groups of chains related by translational NCS"
    # double loop over chains to find matching pairs related by pure translation
    for c1 in h1.chains():
        # c1 is detached from h1, so the inner loop only visits the chains
        # that are still attached.
        c1.parent().remove_chain(c1)
        nchains = len(h1.models()[0].chains())
        if([c1.is_protein(), c1.is_na()].count(True)==0): continue
        r1 = list(c1.residues())
        c1_seq = "".join(c1.as_sequence())
        sc_1_tmp = c1.atoms().extract_xyz()
        h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
        for (ii,c2) in enumerate(h1_p1.chains()):
            # Map the P1 chain back to a chain of h1.
            # NOTE(review): assumes expand_to_p1 lists the chains in
            # repeating blocks of nchains - confirm.
            orig_c2 = h1.models()[0].chains()[ii%nchains]
            r2 = list(c2.residues())
            c2_seq = "".join(c2.as_sequence())
            sites_cart_1, sites_cart_2 = None,None
            sc_2_tmp = c2.atoms().extract_xyz()
            # chains are identical
            if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
                sites_cart_1 = sc_1_tmp
                sites_cart_2 = sc_2_tmp
                p_identity = 100.
            # chains are not identical, do alignment
            else:
                align_obj = mmtbx.alignment.align(seq_a = c1_seq, seq_b = c2_seq)
                alignment = align_obj.extract_alignment()
                matches = alignment.matches()
                equal = matches.count("|")
                total = len(alignment.a) - alignment.a.count("-")
                p_identity = 100.*equal/max(1,total)
                if(p_identity>superposition_threshold):
                    sites_cart_1 = flex.vec3_double()
                    sites_cart_2 = flex.vec3_double()
                    # Pair atoms by name within identically matched residues.
                    for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
                                             matches):
                        if(i1 is not None and i2 is not None and match=="|"):
                            r1i, r2i = r1[i1], r2[i2]
                            assert r1i.resname==r2i.resname, [r1i.resname,r2i.resname,i1,i2]
                            for a1 in r1i.atoms():
                                for a2 in r2i.atoms():
                                    if(a1.name == a2.name):
                                        sites_cart_1.append(a1.xyz)
                                        sites_cart_2.append(a2.xyz)
                                        break
            # superpose two sequence-aligned chains
            if([sites_cart_1,sites_cart_2].count(None)==0):
                lsq_fit_obj = superpose.least_squares_fit(
                    reference_sites = sites_cart_1,
                    other_sites = sites_cart_2)
                angle = lsq_fit_obj.r.rotation_angle()
                # Translation: mean coordinate difference in fractional units.
                t_frac = unit_cell.fractionalize((sites_cart_1-sites_cart_2).mean())
                t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
                # Mean distance of the sites from their mean, scaled by 4/3.
                radius = flex.sum(flex.sqrt((sites_cart_1-
                    sites_cart_1.mean()).dot()))/sites_cart_1.size()*4./3.
                # NOTE(review): this is integer division under Python 2 unless
                # the module enables true division - confirm.
                fracscat = min(c1.atoms_size(),c2.atoms_size())/n_atoms_all
                result.setdefault( frozenset([c1,orig_c2]), [] ).append(
                    [p_identity,[lsq_fit_obj.r, t_frac, angle, radius, fracscat]] )
            else:
                result.setdefault( frozenset([c1,orig_c2]), [] ).append(
                    [p_identity,None] )
    # Build graph
    g = graph.adjacency_list()
    vertex_handle = {}
    for key in result:
        seqid = result[key][0][0]
        # Keep the entry with the smallest rotation angle (None counts as 0).
        sup = min( result[key],key=lambda s:0 if s[1] is None else s[1][2])[1]
        result[key] = [seqid,sup]
        if ((seqid > sequence_identity_threshold) and
            (sup[2] < angular_difference_threshold_deg)):
            (c1,c2) = key
            if (c1 not in vertex_handle):
                vertex_handle[c1] = g.add_vertex(label=c1)
            if (c2 not in vertex_handle):
                vertex_handle[c2] = g.add_vertex(label=c2)
            g.add_edge(vertex1=vertex_handle[c1],vertex2=vertex_handle[c2])
    # Do connected component analysis and compose final tNCS pairs object
    components = connected_component_algorithm.connected_components(g)
    import itertools
    self.ncs_pairs = []
    for (i,group) in enumerate(components):
        chains = [g.vertex_label(vertex=v) for v in group]
        fracscats = []
        radii = []
        # First pass: average fracscat and radius over all pairs of the group.
        for pair in itertools.combinations(chains,2):
            sup = result[frozenset(pair)][1]
            fracscats.append(sup[-1])
            radii.append(sup[-2])
        fs = sum(fracscats)/len(fracscats)
        rad = sum(radii)/len(radii)
        # Second pass: one ext.pair per chain pair, sharing the group averages.
        for pair in itertools.combinations(chains,2):
            sup = result[frozenset(pair)][1]
            ncs_pair = ext.pair(
                r = sup[0],
                t = sup[1],
                radius = rad,
                radius_estimate = rad,
                fracscat = fs,
                rho_mn = flex.double(), # rho_mn undefined, needs to be set later
                id = i)
            self.ncs_pairs.append(ncs_pair)
            # show tNCS pairs in group
            fmt="group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
            t = ",".join([("%6.3f"%t_).strip() for t_ in sup[1]]).strip()
            print fmt%(i, pair[0].id, pair[1].id, sup[2], t, fs)
def run(self):
    '''
    Superpose a moving detector onto a reference detector.

    Reads one reference and one moving experiment list (each must contain
    exactly one detector model), least-squares fits the selected panels of
    the moving detector onto the reference using panel corners or panel
    centers (``params.fit_target``), applies the fitted rotation and
    translation to the moving detector (optionally repeating until the
    movement vanishes), writes the moved experiments to
    ``params.output_experiments`` and prints RMSD statistics in microns.

    Raises:
      Sorry: if either experiment list does not hold exactly one detector
    '''
    # Parse the command line arguments
    params, options = self.parser.parse_args(show_diff_phil=True)
    reference_experiments = ExperimentListFactory.from_json_file(
        params.reference_experiments, check_format=False)
    if len(reference_experiments.detectors()) != 1:
        raise Sorry("Please ensure reference has only 1 detector model")
    reference = reference_experiments.detectors()[0]
    moving_experiments = ExperimentListFactory.from_json_file(
        params.moving_experiments, check_format=False)
    if len(moving_experiments.detectors()) != 1:
        raise Sorry("Please ensure moving has only 1 detector model")
    moving = moving_experiments.detectors()[0]

    # Get list of panels to compare
    if params.panel_list is None or len(params.panel_list) == 0:
        assert len(reference) == len(moving), "Detectors not same length"
        panel_ids = range(len(reference))
    else:
        max_p_id = max(params.panel_list)
        assert max_p_id < len(
            reference
        ), "Reference detector must be at least %d panels long given the panel list" % (
            max_p_id + 1)
        assert max_p_id < len(
            moving
        ), "Moving detector must be at least %d panels long given the panel list" % (
            max_p_id + 1)
        panel_ids = params.panel_list

    if params.fit_target == "centers":
        assert len(
            panel_ids
        ) >= 3, "When using centers as target for superpose, detector needs at least 3 panels"

    def rmsd_from_centers(a, b):
        # RMSD (microns) between panel centers, given corner arrays that hold
        # four consecutive corners per panel.
        assert len(a) == len(b)
        assert len(a) % 4 == len(b) % 4 == 0
        ca = flex.vec3_double()
        cb = flex.vec3_double()
        for i in xrange(len(a) // 4):
            # Panel i owns corners [4*i, 4*i + 4); the previous a[i:i + 4]
            # slice averaged overlapping windows spanning several panels.
            ca.append(a[4 * i:4 * i + 4].mean())
            cb.append(b[4 * i:4 * i + 4].mean())
        return 1000 * math.sqrt((ca - cb).sum_sq() / len(ca))

    cycles = 0
    while True:
        cycles += 1

        # Treat panels as a list of 4 sites (corners) or 1 site (centers) for use with lsq superpose
        reference_sites = flex.vec3_double()
        moving_sites = flex.vec3_double()
        for panel_id in panel_ids:
            for detector, sites in zip([reference, moving],
                                       [reference_sites, moving_sites]):
                panel = detector[panel_id]
                size = panel.get_image_size()
                corners = flex.vec3_double([
                    panel.get_pixel_lab_coord(point)
                    for point in [(0, 0), (0, size[1] - 1),
                                  (size[0] - 1, size[1] - 1), (size[0] - 1, 0)]
                ])
                if params.fit_target == "corners":
                    sites.extend(corners)
                elif params.fit_target == "centers":
                    sites.append(corners.mean())

        # Compute super position
        rmsd = 1000 * math.sqrt((reference_sites - moving_sites).sum_sq() /
                                len(reference_sites))
        print("RMSD before fit: %.1f microns" % rmsd)
        if params.fit_target == "corners":
            rmsd = rmsd_from_centers(reference_sites, moving_sites)
            print("RMSD of centers before fit: %.1f microns" % rmsd)
        lsq = least_squares_fit(reference_sites, moving_sites)
        rmsd = 1000 * math.sqrt(
            (reference_sites - lsq.other_sites_best_fit()).sum_sq() /
            len(reference_sites))
        print("RMSD of fit: %.1f microns" % rmsd)
        if params.fit_target == "corners":
            rmsd = rmsd_from_centers(reference_sites, lsq.other_sites_best_fit())
            print("RMSD of fit of centers: %.1f microns" % rmsd)
        angle, axis = lsq.r.r3_rotation_matrix_as_unit_quaternion(
        ).unit_quaternion_as_axis_and_angle(deg=True)
        print(
            "Axis and angle of rotation: (%.3f, %.3f, %.3f), %.2f degrees" %
            (axis[0], axis[1], axis[2], angle))
        print("Translation (x, y, z, in microns): (%.3f, %.3f, %.3f)" %
              (1000 * lsq.t).elems)

        # Apply the shifts
        if params.apply_at_hierarchy_level is None:
            iterable = moving
        else:
            iterable = iterate_detector_at_level(
                moving.hierarchy(), level=params.apply_at_hierarchy_level)

        for group in iterable:
            fast = col(group.get_fast_axis())
            slow = col(group.get_slow_axis())
            ori = col(group.get_origin())

            group.set_frame(lsq.r * fast, lsq.r * slow, (lsq.r * ori) + lsq.t)

            fast = col(group.get_fast_axis())
            slow = col(group.get_slow_axis())
            ori = col(group.get_origin())

        if not params.repeat_until_converged:
            break

        # Converged once the last fit neither rotated nor translated.
        if approx_equal(angle, 0.0, out=None) and approx_equal(
                (1000 * lsq.t).length(), 0.0, out=None):
            print("Converged after", cycles, "cycles")
            break
        else:
            print("Movement not close to zero, repeating fit")
            print()

    from dxtbx.serialize import dump
    dump.experiment_list(moving_experiments, params.output_experiments)

    moved_sites = flex.vec3_double()
    for panel_id in panel_ids:
        panel = moving[panel_id]
        size = panel.get_image_size()
        corners = flex.vec3_double([
            panel.get_pixel_lab_coord(point)
            for point in [(0, 0), (0, size[1] - 1),
                          (size[0] - 1, size[1] - 1), (size[0] - 1, 0)]
        ])
        if params.fit_target == "corners":
            moved_sites.extend(corners)
        elif params.fit_target == "centers":
            moved_sites.append(corners.mean())

    # Re-compute RMSD after moving detector components
    # (reference_sites still holds the sites of the last fitting cycle)
    rmsd = 1000 * math.sqrt(
        (reference_sites - moved_sites).sum_sq() / len(reference_sites))
    print("RMSD of fit after movement: %.1f microns" % rmsd)
    if params.fit_target == "corners":
        rmsd = rmsd_from_centers(reference_sites, moved_sites)
        print("RMSD of fit of centers after movement: %.1f microns" % rmsd)

    if params.panel_list is not None:
        reference_sites = flex.vec3_double()
        moved_sites = flex.vec3_double()
        for panel_id in xrange(len(reference)):
            for detector, sites in zip([reference, moving],
                                       [reference_sites, moved_sites]):
                panel = detector[panel_id]
                size = panel.get_image_size()
                corners = flex.vec3_double([
                    panel.get_pixel_lab_coord(point)
                    for point in [(0, 0), (0, size[1] - 1),
                                  (size[0] - 1, size[1] - 1), (size[0] - 1, 0)]
                ])
                if params.fit_target == "corners":
                    sites.extend(corners)
                elif params.fit_target == "centers":
                    sites.append(corners.mean())

        # Re-compute RMSD for full detector after moving detector components
        rmsd = 1000 * math.sqrt((reference_sites - moved_sites).sum_sq() /
                                len(reference_sites))
        print("RMSD of whole detector fit after movement: %.1f microns" % rmsd)
        if params.fit_target == "corners":
            rmsd = rmsd_from_centers(reference_sites, moved_sites)
            print(
                "RMSD of whole detector fit of centers after movement: %.1f microns"
                % rmsd)
def add_to(self, reparametrisation):
    """
    Register the "same group" constraint with a reparametrisation.

    ``self.groups[0]`` is the reference group; every further group is tied to
    it through an alignment matrix obtained by least-squares superposition of
    the orthogonalized reference coordinates onto the group's coordinates.
    Does nothing when neither ``self.fix_u`` nor ``self.fix_xyz`` is set.

    Args:
      reparametrisation: object providing ``structure``, ``add``,
        ``add_new_site_parameter``,
        ``add_new_thermal_displacement_parameter``,
        ``add_new_same_group_site_proxy_parameter``,
        ``asu_scatterer_parameters`` and ``shared_Us``

    Raises:
      InvalidConstraint: if a group differs in size from the reference group
        or pairs an isotropic scatterer with an anisotropic one
    """
    if not self.fix_u and not self.fix_xyz:
        return
    scatterers = reparametrisation.structure.scatterers()
    ref_sites = []
    ref_u_isos = []
    ref_u_stars = []
    ref_adps = []  # NOTE(review): never read in this method
    src_crds = []
    inv_src_crds = []
    uc = reparametrisation.structure.unit_cell()
    # Collect Cartesian coordinates and parameters of the reference group.
    for i in self.groups[0]:
        src_crds.append(uc.orthogonalize(scatterers[i].site))
        if self.fix_xyz:
            ref_sites.append(reparametrisation.add_new_site_parameter(i))
        if self.fix_u:
            if scatterers[i].flags.use_u_iso():
                ref_u_isos.append(
                    reparametrisation.add_new_thermal_displacement_parameter(i))
            else:
                ref_u_stars.append(
                    reparametrisation.add_new_thermal_displacement_parameter(i))
    for g in self.groups[1:]:
        if len(g) != len(self.groups[0]):
            raise InvalidConstraint("Group size mismatch")
        g_scatterers = []
        g_u_iso_scatterers =[]
        g_u_star_scatterers = []
        crds = []
        for idx, i in enumerate(g):
            # Scatterers are paired by position; both must be iso or both aniso.
            if scatterers[i].flags.use_u_iso() !=\
               scatterers[self.groups[0][idx]].flags.use_u_iso():
                raise InvalidConstraint("Mixing isotropic and anisotropic parameters")
            g_scatterers.append(scatterers[i])
            crds.append(uc.orthogonalize(scatterers[i].site))
            if scatterers[i].flags.use_u_iso():
                g_u_iso_scatterers.append(scatterers[i])
            else:
                g_u_star_scatterers.append(scatterers[i])
        #need to map reference to target
        lsf = superpose.least_squares_fit(
            flex.vec3_double(crds), flex.vec3_double(src_crds))
        #create a list of inverted coordinates if needed
        # (built once, on the first non-reference group, as
        # 2*lsf.other_shift - x for every reference coordinate x)
        if len(inv_src_crds) == 0:
            for i in xrange(0, len(g)):
                inv_src_crds.append(
                    2*matrix.col(lsf.other_shift)-matrix.col(src_crds[i]))
        rm = lsf.r
        t = matrix.col(lsf.reference_shift)-matrix.col(lsf.other_shift)
        new_crd = lsf.other_sites_best_fit()
        # d: summed squared deviation of the direct fit.
        d = 0
        for i, c in enumerate(new_crd):
            d += matrix.col(matrix.col(c)-matrix.col(crds[i])).length_sq()
        # Repeat the fit with the inverted reference coordinates.
        lsf = superpose.least_squares_fit(
            flex.vec3_double(crds), flex.vec3_double(inv_src_crds))
        new_crd = lsf.other_sites_best_fit()
        d_inv = 0
        for i, c in enumerate(new_crd):
            d_inv += matrix.col(matrix.col(c)-matrix.col(crds[i])).length_sq()
        # Use the negated rotation when the inverted reference fits better.
        if d_inv < d:
            rm = -lsf.r
        if self.fix_xyz:
            # Six-vector parameter: translation t followed by three zeros.
            shifts_and_angles =\
                reparametrisation.add(_.independent_small_6_vector_parameter,
                                      value=(t[0],t[1],t[2],0,0,0), variable=True)
            if len(ref_u_stars) > 0:
                u_star_param = reparametrisation.add(
                    _.same_group_u_star,
                    scatterers=g_u_star_scatterers,
                    u_stars=ref_u_stars,
                    alignment_matrix=rm,
                    shifts_and_angles=shifts_and_angles
                )
        elif len(ref_u_stars) > 0:
            # Only ADPs are constrained: a three-vector parameter initialised
            # from self.angles is used instead.
            angles =\
                reparametrisation.add(_.independent_small_3_vector_parameter,
                                      value=self.angles, variable=True)
            u_star_param = reparametrisation.add(
                _.same_group_u_star,
                scatterers=g_u_star_scatterers,
                u_stars=ref_u_stars,
                alignment_matrix=rm,
                angles=angles
            )
        if self.fix_xyz:
            site_param = reparametrisation.add(
                _.same_group_xyz,
                scatterers=g_scatterers,
                sites=ref_sites,
                alignment_matrix=rm,
                shifts_and_angles=shifts_and_angles
            )
        if len(ref_u_isos) > 0:
            u_iso_param = reparametrisation.add(
                _.same_group_u_iso,
                scatterers=g_u_iso_scatterers,
                u_isos=ref_u_isos
            )
        # Attach a proxy parameter to every scatterer of this group; each
        # proxy index counts scatterers of its own kind only.
        site_proxy_index = 0
        u_star_proxy_index = 0
        u_iso_proxy_index = 0
        for i in g:
            if self.fix_xyz:
                reparametrisation.asu_scatterer_parameters[i].site = site_param
                reparametrisation.add_new_same_group_site_proxy_parameter(
                    site_param, site_proxy_index, i)
                site_proxy_index += 1
            if self.fix_u:
                if scatterers[i].flags.use_u_iso():
                    reparametrisation.asu_scatterer_parameters[i].u = u_iso_param
                    reparametrisation.shared_Us[i] = reparametrisation.add(
                        _.same_group_u_iso_proxy,
                        parent=u_iso_param,
                        index=u_iso_proxy_index
                    )
                    u_iso_proxy_index += 1
                else:
                    reparametrisation.asu_scatterer_parameters[i].u = u_star_param
                    reparametrisation.shared_Us[i] = reparametrisation.add(
                        _.same_group_u_star_proxy,
                        parent=u_star_param,
                        index=u_star_proxy_index
                    )
                    u_star_proxy_index += 1
def run(args, command_name="mmtbx.super"):
    """
    Command-line driver: superpose a moving PDB structure onto a fixed one.

    File names found in ``args`` are taken, in order, as the fixed and the
    moving structure; every other argument is interpreted as a phil
    parameter. The two sequences are aligned, the paired sites of matching
    residues are least-squares superposed, the fitted transformation is
    applied to all atoms of the moving structure and the result is written
    to ``params.super.moved``.

    Args:
      args: list of command-line arguments
      command_name: program name shown in the usage and banner output

    Raises:
      Sorry: on too many file names, an unknown argument, a missing
        fixed/moving file name, or when no atom pairs can be matched
    """
    if (len(args) == 0):
        print "usage: %s fixed.pdb moving.pdb [parameter=value ...]" % command_name
        return

    print "#"
    print "# ", command_name
    print "#"
    print "# A lightweight sequence-based structure superposition tool."
    print "#"
    print "#"

    phil_objects = []
    argument_interpreter = master_params.command_line_argument_interpreter(
        home_scope="super")
    fixed_pdb_file_name = None
    moving_pdb_file_name = None
    # Existing files are positional (fixed first, then moving); anything
    # else must parse as a phil parameter.
    for arg in args:
        if (os.path.isfile(arg)):
            if (fixed_pdb_file_name is None): fixed_pdb_file_name = arg
            elif (moving_pdb_file_name is None): moving_pdb_file_name = arg
            else: raise Sorry("Too many file names.")
        else:
            try: command_line_params = argument_interpreter.process(arg=arg)
            except KeyboardInterrupt: raise
            except Exception: raise Sorry("Unknown file or keyword: %s" % arg)
            else: phil_objects.append(command_line_params)

    working_params = master_params.fetch(sources=phil_objects)
    params = working_params.extract()

    def raise_missing(what):
        # %(what)s is filled in from the local variables via vars().
        raise Sorry("""\
Missing file name for %(what)s structure:
  Please add
    %(what)s=file_name
  to the command line to specify the %(what)s structure.""" % vars())

    # A file name given on the command line overrides the phil value.
    if (fixed_pdb_file_name is None):
        if (params.super.fixed is None): raise_missing("fixed")
    else:
        params.super.fixed = fixed_pdb_file_name
    if (moving_pdb_file_name is None):
        if (params.super.moving is None): raise_missing("moving")
    else:
        params.super.moving = moving_pdb_file_name

    print "#Parameters used:"
    print "#phil __ON__"
    print
    working_params = master_params.format(python_object=params)
    working_params.show()
    print
    print "#phil __OFF__"
    print

    print "Reading fixed structure:", params.super.fixed
    fixed_pdb = iotbx.pdb.input(file_name=params.super.fixed)
    print
    print "Reading moving structure:", params.super.moving
    moving_pdb = iotbx.pdb.input(file_name=params.super.moving)
    print

    fixed_seq, fixed_sites, fixed_site_flags = extract_sequence_and_sites(
        pdb_input=fixed_pdb)
    moving_seq, moving_sites, moving_site_flags = extract_sequence_and_sites(
        pdb_input=moving_pdb)

    print "Computing sequence alignment..."
    align_obj = mmtbx.alignment.align(
        seq_a=fixed_seq,
        seq_b=moving_seq,
        gap_opening_penalty=params.super.gap_opening_penalty,
        gap_extension_penalty=params.super.gap_extension_penalty,
        similarity_function=params.super.similarity_matrix,
        style=params.super.alignment_style)
    print "done."
    print

    alignment = align_obj.extract_alignment()
    matches = alignment.matches()
    equal = matches.count("|")
    similar = matches.count("*")
    # Aligned length of the fixed sequence, gaps excluded.
    total = len(alignment.a) - alignment.a.count("-")
    alignment.pretty_print(
        matches=matches,
        block_size=50,
        n_block=1,
        top_name="fixed",
        bottom_name="moving",
        comment="""\
The alignment used in the superposition is shown below.

The sequence identity (fraction of | symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.

The sequence similarity (fraction of | and * symbols) is %4.1f%%
of the aligned length of the fixed molecule sequence.
""" % (100.*equal/max(1,total), 100.*(equal+similar)/max(1,total)))

    # Keep site pairs of identical or similar residues for which both
    # structures have the site flag set.
    fixed_sites_sel = flex.vec3_double()
    moving_sites_sel = flex.vec3_double()
    for ia,ib,m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches):
        if (m not in ["|", "*"]): continue
        if (fixed_site_flags[ia] and moving_site_flags[ib]):
            fixed_sites_sel.append(fixed_sites[ia])
            moving_sites_sel.append(moving_sites[ib])

    print "Performing least-squares superposition of C-alpha atom pairs:"
    print " Number of C-alpha atoms pairs in matching residues"
    print " indicated by | or * above:", fixed_sites_sel.size()
    if (fixed_sites_sel.size() == 0):
        raise Sorry("No matching C-alpha atoms.")
    lsq_fit = superpose.least_squares_fit(
        reference_sites=fixed_sites_sel,
        other_sites=moving_sites_sel)
    rmsd = fixed_sites_sel.rms_difference(lsq_fit.other_sites_best_fit())
    print " RMSD between the aligned C-alpha atoms: %.3f" % rmsd
    print

    print "Writing moved pdb to file: %s" % params.super.moved
    pdb_hierarchy = moving_pdb.construct_hierarchy()
    # Apply the fitted transformation to every atom of the moving structure,
    # not only to the sites used in the fit.
    for atom in pdb_hierarchy.atoms():
        atom.xyz = lsq_fit.r * matrix.col(atom.xyz) + lsq_fit.t
    pdb_hierarchy.write_pdb_file(file_name=params.super.moved, append_end=True)
    print
def clean_chain_matching(chain_match_list,ph,
                         chain_max_rmsd=10.0,
                         residue_match_radius=4.0):
  """
  Remove all bad matches from chain_match_list

  A match is kept only if its similarity is positive and the RMSD of the
  least-squares superposition of the two chain selections does not exceed
  chain_max_rmsd.

  Args:
    chain_match_list (list): list of
      [chain_ID_1, chain_ID_2, sel_1, sel_2, res_list_1, res_list_2, similarity]
      chain_ID (str), sel_1/2 (list of lists)
      res_list_1/2 (lists): indices of the aligned components
      similarity (float): similarity between chains
    ph (object): hierarchy
    chain_max_rmsd (float): limit of rms difference chains
    residue_match_radius (float): max allow distance difference between pairs
      of matching atoms of two residues

  Returns:
    match_dict(dict): key:(chains_id_a,chains_id_b)
                      val:[selection_a,selection_b,
                           res_list_a,res_list_b,rot,trans,rmsd]
  """
  # Remove all non-matching pairs, where similarity == 0.
  # Similarity is the LAST element of each record (see the unpacking below);
  # index 4 is res_list_a, so testing x[4] never filtered anything on
  # Python 2 and raises TypeError (list > int) on Python 3.
  match_list = [x for x in chain_match_list if x[-1] > 0]
  match_dict = {}
  for match in match_list:
    [ch_a_id,ch_b_id,list_a,list_b,res_list_a,res_list_b,similarity] = match
    sel_a = make_selection_from_lists(list_a)
    sel_b = make_selection_from_lists(list_b)

    other_h = ph.select(sel_a)
    other_atoms = other_h.atoms()
    ref_h = ph.select(sel_b)
    ref_atoms = ref_h.atoms()
    #
    # Here we want to flip atom names, even before chain alignment, so
    # we will get correct chain RMSD
    flipped_other_selection = make_flips_if_necessary_torsion(
      ref_h.deep_copy(), other_h.deep_copy())
    other_sites = other_atoms.select(flipped_other_selection).extract_xyz()
    ref_sites = ref_atoms.extract_xyz()
    lsq_fit_obj = superpose.least_squares_fit(
      reference_sites = ref_sites,
      other_sites     = other_sites)
    r = lsq_fit_obj.r
    t = lsq_fit_obj.t
    # todo: find r_2*A = r*A + t (where the translation is zero)
    # use B = r*A + t, r_2*A = B , r_2 = B*A.inverse()
    other_sites_best = lsq_fit_obj.other_sites_best_fit()
    rmsd = round(ref_sites.rms_difference(other_sites_best),4)
    if rmsd <= chain_max_rmsd:
      # drop atom pairs that are still too far apart after superposition;
      # the returned selections/residue lists replace the incoming ones
      sel_aa,sel_bb,res_list_a,res_list_b,ref_sites,other_sites_best = \
        remove_far_atoms(
          list_a, list_b,
          res_list_a,res_list_b,
          ref_sites,other_sites_best,
          residue_match_radius=residue_match_radius)
      # NOTE(review): this tests the size of the ORIGINAL selection (sel_a),
      # not the pruned one (sel_aa) - confirm which one is intended.
      if sel_a.size() > 0:
        match_dict[ch_a_id,ch_b_id]=[sel_aa,sel_bb,res_list_a,res_list_b,r,t,rmsd]
  return match_dict
def __init__(self, pdb_hierarchy, crystal_symmetry,
             angular_difference_threshold_deg=5.,
             sequence_identity_threshold=90.,
             quiet=False):
  """
  Find groups of chains related by translational NCS.

  Every chain of the first model is superposed onto the remaining chains
  (after expansion to P1); chain pairs that pass both thresholds become edges
  of a graph whose connected components are reported as tNCS groups.

  Args:
    pdb_hierarchy: hierarchy to analyse; only atoms matching
      "altloc ' ' and (protein or nucleotide)" are used.
    crystal_symmetry: supplies the unit cell and is passed to expand_to_p1.
    angular_difference_threshold_deg (float): a pair is linked only if the
      rotation angle of its superposition is below this value.
    sequence_identity_threshold (float): a pair is linked only if its
      sequence identity (percent) is above this value. Superposition itself
      is attempted above the looser cutoff 2*threshold - 100.
    quiet (bool): suppress the printed report when True.

  Sets:
    self.ncs_pairs: list of ext.pair objects, one per chain pair per group.
    self.tncsresults: [largest order, chain id, vectors, fracscat].
  """
  h = pdb_hierarchy
  # looser identity cutoff that decides whether a pair is superposed at all
  superposition_threshold = 2 * sequence_identity_threshold - 100.
  # counted BEFORE the selection below, i.e. over all atoms of the input
  n_atoms_all = h.atoms_size()
  s_str = "altloc ' ' and (protein or nucleotide)"
  h = h.select(h.atom_selection_cache().selection(s_str))
  # work on a fresh root that holds a detached copy of the first model only
  h1 = iotbx.pdb.hierarchy.root()
  h1.append_model(h.models()[0].detached_copy())
  unit_cell = crystal_symmetry.unit_cell()
  # frozenset([chain, chain]) -> list of [p_identity, superposition or None]
  result = {}
  if not quiet:
    print("Find groups of chains related by translational NCS")
  # double loop over chains to find matching pairs related by pure translation
  for c1 in h1.chains():
    # take c1 out of h1 so that the P1 expansion below is built from the
    # remaining chains only
    c1.parent().remove_chain(c1)
    nchains = len(h1.models()[0].chains())
    if ([c1.is_protein(), c1.is_na()].count(True) == 0): continue
    r1 = list(c1.residues())
    c1_seq = "".join(c1.as_sequence())
    sc_1_tmp = c1.atoms().extract_xyz()
    h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
    for (ii, c2) in enumerate(h1_p1.chains()):
      # map the expanded chain back to a chain of h1
      # (presumably the expansion lists the nchains chains once per symmetry
      # operator, in the same order - TODO confirm)
      orig_c2 = h1.models()[0].chains()[ii % nchains]
      r2 = list(c2.residues())
      c2_seq = "".join(c2.as_sequence())
      sites_cart_1, sites_cart_2 = None, None
      sc_2_tmp = c2.atoms().extract_xyz()
      # chains are identical
      if (c1_seq == c2_seq and sc_1_tmp.size() == sc_2_tmp.size()):
        sites_cart_1 = sc_1_tmp
        sites_cart_2 = sc_2_tmp
        p_identity = 100.
      # chains are not identical, do alignment
      else:
        align_obj = mmtbx.alignment.align(seq_a=c1_seq, seq_b=c2_seq)
        alignment = align_obj.extract_alignment()
        matches = alignment.matches()
        equal = matches.count("|")
        total = len(alignment.a) - alignment.a.count("-")
        # percent of "|" matches relative to the ungapped length of a
        p_identity = 100. * equal / max(1, total)
        if (p_identity > superposition_threshold):
          sites_cart_1 = flex.vec3_double()
          sites_cart_2 = flex.vec3_double()
          for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
                                   matches):
            if (i1 is not None and i2 is not None and match == "|"):
              r1i, r2i = r1[i1], r2[i2]
              assert r1i.resname == r2i.resname, [
                r1i.resname, r2i.resname, i1, i2
              ]
              # pair atoms by name; only the first atom of r2i with a
              # matching name is used for each atom of r1i
              for a1 in r1i.atoms():
                for a2 in r2i.atoms():
                  if (a1.name == a2.name):
                    sites_cart_1.append(a1.xyz)
                    sites_cart_2.append(a2.xyz)
                    break
      # superpose two sequence-aligned chains
      if ([sites_cart_1, sites_cart_2].count(None) == 0):
        lsq_fit_obj = superpose.least_squares_fit(
          reference_sites=sites_cart_1, other_sites=sites_cart_2)
        angle = lsq_fit_obj.r.rotation_angle()
        # translation = difference of the mean positions, in fractional units
        t_frac = unit_cell.fractionalize(
          (sites_cart_1 - sites_cart_2).mean())
        t_frac = [math.modf(t)[0] for t in t_frac]  # put into [-1,1]
        # 4/3 of the mean distance of the sites from their centroid
        # (assumes .dot() with no argument yields squared lengths - TODO confirm)
        radius = flex.sum(
          flex.sqrt((sites_cart_1 - sites_cart_1.mean()
                     ).dot())) / sites_cart_1.size() * 4. / 3.
        fracscat = min(c1.atoms_size(), c2.atoms_size()) / n_atoms_all
        result.setdefault(frozenset([c1, orig_c2]), []).append([
          p_identity, [lsq_fit_obj.r, t_frac, angle, radius, fracscat]
        ])
      else:
        # no superposition was possible; remember the identity only
        result.setdefault(frozenset([c1, orig_c2]),
                          []).append([p_identity, None])
  # Build graph
  g = graph.adjacency_list()
  vertex_handle = {}
  for key in result:
    # identity is taken from the first entry stored for the pair
    seqid = result[key][0][0]
    # keep the entry with the smallest rotation angle; entries without a
    # superposition sort as angle 0
    sup = min(result[key], key=lambda s: 0 if s[1] is None else s[1][2])[1]
    result[key] = [seqid, sup]
    if ((seqid > sequence_identity_threshold) and
        (sup[2] < angular_difference_threshold_deg)):
      (c1, c2) = key
      if (c1 not in vertex_handle):
        vertex_handle[c1] = g.add_vertex(label=c1)
      if (c2 not in vertex_handle):
        vertex_handle[c2] = g.add_vertex(label=c2)
      g.add_edge(vertex1=vertex_handle[c1], vertex2=vertex_handle[c2])
  # Do connected component analysis and compose final tNCS pairs object
  components = connected_component_algorithm.connected_components(g)
  import itertools
  self.ncs_pairs = []
  # [largest order, chain id, list of (frac vector, orth vector, angle), fracscat]
  self.tncsresults = [0, "", [], 0.0]
  for (i, group) in enumerate(components):
    chains = [g.vertex_label(vertex=v) for v in group]
    fracscats = []
    radii = []
    # NOTE(review): every pair inside a component is looked up here, including
    # pairs that were not linked directly; sup would be None for a pair that
    # was never superposed - confirm this cannot happen.
    for pair in itertools.combinations(chains, 2):
      sup = result[frozenset(pair)][1]
      fracscats.append(sup[-1])
      radii.append(sup[-2])
    # group-wide averages shared by all pairs of the group
    fs = sum(fracscats) / len(fracscats)
    # overwritten for every group, so the value of the last group remains
    self.tncsresults[3] = fs  # store fracscat in array
    rad = sum(radii) / len(radii)
    #import code, traceback; code.interact(local=locals(), banner="".join( traceback.format_stack(limit=10) ) )
    maxorder = 1
    vectors = []
    # id of the first chain of the first pair; used to count consecutive
    # pairs that share the same first chain
    previous_id = next(itertools.combinations(chains, 2))[0].id
    for pair in itertools.combinations(chains, 2):
      sup = result[frozenset(pair)][1]
      ncs_pair = ext.pair(
        r=sup[0],
        t=sup[1],
        radius=rad,
        radius_estimate=rad,
        fracscat=fs,
        rho_mn=flex.double(),  # rho_mn undefined, needs to be set later
        id=i)
      self.ncs_pairs.append(ncs_pair)
      # show tNCS pairs in group
      fmt = "group %d chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
      t = ",".join([("%6.3f" % t_).strip() for t_ in sup[1]]).strip()
      if not quiet:
        print(fmt % (i, pair[0].id, pair[1].id, sup[2], t, fs))
      if pair[0].id == previous_id:
        # same first chain as the previous pair: extend the current run
        maxorder += 1
        orthoxyz = unit_cell.orthogonalize(sup[1])
        vectors.append((sup[1], orthoxyz, sup[2]))
      else:
        # first chain changed: start a new run
        previous_id = pair[0].id
        maxorder = 1
        vectors = []
      # remember the longest run seen so far over all groups
      if maxorder > self.tncsresults[0]:
        self.tncsresults[0] = maxorder
        self.tncsresults[1] = previous_id
        self.tncsresults[2] = vectors
  if not quiet:
    print("Largest TNCS order, peptide chain, fracvector, orthvector, angle, fracscat = ", \
      str(self.tncsresults))
def run(args, command_name="mmtbx.super"): if len(args) == 0: print "usage: %s fixed.pdb moving.pdb [parameter=value ...]" % command_name return print "#" print "# ", command_name print "#" print "# A lightweight sequence-based structure superposition tool." print "#" print "#" phil_objects = [] argument_interpreter = master_params.command_line_argument_interpreter(home_scope="super") fixed_pdb_file_name = None moving_pdb_file_name = None for arg in args: if os.path.isfile(arg): if fixed_pdb_file_name is None: fixed_pdb_file_name = arg elif moving_pdb_file_name is None: moving_pdb_file_name = arg else: raise Sorry("Too many file names.") else: try: command_line_params = argument_interpreter.process(arg=arg) except KeyboardInterrupt: raise except Exception: raise Sorry("Unknown file or keyword: %s" % arg) else: phil_objects.append(command_line_params) working_params = master_params.fetch(sources=phil_objects) params = working_params.extract() def raise_missing(what): raise Sorry( """\ Missing file name for %(what)s structure: Please add %(what)s=file_name to the command line to specify the %(what)s structure.""" % vars() ) if fixed_pdb_file_name is None: if params.super.fixed is None: raise_missing("fixed") else: params.super.fixed = fixed_pdb_file_name if moving_pdb_file_name is None: if params.super.moving is None: raise_missing("moving") else: params.super.moving = moving_pdb_file_name print "#Parameters used:" print "#phil __ON__" print working_params = master_params.format(python_object=params) working_params.show() print print "#phil __OFF__" print print "Reading fixed structure:", params.super.fixed fixed_pdb = iotbx.pdb.input(file_name=params.super.fixed) print print "Reading moving structure:", params.super.moving moving_pdb = iotbx.pdb.input(file_name=params.super.moving) print fixed_seq, fixed_sites, fixed_site_flags = extract_sequence_and_sites(pdb_input=fixed_pdb) moving_seq, moving_sites, moving_site_flags = 
extract_sequence_and_sites(pdb_input=moving_pdb) print "Computing sequence alignment..." align_obj = mmtbx.alignment.align( seq_a=fixed_seq, seq_b=moving_seq, gap_opening_penalty=params.super.gap_opening_penalty, gap_extension_penalty=params.super.gap_extension_penalty, similarity_function=params.super.similarity_matrix, style=params.super.alignment_style, ) print "done." print alignment = align_obj.extract_alignment() matches = alignment.matches() equal = matches.count("|") similar = matches.count("*") total = len(alignment.a) - alignment.a.count("-") alignment.pretty_print( matches=matches, block_size=50, n_block=1, top_name="fixed", bottom_name="moving", comment="""\ The alignment used in the superposition is shown below. The sequence identity (fraction of | symbols) is %4.1f%% of the aligned length of the fixed molecule sequence. The sequence similarity (fraction of | and * symbols) is %4.1f%% of the aligned length of the fixed molecule sequence. """ % (100.0 * equal / max(1, total), 100.0 * (equal + similar) / max(1, total)), ) fixed_sites_sel = flex.vec3_double() moving_sites_sel = flex.vec3_double() for ia, ib, m in zip(alignment.i_seqs_a, alignment.i_seqs_b, matches): if m not in ["|", "*"]: continue if fixed_site_flags[ia] and moving_site_flags[ib]: fixed_sites_sel.append(fixed_sites[ia]) moving_sites_sel.append(moving_sites[ib]) print "Performing least-squares superposition of C-alpha atom pairs:" print " Number of C-alpha atoms pairs in matching residues" print " indicated by | or * above:", fixed_sites_sel.size() if fixed_sites_sel.size() == 0: raise Sorry("No matching C-alpha atoms.") lsq_fit = superpose.least_squares_fit(reference_sites=fixed_sites_sel, other_sites=moving_sites_sel) rmsd = fixed_sites_sel.rms_difference(lsq_fit.other_sites_best_fit()) print " RMSD between the aligned C-alpha atoms: %.3f" % rmsd print print "Writing moved pdb to file: %s" % params.super.moved pdb_hierarchy = moving_pdb.construct_hierarchy() for atom in 
pdb_hierarchy.atoms(): atom.xyz = lsq_fit.r * matrix.col(atom.xyz) + lsq_fit.t pdb_hierarchy.write_pdb_file(file_name=params.super.moved, append_end=True) print
def secondary_structure_from_sequence(pdb_str,
      sequence=None, pdb_hierarchy_template=None, rotamer_manager=None):
  """ Return pdb.hierarchy with secondary structure according to sequence or
  reference hierarchy. If reference hierarchy provided, the resulting hierarchy
  will be rigid body aligned to it. Residue numbers will start from 1.

  Exactly one of sequence / pdb_hierarchy_template must be given.

  pdb_str - "ideal" structure at least 2 residues long.
  sequence - string with sequence (one-letter codes)
  pdb_hierarchy_template - reference hierarchy.
  rotamer_manager - passed to side_chain_placement; a RotamerEval() is
    created when omitted.

  Raises Sorry when the template is empty or when there are no residues to
  build; asserts that exactly one of sequence / template is given and that
  the template has a single altloc.
  """
  if rotamer_manager is None:
    rotamer_manager = RotamerEval()
  pht = pdb_hierarchy_template
  assert [sequence, pht].count(None) == 1
  # From here on exactly one of sequence / pht is set, so a single flag
  # decides which one drives the build.
  use_template = pht is not None
  if use_template:
    n_altlocs = len(pht.altloc_indices().keys())
    if n_altlocs == 0:
      raise Sorry(
        "Hierarchy template in secondary_structure_from_sequence is empty")
    assert n_altlocs == 1, "Alternative conformations are not supported"
    number_of_residues = len(
      pht.models()[0].chains()[0].conformers()[0].residues())
  else:
    number_of_residues = len(sequence)
  if number_of_residues<1:
    raise Sorry('sequence should contain at least one residue.')
  ideal_res_dict = idealized_aa.residue_dict()
  real_res_list = None
  if use_template:
    real_res_list = pht.models()[0].chains()[0].residue_groups()
  pdb_hierarchy = iotbx.pdb.input(source_info=None, lines=pdb_str).\
      construct_hierarchy()
  truncate_to_poly_gly(pdb_hierarchy)
  chain = pdb_hierarchy.models()[0].chains()[0]
  # atom group of the first ideal residue; it is copied into the new chain
  # and then advanced by (r, t) once per residue
  current_gly_ag = chain.residue_groups()[0].atom_groups()[0]
  new_chain = iotbx.pdb.hierarchy.chain(id="A")
  new_chain.pre_allocate_residue_groups(
    number_of_additional_residue_groups=number_of_residues)
  r, t = get_r_t_matrices_from_structure(pdb_str)
  for j in range(number_of_residues):
    # place a copy of the current backbone residue as residue j+1
    rg = iotbx.pdb.hierarchy.residue_group(icode="")
    rg.resseq = j+1
    new_chain.append_residue_group(residue_group=rg)
    ag_to_place = current_gly_ag.detached_copy()
    rg.append_atom_group(atom_group=ag_to_place)
    # move the template residue to the position of the next residue
    current_gly_ag.atoms().set_xyz(
      r.elems*current_gly_ag.atoms().extract_xyz()+t.elems)
    # side chain comes from the template residue, or from the idealized
    # residue for the one-letter code when building from a sequence
    if use_template:
      current_reference_ag = real_res_list[j].atom_groups()[0]
    else:
      current_reference_ag = \
        ideal_res_dict[three_one[sequence[j]].lower()].models()[0].\
        chains()[0].residue_groups()[0].atom_groups()[0]
    side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager)
  new_pdb_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(new_chain)
  # align to real
  if use_template:
    fixed_sites, moving_sites = get_matching_sites_cart_in_both_h(pht, new_pdb_h)
    assert len(fixed_sites) == len(moving_sites)
    lsq_fit_obj = superpose.least_squares_fit(reference_sites = fixed_sites,
                                              other_sites = moving_sites)
    new_pdb_h.atoms().set_xyz(
      lsq_fit_obj.r.elems*new_pdb_h.atoms().extract_xyz()+lsq_fit_obj.t.elems)
  return new_pdb_h
def shortcut_1(hierarchy, chains_info, chain_similarity_threshold,
    chain_max_rmsd, log, residue_match_radius):
  """
  Fast path for hierarchies that were generated by applying BIOMT and/or
  MTRIX matrices to one molecule, where exact copies of chains (rmsd close
  to 0) are expected.

  Chains are grouped by atom count; the first chain of every group is the
  master and the others are its copies. The shortcut is abandoned, and an
  empty group list returned, as soon as a chain has a unique atom count, atom
  names differ between master and copy, or the superposition rmsd is above
  0.2. Progress is written to log.

  chain_similarity_threshold, chain_max_rmsd and residue_match_radius are
  accepted but not used by this function.
  """
  assert chains_info is not None
  assert len(chains_info) > 1

  def _flatten(nested):
    flat = []
    for inner in nested:
      flat.extend(inner)
    return flat

  no_shortcut = class_ncs_restraints_group_list()
  # {<n_atoms>: [ch_id, ch_id, ch_id]}
  chains_by_n_atoms = {}
  for chain_id, info in six.iteritems(chains_info):
    chains_by_n_atoms.setdefault(info.chains_atom_number, []).append(chain_id)
  print("n_atom_chain_id_dict", chains_by_n_atoms, file=log)
  for same_size_ids in chains_by_n_atoms.values():
    if len(same_size_ids) == 1:
      print("No shortcut, there is a chain with unique number of atoms:",
            same_size_ids, file=log)
      return no_shortcut
  # Check atom names, superpose and check rmsd for every master/copy pair;
  # any failure aborts the shortcut with an empty result.
  groups = class_ncs_restraints_group_list()
  for same_size_ids in chains_by_n_atoms.values():
    # one NCS group per atom count
    master_id = same_size_ids[0]
    master_info = chains_info[master_id]
    group = NCS_restraint_group(
      master_iselection=flex.size_t(_flatten(master_info.atom_selection)),
      str_selection="chain '%s'" % master_id)
    master_sites = get_chain_xyz(hierarchy, master_id)
    for copy_id in same_size_ids[1:]:
      copy_info = chains_info[copy_id]
      if master_info.atom_names != copy_info.atom_names:
        print("No shortcut, atom names are not identical", file=log)
        return no_shortcut
      copy_isel = _flatten(copy_info.atom_selection)
      copy_sites = get_chain_xyz(hierarchy, copy_id)
      # fit the master onto the copy
      fit = superpose.least_squares_fit(
        reference_sites=copy_sites, other_sites=master_sites)
      rotation = fit.r
      translation = fit.t
      rmsd = copy_sites.rms_difference(fit.other_sites_best_fit())
      print("rmsd", master_id, copy_id, rmsd, file=log)
      #
      # XXX should we compare rmsd to chain_max_rmsd to be more relaxed and
      # process more structures quickly?
      #
      # NOTE(review): the message below says "low rmsd" although it is printed
      # when rmsd is missing or ABOVE 0.2 - wording kept as is, please confirm.
      if rmsd is None or rmsd > 0.2:
        print("No shortcut, low rmsd:", rmsd, "for chains", master_id,
              copy_id, file=log)
        return no_shortcut
      # seems like a good enough copy
      group.append_copy(NCS_copy(
        copy_iselection=flex.size_t(copy_isel),
        rot=rotation,
        tran=translation,
        str_selection="chain '%s'" % copy_id,
        rmsd=rmsd))
    groups.append(group)
  print("Shortcut complete.", file=log)
  return groups
def __init__(self, pdb_hierarchy, crystal_symmetry,
             angular_difference_threshold_deg=10.,
             sequence_identity_threshold=90.):
  """
  Find chain pairs related by (nearly) pure translation and store them as
  tNCS pairs.

  Every chain is superposed onto the remaining chains after expansion to P1;
  pairs whose superposition rotation angle is below the threshold are printed
  and kept.

  Args:
    pdb_hierarchy: hierarchy to analyse; only atoms matching
      "altloc ' ' and (protein or nucleotide)" are used.
    crystal_symmetry: supplies the unit cell and is passed to expand_to_p1.
    angular_difference_threshold_deg (float): maximum rotation angle of the
      superposition for a pair to be accepted.
    sequence_identity_threshold (float): minimum sequence identity (percent)
      for non-identical chains to be superposed.

  Sets:
    self.ncs_pairs: list of ext.pair objects, one per accepted pair.
  """
  h = pdb_hierarchy
  # counted BEFORE the selection below, i.e. over all atoms of the input
  n_atoms_all = h.atoms_size()
  s_str = "altloc ' ' and (protein or nucleotide)"
  h = h.select(h.atom_selection_cache().selection(s_str))
  # work on a copy, because chains are removed from it in the loop below
  h1 = h.deep_copy()
  unit_cell = crystal_symmetry.unit_cell()
  # accepted pairs as [rotation, t_frac, angle, radius, fracscat]
  result = []
  # double loop over chains to find matching pairs related by pure translation
  for c1 in h1.chains():
    # take c1 out of h1 so that the P1 expansion below is built from the
    # remaining chains only
    c1.parent().remove_chain(c1)
    if([c1.is_protein(), c1.is_na()].count(True)==0): continue
    r1 = list(c1.residues())
    c1_seq = "".join(c1.as_sequence())
    sc_1_tmp = c1.atoms().extract_xyz()
    h1_p1 = h1.expand_to_p1(crystal_symmetry=crystal_symmetry)
    for c2 in h1_p1.chains():
      r2 = list(c2.residues())
      c2_seq = "".join(c2.as_sequence())
      sites_cart_1, sites_cart_2 = None,None
      sc_2_tmp = c2.atoms().extract_xyz()
      # chains are identical
      if(c1_seq==c2_seq and sc_1_tmp.size()==sc_2_tmp.size()):
        sites_cart_1 = sc_1_tmp
        sites_cart_2 = sc_2_tmp
      # chains are not identical, do alignment
      else:
        align_obj = mmtbx.alignment.align(seq_a = c1_seq, seq_b = c2_seq)
        alignment = align_obj.extract_alignment()
        matches = alignment.matches()
        equal = matches.count("|")
        total = len(alignment.a) - alignment.a.count("-")
        # percent of "|" matches relative to the ungapped length of a
        p_identity = 100.*equal/max(1,total)
        if(p_identity>sequence_identity_threshold):
          sites_cart_1 = flex.vec3_double()
          sites_cart_2 = flex.vec3_double()
          for i1, i2, match in zip(alignment.i_seqs_a, alignment.i_seqs_b,
                                   matches):
            if(i1 is not None and i2 is not None and match=="|"):
              r1i, r2i = r1[i1], r2[i2]
              assert r1i.resname==r2i.resname, [r1i.resname,r2i.resname,i1,i2]
              # pair atoms by name; only the first atom of r2i with a
              # matching name is used for each atom of r1i
              for a1 in r1i.atoms():
                for a2 in r2i.atoms():
                  if(a1.name == a2.name):
                    sites_cart_1.append(a1.xyz)
                    sites_cart_2.append(a2.xyz)
                    break
      # superpose two sequence-aligned chains
      if([sites_cart_1,sites_cart_2].count(None)==0):
        lsq_fit_obj = superpose.least_squares_fit(
          reference_sites = sites_cart_1,
          other_sites     = sites_cart_2)
        angle = lsq_fit_obj.r.rotation_angle()
        if(angle < angular_difference_threshold_deg):
          # translation = difference of the mean positions, fractional units
          t_frac = unit_cell.fractionalize((sites_cart_1-sites_cart_2).mean())
          t_frac = [math.modf(t)[0] for t in t_frac] # put into [-1,1]
          # 4/3 of the mean distance of the sites from their centroid
          # (assumes .dot() with no argument yields squared lengths - TODO confirm)
          radius = flex.sum(flex.sqrt((sites_cart_1-
            sites_cart_1.mean()).dot()))/sites_cart_1.size()*4./3.
          # NOTE(review): both operands look like integers; on Python 2 this
          # is a floor division unless the file enables true division -
          # confirm against the file header.
          fracscat = c1.atoms_size()/n_atoms_all
          result.append([lsq_fit_obj.r, t_frac, angle, radius, fracscat])
          # show tNCS group
          fmt="chains %s <> %s angle: %4.2f trans.vect.: (%s) fracscat: %5.3f"
          t = ",".join([("%6.3f"%t_).strip() for t_ in t_frac]).strip()
          print fmt%(c1.id, c2.id, angle, t, fracscat)
  # compose final tNCS pairs object
  self.ncs_pairs = []
  for _ in result:
    r, t, angle, rad, fs = _
    ncs_pair = ext.pair(
      r = r,
      t = t,
      radius=rad,
      radius_estimate=rad,
      fracscat=fs,
      rho_mn=flex.double()) # rho_mn undefined, needs to be set later
    self.ncs_pairs.append(ncs_pair)