def exercise_heavy():
  from mmtbx.regression import make_fake_anomalous_data
  from mmtbx.command_line import validate_waters
  import mmtbx.ions.utils
  from iotbx.file_reader import any_file
  # these helpers are assumed to be imported at module level in the original
  # file; repeated here so the excerpt is self-contained
  from libtbx.utils import null_out
  from libtbx.test_utils import show_diff
  from libtbx import group_args, easy_pickle
  from six.moves import cStringIO as StringIO
  file_base = "tst_validate_waters_1"
  pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl(file_base=file_base)
  mtz_file = make_fake_anomalous_data.generate_mtz_file(
    file_base="tst_validate_waters_1",
    d_min=1.5,
    anomalous_scatterers=[
      group_args(selection="element CD", fp=-0.29, fdp=2.676),
      group_args(selection="element CL", fp=0.256, fdp=0.5),
    ])
  pdb_in = any_file(pdb_file)
  hierarchy = pdb_in.file_object.hierarchy
  hierarchy, n = mmtbx.ions.utils.anonymize_ions(hierarchy, log=null_out())
  hierarchy.write_pdb_file("%s_start.pdb" % file_base,
    crystal_symmetry=pdb_in.file_object.crystal_symmetry())
  args = ["tst_validate_waters_1_start.pdb", "tst_validate_waters_1.mtz",
          "skip_twin_detection=True"]
  results = validate_waters.run(args=args, out=null_out())
  out = StringIO()
  results.show(out=out)
  s = easy_pickle.dumps(results)
  r2 = easy_pickle.loads(s)
  out2 = StringIO()
  r2.show(out=out2)
  assert not show_diff(out.getvalue(), out2.getvalue())
  assert (results.n_bad >= 1) and (results.n_heavy == 2)
def _extract_sequence_chain(self, chain, selection):
  """Extract the sequence and residue groups of a chain."""
  sequence = []
  rgs = []
  counter = 0
  for rg in chain.residue_groups():
    good = False
    for ai in rg.atoms().extract_i_seq():
      # print ai, selection[ai]
      if selection[ai]:
        good = True
        break
    if good:
      if len(rg.unique_resnames()) == 1:
        resname = rg.unique_resnames()[0]
        olc = iotbx.pdb.amino_acid_codes.one_letter_given_three_letter.get(
          resname, "X")
        if olc != "X":
          # proteins only
          sequence.append(olc)
          rgs.append(libtbx.group_args(i_seq=counter, rg=rg))
          counter += 1
        elif iotbx.pdb.common_residue_names_get_class(
            name=resname.strip()) == "common_rna_dna":
          # rna/dna
          sequence.append(resname.strip())
          rgs.append(libtbx.group_args(i_seq=counter, rg=rg))
          counter += 1
  return "".join(sequence), rgs
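# The one-letter lookup used above comes from iotbx.pdb.amino_acid_codes, which
# this module already depends on.  A small self-contained illustration of that
# dictionary and its "X" fallback:
from iotbx.pdb import amino_acid_codes
olc_table = amino_acid_codes.one_letter_given_three_letter
print(olc_table.get("GLY", "X"))  # "G"
print(olc_table.get("HOH", "X"))  # not an amino acid -> falls back to "X"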
def simple_align_measures(self):
  similar_regions = []
  smallest_mean = 5000
  self.simple_aligned_regions = []
  # for each continuous segment in residues_1, compare to each segment in
  # residues_2 and find similar regions
  for seg_1 in self.residues_1.segments:
    measures_1 = self.get_measures(seg_1)
    for seg_2 in self.residues_2.segments:
      measures_2 = self.get_measures(seg_2)
      # sr = find_similar_regions(measures_1, measures_2)
      similar_regions = cpputils.get_similar_regions(
        measures_1, measures_2, self.threshold, self.window_size)
      # similar_regions is a list of objects (made in cpp) having these
      # attributes: i_1, i_2, mean, window_length.  i_1 and i_2 are the
      # starting indices in measures_1 and measures_2, respectively, which
      # align.  By extension, i_1 and i_2 are also the starting indices in
      # seg_1 and seg_2.
      # get similar regions residues (cablam linked_residue objects)
      for sr in similar_regions:
        is_1 = [i for i in range(sr.i_1, sr.i_1 + sr.window_length)]
        is_2 = [i for i in range(sr.i_2, sr.i_2 + sr.window_length)]
        ar = []
        for i in range(len(is_1)):
          ar.append((seg_1[is_1[i]], seg_2[is_2[i]]))
        if self.threshold > 0:
          self.simple_aligned_regions.append(
            group_args(aligned_residues=ar, mean=sr.mean))
        else:
          if sr.mean < smallest_mean:
            smallest_mean = sr.mean
            self.simple_aligned_regions = [
              group_args(aligned_residues=ar, mean=sr.mean)]
def run (args=(), params=None, out=sys.stdout) : if (len(args) > 0) : cmdline = iotbx.phil.process_command_line_with_files( args=args, master_phil=master_phil, pdb_file_def="symmetry_search.file_name", reflection_file_def="symmetry_search.file_name") params = cmdline.work.extract().symmetry_search elif (params is None) : raise Usage("""mmtbx.search_pdb_symmetry [file] [space_group] [unit_cell] Utility for finding similar unit cells deposited in the PDB. """) else : params = params.symmetry_search from mmtbx import pdb_symmetry from iotbx import crystal_symmetry_from_any from cctbx import crystal db = pdb_symmetry.load_db() if (params.file_name is not None) : symm = crystal_symmetry_from_any.extract_from(file_name=params.file_name) if (symm is None) : raise Sorry("The file %s does not include symmetry information." % params.file_name) elif (symm.space_group() is None) or (symm.unit_cell() is None) : raise Sorry("Incomplete symmetry information in %s." % params.file_name) else : symm = crystal.symmetry( unit_cell=params.unit_cell, space_group_info=params.space_group) print >> out, "" print >> out, "Input symmetry:" symm.show_summary() scores = pdb_symmetry.symmetry_search(db, symm, max_rmsd=params.max_rmsd) niggli_cell = symm.niggli_cell().unit_cell().parameters() print >> out, "" print >> out, "Top %d matches (sorted by RMSD):" results = [] for scored in scores[:params.max_hits_to_display] : print >> out, "%s (rmsd = %.3f, volume ratio = %.2f)" % \ (scored.entry.pdb_id, scored.rmsd, scored.volume_ratio) print >> out, " Unit cell: %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f" % \ scored.entry.crystal_symmetry.unit_cell().parameters() print >> out, " Niggli cell: %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f" % \ scored.entry.niggli_cell.unit_cell().parameters() print >> out, " Target cell: %8.3f %8.3f %8.3f %8.3f %8.3f %8.3f" % \ niggli_cell print >> out, "" results.append(group_args( pdb_id=scored.entry.pdb_id, rmsd=scored.rmsd, volume_ratio=scored.volume_ratio, pdb_symmetry=scored.entry.crystal_symmetry)) return group_args( crystal_symmetry=symm, hits=results)
def coord_stats_with_flips(sites1, sites2, atoms):
  """
  Calculate RMSD and maximum distance for a pair of coordinate arrays,
  taking into account the symmetric or pseudo-symmetric nature of many
  sidechains.
  """
  from scitbx.matrix import col
  assert (len(sites1) == len(sites2) == len(atoms) > 0)
  rmsd_no_flip = rmsd_flip = None
  mean_sq = max_deviation_no_flip = 0
  n_sites = 0
  for site1, site2, atom in zip(sites1, sites2, atoms):
    if (atom.element.strip() == "H"):
      continue
    distance = abs(col(site1) - col(site2))
    mean_sq += distance**2
    if (distance > max_deviation_no_flip):
      max_deviation_no_flip = distance
    n_sites += 1
  assert (n_sites > 0)
  rmsd_no_flip = sqrt(mean_sq/n_sites)
  # TODO add HIS?
  if (not atoms[0].parent().resname in ["ASP","GLU","ASN","GLN","PHE","TYR"]):
    return group_args(rmsd=rmsd_no_flip, max_dev=max_deviation_no_flip)
  mean_sq = max_deviation_flip = 0
  for site1, site2, atom in zip(sites1, sites2, atoms):
    if (atom.element.strip() == "H"):
      continue
    atom_name = atom.name.strip()
    labels = atom.fetch_labels()
    symmetric_name = symmetric_atom_names_dict.get(atom_name, None)
    if (symmetric_name is not None):
      for site1_flip, site2_flip, atom_flip in zip(sites1, sites2, atoms):
        labels_flip = atom_flip.fetch_labels()
        if ((labels_flip.resid() == labels.resid()) and
            (labels_flip.chain_id == labels.chain_id) and
            (atom_flip.name.strip() == symmetric_name)):
          distance = abs(col(site1) - col(site2_flip))
          mean_sq += distance**2
          if (distance > max_deviation_flip):
            max_deviation_flip = distance
          break
      else:
        # didn't find the symmetry atom
        rmsd_flip = float(sys.maxint)
        max_deviation_flip = float(sys.maxint)
        break
    else:
      distance = abs(col(site1) - col(site2))
      mean_sq += distance**2
      if (distance > max_deviation_flip):
        max_deviation_flip = distance
  rmsd_flip = sqrt(mean_sq/n_sites)
  return group_args(
    rmsd=min(rmsd_no_flip, rmsd_flip),
    max_dev=min(max_deviation_no_flip, max_deviation_flip))
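# The function above relies on a module-level symmetric_atom_names_dict that is
# not shown in this excerpt.  A minimal, hypothetical sketch of what such a
# lookup table presumably contains (chemically equivalent atom-name pairs for
# flippable sidechains); the exact contents in the original module may differ:
symmetric_atom_names_dict_example = {
  "OD1": "OD2", "OD2": "OD1",  # ASP carboxylate oxygens
  "OE1": "OE2", "OE2": "OE1",  # GLU carboxylate oxygens
  "CD1": "CD2", "CD2": "CD1",  # PHE/TYR ring carbons
  "CE1": "CE2", "CE2": "CE1",  # PHE/TYR ring carbons
}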
def eigen_system_default_handler(self, m, suffix): ### def zero(x, e): for i in xrange(len(x)): if(abs(x[i])<e): x[i]=0 return x ### # special case m11,m12,m13, m21,m22,m23, m31,m32,m33 = m.as_flex_double_matrix() if(self.is_zero(m12) and self.is_zero(m13) and self.is_zero(m21) and self.is_zero(m23) and self.is_zero(m31) and self.is_zero(m32)): l_x = matrix.col((1.0, 0.0, 0.0)) l_y = matrix.col((0.0, 1.0, 0.0)) l_z = matrix.col((0.0, 0.0, 1.0)) return group_args(x=l_x, y=l_y, z=l_z, vals=zero([m11,m22,m33], self.eps)) # es = eigensystem.real_symmetric(m.as_sym_mat3()) vals, vecs = es.values(), es.vectors() print >> self.log, " eigen values (%s):"%suffix, " ".join([self.ff%i for i in vals]) print >> self.log, " eigen vectors (%s):"%suffix, " ".join([self.ff%i for i in vecs]) assert vals[0]>=vals[1]>=vals[2] ### vals = zero(vals, self.eps) vecs = zero(vecs, self.eps) ### # case 1: all different if(abs(vals[0]-vals[1])>=self.eps and abs(vals[1]-vals[2])>=self.eps and abs(vals[0]-vals[2])>=self.eps): l_z = matrix.col((vecs[0], vecs[1], vecs[2])) l_y = matrix.col((vecs[3], vecs[4], vecs[5])) l_x = l_y.cross(l_z) vals = [vals[2], vals[1], vals[0]] # case 2: all three coincide elif((abs(vals[0]-vals[1])<self.eps and abs(vals[1]-vals[2])<self.eps and abs(vals[0]-vals[2])<self.eps)): print >> self.log, " three eigenvalues are equal: make eigenvectors unit." l_x = matrix.col((1, 0, 0)) l_y = matrix.col((0, 1, 0)) l_z = matrix.col((0, 0, 1)) elif([abs(vals[0]-vals[1])<self.eps, abs(vals[1]-vals[2])<self.eps, abs(vals[0]-vals[2])<self.eps].count(True)==1): print >> self.log, " two eigenvalues are equal." # l_z = matrix.col((vecs[0], vecs[1], vecs[2])) l_y = matrix.col((vecs[3], vecs[4], vecs[5])) l_x = l_y.cross(l_z) vals = [vals[2], vals[1], vals[0]] return group_args(x=l_x, y=l_y, z=l_z, vals=vals)
def get_module_tests(module_name, valgrind=False):
  dist_path = libtbx.env.dist_path(module_name)
  if (dist_path is None):
    raise Sorry("'%s' is not a valid CCTBX module." % module_name)
  elif (not os.path.isfile(os.path.join(dist_path, "run_tests.py"))):
    raise Sorry("%s/run_tests.py does not exist." % module_name)
  tst_list = import_python_object(
    import_path="%s.run_tests.tst_list" % module_name,
    error_prefix="",
    target_must_be="",
    where_str="").object
  assert (isinstance(tst_list, tuple) or isinstance(tst_list, list))
  build_path = libtbx.env.under_build(module_name)
  assert (build_path is not None) and (dist_path is not None)
  commands = []
  co = group_args(
    verbose=False,
    quick=True,
    valgrind=valgrind)
  for cmd in test_utils.iter_tests_cmd(
      co=co,
      build_dir=build_path,
      dist_dir=dist_path,
      tst_list=tst_list):
    commands.append(cmd)
  return commands
def step_h(self, V_L, b_o):
  """
  Three uncorrelated translations.
  """
  print_step("Step h:", self.log)
  V_M = b_o.R_PL * V_L * b_o.R_PL.transpose()
  self.show_matrix(x=V_M, title="V_M ")
  es = self.eigen_system_default_handler(m=V_M)
  v_x, v_y, v_z = es.x, es.y, es.z
  lam_u, lam_v, lam_w = es.vals
  self.show_vector(x=v_x, title="v_x")
  self.show_vector(x=v_y, title="v_y")
  self.show_vector(x=v_z, title="v_z")
  assert approx_equal(v_x.dot(v_y), 0)
  assert approx_equal(v_y.dot(v_z), 0)
  assert approx_equal(v_z.dot(v_x), 0)
  R_MV = matrix.sqr([
    v_x[0], v_y[0], v_z[0],
    v_x[1], v_y[1], v_z[1],
    v_x[2], v_y[2], v_z[2]])
  self.show_matrix(x=R_MV, title="R_MV")
  V_V = matrix.sym(sym_mat3=[lam_u, lam_v, lam_w, 0, 0, 0])
  self.show_matrix(x=V_V, title="V_V")
  assert approx_equal(V_V, R_MV.transpose() * V_M * R_MV)  # formula (20)
  return group_args(
    v_x  = v_x,
    v_y  = v_y,
    v_z  = v_z,
    V_M  = V_M,
    V_V  = V_V,
    R_MV = R_MV)
def step_b(self, T_p, L_p, S_p):
  """
  Principal libration axes and transition to L-base.
  """
  print_step("Step b:", self.log)
  es = self.eigen_system_default_handler(m=L_p)
  l_x, l_y, l_z = es.x, es.y, es.z
  self.show_vector(x=l_x, title="l_x")
  self.show_vector(x=l_y, title="l_y")
  self.show_vector(x=l_z, title="l_z")
  R_PL = matrix.sqr(
    [l_x[0], l_y[0], l_z[0],
     l_x[1], l_y[1], l_z[1],
     l_x[2], l_y[2], l_z[2]])
  self.show_matrix(x=R_PL, title="rotation matrix R_PL")
  assert approx_equal(R_PL.transpose(), R_PL.inverse())
  T_L = R_PL.transpose() * T_p * R_PL
  L_L = R_PL.transpose() * L_p * R_PL
  S_L = R_PL.transpose() * S_p * R_PL
  self.show_matrix(x=T_L, title="T_L")
  self.show_matrix(x=L_L, title="L_L")
  self.show_matrix(x=S_L, title="S_L")
  return group_args(
    l_x  = l_x,
    l_y  = l_y,
    l_z  = l_z,
    T_L  = T_L,
    L_L  = L_L,
    S_L  = S_L,
    R_PL = R_PL)
def get_map_stats_for_atoms(self, atoms):
  from cctbx import maptbx
  from scitbx.array_family import flex
  sites_cart = flex.vec3_double()
  sites_cart_nonH = flex.vec3_double()
  values_2fofc = flex.double()
  values_fofc = flex.double()
  for atom in atoms:
    sites_cart.append(atom.xyz)
    if (not atom.element.strip() in ["H","D"]):  # XXX trap: neutrons?
      sites_cart_nonH.append(atom.xyz)
      site_frac = self.unit_cell.fractionalize(atom.xyz)
      values_2fofc.append(self.f_map.eight_point_interpolation(site_frac))
      values_fofc.append(self.diff_map.eight_point_interpolation(site_frac))
  if (len(sites_cart_nonH) == 0):
    return None
  sel = maptbx.grid_indices_around_sites(
    unit_cell=self.unit_cell,
    fft_n_real=self.f_map.focus(),
    fft_m_real=self.f_map.all(),
    sites_cart=sites_cart,
    site_radii=get_atom_radii(atoms, self.atom_radius))
  f_map_sel = self.f_map.select(sel)
  model_map_sel = self.model_map.select(sel)
  diff_map_sel = self.diff_map.select(sel)
  cc = flex.linear_correlation(x=f_map_sel, y=model_map_sel).coefficient()
  return group_args(
    cc=cc,
    mean_2fofc=flex.mean(values_2fofc),
    mean_fofc=flex.mean(values_fofc))
def get_grm(file_name):
  from mmtbx import monomer_library
  import mmtbx.monomer_library.server
  import mmtbx.monomer_library.pdb_interpretation
  params = monomer_library.pdb_interpretation.master_params.extract()
  params.use_neutron_distances = True
  params.restraints_library.cdl = False
  processed_pdb_file = monomer_library.pdb_interpretation.process(
    mon_lib_srv    = monomer_library.server.server(),
    ener_lib       = monomer_library.server.ener_lib(use_neutron_distances=True),
    file_name      = file_name,
    params         = params,
    force_symmetry = True)
  xray_structure = processed_pdb_file.xray_structure()
  sctr_keys = \
    xray_structure.scattering_type_registry().type_count_dict().keys()
  has_hd = "H" in sctr_keys or "D" in sctr_keys
  geometry = processed_pdb_file.geometry_restraints_manager(
    show_energies                = False,
    assume_hydrogens_all_missing = not has_hd,
    plain_pairs_radius           = 5.0)
  restraints_manager = mmtbx.restraints.manager(
    geometry      = geometry,
    normalization = False)
  return group_args(
    restraints_manager = restraints_manager,
    pdb_hierarchy      = processed_pdb_file.all_chain_proxies.pdb_hierarchy)
def callback_wrapper(self, message, data, accumulate=True, cached=True):
  if cached:
    self.callback_other(data=group_args(
      message=message,
      data=data,
      accumulate=accumulate,
      cached=cached))
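# Most of the helpers in this file package their results as libtbx.group_args
# objects.  A tiny self-contained illustration of that container (plain Python,
# no other cctbx dependencies; the values are made up for this example):
from libtbx import group_args

status = group_args(message="refinement step", data=42, accumulate=True)
print(status.message, status.data)  # fields are exposed as plain attributes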
def get_sites_cc(self, atoms, sites=None):
  from cctbx import maptbx
  from scitbx.array_family import flex
  radii = flex.double()
  for atom in atoms:
    if (atom.element.strip() in ["H", "D"]):
      radii.append(1.)
    else:
      radii.append(1.5)
  fcalc_map = self.fcalc_real_map
  if (sites is None):
    sites = atoms.extract_xyz()
  else:
    fcalc_map = self.get_new_fcalc_map(
      sites_new=sites,
      i_seqs=atoms.extract_i_seq())
  sel = maptbx.grid_indices_around_sites(
    unit_cell  = self.unit_cell,
    fft_n_real = self.n_real,
    fft_m_real = self.m_real,
    sites_cart = sites,
    site_radii = radii)
  m1 = self.real_map.select(sel)
  m2 = fcalc_map.select(sel)
  cc = flex.linear_correlation(x=m1, y=m2).coefficient()
  return group_args(
    cc=cc,
    map_mean=flex.mean(m1.as_1d()))
def show_model_vs_data(fmodel):
  d_max, d_min = fmodel.f_obs().d_max_min()
  flags_pc = fmodel.r_free_flags().data().count(True)*100./\
    fmodel.r_free_flags().data().size()
  if(flags_pc == 0):
    r_free = None
  else:
    r_free = fmodel.r_free()
  sc = None
  mm = getattr(fmodel, "mask_manager", None)
  if (mm is not None):
    sc = mm.solvent_content_via_mask
  r_work_outer_shell = r_free_outer_shell = None
  if (type(fmodel).__name__ != "twin_model_manager"):
    f_obs_work_copy = fmodel.f_obs_work().customized_copy()
    f_obs_work_copy.setup_binner(n_bins=10)
    bin_selection_work = f_obs_work_copy.binner().selection(10)
    f_obs_free_copy = fmodel.f_obs_free().customized_copy()
    f_obs_free_copy.setup_binner(n_bins=10)
    bin_selection_free = f_obs_free_copy.binner().selection(10)
    r_work_outer_shell = fmodel.r_work(selection=bin_selection_work)
    r_free_outer_shell = fmodel.r_free(selection=bin_selection_free)
  return group_args(
    r_work                   = fmodel.r_work(),
    r_free                   = r_free,
    r_work_outer_shell       = r_work_outer_shell,
    r_free_outer_shell       = r_free_outer_shell,
    solvent_content_via_mask = sc)
def extract_f_model_core_constants(cif_block):
  k_sol = _float_or_None(cif_block.get('_refine.solvent_model_param_ksol'))
  b_sol = _float_or_None(cif_block.get('_refine.solvent_model_param_bsol'))
  b_cart = [_float_or_None(cif_block.get('_refine.aniso_B[%s][%s]' % (i, j)))
            for i, j in ('11', '22', '33', '12', '13', '23')]
  assert b_cart.count(None) in (0, 6)
  r_solv = _float_or_None(cif_block.get('_refine.pdbx_solvent_vdw_probe_radii'))
  r_shrink = _float_or_None(cif_block.get('_refine.pdbx_solvent_shrinkage_radii'))
  r_work = _float_or_None(cif_block.get('_refine.ls_R_factor_R_work'))
  r_free = _float_or_None(cif_block.get('_refine.ls_R_factor_R_free'))
  # TODO: extract these from the CIF?
  twin_fraction = None
  twin_law = None
  grid_step_factor = None
  return group_args(
    k_sol            = k_sol,
    b_sol            = b_sol,
    b_cart           = b_cart,
    twin_fraction    = twin_fraction,
    twin_law         = twin_law,
    r_solv           = r_solv,
    r_shrink         = r_shrink,
    grid_step_factor = grid_step_factor,
    r_work           = r_work,
    r_free           = r_free)
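# _float_or_None is not shown in this excerpt.  A minimal sketch of the helper
# the function above appears to rely on (a hypothetical reconstruction; the
# real implementation may handle the CIF placeholders '?' and '.' differently):
def _float_or_None(value):
  if (value in (None, '?', '.')):
    return None
  return float(value)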
def extract_residues(self, model_i, number_previous_scatters, combine=True):
  result = []
  model = self.pdb_hierarchy.models()[model_i]
  rm = []
  for chain in model.chains():
    for rg in chain.residue_groups():
      rg_i_seqs = []
      r_name = None
      for ag in rg.atom_groups():
        if(r_name is None):
          r_name = ag.resname
        for atom in ag.atoms():
          if(self.selection[atom.i_seq - number_previous_scatters]):
            rg_i_seqs.append(atom.i_seq - number_previous_scatters)
      if(len(rg_i_seqs) != 0):
        rm.append(group_args(
          selection = flex.size_t(rg_i_seqs),
          name      = r_name,
          model_id  = model_i,
          resid     = rg.resid(),
          chain_id  = chain.id))
  result.append(rm)
  if(combine):
    r0 = result[0]
    for r in result[1:]:
      for i, ri in enumerate(r):
        r0[i].selection.extend(ri.selection)
        assert r0[i].name == ri.name
  else:
    r0 = result[0]
    for r in result[1:]:
      r0.extend(r)
  return r0
def stats_single_image(imageset, reflections, i=None, resolution_analysis=True, plot=False): reflections = map_to_reciprocal_space(reflections, imageset) if plot and i is not None: filename = "i_over_sigi_vs_resolution_%d.png" %(i+1) hist_filename = "spot_count_vs_resolution_%d.png" %(i+1) extra_filename = "log_sum_i_sigi_vs_resolution_%d.png" %(i+1) distl_method_1_filename = "distl_method_1_%d.png" %(i+1) distl_method_2_filename = "distl_method_2_%d.png" %(i+1) else: filename = None hist_filename = None extra_filename = None distl_method_1_filename = None distl_method_2_filename = None d_star_sq = flex.pow2(reflections['rlp'].norms()) d_spacings = uctbx.d_star_sq_as_d(d_star_sq) #plot_ordered_d_star_sq(reflections, imageset) reflections_all = reflections ice_sel = ice_rings_selection(reflections_all) reflections_no_ice = reflections_all.select(~ice_sel) n_spots_total = len(reflections_all) n_spots_no_ice = len(reflections_no_ice) n_spot_4A = (d_spacings > 4).count(True) intensities = reflections_no_ice['intensity.sum.value'] total_intensity = flex.sum(intensities) #print i if hist_filename is not None: resolution_histogram( reflections, imageset, plot_filename=hist_filename) if extra_filename is not None: log_sum_i_sigi_vs_resolution( reflections, imageset, plot_filename=extra_filename) if resolution_analysis and n_spots_no_ice > 10: estimated_d_min = estimate_resolution_limit( reflections_all, imageset, ice_sel=ice_sel, plot_filename=filename) d_min_distl_method_1, noisiness_method_1 \ = estimate_resolution_limit_distl_method1( reflections_all, imageset, ice_sel, plot_filename=distl_method_1_filename) d_min_distl_method_2, noisiness_method_2 = \ estimate_resolution_limit_distl_method2( reflections_all, imageset, ice_sel, plot_filename=distl_method_2_filename) else: estimated_d_min = -1.0 d_min_distl_method_1 = -1.0 noisiness_method_1 = -1.0 d_min_distl_method_2 = -1.0 noisiness_method_2 = -1.0 return group_args(n_spots_total=n_spots_total, n_spots_no_ice=n_spots_no_ice, n_spots_4A=n_spot_4A, total_intensity=total_intensity, estimated_d_min=estimated_d_min, d_min_distl_method_1=d_min_distl_method_1, noisiness_method_1=noisiness_method_1, d_min_distl_method_2=d_min_distl_method_2, noisiness_method_2=noisiness_method_2)
def init_result():
  return group_args(
    k_mask_bin_orig   = None,
    k_mask_bin_smooth = None,
    k_mask            = None,
    k_isotropic       = None,
    k_mask_fit_params = None)
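# Hedged usage sketch (not part of the original module): group_args fields are
# plain attributes, so the placeholder record returned by init_result() can be
# filled in incrementally by later steps of the bulk-solvent fit, e.g.:
result = init_result()
result.k_mask = 0.35       # dummy value, for illustration only
result.k_isotropic = 1.02  # dummy value, for illustration only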
def apply_back_trace_of_overall_exp_scale_matrix(self, xray_structure=None):
  k, b = self.overall_isotropic_kb_estimate()
  k_total = self.core.k_isotropic * self.core.k_anisotropic * \
    self.core.k_isotropic_exp
  k, b, r = mmtbx.bulk_solvent.fit_k_exp_b_to_k_total(k_total, self.ss, k, b)
  if(r < 0.7):
    self.k_exp_overall, self.b_exp_overall = k, b
  if(xray_structure is None):
    return None
  b_adj = 0
  if([self.k_exp_overall, self.b_exp_overall].count(None) == 0 and k != 0):
    bs1 = xray_structure.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
    def split(b_trace, xray_structure):
      b_min = xray_structure.min_u_cart_eigenvalue()*adptbx.u_as_b(1.)
      b_res = min(0, b_min + b_trace + 1.e-6)
      b_adj = b_trace - b_res
      xray_structure.shift_us(b_shift=b_adj)
      return b_adj, b_res
    b_adj, b_res = split(b_trace=self.b_exp_overall,
      xray_structure=xray_structure)
    k_new = self.k_exp_overall*flex.exp(-self.ss*b_adj)
    bs2 = xray_structure.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
    diff = bs2 - bs1
    assert approx_equal(flex.min(diff), flex.max(diff))
    assert approx_equal(flex.max(diff), b_adj)
    self.core = self.core.update(
      k_isotropic     = self.core.k_isotropic,
      k_isotropic_exp = self.core.k_isotropic_exp/k_new,
      k_masks         = [m*flex.exp(-self.ss*b_adj) for m in self.core.k_masks])
  return group_args(
    xray_structure = xray_structure,
    k_isotropic    = self.k_isotropic(),
    k_anisotropic  = self.k_anisotropic(),
    k_mask         = self.k_masks(),
    b_adj          = b_adj)
def parse_database(file_name):
  from cctbx import crystal
  from cctbx import sgtbx
  from cctbx import uctbx
  db = []
  lines = open(file_name).readlines()
  process = False
  for line in lines:
    line = line.strip()
    if (process):
      fields = line.split()
      pdb_id = fields[0]
      try :
        uc = uctbx.unit_cell([ float(x.replace(",","")) for x in fields[2:8] ])
      except RuntimeError, e :
        print "Unit cell error:"
        print line
        continue
      try :
        sg = sgtbx.space_group_info(" ".join(fields[8:-1]))
      except RuntimeError, e :
        print "Unrecognized space group:"
        print line
        continue
      try :
        symm = crystal.symmetry(unit_cell=uc, space_group_info=sg)
      except AssertionError :
        print "Incompatible unit cell parameters:"
        print line
        continue
      niggli_symm = symm.niggli_cell()
      db.append(group_args(
        pdb_id=pdb_id,
        crystal_symmetry=symm,
        niggli_cell=niggli_symm))
  return db
def finalize(self):
  return group_args(
    # Libration rms around L-axes
    dx = math.sqrt(truncate(self.Lxx)),
    dy = math.sqrt(truncate(self.Lyy)),
    dz = math.sqrt(truncate(self.Lzz)),
    # Unit vectors defining three Libration axes
    l_x = self.l_x,
    l_y = self.l_y,
    l_z = self.l_z,
    # Rotation axes pass through the points in the L base
    w_L_lx = self.w.w_lx,
    w_L_ly = self.w.w_ly,
    w_L_lz = self.w.w_lz,
    # Rotation axes pass through the points in the M base
    w_M_lx = self.R_ML*self.w.w_lx,
    w_M_ly = self.R_ML*self.w.w_ly,
    w_M_lz = self.R_ML*self.w.w_lz,
    # Correlation shifts sx, sy, sz for libration
    sx = self.sx,
    sy = self.sy,
    sz = self.sz,
    # Vectors defining three Vibration axes
    v_x_M = self.v_x_M,
    v_y_M = self.v_y_M,
    v_z_M = self.v_z_M,
    v_x = self.v_x,
    v_y = self.v_y,
    v_z = self.v_z,
    # Vibration rms along V-axes
    tx = self.tx,
    ty = self.ty,
    tz = self.tz)
def get_module_tests(module_name, valgrind=False):
  dist_path = libtbx.env.dist_path(module_name)
  if (dist_path is None):
    raise Sorry("'%s' is not a valid CCTBX module." % module_name)
  # XXX don't check for file name, because import conventions differ among
  # derived modules - dist_path can be either the sys.path entry or the actual
  # module contents.  If the file is missing the import below will fail, which
  # is okay for testing purposes.
  tst_list = import_python_object(
    import_path="%s.run_tests.tst_list" % module_name,
    error_prefix="",
    target_must_be="",
    where_str="").object
  assert (isinstance(tst_list, tuple) or isinstance(tst_list, list))
  build_path = libtbx.env.under_build(module_name)
  assert (build_path is not None) and (dist_path is not None)
  commands = []
  co = group_args(
    verbose=False,
    quick=True,
    valgrind=valgrind)
  for cmd in test_utils.iter_tests_cmd(
      co=co,
      build_dir=build_path,
      dist_dir=dist_path,
      tst_list=tst_list):
    commands.append(cmd)
  return commands
def show_short(self, out=None, silent=False):
  if out is None:
    out = sys.stdout
  if not silent: print >> out
  if not silent: print >> out, "SigmaA vs Resolution"
  if not silent: print >> out, "--------------------"
  if not silent: print >> out, "1/d^3      d     sum weights  sigmaA"
  resolution = []
  sigmaa = []
  for h, sa in zip(self.h_array, self.sigmaa_array):
    if h == 0:
      d = " " * 7
    else:
      # convert 1/d^3 back to d; use a float exponent so the cube root is not
      # silently truncated to x**0 under Python 2 integer division
      d = "%7.4f" % (1.0 / h) ** (1.0 / 3.0)
    if not silent: print >> out, "%s %7.4f" % (d, sa)
    resolution.append(d)
    sigmaa.append(sa)
  if not silent: print >> out
  if not silent: print >> out
  return group_args(resolution=resolution, sigmaa=sigmaa)
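# Quick sanity check of the 1/d^3 -> d conversion used above (plain Python,
# illustration only): for d = 2.0 Angstrom, h = 1/d^3 = 0.125 and the cube
# root of 1/h recovers d.
h_example = 1.0 / 2.0**3
assert abs((1.0 / h_example) ** (1.0 / 3.0) - 2.0) < 1.e-9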
def show_xray_structure_statistics(xray_structure, atom_selections, hd_sel = None): result = group_args( all = None, macromolecule = None, sidechain = None, solvent = None, ligand = None, backbone = None) if(hd_sel is not None): xray_structure = xray_structure.select(~hd_sel) for key in atom_selections.__dict__.keys(): value = atom_selections.__dict__[key] if(value.count(True) > 0): if(hd_sel is not None): value = value.select(~hd_sel) xrs = xray_structure.select(value) atom_counts = xrs.scattering_types_counts_and_occupancy_sums() atom_counts_strs = [] for ac in atom_counts: atom_counts_strs.append("%s:%s:%s"%(ac.scattering_type,str(ac.count), str("%10.2f"%ac.occupancy_sum).strip())) atom_counts_str = " ".join(atom_counts_strs) b_isos = xrs.extract_u_iso_or_u_equiv() n_aniso = xrs.use_u_aniso().count(True) n_not_positive_definite = xrs.is_positive_definite_u().count(False) b_mean = format_value("%-6.1f",adptbx.u_as_b(flex.mean(b_isos))) b_min = format_value("%-6.1f",adptbx.u_as_b(flex.min(b_isos))) b_max = format_value("%-6.1f",adptbx.u_as_b(flex.max(b_isos))) n_atoms = format_value("%-8d",xrs.scatterers().size()).strip() n_npd = format_value("%-8s",n_not_positive_definite).strip() occ = xrs.scatterers().extract_occupancies() o_mean = format_value("%-6.2f",flex.mean(occ)).strip() o_min = format_value("%-6.2f",flex.min(occ)).strip() o_max = format_value("%-6.2f",flex.max(occ)).strip() tmp_result = group_args( n_atoms = n_atoms, atom_counts_str = atom_counts_str, b_min = b_min, b_max = b_max, b_mean = b_mean, o_min = o_min, o_max = o_max, o_mean = o_mean, n_aniso = n_aniso, n_npd = n_npd) setattr(result,key,tmp_result) return result
def collect_sidechain_chi_angles(pdb_hierarchy, atom_selection=None):
  angle_lookup = SidechainAngles(False)
  residue_chis = []
  if atom_selection is not None:
    if (isinstance(atom_selection, flex.bool)):
      actual_selection = atom_selection
    elif (isinstance(atom_selection, flex.size_t)):
      actual_selection = flex.bool(pdb_hierarchy.atoms_size(), False)
      actual_selection.set_selected(atom_selection, True)
  if atom_selection is None:
    actual_selection = flex.bool(pdb_hierarchy.atoms_size(), True)
  for model in pdb_hierarchy.models():
    for chain in model.chains():
      for conformer in chain.conformers():
        for residue in conformer.residues():
          n_chi = angle_lookup.chisPerAA.get(residue.resname.lower(), 0)
          try :
            n_chi = int(n_chi)
          except ValueError :
            continue
          chis = []
          altloc = residue.atoms()[0].fetch_labels().altloc
          i_seqs = []
          for i in range(1, n_chi+1):
            atoms = angle_lookup.extract_chi_atoms("chi%d" % i, residue)
            if atoms is None:
              pass
            else :
              i_seqs = [ atom.i_seq for atom in atoms ]
              chis.append(group_args(chi_id=i, i_seqs=i_seqs))
          atoms_in_selection = True
          for i_seq in i_seqs:
            if not actual_selection[i_seq]:
              atoms_in_selection = False
              break
          if len(chis) > 0 and atoms_in_selection:
            residue_info = group_args(
              residue_name=residue.resname,
              chain_id=chain.id,
              altloc=altloc,
              resid=residue.resid(),
              chis=chis)
            residue_chis.append(residue_info)
  return residue_chis
def grid(sphere, gap, overlap):
  c = flex.double(sphere.center)
  x_start, y_start, z_start = c - float(sphere.radius)
  x_end, y_end, z_end = c + float(sphere.radius)
  x_range = frange(c[0], c[0]+gap, overlap)
  y_range = frange(c[1], c[1]+gap, overlap)
  z_range = frange(c[2], c[2]+gap, overlap)
  return group_args(
    x_range = x_range,
    y_range = y_range,
    z_range = z_range)
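# frange is not shown in this excerpt.  A minimal sketch of a float-range
# helper with the (start, stop, step) call pattern used above; the real helper
# in the original module may behave differently (e.g. endpoint handling):
def frange(start, stop, step):
  values = []
  x = start
  while x < stop:
    values.append(x)
    x += step
  return values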
def get_pdb_fields(atom_group):
  residue_group = atom_group.parent()
  chain = residue_group.parent()
  return group_args(
    resname=atom_group.resname,
    chain_id=chain.id,
    resseq=residue_group.resseq_as_int(),
    icode=residue_group.icode)
def exercise(i_pdb, pdb_for_map, rotamer_manager, sin_cos_table, d_min = 1.5, resolution_factor = 0.1): # Best fitting residue is a rotamer outlier (PHE 407), two scenarious: # - outlier fits density perfectly # - outlier fits not so good. # No better options to fit other than keep the outlier unchanged. # # answer PDB pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_answer) pdb_inp.write_pdb_file(file_name = "answer.pdb") xrs_answer = pdb_inp.xray_structure_simple() # answer map pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_for_map) pdb_inp.write_pdb_file(file_name = "for_map.pdb") xrs_map = pdb_inp.xray_structure_simple() f_calc = xrs_map.structure_factors(d_min = d_min).f_calc() fft_map = f_calc.fft_map(resolution_factor=resolution_factor) fft_map.apply_sigma_scaling() target_map = fft_map.real_map_unpadded() mtz_dataset = f_calc.as_mtz_dataset(column_root_label = "FCmap") mtz_object = mtz_dataset.mtz_object() mtz_object.write(file_name = "answer_%s.mtz"%str(i_pdb)) # poor mon_lib_srv = monomer_library.server.server() processed_pdb_file = monomer_library.pdb_interpretation.process( mon_lib_srv = mon_lib_srv, ener_lib = monomer_library.server.ener_lib(), raw_records = flex.std_string(pdb_poor.splitlines()), strict_conflict_handling = True, force_symmetry = True, log = None) pdb_hierarchy_poor = processed_pdb_file.all_chain_proxies.pdb_hierarchy xrs_poor = processed_pdb_file.xray_structure() sites_cart_poor = xrs_poor.sites_cart() pdb_hierarchy_poor.write_pdb_file(file_name = "poor.pdb") # target_map_object = group_args( data = target_map, f_map_diff = None, miller_array = f_calc, crystal_gridding = fft_map) grm = mmtbx.restraints.manager( geometry=processed_pdb_file.geometry_restraints_manager(show_energies=False), normalization = True) sm = mmtbx.refinement.real_space.structure_monitor( pdb_hierarchy = pdb_hierarchy_poor, xray_structure = xrs_poor, target_map_object = target_map_object, geometry_restraints_manager = grm.geometry) result = mmtbx.refinement.real_space.fit_residues.manager( structure_monitor = sm, rotamer_manager = rotamer_manager, sin_cos_table = sin_cos_table, mon_lib_srv = mon_lib_srv) # sm.pdb_hierarchy.write_pdb_file(file_name = "refined_%s.pdb"%str(i_pdb)) dist = xrs_answer.mean_distance(other = sm.xray_structure) assert dist < 0.3, dist
def add_ramalyze_result(self, result):
  # print dir(result); exit()
  self.ramalyze = group_args(
    is_outlier=result.is_outlier(),
    type=result.ramalyze_type(),
    phi=result.phi,
    psi=result.psi,
    score=result.score,
  )
def get_pdb_inputs(pdb_str):
  raw_records = flex.std_string(pdb_str.splitlines())
  processed_pdb_file = rs.get_processed_pdb_object(raw_records=raw_records,
    rama_potential=None, log=None)
  xrs = processed_pdb_file.xray_structure(show_summary=False)
  geometry_restraints_manager = rs.get_geometry_restraints_manager(
    processed_pdb_file=processed_pdb_file,
    xray_structure=xrs)
  pdb_hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy
  return group_args(ph=pdb_hierarchy, grm=geometry_restraints_manager, xrs=xrs)
def __init__( self, xray_structure, step, volume_cutoff=None, mean_diff_map_threshold=None, compute_whole=False, largest_only=False, wrapping=True, # should be False if working with ASU f_obs=None, r_sol=1.1, r_shrink=0.9, f_calc=None, log=None, write_masks=False): adopt_init_args(self, locals()) # self.d_spacings = f_obs.d_spacings().data() self.sel_gte3 = self.d_spacings >= 3 self.miller_array = f_obs.select(self.sel_gte3) # self.crystal_symmetry = self.xray_structure.crystal_symmetry() # Compute mask in p1 (via ASU) self.crystal_gridding = maptbx.crystal_gridding( unit_cell=xray_structure.unit_cell(), space_group_info=xray_structure.space_group_info(), symmetry_flags=maptbx.use_space_group_symmetry, step=step) self.n_real = self.crystal_gridding.n_real() # XXX Where do we want to deal with H and occ==0? self._mask_p1 = self._compute_mask_in_p1() self.solvent_content = 100.*(self._mask_p1 != 0).count(True)/\ self._mask_p1.size() # Optionally compute Fmask from original whole mask, zero-ed at dmin<3A. self.f_mask_whole = self._compute_f_mask_whole() # Connectivity analysis co = maptbx.connectivity(map_data=self._mask_p1, threshold=0.01, preprocess_against_shallow=False, wrapping=wrapping) if (xray_structure.space_group().type().number() != 1): # not P1 co.merge_symmetry_related_regions( space_group=xray_structure.space_group()) # self.conn = co.result().as_double() z = zip(co.regions(), range(0, co.regions().size())) sorted_by_volume = sorted(z, key=lambda x: x[0], reverse=True) # f_mask_data_0 = flex.complex_double(f_obs.data().size(), 0) self.f_mask_0 = None self.FV = OrderedDict() self.mFoDFc_0 = None diff_map = None # mFo-DFc map computed using F_mask_0 (main mask) self.regions = OrderedDict() small_selection = None weak_selection = None # if (log is not None): print(" # volume_p1 uc(%) mFo-DFc: min,max,mean,sd", file=log) # for i_seq, p in enumerate(sorted_by_volume): v, i = p self._region_i_selection = None # must be here inside the loop! f_mask_i = None # must be here inside the loop! # skip macromolecule if (i == 0): continue # skip small volume and accumulate small volumes volume = v * step**3 uc_fraction = v * 100. / self.conn.size() if (volume_cutoff is not None and volume < volume_cutoff): if (volume >= 10): if (small_selection is None): small_selection = self._get_region_i_selection(i) else: small_selection |= self._get_region_i_selection(i) continue # Accumulate regions with volume greater than volume_cutoff (if # volume_cutoff is defined). Weak density regions are included. self.regions[i_seq] = group_args(id=i, i_seq=i_seq, volume=volume, uc_fraction=uc_fraction) # Compute i-th region mask mask_i_asu = self.compute_i_mask_asu( selection=self._get_region_i_selection(i), volume=volume) # Compute F_mask_0 (F_mask for main mask) if (uc_fraction >= 1): f_mask_i = self.compute_f_mask_i(mask_i_asu) f_mask_data_0 += f_mask_i.data() elif (largest_only): break # Compute mFo-DFc map using main mask (once done computing main mask!) 
if (uc_fraction < 1 and diff_map is None): diff_map = self.compute_diff_map(f_mask_data_0=f_mask_data_0) # Analyze mFo-DFc map in the i-th region mi, ma, me, sd = None, None, None, None if (diff_map is not None): iselection = self._get_region_i_selection(i).iselection() blob = diff_map.select(iselection) mean_diff_map = flex.mean(diff_map.select(iselection)) mi, ma, me = flex.min(blob), flex.max(blob), flex.mean(blob) sd = blob.sample_standard_deviation() if (log is not None): print("%3d" % i_seq, "%12.3f" % volume, "%8.4f" % round(uc_fraction, 4), "%7.3f %7.3f %7.3f %7.3f" % (mi, ma, me, sd), file=log) # Accumulate regions with weak density into one region, then skip if (mean_diff_map_threshold is not None): if (mean_diff_map <= mean_diff_map_threshold): if (mean_diff_map > 0.1): if (weak_selection is None): weak_selection = self._get_region_i_selection( i) else: weak_selection |= self._get_region_i_selection( i) continue else: if (log is not None): print("%3d" % i_seq, "%12.3f" % volume, "%8.4f" % round(uc_fraction, 4), "%7s" % str(None), file=log) # Compute F_maks for i-th region if (f_mask_i is None): f_mask_i = self.compute_f_mask_i(mask_i_asu) # Compose result object self.FV[f_mask_i] = [round(volume, 3), round(uc_fraction, 1)] # # Determine number of secondary regions. Must happen here! # Preliminarily if need to do mosaic. self.n_regions = len(self.FV.values()) self.do_mosaic = False if (self.n_regions > 1 and flex.max(self.d_spacings) > 6): self.do_mosaic = True # Add aggregated small regions (if present) self._add_from_aggregated(selection=small_selection, diff_map=diff_map) # Add aggregated weak map regions (if present) self._add_from_aggregated(selection=weak_selection, diff_map=diff_map) # Finalize main Fmask self.f_mask_0 = f_obs.customized_copy(data=f_mask_data_0) # Delete bulk whole mask from memory del self._mask_p1
def analyze_hd_sites(self): sites_different_xyz = [] sites_different_b = [] sites_sum_occ_not_1 = [] sites_occ_sum_no_scattering = [] rotatable_hd_selection = self.model.rotatable_hd_selection() eps_xyz = 0.001 eps_b = 0.01 delta_occ_sum = 0.001 occ_h_zero_scattering = 0.64 # value for which sum occ H and D is zero eps_occ_zero_scatt = 0.05 # For rotatable H, H and D may be at different positions # However, when they are close to each other, cancellation may occur # Introduce max distance, corresponds to approx. 45 deg between O-D and O-H max_distance_between_rotatable_H = 0.8 for iseq in self.hd_exchanged_sites: atom_H = self.hd_exchanged_sites[iseq][0] atom_D = self.hd_exchanged_sites[iseq][1] # H/D at different positions delta_xyz = atom_H.distance(atom_D) if (delta_xyz >= eps_xyz): sites_different_xyz.append( (atom_H.id_str(), atom_D.id_str(), delta_xyz, atom_H.xyz, atom_D.xyz)) # H/D with different B delta_b = abs(atom_H.b - atom_D.b) if (delta_b >= eps_b): delta_b = atom_H.b - atom_D.b sites_different_b.append( (atom_H.id_str(), atom_D.id_str(), delta_b, atom_H.xyz, atom_D.xyz)) # H/D with sum of occupancies lt or gt 1 occupancy_sum = atom_H.occ + atom_D.occ if (abs(1-occupancy_sum) >= delta_occ_sum): sites_sum_occ_not_1.append( (atom_H.id_str(), atom_D.id_str(), occupancy_sum, atom_H.xyz, atom_D.xyz)) # rotatable H/D with zero scattering sum, if closer than cut off apart if ((atom_H.i_seq in rotatable_hd_selection) and (atom_D.i_seq in rotatable_hd_selection)): if (atom_H.distance(atom_D) < max_distance_between_rotatable_H): if ((abs(atom_H.occ-occ_h_zero_scattering) <= eps_occ_zero_scatt) and (abs(atom_D.occ-(1-occ_h_zero_scattering))<= eps_occ_zero_scatt)): sites_occ_sum_no_scattering.append( (atom_H.id_str(), atom_D.id_str(), atom_H.occ, atom_D.occ, atom_H.xyz, atom_D.xyz)) self.hd_sites_analysis = group_args( sites_different_xyz = sites_different_xyz, sites_different_b = sites_different_b, sites_sum_occ_not_1 = sites_sum_occ_not_1, sites_occ_sum_no_scattering = sites_occ_sum_no_scattering)
def get_inputs(pdb_str):
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_str)
  ph = pdb_inp.construct_hierarchy()
  xrs = ph.extract_xray_structure(
    crystal_symmetry=pdb_inp.crystal_symmetry())
  return group_args(pdb_hierarchy=ph, xray_structure=xrs)
def run_core_algorithm(self, group, alternates, use_weights, asymmetric=1, show_plot=True, save_plot=False, plot_name='xy.png'): # asymmetric 0=do nothing; 1=up/up; 2=down/up; 3=up/up + down/up #T = Profiler("coset") # construct rij matrix NN = group.count(True) if self.verbose: print "IN RUN CORE", group, alternates, use_weights, asymmetric, "Group of %d" % NN index_selected = group.iselection() rij = [] wij = [] slices = {} for coset in ['h,k,l'] + alternates.keys(): slices[coset] = {} twin_data = alternates.get( coset, self.data) # i.e., for 'h,k,l' the twin data is self.data for itr in xrange(NN): slices[coset][itr] = self.one_lattice_slice( indices=twin_data.indices(), lattice_id=index_selected[itr]) for twin_law in alternates.keys(): twin_data = alternates[twin_law] rij_ = flex.double(flex.grid(NN, NN), 0.) wij_ = flex.double(flex.grid(NN, NN), 0.) for i in xrange(NN): wij_[(i, i)] = 0.0 indices_i = slices["h,k,l"][i] indices_i_rev = slices[twin_law][i] i_start = self.lattices[index_selected[i]] # this would be a good compromise point for a detail call to C++ to speed things up XXX for j in xrange(i + 1, NN): indices_j = slices["h,k,l"][j] j_start = self.lattices[index_selected[j]] if asymmetric % 2 == 1: # up - up update_wij_rij(i, j, indices_i, indices_j, self.data.data(), self.data.data(), i_start, j_start, wij_, rij_, 1., use_weights) #matches = miller_ext.match_indices(indices_i, indices_j) #intensities_i = flex.double() #intensities_j = flex.double() #for pair in matches.pairs(): #print indices_i[pair[0]], indices_j[pair[1]],i_start,j_start,self.data.data()[i_start+pair[0]],self.data.data()[j_start+pair[1]] # intensities_i.append( self.data.data()[i_start+pair[0]] ) # intensities_j.append( self.data.data()[j_start+pair[1]] ) #corr = flex.linear_correlation(intensities_i, intensities_j) #if corr.is_well_defined(): # print i,j,corr.coefficient(),corr.n() # if use_weights: # wij_[(i,j)] = corr.n() # wij_[(j,i)] = corr.n() # rij_[(i,j)] = corr.coefficient() # rij_[(j,i)] = corr.coefficient() if asymmetric >= 2: # down - up update_wij_rij(i, j, indices_i_rev, indices_j, twin_data.data(), self.data.data(), i_start, j_start, wij_, rij_, -1., use_weights) #matches_rev = miller_ext.match_indices(indices_i_rev, indices_j) #intensities_i = flex.double() #intensities_j = flex.double() #for pair in matches_rev.pairs(): #print indices_i_rev[pair[0]], indices_j[pair[1]],i_start,j_start,twin_data.data()[i_start+pair[0]],self.data.data()[j_start+pair[1]] # intensities_i.append( twin_data.data()[i_start+pair[0]] ) # intensities_j.append( self.data.data()[j_start+pair[1]] ) #corr = flex.linear_correlation(intensities_i, intensities_j) #if corr.is_well_defined(): # if use_weights: # wij_[(i,j)] += corr.n() # wij_[(j,i)] += corr.n() # rij_[(i,j)] -= corr.coefficient() # rij_[(j,i)] -= corr.coefficient() focus = wij_.focus() flat_wij = flex.double(list(wij_)) selection = (flat_wij > 1) print "w_ij is a %dx%d matrix with %d/%d >1 elements with average value %4.1f" % ( focus[0], focus[1], selection.count(True), len(wij_), flex.mean(flat_wij.select(selection))) rij.append(rij_) wij.append(wij_) if self.verbose: print "CONSTRUCTED RIJ" xcoord = flex.random_double(NN) ycoord = flex.random_double(NN) M = minimize(xcoord, ycoord, rij[0], wij[0], self.verbose) coord_x = M.x[0:NN] coord_y = M.x[NN:2 * NN] P = minimize_divide(coord_x, coord_y) selection = P.plus_minus() if show_plot or save_plot: import matplotlib if not show_plot: # 
http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear matplotlib.use('Agg') # use a non-interactive backend from matplotlib import pyplot as plt plt.plot(coord_x.select(selection), coord_y.select(selection), "r.", markersize=2.) plt.plot(coord_x.select(~selection), coord_y.select(~selection), "k.", markersize=3.) plt.axes().set_aspect("equal") if save_plot: plt.savefig(plot_name, size_inches=(10, 10), dpi=300, bbox_inches='tight') if show_plot: plt.show() grouped_lattice_ids = self.lattices.select(group) assert len(grouped_lattice_ids) == len(selection) from libtbx import group_args return group_args(reindexing_sets={ "h,k,l": set(grouped_lattice_ids.select(selection)), alternates.keys()[0]: set(grouped_lattice_ids.select(~selection)) }, rij=rij, wij=wij, coord_x=coord_x, coord_y=coord_y) return { "h,k,l": set(grouped_lattice_ids.select(selection)), alternates.keys()[0]: set(grouped_lattice_ids.select(~selection)) }
def __init__( self, model, Hs=["H", "D"], As=["O", "N", "S", "F", "CL"], Ds=["O", "N", "S"], d_HA_cutoff=[1.4, 3.0], # original: [1.4, 2.4], d_DA_cutoff=[2.5, 3.5], # not used a_DHA_cutoff=120, # should be greater than this a_YAH_cutoff=[90, 180], # should be within this interval, not used protein_only=False): self.result = [] self.atoms = model.get_hierarchy().atoms() geometry = model.get_restraints_manager() bond_proxies_simple, asu = geometry.geometry.get_all_bond_proxies( sites_cart=model.get_sites_cart()) h_bonded_to = {} for p in bond_proxies_simple: i, j = p.i_seqs ei, ej = self.atoms[p.i_seqs[0]].element, self.atoms[ p.i_seqs[1]].element if (ei in Hs): h_bonded_to[i] = self.atoms[j] if (ej in Hs): h_bonded_to[j] = self.atoms[i] # sites_cart = model.get_sites_cart() crystal_symmetry = model.crystal_symmetry() fm = crystal_symmetry.unit_cell().fractionalization_matrix() om = crystal_symmetry.unit_cell().orthogonalization_matrix() pg = get_pair_generator(crystal_symmetry=crystal_symmetry, buffer_thickness=d_HA_cutoff[1], sites_cart=sites_cart) get_class = iotbx.pdb.common_residue_names_get_class for p in pg.pair_generator: i, j = p.i_seq, p.j_seq ei, ej = self.atoms[i].element, self.atoms[j].element altloc_i = self.atoms[i].parent().altloc altloc_j = self.atoms[j].parent().altloc resseq_i = self.atoms[i].parent().parent().resseq resseq_j = self.atoms[j].parent().parent().resseq # pre-screen candidates begin one_is_Hs = ei in Hs or ej in Hs other_is_acceptor = ei in As or ej in As d_HA = math.sqrt(p.dist_sq) assert d_HA <= d_HA_cutoff[1] is_candidate = one_is_Hs and other_is_acceptor and \ d_HA >= d_HA_cutoff[0] and \ altloc_i == altloc_j and resseq_i != resseq_j if (protein_only): for it in [i, j]: resname = self.atoms[it].parent().resname is_candidate &= get_class( name=resname) == "common_amino_acid" if (not is_candidate): continue if (ei in Hs and not h_bonded_to[i].element in As): continue if (ej in Hs and not h_bonded_to[j].element in As): continue # pre-screen candidates end rt_mx_i = pg.conn_asu_mappings.get_rt_mx_i(p) rt_mx_j = pg.conn_asu_mappings.get_rt_mx_j(p) rt_mx_ji = rt_mx_i.inverse().multiply(rt_mx_j) # if (ei in Hs): H = self.atoms[i] D = self.atoms[h_bonded_to[H.i_seq].i_seq] A = self.atoms[j] if (str(rt_mx_ji) != "x,y,z"): A = apply_symop_to_copy(A, rt_mx_ji, fm, om) if (ej in Hs): H = self.atoms[j] D = self.atoms[h_bonded_to[H.i_seq].i_seq] A = self.atoms[i] if (str(rt_mx_ji) != "x,y,z"): H = apply_symop_to_copy(H, rt_mx_ji, fm, om) D = apply_symop_to_copy(D, rt_mx_ji, fm, om) assert H.distance(D) < 1.15 # filter by a_DHA a_DHA = H.angle(A, D, deg=True) if (a_DHA < a_DHA_cutoff): continue # assert approx_equal(d_HA, H.distance(A), 1.e-3) self.result.append( group_args(i=i, j=j, symop=rt_mx_ji, d_HA=d_HA, a_DHA=a_DHA, d_AD=A.distance(D)))
def simple_parallel(**kw): """ This simple_parallel interface allows you to run in parallel with a call that is very similar to one you would use for a simple iteration NOTE: all these multiprocessing methods work poorly if a large object (> 1 MB) is returned. Better to write the object as a pickle to a unique file, pass the file name back, and read in the object afterwards. Parameters: function: the function to run iteration_list: list of objects to pass, one at a time, to function nproc: number of processors run_in_batches: If None or True, run nproc jobs, grouping as necessary log: optional log stream any other kw items: passed directly to function Sample use: result_list = simple_parallel( function = run_something, # function to run iteration_list = iteration_list, # list of N values or objects that vary nproc = nproc, # number of processors other_kw1 = other_kw1, # any other keywords used by run_something other_kw2 = other_kw2, # any other keywords used by run_something log = log, # pass log stream if used ) This will run N jobs of run_something, where run_something looks like: def run_something( one_iteration = None, other_kw1 = None, other_kw2 = None, log = None): # do something with value and other_kw1, other_kw2 result = do_something(one_iteration, other_kw1, other_kw2, log = log) return result Example as simple iteration: def run_something(value): return value * 2 def run_as_is(): # run in usual way iteration_list = [5,7,9] # list of anything result_list = [] for i in range(len(iteration_list)): result = run_something(iteration_list[i]) result_list.append(result) return result_list def run_parallel(): # run in parallel iteration_list = [5,7,9] # list of anything from libtbx.easy_mp import simple_parallel result_list = simple_parallel( iteration_list = iteration_list, function = run_something, nproc = 4, ) return result_list """ run_in_batches = kw.get('run_in_batches', None) function = kw.get('function', None) iteration_list = kw.get('iteration_list', None) nproc = kw.get('nproc', None) run_info = kw.get('run_info', None) log = kw.get('log', None) if function is not None: del kw['function'] if run_in_batches is not None: del kw['run_in_batches'] if iteration_list is not None: del kw['iteration_list'] if nproc is not None: del kw['nproc'] if log is not None: del kw['log'] if run_info is not None: del kw['run_info'] if function is not None and iteration_list is not None and nproc is not None: n_tot = len(list(iteration_list)) end_number = -1 if run_in_batches is None or run_in_batches: n_in_batch = n_tot // nproc if n_in_batch * nproc < n_tot: n_in_batch = n_in_batch + 1 assert n_in_batch * nproc >= n_tot n_runs = nproc else: n_in_batch = 1 n_runs = n_tot runs_to_carry_out = [] for run_id in range(n_tot): start_number = end_number + 1 end_number = min(n_tot - 1, start_number + n_in_batch - 1) if end_number < start_number: continue runs_to_carry_out.append( group_args( run_id=run_id, start_number=start_number, end_number=end_number, )) kw_dict = kw.copy() kw_dict['function'] = function kw_dict['iteration_list'] = iteration_list if log is None: log = sys.stdout from libtbx.easy_mp import run_jobs_with_large_fixed_objects runs_carried_out = run_jobs_with_large_fixed_objects( nproc=nproc, verbose=False, kw_dict=kw_dict, run_info_list=runs_to_carry_out, job_to_run=simple_parallel, log=log) runs_carried_out = sorted(runs_carried_out, key=lambda r: r.start_number if r else None) result_list = [] printed_something = False for result_info in runs_carried_out: if result_info and 
result_info.result and result_info.result.result_list: result = result_info.result for r in result.result_list: if r: result_list.append(r) if not printed_something: print(result.log_as_text, file=log) printed_something = True return result_list else: assert run_info is not None and iteration_list is not None kw_dict = kw.copy() # Determine if function has the kw "log" import inspect use_log = 'log' in inspect.getargspec(function).args if use_log: # capture the log if it is present in the function call kw_dict['log'] = log result_list = [] for i in range(run_info.start_number, run_info.end_number + 1): result_list.append(function(iteration_list[i], **kw_dict)) return group_args( group_args_type='runs %s to %s of %s' % (run_info.start_number, run_info.end_number, str(function)), result_list=result_list)
def run(args, command_name): from iotbx.option_parser import option_parser as iotbx_option_parser import libtbx.utils show_times = libtbx.utils.show_times(time_start="now") command_line = (iotbx_option_parser( usage=command_name + " [options] [cod_id...]").enable_chunk( easy_all=True).enable_multiprocessing().option( None, "--parse_only", action="store_true").option( None, "--cif_only", action="store_true").option( None, "--hkl_only", action="store_true").option( "-v", "--verbose", action="store_true")).process(args=args) if (command_line.run_multiprocessing_chunks_if_applicable( command_call=[command_name, __file__])): show_times() return co = command_line.options cod_ids = command_line.args assert [co.cif_only, co.hkl_only].count(True) <= 1 if co.cif_only: ext = "cif" elif co.hkl_only: ext = "hkl" else: ext = None verbose = co.verbose parse_only = co.parse_only # cod_hkl_cif = cod_tools.build_hkl_cif(cod_ids=cod_ids, ext=ext) cod_hkl_cif.show_summary() hkl_files = cod_hkl_cif.hkl cif_files = cod_hkl_cif.cif # n_total = 0 # parsing_errors = {} build_errors = {} ignored_errors = {} skipped = set() # files_to_parse = [] files_to_parse.extend(hkl_files.values()) files_to_parse.extend(cif_files.values()) for i, path in enumerate(files_to_parse): n_total += 1 if (i % command_line.chunk.n != command_line.chunk.i): continue try: cod_id = os.path.basename(path) cif_obj = iotbx.cif.reader(file_path=path) if parse_only: continue skip_file = False for cif_block in cif_obj.model().values(): value = cif_block.get("_cod_error_flag") keys = set(cif_block.keys()) if (value in ["errors", "retracted"]): skip_file = True skipped.add(cod_id) if verbose: print("SKIPPING: _cod_error_flag %s: %s" % (value, cod_id)) elif (len( set([ "_space_group_symop_ssg_operation_algebraic", "_space_group_ssg_name" ]).intersection(keys)) != 0): skipped.add(cod_id) if verbose: print("SKIPPING: COD entry with super-space group:", cod_id) elif (len( set(["_refln_index_m", "_refln_index_m_1" ]).intersection(keys)) != 0): if verbose: print("SKIPPING: COD entry with _refln_index_m:", cod_id) skipped.add(cod_id) if skip_file: continue if path.endswith('.cif'): cif_obj.build_crystal_structures() elif path.endswith('.hkl'): cif_obj.build_miller_arrays() else: iotbx.cif.cctbx_data_structures_from_cif( cif_model=cif_obj.model()) except KeyboardInterrupt: print("CAUGHT EXCEPTION: KeyboardInterrupt") return except CifBuilderError as e: e_str = str(e) if not verbose and (e_str.startswith( "No atomic coordinates could be found" ) or e_str.startswith( "No symmetry instructions could be extracted from the cif block" )): ignored_errors.setdefault(cod_id, e_str) continue sys.stdout.flush() print("CAUGHT EXCEPTION: %s: %s: %s" % (command_name, cod_id, str(e)), file=sys.stderr) if verbose: traceback.print_exc() print(file=sys.stderr) build_errors.setdefault(cod_id, e_str) sys.stderr.flush() except CifParserError as e: sys.stdout.flush() e_str = str(e) parsing_errors.setdefault(cod_id, e_str) print("PARSING ERROR: %s: %s: %s" % (command_name, cod_id, e_str), file=sys.stderr) if verbose: traceback.print_exc() print(file=sys.stderr) sys.stderr.flush() except Exception as e: sys.stdout.flush() e_str = str(e) build_errors.setdefault(cod_id, e_str) print("CAUGHT EXCEPTION: %s: %s: %s" % (command_name, cod_id, e_str), file=sys.stderr) if verbose: traceback.print_exc() print(file=sys.stderr) sys.stderr.flush() print() print("Number successfully parsed: %i/%i" \ % (n_total-len(parsing_errors),n_total)) if not parse_only: print("Number skipped:", 
len(skipped)) print("Number of exceptions caught:", len(build_errors)) print("Number of exceptions ignored:", len(ignored_errors)) print() # show_times() result = group_args(n_hkl=len(hkl_files), n_cif=len(cif_files), n_hkl_cif_pairs=len(cod_hkl_cif.hkl_cif_pairs), parsing_errors=parsing_errors, build_errors=build_errors, ignored_errors=ignored_errors, skipped=skipped) easy_pickle.dump("result_%03i.pickle" % command_line.chunk.i, result) print()
def exercise(): from mmtbx.regression import make_fake_anomalous_data import mmtbx.command_line.water_screen import mmtbx.ions.utils from iotbx.file_reader import any_file pdb_in = """\ CRYST1 51.491 51.491 35.389 90.00 90.00 120.00 P 31 2 1 SCALE1 0.019421 0.011213 0.000000 0.00000 SCALE2 0.000000 0.022425 0.000000 0.00000 SCALE3 0.000000 0.000000 0.028257 0.00000 HETATM 32 CA CGU A 17 7.453 25.360 36.702 1.00 25.21 C HETATM 33 C CGU A 17 6.252 24.666 36.060 1.00 24.08 C HETATM 34 O CGU A 17 6.408 23.698 35.327 1.00 22.85 O HETATM 35 CB CGU A 17 7.547 24.924 38.163 1.00 28.34 C HETATM 36 CG CGU A 17 8.807 24.090 38.525 1.00 29.46 C HETATM 37 CD1 CGU A 17 9.396 23.286 37.336 1.00 28.04 C HETATM 38 CD2 CGU A 17 8.411 23.255 39.740 1.00 32.29 C HETATM 39 OE11 CGU A 17 10.339 23.775 36.690 1.00 31.46 O HETATM 40 OE12 CGU A 17 8.917 22.160 37.075 1.00 26.97 O HETATM 41 OE21 CGU A 17 7.958 23.926 40.668 1.00 35.00 O HETATM 42 OE22 CGU A 17 8.527 22.036 39.780 1.00 33.69 O ATOM 43 N PRO A 18 5.029 25.135 36.349 1.00 23.16 N ATOM 62 CA ARG A 20 7.902 23.943 32.052 1.00 22.37 C ATOM 63 C ARG A 20 7.515 22.468 32.019 1.00 24.90 C ATOM 64 O ARG A 20 7.956 21.738 31.130 1.00 24.00 O ATOM 65 CB ARG A 20 9.024 24.136 33.067 1.00 26.75 C ATOM 67 CD ARG A 20 10.812 25.597 34.000 1.00 36.42 C HETATM 72 N CGU A 21 6.701 22.022 32.980 1.00 24.22 N HETATM 73 CA CGU A 21 6.293 20.612 33.012 1.00 23.24 C HETATM 74 C CGU A 21 5.432 20.293 31.805 1.00 23.70 C HETATM 75 O CGU A 21 5.561 19.221 31.216 1.00 20.30 O HETATM 76 CB CGU A 21 5.506 20.267 34.289 1.00 24.58 C HETATM 77 CG CGU A 21 6.392 20.445 35.528 1.00 26.52 C HETATM 78 CD1 CGU A 21 7.353 19.249 35.754 1.00 27.96 C HETATM 79 CD2 CGU A 21 5.507 20.718 36.738 1.00 29.78 C HETATM 80 OE11 CGU A 21 8.366 19.406 36.482 1.00 27.23 O HETATM 81 OE12 CGU A 21 7.056 18.159 35.217 1.00 25.25 O HETATM 82 OE21 CGU A 21 4.695 21.625 36.586 1.00 36.91 O HETATM 83 OE22 CGU A 21 5.664 20.139 37.797 1.00 32.02 O ATOM 93 C CYS A 23 7.212 20.248 27.692 1.00 25.63 C ATOM 94 O CYS A 23 7.306 19.599 26.656 1.00 22.02 O HETATM 97 N CGU A 24 7.761 19.852 28.842 1.00 26.69 N HETATM 98 CA CGU A 24 8.527 18.607 28.931 1.00 29.70 C HETATM 99 C CGU A 24 7.665 17.456 28.476 1.00 31.08 C HETATM 100 O CGU A 24 8.143 16.541 27.812 1.00 32.94 O HETATM 101 CB CGU A 24 8.981 18.304 30.367 1.00 26.05 C HETATM 102 CG CGU A 24 9.966 19.357 30.876 1.00 26.18 C HETATM 103 CD1 CGU A 24 11.275 19.290 30.093 1.00 24.75 C HETATM 104 CD2 CGU A 24 10.148 19.172 32.390 1.00 27.43 C HETATM 105 OE11 CGU A 24 12.023 18.293 30.233 1.00 29.79 O HETATM 106 OE12 CGU A 24 11.537 20.244 29.348 1.00 24.99 O HETATM 107 OE21 CGU A 24 9.100 19.190 33.043 1.00 28.87 O HETATM 108 OE22 CGU A 24 11.260 19.084 32.908 1.00 24.87 O ATOM 143 O CYS A 29 10.353 21.841 23.789 1.00 30.74 O ATOM 146 N ASP A 30 9.604 19.770 24.234 1.00 32.83 N ATOM 147 CA ASP A 30 10.776 19.402 25.014 1.00 34.15 C ATOM 148 C ASP A 30 12.026 19.580 24.177 1.00 36.29 C ATOM 149 O ASP A 30 12.937 20.322 24.544 1.00 34.50 O ATOM 150 CB ASP A 30 10.685 17.949 25.464 1.00 33.18 C ATOM 151 CG ASP A 30 11.714 17.607 26.523 1.00 32.22 C ATOM 152 OD1 ASP A 30 12.621 18.428 26.752 1.00 32.53 O ATOM 153 OD2 ASP A 30 11.608 16.524 27.125 1.00 31.78 O ATOM 154 N GLU A 31 12.056 18.885 23.045 1.00 39.34 N ATOM 155 CA GLU A 31 13.186 18.954 22.135 1.00 40.16 C ATOM 172 CA ALA A 33 13.225 23.877 24.346 1.00 39.26 C ATOM 173 C ALA A 33 14.746 23.914 24.481 1.00 38.24 C ATOM 175 CB ALA A 33 12.600 23.326 25.630 1.00 37.33 C ATOM 176 N ASP A 34 15.400 22.799 
24.170 1.00 39.56 N ATOM 177 CA ASP A 34 16.857 22.723 24.258 1.00 40.96 C ATOM 180 CB ASP A 34 17.352 21.300 23.976 1.00 40.20 C ATOM 181 CG ASP A 34 17.006 20.327 25.083 1.00 38.93 C ATOM 182 OD1 ASP A 34 16.981 20.742 26.262 1.00 41.79 O ATOM 183 OD2 ASP A 34 16.777 19.140 24.778 1.00 37.45 O TER HETATM 316 CA CA A 71 13.077 17.433 32.271 1.00 22.23 CA HETATM 317 CA CA A 72 13.835 18.867 28.887 1.00 30.50 CA HETATM 318 CA CA A 73 10.897 18.813 35.385 1.00 50.79 CA HETATM 320 O HOH A 75 13.387 22.461 33.530 1.00 24.93 O HETATM 323 O HOH A 78 10.578 15.304 29.567 1.00 23.15 O HETATM 324 O HOH A 79 5.020 20.563 40.636 1.00 44.02 O HETATM 325 O HOH A 80 2.823 22.144 38.546 1.00 36.74 O HETATM 326 O HOH A 81 10.434 22.631 29.604 1.00 25.89 O HETATM 327 O HOH A 82 6.522 15.691 36.473 1.00 27.82 O HETATM 332 O HOH A 87 11.624 15.358 31.822 1.00 24.92 O HETATM 333 O HOH A 88 13.763 16.798 28.667 1.00 29.47 O HETATM 334 O HOH A 89 6.350 16.973 32.340 1.00 37.83 O HETATM 338 O HOH A 93 10.474 21.054 34.739 1.00 25.48 O HETATM 342 O HOH A 97 16.203 18.688 27.720 1.00 28.10 O HETATM 343 O HOH A 98 8.186 14.327 30.477 1.00 49.44 O HETATM 344 O HOH A 99 8.625 16.477 33.868 1.00 48.13 O HETATM 347 O HOH A 102 15.462 16.714 24.789 1.00 42.90 O HETATM 356 O HOH A 111 4.757 17.423 38.879 1.00 34.26 O HETATM 358 O HOH A 113 10.313 14.495 25.452 1.00 40.66 O HETATM 359 O HOH A 114 1.979 18.616 37.760 1.00 34.25 O HETATM 363 O HOH A 118 13.926 20.627 27.271 1.00 29.62 O HETATM 365 O HOH A 120 16.240 23.471 27.700 1.00 48.79 O HETATM 370 O HOH A 125 2.747 18.823 35.170 1.00 50.30 O HETATM 372 O HOH A 127 5.228 23.553 41.559 1.00 42.02 O HETATM 373 O HOH A 128 5.298 21.833 43.473 1.00 41.96 O HETATM 377 O HOH A 132 13.181 22.613 29.210 1.00 35.43 O TER """ file_base = "tst_symmetry_axis" with open(file_base + ".pdb", "w") as f: f.write(pdb_in) f = any_file(file_base + ".pdb") hierarchy = f.file_object.hierarchy xrs = f.file_object.xray_structure_simple() hierarchy, n = mmtbx.ions.utils.anonymize_ions(hierarchy, log=null_out()) assert (n == 3) with open(file_base + "_in.pdb", "w") as f: f.write(hierarchy.as_pdb_string(crystal_symmetry=xrs)) mtz_file = make_fake_anomalous_data.generate_mtz_file( file_base=file_base, d_min=1.5, anomalous_scatterers=[ group_args(selection="element CA", fp=0.0, fdp=0.4) ]) args = [ file_base + "_in.pdb", file_base + ".mtz", "wavelength=0.9792", "use_phaser=False", "nproc=1", "skip_twin_test=True", "elements=CA", ] out = StringIO() mmtbx.command_line.water_screen.run(args=args, out=out) assert "Valence sum: 1.916" in out.getvalue() assert out.getvalue().count("Probable cation: CA+2") >= 1 os.remove(file_base + ".pdb") os.remove(file_base + "_in.pdb") os.remove(file_base + ".mtz") os.remove(file_base + "_fmodel.eff")
    if (seq_file is None):
      raise ValueError("Could not parse %s" % file_name)
  except Exception as e:
    print >> log, str(e)
    return None
  else:
    if (len(non_compliant) > 0):
      print >> log, "Warning: non-compliant entries in sequence file"
      for nc in non_compliant:
        print >> log, " " + str(nc)
  n_residues = n_bases = 0
  for seq_entry in seq_file:
    (n_res_seq, n_base_seq) = composition_from_sequence(seq_entry.sequence)
    n_residues += n_res_seq
    n_bases += n_base_seq
  return group_args(n_residues=n_residues, n_bases=n_bases)

def composition_from_sequence(sequence):
  seq = sequence.upper()
  n_residues = n_bases = 0
  n_valid = len(seq) - seq.count("X")
  # Classify the whole sequence as nucleic acid if at least 90% of the
  # non-X characters are A/U/T/G/C; otherwise treat it as protein.
  n_na = seq.count("A") + seq.count("U") + seq.count("T") + \
         seq.count("G") + seq.count("C")
  if (n_na >= int(n_valid * 0.9)):
    n_bases += len(seq)
  else:
    n_residues += len(seq)
  return n_residues, n_bases
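# A minimal usage sketch (not part of the original module) for
# composition_from_sequence, assuming the function above is in scope; the
# sequences are made-up examples. It illustrates the >=90% A/U/T/G/C
# heuristic: the first string counts as nucleic-acid bases, the second as
# protein residues.
def exercise_composition_from_sequence():
  n_res, n_bases = composition_from_sequence("AUGCAUGCAUGC")
  assert (n_res, n_bases) == (0, 12)
  n_res, n_bases = composition_from_sequence("MKTAYIAKQRQISFVK")
  assert (n_res, n_bases) == (16, 0)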
def bond_angle_outliers(self): get_class = common_residue_names_get_class rc = restraints.combined( pdb_hierarchy = self.pdb_hierarchy, xray_structure = self.model.get_xray_structure(), geometry_restraints_manager = self.model.get_restraints_manager().\ geometry, ignore_hd = False, # important outliers_only = False, use_segids_in_place_of_chainids = False) bond_mean_delta, n_bonds, bond_mean = 0, 0, 0 # bond outliers involving hydrogens outliers_bonds = [] for result in rc.bonds.results: atom_info_hd = get_atom_info_if_hd(atoms_info = result.atoms_info) # Consider only H/D atoms if atom_info_hd is not None: # Calculate mean bond length and delta for non-water # --> used to get rough idea if H are at X-ray or neutron bond lengths. if (get_class(name=atom_info_hd.resname) != 'common_water'): bond_mean_delta = bond_mean_delta + result.delta bond_mean = bond_mean + result.model n_bonds += 1 if result.is_outlier(): atoms_str = mp_geo.get_atoms_str(atoms_info=result.atoms_info) outliers_bonds.append( (atom_info_hd.id_str(), atoms_str, result.model, result.delta, result.target, atom_info_hd.xyz) ) self.outliers_bonds = outliers_bonds if n_bonds: bond_mean_delta = bond_mean_delta/n_bonds bond_mean = bond_mean/n_bonds xray_distances_used = False # value 0.08 was obtained by checking all 123 neutron models deposited # until Sep 2017 and by analysing delta if (bond_mean_delta >= 0.08 and self.use_neutron_distances): xray_distances_used = True self.bond_results = group_args( bond_mean_delta = bond_mean_delta, bond_mean = bond_mean, xray_distances_used = xray_distances_used ) # angle outliers involving hydrogens outliers_angles = [] for result in rc.angles.results: atom_info_hd = get_atom_info_if_hd(atoms_info = result.atoms_info) # Consider only H/D atoms if atom_info_hd is not None: if result.is_outlier(): atoms_str = mp_geo.get_atoms_str(atoms_info=result.atoms_info) outliers_angles.append( [atom_info_hd.id_str(), atoms_str, result.model, result.delta, result.target, atom_info_hd.xyz] ) self.outliers_angles = outliers_angles
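# A self-contained sketch (invented helper, not mmtbx API) of the heuristic
# applied in bond_angle_outliers above: if the mean deviation of X-H bond
# lengths from their restraint targets is >= 0.08 A, the hydrogens were
# most likely refined at X-ray rather than neutron distances. The deltas
# below are invented numbers for illustration only.
def xray_distances_suspected(deltas, threshold=0.08):
  # deltas: per-bond (target - model) differences for non-water X-H bonds
  if not deltas:
    return False
  mean_delta = sum(deltas) / len(deltas)
  return mean_delta >= threshold

assert xray_distances_suspected([0.09, 0.11, 0.10])
assert not xray_distances_suspected([0.01, 0.02])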
def initialize_json(self): self.json_fn = os.path.join(self.dest_dir , self.pdb_code + '.json') self.pickle_fn = os.path.join(self.dest_dir , self.pdb_code + '.pickle') if (os.path.isfile(self.json_fn)): print('Opened file', os.path.basename(self.json_fn), file=self.logger) with open(self.json_fn, 'r') as fp: self.json_data = json.load(fp) # initialize values if key does not exist yet for key in keys: if key not in self.json_data: self.json_data[key] = None # delete keys that are not in list jd = {k:v for k,v in self.json_data.items() if k in keys} self.json_data = jd # for jkey in self.json_data: # if jkey not in keys: # self.json_data.pop(jkey) else: self.json_data = dict() for key in keys: self.json_data[key] = None self.json_data['success'] = True for s in ['success_composition', 'success_readyset', 'success_refinement', 'success_statistics', 'success_plots']: self.json_data[s] = None print('Initialized json', file=self.logger) if (os.path.isfile(self.pickle_fn)): self.pickle_data = easy_pickle.load(self.pickle_fn) print('Opened file', os.path.basename(self.pickle_fn), file=self.logger) else: self.pickle_data = group_args( pdb_code = self.pdb_code, map_code = self.map_code, adp_hist = None, r_adp_hist = None, cc_per_chain = None, r_cc_per_chain = None, cc_per_residue = None, r_cc_per_residue = None, ramalyze = None, r_ramalyze = None, chain_dict = None ) print('Initialized pickle', file=self.logger) self.json_data['pdb_code'] = self.pdb_code self.json_data['map_code'] = self.map_code self.json_data['prefix'] = self.prefix self.json_data['datum'] = str(datetime.date.today()) #self.json_data['datum'] = str(datetime.now().day) pd = easy_pickle.load(os.path.join(self.folder, self.prefix + '.pkl')) self.json_data['date_from_header'] = pd.date self.resolution = pd.resolution print('Calculations performed with resolution: ', self.resolution, file=self.logger) self.json_data['resolution'] = self.resolution self.initialize_files(pd = pd)
ga.get_best_gene() ''' ############################################################################## ########## EXAMPLE PARAMS FOR GENETIC ALGORITHM WITH MULTIPROCESSING ####### ############################################################################## ''' Default parameters for genetic algorithm''' example_params = group_args( group_args_type='parameters for genetic algorithm', nproc=1, random_seed=784321, mutation_rate=0.5, recombination_rate=0.5, number_of_variants=None, total_number_of_cycles=1000, number_of_cycles=None, number_of_macro_cycles=None, top_fraction_of_variants_to_keep=0.2, number_of_variants_per_gene_unit=10, total_number_of_cycles_per_gene_unit=10, number_of_tries_for_mutations_and_crossovers=2, typical_gene_length=10, end_cycles_if_no_improvement_for_n_cycles=2, min_fraction_of_cycles_to_run=0.1, ) ############################################################################## ########## EXAMPLE METHODS FOR GENETIC ALGORITHM WITH MULTIPROCESSING ####### ############################################################################## def example_new_gene_method(params, n): '''
def get_counts(self):
  return group_args(number_h_final=self.n_H_final,
                    no_H_placed_mlq=self.no_H_placed_mlq,
                    site_labels_disulfides=self.site_labels_disulfides,
                    site_labels_no_para=self.site_labels_no_para)
def run_something( self, params=None, genes=None, macro_cycle=None, create=None, mutate=None, recombine=None, score_only=None, ): if not params: params = self.params if not genes: genes = self.genes all_new_genes = [] nproc = params.nproc end_number = -1 if create: n_tot = self.get_number_of_variants_to_make() else: n_tot = len(self.genes) n = n_tot // nproc if n * nproc < n_tot: n = n + 1 assert n * nproc >= n_tot runs_to_carry_out = [] for run_id in range(nproc): start_number = end_number + 1 end_number = min(n_tot - 1, start_number + n - 1) if end_number < start_number: continue runs_to_carry_out.append( group_args( run_id=run_id, random_seed=np_random.randint(0, 100000), start_number=start_number, end_number=end_number, )) local_params = group_args(**params().copy()) local_params.nproc = 1 # Required kw_dict = { 'params': local_params, 'genes': genes, 'create': create, 'mutate': mutate, 'macro_cycle': macro_cycle, 'recombine': recombine, 'score_only': score_only, 'new_gene_method': self.new_gene_method, 'mutation_method': self.mutation_method, 'recombination_method': self.recombination_method, 'genes_are_identical_method': self.genes_are_identical_method, 'scoring_method': self.scoring_method, } runs_carried_out = run_jobs_with_large_fixed_objects( nproc=nproc, verbose=False, kw_dict=kw_dict, run_info_list=runs_to_carry_out, job_to_run=group_of_run_something, log=self.log) for run_info in runs_carried_out: new_genes = run_info.result.new_genes if new_genes: all_new_genes += new_genes all_new_genes = make_unique(all_new_genes, self.genes_are_identical_method) if score_only or create or macro_cycle: # keep id and replace genes self.genes = all_new_genes else: # usual self.set_gene_id_values(all_new_genes) self.genes += all_new_genes
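# Standalone illustration (an invented helper, not part of the class above)
# of the chunking arithmetic used in run_something: n_tot work items are
# split into contiguous (start, end) ranges, one per processor, with the
# chunk size rounded up so that n * nproc >= n_tot.
def chunk_ranges(n_tot, nproc):
  n = n_tot // nproc
  if n * nproc < n_tot:
    n += 1
  ranges = []
  end = -1
  for run_id in range(nproc):
    start = end + 1
    end = min(n_tot - 1, start + n - 1)
    if end < start:
      continue  # more processors than remaining work
    ranges.append((start, end))
  return ranges

assert chunk_ranges(10, 4) == [(0, 2), (3, 5), (6, 8), (9, 9)]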
def start_coot_and_wait(pdb_file, map_file, data_file, work_dir=None, coot_cmd="coot", needs_rebuild=False, log=None): if (log is None): log = sys.stdout if (work_dir is None): work_dir = os.getcwd() if (not os.path.isdir(work_dir)): os.makedirs(work_dir) import mmtbx.maps.utils from libtbx.str_utils import make_header from libtbx import easy_run from libtbx import group_args import cootbx base_script = __file__.replace(".pyc", ".py") os.chdir(work_dir) if (os.path.exists("coot_out_tmp.pdb")): os.remove("coot_out_tmp.pdb") if (os.path.exists("coot_out.pdb")): os.remove("coot_out.pdb") f = open("edit_in_coot.py", "w") f.write(open(base_script).read()) f.write("\n") f.write("import coot\n") cootbx.write_disable_nomenclature_errors(f) f.write("m = manager(\"%s\", \"%s\", needs_rebuild=%s)\n" % (pdb_file, map_file, needs_rebuild)) f.close() make_header("Interactive editing in Coot", log) easy_run.call("\"%s\" --no-state-script --script edit_in_coot.py &" % coot_cmd) print >> log, " Waiting for coot_out_tmp.pdb to appear at %s" % \ str(time.asctime()) base_dir = os.path.dirname(pdb_file) tmp_file = os.path.join(base_dir, "coot_out_tmp.pdb") edit_file = os.path.join(base_dir, "coot_tmp_edits.pdb") maps_file = os.path.join(base_dir, ".NEW_MAPS") while (True): if (os.path.isfile(tmp_file)): print >> log, " Coot editing complete at %s" % str(time.asctime()) break elif (os.path.isfile(maps_file)): t1 = time.time() assert os.path.isfile(edit_file) mmtbx.maps.utils.create_map_from_pdb_and_mtz( pdb_file=edit_file, mtz_file=data_file, output_file=os.path.join(base_dir, "maps_for_coot.mtz"), fill=True, out=log) t2 = time.time() print >> log, "Calculated new map coefficients in %.1fs" % (t2 - t1) os.remove(maps_file) else: time.sleep(t_wait / 1000.) shutil.move(tmp_file, "coot_out.pdb") mmtbx.maps.utils.create_map_from_pdb_and_mtz( pdb_file="coot_out.pdb", mtz_file=data_file, output_file="coot_out_maps.mtz", fill=True, out=log) new_model = os.path.join(work_dir, "coot_out.pdb") new_map = os.path.join(work_dir, "coot_out_maps.mtz") skip_rebuild = None if (needs_rebuild): if (os.path.isfile(os.path.join(base_dir, "NO_BUILD"))): skip_rebuild = True else: skip_rebuild = False return group_args(pdb_file=new_model, map_file=new_map, skip_rebuild=skip_rebuild)
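# A minimal sketch of the sentinel-file handshake used in
# start_coot_and_wait above (file names, callback, and wait interval are
# placeholders, not the real protocol constants): poll until the external
# program writes its "done" file, servicing optional request files while
# waiting.
import os, time

def wait_for_sentinel(done_file, request_file=None, on_request=None,
                      interval_sec=2.0):
  while True:
    if os.path.isfile(done_file):
      return done_file
    if (request_file is not None) and os.path.isfile(request_file):
      if on_request is not None:
        on_request()  # e.g. regenerate map coefficients
      os.remove(request_file)
    time.sleep(interval_sec)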
def group_of_run_something(run_info, params, genes=None, macro_cycle=None, recombine=None, create=None, mutate=None, score_only=None, new_gene_method=None, mutation_method=None, recombination_method=None, scoring_method=None, genes_are_identical_method=None, log=sys.stdout): np_random.seed(run_info.random_seed) # different for each run params = group_args(**params().copy()) params.random_seed = np_random.randint(0, 100000) if macro_cycle: # Run a macro-cycle ga = genetic_algorithm( genes=genes, params=params, new_gene_method=new_gene_method, mutation_method=mutation_method, recombination_method=recombination_method, scoring_method=scoring_method, genes_are_identical_method=genes_are_identical_method, log=null_out(), ) return group_args( group_args_type='set of genes after running one macro_cycle', new_genes=ga.genes, ) # Usual new_genes = [] for index in range(run_info.start_number, run_info.end_number + 1): params.random_seed = np_random.randint(0, 100000) info = run_one_something(params, genes, index, recombine, create, mutate, score_only, new_gene_method, mutation_method, recombination_method, scoring_method, genes_are_identical_method, log=log) if info and info.new_genes: new_genes += info.new_genes # Make sure all new ones are unique if (not score_only ) and genes: # we have existing ones and not just scoring new_genes = make_unique(new_genes, genes_are_identical_method) return group_args( group_args_type=' one set of recombined/mutated genes', new_genes=new_genes, )
def get_results(self):
  return group_args(model=self.model,
                    model_minimized=self.cablam_fixed_minimized,
                    n_tried_residues=self.n_tried_residues,
                    n_rotated_residues=self.n_rotated_residues)
def current_max_sizes(self):
  return group_args(virtual_memory=self.max_virtual_memory_size,
                    resident_set=self.max_resident_set_size,
                    stack=self.max_stack_size)
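# One possible source for numbers like those returned by current_max_sizes
# (an assumption for illustration, not necessarily how this class gathers
# them): sampling peak memory on Unix via the standard library.
import resource

def sample_max_resident_set():
  # ru_maxrss is reported in kilobytes on Linux and in bytes on macOS.
  return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss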
def work_all(host, port, filenames, params, plot=False, table=False,
             json_file=None, grid=None, nproc=None):
    import json
    from multiprocessing.pool import ThreadPool as thread_pool
    if nproc is None:
        nproc = _nproc()
    pool = thread_pool(processes=nproc)
    threads = {}
    for filename in filenames:
        threads[filename] = pool.apply_async(work, (host, port, filename, params))
    results = []
    for filename in filenames:
        response = threads[filename].get()
        d = json.loads(response)
        results.append(d)
        print response_to_xml(d)
    if json_file is not None:
        print 'Writing results to %s' % json_file
        with open(json_file, 'wb') as f:
            json.dump(results, f)
    if plot or table:
        from scitbx.array_family import flex
        from libtbx import group_args
        from dials.algorithms.spot_finding.per_image_analysis \
            import plot_stats, print_table
        estimated_d_min = flex.double()
        d_min_distl_method_1 = flex.double()
        d_min_distl_method_2 = flex.double()
        n_spots_total = flex.int()
        n_spots_no_ice = flex.int()
        total_intensity = flex.double()
        for d in results:
            estimated_d_min.append(d['estimated_d_min'])
            d_min_distl_method_1.append(d['d_min_distl_method_1'])
            d_min_distl_method_2.append(d['d_min_distl_method_2'])
            n_spots_total.append(d['n_spots_total'])
            n_spots_no_ice.append(d['n_spots_no_ice'])
            total_intensity.append(d['total_intensity'])
        stats = group_args(n_spots_total=n_spots_total,
                           n_spots_no_ice=n_spots_no_ice,
                           n_spots_4A=None,
                           total_intensity=total_intensity,
                           estimated_d_min=estimated_d_min,
                           d_min_distl_method_1=d_min_distl_method_1,
                           d_min_distl_method_2=d_min_distl_method_2,
                           noisiness_method_1=None,
                           noisiness_method_2=None)
        if plot:
            plot_stats(stats)
        if table:
            print_table(stats)
    if grid is not None:
        # note: relies on n_spots_no_ice collected in the plot/table block above
        from scitbx.array_family import flex
        from matplotlib import pyplot
        n_spots_no_ice.reshape(flex.grid(grid))
        print n_spots_no_ice.size()
        fig = pyplot.figure()
        pyplot.pcolormesh(n_spots_no_ice.as_numpy_array(), cmap=pyplot.cm.Reds)
        pyplot.savefig("spot_count.png")
    return
def callback_wrapper(self, message, data, accumulate=True, cached=True):
  if cached:
    self.callback_other(data=group_args(message=message,
                                        data=data,
                                        accumulate=accumulate,
                                        cached=cached))
def get_results(self):
  return group_args(model=self.model)
def get_fragments(model): rm = model.get_restraints_manager() atoms = model.get_hierarchy().atoms() all_selection = list(range(atoms.size())) # Planes planes = [] planes_all = [] for p in rm.geometry.planarity_proxies: planes.append(list(p.i_seqs)) planes_all.extend(list(p.i_seqs)) planes_all = list(set(planes_all)) # print "planes :", planes # print "planes_all:", planes_all # print # Chiral chirals = [] chirals_unique = [] for p in rm.geometry.chirality_proxies: # print "chiral:", p.i_seqs, [atoms[i].name for i in p.i_seqs] chirals.append(list(p.i_seqs)) tmp = [] for i in p.i_seqs: if (not i in planes_all): tmp.append(i) chirals_unique.append(tmp) chirals_unique = list(merge_common(chirals_unique))[0] chirals_all = list(merge_common(chirals))[0] # print "chirals_unique:", chirals_unique # print "chirals_all :", chirals_all chirals_mapping = [chirals_all.index(u) for u in chirals_unique] # print "mapping:", chirals_mapping # Dihedral dihedrals = [] dihedrals_unique = [] for p in rm.geometry.dihedral_proxies: # print "dihedral:", p.i_seqs, [atoms[i].name for i in p.i_seqs] dihedrals.append(list(p.i_seqs)) tmp = [] for i in p.i_seqs: if (not i in planes_all + chirals_all): tmp.append(i) dihedrals_unique.append(tmp) dihedrals_unique = list(merge_common(dihedrals_unique))[0] dihedrals_all = list(merge_common(dihedrals))[0] # print "dihedrals_unique:", dihedrals_unique # print "dihedrals_all :", dihedrals_all # Finalize pcd = dihedrals_all + chirals_all + planes_all # tmp = [] for s in all_selection: if s in pcd: continue tmp.append(s) left = tmp[:] # print "pcd :", pcd # print "left:", left # angles = [] for p in rm.geometry.angle_proxies: present = False for i in p.i_seqs: if i in left: present = True break if not present: continue # print list(p.i_seqs) angles.append(list(p.i_seqs)) angles = list(merge_common(angles)) # dihedrals_unique = [dihedrals_unique] + angles dihedrals_all = [dihedrals_all] + angles dihedrals_unique = list(merge_common(dihedrals_unique))[0] dihedrals_all = list(merge_common(dihedrals_all))[0] # print "dihedrals_unique:", dihedrals_unique # print "dihedrals_all :", dihedrals_all dihedrals_mapping = [dihedrals_all.index(u) for u in dihedrals_unique] # print "mapping:", dihedrals_mapping # check pcd = dihedrals_unique + chirals_unique + planes_all pcd.sort() assert approx_equal(pcd, all_selection) # return group_args(planes_all=planes_all, chirals_unique=chirals_unique, chirals_all=chirals_all, chirals_mapping=flex.size_t(chirals_mapping), dihedrals_unique=dihedrals_unique, dihedrals_all=dihedrals_all, dihedrals_mapping=flex.size_t(dihedrals_mapping))
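# get_fragments relies on a merge_common helper that is defined elsewhere;
# the sketch below is one standard implementation of that idiom (merging all
# lists that share at least one element, i.e. connected components), offered
# as an assumption about its behavior rather than the actual code.
from collections import defaultdict

def merge_common_sketch(lists):
  neighbors = defaultdict(set)
  for l in lists:
    for i in l:
      neighbors[i].update(l)
  seen = set()
  for node in neighbors:
    if node in seen:
      continue
    # depth-first walk over shared-element connections
    component, stack = set(), [node]
    while stack:
      current = stack.pop()
      if current in component:
        continue
      component.add(current)
      stack.extend(neighbors[current] - component)
    seen |= component
    yield sorted(component)

assert list(merge_common_sketch([[1, 2], [2, 3], [7, 8]])) == [[1, 2, 3], [7, 8]]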
def __init__(self, map_1, xray_structure, fft_map, atom_radius, hydrogen_atom_radius, model_i, number_previous_scatters, ignore_hd=False, residue_detail=True, selection=None, pdb_hierarchy=None): self.xray_structure = xray_structure self.selection = selection self.pdb_hierarchy = pdb_hierarchy self.result = [] self.map_1_size = map_1.size() self.map_1_stat = maptbx.statistics(map_1) self.atoms_with_labels = None self.residue_detail = residue_detail self.model_i = model_i if (pdb_hierarchy is not None): self.atoms_with_labels = list(pdb_hierarchy.atoms_with_labels()) scatterers = self.xray_structure.scatterers() sigma_occ = flex.double() if (self.selection is None): self.selection = flex.bool(scatterers.size(), True) real_map_unpadded = fft_map.real_map_unpadded() sites_cart = self.xray_structure.sites_cart() if not self.residue_detail: self.gifes = [ None, ] * scatterers.size() self._result = [ None, ] * scatterers.size() # atom_radii = flex.double(scatterers.size(), atom_radius) for i_seq, sc in enumerate(scatterers): if (self.selection[i_seq]): if (sc.element_symbol().strip().lower() in ["h", "d"]): atom_radii[i_seq] = hydrogen_atom_radius # for i_seq, site_cart in enumerate(sites_cart): if (self.selection[i_seq]): sel = maptbx.grid_indices_around_sites( unit_cell=self.xray_structure.unit_cell(), fft_n_real=real_map_unpadded.focus(), fft_m_real=real_map_unpadded.all(), sites_cart=flex.vec3_double([site_cart]), site_radii=flex.double([atom_radii[i_seq]])) self.gifes[i_seq] = sel m1 = map_1.select(sel) ed1 = map_1.eight_point_interpolation( scatterers[i_seq].site) sigma_occ.append(ed1) a = None if (self.atoms_with_labels is not None): a = self.atoms_with_labels[i_seq] self._result[i_seq] = group_args(atom=a, m1=m1, ed1=ed1, xyz=site_cart) self.xray_structure.set_occupancies(sigma_occ) ### For testing other residue averaging options residues = self.extract_residues( model_i=model_i, number_previous_scatters=number_previous_scatters) self.xray_structure.residue_selections = residues # Residue detail if self.residue_detail: assert self.pdb_hierarchy is not None residues = self.extract_residues( model_i=model_i, number_previous_scatters=number_previous_scatters) self.gifes = [ None, ] * len(residues) self._result = [ None, ] * len(residues) for i_seq, residue in enumerate(residues): residue_sites_cart = sites_cart.select(residue.selection) if 0: print(i_seq, list(residue.selection)) # DEBUG sel = maptbx.grid_indices_around_sites( unit_cell=self.xray_structure.unit_cell(), fft_n_real=real_map_unpadded.focus(), fft_m_real=real_map_unpadded.all(), sites_cart=residue_sites_cart, site_radii=flex.double(residue.selection.size(), atom_radius)) self.gifes[i_seq] = sel m1 = map_1.select(sel) ed1 = flex.double() for i_seq_r in residue.selection: ed1.append( map_1.eight_point_interpolation( scatterers[i_seq_r].site)) self._result[i_seq] = \ group_args(residue = residue, m1 = m1, ed1 = flex.mean(ed1), xyz=residue_sites_cart.mean(), n_atoms=residue_sites_cart.size()) residue_scatterers = scatterers.select(residue.selection) residue_ed1 = flex.double() for n, scatter in enumerate(residue_scatterers): if ignore_hd: if scatter.element_symbol() not in ['H', 'D']: residue_ed1.append(ed1[n]) else: residue_ed1.append(ed1[n]) for x in range(ed1.size()): sigma_occ.append(flex.mean(residue_ed1)) self.xray_structure.set_occupancies(sigma_occ) self.xray_structure.residue_selections = residues del map_1
def collect_residue_torsion_angles(pdb_hierarchy, atom_selection=None, chi_angles_only=False): get_class = iotbx.pdb.common_residue_names_get_class residue_torsions = [] ### chi angles ### residue_chis = collect_sidechain_chi_angles(pdb_hierarchy=pdb_hierarchy, atom_selection=atom_selection) residue_torsions = residue_chis if chi_angles_only: return residue_torsions ################## if atom_selection is not None: if (isinstance(atom_selection, flex.bool)): actual_selection = atom_selection elif (isinstance(atom_selection, flex.size_t)): actual_selection = flex.bool(pdb_hierarchy.atoms_size(), False) actual_selection.set_selected(atom_selection, True) if atom_selection is None: actual_selection = flex.bool(pdb_hierarchy.atoms_size(), True) previous_residue = None next_residue = None for model in pdb_hierarchy.models(): for chain in model.chains(): for conformer in chain.conformers(): for i_res, residue in enumerate(conformer.residues()): if (get_class(residue.resname) != "common_amino_acid"): continue if i_res < (len(conformer.residues()) - 1): next_residue = conformer.residues()[i_res + 1] else: next_residue = None torsions = [] # atoms_to_work = [prevCA, prevC, curN, curCA, curC, nextN] atoms_to_work = [None] * 6 atoms_to_work[2] = residue.find_atom_by(name=" N ") atoms_to_work[3] = residue.find_atom_by(name=" CA ") atoms_to_work[4] = residue.find_atom_by(name=" C ") if previous_residue is not None: atoms_to_work[0] = previous_residue.find_atom_by( name=" CA ") atoms_to_work[1] = previous_residue.find_atom_by( name=" C ") if next_residue is not None: atoms_to_work[5] = next_residue.find_atom_by( name=" N ") # atoms_to_work = [prevCA, prevC, curN, curCA, curC, nextN] for i in range(len(atoms_to_work)): if (atoms_to_work[i] is not None and not actual_selection[atoms_to_work[i].i_seq]): atoms_to_work[i] = None for i, name in enumerate(["omega", "phi", "psi"]): if atoms_to_work[i:i + 4].count(None) == 0: angle = mmtbx.rotamer.omega_from_atoms( atoms_to_work[i], atoms_to_work[i + 1], atoms_to_work[i + 2], atoms_to_work[i + 3]) if angle is not None: i_seqs = [ atoms_to_work[i].i_seq, atoms_to_work[i + 1].i_seq, atoms_to_work[i + 2].i_seq, atoms_to_work[i + 3].i_seq ] torsions.append( group_args(chi_id=name, i_seqs=i_seqs)) altloc = residue.atoms()[0].fetch_labels().altloc if len(torsions) > 0: residue_info = group_args(residue_name=residue.resname, chain_id=chain.id, altloc=altloc, resid=residue.resid(), chis=torsions) residue_torsions.append(residue_info) previous_residue = residue return residue_torsions
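# Hypothetical usage of collect_residue_torsion_angles (the file name is a
# placeholder, and the sidechain entries are assumed to share the same
# attribute names as the backbone ones built above): each returned
# group_args carries the residue identity plus a list of named torsions
# with the i_seqs of their four defining atoms.
import iotbx.pdb

def show_residue_torsions(file_name="model.pdb"):
  hierarchy = iotbx.pdb.input(file_name=file_name).construct_hierarchy()
  for res_info in collect_residue_torsion_angles(pdb_hierarchy=hierarchy):
    for torsion in res_info.chis:
      print(res_info.chain_id, res_info.resid, torsion.chi_id,
            list(torsion.i_seqs))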
def __init__(self, model, params=None, log=sys.stdout):
  self.model = model
  self.params = params
  if self.params is None:
    self.params = validate.get_default_params().ss_validation
  self.log = log
  self.results = None
  ss_log = cStringIO.StringIO()
  try:
    ss_annot = self.model.get_ss_annotation(log=ss_log)
  except Sorry as e:
    print >> self.log, " Syntax error in SS: %s" % str(e)
    return
  ss_log_cont = ss_log.getvalue()
  n_bad_helices = ss_log_cont.count("Bad HELIX")
  n_bad_sheets = ss_log_cont.count("Bad SHEET")
  pdb_h = self.model.get_hierarchy()
  if ss_annot is None or ss_annot.is_empty():
    print >> self.log, "No SS annotation, nothing to analyze"
    return
  if n_bad_helices > 0:
    print >> self.log, "Number of helices with syntax error: %d" % n_bad_helices
  if n_bad_sheets > 0:
    print >> self.log, "Number of sheets with syntax error: %d" % n_bad_sheets
  if model.get_number_of_models() != 1:
    raise Sorry("Multiple models not supported.")
  if not pdb_h.contains_protein():
    print >> self.log, "Protein is not found in the model"
    return
  if pdb_h.is_ca_only():
    print >> self.log, "Error: CA-only model"
    return
  if is_ca_and_something(pdb_h):
    print >> self.log, "CA-only and something model"
    return
  if some_chains_are_ca(pdb_h):
    print >> self.log, "Some chains are CA-only"
    return
  n_total_helix_sheet_records = ss_annot.get_n_helices() + ss_annot.get_n_sheets()
  n_bad_helix_sheet_records = 0
  # Empty annotations:
  empty_annots = ss_annot.remove_empty_annotations(pdb_h)
  number_of_empty_helices = empty_annots.get_n_helices()
  number_of_empty_sheets = empty_annots.get_n_sheets()
  n_bad_helix_sheet_records += (number_of_empty_helices + number_of_empty_sheets)
  if number_of_empty_helices > 0:
    print >> self.log, "Helices without corresponding atoms in the model (%d):" % number_of_empty_helices
    for h in empty_annots.helices:
      print >> self.log, " ", h.as_pdb_str()
  if number_of_empty_sheets > 0:
    print >> self.log, "Sheets without corresponding atoms in the model (%d):" % number_of_empty_sheets
    for sh in empty_annots.sheets:
      print >> self.log, " ", sh.as_pdb_str()
  print >> self.log, "Checking annotations thoroughly, use nproc=<number> if it is too slow..."
  hsh_tuples = []
  for h in ss_annot.helices:
    hsh_tuples.append(([h], []))
  for sh in ss_annot.sheets:
    hsh_tuples.append(([], [sh]))
  calc_ss_stats = gather_ss_stats(
      pdb_h,
      mediocre_hbond_cutoff=self.params.mediocre_hbond_cutoff,
      bad_hbond_cutoff=self.params.bad_hbond_cutoff)
  results = []
  if len(hsh_tuples) > 0:
    results = easy_mp.pool_map(
        processes=self.params.nproc,
        fixed_func=calc_ss_stats,
        args=hsh_tuples)
  cumm_n_hbonds = 0
  cumm_n_bad_hbonds = 0
  cumm_n_mediocre_hbonds = 0
  cumm_n_rama_out = 0
  cumm_n_wrong_reg = 0
  n_elem_with_wrong_rama = 0
  n_elem_with_rama_out = 0
  n_elem_with_bad_hbond = 0
  #
  # Hydrogen Bonds in Proteins: Role and Strength
  # Roderick E Hubbard, Muhammad Kamran Haider
  # ENCYCLOPEDIA OF LIFE SCIENCES, 2010, John Wiley & Sons, Ltd. www.els.net
  #
  # See also: http://proteopedia.org/wiki/index.php/Hydrogen_bonds
  #
  for ss_elem, r in zip(ss_annot.helices + ss_annot.sheets, results):
    if r is not None:
      (n_hbonds, n_bad_hbonds, n_mediocre_hbonds, hb_lens, n_outliers,
       n_wrong_region) = r
      cumm_n_hbonds += n_hbonds
      cumm_n_bad_hbonds += n_bad_hbonds
      cumm_n_mediocre_hbonds += n_mediocre_hbonds
      cumm_n_rama_out += n_outliers
      cumm_n_wrong_reg += n_wrong_region
      if n_wrong_region > 0:
        n_elem_with_wrong_rama += 1
      if n_outliers > 0:
        n_elem_with_rama_out += 1
      if n_bad_hbonds > 0:
        n_elem_with_bad_hbond += 1
      if n_bad_hbonds + n_outliers + n_wrong_region > 0:
        n_bad_helix_sheet_records += 1
      if n_bad_hbonds + n_mediocre_hbonds + n_outliers + n_wrong_region > 0:
        # this is a bad annotation; print it to the log with separate stats:
        print >> self.log, "Bad annotation found:"
        print >> self.log, "%s" % ss_elem.as_pdb_str()
        print >> self.log, " Total hb: %d, mediocre: %d, bad: %d, Rama outliers: %d, Rama wrong %d" % (
            n_hbonds, n_mediocre_hbonds, n_bad_hbonds, n_outliers, n_wrong_region)
        print >> self.log, "-" * 80
  # n1 = percentage of bad SS elements (per given model);
  #      bad here means: n_bad_hbonds + n_outliers + n_wrong_region > 0
  n1 = safe_div(n_bad_helix_sheet_records, n_total_helix_sheet_records) * 100.
  # n2 = percentage of SS elements with at least one residue in a wrong
  #      region of the Ramachandran plot (per given model);
  n2 = safe_div(n_elem_with_wrong_rama, n_total_helix_sheet_records) * 100.
  # n3 = percentage of SS elements with at least one residue that is a
  #      Ramachandran plot outlier (per given model);
  n3 = safe_div(n_elem_with_rama_out, n_total_helix_sheet_records) * 100.
  # n4 = percentage of bad H-bonds (per given model); no per-element separation.
  n4 = safe_div(cumm_n_bad_hbonds, cumm_n_hbonds) * 100.
  # n5 = percentage of SS elements with at least one bad H-bond (per given model).
  n5 = safe_div(n_elem_with_bad_hbond, n_total_helix_sheet_records) * 100.
  print >> self.log, "Overall info:"
  print >> self.log, "  Total HELIX+SHEET records       :", n_total_helix_sheet_records
  print >> self.log, "  Total bad HELIX+SHEET records   :", n_bad_helix_sheet_records
  print >> self.log, "  Total declared H-bonds          :", cumm_n_hbonds
  print >> self.log, "  Total mediocre H-bonds (%.1f-%.1fA):" % (
      self.params.mediocre_hbond_cutoff, self.params.bad_hbond_cutoff), \
      cumm_n_mediocre_hbonds
  print >> self.log, "  Total bad H-bonds (>%.1fA)      :" % self.params.bad_hbond_cutoff, \
      cumm_n_bad_hbonds
  print >> self.log, "  Total Ramachandran outliers     :", cumm_n_rama_out
  print >> self.log, "  Total wrong Ramachandrans       :", cumm_n_wrong_reg
  print >> self.log, "All done."
  help_string = """\
Total bad HELIX+SHEET records does not include records with syntax mistakes
(they are printed separately at the beginning of the log), but it does
include empty records (without corresponding atoms in the model) and records
with any deviations in geometry (bad/mediocre bonds, Ramachandran angles that
are outliers or in the wrong region).

Ramachandran outliers - residues in the disallowed region of the
Ramachandran plot.
Wrong Ramachandrans - residues in favored or allowed regions of the
Ramachandran plot that do not belong to the region of the annotated
secondary-structure element; for example, a residue annotated as HELIX whose
phi-psi angles fall in the beta-strand region, and vice versa.
"""
  print >> self.log, help_string
  if self.params.filter_annotation:
    filtered_ann = ss_annot.filter_annotation(hierarchy=pdb_h)
    print >> self.log, "Filtered annotation:"
    print >> self.log, filtered_ann.as_pdb_str()
  self.results = group_args(
      n_total_helix_sheet_records=n_total_helix_sheet_records,
      n_bad_helix_sheet_records=n_bad_helix_sheet_records,
      n_hbonds=cumm_n_hbonds,
      n_mediocre_hbonds=cumm_n_mediocre_hbonds,
      n_bad_hbonds=cumm_n_bad_hbonds,
      n_rama_out=cumm_n_rama_out,
      n_wrong_reg=cumm_n_wrong_reg,
      n1=n1, n2=n2, n3=n3, n4=n4, n5=n5,
      # Numbers of helices/sheets with syntax errors, specifically those
      # producing ValueError on converting a field to a number.
      n_bad_helices=n_bad_helices,
      n_bad_sheets=n_bad_sheets)
def nonbonded(self):
  mi, ma, me, n = 0, 0, 0, 0
  if (self.from_restraints is not None):
    mi, ma, me = self.from_restraints.nonbonded_deviations()
    n = self.from_restraints.n_nonbonded_proxies
  return group_args(min=mi, max=ma, mean=me, n=n)
def count_hd_atoms(self): count_h, count_d, n_water = self.get_overall_counts() get_class = common_residue_names_get_class count_hd_atoms_protein = 0 count_h_protein, count_d_protein = 0, 0 count_h_water, count_d_water = 0, 0 count_water = 0 count_water_0h, count_water_1h, count_water_2h = 0, 0, 0 count_water_more_h = 0 count_water_altconf = 0 count_water_no_oxygen = 0 hd_atoms_with_occ_0 = [] single_hd_atoms_occ_lt_1 = [] for residue_group in self.pdb_hierarchy.residue_groups(): for resname in residue_group.unique_resnames(): if (get_class(name=resname) == 'common_water'): count_water +=1 count_hd_in_rg, count_o_in_rg = 0, 0 for atom in residue_group.atoms(): is_alt_conf = False # XXX No break down for water in alt conf for now if (atom.parent().altloc != ''): count_water_altconf +=1 is_alt_conf = True break else: if (atom.element_is_hydrogen()): count_hd_in_rg += 1 elif (atom.element.strip().upper() == 'O'): count_o_in_rg += 1 if not is_alt_conf: if count_hd_in_rg == 1 and count_o_in_rg == 1: count_water_1h += 1 elif count_hd_in_rg == 2 and count_o_in_rg == 1: count_water_2h += 1 elif count_hd_in_rg == 0 and count_o_in_rg == 1: count_water_0h += 1 elif count_o_in_rg == 0: count_water_no_oxygen += 1 elif count_hd_in_rg > 2: count_water_more_h += 1 for atom in residue_group.atoms(): resname = atom.parent().resname if (get_class(name=resname) in protein): if (not atom.element_is_hydrogen()): continue count_hd_atoms_protein += 1 if (atom.occ == 0): hd_atoms_with_occ_0.append((atom.id_str(), atom.xyz)) if (atom.occ <1 and atom.occ > 0 and atom.parent().altloc == ''): single_hd_atoms_occ_lt_1.append( (atom.id_str(), atom.occ, atom.xyz)) if (is_hydrogen(atom)): count_h_protein += 1 elif (is_deuterium(atom)): count_d_protein += 1 elif (get_class(name=resname) == 'common_water'): if (is_hydrogen(atom)): count_h_water += 1 elif (is_deuterium(atom)): count_d_water += 1 assert (count_hd_atoms_protein == count_h_protein + count_d_protein) assert (count_water_1h + count_water_2h + count_water_0h + \ count_water_altconf + count_water_no_oxygen + count_water_more_h == count_water) assert (count_water == n_water) count_h_other = count_h - count_h_protein - count_h_water count_d_other = count_d - count_d_protein - count_d_water self.overall_counts_hd = group_args( count_h = count_h, count_d = count_d, count_h_protein = count_h_protein, count_d_protein = count_d_protein, count_h_water = count_h_water, count_d_water = count_d_water, count_h_other = count_h_other, count_d_other = count_d_other, count_water = count_water, count_water_0h = count_water_0h, count_water_1h = count_water_1h, count_water_2h = count_water_2h, count_water_altconf = count_water_altconf, count_water_no_oxygen = count_water_no_oxygen, hd_atoms_with_occ_0 = hd_atoms_with_occ_0, single_hd_atoms_occ_lt_1 = single_hd_atoms_occ_lt_1 )
def init_result():
  return group_args(k_mask_bin_orig=None,
                    k_mask_bin_smooth=None,
                    k_mask=None,
                    k_isotropic=None,
                    k_mask_fit_params=None)
def run_once(directory): from dxtbx.serialize import load sweep_dir = os.path.basename(directory) print(sweep_dir) datablock_name = os.path.join(directory, "datablock.json") if not os.path.exists(datablock_name): # this is what xia2 calls it: datablock_name = os.path.join(directory, "datablock_import.json") strong_spots_name = os.path.join(directory, "strong.pickle") experiments_name = os.path.join(directory, "experiments.json") indexed_spots_name = os.path.join(directory, "indexed.pickle") unindexed_spots_name = os.path.join(directory, "unindexed.pickle") if not (os.path.exists(datablock_name) and os.path.exists(strong_spots_name)): return datablock = load.datablock(datablock_name) assert len(datablock) == 1 if len(datablock[0].extract_sweeps()) == 0: print("Skipping %s" % directory) return sweep = datablock[0].extract_sweeps()[0] template = sweep.get_template() strong_spots = easy_pickle.load(strong_spots_name) n_strong_spots = len(strong_spots) if os.path.exists(experiments_name): experiments = load.experiment_list(experiments_name) n_indexed_lattices = len(experiments) else: experiments = None n_indexed_lattices = 0 g = glob.glob(os.path.join(directory, "xds*", "run_2", "INTEGRATE.HKL")) n_integrated_lattices = len(g) if os.path.exists(indexed_spots_name): indexed_spots = easy_pickle.load(indexed_spots_name) else: indexed_spots = None g = glob.glob(os.path.join(directory, "indexed_*.pickle")) if len(g): for path in g: if indexed_spots is None: indexed_spots = easy_pickle.load(path) else: indexed_spots.extend(easy_pickle.load(path)) if os.path.exists(unindexed_spots_name): unindexed_spots = easy_pickle.load(unindexed_spots_name) n_unindexed_spots = len(unindexed_spots) else: n_unindexed_spots = 0 # calculate estimated d_min for sweep based on 95th percentile from dials.algorithms.indexing import indexer detector = sweep.get_detector() scan = sweep.get_scan() beam = sweep.get_beam() goniometer = sweep.get_goniometer() if len(strong_spots) == 0: d_strong_spots_99th_percentile = 0 d_strong_spots_95th_percentile = 0 d_strong_spots_50th_percentile = 0 n_strong_spots_dmin_4 = 0 else: spots_mm = indexer.Indexer.map_spots_pixel_to_mm_rad( strong_spots, detector, scan) indexer.Indexer.map_centroids_to_reciprocal_space( spots_mm, detector, beam, goniometer) d_spacings = 1 / spots_mm["rlp"].norms() perm = flex.sort_permutation(d_spacings, reverse=True) d_spacings_sorted = d_spacings.select(perm) percentile_99th = int(math.floor(0.99 * len(d_spacings))) percentile_95th = int(math.floor(0.95 * len(d_spacings))) percentile_50th = int(math.floor(0.5 * len(d_spacings))) d_strong_spots_99th_percentile = d_spacings_sorted[percentile_99th] d_strong_spots_95th_percentile = d_spacings_sorted[percentile_95th] d_strong_spots_50th_percentile = d_spacings_sorted[percentile_50th] n_strong_spots_dmin_4 = (d_spacings >= 4).count(True) cell_params = flex.sym_mat3_double() n_indexed = flex.double() d_min_indexed = flex.double() rmsds = flex.vec3_double() sweep_dir_cryst = flex.std_string() if experiments is not None: for i, experiment in enumerate(experiments): sweep_dir_cryst.append(sweep_dir) crystal_model = experiment.crystal unit_cell = crystal_model.get_unit_cell() space_group = crystal_model.get_space_group() crystal_symmetry = crystal.symmetry(unit_cell=unit_cell, space_group=space_group) cb_op_reference_setting = ( crystal_symmetry.change_of_basis_op_to_reference_setting()) crystal_symmetry_reference_setting = crystal_symmetry.change_basis( cb_op_reference_setting) cell_params.append( 
crystal_symmetry_reference_setting.unit_cell().parameters()) spots_mm = indexed_spots.select(indexed_spots["id"] == i) n_indexed.append(len(spots_mm)) if len(spots_mm) == 0: d_min_indexed.append(0) else: indexer.Indexer.map_centroids_to_reciprocal_space( spots_mm, detector, beam, goniometer) d_spacings = 1 / spots_mm["rlp"].norms() perm = flex.sort_permutation(d_spacings, reverse=True) d_min_indexed.append(d_spacings[perm[-1]]) try: rmsds.append(get_rmsds_obs_pred(spots_mm, experiment)) except Exception as e: print(e) rmsds.append((-1, -1, -1)) continue return group_args( sweep_dir=sweep_dir, template=template, n_strong_spots=n_strong_spots, n_strong_spots_dmin_4=n_strong_spots_dmin_4, n_unindexed_spots=n_unindexed_spots, n_indexed_lattices=n_indexed_lattices, n_integrated_lattices=n_integrated_lattices, d_strong_spots_50th_percentile=d_strong_spots_50th_percentile, d_strong_spots_95th_percentile=d_strong_spots_95th_percentile, d_strong_spots_99th_percentile=d_strong_spots_99th_percentile, cell_params=cell_params, n_indexed=n_indexed, d_min_indexed=d_min_indexed, rmsds=rmsds, sweep_dir_cryst=sweep_dir_cryst, )
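# Self-contained sketch (plain Python, an invented helper) of the percentile
# logic used in run_once above: d-spacings are sorted in decreasing order and
# the value at floor(p * N) indexes the p-th percentile resolution estimate.
import math

def d_min_percentile(d_spacings, fraction):
    if not d_spacings:
        return 0
    d_sorted = sorted(d_spacings, reverse=True)
    index = int(math.floor(fraction * len(d_sorted)))
    return d_sorted[min(index, len(d_sorted) - 1)]

assert d_min_percentile([4.0, 3.0, 2.0, 1.5], 0.5) == 2.0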
def get_results(self):
  return group_args(output_file=self.output_file,
                    cif_model=self.cif_model)