def skewness_calculation(space_group_info, n_test_points=10, n_sites=20, d_min=3, volume_per_atom=200): structure = random_structure.xray_structure( space_group_info=space_group_info, elements=["Se"]*n_sites, volume_per_atom=volume_per_atom, random_u_iso=True) structure.show_summary() print f_calc = structure.structure_factors( d_min=d_min, anomalous_flag=False).f_calc() f_calc.show_summary() print for i_fudge_factor in xrange(n_test_points+1): fudge_factor = i_fudge_factor/float(n_test_points) randomized_f_calc = randomize_phases(f_calc, fudge_factor) mwpe = f_calc.mean_weighted_phase_error(randomized_f_calc) rho = randomized_f_calc.fft_map().real_map_unpadded() # <(rho-rho_bar)**3>/<(rho-rho_bar)**2>**3/2 rho_rho_bar = rho - flex.mean(rho) num = flex.mean(flex.pow(rho_rho_bar, 3)) den = flex.mean(flex.pow(rho_rho_bar, 2))**(3/2.) assert den != 0 skewness = num / den print "fudge factor, phase difference, map skewness:", print "%4.2f, %5.2f, %.4g" % (fudge_factor, mwpe, skewness) print
def exercise(): if (not libtbx.env.has_module("mmtbx")): print "Skipping exercise(): mmtbx module not available" return if (libtbx.env.find_in_repositories(relative_path="chem_data") is None): print "Skipping exercise(): chem_data directory not available" return from mmtbx.monomer_library import pdb_interpretation file_name = "phe_tst_adp_aniso_restraints.pdb" open(file_name, "w").write(phe_pdb) out = StringIO() processed_pdb_file = pdb_interpretation.run( args = [file_name], strict_conflict_handling = False, log = out) geo = processed_pdb_file.geometry_restraints_manager() xray_structure = processed_pdb_file.xray_structure() xray_structure.scatterers().flags_set_grads(state=False) xray_structure.scatterers().flags_set_grad_u_iso( iselection=xray_structure.use_u_iso().iselection()) xray_structure.scatterers().flags_set_grad_u_aniso( iselection=xray_structure.use_u_aniso().iselection()) adp_rm = cctbx.adp_restraints.adp_aniso_restraints( xray_structure = xray_structure, restraints_manager = geo, use_hd = False) assert approx_equal(flex.mean(adp_rm.gradients_iso), 0.713756592583) assert approx_equal(flex.mean(adp_rm.gradients_aniso_cart.as_double()), -0.118959432097) assert approx_equal(adp_rm.target, 8.97112989232) fd(xray_structure = xray_structure, restraints_manager = geo, eps=1.e-4)
def calculate_solvent_mask(self): # calculate mask lsd = local_standard_deviation_map( self.map_coeffs, self.radius, mean_solvent_density=self.mean_solvent_density, symmetry_flags=maptbx.use_space_group_symmetry, resolution_factor=self.params.grid_resolution_factor, method=2) self.rms_map = lsd.map self.mask = lsd.mask(self.params.solvent_fraction) # setup solvent/protein selections self.solvent_selection = (self.mask == 1) self.protein_selection = (self.mask == 0) self.solvent_iselection = self.solvent_selection.iselection() self.protein_iselection = self.protein_selection.iselection() self.n_solvent_grid_points = self.mask.count(1) self.n_protein_grid_points = self.mask.count(0) # map statistics self.mean_protein_density = self.mean_protein_density_start = flex.mean( self.map.select(self.protein_iselection)) self.mean_solvent_density = self.mean_solvent_density_start = flex.mean( self.map.select(self.solvent_iselection)) self.mask_percent = self.n_solvent_grid_points/(self.mask.size()) * 100 self.f000_over_v = (( (1/self.params.protein_solvent_ratio) * self.mean_protein_density) - self.mean_solvent_density) \ * (self.params.protein_solvent_ratio/(self.params.protein_solvent_ratio-1)) self.rms_protein_density = rms(self.map.select(self.protein_iselection)) self.rms_solvent_density = rms(self.map.select(self.solvent_iselection)) self.standard_deviation_local_rms = flex.mean_and_variance( lsd.map.as_1d()).unweighted_sample_standard_deviation()
def exercise_f_model_no_scales(symbol = "C 2"): random.seed(0) flex.set_random_seed(0) x = random_structure.xray_structure( space_group_info = sgtbx.space_group_info(symbol=symbol), elements =(("O","N","C")*5+("H",)*10), volume_per_atom = 200, min_distance = 1.5, general_positions_only = True, random_u_iso = True, random_occupancy = False) f_obs = abs(x.structure_factors(d_min = 1.5, algorithm="fft").f_calc()) x.shake_sites_in_place(mean_distance=1) k_iso = flex.double(f_obs.data().size(), 2) k_aniso = flex.double(f_obs.data().size(), 3) fmodel = mmtbx.f_model.manager( xray_structure = x, k_isotropic = k_iso, k_anisotropic = k_aniso, f_obs = f_obs) fc = abs(fmodel.f_calc()).data() fm = abs(fmodel.f_model()).data() fmns = abs(fmodel.f_model_no_scales()).data() assert approx_equal(flex.mean(fm/fc), 6) assert approx_equal(flex.mean(fmns/fc), 1)
def exercise_SFweight_spline_core(structure, d_min, verbose=0): structure.scattering_type_registry(d_min=d_min) f_obs = abs(structure.structure_factors( d_min=d_min, anomalous_flag=False).f_calc()) if (0 or verbose): f_obs.show_summary() f_obs = miller.array( miller_set=f_obs, data=f_obs.data(), sigmas=flex.sqrt(f_obs.data())) partial_structure = xray.structure( crystal_symmetry=structure, scatterers=structure.scatterers()[:-2]) f_calc = f_obs.structure_factors_from_scatterers( xray_structure=partial_structure).f_calc() test_set_flags = (flex.random_double(size=f_obs.indices().size()) < 0.1) sfweight = clipper.SFweight_spline_interface( unit_cell=f_obs.unit_cell(), space_group=f_obs.space_group(), miller_indices=f_obs.indices(), anomalous_flag=f_obs.anomalous_flag(), f_obs_data=f_obs.data(), f_obs_sigmas=f_obs.sigmas(), f_calc=f_calc.data(), test_set_flags=test_set_flags, n_refln=f_obs.indices().size()//10, n_param=20) if (0 or verbose): print "number_of_spline_parameters:",sfweight.number_of_spline_parameters() print "mean fb: %.8g" % flex.mean(flex.abs(sfweight.fb())) print "mean fd: %.8g" % flex.mean(flex.abs(sfweight.fd())) print "mean phi: %.8g" % flex.mean(sfweight.centroid_phases()) print "mean fom: %.8g" % flex.mean(sfweight.figures_of_merit()) return sfweight
def set_chunk_stats(chunk, stats, stat_choice, n_residues=None, ref_cell=None, space_group=None, d_min=None, ref_data=None): if "reslimit" in stat_choice: stats["reslimit"].append(chunk.res_lim) else: stats["reslimit"].append(float("nan")) if "pr" in stat_choice: stats["pr"].append(chunk.profile_radius) else: stats["pr"].append(float("nan")) stats["ccref"].append(float("nan")) if set(["ioversigma","resnatsnr1","ccref"]).intersection(stat_choice): iobs = chunk.data_array(space_group, False) iobs = iobs.select(iobs.sigmas()>0).merge_equivalents(use_internal_variance=False).array() binner = iobs.setup_binner(auto_binning=True) if "resnatsnr1" in stat_choice: res = float("nan") for i_bin in binner.range_used(): sel = binner.selection(i_bin) tmp = iobs.select(sel) if tmp.size() == 0: continue sn = flex.mean(tmp.data()/tmp.sigmas()) if sn <= 1: res = binner.bin_d_range(i_bin)[1] break stats["resnatsnr1"].append(res) else: stats["resnatsnr1"].append(float("nan")) if d_min: iobs = iobs.resolution_filter(d_min=d_min) if "ccref" in stat_choice: corr = iobs.correlation(ref_data, assert_is_similar_symmetry=False) if corr.is_well_defined(): stats["ccref"][-1] = corr.coefficient() if "ioversigma" in stat_choice: stats["ioversigma"].append(flex.mean(iobs.data()/iobs.sigmas())) else: stats["ioversigma"].append(float("nan")) else: stats["ioversigma"].append(float("nan")) stats["resnatsnr1"].append(float("nan")) if "abdist" in stat_choice: from cctbx.uctbx.determine_unit_cell import NCDist G6a, G6b = make_G6(ref_cell), make_G6(chunk.cell) abdist = NCDist(G6a, G6b) stats["abdist"].append(abdist) else: stats["abdist"].append(float("nan")) if "wilsonb" in stat_choice: iso_scale_and_b = ml_iso_absolute_scaling(iobs, n_residues, 0) stats["wilsonb"].append(iso_scale_and_b.b_wilson) else: stats["wilsonb"].append(float("nan"))
def print_table(self): from libtbx import table_utils from libtbx.str_utils import format_value table_header = ["Tile","Dist","Nobs","aRmsd","Rmsd","delx","dely","disp","rotdeg","Rsigma","Tsigma"] table_data = [] table_data.append(table_header) sort_radii = flex.sort_permutation(flex.double(self.radii)) tile_rmsds = flex.double() radial_sigmas = flex.double(len(self.tiles) // 4) tangen_sigmas = flex.double(len(self.tiles) // 4) for idx in range(len(self.tiles) // 4): x = sort_radii[idx] if self.tilecounts[x] < 3: wtaveg = 0.0 radial = (0,0) tangential = (0,0) rmean,tmean,rsigma,tsigma=(0,0,1,1) else: wtaveg = self.weighted_average_angle_deg_from_tile(x) radial,tangential,rmean,tmean,rsigma,tsigma = get_radial_tangential_vectors(self,x) radial_sigmas[x]=rsigma tangen_sigmas[x]=tsigma table_data.append( [ format_value("%3d", x), format_value("%7.2f", self.radii[x]), format_value("%6d", self.tilecounts[x]), format_value("%5.2f", self.asymmetric_tile_rmsd[x]), format_value("%5.2f", self.tile_rmsd[x]), format_value("%5.2f", self.mean_cv[x][0]), format_value("%5.2f", self.mean_cv[x][1]), format_value("%5.2f", matrix.col(self.mean_cv[x]).length()), format_value("%6.2f", wtaveg), format_value("%6.2f", rsigma), format_value("%6.2f", tsigma), ]) table_data.append([""]*len(table_header)) rstats = flex.mean_and_variance(radial_sigmas,self.tilecounts.as_double()) tstats = flex.mean_and_variance(tangen_sigmas,self.tilecounts.as_double()) table_data.append( [ format_value("%3s", "ALL"), format_value("%s", ""), format_value("%6d", self.overall_N), format_value("%5.2f", math.sqrt(flex.mean(self.delrsq))), format_value("%5.2f", self.overall_rmsd), format_value("%5.2f", self.overall_cv[0]), format_value("%5.2f", self.overall_cv[1]), format_value("%5.2f", flex.mean(flex.double([matrix.col(cv).length() for cv in self.mean_cv]))), format_value("%s", ""), format_value("%6.2f", rstats.mean()), format_value("%6.2f", tstats.mean()), ]) print print table_utils.format(table_data,has_header=1,justify='center',delim=" ")
def get_rmsds_obs_pred(self, observations, experiment): reflections = observations.select(observations.get_flags( observations.flags.used_in_refinement)) assert len(reflections) > 0 obs_x, obs_y, obs_z = reflections['xyzobs.mm.value'].parts() calc_x, calc_y, calc_z = reflections['xyzcal.mm'].parts() rmsd_x = flex.mean(flex.pow2(obs_x-calc_x))**0.5 rmsd_y = flex.mean(flex.pow2(obs_y-calc_y))**0.5 rmsd_z = flex.mean(flex.pow2(obs_z-calc_z))**0.5 return (rmsd_x, rmsd_y, rmsd_z)
def prepare_simulation_with_noise(sim, transmittance, apply_noise, ordered_intensities=None, half_data_flag = 0): result = intensity_data() result.frame = sim["frame_lookup"] result.miller= sim['miller_lookup'] raw_obs_no_noise = transmittance * sim['observed_intensity'] if apply_noise: import scitbx.random from scitbx.random import variate, normal_distribution # bernoulli_distribution, gamma_distribution, poisson_distribution scitbx.random.set_random_seed(321) g = variate(normal_distribution()) noise = flex.sqrt(raw_obs_no_noise) * g(len(raw_obs_no_noise)) # adds in Gauss noise to signal else: noise = flex.double(len(raw_obs_no_noise),0.) raw_obs = raw_obs_no_noise + noise if half_data_flag in [1,2]: # apply selection after random numbers have been applied half_data_selection = (sim["frame_lookup"]%2)==(half_data_flag%2) result.frame = sim["frame_lookup"].select(half_data_selection) result.miller = sim['miller_lookup'].select(half_data_selection) raw_obs = raw_obs.select(half_data_selection) mean_signal = flex.mean(raw_obs) sigma_obs = flex.sqrt(flex.abs(raw_obs)) mean_sigma = flex.mean(sigma_obs) print "<I> / <sigma>", (mean_signal/ mean_sigma) scale_factor = mean_signal/10. print "Mean signal is",mean_signal,"Applying a constant scale factor of ",scale_factor #most important line; puts input data on a numerically reasonable scale result.raw_obs = raw_obs / scale_factor scaled_sigma = sigma_obs / scale_factor result.exp_var = scaled_sigma * scaled_sigma #ordered intensities gets us the unit cell & miller indices to # gain a static array of (sin theta over lambda)**2 if ordered_intensities is not None: uc = ordered_intensities.unit_cell() stol_sq = flex.double() for i in xrange(len(result.miller)): this_hkl = ordered_intensities.indices()[result.miller[i]] stol_sq_item = uc.stol_sq(this_hkl) stol_sq.append(stol_sq_item) result.stol_sq = stol_sq return result
def prepare_observations_for_scaling(work_params,obs,reference_intensities=None, half_data_flag = 0,files = None): result = intensity_data() result.frame = obs["frame_lookup"] result.miller= obs['miller_lookup'] result.origHKL = flex.miller_index(obs["original_H"],obs["original_K"],obs["original_L"]) raw_obs = obs["observed_intensity"] sigma_obs = obs["observed_sigI"] if half_data_flag in [1,2]: # apply selection after random numbers have been applied if files==None: half_data_selection = (obs["frame_lookup"]%2)==(half_data_flag%2) else: # if file names are available, base half data selection on the last digit in filename. extension = work_params.filename_extension frame_selection = flex.bool([ (half_data_flag==1 and (int(item.split("."+extension)[0][-1])%2==1)) or \ (half_data_flag==2 and (int(item.split("."+extension)[0][-1])%2==0)) for item in files]) half_data_selection = frame_selection.select(obs["frame_lookup"]) result.frame = obs["frame_lookup"].select(half_data_selection) result.miller = obs['miller_lookup'].select(half_data_selection) result.origHKL = result.origHKL.select(half_data_selection) raw_obs = raw_obs.select(half_data_selection) sigma_obs = sigma_obs.select(half_data_selection) mean_signal = flex.mean(raw_obs) mean_sigma = flex.mean(sigma_obs) print "<I> / <sigma>", (mean_signal/ mean_sigma) scale_factor = mean_signal/10. print "Mean signal is",mean_signal,"Applying a constant scale factor of ",scale_factor SDFAC_FROM_CHISQ = work_params.levmar.sdfac_value #most important line; puts input data on a numerically reasonable scale # XXX result.raw_obs = raw_obs / scale_factor scaled_sigma = SDFAC_FROM_CHISQ * sigma_obs / scale_factor result.exp_var = scaled_sigma * scaled_sigma #reference intensities gets us the unit cell & miller indices to # gain a static array of (sin theta over lambda)**2 if reference_intensities is not None: uc = reference_intensities.unit_cell() stol_sq = flex.double() for i in xrange(len(result.miller)): this_hkl = reference_intensities.indices()[result.miller[i]] stol_sq_item = uc.stol_sq(this_hkl) stol_sq.append(stol_sq_item) result.stol_sq = stol_sq return result
def get_phase_scores(miller_arrays): result = [] for miller_array in miller_arrays: score = 0 if ( miller_array.is_complex_array() or miller_array.is_hendrickson_lattman_array()): score = 4 elif (miller_array.is_real_array()): if (miller_array.is_xray_reconstructed_amplitude_array()): pass elif (miller_array.is_xray_amplitude_array()): pass elif (miller_array.is_xray_intensity_array()): pass elif (miller_array.data().size() == 0): pass else: m = flex.mean(flex.abs(miller_array.data())) if (m < 5): score = 2 elif (m < 500): score = 3 else: score = 1 result.append(score) return result
def quick_test(file_name): from libtbx.utils import user_plus_sys_time t = user_plus_sys_time() s = reader(file_name) print "Time read:", t.delta() s.show_summary() print tuple(s.original_indices[:3]) print tuple(s.unique_indices[:3]) print tuple(s.batch_numbers[:3]) print tuple(s.centric_tags[:3]) print tuple(s.spindle_flags[:3]) print tuple(s.asymmetric_unit_indices[:3]) print tuple(s.i_obs[:3]) print tuple(s.sigmas[:3]) print tuple(s.original_indices[-3:]) print tuple(s.unique_indices[-3:]) print tuple(s.batch_numbers[-3:]) print tuple(s.centric_tags[-3:]) print tuple(s.spindle_flags[-3:]) print tuple(s.asymmetric_unit_indices[-3:]) print tuple(s.i_obs[-3:]) print tuple(s.sigmas[-3:]) m = s.as_miller_array(merge_equivalents=False).merge_equivalents() print "min redundancies:", flex.min(m.redundancies().data()) print "max redundancies:", flex.max(m.redundancies().data()) print "mean redundancies:", flex.mean(m.redundancies().data().as_double()) s.as_miller_arrays()[0].show_summary() print
def unit_cell_bases_mean_square_difference(self, other): diff_sqs = flex.double() for basis_vector in [(1,0,0),(0,1,0),(0,0,1)]: self_v = matrix.col(self.orthogonalize(basis_vector)) other_v = matrix.col(other.orthogonalize(basis_vector)) diff_sqs.append((self_v - other_v).norm_sq()) return flex.mean(diff_sqs)
def dump_R_in_bins(obs, calc, scale_B=True, log_out=sys.stdout, n_bins=20): #obs, calc = obs.common_sets(calc, assert_is_similar_symmetry=False) if scale_B: scale, B = kBdecider(obs, calc).run() d_star_sq = calc.d_star_sq().data() calc = calc.customized_copy(data = scale * flex.exp(-B*d_star_sq) * calc.data()) binner = obs.setup_binner(n_bins=n_bins) count=0 log_out.write("dmax - dmin: R (nref) <I1> <I2> scale\n") for i_bin in binner.range_used(): tmp_obs = obs.select(binner.bin_indices() == i_bin) tmp_calc = calc.select(binner.bin_indices() == i_bin) low = binner.bin_d_range(i_bin)[0] high = binner.bin_d_range(i_bin)[1] if scale_B: scale = 1. else: scale = flex.sum(tmp_obs.data()*tmp_calc.data()) / flex.sum(flex.pow2(tmp_calc.data())) R = flex.sum(flex.abs(tmp_obs.data() - scale*tmp_calc.data())) / flex.sum(0.5 * tmp_obs.data() + 0.5 * scale*tmp_calc.data()) log_out.write("%5.2f - %5.2f: %.5f (%d) %.1f %.1f %.3e\n" % (low, high, R, len(tmp_obs.data()), flex.mean(tmp_obs.data()), flex.mean(tmp_calc.data()), scale)) log_out.write("Overall R = %.5f (scale=%.3e, %%comp=%.3f)\n\n" % (calc_R(obs, calc, do_scale=not scale_B) + (obs.completeness()*100.,)) )
def amplitude_quasi_normalisations(ma, d_star_power=1, set_to_minimum=None): epsilons = ma.epsilons().data().as_double() mean_f_sq_over_epsilon = flex.double() for i_bin in ma.binner().range_used(): sel = ma.binner().selection(i_bin) #sel_f_sq = flex.pow2(ma.data().select(sel)) sel_f_sq = ma.data().select(sel) if (sel_f_sq.size() > 0): sel_epsilons = epsilons.select(sel) sel_f_sq_over_epsilon = sel_f_sq / sel_epsilons mean_f_sq_over_epsilon.append(flex.mean(sel_f_sq_over_epsilon)) else: mean_f_sq_over_epsilon.append(0) mean_f_sq_over_epsilon_interp = ma.binner().interpolate( mean_f_sq_over_epsilon, d_star_power) if set_to_minimum and not mean_f_sq_over_epsilon_interp.all_gt(0): # HACK NO REASON THIS SHOULD WORK sel = (mean_f_sq_over_epsilon_interp <= set_to_minimum) mean_f_sq_over_epsilon_interp.set_selected(sel,-mean_f_sq_over_epsilon_interp) sel = (mean_f_sq_over_epsilon_interp <= set_to_minimum) mean_f_sq_over_epsilon_interp.set_selected(sel,set_to_minimum) assert mean_f_sq_over_epsilon_interp.all_gt(0) from cctbx.miller import array #return array(ma, flex.sqrt(mean_f_sq_over_epsilon_interp)) return array(ma, mean_f_sq_over_epsilon_interp)
def target_and_gradients(self, xray_structure, to_compute_weight=False): if(to_compute_weight): xrs = xray_structure.deep_copy_scatterers() # This may be useful to explore: #xrs.shake_adp_if_all_equal(b_iso_tolerance = 1.e-3) #xrs.shake_adp(spread=10, keep_anisotropic= False) else: xrs = xray_structure if(self.refine_adp): params = xrs.extract_u_iso_or_u_equiv() if(self.refine_occ): params = xrs.scatterers().extract_occupancies() if(to_compute_weight): pmin = flex.min(params) pmax = flex.max(params) if(abs(pmin-pmax)/abs(pmin+pmax)*2*100<1.e-3): pmean = flex.mean(params) n_par = params.size() params = flex.double() for i in xrange(n_par): params.append(pmean + 0.1 * pmean * random.choice([-1,0,1])) return crystal.adp_iso_local_sphere_restraints_energies( pair_sym_table = self.pair_sym_table, orthogonalization_matrix = self.orthogonalization_matrix, sites_frac = self.sites_frac, u_isos = params, selection = self.selection, use_u_iso = self.selection, grad_u_iso = self.selection, sphere_radius = self.sphere_radius, distance_power = 2, average_power = 1, min_u_sum = 1.e-6, compute_gradients = True, collect = False)
def create_da_xray_structures(xray_structure, params): def grid(sphere, gap, overlap): c = flex.double(sphere.center) x_start, y_start, z_start = c - float(sphere.radius) x_end, y_end, z_end = c + float(sphere.radius) x_range = frange(c[0], c[0]+gap, overlap) y_range = frange(c[1], c[1]+gap, overlap) z_range = frange(c[2], c[2]+gap, overlap) return group_args(x_range = x_range, y_range = y_range, z_range = z_range) grids = [] for sphere in params.sphere: grids.append(grid(sphere = sphere, gap = params.atom_gap, overlap = params.overlap_interval)) initial_b_factor = params.initial_b_factor if(initial_b_factor is None): initial_b_factor = flex.mean( xray_structure.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)) da_xray_structures = [] counter = 0 for grid, sphere in zip(grids, params.sphere): cntr_g = 0 # XXX for x_start in grid.x_range: for y_start in grid.y_range: for z_start in grid.z_range: cntr_g += 1 if(cntr_g>1): continue # XXX counter += 1 new_center = flex.double([x_start, y_start, z_start]) atom_grid = make_grid( center = new_center, radius = sphere.radius, gap = params.atom_gap, occupancy = params.initial_occupancy, b_factor = initial_b_factor, atom_name = params.atom_name, scattering_type = params.atom_type, resname = params.residue_name) da_xray_structure = pdb_atoms_as_xray_structure(pdb_atoms = atom_grid, crystal_symmetry = xray_structure.crystal_symmetry()) closest_distances_result = xray_structure.closest_distances( sites_frac = da_xray_structure.sites_frac(), distance_cutoff = 5) selection = closest_distances_result.smallest_distances > 0 selection &= closest_distances_result.smallest_distances < 1 da_xray_structure = da_xray_structure.select(~selection) #print counter, da_xray_structure.scatterers().size() if(cntr_g==1): # XXX da_xray_structures.append(da_xray_structure) ### result = [] for i, x1 in enumerate(da_xray_structures): for j, x2 in enumerate(da_xray_structures): if(x1 is not x2): closest_distances_result = x1.closest_distances( sites_frac = x2.sites_frac(), distance_cutoff = 5) # XXX ??? selection = closest_distances_result.smallest_distances > 0 selection &= closest_distances_result.smallest_distances < params.atom_gap da_xray_structures[j] = x2.select(~selection) return da_xray_structures
def show(self, weight = None, prefix = "", show_neutron=True, print_stats=True): deltab = self.model.rms_b_iso_or_b_equiv_bonded() r_work = self.fmodels.fmodel_xray().r_work()*100. r_free = self.fmodels.fmodel_xray().r_free()*100. mean_b = flex.mean( self.model.xray_structure.extract_u_iso_or_u_equiv())*adptbx.u_as_b(1) if(deltab is None): print >> self.log, " r_work=%5.2f r_free=%5.2f"%(r_work, r_free) return None neutron_r_work = neutron_r_free = None if (show_neutron) and (self.fmodels.fmodel_neutron() is not None) : neutron_r_work = self.fmodels.fmodel_neutron().r_work()*100. neutron_r_free = self.fmodels.fmodel_neutron().r_free()*100. xrs = self.fmodels.fmodel_xray().xray_structure result = weight_result( r_work=r_work, r_free=r_free, delta_b=deltab, mean_b=mean_b, weight=weight, xray_target=self.fmodels.fmodel_xray().target_w(), neutron_r_work=neutron_r_work, neutron_r_free=neutron_r_free, u_star=xrs.scatterers().extract_u_star(), u_iso=xrs.scatterers().extract_u_iso()) if (print_stats) : result.show(out=self.log) return result
def fvec_callable(pfh,current_values): rotz = current_values[0] indep = current_values[1:] effective_orientation = OO.input_orientation.rotate_thru((0,0,1),rotz) pfh.convert.set_orientation(effective_orientation) pfh.convert.forward_independent_parameters() effective_orientation = pfh.convert.backward_orientation(independent=indep) OO.ucbp3.set_orientation(effective_orientation) pfh.last_set_orientation = effective_orientation OO.ucbp3.gaussian_fast_slow() # note the reversal of x & y with obs vs. predicted displacements = flex.double( [( col( OO.ucbp3.simple_forward_calculation_spot_position( wavelength = OO.central_wavelength_ang, observation_no = obsno).position) - col( (OO.parent.spots[OO.parent.indexed_pairs[obsno]["spot"]].ctr_mass_y(), OO.parent.spots[OO.parent.indexed_pairs[obsno]["spot"]].ctr_mass_x(), 0.0)) ).length() for obsno in xrange(len(OO.parent.indexed_pairs))]) rmsdexc = math.sqrt(flex.mean(displacements*displacements)) print "rotz %7.3f degrees, RMSD displacement %7.3f pixels"%( (rotz * 180./math.pi), rmsdexc) return list(displacements)
def run(fmodel, model, log, params = None): print_statistics.make_header("Fit water hydrogens into residual map", out = log) if(params is None): params = all_master_params().extract() print_statistics.make_sub_header("find peak-candidates", out = log) peaks = find_hydrogen_peaks( fmodel = fmodel, pdb_atoms = model.pdb_atoms, params = params, log = log) waters_and_peaks = extract_hoh_peaks( peaks = peaks, pdb_hierarchy = model.pdb_hierarchy(), pdb_atoms = model.pdb_atoms, xray_structure = model.xray_structure) print_statistics.make_sub_header("6D rigid body fit of HOH", out = log) print >> log, "Fit quality:" for water_and_peaks in waters_and_peaks: fit_water(water_and_peaks = water_and_peaks, xray_structure = model.xray_structure, params = params, log = log) # adjust ADP for H # TODO mrt: probably H bfactors should be equal to those # of the bonded atom u_isos = model.xray_structure.extract_u_iso_or_u_equiv() u_iso_mean = flex.mean(u_isos) sel_big = u_isos > u_iso_mean*2 hd_sel = model.xray_structure.hd_selection() sel_big.set_selected(~hd_sel, False) model.xray_structure.set_u_iso(value = u_iso_mean, selection = sel_big)
def map_stat(distances, map_values): result = [] # n_points_max = -1 nn=20 x = [[i/100,i/100+nn/100.] for i in range(0,800, nn)] for x_ in x: l,r = x_ sel = distances >= l sel &= distances < r mv = map_values.select(sel) if(mv.size()>n_points_max): n_points_max = mv.size() # for x_ in x: l,r = x_ sel = distances >= l sel &= distances < r mv = map_values.select(sel) if(mv.size()>0): sz = mv.size() rms = math.sqrt( flex.sum(mv*mv)/sz ) #fr = sz*100./map_values.size() fr = sz*1./n_points_max result.append([l, r, flex.mean(mv), rms, sz, fr]) return result
def vectors(self): self.database.initialize_tables_and_insert_command() self.tile_rmsd = [0.]*64 for run,tokens in self.literals(): try: itile = self.register_line( float(tokens[2]),float(tokens[3]), float(tokens[5]),float(tokens[6]), float(tokens[8]),float(tokens[9]), float(tokens[11]),float(tokens[12]) ) if run is not None: self.database.insert(run,itile,tokens) yield "OK" except ValueError: print "Valueerror" self.database.send_insert_command() for x in xrange(64): if self.tilecounts[x]==0: continue self.radii[x]/=self.tilecounts[x] sum_cv = matrix.col(self.mean_cv[x]) self.mean_cv[x] = sum_cv/self.tilecounts[x] mean_cv = matrix.col(self.mean_cv[x]) selection = (self.master_tiles == x) selected_cv = self.master_cv.select(selection) if len(selected_cv)>0: self.tile_rmsd[x] = math.sqrt( flex.mean(flex.double([ (matrix.col(cv) - mean_cv).length_sq() for cv in selected_cv ])) ) else: self.tile_rmsd[x]=0. self.overall_N = flex.sum(flex.int( [int(t) for t in self.tilecounts] )) self.overall_cv = matrix.col(self.overall_cv)/self.overall_N self.overall_rmsd = math.sqrt( self.sum_sq_cv / self.overall_N )
def run(hklin, n_bins): for array in iotbx.file_reader.any_file(hklin).file_server.miller_arrays: # skip if not anomalous intensity data if not (array.is_xray_intensity_array() and array.anomalous_flag()): print "skipping", array.info() continue # We assume that data is already merged assert array.is_unique_set_under_symmetry() # take anomalous differences dano = array.anomalous_differences() # process with binning dano.setup_binner(n_bins=n_bins) binner = dano.binner() print "Array:", array.info() print " dmax dmin nrefs dano" for i_bin in binner.range_used(): # selection for this bin. sel is flex.bool object (list of True of False) sel = binner.selection(i_bin) # take mean of absolute value of anomalous differences in a bin bin_mean = flex.mean(flex.abs(dano.select(sel).data())) d_max, d_min = binner.bin_d_range(i_bin) print "%7.2f %7.2f %6d %.2f" % (d_max, d_min, binner.count(i_bin), bin_mean)
def nearest_rotamer_sites_cart(self, residue): sites_cart_result = residue.atoms().extract_xyz() get_class = iotbx.pdb.common_residue_names_get_class if get_class(residue.resname) == "common_amino_acid": sites_cart = residue.atoms().extract_xyz() rotamer_iterator = self.mon_lib_srv.rotamer_iterator( fine_sampling=True, comp_id=residue.resname, atom_names=residue.atoms().extract_name(), sites_cart=sites_cart, ) if ( rotamer_iterator is None or rotamer_iterator.problem_message is not None or rotamer_iterator.rotamer_info is None ): rotamer_iterator = None if rotamer_iterator is not None: dist_min = 1.0e9 for r, rotamer_sites_cart in rotamer_iterator: d = flex.mean(flex.sqrt((sites_cart - rotamer_sites_cart).dot())) if d < dist_min: dist_min = d sites_cart_result = rotamer_sites_cart return sites_cart_result
def fvec_callable_pvr(pfh,current_values): rotx = current_values[0] roty = current_values[1] effective_orientation = OO.input_orientation.rotate_thru((1,0,0),rotx ).rotate_thru((0,1,0),roty ).rotate_thru((0,0,1),0.0) OO.ucbp3.set_orientation(effective_orientation) pfh.last_set_orientation = effective_orientation OO.ucbp3.gaussian_fast_slow() excursions = flex.double( [OO.ucbp3.simple_forward_calculation_spot_position( wavelength = OO.central_wavelength_ang, observation_no = obsno).rotax_excursion_rad_pvr/(2.*math.pi) for obsno in xrange(len(OO.parent.indexed_pairs))]) degrees = 360.*excursions rmsdexc = math.sqrt(flex.mean(degrees*degrees)) #print "rotx %7.3f roty %7.3f degrees, -PVR excursion %7.3f degrees"%( #(rotx * 180./math.pi),(roty * 180./math.pi), rmsdexc) # Note. Luc Bourhis wants scale to be from 0 to 1. So instead of # returning on scale of degrees, use radians/(2*pi) # The parameters rotx roty are still expressed in radians return excursions
def run(hklfiles, params): arrays = map(lambda x: crystfel.hkl.HKLfile(symm_source=params.pdb, hklin=x), hklfiles) for a in arrays: a.set_resolution(d_min=params.dmin, d_max=params.dmax) ofs = open(params.datout, "w") ofs.write(" dmax dmin nref cmpl red1 red2 %s\n" % " ".join(map(lambda x: "%7s" % x, params.fom))) a1, a2 = arrays[0].array.common_sets(arrays[1].array) r1, r2 = arrays[0].redundancies.common_sets(arrays[1].redundancies) r1, r2 = map(lambda x: x.as_double(), (r1, r2)) binner = a1.setup_binner(n_bins=params.nshells) for i_bin in binner.range_used(): sel = binner.selection(i_bin) d_max, d_min = binner.bin_d_range(i_bin) r1s, r2s = r1.select(sel), r2.select(sel) r1sm = flex.mean(r1s.data()) if r1s.size() > 0 else float("nan") r2sm = flex.mean(r2s.data()) if r2s.size() > 0 else float("nan") a1s, a2s = a1.select(sel), a2.select(sel) ofs.write( "%6.2f %6.2f %5d %5.1f %6.1f %6.1f " % (d_max, d_min, a1s.size(), a1s.completeness(d_max=d_max) * 100.0, r1sm, r2sm) ) if "cc" in params.fom: ofs.write("% 7.4f " % calc_cc(a1s, a2s)) if "ccano" in params.fom: ofs.write("% 7.4f " % calc_ccano(a1s, a2s)) if "rsplit" in params.fom: ofs.write("% 7.4f " % calc_rsplit(a1s, a2s)) ofs.write("\n") ofs.write( "# overall %5d %5.1f %6.1f %6.1f " % (a1.size(), a1.completeness(d_max=params.dmax) * 100.0, flex.mean(r1.data()), flex.mean(r2.data())) ) if "cc" in params.fom: ofs.write("% 7.4f " % calc_cc(a1, a2)) if "ccano" in params.fom: ofs.write("% 7.4f " % calc_ccano(a1, a2)) if "rsplit" in params.fom: ofs.write("% 7.4f " % calc_rsplit(a1, a2)) ofs.write("\n")
def show_xray_structure_statistics(xray_structure, atom_selections, hd_sel = None): result = group_args( all = None, macromolecule = None, sidechain = None, solvent = None, ligand = None, backbone = None) if(hd_sel is not None): xray_structure = xray_structure.select(~hd_sel) for key in atom_selections.__dict__.keys(): value = atom_selections.__dict__[key] if(value.count(True) > 0): if(hd_sel is not None): value = value.select(~hd_sel) xrs = xray_structure.select(value) atom_counts = xrs.scattering_types_counts_and_occupancy_sums() atom_counts_strs = [] for ac in atom_counts: atom_counts_strs.append("%s:%s:%s"%(ac.scattering_type,str(ac.count), str("%10.2f"%ac.occupancy_sum).strip())) atom_counts_str = " ".join(atom_counts_strs) b_isos = xrs.extract_u_iso_or_u_equiv() n_aniso = xrs.use_u_aniso().count(True) n_not_positive_definite = xrs.is_positive_definite_u().count(False) b_mean = format_value("%-6.1f",adptbx.u_as_b(flex.mean(b_isos))) b_min = format_value("%-6.1f",adptbx.u_as_b(flex.min(b_isos))) b_max = format_value("%-6.1f",adptbx.u_as_b(flex.max(b_isos))) n_atoms = format_value("%-8d",xrs.scatterers().size()).strip() n_npd = format_value("%-8s",n_not_positive_definite).strip() occ = xrs.scatterers().extract_occupancies() o_mean = format_value("%-6.2f",flex.mean(occ)).strip() o_min = format_value("%-6.2f",flex.min(occ)).strip() o_max = format_value("%-6.2f",flex.max(occ)).strip() tmp_result = group_args( n_atoms = n_atoms, atom_counts_str = atom_counts_str, b_min = b_min, b_max = b_max, b_mean = b_mean, o_min = o_min, o_max = o_max, o_mean = o_mean, n_aniso = n_aniso, n_npd = n_npd) setattr(result,key,tmp_result) return result
def run(): def compute_map(xray_structure, d_min=1.5, resolution_factor=1./4): fc = xray_structure.structure_factors(d_min = d_min).f_calc() fft_map = fc.fft_map(resolution_factor=resolution_factor) fft_map.apply_sigma_scaling() result = fft_map.real_map_unpadded() return result, fc, fft_map xrs = random_structure.xray_structure( space_group_info = sgtbx.space_group_info("P212121"), elements = ["N","C","O","S","P"]*10, volume_per_atom = 50) map_target,tmp,tmp = compute_map(xray_structure = xrs) xrs_sh = xrs.deep_copy_scatterers() xrs_sh.shake_sites_in_place(mean_distance=0.8) start_error = flex.mean(xrs.distances(other = xrs_sh)) print "Start:", start_error map_current, miller_array, crystal_gridding = compute_map( xray_structure = xrs_sh) for step in [miller_array.d_min()/4]*5: if(1): minimized = real_space_target_and_gradients.minimization( xray_structure = xrs_sh, miller_array = miller_array, crystal_gridding = crystal_gridding, map_target = map_target, max_iterations = 500, min_iterations = 25, step = step, geometry_restraints_manager = None, target_type = "diff_map") xrs_sh = minimized.xray_structure map_current = minimized.map_current final_error = flex.mean(xrs.distances(other = minimized.xray_structure)) if(0): minimized = real_space_refinement_simple.lbfgs( sites_cart=xrs_sh.sites_cart(), density_map=map_target, unit_cell=xrs_sh.unit_cell(), geometry_restraints_manager=None, real_space_gradients_delta=step) xrs_sh = xrs_sh.replace_sites_cart(minimized.sites_cart) final_error = flex.mean(xrs.distances(other = xrs_sh)) print "Final:", final_error assert approx_equal(start_error, 0.8, 1.e-3) assert final_error < 1.e-4 print "OK"
def exercise(pdb_poor_str, d_min = 1.0, resolution_factor = 0.25): # Fit one residue in many-residues model # # answer pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_answer) pdb_inp.write_pdb_file(file_name = "answer.pdb") xrs_answer = pdb_inp.xray_structure_simple() f_calc = xrs_answer.structure_factors(d_min = d_min).f_calc() fft_map = f_calc.fft_map(resolution_factor=resolution_factor) fft_map.apply_sigma_scaling() target_map = fft_map.real_map_unpadded() mtz_dataset = f_calc.as_mtz_dataset(column_root_label = "FCmap") mtz_object = mtz_dataset.mtz_object() mtz_object.write(file_name = "answer.mtz") # take TYR9 sites_answer = list( pdb_inp.construct_hierarchy().residue_groups())[1].atoms().extract_xyz() # poor mon_lib_srv = monomer_library.server.server() master_params = iotbx.phil.parse( input_string=mmtbx.monomer_library.pdb_interpretation.master_params_str, process_includes=True).extract() master_params.link_distance_cutoff=999 processed_pdb_file = monomer_library.pdb_interpretation.process( mon_lib_srv = mon_lib_srv, params = master_params, ener_lib = monomer_library.server.ener_lib(), raw_records = flex.std_string(pdb_poor_str.splitlines()), strict_conflict_handling = True, force_symmetry = True, log = None) pdb_hierarchy_poor = processed_pdb_file.all_chain_proxies.pdb_hierarchy xrs_poor = processed_pdb_file.xray_structure() sites_cart_poor = xrs_poor.sites_cart() pdb_hierarchy_poor.write_pdb_file(file_name = "poor.pdb") # rotamer_manager = RotamerEval() get_class = iotbx.pdb.common_residue_names_get_class for model in pdb_hierarchy_poor.models(): for chain in model.chains(): for residue in chain.only_conformer().residues(): if(get_class(residue.resname) == "common_amino_acid" and int(residue.resseq)==9): # take TYR9 t0 = time.time() ro = mmtbx.refinement.real_space.fit_residue.run_with_minimization( target_map = target_map, residue = residue, xray_structure = xrs_poor, mon_lib_srv = mon_lib_srv, rotamer_manager = rotamer_manager, real_space_gradients_delta = d_min*resolution_factor, geometry_restraints_manager = processed_pdb_file.geometry_restraints_manager(show_energies=False)) sites_final = residue.atoms().extract_xyz() t1 = time.time()-t0 pdb_hierarchy_poor.adopt_xray_structure(ro.xray_structure) pdb_hierarchy_poor.write_pdb_file(file_name = "refined.pdb") dist = flex.mean(flex.sqrt((sites_answer - sites_final).dot())) # Highly unstable test assert dist < 0.9
def check_sites_rt( cmd, xrsp_init, output, selection, selection_str, verbose, tolerance=1.e-3): remove_files(output) run_command(command=cmd, verbose=verbose) xrsp = xray_structure_plus(file_name = output) assert approx_equal(xrsp.occ, xrsp_init.occ,tolerance) assert approx_equal(xrsp.u_iso, xrsp_init.u_iso,tolerance) assert approx_equal(xrsp.u_cart, xrsp_init.u_cart,tolerance) if(selection_str is None): diff = xrsp.sites_cart - xrsp_init.sites_cart assert math.sqrt(flex.mean(diff.dot())) > 1.0 else: diff = xrsp.sites_cart - xrsp_init.sites_cart assert math.sqrt(flex.mean(diff.select(selection).dot())) > 1.0 assert approx_equal( math.sqrt(flex.mean(diff.select(~selection).dot())),0.,tolerance)
def run(hklin, pdbin, wdir, anisotropy_correction=False): arrays = iotbx.file_reader.any_file(hklin).file_server.miller_arrays i_arrays = filter( lambda x: x.is_xray_intensity_array() and x.anomalous_flag(), arrays) f_arrays = filter( lambda x: x.is_xray_amplitude_array() and x.anomalous_flag(), arrays) if not i_arrays and not f_arrays: print "No anomalous observation data" return if os.path.exists(wdir): print "%s already exists. quiting." % wdir return os.mkdir(wdir) xs = crystal_symmetry_from_any.extract_from(pdbin) sh_out = open(os.path.join(wdir, "run_anode.sh"), "w") sh_out.write("#!/bin/sh\n\n") sh_out.write("shelxc anode <<+ > shelxc.log 2>&1\n") sh_out.write("cell %s\n" % format_unit_cell(xs.unit_cell())) sh_out.write("spag %s\n" % str(xs.space_group_info()).replace(" ", "")) if i_arrays: obs_array = i_arrays[0] infile = "%s.hkl" % os.path.splitext(os.path.basename(hklin))[0] in_opt = "%s" % infile print "Using intensity array:", obs_array.info().label_string() else: obs_array = f_arrays[0] infile = "%s_f.hkl" % os.path.splitext(os.path.basename(hklin))[0] in_opt = "-f %s" % infile print "No intensity arrays. Using amplitude arrays instead:", obs_array.info( ).label_string() sh_out.write("! data from %s : %s\n" % (os.path.abspath(hklin), obs_array.info().label_string())) obs_array.crystal_symmetry().show_summary(sh_out, prefix="! ") check_symm(obs_array.crystal_symmetry(), xs) n_org = obs_array.size() obs_array = obs_array.eliminate_sys_absent() n_sys_abs = n_org - obs_array.size() if n_sys_abs > 0: print " %d systematic absences removed." % n_sys_abs if anisotropy_correction: print "Correcting anisotropy.." n_residues = p_vm_calculator(obs_array, 1, 0).best_guess abss = ml_aniso_absolute_scaling(obs_array, n_residues=n_residues) abss.show() tmp = -2. if i_arrays else -1. b_cart = map(lambda x: x * tmp, abss.b_cart) obs_array = obs_array.apply_debye_waller_factors(b_cart=b_cart) sh_out.write("sad %s\n" % in_opt) iotbx.shelx.hklf.miller_array_export_as_shelx_hklf( obs_array, open(os.path.join(wdir, infile), "w"), normalise_if_format_overflow=True) sh_out.write("+\n\n") sh_out.write('ln -s "%s" anode.pdb\n\n' % os.path.relpath(pdbin, wdir)) sh_out.write("anode anode\n") sh_out.close() call(cmd="sh", arg="./run_anode.sh", wdir=wdir) pha_file = os.path.join(wdir, "anode.pha") if os.path.isfile(pha_file): pha2mtz(pha_file, xs, os.path.join(wdir, "anode.pha.mtz")) print "Done. See %s/" % wdir fa_file = os.path.join(wdir, "anode_fa.hkl") if os.path.isfile(fa_file): r = iotbx.shelx.hklf.reader(open(fa_file)) fa_array = r.as_miller_arrays(crystal_symmetry=xs)[0] print "\nData stats:" print " # Cmpl.o = Anomalous completeness in original data" print " # Cmpl.c = Anomalous completeness in shelxc result (rejections)" print " # SigAno = <d''/sigma> in shelxc result" print " d_max d_min Cmpl.o Cmpl.c SigAno" binner = obs_array.setup_binner(n_bins=12) for i_bin in binner.range_used(): d_max_bin, d_min_bin = binner.bin_d_range(i_bin) obs_sel = obs_array.resolution_filter(d_max_bin, d_min_bin) obs_sel_ano = obs_sel.anomalous_differences() fa_sel = fa_array.resolution_filter(d_max_bin, d_min_bin) cmplset = obs_sel_ano.complete_set( d_max=d_max_bin, d_min=d_min_bin).select_acentric() n_acentric = cmplset.size() sigano = flex.mean( fa_sel.data() / fa_sel.sigmas()) if fa_sel.size() else float("nan") print " %5.2f %5.2f %6.2f %6.2f %6.2f" % ( d_max_bin, d_min_bin, 100. * obs_sel_ano.size() / n_acentric, 100. 
* fa_sel.size() / n_acentric, sigano) lsa_file = os.path.join(wdir, "anode.lsa") if os.path.isfile(lsa_file): print "" flag = False for l in open(lsa_file): if "Strongest unique anomalous peaks" in l: flag = True elif "Reflections written to" in l: flag = False if flag: print l.rstrip() if os.path.isfile(("anode_fa.res")): x = iotbx.shelx.cctbx_xray_structure_from(file=open("anode_fa.res")) open("anode_fa.pdb", "w").write(x.as_pdb_file())
def exercise_3(): #test torsion restraints for use_reference in ['True', 'False', 'top_out', 'None']: pdb_inp = iotbx.pdb.input( lines=flex.std_string(pdb_str_2.splitlines()), source_info=None) model = manager( model_input=pdb_inp, log=null_out()) grm = model.get_restraints_manager().geometry xrs2 = model.get_xray_structure() awl2 = model.get_hierarchy().atoms_with_labels() pdb2 = model.get_hierarchy() pdb_inp3 = iotbx.pdb.input(source_info=None, lines=pdb_str_3) xrs3 = pdb_inp3.xray_structure_simple() ph3 = pdb_inp3.construct_hierarchy() ph3.atoms().reset_i_seq() awl3 = ph3.atoms_with_labels() sites_cart_reference = flex.vec3_double() selection = flex.size_t() min_selection = flex.size_t() reference_names = ["N", "CA", "CB", "CG", "CD", "NE", "CZ", "NH1", "NH2"] minimize_names = ["CG", "CD", "NE", "CZ", "NH1", "NH2"] for a2,a3 in zip(tuple(awl2), tuple(awl3)): assert a2.resname == a3.resname assert a2.name == a3.name assert a2.i_seq == a3.i_seq if(a2.resname == "ARG" and a2.name.strip() in reference_names): selection.append(a2.i_seq) sites_cart_reference.append(a3.xyz) if a2.name.strip() in minimize_names: min_selection.append(a2.i_seq) assert selection.size() == len(reference_names) selection_bool = flex.bool(xrs2.scatterers().size(), min_selection) if(use_reference == 'True'): grm.add_chi_torsion_restraints_in_place( pdb_hierarchy = pdb2, sites_cart = sites_cart_reference, selection = selection, sigma = 2.5) elif(use_reference == 'top_out'): grm.add_chi_torsion_restraints_in_place( pdb_hierarchy = pdb2, sites_cart = sites_cart_reference, selection = selection, sigma = 2.5, limit = 180.0, top_out_potential=True) elif(use_reference == 'None'): grm.add_chi_torsion_restraints_in_place( pdb_hierarchy = pdb2, sites_cart = sites_cart_reference, selection = selection, sigma = 2.5) grm.remove_chi_torsion_restraints_in_place( selection = selection) d1 = flex.mean(flex.sqrt((xrs2.sites_cart().select(min_selection) - xrs3.sites_cart().select(min_selection)).dot())) print("distance start (use_reference: %s): %6.4f"%(str(use_reference), d1)) assert d1>4.0 assert approx_equal( flex.max(flex.sqrt((xrs2.sites_cart().select(~selection_bool) - xrs3.sites_cart().select(~selection_bool)).dot())), 0) from cctbx import geometry_restraints import mmtbx.refinement.geometry_minimization import scitbx.lbfgs grf = geometry_restraints.flags.flags(default=True) grf.nonbonded = False sites_cart = xrs2.sites_cart() minimized = mmtbx.refinement.geometry_minimization.lbfgs( sites_cart = sites_cart, correct_special_position_tolerance=1.0, geometry_restraints_manager = grm, sites_cart_selection = flex.bool(sites_cart.size(), min_selection), geometry_restraints_flags = grf, lbfgs_termination_params = scitbx.lbfgs.termination_parameters( max_iterations=5000)) xrs2.set_sites_cart(sites_cart = sites_cart) d2 = flex.mean(flex.sqrt((xrs2.sites_cart().select(min_selection) - xrs3.sites_cart().select(min_selection)).dot())) print("distance final (use_reference: %s): %6.4f"%(str(use_reference), d2)) if (use_reference in ['True', 'top_out']): assert d2<0.3, "%s, %f" % (use_reference, d2) else: assert d2>4.0, d2 assert approx_equal( flex.max(flex.sqrt((xrs2.sites_cart().select(~selection_bool) - xrs3.sites_cart().select(~selection_bool)).dot())), 0) #test torsion manipulation grm.remove_chi_torsion_restraints_in_place() grm.remove_chi_torsion_restraints_in_place() sites_cart_reference = [] selections_reference = [] for model in pdb2.models(): for chain in model.chains(): for residue in chain.residues(): 
sites_cart_reference.append(residue.atoms().extract_xyz()) selections_reference.append(residue.atoms().extract_i_seq()) #one residue at a time (effectively chi angles only) for sites_cart, selection in zip(sites_cart_reference, selections_reference): grm.add_chi_torsion_restraints_in_place( pdb_hierarchy = pdb2, sites_cart = sites_cart, selection = selection) assert grm.get_n_chi_torsion_proixes() == 6 grm.remove_chi_torsion_restraints_in_place() #all sites at once, chi angles only sites_cart = xrs2.sites_cart() grm.add_chi_torsion_restraints_in_place( pdb_hierarchy = pdb2, sites_cart = sites_cart, selection = None, chi_angles_only = True) assert grm.get_n_chi_torsion_proixes() == 6 #all sites at once, all torsions grm.add_chi_torsion_restraints_in_place( pdb_hierarchy = pdb2, sites_cart = sites_cart, selection = None, chi_angles_only = False) # grm.get_chi_torsion_proxies().show_sorted( # by_value='residual', # sites_cart=sites_cart, # site_labels=[atom.id_str() for atom in pdb2.atoms()]) assert grm.get_n_chi_torsion_proixes() == 12, grm.get_n_chi_torsion_proixes()
def get_map_values_and_grid_sites_frac(fmodel, map_type, grid_step, d_min, apply_sigma_scaling, apply_volume_scaling, include_f000, sel_bb, use_exact_phases): # resolution_factor = grid_step / d_min mp = mmtbx.masks.mask_master_params.extract() mp.grid_step_factor = 1. / resolution_factor mmtbx_masks_asu_mask_obj = mmtbx.masks.asu_mask( xray_structure=fmodel.xray_structure, d_min=d_min, mask_params=mp) bulk_solvent_mask = mmtbx_masks_asu_mask_obj.mask_data_whole_uc() sel = bulk_solvent_mask > 0 bulk_solvent_mask = bulk_solvent_mask.set_selected(sel, 1) cr_gr = maptbx.crystal_gridding( unit_cell=fmodel.xray_structure.unit_cell(), space_group_info=fmodel.f_obs().space_group_info(), pre_determined_n_real=bulk_solvent_mask.focus()) from mmtbx import map_tools from cctbx import miller # #mc = map_tools.electron_density_map(fmodel = fmodel).map_coefficients( # map_type = map_type, # acentrics_scale = 1.0, # centrics_pre_scale = 1.0) if not use_exact_phases: k = fmodel.k_isotropic() * fmodel.k_anisotropic() print("flex.mean(k):", flex.mean(k)) f_model = fmodel.f_model() mc_data = abs(fmodel.f_obs()).data() / k - abs(f_model).data() / k tmp = miller.array(miller_set=f_model, data=flex.double( f_model.indices().size(), 1)).phase_transfer(phase_source=f_model) mc = miller.array(miller_set=tmp, data=mc_data * tmp.data()) else: fmodel.update_all_scales(fast=True, remove_outliers=False) k = fmodel.k_isotropic() * fmodel.k_anisotropic() fo = fmodel.f_obs().customized_copy(data=fmodel.f_obs().data() / k) fo = fo.phase_transfer(phase_source=fmodel.f_model()) fc = fmodel.f_calc().customized_copy(data=fmodel.f_calc().data()) mc = miller.array(miller_set=fo, data=fo.data() - fc.data()) ######## XXX fft_map = miller.fft_map(crystal_gridding=cr_gr, fourier_coefficients=mc) fft_map.apply_volume_scaling() map_data = fft_map.real_map_unpadded() xrs = fmodel.xray_structure sites_cart = xrs.sites_cart().select(sel_bb) sel = maptbx.grid_indices_around_sites(unit_cell=xrs.unit_cell(), fft_n_real=map_data.focus(), fft_m_real=map_data.all(), sites_cart=sites_cart, site_radii=flex.double( sites_cart.size(), 0.5)) map_in = map_data.select(sel) mm = flex.mean(map_in) print("mean in (1):", mm) # #sites_frac = xrs.sites_frac().select(sel_bb) #mm = 0 #for sf in sites_frac: # mm += map_data.eight_point_interpolation(sf) #mm = mm/sites_frac.size() #print "mean in (2):", mm ######## # # Add F000 #reg = fmodel.xray_structure.scattering_type_registry(table = "wk1995") #f_000 = reg.sum_of_scattering_factors_at_diffraction_angle_0() +\ # 0.4*fmodel.xray_structure.unit_cell().volume() if (include_f000): #f_000 = include_f000*fmodel.xray_structure.unit_cell().volume()*0.3 #f_000 = None # XXX f_000 = abs(mm * xrs.unit_cell().volume()) #f_000 = 0.626*fmodel.xray_structure.unit_cell().volume()*0.35 else: f_000 = None print("f_000:", f_000) #print "XXX", include_f000*fmodel.xray_structure.unit_cell().volume()*0.3 # fft_map = miller.fft_map(crystal_gridding=cr_gr, fourier_coefficients=mc, f_000=f_000) # assert [apply_sigma_scaling, apply_volume_scaling].count(True) == 1 if (apply_sigma_scaling): fft_map.apply_sigma_scaling() elif (apply_volume_scaling): fft_map.apply_volume_scaling() else: assert RuntimeError nx, ny, nz = fft_map.n_real() map_data = fft_map.real_map_unpadded() #map_data = map_data * bulk_solvent_mask print("n_real:", nx, ny, nz, map_data.size()) grid_sites_frac = flex.vec3_double() map_values = flex.double() for ix in range(nx): for iy in range(ny): for iz in range(nz): mv = map_data[(ix, iy, iz)] if 1: #if(mv != 0): xf, 
yf, zf = ix / float(nx), iy / float(ny), iz / float(nz) grid_sites_frac.append([xf, yf, zf]) map_at_ixiyiz = map_data[(ix, iy, iz)] map_values.append(map_at_ixiyiz) return map_values, grid_sites_frac
def __init__(self, map_data, xray_structure, pdb_hierarchy, geometry_restraints_manager, gradients_method="fd", ncs_groups=None, rms_bonds_limit=0.015, rms_angles_limit=2.0, real_space_gradients_delta=1. / 4, max_iterations=100, range_size=10, n_ranges=10, default_weight=50): """ Fast determination of optimal data/restraints weight for real-space refinement of individual sites. """ self.msg_strings = [] # split chains into chunks result = [] for model in pdb_hierarchy.models(): for chain in model.chains(): if (chain.is_protein() or chain.is_na()): residue_range_sel = flex.size_t() cntr = 0 for rg in chain.residue_groups(): i_seqs = rg.atoms().extract_i_seq() cntr += 1 if (cntr < 10): residue_range_sel.extend(i_seqs) else: result.append(residue_range_sel) residue_range_sel = flex.size_t() residue_range_sel.extend(i_seqs) cntr = 0 if (len(result) == 0): assert residue_range_sel.size() > 0 result.append(residue_range_sel) self.msg_strings.append("number of chunks: %d" % len(result)) # randomly pick chunks random_chunks = [] if (len(result) > 0): for i in xrange(n_ranges): random_chunks.append(random.choice(xrange(len(result)))) self.msg_strings.append("random chunks:" % random_chunks) # setup refinery xrs_dc = xray_structure.deep_copy_scatterers() sel_all = flex.bool(xrs_dc.scatterers().size(), True) grm_dc = geometry_restraints_manager.select(sel_all) ro = mmtbx.refinement.real_space.individual_sites.box_refinement_manager( xray_structure=xrs_dc, target_map=map_data, geometry_restraints_manager=grm_dc.geometry, real_space_gradients_delta=real_space_gradients_delta, max_iterations=max_iterations, ncs_groups=ncs_groups, gradients_method=gradients_method) optimal_weights = flex.double() # loop over chunks: determine best weight for each chunk if (len(result) == 0): random_chunks = [None] for chunk in random_chunks: if (chunk is None): sel = flex.bool(xrs_dc.scatterers().size(), True) else: sel = result[chunk] sel = flex.bool(xrs_dc.scatterers().size(), sel) ro.refine(selection=sel, rms_bonds_limit=rms_bonds_limit, rms_angles_limit=rms_angles_limit) self.msg_strings.append("chunk %s optimal weight: %9.4f" % (str(chunk), ro.weight_optimal)) if (ro.weight_optimal is not None): optimal_weights.append(ro.weight_optimal) # select overall best weight mean = flex.mean(optimal_weights) sel = optimal_weights < mean * 3 sel &= optimal_weights > mean / 3 if (sel.count(True) > 0): optimal_weights = optimal_weights.select(sel) self.weight = flex.mean_default(optimal_weights, default_weight) self.msg_strings.append("overall best weight: %9.4f" % self.weight)
def __init__(self, **kwargs): group_args.__init__(self, **kwargs) # require Dij, d_c P = Profiler("2. calculate rho density") print("finished Dij, now calculating rho_i, the density") from xfel.clustering import Rodriguez_Laio_clustering_2014 # alternative clustering algorithms: see http://scikit-learn.org/stable/modules/clustering.html R = Rodriguez_Laio_clustering_2014(distance_matrix = self.Dij, d_c = self.d_c) self.rho = rho = R.get_rho() ave_rho = flex.mean(rho.as_double()) NN = self.Dij.focus()[0] print("The average rho_i is %5.2f, or %4.1f%%"%(ave_rho, 100*ave_rho/NN)) i_max = flex.max_index(rho) P = Profiler("3.transition") print("the index with the highest density is %d"%(i_max)) delta_i_max = flex.max(flex.double([self.Dij[i_max,j] for j in range(NN)])) print("delta_i_max",delta_i_max) rho_order = flex.sort_permutation(rho,reverse=True) rho_order_list = list(rho_order) P = Profiler("4. delta") self.delta = delta = R.get_delta(rho_order=rho_order, delta_i_max=delta_i_max) P = Profiler("5. find cluster maxima") #---- Now hunting for clusters ---Lot's of room for improvement (or simplification) here!!! cluster_id = flex.int(NN,-1) # default -1 means no cluster delta_order = flex.sort_permutation(delta,reverse=True) N_CLUST = 10 # maximum of 10 points to be considered as possible clusters #MAX_PERCENTILE_DELTA = 0.99 # cluster centers have to be in the top 10% percentile delta MAX_PERCENTILE_RHO = 0.99 # cluster centers have to be in the top 75% percentile rho n_cluster = 0 #max_n_delta = min(N_CLUST, int(MAX_PERCENTILE_DELTA*NN)) for ic in range(NN): # test the density, rho item_idx = delta_order[ic] if delta[item_idx]>100: print("A: iteration", ic, "delta", delta[item_idx], delta[item_idx] < 0.25 * delta[delta_order[0]]) if delta[item_idx] < 0.25 * delta[delta_order[0]]: # too low (another heuristic!) continue item_rho_order = rho_order_list.index(item_idx) if delta[item_idx]>100: print("B: iteration", ic, item_rho_order,item_rho_order/NN,MAX_PERCENTILE_RHO) if item_rho_order/NN < MAX_PERCENTILE_RHO : cluster_id[item_idx] = n_cluster print(ic,item_idx,item_rho_order,cluster_id[item_idx]) n_cluster += 1 print("Found %d clusters"%n_cluster) for x in range(NN): if cluster_id[x]>=0: print("XC",x,cluster_id[x],rho[x],delta[x]) self.cluster_id_maxima = cluster_id.deep_copy() P = Profiler("6. assign all points") R.cluster_assignment(rho_order,cluster_id) self.cluster_id_full = cluster_id.deep_copy() # assign the halos P = Profiler("7. assign halos") halo = flex.bool(NN,False) border = R.get_border( cluster_id = cluster_id ) for ic in range(n_cluster): #loop thru all border regions; find highest density print("cluster",ic, "in border",border.count(True)) this_border = (cluster_id == ic) & (border==True) print(len(this_border), this_border.count(True)) if this_border.count(True)>0: highest_density = flex.max(rho.select(this_border)) halo_selection = (rho < highest_density) & (this_border==True) if halo_selection.count(True)>0: cluster_id.set_selected(halo_selection,-1) core_selection = (cluster_id == ic) & ~halo_selection highest_density = flex.max(rho.select(core_selection)) too_sparse = core_selection & (rho.as_double() < highest_density/10.) # another heuristic if too_sparse.count(True)>0: cluster_id.set_selected(too_sparse,-1) self.cluster_id_final = cluster_id.deep_copy() print("%d in the excluded halo"%((cluster_id==-1).count(True)))
def scale_frame_by_mean_I(self, frame_no, pickle_filename, iparams, mean_of_mean_I, avg_mode): observations_pickle = read_frame(pickle_filename) pickle_filepaths = pickle_filename.split('/') img_filename_only = pickle_filepaths[len(pickle_filepaths) - 1] txt_exception = ' {0:40} ==> '.format(img_filename_only) if observations_pickle is None: txt_exception += 'empty or bad input file\n' return None, txt_exception inputs, txt_organize_input = self.organize_input( observations_pickle, iparams, avg_mode, pickle_filename=pickle_filename) if inputs is not None: observations_original, alpha_angle, spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm, wavelength, crystal_init_orientation = inputs else: txt_exception += txt_organize_input + '\n' return None, txt_exception #select only reflections matched with scale input params. #filter by resolution i_sel_res = observations_original.resolution_filter_selection( d_min=iparams.scale.d_min, d_max=iparams.scale.d_max) observations_original_sel = observations_original.select(i_sel_res) alpha_angle_sel = alpha_angle.select(i_sel_res) spot_pred_x_mm_sel = spot_pred_x_mm.select(i_sel_res) spot_pred_y_mm_sel = spot_pred_y_mm.select(i_sel_res) #filter by sigma i_sel_sigmas = ( observations_original_sel.data() / observations_original_sel.sigmas()) > iparams.scale.sigma_min observations_original_sel = observations_original_sel.select( i_sel_sigmas) alpha_angle_sel = alpha_angle_sel.select(i_sel_sigmas) spot_pred_x_mm_sel = spot_pred_x_mm_sel.select(i_sel_sigmas) spot_pred_y_mm_sel = spot_pred_y_mm_sel.select(i_sel_sigmas) observations_non_polar_sel, index_basis_name = self.get_observations_non_polar( observations_original_sel, pickle_filename, iparams) observations_non_polar, index_basis_name = self.get_observations_non_polar( observations_original, pickle_filename, iparams) uc_params = observations_original.unit_cell().parameters() ph = partiality_handler() r0 = ph.calc_spot_radius( sqr(crystal_init_orientation.reciprocal_matrix()), observations_original_sel.indices(), wavelength) #calculate first G (G, B) = (1, 0) stats = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) if mean_of_mean_I > 0: G = flex.median(observations_original_sel.data()) / mean_of_mean_I if iparams.flag_apply_b_by_frame: try: mxh = mx_handler() asu_contents = mxh.get_asu_contents(iparams.n_residues) observations_as_f = observations_non_polar_sel.as_amplitude_array( ) binner_template_asu = observations_as_f.setup_binner( auto_binning=True) wp = statistics.wilson_plot(observations_as_f, asu_contents, e_statistics=True) G = wp.wilson_intensity_scale_factor * 1e2 B = wp.wilson_b except Exception: txt_exception += 'warning B-factor calculation failed.\n' return None, txt_exception two_theta = observations_original.two_theta( wavelength=wavelength).data() sin_theta_over_lambda_sq = observations_original.two_theta( wavelength=wavelength).sin_theta_over_lambda_sq().data() ry, rz, re, voigt_nu, rotx, roty = (0, 0, iparams.gamma_e, iparams.voigt_nu, 0, 0) partiality_init, delta_xy_init, rs_init, rh_init = ph.calc_partiality_anisotropy_set(\ crystal_init_orientation.unit_cell(), rotx, roty, observations_original.indices(), ry, rz, r0, re, voigt_nu, two_theta, alpha_angle, wavelength, crystal_init_orientation, spot_pred_x_mm, spot_pred_y_mm, detector_distance_mm, iparams.partiality_model, iparams.flag_beam_divergence) if iparams.flag_plot_expert: n_bins = 20 binner = observations_original.setup_binner(n_bins=n_bins) binner_indices = binner.bin_indices() avg_partiality_init = flex.double() avg_rs_init = flex.double() 
avg_rh_init = flex.double() one_dsqr_bin = flex.double() for i in range(1, n_bins + 1): i_binner = (binner_indices == i) if len(observations_original.data().select(i_binner)) > 0: print( binner.bin_d_range(i)[1], flex.mean(partiality_init.select(i_binner)), flex.mean(rs_init.select(i_binner)), flex.mean(rh_init.select(i_binner)), len(partiality_init.select(i_binner))) #monte-carlo merge if iparams.flag_monte_carlo: G = 1 B = 0 partiality_init = flex.double([1] * len(partiality_init)) #save results refined_params = flex.double([ G, B, rotx, roty, ry, rz, r0, re, voigt_nu, uc_params[0], uc_params[1], uc_params[2], uc_params[3], uc_params[4], uc_params[5] ]) pres = postref_results() pres.set_params(observations=observations_non_polar, observations_original=observations_original, refined_params=refined_params, stats=stats, partiality=partiality_init, rs_set=rs_init, rh_set=rh_init, frame_no=frame_no, pickle_filename=pickle_filename, wavelength=wavelength, crystal_orientation=crystal_init_orientation, detector_distance_mm=detector_distance_mm) txt_scale_frame_by_mean_I = ' {0:40} ==> RES:{1:5.2f} NREFL:{2:5d} G:{3:6.4f} B:{4:6.1f} CELL:{5:6.2f} {6:6.2f} {7:6.2f} {8:6.2f} {9:6.2f} {10:6.2f}'.format( img_filename_only + ' (' + index_basis_name + ')', observations_original.d_min(), len(observations_original_sel.data()), G, B, uc_params[0], uc_params[1], uc_params[2], uc_params[3], uc_params[4], uc_params[5]) print(txt_scale_frame_by_mean_I) txt_scale_frame_by_mean_I += '\n' return pres, txt_scale_frame_by_mean_I
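# A minimal, self-contained sketch (plain Python, not the prime/cctbx API used above) of the
# two per-frame scale terms derived in scale_frame_by_mean_I: a linear scale G from the
# frame's median intensity against the running mean-of-means, and a Wilson-style isotropic B
# from the slope of ln(I) versus sin^2(theta)/lambda^2.  All names and numbers are illustrative.
import math

def frame_scale_sketch(frame_intensities, frame_stol_sq, mean_of_mean_I):
  # linear scale relative to the running average frame intensity
  data = sorted(frame_intensities)
  median_I = data[len(data) // 2]
  G = median_I / mean_of_mean_I if mean_of_mean_I > 0 else 1.0
  # Wilson-style B: least-squares slope of ln(I) vs s (s = sin^2(theta)/lambda^2), B = -slope/2
  pairs = [(s, math.log(i)) for s, i in zip(frame_stol_sq, frame_intensities) if i > 0]
  n = len(pairs)
  sx = sum(s for s, _ in pairs)
  sy = sum(y for _, y in pairs)
  sxx = sum(s * s for s, _ in pairs)
  sxy = sum(s * y for s, y in pairs)
  den = n * sxx - sx * sx
  slope = (n * sxy - sx * sy) / den if (n > 1 and den != 0) else 0.0
  return G, -slope / 2.0

# example: frame_scale_sketch([120., 95., 60., 30.], [0.01, 0.03, 0.06, 0.10], mean_of_mean_I=100.)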
def refine_anomalous_substructure(fmodel, pdb_hierarchy, wavelength=None, map_type="anom_residual", exclude_waters=False, exclude_non_water_light_elements=True, n_cycles_max=None, map_sigma_min=3.0, refine=("f_prime", "f_double_prime"), reset_water_u_iso=True, use_all_anomalous=True, verbose=True, out=sys.stdout): """ Crude mimic of Phaser's substructure completion, with two essential differences: only the existing real scatterers in the input model will be used (with the assumption that the model is already more or less complete), and the anomalous refinement will be performed in Phenix, yielding both f-prime and f-double-prime. The refined f-prime provides us with an orthogonal estimate of the number of electrons missing from an incorrectly labeled scatterer. :param wavelength: X-ray wavelenth in Angstroms :param exclude_waters: Don't refine anomalous scattering for water oxygens :param exclude_non_water_light_elements: Don't refine anomalous scattering for light atoms other than water (CHNO). :param n_cycles_max: Maximum number of refinement cycles :param map_sigma_min: Sigma cutoff for identify anomalous scatterers :param reset_water_u_iso: Reset B-factors for water atoms prior to f' refinement :param use_all_anomalous: include any scatterers which are already modeled as anomalous in the refinement """ from cctbx import xray assert (fmodel.f_obs().anomalous_flag()) assert (map_type in ["llg", "anom_residual"]) make_sub_header("Iterative anomalous substructure refinement", out=out) fmodel.update(target_name="ls") pdb_atoms = pdb_hierarchy.atoms() non_water_non_hd_selection = pdb_hierarchy.atom_selection_cache( ).selection("(not element H and not element D and not resname HOH)") sites_frac = fmodel.xray_structure.sites_frac() scatterers = fmodel.xray_structure.scatterers() u_iso_mean = flex.mean( fmodel.xray_structure.extract_u_iso_or_u_equiv().select( non_water_non_hd_selection)) anomalous_iselection = flex.size_t() anomalous_groups = [] t_start = time.time() n_cycle = 0 while ((n_cycles_max is None) or (n_cycle < n_cycles_max)): n_cycle += 1 n_new_groups = 0 t_start_cycle = time.time() print >> out, "Cycle %d" % n_cycle anom_map = fmodel.map_coefficients(map_type=map_type).fft_map( resolution_factor=0.25).apply_sigma_scaling().real_map_unpadded() map_min = abs(flex.min(anom_map.as_1d())) map_max = flex.max(anom_map.as_1d()) print >> out, " map range: -%.2f sigma to %.2f sigma" % (map_min, map_max) reset_u_iso_selection = flex.size_t() for i_seq, atom in enumerate(pdb_atoms): resname = atom.parent().resname elem = atom.element.strip() if ((i_seq in anomalous_iselection) or ((exclude_waters) and (resname == "HOH")) or ((elem in ["H", "D", "N", "C", "O"]) and (resname != "HOH") and exclude_non_water_light_elements)): continue scatterer = scatterers[i_seq] site_frac = sites_frac[i_seq] anom_map_value = anom_map.tricubic_interpolation(site_frac) if ((anom_map_value >= map_sigma_min) or ((scatterer.fdp != 0) and use_all_anomalous)): if (verbose): if (n_new_groups == 0): print >> out, "" print >> out, " new anomalous scatterers:" print >> out, " %-34s map height: %6.2f sigma" % ( atom.id_str(), anom_map_value) anomalous_iselection.append(i_seq) selection_string = get_single_atom_selection_string(atom) group = xray.anomalous_scatterer_group( iselection=flex.size_t([i_seq]), f_prime=0, f_double_prime=0, refine=list(refine), selection_string=selection_string) anomalous_groups.append(group) n_new_groups += 1 if (resname == "HOH") and (reset_water_u_iso): water_u_iso = scatterer.u_iso if 
(water_u_iso < u_iso_mean): reset_u_iso_selection.append(i_seq) if (n_new_groups == 0): print >> out, "" print >> out, "No new groups - anomalous scatterer search terminated." break elif (not verbose): print >> out, " %d new groups" % n_new_groups for i_seq in anomalous_iselection: sc = scatterers[i_seq] sc.fp = 0 sc.fdp = 0 if (verbose): print >> out, "" print >> out, "Anomalous refinement:" fmodel.info().show_targets(text="before minimization", out=out) print >> out, "" u_iso = fmodel.xray_structure.extract_u_iso_or_u_equiv() u_iso.set_selected(reset_u_iso_selection, u_iso_mean) fmodel.xray_structure.set_u_iso(values=u_iso) fmodel.update_xray_structure(update_f_calc=True) minimizer(fmodel=fmodel, groups=anomalous_groups) if (verbose): fmodel.info().show_targets(text="after minimization", out=out) print >> out, "" print >> out, " Refined sites:" for i_seq, group in zip(anomalous_iselection, anomalous_groups): print >> out, " %-34s f' = %6.3f f'' = %6.3f" % ( pdb_atoms[i_seq].id_str(), group.f_prime, group.f_double_prime) t_end_cycle = time.time() print >> out, "" if (verbose): print >> out, " time for this cycle: %.1fs" % (t_end_cycle - t_start_cycle) fmodel.update(target_name="ml") print >> out, "%d anomalous scatterer groups refined" % len( anomalous_groups) t_end = time.time() print >> out, "overall time: %.1fs" % (t_end - t_start) return anomalous_groups
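# Stand-alone restatement of the per-cycle selection rule in refine_anomalous_substructure,
# without the cctbx/mmtbx objects: an atom becomes a new anomalous group when the sigma-scaled
# anomalous map value at its site reaches map_sigma_min, or when it already carries a non-zero
# f'' and use_all_anomalous is set.  The Atom container below is a hypothetical stand-in.
from collections import namedtuple

Atom = namedtuple("Atom", ["i_seq", "resname", "element", "fdp", "map_value"])

def pick_new_anomalous_groups(atoms, already_grouped, map_sigma_min=3.0,
                              exclude_waters=False, use_all_anomalous=True):
  light = ("H", "D", "C", "N", "O")
  new_groups = []
  for atom in atoms:
    if atom.i_seq in already_grouped:
      continue  # already refined as an anomalous group
    if exclude_waters and atom.resname == "HOH":
      continue
    if atom.element in light and atom.resname != "HOH":
      continue  # light elements other than water oxygens are skipped, as above
    if atom.map_value >= map_sigma_min or (atom.fdp != 0 and use_all_anomalous):
      new_groups.append(atom.i_seq)
  return new_groups

# example: pick_new_anomalous_groups([Atom(0, "MET", "SE", 0.0, 5.2)], already_grouped=set())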
def _index_prepare(self): """Prepare to do autoindexing - in XDS terms this will mean calling xycorr, init and colspot on the input images.""" # decide on images to work with logger.debug("XDS INDEX PREPARE:") logger.debug("Wavelength: %.6f", self.get_wavelength()) logger.debug("Distance: %.2f", self.get_distance()) if self._indxr_images == []: _select_images_function = getattr( self, "_index_select_images_%s" % self._index_select_images ) wedges = _select_images_function() for wedge in wedges: self.add_indexer_image_wedge(wedge) self.set_indexer_prepare_done(True) all_images = self.get_matching_images() first = min(all_images) last = max(all_images) # next start to process these - first xycorr xycorr = self.Xycorr() xycorr.set_data_range(first, last) xycorr.set_background_range(self._indxr_images[0][0], self._indxr_images[0][1]) converter = to_xds(self.get_imageset()) xds_beam_centre = converter.detector_origin xycorr.set_beam_centre(xds_beam_centre[0], xds_beam_centre[1]) for block in self._indxr_images: xycorr.add_spot_range(block[0], block[1]) # FIXME need to set the origin here xycorr.run() for file in ["X-CORRECTIONS.cbf", "Y-CORRECTIONS.cbf"]: self._indxr_payload[file] = xycorr.get_output_data_file(file) # next start to process these - then init if PhilIndex.params.xia2.settings.input.format.dynamic_shadowing: imageset = self._indxr_imagesets[0] masker = ( imageset.get_format_class() .get_instance(imageset.paths()[0]) .get_masker() ) if masker is None: # disable dynamic_shadowing PhilIndex.params.xia2.settings.input.format.dynamic_shadowing = False if PhilIndex.params.xia2.settings.input.format.dynamic_shadowing: # find the region of the scan with the least predicted shadow # to use for background determination in XDS INIT step from dxtbx.model.experiment_list import ExperimentListFactory imageset = self._indxr_imagesets[0] xsweep = self._indxr_sweeps[0] sweep_filename = os.path.join( self.get_working_directory(), "%s_indexed.expt" % xsweep.get_name() ) ExperimentListFactory.from_imageset_and_crystal(imageset, None).as_file( sweep_filename ) from xia2.Wrappers.Dials.ShadowPlot import ShadowPlot shadow_plot = ShadowPlot() shadow_plot.set_working_directory(self.get_working_directory()) auto_logfiler(shadow_plot) shadow_plot.set_sweep_filename(sweep_filename) shadow_plot.set_json_filename( os.path.join( self.get_working_directory(), "%s_shadow_plot.json" % shadow_plot.get_xpid(), ) ) shadow_plot.run() results = shadow_plot.get_results() fraction_shadowed = flex.double(results["fraction_shadowed"]) if flex.max(fraction_shadowed) == 0: PhilIndex.params.xia2.settings.input.format.dynamic_shadowing = False else: scan_points = flex.double(results["scan_points"]) scan = imageset.get_scan() oscillation = scan.get_oscillation() if self._background_images is not None: bg_images = self._background_images bg_range_deg = ( scan.get_angle_from_image_index(bg_images[0]), scan.get_angle_from_image_index(bg_images[1]), ) bg_range_width = bg_range_deg[1] - bg_range_deg[0] min_shadow = 100 best_bg_range = bg_range_deg from libtbx.utils import frange for bg_range_start in frange( flex.min(scan_points), flex.max(scan_points) - bg_range_width, step=oscillation[1], ): bg_range_deg = (bg_range_start, bg_range_start + bg_range_width) sel = (scan_points >= bg_range_deg[0]) & ( scan_points <= bg_range_deg[1] ) mean_shadow = flex.mean(fraction_shadowed.select(sel)) if mean_shadow < min_shadow: min_shadow = mean_shadow best_bg_range = bg_range_deg self._background_images = ( 
scan.get_image_index_from_angle(best_bg_range[0]), scan.get_image_index_from_angle(best_bg_range[1]), ) logger.debug( "Setting background images: %s -> %s" % self._background_images ) init = self.Init() for file in ["X-CORRECTIONS.cbf", "Y-CORRECTIONS.cbf"]: init.set_input_data_file(file, self._indxr_payload[file]) init.set_data_range(first, last) if self._background_images: init.set_background_range( self._background_images[0], self._background_images[1] ) else: init.set_background_range( self._indxr_images[0][0], self._indxr_images[0][1] ) for block in self._indxr_images: init.add_spot_range(block[0], block[1]) init.run() # at this stage, need to (perhaps) modify the BKGINIT.cbf image # to mark out the back stop if PhilIndex.params.xds.backstop_mask: logger.debug("Applying mask to BKGINIT.pck") # copy the original file cbf_old = os.path.join(init.get_working_directory(), "BKGINIT.cbf") cbf_save = os.path.join(init.get_working_directory(), "BKGINIT.sav") shutil.copyfile(cbf_old, cbf_save) # modify the file to give the new mask from xia2.Toolkit.BackstopMask import BackstopMask mask = BackstopMask(PhilIndex.params.xds.backstop_mask) mask.apply_mask_xds(self.get_header(), cbf_save, cbf_old) init.reload() for file in ["BLANK.cbf", "BKGINIT.cbf", "GAIN.cbf"]: self._indxr_payload[file] = init.get_output_data_file(file) if PhilIndex.params.xia2.settings.developmental.use_dials_spotfinder: spotfinder = self.DialsSpotfinder() for block in self._indxr_images: spotfinder.add_spot_range(block[0], block[1]) spotfinder.run() export = self.DialsExportSpotXDS() export.set_input_data_file( "observations.refl", spotfinder.get_output_data_file("observations.refl"), ) export.run() for file in ["SPOT.XDS"]: self._indxr_payload[file] = export.get_output_data_file(file) else: # next start to process these - then colspot colspot = self.Colspot() for file in ( "X-CORRECTIONS.cbf", "Y-CORRECTIONS.cbf", "BLANK.cbf", "BKGINIT.cbf", "GAIN.cbf", ): colspot.set_input_data_file(file, self._indxr_payload[file]) colspot.set_data_range(first, last) colspot.set_background_range( self._indxr_images[0][0], self._indxr_images[0][1] ) for block in self._indxr_images: colspot.add_spot_range(block[0], block[1]) colspot.run() for file in ["SPOT.XDS"]: self._indxr_payload[file] = colspot.get_output_data_file(file)
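# Self-contained sketch of the background-range search performed above for the XDS INIT step:
# slide a window of fixed angular width across the scan and keep the range whose mean predicted
# shadow fraction is smallest.  Plain-Python stand-in for the flex/frange loop; names are
# illustrative only.
def best_background_range(scan_points_deg, fraction_shadowed, width_deg, step_deg):
  best = None
  start = min(scan_points_deg)
  stop = max(scan_points_deg) - width_deg
  while start <= stop:
    lo, hi = start, start + width_deg
    window = [f for p, f in zip(scan_points_deg, fraction_shadowed) if lo <= p <= hi]
    if window:
      mean_shadow = sum(window) / len(window)
      if best is None or mean_shadow < best[0]:
        best = (mean_shadow, (lo, hi))
    start += step_deg
  return best  # (mean shadow fraction, (start_deg, end_deg)), or None if no window fits

# example: best_background_range([0, 1, 2, 3, 4], [0.0, 0.1, 0.5, 0.4, 0.0], width_deg=2, step_deg=1)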
def run(params, mtzfiles):
  arrays = get_arrays(mtzfiles, d_min=params.dmin, d_max=params.dmax)
  if params.take_common:
    arrays = commonalize(arrays)
  maxlen_f = max(map(lambda x: len(x[0]), arrays))
  ref_f_obs = arrays[0][1]
  scales = []
  for f, f_obs, f_model, flag in arrays:
    if ref_f_obs == f_obs:
      k, B = 1., 0
    else:
      k, B = kBdecider(ref_f_obs, f_obs).run()
    scales.append((k, B))
  if params.reference != "first":
    if params.reference == "bmin":   # scale to the data set with the largest B
      kref, bref = max(scales, key=lambda x: x[1])
    elif params.reference == "bmax": # scale to the data set with the smallest B
      kref, bref = min(scales, key=lambda x: x[1])
    elif params.reference == "bmed": # scale to the data set with the median B
      perm = range(len(scales))
      perm.sort(key=lambda i: scales[i][1])
      kref, bref = scales[perm[len(perm)//2]]
    else:
      raise RuntimeError("Never reaches here")
    print "# Set K=%.2f B=%.2f as reference" % (kref, bref)
    # not bref-x[1], because negated later
    scales = map(lambda x: (x[0]/kref, x[1]-bref), scales)
  print ("%"+str(maxlen_f)+"s r_work r_free cc_work.E cc_free.E sigmaa fom k B") % "filename"
  for (f, f_obs, f_model, flag), (k, B) in zip(arrays, scales):
    d_star_sq = f_obs.d_star_sq().data()
    scale = k * flex.exp(-B*d_star_sq)
    # Normalized
    #f_obs.setup_binner(auto_binning=True)
    #f_model.setup_binner(auto_binning=True)
    #e_obs, e_model = map(lambda x: x.quasi_normalize_structure_factors(), (f_obs, f_model))
    e_obs = absolute_scaling.kernel_normalisation(
      f_obs.customized_copy(data=f_obs.data()*scale, sigmas=None), auto_kernel=True)
    e_obs = e_obs.normalised_miller_dev_eps.f_sq_as_f()
    e_model = absolute_scaling.kernel_normalisation(
      f_model.customized_copy(data=f_model.data()*scale, sigmas=None), auto_kernel=True)
    e_model = e_model.normalised_miller_dev_eps.f_sq_as_f()
    f_obs_w, f_obs_t = f_obs.select(~flag.data()), f_obs.select(flag.data())
    f_model_w, f_model_t = f_model.select(~flag.data()), f_model.select(flag.data())
    e_obs_w, e_obs_t = e_obs.select(~flag.data()), e_obs.select(flag.data())
    e_model_w, e_model_t = e_model.select(~flag.data()), e_model.select(flag.data())
    r_work = calc_r(f_obs_w, f_model_w, scale.select(~flag.data()))
    r_free = calc_r(f_obs_t, f_model_t, scale.select(flag.data()))
    cc_work_E = calc_cc(e_obs_w, e_model_w, False)
    cc_free_E = calc_cc(e_obs_t, e_model_t, False)
    #cc_work_E2 = calc_cc(e_obs_w, e_model_w, True)
    #cc_free_E2 = calc_cc(e_obs_t, e_model_t, True)
    se = calc_sigmaa(f_obs, f_model, flag)
    sigmaa = flex.mean(se.sigmaa().data())
    fom = flex.mean(se.fom().data())
    print ("%"+str(maxlen_f)+"s %.4f %.4f % 7.4f % 7.4f %.4e %.4e %.3e %.3e") % (
      f, r_work, r_free, cc_work_E, cc_free_E, sigmaa, fom, k, B)
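# Plain-Python illustration of the relative (k, B) scaling applied in run() above: each data
# set is multiplied by k * exp(-B * d_star_sq) before the statistics are computed, and R is a
# conventional sum|Fo-Fc| / sum|Fo|.  These helpers are sketches, not the calc_r/kBdecider
# implementations used in the script.
import math

def apply_kB(data, d_star_sq, k, B):
  return [k * math.exp(-B * s) * x for x, s in zip(data, d_star_sq)]

def r_factor(f_obs, f_model):
  num = sum(abs(o - m) for o, m in zip(f_obs, f_model))
  den = sum(abs(o) for o in f_obs)
  return num / den if den else float("nan")

# example: r_factor(apply_kB([10., 8.], [0.04, 0.25], k=1.2, B=5.), [11., 7.])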
def __init__( self, model, fmodels, target_weights, individual_adp_params, adp_restraints_params, h_params, log, all_params, nproc=None): adopt_init_args(self, locals()) d_min = fmodels.fmodel_xray().f_obs().d_min() # initialize with defaults... if(target_weights is not None): import mmtbx.refinement.weights_params wcp = mmtbx.refinement.weights_params.tw_customizations_params.extract() for w_s_c in wcp.weight_selection_criteria: if(d_min >= w_s_c.d_min and d_min < w_s_c.d_max): r_free_range_width = w_s_c.r_free_range_width r_free_r_work_gap = w_s_c.r_free_minus_r_work mean_diff_b_iso_bonded_fraction = w_s_c.mean_diff_b_iso_bonded_fraction min_diff_b_iso_bonded = w_s_c.min_diff_b_iso_bonded break # ...then customize wsc = all_params.target_weights.weight_selection_criteria if(wsc.r_free_minus_r_work is not None): r_free_r_work_gap = wsc.r_free_minus_r_work if(wsc.r_free_range_width is not None): r_free_range_width = wsc.r_free_range_width if(wsc.mean_diff_b_iso_bonded_fraction is not None): mean_diff_b_iso_bonded_fraction = wsc.mean_diff_b_iso_bonded_fraction if(wsc.min_diff_b_iso_bonded is not None): min_diff_b_iso_bonded = wsc.min_diff_b_iso_bonded # print_statistics.make_sub_header(text="Individual ADP refinement", out = log) assert fmodels.fmodel_xray().xray_structure is model.get_xray_structure() # fmodels.create_target_functors() assert approx_equal(self.fmodels.fmodel_xray().target_w(), self.fmodels.target_functor_result_xray( compute_gradients=False).target_work()) rw = flex.double() rf = flex.double() rfrw = flex.double() deltab = flex.double() w = flex.double() if(self.target_weights is not None): fmth =" R-FACTORS <Bi-Bj> <B> WEIGHT TARGETS" print(fmth, file=self.log) print(" work free delta data restr", file=self.log) else: print("Unresrained refinement...", file=self.log) self.save_scatterers = self.fmodels.fmodel_xray().xray_structure.\ deep_copy_scatterers().scatterers() if(self.target_weights is not None): default_weight = self.target_weights.adp_weights_result.wx*\ self.target_weights.adp_weights_result.wx_scale if(self.target_weights.twp.optimize_adp_weight): wx_scale = [0.03,0.125,0.5,1.,1.5,2.,2.5,3.,3.5,4.,4.5,5.] 
trial_weights = list( flex.double(wx_scale)*self.target_weights.adp_weights_result.wx ) self.wx_scale = 1 else: trial_weights = [self.target_weights.adp_weights_result.wx] self.wx_scale = self.target_weights.adp_weights_result.wx_scale else: default_weight = 1 trial_weights = [1] self.wx_scale = 1 self.show(weight=default_weight) trial_results = [] if nproc is None: nproc = all_params.main.nproc parallel = False if (len(trial_weights) > 1) and ((nproc is Auto) or (nproc > 1)): parallel = True from libtbx import easy_mp stdout_and_results = easy_mp.pool_map( processes=nproc, fixed_func=self.try_weight, args=trial_weights, func_wrapper="buffer_stdout_stderr") # XXX safer for phenix GUI trial_results = [ r for so, r in stdout_and_results ] else : for weight in trial_weights: result = self.try_weight(weight, print_stats=True) trial_results.append(result) for result in trial_results : if(result is not None) and (result.r_work is not None): if (parallel): result.show(out=self.log) rw .append(result.r_work) rf .append(result.r_free) rfrw .append(result.r_gap) deltab .append(result.delta_b) w .append(result.weight) # if(len(trial_weights)>1 and rw.size()>0): # filter by rfree-rwork rw,rf,rfrw,deltab,w = self.score(rw=rw,rf=rf,rfrw=rfrw,deltab=deltab,w=w, score_target=rfrw,score_target_value=r_free_r_work_gap, secondary_target=deltab) # filter by rfree rw,rf,rfrw,deltab,w = self.score(rw=rw,rf=rf,rfrw=rfrw,deltab=deltab,w=w, score_target=rf,score_target_value=flex.min(rf)+r_free_range_width) # filter by <Bi-Bj> delta_b_target = max(min_diff_b_iso_bonded, flex.mean(self.fmodels. fmodel_xray().xray_structure.extract_u_iso_or_u_equiv()* adptbx.u_as_b(1))*mean_diff_b_iso_bonded_fraction) print(" max suggested <Bi-Bj> for this run: %7.2f"%delta_b_target, file=log) print(" max allowed Rfree-Rwork gap: %5.1f"%r_free_r_work_gap, file=log) print(" range of equivalent Rfree: %5.1f"%r_free_range_width, file=log) rw,rf,rfrw,deltab,w = self.score(rw=rw,rf=rf,rfrw=rfrw,deltab=deltab,w=w, score_target=deltab,score_target_value=delta_b_target) # select the result with lowest rfree sel = flex.sort_permutation(rf) rw,rf,rfrw,deltab,w= self.select( rw=rw,rf=rf,rfrw=rfrw,deltab=deltab,w=w,sel=sel) # w_best = w[0] rw_best = rw[0] print("Best ADP weight: %8.3f"%w_best, file=self.log) # self.target_weights.adp_weights_result.wx = w_best self.target_weights.adp_weights_result.wx_scale = 1 best_u_star = None best_u_iso = None for result in trial_results : if(abs(result.weight-w_best)<=1.e-8): best_u_star = result.u_star best_u_iso = result.u_iso break if(best_u_iso is None) : # XXX this probably shouldn't happen... 
self.fmodels.fmodel_xray().xray_structure.replace_scatterers( self.save_scatterers.deep_copy()) else : assert (best_u_star is not None) xrs = self.fmodels.fmodel_xray().xray_structure xrs.set_u_iso(values=best_u_iso) xrs.scatterers().set_u_star(best_u_star) new_u_iso = xrs.scatterers().extract_u_iso() assert (new_u_iso.all_eq(best_u_iso)) self.fmodels.update_xray_structure( xray_structure = self.fmodels.fmodel_xray().xray_structure, update_f_calc = True) print("Accepted refinement result:", file=self.log) # reset alpha/beta parameters - if this is not done, the assertion # below will fail fmodels.create_target_functors() if(self.fmodels.fmodel_neutron() is None): assert approx_equal(self.fmodels.fmodel_xray().r_work()*100, rw_best, eps=0.001) # this needs to be done again again, just in case fmodels.create_target_functors() self.show(weight=w_best) self.fmodels.fmodel_xray().xray_structure.tidy_us() self.fmodels.update_xray_structure( xray_structure = self.fmodels.fmodel_xray().xray_structure, update_f_calc = True) fmodels.create_target_functors() assert approx_equal(self.fmodels.fmodel_xray().target_w(), self.fmodels.target_functor_result_xray( compute_gradients=False).target_work()) self.model.set_xray_structure(self.fmodels.fmodel_xray().xray_structure)
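# Heavily simplified sketch of the weight-selection idea implemented above (the class's
# score()/select() methods are not reproduced): candidate ADP weights are filtered by the
# Rfree-Rwork gap, by Rfree within a window of the best Rfree, and by <|Bi-Bj|> against a
# target derived from the mean B, and the surviving weight with the lowest Rfree wins.
# Field names and numbers are hypothetical.
def pick_adp_weight(cands, r_gap_max, r_free_window, mean_b, db_fraction, db_min):
  # cands: list of dicts with keys "w", "r_work", "r_free", "delta_b" (R factors in percent,
  # as in the printed table above)
  keep = [c for c in cands if c["r_free"] - c["r_work"] <= r_gap_max] or cands
  best_rfree = min(c["r_free"] for c in keep)
  keep = [c for c in keep if c["r_free"] <= best_rfree + r_free_window] or keep
  delta_b_target = max(db_min, mean_b * db_fraction)  # same form as delta_b_target above
  keep = [c for c in keep if c["delta_b"] <= delta_b_target] or keep
  return min(keep, key=lambda c: c["r_free"])

# example: pick_adp_weight([{"w": 1.0, "r_work": 20.1, "r_free": 24.0, "delta_b": 3.2}],
#                          r_gap_max=6.0, r_free_window=0.5, mean_b=30.0, db_fraction=0.2, db_min=5.0)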
def __init__(self, obs, unobstructed, params, out=None, n_bins=12): if out == None: import sys out = sys.stdout from libtbx.str_utils import format_value self.params = params self.out = out self.obs = obs obs.setup_binner(n_bins=n_bins) attenuated = obs.select(~unobstructed) unattenuated = obs.select(unobstructed) attenuated.use_binning_of(obs) unattenuated.use_binning_of(obs) self.result = [] counts_given = obs.binner().counts_given() counts_complete = obs.binner().counts_complete() counts_given_attenuated = attenuated.binner().counts_given() counts_given_unattenuated = unattenuated.binner().counts_given() for i_bin in obs.binner().range_used(): sel_w = obs.binner().selection(i_bin) sel_fo_all = obs.select(sel_w) d_max_, d_min_ = sel_fo_all.d_max_min() d_range = obs.binner().bin_legend(i_bin=i_bin, show_bin_number=False, show_counts=True) sel_data = obs.select(sel_w).data() sel_sig = obs.select(sel_w).sigmas() sel_unatten_w = unattenuated.binner().selection(i_bin) sel_unatten_data = unattenuated.select(sel_unatten_w).data() sel_unatten_sig = unattenuated.select(sel_unatten_w).sigmas() sel_atten_w = attenuated.binner().selection(i_bin) sel_atten_data = attenuated.select(sel_atten_w).data() sel_atten_sig = attenuated.select(sel_atten_w).sigmas() if len(sel_unatten_data) > 0: unatten_mean_I = flex.mean(sel_unatten_data) unatten_mean_I_sigI = flex.mean(sel_unatten_data / sel_unatten_sig) else: unatten_mean_I = 0 unatten_mean_I_sigI = 0 if len(sel_atten_data) > 0: atten_mean_I = flex.mean(sel_atten_data) atten_mean_I_sigI = flex.mean(sel_atten_data / sel_atten_sig) else: atten_mean_I = 0 atten_mean_I_sigI = 0 if (sel_data.size() > 0): bin = resolution_bin( i_bin=i_bin, d_range=d_range, mean_I=flex.mean(sel_data), n_work=sel_data.size(), mean_I_sigI=flex.mean(sel_data / sel_sig), d_max_min=(d_max_, d_min_), completeness=(counts_given[i_bin], counts_complete[i_bin]), given_unatten=counts_given_unattenuated[i_bin], unatten_mean_I=unatten_mean_I, unatten_mean_I_sigI=unatten_mean_I_sigI, given_atten=counts_given_attenuated[i_bin], atten_mean_I=atten_mean_I, atten_mean_I_sigI=atten_mean_I_sigI, ) self.result.append(bin) self.set_limits(unobstructed) print( "\n Bin Resolution Range Compl. <I> <I/sig(I)> Unobstructed <I> <I/sig(I)> Obstructed <I> <I/sig(I)>", file=out) for bin in self.result: fmt = " %s %s %s %s%s %s %s %s%s %s %s %s%s" print(fmt % ( format_value("%3d", bin.i_bin), format_value("%-17s", bin.d_range), format_value("%8.1f", bin.mean_I), format_value("%8.2f", bin.mean_I_sigI), format_value("%1s", getattr(bin, "limit", " ")), format_value("%6d", bin.given_unatten), format_value("%8.1f", bin.unatten_mean_I), format_value("%8.2f", bin.unatten_mean_I_sigI), format_value("%1s", getattr(bin, "unatten_limit", " ")), format_value("%6d", bin.given_atten), format_value("%8.1f", bin.atten_mean_I), format_value("%8.2f", bin.atten_mean_I_sigI), format_value("%1s", getattr(bin, "atten_limit", " ")), ), file=out)
def exercise(space_group_info, n_elements = 10, table = "wk1995", d_min = 2.0, k_sol = 0.35, b_sol = 45.0, b_cart = None, quick=False, verbose=0): xray_structure = random_structure.xray_structure( space_group_info = space_group_info, elements =(("O","N","C")*(n_elements//3+1))[:n_elements], volume_per_atom = 100, min_distance = 1.5, general_positions_only = True, random_u_iso = False, random_occupancy = False) xray_structure.scattering_type_registry(table = table) sg = xray_structure.space_group() uc = xray_structure.unit_cell() u_cart_1 = adptbx.random_u_cart(u_scale=5, u_min=5) u_star_1 = adptbx.u_cart_as_u_star(uc, u_cart_1) b_cart = adptbx.u_star_as_u_cart(uc, sg.average_u_star(u_star = u_star_1)) for anomalous_flag in [False, True]: scatterers = xray_structure.scatterers() if (anomalous_flag): assert scatterers.size() >= 7 for i in [1,7]: scatterers[i].fp = -0.2 scatterers[i].fdp = 5 have_non_zero_fdp = True else: for i in [1,7]: scatterers[i].fp = 0 scatterers[i].fdp = 0 have_non_zero_fdp = False f_obs = abs(xray_structure.structure_factors( d_min = d_min, anomalous_flag = anomalous_flag, cos_sin_table = sfg_params.cos_sin_table, algorithm = sfg_params.algorithm).f_calc()) f_obs_comp = f_obs.structure_factors_from_scatterers( xray_structure = xray_structure, algorithm = sfg_params.algorithm, cos_sin_table = sfg_params.cos_sin_table).f_calc() f_obs = abs(f_obs_comp) flags = f_obs.generate_r_free_flags(fraction = 0.1, max_free = 99999999) #flags = flags.array(data = flex.bool(f_obs.data().size(), False)) xrs = xray_structure.deep_copy_scatterers() xrs.shake_sites_in_place(rms_difference=0.3) for target in mmtbx.refinement.targets.target_names: if (quick): if (target not in ["ls_wunit_k1", "ml", "mlhl", "ml_sad"]): continue if (target == "mlhl"): if (have_non_zero_fdp): continue # XXX gradients not correct! experimental_phases = generate_random_hl(miller_set=f_obs) else: experimental_phases = None if (target == "ml_sad" and (not anomalous_flag or mmtbx.refinement.targets.phaser is None)): continue print " ",target xray.set_scatterer_grad_flags( scatterers = xrs.scatterers(), site = True) ss = 1./flex.pow2(f_obs.d_spacings().data()) / 4. 
u_star = adptbx.u_cart_as_u_star( f_obs.unit_cell(), adptbx.b_as_u(b_cart)) k_anisotropic = mmtbx.f_model.ext.k_anisotropic( f_obs.indices(), u_star) k_mask = mmtbx.f_model.ext.k_mask(ss, k_sol, b_sol) fmodel = mmtbx.f_model.manager( xray_structure = xrs, f_obs = f_obs, r_free_flags = flags, target_name = target, abcd = experimental_phases, sf_and_grads_accuracy_params = sfg_params, k_mask = k_mask, k_anisotropic = k_anisotropic, mask_params = masks.mask_master_params.extract()) fmodel.update_xray_structure( xray_structure=xrs, update_f_calc=True, update_f_mask=True) xray.set_scatterer_grad_flags( scatterers=fmodel.xray_structure.scatterers(), site=True) fmodel.update_xray_structure(update_f_calc=True) t_f = fmodel.target_functor() t_f.prepare_for_minimization() gs = t_f(compute_gradients=True).d_target_d_site_cart().as_double() gfd = finite_differences_site(target_functor=t_f) cc = flex.linear_correlation(gs, gfd).coefficient() if (0 or verbose): print "ana:", list(gs) print "fin:", list(gfd) print "rat:", [f/a for a,f in zip(gs,gfd)] print target, "corr:", cc, space_group_info print diff = gs - gfd diff /= max(1, flex.max(flex.abs(gfd))) tolerance = 1.2e-5 assert approx_equal(abs(flex.min(diff) ), 0.0, tolerance) assert approx_equal(abs(flex.mean(diff)), 0.0, tolerance) assert approx_equal(abs(flex.max(diff) ), 0.0, tolerance) assert approx_equal(cc, 1.0, tolerance) fmodel.model_error_ml()
def collect(self, model, fmodel, step, wilson_b=None, rigid_body_shift_accumulator=None): global time_collect_and_process t1 = time.time() if (self.sites_cart_start is None): self.sites_cart_start = model.get_sites_cart() sites_cart_curr = model.get_sites_cart() if (sites_cart_curr.size() == self.sites_cart_start.size()): self.shifts.append( flex.mean( flex.sqrt( (self.sites_cart_start - sites_cart_curr).dot()))) else: self.shifts.append("n/a") if (wilson_b is not None): self.wilson_b = wilson_b self.steps.append(step) self.r_works.append(fmodel.r_work()) self.r_frees.append(fmodel.r_free()) use_amber = False if hasattr(self.params, "amber"): # loaded amber scope use_amber = self.params.amber.use_amber self.is_amber_monitor = use_amber use_afitt = False if hasattr(self.params, "afitt"): # loaded amber scope use_afitt = self.params.afitt.use_afitt general_selection = None if use_afitt: from mmtbx.geometry_restraints import afitt general_selection = afitt.get_non_afitt_selection( model.restraints_manager, model.get_sites_cart(), model.get_hd_selection(), None) geom = model.geometry_statistics() if (geom is not None): self.geom.bonds.append(geom.bond().mean) self.geom.angles.append(geom.angle().mean) hd_sel = None if (not self.neutron_refinement and not self.is_neutron_monitor): hd_sel = model.get_hd_selection() b_isos = model.get_xray_structure().extract_u_iso_or_u_equiv( ) * math.pi**2 * 8 if (hd_sel is not None): b_isos = b_isos.select(~hd_sel) self.bs_iso_max_a.append(flex.max_default(b_isos, 0)) self.bs_iso_min_a.append(flex.min_default(b_isos, 0)) self.bs_iso_ave_a.append(flex.mean_default(b_isos, 0)) self.n_solv.append(model.number_of_ordered_solvent_molecules()) if (len(self.geom.bonds) > 0): if ([self.bond_start, self.angle_start].count(None) == 2): if (len(self.geom.bonds) > 0): self.bond_start = self.geom.bonds[0] self.angle_start = self.geom.angles[0] if (len(self.geom.bonds) > 0): self.bond_final = self.geom.bonds[len(self.geom.bonds) - 1] self.angle_final = self.geom.angles[len(self.geom.angles) - 1] elif (len(self.geom) == 1): self.bond_final = self.geom.bonds[0] self.angle_final = self.geom.angles[0] if (rigid_body_shift_accumulator is not None): self.rigid_body_shift_accumulator = rigid_body_shift_accumulator t2 = time.time() time_collect_and_process += (t2 - t1) self.call_back(model, fmodel, method=step)
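# Two tiny numeric helpers matching conversions used in collect() above: the mean Cartesian
# site shift between the starting and current coordinates, and the U_iso -> B_iso conversion
# B = 8 * pi**2 * U.  Plain-Python stand-ins for the flex calls.
import math

def mean_shift(sites_start, sites_curr):
  d = [sum((a - b) ** 2 for a, b in zip(p, q)) ** 0.5
       for p, q in zip(sites_start, sites_curr)]
  return sum(d) / len(d) if d else 0.0

def u_as_b(u_iso):
  return 8.0 * math.pi ** 2 * u_iso

# examples: mean_shift([(0, 0, 0)], [(0.3, 0.4, 0.0)]) ~= 0.5 ; u_as_b(0.25) ~= 19.74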
def __init__(self, xray_structure, step, volume_cutoff=None, mean_diff_map_threshold=None, compute_whole=False, largest_only=False, wrapping=True, f_obs=None, r_sol=1.1, r_shrink=0.9, f_calc=None, log=None, write_masks=False): adopt_init_args(self, locals()) # self.d_spacings = f_obs.d_spacings().data() self.sel_3inf = self.d_spacings >= 3 self.miller_array = f_obs.select(self.sel_3inf) # self.crystal_symmetry = self.xray_structure.crystal_symmetry() # compute mask in p1 (via ASU) self.crystal_gridding = maptbx.crystal_gridding( unit_cell=xray_structure.unit_cell(), space_group_info=xray_structure.space_group_info(), symmetry_flags=maptbx.use_space_group_symmetry, step=step) self.n_real = self.crystal_gridding.n_real() # XXX Where do we want to deal with H and occ==0? mask_p1 = mmtbx.masks.mask_from_xray_structure( xray_structure=xray_structure, p1=True, for_structure_factors=True, solvent_radius=r_sol, shrink_truncation_radius=r_shrink, n_real=self.n_real, in_asu=False).mask_data maptbx.unpad_in_place(map=mask_p1) self.f_mask_whole = None if (compute_whole): mask = asu_map_ext.asymmetric_map( xray_structure.crystal_symmetry().space_group().type(), mask_p1).data() self.f_mask_whole = self._inflate( self.miller_array.structure_factors_from_asu_map( asu_map_data=mask, n_real=self.n_real)) self.solvent_content = 100. * mask_p1.count(1) / mask_p1.size() if (write_masks): write_map_file(crystal_symmetry=xray_structure.crystal_symmetry(), map_data=mask_p1, file_name="mask_whole.mrc") # conn analysis co = maptbx.connectivity(map_data=mask_p1, threshold=0.01, preprocess_against_shallow=False, wrapping=wrapping) co.merge_symmetry_related_regions( space_group=xray_structure.space_group()) del mask_p1 self.conn = co.result().as_double() z = zip(co.regions(), range(0, co.regions().size())) sorted_by_volume = sorted(z, key=lambda x: x[0], reverse=True) # f_mask_data_0 = flex.complex_double(f_obs.data().size(), 0) f_mask_data = flex.complex_double(f_obs.data().size(), 0) self.FV = OrderedDict() self.mc = None diff_map = None mean_diff_map = None self.regions = OrderedDict() self.f_mask_0 = None self.f_mask = None small_selection = None weak_selection = None # if (log is not None): print(" # volume_p1 uc(%) mFo-DFc: min,max,mean,sd", file=log) # for i_seq, p in enumerate(sorted_by_volume): v, i = p # skip macromolecule if (i == 0): continue # skip small volume volume = v * step**3 uc_fraction = v * 100. 
/ self.conn.size() if (volume_cutoff is not None): if (volume < volume_cutoff and volume >= 10): if (small_selection is None): small_selection = self.conn == i else: small_selection = small_selection | (self.conn == i) continue if volume < volume_cutoff: continue self.regions[i_seq] = group_args(id=i, i_seq=i_seq, volume=volume, uc_fraction=uc_fraction) selection = self.conn == i mask_i_asu = self.compute_i_mask_asu(selection=selection, volume=volume) if (uc_fraction >= 1): f_mask_i = self.compute_f_mask_i(mask_i_asu) f_mask_data_0 += f_mask_i.data() elif (largest_only): break if (uc_fraction < 1 and diff_map is None): diff_map = self.compute_diff_map(f_mask_data=f_mask_data_0) mi, ma, me, sd = None, None, None, None if (diff_map is not None): blob = diff_map.select(selection.iselection()) mean_diff_map = flex.mean( diff_map.select(selection.iselection())) mi, ma, me = flex.min(blob), flex.max(blob), flex.mean(blob) sd = blob.sample_standard_deviation() if (log is not None): print("%3d" % i_seq, "%12.3f" % volume, "%8.4f" % round(uc_fraction, 4), "%7s" % str(None) if diff_map is None else "%7.3f %7.3f %7.3f %7.3f" % (mi, ma, me, sd), file=log) if (mean_diff_map_threshold is not None and mean_diff_map is not None and mean_diff_map <= mean_diff_map_threshold and mean_diff_map > 0.1): if (weak_selection is None): weak_selection = self.conn == i else: weak_selection = weak_selection | (self.conn == i) if (mean_diff_map_threshold is not None and mean_diff_map is not None and mean_diff_map <= mean_diff_map_threshold): continue f_mask_i = self.compute_f_mask_i(mask_i_asu) f_mask_data += f_mask_i.data() self.FV[f_mask_i] = [round(volume, 3), round(uc_fraction, 1)] ##### # Determine number of secondary regions. Must happen here! self.n_regions = len(self.FV.values()) self.do_mosaic = False if (self.n_regions > 1): self.do_mosaic = True # Handle accumulation of small if (small_selection is not None and self.do_mosaic): v = small_selection.count(True) volume = v * step**3 uc_fraction = v * 100. / self.conn.size() mask_i = flex.double(flex.grid(self.n_real), 0) mask_i = mask_i.set_selected(small_selection, 1) diff_map = diff_map.set_selected(diff_map < 0, 0) #diff_map = diff_map.set_selected(diff_map>0,1) mx = flex.mean(diff_map.select((diff_map > 0).iselection())) diff_map = diff_map / mx mask_i = mask_i * diff_map mask_i_asu = asu_map_ext.asymmetric_map( self.crystal_symmetry.space_group().type(), mask_i).data() f_mask_i = self.compute_f_mask_i(mask_i_asu) self.FV[f_mask_i] = [round(volume, 3), round(uc_fraction, 1)] if (weak_selection is not None and self.do_mosaic): v = weak_selection.count(True) volume = v * step**3 uc_fraction = v * 100. / self.conn.size() mask_i = flex.double(flex.grid(self.n_real), 0) mask_i = mask_i.set_selected(weak_selection, 1) diff_map = diff_map.set_selected(diff_map < 0, 0) #diff_map = diff_map.set_selected(diff_map>0,1) mx = flex.mean(diff_map.select((diff_map > 0).iselection())) diff_map = diff_map / mx mask_i = mask_i * diff_map mask_i_asu = asu_map_ext.asymmetric_map( self.crystal_symmetry.space_group().type(), mask_i).data() f_mask_i = self.compute_f_mask_i(mask_i_asu) self.FV[f_mask_i] = [round(volume, 3), round(uc_fraction, 1)] ##### if (self.do_mosaic): self.f_mask_0 = f_obs.customized_copy(data=f_mask_data_0) self.f_mask = f_obs.customized_copy(data=f_mask_data)
def get_mean_sigI(self):
  return flex.mean(self.sigI_merge) if self.get_size() else 0
def __init__(self, **kwargs): group_args.__init__(self, **kwargs) print('finished Dij, now calculating rho_i and density') from xfel.clustering import Rodriguez_Laio_clustering_2014 as RL R = RL(distance_matrix=self.Dij, d_c=self.d_c) #from clustering.plot_with_dimensional_embedding import plot_with_dimensional_embedding #plot_with_dimensional_embedding(1-self.Dij/flex.max(self.Dij), show_plot=True) if hasattr(self, 'strategy') is False: self.strategy = 'default' self.rho = rho = R.get_rho() ave_rho = flex.mean(rho.as_double()) NN = self.Dij.focus()[0] i_max = flex.max_index(rho) delta_i_max = flex.max( flex.double([self.Dij[i_max, j] for j in range(NN)])) rho_order = flex.sort_permutation(rho, reverse=True) rho_order_list = list(rho_order) self.delta = delta = R.get_delta(rho_order=rho_order, delta_i_max=delta_i_max) cluster_id = flex.int(NN, -1) # -1 means no cluster delta_order = flex.sort_permutation(delta, reverse=True) MAX_PERCENTILE_RHO = self.max_percentile_rho # cluster centers have to be in the top percentile n_cluster = 0 # # print('Z_DELTA = ', self.Z_delta) pick_top_solution = False rho_stdev = flex.mean_and_variance( rho.as_double()).unweighted_sample_standard_deviation() delta_stdev = flex.mean_and_variance( delta).unweighted_sample_standard_deviation() if rho_stdev != 0.0 and delta_stdev != 0: rho_z = (rho.as_double() - flex.mean(rho.as_double())) / (rho_stdev) delta_z = (delta - flex.mean(delta)) / (delta_stdev) else: pick_top_solution = True if rho_stdev == 0.0: centroids = [flex.first_index(delta, flex.max(delta))] elif delta_stdev == 0.0: centroids = [flex.first_index(rho, flex.max(rho))] significant_delta = [] significant_rho = [] # Define strategy to decide cluster center here. Only one should be true debug_fix_clustering = True if self.strategy == 'one_cluster': debug_fix_clustering = False strategy2 = True if self.strategy == 'strategy_3': debug_fix_clustering = False strategy3 = True strategy2 = False if debug_fix_clustering: if not pick_top_solution: delta_z_cutoff = min(1.0, max(delta_z)) rho_z_cutoff = min(1.0, max(rho_z)) for ic in range(NN): # test the density & rho if delta_z[ic] >= delta_z_cutoff or delta_z[ ic] <= -delta_z_cutoff: significant_delta.append(ic) if rho_z[ic] >= rho_z_cutoff or rho_z[ic] <= -rho_z_cutoff: significant_rho.append(ic) if True: # Use idea quoted in Rodriguez Laio 2014 paper # " Thus, cluster centers are recognized as points for which the value of delta is anomalously large." 
centroid_candidates = list(significant_delta) candidate_delta_z = flex.double() for ic in centroid_candidates: if ic == rho_order[0]: delta_z_of_rho_order_0 = delta_z[ic] candidate_delta_z.append(delta_z[ic]) i_sorted = flex.sort_permutation(candidate_delta_z, reverse=True) # Check that once sorted the top one is not equal to the 2nd or 3rd position # If there is a tie, assign centroid to the first one in rho order centroids = [] # rho_order[0] has to be a centroid centroids.append(rho_order[0]) #centroids.append(centroid_candidates[i_sorted[0]]) for i in range(0, len(i_sorted[:])): if centroid_candidates[i_sorted[i]] == rho_order[0]: continue if delta_z_of_rho_order_0 - candidate_delta_z[ i_sorted[i]] > 1.0: if i > 1: if -candidate_delta_z[i_sorted[ i - 1]] + candidate_delta_z[ i_sorted[0]] > 1.0: centroids.append( centroid_candidates[i_sorted[i]]) else: centroids.append( centroid_candidates[i_sorted[i]]) else: break if False: centroid_candidates = list( set(significant_delta).intersection( set(significant_rho))) # Now compare the relative orders of the max delta_z and max rho_z to make sure they are within 1 stdev centroids = [] max_delta_z_candidates = -999.9 max_rho_z_candidates = -999.9 for ic in centroid_candidates: if delta_z[ic] > max_delta_z_candidates: max_delta_z_candidates = delta_z[ic] if rho_z[ic] > max_rho_z_candidates: max_rho_z_candidates = rho_z[ic] for ic in centroid_candidates: if max_delta_z_candidates - delta_z[ ic] < 1.0 and max_rho_z_candidates - rho_z[ ic] < 1.0: centroids.append(ic) #item_idxs = [delta_order[ic] for ic,centroid in enumerate(centroids)] item_idxs = centroids for item_idx in item_idxs: cluster_id[item_idx] = n_cluster print('CLUSTERING_STATS', item_idx, cluster_id[item_idx]) n_cluster += 1 #### elif strategy2: # Go through list of clusters, see which one has highest joint rank in both rho and delta lists # This will only assign one cluster center based on highest product of rho and delta ranks product_list_of_ranks = [] for ic in range(NN): rho_tmp = self.rho[ic] delta_tmp = self.delta[ic] product_list_of_ranks.append(rho_tmp * delta_tmp) import numpy as np item_idx = np.argmax(product_list_of_ranks) cluster_id[item_idx] = n_cluster # Only cluster assigned print('CLUSTERING_STATS', item_idx, cluster_id[item_idx]) n_cluster += 1 elif strategy3: # use product of delta and rho and pick out top candidates # have to use a significance z_score to filter out the very best product_list_of_ranks = flex.double() for ic in range(NN): rho_tmp = self.rho[ic] delta_tmp = self.delta[ic] product_list_of_ranks.append(rho_tmp * delta_tmp) import numpy as np iid_sorted = flex.sort_permutation(product_list_of_ranks, reverse=True) cluster_id[ iid_sorted[0]] = n_cluster # first point always a cluster n_cluster += 1 print('CLUSTERING_STATS S3', iid_sorted[0], cluster_id[iid_sorted[0]]) #product_list_of_ranks[iid_sorted[0]]=0.0 # set this to 0.0 so that the mean/stdev does not get biased by one point stdev = np.std(product_list_of_ranks) mean = np.mean(product_list_of_ranks) n_sorted = 3 #if stdev == 0.0: # n_sorted=1 z_critical = 3.0 # 2 sigma significance ? 
# Only go through say 3-4 datapoints # basically there won't be more than 2-3 lattices on an image realistically for iid in iid_sorted[1:n_sorted]: z_score = (product_list_of_ranks[iid] - mean) / stdev if z_score > z_critical: cluster_id[iid] = n_cluster n_cluster += 1 print('CLUSTERING_STATS S3', iid, cluster_id[iid]) else: break # No point going over all points once below threshold z_score else: for ic in range(NN): item_idx = delta_order[ic] if ic != 0: if delta[item_idx] <= 0.25 * delta[ delta_order[0]]: # too low to be a medoid continue item_rho_order = rho_order_list.index(item_idx) if (item_rho_order) / NN < MAX_PERCENTILE_RHO: cluster_id[item_idx] = n_cluster print('CLUSTERING_STATS', ic, item_idx, item_rho_order, cluster_id[item_idx]) n_cluster += 1 ### # print('Found %d clusters' % n_cluster) for x in range(NN): if cluster_id[x] >= 0: print("XC", x, cluster_id[x], rho[x], delta[x]) self.cluster_id_maxima = cluster_id.deep_copy() R.cluster_assignment(rho_order, cluster_id, rho) self.cluster_id_full = cluster_id.deep_copy() #halo = flex.bool(NN,False) #border = R.get_border( cluster_id = cluster_id ) #for ic in range(n_cluster): #loop thru all border regions; find highest density # this_border = (cluster_id == ic) & (border==True) # if this_border.count(True)>0: # highest_density = flex.max(rho.select(this_border)) # halo_selection = (rho < highest_density) & (this_border==True) # if halo_selection.count(True)>0: # cluster_id.set_selected(halo_selection,-1) # core_selection = (cluster_id == ic) & ~halo_selection # highest_density = flex.max(rho.select(core_selection)) # too_sparse = core_selection & (rho.as_double() < highest_density/10.) # another heuristic # if too_sparse.count(True)>0: # cluster_id.set_selected(too_sparse,-1) self.cluster_id_final = cluster_id.deep_copy()
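# Stand-alone sketch of the two Rodriguez-Laio (2014) quantities consumed by the clustering
# code above: rho_i, the number of neighbours closer than d_c, and delta_i, the distance to
# the nearest point of higher density (for the densest point, the largest distance to any
# point).  Cluster centres are then points with anomalously large delta.  Illustrative only;
# the production code uses the xfel.clustering implementation.
def rho_delta(dist, d_c):
  # dist: full symmetric distance matrix as a list of lists
  n = len(dist)
  rho = [sum(1 for j in range(n) if j != i and dist[i][j] < d_c) for i in range(n)]
  delta = []
  for i in range(n):
    higher = [dist[i][j] for j in range(n) if rho[j] > rho[i]]
    delta.append(min(higher) if higher else max(dist[i]))
  return rho, delta

# example: rho, delta = rho_delta([[0, 1, 5], [1, 0, 5], [5, 5, 0]], d_c=2.0)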
def calculate_fsc(si=None, f_array=None, # just used for binner map_coeffs=None, model_map_coeffs=None, first_half_map_coeffs=None, second_half_map_coeffs=None, resolution=None, fraction_complete=None, min_fraction_complete=None, is_model_based=None, cc_cut=None, scale_using_last=None, max_cc_for_rescale=None, pseudo_likelihood=False, skip_scale_factor=False, verbose=None, out=sys.stdout): # calculate anticipated fall-off of model data with resolution if si.rmsd is None and is_model_based: si.rmsd=resolution*si.rmsd_resolution_factor print("Setting rmsd to %5.1f A based on resolution of %5.1f A" %( si.rmsd,resolution), file=out) # get f and model_f vs resolution and FSC vs resolution and apply # scale to f_array and return sharpened map dsd = f_array.d_spacings().data() from cctbx.maptbx.segment_and_split_map import map_coeffs_to_fp if is_model_based: mc1=map_coeffs mc2=model_map_coeffs fo_map=map_coeffs # scale map_coeffs to model_map_coeffs*FSC fc_map=model_map_coeffs b_eff=get_b_eff(si=si,out=out) else: # half_dataset mc1=first_half_map_coeffs mc2=second_half_map_coeffs fo_map=map_coeffs # scale map_coeffs to cc* fc_map=model_map_coeffs b_eff=None ratio_list=flex.double() target_sthol2=flex.double() cc_list=flex.double() sthol_list=flex.double() d_min_list=flex.double() rms_fo_list=flex.double() rms_fc_list=flex.double() max_possible_cc=None for i_bin in f_array.binner().range_used(): sel = f_array.binner().selection(i_bin) d = dsd.select(sel) if d.size()<1: raise Sorry("Please reduce number of bins (no data in bin "+ "%s) from current value of %s" %(i_bin,f_array.binner().n_bins_used())) d_min = flex.min(d) d_max = flex.max(d) d_avg = flex.mean(d) n = d.size() m1 = mc1.select(sel) m2 = mc2.select(sel) cc = m1.map_correlation(other = m2) if fo_map: fo = fo_map.select(sel) f_array_fo=map_coeffs_to_fp(fo) rms_fo=f_array_fo.data().norm() else: rms_fo=1. if fc_map: fc = fc_map.select(sel) f_array_fc=map_coeffs_to_fp(fc) rms_fc=f_array_fc.data().norm() else: rms_fc=1. sthol2=0.25/d_avg**2 ratio_list.append(max(1.e-10,rms_fc)/max(1.e-10,rms_fo)) target_sthol2.append(sthol2) if cc is None: cc=0. cc_list.append(cc) sthol_list.append(1/d_avg) d_min_list.append(d_min) rms_fo_list.append(rms_fo) rms_fc_list.append(rms_fc) if b_eff is not None: max_cc_estimate=cc* math.exp(min(20.,sthol2*b_eff)) else: max_cc_estimate=cc max_cc_estimate=max(0.,min(1.,max_cc_estimate)) if max_possible_cc is None or ( max_cc_estimate > 0 and max_cc_estimate > max_possible_cc): max_possible_cc=max_cc_estimate if verbose: print("d_min: %5.1f FC: %7.1f FOBS: %7.1f CC: %5.2f" %( d_avg,rms_fc,rms_fo,cc), file=out) if scale_using_last: # rescale to give final value average==0 cc_list,baseline=rescale_cc_list( cc_list=cc_list,scale_using_last=scale_using_last, max_cc_for_rescale=max_cc_for_rescale) if baseline is None: # don't use it scale_using_last=None original_cc_list=deepcopy(cc_list) if not is_model_based: # calculate cc* for half-dataset cc cc_list=estimate_cc_star(cc_list=cc_list,sthol_list=sthol_list, cc_cut=cc_cut,scale_using_last=scale_using_last) if not max_possible_cc: max_possible_cc=0.01 if si.target_scale_factors: # not using these max_possible_cc=1. fraction_complete=1. elif (not is_model_based): max_possible_cc=1. fraction_complete=1. 
else: # Define overall CC based on model completeness (CC=sqrt(fraction_complete)) if fraction_complete is None: fraction_complete=max_possible_cc**2 print("Estimated fraction complete is %5.2f based on low_res CC of %5.2f" %( fraction_complete,max_possible_cc), file=out) else: print("Using fraction complete value of %5.2f " %(fraction_complete), file=out) max_possible_cc=fraction_complete**0.5 target_scale_factors=flex.double() for i_bin in f_array.binner().range_used(): index=i_bin-1 ratio=ratio_list[index] cc=cc_list[index] sthol2=target_sthol2[index] d_min=d_min_list[index] corrected_cc=max(0.00001,min(1.,cc/max_possible_cc)) if (not is_model_based): # cc is already cc* scale_on_fo=ratio * corrected_cc elif b_eff is not None: if pseudo_likelihood: scale_on_fo=(cc/max(0.001,1-cc**2)) else: # usual scale_on_fo=ratio * min(1., max(0.00001,corrected_cc) * math.exp(min(20.,sthol2*b_eff)) ) else: scale_on_fo=ratio * min(1.,max(0.00001,corrected_cc)) target_scale_factors.append(scale_on_fo) if not pseudo_likelihood and not skip_scale_factor: # normalize scale_factor=1./target_scale_factors.min_max_mean().max target_scale_factors=\ target_scale_factors*scale_factor print("Scale factor A: %.5f" %(scale_factor), file=out) if fraction_complete < min_fraction_complete: print("\nFraction complete (%5.2f) is less than minimum (%5.2f)..." %( fraction_complete,min_fraction_complete) + "\nSkipping scaling", file=out) target_scale_factors=flex.double(target_scale_factors.size()*(1.0,)) print ("\nAverage CC: %.3f" %(cc_list.min_max_mean().mean),file=out) print("\nScale factors vs resolution:", file=out) print("Note 1: CC* estimated from sqrt(2*CC/(1+CC))", file=out) print("Note 2: CC estimated by fitting (smoothing) for values < %s" %(cc_cut), file=out) print("Note 3: Scale = A CC* rmsFc/rmsFo (A is normalization)", file=out) print(" d_min rmsFo rmsFc CC CC* Scale", file=out) for sthol2,scale,rms_fo,cc,rms_fc,orig_cc in zip( target_sthol2,target_scale_factors,rms_fo_list,cc_list,rms_fc_list, original_cc_list): print("%7.2f %9.1f %9.1f %7.3f %7.3f %5.2f" %( 0.5/sthol2**0.5,rms_fo,rms_fc,orig_cc,cc,scale), file=out) si.target_scale_factors=target_scale_factors si.target_sthol2=target_sthol2 si.d_min_list=d_min_list si.cc_list=cc_list return si
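# Small numeric sketch of the scale-factor recipe printed by calculate_fsc above: for
# half-map CC values the bin-wise scale is A * CC* * rmsFc/rmsFo, with CC* = sqrt(2*CC/(1+CC))
# and A chosen so the largest scale factor is 1.  Assumes CC has already been smoothed and
# cut as in the routine; plain Python, illustrative only.
import math

def bin_scale_factors(cc_list, rms_fo_list, rms_fc_list):
  cc_star = [math.sqrt(2. * cc / (1. + cc)) if cc > 0 else 0. for cc in cc_list]
  raw = [c * (fc / fo if fo else 0.) for c, fo, fc in zip(cc_star, rms_fo_list, rms_fc_list)]
  top = max(raw) if raw and max(raw) > 0 else 1.
  return [r / top for r in raw]

# example: bin_scale_factors([0.9, 0.5, 0.1], [100., 80., 60.], [90., 60., 20.])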
def run_correction_vector_plot(working_phil): L = lines(working_phil) for line in L.vectors(): pass # pull out the information, lines class does all the work close_x = flex.double() close_y = flex.double() far_x = flex.double() far_y = flex.double() master_coords = L.master_coords master_cv = L.master_cv master_tiles = L.master_tiles for idx in range(0, len(master_coords), 10): if matrix.col( master_cv[idx]).length() < L.tile_rmsd[master_tiles[idx]]: pass #close_x.append(master_coords[idx][0]) #close_y.append(master_coords[idx][1]) else: far_x.append(master_coords[idx][0]) far_y.append(master_coords[idx][1]) close_x.append(master_coords[idx][0] + master_cv[idx][0]) close_y.append(master_coords[idx][1] + master_cv[idx][1]) if working_phil.show_plots is True: from matplotlib import pyplot as plt plt.plot(close_x, close_y, "r.") plt.plot(far_x, far_y, "g.") plt.axes().set_aspect("equal") plt.show() sort_radii = flex.sort_permutation(flex.double(L.radii)) tile_rmsds = flex.double() radial_sigmas = flex.double(64) tangen_sigmas = flex.double(64) for idx in range(64): x = sort_radii[idx] print "Tile %2d: radius %7.2f, %6d observations, delx %5.2f dely %5.2f, rmsd = %5.2f" % ( x, L.radii[x], L.tilecounts[x], L.mean_cv[x][0], L.mean_cv[x][1], L.tile_rmsd[x]), if L.tilecounts[x] < 3: print radial = (0, 0) tangential = (0, 0) rmean, tmean, rsigma, tsigma = (0, 0, 1, 1) else: wtaveg = L.weighted_average_angle_deg_from_tile(x) print "Tile rotation %6.2f deg" % wtaveg, radial, tangential, rmean, tmean, rsigma, tsigma = get_radial_tangential_vectors( L, x) print "%6.2f %6.2f" % (rsigma, tsigma) radial_sigmas[x] = rsigma tangen_sigmas[x] = tsigma rstats = flex.mean_and_variance(radial_sigmas, L.tilecounts.as_double()) tstats = flex.mean_and_variance(tangen_sigmas, L.tilecounts.as_double()) print "\nOverall %8d observations, delx %5.2f dely %5.2f, rmsd = %5.2f" % ( L.overall_N, L.overall_cv[0], L.overall_cv[1], L.overall_rmsd) print "Average tile rmsd %5.2f" % flex.mean(flex.double(L.tile_rmsd)) print "Average tile displacement %5.2f" % (flex.mean( flex.double([matrix.col(cv).length() for cv in L.mean_cv]))) print "Weighted average radial sigma %6.2f" % rstats.mean() print "Weighted average tangential sigma %6.2f" % tstats.mean() if working_phil.show_plots is True: plt.plot([(L.tiles[4 * x + 0] + L.tiles[4 * x + 2]) / 2. for x in range(64)], [(L.tiles[4 * x + 1] + L.tiles[4 * x + 3]) / 2. 
for x in range(64)], "go") for x in range(64): plt.text(10 + (L.tiles[4 * x + 0] + L.tiles[4 * x + 2]) / 2., 10 + (L.tiles[4 * x + 1] + L.tiles[4 * x + 3]) / 2., "%d" % x) plt.show() for idx in range(64): x = sort_radii[idx] print "Tile %2d: radius %7.2f, %6d observations, delx %5.2f dely %5.2f, rmsd = %5.2f" % ( x, L.radii[x], L.tilecounts[x], L.mean_cv[x][0], L.mean_cv[x][1], L.tile_rmsd[x]), if L.tilecounts[x] < 3: print radial = (0, 0) tangential = (0, 0) rmean, tmean, rsigma, tsigma = (0, 0, 1, 1) else: wtaveg = L.weighted_average_angle_deg_from_tile(x) print "Tile rotation %6.2f deg" % wtaveg, radial, tangential, rmean, tmean, rsigma, tsigma = get_radial_tangential_vectors( L, x) print "%6.2f %6.2f" % (rsigma, tsigma) if working_phil.colormap: from pylab import imshow, axes, colorbar, show import numpy xcv, ycv = get_correction_vector_xy(L, x) _min = min(min(xcv), min(ycv)) _max = max(max(xcv), max(ycv)) hist, xedges, yedges = numpy.histogram2d(xcv, ycv, bins=40, range=[[_min, _max], [_min, _max]]) extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]] imshow(hist.T, extent=extent, interpolation='nearest', origin='lower') from matplotlib.patches import Ellipse ell = Ellipse(xy=(L.mean_cv[x][0], L.mean_cv[x][1]), width=2. * rsigma, height=2. * tsigma, angle=math.atan2(-(radial[1]), -(radial[0])) * 180. / math.pi, edgecolor="y", linewidth=2, fill=False, zorder=100) axes().add_artist(ell) colorbar() show() else: from matplotlib import pyplot as plt xcv, ycv = get_correction_vector_xy(L, x) if len(xcv) == 0 or len(ycv) == 0: continue plt.plot(xcv, ycv, "r.") plt.plot([L.mean_cv[x][0]], [L.mean_cv[x][1]], "go") plt.plot([L.mean_cv[x][0] + radial[0]], [L.mean_cv[x][1] + radial[1]], "yo") plt.plot([L.mean_cv[x][0] + tangential[0]], [L.mean_cv[x][1] + tangential[1]], "bo") from matplotlib.patches import Ellipse ell = Ellipse(xy=(L.mean_cv[x][0], L.mean_cv[x][1]), width=2. * rsigma, height=2. * tsigma, angle=math.atan2(-(radial[1]), -(radial[0])) * 180. / math.pi, edgecolor="y", linewidth=2, fill=False, zorder=100) plt.axes().add_artist(ell) plt.axes().set_aspect("equal") _min = min(min(xcv), min(ycv)) _max = max(max(xcv), max(ycv)) plt.axes().set_xlim(_min, _max) plt.axes().set_ylim(_min, _max) plt.show()
def set_chunk_stats(chunk, stats, stat_choice, n_residues=None, ref_cell=None, space_group=None, d_min=None, ref_data=None): if "reslimit" in stat_choice: stats["reslimit"].append(chunk.res_lim) else: stats["reslimit"].append(float("nan")) if "pr" in stat_choice: stats["pr"].append(chunk.profile_radius) else: stats["pr"].append(float("nan")) stats["ccref"].append(float("nan")) if set(["ioversigma", "resnatsnr1", "ccref"]).intersection(stat_choice): iobs = chunk.data_array(space_group, False) iobs = iobs.select(iobs.sigmas() > 0).merge_equivalents( use_internal_variance=False).array() binner = iobs.setup_binner(auto_binning=True) if "resnatsnr1" in stat_choice: res = float("nan") for i_bin in binner.range_used(): sel = binner.selection(i_bin) tmp = iobs.select(sel) if tmp.size() == 0: continue sn = flex.mean(tmp.data() / tmp.sigmas()) if sn <= 1: res = binner.bin_d_range(i_bin)[1] break stats["resnatsnr1"].append(res) else: stats["resnatsnr1"].append(float("nan")) if d_min: iobs = iobs.resolution_filter(d_min=d_min) if "ccref" in stat_choice: corr = iobs.correlation(ref_data, assert_is_similar_symmetry=False) if corr.is_well_defined(): stats["ccref"][-1] = corr.coefficient() if "ioversigma" in stat_choice: stats["ioversigma"].append(flex.mean(iobs.data() / iobs.sigmas())) else: stats["ioversigma"].append(float("nan")) else: stats["ioversigma"].append(float("nan")) stats["resnatsnr1"].append(float("nan")) if "abdist" in stat_choice: from cctbx.uctbx.determine_unit_cell import NCDist G6a, G6b = make_G6(ref_cell), make_G6(chunk.cell) abdist = NCDist(G6a, G6b) stats["abdist"].append(abdist) else: stats["abdist"].append(float("nan")) if "wilsonb" in stat_choice: iso_scale_and_b = ml_iso_absolute_scaling(iobs, n_residues, 0) stats["wilsonb"].append(iso_scale_and_b.b_wilson) else: stats["wilsonb"].append(float("nan"))
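# Sketch of the "resnatsnr1" statistic gathered above: walk resolution bins from low to high
# resolution and report the d-spacing at which the bin-averaged I/sigma(I) first drops to 1
# or below.  Bins here are (d_max, d_min, mean_i_over_sigma) tuples; illustrative only.
def resolution_at_snr1(bins):
  for d_max, d_min, mean_i_over_sigma in bins:
    if mean_i_over_sigma <= 1:
      return d_min
  return float("nan")  # never dropped below 1 within the measured range

# example: resolution_at_snr1([(50., 4.0, 12.3), (4.0, 3.2, 2.1), (3.2, 2.8, 0.9)]) -> 2.8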
def compute(pdb_hierarchy, unit_cell, fft_n_real, fft_m_real, map_1, map_2, detail, atom_radius, use_hydrogens, hydrogen_atom_radius): assert detail in ["atom", "residue"] results = [] for chain in pdb_hierarchy.chains(): for residue_group in chain.residue_groups(): for conformer in residue_group.conformers(): for residue in conformer.residues(): r_id_str = "%2s %1s %3s %4s %1s" % ( chain.id, conformer.altloc, residue.resname, residue.resseq, residue.icode) r_sites_cart = flex.vec3_double() r_b = flex.double() r_occ = flex.double() r_mv1 = flex.double() r_mv2 = flex.double() r_rad = flex.double() for atom in residue.atoms(): a_id_str = "%s %4s" % (r_id_str, atom.name) if (atom.element_is_hydrogen()): rad = hydrogen_atom_radius else: rad = atom_radius if (not (atom.element_is_hydrogen() and not use_hydrogens)): map_value_1 = map_1.eight_point_interpolation( unit_cell.fractionalize(atom.xyz)) map_value_2 = map_2.eight_point_interpolation( unit_cell.fractionalize(atom.xyz)) r_sites_cart.append(atom.xyz) r_b.append(atom.b) r_occ.append(atom.occ) r_mv1.append(map_value_1) r_mv2.append(map_value_2) r_rad.append(rad) if (detail == "atom"): sel = maptbx.grid_indices_around_sites( unit_cell=unit_cell, fft_n_real=fft_n_real, fft_m_real=fft_m_real, sites_cart=flex.vec3_double([atom.xyz]), site_radii=flex.double([rad])) cc = flex.linear_correlation( x=map_1.select(sel), y=map_2.select(sel)).coefficient() result = group_args(chain_id=chain.id, atom=atom, id_str=a_id_str, cc=cc, map_value_1=map_value_1, map_value_2=map_value_2, b=atom.b, occupancy=atom.occ, n_atoms=1) results.append(result) if (detail == "residue") and (len(r_mv1) > 0): sel = maptbx.grid_indices_around_sites( unit_cell=unit_cell, fft_n_real=fft_n_real, fft_m_real=fft_m_real, sites_cart=r_sites_cart, site_radii=r_rad) cc = flex.linear_correlation( x=map_1.select(sel), y=map_2.select(sel)).coefficient() result = group_args(residue=residue, chain_id=chain.id, id_str=r_id_str, cc=cc, map_value_1=flex.mean(r_mv1), map_value_2=flex.mean(r_mv2), b=flex.mean(r_b), occupancy=flex.mean(r_occ), n_atoms=r_sites_cart.size()) results.append(result) return results
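# Minimal Pearson-correlation helper mirroring what flex.linear_correlation computes for the
# per-atom / per-residue map-vs-map comparison above, applied to whichever grid values were
# selected around the site(s).  Plain Python, illustrative only.
import math

def map_cc(values_1, values_2):
  n = len(values_1)
  if n == 0:
    return None
  m1 = sum(values_1) / n
  m2 = sum(values_2) / n
  num = sum((a - m1) * (b - m2) for a, b in zip(values_1, values_2))
  den = math.sqrt(sum((a - m1) ** 2 for a in values_1) *
                  sum((b - m2) ** 2 for b in values_2))
  return num / den if den else None

# example: map_cc([0.1, 0.4, 0.9], [0.2, 0.5, 0.8])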
def __init__(self, miller_array, parameters, out=None, n_residues=100, n_bases=0): self.params=parameters self.miller_array=miller_array.deep_copy().set_observation_type( miller_array).merge_equivalents().array() self.out = out if self.out is None: self.out = sys.stdout if self.out == "silent": self.out = null_out() self.no_aniso_array = self.miller_array if self.params.aniso.action == "remove_aniso": # first perform anisotropic scaling aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling( miller_array = self.miller_array, n_residues = n_residues, n_bases = n_bases) aniso_scale_and_b.p_scale = 0 # set the p_scale back to 0! aniso_scale_and_b.show(out=out) # now apply the anisotropy correction self.aniso_p_scale = aniso_scale_and_b.p_scale self.aniso_u_star = aniso_scale_and_b.u_star self.aniso_b_cart = aniso_scale_and_b.b_cart if self.params.aniso.final_b == "eigen_min": b_use=aniso_scale_and_b.eigen_values[2] elif self.params.aniso.final_b == "eigen_mean" : b_use=flex.mean(aniso_scale_and_b.eigen_values) elif self.params.aniso.final_b == "user_b_iso": assert self.params.aniso.b_iso is not None b_use=self.params.aniso.b_iso else: b_use = 30 b_cart_aniso_removed = [ -b_use, -b_use, -b_use, 0, 0, 0] u_star_aniso_removed = adptbx.u_cart_as_u_star( miller_array.unit_cell(), adptbx.b_as_u( b_cart_aniso_removed ) ) ## I do this in two steps, but it can easily be done in one step ## just for clarity, that's all. self.no_aniso_array = absolute_scaling.anisotropic_correction( self.miller_array,0.0,aniso_scale_and_b.u_star ) self.no_aniso_array = absolute_scaling.anisotropic_correction( self.no_aniso_array,0.0,u_star_aniso_removed) self.no_aniso_array = self.no_aniso_array.set_observation_type( miller_array ) # that is done; now we can do outlier detection if desired outlier_manager = outlier_rejection.outlier_manager( self.no_aniso_array, None, out=self.out) self.new_miller_array = self.no_aniso_array if self.params.outlier.action == "basic": print >> self.out, "Non-outliers found by the basic wilson statistics" print >> self.out, "protocol will be written out." basic_array = outlier_manager.basic_wilson_outliers( p_basic_wilson = self.params.outlier.parameters.basic_wilson.level, return_data = True) self.new_miller_array = basic_array if self.params.outlier.action == "extreme": print >> self.out, "Non-outliers found by the extreme value wilson statistics" print >> self.out, "protocol will be written out." extreme_array = outlier_manager.extreme_wilson_outliers( p_extreme_wilson = self.params.outlier.parameters.extreme_wilson.level, return_data = True) self.new_miller_array = extreme_array if self.params.outlier.action == "beamstop": print >> self.out, "Outliers found by the beamstop shadow" print >> self.out, "detection protocol will be written out."
beamstop_array = outlier_manager.beamstop_shadow_outliers( level = self.params.outlier.parameters.beamstop.level, d_min = self.params.outlier.parameters.beamstop.d_min, return_data=True) self.new_miller_array = beamstop_array if self.params.outlier.action == "None": self.new_miller_array = self.no_aniso_array # now we can twin or detwin the data if needed self.final_array = self.new_miller_array if self.params.symmetry.action == "twin": alpha = self.params.symmetry.twinning_parameters.fraction if (alpha is None): raise Sorry("Twin fraction not specified, not twinning data") elif not (0 <= alpha <= 0.5): raise Sorry("Twin fraction must be between 0 and 0.5.") print >> self.out print >> self.out, "Twinning given data" print >> self.out, "-------------------" print >> self.out print >> self.out, "Artificially twinning the data with fraction %3.2f" %\ alpha self.final_array = self.new_miller_array.twin_data( twin_law = self.params.symmetry.twinning_parameters.twin_law, alpha=alpha).as_intensity_array() elif (self.params.symmetry.action == "detwin"): twin_law = self.params.symmetry.twinning_parameters.twin_law alpha = self.params.symmetry.twinning_parameters.fraction if (alpha is None): raise Sorry("Twin fraction not specified, not detwinning data") elif not (0 <= alpha <= 0.5): raise Sorry("Twin fraction must be between 0 and 0.5.") print >> self.out, """ Attempting to detwin data ------------------------- Detwinning data with: - twin law: %s - twin fraction: %.2f BE WARNED! DETWINNING OF DATA DOES NOT SOLVE YOUR TWINNING PROBLEM! PREFERABLY, REFINEMENT SHOULD BE CARRIED OUT AGAINST ORIGINAL DATA ONLY USING A TWIN SPECIFIC TARGET FUNCTION! """ % (twin_law, alpha) self.final_array = self.new_miller_array.detwin_data( twin_law=twin_law, alpha=alpha).as_intensity_array() assert self.final_array is not None
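For intensities, twinning and detwinning with the same law and fraction are algebraically inverse operations (away from alpha = 0.5), so a quick sanity check on the two branches above is to round-trip a data set. A sketch only, reusing the twin_data/detwin_data calls from the code above; i_obs stands for any intensity miller array and twin_law for a valid operator string for its symmetry (both names are placeholders here):

alpha = 0.25
twinned = i_obs.twin_data(twin_law=twin_law, alpha=alpha).as_intensity_array()
recovered = twinned.detwin_data(twin_law=twin_law, alpha=alpha).as_intensity_array()
# Apart from reflections whose twin mate is absent, the recovered
# intensities should agree with the originals to rounding error.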
def run(hklin): xscaled = xds_ascii.XDS_ASCII(hklin) # Must be XSCALE output merged_iobs = xscaled.i_obs().merge_equivalents( use_internal_variance=False).array() binner = merged_iobs.setup_binner(n_bins=100) isets = set(xscaled.iset) for_plot = {} cut_ios = (2, 1, 0.5, 0) print "iset file", for cut in cut_ios: print "cut_ios_%.2f" % cut, print for iset in isets: sel = (xscaled.iset == iset) data_i = xscaled.i_obs().select(sel).merge_equivalents( use_internal_variance=False).array() cutoffs = eval_resolution(data_i, 100, cut_ios) print "%3d %s %s" % (iset, xscaled.input_files[iset][0], " ".join( map(lambda x: "%.2f" % x, cutoffs))) for i_bin in binner.range_used(): dmax, dmin = binner.bin_d_range(i_bin) Isel = data_i.resolution_filter(d_max=dmax, d_min=dmin) for_plot.setdefault( iset, []).append(flex.mean(Isel.data()) if Isel.size() > 0 else 0) import matplotlib matplotlib.use('Agg') # Allow plotting without an X display import pylab import math from matplotlib.ticker import FuncFormatter s2_formatter = lambda x, pos: "inf" if x == 0 else "%.2f" % (1. / math.sqrt(x)) exp_formatter = lambda x, pos: "%.1e" % x plot_x = [binner.bin_d_range(i)[1]**(-2) for i in binner.range_used()] from matplotlib.backends.backend_pdf import PdfPages pp = PdfPages("test.pdf") keys = sorted(for_plot) for names in (keys[i:i + 100] for i in xrange(0, len(keys), 100)): ncols = 5 nrows = int(math.ceil(len(names) / float(ncols))) fig, axes = pylab.plt.subplots(ncols=ncols, nrows=nrows, figsize=(5 * ncols, 5 * nrows), sharex=False, sharey=False) axes = axes.flatten() for name, ax in zip(names, axes): ax.plot( plot_x, for_plot[name], linewidth=1, ) ax.axhline(y=0, color="red", linestyle="-") ax.set_xlabel('(d^-2)') ax.set_ylabel('<I>') ax.xaxis.set_major_formatter(FuncFormatter(s2_formatter)) ax.yaxis.set_major_formatter(FuncFormatter(exp_formatter)) ax.set_title("data%.4d" % name) ax.grid(True) pylab.plt.tight_layout() plot_title = "" pylab.title(plot_title) pp.savefig() #pylab.savefig("test_%.3d.png"%i) #pylab.show() pp.close()
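The axis trick used above is worth isolating: the plot is linear in 1/d^2, and the FuncFormatter converts each tick value back to a d-spacing label. The same formatter on its own (a sketch independent of the rest of the plotting code):

import math
from matplotlib.ticker import FuncFormatter

def d_spacing_label(x, pos):
  # Tick value x is 1/d^2; show the corresponding d-spacing, with "inf" at the origin.
  return "inf" if x == 0 else "%.2f" % (1.0 / math.sqrt(x))

s2_formatter = FuncFormatter(d_spacing_label)
# Attach with ax.xaxis.set_major_formatter(s2_formatter) on any axes object.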
def scale_frame_detail(self, result, file_name, db_mgr, out): # If the pickled integration file does not contain a wavelength, # fall back on the value given on the command line. XXX The # wavelength parameter should probably be removed from master_phil # once all pickled integration files contain it. if (result.has_key("wavelength")): wavelength = result["wavelength"] elif (self.params.wavelength is not None): wavelength = self.params.wavelength else: # XXX Give error, or raise exception? return None assert (wavelength > 0) observations = result["observations"][0] cos_two_polar_angle = result["cos_two_polar_angle"] assert observations.size() == cos_two_polar_angle.size() tt_vec = observations.two_theta(wavelength) #print "mean tt degrees",180.*flex.mean(tt_vec.data())/math.pi cos_tt_vec = flex.cos(tt_vec.data()) sin_tt_vec = flex.sin(tt_vec.data()) cos_sq_tt_vec = cos_tt_vec * cos_tt_vec sin_sq_tt_vec = sin_tt_vec * sin_tt_vec P_nought_vec = 0.5 * (1. + cos_sq_tt_vec) F_prime = -1.0 # Hard-coded value defines the incident polarization axis P_prime = 0.5 * F_prime * cos_two_polar_angle * sin_sq_tt_vec # XXX added as a diagnostic prange = P_nought_vec - P_prime other_F_prime = 1.0 otherP_prime = 0.5 * other_F_prime * cos_two_polar_angle * sin_sq_tt_vec otherprange = P_nought_vec - otherP_prime diff2 = flex.abs(prange - otherprange) print "mean diff is", flex.mean(diff2), "range", flex.min( diff2), flex.max(diff2) # XXX done observations = observations / (P_nought_vec - P_prime) # This corrects observations for polarization assuming 100% polarization on # one axis (thus the F_prime = -1.0 rather than the perpendicular axis, 1.0) # Polarization model as described by Kahn, Fourme, Gadet, Janin, Dumas & Andre # (1982) J. Appl. Cryst. 15, 330-337, equations 13 - 15. print "Step 3. Correct for polarization." indexed_cell = observations.unit_cell() observations_original_index = observations.deep_copy() if result.get( "model_partialities", None ) is not None and result["model_partialities"][0] is not None: # some recordkeeping useful for simulations partialities_original_index = observations.customized_copy( crystal_symmetry=self.miller_set.crystal_symmetry(), data=result["model_partialities"][0]["data"], sigmas=flex.double(result["model_partialities"][0] ["data"].size()), #dummy value for sigmas indices=result["model_partialities"][0]["indices"], ).resolution_filter(d_min=self.params.d_min) assert len(observations_original_index.indices()) == len( observations.indices()) # Now manipulate the data to conform to unit cell, asu, and space group # of reference. The resolution will be cut later. # Only works if there is NOT an indexing ambiguity! observations = observations.customized_copy( anomalous_flag=not self.params.merge_anomalous, crystal_symmetry=self.miller_set.crystal_symmetry()).map_to_asu() observations_original_index = observations_original_index.customized_copy( anomalous_flag=not self.params.merge_anomalous, crystal_symmetry=self.miller_set.crystal_symmetry()) print "Step 4. Filter on global resolution and map to asu" print >> out, "Data in reference setting:" #observations.show_summary(f=out, prefix=" ") show_observations(observations, out=out) #if self.params.significance_filter.apply is True: # raise Exception("significance filter not implemented in samosa") if self.params.significance_filter.apply is True: #------------------------------------ # Apply an I/sigma filter ... 
# accept resolution bins only if they have significant signal; tends to screen out higher resolution observations if the integration model doesn't quite fit N_obs_pre_filter = observations.size() N_bins_small_set = N_obs_pre_filter // self.params.significance_filter.min_ct N_bins_large_set = N_obs_pre_filter // self.params.significance_filter.max_ct # Ensure there is at least one bin. N_bins = max([ min([self.params.significance_filter.n_bins, N_bins_small_set]), N_bins_large_set, 1 ]) print "Total obs %d Choose n bins = %d" % (N_obs_pre_filter, N_bins) bin_results = show_observations(observations, out=out, n_bins=N_bins) #show_observations(observations, out=sys.stdout, n_bins=N_bins) acceptable_resolution_bins = [ bin.mean_I_sigI > self.params.significance_filter.sigma for bin in bin_results ] acceptable_nested_bin_sequences = [ i for i in xrange(len(acceptable_resolution_bins)) if False not in acceptable_resolution_bins[:i + 1] ] if len(acceptable_nested_bin_sequences) == 0: return null_data(file_name=file_name, log_out=out.getvalue(), low_signal=True) else: N_acceptable_bins = max(acceptable_nested_bin_sequences) + 1 imposed_res_filter = float(bin_results[N_acceptable_bins - 1].d_range.split()[2]) imposed_res_sel = observations.resolution_filter_selection( d_min=imposed_res_filter) observations = observations.select(imposed_res_sel) observations_original_index = observations_original_index.select( imposed_res_sel) print "New resolution filter at %7.2f" % imposed_res_filter, file_name print "N acceptable bins", N_acceptable_bins print "Old n_obs: %d, new n_obs: %d" % (N_obs_pre_filter, observations.size()) print "Step 5. Frame by frame resolution filter" # Finished applying the binwise I/sigma filter--------------------------------------- if self.params.raw_data.sdfac_auto is True: raise Exception("sdfac auto not implemented in samosa.") print "Step 6. Match to reference intensities, filter by correlation, filter out negative intensities." assert len(observations_original_index.indices()) \ == len(observations.indices()) data = frame_data(self.n_refl, file_name) data.set_indexed_cell(indexed_cell) data.d_min = observations.d_min() # Ensure that match_multi_indices() will return identical results # when a frame's observations are matched against the # pre-generated Miller set, self.miller_set, and the reference # data set, self.i_model. The implication is that the same match # can be used to map Miller indices to array indices for intensity # accumulation, and for determination of the correlation # coefficient in the presence of a scaling reference. if self.i_model is not None: assert len(self.i_model.indices()) == len(self.miller_set.indices()) \ and (self.i_model.indices() == self.miller_set.indices()).count(False) == 0 matches = miller.match_multi_indices( miller_indices_unique=self.miller_set.indices(), miller_indices=observations.indices()) use_weights = False # New facility for getting variance-weighted correlation if self.params.scaling.algorithm in ['mark1', 'levmar']: # Because no correlation is computed, the correlation # coefficient is fixed at zero. Setting slope = 1 means # intensities are added without applying a scale factor. sum_x = 0 sum_y = 0 for pair in matches.pairs(): data.n_obs += 1 if not self.params.include_negatives and observations.data()[ pair[1]] <= 0: data.n_rejected += 1 else: sum_y += observations.data()[pair[1]] N = data.n_obs - data.n_rejected # Early return if there are no positive reflections on the frame.
if data.n_obs <= data.n_rejected: return null_data(file_name=file_name, log_out=out.getvalue(), low_signal=True) # Update the count for each matched reflection. This counts # reflections with non-positive intensities, too. data.completeness += matches.number_of_matches(0).as_int() data.wavelength = wavelength if not self.params.scaling.enable: # Do not scale anything print "Scale factor to an isomorphous reference PDB will NOT be applied." slope = 1.0 offset = 0.0 observations_original_index_indices = observations_original_index.indices( ) if db_mgr is None: return unpack(MINI.x) # special exit for two-color indexing kwargs = { 'wavelength': wavelength, 'beam_x': result['xbeam'], 'beam_y': result['ybeam'], 'distance': result['distance'], 'unique_file_name': data.file_name } ORI = result["current_orientation"][0] Astar = matrix.sqr(ORI.reciprocal_matrix()) kwargs['res_ori_1'] = Astar[0] kwargs['res_ori_2'] = Astar[1] kwargs['res_ori_3'] = Astar[2] kwargs['res_ori_4'] = Astar[3] kwargs['res_ori_5'] = Astar[4] kwargs['res_ori_6'] = Astar[5] kwargs['res_ori_7'] = Astar[6] kwargs['res_ori_8'] = Astar[7] kwargs['res_ori_9'] = Astar[8] assert self.params.scaling.report_ML is True kwargs['half_mosaicity_deg'] = result["ML_half_mosaicity_deg"][0] kwargs['domain_size_ang'] = result["ML_domain_size_ang"][0] frame_id_0_base = db_mgr.insert_frame(**kwargs) xypred = result["mapped_predictions"][0] indices = flex.size_t([pair[1] for pair in matches.pairs()]) sel_observations = flex.intersection(size=observations.data().size(), iselections=[indices]) set_original_hkl = observations_original_index_indices.select( flex.intersection(size=observations_original_index_indices.size(), iselections=[indices])) set_xypred = xypred.select( flex.intersection(size=xypred.size(), iselections=[indices])) kwargs = { 'hkl_id_0_base': [pair[0] for pair in matches.pairs()], 'i': observations.data().select(sel_observations), 'sigi': observations.sigmas().select(sel_observations), 'detector_x': [xy[0] for xy in set_xypred], 'detector_y': [xy[1] for xy in set_xypred], 'frame_id_0_base': [frame_id_0_base] * len(matches.pairs()), 'overload_flag': [0] * len(matches.pairs()), 'original_h': [hkl[0] for hkl in set_original_hkl], 'original_k': [hkl[1] for hkl in set_original_hkl], 'original_l': [hkl[2] for hkl in set_original_hkl] } db_mgr.insert_observation(**kwargs) print >> out, "Lattice: %d reflections" % (data.n_obs - data.n_rejected) print >> out, "average obs", sum_y / (data.n_obs - data.n_rejected), \ "average calc", sum_x / (data.n_obs - data.n_rejected) print >> out, "Rejected %d reflections with negative intensities" % \ data.n_rejected data.accept = True for pair in matches.pairs(): if not self.params.include_negatives and ( observations.data()[pair[1]] <= 0): continue Intensity = observations.data()[pair[1]] # Super-rare exception. If saved sigmas instead of I/sigmas in the ISIGI dict, this wouldn't be needed. if Intensity == 0: continue # Add the reflection as a two-tuple of intensity and I/sig(I) # to the dictionary of observations. index = self.miller_set.indices()[pair[0]] isigi = (Intensity, observations.data()[pair[1]] / observations.sigmas()[pair[1]], 1.0) if index in data.ISIGI: data.ISIGI[index].append(isigi) else: data.ISIGI[index] = [isigi] sigma = observations.sigmas()[pair[1]] variance = sigma * sigma data.summed_N[pair[0]] += 1 data.summed_wt_I[pair[0]] += Intensity / variance data.summed_weight[pair[0]] += 1 / variance data.set_log_out(out.getvalue()) return data
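The summed_N / summed_wt_I / summed_weight accumulators at the end implement a plain inverse-variance weighted merge: each observation contributes I/sigma^2 and 1/sigma^2, and the merged intensity is later recovered as the ratio of the two sums. A minimal self-contained sketch of that accumulation for a single reflection (plain Python, no flex types; helper name is hypothetical):

def merge_one_reflection(observations):
  # observations: list of (intensity, sigma) measurements of one reflection.
  summed_n, summed_wt_i, summed_weight = 0, 0.0, 0.0
  for intensity, sigma in observations:
    weight = 1.0 / (sigma * sigma)
    summed_n += 1
    summed_wt_i += intensity * weight
    summed_weight += weight
  # Inverse-variance weighted mean and its propagated sigma.
  return summed_wt_i / summed_weight, (1.0 / summed_weight) ** 0.5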
def run(x1, x2, n_bins): hemispheres = [] merged_data = [] for x in (x1, x2): print "Processing %s" % x print "====================" data = read_x(x, take_full=False) # change this to True if you need only fully recorded (full) reflections. data.crystal_symmetry().show_summary() merge = data.merge_equivalents(use_internal_variance=False) merge.show_summary() print array = merge.array() array = array.select(array.sigmas() > 0) merged_data.append(array) # separate + and - matches = array.match_bijvoet_mates()[1] # returns asu and matches sel_p = matches.pairs_hemisphere_selection("+") sel_p.extend(matches.singles("+")) sel_m = matches.pairs_hemisphere_selection("-") sel_m.extend(matches.singles("-")) hemispheres.append([array.select(sel_p, anomalous_flag=False), array.select(sel_m, anomalous_flag=False).map_to_asu()]) print "x1: merged=%d (+)=%d (-)=%d" % (merged_data[0].size(), hemispheres[0][0].size(), hemispheres[0][1].size()) print "x2: merged=%d (+)=%d (-)=%d" % (merged_data[1].size(), hemispheres[1][0].size(), hemispheres[1][1].size()) # for sigma calculation (new_sigma^2 = sigma1^2 + sigma2^2) additive_sigmas = lambda x, y: flex.sqrt(flex.pow2(x.sigmas()) + flex.pow2(y.sigmas())) # calculate data1(+) - data2(-) """ # for debug for i, x in enumerate(hemispheres[0][0]): print x if i > 10: break print for i, x in enumerate(hemispheres[1][0]): print x if i > 10: break """ h1p, h2m = hemispheres[0][0].common_sets(hemispheres[1][1]) h1p_h2m = h1p.customized_copy(data=h1p.data() - h2m.data(), sigmas=additive_sigmas(h1p, h2m)) print h1p_h2m.size() #for x in h1p_h2m: print x # calculate data2(+) - data1(-) h2p, h1m = hemispheres[1][0].common_sets(hemispheres[0][1]) h2p_h1m = h2p.customized_copy(data=h2p.data() - h1m.data(), sigmas=additive_sigmas(h2p, h1m)) print h2p_h1m.size() print #for x in h2p_h1m: print x # concatenate data1(+) - data2(-) and data2(+) - data1(-) dano_tmp = h1p_h2m.concatenate(h2p_h1m) merge = dano_tmp.merge_equivalents(use_internal_variance=False) print "Merging stats of (+)-(-) data" print "=============================" merge.show_summary() dano = merge.array() print "num_dano=", dano.size() print # process with binning dano.setup_binner(n_bins=n_bins) binner = dano.binner() print "Result:" print " dmax dmin nrefs dano" for i_bin in binner.range_used(): # selection for this bin. sel is a flex.bool object (list of True or False) sel = binner.selection(i_bin) count = binner.count(i_bin) # take mean of absolute value of anomalous differences in a bin if count > 0: bin_mean = flex.mean(flex.abs(dano.select(sel).data())) else: bin_mean = float("nan") d_max, d_min = binner.bin_d_range(i_bin) print "%7.2f %7.2f %6d %.2f" % (d_max, d_min, count, bin_mean)
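The index alignment that makes the subtraction above valid comes from common_sets, which restricts two arrays to their shared Miller indices in matching order. A toy illustration with made-up symmetry, indices and values:

from cctbx import crystal, miller
from cctbx.array_family import flex

cs = crystal.symmetry(unit_cell=(10, 10, 10, 90, 90, 90), space_group_symbol="P1")
set_a = miller.set(crystal_symmetry=cs, indices=flex.miller_index([(1, 0, 0), (0, 1, 0), (0, 0, 1)]), anomalous_flag=False)
set_b = miller.set(crystal_symmetry=cs, indices=flex.miller_index([(0, 1, 0), (0, 0, 1), (1, 1, 1)]), anomalous_flag=False)
a = miller.array(miller_set=set_a, data=flex.double([1., 2., 3.]), sigmas=flex.double([0.1, 0.2, 0.3]))
b = miller.array(miller_set=set_b, data=flex.double([20., 30., 40.]), sigmas=flex.double([1., 2., 3.]))
a2, b2 = a.common_sets(b)
# a2 and b2 now hold only (0,1,0) and (0,0,1), in the same order, so
# a2.data() - b2.data() and the combined sigma sqrt(s1^2 + s2^2) are element-wise.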
def exercise_2(): for use_reference in [True, False, None]: pdb_inp = iotbx.pdb.input( lines=flex.std_string(pdb_str_2.splitlines()), source_info=None) model = manager( model_input=pdb_inp, log=null_out()) grm = model.get_restraints_manager().geometry xrs2 = model.get_xray_structure() awl2 = model.get_hierarchy().atoms_with_labels() pdb_inp3 = iotbx.pdb.input(source_info=None, lines=pdb_str_3) xrs3 = pdb_inp3.xray_structure_simple() ph3 = pdb_inp3.construct_hierarchy() ph3.atoms().reset_i_seq() awl3 = ph3.atoms_with_labels() sites_cart_reference = flex.vec3_double() selection = flex.size_t() reference_names = ["CG", "CD", "NE", "CZ", "NH1", "NH2"] for a2,a3 in zip(tuple(awl2), tuple(awl3)): assert a2.resname == a3.resname assert a2.name == a3.name assert a2.i_seq == a3.i_seq if(a2.resname == "ARG" and a2.name.strip() in reference_names): selection.append(a2.i_seq) sites_cart_reference.append(a3.xyz) assert selection.size() == len(reference_names) selection_bool = flex.bool(xrs2.scatterers().size(), selection) if(use_reference): grm.adopt_reference_coordinate_restraints_in_place( reference.add_coordinate_restraints( sites_cart = sites_cart_reference, selection = selection, sigma = 0.01)) elif(use_reference is None): grm.adopt_reference_coordinate_restraints_in_place( reference.add_coordinate_restraints( sites_cart = sites_cart_reference, selection = selection, sigma = 0.01)) grm.remove_reference_coordinate_restraints_in_place( selection = selection) d1 = flex.mean(flex.sqrt((xrs2.sites_cart().select(selection) - xrs3.sites_cart().select(selection)).dot())) print("distance start (use_reference: %s): %6.4f"%(str(use_reference), d1)) assert d1>4.0 assert approx_equal( flex.max(flex.sqrt((xrs2.sites_cart().select(~selection_bool) - xrs3.sites_cart().select(~selection_bool)).dot())), 0) from cctbx import geometry_restraints import mmtbx.refinement.geometry_minimization import scitbx.lbfgs grf = geometry_restraints.flags.flags(default=True) sites_cart = xrs2.sites_cart() minimized = mmtbx.refinement.geometry_minimization.lbfgs( sites_cart = sites_cart, correct_special_position_tolerance=1.0, geometry_restraints_manager = grm, sites_cart_selection = flex.bool(sites_cart.size(), selection), geometry_restraints_flags = grf, lbfgs_termination_params = scitbx.lbfgs.termination_parameters( max_iterations=5000)) xrs2.set_sites_cart(sites_cart = sites_cart) d2 = flex.mean(flex.sqrt((xrs2.sites_cart().select(selection) - xrs3.sites_cart().select(selection)).dot())) print("distance final (use_reference: %s): %6.4f"%(str(use_reference), d2)) if(use_reference): assert d2<0.005, "failed: %f < 0.005" % d2 else: assert d2>4.0, d2 assert approx_equal( flex.max(flex.sqrt((xrs2.sites_cart().select(~selection_bool) - xrs3.sites_cart().select(~selection_bool)).dot())), 0)
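The d1/d2 quantities in this test are simply the mean per-atom displacement between two coordinate sets. The same idiom as a small helper (hypothetical name; assumes two flex.vec3_double arrays of equal length):

from cctbx.array_family import flex

def mean_displacement(sites_a, sites_b):
  # Per-atom distance is the square root of the self dot product of the
  # difference vector; average over all atoms.
  deltas = sites_a - sites_b
  return flex.mean(flex.sqrt(deltas.dot()))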
def coefficient_of_determination(y, y_model): mean_y = flex.mean(y) r_sqr = flex.sum((y_model - mean_y)**2) / flex.sum((y - mean_y)**2) return r_sqr
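A quick usage example with made-up values. Note that this explained-variance form of R^2 agrees with the more familiar 1 - SS_res/SS_tot only when y_model comes from a least-squares fit that includes an intercept; for arbitrary models the two can differ.

from cctbx.array_family import flex

y = flex.double([1.0, 2.0, 3.0, 4.0])
y_model = flex.double([1.1, 1.9, 3.2, 3.8])
r_sqr = coefficient_of_determination(y=y, y_model=y_model)  # 0.9 for these values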
def __init__(self, fmodel, free_reflections_per_bin=140, max_number_of_bins=30, n_bins=None): from cctbx.array_family import flex mp = fmodel.mask_params self.target_name = fmodel.target_name if (self.target_name == "twin_lsq_f"): self.twin_fraction = fmodel.twin_fraction self.twin_law = fmodel.twin_law else: self.twin_fraction = None self.twin_law = None self.r_work = fmodel.r_work() self.r_free = fmodel.r_free() self.r_all = fmodel.r_all() self.target_work = fmodel.target_w() self.target_free = fmodel.target_t() self.target_work_no_norm = fmodel.target_w() if (self.target_work_no_norm is not None): self.target_work_no_norm *= fmodel.f_calc_w().data().size() self.target_free_no_norm = fmodel.target_t() if (self.target_free_no_norm is not None): self.target_free_no_norm *= fmodel.f_calc_t().data().size() self.overall_scale_k1 = fmodel.scale_k1() self.number_of_test_reflections = fmodel.f_calc_t().data().size() self.number_of_work_reflections = fmodel.f_calc_w().data().size() self.number_of_reflections = fmodel.f_obs().data().size() self.number_of_reflections_merged = self.number_of_reflections if (fmodel.f_obs().anomalous_flag() in (None, True)): something, matches = fmodel.f_obs().match_bijvoet_mates() self.number_of_reflections_merged = matches.pairs().size() + \ matches.n_singles() self.mask_solvent_radius = mp.solvent_radius self.mask_shrink_radius = mp.shrink_truncation_radius self.mask_grid_step_factor = mp.grid_step_factor self.ml_phase_error = flex.mean(fmodel.phase_errors()) self.ml_coordinate_error = fmodel.model_error_ml() self.d_max, self.d_min = fmodel.f_obs().resolution_range() self.completeness_in_range = fmodel.f_obs().completeness( d_max=self.d_max) self.completeness_d_min_inf = fmodel.f_obs().completeness() f_obs_6 = fmodel.f_obs().resolution_filter(d_min=6) self.completeness_6_inf = f_obs_6.completeness() self.min_f_obs_over_sigma = fmodel.f_obs().min_f_over_sigma( return_none_if_zero_sigmas=True) self.sf_algorithm = fmodel.sfg_params.algorithm self.alpha_w, self.beta_w = fmodel.alpha_beta_w() self.alpha_work_min, self.alpha_work_max, self.alpha_work_mean = \ self.alpha_w.data().min_max_mean().as_tuple() self.beta_work_min, self.beta_work_max, self.beta_work_mean = \ self.beta_w.data().min_max_mean().as_tuple() self.fom = fmodel.figures_of_merit_work() self.fom_work_min, self.fom_work_max, self.fom_work_mean = \ self.fom.min_max_mean().as_tuple() self.pher_w = fmodel.phase_errors_work() self.pher_work_min, self.pher_work_max, self.pher_work_mean = \ self.pher_w.min_max_mean().as_tuple() self.pher_t = fmodel.phase_errors_test() self.pher_free_min, self.pher_free_max, self.pher_free_mean = \ self.pher_t.min_max_mean().as_tuple() self.bins = self.statistics_in_resolution_bins( fmodel=fmodel, free_reflections_per_bin=free_reflections_per_bin, max_number_of_bins=max_number_of_bins, n_bins=n_bins)
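Several of the summary statistics above rely on min_max_mean(), which returns a small object whose as_tuple() yields (min, max, mean) in one call. A trivial illustration with made-up figures of merit:

from cctbx.array_family import flex

fom = flex.double([0.21, 0.55, 0.93])
fom_min, fom_max, fom_mean = fom.min_max_mean().as_tuple()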
def run(args, log=None, ccp4_map=None, return_as_miller_arrays=False, nohl=False, return_f_obs=False, space_group_number=None, out=sys.stdout): if log is None: log = out inputs = mmtbx.utils.process_command_line_args( args=args, master_params=master_params()) got_map = False if ccp4_map: got_map = True broadcast(m="Parameters:", log=log) inputs.params.show(prefix=" ", out=out) params = inputs.params.extract() if (ccp4_map is None and inputs.ccp4_map is not None): broadcast(m="Processing input CCP4 map file: %s" % inputs.ccp4_map_file_name, log=log) ccp4_map = inputs.ccp4_map ccp4_map.show_summary(prefix=" ", out=out) got_map = True if (not got_map): raise Sorry("Map file is needed.") # m = ccp4_map if (m.space_group_number > 1): raise Sorry("Input map space group: %d. Must be P1." % m.space_group_number) broadcast(m="Input map information:", log=log) print >> out, "m.all() :", m.data.all() print >> out, "m.focus() :", m.data.focus() print >> out, "m.origin():", m.data.origin() print >> out, "m.nd() :", m.data.nd() print >> out, "m.size() :", m.data.size() print >> out, "m.focus_size_1d():", m.data.focus_size_1d() print >> out, "m.is_0_based() :", m.data.is_0_based() print >> out, "map: min/max/mean:", flex.min(m.data), flex.max( m.data), flex.mean(m.data) print >> out, "unit cell:", m.unit_cell_parameters # if not space_group_number: space_group_number = 1 if space_group_number <= 1: symmetry_flags = None else: symmetry_flags = maptbx.use_space_group_symmetry cs = crystal.symmetry(m.unit_cell_parameters, space_group_number) map_data = m.data # Get origin in grid units and new position of origin in grid units original_origin = map_data.origin() print >> out, "Input map has origin at grid point (%s,%s,%s)" % ( tuple(original_origin)) if params.output_origin_grid_units is not None: params.keep_origin = False new_origin = tuple(params.output_origin_grid_units) print >> out, "User-specified origin at grid point (%s,%s,%s)" % ( tuple(params.output_origin_grid_units)) if tuple(params.output_origin_grid_units) == tuple(original_origin): print >> out, "This is the same as the input origin. No origin shift."
elif params.keep_origin: new_origin = original_origin print >> out, "Keeping origin at grid point (%s,%s,%s)" % ( tuple(original_origin)) else: new_origin = ( 0, 0, 0, ) print >> out, "New origin at grid point (%s,%s,%s)" % (tuple(( 0, 0, 0, ))) # shift_cart is shift away from (0,0,0) if new_origin != ( 0, 0, 0, ): shift_cart = get_shift_cart(map_data=map_data, crystal_symmetry=cs, origin=new_origin) else: shift_cart = ( 0, 0, 0, ) map_data = maptbx.shift_origin_if_needed(map_data=map_data).map_data # generate complete set of Miller indices up to given high resolution d_min n_real = map_data.focus() crystal_gridding = maptbx.crystal_gridding( unit_cell=cs.unit_cell(), space_group_info=cs.space_group_info(), symmetry_flags=symmetry_flags, pre_determined_n_real=n_real) # d_min = params.d_min if (d_min is None and not params.box): d_min = maptbx.d_min_from_map(map_data=map_data, unit_cell=cs.unit_cell()) if (d_min is None): # box of reflections in |h|<N1/2, |k|<N2/2, 0<=|l|<N3/2 f_obs_cmpl = miller.structure_factor_box_from_map( map=map_data.as_double(), crystal_symmetry=cs, include_000=True) else: complete_set = miller.build_set(crystal_symmetry=cs, anomalous_flag=False, d_min=d_min) try: f_obs_cmpl = complete_set.structure_factors_from_map( map=map_data.as_double(), use_scale=True, anomalous_flag=False, use_sg=False) except Exception, e: if (str(e) == "cctbx Error: Miller index not in structure factor map."): msg = "Too high resolution requested. Try running with larger d_min." raise Sorry(msg) else: raise Sorry(str(e))
def run(args): centroids_filename = args[0] hkl = flex.miller_index() frame_obs = flex.double() x_obs = flex.double() y_obs = flex.double() phi_obs = flex.double() x_calc = flex.double() y_calc = flex.double() phi_calc = flex.double() with open(centroids_filename, "rb") as f: for i, line in enumerate(f.readlines()): tokens = line.split() if i == 0: print(tokens) assert tokens == [ "H", "K", "L", "Frame_obs", "X_obs", "Y_obs", "Phi_obs", "X_calc", "Y_calc", "Phi_calc", ] else: hkl.append([int(t) for t in tokens[:3]]) frame_obs.append(float(tokens[3])) x_obs.append(float(tokens[4])) y_obs.append(float(tokens[5])) phi_obs.append(float(tokens[6])) x_calc.append(float(tokens[7])) y_calc.append(float(tokens[8])) phi_calc.append(float(tokens[9])) phi_obs_deg = (180 / math.pi) * phi_obs x_residuals = x_calc - x_obs y_residuals = y_calc - y_obs phi_residuals = phi_calc - phi_obs mean_residuals_x = [] mean_residuals_y = [] mean_residuals_phi = [] phi = [] for i_phi in range(int(math.floor(flex.min(phi_obs_deg))), int(math.ceil(flex.max(phi_obs_deg)))): sel = (phi_obs_deg >= i_phi) & (phi_obs_deg < (i_phi + 1)) if sel.count(True) == 0: continue mean_residuals_x.append(flex.mean(x_residuals.select(sel))) mean_residuals_y.append(flex.mean(y_residuals.select(sel))) mean_residuals_phi.append(flex.mean(phi_residuals.select(sel))) phi.append(i_phi) from matplotlib import pyplot fig = pyplot.figure() ax = fig.add_subplot(311) pyplot.axhline(0, color="grey") ax.scatter(phi, mean_residuals_x) ax.set_xlabel("phi (deg)") ax.set_ylabel("mean residual_x") ax = fig.add_subplot(312) pyplot.axhline(0, color="grey") ax.scatter(phi, mean_residuals_y) ax.set_xlabel("phi (deg)") ax.set_ylabel("mean residual_y") ax = fig.add_subplot(313) pyplot.axhline(0, color="grey") ax.scatter(phi, mean_residuals_phi) ax.set_xlabel("phi (deg)") ax.set_ylabel("mean residual_phi") pyplot.show()
def get_average_cell_dimensions(self): a = flex.mean(self.all_uc_a_values) b = flex.mean(self.all_uc_b_values) c = flex.mean(self.all_uc_c_values) return a, b, c