def wR2(self, cutoff_factor=None):
  if cutoff_factor is None:
    return math.sqrt(2*self.objective_data_only)
  fo_sq = self.observations.fo_sq
  strong = fo_sq.data() >= cutoff_factor*fo_sq.sigmas()
  fo_sq = fo_sq.select(strong)
  fc_sq = self.fc_sq.select(strong)
  wght = self.weights.select(strong)
  fc_sq = fc_sq.data()
  fo_sq = fo_sq.data()
  fc_sq *= self.scale_factor()
  wR2 = flex.sum(wght*flex.pow2((fo_sq-fc_sq)))/flex.sum(wght*flex.pow2(fo_sq))
  return math.sqrt(wR2)
def run(args):
  from dials.util.options import OptionParser
  from dials.util.options import flatten_datablocks
  import libtbx.load_env

  usage = "%s [options] image_*.cbf" % (libtbx.env.dispatcher_name)

  parser = OptionParser(
    usage=usage,
    phil=phil_scope,
    read_datablocks=True,
    read_datablocks_from_images=True,
    epilog=help_message)

  params, options = parser.parse_args(show_diff_phil=True)
  datablocks = flatten_datablocks(params.input.datablock)

  # Note: this script only reads datablocks; there are no experiments or
  # reflections to test for.
  if len(datablocks) == 0:
    parser.print_help()
    exit()

  assert(len(datablocks) == 1)
  datablock = datablocks[0]
  imagesets = datablock.extract_imagesets()
  assert(len(imagesets) == 1)
  imageset = imagesets[0]
  images = imageset.indices()
  if params.frames:
    images = params.frames

  d_spacings = []
  intensities = []
  sigmas = []

  for indx in images:
    print 'For frame %d:' % indx
    d, I, sig = background(imageset, indx, n_bins=params.n_bins)

    print '%8s %8s %8s' % ('d', 'I', 'sig')
    for j in range(len(I)):
      print '%8.3f %8.3f %8.3f' % (d[j], I[j], sig[j])

    d_spacings.append(d)
    intensities.append(I)
    sigmas.append(sig)

  if params.plot:
    from matplotlib import pyplot
    fig = pyplot.figure()
    for d, I, sig in zip(d_spacings, intensities, sigmas):
      ds2 = 1/flex.pow2(d)
      pyplot.plot(ds2, I)
    pyplot.show()
def b_factor_sharpening_by_map_kurtosis_maximization(map_coeffs, show=True,
      b_sharp_best=None, b_only=False):
  ss = 1./flex.pow2(map_coeffs.d_spacings().data()) / 4.
  if(b_sharp_best is None):
    # Grid search over sharpening B-factors, maximizing map kurtosis.
    kurt = -999
    for b_sharp in range(-100, 100, 5):
      k_sharp = 1./flex.exp(-ss * b_sharp)
      map_coeffs_ = map_coeffs.deep_copy().customized_copy(
        data = map_coeffs.data()*k_sharp)
      fft_map = map_coeffs_.fft_map(resolution_factor = 0.25)
      fft_map.apply_sigma_scaling()
      map_data = fft_map.real_map_unpadded()
      o = maptbx.more_statistics(map_data)
      kurt_ = o.kurtosis()
      if(kurt_ > kurt):
        kurt = kurt_
        b_sharp_best = b_sharp
      if(show):
        print "b_sharp: %6.1f skewness: %6.4f kurtosis: %6.4f"%(b_sharp,
          o.skewness(), o.kurtosis())
  if(show): print "Best sharpening B-factor:", b_sharp_best
  k_sharp = 1./flex.exp(-ss * b_sharp_best)
  if(b_only): return b_sharp_best
  else:
    return map_coeffs.customized_copy(data = map_coeffs.data()*k_sharp)
def __call__(self, x_obs):
  shape, location, scale = self.params
  normal_part = (2 / (scale * math.sqrt(2 * math.pi)) *
                 flex.exp(-flex.pow2(x_obs - location) / (2 * scale**2)))
  cdf_part = 0.5 * (1 + scitbx.math.erf(
    shape * (x_obs - location) / (math.sqrt(2) * scale)))
  y_calc = normal_part * cdf_part
  return y_calc
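# A minimal numeric sanity check (pure Python, hypothetical values) for the
# skew-normal density evaluated by the __call__ above: with shape = 0 the
# erf term vanishes, cdf_part = 1/2, and the density reduces to the plain
# normal pdf.
import math

def _skew_normal(x, shape, location, scale):
  # Scalar version of the formula in __call__ above.
  normal_part = (2 / (scale * math.sqrt(2 * math.pi)) *
                 math.exp(-(x - location)**2 / (2 * scale**2)))
  cdf_part = 0.5 * (1 + math.erf(shape * (x - location) /
                                 (math.sqrt(2) * scale)))
  return normal_part * cdf_part

_x, _mu, _sigma = 1.3, 1.0, 0.5
_normal_pdf = math.exp(-(_x - _mu)**2 / (2 * _sigma**2)) / \
              (_sigma * math.sqrt(2 * math.pi))
assert abs(_skew_normal(_x, 0.0, _mu, _sigma) - _normal_pdf) < 1e-12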
def estimate_signal_to_noise(x, y):
  raise  # XXX function disabled: unconditional raise left in place
  if 1:
    x, y = interpolate(x, y)
    #x, y_tr = fourier_filter(x, y)
    x, y_tr = savitzky_golay_filter(x, y)
    noise = y - y_tr
  else:
    from scitbx.math import chebyshev_polynome
    from scitbx.math import chebyshev_lsq_fit
    x_obs, y_obs = x, y
    w_obs = flex.double(y_obs.size(), 1)
    w_obs[0] = 1e16
    w_obs[-1] = 1e16
    ## determining the number of terms takes much, much longer than the fit
    n_terms = chebyshev_lsq_fit.cross_validate_to_determine_number_of_terms(
      x_obs, y_obs, w_obs,
      min_terms=2, max_terms=30,
      n_goes=20, n_free=20)
    #n_terms = 7
    print "n_terms:", n_terms
    fit = chebyshev_lsq_fit.chebyshev_lsq_fit(n_terms, x_obs, y_obs, w_obs)
    fit_funct = chebyshev_polynome(
      n_terms, fit.low_limit, fit.high_limit, fit.coefs)
    y_fitted = fit_funct.f(x)
    y_tr = y_fitted
    n = y_tr.size()
    noise = y - y_tr
  noise_sq = flex.pow2(noise)
  from xfel.command_line.view_pixel_histograms import sliding_average
  #sigma_sq = sliding_average(noise_sq, n=31)
  sigma_sq = sliding_average(noise_sq, n=15)
  #sigma_sq = sliding_average(sigma_sq)
  #signal_to_noise = y/flex.sqrt(sigma_sq)
  import math
  signal_to_noise = y/math.sqrt(flex.mean(noise_sq[50:200]))
  #pyplot.plot(noise)
  #pyplot.plot(x,y)
  #pyplot.show()
  offset = 0.2 * flex.max(y)
  offset = 0
  pyplot.plot(x, y, linewidth=2)
  pyplot.plot(x, offset+y_tr, linewidth=2)
  pyplot.show()
  pyplot.plot(x, noise, linewidth=2)
  #pyplot.plot(x, flex.sqrt(sigma_sq), linewidth=2)
  #ax2 = pyplot.twinx()
  #ax2.plot(x, y)
  pyplot.show()
  pyplot.plot(x[:375], signal_to_noise[:375])
  #pyplot.xlim(
  #ax2 = pyplot.twinx()
  #ax2.plot(x, y)
  pyplot.show()
def pyplot(self):
  from matplotlib import pyplot
  pyplot.plot(self.x_obs, self.y_obs)
  pyplot.plot(self.x_obs, self.compute_y_calc())
  for i in range(self.n_gaussians):
    scale, mu, S = tuple(self.x[i*3:i*3+3])
    y_calc = scale * flex.exp(-flex.pow2(self.x_obs-mu) * S**2)
    pyplot.plot(self.x_obs, y_calc)
  pyplot.show()
def compute_functional_and_gradients(self):
  y_calc = self.compute_y_calc()
  delta_y = self.y_obs - y_calc
  f = flex.sum(flex.pow2(delta_y))
  g = flex.double()
  for funct in self.functions:
    partial_ders = funct.partial_derivatives(self.x_obs)
    for i, partial in enumerate(partial_ders):
      g.append(-2 * flex.sum(delta_y * partial))
  return f, g
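# A small finite-difference check (pure Python, hypothetical one-parameter
# model) of the gradient pattern used above: for f = sum((y_obs - y_calc)^2)
# the derivative is df/dp = -2 * sum(delta_y * d(y_calc)/dp).
_x_obs = [1.0, 2.0, 3.0]
_y_obs = [2.1, 3.9, 6.2]

def _functional(p):
  # Model y_calc = p * x, so the partial derivative of y_calc w.r.t. p is x.
  return sum((yo - p * x)**2 for x, yo in zip(_x_obs, _y_obs))

def _analytic_gradient(p):
  return -2 * sum((yo - p * x) * x for x, yo in zip(_x_obs, _y_obs))

_p, _eps = 1.5, 1e-6
_fd = (_functional(_p + _eps) - _functional(_p - _eps)) / (2 * _eps)
assert abs(_fd - _analytic_gradient(_p)) < 1e-5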
def do_gaussian_fit(scale, mu, sigma):
  start = mu - 6 * sigma
  stop = mu + 6 * sigma
  step = (stop - start)/1000
  x = flex.double(frange(start, stop, step))
  y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
  fit = curve_fitting.single_gaussian_fit(x, y)
  assert approx_equal(fit.a, scale, 1e-4)
  assert approx_equal(fit.b, mu, eps=1e-4)
  assert approx_equal(fit.c, sigma, eps=1e-4)
def compute_functional_and_gradients(self):
  slope = self.x[0]
  y_intercept = self.x[1]
  y_calc = slope * self.x_obs + y_intercept
  y_diff = self.y_obs - y_calc
  f = flex.sum(flex.pow2(y_diff))
  g = flex.double([
    flex.sum(-2 * y_diff * self.x_obs),
    flex.sum(-2 * y_diff)])
  return f, g
def explore(self):
  if(self.count == 0):
    self.T = (flex.mean(flex.pow2(self.simplexValue -
      flex.mean(self.simplexValue))))**0.5 * 10.0
    self.min_T = self.T / self.T_ratio
  elif(self.count % self.Nstep == 0):
    self.T = self.T*self.coolfactor
  for kk in range(1, self.dimension+1):
    self.FindCentroidPt(self.dimension+1-kk)
    self.FindReflectionPt(kk)
  self.sort()
  return  # end of this explore step
def estimate_signal_to_noise(x, y_noisy, y_smoothed, plot=False):
  """Estimate noise in spectra by subtracting a smoothed spectrum from the
     original noisy unsmoothed spectrum.

     See:
       The extraction of signal to noise values in x-ray absorption spectroscopy
       A. J. Dent, P. C. Stephenson, and G. N. Greaves
       Rev. Sci. Instrum. 63, 856 (1992); http://dx.doi.org/10.1063/1.1142627
  """
  noise = y_noisy - y_smoothed
  noise_sq = flex.pow2(noise)
  from xfel.command_line.view_pixel_histograms import sliding_average
  sigma_sq = sliding_average(noise_sq, n=31)
  sigma_sq = smoothing.savitzky_golay_filter(
    x.as_double(), flex.pow2(noise), half_window=20, degree=1)[1]
  sigma_sq.set_selected(sigma_sq <= 0, flex.mean(sigma_sq))
  # or do this instead to use the background region as the source of noise:
  #signal_to_noise = y_smoothed/math.sqrt(flex.mean(noise_sq[50:190]))
  signal_to_noise = y_smoothed/flex.sqrt(sigma_sq)
  #signal_to_noise.set_selected(x < 50, 0)
  #signal_to_noise.set_selected(x > 375, 0)
  if plot:
    from matplotlib import pyplot
    linewidth = 2
    pyplot.plot(x, y_noisy, linewidth=linewidth)
    pyplot.plot(x, y_smoothed, linewidth=linewidth)
    pyplot_label_axes()
    pyplot.show()
    pyplot.plot(x, noise, linewidth=linewidth, label="noise")
    pyplot.plot(x, flex.sqrt(sigma_sq), linewidth=linewidth, label="sigma")
    pyplot_label_axes()
    pyplot.legend(loc=2, prop={'size':20})
    pyplot.show()
    pyplot.plot(x, signal_to_noise, linewidth=linewidth)
    pyplot_label_axes()
    pyplot.show()
  return signal_to_noise
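# A minimal usage sketch for estimate_signal_to_noise above, with synthetic
# data; here the known noise-free signal stands in for the smoothed curve,
# whereas in practice a real smoother (e.g. the Savitzky-Golay filter used
# elsewhere in this section) would supply y_smoothed.
if __name__ == '__main__':
  from scitbx.array_family import flex
  x = flex.double([float(i) for i in range(500)])
  y_true = 100 * flex.exp(-flex.pow2(x - 250) / (2 * 30.0**2))
  y_noisy = y_true + 5 * (flex.random_double(y_true.size()) - 0.5)
  snr = estimate_signal_to_noise(x, y_noisy, y_smoothed=y_true, plot=False)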
def scale_factor(self, other, weights=None, cutoff_factor=None,
                 use_binning=False):
  """
  The analytical expression for the least squares scale factor.

  K = sum(w * yo * yc) / sum(w * yc^2)

  If the optional cutoff_factor argument is provided, only the reflections
  whose magnitudes are greater than cutoff_factor * max(yo) will be included
  in the calculation.
  """
  assert not use_binning or self.binner() is not None
  if use_binning:
    assert cutoff_factor is None
  assert other.size() == self.data().size()
  if not use_binning:
    if self.data().size() == 0:
      return None
    obs = self.data()
    calc = other.data()
    if cutoff_factor is not None:
      assert cutoff_factor < 1
      sel = obs >= flex.max(self.data()) * cutoff_factor
      obs = obs.select(sel)
      calc = calc.select(sel)
      if weights is not None:
        weights = weights.select(sel)
    if weights is None:
      return flex.sum(obs * calc) / flex.sum(flex.pow2(calc))
    else:
      return flex.sum(weights * obs * calc) \
           / flex.sum(weights * flex.pow2(calc))
  results = []
  for i_bin in self.binner().range_all():
    sel = self.binner().selection(i_bin)
    weights_sel = None
    if weights is not None:
      weights_sel = weights.select(sel)
    results.append(
      scale_factor(self.select(sel), other.select(sel), weights_sel))
  return binned_data(binner=self.binner(), data=results, data_fmt="%7.4f")
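# The docstring's formula K = sum(w * yo * yc) / sum(w * yc^2) is the
# minimiser of sum(w * (yo - K * yc)^2); a tiny unweighted check in pure
# Python with hypothetical numbers:
_yo = [2.0, 4.0, 6.0]
_yc = [1.1, 1.9, 3.2]
_K = sum(o * c for o, c in zip(_yo, _yc)) / sum(c * c for c in _yc)
_resid = lambda k: sum((o - k * c)**2 for o, c in zip(_yo, _yc))
assert _resid(_K) < _resid(_K + 0.01) and _resid(_K) < _resid(_K - 0.01)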
def partial_derivatives(self, x_obs):
  shape, location, scale = self.params
  exponential_part = (1/(math.sqrt(2 * math.pi)) *
                      flex.exp(-flex.pow2(x_obs - location)/(2 * scale**2)))
  normal_part = 2 / scale * exponential_part
  cdf_part = 0.5 * (1 + scitbx.math.erf(
    shape * (x_obs - location)/(math.sqrt(2) * scale)))
  d_normal_part_d_location = \
    2 / scale**3 * (x_obs - location) * exponential_part
  d_normal_part_d_scale = \
    2 / scale**4 * (flex.pow2(x_obs - location) - scale**2) * exponential_part
  exponential_part_with_shape = (
    1 / (math.sqrt(math.pi)) *
    flex.exp(-shape**2 * flex.pow2(x_obs - location)/(2 * scale**2)))
  d_cdf_d_shape = \
    (x_obs - location) / (math.sqrt(2) * scale) * exponential_part_with_shape
  d_cdf_d_location = \
    -shape / (math.sqrt(2) * scale) * exponential_part_with_shape
  d_cdf_d_scale = (-shape * (x_obs - location) *
                   exponential_part_with_shape / (math.sqrt(2) * scale**2))
  # product rule
  return (d_cdf_d_shape * normal_part,
          d_normal_part_d_location * cdf_part + d_cdf_d_location * normal_part,
          d_normal_part_d_scale * cdf_part + d_cdf_d_scale * normal_part)
def _find_peaks(self, grid_real, d_min):
  grid_real_binary = grid_real.deep_copy()
  rmsd = math.sqrt(
    flex.mean(
      flex.pow2(grid_real_binary.as_1d() -
                flex.mean(grid_real_binary.as_1d()))))
  grid_real_binary.set_selected(
    grid_real_binary < (self._params.rmsd_cutoff) * rmsd, 0)
  grid_real_binary.as_1d().set_selected(grid_real_binary.as_1d() > 0, 1)
  grid_real_binary = grid_real_binary.iround()
  from cctbx import masks

  # real space FFT grid dimensions
  cell_lengths = [self._n_points * d_min / 2 for i in range(3)]
  self._fft_cell = uctbx.unit_cell(cell_lengths + [90] * 3)

  flood_fill = masks.flood_fill(grid_real_binary, self._fft_cell)
  if flood_fill.n_voids() < 4:
    # Require at least peak at origin and one peak for each basis vector
    raise indexing.DialsIndexError(
      "Indexing failed: fft3d peak search failed to find sufficient number of peaks."
    )

  # the peak at the origin might have a significantly larger volume than the
  # rest so exclude any anomalously large peaks from determining minimum volume
  from scitbx.math import five_number_summary
  outliers = flex.bool(flood_fill.n_voids(), False)
  grid_points_per_void = flood_fill.grid_points_per_void()
  min_x, q1_x, med_x, q3_x, max_x = five_number_summary(grid_points_per_void)
  iqr_multiplier = 5
  iqr_x = q3_x - q1_x
  cut_x = iqr_multiplier * iqr_x
  outliers.set_selected(grid_points_per_void.as_double() > (q3_x + cut_x), True)
  # print q3_x + cut_x, outliers.count(True)

  isel = (grid_points_per_void > int(
    self._params.peak_volume_cutoff *
    flex.max(grid_points_per_void.select(~outliers)))).iselection()

  sites = flood_fill.centres_of_mass_frac().select(isel)
  volumes = flood_fill.grid_points_per_void().select(isel)
  return sites, volumes
def main(filenames, map_file, npoints=192, max_resolution=6, reverse_phi=False):
  rec_range = 1 / max_resolution

  image = ImageFactory(filenames[0])
  panel = image.get_detector()[0]
  beam = image.get_beam()
  s0 = beam.get_s0()
  pixel_size = panel.get_pixel_size()

  xlim, ylim = image.get_raw_data().all()

  xy = recviewer.get_target_pixels(panel, s0, xlim, ylim, max_resolution)

  s1 = panel.get_lab_coord(xy * pixel_size[0])  # FIXME: assumed square pixel
  s1 = s1 / s1.norms() * (1 / beam.get_wavelength())  # / is not supported...
  S = s1 - s0

  grid = flex.double(flex.grid(npoints, npoints, npoints), 0)
  cnts = flex.int(flex.grid(npoints, npoints, npoints), 0)

  for filename in filenames:
    print "Processing image", filename
    try:
      fill_voxels(ImageFactory(filename), grid, cnts, S, xy,
                  reverse_phi, rec_range)
    except:
      print " Failed to process. Skipped this."

  recviewer.normalize_voxels(grid, cnts)

  uc = uctbx.unit_cell((npoints, npoints, npoints, 90, 90, 90))
  ccp4_map.write_ccp4_map(map_file, uc, sgtbx.space_group("P1"),
                          (0, 0, 0), grid.all(), grid,
                          flex.std_string(["cctbx.miller.fft_map"]))
  return
  # Unreachable code below: an FFT of the squared map, kept for reference.
  from scitbx import fftpack
  fft = fftpack.complex_to_complex_3d(grid.all())
  grid_complex = flex.complex_double(
    reals=flex.pow2(grid),
    imags=flex.double(grid.size(), 0))
  grid_transformed = flex.abs(fft.backward(grid_complex))
  print flex.max(grid_transformed), flex.min(grid_transformed), \
    grid_transformed.all()
  ccp4_map.write_ccp4_map(map_file, uc, sgtbx.space_group("P1"),
                          (0, 0, 0), grid.all(), grid_transformed,
                          flex.std_string(["cctbx.miller.fft_map"]))
def target(self, vector):
  self.counter += 1
  result = 0
  length = flex.sum_sq(vector)
  if (length > self.Rmax2):
    result = 1e30
  else:
    new_coord = self.pdb.NMPerturb(self.modes, vector)
    self.she_engine.engine.update_coord(new_coord, self.new_indx)
    new_I = self.she_engine.engine.I()
    var = self.expt_s
    s, o = she.linear_fit(new_I[:5], self.expt_I[:5], var[:5])
    result = flex.sum(
      flex.pow2((self.expt_I - (s * new_I + o)) / self.expt_s))
    # result = flex.sum_sq( (self.expt_I-new_I) /self.expt_s )
    #print self.pdb.r, result
  return result
def _estimate_cc_true(self):
  # A1.2. Estimation of E(CC; S).

  # (i)
  var_intensities = flex.mean_and_variance(
    self.intensities.data()).unweighted_sample_variance()
  var_sigmas = flex.mean_and_variance(
    flex.pow2(self.intensities.sigmas())).mean()
  self.E_cc_true = var_intensities / (var_intensities + var_sigmas)

  # (ii)
  reindexed_intensities = self.intensities.change_basis(
    sgtbx.change_of_basis_op("-x,-y,-z")).map_to_asu()
  x, y = self.intensities.common_sets(
    reindexed_intensities, assert_is_similar_symmetry=False)
  self.cc_identity = CorrelationCoefficientAccumulator(x.data(), y.data())

  min_sd = 0.05
  min_sample = 10
  sigma_1 = max(min_sd, self.cc_sig_fac / 200**0.5)
  w1 = 0
  w2 = 0
  if sigma_1 > 0.0001:
    w1 = 1 / sigma_1**2
  if self.cc_identity.n() > min_sample:
    sigma_2 = max(min_sd, self.cc_sig_fac / self.cc_identity.n()**0.5)
    w2 = 1 / sigma_2**2

  assert (w1 + w2) > 0
  self.cc_true = (w1 * self.E_cc_true +
                  w2 * self.cc_identity.coefficient()) / (w1 + w2)

  logger.debug("cc_true = (w1 * E_cc_true + w2 * cc_identity)/(w1 + w2)")
  logger.debug("w1: %g", w1)
  logger.debug("w2: %g", w2)
  logger.debug("E_cc_true: %g", self.E_cc_true)
  logger.debug("cc_identity: %g", self.cc_identity.coefficient())
  logger.debug("cc_true: %g", self.cc_true)
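# The combination step above is standard inverse-variance weighting,
# w = 1/sigma^2: the estimate with the smaller sigma dominates the weighted
# mean. A scalar check with hypothetical numbers:
_e1, _sigma1 = 0.9, 0.05
_e2, _sigma2 = 0.7, 0.20
_w1, _w2 = 1 / _sigma1**2, 1 / _sigma2**2
_combined = (_w1 * _e1 + _w2 * _e2) / (_w1 + _w2)
assert abs(_combined - _e1) < abs(_combined - _e2)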
def estimate_init_weight(self):
  ## initial chi-score ##
  new_I = self.she_engine.engine.I()
  var = self.expt_s
  s, o = she.linear_fit(new_I, self.expt_I, var)
  chi_score = flex.sum(
    flex.pow2((self.expt_I - (s * new_I + o)) / self.expt_s))

  n_restraint = self.nbeads**2 / 20
  tot_res = 0
  for ii in range(10):  ## perturb 10 times to estimate initial restraint ##
    vector = flex.random_double(self.nbeads * 3) * self.step_size
    self.restraints = self.build_restraint(n_restraint)
    self.new_xyz = self.pdb_obj.perturb(vector)
    restraint = self.pdb_obj.beads.restraint(self.restraints, self.new_xyz)
    tot_res += restraint
  mean_res = tot_res / 10.0
  self.restraint_weight = chi_score / mean_res
def map_and_model_to_fmodel(map_data, xray_structure, atom_radius, d_min,
                            reset_adp=True):
  box = mmtbx.utils.extract_box_around_model_and_map(
    xray_structure=xray_structure,
    map_data=map_data,
    box_cushion=atom_radius)
  box.apply_mask_inplace(atom_radius=atom_radius)
  f_obs_complex = box.box_map_coefficients(d_min=d_min)
  f_obs = abs(f_obs_complex)
  if (flex.mean(f_obs.data()) < 1.e-6):
    return None
  xrs = box.xray_structure_box.deep_copy_scatterers()
  if (reset_adp):
    vals_init = xrs.extract_u_iso_or_u_equiv()
    xrs = xrs.set_b_iso(value=0)
    assert approx_equal(flex.mean(xrs.extract_u_iso_or_u_equiv()), 0.)
    f_calc = f_obs.structure_factors_from_scatterers(
      xray_structure=xrs).f_calc()
    sc = flex.sum(abs(f_obs).data()*abs(f_calc).data())/ \
         flex.sum(abs(f_calc).data()*abs(f_calc).data())
    f_calc = f_calc.array(data=f_calc.data() * sc)
    o = bulk_solvent.complex_f_kb_scaled(
      f1=f_obs_complex.data(),
      f2=f_calc.data(),
      b_range=flex.double(range(5, 505, 5)),
      ss=1. / flex.pow2(f_calc.d_spacings().data()) / 4.)
    xrs = xrs.set_b_iso(value=o.b())
    k_isotropic = flex.double(f_calc.data().size(), o.k())
    if (o.k() < 1.e-6):
      k_isotropic = flex.double(f_calc.data().size(), 1)
    xrs.set_u_iso(values=vals_init)
  fmodel = mmtbx.f_model.manager(f_obs=f_obs, xray_structure=xrs)
  if (reset_adp):
    fmodel.update_core(k_isotropic=k_isotropic)
  fmodel.update(target_name="ls_wunit_k1")
  fmodel.update_all_scales(
    update_f_part1=False, apply_back_trace=True, remove_outliers=False)
  return fmodel
def go(params, out=None):
  if out is None:
    out = sys.stdout
  data_array = []
  multies = []
  dmax = params.pregxs.d_max
  nparam = params.pregxs.fitting.n_coeff
  nfst = params.pregxs.fitting.n_fst_pass
  ntrials = params.pregxs.fitting.n_trials
  strials = params.pregxs.fitting.n_trials_simplex
  for item in params.pregxs.data:
    data = saxs_read_write.read_standard_ascii_qis(item)
    # m = 1.0/data.i[0]
    # data.multiply_add(m,0.0)
    data_array.append(data)
    # multies.append( m )
    if params.pregxs.scan:
      d_max_start = dmax - params.pregxs.fitting.delta
      d_max_stop = dmax + params.pregxs.fitting.delta
      n_step = params.pregxs.fitting.n_step
      scanner = d_max_scan(data, nparam, nfst, ntrials, d_max_start,
                           d_max_stop, n_step, strials)
    else:
      fitters = random_start_fixed_dmax(data, dmax, nparam, nfst, ntrials,
                                        n_simplex=strials)
      coefs = fitters.trials[fitters.chi_index].solution
      for cc in coefs:
        print cc,
      print item, "COEF"
      pr_fit = fitters.trials[fitters.chi_index].get_best_pofr().f(data.q)
      print flex.mean(flex.pow2((data.i - pr_fit) / (data.i + pr_fit))) * 4.0, \
        "CHI2"
def exercise_tanh_fit():
  # Curve fitting as used by Aimless for fitting CC1/2 plot:
  #   Curve fitting as suggested by Ed Pozharski to a tanh function
  #   of the form (1/2)(1 - tanh(z)) where z = (s - d0)/r,
  #   s = 1/d^2, d0 is the value of s at the half-falloff value, and r
  #   controls the steepness of falloff
  d = flex.double(
    [2.71, 2.15, 1.88, 1.71, 1.59, 1.49, 1.42, 1.36, 1.31, 1.26])
  x_obs = 1 / d**2
  y_obs = flex.double(
    [0.999, 0.996, 0.993, 0.984, 0.972, 0.948, 0.910, 0.833, 0.732, 0.685])
  fit = curve_fitting.tanh_fit(x_obs, y_obs, r=1, s0=1)
  r, s0 = fit.params
  f = curve_fitting.tanh(r, s0)
  y_calc = flex.double(f(x_obs))
  residual = flex.sum(flex.pow2(y_obs - y_calc))
  assert approx_equal(residual, 0.0023272873437026106)
  assert approx_equal(fit.params, (0.17930695756689238, 0.6901032957705017))
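# Direct evaluation of the model being fitted above, in pure Python: at
# s = d0 the curve (1/2)(1 - tanh((s - d0)/r)) sits exactly at its
# half-falloff value of 0.5, and smaller r gives a steeper falloff.
import math

def _half_falloff_curve(s, d0, r):
  return 0.5 * (1 - math.tanh((s - d0) / r))

assert abs(_half_falloff_curve(0.69, d0=0.69, r=0.18) - 0.5) < 1e-12
assert _half_falloff_curve(0.75, 0.69, 0.05) < _half_falloff_curve(0.75, 0.69, 0.5)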
def get_ks_dist(self):
  self.dist_mat = []
  n_trials = len(self.fitters)
  self.saved_trials = n_trials
  for ii in range(n_trials):
    self.dist_mat.append(flex.double([0] * n_trials))
  for ii in range(n_trials):
    for jj in range(ii):
      d_cdf = self.cdfs[ii] - self.cdfs[jj]
      max_d_cdf = flex.max(flex.abs(d_cdf))
      self.dist_mat[ii][jj] = max_d_cdf
      self.dist_mat[jj][ii] = max_d_cdf
  average_mcdf = flex.double()
  for ii in range(n_trials):
    average_mcdf.append(flex.mean(self.dist_mat[ii]))
  self.best_index = flex.min_index(average_mcdf)
  self.dmax_best = self.d_array[self.best_index]
  self.mcdf_mean = average_mcdf[self.best_index]
  self.mcdf_var = flex.mean(
    flex.pow2(self.dist_mat[self.best_index] - self.mcdf_mean))
  self.mcdf_sigma = math.sqrt(self.mcdf_var)
def run(self, args=None):
  """Execute the script."""
  # Parse the command line
  self.params, options = self.parser.parse_args(args, show_diff_phil=True)

  # Configure the logging
  dials.util.log.config(
    verbosity=options.verbose, logfile=self.params.output.log)

  if self.params.hklin is None:
    self.parser.print_help()
    sys.exit()

  iobs = self._extract_data_from_mtz()
  i_p1 = merge_in_P1(iobs)
  f_p1 = truncate(i_p1)[1]
  fsq_p1 = f_p1.customized_copy(data=flex.pow2(f_p1.data()))

  logger.info("R_friedel(F) = {0:.5f}".format(r_friedel(f_p1)))
  logger.info("R_friedel(F^2) = {0:.5f}".format(r_friedel(fsq_p1)))
  logger.info("R_friedel(I) = {0:.5f}".format(r_friedel(i_p1)))
  return
def squares_of_complex(m1):
  a1 = flex.pow2(m1.parts()[0])
  a2 = flex.pow2(m1.parts()[1])
  a3 = a1 + a2
  return a3
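# squares_of_complex computes |z|^2 element-wise via the identity
# a^2 + b^2 == |a + bi|^2; a scalar check in pure Python:
_z = 3.0 + 4.0j
assert abs((_z.real**2 + _z.imag**2) - abs(_z)**2) < 1e-12  # both 25.0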
def partial_derivatives(self, x_obs):
  a, b, c = self.params
  exponential_part = flex.exp(-flex.pow2(x_obs - b) / (2 * c**2))
  return (exponential_part,
          a * (x_obs - b) / c**2 * exponential_part,
          a * flex.pow2(x_obs - b) / c**3 * exponential_part)
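# A finite-difference check (pure Python, hypothetical numbers) that the
# three partials returned above are d/da, d/db and d/dc of the Gaussian
# y = a * exp(-(x - b)^2 / (2 * c^2)):
import math
_a, _b, _c, _x = 2.0, 1.0, 0.5, 1.2
_f = lambda a, b, c: a * math.exp(-(_x - b)**2 / (2 * c**2))
_e = math.exp(-(_x - _b)**2 / (2 * _c**2))
_analytic = (_e,
             _a * (_x - _b) / _c**2 * _e,
             _a * (_x - _b)**2 / _c**3 * _e)
_eps = 1e-7
_fd = ((_f(_a + _eps, _b, _c) - _f(_a - _eps, _b, _c)) / (2 * _eps),
       (_f(_a, _b + _eps, _c) - _f(_a, _b - _eps, _c)) / (2 * _eps),
       (_f(_a, _b, _c + _eps) - _f(_a, _b, _c - _eps)) / (2 * _eps))
assert all(abs(p - q) < 1e-5 for p, q in zip(_analytic, _fd))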
def run_0(symbol = "C 2"): space_group_info = sgtbx.space_group_info(symbol = symbol) xrs = random_structure.xray_structure( space_group_info = space_group_info, elements = ["N"]*50, volume_per_atom = 100.0, random_u_iso = True) # b_cart = adptbx.random_traceless_symmetry_constrained_b_cart( crystal_symmetry=xrs.crystal_symmetry()) u_star = adptbx.u_cart_as_u_star(xrs.unit_cell(), adptbx.b_as_u(b_cart)) # F = xrs.structure_factors(d_min = 1.5).f_calc() k_anisotropic = mmtbx.f_model.ext.k_anisotropic(F.indices(), u_star) # bin_selections = [] F.setup_binner(reflections_per_bin=50) for i_bin in F.binner().range_used(): sel = F.binner().selection(i_bin) bin_selections.append(sel) # d_spacings = F.d_spacings().data() ss = 1./flex.pow2(d_spacings) / 4. k_mask_tmp = mmtbx.f_model.ext.k_mask(ss, 0.35, 80.) k_mask = flex.double(F.data().size(), 0) k_isotropic = flex.double(F.data().size(), 0) for s in bin_selections: d = d_spacings.select(s) k_mask.set_selected(s, flex.mean(k_mask_tmp.select(s))) k_isotropic.set_selected(s, random.randint(1,10)) # fmodel = mmtbx.f_model.manager( xray_structure = xrs, f_obs = abs(F), k_isotropic = k_isotropic, k_anisotropic = k_anisotropic, k_mask = k_mask) f_calc = fmodel.f_calc() f_masks = fmodel.f_masks() f_model = fmodel.f_model() f_obs = abs(f_model) r_free_flags = f_obs.generate_r_free_flags(use_lattice_symmetry=False) # assert approx_equal(bulk_solvent.r_factor(f_obs.data(), f_model.data()), 0) aso = scaler.run( f_obs = f_obs, f_calc = f_calc, f_mask = f_masks, r_free_flags = r_free_flags, bin_selections = bin_selections, number_of_cycles = 500, auto_convergence_tolerance = 1.e-9, ss = ss, try_poly = True, try_expanal = True, try_expmin = True, verbose = False) assert approx_equal(aso.r_final, 0.00037, 0.00001) assert approx_equal(aso.r_low, 0.00002, 0.00001) assert approx_equal(aso.r_high, 0.00006, 0.00001) assert approx_equal( bulk_solvent.r_factor(f_obs.data(), abs(aso.core.f_model).data(), 1), bulk_solvent.r_factor(f_obs.data(), abs(aso.core.f_model).data()))
def run(args):
  import matplotlib
  matplotlib.use("Agg")
  import libtbx.load_env

  usage = "%s [options]" % libtbx.env.dispatcher_name

  parser = OptionParser(
    usage=usage, phil=phil_scope, check_format=False, epilog=help_message)

  params, options, args = parser.parse_args(
    show_diff_phil=True, return_unhandled=True)

  for mtz in args:
    print(mtz)
    assert os.path.isfile(mtz), mtz

    import iotbx.merging_statistics
    i_obs = iotbx.merging_statistics.select_data(mtz, data_labels=params.labels)
    if params.space_group is not None:
      i_obs = i_obs.customized_copy(space_group_info=params.space_group)

    from scitbx.array_family import flex

    # set the sigmas to 1, and calculate the mean intensities and internal
    # variances
    intensities_copy = i_obs.customized_copy(
      sigmas=flex.double(i_obs.size(), 1))
    merging_internal = intensities_copy.merge_equivalents(
      use_internal_variance=True)
    merged = merging_internal.array()

    merging_external = i_obs.merge_equivalents(use_internal_variance=False)

    sigmas_internal = merging_internal.array().sigmas()
    sigmas_external = merging_external.array().sigmas()

    variances_internal = flex.pow2(sigmas_internal)
    variances_external = flex.pow2(sigmas_external)

    n_bins = 100
    i_obs.setup_binner_counting_sorted(n_bins=n_bins)

    sigmas_ratio = sigmas_external / sigmas_internal
    variance_ratio = variances_external / variances_internal

    array_sr = merging_external.array().customized_copy(
      data=sigmas_ratio, sigmas=None)
    array_sr.use_binning_of(i_obs)
    mean_sr = array_sr.mean(use_binning=True)

    ds2 = mean_sr.binner.bin_centers(2)
    sr = mean_sr.data[1:-1]

    array_vr = merging_external.array().customized_copy(
      data=variance_ratio, sigmas=None)
    array_vr.use_binning_of(i_obs)
    mean_vr = array_vr.mean(use_binning=True)

    d_star_sq = mean_vr.binner.bin_centers(2)
    vr = mean_vr.data[1:-1]

    prefix = params.prefix
    if prefix is None:
      prefix = ""

    from matplotlib import pyplot
    pyplot.style.use("ggplot")

    pyplot.plot(d_star_sq, sr)
    ax = pyplot.gca()
    xticks = ax.get_xticks()
    xticks_d = [
      "%.2f" % uctbx.d_star_sq_as_d(ds2) if ds2 > 0 else 0 for ds2 in xticks
    ]
    ax.set_xticklabels(xticks_d)
    pyplot.xlabel("d spacing (A)")
    pyplot.ylabel("<sigI_ext/sigI_int>")
    pyplot.savefig("%ssigmas_ratio.png" % prefix)
    pyplot.clf()

    pyplot.plot(d_star_sq, vr)
    ax = pyplot.gca()
    xticks = ax.get_xticks()
    xticks_d = [
      "%.2f" % uctbx.d_star_sq_as_d(ds2) if ds2 > 0 else 0 for ds2 in xticks
    ]
    ax.set_xticklabels(xticks_d)
    pyplot.xlabel("d spacing (A)")
    pyplot.ylabel("<varI_ext/varI_int>")
    pyplot.savefig("%svariances_ratio.png" % prefix)
    pyplot.clf()
def background(imageset, indx, n_bins):
  from dials.array_family import flex
  from libtbx.phil import parse
  from scitbx import matrix
  import math

  detector = imageset.get_detector()
  beam = imageset.get_beam()
  assert(len(detector) == 1)
  detector = detector[0]
  trusted = detector.get_trusted_range()

  n = matrix.col(detector.get_normal()).normalize()
  b = matrix.col(beam.get_s0()).normalize()
  wavelength = beam.get_wavelength()

  if math.fabs(b.dot(n)) < 0.95:
    from libtbx.utils import Sorry
    raise Sorry('Detector not perpendicular to beam')

  data = imageset.get_raw_data(indx)
  assert(len(data) == 1)
  data = data[0]

  negative = (data < 0)
  hot = (data > int(round(trusted[1])))
  bad = negative | hot

  from dials.algorithms.spot_finding.factory import SpotFinderFactory
  from dials.algorithms.spot_finding.factory import phil_scope

  data = data.as_double()

  from dxtbx import datablock

  spot_params = phil_scope.fetch(source=parse("")).extract()
  threshold_function = SpotFinderFactory.configure_threshold(
    spot_params, datablock.DataBlock([imageset]))
  peak_pixels = threshold_function.compute_threshold(data, ~bad)
  signal = data.select(peak_pixels.iselection())
  background = data.select((~bad & ~peak_pixels).iselection())

  # print some summary information
  print 'Mean background: %.3f' % (flex.sum(background) / background.size())
  print 'Max/total signal pixels: %.0f / %.0f' % (
    flex.max(signal), flex.sum(signal))
  print 'Peak/background/hot pixels: %d / %d / %d' % (
    peak_pixels.count(True), background.size(), hot.count(True))

  # compute histogram of two-theta values, then same weighted
  # by pixel values, finally divide latter by former to get
  # the radial profile out, need to set the number of bins
  # sensibly; flex.histogram does not allow weights so use
  # numpy.histogram to get the same effect... inspired by
  # method in PyFAI

  data = data.as_1d()
  two_theta_array = detector.get_two_theta_array(beam.get_s0())
  two_theta_array.set_selected((bad | peak_pixels).iselection(), 0.0)
  data.set_selected((bad | peak_pixels).iselection(), 0.0)

  # new fangled flex.weighted_histogram :-)
  h0 = flex.weighted_histogram(two_theta_array, n_slots=n_bins)
  h1 = flex.weighted_histogram(two_theta_array, data, n_slots=n_bins)
  h2 = flex.weighted_histogram(two_theta_array, data * data, n_slots=n_bins)

  d0 = h0.slots()
  d1 = h1.slots()
  d2 = h2.slots()

  I = d1 / d0
  I2 = d2 / d0
  sig = flex.sqrt(I2 - flex.pow2(I))

  tt = h0.slot_centers()
  d_spacings = wavelength / (2.0 * flex.sin(0.5 * tt))

  return d_spacings, I, sig
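# The radial sigma above relies on the per-bin identity
# Var(I) = <I^2> - <I>^2; a scalar check with hypothetical pixel values:
import math
_vals = [2.0, 4.0, 4.0, 6.0]
_mean = sum(_vals) / len(_vals)
_mean_sq = sum(v * v for v in _vals) / len(_vals)
assert abs(math.sqrt(_mean_sq - _mean**2) - math.sqrt(2.0)) < 1e-12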
def setup_weighting_scheme(self, weight):
  self.expt_var = flex.pow2(self.data.s)
  if (weight == 'i'):
    self.weights = self.data.i
  elif (weight == 's'):
    self.weights = self.data.s
def __call__(self, x_obs):
  a, b, c = self.params
  y_calc = a * flex.exp(-flex.pow2(x_obs - b) / (2 * c**2))
  return y_calc
def get_goniometer_shadow_masker(self, goniometer=None):
  from dials.util.masking import GoniometerShadowMaskGenerator
  from scitbx.array_family import flex
  import math

  coords = flex.vec3_double(((0, 0, 0),))

  alpha = flex.double_range(0, 190, step=10) * math.pi / 180
  r = flex.double(alpha.size(), 40)
  x = flex.double(r.size(), 107.61)
  y = -r * flex.sin(alpha)
  z = -r * flex.cos(alpha)
  coords.extend(flex.vec3_double(x, y, z))

  coords.extend(flex.vec3_double((  # fixed
    (107.49, 7.84, 39.49), (107.39, 15.69, 38.97), (107.27, 23.53, 38.46),
    (107.16, 31.37, 37.94), (101.76, 33.99, 36.25), (96.37, 36.63, 34.56),
    (90.98, 39.25, 33.00), (85.58, 41.88, 31.18), (80.89, 47.06, 31.00),
    (76.55, 51.51, 31.03), (72.90, 55.04, 31.18), (66.86, 60.46, 31.67),
    (62.10, 64.41, 32.25),
  )))

  alpha = flex.double_range(180, 370, step=10) * math.pi / 180
  r = flex.double(alpha.size(), 33)
  x = (flex.sqrt(flex.pow2(r * flex.sin(alpha)) + 89.02**2) *
       flex.cos((50 * math.pi/180) - flex.atan(r/89.02 * flex.sin(alpha))))
  y = (flex.sqrt(flex.pow2(r * flex.sin(alpha)) + 89.02**2) *
       flex.sin((50 * math.pi/180) - flex.atan(r/89.02 * flex.sin(alpha))))
  z = -r * flex.cos(alpha)
  coords.extend(flex.vec3_double(x, y, z))

  coords.extend(flex.vec3_double((  # fixed
    (62.10, 64.41, -32.25), (66.86, 60.46, -31.67), (72.90, 55.04, -31.18),
    (76.55, 51.51, -31.03), (80.89, 47.06, -31.00), (85.58, 41.88, -31.18),
    (90.98, 39.25, -33.00), (96.37, 36.63, -34.56), (101.76, 33.99, -36.25),
    (107.16, 31.37, -37.94), (107.27, 23.53, -38.46), (107.39, 15.69, -38.97),
    (107.49, 7.84, -39.49), (107.61, 0.00, -40.00),
  )))

  # I23 end station coordinate system:
  #   X-axis: positive direction is facing away from the storage ring (from
  #     sample towards goniometer)
  #   Y-axis: positive direction is vertically up
  #   Z-axis: positive direction is in the direction of the beam (from
  #     sample towards detector)
  #   K-axis (kappa): at an angle of +50 degrees from the X-axis
  #   K & phi rotation axes: clockwise rotation is positive (right hand
  #     thumb rule)
  #   Omega-axis: along the X-axis; clockwise rotation is positive
  #
  # End station x-axis is parallel to ImgCIF x-axis
  # End station z-axis points in opposite direction to ImgCIF definition
  #   (ImgCIF: The Z-axis is derived from the source axis which goes from
  #   the sample to the source)
  # Consequently end station y-axis (to complete set following right hand
  #   rule) points in opposite direction to ImgCIF y-axis.
  # Kappa arm aligned with -y in ImgCIF convention

  from rstbx.cftbx.coordinate_frame_helpers import align_reference_frame
  from scitbx import matrix
  R = align_reference_frame(matrix.col((1, 0, 0)), matrix.col((1, 0, 0)),
                            matrix.col((0, 1, 0)), matrix.col((0, -1, 0)))
  coords = R.elems * coords

  if goniometer is None:
    goniometer = self.get_goniometer()
  return GoniometerShadowMaskGenerator(
    goniometer, coords, flex.size_t(len(coords), 1))
def refine(self, trial):
  print "--------------Trial %d-----------------" % trial, time.ctime()
  self.working_model = self.start_model.deep_copy()
  self.nlm_coefs = self.start_nlm_coefs.deep_copy()
  self.best_nlm_coefs = self.start_nlm_coefs.deep_copy()
  self.best_i = self.start_i.deep_copy()
  self.lowest_score = self.start_score
  init_scores = flex.double()
  for ii in range(10):
    self.modify()
    init_scores.append(self.target())
  mean = flex.mean(init_scores)
  self.deltaS = smath.sqrt(flex.sum(flex.pow2(init_scores - mean)) / 10.0)
  self.T = self.deltaS * 100
  self.nsteps = 200
  self.score = mean
  self.working_model = self.start_model.deep_copy()

  while (self.T > self.deltaS / 2.0):
    self.n_reject = 0
    for ii in range(self.nsteps):
      self.move()
    print "Number of Accept/Reject: %d/%d" % (
      self.nsteps - self.n_reject, self.n_reject)
    if (self.n_reject > self.nsteps * 0.9):
      print "Too Many rejections (%d), quit at temperature (%f)" % (
        self.n_reject, self.T)
      break
    self.T = self.T * 0.9

  out = open(self.prefix + str(trial) + '_final.iq', 'w')
  self.nlm_array.load_coefs(self.nlm, self.best_nlm_coefs)
  best_i = self.zm.calc_intensity_nlm(self.nlm_array)
  best_i = best_i / best_i[0] * self.scale_2_expt
  for qq, ic, io in zip(self.data.q, best_i, self.data.i * self.scale_2_expt):
    print >> out, qq, ic, io
  out.close()

  print "total number of moves %d" % self.counter
  print "total number of accepted moves %d" % self.n_accept

  if (self.pdb_nlm is not None):
    align_obj = fft_align.align(
      self.pdb_nlm, self.nlm_array, nmax=self.nmax, refine=True)
    mean = abs(self.best_nlm_coefs[0])
    var = flex.sum(flex.norm(self.best_nlm_coefs))
    sigma = smath.sqrt(var - mean * mean)
    cc = align_obj.best_score
    cc = (cc - mean * self.pdb_m) / (sigma * self.pdb_s)
    print "C.C. (PDB, trial%6d) = %8.5f, Score = %8.5f" % (
      trial, cc, self.lowest_score)
    self.best_nlm_coefs = align_obj.moving_nlm.coefs()

  reconst_model = self.reconst_model(self.best_nlm_coefs)
  xplor_map_type(reconst_model, self.np_on_grid, self.rmax,
                 file_name=self.prefix + str(trial) + '_final_rbt.xplor')
  xplor_map_type(self.best_model, self.np_on_grid, self.rmax,
                 file_name=self.prefix + str(trial) + '_final.xplor')
  print "-----------End of Trial %d--------------" % trial, time.ctime()
def compute_functional(self, params):
  self.x = params
  y_calc = self.compute_y_calc()
  delta_y = self.y_obs - y_calc
  f = flex.sum(flex.pow2(delta_y))
  return f
def common_mode(self, img, stddev, mask):
  """The common_mode() function returns the mode of image stored in
  the array pointed to by @p img.  @p mask must be such that the @p
  stddev at the selected pixels is greater than zero.

  @param img    2D integer array of the image
  @param stddev 2D integer array of the standard deviation of each
                pixel in @p img
  @param mask   2D Boolean array, @c True if the pixel is to be
                included, @c False otherwise
  @return       Mode of the image, as a real number
  """

  # Flatten the image and take out inactive pixels XXX because we
  # cannot take means and medians of 2D arrays?
  img_1d = img.as_1d().select(mask.as_1d()).as_double()
  assert img_1d.size() > 0

  if (self.common_mode_correction == "mean"):
    # The common mode is approximated by the mean of the pixels with
    # signal-to-noise ratio less than a given threshold.  XXX Breaks
    # if the selection is empty!
    THRESHOLD_SNR = 2
    img_snr = img_1d / stddev.as_double().as_1d().select(mask.as_1d())
    return (flex.mean(img_1d.select(img_snr < THRESHOLD_SNR)))

  elif (self.common_mode_correction == "median"):
    return (flex.median(img_1d))

  # Identify the common-mode correction as the peak of the histogram
  # of pixel values (the "standard" common-mode correction, as
  # previously implemented in this class).
  hist_min = -40
  hist_max = 40
  n_slots = 100

  hist = flex.histogram(img_1d, hist_min, hist_max, n_slots=n_slots)
  slots = hist.slots()
  i = flex.max_index(slots)
  common_mode = list(hist.slot_infos())[i].center()

  if (self.common_mode_correction == "mode"):
    return (common_mode)

  # Determine the common-mode correction from the peak of a single
  # Gaussian function fitted to the histogram.
  from scitbx.math.curve_fitting import single_gaussian_fit
  x = hist.slot_centers()
  y = slots.as_double()
  fit = single_gaussian_fit(x, y)
  scale, mu, sigma = fit.a, fit.b, fit.c
  self.logger.debug("fitted gaussian: mu=%.3f, sigma=%.3f" % (mu, sigma))
  mode = common_mode
  common_mode = mu
  if abs(mode - common_mode) > 1000:
    common_mode = mode  # XXX
  self.logger.debug(
    "delta common mode corrections: %.3f" % (mode - common_mode))

  if 0 and abs(mode - common_mode) > 0:
    #if 0 and skew > 0.5:
    # view histogram and fitted gaussian
    from numpy import exp
    from matplotlib import pyplot
    x_all = x
    n, bins, patches = pyplot.hist(
      section_img.as_1d().as_numpy_array(), bins=n_slots,
      range=(hist_min, hist_max))
    y_all = scale * flex.exp(-flex.pow2(x_all - mu) / (2 * sigma**2))
    scale = slots[flex.max_index(slots)]
    y_all *= scale / flex.max(y_all)
    pyplot.plot(x_all, y_all)
    pyplot.show()

  return (common_mode)
def run(argv=None):
  """Compute mean, standard deviation, and maximum projection images
  from a set of CSPAD cbf images given on the command line.

  @param argv Command line argument list
  @return     @c 0 on successful termination, @c 1 on error, and @c 2
              for command line syntax errors
  """
  import libtbx.load_env
  from libtbx import option_parser
  from scitbx.array_family import flex
  from dxtbx.format.Registry import Registry
  from xfel.cftbx.detector.cspad_cbf_tbx import cbf_file_to_basis_dict, \
    write_cspad_cbf
  # from xfel.cxi.cspad_ana import cspad_tbx
  # from iotbx.detectors.cspad_detector_formats import reverse_timestamp

  if argv is None:
    argv = sys.argv
  command_line = (option_parser.option_parser(
    usage="%s [-v] [-a PATH] [-m PATH] [-s PATH] " \
          "image1 image2 [image3 ...]" % libtbx.env.dispatcher_name)
                  .option(None, "--average-path", "-a",
                          type="string",
                          default=None,
                          dest="avg_path",
                          metavar="PATH",
                          help="Write average image to PATH")
                  .option(None, "--maximum-path", "-m",
                          type="string",
                          default=None,
                          dest="max_path",
                          metavar="PATH",
                          help="Write maximum projection image to PATH")
                  .option(None, "--stddev-path", "-s",
                          type="string",
                          default=None,
                          dest="stddev_path",
                          metavar="PATH",
                          help="Write standard deviation image to PATH")
                  .option(None, "--verbose", "-v",
                          action="store_true",
                          default=False,
                          dest="verbose",
                          help="Print more information about progress")
                  ).process(args=argv[1:])

  # Note that it is not an error to omit the output paths, because
  # certain statistics could still be printed, e.g. with the verbose
  # option.
  paths = command_line.args
  if len(paths) == 0:
    command_line.parser.print_usage(file=sys.stderr)
    return 2

  # Loop over all images and accumulate statistics.
  nfail = 0
  nmemb = 0
  for path in paths:
    if command_line.options.verbose:
      sys.stdout.write("Processing %s...\n" % path)

    try:
      # Promote the image to double-precision floating point type.
      # All real-valued flex arrays have the as_double() function.
      # Warn if the header items across the set of images do not match
      # up.  Note that discrepancies regarding the image size are
      # fatal.
      if not 'reader' in locals():
        reader = Registry.find(path)
      img = reader(path)
      if 'detector' in locals():
        test_detector = img.get_detector()
        if len(test_detector) != len(detector):
          sys.stderr.write(
            "Detectors do not have the same number of panels\n")
          return 1
        for t, d in zip(test_detector, detector):
          if t.get_image_size() != d.get_image_size():
            sys.stderr.write("Panel sizes do not match\n")
            return 1
          if t.get_pixel_size() != d.get_pixel_size():
            sys.stderr.write("Pixel sizes do not match\n")
            return 1
          if t.get_d_matrix() != d.get_d_matrix():
            sys.stderr.write(
              "Detector panels are not all in the same location. "
              "The average will use the positions of the first image.\n")
        detector = test_detector
      else:
        detector = img.get_detector()

      data = [img.get_raw_data()[i].as_1d().as_double()
              for i in range(len(detector))]
      wavelength = img.get_beam().get_wavelength()
      distance = flex.mean(
        flex.double([d.get_directed_distance() for d in detector]))
    except Exception:
      nfail += 1
      continue

    # The sum-of-squares image is accumulated using long integers, as
    # this delays the point where overflow occurs.  But really, this
    # is just a band-aid...
    if nmemb == 0:
      max_img = copy.deepcopy(data)
      sum_distance = distance
      sum_img = copy.deepcopy(data)
      ssq_img = [flex.pow2(d) for d in data]
      sum_wavelength = wavelength
      metro = cbf_file_to_basis_dict(path)
    else:
      sel = [(d > max_d).as_1d() for d, max_d in zip(data, max_img)]
      for d, max_d, s in zip(data, max_img, sel):
        max_d.set_selected(s, d.select(s))
      sum_distance += distance
      for d, sum_d in zip(data, sum_img):
        sum_d += d
      for d, ssq_d in zip(data, ssq_img):
        ssq_d += flex.pow2(d)
      sum_wavelength += wavelength
    nmemb += 1

  # Early exit if no statistics were accumulated.
  if command_line.options.verbose:
    sys.stderr.write("Processed %d images (%d failed)\n" % (nmemb, nfail))
  if nmemb == 0:
    return 0

  # Calculate averages for measures where other statistics do not make
  # sense.  Note that avg_img is required for stddev_img.
  avg_img = [sum_d.as_double() / nmemb for sum_d in sum_img]
  avg_distance = sum_distance / nmemb
  avg_wavelength = sum_wavelength / nmemb

  def make_tiles(data, detector):
    """
    Assemble a tiles dictionary as required by write_cspad_cbf, consisting
    of 4 arrays of shape 8x185x388.  Assumes the order in the data array
    matches the order of the enumerated detector panels.
    """
    assert len(data) == 64
    tiles = {}
    s, f = 185, 194

    for q_id in range(4):
      tiles[0, q_id] = flex.double((flex.grid(s * 8, f * 2)))
      for s_id in range(8):
        for a_id in range(2):
          asic_idx = (q_id * 16) + (s_id * 2) + a_id
          asic = data[asic_idx]
          asic.reshape(flex.grid((s, f)))
          tiles[0, q_id].matrix_paste_block_in_place(
            asic, s_id * s, a_id * f)
      tiles[0, q_id].reshape(flex.grid((8, s, f * 2)))
    return tiles

  # Output the average image, maximum projection image, and standard
  # deviation image, if requested.
  if command_line.options.avg_path is not None:
    tiles = make_tiles(avg_img, detector)
    write_cspad_cbf(tiles, metro, 'cbf', None, command_line.options.avg_path,
                    avg_wavelength, avg_distance)

  if command_line.options.max_path is not None:
    tiles = make_tiles(max_img, detector)
    write_cspad_cbf(tiles, metro, 'cbf', None, command_line.options.max_path,
                    avg_wavelength, avg_distance)

  if command_line.options.stddev_path is not None:
    stddev_img = [ssq_d.as_double() - sum_d.as_double() * avg_d
                  for ssq_d, sum_d, avg_d in zip(ssq_img, sum_img, avg_img)]
    # Accumulating floating-point numbers introduces errors, which may
    # cause negative variances.  Since a two-pass approach is
    # unacceptable, the standard deviation is clamped at zero.
    for stddev_d in stddev_img:
      stddev_d.set_selected(stddev_d < 0, 0)
    if nmemb == 1:
      stddev_img = [flex.sqrt(stddev_d) for stddev_d in stddev_img]
    else:
      stddev_img = [flex.sqrt(stddev_d / (nmemb - 1))
                    for stddev_d in stddev_img]
    tiles = make_tiles(stddev_img, detector)
    write_cspad_cbf(tiles, metro, 'cbf', None,
                    command_line.options.stddev_path, avg_wavelength,
                    avg_distance)

  return 0
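# The stddev image above uses ssq - sum * avg == sum(x^2) - n * mean^2,
# which is exactly (n - 1) times the sample variance; a scalar check with
# hypothetical numbers:
_xs = [1.0, 2.0, 4.0]
_n = len(_xs)
_mean = sum(_xs) / _n
_lhs = sum(x * x for x in _xs) - sum(_xs) * _mean
_sample_var = sum((x - _mean)**2 for x in _xs) / (_n - 1)
assert abs(_lhs - (_n - 1) * _sample_var) < 1e-12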
def run_0(symbol="C 2"): space_group_info = sgtbx.space_group_info(symbol=symbol) xrs = random_structure.xray_structure(space_group_info=space_group_info, elements=["N"] * 50, volume_per_atom=100.0, random_u_iso=True) # b_cart = adptbx.random_traceless_symmetry_constrained_b_cart( crystal_symmetry=xrs.crystal_symmetry()) u_star = adptbx.u_cart_as_u_star(xrs.unit_cell(), adptbx.b_as_u(b_cart)) # F = xrs.structure_factors(d_min=1.5).f_calc() k_anisotropic = mmtbx.f_model.ext.k_anisotropic(F.indices(), u_star) # bin_selections = [] F.setup_binner(reflections_per_bin=50) for i_bin in F.binner().range_used(): sel = F.binner().selection(i_bin) bin_selections.append(sel) # d_spacings = F.d_spacings().data() ss = 1. / flex.pow2(d_spacings) / 4. k_mask_tmp = mmtbx.f_model.ext.k_mask(ss, 0.35, 80.) k_mask = flex.double(F.data().size(), 0) k_isotropic = flex.double(F.data().size(), 0) for s in bin_selections: d = d_spacings.select(s) k_mask.set_selected(s, flex.mean(k_mask_tmp.select(s))) k_isotropic.set_selected(s, random.randint(1, 10)) # fmodel = mmtbx.f_model.manager(xray_structure=xrs, f_obs=abs(F), k_isotropic=k_isotropic, k_anisotropic=k_anisotropic, k_mask=k_mask) f_calc = fmodel.f_calc() f_masks = fmodel.f_masks() f_model = fmodel.f_model() f_obs = abs(f_model) r_free_flags = f_obs.generate_r_free_flags(use_lattice_symmetry=False) # assert approx_equal(bulk_solvent.r_factor(f_obs.data(), f_model.data()), 0) aso = scaler.run(f_obs=f_obs, f_calc=f_calc, f_mask=f_masks, r_free_flags=r_free_flags, bin_selections=bin_selections, number_of_cycles=500, auto_convergence_tolerance=1.e-9, ss=ss, try_poly=True, try_expanal=True, try_expmin=True, verbose=True) print("r_f:", aso.r_final) print("r_l:", aso.r_low) print("r_h:", aso.r_high) assert aso.r_final < 0.0009, [aso.r_final, 0.0009] assert aso.r_low < 0.0017, [aso.r_low, 0.0017] assert aso.r_high < 0.0006, [aso.r_high, 0.0006] assert approx_equal( bulk_solvent.r_factor(f_obs.data(), abs(aso.core.f_model).data(), 1), bulk_solvent.r_factor(f_obs.data(), abs(aso.core.f_model).data()))
def plot_angles(coords, labels=None, show=False, plot_name=None):
  assert len(coords) >= 2

  coord_x = coords[0]
  coord_y = coords[1]

  if len(coords) > 2:
    coord_z = coords[2]
  else:
    coord_z = None

  r = flex.sqrt(flex.pow2(coord_x) + flex.pow2(coord_y))
  phi = flex.atan2(coord_y, coord_x)

  import math
  phi_deg = (180 / math.pi) * phi

  from matplotlib import pyplot as plt
  import numpy

  fig = plt.figure()

  if 0 and coord_z is not None:
    from mpl_toolkits.mplot3d import Axes3D  # import dependency
    ax = fig.add_subplot(111, projection='3d')
  else:
    ax = fig.add_subplot(111)

  if labels is None:
    labels = flex.int(len(coord_x), -1)

  unique_labels = set(labels)
  unique_labels = sorted(unique_labels)
  n_clusters = len(unique_labels) - (1 if -1 in unique_labels else 0)
  colours = list(plt.cm.Spectral(numpy.linspace(0, 1, n_clusters)))
  if -1 in unique_labels:
    colours.insert(0, (255, 255, 255, 1))
  for k, col in zip(unique_labels, colours):
    isel = (labels == k).iselection()
    if k == -1:  # or len(class_members) < min_cluster_size:
      # Black used for noise.
      col = 'k'
      col = '0.25'  # mid-grey
      markersize = 1
      marker = '+'
      #continue
    else:
      markersize = 2
      marker = 'o'
    if 0 and not isinstance(col, basestring) and len(col) == 4:
      # darken the edges
      frac = 0.75
      edgecolor = [col[0] * frac, col[1] * frac, col[2] * frac, col[3]]
    else:
      edgecolor = col

    if coord_z is None:
      ax.scatter(phi_deg.select(isel), r.select(isel),
                 s=markersize, marker=marker, c=col, edgecolor=edgecolor)
    else:
      ax.scatter(phi_deg.select(isel), r.select(isel), coord_z.select(isel),
                 s=markersize, marker=marker, c=col, edgecolor=edgecolor)

  ax.set_xlim(-180, 180)
  ax.set_ylim(0, ax.get_ylim()[1])
  ax.set_xlabel('Angle ($^{\circ}$)')
  ax.set_ylabel('Magnitude')
  if plot_name is not None:
    plt.savefig(plot_name,
                size_inches=(10, 10),
                dpi=300,
                bbox_inches='tight')
  if show:
    plt.show()
  plt.close(fig)
def run(argv=None):
  """Compute mean, standard deviation, and maximum projection images
  from a set of images given on the command line.

  @param argv Command line argument list
  @return     @c 0 on successful termination, @c 1 on error, and @c 2
              for command line syntax errors
  """
  import libtbx.load_env
  from libtbx import easy_pickle, option_parser
  from scitbx.array_family import flex
  from xfel.cxi.cspad_ana import cspad_tbx
  from iotbx.detectors.cspad_detector_formats import reverse_timestamp

  if argv is None:
    argv = sys.argv
  command_line = (option_parser.option_parser(
    usage="%s [-v] [-a PATH] [-m PATH] [-s PATH] " \
          "image1 image2 [image3 ...]" % libtbx.env.dispatcher_name)
                  .option(None, "--average-path", "-a",
                          type="string",
                          default=None,
                          dest="avg_path",
                          metavar="PATH",
                          help="Write average image to PATH")
                  .option(None, "--maximum-path", "-m",
                          type="string",
                          default=None,
                          dest="max_path",
                          metavar="PATH",
                          help="Write maximum projection image to PATH")
                  .option(None, "--stddev-path", "-s",
                          type="string",
                          default=None,
                          dest="stddev_path",
                          metavar="PATH",
                          help="Write standard deviation image to PATH")
                  .option(None, "--verbose", "-v",
                          action="store_true",
                          default=False,
                          dest="verbose",
                          help="Print more information about progress")
                  ).process(args=argv[1:])

  # Note that it is not an error to omit the output paths, because
  # certain statistics could still be printed, e.g. with the verbose
  # option.
  paths = command_line.args
  if len(paths) == 0:
    command_line.parser.print_usage(file=sys.stderr)
    return 2

  # Loop over all images and accumulate statistics.
  nfail = 0
  nmemb = 0
  for path in paths:
    if command_line.options.verbose:
      sys.stdout.write("Processing %s...\n" % path)

    try:
      # Promote the image to double-precision floating point type.
      # All real-valued flex arrays have the as_double() function.
      d = easy_pickle.load(path)
      distance = d['DISTANCE']
      img = d['DATA'].as_1d().as_double()
      wavelength = d['WAVELENGTH']
      time_tuple = reverse_timestamp(d['TIMESTAMP'])

      # Warn if the header items across the set of images do not match
      # up.  Note that discrepancies regarding the image size are
      # fatal.
      if 'active_areas' in locals():
        if (active_areas != d['ACTIVE_AREAS']).count(True) != 0:
          sys.stderr.write("Active areas do not match\n")
      else:
        active_areas = d['ACTIVE_AREAS']

      if 'beam_center' in locals():
        if beam_center != (d['BEAM_CENTER_X'], d['BEAM_CENTER_Y']):
          sys.stderr.write("Beam centers do not match\n")
      else:
        beam_center = (d['BEAM_CENTER_X'], d['BEAM_CENTER_Y'])

      if 'detector_address' in locals():
        if detector_address != d['DETECTOR_ADDRESS']:
          sys.stderr.write("Detector addresses do not match\n")
      else:
        detector_address = d['DETECTOR_ADDRESS']

      if 'saturated_value' in locals():
        if saturated_value != d['SATURATED_VALUE']:
          sys.stderr.write("Saturated values do not match\n")
      else:
        saturated_value = d['SATURATED_VALUE']

      if 'size' in locals():
        if size != (d['SIZE1'], d['SIZE2']):
          sys.stderr.write("Image sizes do not match\n")
          return 1
      else:
        size = (d['SIZE1'], d['SIZE2'])
      if size != d['DATA'].focus():
        sys.stderr.write("Image size does not match pixel array\n")
        return 1

      if 'pixel_size' in locals():
        if pixel_size != d['PIXEL_SIZE']:
          sys.stderr.write("Pixel sizes do not match\n")
          return 1
      else:
        if 'PIXEL_SIZE' in d:
          pixel_size = d['PIXEL_SIZE']
        else:
          pixel_size = None

    except Exception:
      try:
        # Fall back on reading the image with dxtbx, and shoehorn the
        # extracted information into what would have been found in a
        # pickle file.  XXX This code assumes a monolithic detector!
        from dxtbx.format.Registry import Registry
        format_class = Registry.find(path)
        i = format_class(path)

        beam = i.get_beam()
        assert len(i.get_detector()) == 1
        detector = i.get_detector()[0]

        beam_center = detector.get_beam_centre(beam.get_s0())
        detector_address = format_class.__name__
        distance = detector.get_distance()
        img = i.get_raw_data().as_1d().as_double()
        pixel_size = 0.5 * sum(detector.get_pixel_size())
        saturated_value = int(round(detector.get_trusted_range()[1]))
        size = detector.get_image_size()
        time_tuple = (i.get_scan().get_epochs()[0], 0)
        wavelength = beam.get_wavelength()

        active_areas = flex.int((0, 0, size[0], size[1]))
      except Exception:
        nfail += 1
        continue

    # See also event() in xfel.cxi.cspad_ana.average_tbx.  Record the
    # base time as the timestamp of the first image.
    #
    # The sum-of-squares image is accumulated using long integers, as
    # this delays the point where overflow occurs.  But really, this
    # is just a band-aid...
    if nmemb == 0:
      max_img = img.deep_copy()
      sum_distance = distance
      sum_img = img.deep_copy()
      ssq_img = flex.pow2(img)
      sum_wavelength = wavelength
      sum_time = (0, 0)
      time_base = time_tuple
    else:
      sel = (img > max_img).as_1d()
      max_img.set_selected(sel, img.select(sel))
      sum_distance += distance
      sum_img += img
      ssq_img += flex.pow2(img)
      sum_wavelength += wavelength
      sum_time = (sum_time[0] + (time_tuple[0] - time_base[0]),
                  sum_time[1] + (time_tuple[1] - time_base[1]))
    nmemb += 1

  # Early exit if no statistics were accumulated.
  if command_line.options.verbose:
    sys.stderr.write("Processed %d images (%d failed)\n" % (nmemb, nfail))
  if nmemb == 0:
    return 0

  # Calculate averages for measures where other statistics do not make
  # sense.  Note that avg_img is required for stddev_img.
  avg_img = sum_img.as_double() / nmemb
  avg_distance = sum_distance / nmemb
  avg_timestamp = cspad_tbx.evt_timestamp(
    (time_base[0] + int(round(sum_time[0] / nmemb)),
     time_base[1] + int(round(sum_time[1] / nmemb))))
  avg_wavelength = sum_wavelength / nmemb

  # Output the average image, maximum projection image, and standard
  # deviation image, if requested.
  if command_line.options.avg_path is not None:
    avg_img.resize(flex.grid(size[0], size[1]))
    d = cspad_tbx.dpack(
      active_areas=active_areas,
      address=detector_address,
      beam_center_x=beam_center[0],
      beam_center_y=beam_center[1],
      data=avg_img,
      distance=avg_distance,
      pixel_size=pixel_size,
      saturated_value=saturated_value,
      timestamp=avg_timestamp,
      wavelength=avg_wavelength)
    easy_pickle.dump(command_line.options.avg_path, d)

  if command_line.options.max_path is not None:
    max_img.resize(flex.grid(size[0], size[1]))
    d = cspad_tbx.dpack(
      active_areas=active_areas,
      address=detector_address,
      beam_center_x=beam_center[0],
      beam_center_y=beam_center[1],
      data=max_img,
      distance=avg_distance,
      pixel_size=pixel_size,
      saturated_value=saturated_value,
      timestamp=avg_timestamp,
      wavelength=avg_wavelength)
    easy_pickle.dump(command_line.options.max_path, d)

  if command_line.options.stddev_path is not None:
    stddev_img = ssq_img.as_double() - sum_img.as_double() * avg_img

    # Accumulating floating-point numbers introduces errors, which may
    # cause negative variances.  Since a two-pass approach is
    # unacceptable, the standard deviation is clamped at zero.
    stddev_img.set_selected(stddev_img < 0, 0)
    if nmemb == 1:
      stddev_img = flex.sqrt(stddev_img)
    else:
      stddev_img = flex.sqrt(stddev_img / (nmemb - 1))

    stddev_img.resize(flex.grid(size[0], size[1]))
    d = cspad_tbx.dpack(
      active_areas=active_areas,
      address=detector_address,
      beam_center_x=beam_center[0],
      beam_center_y=beam_center[1],
      data=stddev_img,
      distance=avg_distance,
      pixel_size=pixel_size,
      saturated_value=saturated_value,
      timestamp=avg_timestamp,
      wavelength=avg_wavelength)
    easy_pickle.dump(command_line.options.stddev_path, d)

  return 0
def event(self, evt, env): """The event() function is called for every L1Accept transition. @param evt Event data object, a configure object @param env Environment object """ super(average_mixin, self).event(evt, env) if evt.get('skip_event'): return # Get the distance for the detectors that should have it, and set # it to NaN for those that should not. if self.detector == 'CxiDs1' or \ self.detector == 'CxiDs2' or \ self.detector == 'CxiDsd' or \ self.detector == 'XppGon': distance = cspad_tbx.env_distance(self.address, env, self._detz_offset) if distance is None: self._nfail += 1 self.logger.warning("event(): no distance, shot skipped") evt.put(skip_event_flag(), 'skip_event') return else: distance = float('nan') if ("skew" in self.flags): # Take out inactive pixels if self.roi is not None: pixels = self.cspad_img[self.roi[2]:self.roi[3], self.roi[0]:self.roi[1]] dark_mask = self.dark_mask[self.roi[2]:self.roi[3], self.roi[0]:self.roi[1]] pixels = pixels.as_1d().select(dark_mask.as_1d()) else: pixels = self.cspad_img.as_1d().select(self.dark_mask.as_1d()).as_double() stats = scitbx.math.basic_statistics(pixels.as_double()) #stats.show() self.logger.info("skew: %.3f" %stats.skew) self.logger.info("kurtosis: %.3f" %stats.kurtosis) if 0: from matplotlib import pyplot hist_min, hist_max = flex.min(flex_cspad_img.as_double()), flex.max(flex_cspad_img.as_double()) print hist_min, hist_max n_slots = 100 n, bins, patches = pyplot.hist(flex_cspad_img.as_1d().as_numpy_array(), bins=n_slots, range=(hist_min, hist_max)) pyplot.show() # XXX This skew threshold probably needs fine-tuning skew_threshold = 0.35 if stats.skew < skew_threshold: self._nfail += 1 self.logger.warning("event(): skew < %f, shot skipped" % skew_threshold) evt.put(skip_event_flag(), 'skip_event') return #self.cspad_img *= stats.skew if ("inactive" in self.flags): self.cspad_img.set_selected(self.dark_stddev <= 0, 0) if ("noelastic" in self.flags): ELASTIC_THRESHOLD = self.elastic_threshold self.cspad_img.set_selected(self.cspad_img > ELASTIC_THRESHOLD, 0) if self.hot_threshold is not None: HOT_THRESHOLD = self.hot_threshold self.cspad_img.set_selected(self.dark_img > HOT_THRESHOLD, 0) if self.gain_map is not None and self.gain_threshold is not None: # XXX comparing each pixel to a moving average would probably be better # since the gain should vary approximately smoothly over different areas # of the detector GAIN_THRESHOLD = self.gain_threshold #self.logger.debug( #"rejecting: %i" %(self.gain_map > GAIN_THRESHOLD).count(True)) self.cspad_img.set_selected(self.gain_map > GAIN_THRESHOLD, 0) if ("nonoise" in self.flags): NOISE_THRESHOLD = self.noise_threshold self.cspad_img.set_selected(self.cspad_img < NOISE_THRESHOLD, 0) if ("sigma_scaling" in self.flags): self.do_sigma_scaling() if ("symnoise" in self.flags): SYMNOISE_THRESHOLD = self.symnoise_threshold self.cspad_img.set_selected((-SYMNOISE_THRESHOLD < self.cspad_img) & ( self.cspad_img < SYMNOISE_THRESHOLD), 0) if ("output" in self.flags): import pickle,os if (not os.path.isdir(self.pickle_dirname)): os.makedirs(self.pickle_dirname) flexdata = flex.int(self.cspad_img.astype(numpy.int32)) d = cspad_tbx.dpack( address=self.address, data=flexdata, timestamp=cspad_tbx.evt_timestamp(cspad_tbx.evt_time(evt)) ) G = open(os.path.join(".",self.pickle_dirname)+"/"+self.pickle_basename, "ab") pickle.dump(d,G,pickle.HIGHEST_PROTOCOL) G.close() if self.photon_threshold is not None and self.two_photon_threshold is not None: self.do_photon_counting() if self.background_path is not None: 
    self.cspad_img -= self.background_img

  # t and self._sum_time are two-element arrays of seconds and
  # milliseconds which hold time with respect to the base time.
  t = [t1 - t2 for (t1, t2)
       in zip(cspad_tbx.evt_time(evt), self._metadata['time_base'])]
  if self._nmemb == 0:
    # The peers metadata item is a bit field where a bit is set if
    # the partial sum from the corresponding worker process is
    # pending.  If this is the first frame a worker process sees,
    # set its corresponding bit in the bit field since it will
    # contribute a partial sum.
    if env.subprocess() >= 0:
      self._lock.acquire()
      if 'peers' in self._metadata.keys():
        self._metadata['peers'] |= (1 << env.subprocess())
      else:
        self._metadata['peers'] = (1 << env.subprocess())
      self._lock.release()

    self._sum_distance = distance
    self._sum_time = (t[0], t[1])
    self._sum_wavelength = self.wavelength

    if self._have_max:
      self._max_img = self.cspad_img.deep_copy()
    if self._have_mean:
      self._sum_img = self.cspad_img.deep_copy()
    if self._have_std:
      self._ssq_img = flex.pow2(self.cspad_img)

  else:
    self._sum_distance += distance
    self._sum_time = (self._sum_time[0] + t[0], self._sum_time[1] + t[1])
    self._sum_wavelength += self.wavelength

    if self._have_max:
      sel = (self.cspad_img > self._max_img).as_1d()
      self._max_img.as_1d().set_selected(
        sel, self.cspad_img.as_1d().select(sel))
    if self._have_mean:
      self._sum_img += self.cspad_img
    if self._have_std:
      self._ssq_img += flex.pow2(self.cspad_img)

  self._nmemb += 1
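# Illustrative sketch (hypothetical helper, not from the original module):
# the "skew" veto in event() above reads .skew and .kurtosis straight off
# scitbx.math.basic_statistics, so a shot can be vetted without building a
# histogram.  Synthetic pixel values shown.
def _skew_demo():
  import scitbx.math
  from scitbx.array_family import flex
  # 10% bright outliers give a strongly positive skew, so this "shot"
  # would pass a skew_threshold of 0.35.
  pixels = flex.double([0.0] * 90 + [50.0] * 10)
  stats = scitbx.math.basic_statistics(pixels)
  return stats.skew, stats.kurtosis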
def run(argv=None):
  """Compute mean, standard deviation, and maximum projection images
  from a set of images given on the command line.

  @param argv Command line argument list
  @return     @c 0 on successful termination, @c 1 on error, and @c 2
              for command line syntax errors
  """
  import libtbx.load_env
  from libtbx import easy_pickle, option_parser
  from scitbx.array_family import flex
  from xfel.cxi.cspad_ana import cspad_tbx

  if argv is None:
    argv = sys.argv
  command_line = (option_parser.option_parser(
    usage="%s [-v] [-a PATH] [-m PATH] [-s PATH] "
          "image1 image2 [image3 ...]" % libtbx.env.dispatcher_name)
    .option(None, "--average-path", "-a",
            type="string",
            default=None,
            dest="avg_path",
            metavar="PATH",
            help="Write average image to PATH")
    .option(None, "--maximum-path", "-m",
            type="string",
            default=None,
            dest="max_path",
            metavar="PATH",
            help="Write maximum projection image to PATH")
    .option(None, "--stddev-path", "-s",
            type="string",
            default=None,
            dest="stddev_path",
            metavar="PATH",
            help="Write standard deviation image to PATH")
    .option(None, "--verbose", "-v",
            action="store_true",
            default=False,
            dest="verbose",
            help="Print more information about progress")
    ).process(args=argv[1:])

  # Note that it is not an error to omit the output paths, because
  # certain statistics could still be printed, e.g. with the verbose
  # option.
  paths = command_line.args
  if len(paths) == 0:
    command_line.parser.print_usage(file=sys.stderr)
    return 2

  # Loop over all images and accumulate statistics.
  nfail = 0
  nmemb = 0

  if len(paths) == 1:
    # test if the image is a multi-image
    from dxtbx.format.Registry import Registry
    from dxtbx.format.FormatMultiImage import FormatMultiImage
    format_class = Registry.find(paths[0])
    if not issubclass(format_class, FormatMultiImage):
      from libtbx.utils import Usage
      raise Usage("Supply more than one image")

    print "Loading image..."
    i = format_class(paths[0])
    print "Loaded"

    def read_single_image(n):
      if command_line.options.verbose:
        sys.stdout.write("Processing %s: %d...\n" % (paths[0], n))

      beam = i.get_beam(n)
      assert len(i.get_detector(n)) == 1
      detector = i.get_detector(n)[0]

      beam_center = detector.get_beam_centre(beam.get_s0())
      detector_address = format_class.__name__
      distance = detector.get_distance()
      img = i.get_raw_data(n).as_1d().as_double()
      pixel_size = 0.5 * sum(detector.get_pixel_size())
      saturated_value = int(round(detector.get_trusted_range()[1]))
      size = detector.get_image_size()
      scan = i.get_scan(n)
      if scan is None:
        time_tuple = (0, 0)
      else:
        time_tuple = (scan.get_epochs()[0], 0)
      wavelength = beam.get_wavelength()
      active_areas = flex.int((0, 0, size[0], size[1]))
      return beam_center, detector_address, distance, img, pixel_size, \
        saturated_value, size, time_tuple, wavelength, active_areas

    iterable = xrange(i.get_num_images())

  else:
    def read_single_image(path):
      if command_line.options.verbose:
        sys.stdout.write("Processing %s...\n" % path)

      from dxtbx.format.Registry import Registry
      format_class = Registry.find(path)
      i = format_class(path)

      beam = i.get_beam()
      assert len(i.get_detector()) == 1
      detector = i.get_detector()[0]

      beam_center = detector.get_beam_centre(beam.get_s0())
      detector_address = format_class.__name__
      distance = detector.get_distance()
      img = i.get_raw_data().as_1d().as_double()
      pixel_size = 0.5 * sum(detector.get_pixel_size())
      saturated_value = int(round(detector.get_trusted_range()[1]))
      size = detector.get_image_size()
      scan = i.get_scan()
      if scan is None:
        time_tuple = (0, 0)
      else:
        time_tuple = (scan.get_epochs()[0], 0)
      wavelength = beam.get_wavelength()
      active_areas = flex.int((0, 0, size[0], size[1]))
      return beam_center, detector_address, distance, img, pixel_size, \
        saturated_value, size, time_tuple, wavelength, active_areas

    iterable = paths

  for item in iterable:
    try:
      # XXX This code assumes a monolithic detector!
      beam_center, detector_address, distance, img, pixel_size, \
        saturated_value, size, time_tuple, wavelength, active_areas = \
        read_single_image(item)
    except Exception:
      nfail += 1
      continue

    # See also event() in xfel.cxi.cspad_ana.average_tbx.  Record the
    # base time as the timestamp of the first image.
    #
    # The sum-of-squares image is accumulated using long integers, as
    # this delays the point where overflow occurs.  But really, this
    # is just a band-aid...
    if nmemb == 0:
      max_img = img.deep_copy()
      sum_distance = distance
      sum_img = img.deep_copy()
      ssq_img = flex.pow2(img)
      sum_wavelength = wavelength
      sum_time = (0, 0)
      time_base = time_tuple

    else:
      sel = (img > max_img).as_1d()
      max_img.set_selected(sel, img.select(sel))

      sum_distance += distance
      sum_img += img
      ssq_img += flex.pow2(img)
      sum_wavelength += wavelength
      sum_time = (sum_time[0] + (time_tuple[0] - time_base[0]),
                  sum_time[1] + (time_tuple[1] - time_base[1]))

    nmemb += 1

  # Early exit if no statistics were accumulated.
  if command_line.options.verbose:
    sys.stderr.write("Processed %d images (%d failed)\n" % (nmemb, nfail))
  if nmemb == 0:
    return 0

  # Calculate averages for measures where other statistics do not make
  # sense.  Note that avg_img is required for stddev_img.
  avg_img = sum_img.as_double() / nmemb
  avg_distance = sum_distance / nmemb
  avg_timestamp = cspad_tbx.evt_timestamp(
    (time_base[0] + int(round(sum_time[0] / nmemb)),
     time_base[1] + int(round(sum_time[1] / nmemb))))
  avg_wavelength = sum_wavelength / nmemb

  # Output the average image, maximum projection image, and standard
  # deviation image, if requested.
  if command_line.options.avg_path is not None:
    avg_img.resize(flex.grid(size[1], size[0]))
    d = cspad_tbx.dpack(active_areas=active_areas,
                        address=detector_address,
                        beam_center_x=beam_center[0],
                        beam_center_y=beam_center[1],
                        data=avg_img,
                        distance=avg_distance,
                        pixel_size=pixel_size,
                        saturated_value=saturated_value,
                        timestamp=avg_timestamp,
                        wavelength=avg_wavelength)
    easy_pickle.dump(command_line.options.avg_path, d)

  if command_line.options.max_path is not None:
    max_img.resize(flex.grid(size[1], size[0]))
    d = cspad_tbx.dpack(active_areas=active_areas,
                        address=detector_address,
                        beam_center_x=beam_center[0],
                        beam_center_y=beam_center[1],
                        data=max_img,
                        distance=avg_distance,
                        pixel_size=pixel_size,
                        saturated_value=saturated_value,
                        timestamp=avg_timestamp,
                        wavelength=avg_wavelength)
    easy_pickle.dump(command_line.options.max_path, d)

  if command_line.options.stddev_path is not None:
    stddev_img = ssq_img.as_double() - sum_img.as_double() * avg_img

    # Accumulating floating-point numbers introduces errors, which may
    # cause negative variances.  Since a two-pass approach is
    # unacceptable, the standard deviation is clamped at zero.
    stddev_img.set_selected(stddev_img < 0, 0)
    if nmemb == 1:
      stddev_img = flex.sqrt(stddev_img)
    else:
      stddev_img = flex.sqrt(stddev_img / (nmemb - 1))

    stddev_img.resize(flex.grid(size[1], size[0]))
    d = cspad_tbx.dpack(active_areas=active_areas,
                        address=detector_address,
                        beam_center_x=beam_center[0],
                        beam_center_y=beam_center[1],
                        data=stddev_img,
                        distance=avg_distance,
                        pixel_size=pixel_size,
                        saturated_value=saturated_value,
                        timestamp=avg_timestamp,
                        wavelength=avg_wavelength)
    easy_pickle.dump(command_line.options.stddev_path, d)

  return 0
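# Illustrative sketch (hypothetical helper, not from the original command):
# a scalar check of the one-pass variance bookkeeping used above, where only
# the running sum and sum-of-squares are kept and
# stddev = sqrt((ssq - sum*mean) / (n - 1)).
def _one_pass_stddev_demo():
  import math
  samples = [1.0, 2.0, 4.0]
  n = len(samples)
  s = sum(samples)                    # analogue of sum_img
  ssq = sum(v * v for v in samples)   # analogue of ssq_img
  mean = s / n                        # analogue of avg_img
  var = (ssq - s * mean) / (n - 1)
  return math.sqrt(var)  # ~1.528, the unbiased standard deviation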
def background(imageset, indx, n_bins, mask_params=None):
  from dials.array_family import flex
  from libtbx.phil import parse
  from scitbx import matrix

  if mask_params is None:
    # Default mask params for trusted range
    mask_params = phil_scope.fetch(parse("")).extract().masking

  from dials.util.masking import MaskGenerator
  mask_generator = MaskGenerator(mask_params)
  mask = mask_generator.generate(imageset)

  detector = imageset.get_detector()
  beam = imageset.get_beam()
  # Only working with single panel detector for now
  assert len(detector) == 1
  panel = detector[0]
  mask = mask[0]

  n = matrix.col(panel.get_normal()).normalize()
  b = matrix.col(beam.get_s0()).normalize()
  wavelength = beam.get_wavelength()

  if math.fabs(b.dot(n)) < 0.95:
    raise Sorry("Detector not perpendicular to beam")

  data = imageset.get_raw_data(indx)
  assert len(data) == 1
  data = data[0]
  data = data.as_double()

  spot_params = spot_phil.fetch(source=parse("")).extract()
  threshold_function = SpotFinderFactory.configure_threshold(spot_params)
  peak_pixels = threshold_function.compute_threshold(data, mask)
  signal = data.select(peak_pixels.iselection())
  background_pixels = mask & ~peak_pixels
  background = data.select(background_pixels.iselection())

  # print some summary information
  print("Mean background: %.3f" % (flex.sum(background) / background.size()))
  if len(signal) > 0:
    print("Max/total signal pixels: %.0f / %.0f"
          % (flex.max(signal), flex.sum(signal)))
  else:
    print("No signal pixels on this image")
  print("Peak/background/masked pixels: %d / %d / %d"
        % (peak_pixels.count(True), background.size(), mask.count(False)))

  # compute histogram of two-theta values, then same weighted
  # by pixel values, finally divide latter by former to get
  # the radial profile out, need to set the number of bins
  # sensibly; inspired by method in PyFAI
  two_theta_array = panel.get_two_theta_array(beam.get_s0())
  two_theta_array = two_theta_array.as_1d().select(
    background_pixels.iselection())

  # Use flex.weighted_histogram
  h0 = flex.weighted_histogram(two_theta_array, n_slots=n_bins)
  h1 = flex.weighted_histogram(two_theta_array, background, n_slots=n_bins)
  h2 = flex.weighted_histogram(two_theta_array, background * background,
                               n_slots=n_bins)

  d0 = h0.slots()
  d1 = h1.slots()
  d2 = h2.slots()

  I = d1 / d0
  I2 = d2 / d0
  sig = flex.sqrt(I2 - flex.pow2(I))

  tt = h0.slot_centers()
  d_spacings = wavelength / (2.0 * flex.sin(0.5 * tt))

  return d_spacings, I, sig
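# Illustrative sketch (hypothetical helper, not from the original module):
# the radial-profile arithmetic above in miniature.  flex.weighted_histogram
# accumulates, per two-theta slot, the count, the sum of values and the sum
# of squared values, so I = <v> and sig = sqrt(<v**2> - <v>**2) follow
# slot by slot.
def _radial_profile_demo():
  from scitbx.array_family import flex
  tt = flex.double([0.1, 0.1, 0.2, 0.2])   # fake two-theta values
  v = flex.double([1.0, 3.0, 2.0, 2.0])    # fake background pixel values
  h0 = flex.weighted_histogram(tt, n_slots=2)
  h1 = flex.weighted_histogram(tt, v, n_slots=2)
  h2 = flex.weighted_histogram(tt, v * v, n_slots=2)
  mean_v = h1.slots() / h0.slots()                      # [2.0, 2.0]
  var_v = h2.slots() / h0.slots() - flex.pow2(mean_v)   # [1.0, 0.0]
  return mean_v, flex.sqrt(var_v)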
def exercise_gaussian_fit():

  # test fitting of a gaussian
  def do_gaussian_fit(scale, mu, sigma):
    start = mu - 6 * sigma
    stop = mu + 6 * sigma
    step = (stop - start) / 1000
    x = flex.double(frange(start, stop, step))
    y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
    fit = curve_fitting.single_gaussian_fit(x, y)
    assert approx_equal(fit.a, scale, 1e-4)
    assert approx_equal(fit.b, mu, eps=1e-4)
    assert approx_equal(fit.c, sigma, eps=1e-4)

  for i in range(10):
    scale = random.random() * 1000
    sigma = (random.random() + 0.0001) * 10
    mu = (-1)**random.randint(0, 1) * random.random() * 1000
    functor = curve_fitting.gaussian(scale, mu, sigma)
    start = mu - 6 * sigma
    stop = mu + 6 * sigma
    step = (stop - start) / 1000
    x = flex.double(frange(start, stop, step))
    fd_grads = finite_differences(functor, x)
    assert approx_equal(functor.partial_derivatives(x), fd_grads, 1e-4)
    do_gaussian_fit(scale, mu, sigma)

  # if we take the log of a gaussian we can fit a parabola
  scale = 123
  mu = 3.2
  sigma = 0.1
  x = flex.double(frange(2, 4, 0.01))
  y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
  # need to be careful to only use values of y > 0
  eps = 1e-15
  x = flex.double([x[i] for i in range(x.size()) if y[i] > eps])
  y = flex.double([y[i] for i in range(y.size()) if y[i] > eps])
  fit = curve_fitting.univariate_polynomial_fit(x, flex.log(y), degree=2)
  c, b, a = fit.params
  assert approx_equal(mu, -b / (2 * a))
  assert approx_equal(sigma * sigma, -1 / (2 * a))

  # test multiple gaussian fits
  gaussians = [
    curve_fitting.gaussian(0.3989538, 3.7499764, 0.7500268),
    curve_fitting.gaussian(0.7978957, 6.0000004, 0.5000078)
  ]
  x = flex.double(frange(0, 10, 0.1))
  y = flex.double(x.size())
  for i in range(len(gaussians)):
    g = gaussians[i]
    scale, mu, sigma = g.a, g.b, g.c
    y += g(x)

  starting_gaussians = [
    curve_fitting.gaussian(1, 4, 1),
    curve_fitting.gaussian(1, 5, 1)
  ]
  fit = curve_fitting.gaussian_fit(x, y, starting_gaussians)
  for g1, g2 in zip(gaussians, fit.gaussians):
    assert approx_equal(g1.a, g2.a, eps=1e-4)
    assert approx_equal(g1.b, g2.b, eps=1e-4)
    assert approx_equal(g1.c, g2.c, eps=1e-4)

  # use example of 5-gaussian fit from here:
  # http://research.stowers-institute.org/efg/R/Statistics/MixturesOfDistributions/index.htm
  gaussians = [
    curve_fitting.gaussian(0.10516252, 23.32727, 2.436638),
    curve_fitting.gaussian(0.46462715, 33.09053, 2.997594),
    curve_fitting.gaussian(0.29827916, 41.27244, 4.274585),
    curve_fitting.gaussian(0.08986616, 51.24468, 5.077521),
    curve_fitting.gaussian(0.04206501, 61.31818, 7.070303)
  ]
  x = flex.double(frange(0, 80, 0.1))
  y = flex.double(x.size())
  for i in range(len(gaussians)):
    g = gaussians[i]
    scale, mu, sigma = g.a, g.b, g.c
    y += g(x)

  termination_params = scitbx.lbfgs.termination_parameters(
    min_iterations=500)
  starting_gaussians = [
    curve_fitting.gaussian(1, 21, 2.1),
    curve_fitting.gaussian(1, 30, 2.8),
    curve_fitting.gaussian(1, 40, 2.2),
    curve_fitting.gaussian(1, 51, 1.2),
    curve_fitting.gaussian(1, 60, 2.3)
  ]
  fit = curve_fitting.gaussian_fit(x, y, starting_gaussians,
                                   termination_params=termination_params)
  y_calc = fit.compute_y_calc()
  assert approx_equal(y, y_calc, eps=1e-2)

  have_cma_es = libtbx.env.has_module("cma_es")
  if have_cma_es:
    fit = curve_fitting.cma_es_minimiser(starting_gaussians, x, y)
    y_calc = fit.compute_y_calc()
    assert approx_equal(y, y_calc, eps=5e-2)
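# Illustrative sketch (hypothetical helper, not from the original test): the
# parabola relations exercised above, worked explicitly.  For
# y = a*exp(-(x - mu)**2 / (2*sigma**2)), log(y) is quadratic with x**2
# coefficient a2 = -1/(2*sigma**2) and x coefficient b = mu/sigma**2, hence
# mu = -b/(2*a2) and sigma**2 = -1/(2*a2).
def _log_gaussian_parabola_check():
  mu, sigma = 3.2, 0.1
  a2 = -1 / (2 * sigma**2)
  b = mu / sigma**2
  assert abs(mu - (-b / (2 * a2))) < 1e-12
  assert abs(sigma**2 - (-1 / (2 * a2))) < 1e-12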
def get_goniometer_shadow_masker(self, goniometer=None):
  from dials.util.masking import GoniometerShadowMaskGenerator
  from scitbx.array_family import flex
  import math

  coords = flex.vec3_double(((0, 0, 0), ))

  alpha = flex.double_range(0, 190, step=10) * math.pi / 180
  r = flex.double(alpha.size(), 40)
  x = flex.double(r.size(), 107.61)
  y = -r * flex.sin(alpha)
  z = -r * flex.cos(alpha)
  coords.extend(flex.vec3_double(x, y, z))

  coords.extend(flex.vec3_double((
    # fixed
    (107.49, 7.84, 39.49),
    (107.39, 15.69, 38.97),
    (107.27, 23.53, 38.46),
    (107.16, 31.37, 37.94),
    (101.76, 33.99, 36.25),
    (96.37, 36.63, 34.56),
    (90.98, 39.25, 33.00),
    (85.58, 41.88, 31.18),
    (80.89, 47.06, 31.00),
    (76.55, 51.51, 31.03),
    (72.90, 55.04, 31.18),
    (66.86, 60.46, 31.67),
    (62.10, 64.41, 32.25),
  )))

  alpha = flex.double_range(180, 370, step=10) * math.pi / 180
  r = flex.double(alpha.size(), 33)
  x = (flex.sqrt(flex.pow2(r * flex.sin(alpha)) + 89.02**2)
       * flex.cos((50 * math.pi / 180)
                  - flex.atan(r / 89.02 * flex.sin(alpha))))
  y = (flex.sqrt(flex.pow2(r * flex.sin(alpha)) + 89.02**2)
       * flex.sin((50 * math.pi / 180)
                  - flex.atan(r / 89.02 * flex.sin(alpha))))
  z = -r * flex.cos(alpha)
  coords.extend(flex.vec3_double(x, y, z))

  coords.extend(flex.vec3_double((
    # fixed
    (62.10, 64.41, -32.25),
    (66.86, 60.46, -31.67),
    (72.90, 55.04, -31.18),
    (76.55, 51.51, -31.03),
    (80.89, 47.06, -31.00),
    (85.58, 41.88, -31.18),
    (90.98, 39.25, -33.00),
    (96.37, 36.63, -34.56),
    (101.76, 33.99, -36.25),
    (107.16, 31.37, -37.94),
    (107.27, 23.53, -38.46),
    (107.39, 15.69, -38.97),
    (107.49, 7.84, -39.49),
    (107.61, 0.00, -40.00))))

  # I23 end station coordinate system:
  #   X-axis: positive direction is facing away from the storage ring
  #     (from sample towards goniometer)
  #   Y-axis: positive direction is vertically up
  #   Z-axis: positive direction is in the direction of the beam (from
  #     sample towards detector)
  #   K-axis (kappa): at an angle of +50 degrees from the X-axis
  #   K & phi rotation axes: clockwise rotation is positive (right hand
  #     thumb rule)
  #   Omega-axis: along the X-axis; clockwise rotation is positive

  # End station x-axis is parallel to ImgCIF x-axis.
  # End station z-axis points in opposite direction to ImgCIF definition
  #   (ImgCIF: the Z-axis is derived from the source axis which goes from
  #   the sample to the source).
  # Consequently end station y-axis (to complete set following right hand
  #   rule) points in opposite direction to ImgCIF y-axis.
  # Kappa arm aligned with -y in ImgCIF convention.

  from rstbx.cftbx.coordinate_frame_helpers import align_reference_frame
  from scitbx import matrix
  R = align_reference_frame(matrix.col((1, 0, 0)), matrix.col((1, 0, 0)),
                            matrix.col((0, 1, 0)), matrix.col((0, -1, 0)))
  coords = R.elems * coords

  if goniometer is None:
    goniometer = self.get_goniometer()
  return GoniometerShadowMaskGenerator(
    goniometer, coords, flex.size_t(len(coords), 1))
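# Illustrative sketch (hypothetical helper, not from the original class): a
# sanity check of the change of basis used above.  align_reference_frame
# returns a proper rotation that keeps x fixed and maps end-station +y onto
# ImgCIF -y (equivalent to a 180-degree rotation about x, so z flips too).
def _frame_alignment_demo():
  from rstbx.cftbx.coordinate_frame_helpers import align_reference_frame
  from scitbx import matrix
  R = align_reference_frame(matrix.col((1, 0, 0)), matrix.col((1, 0, 0)),
                            matrix.col((0, 1, 0)), matrix.col((0, -1, 0)))
  return R * matrix.col((0, 1, 0))  # expected (0, -1, 0)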
def lewis(template, img):
  """The lewis() function computes the normalised cross-correlation
  (NCC) of an image, @p img, and a template, @p template.  Both image
  and template must be two-dimensional, real, and finite.  The image
  must be larger than the template, which in turn should contain more
  than one pixel, and the template must have positive variance.  The
  function returns the correlation coefficients in the range [-1, +1].
  See Lewis, J. P. (1995) "Fast Template Matching", Vision Interface,
  120-123.

  @note This function should be equivalent to MATLAB's normxcorr2()
        function.

  @param template Two-dimensional intensity template
  @param img      Two-dimensional intensity image
  @return         Correlation coefficients
  """
  import math
  import numpy
  from sys import float_info
  from scitbx import fftpack
  from scitbx.array_family import flex

  # Assert that image and template are two-dimensional arrays, and
  # that the template is no larger than image.  Assert that template
  # is not flat.  XXX Check for real and finite, too?
  assert len(img.focus()) == 2 and len(template.focus()) == 2
  assert img.focus()[0] >= template.focus()[0] \
    and img.focus()[1] >= template.focus()[1]
  assert template.sample_standard_deviation() > 0

  # For conformance with MATLAB's normxcorr2() and geck 342320: for
  # numerical robustness, ensure that both image and template are
  # always non-negative.
  img_nn = img - min(0, flex.min(img))
  template_nn = template - min(0, flex.min(template))

  # Calculate the terms of the denominator of gamma.  Must guard
  # against negative variance of the image due to inaccuracies in the
  # one-pass formula.
  img_sum = _summed_area_table(
    img_nn, template_nn.focus()[0], template_nn.focus()[1])
  img_ssq = _summed_area_table(
    flex.pow2(img_nn), template_nn.focus()[0], template_nn.focus()[1])

  f_sigma = img_ssq - img_sum * img_sum / (
    template_nn.focus()[0] * template_nn.focus()[1])
  f_sigma.set_selected(f_sigma < 0, 0)
  f_sigma = flex.sqrt(f_sigma)
  t_sigma = (template_nn - flex.mean(template_nn)).norm()

  gamma_denominator = f_sigma * t_sigma

  # Zero-pad the image to permit partial overlap of template and
  # image, and embed the time-reversed template in a zero-padded array
  # of the same size.  Zero-padding means the entire template is
  # always overlapping the image, and terms involving the template
  # mean cancel in the expansion of the numerator of gamma.
  #
  # Note: the NCC demands the template to be time-reversed, which can
  # be accomplished by conjugation in the frequency domain.  An
  # implementation following that approach would however require
  # special care to be taken for the first rows and columns:
  #
  #   from numpy import roll
  #   t_embed.matrix_paste_block_in_place(
  #     block=template_nn,
  #     i_row=full[0] - template_nn.focus()[0],
  #     i_column=full[1] - template_nn.focus()[1])
  #   t_embed = flex.double(roll(
  #     roll(t_embed.as_numpy_array(), 1, axis=0), 1, axis=1))
  #
  # Calculate correlation in frequency domain.  XXX Could use spatial
  # domain calculation in cases where it's faster (see MATLAB's
  # implementation).
  full = (img_nn.focus()[0] + template_nn.focus()[0] - 1,
          img_nn.focus()[1] + template_nn.focus()[1] - 1)

  f_embed = flex.double(flex.grid(full))
  f_embed.matrix_paste_block_in_place(block=img_nn, i_row=0, i_column=0)
  f_prime = flex.complex_double(
    reals=f_embed, imags=flex.double(flex.grid(full)))

  t_embed = flex.double(flex.grid(full))
  t_embed.matrix_paste_block_in_place(
    block=template_nn.matrix_rot90(2), i_row=0, i_column=0)
  t_prime = flex.complex_double(
    reals=t_embed, imags=flex.double(flex.grid(full)))

  fft = fftpack.complex_to_complex_2d(full)
  fft.forward(f_prime)
  fft.forward(t_prime)
  gamma_numerator = f_prime * t_prime
  fft.backward(gamma_numerator)
  gamma_numerator = flex.real(gamma_numerator) \
    / (fft.n()[0] * fft.n()[1]) - img_sum * flex.mean(template_nn)

  # For conformance with MATLAB: set the NCC to zero in regions where
  # the image has zero variance over the extent of the template.  If,
  # due to small variances in the image or the template, a correlation
  # coefficient falls outside the range [-1, 1], set it to zero to
  # reflect the undefined 0/0 condition.
  tol = math.sqrt(math.ldexp(
    float_info.epsilon,
    math.frexp(flex.max(flex.abs(gamma_denominator)))[1] - 1))
  sel = gamma_denominator <= tol
  gamma = gamma_numerator.set_selected(sel, 0) \
    / gamma_denominator.set_selected(sel, 1)
  gamma.set_selected(flex.abs(gamma) > 1 + math.sqrt(float_info.epsilon), 0)

  return gamma
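# Illustrative sketch (hypothetical helper, not from the original module):
# exercising lewis() with a template cut out of a random image; the maximum
# of the returned NCC map should approach +1 where the template matches.
# Assumes the _summed_area_table helper used by lewis() is available in the
# same module.
def _lewis_demo():
  from scitbx.array_family import flex
  img = flex.random_double(400)
  img.reshape(flex.grid(20, 20))
  template = img.matrix_copy_block(i_row=5, i_column=5,
                                   n_rows=6, n_columns=6)
  gamma = lewis(template, img)
  return flex.max(gamma)  # expected to be close to 1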
def hyperbola(x, c):
  return flex.sqrt(flex.pow2(x) + c)
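# Illustrative note: with c = 16, hyperbola() maps [0, 3] to [4, 5], since
# sqrt(0 + 16) = 4 and sqrt(9 + 16) = 5; the constant c smooths the |x|
# kink at the origin, e.g.:
#   list(hyperbola(flex.double([0.0, 3.0]), 16.0))  # -> [4.0, 5.0]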
def run(args):
  from dials.util.options import OptionParser
  from dials.util.options import flatten_experiments

  usage = "dials.background [options] image_*.cbf"

  parser = OptionParser(usage=usage,
                        phil=phil_scope,
                        read_experiments=True,
                        epilog=help_message)

  params, options = parser.parse_args(show_diff_phil=True)

  # Check that exactly one experiment list was passed
  experiments = flatten_experiments(params.input.experiments)
  if len(experiments) != 1:
    parser.print_help()
    return

  imagesets = experiments.imagesets()
  if len(imagesets) != 1:
    raise Sorry(
      "Please pass an experiment list that contains a single imageset")
  imageset = imagesets[0]

  first, last = imageset.get_scan().get_image_range()
  images = range(first, last + 1)

  if params.images:
    if min(params.images) < first or max(params.images) > last:
      raise Sorry("image outside of scan range")
    images = params.images

  d_spacings = []
  intensities = []
  sigmas = []

  for indx in images:
    print("For image %d:" % indx)
    indx -= first  # indices passed to imageset.get_raw_data start from zero
    d, I, sig = background(imageset, indx, n_bins=params.n_bins,
                           mask_params=params.masking)

    print("%8s %8s %8s" % ("d", "I", "sig"))
    for j in range(len(I)):
      print("%8.3f %8.3f %8.3f" % (d[j], I[j], sig[j]))

    d_spacings.append(d)
    intensities.append(I)
    sigmas.append(sig)

  if params.plot:
    from matplotlib import pyplot
    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel(r"resolution ($\AA$)")
    ax.set_ylabel(r"$\langle I_b \rangle$")
    for d, I, sig in zip(d_spacings, intensities, sigmas):
      ds2 = 1 / flex.pow2(d)
      ax.plot(ds2, I)
    xticks = ax.get_xticks()
    x_tick_labs = ["" if e <= 0.0 else "{:.2f}".format(math.sqrt(1.0 / e))
                   for e in xticks]
    ax.set_xticklabels(x_tick_labs)

    pyplot.show()
def run(argv=None):
  """Compute mean, standard deviation, and maximum projection images
  from a set of CSPAD cbf images given on the command line.

  @param argv Command line argument list
  @return     @c 0 on successful termination, @c 1 on error, and @c 2
              for command line syntax errors
  """
  import libtbx.load_env
  from libtbx import option_parser
  from scitbx.array_family import flex
  from dxtbx.format.Registry import Registry
  from xfel.cftbx.detector.cspad_cbf_tbx import cbf_file_to_basis_dict, \
    write_cspad_cbf
  # from xfel.cxi.cspad_ana import cspad_tbx
  # from iotbx.detectors.cspad_detector_formats import reverse_timestamp

  if argv is None:
    argv = sys.argv
  command_line = (option_parser.option_parser(
    usage="%s [-v] [-a PATH] [-m PATH] [-s PATH] "
          "image1 image2 [image3 ...]" % libtbx.env.dispatcher_name)
    .option(None, "--average-path", "-a",
            type="string",
            default=None,
            dest="avg_path",
            metavar="PATH",
            help="Write average image to PATH")
    .option(None, "--maximum-path", "-m",
            type="string",
            default=None,
            dest="max_path",
            metavar="PATH",
            help="Write maximum projection image to PATH")
    .option(None, "--stddev-path", "-s",
            type="string",
            default=None,
            dest="stddev_path",
            metavar="PATH",
            help="Write standard deviation image to PATH")
    .option(None, "--verbose", "-v",
            action="store_true",
            default=False,
            dest="verbose",
            help="Print more information about progress")
    ).process(args=argv[1:])

  # Note that it is not an error to omit the output paths, because
  # certain statistics could still be printed, e.g. with the verbose
  # option.
  paths = command_line.args
  if len(paths) == 0:
    command_line.parser.print_usage(file=sys.stderr)
    return 2

  # Loop over all images and accumulate statistics.
  nfail = 0
  nmemb = 0
  for path in paths:
    if command_line.options.verbose:
      sys.stdout.write("Processing %s...\n" % path)

    try:
      # Promote the image to double-precision floating point type.
      # All real-valued flex arrays have the as_double() function.
      # Warn if the header items across the set of images do not match
      # up.  Note that discrepancies regarding the image size are
      # fatal.
      if 'reader' not in locals():
        reader = Registry.find(path)
      img = reader(path)
      if 'detector' in locals():
        test_detector = img.get_detector()
        if len(test_detector) != len(detector):
          sys.stderr.write(
            "Detectors do not have the same number of panels\n")
          return 1
        for t, d in zip(test_detector, detector):
          if t.get_image_size() != d.get_image_size():
            sys.stderr.write("Panel sizes do not match\n")
            return 1
          if t.get_pixel_size() != d.get_pixel_size():
            sys.stderr.write("Pixel sizes do not match\n")
            return 1
          if t.get_d_matrix() != d.get_d_matrix():
            sys.stderr.write(
              "Detector panels are not all in the same location. "
              "The average will use the positions of the first image.\n")
        detector = test_detector
      else:
        detector = img.get_detector()

      data = [img.get_raw_data()[i].as_1d().as_double()
              for i in xrange(len(detector))]
      wavelength = img.get_beam().get_wavelength()
      distance = flex.mean(flex.double(
        [d.get_directed_distance() for d in detector]))

    except Exception:
      nfail += 1
      continue

    # The sum-of-squares image is accumulated using long integers, as
    # this delays the point where overflow occurs.  But really, this
    # is just a band-aid...
    if nmemb == 0:
      max_img = copy.deepcopy(data)
      sum_distance = distance
      sum_img = copy.deepcopy(data)
      ssq_img = [flex.pow2(d) for d in data]
      sum_wavelength = wavelength
      metro = cbf_file_to_basis_dict(path)
    else:
      sel = [(d > max_d).as_1d() for d, max_d in zip(data, max_img)]
      for d, max_d, s in zip(data, max_img, sel):
        max_d.set_selected(s, d.select(s))

      sum_distance += distance
      for d, sum_d in zip(data, sum_img):
        sum_d += d
      for d, ssq_d in zip(data, ssq_img):
        ssq_d += flex.pow2(d)
      sum_wavelength += wavelength

    nmemb += 1

  # Early exit if no statistics were accumulated.
  if command_line.options.verbose:
    sys.stderr.write("Processed %d images (%d failed)\n" % (nmemb, nfail))
  if nmemb == 0:
    return 0

  # Calculate averages for measures where other statistics do not make
  # sense.  Note that avg_img is required for stddev_img.
  avg_img = [sum_d.as_double() / nmemb for sum_d in sum_img]
  avg_distance = sum_distance / nmemb
  avg_wavelength = sum_wavelength / nmemb

  def make_tiles(data, detector):
    """Assemble a tiles dictionary as required by write_cspad_cbf,
    consisting of 4 arrays of shape 8x185x388.  Assumes the order in
    the data array matches the order of the enumerated detector panels.
    """
    assert len(data) == 64
    tiles = {}
    s, f = 185, 194

    for q_id in xrange(4):
      tiles[0, q_id] = flex.double(flex.grid(s * 8, f * 2))
      for s_id in xrange(8):
        for a_id in xrange(2):
          asic_idx = (q_id * 16) + (s_id * 2) + a_id
          asic = data[asic_idx]
          asic.reshape(flex.grid((s, f)))

          tiles[0, q_id].matrix_paste_block_in_place(
            asic, s_id * s, a_id * f)
      tiles[0, q_id].reshape(flex.grid((8, s, f * 2)))
    return tiles

  # Output the average image, maximum projection image, and standard
  # deviation image, if requested.
  if command_line.options.avg_path is not None:
    tiles = make_tiles(avg_img, detector)
    write_cspad_cbf(tiles, metro, 'cbf', None,
                    command_line.options.avg_path,
                    avg_wavelength, avg_distance)

  if command_line.options.max_path is not None:
    tiles = make_tiles(max_img, detector)
    write_cspad_cbf(tiles, metro, 'cbf', None,
                    command_line.options.max_path,
                    avg_wavelength, avg_distance)

  if command_line.options.stddev_path is not None:
    stddev_img = [ssq_d.as_double() - sum_d.as_double() * avg_d
                  for ssq_d, sum_d, avg_d in zip(ssq_img, sum_img, avg_img)]

    # Accumulating floating-point numbers introduces errors, which may
    # cause negative variances.  Since a two-pass approach is
    # unacceptable, the standard deviation is clamped at zero.
    for stddev_d in stddev_img:
      stddev_d.set_selected(stddev_d < 0, 0)
    if nmemb == 1:
      stddev_img = [flex.sqrt(stddev_d) for stddev_d in stddev_img]
    else:
      stddev_img = [flex.sqrt(stddev_d / (nmemb - 1))
                    for stddev_d in stddev_img]

    tiles = make_tiles(stddev_img, detector)
    write_cspad_cbf(tiles, metro, 'cbf', None,
                    command_line.options.stddev_path,
                    avg_wavelength, avg_distance)

  return 0
def wilson_intensity_variate(N=100):
  "Wilson intensity variate; the exponential distribution"
  return flex.pow2(wilson_amplitude_variate(N=N))
def exercise():
  """Test prepare_map_for_docking using data with known errors."""

  # Generate two half-maps with the same anisotropic signal and
  # independent anisotropic noise.  Test to see how well optimal map
  # coefficients are estimated.

  # Start by working out how large the padding will have to be so that
  # the automatically-generated starting map will be large enough to
  # contain a sphere with room to spare around the model.
  n_residues = 25
  d_min = 2.5
  from cctbx.development.create_models_or_maps import generate_model
  test_model = generate_model(n_residues=n_residues)
  sites_cart = test_model.get_sites_cart()
  cart_min = flex.double(sites_cart.min())
  cart_max = flex.double(sites_cart.max())

  box_centre = (cart_min + cart_max) / 2
  dsqrmax = flex.max((sites_cart - tuple(box_centre)).norms())**2
  model_radius = math.sqrt(dsqrmax)
  min_model_extent = flex.min(cart_max - cart_min)
  pad_to_allow_cube = model_radius - min_model_extent / 2
  # Extra space needed for eventual masking
  boundary_to_smoothing_ratio = 2
  soft_mask_radius = d_min
  padding = soft_mask_radius * boundary_to_smoothing_ratio
  box_cushion = padding + pad_to_allow_cube + d_min  # A bit extra

  # Make map in box big enough to cut out cube containing sphere
  mmm = map_model_manager()
  mmm.generate_map(
    n_residues=n_residues, d_min=d_min, k_sol=0.1, b_sol=50.,
    box_cushion=box_cushion)
  # Keep copy of perfect map for tests of success
  mm_start = mmm.map_manager().deep_copy()
  mmm.add_map_manager_by_id(mm_start, 'perfect_map')
  model = mmm.model()
  sites_cart = model.get_sites_cart()
  cart_min = flex.double(sites_cart.min())
  cart_max = flex.double(sites_cart.max())

  # Turn starting map into map coeffs for the signal
  ucpars = mmm.map_manager().unit_cell().parameters()
  d_max = max(ucpars[0], ucpars[1], ucpars[2])
  start_map_coeffs = mmm.map_as_fourier_coefficients(
    d_min=d_min, d_max=d_max)

  # Apply anisotropic scaling to map coeffs
  b_target = (100., 200., 300., -50., 50., 100.)
  u_star_s = adptbx.u_cart_as_u_star(
    start_map_coeffs.unit_cell(), adptbx.b_as_u(b_target))
  # b_model = (30., 30., 30., 0., 0., 0.)  # All atoms in model have B=30
  # b_expected = list((flex.double(b_target) + flex.double(b_model)))
  scaled_map_coeffs = start_map_coeffs.apply_debye_waller_factors(
    u_star=u_star_s)

  # Generate map coefficient errors for first half-map from complex
  # normal distribution
  b_target_e = (0., 0., 0., -50., -50., 100.)  # Anisotropy for error terms
  u_star_e = adptbx.u_cart_as_u_star(
    start_map_coeffs.unit_cell(), adptbx.b_as_u(b_target_e))
  se_target = 10.  # Target for SigmaE variance term
  rsigma = math.sqrt(se_target / 2.)
  jj = 0. + 1.j  # Imaginary unit for generating complex numbers
  random_complexes1 = flex.complex_double()
  ncoeffs = start_map_coeffs.size()
  random.seed(123457)  # Make runs reproducible
  for i in range(ncoeffs):
    random_complexes1.append(
      random.gauss(0., rsigma) + random.gauss(0., rsigma) * jj)
  rc1_miller = start_map_coeffs.customized_copy(data=random_complexes1)
  mc1_delta = rc1_miller.apply_debye_waller_factors(u_star=u_star_e)
  map1_coeffs = scaled_map_coeffs.customized_copy(
    data=scaled_map_coeffs.data() + mc1_delta.data())

  # Repeat for second half-map with independent errors from the same
  # distribution
  random_complexes2 = flex.complex_double()
  for i in range(ncoeffs):
    random_complexes2.append(
      random.gauss(0., rsigma) + random.gauss(0., rsigma) * jj)
  rc2_miller = start_map_coeffs.customized_copy(data=random_complexes2)
  mc2_delta = rc2_miller.apply_debye_waller_factors(u_star=u_star_e)
  map2_coeffs = scaled_map_coeffs.customized_copy(
    data=scaled_map_coeffs.data() + mc2_delta.data())

  # mmm.write_model("fake_map.pdb")
  mmm.add_map_from_fourier_coefficients(
    map1_coeffs, map_id='map_manager_1')
  mmm.add_map_from_fourier_coefficients(
    map2_coeffs, map_id='map_manager_2')
  # Replace original map_manager with mean of half-maps
  mm_mean_data = (mmm.map_manager_1().map_data()
                  + mmm.map_manager_2().map_data()) / 2
  mmm.map_manager().set_map_data(map_data=mm_mean_data)
  # Add mask map for ordered component of map
  protein_mw = n_residues * 110.  # MW from model would be better...
  nucleic_mw = None
  mask_id = 'ordered_volume_mask'
  add_ordered_volume_mask(
    mmm, d_min, protein_mw=protein_mw, nucleic_mw=nucleic_mw,
    map_id_out=mask_id)
  box_centre = tuple(flex.double((ucpars[0], ucpars[1], ucpars[2])) / 2)

  # Now refine to assess parameters describing map errors
  results = assess_cryoem_errors(
    mmm, d_min, sphere_cent=tuple(box_centre),
    radius=model_radius + d_min, verbosity=0)

  # resultsdict = results.resultsdict
  # b_refined_a = resultsdict["a_baniso"]
  # print("\nIdeal A tensor as Baniso: ", b_expected)
  # print("Refined A tensor as Baniso", b_refined_a)

  # Note that all maps have been cut out with a spherical mask, so
  # compare using these
  new_mmm = results.new_mmm
  perfect_mapCC = new_mmm.map_model_cc(map_id='perfect_map')
  mapCC = new_mmm.map_model_cc(map_id='map_manager_wtd')  # Achieved map
  start_mapCC = new_mmm.map_model_cc()  # Starting map with noise and anisotropy

  mc_perfect = new_mmm.map_as_fourier_coefficients(
    d_min=d_min, d_max=d_max, map_id='perfect_map')
  mc_achieved = new_mmm.map_as_fourier_coefficients(
    d_min=d_min, d_max=d_max, map_id='map_manager_wtd')

  # Compare with results using theoretically perfect error parameters
  # to compute ideal map coefficients.
  sigmaS_terms = flex.pow2(get_power_spectrum(
    mc_perfect))  # Actual signal power before anisotropy
  mc_start = new_mmm.map_as_fourier_coefficients(d_min=d_min, d_max=d_max)
  eE_ideal = mc_start.deep_copy()
  ones_array = flex.double(eE_ideal.size(), 1)
  all_ones = eE_ideal.customized_copy(data=ones_array)
  u_star_s2 = tuple(flex.double(u_star_s) * 2.)
  # Square anisotropy for signal power calc
  sigmaS_terms = sigmaS_terms * all_ones.apply_debye_waller_factors(
    u_star=u_star_s2).data()  # Corrected for anisotropy
  u_star_e2 = tuple(flex.double(u_star_e) * 2.)
  sigmaE_terms = all_ones.apply_debye_waller_factors(
    u_star=u_star_e2).data() * se_target

  scale_terms = 1. / flex.sqrt(sigmaS_terms + sigmaE_terms / 2.)
  dobs_terms = 1. / flex.sqrt(1. + sigmaE_terms / (2 * sigmaS_terms))
  mc_ideal = eE_ideal.customized_copy(
    data=eE_ideal.data() * scale_terms * dobs_terms)

  # write_mtz(mc_achieved, "achieved_map.mtz", "achieved")
  # write_mtz(mc_ideal, "ideal_map.mtz", "ideal")
  mapCC_ideal_achieved = mc_ideal.map_correlation(other=mc_achieved)
  # print("CC between ideal and achieved maps:", mapCC_ideal_achieved)
  assert (mapCC_ideal_achieved > 0.92)

  new_mmm.add_map_from_fourier_coefficients(mc_ideal, map_id='ideal_map')
  ideal_mapCC = new_mmm.map_model_cc(map_id='ideal_map')
  # print("Perfect, starting, ideal and achieved mapCC: ",
  #       perfect_mapCC, start_mapCC, ideal_mapCC, mapCC)
  assert (mapCC > 0.98 * ideal_mapCC)
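# Illustrative sketch (hypothetical helper, not from the original test): a
# scalar spot-check of the ideal-map weighting above.  With signal power
# sigmaS and half-map error power sigmaE, the expected-E scale is
# 1/sqrt(sigmaS + sigmaE/2) and dobs = 1/sqrt(1 + sigmaE/(2*sigmaS)).
def _ideal_weight_demo(sigmaS=4.0, sigmaE=2.0):
  import math
  scale = 1.0 / math.sqrt(sigmaS + sigmaE / 2.0)         # 1/sqrt(5)
  dobs = 1.0 / math.sqrt(1.0 + sigmaE / (2.0 * sigmaS))  # 1/sqrt(1.25)
  return scale * dobs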
def _estimate_cc_sig_fac(self):
  """Estimation of sigma(CC) as a function of sample size.

  Estimate the error in the correlation coefficient, sigma(CC), by
  using pairs of reflections at similar resolutions that are not
  related by potential symmetry.  Using pairs of unrelated reflections
  at similar resolutions, calculate sigma(CC) == rms(CC) for groups of
  size N = 3..200.  The constant CCsigFac is obtained from a linear
  fit of sigma(CC) to 1/N^(1/2), i.e.:

      sigma(CC) = CCsigFac/N^(1/2)
  """
  max_bins = 500
  reflections_per_bin = max(
    200, int(math.ceil(self.intensities.size() / max_bins)))
  binner = self.intensities.setup_binner_counting_sorted(
    reflections_per_bin=reflections_per_bin)

  a = flex.double()
  b = flex.double()
  ma_tmp = self.intensities.customized_copy(
    crystal_symmetry=crystal.symmetry(
      space_group=self.lattice_group,
      unit_cell=self.intensities.unit_cell(),
      assert_is_compatible_unit_cell=False,
    )).map_to_asu()
  for i in range(binner.n_bins_all()):
    count = binner.counts()[i]
    if count == 0:
      continue
    bin_isel = binner.array_indices(i)
    p = flex.random_permutation(count)
    p = p[:2 * (count // 2)]  # ensure even count
    ma_a = ma_tmp.select(bin_isel.select(p[:count // 2]))
    ma_b = ma_tmp.select(bin_isel.select(p[count // 2:]))
    # only choose pairs of reflections that don't have the same
    # indices in the asu of the lattice group
    sel = ma_a.indices() != ma_b.indices()
    a.extend(ma_a.data().select(sel))
    b.extend(ma_b.data().select(sel))

  perm = flex.random_selection(a.size(), min(20000, a.size()))
  a = a.select(perm)
  b = b.select(perm)

  self.corr_unrelated = CorrelationCoefficientAccumulator(a, b)

  n_pairs = a.size()
  min_num_groups = 10  # minimum number of groups
  max_n_group = int(min(n_pairs / min_num_groups, 200))  # maximum number in group
  min_n_group = int(min(5, max_n_group))  # minimum number in group
  if (max_n_group - min_n_group) < 4:
    self.cc_sig_fac = 0
    return

  mean_ccs = flex.double()
  rms_ccs = flex.double()
  ns = flex.double()
  for n in range(min_n_group, max_n_group + 1):
    ns.append(n)
    ccs = flex.double()
    for i in range(200):
      isel = flex.random_selection(a.size(), n)
      corr = CorrelationCoefficientAccumulator(
        a.select(isel), b.select(isel))
      ccs.append(corr.coefficient())
    mean_ccs.append(flex.mean(ccs))
    rms_ccs.append(flex.mean(flex.pow2(ccs))**0.5)

  x = 1 / flex.pow(ns, 0.5)
  y = rms_ccs
  fit = flex.linear_regression(x, y)

  if fit.is_well_defined():
    self.cc_sig_fac = fit.slope()
  else:
    self.cc_sig_fac = 0
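# Illustrative sketch (hypothetical helper, not from the original class):
# the final fit step in isolation.  If rms(CC) really scales as
# CCsigFac/sqrt(N), regressing rms(CC) on 1/sqrt(N) recovers CCsigFac as
# the slope (numbers fabricated for illustration only).
def _cc_sig_fac_fit_demo():
  from scitbx.array_family import flex
  ns = flex.double([10, 20, 50, 100, 200])
  rms_ccs = 0.8 / flex.pow(ns, 0.5)  # fabricated with CCsigFac = 0.8
  fit = flex.linear_regression(1 / flex.pow(ns, 0.5), rms_ccs)
  assert fit.is_well_defined()
  return fit.slope()  # ~0.8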
def generate_test_data(space_group,
                       lattice_group=None,
                       unit_cell=None,
                       unit_cell_volume=1000,
                       seed=0,
                       d_min=1,
                       sigma=0.1,
                       sample_size=100,
                       map_to_p1=False,
                       twin_fractions=None):
  import random
  import scitbx.random

  if seed is not None:
    flex.set_random_seed(seed)
    random.seed(seed)

  assert [unit_cell, lattice_group].count(None) > 0

  sgi = space_group.info()
  if unit_cell is not None:
    from cctbx import crystal
    cs = crystal.symmetry(unit_cell=unit_cell, space_group_info=sgi)
  elif lattice_group is not None:
    from cctbx.sgtbx.subgroups import subgroups
    subgrps = subgroups(lattice_group).groups_parent_setting()
    assert space_group in subgrps
    cs = lattice_group.any_compatible_crystal_symmetry(
      volume=unit_cell_volume).customized_copy(space_group_info=sgi)
  else:
    cs = sgi.any_compatible_crystal_symmetry(volume=unit_cell_volume)

  intensities = generate_intensities(cs, d_min=d_min)
  intensities.show_summary()

  twin_ops = generate_twin_operators(intensities)
  twin_ops = [sgtbx.change_of_basis_op(op.operator.as_xyz())
              for op in twin_ops]
  if twin_fractions is not None:
    assert len(twin_fractions) == len(twin_ops)
    assert len(twin_fractions) == 1, \
      'Only 1 twin component currently supported'
    twin_op = twin_ops[0]
    twin_fraction = twin_fractions[0]
    intensities, intensities_twin = intensities.common_sets(
      intensities.change_basis(twin_op).map_to_asu())
    twinned_miller = intensities.customized_copy(
      data=(1.0 - twin_fraction) * intensities.data()
        + twin_fraction * intensities_twin.data(),
      sigmas=flex.sqrt(
        flex.pow2((1.0 - twin_fraction) * intensities.sigmas())
        + flex.pow2(twin_fraction * intensities_twin.sigmas())))
    intensities = twinned_miller

  cb_ops = twin_ops
  cb_ops.insert(0, sgtbx.change_of_basis_op())

  reindexing_ops = {}
  datasets = []
  rand_norm = scitbx.random.normal_distribution(mean=0, sigma=sigma)
  g = scitbx.random.variate(rand_norm)
  for i in range(sample_size):
    cb_op = random.choice(cb_ops)
    if cb_op.as_xyz() not in reindexing_ops:
      reindexing_ops[cb_op.as_xyz()] = set()
    reindexing_ops[cb_op.as_xyz()].add(i)
    d = intensities.change_basis(cb_op).customized_copy(
      crystal_symmetry=intensities.crystal_symmetry())

    if map_to_p1:
      cb_op_to_primitive = d.change_of_basis_op_to_primitive_setting()
      d = d.change_basis(cb_op_to_primitive)
      d = d.expand_to_p1()

    d = d.customized_copy(data=d.data() + g(d.size()))
    datasets.append(d)

  return datasets, reindexing_ops
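# Illustrative sketch (hypothetical helper, not from the original module): a
# scalar check of the twin-fraction mixing used above.  For
# twin_fraction = 0.3, I1 = 100 +/- 10 and I2 = 50 +/- 8, the twinned
# intensity is 0.7*100 + 0.3*50 = 85 with
# sigma = sqrt((0.7*10)**2 + (0.3*8)**2) ~= 7.40.
def _twin_mixing_demo():
  import math
  twin_fraction = 0.3
  i_twinned = (1 - twin_fraction) * 100 + twin_fraction * 50
  sig_twinned = math.sqrt(((1 - twin_fraction) * 10)**2
                          + (twin_fraction * 8)**2)
  return i_twinned, sig_twinned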
def rms(flex_double):
  return math.sqrt(flex.mean(flex.pow2(flex_double)))
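# Illustrative note: rms(flex.double([3.0, 4.0])) == math.sqrt((9 + 16) / 2)
# ~= 3.536, i.e. the quadratic mean rather than the arithmetic mean.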