def target(self, vector):
    self.counter += 1
    result = 0
    length = flex.sum(flex.pow(vector, 2))
    if (length > self.Rmax2):
        result = 100000000000000
    else:
        new_coord = self.pdb.NMPerturb(self.modes, vector)
        self.pdb.model.updateDistArray(flex.vec3_double(new_coord))
        dist_array = self.pdb.model.getDistArray()
        new_pr = self.pdb.model.Histogram(dist_array, self.dMax, self.n_slot)
        if (self.weighted):
            if (self.scale == 0):
                self.scale = float(flex.mean(flex.abs(self.expt - new_pr))) / float(flex.mean(self.expt))
                self.scale2 = self.scale * self.scale
            result = flex.pow((self.expt - new_pr), 2.0)
            # small regularizer keeps the denominator positive (was garbled as "10 - 12")
            result = flex.sum(flex.exp(-self.scale2 * self.expt2 / (result + 1e-12)) * result)
        else:
            result = flex.sum(flex.pow((self.expt - new_pr), 2.0))
        result = result * 100
    return result
def compute_functional_and_gradients(self):
    self.a = self.x
    y_calc = flex.double(self.x_obs.size(), 0)
    for i in range(self.n):
        y_calc = y_calc + (self.a[i]) * flex.pow(self.x_obs, i)
    y_diff = self.y_obs - y_calc
    f = flex.sum(y_diff * y_diff / self.w_obs)
    g = flex.double(self.n, 0)
    for i in range(self.n):
        g[i] = -flex.sum(2.0 * (y_diff / self.w_obs) * flex.pow(self.x_obs, i))
    print f
    return f, g
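# Hedged sketch of how a compute_functional_and_gradients method like the one
# above is typically consumed: scitbx.lbfgs drives any object exposing that
# method plus a flex.double of parameters named .x. The fitter class, data and
# weights below are illustrative assumptions, not taken from the original
# source; only scitbx.lbfgs.run(target_evaluator=...) is established API.
import scitbx.lbfgs
from scitbx.array_family import flex

class fitter(object):
    def __init__(self, x_obs, y_obs, w_obs, n):
        self.x_obs, self.y_obs, self.w_obs, self.n = x_obs, y_obs, w_obs, n
        self.x = flex.double(n, 0)  # parameters; updated in place by lbfgs
        scitbx.lbfgs.run(target_evaluator=self)

    def compute_functional_and_gradients(self):
        y_calc = flex.double(self.x_obs.size(), 0)
        for i in range(self.n):
            y_calc += self.x[i] * flex.pow(self.x_obs, i)
        y_diff = self.y_obs - y_calc
        f = flex.sum(y_diff * y_diff / self.w_obs)
        g = flex.double(
            [-flex.sum(2.0 * (y_diff / self.w_obs) * flex.pow(self.x_obs, i))
             for i in range(self.n)])
        return f, g

x = flex.double(range(-10, 11))
y = 1.0 + 2.0 * x + 3.0 * x * x
fit = fitter(x, y, flex.double(x.size(), 1.0), 3)
print([round(v, 3) for v in fit.x])  # ~[1.0, 2.0, 3.0]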
def single_level_curve(self, gi, rgi, bi, rgcfi, pi, q=None):
    gi = abs(gi)
    rgi = abs(rgi)
    bi = abs(bi)
    rgcfi = abs(rgcfi)
    if q is None:
        q = self.q
    result = abs(gi) * flex.exp(-q * q * rgi * rgi / 3.0) \
        + bi * flex.exp(-q * q * rgcfi * rgcfi / 3.0) \
        * flex.pow(flex.pow(sm.erf(q * rgi * self.cnst), 3.0) / (q + self.eps), pi)
    return result
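# Hedged standalone sketch of single_level_curve above: one level of the
# Beaucage-style unified Guinier/power-law model,
#   I(q) = G exp(-q^2 Rg^2 / 3) + B exp(-q^2 Rgcf^2 / 3) * (erf(q Rg c)^3 / q)^P.
# sm.erf, self.cnst and self.eps belong to the host class; here erf is
# emulated elementwise with math.erf, and cnst = 1/sqrt(6), eps = 1e-9 are
# assumed typical values, not taken from the original source.
import math
from scitbx.array_family import flex

def erf_flex(x):
    return flex.double([math.erf(v) for v in x])

q = flex.double(range(1, 101)) / 200.0  # assumed q grid, 0.005..0.5
cnst = 1.0 / math.sqrt(6.0)
eps = 1e-9
G, Rg, B, Rgcf, P = 100.0, 30.0, 1e-3, 0.0, 4.0
I = G * flex.exp(-q * q * Rg * Rg / 3.0) + B * flex.exp(-q * q * Rgcf * Rgcf / 3.0) * \
    flex.pow(flex.pow(erf_flex(q * Rg * cnst), 3.0) / (q + eps), P)
print("%.3g %.3g" % (I[0], I[99]))  # Guinier-dominated at low q, power law at high q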
def estimate_error(self):
    for ii in xrange(self.n_trial):
        pr = self.trials[ii].get_best_pofr()
        pr.pofx.normalize()
        pofr = pr.f(self.r)
        self.mean_pr = self.mean_pr + pofr
        self.mean2 = self.mean2 + flex.pow(pofr, 2.0)
        if (ii == self.chi_index):
            self.best_pr = pofr
    self.mean_pr /= self.n_trial
    self.mean2 /= self.n_trial
    self.error = flex.pow(self.mean2 - flex.pow(self.mean_pr, 2.0), 0.5)
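# Hedged sketch of the statistic computed by estimate_error above: the
# per-point spread over n_trial curves as sqrt(E[x^2] - E[x]^2). A minimal
# standalone check with three fake "trials" (plain flex arrays; no P(r)
# machinery assumed):
from scitbx.array_family import flex

trials = [flex.double([1.0, 2.0]), flex.double([2.0, 4.0]), flex.double([3.0, 6.0])]
mean = flex.double(2, 0)
mean2 = flex.double(2, 0)
for t in trials:
    mean += t
    mean2 += flex.pow(t, 2.0)
mean /= len(trials)
mean2 /= len(trials)
# population standard deviation; E[x^2] - E[x]^2 can lose precision for
# nearly constant data
error = flex.pow(mean2 - flex.pow(mean, 2.0), 0.5)
print(list(error))  # ~[0.8165, 1.6330]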
def target(self, vector):
    scales, offsets = self.get_scales_offsets(vector)
    dr = self.get_mean(scales, offsets)
    result = 0
    for jj in xrange(self.n_sets):
        dj = scales[jj] * (self.means[jj] + offsets[jj])
        vj = self.vars[jj] * scales[jj] * scales[jj]
        t = flex.pow((dj - dr), 2) / (1e-13 + vj)
        if self.n_sets != 2:
            result += flex.sum(t)
        else:
            if jj != self.ref:
                vr = self.vars[self.ref]
                result += flex.sum(flex.pow((dj - dr), 2) / (1e-13 + vj + vr))
    return result
def do_polynomial_fit(x, params):
    n_terms = len(params)
    y = flex.double(x.size())
    for i in range(len(params)):
        y += params[i] * flex.pow(x, i)
    fit = curve_fitting.univariate_polynomial_fit(x, y, degree=n_terms - 1)
    assert approx_equal(params, fit.params, eps=1e-4)
def target(self, vector):
    """Compute the functional by first applying the current values for the sd
    parameters to the input data, then computing the complete set of
    normalized deviations and finally using those normalized deviations to
    compute the functional."""
    sdfac, sdb, sdadd = vector[0], 0.0, vector[1]
    a_new_variance, b_new_variance = ccp4_model.apply_sd_error_params(
        vector, a_data, b_data, a_sigmas, b_sigmas)
    mean_num = (a_data / a_new_variance) + (b_data / b_new_variance)
    mean_den = (1. / a_new_variance) + (1. / b_new_variance)
    mean_values = mean_num / mean_den
    delta_I_a = a_data - mean_values
    normal_a = delta_I_a / flex.sqrt(a_new_variance)
    delta_I_b = b_data - mean_values
    normal_b = delta_I_b / flex.sqrt(b_new_variance)
    mean_order = flex.sort_permutation(mean_values)
    scatters = flex.double(50)
    scattersb = flex.double(50)
    for isubsection in xrange(50):
        subselect = mean_order[
            isubsection * len(mean_order) // 50:(isubsection + 1) * len(mean_order) // 50]
        vals = normal_a.select(subselect)
        scatters[isubsection] = flex.mean_and_variance(vals).unweighted_sample_variance()
        valsb = normal_b.select(subselect)
        scattersb[isubsection] = flex.mean_and_variance(valsb).unweighted_sample_variance()
    f = flex.sum(flex.pow(1. - scatters, 2))
    print "f: % 12.1f, sdfac: %8.5f, sdb: %8.5f, sdadd: %8.5f" % (f, sdfac, sdb, sdadd)
    return f
def get_rg(self):
    r = self.dmax * flex.double(range(1, 101)) / 100.0
    pr = self.f(r)
    rg2 = flex.sum(flex.pow(r, 2.0) * pr)
    norma = flex.sum(pr)
    # 1.414 ~ sqrt(2): for a pair-distance distribution, Rg^2 = <r^2> / 2
    rg = math.sqrt(rg2 / norma) / 1.414
    return rg
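# Hedged numeric check of the Rg formula in get_rg above: Rg^2 = <r^2>_P(r)/2,
# so a P(r) concentrated at a single distance r0 gives Rg = r0/sqrt(2) (the Rg
# of a two-point dumbbell). Standalone; no self.f assumed.
import math
from scitbx.array_family import flex

r = flex.double(range(1, 101)) / 100.0 * 50.0  # 0.5..50, step 0.5
pr = flex.double(100, 0.0)
pr[59] = 1.0                                   # all pairs at r0 = 30
rg = math.sqrt(flex.sum(flex.pow(r, 2.0) * pr) / flex.sum(pr)) / math.sqrt(2.0)
print("%.3f" % rg)  # 21.213 = 30 / sqrt(2)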
def test_log_inv_fit():
    x = flex.double(range(0, 100)) * 0.01
    p = (1, 2)
    yo = flex.double(x.size())
    for i in range(len(p)):
        yo += 1 / flex.exp(p[i] * flex.pow(x, i))
    yf = resolution_analysis.log_inv_fit(x, yo, degree=2)
    assert yo == pytest.approx(yf, abs=1e-2)
def combined_scores(self):
    scores = sum(
        flex.pow(score.as_double(), self.power)
        for score in (
            self.score_by_fraction_indexed(),
            self.score_by_volume(),
            self.score_by_rmsd_xy(),
        ))
    return scores
def test_polynomial_fit():
    x = flex.double(range(-50, 50))
    p = (2, 3, 5)
    yo = flex.double(x.size())
    for i in range(len(p)):
        yo += p[i] * flex.pow(x, i)
    yf = resolution_analysis.polynomial_fit(x, yo, degree=2)
    assert yo == pytest.approx(yf)
def adobe_rgb_to_xyz(r, g, b):
    '''Convert Adobe-encoded RGB values to linearised X, Y, Z values.'''
    from scitbx.array_family import flex
    scale = 1.0 / 255.0
    gamma = 563.0 / 256.0
    rl = flex.pow(r * scale, gamma)
    gl = flex.pow(g * scale, gamma)
    bl = flex.pow(b * scale, gamma)
    # matrix from https://www.adobe.com/digitalimag/pdfs/AdobeRGB1998.pdf
    x = 0.57557 * rl + 0.18556 * gl + 0.18823 * bl
    y = 0.29734 * rl + 0.62736 * gl + 0.07529 * bl
    z = 0.02703 * rl + 0.07069 * gl + 0.99134 * bl
    return x, y, z
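# Usage sketch for adobe_rgb_to_xyz above: channels arrive as flex.double
# arrays of 8-bit values (0..255). As a sanity check, pure white should map
# close to the column sums of the matrix (the white point).
from scitbx.array_family import flex

r = flex.double([255.0, 0.0])
g = flex.double([255.0, 128.0])
b = flex.double([255.0, 0.0])
x, y, z = adobe_rgb_to_xyz(r, g, b)
print("%.4f %.4f %.4f" % (x[0], y[0], z[0]))  # ~0.9494 1.0000 1.0891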
def parabola_powered(x, k=None):
    x_min = flex.min(x)
    x_max = flex.max(x)
    assert (x_min >= -1)
    assert (x_max <= 1)
    base = 1 - x * x
    if k is not None:
        base = flex.pow(base, k)
    return base
def get_norm_pr_with_var(self):
    self.calc_inter_hist()
    self.pr = self.intra_pr + self.inter_pr
    if (self.total_n_pair == 0):
        self.total_n_pair = sum(self.pr)
    self.norm_pr = self.pr / self.total_n_pair
    self.var = flex.pow(self.var, 0.5)
    self.var = self.var / self.total_n_pair
    return self.norm_pr, self.var
def base(self, x):
    x_min = flex.min(x)
    x_max = flex.max(x)
    assert (x_min >= -1)
    assert (x_max <= 1)
    base_function = (1 - x * x) * 3.0 / 4.0
    if self.k is not None:
        # self.k, not the undefined bare name k
        base_function = flex.pow(base_function, self.k)
    return base_function
def estimate_cc_sig_fac(self):
    # A1.1. Estimation of sigma(CC) as a function of sample size.
    binner = self.intensities.setup_binner_counting_sorted(reflections_per_bin=200)
    a = flex.double()
    b = flex.double()
    for i in range(binner.n_bins_all()):
        count = binner.counts()[i]
        if count == 0:
            continue
        bin_isel = binner.array_indices(i)
        p = flex.random_permutation(count)
        p = p[:2 * (count // 2)]  # ensure even count
        a.extend(self.intensities.data().select(bin_isel.select(p[:count // 2])))
        b.extend(self.intensities.data().select(bin_isel.select(p[count // 2:])))
    perm = flex.random_selection(a.size(), min(20000, a.size()))
    a = a.select(perm)
    b = b.select(perm)

    self.corr_unrelated = CorrelationCoefficientAccumulator(a, b)

    n_pairs = a.size()
    min_num_groups = 10  # minimum number of groups
    max_n_group = int(min(n_pairs / min_num_groups, 200))  # maximum number in group
    min_n_group = int(min(5, max_n_group))  # minimum number in group

    mean_ccs = flex.double()
    rms_ccs = flex.double()
    ns = flex.double()
    for n in range(min_n_group, max_n_group):
        ns.append(n)
        ccs = flex.double()
        for i in range(200):
            isel = flex.random_selection(a.size(), n)
            corr = CorrelationCoefficientAccumulator(a.select(isel), b.select(isel))
            ccs.append(corr.coefficient())
        mean_ccs.append(flex.mean(ccs))
        rms_ccs.append(flex.mean(flex.pow2(ccs))**0.5)

    x = 1 / flex.pow(ns, 0.5)
    y = rms_ccs
    fit = flex.linear_regression(x, y)
    assert fit.is_well_defined()
    self.cc_sig_fac = fit.slope()

    if 0:
        from matplotlib import pyplot as plt
        plt.plot(x, y)
        plt.plot(
            plt.xlim(),
            [fit.slope() * x_ + fit.y_intercept() for x_ in plt.xlim()])
        plt.show()
def apply_sd_error_params(vector, a_data, b_data, a_sigmas, b_sigmas):
    sdfac, sdb, sdadd = vector[0], 0, vector[1]
    a_variance = a_sigmas * a_sigmas
    b_variance = b_sigmas * b_sigmas
    mean_num = (a_data / a_variance) + (b_data / b_variance)
    mean_den = (1. / a_variance) + (1. / b_variance)
    mean_values = mean_num / mean_den
    I_mean_dependent_part = sdb * mean_values + flex.pow(sdadd * mean_values, 2)
    a_new_variance = sdfac * sdfac * (a_variance + I_mean_dependent_part)
    b_new_variance = sdfac * sdfac * (b_variance + I_mean_dependent_part)
    return a_new_variance, b_new_variance
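# Hedged usage sketch for apply_sd_error_params above. The model is the
# CCP4/SCALA-style correction sigma'^2 = sdfac^2 * (sigma^2 + sdb*<I> +
# (sdadd*<I>)^2), with sdb pinned to 0 here so vector = (sdfac, sdadd).
# The numbers below are made up for illustration.
from scitbx.array_family import flex

a_data = flex.double([100.0, 200.0])
b_data = flex.double([110.0, 190.0])
a_sigmas = flex.double([10.0, 14.0])
b_sigmas = flex.double([11.0, 13.0])
va, vb = apply_sd_error_params((1.1, 0.05), a_data, b_data, a_sigmas, b_sigmas)
print(list(flex.sqrt(va)))  # inflated sigmas for half-set a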
def target(self, vector):
    self.pofr.update(vector)
    if (self.outofbox(vector)):
        # print "&"
        return 10e12
    calc_data = self.pofr.i(self.data.q)
    score = calc_data - self.data.i
    score = flex.pow(score, 2.0)
    # score = flex.pow(score / self.data.s, 2.0)
    score = flex.sum(score) / calc_data.size()
    t, a, b = self.pofr.entropy_simple()
    total_score = score + t * self.alpha
    # print "#", list(vector), total_score
    return total_score
def __init__(self, she_object, observed_data):
    # we'll only optimize the scale factor and form factor of excluded solvent
    self.rm = 1.62
    self.rho = 0.334
    self.drho = 0.03
    self.obs = observed_data
    self.she_object = she_object
    self.rm_fluct_scale = -(4.0 * smath.pi / 3.0)**1.5 * smath.pi * flex.pow(
        self.obs.q, 2.0) * self.rm**2.0
    ### setup the scan range ###
    self.default_a = 1.0
    self.a_range = flex.double(range(-10, 11)) / 50.0 + self.default_a
    self.drho_range = (flex.double(range(-10, 21)) / 10.0 + 1.0) * self.drho
    self.scan()
def target(self, vector):
    self.counter += 1
    result = 0
    length = flex.sum(flex.pow(vector, 2))
    if (length > self.Rmax2):
        result = 1e30
    else:
        new_coord = self.pdb.NMPerturb(self.modes, vector)
        t1 = time.time()
        self.she_engine.engine.update_coord(flex.vec3_double(new_coord), self.new_indx)
        new_I = self.she_engine.engine.I()
        self.time_she += (time.time() - t1)
        var = self.expt_s
        s, o = she.linear_fit(new_I, self.expt_I, var)
        result = flex.sum(flex.pow2((self.expt_I - (s * new_I + o)) / self.expt_s))
    return result
def get_r_free_stats(miller_array, test_flag_value):
    from scitbx.array_family import flex
    array = get_r_free_as_bool(miller_array, test_flag_value)
    n_free = array.data().count(True)
    accu = array.sort(by_value="resolution").r_free_flags_accumulation()
    lr = flex.linear_regression(accu.reflection_counts.as_double(),
                                accu.free_fractions)
    assert lr.is_well_defined()
    slope = lr.slope()
    y_ideal = accu.reflection_counts.as_double() * slope
    n_bins = 0
    n_ref_last = 0
    sse = flex.sum(flex.pow(y_ideal - accu.free_fractions, 2))
    for x in accu.reflection_counts:
        if x > (n_ref_last + 1):
            n_bins += 1
            n_ref_last = x
    return (n_bins, n_free, sse, accu)
def __init__(self, start_pdb, target_pr, ntotal, nmodes, max_rmsd, backbone_scale, prefix):
    self.counter = 0
    self.nmode_init = ntotal
    self.nmodes = nmodes
    self.topn = 10
    self.Niter = 0
    self.modes = flex.int(range(self.nmode_init)) + 7
    self.cutoff = 10
    self.weighted = True
    ##### for histogram #####
    r, self.expt = self.readPr(target_pr)
    self.expt2 = flex.pow(self.expt, 2.0)
    # print list(self.expt)
    start_name = start_pdb
    self.pdb = PDB(start_name)
    self.natom = self.pdb.natm
    self.scale_factor = backbone_scale
    self.pdb.Hessian = self.pdb.Hessian(self.cutoff, self.nmode_init, self.scale_factor)
    self.root = prefix
    self.dMax = max(r)
    self.n_slot = r.size()
    self.scale = 0
    self.drmsd = max_rmsd
    self.Rmax2 = self.natom * (self.drmsd)**2.0
    self.step_size = sqrt(self.Rmax2 / self.nmodes) * 2.0
    self.new_indx = flex.int(range(self.natom))
    self.stop = False
    self.minscore = 1e20
    self.minDev = 0   # minimum deviation of refined structures from the previous step's refined structure
    self.optNum = 20  # number of iterations between geometry optimizations
    self.iterate()
def __call__(self, x_obs):
    y_calc = flex.double(x_obs.size())
    for n in range(self.n_terms):
        y_calc += self.params[n] * flex.pow(x_obs, n)
    return y_calc
def run_cc(params, reindexing_op, output):
    uniform, selected_uniform, have_iso_ref = load_cc_data(params, reindexing_op, output)
    NBIN = params.output.n_bins
    if have_iso_ref:
        slope, offset, corr_iso, N_iso = correlation(
            selected_uniform[1], selected_uniform[0], params.include_negatives)
        print >> output, "C.C. iso is %.1f%% on %d indices" % (100 * corr_iso, N_iso)
    slope, offset, corr_int, N_int = correlation(
        selected_uniform[2], selected_uniform[3], params.include_negatives)
    print >> output, "C.C. int is %.1f%% on %d indices" % (100.0 * corr_int, N_int)
    if have_iso_ref:
        binned_cc_ref, binned_cc_ref_N = binned_correlation(
            selected_uniform[1], selected_uniform[0], params.include_negatives)
        # binned_cc_ref.show(f=output)
        ref_scale = scale_factor(
            selected_uniform[1], selected_uniform[0],
            weights=flex.pow(selected_uniform[1].sigmas(), -2),
            use_binning=True)
        # ref_scale.show(f=output)
        ref_riso = r1_factor(
            selected_uniform[1], selected_uniform[0],
            scale_factor=ref_scale, use_binning=True)
        # ref_riso.show(f=output)
        ref_scale_all = scale_factor(
            selected_uniform[1], selected_uniform[0],
            weights=flex.pow(selected_uniform[1].sigmas(), -2))
        ref_riso_all = r1_factor(
            selected_uniform[1], selected_uniform[0], scale_factor=ref_scale_all)
    binned_cc_int, binned_cc_int_N = binned_correlation(
        selected_uniform[2], selected_uniform[3], params.include_negatives)
    # binned_cc_int.show(f=output)
    oe_scale = scale_factor(
        selected_uniform[2], selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2)
        + flex.pow(selected_uniform[3].sigmas(), -2),
        use_binning=True)
    # oe_scale.show(f=output)
    oe_rint = r1_factor(
        selected_uniform[2], selected_uniform[3],
        scale_factor=oe_scale, use_binning=True)
    # oe_rint.show(f=output)
    oe_rsplit = r_split(selected_uniform[2], selected_uniform[3], use_binning=True)
    oe_scale_all = scale_factor(
        selected_uniform[2], selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2)
        + flex.pow(selected_uniform[3].sigmas(), -2))
    oe_rint_all = r1_factor(
        selected_uniform[2], selected_uniform[3], scale_factor=oe_scale_all)
    oe_rsplit_all = r_split(selected_uniform[2], selected_uniform[3])
    if have_iso_ref:
        print >> output, "R factors Riso = %.1f%%, Rint = %.1f%%" % (
            100.0 * ref_riso_all, 100.0 * oe_rint_all)
    else:
        print >> output, "R factor Rint = %.1f%%" % (100.0 * oe_rint_all)

    split_sigma_data = split_sigma_test(
        selected_uniform[2], selected_uniform[3],
        scale=oe_scale, use_binning=True, show_plot=False)
    split_sigma_data_all = split_sigma_test(
        selected_uniform[2], selected_uniform[3],
        scale=oe_scale_all, use_binning=False, show_plot=False)

    print >> output
    if reindexing_op == "h,k,l":
        print >> output, "Table of Scaling Results:"
    else:
        print >> output, "Table of Scaling Results Reindexing as %s:" % reindexing_op

    from libtbx import table_utils
    table_header = ["", "", "", "CC", " N", "CC", " N", "R", "R", "R",
                    "Scale", "Scale", "SpSig"]
    table_header2 = ["Bin", "Resolution Range", "Completeness",
                     "int", "int", "iso", "iso", "int", "split", "iso",
                     "int", "iso", "Test"]
    table_data = []
    table_data.append(table_header)
    table_data.append(table_header2)
    items = binned_cc_int.binner.range_used()

    # XXX Make it clear what the completeness here actually is!
    cumulative_counts_given = 0
    cumulative_counts_complete = 0
    for bin in items:
        table_row = []
        table_row.append("%3d" % bin)
        table_row.append("%-13s" % binned_cc_int.binner.bin_legend(
            i_bin=bin, show_bin_number=False, show_bin_range=False,
            show_d_range=True, show_counts=False))
        table_row.append("%13s" % binned_cc_int.binner.bin_legend(
            i_bin=bin, show_bin_number=False, show_bin_range=False,
            show_d_range=False, show_counts=True))
        cumulative_counts_given += binned_cc_int.binner._counts_given[bin]
        cumulative_counts_complete += binned_cc_int.binner._counts_complete[bin]
        table_row.append("%.1f%%" % (100.0 * binned_cc_int.data[bin]))
        table_row.append("%7d" % (binned_cc_int_N.data[bin]))
        if have_iso_ref and binned_cc_ref.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * binned_cc_ref.data[bin]))
        else:
            table_row.append("--")
        if have_iso_ref and binned_cc_ref_N.data[bin] is not None:
            table_row.append("%6d" % (binned_cc_ref_N.data[bin]))
        else:
            table_row.append("--")
        if oe_rint.data[bin] is not None:
            table_row.append("%.1f%%" % (100.0 * oe_rint.data[bin]))
        else:
            table_row.append("--")
        if oe_rsplit.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * oe_rsplit.data[bin]))
        else:
            table_row.append("--")
        if have_iso_ref and ref_riso.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * ref_riso.data[bin]))
        else:
            table_row.append("--")
        if oe_scale.data[bin] is not None:
            table_row.append("%.3f" % oe_scale.data[bin])
        else:
            table_row.append("--")
        if have_iso_ref and ref_scale.data[bin] is not None:
            table_row.append("%.3f" % ref_scale.data[bin])
        else:
            table_row.append("--")
        if split_sigma_data.data[bin] is not None:
            table_row.append("%.4f" % split_sigma_data.data[bin])
        else:
            table_row.append("--")
        table_data.append(table_row)
    table_data.append([""] * len(table_header))

    table_row = [
        format_value("%3s", "All"),
        format_value("%-13s", " "),
        format_value("%13s", "[%d/%d]" % (cumulative_counts_given,
                                          cumulative_counts_complete)),
        format_value("%.1f%%", 100 * corr_int),
        format_value("%7d", N_int),
    ]
    if have_iso_ref:
        table_row.extend((format_value("%.1f%%", 100 * corr_iso),
                          format_value("%6d", N_iso)))
    else:
        table_row.extend(("--", "--"))
    table_row.extend((format_value("%.1f%%", 100 * oe_rint_all),
                      format_value("%.1f%%", 100 * oe_rsplit_all)))
    if have_iso_ref:
        table_row.append(format_value("%.1f%%", 100 * ref_riso_all))
    else:
        table_row.append("--")
    table_row.append(format_value("%.3f", oe_scale_all))
    if have_iso_ref:
        table_row.append(format_value("%.3f", ref_scale_all))
    else:
        table_row.append("--")
    if split_sigma_data_all is not None:
        table_row.append("%.1f" % split_sigma_data_all)
    else:
        table_row.append("--")
    table_data.append(table_row)

    print >> output
    print >> output, table_utils.format(table_data, has_header=2,
                                        justify="center", delim=" ")
    print >> output, """CCint is the CC-1/2 defined by Diederichs; correlation
between odd/even images. Similarly, Scale int and R int are the scaling factor
and scaling R factor between odd/even images. "iso" columns compare the whole
XFEL dataset to the isomorphous reference."""
    print >> output, """Niso: result vs.
reference common set""",
    if params.include_negatives:
        print >> output, """including negative merged intensities (set by phil parameter)."""
    elif params.scaling.log_cutoff is None:
        print >> output
    else:
        print >> output, """with intensities < %7.2g filtered out (controlled by
scaling.log_cutoff phil parameter set to %5.1f)""" % (
            math.exp(params.scaling.log_cutoff), params.scaling.log_cutoff)

    if have_iso_ref:
        assert N_iso == flex.sum(
            flex.double([x for x in binned_cc_ref_N.data if x is not None]))
    assert N_int == flex.sum(
        flex.double([x for x in binned_cc_int_N.data if x is not None]))

    if params.scaling.show_plots:
        from matplotlib import pyplot as plt
        plt.plot(flex.log(selected_uniform[-2].data()),
                 flex.log(selected_uniform[-1].data()), "r.")
        plt.show()
        if have_iso_ref:
            plt.plot(flex.log(selected_uniform[0].data()),
                     flex.log(selected_uniform[1].data()), "r.")
            plt.show()
    print >> output
def split_sigma_test(self, other, scale, use_binning=False, show_plot=False):
    """
    Calculates the split sigma ratio test by Peter Zwart:
    ssr = sum( (Iah - Ibh)^2 ) / sum( sigma_ah^2 + sigma_bh^2 )

    where Iah and Ibh are merged intensities for a given hkl from two halves
    of a dataset (a and b). Likewise for sigma_ah and sigma_bh.

    ssr (split sigma ratio) should approximately equal 1 if the errors are
    correctly estimated.
    """
    assert other.size() == self.data().size()
    assert (self.indices() == other.indices()).all_eq(True)
    assert not use_binning or self.binner() is not None

    if use_binning:
        results = []
        for i_bin in self.binner().range_all():
            sel = self.binner().selection(i_bin)
            i_self = self.select(sel)
            i_other = other.select(sel)
            scale_rel = scale.data[i_bin]
            if i_self.size() == 0:
                results.append(None)
            else:
                results.append(split_sigma_test(
                    i_self, i_other, scale=scale_rel, show_plot=show_plot))
        return binned_data(binner=self.binner(), data=results, data_fmt="%7.4f")

    a_data = self.data()
    b_data = scale * other.data()
    a_sigmas = self.sigmas()
    b_sigmas = scale * other.sigmas()

    if show_plot:
        """
        # Diagnostic use of the (I - <I>) / sigma distribution, should have mean=0, std=1
        a_variance = a_sigmas * a_sigmas
        b_variance = b_sigmas * b_sigmas
        mean_num = (a_data / a_variance) + (b_data / b_variance)
        mean_den = (1. / a_variance) + (1. / b_variance)
        mean_values = mean_num / mean_den

        delta_I_a = a_data - mean_values
        normal_a = delta_I_a / a_sigmas
        stats_a = flex.mean_and_variance(normal_a)
        print "\nA mean %7.4f std %7.4f" % (stats_a.mean(), stats_a.unweighted_sample_standard_deviation())
        order_a = flex.sort_permutation(normal_a)

        delta_I_b = b_data - mean_values
        normal_b = delta_I_b / b_sigmas
        stats_b = flex.mean_and_variance(normal_b)
        print "B mean %7.4f std %7.4f" % (stats_b.mean(), stats_b.unweighted_sample_standard_deviation())
        order_b = flex.sort_permutation(normal_b)

        # plots for debugging
        from matplotlib import pyplot as plt
        plt.plot(xrange(len(order_a)), normal_a.select(order_a), "b.")
        plt.plot(xrange(len(order_b)), normal_b.select(order_b), "r.")
        plt.show()
        """
        from cctbx.examples.merging.sigma_correction import ccp4_model
        Correction = ccp4_model()
        Correction.plots(a_data, b_data, a_sigmas, b_sigmas)
        # a_new_variance, b_new_variance = Correction.optimize(a_data, b_data, a_sigmas, b_sigmas)
        # Correction.plots(a_data, b_data, flex.sqrt(a_new_variance), flex.sqrt(b_new_variance))

    n = flex.pow(a_data - b_data, 2)
    d = flex.pow(a_sigmas, 2) + flex.pow(b_sigmas, 2)
    return flex.sum(n) / flex.sum(d)
def post_min_recalc(self):
    print("ENTERING post_min_recalc cx, cy",
          flex.mean(self.spotcx), flex.mean(self.spotcy),
          len(self.spotcx), len(self.spotcy))
    print("ENTERING post_min_recalc fx, fy",
          flex.mean(self.spotfx), flex.mean(self.spotfy),
          len(self.spotfx), len(self.spotfy))
    print("HATTNE check input 0",
          flex.mean(self.model_calcx), flex.mean(self.model_calcy),
          len(self.model_calcx), len(self.model_calcy))

    self.delrsq = self.delrsq_functional(calcx=self.model_calcx,
                                         calcy=self.model_calcy)
    self.tile_rmsd = [0.] * (len(self.tiles) // 4)
    self.asymmetric_tile_rmsd = [0.] * (len(self.tiles) // 4)
    self.correction_vector_x = self.model_calcx - self.spotfx
    self.correction_vector_y = self.model_calcy - self.spotfy
    self.post_mean_cv = []

    print("HATTNE post_min_recalc input CV x,y",
          flex.mean(flex.pow(self.correction_vector_x, 2)),
          flex.mean(flex.pow(self.correction_vector_y, 2)))
    print("HATTNE post_min_recalc input model ",
          flex.mean(self.model_calcx), flex.mean(self.model_calcy))

    for x in range(len(self.tiles) // 4):
        # if self.tilecounts[x] == 0: continue
        selection = self.selections[x]
        selected_cv = self.master_cv.select(selection)
        if selection.count(True) == 0:
            self.post_mean_cv.append(matrix.col((0, 0)))
            self.asymmetric_tile_rmsd[x] = 0
            self.tile_rmsd[x] = 0
        else:
            self.post_mean_cv.append(matrix.col([
                flex.mean(self.correction_vector_x.select(selection)),
                flex.mean(self.correction_vector_y.select(selection))]))
            self.asymmetric_tile_rmsd[x] = math.sqrt(
                flex.mean(self.delrsq.select(selection)))
            sel_delx = self.correction_vector_x.select(selection)
            sel_dely = self.correction_vector_y.select(selection)
            symmetric_offset_x = sel_delx - self.post_mean_cv[x][0]
            symmetric_offset_y = sel_dely - self.post_mean_cv[x][1]
            symmetricrsq = symmetric_offset_x * symmetric_offset_x + \
                           symmetric_offset_y * symmetric_offset_y
            """
            if x == 14:
                print "STATS FOR TILE 14"
                aa = list(self.tiles[x * 4:(x + 1) * 4])
                print "EFFECTIVE tiling", aa
                print "EFFECTIVE center", \
                    ((aa[0] + aa[2]) / 2, (aa[1] + aa[3]) / 2), \
                    (aa[2] - aa[0], aa[3] - aa[1])
                print "sel_delx", list(sel_delx)
                #print "sel_dely", list(sel_dely)
                print "model_calcx", list(self.model_calcx.select(selection))
                #print "model_calcy", list(self.model_calcy.select(selection))
                print "spotfx", list(self.spotfx.select(selection))
                #print "spotfy", list(self.spotfy.select(selection))
                print "spotcx", list(self.spotcx.select(selection))
                #print "spotcy", list(self.spotcy.select(selection))
                print "  sel_delx           ", flex.min(sel_delx), \
                    flex.mean(sel_delx), flex.max(sel_delx)
                print "  sel_dely           ", flex.min(sel_dely), \
                    flex.mean(sel_dely), flex.max(sel_dely)
                print "  symmetric_offset_x ", flex.min(symmetric_offset_x), \
                    flex.mean(symmetric_offset_x), flex.max(symmetric_offset_x)
                print "  symmetric_offset_y ", flex.min(symmetric_offset_y), \
                    flex.mean(symmetric_offset_y), flex.max(symmetric_offset_y)
                print "  symmetric rsq      ", flex.min(symmetricrsq), \
                    flex.mean(symmetricrsq), flex.max(symmetricrsq)
                print "  rmsd               ", math.sqrt(flex.mean(symmetricrsq))
                #import sys
                #sys.exit(0)
            """
            self.tile_rmsd[x] = math.sqrt(flex.mean(symmetricrsq))

    self.overall_N = flex.sum(flex.int([int(t) for t in self.tilecounts]))
    self.overall_cv = matrix.col([flex.mean(self.correction_vector_x),
                                  flex.mean(self.correction_vector_y)])
    self.overall_rmsd = math.sqrt(
        flex.mean(self.delrsq_functional(self.model_calcx, self.model_calcy)))

    print("HATTNE post_min_recalc post_min_recalc 1", list(self.overall_cv))
    print("HATTNE post_min_recalc post_min_recalc 2", self.overall_rmsd)
def update_model_error(self, rmax, a=-19.93, b=18.95, c=0.52, d=-1.13):
    self.ratio = self.data.q * rmax
    self.ratio = flex.pow(self.ratio, c) * d
    self.ratio = a + b / (flex.exp(self.ratio) + 1.0)
    self.ratio = flex.exp(self.ratio)
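# Hedged standalone sketch of the error-ratio model in update_model_error
# above: a generalised logistic in (q*rmax)^c, exponentiated at the end so the
# ratio stays positive. Coefficients are the defaults from the signature;
# self.data.q is replaced by an assumed q grid.
from scitbx.array_family import flex

q = flex.double(range(1, 51)) / 100.0  # assumed q grid, 0.01..0.5
rmax = 50.0
a, b, c, d = -19.93, 18.95, 0.52, -1.13
ratio = flex.exp(a + b / (flex.exp(flex.pow(q * rmax, c) * d) + 1.0))
print("%.3g %.3g" % (ratio[0], ratio[49]))  # model error ratio grows with q*rmax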
def signal_to_noise_statistical(signal, background):
    "M.F. Koenig and J.T. Grant, Surface and Interface Analysis, Vol. 7, No. 5, 1985, 217"
    snr = signal / flex.pow(signal + 2 * background, 0.5)
    return snr
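# Hedged usage sketch for signal_to_noise_statistical above: for Poisson
# counting with the background estimated from an equal-sized region, the peak
# variance is S + 2B, hence SNR = S / sqrt(S + 2B). Numbers are illustrative.
from scitbx.array_family import flex

signal = flex.double([100.0, 400.0])
background = flex.double([50.0, 50.0])
print(list(signal_to_noise_statistical(signal, background)))  # [~7.07, ~17.89]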
def t_variate(a=1.0, mu=0.0, sigma=1.0, N=100):
    "T-variate via Bailey's one-liner"
    U1 = flex.random_double(size=N)
    U2 = flex.random_double(size=N)
    return (flex.sqrt(a * (flex.pow(U1, -2.0 / a) - 1.0))
            * flex.cos(2.0 * math.pi * U2) * sigma + mu)
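# Hedged usage sketch: Bailey's one-liner maps two uniform variates to a
# Student's t variate with a degrees of freedom. For a > 2 the sample mean
# converges to mu; with a = 1 (Cauchy) it does not.
import math
from scitbx.array_family import flex

t = t_variate(a=5.0, mu=10.0, sigma=2.0, N=100000)
print("%.2f" % flex.mean(t))  # ~10, up to sampling noise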
def partial_derivatives(self, x_obs):
    g = []
    for n in range(self.n_terms):
        g.append(flex.pow(x_obs, n))
    return g
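# Hedged sketch tying the __call__ and partial_derivatives methods above
# together: for the model y = sum_n a_n * x^n, the derivative with respect to
# each coefficient a_n is just x^n, which is what partial_derivatives returns.
# The container class and its name are assumptions for illustration.
from scitbx.array_family import flex

class polynomial_model(object):
    def __init__(self, params):
        self.params = params
        self.n_terms = len(params)

    def __call__(self, x_obs):
        y_calc = flex.double(x_obs.size())
        for n in range(self.n_terms):
            y_calc += self.params[n] * flex.pow(x_obs, n)
        return y_calc

    def partial_derivatives(self, x_obs):
        return [flex.pow(x_obs, n) for n in range(self.n_terms)]

m = polynomial_model([1.0, 2.0, 3.0])  # 1 + 2x + 3x^2
x = flex.double([0.0, 1.0, 2.0])
print(list(m(x)))                                       # [1.0, 6.0, 17.0]
print(list(m.partial_derivatives(x)[1]))                # dy/da_1 = x -> [0.0, 1.0, 2.0]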
def get_p_of_r(self, x):
    base = flex.pow((1.0 - x * x), self.k)
    exp_pol = flex.exp(self.polynome.f(x))
    result = exp_pol * base
    return result
def _estimate_cc_sig_fac(self):
    """Estimation of sigma(CC) as a function of sample size.

    Estimate the error in the correlation coefficient, sigma(CC), by using
    pairs of reflections at similar resolutions that are not related by
    potential symmetry. Using pairs of unrelated reflections at similar
    resolutions, calculate sigma(CC) == rms(CC) for groups of size N = 3..200.
    The constant CCsigFac is obtained from a linear fit of sigma(CC) to
    1/N^(1/2), i.e.:
        sigma(CC) = CCsigFac / N^(1/2)
    """
    max_bins = 500
    reflections_per_bin = max(200, int(math.ceil(self.intensities.size() / max_bins)))
    binner = self.intensities.setup_binner_counting_sorted(
        reflections_per_bin=reflections_per_bin)

    a = flex.double()
    b = flex.double()
    ma_tmp = self.intensities.customized_copy(
        crystal_symmetry=crystal.symmetry(
            space_group=self.lattice_group,
            unit_cell=self.intensities.unit_cell(),
            assert_is_compatible_unit_cell=False,
        )).map_to_asu()
    for i in range(binner.n_bins_all()):
        count = binner.counts()[i]
        if count == 0:
            continue
        bin_isel = binner.array_indices(i)
        p = flex.random_permutation(count)
        p = p[:2 * (count // 2)]  # ensure even count
        ma_a = ma_tmp.select(bin_isel.select(p[:count // 2]))
        ma_b = ma_tmp.select(bin_isel.select(p[count // 2:]))
        # only choose pairs of reflections that don't have the same indices
        # in the asu of the lattice group
        sel = ma_a.indices() != ma_b.indices()
        a.extend(ma_a.data().select(sel))
        b.extend(ma_b.data().select(sel))

    perm = flex.random_selection(a.size(), min(20000, a.size()))
    a = a.select(perm)
    b = b.select(perm)

    self.corr_unrelated = CorrelationCoefficientAccumulator(a, b)

    n_pairs = a.size()
    min_num_groups = 10  # minimum number of groups
    max_n_group = int(min(n_pairs / min_num_groups, 200))  # maximum number in group
    min_n_group = int(min(5, max_n_group))  # minimum number in group
    if (max_n_group - min_n_group) < 4:
        self.cc_sig_fac = 0
        return

    mean_ccs = flex.double()
    rms_ccs = flex.double()
    ns = flex.double()
    for n in range(min_n_group, max_n_group + 1):
        ns.append(n)
        ccs = flex.double()
        for i in range(200):
            isel = flex.random_selection(a.size(), n)
            corr = CorrelationCoefficientAccumulator(a.select(isel), b.select(isel))
            ccs.append(corr.coefficient())
        mean_ccs.append(flex.mean(ccs))
        rms_ccs.append(flex.mean(flex.pow2(ccs))**0.5)

    x = 1 / flex.pow(ns, 0.5)
    y = rms_ccs
    fit = flex.linear_regression(x, y)
    if fit.is_well_defined():
        self.cc_sig_fac = fit.slope()
    else:
        self.cc_sig_fac = 0
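# Hedged standalone sketch of the key fit in _estimate_cc_sig_fac above:
# rms(CC) of unrelated pairs scales as 1/sqrt(N), so regressing rms(CC) on
# 1/sqrt(N) recovers CCsigFac as the slope. Synthetic, noise-free data with
# CCsigFac = 1 for illustration:
from scitbx.array_family import flex

ns = flex.double(range(5, 201))
rms_ccs = 1.0 / flex.pow(ns, 0.5)  # ideal rms(CC), no sampling noise
fit = flex.linear_regression(1 / flex.pow(ns, 0.5), rms_ccs)
assert fit.is_well_defined()
print("%.3f" % fit.slope())  # ~1.000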