def __init__(self, *args, **kwargs):
    LBFGSsolver.__init__(self, *args, **kwargs)
    if self.IAprm_truth is not None:
        self.IAprm_truth = flex.log(self.IAprm_truth)
        self.IBprm_truth = flex.log(self.IBprm_truth)
    IAx = flex.log(self.x[:self.Nhkl])
    IBx = flex.log(self.x[self.Nhkl:2 * self.Nhkl])
    Gx = self.x[2 * self.Nhkl:]
    self.x = IAx.concatenate(IBx)
    self.x = self.x.concatenate(Gx)
def relative_entropy(self, other_pofx):
    this_one = self.base(self.x_int)
    that_one = other_pofx.base(self.x_int)
    this_one_log = flex.log(this_one + 1e-12)
    that_one_log = flex.log(that_one + 1e-12)
    # D(this || that), integrated with the quadrature weights
    this_that = this_one * (this_one_log - that_one_log)
    this_that = this_that * self.w_int
    this_that = flex.sum(this_that)
    # D(that || this)
    that_this = that_one * (-this_one_log + that_one_log)
    that_this = that_this * self.w_int
    that_this = flex.sum(that_this)
    # symmetrised Kullback-Leibler divergence
    return that_this + this_that
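# A minimal standalone sketch of the symmetrised Kullback-Leibler divergence
# computed by relative_entropy above, on two hand-made discrete
# distributions. The names p, q and w are illustrative; only
# scitbx.array_family.flex is assumed available.
from scitbx.array_family import flex

def symmetrised_kl(p, q, w):
    # guard the logs exactly as relative_entropy does
    log_p = flex.log(p + 1e-12)
    log_q = flex.log(q + 1e-12)
    # D(p||q) + D(q||p), each integrated with quadrature weights w
    d_pq = flex.sum(p * (log_p - log_q) * w)
    d_qp = flex.sum(q * (log_q - log_p) * w)
    return d_pq + d_qp

p = flex.double([0.2, 0.5, 0.3])
q = flex.double([0.25, 0.45, 0.3])
w = flex.double([1.0, 1.0, 1.0])
assert symmetrised_kl(p, p, w) == 0.0   # identical distributions
assert symmetrised_kl(p, q, w) > 0.0    # any difference is penalised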
def plot_centroid_weights_histograms(reflections, n_slots=50):
    from matplotlib import pyplot
    from scitbx.array_family import flex

    variances = flex.vec3_double([r.centroid_variance for r in reflections])
    vx, vy, vz = variances.parts()
    idx = (vx > 0).__and__(vy > 0).__and__(vz > 0)
    vx = vx.select(idx)
    vy = vy.select(idx)
    vz = vz.select(idx)
    wx = 1 / vx
    wy = 1 / vy
    wz = 1 / vz
    wx = flex.log(wx)
    wy = flex.log(wy)
    wz = flex.log(wz)
    hx = flex.histogram(wx, n_slots=n_slots)
    hy = flex.histogram(wy, n_slots=n_slots)
    hz = flex.histogram(wz, n_slots=n_slots)
    fig = pyplot.figure()
    idx2 = flex.max_index(wx)
    idx3 = flex.int(range(len(reflections))).select(idx)[idx2]
    print(reflections[idx3])
    # NOTE: this early return short-circuits the histogram plotting below,
    # which is left in place for debugging
    return
    # outliers = reflections.select(wx > 50)
    # for refl in outliers:
    #     print(refl)
    for i, h in enumerate([hx, hy, hz]):
        ax = fig.add_subplot(311 + i)
        slots = h.slots().as_double()
        bins, data = hist_outline(h)
        log_scale = True
        if log_scale:
            # otherwise lines don't get drawn when we have some empty bins
            data.set_selected(data == 0, 0.1)
            ax.set_yscale("log")
        ax.plot(bins, data, "-k", linewidth=2)
        # pyplot.suptitle(title)
        data_min = min(
            slot.low_cutoff for slot in h.slot_infos() if slot.n > 0)
        data_max = max(
            slot.low_cutoff for slot in h.slot_infos() if slot.n > 0)
        ax.set_xlim(data_min, data_max + h.slot_width())
    pyplot.show()
def print_scaling_model_error_summary(experiments):
    """Get a summary of the error distribution of the models."""
    models = [e.scaling_model.to_dict() for e in experiments]
    first_model = models[0]
    component = first_model["configuration_parameters"]["corrections"][0]
    msg = ""
    if "est_standard_devs" in first_model[component]:
        p_sigmas = flex.double()
        for model in models:
            for component in model["configuration_parameters"]["corrections"]:
                if "est_standard_devs" in model[component]:
                    params = flex.double(model[component]["parameters"])
                    sigmas = flex.double(model[component]["est_standard_devs"])
                    null_value = flex.double(
                        len(params), model[component]["null_parameter_value"])
                    p_sigmas.extend(flex.abs(params - null_value) / sigmas)
        log_p_sigmas = flex.log(p_sigmas)
        # log(p/sigma) < log(2) = 0.69315 is equivalent to sigma/abs(p) > 0.5
        frac_high_uncertainty = (log_p_sigmas < 0.69315).count(True) / len(
            log_p_sigmas)
        if frac_high_uncertainty > 0.5:
            msg = (
                "Warning: Over half ({:.2f}%) of model parameters have significant\n"
                "uncertainty (sigma/abs(parameter) > 0.5), which could indicate a\n"
                "poorly-determined scaling problem or overparameterisation.\n"
            ).format(frac_high_uncertainty * 100)
        else:
            msg = ("{:.2f}% of model parameters have significant uncertainty\n"
                   "(sigma/abs(parameter) > 0.5)\n").format(
                       frac_high_uncertainty * 100)
    return msg
def estimate_wilson_b_factor(miller_array, low_res_cutoff=4.0):
    miller_array = miller_array.resolution_filter(
        d_max=low_res_cutoff).as_intensity_array()

    # Setup binner and extract radial averages
    binner = miller_array.setup_binner(auto_binning=True)
    binned = miller_array.wilson_plot(use_binning=True)
    binned_data = [
        float(x) if type(x) == float else 1.0 for x in binned.data[1:-1]
    ]

    # Convert to log scale
    y_values = flex.log(flex.double(binned_data))
    x_values = flex.pow2(binner.bin_centers(1))

    # Check all values are valid: keep only points where neither
    # coordinate is NaN
    mask = flex.bool(
        (~(numpy.isnan(list(y_values))
           | numpy.isnan(list(x_values)))).tolist())

    # Perform scaling
    scl = LinearScaling(x_values=x_values.select(mask),
                        ref_values=y_values.select(mask))

    return -0.5 * scl.optimised_values[1]
def __call__(self, sigma_m):
    '''Calculate the fraction of observed intensity for each observation.

    Params:
        sigma_m The mosaicity

    Returns:
        A list of log intensity fractions

    '''
    from scitbx.array_family import flex
    import scitbx.math

    # Tiny value
    TINY = 1e-10
    assert sigma_m > TINY

    # Calculate the two components to the fraction
    a = scitbx.math.erf(self.e1 / sigma_m)
    b = scitbx.math.erf(self.e2 / sigma_m)

    # Calculate the fraction of observed reflection intensity
    R = (a - b) / 2.0

    # Set any points <= 0 to 1e-10 (otherwise will get a floating
    # point error in log calculation below).
    assert R.all_ge(0)
    mask = R < TINY
    assert mask.count(True) < len(mask)
    R.set_selected(mask, TINY)

    # Return the logarithm of R
    return flex.log(R)
def compute_functional_and_gradients(self):
    self.a = self.x
    f = 0.
    g = flex.double(self.n)
    vector_T = (flex.double(len(self.SP), self.x[0])
                + self.SP * self.x[1] + self.FP * self.x[2]
                + 0.5 * (self.SS * self.x[3] + self.SF * self.x[4]
                         + self.FF * self.x[5]))
    vector_lambda = vector_T / self.gain
    if (vector_lambda <= 0).count(True) > 0:
        raise RuntimeError("raising exception to avoid log(value<=0)")
    f = flex.sum(vector_lambda - (self.KI * flex.log(vector_lambda)))
    inner_paren = flex.double(len(self.SP), 1.) - (self.KI / vector_lambda)
    g_list = [
        flex.sum(deriv * inner_paren) for deriv in [
            flex.double(len(self.SP), 1.), self.SP, self.FP,
            self.SS, self.SF, self.FF
        ]
    ]
    # self.print_step("LBFGS stp", f)
    # turn off the 2nd-order Taylor term
    g_list[3] = 0.
    g_list[4] = 0.
    g_list[5] = 0.
    g = flex.double(g_list) / self.gain
    return f, g
def compute_rg_from_data(self, q, i):
    # Guinier fit: ln I(q) = ln I(0) - (Rg^2 / 3) q^2, so the slope of
    # ln I against q^2 gives Rg^2 (note this returns Rg squared)
    q_sq = q * q
    ln_i = flex.log(i)
    cc_obj = flex.linear_regression(q_sq, ln_i)
    rg2 = -cc_obj.slope() * 3.0
    lni = cc_obj.y_intercept()
    return rg2, lni
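# Hedged usage sketch of the Guinier analysis in compute_rg_from_data:
# generate ideal data I(q) = I0 * exp(-Rg^2 q^2 / 3) and check that the
# regression recovers Rg^2 and ln I0. rg_true and i0 are made-up test
# values; only scitbx.array_family.flex and math are assumed available.
import math
from scitbx.array_family import flex

rg_true = 20.0   # illustrative radius of gyration
i0 = 1000.0      # illustrative forward scattering
q = flex.double([0.005 * i for i in range(1, 30)])
i_obs = i0 * flex.exp(-(rg_true ** 2) * q * q / 3.0)

fit = flex.linear_regression(q * q, flex.log(i_obs))
rg2 = -fit.slope() * 3.0   # same algebra as compute_rg_from_data
assert abs(math.sqrt(rg2) - rg_true) < 1e-6
assert abs(fit.y_intercept() - math.log(i0)) < 1e-6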
def score_by_rmsd_xy(self, reverse=False):
    # smaller rmsds = better
    rmsd_x, rmsd_y, rmsd_z = flex.vec3_double(
        s.rmsds for s in self.all_solutions).parts()
    rmsd_xy = flex.sqrt(flex.pow2(rmsd_x) + flex.pow2(rmsd_y))
    score = flex.log(rmsd_xy) / math.log(2)
    return self.rmsd_weight * (score - flex.min(score))
def Hn(m):
    # Shannon entropy of the positive values of m, normalised by log(N)
    # so that a uniform array scores 1.0
    m_ = m
    sc = math.log(m_.size())
    s = m_ > 0
    m_ = m_.select(s.iselection())
    m_ = m_ / flex.sum(m_)
    return -flex.sum(m_ * flex.log(m_)) / sc
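# Quick sanity check of Hn: the normalised entropy is 1.0 for a uniform
# array and falls towards 0.0 as the density concentrates in one element.
# A sketch only, assuming Hn above (and flex) are in scope.
from scitbx.array_family import flex

uniform = flex.double(100, 1.0)
peaked = flex.double(100, 1e-9)
peaked[0] = 1.0
assert abs(Hn(uniform) - 1.0) < 1e-12
assert Hn(peaked) < 0.05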
def another_example(np=41, nt=5):
    x = flex.double(range(np)) / (np - 1)
    y = 0.99 * flex.exp(-x * x * 0.5)
    y = -flex.log(1.0 / y - 1)
    w = y * y / 1.0
    d = (flex.random_double(np) - 0.5) * w
    y_obs = y + d
    y = 1.0 / (1.0 + flex.exp(-y))
    fit_w = chebyshev_lsq_fit.chebyshev_lsq_fit(nt, x, y_obs, w)
    fit_w_f = chebyshev_polynome(
        nt, fit_w.low_limit, fit_w.high_limit, fit_w.coefs)
    fit_nw = chebyshev_lsq_fit.chebyshev_lsq_fit(nt, x, y_obs)
    fit_nw_f = chebyshev_polynome(
        nt, fit_nw.low_limit, fit_nw.high_limit, fit_nw.coefs)
    print()
    print("Coefficients from weighted lsq")
    print(list(fit_w.coefs))
    print("Coefficients from non-weighted lsq")
    print(list(fit_nw.coefs))
    assert flex.max(flex.abs(fit_nw.coefs - fit_w.coefs)) > 0
def target(self, log_sigma):
    ''' The target for minimization. '''
    from math import exp, pi, log
    from scitbx.array_family import flex
    import scitbx.math

    sigma_m = exp(log_sigma[0])

    # Tiny value
    TINY = 1e-10
    assert sigma_m > TINY

    # Calculate the two components to the fraction
    a = scitbx.math.erf(self.e1 / sigma_m)
    b = scitbx.math.erf(self.e2 / sigma_m)
    n = self.n
    K = self.K

    # Calculate the fraction of observed reflection intensity
    zi = (a - b) / 2.0

    # Set any points <= 0 to 1e-10 (otherwise will get a floating
    # point error in log calculation below).
    assert zi.all_ge(0)
    mask = zi < TINY
    assert mask.count(True) < len(mask)
    zi.set_selected(mask, TINY)

    # Compute the likelihood
    #
    # The likelihood here is the sum of two log likelihood functions:
    #
    # The first is the same as the one in Kabsch 2010 as applied to the
    # reflection as a whole. This results in the term log(Z).
    #
    # The second is the likelihood for each reflection modelled as a
    # Poisson distribution with shape given by sigma_m. This gives
    # sum(ci log(zi)) - sum(ci) * log(sum(zi)).
    #
    # If the reflection is recorded on 1 frame, the second component is
    # zero and the likelihood is dominated by the first term, which can be
    # seen as a prior for sigma that accounts for which reflections were
    # actually recorded.
    L = 0
    for j, (i0, i1) in enumerate(zip(self.indices[:-1], self.indices[1:])):
        selection = flex.size_t(range(i0, i1))
        zj = zi.select(selection)
        nj = n.select(selection)
        kj = K[j]
        Z = flex.sum(zj)
        L += flex.sum(nj * flex.log(zj)) - kj * log(Z) + log(Z)
    logger.debug("Sigma M: %f, log(L): %f" % (sigma_m * 180 / pi, L))

    # Return the negative log likelihood for minimization
    return -L
def __init__(self, use_curvatures=True, *args, **kwargs):
    LBFGSsolver.__init__(self, *args, **kwargs)
    if self.IAprm_truth is not None:
        self.IAprm_truth = flex.log(self.IAprm_truth)
        self.IBprm_truth = flex.log(self.IBprm_truth)
        # self.Gprm_truth = flex.log(self.Gprm_truth)
    IAx = flex.log(self.x[:self.Nhkl])
    IBx = flex.log(self.x[self.Nhkl:2 * self.Nhkl])
    Gx = self.x[2 * self.Nhkl:]
    # Gx = flex.log(self.x[2*self.Nhkl:])
    self.x = IAx.concatenate(IBx)
    self.x = self.x.concatenate(Gx)

    if use_curvatures:
        self.minimizer = lbfgs_with_curvatures_mix_in.__init__(
            self,
            min_iterations=0,
            max_iterations=None,
            use_curvatures=True)
def get_z_scores(self, scale, b_value):
    i_scaled = flex.exp(
        self.calc_d_star_sq * b_value) * self.mean_calc * scale
    sel = ((self.mean_obs > 0) & (i_scaled > 0)).iselection()
    ratio = self.mean_obs.select(sel) / i_scaled.select(sel)
    mean = self.curve(self.calc_d_star_sq).select(sel)
    assert ratio.all_gt(0)  # FIXME need to filter first!
    ratio = flex.log(ratio)
    var = self.std(self.calc_d_star_sq).select(sel)
    d_star_sq = self.calc_d_star_sq.select(sel)
    assert var.all_ne(0)
    z = flex.abs(ratio - mean) / var
    z_ = flex.double(self.mean_obs.size(), -1)
    z_.set_selected(sel, z)
    return z_
def summary(self):
    i_scaled = flex.exp(self.calc_d_star_sq * self.b_value) * \
        self.mean_calc * self.scale
    sel = (self.mean_obs > 0).iselection()
    ratio = flex.log(i_scaled.select(sel) / self.mean_obs.select(sel))
    ratio_ = flex.double(self.mean_obs.size(), 0)
    ratio_.set_selected(sel, ratio)
    curves = [
        self.calc_d_star_sq,
        -ratio_,  # observed
        self.curve(self.calc_d_star_sq),  # expected
        self.get_z_scores(self.scale, self.b_value)
    ]
    return summary(all_curves=curves,
                   level=self.level,
                   all_bad_z_scores=self.all_bad_z_scores)
def __call__(self, sigma_m):
    '''Calculate the fraction of observed intensity for each observation.

    Params:
        sigma_m The mosaicity

    Returns:
        A list of fractions of length n

    '''
    from math import sqrt, erf
    from scitbx.array_family import flex
    import numpy

    # Tiny value
    TINY = 1e-10

    # Ensure value for sigma_m is valid
    if sigma_m < TINY:
        raise ValueError('sigma_m must be > 0')

    # Oscillation range / 2
    dphi2 = self.dphi / 2

    # Calculate the denominator to the fraction
    den = sqrt(2) * sigma_m / flex.abs(self.zeta)

    # Calculate the two components to the fraction
    a = flex.double([erf(x) for x in (self.tau + dphi2) / den])
    b = flex.double([erf(x) for x in (self.tau - dphi2) / den])

    # Calculate the fraction of observed reflection intensity
    R = (a - b) / 2.0

    # Set any points <= 0 to 1e-10 (otherwise will get a floating
    # point error in log calculation below).
    bad_index = numpy.where(R.as_numpy_array() < TINY)[0]
    for i in bad_index:
        R[int(i)] = TINY

    # Return the logarithm of R
    return flex.log(R)
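# Standalone sketch of the erf-based fraction computed by __call__ above,
# with made-up tau, zeta and oscillation width, showing the clamping that
# protects the final flex.log from zero fractions. Only math.erf and
# scitbx.array_family.flex are assumed available.
from math import erf, sqrt
from scitbx.array_family import flex

TINY = 1e-10
sigma_m = 0.05                          # illustrative mosaicity
dphi2 = 0.01 / 2                        # half the oscillation range
tau = flex.double([0.0, 0.005, 0.1])    # made-up rotation offsets
zeta = flex.double([0.8, 0.9, 1.0])     # made-up Lorentz factors

den = sqrt(2) * sigma_m / flex.abs(zeta)
a = flex.double([erf(x) for x in (tau + dphi2) / den])
b = flex.double([erf(x) for x in (tau - dphi2) / den])
R = (a - b) / 2.0
R.set_selected(R < TINY, TINY)          # clamp before taking logs
log_R = flex.log(R)
assert flex.max(log_R) <= 0.0           # fractions never exceed 1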
def pseudo_normalized_abs_delta_i(N=100):
    # Inverse-CDF sampling: if U ~ Uniform(0, 1) then
    # -0.5 * log(1 - U) is exponentially distributed with mean 0.5
    x = flex.random_double(size=N)
    x = -0.5 * flex.log(1.0 - x)
    return x
def normal_variate(mu=0.0, sigma=1.0, N=100):
    "Normal variate via Box-Muller transform"
    U1 = flex.random_double(size=N)
    U2 = flex.random_double(size=N)
    return flex.sqrt(-2.0 * flex.log(U1)) * flex.cos(
        2.0 * math.pi * U2) * sigma + mu
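# Sanity check for normal_variate: with a large sample the Box-Muller draw
# should reproduce the requested mean and standard deviation. A sketch only;
# the tolerances are loose because the sample is random.
from scitbx.array_family import flex

x = normal_variate(mu=5.0, sigma=2.0, N=100000)
mean = flex.mean(x)
sd = flex.mean(flex.pow2(x - mean)) ** 0.5
assert abs(mean - 5.0) < 0.05
assert abs(sd - 2.0) < 0.05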
def run_cc(params, reindexing_op, output):
    uniform, selected_uniform, have_iso_ref = load_cc_data(
        params, reindexing_op, output)
    NBIN = params.output.n_bins
    if have_iso_ref:
        slope, offset, corr_iso, N_iso = correlation(
            selected_uniform[1], selected_uniform[0],
            params.include_negatives)
        print("C.C. iso is %.1f%% on %d indices" % (100 * corr_iso, N_iso),
              file=output)
    slope, offset, corr_int, N_int = correlation(
        selected_uniform[2], selected_uniform[3], params.include_negatives)
    print("C.C. int is %.1f%% on %d indices" % (100. * corr_int, N_int),
          file=output)

    if have_iso_ref:
        binned_cc_ref, binned_cc_ref_N = binned_correlation(
            selected_uniform[1], selected_uniform[0],
            params.include_negatives)
        # binned_cc_ref.show(f=output)

        ref_scale = scale_factor(
            selected_uniform[1],
            selected_uniform[0],
            weights=flex.pow(selected_uniform[1].sigmas(), -2),
            use_binning=True)
        # ref_scale.show(f=output)

        ref_riso = r1_factor(
            selected_uniform[1],
            selected_uniform[0],
            scale_factor=ref_scale,
            use_binning=True)
        # ref_riso.show(f=output)

        ref_scale_all = scale_factor(
            selected_uniform[1],
            selected_uniform[0],
            weights=flex.pow(selected_uniform[1].sigmas(), -2))
        ref_riso_all = r1_factor(
            selected_uniform[1],
            selected_uniform[0],
            scale_factor=ref_scale_all)

    binned_cc_int, binned_cc_int_N = binned_correlation(
        selected_uniform[2], selected_uniform[3], params.include_negatives)
    # binned_cc_int.show(f=output)

    oe_scale = scale_factor(
        selected_uniform[2],
        selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2) +
        flex.pow(selected_uniform[3].sigmas(), -2),
        use_binning=True)
    # oe_scale.show(f=output)

    oe_rint = r1_factor(
        selected_uniform[2],
        selected_uniform[3],
        scale_factor=oe_scale,
        use_binning=True)
    # oe_rint.show(f=output)
    oe_rsplit = r_split(selected_uniform[2], selected_uniform[3],
                        use_binning=True)

    oe_scale_all = scale_factor(
        selected_uniform[2],
        selected_uniform[3],
        weights=flex.pow(selected_uniform[2].sigmas(), -2) +
        flex.pow(selected_uniform[3].sigmas(), -2),
    )
    oe_rint_all = r1_factor(
        selected_uniform[2],
        selected_uniform[3],
        scale_factor=oe_scale_all)
    oe_rsplit_all = r_split(selected_uniform[2], selected_uniform[3])

    if have_iso_ref:
        print("R factors Riso = %.1f%%, Rint = %.1f%%" %
              (100. * ref_riso_all, 100. * oe_rint_all), file=output)
    else:
        print("R factor Rint = %.1f%%" % (100. * oe_rint_all), file=output)

    split_sigma_data = split_sigma_test(
        selected_uniform[2],
        selected_uniform[3],
        scale=oe_scale,
        use_binning=True,
        show_plot=False)
    split_sigma_data_all = split_sigma_test(
        selected_uniform[2],
        selected_uniform[3],
        scale=oe_scale_all,
        use_binning=False,
        show_plot=False)

    print(file=output)
    if reindexing_op == "h,k,l":
        print("Table of Scaling Results:", file=output)
    else:
        print("Table of Scaling Results Reindexing as %s:" % reindexing_op,
              file=output)

    from libtbx import table_utils
    table_header = [
        "", "", "", "CC", " N", "CC", " N", "R", "R", "R", "Scale", "Scale",
        "SpSig"
    ]
    table_header2 = [
        "Bin", "Resolution Range", "Completeness", "int", "int", "iso",
        "iso", "int", "split", "iso", "int", "iso", "Test"
    ]
    table_data = []
    table_data.append(table_header)
    table_data.append(table_header2)
    items = binned_cc_int.binner.range_used()

    # XXX Make it clear what the completeness here actually is!
    cumulative_counts_given = 0
    cumulative_counts_complete = 0
    for bin in items:
        table_row = []
        table_row.append("%3d" % bin)
        table_row.append("%-13s" % binned_cc_int.binner.bin_legend(
            i_bin=bin,
            show_bin_number=False,
            show_bin_range=False,
            show_d_range=True,
            show_counts=False))
        table_row.append("%13s" % binned_cc_int.binner.bin_legend(
            i_bin=bin,
            show_bin_number=False,
            show_bin_range=False,
            show_d_range=False,
            show_counts=True))
        cumulative_counts_given += binned_cc_int.binner._counts_given[bin]
        cumulative_counts_complete += \
            binned_cc_int.binner._counts_complete[bin]
        table_row.append("%.1f%%" % (100. * binned_cc_int.data[bin]))
        table_row.append("%7d" % (binned_cc_int_N.data[bin]))
        if have_iso_ref and binned_cc_ref.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * binned_cc_ref.data[bin]))
        else:
            table_row.append("--")
        if have_iso_ref and binned_cc_ref_N.data[bin] is not None:
            table_row.append("%6d" % (binned_cc_ref_N.data[bin]))
        else:
            table_row.append("--")
        if oe_rint.data[bin] is not None:
            table_row.append("%.1f%%" % (100. * oe_rint.data[bin]))
        else:
            table_row.append("--")
        if oe_rsplit.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * oe_rsplit.data[bin]))
        else:
            table_row.append("--")
        if have_iso_ref and ref_riso.data[bin] is not None:
            table_row.append("%.1f%%" % (100 * ref_riso.data[bin]))
        else:
            table_row.append("--")
        if oe_scale.data[bin] is not None:
            table_row.append("%.3f" % oe_scale.data[bin])
        else:
            table_row.append("--")
        if have_iso_ref and ref_scale.data[bin] is not None:
            table_row.append("%.3f" % ref_scale.data[bin])
        else:
            table_row.append("--")
        if split_sigma_data.data[bin] is not None:
            table_row.append("%.4f" % split_sigma_data.data[bin])
        else:
            table_row.append("--")
        table_data.append(table_row)
    table_data.append([""] * len(table_header))

    table_row = [
        format_value("%3s", "All"),
        format_value("%-13s", " "),
        format_value("%13s", "[%d/%d]" %
                     (cumulative_counts_given, cumulative_counts_complete)),
        format_value("%.1f%%", 100 * corr_int),
        format_value("%7d", N_int)
    ]
    if have_iso_ref:
        table_row.extend((format_value("%.1f%%", 100 * corr_iso),
                          format_value("%6d", N_iso)))
    else:
        table_row.extend(("--", "--"))
    table_row.extend((format_value("%.1f%%", 100 * oe_rint_all),
                      format_value("%.1f%%", 100 * oe_rsplit_all)))
    if have_iso_ref:
        table_row.append(format_value("%.1f%%", 100 * ref_riso_all))
    else:
        table_row.append("--")
    table_row.append(format_value("%.3f", oe_scale_all))
    if have_iso_ref:
        table_row.append(format_value("%.3f", ref_scale_all))
    else:
        table_row.append("--")
    if split_sigma_data_all is not None:
        table_row.append("%.1f" % split_sigma_data_all)
    else:
        table_row.append("--")
    table_data.append(table_row)

    print(file=output)
    print(table_utils.format(table_data, has_header=2, justify='center',
                             delim=" "), file=output)
    print("""CCint is the CC-1/2 defined by Diederichs; correlation between
odd/even images. Similarly, Scale int and R int are the scaling factor and
scaling R factor between odd/even images. "iso" columns compare the whole
XFEL dataset to the isomorphous reference.""", file=output)
    print("""Niso: result vs.
reference common set""", end=" ", file=output)
    if params.include_negatives:
        print("""including negative merged intensities (set by phil parameter).""",
              file=output)
    elif params.scaling.log_cutoff is None:
        print(file=output)
    else:
        print("""with intensities < %7.2g filtered out (controlled by
scaling.log_cutoff phil parameter set to %5.1f)""" % (
            math.exp(params.scaling.log_cutoff),
            params.scaling.log_cutoff), file=output)

    if have_iso_ref:
        assert N_iso == flex.sum(
            flex.double([x for x in binned_cc_ref_N.data if x is not None]))
    assert N_int == flex.sum(
        flex.double([x for x in binned_cc_int_N.data if x is not None]))

    if params.scaling.show_plots:
        from matplotlib import pyplot as plt
        plt.plot(flex.log(selected_uniform[-2].data()),
                 flex.log(selected_uniform[-1].data()), 'r.')
        plt.show()
        if have_iso_ref:
            plt.plot(flex.log(selected_uniform[0].data()),
                     flex.log(selected_uniform[1].data()), 'r.')
            plt.show()
    print(file=output)
def score_by_volume(self, reverse=False):
    # smaller volume = better
    volumes = flex.double(
        s.crystal.get_unit_cell().volume() for s in self.all_solutions)
    score = flex.log(volumes) / math.log(2)
    return self.volume_weight * (score - flex.min(score))
def exercise_gaussian_fit():

    # test fitting of a gaussian
    def do_gaussian_fit(scale, mu, sigma):
        start = mu - 6 * sigma
        stop = mu + 6 * sigma
        step = (stop - start) / 1000
        x = flex.double(frange(start, stop, step))
        y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
        fit = curve_fitting.single_gaussian_fit(x, y)
        assert approx_equal(fit.a, scale, 1e-4)
        assert approx_equal(fit.b, mu, eps=1e-4)
        assert approx_equal(fit.c, sigma, eps=1e-4)

    for i in range(10):
        scale = random.random() * 1000
        sigma = (random.random() + 0.0001) * 10
        mu = (-1)**random.randint(0, 1) * random.random() * 1000
        functor = curve_fitting.gaussian(scale, mu, sigma)
        start = mu - 6 * sigma
        stop = mu + 6 * sigma
        step = (stop - start) / 1000
        x = flex.double(frange(start, stop, step))
        fd_grads = finite_differences(functor, x)
        assert approx_equal(functor.partial_derivatives(x), fd_grads, 1e-4)
        do_gaussian_fit(scale, mu, sigma)

    # if we take the log of a gaussian we can fit a parabola
    scale = 123
    mu = 3.2
    sigma = 0.1
    x = flex.double(frange(2, 4, 0.01))
    y = scale * flex.exp(-flex.pow2(x - mu) / (2 * sigma**2))
    # need to be careful to only use values of y > 0
    eps = 1e-15
    x = flex.double([x[i] for i in range(x.size()) if y[i] > eps])
    y = flex.double([y[i] for i in range(y.size()) if y[i] > eps])
    fit = curve_fitting.univariate_polynomial_fit(x, flex.log(y), degree=2)
    c, b, a = fit.params
    assert approx_equal(mu, -b / (2 * a))
    assert approx_equal(sigma * sigma, -1 / (2 * a))

    # test multiple gaussian fits
    gaussians = [
        curve_fitting.gaussian(0.3989538, 3.7499764, 0.7500268),
        curve_fitting.gaussian(0.7978957, 6.0000004, 0.5000078)
    ]
    x = flex.double(frange(0, 10, 0.1))
    y = flex.double(x.size())
    for i in range(len(gaussians)):
        g = gaussians[i]
        scale, mu, sigma = g.a, g.b, g.c
        y += g(x)
    starting_gaussians = [
        curve_fitting.gaussian(1, 4, 1),
        curve_fitting.gaussian(1, 5, 1)
    ]
    fit = curve_fitting.gaussian_fit(x, y, starting_gaussians)
    for g1, g2 in zip(gaussians, fit.gaussians):
        assert approx_equal(g1.a, g2.a, eps=1e-4)
        assert approx_equal(g1.b, g2.b, eps=1e-4)
        assert approx_equal(g1.c, g2.c, eps=1e-4)

    # use example of 5-gaussian fit from here:
    # http://research.stowers-institute.org/efg/R/Statistics/MixturesOfDistributions/index.htm
    gaussians = [
        curve_fitting.gaussian(0.10516252, 23.32727, 2.436638),
        curve_fitting.gaussian(0.46462715, 33.09053, 2.997594),
        curve_fitting.gaussian(0.29827916, 41.27244, 4.274585),
        curve_fitting.gaussian(0.08986616, 51.24468, 5.077521),
        curve_fitting.gaussian(0.04206501, 61.31818, 7.070303)
    ]
    x = flex.double(frange(0, 80, 0.1))
    y = flex.double(x.size())
    for i in range(len(gaussians)):
        g = gaussians[i]
        scale, mu, sigma = g.a, g.b, g.c
        y += g(x)
    termination_params = scitbx.lbfgs.termination_parameters(
        min_iterations=500)
    starting_gaussians = [
        curve_fitting.gaussian(1, 21, 2.1),
        curve_fitting.gaussian(1, 30, 2.8),
        curve_fitting.gaussian(1, 40, 2.2),
        curve_fitting.gaussian(1, 51, 1.2),
        curve_fitting.gaussian(1, 60, 2.3)
    ]
    fit = curve_fitting.gaussian_fit(
        x, y, starting_gaussians, termination_params=termination_params)
    y_calc = fit.compute_y_calc()
    assert approx_equal(y, y_calc, eps=1e-2)

    have_cma_es = libtbx.env.has_module("cma_es")
    if have_cma_es:
        fit = curve_fitting.cma_es_minimiser(starting_gaussians, x, y)
        y_calc = fit.compute_y_calc()
        assert approx_equal(y, y_calc, eps=5e-2)
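# The log-parabola identity exercised above, restated as a minimal numpy
# sketch: ln(scale * exp(-(x - mu)^2 / (2 sigma^2))) is quadratic in x, so a
# degree-2 polynomial fit recovers mu = -b/(2a) and sigma^2 = -1/(2a).
# Assumes numpy is available; the values are illustrative.
import numpy as np

scale, mu, sigma = 123.0, 3.2, 0.1
x = np.linspace(2.9, 3.5, 200)
y = scale * np.exp(-(x - mu) ** 2 / (2 * sigma ** 2))
a, b, c = np.polyfit(x, np.log(y), 2)
assert abs(-b / (2 * a) - mu) < 1e-6
assert abs(-1 / (2 * a) - sigma ** 2) < 1e-6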
def Hw(m):
    # entropy term -sum(m * log m) over the positive values of m
    # (unnormalised, unlike Hn)
    s = m > 0
    m_ = m
    m_ = m_.select(s.iselection())
    return -flex.sum(m_ * flex.log(m_))
def score_by_fraction_indexed(self, reverse=False):
    # more indexed reflections = better
    fraction_indexed = flex.double(
        s.fraction_indexed for s in self.all_solutions)
    score = flex.log(fraction_indexed) / math.log(2)
    return self.n_indexed_weight * (-score + flex.max(score))
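# The three score_by_* methods above share one pattern: take log2 of the raw
# metric so that "twice as good" costs exactly one unit, then shift so the
# best solution scores zero. A minimal standalone sketch of that pattern
# with made-up unit-cell volumes; only flex and math are assumed available.
import math
from scitbx.array_family import flex

volumes = flex.double([1000.0, 2000.0, 8000.0])
score = flex.log(volumes) / math.log(2)
score = score - flex.min(score)
# the smallest cell scores 0; an 8x larger cell scores 3 (= log2 8)
assert max(abs(s - e) for s, e in zip(score, [0.0, 1.0, 3.0])) < 1e-12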
def calculate_scaling(self,
                      miller_array,
                      convergence_crit_perc=0.01,
                      convergence_reject_perc=97.5,
                      max_iter=20):
    """Calculate the scaling between two arrays"""

    assert convergence_reject_perc > 90.0

    # Convert to intensities and extract d_star_sq
    new_miller = miller_array.as_intensity_array()
    new_kernel = self._kernel_normalisation(miller_array=new_miller)

    # Calculate new range of d_star_sq
    d_star_sq_min, d_star_sq_max = self._common_d_star_sq_range(
        d_star_sq=new_kernel.d_star_sq_array)

    # Create interpolator for the two arrays (new and reference)
    interpolator = scale_curves.curve_interpolator(
        d_star_sq_min, d_star_sq_max, self._npoints)

    # Interpolate the two curves (use full range of the two arrays)
    new_itpl_d_star_sq, new_itpl_mean_I, dummy, dummy = \
        interpolator.interpolate(
            x_array=new_kernel.d_star_sq_array,
            y_array=new_kernel.mean_I_array)
    ref_itpl_d_star_sq, ref_itpl_mean_I, dummy, dummy = \
        interpolator.interpolate(
            x_array=self.ref_kernel.d_star_sq_array,
            y_array=self.ref_kernel.mean_I_array)

    # Initialise convergence loop - begin by scaling over all points
    selection = flex.bool(self._npoints, True)
    # Set initial scale factor to small value
    curr_b = 1e-6
    # Percent change between iterations - convergence when delta <
    # convergence_crit_perc
    n_iter = 0
    # Report in case of error
    report = Report('Scaling log:', verbose=False)
    while n_iter < max_iter:
        report('---')
        report('ITER: ' + str(n_iter))
        if selection.all_eq(False):
            print("Selection now empty, breaking")
            break
        # Run optimisation on the linear scaling
        lsc = ExponentialScaling(
            x_values=interpolator.target_x,
            ref_values=ref_itpl_mean_I,
            scl_values=new_itpl_mean_I,
            weights=selection.as_double())
        # Calculate scaling B-factor
        lsc.scaling_b_factor = -0.5 * list(lsc.optimised_values)[0]
        # Break if fitted to 0
        if approx_equal_relatively(0.0, lsc.scaling_b_factor, 1e-6):
            report('Scaling is approximately 0.0 - stopping')
            break
        # Calculate percentage change
        report('Curr/New: ' + str(curr_b) + '\t' + str(lsc.scaling_b_factor))
        delta = abs((curr_b - lsc.scaling_b_factor) / curr_b)
        report('Delta: ' + str(delta))
        if delta < convergence_crit_perc:
            report('Scaling has converged to within tolerance - stopping')
            break
        # Update selection
        report('Curr Selection Size: ' + str(sum(selection)))
        ref_diffs = flex.log(lsc.ref_values) - flex.log(lsc.out_values)
        # abs_diffs = flex.abs(ref_diffs)
        sel_diffs = ref_diffs.select(selection)
        rej_val_high = numpy.percentile(sel_diffs, convergence_reject_perc)
        rej_val_low = numpy.percentile(sel_diffs,
                                       100.0 - convergence_reject_perc)
        report('Percentile: ' + str(convergence_reject_perc) + '\t<' +
               str(rej_val_low) + '\t>' + str(rej_val_high))
        selection.set_selected(ref_diffs > rej_val_high, False)
        selection.set_selected(ref_diffs < rej_val_low, False)
        report('New Selection Size: ' + str(sum(selection)))
        # Update loop params
        curr_b = lsc.scaling_b_factor
        n_iter += 1

    lsc.unscaled_ln_rmsd = (flex.log(lsc.ref_values) - flex.log(
        lsc.scl_values)).norm() / (lsc.ref_values.size()**0.5)
    lsc.scaled_ln_rmsd = (flex.log(lsc.ref_values) - flex.log(
        lsc.out_values)).norm() / (lsc.ref_values.size()**0.5)
    lsc.unscaled_ln_dev = flex.sum(
        flex.abs(flex.log(lsc.ref_values) - flex.log(lsc.scl_values)))
    lsc.scaled_ln_dev = flex.sum(
        flex.abs(flex.log(lsc.ref_values) - flex.log(lsc.out_values)))

    return lsc
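# Minimal sketch of the two-sided percentile rejection used inside the
# convergence loop of calculate_scaling: points whose log-ratio falls
# outside the [100 - p, p] percentile band are dropped from the working
# selection. Assumes numpy and flex; the data are illustrative.
import numpy
from scitbx.array_family import flex

ref = flex.double([1.0, 1.1, 0.9, 1.05, 5.0])   # last point is an outlier
out = flex.double([1.0, 1.0, 1.0, 1.0, 1.0])
selection = flex.bool(5, True)

diffs = flex.log(ref) - flex.log(out)
sel_diffs = diffs.select(selection)
rej_val_high = numpy.percentile(sel_diffs, 97.5)
rej_val_low = numpy.percentile(sel_diffs, 2.5)
selection.set_selected(diffs > rej_val_high, False)
selection.set_selected(diffs < rej_val_low, False)
assert selection.count(True) < 5   # the band excludes the extreme points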
def __init__(self,
             conj_grad=True,
             weights=None,
             plot_truth=False,
             plot=False,
             sovlerization_maximus=True,
             *args,
             **kwargs):
    # NOTE: do it with lbfgs=False
    # ^ brings in Yobs, LA, LB, PA, PB, Nhkl, Ns, Nmeas, Aidx, Gidx
    solvers.LBFGSsolver.__init__(self, *args, **kwargs)

    # correct because working with logs
    if self.IAprm_truth is not None:
        self.IAprm_truth = flex.log(self.IAprm_truth)
        self.IBprm_truth = flex.log(self.IBprm_truth)
        self.Gprm_truth = self.Gprm_truth
        self.x_truth = (self.IAprm_truth.concatenate(
            self.IBprm_truth)).concatenate(self.Gprm_truth)

    self.x_init = flex.double(
        np.ascontiguousarray(self.guess["IAprm"])).concatenate(
            flex.double(np.ascontiguousarray(
                self.guess["IBprm"]))).concatenate(
                    flex.double(np.ascontiguousarray(self.guess["Gprm"])))
    assert len(self.x_init) == self.Nhkl * 2 + self.Ns

    IAx = flex.log(self.x_init[:self.Nhkl])
    IBx = flex.log(self.x_init[self.Nhkl:2 * self.Nhkl])
    Gx = self.x_init[2 * self.Nhkl:]
    self.x_init = IAx.concatenate(IBx)
    self.x_init = self.x_init.concatenate(Gx)

    self.counter = 0

    # set dummy weights for now
    if weights is None:
        self.Wobs = flex.double(np.ones(len(self.Yobs)))
    else:
        self.Wobs = weights

    if plot_truth:
        try:
            truth = self.x_truth
        except AttributeError as error:
            print(error)
            truth = None
    else:
        truth = None

    self.helper = eigen_helper(initial_estimates=self.x_init,
                               Nhkl=self.Nhkl,
                               plot=plot,
                               truth=truth)
    self.helper.eigen_wrapper.conj_grad = conj_grad
    self.helper.set_basic_data(self.Yobs, self.Wobs, self.Aidx, self.Gidx,
                               self.PA, self.PB, self.LA, self.LB,
                               self.Nhkl, self.Ns)

    self.helper.restart()

    if sovlerization_maximus:
        try:
            _ = normal_eqns_solving.levenberg_marquardt_iterations_encapsulated_eqns(
                non_linear_ls=self.helper,
                n_max_iterations=10000,
                track_all=True,
                step_threshold=0.0001)
        except (KeyboardInterrupt, AssertionError):
            pass
        print("End of minimization: Converged", self.helper.counter,
              "cycles")
        print(self.helper.get_eigen_summary())
        print("Converged functional: ",
              self.helper.functional_basic(self.helper.x))