def runge_phenomenon(self, n=41, nt=35, print_it=False):
    """Fit Runge's function on equidistant and on Chebyshev-node samples.

    The function 1 / (1 + 25 x^2) is sampled at ``n`` equidistant points on
    [-1, 1] and at the ``n`` points returned by
    ``chebyshev_lsq_fit.chebyshev_nodes(n, -1, 1, True)``.  Each sample set
    is fitted with an ``nt``-term Chebyshev series, and both fits are then
    evaluated on ``3 * n`` equidistant points spanning [-1, 1].

    Args:
        self: Not used by the body.
        n: Number of sample points used for each fit.
        nt: Number of Chebyshev terms.
        print_it: When true, print one line per evaluation point holding
            x, the exact value, the equidistant fit and the node fit.

    Returns:
        None.
    """
    def runge(points):
        # Runge's function, 1 / (1 + 25 x^2).
        return 1 / (1 + points * points * 25)

    def fitted_series(points):
        # Least-squares fit of the samples, wrapped as an evaluable polynome.
        lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(nt, points, runge(points))
        return chebyshev_polynome(nt, lsq.low_limit, lsq.high_limit, lsq.coefs)

    equidistant = 2.0 * (flex.double(xrange(n)) / float(n - 1) - 0.5)
    series_equidistant = fitted_series(equidistant)

    nodes = chebyshev_lsq_fit.chebyshev_nodes(n, -1, 1, True)
    series_nodes = fitted_series(nodes)

    grid_size = 3 * n
    grid = 2.0 * (flex.double(xrange(grid_size)) / float(grid_size - 1) - 0.5)
    on_equidistant = series_equidistant.f(grid)
    on_nodes = series_nodes.f(grid)
    exact = runge(grid)

    if not print_it:
        return
    for row in zip(grid, exact, on_equidistant, on_nodes):
        # Same text as the Python 2 statement ``print x, y, yy, yyy``.
        print("%s %s %s %s" % row)
def another_example(np=41, nt=5):
    """Compare a weighted and an unweighted Chebyshev least-squares fit.

    A curve is built on ``np`` equidistant points in [0, 1], perturbed with
    random offsets scaled by ``weights``, and fitted twice with ``nt`` terms:
    once passing the weights and once without them.  Both coefficient lists
    are printed and asserted to differ.

    Args:
        np: Number of data points.
        nt: Number of Chebyshev terms.

    Raises:
        AssertionError: If the two coefficient sets are identical.
    """
    grid = flex.double(range(np)) / (np - 1)
    curve = 0.99 * flex.exp(-grid * grid * 0.5)
    curve = -flex.log(1.0 / curve - 1)
    weights = curve * curve / 1.0
    perturbation = (flex.random_double(np) - 0.5) * weights
    observed = curve + perturbation
    # Inverse of the transform two lines up; the result is not used below.
    curve = 1.0 / (1.0 + flex.exp(-curve))

    weighted = chebyshev_lsq_fit.chebyshev_lsq_fit(nt, grid, observed, weights)
    # The polynome objects are constructed but never evaluated here.
    weighted_series = chebyshev_polynome(
        nt, weighted.low_limit, weighted.high_limit, weighted.coefs)
    unweighted = chebyshev_lsq_fit.chebyshev_lsq_fit(nt, grid, observed)
    unweighted_series = chebyshev_polynome(
        nt, unweighted.low_limit, unweighted.high_limit, unweighted.coefs)

    print("")
    print("Coefficients from weighted lsq")
    print(list(weighted.coefs))
    print("Coefficients from non-weighted lsq")
    print(list(unweighted.coefs))

    largest_difference = flex.max(flex.abs(unweighted.coefs - weighted.coefs))
    assert largest_difference > 0
def runge_phenomenon(self, n=41, nt=35, print_it=False):
    """Demonstrate Runge's phenomenon with Chebyshev least-squares fits.

    Runge's function 1 / (1 + 25 x^2) is fitted with ``nt`` Chebyshev terms
    twice: from ``n`` equidistant samples on [-1, 1] and from the ``n``
    points given by ``chebyshev_lsq_fit.chebyshev_nodes(n, -1, 1, True)``.
    Both fits and the exact function are evaluated on ``3 * n`` equidistant
    points on [-1, 1].

    Args:
        self: Not used by the body.
        n: Number of samples per fit.
        nt: Number of Chebyshev terms.
        print_it: If true, print ``x exact fit_equidistant fit_nodes`` for
            every evaluation point.

    Returns:
        None.
    """
    def unit_grid(count):
        # ``count`` equidistant abscissae running from -1 to 1.
        return 2.0 * (flex.double(xrange(count)) / float(count - 1) - 0.5)

    def as_polynome(lsq):
        # Turn a least-squares result into an evaluable Chebyshev polynome.
        return chebyshev_polynome(nt, lsq.low_limit, lsq.high_limit, lsq.coefs)

    sample_equi = unit_grid(n)
    value_equi = 1 / (1 + sample_equi * sample_equi * 25)
    poly_equi = as_polynome(
        chebyshev_lsq_fit.chebyshev_lsq_fit(nt, sample_equi, value_equi))

    sample_cheb = chebyshev_lsq_fit.chebyshev_nodes(n, -1, 1, True)
    value_cheb = 1 / (1 + sample_cheb * sample_cheb * 25)
    poly_cheb = as_polynome(
        chebyshev_lsq_fit.chebyshev_lsq_fit(nt, sample_cheb, value_cheb))

    abscissa = unit_grid(3 * n)
    curve_equi = poly_equi.f(abscissa)
    curve_cheb = poly_cheb.f(abscissa)
    curve_exact = 1 / (1 + abscissa * abscissa * 25)

    if print_it:
        for values in zip(abscissa, curve_exact, curve_equi, curve_cheb):
            # Same text as the Python 2 statement ``print x, y, yy, yyy``.
            print("%s %s %s %s" % values)
def chebyshev_fit(self, x_obs, y_obs, w_obs, n_terms=None):
    """Return a weighted Chebyshev least-squares fit evaluated at ``x_obs``.

    Args:
        x_obs: Abscissa values of the observations.
        y_obs: Observed values.
        w_obs: Observation weights handed to the fit.
        n_terms: Number of Chebyshev terms.  When ``None`` it is chosen by
            ``cross_validate_to_determine_number_of_terms`` (5 to 20 terms).

    Returns:
        The fitted polynome evaluated at ``x_obs``.
    """
    from scitbx.math import chebyshev_lsq_fit
    from scitbx.math import chebyshev_polynome

    if n_terms is None:
        # Choosing the number of terms is far slower than the fit itself.
        pick_terms = chebyshev_lsq_fit.cross_validate_to_determine_number_of_terms
        n_terms = pick_terms(
            x_obs, y_obs, w_obs,
            min_terms=5, max_terms=20,
            n_goes=20, n_free=20)
    self.logger.info("Fitting with %i terms" % n_terms)

    lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(n_terms, x_obs, y_obs, w_obs)
    self.logger.info("Least Squares residual: %7.6f" % lsq.f)

    y_fitted = chebyshev_polynome(
        n_terms, lsq.low_limit, lsq.high_limit, lsq.coefs).f(x_obs)

    # Debugging aid, off by default: overlay the observations and the fit.
    show_debug_plot = False
    if show_debug_plot:
        from matplotlib import pyplot
        pyplot.clf()
        pyplot.plot(x_obs, y_obs)
        pyplot.plot(x_obs, y_fitted)
        pyplot.draw()
        pyplot.show()
    return y_fitted
def estimate_signal_to_noise(x, y):
    """Estimate and plot a signal-to-noise profile (currently disabled).

    The function is switched off: it raises before doing any work.  The
    draft implementation is kept below the ``raise`` for reference only and
    is never executed.

    Args:
        x: Abscissa values (unused while the function is disabled).
        y: Ordinate values (unused while the function is disabled).

    Raises:
        NotImplementedError: Always.  ``NotImplementedError`` derives from
            ``RuntimeError``.
    """
    # A bare ``raise`` used to sit here.  Outside an ``except`` block it
    # states no intent: it re-raises whatever exception happens to be active,
    # or fails with an unrelated "nothing to re-raise" error.  Raise an
    # explicit, self-describing exception instead.
    raise NotImplementedError(
        "estimate_signal_to_noise is disabled; the draft implementation "
        "below the raise statement is unreachable")

    # ----------------------------------------------------------------------
    # Unreachable draft, retained for a future revival.
    # NOTE(review): `flex`, `pyplot`, `interpolate` and
    # `savitzky_golay_filter` are not defined inside this function;
    # presumably they come from module scope -- confirm before re-enabling.
    # ----------------------------------------------------------------------
    if 1:
        # Smooth the interpolated data; the residual is taken as noise.
        x, y = interpolate(x, y)
        # alternative smoother: x, y_tr = fourier_filter(x, y)
        x, y_tr = savitzky_golay_filter(x, y)
        noise = y - y_tr
    else:
        # Alternative: take a Chebyshev fit as the noise-free trace.
        from scitbx.math import chebyshev_polynome
        from scitbx.math import chebyshev_lsq_fit

        x_obs, y_obs = x, y
        w_obs = flex.double(y_obs.size(), 1)
        # The first and last points get a much larger weight than the rest.
        w_obs[0] = 1e16
        w_obs[-1] = 1e16
        # Determining the number of terms takes much longer than the fit.
        n_terms = chebyshev_lsq_fit.cross_validate_to_determine_number_of_terms(
            x_obs, y_obs, w_obs,
            min_terms=2, max_terms=30,
            n_goes=20, n_free=20)
        # n_terms = 7
        print("n_terms: %s" % (n_terms,))
        fit = chebyshev_lsq_fit.chebyshev_lsq_fit(n_terms, x_obs, y_obs, w_obs)
        fit_funct = chebyshev_polynome(
            n_terms, fit.low_limit, fit.high_limit, fit.coefs)
        y_fitted = fit_funct.f(x)
        y_tr = y_fitted
        n = y_tr.size()
        noise = y - y_tr

    noise_sq = flex.pow2(noise)
    from xfel.command_line.view_pixel_histograms import sliding_average
    # sigma_sq = sliding_average(noise_sq, n=31)
    sigma_sq = sliding_average(noise_sq, n=15)
    # sigma_sq = sliding_average(sigma_sq)
    # signal_to_noise = y/flex.sqrt(sigma_sq)
    import math
    # A single noise level, taken from samples 50..199, scales the whole trace.
    signal_to_noise = y / math.sqrt(flex.mean(noise_sq[50:200]))
    # pyplot.plot(noise)
    # pyplot.plot(x,y)
    # pyplot.show()
    offset = 0.2 * flex.max(y)
    offset = 0  # overrides the value computed on the previous line
    pyplot.plot(x, y, linewidth=2)
    pyplot.plot(x, offset + y_tr, linewidth=2)
    pyplot.show()
    pyplot.plot(x, noise, linewidth=2)
    # pyplot.plot(x, flex.sqrt(sigma_sq), linewidth=2)
    # ax2 = pyplot.twinx()
    # ax2.plot(x, y)
    pyplot.show()
    pyplot.plot(x[:375], signal_to_noise[:375])
    # pyplot.xlim(
    # ax2 = pyplot.twinx()
    # ax2.plot(x, y)
    pyplot.show()
def another_example(np=41, nt=5):
    """Show that weights change the outcome of a Chebyshev least-squares fit.

    Synthetic data on ``np`` equidistant points in [0, 1] are perturbed with
    random offsets scaled by ``w`` and fitted with ``nt`` terms, once with
    ``w`` as weights and once unweighted.  The two coefficient lists are
    printed and must not be identical.

    Args:
        np: Number of data points.
        nt: Number of Chebyshev terms.

    Raises:
        AssertionError: If both fits yield identical coefficients.
    """
    def polynome_from(lsq):
        # Wrap a least-squares result as a Chebyshev polynome object.
        return chebyshev_polynome(nt, lsq.low_limit, lsq.high_limit, lsq.coefs)

    abscissa = flex.double(range(np)) / (np - 1)
    signal = 0.99 * flex.exp(-abscissa * abscissa * 0.5)
    signal = -flex.log(1.0 / signal - 1)
    w = signal * signal / 1.0
    jitter = (flex.random_double(np) - 0.5) * w
    noisy = signal + jitter
    # Inverse of the transform applied above; the value is not used again.
    signal = 1.0 / (1.0 + flex.exp(-signal))

    with_weights = chebyshev_lsq_fit.chebyshev_lsq_fit(nt, abscissa, noisy, w)
    # Built for parity with the fits, never evaluated in this example.
    with_weights_f = polynome_from(with_weights)
    without_weights = chebyshev_lsq_fit.chebyshev_lsq_fit(nt, abscissa, noisy)
    without_weights_f = polynome_from(without_weights)

    print("")
    print("Coefficients from weighted lsq")
    print(list(with_weights.coefs))
    print("Coefficients from non-weighted lsq")
    print(list(without_weights.coefs))

    coefficient_gap = flex.abs(without_weights.coefs - with_weights.coefs)
    assert flex.max(coefficient_gap) > 0
def chebyshev_fit(self, x_obs, y_obs, w_obs, n_terms=None):
    """Fit a Chebyshev series to weighted data and evaluate it at ``x_obs``.

    Args:
        x_obs: Abscissa values of the observations.
        y_obs: Observed values.
        w_obs: Observation weights handed to the fit.
        n_terms: Number of terms; ``None`` selects it by cross validation
            between 5 and 20 terms.

    Returns:
        The values of the fitted polynome at ``x_obs``.
    """
    from scitbx.math import chebyshev_polynome
    from scitbx.math import chebyshev_lsq_fit

    terms = n_terms
    if terms is None:
        # The cross validation costs far more time than the fit that follows.
        terms = chebyshev_lsq_fit.cross_validate_to_determine_number_of_terms(
            x_obs, y_obs, w_obs,
            min_terms=5, max_terms=20, n_goes=20, n_free=20)
    self.logger.info("Fitting with %i terms" % terms)

    result = chebyshev_lsq_fit.chebyshev_lsq_fit(terms, x_obs, y_obs, w_obs)
    self.logger.info("Least Squares residual: %7.6f" % result.f)

    polynome = chebyshev_polynome(
        terms, result.low_limit, result.high_limit, result.coefs)
    y_fitted = polynome.f(x_obs)

    if False:
        # Debugging plots; enable by hand to compare data and fit visually.
        from matplotlib import pyplot
        pyplot.clf()
        for trace in (y_obs, y_fitted):
            pyplot.plot(x_obs, trace)
        pyplot.draw()
        pyplot.show()
    return y_fitted
def estimate_signal_to_noise(x, y):
    """Estimate and plot a signal-to-noise profile (currently disabled).

    The function is switched off: it raises before doing any work.  The
    draft implementation is kept below the ``raise`` for reference only and
    is never executed.

    Args:
        x: Abscissa values (unused while the function is disabled).
        y: Ordinate values (unused while the function is disabled).

    Raises:
        NotImplementedError: Always.  ``NotImplementedError`` derives from
            ``RuntimeError``.
    """
    # A bare ``raise`` used to sit here.  Outside an ``except`` block it
    # states no intent: it re-raises whatever exception happens to be active,
    # or fails with an unrelated "nothing to re-raise" error.  Raise an
    # explicit, self-describing exception instead.
    raise NotImplementedError(
        "estimate_signal_to_noise is disabled; the draft implementation "
        "below the raise statement is unreachable")

    # ----------------------------------------------------------------------
    # Unreachable draft, retained for a future revival.
    # NOTE(review): `flex`, `pyplot`, `interpolate` and
    # `savitzky_golay_filter` are not defined inside this function;
    # presumably they come from module scope -- confirm before re-enabling.
    # ----------------------------------------------------------------------
    if 1:
        # Smooth the interpolated data; the residual is taken as noise.
        x, y = interpolate(x, y)
        # alternative smoother: x, y_tr = fourier_filter(x, y)
        x, y_tr = savitzky_golay_filter(x, y)
        noise = y - y_tr
    else:
        # Alternative: take a Chebyshev fit as the noise-free trace.
        from scitbx.math import chebyshev_polynome
        from scitbx.math import chebyshev_lsq_fit

        x_obs, y_obs = x, y
        w_obs = flex.double(y_obs.size(), 1)
        # The first and last points get a much larger weight than the rest.
        w_obs[0] = 1e16
        w_obs[-1] = 1e16
        # Determining the number of terms takes much longer than the fit.
        n_terms = chebyshev_lsq_fit.cross_validate_to_determine_number_of_terms(
            x_obs, y_obs, w_obs,
            min_terms=2, max_terms=30,
            n_goes=20, n_free=20)
        # n_terms = 7
        print("n_terms: %s" % (n_terms,))
        fit = chebyshev_lsq_fit.chebyshev_lsq_fit(n_terms, x_obs, y_obs, w_obs)
        fit_funct = chebyshev_polynome(
            n_terms, fit.low_limit, fit.high_limit, fit.coefs)
        y_fitted = fit_funct.f(x)
        y_tr = y_fitted
        n = y_tr.size()
        noise = y - y_tr

    noise_sq = flex.pow2(noise)
    from xfel.command_line.view_pixel_histograms import sliding_average
    # sigma_sq = sliding_average(noise_sq, n=31)
    sigma_sq = sliding_average(noise_sq, n=15)
    # sigma_sq = sliding_average(sigma_sq)
    # signal_to_noise = y/flex.sqrt(sigma_sq)
    import math
    # A single noise level, taken from samples 50..199, scales the whole trace.
    signal_to_noise = y / math.sqrt(flex.mean(noise_sq[50:200]))
    # pyplot.plot(noise)
    # pyplot.plot(x,y)
    # pyplot.show()
    offset = 0.2 * flex.max(y)
    offset = 0  # overrides the value computed on the previous line
    pyplot.plot(x, y, linewidth=2)
    pyplot.plot(x, offset + y_tr, linewidth=2)
    pyplot.show()
    pyplot.plot(x, noise, linewidth=2)
    # pyplot.plot(x, flex.sqrt(sigma_sq), linewidth=2)
    # ax2 = pyplot.twinx()
    # ax2.plot(x, y)
    pyplot.show()
    pyplot.plot(x[:375], signal_to_noise[:375])
    # pyplot.xlim(
    # ax2 = pyplot.twinx()
    # ax2.plot(x, y)
    pyplot.show()
def example():
    """Run a worked example of Chebyshev least-squares fitting.

    One hundred points of ``sin(6 * 3.1415 * x) + exp(x)`` are perturbed with
    uniform random noise, the number of terms is chosen by cross validation
    (5 to 20 terms), the data are fitted, and deviation statistics plus ten
    sample rows are printed.  If ``iotbx.data_plots`` can be imported, the
    observed, error-free and fitted curves are also written in loggraph
    format to ``chebyshev.loggraph`` in the current directory.

    Returns:
        None.
    """
    x_obs = (flex.double(range(100)) + 1.0) / 101.0
    y_ideal = flex.sin(x_obs * 6.0 * 3.1415) + flex.exp(x_obs)
    y_obs = y_ideal + (flex.random_double(size=x_obs.size()) - 0.5) * 0.5
    # Unit weights; used for the cross validation only, the fit is unweighted.
    w_obs = flex.double(x_obs.size(), 1)

    print("Trying to determine the best number of terms ")
    print(" via cross validation techniques")
    print("")
    n_terms = chebyshev_lsq_fit.cross_validate_to_determine_number_of_terms(
        x_obs, y_obs, w_obs,
        min_terms=5, max_terms=20,
        n_goes=20, n_free=20)
    print("Fitting with %s terms" % (n_terms,))
    print("")

    fit = chebyshev_lsq_fit.chebyshev_lsq_fit(n_terms, x_obs, y_obs)
    print("Least Squares residual: %7.6f" % (fit.f))
    print(" R2-value : %7.6f" % (fit.f / flex.sum(y_obs * y_obs)))
    print("")

    fit_funct = chebyshev_polynome(
        n_terms, fit.low_limit, fit.high_limit, fit.coefs)
    y_fitted = fit_funct.f(x_obs)

    abs_deviation = flex.max(flex.abs((y_ideal - y_fitted)))
    print("Maximum deviation between fitted and error free data:")
    print(" %4.3f" % (abs_deviation))
    abs_deviation = flex.mean(flex.abs((y_ideal - y_fitted)))
    print("Mean deviation between fitted and error free data:")
    print(" %4.3f" % (abs_deviation))
    print("")
    abs_deviation = flex.max(flex.abs((y_obs - y_fitted)))
    print("Maximum deviation between fitted and observed data:")
    print(" %4.3f" % (abs_deviation))
    abs_deviation = flex.mean(flex.abs((y_obs - y_fitted)))
    print("Mean deviation between fitted and observed data:")
    print(" %4.3f" % (abs_deviation))
    print("")

    print("Showing 10 points")
    print(" x y_obs y_ideal y_fit")
    for ii in range(10):
        # Every ninth point: indices 0, 9, ..., 81.
        print("%6.3f %6.3f %6.3f %6.3f"
              % (x_obs[ii * 9], y_obs[ii * 9], y_ideal[ii * 9], y_fitted[ii * 9]))

    # The loggraph output is optional: skip it when iotbx is unavailable.
    try:
        from iotbx import data_plots
    except ImportError:
        return

    print("Preparing output for loggraph in a file called")
    print(" chebyshev.loggraph")
    chebyshev_plot = data_plots.plot_data(
        plot_title='Chebyshev fitting',
        x_label='x values',
        y_label='y values',
        x_data=x_obs,
        y_data=y_obs,
        y_legend='Observed y values',
        comments='Chebyshev fit')
    chebyshev_plot.add_data(y_data=y_ideal,
                            y_legend='Error free y values')
    chebyshev_plot.add_data(y_data=y_fitted,
                            y_legend='Fitted chebyshev approximation')

    f = StringIO()
    data_plots.plot_data_loggraph(chebyshev_plot, f)
    # The file used to be opened and never closed; the context manager
    # guarantees it is flushed and closed even if the write fails.
    with open('chebyshev.loggraph', 'w') as output_logfile:
        output_logfile.write(f.getvalue())
def __init__(self,
             miller_obs,
             miller_calc,
             r_free_flags,
             kernel_width_free_reflections=None,
             kernel_width_d_star_cubed=None,
             kernel_in_bin_centers=False,
             kernel_on_chebyshev_nodes=True,
             n_sampling_points=20,
             n_chebyshev_terms=10,
             use_sampling_sum_weights=False,
             make_checks_and_clean_up=True):
    """Estimate sigmaA at sampling points and smooth it with a Chebyshev fit.

    Observed and calculated data are normalised, the free reflections are
    selected, sigmaA is estimated at ``n_sampling_points`` values of
    d_star_cubed, and a Chebyshev series fitted to the logit of these
    estimates is evaluated for every reflection of ``miller_obs``.

    Parameters:
      miller_obs -- observed data; intensities are converted with
        ``f_sq_as_f()`` when ``make_checks_and_clean_up`` is set.
      miller_calc -- calculated data; ``abs(miller_calc)`` is stored.
      r_free_flags -- free-reflection flags; if no flag is True the flags
        are inverted before use.
      kernel_width_free_reflections, kernel_width_d_star_cubed -- exactly one
        of the two must be given (asserted).
      kernel_in_bin_centers -- sample at the centres of equal-width bins
        between the smallest and largest d_star_cubed.
      kernel_on_chebyshev_nodes -- otherwise sample on
        ``chebyshev_lsq_fit.chebyshev_nodes`` (True) or on an equidistant
        grid (False) over the slightly widened range.
      n_sampling_points -- number of sampling points.
      n_chebyshev_terms -- number of terms of the smoothing fit.
      use_sampling_sum_weights -- weight the fit with the square root of the
        kernel weight sums instead of fitting unweighted.
      make_checks_and_clean_up -- map inputs to the asymmetric unit and run
        the consistency checks.

    Raises:
      AssertionError -- on inconsistent input or a fitted sigmaA outside
        [0, 1].
      RuntimeError -- when no free reflections are selected.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    listing; confirm the extent of the ``make_checks_and_clean_up`` branch
    against the repository copy.
    """
    # Exactly one of the two kernel-width arguments has to be supplied.
    assert [kernel_width_free_reflections, kernel_width_d_star_cubed].count(None) == 1
    self.miller_obs = miller_obs
    self.miller_calc = abs(miller_calc)
    self.r_free_flags = r_free_flags
    self.kernel_width_free_reflections = kernel_width_free_reflections
    self.kernel_width_d_star_cubed = kernel_width_d_star_cubed
    self.n_chebyshev_terms = n_chebyshev_terms
    if make_checks_and_clean_up:
        self.miller_obs = self.miller_obs.map_to_asu()
        self.miller_calc = self.miller_calc.map_to_asu()
        self.r_free_flags = self.r_free_flags.map_to_asu()
        # Flags and observations must carry the same indices in the same order.
        assert self.r_free_flags.indices().all_eq(
            self.miller_obs.indices() )
        self.miller_calc = self.miller_calc.common_set(
            self.miller_obs )
        assert self.r_free_flags.indices().all_eq(
            self.miller_calc.indices() )
        assert self.miller_obs.is_real_array()
        # Work with amplitudes from here on.
        if self.miller_obs.is_xray_intensity_array():
            self.miller_obs = self.miller_obs.f_sq_as_f()
        assert self.miller_obs.observation_type() is None or \
            self.miller_obs.is_xray_amplitude_array()
        if self.miller_calc.observation_type() is None:
            self.miller_calc = self.miller_calc.set_observation_type(
                self.miller_obs)
    # Normalise observed and calculated data with an automatically chosen
    # kernel width, then convert the normalised values with f_sq_as_f().
    self.normalized_obs_f = absolute_scaling.kernel_normalisation(
        self.miller_obs, auto_kernel=True)
    self.normalized_obs =self.normalized_obs_f.normalised_miller_dev_eps.f_sq_as_f()
    self.normalized_calc_f = absolute_scaling.kernel_normalisation(
        self.miller_calc, auto_kernel=True)
    self.normalized_calc =self.normalized_calc_f.normalised_miller_dev_eps.f_sq_as_f()
    # Select the free reflections; if no flag is set, invert the flags so
    # that every reflection is used.
    if(self.r_free_flags.data().count(True) == 0):
        self.r_free_flags = self.r_free_flags.array(
            data = ~self.r_free_flags.data())
    self.free_norm_obs = self.normalized_obs.select( self.r_free_flags.data() )
    self.free_norm_calc= self.normalized_calc.select( self.r_free_flags.data() )
    if self.free_norm_obs.data().size() <= 0:
        raise RuntimeError("No free reflections.")
    # Derive the kernel width in d_star_cubed when only the width in number
    # of free reflections was given.
    if (self.kernel_width_d_star_cubed is None):
        self.kernel_width_d_star_cubed=sigmaa_estimator_kernel_width_d_star_cubed(
            r_free_flags=self.r_free_flags,
            kernel_width_free_reflections=self.kernel_width_free_reflections)
    self.sigma_target_functor = ext.sigmaa_estimator(
        e_obs = self.free_norm_obs.data(),
        e_calc = self.free_norm_calc.data(),
        centric = self.free_norm_obs.centric_flags().data(),
        d_star_cubed = self.free_norm_obs.d_star_cubed().data() ,
        width=self.kernel_width_d_star_cubed)
    # The sampling range covers all reflections, not only the free ones.
    d_star_cubed_overall = self.miller_obs.d_star_cubed().data()
    self.min_h = flex.min( d_star_cubed_overall )
    self.max_h = flex.max( d_star_cubed_overall )
    self.h_array = None
    if (kernel_in_bin_centers):
        # Centres of n_sampling_points equal-width bins on [min_h, max_h].
        self.h_array = flex.double( range(1,n_sampling_points*2,2) )*(
            self.max_h-self.min_h)/(n_sampling_points*2)+self.min_h
    else:
        # Widen the range by one percent at both ends before sampling.
        self.min_h *= 0.99
        self.max_h *= 1.01
        if kernel_on_chebyshev_nodes:
            self.h_array = chebyshev_lsq_fit.chebyshev_nodes(
                n=n_sampling_points,
                low=self.min_h,
                high=self.max_h,
                include_limits=True)
        else:
            # Equidistant grid that includes both end points.
            self.h_array = flex.double( range(n_sampling_points) )*(
                self.max_h-self.min_h)/float(n_sampling_points-1.0)+self.min_h
    assert self.h_array.size() == n_sampling_points
    self.sigmaa_array = flex.double()
    self.sigmaa_array.reserve(self.h_array.size())
    self.sum_weights = flex.double()
    self.sum_weights.reserve(self.h_array.size())
    # One sigmaA estimate and one kernel weight sum per sampling point.
    for h in self.h_array:
        stimator = sigmaa_point_estimator(self.sigma_target_functor, h)
        self.sigmaa_array.append( stimator.sigmaa )
        self.sum_weights.append(
            self.sigma_target_functor.sum_weights(d_star_cubed=h))
    # Fit a smooth function to the logit of sigmaA, so that the
    # back-transformed fit stays between 0 and 1.
    reparam_sa = -flex.log( 1.0/self.sigmaa_array -1.0 )
    if (use_sampling_sum_weights):
        w_obs = flex.sqrt(self.sum_weights)
    else:
        w_obs = None
    fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(
        n_terms=self.n_chebyshev_terms,
        x_obs=self.h_array,
        y_obs=reparam_sa,
        w_obs=w_obs)
    # NOTE(review): the polynome is built on [min_h, max_h], not on
    # fit_lsq.low_limit / fit_lsq.high_limit; confirm the two ranges agree
    # when kernel_in_bin_centers is set.
    cheb_pol = chebyshev_polynome(
        self.n_chebyshev_terms,
        self.min_h,
        self.max_h,
        fit_lsq.coefs)
    # Inverse of the logit transform applied to sigmaa_array above.
    def reverse_reparam(values): return 1.0/(1.0 + flex.exp(-values))
    self.sigmaa_fitted = reverse_reparam(cheb_pol.f(self.h_array))
    self.sigmaa_miller_array = reverse_reparam(cheb_pol.f(d_star_cubed_overall))
    assert flex.min(self.sigmaa_miller_array) >= 0
    assert flex.max(self.sigmaa_miller_array) <= 1
    self.sigmaa_miller_array = self.miller_obs.array(data=self.sigmaa_miller_array)
    # Placeholders; nothing in this constructor fills them.
    self.alpha = None
    self.beta = None
    self.fom_array = None
def __init__(self,
             miller_array,
             kernel_width=None,
             n_bins=23,
             n_term=13,
             d_star_sq_low=None,
             d_star_sq_high=None,
             auto_kernel=False,
             number_of_sorted_reflections_for_auto_kernel=50):
    """Normalise intensities with a kernel-smoothed, Chebyshev-fitted mean.

    Amplitudes are first converted to intensities.  The kernel-weighted mean
    intensity is computed at ``n_bins`` Chebyshev nodes in d_star_sq, its
    logarithm is fitted with an ``n_term`` Chebyshev series, and the
    exponential of that series evaluated per reflection is the normaliser.

    Parameters:
      miller_array -- real X-ray amplitude or intensity array, not empty.
      kernel_width -- kernel width in d_star_sq; must be None when
        ``auto_kernel`` is used.
      n_bins -- number of Chebyshev nodes (more than one).
      n_term -- number of Chebyshev terms of the fits.
      d_star_sq_low, d_star_sq_high -- range limits; default to the smallest
        and largest d_star_sq of the data.
      auto_kernel -- False, True or an integer.  True takes
        ``number_of_sorted_reflections_for_auto_kernel``; an integer is used
        directly as the rank of the reflection that sets the width.
      number_of_sorted_reflections_for_auto_kernel -- see ``auto_kernel``.

    Results:
      normalised_miller -- data (and sigmas, if present) divided by the
        normaliser.
      normalised_miller_dev_eps -- the same, further divided by epsilon.

    Raises:
      AssertionError -- on inconsistent arguments, empty input, a
        non-positive automatic kernel width or no usable bin.
      RuntimeError -- for non-real arrays or an unknown observation type.
      Sorry -- when fewer than half of the bins have a mean above 1e-16.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    listing; confirm against the repository copy.
    """
    # Exactly one of kernel_width / auto_kernel has to be supplied.
    if kernel_width is None:
        assert (auto_kernel is not False)
    if auto_kernel is not False:
        # Identity test; the original compared with "== None".
        assert (kernel_width is None)
    assert miller_array.size() > 0

    # Work on intensities throughout.
    work_array = None
    if not miller_array.is_real_array():
        raise RuntimeError("Please provide real arrays only")
    if miller_array.is_xray_amplitude_array():
        work_array = miller_array.f_as_f_sq()
    if miller_array.is_xray_intensity_array():
        work_array = miller_array.deep_copy()
        work_array = work_array.set_observation_type(miller_array)
    # Anything that is neither intensity nor amplitude is rejected.
    if not miller_array.is_xray_intensity_array():
        if not miller_array.is_xray_amplitude_array():
            raise RuntimeError("Observation type unknown")

    # Shorthands.
    I_obs = work_array.data()
    epsilons = work_array.epsilons().data().as_double()
    d_star_sq_hkl = work_array.d_spacings().data()
    d_star_sq_hkl = 1.0 / (d_star_sq_hkl * d_star_sq_hkl)

    # Range limits default to the extent of the data.
    if d_star_sq_low is None:
        d_star_sq_low = flex.min(d_star_sq_hkl)
    if d_star_sq_high is None:
        d_star_sq_high = flex.max(d_star_sq_hkl)

    # Heuristic kernel width: the distance from the low limit to the
    # d_star_sq of the reflection at rank `number` in sorted order.
    self.kernel_width = kernel_width
    if auto_kernel is not False:
        sort_permut = flex.sort_permutation(d_star_sq_hkl)
        if auto_kernel == True:
            number = number_of_sorted_reflections_for_auto_kernel
        else:
            number = int(auto_kernel)
        # Clamp to the last valid index.  The original tested ">" only, so
        # number == size() slipped through and indexed one past the end.
        if number >= d_star_sq_hkl.size():
            number = d_star_sq_hkl.size() - 1
        self.kernel_width = d_star_sq_hkl[sort_permut[number]] - d_star_sq_low
        assert self.kernel_width > 0

    # More than one node is required for the interpolation.
    assert (n_bins > 1)
    self.d_star_sq_array = chebyshev_lsq_fit.chebyshev_nodes(
        n=n_bins,
        low=d_star_sq_low,
        high=d_star_sq_high,
        include_limits=True)

    # Kernel-weighted averages at the nodes; this can take a while.
    self.mean_I_array = scaling.kernel_normalisation(
        d_star_sq_hkl=d_star_sq_hkl,
        I_hkl=I_obs,
        epsilon=epsilons,
        d_star_sq_array=self.d_star_sq_array,
        kernel_width=self.kernel_width)
    self.var_I_array = scaling.kernel_normalisation(
        d_star_sq_hkl=d_star_sq_hkl,
        I_hkl=I_obs * I_obs,
        epsilon=epsilons * epsilons,
        d_star_sq_array=self.d_star_sq_array,
        kernel_width=self.kernel_width)
    self.var_I_array = self.var_I_array - self.mean_I_array * self.mean_I_array
    # NOTE(review): the chained assignment below replaces the variance just
    # computed with the weight sums, so var_I_array and var_norm are derived
    # from the weights.  Left unchanged because a correction would alter
    # numerical results for every caller; confirm the intent.
    self.weight_sum = self.var_I_array = scaling.kernel_normalisation(
        d_star_sq_hkl=d_star_sq_hkl,
        I_hkl=I_obs * 0.0 + 1.0,
        epsilon=epsilons * 0.0 + 1.0,
        d_star_sq_array=self.d_star_sq_array,
        kernel_width=self.kernel_width)

    eps = 1e-16  # XXX Maybe this should be larger?
    self.bin_selection = (self.mean_I_array > eps)
    sel_pos = self.bin_selection.iselection()
    # FIXME rare bug: this crashes when the majority of the data are zero,
    # e.g. because the resolution limit was set too high and F/I were filled
    # in with 0.  It would be good to catch such cases in advance by
    # inspecting the binned values, and raise a different error message.
    assert sel_pos.size() > 0
    if (sel_pos.size() < self.mean_I_array.size() / 2):
        raise Sorry(
            "Analysis could not be continued because more than half " +
            "of the data have values below 1e-16. This usually indicates either " +
            "an inappropriately high resolution cutoff, or an error in the data " +
            "file which artificially creates a higher resolution limit.")

    # Keep the usable nodes only; means and "variances" are fitted as logs.
    self.mean_I_array = self.mean_I_array.select(sel_pos)
    self.d_star_sq_array = self.d_star_sq_array.select(sel_pos)
    self.var_I_array = flex.log(self.var_I_array.select(sel_pos))
    self.weight_sum = self.weight_sum.select(sel_pos)
    self.mean_I_array = flex.log(self.mean_I_array)

    # Chebyshev fits over [d_star_sq_low, d_star_sq_high].
    normalizer_fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(
        n_term,
        self.d_star_sq_array,
        self.mean_I_array)
    self.normalizer = chebyshev_polynome(
        n_term,
        d_star_sq_low,
        d_star_sq_high,
        normalizer_fit_lsq.coefs)
    var_lsq_fit = chebyshev_lsq_fit.chebyshev_lsq_fit(
        n_term,
        self.d_star_sq_array,
        self.var_I_array)
    self.var_norm = chebyshev_polynome(
        n_term,
        d_star_sq_low,
        d_star_sq_high,
        var_lsq_fit.coefs)
    ws_fit = chebyshev_lsq_fit.chebyshev_lsq_fit(
        n_term,
        self.d_star_sq_array,
        self.weight_sum)
    self.weight_sum = chebyshev_polynome(
        n_term,
        d_star_sq_low,
        d_star_sq_high,
        ws_fit.coefs)

    # Undo the logarithms and evaluate the fits for every reflection.
    self.mean_I_array = flex.exp(self.mean_I_array)
    self.normalizer_for_miller_array = flex.exp(
        self.normalizer.f(d_star_sq_hkl))
    self.var_I_array = flex.exp(self.var_I_array)
    self.var_norm = flex.exp(self.var_norm.f(d_star_sq_hkl))
    # NOTE(review): weight_sum was fitted without a logarithm, yet its fit is
    # exponentiated here like the other two; confirm this is intended.
    self.weight_sum = flex.exp(self.weight_sum.f(d_star_sq_hkl))

    self.normalised_miller = None
    self.normalised_miller_dev_eps = None
    if work_array.sigmas() is not None:
        self.normalised_miller = work_array.customized_copy(
            data=work_array.data() / self.normalizer_for_miller_array,
            sigmas=work_array.sigmas() / self.normalizer_for_miller_array
        ).set_observation_type(work_array)
        self.normalised_miller_dev_eps = self.normalised_miller.customized_copy(
            data=self.normalised_miller.data() / epsilons,
            sigmas=self.normalised_miller.sigmas() / epsilons
        ).set_observation_type(work_array)
    else:
        self.normalised_miller = work_array.customized_copy(
            data=work_array.data() / self.normalizer_for_miller_array
        ).set_observation_type(work_array)
        self.normalised_miller_dev_eps = self.normalised_miller.customized_copy(
            data=self.normalised_miller.data() / epsilons
        ).set_observation_type(work_array)
def __init__(
    self,
    miller_obs,
    miller_calc,
    r_free_flags,
    kernel_width_free_reflections=None,
    kernel_width_d_star_cubed=None,
    kernel_in_bin_centers=False,
    kernel_on_chebyshev_nodes=True,
    n_sampling_points=20,
    n_chebyshev_terms=10,
    use_sampling_sum_weights=False,
    make_checks_and_clean_up=True,
):
    """Estimate sigmaA at sampling points and smooth it with a Chebyshev fit.

    Observed and calculated data are normalised, the free reflections are
    selected, sigmaA is estimated at ``n_sampling_points`` values of
    d_star_cubed, and a Chebyshev series fitted to the logit of these
    estimates is evaluated for every reflection of ``miller_obs``.

    Parameters:
      miller_obs -- observed data; intensities are converted with
        ``f_sq_as_f()`` when ``make_checks_and_clean_up`` is set.
      miller_calc -- calculated data; ``abs(miller_calc)`` is stored.
      r_free_flags -- free-reflection flags; if no flag is True the flags
        are inverted before use.
      kernel_width_free_reflections, kernel_width_d_star_cubed -- exactly one
        of the two must be given (asserted).
      kernel_in_bin_centers -- sample at the centres of equal-width bins
        between the smallest and largest d_star_cubed.
      kernel_on_chebyshev_nodes -- otherwise sample on
        ``chebyshev_lsq_fit.chebyshev_nodes`` (True) or on an equidistant
        grid (False) over the slightly widened range.
      n_sampling_points -- number of sampling points.
      n_chebyshev_terms -- number of terms of the smoothing fit.
      use_sampling_sum_weights -- weight the fit with the square root of the
        kernel weight sums instead of fitting unweighted.
      make_checks_and_clean_up -- map inputs to the asymmetric unit and run
        the consistency checks.

    Raises:
      AssertionError -- on inconsistent input or a fitted sigmaA outside
        [0, 1].
      RuntimeError -- when no free reflections are selected.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    listing; confirm the extent of the ``make_checks_and_clean_up`` branch
    against the repository copy.
    """
    # Exactly one of the two kernel-width arguments has to be supplied.
    assert [kernel_width_free_reflections, kernel_width_d_star_cubed].count(None) == 1
    self.miller_obs = miller_obs
    self.miller_calc = abs(miller_calc)
    self.r_free_flags = r_free_flags
    self.kernel_width_free_reflections = kernel_width_free_reflections
    self.kernel_width_d_star_cubed = kernel_width_d_star_cubed
    self.n_chebyshev_terms = n_chebyshev_terms
    if make_checks_and_clean_up:
        self.miller_obs = self.miller_obs.map_to_asu()
        self.miller_calc = self.miller_calc.map_to_asu()
        self.r_free_flags = self.r_free_flags.map_to_asu()
        # Flags and observations must carry the same indices in the same order.
        assert self.r_free_flags.indices().all_eq(self.miller_obs.indices())
        self.miller_calc = self.miller_calc.common_set(self.miller_obs)
        assert self.r_free_flags.indices().all_eq(self.miller_calc.indices())
        assert self.miller_obs.is_real_array()
        # Work with amplitudes from here on.
        if self.miller_obs.is_xray_intensity_array():
            self.miller_obs = self.miller_obs.f_sq_as_f()
        assert self.miller_obs.observation_type() is None or self.miller_obs.is_xray_amplitude_array()
        if self.miller_calc.observation_type() is None:
            self.miller_calc = self.miller_calc.set_observation_type(self.miller_obs)
    # Normalise observed and calculated data with an automatically chosen
    # kernel width, then convert the normalised values with f_sq_as_f().
    self.normalized_obs_f = absolute_scaling.kernel_normalisation(self.miller_obs, auto_kernel=True)
    self.normalized_obs = self.normalized_obs_f.normalised_miller_dev_eps.f_sq_as_f()
    self.normalized_calc_f = absolute_scaling.kernel_normalisation(self.miller_calc, auto_kernel=True)
    self.normalized_calc = self.normalized_calc_f.normalised_miller_dev_eps.f_sq_as_f()
    # Select the free reflections; if no flag is set, invert the flags so
    # that every reflection is used.
    if self.r_free_flags.data().count(True) == 0:
        self.r_free_flags = self.r_free_flags.array(data=~self.r_free_flags.data())
    self.free_norm_obs = self.normalized_obs.select(self.r_free_flags.data())
    self.free_norm_calc = self.normalized_calc.select(self.r_free_flags.data())
    if self.free_norm_obs.data().size() <= 0:
        raise RuntimeError("No free reflections.")
    # Derive the kernel width in d_star_cubed when only the width in number
    # of free reflections was given.
    if self.kernel_width_d_star_cubed is None:
        self.kernel_width_d_star_cubed = sigmaa_estimator_kernel_width_d_star_cubed(
            r_free_flags=self.r_free_flags, kernel_width_free_reflections=self.kernel_width_free_reflections
        )
    self.sigma_target_functor = ext.sigmaa_estimator(
        e_obs=self.free_norm_obs.data(),
        e_calc=self.free_norm_calc.data(),
        centric=self.free_norm_obs.centric_flags().data(),
        d_star_cubed=self.free_norm_obs.d_star_cubed().data(),
        width=self.kernel_width_d_star_cubed,
    )
    # The sampling range covers all reflections, not only the free ones.
    d_star_cubed_overall = self.miller_obs.d_star_cubed().data()
    self.min_h = flex.min(d_star_cubed_overall)
    self.max_h = flex.max(d_star_cubed_overall)
    self.h_array = None
    if kernel_in_bin_centers:
        # Centres of n_sampling_points equal-width bins on [min_h, max_h].
        self.h_array = (
            flex.double(xrange(1, n_sampling_points * 2, 2)) * (self.max_h - self.min_h) / (n_sampling_points * 2)
            + self.min_h
        )
    else:
        # Widen the range by one percent at both ends before sampling.
        self.min_h *= 0.99
        self.max_h *= 1.01
        if kernel_on_chebyshev_nodes:
            self.h_array = chebyshev_lsq_fit.chebyshev_nodes(
                n=n_sampling_points, low=self.min_h, high=self.max_h, include_limits=True
            )
        else:
            # Equidistant grid that includes both end points.
            self.h_array = (
                flex.double(range(n_sampling_points)) * (self.max_h - self.min_h) / float(n_sampling_points - 1.0)
                + self.min_h
            )
    assert self.h_array.size() == n_sampling_points
    self.sigmaa_array = flex.double()
    self.sigmaa_array.reserve(self.h_array.size())
    self.sum_weights = flex.double()
    self.sum_weights.reserve(self.h_array.size())
    # One sigmaA estimate and one kernel weight sum per sampling point.
    for h in self.h_array:
        stimator = sigmaa_point_estimator(self.sigma_target_functor, h)
        self.sigmaa_array.append(stimator.sigmaa)
        self.sum_weights.append(self.sigma_target_functor.sum_weights(d_star_cubed=h))
    # Fit a smooth function to the logit of sigmaA, so that the
    # back-transformed fit stays between 0 and 1.
    reparam_sa = -flex.log(1.0 / self.sigmaa_array - 1.0)
    if use_sampling_sum_weights:
        w_obs = flex.sqrt(self.sum_weights)
    else:
        w_obs = None
    fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(
        n_terms=self.n_chebyshev_terms, x_obs=self.h_array, y_obs=reparam_sa, w_obs=w_obs
    )
    # NOTE(review): the polynome is built on [min_h, max_h], not on
    # fit_lsq.low_limit / fit_lsq.high_limit; confirm the two ranges agree
    # when kernel_in_bin_centers is set.
    cheb_pol = chebyshev_polynome(self.n_chebyshev_terms, self.min_h, self.max_h, fit_lsq.coefs)
    # Inverse of the logit transform applied to sigmaa_array above.
    def reverse_reparam(values): return 1.0 / (1.0 + flex.exp(-values))
    self.sigmaa_fitted = reverse_reparam(cheb_pol.f(self.h_array))
    self.sigmaa_miller_array = reverse_reparam(cheb_pol.f(d_star_cubed_overall))
    assert flex.min(self.sigmaa_miller_array) >= 0
    assert flex.max(self.sigmaa_miller_array) <= 1
    self.sigmaa_miller_array = self.miller_obs.array(data=self.sigmaa_miller_array)
    # Placeholders; nothing in this constructor fills them.
    self.alpha = None
    self.beta = None
    self.fom_array = None
def __init__(self,
             miller_array,
             kernel_width=None,
             n_bins=23,
             n_term=13,
             d_star_sq_low=None,
             d_star_sq_high=None,
             auto_kernel=False,
             number_of_sorted_reflections_for_auto_kernel=50):
    """Normalise intensities with a kernel-smoothed, Chebyshev-fitted mean.

    Amplitudes are first converted to intensities.  The kernel-weighted mean
    intensity is computed at ``n_bins`` Chebyshev nodes in d_star_sq, its
    logarithm is fitted with an ``n_term`` Chebyshev series, and the
    exponential of that series evaluated per reflection is the normaliser.

    Parameters:
      miller_array -- real X-ray amplitude or intensity array, not empty.
      kernel_width -- kernel width in d_star_sq; must be None when
        ``auto_kernel`` is used.
      n_bins -- number of Chebyshev nodes (more than one).
      n_term -- number of Chebyshev terms of the fits.
      d_star_sq_low, d_star_sq_high -- range limits; default to the smallest
        and largest d_star_sq of the data.
      auto_kernel -- False, True or an integer.  True takes
        ``number_of_sorted_reflections_for_auto_kernel``; an integer is used
        directly as the rank of the reflection that sets the width.
      number_of_sorted_reflections_for_auto_kernel -- see ``auto_kernel``.

    Results:
      normalised_miller -- data (and sigmas, if present) divided by the
        normaliser.
      normalised_miller_dev_eps -- the same, further divided by epsilon.

    Raises:
      AssertionError -- on inconsistent arguments, empty input, a
        non-positive automatic kernel width or no usable bin.
      RuntimeError -- for non-real arrays or an unknown observation type.
      Sorry -- when fewer than half of the bins have a mean above 1e-16.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    listing; confirm against the repository copy.
    """
    # Exactly one of kernel_width / auto_kernel has to be supplied.
    if kernel_width is None:
        assert (auto_kernel is not False)
    if auto_kernel is not False:
        # Identity test; the original compared with "== None".
        assert (kernel_width is None)
    assert miller_array.size() > 0

    # Work on intensities throughout.
    work_array = None
    if not miller_array.is_real_array():
        raise RuntimeError("Please provide real arrays only")
    if miller_array.is_xray_amplitude_array():
        work_array = miller_array.f_as_f_sq()
    if miller_array.is_xray_intensity_array():
        work_array = miller_array.deep_copy()
        work_array = work_array.set_observation_type(miller_array)
    # Anything that is neither intensity nor amplitude is rejected.
    if not miller_array.is_xray_intensity_array():
        if not miller_array.is_xray_amplitude_array():
            raise RuntimeError("Observation type unknown")

    # Shorthands.
    I_obs = work_array.data()
    epsilons = work_array.epsilons().data().as_double()
    d_star_sq_hkl = work_array.d_spacings().data()
    d_star_sq_hkl = 1.0 / (d_star_sq_hkl * d_star_sq_hkl)

    # Range limits default to the extent of the data.
    if d_star_sq_low is None:
        d_star_sq_low = flex.min(d_star_sq_hkl)
    if d_star_sq_high is None:
        d_star_sq_high = flex.max(d_star_sq_hkl)

    # Heuristic kernel width: the distance from the low limit to the
    # d_star_sq of the reflection at rank `number` in sorted order.
    self.kernel_width = kernel_width
    if auto_kernel is not False:
        sort_permut = flex.sort_permutation(d_star_sq_hkl)
        if auto_kernel == True:
            number = number_of_sorted_reflections_for_auto_kernel
        else:
            number = int(auto_kernel)
        # Clamp to the last valid index.  The original tested ">" only, so
        # number == size() slipped through and indexed one past the end.
        if number >= d_star_sq_hkl.size():
            number = d_star_sq_hkl.size() - 1
        self.kernel_width = d_star_sq_hkl[sort_permut[number]] - d_star_sq_low
        assert self.kernel_width > 0

    # More than one node is required for the interpolation.
    assert (n_bins > 1)
    self.d_star_sq_array = chebyshev_lsq_fit.chebyshev_nodes(
        n=n_bins,
        low=d_star_sq_low,
        high=d_star_sq_high,
        include_limits=True)

    # Kernel-weighted averages at the nodes; this can take a while.
    self.mean_I_array = scaling.kernel_normalisation(
        d_star_sq_hkl=d_star_sq_hkl,
        I_hkl=I_obs,
        epsilon=epsilons,
        d_star_sq_array=self.d_star_sq_array,
        kernel_width=self.kernel_width)
    self.var_I_array = scaling.kernel_normalisation(
        d_star_sq_hkl=d_star_sq_hkl,
        I_hkl=I_obs * I_obs,
        epsilon=epsilons * epsilons,
        d_star_sq_array=self.d_star_sq_array,
        kernel_width=self.kernel_width)
    self.var_I_array = self.var_I_array - self.mean_I_array * self.mean_I_array
    # NOTE(review): the chained assignment below replaces the variance just
    # computed with the weight sums, so var_I_array and var_norm are derived
    # from the weights.  Left unchanged because a correction would alter
    # numerical results for every caller; confirm the intent.
    self.weight_sum = self.var_I_array = scaling.kernel_normalisation(
        d_star_sq_hkl=d_star_sq_hkl,
        I_hkl=I_obs * 0.0 + 1.0,
        epsilon=epsilons * 0.0 + 1.0,
        d_star_sq_array=self.d_star_sq_array,
        kernel_width=self.kernel_width)

    eps = 1e-16  # XXX Maybe this should be larger?
    self.bin_selection = (self.mean_I_array > eps)
    sel_pos = self.bin_selection.iselection()
    # FIXME rare bug: this crashes when the majority of the data are zero,
    # e.g. because the resolution limit was set too high and F/I were filled
    # in with 0.  It would be good to catch such cases in advance by
    # inspecting the binned values, and raise a different error message.
    assert sel_pos.size() > 0
    if (sel_pos.size() < self.mean_I_array.size() / 2):
        raise Sorry(
            "Analysis could not be continued because more than half " +
            "of the data have values below 1e-16. This usually indicates either " +
            "an inappropriately high resolution cutoff, or an error in the data " +
            "file which artificially creates a higher resolution limit.")

    # Keep the usable nodes only; means and "variances" are fitted as logs.
    self.mean_I_array = self.mean_I_array.select(sel_pos)
    self.d_star_sq_array = self.d_star_sq_array.select(sel_pos)
    self.var_I_array = flex.log(self.var_I_array.select(sel_pos))
    self.weight_sum = self.weight_sum.select(sel_pos)
    self.mean_I_array = flex.log(self.mean_I_array)

    # Chebyshev fits over [d_star_sq_low, d_star_sq_high].
    normalizer_fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(
        n_term,
        self.d_star_sq_array,
        self.mean_I_array)
    self.normalizer = chebyshev_polynome(
        n_term,
        d_star_sq_low,
        d_star_sq_high,
        normalizer_fit_lsq.coefs)
    var_lsq_fit = chebyshev_lsq_fit.chebyshev_lsq_fit(
        n_term,
        self.d_star_sq_array,
        self.var_I_array)
    self.var_norm = chebyshev_polynome(
        n_term,
        d_star_sq_low,
        d_star_sq_high,
        var_lsq_fit.coefs)
    ws_fit = chebyshev_lsq_fit.chebyshev_lsq_fit(
        n_term,
        self.d_star_sq_array,
        self.weight_sum)
    self.weight_sum = chebyshev_polynome(
        n_term,
        d_star_sq_low,
        d_star_sq_high,
        ws_fit.coefs)

    # Undo the logarithms and evaluate the fits for every reflection.
    self.mean_I_array = flex.exp(self.mean_I_array)
    self.normalizer_for_miller_array = flex.exp(
        self.normalizer.f(d_star_sq_hkl))
    self.var_I_array = flex.exp(self.var_I_array)
    self.var_norm = flex.exp(self.var_norm.f(d_star_sq_hkl))
    # NOTE(review): weight_sum was fitted without a logarithm, yet its fit is
    # exponentiated here like the other two; confirm this is intended.
    self.weight_sum = flex.exp(self.weight_sum.f(d_star_sq_hkl))

    self.normalised_miller = None
    self.normalised_miller_dev_eps = None
    if work_array.sigmas() is not None:
        self.normalised_miller = work_array.customized_copy(
            data=work_array.data() / self.normalizer_for_miller_array,
            sigmas=work_array.sigmas() / self.normalizer_for_miller_array
        ).set_observation_type(work_array)
        self.normalised_miller_dev_eps = self.normalised_miller.customized_copy(
            data=self.normalised_miller.data() / epsilons,
            sigmas=self.normalised_miller.sigmas() / epsilons
        ).set_observation_type(work_array)
    else:
        self.normalised_miller = work_array.customized_copy(
            data=work_array.data() / self.normalizer_for_miller_array
        ).set_observation_type(work_array)
        self.normalised_miller_dev_eps = self.normalised_miller.customized_copy(
            data=self.normalised_miller.data() / epsilons
        ).set_observation_type(work_array)
def example():
    """Run a worked example of Chebyshev least-squares fitting.

    One hundred points of ``sin(6 * 3.1415 * x) + exp(x)`` are perturbed with
    uniform random noise, the number of terms is chosen by cross validation
    (5 to 20 terms), the data are fitted, and deviation statistics plus ten
    sample rows are printed.  If ``iotbx.data_plots`` can be imported, the
    observed, error-free and fitted curves are also written in loggraph
    format to ``chebyshev.loggraph`` in the current directory.

    Returns:
        None.
    """
    x_obs = (flex.double(range(100)) + 1.0) / 101.0
    y_ideal = flex.sin(x_obs * 6.0 * 3.1415) + flex.exp(x_obs)
    y_obs = y_ideal + (flex.random_double(size=x_obs.size()) - 0.5) * 0.5
    # Unit weights; used for the cross validation only, the fit is unweighted.
    w_obs = flex.double(x_obs.size(), 1)

    print("Trying to determine the best number of terms ")
    print(" via cross validation techniques")
    print("")
    n_terms = chebyshev_lsq_fit.cross_validate_to_determine_number_of_terms(
        x_obs, y_obs, w_obs,
        min_terms=5, max_terms=20,
        n_goes=20, n_free=20)
    print("Fitting with %s terms" % (n_terms,))
    print("")

    fit = chebyshev_lsq_fit.chebyshev_lsq_fit(n_terms, x_obs, y_obs)
    print("Least Squares residual: %7.6f" % (fit.f))
    print(" R2-value : %7.6f" % (fit.f / flex.sum(y_obs * y_obs)))
    print("")

    fit_funct = chebyshev_polynome(
        n_terms, fit.low_limit, fit.high_limit, fit.coefs)
    y_fitted = fit_funct.f(x_obs)

    abs_deviation = flex.max(flex.abs((y_ideal - y_fitted)))
    print("Maximum deviation between fitted and error free data:")
    print(" %4.3f" % (abs_deviation))
    abs_deviation = flex.mean(flex.abs((y_ideal - y_fitted)))
    print("Mean deviation between fitted and error free data:")
    print(" %4.3f" % (abs_deviation))
    print("")
    abs_deviation = flex.max(flex.abs((y_obs - y_fitted)))
    print("Maximum deviation between fitted and observed data:")
    print(" %4.3f" % (abs_deviation))
    abs_deviation = flex.mean(flex.abs((y_obs - y_fitted)))
    print("Mean deviation between fitted and observed data:")
    print(" %4.3f" % (abs_deviation))
    print("")

    print("Showing 10 points")
    print(" x y_obs y_ideal y_fit")
    for ii in range(10):
        # Every ninth point: indices 0, 9, ..., 81.
        print("%6.3f %6.3f %6.3f %6.3f"
              % (x_obs[ii * 9], y_obs[ii * 9], y_ideal[ii * 9], y_fitted[ii * 9]))

    # The loggraph output is optional: skip it when iotbx is unavailable.
    try:
        from iotbx import data_plots
    except ImportError:
        return

    print("Preparing output for loggraph in a file called")
    print(" chebyshev.loggraph")
    chebyshev_plot = data_plots.plot_data(
        plot_title='Chebyshev fitting',
        x_label='x values',
        y_label='y values',
        x_data=x_obs,
        y_data=y_obs,
        y_legend='Observed y values',
        comments='Chebyshev fit')
    chebyshev_plot.add_data(y_data=y_ideal,
                            y_legend='Error free y values')
    chebyshev_plot.add_data(y_data=y_fitted,
                            y_legend='Fitted chebyshev approximation')

    f = StringIO()
    data_plots.plot_data_loggraph(chebyshev_plot, f)
    # The file used to be opened and never closed; the context manager
    # guarantees it is flushed and closed even if the write fails.
    with open('chebyshev.loggraph', 'w') as output_logfile:
        output_logfile.write(f.getvalue())