def test_twin_r_value(twin_operator):
  miller_array = random_data(35).map_to_asu()
  miller_array = miller_array.f_as_f_sq()
  for twin_fraction, expected_r_abs, expected_r_sq in zip(
      [0, 0.1, 0.2, 0.3, 0.4, 0.5],
      [0.50, 0.40, 0.30, 0.20, 0.10, 0.0],
      [0.333, 0.213, 0.120, 0.0533, 0.0133, 0.00]):
    cb_op = sgtbx.change_of_basis_op(twin_operator)
    miller_array_mod, miller_array_twin = miller_array.common_sets(
      miller_array.change_basis(cb_op).map_to_asu())
    twinned_miller = miller_array_mod.customized_copy(
      data=(1.0-twin_fraction)*miller_array_mod.data() +
           twin_fraction*miller_array_twin.data(),
      sigmas=flex.sqrt(
        flex.pow((1.0-twin_fraction)*miller_array_mod.sigmas(), 2.0) +
        flex.pow(twin_fraction*miller_array_twin.sigmas(), 2.0)))
    twinned_miller.set_observation_type(miller_array.observation_type())
    twin_r = scaling.twin_r(twinned_miller.indices(),
                            twinned_miller.data(),
                            twinned_miller.space_group(),
                            twinned_miller.anomalous_flag(),
                            cb_op.c().r().as_double()[0:9])
    assert approx_equal(twin_r.r_abs_value(), expected_r_abs, 0.08)
    assert approx_equal(twin_r.r_sq_value(), expected_r_sq, 0.08)
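# The expected values tabulated above follow simple closed forms: twinning by
# fraction alpha turns pairwise intensity differences (I1-I2) into
# (1-2*alpha)*(I1-I2) while leaving the pairwise sums unchanged, so
#   R_abs(alpha) = 0.5*(1-2*alpha)   and   R_sq(alpha) = (1-2*alpha)**2/3,
# where 0.5 and 1/3 are the ideal untwinned limits for exponentially
# distributed acentric intensities. A standalone check (plain Python):
def expected_twin_r(alpha):
  return 0.5*(1.0 - 2.0*alpha), (1.0 - 2.0*alpha)**2/3.0

for alpha, r_abs, r_sq in zip([0, 0.1, 0.2, 0.3, 0.4, 0.5],
                              [0.50, 0.40, 0.30, 0.20, 0.10, 0.0],
                              [0.333, 0.213, 0.120, 0.0533, 0.0133, 0.00]):
  e_abs, e_sq = expected_twin_r(alpha)
  assert abs(e_abs - r_abs) < 5e-3 and abs(e_sq - r_sq) < 5e-3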
def skewness_calculation(space_group_info, n_test_points=10, n_sites=20,
                         d_min=3, volume_per_atom=200):
  structure = random_structure.xray_structure(
    space_group_info=space_group_info,
    elements=["Se"]*n_sites,
    volume_per_atom=volume_per_atom,
    random_u_iso=True)
  structure.show_summary()
  print()
  f_calc = structure.structure_factors(
    d_min=d_min, anomalous_flag=False).f_calc()
  f_calc.show_summary()
  print()
  for i_fudge_factor in range(n_test_points+1):
    fudge_factor = i_fudge_factor/float(n_test_points)
    randomized_f_calc = randomize_phases(f_calc, fudge_factor)
    mwpe = f_calc.mean_weighted_phase_error(randomized_f_calc)
    rho = randomized_f_calc.fft_map().real_map_unpadded()
    # <(rho-rho_bar)**3>/<(rho-rho_bar)**2>**(3/2)
    rho_rho_bar = rho - flex.mean(rho)
    num = flex.mean(flex.pow(rho_rho_bar, 3))
    den = flex.mean(flex.pow(rho_rho_bar, 2))**(3/2.)
    assert den != 0
    skewness = num / den
    print("fudge factor, phase difference, map skewness:", end=' ')
    print("%4.2f, %5.2f, %.4g" % (fudge_factor, mwpe, skewness))
    print()
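# The skewness above is the standard third standardized moment of the map.
# A minimal standalone sketch of the same quantity using scitbx flex arrays
# (the values below are arbitrary illustration data, not from the test):
from scitbx.array_family import flex

def map_skewness(rho):
  rho_rho_bar = rho - flex.mean(rho)
  num = flex.mean(flex.pow(rho_rho_bar, 3))
  den = flex.mean(flex.pow(rho_rho_bar, 2))**1.5
  assert den != 0
  return num/den

# A symmetric sample has skewness near 0; a long positive tail (as sharp
# peaks produce in a well-phased map) drives it positive.
sym = flex.double([-2, -1, 0, 1, 2])
tailed = flex.double([-1, -1, -1, -1, 4])
assert abs(map_skewness(sym)) < 1e-9
assert map_skewness(tailed) > 0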
def integration_proper(self):
  image_obj = self.imagefiles.imageindex(self.frame_numbers[self.image_number])
  #image_obj.read() #assume image already read
  rawdata = image_obj.linearintdata # assume image #1
  self.integration_proper_fast(rawdata, self.predicted, self.hkllist,
                               self.detector_xy_draft)
  self.integrated_data = self.get_integrated_data()
  self.integrated_sigma = self.get_integrated_sigma()
  self.integrated_miller = self.get_integrated_miller()
  self.detector_xy = self.get_detector_xy()
  self.max_signal = self.get_max_signal()
  for correction_type in self.horizons_phil.integration.absorption_correction:
    if correction_type.apply:
      if correction_type.algorithm == "fuller_kapton":
        print("Absorption correction with %d reflections to correct"%(
          len(self.detector_xy)))
        from cxi_xdr_xes import absorption
        C = absorption.correction()
        if correction_type.fuller_kapton.smart_sigmas:
          self.fuller_kapton_absorption_correction, \
          self.fuller_kapton_absorption_sigmas = C(
            panel_size_px=(self.inputpd['size1'], self.inputpd['size2']),
            pixel_size_mm=self.pixel_size,
            detector_dist_mm=self.inputai.distance(),
            wavelength_ang=self.inputai.wavelength,
            BSmasks=self.BSmasks,
            get_ISmask_function=self.get_ISmask,
            params=correction_type.fuller_kapton,
            i_no_skip=self.get_integrated_flag(),
            calc_sigmas=True)
          # apply corrections and propagate error
          # term1 = (sig(C)/C)^2
          # term2 = (sig(Imeas)/Imeas)^2
          # I' = C*I
          # sig^2(I') = (I')^2*(term1 + term2)
          # sig(I') = sqrt(sig^2(I'))
          term1 = flex.pow(self.fuller_kapton_absorption_sigmas /
                           self.fuller_kapton_absorption_correction, 2)
          term2 = flex.pow(self.integrated_sigma/self.integrated_data, 2)
          self.integrated_data *= self.fuller_kapton_absorption_correction
          integrated_sigma_squared = \
            flex.pow(self.integrated_data, 2) * (term1 + term2)
          self.integrated_sigma = flex.sqrt(integrated_sigma_squared)
          # order is purposeful: the two lines above require that
          # self.integrated_data has already been corrected!
        else:
          self.fuller_kapton_absorption_correction = C(
            panel_size_px=(self.inputpd['size1'], self.inputpd['size2']),
            pixel_size_mm=self.pixel_size,
            detector_dist_mm=self.inputai.distance(),
            wavelength_ang=self.inputai.wavelength,
            BSmasks=self.BSmasks,
            get_ISmask_function=self.get_ISmask,
            params=correction_type.fuller_kapton,
            i_no_skip=self.get_integrated_flag())
          # apply these corrections now
          self.integrated_data *= self.fuller_kapton_absorption_correction
          self.integrated_sigma *= self.fuller_kapton_absorption_correction
  #self.show_rejected_spots() # function has been recoded in C++
  return
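# The propagation rule in the smart_sigmas branch above is the usual
# first-order error formula for a product of independent quantities:
# for I' = C*I, (sig(I')/I')**2 = (sig(C)/C)**2 + (sig(I)/I)**2.
# A minimal scalar sketch (plain Python, illustrative numbers only):
import math

def propagate_product(I, sig_I, C, sig_C):
  I_corr = C*I
  rel_sq = (sig_C/C)**2 + (sig_I/I)**2
  return I_corr, abs(I_corr)*math.sqrt(rel_sq)

I_corr, sig_corr = propagate_product(I=100.0, sig_I=10.0, C=0.9, sig_C=0.018)
# relative errors of 10% and 2% combine to ~10.2%
assert abs(sig_corr/I_corr - math.hypot(0.10, 0.02)) < 1e-12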
def extreme_wilson_outliers(self, p_extreme_wilson=1e-1, return_data=False):
  n_acentric = self.acentric_work.data().size()
  n_centric = self.centric_work.data().size()
  extreme_acentric = 1.0 - flex.pow(
    1.0 - flex.exp(-self.acentric_work.data()), float(n_acentric))
  extreme_centric = 1.0 - flex.pow(
    erf(flex.sqrt(self.centric_work.data()/2.0)), float(n_centric))
  acentric_selection = flex.bool(extreme_acentric > p_extreme_wilson)
  centric_selection = flex.bool(extreme_centric > p_extreme_wilson)
  all_flags = self.work_obs.customized_copy(
    indices=self.acentric_work.indices().concatenate(
      self.centric_work.indices()),
    data=acentric_selection.concatenate(centric_selection))
  all_p_values = self.work_obs.customized_copy(
    indices=self.acentric_work.indices().concatenate(
      self.centric_work.indices()),
    data=extreme_acentric.concatenate(extreme_centric))
  all_flags = all_flags.common_set(self.miller_obs)
  all_p_values = all_p_values.common_set(self.miller_obs)
  log_string = """
Outlier rejection based on extreme value Wilson statistics.
-----------------------------------------------------------

Reflections whose normalized intensity has an associated p-value
lower than %s are flagged as possible outliers.
The p-value is obtained using extreme value distributions of the
Wilson distribution.
""" % (p_extreme_wilson)
  log_string = self.make_log_wilson(log_string, all_flags, all_p_values)
  print(file=self.out)
  print(log_string, file=self.out)
  print(file=self.out)
  if not return_data:
    return all_flags
  else:
    return self.miller_obs.select(all_flags.data())
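# The acentric branch above is the extreme-value form of the Wilson
# distribution: for N iid acentric normalized intensities E^2 ~ Exp(1),
# the chance that the largest of them exceeds z is
#   p = 1 - (1 - exp(-z))**N,
# so a tiny p flags an observation implausibly large even among N draws.
# A standalone scalar sketch (plain Python; N and z are illustrative):
import math

def extreme_acentric_p(z, n):
  return 1.0 - (1.0 - math.exp(-z))**n

# A normalized intensity of 5 is unremarkable once ~10000 reflections have
# been observed, but z = 20 stays extreme even for large N:
assert extreme_acentric_p(5.0, 10000) > 0.99
assert extreme_acentric_p(20.0, 10000) < 1e-4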
def sigmaa_model_error(self):
  x = 0.25*flex.pow(self.h_array, 2.0/3.0) # h was in d*^-3 !!!
  y = flex.log(self.sigmaa_fitted)
  # compute the slope
  result = flex.linear_regression(x, y)
  result = -(result.slope()/math.pi*3)
  if result < 0:
    result = None
  else:
    result = math.sqrt(result)
  return result
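# The estimate above assumes ln(sigmaa) falls off linearly in
# s**2 = (d*/2)**2 -- note 0.25*h**(2/3), with h stored in d***-3, is exactly
# s**2 -- with slope -(pi/3)*sigma**2, so the model error sigma is recovered
# from the fitted slope. A minimal flex.linear_regression sketch on synthetic
# data obeying that model (sigma_true = 0.5 is an illustrative assumption):
import math
from scitbx.array_family import flex

sigma_true = 0.5
x = flex.double([0.01*i for i in range(1, 21)])   # s**2 grid
y = -(math.pi/3.0)*sigma_true**2*x                # ln(sigmaa), zero intercept
fit = flex.linear_regression(x, y)
assert fit.is_well_defined()
sigma_est = math.sqrt(-(fit.slope()/math.pi*3))
assert abs(sigma_est - sigma_true) < 1e-6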
def task_a():
  # add an anchor
  if self.params.modify.cosym.anchor:
    from xfel.merging.application.model.crystal_model import crystal_model
    XM = crystal_model(params=self.params, purpose="cosym")
    model_intensities = XM.run([], [])
    from dxtbx.model import Experiment, Crystal
    from scitbx.matrix import sqr
    O = sqr(model_intensities.unit_cell().orthogonalization_matrix()
            ).transpose().elems
    real_a = (O[0], O[1], O[2])
    real_b = (O[3], O[4], O[5])
    real_c = (O[6], O[7], O[8])
    nc = Crystal(real_a, real_b, real_c, model_intensities.space_group())
    # prepends the reference model to the cosym E-list
    sampling_experiments_for_cosym.append(Experiment(crystal=nc))
    from dials.array_family import flex
    exp_reflections = flex.reflection_table()
    exp_reflections['intensity.sum.value'] = model_intensities.data()
    exp_reflections['intensity.sum.variance'] = flex.pow(
      model_intensities.sigmas(), 2)
    exp_reflections['miller_index'] = model_intensities.indices()
    exp_reflections['miller_index_asymmetric'] = model_intensities.indices()
    exp_reflections['flags'] = flex.size_t(
      model_intensities.size(), flex.reflection_table.flags.integrated_sum)
    # prepare individual reflection tables for each experiment
    simple_experiment_id = len(sampling_experiments_for_cosym) - 1
    #experiment.identifier = "%d"%simple_experiment_id
    # The experiment identifier must be a string according to the *.h file.
    # The identifier is changed on the _for_cosym Experiment list, not on
    # the master experiments used for the follow-on analysis.
    sampling_experiments_for_cosym[-1].identifier = "%d"%simple_experiment_id
    # register the integer id as a new column in the per-experiment
    # reflection table
    exp_reflections['id'] = flex.int(len(exp_reflections),
                                     simple_experiment_id)
    # apparently the reflection table holds a map from integer id
    # (reflection table) to string id (experiment)
    exp_reflections.experiment_identifiers()[simple_experiment_id] = \
      sampling_experiments_for_cosym[-1].identifier
    sampling_reflections_for_cosym.append(exp_reflections)
def __init__(self, she_object, observed_data):
  # we'll only optimize the scale factor and form factor of excluded solvent
  self.rm = 1.62
  self.rho = 0.334
  self.drho = 0.03
  self.obs = observed_data
  self.she_object = she_object
  self.rm_fluct_scale = -(4.0*math.pi/3.0)**1.5*math.pi * \
    flex.pow(self.obs.q, 2.0)*self.rm**2.0
  ### setup the scan range ###
  self.default_a = 1.0
  self.a_range = flex.double(range(-10, 11))/50.0 + self.default_a
  self.drho_range = (flex.double(range(-10, 21))/10.0 + 1.0)*self.drho
  self.scan()
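# The two *_range arrays above are just uniform grids bracketing the
# defaults: a runs over [0.8, 1.2] in steps of 0.02 and drho over
# [0.0, 0.09] (zero to three times the default contrast) in steps of 0.003.
# A standalone check of that reading:
from scitbx.array_family import flex

default_a, drho = 1.0, 0.03
a_range = flex.double(range(-10, 11))/50.0 + default_a
drho_range = (flex.double(range(-10, 21))/10.0 + 1.0)*drho
assert abs(flex.min(a_range) - 0.8) < 1e-12
assert abs(flex.max(a_range) - 1.2) < 1e-12
assert flex.min(drho_range) == 0.0
assert abs(flex.max(drho_range) - 0.09) < 1e-12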
def calcskew(self, map_coeff, iparams):
  real_map = map_coeff.fft_map().real_map()
  ed_limit = iparams.fit_params.ed_sigma_thres * \
    real_map.sample_standard_deviation()
  ed_limit_up = ed_limit
  ed_limit_dn = ed_limit*-1
  # truncate the electron density beyond the sigma limit
  real_map.set_selected(real_map > ed_limit_up, ed_limit_up)
  real_map.set_selected(real_map < ed_limit_dn, ed_limit_dn)
  # 3.0/2.0 guards against integer division under Python 2
  skew = flex.mean(flex.pow(real_map, 3))/pow(flex.mean_sq(real_map), 3.0/2.0)
  return skew
def nth_power_scale(dataarray, nth_power):
  """
  Set nth_power to an appropriate number between 0 and 1 for dampening the
  difference between the smallest and the largest values. If nth_power < 0
  then an automatic value is computed that maps the smallest values to 0.2
  of the largest values.
  """
  absdat = flex.abs(dataarray).as_double()
  # much faster than flex.double([e for e in absdat if not math.isnan(e)])
  absdat2 = graphics_utils.NoNansArray(absdat)
  maxdat = flex.max(absdat2)
  mindat = max(1e-10*maxdat, flex.min(absdat2))
  # only autoscale for sensible values of maxdat and mindat
  if nth_power < 0.0 and maxdat > mindat: # amounts to automatic scale
    nth_power = math.log(0.2)/(math.log(mindat) - math.log(maxdat))
  datascaled = flex.pow(absdat, nth_power)
  return datascaled, nth_power
def nth_power_scale(dataarray, nth_power):
  """
  Set nth_power to a number for dampening or enhancing the difference
  between the smallest and the largest values. A negative number means that
  a large data value is rendered with a smaller radius than a small data
  value. For nth_power=0 all data values are rendered with the same radius.
  For nth_power=1 data values are rendered with radii proportional to the
  data values. If nth_power=NaN then an automatic value is computed that
  maps the smallest values to 0.2 of the largest values.
  """
  absdat = flex.abs(dataarray).as_double()
  # much faster than flex.double([e for e in absdat if not math.isnan(e)])
  absdat2 = graphics_utils.NoNansArray(absdat)
  maxdat = flex.max(absdat2)
  mindat = max(1e-10*maxdat, flex.min(absdat2))
  # only autoscale for sensible values of maxdat and mindat
  if math.isnan(nth_power) and maxdat > mindat: # amounts to automatic scale
    nth_power = math.log(0.2)/(math.log(mindat) - math.log(maxdat))
  datascaled = flex.pow(absdat, nth_power)
  return datascaled, nth_power
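# Why log(0.2)/(log(mindat) - log(maxdat)) is the right automatic exponent:
# scaling by x**p sends the dynamic range to (mindat/maxdat)**p, and solving
# (mindat/maxdat)**p = 0.2 for p gives exactly that quotient of logs, so the
# smallest value lands at 0.2 of the largest. A standalone check:
import math

mindat, maxdat = 1.0, 1000.0
p = math.log(0.2)/(math.log(mindat) - math.log(maxdat))
assert abs((mindat**p)/(maxdat**p) - 0.2) < 1e-12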
def task_a(params):
  # add an anchor
  sampling_experiments_for_cosym = ExperimentList()
  sampling_reflections_for_cosym = []
  if params.modify.cosym.anchor:
    from xfel.merging.application.model.crystal_model import crystal_model
    #P = Timer("construct the anchor reference model")
    XM = crystal_model(params=params, purpose="cosym")
    model_intensities = XM.run([], [])
    #del P
    from dxtbx.model import Experiment, Crystal
    from scitbx.matrix import sqr
    O = sqr(model_intensities.unit_cell().orthogonalization_matrix()
            ).transpose().elems
    real_a = (O[0], O[1], O[2])
    real_b = (O[3], O[4], O[5])
    real_c = (O[6], O[7], O[8])
    nc = Crystal(real_a, real_b, real_c, model_intensities.space_group())
    # prepends the reference model to the cosym E-list
    sampling_experiments_for_cosym.append(Experiment(crystal=nc))
    from dials.array_family import flex
    exp_reflections = flex.reflection_table()
    exp_reflections['intensity.sum.value'] = model_intensities.data()
    exp_reflections['intensity.sum.variance'] = flex.pow(
      model_intensities.sigmas(), 2)
    exp_reflections['miller_index'] = model_intensities.indices()
    exp_reflections['miller_index_asymmetric'] = model_intensities.indices()
    exp_reflections['flags'] = flex.size_t(
      model_intensities.size(), flex.reflection_table.flags.integrated_sum)
    # prepare individual reflection tables for each experiment
    cosym.experiment_id_detail(sampling_experiments_for_cosym,
                               sampling_reflections_for_cosym,
                               exp_reflections)
  return sampling_experiments_for_cosym, sampling_reflections_for_cosym
def exercise_bins():
  uc = uctbx.unit_cell((11,11,13,90,90,120))
  sg_type = sgtbx.space_group_type("P 3 2 1")
  anomalous_flag = False
  d_min = 1
  m = miller.index_generator(uc, sg_type, anomalous_flag, d_min).to_array()
  f = flex.double()
  for i in range(m.size()):
    f.append(random.random())
  n_bins = 10
  b = miller.binning(uc, n_bins, 0, d_min)
  b = miller.binning(uc, n_bins, 0, d_min, 1.e-6)
  b = miller.binning(uc, n_bins, m)
  b = miller.binning(uc, n_bins, m, 0)
  b = miller.binning(uc, n_bins, m, 0, d_min)
  b = miller.binning(uc, n_bins, m, 0, d_min, 1.e-6)
  assert b.d_max() == -1
  assert approx_equal(b.d_min(), d_min)
  assert b.bin_d_range(0) == (-1,-1)
  assert approx_equal(b.bin_d_range(1), (-1,2.1544336))
  assert approx_equal(b.bin_d_range(b.n_bins_all()-1), (1,-1))
  d_star_sq = 0.5
  r = b.bin_d_range(b.get_i_bin(d_star_sq))
  d = 1/math.sqrt(d_star_sq)
  assert r[1] <= d <= r[0]
  h = (3,4,5)
  r = b.bin_d_range(b.get_i_bin(h))
  assert r[1] <= uc.d(h) <= r[0]
  # a quick test to exercise d-spacings on fractional Miller indices:
  assert approx_equal(uc.d((3,4,5)), uc.d_frac((3.001,4,5)), eps=0.001)
  binning1 = miller.binning(uc, n_bins, m)
  assert binning1.unit_cell().is_similar_to(uc)
  assert binning1.n_bins_used() == n_bins
  assert binning1.limits().size() == n_bins + 1
  assert binning1.n_bins_all() == n_bins + 2
  s = pickle.dumps(binning1)
  l = pickle.loads(s)
  assert str(l.unit_cell()) == "(11, 11, 13, 90, 90, 120)"
  assert approx_equal(l.limits(), binning1.limits())
  #
  binner1 = miller.ext.binner(binning1, m)
  assert binner1.miller_indices().id() == m.id()
  assert binner1.count(binner1.i_bin_d_too_large()) == 0
  assert binner1.count(binner1.i_bin_d_too_small()) == 0
  counts = binner1.counts()
  for i_bin in binner1.range_all():
    assert binner1.count(i_bin) == counts[i_bin]
    assert binner1.selection(i_bin).count(True) == counts[i_bin]
  assert list(binner1.range_all()) == list(range(binner1.n_bins_all()))
  assert list(binner1.range_used()) == list(range(1, binner1.n_bins_used()+1))
  binning2 = miller.binning(uc, n_bins - 2,
                            binning1.bin_d_min(2),
                            binning1.bin_d_min(n_bins))
  binner2 = miller.ext.binner(binning2, m)
  assert tuple(binner1.counts())[1:-1] == tuple(binner2.counts())
  array_indices = flex.size_t(range(m.size()))
  perm_array_indices1 = flex.size_t()
  perm_array_indices2 = flex.size_t()
  for i_bin in binner1.range_all():
    perm_array_indices1.extend(array_indices.select(binner1.selection(i_bin)))
    perm_array_indices2.extend(binner1.array_indices(i_bin))
  assert perm_array_indices1.size() == m.size()
  assert perm_array_indices2.size() == m.size()
  assert tuple(perm_array_indices1) == tuple(perm_array_indices2)
  b = miller.ext.binner(miller.binning(uc, n_bins, m, 0, d_min), m)
  assert approx_equal(b.bin_centers(1),
    (0.23207956, 0.52448148, 0.62711856, 0.70311998, 0.7652538, 0.818567,
     0.86566877, 0.90811134, 0.94690405, 0.98274518))
  assert approx_equal(b.bin_centers(2),
    (0.10772184, 0.27871961, 0.39506823, 0.49551249, 0.58642261, 0.67067026,
     0.74987684, 0.82507452, 0.89697271, 0.96608584))
  assert approx_equal(b.bin_centers(3),
    (0.050000075, 0.15000023, 0.25000038, 0.35000053, 0.45000068, 0.55000083,
     0.65000098, 0.75000113, 0.85000128, 0.95000143))
  v = flex.double(range(b.n_bins_used()))
  i = b.interpolate(v, 0)
  for i_bin in b.range_used():
    assert i.select(b.selection(i_bin)).all_eq(v[i_bin-1])
  dss = uc.d_star_sq(m)
  for d_star_power in (1,2,3):
    j = b.interpolate(v, d_star_power)
    x = flex.pow(dss, (d_star_power/2.))
    r = flex.linear_correlation(x, j)
    assert r.is_well_defined()
    assert approx_equal(r.coefficient(),
                        (0.946401, 0.990764, 1.0)[d_star_power-1],
                        eps=1.e-4, multiplier=None)
  #
  s = pickle.dumps(binner2)
  l = pickle.loads(s)
  assert str(l.unit_cell()) == "(11, 11, 13, 90, 90, 120)"
  assert approx_equal(l.limits(), binner2.limits())
  assert l.miller_indices().all_eq(binner2.miller_indices())
  assert l.bin_indices().all_eq(binner2.bin_indices())
  #
  limits = flex.random_double(size=10)
  bng = miller.binning(uc, limits)
  assert bng.unit_cell().is_similar_to(uc)
  assert approx_equal(bng.limits(), limits)
def twin_the_data_and_analyse(twin_operator, twin_fraction=0.2):
  out_string = StringIO()
  miller_array = random_data(35).map_to_asu()
  miller_array = miller_array.f_as_f_sq()
  cb_op = sgtbx.change_of_basis_op(twin_operator)
  miller_array_mod, miller_array_twin = miller_array.common_sets(
    miller_array.change_basis(cb_op).map_to_asu())
  twinned_miller = miller_array_mod.customized_copy(
    data=(1.0-twin_fraction)*miller_array_mod.data() +
         twin_fraction*miller_array_twin.data(),
    sigmas=flex.sqrt(
      flex.pow((1.0-twin_fraction)*miller_array_mod.sigmas(), 2.0) +
      flex.pow(twin_fraction*miller_array_twin.sigmas(), 2.0)))
  twinned_miller.set_observation_type(miller_array.observation_type())
  twin_anal_object = t_a.twin_analyses(twinned_miller,
                                       out=out_string,
                                       verbose=-100)
  index = twin_anal_object.twin_summary.most_worrysome_twin_law
  assert approx_equal(twin_anal_object.twin_summary.britton_alpha[index],
                      twin_fraction, eps=0.1)
  assert approx_equal(twin_anal_object.twin_law_dependent_analyses[index].
                      ml_murray_rust.estimated_alpha,
                      twin_fraction, eps=0.1)
  ## Untwinned data standards
  if twin_fraction == 0:
    ## L-test
    assert approx_equal(twin_anal_object.l_test.mean_l, 0.50, eps=0.1)
    ## Wilson ratios
    assert approx_equal(twin_anal_object.twin_summary.i_ratio, 2.00, eps=0.1)
    ## H-test
    assert approx_equal(
      twin_anal_object.twin_law_dependent_analyses[index].h_test.mean_h,
      0.50, eps=0.1)
  ## Perfect twin standards
  if twin_fraction == 0.5:
    assert approx_equal(twin_anal_object.l_test.mean_l, 0.375, eps=0.1)
    assert approx_equal(twin_anal_object.twin_summary.i_ratio, 1.50, eps=0.1)
    assert approx_equal(
      twin_anal_object.twin_law_dependent_analyses[index].h_test.mean_h,
      0.00, eps=0.1)
  ## Just make sure we actually detect significant twinning
  if twin_fraction > 0.10:
    assert (twin_anal_object.twin_summary.maha_l > 3.0)
  ## The Patterson origin peak should be smallish ...
  assert (twin_anal_object.twin_summary.patterson_p_value > 0.01)
  # and the brief test should be passed as well
  answer = t_a.twin_analyses_brief(twinned_miller, out=out_string,
                                   verbose=-100)
  if twin_fraction > 0.10:
    assert answer is True
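# The sigmas of the synthetic twin above are combined in quadrature: the
# twinned observation is a linear combination J = (1-a)*I1 + a*I2 of
# independent measurements, so sig(J) = sqrt(((1-a)*sig1)**2 + (a*sig2)**2).
# A minimal standalone sketch with flex (illustrative numbers):
import math
from scitbx.array_family import flex

a = 0.2
sig1 = flex.double([3.0, 5.0])
sig2 = flex.double([4.0, 12.0])
sig_J = flex.sqrt(flex.pow((1.0-a)*sig1, 2.0) + flex.pow(a*sig2, 2.0))
assert abs(sig_J[0] - math.hypot(0.8*3.0, 0.2*4.0)) < 1e-12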
def riso(data_1, data_2, params, show_tables=True):
  uniform = []
  # construct a list of intensity arrays to compare between the two datasets
  for item, label in zip(
      [data_1, data_2],
      [params.input.labels_1, params.input.labels_2]):
    for array in item.as_miller_arrays():
      this_label = array.info().labels[0]
      if this_label != label:
        continue
      # print this_label, array.observation_type()
      uniform.append(array.as_intensity_array())
  assert len(uniform) == 2, "Could not identify the two arrays to compare. "+\
    "Please check that columns %s and %s are available in the files provided."%\
    (params.labels_1, params.labels_2)
  # if anomalous data, generate Bijvoet mates for any arrays lacking them
  if params.anomalous_flag:
    for i in (0,1):
      if not uniform[i].anomalous_flag():
        uniform[i] = uniform[i].generate_bijvoet_mates()
  # reindex
  for i in (0,1):
    uniform[i] = uniform[i].change_basis("h,k,l").map_to_asu()
  assert uniform[0].space_group_info().symbol_and_number() == \
    uniform[1].space_group_info().symbol_and_number(),\
    "Incompatible space groups between the datasets provided."
  # copy the second array with the unit cell of the first
  d_min = max(params.d_min or 0, uniform[0].d_min(), uniform[1].d_min())
  d_max = min(params.d_max or 10000, 10000)
  common_set_1 = uniform[1].customized_copy(
    crystal_symmetry=symmetry(
      unit_cell=uniform[0].unit_cell(),
      space_group_info=uniform[0].space_group_info()),
    ).resolution_filter(d_min=d_min, d_max=d_max).map_to_asu()
  common_set_2 = uniform[0].common_set(common_set_1)
  # set 1 intentionally repeated in case of low res missing reflections
  common_set_1 = uniform[1].common_set(common_set_2)
  assert len(common_set_1.indices()) == len(common_set_2.indices())
  common = (common_set_1, common_set_2)
  print("%6d indices in common in the range %.2f-%.2f Angstroms"%(
    common_set_1.size(), d_min, d_max))
  # bin for comparison
  for array in common:
    array.setup_binner(d_min=d_min, d_max=d_max, n_bins=params.output.n_bins)
  # calculate scale factor and Riso
  # XXX TODO: riso_scale_factor is not set up right yet
  riso_scale_factor = scale_factor(
    common_set_2, common_set_1,
    weights=flex.pow(common_set_2.sigmas(), -2),
    use_binning=True)
  riso_binned = r1_factor(
    common_set_2, common_set_1,
    scale_factor=riso_scale_factor, use_binning=True)
  riso_scale_factor_all = scale_factor(
    common_set_2, common_set_1,
    weights=flex.pow(common_set_2.sigmas(), -2),
    use_binning=False)
  riso_all = r1_factor(
    common_set_2, common_set_1,
    scale_factor=riso_scale_factor_all, use_binning=False)
  if show_tables:
    from libtbx import table_utils
    table_header = ["", "", "", "R"]
    table_header2 = ["Bin", "Resolution Range", "Completeness", "iso"]
    table_data = []
    table_data.append(table_header)
    table_data.append(table_header2)
    items = riso_binned.binner.range_used()
    cumulative_counts_given = 0
    cumulative_counts_complete = 0
    for bin in items:
      table_row = []
      table_row.append("%3d"%bin)
      table_row.append("%-13s"%riso_binned.binner.bin_legend(
        i_bin=bin, show_bin_number=False, show_bin_range=False,
        show_d_range=True, show_counts=False))
      table_row.append("%13s"%riso_binned.binner.bin_legend(
        i_bin=bin, show_bin_number=False, show_bin_range=False,
        show_d_range=False, show_counts=True))
      cumulative_counts_given += riso_binned.binner._counts_given[bin]
      cumulative_counts_complete += riso_binned.binner._counts_complete[bin]
      table_row.append("%.1f%%" % (100 * riso_binned.data[bin]))
      table_data.append(table_row)
    table_row = [format_value("%3s", "All"),
                 format_value("%-13s", " "),
                 format_value("%13s", "[%d/%d]"%(cumulative_counts_given,
                                                 cumulative_counts_complete)),
                 format_value("%.1f%%", 100 * riso_all)]
    table_data.append(table_row)
    print(table_utils.format(
      table_data, has_header=2, justify='center', delim=" "))
  print("Riso is the R1 factor between the two datasets supplied.")
  return riso_binned, riso_all
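# The Riso above is an R1-style residual between scaled datasets.
# Generically, the weighted least-squares scale minimizing
# sum w*(I1 - c*I2)**2 is c = sum(w*I1*I2)/sum(w*I2**2), and the residual is
# R = sum|I1 - c*I2| / sum|I1|. A standalone sketch of those two formulas
# (not necessarily the exact cctbx scale_factor/r1_factor conventions):
from scitbx.array_family import flex

def ls_scale(i1, i2, w):
  return flex.sum(w*i1*i2)/flex.sum(w*i2*i2)

def r_residual(i1, i2, c):
  return flex.sum(flex.abs(i1 - c*i2))/flex.sum(flex.abs(i1))

i1 = flex.double([10.0, 20.0, 30.0])
i2 = flex.double([5.0, 10.0, 15.0])
w = flex.double(3, 1.0)
c = ls_scale(i1, i2, w)
assert abs(c - 2.0) < 1e-12
assert r_residual(i1, i2, c) < 1e-12  # perfectly correlated -> R = 0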
def run(args):
  global f
  f = os.path.join(os.path.split(sys.path[0])[0], "she.txt")
  with open(f, "w") as tempf:
    tempf.truncate()
  # check if we have experimental data
  t1 = time.time()
  exp_data = None
  q_values = None
  var = None
  with open(f, "a") as tempf:
    params = get_input(args, master_params, "sas_I", banner, print_help,
                       tempf)
  if (params is None):
    exit()
  if params.sas_I.experimental_data is not None:
    exp_data = saxs_read_write.read_standard_ascii_qis(
      params.sas_I.experimental_data)
    #exp_data.s = flex.sqrt( exp_data.i )
    if params.sas_I.data_reduct:
      qmax = exp_data.q[-1]
      bandwidth = 0.5/(params.sas_I.n_step-1.0)
      exp_data = reduce_raw_data(exp_data, qmax, bandwidth, outfile=f)
    q_values = exp_data.q
    var = flex.pow(exp_data.s, 2.0)
  if q_values is None:
    q_values = params.sas_I.q_start + \
      (params.sas_I.q_stop-params.sas_I.q_start) * \
      flex.double(range(params.sas_I.n_step))/(params.sas_I.n_step-1)
  # read in pdb file
  pdbi = pdb.hierarchy.input(file_name=params.sas_I.structure)
  #atoms = pdbi.hierarchy.atoms()
  atoms = pdbi.hierarchy.models()[0].atoms()
  # predefine some arrays we will need
  dummy_atom_types = flex.std_string()
  radius = flex.double()
  b_values = flex.double()
  occs = flex.double()
  xyz = flex.vec3_double()
  # keep track of the atom types we have encountered
  dummy_at_collection = []
  for atom in atoms:
    #if(not atom.hetero): #### temporarily added
    b_values.append(atom.b)
    occs.append(atom.occ)
    xyz.append(atom.xyz)
  # Hydrogen controls whether H is treated explicitly or implicitly
  Hydrogen = not params.sas_I.internals.implicit_hydrogens

  ### Using Zernike Expansion to Calculate Intensity ###
  if (params.sas_I.method == 'zernike'):
    znk_nmax = params.sas_I.znk_nmax
    absolute_Io = znk_model.calc_abs_Io(atoms, Hydrogen)
    if (absolute_Io == 0.0):
      # in case pdb hierarchy parse did not work out correctly
      absolute_Io = sas_library.calc_abs_Io_from_pdb(
        params.sas_I.structure, Hydrogen)
    if (Hydrogen):
      # get the number of electrons as the density
      density = znk_model.get_density(atoms)
    else:
      # add one H-atom to each heavy atom as a correction
      density = znk_model.get_density(atoms) + 1
    znk_engine = znk_model.xyz2znk(xyz, absolute_Io, znk_nmax,
                                   density=density)
    calc_i, calc_i_vac, calc_i_sol, calc_i_layer = \
      znk_engine.calc_intensity(q_values)
    if (params.sas_I.experimental_data is not None):
      if params.sas_I.internals.solvent_scale:
        znk_engine.optimize_solvent(exp_data)
        calc_i = znk_engine.best_i_calc
      else: # quick scaling
        scale, offset = linear_fit(calc_i, exp_data.i, exp_data.s)
        calc_i = calc_i*scale + offset
      CHI2 = flex.mean(flex.pow((calc_i-exp_data.i)/exp_data.s, 2.0))
      CHI = math.sqrt(CHI2)
      with open(f, "a") as log:
        print("fitting to experimental curve, chi = %5.4e"%CHI, file=log)
      print("fitting to experimental curve, chi = %5.4e"%CHI)
      write_debye_data(q_values, calc_i, params.sas_I.output+".fit")
      write_json(params.sas_I.output+"data.json", q_values, calc_i,
                 y2=exp_data.i)
    else: # scaled to the absolute I(0)
      write_she_data(q_values, calc_i, calc_i_vac, calc_i_layer, calc_i_sol,
                     params.sas_I.output)
      write_json(params.sas_I.output+"data.json", q_values, calc_i)
    with open(f, "a") as log:
      print(znk_engine.summary(), file=log)
      print("Done! total time used: %5.4e (seconds)"%(time.time()-t1),
            file=log)
    print(znk_engine.summary())
    print("Done! total time used: %5.4e (seconds)"%(time.time()-t1))
    return
  ### End of Zernike Model ###

  dummy_ats = sas_library.read_dummy_type(file_name=params.sas_I.structure)
  for at in dummy_ats:
    if at not in dummy_at_collection:
      dummy_at_collection.append(at)

  radius_dict = {}
  ener_lib = server.ener_lib()
  for dummy in dummy_at_collection:
    if (Hydrogen):
      radius_dict[dummy] = ener_lib.lib_atom[dummy].vdw_radius
    else:
      if ener_lib.lib_atom[dummy].vdwh_radius is not None:
        radius_dict[dummy] = ener_lib.lib_atom[dummy].vdwh_radius
      else:
        radius_dict[dummy] = ener_lib.lib_atom[dummy].vdw_radius
    if (radius_dict[dummy] is None):
      with open(f, "a") as log:
        print("****************** WARNING WARNING *******************",
              file=log)
        print("Did not find atom type: ", dummy,
              "default value 1.58 A was used", file=log)
        print("*******************************************************",
              file=log)
      print("****************** WARNING WARNING *******************")
      print("Did not find atom type: ", dummy,
            "default value 1.58 A was used")
      print("*******************************************************")
      radius_dict[dummy] = 1.58
  for at in dummy_ats:
    dummy_atom_types.append(at)
    radius.append(radius_dict[at])

  Scaling_factors = sas_library.load_scaling_factor()
  #------------------
  B_factor_on = params.sas_I.internals.use_adp
  max_i = params.sas_I.internals.max_i
  max_L = params.sas_I.internals.max_L
  f_step = params.sas_I.internals.f_step
  q_step = params.sas_I.internals.integration_q_step
  solvent_radius_scale = params.sas_I.internals.solvent_radius_scale
  protein_radius_scale = params.sas_I.internals.protein_radius_scale
  rho = params.sas_I.internals.rho
  drho = params.sas_I.internals.drho
  delta = params.sas_I.internals.delta
  #------------------
  scat_lib_dummy = sas_library.build_scattering_library(
    dummy_at_collection, q_values, radius_dict, solvent_radius_scale,
    Hydrogen, Scaling_factors)
  new_indx = flex.int()
  new_coord = flex.vec3_double()
  model = intensity.model(xyz, radius*protein_radius_scale, b_values, occs,
                          dummy_ats, scat_lib_dummy, B_factor_on)
  t2 = time.time()

  if (params.sas_I.method == 'she'):
    max_z_eps = 0.02
    max_z = model.get_max_radius()*(q_values[-1]+max_z_eps) + max_z_eps
    engine = intensity.she_engine(model, scat_lib_dummy, max_i, max_L,
                                  f_step, q_step, max_z, delta, rho, drho)
    engine.update_solvent_params(rho, drho)
    i = engine.I()
    a = engine.get_IA()
    b = engine.get_IB()
    c = engine.get_IC()
    attri = engine.Area_Volume()
    with open(f, "a") as log:
      print("Inner surface Area of the Envelope is (A^2.0): ", attri[0],
            file=log)
      print("Inner Volume of the Envelope is (A^3.0): ", attri[1], file=log)
      print("Volume of the Envelope shell is (A^3.0): ", attri[2], file=log)
    print("Inner surface Area of the Envelope is (A^2.0): ", attri[0])
    print("Inner Volume of the Envelope is (A^3.0): ", attri[1])
    print("Volume of the Envelope shell is (A^3.0): ", attri[2])
    if params.sas_I.output is not None:
      write_she_data(q_values, i, a, b, c, params.sas_I.output)
      write_json(params.sas_I.output+"data.json", q_values, i)

    if params.sas_I.pdblist is not None:
      pdblist = params.sas_I.pdblist
      if (os.path.isfile(pdblist)):
        list = open(pdblist, 'r')
        for line in list:
          filename = line.split('\n')[0]
          pdbi = pdb.hierarchy.input(file_name=filename)
          t21 = time.time()
          atoms = pdbi.hierarchy.atoms()
          new_coord.clear()
          new_indx.clear()
          i = 0
          for atom in atoms:
            new_coord.append(atom.xyz)
            new_indx.append(i)
            i = i+1
          engine.update_coord(new_coord, new_indx)
          i = engine.I()
          a = engine.get_IA()
          b = engine.get_IB()
          c = engine.get_IC()
          attri = engine.Area_Volume()
          with open(f, "a") as log:
            print("Inner surface Area of the Envelope is (A^2.0): ",
                  attri[0], file=log)
            print("Inner Volume of the Envelope is (A^3.0): ", attri[1],
                  file=log)
            print("Volume of the Envelope shell is (A^3.0): ", attri[2],
                  file=log)
          print("Inner surface Area of the Envelope is (A^2.0): ", attri[0])
          print("Inner Volume of the Envelope is (A^3.0): ", attri[1])
          print("Volume of the Envelope shell is (A^3.0): ", attri[2])
          write_she_data(q_values, i, a, b, c, filename+'.int')
          with open(f, "a") as log:
            print('\nfinished pdb ', filename, 'at: ', time.ctime(t21), '\n',
                  file=log)
          print('\nfinished pdb ', filename, 'at: ', time.ctime(t21), '\n')
    # attri = engine.Area_Volume2()
    # print("Inner surface Area of the Envelope is (A^2.0): ", attri[0])

  elif (params.sas_I.method == 'debye'):
    engine = intensity.debye_engine(model, scat_lib_dummy)
    i = engine.I()
    if params.sas_I.output is not None:
      write_debye_data(q_values, i, params.sas_I.output)
      write_json(params.sas_I.output+"data.json", q_values, i)

  if (params.sas_I.experimental_data is not None):
    if params.sas_I.internals.solvent_scale:
      # more thorough scaling
      solvent_optim = solvent_parameter_optimisation(
        she_object=engine, observed_data=exp_data)
      scale, offset, drho, a = solvent_optim.get_scales()
      i = solvent_optim.get_scaled_data()
    else: # quick scaling
      scale, offset = linear_fit(i, exp_data.i, exp_data.s)
      i = scale*i + offset
    with open(f, "a") as log:
      print("Scaled calculated data against experimental data", file=log)
      print("Scale factor : %5.4e"%scale, file=log)
      print("Offset : %5.4e"%offset, file=log)
    print("Scaled calculated data against experimental data")
    print("Scale factor : %5.4e"%scale)
    print("Offset : %5.4e"%offset)
    if params.sas_I.internals.solvent_scale:
      with open(f, "a") as log:
        print(" Solvent average R ra : ", a, file=log)
        print(" Solvation Contrast drho: ", drho, file=log)
      print(" Solvent average R ra : ", a)
      print(" Solvation Contrast drho: ", drho)
      print()
    write_debye_data(q_values, i, params.sas_I.output+".fit")
    write_json(params.sas_I.output+"data.json", q_values, i, y2=exp_data.i)
    CHI2 = flex.mean(flex.pow((i-exp_data.i)/exp_data.s, 2.0))
    CHI = math.sqrt(CHI2)
    with open(f, "a") as log:
      print("fitting to experimental curve, chi = %5.4e"%CHI, file=log)
    print("fitting to experimental curve, chi = %5.4e"%CHI)

  t3 = time.time()
  with open(f, "a") as log:
    print("Done! total time used: %5.4e (seconds)"%(t3-t1), file=log)
    print('start running at: ', time.ctime(t1), file=log)
    print('finished PDB file processing at: ', time.ctime(t2), file=log)
    print('got all desired I(q) at : ', time.ctime(t3), file=log)
  print("Done! total time used: %5.4e (seconds)"%(t3-t1))
  print('start running at: ', time.ctime(t1))
  print('finished PDB file processing at: ', time.ctime(t2))
  print('got all desired I(q) at : ', time.ctime(t3))
  with open(f, "a") as log:
    log.write("__END__")
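# The fit quality reported above is the reduced chi statistic
#   CHI = sqrt(mean(((I_calc - I_obs)/sigma)**2)),
# i.e. ~1 when the model matches the data to within the quoted errors.
# A standalone sketch with flex (illustrative numbers):
import math
from scitbx.array_family import flex

i_obs = flex.double([10.0, 20.0, 30.0])
sigma = flex.double([1.0, 2.0, 3.0])
i_calc = i_obs + sigma          # each point off by exactly 1 sigma
chi2 = flex.mean(flex.pow((i_calc - i_obs)/sigma, 2.0))
assert abs(math.sqrt(chi2) - 1.0) < 1e-12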
def read_data(self, params):
  from os import listdir, path
  from libtbx import easy_pickle
  from cctbx.crystal_orientation import crystal_orientation # XXX Necessary later?

  #directory = "/net/viper/raid1/hattne/L220/merging/05fs"
  #directory = "/reg/d/psdm/cxi/cxib8113/scratch/sauter/metrology/008"
  #directory = "/reg/d/psdm/xpp/xpp74813/scratch/sauter/metrology/204"
  #directory = "/net/viper/raid1/hattne/L220/merging/test"
  #directory = "/reg/d/psdm/xpp/xppb4313/scratch/brewster/results/r0243/003/integration"
  #directory = "/reg/d/psdm/cxi/cxic0614/scratch/sauter/metrology/004/integration"
  #directory = "/reg/d/psdm/cxi/cxic0614/scratch/sauter/metrology/150/integration"
  #directory = "/reg/d/psdm/cxi/cxic0614/scratch/sauter/metrology/152/integration"
  directory = "/reg/d/psdm/cxi/cxib6714/scratch/sauter/metrology/009/integration"
  dir_glob = "/reg/d/psdm/CXI/cxib6714/scratch/sauter/results/r*/009/integration"
  dir_glob = "/reg/d/psdm/CXI/cxib6714/scratch/sauter/results/r*/801/integration"
  dir_glob = "/reg/d/psdm/xpp/xpp74813/scratch/sauter/r*/216/integration"
  dir_glob = "/reg/d/psdm/xpp/xpp74813/ftc/sauter/result/r*/104/integration"
  dir_glob = "/reg/d/psdm/cxi/cxid9114/scratch/sauter/metrology/001/integration"
  dir_glob = "/reg/d/psdm/CXI/cxid9114/ftc/brewster/results/r00[3-4]*/003/integration"
  dir_glob = "/reg/d/psdm/CXI/cxid9114/ftc/sauter/results/r00[3-4]*/004/integration"
  dir_glob = "/reg/d/psdm/CXI/cxid9114/ftc/sauter/results/r00[3-4]*/006/integration"
  dir_list = ["/reg/d/psdm/CXI/cxid9114/ftc/brewster/results/r%04d/006/integration"%seq
              for seq in range(95,115)]
  dir_list = ["/reg/d/psdm/CXI/cxid9114/ftc/sauter/results/r%04d/018/integration"%seq
              for seq in range(102,115)]
  dir_list = params.data

  T = Timer("populate C++ store with register line")
  itile = flex.int()
  self.spotfx = flex.double()
  self.spotfy = flex.double()
  self.spotcx = flex.double()
  self.spotcy = flex.double()
  self.observed_cntr_x = flex.double()
  self.observed_cntr_y = flex.double()
  self.refined_cntr_x = flex.double()
  self.refined_cntr_y = flex.double()
  self.HKL = flex.miller_index()
  self.radial = flex.double()
  self.azimut = flex.double()

  self.FRAMES = dict(
    frame_id=flex.int(),
    wavelength=flex.double(),
    beam_x=flex.double(),
    beam_y=flex.double(),
    distance=flex.double(),
    orientation=[],
    rotation100_rad=flex.double(),
    rotation010_rad=flex.double(),
    rotation001_rad=flex.double(),
    half_mosaicity_deg=flex.double(),
    wave_HE_ang=flex.double(),
    wave_LE_ang=flex.double(),
    domain_size_ang=flex.double(),
    unique_file_name=[])

  self.frame_id = flex.int()
  import glob
  #for directory in glob.glob(dir_glob):
  for directory in dir_list:
    if self.params.max_frames is not None and \
       len(self.FRAMES['frame_id']) >= self.params.max_frames:
      break
    for entry in listdir(directory):
      tttd = d = easy_pickle.load(path.join(directory, entry))

      # XXX Hardcoded, should honour the phil! And should be verified
      # to be consistent for each correction vector later on!
      #import pdb; pdb.set_trace()
      setting_id = d['correction_vectors'][0][0]['setting_id']

      #if setting_id != 5:
      #if setting_id != 12:
      if setting_id != self.params.bravais_setting_id:
        #if setting_id != 22:
        #print "HATTNE BIG SLIPUP 1"
        continue

      # Assert that effective_tiling is consistent, and a non-zero
      # multiple of eight (only whole sensors considered for now--see
      # mark10.fit_translation4.print_table()). self.tiles is
      # initialised to zero-length in the C++ code. XXX Should now be
      # able to retire the "effective_tile_boundaries" parameter.
      #
      # XXX Other checks from correction_vector plot, such as consistent
      # setting?
      if hasattr(self, 'tiles') and len(self.tiles) > 0:
        assert (self.tiles == d['effective_tiling']).count(False) == 0
      else:
        assert len(d['effective_tiling']) > 0 \
          and len(d['effective_tiling']) % 8 == 0
        self.tiles = d['effective_tiling']

      if not self.standalone_check(self, setting_id, entry, d,
                                   params.diff_cutoff):
        continue

      # Reading the frame data. The frame ID is just the index of the
      # image.
      self.FRAMES['frame_id'].append(len(self.FRAMES['frame_id']) + 1) # XXX try zero-based here
      self.FRAMES['wavelength'].append(d['wavelength'])
      self.FRAMES['beam_x'].append(d['xbeam'])
      self.FRAMES['beam_y'].append(d['ybeam'])
      self.FRAMES['distance'].append(d['distance'])
      self.FRAMES['orientation'].append(d['current_orientation'][0])
      self.FRAMES['rotation100_rad'].append(0) # XXX FICTION
      self.FRAMES['rotation010_rad'].append(0) # XXX FICTION
      self.FRAMES['rotation001_rad'].append(0) # XXX FICTION
      self.FRAMES['half_mosaicity_deg'].append(0) # XXX FICTION
      #self.FRAMES['wave_HE_ang'].append(0.995 * d['wavelength']) # XXX FICTION -- what does Nick use?
      #self.FRAMES['wave_LE_ang'].append(1.005 * d['wavelength']) # XXX FICTION
      self.FRAMES['wave_HE_ang'].append(d['wavelength'])
      self.FRAMES['wave_LE_ang'].append(d['wavelength'])
      self.FRAMES['domain_size_ang'].append(5000) # XXX FICTION
      self.FRAMES['unique_file_name'].append(path.join(directory, entry))

      print("added frame", self.FRAMES['frame_id'][-1], entry)

      for cv in d['correction_vectors'][0]:
        # Try to reproduce every prediction using the model from the
        # frame -- skip the CV if this fails. Could be because of wrong
        # HKL:s?
        #
        # Copy these two images to test directory to reproduce:
        #   int-s01-2011-02-20T21:27Z37.392_00000.pickle
        #   int-s01-2011-02-20T21:27Z37.725_00000.pickle
        from rstbx.bandpass import use_case_bp3, parameters_bp3
        from scitbx.matrix import col
        from math import hypot, pi

        indices = flex.miller_index()
        indices.append(cv['hkl'])
        parameters = parameters_bp3(
          indices=indices,
          orientation=self.FRAMES['orientation'][-1],
          incident_beam=col(self.INCIDENT_BEAM),
          packed_tophat=col((1., 1., 0.)),
          detector_normal=col(self.DETECTOR_NORMAL),
          detector_fast=col((0., 1., 0.)),
          detector_slow=col((1., 0., 0.)),
          pixel_size=col((0.11, 0.11, 0)), # XXX hardcoded, twice!
          pixel_offset=col((0., 0., 0.0)),
          distance=self.FRAMES['distance'][-1],
          detector_origin=col((-self.FRAMES['beam_x'][-1],
                               -self.FRAMES['beam_y'][-1], 0)))
        ucbp3 = use_case_bp3(parameters=parameters)
        ucbp3.set_active_areas(self.tiles)
        integration_signal_penetration = 0.5
        ucbp3.set_sensor_model(
          thickness_mm=0.5,
          mu_rho=8.36644, # CS_PAD detector at 1.3 Angstrom
          signal_penetration=integration_signal_penetration)
        half_mosaicity_rad = self.FRAMES['half_mosaicity_deg'][-1] * pi/180.
        ucbp3.set_mosaicity(half_mosaicity_rad)
        ucbp3.set_bandpass(self.FRAMES['wave_HE_ang'][-1],
                           self.FRAMES['wave_LE_ang'][-1])
        ucbp3.set_orientation(self.FRAMES['orientation'][-1])
        ucbp3.set_domain_size(self.FRAMES['domain_size_ang'][-1])
        ucbp3.picture_fast_slow_force()

        ucbp3_prediction = 0.5 * (ucbp3.hi_E_limit + ucbp3.lo_E_limit)
        diff = hypot(ucbp3_prediction[0][0] - cv['predspot'][1],
                     ucbp3_prediction[0][1] - cv['predspot'][0])

        if diff > self.params.diff_cutoff:
          print("HATTNE INDEXING SLIPUP")
          continue

        # For some reason, the setting_id is recorded for each
        # correction vector as well--assert that it is consistent.
        #if cv['setting_id'] != setting_id:
        #  print "HATTNE BIG SLIPUP 2"
        assert cv['setting_id'] == setting_id

        # For each observed spot, figure out what tile it is on, and
        # store in itile. XXX This is probably not necessary here, as
        # correction_vector_store::register_line() does the same thing.
        obstile = None
        for i in range(0, len(self.tiles), 4):
          if cv['obsspot'][0] >= self.tiles[i + 0] \
             and cv['obsspot'][0] <= self.tiles[i + 2] \
             and cv['obsspot'][1] >= self.tiles[i + 1] \
             and cv['obsspot'][1] <= self.tiles[i + 3]:
            obstile = i
            break
        assert obstile is not None
        itile.append(obstile) # XXX unused variable?

        # ID of current frame.
        self.frame_id.append(self.FRAMES['frame_id'][-1])

        self.spotfx.append(cv['obsspot'][0])
        self.spotfy.append(cv['obsspot'][1])
        self.spotcx.append(cv['predspot'][0])
        self.spotcy.append(cv['predspot'][1])

        self.observed_cntr_x.append(cv['obscenter'][0])
        self.observed_cntr_y.append(cv['obscenter'][1])
        self.refined_cntr_x.append(cv['refinedcenter'][0])
        self.refined_cntr_y.append(cv['refinedcenter'][1])

        self.HKL.append(cv['hkl'])
        self.azimut.append(cv['azimuthal'])
        self.radial.append(cv['radial'])

      #print self.FRAMES['frame_id'][-1]
      # Should honour the max_frames phil parameter
      #if len(self.FRAMES['frame_id']) >= 1000:
      if self.params.max_frames is not None and \
         len(self.FRAMES['frame_id']) >= self.params.max_frames:
        break

  """
For 5000 first images:
STATS FOR TILE 14
  sel_delx           -6.59755265524  -4.41676757746e-10  5.7773557278
  sel_dely           -6.30796620634  -8.3053734774e-10   6.3362200841
  symmetric_offset_x -6.5975526548   -2.73229417105e-15  5.77735572824
  symmetric_offset_y -6.30796620551   1.16406818748e-15  6.33622008493
  symmetric rsq       0.000255199593417  2.95803352999   56.1918083904
  rmsd                1.71989346472

For 10000 first images:
STATS FOR TILE 14
  sel_delx           -6.92345292727   6.9094552919e-10   611.497770006
  sel_dely           -6.39690476093   1.1869355797e-09   894.691806871
  symmetric_offset_x -6.92345292796   1.28753258216e-14  611.497770005
  symmetric_offset_y -6.39690476212  -2.10251420168e-15  894.69180687
  symmetric rsq       1.58067791823e-05  30.3331143761   1174402.952
  rmsd                5.50755066941
"""

  # This is mark3.fit_translation2.nominal_tile_centers()
  self.To_x = flex.double(len(self.tiles) // 4)
  self.To_y = flex.double(len(self.tiles) // 4)
  for x in range(len(self.tiles) // 4):
    self.To_x[x] = (self.tiles[4 * x + 0] + self.tiles[4 * x + 2]) / 2
    self.To_y[x] = (self.tiles[4 * x + 1] + self.tiles[4 * x + 3]) / 2

  delx = self.spotcx - self.spotfx
  dely = self.spotcy - self.spotfy
  self.delrsq = self.delrsq_functional(calcx=self.spotcx, calcy=self.spotcy)

  self.initialize_per_tile_sums()
  self.tile_rmsd = [0.]*(len(self.tiles) // 4)
  self.asymmetric_tile_rmsd = [0.]*(len(self.tiles) // 4)

  # XXX Is (beam1x, beam1y) really observed center and (beamrx,
  # beamry) refined center? Nick thinks YES!
  #
  #itile2 = flex.int([self.register_line(a[2],a[3],a[4],a[5],a[6],a[7],a[8],a[9]) for a in ALL])
  itile2 = flex.int(
    [self.register_line(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7])
     for a in zip(self.observed_cntr_x, self.observed_cntr_y,
                  self.refined_cntr_x, self.refined_cntr_y,
                  self.spotfx, self.spotfy,
                  self.spotcx, self.spotcy)])
  if params.show_consistency:
    consistency_controls(self, params)

  T = Timer("calcs based on C++ store")
  self.selections = []
  self.selection_counts = []
  for x in range(len(self.tiles) // 4):
    if self.tilecounts[x] == 0:
      self.radii[x] = 0
      self.mean_cv[x] = matrix.col((0, 0))
    else:
      self.radii[x] /= self.tilecounts[x]
      self.mean_cv[x] = matrix.col(self.mean_cv[x]) / self.tilecounts[x]

    selection = (self.master_tiles == x)
    self.selections.append(selection)
    selected_cv = self.master_cv.select(selection)
    self.selection_counts.append(selected_cv.size()) # for curvatures

    if len(selected_cv) > 0:
      self.asymmetric_tile_rmsd[x] = math.sqrt(
        flex.mean(self.delrsq.select(selection)))
      sel_delx = delx.select(selection)
      sel_dely = dely.select(selection)
      symmetric_offset_x = sel_delx - self.mean_cv[x][0]
      symmetric_offset_y = sel_dely - self.mean_cv[x][1]
      symmetricrsq = symmetric_offset_x*symmetric_offset_x + \
                     symmetric_offset_y*symmetric_offset_y
      self.tile_rmsd[x] = math.sqrt(flex.mean(symmetricrsq))
    else:
      self.asymmetric_tile_rmsd[x] = 0.
      self.tile_rmsd[x] = 0.

  self.overall_N = flex.sum(flex.int([int(t) for t in self.tilecounts]))
  self.overall_cv = matrix.col(self.overall_cv)/self.overall_N
  self.overall_rmsd = math.sqrt(self.sum_sq_cv / self.overall_N)

  # master weights for the mark3 calculation; takes 0.3 seconds
  self.master_weights = flex.double(len(self.master_tiles))
  self.largest_sample = max(self.tilecounts)
  for x in range(len(self.tiles) // 4):
    self.master_weights.set_selected(self.selections[x], self.tile_weight(x))

  print("AFTER read cx, cy", flex.mean(self.spotcx), flex.mean(self.spotcy))
  print("AFTER read fx, fy", flex.mean(self.spotfx), flex.mean(self.spotfy))
  print("AFTER read rmsd_x, rmsd_y",
        math.sqrt(flex.mean(flex.pow(self.spotcx - self.spotfx, 2))),
        math.sqrt(flex.mean(flex.pow(self.spotcy - self.spotfy, 2))))
  return
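# The per-tile statistics above distinguish two RMSDs: the "asymmetric" RMSD
# uses raw prediction-observation offsets, while tile_rmsd first subtracts
# the tile's mean offset, so it measures the residual scatter once a rigid
# tile translation (the candidate metrology correction) is taken out.
# A standalone sketch with toy offsets:
import math
from scitbx.array_family import flex

delx = flex.double([1.0, 1.2, 0.8])   # systematic ~1 px shift in x
dely = flex.double([0.0, 0.1, -0.1])
asym_rmsd = math.sqrt(flex.mean(delx*delx + dely*dely))
sym_x = delx - flex.mean(delx)
sym_y = dely - flex.mean(dely)
sym_rmsd = math.sqrt(flex.mean(sym_x*sym_x + sym_y*sym_y))
# removing the mean offset shrinks the RMSD
assert sym_rmsd < asym_rmsd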
def model_based_outliers(self, f_model, level=0.01, return_data=False,
                         plot_out=None):
  assert self.r_free_flags is not None
  if (self.r_free_flags.data().count(True) == 0):
    self.r_free_flags = self.r_free_flags.array(
      data=~self.r_free_flags.data())
  sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
    miller_obs=self.miller_obs,
    miller_calc=f_model,
    r_free_flags=self.r_free_flags,
    kernel_width_free_reflections=200,
    n_sampling_points=20,
    n_chebyshev_terms=13)
  sigmaa_estimator.show(out=self.out)
  sigmaa = sigmaa_estimator.sigmaa()
  obs_norm = abs(sigmaa_estimator.normalized_obs)
  calc_norm = sigmaa_estimator.normalized_calc
  f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
    f_obs=obs_norm.data(),
    sigma_obs=None,
    f_calc=calc_norm.data(),
    # the data is prenormalized, so all epsilons are unity
    epsilon=flex.double(calc_norm.data().size(), 1.0),
    centric=obs_norm.centric_flags().data(),
    alpha=sigmaa.data(),
    beta=1.0 - sigmaa.data()*sigmaa.data())
  modes = f_model_outlier_object.posterior_mode()
  lik = f_model_outlier_object.log_likelihood()
  p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
  s_der = f_model_outlier_object.posterior_mode_snd_der()
  ll_gain = f_model_outlier_object.standardized_likelihood()
  # The smallest value should be 0. Sometimes, due to numerical issues,
  # it comes out a wee bit negative; repair that here.
  eps = 1.0e-10
  zeros = flex.bool(ll_gain < eps)
  p_values = ll_gain
  p_values = p_values.set_selected(zeros, eps)
  p_values = erf(flex.sqrt(p_values/2.0))
  p_values = 1.0 - flex.pow(p_values, float(p_values.size()))
  # select on p-values
  flags = flex.bool(p_values > level)
  flags = self.miller_obs.customized_copy(data=flags)
  ll_gain = self.miller_obs.customized_copy(data=ll_gain)
  p_values = self.miller_obs.customized_copy(data=p_values)
  log_message = """
Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood difference 2(log[P(Fobs)]-log[P(Fmode)])/Q" is distributed
according to a Chi-square distribution (Q" is equal to the second
derivative of the log likelihood function of the mode of the
distribution). The outlier threshold of the p-value relates to the
p-value of the extreme value distribution of the chi-square distribution.
"""
  flags.map_to_asu()
  ll_gain.map_to_asu()
  p_values.map_to_asu()
  assert flags.indices().all_eq(self.miller_obs.indices())
  assert ll_gain.indices().all_eq(self.miller_obs.indices())
  assert p_values.indices().all_eq(self.miller_obs.indices())
  log_message = self.make_log_model(log_message, flags, ll_gain, p_values,
                                    obs_norm, calc_norm, sigmaa, plot_out)
  tmp_log = StringIO()
  print(log_message, file=tmp_log)
  # histogram of log likelihood gain values
  print(file=tmp_log)
  print("The histogram of scaled (LL-gain) values is shown below.",
        file=tmp_log)
  print("  Note: scaled (LL-gain) is approximately Chi-square distributed.",
        file=tmp_log)
  print(file=tmp_log)
  print("  scaled(LL-gain)  Frequency", file=tmp_log)
  histo = flex.histogram(ll_gain.data(), 15)
  histo.show(f=tmp_log, format_cutoffs="%7.3f")
  print(tmp_log.getvalue(), file=self.out)
  if not return_data:
    return flags
  else:
    assert flags.indices().all_eq(self.miller_obs.indices())
    return self.miller_obs.select(flags.data())
def nth_power_scale(dataarray, nth_power):
  datascaled = flex.pow(flex.abs(dataarray), nth_power)
  return datascaled