def estimate_cc_sig_fac(self):
    """A1.1. Estimation of sigma(CC) as a function of sample size.

    Intensities are paired at random within each resolution bin of
    ``self.intensities``. For every group size N, 200 random groups of N
    pairs are drawn and the rms of their correlation coefficients is taken
    as sigma(CC). CCsigFac is the slope of a linear fit of sigma(CC)
    against 1/sqrt(N).

    Sets:
        self.corr_unrelated: Correlation accumulator over the sampled pairs.
        self.cc_sig_fac: Slope of the fit, or 0 when there are too few pairs
            to sample a useful range of group sizes or the fit is not well
            defined.
    """
    binner = self.intensities.setup_binner_counting_sorted(
        reflections_per_bin=200)
    data = self.intensities.data()
    counts = binner.counts()

    a = flex.double()
    b = flex.double()
    for i_bin in range(binner.n_bins_all()):
        count = counts[i_bin]
        if count == 0:
            continue
        bin_isel = binner.array_indices(i_bin)
        half = count // 2
        # Shuffle the bin and drop one reflection if the count is odd, so
        # that the two halves pair up one-to-one.
        p = flex.random_permutation(count)[:2 * half]
        a.extend(data.select(bin_isel.select(p[:half])))
        b.extend(data.select(bin_isel.select(p[half:])))

    # Cap the number of pairs used for the estimate.
    perm = flex.random_selection(a.size(), min(20000, a.size()))
    a = a.select(perm)
    b = b.select(perm)
    self.corr_unrelated = CorrelationCoefficientAccumulator(a, b)

    n_pairs = a.size()
    min_num_groups = 10  # minimum number of groups
    max_n_group = int(min(n_pairs / min_num_groups, 200))  # maximum number in group
    min_n_group = int(min(5, max_n_group))  # minimum number in group

    # Too few distinct group sizes to fit a line through: report "no
    # estimate" instead of failing later in the regression.
    if (max_n_group - min_n_group) < 4:
        self.cc_sig_fac = 0
        return

    rms_ccs = flex.double()
    ns = flex.double()
    # The upper bound is inclusive: max_n_group is itself a valid group size.
    for n in range(min_n_group, max_n_group + 1):
        ns.append(n)
        ccs = flex.double()
        for _ in range(200):
            isel = flex.random_selection(a.size(), n)
            corr = CorrelationCoefficientAccumulator(
                a.select(isel), b.select(isel))
            ccs.append(corr.coefficient())
        rms_ccs.append(flex.mean(flex.pow2(ccs))**0.5)

    fit = flex.linear_regression(1 / flex.pow(ns, 0.5), rms_ccs)
    # A degenerate fit is a property of the data, not a programming error,
    # so fall back to 0 rather than asserting.
    self.cc_sig_fac = fit.slope() if fit.is_well_defined() else 0
def sample_data(data, sample_size):
    """Select the same random subset of rows from every vector in ``data``.

    One random selection (without replacement) of ``sample_size`` row
    indices is drawn, based on the length of the first vector, and applied
    to each vector in turn.

    Args:
        data: Sequence of vectors supporting ``len()`` and ``.select()``.
        sample_size: Number of rows to keep.

    Returns:
        A list with one sub-sampled vector per entry of ``data``.
    """
    row_count = len(data[0])
    chosen_rows = flex.random_selection(row_count, sample_size)
    sampled = []
    for column in data:
        sampled.append(column.select(chosen_rows))
    return sampled
def generate_reflections(self):
    """Build a small, reproducible set of simulated reflections.

    Indices are generated to 2.0 resolution in space group number 1,
    predicted over the scan's oscillation range, filtered to those hitting
    the detector and re-predicted with ``self.ref_predictor``. The predicted
    centroids are copied into the 'observed' columns, variances are
    invented, and 10 reflections are picked with a fixed random seed before
    block numbers are assigned per image.

    Returns:
        The reflections returned by ``BlockCalculator.per_image()``.
    """
    from cctbx.sgtbx import space_group, space_group_symbols
    from dials.algorithms.spot_prediction import IndexGenerator, ray_intersection

    sequence_range = self.scan.get_oscillation_range(deg=False)
    d_min = 2.0
    generator = IndexGenerator(
        self.crystal.get_unit_cell(),
        space_group(space_group_symbols(1).hall()).type(),
        d_min,
    )
    miller_indices = generator.to_array()

    # Predict rays within the sequence range
    predict_rays = ScansRayPredictor(self.experiments, sequence_range)
    refs = predict_rays(miller_indices)

    # Keep only the rays that intersect the detector
    hits = ray_intersection(self.detector, refs)
    refs = refs.select(hits)

    # Re-predict with the Experiments predictor: same result, but this also
    # provides the flags and xyzcal.px columns
    n_refs = len(refs)
    refs["id"] = flex.int(n_refs, 0)
    refs = self.ref_predictor(refs)

    # Use the predicted centroids as the 'observed' ones
    refs["xyzobs.mm.value"] = refs["xyzcal.mm"]

    # Invent variances for the simulated centroids: half a pixel in x and y,
    # half an image width (0.1 degrees, in radians) in phi
    image_width = 0.1 * pi / 180.0
    pixel_size = self.detector[0].get_pixel_size()
    n_refs = len(refs)
    variance_x = flex.double(n_refs, (pixel_size[0] / 2.0)**2)
    variance_y = flex.double(n_refs, (pixel_size[1] / 2.0)**2)
    variance_phi = flex.double(n_refs, (image_width / 2.0)**2)
    refs["xyzobs.mm.variance"] = flex.vec3_double(
        variance_x, variance_y, variance_phi)

    # Fix the flex random seed to an 'uninteresting' number so the
    # selection below is reproducible
    flex.set_random_seed(12407)

    # Take 10 random reflections for speed
    picked = flex.random_selection(len(refs), 10)
    subset = refs.select(picked)

    # Use a BlockCalculator to calculate the blocks per image
    from dials.algorithms.refinement.reflection_manager import BlockCalculator

    calculator = BlockCalculator(self.experiments, subset)
    return calculator.per_image()
def index(self, datablock, observed):
    """IOTA-SRS indexing. Goes through ntrials and indexes subsamples.

    When ``self.params.iota.method`` is ``'random_sub_sampling'``, a random
    sub-sample of ``observed`` is indexed ``ntrials`` times with
    ``self.index_with_iota``. If the consensus function is ``'unit_cell'``
    and at least one trial succeeded, the consensus crystal models are
    stored in ``self.known_crystal_models``. The full set of observations is
    then indexed with ``self.index_with_known_orientation``. For any other
    method the observations are indexed once with ``self.index_with_iota``.

    Args:
        datablock: Passed through to the indexing methods.
        observed: Reflection table of observed spots; sub-sampled per trial.

    Returns:
        Tuple ``(experiments, indexed)`` from the final indexing call.

    Raises:
        IOTA_TimeoutError: If ``self.params.iota.timeout_cutoff_sec`` is set
            and the trials have been running for longer than that.
    """
    if self.params.iota.method != 'random_sub_sampling':
        experiments, indexed = self.index_with_iota(datablock, observed)
        return experiments, indexed

    from scitbx.array_family import flex

    srs_params = self.params.iota.random_sub_sampling
    experiments_list = []
    self.known_crystal_models = None

    # No outlier rejection or refinement should be done for the candidate
    # basis vectors; remember the user's settings so they can be restored.
    outlier_rejection_flag = self.params.indexing.stills.candidate_outlier_rejection
    refine_all_candidates_flag = self.params.indexing.stills.refine_all_candidates
    if srs_params.no_outlier_rejection_and_candidates_refinement:
        self.params.indexing.stills.candidate_outlier_rejection = False
        self.params.indexing.stills.refine_all_candidates = False

    try:
        # Timeout option for IOTA
        initial_time = time.time()
        for trial in range(srs_params.ntrials):
            curr_time = time.time()
            if self.params.iota.timeout_cutoff_sec is not None:
                if curr_time - initial_time > self.params.iota.timeout_cutoff_sec:
                    raise IOTA_TimeoutError('IOTA_TIMEOUT ', curr_time - initial_time)

            # Deterministic, trial-specific sub-sample
            flex.set_random_seed(trial + 1001)
            sample_size = int(len(observed) * srs_params.fraction_sub_sample)
            observed_sample = observed.select(
                flex.random_selection(len(observed), sample_size))
            try:
                print('IOTA:SUM_INTENSITY_VALUE=%d   %d' % (
                    sum(observed_sample['intensity.sum.value']), trial))
                experiments_tmp, indexed_tmp = self.index_with_iota(
                    datablock, observed_sample)
                experiments_list.append(experiments_tmp)
            except Exception as e:
                # Best effort: a failed trial only means one fewer candidate
                # for the consensus, but say why it failed.
                print('Indexing failed for some reason: %s' % e)

        if srs_params.consensus_function == 'unit_cell':
            from exafel_project.ADSE13_25.clustering.old_consensus_functions import get_uc_consensus as get_consensus
            if len(experiments_list) > 0:
                known_crystal_models, clustered_experiments_list = get_consensus(
                    experiments_list,
                    show_plot=False,
                    return_only_first_indexed_model=False,
                    finalize_method=None,
                    clustering_params=None)
                self.known_crystal_models = known_crystal_models
    finally:
        # Set back whatever PHIL parameter was supplied by the user for
        # outlier rejection and refinement, even if the trials raised.
        self.params.indexing.stills.candidate_outlier_rejection = outlier_rejection_flag
        self.params.indexing.stills.refine_all_candidates = refine_all_candidates_flag

    print('IOTA: Reindexing with best chosen crystal model')
    experiments, indexed = self.index_with_known_orientation(datablock, observed)
    return experiments, indexed
def generate_reflections(self):
    """Create a tiny, reproducible table of simulated reflections.

    Indices are generated to 2.0 resolution in space group number 1,
    predicted over the scan's oscillation range, restricted to rays that hit
    the detector and re-predicted with ``self.ref_predictor``. Predicted
    centroids are used as observations, variances are invented, and 5
    reflections are drawn with a fixed random seed before block numbers are
    assigned per image.

    Returns:
        The reflections returned by ``BlockCalculator.per_image()``.
    """
    sweep_range = self.scan.get_oscillation_range(deg=False)
    d_min = 2.0
    unit_cell = self.crystal.get_unit_cell()
    p1_type = space_group(space_group_symbols(1).hall()).type()
    miller_indices = IndexGenerator(unit_cell, p1_type, d_min).to_array()

    # Predict rays within the sweep range
    predict_rays = ScansRayPredictor(self.experiments, sweep_range)
    refs = predict_rays(miller_indices)

    # Keep only the rays that intersect the detector
    refs = refs.select(ray_intersection(self.detector, refs))

    # Re-predict with the Experiments predictor: same result, but this also
    # provides the flags and xyzcal.px columns
    refs['id'] = flex.int(len(refs), 0)
    refs = self.ref_predictor(refs)

    # Use the predicted centroids as the 'observed' ones
    refs['xyzobs.mm.value'] = refs['xyzcal.mm']

    # Invent variances for the simulated centroids: half a pixel in x and y,
    # half an image width (0.1 degrees, in radians) in phi
    image_width = 0.1 * pi / 180.
    pixel_size = self.detector[0].get_pixel_size()
    count = len(refs)
    variances = flex.vec3_double(
        flex.double(count, (pixel_size[0] / 2.)**2),
        flex.double(count, (pixel_size[1] / 2.)**2),
        flex.double(count, (image_width / 2.)**2))
    refs['xyzobs.mm.variance'] = variances

    # Fix the flex random seed to an 'uninteresting' number so the
    # selection below is reproducible
    flex.set_random_seed(12407)

    # Take 5 random reflections for speed
    subset = refs.select(flex.random_selection(len(refs), 5))

    # Use a BlockCalculator to calculate the blocks per image
    from dials.algorithms.refinement.reflection_manager import BlockCalculator
    return BlockCalculator(self.experiments, subset).per_image()
def _estimate_cc_sig_fac(self):
    """Estimate CCsigFac, the scale of sigma(CC) with sample size.

    Reflections are paired at random within resolution bins, keeping only
    pairs whose indices differ once mapped to the asymmetric unit of the
    lattice group, i.e. pairs that are not related by potential symmetry.
    For each group size N, from min(5, N_max) to N_max (at most 200),
    sigma(CC) is taken to be the rms of the correlation coefficients of 200
    random groups of N such pairs. CCsigFac is the slope of a linear fit of
    sigma(CC) to 1/N^(1/2), i.e.:

        sigma(CC) = CCsigFac/N^(1/2)

    Sets ``self.corr_unrelated`` and ``self.cc_sig_fac``. The latter is 0 if
    fewer than five group sizes are available or the fit is not well
    defined.
    """
    max_bins = 500
    per_bin = int(math.ceil(self.intensities.size() / max_bins))
    per_bin = max(200, per_bin)
    binner = self.intensities.setup_binner_counting_sorted(
        reflections_per_bin=per_bin)

    first = flex.double()
    second = flex.double()

    # Copy of the intensities with indices mapped to the asu of the lattice
    # group, used only to recognise symmetry-related pairs.
    lattice_symmetry = crystal.symmetry(
        space_group=self.lattice_group,
        unit_cell=self.intensities.unit_cell(),
        assert_is_compatible_unit_cell=False,
    )
    in_asu = self.intensities.customized_copy(
        crystal_symmetry=lattice_symmetry).map_to_asu()

    for i_bin in range(binner.n_bins_all()):
        n_in_bin = binner.counts()[i_bin]
        if n_in_bin != 0:
            members = binner.array_indices(i_bin)
            order = flex.random_permutation(n_in_bin)
            order = order[:2 * (n_in_bin // 2)]  # ensure even count
            half_a = in_asu.select(members.select(order[:n_in_bin // 2]))
            half_b = in_asu.select(members.select(order[n_in_bin // 2:]))
            # only keep pairs of reflections that don't have the same
            # indices in the asu of the lattice group
            unrelated = half_a.indices() != half_b.indices()
            first.extend(half_a.data().select(unrelated))
            second.extend(half_b.data().select(unrelated))

    keep = flex.random_selection(first.size(), min(20000, first.size()))
    first = first.select(keep)
    second = second.select(keep)
    self.corr_unrelated = CorrelationCoefficientAccumulator(first, second)

    total_pairs = first.size()
    min_num_groups = 10  # minimum number of groups
    largest_group = int(min(total_pairs / min_num_groups, 200))
    smallest_group = int(min(5, largest_group))

    if (largest_group - smallest_group) < 4:
        self.cc_sig_fac = 0
        return

    mean_ccs = flex.double()
    rms_ccs = flex.double()
    group_sizes = flex.double()
    for group_size in range(smallest_group, largest_group + 1):
        group_sizes.append(group_size)
        trial_ccs = flex.double()
        for _ in range(200):
            chosen = flex.random_selection(first.size(), group_size)
            accumulator = CorrelationCoefficientAccumulator(
                first.select(chosen), second.select(chosen))
            trial_ccs.append(accumulator.coefficient())
        mean_ccs.append(flex.mean(trial_ccs))
        rms_ccs.append(flex.mean(flex.pow2(trial_ccs))**0.5)

    inverse_root_n = 1 / flex.pow(group_sizes, 0.5)
    fit = flex.linear_regression(inverse_root_n, rms_ccs)
    if not fit.is_well_defined():
        self.cc_sig_fac = 0
    else:
        self.cc_sig_fac = fit.slope()
def estimate_gain(imageset, kernel_size=(10,10), output_gain_map=None):
    """Estimate the detector gain from the first image of ``imageset``.

    A ``KabschDebug`` object is built for every panel with a unit gain map,
    and the values of its ``coefficient_of_variation()`` map are pooled over
    all panels. Values outside the fences (q1 - 1.5*IQR, q3 + 1.5*IQR) are
    discarded and the middle element of the remaining sorted values is
    reported as the gain.

    Args:
        imageset: Image set providing ``get_detector()``,
            ``get_raw_data(0)`` and ``get_mask(0)``.
        kernel_size: Kernel size passed through to ``KabschDebug``.
        output_gain_map: Optional file name. When given, a constant gain map
            with the shape of panel 0 is pickled to this file.

    Returns:
        The estimated gain.

    Raises:
        Sorry: If the interquartile range of the pooled values is zero, so
            that no value would survive the outlier rejection.
    """
    from dials.algorithms.image.threshold import KabschDebug
    from libtbx.math_utils import nearest_integer as nint
    from libtbx.utils import Sorry

    detector = imageset.get_detector()
    raw_data = imageset.get_raw_data(0)
    mask = imageset.get_mask(0)

    gain_value = 1
    min_local = 0

    # dummy values, shouldn't affect results
    nsigma_b = 6
    nsigma_s = 3
    global_threshold = 0

    # Process one panel at a time so that only a single KabschDebug object
    # is alive at once.
    dispersion = flex.double()
    for i_panel in range(len(detector)):
        panel_data = raw_data[i_panel]
        kabsch = KabschDebug(
            panel_data.as_double(), mask[i_panel],
            flex.double(panel_data.accessor(), gain_value),
            kernel_size, nsigma_b, nsigma_s, global_threshold, min_local)
        dispersion.extend(kabsch.coefficient_of_variation().as_1d())

    sorted_dispersion = flex.sorted(dispersion)
    q1 = sorted_dispersion[nint(len(sorted_dispersion)/4)]
    q2 = sorted_dispersion[nint(len(sorted_dispersion)/2)]
    q3 = sorted_dispersion[nint(len(sorted_dispersion)*3/4)]
    iqr = q3-q1

    print("q1, q2, q3: %.2f, %.2f, %.2f" %(q1, q2, q3))

    # With a zero IQR the strict inequalities below reject every value and
    # the median lookup would fail with an IndexError.
    if iqr == 0.0:
        raise Sorry(
            'Unable to robustly estimate the variation of pixel values.')

    inlier_sel = (sorted_dispersion > (q1 - 1.5*iqr)) & (sorted_dispersion < (q3 + 1.5*iqr))
    sorted_dispersion = sorted_dispersion.select(inlier_sel)
    gain = sorted_dispersion[nint(len(sorted_dispersion)/2)]
    print("Estimated gain: %.2f" % gain)

    if output_gain_map:
        # write the gain map
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain)
        # Pickle data is binary: open in "wb" and close the handle promptly.
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return gain
def estimate_gain(imageset, kernel_size=(10, 10), output_gain_map=None, max_images=1):
    """Estimate the detector gain from a small section of panel 0.

    For each of the first ``max_images`` images (at least one), an
    NSQ x NSQ block starting at offset (400, 400) is copied from panel 0 of
    the raw data and of the mask. A ``DispersionThresholdDebug`` object is
    built on that block with a unit gain map, and the interior of its
    ``index_of_dispersion()`` map is pooled. Values outside the fences
    (q1 - 1.5 * IQR, q3 + 1.5 * IQR) are discarded and the middle element of
    the remaining sorted values is the gain estimate for that image.

    Args:
        imageset: Image set providing ``get_raw_data(i)`` and
            ``get_mask(i)``; only panel 0 is analysed.
        kernel_size: Kernel size passed through to
            ``DispersionThresholdDebug``.
        output_gain_map: Optional file name. When given, a constant gain map
            with the shape of the full panel 0 is pickled to this file.
        max_images: Maximum number of images to analyse.

    Returns:
        The gain estimated from the first image.

    Raises:
        Sorry: If the image set is empty, or the interquartile range of the
            pooled values is zero.
    """
    from dials.algorithms.image.threshold import DispersionThresholdDebug
    from libtbx.math_utils import nearest_integer as nint

    NSQ = 200  # edge length of the analysed square section

    gain_value = 1
    min_local = 0

    # dummy values, shouldn't affect results
    nsigma_b = 6
    nsigma_s = 3
    global_threshold = 0

    gains = flex.double()
    full_raw_data = None

    for image_no in range(len(imageset)):
        full_raw_data = imageset.get_raw_data(image_no)

        small_section = full_raw_data[0].matrix_copy_block(400, 400, NSQ, NSQ)
        print("This small section", len(small_section), "mean ist",
              flex.mean(small_section.as_double()))
        raw_data = (small_section, )

        mask = imageset.get_mask(image_no)
        mask = (mask[0].matrix_copy_block(400, 400, NSQ, NSQ)),

        # Iterate over the cropped panels: raw_data and mask hold one entry
        # each, whatever the number of panels in the detector.
        dispersion = flex.double()
        for panel_data, panel_mask in zip(raw_data, mask):
            kabsch = DispersionThresholdDebug(
                panel_data.as_double(), panel_mask,
                flex.double(panel_data.accessor(), gain_value),
                kernel_size, nsigma_b, nsigma_s, global_threshold, min_local)
            # Trim the border of the index-of-dispersion map.
            a_section = kabsch.index_of_dispersion().matrix_copy_block(
                5, 5, NSQ - 15, NSQ - 15)
            print("mean of a_section", flex.mean(a_section))
            dispersion.extend(a_section.as_1d())

        sorted_dispersion = flex.sorted(dispersion)
        q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
        q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
        iqr = q3 - q1

        print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))
        if iqr == 0.0:
            raise Sorry(
                'Unable to robustly estimate the variation of pixel values.')

        inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & (sorted_dispersion < (q3 + 1.5 * iqr))
        sorted_dispersion = sorted_dispersion.select(inlier_sel)
        gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        print("Estimated gain: %.2f" % gain)
        gains.append(gain)

        if image_no + 1 >= max_images:
            break

    if len(gains) == 0:
        raise Sorry('No images available to estimate the gain.')
    gain0 = gains[0]

    if len(gains) > 1:
        stats = flex.mean_and_variance(gains)
        print("Average gain: %.2f +/- %.2f" %
              (stats.mean(), stats.unweighted_sample_standard_deviation()))

    if output_gain_map:
        if len(gains) > 1:
            full_raw_data = imageset.get_raw_data(0)
        # write the gain map; its shape comes from the uncropped panel 0,
        # not from the NSQ x NSQ section analysed above
        import six.moves.cPickle as pickle
        gain_map = flex.double(flex.grid(full_raw_data[0].all()), gain0)
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return gain0
def estimate_gain(imageset, kernel_size=(10, 10), output_gain_map=None, max_images=1):
    """Estimate the detector gain from the first images of ``imageset``.

    For each of the first ``max_images`` images (at least one), a
    ``DispersionThresholdDebug`` object is built for every panel with a unit
    gain map, and the values of its ``index_of_dispersion()`` map are pooled
    over all panels. Values outside the fences
    (q1 - 1.5 * IQR, q3 + 1.5 * IQR) are discarded and the middle element of
    the remaining sorted values is the gain estimate for that image.

    Args:
        imageset: Image set providing ``get_detector()``,
            ``get_raw_data(i)`` and ``get_mask(i)``.
        kernel_size: Kernel size passed through to
            ``DispersionThresholdDebug``.
        output_gain_map: Optional file name. When given, a constant gain map
            with the shape of panel 0 is pickled to this file.
        max_images: Maximum number of images to analyse.

    Returns:
        The gain estimated from the first image.

    Raises:
        Sorry: If the image set is empty, or the interquartile range of the
            pooled values is zero.
    """
    from dials.algorithms.image.threshold import DispersionThresholdDebug
    from libtbx.math_utils import nearest_integer as nint

    detector = imageset.get_detector()

    gain_value = 1
    min_local = 0

    # dummy values, shouldn't affect results
    nsigma_b = 6
    nsigma_s = 3
    global_threshold = 0

    gains = flex.double()

    for image_no in range(len(imageset)):
        raw_data = imageset.get_raw_data(image_no)
        mask = imageset.get_mask(image_no)

        # Process one panel at a time so that only a single threshold
        # object is alive at once.
        dispersion = flex.double()
        for i_panel in range(len(detector)):
            panel_data = raw_data[i_panel]
            kabsch = DispersionThresholdDebug(
                panel_data.as_double(), mask[i_panel],
                flex.double(panel_data.accessor(), gain_value),
                kernel_size, nsigma_b, nsigma_s, global_threshold, min_local)
            dispersion.extend(kabsch.index_of_dispersion().as_1d())

        sorted_dispersion = flex.sorted(dispersion)
        q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
        q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
        iqr = q3 - q1

        print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))
        if iqr == 0.0:
            raise Sorry(
                'Unable to robustly estimate the variation of pixel values.')

        inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & (sorted_dispersion < (q3 + 1.5 * iqr))
        sorted_dispersion = sorted_dispersion.select(inlier_sel)
        gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        print("Estimated gain: %.2f" % gain)
        gains.append(gain)

        if image_no + 1 >= max_images:
            break

    if len(gains) == 0:
        raise Sorry('No images available to estimate the gain.')
    gain0 = gains[0]

    if len(gains) > 1:
        stats = flex.mean_and_variance(gains)
        print("Average gain: %.2f +/- %.2f" % (
            stats.mean(), stats.unweighted_sample_standard_deviation()))

    if output_gain_map:
        if len(gains) > 1:
            raw_data = imageset.get_raw_data(0)
        # write the gain map
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain0)
        # Pickle data is binary: open in "wb" and close the handle promptly.
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return gain0
# Set 'observed' centroids from the predicted ones obs_refs['xyzobs.mm.value'] = obs_refs['xyzcal.mm'] # Invent some variances for the centroid positions of the simulated data im_width = 0.1 * pi / 180. px_size = mydetector[0].get_pixel_size() var_x = flex.double(len(obs_refs), (px_size[0] / 2.)**2) var_y = flex.double(len(obs_refs), (px_size[1] / 2.)**2) var_phi = flex.double(len(obs_refs), (im_width / 2.)**2) obs_refs['xyzobs.mm.variance'] = flex.vec3_double(var_x, var_y, var_phi) # set the flex random seed to an 'uninteresting' number flex.set_random_seed(12407) # take 5 random reflections for speed reflections = obs_refs.select(flex.random_selection(len(obs_refs), 5)) # use a BlockCalculator to calculate the blocks per image from dials.algorithms.refinement.reflection_manager import BlockCalculator block_calculator = BlockCalculator(experiments, reflections) reflections = block_calculator.per_image() # use a ReflectionManager to exclude reflections too close to the spindle, # plus set the frame numbers from dials.algorithms.refinement.reflection_manager import ReflectionManager refman = ReflectionManager(reflections, experiments, outlier_detector=None) # make a target to ensure reflections are predicted and refman is finalised from dials.algorithms.refinement.target import \ LeastSquaresPositionalResidualWithRmsdCutoff
def estimate_gain(imageset, kernel_size=(10, 10), output_gain_map=None):
    """Estimate the detector gain from the first image of ``imageset``.

    A ``KabschDebug`` object is built for every panel with a unit gain map,
    and the values of its ``coefficient_of_variation()`` map are pooled over
    all panels. Values outside the fences
    (q1 - 1.5 * IQR, q3 + 1.5 * IQR) are discarded and the middle element of
    the remaining sorted values is reported as the gain.

    Args:
        imageset: Image set providing ``get_detector()``,
            ``get_raw_data(0)`` and ``get_mask(0)``.
        kernel_size: Kernel size passed through to ``KabschDebug``.
        output_gain_map: Optional file name. When given, a constant gain map
            with the shape of panel 0 is pickled to this file.

    Returns:
        The estimated gain.

    Raises:
        Sorry: If the interquartile range of the pooled values is zero, so
            that no value would survive the outlier rejection.
    """
    from dials.algorithms.image.threshold import KabschDebug
    from libtbx.math_utils import nearest_integer as nint
    from libtbx.utils import Sorry

    detector = imageset.get_detector()
    raw_data = imageset.get_raw_data(0)
    mask = imageset.get_mask(0)

    gain_value = 1
    min_local = 0

    # dummy values, shouldn't affect results
    nsigma_b = 6
    nsigma_s = 3
    global_threshold = 0

    # Process one panel at a time so that only a single KabschDebug object
    # is alive at once.
    dispersion = flex.double()
    for i_panel in range(len(detector)):
        panel_data = raw_data[i_panel]
        kabsch = KabschDebug(
            panel_data.as_double(), mask[i_panel],
            flex.double(panel_data.accessor(), gain_value),
            kernel_size, nsigma_b, nsigma_s, global_threshold, min_local)
        dispersion.extend(kabsch.coefficient_of_variation().as_1d())

    sorted_dispersion = flex.sorted(dispersion)
    q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
    q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
    iqr = q3 - q1

    print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))

    # With a zero IQR the strict inequalities below reject every value and
    # the median lookup would fail with an IndexError.
    if iqr == 0.0:
        raise Sorry(
            'Unable to robustly estimate the variation of pixel values.')

    inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & (sorted_dispersion < (q3 + 1.5 * iqr))
    sorted_dispersion = sorted_dispersion.select(inlier_sel)
    gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    print("Estimated gain: %.2f" % gain)

    if output_gain_map:
        # write the gain map
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain)
        # Pickle data is binary: open in "wb" and close the handle promptly.
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return gain