def five_number_summary(data):
    """
    Compute Tukey's five number summary of a sequence of observations.

    The summary is returned as the tuple
    (min, lower hinge, median, upper hinge, max) and gives the same results
    as R's fivenum function.
    """
    try:
        ordered = flex.sorted(data)
    except AttributeError:
        ordered = sorted(data)

    def _median(values):
        # Middle element for odd lengths, mean of the two central ones otherwise.
        mid = len(values) // 2
        if len(values) % 2:
            return values[mid]
        return (values[mid] + values[mid - 1]) / 2

    total = len(ordered)
    half = total // 2
    median = _median(ordered)

    # For an odd number of observations the median belongs to both halves.
    lower = ordered[:half + 1] if total % 2 else ordered[:half]
    upper = ordered[half:]

    lower_hinge = _median(lower)
    upper_hinge = _median(upper)
    return ordered[0], lower_hinge, median, upper_hinge, ordered[-1]
def five_number_summary(data):
    """
    Return Tukey's five number summary for a sequence of observations:
    (min, lower hinge, median, upper hinge, max).

    The values agree with those produced by R's fivenum function.
    """
    try:
        values = flex.sorted(data)
    except AttributeError:
        values = sorted(data)

    half, odd_count = divmod(len(values), 2)
    if odd_count:
        # the median itself is shared by the lower and the upper half
        median = values[half]
        lower_half = values[:half + 1]
    else:
        median = (values[half] + values[half - 1]) / 2
        lower_half = values[:half]
    upper_half = values[half:]

    # Both halves have the same length, so one midpoint serves both hinges.
    mid, odd_half = divmod(len(lower_half), 2)
    if odd_half:
        lower_hinge = lower_half[mid]
        upper_hinge = upper_half[mid]
    else:
        lower_hinge = (lower_half[mid] + lower_half[mid - 1]) / 2
        upper_hinge = (upper_half[mid] + upper_half[mid - 1]) / 2

    return values[0], lower_hinge, median, upper_hinge, values[-1]
def join_selections(sel1, sel2):
    """
    Merge two index selections into a single sorted selection.

    Entries of sel1 flagged by sel1.intersection_i_seqs(sel2) (first element
    of its result) are dropped before sel2 is appended, so shared entries
    are taken from sel2 only. The inputs are not modified: select() yields
    the array that gets extended.
    """
    shared_positions = sel1.intersection_i_seqs(sel2)[0]
    keep_mask = flex.bool(len(sel1), True)
    keep_mask.set_selected(shared_positions, False)
    merged = sel1.select(keep_mask)
    merged.extend(sel2)
    return flex.sorted(merged)
def sieve_fit(sites_fixed, sites_moving, selection=None, frac_discard=0.5):
    """
    Least-squares superposition followed by a refit on the best-fitting sites.

    Reference: Chothia & Lesk???

    @param sites_fixed reference sites (same non-zero size as sites_moving)
    @param sites_moving sites to be superposed onto sites_fixed
    @param selection optional boolean selection of the sites used for the
      first superposition; all sites are used when None
    @param frac_discard fraction of the selected sites, taken from those that
      deviate the most after the first fit, to leave out of the second fit
    @return the least_squares_fit object of the second fit, or of the first
      fit when there is nothing to discard or no site would be left
    """
    assert (sites_fixed.size() == sites_moving.size() > 0)
    if selection is None:
        selection = flex.bool(sites_fixed.size(), True)
    # step 1: superpose using originally selected atoms
    sites_fixed_aln = sites_fixed.select(selection)
    sites_moving_aln = sites_moving.select(selection)
    lsq_fit_obj = least_squares_fit(
        reference_sites=sites_fixed_aln,
        other_sites=sites_moving_aln)
    sites_moving_new = lsq_fit_obj.other_sites_best_fit()
    # step 2: discard the sites that deviate the most, and superpose again
    deltas = (sites_fixed_aln - sites_moving_new).norms()
    n_keep = int((1 - frac_discard) * deltas.size())
    if not (0 < n_keep < deltas.size()):
        # Nothing to discard (or nothing to keep): the first fit is the answer.
        # This also avoids indexing past the end of the sorted deviations.
        return lsq_fit_obj
    cutoff = flex.sorted(deltas)[n_keep]
    # Keep the sites whose deviation lies BELOW the cutoff. The previous
    # "deltas > cutoff" kept the worst-fitting sites instead of discarding them.
    selection = (deltas < cutoff)
    if selection.count(True) == 0:
        return lsq_fit_obj
    sites_fixed_aln = sites_fixed_aln.select(selection)
    sites_moving_aln = sites_moving_aln.select(selection)
    lsq_fit_obj = least_squares_fit(
        reference_sites=sites_fixed_aln,
        other_sites=sites_moving_aln)
    return lsq_fit_obj
def normal_probability_plot(self, data, rankits_sel=None, plot=False):
    """ Use normal probability analysis to determine if a set of data is normally distributed

    See https://en.wikipedia.org/wiki/Normal_probability_plot.
    Rankits are computed in the same way as qqnorm does in R.

    @param data flex array
    @param rankits_sel only use the rankits in a certain range. Useful for outlier rejection. Should be
    a tuple such as (-0.5,0.5).
    @param plot whether to show the normal probability plot
    @return tuple (corr, slope, offset) as returned by
    self.get_overall_correlation_flex for the sorted data versus the rankits
    """
    from scitbx.math import distributions
    norm = distributions.normal_distribution()

    n = len(data)
    # Written with a float so the value is 0.375 even where int / int truncates.
    a = 3.0 / 8 if n <= 10 else 0.5

    sorted_data = flex.sorted(data)
    rankits = flex.double(
        [norm.quantile((i + 1 - a) / (n + 1 - (2 * a))) for i in range(n)])

    if rankits_sel is None:
        corr, slope, offset = self.get_overall_correlation_flex(sorted_data, rankits)
    else:
        sel = (rankits >= rankits_sel[0]) & (rankits <= rankits_sel[1])
        corr, slope, offset = self.get_overall_correlation_flex(
            sorted_data.select(sel), rankits.select(sel))

    if plot:
        from matplotlib import pyplot as plt
        lim = -5, 5

        # Figure 0: the normal probability plot itself
        plt.figure(0)
        plt.plot(sorted_data, rankits, '-')
        plt.title("CC: %.3f Slope: %.3f Offset: %.3f" % (corr, slope, offset))
        plt.xlabel("Sorted data")
        plt.ylabel("Rankits")
        plt.xlim(lim)
        plt.ylim(lim)
        # gca() addresses the axes just plotted on; plt.axes() may add a new one
        plt.gca().set_aspect('equal')

        # Figure 1: histogram of the data over the same range
        plt.figure(1)
        h = flex.histogram(sorted_data, n_slots=100, data_min=lim[0], data_max=lim[1])
        stats = flex.mean_and_variance(sorted_data)
        plt.plot(h.slot_centers().as_numpy_array(), h.slots().as_numpy_array(), '-')
        plt.xlim(lim)
        plt.xlabel("Sorted data")
        plt.ylabel("Count")
        plt.title("Normalized data mean: %.3f +/- %.3f" % (
            stats.mean(), stats.unweighted_sample_standard_deviation()))

        if self.scaler.params.raw_data.error_models.sdfac_refine.plot_refinement_steps:
            # interactive mode: redraw and continue instead of blocking
            plt.ion()
            plt.pause(0.05)

    return corr, slope, offset
def npp(values, input_mean_variance):
    """
    Build the data for a normal probability plot.

    The values are sorted and standardised with the supplied
    (mean, variance) pair, and returned next to the result of
    normal_distribution().quantiles() for the same number of points.

    Returns the tuple (expected, scaled).
    """
    import math
    from scitbx.math import distributions
    from scitbx.array_family import flex

    normal = distributions.normal_distribution()
    ordered = flex.sorted(values)
    mean, variance = input_mean_variance
    sigma = math.sqrt(variance)
    scaled = (ordered - mean) / sigma
    return normal.quantiles(ordered.size()), scaled
def npp_ify(values, input_mean_variance=None):
    '''Analyse data in values (assumed to be drawn from one population) and
    return the sorted list of (expected, observed) deviation from the mean.

    When input_mean_variance is given (and truthy) it supplies the
    (mean, variance) used for scaling; otherwise both are obtained from
    mean_variance() applied to the sorted values.'''
    normal = distributions.normal_distribution()
    ordered = flex.sorted(values)

    if not input_mean_variance:
        mean, variance = mean_variance(ordered)
    else:
        mean, variance = input_mean_variance

    observed = (ordered - mean) / math.sqrt(variance)
    expected = normal.quantiles(ordered.size())
    return expected, observed
def ncs_group_iselection(ncs_restraints_group_list, group_num):
    """
    Collects and returns iselection of all related atoms in NCS group

    Args:
      ncs_restraints_group_list : list of ncs restraints group objects
      group_num (int): the group number in the list (first group is 0)

    Returns:
      isel (flex.size_t): complete NCS group selection (master plus all
        copies, sorted); an empty selection when group_num is past the end
        of the list
    """
    # check that the number of the NCS group is valid
    if group_num >= len(ncs_restraints_group_list):
        return flex.size_t()
    gr = ncs_restraints_group_list[group_num]
    # Work on a copy: extend() changes the array in place, so extending
    # gr.master_iselection directly would append the copies' indices to the
    # group's own master selection on every call.
    isel = gr.master_iselection.deep_copy()
    for cp in gr.copies:
        isel.extend(cp.iselection)
    # make sure sequential order of selection indices
    return flex.sorted(isel)
def ncs_group_iselection(ncs_restraints_group_list, group_num):
    """
    Collects and returns iselection of all related atoms in NCS group

    Args:
      ncs_restraints_group_list : list of ncs restraints group objects
      group_num (int): the group number in the list (first group is 0)

    Returns:
      isel (flex.size_t): complete NCS group selection, i.e. the master
        selection followed by every copy's selection, in sorted order.
        Empty when group_num does not address a group in the list.
    """
    # check that the number of the NCS group is valid
    if group_num >= len(ncs_restraints_group_list):
        return flex.size_t()
    group = ncs_restraints_group_list[group_num]
    # deep_copy() keeps the group's master selection untouched; without it
    # the extend() calls below would grow master_iselection itself.
    isel = group.master_iselection.deep_copy()
    for ncs_copy in group.copies:
        isel.extend(ncs_copy.iselection)
    # make sure sequential order of selection indices
    return flex.sorted(isel)
def whole_group_iselection(self):
    """
    Return the sorted selection covering the master and all of its copies.

    A deep copy of the master selection is extended, so
    self.master_iselection itself is left unchanged.
    """
    combined = self.master_iselection.deep_copy()
    copy_selections = [ncs_copy.iselection for ncs_copy in self.copies]
    for copy_isel in copy_selections:
        combined.extend(copy_isel)
    # sorting puts the selection indices in sequential order
    return flex.sorted(combined)
class SingleImage(object):
    """ Per-image state (paths, status, results) plus image import helpers. """

    def __init__(self, img, init, verbose=True, imported_grid=None):
        """ Constructor for the SingleImage object using a raw image file or pickle

        img is indexed as img[0] (image index) and img[2] (image path);
        init supplies params, args, logfile and the *_base directories.
        imported_grid is accepted but not used here.
        """
        # Initialize parameters
        self.params = init.params
        self.args = init.args
        self.raw_img = img[2]
        self.conv_img = img[2]
        self.img_index = img[0]
        self.status = None
        self.fail = None
        self.final = None
        self.log_info = []
        self.gs_results = []
        self.main_log = init.logfile
        self.verbose = verbose
        self.hmed = self.params.cctbx.grid_search.height_median
        self.amed = self.params.cctbx.grid_search.area_median

        self.input_base = init.input_base
        self.conv_base = init.conv_base
        self.int_base = init.int_base
        self.obj_base = init.obj_base
        self.fin_base = init.fin_base
        self.viz_base = init.viz_base
        self.tmp_base = init.tmp_base
        self.abort_file = os.path.join(self.int_base, '.abort.tmp')

        self.obj_path = None
        self.obj_file = None
        self.fin_path = None
        self.fin_file = None
        self.viz_path = None

    # ============================== SELECTION-ONLY FUNCTIONS ============================== #

    def import_int_file(self, init):
        """ Replaces path settings in imported image object with new settings
            NEED TO RE-DO LATER

        Returns self. When the abort file exists only self.fail is set to
        'aborted' and nothing else is touched.
        """
        if os.path.isfile(self.abort_file):
            self.fail = 'aborted'
            return self

        # Generate paths to output files
        self.params = init.params
        self.main_log = init.logfile
        self.input_base = init.input_base
        self.conv_base = init.conv_base
        self.int_base = init.int_base
        self.obj_base = init.obj_base
        self.fin_base = init.fin_base
        self.viz_base = init.viz_base

        # file name of the converted image up to its first dot
        img_stem = os.path.basename(self.conv_img).split('.')[0]

        self.obj_path = misc.make_image_path(self.conv_img, self.input_base,
                                             self.obj_base)
        self.obj_file = os.path.abspath(
            os.path.join(self.obj_path, img_stem + ".int"))
        self.fin_path = misc.make_image_path(self.conv_img, self.input_base,
                                             self.fin_base)
        self.fin_file = os.path.abspath(
            os.path.join(self.fin_path, img_stem + "_int.pickle"))
        # NOTE(review): self.final is None after __init__; this presumably
        # relies on the imported object carrying a dict here - confirm.
        self.final['final'] = self.fin_file
        self.final['img'] = self.conv_img
        self.viz_path = misc.make_image_path(self.conv_img, self.input_base,
                                             self.viz_base)
        self.viz_file = os.path.join(self.viz_path, img_stem + "_int.png")

        # Create actual folders (if necessary); failures are deliberately
        # ignored (best effort)
        try:
            for folder in (self.obj_path, self.fin_path, self.viz_path):
                if not os.path.isdir(folder):
                    os.makedirs(folder)
        except OSError:
            pass

        # Grid search / integration log file
        self.int_log = os.path.join(self.fin_path, img_stem + '.tmp')

        # Reset status to 'grid search' to pick up at selection (if no fail)
        if self.fail is None:
            self.status = 'bypass grid search'

        return self

    def determine_gs_result_file(self):
        """ For 'selection-only' cctbx.xfel runs, determine where the image
            objects are

        Uses the configured grid_search_path when set; otherwise the
        image_objects folder of the run numbered one below the basename of
        self.int_base, under the current working directory.
        """
        grid_search_path = self.params.cctbx.selection.select_only.grid_search_path
        if grid_search_path is not None:
            obj_path = os.path.abspath(grid_search_path)
        else:
            run_number = int(os.path.basename(self.int_base)) - 1
            obj_path = "{}/integration/{:03d}/image_objects".format(
                os.path.abspath(os.curdir), run_number)
        gs_result_file = os.path.join(obj_path, os.path.basename(self.obj_file))
        return gs_result_file

    # =============================== IMAGE IMPORT FUNCTIONS =============================== #

    def load_image(self):
        """ Reads raw image file and extracts data for conversion into pickle
            format. Also estimates gain if turned on.

        Returns (data, img_type): data is the dpack() result, or None when
        the image could not be loaded; img_type is 'pickle', 'raw' or
        'not imported'. self.gain is always set (1.0 when not estimated).
        """
        # Load raw image or image pickle
        try:
            with misc.Capturing() as junk_output:
                loaded_img = dxtbx.load(self.raw_img)
        except IOError:
            loaded_img = None

        # Extract image information
        if loaded_img is not None:
            raw_data = loaded_img.get_raw_data()
            detector = loaded_img.get_detector()[0]
            beam = loaded_img.get_beam()
            scan = loaded_img.get_scan()
            distance = detector.get_distance()
            pixel_size = detector.get_pixel_size()[0]
            overload = detector.get_trusted_range()[1]
            wavelength = beam.get_wavelength()
            beam_centre = detector.get_beam_centre(beam.get_s0())
            beam_x = beam_centre[0]
            beam_y = beam_centre[1]

            if scan is None:
                timestamp = None
                img_type = 'pickle'
            else:
                img_type = 'raw'
                # NOTE(review): math.modf returns (fractional, integer) parts,
                # so "msec" is a fraction of a second - confirm that this is
                # what evt_timestamp expects.
                msec, sec = math.modf(scan.get_epochs()[0])
                timestamp = evt_timestamp((sec, msec))

            # Assemble datapack
            data = dpack(data=raw_data,
                         distance=distance,
                         pixel_size=pixel_size,
                         wavelength=wavelength,
                         beam_center_x=beam_x,
                         beam_center_y=beam_y,
                         ccd_image_saturation=overload,
                         saturated_value=overload,
                         timestamp=timestamp)

            if scan is not None:
                osc_start, osc_range = scan.get_oscillation()
                if osc_start != osc_range:
                    data['OSC_START'] = 0  # osc_start
                    data['OSC_RANGE'] = 0  # osc_start
                    data['TIME'] = scan.get_exposure_times()[0]
        else:
            data = None
            img_type = 'not imported'

        # Estimate gain (or set gain to 1.00 if cannot calculate)
        # Cribbed from estimate_gain.py by Richard Gildea
        # Only attempted when an image was loaded: without one there is no
        # raw data and the estimation used to fail on an unbound variable.
        if loaded_img is not None and self.params.advanced.estimate_gain:
            try:
                from dials.algorithms.image.threshold import KabschDebug
                from libtbx.math_utils import nearest_integer as nint
                panels = [raw_data]
                n_panels = len(loaded_img.get_detector())
                gain_value = 1
                kernel_size = (10, 10)
                gain_map = [flex.double(panels[i].accessor(), gain_value)
                            for i in range(n_panels)]
                mask = loaded_img.get_mask()
                # dummy values, shouldn't affect results: REPLACE WITH SETTINGS!
                min_local = 0
                nsigma_b = 6
                nsigma_s = 3
                global_threshold = 0
                kabsch_debug_list = [
                    KabschDebug(panels[i_panel].as_double(), mask[i_panel],
                                gain_map[i_panel], kernel_size, nsigma_b,
                                nsigma_s, global_threshold, min_local)
                    for i_panel in range(n_panels)]

                dispersion = flex.double()
                for kabsch in kabsch_debug_list:
                    dispersion.extend(kabsch.coefficient_of_variation().as_1d())

                # median of the values inside the 1.5 * IQR fences
                sorted_dispersion = flex.sorted(dispersion)
                q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
                q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
                iqr = q3 - q1
                inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & \
                             (sorted_dispersion < (q3 + 1.5 * iqr))
                sorted_dispersion = sorted_dispersion.select(inlier_sel)
                self.gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
            except IndexError:
                self.gain = 1.0
        else:
            self.gain = 1.0

        return data, img_type
def del_anom_normal_plot(intensities, strong_cutoff=0.0):
    """Make a normal probability plot of the normalised anomalous differences.

    Args:
        intensities: object providing anomalous_differences(), whose result
            exposes data() and sigmas().
        strong_cutoff: when greater than 0 the plot is titled and named as
            the low-resolution ("d > cutoff") variant, otherwise as the
            all-data variant. It does not filter the data in this function.

    Returns:
        A dict with a single entry (keyed "normal_distribution_plot_lowres"
        or "normal_distribution_plot_highres") holding the plot "data",
        "layout" and "help" text, or an empty dict when there are no
        anomalous differences.
    """
    from math import exp, pi

    diff_array = intensities.anomalous_differences()
    if not diff_array.data().size():
        return {}
    delta = diff_array.data() / diff_array.sigmas()

    norm = distributions.normal_distribution()

    n = len(delta)
    a = 3 / 8 if n <= 10 else 0.5

    observed = flex.sorted(delta)
    expected = [norm.quantile((i + 1 - a) / (n + 1 - (2 * a))) for i in range(n)]

    H, xedges, yedges = np.histogram2d(
        np.array(expected), observed.as_numpy_array(), bins=(200, 200))
    # Empty bins become NaN rather than 0; np.nan is used because the
    # np.NAN alias no longer exists in NumPy 2.
    nonzeros = np.nonzero(H)
    z = np.full(H.shape, np.nan)
    z[nonzeros] = H[nonzeros]

    # also make a histogram
    histy = flex.histogram(observed, n_slots=100)
    # make a gaussian for reference also
    n = observed.size()
    centers = histy.slot_centers()
    width = centers[1] - centers[0]
    gaussian = [
        n * width * exp(-(center ** 2) / 2.0) / ((2.0 * pi) ** 0.5)
        for center in centers
    ]

    title = "Normal probability plot of anomalous differences"
    plotname = "normal_distribution_plot"
    if strong_cutoff > 0.0:
        title += " (d > %.2f)" % strong_cutoff
        plotname += "_lowres"
    else:
        title += " (all data)"
        plotname += "_highres"
    return {
        plotname: {
            "data": [
                {
                    "x": xedges.tolist(),
                    "y": yedges.tolist(),
                    "z": z.transpose().tolist(),
                    "type": "heatmap",
                    "name": "normalised deviations",
                    "colorbar": {
                        "title": "Number of reflections",
                        "titleside": "right",
                    },
                    "colorscale": "Jet",
                },
                {
                    "x": [-5, 5],
                    "y": [-5, 5],
                    "type": "scatter",
                    "mode": "lines",
                    "name": "z = m",
                    "color": "rgb(0,0,0)",
                },
            ],
            "layout": {
                "title": title,
                "xaxis": {
                    "anchor": "y",
                    "title": "expected delta",
                    "range": [-4, 4],
                },
                "yaxis": {
                    "anchor": "x",
                    "title": "observed delta",
                    "range": [-5, 5],
                },
            },
            "help": """\
This plot shows the normalised anomalous differences, sorted in order and
plotted against the expected order based on a normal distribution model.
A true normal distribution of deviations would give the straight line indicated.

[1] P. L. Howell and G. D. Smith, J. Appl. Cryst. (1992). 25, 81-86
https://doi.org/10.1107/S0021889891010385
[2] P. Evans, Acta Cryst. (2006). D62, 72-82
https://doi.org/10.1107/S0907444905036693
""",
        }
    }
def estimate_gain(imageset, kernel_size=(10, 10), output_gain_map=None, max_images=1):
    """
    Estimate the detector gain from the index of dispersion of the pixels.

    For each image (at most max_images, and always at least the first one)
    the per-pixel index of dispersion of every panel is collected, values
    outside the 1.5 * IQR fences are dropped, and the middle element of the
    remaining sorted values is taken as the gain. Raises Sorry when the
    inter-quartile range is zero.

    When output_gain_map is given, a map filled with the first image's gain
    and shaped like panel 0 is pickled to that path.

    Returns the gain estimated from the first image.
    """
    from dials.algorithms.image.threshold import DispersionThresholdDebug

    detector = imageset.get_detector()
    panel_ids = range(len(detector))

    gains = flex.double()
    for image_no in range(len(imageset)):
        raw_data = imageset.get_raw_data(image_no)

        unit_gain = 1
        gain_map = [flex.double(raw_data[i].accessor(), unit_gain) for i in panel_ids]

        mask = imageset.get_mask(image_no)

        # dummy values, shouldn't affect results
        min_local = 0
        nsigma_b, nsigma_s = 6, 3
        global_threshold = 0

        kabsch_debug_list = []
        for i_panel in panel_ids:
            kabsch_debug_list.append(
                DispersionThresholdDebug(
                    raw_data[i_panel].as_double(),
                    mask[i_panel],
                    gain_map[i_panel],
                    kernel_size,
                    nsigma_b,
                    nsigma_s,
                    global_threshold,
                    min_local,
                )
            )

        dispersion = flex.double()
        for debug in kabsch_debug_list:
            dispersion.extend(debug.index_of_dispersion().as_1d())

        ordered = flex.sorted(dispersion)
        from libtbx.math_utils import nearest_integer as nint

        n_values = len(ordered)
        q1 = ordered[nint(n_values / 4)]
        q2 = ordered[nint(n_values / 2)]
        q3 = ordered[nint(n_values * 3 / 4)]
        iqr = q3 - q1

        print(f"q1, q2, q3: {q1:.2f}, {q2:.2f}, {q3:.2f}")
        if iqr == 0.0:
            raise Sorry(
                "Unable to robustly estimate the variation of pixel values.")

        # keep only the values strictly inside the 1.5 * IQR fences
        lower_fence = q1 - 1.5 * iqr
        upper_fence = q3 + 1.5 * iqr
        inliers = ordered.select((ordered > lower_fence) & (ordered < upper_fence))
        gain = inliers[nint(len(inliers) / 2)]
        print(f"Estimated gain: {gain:.2f}")
        gains.append(gain)

        if image_no == 0:
            gain0 = gain
        if image_no + 1 >= max_images:
            break

    if len(gains) > 1:
        stats = flex.mean_and_variance(gains)
        print("Average gain: %.2f +/- %.2f" %
              (stats.mean(), stats.unweighted_sample_standard_deviation()))

    if output_gain_map:
        if len(gains) > 1:
            # raw_data still holds the last image analysed; go back to the first
            raw_data = imageset.get_raw_data(0)
        # write the gain map
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain0)
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return gain0
def estimate_gain(imageset, kernel_size=(10, 10), output_gain_map=None,
                  max_images=1, algorithm="kabsch"):
    """
    Estimate the gain from a small square region of the first panel.

    Only an NSQ x NSQ block of panel 0 (copied with matrix_copy_block at
    offset ANCHOR, ANCHOR) is analysed for each image.

    @param imageset provides get_raw_data(i), get_mask(i) and len()
    @param kernel_size kernel passed to DispersionThresholdDebug
    @param output_gain_map optional path; a map filled with the first
      image's gain and shaped like the full panel 0 is pickled there
    @param max_images number of images to analyse (at least one is used)
    @param algorithm "kabsch" (default, the result that was always used
      before) takes the index of dispersion from DispersionThresholdDebug;
      any other value uses the explicit variance/mean of 10x10 windows,
      which previously was computed for every image and then thrown away
    @return the gain estimated from the first image
    """
    from dials.algorithms.image.threshold import DispersionThresholdDebug
    from libtbx.math_utils import nearest_integer as nint

    NSQ = 200     # edge length of the analysed block
    ANCHOR = 400  # offset of the block on panel 0, used for both axes

    gains = flex.double()
    # range() instead of the Python-2-only xrange()
    for image_no in range(len(imageset)):
        full_image = imageset.get_raw_data(image_no)
        small_section = full_image[0].matrix_copy_block(ANCHOR, ANCHOR, NSQ, NSQ)
        print("This small section", len(small_section), "mean ist",
              flex.mean(small_section.as_double()))
        # From here on only the cropped block is analysed, so every per-panel
        # container has exactly one entry (looping over the detector's panels
        # would index past the end for multi-panel detectors).
        raw_data = (small_section,)

        gain_value = 1
        gain_map = [flex.double(panel.accessor(), gain_value) for panel in raw_data]

        full_mask = imageset.get_mask(image_no)
        mask = (full_mask[0].matrix_copy_block(ANCHOR, ANCHOR, NSQ, NSQ),)

        # dummy values, shouldn't affect results
        min_local = 0
        nsigma_b = 6
        nsigma_s = 3
        global_threshold = 0

        dispersion = flex.double()
        if algorithm == "kabsch":
            for i_panel in range(len(raw_data)):
                kabsch = DispersionThresholdDebug(
                    raw_data[i_panel].as_double(), mask[i_panel],
                    gain_map[i_panel], kernel_size, nsigma_b, nsigma_s,
                    global_threshold, min_local)
                a_section = kabsch.index_of_dispersion().matrix_copy_block(
                    5, 5, NSQ - 15, NSQ - 15)
                print("mean of a_section", flex.mean(a_section))
                dispersion.extend(a_section.as_1d())
        else:
            # explicit variance / mean over 10x10 windows of the block
            for ipix in range(5, NSQ - 15):
                for spix in range(5, NSQ - 15):
                    data = small_section.matrix_copy_block(ipix, spix, 10, 10).as_double()
                    datasq = data * data
                    means = flex.mean(data)
                    var = flex.mean(datasq) - (means) ** 2
                    dispersion.append(var / means)

        sorted_dispersion = flex.sorted(dispersion)

        q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
        q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
        iqr = q3 - q1

        print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))
        if iqr == 0.0:
            raise Sorry(
                'Unable to robustly estimate the variation of pixel values.')

        inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & \
                     (sorted_dispersion < (q3 + 1.5 * iqr))
        sorted_dispersion = sorted_dispersion.select(inlier_sel)
        gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        print("Estimated gain: %.2f" % gain)
        gains.append(gain)

        if image_no == 0:
            gain0 = gain
        if image_no + 1 >= max_images:
            break

    if len(gains) > 1:
        stats = flex.mean_and_variance(gains)
        print("Average gain: %.2f +/- %.2f" %
              (stats.mean(), stats.unweighted_sample_standard_deviation()))

    if output_gain_map:
        # Always size the map from the full first image; the cropped block
        # left over from the loop would give an NSQ x NSQ map.
        first_image = imageset.get_raw_data(0)
        # write the gain map
        import six.moves.cPickle as pickle
        gain_map = flex.double(flex.grid(first_image[0].all()), gain0)
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    if 0:
        # disabled diagnostic scatter plot of a random sample of the inliers
        sel = flex.random_selection(population_size=len(sorted_dispersion),
                                    sample_size=10000)
        sorted_dispersion = sorted_dispersion.select(sel)

        from matplotlib import pyplot
        pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
        pyplot.ylim(0, 10)
        pyplot.show()

    return gain0
def estimate_gain(imageset, kernel_size=(10, 10), output_gain_map=None):
    """
    Estimate the detector gain from the first image of an imageset.

    The coefficient_of_variation() maps reported by KabschDebug for every
    panel are pooled and sorted, values outside the 1.5 * IQR fences are
    dropped, and the middle element of what remains is the gain.

    @param imageset provides get_detector(), get_raw_data(0), get_mask(0)
    @param kernel_size kernel passed to KabschDebug
    @param output_gain_map optional path; a map filled with the estimated
      gain and shaped like panel 0 is pickled there
    @return the estimated gain
    """
    from dials.algorithms.image.threshold import KabschDebug
    from libtbx.math_utils import nearest_integer as nint

    detector = imageset.get_detector()
    raw_data = imageset.get_raw_data(0)

    gain_value = 1
    gain_map = [flex.double(raw_data[i].accessor(), gain_value)
                for i in range(len(detector))]

    mask = imageset.get_mask(0)

    # dummy values, shouldn't affect results
    min_local = 0
    nsigma_b = 6
    nsigma_s = 3
    global_threshold = 0

    kabsch_debug_list = [
        KabschDebug(
            raw_data[i_panel].as_double(), mask[i_panel], gain_map[i_panel],
            kernel_size, nsigma_b, nsigma_s, global_threshold, min_local)
        for i_panel in range(len(detector))]

    dispersion = flex.double()
    for kabsch in kabsch_debug_list:
        dispersion.extend(kabsch.coefficient_of_variation().as_1d())

    sorted_dispersion = flex.sorted(dispersion)

    q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
    q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
    iqr = q3 - q1

    # Parenthesised single-argument form prints the same text whether print
    # is a statement or a function.
    print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))

    inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & \
                 (sorted_dispersion < (q3 + 1.5 * iqr))
    sorted_dispersion = sorted_dispersion.select(inlier_sel)
    gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    print("Estimated gain: %.2f" % gain)

    if output_gain_map:
        # write the gain map
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain)
        # HIGHEST_PROTOCOL is a binary format, so the file must be opened in
        # binary mode; the with-block also closes it, which the previous
        # bare open() never did.
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    if 0:
        # disabled diagnostic scatter plot of a random sample of the inliers
        sel = flex.random_selection(population_size=len(sorted_dispersion),
                                    sample_size=10000)
        sorted_dispersion = sorted_dispersion.select(sel)

        from matplotlib import pyplot
        pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
        pyplot.ylim(0, 10)
        pyplot.show()

    return gain
def __call__(self):
    """
    Collect per-event statistics for one run of a trial from the database.

    Two queries are issued through self.app.execute_query: one for events
    that have crystals whose bin statistics pass self.i_sigi_cutoff (only
    every self.sampling-th row is used, and only if at least one cell has a
    qualifying resolution bin), and one for events without any imageset,
    i.e. that failed to index. All arrays are returned in timestamp order.

    @return (timestamps, two_theta_low, two_theta_high, n_strong,
      resolutions, n_lattices). Missing two theta values are stored as -1,
      missing resolutions and lattice counts as 0.
    """
    from iotbx.detectors.cspad_detector_formats import reverse_timestamp

    run_numbers = [r.run for r in self.trial.runs]
    assert self.run.run in run_numbers
    rungroup_ids = [rg.id for rg in self.trial.rungroups]
    assert self.rungroup.id in rungroup_ids
    if len(self.trial.isoforms) > 0:
        cells = [isoform.cell for isoform in self.trial.isoforms]
    else:
        cells = self.app.get_trial_cells(self.trial.id, self.rungroup.id, self.run.id)

    # Only the number of entries collected here is used further down.
    high_res_bin_ids = []
    for cell in cells:
        bins = cell.bins
        d_mins = [float(b.d_min) for b in bins]
        if len(d_mins) == 0:
            continue
        if self.d_min is None:
            min_bin_index = d_mins.index(min(d_mins))
        else:
            d_maxes = [float(b.d_max) for b in bins]
            qualified_bin_indices = [
                i for i in range(len(bins))
                if d_maxes[i] >= self.d_min and d_mins[i] <= self.d_min]
            if len(qualified_bin_indices) == 0:
                continue
            min_bin_index = qualified_bin_indices[0]
        high_res_bin_ids.append(str(bins[min_bin_index].id))

    resolutions = flex.double()
    two_theta_low = flex.double()
    two_theta_high = flex.double()
    tag = self.app.params.experiment_tag
    timestamps, timestamps_s = flex.double(), []
    n_strong = flex.int()
    n_lattices = flex.int()

    def store_timestamp(ts):
        # Try reverse_timestamp first, then a plain number; anything else is
        # kept as a raw string. Dividing by 1000.0 keeps the second element
        # of the reverse_timestamp result (presumably milliseconds) even
        # where int / int truncates.
        try:
            rts = reverse_timestamp(ts)
            timestamps.append(rts[0] + (rts[1] / 1000.0))
        except ValueError:
            try:
                timestamps.append(float(ts))
            except ValueError:
                timestamps_s.append(ts)

    if len(high_res_bin_ids) > 0:
        # Get the stats in one query.
        query = """SELECT event.timestamp, event.n_strong, MIN(bin.d_min), event.two_theta_low, event.two_theta_high, COUNT(DISTINCT crystal.id)
                   FROM `%s_event` event
                   JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
                   JOIN `%s_imageset` imgset ON imgset.id = is_e.imageset_id
                   JOIN `%s_experiment` exp ON exp.imageset_id = imgset.id
                   JOIN `%s_crystal` crystal ON crystal.id = exp.crystal_id
                   JOIN `%s_cell` cell ON cell.id = crystal.cell_id
                   JOIN `%s_bin` bin ON bin.cell_id = cell.id
                   JOIN `%s_cell_bin` cb ON cb.bin_id = bin.id AND cb.crystal_id = crystal.id
                   WHERE event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d AND
                         cb.avg_i_sigi >= %f
                   GROUP BY event.id
                """ % (tag, tag, tag, tag, tag, tag, tag, tag, self.trial.id,
                       self.run.id, self.rungroup.id, self.i_sigi_cutoff)
        cursor = self.app.execute_query(query)
        for sample, row in enumerate(cursor.fetchall()):
            if sample % self.sampling != 0:
                continue
            ts, n_s, d_min, tt_low, tt_high, n_xtal = row
            try:
                d_min = float(d_min)
            except (TypeError, ValueError):
                # float(None) raises TypeError: a NULL minimum is treated
                # like an unparsable one and stored as 0 below
                d_min = None
            store_timestamp(ts)
            n_strong.append(n_s)
            two_theta_low.append(tt_low or -1)
            two_theta_high.append(tt_high or -1)
            resolutions.append(d_min or 0)
            n_lattices.append(n_xtal or 0)

        # only get results that are strings or ints, not a mix of both
        assert not (len(timestamps) > 0 and len(timestamps_s) > 0)

    # This left join query finds the events with no imageset, meaning they failed to index
    query = """SELECT event.timestamp, event.n_strong, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               LEFT JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               WHERE is_e.event_id IS NULL AND
                     event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d
            """ % (tag, tag, self.trial.id, self.run.id, self.rungroup.id)

    cursor = self.app.execute_query(query)
    for row in cursor.fetchall():
        ts, n_s, tt_low, tt_high = row
        store_timestamp(ts)
        n_strong.append(n_s)
        two_theta_low.append(tt_low or -1)
        two_theta_high.append(tt_high or -1)
        resolutions.append(0)
        n_lattices.append(0)

    if len(timestamps_s) > 0:
        # String timestamps: order the events by string comparison; the
        # returned "timestamps" are then the sorted positions, not times.
        timestamps = flex.double(
            [i[0] for i in sorted(enumerate(timestamps_s), key=lambda x: x[1])])
        order = flex.size_t([i for i in timestamps.iround()])
        timestamps = flex.sorted(timestamps)
    else:
        order = flex.sort_permutation(timestamps)
        timestamps = timestamps.select(order)
    n_strong = n_strong.select(order)
    two_theta_low = two_theta_low.select(order)
    two_theta_high = two_theta_high.select(order)
    resolutions = resolutions.select(order)
    n_lattices = n_lattices.select(order)

    return timestamps, two_theta_low, two_theta_high, n_strong, resolutions, n_lattices
def get_all_copies_selection(self):
    """
    Return the sorted selection made of the iselection of every copy of
    every group in this container (master selections are not included).
    """
    every_copy = (ncs_copy for group in self for ncs_copy in group.copies)
    combined = flex.size_t()
    for ncs_copy in every_copy:
        combined.extend(ncs_copy.iselection)
    return flex.sorted(combined)
def load_image(self):
    """ Reads raw image file and extracts data for conversion into pickle
        format. Also estimates gain if turned on.

    Returns (data, img_type). data is the dpack() result, or None when the
    image could not be loaded. img_type is 'converted' or 'unconverted' for
    a loaded image and 'not imported' otherwise. self.gain is set only when
    an image was loaded (1.0 if it is not, or cannot be, estimated).
    """
    # Load raw image or image pickle
    try:
        with misc.Capturing() as junk_output:
            loaded_img = dxtbx.load(self.raw_img)
    except IOError:
        loaded_img = None

    if loaded_img is None:
        # img_type used to be left unassigned on this path, which made the
        # return statement fail with an UnboundLocalError.
        return None, 'not imported'

    # Extract image information
    raw_data = loaded_img.get_raw_data()
    detector = loaded_img.get_detector()[0]
    beam = loaded_img.get_beam()
    scan = loaded_img.get_scan()
    distance = detector.get_distance()
    pixel_size = detector.get_pixel_size()[0]
    overload = detector.get_trusted_range()[1]
    wavelength = beam.get_wavelength()
    beam_centre = detector.get_beam_centre(beam.get_s0())
    beam_x = beam_centre[0]
    beam_y = beam_centre[1]

    conversion = self.params.image_conversion

    if scan is None:
        timestamp = None
        if abs(beam_x - beam_y) <= 0.1 or conversion.square_mode == "None":
            img_type = 'converted'
        else:
            img_type = 'unconverted'
    else:
        # NOTE(review): math.modf returns (fractional, integer) parts, so
        # "msec" is a fraction of a second - confirm that this is what
        # evt_timestamp expects.
        msec, sec = math.modf(scan.get_epochs()[0])
        timestamp = evt_timestamp((sec, msec))

    # Any explicit conversion request forces a conversion. The prefix test
    # used to read "!= 'Auto' or != None", which is true for every value and
    # therefore marked every image as 'unconverted'.
    if (conversion.beamstop != 0
            or conversion.beam_center.x != 0
            or conversion.beam_center.y != 0
            or conversion.rename_pickle_prefix not in ('Auto', None)):
        img_type = 'unconverted'

    # Assemble datapack
    data = dpack(data=raw_data,
                 distance=distance,
                 pixel_size=pixel_size,
                 wavelength=wavelength,
                 beam_center_x=beam_x,
                 beam_center_y=beam_y,
                 ccd_image_saturation=overload,
                 saturated_value=overload,
                 timestamp=timestamp)

    if scan is not None:
        # images that come with scan information are always converted
        osc_start, osc_range = scan.get_oscillation()
        img_type = 'unconverted'
        if osc_start != osc_range:
            data['OSC_START'] = osc_start
            data['OSC_RANGE'] = osc_range
            data['TIME'] = scan.get_exposure_times()[0]

    # Estimate gain (or set gain to 1.00 if cannot calculate)
    # Cribbed from estimate_gain.py by Richard Gildea
    if self.params.advanced.estimate_gain:
        try:
            from dials.algorithms.image.threshold import KabschDebug
            from libtbx.math_utils import nearest_integer as nint
            panels = [raw_data]
            n_panels = len(loaded_img.get_detector())
            gain_value = 1
            kernel_size = (10, 10)
            gain_map = [flex.double(panels[i].accessor(), gain_value)
                        for i in range(n_panels)]
            mask = loaded_img.get_mask()
            # dummy values, shouldn't affect results
            min_local = 0
            nsigma_b = 6
            nsigma_s = 3
            global_threshold = 0
            kabsch_debug_list = [
                KabschDebug(panels[i_panel].as_double(), mask[i_panel],
                            gain_map[i_panel], kernel_size, nsigma_b,
                            nsigma_s, global_threshold, min_local)
                for i_panel in range(n_panels)]

            dispersion = flex.double()
            for kabsch in kabsch_debug_list:
                dispersion.extend(kabsch.coefficient_of_variation().as_1d())

            # median of the values inside the 1.5 * IQR fences
            sorted_dispersion = flex.sorted(dispersion)
            q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
            q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
            iqr = q3 - q1
            inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & \
                         (sorted_dispersion < (q3 + 1.5 * iqr))
            sorted_dispersion = sorted_dispersion.select(inlier_sel)
            self.gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        except IndexError:
            self.gain = 1.0
    else:
        self.gain = 1.0

    return data, img_type
def estimate_gain(imageset, kernel_size=(10, 10), output_gain_map=None):
    """
    Estimate the detector gain from the first image of an imageset.

    Pools the coefficient_of_variation() maps that KabschDebug reports for
    every panel, removes the values outside the 1.5 * IQR fences and takes
    the middle element of the remaining sorted values as the gain.

    @param imageset provides get_detector(), get_raw_data(0), get_mask(0)
    @param kernel_size kernel passed to KabschDebug
    @param output_gain_map optional path; when given, a map filled with the
      estimated gain and shaped like panel 0 is pickled there
    @return the estimated gain
    """
    from dials.algorithms.image.threshold import KabschDebug
    from libtbx.math_utils import nearest_integer as nint

    detector = imageset.get_detector()
    n_panels = len(detector)
    raw_data = imageset.get_raw_data(0)

    gain_value = 1
    gain_map = [flex.double(raw_data[i].accessor(), gain_value)
                for i in range(n_panels)]

    mask = imageset.get_mask(0)

    # dummy values, shouldn't affect results
    min_local = 0
    nsigma_b = 6
    nsigma_s = 3
    global_threshold = 0

    kabsch_debug_list = []
    for i_panel in range(n_panels):
        kabsch_debug_list.append(
            KabschDebug(raw_data[i_panel].as_double(), mask[i_panel],
                        gain_map[i_panel], kernel_size, nsigma_b, nsigma_s,
                        global_threshold, min_local))

    dispersion = flex.double()
    for kabsch in kabsch_debug_list:
        dispersion.extend(kabsch.coefficient_of_variation().as_1d())

    sorted_dispersion = flex.sorted(dispersion)

    q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
    q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
    iqr = q3 - q1

    # A single parenthesised argument prints identically as a statement
    # and as a function call.
    print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))

    inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & \
                 (sorted_dispersion < (q3 + 1.5 * iqr))
    sorted_dispersion = sorted_dispersion.select(inlier_sel)
    gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    print("Estimated gain: %.2f" % gain)

    if output_gain_map:
        # write the gain map
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain)
        # Binary mode is required for the binary HIGHEST_PROTOCOL format, and
        # the context manager closes the handle that was previously leaked.
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    if 0:
        # disabled diagnostic scatter plot of a random sample of the inliers
        sel = flex.random_selection(population_size=len(sorted_dispersion),
                                    sample_size=10000)
        sorted_dispersion = sorted_dispersion.select(sel)

        from matplotlib import pyplot
        pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
        pyplot.ylim(0, 10)
        pyplot.show()

    return gain
def estimate_gain(raw_data, offset=0, algorithm="kabsch", kernel_size=(10, 10), output_gain_map=None, max_images=1):
    """
    Estimate the gain from a 200x200 block of a single image array.

    The offset is subtracted from raw_data, a block of NSQ x NSQ pixels is
    copied out at (ANCHOR, ANCHOR), and the dispersion of that block is
    computed either through DispersionThresholdDebug (algorithm "kabsch")
    or, for any other algorithm value, as variance/mean of 10x10 windows.
    The middle element of the values inside the 1.5 * IQR fences is the
    gain, which is printed and appended to a local list.

    Raises Sorry when the inter-quartile range is zero.

    NOTE(review): as far as this block shows, nothing is returned and
    output_gain_map and max_images are never used - confirm whether the
    function continues beyond this point.
    """
    # the trailing comma makes this a one-element tuple ("one panel")
    raw_data = (raw_data - offset),
    from dials.algorithms.image.threshold import DispersionThresholdDebug
    gains = flex.double()
    if True:
        NSQ = 200
        ANCHOR = 400
        small_section = raw_data[0].matrix_copy_block(ANCHOR, ANCHOR, NSQ, NSQ)
        print("This small section", len(small_section), "mean is", flex.mean(small_section.as_double()))
        # from here on only the cropped block is analysed
        raw_data = (small_section, )
        gain_value = 1
        gain_map = [ flex.double(raw_data[i].accessor(), gain_value) for i in range(len(raw_data)) ]
        # every pixel of the block is flagged True in the mask
        mask = [ flex.bool(raw_data[i].accessor(), True) for i in range(len(raw_data)) ]
        min_local = 0
        # dummy values, shouldn't affect results
        nsigma_b = 6
        nsigma_s = 3
        global_threshold = 0
        kabsch_debug_list = []
        for i_panel in range(1):
            kabsch_debug_list.append(
                DispersionThresholdDebug(raw_data[i_panel].as_double(), mask[i_panel], gain_map[i_panel], kernel_size, nsigma_b, nsigma_s, global_threshold, min_local))
        if algorithm != "kabsch":
            # explicit variance / mean over 10x10 windows of the block
            dispersion = flex.double()
            for ipix in range(5, NSQ - 15):
                for spix in range(5, NSQ - 15):
                    data = small_section.matrix_copy_block(ipix, spix, 10, 10).as_double()
                    datasq = data * data
                    means = flex.mean(data)
                    var = flex.mean(datasq) - (means)**2
                    dispersion.append(var / means)
        else:
            # index of dispersion as reported by DispersionThresholdDebug,
            # restricted to the same inner region as the explicit loop
            dispersion = flex.double()
            for kabsch in kabsch_debug_list:
                a_section = kabsch.index_of_dispersion().matrix_copy_block(
                    5, 5, NSQ - 15, NSQ - 15)
                print("mean of a_section", flex.mean(a_section))
                dispersion.extend(a_section.as_1d())
        #ST = flex.mean_and_variance(dispersion)
        #from IPython import embed; embed()
        sorted_dispersion = flex.sorted(dispersion)
        from libtbx.math_utils import nearest_integer as nint
        # quartiles are read straight from the sorted values
        q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
        q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
        iqr = q3 - q1
        print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))
        if iqr == 0.0:
            raise Sorry(
                'Unable to robustly estimate the variation of pixel values.')
        # keep only the values strictly inside the 1.5 * IQR fences
        inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & (sorted_dispersion < (q3 + 1.5 * iqr))
        sorted_dispersion = sorted_dispersion.select(inlier_sel)
        # middle element of the remaining sorted values
        gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        print("Estimated gain %s: %.2f" % (algorithm, gain))
        gains.append(gain)