Example #1
def five_number_summary(data):
    """
  Returns the Tukey five number summary (min, lower hinge, median, upper hinge,
  max) for a sequence of observations. This function gives the same results
  as R's fivenum function.
  """
    try:
        sorts = flex.sorted(data)
    except AttributeError:
        sorts = sorted(data)
    n = len(sorts)
    if n % 2:
        med = sorts[n // 2]
        lower = sorts[:((n // 2) + 1)]
        upper = sorts[(n // 2):]
    else:
        med = (sorts[n // 2] + sorts[n // 2 - 1]) / 2
        lower = sorts[:(n // 2)]
        upper = sorts[(n // 2):]
    n = len(lower)
    if n % 2:
        lhinge = lower[n // 2]
        uhinge = upper[n // 2]
    else:
        lhinge = (lower[n // 2] + lower[n // 2 - 1]) / 2
        uhinge = (upper[n // 2] + upper[n // 2 - 1]) / 2
    return sorts[0], lhinge, med, uhinge, sorts[-1]
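
A quick sanity check against R's fivenum. A minimal sketch, assuming the module-level flex import is present (plain lists fall back to the built-in sorted via the AttributeError handler):

print(five_number_summary([1, 2, 3, 4, 5, 6, 7, 8, 9]))
# -> (1, 3, 5, 7, 9), the same as R's fivenum(1:9)
print(five_number_summary([1.0, 2.0, 3.0, 4.0]))
# -> (1.0, 1.5, 2.5, 3.5, 4.0)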
Example #2
def join_selections(sel1, sel2):
  """Return the sorted union of two flex.size_t selections."""
  intersections = sel1.intersection_i_seqs(sel2)
  unique_sel = flex.bool(len(sel1), True)
  unique_sel.set_selected(intersections[0], False)
  sel1 = sel1.select(unique_sel)
  sel1.extend(sel2)
  return flex.sorted(sel1)
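
For intuition, the result is simply the sorted union of the two selections; an illustrative plain-Python equivalent (not the flex-based implementation above):

def join_selections_plain(sel1, sel2):
  # (sel1 minus the intersection) extended by sel2 == the union
  return sorted(set(sel1) | set(sel2))

print(join_selections_plain([0, 2, 5], [2, 3]))  # [0, 2, 3, 5]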
Example #3
def sieve_fit(sites_fixed,
              sites_moving,
              selection=None,
              frac_discard=0.5):
  """
  Reference: Chothia & Lesk???
  """
  assert (sites_fixed.size() == sites_moving.size() > 0)
  if (selection is None):
    selection = flex.bool(sites_fixed.size(), True)
  # step 1: superpose using originally selected atoms
  sites_fixed_aln = sites_fixed.select(selection)
  sites_moving_aln = sites_moving.select(selection)
  lsq_fit_obj = least_squares_fit(
    reference_sites=sites_fixed_aln,
    other_sites=sites_moving_aln)
  sites_moving_new = lsq_fit_obj.other_sites_best_fit()
  # step 2: discard the worst-fitting fraction (frac_discard) of sites, and superpose again
  deltas = (sites_fixed_aln - sites_moving_new).norms()
  deltas_sorted = flex.sorted(deltas)
  cutoff = deltas_sorted[int((1-frac_discard)*deltas.size())]
  selection = (deltas <= cutoff)  # keep the sites at or below the deviation cutoff
  if (selection.count(True) == 0):
    return lsq_fit_obj
  sites_fixed_aln = sites_fixed_aln.select(selection)
  sites_moving_aln = sites_moving_aln.select(selection)
  lsq_fit_obj = least_squares_fit(
    reference_sites=sites_fixed_aln,
    other_sites=sites_moving_aln)
  return lsq_fit_obj
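
A minimal usage sketch, assuming least_squares_fit is scitbx.math.superpose.least_squares_fit and using made-up coordinates in which the last site is an outlier:

from scitbx.array_family import flex
from scitbx.math.superpose import least_squares_fit

sites_fixed = flex.vec3_double([(0, 0, 0), (1, 0, 0), (0, 1, 0), (5, 5, 5)])
sites_moving = flex.vec3_double([(0.1, 0, 0), (1.1, 0, 0), (0.1, 1, 0), (9, 9, 9)])
fit = sieve_fit(sites_fixed, sites_moving)  # the outlier pair is sieved out in step 2
print(fit.other_sites_best_fit()[0])  # first retained moving site after the final superposition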
Example #4
  def normal_probability_plot(self, data, rankits_sel=None, plot=False):
    """ Use normal probability analysis to determine if a set of data is normally distributed
    See https://en.wikipedia.org/wiki/Normal_probability_plot.
    Rankits are computed in the same way as qqnorm does in R.
    @param data flex array
    @param rankits_sel only use the rankits in a certain range. Useful for outlier rejection. Should be
    a tuple such as (-0.5,0.5).
    @param plot whether to show the normal probability plot
    """
    from scitbx.math import distributions
    import numpy as np
    norm = distributions.normal_distribution()

    n = len(data)
    if n <= 10:
      a = 3/8
    else:
      a = 0.5

    sorted_data = flex.sorted(data)
    rankits = flex.double([norm.quantile((i+1-a)/(n+1-(2*a))) for i in range(n)])

    if rankits_sel is None:
      corr, slope, offset = self.get_overall_correlation_flex(sorted_data, rankits)
    else:
      sel = (rankits >= rankits_sel[0]) & (rankits <= rankits_sel[1])
      corr, slope, offset = self.get_overall_correlation_flex(sorted_data.select(sel), rankits.select(sel))

    if plot:
      from matplotlib import pyplot as plt
      f = plt.figure(0)
      lim = -5, 5
      x = np.linspace(lim[0],lim[1],100) # 100 linearly spaced numbers
      y = slope * x + offset
      plt.plot(sorted_data, rankits, '-')
      #plt.plot(x,y)
      plt.title("CC: %.3f Slope: %.3f Offset: %.3f"%(corr, slope, offset))
      plt.xlabel("Sorted data")
      plt.ylabel("Rankits")
      plt.xlim(lim); plt.ylim(lim)
      plt.gca().set_aspect('equal')

      f = plt.figure(1)
      h = flex.histogram(sorted_data, n_slots=100, data_min = lim[0], data_max = lim[1])
      stats = flex.mean_and_variance(sorted_data)
      plt.plot(h.slot_centers().as_numpy_array(), h.slots().as_numpy_array(), '-')
      plt.xlim(lim)
      plt.xlabel("Sorted data")
      plt.ylabel("Count")
      plt.title("Normalized data mean: %.3f +/- %.3f"%(stats.mean(), stats.unweighted_sample_standard_deviation()))

      if self.scaler.params.raw_data.error_models.sdfac_refine.plot_refinement_steps:
        plt.ion()
        plt.pause(0.05)

    return corr, slope, offset
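
The rankit construction above can be exercised on its own. A minimal sketch, assuming scitbx is available (expected_rankits is a made-up helper name for illustration):

from scitbx.array_family import flex
from scitbx.math import distributions

def expected_rankits(n):
  # same (i + 1 - a) / (n + 1 - 2a) convention as R's qqnorm
  norm = distributions.normal_distribution()
  a = 3 / 8 if n <= 10 else 0.5
  return flex.double([norm.quantile((i + 1 - a) / (n + 1 - 2 * a))
                      for i in range(n)])

r = expected_rankits(100)
print(r[0], r[99])  # symmetric about zero, roughly -2.58 and +2.58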
Example #5
def npp(values, input_mean_variance):
  import math
  from scitbx.math import distributions
  from scitbx.array_family import flex
  distribution = distributions.normal_distribution()
  values = flex.sorted(values)
  mean, variance = input_mean_variance
  scaled = (values - mean) / math.sqrt(variance)
  expected = distribution.quantiles(values.size())

  return expected, scaled
Example #6
File: NPP.py Project: xia2/xia2
def npp_ify(values, input_mean_variance=None):
  '''Analyse data in values (assumed to be drawn from one population) and
  return the sorted list of (expected, observed) deviation from the mean.'''

  distribution = distributions.normal_distribution()
  values = flex.sorted(values)
  if input_mean_variance:
    mean, variance = input_mean_variance
  else:
    mean, variance = mean_variance(values)

  scaled = (values - mean) / math.sqrt(variance)
  expected = distribution.quantiles(values.size())

  return expected, scaled
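
A minimal usage sketch, assuming the module-level imports of NPP.py (math, flex, distributions and its mean_variance helper) are in place:

from scitbx.array_family import flex

values = flex.random_double(1000)  # uniform data, so the plot will curve away from a straight line
expected, scaled = npp_ify(values)
print(expected.size(), scaled.size())  # 1000 1000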
Example #7
def ncs_group_iselection(ncs_restraints_group_list, group_num):
    """
  Collects and returns iselection of all related atoms in NCS group

  Args:
    ncs_restraints_group_list : list of ncs restraints group objects
    group_num (int): the group number in the list (first group is 0)

  Returns:
    isel (flex.size_t): complete NCS group selection
  """
    # check that the number of the NCS group is valid
    if group_num >= len(ncs_restraints_group_list): return flex.size_t()
    gr = ncs_restraints_group_list[group_num]
    isel = gr.master_iselection.deep_copy()  # copy so that extend() below does not grow the master selection
    for cp in gr.copies:
        isel.extend(cp.iselection)
    # make sure sequential order of selection indices
    return flex.sorted(isel)
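
extend mutates flex arrays in place, so the copy matters here; the next example uses deep_copy for the same reason. A minimal sketch of the aliasing pitfall, assuming scitbx:

from scitbx.array_family import flex

master = flex.size_t([4, 1])
isel = master.deep_copy()  # without deep_copy, extend() below would also grow master
isel.extend(flex.size_t([3, 0]))
print(list(flex.sorted(isel)))  # [0, 1, 3, 4]
print(list(master))             # [4, 1], unchanged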
Example #8
  def whole_group_iselection(self):
    isel = self.master_iselection.deep_copy()
    for cp in self.copies:
      isel.extend(cp.iselection)
    # make sure sequential order of selection indices
    return flex.sorted(isel)
Example #9
class SingleImage(object):
    def __init__(self, img, init, verbose=True, imported_grid=None):
        """ Constructor for the SingleImage object using a raw image file or pickle
    """

        # Initialize parameters
        self.params = init.params
        self.args = init.args
        self.raw_img = img[2]
        self.conv_img = img[2]
        self.img_index = img[0]
        self.status = None
        self.fail = None
        self.final = None
        self.log_info = []
        self.gs_results = []
        self.main_log = init.logfile
        self.verbose = verbose
        self.hmed = self.params.cctbx.grid_search.height_median
        self.amed = self.params.cctbx.grid_search.area_median

        self.input_base = init.input_base
        self.conv_base = init.conv_base
        self.int_base = init.int_base
        self.obj_base = init.obj_base
        self.fin_base = init.fin_base
        self.viz_base = init.viz_base
        self.tmp_base = init.tmp_base
        self.abort_file = os.path.join(self.int_base, '.abort.tmp')

        self.obj_path = None
        self.obj_file = None
        self.fin_path = None
        self.fin_file = None
        self.viz_path = None

# ============================== SELECTION-ONLY FUNCTIONS ============================== #

    def import_int_file(self, init):
        """ Replaces path settings in imported image object with new settings
        NEED TO RE-DO LATER """

        if os.path.isfile(self.abort_file):
            self.fail = 'aborted'
            return self

        # Generate paths to output files
        self.params = init.params
        self.main_log = init.logfile
        self.input_base = init.input_base
        self.conv_base = init.conv_base
        self.int_base = init.int_base
        self.obj_base = init.obj_base
        self.fin_base = init.fin_base
        self.viz_base = init.viz_base
        self.obj_path = misc.make_image_path(self.conv_img, self.input_base,
                                             self.obj_base)
        self.obj_file = os.path.abspath(
            os.path.join(
                self.obj_path,
                os.path.basename(self.conv_img).split('.')[0] + ".int"))
        self.fin_path = misc.make_image_path(self.conv_img, self.input_base,
                                             self.fin_base)
        self.fin_file = os.path.abspath(
            os.path.join(
                self.fin_path,
                os.path.basename(self.conv_img).split('.')[0] + "_int.pickle"))
        self.final['final'] = self.fin_file
        self.final['img'] = self.conv_img
        self.viz_path = misc.make_image_path(self.conv_img, self.input_base,
                                             self.viz_base)
        self.viz_file = os.path.join(
            self.viz_path,
            os.path.basename(self.conv_img).split('.')[0] + "_int.png")

        # Create actual folders (if necessary)
        try:
            if not os.path.isdir(self.obj_path):
                os.makedirs(self.obj_path)
            if not os.path.isdir(self.fin_path):
                os.makedirs(self.fin_path)
            if not os.path.isdir(self.viz_path):
                os.makedirs(self.viz_path)
        except OSError:
            pass

        # Grid search / integration log file
        self.int_log = os.path.join(
            self.fin_path,
            os.path.basename(self.conv_img).split('.')[0] + '.tmp')

        # Reset status to 'grid search' to pick up at selection (if no fail)
        if self.fail is None:
            self.status = 'bypass grid search'

        return self

    def determine_gs_result_file(self):
        """ For 'selection-only' cctbx.xfel runs, determine where the image objects are """
        if self.params.cctbx.selection.select_only.grid_search_path is not None:
            obj_path = os.path.abspath(
                self.params.cctbx.selection.select_only.grid_search_path)
        else:
            run_number = int(os.path.basename(self.int_base)) - 1
            obj_path = "{}/integration/{:03d}/image_objects"\
                      "".format(os.path.abspath(os.curdir), run_number)
        gs_result_file = os.path.join(obj_path,
                                      os.path.basename(self.obj_file))
        return gs_result_file

# =============================== IMAGE IMPORT FUNCTIONS =============================== #

    def load_image(self):
        """ Reads raw image file and extracts data for conversion into pickle
        format. Also estimates gain if turned on."""
        # Load raw image or image pickle

        try:
            with misc.Capturing() as junk_output:
                loaded_img = dxtbx.load(self.raw_img)
        except IOError:
            loaded_img = None

        # Extract image information
        if loaded_img is not None:
            raw_data = loaded_img.get_raw_data()
            detector = loaded_img.get_detector()[0]
            beam = loaded_img.get_beam()
            scan = loaded_img.get_scan()
            distance = detector.get_distance()
            pixel_size = detector.get_pixel_size()[0]
            overload = detector.get_trusted_range()[1]
            wavelength = beam.get_wavelength()
            beam_x = detector.get_beam_centre(beam.get_s0())[0]
            beam_y = detector.get_beam_centre(beam.get_s0())[1]

            if scan is None:
                timestamp = None
                img_type = 'pickle'
            else:
                img_type = 'raw'
                msec, sec = math.modf(scan.get_epochs()[0])
                timestamp = evt_timestamp((sec, msec))

            # Assemble datapack
            data = dpack(data=raw_data,
                         distance=distance,
                         pixel_size=pixel_size,
                         wavelength=wavelength,
                         beam_center_x=beam_x,
                         beam_center_y=beam_y,
                         ccd_image_saturation=overload,
                         saturated_value=overload,
                         timestamp=timestamp)

            if scan is not None:
                osc_start, osc_range = scan.get_oscillation()
                if osc_start != osc_range:
                    data['OSC_START'] = 0  #osc_start
                    data['OSC_RANGE'] = 0  #osc_start
                    data['TIME'] = scan.get_exposure_times()[0]
        else:
            data = None
            img_type = 'not imported'

        # Estimate gain (or set gain to 1.00 if cannot calculate)
        # Cribbed from estimate_gain.py by Richard Gildea
        if self.params.advanced.estimate_gain:
            try:
                from dials.algorithms.image.threshold import KabschDebug
                raw_data = [raw_data]

                gain_value = 1
                kernel_size = (10, 10)
                gain_map = [
                    flex.double(raw_data[i].accessor(), gain_value)
                    for i in range(len(loaded_img.get_detector()))
                ]
                mask = loaded_img.get_mask()
                min_local = 0

                # dummy values, shouldn't affect results: REPLACE WITH SETTINGS!
                nsigma_b = 6
                nsigma_s = 3
                global_threshold = 0

                kabsch_debug_list = []
                for i_panel in range(len(loaded_img.get_detector())):
                    kabsch_debug_list.append(
                        KabschDebug(raw_data[i_panel].as_double(),
                                    mask[i_panel], gain_map[i_panel],
                                    kernel_size, nsigma_b, nsigma_s,
                                    global_threshold, min_local))

                dispersion = flex.double()
                for kabsch in kabsch_debug_list:
                    dispersion.extend(
                        kabsch.coefficient_of_variation().as_1d())

                sorted_dispersion = flex.sorted(dispersion)
                from libtbx.math_utils import nearest_integer as nint

                q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
                q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
                q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
                iqr = q3 - q1

                inlier_sel = (sorted_dispersion >
                              (q1 - 1.5 * iqr)) & (sorted_dispersion <
                                                   (q3 + 1.5 * iqr))
                sorted_dispersion = sorted_dispersion.select(inlier_sel)
                self.gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
            except IndexError:
                self.gain = 1.0
        else:
            self.gain = 1.0

        return data, img_type
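
The header-extraction pattern in load_image can be tried on any format dxtbx understands. A minimal sketch, assuming dxtbx is installed and using a hypothetical file name:

import dxtbx

img = dxtbx.load("example_0001.cbf")  # hypothetical path
detector = img.get_detector()[0]
beam = img.get_beam()
print("distance (mm):", detector.get_distance())
print("pixel size (mm):", detector.get_pixel_size()[0])
print("wavelength (A):", beam.get_wavelength())
print("beam centre (mm):", detector.get_beam_centre(beam.get_s0()))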
Example #10
    def del_anom_normal_plot(intensities, strong_cutoff=0.0):
        """Make a normal probability plot of the normalised anomalous differences."""
        diff_array = intensities.anomalous_differences()
        if not diff_array.data().size():
            return {}
        delta = diff_array.data() / diff_array.sigmas()

        norm = distributions.normal_distribution()

        n = len(delta)
        if n <= 10:
            a = 3 / 8
        else:
            a = 0.5

        y = flex.sorted(delta)
        x = [norm.quantile((i + 1 - a) / (n + 1 - (2 * a))) for i in range(n)]

        H, xedges, yedges = np.histogram2d(np.array(x),
                                           y.as_numpy_array(),
                                           bins=(200, 200))
        nonzeros = np.nonzero(H)
        z = np.empty(H.shape)
        z[:] = np.nan
        z[nonzeros] = H[nonzeros]

        # also make a histogram
        histy = flex.histogram(y, n_slots=100)
        # make a gaussian for reference also
        n = y.size()
        width = histy.slot_centers()[1] - histy.slot_centers()[0]
        gaussian = []
        from math import exp, pi

        for x in histy.slot_centers():
            gaussian.append(n * width * exp(-(x**2) / 2.0) / ((2.0 * pi)**0.5))

        title = "Normal probability plot of anomalous differences"
        plotname = "normal_distribution_plot"
        if strong_cutoff > 0.0:
            title += " (d > %.2f)" % strong_cutoff
            plotname += "_lowres"
        else:
            title += " (all data)"
            plotname += "_highres"
        return {
            plotname: {
                "data": [
                    {
                        "x": xedges.tolist(),
                        "y": yedges.tolist(),
                        "z": z.transpose().tolist(),
                        "type": "heatmap",
                        "name": "normalised deviations",
                        "colorbar": {
                            "title": "Number of reflections",
                            "titleside": "right",
                        },
                        "colorscale": "Jet",
                    },
                    {
                        "x": [-5, 5],
                        "y": [-5, 5],
                        "type": "scatter",
                        "mode": "lines",
                        "name": "z = m",
                        "color": "rgb(0,0,0)",
                    },
                ],
                "layout": {
                    "title": title,
                    "xaxis": {
                        "anchor": "y",
                        "title": "expected delta",
                        "range": [-4, 4],
                    },
                    "yaxis": {
                        "anchor": "x",
                        "title": "observed delta",
                        "range": [-5, 5],
                    },
                },
                "help":
                """\
    This plot shows the normalised anomalous differences, sorted in order and
    plotted against the expected order based on a normal distribution model.
    A true normal distribution of deviations would give the straight line indicated.

    [1] P. L. Howell and G. D. Smith, J. Appl. Cryst. (1992). 25, 81-86
    https://doi.org/10.1107/S0021889891010385
    [2] P. Evans, Acta Cryst. (2006). D62, 72-82
    https://doi.org/10.1107/S0907444905036693
    """,
            }
        }
Example #11
def estimate_gain(imageset,
                  kernel_size=(10, 10),
                  output_gain_map=None,
                  max_images=1):
    detector = imageset.get_detector()

    from dials.algorithms.image.threshold import DispersionThresholdDebug

    gains = flex.double()

    for image_no in range(len(imageset)):
        raw_data = imageset.get_raw_data(image_no)

        gain_value = 1
        gain_map = [
            flex.double(raw_data[i].accessor(), gain_value)
            for i in range(len(detector))
        ]

        mask = imageset.get_mask(image_no)

        min_local = 0

        # dummy values, shouldn't affect results
        nsigma_b = 6
        nsigma_s = 3
        global_threshold = 0

        kabsch_debug_list = [
            DispersionThresholdDebug(
                raw_data[i_panel].as_double(),
                mask[i_panel],
                gain_map[i_panel],
                kernel_size,
                nsigma_b,
                nsigma_s,
                global_threshold,
                min_local,
            ) for i_panel in range(len(detector))
        ]

        dispersion = flex.double()
        for kabsch in kabsch_debug_list:
            dispersion.extend(kabsch.index_of_dispersion().as_1d())

        sorted_dispersion = flex.sorted(dispersion)
        from libtbx.math_utils import nearest_integer as nint

        q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
        q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
        iqr = q3 - q1

        print(f"q1, q2, q3: {q1:.2f}, {q2:.2f}, {q3:.2f}")
        if iqr == 0.0:
            raise Sorry(
                "Unable to robustly estimate the variation of pixel values.")

        inlier_sel = (sorted_dispersion >
                      (q1 - 1.5 * iqr)) & (sorted_dispersion <
                                           (q3 + 1.5 * iqr))
        sorted_dispersion = sorted_dispersion.select(inlier_sel)
        gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        print(f"Estimated gain: {gain:.2f}")
        gains.append(gain)

        if image_no == 0:
            gain0 = gain
        if image_no + 1 >= max_images:
            break

    if len(gains) > 1:
        stats = flex.mean_and_variance(gains)
        print("Average gain: %.2f +/- %.2f" %
              (stats.mean(), stats.unweighted_sample_standard_deviation()))

    if output_gain_map:
        if len(gains) > 1:
            raw_data = imageset.get_raw_data(0)
        # write the gain map
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain0)
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return gain0
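
The statistic behind these estimates is the index of dispersion: for Poisson counts multiplied by a gain g, variance/mean equals g. A minimal numpy sketch of the same median-after-IQR-rejection recipe (an illustration of the principle, not the dials implementation):

import numpy as np

rng = np.random.default_rng(0)
true_gain = 2.5
image = true_gain * rng.poisson(lam=100.0, size=(400, 400))

# index of dispersion in 10x10 tiles; ~gain for Poisson data
tiles = image.reshape(40, 10, 40, 10).swapaxes(1, 2).reshape(-1, 100)
dispersion = tiles.var(axis=1) / tiles.mean(axis=1)

q1, q3 = np.percentile(dispersion, [25, 75])
iqr = q3 - q1
inliers = dispersion[(dispersion > q1 - 1.5 * iqr) & (dispersion < q3 + 1.5 * iqr)]
print("estimated gain: %.2f (true %.2f)" % (np.median(inliers), true_gain))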
Example #12
def estimate_gain(imageset,
                  kernel_size=(10, 10),
                  output_gain_map=None,
                  max_images=1):
    detector = imageset.get_detector()

    from dials.algorithms.image.threshold import DispersionThresholdDebug
    gains = flex.double()

    for image_no in range(len(imageset)):
        raw_data = imageset.get_raw_data(image_no)
        #from IPython import embed; embed()
        #this_data = raw_data[0]
        #raw_data = (this_data + 80),
        NSQ = 200
        small_section = raw_data[0].matrix_copy_block(400, 400, NSQ, NSQ)
        print("This small section", len(small_section), "mean ist",
              flex.mean(small_section.as_double()))
        raw_data = (small_section, )

        gain_value = 1
        gain_map = [
            flex.double(raw_data[i].accessor(), gain_value)
            for i in range(len(detector))
        ]

        mask = imageset.get_mask(image_no)
        mask = (mask[0].matrix_copy_block(400, 400, NSQ, NSQ)),
        #from IPython import embed; embed()
        min_local = 0

        # dummy values, shouldn't affect results
        nsigma_b = 6
        nsigma_s = 3
        global_threshold = 0

        kabsch_debug_list = []
        for i_panel in range(len(detector)):
            kabsch_debug_list.append(
                DispersionThresholdDebug(raw_data[i_panel].as_double(),
                                         mask[i_panel], gain_map[i_panel],
                                         kernel_size, nsigma_b, nsigma_s,
                                         global_threshold, min_local))

        dispersion = flex.double()
        for ipix in range(5, NSQ - 15):
            for spix in range(5, NSQ - 15):
                data = small_section.matrix_copy_block(ipix, spix, 10,
                                                       10).as_double()
                datasq = data * data
                means = flex.mean(data)
                var = flex.mean(datasq) - (means)**2
                #print(ipix,spix,var,var/means)
                dispersion.append(var / means)

        if True:
            dispersion = flex.double()
            for kabsch in kabsch_debug_list:
                a_section = kabsch.index_of_dispersion().matrix_copy_block(
                    5, 5, NSQ - 15, NSQ - 15)
                print("mean of a_section", flex.mean(a_section))
                dispersion.extend(a_section.as_1d())

        #ST = flex.mean_and_variance(dispersion)
        #from IPython import embed; embed()

        sorted_dispersion = flex.sorted(dispersion)
        from libtbx.math_utils import nearest_integer as nint

        q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
        q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
        iqr = q3 - q1

        print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))
        if iqr == 0.0:
            raise Sorry(
                'Unable to robustly estimate the variation of pixel values.')

        inlier_sel = (sorted_dispersion >
                      (q1 - 1.5 * iqr)) & (sorted_dispersion <
                                           (q3 + 1.5 * iqr))
        sorted_dispersion = sorted_dispersion.select(inlier_sel)
        gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        print("Estimated gain: %.2f" % gain)
        gains.append(gain)

        if image_no == 0:
            gain0 = gain
        if image_no + 1 >= max_images:
            break

    if len(gains) > 1:
        stats = flex.mean_and_variance(gains)
        print("Average gain: %.2f +/- %.2f" %
              (stats.mean(), stats.unweighted_sample_standard_deviation()))

    if output_gain_map:
        if len(gains) > 1:
            raw_data = imageset.get_raw_data(0)
        # write the gain map
        import six.moves.cPickle as pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain0)
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    if 0:
        sel = flex.random_selection(population_size=len(sorted_dispersion),
                                    sample_size=10000)
        sorted_dispersion = sorted_dispersion.select(sel)

        from matplotlib import pyplot
        pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
        pyplot.ylim(0, 10)
        pyplot.show()

    return gain0
Example #13
def estimate_gain(imageset, kernel_size=(10,10), output_gain_map=None):
  detector = imageset.get_detector()

  from dials.algorithms.image.threshold import KabschDebug

  raw_data = imageset.get_raw_data(0)

  gain_value = 1
  gain_map = [flex.double(raw_data[i].accessor(), gain_value)
              for i in range(len(detector))]

  mask = imageset.get_mask(0)

  min_local = 0

  # dummy values, shouldn't affect results
  nsigma_b = 6
  nsigma_s = 3
  global_threshold = 0

  kabsch_debug_list = []
  for i_panel in range(len(detector)):
    kabsch_debug_list.append(
      KabschDebug(
        raw_data[i_panel].as_double(), mask[i_panel], gain_map[i_panel],
        kernel_size, nsigma_b, nsigma_s, global_threshold, min_local))

  dispersion = flex.double()
  for kabsch in kabsch_debug_list:
    dispersion.extend(kabsch.coefficient_of_variation().as_1d())

  sorted_dispersion = flex.sorted(dispersion)
  from libtbx.math_utils import nearest_integer as nint

  q1 = sorted_dispersion[nint(len(sorted_dispersion)/4)]
  q2 = sorted_dispersion[nint(len(sorted_dispersion)/2)]
  q3 = sorted_dispersion[nint(len(sorted_dispersion)*3/4)]
  iqr = q3-q1

  print "q1, q2, q3: %.2f, %.2f, %.2f" %(q1, q2, q3)

  inlier_sel = (sorted_dispersion > (q1 - 1.5*iqr)) & (sorted_dispersion < (q3 + 1.5*iqr))
  sorted_dispersion = sorted_dispersion.select(inlier_sel)
  gain = sorted_dispersion[nint(len(sorted_dispersion)/2)]
  print "Estimated gain: %.2f" % gain

  if output_gain_map:
    # write the gain map
    import pickle
    gain_map = flex.double(flex.grid(raw_data[0].all()), gain)
    with open(output_gain_map, "wb") as fh:
      pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

  if 0:
    sel = flex.random_selection(population_size=len(sorted_dispersion), sample_size=10000)
    sorted_dispersion = sorted_dispersion.select(sel)

    from matplotlib import pyplot
    pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
    pyplot.ylim(0, 10)
    pyplot.show()

  return gain
Example #14
    def __call__(self):
        from iotbx.detectors.cspad_detector_formats import reverse_timestamp
        from xfel.ui.components.timeit import duration
        #import time
        #t1 = time.time()
        run_numbers = [r.run for r in self.trial.runs]
        assert self.run.run in run_numbers
        rungroup_ids = [rg.id for rg in self.trial.rungroups]
        assert self.rungroup.id in rungroup_ids
        if len(self.trial.isoforms) > 0:
            cells = [isoform.cell for isoform in self.trial.isoforms]
        else:
            cells = self.app.get_trial_cells(self.trial.id, self.rungroup.id,
                                             self.run.id)

        high_res_bin_ids = []
        for cell in cells:
            bins = cell.bins
            d_mins = [float(b.d_min) for b in bins]
            if len(d_mins) == 0: continue
            if self.d_min is None:
                min_bin_index = d_mins.index(min(d_mins))
            else:
                d_maxes = [float(b.d_max) for b in bins]
                qualified_bin_indices = [
                    i for i in range(len(bins))
                    if d_maxes[i] >= self.d_min and d_mins[i] <= self.d_min
                ]
                if len(qualified_bin_indices) == 0: continue
                min_bin_index = qualified_bin_indices[0]
            high_res_bin_ids.append(str(bins[min_bin_index].id))

        resolutions = flex.double()
        two_theta_low = flex.double()
        two_theta_high = flex.double()
        tag = self.app.params.experiment_tag
        timestamps, timestamps_s = flex.double(), []
        n_strong = flex.int()
        n_lattices = flex.int()
        if len(high_res_bin_ids) > 0:

            # Get the stats in one query.
            query = """SELECT event.timestamp, event.n_strong, MIN(bin.d_min), event.two_theta_low, event.two_theta_high, COUNT(DISTINCT crystal.id)
                 FROM `%s_event` event
                 JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
                 JOIN `%s_imageset` imgset ON imgset.id = is_e.imageset_id
                 JOIN `%s_experiment` exp ON exp.imageset_id = imgset.id
                 JOIN `%s_crystal` crystal ON crystal.id = exp.crystal_id
                 JOIN `%s_cell` cell ON cell.id = crystal.cell_id
                 JOIN `%s_bin` bin ON bin.cell_id = cell.id
                 JOIN `%s_cell_bin` cb ON cb.bin_id = bin.id AND cb.crystal_id = crystal.id
                 WHERE event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d AND
                       cb.avg_i_sigi >= %f
                 GROUP BY event.id
              """ % (tag, tag, tag, tag, tag, tag, tag, tag, self.trial.id,
                     self.run.id, self.rungroup.id, self.i_sigi_cutoff)
            cursor = self.app.execute_query(query)
            sample = -1
            for row in cursor.fetchall():
                sample += 1
                if sample % self.sampling != 0:
                    continue
                ts, n_s, d_min, tt_low, tt_high, n_xtal = row
                try:
                    d_min = float(d_min)
                except ValueError:
                    d_min = None
                try:
                    rts = reverse_timestamp(ts)
                    timestamps.append(rts[0] + (rts[1] / 1000))
                except ValueError:
                    try:
                        timestamps.append(float(ts))
                    except ValueError:
                        timestamps_s.append(ts)
                n_strong.append(n_s)
                two_theta_low.append(tt_low or -1)
                two_theta_high.append(tt_high or -1)
                resolutions.append(d_min or 0)
                n_lattices.append(n_xtal or 0)

        # only get results that are strings or ints, not a mix of both
        assert not (len(timestamps) > 0 and len(timestamps_s) > 0)

        # This left join query finds the events with no imageset, meaning they failed to index
        query = """SELECT event.timestamp, event.n_strong, event.two_theta_low, event.two_theta_high
               FROM `%s_event` event
               LEFT JOIN `%s_imageset_event` is_e ON is_e.event_id = event.id
               WHERE is_e.event_id IS NULL AND
                     event.trial_id = %d AND event.run_id = %d AND event.rungroup_id = %d
            """ % (tag, tag, self.trial.id, self.run.id, self.rungroup.id)

        cursor = self.app.execute_query(query)
        for row in cursor.fetchall():
            ts, n_s, tt_low, tt_high = row
            try:
                rts = reverse_timestamp(ts)
                timestamps.append(rts[0] + (rts[1] / 1000))
            except ValueError:
                try:
                    rts = float(ts)
                    timestamps.append(rts)
                except ValueError:
                    timestamps_s.append(ts)
            n_strong.append(n_s)
            two_theta_low.append(tt_low or -1)
            two_theta_high.append(tt_high or -1)
            resolutions.append(0)
            n_lattices.append(0)

        if len(timestamps_s) > 0:
            timestamps = flex.double([
                i[0]
                for i in sorted(enumerate(timestamps_s), key=lambda x: x[1])
            ])
            order = flex.size_t([i for i in timestamps.iround()])
            timestamps = flex.sorted(timestamps)
        else:
            order = flex.sort_permutation(timestamps)
            timestamps = timestamps.select(order)
        n_strong = n_strong.select(order)
        two_theta_low = two_theta_low.select(order)
        two_theta_high = two_theta_high.select(order)
        resolutions = resolutions.select(order)
        n_lattices = n_lattices.select(order)

        #t2 = time.time()
        #print "HitrateStats took %s" % duration(t1, t2)
        return timestamps, two_theta_low, two_theta_high, n_strong, resolutions, n_lattices
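
The sort-then-select idiom at the end keeps the parallel arrays aligned with the timestamps. A minimal sketch, assuming scitbx:

from scitbx.array_family import flex

timestamps = flex.double([3.0, 1.0, 2.0])
n_strong = flex.int([30, 10, 20])
order = flex.sort_permutation(timestamps)
print(list(timestamps.select(order)))  # [1.0, 2.0, 3.0]
print(list(n_strong.select(order)))    # [10, 20, 30]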
Example #15
  def get_all_copies_selection(self):
    result = flex.size_t()
    for nrg in self:
      for c in nrg.copies:
        result.extend(c.iselection)
    return flex.sorted(result)
Example #16
  def load_image(self):
    """ Reads raw image file and extracts data for conversion into pickle
        format. Also estimates gain if turned on."""
    # Load raw image or image pickle
    try:
      with misc.Capturing() as junk_output:
        loaded_img = dxtbx.load(self.raw_img)
    except IOError:
      loaded_img = None

    # Extract image information
    if loaded_img is not None:
      raw_data   = loaded_img.get_raw_data()
      detector   = loaded_img.get_detector()[0]
      beam       = loaded_img.get_beam()
      scan       = loaded_img.get_scan()
      distance   = detector.get_distance()
      pixel_size = detector.get_pixel_size()[0]
      overload   = detector.get_trusted_range()[1]
      wavelength = beam.get_wavelength()
      beam_x     = detector.get_beam_centre(beam.get_s0())[0]
      beam_y     = detector.get_beam_centre(beam.get_s0())[1]

      if scan is None:
        timestamp = None
        if abs(beam_x - beam_y) <= 0.1 or self.params.image_conversion.square_mode == "None":
          img_type = 'converted'
        else:
          img_type = 'unconverted'
      else:
        msec, sec = math.modf(scan.get_epochs()[0])
        timestamp = evt_timestamp((sec,msec))

      if self.params.image_conversion.beamstop != 0 or\
         self.params.image_conversion.beam_center.x != 0 or\
         self.params.image_conversion.beam_center.y != 0 or\
         self.params.image_conversion.rename_pickle_prefix not in ('Auto', None):
        img_type = 'unconverted'

      # Assemble datapack
      data = dpack(data=raw_data,
                   distance=distance,
                   pixel_size=pixel_size,
                   wavelength=wavelength,
                   beam_center_x=beam_x,
                   beam_center_y=beam_y,
                   ccd_image_saturation=overload,
                   saturated_value=overload,
                   timestamp=timestamp
                   )

      #print "data: ", type(raw_data)
      #print "pixel size: ", type(pixel_size)
      #print 'wavelength: ', type(wavelength)
      #print "beamX: ", type(beam_x)
      #print "saturation: ", type(overload)
      #print "timestamp: ", type(timestamp)

      #for i in dir(raw_data): print i

      #exit()

      if scan is not None:
        osc_start, osc_range = scan.get_oscillation()
        img_type = 'unconverted'
        if osc_start != osc_range:
          data['OSC_START'] = osc_start
          data['OSC_RANGE'] = osc_range
          data['TIME'] = scan.get_exposure_times()[0]

      # Estimate gain (or set gain to 1.00 if cannot calculate)
      # Cribbed from estimate_gain.py by Richard Gildea
      if self.params.advanced.estimate_gain:
        try:
          from dials.algorithms.image.threshold import KabschDebug
          raw_data = [raw_data]

          gain_value = 1
          kernel_size=(10,10)
          gain_map = [flex.double(raw_data[i].accessor(), gain_value)
                      for i in range(len(loaded_img.get_detector()))]
          mask = loaded_img.get_mask()
          min_local = 0

          # dummy values, shouldn't affect results
          nsigma_b = 6
          nsigma_s = 3
          global_threshold = 0

          kabsch_debug_list = []
          for i_panel in range(len(loaded_img.get_detector())):
            kabsch_debug_list.append(
              KabschDebug(
                raw_data[i_panel].as_double(), mask[i_panel], gain_map[i_panel],
                kernel_size, nsigma_b, nsigma_s, global_threshold, min_local))

          dispersion = flex.double()
          for kabsch in kabsch_debug_list:
            dispersion.extend(kabsch.coefficient_of_variation().as_1d())

          sorted_dispersion = flex.sorted(dispersion)
          from libtbx.math_utils import nearest_integer as nint

          q1 = sorted_dispersion[nint(len(sorted_dispersion)/4)]
          q2 = sorted_dispersion[nint(len(sorted_dispersion)/2)]
          q3 = sorted_dispersion[nint(len(sorted_dispersion)*3/4)]
          iqr = q3-q1

          inlier_sel = (sorted_dispersion > (q1 - 1.5*iqr)) & (sorted_dispersion < (q3 + 1.5*iqr))
          sorted_dispersion = sorted_dispersion.select(inlier_sel)
          self.gain = sorted_dispersion[nint(len(sorted_dispersion)/2)]
        except IndexError:
          self.gain = 1.0
      else:
        self.gain = 1.0

    else:
      data = None

    return data, img_type
Example #17
def estimate_gain(imageset, kernel_size=(10, 10), output_gain_map=None):
    detector = imageset.get_detector()

    from dials.algorithms.image.threshold import KabschDebug

    raw_data = imageset.get_raw_data(0)

    gain_value = 1
    gain_map = [
        flex.double(raw_data[i].accessor(), gain_value)
        for i in range(len(detector))
    ]

    mask = imageset.get_mask(0)

    min_local = 0

    # dummy values, shouldn't affect results
    nsigma_b = 6
    nsigma_s = 3
    global_threshold = 0

    kabsch_debug_list = []
    for i_panel in range(len(detector)):
        kabsch_debug_list.append(
            KabschDebug(raw_data[i_panel].as_double(), mask[i_panel],
                        gain_map[i_panel], kernel_size, nsigma_b, nsigma_s,
                        global_threshold, min_local))

    dispersion = flex.double()
    for kabsch in kabsch_debug_list:
        dispersion.extend(kabsch.coefficient_of_variation().as_1d())

    sorted_dispersion = flex.sorted(dispersion)
    from libtbx.math_utils import nearest_integer as nint

    q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
    q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
    iqr = q3 - q1

    print "q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3)

    inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & (sorted_dispersion <
                                                           (q3 + 1.5 * iqr))
    sorted_dispersion = sorted_dispersion.select(inlier_sel)
    gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    print "Estimated gain: %.2f" % gain

    if output_gain_map:
        # write the gain map
        import pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain)
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    if 0:
        sel = flex.random_selection(population_size=len(sorted_dispersion),
                                    sample_size=10000)
        sorted_dispersion = sorted_dispersion.select(sel)

        from matplotlib import pyplot
        pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
        pyplot.ylim(0, 10)
        pyplot.show()

    return gain
Example #18
def estimate_gain(raw_data,
                  offset=0,
                  algorithm="kabsch",
                  kernel_size=(10, 10),
                  output_gain_map=None,
                  max_images=1):
    raw_data = (raw_data - offset),
    from dials.algorithms.image.threshold import DispersionThresholdDebug
    gains = flex.double()

    if True:
        NSQ = 200
        ANCHOR = 400
        small_section = raw_data[0].matrix_copy_block(ANCHOR, ANCHOR, NSQ, NSQ)
        print("This small section", len(small_section), "mean is",
              flex.mean(small_section.as_double()))
        raw_data = (small_section, )

        gain_value = 1
        gain_map = [
            flex.double(raw_data[i].accessor(), gain_value)
            for i in range(len(raw_data))
        ]

        mask = [
            flex.bool(raw_data[i].accessor(), True)
            for i in range(len(raw_data))
        ]

        min_local = 0

        # dummy values, shouldn't affect results
        nsigma_b = 6
        nsigma_s = 3
        global_threshold = 0

        kabsch_debug_list = []
        for i_panel in range(1):
            kabsch_debug_list.append(
                DispersionThresholdDebug(raw_data[i_panel].as_double(),
                                         mask[i_panel], gain_map[i_panel],
                                         kernel_size, nsigma_b, nsigma_s,
                                         global_threshold, min_local))

        if algorithm != "kabsch":
            dispersion = flex.double()
            for ipix in range(5, NSQ - 15):
                for spix in range(5, NSQ - 15):
                    data = small_section.matrix_copy_block(ipix, spix, 10,
                                                           10).as_double()
                    datasq = data * data
                    means = flex.mean(data)
                    var = flex.mean(datasq) - (means)**2
                    dispersion.append(var / means)

        else:
            dispersion = flex.double()
            for kabsch in kabsch_debug_list:
                a_section = kabsch.index_of_dispersion().matrix_copy_block(
                    5, 5, NSQ - 15, NSQ - 15)
                print("mean of a_section", flex.mean(a_section))
                dispersion.extend(a_section.as_1d())

        #ST = flex.mean_and_variance(dispersion)
        #from IPython import embed; embed()

        sorted_dispersion = flex.sorted(dispersion)
        from libtbx.math_utils import nearest_integer as nint

        q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
        q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
        iqr = q3 - q1

        print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))
        if iqr == 0.0:
            raise Sorry(
                'Unable to robustly estimate the variation of pixel values.')

        inlier_sel = (sorted_dispersion >
                      (q1 - 1.5 * iqr)) & (sorted_dispersion <
                                           (q3 + 1.5 * iqr))
        sorted_dispersion = sorted_dispersion.select(inlier_sel)
        gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        print("Estimated gain %s: %.2f" % (algorithm, gain))
        gains.append(gain)