Example #1
  def estimate_cc_sig_fac(self):

    # A1.1. Estimation of sigma(CC) as a function of sample size.

    binner = self.intensities.setup_binner_counting_sorted(reflections_per_bin=200)

    a = flex.double()
    b = flex.double()
    for i in range(binner.n_bins_all()):
      count = binner.counts()[i]
      if count == 0:
        continue
      bin_isel = binner.array_indices(i)
      p = flex.random_permutation(count)
      p = p[:2 * (count // 2)] # ensure even count
      a.extend(self.intensities.data().select(bin_isel.select(p[:count//2])))
      b.extend(self.intensities.data().select(bin_isel.select(p[count//2:])))

    perm = flex.random_selection(a.size(), min(20000, a.size()))
    a = a.select(perm)
    b = b.select(perm)

    self.corr_unrelated = CorrelationCoefficientAccumulator(a, b)

    n_pairs = a.size()
    min_num_groups = 10 # minimum number of groups
    max_n_group = int(min(n_pairs/min_num_groups, 200)) # maximum number in group
    min_n_group = int(min(5, max_n_group)) # minimum number in group

    mean_ccs = flex.double()
    rms_ccs = flex.double()
    ns = flex.double()
    for n in range(min_n_group, max_n_group):
      ns.append(n)
      ccs = flex.double()
      for i in range(200):
        isel = flex.random_selection(a.size(), n)
        corr = CorrelationCoefficientAccumulator(a.select(isel), b.select(isel))
        ccs.append(corr.coefficient())

      mean_ccs.append(flex.mean(ccs))
      rms_ccs.append(flex.mean(flex.pow2(ccs))**0.5)

    x = 1/flex.pow(ns, 0.5)
    y = rms_ccs
    fit = flex.linear_regression(x, y)

    assert fit.is_well_defined()
    self.cc_sig_fac = fit.slope()

    if 0:
      from matplotlib import pyplot as plt
      plt.plot(x, y)
      plt.plot(
        plt.xlim(), [fit.slope() * x_ + fit.y_intercept() for x_ in plt.xlim()])
      plt.show()
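
Every example on this page revolves around the same call: flex.random_selection(population_size, sample_size) draws sample_size distinct indices from the range [0, population_size) without replacement, and .select() applies that selection to a flex array. A minimal self-contained sketch, assuming scitbx is installed:

from scitbx.array_family import flex

flex.set_random_seed(42)                    # make the draw reproducible
data = flex.double([float(i) for i in range(100)])
sel = flex.random_selection(100, 10)        # 10 distinct indices out of 100
subset = data.select(sel)                   # the sampled values
print(len(subset))                          # 10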
Example #2
File: fast_mcd.py  Project: kmdalton/dials
    def sample_data(data, sample_size):
        """sample (without replacement) the data vectors to select the same
        sample_size rows from each."""

        n = len(data[0])
        rows = flex.random_selection(n, sample_size)
        cols = [e.select(rows) for e in data]
        return cols
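
A hypothetical usage sketch (the arrays are invented for illustration): because the same row selection is applied to every column, corresponding values stay aligned across the sampled vectors.

from scitbx.array_family import flex

x = flex.double([1.0, 2.0, 3.0, 4.0, 5.0])
y = flex.double([10.0, 20.0, 30.0, 40.0, 50.0])
x_s, y_s = sample_data([x, y], 3)
assert len(x_s) == len(y_s) == 3  # the same 3 rows are kept from each column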
Example #4
    def generate_reflections(self):
        from cctbx.sgtbx import space_group, space_group_symbols

        from dials.algorithms.spot_prediction import IndexGenerator, ray_intersection

        sequence_range = self.scan.get_oscillation_range(deg=False)
        resolution = 2.0
        index_generator = IndexGenerator(
            self.crystal.get_unit_cell(),
            space_group(space_group_symbols(1).hall()).type(),
            resolution,
        )
        indices = index_generator.to_array()

        # Predict rays within the sequence range
        ray_predictor = ScansRayPredictor(self.experiments, sequence_range)
        obs_refs = ray_predictor(indices)

        # Take only those rays that intersect the detector
        intersects = ray_intersection(self.detector, obs_refs)
        obs_refs = obs_refs.select(intersects)

        # Re-predict using the Experiments predictor for all these reflections. The
        # result is the same, but we also gain the flags and xyzcal.px columns
        obs_refs["id"] = flex.int(len(obs_refs), 0)
        obs_refs = self.ref_predictor(obs_refs)

        # Set 'observed' centroids from the predicted ones
        obs_refs["xyzobs.mm.value"] = obs_refs["xyzcal.mm"]

        # Invent some variances for the centroid positions of the simulated data
        im_width = 0.1 * pi / 180.0
        px_size = self.detector[0].get_pixel_size()
        var_x = flex.double(len(obs_refs), (px_size[0] / 2.0)**2)
        var_y = flex.double(len(obs_refs), (px_size[1] / 2.0)**2)
        var_phi = flex.double(len(obs_refs), (im_width / 2.0)**2)
        obs_refs["xyzobs.mm.variance"] = flex.vec3_double(
            var_x, var_y, var_phi)

        # set the flex random seed to an 'uninteresting' number
        flex.set_random_seed(12407)

        # take 10 random reflections for speed
        reflections = obs_refs.select(flex.random_selection(len(obs_refs), 10))

        # use a BlockCalculator to calculate the blocks per image
        from dials.algorithms.refinement.reflection_manager import BlockCalculator

        block_calculator = BlockCalculator(self.experiments, reflections)
        reflections = block_calculator.per_image()

        return reflections
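
Note the fixed seed: flex.set_random_seed pins the generator state, so flex.random_selection returns the same subset on every run and the test stays deterministic; the near-identical test below (Example #6) does the same. A small sketch of that property, assuming scitbx is available:

from scitbx.array_family import flex

flex.set_random_seed(12407)
first = flex.random_selection(1000, 10)
flex.set_random_seed(12407)
second = flex.random_selection(1000, 10)
assert list(first) == list(second)  # same seed, same selection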
Example #5
  def index(self, datablock, observed):
    ''' IOTA-SRS indexing. Goes through ntrials and indexes subsamples'''
    # index and refine
    if self.params.iota.method == 'random_sub_sampling':
      from scitbx.array_family import flex
      experiments_list = []
      #No outlier rejection or refinement should be done for the candidate basis vectors
      self.known_crystal_models = None
      outlier_rejection_flag = self.params.indexing.stills.candidate_outlier_rejection
      refine_all_candidates_flag = self.params.indexing.stills.refine_all_candidates
      if self.params.iota.random_sub_sampling.no_outlier_rejection_and_candidates_refinement:
        self.params.indexing.stills.candidate_outlier_rejection = False
        self.params.indexing.stills.refine_all_candidates = False

      # Adding timeout option for IOTA
      initial_time = time.time()
      for trial in range(self.params.iota.random_sub_sampling.ntrials):
        curr_time = time.time()
        if self.params.iota.timeout_cutoff_sec is not None:
          if curr_time - initial_time > self.params.iota.timeout_cutoff_sec:
            raise IOTA_TimeoutError('IOTA_TIMEOUT ', curr_time - initial_time)
        flex.set_random_seed(trial + 1001)
        observed_sample = observed.select(flex.random_selection(
            len(observed),
            int(len(observed) * self.params.iota.random_sub_sampling.fraction_sub_sample)))
        try:
          print('IOTA:SUM_INTENSITY_VALUE=%d trial=%d' % (sum(observed_sample['intensity.sum.value']), trial))
          experiments_tmp, indexed_tmp = self.index_with_iota(datablock, observed_sample)
          experiments_list.append(experiments_tmp)
        except Exception as e:
          print('Indexing failed: %s' % str(e))
      if self.params.iota.random_sub_sampling.consensus_function == 'unit_cell':
        from exafel_project.ADSE13_25.clustering.old_consensus_functions import get_uc_consensus as get_consensus
        if len(experiments_list) > 0:
          known_crystal_models, clustered_experiments_list = get_consensus(experiments_list, show_plot=False, return_only_first_indexed_model=False, finalize_method=None, clustering_params=None)
          self.known_crystal_models = known_crystal_models
        print('IOTA: Reindexing with best chosen crystal model')
        # Set back whatever PHIL parameters were supplied by the user for outlier rejection and refinement
        self.params.indexing.stills.candidate_outlier_rejection = outlier_rejection_flag
        self.params.indexing.stills.refine_all_candidates = refine_all_candidates_flag
      experiments, indexed = self.index_with_known_orientation(datablock, observed)
      return experiments, indexed
    else:
      experiments, indexed = self.index_with_iota(datablock, observed)
      return experiments,indexed
Example #6
  def generate_reflections(self):
    sweep_range = self.scan.get_oscillation_range(deg=False)
    resolution = 2.0
    index_generator = IndexGenerator(self.crystal.get_unit_cell(),
                          space_group(space_group_symbols(1).hall()).type(),
                          resolution)
    indices = index_generator.to_array()

    # Predict rays within the sweep range
    ray_predictor = ScansRayPredictor(self.experiments, sweep_range)
    obs_refs = ray_predictor(indices)

    # Take only those rays that intersect the detector
    intersects = ray_intersection(self.detector, obs_refs)
    obs_refs = obs_refs.select(intersects)

    # Re-predict using the Experiments predictor for all these reflections. The
    # result is the same, but we also gain the flags and xyzcal.px columns
    obs_refs['id'] = flex.int(len(obs_refs), 0)
    obs_refs = self.ref_predictor(obs_refs)

    # Set 'observed' centroids from the predicted ones
    obs_refs['xyzobs.mm.value'] = obs_refs['xyzcal.mm']

    # Invent some variances for the centroid positions of the simulated data
    im_width = 0.1 * pi / 180.
    px_size = self.detector[0].get_pixel_size()
    var_x = flex.double(len(obs_refs), (px_size[0] / 2.)**2)
    var_y = flex.double(len(obs_refs), (px_size[1] / 2.)**2)
    var_phi = flex.double(len(obs_refs), (im_width / 2.)**2)
    obs_refs['xyzobs.mm.variance'] = flex.vec3_double(var_x, var_y, var_phi)

    # set the flex random seed to an 'uninteresting' number
    flex.set_random_seed(12407)

    # take 5 random reflections for speed
    reflections = obs_refs.select(flex.random_selection(len(obs_refs), 5))

    # use a BlockCalculator to calculate the blocks per image
    from dials.algorithms.refinement.reflection_manager import BlockCalculator
    block_calculator = BlockCalculator(self.experiments, reflections)
    reflections = block_calculator.per_image()

    return reflections
Example #7
    def _estimate_cc_sig_fac(self):
        """Estimation of sigma(CC) as a function of sample size.

        Estimate the error in the correlation coefficient, sigma(CC) by using
        pairs of reflections at similar resolutions that are not related by
        potential symmetry. Using pairs of unrelated reflections at similar
        resolutions, calculate sigma(CC) == rms(CC) for groups of size N = 3..200.
        The constant CCsigFac is obtained from a linear fit of
        sigma(CC) to 1/N^(1/2), i.e.:
            sigma(CC) = CCsigFac/N^(1/2)
        """

        max_bins = 500
        reflections_per_bin = max(
            200, int(math.ceil(self.intensities.size() / max_bins)))
        binner = self.intensities.setup_binner_counting_sorted(
            reflections_per_bin=reflections_per_bin)

        a = flex.double()
        b = flex.double()
        ma_tmp = self.intensities.customized_copy(
            crystal_symmetry=crystal.symmetry(
                space_group=self.lattice_group,
                unit_cell=self.intensities.unit_cell(),
                assert_is_compatible_unit_cell=False,
            )).map_to_asu()
        for i in range(binner.n_bins_all()):
            count = binner.counts()[i]
            if count == 0:
                continue
            bin_isel = binner.array_indices(i)
            p = flex.random_permutation(count)
            p = p[:2 * (count // 2)]  # ensure even count
            ma_a = ma_tmp.select(bin_isel.select(p[:count // 2]))
            ma_b = ma_tmp.select(bin_isel.select(p[count // 2:]))
            # only choose pairs of reflections that don't have the same indices
            # in the asu of the lattice group
            sel = ma_a.indices() != ma_b.indices()
            a.extend(ma_a.data().select(sel))
            b.extend(ma_b.data().select(sel))

        perm = flex.random_selection(a.size(), min(20000, a.size()))
        a = a.select(perm)
        b = b.select(perm)

        self.corr_unrelated = CorrelationCoefficientAccumulator(a, b)

        n_pairs = a.size()
        min_num_groups = 10  # minimum number of groups
        max_n_group = int(min(n_pairs / min_num_groups,
                              200))  # maximum number in group
        min_n_group = int(min(5, max_n_group))  # minimum number in group

        if (max_n_group - min_n_group) < 4:
            self.cc_sig_fac = 0
            return

        mean_ccs = flex.double()
        rms_ccs = flex.double()
        ns = flex.double()
        for n in range(min_n_group, max_n_group + 1):
            ns.append(n)
            ccs = flex.double()
            for i in range(200):
                isel = flex.random_selection(a.size(), n)
                corr = CorrelationCoefficientAccumulator(
                    a.select(isel), b.select(isel))
                ccs.append(corr.coefficient())

            mean_ccs.append(flex.mean(ccs))
            rms_ccs.append(flex.mean(flex.pow2(ccs))**0.5)

        x = 1 / flex.pow(ns, 0.5)
        y = rms_ccs
        fit = flex.linear_regression(x, y)

        if fit.is_well_defined():
            self.cc_sig_fac = fit.slope()
        else:
            self.cc_sig_fac = 0
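
The docstring's scaling law can be sanity-checked without any cctbx machinery: for groups of unrelated (independent) value pairs, the sample correlation coefficient has rms close to 1/sqrt(N), which is exactly why fitting rms(CC) against 1/sqrt(N) recovers a constant slope. An illustrative pure-Python check (not part of the DIALS code):

import math
import random

def cc(xs, ys):
    # plain Pearson correlation coefficient
    n = len(xs)
    mx, my = sum(xs) / n, sum(ys) / n
    sxy = sum((x - mx) * (y - my) for x, y in zip(xs, ys))
    sxx = sum((x - mx) ** 2 for x in xs)
    syy = sum((y - my) ** 2 for y in ys)
    return sxy / math.sqrt(sxx * syy)

random.seed(42)
for n in (10, 40, 160):
    ccs = [cc([random.gauss(0, 1) for _ in range(n)],
              [random.gauss(0, 1) for _ in range(n)])
           for _ in range(2000)]
    rms = math.sqrt(sum(c * c for c in ccs) / len(ccs))
    print(n, round(rms * math.sqrt(n), 2))  # stays near 1.0 for every n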
Example #8
def estimate_gain(imageset, kernel_size=(10,10), output_gain_map=None):
  detector = imageset.get_detector()

  from dials.algorithms.image.threshold import KabschDebug

  raw_data = imageset.get_raw_data(0)

  gain_value = 1
  gain_map = [flex.double(raw_data[i].accessor(), gain_value)
              for i in range(len(detector))]

  mask = imageset.get_mask(0)

  min_local = 0

  # dummy values, shouldn't affect results
  nsigma_b = 6
  nsigma_s = 3
  global_threshold = 0

  kabsch_debug_list = []
  for i_panel in range(len(detector)):
    kabsch_debug_list.append(
      KabschDebug(
        raw_data[i_panel].as_double(), mask[i_panel], gain_map[i_panel],
        kernel_size, nsigma_b, nsigma_s, global_threshold, min_local))

  dispersion = flex.double()
  for kabsch in kabsch_debug_list:
    dispersion.extend(kabsch.coefficient_of_variation().as_1d())

  sorted_dispersion = flex.sorted(dispersion)
  from libtbx.math_utils import nearest_integer as nint

  q1 = sorted_dispersion[nint(len(sorted_dispersion)/4)]
  q2 = sorted_dispersion[nint(len(sorted_dispersion)/2)]
  q3 = sorted_dispersion[nint(len(sorted_dispersion)*3/4)]
  iqr = q3-q1

  print "q1, q2, q3: %.2f, %.2f, %.2f" %(q1, q2, q3)

  inlier_sel = (sorted_dispersion > (q1 - 1.5*iqr)) & (sorted_dispersion < (q3 + 1.5*iqr))
  sorted_dispersion = sorted_dispersion.select(inlier_sel)
  gain = sorted_dispersion[nint(len(sorted_dispersion)/2)]
  print "Estimated gain: %.2f" % gain

  if output_gain_map:
    # write the gain map
    import pickle
    gain_map = flex.double(flex.grid(raw_data[0].all()), gain)
    with open(output_gain_map, "wb") as fh:
      pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

  if 0:
    sel = flex.random_selection(population_size=len(sorted_dispersion), sample_size=10000)
    sorted_dispersion = sorted_dispersion.select(sel)

    from matplotlib import pyplot
    pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
    pyplot.ylim(0, 10)
    pyplot.show()

  return gain
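
The robustness of this estimate comes from the quartile/IQR inlier filter applied before taking the median: dispersion values outside [q1 - 1.5*iqr, q3 + 1.5*iqr] are discarded, so a handful of hot or dead pixels cannot drag the gain. The same filter in plain Python, as an illustrative sketch:

def robust_median(values):
    # median of the inliers under the standard 1.5*IQR fence
    s = sorted(values)
    n = len(s)
    q1, q3 = s[n // 4], s[(3 * n) // 4]
    iqr = q3 - q1
    inliers = [v for v in s if q1 - 1.5 * iqr < v < q3 + 1.5 * iqr]
    return inliers[len(inliers) // 2]

print(robust_median([1.0, 1.1, 0.9, 1.05, 0.95, 50.0]))  # 1.0; the outlier is rejected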
Example #9
def estimate_gain(imageset,
                  kernel_size=(10, 10),
                  output_gain_map=None,
                  max_images=1):
    detector = imageset.get_detector()

    from dials.algorithms.image.threshold import DispersionThresholdDebug
    gains = flex.double()

    for image_no in range(len(imageset)):
        raw_data = imageset.get_raw_data(image_no)
        NSQ = 200
        small_section = raw_data[0].matrix_copy_block(400, 400, NSQ, NSQ)
        print("This small section", len(small_section), "mean is",
              flex.mean(small_section.as_double()))
        raw_data = (small_section, )

        gain_value = 1
        gain_map = [
            flex.double(raw_data[i].accessor(), gain_value)
            for i in range(len(detector))
        ]

        mask = imageset.get_mask(image_no)
        mask = (mask[0].matrix_copy_block(400, 400, NSQ, NSQ),)  # one-panel tuple, matching raw_data
        min_local = 0

        # dummy values, shouldn't affect results
        nsigma_b = 6
        nsigma_s = 3
        global_threshold = 0

        kabsch_debug_list = []
        for i_panel in range(len(detector)):
            kabsch_debug_list.append(
                DispersionThresholdDebug(raw_data[i_panel].as_double(),
                                         mask[i_panel], gain_map[i_panel],
                                         kernel_size, nsigma_b, nsigma_s,
                                         global_threshold, min_local))

        dispersion = flex.double()
        for ipix in range(5, NSQ - 15):
            for spix in range(5, NSQ - 15):
                data = small_section.matrix_copy_block(ipix, spix, 10,
                                                       10).as_double()
                datasq = data * data
                means = flex.mean(data)
                var = flex.mean(datasq) - (means)**2
                #print(ipix,spix,var,var/means)
                dispersion.append(var / means)

        # Recompute the dispersion from the DispersionThresholdDebug results,
        # overriding the block-wise manual estimate above
        dispersion = flex.double()
        for kabsch in kabsch_debug_list:
            a_section = kabsch.index_of_dispersion().matrix_copy_block(
                5, 5, NSQ - 15, NSQ - 15)
            print("mean of a_section", flex.mean(a_section))
            dispersion.extend(a_section.as_1d())

        sorted_dispersion = flex.sorted(dispersion)
        from libtbx.math_utils import nearest_integer as nint

        q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
        q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
        iqr = q3 - q1

        print("q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3))
        if iqr == 0.0:
            raise Sorry(
                'Unable to robustly estimate the variation of pixel values.')

        inlier_sel = (sorted_dispersion >
                      (q1 - 1.5 * iqr)) & (sorted_dispersion <
                                           (q3 + 1.5 * iqr))
        sorted_dispersion = sorted_dispersion.select(inlier_sel)
        gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        print("Estimated gain: %.2f" % gain)
        gains.append(gain)

        if image_no == 0:
            gain0 = gain
        if image_no + 1 >= max_images:
            break

    if len(gains) > 1:
        stats = flex.mean_and_variance(gains)
        print("Average gain: %.2f +/- %.2f" %
              (stats.mean(), stats.unweighted_sample_standard_deviation()))

    if output_gain_map:
        if len(gains) > 1:
            raw_data = imageset.get_raw_data(0)
        # write the gain map
        import six.moves.cPickle as pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain0)
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    if 0:
        sel = flex.random_selection(population_size=len(sorted_dispersion),
                                    sample_size=10000)
        sorted_dispersion = sorted_dispersion.select(sel)

        from matplotlib import pyplot
        pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
        pyplot.ylim(0, 10)
        pyplot.show()

    return gain0
Example #10
def estimate_gain(imageset,
                  kernel_size=(10, 10),
                  output_gain_map=None,
                  max_images=1):
    detector = imageset.get_detector()

    from dials.algorithms.image.threshold import DispersionThresholdDebug
    gains = flex.double()

    for image_no in range(len(imageset)):
        raw_data = imageset.get_raw_data(image_no)

        gain_value = 1
        gain_map = [
            flex.double(raw_data[i].accessor(), gain_value)
            for i in range(len(detector))
        ]

        mask = imageset.get_mask(image_no)

        min_local = 0

        # dummy values, shouldn't affect results
        nsigma_b = 6
        nsigma_s = 3
        global_threshold = 0

        kabsch_debug_list = []
        for i_panel in range(len(detector)):
            kabsch_debug_list.append(
                DispersionThresholdDebug(raw_data[i_panel].as_double(),
                                         mask[i_panel], gain_map[i_panel],
                                         kernel_size, nsigma_b, nsigma_s,
                                         global_threshold, min_local))

        dispersion = flex.double()
        for kabsch in kabsch_debug_list:
            dispersion.extend(kabsch.index_of_dispersion().as_1d())

        sorted_dispersion = flex.sorted(dispersion)
        from libtbx.math_utils import nearest_integer as nint

        q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
        q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
        iqr = q3 - q1

        print "q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3)
        if iqr == 0.0:
            raise Sorry(
                'Unable to robustly estimate the variation of pixel values.')

        inlier_sel = (sorted_dispersion >
                      (q1 - 1.5 * iqr)) & (sorted_dispersion <
                                           (q3 + 1.5 * iqr))
        sorted_dispersion = sorted_dispersion.select(inlier_sel)
        gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
        print "Estimated gain: %.2f" % gain
        gains.append(gain)

        if image_no == 0:
            gain0 = gain
        if image_no + 1 >= max_images:
            break

    if len(gains) > 1:
        stats = flex.mean_and_variance(gains)
        print "Average gain: %.2f +/- %.2f" % (
            stats.mean(), stats.unweighted_sample_standard_deviation())

    if output_gain_map:
        if len(gains) > 1:
            raw_data = imageset.get_raw_data(0)
        # write the gain map
        import pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain0)
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    if 0:
        sel = flex.random_selection(population_size=len(sorted_dispersion),
                                    sample_size=10000)
        sorted_dispersion = sorted_dispersion.select(sel)

        from matplotlib import pyplot
        pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
        pyplot.ylim(0, 10)
        pyplot.show()

    return gain0
Example #11
# Set 'observed' centroids from the predicted ones
obs_refs['xyzobs.mm.value'] = obs_refs['xyzcal.mm']

# Invent some variances for the centroid positions of the simulated data
im_width = 0.1 * pi / 180.
px_size = mydetector[0].get_pixel_size()
var_x = flex.double(len(obs_refs), (px_size[0] / 2.)**2)
var_y = flex.double(len(obs_refs), (px_size[1] / 2.)**2)
var_phi = flex.double(len(obs_refs), (im_width / 2.)**2)
obs_refs['xyzobs.mm.variance'] = flex.vec3_double(var_x, var_y, var_phi)

# set the flex random seed to an 'uninteresting' number
flex.set_random_seed(12407)

# take 5 random reflections for speed
reflections = obs_refs.select(flex.random_selection(len(obs_refs), 5))

# use a BlockCalculator to calculate the blocks per image
from dials.algorithms.refinement.reflection_manager import BlockCalculator
block_calculator = BlockCalculator(experiments, reflections)
reflections = block_calculator.per_image()

# use a ReflectionManager to exclude reflections too close to the spindle,
# plus set the frame numbers
from dials.algorithms.refinement.reflection_manager import ReflectionManager
refman = ReflectionManager(reflections, experiments,
  outlier_detector=None)

# make a target to ensure reflections are predicted and refman is finalised
from dials.algorithms.refinement.target import \
  LeastSquaresPositionalResidualWithRmsdCutoff
Example #12
def estimate_gain(imageset, kernel_size=(10, 10), output_gain_map=None):
    detector = imageset.get_detector()

    from dials.algorithms.image.threshold import KabschDebug

    raw_data = imageset.get_raw_data(0)

    gain_value = 1
    gain_map = [
        flex.double(raw_data[i].accessor(), gain_value)
        for i in range(len(detector))
    ]

    mask = imageset.get_mask(0)

    min_local = 0

    # dummy values, shouldn't affect results
    nsigma_b = 6
    nsigma_s = 3
    global_threshold = 0

    kabsch_debug_list = []
    for i_panel in range(len(detector)):
        kabsch_debug_list.append(
            KabschDebug(raw_data[i_panel].as_double(), mask[i_panel],
                        gain_map[i_panel], kernel_size, nsigma_b, nsigma_s,
                        global_threshold, min_local))

    dispersion = flex.double()
    for kabsch in kabsch_debug_list:
        dispersion.extend(kabsch.coefficient_of_variation().as_1d())

    sorted_dispersion = flex.sorted(dispersion)
    from libtbx.math_utils import nearest_integer as nint

    q1 = sorted_dispersion[nint(len(sorted_dispersion) / 4)]
    q2 = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    q3 = sorted_dispersion[nint(len(sorted_dispersion) * 3 / 4)]
    iqr = q3 - q1

    print "q1, q2, q3: %.2f, %.2f, %.2f" % (q1, q2, q3)

    inlier_sel = (sorted_dispersion > (q1 - 1.5 * iqr)) & (sorted_dispersion <
                                                           (q3 + 1.5 * iqr))
    sorted_dispersion = sorted_dispersion.select(inlier_sel)
    gain = sorted_dispersion[nint(len(sorted_dispersion) / 2)]
    print "Estimated gain: %.2f" % gain

    if output_gain_map:
        # write the gain map
        import pickle
        gain_map = flex.double(flex.grid(raw_data[0].all()), gain)
        with open(output_gain_map, "wb") as fh:
            pickle.dump(gain_map, fh, protocol=pickle.HIGHEST_PROTOCOL)

    if 0:
        sel = flex.random_selection(population_size=len(sorted_dispersion),
                                    sample_size=10000)
        sorted_dispersion = sorted_dispersion.select(sel)

        from matplotlib import pyplot
        pyplot.scatter(range(len(sorted_dispersion)), sorted_dispersion)
        pyplot.ylim(0, 10)
        pyplot.show()

    return gain
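
A hypothetical way to drive estimate_gain from a script; the filename is a placeholder, and ImageSetFactory.new is assumed to be importable from dxtbx, as in the DIALS command-line tools:

from dxtbx.imageset import ImageSetFactory

imagesets = ImageSetFactory.new(["example_0001.cbf"])  # placeholder path
gain = estimate_gain(imagesets[0], kernel_size=(10, 10))
print("gain = %.2f" % gain)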