Example 1
def chosen_weights(observation_set, params):
    data = observation_set.data()
    sigmas = observation_set.sigmas()
    return {
        "unit": flex.double(len(data), 1.),
        "variance": 1. / (sigmas * sigmas),
        "gentle": flex.pow(flex.sqrt(flex.abs(data)) / sigmas, 2),
        "extreme": flex.pow(data / sigmas, 2)
    }[params.postrefinement.target_weighting]
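Aside (not part of the example): the function above builds all four weight arrays with flex and then selects one by the phil string params.postrefinement.target_weighting. A minimal numpy restatement for readers without cctbx; the function name and the numpy mapping are mine:

import numpy as np

def chosen_weights_np(data, sigmas, target_weighting):
    # one-to-one numpy restatement of the flex dispatch above (illustrative)
    return {
        "unit": np.ones_like(data),                       # flex.double(len(data), 1.)
        "variance": 1.0 / (sigmas * sigmas),
        "gentle": (np.sqrt(np.abs(data)) / sigmas) ** 2,  # flex.pow(..., 2)
        "extreme": (data / sigmas) ** 2,
    }[target_weighting]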
Example 2
    def __init__(self,
                 d_spacings,
                 target_n_per_bin=25,
                 max_slots=40,
                 min_slots=20):
        d_spacings = flex.double(list(set(d_spacings)))
        d_spacings_sorted = flex.sorted(d_spacings, reverse=True)
        d_star_cubed_sorted = flex.pow(1 / d_spacings_sorted, 3)

        # choose the bin volume such that the lowest-resolution shell contains
        # 5% of the spots or target_n_per_bin (default 25), whichever is
        # greater, capped at 25% of the spots
        low_res_count = int(
            math.ceil(
                min(
                    max(target_n_per_bin, 0.05 * len(d_spacings)),
                    0.25 * len(d_spacings),
                )))
        bin_step = d_star_cubed_sorted[low_res_count] - d_star_cubed_sorted[0]
        assert bin_step > 0
        n_slots = int(
            math.ceil(
                (d_star_cubed_sorted[-1] - d_star_cubed_sorted[0]) / bin_step))

        if max_slots is not None:
            n_slots = min(n_slots, max_slots)
        if min_slots is not None:
            n_slots = max(n_slots, min_slots)
        bin_step = (d_star_cubed_sorted[-1] - d_star_cubed_sorted[0]) / n_slots

        self.bins = []
        ds3_max = d_star_cubed_sorted[0]
        for i in range(n_slots):
            ds3_min = d_star_cubed_sorted[0] + (i + 1) * bin_step
            self.bins.append(Slot(1 / ds3_min**(1 / 3), 1 / ds3_max**(1 / 3)))
            ds3_max = ds3_min
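Aside: a short numpy sketch (illustrative, not the cctbx API) of the bin-edge arithmetic above. Equal steps in d*^3 = (1/d)^3 correspond to reciprocal-space shells of equal volume, which is why the constructor works in d_star_cubed rather than in d directly:

import numpy as np

d_spacings = np.random.uniform(1.5, 20.0, 1000)  # synthetic d-spacings (angstrom)
ds3 = np.sort((1.0 / d_spacings) ** 3)           # ascending, like d_star_cubed_sorted
n_slots = 20
bin_step = (ds3[-1] - ds3[0]) / n_slots
edges = ds3[0] + bin_step * np.arange(1, n_slots + 1)
d_min_per_bin = edges ** (-1.0 / 3.0)            # same conversion as 1 / ds3_min**(1/3)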
Example 3
  def __init__(self, d_spacings, target_n_per_bin=25, max_slots=40, min_slots=20):
    d_spacings_sorted = flex.sorted(d_spacings, reverse=True)
    d_star_cubed_sorted = flex.pow(1/d_spacings_sorted, 3)

    # choose the bin volume such that the lowest-resolution shell contains
    # 5% of the spots or target_n_per_bin (default 25), whichever is greater,
    # capped at 25% of the spots
    low_res_count = int(
      math.ceil(min(max(target_n_per_bin, 0.05*len(d_spacings)),
                    0.25*len(d_spacings))))
    bin_step = d_star_cubed_sorted[low_res_count] - d_star_cubed_sorted[0]
    n_slots = int(
      math.ceil((d_star_cubed_sorted[-1] - d_star_cubed_sorted[0])/bin_step))

    #n_slots = len(d_spacings_sorted)//target_n_per_bin
    if max_slots is not None:
      n_slots = min(n_slots, max_slots)
    if min_slots is not None:
      n_slots = max(n_slots, min_slots)
    bin_step = (d_star_cubed_sorted[-1] - d_star_cubed_sorted[0])/n_slots

    self.bins = []
    ds3_max = d_star_cubed_sorted[0]
    for i in range(n_slots):
      ds3_min = d_star_cubed_sorted[0] + (i+1) * bin_step
      self.bins.append(slot(1/ds3_min**(1/3), 1/ds3_max**(1/3)))
      ds3_max = ds3_min
Example 4
    def result_for_cxi_merge(self, file_name):
        values = self.get_parameter_values()
        self.rs2_parameter_range_assertions(values)
        scaler = self.nave1_refinery.scaler_callable(
            self.get_parameter_values())

        partiality_array = self.refinery.get_partiality_array(values)
        p_scaler = flex.pow(
            partiality_array,
            0.5 * self.params.postrefinement.merge_partiality_exponent)

        fat_selection = (
            self.nave1_refinery.lorentz_callable(self.get_parameter_values()) >
            self.params.postrefinement.rs_hybrid.partiality_threshold
        )  # was 0.2 for rs2
        fat_count = fat_selection.count(True)
        scaler_s = scaler.select(fat_selection)
        p_scaler_s = p_scaler.select(fat_selection)

        #avoid empty database INSERT, if insufficient centrally-located Bragg spots:
        # in samosa, handle this at a higher level, but handle it somehow.
        if fat_count < 3:
            raise ValueError("< 3 near-fulls after refinement")
        print("On total %5d the fat selection is %5d" %
              (len(self.observations_pair1_selected.indices()), fat_count),
              file=self.out)
        observations_original_index = \
          self.observations_original_index_pair1_selected.select(fat_selection)

        observations = self.observations_pair1_selected.customized_copy(
            indices=self.observations_pair1_selected.indices().select(
                fat_selection),
            data=(
                self.observations_pair1_selected.data().select(fat_selection) /
                scaler_s),
            sigmas=(
                self.observations_pair1_selected.sigmas().select(fat_selection)
                / (scaler_s * p_scaler_s)))
        matches = miller.match_multi_indices(
            miller_indices_unique=self.miller_set.indices(),
            miller_indices=observations.indices())

        I_weight = flex.double(len(observations.sigmas()), 1.)
        I_reference = flex.double(
            [self.i_model.data()[pair[0]] for pair in matches.pairs()])
        I_invalid = flex.bool(
            [self.i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
        I_weight.set_selected(I_invalid, 0.)
        SWC = simple_weighted_correlation(I_weight, I_reference,
                                          observations.data())
        print("CORR: NEW correlation is", SWC.corr, file=self.out)
        print("ASTAR_FILE", file_name,
              tuple(self.nave1_refinery.get_eff_Astar(values)),
              file=self.out)
        self.final_corr = SWC.corr
        #another range assertion
        assert self.final_corr > 0.1, "correlation coefficient out of range (<= 0.1) after LevMar refinement"
        # XXX Specific to the hybrid_rs method, and likely these limits are problem-specific (especially G-max) so look for another approach
        #     or expose the limits as phil parameters.
        assert values.G < 0.5, "G-scale value out of range ( > 0.5 XXX may be too strict ) after LevMar refinement"

        return observations_original_index, observations, matches
Example 5
    def result_for_cxi_merge(self, file_name):
        values = self.get_parameter_values()
        self.rs2_parameter_range_assertions(values)
        scaler = self.refinery.scaler_callable(
            self.parameterization_class(self.MINI.x))

        partiality_array = self.refinery.get_partiality_array(values)
        p_scaler = flex.pow(
            partiality_array,
            0.5 * self.params.postrefinement.merge_partiality_exponent)

        fat_selection = (partiality_array > 0.2)
        fat_count = fat_selection.count(True)
        scaler_s = scaler.select(fat_selection)
        p_scaler_s = p_scaler.select(fat_selection)

        #avoid empty database INSERT, if insufficient centrally-located Bragg spots:
        # in samosa, handle this at a higher level, but handle it somehow.
        if fat_count < 3:
            raise ValueError("< 3 near-fulls after refinement")
        print("On total %5d the fat selection is %5d" %
              (len(self.observations_pair1_selected.indices()), fat_count),
              file=self.out)
        observations_original_index = \
          self.observations_original_index_pair1_selected.select(fat_selection)

        observations = self.observations_pair1_selected.customized_copy(
            indices=self.observations_pair1_selected.indices().select(
                fat_selection),
            data=(
                self.observations_pair1_selected.data().select(fat_selection) /
                scaler_s),
            sigmas=(
                self.observations_pair1_selected.sigmas().select(fat_selection)
                / (scaler_s * p_scaler_s)))
        matches = miller.match_multi_indices(
            miller_indices_unique=self.miller_set.indices(),
            miller_indices=observations.indices())

        I_weight = flex.double(len(observations.sigmas()), 1.)
        I_reference = flex.double(
            [self.i_model.data()[pair[0]] for pair in matches.pairs()])
        I_invalid = flex.bool(
            [self.i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
        I_weight.set_selected(I_invalid, 0.)
        SWC = simple_weighted_correlation(I_weight, I_reference,
                                          observations.data())
        print("CORR: NEW correlation is", SWC.corr, file=self.out)
        print("ASTAR_FILE",
              file_name,
              tuple(self.refinery.get_eff_Astar(values)),
              file=self.out)
        self.final_corr = SWC.corr
        self.refined_mini = self.MINI
        #another range assertion
        assert self.final_corr > 0.1, "correlation coefficient out of range (<= 0.1) after rs2 refinement"

        return observations_original_index, observations, matches
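Aside: simple_weighted_correlation scores the calc/obs agreement with invalid reflections given zero weight. A hedged numpy sketch of the quantity it is understood to compute (the actual cctbx implementation may differ in detail; weighted_correlation below is my name):

import numpy as np

def weighted_correlation(w, x, y):
    # weighted Pearson correlation; entries with w == 0 (the I_invalid set) drop out
    sw = w.sum()
    xm = (w * x).sum() / sw
    ym = (w * y).sum() / sw
    cov = (w * (x - xm) * (y - ym)).sum()
    vx = (w * (x - xm) ** 2).sum()
    vy = (w * (y - ym) ** 2).sum()
    return cov / np.sqrt(vx * vy)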
Example 6
            def correct(refl_sele, smart_sigmas=True):
                kapton_correction = image_kapton_correction(
                    panel_size_px=panel_size_px,
                    pixel_size_mm=pixel_size_mm,
                    detector_dist_mm=detector_dist_mm,
                    wavelength_ang=wavelength_ang,
                    reflections_sele=refl_sele,
                    params=self.params,
                    expt=expt,
                    refl=refl,
                    smart_sigmas=smart_sigmas,
                    logger=self.logger,
                )

                k_corr, k_sigmas = kapton_correction()
                refl_sele["kapton_absorption_correction"] = k_corr
                if smart_sigmas:
                    refl_sele["kapton_absorption_correction_sigmas"] = k_sigmas
                    # apply corrections and propagate error
                    # term1 = (sig(C)/C)^2
                    # term2 = (sig(Imeas)/Imeas)^2
                    # I' = C*I
                    # sig^2(I') = (I')^2*(term1 + term2)
                    integrated_data = refl_sele["intensity.sum.value"]
                    integrated_variance = refl_sele["intensity.sum.variance"]
                    integrated_sigma = flex.sqrt(integrated_variance)
                    term1 = flex.pow(k_sigmas / k_corr, 2)
                    term2 = flex.pow(integrated_sigma / integrated_data, 2)
                    integrated_data *= k_corr
                    integrated_variance = flex.pow(integrated_data,
                                                   2) * (term1 + term2)
                    refl_sele["intensity.sum.value"] = integrated_data
                    refl_sele["intensity.sum.variance"] = integrated_variance
                    # order is purposeful: the two lines above require that integrated_data
                    # has already been corrected!
                else:
                    refl_sele["intensity.sum.value"] *= k_corr
                    refl_sele["intensity.sum.variance"] *= flex.pow2(k_corr)
                return refl_sele
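Aside: the propagation formula in the smart_sigmas branch can be checked numerically. A small Monte Carlo sketch (numpy, made-up values) confirming sig^2(I') = I'^2 * ((sig_C/C)^2 + (sig_I/I)^2) to first order for I' = C * I with independent errors:

import numpy as np

rng = np.random.default_rng(0)
C, sig_C = 0.9, 0.02        # correction factor and its sigma (made-up)
I, sig_I = 1500.0, 40.0     # measured intensity and its sigma (made-up)
samples = (C + sig_C * rng.standard_normal(1_000_000)) * \
          (I + sig_I * rng.standard_normal(1_000_000))
analytic = (C * I) * np.sqrt((sig_C / C) ** 2 + (sig_I / I) ** 2)
print(samples.std(), analytic)  # agree to first order in the relative errors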
Example 7
  def result_for_cxi_merge(self):
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.refinery.scaler_callable(self.parameterization_class(self.MINI.x))

    partiality_array = self.refinery.get_partiality_array(values)
    p_scaler = flex.pow(partiality_array,
                        0.5*self.params.postrefinement.merge_partiality_exponent)

    fat_selection = (partiality_array > 0.2)
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    # reject an experiment with insufficient number of near-full reflections
    if fat_count < 3:
      if self.params.output.log_level == 0:
        self.logger.log("Rejected experiment, because: On total %5d the fat selection is %5d"%(len(self.observations_pair1_selected.indices()), fat_count))
      raise ValueError("< 3 near-fulls after refinement")
    if self.params.output.log_level == 0:
      self.logger.log("On total %5d the fat selection is %5d"%(len(self.observations_pair1_selected.indices()), fat_count))

    observations_original_index = self.observations_original_index_pair1_selected.select(fat_selection)

    observations = self.observations_pair1_selected.customized_copy(
      indices = self.observations_pair1_selected.indices().select(fat_selection),
      data = (self.observations_pair1_selected.data().select(fat_selection)/scaler_s),
      sigmas = (self.observations_pair1_selected.sigmas().select(fat_selection)/(scaler_s * p_scaler_s))
    )
    matches = miller.match_multi_indices(
      miller_indices_unique=self.params.scaling.miller_set.indices(),
      miller_indices=observations.indices())

    I_weight = flex.double(len(observations.sigmas()), 1.)
    I_reference = flex.double([self.params.scaling.i_model.data()[pair[0]] for pair in matches.pairs()])
    I_invalid = flex.bool([self.params.scaling.i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
    I_weight.set_selected(I_invalid,0.)
    SWC = simple_weighted_correlation(I_weight, I_reference, observations.data())

    if self.params.output.log_level == 0:
      self.logger.log("CORR: NEW correlation is: %f"%SWC.corr)
      self.logger.log("ASTAR: ")
      self.logger.log(tuple(self.refinery.get_eff_Astar(values)))

    self.final_corr = SWC.corr
    self.refined_mini = self.MINI

    #another range assertion
    assert self.final_corr > 0.1,"correlation coefficient out of range (<= 0.1) after rs2 refinement"

    return observations_original_index, observations, matches
Example 8
  def result_for_cxi_merge(self, file_name):
    values = self.get_parameter_values()
    self.rs2_parameter_range_assertions(values)
    scaler = self.nave1_refinery.scaler_callable(self.get_parameter_values())

    partiality_array = self.refinery.get_partiality_array(values)
    p_scaler = flex.pow(partiality_array,
                        0.5*self.params.postrefinement.merge_partiality_exponent)

    fat_selection = (self.nave1_refinery.lorentz_callable(self.get_parameter_values()) >
                     self.params.postrefinement.rs_hybrid.partiality_threshold) # was 0.2 for rs2
    fat_count = fat_selection.count(True)
    scaler_s = scaler.select(fat_selection)
    p_scaler_s = p_scaler.select(fat_selection)

    #avoid empty database INSERT, if insufficient centrally-located Bragg spots:
    # in samosa, handle this at a higher level, but handle it somehow.
    if fat_count < 3:
      raise ValueError("< 3 near-fulls after refinement")
    print("On total %5d the fat selection is %5d" % (
      len(self.observations_pair1_selected.indices()), fat_count), file=self.out)
    observations_original_index = \
      self.observations_original_index_pair1_selected.select(fat_selection)

    observations = self.observations_pair1_selected.customized_copy(
      indices = self.observations_pair1_selected.indices().select(fat_selection),
      data = (self.observations_pair1_selected.data().select(fat_selection)/scaler_s),
      sigmas = (self.observations_pair1_selected.sigmas().select(fat_selection)/(scaler_s * p_scaler_s))
    )
    matches = miller.match_multi_indices(
      miller_indices_unique=self.miller_set.indices(),
      miller_indices=observations.indices())

    I_weight = flex.double(len(observations.sigmas()), 1.)
    I_reference = flex.double([self.i_model.data()[pair[0]] for pair in matches.pairs()])
    SWC = simple_weighted_correlation(I_weight, I_reference, observations.data())
    print("CORR: NEW correlation is", SWC.corr, file=self.out)
    self.final_corr = SWC.corr
    #another range assertion
    assert self.final_corr > 0.1,"correlation coefficient out of range (<= 0.1) after LevMar refinement"
    # XXX Specific to the hybrid_rs method, and likely these limits are problem-specific (especially G-max) so look for another approach
    #     or expose the limits as phil parameters.
    assert values.G < 0.5 , "G-scale value out of range ( > 0.5 XXX may be too strict ) after LevMar refinement"

    return observations_original_index, observations, matches
Example 9
    def run(self, experiments, reflections):

        self.logger.log_step_time("POSTREFINEMENT")

        if not self.params.postrefinement.enable:
            self.logger.log("Postrefinement was not done")
            if self.mpi_helper.rank == 0:
                self.logger.main_log("Postrefinement was not done")
            return experiments, reflections

        target_symm = symmetry(
            unit_cell=self.params.scaling.unit_cell,
            space_group_info=self.params.scaling.space_group)
        i_model = self.params.scaling.i_model
        miller_set = self.params.scaling.miller_set

        # Ensure that match_multi_indices() will return identical results
        # when a frame's observations are matched against the
        # pre-generated Miller set, self.miller_set, and the reference
        # data set, self.i_model.  The implication is that the same match
        # can be used to map Miller indices to array indices for intensity
        # accumulation, and for determination of the correlation
        # coefficient in the presence of a scaling reference.
        assert len(i_model.indices()) == len(miller_set.indices())
        assert (i_model.indices() == miller_set.indices()).count(False) == 0

        new_experiments = ExperimentList()
        new_reflections = flex.reflection_table()

        experiments_rejected_by_reason = {}  # reason:how_many_rejected

        for experiment in experiments:

            exp_reflections = reflections.select(
                reflections['exp_id'] == experiment.identifier)

            # Build a miller array for the experiment reflections with original miller indexes
            exp_miller_indices_original = miller.set(
                target_symm, exp_reflections['miller_index'], True)
            observations_original_index = miller.array(
                exp_miller_indices_original,
                exp_reflections['intensity.sum.value'],
                flex.double(
                    flex.sqrt(exp_reflections['intensity.sum.variance'])))

            assert exp_reflections.size() == exp_miller_indices_original.size()
            assert observations_original_index.size(
            ) == exp_miller_indices_original.size()

            # Build a miller array for the experiment reflections with asu miller indexes
            exp_miller_indices_asu = miller.set(
                target_symm, exp_reflections['miller_index_asymmetric'], True)
            observations = miller.array(
                exp_miller_indices_asu, exp_reflections['intensity.sum.value'],
                flex.double(
                    flex.sqrt(exp_reflections['intensity.sum.variance'])))

            matches = miller.match_multi_indices(
                miller_indices_unique=miller_set.indices(),
                miller_indices=observations.indices())

            pair1 = flex.int([pair[1] for pair in matches.pairs()
                              ])  # refers to the observations
            pair0 = flex.int([pair[0] for pair in matches.pairs()
                              ])  # refers to the model

            assert exp_reflections.size() == exp_miller_indices_original.size()
            assert observations_original_index.size(
            ) == exp_miller_indices_original.size()

            # narrow things down to the set that matches, only
            observations_pair1_selected = observations.customized_copy(
                indices=flex.miller_index(
                    [observations.indices()[p] for p in pair1]),
                data=flex.double([observations.data()[p] for p in pair1]),
                sigmas=flex.double([observations.sigmas()[p] for p in pair1]))

            observations_original_index_pair1_selected = observations_original_index.customized_copy(
                indices=flex.miller_index(
                    [observations_original_index.indices()[p] for p in pair1]),
                data=flex.double(
                    [observations_original_index.data()[p] for p in pair1]),
                sigmas=flex.double(
                    [observations_original_index.sigmas()[p] for p in pair1]))

            I_observed = observations_pair1_selected.data()
            MILLER = observations_original_index_pair1_selected.indices()

            ORI = crystal_orientation(experiment.crystal.get_A(),
                                      basis_type.reciprocal)
            Astar = matrix.sqr(ORI.reciprocal_matrix())
            Astar_from_experiment = matrix.sqr(experiment.crystal.get_A())
            assert Astar == Astar_from_experiment

            WAVE = experiment.beam.get_wavelength()
            BEAM = matrix.col((0.0, 0.0, -1. / WAVE))
            BFACTOR = 0.
            MOSAICITY_DEG = experiment.crystal.get_half_mosaicity_deg()
            DOMAIN_SIZE_A = experiment.crystal.get_domain_size_ang()

            # calculation of correlation here
            I_reference = flex.double(
                [i_model.data()[pair[0]] for pair in matches.pairs()])
            I_invalid = flex.bool(
                [i_model.sigmas()[pair[0]] < 0. for pair in matches.pairs()])
            use_weights = False  # New facility for getting variance-weighted correlation

            if use_weights:
                # variance weighting
                I_weight = flex.double([
                    1. / (observations_pair1_selected.sigmas()[pair[1]])**2
                    for pair in matches.pairs()
                ])
            else:
                I_weight = flex.double(
                    len(observations_pair1_selected.sigmas()), 1.)

            I_weight.set_selected(I_invalid, 0.)
            """Explanation of 'include_negatives' semantics as originally implemented in cxi.merge postrefinement:
         include_negatives = True
         + and - reflections both used for Rh distribution for initial estimate of RS parameter
         + and - reflections both used for calc/obs correlation slope for initial estimate of G parameter
         + and - reflections both passed to the refinery and used in the target function (makes sense if
                             you look at it from a certain point of view)

         include_negatives = False
         + and - reflections both used for Rh distribution for initial estimate of RS parameter
         +       reflections only used for calc/obs correlation slope for initial estimate of G parameter
         + and - reflections both passed to the refinery and used in the target function (makes sense if
                             you look at it from a certain point of view)
      """

            # RB: By design, for MPI-Merge "include negatives" is implicitly True
            SWC = simple_weighted_correlation(I_weight, I_reference,
                                              I_observed)
            if self.params.output.log_level == 0:
                self.logger.log("Old correlation is: %f" % SWC.corr)

            if self.params.postrefinement.algorithm == "rs":

                Rhall = flex.double()

                for mill in MILLER:
                    H = matrix.col(mill)
                    Xhkl = Astar * H
                    Rh = (Xhkl + BEAM).length() - (1. / WAVE)
                    Rhall.append(Rh)

                Rs = math.sqrt(flex.mean(Rhall * Rhall))

                RS = 1. / 10000.  # reciprocal effective domain size of 1 micron
                RS = Rs  # try this empirically determined approximate, monochrome, a-mosaic value
                current = flex.double([SWC.slope, BFACTOR, RS, 0., 0.])

                parameterization_class = rs_parameterization
                refinery = rs_refinery(ORI=ORI,
                                       MILLER=MILLER,
                                       BEAM=BEAM,
                                       WAVE=WAVE,
                                       ICALCVEC=I_reference,
                                       IOBSVEC=I_observed)

            elif self.params.postrefinement.algorithm == "eta_deff":

                eta_init = 2. * MOSAICITY_DEG * math.pi / 180.
                D_eff_init = 2. * DOMAIN_SIZE_A
                current = flex.double(
                    [SWC.slope, BFACTOR, eta_init, 0., 0., D_eff_init])

                parameterization_class = eta_deff_parameterization
                refinery = eta_deff_refinery(ORI=ORI,
                                             MILLER=MILLER,
                                             BEAM=BEAM,
                                             WAVE=WAVE,
                                             ICALCVEC=I_reference,
                                             IOBSVEC=I_observed)

            func = refinery.fvec_callable(parameterization_class(current))
            functional = flex.sum(func * func)

            if self.params.output.log_level == 0:
                self.logger.log("functional: %f" % functional)

            self.current = current
            self.parameterization_class = parameterization_class
            self.refinery = refinery

            self.observations_pair1_selected = observations_pair1_selected
            self.observations_original_index_pair1_selected = observations_original_index_pair1_selected

            error_detected = False

            try:
                self.run_plain()

                result_observations_original_index, result_observations, result_matches = self.result_for_cxi_merge(
                )

                assert result_observations_original_index.size(
                ) == result_observations.size()
                assert result_matches.pairs().size(
                ) == result_observations_original_index.size()

            except (AssertionError, ValueError, RuntimeError) as e:
                error_detected = True
                reason = repr(e)
                if not reason:
                    reason = "Unknown error"
                if not reason in experiments_rejected_by_reason:
                    experiments_rejected_by_reason[reason] = 1
                else:
                    experiments_rejected_by_reason[reason] += 1

            if not error_detected:
                new_experiments.append(experiment)

                new_exp_reflections = flex.reflection_table()
                new_exp_reflections[
                    'miller_index_asymmetric'] = flex.miller_index(
                        result_observations.indices())
                new_exp_reflections['intensity.sum.value'] = flex.double(
                    result_observations.data())
                new_exp_reflections['intensity.sum.variance'] = flex.double(
                    flex.pow(result_observations.sigmas(), 2))
                new_exp_reflections['exp_id'] = flex.std_string(
                    len(new_exp_reflections), experiment.identifier)
                new_reflections.extend(new_exp_reflections)
            '''
      # debugging
      elif reason.startswith("ValueError"):
        self.logger.log("Rejected b/c of value error exp id: %s; unit cell: %s"%(exp_id, str(experiment.crystal.get_unit_cell())) )
      '''

        # report rejected experiments, reflections
        experiments_rejected_by_postrefinement = len(experiments) - len(
            new_experiments)
        reflections_rejected_by_postrefinement = reflections.size(
        ) - new_reflections.size()

        self.logger.log("Experiments rejected by post-refinement: %d" %
                        experiments_rejected_by_postrefinement)
        self.logger.log("Reflections rejected by post-refinement: %d" %
                        reflections_rejected_by_postrefinement)

        all_reasons = []
        for reason, count in experiments_rejected_by_reason.items():
            self.logger.log("Experiments rejected due to %s: %d" %
                            (reason, count))
            all_reasons.append(reason)

        comm = self.mpi_helper.comm
        MPI = self.mpi_helper.MPI

        # Collect all rejection reasons from all ranks. Use allreduce to let each rank have all reasons.
        all_reasons = comm.allreduce(all_reasons, MPI.SUM)
        all_reasons = set(all_reasons)

        # Now that each rank has all reasons from all ranks, we can treat the reasons in a uniform way.
        total_experiments_rejected_by_reason = {}
        for reason in all_reasons:
            rejected_experiment_count = 0
            if reason in experiments_rejected_by_reason:
                rejected_experiment_count = experiments_rejected_by_reason[
                    reason]
            total_experiments_rejected_by_reason[reason] = comm.reduce(
                rejected_experiment_count, MPI.SUM, 0)

        total_accepted_experiment_count = comm.reduce(len(new_experiments),
                                                      MPI.SUM, 0)

        # how many reflections have we rejected due to post-refinement?
        rejected_reflections = len(reflections) - len(new_reflections)
        total_rejected_reflections = self.mpi_helper.sum(rejected_reflections)

        if self.mpi_helper.rank == 0:
            for reason, count in total_experiments_rejected_by_reason.items():
                self.logger.main_log(
                    "Total experiments rejected due to %s: %d" %
                    (reason, count))
            self.logger.main_log("Total experiments accepted: %d" %
                                 total_accepted_experiment_count)
            self.logger.main_log(
                "Total reflections rejected due to post-refinement: %d" %
                total_rejected_reflections)

        self.logger.log_step_time("POSTREFINEMENT", True)

        return new_experiments, new_reflections
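Aside: in the "rs" branch above, Rh is the distance of each reciprocal-lattice point Xhkl = Astar * H from the Ewald sphere, and Rs seeds the refinement with its RMS. A hedged numpy restatement with toy values (the matrix and wavelength are made up):

import numpy as np

wavelength = 1.3                                  # angstrom (toy value)
Astar = np.diag([0.01, 0.0125, 0.02])             # toy reciprocal-space matrix
s0 = np.array([0.0, 0.0, -1.0 / wavelength])      # BEAM = (0, 0, -1/WAVE)
millers = np.array([[1, 2, 3], [4, 0, 1], [2, 2, 2]], dtype=float)
x = millers @ Astar.T                             # Xhkl = Astar * H per reflection
Rh = np.linalg.norm(x + s0, axis=1) - 1.0 / wavelength
Rs = np.sqrt(np.mean(Rh * Rh))                    # the empirical RS seed above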
Example 10
def estimate_resolution_limit_distl_method1(reflections, plot_filename=None):
    # Implementation of Method 1 (section 2.4.4) of:
    # Z. Zhang, N. K. Sauter, H. van den Bedem, G. Snell and A. M. Deacon
    # J. Appl. Cryst. (2006). 39, 112-119
    # https://doi.org/10.1107/S0021889805040677

    variances = reflections["intensity.sum.variance"]

    sel = variances > 0
    reflections = reflections.select(sel)
    d_star_sq = flex.pow2(reflections["rlp"].norms())
    d_spacings = uctbx.d_star_sq_as_d(d_star_sq)
    d_star_cubed = flex.pow(reflections["rlp"].norms(), 3)

    step = 2
    while len(reflections) / step > 40:
        step += 1

    order = flex.sort_permutation(d_spacings, reverse=True)

    ds3_subset = flex.double()
    d_subset = flex.double()
    for i in range(len(reflections) // step):
        ds3_subset.append(d_star_cubed[order[i * step]])
        d_subset.append(d_spacings[order[i * step]])

    x = flex.double(range(len(ds3_subset)))

    # (i)
    # Usually, Pm is the last point, that is, m = n. But m could be smaller than
    # n if an unusually high number of spots are detected around a certain
    # intermediate resolution. In that case, our search for the image resolution
    # does not go outside the spot 'bump'. This is particularly useful when
    # ice-rings are present.

    slopes = (ds3_subset[1:] - ds3_subset[0]) / (x[1:] - x[0])
    skip_first = 3
    p_m = flex.max_index(slopes[skip_first:]) + 1 + skip_first

    # (ii)

    x1 = matrix.col((0, ds3_subset[0]))
    x2 = matrix.col((p_m, ds3_subset[p_m]))

    gaps = flex.double([0])
    v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

    for i in range(1, p_m):
        x0 = matrix.col((i, ds3_subset[i]))
        r = x1 - x0
        g = abs(v.dot(r))
        gaps.append(g)

    mv = flex.mean_and_variance(gaps)
    s = mv.unweighted_sample_standard_deviation()

    # (iii)

    p_k = flex.max_index(gaps)
    g_k = gaps[p_k]
    p_g = p_k
    for i in range(p_k + 1, len(gaps)):
        g_i = gaps[i]
        if g_i > (g_k - 0.5 * s):
            p_g = i

    d_g = d_subset[p_g]

    noisiness = 0
    n = len(ds3_subset)
    for i in range(n - 1):
        for j in range(i + 1, n - 1):
            if slopes[i] >= slopes[j]:
                noisiness += 1
    noisiness /= (n - 1) * (n - 2) / 2

    if plot_filename is not None:
        from matplotlib import pyplot

        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(range(len(ds3_subset)), ds3_subset)
        ax.set_ylabel("D^-3")
        xlim = pyplot.xlim()
        ylim = pyplot.ylim()
        ax.vlines(p_g, ylim[0], ylim[1], colors="red")
        pyplot.xlim(0, xlim[1])
        pyplot.ylim(0, ylim[1])
        pyplot.savefig(plot_filename)
        pyplot.close()

    return d_g, noisiness
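Aside: step (ii) above measures each point's perpendicular distance ("gap") to the chord from P_0 = (0, ds3_0) to P_m = (p_m, ds3_m) by projecting onto the chord's unit normal v. A compact numpy restatement on a toy curve:

import numpy as np

ds3 = np.array([0.01, 0.02, 0.05, 0.06, 0.065, 0.07])  # toy ds3_subset
p_m = len(ds3) - 1
x1 = np.array([0.0, ds3[0]])
x2 = np.array([float(p_m), ds3[p_m]])
v = np.array([x2[1] - x1[1], -(x2[0] - x1[0])])
v /= np.linalg.norm(v)                                 # unit normal to the chord
gaps = [abs(v @ (x1 - np.array([float(i), ds3[i]]))) for i in range(p_m + 1)]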
Example 11
    def run_cc(self):
        uniform, selected_uniform, have_iso_ref = self.load_cc_data()

        include_negatives = True
        if have_iso_ref:
            slope, offset, corr_iso, N_iso = self.correlation(
                selected_uniform[1], selected_uniform[0], include_negatives)
            self.logger.main_log("C.C. iso is %.1f%% on %d indices" %
                                 (100 * corr_iso, N_iso))

        slope, offset, corr_int, N_int = self.correlation(
            selected_uniform[2], selected_uniform[3], include_negatives)
        self.logger.main_log("C.C. int is %.1f%% on %d indices" %
                             (100. * corr_int, N_int))

        if have_iso_ref:
            binned_cc_ref, binned_cc_ref_N = self.binned_correlation(
                selected_uniform[1], selected_uniform[0], include_negatives)
            #binned_cc_ref.show(f=output)

            ref_scale = self.scale_factor(selected_uniform[1],
                                          selected_uniform[0],
                                          weights=flex.pow(
                                              selected_uniform[1].sigmas(),
                                              -2),
                                          use_binning=True)
            #ref_scale.show(f=output)

            ref_riso = self.r1_factor(selected_uniform[1],
                                      selected_uniform[0],
                                      scale_factor=ref_scale,
                                      use_binning=True)
            #ref_riso.show(f=output)

            ref_scale_all = self.scale_factor(selected_uniform[1],
                                              selected_uniform[0],
                                              weights=flex.pow(
                                                  selected_uniform[1].sigmas(),
                                                  -2))

            ref_riso_all = self.r1_factor(selected_uniform[1],
                                          selected_uniform[0],
                                          scale_factor=ref_scale_all)

        binned_cc_int, binned_cc_int_N = self.binned_correlation(
            #selected_uniform[2], selected_uniform[3], params.include_negatives)
            selected_uniform[2],
            selected_uniform[3],
            True)
        #binned_cc_int.show(f=output)

        oe_scale = self.scale_factor(
            selected_uniform[2],
            selected_uniform[3],
            weights=flex.pow(selected_uniform[2].sigmas(), -2) +
            flex.pow(selected_uniform[3].sigmas(), -2),
            use_binning=True)
        #oe_scale.show(f=output)

        oe_rint = self.r1_factor(selected_uniform[2],
                                 selected_uniform[3],
                                 scale_factor=oe_scale,
                                 use_binning=True)
        #oe_rint.show(f=output)

        oe_rsplit = self.r_split(selected_uniform[2],
                                 selected_uniform[3],
                                 use_binning=True)

        oe_scale_all = self.scale_factor(
            selected_uniform[2],
            selected_uniform[3],
            weights=flex.pow(selected_uniform[2].sigmas(), -2) +
            flex.pow(selected_uniform[3].sigmas(), -2),
        )

        oe_rint_all = self.r1_factor(selected_uniform[2],
                                     selected_uniform[3],
                                     scale_factor=oe_scale_all)
        oe_rsplit_all = self.r_split(selected_uniform[2], selected_uniform[3])
        if have_iso_ref:
            self.logger.main_log("R factors Riso = %.1f%%, Rint = %.1f%%" %
                                 (100. * ref_riso_all, 100. * oe_rint_all))
        else:
            self.logger.main_log("R factor Rint = %.1f%%" %
                                 (100. * oe_rint_all))

        split_sigma_data = self.split_sigma_test(selected_uniform[2],
                                                 selected_uniform[3],
                                                 scale=oe_scale,
                                                 use_binning=True,
                                                 show_plot=False)
        split_sigma_data_all = self.split_sigma_test(selected_uniform[2],
                                                     selected_uniform[3],
                                                     scale=oe_scale_all,
                                                     use_binning=False,
                                                     show_plot=False)

        self.logger.main_log('')
        if self.params.scaling.model_reindex_op == "h,k,l":
            self.logger.main_log("Table of Scaling Results:")
        else:
            self.logger.main_log(
                "Table of Scaling Results with Model Reindexing as %s:" %
                self.params.scaling.model_reindex_op)

        from libtbx import table_utils
        table_header = [
            "", "", "", "CC", " N", "CC", " N", "R", "R", "R", "Scale",
            "Scale", "SpSig"
        ]
        table_header2 = [
            "Bin", "Resolution Range", "Completeness", "int", "int", "iso",
            "iso", "int", "split", "iso", "int", "iso", "Test"
        ]
        table_data = []
        table_data.append(table_header)
        table_data.append(table_header2)

        items = binned_cc_int.binner.range_used()

        # XXX Make it clear what the completeness here actually is!
        cumulative_counts_given = 0
        cumulative_counts_complete = 0
        for bin in items:
            table_row = []
            table_row.append("%3d" % bin)
            table_row.append(
                "%-13s" %
                binned_cc_int.binner.bin_legend(i_bin=bin,
                                                show_bin_number=False,
                                                show_bin_range=False,
                                                show_d_range=True,
                                                show_counts=False))
            table_row.append(
                "%13s" % binned_cc_int.binner.bin_legend(i_bin=bin,
                                                         show_bin_number=False,
                                                         show_bin_range=False,
                                                         show_d_range=False,
                                                         show_counts=True))
            cumulative_counts_given += binned_cc_int.binner._counts_given[bin]
            cumulative_counts_complete += binned_cc_int.binner._counts_complete[
                bin]
            table_row.append("%.1f%%" % (100. * binned_cc_int.data[bin]))
            table_row.append("%7d" % (binned_cc_int_N.data[bin]))

            if have_iso_ref and binned_cc_ref.data[bin] is not None:
                table_row.append("%.1f%%" % (100 * binned_cc_ref.data[bin]))
            else:
                table_row.append("--")

            if have_iso_ref and binned_cc_ref_N.data[bin] is not None:
                table_row.append("%6d" % (binned_cc_ref_N.data[bin]))
            else:
                table_row.append("--")

            if oe_rint.data[bin] is not None:
                table_row.append("%.1f%%" % (100. * oe_rint.data[bin]))
            else:
                table_row.append("--")

            if oe_rsplit.data[bin] is not None:
                table_row.append("%.1f%%" % (100 * oe_rsplit.data[bin]))
            else:
                table_row.append("--")

            if have_iso_ref and ref_riso.data[bin] is not None:
                table_row.append("%.1f%%" % (100 * ref_riso.data[bin]))
            else:
                table_row.append("--")

            if oe_scale.data[bin] is not None:
                table_row.append("%.3f" % oe_scale.data[bin])
            else:
                table_row.append("--")

            if have_iso_ref and ref_scale.data[bin] is not None:
                table_row.append("%.3f" % ref_scale.data[bin])
            else:
                table_row.append("--")

            if split_sigma_data.data[bin] is not None:
                table_row.append("%.4f" % split_sigma_data.data[bin])
            else:
                table_row.append("--")

            table_data.append(table_row)
        table_data.append([""] * len(table_header))

        table_row = [
            format_value("%3s", "All"),
            format_value("%-13s", "                 "),
            format_value(
                "%13s", "[%d/%d]" %
                (cumulative_counts_given, cumulative_counts_complete)),
            format_value("%.1f%%", 100 * corr_int),
            format_value("%7d", N_int)
        ]

        if have_iso_ref:
            table_row.extend(
                (format_value("%.1f%%",
                              100 * corr_iso), format_value("%6d", N_iso)))
        else:
            table_row.extend(("--", "--"))

        table_row.extend((format_value("%.1f%%", 100 * oe_rint_all),
                          format_value("%.1f%%", 100 * oe_rsplit_all)))
        if have_iso_ref:
            table_row.append(format_value("%.1f%%", 100 * ref_riso_all))
        else:
            table_row.append("--")

        table_row.append(format_value("%.3f", oe_scale_all))
        if have_iso_ref:
            table_row.append(format_value("%.3f", ref_scale_all))
        else:
            table_row.append("--")

        if split_sigma_data_all is not None:
            table_row.append("%.1f" % split_sigma_data_all)
        else:
            table_row.append("--")

        table_data.append(table_row)

        self.logger.main_log(' ')
        self.logger.main_log(
            table_utils.format(table_data,
                               has_header=2,
                               justify='center',
                               delim=" "))
        self.logger.main_log(
            """CCint is the CC-1/2 defined by Diederichs; correlation between odd/even images.
    Similarly, Scale int and R int are the scaling factor and scaling R factor between odd/even images.
    "iso" columns compare the whole XFEL dataset to the isomorphous reference."""
        )

        self.logger.main_log("Niso: result vs. reference common set")

        if have_iso_ref:
            assert N_iso == flex.sum(
                flex.double([x for x in binned_cc_ref_N.data
                             if x is not None]))
        assert N_int == flex.sum(
            flex.double([x for x in binned_cc_int_N.data if x is not None]))

        # TODO: how is plotting handled in the new phil design?
        '''
    if params.scaling.show_plots:
      from matplotlib import pyplot as plt
      plt.plot(flex.log(selected_uniform[-2].data()),
               flex.log(selected_uniform[-1].data()), 'r.')
      plt.show()
      if have_iso_ref:
        plt.plot(flex.log(selected_uniform[0].data()),
                 flex.log(selected_uniform[1].data()), 'r.')
        plt.show()
    '''
        self.logger.main_log(' ')
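Aside: the table compares statistics between odd/even image half-sets. For reference, a hedged numpy sketch of the R_split statistic reported above, using the commonly cited definition; the class method r_split may differ in detail:

import numpy as np

def r_split(i_a, i_b):
    # R_split = (1/sqrt(2)) * sum|Ia - Ib| / (0.5 * sum(Ia + Ib))
    return np.abs(i_a - i_b).sum() / (np.sqrt(2.0) * 0.5 * (i_a + i_b).sum())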
Example 12
    def split_sigma_test(self,
                         this,
                         other,
                         scale,
                         use_binning=False,
                         show_plot=False):
        """
    Calculates the split sigma ratio test by Peter Zwart:
    ssr = sum( (Iah-Ibh)^2 ) / sum( sigma_ah^2 + sigma_bh^2)

    where Iah and Ibh are merged intensities for a given hkl from two halves of
    a dataset (a and b). Likewise for sigma_ah and sigma_bh.

    ssr (split sigma ratio) should approximately equal 1 if the errors are correctly estimated.
    """

        assert other.size() == this.data().size()
        assert (this.indices() == other.indices()).all_eq(True)
        assert not use_binning or this.binner() is not None

        if use_binning:
            results = []
            for i_bin in this.binner().range_all():
                sel = this.binner().selection(i_bin)
                i_this = this.select(sel)
                i_other = other.select(sel)
                scale_rel = scale.data[i_bin]
                if i_this.size() == 0:
                    results.append(None)
                else:
                    results.append(
                        self.split_sigma_test(i_this,
                                              i_other,
                                              scale=scale_rel,
                                              show_plot=show_plot))
            return binned_data(binner=this.binner(),
                               data=results,
                               data_fmt="%7.4f")

        a_data = this.data()
        b_data = scale * other.data()
        a_sigmas = this.sigmas()
        b_sigmas = scale * other.sigmas()

        if show_plot:
            """
      # Diagnostic use of the (I - <I>) / sigma distribution, should have mean=0, std=1
      a_variance = a_sigmas * a_sigmas
      b_variance = b_sigmas * b_sigmas
      mean_num = (a_data/ (a_variance) ) + (b_data/ (b_variance) )
      mean_den = (1./ (a_variance) ) + (1./ (b_variance) )
      mean_values = mean_num / mean_den

      delta_I_a = a_data - mean_values
      normal_a = delta_I_a / (a_sigmas)
      stats_a = flex.mean_and_variance(normal_a)
      print "\nA mean %7.4f std %7.4f"%(stats_a.mean(),stats_a.unweighted_sample_standard_deviation())
      order_a = flex.sort_permutation(normal_a)

      delta_I_b = b_data - mean_values
      normal_b = delta_I_b / (b_sigmas)
      stats_b = flex.mean_and_variance(normal_b)
      print "B mean %7.4f std %7.4f"%(stats_b.mean(),stats_b.unweighted_sample_standard_deviation())
      order_b = flex.sort_permutation(normal_b)
      # plots for debugging
      from matplotlib import pyplot as plt
      plt.plot(range(len(order_a)),normal_a.select(order_a),"b.")
      plt.plot(range(len(order_b)),normal_b.select(order_b),"r.")
      plt.show()
      """
            from cctbx.examples.merging.sigma_correction import ccp4_model
            Correction = ccp4_model()
            Correction.plots(a_data, b_data, a_sigmas, b_sigmas)
            #a_new_variance,b_new_variance = Correction.optimize(a_data, b_data, a_sigmas, b_sigmas)
            #Correction.plots(a_data, b_data, flex.sqrt(a_new_variance), flex.sqrt(b_new_variance))

        n = flex.pow(a_data - b_data, 2)
        d = flex.pow(a_sigmas, 2) + flex.pow(b_sigmas, 2)

        return flex.sum(n) / flex.sum(d)
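Aside: the unbinned branch of split_sigma_test can be exercised on synthetic data. A numpy sketch (illustrative) showing that correctly estimated sigmas give a split sigma ratio near 1:

import numpy as np

rng = np.random.default_rng(1)
truth = rng.uniform(100.0, 1000.0, 5000)
sig = np.sqrt(truth)                              # Poisson-like sigmas
i_a = truth + sig * rng.standard_normal(truth.size)
i_b = truth + sig * rng.standard_normal(truth.size)
ssr = np.sum((i_a - i_b) ** 2) / np.sum(sig ** 2 + sig ** 2)
print(ssr)                                        # close to 1.0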
Example 13
def estimate_resolution_limit_distl_method2(reflections,
                                            imageset,
                                            ice_sel=None,
                                            plot_filename=None):

    # Implementation of Method 2 (section 2.4.4) of:
    # Z. Zhang, N. K. Sauter, H. van den Bedem, G. Snell and A. M. Deacon
    # J. Appl. Cryst. (2006). 39, 112-119
    # http://dx.doi.org/10.1107/S0021889805040677

    if ice_sel is None:
        ice_sel = flex.bool(len(reflections), False)

    variances = reflections['intensity.sum.variance']

    sel = variances > 0
    intensities = reflections['intensity.sum.value']
    variances = variances.select(sel)
    ice_sel = ice_sel.select(sel)
    reflections = reflections.select(sel)
    intensities = reflections['intensity.sum.value']
    d_star_sq = flex.pow2(reflections['rlp'].norms())
    d_spacings = uctbx.d_star_sq_as_d(d_star_sq)
    d_star_cubed = flex.pow(reflections['rlp'].norms(), 3)

    binner = binner_d_star_cubed(d_spacings)

    bin_counts = flex.size_t()

    for i_slot, slot in enumerate(binner.bins):
        sel_all = (d_spacings < slot.d_max) & (d_spacings >= slot.d_min)
        #sel = ~(ice_sel) & sel_all
        sel = sel_all

        bin_counts.append(sel.count(True))

    #print list(bin_counts)
    t0 = (bin_counts[0] + bin_counts[1]) / 2

    mu = 0.15

    for i in range(len(bin_counts) - 1):
        tj = bin_counts[i]
        tj1 = bin_counts[i + 1]
        if (tj < (mu * t0)) and (tj1 < (mu * t0)):
            break

    d_min = binner.bins[i].d_min
    noisiness = 0
    m = len(bin_counts)
    for i in range(m):
        for j in range(i + 1, m):
            if bin_counts[i] <= bin_counts[j]:
                noisiness += 1
    noisiness /= (0.5 * m * (m - 1))

    if plot_filename is not None:
        if pyplot is None:
            raise Sorry("matplotlib must be installed to generate a plot.")
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.scatter(range(len(bin_counts)), bin_counts)
        #ax.set_xlabel('')
        ax.set_ylabel('number of spots in shell')
        xlim = pyplot.xlim()
        ylim = pyplot.ylim()
        ax.vlines(i, ylim[0], ylim[1], colors='red')
        pyplot.xlim(0, xlim[1])
        pyplot.ylim(0, ylim[1])
        pyplot.savefig(plot_filename)
        pyplot.close()

    return d_min, noisiness
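Aside: the resolution cutoff above is the first shell where two consecutive bin counts both fall below mu * t0, with t0 averaging the two lowest-resolution shells. A numpy restatement of just that rule on toy counts:

import numpy as np

bin_counts = np.array([90, 80, 60, 35, 20, 9, 7, 12, 3])  # toy spots per shell
t0 = (bin_counts[0] + bin_counts[1]) / 2
mu = 0.15
cutoff = len(bin_counts) - 1
for i in range(len(bin_counts) - 1):
    if bin_counts[i] < mu * t0 and bin_counts[i + 1] < mu * t0:
        cutoff = i
        break
print(cutoff)  # -> 5: shells 5 and 6 (9 and 7 spots) both fall below 0.15 * 85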
Example 14
def estimate_resolution_limit_distl_method2(
  reflections, imageset, ice_sel=None, plot_filename=None):

  # Implementation of Method 2 (section 2.4.4) of:
  # Z. Zhang, N. K. Sauter, H. van den Bedem, G. Snell and A. M. Deacon
  # J. Appl. Cryst. (2006). 39, 112-119
  # http://dx.doi.org/10.1107/S0021889805040677

  if ice_sel is None:
    ice_sel = flex.bool(len(reflections), False)

  variances = reflections['intensity.sum.variance']

  sel = variances > 0
  intensities = reflections['intensity.sum.value']
  variances = variances.select(sel)
  ice_sel = ice_sel.select(sel)
  reflections = reflections.select(sel)
  intensities = reflections['intensity.sum.value']
  d_star_sq = flex.pow2(reflections['rlp'].norms())
  d_spacings = uctbx.d_star_sq_as_d(d_star_sq)
  d_star_cubed = flex.pow(reflections['rlp'].norms(), 3)

  binner = binner_d_star_cubed(d_spacings)

  bin_counts = flex.size_t()

  for i_slot, slot in enumerate(binner.bins):
    sel_all = (d_spacings < slot.d_max) & (d_spacings >= slot.d_min)
    #sel = ~(ice_sel) & sel_all
    sel = sel_all

    bin_counts.append(sel.count(True))

  #print list(bin_counts)
  t0 = (bin_counts[0] + bin_counts[1])/2

  mu = 0.15

  for i in range(len(bin_counts)-1):
    tj = bin_counts[i]
    tj1 = bin_counts[i+1]
    if (tj < (mu * t0)) and (tj1 < (mu * t0)):
      break

  d_min = binner.bins[i].d_min
  noisiness = 0
  m = len(bin_counts)
  for i in range(m):
    for j in range(i+1, m):
      if bin_counts[i] <= bin_counts[j]:
        noisiness += 1
  noisiness /= (0.5 * m * (m-1))

  if plot_filename is not None:
    if pyplot is None:
      raise Sorry("matplotlib must be installed to generate a plot.")
    fig = pyplot.figure()
    ax = fig.add_subplot(1,1,1)
    ax.scatter(range(len(bin_counts)), bin_counts)
    #ax.set_xlabel('')
    ax.set_ylabel('number of spots in shell')
    xlim = pyplot.xlim()
    ylim = pyplot.ylim()
    ax.vlines(i, ylim[0], ylim[1], colors='red')
    pyplot.xlim(0, xlim[1])
    pyplot.ylim(0, ylim[1])
    pyplot.savefig(plot_filename)
    pyplot.close()

  return d_min, noisiness
Example 15
def estimate_resolution_limit_distl_method1(
  reflections, imageset, ice_sel=None, plot_filename=None):

  # Implementation of Method 1 (section 2.4.4) of:
  # Z. Zhang, N. K. Sauter, H. van den Bedem, G. Snell and A. M. Deacon
  # J. Appl. Cryst. (2006). 39, 112-119
  # http://dx.doi.org/10.1107/S0021889805040677

  if ice_sel is None:
    ice_sel = flex.bool(len(reflections), False)

  variances = reflections['intensity.sum.variance']

  sel = variances > 0
  intensities = reflections['intensity.sum.value']
  variances = variances.select(sel)
  ice_sel = ice_sel.select(sel)
  reflections = reflections.select(sel)
  intensities = reflections['intensity.sum.value']
  d_star_sq = flex.pow2(reflections['rlp'].norms())
  d_spacings = uctbx.d_star_sq_as_d(d_star_sq)
  d_star_cubed = flex.pow(reflections['rlp'].norms(), 3)

  step = 2
  while len(reflections)/step > 40:
    step += 1

  order = flex.sort_permutation(d_spacings, reverse=True)

  ds3_subset = flex.double()
  d_subset = flex.double()
  for i in range(len(reflections)//step):
    ds3_subset.append(d_star_cubed[order[i*step]])
    d_subset.append(d_spacings[order[i*step]])

  x = flex.double(range(len(ds3_subset)))

  # (i)
  # Usually, Pm is the last point, that is, m = n. But m could be smaller than
  # n if an unusually high number of spots are detected around a certain
  # intermediate resolution. In that case, our search for the image resolution
  # does not go outside the spot 'bump'. This is particularly useful when
  # ice-rings are present.

  slopes = (ds3_subset[1:] - ds3_subset[0])/(x[1:]-x[0])
  skip_first = 3
  p_m = flex.max_index(slopes[skip_first:]) + 1 + skip_first

  # (ii)

  from scitbx import matrix
  x1 = matrix.col((0, ds3_subset[0]))
  x2 = matrix.col((p_m, ds3_subset[p_m]))

  gaps = flex.double([0])
  v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

  for i in range(1, p_m):
    x0 = matrix.col((i, ds3_subset[i]))
    r = x1 - x0
    g = abs(v.dot(r))
    gaps.append(g)

  mv = flex.mean_and_variance(gaps)
  s = mv.unweighted_sample_standard_deviation()

  # (iii)

  p_k = flex.max_index(gaps)
  g_k = gaps[p_k]
  p_g = p_k
  for i in range(p_k+1, len(gaps)):
    g_i = gaps[i]
    if g_i > (g_k - 0.5 * s):
      p_g = i

  ds3_g = ds3_subset[p_g]
  d_g = d_subset[p_g]

  noisiness = 0
  n = len(ds3_subset)
  for i in range(n-1):
    for j in range(i+1, n-1):
      if slopes[i] >= slopes[j]:
        noisiness += 1
  noisiness /= ((n-1)*(n-2)/2)

  if plot_filename is not None:
    if pyplot is None:
      raise Sorry("matplotlib must be installed to generate a plot.")
    fig = pyplot.figure()
    ax = fig.add_subplot(1,1,1)
    ax.scatter(range(len(ds3_subset)), ds3_subset)
    #ax.set_xlabel('')
    ax.set_ylabel('D^-3')
    xlim = pyplot.xlim()
    ylim = pyplot.ylim()
    ax.vlines(p_g, ylim[0], ylim[1], colors='red')
    pyplot.xlim(0, xlim[1])
    pyplot.ylim(0, ylim[1])
    pyplot.savefig(plot_filename)
    pyplot.close()

  return d_g, noisiness