Example no. 1
def anomalous_probability_plot(intensities, expected_delta=None):
  from scitbx.math import distributions
  from scitbx.array_family import flex

  assert intensities.is_unique_set_under_symmetry()
  assert intensities.anomalous_flag()

  dI = intensities.anomalous_differences()
  y = dI.data()/dI.sigmas()
  perm = flex.sort_permutation(y)
  y = y.select(perm)
  distribution = distributions.normal_distribution()

  x = distribution.quantiles(y.size())

  if expected_delta is not None:
    sel = flex.abs(x) < expected_delta
    x = x.select(sel)
    y = y.select(sel)

  fit = flex.linear_regression(x, y)
  correlation = flex.linear_correlation(x, y)
  assert fit.is_well_defined()

  if False:  # flip to True for a quick diagnostic plot
    from matplotlib import pyplot
    pyplot.scatter(x, y)
    m = fit.slope()
    c = fit.y_intercept()
    pyplot.plot(pyplot.xlim(), [m * x_ + c for x_ in pyplot.xlim()])
    pyplot.show()

  return fit.slope(), fit.y_intercept(), x.size()
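For a quick sanity check of the same idea outside cctbx, scipy's probplot computes matching theoretical quantiles and a least-squares fit (a minimal sketch, not part of the snippet above; numpy/scipy assumed available):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
data = rng.standard_normal(500)

# probplot returns the ordered (theoretical, observed) pairs plus a
# least-squares fit; for normal data slope ~ 1, intercept ~ 0, r ~ 1.
(osm, osr), (slope, intercept, r) = stats.probplot(data, dist="norm")
print(slope, intercept, r)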
Example no. 2
    def run(self):
        """Calculate statistics of array of observed values with uncertainties"""

        query_values, ref_values, q_cut, file_manager = self.data

        # Extract the theoretical quantiles that we would expect if these values were from a normal distribution
        the_diff_vals = normal_distribution().quantiles(len(query_values))

        # Select the points in the middle of the distribution
        mid_idxs = (the_diff_vals < q_cut).iselection().intersection(
            (the_diff_vals > -1 * q_cut).iselection())
        mid_the_diff_vals = the_diff_vals.select(mid_idxs)

        # Calculate the difference from the reference values
        act_diff_vals = query_values - ref_values
        srt_act_diff_vals = flex.double(sorted(act_diff_vals))
        mid_act_diff_vals = srt_act_diff_vals.select(mid_idxs)

        # Calculate the slope of the centre of the graph
        map_unc, map_off = numpy.polyfit(x=mid_the_diff_vals,
                                         y=mid_act_diff_vals,
                                         deg=1)

        try:
            import matplotlib
            matplotlib.interactive(False)
            from matplotlib import pyplot
            pyplot.style.use('ggplot')
            output_graphs = True
        except ImportError:  # matplotlib not available
            output_graphs = False

        if output_graphs and file_manager:
            # Sort query and ref values for plotting
            srt_query_vals = sorted(query_values)
            srt_ref_vals = sorted(ref_values)

            analyse_graphs.mean_obs_scatter(
                f_name=file_manager.get_file('obs_qqplot_unsorted_png'),
                mean_vals=ref_values,
                obs_vals=query_values)

            analyse_graphs.sorted_mean_obs_scatter(
                f_name=file_manager.get_file('obs_qqplot_sorted_png'),
                mean_vals=srt_ref_vals,
                obs_vals=srt_query_vals)

            analyse_graphs.uncertainty_qqplot(
                f_name=file_manager.get_file('unc_qqplot_png'),
                map_off=map_off,
                map_unc=map_unc,
                q_cut=q_cut,
                obs_diff=srt_act_diff_vals,
                quantile=the_diff_vals)

        # Print a running row of dots
        print('>', end='')
        sys.stdout.flush()

        return map_unc
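The central-region fit above can be reproduced with plain numpy/scipy (a hedged sketch on synthetic data; the names query/ref/q_cut mirror the method's inputs):

import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
query = rng.normal(loc=0.0, scale=2.0, size=1000)  # "observed" values
ref = np.zeros(1000)                               # "reference" values
q_cut = 1.5

# Theoretical N(0,1) quantiles at uniform plotting positions.
n = query.size
theor = stats.norm.ppf((np.arange(1, n + 1) - 0.5) / n)
diffs = np.sort(query - ref)

# Fit only the central |q| < q_cut region, as in run() above.
mid = np.abs(theor) < q_cut
map_unc, map_off = np.polyfit(theor[mid], diffs[mid], deg=1)
print(map_unc)  # ~2.0: the Q-Q slope estimates the spread of the values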
Example no. 3
  def __init__(self,
               hooft_analysis,
               use_students_t_distribution=False,
               students_t_nu=None,
               probability_plot_slope=None):
    self.delta_fo2, minus_fo2 =\
        hooft_analysis.delta_fo2.generate_bijvoet_mates().hemispheres_acentrics()
    self.delta_fc2, minus_fc2 =\
        hooft_analysis.delta_fc2.generate_bijvoet_mates().hemispheres_acentrics()
    # we want to plot both hemispheres
    self.delta_fo2.indices().extend(minus_fo2.indices())
    self.delta_fo2.data().extend(minus_fo2.data() * -1)
    self.delta_fo2.sigmas().extend(minus_fo2.sigmas())
    self.delta_fc2.indices().extend(minus_fc2.indices())
    self.delta_fc2.data().extend(minus_fc2.data() * -1)
    self.indices = self.delta_fo2.indices()
    observed_deviations = (hooft_analysis.G * self.delta_fc2.data()
                           - self.delta_fo2.data())/self.delta_fo2.sigmas()

    if probability_plot_slope is not None:
      observed_deviations /= probability_plot_slope
    selection = flex.sort_permutation(observed_deviations)
    observed_deviations = observed_deviations.select(selection)
    if use_students_t_distribution:
      if students_t_nu is None:
        students_t_nu = maximise_students_t_correlation_coefficient(
          observed_deviations, 1, 200)
      self.distribution = distributions.students_t_distribution(students_t_nu)
    else:
      self.distribution = distributions.normal_distribution()
    self.x = self.distribution.quantiles(observed_deviations.size())
    self.y = observed_deviations
    self.fit = flex.linear_regression(self.x[5:-5], self.y[5:-5])
    self.correlation = flex.linear_correlation(self.x[5:-5], self.y[5:-5])
    assert self.fit.is_well_defined()
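maximise_students_t_correlation_coefficient is not shown here; a hedged scipy sketch of the same probability-plot-correlation idea, searching nu over the same 1..200 range the constructor passes:

import numpy as np
from scipy import stats

rng = np.random.default_rng(2)
obs = np.sort(stats.t.rvs(df=5, size=2000, random_state=rng))
pp = (np.arange(1, obs.size + 1) - 0.5) / obs.size  # plotting positions

def t_ppcc(nu):
    # correlation between Student's t quantiles and the sorted data
    return np.corrcoef(stats.t.ppf(pp, df=nu), obs)[0, 1]

best_nu = max(range(1, 201), key=t_ppcc)
print(best_nu)  # should land near the true df = 5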
Example no. 4
  def normal_probability_plot(self, data, rankits_sel=None, plot=False):
    """ Use normal probability analysis to determine if a set of data is normally distributed
    See https://en.wikipedia.org/wiki/Normal_probability_plot.
    Rankits are computed in the same way as qqnorm does in R.
    @param data flex array
    @param rankits_sel only use the rankits in a certain range. Useful for outlier rejection. Should be
    a tuple such as (-0.5,0.5).
    @param plot whether to show the normal probability plot
    """
    from scitbx.math import distributions
    import numpy as np
    norm = distributions.normal_distribution()

    n = len(data)
    if n <= 10:
      a = 3/8
    else:
      a = 0.5

    sorted_data = flex.sorted(data)
    rankits = flex.double([norm.quantile((i+1-a)/(n+1-(2*a))) for i in range(n)])

    if rankits_sel is None:
      corr, slope, offset = self.get_overall_correlation_flex(sorted_data, rankits)
    else:
      sel = (rankits >= rankits_sel[0]) & (rankits <= rankits_sel[1])
      corr, slope, offset = self.get_overall_correlation_flex(sorted_data.select(sel), rankits.select(sel))

    if plot:
      from matplotlib import pyplot as plt
      f = plt.figure(0)
      lim = -5, 5
      x = np.linspace(lim[0],lim[1],100) # 100 linearly spaced numbers
      y = slope * x + offset
      plt.plot(sorted_data, rankits, '-')
      #plt.plot(x,y)
      plt.title("CC: %.3f Slope: %.3f Offset: %.3f"%(corr, slope, offset))
      plt.xlabel("Sorted data")
      plt.ylabel("Rankits")
      plt.xlim(lim); plt.ylim(lim)
      plt.gca().set_aspect('equal')  # plt.axes() would create a new blank axes in recent matplotlib

      f = plt.figure(1)
      h = flex.histogram(sorted_data, n_slots=100, data_min = lim[0], data_max = lim[1])
      stats = flex.mean_and_variance(sorted_data)
      plt.plot(h.slot_centers().as_numpy_array(), h.slots().as_numpy_array(), '-')
      plt.xlim(lim)
      plt.xlabel("Sorted data")
      plt.ylabel("Count")
      plt.title("Normalized data mean: %.3f +/- %.3f"%(stats.mean(), stats.unweighted_sample_standard_deviation()))

      if self.scaler.params.raw_data.error_models.sdfac_refine.plot_refinement_steps:
        plt.ion()
        plt.pause(0.05)

    return corr, slope, offset
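The rankit expression above, quantile((i + 1 - a) / (n + 1 - 2a)) with a = 3/8 for n <= 10 and a = 0.5 otherwise, is the same plotting-position convention as R's ppoints()/qqnorm. A hedged numpy/scipy re-implementation for comparison:

import numpy as np
from scipy import stats

def rankits(n):
    # same convention as the flex-based list comprehension above
    a = 3.0 / 8.0 if n <= 10 else 0.5
    return stats.norm.ppf((np.arange(1, n + 1) - a) / (n + 1 - 2 * a))

print(rankits(5))  # symmetric about zero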
Example no. 5
def npp(values, input_mean_variance):
  import math
  from scitbx.math import distributions
  from scitbx.array_family import flex
  distribution = distributions.normal_distribution()
  values = flex.sorted(values)
  mean, variance = input_mean_variance
  scaled = (values - mean) / math.sqrt(variance)
  expected = distribution.quantiles(values.size())

  return expected, scaled
Example no. 6
File: NPP.py Project: xia2/xia2
def npp_ify(values, input_mean_variance=None):
  '''Analyse data in values (assumed to be drawn from one population) and
  return the sorted list of (expected, observed) deviation from the mean.'''

  distribution = distributions.normal_distribution()
  values = flex.sorted(values)
  if input_mean_variance:
    mean, variance = input_mean_variance
  else:
    mean, variance = mean_variance(values)

  scaled = (values - mean) / math.sqrt(variance)
  expected = distribution.quantiles(values.size())

  return expected, scaled
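A minimal usage sketch (assuming the module-level imports of xia2's NPP.py: math, flex and scitbx distributions; passing input_mean_variance avoids the mean_variance helper):

from scitbx.array_family import flex

vals = flex.random_double(1000)  # U(0,1) draws: mean 0.5, variance 1/12
expected, scaled = npp_ify(vals, input_mean_variance=(0.5, 1.0 / 12))
# For normal data the (expected, scaled) pairs lie on y = x; uniform data
# bows away from the line in the tails.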
Example no. 7
def map_value_distribution(f_name, plot_vals, plot_normal=False):
    """Plot histogram of values, with optional normal distribution"""
    from scitbx.math.distributions import normal_distribution
    fig = pyplot.figure()
    pyplot.title('Distribution of map values')
    pyplot.hist(x=plot_vals, bins=30, density=True)  # 'normed' was removed in matplotlib 3
    if plot_normal:
        # Plot the distribution for N(0,1)
        nd_t = normal_distribution()
        theor_x = numpy.linspace(-5, 5, 101)
        theor_y = [nd_t.pdf(x) for x in theor_x]
        pyplot.plot(theor_x, theor_y, c='k', ls='--', marker='o')
        # Plot the distribution for the observed distribution
        nd_o = normal_distribution(mean=numpy.mean(plot_vals),
                                   sd=numpy.std(plot_vals))
        obs_x = numpy.linspace(-5, 5, 101)
        obs_y = [nd_o.pdf(x) for x in obs_x]
        pyplot.plot(obs_x, obs_y, c='g', ls='-', marker='o')
    pyplot.xlabel('Map value')
    pyplot.ylabel('Density')
    #pyplot.tight_layout()
    pyplot.subplots_adjust()
    pyplot.savefig(f_name)
    pyplot.close(fig)
Example no. 8
def convert_pvalue_to_zscore(pval, two_tailed=True):
    """Convert a p-value to a z-score for a standard normal N(0,1)"""
    # If two-tailed test, need to halve the p-value
    if two_tailed:
        pval = pval / 2.0
    # Create normal distribution to convert - N(0,1)
    nrm = distributions.normal_distribution()
    # Calculate the probability quantile (z-score) corresponding to 1-pval
    try:
        zsco = nrm.quantile(1.0 - pval)
    except RuntimeError:
        # p-value too small for quantile() to evaluate - cap at z = 8.2,
        # which corresponds to p ~ 6e-17
        if pval < 6e-17:
            zsco = 8.2
        else:
            raise
    return zsco
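A quick numeric check (same scitbx distribution as above): the two-tailed p = 0.05 threshold should reproduce the familiar critical value z ~ 1.96.

from scitbx.math import distributions

nrm = distributions.normal_distribution()
print(nrm.quantile(1.0 - 0.05 / 2.0))  # ~1.96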
Example no. 9
    def calculate_sorted_deviations(self, parameters):
        """Sort the x,y data."""
        sigmaprime = calc_sigmaprime(parameters, self.filtered_Ih_table)
        delta_hl = calc_deltahl(self.filtered_Ih_table,
                                self.filtered_Ih_table.calc_nh(), sigmaprime)
        norm = normal_distribution()
        n = len(delta_hl)
        if n <= 10:
            a = 3 / 8
        else:
            a = 0.5
        self.sortedy = flex.sorted(flex.double(delta_hl))
        self.sortedx = flex.double(
            [norm.quantile((i + 1 - a) / (n + 1 - (2 * a))) for i in range(n)])
        central_sel = (self.sortedx < 1.5) & (self.sortedx > -1.5)
        self.sortedx = self.sortedx.select(central_sel)
        self.sortedy = self.sortedy.select(central_sel)
Example no. 10
def qq_plot_against_normal(f_name, plot_vals):
    """Sort and plot list of values against expected quantiles from a normal distribution"""
    from scitbx.math.distributions import normal_distribution
    fig = pyplot.figure()
    pyplot.title('Q-Q plot for map values against normal distribution')
    expected_vals = normal_distribution().quantiles(len(plot_vals))
    pyplot.plot([min(expected_vals) - 1,
                 max(expected_vals) + 1],
                [min(expected_vals) - 1,
                 max(expected_vals) + 1], 'b--')
    pyplot.plot(sorted(plot_vals), expected_vals, 'go-')
    pyplot.xlabel('Observed quantiles')
    pyplot.ylabel('Theoretical quantiles')
    #pyplot.tight_layout()
    pyplot.subplots_adjust()
    pyplot.savefig(f_name)
    pyplot.close(fig)
Example no. 11
  def calculate_delta_rankits(self):
    '''Implement expression (12) of Brewster2019'''
    # Get the base global index for this rank's deltas. Example: if rank 0 has 10 deltas, the first delta on rank 1 will be the 10th global delta.
    delta_count_per_rank = self.mpi_helper.comm.allreduce([self.deltas.size()])
    base_delta_index = sum(delta_count_per_rank[0:self.mpi_helper.rank])
    self.logger.log("Delta base index: %d"%base_delta_index)

    from scitbx.math import distributions
    import numpy as np
    norm = distributions.normal_distribution()

    a = 3./8. if self.global_delta_count < 10. else 0.5

    self.rankits = flex.double()
    for i in range(self.deltas.size()):
      global_delta_index = base_delta_index + i
      rankit = norm.quantile((global_delta_index+1-a)/(self.global_delta_count+1-(2*a)))
      self.rankits.append(rankit)
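A note on the allreduce above: mpi4py's lowercase allreduce with the default SUM op applied to one-element Python lists concatenates them, so every rank receives the per-rank counts in rank order. A hedged standalone sketch (run under mpiexec):

from mpi4py import MPI

comm = MPI.COMM_WORLD
counts = comm.allreduce([3])         # e.g. [n_0, n_1, ..., n_{size-1}]
base = sum(counts[0:comm.rank])      # global index of this rank's first item
print(comm.rank, base)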
Example no. 12
    def del_anom_normal_plot(intensities, strong_cutoff=0.0):
        """Make a normal probability plot of the normalised anomalous differences."""
        diff_array = intensities.anomalous_differences()
        if not diff_array.data().size():
            return {}
        delta = diff_array.data() / diff_array.sigmas()

        norm = distributions.normal_distribution()

        n = len(delta)
        if n <= 10:
            a = 3 / 8
        else:
            a = 0.5

        y = flex.sorted(delta)
        x = [norm.quantile((i + 1 - a) / (n + 1 - (2 * a))) for i in range(n)]

        H, xedges, yedges = np.histogram2d(np.array(x),
                                           y.as_numpy_array(),
                                           bins=(200, 200))
        nonzeros = np.nonzero(H)
        z = np.empty(H.shape)
        z[:] = np.nan  # np.NAN alias removed in numpy 2.0
        z[nonzeros] = H[nonzeros]

        # also make a histogram
        histy = flex.histogram(y, n_slots=100)
        # make a gaussian for reference also
        n = y.size()
        width = histy.slot_centers()[1] - histy.slot_centers()[0]
        gaussian = []
        from math import exp, pi

        for sc in histy.slot_centers():  # avoid shadowing the quantiles x
            gaussian.append(n * width * exp(-(sc**2) / 2.0) / ((2.0 * pi)**0.5))

        title = "Normal probability plot of anomalous differences"
        plotname = "normal_distribution_plot"
        if strong_cutoff > 0.0:
            title += " (d > %.2f)" % strong_cutoff
            plotname += "_lowres"
        else:
            title += " (all data)"
            plotname += "_highres"
        return {
            plotname: {
                "data": [
                    {
                        "x": xedges.tolist(),
                        "y": yedges.tolist(),
                        "z": z.transpose().tolist(),
                        "type": "heatmap",
                        "name": "normalised deviations",
                        "colorbar": {
                            "title": "Number of reflections",
                            "titleside": "right",
                        },
                        "colorscale": "Jet",
                    },
                    {
                        "x": [-5, 5],
                        "y": [-5, 5],
                        "type": "scatter",
                        "mode": "lines",
                        "name": "z = m",
                        "color": "rgb(0,0,0)",
                    },
                ],
                "layout": {
                    "title": title,
                    "xaxis": {
                        "anchor": "y",
                        "title": "expected delta",
                        "range": [-4, 4],
                    },
                    "yaxis": {
                        "anchor": "x",
                        "title": "observed delta",
                        "range": [-5, 5],
                    },
                },
                "help":
                """\
    This plot shows the normalised anomalous differences, sorted in order and
    plotted against the expected order based on a normal distribution model.
    A true normal distribution of deviations would give the straight line indicated.

    [1] P. L. Howell and G. D. Smith, J. Appl. Cryst. (1992). 25, 81-86
    https://doi.org/10.1107/S0021889891010385
    [2] P. Evans, Acta Cryst. (2006). D62, 72-82
    https://doi.org/10.1107/S0907444905036693
    """,
            }
        }
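The NaN masking used for the heatmap keeps empty bins blank when plotted; a numpy-only illustration of that step with synthetic data:

import numpy as np

rng = np.random.default_rng(3)
x = rng.standard_normal(10000)
y = x + 0.1 * rng.standard_normal(10000)

H, xedges, yedges = np.histogram2d(x, y, bins=(200, 200))
z = np.full(H.shape, np.nan)  # empty bins stay NaN and are not drawn
z[H > 0] = H[H > 0]           # populated bins keep their counts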
Example no. 13
def normal_probability_plot(data):
    """Plot the distribution of normal probabilities of errors."""
    norm = distributions.normal_distribution()

    n = len(data["delta_hl"])
    if n <= 10:
        a = 3 / 8
    else:
        a = 0.5

    y = flex.sorted(flex.double(data["delta_hl"]))
    x = [norm.quantile((i + 1 - a) / (n + 1 - (2 * a))) for i in range(n)]

    H, xedges, yedges = np.histogram2d(np.array(x),
                                       y.as_numpy_array(),
                                       bins=(200, 200))
    nonzeros = np.nonzero(H)
    z = np.empty(H.shape)
    z[:] = np.nan  # np.NAN alias removed in numpy 2.0
    z[nonzeros] = H[nonzeros]

    # also make a histogram
    histy = flex.histogram(y, n_slots=100)
    # make a gaussian for reference also
    n = y.size()
    width = histy.slot_centers()[1] - histy.slot_centers()[0]
    gaussian = [
        n * width * math.exp(-(sc**2) / 2.0) / ((2.0 * math.pi)**0.5)
        for sc in histy.slot_centers()
    ]

    return {
        "normal_distribution_plot": {
            "data": [
                {
                    "x": xedges.tolist(),
                    "y": yedges.tolist(),
                    "z": z.transpose().tolist(),
                    "type": "heatmap",
                    "name": "normalised deviations",
                    "colorbar": {
                        "title": "Number of reflections",
                        "titleside": "right",
                    },
                    "colorscale": "Jet",
                },
                {
                    "x": [-5, 5],
                    "y": [-5, 5],
                    "type": "scatter",
                    "mode": "lines",
                    "name": "z = m",
                    "color": "rgb(0,0,0)",
                },
            ],
            "layout": {
                "title": "Normal probability plot with error model applied",
                "xaxis": {
                    "anchor": "y",
                    "title": "Order statistic medians, m"
                },
                "yaxis": {
                    "anchor": "x",
                    "title": "Ordered responses, z"
                },
            },
            "help":
            """\
This plot shows the normalised deviations (of each reflection from the
group-weighted mean), sorted in order and plotted against the expected order
based on a normal distribution model. A true normal distribution of deviations
would give the straight line indicated. If the errors are well described by
this model, the ordered responses should closely fit the straight line to
high absolute values of x (>3), where there is typically a deviation away from
the line due to wide tails of the distribution.
""",
        },
        "nor_dev_hist": {
            "data": [
                {
                    "x": list(histy.slot_centers()),
                    "y": list(histy.slots()),
                    "type": "bar",
                    "name": "dataset normalised deviations",
                },
                {
                    "x": list(histy.slot_centers()),
                    "y": gaussian,
                    "type": "scatter",
                    "name": "Ideal normal distribution",
                },
            ],
            "layout": {
                "title": "Normal deviations with error model applied",
                "xaxis": {
                    "anchor": "y",
                    "title": "Normalised deviation"
                },
                "yaxis": {
                    "anchor": "x",
                    "title": "Number of reflections"
                },
            },
            "help":
            """\
This plot shows the distribution of normalised deviations (of each reflection
from the group-weighted mean), for the reflections used to minimise the error
model. A true normal distribution is indicated.
""",
        },
    }
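The reference gaussian above scales the N(0,1) pdf by n * slot_width so it overlays a count histogram; a numpy check of that scaling:

import math
import numpy as np

n, width = 10000, 0.1
centres = np.arange(-3.0, 3.0, width) + width / 2
# Expected count in a slot = n * slot_width * pdf(slot centre) for N(0,1).
expected = n * width * np.exp(-centres ** 2 / 2.0) / math.sqrt(2.0 * math.pi)
print(expected.max())  # ~ n * width / sqrt(2 * pi) at the central slot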
Example no. 14
 def exercise(self, debug=False):
   if debug:
     distribution = distributions.normal_distribution()
     observed_deviations = (
       self.fo2.data() - self.scale_factor*self.fc.as_intensity_array().data())
     observed_deviations = observed_deviations.select(
       flex.sort_permutation(observed_deviations))
     expected_deviations = distribution.quantiles(observed_deviations.size())
     csv_utils.writer(
       open('delta_F_npp.csv', 'wb'), (expected_deviations, observed_deviations))
   # first with the correct absolute structure
   gaussian = absolute_structure.hooft_analysis(self.fo2, self.fc)
   analyses = [gaussian]
   NPP = absolute_structure.bijvoet_differences_probability_plot(gaussian)
   if self.use_students_t_errors:
     nu_calc = absolute_structure.maximise_students_t_correlation_coefficient(
       NPP.y, min_nu=1, max_nu=200)
     t_analysis = absolute_structure.students_t_hooft_analysis(
       self.fo2, self.fc, nu_calc, probability_plot_slope=NPP.fit.slope())
     analyses.append(gaussian)
     tPP = absolute_structure.bijvoet_differences_probability_plot(
       t_analysis, use_students_t_distribution=True, students_t_nu=nu_calc)
     if tPP.distribution.degrees_of_freedom() < 100:
       assert tPP.correlation.coefficient() > NPP.correlation.coefficient()
   else:
     assert approx_equal(NPP.correlation.coefficient(), 1, 0.005)
   for analysis in analyses:
     assert approx_equal(analysis.hooft_y, 0, 1e-2)
     assert approx_equal(analysis.p2_true, 1)
     assert approx_equal(analysis.p2_false, 0)
     assert approx_equal(analysis.p3_true, 1)
     assert approx_equal(analysis.p3_false, 0)
     assert approx_equal(analysis.p3_racemic_twin, 0)
   if debug:
     csv_utils.writer(open('npp.csv', 'wb'), (NPP.x,NPP.y))
     if self.use_students_t_errors:
       csv_utils.writer(open('tpp.csv', 'wb'), (tPP.x,tPP.y))
   assert approx_equal(NPP.fit.y_intercept(), 0)
   # and now with the wrong absolute structure
   gaussian = absolute_structure.hooft_analysis(self.fo2, self.fc_i)
   analyses = [gaussian]
   NPP = absolute_structure.bijvoet_differences_probability_plot(gaussian)
   if self.use_students_t_errors:
     nu_calc = absolute_structure.maximise_students_t_correlation_coefficient(
       NPP.y, min_nu=1, max_nu=200)
     t_analysis = absolute_structure.students_t_hooft_analysis(
       self.fo2, self.fc_i, nu_calc, probability_plot_slope=NPP.fit.slope())
     analyses.append(gaussian)
     tPP = absolute_structure.bijvoet_differences_probability_plot(
       t_analysis, use_students_t_distribution=True)
     if tPP.distribution.degrees_of_freedom() < 100:
       assert tPP.correlation.coefficient() > NPP.correlation.coefficient()
   else:
     assert approx_equal(NPP.correlation.coefficient(), 1, 0.002)
     assert approx_equal(NPP.fit.y_intercept(), 0)
   for analysis in analyses:
     assert approx_equal(analysis.hooft_y, 1, 1e-2)
     assert approx_equal(analysis.p2_true, 0)
     assert approx_equal(analysis.p2_false, 1)
     assert approx_equal(analysis.p3_true, 0)
     assert approx_equal(analysis.p3_false, 1)
     assert approx_equal(analysis.p3_racemic_twin, 0)
   # test for the case of a racemic twin
   gaussian = absolute_structure.hooft_analysis(self.fo2_twin, self.fc)
   analyses = [gaussian]
   NPP = absolute_structure.bijvoet_differences_probability_plot(gaussian)
   if self.use_students_t_errors:
     nu_calc = absolute_structure.maximise_students_t_correlation_coefficient(
       NPP.y, min_nu=1, max_nu=200)
     t_analysis = absolute_structure.students_t_hooft_analysis(
       self.fo2_twin, self.fc, nu_calc, probability_plot_slope=NPP.fit.slope())
     tPP = absolute_structure.bijvoet_differences_probability_plot(
       t_analysis, use_students_t_distribution=True)
     if tPP.distribution.degrees_of_freedom() < 100:
       assert tPP.correlation.coefficient() > NPP.correlation.coefficient()
   else:
     assert approx_equal(NPP.correlation.coefficient(), 1, 0.002)
     assert approx_equal(NPP.fit.y_intercept(), 0)
   for analysis in analyses:
     assert approx_equal(analysis.hooft_y, 0.5, 1e-2)
     assert approx_equal(analysis.p3_true, 0)
     assert approx_equal(analysis.p3_false, 0)
     assert approx_equal(analysis.p3_racemic_twin, 1)