Ejemplo n.º 1
0
  def __init__(self,
               hooft_analysis,
               use_students_t_distribution=False,
               students_t_nu=None,
               probability_plot_slope=None):
    """Set up a probability plot of the Bijvoet-difference deviations.

    The observed deviations are
    (G * delta_fc2 - delta_fo2) / sigma(delta_fo2), evaluated over both
    hemispheres, optionally divided by probability_plot_slope and then
    reordered with flex.sort_permutation.  They are stored as self.y and
    the quantiles of the chosen distribution as self.x.

    hooft_analysis: supplies delta_fo2, delta_fc2 and the factor G.
    use_students_t_distribution: if true, the expected quantiles come from
      distributions.students_t_distribution, otherwise from
      distributions.normal_distribution.
    students_t_nu: argument for students_t_distribution; when None it is
      obtained from maximise_students_t_correlation_coefficient.
    probability_plot_slope: optional divisor applied to the deviations.

    self.fit and self.correlation are computed with the first and the last
    five points left out.
    """
    fo2_mates = hooft_analysis.delta_fo2.generate_bijvoet_mates()
    self.delta_fo2, fo2_minus = fo2_mates.hemispheres_acentrics()
    fc2_mates = hooft_analysis.delta_fc2.generate_bijvoet_mates()
    self.delta_fc2, fc2_minus = fc2_mates.hemispheres_acentrics()
    # Both hemispheres go into the plot: append the second hemisphere to
    # the first one, with the sign of the differences flipped.
    for merged, minus in ((self.delta_fo2, fo2_minus),
                          (self.delta_fc2, fc2_minus)):
      merged.indices().extend(minus.indices())
      merged.data().extend(minus.data() * -1)
    # only the observed differences get their sigmas extended
    self.delta_fo2.sigmas().extend(fo2_minus.sigmas())
    self.indices = self.delta_fo2.indices()

    scaled_fc2 = hooft_analysis.G * self.delta_fc2.data()
    deviations = (scaled_fc2 - self.delta_fo2.data())/self.delta_fo2.sigmas()
    if probability_plot_slope is not None:
      deviations /= probability_plot_slope
    deviations = deviations.select(flex.sort_permutation(deviations))

    if not use_students_t_distribution:
      self.distribution = distributions.normal_distribution()
    else:
      if students_t_nu is None:
        students_t_nu = maximise_students_t_correlation_coefficient(
          deviations, 1, 200)
      self.distribution = distributions.students_t_distribution(students_t_nu)

    self.x = self.distribution.quantiles(deviations.size())
    self.y = deviations
    # leave the five points at either end out of the straight-line fit
    trimmed_x = self.x[5:-5]
    trimmed_y = self.y[5:-5]
    self.fit = flex.linear_regression(trimmed_x, trimmed_y)
    self.correlation = flex.linear_correlation(trimmed_x, trimmed_y)
    assert self.fit.is_well_defined()
Ejemplo n.º 2
0
def exercise_sdb(verbose=0):
  """Round-trip a random structure through the CNS sdb text format.

  A random structure in space group P 31 is written with
  as_cns_sdb_file, parsed back with sdb_reader.multi_sdb_parser, and the
  structure-factor amplitudes of the original and the re-read structure
  are compared by linear regression (slope within 1e-4 of 1, intercept
  below 1e-3 in magnitude).

  verbose: if true, the sdb text and the regression summary are printed.
  """
  original = random_structure.xray_structure(
    space_group_info=sgtbx.space_group_info("P 31"),
    elements=["N","C","C","O"]*2,
    volume_per_atom=500,
    min_distance=2.,
    general_positions_only=False,
    random_u_iso=True)
  reference = original.structure_factors(
    anomalous_flag=False, d_min=2, algorithm="direct")
  f_abs = abs(reference.f_calc())
  sdb_text = original.as_cns_sdb_file(
    file="foo.sdb",
    description="random_structure",
    comment=["any", "thing"],
    group="best")
  if verbose:
    sys.stdout.write(sdb_text)
  parsed = sdb_reader.multi_sdb_parser(StringIO(sdb_text))
  assert len(parsed) == 1
  # the re-read structure gets the original unit cell but no space group
  symmetry = crystal.symmetry(
    unit_cell=original.unit_cell(),
    space_group_info=None)
  structure_read = parsed[0].as_xray_structure(crystal_symmetry=symmetry)
  recalculated = f_abs.structure_factors_from_scatterers(
    xray_structure=structure_read, algorithm="direct")
  f_read = abs(recalculated.f_calc())
  regression = flex.linear_regression(f_abs.data(), f_read.data())
  assert regression.is_well_defined()
  if verbose:
    regression.show_summary()
  slope_error = abs(regression.slope()-1)
  assert slope_error < 1.e-4
  intercept_error = abs(regression.y_intercept())
  assert intercept_error < 1.e-3
Ejemplo n.º 3
0
  def __init__(self,
               hooft_analysis,
               use_students_t_distribution=False,
               students_t_nu=None,
               probability_plot_slope=None):
    """Prepare the data of a Bijvoet-difference probability plot.

    self.y holds the deviations
    (G * delta_fc2 - delta_fo2) / sigma(delta_fo2) for both hemispheres,
    divided by probability_plot_slope when one is given and reordered
    with flex.sort_permutation; self.x holds as many quantiles of
    self.distribution.

    hooft_analysis: object providing delta_fo2, delta_fc2 and G.
    use_students_t_distribution: selects students_t_distribution over
      normal_distribution for the expected quantiles.
    students_t_nu: passed to students_t_distribution; if None it is
      determined by maximise_students_t_correlation_coefficient.
    probability_plot_slope: optional divisor for the deviations.

    The regression self.fit and self.correlation skip five points at each
    end of the plot.
    """
    observed_mates = hooft_analysis.delta_fo2.generate_bijvoet_mates()
    self.delta_fo2, observed_minus = observed_mates.hemispheres_acentrics()
    calculated_mates = hooft_analysis.delta_fc2.generate_bijvoet_mates()
    self.delta_fc2, calculated_minus = \
        calculated_mates.hemispheres_acentrics()

    # Merge the second hemisphere into the first (differences negated) so
    # that both hemispheres are plotted.
    observed = self.delta_fo2
    observed.indices().extend(observed_minus.indices())
    observed.data().extend(observed_minus.data() * -1)
    observed.sigmas().extend(observed_minus.sigmas())
    calculated = self.delta_fc2
    calculated.indices().extend(calculated_minus.indices())
    calculated.data().extend(calculated_minus.data() * -1)
    self.indices = observed.indices()

    residuals = (hooft_analysis.G * calculated.data()
                 - observed.data())/observed.sigmas()
    if probability_plot_slope is not None:
      residuals /= probability_plot_slope
    order = flex.sort_permutation(residuals)
    residuals = residuals.select(order)

    if use_students_t_distribution:
      nu = students_t_nu
      if nu is None:
        nu = maximise_students_t_correlation_coefficient(residuals, 1, 200)
      self.distribution = distributions.students_t_distribution(nu)
    else:
      self.distribution = distributions.normal_distribution()

    self.x = self.distribution.quantiles(residuals.size())
    self.y = residuals
    # the five points at each end are excluded from fit and correlation
    inner_x, inner_y = self.x[5:-5], self.y[5:-5]
    self.fit = flex.linear_regression(inner_x, inner_y)
    self.correlation = flex.linear_correlation(inner_x, inner_y)
    assert self.fit.is_well_defined()
Ejemplo n.º 4
0
def exercise_xray_structure(use_u_aniso, verbose=0):
    """Round-trip a random structure through the PDB text format.

    A random P 31 structure is written with as_pdb_file for every
    combination of resname (None, "res") and fractional_coordinates
    (False, True), read back with iotbx.pdb.input, and the amplitudes
    calculated from the re-read structure are compared with the original
    ones by linear regression.

    use_u_aniso: forwarded to random_structure.xray_structure.
    verbose: if true, the PDB text and regression summaries are printed.
    """
    original = random_structure.xray_structure(
        space_group_info=sgtbx.space_group_info("P 31"),
        elements=["N", "C", "C", "O", "Si"] * 2,
        volume_per_atom=500,
        min_distance=2.0,
        general_positions_only=False,
        random_u_iso=True,
        use_u_aniso=use_u_aniso,
    )
    reference = original.structure_factors(anomalous_flag=False, d_min=2, algorithm="direct")
    f_abs = abs(reference.f_calc())
    for resname in (None, "res"):
        for fractional_coordinates in (False, True):
            pdb_text = original.as_pdb_file(
                remark="Title",
                remarks=["Any", "Thing"],
                fractional_coordinates=fractional_coordinates,
                resname=resname,
            )
            if verbose:
                sys.stdout.write(pdb_text)
            pdb_input = iotbx.pdb.input(source_info=None, lines=flex.std_string(pdb_text.splitlines()))
            structure_read = pdb_input.xray_structure_simple(
                fractional_coordinates=fractional_coordinates,
                use_scale_matrix_if_available=False,
            )
            recalculated = f_abs.structure_factors_from_scatterers(
                xray_structure=structure_read, algorithm="direct"
            )
            f_read = abs(recalculated.f_calc())
            regression = flex.linear_regression(f_abs.data(), f_read.data())
            assert regression.is_well_defined()
            if verbose:
                regression.show_summary()
            assert approx_equal(regression.slope(), 1, eps=1.0e-2)
            # the intercept tolerance is 1% of the largest reference amplitude
            intercept_eps = flex.max(f_abs.data()) * 0.01
            assert approx_equal(regression.y_intercept(), 0, eps=intercept_eps)
Ejemplo n.º 5
0
def linear_regression_test(d_analytical,
                           d_numerical,
                           test_hard=True,
                           slope_tolerance=1.e-3,
                           correlation_min=0.999,
                           verbose=0):
    """Check analytical derivatives against numerical ones by regression.

    Both inputs are converted with flex_tuple_as_flex_double unless they
    already are flex.double arrays.  If both arrays are all zero the
    function returns without further checks.  Otherwise the slope of the
    regression of d_numerical on d_analytical must be within
    slope_tolerance of 1 and the correlation coefficient at least
    correlation_min.

    test_hard: if true, a mismatch raises AssertionError after the
      diagnostics have been printed; if false, it is only reported.
    verbose: if true, the inputs (and, on mismatch, the value pairs) are
      printed.
    """
    # isinstance instead of comparing type objects: idiomatic, and it
    # avoids allocating a throwaway flex.double() on every call
    if not isinstance(d_analytical, flex.double):
        d_analytical = flex_tuple_as_flex_double(d_analytical)
    if not isinstance(d_numerical, flex.double):
        d_numerical = flex_tuple_as_flex_double(d_numerical)
    if verbose:
        print("analytical:", tuple(d_analytical))
        print("numerical: ", tuple(d_numerical))
    # a regression through all-zero data is undefined; nothing to compare
    if (flex.max(flex.abs(d_analytical)) == 0
            and flex.max(flex.abs(d_numerical)) == 0):
        return
    regr = flex.linear_regression(d_analytical, d_numerical)
    corr = flex.linear_correlation(d_analytical, d_numerical).coefficient()
    assert regr.is_well_defined()
    if (abs(regr.slope() - 1) > slope_tolerance or corr < correlation_min):
        print("Error: finite difference mismatch:")
        print("slope:", regr.slope())
        print("correlation:", corr)
        if verbose:
            for a, n in zip(d_analytical, d_numerical):
                print(a, n)
        assert not test_hard
Ejemplo n.º 6
0
def exercise_xray_structure(use_u_aniso, verbose=0):
  """Write a random structure as PDB text, read it back and compare.

  For each resname in (None, "res") and each fractional_coordinates
  setting in (False, True) the structure is serialised with as_pdb_file
  and re-read through iotbx.pdb.input.  Amplitudes calculated from the
  re-read structure are regressed against those of the original
  structure; slope and intercept are checked with approx_equal.

  use_u_aniso: forwarded to random_structure.xray_structure.
  verbose: if true, the PDB text and regression summaries are printed.
  """
  be_verbose = (0 or verbose)
  source_structure = random_structure.xray_structure(
    space_group_info=sgtbx.space_group_info("P 31"),
    elements=["N","C","C","O","Si"]*2,
    volume_per_atom=500,
    min_distance=2.,
    general_positions_only=False,
    random_u_iso=True,
    use_u_aniso=use_u_aniso)
  source_f_calc = source_structure.structure_factors(
    anomalous_flag=False, d_min=2, algorithm="direct").f_calc()
  f_abs = abs(source_f_calc)
  for resname in (None, "res"):
    for fractional_coordinates in (False, True):
      pdb_file = source_structure.as_pdb_file(
        remark="Title", remarks=["Any", "Thing"],
        fractional_coordinates=fractional_coordinates,
        resname=resname)
      if be_verbose:
        sys.stdout.write(pdb_file)
      pdb_lines = flex.std_string(pdb_file.splitlines())
      pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_lines)
      structure_read = pdb_inp.xray_structure_simple(
        fractional_coordinates=fractional_coordinates,
        use_scale_matrix_if_available=False)
      read_f_calc = f_abs.structure_factors_from_scatterers(
        xray_structure=structure_read, algorithm="direct").f_calc()
      f_read = abs(read_f_calc)
      regression = flex.linear_regression(f_abs.data(), f_read.data())
      assert regression.is_well_defined()
      if be_verbose:
        regression.show_summary()
      assert approx_equal(regression.slope(), 1, eps=1.e-2)
      # intercept may deviate by up to 1% of the largest amplitude
      assert approx_equal(
        regression.y_intercept(), 0, eps=flex.max(f_abs.data())*0.01)
Ejemplo n.º 7
0
def linear_regression_test(d_analytical, d_numerical, test_hard=True,
                           slope_tolerance=1.e-3,
                           correlation_min=0.999,
                           verbose=0):
  """Check analytical derivatives against numerical ones by regression.

  Inputs that are not flex.double arrays are converted with
  flex_tuple_as_flex_double.  If both arrays are all zero the function
  returns immediately.  Otherwise the regression slope must be within
  slope_tolerance of 1 and the correlation coefficient must reach
  correlation_min.

  test_hard: if true, a mismatch raises AssertionError after the
    diagnostics have been printed; if false, it is only reported.
  verbose: if true, the inputs (and, on mismatch, the value pairs) are
    printed.

  Output uses single-argument print(...) calls so that the text is the
  same under the Python 2 print statement and the Python 3 function.
  """
  if not isinstance(d_analytical, flex.double):
    d_analytical = flex_tuple_as_flex_double(d_analytical)
  if not isinstance(d_numerical, flex.double):
    d_numerical = flex_tuple_as_flex_double(d_numerical)
  if (0 or verbose):
    print("analytical: %s" % (tuple(d_analytical),))
    print("numerical:  %s" % (tuple(d_numerical),))
  # a regression through all-zero data is undefined; nothing to compare
  if (    flex.max(flex.abs(d_analytical)) == 0
      and flex.max(flex.abs(d_numerical)) == 0):
    return
  regr = flex.linear_regression(d_analytical, d_numerical)
  corr = flex.linear_correlation(d_analytical, d_numerical).coefficient()
  assert regr.is_well_defined()
  if (abs(regr.slope() - 1) > slope_tolerance or corr < correlation_min):
    print("Error: finite difference mismatch:")
    print("slope: %s" % regr.slope())
    print("correlation: %s" % corr)
    if (0 or verbose):
      for a, n in zip(d_analytical, d_numerical):
        print("%s %s" % (a, n))
    assert not test_hard
Ejemplo n.º 8
0
def anomalous_probability_plot(intensities, expected_delta=None):
  """Fit a straight line to the probability plot of anomalous differences.

  The anomalous differences of intensities are divided by their sigmas,
  reordered with flex.sort_permutation and paired with the same number of
  quantiles of a normal distribution.

  intensities: must be unique under symmetry and carry the anomalous flag
    (both are asserted).
  expected_delta: if given, only points whose quantile has an absolute
    value below expected_delta take part in the fit.

  Returns (slope, y_intercept, number of points used in the fit).
  """
  from scitbx.math import distributions
  from scitbx.array_family import flex

  assert intensities.is_unique_set_under_symmetry()
  assert intensities.anomalous_flag()

  dI = intensities.anomalous_differences()
  y = dI.data()/dI.sigmas()
  perm = flex.sort_permutation(y)
  y = y.select(perm)
  distribution = distributions.normal_distribution()

  x = distribution.quantiles(y.size())

  if expected_delta is not None:
    sel = flex.abs(x) < expected_delta
    x = x.select(sel)
    y = y.select(sel)

  # The correlation object that used to be computed here was never read,
  # so only the regression is evaluated.
  fit = flex.linear_regression(x, y)
  assert fit.is_well_defined()

  # disabled debugging aid: switch to "if 1:" to look at the plot
  if 0:
    from matplotlib import pyplot
    pyplot.scatter(x, y)
    m = fit.slope()
    c = fit.y_intercept()
    pyplot.plot(pyplot.xlim(), [m * x_ + c for x_ in pyplot.xlim()])
    pyplot.show()

  return fit.slope(), fit.y_intercept(), x.size()
Ejemplo n.º 9
0
def absolute_structure_analysis(xs, fo2, fc, scale, nu=None, log=None,
                                outlier_cutoff_factor=None):
  """Run the Hooft and Student's t analyses and report them to log.

  A hooft_analysis is run first and shown together with its probability
  plot.  The observed deviations of that plot are then fitted against
  Student's t quantiles, and the resulting slope is passed to
  students_t_hooft_analysis, which is reported the same way.  If xs is
  not None a flack_analysis is shown as well.

  xs: passed to flack_analysis; None skips that analysis.
  fo2, fc, scale, outlier_cutoff_factor: forwarded to the analyses.
  nu: Student's t parameter; when None it is obtained from
    maximise_students_t_correlation_coefficient with min_nu=1, max_nu=200.
  log: object with a write method; defaults to sys.stdout.

  Text is emitted with log.write instead of the Python 2-only
  "print >> log" form; the characters written are the same.
  """
  if log is None:
    log = sys.stdout
  hooft_analysis = absolute_structure.hooft_analysis(
    fo2, fc, scale_factor=scale, outlier_cutoff_factor=outlier_cutoff_factor)
  log.write("Gaussian analysis:\n")
  hooft_analysis.show(out=log)
  NPP = absolute_structure.bijvoet_differences_probability_plot(
    hooft_analysis)
  log.write("Probability plot:\n")
  NPP.show(out=log)
  log.write("\n")
  if nu is None:
    nu = absolute_structure.maximise_students_t_correlation_coefficient(
      NPP.y, min_nu=1, max_nu=200)
  distribution = distributions.students_t_distribution(nu)
  observed_deviations = NPP.y
  expected_deviations = distribution.quantiles(observed_deviations.size())
  # five points at either end are left out of the slope estimate
  fit = flex.linear_regression(
    expected_deviations[5:-5], observed_deviations[5:-5])
  t_analysis = absolute_structure.students_t_hooft_analysis(
    fo2, fc, nu, scale_factor=scale, probability_plot_slope=fit.slope(),
    outlier_cutoff_factor=outlier_cutoff_factor)
  tPP = absolute_structure.bijvoet_differences_probability_plot(
    t_analysis, use_students_t_distribution=True, students_t_nu=nu)
  log.write("Student's t analysis:\n")
  log.write("nu: %.2f\n" % nu)
  t_analysis.show(out=log)
  log.write("Probability plot:\n")
  tPP.show(out=log)
  log.write("\n")
  if xs is not None:
    flack = absolute_structure.flack_analysis(xs, fo2.as_xray_observations())
    flack.show(out=log)
Ejemplo n.º 10
0
def anomalous_probability_plot(intensities, expected_delta=None):
    """Fit a straight line to the probability plot of anomalous differences.

    The anomalous differences of intensities are divided by their sigmas,
    reordered with flex.sort_permutation and paired with the same number
    of quantiles of a normal distribution.

    intensities: must be unique under symmetry and carry the anomalous
      flag (both are asserted).
    expected_delta: if given, only points whose quantile has an absolute
      value below expected_delta take part in the fit.

    Returns (slope, y_intercept, number of points used in the fit).
    """
    from scitbx.math import distributions
    from scitbx.array_family import flex

    assert intensities.is_unique_set_under_symmetry()
    assert intensities.anomalous_flag()

    dI = intensities.anomalous_differences()
    y = dI.data() / dI.sigmas()
    perm = flex.sort_permutation(y)
    y = y.select(perm)
    distribution = distributions.normal_distribution()

    x = distribution.quantiles(y.size())

    if expected_delta is not None:
        sel = flex.abs(x) < expected_delta
        x = x.select(sel)
        y = y.select(sel)

    # The correlation object that used to be computed here was never
    # read, so only the regression is evaluated.
    fit = flex.linear_regression(x, y)
    assert fit.is_well_defined()

    # disabled debugging aid: switch to "if 1:" to look at the plot
    if 0:
        from matplotlib import pyplot
        pyplot.scatter(x, y)
        m = fit.slope()
        c = fit.y_intercept()
        pyplot.plot(pyplot.xlim(), [m * x_ + c for x_ in pyplot.xlim()])
        pyplot.show()

    return fit.slope(), fit.y_intercept(), x.size()
Ejemplo n.º 11
0
def exercise_sdb(verbose=0):
    """Check that a structure survives a CNS sdb write/read cycle.

    The amplitudes of a random P 31 structure are compared, by linear
    regression, with the amplitudes of the structure recovered from the
    text produced by as_cns_sdb_file.  The slope has to be within 1e-4 of
    1 and the intercept below 1e-3 in magnitude.

    verbose: if true, the sdb text and the regression summary are printed.
    """
    show = (0 or verbose)
    generated = random_structure.xray_structure(
        space_group_info=sgtbx.space_group_info("P 31"),
        elements=["N", "C", "C", "O"] * 2,
        volume_per_atom=500,
        min_distance=2.,
        general_positions_only=False,
        random_u_iso=True)
    generated_f_calc = generated.structure_factors(
        anomalous_flag=False, d_min=2, algorithm="direct").f_calc()
    f_abs = abs(generated_f_calc)
    sdb_out = generated.as_cns_sdb_file(
        file="foo.sdb",
        description="random_structure",
        comment=["any", "thing"],
        group="best")
    if show:
        sys.stdout.write(sdb_out)
    sdb_files = sdb_reader.multi_sdb_parser(StringIO(sdb_out))
    assert len(sdb_files) == 1
    # only the unit cell is carried over; no space group is supplied
    cell_only_symmetry = crystal.symmetry(
        unit_cell=generated.unit_cell(), space_group_info=None)
    recovered = sdb_files[0].as_xray_structure(
        crystal_symmetry=cell_only_symmetry)
    recovered_f_calc = f_abs.structure_factors_from_scatterers(
        xray_structure=recovered, algorithm="direct").f_calc()
    f_read = abs(recovered_f_calc)
    regression = flex.linear_regression(f_abs.data(), f_read.data())
    assert regression.is_well_defined()
    if show:
        regression.show_summary()
    assert abs(regression.slope() - 1) < 1.e-4
    assert abs(regression.y_intercept()) < 1.e-3
Ejemplo n.º 12
0
  def __init__(self, f_obs, asu_contents, e_statistics=False):
    """Fit a Wilson plot to the binned amplitudes of f_obs.

    Only reflections with f_obs > 0 are used, with the binning of f_obs.
    For every bin except the first and last entries of the binned data,
    the mean of f_obs^2 is divided by an expected f_calc^2 built from
    the wk1995 scattering factors of asu_contents; the logarithm of that
    ratio is fitted linearly against the mean (sin(theta)/lambda)^2.

    f_obs: real array (asserted) with a binner set up.
    asu_contents: mapping from chemical type to number of atoms.
    e_statistics: if true, normalised amplitudes and their statistics are
      computed as well.

    Raises RuntimeError when any of the used bins is empty.
    """
    assert f_obs.is_real_array()
    self.info = f_obs.info()
    positive = f_obs.select(f_obs.data() > 0)
    positive.use_binning_of(f_obs)

    # <fobs^2> per resolution shell
    binned_mean_sq = positive.mean_sq(
      use_binning=True,
      use_multiplicities=True)
    self.mean_fobs_sq = binned_mean_sq.data[1:-1]
    n_none = self.mean_fobs_sq.count(None)
    if (n_none > 0):
      n_positive = positive.indices().size()
      n_not_positive = f_obs.indices().size() - positive.indices().size()
      error_message = "wilson_plot error: %d empty bin%s:" % plural_s(n_none)
      if (self.info is not None):
        error_message += "\n  Info: " + str(self.info)
      error_message += "\n  Number of bins: %d" % len(self.mean_fobs_sq)
      error_message += "\n  Number of f_obs > 0: %d" % (n_positive)
      error_message += "\n  Number of f_obs <= 0: %d" % (n_not_positive)
      raise RuntimeError(error_message)
    self.mean_fobs_sq = flex.double(self.mean_fobs_sq)

    # <s^2> = <(sin(theta)/lambda)^2> per resolution shell
    stol_sq_array = positive.sin_theta_over_lambda_sq()
    stol_sq_array.use_binner_of(positive)
    binned_stol_sq = stol_sq_array.mean(
      use_binning=True,
      use_multiplicities=True)
    self.mean_stol_sq = flex.double(binned_stol_sq.data[1:-1])

    # look up the scattering-factor gaussians once per chemical type
    gaussians = {}
    for chemical_type in asu_contents.keys():
      table_entry = eltbx.xray_scattering.wk1995(chemical_type)
      gaussians[chemical_type] = table_entry.fetch()

    # expected f_calc^2 per resolution shell
    self.expected_f_sq = flex.double()
    for shell_stol_sq in self.mean_stol_sq:
      sum_fj_sq = 0
      for chemical_type, n_atoms in asu_contents.items():
        f0 = gaussians[chemical_type].at_stol_sq(shell_stol_sq)
        sum_fj_sq += f0 * f0 * n_atoms
      self.expected_f_sq.append(sum_fj_sq)
    symmetry_factor = positive.space_group().order_z() \
                    * positive.space_group().n_ltr()
    self.expected_f_sq *= symmetry_factor

    # straight-line fit of log(<fobs^2>/expected) against <s^2>
    self.x = self.mean_stol_sq
    self.y = flex.log(self.mean_fobs_sq / self.expected_f_sq)
    fit = flex.linear_regression(self.x, self.y)
    assert fit.is_well_defined()
    self.fit_y_intercept = fit.y_intercept()
    self.fit_slope = fit.slope()
    # exp(intercept) is the intensity scale factor ...
    self.wilson_intensity_scale_factor = math.exp(self.fit_y_intercept)
    # ... and its square root the amplitude scale factor
    self.wilson_k = math.sqrt(self.wilson_intensity_scale_factor)
    self.wilson_b = -self.fit_slope / 2
    self.fit_correlation = flex.linear_correlation(self.x,self.y).coefficient()

    if not e_statistics:
      return
    normalised = positive.normalised_amplitudes(asu_contents, self)
    self.normalised_f_obs = normalised.array()
    self.mean_e_sq_minus_1 = normalised.mean_e_sq_minus_1()
    self.percent_e_sq_gt_2 = normalised.percent_e_sq_gt_2()
Ejemplo n.º 13
0
def check_regression(x, y, label, min_correlation=0, verbose=0):
    """Assert that the regression of y on x is well defined and correlated.

    x, y: arrays passed to flex.linear_regression and
      flex.linear_correlation.
    label: prefix of the line printed in verbose mode.
    min_correlation: lowest acceptable correlation coefficient; 0
      disables the correlation check.
    verbose: if true, correlation and slope are printed.
    """
    xy_regr = flex.linear_regression(x, y)
    xy_corr = flex.linear_correlation(x, y)
    assert xy_regr.is_well_defined()
    if (0 or verbose):
        # single-argument print(...) is valid under Python 2 and 3 alike
        print("%s correlation: %.4f slope: %.3f" % (
            label, xy_corr.coefficient(), xy_regr.slope()))
    assert min_correlation == 0 or xy_corr.coefficient() >= min_correlation
def check_regression(x, y, label, min_correlation=0, verbose=0):
  """Assert that the regression of y on x is well defined and correlated.

  x, y: arrays passed to flex.linear_regression and
    flex.linear_correlation.
  label: prefix of the line printed in verbose mode.
  min_correlation: lowest acceptable correlation coefficient; 0 disables
    the correlation check.
  verbose: if true, correlation and slope are printed.
  """
  xy_regr = flex.linear_regression(x, y)
  xy_corr = flex.linear_correlation(x, y)
  assert xy_regr.is_well_defined()
  if (0 or verbose):
    # single-argument print(...) is valid under Python 2 and 3 alike
    print("%s correlation: %.4f slope: %.3f" % (
      label, xy_corr.coefficient(), xy_regr.slope()))
  assert min_correlation == 0 or xy_corr.coefficient() >= min_correlation
Ejemplo n.º 15
0
def exercise(space_group_info, n_scatterers=8, d_min=2, verbose=0, e_min=1.5):
    """Compare direct-space and reciprocal-space squaring on random data.

    A random structure of n_scatterers "const" scatterers provides
    amplitudes that are scaled, sorted and quasi-normalized; the
    quasi-normalized values are checked against the scaled ones by linear
    regression.  Reflections whose normalized value exceeds e_min receive
    phases calculated from a second random structure, and the phases
    produced by direct_space_squaring and reciprocal_space_squaring are
    required to agree to within 1 degree wherever the direct-space result
    is not negligible.  Finally exercise_truncate is run on the selected
    reflections.

    space_group_info: space group used for both random structures.
    d_min: resolution limit of the calculated structure factors.
    verbose: if true, intermediate results are printed.

    Printing uses single-argument print(...) calls and range() instead of
    xrange() so that the function also runs under Python 3.
    """
    structure = random_structure.xray_structure(
        space_group_info,
        elements=["const"] * n_scatterers,
        volume_per_atom=200,
        min_distance=3.0,
        general_positions_only=True,
        u_iso=0.0,
    )
    if 0 or verbose:
        structure.show_summary().show_scatterers()
    f_calc = structure.structure_factors(d_min=d_min, anomalous_flag=False).f_calc()
    f_obs = abs(f_calc)
    q_obs = miller.array(
        miller_set=f_obs,
        data=f_obs.data() / math.sqrt(f_obs.space_group().order_p() * n_scatterers) / f_obs.space_group().n_ltr(),
    )
    q_obs = q_obs.sort(by_value="abs")
    q_obs.setup_binner(auto_binning=True)
    n_obs = q_obs.quasi_normalize_structure_factors()
    r = flex.linear_regression(q_obs.data(), n_obs.data())
    if 0 or verbose:
        r.show_summary()
    assert r.is_well_defined()
    assert abs(r.y_intercept()) < 0.1
    assert abs(r.slope() - 1) < 0.2
    q_large = q_obs.select(q_obs.quasi_normalized_as_normalized().data() > e_min)
    if 0 or verbose:
        print("Number of e-values > %.6g: %d" % (e_min, q_large.size()))
    other_structure = random_structure.xray_structure(
        space_group_info,
        elements=["const"] * n_scatterers,
        volume_per_atom=200,
        min_distance=3.0,
        general_positions_only=True,
        u_iso=0.0,
    )
    assert other_structure.unit_cell().is_similar_to(structure.unit_cell())
    q_calc = q_large.structure_factors_from_scatterers(other_structure, algorithm="direct").f_calc()
    start = q_large.phase_transfer(q_calc.data())
    # second pass: each reflection is marked as fixed when its random draw
    # falls below 0.4
    random_fixed = flex.double([random.random() for _ in range(start.size())]) < 0.4
    for selection_fixed in (None, random_fixed):
        from_map_data = direct_space_squaring(start, selection_fixed)
        direct_space_result = start.phase_transfer(phase_source=from_map_data)
        new_phases = reciprocal_space_squaring(start, selection_fixed, verbose)
        reciprocal_space_result = start.phase_transfer(phase_source=flex.polar(1, new_phases))
        mwpe = direct_space_result.mean_weighted_phase_error(reciprocal_space_result)
        if 0 or verbose:
            print("mwpe: %.2f %s" % (mwpe, start.space_group_info()))
        for i, h in enumerate(direct_space_result.indices()):
            amp_d, phi_d = complex_math.abs_arg(direct_space_result.data()[i], deg=True)
            amp_r, phi_r = complex_math.abs_arg(reciprocal_space_result.data()[i], deg=True)
            phase_err = scitbx.math.phase_error(phi_d, phi_r, deg=True)
            # phases of (near-)zero map coefficients are not compared
            assert phase_err < 1.0 or abs(from_map_data[i]) < 1.0e-6
    exercise_truncate(q_large)
Ejemplo n.º 16
0
def recycle(miller_array):
    """Write miller_array to tmp.sca, read it back and compare.

    The array read back is re-indexed with adopt_set(miller_array) and,
    when miller_array is an amplitude array, converted with f_sq_as_f.
    Data and sigmas are then compared with the originals by linear
    regression: the slope must be 1 within 1e-3 (1e-1 for the sigmas of
    non-intensity arrays) and the intercept below 1 in magnitude.

    The file is opened in a with block so that the handle is closed even
    when reading fails; previously it was never closed explicitly.
    """
    merge.write(file_name="tmp.sca", miller_array=miller_array)
    with open("tmp.sca") as file_handle:
        read_back_file = merge.reader(file_handle=file_handle)
        # converted inside the with block in case the reader still needs
        # the open handle at this point
        read_back_arrays = read_back_file.as_miller_arrays()
    assert len(read_back_arrays) == 1
    read_back_array = read_back_arrays[0]
    read_back_input_indexing = read_back_array.adopt_set(miller_array)
    if (miller_array.is_xray_amplitude_array()):
        read_back_input_indexing = read_back_input_indexing.f_sq_as_f()
    regression = flex.linear_regression(miller_array.data(),
                                        read_back_input_indexing.data())
    assert approx_equal(regression.slope(), 1, eps=1.e-3)
    assert abs(regression.y_intercept()) < 1
    regression = flex.linear_regression(miller_array.sigmas(),
                                        read_back_input_indexing.sigmas())
    if (miller_array.is_xray_intensity_array()):
        assert approx_equal(regression.slope(), 1, eps=1.e-3)
    else:
        # looser tolerance for the sigmas of non-intensity arrays
        assert approx_equal(regression.slope(), 1, eps=1.e-1)
    assert abs(regression.y_intercept()) < 1
Ejemplo n.º 17
0
 def sigmaa_model_error(self):
   """Estimate a model error from the slope of log(sigmaa_fitted).

   log(self.sigmaa_fitted) is regressed against
   0.25 * self.h_array**(2/3).  Returns sqrt(-3*slope/pi), or None when
   that radicand is negative.
   """
   x = 0.25*flex.pow( self.h_array, 2.0/3.0 )  # h was in d*^-3 !!!
   log_sigmaa = flex.log( self.sigmaa_fitted )
   # only the slope of the fitted line is needed
   slope = flex.linear_regression( x, log_sigmaa ).slope()
   error_sq = -(slope/math.pi*3)
   if error_sq < 0:
     return None
   return math.sqrt( error_sq )
Ejemplo n.º 18
0
 def sigmaa_model_error(self):
   """Return the model error derived from the fitted sigmaA values.

   A straight line is fitted to log(self.sigmaa_fitted) as a function of
   0.25 * self.h_array**(2/3).  With s the fitted slope, the result is
   sqrt(-(s/pi*3)); None is returned if -(s/pi*3) is negative.
   """
   abscissa = 0.25*flex.pow( self.h_array, 2.0/3.0 )  # h was in d*^-3 !!!
   ordinate = flex.log( self.sigmaa_fitted )
   line = flex.linear_regression( abscissa, ordinate )
   radicand = -(line.slope()/math.pi*3)
   # a negative radicand has no real square root
   return None if radicand < 0 else math.sqrt( radicand )
Ejemplo n.º 19
0
def recycle(miller_array):
  """Write miller_array to tmp.sca, read it back and compare.

  The array read back is re-indexed with adopt_set(miller_array) and,
  when miller_array is an amplitude array, converted with f_sq_as_f.
  Data and sigmas are then compared with the originals by linear
  regression: the slope must be 1 within 1e-3 (1e-1 for the sigmas of
  non-intensity arrays) and the intercept below 1 in magnitude.

  The file is opened in a with block so that the handle is closed even
  when reading fails; previously it was never closed explicitly.
  """
  merge.write(file_name="tmp.sca", miller_array=miller_array)
  with open("tmp.sca") as file_handle:
    read_back_file = merge.reader(file_handle=file_handle)
    # converted inside the with block in case the reader still needs the
    # open handle at this point
    read_back_arrays = read_back_file.as_miller_arrays()
  assert len(read_back_arrays) == 1
  read_back_array = read_back_arrays[0]
  read_back_input_indexing = read_back_array.adopt_set(miller_array)
  if (miller_array.is_xray_amplitude_array()):
    read_back_input_indexing = read_back_input_indexing.f_sq_as_f()
  regression = flex.linear_regression(
    miller_array.data(),
    read_back_input_indexing.data())
  assert approx_equal(regression.slope(), 1, eps=1.e-3)
  assert abs(regression.y_intercept()) < 1
  regression = flex.linear_regression(
    miller_array.sigmas(),
    read_back_input_indexing.sigmas())
  if (miller_array.is_xray_intensity_array()):
    assert approx_equal(regression.slope(), 1, eps=1.e-3)
  else:
    # looser tolerance for the sigmas of non-intensity arrays
    assert approx_equal(regression.slope(), 1, eps=1.e-1)
  assert abs(regression.y_intercept()) < 1
Ejemplo n.º 20
0
    def __init__(self, f_obs, asu_contents, e_statistics=False):
        """Derive Wilson scale and B factor from binned amplitudes.

        Reflections with f_obs > 0 are selected and binned like f_obs.
        Per bin (the first and last entries of the binned data are
        dropped) the mean f_obs^2 is compared with an expected f_calc^2
        computed from wk1995 scattering factors and asu_contents; a
        straight line is fitted to the logarithm of the ratio as a
        function of the mean (sin(theta)/lambda)^2.

        f_obs: real array (asserted) with a binner set up.
        asu_contents: mapping from chemical type to number of atoms.
        e_statistics: if true, normalised amplitudes and their statistics
          are stored as well.

        Raises RuntimeError when any of the used bins is empty.
        """
        assert f_obs.is_real_array()
        self.info = f_obs.info()
        selected = f_obs.select(f_obs.data() > 0)
        selected.use_binning_of(f_obs)

        # mean of fobs^2 in each resolution shell
        self.mean_fobs_sq = selected.mean_sq(use_binning=True, use_multiplicities=True).data[1:-1]
        n_none = self.mean_fobs_sq.count(None)
        if n_none > 0:
            n_selected = selected.indices().size()
            n_rejected = f_obs.indices().size() - selected.indices().size()
            error_message = "wilson_plot error: %d empty bin%s:" % plural_s(n_none)
            if self.info is not None:
                error_message += "\n  Info: " + str(self.info)
            error_message += "\n  Number of bins: %d" % len(self.mean_fobs_sq)
            error_message += "\n  Number of f_obs > 0: %d" % (n_selected)
            error_message += "\n  Number of f_obs <= 0: %d" % (n_rejected)
            raise RuntimeError(error_message)
        self.mean_fobs_sq = flex.double(self.mean_fobs_sq)

        # mean of (sin(theta)/lambda)^2 in each resolution shell
        stol_sq_values = selected.sin_theta_over_lambda_sq()
        stol_sq_values.use_binner_of(selected)
        shell_means = stol_sq_values.mean(use_binning=True, use_multiplicities=True)
        self.mean_stol_sq = flex.double(shell_means.data[1:-1])

        # one scattering-factor lookup per chemical type
        gaussians = {}
        for chemical_type in asu_contents.keys():
            gaussians[chemical_type] = eltbx.xray_scattering.wk1995(chemical_type).fetch()

        # expected f_calc^2 in each resolution shell
        self.expected_f_sq = flex.double()
        for shell_value in self.mean_stol_sq:
            sum_fj_sq = 0
            for chemical_type, n_atoms in asu_contents.items():
                f0 = gaussians[chemical_type].at_stol_sq(shell_value)
                sum_fj_sq += f0 * f0 * n_atoms
            self.expected_f_sq.append(sum_fj_sq)
        multiplier = selected.space_group().order_z() * selected.space_group().n_ltr()
        self.expected_f_sq *= multiplier

        # linear fit of log(<fobs^2> / expected f^2) versus <s^2>
        self.x = self.mean_stol_sq
        self.y = flex.log(self.mean_fobs_sq / self.expected_f_sq)
        line = flex.linear_regression(self.x, self.y)
        assert line.is_well_defined()
        self.fit_y_intercept = line.y_intercept()
        self.fit_slope = line.slope()
        # intensity scale factor, then its square root for amplitudes
        self.wilson_intensity_scale_factor = math.exp(self.fit_y_intercept)
        self.wilson_k = math.sqrt(self.wilson_intensity_scale_factor)
        self.wilson_b = -self.fit_slope / 2
        self.fit_correlation = flex.linear_correlation(self.x, self.y).coefficient()

        if not e_statistics:
            return
        normalised = selected.normalised_amplitudes(asu_contents, self)
        self.normalised_f_obs = normalised.array()
        self.mean_e_sq_minus_1 = normalised.mean_e_sq_minus_1()
        self.percent_e_sq_gt_2 = normalised.percent_e_sq_gt_2()
Ejemplo n.º 21
0
def absolute_structure_analysis(xs,
                                fo2,
                                fc,
                                scale,
                                nu=None,
                                log=None,
                                outlier_cutoff_factor=None):
    """Run the Hooft and Student's t analyses and report them to log.

    A hooft_analysis is run first and shown together with its probability
    plot.  The observed deviations of that plot are then fitted against
    Student's t quantiles, and the resulting slope is passed to
    students_t_hooft_analysis, which is reported the same way.  If xs is
    not None a flack_analysis is shown as well.

    xs: passed to flack_analysis; None skips that analysis.
    fo2, fc, scale, outlier_cutoff_factor: forwarded to the analyses.
    nu: Student's t parameter; when None it is obtained from
      maximise_students_t_correlation_coefficient with min_nu=1,
      max_nu=200.
    log: object with a write method; defaults to sys.stdout.

    Text is emitted with log.write instead of the Python 2-only
    "print >> log" form; the characters written are the same.
    """
    if log is None:
        log = sys.stdout
    hooft_analysis = absolute_structure.hooft_analysis(
        fo2,
        fc,
        scale_factor=scale,
        outlier_cutoff_factor=outlier_cutoff_factor)
    log.write("Gaussian analysis:\n")
    hooft_analysis.show(out=log)
    NPP = absolute_structure.bijvoet_differences_probability_plot(
        hooft_analysis)
    log.write("Probability plot:\n")
    NPP.show(out=log)
    log.write("\n")
    if nu is None:
        nu = absolute_structure.maximise_students_t_correlation_coefficient(
            NPP.y, min_nu=1, max_nu=200)
    distribution = distributions.students_t_distribution(nu)
    observed_deviations = NPP.y
    expected_deviations = distribution.quantiles(observed_deviations.size())
    # five points at either end are left out of the slope estimate
    fit = flex.linear_regression(expected_deviations[5:-5],
                                 observed_deviations[5:-5])
    t_analysis = absolute_structure.students_t_hooft_analysis(
        fo2,
        fc,
        nu,
        scale_factor=scale,
        probability_plot_slope=fit.slope(),
        outlier_cutoff_factor=outlier_cutoff_factor)
    tPP = absolute_structure.bijvoet_differences_probability_plot(
        t_analysis, use_students_t_distribution=True, students_t_nu=nu)
    log.write("Student's t analysis:\n")
    log.write("nu: %.2f\n" % nu)
    t_analysis.show(out=log)
    log.write("Probability plot:\n")
    tPP.show(out=log)
    log.write("\n")
    if xs is not None:
        flack = absolute_structure.flack_analysis(xs,
                                                  fo2.as_xray_observations())
        flack.show(out=log)
Ejemplo n.º 22
0
def run(args):
  """Compare internal and external sigma estimates of merged intensities.

  One reflection file is read and the array labelled I/SIGI,
  IMEAN/SIGIMEAN or I(+)/SIGI(+)/I(-)/SIGI(-) is used (the last match
  wins).  The intensities are merged twice, once with
  use_internal_variance=False and once with all input sigmas set to 1;
  for reflections with multiplicity above 3 the squared relative sigmas
  of both merges are regressed against each other and plotted, followed
  by a plot of <var(int)>/<var(ext)> in 10 resolution bins.

  Printing uses single-argument print(...) calls, which give the same
  output under Python 2 and Python 3.
  """
  import libtbx.load_env
  usage = "%s [options]" %libtbx.env.dispatcher_name

  parser = OptionParser(
    usage=usage,
    phil=phil_scope,
    check_format=False,
    epilog=help_message)

  # NOTE(review): the args parameter is not handed to parse_args and is
  # overwritten by its result - presumably the parser falls back to the
  # process command line; confirm that this is intended.
  params, options, args = parser.parse_args(show_diff_phil=True,
                                            return_unhandled=True)

  assert len(args) == 1
  from iotbx.reflection_file_reader import any_reflection_file

  intensities = None

  f = args[0]

  arrays = any_reflection_file(f).as_miller_arrays(merge_equivalents=False)
  for ma in arrays:
    print(ma.info().labels)
    if ma.info().labels == ['I', 'SIGI']:
      intensities = ma
    elif ma.info().labels == ['IMEAN', 'SIGIMEAN']:
      intensities = ma
    elif ma.info().labels == ['I(+)', 'SIGI(+)', 'I(-)', 'SIGI(-)']:
      intensities = ma

  assert intensities is not None

  if params.d_min is not None:
    intensities = intensities.resolution_filter(d_min=params.d_min)

  from cctbx.array_family import flex

  # see also:
  #   cctbx/miller/merge_equivalents.h
  #   cctbx/miller/equivalent_reflection_merging.tex

  # this should calculate the external variance, i.e. V(y) = sum(v_i)
  merging_external = intensities.merge_equivalents(use_internal_variance=False)
  multiplicities = merging_external.redundancies().data()
  external_sigmas = merging_external.array().sigmas()
  # sigmas should be bigger not smaller
  external_sigmas *= flex.sqrt(multiplicities.as_double())

  # set the sigmas to 1, and calculate the mean intensities and internal variances
  intensities_copy = intensities.customized_copy(
    sigmas=flex.double(intensities.size(), 1))
  merging_internal = intensities_copy.merge_equivalents()
  merged_intensities = merging_internal.array()
  internal_sigmas = merging_internal.array().sigmas()
  # sigmas should be bigger not smaller
  internal_sigmas *= flex.sqrt(multiplicities.as_double())

  # select only those reflections with sufficient repeat observations
  sel = (multiplicities > 3)
  external_sigmas = external_sigmas.select(sel)
  internal_sigmas = internal_sigmas.select(sel)
  merged_intensities = merged_intensities.select(sel)

  # what we want to plot/do linear regression with
  y = flex.pow2(internal_sigmas/merged_intensities.data())
  x = flex.pow2(external_sigmas/merged_intensities.data())

  # keep only points where both squared relative sigmas are below 1
  sel = (x < 1) & (y < 1)
  x = x.select(sel)
  y = y.select(sel)

  # set backend before importing pyplot
  import matplotlib
  #matplotlib.use('Agg')

  linreg = flex.linear_regression(x, y)
  linreg.show_summary()
  import math
  print(1/math.sqrt(linreg.slope() * linreg.y_intercept()))

  #x = -flex.log10(x)
  #y = -flex.log10(y)

  # the scatter plot shows the reciprocals of the squared relative sigmas
  x = 1/x
  y = 1/y

  from matplotlib import pyplot
  pyplot.scatter(x, y, marker='+', s=20, alpha=1, c='black')
  pyplot.show()
  pyplot.clf()

  # chi^2 plot vs resolution
  # i.e. <var(int)>/<var(ext)>
  # where var(ext) and var(int) are as defined in equations 4 & 5 respectively
  # in Blessing (1997)

  internal_var = merged_intensities.customized_copy(
    data=flex.pow2(internal_sigmas))
  external_var = merged_intensities.customized_copy(
    data=flex.pow2(external_sigmas))

  n_bins = 10
  internal_var.setup_binner(n_bins=n_bins)
  external_var.use_binning_of(internal_var)

  mean_internal = internal_var.mean(use_binning=True)
  mean_external = external_var.mean(use_binning=True)

  # index i+1 skips the first entry of the binned data
  y = [mean_internal.data[i+1]/mean_external.data[i+1] for i in range(n_bins)]
  x = [mean_internal.binner.bin_centers(2)]

  pyplot.scatter(x, y)
  pyplot.xlabel('1/d^2')
  pyplot.ylabel('<var(int)>/<var(ext)>')
  pyplot.show()
  pyplot.clf()

  return
Ejemplo n.º 23
0
def exercise(space_group_info, n_scatterers=8, d_min=2, verbose=0, e_min=1.5):
    """Compare direct-space and reciprocal-space squaring on random data.

    A random structure of ``n_scatterers`` "const" scatterers is built in the
    given space group, its amplitudes are scaled and quasi-normalized, and a
    linear regression between the scaled and quasi-normalized data is
    asserted to be well defined with intercept within 0.1 of zero and slope
    within 0.2 of one.  The reflections whose normalized value exceeds
    ``e_min`` are then given phases computed from a second random structure,
    and ``direct_space_squaring`` / ``reciprocal_space_squaring`` are run
    twice: once with no fixed selection and once with a random selection
    (``random.random() < 0.4`` per reflection).  For every reflection the two
    resulting phases must agree to better than one degree unless the
    direct-space map coefficient is smaller than 1e-6 in modulus.  Finally
    ``exercise_truncate`` is called on the selected reflections.

    ``verbose`` switches on the summary / diagnostic output.
    """

    def build_random_structure():
        # One recipe shared by the reference structure and the phase source.
        return random_structure.xray_structure(
            space_group_info,
            elements=["const"] * n_scatterers,
            volume_per_atom=200,
            min_distance=3.,
            general_positions_only=True,
            u_iso=0.0)

    structure = build_random_structure()
    if verbose:
        structure.show_summary().show_scatterers()

    f_obs = abs(
        structure.structure_factors(d_min=d_min,
                                    anomalous_flag=False).f_calc())
    scaled_data = (
        f_obs.data()
        / math.sqrt(f_obs.space_group().order_p() * n_scatterers)
        / f_obs.space_group().n_ltr())
    q_obs = miller.array(miller_set=f_obs,
                         data=scaled_data).sort(by_value="abs")
    q_obs.setup_binner(auto_binning=True)
    n_obs = q_obs.quasi_normalize_structure_factors()

    regression = flex.linear_regression(q_obs.data(), n_obs.data())
    if verbose:
        regression.show_summary()
    assert regression.is_well_defined()
    assert abs(regression.y_intercept()) < 0.1
    assert abs(regression.slope() - 1) < 0.2

    e_values = q_obs.quasi_normalized_as_normalized().data()
    q_large = q_obs.select(e_values > e_min)
    if verbose:
        print("Number of e-values > %.6g: %d" % (e_min, q_large.size()))

    other_structure = build_random_structure()
    assert other_structure.unit_cell().is_similar_to(structure.unit_cell())
    q_calc = q_large.structure_factors_from_scatterers(
        other_structure, algorithm="direct").f_calc()
    start = q_large.phase_transfer(q_calc.data())

    # Drawn before either squaring pass runs, exactly one random number per
    # reflection.
    random_fixed = flex.double(
        [random.random() for _ in range(start.size())]) < 0.4

    for selection_fixed in (None, random_fixed):
        from_map_data = direct_space_squaring(start, selection_fixed)
        direct_space_result = start.phase_transfer(phase_source=from_map_data)
        new_phases = reciprocal_space_squaring(start, selection_fixed, verbose)
        reciprocal_space_result = start.phase_transfer(
            phase_source=flex.polar(1, new_phases))
        mwpe = direct_space_result.mean_weighted_phase_error(
            reciprocal_space_result)
        if verbose:
            print("mwpe: %.2f" % mwpe, start.space_group_info())

        direct_data = direct_space_result.data()
        reciprocal_data = reciprocal_space_result.data()
        for i, _ in enumerate(direct_space_result.indices()):
            _amp_d, phi_d = complex_math.abs_arg(direct_data[i], deg=True)
            _amp_r, phi_r = complex_math.abs_arg(reciprocal_data[i], deg=True)
            phase_err = scitbx.math.phase_error(phi_d, phi_r, deg=True)
            assert phase_err < 1.0 or abs(from_map_data[i]) < 1.e-6

    exercise_truncate(q_large)
Ejemplo n.º 24
0
def run(args):
    """Compare internal and external variance estimates of merged intensities.

    The command line is parsed with ``OptionParser`` (the incoming ``args``
    value is not handed to the parser; exactly one unhandled argument, the
    reflection file, must remain).  The last array in that file whose labels
    are one of ``I/SIGI``, ``IMEAN/SIGIMEAN`` or the anomalous
    ``I(+)/SIGI(+)/I(-)/SIGI(-)`` set is used, optionally cut at
    ``params.d_min``.

    The data are merged twice: once with ``use_internal_variance=False`` and
    once after setting every sigma to 1.  Both sets of merged sigmas are
    multiplied by sqrt(multiplicity), and only reflections with multiplicity
    above 3 are kept.  The squared relative sigmas below 1 are fed to a
    linear regression whose summary is printed, their reciprocals are shown
    as a scatter plot, and finally the binned ratio
    <var(int)>/<var(ext)> is plotted in 10 resolution bins.

    See also cctbx/miller/merge_equivalents.h and
    cctbx/miller/equivalent_reflection_merging.tex.
    """
    import libtbx.load_env

    parser = OptionParser(
        usage="%s [options]" % libtbx.env.dispatcher_name,
        phil=phil_scope,
        check_format=False,
        epilog=help_message,
    )
    params, _options, positional = parser.parse_args(
        show_diff_phil=True, return_unhandled=True
    )

    assert len(positional) == 1
    from iotbx.reflection_file_reader import any_reflection_file

    reflection_file = positional[0]

    accepted_labels = (
        ["I", "SIGI"],
        ["IMEAN", "SIGIMEAN"],
        ["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"],
    )
    intensities = None
    miller_arrays = any_reflection_file(reflection_file).as_miller_arrays(
        merge_equivalents=False
    )
    for candidate in miller_arrays:
        labels = candidate.info().labels
        print(labels)
        # No break: a later matching array replaces an earlier one.
        if labels in accepted_labels:
            intensities = candidate

    assert intensities is not None

    if params.d_min is not None:
        intensities = intensities.resolution_filter(d_min=params.d_min)

    from cctbx.array_family import flex

    # External estimate: merge using the recorded sigmas, then undo the
    # 1/sqrt(n) shrinkage so the sigmas get bigger, not smaller.
    merging_external = intensities.merge_equivalents(use_internal_variance=False)
    multiplicities = merging_external.redundancies().data()
    external_sigmas = merging_external.array().sigmas()
    external_sigmas *= flex.sqrt(multiplicities.as_double())

    # Internal estimate: merge again with every input sigma forced to 1 and
    # apply the same sqrt(multiplicity) rescaling.
    unit_sigma_intensities = intensities.customized_copy(
        sigmas=flex.double(intensities.size(), 1)
    )
    merging_internal = unit_sigma_intensities.merge_equivalents()
    merged_intensities = merging_internal.array()
    internal_sigmas = merging_internal.array().sigmas()
    internal_sigmas *= flex.sqrt(multiplicities.as_double())

    # Keep only reflections with enough repeat observations.
    well_measured = multiplicities > 3
    external_sigmas = external_sigmas.select(well_measured)
    internal_sigmas = internal_sigmas.select(well_measured)
    merged_intensities = merged_intensities.select(well_measured)

    # Squared relative sigmas: the quantities that are regressed and plotted.
    mean_intensity = merged_intensities.data()
    rel_var_internal = flex.pow2(internal_sigmas / mean_intensity)
    rel_var_external = flex.pow2(external_sigmas / mean_intensity)

    both_below_one = (rel_var_external < 1) & (rel_var_internal < 1)
    rel_var_external = rel_var_external.select(both_below_one)
    rel_var_internal = rel_var_internal.select(both_below_one)

    # Imported ahead of pyplot so a backend could be selected here.
    import matplotlib

    linreg = flex.linear_regression(rel_var_external, rel_var_internal)
    linreg.show_summary()
    import math

    print(1 / math.sqrt(linreg.slope() * linreg.y_intercept()))

    inverse_external = 1 / rel_var_external
    inverse_internal = 1 / rel_var_internal

    from matplotlib import pyplot

    pyplot.scatter(
        inverse_external, inverse_internal, marker="+", s=20, alpha=1, c="black"
    )
    pyplot.show()
    pyplot.clf()

    # chi^2 plot vs resolution, i.e. <var(int)>/<var(ext)>, where var(ext)
    # and var(int) are as defined in equations 4 & 5 respectively in
    # Blessing (1997).
    internal_var = merged_intensities.customized_copy(data=flex.pow2(internal_sigmas))
    external_var = merged_intensities.customized_copy(data=flex.pow2(external_sigmas))

    n_bins = 10
    internal_var.setup_binner(n_bins=n_bins)
    external_var.use_binning_of(internal_var)

    mean_internal = internal_var.mean(use_binning=True)
    mean_external = external_var.mean(use_binning=True)

    # Entries 1..n_bins of the binned data are used; entry 0 is skipped.
    variance_ratio = [
        mean_internal.data[i_bin] / mean_external.data[i_bin]
        for i_bin in range(1, n_bins + 1)
    ]
    bin_centers = [mean_internal.binner.bin_centers(2)]

    pyplot.scatter(bin_centers, variance_ratio)
    pyplot.xlabel("1/d^2")
    pyplot.ylabel("<var(int)>/<var(ext)>")
    pyplot.show()
    pyplot.clf()
Ejemplo n.º 25
0
  def refine_rotx_roty2(OO,enable_rotational_target=True):
      """Refine with the per-frame helper, then fit a mosaicity / domain-size model.

      Optionally runs a least-squares minimisation on the helper returned by
      OO.per_frame_helper_factory(), converts the helper's residual vector
      into excursion angles, and models those excursions with either the
      least-squares envelope fit ("LSQ") or the maximum-likelihood minimizer
      ("ML"), as selected by OO.mosaic_refinement_target.

      Parameters:
        enable_rotational_target: when true, run
          normal_eqns_solving.naive_iterations on the helper before reading
          helper.x; when false, helper.x is read straight after restart().

      Side effects: prints progress to stdout, may display matplotlib
      figures, and stores green_curve_area, ML_half_mosaicity_deg,
      ML_domain_size_ang and ewald_proximal_volume on OO.parent (plus the
      mosaicity on OO.parent.inputai).

      Returns a 3-tuple (helper.x, helper.last_set_orientation, domain size)
      where the domain size is 1/M.x[0] for "ML" and s_ang for "LSQ".

      Raises AssertionError when OO.mosaic_refinement_target is neither "ML"
      nor "LSQ".
      """

      helper = OO.per_frame_helper_factory()
      helper.restart()

      if enable_rotational_target:
        # Trailing comma: the following prints continue on the same line.
        print "Trying least squares minimization of excursions",
        from scitbx.lstbx import normal_eqns_solving
        # The returned object is not used afterwards; the refined parameters
        # are read back from helper.x below.
        iterations = normal_eqns_solving.naive_iterations(
          non_linear_ls = helper,
          gradient_threshold = 1.E-10)

      results =  helper.x

      # results[1] and results[0] are reported after a radians->degrees
      # conversion.
      print "with %d reflections"%len(OO.parent.indexed_pairs),
      print "result %6.2f degrees"%(results[1]*180./math.pi),
      print "result %6.2f degrees"%(results[0]*180./math.pi)

      # Disabled diagnostic: this branch never executes.
      if False: # Excursion histogram
        print "The input mosaicity is %7.3f deg full width"%OO.parent.inputai.getMosaicity()
        # final histogram
        if OO.pvr_fix:
          final = 360.* helper.fvec_callable_pvr(results)
        else:
          final = 360.* helper.fvec_callable_NOT_USED_AFTER_BUGFIX(results)

        rmsdexc = math.sqrt(flex.mean(final*final))
        from matplotlib import pyplot as plt
        nbins = len(final)//20
        n,bins,patches = plt.hist(final,
          nbins, normed=0, facecolor="orange", alpha=0.75)
        plt.xlabel("Rotation on e1 axis, rmsd %7.3f deg"%rmsdexc)
        plt.title("Histogram of cctbx.xfel misorientation")
        plt.axis([-0.5,0.5,0,100])
        plt.plot([rmsdexc],[18],"b|")
        plt.show()

      # Determine optimal mosaicity and domain size model (monochromatic)
      # `final` is the helper's residual vector scaled by 360; it is treated
      # as degrees everywhere below.
      if OO.pvr_fix:
        final = 360.* helper.fvec_callable_pvr(results)
      else:
        final = 360.* helper.fvec_callable_NOT_USED_AFTER_BUGFIX(results)
      #Guard against misindexing -- seen in simulated data, with zone nearly perfectly aligned
      guard_stats = flex.max(final), flex.min(final)
      # The leading `False and` short-circuits, so this guard never fires and
      # the REMOVETEST_... name (not defined in the visible code) is never
      # evaluated.
      if False and REMOVETEST_KILLING_LEGITIMATE_EXCURSIONS (guard_stats[0] > 2.0 or guard_stats[1] < -2.0):
        raise Exception("Misindexing diagnosed by meaningless excursion angle (bandpass_gaussian model)");
      print "The mean excursion is %7.3f degrees"%(flex.mean(final))

      two_thetas = helper.last_set_orientation.unit_cell().two_theta(OO.reserve_indices,OO.central_wavelength_ang,deg=True)
      dspacings = helper.last_set_orientation.unit_cell().d(OO.reserve_indices)
      # dspace_sq is computed but not referenced again in this method.
      dspace_sq = dspacings * dspacings
      excursion_rad = final * math.pi/ 180.

      #  First -- try to get a reasonable envelope for the observed excursions.
      # Spots are sorted by two-theta and split into equal-sized bins: at
      # least 3 bins, at most 50, about one bin per 25 spots.  Each bin
      # contributes its mean two-theta, mean d-spacing and largest absolute
      # excursion.  Spots beyond n_bins*bin_sz are left out of the envelope.
      print "fitting parameters on %d spots"%len(excursion_rad)
      n_bins = min(max(3, len(excursion_rad)//25),50)
      bin_sz = len(excursion_rad)//n_bins
      print "nbins",n_bins,"bin_sz",bin_sz
      order = flex.sort_permutation(two_thetas)
      two_thetas_env = flex.double()
      dspacings_env = flex.double()
      excursion_rads_env = flex.double()
      for x in xrange(0,n_bins):
        subset = order[x*bin_sz:(x+1)*bin_sz]
        two_thetas_env.append( flex.mean(two_thetas.select(subset)) )
        dspacings_env.append( flex.mean(dspacings.select(subset)))
        excursion_rads_env.append( flex.max( flex.abs( excursion_rad.select(subset))))

      #  Second -- parameter fit
      # Solve the 2x2 normal equations built from the envelope sums of d,
      # d*d, excursion and d*excursion.  This looks like a straight-line fit
      # of envelope excursion against d-spacing (solution[0] slope,
      # solution[1] intercept) -- assumes sqr/col are the scitbx.matrix
      # helpers; TODO confirm.
      sum_inv_u_sq = flex.sum(dspacings_env * dspacings_env)
      sum_inv_u    = flex.sum(dspacings_env)
      sum_te_u     = flex.sum(dspacings_env * excursion_rads_env)
      sum_te       = flex.sum(excursion_rads_env)
      Normal_Mat   = sqr((sum_inv_u_sq, sum_inv_u, sum_inv_u, len(dspacings_env)))
      Vector       = col((sum_te_u, sum_te))
      solution     = Normal_Mat.inverse() * Vector
      # Domain size is taken as 1/(2*solution[0]); solution[1], converted to
      # degrees, is reported as the half-mosaicity.
      s_ang = 1./(2*solution[0])
      print "Best LSQ fit Scheerer domain size is %9.2f ang"%(
        s_ang)
      tan_phi_rad = helper.last_set_orientation.unit_cell().d(OO.reserve_indices) / (2. * s_ang)
      tan_phi_deg = tan_phi_rad * 180./math.pi
      k_degrees = solution[1]* 180./math.pi
      print "The LSQ full mosaicity is %8.5f deg; half-mosaicity %9.5f"%(2*k_degrees, k_degrees)
      tan_outer_deg = tan_phi_deg + k_degrees

      # M and the *_ML curves are only bound when the target is "ML"; every
      # later use is guarded by the same test.
      if OO.mosaic_refinement_target=="ML":
        from xfel.mono_simulation.max_like import minimizer
        print "input", s_ang,2. * solution[1]*180/math.pi
        # coerce the estimates to be positive for max-likelihood
        # NOTE(review): the multiplier is 20 while the trailing comment says
        # "10-unit cell" -- confirm which is intended.
        lower_limit_domain_size = math.pow(
         helper.last_set_orientation.unit_cell().volume(),
         1./3.)*20 # 10-unit cell block size minimum reasonable domain

        d_estimate = max(s_ang, lower_limit_domain_size)
        M = minimizer(d_i = dspacings, psi_i = excursion_rad, eta_rad = abs(2. * solution[1]),
                      Deff = d_estimate)
        # M.x[0] is reported as an inverse domain size and M.x[1] as a full
        # mosaicity in radians.
        print "output",1./M.x[0], M.x[1]*180./math.pi
        tan_phi_rad_ML = helper.last_set_orientation.unit_cell().d(OO.reserve_indices) / (2. / M.x[0])
        tan_phi_deg_ML = tan_phi_rad_ML * 180./math.pi
        # bugfix: Need factor of 0.5 because the plot shows half mosaicity (displacement from the center point defined as zero)
        tan_outer_deg_ML = tan_phi_deg_ML + 0.5*M.x[1]*180./math.pi

      # Diagnostic-only branch: prints and (optionally) plots predicted
      # versus observed spot positions; nothing computed here feeds the
      # return value.
      if OO.parent.horizons_phil.integration.mosaic.enable_polychromatic:
        # add code here to perform polychromatic modeling.
        """
        get miller indices DONE
        get model-predicted mono-wavelength centroid S1 vectors
        back-convert S1vec, with mono-wavelength, to detector-plane position, factoring in subpixel correction
        compare with spot centroid measured position
        compare with locus of bodypixels
        """
        print list(OO.reserve_indices)
        print len(OO.reserve_indices), len(two_thetas)
        positions = [
              OO.ucbp3.simple_forward_calculation_spot_position(
              wavelength = OO.central_wavelength_ang,
              observation_no = obsno).position
              for obsno in xrange(len(OO.parent.indexed_pairs))]
        print len(positions)
        print positions # model-predicted positions
        print len(OO.parent.spots)
        print OO.parent.indexed_pairs
        print OO.parent.spots
        print len(OO.parent.spots)
        meas_spots = [OO.parent.spots[pair["spot"]] for pair in OO.parent.indexed_pairs]
  #      for xspot in meas_spots:
  #        xspot.ctr_mass_x(),xspot.ctr_mass_y()
  #        xspot.max_pxl_x()
  #        xspot.bodypixels
  #        xspot.ctr_mass_x()

        # Do some work to calculate an rmsd
        # Component 0 of the prediction is paired with ctr_mass_y() and
        # component 1 with ctr_mass_x().
        diff_vecs = flex.vec3_double()
        for p,xspot in zip(positions, meas_spots):
          diff_vecs.append((p[0]-xspot.ctr_mass_y(), p[1]-xspot.ctr_mass_x(), 0.0))
        # could use diff_vecs.rms_length()
        diff_vecs_sq = diff_vecs.dot(diff_vecs)
        mean_diff_vec_sq = flex.mean(diff_vecs_sq)
        rmsd = math.sqrt(mean_diff_vec_sq)
        print "mean obs-pred diff vec on %d spots is %6.2f pixels"%(len(positions),rmsd)

        # Same comparison using the position_to_fictitious attribute of the
        # forward calculation.
        positions_to_fictitious = [
              OO.ucbp3.simple_forward_calculation_spot_position(
              wavelength = OO.central_wavelength_ang,
              observation_no = obsno).position_to_fictitious
              for obsno in xrange(len(OO.parent.indexed_pairs))]
        # Do some work to calculate an rmsd
        diff_vecs = flex.vec3_double()
        for p,xspot in zip(positions_to_fictitious, meas_spots):
          diff_vecs.append((p[0]-xspot.ctr_mass_y(), p[1]-xspot.ctr_mass_x(), 0.0))
        rmsd = diff_vecs.rms_length()
        print "mean obs-pred_to_fictitious diff vec on %d spots is %6.2f pixels"%(len(positions),rmsd)

        """
        actually, it might be better if the entire set of experimental observations
        is transformed into the ideal detector plane, for the purposes of poly_treatment.


        start here.  Now it would be good to actually implement probability of observing a body pixel given the model.
        We have everything needed right here.
        """
        if OO.parent.horizons_phil.integration.mosaic.enable_AD14F7B:
          # Image plot: obs and predicted positions + bodypixels
          from matplotlib import pyplot as plt
          plt.plot( [p[0] for p in positions_to_fictitious], [p[1] for p in positions_to_fictitious], "r.")
          plt.plot( [xspot.ctr_mass_y() for xspot in meas_spots],
                    [xspot.ctr_mass_x() for xspot in meas_spots], "g.")
          bodypx = []
          for xspot in meas_spots:
            for body in xspot.bodypixels:
              bodypx.append(body)
          plt.plot( [b.y for b in bodypx], [b.x for b in bodypx], "b.")
          plt.axes().set_aspect("equal")
          plt.show()

      # Trailing comma keeps the line open; it is finished either by the
      # mosaicity print or by the bare `print` below.
      print "MEAN excursion",flex.mean(final),
      if OO.mosaic_refinement_target=="ML":
        print "mosaicity deg FW=",M.x[1]*180./math.pi
      else:
        print
      if OO.parent.horizons_phil.integration.mosaic.enable_AD14F7B: # Excursion vs resolution fit
        # Fixed axis limits for the plot: two-theta range and excursion range.
        AD1TF7B_MAX2T = 30.
        AD1TF7B_MAXDP = 1.
        from matplotlib import pyplot as plt
        fig = plt.figure()
        plt.plot(two_thetas, final, "bo")
        mean = flex.mean(final)
        minplot = flex.min(two_thetas)
        plt.plot([0,minplot],[mean,mean],"k-")
        LR = flex.linear_regression(two_thetas, final)
        #LR.show_summary()
        model_y = LR.slope()*two_thetas + LR.y_intercept()
        plt.plot(two_thetas, model_y, "k-")
        print helper.last_set_orientation.unit_cell()
        #for sdp,tw in zip (dspacings,two_thetas):
          #print sdp,tw
        if OO.mosaic_refinement_target=="ML":
          # NOTE(review): the title reports Dsize as 2./M.x[0] whereas the
          # value stored and returned below is 1./M.x[0] -- confirm which is
          # intended.
          plt.title("ML: mosaicity FW=%4.2f deg, Dsize=%5.0fA on %d spots"%(M.x[1]*180./math.pi, 2./M.x[0], len(two_thetas)))
          plt.plot(two_thetas, tan_phi_deg_ML, "r.")
          plt.plot(two_thetas, -tan_phi_deg_ML, "r.")
          plt.plot(two_thetas, tan_outer_deg_ML, "g.")
          plt.plot(two_thetas, -tan_outer_deg_ML, "g.")
        else:
          plt.plot(two_thetas_env, excursion_rads_env *180./math.pi, "r|")
          plt.plot(two_thetas_env, -excursion_rads_env *180./math.pi, "r|")
          plt.plot(two_thetas_env, excursion_rads_env *180./math.pi, "r-")
          plt.plot(two_thetas_env, -excursion_rads_env *180./math.pi, "r-")
          plt.plot(two_thetas, tan_phi_deg, "r.")
          plt.plot(two_thetas, -tan_phi_deg, "r.")
          plt.plot(two_thetas, tan_outer_deg, "g.")
          plt.plot(two_thetas, -tan_outer_deg, "g.")
        plt.xlim([0,AD1TF7B_MAX2T])
        plt.ylim([-AD1TF7B_MAXDP,AD1TF7B_MAXDP])
        OO.parent.show_figure(plt,fig,"psi")
        plt.close()

      # Publish the fitted model on OO.parent and return.  Both branches set
      # the same attributes (including the ML_-prefixed ones), from the ML
      # or LSQ values respectively.
      from xfel.mono_simulation.util import green_curve_area,ewald_proximal_volume
      if OO.mosaic_refinement_target=="ML":
        OO.parent.green_curve_area = green_curve_area(two_thetas, tan_outer_deg_ML)
        OO.parent.inputai.setMosaicity(M.x[1]*180./math.pi) # full width, degrees
        OO.parent.ML_half_mosaicity_deg = M.x[1]*180./(2.*math.pi)
        OO.parent.ML_domain_size_ang = 1./M.x[0]
        OO.parent.ewald_proximal_volume = ewald_proximal_volume(
            wavelength_ang = OO.central_wavelength_ang,
            resolution_cutoff_ang = OO.parent.horizons_phil.integration.mosaic.ewald_proximal_volume_resolution_cutoff,
            domain_size_ang = 1./M.x[0],
            full_mosaicity_rad = M.x[1])
        return results, helper.last_set_orientation,1./M.x[0] # full width domain size, angstroms
      else:
        # Any target other than "ML" must be "LSQ".
        assert OO.mosaic_refinement_target=="LSQ"
        OO.parent.green_curve_area = green_curve_area(two_thetas, tan_outer_deg)
        OO.parent.inputai.setMosaicity(2*k_degrees) # full width
        OO.parent.ML_half_mosaicity_deg = k_degrees
        OO.parent.ML_domain_size_ang = s_ang
        OO.parent.ewald_proximal_volume = ewald_proximal_volume(
            wavelength_ang = OO.central_wavelength_ang,
            resolution_cutoff_ang = OO.parent.horizons_phil.integration.mosaic.ewald_proximal_volume_resolution_cutoff,
            domain_size_ang = s_ang,
            full_mosaicity_rad = 2*k_degrees*math.pi/180.)
        return results, helper.last_set_orientation,s_ang # full width domain size, angstroms
Ejemplo n.º 26
0
    def refine_rotx_roty2(OO, enable_rotational_target=True):
        """Refine with the per-frame helper, then fit a mosaicity / domain-size model.

        Optionally runs a least-squares minimisation on the helper returned
        by OO.per_frame_helper_factory(), converts the helper's residual
        vector into excursion angles, and models those excursions with
        either the least-squares envelope fit ("LSQ") or the
        maximum-likelihood minimizer ("ML"), as selected by
        OO.mosaic_refinement_target.

        Parameters:
          enable_rotational_target: when true, run
            normal_eqns_solving.naive_iterations on the helper before
            reading helper.x; when false, helper.x is read straight after
            restart().

        Side effects: prints progress to stdout, may display matplotlib
        figures, and stores green_curve_area, ML_half_mosaicity_deg,
        ML_domain_size_ang and ewald_proximal_volume on OO.parent (plus the
        mosaicity on OO.parent.inputai).

        Returns a 3-tuple (helper.x, helper.last_set_orientation, domain
        size) where the domain size is 1/M.x[0] for "ML" and s_ang for "LSQ".

        Raises AssertionError when OO.mosaic_refinement_target is neither
        "ML" nor "LSQ".
        """

        helper = OO.per_frame_helper_factory()
        helper.restart()

        if enable_rotational_target:
            # Trailing comma: the following prints continue on the same line.
            print "Trying least squares minimization of excursions",
            from scitbx.lstbx import normal_eqns_solving
            # The returned object is not used afterwards; the refined
            # parameters are read back from helper.x below.
            iterations = normal_eqns_solving.naive_iterations(
                non_linear_ls=helper, gradient_threshold=1.E-10)

        results = helper.x

        # results[1] and results[0] are reported after a radians->degrees
        # conversion.
        print "with %d reflections" % len(OO.parent.indexed_pairs),
        print "result %6.2f degrees" % (results[1] * 180. / math.pi),
        print "result %6.2f degrees" % (results[0] * 180. / math.pi)

        # Disabled diagnostic: this branch never executes.
        if False:  # Excursion histogram
            print "The input mosaicity is %7.3f deg full width" % OO.parent.inputai.getMosaicity(
            )
            # final histogram
            if OO.pvr_fix:
                final = 360. * helper.fvec_callable_pvr(results)
            else:
                final = 360. * helper.fvec_callable_NOT_USED_AFTER_BUGFIX(
                    results)

            rmsdexc = math.sqrt(flex.mean(final * final))
            from matplotlib import pyplot as plt
            nbins = len(final) // 20
            n, bins, patches = plt.hist(final,
                                        nbins,
                                        normed=0,
                                        facecolor="orange",
                                        alpha=0.75)
            plt.xlabel("Rotation on e1 axis, rmsd %7.3f deg" % rmsdexc)
            plt.title("Histogram of cctbx.xfel misorientation")
            plt.axis([-0.5, 0.5, 0, 100])
            plt.plot([rmsdexc], [18], "b|")
            plt.show()

        # Determine optimal mosaicity and domain size model (monochromatic)
        # `final` is the helper's residual vector scaled by 360; it is
        # treated as degrees everywhere below.
        if OO.pvr_fix:
            final = 360. * helper.fvec_callable_pvr(results)
        else:
            final = 360. * helper.fvec_callable_NOT_USED_AFTER_BUGFIX(results)
        #Guard against misindexing -- seen in simulated data, with zone nearly perfectly aligned
        guard_stats = flex.max(final), flex.min(final)
        # The leading `False and` short-circuits, so this guard never fires
        # and the REMOVETEST_... name (not defined in the visible code) is
        # never evaluated.
        if False and REMOVETEST_KILLING_LEGITIMATE_EXCURSIONS(
                guard_stats[0] > 2.0 or guard_stats[1] < -2.0):
            raise Exception(
                "Misindexing diagnosed by meaningless excursion angle (bandpass_gaussian model)"
            )
        print "The mean excursion is %7.3f degrees" % (flex.mean(final))

        two_thetas = helper.last_set_orientation.unit_cell().two_theta(
            OO.reserve_indices, OO.central_wavelength_ang, deg=True)
        dspacings = helper.last_set_orientation.unit_cell().d(
            OO.reserve_indices)
        # dspace_sq is computed but not referenced again in this method.
        dspace_sq = dspacings * dspacings
        excursion_rad = final * math.pi / 180.

        #  First -- try to get a reasonable envelope for the observed excursions.
        # Spots are sorted by two-theta and split into equal-sized bins: at
        # least 3 bins, at most 50, about one bin per 25 spots.  Each bin
        # contributes its mean two-theta, mean d-spacing and largest
        # absolute excursion.  Spots beyond n_bins*bin_sz are left out of
        # the envelope.
        print "fitting parameters on %d spots" % len(excursion_rad)
        n_bins = min(max(3, len(excursion_rad) // 25), 50)
        bin_sz = len(excursion_rad) // n_bins
        print "nbins", n_bins, "bin_sz", bin_sz
        order = flex.sort_permutation(two_thetas)
        two_thetas_env = flex.double()
        dspacings_env = flex.double()
        excursion_rads_env = flex.double()
        for x in range(0, n_bins):
            subset = order[x * bin_sz:(x + 1) * bin_sz]
            two_thetas_env.append(flex.mean(two_thetas.select(subset)))
            dspacings_env.append(flex.mean(dspacings.select(subset)))
            excursion_rads_env.append(
                flex.max(flex.abs(excursion_rad.select(subset))))

        #  Second -- parameter fit
        # Solve the 2x2 normal equations built from the envelope sums of d,
        # d*d, excursion and d*excursion.  This looks like a straight-line
        # fit of envelope excursion against d-spacing (solution[0] slope,
        # solution[1] intercept) -- assumes sqr/col are the scitbx.matrix
        # helpers; TODO confirm.
        sum_inv_u_sq = flex.sum(dspacings_env * dspacings_env)
        sum_inv_u = flex.sum(dspacings_env)
        sum_te_u = flex.sum(dspacings_env * excursion_rads_env)
        sum_te = flex.sum(excursion_rads_env)
        Normal_Mat = sqr(
            (sum_inv_u_sq, sum_inv_u, sum_inv_u, len(dspacings_env)))
        Vector = col((sum_te_u, sum_te))
        solution = Normal_Mat.inverse() * Vector
        # Domain size is taken as 1/(2*solution[0]); solution[1], converted
        # to degrees, is reported as the half-mosaicity.
        s_ang = 1. / (2 * solution[0])
        print "Best LSQ fit Scheerer domain size is %9.2f ang" % (s_ang)
        tan_phi_rad = helper.last_set_orientation.unit_cell().d(
            OO.reserve_indices) / (2. * s_ang)
        tan_phi_deg = tan_phi_rad * 180. / math.pi
        k_degrees = solution[1] * 180. / math.pi
        print "The LSQ full mosaicity is %8.5f deg; half-mosaicity %9.5f" % (
            2 * k_degrees, k_degrees)
        tan_outer_deg = tan_phi_deg + k_degrees

        # M and the *_ML curves are only bound when the target is "ML";
        # every later use is guarded by the same test.
        if OO.mosaic_refinement_target == "ML":
            from xfel.mono_simulation.max_like import minimizer
            print "input", s_ang, 2. * solution[1] * 180 / math.pi
            # coerce the estimates to be positive for max-likelihood
            # NOTE(review): the multiplier is 20 while the trailing comment
            # says "10-unit cell" -- confirm which is intended.
            lower_limit_domain_size = math.pow(
                helper.last_set_orientation.unit_cell().volume(), 1. /
                3.) * 20  # 10-unit cell block size minimum reasonable domain

            d_estimate = max(s_ang, lower_limit_domain_size)
            M = minimizer(d_i=dspacings,
                          psi_i=excursion_rad,
                          eta_rad=abs(2. * solution[1]),
                          Deff=d_estimate)
            # M.x[0] is reported as an inverse domain size and M.x[1] as a
            # full mosaicity in radians.
            print "output", 1. / M.x[0], M.x[1] * 180. / math.pi
            tan_phi_rad_ML = helper.last_set_orientation.unit_cell().d(
                OO.reserve_indices) / (2. / M.x[0])
            tan_phi_deg_ML = tan_phi_rad_ML * 180. / math.pi
            # bugfix: Need factor of 0.5 because the plot shows half mosaicity (displacement from the center point defined as zero)
            tan_outer_deg_ML = tan_phi_deg_ML + 0.5 * M.x[1] * 180. / math.pi

        # Diagnostic-only branch: prints and (optionally) plots predicted
        # versus observed spot positions; nothing computed here feeds the
        # return value.
        if OO.parent.horizons_phil.integration.mosaic.enable_polychromatic:
            # add code here to perform polychromatic modeling.
            """
        get miller indices DONE
        get model-predicted mono-wavelength centroid S1 vectors
        back-convert S1vec, with mono-wavelength, to detector-plane position, factoring in subpixel correction
        compare with spot centroid measured position
        compare with locus of bodypixels
        """
            print list(OO.reserve_indices)
            print len(OO.reserve_indices), len(two_thetas)
            positions = [
                OO.ucbp3.simple_forward_calculation_spot_position(
                    wavelength=OO.central_wavelength_ang,
                    observation_no=obsno).position
                for obsno in range(len(OO.parent.indexed_pairs))
            ]
            print len(positions)
            print positions  # model-predicted positions
            print len(OO.parent.spots)
            print OO.parent.indexed_pairs
            print OO.parent.spots
            print len(OO.parent.spots)
            meas_spots = [
                OO.parent.spots[pair["spot"]]
                for pair in OO.parent.indexed_pairs
            ]
            #      for xspot in meas_spots:
            #        xspot.ctr_mass_x(),xspot.ctr_mass_y()
            #        xspot.max_pxl_x()
            #        xspot.bodypixels
            #        xspot.ctr_mass_x()

            # Do some work to calculate an rmsd
            # Component 0 of the prediction is paired with ctr_mass_y() and
            # component 1 with ctr_mass_x().
            diff_vecs = flex.vec3_double()
            for p, xspot in zip(positions, meas_spots):
                diff_vecs.append((p[0] - xspot.ctr_mass_y(),
                                  p[1] - xspot.ctr_mass_x(), 0.0))
            # could use diff_vecs.rms_length()
            diff_vecs_sq = diff_vecs.dot(diff_vecs)
            mean_diff_vec_sq = flex.mean(diff_vecs_sq)
            rmsd = math.sqrt(mean_diff_vec_sq)
            print "mean obs-pred diff vec on %d spots is %6.2f pixels" % (
                len(positions), rmsd)

            # Same comparison using the position_to_fictitious attribute of
            # the forward calculation.
            positions_to_fictitious = [
                OO.ucbp3.simple_forward_calculation_spot_position(
                    wavelength=OO.central_wavelength_ang,
                    observation_no=obsno).position_to_fictitious
                for obsno in range(len(OO.parent.indexed_pairs))
            ]
            # Do some work to calculate an rmsd
            diff_vecs = flex.vec3_double()
            for p, xspot in zip(positions_to_fictitious, meas_spots):
                diff_vecs.append((p[0] - xspot.ctr_mass_y(),
                                  p[1] - xspot.ctr_mass_x(), 0.0))
            rmsd = diff_vecs.rms_length()
            print "mean obs-pred_to_fictitious diff vec on %d spots is %6.2f pixels" % (
                len(positions), rmsd)
            """
        actually, it might be better if the entire set of experimental observations
        is transformed into the ideal detector plane, for the purposes of poly_treatment.


        start here.  Now it would be good to actually implement probability of observing a body pixel given the model.
        We have everything needed right here.
        """
            if OO.parent.horizons_phil.integration.mosaic.enable_AD14F7B:
                # Image plot: obs and predicted positions + bodypixels
                from matplotlib import pyplot as plt
                plt.plot([p[0] for p in positions_to_fictitious],
                         [p[1] for p in positions_to_fictitious], "r.")
                plt.plot([xspot.ctr_mass_y() for xspot in meas_spots],
                         [xspot.ctr_mass_x() for xspot in meas_spots], "g.")
                bodypx = []
                for xspot in meas_spots:
                    for body in xspot.bodypixels:
                        bodypx.append(body)
                plt.plot([b.y for b in bodypx], [b.x for b in bodypx], "b.")
                plt.axes().set_aspect("equal")
                plt.show()

        # Trailing comma keeps the line open; it is finished either by the
        # mosaicity print or by the bare `print` below.
        print "MEAN excursion", flex.mean(final),
        if OO.mosaic_refinement_target == "ML":
            print "mosaicity deg FW=", M.x[1] * 180. / math.pi
        else:
            print
        if OO.parent.horizons_phil.integration.mosaic.enable_AD14F7B:  # Excursion vs resolution fit
            # Fixed axis limits for the plot: two-theta range and excursion
            # range.
            AD1TF7B_MAX2T = 30.
            AD1TF7B_MAXDP = 1.
            from matplotlib import pyplot as plt
            fig = plt.figure()
            plt.plot(two_thetas, final, "bo")
            mean = flex.mean(final)
            minplot = flex.min(two_thetas)
            plt.plot([0, minplot], [mean, mean], "k-")
            LR = flex.linear_regression(two_thetas, final)
            #LR.show_summary()
            model_y = LR.slope() * two_thetas + LR.y_intercept()
            plt.plot(two_thetas, model_y, "k-")
            print helper.last_set_orientation.unit_cell()
            #for sdp,tw in zip (dspacings,two_thetas):
            #print sdp,tw
            if OO.mosaic_refinement_target == "ML":
                # NOTE(review): the title reports Dsize as 2./M.x[0] whereas
                # the value stored and returned below is 1./M.x[0] --
                # confirm which is intended.
                plt.title(
                    "ML: mosaicity FW=%4.2f deg, Dsize=%5.0fA on %d spots" %
                    (M.x[1] * 180. / math.pi, 2. / M.x[0], len(two_thetas)))
                plt.plot(two_thetas, tan_phi_deg_ML, "r.")
                plt.plot(two_thetas, -tan_phi_deg_ML, "r.")
                plt.plot(two_thetas, tan_outer_deg_ML, "g.")
                plt.plot(two_thetas, -tan_outer_deg_ML, "g.")
            else:
                plt.plot(two_thetas_env, excursion_rads_env * 180. / math.pi,
                         "r|")
                plt.plot(two_thetas_env, -excursion_rads_env * 180. / math.pi,
                         "r|")
                plt.plot(two_thetas_env, excursion_rads_env * 180. / math.pi,
                         "r-")
                plt.plot(two_thetas_env, -excursion_rads_env * 180. / math.pi,
                         "r-")
                plt.plot(two_thetas, tan_phi_deg, "r.")
                plt.plot(two_thetas, -tan_phi_deg, "r.")
                plt.plot(two_thetas, tan_outer_deg, "g.")
                plt.plot(two_thetas, -tan_outer_deg, "g.")
            plt.xlim([0, AD1TF7B_MAX2T])
            plt.ylim([-AD1TF7B_MAXDP, AD1TF7B_MAXDP])
            OO.parent.show_figure(plt, fig, "psi")
            plt.close()

        # Publish the fitted model on OO.parent and return.  Both branches
        # set the same attributes (including the ML_-prefixed ones), from
        # the ML or LSQ values respectively.
        from xfel.mono_simulation.util import green_curve_area, ewald_proximal_volume
        if OO.mosaic_refinement_target == "ML":
            OO.parent.green_curve_area = green_curve_area(
                two_thetas, tan_outer_deg_ML)
            OO.parent.inputai.setMosaicity(M.x[1] * 180. /
                                           math.pi)  # full width, degrees
            OO.parent.ML_half_mosaicity_deg = M.x[1] * 180. / (2. * math.pi)
            OO.parent.ML_domain_size_ang = 1. / M.x[0]
            OO.parent.ewald_proximal_volume = ewald_proximal_volume(
                wavelength_ang=OO.central_wavelength_ang,
                resolution_cutoff_ang=OO.parent.horizons_phil.integration.
                mosaic.ewald_proximal_volume_resolution_cutoff,
                domain_size_ang=1. / M.x[0],
                full_mosaicity_rad=M.x[1])
            return results, helper.last_set_orientation, 1. / M.x[
                0]  # full width domain size, angstroms
        else:
            # Any target other than "ML" must be "LSQ".
            assert OO.mosaic_refinement_target == "LSQ"
            OO.parent.green_curve_area = green_curve_area(
                two_thetas, tan_outer_deg)
            OO.parent.inputai.setMosaicity(2 * k_degrees)  # full width
            OO.parent.ML_half_mosaicity_deg = k_degrees
            OO.parent.ML_domain_size_ang = s_ang
            OO.parent.ewald_proximal_volume = ewald_proximal_volume(
                wavelength_ang=OO.central_wavelength_ang,
                resolution_cutoff_ang=OO.parent.horizons_phil.integration.
                mosaic.ewald_proximal_volume_resolution_cutoff,
                domain_size_ang=s_ang,
                full_mosaicity_rad=2 * k_degrees * math.pi / 180.)
            return results, helper.last_set_orientation, s_ang  # full width domain size, angstroms