Example #1
File: merge.py Project: TiankunZhou/dials
def show_wilson_scaling_analysis(merged_intensities, n_residues=200):
    """
    Report the Wilson statistics for a merged intensity array

    Args:
        merged_intensities: A merged miller intensity array.
        n_residues: The number of residues to use for the Wilson analysis.
    """
    if not merged_intensities.space_group().is_centric():
        try:
            wilson_scaling = data_statistics.wilson_scaling(
                miller_array=merged_intensities, n_residues=n_residues)
        except (IndexError, RuntimeError) as e:
            logger.error(
                "\n"
                "Error encountered during Wilson statistics calculation:\n"
                "Perhaps there are too few unique reflections.\n"
                "%s",
                e,
                exc_info=True,
            )
        else:
            # Divert output through logger - do with StringIO rather than
            # info_handle else get way too much whitespace in output.
            out = StringIO()
            wilson_scaling.show(out=out)
            logger.info(out.getvalue())
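A minimal driver for the helper above, sketched here as an assumption (the input file name and the read-and-merge pattern are borrowed from the later examples, not part of the original snippet):

from iotbx.reflection_file_reader import any_reflection_file

reader = any_reflection_file("scaled.mtz")  # hypothetical input file
for ma in reader.as_miller_arrays(merge_equivalents=False):
    if ma.is_xray_intensity_array():
        # Merge symmetry equivalents before running the Wilson analysis.
        show_wilson_scaling_analysis(ma.merge_equivalents().array())
        break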
Example #2
def do_french_wilson(mtz_file, hklout, anomalous=False):
    logger.debug("Reading reflections from %s", mtz_file)

    result = any_reflection_file(mtz_file)
    assert result.file_type() == "ccp4_mtz"

    mtz_object = result.file_content()
    output = StringIO()
    mtz_object.show_summary(out=output)

    for ma in result.as_miller_arrays(merge_equivalents=False):
        if anomalous and ma.info().labels == [
            "I(+)",
            "SIGI(+)",
            "I(-)",
            "SIGI(-)",
        ]:
            assert ma.anomalous_flag()
            intensities = ma.merge_equivalents().array()  # XXX why is this necessary?
        elif ma.info().labels == ["IMEAN", "SIGIMEAN"]:
            assert not ma.anomalous_flag()
            intensities = ma
        else:
            intensities = None

        if intensities is not None:
            assert intensities.is_xray_intensity_array()
            amplitudes = intensities.french_wilson(log=output)
            assert amplitudes.is_xray_amplitude_array()

            dano = None
            if amplitudes.anomalous_flag():
                dano = amplitudes.anomalous_differences()

            if not intensities.space_group().is_centric():
                merged_intensities = intensities.merge_equivalents().array()
                wilson_scaling = data_statistics.wilson_scaling(
                    miller_array=merged_intensities, n_residues=200
                )  # XXX default n_residues?
                wilson_scaling.show(out=output)

            mtz_dataset = mtz_object.crystals()[1].datasets()[0]
            mtz_dataset.add_miller_array(amplitudes, column_root_label="F")
            if dano is not None:
                mtz_dataset.add_miller_array(
                    dano, column_root_label="DANO", column_types="DQ"
                )
    mtz_object.add_history("cctbx.french_wilson analysis")
    mtz_object.show_summary(out=output)
    logger.debug("Writing reflections to %s", hklout)
    mtz_object.write(hklout)
    return output.getvalue()
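A hypothetical invocation of do_french_wilson (both file names are assumptions); the return value is the captured log text:

log_text = do_french_wilson("scaled.mtz", "truncated.mtz", anomalous=False)
print(log_text)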
Example #3
    def __init__(self, mtz_file, params=None):

        print("Reading reflections from %s" % mtz_file)
        from iotbx.reflection_file_reader import any_reflection_file

        result = any_reflection_file(mtz_file)
        assert result.file_type() == "ccp4_mtz"
        mtz_object = result.file_content()
        mtz_object.show_summary()

        intensities = None

        for ma in result.as_miller_arrays(merge_equivalents=False):
            if params.anomalous and ma.info().labels == [
                    "I(+)",
                    "SIGI(+)",
                    "I(-)",
                    "SIGI(-)",
            ]:
                assert ma.anomalous_flag()
                intensities = (ma.merge_equivalents().array()
                               )  # XXX why is this necessary?
            elif not params.anomalous and ma.info().labels == [
                    "IMEAN", "SIGIMEAN"
            ]:
                assert not ma.anomalous_flag()
                intensities = ma

        assert intensities.is_xray_intensity_array()
        amplitudes = intensities.french_wilson(params=params)
        assert amplitudes.is_xray_amplitude_array()

        from mmtbx.scaling import data_statistics

        if not intensities.space_group().is_centric():
            merged_intensities = intensities.merge_equivalents().array()
            wilson_scaling = data_statistics.wilson_scaling(
                miller_array=merged_intensities,
                n_residues=200)  # XXX default n_residues?
            wilson_scaling.show()
            print()

        mtz_dataset = mtz_object.crystals()[1].datasets()[0]
        mtz_dataset.add_miller_array(amplitudes, column_root_label="F")
        mtz_object.add_history("cctbx.french_wilson analysis")
        print("Writing reflections to %s" % (params.hklout))
        mtz_object.show_summary()
        mtz_object.write(params.hklout)
Example #4
  def __init__(self, mtz_file, params=None):

    print('Reading reflections from %s' % mtz_file)
    from iotbx.reflection_file_reader import any_reflection_file
    result = any_reflection_file(mtz_file)
    assert result.file_type() == 'ccp4_mtz'
    mtz_object = result.file_content()
    mtz_object.show_summary()

    intensities = None

    for ma in result.as_miller_arrays(merge_equivalents=False):
      if (params.anomalous and
          ma.info().labels == ['I(+)', 'SIGI(+)', 'I(-)', 'SIGI(-)']):
        assert ma.anomalous_flag()
        intensities = ma.merge_equivalents().array() # XXX why is this necessary?
      elif (not params.anomalous and ma.info().labels == ['IMEAN', 'SIGIMEAN']):
        assert not ma.anomalous_flag()
        intensities = ma

    assert intensities.is_xray_intensity_array()
    amplitudes = intensities.french_wilson(params=params)
    assert amplitudes.is_xray_amplitude_array()

    from mmtbx.scaling import data_statistics
    if not intensities.space_group().is_centric():
      merged_intensities = intensities.merge_equivalents().array()
      wilson_scaling = data_statistics.wilson_scaling(
        miller_array=merged_intensities, n_residues=200) # XXX default n_residues?
      wilson_scaling.show()
      print()

    mtz_dataset = mtz_object.crystals()[1].datasets()[0]
    mtz_dataset.add_miller_array(amplitudes, column_root_label='F')
    mtz_object.add_history('cctbx.french_wilson analysis')
    print('Writing reflections to %s' % params.hklout)
    mtz_object.show_summary()
    mtz_object.write(params.hklout)
Example #5
import sys

from iotbx import mtz
from mmtbx.scaling import data_statistics

m = mtz.object(sys.argv[1])
mas = m.as_miller_arrays()

data = None

for ma in mas:
    if ma.is_xray_intensity_array():
        data = ma
        break


def nres_from_mtz(m):
    # Estimate the residue count from the asymmetric-unit volume: the constants
    # suggest roughly 2.7 A^3 per dalton and an average residue mass of ~128 Da.
    sg = m.space_group()
    uc = m.crystals()[0].unit_cell()
    n_ops = len(sg.all_ops())
    v_asu = uc.volume() / n_ops
    return v_asu / (2.7 * 128)


n_res = nres_from_mtz(m)

wilson_scaling = data_statistics.wilson_scaling(miller_array=data,
                                                n_residues=n_res)
wilson_scaling.show()
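The constants in nres_from_mtz read as a Matthews-style estimate: roughly 2.7 Å³ per dalton and an average residue mass of about 128 Da. As a worked instance (the volume is an assumption), an asymmetric unit of 50,000 Å³ gives:

n_res = 50000 / (2.7 * 128)  # ≈ 145 residues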
Example #6
def exercise_2 () :
  hkl_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/wizards/p9_se_w2.sca",
    test=os.path.isfile)
  if (hkl_file is None) :
    warnings.warn("phenix_regression not available, skipping test")
    return
  hkl_in = file_reader.any_file(hkl_file).assert_file_type("hkl")
  i_obs_raw = hkl_in.file_object.as_miller_arrays(
    merge_equivalents=False,
    crystal_symmetry=crystal.symmetry(
      space_group_symbol="I4",
      unit_cell=(113.949,113.949,32.474,90,90,90)))[0]
  i_obs = i_obs_raw.merge_equivalents().array()
  # completeness and data strength
  cstats = ds.i_sigi_completeness_stats(i_obs)
  d_min_cut = cstats.resolution_cut
  assert approx_equal(d_min_cut, 2.150815)
  ws = ds.wilson_scaling(
    miller_array=i_obs,
    n_residues=120)
  # outliers - this shouldn't actually work, since it requires additional
  # processing steps on the input data
  try :
    outliers = ds.possible_outliers(i_obs)
  except AssertionError :
    pass
  else :
    raise Exception_expected
  ######################################################################
  # OVERALL ANALYSIS
  pdb_file = libtbx.env.find_in_repositories(
    relative_path="phenix_examples/p9-build/p9.pdb",
    test=os.path.isfile)
  f_calc = None
  if (pdb_file is not None) :
    pdb_in = file_reader.any_file(pdb_file).assert_file_type("pdb")
    hierarchy = pdb_in.file_object.hierarchy
    xrs = pdb_in.file_object.xray_structure_simple(
      crystal_symmetry=i_obs)
    f_calc = xrs.structure_factors(d_min=i_obs.d_min()).f_calc()
    f_calc = abs(f_calc).generate_bijvoet_mates()
    f_calc = f_calc.set_observation_type_xray_amplitude()
    i_obs, f_calc = i_obs.common_sets(other=f_calc)
    open("tmp_xtriage.pdb", "w").write(hierarchy.as_pdb_string(
      crystal_symmetry=i_obs))
    pdb_file = "tmp_xtriage.pdb"
  params = xtriage.master_params.extract()
  params.scaling.input.asu_contents.n_residues = 141
  result = xtriage.xtriage_analyses(
    miller_obs=i_obs,
    miller_calc=f_calc,
    params=params,
    unmerged_obs=i_obs_raw,
    text_out=open("logfile3.log", "w"))#sys.stdout)
  # XXX there appears to be some system-dependence here, hence sloppy limits
  assert (15.5 < result.aniso_b_min < 15.9)
  assert (10 < result.aniso_range_of_b < 11)
  # check relative Wilson
  if (pdb_file is not None) :
    assert (result.relative_wilson is not None)
    # FIXME
    #assert (result.relative_wilson.n_outliers() == 34)
  #show_pickled_object_sizes(result)
  test_pickle_consistency_and_size(result)
  # XXX PDB validation server
  assert approx_equal(result.iso_b_wilson, 18.33, eps=0.1)
  assert approx_equal(result.aniso_b_ratio, 0.546, eps=0.1)
  assert (result.number_of_wilson_outliers == 0)
  assert approx_equal(result.l_test_mean_l, 0.493, eps=0.1)
  assert approx_equal(result.l_test_mean_l_squared, 0.326, eps=0.1)
  assert approx_equal(result.i_over_sigma_outer_shell, 3.25, eps=0.1)
  assert ("No significant pseudotranslation is detected" in
          result.patterson_verdict)
  # test consistency of output after pickling and unpickling
  try :
    from phenix_dev.phenix_cloud import xtriage_json
  except ImportError :
    pass
  else :
    json_out = xtriage_json.json_output("p9.sca")
    result.show(out=json_out)
    open("xtriage.json", "w").write(json_out.export())
  # unmerged data
  assert result.merging_stats is not None
  out = StringIO()
  result.merging_stats.show(out=out)
  assert ("R-merge: 0.073" in out.getvalue())
  assert approx_equal(result.estimate_d_min(min_i_over_sigma=10), 1.9645,
    eps=0.001)
  # FIXME PDB doesn't actually have unit cell!
  # test detection of symmetry in reference file
  if (pdb_file is not None) :
    args = [hkl_file, pdb_file]
    result = xtriage.run(args=args, out=null_out())
Example #7
def exercise_2():
    hkl_file = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/wizards/data/p9_se_w2.sca",
        test=os.path.isfile)
    if (hkl_file is None):
        warnings.warn("phenix_regression not available, skipping test")
        return
    hkl_in = file_reader.any_file(hkl_file).assert_file_type("hkl")
    i_obs_raw = hkl_in.file_object.as_miller_arrays(
        merge_equivalents=False,
        crystal_symmetry=crystal.symmetry(space_group_symbol="I4",
                                          unit_cell=(113.949, 113.949, 32.474,
                                                     90, 90, 90)))[0]
    i_obs = i_obs_raw.merge_equivalents().array()
    # completeness and data strength
    cstats = ds.i_sigi_completeness_stats(i_obs)
    d_min_cut = cstats.resolution_cut
    assert approx_equal(d_min_cut, 2.150815)
    ws = ds.wilson_scaling(miller_array=i_obs, n_residues=120)
    # outliers - this shouldn't actually work, since it requires additional
    # processing steps on the input data
    try:
        outliers = ds.possible_outliers(i_obs)
    except AssertionError:
        pass
    else:
        raise Exception_expected
    ######################################################################
    # OVERALL ANALYSIS
    pdb_file = libtbx.env.find_in_repositories(
        relative_path="phenix_examples/p9-build/p9.pdb", test=os.path.isfile)
    f_calc = None
    if (pdb_file is not None):
        pdb_in = file_reader.any_file(pdb_file).assert_file_type("pdb")
        hierarchy = pdb_in.file_object.hierarchy
        xrs = pdb_in.file_object.xray_structure_simple(crystal_symmetry=i_obs)
        f_calc = xrs.structure_factors(d_min=i_obs.d_min()).f_calc()
        f_calc = abs(f_calc).generate_bijvoet_mates()
        f_calc = f_calc.set_observation_type_xray_amplitude()
        i_obs, f_calc = i_obs.common_sets(other=f_calc)
        open("tmp_xtriage.pdb",
             "w").write(hierarchy.as_pdb_string(crystal_symmetry=i_obs))
        pdb_file = "tmp_xtriage.pdb"
    params = xtriage.master_params.extract()
    params.scaling.input.asu_contents.n_residues = 141
    result = xtriage.xtriage_analyses(miller_obs=i_obs,
                                      miller_calc=f_calc,
                                      params=params,
                                      unmerged_obs=i_obs_raw,
                                      text_out=open("logfile3.log",
                                                    "w"))  #sys.stdout)
    # XXX there appears to be some system-dependence here, hence sloppy limits
    assert (15.5 < result.aniso_b_min < 15.9)
    assert (10 < result.aniso_range_of_b < 11)
    # check relative Wilson
    if (pdb_file is not None):
        assert (result.relative_wilson is not None)
        # FIXME
        #assert (result.relative_wilson.n_outliers() == 34)
    #show_pickled_object_sizes(result)
    test_pickle_consistency_and_size(result)
    # XXX PDB validation server
    assert approx_equal(result.iso_b_wilson, 18.33, eps=0.1)
    assert approx_equal(result.aniso_b_ratio, 0.546, eps=0.1)
    assert (result.number_of_wilson_outliers == 0)
    assert approx_equal(result.l_test_mean_l, 0.493, eps=0.1)
    assert approx_equal(result.l_test_mean_l_squared, 0.326, eps=0.1)
    assert approx_equal(result.i_over_sigma_outer_shell, 3.25, eps=0.1)
    assert approx_equal(result.overall_i_sig_i, 10.34, eps=0.1)
    assert approx_equal(
        result.anomalous_info.plan_sad_experiment_stats.get_overall(
            item="i_over_sigma_dict"),
        10.61,
        eps=0.1)
    assert approx_equal(
        result.anomalous_info.plan_sad_experiment_stats.get_overall(
            item="anom_signal_dict"),
        15.35,
        eps=0.1)
    assert ("No significant pseudotranslation is detected"
            in result.patterson_verdict)
    # test consistency of output after pickling and unpickling
    try:
        from phenix_dev.phenix_cloud import xtriage_json
    except ImportError:
        pass
    else:
        json_out = xtriage_json.json_output("p9.sca")
        result.show(out=json_out)
        open("xtriage.json", "w").write(json_out.export())
    # unmerged data
    assert result.merging_stats is not None
    out = StringIO()
    result.merging_stats.show(out=out)
    assert ("R-merge: 0.073" in out.getvalue())
    assert approx_equal(result.estimate_d_min(min_i_over_sigma=10),
                        1.9645,
                        eps=0.001)
    # FIXME PDB doesn't actually have unit cell!
    # test detection of symmetry in reference file
    if (pdb_file is not None):
        args = [hkl_file, pdb_file]
        result = xtriage.run(args=args, out=null_out())
Example #8
File: merge.py Project: hattne/dials
def merge_and_truncate(params, experiments, reflections):
    """Filter data, assess space group, run french wilson and Wilson stats."""

    logger.info("\nMerging scaled reflection data\n")
    # first filter bad reflections using dials.util.filter methods
    reflections = filter_reflection_table(
        reflections[0],
        intensity_choice=["scale"],
        d_min=params.d_min,
        combine_partials=params.combine_partials,
        partiality_threshold=params.partiality_threshold,
    )
    # ^ scale factor has been applied, so now set to 1.0 - okay as not
    # going to output scale factor in merged mtz.
    reflections["inverse_scale_factor"] = flex.double(reflections.size(), 1.0)

    scaled_array = scaled_data_as_miller_array([reflections], experiments)
    if params.anomalous:
        anomalous_scaled = scaled_array.as_anomalous_array()

    merged = scaled_array.merge_equivalents(
        use_internal_variance=params.merging.use_internal_variance).array()
    merged_anom = None
    if params.anomalous:
        merged_anom = anomalous_scaled.merge_equivalents(
            use_internal_variance=params.merging.use_internal_variance).array(
            )

    # Before merge, do some assessment of the space_group
    if params.assess_space_group:
        merged_reflections = flex.reflection_table()
        merged_reflections["intensity"] = merged.data()
        merged_reflections["variance"] = merged.sigmas()**2
        merged_reflections["miller_index"] = merged.indices()
        logger.info("Running systematic absences check")
        run_sys_abs_checks(experiments, merged_reflections)

    # Run the truncation and stats on the anomalous or the non-anomalous data?
    if params.anomalous:
        intensities = merged_anom
    else:
        intensities = merged

    assert intensities.is_xray_intensity_array()
    amplitudes = None
    anom_amplitudes = None
    if params.truncate:
        logger.info("\nScaling input intensities via French-Wilson Method")
        out = StringIO()
        if params.anomalous:
            anom_amplitudes = intensities.french_wilson(params=params, log=out)
            n_removed = intensities.size() - anom_amplitudes.size()
            assert anom_amplitudes.is_xray_amplitude_array()
            amplitudes = anom_amplitudes.as_non_anomalous_array()
            amplitudes = amplitudes.merge_equivalents().array()
        else:
            amplitudes = intensities.french_wilson(params=params, log=out)
            n_removed = intensities.size() - amplitudes.size()
        logger.info("Total number of rejected intensities %s", n_removed)
        logger.debug(out.getvalue())

    if params.reporting.wilson_stats:
        if not intensities.space_group().is_centric():
            wilson_scaling = data_statistics.wilson_scaling(
                miller_array=intensities,
                n_residues=params.n_residues)  # XXX default n_residues?
            # Divert output through logger - do with StringIO rather than
            # info_handle else get way too much whitespace in output.
            out = StringIO()
            wilson_scaling.show(out=out)
            logger.info(out.getvalue())

    # Apply wilson B to give absolute scale?

    # Show merging stats again.
    if params.reporting.merging_stats:
        stats, anom_stats = merging_stats_from_scaled_array(
            scaled_array, params.merging.n_bins,
            params.merging.use_internal_variance)
        if params.merging.anomalous:
            logger.info(make_merging_statistics_summary(anom_stats))
        else:
            logger.info(make_merging_statistics_summary(stats))

    return merged, merged_anom, amplitudes, anom_amplitudes
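A hypothetical call of merge_and_truncate: params would be an extracted PHIL object carrying the options used in the body (d_min, anomalous, truncate, merging.*, reporting.*), and reflections must be a list of reflection tables, since the function indexes reflections[0]:

merged, merged_anom, amplitudes, anom_amplitudes = merge_and_truncate(
    params, experiments, reflections)  # reflections: list of reflection tables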
Example #9
File: best.py Project: BestMX/BestMX
def run(args):

    from dials.util.options import OptionParser
    from dials.util.options import flatten_experiments
    from dials.util.options import flatten_reflections
    import libtbx.load_env

    usage = "%s [options] experiments.json | integrated.pickle" % (
        libtbx.env.dispatcher_name)

    parser = OptionParser(usage=usage,
                          phil=phil_scope,
                          read_experiments=True,
                          read_reflections=True,
                          check_format=False,
                          epilog=help_message)

    params, options = parser.parse_args(show_diff_phil=True)
    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    if len(experiments) == 0 or len(reflections) == 0:
        parser.print_help()
        exit()
    reflections = reflections[0]
    cryst = experiments.crystals()[0]
    unit_cell = cryst.get_unit_cell()
    if params.space_group is not None:
        space_group = params.space_group.group()
        assert space_group.is_compatible_unit_cell(unit_cell), unit_cell
    else:
        space_group = cryst.get_space_group()
    print(space_group.info())
    print(unit_cell)

    expt = experiments[0]
    from cctbx import miller, crystal
    from mmtbx.scaling.data_statistics import wilson_scaling
    sel = reflections.get_flags(reflections.flags.integrated_sum)
    reflections = reflections.select(sel)
    cs = crystal.symmetry(unit_cell=unit_cell, space_group=space_group)
    ms = miller.set(cs,
                    indices=reflections['miller_index'],
                    anomalous_flag=True)
    intensities = miller.array(ms,
                               data=reflections['intensity.sum.value'],
                               sigmas=flex.sqrt(
                                   reflections['intensity.sum.variance']))
    intensities.set_observation_type_xray_intensity()
    d_star_sq = intensities.d_star_sq().data()
    n_bins = 20
    #  binner = intensities.setup_binner_d_star_sq_step(
    #    d_star_sq_step=(flex.max(d_star_sq)-flex.min(d_star_sq)+1e-8)/n_bins)
    binner = intensities.setup_binner_counting_sorted(n_bins=n_bins)
    # wilson = intensities.wilson_plot(use_binning=True)
    # wilson.show()
    # from matplotlib import pyplot
    # pyplot.figure()
    # pyplot.scatter(wilson.binner.bin_centers(2), wilson.data[1:-1])
    # pyplot.show()
    intensities = intensities.merge_equivalents().array()
    wilson = wilson_scaling(intensities, n_residues=200)
    wilson.iso_scale_and_b.show()

    from matplotlib import pyplot
    pyplot.figure()
    pyplot.scatter(wilson.d_star_sq, wilson.mean_I_obs_data, label='Data')
    pyplot.plot(wilson.d_star_sq, wilson.mean_I_obs_theory, label='theory')
    pyplot.plot(wilson.d_star_sq,
                wilson.mean_I_normalisation,
                label='smoothed')
    pyplot.yscale('log')
    pyplot.legend()
    pyplot.show()

    import copy
    import math
    # Hack to make the predicter predict reflections outside of the range
    # of the scan
    expt_input = copy.deepcopy(expt)
    scan = expt.scan
    image_range = scan.get_image_range()
    oscillation = scan.get_oscillation()
    scan.set_image_range((1, int(math.ceil(360 / oscillation[1]))))
    scan.set_oscillation((0, oscillation[1]))
    print(scan)

    # Populate the reflection table with predictions
    predicted = flex.reflection_table.from_predictions(
        expt, force_static=params.force_static, dmin=params.d_min)
    predicted['id'] = flex.int(len(predicted), 0)

    print(len(predicted))

    space_group = space_group.build_derived_reflection_intensity_group(
        anomalous_flag=True)
    cs = crystal.symmetry(unit_cell=unit_cell, space_group=space_group)

    ms = miller.set(cs, indices=predicted['miller_index'], anomalous_flag=True)
    ma = miller.array(ms,
                      data=flex.double(ms.size(), 1),
                      sigmas=flex.double(ms.size(), 1))

    d_star_sq = ma.d_star_sq().data()
    n_bins = 1
    binner = ma.setup_binner_d_star_sq_step(
        d_star_sq_step=(flex.max(d_star_sq) - flex.min(d_star_sq) + 1e-8) /
        n_bins)
    image_number = predicted['xyzcal.px'].parts()[2]
    print(flex.min(image_number))
    print(flex.max(image_number))
    #dose = flex.size_t(list(flex.floor(image_number).iround()))
    angle_deg = predicted['xyzcal.mm'].parts()[2] * 180 / math.pi
    dose = flex.size_t(list(flex.floor(angle_deg).iround()))
    range_width = 1
    range_min = flex.min(dose) - range_width
    range_max = flex.max(dose)
    n_steps = 2 + int((range_max - range_min) - range_width)

    binner_non_anom = ma.as_non_anomalous_array().use_binning(binner)
    n_complete = flex.size_t(binner_non_anom.counts_complete()[1:-1])

    ranges_dict = {}
    completeness_levels = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
    for c in completeness_levels:
        ranges_dict[c] = []
    from xia2.Modules.PyChef2 import ChefStatistics
    # Sweep the starting angle in one-degree steps: shift the dose origin and
    # record, for each completeness level, the rotation range needed to reach it.
    step = 1
    for i in range(0, 360, step):
        sel = dose < step
        dose.set_selected(sel, dose.select(sel) + 360)
        dose -= flex.min(dose)
        chef_stats = ChefStatistics(ma.indices(), ma.data(), ma.sigmas(),
                                    ma.d_star_sq().data(), dose, n_complete,
                                    binner, ma.space_group(),
                                    ma.anomalous_flag(), n_steps)

        ieither_completeness = chef_stats.ieither_completeness()
        iboth_completeness = chef_stats.iboth_completeness()

        for c in completeness_levels:
            ranges_dict[c].append(
                min((ieither_completeness > (c / 100)).iselection()))

    from matplotlib import pyplot
    pyplot.figure()
    for c in completeness_levels:
        pyplot.plot(ranges_dict[c], label=str(c))


#  pyplot.plot(range_for_50)
#  pyplot.plot(range_for_99)

#  pyplot.scatter(range(iboth_completeness.size()), iboth_completeness)
    pyplot.legend()
    pyplot.show()
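Run from the command line in a DIALS environment (the dispatcher and file names here are assumptions); both an experiments file and an integrated reflection file are required:

dials.python best.py integrated_experiments.json integrated.pickle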
Example #10
File: report.py Project: hainm/xia2
def run(args):
  from iotbx.reflection_file_reader import any_reflection_file

  from xia2.Modules.Analysis import phil_scope
  interp = phil_scope.command_line_argument_interpreter()
  params, unhandled = interp.process_and_fetch(
    args, custom_processor='collect_remaining')
  params = params.extract()
  n_bins = params.resolution_bins

  args = unhandled

  intensities = None
  batches = None
  scales = None
  dose = None

  reader = any_reflection_file(args[0])
  assert reader.file_type() == 'ccp4_mtz'
  arrays = reader.as_miller_arrays(merge_equivalents=False)
  for ma in arrays:
    if ma.info().labels == ['BATCH']:
      batches = ma
    elif ma.info().labels == ['DOSE']:
      dose = ma
    elif ma.info().labels == ['I', 'SIGI']:
      intensities = ma
    elif ma.info().labels == ['I(+)', 'SIGI(+)', 'I(-)', 'SIGI(-)']:
      intensities = ma
    elif ma.info().labels == ['SCALEUSED']:
      scales = ma

  assert intensities is not None
  assert batches is not None
  mtz_object = reader.file_content()

  indices = mtz_object.extract_original_index_miller_indices()
  intensities = intensities.customized_copy(
    indices=indices, info=intensities.info())
  batches = batches.customized_copy(indices=indices, info=batches.info())

  from iotbx import merging_statistics
  merging_stats = merging_statistics.dataset_statistics(
    intensities, n_bins=n_bins)

  merging_acentric = intensities.select_acentric().merge_equivalents()
  merging_centric = intensities.select_centric().merge_equivalents()

  multiplicities_acentric = {}
  multiplicities_centric = {}

  for x in sorted(set(merging_acentric.redundancies().data())):
    multiplicities_acentric[x] = merging_acentric.redundancies().data().count(x)
  for x in sorted(set(merging_centric.redundancies().data())):
    multiplicities_centric[x] = merging_centric.redundancies().data().count(x)

  headers = [u'Resolution (Å)', 'N(obs)', 'N(unique)', 'Multiplicity', 'Completeness',
             'Mean(I)', 'Mean(I/sigma)', 'Rmerge', 'Rmeas', 'Rpim', 'CC1/2', 'CCano']
  rows = []
  for bin_stats in merging_stats.bins:
    row = ['%.2f - %.2f' %(bin_stats.d_max, bin_stats.d_min),
           bin_stats.n_obs, bin_stats.n_uniq, '%.2f' %bin_stats.mean_redundancy,
           '%.2f' %(100*bin_stats.completeness), '%.1f' %bin_stats.i_mean,
           '%.1f' %bin_stats.i_over_sigma_mean, '%.3f' %bin_stats.r_merge,
           '%.3f' %bin_stats.r_meas, '%.3f' %bin_stats.r_pim,
           '%.3f' %bin_stats.cc_one_half, '%.3f' %bin_stats.cc_anom]
    rows.append(row)

  from xia2.lib.tabulate import tabulate
  merging_stats_table_html = tabulate(rows, headers, tablefmt='html')
  merging_stats_table_html = merging_stats_table_html.replace(
    '<table>', '<table class="table table-hover table-condensed">')

  unit_cell_params = intensities.unit_cell().parameters()

  headers = ['', 'Overall', 'Low resolution', 'High resolution']

  stats = (merging_stats.overall, merging_stats.bins[0], merging_stats.bins[-1])

  rows = [
    [u'Resolution (Å)'] + [
      '%.2f - %.2f' %(s.d_max, s.d_min) for s in stats],
    ['Observations'] + ['%i' %s.n_obs for s in stats],
    ['Unique reflections'] + ['%i' %s.n_uniq for s in stats],
    ['Multiplicity'] + ['%.1f' %s.mean_redundancy for s in stats],
    ['Completeness'] + ['%.2f%%' %(s.completeness * 100) for s in stats],
    #['Mean intensity'] + ['%.1f' %s.i_mean for s in stats],
    ['Mean I/sigma(I)'] + ['%.1f' %s.i_over_sigma_mean for s in stats],
    ['Rmerge'] + ['%.3f' %s.r_merge for s in stats],
    ['Rmeas'] + ['%.3f' %s.r_meas for s in stats],
    ['Rpim'] + ['%.3f' %s.r_pim for s in stats],
    ['CC1/2'] + ['%.3f' %s.cc_one_half for s in stats],
  ]
  rows = [[u'<strong>%s</strong>' %r[0]] + r[1:] for r in rows]

  overall_stats_table_html = tabulate(rows, headers, tablefmt='html')
  overall_stats_table_html = overall_stats_table_html.replace(
    '<table>', '<table class="table table-hover table-condensed">')

  #headers = ['Crystal symmetry', '']
  #rows = [
    #[u'Unit cell: a (Å)', '%.3f' %unit_cell_params[0]],
    #[u'b (Å)', '%.3f' %unit_cell_params[1]],
    #[u'c (Å)', '%.3f' %unit_cell_params[2]],
    #[u'α (°)', '%.3f' %unit_cell_params[3]],
    #[u'β (°)', '%.3f' %unit_cell_params[4]],
    #[u'γ (°)', '%.3f' %unit_cell_params[5]],
    #['Space group', intensities.space_group_info().symbol_and_number()],
  #]

  #symmetry_table_html = tabulate(rows, headers, tablefmt='html')
  symmetry_table_html = """
  <p>
    <b>Filename:</b> %s
    <br>
    <b>Unit cell:</b> %s
    <br>
    <b>Space group:</b> %s
  </p>
""" %(os.path.abspath(reader.file_name()),
      intensities.space_group_info().symbol_and_number(),
      str(intensities.unit_cell()))

  if params.anomalous:
    intensities = intensities.as_anomalous_array()
    batches = batches.as_anomalous_array()


  from xia2.Modules.PyChef2.PyChef import remove_batch_gaps
  new_batch_data = remove_batch_gaps(batches.data())
  new_batches = batches.customized_copy(data=new_batch_data)
  sc_vs_b = scales_vs_batch(scales, new_batches)
  rmerge_vs_b = rmerge_vs_batch(intensities, new_batches)

  intensities.setup_binner(n_bins=n_bins)

  merged_intensities = intensities.merge_equivalents().array()
  from mmtbx.scaling import twin_analyses
  normalised_intensities = twin_analyses.wilson_normalised_intensities(
    miller_array=merged_intensities)
  nz_test = twin_analyses.n_z_test(
    normalised_acentric=normalised_intensities.acentric,
    normalised_centric=normalised_intensities.centric)

  from mmtbx.scaling import data_statistics
  if not intensities.space_group().is_centric():
    wilson_scaling = data_statistics.wilson_scaling(
      miller_array=merged_intensities, n_residues=200) # XXX default n_residues?

  acentric = intensities.select_acentric()
  centric = intensities.select_centric()
  if acentric.size():
    acentric.setup_binner(n_bins=n_bins)
    second_moments_acentric = acentric.second_moment_of_intensities(use_binning=True)
  if centric.size():
    centric.setup_binner(n_bins=n_bins)
    second_moments_centric = centric.second_moment_of_intensities(use_binning=True)

  d_star_sq_bins = [
    (1/bin_stats.d_min**2) for bin_stats in merging_stats.bins]
  i_over_sig_i_bins = [
    bin_stats.i_over_sigma_mean for bin_stats in merging_stats.bins]
  cc_one_half_bins = [
    bin_stats.cc_one_half for bin_stats in merging_stats.bins]
  cc_anom_bins = [
    bin_stats.cc_anom for bin_stats in merging_stats.bins]

  from xia2.Modules.PyChef2 import PyChef
  if params.chef_min_completeness:
    d_min = PyChef.resolution_limit(
      mtz_file=args[0], min_completeness=params.chef_min_completeness, n_bins=8)
    print('Estimated d_min for CHEF analysis: %.2f' % d_min)
    sel = flex.bool(intensities.size(), True)
    d_spacings = intensities.d_spacings().data()
    sel &= d_spacings >= d_min
    intensities = intensities.select(sel)
    batches = batches.select(sel)
    if dose is not None:
      dose = dose.select(sel)

  if dose is None:
    dose = PyChef.batches_to_dose(batches.data(), params.dose)
  else:
    dose = dose.data()
  pychef_stats = PyChef.Statistics(intensities, dose)

  pychef_dict = pychef_stats.to_dict()

  def d_star_sq_to_d_ticks(d_star_sq, nticks):
    from cctbx import uctbx
    d_spacings = uctbx.d_star_sq_as_d(flex.double(d_star_sq))
    min_d_star_sq = min(d_star_sq)
    dstep = (max(d_star_sq) - min_d_star_sq)/nticks
    tickvals = list(min_d_star_sq + (i*dstep) for i in range(nticks))
    ticktext = ['%.2f' %(uctbx.d_star_sq_as_d(dsq)) for dsq in tickvals]
    return tickvals, ticktext

  tickvals, ticktext = d_star_sq_to_d_ticks(d_star_sq_bins, nticks=5)
  tickvals_wilson, ticktext_wilson = d_star_sq_to_d_ticks(
    wilson_scaling.d_star_sq, nticks=5)
  second_moment_d_star_sq = []
  if acentric.size():
    second_moment_d_star_sq.extend(second_moments_acentric.binner.bin_centers(2))
  if centric.size():
    second_moment_d_star_sq.extend(second_moments_centric.binner.bin_centers(2))
  tickvals_2nd_moment, ticktext_2nd_moment = d_star_sq_to_d_ticks(
    second_moment_d_star_sq, nticks=5)

  json_data = {

    'multiplicities': {
      'data': [
        {
          'x': list(multiplicities_acentric.keys()),
          'y': list(multiplicities_acentric.values()),
          'type': 'bar',
          'name': 'Acentric',
          'opacity': 0.75,
        },
        {
          'x': list(multiplicities_centric.keys()),
          'y': list(multiplicities_centric.values()),
          'type': 'bar',
          'name': 'Centric',
          'opacity': 0.75,
        },
      ],
      'layout': {
        'title': 'Distribution of multiplicities',
        'xaxis': {'title': 'Multiplicity'},
        'yaxis': {
          'title': 'Frequency',
          #'rangemode': 'tozero'
        },
        'bargap': 0,
        'barmode': 'overlay',
      },
    },

    'scale_rmerge_vs_batch': {
      'data': [
        {
          'x': sc_vs_b.batches,
          'y': sc_vs_b.data,
          'type': 'scatter',
          'name': 'Scale',
          'opacity': 0.75,
        },
        {
          'x': rmerge_vs_b.batches,
          'y': rmerge_vs_b.data,
          'yaxis': 'y2',
          'type': 'scatter',
          'name': 'Rmerge',
          'opacity': 0.75,
        },
      ],
      'layout': {
        'title': 'Scale and Rmerge vs batch',
        'xaxis': {'title': 'N'},
        'yaxis': {
          'title': 'Scale',
          'rangemode': 'tozero'
        },
        'yaxis2': {
          'title': 'Rmerge',
          'overlaying': 'y',
          'side': 'right',
          'rangemode': 'tozero'
        }
      },
    },

    'cc_one_half': {
      'data': [
        {
          'x': d_star_sq_bins, # d_star_sq
          'y': cc_one_half_bins,
          'type': 'scatter',
          'name': 'CC-half',
        },
        ({
          'x': d_star_sq_bins, # d_star_sq
          'y': cc_anom_bins,
          'type': 'scatter',
          'name': 'CC-anom',
        } if not intensities.space_group().is_centric() else {}),
      ],
      'layout':{
        'title': 'CC-half vs resolution',
        'xaxis': {
          'title': u'Resolution (Å)',
          'tickvals': tickvals,
          'ticktext': ticktext,
        },
        'yaxis': {
          'title': 'CC-half',
          'range': [min(cc_one_half_bins + cc_anom_bins + [0]), 1]
          },
        },
    },

    'i_over_sig_i': {
      'data': [{
        'x': d_star_sq_bins, # d_star_sq
        'y': i_over_sig_i_bins,
        'type': 'scatter',
        'name': 'Scales vs batch',
      }],
      'layout': {
        'title': '<I/sig(I)> vs resolution',
        'xaxis': {
          'title': u'Resolution (Å)',
          'tickvals': tickvals,
          'ticktext': ticktext,
        },
        'yaxis': {
          'title': '<I/sig(I)>',
          'rangemode': 'tozero'
        },
      }
    },

    'second_moments': {
      'data': [
        ({
          'x': list(second_moments_acentric.binner.bin_centers(2)), # d_star_sq
          'y': second_moments_acentric.data[1:-1],
          'type': 'scatter',
          'name': '<I^2> acentric',
        } if acentric.size() else {}),
        ({
          'x': list(second_moments_centric.binner.bin_centers(2)), # d_star_sq
          'y': second_moments_centric.data[1:-1],
          'type': 'scatter',
          'name': '<I^2> centric',
          } if centric.size() else {})
      ],
      'layout': {
        'title': 'Second moment of I',
        'xaxis': {
          'title': u'Resolution (Å)',
          'tickvals': tickvals_2nd_moment,
          'ticktext': ticktext_2nd_moment,
        },
        'yaxis': {
          'title': '<I^2>',
          'rangemode': 'tozero'
        },
      }
    },

    'cumulative_intensity_distribution': {
      'data': [
        {
          'x': list(nz_test.z),
          'y': list(nz_test.ac_obs),
          'type': 'scatter',
          'name': 'Acentric observed',
          'mode': 'lines',
          'line': {
            'color': 'rgb(31, 119, 180)',
          },
        },
        {
          'x': list(nz_test.z),
          'y': list(nz_test.c_obs),
          'type': 'scatter',
          'name': 'Centric observed',
          'mode': 'lines',
          'line': {
            'color': 'rgb(255, 127, 14)',
          },
        },
        {
          'x': list(nz_test.z),
          'y': list(nz_test.ac_untwinned),
          'type': 'scatter',
          'name': 'Acentric theory',
          'mode': 'lines',
          'line': {
            'color': 'rgb(31, 119, 180)',
            'dash': 'dot',
          },
          'opacity': 0.8,
        },
        {
          'x': list(nz_test.z),
          'y': list(nz_test.c_untwinned),
          'type': 'scatter',
          'name': 'Centric theory',
          'mode': 'lines',
          'line': {
            'color': 'rgb(255, 127, 14)',
            'dash': 'dot',
          },
          'opacity': 0.8,
        },
      ],
      'layout': {
        'title': 'Cumulative intensity distribution',
        'xaxis': {'title': 'z'},
        'yaxis': {
          'title': 'P(Z <= z)',
          'rangemode': 'tozero'
        },
      }
    },

    'wilson_intensity_plot': {
      'data': ([
        {
          'x': list(wilson_scaling.d_star_sq),
          'y': list(wilson_scaling.mean_I_obs_data),
          'type': 'scatter',
          'name': 'Observed',
        },
        {
          'x': list(wilson_scaling.d_star_sq),
          'y': list(wilson_scaling.mean_I_obs_theory),
          'type': 'scatter',
          'name': 'Expected',
        },
        {
          'x': list(wilson_scaling.d_star_sq),
          'y': list(wilson_scaling.mean_I_normalisation),
          'type': 'scatter',
          'name': 'Smoothed',
        }] if not intensities.space_group().is_centric() else []),
      'layout': {
        'title': 'Wilson intensity plot',
        'xaxis': {
          'title': u'Resolution (Å)',
          'tickvals': tickvals_wilson,
          'ticktext': ticktext_wilson,
        },
        'yaxis': {
          'type': 'log',
          'title': 'Mean(I)',
          'rangemode': 'tozero',
        },
      },
    },
  }

  json_data.update(pychef_dict)

  from dials.report import html_report
  report = html_report.html_report()

  page_header = html_report.page_header('xia2 report')
  report.add_content(page_header)

  overall_panel = html_report.panel('Overall', 'overall', show=True)
  overall_table = html_report.table_responsive(
    overall_stats_table_html, width=800)
  overall_panel.add_content(overall_table)

  merging_stats_panel = html_report.panel('Resolution shells', 'merging_stats')
  merging_stats_table = html_report.table_responsive(merging_stats_table_html)
  merging_stats_panel.add_content(merging_stats_table)

  merging_stats_panel_group = html_report.panel_group(
    [overall_panel, merging_stats_panel])
  div = html_report.div()
  div.add_content(html_report.raw_html('<h2>Merging statistics</h2>'))
  div.add_content(html_report.raw_html(symmetry_table_html))
  div.add_content(merging_stats_panel_group)
  report.add_content(div)

  resolution_plots_panel = html_report.panel('Analysis by resolution', 'resolution')
  for graph in ('cc_one_half', 'i_over_sig_i', 'second_moments',
                'wilson_intensity_plot'):
    resolution_plots_panel.add_content(html_report.plotly_graph(
      json_data[graph], graph))

  batch_plots_panel = html_report.panel('Analysis by batch', 'batch')
  for graph in ('scale_rmerge_vs_batch', 'completeness_vs_dose',
                'rcp_vs_dose', 'scp_vs_dose', 'rd_vs_batch_difference'):
    batch_plots_panel.add_content(html_report.plotly_graph(
      json_data[graph], graph))

  misc_plots_panel = html_report.panel('Miscellaneous', 'misc')
  for graph in ('multiplicities', 'cumulative_intensity_distribution'):
    misc_plots_panel.add_content(html_report.plotly_graph(
      json_data[graph], graph))

  analysis_plots_panel_group = html_report.panel_group(
    [resolution_plots_panel, batch_plots_panel, misc_plots_panel])
  div = html_report.div()
  div.add_content(html_report.raw_html('<h2>Analysis plots</h2>'))
  div.add_content(analysis_plots_panel_group)
  report.add_content(div)

  html = report.html()

  import json
  json_str = json.dumps(json_data)
  with open('xia2-report.json', 'w') as f:
    f.write(json_str)

  with open('xia2-report.html', 'wb') as f:
    f.write(html.encode('ascii', 'xmlcharrefreplace'))

  return
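Run against a scaled unmerged MTZ (file name assumed), the script writes xia2-report.html and xia2-report.json to the working directory:

dials.python report.py scaled_unmerged.mtz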
Example #11
File: wilson_stuff.py Project: xia2/xia2
from __future__ import division
from mmtbx.scaling import data_statistics
from iotbx import mtz
import sys

m = mtz.object(sys.argv[1])
mas = m.as_miller_arrays()

data = None

for ma in mas:
  if ma.is_xray_intensity_array():
    data = ma
    break

def nres_from_mtz(m):
  # Estimate the residue count from the asymmetric-unit volume: the constants
  # suggest roughly 2.7 A^3 per dalton and an average residue mass of ~128 Da.
  sg = m.space_group()
  uc = m.crystals()[0].unit_cell()
  n_ops = len(sg.all_ops())
  v_asu = uc.volume() / n_ops
  return v_asu / (2.7 * 128)

n_res = nres_from_mtz(m)

wilson_scaling = data_statistics.wilson_scaling(miller_array=data,
                                                n_residues=n_res)
wilson_scaling.show()
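Like Example #5, this script takes a single MTZ path on the command line (file name assumed) and prints the Wilson scaling summary:

dials.python wilson_stuff.py scaled.mtz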