import logging
from io import StringIO

from mmtbx.scaling import data_statistics

logger = logging.getLogger(__name__)


def show_wilson_scaling_analysis(merged_intensities, n_residues=200):
    """Report the Wilson statistics for a merged intensity array.

    Args:
        merged_intensities: A merged miller intensity array.
        n_residues: The number of residues to use for the Wilson analysis.
    """
    if not merged_intensities.space_group().is_centric():
        try:
            wilson_scaling = data_statistics.wilson_scaling(
                miller_array=merged_intensities, n_residues=n_residues
            )
        except (IndexError, RuntimeError) as e:
            logger.error(
                "\n"
                "Error encountered during Wilson statistics calculation:\n"
                "Perhaps there are too few unique reflections.\n"
                "%s",
                e,
                exc_info=True,
            )
        else:
            # Divert output through logger - do with StringIO rather than
            # info_handle else get way too much whitespace in output.
            out = StringIO()
            wilson_scaling.show(out=out)
            logger.info(out.getvalue())
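# A minimal usage sketch for show_wilson_scaling_analysis (hedged: the input
# file name "merged.mtz" is hypothetical; the reader and merge calls mirror
# those used elsewhere in this collection).
if __name__ == "__main__":
    from iotbx.reflection_file_reader import any_reflection_file

    logging.basicConfig(level=logging.INFO)
    arrays = any_reflection_file("merged.mtz").as_miller_arrays(
        merge_equivalents=False
    )
    i_obs = next(ma for ma in arrays if ma.is_xray_intensity_array())
    show_wilson_scaling_analysis(i_obs.merge_equivalents().array(), n_residues=200)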
import logging
from io import StringIO

from iotbx.reflection_file_reader import any_reflection_file
from mmtbx.scaling import data_statistics

logger = logging.getLogger(__name__)


def do_french_wilson(mtz_file, hklout, anomalous=False):
    logger.debug("Reading reflections from %s", mtz_file)
    result = any_reflection_file(mtz_file)
    assert result.file_type() == "ccp4_mtz"
    mtz_object = result.file_content()
    output = StringIO()
    mtz_object.show_summary(out=output)
    for ma in result.as_miller_arrays(merge_equivalents=False):
        if anomalous and ma.info().labels == [
            "I(+)",
            "SIGI(+)",
            "I(-)",
            "SIGI(-)",
        ]:
            assert ma.anomalous_flag()
            intensities = ma.merge_equivalents().array()  # XXX why is this necessary?
        elif ma.info().labels == ["IMEAN", "SIGIMEAN"]:
            assert not ma.anomalous_flag()
            intensities = ma
        else:
            intensities = None
        if intensities:
            assert intensities.is_xray_intensity_array()
            amplitudes = intensities.french_wilson(log=output)
            assert amplitudes.is_xray_amplitude_array()
            dano = None
            if amplitudes.anomalous_flag():
                dano = amplitudes.anomalous_differences()
            if not intensities.space_group().is_centric():
                merged_intensities = intensities.merge_equivalents().array()
                wilson_scaling = data_statistics.wilson_scaling(
                    miller_array=merged_intensities, n_residues=200
                )  # XXX default n_residues?
                wilson_scaling.show(out=output)
            mtz_dataset = mtz_object.crystals()[1].datasets()[0]
            mtz_dataset.add_miller_array(amplitudes, column_root_label="F")
            if dano is not None:
                mtz_dataset.add_miller_array(
                    dano, column_root_label="DANO", column_types="DQ"
                )
    mtz_object.add_history("cctbx.french_wilson analysis")
    mtz_object.show_summary(out=output)
    logger.debug("Writing reflections to %s", hklout)
    mtz_object.write(hklout)
    return output.getvalue()
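# Hedged usage sketch for do_french_wilson: convert intensities in an input
# MTZ to amplitudes and write a new file, then print the captured summary.
# File names here are hypothetical.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    print(do_french_wilson("scaled.mtz", "truncated.mtz", anomalous=False))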
def __init__(self, mtz_file, params=None):
    print("Reading reflections from %s" % mtz_file)
    from iotbx.reflection_file_reader import any_reflection_file

    result = any_reflection_file(mtz_file)
    assert result.file_type() == "ccp4_mtz"
    mtz_object = result.file_content()
    mtz_object.show_summary()
    intensities = None
    for ma in result.as_miller_arrays(merge_equivalents=False):
        if params.anomalous and ma.info().labels == [
            "I(+)",
            "SIGI(+)",
            "I(-)",
            "SIGI(-)",
        ]:
            assert ma.anomalous_flag()
            intensities = ma.merge_equivalents().array()  # XXX why is this necessary?
        elif not params.anomalous and ma.info().labels == ["IMEAN", "SIGIMEAN"]:
            assert not ma.anomalous_flag()
            intensities = ma
    assert intensities.is_xray_intensity_array()
    amplitudes = intensities.french_wilson(params=params)
    assert amplitudes.is_xray_amplitude_array()
    from mmtbx.scaling import data_statistics

    if not intensities.space_group().is_centric():
        merged_intensities = intensities.merge_equivalents().array()
        wilson_scaling = data_statistics.wilson_scaling(
            miller_array=merged_intensities, n_residues=200
        )  # XXX default n_residues?
        wilson_scaling.show()
        print()
    mtz_dataset = mtz_object.crystals()[1].datasets()[0]
    mtz_dataset.add_miller_array(amplitudes, column_root_label="F")
    mtz_object.add_history("cctbx.french_wilson analysis")
    print("Writing reflections to %s" % params.hklout)
    mtz_object.show_summary()
    mtz_object.write(params.hklout)
import sys

from iotbx import mtz
from mmtbx.scaling import data_statistics

m = mtz.object(sys.argv[1])
mas = m.as_miller_arrays()
data = None
for ma in mas:
    if ma.is_xray_intensity_array():
        data = ma
        break


def nres_from_mtz(m):
    # Rough estimate of the number of residues in the asymmetric unit from
    # the cell volume, assuming ~2.7 A^3 per Dalton and ~128 Da per residue.
    sg = m.space_group()
    uc = m.crystals()[0].unit_cell()
    n_ops = len(sg.all_ops())
    v_asu = uc.volume() / n_ops
    return v_asu / (2.7 * 128)


n_res = nres_from_mtz(m)
wilson_scaling = data_statistics.wilson_scaling(miller_array=data, n_residues=n_res)
wilson_scaling.show()
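# Sanity check of the volume-based residue estimate above (illustrative
# numbers, not from a real data set): at ~2.7 A^3/Da and ~128 Da per residue,
# one residue occupies ~345.6 A^3, so a 100000 A^3 asymmetric unit implies
# roughly 289 residues.
assert abs(100000 / (2.7 * 128) - 289.35) < 0.01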
def exercise_2():
    hkl_file = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/wizards/data/p9_se_w2.sca",
        test=os.path.isfile)
    if hkl_file is None:
        warnings.warn("phenix_regression not available, skipping test")
        return
    hkl_in = file_reader.any_file(hkl_file).assert_file_type("hkl")
    i_obs_raw = hkl_in.file_object.as_miller_arrays(
        merge_equivalents=False,
        crystal_symmetry=crystal.symmetry(
            space_group_symbol="I4",
            unit_cell=(113.949, 113.949, 32.474, 90, 90, 90)))[0]
    i_obs = i_obs_raw.merge_equivalents().array()
    # completeness and data strength
    cstats = ds.i_sigi_completeness_stats(i_obs)
    d_min_cut = cstats.resolution_cut
    assert approx_equal(d_min_cut, 2.150815)
    ws = ds.wilson_scaling(miller_array=i_obs, n_residues=120)
    # outliers - this shouldn't actually work, since it requires additional
    # processing steps on the input data
    try:
        outliers = ds.possible_outliers(i_obs)
    except AssertionError:
        pass
    else:
        raise Exception_expected
    ######################################################################
    # OVERALL ANALYSIS
    pdb_file = libtbx.env.find_in_repositories(
        relative_path="phenix_examples/p9-build/p9.pdb",
        test=os.path.isfile)
    f_calc = None
    if pdb_file is not None:
        pdb_in = file_reader.any_file(pdb_file).assert_file_type("pdb")
        hierarchy = pdb_in.file_object.hierarchy
        xrs = pdb_in.file_object.xray_structure_simple(crystal_symmetry=i_obs)
        f_calc = xrs.structure_factors(d_min=i_obs.d_min()).f_calc()
        f_calc = abs(f_calc).generate_bijvoet_mates()
        f_calc = f_calc.set_observation_type_xray_amplitude()
        i_obs, f_calc = i_obs.common_sets(other=f_calc)
        open("tmp_xtriage.pdb", "w").write(
            hierarchy.as_pdb_string(crystal_symmetry=i_obs))
        pdb_file = "tmp_xtriage.pdb"
    params = xtriage.master_params.extract()
    params.scaling.input.asu_contents.n_residues = 141
    result = xtriage.xtriage_analyses(
        miller_obs=i_obs,
        miller_calc=f_calc,
        params=params,
        unmerged_obs=i_obs_raw,
        text_out=open("logfile3.log", "w"))  # sys.stdout)
    # XXX there appears to be some system-dependence here, hence sloppy limits
    assert 15.5 < result.aniso_b_min < 15.9
    assert 10 < result.aniso_range_of_b < 11
    # check relative Wilson
    if pdb_file is not None:
        assert result.relative_wilson is not None
        # FIXME
        # assert (result.relative_wilson.n_outliers() == 34)
    # show_pickled_object_sizes(result)
    test_pickle_consistency_and_size(result)
    # XXX PDB validation server
    assert approx_equal(result.iso_b_wilson, 18.33, eps=0.1)
    assert approx_equal(result.aniso_b_ratio, 0.546, eps=0.1)
    assert result.number_of_wilson_outliers == 0
    assert approx_equal(result.l_test_mean_l, 0.493, eps=0.1)
    assert approx_equal(result.l_test_mean_l_squared, 0.326, eps=0.1)
    assert approx_equal(result.i_over_sigma_outer_shell, 3.25, eps=0.1)
    assert approx_equal(result.overall_i_sig_i, 10.34, eps=0.1)
    assert approx_equal(
        result.anomalous_info.plan_sad_experiment_stats.get_overall(
            item="i_over_sigma_dict"), 10.61, eps=0.1)
    assert approx_equal(
        result.anomalous_info.plan_sad_experiment_stats.get_overall(
            item="anom_signal_dict"), 15.35, eps=0.1)
    assert ("No significant pseudotranslation is detected"
            in result.patterson_verdict)
    # test consistency of output after pickling and unpickling
    try:
        from phenix_dev.phenix_cloud import xtriage_json
    except ImportError:
        pass
    else:
        json_out = xtriage_json.json_output("p9.sca")
        result.show(out=json_out)
        open("xtriage.json", "w").write(json_out.export())
    # unmerged data
    assert result.merging_stats is not None
    out = StringIO()
    result.merging_stats.show(out=out)
    assert "R-merge: 0.073" in out.getvalue()
    assert approx_equal(
        result.estimate_d_min(min_i_over_sigma=10), 1.9645, eps=0.001)
    # FIXME PDB doesn't actually have unit cell!
    # test detection of symmetry in reference file
    if pdb_file is not None:
        args = [hkl_file, pdb_file]
        result = xtriage.run(args=args, out=null_out())
def merge_and_truncate(params, experiments, reflections):
    """Filter data, assess space group, run French-Wilson and Wilson stats."""
    logger.info("\nMerging scaled reflection data\n")
    # first filter bad reflections using dials.util.filter methods
    reflections = filter_reflection_table(
        reflections[0],
        intensity_choice=["scale"],
        d_min=params.d_min,
        combine_partials=params.combine_partials,
        partiality_threshold=params.partiality_threshold,
    )
    # ^ scale factor has been applied, so now set to 1.0 - okay as not
    # going to output scale factor in merged mtz.
    reflections["inverse_scale_factor"] = flex.double(reflections.size(), 1.0)
    scaled_array = scaled_data_as_miller_array([reflections], experiments)
    if params.anomalous:
        anomalous_scaled = scaled_array.as_anomalous_array()

    merged = scaled_array.merge_equivalents(
        use_internal_variance=params.merging.use_internal_variance
    ).array()
    merged_anom = None
    if params.anomalous:
        merged_anom = anomalous_scaled.merge_equivalents(
            use_internal_variance=params.merging.use_internal_variance
        ).array()

    # Before merge, do some assessment of the space_group
    if params.assess_space_group:
        merged_reflections = flex.reflection_table()
        merged_reflections["intensity"] = merged.data()
        merged_reflections["variance"] = merged.sigmas() ** 2
        merged_reflections["miller_index"] = merged.indices()
        logger.info("Running systematic absences check")
        run_sys_abs_checks(experiments, merged_reflections)

    # Run the stats on truncating on anomalous or non anomalous?
    if params.anomalous:
        intensities = merged_anom
    else:
        intensities = merged
    assert intensities.is_xray_intensity_array()

    amplitudes = None
    anom_amplitudes = None
    if params.truncate:
        logger.info("\nScaling input intensities via French-Wilson Method")
        out = StringIO()
        if params.anomalous:
            anom_amplitudes = intensities.french_wilson(params=params, log=out)
            n_removed = intensities.size() - anom_amplitudes.size()
            assert anom_amplitudes.is_xray_amplitude_array()
            amplitudes = anom_amplitudes.as_non_anomalous_array()
            amplitudes = amplitudes.merge_equivalents().array()
        else:
            amplitudes = intensities.french_wilson(params=params, log=out)
            n_removed = intensities.size() - amplitudes.size()
        logger.info("Total number of rejected intensities %s", n_removed)
        logger.debug(out.getvalue())

    if params.reporting.wilson_stats:
        if not intensities.space_group().is_centric():
            wilson_scaling = data_statistics.wilson_scaling(
                miller_array=intensities, n_residues=params.n_residues
            )  # XXX default n_residues?
            # Divert output through logger - do with StringIO rather than
            # info_handle else get way too much whitespace in output.
            out = StringIO()
            wilson_scaling.show(out=out)
            logger.info(out.getvalue())

    # Apply wilson B to give absolute scale?

    # Show merging stats again.
    if params.reporting.merging_stats:
        stats, anom_stats = merging_stats_from_scaled_array(
            scaled_array,
            params.merging.n_bins,
            params.merging.use_internal_variance,
        )
        if params.merging.anomalous:
            logger.info(make_merging_statistics_summary(anom_stats))
        else:
            logger.info(make_merging_statistics_summary(stats))

    return merged, merged_anom, amplitudes, anom_amplitudes
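# A self-contained sketch of the French-Wilson step in isolation, mirroring
# the params.truncate branch above. Assumes only cctbx is available; the unit
# cell, space group and random intensities are illustrative, not from a real
# data set.
from io import StringIO

from cctbx import crystal, miller
from scitbx.array_family import flex

cs = crystal.symmetry(
    unit_cell=(78, 78, 37, 90, 90, 90), space_group_symbol="P43212"
)
ms = miller.build_set(crystal_symmetry=cs, anomalous_flag=False, d_min=2.0)
# Positive random intensities with sigma ~ sqrt(I)
i_data = flex.random_double(ms.size()) * 100 + 1
i_obs = miller.array(ms, data=i_data, sigmas=flex.sqrt(i_data))
i_obs = i_obs.set_observation_type_xray_intensity()
f_obs = i_obs.french_wilson(log=StringIO())
assert f_obs.is_xray_amplitude_array()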
def run(args):
    from dials.util.options import OptionParser
    from dials.util.options import flatten_experiments
    from dials.util.options import flatten_reflections
    import libtbx.load_env

    usage = "%s [options] experiments.json | integrated.pickle" % (
        libtbx.env.dispatcher_name)

    parser = OptionParser(
        usage=usage,
        phil=phil_scope,
        read_experiments=True,
        read_reflections=True,
        check_format=False,
        epilog=help_message)

    params, options = parser.parse_args(show_diff_phil=True)
    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)
    if len(experiments) == 0 or len(reflections) == 0:
        parser.print_help()
        exit()

    reflections = reflections[0]
    cryst = experiments.crystals()[0]
    unit_cell = cryst.get_unit_cell()
    if params.space_group is not None:
        space_group = params.space_group.group()
        assert space_group.is_compatible_unit_cell(unit_cell), unit_cell
    else:
        space_group = cryst.get_space_group()
    print(space_group.info())
    print(unit_cell)

    expt = experiments[0]

    from cctbx import miller, crystal
    from mmtbx.scaling.data_statistics import wilson_scaling

    sel = reflections.get_flags(reflections.flags.integrated_sum)
    reflections = reflections.select(sel)
    cs = crystal.symmetry(unit_cell=unit_cell, space_group=space_group)
    ms = miller.set(cs, indices=reflections['miller_index'], anomalous_flag=True)
    intensities = miller.array(
        ms,
        data=reflections['intensity.sum.value'],
        sigmas=flex.sqrt(reflections['intensity.sum.variance']))
    intensities.set_observation_type_xray_intensity()

    d_star_sq = intensities.d_star_sq().data()
    n_bins = 20
    # binner = intensities.setup_binner_d_star_sq_step(
    #     d_star_sq_step=(flex.max(d_star_sq)-flex.min(d_star_sq)+1e-8)/n_bins)
    binner = intensities.setup_binner_counting_sorted(n_bins=n_bins)
    # wilson = intensities.wilson_plot(use_binning=True)
    # wilson.show()
    # from matplotlib import pyplot
    # pyplot.figure()
    # pyplot.scatter(wilson.binner.bin_centers(2), wilson.data[1:-1])
    # pyplot.show()

    intensities = intensities.merge_equivalents().array()
    wilson = wilson_scaling(intensities, n_residues=200)
    wilson.iso_scale_and_b.show()

    from matplotlib import pyplot
    pyplot.figure()
    pyplot.scatter(wilson.d_star_sq, wilson.mean_I_obs_data, label='Data')
    pyplot.plot(wilson.d_star_sq, wilson.mean_I_obs_theory, label='theory')
    pyplot.plot(wilson.d_star_sq, wilson.mean_I_normalisation, label='smoothed')
    pyplot.yscale('log')
    pyplot.legend()
    pyplot.show()

    import copy
    import math

    # Hack to make the predicter predict reflections outside of the range
    # of the scan
    expt_input = copy.deepcopy(expt)
    scan = expt.scan
    image_range = scan.get_image_range()
    oscillation = scan.get_oscillation()
    scan.set_image_range((1, int(math.ceil(360 / oscillation[1]))))
    scan.set_oscillation((0, oscillation[1]))
    print(scan)

    # Populate the reflection table with predictions
    predicted = flex.reflection_table.from_predictions(
        expt, force_static=params.force_static, dmin=params.d_min)
    predicted['id'] = flex.int(len(predicted), 0)
    print(len(predicted))

    space_group = space_group.build_derived_reflection_intensity_group(
        anomalous_flag=True)
    cs = crystal.symmetry(unit_cell=unit_cell, space_group=space_group)
    ms = miller.set(cs, indices=predicted['miller_index'], anomalous_flag=True)
    ma = miller.array(
        ms, data=flex.double(ms.size(), 1), sigmas=flex.double(ms.size(), 1))

    d_star_sq = ma.d_star_sq().data()
    n_bins = 1
    binner = ma.setup_binner_d_star_sq_step(
        d_star_sq_step=(flex.max(d_star_sq) - flex.min(d_star_sq) + 1e-8) / n_bins)

    image_number = predicted['xyzcal.px'].parts()[2]
    print(flex.min(image_number))
    print(flex.max(image_number))

    # dose = flex.size_t(list(flex.floor(image_number).iround()))
    angle_deg = predicted['xyzcal.mm'].parts()[2] * 180 / math.pi
    dose = flex.size_t(list(flex.floor(angle_deg).iround()))
    range_width = 1
    range_min = flex.min(dose) - range_width
    range_max = flex.max(dose)
    n_steps = 2 + int((range_max - range_min) - range_width)

    binner_non_anom = ma.as_non_anomalous_array().use_binning(binner)
    n_complete = flex.size_t(binner_non_anom.counts_complete()[1:-1])

    ranges_dict = {}
    completeness_levels = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
    for c in completeness_levels:
        ranges_dict[c] = []

    from xia2.Modules.PyChef2 import ChefStatistics
    step = 1
    for i in range(0, 360, step):
        sel = dose < step
        dose.set_selected(sel, dose.select(sel) + 360)
        dose -= flex.min(dose)
        chef_stats = ChefStatistics(
            ma.indices(), ma.data(), ma.sigmas(), ma.d_star_sq().data(),
            dose, n_complete, binner, ma.space_group(), ma.anomalous_flag(),
            n_steps)
        ieither_completeness = chef_stats.ieither_completeness()
        iboth_completeness = chef_stats.iboth_completeness()
        for c in completeness_levels:
            ranges_dict[c].append(
                min((ieither_completeness > (c / 100)).iselection()))

    from matplotlib import pyplot
    pyplot.figure()
    for c in completeness_levels:
        pyplot.plot(ranges_dict[c], label=str(c))
    # pyplot.plot(range_for_50)
    # pyplot.plot(range_for_99)
    # pyplot.scatter(range(iboth_completeness.size()), iboth_completeness)
    pyplot.legend()
    pyplot.show()
def run(args):
    import os

    from iotbx.reflection_file_reader import any_reflection_file
    from xia2.Modules.Analysis import phil_scope

    interp = phil_scope.command_line_argument_interpreter()
    params, unhandled = interp.process_and_fetch(
        args, custom_processor='collect_remaining')
    params = params.extract()
    n_bins = params.resolution_bins

    args = unhandled

    intensities = None
    batches = None
    scales = None
    dose = None

    reader = any_reflection_file(args[0])
    assert reader.file_type() == 'ccp4_mtz'
    arrays = reader.as_miller_arrays(merge_equivalents=False)
    for ma in arrays:
        if ma.info().labels == ['BATCH']:
            batches = ma
        elif ma.info().labels == ['DOSE']:
            dose = ma
        elif ma.info().labels == ['I', 'SIGI']:
            intensities = ma
        elif ma.info().labels == ['I(+)', 'SIGI(+)', 'I(-)', 'SIGI(-)']:
            intensities = ma
        elif ma.info().labels == ['SCALEUSED']:
            scales = ma

    assert intensities is not None
    assert batches is not None
    mtz_object = reader.file_content()

    indices = mtz_object.extract_original_index_miller_indices()
    intensities = intensities.customized_copy(
        indices=indices, info=intensities.info())
    batches = batches.customized_copy(indices=indices, info=batches.info())

    from iotbx import merging_statistics
    merging_stats = merging_statistics.dataset_statistics(
        intensities, n_bins=n_bins)

    merging_acentric = intensities.select_acentric().merge_equivalents()
    merging_centric = intensities.select_centric().merge_equivalents()

    multiplicities_acentric = {}
    multiplicities_centric = {}

    for x in sorted(set(merging_acentric.redundancies().data())):
        multiplicities_acentric[x] = merging_acentric.redundancies().data().count(x)
    for x in sorted(set(merging_centric.redundancies().data())):
        multiplicities_centric[x] = merging_centric.redundancies().data().count(x)

    headers = [u'Resolution (Å)', 'N(obs)', 'N(unique)', 'Multiplicity',
               'Completeness', 'Mean(I)', 'Mean(I/sigma)', 'Rmerge', 'Rmeas',
               'Rpim', 'CC1/2', 'CCano']
    rows = []
    for bin_stats in merging_stats.bins:
        row = ['%.2f - %.2f' % (bin_stats.d_max, bin_stats.d_min),
               bin_stats.n_obs, bin_stats.n_uniq,
               '%.2f' % bin_stats.mean_redundancy,
               '%.2f' % (100 * bin_stats.completeness),
               '%.1f' % bin_stats.i_mean,
               '%.1f' % bin_stats.i_over_sigma_mean,
               '%.3f' % bin_stats.r_merge,
               '%.3f' % bin_stats.r_meas,
               '%.3f' % bin_stats.r_pim,
               '%.3f' % bin_stats.cc_one_half,
               '%.3f' % bin_stats.cc_anom]
        rows.append(row)

    from xia2.lib.tabulate import tabulate
    merging_stats_table_html = tabulate(rows, headers, tablefmt='html')
    merging_stats_table_html = merging_stats_table_html.replace(
        '<table>', '<table class="table table-hover table-condensed">')

    unit_cell_params = intensities.unit_cell().parameters()

    headers = ['', 'Overall', 'Low resolution', 'High resolution']
    stats = (merging_stats.overall, merging_stats.bins[0],
             merging_stats.bins[-1])
    rows = [
        [u'Resolution (Å)'] + [
            '%.2f - %.2f' % (s.d_max, s.d_min) for s in stats],
        ['Observations'] + ['%i' % s.n_obs for s in stats],
        ['Unique reflections'] + ['%i' % s.n_uniq for s in stats],
        ['Multiplicity'] + ['%.1f' % s.mean_redundancy for s in stats],
        ['Completeness'] + ['%.2f%%' % (s.completeness * 100) for s in stats],
        # ['Mean intensity'] + ['%.1f' % s.i_mean for s in stats],
        ['Mean I/sigma(I)'] + ['%.1f' % s.i_over_sigma_mean for s in stats],
        ['Rmerge'] + ['%.3f' % s.r_merge for s in stats],
        ['Rmeas'] + ['%.3f' % s.r_meas for s in stats],
        ['Rpim'] + ['%.3f' % s.r_pim for s in stats],
        ['CC1/2'] + ['%.3f' % s.cc_one_half for s in stats],
    ]
    rows = [[u'<strong>%s</strong>' % r[0]] + r[1:] for r in rows]

    overall_stats_table_html = tabulate(rows, headers, tablefmt='html')
    overall_stats_table_html = overall_stats_table_html.replace(
        '<table>', '<table class="table table-hover table-condensed">')

    # headers = ['Crystal symmetry', '']
    # rows = [
    #     [u'Unit cell: a (Å)', '%.3f' % unit_cell_params[0]],
    #     [u'b (Å)', '%.3f' % unit_cell_params[1]],
    #     [u'c (Å)', '%.3f' % unit_cell_params[2]],
    #     [u'α (°)', '%.3f' % unit_cell_params[3]],
    #     [u'β (°)', '%.3f' % unit_cell_params[4]],
    #     [u'γ (°)', '%.3f' % unit_cell_params[5]],
    #     ['Space group', intensities.space_group_info().symbol_and_number()],
    # ]
    # symmetry_table_html = tabulate(rows, headers, tablefmt='html')

    symmetry_table_html = """
  <p>
    <b>Filename:</b> %s
    <br>
    <b>Unit cell:</b> %s
    <br>
    <b>Space group:</b> %s
  </p>
""" % (os.path.abspath(reader.file_name()),
       intensities.space_group_info().symbol_and_number(),
       str(intensities.unit_cell()))

    if params.anomalous:
        intensities = intensities.as_anomalous_array()
        batches = batches.as_anomalous_array()

    from xia2.Modules.PyChef2.PyChef import remove_batch_gaps
    new_batch_data = remove_batch_gaps(batches.data())
    new_batches = batches.customized_copy(data=new_batch_data)
    sc_vs_b = scales_vs_batch(scales, new_batches)
    rmerge_vs_b = rmerge_vs_batch(intensities, new_batches)

    intensities.setup_binner(n_bins=n_bins)
    merged_intensities = intensities.merge_equivalents().array()

    from mmtbx.scaling import twin_analyses
    normalised_intensities = twin_analyses.wilson_normalised_intensities(
        miller_array=merged_intensities)
    nz_test = twin_analyses.n_z_test(
        normalised_acentric=normalised_intensities.acentric,
        normalised_centric=normalised_intensities.centric)

    from mmtbx.scaling import data_statistics
    if not intensities.space_group().is_centric():
        wilson_scaling = data_statistics.wilson_scaling(
            miller_array=merged_intensities,
            n_residues=200)  # XXX default n_residues?

    acentric = intensities.select_acentric()
    centric = intensities.select_centric()
    if acentric.size():
        acentric.setup_binner(n_bins=n_bins)
        second_moments_acentric = acentric.second_moment_of_intensities(
            use_binning=True)
    if centric.size():
        centric.setup_binner(n_bins=n_bins)
        second_moments_centric = centric.second_moment_of_intensities(
            use_binning=True)

    d_star_sq_bins = [
        (1 / bin_stats.d_min ** 2) for bin_stats in merging_stats.bins]
    i_over_sig_i_bins = [
        bin_stats.i_over_sigma_mean for bin_stats in merging_stats.bins]
    cc_one_half_bins = [
        bin_stats.cc_one_half for bin_stats in merging_stats.bins]
    cc_anom_bins = [
        bin_stats.cc_anom for bin_stats in merging_stats.bins]

    from xia2.Modules.PyChef2 import PyChef
    if params.chef_min_completeness:
        d_min = PyChef.resolution_limit(
            mtz_file=args[0],
            min_completeness=params.chef_min_completeness,
            n_bins=8)
        print('Estimated d_min for CHEF analysis: %.2f' % d_min)
        sel = flex.bool(intensities.size(), True)
        d_spacings = intensities.d_spacings().data()
        sel &= d_spacings >= d_min
        intensities = intensities.select(sel)
        batches = batches.select(sel)
        if dose is not None:
            dose = dose.select(sel)

    if dose is None:
        dose = PyChef.batches_to_dose(batches.data(), params.dose)
    else:
        dose = dose.data()
    pychef_stats = PyChef.Statistics(intensities, dose)

    pychef_dict = pychef_stats.to_dict()

    def d_star_sq_to_d_ticks(d_star_sq, nticks):
        from cctbx import uctbx
        d_spacings = uctbx.d_star_sq_as_d(flex.double(d_star_sq))
        min_d_star_sq = min(d_star_sq)
        dstep = (max(d_star_sq) - min_d_star_sq) / nticks
        tickvals = list(min_d_star_sq + (i * dstep) for i in range(nticks))
        ticktext = ['%.2f' % (uctbx.d_star_sq_as_d(dsq)) for dsq in tickvals]
        return tickvals, ticktext

    tickvals, ticktext = d_star_sq_to_d_ticks(d_star_sq_bins, nticks=5)
    tickvals_wilson, ticktext_wilson = d_star_sq_to_d_ticks(
        wilson_scaling.d_star_sq, nticks=5)
    second_moment_d_star_sq = []
    if acentric.size():
        second_moment_d_star_sq.extend(
            second_moments_acentric.binner.bin_centers(2))
    if centric.size():
        second_moment_d_star_sq.extend(
            second_moments_centric.binner.bin_centers(2))
    tickvals_2nd_moment, ticktext_2nd_moment = d_star_sq_to_d_ticks(
        second_moment_d_star_sq, nticks=5)

    json_data = {
        'multiplicities': {
            'data': [
                {
                    'x': list(multiplicities_acentric.keys()),
                    'y': list(multiplicities_acentric.values()),
                    'type': 'bar',
                    'name': 'Acentric',
                    'opacity': 0.75,
                },
                {
                    'x': list(multiplicities_centric.keys()),
                    'y': list(multiplicities_centric.values()),
                    'type': 'bar',
                    'name': 'Centric',
                    'opacity': 0.75,
                },
            ],
            'layout': {
                'title': 'Distribution of multiplicities',
                'xaxis': {'title': 'Multiplicity'},
                'yaxis': {
                    'title': 'Frequency',
                    # 'rangemode': 'tozero'
                },
                'bargap': 0,
                'barmode': 'overlay',
            },
        },
        'scale_rmerge_vs_batch': {
            'data': [
                {
                    'x': sc_vs_b.batches,
                    'y': sc_vs_b.data,
                    'type': 'scatter',
                    'name': 'Scale',
                    'opacity': 0.75,
                },
                {
                    'x': rmerge_vs_b.batches,
                    'y': rmerge_vs_b.data,
                    'yaxis': 'y2',
                    'type': 'scatter',
                    'name': 'Rmerge',
                    'opacity': 0.75,
                },
            ],
            'layout': {
                'title': 'Scale and Rmerge vs batch',
                'xaxis': {'title': 'N'},
                'yaxis': {
                    'title': 'Scale',
                    'rangemode': 'tozero'
                },
                'yaxis2': {
                    'title': 'Rmerge',
                    'overlaying': 'y',
                    'side': 'right',
                    'rangemode': 'tozero'
                }
            },
        },
        'cc_one_half': {
            'data': [
                {
                    'x': d_star_sq_bins,  # d_star_sq
                    'y': cc_one_half_bins,
                    'type': 'scatter',
                    'name': 'CC-half',
                },
                ({
                    'x': d_star_sq_bins,  # d_star_sq
                    'y': cc_anom_bins,
                    'type': 'scatter',
                    'name': 'CC-anom',
                } if not intensities.space_group().is_centric() else {}),
            ],
            'layout': {
                'title': 'CC-half vs resolution',
                'xaxis': {
                    'title': u'Resolution (Å)',
                    'tickvals': tickvals,
                    'ticktext': ticktext,
                },
                'yaxis': {
                    'title': 'CC-half',
                    'range': [min(cc_one_half_bins + cc_anom_bins + [0]), 1]
                },
            },
        },
        'i_over_sig_i': {
            'data': [{
                'x': d_star_sq_bins,  # d_star_sq
                'y': i_over_sig_i_bins,
                'type': 'scatter',
                'name': 'Scales vs batch',
            }],
            'layout': {
                'title': '<I/sig(I)> vs resolution',
                'xaxis': {
                    'title': u'Resolution (Å)',
                    'tickvals': tickvals,
                    'ticktext': ticktext,
                },
                'yaxis': {
                    'title': '<I/sig(I)>',
                    'rangemode': 'tozero'
                },
            }
        },
        'second_moments': {
            'data': [
                ({
                    'x': list(second_moments_acentric.binner.bin_centers(2)),  # d_star_sq
                    'y': second_moments_acentric.data[1:-1],
                    'type': 'scatter',
                    'name': '<I^2> acentric',
                } if acentric.size() else {}),
                ({
                    'x': list(second_moments_centric.binner.bin_centers(2)),  # d_star_sq
                    'y': second_moments_centric.data[1:-1],
                    'type': 'scatter',
                    'name': '<I^2> centric',
                } if centric.size() else {})
            ],
            'layout': {
                'title': 'Second moment of I',
                'xaxis': {
                    'title': u'Resolution (Å)',
                    'tickvals': tickvals_2nd_moment,
                    'ticktext': ticktext_2nd_moment,
                },
                'yaxis': {
                    'title': '<I^2>',
                    'rangemode': 'tozero'
                },
            }
        },
        'cumulative_intensity_distribution': {
            'data': [
                {
                    'x': list(nz_test.z),
                    'y': list(nz_test.ac_obs),
                    'type': 'scatter',
                    'name': 'Acentric observed',
                    'mode': 'lines',
                    'line': {
                        'color': 'rgb(31, 119, 180)',
                    },
                },
                {
                    'x': list(nz_test.z),
                    'y': list(nz_test.c_obs),
                    'type': 'scatter',
                    'name': 'Centric observed',
                    'mode': 'lines',
                    'line': {
                        'color': 'rgb(255, 127, 14)',
                    },
                },
                {
                    'x': list(nz_test.z),
                    'y': list(nz_test.ac_untwinned),
                    'type': 'scatter',
                    'name': 'Acentric theory',
                    'mode': 'lines',
                    'line': {
                        'color': 'rgb(31, 119, 180)',
                        'dash': 'dot',
                    },
                    'opacity': 0.8,
                },
                {
                    'x': list(nz_test.z),
                    'y': list(nz_test.c_untwinned),
                    'type': 'scatter',
                    'name': 'Centric theory',
                    'mode': 'lines',
                    'line': {
                        'color': 'rgb(255, 127, 14)',
                        'dash': 'dot',
                    },
                    'opacity': 0.8,
                },
            ],
            'layout': {
                'title': 'Cumulative intensity distribution',
                'xaxis': {'title': 'z'},
                'yaxis': {
                    'title': 'P(Z <= Z)',
                    'rangemode': 'tozero'
                },
            }
        },
        'wilson_intensity_plot': {
            'data': ([
                {
                    'x': list(wilson_scaling.d_star_sq),
                    'y': list(wilson_scaling.mean_I_obs_data),
                    'type': 'scatter',
                    'name': 'Observed',
                },
                {
                    'x': list(wilson_scaling.d_star_sq),
                    'y': list(wilson_scaling.mean_I_obs_theory),
                    'type': 'scatter',
                    'name': 'Expected',
                },
                {
                    'x': list(wilson_scaling.d_star_sq),
                    'y': list(wilson_scaling.mean_I_normalisation),
                    'type': 'scatter',
                    'name': 'Smoothed',
                }] if not intensities.space_group().is_centric() else []),
            'layout': {
                'title': 'Wilson intensity plot',
                'xaxis': {
                    'title': u'Resolution (Å)',
                    'tickvals': tickvals_wilson,
                    'ticktext': ticktext_wilson,
                },
                'yaxis': {
                    'type': 'log',
                    'title': 'Mean(I)',
                    'rangemode': 'tozero',
                },
            },
        },
    }

    json_data.update(pychef_dict)

    from dials.report import html_report
    report = html_report.html_report()

    page_header = html_report.page_header('xia2 report')
    report.add_content(page_header)

    overall_panel = html_report.panel('Overall', 'overall', show=True)
    overall_table = html_report.table_responsive(
        overall_stats_table_html, width=800)
    overall_panel.add_content(overall_table)

    merging_stats_panel = html_report.panel('Resolution shells', 'merging_stats')
    merging_stats_table = html_report.table_responsive(merging_stats_table_html)
    merging_stats_panel.add_content(merging_stats_table)

    merging_stats_panel_group = html_report.panel_group(
        [overall_panel, merging_stats_panel])
    div = html_report.div()
    div.add_content(html_report.raw_html('<h2>Merging statistics</h2>'))
    div.add_content(html_report.raw_html(symmetry_table_html))
    div.add_content(merging_stats_panel_group)
    report.add_content(div)

    resolution_plots_panel = html_report.panel('Analysis by resolution', 'resolution')
    for graph in ('cc_one_half', 'i_over_sig_i', 'second_moments',
                  'wilson_intensity_plot'):
        resolution_plots_panel.add_content(html_report.plotly_graph(
            json_data[graph], graph))

    batch_plots_panel = html_report.panel('Analysis by batch', 'batch')
    for graph in ('scale_rmerge_vs_batch', 'completeness_vs_dose',
                  'rcp_vs_dose', 'scp_vs_dose', 'rd_vs_batch_difference'):
        batch_plots_panel.add_content(html_report.plotly_graph(
            json_data[graph], graph))

    misc_plots_panel = html_report.panel('Miscellaneous', 'misc')
    for graph in ('multiplicities', 'cumulative_intensity_distribution'):
        misc_plots_panel.add_content(html_report.plotly_graph(
            json_data[graph], graph))

    analysis_plots_panel_group = html_report.panel_group(
        [resolution_plots_panel, batch_plots_panel, misc_plots_panel])
    div = html_report.div()
    div.add_content(html_report.raw_html('<h2>Analysis plots</h2>'))
    div.add_content(analysis_plots_panel_group)
    report.add_content(div)

    html = report.html()

    import json
    json_str = json.dumps(json_data)
    with open('xia2-report.json', 'w') as f:
        f.write(json_str)

    with open('xia2-report.html', 'wb') as f:
        f.write(html.encode('ascii', 'xmlcharrefreplace'))

    return
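# Quick check of the d*^2 -> d conversion used for the resolution tick labels
# above: d = 1 / sqrt(d*^2), so d*^2 = 0.25 corresponds to d = 2.0 Å.
from cctbx import uctbx

assert abs(uctbx.d_star_sq_as_d(0.25) - 2.0) < 1e-9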