def merge_data_to_mtz(params, experiments, reflections):
    """Merge data (at each wavelength) and write to an mtz file object.

    If more than one wavelength group is found, delegate to the MAD merging
    routine; otherwise merge and truncate the single dataset and write a
    single-wavelength mtz file.
    """
    wavelengths = match_wavelengths(experiments)
    if len(wavelengths) > 1:
        # Fixed typo in log message: "Wavlength" -> "Wavelength".
        logger.info(
            "Multiple wavelengths found: \n%s",
            "\n".join(
                " Wavelength: %.5f, experiment numbers: %s "
                % (k, ",".join(map(str, v)))
                for k, v in wavelengths.items()
            ),
        )
        return make_MAD_merged_mtz_file(params, experiments, reflections, wavelengths)
    merged_data = merge_and_truncate(params, experiments, reflections)
    # Single wavelength: pass the one wavelength key plus the merged data tuple.
    return make_merged_mtz_file(*((params, list(wavelengths)[0]) + merged_data))
def _export_unmerged_mtz(params, experiments, reflection_table):
    """Export data to unmerged_mtz format (as single file or split by wavelength)."""
    from dials.command_line.export import MTZExporter
    from dials.command_line.export import phil_scope as export_phil_scope

    # Build export parameters from the export phil scope, carrying over the
    # relevant settings from this program's parameters.
    export_params = export_phil_scope.extract()
    export_params.intensity = ["scale"]
    export_params.mtz.partiality_threshold = params.cut_data.partiality_cutoff
    export_params.mtz.crystal_name = params.output.crystal_name
    if params.cut_data.d_min:
        export_params.mtz.d_min = params.cut_data.d_min

    filenames = params.output.unmerged_mtz
    if len(filenames) > 1:
        # One output file per wavelength group of experiments.
        from dxtbx.model import ExperimentList

        wavelengths = match_wavelengths(experiments)
        assert len(filenames) == len(wavelengths.keys())
        for filename, wavelength in zip(filenames, wavelengths.keys()):
            export_params.mtz.hklout = filename
            logger.info("\nSaving output to an unmerged mtz file to %s.", filename)
            # Collect the experiments (and their identifiers) belonging to
            # this wavelength group.
            expt_subset = ExperimentList()
            identifiers = []
            for index, expt in enumerate(experiments):
                if index in wavelengths[wavelength]:
                    expt_subset.append(expt)
                    identifiers.append(expt.identifier)
            subset_refls = reflection_table.select_on_experiment_identifiers(
                identifiers
            )
            MTZExporter(export_params, expt_subset, [subset_refls]).export()
    else:
        # Single output file for all data.
        logger.info(
            "\nSaving output to an unmerged mtz file to %s.", filenames[0]
        )
        export_params.mtz.hklout = filenames[0]
        MTZExporter(export_params, experiments, [reflection_table]).export()
def _export_multi_merged_mtz(params, experiments, reflection_table):
    """Merge and write one merged mtz file per wavelength group of experiments."""
    from dxtbx.model import ExperimentList

    wavelengths = match_wavelengths(experiments)
    assert len(params.output.merged_mtz) == len(wavelengths.keys())
    for filename, wavelength in zip(params.output.merged_mtz, wavelengths.keys()):
        # Select the experiments (and matching reflections, via experiment
        # identifiers) belonging to this wavelength group.
        expt_subset = ExperimentList()
        identifiers = []
        for index, expt in enumerate(experiments):
            if index in wavelengths[wavelength]:
                expt_subset.append(expt)
                identifiers.append(expt.identifier)
        subset_refls = reflection_table.select_on_experiment_identifiers(identifiers)
        scaled_array = scaled_data_as_miller_array([subset_refls], expt_subset)
        # Merge equivalents for both mean and anomalous intensities.
        merged = scaled_array.merge_equivalents(
            use_internal_variance=params.output.use_internal_variance
        ).array()
        anomalous_array = scaled_array.as_anomalous_array()
        merged_anom = anomalous_array.merge_equivalents(
            use_internal_variance=params.output.use_internal_variance
        ).array()
        mtz_file = make_merged_mtz_file(merged, merged_anom)
        logger.info("\nSaving output to a merged mtz file to %s.\n", filename)
        mtz_file.write(filename)
def run(self):
    """Execute the script.

    Parses the command line, loads experiments (and optionally reflections),
    then splits them into output files by one of four strategies:
    by wavelength, by detector, into fixed-size chunks, or one file per
    experiment (the default).
    """
    # Parse the command line
    params, _ = self.parser.parse_args(show_diff_phil=True)

    # Try to load the models and data
    if not params.input.experiments:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    if params.input.reflections:
        # Reflection files must pair one-to-one with experiment files.
        if len(params.input.reflections) != len(params.input.experiments):
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments"
            )

    experiments = flatten_experiments(params.input.experiments)
    if params.input.reflections:
        reflections = flatten_reflections(params.input.reflections)[0]
    else:
        reflections = None

    # Filename factories for the split output files; index is filled in per
    # output file, padded to the width of the largest experiment count.
    experiments_template = functools.partial(
        params.output.template.format,
        prefix=params.output.experiments_prefix,
        maxindexlength=len(str(len(experiments))),
        extension="expt",
    )
    reflections_template = functools.partial(
        params.output.template.format,
        prefix=params.output.reflections_prefix,
        maxindexlength=len(str(len(experiments))),
        extension="refl",
    )

    if params.output.chunk_sizes:
        # Explicit chunk sizes must account for every experiment exactly.
        if not sum(params.output.chunk_sizes) == len(experiments):
            raise Sorry(
                "Sum of chunk sizes list (%s) not equal to number of experiments (%s)"
                % (sum(params.output.chunk_sizes), len(experiments))
            )

    if params.by_wavelength:
        # Split into one experiment/reflection file per distinct wavelength.
        if reflections:
            if not reflections.experiment_identifiers():
                raise Sorry(
                    "Unable to split by wavelength as no experiment "
                    "identifiers are set in the reflection table."
                )
        if all(experiments.identifiers() == ""):
            raise Sorry(
                "Unable to split by wavelength as no experiment "
                "identifiers are set in the experiment list."
            )
        wavelengths = match_wavelengths(experiments)
        for i, wl in enumerate(sorted(wavelengths.keys())):
            expids = []
            new_exps = ExperimentList()
            exp_nos = wavelengths[wl]
            for j in exp_nos:
                expids.append(experiments[j].identifier)  # string
                new_exps.append(experiments[j])
            experiment_filename = experiments_template(index=i)
            print(
                "Saving experiments with wavelength %s to %s"
                % (wl, experiment_filename)
            )
            new_exps.as_json(experiment_filename)
            if reflections:
                # Reflections are matched to this wavelength group via their
                # experiment identifier strings.
                refls = reflections.select_on_experiment_identifiers(expids)
                reflections_filename = reflections_template(index=i)
                print(
                    "Saving reflections with wavelength %s to %s"
                    % (wl, reflections_filename)
                )
                refls.as_file(reflections_filename)
    elif params.by_detector:
        # Split into one output file per distinct detector model.
        assert (
            not params.output.chunk_size
        ), "chunk_size + by_detector is not implemented"
        if reflections is None:
            split_data = {
                detector: {"experiments": ExperimentList()}
                for detector in experiments.detectors()
            }
        else:
            split_data = {
                detector: {
                    "experiments": ExperimentList(),
                    "reflections": flex.reflection_table(),
                }
                for detector in experiments.detectors()
            }
        for i, experiment in enumerate(experiments):
            # The output file index is the detector's position in the
            # deduplicated detector list.
            split_expt_id = experiments.detectors().index(experiment.detector)
            experiment_filename = experiments_template(index=split_expt_id)
            print("Adding experiment %d to %s" % (i, experiment_filename))
            split_data[experiment.detector]["experiments"].append(experiment)
            if reflections is not None:
                reflections_filename = reflections_template(index=split_expt_id)
                print(
                    "Adding reflections for experiment %d to %s"
                    % (i, reflections_filename)
                )
                if reflections.experiment_identifiers().keys():
                    # first find which id value corresponds to experiment in question
                    identifier = experiment.identifier
                    id_ = None
                    for k in reflections.experiment_identifiers().keys():
                        if reflections.experiment_identifiers()[k] == identifier:
                            id_ = k
                            break
                    if id_ is None:
                        raise Sorry(
                            "Unable to find id matching experiment identifier in reflection table."
                        )
                    ref_sel = reflections.select(reflections["id"] == id_)
                    # now reset ids and reset/update identifiers map
                    for k in ref_sel.experiment_identifiers().keys():
                        del ref_sel.experiment_identifiers()[k]
                    new_id = len(split_data[experiment.detector]["experiments"]) - 1
                    ref_sel["id"] = flex.int(len(ref_sel), new_id)
                    ref_sel.experiment_identifiers()[new_id] = identifier
                else:
                    # No identifier map: fall back to matching on the raw
                    # input experiment index.
                    ref_sel = reflections.select(reflections["id"] == i)
                    ref_sel["id"] = flex.int(
                        len(ref_sel),
                        len(split_data[experiment.detector]["experiments"]) - 1,
                    )
                split_data[experiment.detector]["reflections"].extend(ref_sel)
        # Write out one file pair per detector group.
        for i, detector in enumerate(experiments.detectors()):
            experiment_filename = experiments_template(index=i)
            print("Saving experiment %d to %s" % (i, experiment_filename))
            split_data[detector]["experiments"].as_json(experiment_filename)
            if reflections is not None:
                reflections_filename = reflections_template(index=i)
                print(
                    "Saving reflections for experiment %d to %s"
                    % (i, reflections_filename)
                )
                split_data[detector]["reflections"].as_file(reflections_filename)
    elif params.output.chunk_size or params.output.chunk_sizes:
        # Split into fixed-size chunks (uniform chunk_size, or an explicit
        # list of chunk_sizes).

        def save_chunk(chunk_id, expts, refls):
            # Write one chunk's accumulated experiments/reflections to disk.
            experiment_filename = experiments_template(index=chunk_id)
            print("Saving chunk %d to %s" % (chunk_id, experiment_filename))
            expts.as_json(experiment_filename)
            if refls is not None:
                reflections_filename = reflections_template(index=chunk_id)
                print(
                    "Saving reflections for chunk %d to %s"
                    % (chunk_id, reflections_filename)
                )
                refls.as_file(reflections_filename)

        chunk_counter = 0
        chunk_expts = ExperimentList()
        if reflections:
            chunk_refls = flex.reflection_table()
        else:
            chunk_refls = None
        for i, experiment in enumerate(experiments):
            chunk_expts.append(experiment)
            if reflections:
                if reflections.experiment_identifiers().keys():
                    # first find which id value corresponds to experiment in question
                    identifier = experiment.identifier
                    id_ = None
                    for k in reflections.experiment_identifiers().keys():
                        if reflections.experiment_identifiers()[k] == identifier:
                            id_ = k
                            break
                    if id_ is None:
                        raise Sorry(
                            "Unable to find id matching experiment identifier in reflection table."
                        )
                    ref_sel = reflections.select(reflections["id"] == id_)
                    # now reset ids and reset/update identifiers map
                    for k in ref_sel.experiment_identifiers().keys():
                        del ref_sel.experiment_identifiers()[k]
                    new_id = len(chunk_expts) - 1
                    ref_sel["id"] = flex.int(len(ref_sel), new_id)
                    ref_sel.experiment_identifiers()[new_id] = identifier
                else:
                    ref_sel = reflections.select(reflections["id"] == i)
                    ref_sel["id"] = flex.int(len(ref_sel), len(chunk_expts) - 1)
                chunk_refls.extend(ref_sel)
            if params.output.chunk_sizes:
                chunk_limit = params.output.chunk_sizes[chunk_counter]
            else:
                chunk_limit = params.output.chunk_size
            if len(chunk_expts) == chunk_limit:
                # Chunk full: flush it and start accumulating the next one.
                save_chunk(chunk_counter, chunk_expts, chunk_refls)
                chunk_counter += 1
                chunk_expts = ExperimentList()
                if reflections:
                    chunk_refls = flex.reflection_table()
                else:
                    chunk_refls = None
        if len(chunk_expts) > 0:
            # Flush any trailing partial chunk.
            save_chunk(chunk_counter, chunk_expts, chunk_refls)
    else:
        # Default: one output file pair per input experiment.
        for i, experiment in enumerate(experiments):
            experiment_filename = experiments_template(index=i)
            print("Saving experiment %d to %s" % (i, experiment_filename))
            ExperimentList([experiment]).as_json(experiment_filename)
            if reflections is not None:
                reflections_filename = reflections_template(index=i)
                print(
                    "Saving reflections for experiment %d to %s"
                    % (i, reflections_filename)
                )
                ref_sel = reflections.select(reflections["id"] == i)
                if ref_sel.experiment_identifiers().keys():
                    # NOTE(review): this assumes the identifier map is keyed by
                    # the experiment index i here — confirm against the
                    # reflection table's id conventions.
                    identifier = ref_sel.experiment_identifiers()[i]
                    for k in ref_sel.experiment_identifiers().keys():
                        del ref_sel.experiment_identifiers()[k]
                    ref_sel["id"] = flex.int(ref_sel.size(), 0)
                    ref_sel.experiment_identifiers()[0] = identifier
                else:
                    ref_sel["id"] = flex.int(len(ref_sel), 0)
                ref_sel.as_file(reflections_filename)
    return
def merge_data_to_mtz(params, experiments, reflections):
    """Merge data (at each wavelength) and write to an mtz file object.

    Experiments are grouped by wavelength; each group is merged (and
    optionally truncated to amplitudes) into its own mtz dataset, and all
    datasets are combined into one mtz file object which is returned.
    """
    wavelengths = match_wavelengths(
        experiments,
        absolute_tolerance=params.wavelength_tolerance,
    )  # wavelengths is an ordered dict
    mtz_datasets = [
        MTZDataClass(wavelength=w, project_name=params.output.project_name)
        for w in wavelengths.keys()
    ]
    dataset_names = params.output.dataset_names
    crystal_names = params.output.crystal_names

    # check if best_unit_cell is set.
    best_unit_cell = params.best_unit_cell
    if not best_unit_cell:
        best_unit_cell = determine_best_unit_cell(experiments)
    # Recompute d-spacings with the common unit cell and apply it to all
    # experiments so every dataset is merged consistently.
    reflections[0]["d"] = best_unit_cell.d(reflections[0]["miller_index"])
    for expt in experiments:
        expt.crystal.unit_cell = best_unit_cell

    if len(wavelengths) > 1:
        # Fixed typo in log message: "Wavlength" -> "Wavelength".
        logger.info(
            "Multiple wavelengths found: \n%s",
            "\n".join(
                " Wavelength: %.5f, experiment numbers: %s "
                % (k, ",".join(map(str, v)))
                for k, v in wavelengths.items()
            ),
        )
        if not dataset_names or len(dataset_names) != len(wavelengths):
            logger.info(
                "Unequal number of dataset names and wavelengths, using default naming."
            )
            dataset_names = [None] * len(wavelengths)
        if not crystal_names or len(crystal_names) != len(wavelengths):
            logger.info(
                "Unequal number of crystal names and wavelengths, using default naming."
            )
            crystal_names = [None] * len(wavelengths)
        experiments_subsets = []
        reflections_subsets = []
        for dataset, dname, cname in zip(mtz_datasets, dataset_names, crystal_names):
            dataset.dataset_name = dname
            dataset.crystal_name = cname
        # Partition the experiments and reflections by wavelength group.
        for exp_nos in wavelengths.values():
            expids = [experiments[i].identifier for i in exp_nos]
            experiments_subsets.append(
                ExperimentList([experiments[i] for i in exp_nos])
            )
            reflections_subsets.append(
                reflections[0].select_on_experiment_identifiers(expids)
            )
    else:
        mtz_datasets[0].dataset_name = dataset_names[0]
        mtz_datasets[0].crystal_name = crystal_names[0]
        experiments_subsets = [experiments]
        reflections_subsets = reflections

    # merge and truncate the data for each wavelength group
    for experimentlist, reflection_table, mtz_dataset in zip(
        experiments_subsets, reflections_subsets, mtz_datasets
    ):
        # First generate two merge_equivalents objects, collect merging stats
        merged, merged_anomalous, stats_summary = merge(
            experimentlist,
            reflection_table,
            d_min=params.d_min,
            d_max=params.d_max,
            combine_partials=params.combine_partials,
            partiality_threshold=params.partiality_threshold,
            best_unit_cell=best_unit_cell,
            anomalous=params.anomalous,
            assess_space_group=params.assess_space_group,
            n_bins=params.merging.n_bins,
            use_internal_variance=params.merging.use_internal_variance,
        )
        merged_array = merged.array()
        # Save the relevant data in the mtz_dataset dataclass
        # This will add the data for IMEAN/SIGIMEAN
        mtz_dataset.merged_array = merged_array
        if merged_anomalous:
            merged_anomalous_array = merged_anomalous.array()
            # This will add the data for I(+), I(-), SIGI(+), SIGI(-), N(+), N(-)
            mtz_dataset.merged_anomalous_array = merged_anomalous_array
            mtz_dataset.multiplicities = merged_anomalous.redundancies()
        else:
            merged_anomalous_array = None
            # This will add the data for N
            mtz_dataset.multiplicities = merged.redundancies()
        if params.anomalous:
            merged_intensities = merged_anomalous_array
        else:
            merged_intensities = merged_array
        anom_amplitudes = None
        if params.truncate:
            amplitudes, anom_amplitudes, dano = truncate(merged_intensities)
            # This will add the data for F, SIGF
            mtz_dataset.amplitudes = amplitudes
            # This will add the data for F(+), F(-), SIGF(+), SIGF(-)
            mtz_dataset.anomalous_amplitudes = anom_amplitudes
            # This will add the data for DANO, SIGDANO
            mtz_dataset.dano = dano

        # print out analysis statistics
        show_wilson_scaling_analysis(merged_intensities)
        if stats_summary:
            logger.info(stats_summary)
        if anom_amplitudes:
            logger.info(make_dano_table(anom_amplitudes))

    # pass the dataclasses to an MTZ writer to generate the mtz file and return.
    return make_merged_mtz_file(mtz_datasets)
def merge_data_to_mtz(params, experiments, reflections):
    """Merge data (at each wavelength) and write to an mtz file object.

    Experiments are grouped by wavelength; each group is merged (and
    optionally truncated to amplitudes) into its own mtz dataset, and all
    datasets are combined into one mtz file object which is returned.
    """
    wavelengths = match_wavelengths(experiments)  # wavelengths is an ordered dict
    mtz_datasets = [
        MTZDataClass(wavelength=w, project_name=params.output.project_name)
        for w in wavelengths.keys()
    ]
    dataset_names = params.output.dataset_names
    crystal_names = params.output.crystal_names
    if len(wavelengths) > 1:
        # Fixed typo in log message: "Wavlength" -> "Wavelength".
        logger.info(
            "Multiple wavelengths found: \n%s",
            "\n".join(
                " Wavelength: %.5f, experiment numbers: %s "
                % (k, ",".join(map(str, v)))
                for k, v in wavelengths.items()
            ),
        )
        if not dataset_names or len(dataset_names) != len(wavelengths):
            logger.info(
                "Unequal number of dataset names and wavelengths, using default naming."
            )
            dataset_names = [None] * len(wavelengths)
        if not crystal_names or len(crystal_names) != len(wavelengths):
            logger.info(
                "Unequal number of crystal names and wavelengths, using default naming."
            )
            crystal_names = [None] * len(wavelengths)
        experiments_subsets = []
        reflections_subsets = []
        for dataset, dname, cname in zip(mtz_datasets, dataset_names, crystal_names):
            dataset.dataset_name = dname
            dataset.crystal_name = cname
        # Partition the experiments and reflections by wavelength group.
        for exp_nos in wavelengths.values():
            expids = [experiments[i].identifier for i in exp_nos]
            experiments_subsets.append(
                ExperimentList([experiments[i] for i in exp_nos])
            )
            reflections_subsets.append(
                reflections[0].select_on_experiment_identifiers(expids)
            )
    else:
        mtz_datasets[0].dataset_name = dataset_names[0]
        mtz_datasets[0].crystal_name = crystal_names[0]
        experiments_subsets = [experiments]
        reflections_subsets = reflections

    for experimentlist, reflection_table, mtz_dataset in zip(
        experiments_subsets, reflections_subsets, mtz_datasets
    ):
        # merge and truncate the data
        merged_array, merged_anomalous_array, stats_summary = merge(
            experimentlist,
            reflection_table,
            d_min=params.d_min,
            d_max=params.d_max,
            combine_partials=params.combine_partials,
            partiality_threshold=params.partiality_threshold,
            anomalous=params.anomalous,
            assess_space_group=params.assess_space_group,
            n_bins=params.merging.n_bins,
            use_internal_variance=params.merging.use_internal_variance,
        )
        # These add the data for IMEAN/SIGIMEAN and I(+)/I(-) respectively.
        mtz_dataset.merged_array = merged_array
        mtz_dataset.merged_anomalous_array = merged_anomalous_array
        if params.anomalous:
            merged_intensities = merged_anomalous_array
        else:
            merged_intensities = merged_array
        if params.truncate:
            # French-Wilson style truncation to structure-factor amplitudes.
            amplitudes, anomalous_amplitudes = truncate(merged_intensities)
            mtz_dataset.amplitudes = amplitudes
            mtz_dataset.anomalous_amplitudes = anomalous_amplitudes

        # print out analysis statistics
        show_wilson_scaling_analysis(merged_intensities)
        if stats_summary:
            logger.info(stats_summary)

    return make_merged_mtz_file(mtz_datasets)