def print_scaling_summary(self, scaling_script):
    """Log summary information after scaling."""
    if ScalingModelObserver().data:
        logger.info(ScalingModelObserver().return_model_error_summary())
    valid_ranges = get_valid_image_ranges(scaling_script.experiments)
    image_ranges = get_image_ranges(scaling_script.experiments)
    msg = []
    for (img, valid, refl) in zip(
        image_ranges, valid_ranges, scaling_script.reflections
    ):
        if valid:
            if len(valid) > 1 or valid[0][0] != img[0] or valid[-1][1] != img[1]:
                msg.append(
                    "Excluded images for experiment id: %s, image range: %s, limited range: %s"
                    % (
                        refl.experiment_identifiers().keys()[0],
                        list(img),
                        list(valid),
                    )
                )
    if msg:
        msg = ["Summary of image ranges removed:"] + msg
        logger.info("\n".join(msg))
    # report on partiality of dataset
    partials = flex.double()
    for r in scaling_script.reflections:
        if "partiality" in r:
            partials.extend(r["partiality"])
    not_full_sel = partials < 0.99
    not_zero_sel = partials > 0.01
    gt_half = partials > 0.5
    lt_half = partials < 0.5
    partial_gt_half_sel = not_full_sel & gt_half
    partial_lt_half_sel = not_zero_sel & lt_half
    logger.info("Summary of dataset partialities")
    header = ["Partiality (p)", "n_refl"]
    rows = [
        ["all reflections", str(partials.size())],
        ["p > 0.99", str(not_full_sel.count(False))],
        ["0.5 < p < 0.99", str(partial_gt_half_sel.count(True))],
        ["0.01 < p < 0.5", str(partial_lt_half_sel.count(True))],
        ["p < 0.01", str(not_zero_sel.count(False))],
    ]
    logger.info(tabulate(rows, header))
    logger.info(
        """
Reflections below a partiality_cutoff of %s are not considered for any
part of the scaling analysis or for the reporting of merging statistics.
Additionally, if applicable, only reflections with a min_partiality > %s
were considered for use when refining the scaling model.
""",
        scaling_script.params.cut_data.partiality_cutoff,
        scaling_script.params.reflection_selection.min_partiality,
    )
    if MergingStatisticsObserver().data:
        logger.info(
            make_merging_statistics_summary(
                MergingStatisticsObserver().data["statistics"]
            )
        )
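
# Illustrative sketch (not part of the original module): the partiality table
# above is built from flex boolean selections, and count(False) on a selection
# counts its complement, e.g. count(False) on (partials < 0.99) counts the
# reflections with p >= 0.99. The values below are made up for illustration.
def _partiality_selection_sketch():
    from dials.array_family import flex

    partials = flex.double([0.005, 0.3, 0.7, 0.995])
    not_full_sel = partials < 0.99  # flex.bool selection
    # count(False) counts the "full" reflections with p >= 0.99
    assert not_full_sel.count(False) == 1
    # intersecting two selections picks out the 0.5 < p < 0.99 band
    assert (not_full_sel & (partials > 0.5)).count(True) == 1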
def test_get_image_ranges():
    """Test for namesake function"""
    scan = Scan(image_range=[1, 200], oscillation=[0.0, 1.0])
    exp1 = Experiment(scan=scan)
    exp2 = Experiment()
    experiments = [exp1, exp2]
    image_ranges = get_image_ranges(experiments)
    assert image_ranges == [(1, 200), (0, 0)]
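
# Usage sketch (illustrative, not from the original module): applying
# get_image_ranges to experiments loaded from disk. The "indexed.expt"
# filename and the ExperimentList.from_file constructor are assumptions
# made for this example.
def _image_ranges_usage_sketch():
    from dxtbx.model import ExperimentList

    experiments = ExperimentList.from_file("indexed.expt")
    for expt, (first, last) in zip(experiments, get_image_ranges(experiments)):
        # scan-less experiments (e.g. stills) report an image range of (0, 0)
        print(expt.identifier, first, last)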
def export_mtz(integrated_data, experiment_list, params):
    """Export data from integrated_data corresponding to experiment_list to an
    MTZ file hklout."""

    # if mtz filename is auto, then choose scaled.mtz or integrated.mtz
    if params.mtz.hklout in (None, Auto, "auto"):
        if ("intensity.scale.value" in integrated_data) and (
            "intensity.scale.variance" in integrated_data
        ):
            params.mtz.hklout = "scaled.mtz"
            logger.info("Data appears to be scaled, setting mtz.hklout = 'scaled.mtz'")
        else:
            params.mtz.hklout = "integrated.mtz"
            logger.info(
                "Data appears to be unscaled, setting mtz.hklout = 'integrated.mtz'"
            )

    # First get the experiment identifier information out of the data
    expids_in_table = integrated_data.experiment_identifiers()
    if not list(expids_in_table.keys()):
        reflection_tables = parse_multiple_datasets([integrated_data])
        experiment_list, refl_list = assign_unique_identifiers(
            experiment_list, reflection_tables
        )
        integrated_data = flex.reflection_table()
        for reflections in refl_list:
            integrated_data.extend(reflections)
        expids_in_table = integrated_data.experiment_identifiers()
    integrated_data.assert_experiment_identifiers_are_consistent(experiment_list)
    expids_in_list = list(experiment_list.identifiers())

    # Convert experiment_list to a real python list or else identity assumptions
    # fail like:
    #   assert experiment_list[0] is experiment_list[0]
    # And assumptions about added attributes break
    experiment_list = list(experiment_list)

    # Validate multi-experiment assumptions
    if len(experiment_list) > 1:
        # All experiments should match crystals, or else we need multiple crystals/datasets
        if not all(
            x.crystal == experiment_list[0].crystal for x in experiment_list[1:]
        ):
            logger.warning(
                "Experiment crystals differ. Using first experiment crystal for file-level data."
            )
        wavelengths = match_wavelengths(experiment_list)
        if len(wavelengths.keys()) > 1:
            logger.info(
                "Multiple wavelengths found: \n%s",
                "\n".join(
                    "  Wavelength: %.5f, experiment numbers: %s "
                    % (k, ",".join(map(str, v)))
                    for k, v in wavelengths.items()
                ),
            )
    else:
        wavelengths = OrderedDict({experiment_list[0].beam.get_wavelength(): [0]})

    # also only work correctly with one panel (for the moment)
    if any(len(experiment.detector) != 1 for experiment in experiment_list):
        logger.warning("Ignoring multiple panels in output MTZ")

    best_unit_cell = params.mtz.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiment_list)
    integrated_data["d"] = best_unit_cell.d(integrated_data["miller_index"])

    # Clean up the data with the passed in options
    integrated_data = filter_reflection_table(
        integrated_data,
        intensity_choice=params.intensity,
        partiality_threshold=params.mtz.partiality_threshold,
        combine_partials=params.mtz.combine_partials,
        min_isigi=params.mtz.min_isigi,
        filter_ice_rings=params.mtz.filter_ice_rings,
        d_min=params.mtz.d_min,
    )

    # get batch offsets and image ranges - even for scanless experiments
    batch_offsets = [
        expt.scan.get_batch_offset()
        for expt in experiment_list
        if expt.scan is not None
    ]
    unique_offsets = set(batch_offsets)
    if len(unique_offsets) <= 1:
        logger.debug("Calculating new batches")
        batch_offsets = calculate_batch_offsets(experiment_list)
        batch_starts = [
            e.scan.get_image_range()[0] if e.scan else 0 for e in experiment_list
        ]
        effective_offsets = [o + s for o, s in zip(batch_offsets, batch_starts)]
        unique_offsets = set(effective_offsets)
    else:
        logger.debug("Keeping existing batches")
    image_ranges = get_image_ranges(experiment_list)
    if len(unique_offsets) != len(batch_offsets):
        raise ValueError(
            "Duplicate batch offsets detected: %s"
            % ", ".join(
                str(item) for item, count in Counter(batch_offsets).items() if count > 1
            )
        )

    # Create the mtz file
    mtz_writer = UnmergedMTZWriter(experiment_list[0].crystal.get_space_group())

    # FIXME TODO for more than one experiment into an MTZ file:
    #
    # - add an epoch (or recover an epoch) from the scan and add this as an extra
    #   column to the MTZ file for scaling, so we know that the two lattices were
    #   integrated at the same time
    # ✓ decide a sensible BATCH increment to apply to the BATCH value between
    #   experiments and add this

    for id_ in expids_in_table.keys():
        # Grab our subset of the data
        loc = expids_in_list.index(
            expids_in_table[id_]
        )  # get strid and use to find loc in list
        experiment = experiment_list[loc]
        if len(list(wavelengths.keys())) > 1:
            for i, (wl, exps) in enumerate(wavelengths.items()):
                if loc in exps:
                    wavelength = wl
                    dataset_id = i + 1
                    break
        else:
            wavelength = list(wavelengths.keys())[0]
            dataset_id = 1

        reflections = integrated_data.select(integrated_data["id"] == id_)
        batch_offset = batch_offsets[loc]
        image_range = image_ranges[loc]
        reflections = assign_batches_to_reflections([reflections], [batch_offset])[0]
        experiment.data = dict(reflections)

        s0n = matrix.col(experiment.beam.get_s0()).normalize().elems
        logger.debug("Beam vector: %.4f %.4f %.4f" % s0n)

        mtz_writer.add_batch_list(
            image_range,
            experiment,
            wavelength,
            dataset_id,
            batch_offset=batch_offset,
            force_static_model=params.mtz.force_static_model,
        )

        # Create the batch offset array. This gives us an experiment (id)-dependent
        # batch offset to calculate the correct batch from image number.
experiment.data["batch_offset"] = flex.int(len(experiment.data["id"]), batch_offset) # Calculate whether we have a ROT value for this experiment, and set the column _, _, z = experiment.data["xyzcal.px"].parts() if experiment.scan: experiment.data[ "ROT"] = experiment.scan.get_angle_from_array_index(z) else: experiment.data["ROT"] = z mtz_writer.add_crystal( crystal_name=params.mtz.crystal_name, project_name=params.mtz.project_name, unit_cell=best_unit_cell, ) # Note: add unit cell here as may have changed basis since creating mtz. # For multi-wave unmerged mtz, we add an empty dataset for each wavelength, # but only write the data into the final dataset (for unmerged the batches # link the unmerged data to the individual wavelengths). for wavelength in wavelengths: mtz_writer.add_empty_dataset(wavelength) # Combine all of the experiment data columns before writing combined_data = { k: v.deep_copy() for k, v in experiment_list[0].data.items() } for experiment in experiment_list[1:]: for k, v in experiment.data.items(): combined_data[k].extend(v) # ALL columns must be the same length assert len({len(v) for v in combined_data.values() }) == 1, "Column length mismatch" assert len(combined_data["id"]) == len( integrated_data["id"]), "Lost rows in split/combine" # Write all the data and columns to the mtz file mtz_writer.write_columns(combined_data) logger.info("Saving {} integrated reflections to {}".format( len(combined_data["id"]), params.mtz.hklout)) mtz_file = mtz_writer.mtz_file mtz_file.write(params.mtz.hklout) return mtz_file
def print_scaling_summary(script):
    """Log summary information after scaling."""
    logger.info(print_scaling_model_error_summary(script.experiments))
    valid_ranges = get_valid_image_ranges(script.experiments)
    image_ranges = get_image_ranges(script.experiments)
    msg = []
    for (img, valid, refl) in zip(image_ranges, valid_ranges, script.reflections):
        if valid:
            if len(valid) > 1 or valid[0][0] != img[0] or valid[-1][1] != img[1]:
                msg.append(
                    "Excluded images for experiment id: %s, image range: %s, limited range: %s"
                    % (
                        refl.experiment_identifiers().keys()[0],
                        list(img),
                        list(valid),
                    )
                )
    if msg:
        msg = ["Summary of image ranges removed:"] + msg
        logger.info("\n".join(msg))
    # report on partiality of dataset
    partials = flex.double()
    for r in script.reflections:
        if "partiality" in r:
            partials.extend(r["partiality"])
    not_full_sel = partials < 0.99
    not_zero_sel = partials > 0.01
    gt_half = partials > 0.5
    lt_half = partials < 0.5
    partial_gt_half_sel = not_full_sel & gt_half
    partial_lt_half_sel = not_zero_sel & lt_half
    logger.info("Summary of dataset partialities")
    header = ["Partiality (p)", "n_refl"]
    rows = [
        ["all reflections", str(partials.size())],
        ["p > 0.99", str(not_full_sel.count(False))],
        ["0.5 < p < 0.99", str(partial_gt_half_sel.count(True))],
        ["0.01 < p < 0.5", str(partial_lt_half_sel.count(True))],
        ["p < 0.01", str(not_zero_sel.count(False))],
    ]
    logger.info(tabulate(rows, header))
    logger.info(
        """
Reflections below a partiality_cutoff of %s are not considered for any
part of the scaling analysis or for the reporting of merging statistics.
Additionally, if applicable, only reflections with a min_partiality > %s
were considered for use when refining the scaling model.
""",
        script.params.cut_data.partiality_cutoff,
        script.params.reflection_selection.min_partiality,
    )
    stats = script.merging_statistics_result
    if stats:
        anom_stats, cut_stats, cut_anom_stats = (None, None, None)
        if not script.scaled_miller_array.space_group().is_centric():
            anom_stats = script.anom_merging_statistics_result
        logger.info(make_merging_statistics_summary(stats))
        try:
            d_min = resolution_cc_half(stats, limit=0.3).d_min
        except RuntimeError as e:
            logger.debug(f"Resolution fit failed: {e}")
        else:
            max_current_res = stats.bins[-1].d_min
            if d_min and d_min - max_current_res > 0.005:
                logger.info(
                    "Resolution limit suggested from CC½ fit (limit CC½=0.3): %.2f",
                    d_min,
                )
                try:
                    cut_stats, cut_anom_stats = merging_stats_from_scaled_array(
                        script.scaled_miller_array.resolution_filter(d_min=d_min),
                        script.params.output.merging.nbins,
                        script.params.output.use_internal_variance,
                    )
                except DialsMergingStatisticsError:
                    pass
                else:
                    if script.scaled_miller_array.space_group().is_centric():
                        cut_anom_stats = None
        logger.info(table_1_summary(stats, anom_stats, cut_stats, cut_anom_stats))