def test_assign_batches_to_reflections():
    """Check that per-table batch numbers are offset as requested."""
    tables = assign_batches_to_reflections(
        [reflections_3(), reflections_3()], batch_offsets=[0, 100]
    )
    # With offsets 0 and 100 the two tables get batches 1,2 and 101,102.
    expected = ([1, 2], [101, 102])
    for table, batch_values in zip(tables, expected):
        assert list(table["batch"]) == batch_values
def reflection_tables_to_batch_dependent_properties(
    reflection_tables, experiments, scaled_array=None
):
    """Extract batch dependent properties from a reflection table list.

    Args:
        reflection_tables: List of reflection tables, one per experiment.
        experiments: The corresponding experiments (used to calculate batch
            offsets and ranges).
        scaled_array: Optional pre-computed scaled miller array. If None,
            one is computed here from the filtered reflection tables.

    Returns:
        The tuple returned by ``batch_dependent_properties`` with a list of
        per-batch ``{"id": ..., "range": ...}`` dicts appended as the last
        element.
    """
    offsets = calculate_batch_offsets(experiments)
    reflection_tables = assign_batches_to_reflections(reflection_tables, offsets)
    # filter bad refls and negative scales
    batches = flex.int()
    scales = flex.double()
    for r in reflection_tables:
        sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
        sel &= r["inverse_scale_factor"] > 0
        batches.extend(r["batch"].select(sel))
        scales.extend(r["inverse_scale_factor"].select(sel))
    if scaled_array is None:
        # Explicit None check (was a truthiness test): a valid array supplied
        # by the caller must never be silently recomputed.
        scaled_array = scaled_data_as_miller_array(reflection_tables, experiments)
    ms = scaled_array.customized_copy()
    batch_array = miller.array(ms, data=batches)

    batch_ranges = get_batch_ranges(experiments, offsets)
    batch_data = [{"id": i, "range": r} for i, r in enumerate(batch_ranges)]

    properties = batch_dependent_properties(
        batch_array, scaled_array, miller.array(ms, data=scales)
    )
    return properties + (batch_data,)
def from_reflections_and_experiments(cls, reflection_tables, experiments, params):
    """Construct the resolutionizer from native dials datatypes."""
    # add some assertions about data

    # do batch assignment (same functions as in dials.export)
    offsets = calculate_batch_offsets(experiments)
    reflection_tables = assign_batches_to_reflections(reflection_tables, offsets)

    batches = flex.int()
    intensities = flex.double()
    indices = flex.miller_index()
    variances = flex.double()
    for table in reflection_tables:
        # Prefer scaled intensities when present, otherwise profile-fitted.
        if "intensity.scale.value" in table:
            choice, prefix = ["scale"], "intensity.scale"
        else:
            choice, prefix = ["profile"], "intensity.prf"
        table = filter_reflection_table(table, choice, partiality_threshold=0.4)
        intensities.extend(table[prefix + ".value"])
        variances.extend(table[prefix + ".variance"])
        indices.extend(table["miller_index"])
        batches.extend(table["batch"])

    crystal_symmetry = miller.crystal.symmetry(
        unit_cell=determine_best_unit_cell(experiments),
        space_group=experiments[0].crystal.get_space_group(),
        assert_is_compatible_unit_cell=False,
    )
    miller_set = miller.set(crystal_symmetry, indices, anomalous_flag=False)
    i_obs = miller.array(miller_set, data=intensities, sigmas=flex.sqrt(variances))
    i_obs.set_observation_type_xray_intensity()
    i_obs.set_info(miller.array_info(source="DIALS", source_type="refl"))

    ms = i_obs.customized_copy()
    batch_array = miller.array(ms, data=batches)

    if params.reference is not None:
        reference, _ = miller_array_from_mtz(
            params.reference, anomalous=params.anomalous, labels=params.labels
        )
    else:
        reference = None

    return cls(i_obs, params, batches=batch_array, reference=reference)
def reflections_as_miller_arrays(self, combined=False):
    """Convert the stored reflections to cctbx miller arrays.

    Reflections are split per experiment id (unindexed reflections with
    id == -1 are skipped), assigned batch numbers from each scan's batch
    offset, then filtered to drop reflections flagged bad for scaling or
    carrying a non-positive inverse scale factor.

    Args:
        combined: If True, return a single (scaled, batch, scale) triple
            of miller arrays covering all experiments; otherwise return
            three parallel lists with one entry per experiment.

    Returns:
        Either ``(scaled_array, batch_array, scale_array)`` or
        ``(scaled_arrays, batch_arrays, scale_arrays)`` depending on
        ``combined``.
    """
    from dials.util.batch_handling import (
        assign_batches_to_reflections,
    )
    from dials.report.analysis import scaled_data_as_miller_array

    # Split the combined table into one reflection table per experiment.
    reflection_tables = []
    for id_ in set(self._reflections["id"]).difference({-1}):
        reflection_tables.append(
            self._reflections.select(self._reflections["id"] == id_)
        )
    # Batch offsets are taken from the existing scans rather than recomputed.
    offsets = [expt.scan.get_batch_offset() for expt in self._experiments]
    reflection_tables = assign_batches_to_reflections(reflection_tables, offsets)
    if combined:
        # filter bad refls and negative scales
        batches = flex.int()
        scales = flex.double()
        for r in reflection_tables:
            sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
            sel &= r["inverse_scale_factor"] > 0
            batches.extend(r["batch"].select(sel))
            scales.extend(r["inverse_scale_factor"].select(sel))
        scaled_array = scaled_data_as_miller_array(
            reflection_tables, self._experiments
        )
        # Batch and scale arrays share the scaled array's miller indices.
        batch_array = miller.array(scaled_array, data=batches)
        scale_array = miller.array(scaled_array, data=scales)
        return scaled_array, batch_array, scale_array
    else:
        scaled_arrays = []
        batch_arrays = []
        scale_arrays = []
        for expt, r in zip(self._experiments, reflection_tables):
            # Same bad-reflection / non-positive-scale filter as above, but
            # applied per experiment.
            sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
            sel &= r["inverse_scale_factor"] > 0
            batches = r["batch"].select(sel)
            scales = r["inverse_scale_factor"].select(sel)
            scaled_arrays.append(scaled_data_as_miller_array([r], [expt]))
            batch_arrays.append(miller.array(scaled_arrays[-1], data=batches))
            scale_arrays.append(miller.array(scaled_arrays[-1], data=scales))
        return scaled_arrays, batch_arrays, scale_arrays
def export_mtz(integrated_data, experiment_list, params):
    """Export data from integrated_data corresponding to experiment_list to an
    MTZ file hklout.

    Args:
        integrated_data: A reflection table of integrated (or scaled) data.
        experiment_list: The corresponding experiments.
        params: Extracted phil parameters; mtz.* options control filtering,
            output naming and batch handling.

    Returns:
        The written mtz file object.

    Raises:
        ValueError: if pre-existing batch offsets are duplicated.
    """
    # if mtz filename is auto, then choose scaled.mtz or integrated.mtz
    if params.mtz.hklout in (None, Auto, "auto"):
        if ("intensity.scale.value" in integrated_data) and (
            "intensity.scale.variance" in integrated_data
        ):
            params.mtz.hklout = "scaled.mtz"
            logger.info("Data appears to be scaled, setting mtz.hklout = 'scaled.mtz'")
        else:
            params.mtz.hklout = "integrated.mtz"
            logger.info(
                "Data appears to be unscaled, setting mtz.hklout = 'integrated.mtz'"
            )

    # First get the experiment identifier information out of the data; if none
    # present, assign unique identifiers and rebuild the combined table.
    expids_in_table = integrated_data.experiment_identifiers()
    if not list(expids_in_table.keys()):
        reflection_tables = parse_multiple_datasets([integrated_data])
        experiment_list, refl_list = assign_unique_identifiers(
            experiment_list, reflection_tables
        )
        integrated_data = flex.reflection_table()
        for reflections in refl_list:
            integrated_data.extend(reflections)
        expids_in_table = integrated_data.experiment_identifiers()
    integrated_data.assert_experiment_identifiers_are_consistent(experiment_list)
    expids_in_list = list(experiment_list.identifiers())

    # Convert experiment_list to a real python list or else identity assumptions
    # fail like:
    #   assert experiment_list[0] is experiment_list[0]
    # And assumptions about added attributes break
    experiment_list = list(experiment_list)

    # Validate multi-experiment assumptions
    if len(experiment_list) > 1:
        # All experiments should match crystals, or else we need multiple crystals/datasets
        if not all(
            x.crystal == experiment_list[0].crystal for x in experiment_list[1:]
        ):
            logger.warning(
                "Experiment crystals differ. Using first experiment crystal for file-level data."
            )
        wavelengths = match_wavelengths(experiment_list)
        if len(wavelengths.keys()) > 1:
            logger.info(
                "Multiple wavelengths found: \n%s",
                "\n".join(
                    # Fixed typo in user-facing message: was "Wavlength".
                    "  Wavelength: %.5f, experiment numbers: %s "
                    % (k, ",".join(map(str, v)))
                    for k, v in wavelengths.items()
                ),
            )
    else:
        wavelengths = OrderedDict({experiment_list[0].beam.get_wavelength(): [0]})

    # also only work correctly with one panel (for the moment)
    if any(len(experiment.detector) != 1 for experiment in experiment_list):
        logger.warning("Ignoring multiple panels in output MTZ")

    best_unit_cell = params.mtz.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiment_list)
    integrated_data["d"] = best_unit_cell.d(integrated_data["miller_index"])

    # Clean up the data with the passed in options
    integrated_data = filter_reflection_table(
        integrated_data,
        intensity_choice=params.intensity,
        partiality_threshold=params.mtz.partiality_threshold,
        combine_partials=params.mtz.combine_partials,
        min_isigi=params.mtz.min_isigi,
        filter_ice_rings=params.mtz.filter_ice_rings,
        d_min=params.mtz.d_min,
    )

    # get batch offsets and image ranges - even for scanless experiments
    batch_offsets = [
        expt.scan.get_batch_offset()
        for expt in experiment_list
        if expt.scan is not None
    ]
    unique_offsets = set(batch_offsets)
    # unique_offsets is already a set; was wrapped in a redundant set() call.
    if len(unique_offsets) <= 1:
        logger.debug("Calculating new batches")
        batch_offsets = calculate_batch_offsets(experiment_list)
        batch_starts = [
            e.scan.get_image_range()[0] if e.scan else 0 for e in experiment_list
        ]
        effective_offsets = [o + s for o, s in zip(batch_offsets, batch_starts)]
        unique_offsets = set(effective_offsets)
    else:
        logger.debug("Keeping existing batches")
    image_ranges = get_image_ranges(experiment_list)
    if len(unique_offsets) != len(batch_offsets):
        raise ValueError(
            "Duplicate batch offsets detected: %s"
            % ", ".join(
                str(item)
                for item, count in Counter(batch_offsets).items()
                if count > 1
            )
        )

    # Create the mtz file
    mtz_writer = UnmergedMTZWriter(experiment_list[0].crystal.get_space_group())

    # FIXME TODO for more than one experiment into an MTZ file:
    #
    # - add an epoch (or recover an epoch) from the scan and add this as an extra
    #   column to the MTZ file for scaling, so we know that the two lattices were
    #   integrated at the same time
    # - decide a sensible BATCH increment to apply to the BATCH value between
    #   experiments and add this

    for id_ in expids_in_table.keys():
        # Grab our subset of the data
        loc = expids_in_list.index(
            expids_in_table[id_]
        )  # get strid and use to find loc in list
        experiment = experiment_list[loc]
        # Determine the wavelength and 1-based dataset id for this experiment.
        if len(list(wavelengths.keys())) > 1:
            for i, (wl, exps) in enumerate(wavelengths.items()):
                if loc in exps:
                    wavelength = wl
                    dataset_id = i + 1
                    break
        else:
            wavelength = list(wavelengths.keys())[0]
            dataset_id = 1

        reflections = integrated_data.select(integrated_data["id"] == id_)
        batch_offset = batch_offsets[loc]
        image_range = image_ranges[loc]
        reflections = assign_batches_to_reflections([reflections], [batch_offset])[0]
        experiment.data = dict(reflections)

        s0n = matrix.col(experiment.beam.get_s0()).normalize().elems
        logger.debug("Beam vector: %.4f %.4f %.4f" % s0n)

        mtz_writer.add_batch_list(
            image_range,
            experiment,
            wavelength,
            dataset_id,
            batch_offset=batch_offset,
            force_static_model=params.mtz.force_static_model,
        )

        # Create the batch offset array. This gives us an experiment (id)-dependent
        # batch offset to calculate the correct batch from image number.
        experiment.data["batch_offset"] = flex.int(
            len(experiment.data["id"]), batch_offset
        )

        # Calculate whether we have a ROT value for this experiment, and set the column
        _, _, z = experiment.data["xyzcal.px"].parts()
        if experiment.scan:
            experiment.data["ROT"] = experiment.scan.get_angle_from_array_index(z)
        else:
            experiment.data["ROT"] = z

    mtz_writer.add_crystal(
        crystal_name=params.mtz.crystal_name,
        project_name=params.mtz.project_name,
        unit_cell=best_unit_cell,
    )  # Note: add unit cell here as may have changed basis since creating mtz.
    # For multi-wave unmerged mtz, we add an empty dataset for each wavelength,
    # but only write the data into the final dataset (for unmerged the batches
    # link the unmerged data to the individual wavelengths).
    for wavelength in wavelengths:
        mtz_writer.add_empty_dataset(wavelength)

    # Combine all of the experiment data columns before writing
    combined_data = {k: v.deep_copy() for k, v in experiment_list[0].data.items()}
    for experiment in experiment_list[1:]:
        for k, v in experiment.data.items():
            combined_data[k].extend(v)
    # ALL columns must be the same length
    assert len({len(v) for v in combined_data.values()}) == 1, "Column length mismatch"
    assert len(combined_data["id"]) == len(
        integrated_data["id"]
    ), "Lost rows in split/combine"

    # Write all the data and columns to the mtz file
    mtz_writer.write_columns(combined_data)

    logger.info(
        "Saving {} integrated reflections to {}".format(
            len(combined_data["id"]), params.mtz.hklout
        )
    )
    mtz_file = mtz_writer.mtz_file
    mtz_file.write(params.mtz.hklout)

    return mtz_file
def filtered_arrays_from_experiments_reflections(
    experiments,
    reflections,
    outlier_rejection_after_filter=False,
    partiality_threshold=0.99,
    return_batches=False,
):
    """Create a list of filtered arrays from experiments and reflections.

    A partiality threshold can be set, and if outlier_rejection_after_filter
    is True, and intensity.scale values are not present, then a round of
    outlier rejection will take place.

    Note: datasets that end up empty after filtering are removed from the
    ``experiments`` and ``reflections`` lists in place.

    Args:
        experiments: List of experiments (one per dataset).
        reflections: List of reflection tables, parallel to ``experiments``.
        outlier_rejection_after_filter: If True, run simple outlier rejection
            on unscaled (sum/prf) intensities after filtering.
        partiality_threshold: Minimum partiality passed to the filter.
        return_batches: If True, also return batch miller arrays; requires
            every experiment to have a scan.

    Returns:
        The list of intensity miller arrays, or (miller_arrays, batch_arrays)
        when ``return_batches`` is True.

    Raises:
        ValueError: if no datasets remain after filtering.
    """
    miller_arrays = []
    ids_to_del = []

    if return_batches:
        # Batch numbers come from each scan's batch offset, so scans are required.
        assert all(expt.scan is not None for expt in experiments)
        batch_offsets = [expt.scan.get_batch_offset() for expt in experiments]
        reflections = assign_batches_to_reflections(reflections, batch_offsets)
        batch_arrays = []

    for idx, (expt, refl) in enumerate(zip(experiments, reflections)):
        crystal_symmetry = crystal.symmetry(
            unit_cell=expt.crystal.get_unit_cell(),
            space_group=expt.crystal.get_space_group(),
        )

        # want to use scale intensities if present, else sum + prf (if available)
        if "intensity.scale.value" in refl:
            intensity_choice = ["scale"]
            intensity_to_use = "intensity.scale"
        else:
            assert "intensity.sum.value" in refl
            intensity_to_use = "intensity.sum"
            intensity_choice = ["sum"]
            if "intensity.prf.value" in refl:
                # Prefer profile-fitted intensities over summation when available.
                intensity_choice.append("profile")
                intensity_to_use = "intensity.prf"

        try:
            logger.info("Filtering reflections for dataset %s", idx)
            refl = filter_reflection_table(
                refl,
                intensity_choice,
                min_isigi=-5,
                filter_ice_rings=False,
                combine_partials=True,
                partiality_threshold=partiality_threshold,
            )
        except ValueError:
            # Nothing survived filtering; mark this dataset for removal.
            logger.info(
                "Dataset %s removed as no reflections left after filtering", idx
            )
            ids_to_del.append(idx)
        else:
            # If scale was chosen - will return scale or have raised ValueError
            # If prf or sum, possible was no prf but want to continue.
            try:
                refl["intensity"] = refl[intensity_to_use + ".value"]
                refl["variance"] = refl[intensity_to_use + ".variance"]
            except KeyError:  # catch case where prf were removed.
                refl["intensity"] = refl["intensity.sum.value"]
                refl["variance"] = refl["intensity.sum.variance"]
            # Outlier rejection only applies to unscaled data; scaled data has
            # already been through outlier handling.
            if outlier_rejection_after_filter and intensity_to_use != "intensity.scale":
                refl = reject_outliers(refl, expt, method="simple", zmax=12.0)
                refl = refl.select(~refl.get_flags(refl.flags.outlier_in_scaling))

            miller_set = miller.set(
                crystal_symmetry, refl["miller_index"], anomalous_flag=False
            )
            intensities = miller_set.array(
                data=refl["intensity"], sigmas=flex.sqrt(refl["variance"])
            )
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(
                miller.array_info(source="DIALS", source_type="pickle")
            )
            miller_arrays.append(intensities)
            if return_batches:
                batch_arrays.append(
                    miller_set.array(data=refl["batch"]).set_info(intensities.info())
                )

    if not miller_arrays:
        raise ValueError(
            """No datasets remain after pre-filtering. Please check input data.
The datasets may not contain any full reflections; the command line
option partiality_threshold can be lowered to include partials."""
        )

    # Delete in reverse so earlier indices stay valid while removing.
    for id_ in ids_to_del[::-1]:
        del experiments[id_]
        del reflections[id_]

    if return_batches:
        return miller_arrays, batch_arrays
    return miller_arrays