def reflection_tables_to_batch_dependent_properties(
    reflection_tables, experiments, scaled_array=None
):
    """Extract batch dependent properties from a reflection table list."""
    offsets = calculate_batch_offsets(experiments)
    reflection_tables = assign_batches_to_reflections(reflection_tables, offsets)
    # filter bad refls and negative scales
    batches = flex.int()
    scales = flex.double()
    for r in reflection_tables:
        sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
        sel &= r["inverse_scale_factor"] > 0
        batches.extend(r["batch"].select(sel))
        scales.extend(r["inverse_scale_factor"].select(sel))
    if not scaled_array:
        scaled_array = scaled_data_as_miller_array(reflection_tables, experiments)
    ms = scaled_array.customized_copy()
    batch_array = miller.array(ms, data=batches)

    batch_ranges = get_batch_ranges(experiments, offsets)
    batch_data = [{"id": i, "range": r} for i, r in enumerate(batch_ranges)]

    properties = batch_dependent_properties(
        batch_array, scaled_array, miller.array(ms, data=scales)
    )

    return properties + (batch_data,)
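# Hedged usage sketch: the reflection table and experiment names below are
# placeholders, and the unpacking relies only on batch_data being the final
# tuple element, as guaranteed by the return statement above:
#
#     *properties, batch_data = reflection_tables_to_batch_dependent_properties(
#         [refl_1, refl_2], expts
#     )
#     # batch_data is e.g. [{"id": 0, "range": (1, 200)}, ...]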
def test_calculate_batch_offsets():
    """Test offset calculation. The offset is the next number ending in 01 that
    is greater than the batch numbers used so far, chosen so that batch ranges
    are never consecutive."""
    scan = Scan(image_range=[1, 200], oscillation=[0.0, 1.0])
    exp1 = Experiment(scan=scan)
    exp2 = Experiment()
    offsets = calculate_batch_offsets([exp1, exp2])
    assert offsets == [0, 301]
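# A minimal, dependency-free sketch of the offset rule described in the
# docstring above, for illustration only: the real calculate_batch_offsets
# lives in DIALS and also handles flex types and experiment models. The name
# and the image_maxima argument are hypothetical; an experiment without a scan
# is treated here as covering a single image.
def _sketch_batch_offsets(image_maxima):
    offsets = []
    highest = 0
    for n_images in image_maxima:
        if highest == 0:
            offset = 0  # first experiment keeps its native numbering
        else:
            # next number ending in 01 strictly above the batches used so far,
            # leaving a gap so that batch ranges are never consecutive
            offset = ((highest // 100) + 1) * 100 + 1
        offsets.append(offset)
        highest = offset + n_images
    return offsets

assert _sketch_batch_offsets([200, 1]) == [0, 301]  # mirrors the test above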
@classmethod
def from_reflections_and_experiments(cls, reflection_tables, experiments, params):
    """Construct the resolutionizer from native dials datatypes."""
    # add some assertions about data

    # do batch assignment (same functions as in dials.export)
    offsets = calculate_batch_offsets(experiments)
    reflection_tables = assign_batches_to_reflections(reflection_tables, offsets)
    batches = flex.int()
    intensities = flex.double()
    indices = flex.miller_index()
    variances = flex.double()
    for table in reflection_tables:
        if "intensity.scale.value" in table:
            table = filter_reflection_table(
                table, ["scale"], partiality_threshold=0.4
            )
            intensities.extend(table["intensity.scale.value"])
            variances.extend(table["intensity.scale.variance"])
        else:
            table = filter_reflection_table(
                table, ["profile"], partiality_threshold=0.4
            )
            intensities.extend(table["intensity.prf.value"])
            variances.extend(table["intensity.prf.variance"])
        indices.extend(table["miller_index"])
        batches.extend(table["batch"])

    crystal_symmetry = miller.crystal.symmetry(
        unit_cell=determine_best_unit_cell(experiments),
        space_group=experiments[0].crystal.get_space_group(),
        assert_is_compatible_unit_cell=False,
    )
    miller_set = miller.set(crystal_symmetry, indices, anomalous_flag=False)
    i_obs = miller.array(miller_set, data=intensities, sigmas=flex.sqrt(variances))
    i_obs.set_observation_type_xray_intensity()
    i_obs.set_info(miller.array_info(source="DIALS", source_type="refl"))

    ms = i_obs.customized_copy()
    batch_array = miller.array(ms, data=batches)

    if params.reference is not None:
        reference, _ = miller_array_from_mtz(
            params.reference, anomalous=params.anomalous, labels=params.labels
        )
    else:
        reference = None

    return cls(i_obs, params, batches=batch_array, reference=reference)
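# Hedged usage sketch (the Resolutionizer class name is assumed from the
# docstring; arguments mirror the signature above):
#
#     resolutionizer = Resolutionizer.from_reflections_and_experiments(
#         reflection_tables, experiments, params
#     )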
def test_calculate_batch_ranges(self):
    assert self._run_ranges([(1, 1)]) == [(1, 1)]
    # Zero is shifted
    assert all(
        x > 0 for x in self._run_ranges_to_set([(0, 0)])
    ), "Should be no zeroth/negative batch"
    assert set(self._run_ranges([(1, 1), (1, 1)])) != {
        (1, 1)
    }, "Overlapping simple ranges"

    data_tests = [
        [(1, 1), (1, 1)],
        # while we decide behaviour, remove input
        # [(1, 1), (8, 8), (9, 9)],
        [(23, 24), (70, 100), (1, 1), (1, 4), (1, 1)],
        [(0, 98)],
    ]
    for data in data_tests:
        print("Running ", data)
        print("   ", self._run_ranges(data))
        assert all(
            float(x).is_integer()
            for x in itertools.chain(*self._run_ranges(data))
        ), "Fractional epochs"
        assert all(
            isinstance(x, int) for x in itertools.chain(*self._run_ranges(data))
        ), "Not all true integers"
        assert all(
            x > 0 for x in self._run_ranges_to_set([(0, 0)])
        ), "Should be no zeroth/negative batch"
        assert not has_consecutive_ranges(self._run_ranges(data))

    exp1 = TestBatchRangeCalculations.MockExperiment((1, 1), scan=False)
    exp2 = TestBatchRangeCalculations.MockExperiment((1, 1), scan=False)
    offsets = calculate_batch_offsets([exp1, exp2])
    assert all(float(x).is_integer() for x in offsets)
    assert all(isinstance(x, int) for x in offsets)
    assert all(x > 0 for x in offsets)
def export_mtz(integrated_data, experiment_list, params):
    """Export data from integrated_data corresponding to experiment_list to an
    MTZ file hklout."""

    # if mtz filename is auto, then choose scaled.mtz or integrated.mtz
    if params.mtz.hklout in (None, Auto, "auto"):
        if ("intensity.scale.value" in integrated_data) and (
            "intensity.scale.variance" in integrated_data
        ):
            params.mtz.hklout = "scaled.mtz"
            logger.info("Data appears to be scaled, setting mtz.hklout = 'scaled.mtz'")
        else:
            params.mtz.hklout = "integrated.mtz"
            logger.info(
                "Data appears to be unscaled, setting mtz.hklout = 'integrated.mtz'"
            )

    # First get the experiment identifier information out of the data
    expids_in_table = integrated_data.experiment_identifiers()
    if not list(expids_in_table.keys()):
        reflection_tables = parse_multiple_datasets([integrated_data])
        experiment_list, refl_list = assign_unique_identifiers(
            experiment_list, reflection_tables
        )
        integrated_data = flex.reflection_table()
        for reflections in refl_list:
            integrated_data.extend(reflections)
        expids_in_table = integrated_data.experiment_identifiers()
    integrated_data.assert_experiment_identifiers_are_consistent(experiment_list)
    expids_in_list = list(experiment_list.identifiers())

    # Convert experiment_list to a real python list or else identity assumptions
    # fail like:
    #   assert experiment_list[0] is experiment_list[0]
    # And assumptions about added attributes break
    experiment_list = list(experiment_list)

    # Validate multi-experiment assumptions
    if len(experiment_list) > 1:
        # All experiments should match crystals, or else we need multiple crystals/datasets
        if not all(
            x.crystal == experiment_list[0].crystal for x in experiment_list[1:]
        ):
            logger.warning(
                "Experiment crystals differ. Using first experiment crystal for file-level data."
            )

        wavelengths = match_wavelengths(experiment_list)
        if len(wavelengths) > 1:
            logger.info(
                "Multiple wavelengths found: \n%s",
                "\n".join(
                    " Wavelength: %.5f, experiment numbers: %s "
                    % (k, ",".join(map(str, v)))
                    for k, v in wavelengths.items()
                ),
            )
    else:
        wavelengths = OrderedDict({experiment_list[0].beam.get_wavelength(): [0]})

    # also only work correctly with one panel (for the moment)
    if any(len(experiment.detector) != 1 for experiment in experiment_list):
        logger.warning("Ignoring multiple panels in output MTZ")

    best_unit_cell = params.mtz.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiment_list)
    integrated_data["d"] = best_unit_cell.d(integrated_data["miller_index"])

    # Clean up the data with the passed in options
    integrated_data = filter_reflection_table(
        integrated_data,
        intensity_choice=params.intensity,
        partiality_threshold=params.mtz.partiality_threshold,
        combine_partials=params.mtz.combine_partials,
        min_isigi=params.mtz.min_isigi,
        filter_ice_rings=params.mtz.filter_ice_rings,
        d_min=params.mtz.d_min,
    )

    # get batch offsets and image ranges - even for scanless experiments
    batch_offsets = [
        expt.scan.get_batch_offset()
        for expt in experiment_list
        if expt.scan is not None
    ]
    unique_offsets = set(batch_offsets)
    if len(unique_offsets) <= 1:
        logger.debug("Calculating new batches")
        batch_offsets = calculate_batch_offsets(experiment_list)
        batch_starts = [
            e.scan.get_image_range()[0] if e.scan else 0 for e in experiment_list
        ]
        effective_offsets = [o + s for o, s in zip(batch_offsets, batch_starts)]
        unique_offsets = set(effective_offsets)
    else:
        logger.debug("Keeping existing batches")
    image_ranges = get_image_ranges(experiment_list)
    if len(unique_offsets) != len(batch_offsets):
        raise ValueError(
            "Duplicate batch offsets detected: %s"
            % ", ".join(
                str(item) for item, count in Counter(batch_offsets).items() if count > 1
            )
        )

    # Create the mtz file
    mtz_writer = UnmergedMTZWriter(experiment_list[0].crystal.get_space_group())

    # FIXME TODO for more than one experiment into an MTZ file:
    #
    # - add an epoch (or recover an epoch) from the scan and add this as an extra
    #   column to the MTZ file for scaling, so we know that the two lattices were
    #   integrated at the same time
    # ✓ decide a sensible BATCH increment to apply to the BATCH value between
    #   experiments and add this

    for id_ in expids_in_table.keys():
        # Grab our subset of the data
        loc = expids_in_list.index(
            expids_in_table[id_]
        )  # get strid and use to find loc in list
        experiment = experiment_list[loc]
        if len(wavelengths) > 1:
            for i, (wl, exps) in enumerate(wavelengths.items()):
                if loc in exps:
                    wavelength = wl
                    dataset_id = i + 1
                    break
        else:
            wavelength = list(wavelengths.keys())[0]
            dataset_id = 1

        reflections = integrated_data.select(integrated_data["id"] == id_)
        batch_offset = batch_offsets[loc]
        image_range = image_ranges[loc]
        reflections = assign_batches_to_reflections([reflections], [batch_offset])[0]
        experiment.data = dict(reflections)

        s0n = matrix.col(experiment.beam.get_s0()).normalize().elems
        logger.debug("Beam vector: %.4f %.4f %.4f" % s0n)

        mtz_writer.add_batch_list(
            image_range,
            experiment,
            wavelength,
            dataset_id,
            batch_offset=batch_offset,
            force_static_model=params.mtz.force_static_model,
        )

        # Create the batch offset array. This gives us an experiment (id)-dependent
        # batch offset to calculate the correct batch from image number.
        experiment.data["batch_offset"] = flex.int(
            len(experiment.data["id"]), batch_offset
        )

        # Calculate whether we have a ROT value for this experiment, and set the column
        _, _, z = experiment.data["xyzcal.px"].parts()
        if experiment.scan:
            experiment.data["ROT"] = experiment.scan.get_angle_from_array_index(z)
        else:
            experiment.data["ROT"] = z

    mtz_writer.add_crystal(
        crystal_name=params.mtz.crystal_name,
        project_name=params.mtz.project_name,
        unit_cell=best_unit_cell,
    )  # Note: add unit cell here as may have changed basis since creating mtz.
    # For multi-wave unmerged mtz, we add an empty dataset for each wavelength,
    # but only write the data into the final dataset (for unmerged the batches
    # link the unmerged data to the individual wavelengths).
    for wavelength in wavelengths:
        mtz_writer.add_empty_dataset(wavelength)

    # Combine all of the experiment data columns before writing
    combined_data = {k: v.deep_copy() for k, v in experiment_list[0].data.items()}
    for experiment in experiment_list[1:]:
        for k, v in experiment.data.items():
            combined_data[k].extend(v)
    # ALL columns must be the same length
    assert len({len(v) for v in combined_data.values()}) == 1, "Column length mismatch"
    assert len(combined_data["id"]) == len(
        integrated_data["id"]
    ), "Lost rows in split/combine"

    # Write all the data and columns to the mtz file
    mtz_writer.write_columns(combined_data)

    logger.info(
        "Saving {} integrated reflections to {}".format(
            len(combined_data["id"]), params.mtz.hklout
        )
    )
    mtz_file = mtz_writer.mtz_file
    mtz_file.write(params.mtz.hklout)

    return mtz_file
def _run_ranges(self, ranges):
    """Convenience method to run the routine with a minimal experiment, and
    return the result as ranges of batch number"""
    input_data = [self.MockExperiment(x) for x in ranges]
    return offset_ranges(calculate_batch_offsets(input_data), ranges)
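# offset_ranges is imported from the batch-handling utilities; a plausible
# re-implementation sketch for orientation (hypothetical name, not the
# verified code):
def _offset_ranges_sketch(offsets, ranges):
    """Shift each (first, last) image range by its experiment's batch offset."""
    return [(first + o, last + o) for o, (first, last) in zip(offsets, ranges)]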
def _scale_prepare(self):
    """Perform all of the preparation required to deliver the scaled data. This
    should sort together the reflection files, ensure that they are correctly
    indexed (via dials.symmetry) and generally tidy things up."""

    # AIM discover symmetry and reindex with dials.symmetry, and set the correct
    # reflections in si.reflections, si.experiments

    self._helper.set_working_directory(self.get_working_directory())
    self._factory.set_working_directory(self.get_working_directory())

    need_to_return = False

    self._sweep_handler = SweepInformationHandler(self._scalr_integraters)

    p, x = self._sweep_handler.get_project_info()
    self._scalr_pname = p
    self._scalr_xname = x
    self._helper.set_pname_xname(p, x)

    Journal.block(
        "gathering",
        self.get_scaler_xcrystal().get_name(),
        "Dials",
        {"working directory": self.get_working_directory()},
    )

    # First do stuff to work out if excluding any data
    # Note - does this actually work? I couldn't seem to get it to work
    # in either this pipeline or the standard dials pipeline
    for epoch in self._sweep_handler.get_epochs():
        si = self._sweep_handler.get_sweep_information(epoch)
        intgr = si.get_integrater()
        _, xname, dname = si.get_project_info()
        sname = si.get_sweep_name()

        exclude_sweep = False
        for sweep in PhilIndex.params.xia2.settings.sweep:
            if sweep.id == sname and sweep.exclude:
                exclude_sweep = True
                break

        if exclude_sweep:
            self._sweep_handler.remove_epoch(epoch)
            Debug.write("Excluding sweep %s" % sname)
        else:
            Journal.entry({"adding data from": "%s/%s/%s" % (xname, dname, sname)})

    # If multiple files, want to run symmetry to check for consistent indexing
    # also

    # try to reproduce what CCP4ScalerA is doing

    # first assign identifiers to avoid dataset-id collisions
    # Idea is that this should be called anytime you get data anew from the
    # integrater, to intercept and assign unique ids, then set in the
    # sweep_information (si) and always use si.set_reflections/
    # si.get_reflections as we process.

    # self._sweep_handler = self._helper.assign_and_return_datasets(
    #     self._sweep_handler
    # )  # symmetry now sorts out identifiers.

    need_to_return = False

    if self._scalr_input_pointgroup:
        self._input_pointgroup_scale_prepare()
    elif (
        len(self._sweep_handler.get_epochs()) > 1
        and PhilIndex.params.xia2.settings.multi_sweep_indexing
    ):
        need_to_return = self._multi_sweep_scale_prepare()
    else:
        need_to_return = self._standard_scale_prepare()

    if need_to_return:
        self.set_scaler_done(False)
        self.set_scaler_prepare_done(False)
        return

    ### After this point, point group is good and only need to
    ### reindex to consistent setting. Don't need to call back to the
    ### integrater, just use the data in the sweep info.

    # First work out if we're going to reindex against external reference
    param = PhilIndex.params.xia2.settings.scale
    using_external_references = False
    reference_refl = None
    reference_expt = None
    if param.reference_reflection_file:
        if not param.reference_experiment_file:
            Chatter.write(
                """
No DIALS reference experiments file provided, reference
reflection file will not be used. Reference mtz files for
reindexing not currently supported for pipeline=dials
(supported for pipeline=dials-aimless).
"""
            )
        else:
            reference_refl = param.reference_reflection_file
            reference_expt = param.reference_experiment_file
            using_external_references = True
            Debug.write("Using reference reflections %s" % reference_refl)
            Debug.write("Using reference experiments %s" % reference_expt)

    if len(self._sweep_handler.get_epochs()) > 1:
        if PhilIndex.params.xia2.settings.unify_setting:
            self.unify_setting()

        if PhilIndex.params.xia2.settings.use_brehm_diederichs:
            self.brehm_diederichs_reindexing()
        # If not using Brehm-Diederichs reindexing, set reference as first
        # sweep, unless using external reference.
        elif not using_external_references:
            Debug.write("First sweep will be used as reference for reindexing")
            first = self._sweep_handler.get_epochs()[0]
            si = self._sweep_handler.get_sweep_information(first)
            reference_expt = si.get_experiments()
            reference_refl = si.get_reflections()

    # Now reindex to be consistent with first dataset - run reindex on each
    # dataset with reference (unless did Brehm-Diederichs and didn't supply
    # a reference file)
    if reference_refl and reference_expt:
        exp = load.experiment_list(reference_expt)
        reference_cell = exp[0].crystal.get_unit_cell().parameters()

        # ---------- REINDEX TO CORRECT (REFERENCE) SETTING ----------
        Chatter.write("Reindexing all datasets to common reference")

        if using_external_references:
            epochs = self._sweep_handler.get_epochs()
        else:
            epochs = self._sweep_handler.get_epochs()[1:]
        for epoch in epochs:
            # if we are working with unified UB matrix then this should not
            # be a problem here (note, *if*; *should*)
            # what about e.g. alternative P1 settings?
            # see JIRA MXSW-904
            if PhilIndex.params.xia2.settings.unify_setting:
                continue

            reindexer = DialsReindex()
            reindexer.set_working_directory(self.get_working_directory())
            auto_logfiler(reindexer)

            si = self._sweep_handler.get_sweep_information(epoch)
            reindexer.set_reference_filename(reference_expt)
            reindexer.set_reference_reflections(reference_refl)
            reindexer.set_indexed_filename(si.get_reflections())
            reindexer.set_experiments_filename(si.get_experiments())
            reindexer.run()

            # At this point, CCP4ScalerA would reset in integrater so that
            # the integrater calls reindex, no need to do that here as
            # have access to the files and will never need to reintegrate.
            si.set_reflections(reindexer.get_reindexed_reflections_filename())
            si.set_experiments(reindexer.get_reindexed_experiments_filename())

            # FIXME how to get some indication of the reindexing used?

            exp = load.experiment_list(
                reindexer.get_reindexed_experiments_filename()
            )
            cell = exp[0].crystal.get_unit_cell().parameters()

            # Note - no lattice check as this will already be caught by reindex
            Debug.write("Cell: %.2f %.2f %.2f %.2f %.2f %.2f" % cell)
            Debug.write("Ref: %.2f %.2f %.2f %.2f %.2f %.2f" % reference_cell)

            for j in range(6):
                if (
                    math.fabs((cell[j] - reference_cell[j]) / reference_cell[j])
                    > 0.1
                ):
                    raise RuntimeError(
                        "unit cell parameters differ in %s and %s"
                        % (reference_expt, si.get_reflections())
                    )

    # Now make sure all batches ok before finish preparing
    # This should be made safer, currently after dials.scale there is no
    # concept of 'batch', dials.export uses the calculate_batch_offsets
    # to assign batches, giving the same result as below.
    experiments_to_rebatch = []
    for epoch in self._sweep_handler.get_epochs():
        si = self._sweep_handler.get_sweep_information(epoch)
        experiment = si.get_experiments()
        experiments_to_rebatch.append(load.experiment_list(experiment)[0])
    offsets = calculate_batch_offsets(experiments_to_rebatch)

    for i, epoch in enumerate(self._sweep_handler.get_epochs()):
        si = self._sweep_handler.get_sweep_information(epoch)
        r = si.get_batch_range()
        si.set_batch_offset(offsets[i])
        si.set_batches([r[0] + offsets[i], r[1] + offsets[i]])
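# Worked illustration of the rebatching above (hedged: actual offsets come
# from calculate_batch_offsets): two sweeps with batch ranges (1, 200) and
# (1, 100) and offsets [0, 301] end up with batches (1, 200) and (302, 401),
# so no two sweeps ever share a batch number.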