Example #1
def test_assign_batches_to_reflections():
    """Test for namesake function"""
    reflections = [reflections_3(), reflections_3()]
    reflections = assign_batches_to_reflections(reflections,
                                                batch_offsets=[0, 100])
    assert list(reflections[0]["batch"]) == [1, 2]
    assert list(reflections[1]["batch"]) == [101, 102]
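A minimal sketch of the reflections_3() fixture the test assumes, on the understanding that assign_batches_to_reflections derives batches from the z component of xyzobs.px.value (batch = floor(z) + 1 + offset); the column layout here is illustrative, not taken from the original test module.

from dials.array_family import flex

def reflections_3():
    """Hypothetical fixture: two reflections whose centroids fall on images
    1 and 2, so the expected batches are [1, 2] before any offset."""
    refl = flex.reflection_table()
    refl["xyzobs.px.value"] = flex.vec3_double([(0.0, 0.0, 0.5),
                                                (0.0, 0.0, 1.5)])
    return refl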
Example #2
def reflection_tables_to_batch_dependent_properties(reflection_tables,
                                                    experiments,
                                                    scaled_array=None):
    """Extract batch dependent properties from a reflection table list."""
    offsets = calculate_batch_offsets(experiments)
    reflection_tables = assign_batches_to_reflections(reflection_tables,
                                                      offsets)
    # filter bad refls and negative scales
    batches = flex.int()
    scales = flex.double()
    for r in reflection_tables:
        sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
        sel &= r["inverse_scale_factor"] > 0
        batches.extend(r["batch"].select(sel))
        scales.extend(r["inverse_scale_factor"].select(sel))
    if scaled_array is None:
        scaled_array = scaled_data_as_miller_array(reflection_tables,
                                                   experiments)
    ms = scaled_array.customized_copy()
    batch_array = miller.array(ms, data=batches)

    batch_ranges = get_batch_ranges(experiments, offsets)
    batch_data = [{"id": i, "range": r} for i, r in enumerate(batch_ranges)]

    properties = batch_dependent_properties(batch_array, scaled_array,
                                            miller.array(ms, data=scales))

    return properties + (batch_data, )
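A hedged usage sketch for the function above, assuming experiments and a scaled reflection table are loaded with the standard dxtbx/DIALS readers; the file names are conventional DIALS output names used for illustration.

from dxtbx.model.experiment_list import ExperimentListFactory
from dials.array_family import flex

experiments = ExperimentListFactory.from_json_file("scaled.expt")
# one reflection table per dataset, as produced by dials.scale
tables = [flex.reflection_table.from_file("scaled.refl")]
properties = reflection_tables_to_batch_dependent_properties(tables, experiments)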
Example #3
    def from_reflections_and_experiments(cls, reflection_tables, experiments,
                                         params):
        """Construct the resolutionizer from native dials datatypes."""
        # add some assertions about data

        # do batch assignment (same functions as in dials.export)
        offsets = calculate_batch_offsets(experiments)
        reflection_tables = assign_batches_to_reflections(
            reflection_tables, offsets)
        batches = flex.int()
        intensities = flex.double()
        indices = flex.miller_index()
        variances = flex.double()
        for table in reflection_tables:
            if "intensity.scale.value" in table:
                table = filter_reflection_table(table, ["scale"],
                                                partiality_threshold=0.4)
                intensities.extend(table["intensity.scale.value"])
                variances.extend(table["intensity.scale.variance"])
            else:
                table = filter_reflection_table(table, ["profile"],
                                                partiality_threshold=0.4)
                intensities.extend(table["intensity.prf.value"])
                variances.extend(table["intensity.prf.variance"])
            indices.extend(table["miller_index"])
            batches.extend(table["batch"])

        crystal_symmetry = miller.crystal.symmetry(
            unit_cell=determine_best_unit_cell(experiments),
            space_group=experiments[0].crystal.get_space_group(),
            assert_is_compatible_unit_cell=False,
        )
        miller_set = miller.set(crystal_symmetry,
                                indices,
                                anomalous_flag=False)
        i_obs = miller.array(miller_set,
                             data=intensities,
                             sigmas=flex.sqrt(variances))
        i_obs.set_observation_type_xray_intensity()
        i_obs.set_info(miller.array_info(source="DIALS", source_type="refl"))

        ms = i_obs.customized_copy()
        batch_array = miller.array(ms, data=batches)

        if params.reference is not None:
            reference, _ = miller_array_from_mtz(params.reference,
                                                 anomalous=params.anomalous,
                                                 labels=params.labels)
        else:
            reference = None

        return cls(i_obs, params, batches=batch_array, reference=reference)
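A hedged sketch of invoking this classmethod; the owning class name (Resolutionizer) and the phil_scope providing params are assumptions for illustration, not shown in the example above.

# illustrative only: class name and phil_scope are assumed
params = phil_scope.extract()
resolutionizer = Resolutionizer.from_reflections_and_experiments(
    reflection_tables, experiments, params)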
Example #4
    def reflections_as_miller_arrays(self, combined=False):
        from dials.util.batch_handling import assign_batches_to_reflections
        from dials.report.analysis import scaled_data_as_miller_array

        reflection_tables = []
        for id_ in set(self._reflections["id"]).difference({-1}):
            reflection_tables.append(
                self._reflections.select(self._reflections["id"] == id_))

        offsets = [expt.scan.get_batch_offset() for expt in self._experiments]
        reflection_tables = assign_batches_to_reflections(
            reflection_tables, offsets)

        if combined:
            # filter bad refls and negative scales
            batches = flex.int()
            scales = flex.double()

            for r in reflection_tables:
                sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
                sel &= r["inverse_scale_factor"] > 0
                batches.extend(r["batch"].select(sel))
                scales.extend(r["inverse_scale_factor"].select(sel))
            scaled_array = scaled_data_as_miller_array(reflection_tables,
                                                       self._experiments)
            batch_array = miller.array(scaled_array, data=batches)
            scale_array = miller.array(scaled_array, data=scales)
            return scaled_array, batch_array, scale_array

        else:
            scaled_arrays = []
            batch_arrays = []
            scale_arrays = []
            for expt, r in zip(self._experiments, reflection_tables):
                sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
                sel &= r["inverse_scale_factor"] > 0
                batches = r["batch"].select(sel)
                scales = r["inverse_scale_factor"].select(sel)
                scaled_arrays.append(scaled_data_as_miller_array([r], [expt]))
                batch_arrays.append(
                    miller.array(scaled_arrays[-1], data=batches))
                scale_arrays.append(
                    miller.array(scaled_arrays[-1], data=scales))
            return scaled_arrays, batch_arrays, scale_arrays
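A hedged usage sketch, assuming the owning object (called exporter here for illustration) was constructed with the experiments and reflections to convert.

# combined=True merges every dataset into one scaled/batch/scale array triple
scaled, batches, scales = exporter.reflections_as_miller_arrays(combined=True)

# the default returns parallel per-experiment lists instead
scaled_list, batch_list, scale_list = exporter.reflections_as_miller_arrays()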
Example #5
def export_mtz(integrated_data, experiment_list, params):
    """Export data from integrated_data corresponding to experiment_list to an
    MTZ file hklout."""

    # if mtz filename is auto, then choose scaled.mtz or integrated.mtz
    if params.mtz.hklout in (None, Auto, "auto"):
        if ("intensity.scale.value"
                in integrated_data) and ("intensity.scale.variance"
                                         in integrated_data):
            params.mtz.hklout = "scaled.mtz"
            logger.info(
                "Data appears to be scaled, setting mtz.hklout = 'scaled.mtz'")
        else:
            params.mtz.hklout = "integrated.mtz"
            logger.info(
                "Data appears to be unscaled, setting mtz.hklout = 'integrated.mtz'"
            )

    # First get the experiment identifier information out of the data
    expids_in_table = integrated_data.experiment_identifiers()
    if not list(expids_in_table.keys()):
        reflection_tables = parse_multiple_datasets([integrated_data])
        experiment_list, refl_list = assign_unique_identifiers(
            experiment_list, reflection_tables)
        integrated_data = flex.reflection_table()
        for reflections in refl_list:
            integrated_data.extend(reflections)
        expids_in_table = integrated_data.experiment_identifiers()
    integrated_data.assert_experiment_identifiers_are_consistent(
        experiment_list)
    expids_in_list = list(experiment_list.identifiers())

    # Convert experiment_list to a real python list or else identity assumptions
    # fail like:
    #   assert experiment_list[0] is experiment_list[0]
    # And assumptions about added attributes break
    experiment_list = list(experiment_list)

    # Validate multi-experiment assumptions
    if len(experiment_list) > 1:
        # All experiments should have matching crystals; otherwise we need multiple crystals/datasets
        if not all(x.crystal == experiment_list[0].crystal
                   for x in experiment_list[1:]):
            logger.warning(
                "Experiment crystals differ. Using first experiment crystal for file-level data."
            )

        wavelengths = match_wavelengths(experiment_list)
        if len(wavelengths) > 1:
            logger.info(
                "Multiple wavelengths found: \n%s",
                "\n".join("  Wavlength: %.5f, experiment numbers: %s " %
                          (k, ",".join(map(str, v)))
                          for k, v in wavelengths.items()),
            )
    else:
        wavelengths = OrderedDict(
            {experiment_list[0].beam.get_wavelength(): [0]})

    # this will also only work correctly with one panel (for the moment)
    if any(len(experiment.detector) != 1 for experiment in experiment_list):
        logger.warning("Ignoring multiple panels in output MTZ")

    best_unit_cell = params.mtz.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiment_list)
    integrated_data["d"] = best_unit_cell.d(integrated_data["miller_index"])

    # Clean up the data with the passed in options
    integrated_data = filter_reflection_table(
        integrated_data,
        intensity_choice=params.intensity,
        partiality_threshold=params.mtz.partiality_threshold,
        combine_partials=params.mtz.combine_partials,
        min_isigi=params.mtz.min_isigi,
        filter_ice_rings=params.mtz.filter_ice_rings,
        d_min=params.mtz.d_min,
    )

    # get batch offsets and image ranges - even for scanless experiments
    batch_offsets = [
        expt.scan.get_batch_offset() for expt in experiment_list
        if expt.scan is not None
    ]
    unique_offsets = set(batch_offsets)
    if len(unique_offsets) <= 1:
        logger.debug("Calculating new batches")
        batch_offsets = calculate_batch_offsets(experiment_list)
        batch_starts = [
            e.scan.get_image_range()[0] if e.scan else 0
            for e in experiment_list
        ]
        effective_offsets = [
            o + s for o, s in zip(batch_offsets, batch_starts)
        ]
        unique_offsets = set(effective_offsets)
    else:
        logger.debug("Keeping existing batches")
    image_ranges = get_image_ranges(experiment_list)
    if len(unique_offsets) != len(batch_offsets):
        raise ValueError("Duplicate batch offsets detected: %s" % ", ".join(
            str(item)
            for item, count in Counter(batch_offsets).items() if count > 1))

    # Create the mtz file
    mtz_writer = UnmergedMTZWriter(
        experiment_list[0].crystal.get_space_group())

    # FIXME TODO for more than one experiment into an MTZ file:
    #
    # - add an epoch (or recover an epoch) from the scan and add this as an extra
    #   column to the MTZ file for scaling, so we know that the two lattices were
    #   integrated at the same time
    # - decide a sensible BATCH increment to apply to the BATCH value between
    #   experiments and add this

    for id_ in expids_in_table.keys():
        # Grab our subset of the data
        loc = expids_in_list.index(
            expids_in_table[id_])  # get strid and use to find loc in list
        experiment = experiment_list[loc]
        if len(wavelengths) > 1:
            for i, (wl, exps) in enumerate(wavelengths.items()):
                if loc in exps:
                    wavelength = wl
                    dataset_id = i + 1
                    break
        else:
            wavelength = list(wavelengths.keys())[0]
            dataset_id = 1
        reflections = integrated_data.select(integrated_data["id"] == id_)
        batch_offset = batch_offsets[loc]
        image_range = image_ranges[loc]
        reflections = assign_batches_to_reflections([reflections],
                                                    [batch_offset])[0]
        experiment.data = dict(reflections)

        s0n = matrix.col(experiment.beam.get_s0()).normalize().elems
        logger.debug("Beam vector: %.4f %.4f %.4f" % s0n)

        mtz_writer.add_batch_list(
            image_range,
            experiment,
            wavelength,
            dataset_id,
            batch_offset=batch_offset,
            force_static_model=params.mtz.force_static_model,
        )

        # Create the batch offset array. This gives us an experiment (id)-dependent
        # batch offset to calculate the correct batch from image number.
        experiment.data["batch_offset"] = flex.int(len(experiment.data["id"]),
                                                   batch_offset)

        # Calculate whether we have a ROT value for this experiment, and set the column
        _, _, z = experiment.data["xyzcal.px"].parts()
        if experiment.scan:
            experiment.data[
                "ROT"] = experiment.scan.get_angle_from_array_index(z)
        else:
            experiment.data["ROT"] = z

    mtz_writer.add_crystal(
        crystal_name=params.mtz.crystal_name,
        project_name=params.mtz.project_name,
        unit_cell=best_unit_cell,
    )
    # Note: add unit cell here as may have changed basis since creating mtz.
    # For multi-wave unmerged mtz, we add an empty dataset for each wavelength,
    # but only write the data into the final dataset (for unmerged the batches
    # link the unmerged data to the individual wavelengths).
    for wavelength in wavelengths:
        mtz_writer.add_empty_dataset(wavelength)

    # Combine all of the experiment data columns before writing
    combined_data = {
        k: v.deep_copy()
        for k, v in experiment_list[0].data.items()
    }
    for experiment in experiment_list[1:]:
        for k, v in experiment.data.items():
            combined_data[k].extend(v)
    # ALL columns must be the same length
    column_lengths = {len(v) for v in combined_data.values()}
    assert len(column_lengths) == 1, "Column length mismatch"
    assert len(combined_data["id"]) == len(
        integrated_data["id"]), "Lost rows in split/combine"

    # Write all the data and columns to the mtz file
    mtz_writer.write_columns(combined_data)

    logger.info("Saving {} integrated reflections to {}".format(
        len(combined_data["id"]), params.mtz.hklout))
    mtz_file = mtz_writer.mtz_file
    mtz_file.write(params.mtz.hklout)

    return mtz_file
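A hedged end-to-end sketch of calling export_mtz, assuming integrated data and experiments are read from DIALS output files and that params carries the dials.export PHIL structure (params.mtz.hklout, params.intensity and the filtering options used above).

from dxtbx.model.experiment_list import ExperimentListFactory
from dials.array_family import flex

experiments = ExperimentListFactory.from_json_file("integrated.expt")
reflections = flex.reflection_table.from_file("integrated.refl")
# hklout left at Auto, so the function picks integrated.mtz or scaled.mtz
mtz_object = export_mtz(reflections, experiments, params)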
Example #6
def filtered_arrays_from_experiments_reflections(
    experiments,
    reflections,
    outlier_rejection_after_filter=False,
    partiality_threshold=0.99,
    return_batches=False,
):
    """Create a list of filtered arrays from experiments and reflections.

    A partiality threshold can be set. If outlier_rejection_after_filter
    is True and intensity.scale values are not present, a round of
    outlier rejection will take place.

    Raises:
        ValueError: if no datasets remain after filtering.
    """
    miller_arrays = []
    ids_to_del = []

    if return_batches:
        assert all(expt.scan is not None for expt in experiments)
        batch_offsets = [expt.scan.get_batch_offset() for expt in experiments]
        reflections = assign_batches_to_reflections(reflections, batch_offsets)
        batch_arrays = []

    for idx, (expt, refl) in enumerate(zip(experiments, reflections)):
        crystal_symmetry = crystal.symmetry(
            unit_cell=expt.crystal.get_unit_cell(),
            space_group=expt.crystal.get_space_group(),
        )

        # want to use scale intensities if present, else sum + prf (if available)
        if "intensity.scale.value" in refl:
            intensity_choice = ["scale"]
            intensity_to_use = "intensity.scale"
        else:
            assert "intensity.sum.value" in refl
            intensity_to_use = "intensity.sum"
            intensity_choice = ["sum"]
            if "intensity.prf.value" in refl:
                intensity_choice.append("profile")
                intensity_to_use = "intensity.prf"

        try:
            logger.info("Filtering reflections for dataset %s", idx)
            refl = filter_reflection_table(
                refl,
                intensity_choice,
                min_isigi=-5,
                filter_ice_rings=False,
                combine_partials=True,
                partiality_threshold=partiality_threshold,
            )
        except ValueError:
            logger.info(
                "Dataset %s removed as no reflections left after filtering",
                idx)
            ids_to_del.append(idx)
        else:
            # If scale was chosen, scale values will be present or a
            # ValueError was raised above. If prf or sum was chosen, prf
            # values may be missing but we still want to continue.
            try:
                refl["intensity"] = refl[intensity_to_use + ".value"]
                refl["variance"] = refl[intensity_to_use + ".variance"]
            except KeyError:  # catch case where prf were removed.
                refl["intensity"] = refl["intensity.sum.value"]
                refl["variance"] = refl["intensity.sum.variance"]
            if outlier_rejection_after_filter and intensity_to_use != "intensity.scale":
                refl = reject_outliers(refl, expt, method="simple", zmax=12.0)
                refl = refl.select(
                    ~refl.get_flags(refl.flags.outlier_in_scaling))

            miller_set = miller.set(crystal_symmetry,
                                    refl["miller_index"],
                                    anomalous_flag=False)
            intensities = miller_set.array(data=refl["intensity"],
                                           sigmas=flex.sqrt(refl["variance"]))
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(
                miller.array_info(source="DIALS", source_type="pickle"))
            miller_arrays.append(intensities)
            if return_batches:
                batch_arrays.append(
                    miller_set.array(data=refl["batch"]).set_info(
                        intensities.info()))

    if not miller_arrays:
        raise ValueError(
            """No datasets remain after pre-filtering. Please check input data.
The datasets may not contain any full reflections; the command line
option partiality_threshold can be lowered to include partials.""")

    for id_ in ids_to_del[::-1]:
        del experiments[id_]
        del reflections[id_]

    if return_batches:
        return miller_arrays, batch_arrays
    return miller_arrays
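A hedged usage sketch, assuming one reflection table per experiment (for example the output of dials.scale split by dataset); with return_batches=True the batch arrays are returned in the same order as the intensity arrays.

miller_arrays, batch_arrays = filtered_arrays_from_experiments_reflections(
    experiments,
    reflection_tables,
    outlier_rejection_after_filter=False,
    partiality_threshold=0.99,
    return_batches=True,
)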