def test_raise_exception_when_excluding_non_existing_dataset(
    experiments_024, reflections_024
):
    """Asking to exclude identifier "1" must raise ValueError.

    Other tests in this file show the ``*_024`` fixtures carrying the
    identifiers "0", "2" and "4", so "1" is not among them.
    """
    unknown_identifier = "1"
    with pytest.raises(ValueError):
        # The return value is irrelevant: the call itself is expected to fail.
        select_datasets_on_ids(
            experiments_024,
            reflections_024,
            exclude_datasets=[unknown_identifier],
        )
Esempio n. 2
0
    def remove_bad_data(self):
        """Drop the target dataset, scaler-removed datasets and tiny scales.

        Three steps are applied in order:

        1. If any of the ``target_model``, ``target_mtz`` or ``only_target``
           scaling options is set, the last experiment and the last
           reflection table are dropped.
        2. Datasets listed in ``self.scaler.removed_datasets`` are excluded
           through ``select_datasets_on_ids``.
        3. Reflections with ``inverse_scale_factor`` below 0.001 are flagged
           as ``excluded_for_scaling``.
        """
        options = self.params.scaling_options
        if options.target_model or options.target_mtz or options.only_target:
            # The slice drops the final entry of each list.
            self.experiments = self.experiments[:-1]
            self.reflections = self.reflections[:-1]

        removed_ids = self.scaler.removed_datasets
        if removed_ids:
            logger.info("deleting removed datasets from memory: %s",
                        removed_ids)
            known_identifiers = list(self.experiments.identifiers())
            # Translate each removed identifier into its position in the
            # experiment list; those positions are what gets excluded.
            # NOTE(review): other call sites pass identifier strings to
            # exclude_datasets - confirm which form this version of
            # select_datasets_on_ids expects.
            positions = []
            for identifier in removed_ids:
                positions.append(known_identifiers.index(identifier))
            self.experiments, self.reflections = select_datasets_on_ids(
                self.experiments,
                self.reflections,
                exclude_datasets=positions,
            )

        # Flag negative scales (in fact anything below 0.001).
        n_excluded = 0
        for table in self.reflections:
            too_small = table["inverse_scale_factor"] < 0.001
            n_excluded += too_small.count(True)
            table.set_flags(too_small, table.flags.excluded_for_scaling)
        if n_excluded > 0:
            logger.info("%s reflections excluded: scale factor < 0.001",
                        n_excluded)
def test_exclude_specific_datasets_using_id(experiments_024, reflections_024):
    """Excluding identifier "0" leaves the "2" and "4" datasets behind."""
    kept_experiments, kept_tables = select_datasets_on_ids(
        experiments_024, reflections_024, exclude_datasets=["0"]
    )
    expected_identifiers = ["2", "4"]
    assert len(kept_tables) == 2
    assert list(kept_experiments.identifiers()) == expected_identifiers
    assert len(kept_experiments) == 2
def test_raise_exception_when_not_all_identifiers_set(experiments, reflections_024):
    """Selection must raise ValueError when identifiers are incomplete.

    Only the first two experiments receive an identifier here; any further
    experiments in the fixture are presumably left without one.
    """
    for position, identifier in enumerate(("0", "2")):
        experiments[position].identifier = identifier
    with pytest.raises(ValueError):
        select_datasets_on_ids(experiments, reflections_024, use_datasets=["2"])
def test_select_specific_datasets_using_id(experiments_024, reflections_024):
    """Requesting identifiers "0" and "2" returns exactly those datasets."""
    wanted = ["0", "2"]
    chosen_experiments, chosen_tables = select_datasets_on_ids(
        experiments_024, reflections_024, use_datasets=wanted
    )
    assert len(chosen_experiments) == 2
    assert len(chosen_tables) == 2
    assert list(chosen_experiments.identifiers()) == ["0", "2"]
Esempio n. 6
0
def test_raise_exception_when_selecting_and_excluding_datasets_at_same_time(
        experiments_024, reflections_024):
    """Passing both use_datasets and exclude_datasets must raise ValueError."""
    conflicting_options = {
        "use_datasets": ["2", "4"],
        "exclude_datasets": ["0"],
    }
    with pytest.raises(ValueError):
        select_datasets_on_ids(
            experiments_024, reflections_024, **conflicting_options
        )
Esempio n. 7
0
    def _filter_min_reflections(self, experiments, reflections):
        """Select the datasets holding at least ``params.min_reflections`` rows.

        Experiments and reflection tables are paired positionally; the
        identifiers of the pairs whose table is long enough are handed to
        ``select_datasets_on_ids`` as ``use_datasets`` and its result is
        returned unchanged.
        """
        large_enough = [
            expt.identifier
            for expt, table in zip(experiments, reflections)
            if len(table) >= self.params.min_reflections
        ]
        return select_datasets_on_ids(
            experiments, reflections, use_datasets=large_enough
        )
def test_correct_handling_with_multi_dataset_table(experiments_024):
    """Excluding "2" from a single combined table drops only that dataset.

    One reflection table carries three datasets (table ids 0, 1, 2 mapped to
    identifiers "0", "2", "4"); after exclusion the table keeps identifiers
    "0" and "4" and the rows with ids 0 and 2.
    """
    combined = flex.reflection_table()
    combined["id"] = flex.int([0, 1, 2])
    for table_id, identifier in enumerate(("0", "2", "4")):
        combined.experiment_identifiers()[table_id] = identifier

    _, tables = select_datasets_on_ids(
        experiments_024, [combined], exclude_datasets=["2"]
    )
    surviving = tables[0]
    assert list(surviving.experiment_identifiers().values()) == ["0", "4"]
    assert list(surviving["id"]) == [0, 2]
Esempio n. 9
0
    def __init__(self, experiments, reflections, params=None):
        """Prepare the datasets and build the ``CosymAnalysis`` object.

        Args:
            experiments: the experiments to analyse.
            reflections: the reflection tables, paired positionally with
                ``experiments``.
            params: analysis parameters; when None the defaults are taken
                from ``phil_scope.extract()``.

        The datasets are filtered on reflection count, mapped to the
        primitive setting, optionally reduced to the subset returned by unit
        cell clustering, mapped to the minimum cell and finally converted by
        ``filtered_arrays_from_experiments_reflections`` for the analysis.
        """
        super(cosym, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        self.params = phil_scope.extract() if params is None else params

        # Drop datasets with too few reflections, then go to the primitive
        # setting.
        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, reflections)
        self._experiments, self._reflections = self._map_to_primitive(
            self._experiments, self._reflections)

        # Unit cell clustering only makes sense with more than one dataset.
        if len(self._experiments) > 1:
            identifiers = self._unit_cell_clustering(self._experiments)
            n_selected = len(identifiers)
            if n_selected < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    n_selected,
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_ids(
                    self._experiments,
                    self._reflections,
                    use_datasets=identifiers,
                )

        self._experiments, self._reflections = self._map_to_minimum_cell(
            self._experiments, self._reflections)

        # Transform the models into miller arrays.
        # NOTE(review): self.experiments / self.reflections are read here
        # while the underscore attributes are written above - presumably
        # they are properties exposing the same objects; confirm.
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=self.params.partiality_threshold,
        )

        self.cosym_analysis = CosymAnalysis(datasets, self.params)
Esempio n. 10
0
    def _map_to_primitive(self, experiments, reflections):
        """Map each crystal and its reflections to the primitive setting.

        For every (experiment, reflection table) pair, systematically absent
        reflections are removed, the miller indices are transformed with the
        change-of-basis operator to the primitive setting, and the crystal
        model is replaced by its change-of-basis counterpart.

        Args:
            experiments: the experiments, modified in place (crystal models).
            reflections: list of reflection tables paired positionally with
                ``experiments``.

        Returns:
            The result of ``select_datasets_on_ids`` applied to the
            experiments and the processed reflection tables.
        """
        identifiers = []
        # Collect the processed tables explicitly. The filtered table is the
        # value returned by select(); rebinding the loop variable alone would
        # leave the input list pointing at the unfiltered, un-reindexed table
        # while the crystal has already been moved to the primitive setting.
        mapped_reflections = []

        for expt, refl in zip(experiments, reflections):
            cb_op_to_primitive = (expt.crystal.get_crystal_symmetry().
                                  change_of_basis_op_to_primitive_setting())
            sel = expt.crystal.get_space_group().is_sys_absent(
                refl["miller_index"])
            n_absent = sel.count(True)
            if n_absent:
                logger.info(
                    "Eliminating %i systematic absences for experiment %s",
                    n_absent,
                    expt.identifier,
                )
                refl = refl.select(~sel)
            refl["miller_index"] = cb_op_to_primitive.apply(
                refl["miller_index"])
            expt.crystal = expt.crystal.change_basis(cb_op_to_primitive)
            identifiers.append(expt.identifier)
            mapped_reflections.append(refl)

        return select_datasets_on_ids(experiments,
                                      mapped_reflections,
                                      use_datasets=identifiers)
Esempio n. 11
0
    def remove_unwanted_datasets(self):
        """Remove the target dataset and any datasets the scaler removed.

        If any of the ``target_model``, ``target_mtz`` or ``only_target``
        scaling options is set, the last experiment and the last reflection
        table are dropped. Afterwards the identifiers listed in
        ``self.scaler.removed_datasets`` are excluded through
        ``select_datasets_on_ids``.
        """
        # first remove target refl/exps
        options = self.params.scaling_options
        if options.target_model or options.target_mtz or options.only_target:
            self.experiments = self.experiments[:-1]
            self.reflections = self.reflections[:-1]

        # remove any bad datasets; the identifiers are passed straight to
        # select_datasets_on_ids, so no positional lookup is needed here.
        removed_ids = self.scaler.removed_datasets
        if removed_ids:
            logger.info("deleting removed datasets from memory: %s",
                        removed_ids)
            self.experiments, self.reflections = select_datasets_on_ids(
                self.experiments,
                self.reflections,
                exclude_datasets=removed_ids)
Esempio n. 12
0
    def remove_image_ranges_below_cutoff(
        experiments,
        reflections,
        ids_to_remove,
        image_group_to_expid_and_range,
        expid_to_image_groups,
        results_summary,
    ):
        """Remove image ranges from the datasets.

        Only image groups that are the last group of their experiment are
        removed; removal is repeated until a pass removes nothing, so a run
        of trailing groups can be peeled off one after another.

        Args:
            experiments: the experiments matching ``reflections``.
            reflections: a single reflection table (it is split per
                experiment id inside this function).
            ids_to_remove: image group ids selected for removal.
            image_group_to_expid_and_range: maps an image group id to a
                ``(exp_id, image_range)`` pair.
            expid_to_image_groups: maps an experiment id to its list of image
                group ids; modified in place as groups are removed.
            results_summary: dict whose ``"dataset_removal"`` entry is
                updated with what was removed.

        Returns:
            A single reflection table built from the remaining per-experiment
            tables.
        """
        # Number of reflections not flagged bad_for_scaling before filtering.
        n_valid_reflections = reflections.get_flags(
            reflections.flags.bad_for_scaling, all=False).count(False)

        experiments_to_delete = []
        exclude_images = []
        image_ranges_removed = []  # track for results summary
        # Start non-zero so the loop body runs at least once.
        n_removed_this_cycle = 1
        while n_removed_this_cycle != 0:
            other_potential_ids_to_remove = []
            n_removed_this_cycle = 0
            for id_ in sorted(ids_to_remove):
                exp_id, image_range = image_group_to_expid_and_range[
                    id_]  # numerical id
                identifier = reflections.experiment_identifiers()[exp_id]
                if expid_to_image_groups[exp_id][-1] == id_:  # is last group
                    image_ranges_removed.append([image_range, exp_id])
                    logger.info(
                        "Removing image range %s from experiment %s",
                        image_range,
                        identifier,
                    )
                    # Each entry is a one-element list holding
                    # "<identifier>:<first image>:<last image>".
                    exclude_images.append([
                        identifier + ":" + str(image_range[0]) + ":" +
                        str(image_range[1])
                    ])
                    # Dropping the group may expose the preceding group as
                    # the new last one, which the next pass can then remove.
                    del expid_to_image_groups[exp_id][-1]
                    n_removed_this_cycle += 1
                else:
                    other_potential_ids_to_remove.append(id_)
            # Retry only the groups that could not be removed in this pass.
            ids_to_remove = other_potential_ids_to_remove
        # Whatever is left was never at the end of its sweep: report it only.
        for id_ in other_potential_ids_to_remove:
            exp_id, image_range = image_group_to_expid_and_range[id_]
            identifier = reflections.experiment_identifiers()[exp_id]
            logger.info(
                """Image range %s from experiment %s is below the cutoff, but not at the end of a sweep.""",
                image_range,
                identifier,
            )

        # Now remove individual batches
        # Rows with id -1 are discarded before splitting per experiment.
        if -1 in reflections["id"]:
            reflections = reflections.select(reflections["id"] != -1)
        reflection_list = reflections.split_by_experiment_id()
        reflection_list, experiments = exclude_image_ranges_for_scaling(
            reflection_list, experiments, exclude_images)
        # if a whole experiment has been excluded: need to remove it here

        for exp in experiments:
            if not exp.scan.get_valid_image_ranges(
                    exp.identifier):  # if all removed above
                experiments_to_delete.append(exp.identifier)
        if experiments_to_delete:
            experiments, reflection_list = select_datasets_on_ids(
                experiments,
                reflection_list,
                exclude_datasets=experiments_to_delete)
        # Sanity check: one reflection table per remaining experiment.
        assert len(reflection_list) == len(experiments)

        # Recombine the per-experiment tables into one table.
        output_reflections = flex.reflection_table()
        for r in reflection_list:
            output_reflections.extend(r)

        n_valid_filtered_reflections = output_reflections.get_flags(
            output_reflections.flags.bad_for_scaling, all=False).count(False)
        # Record what was removed; the reflection count is the drop in
        # reflections not flagged bad_for_scaling.
        results_summary["dataset_removal"].update({
            "image_ranges_removed":
            image_ranges_removed,
            "experiments_fully_removed":
            experiments_to_delete,
            "n_reflections_removed":
            n_valid_reflections - n_valid_filtered_reflections,
        })
        return output_reflections
Esempio n. 13
0
def prepare_input(params, experiments, reflections):
    """Check the input data and get it ready for scaling.

    The steps run in this order: optional dataset selection, splitting into
    one reflection table per experiment, identifier assignment, reset of the
    ``bad_for_scaling`` and ``scaled`` flags, image range exclusion, optional
    consistent-indexing check with cosym, space group consistency check,
    optional appending of a target dataset, and resolution / partiality cuts.

    Returns:
        The tuple ``(params, experiments, reflections)``.

    Raises:
        ValueError: if the number of experiments and reflection tables
            differs, or if the experiments do not share one space group.
            Helpers called here may raise it for further reasons.

    """
    # Dataset selection happens first, before the data are split into
    # individual reflection tables and identifiers are assigned.
    selection = params.dataset_selection
    if selection.exclude_datasets or selection.use_datasets:
        experiments, reflections = select_datasets_on_ids(
            experiments,
            reflections,
            selection.exclude_datasets,
            selection.use_datasets,
        )
        retained_ids = flex.size_t()
        for table in reflections:
            retained_ids.extend(table.experiment_identifiers().keys())
        logger.info(
            "\nDataset ids for retained datasets are: %s \n",
            ",".join(str(i) for i in retained_ids),
        )

    # One reflection table per experiment from here on.
    logger.info("Checking for the existence of a reflection table \n"
                "containing multiple datasets \n")
    reflections = parse_multiple_datasets(reflections)
    n_tables = len(reflections)
    n_experiments = len(experiments)
    logger.info(
        "Found %s reflection tables & %s experiments in total.",
        n_tables,
        n_experiments,
    )
    if n_experiments != n_tables:
        raise ValueError(
            "Mismatched number of experiments and reflection tables found.")

    # Assign experiment identifiers and report them.
    experiments, reflections = assign_unique_identifiers(
        experiments, reflections)
    all_ids = itertools.chain.from_iterable(
        table.experiment_identifiers().keys() for table in reflections)
    logger.info("\nDataset ids are: %s \n",
                ",".join(str(i) for i in all_ids))

    # Clear the bad_for_scaling and scaled flags on every reflection.
    for table in reflections:
        table.unset_flags(flex.bool(len(table), True),
                          table.flags.bad_for_scaling)
        table.unset_flags(flex.bool(table.size(), True), table.flags.scaled)

    reflections, experiments = exclude_image_ranges_for_scaling(
        reflections, experiments, params.exclude_images)

    # Optional consistent-indexing check, useful for targeted / incremental
    # scaling.
    scaling_options = params.scaling_options
    if scaling_options.check_consistent_indexing:
        logger.info("Running dials.cosym to check consistent indexing:\n")
        cosym_params = cosym_phil_scope.extract()
        cosym_params.nproc = scaling_options.nproc
        cosym_instance = cosym(experiments, reflections, cosym_params)
        cosym_instance.run()
        experiments = cosym_instance.experiments
        reflections = cosym_instance.reflections
        logger.info("Finished running dials.cosym, continuing with scaling.\n")

    # All experiments must share a single space group.
    distinct_space_groups = {
        expt.crystal.get_space_group().type().number() for expt in experiments
    }
    if len(distinct_space_groups) > 1:
        message_template = """The experiments have different space groups:
            space group numbers found: %s
            Please reanalyse the data so that space groups are consistent,
            (consider using dials.reindex, dials.symmetry or dials.cosym) or
            remove incompatible experiments (using the option exclude_datasets=)"""
        raise ValueError(
            message_template
            % ", ".join(str(number) for number in distinct_space_groups))
    logger.info(
        "Space group being used during scaling is %s",
        experiments[0].crystal.get_space_group().info(),
    )

    # For targeted scaling, the target is appended as an extra experiment
    # and reflection table at the end of the lists.
    if scaling_options.target_model:
        logger.info("Extracting data from structural model.")
        target_expt, target_table = create_datastructures_for_structural_model(
            reflections, experiments, scaling_options.target_model)
        experiments.append(target_expt)
        reflections.append(target_table)
    elif scaling_options.target_mtz:
        logger.info("Extracting data from merged mtz.")
        target_expt, target_table = create_datastructures_for_target_mtz(
            experiments, scaling_options.target_mtz)
        experiments.append(target_expt)
        reflections.append(target_table)

    # Non-batch cuts, applied to every table including the target dataset.
    best_unit_cell = params.reflection_selection.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiments)
    cuts = params.cut_data
    for table in reflections:
        excluded_flag = table.flags.user_excluded_in_scaling
        if cuts.d_min or cuts.d_max:
            d_spacings = best_unit_cell.d(table["miller_index"])
            if cuts.d_min:
                table.set_flags(d_spacings < cuts.d_min, excluded_flag)
            if cuts.d_max:
                table.set_flags(d_spacings > cuts.d_max, excluded_flag)
        if cuts.partiality_cutoff and "partiality" in table:
            table.set_flags(
                table["partiality"] < cuts.partiality_cutoff,
                excluded_flag,
            )
    return params, experiments, reflections
def test_selecting_everything_is_identity_function(experiments_024, reflections_024):
    """With no selection options the very same input objects come back."""
    returned_experiments, returned_tables = select_datasets_on_ids(
        experiments_024, reflections_024
    )
    # Identity, not equality: no copy may be made.
    assert returned_experiments is experiments_024
    assert returned_tables is reflections_024