Example #1
    def _filter_min_reflections(self, experiments, reflections):
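        """Keep only datasets with at least min_reflections reflections."""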
        identifiers = []

        for expt, refl in zip(experiments, reflections):
            if len(refl) >= self.params.min_reflections:
                identifiers.append(expt.identifier)

        return select_datasets_on_identifiers(experiments,
                                              reflections,
                                              use_datasets=identifiers)
Example #2
    def _filter_min_reflections(self, experiments, reflections, uuid_cache_in):
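        """Keep only datasets with at least min_reflections reflections,
        keeping self.uuid_cache in step with the surviving experiments."""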
        identifiers = []
        self.uuid_cache = []
        for expt, refl, uuid in zip(experiments, reflections, uuid_cache_in):
            if len(refl) >= self.params.min_reflections:
                identifiers.append(expt.identifier)
                self.uuid_cache.append(uuid)

        return select_datasets_on_identifiers(experiments,
                                              reflections,
                                              use_datasets=identifiers)
Example #3
    def remove_image_ranges_below_cutoff(
        experiments,
        reflections,
        ids_to_remove,
        image_group_to_expid_and_range,
        expid_to_image_groups,
        results_summary,
    ):
        """Remove image ranges from the datasets."""
        n_valid_reflections = reflections.get_flags(
            reflections.flags.scaled).count(True)
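        # invert experiment_identifiers(): map identifier string -> table id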
        expid_to_tableid = {
            v: k
            for k, v in zip(
                reflections.experiment_identifiers().keys(),
                reflections.experiment_identifiers().values(),
            )
        }

        experiments_to_delete = []
        exclude_images = []
        image_ranges_removed = []  # track for results summary
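        # Iteratively trim below-cutoff image groups from the ends of each
        # scan; interior groups only become trimmable once their neighbouring
        # edge groups have been removed.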
        n_removed_this_cycle = 1
        while n_removed_this_cycle != 0:
            other_potential_ids_to_remove = []
            n_removed_this_cycle = 0
            for id_ in sorted(ids_to_remove):
                # exp_id is the experiment identifier string
                exp_id, image_range = image_group_to_expid_and_range[id_]
                if (expid_to_image_groups[exp_id][-1] == id_ or
                        expid_to_image_groups[exp_id][0] == id_):  # at edge of scan
                    # loc = list(experiments.identifiers()).index(exp_id)
                    table_id = expid_to_tableid[exp_id]
                    image_ranges_removed.append([image_range, table_id])
                    logger.info(
                        "Removing image range %s from experiment %s",
                        image_range,
                        table_id,
                    )
                    exclude_images.append(
                        [f"{table_id}:{image_range[0]}:{image_range[1]}"])
                    if expid_to_image_groups[exp_id][-1] == id_:
                        del expid_to_image_groups[exp_id][-1]
                    else:
                        del expid_to_image_groups[exp_id][0]
                    n_removed_this_cycle += 1
                else:
                    other_potential_ids_to_remove.append(id_)
            ids_to_remove = other_potential_ids_to_remove
        for id_ in ids_to_remove:
            exp_id, image_range = image_group_to_expid_and_range[id_]
            table_id = expid_to_tableid[exp_id]
            logger.info(
                """Image range %s from experiment %s is below the cutoff, but not at the edge of a sweep.""",
                image_range,
                table_id,
            )

        # Now remove individual batches
        if -1 in reflections["id"]:
            reflections = reflections.select(reflections["id"] != -1)
        reflection_list = reflections.split_by_experiment_id()
        reflection_list, experiments = exclude_image_ranges_for_scaling(
            reflection_list, experiments, exclude_images)

        # check if any image groups were all outliers and missed by the analysis
        # This catches an edge case where there is an image group full of
        # outliers, which gets filtered out before the analysis but should
        # be set as not a valid image range.
        exclude_images = []
        for exp in experiments:
            # if any of the image ranges are not in the sets tested, exclude them
            tested = []
            for exp_id, imgrange in image_group_to_expid_and_range.values():
                if exp_id == exp.identifier:
                    tested.extend(list(range(imgrange[0], imgrange[1] + 1)))
            for imgrange in exp.scan.get_valid_image_ranges(exp.identifier):
                if all(j not in tested
                       for j in range(imgrange[0], imgrange[1] + 1)):
                    table_id = expid_to_tableid[exp.identifier]
                    exclude_images.append(
                        [f"{table_id}:{imgrange[0]}:{imgrange[1]}"])
                    logger.info("Removing %s due to scaling outlier group.",
                                exclude_images[-1])
        if exclude_images:
            reflection_list, experiments = exclude_image_ranges_for_scaling(
                reflection_list, experiments, exclude_images)

        # if a whole experiment has been excluded: need to remove it here
        ids_removed = []
        for exp, refl in zip(experiments, reflection_list):
            if not exp.scan.get_valid_image_ranges(
                    exp.identifier):  # if all removed above
                experiments_to_delete.append(exp.identifier)
                ids_removed.append(refl.experiment_identifiers().keys()[0])
        if experiments_to_delete:
            experiments, reflection_list = select_datasets_on_identifiers(
                experiments,
                reflection_list,
                exclude_datasets=experiments_to_delete)
        assert len(reflection_list) == len(experiments)

        output_reflections = flex.reflection_table()
        for r in reflection_list:
            output_reflections.extend(r)

        n_valid_filtered_reflections = output_reflections.get_flags(
            output_reflections.flags.scaled).count(True)
        results_summary["dataset_removal"].update({
            "image_ranges_removed":
            image_ranges_removed,
            "experiments_fully_removed":
            experiments_to_delete,
            "experiment_ids_fully_removed":
            ids_removed,
            "n_reflections_removed":
            n_valid_reflections - n_valid_filtered_reflections,
        })
        return output_reflections
Example #4
    def __init__(self,
                 experiments,
                 reflections,
                 uuid_cache_in,
                 params=None,
                 do_plot=False,
                 i_plot=None,
                 output_dir=None):
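        """Filter and cluster the input datasets, reindex them to a common
        minimum cell, and set up the CosymAnalysis object."""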
        super(dials_cl_cosym_wrapper, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        if params is None:
            params = phil_scope.extract()
        self.params = params

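        # restrict each reflection table to its experiment's valid image ranges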
        self._reflections = []
        for refl, expt in zip(reflections, experiments):
            sel = get_selection_for_valid_image_ranges(refl, expt)
            self._reflections.append(refl.select(sel))

        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, self._reflections, uuid_cache_in)
        self.ids_to_identifiers_map = {}
        for table in self._reflections:
            self.ids_to_identifiers_map.update(table.experiment_identifiers())
        self.identifiers_to_ids_map = {
            value: key
            for key, value in self.ids_to_identifiers_map.items()
        }

        if len(self._experiments) > 1:
            # perform unit cell clustering
            identifiers = self._unit_cell_clustering(self._experiments)
            if len(identifiers) < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    len(identifiers),
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_identifiers(
                    self._experiments,
                    self._reflections,
                    use_datasets=identifiers)
                # identifiers are numeric strings here, used to index uuid_cache
                self.uuid_cache = [
                    self.uuid_cache[int(id_)] for id_ in identifiers
                ]

        # Map experiments and reflections to minimum cell
        cb_ops = change_of_basis_ops_to_minimum_cell(
            self._experiments,
            params.lattice_symmetry_max_delta,
            params.relative_length_tolerance,
            params.absolute_angle_tolerance,
        )
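        # count the cb_ops before exclusions, for the MPI diagnostic below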
        in_cb_ops = len(cb_ops)
        exclude = [
            expt.identifier for expt, cb_op in zip(self._experiments, cb_ops)
            if not cb_op
        ]
        if len(exclude):
            logger.info(
                "Rejecting {} datasets from cosym analysis "\
                "(couldn't determine consistent cb_op to minimum cell):\n"\
                "{}".format(len(exclude), exclude)
            )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections, exclude_datasets=exclude)
            cb_ops = list(filter(None, cb_ops))

        ex_cb_ops = len(cb_ops)

        # Normally we expect that all the cb_ops are the same (applicable for
        # PSI with P63)
        assertion_dict = {}
        for cb_op in cb_ops:
            key_ = cb_op.as_hkl()
            assertion_dict[key_] = assertion_dict.get(key_, 0) + 1
        if len(assertion_dict) != 1:
            # unexpected, there is normally only 1 cb operator to minimum cell
            from libtbx.mpi4py import MPI
            mpi_rank = MPI.COMM_WORLD.Get_rank()
            mpi_size = MPI.COMM_WORLD.Get_size()
            print(
                "RANK %02d, # experiments %d, after exclusion %d, unexpectedly there are %d unique cb_ops: %s"
                % (mpi_rank, in_cb_ops, ex_cb_ops, len(assertion_dict),
                   ", ".join([
                       "%s:%d" % (key, assertion_dict[key])
                       for key in assertion_dict
                   ])))
            # revisit with different use cases later

            # In fact we need all cb_ops to match because the user might supply
            # a custom reindexing operator and we need to consistently transform
            # it from the conventional basis into the minimum basis. Therefore,
            # force them all to match, but make sure the user is aware.
            if not params.single_cb_op_to_minimum:
                raise RuntimeError(
                    "There are >1 different cb_ops to minimum and "
                    "cosym.single_cb_op_to_minimum is not True")
            else:
                best_cb_op_str = max(assertion_dict, key=assertion_dict.get)
                best_cb_op = None
                for cb_op in cb_ops:
                    if cb_op.as_hkl() == best_cb_op_str:
                        best_cb_op = cb_op
                        break
                assert best_cb_op is not None
                cb_ops = [best_cb_op] * len(cb_ops)

        self.cb_op_to_minimum = cb_ops

        # Eliminate reflections that are systematically absent due to centring
        # of the lattice, otherwise they would lead to non-integer miller indices
        # when reindexing to a primitive setting
        self._reflections = eliminate_sys_absent(self._experiments,
                                                 self._reflections)

        self._experiments, self._reflections = apply_change_of_basis_ops(
            self._experiments, self._reflections, cb_ops)

        # transform models into miller arrays
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=params.partiality_threshold,
        )

        datasets = [
            ma.as_anomalous_array().merge_equivalents().array()
            for ma in datasets
        ]

        # opportunity here to subclass as defined above, instead of the dials-implemented version
        self.cosym_analysis = CosymAnalysis(
            datasets,
            self.params,
            do_plot=do_plot,
            i_plot=i_plot,
            plot_fname=self.params.plot.filename,
            plot_format=self.params.plot.format,
            output_dir=output_dir,
            cb_op=cb_ops[0])
Example #5
    def __init__(self, experiments, reflections, params=None):
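        """Filter the input datasets, reindex them to a minimum cell, and set
        up the CosymAnalysis object."""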
        super(cosym, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        if params is None:
            params = phil_scope.extract()
        self.params = params

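        # restrict each reflection table to its experiment's valid image ranges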
        self._reflections = []
        for refl, expt in zip(reflections, experiments):
            sel = get_selection_for_valid_image_ranges(refl, expt)
            self._reflections.append(refl.select(sel))

        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, self._reflections)
        self.ids_to_identifiers_map = {}
        for table in self._reflections:
            self.ids_to_identifiers_map.update(table.experiment_identifiers())
        self.identifiers_to_ids_map = {
            value: key
            for key, value in self.ids_to_identifiers_map.items()
        }

        if len(self._experiments) > 1:
            # perform unit cell clustering
            identifiers = self._unit_cell_clustering(self._experiments)
            if len(identifiers) < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    len(identifiers),
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_identifiers(
                    self._experiments,
                    self._reflections,
                    use_datasets=identifiers)

        # Map experiments and reflections to minimum cell
        cb_ops = change_of_basis_ops_to_minimum_cell(
            self._experiments,
            params.lattice_symmetry_max_delta,
            params.relative_length_tolerance,
            params.absolute_angle_tolerance,
        )
        exclude = [
            expt.identifier for expt, cb_op in zip(self._experiments, cb_ops)
            if not cb_op
        ]
        if len(exclude):
            logger.info(
                f"Rejecting {len(exclude)} datasets from cosym analysis "
                f"(couldn't determine consistent cb_op to minimum cell):\n"
                f"{exclude}", )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections, exclude_datasets=exclude)
            cb_ops = list(filter(None, cb_ops))

        # Eliminate reflections that are systematically absent due to centring
        # of the lattice, otherwise they would lead to non-integer miller indices
        # when reindexing to a primitive setting
        self._reflections = eliminate_sys_absent(self._experiments,
                                                 self._reflections)

        self._experiments, self._reflections = apply_change_of_basis_ops(
            self._experiments, self._reflections, cb_ops)

        # transform models into miller arrays
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=params.partiality_threshold,
        )

        datasets = [
            ma.as_anomalous_array().merge_equivalents().array()
            for ma in datasets
        ]
        self.cosym_analysis = CosymAnalysis(datasets, self.params)
Example #6
    def __init__(self,
                 experiments,
                 reflections,
                 uuid_cache_in,
                 params=None,
                 do_plot=False,
                 i_plot=None,
                 output_dir=None):
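        """Filter and cluster the input datasets, reindex them to a common
        minimum cell, and set up the CosymAnalysis object."""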
        super(dials_cl_cosym_wrapper, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        if params is None:
            params = phil_scope.extract()
        self.params = params

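        # restrict each reflection table to its experiment's valid image ranges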
        self._reflections = []
        for refl, expt in zip(reflections, experiments):
            sel = get_selection_for_valid_image_ranges(refl, expt)
            self._reflections.append(refl.select(sel))

        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, self._reflections, uuid_cache_in)
        self.ids_to_identifiers_map = {}
        for table in self._reflections:
            self.ids_to_identifiers_map.update(table.experiment_identifiers())
        self.identifiers_to_ids_map = {
            value: key
            for key, value in self.ids_to_identifiers_map.items()
        }

        if len(self._experiments) > 1:
            # perform unit cell clustering
            identifiers = self._unit_cell_clustering(self._experiments)
            if len(identifiers) < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    len(identifiers),
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_identifiers(
                    self._experiments,
                    self._reflections,
                    use_datasets=identifiers)

        # Map experiments and reflections to minimum cell
        cb_ops = change_of_basis_ops_to_minimum_cell(
            self._experiments,
            params.lattice_symmetry_max_delta,
            params.relative_length_tolerance,
            params.absolute_angle_tolerance,
        )
        exclude = [
            expt.identifier for expt, cb_op in zip(self._experiments, cb_ops)
            if not cb_op
        ]
        if len(exclude):
            logger.info(
                "Rejecting {} datasets from cosym analysis "\
                "(couldn't determine consistent cb_op to minimum cell):\n"\
                "{}".format(len(exclude), exclude)
            )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections, exclude_datasets=exclude)
            cb_ops = list(filter(None, cb_ops))

        # Eliminate reflections that are systematically absent due to centring
        # of the lattice, otherwise they would lead to non-integer miller indices
        # when reindexing to a primitive setting
        self._reflections = eliminate_sys_absent(self._experiments,
                                                 self._reflections)

        self._experiments, self._reflections = apply_change_of_basis_ops(
            self._experiments, self._reflections, cb_ops)

        # transform models into miller arrays
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=params.partiality_threshold,
        )

        datasets = [
            ma.as_anomalous_array().merge_equivalents().array()
            for ma in datasets
        ]

        # opportunity here to subclass as defined above, instead of the dials-implemented version
        self.cosym_analysis = CosymAnalysis(
            datasets,
            self.params,
            do_plot=do_plot,
            i_plot=i_plot,
            plot_fname=self.params.plot.filename,
            plot_format=self.params.plot.format,
            output_dir=output_dir)
        # Fixed in subclass: the parent class apparently erases the knowledge
        # of the input-to-minimum cb_ops. Without storing the op in self, we
        # can never trace back to the input setting.
        self.cb_op_to_minimum = cb_ops

        # Not sure yet; we may be assuming that all the cb_ops are the same
        # (applicable for PSI with P63). Compare hkl strings, since the
        # change_of_basis_op objects themselves may hash by identity.
        assertion_set = set(cb_op.as_hkl() for cb_op in cb_ops)
        # guarantees all are the same; revisit with different use cases later
        assert len(assertion_set) == 1