Example #1
    def run_cosym(self):
        from dials.algorithms.symmetry.cosym import phil_scope

        params = phil_scope.extract()
        from dials.algorithms.symmetry.cosym import CosymAnalysis

        datasets = [
            d.eliminate_sys_absent(integral_only=True).primitive_setting()
            for d in self.individual_merged_intensities
        ]
        params.lattice_group = datasets[0].space_group_info()
        params.space_group = datasets[0].space_group_info()
        params.cluster.method = "dbscan"

        self.cosym = CosymAnalysis(datasets, params)
        self.cosym.run()
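For context, a minimal sketch of driving the same analysis outside the class; `merged_arrays` is a hypothetical list of merged cctbx Miller arrays standing in for `self.individual_merged_intensities`:

from dials.algorithms.symmetry.cosym import CosymAnalysis, phil_scope

def run_cosym_standalone(merged_arrays):
    # merged_arrays: hypothetical list of merged cctbx miller arrays
    params = phil_scope.extract()
    datasets = [
        d.eliminate_sys_absent(integral_only=True).primitive_setting()
        for d in merged_arrays
    ]
    # Pinning both groups to the known symmetry restricts the analysis to
    # resolving the indexing ambiguity, as in the example above.
    params.lattice_group = datasets[0].space_group_info()
    params.space_group = datasets[0].space_group_info()
    analysis = CosymAnalysis(datasets, params)
    analysis.run()
    return analysis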
Example #2
    def __init__(self, experiments, reflections, params=None):
        super(cosym, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        if params is None:
            params = phil_scope.extract()
        self.params = params

        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, reflections)

        # map experiments and reflections to primitive setting
        self._experiments, self._reflections = self._map_to_primitive(
            self._experiments, self._reflections)

        if len(self._experiments) > 1:
            # perform unit cell clustering
            identifiers = self._unit_cell_clustering(self._experiments)
            if len(identifiers) < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    len(identifiers),
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_ids(
                    self._experiments,
                    self._reflections,
                    use_datasets=identifiers)

        self._experiments, self._reflections = self._map_to_minimum_cell(
            self._experiments, self._reflections)

        # transform models into miller arrays
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=params.partiality_threshold,
        )

        self.cosym_analysis = CosymAnalysis(datasets, self.params)
Example #3
def test_reindexing_ops_for_dataset(mocker):
    # Mock a minimal CosymAnalysis instance
    self = mocker.Mock()
    self.cluster_labels = flex.double([1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0])
    self.params.cluster.n_clusters = 2
    self.input_intensities = [mocker.Mock(), mocker.Mock()]
    self.cb_op_inp_min = sgtbx.change_of_basis_op()

    # Lattice symmetry and true space group
    lattice_group = sgtbx.space_group_info(
        symbol="C 2 2 2 (x+y,z,-2*y)").group()
    sg = sgtbx.space_group_info(symbol="P 1 2 1").group()
    cosets = sgtbx.cosets.left_decomposition(lattice_group, sg)

    # Finally run the method we're testing
    reindexing_ops = CosymAnalysis._reindexing_ops_for_dataset(
        self, 0, list(lattice_group.smx()), cosets)
    assert reindexing_ops == {0: "x+z,-y,-z", 1: "x,y,z"}
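The coset decomposition above is what generates the candidate operators: each coset of the space group within the lattice group corresponds to one potential indexing ambiguity. A small sketch listing the coset representatives for the symmetry used in this test, assuming the cctbx coset object exposes a `partitions` list of rt_mx elements:

from cctbx import sgtbx

lattice_group = sgtbx.space_group_info(symbol="C 2 2 2 (x+y,z,-2*y)").group()
sg = sgtbx.space_group_info(symbol="P 1 2 1").group()
cosets = sgtbx.cosets.left_decomposition(lattice_group, sg)
for partition in cosets.partitions:
    # The first element of each partition is the coset representative;
    # each distinct coset is one candidate reindexing operator.
    print(partition[0].as_xyz())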
Example #4
class cosym(Subject):
    def __init__(self, experiments, reflections, params=None):
        super(cosym, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        if params is None:
            params = phil_scope.extract()
        self.params = params

        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, reflections)

        # map experiments and reflections to primitive setting
        self._experiments, self._reflections = self._map_to_primitive(
            self._experiments, self._reflections)

        if len(self._experiments) > 1:
            # perform unit cell clustering
            identifiers = self._unit_cell_clustering(self._experiments)
            if len(identifiers) < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    len(identifiers),
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_ids(
                    self._experiments,
                    self._reflections,
                    use_datasets=identifiers)

        self._experiments, self._reflections = self._map_to_minimum_cell(
            self._experiments, self._reflections)

        # transform models into miller arrays
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=params.partiality_threshold,
        )

        self.cosym_analysis = CosymAnalysis(datasets, self.params)

    @property
    def experiments(self):
        """Return the experiment list."""
        return self._experiments

    @property
    def reflections(self):
        """Return the list of reflection tables."""
        return self._reflections

    @Subject.notify_event(event="run_cosym")
    def run(self):
        self.cosym_analysis.run()

        space_groups = {}
        reindexing_ops = {}
        for dataset_id in self.cosym_analysis.reindexing_ops:
            if 0 in self.cosym_analysis.reindexing_ops[dataset_id]:
                cb_op = self.cosym_analysis.reindexing_ops[dataset_id][0]
                reindexing_ops.setdefault(cb_op, [])
                reindexing_ops[cb_op].append(dataset_id)
            if dataset_id in self.cosym_analysis.space_groups:
                space_groups.setdefault(
                    self.cosym_analysis.space_groups[dataset_id], [])
                space_groups[
                    self.cosym_analysis.space_groups[dataset_id]
                ].append(dataset_id)

        logger.info("Space groups:")
        for sg, datasets in space_groups.items():
            logger.info(str(sg.info().reference_setting()))
            logger.info(datasets)

        logger.info("Reindexing operators:")
        for cb_op, datasets in reindexing_ops.items():
            logger.info(cb_op)
            logger.info(datasets)

        self._apply_reindexing_operators(
            reindexing_ops, subgroup=self.cosym_analysis.best_subgroup)

    def export(self):
        """Output the datafiles for cosym.

        This includes the cosym.json, reflections and experiments files."""

        reindexed_reflections = flex.reflection_table()
        for refl in self._reflections:
            reindexed_reflections.extend(refl)
        reindexed_reflections.reset_ids()

        logger.info("Saving reindexed experiments to %s",
                    self.params.output.experiments)
        self._experiments.as_file(self.params.output.experiments)
        logger.info("Saving reindexed reflections to %s",
                    self.params.output.reflections)
        reindexed_reflections.as_file(self.params.output.reflections)

    def _apply_reindexing_operators(self, reindexing_ops, subgroup=None):
        """Apply the reindexing operators to the reflections and experiments."""
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            if subgroup is not None:
                cb_op = subgroup["cb_op_inp_best"] * cb_op
            for dataset_id in dataset_ids:
                expt = self._experiments[dataset_id]
                refl = self._reflections[dataset_id]
                expt.crystal = expt.crystal.change_basis(cb_op)
                if subgroup is not None:
                    expt.crystal.set_space_group(
                        subgroup["best_subsym"].space_group(
                        ).build_derived_acentric_group())
                expt.crystal.set_unit_cell(
                    expt.crystal.get_space_group().average_unit_cell(
                        expt.crystal.get_unit_cell()))
                refl["miller_index"] = cb_op.apply(refl["miller_index"])

    def _map_to_primitive(self, experiments, reflections):
        identifiers = []

        for expt, refl in zip(experiments, reflections):
            cb_op_to_primitive = (expt.crystal.get_crystal_symmetry().
                                  change_of_basis_op_to_primitive_setting())
            sel = expt.crystal.get_space_group().is_sys_absent(
                refl["miller_index"])
            if sel.count(True):
                logger.info(
                    "Eliminating %i systematic absences for experiment %s",
                    sel.count(True),
                    expt.identifier,
                )
                refl = refl.select(~sel)
            refl["miller_index"] = cb_op_to_primitive.apply(
                refl["miller_index"])
            expt.crystal = expt.crystal.change_basis(cb_op_to_primitive)
            identifiers.append(expt.identifier)

        return select_datasets_on_ids(experiments,
                                      reflections,
                                      use_datasets=identifiers)

    def _filter_min_reflections(self, experiments, reflections):
        identifiers = []

        for expt, refl in zip(experiments, reflections):
            if len(refl) >= self.params.min_reflections:
                identifiers.append(expt.identifier)

        return select_datasets_on_ids(experiments,
                                      reflections,
                                      use_datasets=identifiers)

    def _map_to_minimum_cell(self, experiments, reflections):
        cb_op_ref_min = (experiments[0].crystal.get_crystal_symmetry().
                         change_of_basis_op_to_minimum_cell())
        for expt, refl in zip(experiments, reflections):
            expt.crystal = expt.crystal.change_basis(cb_op_ref_min)
            expt.crystal.set_space_group(sgtbx.space_group())
            refl["miller_index"] = cb_op_ref_min.apply(refl["miller_index"])
        return experiments, reflections

    @Subject.notify_event("performed_unit_cell_clustering")
    def _unit_cell_clustering(self, experiments):
        crystal_symmetries = [
            expt.crystal.get_crystal_symmetry() for expt in experiments
        ]
        lattice_ids = experiments.identifiers()
        from dials.algorithms.clustering.unit_cell import UnitCellCluster
        from xfel.clustering.cluster_groups import unit_cell_info

        ucs = UnitCellCluster.from_crystal_symmetries(crystal_symmetries,
                                                      lattice_ids=lattice_ids)
        self.unit_cell_clusters, self.unit_cell_dendrogram, _ = ucs.ab_cluster(
            self.params.unit_cell_clustering.threshold,
            log=self.params.unit_cell_clustering.log,
            labels="lattice_id",
            write_file_lists=False,
            schnell=False,
            doplot=False,
        )
        logger.info(unit_cell_info(self.unit_cell_clusters))
        largest_cluster_lattice_ids = None
        for cluster in self.unit_cell_clusters:
            cluster_lattice_ids = [m.lattice_id for m in cluster.members]
            if largest_cluster_lattice_ids is None:
                largest_cluster_lattice_ids = cluster_lattice_ids
            elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
                largest_cluster_lattice_ids = cluster_lattice_ids

        dataset_selection = largest_cluster_lattice_ids
        return dataset_selection
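`@Subject.notify_event` above wires a simple observer pattern around `run()` and `_unit_cell_clustering()`. A self-contained illustration of the mechanics (a toy stand-in, not the dials.util.observer implementation):

class Subject:
    """Toy stand-in for the observer base class, for illustration only."""

    def __init__(self, events):
        self._observers = {event: [] for event in events}

    def add_observer(self, event, callback):
        self._observers[event].append(callback)

    @staticmethod
    def notify_event(event):
        # Decorator factory: run the wrapped method, then fire the callbacks
        # registered for `event`, passing the subject instance.
        def decorator(method):
            def wrapper(self, *args, **kwargs):
                result = method(self, *args, **kwargs)
                for callback in self._observers[event]:
                    callback(self)
                return result
            return wrapper
        return decorator

With this pattern, something like `cosym_instance.add_observer("run_cosym", report)` would have `report(cosym_instance)` called after each `run()`.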
Example #5
def test_cosym(
    space_group,
    unit_cell,
    dimensions,
    sample_size,
    use_known_space_group,
    use_known_lattice_group,
    best_monoclinic_beta,
    run_in_tmpdir,
):
    import matplotlib

    matplotlib.use("Agg")

    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        unit_cell=unit_cell,
        unit_cell_volume=10000,
        d_min=1.5,
        map_to_p1=True,
        sample_size=sample_size,
        seed=1,
    )
    expected_space_group = sgtbx.space_group_info(symbol=space_group).group()

    params = phil_scope.extract()
    params.dimensions = dimensions
    params.best_monoclinic_beta = best_monoclinic_beta
    if use_known_space_group:
        params.space_group = expected_space_group.info()
    if use_known_lattice_group:
        params.lattice_group = expected_space_group.info()

    params.normalisation = None
    cosym = CosymAnalysis(datasets, params)
    cosym.run()
    d = cosym.as_dict()
    if not use_known_space_group:
        assert d["subgroup_scores"][0]["likelihood"] > 0.89
        assert (sgtbx.space_group(d["subgroup_scores"][0]["patterson_group"])
                == sgtbx.space_group_info(
                    space_group).group().build_derived_patterson_group())
        expected_sg = (sgtbx.space_group_info(
            space_group).group().build_derived_patterson_group())
    else:
        expected_sg = sgtbx.space_group_info(space_group).group()
    assert cosym.best_subgroup["best_subsym"].space_group() == expected_sg
    assert len(cosym.reindexing_ops) == len(expected_reindexing_ops)

    space_group_info = cosym.best_subgroup["subsym"].space_group_info()
    reference = None
    for d_id, cb_op in enumerate(cosym.reindexing_ops):
        reindexed = (datasets[d_id].change_basis(
            sgtbx.change_of_basis_op(cb_op)).customized_copy(
                space_group_info=space_group_info.change_basis(
                    cosym.cb_op_inp_min.inverse())))
        assert reindexed.is_compatible_unit_cell(), str(
            reindexed.crystal_symmetry())
        if reference:
            assert (reindexed.correlation(
                reference, assert_is_similar_symmetry=False).coefficient() >
                    0.99)
        else:
            reference = reindexed
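This test is presumably driven by pytest parametrization over the arguments in its signature; a hedged sketch of what the decorators might look like (the parameter values here are illustrative, not taken from the source):

import pytest

@pytest.mark.parametrize("use_known_space_group", [True, False])
@pytest.mark.parametrize("use_known_lattice_group", [True, False])
@pytest.mark.parametrize(
    "space_group,unit_cell,dimensions,sample_size,best_monoclinic_beta",
    [
        # Hypothetical cases; the real test matrix is not shown here.
        ("P2", None, None, 10, True),
        ("R3:h", None, None, 20, False),
    ],
)
def test_cosym(space_group, unit_cell, dimensions, sample_size,
               use_known_space_group, use_known_lattice_group,
               best_monoclinic_beta, run_in_tmpdir):
    ...  # body as in the example above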
Example #6
    def __init__(self, experiments, reflections, params=None):
        super(cosym, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        if params is None:
            params = phil_scope.extract()
        self.params = params

        self._reflections = []
        for refl, expt in zip(reflections, experiments):
            sel = get_selection_for_valid_image_ranges(refl, expt)
            self._reflections.append(refl.select(sel))

        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, self._reflections)
        self.ids_to_identifiers_map = {}
        for table in self._reflections:
            self.ids_to_identifiers_map.update(table.experiment_identifiers())
        self.identifiers_to_ids_map = {
            value: key
            for key, value in self.ids_to_identifiers_map.items()
        }

        if len(self._experiments) > 1:
            # perform unit cell clustering
            identifiers = self._unit_cell_clustering(self._experiments)
            if len(identifiers) < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    len(identifiers),
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_identifiers(
                    self._experiments,
                    self._reflections,
                    use_datasets=identifiers)

        # Map experiments and reflections to minimum cell
        cb_ops = change_of_basis_ops_to_minimum_cell(
            self._experiments,
            params.lattice_symmetry_max_delta,
            params.relative_length_tolerance,
            params.absolute_angle_tolerance,
        )
        exclude = [
            expt.identifier for expt, cb_op in zip(self._experiments, cb_ops)
            if not cb_op
        ]
        if len(exclude):
            logger.info(
                f"Rejecting {len(exclude)} datasets from cosym analysis "
                f"(couldn't determine consistent cb_op to minimum cell):\n"
                f"{exclude}", )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections, exclude_datasets=exclude)
            cb_ops = list(filter(None, cb_ops))

        # Eliminate reflections that are systematically absent due to centring
        # of the lattice, otherwise they would lead to non-integer miller indices
        # when reindexing to a primitive setting
        self._reflections = eliminate_sys_absent(self._experiments,
                                                 self._reflections)

        self._experiments, self._reflections = apply_change_of_basis_ops(
            self._experiments, self._reflections, cb_ops)

        # transform models into miller arrays
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=params.partiality_threshold,
        )

        datasets = [
            ma.as_anomalous_array().merge_equivalents().array()
            for ma in datasets
        ]
        self.cosym_analysis = CosymAnalysis(datasets, self.params)
Example #7
class cosym(Subject):
    def __init__(self, experiments, reflections, params=None):
        super(cosym, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        if params is None:
            params = phil_scope.extract()
        self.params = params

        self._reflections = []
        for refl, expt in zip(reflections, experiments):
            sel = get_selection_for_valid_image_ranges(refl, expt)
            self._reflections.append(refl.select(sel))

        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, self._reflections)
        self.ids_to_identifiers_map = {}
        for table in self._reflections:
            self.ids_to_identifiers_map.update(table.experiment_identifiers())
        self.identifiers_to_ids_map = {
            value: key
            for key, value in self.ids_to_identifiers_map.items()
        }

        if len(self._experiments) > 1:
            # perform unit cell clustering
            identifiers = self._unit_cell_clustering(self._experiments)
            if len(identifiers) < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    len(identifiers),
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_identifiers(
                    self._experiments,
                    self._reflections,
                    use_datasets=identifiers)

        # Map experiments and reflections to minimum cell
        cb_ops = change_of_basis_ops_to_minimum_cell(
            self._experiments,
            params.lattice_symmetry_max_delta,
            params.relative_length_tolerance,
            params.absolute_angle_tolerance,
        )
        exclude = [
            expt.identifier for expt, cb_op in zip(self._experiments, cb_ops)
            if not cb_op
        ]
        if len(exclude):
            logger.info(
                f"Rejecting {len(exclude)} datasets from cosym analysis "
                f"(couldn't determine consistent cb_op to minimum cell):\n"
                f"{exclude}", )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections, exclude_datasets=exclude)
            cb_ops = list(filter(None, cb_ops))

        # Eliminate reflections that are systematically absent due to centring
        # of the lattice, otherwise they would lead to non-integer miller indices
        # when reindexing to a primitive setting
        self._reflections = eliminate_sys_absent(self._experiments,
                                                 self._reflections)

        self._experiments, self._reflections = apply_change_of_basis_ops(
            self._experiments, self._reflections, cb_ops)

        # transform models into miller arrays
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=params.partiality_threshold,
        )

        datasets = [
            ma.as_anomalous_array().merge_equivalents().array()
            for ma in datasets
        ]
        self.cosym_analysis = CosymAnalysis(datasets, self.params)

    @property
    def experiments(self):
        """Return the experiment list."""
        return self._experiments

    @property
    def reflections(self):
        """Return the list of reflection tables."""
        return self._reflections

    @Subject.notify_event(event="run_cosym")
    def run(self):
        self.cosym_analysis.run()

        reindexing_ops = {}
        sym_op_counts = {
            cluster_id: collections.Counter(
                ops[cluster_id]
                for ops in self.cosym_analysis.reindexing_ops.values())
            for cluster_id in range(self.params.cluster.n_clusters)
        }
        identity_counts = [
            counts["x,y,z"] for counts in sym_op_counts.values()
        ]
        cluster_id = identity_counts.index(max(identity_counts))
        for dataset_id in self.cosym_analysis.reindexing_ops:
            if cluster_id in self.cosym_analysis.reindexing_ops[dataset_id]:
                cb_op = self.cosym_analysis.reindexing_ops[dataset_id][
                    cluster_id]
                reindexing_ops.setdefault(cb_op, [])
                reindexing_ops[cb_op].append(dataset_id)

        logger.info("Reindexing operators:")
        for cb_op, datasets in reindexing_ops.items():
            logger.info(cb_op)
            logger.info(datasets)

        self._apply_reindexing_operators(
            reindexing_ops, subgroup=self.cosym_analysis.best_subgroup)

    def export(self):
        """Output the datafiles for cosym.

        This includes the cosym.json, reflections and experiments files."""

        reindexed_reflections = flex.reflection_table()
        for refl in self._reflections:
            reindexed_reflections.extend(refl)
        reindexed_reflections.reset_ids()

        logger.info("Saving reindexed experiments to %s",
                    self.params.output.experiments)
        self._experiments.as_file(self.params.output.experiments)
        logger.info("Saving reindexed reflections to %s",
                    self.params.output.reflections)
        reindexed_reflections.as_file(self.params.output.reflections)

    def _apply_reindexing_operators(self, reindexing_ops, subgroup=None):
        """Apply the reindexing operators to the reflections and experiments."""
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            if subgroup is not None:
                cb_op = subgroup["cb_op_inp_best"] * cb_op
            for dataset_id in dataset_ids:
                expt = self._experiments[dataset_id]
                refl = self._reflections[dataset_id]
                expt.crystal = expt.crystal.change_basis(cb_op)
                if subgroup is not None:
                    expt.crystal.set_space_group(
                        subgroup["best_subsym"].space_group(
                        ).build_derived_acentric_group())
                expt.crystal.set_unit_cell(
                    expt.crystal.get_space_group().average_unit_cell(
                        expt.crystal.get_unit_cell()))
                refl["miller_index"] = cb_op.apply(refl["miller_index"])

    def _filter_min_reflections(self, experiments, reflections):
        identifiers = []

        for expt, refl in zip(experiments, reflections):
            if len(refl) >= self.params.min_reflections:
                identifiers.append(expt.identifier)

        return select_datasets_on_identifiers(experiments,
                                              reflections,
                                              use_datasets=identifiers)

    @Subject.notify_event("performed_unit_cell_clustering")
    def _unit_cell_clustering(self, experiments):
        crystal_symmetries = [
            expt.crystal.get_crystal_symmetry() for expt in experiments
        ]
        # lattice ids are used to label plots, so we want numerical ids
        lattice_ids = [
            self.identifiers_to_ids_map[i] for i in experiments.identifiers()
        ]

        ucs = UnitCellCluster.from_crystal_symmetries(crystal_symmetries,
                                                      lattice_ids=lattice_ids)
        self.unit_cell_clusters, self.unit_cell_dendrogram, _ = ucs.ab_cluster(
            self.params.unit_cell_clustering.threshold,
            log=self.params.unit_cell_clustering.log,
            labels="lattice_id",
            write_file_lists=False,
            schnell=False,
            doplot=False,
        )
        logger.info(unit_cell_info(self.unit_cell_clusters))
        largest_cluster_lattice_ids = None
        for cluster in self.unit_cell_clusters:
            cluster_lattice_ids = [m.lattice_id for m in cluster.members]
            if largest_cluster_lattice_ids is None:
                largest_cluster_lattice_ids = cluster_lattice_ids
            elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
                largest_cluster_lattice_ids = cluster_lattice_ids

        dataset_selection = largest_cluster_lattice_ids
        # now convert to actual identifiers for selection
        return [self.ids_to_identifiers_map[i] for i in dataset_selection]
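The cluster selection in `run()` above keeps the cluster whose operators are most often the identity, on the assumption that the dominant cluster needs no reindexing. A stripped-down sketch of that logic on toy data:

import collections

# Toy reindexing_ops: dataset_id -> {cluster_id: change-of-basis op as xyz}
reindexing_ops = {
    0: {0: "-x,-y,z", 1: "x,y,z"},
    1: {0: "x,y,z", 1: "x,y,z"},
    2: {0: "-x,-y,z", 1: "x,y,z"},
}
n_clusters = 2
sym_op_counts = {
    cluster_id: collections.Counter(
        ops[cluster_id] for ops in reindexing_ops.values())
    for cluster_id in range(n_clusters)
}
identity_counts = [counts["x,y,z"] for counts in sym_op_counts.values()]
best_cluster = identity_counts.index(max(identity_counts))
print(best_cluster)  # 1: three identity ops versus one for cluster 0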
Example #8
def test_cosym(
    space_group,
    unit_cell,
    dimensions,
    sample_size,
    use_known_space_group,
    use_known_lattice_group,
    best_monoclinic_beta,
    run_in_tmpdir,
):
    import matplotlib

    matplotlib.use("Agg")

    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        unit_cell=unit_cell,
        unit_cell_volume=10000,
        d_min=1.5,
        map_to_p1=True,
        sample_size=sample_size,
        seed=1,
    )
    expected_space_group = sgtbx.space_group_info(symbol=space_group).group()

    params = phil_scope.extract()
    params.cluster.n_clusters = len(expected_reindexing_ops)
    params.dimensions = dimensions
    params.best_monoclinic_beta = best_monoclinic_beta
    if use_known_space_group:
        params.space_group = expected_space_group.info()
    if use_known_lattice_group:
        params.lattice_group = expected_space_group.info()

    params.normalisation = None
    cosym = CosymAnalysis(datasets, params)
    cosym.run()
    d = cosym.as_dict()
    if not use_known_space_group:
        assert d["subgroup_scores"][0]["likelihood"] > 0.89
        assert (sgtbx.space_group(d["subgroup_scores"][0]["patterson_group"])
                == sgtbx.space_group_info(
                    space_group).group().build_derived_patterson_group())

    reindexing_ops = {}
    for dataset_id in cosym.reindexing_ops.keys():
        if 0 in cosym.reindexing_ops[dataset_id]:
            cb_op = cosym.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, set())
            reindexing_ops[cb_op].add(dataset_id)

    assert len(reindexing_ops) == len(expected_reindexing_ops)

    if use_known_space_group:
        expected_sg = sgtbx.space_group_info(space_group).group()
    else:
        expected_sg = (sgtbx.space_group_info(
            space_group).group().build_derived_patterson_group())
    assert cosym.best_subgroup["best_subsym"].space_group() == expected_sg

    space_group_info = cosym.best_subgroup["subsym"].space_group_info()
    for cb_op, ridx_set in reindexing_ops.items():
        for expected_set in expected_reindexing_ops.values():
            assert (len(ridx_set.symmetric_difference(expected_set))
                    == 0) or (len(ridx_set.intersection(expected_set)) == 0)
        for d_id in ridx_set:
            reindexed = (datasets[d_id].change_basis(
                sgtbx.change_of_basis_op(cb_op)).customized_copy(
                    space_group_info=space_group_info.change_basis(
                        cosym.cb_op_inp_min.inverse())))
            assert reindexed.is_compatible_unit_cell(), str(
                reindexed.crystal_symmetry())
Example #9
class multi_crystal_analysis:
    def __init__(self, unmerged_intensities, labels=None, prefix=None):

        self.unmerged_intensities = unmerged_intensities
        self._intensities_all = None
        self._labels_all = flex.size_t()
        if prefix is None:
            prefix = ""
        self._prefix = prefix

        self.intensities = unmerged_intensities
        self.individual_merged_intensities = []
        if labels is None:
            labels = ["%i" % (i + 1) for i in range(len(self.intensities))]
        assert len(labels) == len(self.intensities)
        self.labels = labels

        for i, unmerged in enumerate(self.intensities):
            self.individual_merged_intensities.append(
                unmerged.merge_equivalents().array().set_info(unmerged.info()))
            if self._intensities_all is None:
                self._intensities_all = unmerged.deep_copy()
            else:
                self._intensities_all = self._intensities_all.concatenate(
                    unmerged, assert_is_similar_symmetry=False)
            self._labels_all.extend(flex.size_t(unmerged.size(), i))

        self.run_cosym()
        (
            correlation_matrix,
            linkage_matrix,
        ) = self.compute_correlation_coefficient_matrix()

        cos_angle_matrix, ca_linkage_matrix = self.compute_cos_angle_matrix()

        d = self.to_plotly_json(correlation_matrix,
                                linkage_matrix,
                                labels=labels)

        with open("%sintensity_clusters.json" % self._prefix, "w") as f:
            json.dump(d, f, indent=2)

        d = self.to_plotly_json(cos_angle_matrix,
                                ca_linkage_matrix,
                                labels=labels,
                                matrix_type="cos_angle")

        with open("%scos_angle_clusters.json" % self._prefix, "w") as f:
            json.dump(d, f, indent=2)

        self.cos_angle_linkage_matrix = ca_linkage_matrix
        self.cos_angle_matrix = cos_angle_matrix
        self.cos_angle_clusters = self.cluster_info(
            self.linkage_matrix_to_dict(self.cos_angle_linkage_matrix))
        self.cc_linkage_matrix = linkage_matrix
        self.cc_matrix = correlation_matrix
        self.cc_clusters = self.cluster_info(
            self.linkage_matrix_to_dict(self.cc_linkage_matrix))

        logger.info("\nIntensity correlation clustering summary:")
        logger.info(
            tabulate(self.as_table(self.cc_clusters),
                     headers="firstrow",
                     tablefmt="rst"))
        logger.info("\nCos(angle) clustering summary:")
        logger.info(
            tabulate(
                self.as_table(self.cos_angle_clusters),
                headers="firstrow",
                tablefmt="rst",
            ))

    def cluster_info(self, cluster_dict):
        info = []
        for cluster_id, cluster in cluster_dict.items():
            sel_cluster = flex.bool(self._labels_all.size(), False)
            uc_params = [flex.double() for i in range(6)]
            for j in cluster["datasets"]:
                sel_cluster |= self._labels_all == j
                uc_j = self.intensities[j - 1].unit_cell().parameters()
                for i in range(6):
                    uc_params[i].append(uc_j[i])
            average_uc = [flex.mean(uc_params[i]) for i in range(6)]
            intensities_cluster = self._intensities_all.select(sel_cluster)
            merging = intensities_cluster.merge_equivalents()
            merged_intensities = merging.array()
            multiplicities = merging.redundancies()
            dataset_ids = cluster["datasets"]
            labels = [self.labels[i - 1] for i in dataset_ids]
            info.append(
                ClusterInfo(
                    cluster_id,
                    labels,
                    flex.mean(multiplicities.data().as_double()),
                    merged_intensities.completeness(),
                    unit_cell=average_uc,
                    height=cluster.get("height"),
                ))
        return info

    def as_table(self, cluster_info):
        from libtbx.str_utils import wordwrap

        headers = [
            "Cluster",
            "No. datasets",
            "Datasets",
            "Height",
            "Multiplicity",
            "Completeness",
        ]
        rows = []
        for info in cluster_info:
            rows.append([
                "%i" % info.cluster_id,
                "%i" % len(info.labels),
                wordwrap(" ".join(str(label) for label in info.labels)),
                "%.2g" % info.height,
                "%.1f" % info.multiplicity,
                "%.2f" % info.completeness,
            ])

        rows.insert(0, headers)
        return rows

    @staticmethod
    def linkage_matrix_to_dict(linkage_matrix):
        tree = hierarchy.to_tree(linkage_matrix, rd=False)

        d = {}

        # http://w3facility.org/question/scipy-dendrogram-to-json-for-d3-js-tree-visualisation/
        # https://gist.github.com/mdml/7537455

        def add_node(node):
            if node.is_leaf():
                return
            cluster_id = node.get_id() - len(linkage_matrix) - 1
            row = linkage_matrix[cluster_id]
            d[cluster_id + 1] = {
                "datasets": [i + 1 for i in sorted(node.pre_order())],
                "height": row[2],
            }

            # Recursively add the current node's children
            if node.left:
                add_node(node.left)
            if node.right:
                add_node(node.right)

        add_node(tree)

        return OrderedDict(sorted(d.items()))

    def run_cosym(self):
        from dials.algorithms.symmetry.cosym import phil_scope

        params = phil_scope.extract()
        from dials.algorithms.symmetry.cosym import CosymAnalysis

        datasets = [
            d.eliminate_sys_absent(integral_only=True).primitive_setting()
            for d in self.individual_merged_intensities
        ]
        params.lattice_group = (
            self.individual_merged_intensities[0].space_group_info())
        params.space_group = (
            self.individual_merged_intensities[0].space_group_info())
        params.cluster.method = "dbscan"

        self.cosym = CosymAnalysis(datasets, params)
        self.cosym.run()

    def compute_correlation_coefficient_matrix(self):
        import scipy.spatial.distance as ssd

        correlation_matrix = self.cosym.target.rij_matrix

        for i in range(correlation_matrix.all()[0]):
            correlation_matrix[i, i] = 1

        # clip values of correlation matrix to account for floating point errors
        correlation_matrix.set_selected(correlation_matrix < -1, -1)
        correlation_matrix.set_selected(correlation_matrix > 1, 1)
        diffraction_dissimilarity = 1 - correlation_matrix

        dist_mat = diffraction_dissimilarity.as_numpy_array()

        assert ssd.is_valid_dm(dist_mat, tol=1e-12)
        # convert the redundant n*n square matrix form into a condensed nC2 array
        dist_mat = ssd.squareform(dist_mat, checks=False)

        linkage_matrix = hierarchy.linkage(dist_mat, method="average")

        return correlation_matrix, linkage_matrix

    def compute_cos_angle_matrix(self):
        import scipy.spatial.distance as ssd

        dist_mat = ssd.pdist(self.cosym.coords.as_numpy_array(),
                             metric="cosine")
        cos_angle = 1 - ssd.squareform(dist_mat)
        linkage_matrix = hierarchy.linkage(dist_mat, method="average")
        return flex.double(cos_angle), linkage_matrix

    @staticmethod
    def to_plotly_json(correlation_matrix,
                       linkage_matrix,
                       labels=None,
                       matrix_type="correlation"):
        assert matrix_type in ("correlation", "cos_angle")

        ddict = hierarchy.dendrogram(linkage_matrix,
                                     color_threshold=0.05,
                                     labels=labels,
                                     show_leaf_counts=False)

        y2_dict = scipy_dendrogram_to_plotly_json(ddict)  # above heatmap
        x2_dict = copy.deepcopy(y2_dict)  # left of heatmap, rotated
        for d in y2_dict["data"]:
            d["yaxis"] = "y2"
            d["xaxis"] = "x2"

        for d in x2_dict["data"]:
            x = d["x"]
            y = d["y"]
            d["x"] = y
            d["y"] = x
            d["yaxis"] = "y3"
            d["xaxis"] = "x3"

        D = correlation_matrix.as_numpy_array()
        index = ddict["leaves"]
        D = D[index, :]
        D = D[:, index]
        ccdict = {
            "data": [{
                "name": "%s_matrix" % matrix_type,
                "x": list(range(D.shape[0])),
                "y": list(range(D.shape[1])),
                "z": D.tolist(),
                "type": "heatmap",
                "colorbar": {
                    "title": ("Correlation coefficient" if matrix_type
                              == "correlation" else "cos(angle)"),
                    "titleside": "right",
                    "xpad": 0,
                },
                # Plotly's yellow-orange-red colorscale is spelled "YlOrRd"
                "colorscale": "YlOrRd",
                "xaxis": "x",
                "yaxis": "y",
            }],
            "layout": {
                "autosize": False,
                "bargap": 0,
                "height": 1000,
                "hovermode": "closest",
                "margin": {"r": 20, "t": 50, "autoexpand": True, "l": 20},
                "showlegend": False,
                "title": "Dendrogram Heatmap",
                "width": 1000,
                "xaxis": {
                    "domain": [0.2, 0.9],
                    "mirror": "allticks",
                    "showgrid": False,
                    "showline": False,
                    "showticklabels": True,
                    "tickmode": "array",
                    "ticks": "",
                    "ticktext": y2_dict["layout"]["xaxis"]["ticktext"],
                    "tickvals": list(
                        range(len(y2_dict["layout"]["xaxis"]["ticktext"]))),
                    "tickangle": 300,
                    "title": "",
                    "type": "linear",
                    "zeroline": False,
                },
                "yaxis": {
                    "domain": [0, 0.78],
                    "anchor": "x",
                    "mirror": "allticks",
                    "showgrid": False,
                    "showline": False,
                    "showticklabels": True,
                    "tickmode": "array",
                    "ticks": "",
                    "ticktext": y2_dict["layout"]["xaxis"]["ticktext"],
                    "tickvals": list(
                        range(len(y2_dict["layout"]["xaxis"]["ticktext"]))),
                    "title": "",
                    "type": "linear",
                    "zeroline": False,
                },
                "xaxis2": {
                    "domain": [0.2, 0.9],
                    "anchor": "y2",
                    "showgrid": False,
                    "showline": False,
                    "showticklabels": False,
                    "zeroline": False,
                },
                "yaxis2": {
                    "domain": [0.8, 1],
                    "anchor": "x2",
                    "showgrid": False,
                    "showline": False,
                    "zeroline": False,
                },
                "xaxis3": {
                    "domain": [0.0, 0.1],
                    "anchor": "y3",
                    "range": [max(max(d["x"]) for d in x2_dict["data"]), 0],
                    "showgrid": False,
                    "showline": False,
                    "tickangle": 300,
                    "zeroline": False,
                },
                "yaxis3": {
                    "domain": [0, 0.78],
                    "anchor": "x3",
                    "showgrid": False,
                    "showline": False,
                    "showticklabels": False,
                    "zeroline": False,
                },
            },
        }
        d = ccdict
        d["data"].extend(y2_dict["data"])
        d["data"].extend(x2_dict["data"])

        d["clusters"] = multi_crystal_analysis.linkage_matrix_to_dict(
            linkage_matrix)

        return d
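`linkage_matrix_to_dict` is a staticmethod, so it can be sanity-checked in isolation. A toy check on three points, two of which cluster tightly (data invented for illustration):

import numpy as np
import scipy.spatial.distance as ssd
from scipy.cluster import hierarchy

points = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0]])
linkage_matrix = hierarchy.linkage(ssd.pdist(points), method="average")
clusters = multi_crystal_analysis.linkage_matrix_to_dict(linkage_matrix)
# Expected: cluster 1 holds datasets [1, 2] (the close pair) and cluster 2
# is the root holding [1, 2, 3]; dataset ids are 1-based.
print(clusters)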
Example #10
def test_cosym(
    space_group,
    unit_cell,
    dimensions,
    sample_size,
    use_known_space_group,
    use_known_lattice_group,
    run_in_tmpdir,
):
    import matplotlib

    matplotlib.use("Agg")

    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        unit_cell=unit_cell,
        unit_cell_volume=10000,
        d_min=1.5,
        map_to_p1=True,
        sample_size=sample_size,
    )
    expected_space_group = sgtbx.space_group_info(symbol=space_group).group()

    # Work around the fact that the minimum cell reduction can occasionally be
    # unstable. The input *should* already be the minimum cell, but for some
    # combinations of unit cell parameters change_of_basis_op_to_minimum_cell
    # is never the identity. Therefore apply this cb_op to the
    # expected_reindexing_ops prior to the comparison.
    cb_op_inp_min = datasets[0].crystal_symmetry(
    ).change_of_basis_op_to_minimum_cell()
    expected_reindexing_ops = {
        (cb_op_inp_min.inverse() * sgtbx.change_of_basis_op(cb_op) *
         cb_op_inp_min).as_xyz(): dataset_ids
        for cb_op, dataset_ids in expected_reindexing_ops.items()
    }

    params = phil_scope.extract()
    params.cluster.n_clusters = len(expected_reindexing_ops)
    params.dimensions = dimensions
    if use_known_space_group:
        params.space_group = expected_space_group.info()
    if use_known_lattice_group:
        params.lattice_group = expected_space_group.info()

    cosym = CosymAnalysis(datasets, params)
    cosym.run()
    d = cosym.as_dict()
    if not use_known_space_group:
        assert d["subgroup_scores"][0]["likelihood"] > 0.89
        assert (sgtbx.space_group(d["subgroup_scores"][0]["patterson_group"])
                == sgtbx.space_group_info(
                    space_group).group().build_derived_patterson_group())

    space_groups = {}
    reindexing_ops = {}
    for dataset_id in cosym.reindexing_ops.keys():
        if 0 in cosym.reindexing_ops[dataset_id]:
            cb_op = cosym.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, set())
            reindexing_ops[cb_op].add(dataset_id)
        if dataset_id in cosym.space_groups:
            space_groups.setdefault(cosym.space_groups[dataset_id], set())
            space_groups[cosym.space_groups[dataset_id]].add(dataset_id)

    assert len(reindexing_ops) == len(expected_reindexing_ops)
    assert sorted(reindexing_ops.keys()) == sorted(
        expected_reindexing_ops.keys())
    assert len(space_groups) == 1

    if use_known_space_group:
        expected_sg = sgtbx.space_group_info(space_group).group()
    else:
        expected_sg = (sgtbx.space_group_info(
            space_group).group().build_derived_patterson_group())
    assert cosym.best_subgroup["best_subsym"].space_group() == expected_sg

    for cb_op, ridx_set in reindexing_ops.items():
        for expected_set in expected_reindexing_ops.values():
            assert (len(ridx_set.symmetric_difference(expected_set))
                    == 0) or (len(ridx_set.intersection(expected_set)) == 0)
        for d_id in ridx_set:
            # Note: dict.keys() is not indexable in Python 3
            reindexed = datasets[d_id].change_basis(
                sgtbx.change_of_basis_op(cb_op)).customized_copy(
                    space_group_info=list(space_groups.keys())[0].info())
            assert reindexed.is_compatible_unit_cell(), str(
                reindexed.crystal_symmetry())