def run_cosym(self):
    from dials.algorithms.symmetry.cosym import phil_scope

    params = phil_scope.extract()
    from dials.algorithms.symmetry.cosym import CosymAnalysis

    datasets = self.individual_merged_intensities
    datasets = [
        d.eliminate_sys_absent(integral_only=True).primitive_setting()
        for d in datasets
    ]
    params.lattice_group = datasets[0].space_group_info()
    params.space_group = datasets[0].space_group_info()
    params.cluster.method = "dbscan"

    self.cosym = CosymAnalysis(datasets, params)
    self.cosym.run()
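# A minimal standalone sketch, assuming only the cctbx API, of what the
# eliminate_sys_absent(...).primitive_setting() calls above do to each dataset;
# the unit cell and resolution limit are arbitrary example values.
from cctbx import crystal, miller
from cctbx.array_family import flex

cs = crystal.symmetry(unit_cell=(10, 20, 30, 90, 90, 90),
                      space_group_symbol="C 2 2 2")
ms = miller.build_set(cs, anomalous_flag=False, d_min=3.0)
data = miller.array(ms, data=flex.double(ms.size(), 1.0))
# Drop any systematically absent reflections, then re-express the array in a
# primitive setting so the C-centring cannot later produce non-integer indices
# when a reindexing operator is applied.
prepared = data.eliminate_sys_absent(integral_only=True).primitive_setting()
print(prepared.space_group_info(), prepared.size())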
def __init__(self, experiments, reflections, params=None):
    super(cosym, self).__init__(
        events=["run_cosym", "performed_unit_cell_clustering"])
    if params is None:
        params = phil_scope.extract()
    self.params = params

    self._experiments, self._reflections = self._filter_min_reflections(
        experiments, reflections)

    # map experiments and reflections to primitive setting
    self._experiments, self._reflections = self._map_to_primitive(
        self._experiments, self._reflections)

    if len(self._experiments) > 1:
        # perform unit cell clustering
        identifiers = self._unit_cell_clustering(self._experiments)
        if len(identifiers) < len(self._experiments):
            logger.info(
                "Selecting subset of %i datasets for cosym analysis: %s",
                len(identifiers),
                str(identifiers),
            )
            self._experiments, self._reflections = select_datasets_on_ids(
                self._experiments, self._reflections,
                use_datasets=identifiers)

    self._experiments, self._reflections = self._map_to_minimum_cell(
        self._experiments, self._reflections)

    # transform models into miller arrays
    datasets = filtered_arrays_from_experiments_reflections(
        self.experiments,
        self.reflections,
        outlier_rejection_after_filter=False,
        partiality_threshold=params.partiality_threshold,
    )

    self.cosym_analysis = CosymAnalysis(datasets, self.params)
def test_reindexing_ops_for_dataset(mocker):
    # Mock a minimal CosymAnalysis instance
    self = mocker.Mock()
    self.cluster_labels = flex.double([1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0])
    self.params.cluster.n_clusters = 2
    self.input_intensities = [mocker.Mock(), mocker.Mock()]
    self.cb_op_inp_min = sgtbx.change_of_basis_op()

    # Lattice symmetry and true space group
    lattice_group = sgtbx.space_group_info(
        symbol="C 2 2 2 (x+y,z,-2*y)").group()
    sg = sgtbx.space_group_info(symbol="P 1 2 1").group()
    cosets = sgtbx.cosets.left_decomposition(lattice_group, sg)

    # Finally run the method we're testing
    reindexing_ops = CosymAnalysis._reindexing_ops_for_dataset(
        self, 0, list(lattice_group.smx()), cosets)
    assert reindexing_ops == {0: "x+z,-y,-z", 1: "x,y,z"}
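# A minimal sketch, assuming only the cctbx API, of what the cosets object in
# the test above contains: left_decomposition partitions the lattice-group
# operations into left cosets of the space group, and each coset corresponds
# to one candidate reindexing operator for a dataset.
from cctbx import sgtbx

lattice_group = sgtbx.space_group_info(symbol="C 2 2 2 (x+y,z,-2*y)").group()
sg = sgtbx.space_group_info(symbol="P 1 2 1").group()
cosets = sgtbx.cosets.left_decomposition(lattice_group, sg)
for i, partition in enumerate(cosets.partitions):
    # the first partition is the space group itself; the rest are its cosets
    print(i, [op.as_xyz() for op in partition])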
class cosym(Subject):
    def __init__(self, experiments, reflections, params=None):
        super(cosym, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        if params is None:
            params = phil_scope.extract()
        self.params = params

        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, reflections)

        # map experiments and reflections to primitive setting
        self._experiments, self._reflections = self._map_to_primitive(
            self._experiments, self._reflections)

        if len(self._experiments) > 1:
            # perform unit cell clustering
            identifiers = self._unit_cell_clustering(self._experiments)
            if len(identifiers) < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    len(identifiers),
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_ids(
                    self._experiments, self._reflections,
                    use_datasets=identifiers)

        self._experiments, self._reflections = self._map_to_minimum_cell(
            self._experiments, self._reflections)

        # transform models into miller arrays
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=params.partiality_threshold,
        )

        self.cosym_analysis = CosymAnalysis(datasets, self.params)

    @property
    def experiments(self):
        """Return the experiment list."""
        return self._experiments

    @property
    def reflections(self):
        """Return the list of reflection tables."""
        return self._reflections

    @Subject.notify_event(event="run_cosym")
    def run(self):
        self.cosym_analysis.run()

        space_groups = {}
        reindexing_ops = {}
        for dataset_id in self.cosym_analysis.reindexing_ops:
            if 0 in self.cosym_analysis.reindexing_ops[dataset_id]:
                cb_op = self.cosym_analysis.reindexing_ops[dataset_id][0]
                reindexing_ops.setdefault(cb_op, [])
                reindexing_ops[cb_op].append(dataset_id)
            if dataset_id in self.cosym_analysis.space_groups:
                space_groups.setdefault(
                    self.cosym_analysis.space_groups[dataset_id], [])
                space_groups[
                    self.cosym_analysis.space_groups[dataset_id]].append(
                        dataset_id)

        logger.info("Space groups:")
        for sg, datasets in space_groups.items():
            logger.info(str(sg.info().reference_setting()))
            logger.info(datasets)

        logger.info("Reindexing operators:")
        for cb_op, datasets in reindexing_ops.items():
            logger.info(cb_op)
            logger.info(datasets)

        self._apply_reindexing_operators(
            reindexing_ops, subgroup=self.cosym_analysis.best_subgroup)

    def export(self):
        """Output the datafiles for cosym.

        This includes the cosym.json, reflections and experiments files."""
        reindexed_reflections = flex.reflection_table()
        for refl in self._reflections:
            reindexed_reflections.extend(refl)
        reindexed_reflections.reset_ids()

        logger.info("Saving reindexed experiments to %s",
                    self.params.output.experiments)
        self._experiments.as_file(self.params.output.experiments)
        logger.info("Saving reindexed reflections to %s",
                    self.params.output.reflections)
        reindexed_reflections.as_file(self.params.output.reflections)

    def _apply_reindexing_operators(self, reindexing_ops, subgroup=None):
        """Apply the reindexing operators to the reflections and experiments."""
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            if subgroup is not None:
                cb_op = subgroup["cb_op_inp_best"] * cb_op
            for dataset_id in dataset_ids:
                expt = self._experiments[dataset_id]
                refl = self._reflections[dataset_id]
                expt.crystal = expt.crystal.change_basis(cb_op)
                if subgroup is not None:
                    expt.crystal.set_space_group(
                        subgroup["best_subsym"].space_group()
                        .build_derived_acentric_group())
                expt.crystal.set_unit_cell(
                    expt.crystal.get_space_group().average_unit_cell(
                        expt.crystal.get_unit_cell()))
                refl["miller_index"] = cb_op.apply(refl["miller_index"])

    def _map_to_primitive(self, experiments, reflections):
        identifiers = []
        for expt, refl in zip(experiments, reflections):
            cb_op_to_primitive = (
                expt.crystal.get_crystal_symmetry()
                .change_of_basis_op_to_primitive_setting())
            sel = expt.crystal.get_space_group().is_sys_absent(
                refl["miller_index"])
            if sel.count(True):
                logger.info(
                    "Eliminating %i systematic absences for experiment %s",
                    sel.count(True),
                    expt.identifier,
                )
                refl = refl.select(~sel)
            refl["miller_index"] = cb_op_to_primitive.apply(
                refl["miller_index"])
            expt.crystal = expt.crystal.change_basis(cb_op_to_primitive)
            identifiers.append(expt.identifier)
        return select_datasets_on_ids(experiments, reflections,
                                      use_datasets=identifiers)

    def _filter_min_reflections(self, experiments, reflections):
        identifiers = []
        for expt, refl in zip(experiments, reflections):
            if len(refl) >= self.params.min_reflections:
                identifiers.append(expt.identifier)
        return select_datasets_on_ids(experiments, reflections,
                                      use_datasets=identifiers)

    def _map_to_minimum_cell(self, experiments, reflections):
        cb_op_ref_min = (
            experiments[0].crystal.get_crystal_symmetry()
            .change_of_basis_op_to_minimum_cell())
        for expt, refl in zip(experiments, reflections):
            expt.crystal = expt.crystal.change_basis(cb_op_ref_min)
            expt.crystal.set_space_group(sgtbx.space_group())
            refl["miller_index"] = cb_op_ref_min.apply(refl["miller_index"])
        return experiments, reflections

    @Subject.notify_event("performed_unit_cell_clustering")
    def _unit_cell_clustering(self, experiments):
        crystal_symmetries = [
            expt.crystal.get_crystal_symmetry() for expt in experiments
        ]
        lattice_ids = experiments.identifiers()
        from dials.algorithms.clustering.unit_cell import UnitCellCluster
        from xfel.clustering.cluster_groups import unit_cell_info

        ucs = UnitCellCluster.from_crystal_symmetries(crystal_symmetries,
                                                      lattice_ids=lattice_ids)
        self.unit_cell_clusters, self.unit_cell_dendrogram, _ = ucs.ab_cluster(
            self.params.unit_cell_clustering.threshold,
            log=self.params.unit_cell_clustering.log,
            labels="lattice_id",
            write_file_lists=False,
            schnell=False,
            doplot=False,
        )
        logger.info(unit_cell_info(self.unit_cell_clusters))
        largest_cluster_lattice_ids = None
        for cluster in self.unit_cell_clusters:
            cluster_lattice_ids = [m.lattice_id for m in cluster.members]
            if largest_cluster_lattice_ids is None:
                largest_cluster_lattice_ids = cluster_lattice_ids
            elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
                largest_cluster_lattice_ids = cluster_lattice_ids

        dataset_selection = largest_cluster_lattice_ids
        return dataset_selection
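# A hypothetical usage sketch (file names are placeholders, not from the
# source) of how the cosym Subject above might be driven once DIALS data
# files are on disk; it assumes the default phil parameters define
# min_reflections and the output file names.
from dxtbx.model.experiment_list import ExperimentListFactory
from dials.array_family import flex

experiments = ExperimentListFactory.from_json_file("scaled.expt",
                                                   check_format=False)
reflections = flex.reflection_table.from_file("scaled.refl")
# one reflection table per experiment, since __init__ zips the two lists
reflection_tables = reflections.split_by_experiment_id()

analysis = cosym(experiments, reflection_tables)  # params=None -> phil defaults
analysis.run()     # notifies observers registered for the "run_cosym" event
analysis.export()  # writes params.output.experiments / params.output.reflections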
def test_cosym(
    space_group,
    unit_cell,
    dimensions,
    sample_size,
    use_known_space_group,
    use_known_lattice_group,
    best_monoclinic_beta,
    run_in_tmpdir,
):
    import matplotlib

    matplotlib.use("Agg")
    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        unit_cell=unit_cell,
        unit_cell_volume=10000,
        d_min=1.5,
        map_to_p1=True,
        sample_size=sample_size,
        seed=1,
    )
    expected_space_group = sgtbx.space_group_info(symbol=space_group).group()

    params = phil_scope.extract()
    params.dimensions = dimensions
    params.best_monoclinic_beta = best_monoclinic_beta
    if use_known_space_group:
        params.space_group = expected_space_group.info()
    if use_known_lattice_group:
        params.lattice_group = expected_space_group.info()
    params.normalisation = None

    cosym = CosymAnalysis(datasets, params)
    cosym.run()
    d = cosym.as_dict()
    if not use_known_space_group:
        assert d["subgroup_scores"][0]["likelihood"] > 0.89
        assert (sgtbx.space_group(d["subgroup_scores"][0]["patterson_group"])
                == sgtbx.space_group_info(
                    space_group).group().build_derived_patterson_group())
        expected_sg = (sgtbx.space_group_info(
            space_group).group().build_derived_patterson_group())
    else:
        expected_sg = sgtbx.space_group_info(space_group).group()
    assert cosym.best_subgroup["best_subsym"].space_group() == expected_sg

    assert len(cosym.reindexing_ops) == len(expected_reindexing_ops)
    space_group_info = cosym.best_subgroup["subsym"].space_group_info()
    reference = None
    for d_id, cb_op in enumerate(cosym.reindexing_ops):
        reindexed = (datasets[d_id].change_basis(
            sgtbx.change_of_basis_op(cb_op)).customized_copy(
                space_group_info=space_group_info.change_basis(
                    cosym.cb_op_inp_min.inverse())))
        assert reindexed.is_compatible_unit_cell(), str(
            reindexed.crystal_symmetry())
        if reference:
            assert (reindexed.correlation(
                reference,
                assert_is_similar_symmetry=False).coefficient() > 0.99)
        else:
            reference = reindexed
def __init__(self, experiments, reflections, params=None):
    super(cosym, self).__init__(
        events=["run_cosym", "performed_unit_cell_clustering"])
    if params is None:
        params = phil_scope.extract()
    self.params = params

    self._reflections = []
    for refl, expt in zip(reflections, experiments):
        sel = get_selection_for_valid_image_ranges(refl, expt)
        self._reflections.append(refl.select(sel))

    self._experiments, self._reflections = self._filter_min_reflections(
        experiments, self._reflections)
    self.ids_to_identifiers_map = {}
    for table in self._reflections:
        self.ids_to_identifiers_map.update(table.experiment_identifiers())
    self.identifiers_to_ids_map = {
        value: key
        for key, value in self.ids_to_identifiers_map.items()
    }

    if len(self._experiments) > 1:
        # perform unit cell clustering
        identifiers = self._unit_cell_clustering(self._experiments)
        if len(identifiers) < len(self._experiments):
            logger.info(
                "Selecting subset of %i datasets for cosym analysis: %s",
                len(identifiers),
                str(identifiers),
            )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections,
                use_datasets=identifiers)

    # Map experiments and reflections to minimum cell
    cb_ops = change_of_basis_ops_to_minimum_cell(
        self._experiments,
        params.lattice_symmetry_max_delta,
        params.relative_length_tolerance,
        params.absolute_angle_tolerance,
    )
    exclude = [
        expt.identifier
        for expt, cb_op in zip(self._experiments, cb_ops)
        if not cb_op
    ]
    if len(exclude):
        logger.info(
            f"Rejecting {len(exclude)} datasets from cosym analysis "
            f"(couldn't determine consistent cb_op to minimum cell):\n"
            f"{exclude}",
        )
        self._experiments, self._reflections = select_datasets_on_identifiers(
            self._experiments, self._reflections, exclude_datasets=exclude)
        cb_ops = list(filter(None, cb_ops))

    # Eliminate reflections that are systematically absent due to centring
    # of the lattice, otherwise they would lead to non-integer miller indices
    # when reindexing to a primitive setting
    self._reflections = eliminate_sys_absent(self._experiments,
                                             self._reflections)

    self._experiments, self._reflections = apply_change_of_basis_ops(
        self._experiments, self._reflections, cb_ops)

    # transform models into miller arrays
    datasets = filtered_arrays_from_experiments_reflections(
        self.experiments,
        self.reflections,
        outlier_rejection_after_filter=False,
        partiality_threshold=params.partiality_threshold,
    )
    datasets = [
        ma.as_anomalous_array().merge_equivalents().array()
        for ma in datasets
    ]

    self.cosym_analysis = CosymAnalysis(datasets, self.params)
class cosym(Subject):
    def __init__(self, experiments, reflections, params=None):
        super(cosym, self).__init__(
            events=["run_cosym", "performed_unit_cell_clustering"])
        if params is None:
            params = phil_scope.extract()
        self.params = params

        self._reflections = []
        for refl, expt in zip(reflections, experiments):
            sel = get_selection_for_valid_image_ranges(refl, expt)
            self._reflections.append(refl.select(sel))

        self._experiments, self._reflections = self._filter_min_reflections(
            experiments, self._reflections)
        self.ids_to_identifiers_map = {}
        for table in self._reflections:
            self.ids_to_identifiers_map.update(table.experiment_identifiers())
        self.identifiers_to_ids_map = {
            value: key
            for key, value in self.ids_to_identifiers_map.items()
        }

        if len(self._experiments) > 1:
            # perform unit cell clustering
            identifiers = self._unit_cell_clustering(self._experiments)
            if len(identifiers) < len(self._experiments):
                logger.info(
                    "Selecting subset of %i datasets for cosym analysis: %s",
                    len(identifiers),
                    str(identifiers),
                )
                self._experiments, self._reflections = select_datasets_on_identifiers(
                    self._experiments, self._reflections,
                    use_datasets=identifiers)

        # Map experiments and reflections to minimum cell
        cb_ops = change_of_basis_ops_to_minimum_cell(
            self._experiments,
            params.lattice_symmetry_max_delta,
            params.relative_length_tolerance,
            params.absolute_angle_tolerance,
        )
        exclude = [
            expt.identifier
            for expt, cb_op in zip(self._experiments, cb_ops)
            if not cb_op
        ]
        if len(exclude):
            logger.info(
                f"Rejecting {len(exclude)} datasets from cosym analysis "
                f"(couldn't determine consistent cb_op to minimum cell):\n"
                f"{exclude}",
            )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections,
                exclude_datasets=exclude)
            cb_ops = list(filter(None, cb_ops))

        # Eliminate reflections that are systematically absent due to centring
        # of the lattice, otherwise they would lead to non-integer miller indices
        # when reindexing to a primitive setting
        self._reflections = eliminate_sys_absent(self._experiments,
                                                 self._reflections)

        self._experiments, self._reflections = apply_change_of_basis_ops(
            self._experiments, self._reflections, cb_ops)

        # transform models into miller arrays
        datasets = filtered_arrays_from_experiments_reflections(
            self.experiments,
            self.reflections,
            outlier_rejection_after_filter=False,
            partiality_threshold=params.partiality_threshold,
        )
        datasets = [
            ma.as_anomalous_array().merge_equivalents().array()
            for ma in datasets
        ]

        self.cosym_analysis = CosymAnalysis(datasets, self.params)

    @property
    def experiments(self):
        """Return the experiment list."""
        return self._experiments

    @property
    def reflections(self):
        """Return the list of reflection tables."""
        return self._reflections

    @Subject.notify_event(event="run_cosym")
    def run(self):
        self.cosym_analysis.run()

        reindexing_ops = {}
        sym_op_counts = {
            cluster_id: collections.Counter(
                ops[cluster_id]
                for ops in self.cosym_analysis.reindexing_ops.values())
            for cluster_id in range(self.params.cluster.n_clusters)
        }
        identity_counts = [
            counts["x,y,z"] for counts in sym_op_counts.values()
        ]
        cluster_id = identity_counts.index(max(identity_counts))

        for dataset_id in self.cosym_analysis.reindexing_ops:
            if cluster_id in self.cosym_analysis.reindexing_ops[dataset_id]:
                cb_op = self.cosym_analysis.reindexing_ops[dataset_id][
                    cluster_id]
                reindexing_ops.setdefault(cb_op, [])
                reindexing_ops[cb_op].append(dataset_id)

        logger.info("Reindexing operators:")
        for cb_op, datasets in reindexing_ops.items():
            logger.info(cb_op)
            logger.info(datasets)

        self._apply_reindexing_operators(
            reindexing_ops, subgroup=self.cosym_analysis.best_subgroup)

    def export(self):
        """Output the datafiles for cosym.

        This includes the cosym.json, reflections and experiments files."""
        reindexed_reflections = flex.reflection_table()
        for refl in self._reflections:
            reindexed_reflections.extend(refl)
        reindexed_reflections.reset_ids()

        logger.info("Saving reindexed experiments to %s",
                    self.params.output.experiments)
        self._experiments.as_file(self.params.output.experiments)
        logger.info("Saving reindexed reflections to %s",
                    self.params.output.reflections)
        reindexed_reflections.as_file(self.params.output.reflections)

    def _apply_reindexing_operators(self, reindexing_ops, subgroup=None):
        """Apply the reindexing operators to the reflections and experiments."""
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            if subgroup is not None:
                cb_op = subgroup["cb_op_inp_best"] * cb_op
            for dataset_id in dataset_ids:
                expt = self._experiments[dataset_id]
                refl = self._reflections[dataset_id]
                expt.crystal = expt.crystal.change_basis(cb_op)
                if subgroup is not None:
                    expt.crystal.set_space_group(
                        subgroup["best_subsym"].space_group()
                        .build_derived_acentric_group())
                expt.crystal.set_unit_cell(
                    expt.crystal.get_space_group().average_unit_cell(
                        expt.crystal.get_unit_cell()))
                refl["miller_index"] = cb_op.apply(refl["miller_index"])

    def _filter_min_reflections(self, experiments, reflections):
        identifiers = []
        for expt, refl in zip(experiments, reflections):
            if len(refl) >= self.params.min_reflections:
                identifiers.append(expt.identifier)
        return select_datasets_on_identifiers(experiments, reflections,
                                              use_datasets=identifiers)

    @Subject.notify_event("performed_unit_cell_clustering")
    def _unit_cell_clustering(self, experiments):
        crystal_symmetries = [
            expt.crystal.get_crystal_symmetry() for expt in experiments
        ]
        # lattice ids used to label plots, so want numerical ids
        lattice_ids = [
            self.identifiers_to_ids_map[i] for i in experiments.identifiers()
        ]
        ucs = UnitCellCluster.from_crystal_symmetries(crystal_symmetries,
                                                      lattice_ids=lattice_ids)
        self.unit_cell_clusters, self.unit_cell_dendrogram, _ = ucs.ab_cluster(
            self.params.unit_cell_clustering.threshold,
            log=self.params.unit_cell_clustering.log,
            labels="lattice_id",
            write_file_lists=False,
            schnell=False,
            doplot=False,
        )
        logger.info(unit_cell_info(self.unit_cell_clusters))
        largest_cluster_lattice_ids = None
        for cluster in self.unit_cell_clusters:
            cluster_lattice_ids = [m.lattice_id for m in cluster.members]
            if largest_cluster_lattice_ids is None:
                largest_cluster_lattice_ids = cluster_lattice_ids
            elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
                largest_cluster_lattice_ids = cluster_lattice_ids

        dataset_selection = largest_cluster_lattice_ids
        # now convert to actual identifiers for selection
        return [self.ids_to_identifiers_map[i] for i in dataset_selection]
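# A standalone sketch with made-up operators illustrating the cluster-selection
# rule in run() above: count, per cluster, how often each dataset's proposed
# operator is the identity "x,y,z", then keep the cluster where the identity is
# most common, so the majority of datasets keep their original indexing.
import collections

reindexing_ops = {  # dataset_id -> {cluster_id: cb_op}, shaped like CosymAnalysis output
    0: {0: "x,y,z", 1: "-x,-y,z"},
    1: {0: "x,y,z", 1: "-x,-y,z"},
    2: {0: "-x,-y,z", 1: "x,y,z"},
}
sym_op_counts = {
    cluster_id: collections.Counter(ops[cluster_id]
                                    for ops in reindexing_ops.values())
    for cluster_id in range(2)
}
identity_counts = [counts["x,y,z"] for counts in sym_op_counts.values()]
best_cluster = identity_counts.index(max(identity_counts))
print(best_cluster)  # 0: two of the three datasets already use x,y,z there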
def test_cosym(
    space_group,
    unit_cell,
    dimensions,
    sample_size,
    use_known_space_group,
    use_known_lattice_group,
    best_monoclinic_beta,
    run_in_tmpdir,
):
    import matplotlib

    matplotlib.use("Agg")
    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        unit_cell=unit_cell,
        unit_cell_volume=10000,
        d_min=1.5,
        map_to_p1=True,
        sample_size=sample_size,
        seed=1,
    )
    expected_space_group = sgtbx.space_group_info(symbol=space_group).group()

    params = phil_scope.extract()
    params.cluster.n_clusters = len(expected_reindexing_ops)
    params.dimensions = dimensions
    params.best_monoclinic_beta = best_monoclinic_beta
    if use_known_space_group:
        params.space_group = expected_space_group.info()
    if use_known_lattice_group:
        params.lattice_group = expected_space_group.info()
    params.normalisation = None

    cosym = CosymAnalysis(datasets, params)
    cosym.run()
    d = cosym.as_dict()
    if not use_known_space_group:
        assert d["subgroup_scores"][0]["likelihood"] > 0.89
        assert (sgtbx.space_group(d["subgroup_scores"][0]["patterson_group"])
                == sgtbx.space_group_info(
                    space_group).group().build_derived_patterson_group())

    reindexing_ops = {}
    for dataset_id in cosym.reindexing_ops.keys():
        if 0 in cosym.reindexing_ops[dataset_id]:
            cb_op = cosym.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, set())
            reindexing_ops[cb_op].add(dataset_id)

    assert len(reindexing_ops) == len(expected_reindexing_ops)

    if use_known_space_group:
        expected_sg = sgtbx.space_group_info(space_group).group()
    else:
        expected_sg = (sgtbx.space_group_info(
            space_group).group().build_derived_patterson_group())
    assert cosym.best_subgroup["best_subsym"].space_group() == expected_sg

    space_group_info = cosym.best_subgroup["subsym"].space_group_info()
    for cb_op, ridx_set in reindexing_ops.items():
        for expected_set in expected_reindexing_ops.values():
            assert (len(ridx_set.symmetric_difference(expected_set)) == 0) or (
                len(ridx_set.intersection(expected_set)) == 0)
        for d_id in ridx_set:
            reindexed = (datasets[d_id].change_basis(
                sgtbx.change_of_basis_op(cb_op)).customized_copy(
                    space_group_info=space_group_info.change_basis(
                        cosym.cb_op_inp_min.inverse())))
            assert reindexed.is_compatible_unit_cell(), str(
                reindexed.crystal_symmetry())
class multi_crystal_analysis:
    def __init__(self, unmerged_intensities, labels=None, prefix=None):
        self.unmerged_intensities = unmerged_intensities
        self._intensities_all = None
        self._labels_all = flex.size_t()
        if prefix is None:
            prefix = ""
        self._prefix = prefix

        self.intensities = unmerged_intensities
        self.individual_merged_intensities = []
        if labels is None:
            labels = ["%i" % (i + 1) for i in range(len(self.intensities))]
        assert len(labels) == len(self.intensities)
        self.labels = labels

        for i, unmerged in enumerate(self.intensities):
            self.individual_merged_intensities.append(
                unmerged.merge_equivalents().array().set_info(
                    unmerged.info()))
            if self._intensities_all is None:
                self._intensities_all = unmerged.deep_copy()
            else:
                self._intensities_all = self._intensities_all.concatenate(
                    unmerged, assert_is_similar_symmetry=False)
            self._labels_all.extend(flex.size_t(unmerged.size(), i))

        self.run_cosym()
        (
            correlation_matrix,
            linkage_matrix,
        ) = self.compute_correlation_coefficient_matrix()

        cos_angle_matrix, ca_linkage_matrix = self.compute_cos_angle_matrix()

        d = self.to_plotly_json(correlation_matrix, linkage_matrix,
                                labels=labels)
        with open("%sintensity_clusters.json" % self._prefix, "w") as f:
            json.dump(d, f, indent=2)

        d = self.to_plotly_json(cos_angle_matrix,
                                ca_linkage_matrix,
                                labels=labels,
                                matrix_type="cos_angle")
        with open("%scos_angle_clusters.json" % self._prefix, "w") as f:
            json.dump(d, f, indent=2)

        self.cos_angle_linkage_matrix = ca_linkage_matrix
        self.cos_angle_matrix = cos_angle_matrix
        self.cos_angle_clusters = self.cluster_info(
            self.linkage_matrix_to_dict(self.cos_angle_linkage_matrix))
        self.cc_linkage_matrix = linkage_matrix
        self.cc_matrix = correlation_matrix
        self.cc_clusters = self.cluster_info(
            self.linkage_matrix_to_dict(self.cc_linkage_matrix))

        logger.info("\nIntensity correlation clustering summary:")
        logger.info(
            tabulate(self.as_table(self.cc_clusters),
                     headers="firstrow",
                     tablefmt="rst"))
        logger.info("\nCos(angle) clustering summary:")
        logger.info(
            tabulate(
                self.as_table(self.cos_angle_clusters),
                headers="firstrow",
                tablefmt="rst",
            ))

    def cluster_info(self, cluster_dict):
        info = []
        for cluster_id, cluster in cluster_dict.items():
            sel_cluster = flex.bool(self._labels_all.size(), False)
            uc_params = [flex.double() for i in range(6)]
            for j in cluster["datasets"]:
                sel_cluster |= self._labels_all == j
                uc_j = self.intensities[j - 1].unit_cell().parameters()
                for i in range(6):
                    uc_params[i].append(uc_j[i])
            average_uc = [flex.mean(uc_params[i]) for i in range(6)]
            intensities_cluster = self._intensities_all.select(sel_cluster)
            merging = intensities_cluster.merge_equivalents()
            merged_intensities = merging.array()
            multiplicities = merging.redundancies()
            dataset_ids = cluster["datasets"]
            labels = [self.labels[i - 1] for i in dataset_ids]
            info.append(
                ClusterInfo(
                    cluster_id,
                    labels,
                    flex.mean(multiplicities.data().as_double()),
                    merged_intensities.completeness(),
                    unit_cell=average_uc,
                    height=cluster.get("height"),
                ))
        return info

    def as_table(self, cluster_info):
        from libtbx.str_utils import wordwrap

        headers = [
            "Cluster",
            "No. datasets",
            "Datasets",
            "Height",
            "Multiplicity",
            "Completeness",
        ]
        rows = []
        for info in cluster_info:
            rows.append([
                "%i" % info.cluster_id,
                "%i" % len(info.labels),
                wordwrap(" ".join("%s" % l for l in info.labels)),
                "%.2g" % info.height,
                "%.1f" % info.multiplicity,
                "%.2f" % info.completeness,
            ])
        rows.insert(0, headers)
        return rows

    @staticmethod
    def linkage_matrix_to_dict(linkage_matrix):
        tree = hierarchy.to_tree(linkage_matrix, rd=False)

        d = {}

        # http://w3facility.org/question/scipy-dendrogram-to-json-for-d3-js-tree-visualisation/
        # https://gist.github.com/mdml/7537455

        def add_node(node):
            if node.is_leaf():
                return
            cluster_id = node.get_id() - len(linkage_matrix) - 1
            row = linkage_matrix[cluster_id]
            d[cluster_id + 1] = {
                "datasets": [i + 1 for i in sorted(node.pre_order())],
                "height": row[2],
            }

            # Recursively add the current node's children
            if node.left:
                add_node(node.left)
            if node.right:
                add_node(node.right)

        add_node(tree)

        return OrderedDict(sorted(d.items()))

    def run_cosym(self):
        from dials.algorithms.symmetry.cosym import phil_scope

        params = phil_scope.extract()
        from dials.algorithms.symmetry.cosym import CosymAnalysis

        datasets = [
            d.eliminate_sys_absent(integral_only=True).primitive_setting()
            for d in self.individual_merged_intensities
        ]
        params.lattice_group = self.individual_merged_intensities[
            0].space_group_info()
        params.space_group = self.individual_merged_intensities[
            0].space_group_info()
        params.cluster.method = "dbscan"

        self.cosym = CosymAnalysis(datasets, params)
        self.cosym.run()

    def compute_correlation_coefficient_matrix(self):
        import scipy.spatial.distance as ssd

        correlation_matrix = self.cosym.target.rij_matrix

        for i in range(correlation_matrix.all()[0]):
            correlation_matrix[i, i] = 1

        # clip values of correlation matrix to account for floating point errors
        correlation_matrix.set_selected(correlation_matrix < -1, -1)
        correlation_matrix.set_selected(correlation_matrix > 1, 1)
        diffraction_dissimilarity = 1 - correlation_matrix

        dist_mat = diffraction_dissimilarity.as_numpy_array()
        assert ssd.is_valid_dm(dist_mat, tol=1e-12)
        # convert the redundant n*n square matrix form into a condensed nC2 array
        dist_mat = ssd.squareform(dist_mat, checks=False)

        linkage_matrix = hierarchy.linkage(dist_mat, method="average")

        return correlation_matrix, linkage_matrix

    def compute_cos_angle_matrix(self):
        import scipy.spatial.distance as ssd

        dist_mat = ssd.pdist(self.cosym.coords.as_numpy_array(),
                             metric="cosine")
        cos_angle = 1 - ssd.squareform(dist_mat)
        linkage_matrix = hierarchy.linkage(dist_mat, method="average")
        return flex.double(cos_angle), linkage_matrix

    @staticmethod
    def to_plotly_json(correlation_matrix,
                       linkage_matrix,
                       labels=None,
                       matrix_type="correlation"):
        assert matrix_type in ("correlation", "cos_angle")

        ddict = hierarchy.dendrogram(linkage_matrix,
                                     color_threshold=0.05,
                                     labels=labels,
                                     show_leaf_counts=False)
        y2_dict = scipy_dendrogram_to_plotly_json(ddict)  # above heatmap
        x2_dict = copy.deepcopy(y2_dict)  # left of heatmap, rotated
        for d in y2_dict["data"]:
            d["yaxis"] = "y2"
            d["xaxis"] = "x2"

        for d in x2_dict["data"]:
            x = d["x"]
            y = d["y"]
            d["x"] = y
            d["y"] = x
            d["yaxis"] = "y3"
            d["xaxis"] = "x3"

        D = correlation_matrix.as_numpy_array()
        index = ddict["leaves"]
        D = D[index, :]
        D = D[:, index]

        ccdict = {
            "data": [{
                "name": "%s_matrix" % matrix_type,
                "x": list(range(D.shape[0])),
                "y": list(range(D.shape[1])),
                "z": D.tolist(),
                "type": "heatmap",
                "colorbar": {
                    "title": ("Correlation coefficient"
                              if matrix_type == "correlation"
                              else "cos(angle)"),
                    "titleside": "right",
                    "xpad": 0,
                },
"colorscale": "YIOrRd", "xaxis": "x", "yaxis": "y", }], "layout": { "autosize": False, "bargap": 0, "height": 1000, "hovermode": "closest", "margin": { "r": 20, "t": 50, "autoexpand": True, "l": 20 }, "showlegend": False, "title": "Dendrogram Heatmap", "width": 1000, "xaxis": { "domain": [0.2, 0.9], "mirror": "allticks", "showgrid": False, "showline": False, "showticklabels": True, "tickmode": "array", "ticks": "", "ticktext": y2_dict["layout"]["xaxis"]["ticktext"], "tickvals": list(range(len(y2_dict["layout"]["xaxis"]["ticktext"]))), "tickangle": 300, "title": "", "type": "linear", "zeroline": False, }, "yaxis": { "domain": [0, 0.78], "anchor": "x", "mirror": "allticks", "showgrid": False, "showline": False, "showticklabels": True, "tickmode": "array", "ticks": "", "ticktext": y2_dict["layout"]["xaxis"]["ticktext"], "tickvals": list(range(len(y2_dict["layout"]["xaxis"]["ticktext"]))), "title": "", "type": "linear", "zeroline": False, }, "xaxis2": { "domain": [0.2, 0.9], "anchor": "y2", "showgrid": False, "showline": False, "showticklabels": False, "zeroline": False, }, "yaxis2": { "domain": [0.8, 1], "anchor": "x2", "showgrid": False, "showline": False, "zeroline": False, }, "xaxis3": { "domain": [0.0, 0.1], "anchor": "y3", "range": [max(max(d["x"]) for d in x2_dict["data"]), 0], "showgrid": False, "showline": False, "tickangle": 300, "zeroline": False, }, "yaxis3": { "domain": [0, 0.78], "anchor": "x3", "showgrid": False, "showline": False, "showticklabels": False, "zeroline": False, }, }, } d = ccdict d["data"].extend(y2_dict["data"]) d["data"].extend(x2_dict["data"]) d["clusters"] = multi_crystal_analysis.linkage_matrix_to_dict( linkage_matrix) return d
def test_cosym(
    space_group,
    unit_cell,
    dimensions,
    sample_size,
    use_known_space_group,
    use_known_lattice_group,
    run_in_tmpdir,
):
    import matplotlib

    matplotlib.use("Agg")
    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        unit_cell=unit_cell,
        unit_cell_volume=10000,
        d_min=1.5,
        map_to_p1=True,
        sample_size=sample_size,
    )
    expected_space_group = sgtbx.space_group_info(symbol=space_group).group()

    # Work around the fact that the minimum cell reduction can occasionally be
    # unstable. The input *should* already be the minimum cell, but for some
    # combinations of unit cell parameters the change_of_basis_op_to_minimum_cell
    # is never the identity. Therefore apply this cb_op to the
    # expected_reindexing_ops prior to the comparison.
    cb_op_inp_min = datasets[0].crystal_symmetry(
    ).change_of_basis_op_to_minimum_cell()
    expected_reindexing_ops = {
        (cb_op_inp_min.inverse() * sgtbx.change_of_basis_op(cb_op) *
         cb_op_inp_min).as_xyz(): dataset_ids
        for cb_op, dataset_ids in expected_reindexing_ops.items()
    }

    params = phil_scope.extract()
    params.cluster.n_clusters = len(expected_reindexing_ops)
    params.dimensions = dimensions
    if use_known_space_group:
        params.space_group = expected_space_group.info()
    if use_known_lattice_group:
        params.lattice_group = expected_space_group.info()

    cosym = CosymAnalysis(datasets, params)
    cosym.run()
    d = cosym.as_dict()
    if not use_known_space_group:
        assert d["subgroup_scores"][0]["likelihood"] > 0.89
        assert (sgtbx.space_group(d["subgroup_scores"][0]["patterson_group"])
                == sgtbx.space_group_info(
                    space_group).group().build_derived_patterson_group())

    space_groups = {}
    reindexing_ops = {}
    for dataset_id in cosym.reindexing_ops.keys():
        if 0 in cosym.reindexing_ops[dataset_id]:
            cb_op = cosym.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, set())
            reindexing_ops[cb_op].add(dataset_id)
        if dataset_id in cosym.space_groups:
            space_groups.setdefault(cosym.space_groups[dataset_id], set())
            space_groups[cosym.space_groups[dataset_id]].add(dataset_id)

    assert len(reindexing_ops) == len(expected_reindexing_ops)
    assert sorted(reindexing_ops.keys()) == sorted(
        expected_reindexing_ops.keys())
    assert len(space_groups) == 1

    if use_known_space_group:
        expected_sg = sgtbx.space_group_info(space_group).group()
    else:
        expected_sg = (sgtbx.space_group_info(
            space_group).group().build_derived_patterson_group())
    assert cosym.best_subgroup["best_subsym"].space_group() == expected_sg

    for cb_op, ridx_set in reindexing_ops.items():
        for expected_set in expected_reindexing_ops.values():
            assert (len(ridx_set.symmetric_difference(expected_set)) == 0) or (
                len(ridx_set.intersection(expected_set)) == 0)
        for d_id in ridx_set:
            # list(...) needed in Python 3, where dict.keys() is not indexable
            reindexed = (datasets[d_id].change_basis(cb_op).customized_copy(
                space_group_info=list(space_groups.keys())[0].info()))
            assert reindexed.is_compatible_unit_cell(), str(
                reindexed.crystal_symmetry())
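# A minimal sketch, assuming only the cctbx API, of the conjugation used in the
# workaround above: re-expressing a reindexing operator defined in the input
# setting in the minimum-cell setting. The two operators here are arbitrary
# examples, not values from the test.
from cctbx import sgtbx

cb_op_inp_min = sgtbx.change_of_basis_op("-x,-z,-y")  # example input -> minimum cell
cb_op = sgtbx.change_of_basis_op("-x,-y,z")           # example reindexing op, input setting
cb_op_min = cb_op_inp_min.inverse() * cb_op * cb_op_inp_min
print(cb_op_min.as_xyz())  # the same operation, expressed in the minimum cell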