Exemple #1
0
def run(_args):
    """Cluster unit cells found in folders of pickles and print a summary.

    Parameters
    ----------
    _args : namespace-like object
        Attributes read here: ``folders``, ``n``, ``t``, ``log``,
        ``nofiles``, ``schnell`` and ``noplot``.

    Raises
    ------
    IOError
        If ``_args.folders`` is empty.
    """
    # The previous guard compared the namespace itself with an integer
    # (``_args < 2``), which can never detect a missing folder list.
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")
    # Use the parameter, not a module-level ``args`` that may not exist when
    # this function is imported and called from elsewhere.
    ucs = Cluster.from_directories(_args.folders,
                                   "cxi_targt_uc",
                                   n_images=_args.n)

    # A falsy ``noplot`` means "do not plot": no figure is created and the
    # same falsy value is forwarded as ``doplot``.
    if not _args.noplot:
        clusters, _ = ucs.ab_cluster(_args.t,
                                     log=_args.log,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=_args.noplot)
    else:
        plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, _ = ucs.ab_cluster(_args.t,
                                     log=_args.log,
                                     ax=ax,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=_args.noplot)
        plt.tight_layout()
        plt.show()

    # Single-argument call form prints identically on Python 2 and 3.
    print(unit_cell_info(clusters))
Exemple #2
0
def run(_args):
    """Draw the combined orientation / intensity / dendrogram figure.

    Parameters
    ----------
    _args : namespace-like object
        Attributes read here: ``folders``, ``t``, ``log`` and ``fast``.

    Raises
    ------
    IOError
        If ``_args.folders`` is empty.
    """
    # The previous guard compared the namespace itself with an integer
    # (``_args < 2``), which can never detect a missing folder list.
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")

    ucs = Cluster.from_directories(_args.folders, "cluster_42")
    logging.info("Data imported.")

    #  Set up mega-plot: two rows of three small panels above one wide panel.
    plt.figure(figsize=(22, 15))
    gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 3])
    orr_axes = [plt.subplot(gs[0, col]) for col in range(3)]
    inten_axes = [plt.subplot(gs[1, col]) for col in range(3)]
    clust_ax = plt.subplot(gs[2, :])

    # The returned axes are not needed afterwards; the calls are kept for the
    # drawing they do on the axes passed in.
    ucs.visualise_orientational_distribution(orr_axes, cbar=True)
    ucs.intensity_statistics(inten_axes)
    clusters, _ = ucs.ab_cluster(
        _args.t, log=_args.log, ax=clust_ax, schnell=_args.fast, write_file_lists=False
    )

    plt.tight_layout()

    # Single-argument call form prints identically on Python 2 and 3.
    print(unit_cell_info(clusters))
    plt.show()
Exemple #3
0
 def __init__(self,
              experiments,
              reflections,
              dendrogram=False,
              threshold=1000,
              n_max=None):
     """Cluster the supplied experiments by unit cell.

     Stores the clusters on ``self.clusters`` and a mapping from the integer
     taken from each cluster name (the part after the first underscore) to
     that cluster's members on ``self.clustered_frames``. When ``dendrogram``
     is true the dendrogram is drawn on the current axes and shown.

     Raises ``Sorry`` if the xfel clustering modules cannot be imported.
     """
     try:
         from xfel.clustering.cluster import Cluster
         from xfel.clustering.cluster_groups import unit_cell_info
     except ImportError:
         raise Sorry("clustering is not configured")
     import matplotlib.pyplot as plt

     ucs = Cluster.from_expts(refl_table=reflections,
                              expts_list=experiments,
                              n_images=n_max)
     # Only grab the current axes when a dendrogram is actually wanted.
     target_axes = plt.gca() if dendrogram else None
     self.clusters, _ = ucs.ab_cluster(threshold=threshold,
                                       log=True,  # log scale
                                       ax=target_axes,
                                       write_file_lists=False,
                                       schnell=False,
                                       doplot=dendrogram)
     print(unit_cell_info(self.clusters))

     frames_by_number = {}
     for found in self.clusters:
         cluster_number = int(found.cname.split("_")[1])
         frames_by_number[cluster_number] = found.members
     self.clustered_frames = frames_by_number

     if dendrogram:
         plt.tight_layout()
         plt.show()
def do_cluster_analysis(crystal_symmetries, params):
    """Cluster the given crystal symmetries and print a summary.

    A dendrogram figure is produced when ``params.plot.show`` is truthy or
    ``params.plot.name`` is set; it is saved to ``params.plot.name`` and/or
    shown accordingly. Returns the clusters from ``ab_cluster``.
    """
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries)
    plot_opts = params.plot

    # Guard clause: neither a window nor an output file was requested.
    if not (plot_opts.show or plot_opts.name is not None):
        clusters, _ = ucs.ab_cluster(params.threshold,
                                     log=plot_opts.log,
                                     write_file_lists=False,
                                     doplot=False)
        print(unit_cell_info(clusters))
        return clusters

    if not plot_opts.show:
        import matplotlib

        # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
        # The backend has to be chosen before pyplot is imported below.
        matplotlib.use("Agg")  # use a non-interactive backend
    import matplotlib.pyplot as plt

    plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
    clusters, _ = ucs.ab_cluster(
        params.threshold,
        log=plot_opts.log,
        ax=plt.gca(),
        write_file_lists=False,
        doplot=True,
    )
    print(unit_cell_info(clusters))
    plt.tight_layout()

    output_name = plot_opts.name
    if output_name is not None:
        plt.savefig(output_name)
    if plot_opts.show:
        plt.show()

    return clusters
Exemple #5
0
def run(_args):
    """Cluster unit cells from pickle files or folders and print a summary.

    Parameters
    ----------
    _args : namespace-like object
        Attributes read here: ``dirs``, ``paths``, ``n``, ``dials``, ``t``,
        ``log``, ``nofiles``, ``schnell`` and ``noplot``.

    Raises
    ------
    IOError
        If ``_args.dirs`` is empty.
    """
    # The previous guard compared the namespace itself with an integer
    # (``_args < 2``), which can never detect a missing location list.
    if not _args.dirs:
        raise IOError("Must provide location(s) of pickles")

    # ``paths`` selects whether ``dirs`` is handed to the file-based or the
    # directory-based constructor.
    if _args.paths:
        ucs = Cluster.from_files(raw_input=_args.dirs,
                                 n_images=_args.n,
                                 dials=_args.dials)
    else:
        ucs = Cluster.from_directories(_args.dirs,
                                       n_images=_args.n,
                                       dials=_args.dials)

    # A falsy ``noplot`` means "do not plot": no figure is created and the
    # same falsy value is forwarded as ``doplot``.
    if not _args.noplot:
        clusters, _ = ucs.ab_cluster(_args.t,
                                     log=_args.log,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=_args.noplot)
        # Single-argument call form prints identically on Python 2 and 3.
        print(unit_cell_info(clusters))
    else:
        plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, _ = ucs.ab_cluster(_args.t,
                                     log=_args.log,
                                     ax=ax,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=_args.noplot)
        # Printed before the (potentially blocking) plt.show() call.
        print(unit_cell_info(clusters))
        plt.tight_layout()
        plt.show()
Exemple #6
0
def run(_args):
    """Draw the combined orientation / intensity / dendrogram figure.

    Parameters
    ----------
    _args : namespace-like object
        Attributes read here: ``folders``, ``t``, ``log`` and ``fast``.

    Raises
    ------
    IOError
        If ``_args.folders`` is empty.
    """
    # The previous guard compared the namespace itself with an integer
    # (``_args < 2``), which can never detect a missing folder list.
    if not _args.folders:
        raise IOError("Must give at least one path to folder of pickles")

    ucs = Cluster.from_directories(_args.folders, "cluster_42")
    logging.info("Data imported.")

    #  Set up mega-plot: two rows of three small panels above one wide panel.
    plt.figure(figsize=(22, 15))
    gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 3])
    orr_axes = [plt.subplot(gs[0, col]) for col in range(3)]
    inten_axes = [plt.subplot(gs[1, col]) for col in range(3)]
    clust_ax = plt.subplot(gs[2, :])

    # The returned axes are not needed afterwards; the calls are kept for the
    # drawing they do on the axes passed in.
    ucs.visualise_orientational_distribution(orr_axes, cbar=True)
    ucs.intensity_statistics(inten_axes)
    clusters, _ = ucs.ab_cluster(_args.t,
                                 log=_args.log,
                                 ax=clust_ax,
                                 schnell=_args.fast,
                                 write_file_lists=False)

    plt.tight_layout()

    # Single-argument call form prints identically on Python 2 and 3.
    print(unit_cell_info(clusters))
    plt.show()
    def _unit_cell_clustering(self, experiments):
        """Cluster experiments by unit cell and pick the largest cluster.

        Stores the clusters and dendrogram on ``self.unit_cell_clusters`` and
        ``self.unit_cell_dendrogram``, logs a summary, and returns the lattice
        ids of the cluster with the most members (the first one on a tie), or
        ``None`` when there are no clusters.
        """
        symmetries = [
            expt.crystal.get_crystal_symmetry() for expt in experiments
        ]
        identifiers = experiments.identifiers()
        from dials.algorithms.clustering.unit_cell import UnitCellCluster
        from xfel.clustering.cluster_groups import unit_cell_info

        ucs = UnitCellCluster.from_crystal_symmetries(symmetries,
                                                      lattice_ids=identifiers)
        clustering_params = self.params.unit_cell_clustering
        self.unit_cell_clusters, self.unit_cell_dendrogram, _ = ucs.ab_cluster(
            clustering_params.threshold,
            log=clustering_params.log,
            labels="lattice_id",
            write_file_lists=False,
            schnell=False,
            doplot=False,
        )
        logger.info(unit_cell_info(self.unit_cell_clusters))

        ids_per_cluster = [[member.lattice_id for member in cluster.members]
                           for cluster in self.unit_cell_clusters]
        if not ids_per_cluster:
            return None
        # max() keeps the first of equally long lists, like a strict ">" scan.
        return max(ids_per_cluster, key=len)
Exemple #8
0
    def __init__(self,
                 experiments,
                 reflections,
                 dendrogram=False,
                 threshold=1000,
                 n_max=None):
        """Cluster the supplied experiments by unit cell.

        Stores the clusters on ``self.clusters`` and a mapping from the
        integer taken from each cluster name (the part after the first
        underscore) to that cluster's members on ``self.clustered_frames``.
        When ``dendrogram`` is true the dendrogram is drawn on the current
        matplotlib axes and shown.
        """
        axes = None
        if dendrogram:
            # matplotlib is only needed (and imported) when plotting.
            import matplotlib.pyplot as plt

            axes = plt.gca()

        ucs = xfel.clustering.cluster.Cluster.from_expts(
            refl_table=reflections, expts_list=experiments, n_images=n_max)
        cluster_result = ucs.ab_cluster(
            threshold=threshold,
            log=True,  # log scale
            ax=axes,
            write_file_lists=False,
            schnell=False,
            doplot=dendrogram,
        )
        self.clusters, _ = cluster_result
        print(unit_cell_info(self.clusters))

        frames_by_number = {}
        for found in self.clusters:
            cluster_number = int(found.cname.split("_")[1])
            frames_by_number[cluster_number] = found.members
        self.clustered_frames = frames_by_number

        if dendrogram:
            plt.tight_layout()
            plt.show()
Exemple #9
0
    def _unit_cell_clustering(self, experiments):
        """Cluster experiments by unit cell and select the largest cluster.

        Stores the clusters and dendrogram on ``self.unit_cell_clusters`` and
        ``self.unit_cell_dendrogram``, logs a summary, and returns the
        experiment identifiers of the cluster with the most members (the
        first one on a tie).
        """
        symmetries = [
            expt.crystal.get_crystal_symmetry() for expt in experiments
        ]
        # lattice ids used to label plots, so want numerical ids
        numeric_ids = [
            self.identifiers_to_ids_map[ident]
            for ident in experiments.identifiers()
        ]

        ucs = UnitCellCluster.from_crystal_symmetries(symmetries,
                                                      lattice_ids=numeric_ids)
        clustering_params = self.params.unit_cell_clustering
        self.unit_cell_clusters, self.unit_cell_dendrogram, _ = ucs.ab_cluster(
            clustering_params.threshold,
            log=clustering_params.log,
            labels="lattice_id",
            write_file_lists=False,
            schnell=False,
            doplot=False,
        )
        logger.info(unit_cell_info(self.unit_cell_clusters))

        ids_per_cluster = [[member.lattice_id for member in cluster.members]
                           for cluster in self.unit_cell_clusters]
        # max() keeps the first of equally long lists, like a strict ">" scan.
        if ids_per_cluster:
            dataset_selection = max(ids_per_cluster, key=len)
        else:
            dataset_selection = None

        # now convert to actual identifiers for selection
        return [
            self.ids_to_identifiers_map[numeric_id]
            for numeric_id in dataset_selection
        ]
    def unit_cell_clustering(self, plot_name=None):
        """Cluster the managed experiments by unit cell and keep the largest.

        Logs a cluster summary. If the cluster with the most members (the
        first one on a tie) is smaller than the number of experiments, the
        data manager is told to select just those lattice ids.
        """
        from xia2.Modules.MultiCrystalAnalysis import MultiCrystalAnalysis
        from xfel.clustering.cluster_groups import unit_cell_info

        clustering_params = self._params.unit_cell_clustering
        clusters, dendrogram = MultiCrystalAnalysis.unit_cell_clustering(
            self._data_manager.experiments,
            threshold=clustering_params.threshold,
            log=clustering_params.log,
            plot_name=plot_name,
        )
        logger.info(unit_cell_info(clusters))

        ids_per_cluster = [[member.lattice_id for member in cluster.members]
                           for cluster in clusters]
        # max() keeps the first of equally long lists, like a strict ">" scan.
        if ids_per_cluster:
            largest_ids = max(ids_per_cluster, key=len)
        else:
            largest_ids = None

        if len(largest_ids) >= len(self._data_manager.experiments):
            logger.info("Using all data sets for subsequent analysis")
            return

        logger.info(
            "Selecting subset of data sets for subsequent analysis: %s"
            % str(largest_ids)
        )
        self._data_manager.select(largest_ids)
Exemple #11
0
def do_cluster_analysis(crystal_symmetries, params):
    """Cluster the given crystal symmetries and print a summary.

    A dendrogram figure is produced when ``params.plot.show`` is truthy or
    ``params.plot.name`` is set; it is saved to ``params.plot.name`` and/or
    shown accordingly. Returns the clusters from ``ab_cluster``.

    Raises ``Sorry`` if the xfel clustering modules cannot be imported.
    """
    try:
        from xfel.clustering.cluster import Cluster
        from xfel.clustering.cluster_groups import unit_cell_info
    except ImportError:
        raise Sorry(
            "cluster_unit_cell requires xfel module but is not available")

    ucs = Cluster.from_crystal_symmetries(crystal_symmetries)
    plot_opts = params.plot

    # Guard clause: neither a window nor an output file was requested.
    if not (plot_opts.show or plot_opts.name is not None):
        clusters, _ = ucs.ab_cluster(
            params.threshold,
            log=plot_opts.log,
            write_file_lists=False,
            doplot=False,
        )
        print(unit_cell_info(clusters))
        return clusters

    if not plot_opts.show:
        import matplotlib

        # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
        # The backend has to be chosen before pyplot is imported below.
        matplotlib.use("Agg")  # use a non-interactive backend
    import matplotlib.pyplot as plt

    plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
    clusters, _ = ucs.ab_cluster(
        params.threshold,
        log=plot_opts.log,
        ax=plt.gca(),
        write_file_lists=False,
        doplot=True,
    )
    print(unit_cell_info(clusters))
    plt.tight_layout()

    output_name = plot_opts.name
    if output_name is not None:
        plt.savefig(output_name)
    if plot_opts.show:
        plt.show()

    return clusters
def run(_args):
  """Cluster unit cells found in folders of pickles and print a summary.

  Parameters
  ----------
  _args : namespace-like object
      Attributes read here: ``folders``, ``t``, ``log``, ``nofiles``,
      ``schnell`` and ``noplot``.

  Raises
  ------
  IOError
      If ``_args.folders`` is empty.
  """
  # The previous guard compared the namespace itself with an integer
  # (``_args < 2``), which can never detect a missing folder list.
  if not _args.folders:
    raise IOError("Must give at least one path to folder of pickles")
  ucs = Cluster.from_directories(_args.folders, "cxi_targt_uc")

  # A falsy ``noplot`` means "do not plot": no figure is created and the
  # same falsy value is forwarded as ``doplot``.
  if not _args.noplot:
    clusters, _ = ucs.ab_cluster(_args.t, log=_args.log,
                                 write_file_lists=_args.nofiles,
                                 schnell=_args.schnell,
                                 doplot=_args.noplot)
  else:
    plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
    ax = plt.gca()
    clusters, _ = ucs.ab_cluster(_args.t, log=_args.log, ax=ax,
                                 write_file_lists=_args.nofiles,
                                 schnell=_args.schnell,
                                 doplot=_args.noplot)
    plt.tight_layout()
    plt.show()

  # Single-argument call form prints identically on Python 2 and 3.
  print(unit_cell_info(clusters))
Exemple #13
0
    def unit_cell_clustering(self, plot_name=None):
        """Cluster the managed experiments by unit cell and keep the largest.

        Uses the Niggli cell of every experiment's crystal symmetry. When
        ``plot_name`` is given, the dendrogram is drawn and saved under that
        name. If the cluster with the most members (the first one on a tie)
        is smaller than the number of experiments, the data manager is told
        to select just those lattice ids.
        """
        experiments = self._data_manager.experiments
        crystal_symmetries = [
            expt.crystal.get_crystal_symmetry(
                assert_is_compatible_unit_cell=False).niggli_cell()
            for expt in experiments
        ]
        lattice_ids = [expt.identifier for expt in experiments]

        from xfel.clustering.cluster import Cluster
        from xfel.clustering.cluster_groups import unit_cell_info
        ucs = Cluster.from_crystal_symmetries(crystal_symmetries,
                                              lattice_ids=lattice_ids)

        make_plot = plot_name is not None
        ax = None
        if make_plot:
            from matplotlib import pyplot as plt
            plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
            ax = plt.gca()

        clustering_params = self._params.unit_cell_clustering
        clusters, _ = ucs.ab_cluster(clustering_params.threshold,
                                     log=clustering_params.log,
                                     write_file_lists=False,
                                     schnell=False,
                                     doplot=make_plot,
                                     ax=ax)
        if make_plot:
            plt.tight_layout()
            plt.savefig(plot_name)
            plt.clf()
        logger.info(unit_cell_info(clusters))

        ids_per_cluster = [[member.lattice_id for member in cluster.members]
                           for cluster in clusters]
        # max() keeps the first of equally long lists, like a strict ">" scan.
        if ids_per_cluster:
            largest_ids = max(ids_per_cluster, key=len)
        else:
            largest_ids = None

        if len(largest_ids) >= len(crystal_symmetries):
            logger.info('Using all data sets for subsequent analysis')
            return

        logger.info(
            'Selecting subset of data sets for subsequent analysis: %s' %
            str(largest_ids))
        self._data_manager.select(largest_ids)
Exemple #14
0
def run(args: List[str] = None, phil: phil.scope = phil_scope) -> None:
    """Index the input data, report unit cell clusters and write the results.

    Parameters
    ----------
    args
        Command-line style arguments handed to the option parser.
    phil
        PHIL scope used to build the option parser. Defaults to the
        module-level ``phil_scope``.

    The function prints the help text and exits when no experiments or no
    reflections are supplied. Otherwise it calls ``index``, logs the unit
    cell clusters holding more than 5% of the indexed crystals, saves the
    indexed experiments and reflections, and optionally writes HTML / JSON
    reports.
    """
    parser = OptionParser(
        usage="",
        read_experiments=True,
        read_reflections=True,
        # Honour the caller-supplied scope; this was previously hard-wired to
        # ``phil_scope`` so the ``phil`` argument had no effect.
        phil=phil,
        check_format=False,
        epilog=__doc__,
    )
    params, _ = parser.parse_args(args=args, show_diff_phil=False)

    if not params.input.experiments or not params.input.reflections:
        parser.print_help()
        sys.exit()

    reflections, experiments = reflections_and_experiments_from_files(
        params.input.reflections, params.input.experiments)
    log.config(verbosity=1, logfile=params.output.log)
    logger.info(dials_version())

    diff_phil = parser.diff_phil.as_str()
    if diff_phil:
        logger.info("The following parameters have been modified:\n%s",
                    diff_phil)

    st = time.time()
    indexed_experiments, indexed_reflections, summary_data = index(
        experiments, reflections[0], params)

    # print some clustering information
    ucs = Cluster.from_crystal_symmetries([
        crystal.symmetry(
            unit_cell=expt.crystal.get_unit_cell(),
            space_group=expt.crystal.get_space_group(),
        ) for expt in indexed_experiments
    ])
    clusters, _ = ucs.ab_cluster(5000,
                                 log=None,
                                 write_file_lists=False,
                                 doplot=False)
    cluster_plots = {}
    # Only clusters with strictly more members than 5% of the indexed
    # experiments (rounded down) are reported, largest first.
    threshold = math.floor(0.05 * len(indexed_experiments))
    large_clusters = sorted(
        (cluster for cluster in clusters if len(cluster.members) > threshold),
        key=lambda x: len(x.members),
        reverse=True,
    )

    if large_clusters:
        logger.info("""
Unit cell clustering analysis, clusters with >5% of the number of crystals indexed
""" + unit_cell_info(large_clusters))
        if params.output.html or params.output.json:
            cluster_plots = make_cluster_plots(large_clusters)
    else:
        logger.info("No clusters found with >5% of the number of crystals.")

    logger.info(f"Saving indexed experiments to {params.output.experiments}")
    indexed_experiments.as_file(params.output.experiments)
    logger.info(f"Saving indexed reflections to {params.output.reflections}")
    indexed_reflections.as_file(params.output.reflections)

    if params.output.html or params.output.json:
        summary_plots = generate_plots(summary_data)
        if cluster_plots:
            summary_plots.update(cluster_plots)
        if params.output.html:
            generate_html_report(summary_plots, params.output.html)
        if params.output.json:
            with open(params.output.json, "w") as outfile:
                json.dump(summary_plots, outfile)

    logger.info(f"Total time: {time.time() - st:.2f}s")
Exemple #15
0
def run(args):
    """Run the cosym analysis on DIALS experiments and/or reflection files.

    Parameters
    ----------
    args : list of str
        Command-line style arguments. Anything the option parser does not
        handle is treated as a reflection file name (a pickle or an MTZ file).

    Outline of what the function does:

    1. Build one intensity array per experiment and per extra file.
    2. Apply the optional batch / resolution / normalisation filters and move
       every data set to its primitive setting.
    3. Cluster the Niggli unit cells and keep the largest cluster.
    4. Put all data sets on a consistent basis and call ``analyse_datasets``.
    5. Log the resulting space groups and reindexing operators, then write
       reindexed experiments/reflections or reindexed MTZ files.

    Raises
    ------
    Sorry
        If no data set could be read at all.
    """
    import libtbx
    from libtbx import easy_pickle
    from dials.util import log
    from dials.util.options import OptionParser

    parser = OptionParser(
        #usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_datablocks=False,
        read_experiments=True,
        check_format=False,
        #epilog=help_message
    )

    # Hand the caller's arguments to the parser; previously they were ignored
    # and the parser fell back to its own default argument source.
    params, options, args = parser.parse_args(args=args,
                                              show_diff_phil=False,
                                              return_unhandled=True)

    # Configure the logging
    log.config(params.verbosity,
               info=params.output.log,
               debug=params.output.debug_log)

    from dials.util.version import dials_version
    logger.info(dials_version())

    # Log the diff phil
    diff_phil = parser.diff_phil.as_str()
    # Compare by value: identity comparison against a string literal only
    # works by accident of interning.
    if diff_phil != '':
        logger.info('The following parameters have been modified:\n')
        logger.info(diff_phil)

    if params.seed is not None:
        import random
        flex.set_random_seed(params.seed)
        random.seed(params.seed)

    if params.save_plot and not params.animate:
        import matplotlib
        # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
        matplotlib.use('Agg')  # use a non-interactive backend

    datasets_input = []

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    if len(experiments) or len(reflections):
        if len(reflections) == 1:
            # A single combined table: split it into one table per experiment
            # using the 'id' column.
            reflections_input = reflections[0]
            reflections = []
            for i in range(len(experiments)):
                reflections.append(
                    reflections_input.select(reflections_input['id'] == i))

        if len(experiments) > len(reflections):
            # Some tables still hold several experiments: split each by 'id'.
            flattened_reflections = []
            for refl in reflections:
                for i in range(0, flex.max(refl['id']) + 1):
                    sel = refl['id'] == i
                    flattened_reflections.append(refl.select(sel))
            reflections = flattened_reflections

        assert len(experiments) == len(reflections)

        for expt, refl in zip(experiments, reflections):
            crystal_symmetry = crystal.symmetry(
                unit_cell=expt.crystal.get_unit_cell(),
                space_group=expt.crystal.get_space_group())
            # The leading ``0 and`` deliberately disables the profile-fitted
            # branch, so summation intensities are always used.
            if 0 and 'intensity.prf.value' in refl:
                sel = refl.get_flags(refl.flags.integrated_prf)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.prf.value']
                variances = refl['intensity.prf.variance']
            else:
                assert 'intensity.sum.value' in refl
                sel = refl.get_flags(refl.flags.integrated_sum)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.sum.value']
                variances = refl['intensity.sum.variance']
            # FIXME probably need to do some filtering of intensities similar to that
            # done in export_mtz
            miller_indices = refl['miller_index']
            assert variances.all_gt(0)
            sigmas = flex.sqrt(variances)

            miller_set = miller.set(crystal_symmetry,
                                    miller_indices,
                                    anomalous_flag=False)
            intensities = miller.array(miller_set, data=data, sigmas=sigmas)
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(
                miller.array_info(source='DIALS', source_type='pickle'))
            datasets_input.append(intensities)

    files = args

    for file_name in files:

        # Try the file as a pickle first; on any failure fall back to MTZ.
        # NOTE(review): easy_pickle.load on an untrusted file is unsafe;
        # confirm that inputs are trusted.
        try:
            data = easy_pickle.load(file_name)
            intensities = data['observations'][0]
            intensities.set_info(
                miller.array_info(source=file_name, source_type='pickle'))
            intensities = intensities.customized_copy(
                anomalous_flag=False).set_info(intensities.info())
            batches = None
        except Exception:
            reader = any_reflection_file(file_name)
            assert reader.file_type() == 'ccp4_mtz'

            as_miller_arrays = reader.as_miller_arrays(merge_equivalents=False)
            intensities = [
                ma for ma in as_miller_arrays
                if ma.info().labels == ['I', 'SIGI']
            ][0]
            batches = [
                ma for ma in as_miller_arrays if ma.info().labels == ['BATCH']
            ]
            if len(batches):
                batches = batches[0]
            else:
                batches = None
            mtz_object = reader.file_content()
            intensities = intensities.customized_copy(
                anomalous_flag=False,
                indices=mtz_object.extract_original_index_miller_indices(
                )).set_info(intensities.info())

        intensities.set_observation_type_xray_intensity()
        datasets_input.append(intensities)

    if len(datasets_input) == 0:
        raise Sorry('No valid reflection files provided on command line')

    datasets = []
    for intensities in datasets_input:

        if params.batch is not None:
            # NOTE(review): ``batches`` is whatever the last iteration of the
            # file loop above left behind (and is undefined if no file was
            # given), so it is not specific to this data set - confirm intent.
            assert batches is not None
            bmin, bmax = params.batch
            assert bmax >= bmin
            sel = (batches.data() >= bmin) & (batches.data() <= bmax)
            assert sel.count(True) > 0
            intensities = intensities.select(sel)

        if params.min_i_mean_over_sigma_mean is not None and (
                params.d_min is libtbx.Auto or params.d_min is not None):
            from xia2.Modules import Resolutionizer
            rparams = Resolutionizer.phil_defaults.extract().resolutionizer
            rparams.nbins = 20
            resolutionizer = Resolutionizer.resolutionizer(
                intensities, None, rparams)
            # NOTE(review): a fixed value of 4 is used here rather than
            # params.min_i_mean_over_sigma_mean - confirm this is intended.
            i_mean_over_sigma_mean = 4
            d_min = resolutionizer.resolution_i_mean_over_sigma_mean(
                i_mean_over_sigma_mean)
            if params.d_min is libtbx.Auto:
                intensities = intensities.resolution_filter(
                    d_min=d_min).set_info(intensities.info())
                if params.verbose:
                    logger.info('Selecting reflections with d > %.2f' % d_min)
            # NOTE(review): ``file_name`` in the two messages below is the
            # last name from the file loop above (undefined if no file was
            # given), not necessarily the data set being processed.
            elif d_min > params.d_min:
                logger.info('Rejecting dataset %s as d_min too low (%.2f)' %
                            (file_name, d_min))
                continue
            else:
                logger.info('Estimated d_min for %s: %.2f' %
                            (file_name, d_min))
        elif params.d_min not in (None, libtbx.Auto):
            intensities = intensities.resolution_filter(
                d_min=params.d_min).set_info(intensities.info())

        if params.normalisation == 'kernel':
            from mmtbx.scaling import absolute_scaling
            normalisation = absolute_scaling.kernel_normalisation(
                intensities, auto_kernel=True)
            intensities = normalisation.normalised_miller.deep_copy()

        cb_op_to_primitive = intensities.change_of_basis_op_to_primitive_setting(
        )
        intensities = intensities.change_basis(cb_op_to_primitive)
        if params.mode == 'full' or params.space_group is not None:
            if params.space_group is not None:
                space_group_info = params.space_group.primitive_setting()
                if not space_group_info.group().is_compatible_unit_cell(
                        intensities.unit_cell()):
                    logger.info(
                        'Skipping data set - incompatible space group and unit cell: %s, %s'
                        % (space_group_info, intensities.unit_cell()))
                    continue
            else:
                space_group_info = sgtbx.space_group_info('P1')
            intensities = intensities.customized_copy(
                space_group_info=space_group_info)

        datasets.append(intensities)

    crystal_symmetries = [d.crystal_symmetry().niggli_cell() for d in datasets]
    # Lattice ids are simply positions in ``datasets``; they are used below to
    # index back into it.
    lattice_ids = range(len(datasets))
    from xfel.clustering.cluster import Cluster
    from xfel.clustering.cluster_groups import unit_cell_info
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries,
                                          lattice_ids=lattice_ids)
    if params.save_plot:
        from matplotlib import pyplot as plt
        fig = plt.figure("Andrews-Bernstein distance dendogram",
                         figsize=(12, 8))
        ax = plt.gca()
    else:
        ax = None
    clusters, _ = ucs.ab_cluster(params.unit_cell_clustering.threshold,
                                 log=params.unit_cell_clustering.log,
                                 write_file_lists=False,
                                 schnell=False,
                                 doplot=params.save_plot,
                                 ax=ax)
    if params.save_plot:
        plt.tight_layout()
        plt.savefig('%scluster_unit_cell.png' % params.plot_prefix)
        plt.close(fig)
    logger.info(unit_cell_info(clusters))

    # Keep the cluster with the most members (the first one on a tie).
    largest_cluster_lattice_ids = None
    for cluster in clusters:
        cluster_lattice_ids = [m.lattice_id for m in cluster.members]
        if largest_cluster_lattice_ids is None:
            largest_cluster_lattice_ids = cluster_lattice_ids
        elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
            largest_cluster_lattice_ids = cluster_lattice_ids

    dataset_selection = largest_cluster_lattice_ids
    if len(dataset_selection) < len(datasets):
        logger.info('Selecting subset of data for cosym analysis: %s' %
                    str(dataset_selection))
        datasets = [datasets[i] for i in dataset_selection]

    # per-dataset change of basis operator to ensure all consistent
    change_of_basis_ops = []
    for i, dataset in enumerate(datasets):
        metric_subgroups = sgtbx.lattice_symmetry.metric_subgroups(dataset,
                                                                   max_delta=5)
        subgroup = metric_subgroups.result_groups[0]
        cb_op_inp_best = subgroup['cb_op_inp_best']
        datasets[i] = dataset.change_basis(cb_op_inp_best)
        change_of_basis_ops.append(cb_op_inp_best)

    # The first data set provides the reference operator applied to all.
    cb_op_ref_min = datasets[0].change_of_basis_op_to_niggli_cell()
    for i, dataset in enumerate(datasets):
        if params.space_group is None:
            datasets[i] = dataset.change_basis(cb_op_ref_min).customized_copy(
                space_group_info=sgtbx.space_group_info('P1'))
        else:
            datasets[i] = dataset.change_basis(cb_op_ref_min)
            datasets[i] = datasets[i].customized_copy(
                crystal_symmetry=crystal.symmetry(
                    unit_cell=datasets[i].unit_cell(),
                    space_group_info=params.space_group.primitive_setting(),
                    assert_is_compatible_unit_cell=False))
        datasets[i] = datasets[i].merge_equivalents().array()
        change_of_basis_ops[i] = cb_op_ref_min * change_of_basis_ops[i]

    result = analyse_datasets(datasets, params)

    # Group data set ids by reindexing operator and by space group.
    space_groups = {}
    reindexing_ops = {}
    for dataset_id in result.reindexing_ops:
        if 0 in result.reindexing_ops[dataset_id]:
            cb_op = result.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, [])
            reindexing_ops[cb_op].append(dataset_id)
        if dataset_id in result.space_groups:
            space_groups.setdefault(result.space_groups[dataset_id], [])
            space_groups[result.space_groups[dataset_id]].append(dataset_id)

    # The loop variables below are named ``dataset_ids`` so that they do not
    # overwrite the ``datasets`` list.
    logger.info('Space groups:')
    for sg, dataset_ids in space_groups.items():
        logger.info(str(sg.info().reference_setting()))
        logger.info(dataset_ids)

    logger.info('Reindexing operators:')
    for cb_op, dataset_ids in reindexing_ops.items():
        logger.info(cb_op)
        logger.info(dataset_ids)

    if (len(experiments) and len(reflections)
            and params.output.reflections is not None
            and params.output.experiments is not None):
        import copy
        from dxtbx.model import ExperimentList
        from dxtbx.serialize import dump
        reindexed_experiments = ExperimentList()
        reindexed_reflections = flex.reflection_table()
        expt_id = 0
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                # ``dataset_id`` indexes the selected data sets; map it back
                # to the position in the original input lists.
                expt = experiments[dataset_selection[dataset_id]]
                refl = reflections[dataset_selection[dataset_id]]
                reindexed_expt = copy.deepcopy(expt)
                refl_reindexed = copy.deepcopy(refl)
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                reindexed_expt.crystal = reindexed_expt.crystal.change_basis(
                    cb_op_this)
                refl_reindexed['miller_index'] = cb_op_this.apply(
                    refl_reindexed['miller_index'])
                reindexed_experiments.append(reindexed_expt)
                # Renumber so ids are consecutive in the combined output.
                refl_reindexed['id'] = flex.int(refl_reindexed.size(), expt_id)
                reindexed_reflections.extend(refl_reindexed)
                expt_id += 1

        logger.info('Saving reindexed experiments to %s' %
                    params.output.experiments)
        dump.experiment_list(reindexed_experiments, params.output.experiments)
        logger.info('Saving reindexed reflections to %s' %
                    params.output.reflections)
        reindexed_reflections.as_pickle(params.output.reflections)

    elif params.output.suffix is not None:
        for cb_op, dataset_ids in reindexing_ops.items():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                # NOTE(review): this indexes ``files`` with a position in
                # ``datasets``, which only lines up when no data set was
                # skipped earlier - confirm.
                file_name = files[dataset_selection[dataset_id]]
                basename = os.path.basename(file_name)
                out_name = os.path.splitext(
                    basename)[0] + params.output.suffix + '_' + str(
                        dataset_selection[dataset_id]) + ".mtz"
                reader = any_reflection_file(file_name)
                assert reader.file_type() == 'ccp4_mtz'
                mtz_object = reader.file_content()
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                if not cb_op_this.is_identity_op():
                    logger.info('reindexing %s (%s)' %
                                (file_name, cb_op_this.as_xyz()))
                    mtz_object.change_basis_in_place(cb_op_this)
                mtz_object.write(out_name)