Python Cluster.Cluster Examples

Programming Language: Python

Namespace/Package Name: xfel.clustering.cluster

Class/Type: Cluster

Method/Function: Cluster

Examples at hotexamples.com: 2

Python Cluster.Cluster - 2 examples found. These are the top rated real world Python examples of xfel.clustering.cluster.Cluster.Cluster extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

from_crystal_symmetries(6)

from_directories(6)

from_files(4)

from_iterable(4)

Cluster(2)

__init__(1)

from_expts(1)

from_list(1)

Example #1

Show file

File: combine_experiments.py Project: jasonroyprice/dials

    def run_with_preparsed(self, params, options):
        """Run combine_experiments, but allow passing in of parameters"""
        from dials.util.options import flatten_experiments

        # Try to load the models and data
        if len(params.input.experiments) == 0:
            print("No Experiments found in the input")
            self.parser.print_help()
            return
        if len(params.input.reflections) == 0:
            print("No reflection data found in the input")
            self.parser.print_help()
            return
        try:
            assert len(params.input.reflections) == len(
                params.input.experiments)
        except AssertionError:
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments")

        flat_exps = flatten_experiments(params.input.experiments)

        ref_beam = params.reference_from_experiment.beam
        ref_goniometer = params.reference_from_experiment.goniometer
        ref_scan = params.reference_from_experiment.scan
        ref_crystal = params.reference_from_experiment.crystal
        ref_detector = params.reference_from_experiment.detector

        if ref_beam is not None:
            try:
                ref_beam = flat_exps[ref_beam].beam
            except IndexError:
                raise Sorry("{} is not a valid experiment ID".format(ref_beam))

        if ref_goniometer is not None:
            try:
                ref_goniometer = flat_exps[ref_goniometer].goniometer
            except IndexError:
                raise Sorry(
                    "{} is not a valid experiment ID".format(ref_goniometer))

        if ref_scan is not None:
            try:
                ref_scan = flat_exps[ref_scan].scan
            except IndexError:
                raise Sorry("{} is not a valid experiment ID".format(ref_scan))

        if ref_crystal is not None:
            try:
                ref_crystal = flat_exps[ref_crystal].crystal
            except IndexError:
                raise Sorry(
                    "{} is not a valid experiment ID".format(ref_crystal))

        if ref_detector is not None:
            assert not params.reference_from_experiment.average_detector
            try:
                ref_detector = flat_exps[ref_detector].detector
            except IndexError:
                raise Sorry(
                    "{} is not a valid experiment ID".format(ref_detector))
        elif params.reference_from_experiment.average_detector:
            # Average all of the detectors together
            from scitbx.matrix import col

            def average_detectors(target, panelgroups, depth):
                # Recursive function to do the averaging

                if (params.reference_from_experiment.average_hierarchy_level is
                        None or depth == params.reference_from_experiment.
                        average_hierarchy_level):
                    n = len(panelgroups)
                    sum_fast = col((0.0, 0.0, 0.0))
                    sum_slow = col((0.0, 0.0, 0.0))
                    sum_ori = col((0.0, 0.0, 0.0))

                    # Average the d matrix vectors
                    for pg in panelgroups:
                        sum_fast += col(pg.get_local_fast_axis())
                        sum_slow += col(pg.get_local_slow_axis())
                        sum_ori += col(pg.get_local_origin())
                    sum_fast /= n
                    sum_slow /= n
                    sum_ori /= n

                    # Re-orthagonalize the slow and the fast vectors by rotating around the cross product
                    c = sum_fast.cross(sum_slow)
                    a = sum_fast.angle(sum_slow, deg=True) / 2
                    sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                    sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                    target.set_local_frame(sum_fast, sum_slow, sum_ori)

                if target.is_group():
                    # Recurse
                    for i, target_pg in enumerate(target):
                        average_detectors(target_pg,
                                          [pg[i] for pg in panelgroups],
                                          depth + 1)

            ref_detector = flat_exps[0].detector
            average_detectors(ref_detector.hierarchy(),
                              [e.detector.hierarchy() for e in flat_exps], 0)

        combine = CombineWithReference(
            beam=ref_beam,
            goniometer=ref_goniometer,
            scan=ref_scan,
            crystal=ref_crystal,
            detector=ref_detector,
            params=params,
        )

        # set up global experiments and reflections lists
        from dials.array_family import flex

        reflections = flex.reflection_table()
        global_id = 0
        skipped_expts = 0
        from dxtbx.model.experiment_list import ExperimentList

        experiments = ExperimentList()

        # loop through the input, building up the global lists
        nrefs_per_exp = []
        for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                            params.input.experiments):
            refs = ref_wrapper.data
            exps = exp_wrapper.data
            for i, exp in enumerate(exps):
                sel = refs["id"] == i
                sub_ref = refs.select(sel)
                n_sub_ref = len(sub_ref)
                if (params.output.min_reflections_per_experiment is not None
                        and n_sub_ref <
                        params.output.min_reflections_per_experiment):
                    skipped_expts += 1
                    continue

                nrefs_per_exp.append(n_sub_ref)
                sub_ref["id"] = flex.int(len(sub_ref), global_id)
                if params.output.delete_shoeboxes and "shoebox" in sub_ref:
                    del sub_ref["shoebox"]
                reflections.extend(sub_ref)
                try:
                    experiments.append(combine(exp))
                except ComparisonError as e:
                    # When we failed tolerance checks, give a useful error message
                    (path,
                     index) = find_experiment_in(exp, params.input.experiments)
                    raise Sorry(
                        "Model didn't match reference within required tolerance for experiment {} in {}:"
                        "\n{}\nAdjust tolerances or set compare_models=False to ignore differences."
                        .format(index, path, str(e)))

                global_id += 1

        if (params.output.min_reflections_per_experiment is not None
                and skipped_expts > 0):
            print("Removed {0} experiments with fewer than {1} reflections".
                  format(skipped_expts,
                         params.output.min_reflections_per_experiment))

        # print number of reflections per experiment
        from libtbx.table_utils import simple_table

        header = ["Experiment", "Number of reflections"]
        rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
        st = simple_table(rows, header)
        print(st.format())

        # save a random subset if requested
        if (params.output.n_subset is not None
                and len(experiments) > params.output.n_subset):
            subset_exp = ExperimentList()
            subset_refls = flex.reflection_table()
            if params.output.n_subset_method == "random":
                n_picked = 0
                indices = list(range(len(experiments)))
                while n_picked < params.output.n_subset:
                    idx = indices.pop(random.randint(0, len(indices) - 1))
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), n_picked)
                    subset_refls.extend(refls)
                    n_picked += 1
                print(
                    "Selecting a random subset of {0} experiments out of {1} total."
                    .format(params.output.n_subset, len(experiments)))
            elif params.output.n_subset_method == "n_refl":
                if params.output.n_refl_panel_list is None:
                    refls_subset = reflections
                else:
                    sel = flex.bool(len(reflections), False)
                    for p in params.output.n_refl_panel_list:
                        sel |= reflections["panel"] == p
                    refls_subset = reflections.select(sel)
                refl_counts = flex.int()
                for expt_id in range(len(experiments)):
                    refl_counts.append(
                        len(refls_subset.select(
                            refls_subset["id"] == expt_id)))
                sort_order = flex.sort_permutation(refl_counts, reverse=True)
                for expt_id, idx in enumerate(
                        sort_order[:params.output.n_subset]):
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), expt_id)
                    subset_refls.extend(refls)
                print(
                    "Selecting a subset of {0} experiments with highest number of reflections out of {1} total."
                    .format(params.output.n_subset, len(experiments)))

            elif params.output.n_subset_method == "significance_filter":
                from dials.algorithms.integration.stills_significance_filter import (
                    SignificanceFilter, )

                params.output.significance_filter.enable = True
                sig_filter = SignificanceFilter(params.output)
                refls_subset = sig_filter(experiments, reflections)
                refl_counts = flex.int()
                for expt_id in range(len(experiments)):
                    refl_counts.append(
                        len(refls_subset.select(
                            refls_subset["id"] == expt_id)))
                sort_order = flex.sort_permutation(refl_counts, reverse=True)
                for expt_id, idx in enumerate(
                        sort_order[:params.output.n_subset]):
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), expt_id)
                    subset_refls.extend(refls)

            experiments = subset_exp
            reflections = subset_refls

        def save_in_batches(experiments,
                            reflections,
                            exp_name,
                            refl_name,
                            batch_size=1000):
            from dxtbx.command_line.image_average import splitit

            for i, indices in enumerate(
                    splitit(list(range(len(experiments))),
                            (len(experiments) // batch_size) + 1)):
                batch_expts = ExperimentList()
                batch_refls = flex.reflection_table()
                for sub_id, sub_idx in enumerate(indices):
                    batch_expts.append(experiments[sub_idx])
                    sub_refls = reflections.select(
                        reflections["id"] == sub_idx)
                    sub_refls["id"] = flex.int(len(sub_refls), sub_id)
                    batch_refls.extend(sub_refls)
                exp_filename = os.path.splitext(exp_name)[0] + "_%03d.expt" % i
                ref_filename = os.path.splitext(
                    refl_name)[0] + "_%03d.refl" % i
                self._save_output(batch_expts, batch_refls, exp_filename,
                                  ref_filename)

        def combine_in_clusters(experiments_l, reflections_l, exp_name,
                                refl_name, end_count):
            result = []
            for cluster, experiment in enumerate(experiments_l):
                cluster_expts = ExperimentList()
                cluster_refls = flex.reflection_table()
                for i, expts in enumerate(experiment):
                    refls = reflections_l[cluster][i]
                    refls["id"] = flex.int(len(refls), i)
                    cluster_expts.append(expts)
                    cluster_refls.extend(refls)
                exp_filename = os.path.splitext(exp_name)[0] + (
                    "_cluster%d.expt" % (end_count - cluster))
                ref_filename = os.path.splitext(refl_name)[0] + (
                    "_cluster%d.refl" % (end_count - cluster))
                result.append(
                    (cluster_expts, cluster_refls, exp_filename, ref_filename))
            return result

        # cluster the resulting experiments if requested
        if params.clustering.use:
            clustered = Cluster(
                experiments,
                reflections,
                dendrogram=params.clustering.dendrogram,
                threshold=params.clustering.threshold,
                n_max=params.clustering.max_crystals,
            )
            n_clusters = len(clustered.clustered_frames)

            def not_too_many(keeps):
                if params.clustering.max_clusters is not None:
                    return len(keeps) < params.clustering.max_clusters
                return True

            keep_frames = []
            sorted_keys = sorted(clustered.clustered_frames.keys())
            while len(clustered.clustered_frames) > 0 and not_too_many(
                    keep_frames):
                keep_frames.append(
                    clustered.clustered_frames.pop(sorted_keys.pop(-1)))
            if params.clustering.exclude_single_crystal_clusters:
                keep_frames = [k for k in keep_frames if len(k) > 1]
            clustered_experiments = [[f.experiment for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            clustered_reflections = [[f.reflections for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            list_of_combined = combine_in_clusters(
                clustered_experiments,
                clustered_reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
                n_clusters,
            )
            for saveable_tuple in list_of_combined:
                if params.output.max_batch_size is None:
                    self._save_output(*saveable_tuple)
                else:
                    save_in_batches(*saveable_tuple,
                                    batch_size=params.output.max_batch_size)
        else:
            if params.output.max_batch_size is None:
                self._save_output(
                    experiments,
                    reflections,
                    params.output.experiments_filename,
                    params.output.reflections_filename,
                )
            else:
                save_in_batches(
                    experiments,
                    reflections,
                    params.output.experiments_filename,
                    params.output.reflections_filename,
                    batch_size=params.output.max_batch_size,
                )
        return

Example #2

Show file

    def run(self):
        '''Execute the script.'''

        from dials.util.options import flatten_experiments
        from libtbx.utils import Sorry

        # Parse the command line
        params, options = self.parser.parse_args(show_diff_phil=True)

        # Try to load the models and data
        if len(params.input.experiments) == 0:
            print "No Experiments found in the input"
            self.parser.print_help()
            return
        if len(params.input.reflections) == 0:
            print "No reflection data found in the input"
            self.parser.print_help()
            return
        try:
            assert len(params.input.reflections) == len(
                params.input.experiments)
        except AssertionError:
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments")

        flat_exps = flatten_experiments(params.input.experiments)

        ref_beam = params.reference_from_experiment.beam
        ref_goniometer = params.reference_from_experiment.goniometer
        ref_scan = params.reference_from_experiment.scan
        ref_crystal = params.reference_from_experiment.crystal
        ref_detector = params.reference_from_experiment.detector

        if ref_beam is not None:
            try:
                ref_beam = flat_exps[ref_beam].beam
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_beam))

        if ref_goniometer is not None:
            try:
                ref_goniometer = flat_exps[ref_goniometer].goniometer
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_goniometer))

        if ref_scan is not None:
            try:
                ref_scan = flat_exps[ref_scan].scan
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_scan))

        if ref_crystal is not None:
            try:
                ref_crystal = flat_exps[ref_crystal].crystal
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_crystal))

        if ref_detector is not None:
            assert not params.reference_from_experiment.average_detector
            try:
                ref_detector = flat_exps[ref_detector].detector
            except IndexError:
                raise Sorry(
                    "{0} is not a valid experiment ID".format(ref_detector))
        elif params.reference_from_experiment.average_detector:
            # Average all of the detectors together
            from scitbx.matrix import col

            def average_detectors(target, panelgroups, depth):
                # Recursive function to do the averaging

                if params.reference_from_experiment.average_hierarchy_level is None or \
                    depth == params.reference_from_experiment.average_hierarchy_level:
                    n = len(panelgroups)
                    sum_fast = col((0.0, 0.0, 0.0))
                    sum_slow = col((0.0, 0.0, 0.0))
                    sum_ori = col((0.0, 0.0, 0.0))

                    # Average the d matrix vectors
                    for pg in panelgroups:
                        sum_fast += col(pg.get_local_fast_axis())
                        sum_slow += col(pg.get_local_slow_axis())
                        sum_ori += col(pg.get_local_origin())
                    sum_fast /= n
                    sum_slow /= n
                    sum_ori /= n

                    # Re-orthagonalize the slow and the fast vectors by rotating around the cross product
                    c = sum_fast.cross(sum_slow)
                    a = sum_fast.angle(sum_slow, deg=True) / 2
                    sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                    sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                    target.set_local_frame(sum_fast, sum_slow, sum_ori)

                if target.is_group():
                    # Recurse
                    for i, target_pg in enumerate(target):
                        average_detectors(target_pg,
                                          [pg[i] for pg in panelgroups],
                                          depth + 1)

            ref_detector = flat_exps[0].detector
            average_detectors(ref_detector.hierarchy(),
                              [e.detector.hierarchy() for e in flat_exps], 0)

        combine = CombineWithReference(beam=ref_beam,
                                       goniometer=ref_goniometer,
                                       scan=ref_scan,
                                       crystal=ref_crystal,
                                       detector=ref_detector,
                                       params=params)

        # set up global experiments and reflections lists
        from dials.array_family import flex
        reflections = flex.reflection_table()
        global_id = 0
        from dxtbx.model.experiment_list import ExperimentList
        experiments = ExperimentList()

        # loop through the input, building up the global lists
        nrefs_per_exp = []
        for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                            params.input.experiments):
            refs = ref_wrapper.data
            exps = exp_wrapper.data
            for i, exp in enumerate(exps):
                sel = refs['id'] == i
                sub_ref = refs.select(sel)
                nrefs_per_exp.append(len(sub_ref))
                sub_ref['id'] = flex.int(len(sub_ref), global_id)
                if params.output.delete_shoeboxes and 'shoebox' in sub_ref:
                    del sub_ref['shoebox']
                reflections.extend(sub_ref)
                experiments.append(combine(exp))
                global_id += 1

        # print number of reflections per experiment
        from libtbx.table_utils import simple_table
        header = ["Experiment", "Nref"]
        rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
        st = simple_table(rows, header)
        print st.format()

        # save a random subset if requested
        if params.output.n_subset is not None and len(
                experiments) > params.output.n_subset:
            import random
            subset_exp = ExperimentList()
            subset_refls = flex.reflection_table()
            n_picked = 0
            indices = range(len(experiments))
            while n_picked < params.output.n_subset:
                idx = indices.pop(random.randint(0, len(indices) - 1))
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections['id'] == idx)
                refls['id'] = flex.int(len(refls), n_picked)
                subset_refls.extend(refls)
                n_picked += 1
            experiments = subset_exp
            reflections = subset_refls

        def save_output(experiments, reflections, exp_name, refl_name):
            # save output
            from dxtbx.model.experiment_list import ExperimentListDumper
            print 'Saving combined experiments to {0}'.format(exp_name)
            dump = ExperimentListDumper(experiments)
            dump.as_json(exp_name)
            print 'Saving combined reflections to {0}'.format(refl_name)
            reflections.as_pickle(refl_name)

        def save_in_batches(experiments,
                            reflections,
                            exp_name,
                            refl_name,
                            batch_size=1000):
            from dxtbx.command_line.image_average import splitit
            import os
            result = []
            for i, indices in enumerate(
                    splitit(range(len(experiments)),
                            (len(experiments) // batch_size) + 1)):
                batch_expts = ExperimentList()
                batch_refls = flex.reflection_table()
                for sub_id, sub_idx in enumerate(indices):
                    batch_expts.append(experiments[sub_idx])
                    sub_refls = reflections.select(
                        reflections['id'] == sub_idx)
                    sub_refls['id'] = flex.int(len(sub_refls), sub_id)
                    batch_refls.extend(sub_refls)
                exp_filename = os.path.splitext(exp_name)[0] + "_%03d.json" % i
                ref_filename = os.path.splitext(
                    refl_name)[0] + "_%03d.pickle" % i
                save_output(batch_expts, batch_refls, exp_filename,
                            ref_filename)

        def combine_in_clusters(experiments_l, reflections_l, exp_name,
                                refl_name, end_count):
            import os
            result = []
            for cluster in xrange(len(experiments_l)):
                cluster_expts = ExperimentList()
                cluster_refls = flex.reflection_table()
                for i in xrange(len(experiments_l[cluster])):
                    refls = reflections_l[cluster][i]
                    expts = experiments_l[cluster][i]
                    refls['id'] = flex.int(len(refls), i)
                    cluster_expts.append(expts)
                    cluster_refls.extend(refls)
                exp_filename = os.path.splitext(exp_name)[0] + (
                    "_cluster%d.json" % (end_count - cluster))
                ref_filename = os.path.splitext(refl_name)[0] + (
                    "_cluster%d.pickle" % (end_count - cluster))
                result.append(
                    (cluster_expts, cluster_refls, exp_filename, ref_filename))
            return result

        # cluster the resulting experiments if requested
        if params.clustering.use:
            clustered = Cluster(experiments,
                                reflections,
                                dendrogram=params.clustering.dendrogram,
                                threshold=params.clustering.threshold,
                                n_max=params.clustering.max_crystals)
            n_clusters = len(clustered.clustered_frames)
            if params.clustering.max_clusters is not None:
                not_too_many = lambda keeps: len(
                    keeps) < params.clustering.max_clusters
            else:
                not_too_many = lambda keeps: True
            keep_frames = []
            sorted_keys = sorted(clustered.clustered_frames.keys())
            while len(clustered.clustered_frames) > 0 and not_too_many(
                    keep_frames):
                keep_frames.append(
                    clustered.clustered_frames.pop(sorted_keys.pop(-1)))
            if params.clustering.exclude_single_crystal_clusters:
                keep_frames = [k for k in keep_frames if len(k) > 1]
            clustered_experiments = [[f.experiment for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            clustered_reflections = [[f.reflections for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            list_of_combined = combine_in_clusters(
                clustered_experiments, clustered_reflections,
                params.output.experiments_filename,
                params.output.reflections_filename, n_clusters)
            for i in xrange(len(list_of_combined)):
                savable_tuple = list_of_combined[i]
                if params.output.max_batch_size is None:
                    save_output(*savable_tuple)
                else:
                    save_in_batches(*savable_tuple,
                                    batch_size=params.output.max_batch_size)
        else:
            if params.output.max_batch_size is None:
                save_output(experiments, reflections,
                            params.output.experiments_filename,
                            params.output.reflections_filename)
            else:
                save_in_batches(experiments,
                                reflections,
                                params.output.experiments_filename,
                                params.output.reflections_filename,
                                batch_size=params.output.max_batch_size)
        return