def run_with_preparsed(self, params, options):
        """Run combine_experiments, but allow passing in of parameters.

        Each entry of ``params.input.experiments`` is paired with the entry of
        ``params.input.reflections`` at the same position.  Every experiment
        is passed through a ``CombineWithReference`` callable built from the
        requested reference models and its reflections are re-labelled with a
        new, consecutive ``id``.  The combined result is then optionally
        reduced to a subset, clustered and/or split into batches before being
        written with ``self._save_output``.

        Args:
            params: Parsed parameter object.  The ``input``,
                ``reference_from_experiment``, ``output`` and ``clustering``
                scopes are read; ``output.significance_filter.enable`` is set
                to True when the ``significance_filter`` subset method is
                used.
            options: Not used by this method.

        Returns:
            None.  If no experiments or no reflections were supplied, the
            parser help is printed and the method returns without doing
            anything else.

        Raises:
            Sorry: If the number of reflection inputs differs from the number
                of experiment inputs, if a reference experiment index is out
                of range, if both a reference detector and
                ``average_detector`` are requested, or if a model does not
                match its reference (``ComparisonError`` from the combiner).
        """
        # Try to load the models and data
        if len(params.input.experiments) == 0:
            print("No Experiments found in the input")
            self.parser.print_help()
            return
        if len(params.input.reflections) == 0:
            print("No reflection data found in the input")
            self.parser.print_help()
            return

        # Checked explicitly rather than with ``assert``: assertions are
        # removed under ``python -O`` and the zip() over the two input lists
        # further down would then silently ignore the unpaired entries.
        if len(params.input.reflections) != len(params.input.experiments):
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments")

        # Imported after the cheap early exits above so that printing the
        # help does not depend on it.
        from dials.util.options import flatten_experiments

        flat_exps = flatten_experiments(params.input.experiments)
        ref_params = params.reference_from_experiment

        def reference_model(index, attribute):
            """Return ``attribute`` of experiment ``index`` (None if unset)."""
            if index is None:
                return None
            try:
                return getattr(flat_exps[index], attribute)
            except IndexError:
                raise Sorry("{} is not a valid experiment ID".format(index))

        ref_beam = reference_model(ref_params.beam, "beam")
        ref_goniometer = reference_model(ref_params.goniometer, "goniometer")
        ref_scan = reference_model(ref_params.scan, "scan")
        ref_crystal = reference_model(ref_params.crystal, "crystal")

        ref_detector = None
        if ref_params.detector is not None:
            # A fixed reference detector and an averaged one are mutually
            # exclusive; report the conflict as a user error.
            if ref_params.average_detector:
                raise Sorry(
                    "reference_from_experiment.detector and "
                    "reference_from_experiment.average_detector cannot both "
                    "be set")
            ref_detector = reference_model(ref_params.detector, "detector")
        elif ref_params.average_detector:
            # Average all of the detectors together
            from scitbx.matrix import col

            def average_detectors(target, panelgroups, depth):
                """Recursively set ``target``'s local frame to the mean of
                the corresponding ``panelgroups``."""
                level = ref_params.average_hierarchy_level
                if level is None or depth == level:
                    n = len(panelgroups)
                    sum_fast = col((0.0, 0.0, 0.0))
                    sum_slow = col((0.0, 0.0, 0.0))
                    sum_ori = col((0.0, 0.0, 0.0))

                    # Average the d matrix vectors
                    for pg in panelgroups:
                        sum_fast += col(pg.get_local_fast_axis())
                        sum_slow += col(pg.get_local_slow_axis())
                        sum_ori += col(pg.get_local_origin())
                    sum_fast /= n
                    sum_slow /= n
                    sum_ori /= n

                    # Re-orthogonalize the slow and the fast vectors by
                    # rotating each around their cross product by half of the
                    # deviation of their mutual angle from 90 degrees.
                    c = sum_fast.cross(sum_slow)
                    a = sum_fast.angle(sum_slow, deg=True) / 2
                    sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                    sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                    target.set_local_frame(sum_fast, sum_slow, sum_ori)

                if target.is_group():
                    # Recurse into child i of the target together with
                    # child i of every contributing panel group.
                    for i, target_pg in enumerate(target):
                        average_detectors(target_pg,
                                          [pg[i] for pg in panelgroups],
                                          depth + 1)

            # The first experiment's detector receives the averaged frames
            # (set_local_frame is called on its hierarchy) and then serves as
            # the reference detector.
            ref_detector = flat_exps[0].detector
            average_detectors(ref_detector.hierarchy(),
                              [e.detector.hierarchy() for e in flat_exps], 0)

        combine = CombineWithReference(
            beam=ref_beam,
            goniometer=ref_goniometer,
            scan=ref_scan,
            crystal=ref_crystal,
            detector=ref_detector,
            params=params,
        )

        # set up global experiments and reflections lists
        from dials.array_family import flex
        from dxtbx.model.experiment_list import ExperimentList

        reflections = flex.reflection_table()
        experiments = ExperimentList()
        global_id = 0
        skipped_expts = 0
        min_refl = params.output.min_reflections_per_experiment

        # loop through the input, building up the global lists
        nrefs_per_exp = []
        for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                            params.input.experiments):
            refs = ref_wrapper.data
            exps = exp_wrapper.data
            for i, exp in enumerate(exps):
                # Within one input file the reflection "id" column holds the
                # position of the experiment in the matching experiment list.
                sub_ref = refs.select(refs["id"] == i)
                n_sub_ref = len(sub_ref)
                if min_refl is not None and n_sub_ref < min_refl:
                    skipped_expts += 1
                    continue

                nrefs_per_exp.append(n_sub_ref)
                sub_ref["id"] = flex.int(n_sub_ref, global_id)
                if params.output.delete_shoeboxes and "shoebox" in sub_ref:
                    del sub_ref["shoebox"]
                reflections.extend(sub_ref)
                try:
                    experiments.append(combine(exp))
                except ComparisonError as e:
                    # When we failed tolerance checks, give a useful error message
                    (path,
                     index) = find_experiment_in(exp, params.input.experiments)
                    raise Sorry(
                        "Model didn't match reference within required tolerance for experiment {} in {}:"
                        "\n{}\nAdjust tolerances or set compare_models=False to ignore differences."
                        .format(index, path, str(e)))

                global_id += 1

        if min_refl is not None and skipped_expts > 0:
            print("Removed {0} experiments with fewer than {1} reflections".
                  format(skipped_expts, min_refl))

        # print number of reflections per experiment
        from libtbx.table_utils import simple_table

        header = ["Experiment", "Number of reflections"]
        rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
        st = simple_table(rows, header)
        print(st.format())

        # save a subset if requested
        if (params.output.n_subset is not None
                and len(experiments) > params.output.n_subset):
            subset_exp = ExperimentList()
            subset_refls = flex.reflection_table()

            def keep_largest(counted_refls):
                """Add to the subset the ``n_subset`` experiments that have
                the most reflections in ``counted_refls``.

                Only the ranking uses ``counted_refls``; the reflections
                copied into the subset come from the full table.
                """
                refl_counts = flex.int()
                for expt_id in range(len(experiments)):
                    refl_counts.append(
                        len(counted_refls.select(
                            counted_refls["id"] == expt_id)))
                sort_order = flex.sort_permutation(refl_counts, reverse=True)
                for expt_id, idx in enumerate(
                        sort_order[:params.output.n_subset]):
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), expt_id)
                    subset_refls.extend(refls)

            if params.output.n_subset_method == "random":
                # Draw without replacement.  The enclosing condition
                # guarantees n_subset < len(experiments), so ``indices``
                # cannot run empty inside the loop.
                n_picked = 0
                indices = list(range(len(experiments)))
                while n_picked < params.output.n_subset:
                    idx = indices.pop(random.randint(0, len(indices) - 1))
                    subset_exp.append(experiments[idx])
                    refls = reflections.select(reflections["id"] == idx)
                    refls["id"] = flex.int(len(refls), n_picked)
                    subset_refls.extend(refls)
                    n_picked += 1
                print(
                    "Selecting a random subset of {0} experiments out of {1} total."
                    .format(params.output.n_subset, len(experiments)))
            elif params.output.n_subset_method == "n_refl":
                if params.output.n_refl_panel_list is None:
                    refls_subset = reflections
                else:
                    # Count only reflections on the listed panels
                    sel = flex.bool(len(reflections), False)
                    for p in params.output.n_refl_panel_list:
                        sel |= reflections["panel"] == p
                    refls_subset = reflections.select(sel)
                keep_largest(refls_subset)
                print(
                    "Selecting a subset of {0} experiments with highest number of reflections out of {1} total."
                    .format(params.output.n_subset, len(experiments)))

            elif params.output.n_subset_method == "significance_filter":
                from dials.algorithms.integration.stills_significance_filter import (
                    SignificanceFilter, )

                params.output.significance_filter.enable = True
                sig_filter = SignificanceFilter(params.output)
                # Rank by the number of reflections returned by the filter
                keep_largest(sig_filter(experiments, reflections))

            experiments = subset_exp
            reflections = subset_refls

        def save_in_batches(experiments,
                            reflections,
                            exp_name,
                            refl_name,
                            batch_size=1000):
            """Write the experiments and reflections as numbered batch files.

            Output names are the stems of ``exp_name`` / ``refl_name``
            followed by ``_NNN.expt`` / ``_NNN.refl``; reflection ids restart
            at 0 within every batch.
            """
            from dxtbx.command_line.image_average import splitit

            # NOTE(review): splitit presumably divides the index list into
            # the requested number of chunks - confirm against dxtbx.
            for i, indices in enumerate(
                    splitit(list(range(len(experiments))),
                            (len(experiments) // batch_size) + 1)):
                batch_expts = ExperimentList()
                batch_refls = flex.reflection_table()
                for sub_id, sub_idx in enumerate(indices):
                    batch_expts.append(experiments[sub_idx])
                    sub_refls = reflections.select(
                        reflections["id"] == sub_idx)
                    sub_refls["id"] = flex.int(len(sub_refls), sub_id)
                    batch_refls.extend(sub_refls)
                exp_filename = os.path.splitext(exp_name)[0] + "_%03d.expt" % i
                ref_filename = os.path.splitext(
                    refl_name)[0] + "_%03d.refl" % i
                self._save_output(batch_expts, batch_refls, exp_filename,
                                  ref_filename)

        def combine_in_clusters(experiments_l, reflections_l, exp_name,
                                refl_name, end_count):
            """Build one (experiments, reflections, exp_file, refl_file)
            tuple per cluster.

            Cluster files are numbered downwards from ``end_count``; the
            reflection ids are renumbered from 0 within each cluster.
            """
            result = []
            for cluster, cluster_members in enumerate(experiments_l):
                cluster_expts = ExperimentList()
                cluster_refls = flex.reflection_table()
                for i, expt in enumerate(cluster_members):
                    refls = reflections_l[cluster][i]
                    refls["id"] = flex.int(len(refls), i)
                    cluster_expts.append(expt)
                    cluster_refls.extend(refls)
                exp_filename = os.path.splitext(exp_name)[0] + (
                    "_cluster%d.expt" % (end_count - cluster))
                ref_filename = os.path.splitext(refl_name)[0] + (
                    "_cluster%d.refl" % (end_count - cluster))
                result.append(
                    (cluster_expts, cluster_refls, exp_filename, ref_filename))
            return result

        # cluster the resulting experiments if requested
        if params.clustering.use:
            clustered = Cluster(
                experiments,
                reflections,
                dendrogram=params.clustering.dendrogram,
                threshold=params.clustering.threshold,
                n_max=params.clustering.max_crystals,
            )
            n_clusters = len(clustered.clustered_frames)

            def not_too_many(keeps):
                if params.clustering.max_clusters is not None:
                    return len(keeps) < params.clustering.max_clusters
                return True

            # Take clusters starting from the highest key until none are
            # left or max_clusters is reached.
            keep_frames = []
            sorted_keys = sorted(clustered.clustered_frames.keys())
            while len(clustered.clustered_frames) > 0 and not_too_many(
                    keep_frames):
                keep_frames.append(
                    clustered.clustered_frames.pop(sorted_keys.pop(-1)))
            if params.clustering.exclude_single_crystal_clusters:
                keep_frames = [k for k in keep_frames if len(k) > 1]
            clustered_experiments = [[f.experiment for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            clustered_reflections = [[f.reflections for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            list_of_combined = combine_in_clusters(
                clustered_experiments,
                clustered_reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
                n_clusters,
            )
            for saveable_tuple in list_of_combined:
                if params.output.max_batch_size is None:
                    self._save_output(*saveable_tuple)
                else:
                    save_in_batches(*saveable_tuple,
                                    batch_size=params.output.max_batch_size)
        else:
            if params.output.max_batch_size is None:
                self._save_output(
                    experiments,
                    reflections,
                    params.output.experiments_filename,
                    params.output.reflections_filename,
                )
            else:
                save_in_batches(
                    experiments,
                    reflections,
                    params.output.experiments_filename,
                    params.output.reflections_filename,
                    batch_size=params.output.max_batch_size,
                )
Example #2
0
    def run(self):
        """Execute the script.

        Parses the command line with ``self.parser``, pairs every input
        experiment list with the reflection file at the same position, passes
        each experiment through a ``CombineWithReference`` callable built from
        the requested reference models and re-labels its reflections with a
        new, consecutive ``id``.  The combined result is optionally reduced
        to a random subset, clustered and/or split into batches, and written
        as JSON (experiments) and pickle (reflections) files.

        The body is written so that it is valid on both Python 2 and
        Python 3 (single-argument ``print(...)``, ``range`` instead of
        ``xrange``, explicit ``list(range(...))`` where a list is required).

        Returns:
            None.  If no experiments or no reflections were supplied, the
            parser help is printed and the method returns early.

        Raises:
            Sorry: If the number of reflection inputs differs from the number
                of experiment inputs, if a reference experiment index is out
                of range, or if both a reference detector and
                ``average_detector`` are requested.
        """
        # Parse the command line
        params, _ = self.parser.parse_args(show_diff_phil=True)

        # Try to load the models and data
        if len(params.input.experiments) == 0:
            print("No Experiments found in the input")
            self.parser.print_help()
            return
        if len(params.input.reflections) == 0:
            print("No reflection data found in the input")
            self.parser.print_help()
            return

        # Imported after the cheap early exits above so that printing the
        # help does not depend on them.
        from dials.util.options import flatten_experiments
        from libtbx.utils import Sorry

        # Checked explicitly rather than with ``assert``: assertions are
        # removed under ``python -O`` and the zip() over the two input lists
        # further down would then silently ignore the unpaired entries.
        if len(params.input.reflections) != len(params.input.experiments):
            raise Sorry(
                "The number of input reflections files does not match the "
                "number of input experiments")

        flat_exps = flatten_experiments(params.input.experiments)
        ref_params = params.reference_from_experiment

        def reference_model(index, attribute):
            """Return ``attribute`` of experiment ``index`` (None if unset)."""
            if index is None:
                return None
            try:
                return getattr(flat_exps[index], attribute)
            except IndexError:
                raise Sorry("{0} is not a valid experiment ID".format(index))

        ref_beam = reference_model(ref_params.beam, "beam")
        ref_goniometer = reference_model(ref_params.goniometer, "goniometer")
        ref_scan = reference_model(ref_params.scan, "scan")
        ref_crystal = reference_model(ref_params.crystal, "crystal")

        ref_detector = None
        if ref_params.detector is not None:
            # A fixed reference detector and an averaged one are mutually
            # exclusive; report the conflict as a user error.
            if ref_params.average_detector:
                raise Sorry(
                    "reference_from_experiment.detector and "
                    "reference_from_experiment.average_detector cannot both "
                    "be set")
            ref_detector = reference_model(ref_params.detector, "detector")
        elif ref_params.average_detector:
            # Average all of the detectors together
            from scitbx.matrix import col

            def average_detectors(target, panelgroups, depth):
                """Recursively set ``target``'s local frame to the mean of
                the corresponding ``panelgroups``."""
                level = ref_params.average_hierarchy_level
                if level is None or depth == level:
                    n = len(panelgroups)
                    sum_fast = col((0.0, 0.0, 0.0))
                    sum_slow = col((0.0, 0.0, 0.0))
                    sum_ori = col((0.0, 0.0, 0.0))

                    # Average the d matrix vectors
                    for pg in panelgroups:
                        sum_fast += col(pg.get_local_fast_axis())
                        sum_slow += col(pg.get_local_slow_axis())
                        sum_ori += col(pg.get_local_origin())
                    sum_fast /= n
                    sum_slow /= n
                    sum_ori /= n

                    # Re-orthogonalize the slow and the fast vectors by
                    # rotating each around their cross product by half of the
                    # deviation of their mutual angle from 90 degrees.
                    c = sum_fast.cross(sum_slow)
                    a = sum_fast.angle(sum_slow, deg=True) / 2
                    sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                    sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                    target.set_local_frame(sum_fast, sum_slow, sum_ori)

                if target.is_group():
                    # Recurse into child i of the target together with
                    # child i of every contributing panel group.
                    for i, target_pg in enumerate(target):
                        average_detectors(target_pg,
                                          [pg[i] for pg in panelgroups],
                                          depth + 1)

            # The first experiment's detector receives the averaged frames
            # (set_local_frame is called on its hierarchy) and then serves as
            # the reference detector.
            ref_detector = flat_exps[0].detector
            average_detectors(ref_detector.hierarchy(),
                              [e.detector.hierarchy() for e in flat_exps], 0)

        combine = CombineWithReference(beam=ref_beam,
                                       goniometer=ref_goniometer,
                                       scan=ref_scan,
                                       crystal=ref_crystal,
                                       detector=ref_detector,
                                       params=params)

        # set up global experiments and reflections lists
        from dials.array_family import flex
        from dxtbx.model.experiment_list import ExperimentList
        reflections = flex.reflection_table()
        experiments = ExperimentList()
        global_id = 0

        # loop through the input, building up the global lists
        nrefs_per_exp = []
        for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                            params.input.experiments):
            refs = ref_wrapper.data
            exps = exp_wrapper.data
            for i, exp in enumerate(exps):
                # Within one input file the reflection 'id' column holds the
                # position of the experiment in the matching experiment list.
                sub_ref = refs.select(refs['id'] == i)
                nrefs_per_exp.append(len(sub_ref))
                sub_ref['id'] = flex.int(len(sub_ref), global_id)
                if params.output.delete_shoeboxes and 'shoebox' in sub_ref:
                    del sub_ref['shoebox']
                reflections.extend(sub_ref)
                experiments.append(combine(exp))
                global_id += 1

        # print number of reflections per experiment
        from libtbx.table_utils import simple_table
        header = ["Experiment", "Nref"]
        rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
        st = simple_table(rows, header)
        print(st.format())

        # save a random subset if requested
        if params.output.n_subset is not None and len(
                experiments) > params.output.n_subset:
            import random
            subset_exp = ExperimentList()
            subset_refls = flex.reflection_table()
            # Draw without replacement.  A real list is needed for pop();
            # on Python 3 a bare range object has no such method.  The
            # enclosing condition guarantees n_subset < len(experiments), so
            # ``indices`` cannot run empty inside the loop.
            n_picked = 0
            indices = list(range(len(experiments)))
            while n_picked < params.output.n_subset:
                idx = indices.pop(random.randint(0, len(indices) - 1))
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections['id'] == idx)
                refls['id'] = flex.int(len(refls), n_picked)
                subset_refls.extend(refls)
                n_picked += 1
            experiments = subset_exp
            reflections = subset_refls

        def save_output(experiments, reflections, exp_name, refl_name):
            """Write the experiments as JSON and the reflections as pickle."""
            from dxtbx.model.experiment_list import ExperimentListDumper
            print('Saving combined experiments to {0}'.format(exp_name))
            dump = ExperimentListDumper(experiments)
            dump.as_json(exp_name)
            print('Saving combined reflections to {0}'.format(refl_name))
            reflections.as_pickle(refl_name)

        def save_in_batches(experiments,
                            reflections,
                            exp_name,
                            refl_name,
                            batch_size=1000):
            """Write the experiments and reflections as numbered batch files.

            Output names are the stems of ``exp_name`` / ``refl_name``
            followed by ``_NNN.json`` / ``_NNN.pickle``; reflection ids
            restart at 0 within every batch.
            """
            from dxtbx.command_line.image_average import splitit
            import os
            # NOTE(review): splitit presumably divides the index list into
            # the requested number of chunks - confirm against dxtbx.
            for i, indices in enumerate(
                    splitit(list(range(len(experiments))),
                            (len(experiments) // batch_size) + 1)):
                batch_expts = ExperimentList()
                batch_refls = flex.reflection_table()
                for sub_id, sub_idx in enumerate(indices):
                    batch_expts.append(experiments[sub_idx])
                    sub_refls = reflections.select(
                        reflections['id'] == sub_idx)
                    sub_refls['id'] = flex.int(len(sub_refls), sub_id)
                    batch_refls.extend(sub_refls)
                exp_filename = os.path.splitext(exp_name)[0] + "_%03d.json" % i
                ref_filename = os.path.splitext(
                    refl_name)[0] + "_%03d.pickle" % i
                save_output(batch_expts, batch_refls, exp_filename,
                            ref_filename)

        def combine_in_clusters(experiments_l, reflections_l, exp_name,
                                refl_name, end_count):
            """Build one (experiments, reflections, exp_file, refl_file)
            tuple per cluster.

            Cluster files are numbered downwards from ``end_count``; the
            reflection ids are renumbered from 0 within each cluster.
            """
            import os
            result = []
            for cluster, cluster_members in enumerate(experiments_l):
                cluster_expts = ExperimentList()
                cluster_refls = flex.reflection_table()
                for i, expt in enumerate(cluster_members):
                    refls = reflections_l[cluster][i]
                    refls['id'] = flex.int(len(refls), i)
                    cluster_expts.append(expt)
                    cluster_refls.extend(refls)
                exp_filename = os.path.splitext(exp_name)[0] + (
                    "_cluster%d.json" % (end_count - cluster))
                ref_filename = os.path.splitext(refl_name)[0] + (
                    "_cluster%d.pickle" % (end_count - cluster))
                result.append(
                    (cluster_expts, cluster_refls, exp_filename, ref_filename))
            return result

        # cluster the resulting experiments if requested
        if params.clustering.use:
            clustered = Cluster(experiments,
                                reflections,
                                dendrogram=params.clustering.dendrogram,
                                threshold=params.clustering.threshold,
                                n_max=params.clustering.max_crystals)
            n_clusters = len(clustered.clustered_frames)

            def not_too_many(keeps):
                if params.clustering.max_clusters is not None:
                    return len(keeps) < params.clustering.max_clusters
                return True

            # Take clusters starting from the highest key until none are
            # left or max_clusters is reached.
            keep_frames = []
            sorted_keys = sorted(clustered.clustered_frames.keys())
            while len(clustered.clustered_frames) > 0 and not_too_many(
                    keep_frames):
                keep_frames.append(
                    clustered.clustered_frames.pop(sorted_keys.pop(-1)))
            if params.clustering.exclude_single_crystal_clusters:
                keep_frames = [k for k in keep_frames if len(k) > 1]
            clustered_experiments = [[f.experiment for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            clustered_reflections = [[f.reflections for f in frame_cluster]
                                     for frame_cluster in keep_frames]
            list_of_combined = combine_in_clusters(
                clustered_experiments, clustered_reflections,
                params.output.experiments_filename,
                params.output.reflections_filename, n_clusters)
            for savable_tuple in list_of_combined:
                if params.output.max_batch_size is None:
                    save_output(*savable_tuple)
                else:
                    save_in_batches(*savable_tuple,
                                    batch_size=params.output.max_batch_size)
        else:
            if params.output.max_batch_size is None:
                save_output(experiments, reflections,
                            params.output.experiments_filename,
                            params.output.reflections_filename)
            else:
                save_in_batches(experiments,
                                reflections,
                                params.output.experiments_filename,
                                params.output.reflections_filename,
                                batch_size=params.output.max_batch_size)