def run_with_preparsed(self, params, options):
    """Run combine_experiments, but allow passing in of parameters.

    Combines every input experiment against (optionally) reference models
    taken from one experiment or an averaged detector, merges all input
    reflection tables into one with re-numbered ``id`` columns, optionally
    keeps only a subset of experiments, optionally clusters them, and
    finally writes the results via ``self._save_output`` (in batches if
    requested).

    :param params: extracted PHIL parameter scope (input/output/clustering
        and reference_from_experiment sub-scopes are read here)
    :param options: parsed command-line options (unused in this body;
        kept for interface symmetry with ``run``)
    """
    from dials.util.options import flatten_experiments

    # Try to load the models and data; bail out early with usage help
    # if either input list is empty.
    if len(params.input.experiments) == 0:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    if len(params.input.reflections) == 0:
        print("No reflection data found in the input")
        self.parser.print_help()
        return
    try:
        # Reflection files and experiment files must pair up one-to-one.
        assert len(params.input.reflections) == len(
            params.input.experiments)
    except AssertionError:
        raise Sorry(
            "The number of input reflections files does not match the "
            "number of input experiments")

    flat_exps = flatten_experiments(params.input.experiments)

    # Each reference_from_experiment.* value is an experiment index (or
    # None); below each index is replaced by the actual model object.
    ref_beam = params.reference_from_experiment.beam
    ref_goniometer = params.reference_from_experiment.goniometer
    ref_scan = params.reference_from_experiment.scan
    ref_crystal = params.reference_from_experiment.crystal
    ref_detector = params.reference_from_experiment.detector

    if ref_beam is not None:
        try:
            ref_beam = flat_exps[ref_beam].beam
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_beam))

    if ref_goniometer is not None:
        try:
            ref_goniometer = flat_exps[ref_goniometer].goniometer
        except IndexError:
            raise Sorry(
                "{} is not a valid experiment ID".format(ref_goniometer))

    if ref_scan is not None:
        try:
            ref_scan = flat_exps[ref_scan].scan
        except IndexError:
            raise Sorry("{} is not a valid experiment ID".format(ref_scan))

    if ref_crystal is not None:
        try:
            ref_crystal = flat_exps[ref_crystal].crystal
        except IndexError:
            raise Sorry(
                "{} is not a valid experiment ID".format(ref_crystal))

    if ref_detector is not None:
        # Picking a reference detector and averaging all detectors are
        # mutually exclusive options.
        assert not params.reference_from_experiment.average_detector
        try:
            ref_detector = flat_exps[ref_detector].detector
        except IndexError:
            raise Sorry(
                "{} is not a valid experiment ID".format(ref_detector))
    elif params.reference_from_experiment.average_detector:
        # Average all of the detectors together
        from scitbx.matrix import col

        def average_detectors(target, panelgroups, depth):
            # Recursive function to do the averaging: walks the panel-group
            # hierarchy in lockstep across all detectors, writing the mean
            # local frame into `target` at the requested hierarchy level
            # (or at every level when average_hierarchy_level is None).
            if (params.reference_from_experiment.average_hierarchy_level
                    is None or depth == params.reference_from_experiment.
                    average_hierarchy_level):
                n = len(panelgroups)
                sum_fast = col((0.0, 0.0, 0.0))
                sum_slow = col((0.0, 0.0, 0.0))
                sum_ori = col((0.0, 0.0, 0.0))

                # Average the d matrix vectors
                for pg in panelgroups:
                    sum_fast += col(pg.get_local_fast_axis())
                    sum_slow += col(pg.get_local_slow_axis())
                    sum_ori += col(pg.get_local_origin())
                sum_fast /= n
                sum_slow /= n
                sum_ori /= n

                # Re-orthagonalize the slow and the fast vectors by rotating around the cross product
                c = sum_fast.cross(sum_slow)
                a = sum_fast.angle(sum_slow, deg=True) / 2
                sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                target.set_local_frame(sum_fast, sum_slow, sum_ori)
            if target.is_group():
                # Recurse
                for i, target_pg in enumerate(target):
                    average_detectors(target_pg,
                                      [pg[i] for pg in panelgroups],
                                      depth + 1)

        # Use the first experiment's detector as the mutable target for
        # the averaged hierarchy.
        ref_detector = flat_exps[0].detector
        average_detectors(ref_detector.hierarchy(),
                          [e.detector.hierarchy() for e in flat_exps], 0)

    combine = CombineWithReference(
        beam=ref_beam,
        goniometer=ref_goniometer,
        scan=ref_scan,
        crystal=ref_crystal,
        detector=ref_detector,
        params=params,
    )

    # set up global experiments and reflections lists
    from dials.array_family import flex
    reflections = flex.reflection_table()
    global_id = 0
    skipped_expts = 0
    from dxtbx.model.experiment_list import ExperimentList
    experiments = ExperimentList()

    # loop through the input, building up the global lists
    nrefs_per_exp = []
    for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                        params.input.experiments):
        refs = ref_wrapper.data
        exps = exp_wrapper.data
        for i, exp in enumerate(exps):
            # Select the reflections belonging to this experiment within
            # its own file (local id), then renumber to the global id.
            sel = refs["id"] == i
            sub_ref = refs.select(sel)
            n_sub_ref = len(sub_ref)
            if (params.output.min_reflections_per_experiment is not None
                    and n_sub_ref <
                    params.output.min_reflections_per_experiment):
                # Experiment has too few reflections: drop it entirely
                # (global_id is not incremented for skipped experiments).
                skipped_expts += 1
                continue

            nrefs_per_exp.append(n_sub_ref)
            sub_ref["id"] = flex.int(len(sub_ref), global_id)
            if params.output.delete_shoeboxes and "shoebox" in sub_ref:
                del sub_ref["shoebox"]
            reflections.extend(sub_ref)
            try:
                experiments.append(combine(exp))
            except ComparisonError as e:
                # When we failed tolerance checks, give a useful error message
                (path, index) = find_experiment_in(
                    exp, params.input.experiments)
                raise Sorry(
                    "Model didn't match reference within required tolerance for experiment {} in {}:"
                    "\n{}\nAdjust tolerances or set compare_models=False to ignore differences."
                    .format(index, path, str(e)))

            global_id += 1

    if (params.output.min_reflections_per_experiment is not None
            and skipped_expts > 0):
        print("Removed {0} experiments with fewer than {1} reflections".
              format(skipped_expts,
                     params.output.min_reflections_per_experiment))

    # print number of reflections per experiment
    from libtbx.table_utils import simple_table
    header = ["Experiment", "Number of reflections"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print(st.format())

    # save a random subset if requested
    if (params.output.n_subset is not None
            and len(experiments) > params.output.n_subset):
        subset_exp = ExperimentList()
        subset_refls = flex.reflection_table()
        if params.output.n_subset_method == "random":
            # Draw n_subset experiments without replacement; the kept
            # reflections are renumbered to their new positions.
            n_picked = 0
            indices = list(range(len(experiments)))
            while n_picked < params.output.n_subset:
                idx = indices.pop(random.randint(0, len(indices) - 1))
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), n_picked)
                subset_refls.extend(refls)
                n_picked += 1
            print(
                "Selecting a random subset of {0} experiments out of {1} total."
                .format(params.output.n_subset, len(experiments)))
        elif params.output.n_subset_method == "n_refl":
            # Keep the n_subset experiments with the most reflections,
            # counting only the listed panels when a panel list is given.
            if params.output.n_refl_panel_list is None:
                refls_subset = reflections
            else:
                sel = flex.bool(len(reflections), False)
                for p in params.output.n_refl_panel_list:
                    sel |= reflections["panel"] == p
                refls_subset = reflections.select(sel)
            refl_counts = flex.int()
            for expt_id in range(len(experiments)):
                refl_counts.append(
                    len(refls_subset.select(
                        refls_subset["id"] == expt_id)))
            sort_order = flex.sort_permutation(refl_counts, reverse=True)
            for expt_id, idx in enumerate(
                    sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)
            print(
                "Selecting a subset of {0} experiments with highest number of reflections out of {1} total."
                .format(params.output.n_subset, len(experiments)))
        elif params.output.n_subset_method == "significance_filter":
            # Rank experiments by reflections surviving the significance
            # filter, then keep the top n_subset.
            from dials.algorithms.integration.stills_significance_filter import (
                SignificanceFilter,
            )
            params.output.significance_filter.enable = True
            sig_filter = SignificanceFilter(params.output)
            refls_subset = sig_filter(experiments, reflections)
            refl_counts = flex.int()
            for expt_id in range(len(experiments)):
                refl_counts.append(
                    len(refls_subset.select(
                        refls_subset["id"] == expt_id)))
            sort_order = flex.sort_permutation(refl_counts, reverse=True)
            for expt_id, idx in enumerate(
                    sort_order[:params.output.n_subset]):
                subset_exp.append(experiments[idx])
                refls = reflections.select(reflections["id"] == idx)
                refls["id"] = flex.int(len(refls), expt_id)
                subset_refls.extend(refls)
        experiments = subset_exp
        reflections = subset_refls

    def save_in_batches(experiments, reflections, exp_name, refl_name,
                        batch_size=1000):
        # Split the combined output into numbered .expt/.refl files of at
        # most batch_size experiments each; reflection ids are renumbered
        # within each batch.
        from dxtbx.command_line.image_average import splitit
        for i, indices in enumerate(
                splitit(list(range(len(experiments))),
                        (len(experiments) // batch_size) + 1)):
            batch_expts = ExperimentList()
            batch_refls = flex.reflection_table()
            for sub_id, sub_idx in enumerate(indices):
                batch_expts.append(experiments[sub_idx])
                sub_refls = reflections.select(
                    reflections["id"] == sub_idx)
                sub_refls["id"] = flex.int(len(sub_refls), sub_id)
                batch_refls.extend(sub_refls)
            exp_filename = os.path.splitext(exp_name)[0] + "_%03d.expt" % i
            ref_filename = os.path.splitext(
                refl_name)[0] + "_%03d.refl" % i
            self._save_output(batch_expts, batch_refls, exp_filename,
                              ref_filename)

    def combine_in_clusters(experiments_l, reflections_l, exp_name,
                            refl_name, end_count):
        # Build one (experiments, reflections, exp_filename, ref_filename)
        # tuple per cluster; filenames count down from end_count.
        result = []
        for cluster, experiment in enumerate(experiments_l):
            cluster_expts = ExperimentList()
            cluster_refls = flex.reflection_table()
            for i, expts in enumerate(experiment):
                refls = reflections_l[cluster][i]
                refls["id"] = flex.int(len(refls), i)
                cluster_expts.append(expts)
                cluster_refls.extend(refls)
            exp_filename = os.path.splitext(exp_name)[0] + (
                "_cluster%d.expt" % (end_count - cluster))
            ref_filename = os.path.splitext(refl_name)[0] + (
                "_cluster%d.refl" % (end_count - cluster))
            result.append(
                (cluster_expts, cluster_refls, exp_filename, ref_filename))
        return result

    # cluster the resulting experiments if requested
    if params.clustering.use:
        clustered = Cluster(
            experiments,
            reflections,
            dendrogram=params.clustering.dendrogram,
            threshold=params.clustering.threshold,
            n_max=params.clustering.max_crystals,
        )
        n_clusters = len(clustered.clustered_frames)

        def not_too_many(keeps):
            # True while we may still keep more clusters (no limit, or
            # limit not yet reached).
            if params.clustering.max_clusters is not None:
                return len(keeps) < params.clustering.max_clusters
            return True

        keep_frames = []
        # Pop clusters largest-key-first until exhausted or at the limit.
        # NOTE(review): assumes larger keys correspond to the clusters to
        # prefer — determined by Cluster's key ordering; confirm there.
        sorted_keys = sorted(clustered.clustered_frames.keys())
        while len(clustered.clustered_frames) > 0 and not_too_many(
                keep_frames):
            keep_frames.append(
                clustered.clustered_frames.pop(sorted_keys.pop(-1)))
        if params.clustering.exclude_single_crystal_clusters:
            keep_frames = [k for k in keep_frames if len(k) > 1]
        clustered_experiments = [[f.experiment for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        clustered_reflections = [[f.reflections for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        list_of_combined = combine_in_clusters(
            clustered_experiments,
            clustered_reflections,
            params.output.experiments_filename,
            params.output.reflections_filename,
            n_clusters,
        )
        for saveable_tuple in list_of_combined:
            if params.output.max_batch_size is None:
                self._save_output(*saveable_tuple)
            else:
                save_in_batches(*saveable_tuple,
                                batch_size=params.output.max_batch_size)
    else:
        if params.output.max_batch_size is None:
            self._save_output(
                experiments,
                reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
            )
        else:
            save_in_batches(
                experiments,
                reflections,
                params.output.experiments_filename,
                params.output.reflections_filename,
                batch_size=params.output.max_batch_size,
            )
    return
def run(self):
    """Execute the script.

    Parses the command line, combines every input experiment against
    optional reference models (or an averaged detector), merges the input
    reflection tables with renumbered ``id`` columns, optionally keeps a
    random subset and/or clusters the experiments, and writes the results
    as experiment-list JSON plus reflection pickle files.

    Fixed here: the body used Python 2-only syntax (``print`` statements,
    ``xrange``, popping from a bare ``range``), which is a SyntaxError /
    TypeError under Python 3. Converted to Python 3 with behavior
    otherwise unchanged.
    """
    from dials.util.options import flatten_experiments
    from libtbx.utils import Sorry

    # Parse the command line
    params, options = self.parser.parse_args(show_diff_phil=True)

    # Try to load the models and data; print usage help and bail out
    # early if either input list is empty.
    if len(params.input.experiments) == 0:
        print("No Experiments found in the input")
        self.parser.print_help()
        return
    if len(params.input.reflections) == 0:
        print("No reflection data found in the input")
        self.parser.print_help()
        return
    try:
        # Reflection files and experiment files must pair up one-to-one.
        assert len(params.input.reflections) == len(
            params.input.experiments)
    except AssertionError:
        raise Sorry(
            "The number of input reflections files does not match the "
            "number of input experiments")

    flat_exps = flatten_experiments(params.input.experiments)

    # Each reference_from_experiment.* value is an experiment index (or
    # None); replace each index by the corresponding model object.
    ref_beam = params.reference_from_experiment.beam
    ref_goniometer = params.reference_from_experiment.goniometer
    ref_scan = params.reference_from_experiment.scan
    ref_crystal = params.reference_from_experiment.crystal
    ref_detector = params.reference_from_experiment.detector

    if ref_beam is not None:
        try:
            ref_beam = flat_exps[ref_beam].beam
        except IndexError:
            raise Sorry(
                "{0} is not a valid experiment ID".format(ref_beam))

    if ref_goniometer is not None:
        try:
            ref_goniometer = flat_exps[ref_goniometer].goniometer
        except IndexError:
            raise Sorry(
                "{0} is not a valid experiment ID".format(ref_goniometer))

    if ref_scan is not None:
        try:
            ref_scan = flat_exps[ref_scan].scan
        except IndexError:
            raise Sorry(
                "{0} is not a valid experiment ID".format(ref_scan))

    if ref_crystal is not None:
        try:
            ref_crystal = flat_exps[ref_crystal].crystal
        except IndexError:
            raise Sorry(
                "{0} is not a valid experiment ID".format(ref_crystal))

    if ref_detector is not None:
        # A reference detector and detector averaging are mutually
        # exclusive options.
        assert not params.reference_from_experiment.average_detector
        try:
            ref_detector = flat_exps[ref_detector].detector
        except IndexError:
            raise Sorry(
                "{0} is not a valid experiment ID".format(ref_detector))
    elif params.reference_from_experiment.average_detector:
        # Average all of the detectors together
        from scitbx.matrix import col

        def average_detectors(target, panelgroups, depth):
            # Recursive function to do the averaging: walks the
            # panel-group hierarchy in lockstep across all detectors and
            # writes the mean local frame into `target` at the requested
            # hierarchy level (every level when the param is None).
            if params.reference_from_experiment.average_hierarchy_level is None or \
                    depth == params.reference_from_experiment.average_hierarchy_level:
                n = len(panelgroups)
                sum_fast = col((0.0, 0.0, 0.0))
                sum_slow = col((0.0, 0.0, 0.0))
                sum_ori = col((0.0, 0.0, 0.0))

                # Average the d matrix vectors
                for pg in panelgroups:
                    sum_fast += col(pg.get_local_fast_axis())
                    sum_slow += col(pg.get_local_slow_axis())
                    sum_ori += col(pg.get_local_origin())
                sum_fast /= n
                sum_slow /= n
                sum_ori /= n

                # Re-orthagonalize the slow and the fast vectors by rotating around the cross product
                c = sum_fast.cross(sum_slow)
                a = sum_fast.angle(sum_slow, deg=True) / 2
                sum_fast = sum_fast.rotate(c, a - 45, deg=True)
                sum_slow = sum_slow.rotate(c, -(a - 45), deg=True)

                target.set_local_frame(sum_fast, sum_slow, sum_ori)
            if target.is_group():
                # Recurse
                for i, target_pg in enumerate(target):
                    average_detectors(target_pg,
                                      [pg[i] for pg in panelgroups],
                                      depth + 1)

        # Use the first experiment's detector as the mutable target for
        # the averaged hierarchy.
        ref_detector = flat_exps[0].detector
        average_detectors(ref_detector.hierarchy(),
                          [e.detector.hierarchy() for e in flat_exps], 0)

    combine = CombineWithReference(beam=ref_beam,
                                   goniometer=ref_goniometer,
                                   scan=ref_scan,
                                   crystal=ref_crystal,
                                   detector=ref_detector,
                                   params=params)

    # set up global experiments and reflections lists
    from dials.array_family import flex
    reflections = flex.reflection_table()
    global_id = 0
    from dxtbx.model.experiment_list import ExperimentList
    experiments = ExperimentList()

    # loop through the input, building up the global lists
    nrefs_per_exp = []
    for ref_wrapper, exp_wrapper in zip(params.input.reflections,
                                        params.input.experiments):
        refs = ref_wrapper.data
        exps = exp_wrapper.data
        for i, exp in enumerate(exps):
            # Select this experiment's reflections by its file-local id,
            # then renumber them to the global id.
            sel = refs['id'] == i
            sub_ref = refs.select(sel)
            nrefs_per_exp.append(len(sub_ref))
            sub_ref['id'] = flex.int(len(sub_ref), global_id)
            if params.output.delete_shoeboxes and 'shoebox' in sub_ref:
                del sub_ref['shoebox']
            reflections.extend(sub_ref)
            experiments.append(combine(exp))
            global_id += 1

    # print number of reflections per experiment
    from libtbx.table_utils import simple_table
    header = ["Experiment", "Nref"]
    rows = [(str(i), str(n)) for (i, n) in enumerate(nrefs_per_exp)]
    st = simple_table(rows, header)
    print(st.format())

    # save a random subset if requested
    if params.output.n_subset is not None and len(
            experiments) > params.output.n_subset:
        import random
        subset_exp = ExperimentList()
        subset_refls = flex.reflection_table()
        n_picked = 0
        # list() needed: popping from a bare range object fails on py3.
        indices = list(range(len(experiments)))
        while n_picked < params.output.n_subset:
            # Draw without replacement; kept reflections are renumbered
            # to their new experiment positions.
            idx = indices.pop(random.randint(0, len(indices) - 1))
            subset_exp.append(experiments[idx])
            refls = reflections.select(reflections['id'] == idx)
            refls['id'] = flex.int(len(refls), n_picked)
            subset_refls.extend(refls)
            n_picked += 1
        experiments = subset_exp
        reflections = subset_refls

    def save_output(experiments, reflections, exp_name, refl_name):
        # save output: experiments as JSON, reflections as pickle
        from dxtbx.model.experiment_list import ExperimentListDumper
        print('Saving combined experiments to {0}'.format(exp_name))
        dump = ExperimentListDumper(experiments)
        dump.as_json(exp_name)
        print('Saving combined reflections to {0}'.format(refl_name))
        reflections.as_pickle(refl_name)

    def save_in_batches(experiments, reflections, exp_name, refl_name,
                        batch_size=1000):
        # Split the combined output into numbered files of at most
        # batch_size experiments; reflection ids renumbered per batch.
        from dxtbx.command_line.image_average import splitit
        import os
        for i, indices in enumerate(
                splitit(list(range(len(experiments))),
                        (len(experiments) // batch_size) + 1)):
            batch_expts = ExperimentList()
            batch_refls = flex.reflection_table()
            for sub_id, sub_idx in enumerate(indices):
                batch_expts.append(experiments[sub_idx])
                sub_refls = reflections.select(
                    reflections['id'] == sub_idx)
                sub_refls['id'] = flex.int(len(sub_refls), sub_id)
                batch_refls.extend(sub_refls)
            exp_filename = os.path.splitext(exp_name)[0] + "_%03d.json" % i
            ref_filename = os.path.splitext(
                refl_name)[0] + "_%03d.pickle" % i
            save_output(batch_expts, batch_refls, exp_filename,
                        ref_filename)

    def combine_in_clusters(experiments_l, reflections_l, exp_name,
                            refl_name, end_count):
        # Build one (experiments, reflections, exp_filename, ref_filename)
        # tuple per cluster; filenames count down from end_count.
        import os
        result = []
        for cluster, experiment in enumerate(experiments_l):
            cluster_expts = ExperimentList()
            cluster_refls = flex.reflection_table()
            for i, expts in enumerate(experiment):
                refls = reflections_l[cluster][i]
                refls['id'] = flex.int(len(refls), i)
                cluster_expts.append(expts)
                cluster_refls.extend(refls)
            exp_filename = os.path.splitext(exp_name)[0] + (
                "_cluster%d.json" % (end_count - cluster))
            ref_filename = os.path.splitext(refl_name)[0] + (
                "_cluster%d.pickle" % (end_count - cluster))
            result.append(
                (cluster_expts, cluster_refls, exp_filename, ref_filename))
        return result

    # cluster the resulting experiments if requested
    if params.clustering.use:
        clustered = Cluster(experiments,
                            reflections,
                            dendrogram=params.clustering.dendrogram,
                            threshold=params.clustering.threshold,
                            n_max=params.clustering.max_crystals)
        n_clusters = len(clustered.clustered_frames)

        def not_too_many(keeps):
            # True while more clusters may be kept (no limit, or limit
            # not yet reached). Replaces the original assigned lambdas
            # (PEP 8 E731) with identical behavior.
            if params.clustering.max_clusters is not None:
                return len(keeps) < params.clustering.max_clusters
            return True

        keep_frames = []
        # Pop clusters largest-key-first until exhausted or at the limit.
        # NOTE(review): assumes larger keys mark preferred clusters —
        # determined by Cluster's key ordering; confirm there.
        sorted_keys = sorted(clustered.clustered_frames.keys())
        while len(clustered.clustered_frames) > 0 and not_too_many(
                keep_frames):
            keep_frames.append(
                clustered.clustered_frames.pop(sorted_keys.pop(-1)))
        if params.clustering.exclude_single_crystal_clusters:
            keep_frames = [k for k in keep_frames if len(k) > 1]
        clustered_experiments = [[f.experiment for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        clustered_reflections = [[f.reflections for f in frame_cluster]
                                 for frame_cluster in keep_frames]
        list_of_combined = combine_in_clusters(
            clustered_experiments, clustered_reflections,
            params.output.experiments_filename,
            params.output.reflections_filename, n_clusters)
        for savable_tuple in list_of_combined:
            if params.output.max_batch_size is None:
                save_output(*savable_tuple)
            else:
                save_in_batches(*savable_tuple,
                                batch_size=params.output.max_batch_size)
    else:
        if params.output.max_batch_size is None:
            save_output(experiments, reflections,
                        params.output.experiments_filename,
                        params.output.reflections_filename)
        else:
            save_in_batches(experiments, reflections,
                            params.output.experiments_filename,
                            params.output.reflections_filename,
                            batch_size=params.output.max_batch_size)
    return