def test_exclude_image_ranges_for_scaling():
    """Test for namesake function.

    Two reflection tables with identical z centroids are paired with two scan
    experiments covering images 2-20. The range "1:11:20" is excluded, and the
    test then checks the valid image ranges reported by each scan and the
    ``user_excluded_in_scaling`` flags of each returned table.
    """
    z_centroids = [0.5, 1.5, 5.5, 9.5, 10.5]

    refl1 = flex.reflection_table()
    refl1["xyzobs.px.value"] = flex.vec3_double(
        [(0, 0, z) for z in z_centroids]
    )
    # Start from a state where nothing is flagged as user-excluded.
    refl1.set_flags(
        flex.bool(len(z_centroids), False),
        refl1.flags.user_excluded_in_scaling,
    )
    refl2 = copy.deepcopy(refl1)
    refl1.experiment_identifiers()[0] = "0"
    refl2.experiment_identifiers()[1] = "1"

    explist = ExperimentList(
        [
            make_scan_experiment(image_range=(2, 20), expid=expid)
            for expid in ("0", "1")
        ]
    )

    refls, explist = exclude_image_ranges_for_scaling(
        [refl1, refl2], explist, [["1:11:20"]]
    )

    # Only the experiment named in the exclusion string loses images 11-20.
    expected_valid_ranges = [("0", [(2, 20)]), ("1", [(2, 10)])]
    for position, (expid, valid_ranges) in enumerate(expected_valid_ranges):
        scan = explist[position].scan
        assert list(scan.get_valid_image_ranges(expid)) == valid_ranges

    expected_excluded = [
        [True, False, False, False, False],
        [True, False, False, False, True],
    ]
    for table, excluded in zip(refls, expected_excluded):
        flags = table.get_flags(table.flags.user_excluded_in_scaling)
        assert list(flags) == excluded
def remove_image_ranges_below_cutoff(
    experiments,
    reflections,
    ids_to_remove,
    image_group_to_expid_and_range,
    expid_to_image_groups,
    results_summary,
):
    """Remove image ranges from the datasets.

    An image group listed in ``ids_to_remove`` is only excluded while it is
    the first or last entry of its experiment's group list. The scan over the
    candidates is repeated until a full pass removes nothing, so that groups
    which become edge groups after a neighbour is removed are also excluded.
    Groups that never reach an edge are logged and left in place.

    Afterwards, any valid image range of an experiment that shares no image
    with the groups in ``image_group_to_expid_and_range`` is excluded as
    well, and experiments left without any valid image range are dropped.

    Args:
        experiments: The experiments; each must provide ``identifier`` and
            ``scan.get_valid_image_ranges``.
        reflections: A combined reflection table for all experiments.
        ids_to_remove: Iterable of image-group ids that are candidates for
            removal.
        image_group_to_expid_and_range: Mapping of image-group id to a
            ``(experiment identifier, image range)`` pair.
        expid_to_image_groups: Mapping of experiment identifier to its
            ordered list of image-group ids. Mutated in place: removed groups
            are deleted from the lists.
        results_summary: Dict whose ``"dataset_removal"`` entry is updated in
            place with a record of what was removed.

    Returns:
        A single reflection table built by concatenating the per-experiment
        tables that remain after exclusion.
    """
    n_valid_reflections = reflections.get_flags(
        reflections.flags.scaled
    ).count(True)

    # Invert the table-id -> identifier map of the reflection table.
    identifier_map = reflections.experiment_identifiers()
    expid_to_tableid = {
        v: k for k, v in zip(identifier_map.keys(), identifier_map.values())
    }

    experiments_to_delete = []
    exclude_images = []
    image_ranges_removed = []  # track for results summary
    n_removed_this_cycle = 1
    while n_removed_this_cycle != 0:
        other_potential_ids_to_remove = []
        n_removed_this_cycle = 0
        for id_ in sorted(ids_to_remove):
            exp_id, image_range = image_group_to_expid_and_range[id_]
            groups = expid_to_image_groups[exp_id]
            is_last = groups[-1] == id_
            if is_last or groups[0] == id_:  # is at edge of scan.
                table_id = expid_to_tableid[exp_id]
                image_ranges_removed.append([image_range, table_id])
                logger.info(
                    "Removing image range %s from experiment %s",
                    image_range,
                    table_id,
                )
                exclude_images.append(
                    [f"{table_id}:{image_range[0]}:{image_range[1]}"]
                )
                if is_last:
                    del groups[-1]
                else:
                    del groups[0]
                n_removed_this_cycle += 1
            else:
                other_potential_ids_to_remove.append(id_)
        ids_to_remove = other_potential_ids_to_remove

    for id_ in other_potential_ids_to_remove:
        exp_id, image_range = image_group_to_expid_and_range[id_]
        table_id = expid_to_tableid[exp_id]
        logger.info(
            """Image range %s from experiment %s is below the cutoff, but not at the edge of a sweep.""",
            image_range,
            table_id,
        )

    # Now remove individual batches
    if -1 in reflections["id"]:
        reflections = reflections.select(reflections["id"] != -1)
    reflection_list = reflections.split_by_experiment_id()
    reflection_list, experiments = exclude_image_ranges_for_scaling(
        reflection_list, experiments, exclude_images
    )

    # Check if any image groups were all outliers and missed by the analysis.
    # This catches an edge case where there is an image group full of
    # outliers, which gets filtered out before the analysis but should
    # be set as not a valid image range.
    exclude_images = []
    for exp in experiments:
        # A set gives O(1) membership tests; a list here made the overlap
        # check quadratic in the number of images.
        tested = set()
        for exp_id, imgrange in image_group_to_expid_and_range.values():
            if exp_id == exp.identifier:
                tested.update(range(imgrange[0], imgrange[1] + 1))
        for imgrange in exp.scan.get_valid_image_ranges(exp.identifier):
            # Exclude a valid range only if none of its images were tested.
            if tested.isdisjoint(range(imgrange[0], imgrange[1] + 1)):
                table_id = expid_to_tableid[exp.identifier]
                exclude_images.append(
                    [f"{table_id}:{imgrange[0]}:{imgrange[1]}"]
                )
                logger.info(
                    "Removing %s due to scaling outlier group.",
                    exclude_images[-1],
                )
    if exclude_images:
        reflection_list, experiments = exclude_image_ranges_for_scaling(
            reflection_list, experiments, exclude_images
        )

    # If a whole experiment has been excluded: need to remove it here.
    ids_removed = []
    for exp, refl in zip(experiments, reflection_list):
        if not exp.scan.get_valid_image_ranges(exp.identifier):
            experiments_to_delete.append(exp.identifier)
            ids_removed.append(refl.experiment_identifiers().keys()[0])
    if experiments_to_delete:
        experiments, reflection_list = select_datasets_on_identifiers(
            experiments, reflection_list, exclude_datasets=experiments_to_delete
        )
    assert len(reflection_list) == len(experiments)

    output_reflections = flex.reflection_table()
    for r in reflection_list:
        output_reflections.extend(r)

    n_valid_filtered_reflections = output_reflections.get_flags(
        output_reflections.flags.scaled
    ).count(True)
    results_summary["dataset_removal"].update(
        {
            "image_ranges_removed": image_ranges_removed,
            "experiments_fully_removed": experiments_to_delete,
            "experiment_ids_fully_removed": ids_removed,
            "n_reflections_removed": n_valid_reflections
            - n_valid_filtered_reflections,
        }
    )
    return output_reflections
def remove_image_ranges_below_cutoff(
    experiments,
    reflections,
    ids_to_remove,
    image_group_to_expid_and_range,
    expid_to_image_groups,
    results_summary,
):
    """Remove image ranges from the datasets.

    A candidate image group is excluded only while it is the last group of
    its experiment. The candidates are rescanned until a pass removes
    nothing; the remaining candidates are logged and kept. Experiments that
    end up with no valid image range are removed entirely.

    ``expid_to_image_groups`` and ``results_summary["dataset_removal"]`` are
    modified in place. Returns one reflection table made by concatenating the
    remaining per-experiment tables.
    """
    n_valid_before = reflections.get_flags(
        reflections.flags.bad_for_scaling, all=False
    ).count(False)

    experiments_to_delete = []
    exclude_images = []
    image_ranges_removed = []  # reported in the results summary

    pending = ids_to_remove
    removed_in_pass = 1
    while removed_in_pass != 0:
        removed_in_pass = 0
        still_pending = []
        for group_id in sorted(pending):
            # exp_id is the numerical id used by the reflection table.
            exp_id, image_range = image_group_to_expid_and_range[group_id]
            identifier = reflections.experiment_identifiers()[exp_id]
            groups = expid_to_image_groups[exp_id]
            if groups[-1] != group_id:
                # Not the last group (yet); reconsider on the next pass.
                still_pending.append(group_id)
                continue
            image_ranges_removed.append([image_range, exp_id])
            logger.info(
                "Removing image range %s from experiment %s",
                image_range,
                identifier,
            )
            first_image, last_image = image_range[0], image_range[1]
            exclude_images.append(
                [":".join([identifier, str(first_image), str(last_image)])]
            )
            del groups[-1]
            removed_in_pass += 1
        pending = still_pending

    for group_id in pending:
        exp_id, image_range = image_group_to_expid_and_range[group_id]
        identifier = reflections.experiment_identifiers()[exp_id]
        logger.info(
            """Image range %s from experiment %s is below the cutoff, but not at the end of a sweep.""",
            image_range,
            identifier,
        )

    # Drop unassigned reflections, then exclude the collected batches.
    if -1 in reflections["id"]:
        reflections = reflections.select(reflections["id"] != -1)
    reflection_list, experiments = exclude_image_ranges_for_scaling(
        reflections.split_by_experiment_id(), experiments, exclude_images
    )

    # An experiment with no valid image range left has been fully excluded.
    for exp in experiments:
        if not exp.scan.get_valid_image_ranges(exp.identifier):
            experiments_to_delete.append(exp.identifier)
    if experiments_to_delete:
        experiments, reflection_list = select_datasets_on_ids(
            experiments, reflection_list, exclude_datasets=experiments_to_delete
        )
    assert len(reflection_list) == len(experiments)

    output_reflections = flex.reflection_table()
    for table in reflection_list:
        output_reflections.extend(table)

    n_valid_after = output_reflections.get_flags(
        output_reflections.flags.bad_for_scaling, all=False
    ).count(False)
    summary_update = {
        "image_ranges_removed": image_ranges_removed,
        "experiments_fully_removed": experiments_to_delete,
        "n_reflections_removed": n_valid_before - n_valid_after,
    }
    results_summary["dataset_removal"].update(summary_update)
    return output_reflections
def prepare_input(params, experiments, reflections):
    """Check the input data and get it ready for scaling.

    Steps, in order: optional dataset selection, splitting into one
    reflection table per experiment, identifier assignment, flag reset and
    image-range exclusion, optional consistent-indexing check, space group
    consistency check, optional addition of a target dataset, and finally
    resolution / partiality cuts.

    Returns:
        The tuple ``(params, experiments, reflections)``.

    Raises:
        ValueError: if the number of experiments and reflection tables
            differ, or if the experiments do not share one space group.
    """
    #### Dataset selection happens first, before the data is split into
    #### individual reflection tables and expids are set.
    selection = params.dataset_selection
    if selection.exclude_datasets or selection.use_datasets:
        experiments, reflections = select_datasets_on_ids(
            experiments,
            reflections,
            selection.exclude_datasets,
            selection.use_datasets,
        )
        retained_ids = flex.size_t()
        for table in reflections:
            retained_ids.extend(table.experiment_identifiers().keys())
        logger.info(
            "\nDataset ids for retained datasets are: %s \n",
            ",".join(str(i) for i in retained_ids),
        )

    #### One reflection table per experiment.
    logger.info(
        "Checking for the existence of a reflection table \n"
        "containing multiple datasets \n"
    )
    reflections = parse_multiple_datasets(reflections)
    n_tables = len(reflections)
    n_experiments = len(experiments)
    logger.info(
        "Found %s reflection tables & %s experiments in total.",
        n_tables,
        n_experiments,
    )
    if n_experiments != n_tables:
        raise ValueError(
            "Mismatched number of experiments and reflection tables found."
        )

    #### Assign experiment identifiers.
    experiments, reflections = assign_unique_identifiers(
        experiments, reflections
    )
    dataset_ids = ",".join(
        str(i)
        for table in reflections
        for i in table.experiment_identifiers().keys()
    )
    logger.info("\nDataset ids are: %s \n", dataset_ids)

    # Clear the scaling flags on every reflection before exclusion.
    for table in reflections:
        for flag in (table.flags.bad_for_scaling, table.flags.scaled):
            table.unset_flags(flex.bool(table.size(), True), flag)

    reflections, experiments = exclude_image_ranges_for_scaling(
        reflections, experiments, params.exclude_images
    )

    #### Optional check of consistent indexing, useful for
    #### targeted / incremental scaling.
    scaling_options = params.scaling_options
    if scaling_options.check_consistent_indexing:
        logger.info("Running dials.cosym to check consistent indexing:\n")
        cosym_params = cosym_phil_scope.extract()
        cosym_params.nproc = scaling_options.nproc
        cosym_instance = cosym(experiments, reflections, cosym_params)
        cosym_instance.run()
        experiments = cosym_instance.experiments
        reflections = cosym_instance.reflections
        logger.info("Finished running dials.cosym, continuing with scaling.\n")

    #### All experiments must be in the same space group.
    space_group_numbers = set(
        [
            expt.crystal.get_space_group().type().number()
            for expt in experiments
        ]
    )
    if len(space_group_numbers) > 1:
        raise ValueError(
            """The experiments have different space groups: space group numbers found: %s Please reanalyse the data so that space groups are consistent, (consider using dials.reindex, dials.symmetry or dials.cosym) or remove incompatible experiments (using the option exclude_datasets=)"""
            % ", ".join(map(str, space_group_numbers))
        )
    logger.info(
        "Space group being used during scaling is %s",
        experiments[0].crystal.get_space_group().info(),
    )

    #### For targeted scaling, append the target experiment and
    #### reflection table to the lists.
    target = None
    if scaling_options.target_model:
        logger.info("Extracting data from structural model.")
        target = create_datastructures_for_structural_model(
            reflections, experiments, scaling_options.target_model
        )
    elif scaling_options.target_mtz:
        logger.info("Extracting data from merged mtz.")
        target = create_datastructures_for_target_mtz(
            experiments, scaling_options.target_mtz
        )
    if target is not None:
        target_experiment, target_table = target
        experiments.append(target_experiment)
        reflections.append(target_table)

    #### Non-batch cuts, applied to every dataset including the target.
    best_unit_cell = params.reflection_selection.best_unit_cell
    if best_unit_cell is None:
        best_unit_cell = determine_best_unit_cell(experiments)

    cuts = params.cut_data
    for table in reflections:
        if cuts.d_min or cuts.d_max:
            d_spacings = best_unit_cell.d(table["miller_index"])
            if cuts.d_min:
                table.set_flags(
                    d_spacings < cuts.d_min,
                    table.flags.user_excluded_in_scaling,
                )
            if cuts.d_max:
                table.set_flags(
                    d_spacings > cuts.d_max,
                    table.flags.user_excluded_in_scaling,
                )
        if cuts.partiality_cutoff and "partiality" in table:
            table.set_flags(
                table["partiality"] < cuts.partiality_cutoff,
                table.flags.user_excluded_in_scaling,
            )

    return params, experiments, reflections
def run(args):
    """Command-line entry point for xia2.multiplex.

    Parses ``args``, sets up logging, validates that experiments and
    reflections were supplied in matching numbers, combines all reflection
    tables into one and hands the data to ``ScaleAndMerge.MultiCrystalScale``.

    Args:
        args: Command-line arguments, passed to the option parser.

    Raises:
        SystemExit: if the numbers of reflection and experiment inputs
            differ, if fewer than two experiments are given, or if
            ``MultiCrystalScale`` raises ``ValueError``.
    """
    usage = (
        "xia2.multiplex [options] [param.phil] "
        "models1.expt models2.expt observations1.refl "
        "observations2.refl..."
    )

    # Create the parser
    parser = OptionParser(
        usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_experiments=True,
        check_format=False,
        epilog=help_message,
    )

    # Parse the command line
    params, options = parser.parse_args(args=args, show_diff_phil=False)

    # Configure the logging
    xia2.Handlers.Streams.setup_logging(
        logfile=params.output.log, verbose=options.verbose
    )

    logger.info(dials_version())

    # Log the diff phil
    diff_phil = parser.diff_phil.as_str()
    if diff_phil:
        logger.info("The following parameters have been modified:\n")
        logger.info(diff_phil)

    # Try to load the models and data
    if not params.input.experiments:
        logger.info("No Experiments found in the input")
        parser.print_help()
        return
    if not params.input.reflections:
        logger.info("No reflection data found in the input")
        parser.print_help()
        return

    # Explicit comparison rather than an assert: asserts are stripped when
    # Python runs with -O, which would silently skip this validation.
    if len(params.input.reflections) != len(params.input.experiments):
        sys.exit(
            "The number of input reflections files does not match the "
            "number of input experiments"
        )

    if params.seed is not None:
        flex.set_random_seed(params.seed)
        random.seed(params.seed)

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)
    if len(experiments) < 2:
        sys.exit("xia2.multiplex requires a minimum of two experiments")
    reflections = parse_multiple_datasets(reflections)
    experiments, reflections = assign_unique_identifiers(
        experiments, reflections
    )

    reflections, experiments = exclude_image_ranges_for_scaling(
        reflections, experiments, params.exclude_images
    )

    # Combine the per-experiment tables into a single reflection table.
    reflections_all = flex.reflection_table()
    assert len(reflections) == 1 or len(reflections) == len(experiments)
    for _expt, refl in zip(experiments, reflections):
        reflections_all.extend(refl)
    reflections_all.assert_experiment_identifiers_are_consistent(experiments)

    # Each entry of params.identifiers may itself be a comma-separated list.
    if params.identifiers is not None:
        params.identifiers = [
            part
            for identifier in params.identifiers
            for part in identifier.split(",")
        ]

    try:
        ScaleAndMerge.MultiCrystalScale(experiments, reflections_all, params)
    except ValueError as e:
        sys.exit(str(e))