def _filter_min_reflections(self, experiments, reflections):
    identifiers = []

    for expt, refl in zip(experiments, reflections):
        if len(refl) >= self.params.min_reflections:
            identifiers.append(expt.identifier)

    return select_datasets_on_identifiers(
        experiments, reflections, use_datasets=identifiers
    )
def _filter_min_reflections(self, experiments, reflections, uuid_cache_in):
    identifiers = []
    self.uuid_cache = []
    for expt, refl, uuid in zip(experiments, reflections, uuid_cache_in):
        if len(refl) >= self.params.min_reflections:
            identifiers.append(expt.identifier)
            self.uuid_cache.append(uuid)

    return select_datasets_on_identifiers(
        experiments, reflections, use_datasets=identifiers
    )
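# A minimal, self-contained sketch (toy data, not the dials API) of the
# guarantee the uuid-aware variant above provides: the uuid cache is filtered
# in lock-step with the experiments, so index i in one list still corresponds
# to index i in the other after small datasets are dropped. All names and
# values below are hypothetical.
def _example_min_reflection_filter():
    refl_counts = [120, 3, 87]              # pretend reflection-table sizes
    uuids = ["uuid-a", "uuid-b", "uuid-c"]  # parallel uuid cache
    min_reflections = 10
    kept_uuids = [u for n, u in zip(refl_counts, uuids) if n >= min_reflections]
    assert kept_uuids == ["uuid-a", "uuid-c"]  # "uuid-b" dropped with its dataset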
def remove_image_ranges_below_cutoff(
    experiments,
    reflections,
    ids_to_remove,
    image_group_to_expid_and_range,
    expid_to_image_groups,
    results_summary,
):
    """Remove image ranges from the datasets."""
    n_valid_reflections = reflections.get_flags(reflections.flags.scaled).count(True)
    expid_to_tableid = {
        v: k
        for k, v in zip(
            reflections.experiment_identifiers().keys(),
            reflections.experiment_identifiers().values(),
        )
    }

    experiments_to_delete = []
    exclude_images = []
    image_ranges_removed = []  # track for results summary
    n_removed_this_cycle = 1
    while n_removed_this_cycle != 0:
        other_potential_ids_to_remove = []
        n_removed_this_cycle = 0
        for id_ in sorted(ids_to_remove):
            exp_id, image_range = image_group_to_expid_and_range[id_]  # identifier
            if (
                expid_to_image_groups[exp_id][-1] == id_
                or expid_to_image_groups[exp_id][0] == id_
            ):  # is at edge of scan.
                # loc = list(experiments.identifiers()).index(exp_id)
                table_id = expid_to_tableid[exp_id]
                image_ranges_removed.append([image_range, table_id])
                logger.info(
                    "Removing image range %s from experiment %s",
                    image_range,
                    table_id,
                )
                exclude_images.append(
                    [f"{table_id}:{image_range[0]}:{image_range[1]}"]
                )
                if expid_to_image_groups[exp_id][-1] == id_:
                    del expid_to_image_groups[exp_id][-1]
                else:
                    del expid_to_image_groups[exp_id][0]
                n_removed_this_cycle += 1
            else:
                other_potential_ids_to_remove.append(id_)
        ids_to_remove = other_potential_ids_to_remove
    for id_ in other_potential_ids_to_remove:
        exp_id, image_range = image_group_to_expid_and_range[id_]
        table_id = expid_to_tableid[exp_id]
        logger.info(
            "Image range %s from experiment %s is below the cutoff, "
            "but not at the edge of a sweep.",
            image_range,
            table_id,
        )

    # Now remove individual batches
    if -1 in reflections["id"]:
        reflections = reflections.select(reflections["id"] != -1)
    reflection_list = reflections.split_by_experiment_id()
    reflection_list, experiments = exclude_image_ranges_for_scaling(
        reflection_list, experiments, exclude_images
    )

    # check if any image groups were all outliers and missed by the analysis
    # This catches an edge case where there is an image group full of
    # outliers, which gets filtered out before the analysis but should
    # be set as not a valid image range.
    exclude_images = []
    for exp in experiments:
        # if any of the image ranges are not in the sets tested, exclude them
        tested = []
        for exp_id, imgrange in image_group_to_expid_and_range.values():
            if exp_id == exp.identifier:
                tested.extend(list(range(imgrange[0], imgrange[1] + 1)))
        for imgrange in exp.scan.get_valid_image_ranges(exp.identifier):
            if all(j not in tested for j in range(imgrange[0], imgrange[1] + 1)):
                table_id = expid_to_tableid[exp.identifier]
                exclude_images.append([f"{table_id}:{imgrange[0]}:{imgrange[1]}"])
                logger.info(
                    "Removing %s due to scaling outlier group.", exclude_images[-1]
                )
    if exclude_images:
        reflection_list, experiments = exclude_image_ranges_for_scaling(
            reflection_list, experiments, exclude_images
        )

    # if a whole experiment has been excluded: need to remove it here
    ids_removed = []
    for exp, refl in zip(experiments, reflection_list):
        if not exp.scan.get_valid_image_ranges(exp.identifier):  # all removed above
            experiments_to_delete.append(exp.identifier)
            ids_removed.append(refl.experiment_identifiers().keys()[0])
    if experiments_to_delete:
        experiments, reflection_list = select_datasets_on_identifiers(
            experiments, reflection_list, exclude_datasets=experiments_to_delete
        )
    assert len(reflection_list) == len(experiments)

    output_reflections = flex.reflection_table()
    for r in reflection_list:
        output_reflections.extend(r)

    n_valid_filtered_reflections = output_reflections.get_flags(
        output_reflections.flags.scaled
    ).count(True)
    results_summary["dataset_removal"].update(
        {
            "image_ranges_removed": image_ranges_removed,
            "experiments_fully_removed": experiments_to_delete,
            "experiment_ids_fully_removed": ids_removed,
            "n_reflections_removed": n_valid_reflections
            - n_valid_filtered_reflections,
        }
    )
    return output_reflections
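# Self-contained sketch (plain dicts standing in for the real bookkeeping) of
# the trimming loop in remove_image_ranges_below_cutoff above: flagged image
# groups are removed only while they sit at an edge of their scan, and the
# pass repeats until no flagged group remains at an edge, so interior groups
# are reported but kept. Toy data, hypothetical helper name.
def _example_edge_trimming():
    expid_to_image_groups = {"exp0": [0, 1, 2, 3]}
    ids_to_remove = {1, 3}  # group 3 is at the scan edge, group 1 is interior
    removed = []
    n_removed_this_cycle = 1
    while n_removed_this_cycle:
        n_removed_this_cycle = 0
        for gid in sorted(ids_to_remove - set(removed)):
            groups = expid_to_image_groups["exp0"]
            if groups and gid in (groups[0], groups[-1]):
                groups.remove(gid)
                removed.append(gid)
                n_removed_this_cycle += 1
    # group 3 is trimmed from the edge; group 1 never reaches an edge
    assert removed == [3]
    assert expid_to_image_groups["exp0"] == [0, 1, 2]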
def __init__(self, experiments, reflections, uuid_cache_in, params=None,
             do_plot=False, i_plot=None, output_dir=None):
    super(dials_cl_cosym_wrapper, self).__init__(
        events=["run_cosym", "performed_unit_cell_clustering"]
    )
    if params is None:
        params = phil_scope.extract()
    self.params = params

    self._reflections = []
    for refl, expt in zip(reflections, experiments):
        sel = get_selection_for_valid_image_ranges(refl, expt)
        self._reflections.append(refl.select(sel))

    self._experiments, self._reflections = self._filter_min_reflections(
        experiments, self._reflections, uuid_cache_in
    )
    self.ids_to_identifiers_map = {}
    for table in self._reflections:
        self.ids_to_identifiers_map.update(table.experiment_identifiers())
    self.identifiers_to_ids_map = {
        value: key for key, value in self.ids_to_identifiers_map.items()
    }

    if len(self._experiments) > 1:
        # perform unit cell clustering
        identifiers = self._unit_cell_clustering(self._experiments)
        if len(identifiers) < len(self._experiments):
            logger.info(
                "Selecting subset of %i datasets for cosym analysis: %s"
                % (len(identifiers), str(identifiers))
            )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections, use_datasets=identifiers
            )
            self.uuid_cache = [self.uuid_cache[int(id)] for id in identifiers]

    # Map experiments and reflections to minimum cell
    cb_ops = change_of_basis_ops_to_minimum_cell(
        self._experiments,
        params.lattice_symmetry_max_delta,
        params.relative_length_tolerance,
        params.absolute_angle_tolerance,
    )
    in_cb_ops = len(cb_ops)
    exclude = [
        expt.identifier
        for expt, cb_op in zip(self._experiments, cb_ops)
        if not cb_op
    ]
    if len(exclude):
        logger.info(
            "Rejecting {} datasets from cosym analysis "
            "(couldn't determine consistent cb_op to minimum cell):\n"
            "{}".format(len(exclude), exclude)
        )
        self._experiments, self._reflections = select_datasets_on_identifiers(
            self._experiments, self._reflections, exclude_datasets=exclude
        )
    cb_ops = list(filter(None, cb_ops))
    ex_cb_ops = len(cb_ops)

    # Normally we expect that all the cb_ops are the same (applicable for
    # PSI with P63)
    assertion_dict = {}
    for cb_op in cb_ops:
        key_ = cb_op.as_hkl()
        assertion_dict[key_] = assertion_dict.get(key_, 0)
        assertion_dict[key_] += 1
    if len(assertion_dict) != 1:
        # unexpected, there is normally only 1 cb operator to minimum cell
        from libtbx.mpi4py import MPI

        mpi_rank = MPI.COMM_WORLD.Get_rank()
        mpi_size = MPI.COMM_WORLD.Get_size()
        print(
            "RANK %02d, # experiments %d, after exclusion %d, "
            "unexpectedly there are %d unique cb_ops: %s"
            % (
                mpi_rank,
                in_cb_ops,
                ex_cb_ops,
                len(assertion_dict),
                ", ".join(
                    "%s:%d" % (key, assertion_dict[key]) for key in assertion_dict
                ),
            )
        )
        # In fact we need all cb_ops to match because the user might supply
        # a custom reindexing operator and we need to consistently transform
        # it from the conventional basis into the minimum basis. Therefore,
        # force them all to match, but make sure the user is aware.
        if not params.single_cb_op_to_minimum:
            raise RuntimeError(
                "There are >1 different cb_ops to minimum and "
                "cosym.single_cb_op_to_minimum is not True"
            )
        else:
            best_cb_op_str = max(assertion_dict, key=assertion_dict.get)
            best_cb_op = None
            for cb_op in cb_ops:
                if cb_op.as_hkl() == best_cb_op_str:
                    best_cb_op = cb_op
                    break
            assert best_cb_op is not None
            cb_ops = [best_cb_op] * len(cb_ops)

    self.cb_op_to_minimum = cb_ops

    # Eliminate reflections that are systematically absent due to centring
    # of the lattice, otherwise they would lead to non-integer miller indices
    # when reindexing to a primitive setting
    self._reflections = eliminate_sys_absent(self._experiments, self._reflections)

    self._experiments, self._reflections = apply_change_of_basis_ops(
        self._experiments, self._reflections, cb_ops
    )

    # transform models into miller arrays
    datasets = filtered_arrays_from_experiments_reflections(
        self.experiments,
        self.reflections,
        outlier_rejection_after_filter=False,
        partiality_threshold=params.partiality_threshold,
    )
    datasets = [
        ma.as_anomalous_array().merge_equivalents().array() for ma in datasets
    ]

    # opportunity here to subclass as defined above, instead of the
    # dials-implemented version
    self.cosym_analysis = CosymAnalysis(
        datasets,
        self.params,
        do_plot=do_plot,
        i_plot=i_plot,
        plot_fname=self.params.plot.filename,
        plot_format=self.params.plot.format,
        output_dir=output_dir,
        cb_op=cb_ops[0],
    )
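# Standalone sketch (strings standing in for sgtbx change-of-basis ops) of the
# consensus step above: operators are tallied by their hkl representation and,
# when single_cb_op_to_minimum permits it, every dataset is forced onto the
# majority operator. The as_hkl() values below are hypothetical.
def _example_cb_op_consensus():
    cb_op_strings = ["a,b,c", "a,b,c", "-b,a,c"]
    counts = {}
    for s in cb_op_strings:
        counts[s] = counts.get(s, 0) + 1
    best = max(counts, key=counts.get)  # majority vote; ties broken arbitrarily
    assert best == "a,b,c"
    cb_op_strings = [best] * len(cb_op_strings)
    assert len(set(cb_op_strings)) == 1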
def __init__(self, experiments, reflections, params=None):
    super(cosym, self).__init__(
        events=["run_cosym", "performed_unit_cell_clustering"]
    )
    if params is None:
        params = phil_scope.extract()
    self.params = params

    self._reflections = []
    for refl, expt in zip(reflections, experiments):
        sel = get_selection_for_valid_image_ranges(refl, expt)
        self._reflections.append(refl.select(sel))

    self._experiments, self._reflections = self._filter_min_reflections(
        experiments, self._reflections
    )
    self.ids_to_identifiers_map = {}
    for table in self._reflections:
        self.ids_to_identifiers_map.update(table.experiment_identifiers())
    self.identifiers_to_ids_map = {
        value: key for key, value in self.ids_to_identifiers_map.items()
    }

    if len(self._experiments) > 1:
        # perform unit cell clustering
        identifiers = self._unit_cell_clustering(self._experiments)
        if len(identifiers) < len(self._experiments):
            logger.info(
                "Selecting subset of %i datasets for cosym analysis: %s",
                len(identifiers),
                str(identifiers),
            )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections, use_datasets=identifiers
            )

    # Map experiments and reflections to minimum cell
    cb_ops = change_of_basis_ops_to_minimum_cell(
        self._experiments,
        params.lattice_symmetry_max_delta,
        params.relative_length_tolerance,
        params.absolute_angle_tolerance,
    )
    exclude = [
        expt.identifier
        for expt, cb_op in zip(self._experiments, cb_ops)
        if not cb_op
    ]
    if len(exclude):
        logger.info(
            f"Rejecting {len(exclude)} datasets from cosym analysis "
            f"(couldn't determine consistent cb_op to minimum cell):\n"
            f"{exclude}"
        )
        self._experiments, self._reflections = select_datasets_on_identifiers(
            self._experiments, self._reflections, exclude_datasets=exclude
        )
    cb_ops = list(filter(None, cb_ops))

    # Eliminate reflections that are systematically absent due to centring
    # of the lattice, otherwise they would lead to non-integer miller indices
    # when reindexing to a primitive setting
    self._reflections = eliminate_sys_absent(self._experiments, self._reflections)

    self._experiments, self._reflections = apply_change_of_basis_ops(
        self._experiments, self._reflections, cb_ops
    )

    # transform models into miller arrays
    datasets = filtered_arrays_from_experiments_reflections(
        self.experiments,
        self.reflections,
        outlier_rejection_after_filter=False,
        partiality_threshold=params.partiality_threshold,
    )
    datasets = [
        ma.as_anomalous_array().merge_equivalents().array() for ma in datasets
    ]

    self.cosym_analysis = CosymAnalysis(datasets, self.params)
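# Hedged illustration of the dataset-preparation chain shared by all of the
# __init__ variants here: each filtered miller array is expanded to an
# anomalous array and merged over symmetry equivalents before cosym analysis.
# This sketch assumes a working cctbx installation; the unit cell and space
# group are arbitrary toy values, not taken from the code above.
def _example_merge_chain():
    from cctbx import crystal, miller
    from cctbx.array_family import flex

    ms = miller.build_set(
        crystal_symmetry=crystal.symmetry(
            unit_cell=(10, 10, 10, 90, 90, 90), space_group_symbol="P222"
        ),
        anomalous_flag=True,
        d_min=2.0,
    )
    ma = ms.array(data=flex.double(ms.size(), 1.0))  # dummy intensities
    # the same chain applied per dataset in the constructors above
    merged = ma.as_anomalous_array().merge_equivalents().array()
    return merged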
def __init__(self, experiments, reflections, uuid_cache_in, params=None,
             do_plot=False, i_plot=None, output_dir=None):
    super(dials_cl_cosym_wrapper, self).__init__(
        events=["run_cosym", "performed_unit_cell_clustering"]
    )
    if params is None:
        params = phil_scope.extract()
    self.params = params

    self._reflections = []
    for refl, expt in zip(reflections, experiments):
        sel = get_selection_for_valid_image_ranges(refl, expt)
        self._reflections.append(refl.select(sel))

    self._experiments, self._reflections = self._filter_min_reflections(
        experiments, self._reflections, uuid_cache_in
    )
    self.ids_to_identifiers_map = {}
    for table in self._reflections:
        self.ids_to_identifiers_map.update(table.experiment_identifiers())
    self.identifiers_to_ids_map = {
        value: key for key, value in self.ids_to_identifiers_map.items()
    }

    if len(self._experiments) > 1:
        # perform unit cell clustering
        identifiers = self._unit_cell_clustering(self._experiments)
        if len(identifiers) < len(self._experiments):
            logger.info(
                "Selecting subset of %i datasets for cosym analysis: %s"
                % (len(identifiers), str(identifiers))
            )
            self._experiments, self._reflections = select_datasets_on_identifiers(
                self._experiments, self._reflections, use_datasets=identifiers
            )

    # Map experiments and reflections to minimum cell
    cb_ops = change_of_basis_ops_to_minimum_cell(
        self._experiments,
        params.lattice_symmetry_max_delta,
        params.relative_length_tolerance,
        params.absolute_angle_tolerance,
    )
    exclude = [
        expt.identifier
        for expt, cb_op in zip(self._experiments, cb_ops)
        if not cb_op
    ]
    if len(exclude):
        logger.info(
            "Rejecting {} datasets from cosym analysis "
            "(couldn't determine consistent cb_op to minimum cell):\n"
            "{}".format(len(exclude), exclude)
        )
        self._experiments, self._reflections = select_datasets_on_identifiers(
            self._experiments, self._reflections, exclude_datasets=exclude
        )
    cb_ops = list(filter(None, cb_ops))

    # Eliminate reflections that are systematically absent due to centring
    # of the lattice, otherwise they would lead to non-integer miller indices
    # when reindexing to a primitive setting
    self._reflections = eliminate_sys_absent(self._experiments, self._reflections)

    self._experiments, self._reflections = apply_change_of_basis_ops(
        self._experiments, self._reflections, cb_ops
    )

    # transform models into miller arrays
    datasets = filtered_arrays_from_experiments_reflections(
        self.experiments,
        self.reflections,
        outlier_rejection_after_filter=False,
        partiality_threshold=params.partiality_threshold,
    )
    datasets = [
        ma.as_anomalous_array().merge_equivalents().array() for ma in datasets
    ]

    # opportunity here to subclass as defined above, instead of the
    # dials-implemented version
    self.cosym_analysis = CosymAnalysis(
        datasets,
        self.params,
        do_plot=do_plot,
        i_plot=i_plot,
        plot_fname=self.params.plot.filename,
        plot_format=self.params.plot.format,
        output_dir=output_dir,
    )
    # Fixed in subclass: the parent class apparently erases the knowledge of
    # the input-to-minimum cb_ops. Without storing the op in self, we can
    # never trace back to the input setting.
    self.cb_op_to_minimum = cb_ops

    # Not sure yet; we may be assuming that all the cb_ops are the same
    # (applicable for PSI with P63)
    assertion_set = set(cb_ops)
    # guarantees all are the same; revisit with different use cases later
    assert len(assertion_set) == 1
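# Hypothetical illustration of why the other variant above keys its consensus
# on cb_op.as_hkl() rather than on set(cb_ops): if the operator class does not
# define value-based __eq__/__hash__, set() compares by object identity and
# can report several "unique" ops even when all are equivalent. (Whether sgtbx
# change-of-basis ops hash by value is an assumption worth verifying.)
def _example_identity_vs_canonical_key():
    class Op:  # toy stand-in for a change-of-basis operator
        def __init__(self, hkl):
            self._hkl = hkl

        def as_hkl(self):
            return self._hkl

    ops = [Op("a,b,c"), Op("a,b,c")]
    assert len(set(ops)) == 2                     # identity-based hashing
    assert len({op.as_hkl() for op in ops}) == 1  # canonical string collapses them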