def save_opt_det(phil_params, x, ref_params, SIM):
    """Write the optimized detector model to an experiment-list file.

    The detector is obtained from ``get_optimized_detector(x, ref_params, SIM)``,
    wrapped in a single otherwise-empty ``Experiment``, and written to the path
    given by ``phil_params.geometry.optimized_detector_name``.

    :param phil_params: phil parameters; only
        ``geometry.optimized_detector_name`` is read here
    :param x: refinement parameters, forwarded to ``get_optimized_detector``
    :param ref_params: refinement parameter container, forwarded as-is
    :param SIM: simulator instance, forwarded as-is
    """
    detector = get_optimized_detector(x, ref_params, SIM)

    # An experiment list holding one experiment that carries only the detector.
    experiments = ExperimentList()
    experiment = Experiment()
    experiment.detector = detector
    experiments.append(experiment)

    destination = phil_params.geometry.optimized_detector_name
    experiments.as_file(destination)
    print("Saved detector model to %s" % destination)
class construct_reflection_table_and_experiment_list(object):
    """Build an experiment list and a reflection table from an integration pickle.

    Typical use is ``assemble_experiments()`` followed by
    ``assemble_reflections()``; the results are stored on the instance as
    ``experiment_list`` and ``reflections`` and can be written out with
    ``experiments_to_json()`` and ``reflections_to_pickle()``.
    """

    def __init__(self, pickle, img_location, pixel_size,
                 proceed_without_image=False):
        """Load the integration data and remember where the image lives.

        :param pickle: path to an integration pickle, or the already-unpickled
            data itself
        :param img_location: path of the corresponding image, or None
        :param pixel_size: pixel size, used for the fallback detector model
            and to scale ``xyzcal.px`` into ``xyzcal.mm``
        :param proceed_without_image: allow ``img_location`` to be None
        :raises Sorry: if ``img_location`` is None and
            ``proceed_without_image`` is false
        """
        # Keep track of the image location as a zero- or one-element list.
        if img_location is None:
            if not proceed_without_image:
                raise Sorry(
                    "No image found at specified location. Override by setting "
                    "proceed_without_image to True to produce experiment lists "
                    "that may only be read when check_format is False.")
            self.img_location = []
        else:
            self.img_location = [img_location]

        # pickle can be already unpickled, if so, loading it will fail with an
        # AttributeError. A load error will fail with EOFError.
        try:
            self.data = easy_pickle.load(pickle)
        except EOFError:
            self.data = None
            self.pickle = None
        except AttributeError:
            self.data = pickle
            self.pickle = None
        else:
            self.pickle = pickle

        self.pixel_size = pixel_size
        if self.data is not None:
            self.length = len(self.data['observations'][0].data())

    # extract things from pickle file
    def unpack_pickle(self):
        """Extract all relevant information from an integration pickle file."""
        data = self.data

        # crystal-dependent
        self.ori = data['current_orientation'][0]
        self.ucell = self.ori.unit_cell()

        # experiment-dependent
        self.wavelength = data['wavelength']
        self.det_dist = data['distance']

        # observation-dependent
        self.observations = data['observations'][0]
        self.predictions = data['mapped_predictions'][0]

        # The kapton correction columns are optional; the attributes are only
        # created when present (refl_kapton_correction_maker checks hasattr).
        if 'fuller_kapton_absorption_correction' in data:
            self.fuller_correction = data[
                'fuller_kapton_absorption_correction'][0]
            if 'fuller_kapton_absorption_correction_sigmas' in data:
                self.fuller_correction_sigmas = data[
                    'fuller_kapton_absorption_correction_sigmas'][0]

    # construct the experiments and experiment_list objects
    def expt_beam_maker(self):
        """Construct the beam object for the experiments file."""
        self.beam = BeamFactory.simple(self.wavelength)

    def expt_crystal_maker(self):
        """Construct the crystal object for the experiments file.

        The crystal class depends on the mosaicity information available:

        * ``ML_half_mosaicity_deg`` and ``ML_domain_size_ang`` both set:
          ``MosaicCrystalSauter2014``
        * otherwise, ``mosaicity`` present: ``MosaicCrystalKabsch2010``
        * otherwise: plain ``Crystal``
        """
        data = self.data
        direct_matrix = self.ori.direct_matrix()
        real_a = direct_matrix[0:3]
        real_b = direct_matrix[3:6]
        real_c = direct_matrix[6:9]
        lattice = self.ucell.lattice_symmetry_group()

        found_it = False
        if 'ML_half_mosaicity_deg' in data:
            assert 'ML_domain_size_ang' in data
            half_mosaicity = data['ML_half_mosaicity_deg'][0]
            domain_size = data['ML_domain_size_ang'][0]
            if half_mosaicity is None or domain_size is None:
                # The two values must be either both present or both absent.
                assert half_mosaicity is None and domain_size is None
            else:
                found_it = True
                if 'mosaicity' in data and data['mosaicity'] > 0:
                    print(
                        "Warning, two kinds of mosaicity found. Using Sauter2014 model"
                    )
                from dxtbx.model import MosaicCrystalSauter2014
                self.crystal = MosaicCrystalSauter2014(
                    real_a, real_b, real_c, space_group=lattice)
                self.crystal.set_half_mosaicity_deg(half_mosaicity)
                self.crystal.set_domain_size_ang(domain_size)

        if not found_it:
            if 'mosaicity' in data:
                from dxtbx.model import MosaicCrystalKabsch2010
                self.crystal = MosaicCrystalKabsch2010(
                    real_a, real_b, real_c, space_group=lattice)
                self.crystal.set_mosaicity(data['mosaicity'])
            else:
                from dxtbx.model import Crystal
                self.crystal = Crystal(
                    real_a, real_b, real_c, space_group=lattice)

        if data.get('identified_isoform') is not None:
            self.crystal.identified_isoform = data['identified_isoform']

    def expt_detector_maker(self):
        """Construct the detector object for the experiments file.

        This function generates a monolithic flattening of the CSPAD detector
        if not supplied with an image file. The same fallback is used when the
        loaded image's file name ends with ``_00000.pickle``.
        """
        self.distance = self.data['distance']
        self.xbeam, self.ybeam = self.data['xbeam'], self.data['ybeam']

        # Load the image once and reuse it for both the file-name check and
        # the detector model.
        use_image_detector = False
        if len(self.img_location) > 0:
            image = dxtbx.load(self.img_location[0])
            use_image_detector = not image._image_file.endswith(
                "_00000.pickle")

        if use_image_detector:
            self.detector = image._detector()
        else:
            self.detector = DetectorFactory.simple(
                'SENSOR_UNKNOWN', self.distance, (self.xbeam, self.ybeam),
                '+x', '-y', (self.pixel_size, self.pixel_size), (1765, 1765))

    def expt_gonio_maker(self):
        """XFEL data consisting of stills is expected to have been generated by
        an experiment without a goniometer -- use placeholder None."""
        self.goniometer = None

    def expt_imageset_maker(self):
        """Construct the imageset object for the experiments file."""
        if len(self.img_location) == 0:
            self.imageset = None
            return
        self.filename = self.img_location
        self.format = FormatMultiImage.FormatMultiImage()
        self.reader = imageset.MultiFileReader(self.format, self.filename)
        self.imageset = imageset.ImageSet(self.reader)

    def expt_scan_maker(self):
        """XFEL data consisting of stills is expected not to contain scans --
        use placeholder None."""
        self.scan = None

    def assemble_experiments(self):
        """Run every ``expt_*_maker`` and build ``self.experiment_list``."""
        self.unpack_pickle()
        self.expt_beam_maker()
        self.expt_crystal_maker()
        self.expt_detector_maker()
        self.expt_gonio_maker()
        self.expt_imageset_maker()
        self.expt_scan_maker()
        self.experiment = Experiment(beam=self.beam,
                                     crystal=self.crystal,
                                     detector=self.detector,
                                     goniometer=self.goniometer,
                                     imageset=self.imageset,
                                     scan=self.scan)
        self.experiment_list = ExperimentList([self.experiment])

    def experiments_to_json(self, path_name=None):
        """Write ``self.experiment_list`` to an ``idx-*.expt`` file.

        The file name is derived from ``self.pickle``, so this requires the
        instance to have been constructed from a pickle path.

        :param path_name: output directory; defaults to the directory of the
            source pickle
        """
        if path_name is None:
            loc = os.path.dirname(self.pickle)
        else:
            loc = path_name
        name = os.path.basename(self.pickle).split(".pickle")[0]
        expt_name = int_pickle_to_filename(name, "idx-", ".expt")
        experiments = os.path.join(loc, expt_name)
        self.experiment_list.as_file(experiments)

    # construct the reflection table
    def refl_table_maker(self):
        """Create the empty reflection table."""
        self.reflections = flex.reflection_table()

    def refl_bkgd_maker(self):
        """Add default-initialised background columns."""
        self.reflections['background.mean'] = sciflex.double(self.length)
        self.reflections['background.mse'] = sciflex.double(self.length)

    def refl_bbox_maker(self):
        """Add a default-initialised bounding-box column."""
        self.reflections['bbox'] = flex.int6(self.length)

    def refl_correlation_maker(self):
        """Add a default-initialised ideal-profile correlation column."""
        self.reflections['correlation.ideal.profile'] = sciflex.double(
            self.length)

    def refl_entering_maker(self):
        """Add a default-initialised ``entering`` column."""
        self.reflections['entering'] = flex.bool(self.length)

    def refl_flags_maker(self):
        """Add the ``flags`` column, every entry set to 1."""
        self.reflections['flags'] = flex.size_t(self.length, 1)

    def refl_id_maker(self):
        """Add a default-initialised ``id`` column."""
        self.reflections['id'] = sciflex.size_t(self.length)

    def refl_intensities_maker(self):
        """Copy summed intensities; variances are the squared sigmas."""
        self.reflections['intensity.sum.value'] = self.observations.data()
        self.reflections['intensity.sum.variance'] = (
            self.observations.sigmas()**2)

    def refl_lp_maker(self):
        """Add a default-initialised ``lp`` column."""
        self.reflections['lp'] = sciflex.double(self.length)

    def refl_millers_maker(self):
        """Copy the Miller indices of the observations."""
        self.reflections['miller_index'] = self.observations._indices

    def refl_nbackgroundforeground_maker(self):
        """Add default-initialised background/foreground pixel counts."""
        self.reflections['n_background'] = sciflex.size_t(self.length)
        self.reflections['n_foreground'] = sciflex.size_t(self.length)

    def refl_panel_maker(self):
        """Assign every reflection to panel 0."""
        self.reflections['panel'] = sciflex.size_t(self.length, 0)

    def refl_profile_maker(self):
        """Add a default-initialised ``profile.correlation`` column."""
        self.reflections['profile.correlation'] = sciflex.double(self.length)

    def refl_s1_maker(self):
        """Compute unit ``s1`` vectors from the observed pixel positions.

        Requires ``xyzobs.px.value`` (see ``refl_xyzobs_maker``) and
        ``self.detector``; only panel 0 is used.
        """
        from scitbx.matrix import col
        panel = self.detector[0]
        xyzobs = self.reflections['xyzobs.px.value']
        s1 = sciflex.vec3_double(self.length)
        for idx in range(self.length):
            coords = col(panel.get_pixel_lab_coord(xyzobs[idx][0:2])).normalize()
            s1[idx] = tuple(coords)
        self.reflections['s1'] = s1

    def refl_xyzcal_maker(self):
        """Add predicted positions in pixels and scaled by the pixel size.

        The first two components of the stored predictions are swapped
        (component 1 first, then component 0); the third is set to 0.0.
        """
        self.reflections['xyzcal.px'] = sciflex.vec3_double(
            self.predictions.parts()[1],
            self.predictions.parts()[0],
            sciflex.double(self.length, 0.0))
        self.reflections['xyzcal.mm'] = (
            self.pixel_size * self.reflections['xyzcal.px'])

    def refl_xyzobs_maker(self):
        """Add observed positions, taken from the predictions.

        Uses the same component swap as ``refl_xyzcal_maker`` with the third
        component set to 0.5; variances are all zero.
        """
        self.reflections['xyzobs.px.value'] = sciflex.vec3_double(
            self.predictions.parts()[1],
            self.predictions.parts()[0],
            sciflex.double(self.length, 0.5))
        self.reflections['xyzobs.px.variance'] = sciflex.vec3_double(
            self.length, (0.0, 0.0, 0.0))

    def refl_zeta_maker(self):
        """Add a default-initialised ``zeta`` column."""
        self.reflections['zeta'] = sciflex.double(self.length)

    def refl_kapton_correction_maker(self):
        """Copy the kapton absorption corrections if unpack_pickle found any."""
        if hasattr(self, 'fuller_correction'):
            self.reflections[
                'kapton_absorption_correction'] = self.fuller_correction
            if hasattr(self, 'fuller_correction_sigmas'):
                self.reflections[
                    'kapton_absorption_correction_sigmas'] = self.fuller_correction_sigmas

    def assemble_reflections(self):
        """Run every ``refl_*_maker`` to populate ``self.reflections``."""
        self.refl_table_maker()
        self.refl_bkgd_maker()
        self.refl_bbox_maker()
        self.refl_correlation_maker()
        self.refl_entering_maker()
        self.refl_flags_maker()
        self.refl_id_maker()
        self.refl_intensities_maker()
        self.refl_millers_maker()
        self.refl_nbackgroundforeground_maker()
        self.refl_panel_maker()
        self.refl_xyzcal_maker()
        self.refl_xyzobs_maker()
        self.refl_zeta_maker()
        self.refl_kapton_correction_maker()
        # depends on successful completion of refl_xyzobs_maker
        self.refl_s1_maker()

    def reflections_to_pickle(self, path_name=None):
        """Write ``self.reflections`` to an ``idx-*_integrated.refl`` file.

        The file name is derived from ``self.pickle``, so this requires the
        instance to have been constructed from a pickle path.

        :param path_name: output directory; defaults to the directory of the
            source pickle
        """
        if path_name is None:
            loc = os.path.dirname(self.pickle)
        else:
            loc = path_name
        # NOTE(review): experiments_to_json strips ".pickle" here whereas this
        # strips ".refl" -- confirm which suffix int_pickle_to_filename expects.
        name = os.path.basename(self.pickle).split(".refl")[0]
        refl_name = int_pickle_to_filename(name, "idx-", "_integrated.refl")
        reflections = os.path.join(loc, refl_name)
        self.reflections.as_pickle(reflections)
class DataManager:
    """Hold a working copy of experiments and reflections for multi-dataset work.

    The inputs are kept untouched in ``_input_experiments`` and
    ``_input_reflections``; all operations act on deep copies exposed through
    the ``experiments`` and ``reflections`` properties.
    """

    def __init__(self, experiments, reflections):
        """Copy the inputs, build the id/identifier maps and set batch offsets.

        :param experiments: experiment list; every experiment needs a scan and
            an imageset (see ``_set_batches``)
        :param reflections: reflection table providing
            ``experiment_identifiers()``
        """
        self._input_experiments = experiments
        self._input_reflections = reflections

        self._experiments = copy.deepcopy(experiments)
        self._reflections = copy.deepcopy(reflections)

        self.ids_to_identifiers_map = dict(
            self._reflections.experiment_identifiers())
        self.identifiers_to_ids_map = {
            value: key
            for key, value in self.ids_to_identifiers_map.items()
        }

        self._set_batches()

    def _set_batches(self):
        """Give each experiment's scan a distinct power-of-ten batch offset."""
        max_batches = max(
            e.scan.get_image_range()[1] for e in self._experiments)
        max_batches += 10  # allow some head room

        # Number of decimal digits needed, so offsets are multiples of 10**n.
        n = int(math.ceil(math.log10(max_batches)))

        for i, expt in enumerate(self._experiments):
            expt.scan.set_batch_offset(i * 10**n)
            # The imageset may hold a different scan instance, so set the
            # offset there as well.
            expt.imageset.get_scan().set_batch_offset(
                expt.scan.get_batch_offset())
            logger.debug(
                f"{expt.scan.get_batch_offset()} {expt.scan.get_batch_range()}"
            )

    @property
    def experiments(self):
        """The working experiment list."""
        return self._experiments

    @experiments.setter
    def experiments(self, experiments):
        self._experiments = experiments

    @property
    def reflections(self):
        """The working reflection table."""
        return self._reflections

    @reflections.setter
    def reflections(self, reflections):
        self._reflections = reflections

    def select(self, experiment_identifiers):
        """Keep only the experiments and reflections with the given identifiers.

        Reflection ids are reset afterwards and checked for consistency with
        the remaining experiments.
        """
        self._experiments = ExperimentList([
            expt for expt in self._experiments
            if expt.identifier in experiment_identifiers
        ])
        self.reflections = self.reflections.select_on_experiment_identifiers(
            experiment_identifiers)
        self.reflections.reset_ids()
        self.reflections.assert_experiment_identifiers_are_consistent(
            self.experiments)

    def filter_dose(self, dose_min, dose_max):
        """Slice experiments and reflections to the given image-number window.

        The window is clipped per experiment to that experiment's own image
        range.
        """
        from dials.command_line.slice_sequence import (
            slice_experiments,
            slice_reflections,
        )

        image_range = [(
            max(dose_min, expt.scan.get_image_range()[0]),
            min(dose_max, expt.scan.get_image_range()[1]),
        ) for expt in self._experiments]
        n_refl_before = self._reflections.size()
        self._experiments = slice_experiments(self._experiments, image_range)
        # Show the observed z range before and after slicing.
        flex.min_max_mean_double(
            self._reflections["xyzobs.px.value"].parts()[2]).show()
        self._reflections = slice_reflections(self._reflections, image_range)
        flex.min_max_mean_double(
            self._reflections["xyzobs.px.value"].parts()[2]).show()
        logger.info(
            "%i reflections out of %i remaining after filtering for dose",
            self._reflections.size(), n_refl_before)

    def reflections_as_miller_arrays(self, combined=False):
        """Return scaled intensities, batches and scales as miller arrays.

        Reflections flagged ``bad_for_scaling`` or with a non-positive
        ``inverse_scale_factor`` are excluded from the batch and scale data.

        :param combined: if true return one ``(scaled, batch, scale)`` triple
            for all datasets; otherwise return three lists with one entry per
            dataset
        """
        from dials.util.batch_handling import assign_batches_to_reflections
        from dials.report.analysis import scaled_data_as_miller_array

        # Tables are paired positionally with the experiments below, so walk
        # the ids in ascending order instead of relying on set iteration order.
        dataset_ids = sorted(set(self._reflections["id"]).difference({-1}))
        reflection_tables = [
            self._reflections.select(self._reflections["id"] == id_)
            for id_ in dataset_ids
        ]

        offsets = [expt.scan.get_batch_offset() for expt in self._experiments]
        reflection_tables = assign_batches_to_reflections(
            reflection_tables, offsets)

        if combined:
            # filter bad refls and negative scales
            batches = flex.int()
            scales = flex.double()

            for r in reflection_tables:
                sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
                sel &= r["inverse_scale_factor"] > 0
                batches.extend(r["batch"].select(sel))
                scales.extend(r["inverse_scale_factor"].select(sel))
            scaled_array = scaled_data_as_miller_array(reflection_tables,
                                                       self._experiments)
            batch_array = miller.array(scaled_array, data=batches)
            scale_array = miller.array(scaled_array, data=scales)
            return scaled_array, batch_array, scale_array

        scaled_arrays = []
        batch_arrays = []
        scale_arrays = []
        for expt, r in zip(self._experiments, reflection_tables):
            sel = ~r.get_flags(r.flags.bad_for_scaling, all=False)
            sel &= r["inverse_scale_factor"] > 0
            batches = r["batch"].select(sel)
            scales = r["inverse_scale_factor"].select(sel)
            scaled_arrays.append(scaled_data_as_miller_array([r], [expt]))
            batch_arrays.append(miller.array(scaled_arrays[-1], data=batches))
            scale_arrays.append(miller.array(scaled_arrays[-1], data=scales))
        return scaled_arrays, batch_arrays, scale_arrays

    def reindex(self, cb_op, space_group=None):
        """Apply a change-of-basis operator to reflections and crystals.

        :param cb_op: change-of-basis operator
        :param space_group: if given, set on every reindexed crystal
        """
        logger.info("Reindexing: %s", cb_op)
        self._reflections["miller_index"] = cb_op.apply(
            self._reflections["miller_index"])

        for expt in self._experiments:
            cryst_reindexed = expt.crystal.change_basis(cb_op)
            if space_group is not None:
                cryst_reindexed.set_space_group(space_group)
            expt.crystal.update(cryst_reindexed)

    def export_reflections(self, filename, d_min=None):
        """Write the reflections to ``filename`` and return ``filename``.

        :param d_min: if truthy, keep only reflections whose ``d`` column is
            at least this value
        """
        reflections = self._reflections
        if d_min:
            reflections = reflections.select(reflections["d"] >= d_min)
        reflections.as_file(filename)
        return filename

    def export_experiments(self, filename):
        """Write the experiments to ``filename`` and return ``filename``."""
        self._experiments.as_file(filename)
        return filename

    def export_unmerged_mtz(self, filename, d_min=None):
        """Export scaled, unmerged intensities to an MTZ file."""
        params = export.phil_scope.extract()
        params.mtz.d_min = d_min
        params.mtz.hklout = filename
        params.intensity = ["scale"]
        export.export_mtz(params, self._experiments, [self._reflections])

    def export_merged_mtz(self, filename, d_min=None):
        """Merge the data and write the result to an MTZ file."""
        params = merge.phil_scope.extract()
        params.d_min = d_min
        params.assess_space_group = False
        mtz_obj = merge.merge_data_to_mtz(params, self._experiments,
                                          [self._reflections])
        mtz_obj.write(filename)
def update_detector(x, ref_params, SIM, save=None):
    """
    Update the internal geometry of the diffBragg instance

    For every panel whose group is listed in ``SIM.panel_groups_refined`` the
    current rotation and shift values are pushed to ``SIM.D`` via
    ``update_dxtbx_geoms``. Panels in other groups are left untouched. When
    ``save`` is given, a new detector is assembled from the resulting
    per-panel geometry and written by rank 0.

    :param x: refinement parameters as seen by scipy.optimize (e.g. rescaled floats)
    :param ref_params: diffBragg.refiners.Parameters (dict of RangedParameters)
    :param SIM: SIM instance (instance of nanoBragg.sim_data.SimData)
    :param save: optional name to save the detector
    """
    det = SIM.detector
    saving = save is not None
    if saving:
        new_det = Detector()

    for pid in range(len(det)):
        panel = det[pid]
        group_id = SIM.panel_group_from_id[pid]
        refined = group_id in SIM.panel_groups_refined

        if refined:
            Oang_p = ref_params["group%d_RotOrth" % group_id]
            Fang_p = ref_params["group%d_RotFast" % group_id]
            Sang_p = ref_params["group%d_RotSlow" % group_id]
            Xdist_p = ref_params["group%d_ShiftX" % group_id]
            Ydist_p = ref_params["group%d_ShiftY" % group_id]
            Zdist_p = ref_params["group%d_ShiftZ" % group_id]

            Oang = Oang_p.get_val(x[Oang_p.xpos])
            Fang = Fang_p.get_val(x[Fang_p.xpos])
            Sang = Sang_p.get_val(x[Sang_p.xpos])
            Xdist = Xdist_p.get_val(x[Xdist_p.xpos])
            Ydist = Ydist_p.get_val(x[Ydist_p.xpos])
            Zdist = Zdist_p.get_val(x[Zdist_p.xpos])

            # The reference origin must be set before the geometry update.
            origin_of_rotation = SIM.panel_reference_from_id[pid]
            SIM.D.reference_origin = origin_of_rotation
            SIM.D.update_dxtbx_geoms(det,
                                     SIM.beam.nanoBragg_constructor_beam,
                                     pid,
                                     Oang,
                                     Fang,
                                     Sang,
                                     Xdist,
                                     Ydist,
                                     Zdist,
                                     force=False)

        # The panel dictionary and axis vectors are only needed to build the
        # saved detector; skip that work on ordinary update calls.
        if not saving:
            continue

        if refined:
            fdet = SIM.D.fdet_vector
            sdet = SIM.D.sdet_vector
            origin = SIM.D.get_origin()
        else:
            fdet = panel.get_fast_axis()
            sdet = panel.get_slow_axis()
            origin = panel.get_origin()

        panel_dict = panel.to_dict()
        panel_dict["fast_axis"] = fdet
        panel_dict["slow_axis"] = sdet
        panel_dict["origin"] = origin
        new_det.add_panel(Panel.from_dict(panel_dict))

    if saving and COMM.rank == 0:
        t = time.time()
        El = ExperimentList()
        E = Experiment()
        E.detector = new_det
        El.append(E)
        El.as_file(save)
        t = time.time() - t
        print("Saved detector model to %s (took %.4f sec)" % (save, t),
              flush=True)
def write_output_files(Xopt, LMP, Modelers, SIM, params):
    """
    Writes refl and exper files for each experiment modeled during
    the ensemble refiner

    For every shot handled by this rank an optimized reflection file and
    experiment file are written; when ``params.geometry.pandas_dir`` is set, a
    pandas pickle and a ``.npy`` dump of the modeler are written as well.

    :param Xopt: float array of optimized rescaled parameter values
    :param LMP: simtbx.diffBragg.refiners.parameters.Parameters() object
    :param Modelers: data modelers, indexed by shot (launcher.Modelers)
    :param SIM: instance of sim_data (launcher.SIM)
    :param params: phil params, simtbx.diffBragg.phil.py
    :return: median, computed on rank 0 and broadcast to all ranks, of the
        values returned by ``get_dist_from_R`` for every written refl table
    """
    opt_det = get_optimized_detector(Xopt, LMP, SIM)

    pandas_dir = params.geometry.pandas_dir
    if pandas_dir is not None:
        # Every rank writes into these folders, so every rank makes sure they
        # exist; exist_ok makes concurrent creation by several ranks harmless.
        for subdir in ("refls", "expts"):
            os.makedirs(os.path.join(pandas_dir, subdir), exist_ok=True)

    all_shot_pred_offsets = []
    for i_shot in Modelers:
        Modeler = Modelers[i_shot]

        # these are in simtbx.diffBragg.refiners.parameters.RangedParameter objects
        rotX = LMP["rank%d_shot%d_RotXYZ%d" % (COMM.rank, i_shot, 0)]
        rotY = LMP["rank%d_shot%d_RotXYZ%d" % (COMM.rank, i_shot, 1)]
        rotZ = LMP["rank%d_shot%d_RotXYZ%d" % (COMM.rank, i_shot, 2)]
        num_uc_p = len(Modeler.ucell_man.variables)
        ucell_pars = [
            LMP["rank%d_shot%d_Ucell%d" % (COMM.rank, i_shot, i_uc)]
            for i_uc in range(num_uc_p)
        ]

        # convert rotation angles back to radians (that's what the
        # parameters.RangedParameter.get_val method does)
        rotXYZ = (rotX.get_val(Xopt[rotX.xpos]),
                  rotY.get_val(Xopt[rotY.xpos]),
                  rotZ.get_val(Xopt[rotZ.xpos]))

        # ucell_man is an instance of
        # simtbx.diffBragg.refiners.crystal_systems.manager.Manager()
        # (for the correct xtal system).
        # The variables must be assigned before unit_cell_parameters is read.
        Modeler.ucell_man.variables = [
            p.get_val(Xopt[p.xpos]) for p in ucell_pars
        ]
        ucpar = Modeler.ucell_man.unit_cell_parameters

        new_crystal = hopper_utils.new_cryst_from_rotXYZ_and_ucell(
            rotXYZ, ucpar, Modeler.E.crystal)
        new_exp = deepcopy(Modeler.E)
        new_exp.crystal = new_crystal

        # Weighted-average wavelength of the shot's spectrum.
        wave, wt = map(np.array, zip(*Modeler.spectra))
        ave_wave = (wave * wt).sum() / wt.sum()
        new_exp.beam.set_wavelength(ave_wave)
        new_exp.detector = opt_det

        Modeler.best_model = model(Xopt, LMP, i_shot, Modeler, SIM,
                                   return_model=True)
        Modeler.best_model_includes_background = True

        # store the updated per-roi scale factors in the new refl table
        roi_scale_factor = flex.double(len(Modeler.refls), 1)
        for roi_id in Modeler.roi_id_unique:
            p = LMP["rank%d_shot%d_scale_roi%d" % (COMM.rank, i_shot, roi_id)]
            scale_fac = p.get_val(Xopt[p.xpos])
            test_refl_idx = Modeler.refls_idx[roi_id]
            slc = Modeler.roi_id_slices[roi_id][0]
            roi_refl_ids = Modeler.all_refls_idx[slc]
            # NOTE, just a sanity check:
            assert len(np.unique(roi_refl_ids)) == 1
            refl_idx = roi_refl_ids[0]
            assert test_refl_idx == refl_idx
            roi_scale_factor[refl_idx] = scale_fac
        Modeler.refls["scale_factor"] = roi_scale_factor

        # get the new refls
        new_refl = hopper_utils.get_new_xycalcs(
            Modeler, new_exp, old_refl_tag="before_geom_ref")
        new_refl_fname, refl_ext = os.path.splitext(Modeler.refl_name)
        new_refl_fname = "rank%d_%s_%s%s" % (
            COMM.rank, os.path.basename(new_refl_fname),
            params.geometry.optimized_results_tag, refl_ext)
        if not new_refl_fname.endswith(".refl"):
            new_refl_fname += ".refl"
        # NOTE(review): the output paths below are built from pandas_dir even
        # when it is None, which os.path.join rejects -- confirm callers
        # always set it.
        new_refl_fname = os.path.join(params.geometry.pandas_dir, "refls",
                                      new_refl_fname)
        new_refl.as_file(new_refl_fname)

        shot_pred_offsets = get_dist_from_R(new_refl)
        all_shot_pred_offsets += list(shot_pred_offsets)

        new_expt_fname, expt_ext = os.path.splitext(Modeler.exper_name)
        new_expt_fname = "rank%d_%s_%s%s" % (
            COMM.rank, os.path.basename(new_expt_fname),
            params.geometry.optimized_results_tag, expt_ext)
        if not new_expt_fname.endswith(".expt"):
            new_expt_fname += ".expt"
        new_expt_fname = os.path.join(params.geometry.pandas_dir, "expts",
                                      new_expt_fname)
        new_exp_lst = ExperimentList()
        new_exp_lst.append(new_exp)
        new_exp_lst.as_file(new_expt_fname)

        if params.geometry.pandas_dir is not None:
            a, b, c, al, be, ga = ucpar
            ncells_p = [
                LMP["rank%d_shot%d_Nabc%d" % (COMM.rank, i_shot, i)]
                for i in range(3)
            ]
            Na, Nb, Nc = [p.get_val(Xopt[p.xpos]) for p in ncells_p]
            scale_p = LMP["rank%d_shot%d_Scale" % (COMM.rank, i_shot)]
            scale = scale_p.get_val(Xopt[scale_p.xpos])

            _, fluxes = zip(*SIM.beam.spectrum)
            eta_a = eta_b = eta_c = np.nan
            df = single_expt_pandas(
                xtal_scale=scale,
                Amat=new_crystal.get_A(),
                ncells_abc=(Na, Nb, Nc),
                ncells_def=(0, 0, 0),
                eta_abc=(eta_a, eta_b, eta_c),
                diff_gamma=(np.nan, np.nan, np.nan),
                diff_sigma=(np.nan, np.nan, np.nan),
                detz_shift=0,
                use_diffuse=params.use_diffuse_models,
                gamma_miller_units=params.gamma_miller_units,
                eta=np.nan,
                rotXYZ=tuple(rotXYZ),
                ucell_p=(a, b, c, al, be, ga),
                ucell_p_init=(np.nan, np.nan, np.nan, np.nan, np.nan, np.nan),
                lam0_lam1=(np.nan, np.nan),
                spec_file=Modeler.spec_name,
                spec_stride=params.simulator.spectrum.stride,
                flux=sum(fluxes),
                beamsize_mm=SIM.beam.size_mm,
                orig_exp_name=Modeler.exper_name,
                opt_exp_name=os.path.abspath(new_expt_fname),
                spec_from_imageset=params.spectrum_from_imageset,
                oversample=SIM.D.oversample,
                opt_det=params.opt_det,
                stg1_refls=Modeler.refl_name,
                stg1_img_path=None)
            pandas_name = os.path.splitext(
                os.path.basename(new_expt_fname))[0] + ".pkl"
            pandas_name = os.path.join(params.geometry.pandas_dir,
                                       pandas_name)
            df.to_pickle(pandas_name)
            modeler_name = pandas_name.replace(".pkl", ".npy")
            np.save(modeler_name, Modeler)

    # Collect the offsets from all ranks; the median is computed on rank 0
    # and then broadcast to every rank.
    all_shot_pred_offsets = COMM.reduce(all_shot_pred_offsets)
    if COMM.rank == 0:
        median_pred_offset = np.median(all_shot_pred_offsets)
    else:
        median_pred_offset = None
    median_pred_offset = COMM.bcast(median_pred_offset)

    return median_pred_offset
class Screen19(object): """Encapsulates the screening script.""" def __init__(self): # Throughout the pipeline, retain the state of the processing. self.expts = ExperimentList([]) self.refls = flex.reflection_table() # Get some default parameters. These must be extracted from the 'fetched' # PHIL scope, rather than the 'definition' phil scope returned by # iotbx.phil.parse. Confused? Blame PHIL. self.params = phil_scope.fetch(iotbx.phil.parse("")).extract() def _quick_import(self, files): # type: (List[str]) -> bool """ Generate xia2-style templates from file names and attempt a quick import. From each given filename, generate a filename template by substituting a hash character (#) for each numeral in the last contiguous group of numerals before the file extension. For example, the filename `example_01_0001.cbf` becomes `example_01_####.cbf`. Contiguous image ranges are recorded by associating the start and end image number of the range with the relevant filename template. dials.import is then run with options to extrapolate header information from the first image file, thereby running more quickly than reading each image header individually. Args: files: List of image filenames. Returns: Boolean flag indicating whether the quick import has succeeded. """ if len(files) == 1: # No point in quick-importing a single file return False debug("Attempting quick import...") files.sort() templates = {} # type: Dict[str, List[Optional[List[int]]]] for f in files: template, image = screen19.make_template(f) if template not in templates: image_range = [image, image] if image else [] templates.update({template: [image_range]}) elif image == templates[template][-1][-1] + 1: templates[template][-1][-1] = image elif image == templates[template][-1][-1]: # We have a duplicate input file name. Do nothing. 
pass else: templates[template].append([image, image]) # Return tuple of template and image range for each unique image range templates = [(t, tuple(r)) for t, ranges in templates.items() for r in ranges] # type: Templates return self._quick_import_templates(templates) def _quick_import_templates(self, templates): # type: (Templates) -> bool """ Take image file templates and frame number ranges and try to run dials.import. dials.import is run with options to extrapolate header information from the first image file, thereby running more quickly than reading each image header individually. Args: templates: A list of tuples, each tuple containing a xia2-style filename template and the start and end image numbers of the associated sweep. Returns: Boolean flag indicating whether the quick import has succeeded. """ debug("Quick import template summary:\n\t%s", templates) if len(templates) > 1: debug("Cannot currently run quick import on multiple templates.") return False try: scan_range = templates[0][1] # type: Tuple[int, int] if not scan_range: raise IndexError except IndexError: debug( "Cannot run quick import: could not determine image naming template." ) return False info("Running quick import.") self.params.dials_import.input.template = [templates[0][0]] self.params.dials_import.geometry.scan.image_range = scan_range self.params.dials_import.geometry.scan.extrapolate_scan = True self._run_dials_import() return True def _import(self, files): # type: (List[str]) -> None """ Try to run a quick call of dials.import. Failing that, run a slow call. Try initially to construct file name templates contiguous groups of files. Failing that, pass a full list of the files to the importer (slower). Args: files: List of image filenames. """ info("\nImporting data...") if len(files) == 1: if os.path.isdir(files[0]): debug("You specified a directory. Importing all CBF files in " "that directory.") # TODO Support HDF5. 
files = [ os.path.join(files[0], f) for f in os.listdir(files[0]) if f.endswith(".cbf") or f.endswith(".cbf.gz") or f.endswith(".cbf.bz2") ] elif len(files[0].split(":")) == 3: debug("You specified an image range in the xia2 format. " "Importing all specified files.") template, start, end = files[0].split(":") template = screen19.make_template(template)[0] start, end = int(start), int(end) if not self._quick_import_templates([(template, (start, end))]): warning("Could not import specified image range.") sys.exit(1) info("Quick import successful.") return elif files[0].endswith(".expt"): debug("You specified an existing experiment list file. " "No import necessary.") try: self.expts = ExperimentList.from_file(files[0]) except (IOError, PickleError, ValueError): pass else: self.params.dials_import.output.experiments = files[0] if self.expts: return if not files: warning("No images found matching input.") sys.exit(1) # Can the files be quick-imported? if self._quick_import(files): info("Quick import successful.") return self.params.dials_import.input.experiments = files self._run_dials_import() def _run_dials_import(self): """ Perform a minimal version of dials.import to get an experiment list. Use some filleted bits of dials.import and dials.util.options.Importer. """ # Get some key data format arguments. try: format_kwargs = { "dynamic_shadowing": self.params.dials_import.format.dynamic_shadowing, "multi_panel": self.params.dials_import.format.multi_panel, } except AttributeError: format_kwargs = {} # If filenames contain wildcards, expand args = [] for arg in self.params.dials_import.input.experiments: if "*" in arg: args.extend(glob(arg)) else: args.append(arg) if args: # Are compare{beam,detector,goniometer} and scan_tolerance necessary? # They are cargo-culted from the DIALS option parser. 
tol_params = self.params.dials_import.input.tolerance compare_beam = BeamComparison( wavelength_tolerance=tol_params.beam.wavelength, direction_tolerance=tol_params.beam.direction, polarization_normal_tolerance=tol_params.beam. polarization_normal, polarization_fraction_tolerance=tol_params.beam. polarization_fraction, ) compare_detector = DetectorComparison( fast_axis_tolerance=tol_params.detector.fast_axis, slow_axis_tolerance=tol_params.detector.slow_axis, origin_tolerance=tol_params.detector.origin, ) compare_goniometer = GoniometerComparison( rotation_axis_tolerance=tol_params.goniometer.rotation_axis, fixed_rotation_tolerance=tol_params.goniometer.fixed_rotation, setting_rotation_tolerance=tol_params.goniometer. setting_rotation, ) scan_tolerance = tol_params.scan.oscillation # Import an experiment list from image data. try: experiments = ExperimentListFactory.from_filenames( args, compare_beam=compare_beam, compare_detector=compare_detector, compare_goniometer=compare_goniometer, scan_tolerance=scan_tolerance, format_kwargs=format_kwargs, ) except IOError as e: warning("%s '%s'", e.strerror, e.filename) sys.exit(1) # Record the imported experiments for use elsewhere. # Quit if there aren't any. self.expts.extend(experiments) if not self.expts: warning("No images found.") sys.exit(1) else: # Use the template importer. if len(self.params.dials_import.input.template) > 0: importer = ExperimentListTemplateImporter( self.params.dials_import.input.template, format_kwargs=format_kwargs) # Record the imported experiments for use elsewhere. # Quit if there aren't any. 
# NOTE(review): the statements before `_count_processors` are the tail of a
# method whose `def` line lies outside this view.  Their logic is reproduced
# unchanged; their nesting relative to the unseen code could not be confirmed.
self.expts.extend(importer.experiments)

if not self.expts:
    warning("No images found matching template %s" %
            self.params.dials_import.input.template[0])
    sys.exit(1)

# Setup the metadata updater
metadata_updater = MetaDataUpdater(self.params.dials_import)

# Extract the experiments and loop through
self.expts = metadata_updater(self.expts.imagesets())


def _count_processors(self, nproc=None):
    # type: (Optional[int]) -> None
    """
    Determine the number of processors and save it as ``self.nproc``.

    The sources are tried in this order: the explicit ``nproc`` argument
    (unless it is unset or ``Auto``), the ``NSLOTS`` environment variable,
    and finally ``number_of_processors``.  The process exits with status 1
    if no positive processor count can be determined.

    Args:
        nproc (optional):  Number of processors requested by the user.
    """
    if nproc and nproc is not Auto:
        self.nproc = nproc
        return

    # If the environment variable NSLOTS is set to a number then use that.
    # int(None) raises TypeError when the variable is missing.
    try:
        nslots = int(os.environ.get("NSLOTS"))
    except (ValueError, TypeError):
        nslots = 0
    # A non-positive slot count is no more usable than a missing one, so
    # fall through to auto-detection in that case as well.
    if nslots > 0:
        self.nproc = nslots
        return

    self.nproc = number_of_processors(return_value_if_unknown=-1)

    if self.nproc <= 0:
        warning(
            "Could not determine number of available processors. Error code %d",
            self.nproc,
        )
        sys.exit(1)


def _count_images(self):
    # type: () -> int
    """
    Attempt to determine the number of diffraction images.

    The count is the size of the image set of the first experiment in
    ``self.expts``.  The process exits with status 1 if there is no
    experiment to inspect.

    Returns:
        Number of images.
    """
    # FIXME:  This exception handling should be redundant.  Empty experiment
    #         lists should get caught at the import stage.  Is this so?
    try:
        return self.expts[0].imageset.size()
    except IndexError:
        warning("Could not determine number of images in dataset.")
        sys.exit(1)


def _check_intensities(self, mosaicity_correction=True):
    # type: (bool) -> None
    """
    Run xia2.overload and plot a histogram of pixel intensities.

    If `mosaicity_correction` is true, the pixel intensities are approximately
    adjusted to take account of a systematic defect in the detector count rate
    correction.  See https://github.com/xia2/screen19/wiki#mosaicity-correction

    The results are read from the ``overload.json`` file written by
    xia2.overload.  The process exits with status 1 if xia2.overload fails.

    Args:
        mosaicity_correction (optional):  default is `True`.
    """
    info("\nTesting pixel intensities...")
    command = ["xia2.overload", "nproc=%s" % self.nproc, "indexed.expt"]
    debug("running %s", command)
    result = procrunner.run(command, print_stdout=False, debug=procrunner_debug)
    debug("result = %s", screen19.prettyprint_dictionary(result))

    # Check for failure *before* reporting success.
    if result["exitcode"] != 0:
        warning("Failed with exit code %d", result["exitcode"])
        sys.exit(1)
    info("Successfully completed (%.1f sec)", result["runtime"])

    with open("overload.json") as fh:
        overload_data = json.load(fh)

    info("Pixel intensity distribution:")
    count_sum = 0
    hist = {}
    if "bins" in overload_data:
        bins = overload_data["bins"]
        for b in range(overload_data["bin_count"]):
            if bins[b] > 0:
                hist[b] = bins[b]
                count_sum += b * bins[b]
    else:
        hist = {
            int(k): v
            for k, v in overload_data["counts"].items()
            if int(k) > 0
        }
        count_sum = sum(k * v for k, v in hist.items())

    if not hist:
        # max() below would raise ValueError on an empty histogram.
        warning("No non-zero pixel counts were reported; "
                "cannot assess the count rate.")
        info("Total sum of counts in dataset: %d", count_sum)
        return

    average_to_peak = 1
    if mosaicity_correction and self._sigma_m:
        # Adjust for the detector count rate correction
        delta_z = self._oscillation / self._sigma_m / math.sqrt(2)
        average_to_peak = (
            math.sqrt(math.pi) * delta_z * math.erf(delta_z)
            + math.exp(-(delta_z**2))
            - 1
        ) / delta_z**2
        info("Average-to-peak intensity ratio: %f", average_to_peak)

    scale = 100 * overload_data["scale_factor"] / average_to_peak
    info("Determined scale factor for intensities as %f", scale)

    debug(
        "intensity histogram: { %s }",
        ", ".join(["%d:%d" % (k, hist[k]) for k in sorted(hist)]),
    )
    max_count = max(hist.keys())
    hist_max = max_count * scale

    # Use finer histogram bins when the strongest pixel is far below the limit.
    hist_granularity, hist_format = 1, "%.0f"
    if hist_max < 50:
        hist_granularity, hist_format = 2, "%.1f"
    if hist_max < 15:
        hist_granularity, hist_format = 10, "%.1f"

    rescaled_hist = {}
    for x in hist.keys():
        rescaled = round(x * scale * hist_granularity)
        if rescaled > 0:
            rescaled_hist[rescaled] = hist[x] + rescaled_hist.get(rescaled, 0)
    hist = rescaled_hist
    debug(
        "rescaled histogram: { %s }",
        ", ".join([
            (hist_format + ":%d") % (k / hist_granularity, hist[k])
            for k in sorted(hist)
        ]),
    )

    screen19.plot_intensities(hist,
                              1 / hist_granularity,
                              procrunner_debug=procrunner_debug)

    linear_response_limit = 100 * self.params.maximum_flux.trusted_range_correction
    marginal_limit = max(70, linear_response_limit)

    text = "".join((
        "Strongest pixel (%d counts) " % max_count,
        "reaches %.1f%% " % hist_max,
        "of the detector count rate limit",
    ))
    if hist_max > 100:
        warning("Warning: %s!", text)
    else:
        info(text)

    if ("overload_limit" in overload_data
            and max_count >= overload_data["overload_limit"]):
        warning(
            "Warning: THE DATA CONTAIN REGULAR OVERLOADS!\n"
            " The photon incidence rate is outside the specified "
            "limits of the detector.\n"
            " The built-in detector count rate correction cannot "
            "adjust for this.\n"
            " You should aim for count rates below {:.0%} of the "
            "detector limit.".format(
                self.params.maximum_flux.trusted_range_correction))
    elif hist_max > marginal_limit:
        warning(
            "Warning: The photon incidence rate is well outside the "
            "linear response region of the detector (<{:.0%}).\n"
            " The built-in detector count rate correction may not be "
            "able to adjust for this.".format(
                self.params.maximum_flux.trusted_range_correction))
    elif hist_max > linear_response_limit:
        info("The photon incidence rate is outside the linear response "
             "region of the detector (<{:.0%}).\n"
             " The built-in detector count rate correction may be able "
             "to adjust for this.".format(
                 self.params.maximum_flux.trusted_range_correction))

    if not mosaicity_correction:
        warning(
            "Warning: Not enough data for proper profile estimation."
            " The spot intensities are not corrected for mosaicity.\n"
            " The true photon incidence rate will be higher than the "
            "given estimate.")

    info("Total sum of counts in dataset: %d", count_sum)


def _find_spots(self, args=None):
    # type: (Optional[List[str]]) -> None
    """
    Find strong spots in the imported experiment list.

    The reflections are stored in ``self.refls`` together with an
    ``n_signal`` column.  Shoeboxes are dropped unless the spot-finding
    parameters request that they be kept.

    Args:
        args (optional):  Currently ignored; the spot-finding parameters are
                          taken from ``self.params.dials_find_spots``.
    """
    info("\nFinding spots...")

    dials_start = timeit.default_timer()

    # Use some choice fillets from dials.find_spots
    # Ignore `args`, use `self.params`

    # Loop through all the imagesets and find the strong spots
    self.refls = flex.reflection_table.from_observations(
        self.expts, self.params.dials_find_spots)

    # Add n_signal column - before deleting shoeboxes
    good = MaskCode.Foreground | MaskCode.Valid
    self.refls["n_signal"] = self.refls["shoebox"].count_mask_values(good)

    # Delete the shoeboxes
    if not self.params.dials_find_spots.output.shoeboxes:
        del self.refls["shoebox"]

    info(
        60 * "-" + "\n%s\n" + 60 * "-" + "\nSuccessfully completed (%.1f sec)",
        spot_counts_per_image_plot(self.refls),
        timeit.default_timer() - dials_start,
    )


def _index(self):
    # type: () -> bool
    """
    Index the output of spot finding.

    Every indexing method is tried for each max_cell strategy until one
    succeeds.  On success ``self.expts`` and ``self.refls`` are replaced by
    the indexed models and written to the indexing output files.

    Returns:
        Boolean value indicating whether indexing was successful.
    """
    dials_start = timeit.default_timer()

    # Prepare max_cell constraint strategies.
    max_cell = self.params.dials_index.indexing.max_cell
    # By default, try unconstrained max_cell followed by max_cell=20.
    # If the user has already specified a max_cell < 20, do not relax to 20Å.
    cell_constraints = [([], max_cell)]
    if not max_cell or max_cell is Auto or max_cell > 20:
        cell_constraints += [(["max_cell constraint"], 20)]

    # Prepare indexing methods, preferring the real_space_grid_search if a
    # known unit cell has been specified, otherwise using 3D FFT, then 1D FFT.
    methods = ([(["real space grid search"], "real_space_grid_search")]
               if self.params.dials_index.indexing.known_symmetry.unit_cell
               else [])
    methods += [(["3D FFT"], "fft3d"), (["1D FFT"], "fft1d")]

    # Cycle through the indexing methods for each of the max_cell constraint
    # strategies until an indexing solution is found.
    for i, (max_cell_msg, max_cell) in enumerate(cell_constraints):
        # Set the max_cell constraint strategy.
        self.params.dials_index.indexing.max_cell = max_cell
        for j, (method_msg, method) in enumerate(methods):
            # Set the indexing method.
            self.params.dials_index.indexing.method = method
            # Log a handy message to the user (i + j == 0 on the first try).
            msg = ("Retrying with " + " and ".join(method_msg + max_cell_msg)
                   if i + j else "Indexing")
            info("\n%s...", msg)
            try:
                # If indexing is successful, break out of the inner loop.
                self.expts, self.refls = index(self.expts, [self.refls],
                                               self.params.dials_index)
                break
            except (DialsIndexError, ValueError) as e:
                # If indexing is unsuccessful, try again with the next
                # strategy.
                warning("Failed: %s", str(e))
                continue
        else:
            # When all the indexing methods are unsuccessful, move onto
            # the next max_cell constraint strategy and try again.
            continue
        # We should only get here if successfully indexed.  Break out of the loop
        break
    else:
        # Indexing completely unsuccessful.
        return False

    sg_type = self.expts[0].crystal.get_crystal_symmetry().space_group().type()
    symb = sg_type.universal_hermann_mauguin_symbol()
    unit_cell = self.expts[0].crystal.get_unit_cell()

    # Each output file is written exactly once.
    self.refls.as_file(self.params.dials_index.output.reflections)
    self.expts.as_file(self.params.dials_index.output.experiments)
    info(
        "Found primitive solution: %s %s using %s reflections\n"
        "Indexed experiments and reflections saved as %s, %s\n"
        "Successfully completed (%.1f sec)",
        symb,
        unit_cell,
        self.refls["id"].count(0),
        self.params.dials_index.output.experiments,
        self.params.dials_index.output.reflections,
        timeit.default_timer() - dials_start,
    )

    # Report the indexing successful.
    return True


def _wilson_calculation(self):
    # type: () -> None
    """
    Run `screen19.minimum_exposure` on an experiment list and reflection table.

    For best results, the reflections and experiment list should contain the
    results of integration or scaling.  If only strong spots are used, the Wilson
    plot fit may be poor.
    """
    dials_start = timeit.default_timer()
    info("\nEstimating lower exposure bound...")

    suggest_minimum_exposure(self.expts, self.refls,
                             self.params.minimum_exposure)

    info("Successfully completed (%.1f sec)",
         timeit.default_timer() - dials_start)


def _refine(self):
    # type: () -> None
    """
    Refine the results of indexing.

    ``self.expts`` and ``self.refls`` are replaced by the refined models.
    The process exits with status 1 if refinement raises ``Sorry``.
    """
    dials_start = timeit.default_timer()
    info("\nRefining...")

    try:
        self.expts, self.refls, _, _ = run_dials_refine(
            self.expts, self.refls, self.params.dials_refine)
    except Sorry as e:
        # `e` is an exception, not a number: it must be formatted with %s.
        warning("dials.refine failed: %s\nGiving up.\n", e)
        sys.exit(1)

    info("Successfully refined (%.1f sec)",
         timeit.default_timer() - dials_start)


def _create_profile_model(self):
    # type: () -> bool
    """
    Run `dials.create_profile_model` on indexed reflections.

    The indexed experiment list will be overwritten with a copy that includes
    the profile model but is otherwise identical.  On success the oscillation
    width and sigma_m are stored in ``self._oscillation`` and
    ``self._sigma_m``; on failure ``self._sigma_m`` is ``None``.

    Returns:
        Boolean value indicating whether it was possible to determine a profile
        model from the data.
    """
    info("\nCreating profile model...")
    command = [
        "dials.create_profile_model",
        self.params.dials_index.output.experiments,
        self.params.dials_index.output.reflections,
        "output = %s" % self.params.dials_index.output.experiments,
    ]
    result = procrunner.run(command, print_stdout=False, debug=procrunner_debug)
    debug("result = %s", screen19.prettyprint_dictionary(result))
    self._sigma_m = None
    if result["exitcode"] == 0:
        db = ExperimentList.from_file(
            self.params.dials_index.output.experiments)[0]
        self._oscillation = db.imageset.get_scan().get_oscillation()[1]
        self._sigma_m = db.profile.sigma_m()
        info(
            u"%d images, %s° oscillation, σ_m=%.3f°",
            db.imageset.get_scan().get_num_images(),
            str(self._oscillation),
            self._sigma_m,
        )
        info("Successfully completed (%.1f sec)", result["runtime"])
        return True
    warning("Failed with exit code %d", result["exitcode"])
    return False


def _integrate(self):
    # type: () -> None
    """
    Integrate reflection intensities.

    The integrated experiments and reflections are written to the integration
    output files and stored in ``self.expts`` and ``self.refls``.  The process
    exits with status 1 if integration exits with a non-zero code.
    """
    dials_start = timeit.default_timer()
    info("\nIntegrating...")

    # Don't waste time recreating the profile model
    self.params.dials_integrate.create_profile_model = False
    # Get the dials.integrate PHIL scope, populated with parsed input parameters
    integrate_scope = phil_scope.get("dials_integrate").objects[0]
    integrate_scope.name = ""
    integrate_scope = integrate_scope.format(self.params.dials_integrate)

    try:
        integrated_experiments, integrated_reflections = _run_integration(
            integrate_scope,
            self.params.dials_index.output.experiments,
            self.params.dials_index.output.reflections,
        )
        # Save the output to files
        integrated_reflections.as_file(
            self.params.dials_integrate.output.reflections)
        integrated_experiments.as_file(
            self.params.dials_integrate.output.experiments)
        # ... and also store the output internally
        self.expts, self.refls = integrated_experiments, integrated_reflections
        info(
            "Successfully completed (%.1f sec)",
            timeit.default_timer() - dials_start,
        )
    except SystemExit as e:
        if e.code:
            # SystemExit.code may be a message string rather than an integer,
            # so it is formatted with %s.
            warning("dials.integrate failed with exit code %s\nGiving up.",
                    e.code)
            sys.exit(1)


# This is a hacky check but should work for as long as DIALS 2.0 is supported.
if version.dials_version() < "DIALS 2.1":

    def _refine_bravais(self, experiments, reflections):
        # type: (ExperimentList, flex.reflection_table) -> None
        """
        Run `dials.refine_bravais_settings` on an experiments and reflections.

        The table of solutions is extracted from the program output and
        logged.  The process exits with status 1 if the program fails.

        Args:
            experiments:  An experiment list.
            reflections:  The corresponding reflection table.
        """
        info("\nRefining Bravais settings...")
        command = ["dials.refine_bravais_settings", experiments, reflections]
        result = procrunner.run(command,
                                print_stdout=False,
                                debug=procrunner_debug)
        debug("result = %s", screen19.prettyprint_dictionary(result))
        if result["exitcode"] == 0:
            # Pick the ASCII-art table out of the captured standard output.
            m = re.search(
                r"[-+]{3,}\n[^\n]*\n[-+|]{3,}\n(.*\n)*[-+]{3,}",
                result["stdout"].decode("utf-8"),
            )
            if m:
                info(m.group(0))
            else:
                info(
                    "Could not interpret dials.refine_bravais_settings output, "
                    "please check dials.refine_bravais_settings.log")
            info("Successfully completed (%.1f sec)", result["runtime"])
        else:
            warning("Failed with exit code %d", result["exitcode"])
            sys.exit(1)

else:

    def _refine_bravais(self):
        # type: () -> None
        """
        Refine Bravais settings to determine the space group.

        Works on ``self.expts`` and ``self.refls`` in memory.  The process
        exits with status 1 if the refinement raises ``RuntimeError``.
        """
        dials_start = timeit.default_timer()
        info("\nRefining Bravais settings...")

        self.refls = eliminate_sys_absent(self.expts, self.refls)
        map_to_primitive(self.expts, self.refls)

        try:
            refined_settings = refined_settings_from_refined_triclinic(
                self.expts, self.refls, self.params.dials_refine_bravais)
        except RuntimeError as e:
            # Keep the cause in the log and exit with the same status (1)
            # that `sys.exit(e)` would have produced.
            warning("dials.refine_bravais_settings failed: %s\nGiving up.", e)
            sys.exit(1)

        possible_bravais_settings = {
            solution["bravais"] for solution in refined_settings
        }
        bravais_lattice_to_space_group_table(possible_bravais_settings)
        try:
            # Old version of dials with as_str() method
            logger.info(refined_settings.as_str())
        except AttributeError:
            # Newer versions of dials (>= 2.2.2) has proper __str__ method
            logger.info(refined_settings)

        info(
            "Successfully completed (%.1f sec)",
            timeit.default_timer() - dials_start,
        )


def _report(self, experiments, reflections):
    # type: (ExperimentList, flex.reflection_table) -> None
    """
    Run `dials.report` on an experiment list and reflection table.

    The process exits with status 1 if `dials.report` fails.

    Args:
        experiments:  An experiment list.
        reflections:  The corresponding reflection table.
    """
    info("\nCreating report...")
    command = ["dials.report", experiments, reflections]
    result = procrunner.run(command, print_stdout=False, debug=procrunner_debug)
    debug("result = %s", screen19.prettyprint_dictionary(result))
    if result["exitcode"] == 0:
        info("Successfully completed (%.1f sec)", result["runtime"])
    else:
        warning("Failed with exit code %d", result["exitcode"])
        sys.exit(1)


def run(self, args=None, phil=phil_scope, set_up_logging=False):
    # type: (Optional[List[str]], scope, bool) -> None
    """
    Run the whole screening pipeline.

    The steps are: import, spot finding, indexing (retried once with a
    stronger spot-finding threshold), profile modelling (retried once after
    refinement), pixel intensity check, optional integration, minimum
    exposure estimate, Bravais setting refinement and report generation.
    The process exits with status 1 as soon as a step fails for good.

    Args:
        args (optional):  Command-line arguments; handed to the option parser.
        phil (optional):  PHIL scope used to parse the arguments.
        set_up_logging (optional):  Whether to configure logging here.
    """
    usage = "%prog [options] image_directory | image_files.cbf | imported.expt"

    parser = OptionParser(usage=usage,
                          epilog=__doc__,
                          phil=phil,
                          check_format=False)

    self.params, options, unhandled = parser.parse_args(
        args=args,
        show_diff_phil=True,
        return_unhandled=True,
        quick_parse=True)

    version_information = "screen19 v%s using %s (%s)" % (
        screen19.__version__,
        dials.util.version.dials_version(),
        time.strftime("%Y-%m-%d %H:%M:%S"),
    )

    start = timeit.default_timer()

    # Without any positional input there is nothing to do but show the help.
    if not unhandled:
        print(__doc__)
        print(version_information)
        return

    if set_up_logging:
        # Configure the logging
        log.config(verbosity=self.params.verbosity,
                   logfile=self.params.output.log)
        # Unless verbose output has been requested, suppress generation of
        # debug and info log records from any child DIALS command, retaining
        # those from screen19 itself.
        if not self.params.verbosity:
            logging.getLogger("dials").setLevel(logging.WARNING)
            logging.getLogger("dials.screen19").setLevel(logging.INFO)

    info(version_information)
    debug("Run with:\n%s\n%s", " ".join(unhandled), parser.diff_phil.as_str())

    self._count_processors(nproc=self.params.nproc)
    debug("Using %s processors", self.nproc)
    # Set multiprocessing settings for spot-finding, indexing and
    # integration to match the top-level specified number of processors
    self.params.dials_find_spots.spotfinder.mp.nproc = self.nproc
    self.params.dials_index.indexing.nproc = self.nproc
    # Setting self.params.dials_refine.refinement.mp.nproc is not helpful
    self.params.dials_integrate.integration.mp.nproc = self.nproc

    # Set the input and output parameters for the DIALS components
    # TODO: Compare to diff_phil and start from later in the pipeline if
    #  appropriate
    self._import(unhandled)
    imported_name = self.params.dials_import.output.experiments

    self._find_spots()

    if not self._index():
        info("\nRetrying for stronger spots only...")
        strong_refls = self.refls
        self.params.dials_find_spots.spotfinder.threshold.dispersion.sigma_strong = (
            15)
        self._find_spots()

        if not self._index():
            warning("Giving up.")
            # Leave files behind so the user can inspect reciprocal space.
            self.expts.as_file(imported_name)
            strong_refls.as_file("strong.refl")
            self.refls.as_file("stronger.refl")
            info(
                "Could not find an indexing solution. You may want to "
                "have a look at the reciprocal space by running:\n\n"
                " dials.reciprocal_lattice_viewer %s %s\n\n"
                "or, to only include stronger spots:\n\n"
                " dials.reciprocal_lattice_viewer %s %s\n",
                imported_name,
                "strong.refl",
                imported_name,
                "stronger.refl",
            )
            sys.exit(1)

    if not self._create_profile_model():
        info(
            "\nRefining model to attempt to increase number of valid spots..."
        )
        # NOTE(review): `_refine` updates the models in memory, whereas
        # `_create_profile_model` reads the indexing output files -- confirm
        # that the refined models reach disk before the retry.
        self._refine()
        if not self._create_profile_model():
            warning("Giving up.")
            info(
                "The identified indexing solution may not be correct. "
                "You may want to have a look at the reciprocal space by "
                "running:\n\n"
                " dials.reciprocal_lattice_viewer indexed.expt indexed.refl\n"
            )
            sys.exit(1)

    self._check_intensities()

    if self.params.minimum_exposure.data == "integrated":
        self._integrate()

        self._wilson_calculation()

        experiments = self.params.dials_integrate.output.experiments
        reflections = self.params.dials_integrate.output.reflections
    else:
        self._wilson_calculation()

        experiments = self.params.dials_create_profile.output
        reflections = self.params.dials_index.output.reflections

    # This is a hacky check but should work for as long as DIALS 2.0 is supported.
    if version.dials_version() < "DIALS 2.1":
        self._refine_bravais(experiments, reflections)
    else:
        self._refine_bravais()

    self._report(experiments, reflections)

    runtime = timeit.default_timer() - start
    debug(
        "Finished at %s, total runtime: %.1f",
        time.strftime("%Y-%m-%d %H:%M:%S"),
        runtime,
    )
    info("screen19 successfully completed (%.1f sec).", runtime)
def run(self):
    """
    Filter experiments whose positional RMSD is an outlier and write the rest.

    Steps, in order:

    1. Parse the command line and flatten the experiment and reflection input
       (exactly one reflection table is expected).
    2. If ``params.detector`` is set, restrict the RMSD analysis to the
       experiments using that detector and set the others aside.
    3. Optionally drop reflections whose calculated-minus-observed distance
       exceeds ``params.max_delta``.
    4. Compute one RMSD per experiment (scaled by 1000 and reported as
       microns) and reject experiments with no reflections or with an RMSD
       above ``q3 + params.iqr_multiplier * IQR``.
    5. Re-admit set-aside experiments whose crystal survived the filter.
    6. Optionally filter reflections on ``delpsical.rad`` (converted to
       degrees) using ``params.delta_psi_filter``.
    7. Write the surviving experiments and reflections to the output files.
    """
    from dials.util.options import flatten_experiments, flatten_reflections
    from dxtbx.model import ExperimentList
    from scitbx.math import five_number_summary

    # Parse the command line arguments
    params, options = self.parser.parse_args(show_diff_phil=True)
    self.params = params
    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    assert len(reflections) == 1
    reflections = reflections[0]
    print("Found", len(reflections), "reflections", "and", len(experiments),
          "experiments")

    filtered_reflections = flex.reflection_table()
    filtered_experiments = ExperimentList()

    skipped_reflections = flex.reflection_table()
    skipped_experiments = ExperimentList()

    if params.detector is not None:
        culled_reflections = flex.reflection_table()
        culled_experiments = ExperimentList()
        detector = experiments.detectors()[params.detector]
        for expt_id, experiment in enumerate(experiments):
            refls = reflections.select(reflections['id'] == expt_id)
            # Reflection ids are renumbered to index into the list that the
            # experiment is moved to.
            if experiment.detector is detector:
                culled_experiments.append(experiment)
                refls['id'] = flex.int(len(refls), len(culled_experiments) - 1)
                culled_reflections.extend(refls)
            else:
                skipped_experiments.append(experiment)
                refls['id'] = flex.int(len(refls), len(skipped_experiments) - 1)
                skipped_reflections.extend(refls)

        print(
            "RMSD filtering %d experiments using detector %d, out of %d" %
            (len(culled_experiments), params.detector, len(experiments)))
        reflections = culled_reflections
        experiments = culled_experiments

    difference_vector_norms = (reflections['xyzcal.mm'] -
                               reflections['xyzobs.mm.value']).norms()

    if params.max_delta is not None:
        sel = difference_vector_norms <= params.max_delta
        reflections = reflections.select(sel)
        difference_vector_norms = difference_vector_norms.select(sel)

    # One RMSD and one reflection count per experiment.
    data = flex.double()
    counts = flex.double()
    for i in range(len(experiments)):
        dvns = difference_vector_norms.select(reflections['id'] == i)
        counts.append(len(dvns))
        if len(dvns) == 0:
            data.append(0)
            continue
        rmsd = math.sqrt(flex.sum_sq(dvns) / len(dvns))
        data.append(rmsd)
    data *= 1000
    subset = data.select(counts > 0)
    have_data = len(subset) > 0
    print(len(subset), "experiments with > 0 reflections")

    if params.show_plots and have_data:
        h = flex.histogram(subset, n_slots=40)
        fig = plt.figure()
        # The subplot position must be an integer: current matplotlib no
        # longer accepts the string form '111'.
        ax = fig.add_subplot(111)
        ax.plot(h.slot_centers().as_numpy_array(),
                h.slots().as_numpy_array(), '-')
        plt.title("Histogram of %d image RMSDs" % len(subset))

        fig = plt.figure()
        plt.boxplot(subset, vert=False)
        plt.title("Boxplot of %d image RMSDs" % len(subset))
        plt.show()

    # Experiments without reflections are always rejected.
    outliers = counts == 0
    if have_data:
        min_x, q1_x, med_x, q3_x, max_x = five_number_summary(subset)
        print(
            "Five number summary of RMSDs (microns): min %.1f, q1 %.1f, med %.1f, q3 %.1f, max %.1f"
            % (min_x, q1_x, med_x, q3_x, max_x))
        iqr_x = q3_x - q1_x
        cut_x = params.iqr_multiplier * iqr_x
        # Only the high side is cut: don't throw away the images that are
        # outliers in the 'good' direction!
        outliers.set_selected(data > q3_x + cut_x, True)
    # else: no RMSD exists to summarise, so there is nothing further to cut
    # (presumably the summary needs at least one value -- confirm).

    for i in range(len(experiments)):
        if outliers[i]:
            continue
        refls = reflections.select(reflections['id'] == i)
        refls['id'] = flex.int(len(refls), len(filtered_experiments))
        filtered_reflections.extend(refls)
        filtered_experiments.append(experiments[i])

    n_total = len(experiments)
    n_removed = n_total - len(filtered_experiments)
    n_zero = len(counts.select(counts == 0))
    # Float arithmetic regardless of the division mode in effect, and no
    # division by zero when there are no experiments at all.
    percent_removed = 100.0 * n_removed / n_total if n_total else 0.0
    print(
        "Removed %d bad experiments and %d experiments with zero reflections, out of %d (%%%.1f)"
        % (n_removed - n_zero, n_zero, n_total, percent_removed))

    if params.detector is not None:
        crystals = filtered_experiments.crystals()
        for expt_id, experiment in enumerate(skipped_experiments):
            if experiment.crystal in crystals:
                filtered_experiments.append(experiment)
                refls = skipped_reflections.select(
                    skipped_reflections['id'] == expt_id)
                refls['id'] = flex.int(len(refls),
                                       len(filtered_experiments) - 1)
                filtered_reflections.extend(refls)

    if params.delta_psi_filter is not None:
        delta_psi = filtered_reflections['delpsical.rad'] * 180 / math.pi
        sel = (delta_psi <= params.delta_psi_filter) & (
            delta_psi >= -params.delta_psi_filter)
        n_before = len(filtered_reflections)
        filtered_reflections = filtered_reflections.select(sel)
        print("Filtering by delta psi, removing %d out of %d reflections" %
              (n_before - len(filtered_reflections), n_before))

    print("Final experiment count", len(filtered_experiments))

    filtered_experiments.as_file(params.output.filtered_experiments)
    filtered_reflections.as_pickle(params.output.filtered_reflections)
# New beam per reflection expt = expts[refl['ID'][i]] temp = expt.beam.get_s0() new_expt = expt new_expt.beam = deepcopy(expt.beam) new_expt.beam.set_wavelength(refl['Wavelength'][i]) s0 = (expt.beam.get_s0() / np.linalg.norm(expt.beam.get_s0())) / new_expt.beam.get_wavelength() new_expt.beam.set_s0(s0) new_expt.identifier = str(i) new_expts.append(new_expt) # Write new beam identifiers to reflections dials_df.at[i, 'new_ID'] = i print('finished loop') # Replace reflection IDs with new IDs idx = flex.int(dials_df['new_ID']) refl_output["id"] = idx print('assigned IDs') print('writing experiments') # Write experiment file with multiple beams new_expts.as_file(new_expt_filename) print('experiments written') print('writing refls') # Write refl file refl_output.as_file(new_refl_filename) print('refls written')
def convert_crystfel_to_dxtbx(geom_filename,
                              output_filename,
                              detdist_override=None):
    """
    Convert a CrystFEL geometry file into a dxtbx experiment file.

    One dxtbx panel is created for every panel in the geometry.  All panels
    go into a single detector model, which is saved as the only content of
    an experiment list.

    :param geom_filename: a crystfel geometry file https://www.desy.de/~twhite/crystfel/manual-crystfel_geometry.html
    :param output_filename: filename for a dxtbx experiment containing a single detector model (this is a json file)
    :param detdist_override: alter the detector distance stored in the crystfel geometry to this value (in millimeters)
    """
    geometry = load_crystfel_geometry(geom_filename)

    detector = Detector()
    for name, panel in geometry['panels'].items():
        fast_axis = panel['fsx'], panel['fsy'], panel['fsz']
        slow_axis = panel['ssx'], panel['ssy'], panel['ssz']

        # dxtbx uses millimeters; `res` is in pixels per meter
        mm_per_pixel = (1 / panel['res']) * 1000
        distance_mm = (panel['coffset'] + panel['clen']) * 1000
        if detdist_override is not None:
            distance_mm = detdist_override

        # dxtbx and crystfel both identify the outer corner of the first
        # pixel in memory as the origin of the panel; dxtbx assumes the
        # crystal is at point 0,0,0
        corner = (panel['cnx'] * mm_per_pixel,
                  panel['cny'] * mm_per_pixel,
                  -distance_mm)

        n_fast = panel["max_fs"] - panel['min_fs'] + 1
        n_slow = panel["max_ss"] - panel['min_ss'] + 1

        description = dict(
            fast_axis=fast_axis,
            gain=1.0,  # I dont think nanoBragg cares about this parameter
            identifier='',
            image_size=(n_fast, n_slow),
            mask=[],
            material='Si',
            mu=0,  # NOTE for a thick detector set this to appropriate value
            name=name,
            origin=corner,
            pedestal=0.0,  # I dont think nanoBragg cares about this parameter
            pixel_size=(mm_per_pixel, mm_per_pixel),
            px_mm_strategy={'type': 'SimplePxMmStrategy'},
            raw_image_offset=(0, 0),  # not sure what this is
            slow_axis=slow_axis,
            thickness=0,  # note for a thick detector set this to appropriate value
            trusted_range=(-1.0, 1e6),  # set as you wish
            type='SENSOR_PAD',
        )
        detector.add_panel(Panel.from_dict(description))

    experiment = Experiment()
    experiment.detector = detector
    experiment_list = ExperimentList()
    experiment_list.append(experiment)
    experiment_list.as_file(output_filename)  # this can be loaded into nanoBragg
def run(args):
    """
    Build a detector model from a geometry file and save it as ``geometry.expt``.

    Every argument that names an existing file is taken as the geometry path;
    every other argument is parsed as a PHIL assignment.  In the geometry
    file, each ``rigid_group`` line (other than a ``collection`` line)
    declares a panel key, and every later ``<key>/<name> = <value>`` line adds
    a setting to that panel.  One panel is created per key, in sorted key
    order, beneath a root frame placed at ``-params.distance`` along z.

    :param args: list of command-line argument strings
    :raises Sorry: if a non-file argument cannot be parsed as PHIL
    :raises Usage: if no detector distance was given
    """
    if '-h' in args or '--help' in args or '-c' in args:
        print(help_str)
        phil_scope.show(attributes_level=2)
        return

    user_phil = []
    for arg in args:
        if os.path.isfile(arg):
            user_phil.append(parse("geom_path=%s" % arg))
        else:
            try:
                user_phil.append(parse(arg))
            except Exception:
                raise Sorry("Unrecognized argument: %s" % arg)
    params = phil_scope.fetch(sources=user_phil).extract()
    if params.distance is None:
        raise Usage("Please specify detector distance")

    geom = {}
    # The context manager closes the file even if parsing fails.
    with open(params.geom_path) as geom_file:
        for line in geom_file:
            fields = line.split("=")
            if len(fields) != 2:
                continue
            if "rigid_group" in line and "collection" not in line:
                geom[fields[1].strip()] = {}
            else:
                for key in geom:
                    if line.startswith("%s/" % key):
                        setting = fields[0].split("/")[1].strip()
                        geom[key][setting] = fields[1].strip()

    detector = Detector()
    root = detector.hierarchy()
    root.set_frame((1, 0, 0), (0, 1, 0), (0, 0, -params.distance))

    for key in sorted(geom):
        # Axis settings look like "<number>x <number>y".
        fs_x, fs_y = geom[key]['fs'].split(" ")
        ss_x, ss_y = geom[key]['ss'].split(" ")
        # The x components are negated when going to the dxtbx frame.
        fast = matrix.col(
            (-float(fs_x.rstrip('x')), float(fs_y.rstrip('y')), 0.0))
        slow = matrix.col(
            (-float(ss_x.rstrip('x')), float(ss_y.rstrip('y')), 0.0))

        origin = matrix.col(
            (-float(geom[key]['corner_x']) * params.pixel_size,
             float(geom[key]['corner_y']) * params.pixel_size, 0.0))

        # OBS! you need to set the panel to a root before set local frame...
        p = root.add_panel()
        p.set_name('panel-%s' % key)
        # NOTE(review): image size and trusted range are hard-coded for every
        # panel -- confirm they suit the detector being described.
        p.set_image_size((512, 1024))
        p.set_trusted_range((-1, 1000000))
        p.set_pixel_size((params.pixel_size, params.pixel_size))
        p.set_local_frame(fast.elems, slow.elems, origin.elems)

    from dxtbx.model import BeamFactory
    wavelength = params.wavelength
    beam = BeamFactory.simple(wavelength)

    from dxtbx.model import Experiment, ExperimentList
    experiments = ExperimentList()
    experiment = Experiment(detector=detector, beam=beam)
    experiments.append(experiment)
    experiments.as_file("geometry.expt")
def _ensure_directory(dname):
    """Create directory *dname* (with parents) unless it already exists."""
    try:
        os.makedirs(dname)
    except OSError:
        # Another MPI rank may have created it in the meantime; only fail
        # when the directory really is missing.
        if not os.path.isdir(dname):
            raise


def write_output_files(Xopt, LMP, Modelers, SIM, params):
    """
    Writes refl and exper files for each experiment modeled during
    the ensemble refiner

    For every shot handled by this rank the optimized crystal and detector
    are stored in a new experiment, new predictions are written to a
    reflection file and, when ``params.geometry.pandas_dir`` is set, a
    pandas pickle plus a numpy dump of the modeler are saved.  Rank 0 also
    writes ``final_merge.mtz``, holding the rescaled structure factor
    amplitudes with errors estimated from the diagonal of the Hessian.

    This function contains collective MPI calls (reduce/bcast), so it has to
    be called on all ranks.

    :param Xopt: float array of optimized rescaled parameter values
    :param LMP: simtbx.diffBragg.refiners.parameters.Parameters() object
    :param Modelers: data modelers (launcher.Modelers)
    :param SIM: instance of sim_data (launcher.SIM)
    :param params: phil params, simtbx.diffBragg.phil.py
    :return: median prediction offset pooled over all shots of all ranks
        (the same value on every rank)
    """
    opt_det = geometry_refiner.get_optimized_detector(Xopt, LMP, SIM)

    # Store the hessian of negative log likelihood for error estimation
    # must determine total number of refined Fhkls and then create a vector of 0's of that length
    num_fhkl_param = sum(1 for name in LMP if "fcell" in name)
    diag_hess = np.zeros(num_fhkl_param)

    # Every rank writes into these folders, so every rank makes sure they
    # exist.  Creating them on rank 0 only left the other ranks free to write
    # before the folders were there.
    # NOTE(review): pandas_dir is used unconditionally in the path joins
    # further down, so it appears to be required in practice -- confirm.
    if params.geometry.pandas_dir is not None:
        refdir = os.path.join(params.geometry.pandas_dir, "refls")
        expdir = os.path.join(params.geometry.pandas_dir, "expts")
        for dname in (params.geometry.pandas_dir, refdir, expdir):
            _ensure_directory(dname)

    all_shot_pred_offsets = []
    for i_shot in Modelers:
        Modeler = Modelers[i_shot]
        # these are in simtbx.diffBragg.refiners.parameters.RangedParameter objects
        rotX = LMP["rank%d_shot%d_RotXYZ%d" % (COMM.rank, i_shot, 0)]
        rotY = LMP["rank%d_shot%d_RotXYZ%d" % (COMM.rank, i_shot, 1)]
        rotZ = LMP["rank%d_shot%d_RotXYZ%d" % (COMM.rank, i_shot, 2)]
        num_uc_p = len(Modeler.ucell_man.variables)
        ucell_pars = [
            LMP["rank%d_shot%d_Ucell%d" % (COMM.rank, i_shot, i_uc)]
            for i_uc in range(num_uc_p)
        ]

        # convert rotation angles back to radians (thats what the parameters.RangedParamter.get_val method does)
        rotXYZ = rotX.get_val(Xopt[rotX.xpos]), \
            rotY.get_val(Xopt[rotY.xpos]), \
            rotZ.get_val(Xopt[rotZ.xpos])

        # ucell_man is an instance of
        # simtbx.diffBragg.refiners.crystal_systems.manager.Manager()
        # (for the correct xtal system)
        Modeler.ucell_man.variables = [
            p.get_val(Xopt[p.xpos]) for p in ucell_pars
        ]
        ucpar = Modeler.ucell_man.unit_cell_parameters

        new_crystal = hopper_utils.new_cryst_from_rotXYZ_and_ucell(
            rotXYZ, ucpar, Modeler.E.crystal)
        new_exp = deepcopy(Modeler.E)
        new_exp.crystal = new_crystal
        # the beam gets the weighted mean wavelength of the spectrum
        wave, wt = map(np.array, zip(*Modeler.spectra))
        ave_wave = (wave * wt).sum() / wt.sum()
        new_exp.beam.set_wavelength(ave_wave)
        new_exp.detector = opt_det

        Modeler.best_model = model(Xopt,
                                   LMP,
                                   i_shot,
                                   Modeler,
                                   SIM,
                                   return_model=True)
        Modeler.best_model_includes_background = True

        # Get the bragg-only component of model in order to compute hessian terms
        bragg = Modeler.best_model - Modeler.all_background

        # store the updated per-roi scale factors in the new refl table
        roi_scale_factor = flex.double(len(Modeler.refls), 1)
        for fcell_idx in Modeler.fcell_idx_unique:
            p = LMP["scale_fcell%d" % fcell_idx]
            scale_fac = p.get_val(Xopt[p.xpos])
            slices = Modeler.fcell_idx_slices[fcell_idx]
            for slc in slices:
                # update the refl table column
                roi_refl_ids = Modeler.all_refls_idx[slc]
                unique_refl_ids = np.unique(roi_refl_ids)
                for refl_idx in unique_refl_ids:
                    roi_scale_factor[refl_idx] = scale_fac

                # update the hessian of the log likelihood
                # first derivative is the Bragg component of the model divided by the scale factor
                # TODO what if scale_fac is close to 0 ?
                first_deriv = bragg[slc] / scale_fac
                u = Modeler.all_data[slc] - Modeler.best_model[slc]
                v = Modeler.best_model[slc] + Modeler.nominal_sigma_rdout**2
                one_by_v = 1 / v
                G = 1 - 2 * u - u * u * one_by_v
                hessian_coef = one_by_v * (one_by_v * G - 2 -
                                           2 * u * one_by_v -
                                           u * u * one_by_v * one_by_v)
                # only trusted pixels contribute
                trusted_slc = Modeler.all_trusted[slc]
                diag_hess[fcell_idx] += -0.5 * (
                    hessian_coef * (first_deriv**2))[trusted_slc].sum()

        Modeler.refls["global_scale_factor"] = roi_scale_factor

        # get the new refls
        new_refl = hopper_utils.get_new_xycalcs(
            Modeler, new_exp, old_refl_tag="before_geom_ref")

        new_refl_fname, refl_ext = os.path.splitext(Modeler.refl_name)
        new_refl_fname = "rank%d_%s_%s%s" % (
            COMM.rank, os.path.basename(new_refl_fname),
            params.geometry.optimized_results_tag, refl_ext)
        if not new_refl_fname.endswith(".refl"):
            new_refl_fname += ".refl"
        new_refl_fname = os.path.join(params.geometry.pandas_dir, "refls",
                                      new_refl_fname)
        new_refl.as_file(new_refl_fname)

        shot_pred_offsets = geometry_refiner.get_dist_from_R(new_refl)
        all_shot_pred_offsets += list(shot_pred_offsets)

        new_expt_fname, expt_ext = os.path.splitext(Modeler.exper_name)
        new_expt_fname = "rank%d_%s_%s%s" % (
            COMM.rank, os.path.basename(new_expt_fname),
            params.geometry.optimized_results_tag, expt_ext)
        if not new_expt_fname.endswith(".expt"):
            new_expt_fname += ".expt"
        new_expt_fname = os.path.join(params.geometry.pandas_dir, "expts",
                                      new_expt_fname)
        new_exp_lst = ExperimentList()
        new_exp_lst.append(new_exp)
        new_exp_lst.as_file(new_expt_fname)

        if params.geometry.pandas_dir is not None:
            a, b, c, al, be, ga = ucpar
            ncells_p = [
                LMP["rank%d_shot%d_Nabc%d" % (COMM.rank, i_shot, i)]
                for i in range(3)
            ]
            Na, Nb, Nc = [p.get_val(Xopt[p.xpos]) for p in ncells_p]
            scale_p = LMP["rank%d_shot%d_Scale" % (COMM.rank, i_shot)]
            scale = scale_p.get_val(Xopt[scale_p.xpos])

            _, fluxes = zip(*SIM.beam.spectrum)
            eta_a = eta_b = eta_c = np.nan
            df = single_expt_pandas(
                xtal_scale=scale,
                Amat=new_crystal.get_A(),
                ncells_abc=(Na, Nb, Nc),
                ncells_def=(0, 0, 0),
                eta_abc=(eta_a, eta_b, eta_c),
                diff_gamma=(np.nan, np.nan, np.nan),
                diff_sigma=(np.nan, np.nan, np.nan),
                detz_shift=0,
                use_diffuse=params.use_diffuse_models,
                gamma_miller_units=params.gamma_miller_units,
                eta=np.nan,
                rotXYZ=tuple(rotXYZ),
                ucell_p=(a, b, c, al, be, ga),
                ucell_p_init=(np.nan, np.nan, np.nan, np.nan, np.nan, np.nan),
                lam0_lam1=(np.nan, np.nan),
                spec_file=Modeler.spec_name,
                spec_stride=params.simulator.spectrum.stride,
                flux=sum(fluxes),
                beamsize_mm=SIM.beam.size_mm,
                orig_exp_name=Modeler.exper_name,
                opt_exp_name=os.path.abspath(new_expt_fname),
                spec_from_imageset=params.spectrum_from_imageset,
                oversample=SIM.D.oversample,
                opt_det=params.opt_det,
                stg1_refls=Modeler.refl_name,
                stg1_img_path=None)
            pandas_name = os.path.splitext(
                os.path.basename(new_expt_fname))[0] + ".pkl"
            pandas_name = os.path.join(params.geometry.pandas_dir,
                                       pandas_name)
            df.to_pickle(pandas_name)
            modeler_name = pandas_name.replace(".pkl", ".npy")
            np.save(modeler_name, Modeler)

    # pool the offsets on rank 0, take the median there and share it
    all_shot_pred_offsets = COMM.reduce(all_shot_pred_offsets)
    if COMM.rank == 0:
        median_pred_offset = np.median(all_shot_pred_offsets)
    else:
        median_pred_offset = None
    median_pred_offset = COMM.bcast(median_pred_offset)

    # reduce the hessian over all shots then compute the errors of the structure factors
    diag_hess = COMM.reduce(diag_hess)

    # sum the unit cell parameters over all shots to form the average cell
    uc_p = np.zeros(6)
    nshot = 0
    for i_shot in Modelers:
        Mod = Modelers[i_shot]
        num_uc_p = len(Mod.ucell_man.variables)
        ucell_pars = [
            LMP["rank%d_shot%d_Ucell%d" % (COMM.rank, i_shot, i_uc)]
            for i_uc in range(num_uc_p)
        ]
        Mod.ucell_man.variables = [p.get_val(Xopt[p.xpos]) for p in ucell_pars]
        uc_p += np.array(Mod.ucell_man.unit_cell_parameters)
        nshot += 1
    nshot = COMM.reduce(nshot)
    uc_p = COMM.reduce(uc_p)

    if COMM.rank == 0:
        ave_uc_p = uc_p / nshot

        fhkl_file = os.path.join(params.geometry.pandas_dir, "final_merge.mtz")

        F = SIM.crystal.miller_array
        Fmap = dict(zip(F.indices(), F.data()))

        # zero or invalid curvature gives inf/nan here; those entries are
        # rejected in the loop below
        with np.errstate(divide='ignore', invalid='ignore'):
            scale_variance = 1 / diag_hess

        indices = flex.miller_index()
        data = flex.double()
        sigmas = flex.double()
        for fcell_idx in range(num_fhkl_param):
            pname = "scale_fcell%d" % fcell_idx
            p = LMP[pname]
            scale = p.get_val(Xopt[p.xpos])

            hkl = SIM.asu_from_idx[fcell_idx]
            F_no_scale = Fmap[hkl]
            Ihkl = scale * F_no_scale**2
            Fhkl = np.sqrt(Ihkl)
            var_scale = scale_variance[fcell_idx]
            if var_scale <= 0:
                continue
            # error of F propagated from the variance of the scale factor
            sig_F = 0.5 * F_no_scale / np.sqrt(scale) * np.sqrt(var_scale)
            if np.isinf(sig_F):
                continue
            indices.append(hkl)
            data.append(Fhkl)
            sigmas.append(sig_F)

        # store an optimized mtz, and a numpy array with the same information
        sym = crystal.symmetry(tuple(ave_uc_p), SIM.crystal.symbol)
        mset = miller.set(sym, indices, True)
        ma = miller.array(mset, data, sigmas)
        ma = ma.set_observation_type_xray_amplitude().as_anomalous_array()
        ma.as_mtz_dataset(column_root_label="F").mtz_object().write(fhkl_file)

    return median_pred_offset
if has_master: master_index = iset.indices()[0] d = d.query("master_indices==%d" % master_index) if len(d) != 1: continue A = d.Amats.values[0] #break C = deepcopy(crystals[i]) #C.set_A(A) Ex = Experiment() Ex.crystal = C Ex.imageset = iset Ex.beam = beams[i] Ex.detector = D El2.append(Ex) Rsel = R.select(R['id']==i) nref = len(Rsel) Rsel['id'] = flex.int(nref, new_id) R2.extend(Rsel) new_id += 1 print (new_id) el_file = "%s.expt" % args.tag R_file = "%s.refl" % args.tag El2.as_file(el_file) print("Saved experiment %s" % el_file ) R2.as_file(R_file) print("Saved refls %s" % R_file)