def distribute_reflections_over_hkl_chunks(self, reflections):
  '''Distribute reflections, according to their HKLs, over pre-set HKL chunks'''
  total_reflection_count = reflections.size()
  total_distributed_reflection_count = 0

  if total_reflection_count > 0:
    # set up two lists to be passed to the C++ extension: HKLs and chunk ids.
    # It's basically a hash table to look up chunk ids by HKLs
    hkl_list = flex.miller_index()
    chunk_id_list = flex.int()

    for i in range(len(self.hkl_split_set)):
      for j in range(len(self.hkl_split_set[i])):
        hkl = (int(self.hkl_split_set[i][j][0]),
               int(self.hkl_split_set[i][j][1]),
               int(self.hkl_split_set[i][j][2]))
        hkl_list.append(hkl)
        chunk_id_list.append(i)

    # distribute reflections over hkl chunks, using a C++ extension
    from xfel.merging import get_hkl_chunks_cpp
    get_hkl_chunks_cpp(reflections, hkl_list, chunk_id_list, self.hkl_chunks)

    for chunk in self.hkl_chunks:
      total_distributed_reflection_count += len(chunk)

  self.logger.log("Distributed %d out of %d reflections" %
                  (total_distributed_reflection_count, total_reflection_count))
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  reflections.clear()
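# For illustration only: a minimal pure-Python sketch of the lookup that
# get_hkl_chunks_cpp is assumed to perform -- map each reflection's HKL to a
# chunk id through the (hkl -> chunk) table built above, and append the
# reflection to that chunk. The column name 'miller_index_asymmetric' and the
# per-row select/extend are assumptions; the C++ extension exists to do this
# over whole tables at speed.
from dials.array_family import flex

def distribute_reflections_python_sketch(reflections, hkl_list, chunk_id_list, hkl_chunks):
  chunk_of_hkl = {tuple(hkl): chunk_id for hkl, chunk_id in zip(hkl_list, chunk_id_list)}
  for i in range(reflections.size()):
    hkl = reflections['miller_index_asymmetric'][i]
    chunk_id = chunk_of_hkl.get(hkl)
    if chunk_id is not None:
      # copy the single row i into its destination chunk
      hkl_chunks[chunk_id].extend(reflections.select(flex.size_t([i])))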
def prune_reflection_table_keys(self, reflections):
  from xfel.merging.application.reflection_table_utils import reflection_table_utils
  reflections = reflection_table_utils.prune_reflection_table_keys(
      reflections=reflections,
      keys_to_keep=['intensity.sum.value', 'intensity.sum.variance',
                    'miller_index', 'miller_index_asymmetric', 'exp_id', 's1',
                    'intensity.sum.value.unmodified', 'intensity.sum.variance.unmodified'])
  self.logger.log("Pruned reflection table")
  self.logger.log("Memory usage: %d MB" % get_memory_usage())
  return reflections
def get_reflections_from_alltoall_sliced(self, number_of_slices):
  '''Split each hkl chunk into N slices. This is needed to address the MPI alltoall memory problem.'''

  result_reflections = self.distribute_reflection_table()  # the total reflection table, which this rank will receive after all slices of alltoall

  # if self.hkl_chunks is [A,B,C...], this list will be
  # [[A1,A2,...,An], [B1,B2,...,Bn], [C1,C2,...,Cn], ...], where n is the number of chunk slices
  list_of_sliced_hkl_chunks = []
  for i in range(len(self.hkl_chunks)):
    hkl_chunk_slices = []
    for chunk_slice in reflection_table_utils.get_next_reflection_table_slice(
        self.hkl_chunks[i], number_of_slices, self.distribute_reflection_table):
      hkl_chunk_slices.append(chunk_slice)
    list_of_sliced_hkl_chunks.append(hkl_chunk_slices)

  self.logger.log("Ready for all-to-all...")
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  for j in range(number_of_slices):
    hkl_chunks_for_alltoall = list()
    for i in range(len(self.hkl_chunks)):
      hkl_chunks_for_alltoall.append(list_of_sliced_hkl_chunks[i][j])  # [Aj, Bj, Cj, ...]

    self.logger.log_step_time("ALL-TO-ALL")
    self.logger.log("Executing MPI all-to-all...")
    self.logger.log("Memory usage: %d MB" % get_memory_usage())

    received_hkl_chunks = comm.alltoall(hkl_chunks_for_alltoall)

    self.logger.log("After all-to-all received %d hkl chunks" % len(received_hkl_chunks))
    self.logger.log_step_time("ALL-TO-ALL", True)

    self.logger.log_step_time("CONSOLIDATE")
    self.logger.log("Consolidating reflection tables...")
    for chunk in received_hkl_chunks:
      result_reflections.extend(chunk)
    self.logger.log_step_time("CONSOLIDATE", True)

  return result_reflections
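# For illustration only: a minimal sketch of the slicing this code relies on.
# Under the assumption that get_next_reflection_table_slice yields
# number_of_slices contiguous, near-equal slices of a table, an equivalent
# generator over a plain list is:
def get_next_slice_sketch(items, number_of_slices):
  '''Yield number_of_slices contiguous, near-equal slices of items.'''
  total = len(items)
  start = 0
  for i in range(number_of_slices):
    # spread the remainder one item at a time over the leading slices
    count = total // number_of_slices + (1 if i < total % number_of_slices else 0)
    yield items[start:start + count]
    start += count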
def prune_reflection_table_keys(self, reflections):
  from xfel.merging.application.reflection_table_utils import reflection_table_utils
  reflections = reflection_table_utils.prune_reflection_table_keys(
      reflections=reflections,
      keys_to_keep=['intensity.sum.value', 'intensity.sum.variance',
                    'miller_index', 'miller_index_asymmetric', 'exp_id', 's1',
                    'intensity.sum.value.unmodified', 'intensity.sum.variance.unmodified',
                    'kapton_absorption_correction', 'flags'],
      keys_to_ignore=self.params.input.persistent_refl_cols)
  self.logger.log("Pruned reflection table")
  self.logger.log("Memory usage: %d MB" % get_memory_usage())
  return reflections
def _mem_usage(self):
  import socket
  # get_memory_usage() reports MB; convert to GB for the log line
  memMB = get_memory_usage()
  host = socket.gethostname()
  print("Rank 0 reporting memory usage: %f GB on node %s" % (memMB / 1e3, host))
def run(self, all_experiments, all_reflections):
  """ Load all the data using MPI """
  from dxtbx.model.experiment_list import ExperimentList
  from dials.array_family import flex

  # Both must be none or not none
  test = [all_experiments is None, all_reflections is None].count(True)
  assert test in [0, 2]
  if test == 2:
    all_experiments = ExperimentList()
    all_reflections = flex.reflection_table()
    starting_expts_count = starting_refls_count = 0
  else:
    starting_expts_count = len(all_experiments)
    starting_refls_count = len(all_reflections)
  self.logger.log("Initial number of experiments: %d; Initial number of reflections: %d" %
                  (starting_expts_count, starting_refls_count))

  # Generate and send a list of file paths to each worker
  if self.mpi_helper.rank == 0:
    file_list = self.get_list()
    self.logger.log("Built an input list of %d json/pickle file pairs" % (len(file_list)))
    self.params.input.path = None  # Rank 0 has already parsed the input parameters
    per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
        calculate_file_load(available_rank_count=self.mpi_helper.size)
    self.logger.log('Transmitting a list of %d lists of json/pickle file pairs' %
                    (len(per_rank_file_list)))
    transmitted = per_rank_file_list
  else:
    transmitted = None

  self.logger.log_step_time("BROADCAST_FILE_LIST")
  transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
  new_file_list = transmitted[self.mpi_helper.rank] \
      if self.mpi_helper.rank < len(transmitted) else None
  self.logger.log_step_time("BROADCAST_FILE_LIST", True)

  # Load the data
  self.logger.log_step_time("LOAD")
  if new_file_list is not None:
    self.logger.log("Received a list of %d json/pickle file pairs" % len(new_file_list))
    for experiments_filename, reflections_filename in new_file_list:
      self.logger.log("Reading %s %s" % (experiments_filename, reflections_filename))
      experiments = ExperimentListFactory.from_json_file(experiments_filename, check_format=False)
      reflections = flex.reflection_table.from_file(reflections_filename)
      self.logger.log("Data read, prepping")

      if 'intensity.sum.value' in reflections:
        reflections['intensity.sum.value.unmodified'] = reflections['intensity.sum.value'] * 1
      if 'intensity.sum.variance' in reflections:
        reflections['intensity.sum.variance.unmodified'] = reflections['intensity.sum.variance'] * 1

      new_ids = flex.int(len(reflections), -1)
      new_identifiers = flex.std_string(len(reflections))
      eid = reflections.experiment_identifiers()
      for k in eid.keys():
        del eid[k]

      for experiment_id, experiment in enumerate(experiments):
        # select reflections of the current experiment
        refls_sel = reflections['id'] == experiment_id

        if refls_sel.count(True) == 0:
          continue

        if experiment.identifier is None or len(experiment.identifier) == 0:
          experiment.identifier = create_experiment_identifier(
              experiment, experiments_filename, experiment_id)

        if not self.params.input.keep_imagesets:
          experiment.imageset = None
        all_experiments.append(experiment)

        # Reflection experiment 'id' is unique within this rank;
        # 'exp_id' (i.e. experiment identifier) is unique globally
        new_identifiers.set_selected(refls_sel, experiment.identifier)

        new_id = len(all_experiments) - 1
        eid[new_id] = experiment.identifier
        new_ids.set_selected(refls_sel, new_id)
      assert (new_ids < 0).count(True) == 0, "Not all reflections accounted for"
      reflections['id'] = new_ids
      reflections['exp_id'] = new_identifiers
      all_reflections.extend(reflections)
  else:
    self.logger.log("Received a list of 0 json/pickle file pairs")

  self.logger.log_step_time("LOAD", True)
  self.logger.log('Read %d experiments consisting of %d reflections' %
                  (len(all_experiments) - starting_expts_count,
                   len(all_reflections) - starting_refls_count))
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  all_reflections = self.prune_reflection_table_keys(all_reflections)

  # Do we have any data?
  from xfel.merging.application.utils.data_counter import data_counter
  data_counter(self.params).count(all_experiments, all_reflections)
  return all_experiments, all_reflections
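# For illustration only: the id-remapping pattern above, reduced to plain
# Python. Per-file experiment ids (0..n-1 within each file) are renumbered to
# consecutive rank-global ids, while the identifier string travels alongside as
# the globally unique key. All names here are hypothetical.
def remap_ids_sketch(per_file_ids, identifier_of_local_id, next_global_id):
  '''per_file_ids: reflection ids local to one file;
  identifier_of_local_id: identifier string per local experiment id;
  returns (global ids, identifier strings), one entry per reflection.'''
  local_to_global = {}
  new_ids, new_identifiers = [], []
  for local_id in per_file_ids:
    if local_id not in local_to_global:
      local_to_global[local_id] = next_global_id
      next_global_id += 1
    new_ids.append(local_to_global[local_id])
    new_identifiers.append(identifier_of_local_id[local_id])
  return new_ids, new_identifiers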
def tst_one(i_exp, spectra, Fmerge, gpu_channels_singleton, rank, params):
  from simtbx.nanoBragg import utils
  from dxtbx.model.experiment_list import ExperimentListFactory
  import numpy as np

  print("Experiment %d" % i_exp, flush=True)
  sys.stdout.flush()

  outfile = "boop_%d.hdf5" % i_exp
  from LS49.adse13_187.case_data import retrieve_from_repo
  experiment_file = retrieve_from_repo(i_exp)
  # Not used
  # refl_file = "/global/cfs/cdirs/m3562/der/run795/top_%d.refl" % i_exp
  cuda = True  # False # whether to use cuda
  omp = False
  ngpu_on_node = 1  # 8 # number of available GPUs
  mosaic_spread = 0.07  # degrees
  mosaic_spread_samples = params.mosaic_spread_samples  # number of mosaic blocks sampling mosaicity
  Ncells_abc = 30, 30, 10  # medians from best stage1
  ev_res = 1.5  # resolution of the downsampled spectrum
  total_flux = 1e12  # total flux across channels
  beamsize_mm = 0.000886226925452758  # sqrt of beam focal area
  spot_scale = 500.  # 5.16324 # median from best stage1
  plot_spec = False  # plot the downsampled spectra before simulating
  oversample = 1  # oversample factor; 1, 2, or 3 is probably enough
  panel_list = None  # integer list of panels, useful for debugging
  rois_only = False  # only set True if you are running openMP, or CPU-only (i.e. not for GPU)
  include_background = params.include_background  # default is to add a water background
  verbose = 0  # leave as 0, unless debugging
  flat = True  # enforce that the camera has 0 thickness
  #<><><><><><><><>
  # XXX new code
  El = ExperimentListFactory.from_json_file(experiment_file, check_format=True)
  exper = El[0]

  crystal = exper.crystal
  detector = exper.detector
  if flat:
    from dxtbx_model_ext import SimplePxMmStrategy
    for panel in detector:
      panel.set_px_mm_strategy(SimplePxMmStrategy())
      panel.set_mu(0)
      panel.set_thickness(0)

  beam = exper.beam

  # XXX new code
  spec = exper.imageset.get_spectrum(0)
  energies_raw, weights_raw = spec.get_energies_eV().as_numpy_array(), \
      spec.get_weights().as_numpy_array()
  energies, weights = utils.downsample_spectrum(energies_raw, weights_raw,
                                                method=1, total_flux=total_flux,
                                                ev_width=ev_res)

  if flat:
    assert detector[0].get_thickness() == 0

  if panel_list is None:
    panel_list = list(range(len(detector)))

  pids_for_rank = panel_list
  device_Id = 0
  if gpu_channels_singleton is not None:
    device_Id = gpu_channels_singleton.get_deviceID()

  print("Rank %d will use device %d" % (rank, device_Id))
  show_params = False
  time_panels = (rank == 0)

  mn_energy = (energies * weights).sum() / weights.sum()
  mn_wave = utils.ENERGY_CONV / mn_energy

  if params.use_exascale_api:
    BEG = time()
    print(gpu_channels_singleton.get_deviceID(), "device")
    Famp_is_uninitialized = (gpu_channels_singleton.get_nchannels() == 0)
    if Famp_is_uninitialized:
      F_P1 = Fmerge.expand_to_p1()
      for x in range(1):  # in this scenario, amplitudes are independent of lambda
        gpu_channels_singleton.structure_factors_to_GPU_direct(
            x, F_P1.indices(), F_P1.data())
    assert gpu_channels_singleton.get_nchannels() == 1

    JF16M_numpy_array, TIME_BG, TIME_BRAGG, _ = multipanel_sim(
        CRYSTAL=crystal, DETECTOR=detector, BEAM=beam,
        Famp=gpu_channels_singleton,
        energies=list(energies), fluxes=list(weights),
        background_wavelengths=[mn_wave], background_wavelength_weights=[1],
        background_total_flux=total_flux, background_sample_thick_mm=0.5,
        cuda=True,
        oversample=oversample, Ncells_abc=Ncells_abc,
        mos_dom=mosaic_spread_samples, mos_spread=mosaic_spread,
        mosaic_method=params.mosaic_method,
        beamsize_mm=beamsize_mm, show_params=show_params,
        time_panels=time_panels, verbose=verbose,
        spot_scale_override=spot_scale,
        include_background=include_background,
        mask_file=params.mask_file)
    TIME_EXA = time() - BEG
    print("Exascale time", TIME_EXA)

    if params.write_experimental_data:
      data = exper.imageset.get_raw_data(0)

    tsave = time()
    img_sh = JF16M_numpy_array.shape
    assert img_sh == (256, 254, 254)
    num_output_images = 1 + int(params.write_experimental_data)
    print("Saving exascale output data of shape", img_sh)
    beam_dict = beam.to_dict()
    det_dict = detector.to_dict()
    try:
      beam_dict.pop("spectrum_energies")
      beam_dict.pop("spectrum_weights")
    except Exception:
      pass
    # XXX no longer have two separate files
    if params.write_output:
      with utils.H5AttributeGeomWriter(
          "exap_%d.hdf5" % i_exp,
          image_shape=img_sh, num_images=num_output_images,
          detector=det_dict, beam=beam_dict,
          detector_and_beam_are_dicts=True) as writer:
        writer.add_image(JF16M_numpy_array)
        if params.write_experimental_data:
          data = [data[pid].as_numpy_array() for pid in panel_list]
          writer.add_image(data)
      tsave = time() - tsave
      print("Saved output to file %s. Saving took %.4f sec" % ("exap_%d.hdf5" % i_exp, tsave,))

  BEG2 = time()
  # optional background
  TIME_BG2 = time()
  backgrounds = {pid: None for pid in panel_list}
  if include_background:
    backgrounds = {pid: utils.sim_background(  # default is for water
        detector, beam, wavelengths=[mn_wave], wavelength_weights=[1],
        total_flux=total_flux, Fbg_vs_stol=water,
        pidx=pid, beam_size_mm=beamsize_mm, sample_thick_mm=0.5)
        for pid in pids_for_rank}
  TIME_BG2 = time() - TIME_BG2

  TIME_BRAGG2 = time()
  pid_and_pdata = utils.flexBeam_sim_colors(
      CRYSTAL=crystal, DETECTOR=detector, BEAM=beam,
      energies=list(energies), fluxes=list(weights), Famp=Fmerge,
      pids=pids_for_rank, cuda=cuda, device_Id=device_Id,
      oversample=oversample, Ncells_abc=Ncells_abc, verbose=verbose,
      time_panels=time_panels, show_params=show_params,
      spot_scale_override=spot_scale,
      mos_dom=mosaic_spread_samples, mos_spread=mosaic_spread,
      beamsize_mm=beamsize_mm, background_raw_pixels=backgrounds,
      include_noise=False, rois_perpanel=None)
  TIME_BRAGG2 = time() - TIME_BRAGG2
  pid_and_pdata = sorted(pid_and_pdata, key=lambda x: x[0])
  _, pdata = zip(*pid_and_pdata)
  TIME_VINTAGE = time() - BEG2

  print("\n<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>")
  print("\tBreakdown:")
  if params.use_exascale_api:
    print("\t\tExascale: time for bkgrd sim: %.4fs; Bragg sim: %.4fs; total: %.4fs"
          % (TIME_BG, TIME_BRAGG, TIME_EXA))
  print("\t\tVintage: time for bkgrd sim: %.4fs; Bragg sim: %.4fs; total: %.4fs"
        % (TIME_BG2, TIME_BRAGG2, TIME_VINTAGE))
  print("<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>\n")

  if params.test_pixel_congruency and params.use_exascale_api:
    abs_diff = np.abs(np.array(pdata) - JF16M_numpy_array).max()
    assert np.allclose(pdata, JF16M_numpy_array), \
        "max per-pixel difference: %f photons" % abs_diff
    print("pixel congruency: OK!")

  # pdata is a list of 256 2D numpy arrays, now.
  if len(panel_list) != len(detector):
    print("Can't save partial detector image, exiting...")
    exit()
  #from dxtbx.model import Detector
  #new_det = Detector()
  #for pid in panel_list:
  #  new_det.add_panel(detector[pid])
  #detector = new_det
  if params.write_experimental_data:
    data = exper.imageset.get_raw_data(0)

  tsave = time()
  pdata = np.array(pdata)  # now pdata is a numpy array of shape 256,254,254
  img_sh = pdata.shape
  num_output_images = 3 + int(params.write_experimental_data)
  print("BOOPZ: Rank=%d ; i_exp=%d, RAM usage=%f" % (rank, i_exp, get_memory_usage() / 1e6))
  beam_dict = beam.to_dict()
  det_dict = detector.to_dict()
  try:
    beam_dict.pop("spectrum_energies")
    beam_dict.pop("spectrum_weights")
  except Exception:
    pass
  if params.write_output:
    print("Saving output data of shape", img_sh)
    with utils.H5AttributeGeomWriter(
        outfile, image_shape=img_sh, num_images=num_output_images,
        detector=det_dict, beam=beam_dict,
        detector_and_beam_are_dicts=True) as writer:
      writer.add_image(JF16M_numpy_array / pdata)
      writer.add_image(JF16M_numpy_array)
      writer.add_image(pdata)
      if params.write_experimental_data:
        data = [data[pid].as_numpy_array() for pid in panel_list]
        writer.add_image(data)
    tsave = time() - tsave
    print("Saved output to file %s. Saving took %.4f sec" % (outfile, tsave,))
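# For illustration only: the weighted-mean energy and wavelength conversion
# used above, on plain numpy arrays. The exact value of utils.ENERGY_CONV is
# an assumption here (the eV-Angstrom conversion factor, approximately 12398.4).
import numpy as np

def mean_wavelength_sketch(energies, weights, energy_conv=12398.4):
  energies, weights = np.asarray(energies), np.asarray(weights)
  mn_energy = (energies * weights).sum() / weights.sum()  # flux-weighted mean eV
  return energy_conv / mn_energy  # wavelength in Angstroms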
def run(self, input_experiments, input_reflections):
  from collections import OrderedDict
  if self.mpi_helper.rank == 0:
    print("Starting cosym worker")
  #Overall = Profiler("Cosym total time")

  # Evenly distribute all experiments from mpi_helper ranks
  reports = self.mpi_helper.comm.gather(len(input_experiments), root=0)  # report from all ranks on experiment count
  if self.mpi_helper.rank == 0:
    from xfel.merging.application.modify.token_passing_left_right import construct_src_to_dst_plan
    plan = construct_src_to_dst_plan(flex.int(reports),
                                     self.params.modify.cosym.tranch_size,
                                     self.mpi_helper.comm)
  else:
    plan = 0
  plan = self.mpi_helper.comm.bcast(plan, root=0)

  # decision whether to reserve rank 0 for parallel anchor determination
  # FIXME XXX probably need to look at plan size to decide dst_offset or not
  dst_offset = 1 if self.mpi_helper.size > 1 else 0
  from xfel.merging.application.modify.token_passing_left_right import apply_all_to_all
  tokens = apply_all_to_all(plan=plan, dst_offset=dst_offset,
                            value=(input_experiments, input_reflections),
                            comm=self.mpi_helper.comm)

  if self.params.modify.cosym.anchor:
    if self.mpi_helper.rank == 0:
      MIN_ANCHOR = 20
      from xfel.merging.application.modify.token_passing_left_right import construct_anchor_src_to_dst_plan
      anchor_plan = construct_anchor_src_to_dst_plan(
          MIN_ANCHOR, flex.int(reports),
          self.params.modify.cosym.tranch_size, self.mpi_helper.comm)
    else:
      anchor_plan = 0
    anchor_plan = self.mpi_helper.comm.bcast(anchor_plan, root=0)

  self.logger.log_step_time("COSYM")

  if self.params.modify.cosym.plot.interactive:
    self.params.modify.cosym.plot.filename = None
  has_tokens = len(tokens) > 0
  all_has_tokens = self.mpi_helper.comm.allgather(has_tokens)
  ranks_with_tokens = [i for (i, val) in enumerate(all_has_tokens) if val]
  ranks_to_plot = ranks_with_tokens[:self.params.modify.cosym.plot.n_max]
  do_plot = (self.params.modify.cosym.plot.do_plot
             and self.mpi_helper.rank in ranks_to_plot)

  if len(tokens) > 0:  # Only select ranks that have been assigned tranch data, for mutual coset determination
    # because cosym has a problem with hashed identifiers, use simple experiment identifiers
    sampling_experiments_for_cosym = ExperimentList()
    sampling_reflections_for_cosym = []  # a list of flex.reflection_table
    COSYM = self.task_c(self.params, self.mpi_helper, self.logger, tokens,
                        sampling_experiments_for_cosym,
                        sampling_reflections_for_cosym,
                        communicator_size=self.mpi_helper.size,
                        do_plot=do_plot)
    self.uuid_cache = COSYM.uuid_cache  # reformed uuid list after n_refls filter

    rank_N_refl = flex.double([r.size() for r in COSYM.reflections])
    message = """Task 1. Prepare the data for cosym
    change_of_basis_ops_to_minimum_cell
    eliminate_sys_absent
    transform models into Miller arrays, putting data in primitive triclinic reduced cell
    There are %d experiments with %d reflections, averaging %.1f reflections/experiment""" % (
        len(COSYM.experiments), flex.sum(rank_N_refl), flex.mean(rank_N_refl))
    self.logger.log(message)
    if self.mpi_helper.rank == 1:
      print(message)

    #P = Timer("COSYM.run")
    COSYM.run()
    #if self.mpi_helper.rank == 1: del P

    keyval = [("experiment", []), ("reindex_op", []), ("coset", [])]
    raw = OrderedDict(keyval)
    if self.mpi_helper.rank == 0:
      print("Rank", self.mpi_helper.rank, "experiments:",
            len(sampling_experiments_for_cosym))
    for sidx in range(len(self.uuid_cache)):
      raw["experiment"].append(self.uuid_cache[sidx])
      sidx_plus = sidx
      try:
        minimum_to_input = COSYM.cb_op_to_minimum[sidx_plus].inverse()
      except Exception as e:
        print("raising", e, sidx_plus, len(COSYM.cb_op_to_minimum))
        raise e
      reindex_op = minimum_to_input * \
          sgtbx.change_of_basis_op(COSYM.cosym_analysis.reindexing_ops[sidx_plus]) * \
          COSYM.cb_op_to_minimum[sidx_plus]

      # Keep this block even though not currently used; needed for future assertions:
      LG = COSYM.cosym_analysis.target._lattice_group
      LGINP = LG.change_basis(COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(minimum_to_input)
      SG = COSYM.cosym_analysis.input_space_group
      SGINP = SG.change_basis(COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(minimum_to_input)
      CO = sgtbx.cosets.left_decomposition(LGINP, SGINP)
      partitions = CO.partitions
      this_reindex_op = reindex_op.as_hkl()
      this_coset = None
      for p_no, partition in enumerate(partitions):
        partition_ops = [change_of_basis_op(ip).as_hkl() for ip in partition]
        if this_reindex_op in partition_ops:
          this_coset = p_no
          break
      assert this_coset is not None
      raw["coset"].append(this_coset)
      raw["reindex_op"].append(this_reindex_op)

    keys = list(raw.keys())
    from pandas import DataFrame as df
    data = df(raw)
    # major assumption is that all the coset decompositions "CO" are the same. NOT sure if a test is needed.
    reports = self.mpi_helper.comm.gather((data, CO), root=0)
  else:
    reports = self.mpi_helper.comm.gather(None, root=0)

  if self.mpi_helper.rank == 0:
    # report back to rank 0 and reconcile all coset assignments
    while None in reports:
      reports.pop(reports.index(None))
    # global CO
    global_coset_decomposition = reports[0][1]  # again, assuming here they are all the same XXX
  else:
    global_coset_decomposition = 0
  global_coset_decomposition = self.mpi_helper.comm.bcast(global_coset_decomposition, root=0)
  partitions = global_coset_decomposition.partitions

  self.mpi_helper.comm.barrier()  # end of distributed embedding

  if self.params.modify.cosym.anchor:
    anchor_tokens = apply_all_to_all(plan=anchor_plan, dst_offset=0,
                                     value=(input_experiments, input_reflections),
                                     comm=self.mpi_helper.comm)

  if self.mpi_helper.rank == 0:
    from xfel.merging.application.modify.df_cosym import reconcile_cosym_reports
    REC = reconcile_cosym_reports(reports)
    results = REC.composite_tranch_merge(voting_method="consensus")

    # at this point we have the opportunity to reconcile the results with an anchor
    # recycle the data structures for anchor determination
    if self.params.modify.cosym.anchor:
      sampling_experiments_for_cosym, sampling_reflections_for_cosym = self.task_a(self.params)
      ANCHOR = self.task_c(self.params, self.mpi_helper, self.logger,
                           anchor_tokens,
                           sampling_experiments_for_cosym,
                           sampling_reflections_for_cosym,
                           uuid_starting=["anchor structure"],
                           communicator_size=1)  # only run on the rank==0 tranch.
      self.uuid_cache = ANCHOR.uuid_cache  # reformed uuid list after n_refls filter
      #P = Timer("ANCHOR.run")
      ANCHOR.run()  # Future redesign XXX FIXME do this in rank 0 in parallel with distributed composite tranches
      #del P

      keyval = [("experiment", []), ("coset", [])]
      raw = OrderedDict(keyval)
      print("Anchor", "experiments:", len(sampling_experiments_for_cosym))

      anchor_op = ANCHOR.cb_op_to_minimum[0].inverse() * \
          sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[0]) * \
          ANCHOR.cb_op_to_minimum[0]
      anchor_coset = None
      for p_no, partition in enumerate(partitions):
        partition_ops = [change_of_basis_op(ip).as_hkl() for ip in partition]
        if anchor_op.as_hkl() in partition_ops:
          anchor_coset = p_no
          break
      assert anchor_coset is not None
      print("The consensus for the anchor is", anchor_op.as_hkl(),
            " anchor coset", anchor_coset)
      raw["experiment"].append("anchor structure")
      raw["coset"].append(anchor_coset)

      for sidx in range(1, len(self.uuid_cache)):
        raw["experiment"].append(self.uuid_cache[sidx])
        sidx_plus = sidx
        minimum_to_input = ANCHOR.cb_op_to_minimum[sidx_plus].inverse()
        reindex_op = minimum_to_input * \
            sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[sidx_plus]) * \
            ANCHOR.cb_op_to_minimum[sidx_plus]
        this_reindex_op = reindex_op.as_hkl()
        this_coset = None
        for p_no, partition in enumerate(partitions):
          partition_ops = [change_of_basis_op(ip).as_hkl() for ip in partition]
          if this_reindex_op in partition_ops:
            this_coset = p_no
            break
        assert this_coset is not None
        raw["coset"].append(this_coset)

      from pandas import DataFrame as df
      anchor_data = df(raw)
      REC.reconcile_with_anchor(results, anchor_data, anchor_op)
      # no need for return value; results dataframe is modified in place

    if self.params.modify.cosym.dataframe:
      import os
      results.to_pickle(path=os.path.join(self.params.output.output_dir,
                                          self.params.modify.cosym.dataframe))
    transmitted = results
  else:
    transmitted = 0
  self.mpi_helper.comm.barrier()
  transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
  # "transmitted" holds the global coset assignments

  # subselect expt and refl on the successful coset assignments
  # output: experiments --> result_experiments_for_cosym; reflections --> reflections (modified in place)
  result_experiments_for_cosym = ExperimentList()
  good_refls = flex.bool(len(input_reflections), False)
  good_expt_id = list(transmitted["experiment"])
  good_coset = list(transmitted["coset"])  # would like to understand how to use pandas rather than Python list
  for iexpt in range(len(input_experiments)):
    iexpt_id = input_experiments[iexpt].identifier
    keepit = iexpt_id in good_expt_id
    if keepit:
      this_coset = good_coset[good_expt_id.index(iexpt_id)]
      this_cb_op = change_of_basis_op(global_coset_decomposition.partitions[this_coset][0])
      accepted_expt = input_experiments[iexpt]
      if this_coset > 0:
        # need to use wrapper because of cctbx/dxtbx#5
        accepted_expt.crystal = MosaicCrystalSauter2014(
            accepted_expt.crystal.change_basis(this_cb_op))
      result_experiments_for_cosym.append(accepted_expt)
      good_refls |= input_reflections["exp_id"] == iexpt_id
  selected_reflections = input_reflections.select(good_refls)  # XXX is this in place (double check)
  self.mpi_helper.comm.barrier()

  # still have to reindex the reflection table, but try to do it efficiently
  from xfel.merging.application.modify.reindex_cosym import reindex_refl_by_coset
  if len(result_experiments_for_cosym) > 0:
    reindex_refl_by_coset(
        refl=selected_reflections, data=transmitted,
        symms=[E.crystal.get_crystal_symmetry() for E in result_experiments_for_cosym],
        uuids=[E.identifier for E in result_experiments_for_cosym],
        co=global_coset_decomposition,
        anomalous_flag=self.params.merging.merge_anomalous == False,
        verbose=False)
    # this should have re-indexed the refls in place, no need for return value
  self.mpi_helper.comm.barrier()

  # Note: this handles the simple case of lattice ambiguity (P63 in 6/mmm lattice group);
  # in this use case we assume all inputs and outputs are in P63.
  # More complex use cases would have to reset the space group in the crystal, and recalculate
  # the ASU "miller_indices" in the reflections table.
  self.logger.log_step_time("COSYM", True)
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  from xfel.merging.application.utils.data_counter import data_counter
  data_counter(self.params).count(result_experiments_for_cosym, selected_reflections)
  return result_experiments_for_cosym, selected_reflections
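# For illustration only: the coset lookup repeated in the loops above, factored
# into a small helper. Given the partitions of a left coset decomposition and a
# reindexing operator (as an hkl string), return the index of the partition
# containing it, or None. The helper name is hypothetical.
from cctbx import sgtbx

def find_coset_sketch(partitions, reindex_op_hkl):
  for p_no, partition in enumerate(partitions):
    partition_ops = [sgtbx.change_of_basis_op(ip).as_hkl() for ip in partition]
    if reindex_op_hkl in partition_ops:
      return p_no
  return None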
def run(self, experiments, reflections):
  self.logger.log_step_time("POLARIZATION_CORRECTION")

  result = flex.reflection_table()

  for experiment in experiments:
    refls = reflections.select(reflections['exp_id'] == experiment.identifier)
    if len(refls) == 0:
      continue
    beam = experiment.beam
    # Remove the need for pixel size within cxi.merge. Allows multipanel detectors with dissimilar panels.
    # Relies on new frame extractor code called by dials.stills_process that writes s0, s1 and polarization
    # normal vectors all to the integration pickle. Future path (i.e. THIS CODE): use dials json and reflection file.
    s0_vec = matrix.col(beam.get_s0()).normalize()
    s0_polar_norm = beam.get_polarization_normal()
    s1_vec = refls['s1']
    Ns1 = len(s1_vec)
    # project the s1 vector onto the plane normal to s0. Get the result by subtracting the
    # projection of s1 onto s0, which is (s1.dot.s0_norm)s0_norm
    s0_norm = flex.vec3_double(Ns1, s0_vec)
    s1_proj = (s1_vec.dot(s0_norm)) * s0_norm
    s1_in_normal_plane = s1_vec - s1_proj
    # Now want the polar angle between the projected s1 and the polarization normal
    s0_polar_norms = flex.vec3_double(Ns1, s0_polar_norm)
    dotprod = (s1_in_normal_plane.dot(s0_polar_norms))
    costheta = dotprod / (s1_in_normal_plane.norms())
    theta = flex.acos(costheta)
    cos_two_polar_angle = flex.cos(2.0 * theta)
    # gives same as old answer to ~1% but not exact. Not sure why; should not matter.
    tt_vec = experiment.crystal.get_unit_cell().two_theta(
        miller_indices=refls['miller_index'], wavelength=beam.get_wavelength())
    cos_tt_vec = flex.cos(tt_vec)
    sin_tt_vec = flex.sin(tt_vec)
    cos_sq_tt_vec = cos_tt_vec * cos_tt_vec
    sin_sq_tt_vec = sin_tt_vec * sin_tt_vec
    P_nought_vec = 0.5 * (1. + cos_sq_tt_vec)

    F_prime = -1.0  # Hard-coded value defines the incident polarization axis
    P_prime = 0.5 * F_prime * cos_two_polar_angle * sin_sq_tt_vec

    # added as a diagnostic
    #prange = P_nought_vec - P_prime
    #other_F_prime = 1.0
    #otherP_prime = 0.5 * other_F_prime * cos_two_polar_angle * sin_sq_tt_vec
    #otherprange = P_nought_vec - otherP_prime
    #diff2 = flex.abs(prange - otherprange)
    #print("mean diff is", flex.mean(diff2), "range", flex.min(diff2), flex.max(diff2))
    # done

    correction = 1 / (P_nought_vec - P_prime)
    refls['intensity.sum.value'] = refls['intensity.sum.value'] * correction
    refls['intensity.sum.variance'] = refls['intensity.sum.variance'] * correction**2  # propagated error
    # This corrects observations for polarization assuming 100% polarization on
    # one axis (thus the F_prime = -1.0 rather than the perpendicular axis, +1.0).
    # Polarization model as described by Kahn, Fourme, Gadet, Janin, Dumas & Andre
    # (1982) J. Appl. Cryst. 15, 330-337, equations 13 - 15.

    result.extend(refls)

  if len(reflections) > 0:
    self.logger.log("Applied polarization correction. Mean intensity changed from %.2f to %.2f" %
                    (flex.mean(reflections['intensity.sum.value']),
                     flex.mean(result['intensity.sum.value'])))

  self.logger.log_step_time("POLARIZATION_CORRECTION", True)
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  # Remove 's1' column from the reflection table
  from xfel.merging.application.reflection_table_utils import reflection_table_utils
  reflections = reflection_table_utils.prune_reflection_table_keys(
      reflections=result, keys_to_delete=['s1'])
  self.logger.log("Pruned reflection table")
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  return experiments, reflections
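# For illustration only: the Kahn polarization factor on scalars, matching the
# vectorized flex arithmetic above under the same assumption of full
# polarization along one axis (F' = -1). two_theta is the scattering angle 2-theta
# and polar_angle is the angle between the projected s1 and the polarization
# normal, both in radians. The function name is hypothetical.
import math

def kahn_polarization_correction_sketch(two_theta, polar_angle, F_prime=-1.0):
  P_nought = 0.5 * (1.0 + math.cos(two_theta) ** 2)
  P_prime = 0.5 * F_prime * math.cos(2.0 * polar_angle) * math.sin(two_theta) ** 2
  return 1.0 / (P_nought - P_prime)  # multiply intensities by this factor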
def run(self, all_experiments, all_reflections):
  """ Load all the data using MPI """
  from dxtbx.model.experiment_list import ExperimentList
  from dials.array_family import flex

  # Both must be none or not none
  test = [all_experiments is None, all_reflections is None].count(True)
  assert test in [0, 2]
  if test == 2:
    all_experiments = ExperimentList()
    all_reflections = flex.reflection_table()
    starting_expts_count = starting_refls_count = 0
  else:
    starting_expts_count = len(all_experiments)
    starting_refls_count = len(all_reflections)
  self.logger.log("Initial number of experiments: %d; Initial number of reflections: %d" %
                  (starting_expts_count, starting_refls_count))

  # Generate and send a list of file paths to each worker
  if self.mpi_helper.rank == 0:
    file_list = self.get_list()
    self.logger.log("Built an input list of %d json/pickle file pairs" % (len(file_list)))
    self.params.input.path = None  # Rank 0 has already parsed the input parameters

    # optionally write a file-list mapping to disk, useful in post-processing
    # if save_experiments_and_reflections=True
    file_id_from_names = None
    if self.params.output.expanded_bookkeeping:
      apath = lambda x: os.path.abspath(x)
      file_names_from_id = {i_f: tuple(map(apath, exp_ref_pair))
                            for i_f, exp_ref_pair in enumerate(file_list)}
      with open(os.path.join(self.params.output.output_dir, "file_list_map.json"), "w") as o:
        json.dump(file_names_from_id, o)
      file_id_from_names = {tuple(map(apath, exp_ref_pair)): i_f
                            for i_f, exp_ref_pair in enumerate(file_list)}

    per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
        calculate_file_load(available_rank_count=self.mpi_helper.size)
    self.logger.log('Transmitting a list of %d lists of json/pickle file pairs' %
                    (len(per_rank_file_list)))
    transmitted = per_rank_file_list, file_id_from_names
  else:
    transmitted = None

  self.logger.log_step_time("BROADCAST_FILE_LIST")
  new_file_list, file_names_mapping = self.mpi_helper.comm.bcast(transmitted, root=0)
  new_file_list = new_file_list[self.mpi_helper.rank] \
      if self.mpi_helper.rank < len(new_file_list) else None
  self.logger.log_step_time("BROADCAST_FILE_LIST", True)

  # Load the data
  self.logger.log_step_time("LOAD")
  if new_file_list is not None:
    self.logger.log("Received a list of %d json/pickle file pairs" % len(new_file_list))
    for experiments_filename, reflections_filename in new_file_list:
      self.logger.log("Reading %s %s" % (experiments_filename, reflections_filename))
      experiments = ExperimentListFactory.from_json_file(
          experiments_filename, check_format=self.params.input.read_image_headers)
      reflections = flex.reflection_table.from_file(reflections_filename)
      if self.params.output.expanded_bookkeeping:
        # NOTE: these are un-prunable
        reflections["input_refl_index"] = flex.int(list(range(len(reflections))))
        reflections["orig_exp_id"] = reflections['id']
        assert file_names_mapping is not None
        exp_ref_pair = os.path.abspath(experiments_filename), os.path.abspath(reflections_filename)
        this_refl_fileMappings = [file_names_mapping[exp_ref_pair]] * len(reflections)
        reflections["file_list_mapping"] = flex.int(this_refl_fileMappings)
      self.logger.log("Data read, prepping")

      if 'intensity.sum.value' in reflections:
        reflections['intensity.sum.value.unmodified'] = reflections['intensity.sum.value'] * 1
      if 'intensity.sum.variance' in reflections:
        reflections['intensity.sum.variance.unmodified'] = reflections['intensity.sum.variance'] * 1

      new_ids = flex.int(len(reflections), -1)
      new_identifiers = flex.std_string(len(reflections))
      eid = reflections.experiment_identifiers()
      for k in eid.keys():
        del eid[k]

      if self.params.output.expanded_bookkeeping:
        preGen_experiment_identifiers(experiments, experiments_filename)
      for experiment_id, experiment in enumerate(experiments):
        # select reflections of the current experiment
        refls_sel = reflections['id'] == experiment_id

        if refls_sel.count(True) == 0:
          continue

        if experiment.identifier is None or len(experiment.identifier) == 0:
          experiment.identifier = create_experiment_identifier(
              experiment, experiments_filename, experiment_id)

        if not self.params.input.keep_imagesets:
          experiment.imageset = None
        all_experiments.append(experiment)

        # Reflection experiment 'id' is unique within this rank;
        # 'exp_id' (i.e. experiment identifier) is unique globally
        new_identifiers.set_selected(refls_sel, experiment.identifier)

        new_id = len(all_experiments) - 1
        eid[new_id] = experiment.identifier
        new_ids.set_selected(refls_sel, new_id)
      assert (new_ids < 0).count(True) == 0, "Not all reflections accounted for"
      reflections['id'] = new_ids
      reflections['exp_id'] = new_identifiers
      all_reflections.extend(reflections)
  else:
    self.logger.log("Received a list of 0 json/pickle file pairs")

  self.logger.log_step_time("LOAD", True)
  self.logger.log('Read %d experiments consisting of %d reflections' %
                  (len(all_experiments) - starting_expts_count,
                   len(all_reflections) - starting_refls_count))
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  all_reflections = self.prune_reflection_table_keys(all_reflections)

  # Do we have any data?
  from xfel.merging.application.utils.data_counter import data_counter
  data_counter(self.params).count(all_experiments, all_reflections)
  return all_experiments, all_reflections
def run(self, all_experiments, all_reflections):
  """ Load all the data using MPI """
  from dxtbx.model.experiment_list import ExperimentList
  from dials.array_family import flex

  # Both must be none or not none
  test = [all_experiments is None, all_reflections is None].count(True)
  assert test in [0, 2]
  if test == 2:
    all_experiments = ExperimentList()
    all_reflections = flex.reflection_table()
    starting_expts_count = starting_refls_count = 0
  else:
    starting_expts_count = len(all_experiments)
    starting_refls_count = len(all_reflections)
  self.logger.log("Initial number of experiments: %d; Initial number of reflections: %d" %
                  (starting_expts_count, starting_refls_count))

  # Generate and send a list of file paths to each worker
  if self.mpi_helper.rank == 0:
    file_list = self.get_list()
    self.logger.log("Built an input list of %d json/pickle file pairs" % (len(file_list)))
    self.params.input.path = None  # Rank 0 has already parsed the input parameters
    per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
        calculate_file_load(available_rank_count=self.mpi_helper.size)
    self.logger.log('Transmitting a list of %d lists of json/pickle file pairs' %
                    (len(per_rank_file_list)))
    transmitted = per_rank_file_list
  else:
    transmitted = None

  self.logger.log_step_time("BROADCAST_FILE_LIST")
  transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
  new_file_list = transmitted[self.mpi_helper.rank] \
      if self.mpi_helper.rank < len(transmitted) else None
  self.logger.log_step_time("BROADCAST_FILE_LIST", True)

  # Load the data
  self.logger.log_step_time("LOAD")
  if new_file_list is not None:
    self.logger.log("Received a list of %d json/pickle file pairs" % len(new_file_list))
    for experiments_filename, reflections_filename in new_file_list:
      experiments = ExperimentListFactory.from_json_file(experiments_filename, check_format=False)
      reflections = flex.reflection_table.from_file(reflections_filename)

      for experiment_id, experiment in enumerate(experiments):
        if experiment.identifier is None or len(experiment.identifier) == 0:
          experiment.identifier = create_experiment_identifier(
              experiment, experiments_filename, experiment_id)
        all_experiments.append(experiment)
        #experiment.identifier = "%d"%(len(all_experiments) - 1)

        # select reflections of the current experiment
        refls = reflections.select(reflections['id'] == experiment_id)

        # Reflection experiment 'id' is supposed to be unique within this rank;
        # 'exp_id' (i.e. experiment identifier) is supposed to be unique globally
        #refls['id'] = flex.size_t(len(refls), len(all_experiments)-1)
        refls['exp_id'] = flex.std_string(len(refls), experiment.identifier)

        all_reflections.extend(refls)
  else:
    self.logger.log("Received a list of 0 json/pickle file pairs")

  self.logger.log_step_time("LOAD", True)
  self.logger.log('Read %d experiments consisting of %d reflections' %
                  (len(all_experiments) - starting_expts_count,
                   len(all_reflections) - starting_refls_count))
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  from xfel.merging.application.reflection_table_utils import reflection_table_utils
  all_reflections = reflection_table_utils.prune_reflection_table_keys(
      reflections=all_reflections,
      keys_to_keep=['intensity.sum.value', 'intensity.sum.variance',
                    'miller_index', 'miller_index_asymmetric', 'exp_id', 's1'])
  self.logger.log("Pruned reflection table")
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  # Do we have any data?
  from xfel.merging.application.utils.data_counter import data_counter
  data_counter(self.params).count(all_experiments, all_reflections)
  return all_experiments, all_reflections
def run(self, experiments, reflections):
  assert self.mpi_helper.size not in [2, 3, 4], \
      "Please run modify_cosym on 1 or >= 5 MPI ranks."
  self.logger.log_step_time("COSYM")

  all_sampling_experiments = experiments
  all_sampling_reflections = reflections
  # because cosym has a problem with hashed identifiers, use simple experiment identifiers
  from dxtbx.model.experiment_list import ExperimentList
  sampling_experiments_for_cosym = ExperimentList()
  sampling_reflections_for_cosym = []  # a list of flex.reflection_table

  def task_a():
    # add an anchor
    if self.params.modify.cosym.anchor:
      from xfel.merging.application.model.crystal_model import crystal_model
      XM = crystal_model(params=self.params, purpose="cosym")
      model_intensities = XM.run([], [])
      from dxtbx.model import Experiment, Crystal
      from scitbx.matrix import sqr
      O = sqr(model_intensities.unit_cell().orthogonalization_matrix()).transpose().elems
      real_a = (O[0], O[1], O[2])
      real_b = (O[3], O[4], O[5])
      real_c = (O[6], O[7], O[8])
      nc = Crystal(real_a, real_b, real_c, model_intensities.space_group())
      # prepends the reference model to the cosym E-list
      sampling_experiments_for_cosym.append(Experiment(crystal=nc))
      from dials.array_family import flex
      exp_reflections = flex.reflection_table()
      exp_reflections['intensity.sum.value'] = model_intensities.data()
      exp_reflections['intensity.sum.variance'] = flex.pow(model_intensities.sigmas(), 2)
      exp_reflections['miller_index'] = model_intensities.indices()
      exp_reflections['miller_index_asymmetric'] = model_intensities.indices()
      exp_reflections['flags'] = flex.size_t(model_intensities.size(),
                                             flex.reflection_table.flags.integrated_sum)

      # prepare individual reflection tables for each experiment
      simple_experiment_id = len(sampling_experiments_for_cosym) - 1
      #experiment.identifier = "%d"%simple_experiment_id
      # experiment identifier must be a string according to the *.h file;
      # the identifier is changed on the _for_cosym Experiment list, not the master experiments, for thorough analysis
      sampling_experiments_for_cosym[-1].identifier = "%d" % simple_experiment_id
      # register the integer id as a new column in the per-experiment reflection table
      exp_reflections['id'] = flex.int(len(exp_reflections), simple_experiment_id)
      # the reflection table holds a map from integer id (reflection table) to string id (experiment)
      exp_reflections.experiment_identifiers()[simple_experiment_id] = \
          sampling_experiments_for_cosym[-1].identifier
      sampling_reflections_for_cosym.append(exp_reflections)

  #if self.mpi_helper.rank == 0:
  #  task_a()  # no anchor for initial pass

  def task_1(uuid_starting=[], mpi_helper_size=1, do_plot=False):
    self.uuid_cache = uuid_starting
    if mpi_helper_size == 1:  # simple case, one rank
      for experiment in all_sampling_experiments:
        sampling_experiments_for_cosym.append(experiment)
        self.uuid_cache.append(experiment.identifier)
        exp_reflections = all_sampling_reflections.select(
            all_sampling_reflections['exp_id'] == experiment.identifier)
        # prepare individual reflection tables for each experiment
        simple_experiment_id = len(sampling_experiments_for_cosym) - 1
        #experiment.identifier = "%d"%simple_experiment_id
        # experiment identifier must be a string according to the *.h file;
        # the identifier is changed on the _for_cosym Experiment list, not the master experiments, for thorough analysis
        sampling_experiments_for_cosym[-1].identifier = "%d" % simple_experiment_id
        # register the integer id as a new column in the per-experiment reflection table
        exp_reflections['id'] = flex.int(len(exp_reflections), simple_experiment_id)
        # the reflection table holds a map from integer id (reflection table) to string id (experiment)
        exp_reflections.experiment_identifiers()[simple_experiment_id] = \
            sampling_experiments_for_cosym[-1].identifier
        sampling_reflections_for_cosym.append(exp_reflections)
    else:  # complex case, overlap tranches for mutual coset determination
      self.mpi_helper.MPI.COMM_WORLD.barrier()
      from xfel.merging.application.modify.token_passing_left_right import token_passing_left_right
      values = token_passing_left_right((experiments, reflections))
      for tranch_experiments, tranch_reflections in values:
        for experiment in tranch_experiments:
          sampling_experiments_for_cosym.append(experiment)
          self.uuid_cache.append(experiment.identifier)
          exp_reflections = tranch_reflections.select(
              tranch_reflections['exp_id'] == experiment.identifier)
          # prepare individual reflection tables for each experiment
          simple_experiment_id = len(sampling_experiments_for_cosym) - 1
          #experiment.identifier = "%d"%simple_experiment_id
          # experiment identifier must be a string according to the *.h file;
          # the identifier is changed on the _for_cosym Experiment list, not the master experiments, for thorough analysis
          sampling_experiments_for_cosym[-1].identifier = "%d" % simple_experiment_id
          # register the integer id as a new column in the per-experiment reflection table
          exp_reflections['id'] = flex.int(len(exp_reflections), simple_experiment_id)
          # the reflection table holds a map from integer id (reflection table) to string id (experiment)
          exp_reflections.experiment_identifiers()[simple_experiment_id] = \
              sampling_experiments_for_cosym[-1].identifier
          sampling_reflections_for_cosym.append(exp_reflections)

    from dials.command_line import cosym as cosym_module
    cosym_module.logger = self.logger

    i_plot = self.mpi_helper.rank
    from xfel.merging.application.modify.aux_cosym import dials_cl_cosym_subclass as dials_cl_cosym_wrapper
    COSYM = dials_cl_cosym_wrapper(
        sampling_experiments_for_cosym, sampling_reflections_for_cosym,
        self.uuid_cache,
        params=self.params.modify.cosym,
        output_dir=self.params.output.output_dir,
        do_plot=do_plot, i_plot=i_plot)
    return COSYM

  if self.params.modify.cosym.plot.interactive:
    self.params.modify.cosym.plot.filename = None
  do_plot = (self.params.modify.cosym.plot.do_plot
             and self.mpi_helper.rank < self.params.modify.cosym.plot.n_max)
  COSYM = task_1(mpi_helper_size=self.mpi_helper.size, do_plot=do_plot)
  self.uuid_cache = COSYM.uuid_cache  # reformed uuid list after n_refls filter

  import dials.algorithms.symmetry.cosym.target
  from xfel.merging.application.modify.aux_cosym import TargetWithFastRij
  dials.algorithms.symmetry.cosym.target.Target = TargetWithFastRij

  rank_N_refl = flex.double([r.size() for r in COSYM.reflections])
  message = """Task 1. Prepare the data for cosym
  change_of_basis_ops_to_minimum_cell
  eliminate_sys_absent
  transform models into Miller arrays, putting data in primitive triclinic reduced cell
  There are %d experiments with %d reflections, averaging %.1f reflections/experiment""" % (
      len(COSYM.experiments), flex.sum(rank_N_refl), flex.mean(rank_N_refl))
  self.logger.log(message)

  COSYM.run()

  from collections import OrderedDict
  #assert len(sampling_experiments_for_cosym) + 1 anchor if present == len(COSYM._experiments)
  keyval = [("experiment", []), ("reindex_op", []), ("coset", [])]
  raw = OrderedDict(keyval)
  print("Rank", self.mpi_helper.rank, "experiments:", len(sampling_experiments_for_cosym))
  for sidx in range(len(self.uuid_cache)):
    raw["experiment"].append(self.uuid_cache[sidx])
    sidx_plus = sidx
    minimum_to_input = COSYM.cb_op_to_minimum[sidx_plus].inverse()
    reindex_op = minimum_to_input * \
        sgtbx.change_of_basis_op(COSYM.cosym_analysis.reindexing_ops[sidx_plus]) * \
        COSYM.cb_op_to_minimum[sidx_plus]

    # Keep this block even though not currently used; needed for future assertions:
    LG = COSYM.cosym_analysis.target._lattice_group
    LGINP = LG.change_basis(COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(minimum_to_input)
    SG = COSYM.cosym_analysis.input_space_group
    SGINP = SG.change_basis(COSYM.cosym_analysis.cb_op_inp_min.inverse()).change_basis(minimum_to_input)
    CO = sgtbx.cosets.left_decomposition(LGINP, SGINP)
    partitions = CO.partitions
    this_reindex_op = reindex_op.as_hkl()
    this_coset = None
    for p_no, partition in enumerate(partitions):
      partition_ops = [change_of_basis_op(ip).as_hkl() for ip in partition]
      if this_reindex_op in partition_ops:
        this_coset = p_no
        break
    assert this_coset is not None
    raw["coset"].append(this_coset)
    raw["reindex_op"].append(this_reindex_op)

  keys = list(raw.keys())
  from pandas import DataFrame as df
  data = df(raw)
  # major assumption is that all the coset decompositions "CO" are the same. NOT sure if a test is needed.

  # report back to rank 0 and reconcile all coset assignments
  reports = self.mpi_helper.comm.gather((data, CO), root=0)
  if self.mpi_helper.rank == 0:
    from xfel.merging.application.modify.df_cosym import reconcile_cosym_reports
    REC = reconcile_cosym_reports(reports)
    results = REC.simple_merge(voting_method="consensus")

    # at this point we have the opportunity to reconcile the results with an anchor
    # recycle the data structures for anchor determination
    if self.params.modify.cosym.anchor:
      sampling_experiments_for_cosym = ExperimentList()
      sampling_reflections_for_cosym = []
      print("ANCHOR determination")
      task_a()
      ANCHOR = task_1(uuid_starting=["anchor structure"],
                      mpi_helper_size=1)  # only run on the rank==0 tranch.
      self.uuid_cache = ANCHOR.uuid_cache  # reformed uuid list after n_refls filter
      ANCHOR.run()

      keyval = [("experiment", []), ("coset", [])]
      raw = OrderedDict(keyval)
      print("Anchor", "experiments:", len(sampling_experiments_for_cosym))

      anchor_op = ANCHOR.cb_op_to_minimum[0].inverse() * \
          sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[0]) * \
          ANCHOR.cb_op_to_minimum[0]
      anchor_coset = None
      for p_no, partition in enumerate(partitions):
        partition_ops = [change_of_basis_op(ip).as_hkl() for ip in partition]
        if anchor_op.as_hkl() in partition_ops:
          anchor_coset = p_no
          break
      assert anchor_coset is not None
      print("The consensus for the anchor is", anchor_op.as_hkl(),
            " anchor coset", anchor_coset)
      raw["experiment"].append("anchor structure")
      raw["coset"].append(anchor_coset)

      for sidx in range(1, len(self.uuid_cache)):
        raw["experiment"].append(self.uuid_cache[sidx])
        sidx_plus = sidx
        minimum_to_input = ANCHOR.cb_op_to_minimum[sidx_plus].inverse()
        reindex_op = minimum_to_input * \
            sgtbx.change_of_basis_op(ANCHOR.cosym_analysis.reindexing_ops[sidx_plus]) * \
            ANCHOR.cb_op_to_minimum[sidx_plus]
        this_reindex_op = reindex_op.as_hkl()
        this_coset = None
        for p_no, partition in enumerate(partitions):
          partition_ops = [change_of_basis_op(ip).as_hkl() for ip in partition]
          if this_reindex_op in partition_ops:
            this_coset = p_no
            break
        assert this_coset is not None
        raw["coset"].append(this_coset)

      from pandas import DataFrame as df
      anchor_data = df(raw)
      REC.reconcile_with_anchor(results, anchor_data, anchor_op)
      # no need for return value; results dataframe is modified in place

    if self.params.modify.cosym.dataframe:
      import os
      results.to_pickle(path=os.path.join(self.params.output.output_dir,
                                          self.params.modify.cosym.dataframe))
    transmitted = results
  else:
    transmitted = None
  self.mpi_helper.comm.barrier()
  transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
  # "transmitted" holds the global coset assignments

  # subselect expt and refl on the successful coset assignments
  # output: experiments --> result_experiments_for_cosym; reflections --> reflections (modified in place)
  result_experiments_for_cosym = ExperimentList()
  good_refls = flex.bool(len(reflections), False)
  good_expt_id = list(transmitted["experiment"])
  good_coset = list(transmitted["coset"])  # would like to understand how to use pandas rather than Python list
  for iexpt in range(len(experiments)):
    iexpt_id = experiments[iexpt].identifier
    keepit = iexpt_id in good_expt_id
    if keepit:
      this_coset = good_coset[good_expt_id.index(iexpt_id)]
      this_cb_op = change_of_basis_op(CO.partitions[this_coset][0])
      accepted_expt = experiments[iexpt]
      if this_coset > 0:
        # need to use wrapper because of cctbx/dxtbx#5
        accepted_expt.crystal = MosaicCrystalSauter2014(
            accepted_expt.crystal.change_basis(this_cb_op))
      result_experiments_for_cosym.append(accepted_expt)
      good_refls |= reflections["exp_id"] == iexpt_id
  reflections = reflections.select(good_refls)
  self.mpi_helper.comm.barrier()
  #if self.mpi_helper.rank == 0:
  #  import pickle
  #  with open("refl.pickle", "wb") as F:
  #    pickle.dump(reflections, F)
  #    pickle.dump(transmitted, F)
  #    pickle.dump([E.crystal.get_crystal_symmetry() for E in result_experiments_for_cosym], F)
  #    pickle.dump([E.identifier for E in result_experiments_for_cosym], F)
  #    pickle.dump(CO, F)

  # still have to reindex the reflection table, but try to do it efficiently
  from xfel.merging.application.modify.reindex_cosym import reindex_refl_by_coset
  reindex_refl_by_coset(
      refl=reflections, data=transmitted,
      symms=[E.crystal.get_crystal_symmetry() for E in result_experiments_for_cosym],
      uuids=[E.identifier for E in result_experiments_for_cosym],
      co=CO,
      anomalous_flag=self.params.merging.merge_anomalous == False,
      verbose=False)
  # this should have re-indexed the refls in place, no need for return value
  self.mpi_helper.comm.barrier()

  # Note: this handles the simple case of lattice ambiguity (P63 in 6/mmm lattice group);
  # in this use case we assume all inputs and outputs are in P63.
  # More complex use cases would have to reset the space group in the crystal, and recalculate
  # the ASU "miller_indices" in the reflections table.
  self.logger.log_step_time("COSYM", True)
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  from xfel.merging.application.utils.data_counter import data_counter
  data_counter(self.params).count(result_experiments_for_cosym, reflections)
  return result_experiments_for_cosym, reflections
def run(self, all_experiments, all_reflections):
  """ Load all the data using MPI """
  from dxtbx.model.experiment_list import ExperimentList
  from dials.array_family import flex

  # Both must be none or not none
  test = [all_experiments is None, all_reflections is None].count(True)
  assert test in [0, 2]
  if test == 2:
    all_experiments = ExperimentList()
    all_reflections = flex.reflection_table()
    starting_expts_count = starting_refls_count = 0
  else:
    starting_expts_count = len(all_experiments)
    starting_refls_count = len(all_reflections)
  self.logger.log("Initial number of experiments: %d; Initial number of reflections: %d" %
                  (starting_expts_count, starting_refls_count))

  # Generate and send a list of file paths to each worker
  if self.mpi_helper.rank == 0:
    file_list = self.get_list()
    self.logger.log("Built an input list of %d json/pickle file pairs" % (len(file_list)))
    self.params.input.path = None  # Rank 0 has already parsed the input parameters
    per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
        calculate_file_load(available_rank_count=self.mpi_helper.size)
    self.logger.log('Transmitting a list of %d lists of json/pickle file pairs' %
                    (len(per_rank_file_list)))
    transmitted = per_rank_file_list
  else:
    transmitted = None

  self.logger.log_step_time("BROADCAST_FILE_LIST")
  transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
  new_file_list = transmitted[self.mpi_helper.rank] \
      if self.mpi_helper.rank < len(transmitted) else None
  self.logger.log_step_time("BROADCAST_FILE_LIST", True)

  # Load the data
  self.logger.log_step_time("LOAD")
  if new_file_list is not None:
    self.logger.log("Received a list of %d json/pickle file pairs" % len(new_file_list))
    for experiments_filename, reflections_filename in new_file_list:
      experiments = ExperimentListFactory.from_json_file(experiments_filename, check_format=False)
      reflections = flex.reflection_table.from_file(reflections_filename)
      # NOTE: had to use slicing below because selection no longer works...
      reflections.sort("id")
      unique_refl_ids = set(reflections['id'])
      assert len(unique_refl_ids) == len(experiments), \
          "refl table and experiment list should contain data on the same experiments"  # TODO: decide if this is true
      assert min(reflections["id"]) >= 0, \
          "no more -1 in the id column; ideally it should be the numerical index of the experiment, but beware that this is not enforced anywhere in the upstream code base"

      if 'intensity.sum.value' in reflections:
        reflections['intensity.sum.value.unmodified'] = reflections['intensity.sum.value'] * 1
      if 'intensity.sum.variance' in reflections:
        reflections['intensity.sum.variance.unmodified'] = reflections['intensity.sum.variance'] * 1

      for experiment_id, experiment in enumerate(experiments):
        if experiment.identifier is None or len(experiment.identifier) == 0:
          experiment.identifier = create_experiment_identifier(
              experiment, experiments_filename, experiment_id)
        all_experiments.append(experiment)

        # select reflections of the current experiment
        # FIXME the selection was broken for me; it raised
        # RuntimeError: boost::bad_get: failed value get using boost::get
        #refls = reflections.select(reflections['id'] == experiment_id)
        # NOTE: this is a hack due to the broken experiment_id selection above
        exp_id_pos = np.where(reflections['id'] == experiment_id)[0]
        assert exp_id_pos.size, "no refls in this experiment"  # NOTE: maybe we can relax this assertion?
        refls = reflections[exp_id_pos[0]:exp_id_pos[-1] + 1]
        # FIXME: how will this work if reading in multiple composite-mode experiment jsons?

        # Reflection experiment 'id' is supposed to be unique within this rank;
        # 'exp_id' (i.e. experiment identifier) is supposed to be unique globally
        refls['exp_id'] = flex.std_string(len(refls), experiment.identifier)

        new_id = 0
        if len(all_reflections) > 0:
          new_id = max(all_reflections['id']) + 1
        # FIXME: it is hard to interpret a function call returning a changeable property
        eid = refls.experiment_identifiers()
        for k in eid.keys():
          del eid[k]
        eid[new_id] = experiment.identifier
        refls['id'] = flex.int(len(refls), new_id)
        all_reflections.extend(refls)
  else:
    self.logger.log("Received a list of 0 json/pickle file pairs")

  self.logger.log_step_time("LOAD", True)
  self.logger.log('Read %d experiments consisting of %d reflections' %
                  (len(all_experiments) - starting_expts_count,
                   len(all_reflections) - starting_refls_count))
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  from xfel.merging.application.reflection_table_utils import reflection_table_utils
  all_reflections = reflection_table_utils.prune_reflection_table_keys(
      reflections=all_reflections,
      keys_to_keep=['intensity.sum.value', 'intensity.sum.variance',
                    'miller_index', 'miller_index_asymmetric', 'exp_id', 's1',
                    'intensity.sum.value.unmodified', 'intensity.sum.variance.unmodified'])
  self.logger.log("Pruned reflection table")
  self.logger.log("Memory usage: %d MB" % get_memory_usage())

  # Do we have any data?
  from xfel.merging.application.utils.data_counter import data_counter
  data_counter(self.params).count(all_experiments, all_reflections)
  return all_experiments, all_reflections
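# For illustration only: the slicing hack above assumes that, after
# reflections.sort("id"), all rows of one experiment are contiguous, so
# np.where(...)[0] spans a gap-free range. A hypothetical check of that
# assumption on a plain integer array:
import numpy as np

def contiguous_range_sketch(ids, experiment_id):
  pos = np.where(np.asarray(ids) == experiment_id)[0]
  assert pos.size, "no refls in this experiment"
  assert pos[-1] - pos[0] + 1 == pos.size, "rows of one experiment are not contiguous"
  return pos[0], pos[-1] + 1  # half-open slice bounds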
def run(self, experiments, reflections):
  self.logger.log_step_time("REINDEX")

  # Get list of twinning operators for this space group
  operators = twin_laws(self.params.scaling.i_model).operators
  if not operators:
    self.logger.log("No indexing ambiguity. Skipping this step.")
    return experiments, reflections
  self.logger.log("Resolving indexing ambiguity using operators h,k,l, %s" %
                  ", ".join([op.operator.r().as_hkl() for op in operators]))
  operators = [sgtbx.change_of_basis_op(op.operator.r().as_hkl()) for op in operators]

  result = flex.reflection_table()
  scaler = experiment_scaler(self.params, self.mpi_helper, self.logger)
  model_intensities = self.params.scaling.i_model
  target_symm = symmetry(unit_cell=self.params.scaling.unit_cell,
                         space_group_info=self.params.scaling.space_group)

  def get_correlation(cb_op=None):
    """ Helper function to get the CC to the reference, given an operator """
    # Build a miller array for the experiment reflections
    exp_miller_indices = miller.set(target_symm,
                                    exp_reflections['miller_index_asymmetric'], True)
    exp_intensities = miller.array(exp_miller_indices,
                                   exp_reflections['intensity.sum.value'],
                                   flex.sqrt(exp_reflections['intensity.sum.variance']))
    if cb_op:
      exp_intensities = exp_intensities.change_basis(cb_op).map_to_asu()
    # Extract an array of HKLs from the model to match the experiment HKLs
    matching_indices = miller.match_multi_indices(
        miller_indices_unique=model_intensities.indices(),
        miller_indices=exp_intensities.indices())
    # Least squares
    scaling_result = scaler.fit_experiment_to_reference(
        model_intensities, exp_intensities, matching_indices)
    return scaling_result.correlation if scaling_result.correlation is not None else -1

  # Test each experiment to see if an operator gives a better CC to the reference,
  # and if it does, apply it
  for expt_id, experiment in enumerate(experiments):
    exp_reflections = reflections.select(reflections['exp_id'] == experiment.identifier)

    all_correlations = []
    best_correlation = get_correlation()
    all_correlations.append(best_correlation)
    best_op = None
    for cb_op in operators:
      test_correlation = get_correlation(cb_op)
      all_correlations.append(test_correlation)
      if test_correlation > best_correlation:
        best_correlation = test_correlation
        best_op = cb_op
    if best_op:
      exp_miller_indices = miller.set(target_symm, exp_reflections['miller_index'],
                                      True).change_basis(best_op)
      exp_reflections['miller_index_asymmetric'] = exp_miller_indices.map_to_asu().indices()
      exp_reflections['miller_index'] = exp_miller_indices.indices()
      # need to use wrapper because of cctbx/dxtbx#5
      experiment.crystal = MosaicCrystalSauter2014(experiment.crystal.change_basis(best_op))

    result.extend(exp_reflections)
    self.logger.log("Expt %d, reindexing op correlations: %s" %
                    (expt_id, ", ".join(["%6.3f" % c for c in all_correlations])))

  self.logger.log_step_time("REINDEX", True)
  self.logger.log("Memory usage: %d MB" % get_memory_usage())
  return experiments, result
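# For illustration only: the operator-selection pattern above reduced to its
# core. Given a scoring function (here, correlation to a reference; None means
# the identity operator h,k,l) and a list of candidate operators, keep the
# operator that improves on the identity, or None if none does. The helper
# name is hypothetical.
def select_best_operator_sketch(get_correlation, operators):
  best_correlation = get_correlation()  # identity operator h,k,l
  best_op = None
  for cb_op in operators:
    test_correlation = get_correlation(cb_op)
    if test_correlation > best_correlation:
      best_correlation = test_correlation
      best_op = cb_op
  return best_op, best_correlation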