def broadcast(fn, opt: PreProcOpts):
    """Pre-processes a dataset comprising movie frames in one go, by transferring
    the beam-center positions and diffraction spots (in CXI format) found in an
    aggregated set processed earlier.

    Arguments:
        fn {str or list} -- [file/list name of input files]
        opt {PreProcOpts} -- [pre-processing options]

    Raises:
        err: [any exception encountered during processing, after logging it]

    Returns:
        [list] -- [output files]
    """
    if isinstance(fn, list) and len(fn) == 1:
        fn = fn[0]

    def log(*args):
        # print only if verbose mode is on, or any argument is an exception
        if not (opt.verbose or any(isinstance(e, Exception) for e in args)):
            return
        if isinstance(fn, list):
            dispfn = os.path.basename(fn[0]) + ' etc.'
        else:
            dispfn = os.path.basename(fn)
        idstring = '[{} - {} - broadcast] '.format(
            datetime.datetime.now().time(), dispfn)
        print(idstring, *args)

    t0 = time()
    dsagg = Dataset.from_list(fn, load_tables=False)
    dsraw = Dataset.from_list(list(dsagg.shots.file_raw.unique()))
    dsraw.shots['file_raw'] = dsraw.shots['file']  # required for association later

    reference = imread(opt.reference)
    pxmask = imread(opt.pxmask)

    # And now: the interesting part...
    dsagg.shots['from_id'] = range(dsagg.shots.shape[0])  # label original (aggregated) shots

    # mark those raw shots that made it into the aggregated set
    in_agg = dsraw.shots[opt.idfields].merge(
        dsagg.shots[opt.idfields + ['selected']],
        on=opt.idfields, how='left')['selected'].fillna(False)
    dsraw.shots['selected'] = in_agg

    dssel = None
    try:
        dsraw.open_stacks(readonly=True)
        dssel = dsraw.get_selection(f'({opt.select_query}) and selected',
                                    file_suffix=opt.single_suffix,
                                    new_folder=opt.proc_dir)
        shots = dssel.shots.merge(dsagg.shots[opt.idfields + ['from_id']],
                                  on=opt.idfields, validate='m:1')  # _inner_ merge
        log(f'{dsraw.shots.shape[0]} raw, {dssel.shots.shape[0]} selected, '
            f'{dsagg.shots.shape[0]} aggregated.')

        # get the broadcast image centers
        dsagg.open_stacks(readonly=True)
        ctr = dsagg.stacks[opt.center_stack][shots.from_id.values, :]

        # flat-field and dead-pixel correction
        stack_rechunked = dssel.raw_counts.rechunk({0: ctr.chunks[0]})  # select and re-chunk the raw data
        if opt.correct_saturation:
            stack_ff = proc2d.apply_flatfield(
                proc2d.apply_saturation_correction(
                    stack_rechunked, opt.shutter_time, opt.dead_time), reference)
        else:
            stack_ff = proc2d.apply_flatfield(stack_rechunked, reference)
        stack = proc2d.correct_dead_pixels(stack_ff, pxmask, strategy='replace',
                                           replace_val=-1, mask_gaps=True)
        centered = proc2d.center_image(stack, ctr[:, 0], ctr[:, 1],
                                       opt.xsize, opt.ysize, -1, parallel=True)

        # add the new stacks to the selected dataset
        alldata = {
            'center_of_mass': dsagg.stacks['center_of_mass'][shots.from_id.values, ...],
            'lorentz_fit': dsagg.stacks['lorentz_fit'][shots.from_id.values, ...],
            'beam_center': ctr,
            'centered': centered.astype(np.float32) if opt.float else centered.astype(np.int16),
            'pxmask_centered': (centered != -1).astype(np.uint16),
            'adf1': proc2d.apply_virtual_detector(centered, opt.r_adf1[0], opt.r_adf1[1]),
            'adf2': proc2d.apply_virtual_detector(centered, opt.r_adf2[0], opt.r_adf2[1])
        }
        if opt.broadcast_peaks:
            alldata.update({
                'nPeaks': dsagg.stacks['nPeaks'][shots.from_id.values, ...],
                'peakTotalIntensity': dsagg.stacks['peakTotalIntensity'][shots.from_id.values, ...],
                'peakXPosRaw': dsagg.stacks['peakXPosRaw'][shots.from_id.values, ...],
                'peakYPosRaw': dsagg.stacks['peakYPosRaw'][shots.from_id.values, ...],
            })

        for lbl, stk in alldata.items():
            dssel.add_stack(lbl, stk, overwrite=True)

        dssel.init_files(overwrite=True)
        dssel.store_tables(shots=True, features=True)
        dssel.open_stacks(readonly=False)
        dssel.delete_stack('raw_counts', from_files=False)  # the raw counts are not needed in the new files
        dssel.store_stacks(overwrite=True, progress_bar=False)  # runs the actual (lazy) computation
        log('Finished with', dssel.centered.shape[0], 'shots after', time() - t0, 'seconds')

    except Exception as err:
        log('Broadcast processing failed:', err)
        raise err

    finally:
        dsagg.close_stacks()
        dsraw.close_stacks()
        if dssel is not None:  # dssel does not exist if get_selection itself failed
            dssel.close_stacks()

    return dssel.files
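

# A minimal usage sketch for broadcast(), not part of the pipeline itself. It
# assumes the aggregated dataset was produced by an earlier peak-finding and
# centering step; the option-file name and glob pattern below are hypothetical,
# and the PreProcOpts fields are those referenced in broadcast() above.
def _example_broadcast_run():
    from glob import glob
    opt = PreProcOpts('preproc.yaml')               # hypothetical options file
    agg_files = sorted(glob('proc_data/*_agg.h5'))  # hypothetical aggregated files
    out_files = broadcast(agg_files, opt)           # transfer centers/peaks to all frames
    print('Broadcast wrote:', out_files)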
def read_files(self):

    args = self.args  # use the stored command-line arguments throughout

    file_type = args.filename.rsplit('.', 1)[-1]

    if file_type == 'stream':
        print(f'Parsing stream file {args.filename}...')
        stream = StreamParser(args.filename)
        # Parsing of the geometry embedded in the stream is currently disabled;
        # for a single-panel geometry the image transform is the identity anyway.
        # TODO: make this more elegant, e.g. by parsing stream._geometry_string
        # and overwriting the image transform function accordingly.
        self.geom = None
        try:
            self.data_path = stream.geometry['data']
        except KeyError:
            if args.geometry is None:
                raise ValueError('No data location specified in geometry file. Please use -d parameter.')

        files = sorted(list(stream.shots['file'].unique()))
        try:
            self.dataset = Dataset.from_files(files, load_tables=False, init_stacks=False, open_stacks=False)
            self.dataset.load_tables(features=True)
            self.dataset.merge_stream(stream)
            # get_selection would not be the right method to call here (it changes IDs); instead do...
            self.dataset._shots = self.dataset._shots.loc[self.dataset._shots.selected, :].reset_index(drop=True)
            # TODO: get subset for incomplete coverage
            print('Merged stream and hdf5 shot lists')
        except Exception as err:
            self.dataset = Dataset()
            self.dataset._shots = stream.shots
            self.dataset._peaks = stream.peaks
            self.dataset._predict = stream.indexed
            self.dataset._shots['selected'] = True
            print('Could not load shot lists from H5 files, but have them from the stream file.')
            print(f'Reason: {err}')

    if args.geometry is not None:
        raise ValueError('Geometry files are currently not supported.')
        # self.geom = load_crystfel_geometry(args.geometry)

    if file_type in ['lst', 'h5', 'hdf', 'nxs']:
        self.dataset = Dataset.from_list(args.filename, load_tables=True, init_stacks=False, open_stacks=False)
        if not self.dataset.shots.selected.all():
            # dirty removal of unwanted shots is sufficient in this case:
            self.dataset._shots = self.dataset._shots.loc[self.dataset._shots.selected, :].reset_index(drop=True)

    if args.data_path is not None:
        self.data_path = args.data_path

    if self.data_path is None:
        # data path neither set via stream file nor explicitly, so we have to guess
        try:
            with h5py.File(self.dataset.shots.file.iloc[0], 'r') as fh:
                base = '/%/data'.replace('%', self.dataset.shots.subset.iloc[0])
                self.data_path = '/%/data/' + fh[base].attrs['signal']
                print('Found data path', self.data_path)
        except Exception as err:
            warn(str(err), RuntimeWarning)
            print('Could not find out data path. Assuming /%/data/raw_counts')
            self.data_path = '/%/data/raw_counts'

    if args.query:
        print('Only showing shots with', args.query)
        # a full get_selection would change shot IDs; just cut the shot list instead
        self.dataset._shots = self.dataset._shots.query(args.query)

    if args.sort_crystals:
        print('Re-sorting shots by region/crystal/run.')
        self.dataset._shots = self.dataset._shots.sort_values(by=['sample', 'region', 'crystal_id', 'run'])

    if not args.internal:
        # adxv_args = {'wavelength': 0.0251, 'distance': 2280, 'pixelsize': 0.055}
        adxv_args = {}
        self.adxv = Adxv(hdf5_path=self.data_path.replace('%', 'entry'),
                         adxv_bin=args.adxv_bin, **adxv_args)

    self.b_goto.setMaximum(self.dataset.shots.shape[0] - 1)
    self.b_goto.setMinimum(0)
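

# Standalone sketch of the NeXus-style data-path lookup used in read_files()
# above: the '/<subset>/data' group carries a 'signal' attribute naming the
# stack to display. File and subset names here are hypothetical.
def _guess_data_path(h5_file: str, subset: str = 'entry') -> str:
    import h5py
    with h5py.File(h5_file, 'r') as fh:
        grp = fh[f'/{subset}/data']
        signal = grp.attrs.get('signal', 'raw_counts')  # fall back to raw_counts
        if isinstance(signal, bytes):
            signal = signal.decode()  # h5py may return string attributes as bytes
    return '/%/data/' + signal       # '%' is substituted per subset by the viewer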
def refine_center(fn, opt: PreProcOpts):
    """Refines the centering of diffraction patterns based on Friedel-mate positions.

    Arguments:
        fn {str} -- [file/list name of input files, can contain wildcards]
        opt {PreProcOpts} -- [pre-processing options]

    Raises:
        err: [any exception encountered while storing the center-refined images]

    Returns:
        [list] -- [output files]
    """
    if isinstance(fn, list) and len(fn) == 1:
        fn = fn[0]

    def log(*args):
        # print only if verbose mode is on, or any argument is an exception
        if not (opt.verbose or any(isinstance(e, Exception) for e in args)):
            return
        if isinstance(fn, list):
            dispfn = os.path.basename(fn[0]) + ' etc.'
        else:
            dispfn = os.path.basename(fn)
        idstring = '[{} - {} - refine_center] '.format(
            datetime.datetime.now().time(), dispfn)
        print(idstring, *args)

    ds = Dataset.from_list(fn)
    stream = find_peaks(ds, opt=opt, merge_peaks=False, return_cxi=False,
                        geo_params={'clen': opt.cam_length}, exc=opt.im_exc)
    p0 = [opt.xsize // 2, opt.ysize // 2]

    # get the Friedel-refined center from the stream file
    ctr = proc_peaks.center_friedel(stream.peaks, ds.shots, p0=p0,
                                    sigma=opt.peak_sigma,
                                    minpeaks=opt.min_peaks,
                                    maxres=opt.friedel_max_radius)

    maxcorr = ctr['friedel_cost'].values
    changed = np.logical_not(np.isnan(maxcorr))

    with ds.Stacks() as stk:
        beam_center_old = stk['beam_center'].compute()  # previous beam center
    beam_center_new = beam_center_old.copy()
    beam_center_new[changed, :] = (np.ceil(beam_center_old[changed, :])
                                   + ctr.loc[changed, ['beam_x', 'beam_y']].values - p0)

    if (np.abs(np.mean(beam_center_new - beam_center_old, axis=0)) > .5).any():
        log('WARNING: average shift is larger than 0.5!')

    # report refinement statistics
    log('{:g}% of shots refined. \n'.format((1 - np.isnan(maxcorr).sum() / len(maxcorr)) * 100),
        'Shift standard deviation: {} \n'.format(np.std(beam_center_new - beam_center_old, axis=0)),
        'Average shift: {} \n'.format(np.mean(beam_center_new - beam_center_old, axis=0)))

    # make new files and add the shifted images
    try:
        ds.open_stacks(readonly=False)
        centered2 = proc2d.center_image(ds.centered, ctr['beam_x'].values,
                                        ctr['beam_y'].values,
                                        opt.xsize, opt.ysize, -1)
        ds.add_stack('centered', centered2, overwrite=True)
        ds.add_stack('pxmask_centered', (centered2 != -1).astype(np.uint16), overwrite=True)
        ds.add_stack('beam_center', beam_center_new, overwrite=True)
        ds.change_filenames(opt.refined_file_suffix)
        log('New file names:', ds.files)
        ds.init_files(keep_features=False, overwrite=True)
        ds.store_tables(shots=True, features=True)
        ds.open_stacks(readonly=False)
        ds.store_stacks(overwrite=True, progress_bar=False)
        ds.close_stacks()
        del centered2
    except Exception as err:
        log('Error during storing center-refined images', err)
        raise err
    finally:
        ds.close_stacks()

    # run the peak finder again, this time on the refined images
    pks_cxi = find_peaks(ds, opt=opt, merge_peaks=opt.peaks_nexus,
                         return_cxi=True, geo_params={'clen': opt.cam_length},
                         exc=opt.im_exc)

    # export peaks to CXI-format arrays
    if opt.peaks_cxi:
        with ds.Stacks() as stk:
            for k, v in pks_cxi.items():
                if k in stk:
                    ds.delete_stack(k, from_files=True)
                ds.add_stack(k, v, overwrite=True)
            ds.store_stacks(list(pks_cxi.keys()), progress_bar=True, overwrite=True)

    return ds.files
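

# Worked example of the beam-center update rule in refine_center(): the Friedel
# fit reports positions on the already-centered image, whose center is
# p0 = (xsize // 2, ysize // 2), so the refined shift (beam_x, beam_y) - p0 is
# added onto the (rounded-up) previous center. All numbers are made up.
def _example_center_update():
    import numpy as np
    p0 = np.array([778, 308])                     # p0 for 1556 x 616 frames
    beam_center_old = np.array([[780.3, 310.1]])  # hypothetical previous center
    refined = np.array([[777.2, 308.9]])          # hypothetical Friedel-fit result
    beam_center_new = np.ceil(beam_center_old) + refined - p0
    print(beam_center_new)                        # -> [[780.2 311.9]]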