Example 1
# Imports assumed for this example (diffractem package; imread presumably
# from tifffile, used for the reference and pixel-mask images):
import os
import datetime
import numpy as np
from time import time
from tifffile import imread
from diffractem import proc2d
from diffractem.dataset import Dataset
from diffractem.pre_proc_opts import PreProcOpts
def broadcast(fn, opt: PreProcOpts):
    """Pre-processes in one go a dataset comprising movie frames, by transferring the found beam center
    positions and diffraction spots (in CXI format) from an aggregated set processed earlier.
    
    Arguments:
        fn {function} -- [description]
        opt {PreProcOpts} -- [description]
    
    Raises:
        err: [description]
    
    Returns:
        [type] -- [description]
    """

    if isinstance(fn, list) and len(fn) == 1:
        fn = fn[0]

    def log(*args):
        if not (opt.verbose or any(isinstance(e, Exception)
                                   for e in args)):
            return
        if isinstance(fn, list):
            dispfn = os.path.basename(fn[0]) + ' etc.'
        else:
            dispfn = os.path.basename(fn)
        idstring = '[{} - {} - broadcast] '.format(
            datetime.datetime.now().time(), dispfn)
        print(idstring, *args)

    t0 = time()
    dsagg = Dataset.from_list(fn, load_tables=False)
    dsraw = Dataset.from_list(list(dsagg.shots.file_raw.unique()))
    dsraw.shots['file_raw'] = dsraw.shots['file']  # required for association later

    reference = imread(opt.reference)
    pxmask = imread(opt.pxmask)

    # And now: the interesting part...
    dsagg.shots['from_id'] = range(dsagg.shots.shape[0])  # label original (aggregated) shots

    in_agg = dsraw.shots[opt.idfields].merge(
        dsagg.shots[opt.idfields + ['selected']], on=opt.idfields,
        how='left')['selected'].fillna(False)
    dsraw.shots['selected'] = in_agg

    dssel = None  # set upfront so the finally block is safe if selection fails early
    try:

        dsraw.open_stacks(readonly=True)
        dssel = dsraw.get_selection(f'({opt.select_query}) and selected',
                                    file_suffix=opt.single_suffix,
                                    new_folder=opt.proc_dir)
        shots = dssel.shots.merge(dsagg.shots[opt.idfields + ['from_id']],
                                  on=opt.idfields,
                                  validate='m:1')  # _inner_ merge

        log(f'{dsraw.shots.shape[0]} raw, {dssel.shots.shape[0]} selected, '
            f'{dsagg.shots.shape[0]} aggregated.')

        # get the broadcasted image centers
        dsagg.open_stacks(readonly=True)
        ctr = dsagg.stacks[opt.center_stack][shots.from_id.values, :]

        # Flat-field and dead-pixel correction
        stack_rechunked = dssel.raw_counts.rechunk({0: ctr.chunks[0]})  # re-chunk the raw data to match the center stack
        if opt.correct_saturation:
            stack_ff = proc2d.apply_flatfield(
                proc2d.apply_saturation_correction(stack_rechunked,
                                                   opt.shutter_time,
                                                   opt.dead_time), reference)
        else:
            stack_ff = proc2d.apply_flatfield(stack_rechunked, reference)
        stack = proc2d.correct_dead_pixels(stack_ff,
                                           pxmask,
                                           strategy='replace',
                                           replace_val=-1,
                                           mask_gaps=True)
        centered = proc2d.center_image(stack,
                                       ctr[:, 0],
                                       ctr[:, 1],
                                       opt.xsize,
                                       opt.ysize,
                                       -1,
                                       parallel=True)

        # stacks to add to the new (selected) dataset
        alldata = {
            'center_of_mass': dsagg.stacks['center_of_mass'][shots.from_id.values, ...],
            'lorentz_fit': dsagg.stacks['lorentz_fit'][shots.from_id.values, ...],
            'beam_center': ctr,
            'centered': centered.astype(np.float32) if opt.float else centered.astype(np.int16),
            'pxmask_centered': (centered != -1).astype(np.uint16),
            'adf1': proc2d.apply_virtual_detector(centered, opt.r_adf1[0], opt.r_adf1[1]),
            'adf2': proc2d.apply_virtual_detector(centered, opt.r_adf2[0], opt.r_adf2[1])
        }

        if opt.broadcast_peaks:
            alldata.update({
                'nPeaks': dsagg.stacks['nPeaks'][shots.from_id.values, ...],
                'peakTotalIntensity': dsagg.stacks['peakTotalIntensity'][shots.from_id.values, ...],
                'peakXPosRaw': dsagg.stacks['peakXPosRaw'][shots.from_id.values, ...],
                'peakYPosRaw': dsagg.stacks['peakYPosRaw'][shots.from_id.values, ...],
            })

        for lbl, stk in alldata.items():
            dssel.add_stack(lbl, stk, overwrite=True)

        dssel.init_files(overwrite=True)
        dssel.store_tables(shots=True, features=True)
        dssel.open_stacks(readonly=False)
        dssel.delete_stack('raw_counts', from_files=False)  # the raw counts are not needed in the new files
        dssel.store_stacks(overwrite=True, progress_bar=False)  # this runs the actual computation
        log('Finished with', dssel.centered.shape[0], 'shots after',
            time() - t0, 'seconds')

    except Exception as err:
        log('Broadcast processing failed:', err)
        raise

    finally:
        dsagg.close_stacks()
        dsraw.close_stacks()
        if dssel is not None:
            dssel.close_stacks()

    return dssel.files
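
A minimal usage sketch for broadcast(): the file names, the YAML options file, and the way PreProcOpts is populated are assumptions for illustration, not part of the example above.

# Hypothetical driver for broadcast(); adjust names to your data layout.
from glob import glob

opts = PreProcOpts()
opts.load('preproc.yaml')                  # assumed loader for the option set
agg_files = sorted(glob('proc/*_agg.h5'))  # aggregated sets processed earlier
out_files = broadcast(agg_files, opts)     # writes the centered single-shot files
print('Wrote:', out_files)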
Example 2
# Method of a viewer class; requires (presumably) h5py, `from warnings import
# warn`, and diffractem's StreamParser, Dataset and Adxv helpers.
    def read_files(self):

        args = self.args  # alias: the method mixes `args` and `self.args`
        file_type = args.filename.rsplit('.', 1)[-1]

        if file_type == 'stream':
            print(f'Parsing stream file {args.filename}...')
            stream = StreamParser(args.filename)
            # Loading the CrystFEL geometry from the stream is disabled for now.
            # TODO: handle it more elegantly, e.g. by overwriting the image
            # transform function with the identity for single-panel geometries.
            self.geom = None
            
            try:
                self.data_path = stream.geometry['data']
            except KeyError:
                if args.geometry is None:
                    raise ValueError('No data location specified in geometry file. Please use the -d parameter.')

            files = sorted(stream.shots['file'].unique())
            try:
                self.dataset = Dataset.from_files(files, load_tables=False, init_stacks=False, open_stacks=False)
                self.dataset.load_tables(features=True)
                self.dataset.merge_stream(stream)
                # get_selection would not be the right method to call (changes IDs), instead do...
                self.dataset._shots = self.dataset._shots.loc[self.dataset._shots.selected,:].reset_index(drop=True)
                # TODO get subset for incomplete coverage
                print('Merged stream and HDF5 shot lists')
            except Exception as err:
                self.dataset = Dataset()
                self.dataset._shots = stream.shots
                self.dataset._peaks = stream.peaks
                self.dataset._predict = stream.indexed
                self.dataset._shots['selected'] = True
                print('Could not load shot lists from the HDF5 files; falling back to those from the stream file.')
                print(f'Reason: {err}')

        if args.geometry is not None:
            raise ValueError('Geometry files are currently not supported.')

        if file_type in ['lst', 'h5', 'hdf', 'nxs']:
            self.dataset = Dataset.from_list(args.filename, load_tables=True, init_stacks=False, open_stacks=False)
            if not self.dataset.shots.selected.all():
                # dirty removal of unwanted shots is sufficient in this case:
                self.dataset._shots = self.dataset._shots.loc[self.dataset._shots.selected,:].reset_index(drop=True)

        if args.data_path is not None:
            self.data_path = args.data_path

        if self.data_path is None:
            # The data path was set neither via the stream file nor explicitly; we have to guess.
            try:
                with h5py.File(self.dataset.shots.file.iloc[0], 'r') as fh:
                    base = '/%/data'.replace('%', self.dataset.shots.subset.iloc[0])
                    self.data_path = '/%/data/' + fh[base].attrs['signal']
                print('Found data path', self.data_path)
            except Exception as err:
                warn(str(err), RuntimeWarning)
                print('Could not determine the data path; assuming /%/data/raw_counts')
                self.data_path = '/%/data/raw_counts'

        if args.query:
            print('Only showing shots matching', args.query)
            # get_selection() would change the shot IDs here, so just cut down the shot list:
            self.dataset._shots = self.dataset._shots.query(args.query)

        if self.args.sort_crystals:
            print('Re-sorting shots by region/crystal/run.')
            self.dataset._shots = self.dataset._shots.sort_values(by=['sample', 'region', 'crystal_id', 'run'])

        if not self.args.internal:
            # external viewer: display parameters (e.g. wavelength, distance,
            # pixel size) could be passed to adxv via adxv_args
            adxv_args = {}
            self.adxv = Adxv(hdf5_path=self.data_path.replace('%', 'entry'),
                             adxv_bin=self.args.adxv_bin, **adxv_args)

        self.b_goto.setMaximum(self.dataset.shots.shape[0]-1)
        self.b_goto.setMinimum(0)
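
For context, read_files() dereferences argparse-style options; a minimal, hypothetical namespace covering all the attributes used above might look like this:

# Hypothetical: all attribute names are inferred from read_files() itself.
from argparse import Namespace

args = Namespace(
    filename='run0042.stream',  # .stream, .lst, .h5, .hdf or .nxs input
    geometry=None,              # geometry files currently raise ValueError
    data_path=None,             # otherwise guessed from the HDF5 'signal' attribute
    query='',                   # optional pandas query on the shot list
    sort_crystals=False,        # re-sort by sample/region/crystal_id/run
    internal=True,              # False launches the external adxv viewer
    adxv_bin='adxv',            # path to the adxv binary
)
# viewer.args = args; viewer.read_files()  # `viewer` instance is hypothetical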
Example 3
# Requires the same imports as Example 1, plus `find_peaks` and `proc_peaks`
# (presumably also from the diffractem package).
def refine_center(fn, opt: PreProcOpts):
    """Refines the centering of diffraction patterns based on Friedel mate positions.
    
    Arguments:
        fn {str} -- [file/list name of input files, can contain wildcards]
        opt {PreProcOpts} -- [pre-processing options]
    
    Raises:
        err: [re-raised exception if storing the center-refined images fails]
    
    Returns:
        [list] -- [output files]
    """

    if isinstance(fn, list) and len(fn) == 1:
        fn = fn[0]

    def log(*args):
        if not (opt.verbose or any(isinstance(e, Exception)
                                   for e in args)):
            return
        if isinstance(fn, list):
            dispfn = os.path.basename(fn[0]) + ' etc.'
        else:
            dispfn = os.path.basename(fn)
        idstring = '[{} - {} - refine_center] '.format(
            datetime.datetime.now().time(), dispfn)
        print(idstring, *args)

    ds = Dataset.from_list(fn)
    stream = find_peaks(ds,
                        opt=opt,
                        merge_peaks=False,
                        return_cxi=False,
                        geo_params={'clen': opt.cam_length},
                        exc=opt.im_exc)

    p0 = [opt.xsize // 2, opt.ysize // 2]

    # get Friedel-refined center from stream file
    ctr = proc_peaks.center_friedel(stream.peaks,
                                    ds.shots,
                                    p0=p0,
                                    sigma=opt.peak_sigma,
                                    minpeaks=opt.min_peaks,
                                    maxres=opt.friedel_max_radius)

    maxcorr = ctr['friedel_cost'].values
    changed = np.logical_not(np.isnan(maxcorr))

    with ds.Stacks() as stk:
        beam_center_old = stk['beam_center'].compute()  # previous beam center

    beam_center_new = beam_center_old.copy()
    beam_center_new[changed, :] = (np.ceil(beam_center_old[changed, :])
                                   + ctr.loc[changed, ['beam_x', 'beam_y']].values - p0)
    # warn if the magnitude of the average shift exceeds half a pixel
    if (np.abs(np.mean(beam_center_new - beam_center_old, axis=0)) > .5).any():
        log('WARNING: average shift is larger than 0.5 pixels!')

    # summary statistics
    log('{:g}% of shots refined.\n'.format(
            (1 - np.isnan(maxcorr).sum() / len(maxcorr)) * 100),
        'Shift standard deviation: {}\n'.format(
            np.std(beam_center_new - beam_center_old, axis=0)),
        'Average shift: {}\n'.format(
            np.mean(beam_center_new - beam_center_old, axis=0)))

    # make new files and add the shifted images
    try:
        ds.open_stacks(readonly=False)
        centered2 = proc2d.center_image(ds.centered, ctr['beam_x'].values,
                                        ctr['beam_y'].values, opt.xsize,
                                        opt.ysize, -1)
        ds.add_stack('centered', centered2, overwrite=True)
        ds.add_stack('pxmask_centered', (centered2 != -1).astype(np.uint16),
                     overwrite=True)
        ds.add_stack('beam_center', beam_center_new, overwrite=True)
        ds.change_filenames(opt.refined_file_suffix)
        log('New file names:', ds.files)
        ds.init_files(keep_features=False, overwrite=True)
        ds.store_tables(shots=True, features=True)
        ds.open_stacks(readonly=False)
        ds.store_stacks(overwrite=True, progress_bar=False)
        ds.close_stacks()
        del centered2
    except Exception as err:
        log('Error while storing center-refined images:', err)
        raise
    finally:
        ds.close_stacks()

    # run peak finder again, this time on the refined images
    pks_cxi = find_peaks(ds,
                         opt=opt,
                         merge_peaks=opt.peaks_nexus,
                         return_cxi=True,
                         geo_params={'clen': opt.cam_length},
                         exc=opt.im_exc)

    # export peaks to CXI-format arrays
    if opt.peaks_cxi:
        with ds.Stacks() as stk:
            for k, v in pks_cxi.items():
                if k in stk:
                    ds.delete_stack(k, from_files=True)
                ds.add_stack(k, v, overwrite=True)
            ds.store_stacks(list(pks_cxi.keys()),
                            progress_bar=True,
                            overwrite=True)

    return ds.files
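
refine_center() chains naturally after broadcast() from Example 1; a sketch of that pipeline, with the same hypothetical option loading (all file names are placeholders):

# Hypothetical pipeline: refine the beam centers of the broadcast output.
opts = PreProcOpts()
opts.load('preproc.yaml')                      # assumed loader for the option set
centered = broadcast('aggregated.lst', opts)   # Example 1: centered files
refined = refine_center(centered, opts)        # renamed via opt.refined_file_suffix
print('Refined files:', refined)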