def test_read_fxe_raw_run_selective(mock_fxe_raw_run):
    """Open the mock FXE raw run with two ``include`` patterns and check
    which sources are visible in each case.
    """
    all_trains = list(range(10000, 10480))

    # Pattern '*DA*': the XGM control source is expected, the LPD module not.
    da_only = RunDirectory(mock_fxe_raw_run, include='*DA*')
    assert da_only.train_ids == all_trains
    assert 'SA1_XTD2_XGM/DOOCS/MAIN' in da_only.control_sources
    assert 'FXE_DET_LPD1M-1/DET/0CH0:xtdf' not in da_only.detector_sources

    # Pattern '*LPD*': the other way round.
    lpd_only = RunDirectory(mock_fxe_raw_run, include='*LPD*')
    assert lpd_only.train_ids == all_trains
    assert 'SA1_XTD2_XGM/DOOCS/MAIN' not in lpd_only.control_sources
    assert 'FXE_DET_LPD1M-1/DET/0CH0:xtdf' in lpd_only.detector_sources
def test_stack_detector_data_missing(mock_fxe_raw_run):
    """Stack detector data for one train after removing or emptying the data
    of several modules, and check that the gaps hold the fill value.
    """
    run = RunDirectory(mock_fxe_raw_run)
    _, train = run.train_from_id(
        10000, devices=[('*/DET/*', 'image.data')]
    )

    # Three variants of missing data:

    # 1. Source missing
    del train['FXE_DET_LPD1M-1/DET/3CH0:xtdf']

    # 2. Key missing
    del train['FXE_DET_LPD1M-1/DET/7CH0:xtdf']['image.data']

    # 3. Empty array
    for modno in (1, 5, 9, 15):
        source = 'FXE_DET_LPD1M-1/DET/{}CH0:xtdf'.format(modno)
        train[source]['image.data'] = np.zeros(
            (0, 1, 256, 256), dtype=np.uint16
        )

    stacked = stack_detector_data(train, 'image.data', fillvalue=22)
    assert stacked.shape == (128, 1, 16, 256, 256)

    assert not (stacked[:, :, 0] == 22).any()  # Control
    assert (stacked[:, :, 3] == 22).all()  # Source missing
    assert (stacked[:, :, 7] == 22).all()  # Key missing
    assert (stacked[:, :, 5] == 22).all()  # Empty array
def test_write_virtual(mock_fxe_raw_run):
    """Write the mock run to a single file with ``write_virtual`` and read
    that file back, both with h5py directly and through ``H5File``.
    """
    with TemporaryDirectory() as td:
        new_file = osp.join(td, 'test.h5')

        with RunDirectory(mock_fxe_raw_run) as run:
            run.write_virtual(new_file)

        assert_isfile(new_file)

        # Open explicitly read-only: without a mode argument, h5py 2.x opens
        # the file read/write (and warns about the changing default).
        with h5py.File(new_file, 'r') as f:
            ds = f['CONTROL/SPB_XTD9_XGM/DOOCS/MAIN/beamPosition/ixPos/value']
            assert ds.is_virtual

        with H5File(new_file) as f:
            np.testing.assert_array_equal(
                f.train_ids, np.arange(10000, 10480, dtype=np.uint64))

            assert 'SPB_XTD9_XGM/DOOCS/MAIN' in f.control_sources
            assert 'SPB_XTD9_XGM/DOOCS/MAIN:output' in f.instrument_sources

            s = f.get_series('SPB_XTD9_XGM/DOOCS/MAIN',
                             'beamPosition.ixPos.value')
            # This should have concatenated the two sequence files (400 + 80)
            assert len(s) == 480

            a = f.get_array('SPB_XTD9_XGM/DOOCS/MAIN:output',
                            'data.intensityTD')
            assert a.shape == (480, 1000)
def test_select_trains(mock_fxe_raw_run):
    """Exercise ``select_trains`` with ID/index slices and ID/index lists."""
    run = RunDirectory(mock_fxe_raw_run)
    assert len(run.train_ids) == 480

    # Slices
    id_slice = run.select_trains(by_id[10200:10220])
    assert id_slice.train_ids == list(range(10200, 10220))

    index_slice = run.select_trains(by_index[:10])
    assert index_slice.train_ids == list(range(10000, 10010))

    with pytest.raises(ValueError):
        run.select_trains(by_id[9000:9100])  # Before data

    with pytest.raises(ValueError):
        run.select_trains(by_id[12000:12500])  # After data

    # Select a list of train IDs
    id_list = run.select_trains(by_id[[9950, 10000, 10101, 10500]])
    assert id_list.train_ids == [10000, 10101]

    with pytest.raises(ValueError):
        run.select_trains(by_id[[9900, 10600]])

    # Select a list of indexes
    index_list = run.select_trains(by_index[[5, 25]])
    assert index_list.train_ids == [10005, 10025]

    with pytest.raises(IndexError):
        run.select_trains(by_index[[480]])
def __init__(self, run_folder):
    """Initialization.

    Opens the run and loads the ``*value`` control data of the XGM,
    SA3 XGM and mono sources into DataFrames.

    :param str run_folder: full path of the run folder.
    """
    self._run = RunDirectory(run_folder)

    self._sources = {
        'MONO': 'SA3_XTD10_MONO/MDL/PHOTON_ENERGY',
        'XGM': 'SCS_BLU_XGM/XGM/DOOCS',
        'XGM_OUTPUT': 'SCS_BLU_XGM/XGM/DOOCS:output',
        'SA3_XGM': 'SA3_XTD10_XGM/XGM/DOOCS',
        'SA3_XGM_OUTPUT': 'SA3_XTD10_XGM/XGM/DOOCS:output'
    }

    def control_frame(alias):
        # DataFrame of all '*value' fields of one source; each column name
        # is reduced to the part after its last '/'.
        frame = self._run.get_dataframe(
            fields=[(self._sources[alias], '*value')])
        frame.rename(columns=lambda col: col.split('/')[-1], inplace=True)
        return frame

    # get the DataFrames for XGM control data
    self._xgm_df = control_frame('XGM')
    self._sa3_xgm_df = control_frame('SA3_XGM')

    # get the DataFrame for SoftMono control data
    self._mono_df = control_frame('MONO')

    self._photon_energies = None  # photon energies for each pulse
    self._I0 = None
    self._I1 = OrderedDict()

    self._data = None  # pulse-resolved data in DataFrame
def test_run_info(capsys):
    """Compare the text printed by ``info()`` for the run at RUNPATH with
    the expected summary.
    """
    expected_stdout = (
        'Run information\n'
        '\tDuration:       0:08:57.200000\n'
        '\tFirst train ID: 1472806005\n'
        '\tLast train ID:  1472811377\n'
        '\t# of trains:    542\n'
        '\n'
        'Devices\n'
        '\tInstruments\n'
        '\t- SPB_DET_AGIPD1M-1/DET/0CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/10CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/1CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/2CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/3CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/4CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/5CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/6CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/7CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/8CH0:xtdf\n'
        '\t- SPB_DET_AGIPD1M-1/DET/9CH0:xtdf\n'
        '\tControls\n'
        '\t-\n'
    )

    RunDirectory(RUNPATH).info()

    captured = capsys.readouterr()
    print(captured)

    # Element 0 of the captured pair is stdout.
    assert captured[0] == expected_stdout
def test_iterate_select_trains(mock_fxe_run):
    """Iterate over the mock FXE run with various ``train_range`` values."""
    run = RunDirectory(mock_fxe_run)

    def yielded_ids(selection):
        # Train IDs produced by run.trains() for the given selection.
        return [tid for (tid, _) in run.trains(train_range=selection)]

    assert yielded_ids(by_id[10004:10006]) == [10004, 10005]
    assert yielded_ids(by_id[:10003]) == [10000, 10001, 10002]

    # Overlap with start of run
    assert yielded_ids(by_id[9000:10003]) == [10000, 10001, 10002]

    # Overlap with end of run
    assert yielded_ids(by_id[10478:10500]) == [10478, 10479]

    # Not overlapping
    with pytest.raises(ValueError) as excinfo:
        list(run.trains(train_range=by_id[9000:9050]))
    assert 'before' in str(excinfo.value)

    with pytest.raises(ValueError) as excinfo:
        list(run.trains(train_range=by_id[10500:10550]))
    assert 'after' in str(excinfo.value)

    assert yielded_ids(by_index[4:6]) == [10004, 10005]
def test_run_get_series_select_trains(mock_fxe_raw_run):
    """``get_series`` on a 50-train selection returns a pandas Series
    indexed by exactly those train IDs.
    """
    selection = RunDirectory(mock_fxe_raw_run).select_trains(
        by_id[10100:10150]
    )
    series = selection.get_series(
        'SA1_XTD2_XGM/DOOCS/MAIN', "beamPosition.iyPos.value"
    )

    assert isinstance(series, pd.Series)
    assert len(series) == 50
    assert list(series.index) == list(range(10100, 10150))
def test_train_from_index_fxe_run(mock_fxe_run):
    """Fetch the train at index 479 and check that an LPD module and the
    camera control source are both present with the expected keys.
    """
    lpd_module = 'FXE_DET_LPD1M-1/DET/15CH0:xtdf'
    camera = 'FXE_XAD_GEC/CAM/CAMERA'

    _, train = RunDirectory(mock_fxe_run).train_from_index(479)

    assert lpd_module in train
    assert 'image.data' in train[lpd_module]
    assert camera in train
    assert 'firmwareVersion.value' in train[camera]
def test_stack_detector_data(mock_fxe_run):
    """Stack ``image.data`` of all ``*/DET/*`` sources for train 10000 and
    check the shape of the combined array.
    """
    run = RunDirectory(mock_fxe_run)
    _, train = run.train_from_id(
        10000, devices=[('*/DET/*', 'image.data')]
    )

    stacked = stack_detector_data(train, 'image.data')
    assert stacked.shape == (128, 1, 16, 256, 256)
def test_permission():
    """Opening a run directory without any access rights must raise a
    ``PermissionError`` whose message names the directory.

    NOTE: permission bits are not enforced for the superuser, so this test
    presumably cannot pass when run as root.
    """
    d = mkdtemp()
    try:
        # Mode 0 clears every permission bit, including owner read.
        os.chmod(d, 0)

        with pytest.raises(PermissionError) as excinfo:
            RunDirectory(d)

        assert "Permission denied" in str(excinfo.value)
        assert d in str(excinfo.value)
    finally:
        # Give access back and remove the (empty) temporary directory so
        # the test leaves nothing behind.
        os.chmod(d, stat.S_IRWXU)
        os.rmdir(d)
def test_run_immutable_sources(mock_fxe_raw_run):
    """``all_sources`` has no ``pop()``; its length is the same before and
    after the failed attempt.
    """
    run = RunDirectory(mock_fxe_raw_run)
    n_sources = len(run.all_sources)

    with pytest.raises(AttributeError):
        run.all_sources.pop()

    assert len(run.all_sources) == n_sources
def test_iterate_spb_raw_run(mock_spb_raw_run):
    """Take the first train from the mock SPB raw run and check its ID and
    the shape of the camera image.
    """
    camera = 'SPB_IRU_CAM/CAM/SIDEMIC:daqOutput'

    first_tid, first_train = next(RunDirectory(mock_spb_raw_run).trains())

    assert first_tid == 10000
    assert camera in first_train
    assert first_train[camera]['data.image.pixels'].shape == (1024, 768)
def test_stack_detector_data():
    """Stack the AGIPD1M-1 modules of one train from the run at RUNPATH and
    compare module 0 of the result with the original module data.
    """
    run = RunDirectory(RUNPATH)
    _, train = run.train_from_id(1472810853)

    stacked = stack_detector_data(train, 'image.data', only='AGIPD1M-1')
    print(stacked.shape)
    assert stacked.shape == (60, 16, 512, 128)

    module0 = train['SPB_DET_AGIPD1M-1/DET/0CH0:xtdf']['image.data']
    np.testing.assert_equal(stacked[:, 0, ...], module0)
def test_run_all_sources():
    """For the run at RUNPATH, ``all_sources.pop()`` raises
    ``AttributeError`` and the number of sources stays the same.
    """
    run = RunDirectory(RUNPATH)
    n_sources = len(run.all_sources)

    with pytest.raises(AttributeError):
        run.all_sources.pop()

    assert len(run.all_sources) == n_sources
def test_run_get_array_error(mock_fxe_run):
    """``get_array`` raises ``SourceNameError`` for an unknown source and
    ``PropertyNameError`` for an unknown key.
    """
    run = RunDirectory(mock_fxe_run)

    # Unknown source name
    with pytest.raises(SourceNameError):
        run.get_array('bad_name', 'data.intensityTD')

    # Known source, unknown key
    with pytest.raises(PropertyNameError):
        run.get_array('SA1_XTD2_XGM/DOOCS/MAIN:output', 'bad_name')
def test_run_get_array_empty(mock_fxe_run):
    """``get_array`` on the ``CAMERA_NODATA`` source yields a DataArray
    with zero entries along ``trainId``.
    """
    empty = RunDirectory(mock_fxe_run).get_array(
        'FXE_XAD_GEC/CAM/CAMERA_NODATA:daqOutput', 'data.image.pixels'
    )

    assert isinstance(empty, DataArray)
    assert empty.dims[0] == 'trainId'
    assert empty.shape == (0, 255, 1024)
def main(argv=None):
    """Command-line entry point: write a virtual CXI file for one run.

    The run is given either as a run directory, or (older interface) as a
    proposal directory followed by a run number. Unless ``-o`` is passed,
    the output goes to the proposal's ``scratch`` directory.

    :param argv: list of command-line arguments; with ``None``, argparse
        falls back to ``sys.argv``.

    Exits through ``sys.exit`` with a message when the output path cannot
    be derived, when the run directory is not readable, when the output
    directory is not writable, or when ``_get_detector`` returns ``None``.
    """
    parser = argparse.ArgumentParser('karabo-data-make-virtual-cxi')
    parser.add_argument('run_dir', help="Path to an EuXFEL run directory")
    # Specifying a proposal directory & a run number is the older interface.
    # If the run_number argument is passed, run_dir is used as proposal.
    parser.add_argument('run_number', nargs="?", help=argparse.SUPPRESS)
    parser.add_argument(
        '-o', '--output',
        help="Filename or path for the CXI output file. "
             "By default, it is written in the proposal's scratch directory."
    )
    parser.add_argument(
        '--min-modules', type=int, default=9, metavar='N',
        help="Include trains where at least N modules have data (default 9)"
    )
    opts = parser.parse_args(argv)
    out_file = opts.output

    logging.basicConfig(level=logging.INFO)

    if not opts.run_number:
        # run directory
        run_dir = os.path.abspath(opts.run_dir)
        if out_file is None:
            # Derive proposal and run name from a .../(raw|proc)/rNNNN path.
            match = re.search(r'/(raw|proc)/(r\d{4})/?$', run_dir)
            if not match:
                sys.exit("ERROR: '-o outfile' option needed when "
                         "input directory doesn't look like .../proc/r0123")
            run_name, data_kind = match.group(2, 1)
            proposal_dir = run_dir[:match.start()]
            out_file = osp.join(
                proposal_dir, 'scratch',
                '{}_{}_detectors_virt.cxi'.format(run_name, data_kind)
            )
    else:
        # proposal directory, run number
        run_name = 'r%04d' % int(opts.run_number)
        proposal_dir = opts.run_dir
        run_dir = osp.join(opts.run_dir, 'proc', run_name)
        if out_file is None:
            out_file = osp.join(
                proposal_dir, 'scratch',
                '{}_detectors_virt.cxi'.format(run_name)
            )

    out_dir = osp.dirname(osp.abspath(out_file))

    # Check access up front, before opening the run.
    if not os.access(run_dir, os.R_OK):
        sys.exit("ERROR: Don't have read access to {}".format(run_dir))
    if not os.access(out_dir, os.W_OK):
        sys.exit("ERROR: Don't have write access to {}".format(out_dir))

    log.info("Reading run directory %s", run_dir)
    run_data = RunDirectory(run_dir)

    detector = _get_detector(run_data, opts.min_modules)
    if detector is None:
        sys.exit("No AGIPD or LPD sources found in {!r}".format(run_dir))

    detector.write_virtual_cxi(out_file)
def test_iterate_fxe_run(mock_fxe_raw_run):
    """Take the first train from the mock FXE raw run and check that an LPD
    module and the camera control source are present.
    """
    lpd_module = 'FXE_DET_LPD1M-1/DET/15CH0:xtdf'
    camera = 'FXE_XAD_GEC/CAM/CAMERA'

    first_tid, first_train = next(RunDirectory(mock_fxe_raw_run).trains())

    assert first_tid == 10000
    assert lpd_module in first_train
    assert 'image.data' in first_train[lpd_module]
    assert camera in first_train
    assert 'firmwareVersion.value' in first_train[camera]
def test_stack_data_2():
    """Stack ``image.data`` along axis 0 while excluding two modules, and
    check that the first stacked entry is module 1.
    """
    run = RunDirectory(RUNPATH)
    _, train = run.train_from_id(1472810853)

    excluded = [
        'SPB_DET_AGIPD1M-1/DET/0CH0:xtdf',
        'SPB_DET_AGIPD1M-1/DET/9CH0:xtdf',
    ]
    stacked = stack_data(train, 'image.data', axis=0, xcept=excluded)
    print(stacked.shape)
    assert stacked.shape == (8, 60, 512, 128)

    module1 = train['SPB_DET_AGIPD1M-1/DET/1CH0:xtdf']['image.data']
    np.testing.assert_equal(stacked[0, ...], module1)
def test_iterate_run_glob_devices(mock_fxe_run):
    """Iterate with a ``*/DET/*`` / ``image.data`` device selection and
    check that only the selected source and key appear in the first train.
    """
    lpd_module = 'FXE_DET_LPD1M-1/DET/15CH0:xtdf'

    run = RunDirectory(mock_fxe_run)
    first_tid, first_train = next(run.trains([("*/DET/*", "image.data")]))

    assert first_tid == 10000
    assert lpd_module in first_train
    assert 'image.data' in first_train[lpd_module]
    # Keys and sources outside the selection must be absent.
    assert 'detector.data' not in first_train[lpd_module]
    assert 'FXE_XAD_GEC/CAM/CAMERA' not in first_train
def test_run_get_array(mock_fxe_run):
    """``get_array`` with ``extra_dims=['pulse']`` returns a labelled
    DataArray covering all 480 trains.
    """
    intensity = RunDirectory(mock_fxe_run).get_array(
        'SA1_XTD2_XGM/DOOCS/MAIN:output',
        'data.intensityTD',
        extra_dims=['pulse'],
    )

    assert isinstance(intensity, DataArray)
    assert intensity.dims == ('trainId', 'pulse')
    assert intensity.shape == (480, 1000)
    assert intensity.coords['trainId'][0] == 10000
def test_run_get_array_multiple_per_train(mock_fxe_raw_run):
    """``get_array`` with an ROI on detector data of the first two trains:
    the result has 256 entries and each train ID is repeated 128 times.
    """
    first_two = RunDirectory(mock_fxe_raw_run).select_trains(by_index[:2])

    images = first_two.get_array(
        'FXE_DET_LPD1M-1/DET/6CH0:xtdf',
        'image.data',
        roi=by_index[:, 10:20, 20:40],
    )

    assert isinstance(images, DataArray)
    assert images.shape == (256, 1, 10, 20)

    expected_tids = np.repeat([10000, 10001], 128)
    np.testing.assert_array_equal(images.coords['trainId'], expected_tids)
def serve_files(path, port, fast_devices=None, require_all=False,
                repeat_stream=True, **kwargs):
    """Stream data from files through a TCP socket.

    Parameters
    ----------
    path: str
        Path to the HDF5 file or file folder.
    port: int
        Local TCP port to bind socket to.
    fast_devices: list of tuples [('src', 'prop')]
        Passed as ``devices`` to the ``trains()`` iterator.
    require_all: bool
        If set to True, will stream only trainIDs that has data
        corresponding to keys specified in fast_devices.
        Default: False
    repeat_stream: bool
        Intended to keep streaming after the ``trains()`` iterator is
        exhausted, with monotonically increasing train IDs.
        NOTE: this argument is currently ignored - it is overwritten with
        False in the function body, so the data is streamed exactly once.
    **kwargs
        Forwarded to ``ZMQStreamer``.

    If the data cannot be opened, the exception is printed and the function
    returns without starting a streamer.
    """
    try:
        corr_data = RunDirectory(path)
        num_trains = len(corr_data.train_ids)
    except Exception as ex:
        # Best effort: report the problem and give up without streaming.
        print(repr(ex))
        return

    streamer = ZMQStreamer(port, **kwargs)
    streamer.start()

    counter = 0
    # NOTE(review): this overrides the caller's ``repeat_stream`` argument.
    # It is kept so that existing callers (which rely on the function
    # returning after one pass) are unaffected; remove this line to honour
    # the argument.
    repeat_stream = False

    try:
        while True:
            for tid, train_data in corr_data.trains(devices=fast_devices,
                                                    require_all=require_all):
                # loop over corrected DataCollection
                if train_data:
                    # Generate fake meta data with monotically increasing
                    # trainids only after the actual trains in corrected data
                    meta = (generate_meta(train_data.keys(), tid + counter)
                            if counter > 0 else None)
                    streamer.feed(train_data, metadata=meta)

            if not repeat_stream:
                break
            # increase the counter by total number of trains in a run
            counter += num_trains
    finally:
        # Stop the streamer even if reading or feeding the data raised.
        streamer.stop()
def test_run_get_dataframe(mock_fxe_run):
    """``get_dataframe`` with glob patterns: 4 columns without timestamps,
    8 columns with ``timestamps=True``.
    """
    run = RunDirectory(mock_fxe_run)
    xy_pos_fields = [("*_XGM/*", "*.i[xy]Pos*")]
    ix_column = "SA1_XTD2_XGM/DOOCS/MAIN/beamPosition.ixPos"

    values_only = run.get_dataframe(fields=xy_pos_fields)
    assert len(values_only.columns) == 4
    assert ix_column in values_only.columns

    with_timestamps = run.get_dataframe(fields=xy_pos_fields, timestamps=True)
    assert len(with_timestamps.columns) == 8
    assert ix_column in with_timestamps.columns
    assert "SA1_XTD2_XGM/DOOCS/MAIN/beamPosition.ixPos.timestamp" in with_timestamps.columns
def test_run_get_array_select_trains(mock_fxe_raw_run):
    """``get_array`` on a 50-train selection returns only those trains,
    starting at train 10100.
    """
    selection = RunDirectory(mock_fxe_raw_run).select_trains(
        by_id[10100:10150]
    )
    intensity = selection.get_array(
        'SA1_XTD2_XGM/DOOCS/MAIN:output',
        'data.intensityTD',
        extra_dims=['pulse'],
    )

    assert isinstance(intensity, DataArray)
    assert intensity.dims == ('trainId', 'pulse')
    assert intensity.shape == (50, 1000)
    assert intensity.coords['trainId'][0] == 10100
def test_run_get_virtual_dataset_filename(mock_fxe_raw_run, tmpdir):
    """``get_virtual_dataset`` with ``filename=`` creates that file and
    returns a virtual h5py dataset stored in it.
    """
    vds_path = str(tmpdir / 'test-vds.h5')

    vds = RunDirectory(mock_fxe_raw_run).get_virtual_dataset(
        'FXE_DET_LPD1M-1/DET/6CH0:xtdf', 'image.data', filename=vds_path
    )

    assert_isfile(vds_path)
    assert vds.file.filename == vds_path
    assert isinstance(vds, h5py.Dataset)
    assert vds.is_virtual
    assert vds.shape == (61440, 1, 256, 256)
def test_stack_detector_data_type_error(mock_fxe_raw_run):
    """``stack_detector_data`` raises ``ValueError`` mentioning the odd
    dtype when one module's data has been converted to float32.
    """
    run = RunDirectory(mock_fxe_raw_run)
    _, train = run.train_from_id(
        10000, devices=[('*/DET/*', 'image.data')]
    )

    # Give a single module a dtype different from the others.
    odd_module = train['FXE_DET_LPD1M-1/DET/3CH0:xtdf']
    odd_module['image.data'] = odd_module['image.data'].astype(np.float32)

    with pytest.raises(ValueError) as excinfo:
        stack_detector_data(train, 'image.data')

    assert "dtype('float32')" in str(excinfo.value)
def test_run_get_dask_array(mock_fxe_raw_run):
    """``get_dask_array`` returns a dask array with the expected shape and
    dtype for the XGM intensity data.
    """
    # Imported locally, as in the rest of this test's history, so dask is
    # only needed when this test runs.
    import dask.array as da

    intensity = RunDirectory(mock_fxe_raw_run).get_dask_array(
        'SA1_XTD2_XGM/DOOCS/MAIN:output', 'data.intensityTD'
    )

    assert isinstance(intensity, da.Array)
    assert intensity.shape == (480, 1000)
    assert intensity.dtype == np.float32
def load_data(run, proposal, exp, VERBOSE=False):
    """Open the raw data of one run below ``/gpfs/exfel/exp/SPB``.

    The directory is ``/gpfs/exfel/exp/SPB/<exp>/<proposal>/raw/<run>/``;
    the three arguments are inserted into the path as given.

    :param run: last path component (after ``raw/``).
    :param proposal: second path component below ``SPB``.
    :param exp: first path component below ``SPB``.
    :param bool VERBOSE: if true, call ``info()`` on the opened run.
    :return: the ``RunDirectory`` for that path.
    """
    run_path = "/gpfs/exfel/exp/SPB/{}/{}/raw/{}/".format(exp, proposal, run)
    print("Loading data from: {}".format(run_path))

    run_data = RunDirectory(run_path)
    if VERBOSE:
        run_data.info()

    return run_data