def test_storage_converter():
    with tempfile.TemporaryDirectory() as temp_dir:
        st = strax.Context(storage=strax.DataDirectory(temp_dir),
                           register=[Records, Peaks])
        st.make(run_id=run_id, targets='peaks')

        with tempfile.TemporaryDirectory() as temp_dir_2:
            st = strax.Context(
                storage=[strax.DataDirectory(temp_dir, readonly=True),
                         strax.DataDirectory(temp_dir_2)],
                register=[Records, Peaks],
                storage_converter=True)
            store_1, store_2 = st.storage

            # Data is now in store 1, but not store 2
            key = st._key_for(run_id, 'peaks')
            store_1.find(key)
            with pytest.raises(strax.DataNotAvailable):
                store_2.find(key)

            st.make(run_id, 'peaks')

            # Data is now in both stores
            store_1.find(key)
            store_2.find(key)
def test_fuzzy_matching():
    with tempfile.TemporaryDirectory() as temp_dir:
        st = strax.Context(storage=strax.DataDirectory(temp_dir),
                           register=[Records, Peaks])
        st.make(run_id=run_id, targets='peaks')

        # Changing an option causes data not to match
        st.set_config(dict(some_option=1))
        assert not st.is_stored(run_id, 'peaks')
        assert st.list_available('peaks') == []

        # In a fuzzy context, data does match
        st2 = st.new_context(fuzzy_for=('peaks',))
        assert st2.is_stored(run_id, 'peaks')
        assert st2.list_available('peaks') == [run_id]

        # And we can actually load it
        st2.get_meta(run_id, 'peaks')
        st2.get_array(run_id, 'peaks')

        # Fuzzy matching on options also works
        st3 = st.new_context(fuzzy_for_options=('some_option',))
        assert st3.is_stored(run_id, 'peaks')

    # No saving occurs at all while fuzzy matching
    with tempfile.TemporaryDirectory() as temp_dir:
        st = strax.Context(storage=strax.DataDirectory(temp_dir),
                           register=[Records, Peaks],
                           fuzzy_for=('records',))
        st.make(run_id, 'peaks')
        assert not st.is_stored(run_id, 'peaks')
        assert not st.is_stored(run_id, 'records')
def test_filestore():
    with tempfile.TemporaryDirectory() as temp_dir:
        mystrax = strax.Context(storage=strax.DataDirectory(temp_dir),
                                register=[Records, Peaks])

        assert not mystrax.is_stored(run_id, 'peaks')
        mystrax.scan_runs()
        assert mystrax.list_available('peaks') == []

        mystrax.make(run_id=run_id, targets='peaks')

        assert mystrax.is_stored(run_id, 'peaks')
        mystrax.scan_runs()
        assert mystrax.list_available('peaks') == [run_id]
        assert mystrax.scan_runs()['name'].values.tolist() == [run_id]

        # We should have two directories
        data_dirs = sorted(glob.glob(osp.join(temp_dir, '*/')))
        assert len(data_dirs) == 2

        # The first dir contains peaks.
        # It should have one data chunk (rechunk is on) and a metadata file
        prefix = strax.dirname_to_prefix(data_dirs[0])
        assert sorted(os.listdir(data_dirs[0])) == [
            f'{prefix}-000000', f'{prefix}-metadata.json']

        # Check metadata got written correctly.
        metadata = mystrax.get_meta(run_id, 'peaks')
        assert len(metadata)
        assert 'writing_ended' in metadata
        assert 'exception' not in metadata
        assert len(metadata['chunks']) == 1

        # Check data gets loaded from cache, not rebuilt
        md_filename = osp.join(data_dirs[0], f'{prefix}-metadata.json')
        mtime_before = osp.getmtime(md_filename)
        df = mystrax.get_array(run_id=run_id, targets='peaks')
        assert len(df) == recs_per_chunk * n_chunks
        assert mtime_before == osp.getmtime(md_filename)

        # Test the zipfile store. Zipping is still awkward...
        zf = osp.join(temp_dir, f'{run_id}.zip')
        strax.ZipDirectory.zip_dir(temp_dir, zf, delete=True)
        assert osp.exists(zf)

        mystrax = strax.Context(storage=strax.ZipDirectory(temp_dir),
                                register=[Records, Peaks])
        metadata_2 = mystrax.get_meta(run_id, 'peaks')
        assert metadata == metadata_2
def test_sim_1T():
    """Test the 1T simulator (should always work with the publicly available files)"""
    with tempfile.TemporaryDirectory() as tempdir:
        log.debug(f'Working in {tempdir}')
        testing_config_1T = dict(
            hev_gain_model=('to_pe_constant', 0.0085),
            gain_model=('to_pe_constant', 0.0085))
        st = strax.Context(
            storage=tempdir,
            config=dict(
                nchunk=1,
                event_rate=1,
                chunk_size=2,
                detector='XENON1T',
                fax_config='https://raw.githubusercontent.com/XENONnT/strax_auxiliary_files/0b5a11195554d106c99784d8ad84805b0f42d51d/sim_files/fax_config_1t.json',  # noqa
                **straxen.contexts.x1t_common_config),
            **straxen.contexts.common_opts)
        st.register(wfsim.RawRecordsFromFax1T)

        log.debug(f'Setting testing config {testing_config_1T}')
        st.set_config(testing_config_1T)

        log.debug('Getting raw-records')
        rr = st.get_array(run_id, 'raw_records')
        p = st.get_array(run_id, 'peaks')
        _sanity_check(rr, p)
        log.info('All done')
def MC_test():
    return strax.Context(
        storage=[
            strax.DataDirectory('./strax_data'),
        ],
        # register=
        # register_all=[sx.nEXO_MC_reader, ]
    )
def setUpClass(cls) -> None:
    # Just to make sure we are running some mongo server, see test-class docstring
    cls.test_run_ids = ['0', '1']
    cls.all_targets = ('peaks', 'records')

    uri = os.environ.get('TEST_MONGO_URI')
    db_name = 'test_rundb'
    cls.collection_name = 'test_rundb_coll'
    client = pymongo.MongoClient(uri)
    cls.database = client[db_name]
    collection = cls.database[cls.collection_name]
    cls.path = os.path.join(tempfile.gettempdir(), 'strax_data')
    # assert cls.collection_name not in cls.database.list_collection_names()

    if not straxen.utilix_is_configured():
        # Bit of an ugly hack but there is no way to get around this
        # function even though we don't need it
        straxen.rundb.utilix.rundb.xent_collection = \
            lambda *args, **kwargs: collection

    cls.rundb_sf = straxen.RunDB(readonly=False,
                                 runid_field='number',
                                 new_data_path=cls.path,
                                 minimum_run_number=-1,
                                 rucio_path='./strax_test_data',
                                 )
    cls.rundb_sf.client = client
    cls.rundb_sf.collection = collection

    cls.st = strax.Context(register=[Records, Peaks],
                           storage=[cls.rundb_sf],
                           use_per_run_defaults=False,
                           config=dict(bonus_area=0),
                           )
def get_context(use_per_run_defaults: bool):
    """Get simple context"""
    st = strax.Context(storage=[],
                       register=(ChannelIsRunidRecords, MaxChannelPeaks),
                       config=dict(bonus_area=1))
    st.set_context_config({'use_per_run_defaults': use_per_run_defaults})
    return st
def demo():
    """Return strax context used in the straxen demo notebook"""
    straxen.download_test_data()

    st = strax.Context(
        storage=[
            strax.DataDirectory('./strax_data'),
            strax.DataDirectory('./strax_test_data',
                                deep_scan=True,
                                provide_run_metadata=True,
                                readonly=True)],
        forbid_creation_of=straxen.daqreader.DAQReader.provides,
        config=dict(**x1t_common_config),
        **x1t_context_config)

    # Use configs that are always available
    st.set_config(dict(
        hev_gain_model=('1T_to_pe_placeholder', False),
        gain_model=('1T_to_pe_placeholder', False),
        elife_conf=('elife_constant', 1e6),
        electron_drift_velocity=("electron_drift_velocity_constant", 1.3325e-4),
    ))
    return st
def setUp(self):
    self.test_run_id = '0'
    self.all_targets = ('peaks', 'records')
    self.mongo_target = 'peaks'

    uri = os.environ.get('TEST_MONGO_URI')
    db_name = 'test_mongosf_database'
    self.collection_name = 'temp-test-collection-mongosf'
    client = pymongo.MongoClient(uri)
    self.database = client[db_name]
    # `self.collection` is presumably defined elsewhere on the test class
    # (e.g. a property returning self.database[self.collection_name])
    self.collection.drop()
    assert self.collection_name not in self.database.list_collection_names()

    self.mongo_sf = strax.MongoFrontend(
        uri=uri,
        database=db_name,
        take_only=self.mongo_target,
        col_name=self.collection_name,
    )

    self.st = strax.Context(
        register=[Records, Peaks],
        storage=[self.mongo_sf],
        use_per_run_defaults=True,
    )
    self.log = logging.getLogger(self.__class__.__name__)

    assert not self.is_all_targets_stored
def setUp(self):
    self.path = os.path.join(tempfile.gettempdir(), 'strax_data')
    self.st = strax.Context(
        use_per_run_defaults=True,
        register=[Records],
    )
    self.target = 'records'
def test_random_access():
    """Test basic random access

    TODO: test random access when time info is not provided directly
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Hack to enable testing if only required chunks are loaded
        Peaks.rechunk_on_save = False

        st = strax.Context(storage=strax.DataDirectory(temp_dir),
                           register=[Records, Peaks])

        with pytest.raises(strax.DataNotAvailable):
            # Time range selection requires data already available
            st.get_df(run_id, 'peaks', time_range=(3, 5))

        st.make(run_id=run_id, targets='peaks')

        # Second part of hack: corrupt data by removing one chunk
        dirname = str(st._key_for(run_id, 'peaks'))
        os.remove(os.path.join(temp_dir,
                               dirname,
                               strax.dirname_to_prefix(dirname) + '-000000'))

        with pytest.raises(FileNotFoundError):
            st.get_array(run_id, 'peaks')

        df = st.get_array(run_id, 'peaks', time_range=(3, 5))
        assert len(df) == 2 * recs_per_chunk
        assert df['time'].min() == 3
        assert df['time'].max() == 4
def demo():
    """Return strax context used in the straxen demo notebook"""
    straxen.download_test_data()

    st = strax.Context(
        storage=[
            strax.DataDirectory('./strax_data'),
            strax.DataDirectory('./strax_test_data',
                                deep_scan=True,
                                provide_run_metadata=True,
                                readonly=True)],
        forbid_creation_of=straxen.daqreader.DAQReader.provides,
        config=dict(**x1t_common_config),
        **x1t_context_config)

    # Use configs that are always available
    st.set_config(dict(
        hev_gain_model=(
            'to_pe_per_run',
            straxen.aux_repo +
            '3548132b55f81a43654dba5141366041e1daaf01/strax_files/to_pe.npy'),
        gain_model=(
            'to_pe_per_run',
            straxen.aux_repo +
            '3548132b55f81a43654dba5141366041e1daaf01/strax_files/to_pe.npy'),
    ))

    st.register(straxen.RecordsFromPax)
    return st
def demo():
    """Return strax context used in the straxen demo notebook"""
    straxen.download_test_data()

    st = strax.Context(
        storage=[
            strax.DataDirectory('./strax_data'),
            strax.DataDirectory('./strax_test_data',
                                deep_scan=True,
                                provide_run_metadata=True,
                                readonly=True)],
        forbid_creation_of=straxen.daqreader.DAQReader.provides,
        config=dict(**x1t_common_config),
        **x1t_context_config)

    # Use configs that are always available
    st.set_config(dict(
        hev_gain_model=('1T_to_pe_placeholder', False),
        gain_model=('1T_to_pe_placeholder', False),
        elife=1e6,
        electron_drift_velocity=("electron_drift_velocity_constant", 1.3325e-4),
        se_gain=28.2,
        avg_se_gain=28.2,
        rel_extraction_eff=1.0,
        s1_xyz_map=f'itp_map://resource://{pax_file("XENON1T_s1_xyz_lce_true_kr83m_SR1_pax-680_fdc-3d_v0.json")}?fmt=json',
        s2_xy_map=f'itp_map://resource://{pax_file("XENON1T_s2_xy_ly_SR1_v2.2.json")}?fmt=json',
    ))
    return st
def get_context(self, use_defaults):
    """Get simple context where we have one mock run in the only storage frontend"""
    assert isinstance(use_defaults, bool)
    st = strax.Context(storage=self.get_mock_sf(),
                       check_available=('records',))
    st.set_context_config({'use_per_run_defaults': use_defaults})
    return st
def test_core():
    recs_per_chunk = 10
    n_chunks = 10

    class Records(strax.Plugin):
        provides = 'records'
        depends_on = tuple()
        dtype = strax.record_dtype()

        def iter(self, *args, **kwargs):
            for t in range(n_chunks):
                r = np.zeros(recs_per_chunk, self.dtype)
                r['time'] = t
                r['length'] = 1
                r['dt'] = 1
                r['channel'] = np.arange(len(r))
                yield r

    class Peaks(strax.Plugin):
        provides = 'peaks'
        depends_on = ('records',)
        dtype = strax.peak_dtype()

        def compute(self, records):
            p = np.zeros(len(records), self.dtype)
            p['time'] = records['time']
            return p

    mystrax = strax.Context(storage=[])
    mystrax.register(Records)
    mystrax.register(Peaks)

    bla = mystrax.get_array(run_id='some_run', targets='peaks')
    assert len(bla) == recs_per_chunk * n_chunks
    assert bla.dtype == strax.peak_dtype()
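# Illustrative sketch, not part of the original snippets: the core example above
# runs purely in memory (storage=[]). Assuming Records/Peaks plugin classes like
# the module-level ones used in the other tests here, the same pipeline can be
# backed by a DataDirectory frontend so a second read loads from disk instead of
# recomputing. All calls used below (strax.Context, strax.DataDirectory, make,
# is_stored, get_array) appear elsewhere in this collection.
def example_core_with_storage():
    import tempfile

    with tempfile.TemporaryDirectory() as temp_dir:
        st = strax.Context(storage=strax.DataDirectory(temp_dir),
                           register=[Records, Peaks])
        # First call computes and writes the data to the temporary directory
        st.make(run_id='some_run', targets='peaks')
        assert st.is_stored('some_run', 'peaks')

        # Second call loads the stored data rather than rebuilding it
        peaks = st.get_array('some_run', 'peaks')
        assert len(peaks) > 0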
def xenonnt_online(output_folder='./strax_data', we_are_the_daq=False, **kwargs):
    """XENONnT online processing and analysis"""
    context_options = {**straxen.contexts.common_opts, **kwargs}

    st = strax.Context(
        storage=[
            straxen.RunDB(readonly=not we_are_the_daq,
                          runid_field='number',
                          new_data_path=output_folder),
        ],
        config=straxen.contexts.xnt_common_config,
        **context_options)
    st.register([straxen.DAQReader, straxen.LEDCalibration])

    if not we_are_the_daq:
        st.storage += [
            strax.DataDirectory('/dali/lgrandi/xenonnt/raw',
                                readonly=True,
                                take_only=straxen.DAQReader.provides),
            strax.DataDirectory('/dali/lgrandi/xenonnt/processed',
                                readonly=True)]
        if output_folder:
            st.storage.append(strax.DataDirectory(output_folder))
        st.context_config['forbid_creation_of'] = ('raw_records', 'records')

    return st
def test_run_selection():
    mock_rundb = [
        dict(name='0', mode='funny', tags=[dict(name='bad')]),
        dict(name='1', mode='nice', tags=[dict(name='interesting'),
                                          dict(name='bad')]),
        dict(name='2', mode='nice', tags=[dict(name='interesting')])
    ]

    with tempfile.TemporaryDirectory() as temp_dir:
        sf = strax.DataDirectory(path=temp_dir)

        # Write mock runs db
        for d in mock_rundb:
            sf.write_run_metadata(d['name'], d)

        st = strax.Context(storage=sf)
        assert len(st.scan_runs()) == len(mock_rundb)
        assert st.run_metadata('0') == mock_rundb[0]

        # Runs 1 and 2 have mode 'nice' and the 'interesting' tag;
        # run 1 additionally carries the 'bad' tag
        assert len(st.select_runs(run_mode='nice')) == 2
        assert len(st.select_runs(include_tags='interesting')) == 2
        assert len(st.select_runs(include_tags='interesting',
                                  exclude_tags='bad')) == 1
        assert len(st.select_runs(include_tags='interesting',
                                  run_mode='nice')) == 2
def build_datastructure_doc():
    out = page_header

    pd.set_option('display.max_colwidth', -1)

    st = strax.Context(register_all=straxen.plugins.plugins)

    # Too lazy to write proper graph sorter
    plugins_by_deps = defaultdict(list)
    for pn, p in st._plugin_class_registry.items():
        plugins = st._get_plugins((pn,), run_id='0')
        plugins_by_deps[len(plugins)].append(pn)

    os.makedirs(this_dir + '/graphs', exist_ok=True)

    for n_deps in list(reversed(sorted(list(plugins_by_deps.keys())))):
        for data_type in plugins_by_deps[n_deps]:
            plugins = st._get_plugins((data_type,), run_id='0')

            # Create dependency graph
            g = graphviz.Digraph(format='svg')
            for d, p in plugins.items():
                g.node(d,
                       style='filled',
                       href='#' + d.replace('_', '-'),
                       fillcolor=kind_colors[p.data_kind])
                for dep in p.depends_on:
                    g.edge(d, dep)

            fn = this_dir + '/graphs/' + data_type
            g.render(fn)
            with open(fn + '.svg', mode='r') as f:
                svg = add_spaces(f.readlines()[5:])

            config_df = st.show_config(d).sort_values(by='option')

            # Shorten long default values
            config_df['default'] = [
                x[:10] + '...' + x[-10:]
                if isinstance(x, str) and len(x) > 30 else x
                for x in config_df['default'].values]

            p = plugins[data_type]

            out += template.format(
                p=p,
                svg=svg,
                data_type=data_type,
                columns=add_spaces(st.data_info(data_type).to_html(index=False)),
                docstring=p.__doc__ if p.__doc__ else '(no plugin description)',
                config_options=add_spaces(config_df.to_html(index=False)))

    with open(this_dir + '/reference/datastructure.rst', mode='w') as f:
        f.write(out)

    shutil.rmtree(this_dir + '/graphs')
def _get_context(temp_dir=tempfile.gettempdir()) -> strax.Context:
    """Get a context for the tests below"""
    context = strax.Context(
        storage=strax.DataDirectory(temp_dir, deep_scan=True),
        register=[Records, Peaks],
        use_per_run_defaults=True,
    )
    return context
def setUp(self):
    self.test_run_id = '0'
    self.target = 'records'
    self.path = os.path.join(tempfile.gettempdir(), 'strax_data')
    self.st = strax.Context(use_per_run_defaults=True,
                            register=[Records],
                            storage=[strax.DataDirectory(self.path)])
    assert not self.st.is_stored(self.test_run_id, self.target)
def demo():
    """Return strax context used in the straxen demo notebook"""
    return strax.Context(
        storage=[strax.DataDirectory('./strax_data'),
                 strax.DataDirectory('./strax_test_data')],
        register=straxen.plugins.pax_interface.RecordsFromPax,
        forbid_creation_of=('raw_records',),
        **common_opts)
def test_byte_strings_as_run_id():
    with tempfile.TemporaryDirectory() as temp_dir:
        st = strax.Context(storage=strax.DataDirectory(temp_dir,
                                                       deep_scan=True),
                           register=[Records])
        records_bytes = st.get_array(b'0', 'records')
        records = st.get_array('0', 'records')
        assert np.all(records_bytes == records)
def xenon1t_simulation(output_folder='./strax_data'):
    import wfsim
    st = strax.Context(
        storage=strax.DataDirectory(output_folder),
        config=dict(fax_config='fax_config_1t.json',
                    detector='XENON1T',
                    **x1t_common_config),
        **x1t_context_config)
    st.register(wfsim.RawRecordsFromFax1T)
    return st
def test_cut_plugin(input_peaks, cut_threshold):
    """ """
    # Just one chunk will do
    chunks = [input_peaks]
    _dtype = input_peaks.dtype

    class ToBeCut(strax.Plugin):
        """Data to be cut with strax.CutPlugin"""
        depends_on = tuple()
        dtype = _dtype
        provides = 'to_be_cut'
        data_kind = 'to_be_cut'  # match with depends_on below

        def compute(self, chunk_i):
            data = chunks[chunk_i]
            return self.chunk(
                data=data,
                start=(int(data[0]['time'])
                       if len(data) else
                       np.arange(len(chunks))[chunk_i]),
                end=(int(strax.endtime(data[-1]))
                     if len(data) else
                     np.arange(1, len(chunks) + 1)[chunk_i]))

        # Hack to make peak output stop after a few chunks
        def is_ready(self, chunk_i):
            return chunk_i < len(chunks)

        def source_finished(self):
            return True

    class CutSomething(strax.CutPlugin):
        """Minimal working example of CutPlugin"""
        depends_on = ('to_be_cut',)

        def cut_by(self, to_be_cut):
            return to_be_cut[_dtype_name] > cut_threshold

    st = strax.Context(storage=[])
    st.register(ToBeCut)
    st.register(CutSomething)

    result = st.get_array(run_id='some_run',
                          targets=strax.camel_to_snake(CutSomething.__name__))
    correct_answer = np.sum(input_peaks[_dtype_name] > cut_threshold)
    assert len(result) == len(input_peaks), "WTF??"
    assert correct_answer == np.sum(result['cut_something']), (
        "Cut plugin does not give boolean arrays correctly")
    if len(input_peaks):
        assert strax.endtime(input_peaks).max() == \
               strax.endtime(result).max(), "last end time got scrambled"
        assert np.all(input_peaks['time'] ==
                      result['time']), "(start) times got scrambled"
        assert np.all(strax.endtime(input_peaks) ==
                      strax.endtime(result)), "Some end times got scrambled"
def fake_daq():
    """Context for processing fake DAQ data in the current directory"""
    return strax.Context(
        storage=[
            strax.DataDirectory('./strax_data',
                                provide_run_metadata=False),
            # Fake DAQ puts run doc JSON in same folder:
            strax.DataDirectory('./from_fake_daq',
                                readonly=True)],
        config=dict(input_dir='./from_fake_daq'),
        **common_opts)
def get_plugin(config):
    with tempfile.TemporaryDirectory() as temp_dir:
        context = strax.Context(
            storage=strax.DataDirectory(temp_dir, deep_scan=True),
            config=config,
            register=[DummyPlugin],
            use_per_run_defaults=True,
        )
        return context.get_single_plugin('321', 'dummy_data')
def xenon1t_simulation(output_folder='./strax_data'):
    import wfsim
    st = strax.Context(
        storage=strax.DataDirectory(output_folder),
        config=dict(fax_config='fax_config_1t.json',
                    detector='XENON1T',
                    check_raw_record_overlaps=False,
                    **straxen.contexts.x1t_common_config),
        **straxen.contexts.common_opts)
    st.register(wfsim.RawRecordsFromFax1T)
    return st
def demo():
    """Return strax context used in the nEXO_strax demo notebook"""
    sx.download_test_data()
    return strax.Context(
        storage=[
            strax.DataDirectory('./strax_data'),
            strax.DataDirectory('./strax_test_data')
        ],
        register=sx.RecordsFromPax,
        forbid_creation_of=('raw_records',),
        **common_opts)
def test_lineage_changes(self):
    st = strax.Context(register=[Records, Peaks],
                       storage=[self.rundb_sf],
                       use_per_run_defaults=True,
                       )
    lineages = [st.key_for(r, 'peaks').lineage_hash for r in self.test_run_ids]
    assert len(set(lineages)) > 1
    with self.assertRaises(ValueError):
        # Lineage changing per run is not allowed!
        st.select_runs(available='peaks')
def test_core():
    for max_workers in [1, 2]:
        mystrax = strax.Context(
            storage=[],
            register=[Records, Peaks],
        )
        bla = mystrax.get_array(run_id=run_id,
                                targets='peaks',
                                max_workers=max_workers)
        assert len(bla) == recs_per_chunk * n_chunks
        assert bla.dtype == strax.peak_dtype()