def _time_range_to_n_range(self, run_id: str,
                           time_range: ty.Tuple[int, int],
                           d_with_time: str):
    """Return the range of row numbers spanned by the chunks that include
    time_range.

    Whole chunks are selected, so the returned row range can extend beyond
    the requested times on either side.

    :param run_id: Run name
    :param time_range: (start, stop) ns since unix epoch
    :param d_with_time: Name of data type
    :return: (first row of the first selected chunk,
        last row + 1 of the last selected chunk)
    :raises strax.DataNotAvailable: if d_with_time is not stored for run_id
    """
    # The n <-> time mapping lives in the chunk metadata of d_with_time,
    # so that data type has to be stored already.
    if not self.is_stored(run_id, d_with_time):
        raise strax.DataNotAvailable(
            "Time range selection needs time info from "
            f"{d_with_time}, but this data is not yet available")

    meta = self.get_meta(run_id, d_with_time)
    chunks = meta['chunks']
    times = np.array([c['first_time'] for c in chunks])

    # Reconstruct row numbers from the per-chunk row counts in the metadata.
    # n_end is last row + 1 in a chunk, n_start is the first row.
    # n_start must subtract each chunk's OWN row count; subtracting the
    # first chunk's count is only right when all chunks are equally large.
    rows_per_chunk = np.array([c['n'] for c in chunks])
    n_end = rows_per_chunk.cumsum()
    n_start = n_end - rows_per_chunk

    # Index of the last chunk starting before each edge of the time range
    _inds = np.searchsorted(times, time_range) - 1
    # Clip to prevent out-of-range times causing
    # negative or nonexistent indices
    _inds = np.clip(_inds, 0, len(n_end) - 1)

    return n_start[_inds[0]], n_end[_inds[1]]
def _saver(self, dirname, metadata, meta_only=False):
    """Return a FileSaver writing to dirname.

    :param dirname: directory the saver will write to; it does not need
        to exist yet
    :param metadata: metadata handed on to the FileSaver
    :param meta_only: handed on to the FileSaver unchanged
    :raises strax.DataNotAvailable: if the directory that would contain
        dirname is not writeable
    """
    # dirname itself may not exist yet, even though its parent-to-be does,
    # so resolve the parent through abspath before testing permissions.
    containing_dir = os.path.abspath(os.path.join(dirname, os.pardir))
    can_write = os.access(containing_dir, os.W_OK)
    if can_write:
        return FileSaver(dirname, metadata=metadata, meta_only=meta_only)

    raise strax.DataNotAvailable(
        f"Can't write data to {dirname}, "
        f"no write permissions in {containing_dir}.")
def try_load(self, st: strax.Context, target: str):
    """Load target for self.run_id from the context st.

    :param st: context to load from
    :param target: name of the data type to load
    :return: whatever st.get_array returns for (self.run_id, target)
    :raises strax.DataNotAvailable: if st.get_array raises it; re-raised
        with a message naming the data key and every frontend in
        st.storage, chained to the original error
    """
    try:
        return st.get_array(self.run_id, target)
    except strax.DataNotAvailable as data_error:
        # Name the key first, then list one frontend per line
        header = (f'Could not find '
                  f'{st.key_for(self.run_id, target)} '
                  f'with the following frontends\n')
        frontend_lines = ''.join(f'\t{sf}\n' for sf in st.storage)
        raise strax.DataNotAvailable(header + frontend_lines) from data_error
def get_metadata(self, dirname: str, **kwargs):
    """Read the metadata JSON file that belongs to dirname.

    The file path is built by rucio_path from self.root_dir, the file name
    '<prefix>-metadata.json' (prefix from dirname_to_prefix) and dirname.

    :param dirname: name of the data directory to get metadata for
    :param kwargs: accepted but not used by this implementation
    :return: the parsed JSON content of the metadata file
    :raises strax.DataNotAvailable: if the folder that should hold the
        metadata file does not exist
    :raises strax.DataCorrupted: if that folder exists but the metadata
        file is missing
    """
    prefix = dirname_to_prefix(dirname)
    metadata_json = f'{prefix}-metadata.json'
    fn = rucio_path(self.root_dir, metadata_json, dirname)

    # Use dirname rather than re-joining the split path onto '/':
    # the latter silently turns a relative path into an absolute one.
    folder = osp.dirname(fn)
    if not osp.exists(folder):
        raise strax.DataNotAvailable(f"No folder for metadata at {fn}")
    if not osp.exists(fn):
        raise strax.DataCorrupted(f"Folder exists but no metadata at {fn}")

    with open(fn, mode='r') as f:
        return json.loads(f.read())
def _saver(self, dirname, metadata, **kwargs):
    """Return a FileSaver writing to dirname, creating its parent if needed.

    :param dirname: directory the saver will write to; it does not need
        to exist yet
    :param metadata: metadata handed on to the FileSaver
    :param kwargs: further keyword arguments handed on to the FileSaver
    :raises strax.DataNotAvailable: if the parent directory cannot be
        created, or exists but is not writeable
    """
    # Test if the parent directory is writeable.
    # We need abspath since the dir itself may not exist,
    # even though its parent-to-be does
    parent_dir = os.path.abspath(os.path.join(dirname, os.pardir))

    # In case the parent dir also doesn't exist, we have to create it,
    # otherwise the write permission check below will certainly fail
    try:
        os.makedirs(parent_dir, exist_ok=True)
    except OSError as e:
        # Chain the OSError so the original traceback is preserved
        raise strax.DataNotAvailable(
            f"Can't write data to {dirname}, "
            f"{parent_dir} does not exist and we could not create it. "
            f"Original error: {e}") from e

    # Finally, check if we have permission to create the new subdirectory
    # (which the Saver will do)
    if not os.access(parent_dir, os.W_OK):
        raise strax.DataNotAvailable(
            f"Can't write data to {dirname}, "
            f"no write permissions in {parent_dir}.")

    return FileSaver(dirname, metadata=metadata, **kwargs)
def get_meta(self, run_id, target) -> dict:
    """Return metadata for target for run_id, taken from the first storage
    frontend that has it.

    :param run_id: run id to get
    :param target: data type to get
    :raises strax.DataNotAvailable: if no frontend in self.storage can
        provide the metadata
    """
    data_key = self._key_for(run_id, target)

    for frontend in self.storage:
        try:
            found = frontend.get_metadata(data_key, **self._find_options)
        except strax.DataNotAvailable:
            # Not here; note it and move on to the next frontend
            self.log.debug(f"Frontend {frontend} does not have {data_key}")
            continue
        return found

    raise strax.DataNotAvailable(f"Can't load metadata, "
                                 f"data for {data_key} not available")
def get_metadata(self, did: str, **kwargs):
    """Read the metadata JSON file that belongs to the given did.

    The metadata did is '<scope>:<dtype>-<hsh>-metadata.json', with scope
    taken from did and dtype/hsh from parse_did; rucio_path maps it to a
    file path under self.rucio_dir.

    :param did: identifier of the form '<scope>:<name>'; anything that
        does not split into exactly two parts on ':' raises ValueError
    :param kwargs: accepted but not used by this implementation
    :return: the parsed JSON content of the metadata file
    :raises strax.DataNotAvailable: if the folder that should hold the
        metadata file does not exist
    :raises strax.DataCorrupted: if that folder exists but the metadata
        file is missing
    """
    # Only the scope is needed; the unpacking also validates the did format
    scope, _ = did.split(':')
    _, dtype, hsh = parse_did(did)

    metadata_json = f'{dtype}-{hsh}-metadata.json'
    metadata_did = f'{scope}:{metadata_json}'
    metadata_path = rucio_path(self.rucio_dir, metadata_did)

    # Use dirname rather than re-joining the split path onto '/':
    # the latter silently turns a relative path into an absolute one.
    folder = os.path.dirname(metadata_path)
    if not os.path.exists(folder):
        raise strax.DataNotAvailable(
            f"No folder for metadata at {metadata_path}")
    if not os.path.exists(metadata_path):
        raise strax.DataCorrupted(
            f"Folder exists but no metadata at {metadata_path}")

    with open(metadata_path, mode='r') as f:
        return json.loads(f.read())
def run_metadata(self, run_id, projection=None) -> dict:
    """Return run-level metadata for run_id from the first storage frontend
    able to provide it.

    Frontends whose provide_run_metadata is false are skipped; frontends
    that raise DataNotAvailable or NotImplementedError are passed over.

    :param run_id: run id to get
    :param projection: Selection of fields to get, following MongoDB
        syntax. May not be supported by frontend.
    :raises strax.DataNotAvailable: if no frontend provides the metadata
    """
    candidates = (sf for sf in self.storage if sf.provide_run_metadata)

    for frontend in candidates:
        try:
            return frontend.run_metadata(run_id, projection=projection)
        except (strax.DataNotAvailable, NotImplementedError):
            self.log.debug(f"Frontend {frontend} does not have "
                           f"run metadata for {run_id}")

    raise strax.DataNotAvailable(f"No run-level metadata available "
                                 f"for {run_id}")
def check_cache(d): nonlocal plugins, loaders, savers, seen if d in seen: return seen.add(d) p = plugins[d] key = strax.DataKey(run_id, d, p.lineage) for sb_i, sf in enumerate(self.storage): try: # Bit clunky... but allows specifying executor later sf.find(key, **self._find_options) loaders[d] = partial(sf.loader, key, n_range=n_range, **self._find_options) # Found it! No need to make it del plugins[d] break except strax.DataNotAvailable: continue else: if time_range is not None: # While the data type providing the time information is # available (else we'd have failed earlier), one of the # other requested data types is not. raise strax.DataNotAvailable( f"Time range selection assumes data is already " f"available, but {d} for {run_id} is not.") if d in self.context_config['forbid_creation_of']: raise strax.DataNotAvailable( f"{d} for {run_id} not found in any storage, and " "your context specifies it cannot be created.") # Not in any cache. We will be computing it. to_compute[d] = p for dep_d in p.depends_on: check_cache(dep_d) # Should we save this data? if time_range is not None: # No, since we're not even getting the whole data. # Without this check, saving could be attempted if the # storage converter mode is enabled. self.log.warning(f"Not saving {d} while " f"selecting a time range in the run") return if any([ len(v) > 0 for k, v in self._find_options.items() if 'fuzzy' in k ]): # In fuzzy matching mode, we cannot (yet) derive the lineage # of any data we are creating. To avoid create false # data entries, we currently do not save at all. 
self.log.warning(f"Not saving {d} while fuzzy matching is " f"turned on.") return if self.context_config['allow_incomplete']: self.log.warning(f"Not saving {d} while loading incomplete " f"data is allowed.") return elif p.save_when == strax.SaveWhen.NEVER: if d in save: raise ValueError("Plugin forbids saving of {d}") return elif p.save_when == strax.SaveWhen.TARGET: if d not in targets: return elif p.save_when == strax.SaveWhen.EXPLICIT: if d not in save: return else: assert p.save_when == strax.SaveWhen.ALWAYS for sf in self.storage: if sf.readonly: continue if d not in to_compute: if not self.context_config['storage_converter']: continue try: sf.find(key, **self._find_options) # Already have this data in this backend continue except strax.DataNotAvailable: # Don't have it, so let's convert it! pass try: savers[d].append(sf.saver(key, metadata=p.metadata(run_id))) except strax.DataNotAvailable: # This frontend cannot save. Too bad. pass
def get_components(
        self,
        run_id: str,
        targets=tuple(),
        save=tuple(),
        time_range=None,
) -> strax.ProcessorComponents:
    """Return components for setting up a processor
    {get_docs}
    """
    # NOTE(review): the {get_docs} placeholder above is presumably filled
    # in by str.format elsewhere, so the docstring must not gain any other
    # braces.
    save = strax.to_str_tuple(save)
    targets = strax.to_str_tuple(targets)

    plugins = self._get_plugins(targets, run_id)

    n_range = None
    if time_range is not None:
        # Ensure we have one data kind
        if len({plugins[t].data_kind for t in targets}) > 1:
            raise NotImplementedError(
                "Time range selection not implemented "
                "for multiple data kinds.")

        # Which plugin provides time information? We need it to map to
        # row indices.
        for d_with_time in targets:
            if 'time' in plugins[d_with_time].dtype.names:
                break
        else:
            raise RuntimeError("No time info in targets, should have been"
                               " caught earlier??")

        # Find a range of row numbers that contains the time range.
        # It is a bit too large, since whole chunks are selected.
        # Get the n <-> time mapping in needed chunks
        if not self.is_stored(run_id, d_with_time):
            raise strax.DataNotAvailable(
                f"Time range selection needs time"
                f" info from {d_with_time}, but this data"
                f" is not yet available")
        meta = self.get_meta(run_id, d_with_time)
        times = np.array([c['first_time'] for c in meta['chunks']])

        # Reconstruct row numbers from row counts, which are in metadata
        # n_end is last row + 1 in a chunk. n_start is the first.
        # n_start must subtract each chunk's OWN row count; subtracting
        # the first chunk's count is only right for equally sized chunks.
        rows_per_chunk = np.array([c['n'] for c in meta['chunks']])
        n_end = rows_per_chunk.cumsum()
        n_start = n_end - rows_per_chunk

        _inds = np.searchsorted(times, time_range) - 1
        # Clip to prevent out-of-range times causing
        # negative or nonexistent indices
        _inds = np.clip(_inds, 0, len(n_end) - 1)
        n_range = n_start[_inds[0]], n_end[_inds[1]]

    # Get savers/loaders, and meanwhile filter out plugins that do not
    # have to do computation. (Their instances will stick around
    # through the .deps attribute of plugins that do.)
    loaders = dict()
    savers = collections.defaultdict(list)
    seen = set()
    to_compute = dict()

    def check_cache(d):
        """Decide whether d is loaded or computed and set up its savers,
        recursing into dependencies of data that must be computed."""
        nonlocal plugins, loaders, savers, seen
        if d in seen:
            return
        seen.add(d)
        p = plugins[d]
        key = strax.DataKey(run_id, d, p.lineage)

        for sf in self.storage:
            try:
                # Bit clunky... but allows specifying executor later.
                # find raises DataNotAvailable now if sf lacks key.
                sf.find(key, **self._find_options)
                loaders[d] = partial(sf.loader,
                                     key,
                                     n_range=n_range,
                                     **self._find_options)
                # Found it! No need to make it
                del plugins[d]
                break
            except strax.DataNotAvailable:
                continue
        else:
            if time_range is not None:
                # While the data type providing the time information is
                # available (else we'd have failed earlier), one of the
                # other requested data types is not.
                raise strax.DataNotAvailable(
                    f"Time range selection assumes data is already "
                    f"available, but {d} for {run_id} is not.")
            if d in self.context_config['forbid_creation_of']:
                raise strax.DataNotAvailable(
                    f"{d} for {run_id} not found in any storage, and "
                    "your context specifies it cannot be created.")
            # Not in any cache. We will be computing it.
            to_compute[d] = p
            for dep_d in p.depends_on:
                check_cache(dep_d)

        # Should we save this data?
        if time_range is not None:
            # No, since we're not even getting the whole data.
            # Without this check, saving could be attempted if the
            # storage converter mode is enabled.
            self.log.warning(f"Not saving {d} while "
                             f"selecting a time range in the run")
            return
        if any(len(v) > 0
               for k, v in self._find_options.items()
               if 'fuzzy' in k):
            # In fuzzy matching mode, we cannot (yet) derive the lineage
            # of any data we are creating. To avoid creating false
            # data entries, we currently do not save at all.
            self.log.warning(f"Not saving {d} while fuzzy matching is "
                             f"turned on.")
            return
        if self.context_config['allow_incomplete']:
            self.log.warning(f"Not saving {d} while loading incomplete "
                             f"data is allowed.")
            return
        elif p.save_when == strax.SaveWhen.NEVER:
            if d in save:
                # f-prefix was missing: the message showed a literal '{d}'
                raise ValueError(f"Plugin forbids saving of {d}")
            return
        elif p.save_when == strax.SaveWhen.TARGET:
            if d not in targets:
                return
        elif p.save_when == strax.SaveWhen.EXPLICIT:
            if d not in save:
                return
        else:
            assert p.save_when == strax.SaveWhen.ALWAYS

        for sf in self.storage:
            if sf.readonly:
                continue
            if d not in to_compute:
                # We are loading d; only storage converter mode re-saves
                if not self.context_config['storage_converter']:
                    continue
                try:
                    sf.find(key, **self._find_options)
                    # Already have this data in this backend
                    continue
                except strax.DataNotAvailable:
                    # Don't have it, so let's convert it!
                    pass
            try:
                savers[d].append(sf.saver(key,
                                          metadata=p.metadata(run_id)))
            except strax.DataNotAvailable:
                # This frontend cannot save. Too bad.
                pass

    for d in targets:
        check_cache(d)
    plugins = to_compute

    intersec = list(plugins.keys() & loaders.keys())
    if len(intersec):
        # f-prefix was missing: the message showed a literal '{intersec}'
        raise RuntimeError(f"{intersec} both computed and loaded?!")

    # For the plugins which will run computations,
    # check all required options are available or set defaults.
    # Also run any user-defined setup
    for p in plugins.values():
        self._set_plugin_config(p, run_id, tolerant=False)
        p.setup()
    return strax.ProcessorComponents(plugins=plugins,
                                     loaders=loaders,
                                     savers=dict(savers),
                                     targets=targets)
def check_cache(d): nonlocal plugins, loaders, savers, seen if d in seen: return seen.add(d) p = plugins[d] # Can we load this data, or must we compute it? loading_this_data = False key = strax.DataKey(run_id, d, p.lineage) for sb_i, sf in enumerate(self.storage): try: # Partial is clunky... but allows specifying executor later # Since it doesn't run until later, we must do a find now # that we can still handle DataNotAvailable sf.find(key, **self._find_options) loaders[d] = partial(sf.loader, key, n_range=n_range, **self._find_options) except strax.DataNotAvailable: continue else: # Found it! No need to make it or look in other frontends loading_this_data = True del plugins[d] break else: # Data not found anywhere. We will be computing it. if time_range is not None and not d.startswith('_temp'): # While the data type providing the time information is # available (else we'd have failed earlier), one of the # other requested data types is not. raise strax.DataNotAvailable( f"Time range selection assumes data is already " f"available, but {d} for {run_id} is not.") if d in self.context_config['forbid_creation_of']: raise strax.DataNotAvailable( f"{d} for {run_id} not found in any storage, and " "your context specifies it cannot be created.") to_compute[d] = p for dep_d in p.depends_on: check_cache(dep_d) # Should we save this data? If not, return. if (loading_this_data and not self.context_config['storage_converter']): return if p.save_when == strax.SaveWhen.NEVER: if d in save: raise ValueError("Plugin forbids saving of {d}") return elif p.save_when == strax.SaveWhen.TARGET: if d not in targets: return elif p.save_when == strax.SaveWhen.EXPLICIT: if d not in save: return else: assert p.save_when == strax.SaveWhen.ALWAYS # Warn about conditions that preclude saving, but the user # might not expect. if time_range is not None: # We're not even getting the whole data. # Without this check, saving could be attempted if the # storage converter mode is enabled. 
self.log.warning(f"Not saving {d} while " f"selecting a time range in the run") return if any([ len(v) > 0 for k, v in self._find_options.items() if 'fuzzy' in k ]): # In fuzzy matching mode, we cannot (yet) derive the # lineage of any data we are creating. To avoid creating # false data entries, we currently do not save at all. self.log.warning(f"Not saving {d} while fuzzy matching is" f" turned on.") return if self.context_config['allow_incomplete']: self.log.warning(f"Not saving {d} while loading incomplete" f" data is allowed.") return # Save the target and any other outputs of the plugin. for d_to_save in set([d] + list(p.provides)): if d_to_save in savers and len(savers[d_to_save]): # This multi-output plugin was scanned before # let's not create doubled savers assert p.multi_output continue key = strax.DataKey(run_id, d_to_save, p.lineage) for sf in self.storage: if sf.readonly: continue if loading_this_data: # Usually, we don't save if we're loading if not self.context_config['storage_converter']: continue # ... but in storage converter mode we do: try: sf.find(key, **self._find_options) # Already have this data in this backend continue except strax.DataNotAvailable: # Don't have it, so let's save it! pass # If we get here, we must try to save try: savers[d_to_save].append( sf.saver(key, metadata=p.metadata(run_id, d_to_save))) except strax.DataNotAvailable: # This frontend cannot save. Too bad. pass
def _event_display(context,
                   run_id,
                   events,
                   to_pe,
                   axes=None,
                   records_matrix=True,
                   s2_fuzz=50,
                   s1_fuzz=0,
                   max_peaks=500,
                   xenon1t=False,
                   display_peak_info=PEAK_DISPLAY_DEFAULT_INFO,
                   display_event_info=EVENT_DISPLAY_DEFAULT_INFO,
                   s1_hp_kwargs=None,
                   s2_hp_kwargs=None,
                   event_time_limit=None,
                   plot_all_positions=True,
                   ):
    """{event_docs}
    :param axes: if a dict of matplotlib axes (w/ same keys as below,
        and empty/None for panels not filled)
    {event_returns}
    """
    # NOTE(review): the {event_docs} / {event_returns} placeholders above
    # are presumably filled in by str.format elsewhere, so the docstring
    # must not gain any other braces.
    if len(events) != 1:
        raise ValueError(f'Found {len(events)} only request one')
    event = events[0]

    if not context.is_stored(run_id, 'peaklets'):
        raise strax.DataNotAvailable(f'peaklets not available for {run_id}')

    if axes is None:
        raise ValueError('No axes provided')
    ax_s1 = axes.get("ax_s1", None)
    ax_s2 = axes.get("ax_s2", None)
    ax_s1_hp_t = axes.get("ax_s1_hp_t", None)
    ax_s1_hp_b = axes.get("ax_s1_hp_b", None)
    ax_s2_hp_t = axes.get("ax_s2_hp_t", None)
    ax_s2_hp_b = axes.get("ax_s2_hp_b", None)
    ax_event_info = axes.get("ax_event_info", None)
    ax_peak_info = axes.get("ax_peak_info", None)
    ax_ev = axes.get("ax_ev", None)
    ax_rec = axes.get("ax_rec", None)

    # titles
    for ax, title in zip(
            [ax_s1, ax_s1_hp_t, ax_s1_hp_b,
             ax_s2, ax_s2_hp_t, ax_s2_hp_b,
             ax_event_info, ax_peak_info],
            ["Main S1", "S1 top", "S1 bottom",
             "Main S2", "S2 top", "S2 bottom",
             "Event info", "Peak info"]):
        if ax is not None:
            ax.set_title(title)

    # Parse the hit pattern options.
    # Work on copies so the defaults filled in below never leak into the
    # dicts owned by the caller (None is used as default in the signature
    # because of mutable defaults).
    s1_hp_kwargs = dict(s1_hp_kwargs) if s1_hp_kwargs is not None else {}
    s2_hp_kwargs = dict(s2_hp_kwargs) if s2_hp_kwargs is not None else {}

    # Hit patterns options:
    dead_pmts = np.argwhere(to_pe == 0)
    for hp_opt, color_map in ((s1_hp_kwargs, "Blues"),
                              (s2_hp_kwargs, "Greens")):
        _common_opt = dict(xenon1t=xenon1t,
                           pmt_label_color='lightgrey',
                           log_scale=True,
                           vmin=0.1,
                           s=(250 if records_matrix else 220),
                           pmt_label_size=7,
                           edgecolor='grey',
                           dead_pmts=dead_pmts,
                           cmap=color_map)
        # update s1 & S2 hit pattern kwargs with _common_opt if not
        # specified by the user
        for k, v in _common_opt.items():
            hp_opt.setdefault(k, v)

    # S1
    if events['s1_area'] != 0:
        if ax_s1 is not None:
            plt.sca(ax_s1)
            context.plot_peaks(run_id,
                               time_range=(events['s1_time'] - s1_fuzz,
                                           events['s1_endtime'] + s1_fuzz),
                               single_figure=False)

        # Hit pattern plots
        area = context.get_array(
            run_id,
            'peaklets',
            time_range=(events['s1_time'], events['s1_endtime']),
            keep_columns=('area_per_channel', 'time', 'dt', 'length'),
            progress_bar=False,
        )
        for ax, array in ((ax_s1_hp_t, 'top'), (ax_s1_hp_b, 'bottom')):
            if ax is not None:
                plt.sca(ax)
                straxen.plot_on_single_pmt_array(
                    c=np.sum(area['area_per_channel'], axis=0),
                    array_name=array,
                    **s1_hp_kwargs)
                # Mark reconstructed position
                plt.scatter(event['x'], event['y'],
                            marker='X', s=100, c='k')

    # S2
    if event['s2_area'] != 0:
        if ax_s2 is not None:
            plt.sca(ax_s2)
            context.plot_peaks(run_id,
                               time_range=(events['s2_time'] - s2_fuzz,
                                           events['s2_endtime'] + s2_fuzz),
                               single_figure=False)

        # Hit pattern plots
        area = context.get_array(
            run_id,
            'peaklets',
            time_range=(events['s2_time'], events['s2_endtime']),
            keep_columns=('area_per_channel', 'time', 'dt', 'length'),
            progress_bar=False,
        )
        for axi, (ax, array) in enumerate([(ax_s2_hp_t, 'top'),
                                           (ax_s2_hp_b, 'bottom')]):
            if ax is not None:
                plt.sca(ax)
                straxen.plot_on_single_pmt_array(
                    c=np.sum(area['area_per_channel'], axis=0),
                    array_name=array,
                    **s2_hp_kwargs)
                # Mark reconstructed position (corrected)
                plt.scatter(event['x'], event['y'],
                            marker='X', s=100, c='k')
                # Extra positions only on the top array panel
                if not xenon1t and axi == 0 and plot_all_positions:
                    _scatter_rec(event)

    # Fill panels with peak/event info
    for ax, labels_and_unit in ((ax_event_info, display_event_info),
                                (ax_peak_info, display_peak_info)):
        if ax is not None:
            for i, (_lab, _unit) in enumerate(labels_and_unit):
                coord = 0.01, 0.9 - 0.9 * i / len(labels_and_unit)
                ax.text(*coord, _lab[:24], va='top', zorder=-10)
                ax.text(coord[0] + 0.5, coord[1],
                        _unit.format(v=event[_lab]),
                        va='top', zorder=-10)
            # Remove axes and labels from panel
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)

    # Plot peaks in event
    ev_range = None
    if ax_ev is not None:
        plt.sca(ax_ev)
        if event_time_limit is None:
            time_range = (events['time'], events['endtime'])
        else:
            time_range = event_time_limit

        context.plot_peaks(run_id,
                           time_range=time_range,
                           show_largest=max_peaks,
                           single_figure=False)
        ev_range = plt.xlim()

    if records_matrix and ax_rec is not None:
        plt.sca(ax_rec)
        context.plot_records_matrix(run_id,
                                    raw=records_matrix == 'raw',
                                    time_range=(events['time'],
                                                events['endtime']),
                                    single_figure=False)
        ax_rec.tick_params(axis='x', rotation=0)
        if not xenon1t:
            # Top vs bottom division
            ax_rec.axhline(straxen.n_top_pmts, c='k')
        if ev_range is not None:
            # Keep the records matrix aligned with the peaks panel
            plt.xlim(*ev_range)

    # Final tweaks: each panel is guarded by, and applied to, its own axis
    if ax_s1 is not None:
        ax_s1.tick_params(axis='x', rotation=45)
    if ax_s2 is not None:
        ax_s2.tick_params(axis='x', rotation=45)
    if ax_ev is not None:
        ax_ev.tick_params(axis='x', rotation=0)

    # Split the time into everything before and the last nine digits.
    # Use the scalar from `event`: str() of the one-element array
    # events["time"] includes brackets, which shifts the split.
    event_time = str(event['time'])
    title = (f'Run {run_id}. Time '
             f'{event_time[:-9]}.{event_time[-9:]}\n'
             f'{datetime.fromtimestamp(event["time"] / 1e9, tz=pytz.utc)}')
    plt.suptitle(title, y=0.95)
    # NB: reflects panels order
    return (ax_s1, ax_s2, ax_s1_hp_t, ax_s1_hp_b,
            ax_event_info, ax_peak_info, ax_s2_hp_t, ax_s2_hp_b,
            ax_ev,
            ax_rec)
def check_cache(d): nonlocal plugins, loaders, savers, seen if d in seen: return seen.add(d) p = plugins[d] key = strax.DataKey(run_id, d, p.lineage) for sb_i, sf in enumerate(self.storage): try: loaders[d] = sf.loader(key, n_range=n_range, **self._fuzzy_options) # Found it! No need to make it del plugins[d] break except strax.DataNotAvailable: continue else: if time_range is not None: # While the data type providing the time information is # available (else we'd have failed earlier), one of the # other requested data types is not. raise strax.DataNotAvailable( f"Time range selection assumes data is already " f"available, but {d} for {run_id} is not.") # Not in any cache. We will be computing it. to_compute[d] = p for dep_d in p.depends_on: check_cache(dep_d) # Should we save this data? if time_range is not None: # No, since we're not even getting the whole data return elif p.save_when == strax.SaveWhen.NEVER: if d in save: raise ValueError("Plugin forbids saving of {d}") return elif p.save_when == strax.SaveWhen.TARGET: if d not in targets: return elif p.save_when == strax.SaveWhen.EXPLICIT: if d not in save: return else: assert p.save_when == strax.SaveWhen.ALWAYS for sf in self.storage: if sf.readonly: continue if d not in to_compute: if not self.context_config['storage_converter']: continue try: sf.find(key, **self._fuzzy_options) # Already have this data in this backend continue except strax.DataNotAvailable: pass try: savers[d].append( sf.saver(key, metadata=p.metadata(run_id), meta_only=p.save_meta_only)) except strax.DataNotAvailable: # This frontend cannot save. Too bad. pass
def loader(self, backend_key,
           time_range=None,
           chunk_number=None,
           executor=None):
    """Iterates over strax data in backend_key

    This is a generator: the metadata checks below only run once
    iteration starts.

    :param backend_key: key of the data to load; passed to get_metadata
        and to the chunk reader
    :param time_range: 2-length arraylike of (start, exclusive end)
        of desired data. Will return all data that partially overlaps with
        the range. Default is None, which means get the entire run.
    :param chunk_number: Chunk number to get exclusively
    :param executor: Executor to push load/decompress operations to.
        If given, futures are yielded instead of chunk results.
    :raises strax.DataNotAvailable: if the data was made by strax older
        than 0.9.0, or the metadata lacks required fields
    :raises ValueError: if the metadata lists no chunks, or a chunk's
        metadata lacks required fields
    """
    metadata = self.get_metadata(backend_key)

    if 'strax_version' in metadata:
        v_old = metadata['strax_version']
        if version.parse(v_old) < version.parse('0.9.0'):
            raise strax.DataNotAvailable(
                f"Cannot load data at {backend_key}: "
                f"it was created with strax {v_old}, "
                f"but you have strax {strax.__version__}. ")
    else:
        warnings.warn(f"Data at {backend_key} does not say what strax "
                      "version it was generated with. This means it is "
                      "corrupted, or very, very old. Probably "
                      "we cannot load this.")

    # 'start' and 'end' are not required, to allow allow_incomplete
    required_fields = (
        'run_id data_type data_kind dtype compressor').split()
    missing_fields = [x for x in required_fields if x not in metadata]
    if len(missing_fields):
        raise strax.DataNotAvailable(
            f"Cannot load data at {backend_key}: metadata is "
            f"missing the required fields {missing_fields}. ")

    if not len(metadata['chunks']):
        raise ValueError(
            f"Cannot load data at {backend_key}, it has no chunks!")

    dtype = literal_eval(metadata['dtype'])

    # Common arguments for chunk construction, not stored with chunk-level
    # metadata
    chunk_kwargs = dict(
        data_type=metadata['data_type'],
        data_kind=metadata['data_kind'],
        dtype=dtype)

    required_chunk_metadata_fields = 'start end run_id'.split()

    for i, chunk_info in enumerate(strax.iter_chunk_meta(metadata)):
        missing_fields = [x for x in required_chunk_metadata_fields
                          if x not in chunk_info]
        if len(missing_fields):
            # 'run_id' was misspelled 'run_d' here, so building this
            # message raised KeyError and hid the real problem.
            raise ValueError(
                f"Error reading chunk {i} of {metadata['dtype']} "
                f"of {metadata['run_id']} from {backend_key}: "
                f"chunk metadata is missing fields {missing_fields}")

        # Chunk number constraint
        if chunk_number is not None:
            if i != chunk_number:
                continue

        # Time constraint
        if time_range:
            if (chunk_info['end'] <= time_range[0]
                    or time_range[1] <= chunk_info['start']):
                # Chunk does not cover any part of range
                continue

        read_chunk_kwargs = dict(
            backend_key=backend_key,
            dtype=dtype,
            metadata=metadata,
            chunk_info=chunk_info,
            time_range=time_range,
            chunk_construction_kwargs=chunk_kwargs)

        if executor is None:
            yield self._read_and_format_chunk(**read_chunk_kwargs)
        else:
            yield executor.submit(self._read_and_format_chunk,
                                  **read_chunk_kwargs)
def check_cache(d): nonlocal plugins, loaders, savers, seen if d in seen: return seen.add(d) p = plugins[d] # Can we load this data? loading_this_data = False key = strax.DataKey(run_id, d, p.lineage) ldr = self._get_partial_loader_for(key, chunk_number=chunk_number, time_range=time_range) if not ldr and run_id.startswith('_'): if time_range is not None: raise NotImplementedError("time range loading not yet " "supported for superruns") sub_run_spec = self.run_metadata( run_id, 'sub_run_spec')['sub_run_spec'] self.make(list(sub_run_spec.keys()), d) ldrs = [] for subrun in sub_run_spec: sub_key = strax.DataKey( subrun, d, self._get_plugins((d, ), subrun)[d].lineage) if sub_run_spec[subrun] == 'all': _subrun_time_range = None else: _subrun_time_range = sub_run_spec[subrun] ldr = self._get_partial_loader_for( sub_key, time_range=_subrun_time_range, chunk_number=chunk_number) if not ldr: raise RuntimeError( f"Could not load {d} for subrun {subrun} " f"even though we made it??") ldrs.append(ldr) def concat_loader(*args, **kwargs): for x in ldrs: yield from x(*args, **kwargs) ldr = lambda *args, **kwargs: concat_loader(*args, **kwargs) if ldr: # Found it! No need to make it or look in other frontends loading_this_data = True loaders[d] = ldr del plugins[d] else: # Data not found anywhere. We will be computing it. if (time_range is not None and plugins[d].save_when != strax.SaveWhen.NEVER): # While the data type providing the time information is # available (else we'd have failed earlier), one of the # other requested data types is not. 
raise strax.DataNotAvailable( f"Time range selection assumes data is already " f"available, but {d} for {run_id} is not.") if '*' in self.context_config['forbid_creation_of']: raise strax.DataNotAvailable( f"{d} for {run_id} not found in any storage, and " "your context specifies no new data can be created.") if d in self.context_config['forbid_creation_of']: raise strax.DataNotAvailable( f"{d} for {run_id} not found in any storage, and " "your context specifies it cannot be created.") to_compute[d] = p for dep_d in p.depends_on: check_cache(dep_d) # Should we save this data? If not, return. if (loading_this_data and not self.context_config['storage_converter']): return if p.save_when == strax.SaveWhen.NEVER: if d in save: raise ValueError("Plugin forbids saving of {d}") return elif p.save_when == strax.SaveWhen.TARGET: if d not in targets: return elif p.save_when == strax.SaveWhen.EXPLICIT: if d not in save: return else: assert p.save_when == strax.SaveWhen.ALWAYS # Warn about conditions that preclude saving, but the user # might not expect. if time_range is not None: # We're not even getting the whole data. # Without this check, saving could be attempted if the # storage converter mode is enabled. self.log.warning(f"Not saving {d} while " f"selecting a time range in the run") return if any([ len(v) > 0 for k, v in self._find_options.items() if 'fuzzy' in k ]): # In fuzzy matching mode, we cannot (yet) derive the # lineage of any data we are creating. To avoid creating # false data entries, we currently do not save at all. self.log.warning(f"Not saving {d} while fuzzy matching is" f" turned on.") return if self.context_config['allow_incomplete']: self.log.warning(f"Not saving {d} while loading incomplete" f" data is allowed.") return # Save the target and any other outputs of the plugin. 
for d_to_save in set([d] + list(p.provides)): if d_to_save in savers and len(savers[d_to_save]): # This multi-output plugin was scanned before # let's not create doubled savers assert p.multi_output continue key = strax.DataKey(run_id, d_to_save, p.lineage) for sf in self.storage: if sf.readonly: continue if loading_this_data: # Usually, we don't save if we're loading if not self.context_config['storage_converter']: continue # ... but in storage converter mode we do: try: sf.find(key, **self._find_options) # Already have this data in this backend continue except strax.DataNotAvailable: # Don't have it, so let's save it! pass # If we get here, we must try to save try: savers[d_to_save].append( sf.saver(key, metadata=p.metadata(run_id, d_to_save))) except strax.DataNotAvailable: # This frontend cannot save. Too bad. pass