def get_iter(self, run_id: str, targets, save=tuple(), max_workers=None,
             **kwargs) -> ty.Iterator[np.ndarray]:
    """Compute target for run_id and iterate over results.

    If several targets are requested they must all have the same data
    kind; they are then combined through a temporary MergeOnlyPlugin
    that is registered for the duration of the component lookup only.
    Extra keyword arguments are forwarded to new_context and the
    resulting temporary context is used instead of this one.

    Do NOT interrupt the iterator (i.e. break): results will still
    accumulate in a background thread!
    {get_docs}

    :raises RuntimeError: if multiple targets with different data kinds
        are requested.
    """
    # NOTE: the {get_docs} placeholder above is presumably substituted
    # elsewhere with str.format, so keep the docstring free of other braces.

    # If any new options are given, work on a temporary context
    if kwargs:
        # noinspection PyMethodFirstArgAssignment
        self = self.new_context(**kwargs)

    # If multiple targets of the same kind are requested, create a
    # MergeOnlyPlugin automatically
    temp_name = None
    if isinstance(targets, (list, tuple)) and len(targets) > 1:
        plugins = self._get_plugins(targets=targets)
        if len({plugins[d].data_kind for d in targets}) != 1:
            raise RuntimeError("Cannot automerge different data kinds!")
        temp_name = ''.join(random.choices(string.ascii_lowercase, k=10))
        temp_merge = type(temp_name,
                          (strax.MergeOnlyPlugin,),
                          dict(depends_on=tuple(targets)))
        self.register(temp_merge)
        targets = temp_name

    try:
        components = self.get_components(run_id,
                                         targets=targets,
                                         save=save)
    finally:
        # Unregister the throwaway merge plugin, also when the lookup
        # fails, so repeated calls do not pile up random plugin names.
        # NOTE(review): assumes register() stored the class in
        # self._plugin_class_registry under its (lowercase) name - confirm.
        if temp_name is not None:
            self._plugin_class_registry.pop(temp_name, None)

    yield from strax.ThreadedMailboxProcessor(
        components, max_workers=max_workers).iter()
def get_iter(self, run_id: str, targets, save=tuple(), max_workers=None,
             time_range=None, selection=None,
             **kwargs) -> ty.Iterator[np.ndarray]:
    """Compute target for run_id and iterate over results.

    Each array coming out of the processor is filtered before it is
    yielded: first with the selection expression (a string, or a
    list/tuple of strings that are AND-ed together), then with
    time_range, which keeps rows with
    time_range[0] <= time < time_range[1].

    If several targets are requested they must all have the same data
    kind; they are then combined through a temporary MergeOnlyPlugin
    that is registered for the duration of the component lookup only.

    Do NOT interrupt the iterator (i.e. break): it will keep running
    stuff in background threads...
    {get_docs}

    :raises RuntimeError: if multiple targets with different data kinds
        are requested.
    :raises NotImplementedError: if time_range is given but the data has
        no 'time' field.
    """
    # NOTE: the {get_docs} placeholder above is presumably substituted
    # elsewhere with str.format, so keep the docstring free of other braces.

    # If any new options given, replace the current context
    # with a temporary one
    if kwargs:
        # noinspection PyMethodFirstArgAssignment
        self = self.new_context(**kwargs)

    # Several selection strings are combined with a logical AND
    if isinstance(selection, (list, tuple)):
        selection = ' & '.join(f'({x})' for x in selection)

    # If multiple targets of the same kind are requested, create a
    # MergeOnlyPlugin automatically
    temp_name = None
    if isinstance(targets, (list, tuple)) and len(targets) > 1:
        plugins = self._get_plugins(targets=targets, run_id=run_id)
        if len({plugins[d].data_kind for d in targets}) != 1:
            raise RuntimeError("Cannot automerge different data kinds!")
        temp_name = ''.join(random.choices(string.ascii_lowercase, k=10))
        temp_merge = type(temp_name,
                          (strax.MergeOnlyPlugin,),
                          dict(depends_on=tuple(targets)))
        self.register(temp_merge)
        targets = temp_name

    try:
        components = self.get_components(run_id,
                                         targets=targets,
                                         save=save,
                                         time_range=time_range)
    finally:
        # Unregister the throwaway merge plugin, also when the lookup
        # fails, so repeated calls do not pile up random plugin names.
        # NOTE(review): assumes register() stored the class in
        # self._plugin_class_registry under its (lowercase) name - confirm.
        if temp_name is not None:
            self._plugin_class_registry.pop(temp_name, None)

    processor = strax.ThreadedMailboxProcessor(
        components, max_workers=max_workers)
    for x in processor.iter():
        if selection is not None:
            # NOTE(review): the expression is evaluated by numexpr with
            # the array fields as variables; it should only come from
            # trusted callers.
            mask = numexpr.evaluate(
                selection,
                local_dict={fn: x[fn] for fn in x.dtype.names})
            x = x[mask]

        # Explicit None check: the truth value of an array-like
        # time_range would be ambiguous.
        if time_range is not None:
            if 'time' not in x.dtype.names:
                raise NotImplementedError(
                    "Time range selection requires time information, "
                    "but none of the required plugins provides it.")
            x = x[(time_range[0] <= x['time']) &
                  (x['time'] < time_range[1])]

        yield x
def get_iter(self, run_id: str, targets, save=tuple(), max_workers=None,
             time_range=None, seconds_range=None, selection=None,
             **kwargs) -> ty.Iterator[np.ndarray]:
    """Compute target for run_id and iterate over results.

    Each array coming out of the processor is filtered before it is
    yielded: first with the selection expression (a string, or a
    list/tuple of strings that are AND-ed together), then with
    time_range, which keeps rows with
    time_range[0] <= time < time_range[1].

    If seconds_range is given it replaces time_range: it is interpreted
    relative to the run start, floored to whole seconds, and converted
    with a factor of 1e9 per second. The run start is taken from the run
    metadata; if that fails, the first_time of the first stored chunk of
    the (first) target is used instead.

    If several targets are requested they must all have the same data
    kind; they are then combined through a temporary MergeOnlyPlugin
    that is registered for the duration of the component lookup only.

    Do NOT interrupt the iterator (i.e. break): it will keep running
    stuff in background threads...
    {get_docs}

    :raises RuntimeError: if multiple targets with different data kinds
        are requested.
    :raises NotImplementedError: if a time range is given but the data
        has no 'time' field.
    """
    # NOTE: the {get_docs} placeholder above is presumably substituted
    # elsewhere with str.format, so keep the docstring free of other braces.

    # If any new options given, replace the current context
    # with a temporary one
    if kwargs:
        # noinspection PyMethodFirstArgAssignment
        self = self.new_context(**kwargs)

    # Several selection strings are combined with a logical AND
    if isinstance(selection, (list, tuple)):
        selection = ' & '.join(f'({x})' for x in selection)

    # Convert relative to absolute time range
    if seconds_range is not None:
        ns_per_second = int(1e9)
        try:
            # Use run metadata, if it is available, to get
            # the run start time (floored to seconds)
            t0 = self.run_metadata(run_id, 'start')['start']
            t0 = int(t0.timestamp()) * ns_per_second
        except Exception:
            # Deliberately broad best-effort fallback: get an approximate
            # start from the data itself, then floor it to seconds for
            # consistency.
            if isinstance(targets, (list, tuple)):
                first_target = targets[0]
            else:
                first_target = targets
            t0 = self.get_meta(run_id, first_target)['chunks'][0]['first_time']
            # Floor division keeps this exact; true division goes through
            # a float, which cannot represent nanosecond timestamps and
            # may round up to the next second.
            t0 = int(t0 // ns_per_second) * ns_per_second
        time_range = (t0 + ns_per_second * seconds_range[0],
                      t0 + ns_per_second * seconds_range[1])

    # If multiple targets of the same kind are requested, create a
    # MergeOnlyPlugin automatically
    temp_name = None
    if isinstance(targets, (list, tuple)) and len(targets) > 1:
        plugins = self._get_plugins(targets=targets, run_id=run_id)
        if len({plugins[d].data_kind for d in targets}) != 1:
            raise RuntimeError("Cannot automerge different data kinds!")
        temp_name = ''.join(random.choices(string.ascii_lowercase, k=10))
        temp_merge = type(temp_name,
                          (strax.MergeOnlyPlugin,),
                          dict(depends_on=tuple(targets)))
        self.register(temp_merge)
        targets = temp_name

    try:
        components = self.get_components(run_id,
                                         targets=targets,
                                         save=save,
                                         time_range=time_range)
    finally:
        # Unregister the throwaway merge plugin, also when the lookup
        # fails, so repeated calls do not pile up random plugin names.
        # NOTE(review): assumes register() stored the class in
        # self._plugin_class_registry under its (lowercase) name - confirm.
        if temp_name is not None:
            self._plugin_class_registry.pop(temp_name, None)

    processor = strax.ThreadedMailboxProcessor(
        components,
        max_workers=max_workers,
        allow_shm=self.context_config['allow_shm'],
        allow_multiprocess=self.context_config['allow_multiprocess'],
        allow_rechunk=self.context_config['allow_rechunk'])
    for x in processor.iter():
        if selection is not None:
            # NOTE(review): the expression is evaluated by numexpr with
            # the array fields as variables; it should only come from
            # trusted callers.
            mask = numexpr.evaluate(
                selection,
                local_dict={fn: x[fn] for fn in x.dtype.names})
            x = x[mask]

        # Explicit None check: the truth value of an array-like
        # time_range would be ambiguous.
        if time_range is not None:
            if 'time' not in x.dtype.names:
                raise NotImplementedError(
                    "Time range selection requires time information, "
                    "but none of the required plugins provides it.")
            x = x[(time_range[0] <= x['time']) &
                  (x['time'] < time_range[1])]

        yield x
def get_iter(self, run_id: str, targets, save=tuple(), max_workers=None,
             time_range=None, seconds_range=None, time_within=None,
             time_selection='fully_contained', selection_str=None,
             **kwargs) -> ty.Iterator[np.ndarray]:
    """Compute target for run_id and iterate over results.

    time_range, seconds_range and time_within are resolved into a single
    absolute time range by to_absolute_time_range. Every array coming
    out of the processor is passed through apply_selection, together
    with selection_str (a string, or a list/tuple of strings that are
    AND-ed together), that time range and time_selection, before it is
    yielded.

    If several targets are requested they must all have the same data
    kind; they are then merged through a temporary MergeOnlyPlugin
    whose name starts with '_temp_'. All '_temp' plugins are removed
    from the registry again once the components have been looked up,
    also if that lookup fails.

    Do NOT interrupt the iterator (i.e. break): it will keep running
    stuff in background threads...
    {get_docs}

    :raises RuntimeError: if multiple targets with different data kinds
        are requested.
    :raises ValueError: if the processor yields something that is not a
        numpy array.
    """
    # NOTE: the {get_docs} placeholder above is presumably substituted
    # elsewhere with str.format, so keep the docstring free of other braces.

    # If any new options given, replace the current context
    # with a temporary one
    if kwargs:
        # noinspection PyMethodFirstArgAssignment
        self = self.new_context(**kwargs)

    # Several selection strings are combined with a logical AND
    if isinstance(selection_str, (list, tuple)):
        selection_str = ' & '.join(f'({x})' for x in selection_str)

    # Convert alternate time arguments to absolute range
    time_range = self.to_absolute_time_range(
        run_id=run_id,
        targets=targets,
        time_range=time_range,
        seconds_range=seconds_range,
        time_within=time_within)

    # If multiple targets of the same kind, create a MergeOnlyPlugin
    # to merge the results automatically
    if isinstance(targets, (list, tuple)) and len(targets) > 1:
        plugins = self._get_plugins(targets=targets, run_id=run_id)
        if len({plugins[d].data_kind for d in targets}) != 1:
            raise RuntimeError("Cannot automerge different data kinds!")
        temp_name = (
            '_temp_'
            + ''.join(random.choices(string.ascii_lowercase, k=10)))
        p = type(temp_name,
                 (strax.MergeOnlyPlugin,),
                 dict(depends_on=tuple(targets)))
        self.register(p)
        targets = (temp_name,)

    try:
        components = self.get_components(run_id,
                                         targets=targets,
                                         save=save,
                                         time_range=time_range)
    finally:
        # Cleanup the temp plugins. Done in a finally clause so a failing
        # component lookup cannot leave them behind in the registry.
        for k in list(self._plugin_class_registry):
            if k.startswith('_temp'):
                del self._plugin_class_registry[k]

    processor = strax.ThreadedMailboxProcessor(
        components,
        max_workers=max_workers,
        allow_shm=self.context_config['allow_shm'],
        allow_multiprocess=self.context_config['allow_multiprocess'],
        allow_rechunk=self.context_config['allow_rechunk'])
    for x in processor.iter():
        if not isinstance(x, np.ndarray):
            raise ValueError(f"Got type {type(x)} rather than numpy array "
                             "from the processor!")
        x = self.apply_selection(x, selection_str,
                                 time_range, time_selection)
        yield x
def get_iter(self, run_id: str, targets, save=tuple(), max_workers=None,
             time_range=None, seconds_range=None, time_within=None,
             time_selection='fully_contained', selection_str=None,
             keep_columns=None, _chunk_number=None,
             **kwargs) -> ty.Iterator[strax.Chunk]:
    """Compute target for run_id and iterate over results.

    time_range, seconds_range and time_within are resolved into a single
    absolute time range by to_absolute_time_range. The data of every
    chunk coming out of the processor is passed through apply_selection
    (with selection_str, keep_columns, that time range and
    time_selection) before the chunk is yielded.

    If several targets are requested they must all have the same data
    kind; they are then merged through a temporary MergeOnlyPlugin
    whose name starts with '_temp_'. All '_temp' plugins are removed
    from the registry again once the components have been looked up,
    also if that lookup fails.

    Do NOT interrupt the iterator (i.e. break): it will keep running
    stuff in background threads...
    {get_docs}

    :raises RuntimeError: if multiple targets with different data kinds
        are requested.
    :raises ValueError: if the processor yields something that is not a
        strax Chunk, or if a time range was given and no chunks were
        returned.
    :raises strax.DataCorrupted: if no chunks were returned and no time
        range was given.
    """
    # NOTE: the {get_docs} placeholder above is presumably substituted
    # elsewhere with str.format, so keep the docstring free of other braces.

    # If any new options given, replace the current context
    # with a temporary one
    if kwargs:
        # noinspection PyMethodFirstArgAssignment
        self = self.new_context(**kwargs)

    # Convert alternate time arguments to absolute range
    time_range = self.to_absolute_time_range(
        run_id=run_id,
        targets=targets,
        time_range=time_range,
        seconds_range=seconds_range,
        time_within=time_within)

    # If multiple targets of the same kind, create a MergeOnlyPlugin
    # to merge the results automatically
    if isinstance(targets, (list, tuple)) and len(targets) > 1:
        plugins = self._get_plugins(targets=targets, run_id=run_id)
        if len({plugins[d].data_kind_for(d) for d in targets}) != 1:
            raise RuntimeError("Cannot automerge different data kinds!")
        temp_name = (
            '_temp_'
            + ''.join(random.choices(string.ascii_lowercase, k=10)))
        p = type(temp_name,
                 (strax.MergeOnlyPlugin,),
                 dict(depends_on=tuple(targets)))
        self.register(p)
        targets = (temp_name,)

    try:
        components = self.get_components(run_id,
                                         targets=targets,
                                         save=save,
                                         time_range=time_range,
                                         chunk_number=_chunk_number)
    finally:
        # Cleanup the temp plugins. Done in a finally clause so a failing
        # component lookup cannot leave them behind in the registry.
        for k in list(self._plugin_class_registry):
            if k.startswith('_temp'):
                del self._plugin_class_registry[k]

    seen_a_chunk = False
    generator = strax.ThreadedMailboxProcessor(
        components,
        max_workers=max_workers,
        allow_shm=self.context_config['allow_shm'],
        allow_multiprocess=self.context_config['allow_multiprocess'],
        allow_rechunk=self.context_config['allow_rechunk'],
        allow_lazy=self.context_config['allow_lazy'],
        max_messages=self.context_config['max_messages'],
        timeout=self.context_config['timeout']).iter()

    try:
        for result in strax.continuity_check(generator):
            seen_a_chunk = True
            if not isinstance(result, strax.Chunk):
                raise ValueError(f"Got type {type(result)} rather than "
                                 f"a strax Chunk from the processor!")
            result.data = self.apply_selection(
                result.data,
                selection_str=selection_str,
                keep_columns=keep_columns,
                time_range=time_range,
                time_selection=time_selection)
            yield result

    except GeneratorExit:
        # This generator is being closed from outside (e.g. the consumer
        # stopped iterating); forward a stand-in exception to the
        # processor's generator.
        # NOTE(review): presumably this is what makes the processor shut
        # down its background work - confirm.
        generator.throw(
            OutsideException(
                "Terminating due to an exception originating from outside "
                "strax's get_iter (which we cannot retrieve)."))

    except Exception as e:
        # Forward the error to the processor's generator as well, then
        # re-raise it for the caller
        generator.throw(e)
        raise

    if not seen_a_chunk:
        if time_range is None:
            raise strax.DataCorrupted("No data returned!")
        raise ValueError(f"Invalid time range: {time_range}, "
                         "returned no chunks!")