Example 1
    def _time_range_to_n_range(self, run_id: str, time_range: ty.Tuple[int, int], d_with_time: str):
        """Return range of chunk numbers that include time_range
        :param run_id: Run name
        :param time_range: (start, stop) ns since unix epoch
        :param d_with_time: Name of data type
        """
        # Find a range of row numbers that contains the time range.
        # It may be slightly too large, since we round out to chunk
        # boundaries. Get the n <-> time mapping from the chunk metadata.
        if not self.is_stored(run_id, d_with_time):
            raise strax.DataNotAvailable(
                "Time range selection needs time info from "
                f"{d_with_time}, but this data is not yet available")

        meta = self.get_meta(run_id, d_with_time)
        times = np.array([c['first_time'] for c in meta['chunks']])
        # Reconstruct row numbers from row counts, which are in metadata
        # n_end is last row + 1 in a chunk. n_start is the first.
        n_per_chunk = np.array([c['n'] for c in meta['chunks']])
        n_end = n_per_chunk.cumsum()
        n_start = n_end - n_per_chunk
        _inds = np.searchsorted(times, time_range) - 1
        # Clip to prevent out-of-range times causing
        # negative or nonexistent indices
        _inds = np.clip(_inds, 0, len(n_end) - 1)
        return n_start[_inds[0]], n_end[_inds[1]]
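
The index arithmetic above is easy to sanity-check in isolation. Below is a minimal, self-contained sketch with made-up chunk metadata (the times and counts are hypothetical, not from any real run) showing how searchsorted plus clip maps a time range onto row numbers:

    import numpy as np

    # Hypothetical chunk metadata: first_time and row count per chunk
    times = np.array([0, 100, 250])       # first_time of each chunk
    n_per_chunk = np.array([10, 20, 5])   # rows ('n') in each chunk

    n_end = n_per_chunk.cumsum()          # [10, 30, 35]: last row + 1
    n_start = n_end - n_per_chunk         # [ 0, 10, 30]: first row

    time_range = (120, 260)
    # Index of the last chunk whose first_time is <= each endpoint;
    # clip keeps out-of-range times from producing invalid indices
    _inds = np.clip(np.searchsorted(times, time_range) - 1,
                    0, len(n_end) - 1)
    print(n_start[_inds[0]], n_end[_inds[1]])  # 10 35
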
Example 2
 def _saver(self, dirname, metadata, meta_only=False):
     # Test if the parent directory is writeable.
     # We need abspath since the dir itself may not exist,
     # even though its parent-to-be does
     parent_dir = os.path.abspath(os.path.join(dirname, os.pardir))
     if not os.access(parent_dir, os.W_OK):
         raise strax.DataNotAvailable(
             f"Can't write data to {dirname}, "
             f"no write permissions in {parent_dir}.")
     return FileSaver(dirname, metadata=metadata, meta_only=meta_only)
Example 3
 def try_load(self, st: strax.Context, target: str):
     try:
         rr = st.get_array(self.run_id, target)
     except strax.DataNotAvailable as data_error:
         message = (f'Could not find '
                    f'{st.key_for(self.run_id, target)} '
                    f'with the following frontends\n')
         for sf in st.storage:
             message += f'\t{sf}\n'
         raise strax.DataNotAvailable(message) from data_error
     return rr
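
Example 3 re-raises with raise ... from, so the original frontend error stays attached to the new DataNotAvailable as __cause__ and appears in the traceback. A toy sketch of that chaining (using a stand-in exception class so it runs without strax):

    class DataNotAvailable(Exception):
        pass

    try:
        try:
            raise DataNotAvailable("frontend miss")
        except DataNotAvailable as data_error:
            raise DataNotAvailable(
                "not found with any frontend") from data_error
    except DataNotAvailable as e:
        print(e, "| caused by:", repr(e.__cause__))
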
Example 4
    def get_metadata(self, dirname: str, **kwargs):
        prefix = dirname_to_prefix(dirname)
        metadata_json = f'{prefix}-metadata.json'
        fn = rucio_path(self.root_dir, metadata_json, dirname)
        folder = osp.join('/', *fn.split('/')[:-1])
        if not osp.exists(folder):
            raise strax.DataNotAvailable(f"No folder for metadata at {fn}")
        if not osp.exists(fn):
            raise strax.DataCorrupted(f"Folder exists but no metadata at {fn}")

        with open(fn, mode='r') as f:
            return json.loads(f.read())
Example 5
    def _saver(self, dirname, metadata, **kwargs):
        # Test if the parent directory is writeable.
        # We need abspath since the dir itself may not exist,
        # even though its parent-to-be does
        parent_dir = os.path.abspath(os.path.join(dirname, os.pardir))

        # In case the parent dir also doesn't exist, we have to create it,
        # otherwise the write permission check below would certainly fail
        try:
            os.makedirs(parent_dir, exist_ok=True)
        except OSError as e:
            raise strax.DataNotAvailable(
                f"Can't write data to {dirname}, "
                f"{parent_dir} does not exist and we could not create it. "
                f"Original error: {e}")

        # Finally, check if we have permission to create the new subdirectory
        # (which the Saver will do)
        if not os.access(parent_dir, os.W_OK):
            raise strax.DataNotAvailable(
                f"Can't write data to {dirname}, "
                f"no write permissions in {parent_dir}.")

        return FileSaver(dirname, metadata=metadata, **kwargs)
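
Examples 2 and 5 share the same pre-flight idea: verify the parent directory is writable before handing dirname to a Saver, converting OS-level failures into DataNotAvailable so callers can fall through to another frontend. A standalone sketch of that check (can_write_into is a hypothetical helper name, not strax API):

    import os
    import strax

    def can_write_into(dirname: str) -> None:
        """Raise strax.DataNotAvailable if dirname's parent is not writable."""
        # abspath is needed since dirname itself may not exist yet
        parent_dir = os.path.abspath(os.path.join(dirname, os.pardir))
        try:
            os.makedirs(parent_dir, exist_ok=True)
        except OSError as e:
            raise strax.DataNotAvailable(
                f"Can't write data to {dirname}: could not create "
                f"{parent_dir}. Original error: {e}")
        if not os.access(parent_dir, os.W_OK):
            raise strax.DataNotAvailable(
                f"Can't write data to {dirname}, "
                f"no write permissions in {parent_dir}.")
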
Example 6
    def get_meta(self, run_id, target) -> dict:
        """Return metadata for target for run_id, or raise DataNotAvailable
        if data is not yet available.

        :param run_id: run id to get
        :param target: data type to get
        """
        key = self._key_for(run_id, target)
        for sf in self.storage:
            try:
                return sf.get_metadata(key, **self._find_options)
            except strax.DataNotAvailable:
                self.log.debug(f"Frontend {sf} does not have {key}")
        raise strax.DataNotAvailable(f"Can't load metadata, "
                                     f"data for {key} not available")
Example 7
    def get_metadata(self, did: str, **kwargs):
        scope, name = did.split(':')
        number, dtype, hsh = parse_did(did)
        metadata_json = f'{dtype}-{hsh}-metadata.json'
        metadata_did = f'{scope}:{metadata_json}'

        metadata_path = rucio_path(self.rucio_dir, metadata_did)
        folder = os.path.join('/', *metadata_path.split('/')[:-1])
        if not os.path.exists(folder):
            raise strax.DataNotAvailable(
                f"No folder for metadata at {metadata_path}")
        if not os.path.exists(metadata_path):
            raise strax.DataCorrupted(
                f"Folder exists but no metadata at {metadata_path}")

        with open(metadata_path, mode='r') as f:
            return json.loads(f.read())
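
Examples 4 and 7 draw the same distinction: a missing folder means the data was never written here (DataNotAvailable, safe to fall through to another frontend), while a folder that exists without its metadata file signals a broken write (DataCorrupted, which should not be silently skipped). A caller-side sketch (backend and did are assumed to exist):

    import strax

    try:
        md = backend.get_metadata(did)
    except strax.DataNotAvailable:
        md = None   # simply not stored here; try the next frontend
    except strax.DataCorrupted:
        raise       # partial write: surface it rather than fall through
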
Example 8
    def run_metadata(self, run_id, projection=None) -> dict:
        """Return run-level metadata for run_id, or raise DataNotAvailable
        if this is not available

        :param run_id: run id to get
        :param projection: Selection of fields to get, following MongoDB
        syntax. May not be supported by every frontend.
        """
        for sf in self.storage:
            if not sf.provide_run_metadata:
                continue
            try:
                return sf.run_metadata(run_id, projection=projection)
            except (strax.DataNotAvailable, NotImplementedError):
                self.log.debug(f"Frontend {sf} does not have "
                               f"run metadata for {run_id}")
        raise strax.DataNotAvailable(f"No run-level metadata available "
                                     f"for {run_id}")
Example 9
        def check_cache(d):
            nonlocal plugins, loaders, savers, seen
            if d in seen:
                return
            seen.add(d)
            p = plugins[d]
            key = strax.DataKey(run_id, d, p.lineage)

            for sb_i, sf in enumerate(self.storage):
                try:
                    # Bit clunky... but allows specifying executor later
                    sf.find(key, **self._find_options)
                    loaders[d] = partial(sf.loader,
                                         key,
                                         n_range=n_range,
                                         **self._find_options)
                    # Found it! No need to make it
                    del plugins[d]
                    break
                except strax.DataNotAvailable:
                    continue
            else:
                if time_range is not None:
                    # While the data type providing the time information is
                    # available (else we'd have failed earlier), one of the
                    # other requested data types is not.
                    raise strax.DataNotAvailable(
                        f"Time range selection assumes data is already "
                        f"available, but {d} for {run_id} is not.")
                if d in self.context_config['forbid_creation_of']:
                    raise strax.DataNotAvailable(
                        f"{d} for {run_id} not found in any storage, and "
                        "your context specifies it cannot be created.")
                # Not in any cache. We will be computing it.
                to_compute[d] = p
                for dep_d in p.depends_on:
                    check_cache(dep_d)

            # Should we save this data?
            if time_range is not None:
                # No, since we're not even getting the whole data.
                # Without this check, saving could be attempted if the
                # storage converter mode is enabled.
                self.log.warning(f"Not saving {d} while "
                                 f"selecting a time range in the run")
                return
            if any([
                    len(v) > 0 for k, v in self._find_options.items()
                    if 'fuzzy' in k
            ]):
                # In fuzzy matching mode, we cannot (yet) derive the lineage
                # of any data we are creating. To avoid creating false
                # data entries, we currently do not save at all.
                self.log.warning(f"Not saving {d} while fuzzy matching is "
                                 f"turned on.")
                return
            if self.context_config['allow_incomplete']:
                self.log.warning(f"Not saving {d} while loading incomplete "
                                 f"data is allowed.")
                return

            elif p.save_when == strax.SaveWhen.NEVER:
                if d in save:
                    raise ValueError(f"Plugin forbids saving of {d}")
                return
            elif p.save_when == strax.SaveWhen.TARGET:
                if d not in targets:
                    return
            elif p.save_when == strax.SaveWhen.EXPLICIT:
                if d not in save:
                    return
            else:
                assert p.save_when == strax.SaveWhen.ALWAYS

            for sf in self.storage:
                if sf.readonly:
                    continue
                if d not in to_compute:
                    if not self.context_config['storage_converter']:
                        continue
                    try:
                        sf.find(key, **self._find_options)
                        # Already have this data in this backend
                        continue
                    except strax.DataNotAvailable:
                        # Don't have it, so let's convert it!
                        pass
                try:
                    savers[d].append(sf.saver(key,
                                              metadata=p.metadata(run_id)))
                except strax.DataNotAvailable:
                    # This frontend cannot save. Too bad.
                    pass
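
Examples 9-11, 13, and 15 all hinge on Python's for/else: the else suite runs only when the loop over storage frontends completes without a break, i.e. when no frontend could provide the data. A minimal sketch of the idiom (frontends and find are placeholders, not strax objects):

    frontends = ['local', 'remote']

    def find(sf):
        # Stand-in for sf.find(key): pretend nothing is stored anywhere
        raise LookupError(sf)

    for sf in frontends:
        try:
            find(sf)
            break        # found it: the else suite is skipped
        except LookupError:
            continue     # this frontend doesn't have it; try the next
    else:
        print("not found in any frontend; will compute it instead")
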
Example 10
    def get_components(
            self,
            run_id: str,
            targets=tuple(),
            save=tuple(),
            time_range=None,
    ) -> strax.ProcessorComponents:
        """Return components for setting up a processor
        {get_docs}
        """
        save = strax.to_str_tuple(save)
        targets = strax.to_str_tuple(targets)

        plugins = self._get_plugins(targets, run_id)

        n_range = None
        if time_range is not None:
            # Ensure we have one data kind
            if len(set([plugins[t].data_kind for t in targets])) > 1:
                raise NotImplementedError(
                    "Time range selection not implemented "
                    "for multiple data kinds.")

            # Which plugin provides time information? We need it to map to
            # row indices.
            for p in targets:
                if 'time' in plugins[p].dtype.names:
                    break
            else:
                raise RuntimeError("No time info in targets; this should "
                                   "have been caught earlier")

            # Find a range of row numbers that contains the time range.
            # It may be slightly too large, since we round out to chunk
            # boundaries. Get the n <-> time mapping from the chunk metadata.
            if not self.is_stored(run_id, p):
                raise strax.DataNotAvailable(f"Time range selection needs time"
                                             f" info from {p}, but this data"
                                             f" is not yet available")
            meta = self.get_meta(run_id, p)
            times = np.array([c['first_time'] for c in meta['chunks']])
            # Reconstruct row numbers from row counts, which are in metadata
            # n_end is last row + 1 in a chunk. n_start is the first.
            n_per_chunk = np.array([c['n'] for c in meta['chunks']])
            n_end = n_per_chunk.cumsum()
            n_start = n_end - n_per_chunk
            _inds = np.searchsorted(times, time_range) - 1
            # Clip to prevent out-of-range times causing
            # negative or nonexistent indices
            _inds = np.clip(_inds, 0, len(n_end) - 1)
            n_range = n_start[_inds[0]], n_end[_inds[1]]

        # Get savers/loaders, and meanwhile filter out plugins that do not
        # have to do any computation (their instances will stick around
        # through the .deps attribute of plugins that do)
        loaders = dict()
        savers = collections.defaultdict(list)
        seen = set()
        to_compute = dict()

        def check_cache(d):
            nonlocal plugins, loaders, savers, seen
            if d in seen:
                return
            seen.add(d)
            p = plugins[d]
            key = strax.DataKey(run_id, d, p.lineage)

            for sb_i, sf in enumerate(self.storage):
                try:
                    # Bit clunky... but allows specifying executor later
                    sf.find(key, **self._find_options)
                    loaders[d] = partial(sf.loader,
                                         key,
                                         n_range=n_range,
                                         **self._find_options)
                    # Found it! No need to make it
                    del plugins[d]
                    break
                except strax.DataNotAvailable:
                    continue
            else:
                if time_range is not None:
                    # While the data type providing the time information is
                    # available (else we'd have failed earlier), one of the
                    # other requested data types is not.
                    raise strax.DataNotAvailable(
                        f"Time range selection assumes data is already "
                        f"available, but {d} for {run_id} is not.")
                if d in self.context_config['forbid_creation_of']:
                    raise strax.DataNotAvailable(
                        f"{d} for {run_id} not found in any storage, and "
                        "your context specifies it cannot be created.")
                # Not in any cache. We will be computing it.
                to_compute[d] = p
                for dep_d in p.depends_on:
                    check_cache(dep_d)

            # Should we save this data?
            if time_range is not None:
                # No, since we're not even getting the whole data.
                # Without this check, saving could be attempted if the
                # storage converter mode is enabled.
                self.log.warning(f"Not saving {d} while "
                                 f"selecting a time range in the run")
                return
            if any([
                    len(v) > 0 for k, v in self._find_options.items()
                    if 'fuzzy' in k
            ]):
                # In fuzzy matching mode, we cannot (yet) derive the lineage
                # of any data we are creating. To avoid creating false
                # data entries, we currently do not save at all.
                self.log.warning(f"Not saving {d} while fuzzy matching is "
                                 f"turned on.")
                return
            if self.context_config['allow_incomplete']:
                self.log.warning(f"Not saving {d} while loading incomplete "
                                 f"data is allowed.")
                return

            elif p.save_when == strax.SaveWhen.NEVER:
                if d in save:
                    raise ValueError(f"Plugin forbids saving of {d}")
                return
            elif p.save_when == strax.SaveWhen.TARGET:
                if d not in targets:
                    return
            elif p.save_when == strax.SaveWhen.EXPLICIT:
                if d not in save:
                    return
            else:
                assert p.save_when == strax.SaveWhen.ALWAYS

            for sf in self.storage:
                if sf.readonly:
                    continue
                if d not in to_compute:
                    if not self.context_config['storage_converter']:
                        continue
                    try:
                        sf.find(key, **self._find_options)
                        # Already have this data in this backend
                        continue
                    except strax.DataNotAvailable:
                        # Don't have it, so let's convert it!
                        pass
                try:
                    savers[d].append(sf.saver(key,
                                              metadata=p.metadata(run_id)))
                except strax.DataNotAvailable:
                    # This frontend cannot save. Too bad.
                    pass

        for d in targets:
            check_cache(d)
        plugins = to_compute

        intersec = list(plugins.keys() & loaders.keys())
        if len(intersec):
            raise RuntimeError(f"{intersec} both computed and loaded?!")

        # For the plugins which will run computations,
        # check all required options are available or set defaults.
        # Also run any user-defined setup
        for p in plugins.values():
            self._set_plugin_config(p, run_id, tolerant=False)
            p.setup()
        return strax.ProcessorComponents(plugins=plugins,
                                         loaders=loaders,
                                         savers=dict(savers),
                                         targets=targets)
Example 11
        def check_cache(d):
            nonlocal plugins, loaders, savers, seen
            if d in seen:
                return
            seen.add(d)
            p = plugins[d]

            # Can we load this data, or must we compute it?
            loading_this_data = False
            key = strax.DataKey(run_id, d, p.lineage)
            for sb_i, sf in enumerate(self.storage):
                try:
                    # Partial is clunky... but allows specifying executor later
                    # Since it doesn't run until later, we must do a find now,
                    # while we can still handle DataNotAvailable
                    sf.find(key, **self._find_options)
                    loaders[d] = partial(sf.loader,
                                         key,
                                         n_range=n_range,
                                         **self._find_options)
                except strax.DataNotAvailable:
                    continue
                else:
                    # Found it! No need to make it or look in other frontends
                    loading_this_data = True
                    del plugins[d]
                    break
            else:
                # Data not found anywhere. We will be computing it.
                if time_range is not None and not d.startswith('_temp'):
                    # While the data type providing the time information is
                    # available (else we'd have failed earlier), one of the
                    # other requested data types is not.
                    raise strax.DataNotAvailable(
                        f"Time range selection assumes data is already "
                        f"available, but {d} for {run_id} is not.")
                if d in self.context_config['forbid_creation_of']:
                    raise strax.DataNotAvailable(
                        f"{d} for {run_id} not found in any storage, and "
                        "your context specifies it cannot be created.")
                to_compute[d] = p
                for dep_d in p.depends_on:
                    check_cache(dep_d)

            # Should we save this data? If not, return.
            if (loading_this_data
                    and not self.context_config['storage_converter']):
                return
            if p.save_when == strax.SaveWhen.NEVER:
                if d in save:
                    raise ValueError(f"Plugin forbids saving of {d}")
                return
            elif p.save_when == strax.SaveWhen.TARGET:
                if d not in targets:
                    return
            elif p.save_when == strax.SaveWhen.EXPLICIT:
                if d not in save:
                    return
            else:
                assert p.save_when == strax.SaveWhen.ALWAYS

            # Warn about conditions that preclude saving, but the user
            # might not expect.
            if time_range is not None:
                # We're not even getting the whole data.
                # Without this check, saving could be attempted if the
                # storage converter mode is enabled.
                self.log.warning(f"Not saving {d} while "
                                 f"selecting a time range in the run")
                return
            if any([
                    len(v) > 0 for k, v in self._find_options.items()
                    if 'fuzzy' in k
            ]):
                # In fuzzy matching mode, we cannot (yet) derive the
                # lineage of any data we are creating. To avoid creating
                # false data entries, we currently do not save at all.
                self.log.warning(f"Not saving {d} while fuzzy matching is"
                                 f" turned on.")
                return
            if self.context_config['allow_incomplete']:
                self.log.warning(f"Not saving {d} while loading incomplete"
                                 f" data is allowed.")
                return

            # Save the target and any other outputs of the plugin.
            for d_to_save in set([d] + list(p.provides)):
                if d_to_save in savers and len(savers[d_to_save]):
                    # This multi-output plugin was scanned before;
                    # don't create duplicate savers
                    assert p.multi_output
                    continue

                key = strax.DataKey(run_id, d_to_save, p.lineage)

                for sf in self.storage:
                    if sf.readonly:
                        continue
                    if loading_this_data:
                        # Usually, we don't save if we're loading
                        if not self.context_config['storage_converter']:
                            continue
                        # ... but in storage converter mode we do:
                        try:
                            sf.find(key, **self._find_options)
                            # Already have this data in this backend
                            continue
                        except strax.DataNotAvailable:
                            # Don't have it, so let's save it!
                            pass
                    # If we get here, we must try to save
                    try:
                        savers[d_to_save].append(
                            sf.saver(key,
                                     metadata=p.metadata(run_id, d_to_save)))
                    except strax.DataNotAvailable:
                        # This frontend cannot save. Too bad.
                        pass
Example 12
def _event_display(
    context,
    run_id,
    events,
    to_pe,
    axes=None,
    records_matrix=True,
    s2_fuzz=50,
    s1_fuzz=0,
    max_peaks=500,
    xenon1t=False,
    display_peak_info=PEAK_DISPLAY_DEFAULT_INFO,
    display_event_info=EVENT_DISPLAY_DEFAULT_INFO,
    s1_hp_kwargs=None,
    s2_hp_kwargs=None,
    event_time_limit=None,
    plot_all_positions=True,
):
    """{event_docs}
    :param axes: dict of matplotlib axes (with the same keys as read
        below; entries that are missing or None are panels not filled)
    {event_returns}
    """
    if len(events) != 1:
        raise ValueError(f'Found {len(events)} events; expected exactly one')
    event = events[0]

    if not context.is_stored(run_id, 'peaklets'):
        raise strax.DataNotAvailable(f'peaklets not available for {run_id}')

    if axes is None:
        raise ValueError('No axes provided')
    ax_s1 = axes.get("ax_s1", None)
    ax_s2 = axes.get("ax_s2", None)
    ax_s1_hp_t = axes.get("ax_s1_hp_t", None)
    ax_s1_hp_b = axes.get("ax_s1_hp_b", None)
    ax_s2_hp_t = axes.get("ax_s2_hp_t", None)
    ax_s2_hp_b = axes.get("ax_s2_hp_b", None)
    ax_event_info = axes.get("ax_event_info", None)
    ax_peak_info = axes.get("ax_peak_info", None)
    ax_ev = axes.get("ax_ev", None)
    ax_rec = axes.get("ax_rec", None)

    # titles
    for ax, title in zip([
            ax_s1, ax_s1_hp_t, ax_s1_hp_b, ax_s2, ax_s2_hp_t, ax_s2_hp_b,
            ax_event_info, ax_peak_info
    ], [
            "Main S1", "S1 top", "S1 bottom", "Main S2", "S2 top", "S2 bottom",
            "Event info", "Peak info"
    ]):
        if ax is not None:
            ax.set_title(title)

    # Parse the hit pattern options
    # Convert to dict (not at function definition because of mutable defaults)
    if s1_hp_kwargs is None:
        s1_hp_kwargs = {}
    if s2_hp_kwargs is None:
        s2_hp_kwargs = {}

    # Hit patterns options:
    for hp_opt, color_map in ((s1_hp_kwargs, "Blues"), (s2_hp_kwargs,
                                                        "Greens")):
        _common_opt = dict(xenon1t=xenon1t,
                           pmt_label_color='lightgrey',
                           log_scale=True,
                           vmin=0.1,
                           s=(250 if records_matrix else 220),
                           pmt_label_size=7,
                           edgecolor='grey',
                           dead_pmts=np.argwhere(to_pe == 0),
                           cmap=color_map)
        # Update S1 & S2 hit pattern kwargs with _common_opt if not
        # specified by the user
        for k, v in _common_opt.items():
            if k not in hp_opt:
                hp_opt[k] = v

    # S1
    if event['s1_area'] != 0:
        if ax_s1 is not None:
            plt.sca(ax_s1)
            context.plot_peaks(run_id,
                               time_range=(event['s1_time'] - s1_fuzz,
                                           event['s1_endtime'] + s1_fuzz),
                               single_figure=False)

        # Hit pattern plots
        area = context.get_array(
            run_id,
            'peaklets',
            time_range=(event['s1_time'], event['s1_endtime']),
            keep_columns=('area_per_channel', 'time', 'dt', 'length'),
            progress_bar=False,
        )
        for ax, array in ((ax_s1_hp_t, 'top'), (ax_s1_hp_b, 'bottom')):
            if ax is not None:
                plt.sca(ax)
                straxen.plot_on_single_pmt_array(c=np.sum(
                    area['area_per_channel'], axis=0),
                                                 array_name=array,
                                                 **s1_hp_kwargs)
                # Mark reconstructed position
                plt.scatter(event['x'], event['y'], marker='X', s=100, c='k')

    # S2
    if event['s2_area'] != 0:
        if ax_s2 is not None:
            plt.sca(ax_s2)
            context.plot_peaks(run_id,
                               time_range=(event['s2_time'] - s2_fuzz,
                                           event['s2_endtime'] + s2_fuzz),
                               single_figure=False)

        # Hit pattern plots
        area = context.get_array(
            run_id,
            'peaklets',
            time_range=(event['s2_time'], event['s2_endtime']),
            keep_columns=('area_per_channel', 'time', 'dt', 'length'),
            progress_bar=False,
        )
        for axi, (ax, array) in enumerate([(ax_s2_hp_t, 'top'),
                                           (ax_s2_hp_b, 'bottom')]):
            if ax is not None:
                plt.sca(ax)
                straxen.plot_on_single_pmt_array(c=np.sum(
                    area['area_per_channel'], axis=0),
                                                 array_name=array,
                                                 **s2_hp_kwargs)
                # Mark reconstructed position (corrected)
                plt.scatter(event['x'], event['y'], marker='X', s=100, c='k')
                if not xenon1t and axi == 0 and plot_all_positions:
                    _scatter_rec(event)

    # Fill panels with peak/event info
    for ax, labels_and_unit in [
        (ax_event_info, display_event_info), (ax_peak_info, display_peak_info)
    ]:
        if ax is not None:
            for i, (_lab, _unit) in enumerate(labels_and_unit):
                coord = 0.01, 0.9 - 0.9 * i / len(labels_and_unit)
                ax.text(*coord, _lab[:24], va='top', zorder=-10)
                ax.text(coord[0] + 0.5,
                        coord[1],
                        _unit.format(v=event[_lab]),
                        va='top',
                        zorder=-10)
                # Remove axes and labels from panel
                ax.set_xticks([])
                ax.set_yticks([])
                _ = [s.set_visible(False) for s in ax.spines.values()]

    # Plot peaks in event
    ev_range = None
    if ax_ev is not None:
        plt.sca(ax_ev)
        if event_time_limit is None:
            time_range = (event['time'], event['endtime'])
        else:
            time_range = event_time_limit

        context.plot_peaks(run_id,
                           time_range=time_range,
                           show_largest=max_peaks,
                           single_figure=False)
        ev_range = plt.xlim()

    if records_matrix and ax_rec is not None:
        plt.sca(ax_rec)
        context.plot_records_matrix(run_id,
                                    raw=records_matrix == 'raw',
                                    time_range=(event['time'],
                                                event['endtime']),
                                    single_figure=False)
        ax_rec.tick_params(axis='x', rotation=0)
        if not xenon1t:
            # Top vs bottom division
            ax_rec.axhline(straxen.n_top_pmts, c='k')
        if ev_range is not None:
            plt.xlim(*ev_range)

    # Final tweaks
    if ax_s1 is not None:
        ax_s1.tick_params(axis='x', rotation=45)
    if ax_s2 is not None:
        ax_s2.tick_params(axis='x', rotation=45)
    if ax_ev is not None:
        ax_ev.tick_params(axis='x', rotation=0)
    title = (f'Run {run_id}. Time '
             f'{str(event["time"])[:-9]}.{str(event["time"])[-9:]}\n'
             f'{datetime.fromtimestamp(event["time"] / 1e9, tz=pytz.utc)}')
    plt.suptitle(title, y=0.95)
    # NB: reflects panels order
    return (ax_s1, ax_s2, ax_s1_hp_t, ax_s1_hp_b, ax_event_info, ax_peak_info,
            ax_s2_hp_t, ax_s2_hp_b, ax_ev, ax_rec)
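
_event_display only draws the panels it finds in the axes dict; anything missing or None is skipped. A minimal sketch of preparing such a dict with matplotlib (the layout here is illustrative, not the one straxen actually uses):

    import matplotlib.pyplot as plt

    fig = plt.figure(figsize=(12, 8))
    axes = {
        'ax_s1': fig.add_subplot(2, 2, 1),
        'ax_s2': fig.add_subplot(2, 2, 2),
        'ax_ev': fig.add_subplot(2, 1, 2),
        # panels left out ('ax_rec', 'ax_event_info', ...) are not drawn
    }
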
Example 13
        def check_cache(d):
            nonlocal plugins, loaders, savers, seen
            if d in seen:
                return
            seen.add(d)
            p = plugins[d]
            key = strax.DataKey(run_id, d, p.lineage)

            for sb_i, sf in enumerate(self.storage):
                try:
                    loaders[d] = sf.loader(key,
                                           n_range=n_range,
                                           **self._fuzzy_options)
                    # Found it! No need to make it
                    del plugins[d]
                    break
                except strax.DataNotAvailable:
                    continue
            else:
                if time_range is not None:
                    # While the data type providing the time information is
                    # available (else we'd have failed earlier), one of the
                    # other requested data types is not.
                    raise strax.DataNotAvailable(
                        f"Time range selection assumes data is already "
                        f"available, but {d} for {run_id} is not.")
                # Not in any cache. We will be computing it.
                to_compute[d] = p
                for dep_d in p.depends_on:
                    check_cache(dep_d)

            # Should we save this data?
            if time_range is not None:
                # No, since we're not even getting the whole data
                return
            elif p.save_when == strax.SaveWhen.NEVER:
                if d in save:
                    raise ValueError(f"Plugin forbids saving of {d}")
                return
            elif p.save_when == strax.SaveWhen.TARGET:
                if d not in targets:
                    return
            elif p.save_when == strax.SaveWhen.EXPLICIT:
                if d not in save:
                    return
            else:
                assert p.save_when == strax.SaveWhen.ALWAYS

            for sf in self.storage:
                if sf.readonly:
                    continue
                if d not in to_compute:
                    if not self.context_config['storage_converter']:
                        continue
                    try:
                        sf.find(key, **self._fuzzy_options)
                        # Already have this data in this backend
                        continue
                    except strax.DataNotAvailable:
                        pass
                try:
                    savers[d].append(
                        sf.saver(key,
                                 metadata=p.metadata(run_id),
                                 meta_only=p.save_meta_only))
                except strax.DataNotAvailable:
                    # This frontend cannot save. Too bad.
                    pass
Example 14
    def loader(self,
               backend_key,
               time_range=None,
               chunk_number=None,
               executor=None):
        """Iterates over strax data in backend_key
        :param time_range: 2-length arraylike of (start, exclusive end)
        of desired data. Will return all data that partially overlaps with
        the range.
        Default is None, which means load the entire run.
        :param chunk_number: Chunk number to get exclusively
        :param executor: Executor to push load/decompress operations to
        """
        metadata = self.get_metadata(backend_key)

        if 'strax_version' in metadata:
            v_old = metadata['strax_version']
            if version.parse(v_old) < version.parse('0.9.0'):
                raise strax.DataNotAvailable(
                    f"Cannot load data at {backend_key}: "
                    f"it was created with strax {v_old}, "
                    f"but you have strax {strax.__version__}. ")
        else:
            warnings.warn(f"Data at {backend_key} does not say what strax "
                          "version it was generated with. This means it is "
                          "corrupted, or very, very old. Probably "
                          "we cannot load this.")

        # 'start' and 'end' are not required, to support allow_incomplete
        required_fields = (
            'run_id data_type data_kind dtype compressor').split()
        missing_fields = [x for x in required_fields if x not in metadata]
        if len(missing_fields):
            raise strax.DataNotAvailable(
                f"Cannot load data at {backend_key}: metadata is "
                f"missing the required fields {missing_fields}. ")

        if not len(metadata['chunks']):
            raise ValueError(
                f"Cannot load data at {backend_key}, it has no chunks!")

        dtype = literal_eval(metadata['dtype'])

        # Common arguments for chunk construction, not stored with chunk-level
        # metadata
        chunk_kwargs = dict(
            data_type=metadata['data_type'],
            data_kind=metadata['data_kind'],
            dtype=dtype)

        required_chunk_metadata_fields = 'start end run_id'.split()

        for i, chunk_info in enumerate(strax.iter_chunk_meta(metadata)):

            missing_fields = [x for x in required_chunk_metadata_fields
                              if x not in chunk_info]
            if len(missing_fields):
                raise ValueError(
                    f"Error reading chunk {i} of {metadata['dtype']} "
                    f"of {metadata['run_id']} from {backend_key}: "
                    f"chunk metadata is missing fields {missing_fields}")

            # Chunk number constraint
            if chunk_number is not None:
                if i != chunk_number:
                    continue

            # Time constraint
            if time_range:
                if (chunk_info['end'] <= time_range[0]
                        or time_range[1] <= chunk_info['start']):
                    # Chunk does not cover any part of range
                    continue

            read_chunk_kwargs = dict(
                backend_key=backend_key,
                dtype=dtype,
                metadata=metadata,
                chunk_info=chunk_info,
                time_range=time_range,
                chunk_construction_kwargs=chunk_kwargs)
            if executor is None:
                yield self._read_and_format_chunk(**read_chunk_kwargs)
            else:
                yield executor.submit(self._read_and_format_chunk,
                                      **read_chunk_kwargs)
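
When an executor is passed, loader yields futures instead of chunks, so the caller unwraps them with .result(). A hedged consumption sketch (backend and backend_key are assumed to exist; concurrent.futures is standard library):

    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=4) as ex:
        for future in backend.loader(backend_key, executor=ex):
            chunk = future.result()  # read/decompress ran in the pool
            print(chunk)
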
Example 15
        def check_cache(d):
            nonlocal plugins, loaders, savers, seen
            if d in seen:
                return
            seen.add(d)
            p = plugins[d]

            # Can we load this data?
            loading_this_data = False
            key = strax.DataKey(run_id, d, p.lineage)

            ldr = self._get_partial_loader_for(key,
                                               chunk_number=chunk_number,
                                               time_range=time_range)

            if not ldr and run_id.startswith('_'):
                if time_range is not None:
                    raise NotImplementedError("time range loading not yet "
                                              "supported for superruns")

                sub_run_spec = self.run_metadata(
                    run_id, 'sub_run_spec')['sub_run_spec']
                self.make(list(sub_run_spec.keys()), d)

                ldrs = []
                for subrun in sub_run_spec:
                    sub_key = strax.DataKey(
                        subrun, d,
                        self._get_plugins((d, ), subrun)[d].lineage)
                    if sub_run_spec[subrun] == 'all':
                        _subrun_time_range = None
                    else:
                        _subrun_time_range = sub_run_spec[subrun]
                    ldr = self._get_partial_loader_for(
                        sub_key,
                        time_range=_subrun_time_range,
                        chunk_number=chunk_number)
                    if not ldr:
                        raise RuntimeError(
                            f"Could not load {d} for subrun {subrun} "
                            f"even though we just made it")
                    ldrs.append(ldr)

                def concat_loader(*args, **kwargs):
                    for x in ldrs:
                        yield from x(*args, **kwargs)

                ldr = concat_loader

            if ldr:
                # Found it! No need to make it or look in other frontends
                loading_this_data = True
                loaders[d] = ldr
                del plugins[d]
            else:
                # Data not found anywhere. We will be computing it.
                if (time_range is not None
                        and plugins[d].save_when != strax.SaveWhen.NEVER):
                    # While the data type providing the time information is
                    # available (else we'd have failed earlier), one of the
                    # other requested data types is not.
                    raise strax.DataNotAvailable(
                        f"Time range selection assumes data is already "
                        f"available, but {d} for {run_id} is not.")
                if '*' in self.context_config['forbid_creation_of']:
                    raise strax.DataNotAvailable(
                        f"{d} for {run_id} not found in any storage, and "
                        "your context specifies no new data can be created.")
                if d in self.context_config['forbid_creation_of']:
                    raise strax.DataNotAvailable(
                        f"{d} for {run_id} not found in any storage, and "
                        "your context specifies it cannot be created.")
                to_compute[d] = p
                for dep_d in p.depends_on:
                    check_cache(dep_d)

            # Should we save this data? If not, return.
            if (loading_this_data
                    and not self.context_config['storage_converter']):
                return
            if p.save_when == strax.SaveWhen.NEVER:
                if d in save:
                    raise ValueError(f"Plugin forbids saving of {d}")
                return
            elif p.save_when == strax.SaveWhen.TARGET:
                if d not in targets:
                    return
            elif p.save_when == strax.SaveWhen.EXPLICIT:
                if d not in save:
                    return
            else:
                assert p.save_when == strax.SaveWhen.ALWAYS

            # Warn about conditions that preclude saving, but the user
            # might not expect.
            if time_range is not None:
                # We're not even getting the whole data.
                # Without this check, saving could be attempted if the
                # storage converter mode is enabled.
                self.log.warning(f"Not saving {d} while "
                                 f"selecting a time range in the run")
                return
            if any([
                    len(v) > 0 for k, v in self._find_options.items()
                    if 'fuzzy' in k
            ]):
                # In fuzzy matching mode, we cannot (yet) derive the
                # lineage of any data we are creating. To avoid creating
                # false data entries, we currently do not save at all.
                self.log.warning(f"Not saving {d} while fuzzy matching is"
                                 f" turned on.")
                return
            if self.context_config['allow_incomplete']:
                self.log.warning(f"Not saving {d} while loading incomplete"
                                 f" data is allowed.")
                return

            # Save the target and any other outputs of the plugin.
            for d_to_save in set([d] + list(p.provides)):
                if d_to_save in savers and len(savers[d_to_save]):
                    # This multi-output plugin was scanned before;
                    # don't create duplicate savers
                    assert p.multi_output
                    continue

                key = strax.DataKey(run_id, d_to_save, p.lineage)

                for sf in self.storage:
                    if sf.readonly:
                        continue
                    if loading_this_data:
                        # Usually, we don't save if we're loading
                        if not self.context_config['storage_converter']:
                            continue
                        # ... but in storage converter mode we do:
                        try:
                            sf.find(key, **self._find_options)
                            # Already have this data in this backend
                            continue
                        except strax.DataNotAvailable:
                            # Don't have it, so let's save it!
                            pass
                    # If we get here, we must try to save
                    try:
                        savers[d_to_save].append(
                            sf.saver(key,
                                     metadata=p.metadata(run_id, d_to_save)))
                    except strax.DataNotAvailable:
                        # This frontend cannot save. Too bad.
                        pass
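
The superrun branch above stitches per-subrun loaders into a single generator; the chaining itself is just sequential yield from, as this toy sketch shows:

    def make_loader(values):
        # Stand-in for a per-subrun loader returned by the frontend
        def loader():
            yield from values
        return loader

    ldrs = [make_loader([1, 2]), make_loader([3])]

    def concat_loader():
        for ldr in ldrs:
            yield from ldr()

    print(list(concat_loader()))  # [1, 2, 3]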