Example #1
    def __init__(self,
                 readonly=False,
                 provide_run_metadata=None,
                 overwrite='if_broken',
                 take_only=tuple(),
                 exclude=tuple()):
        """
        :param readonly: If True, throws CannotWriteData whenever saving is
        attempted.
        :param overwrite: When to overwrite data that already exists.
         - 'never': Never overwrite any data.
         - 'if_broken': Only overwrites data if it is incomplete or broken.
         - 'always': Always overwrite data. Use with caution!
        :param take_only: Provide/accept only these data types.
        :param exclude: Do NOT provide/accept these data types.
        :param provide_run_metadata: Whether to provide run-level metadata
        (run docs). If None, use the class-specific default.

        If take_only and exclude are both omitted, provide all data types.
        If a data type is listed in both, it will not be provided.
        Attempting to read/write unwanted data types throws DataTypeNotWanted.
        """
        if overwrite not in 'never if_broken always'.split():
            raise RuntimeError(f"Invalid 'overwrite' setting {overwrite}.")

        self.take_only = strax.to_str_tuple(take_only)
        self.exclude = strax.to_str_tuple(exclude)
        self.overwrite = overwrite
        if provide_run_metadata is not None:
            self.provide_run_metadata = provide_run_metadata
        self.readonly = readonly
        self.log = logging.getLogger(self.__class__.__name__)
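
Every example on this page normalizes its user input with strax.to_str_tuple. As a minimal sketch of the helper's behavior (assuming the usual strax semantics: a bare string is wrapped, iterables become tuples of strings):

import strax

# A single string becomes a one-element tuple ...
assert strax.to_str_tuple('raw_records') == ('raw_records',)
# ... while lists and tuples pass through as tuples of strings.
assert strax.to_str_tuple(['peaks', 'events']) == ('peaks', 'events')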
Example #2
    def test_first_run_plugins(self):
        self.script.purge_below('match_acceptance_extended')
        for t in strax.to_str_tuple(self.script.target):
            for r in strax.to_str_tuple(self.script.run_id):
                if self.script.st.get_save_when(t) > strax.SaveWhen.NEVER:
                    self.script.st.make(r, t)
                    assert self.script.st.is_stored(r, t)
Example #3
def available_for_run(self: strax.Context,
                      run_id: str,
                      include_targets: ty.Union[None, list, tuple, str] = None,
                      exclude_targets: ty.Union[None, list, tuple, str] = None,
                      pattern_type: str = 'fnmatch') -> pd.DataFrame:
    """
    For a given single run, check for each target whether it is stored.
        Targets that are never stored anyway are excluded.
    :param run_id: requested run
    :param include_targets: targets to include, e.g. raw_records,
        raw_records* or *_nv. If multiple patterns (e.g. a list) are
        provided, a target is included if it matches any of them.
    :param exclude_targets: targets to exclude, e.g. raw_records,
        raw_records* or *_nv. If multiple patterns (e.g. a list) are
        provided, a target is excluded if it matches any of them.
    :param pattern_type: either 'fnmatch' (Unix filename pattern
        matching) or 're' (Regular expression operations).
    :return: Table of available data per target
    """
    if not isinstance(run_id, str):
        raise ValueError(f'Only single run_id is allowed (str),'
                         f' got {run_id} ({type(run_id)})')

    if exclude_targets is None:
        exclude_targets = []
    if include_targets is None:
        include_targets = []

    is_stored = defaultdict(list)
    for target in self._plugin_class_registry.keys():
        # Skip targets that are not stored
        save_when = self.get_save_when(target)
        if save_when == strax.SaveWhen.NEVER:
            continue

        # Should we include this target or exclude it?
        include_t = []
        exclude_t = False

        for excl in strax.to_str_tuple(exclude_targets):
            # Simple logic: if the target matches an excluded pattern,
            # we should not continue
            if _tag_match(target, excl, pattern_type, False):
                exclude_t = True
                break

        # We can match any of the "incl" targets: keep a list and check
        # if any of the "incl" patterns matches the target.
        for incl in strax.to_str_tuple(include_targets):
            include_t.append(_tag_match(target, incl, pattern_type, False))

        # Convert to a simple bool. If no include_targets are specified,
        # all targets are fine; otherwise require at least one match.
        include_t = True if not len(include_t) else any(include_t)

        if include_t and not exclude_t:
            is_stored['target'].append(target)
            is_stored['is_stored'].append(self.is_stored(run_id, target))
    return pd.DataFrame(is_stored)
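
A hedged usage sketch (the context st and the run id are illustrative assumptions):

# Which peak-level targets are stored for run '012345'?
df = st.available_for_run('012345', include_targets='peak*')
print(df[df['is_stored']])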
Example #4
    def __init__(self,
                 take_only=tuple(),
                 exclude=tuple(),
                 recover=False,
                 readonly=False):
        self._take_only = strax.to_str_tuple(take_only)
        self._exclude = strax.to_str_tuple(exclude)
        self.recover = recover
        self.readonly = readonly
        self.log = logging.getLogger(self.__class__.__name__)
Example #5
def check_loading_allowed(data,
                          run_id,
                          target,
                          max_in_disallowed=1,
                          disallowed=('event_positions', 'corrected_areas',
                                      'energy_estimates')):
    """
    Check that the loading of the specified targets is not
    disallowed

    :param data: chunk of data
    :param run_id: run_id of the run
    :param target: list of targets requested by the user
    :param max_in_disallowed: the maximum number of requested targets
        that may be in the disallowed list
    :param disallowed: list of targets that are not allowed to be
        loaded simultaneously by the user
    :return: data
    :raise: RuntimeError if more than max_in_disallowed targets
        are requested
    """
    n_targets_in_disallowed = sum(
        [t in disallowed for t in strax.to_str_tuple(target)])
    if n_targets_in_disallowed > max_in_disallowed:
        raise RuntimeError(
            f'Don\'t load {disallowed} separately, use "event_info" instead')
    return data
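
For illustration, a sketch of how this guard behaves (the data array is a stand-in; names follow the defaults above):

import numpy as np

data = np.zeros(1)
# A single disallowed target is still within max_in_disallowed=1 ...
check_loading_allowed(data, '012345', 'event_positions')
# ... but two disallowed targets raise a RuntimeError.
try:
    check_loading_allowed(
        data, '012345', ['event_positions', 'corrected_areas'])
except RuntimeError as error:
    print(error)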
Example #6
def pre_apply_function(data,
                       run_id,
                       target,
                       function_name='pre_apply_function'):
    """
    Prior to returning the data (from one chunk) see if any function(s) need to
    be applied.

    :param data: one chunk of data for the requested target(s)
    :param run_id: Single run-id of the chunk of data
    :param target: one or more targets
    :param function_name: the name of the function to be applied. The
        function_name.py should be stored in the database.
    :return: Data where the function is applied.
    """
    if function_name not in _resource_cache:
        # only load the function once and put it in the resource cache
        function_file = f'{function_name}.py'
        function_file = _load_function_file_from_home(function_file)
        function = get_resource(function_file, fmt='txt')
        # pylint: disable=exec-used
        exec(function)
        # Cache the function to reduce reloading & eval operations
        _resource_cache[function_name] = locals().get(function_name)
    data = _resource_cache[function_name](data, run_id,
                                          strax.to_str_tuple(target))
    return data
Example #7
    def __init__(self):
        if not hasattr(self, 'depends_on'):
            raise ValueError('depends_on not provided for '
                             f'{self.__class__.__name__}')

        self.depends_on = strax.to_str_tuple(self.depends_on)

        # Store compute parameter names, see if we take chunk_i too
        compute_pars = list(inspect.signature(self.compute).parameters.keys())
        if 'chunk_i' in compute_pars:
            self.compute_takes_chunk_i = True
            del compute_pars[compute_pars.index('chunk_i')]
        if 'start' in compute_pars:
            if 'end' not in compute_pars:
                raise ValueError(f"Compute of {self} takes start, "
                                 f"so it should also take end.")
            self.compute_takes_start_end = True
            del compute_pars[compute_pars.index('start')]
            del compute_pars[compute_pars.index('end')]

        if not isinstance(self.save_when, (IntEnum, immutabledict, int)):
            raise ValueError(
                'save_when must be either a SaveWhen object or an immutabledict '
                'representing the different data_types provided.')

        if hasattr(self, 'provides') and not isinstance(
                self.save_when, immutabledict):
            # The ParallelSource plugin does not provide anything as it
            # only inlines already existing components, therefore we also do
            # not have to update save_when
            self.save_when = immutabledict.fromkeys(self.provides,
                                                    self.save_when)

        self.compute_pars = compute_pars
        self.input_buffer = dict()
Example #8
def dataframe_to_wiki(df,
                      float_digits=5,
                      title='Awesome table',
                      force_int=tuple()):
    """Convert a pandas dataframe to a dokuwiki table 
    (which you can copy-paste onto the XENON wiki)
    :param df: dataframe to convert
    :param float_digits: Round float-ing point values to this number of digits.
    :param title: title of the table.
    """
    table = '^ %s ' % title + '^' * (len(df.columns) - 1) + '^\n'
    table += '^ ' + ' ^ '.join(df.columns) + ' ^\n'

    def do_round(x):
        if isinstance(x, float):
            return round(x, float_digits)
        return x

    force_int = np.where(
        np.in1d(df.columns.values, strax.to_str_tuple(force_int)))[0]

    for _, row in df.iterrows():
        table += "| " + ' | '.join([
            str(int(x) if i in force_int else do_round(x))
            for i, x in enumerate(row.values.tolist())
        ]) + ' |\n'
    return table
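
A usage sketch (assuming pandas is imported as pd and the snippet's np/strax imports are in scope):

df = pd.DataFrame({'run': [1.0, 2.0], 'rate': [1.23456, 7.891011]})
# 'run' is rendered as an integer, 'rate' is rounded to 3 digits.
print(dataframe_to_wiki(df, float_digits=3, title='Rates',
                        force_int=('run',)))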
Example #9
def rekey_dict(d, replace_keys='', with_keys=''):
    '''
    :param d: dictionary that will have its keys renamed
    :param replace_keys: comma-separated string of keys that will be replaced
    :param with_keys: comma-separated string of keys that will replace the replace_keys
    :return: dictionary with renamed keys
    '''
    new_dict = d.copy()
    replace_keys = strax.to_str_tuple(replace_keys.split(','))
    with_keys = strax.to_str_tuple(with_keys.split(','))
    if len(replace_keys) != len(with_keys):
        raise RuntimeError(
            "replace_keys and with_keys must have the same length")
    for old_key, new_key in zip(replace_keys, with_keys):
        new_dict[new_key] = new_dict.pop(old_key)
    return new_dict
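
A usage sketch of rekey_dict:

d = {'n_photon': 10, 's1_area': 20.0}
# Rename 'n_photon' to 'n_pe'; other keys are left untouched.
print(rekey_dict(d, replace_keys='n_photon', with_keys='n_pe'))
# -> {'s1_area': 20.0, 'n_pe': 10}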
Example #10
def print_versions(modules=('strax', 'straxen'), return_string=False):
    """
    Print versions of modules installed.

    :param modules: Modules to print, should be str, tuple or list. E.g.
        print_versions(modules=('strax', 'straxen', 'wfsim',
        'cutax', 'pema'))
    :param return_string: optional. Instead of printing the message,
        return a string
    :return: optional, the message that would have been printed
    """
    message = (f'Working on {socket.getfqdn()} with the following '
               f'versions and installation paths:')
    py_version = sys.version.replace(' (', '\t(').replace('\n', '')
    message += f"\npython\tv{py_version}"
    for m in strax.to_str_tuple(modules):
        try:
            # pylint: disable=exec-used
            exec(f'import {m}')
            # pylint: disable=eval-used
            message += f'\n{m}\tv{eval(m).__version__}\t{eval(m).__path__[0]}'
        except (ModuleNotFoundError, ImportError):
            print(f'{m} is not installed')
    if return_string:
        return message
    print(message)
Example #11
    def get_array(self,
                  run_id: ty.Union[str, tuple, list],
                  targets,
                  save=tuple(),
                  max_workers=None,
                  **kwargs) -> np.ndarray:
        """Compute target for run_id and return as numpy array.
        {get_docs}
        """
        run_ids = strax.to_str_tuple(run_id)
        if len(run_ids) > 1:
            results = strax.multi_run(self.get_array,
                                      run_ids,
                                      targets=targets,
                                      save=save,
                                      max_workers=max_workers,
                                      **kwargs)
        else:
            source = self.get_iter(run_ids[0],
                                   targets,
                                   save=save,
                                   max_workers=max_workers,
                                   **kwargs)
            results = [x.data for x in source]
        return np.concatenate(results)
Example #12
    def make(self,
             run_id: ty.Union[str, tuple, list],
             targets,
             save=tuple(),
             max_workers=None,
             _skip_if_built=True,
             **kwargs) -> None:
        """Compute target for run_id. Returns nothing (None).
        {get_docs}
        """
        # Multi-run support
        run_ids = strax.to_str_tuple(run_id)
        if len(run_ids) == 0:
            raise ValueError("Cannot build empty list of runs")
        if len(run_ids) > 1:
            return strax.multi_run(self.get_array,
                                   run_ids,
                                   targets=targets,
                                   throw_away_result=True,
                                   save=save,
                                   max_workers=max_workers,
                                   **kwargs)

        if _skip_if_built and self.is_stored(run_ids[0], targets):
            return

        for _ in self.get_iter(run_ids[0],
                               targets,
                               save=save,
                               max_workers=max_workers,
                               **kwargs):
            pass
Example #13
def remap_old(data, targets, run_id, works_on_target=''):
    """
    If the data is from before the time the sectors were re-cabled, apply a software remap;
        otherwise just return the data as it is.
    :param data: numpy array of data with at least the field time. It is assumed the data
        is sorted by time
    :param targets: targets in the st.get_array to get
    :param run_id: required positional argument of apply_function_to_data in strax
    :param works_on_target: regex match string to match any of the targets. By default set
        to '' such that any target in the targets would be remapped (which is what we want
        as channels are present in most data types). If one only wants records (no
        raw-records) and peaks* use e.g. works_on_target = 'records|peaks'.
    """

    if np.any(data['time'][:2] >= TSTART_FIRST_CORRECTLY_CABLED_RUN):
        # We leave the 'new' data be
        pass
    elif not np.any(
        [match(works_on_target, t) for t in strax.to_str_tuple(targets)]):
        # None of the targets are such that we want to remap
        pass
    elif len(data):
        # select the old data and do the remapping for this
        mask = data['time'] < TSTART_FIRST_CORRECTLY_CABLED_RUN
        data = data.copy()
        data[mask] = remap_channels(data[mask])
    return data
Example #14
def define_run(self,
               name: str,
               data: ty.Union[np.ndarray, pd.DataFrame, dict],
               from_run: ty.Union[str, None] = None):
    if isinstance(data, (pd.DataFrame, np.ndarray)):
        # Array of events / regions of interest
        start, end = data['time'], strax.endtime(data)
        if from_run is not None:
            return self.define_run(name,
                                   {from_run: np.transpose([start, end])})
        else:
            df = pd.DataFrame(
                dict(start=start, end=end, run_id=data['run_id']))
            return self.define_run(
                name, {
                    run_id: rs[['start', 'end']].values.transpose()
                    for run_id, rs in df.groupby('run_id')
                })

    if isinstance(data, (list, tuple)):
        # list of run ids
        data = strax.to_str_tuple(data)
        return self.define_run(name, {run_id: 'all' for run_id in data})

    if not isinstance(data, dict):
        raise ValueError(f"Can't define run from {type(data)}")

    # Dict mapping run_id: array of time ranges or all
    for sf in self.storage:
        if not sf.readonly and sf.can_define_runs:
            sf.define_run(name, data)
            break
    else:
        raise RuntimeError("No storage frontend registered that allows"
                           " run definition")
Example #15
def remap_old(data, targets, works_on_target=''):
    """
    If the data is from before the time the sectors were re-cabled, apply a software remap;
        otherwise just return the data as it is.
    :param data: numpy array of data with at least the field time. It is assumed the data
        is sorted by time
    :param targets: targets in the st.get_array to get
    :param works_on_target: regex match string to match any of the targets. By default set
        to '' such that any target in the targets would be remapped (which is what we want
        as channels are present in most data types). If one only wants records (no
        raw-records) and peaks* use e.g. works_on_target = 'records|peaks'.
    """

    if np.any(data['time'][:2] >= TSTART_FIRST_CORRECTLY_CABLED_RUN):
        # We leave the 'new' data be
        pass
    elif not np.any([match(works_on_target, t) for t in strax.to_str_tuple(targets)]):
        # None of the targets are such that we want to remap
        pass
    elif len(data):
        # select the old data and do the remapping for this
        warn("Correcting data of runs with mis-cabled PMTs. \nSee: https://"
             "xe1t-wiki.lngs.infn.it/doku.php?id=xenon:xenonnt:dsg:daq:sector_swap. "
             "Don't use '' selection_str='channel == xx' '' (github.com/XENONnT/straxen/issues/239)")
        mask = data['time'] < TSTART_FIRST_CORRECTLY_CABLED_RUN
        data[mask] = remap_channels(data[mask])
    return data
Example #16
    def make(self,
             run_id: ty.Union[str, tuple, list],
             targets,
             save=tuple(),
             max_workers=None,
             **kwargs) -> None:
        """Compute target for run_id. Returns nothing (None).
        {get_docs}
        """
        # Multi-run support
        run_ids = strax.to_str_tuple(run_id)
        if len(run_ids) > 1:
            return multi_run(self,
                             run_ids,
                             targets=targets,
                             save=save,
                             max_workers=max_workers,
                             **kwargs)

        for _ in self.get_iter(run_ids[0],
                               targets,
                               save=save,
                               max_workers=max_workers,
                               **kwargs):
            pass
Example #17
    def get_array(self,
                  run_id: ty.Union[str, tuple, list],
                  targets,
                  save=tuple(),
                  max_workers=None,
                  **kwargs) -> np.ndarray:
        """Compute target for run_id and return as numpy array.
        {get_docs}
        """
        run_ids = strax.to_str_tuple(run_id)
        if len(run_ids) > 1:
            results = multi_run(self.get_array,
                                run_ids,
                                targets=targets,
                                save=save,
                                max_workers=max_workers,
                                **kwargs)
        else:
            results = list(
                self.get_iter(run_ids[0],
                              targets,
                              save=save,
                              max_workers=max_workers,
                              **kwargs))
        if len(results):
            return np.concatenate(results)
        raise ValueError("No results returned?")
Example #18
    def estimate_run_start(self, run_id, targets=None):
        """Return run start time in ns since epoch.

        This fetches from run metadata, and if this fails, it
        estimates it using data metadata from targets.
        """
        try:
            # Use run metadata, if it is available, to get
            # the run start time (floored to seconds)
            t0 = self.run_metadata(run_id, 'start')['start']
            t0 = t0.replace(tzinfo=datetime.timezone.utc)
            return int(t0.timestamp()) * int(1e9)
        except (strax.RunMetadataNotAvailable, KeyError):
            pass
        # Get an approx start from the data itself,
        # then floor it to seconds for consistency
        if targets:
            for t in strax.to_str_tuple(targets):
                try:
                    t0 = self.get_meta(run_id, t)['chunks'][0]['start']
                    return (int(t0) // int(1e9)) * int(1e9)
                except strax.DataNotAvailable:
                    pass
        warnings.warn(
            "Could not estimate run start time from "
            "run metadata: assuming it is 0", UserWarning)
        return 0
Example #19
def get_itp_dict(loaded_json,
                 run_id=None,
                 time_key='time',
                 itp_keys='correction',
                 **kwargs
                 ) -> typing.Union[np.ndarray, typing.Dict[str, np.ndarray]]:
    """
    Interpolate a dictionary at the start time that is queried from
    a run-id.

    :param loaded_json: a dictionary with a time-series
    :param run_id: run_id
    :param time_key: key that gives the timestamps
    :param itp_keys: which keys from the dict to read. Should be
        comma (',') separated!

    :return: Interpolated values of dict at the start time, either
        returned as an np.ndarray (single value) or as a dict
        (multiple itp_dict_keys)
    """
    keys = strax.to_str_tuple(itp_keys.split(','))
    for key in list(keys) + [time_key]:
        if key not in loaded_json:
            raise KeyError(
                f"The json does not contain the key '{key}'. "
                f"Try one of: {loaded_json.keys()}"
            )

    times = loaded_json[time_key]

    # get start time of this run. Need to make tz-aware
    start = xent_collection().find_one({'number': int(run_id)},
                                       {'start': 1})['start']
    start = pytz.utc.localize(start).timestamp() * 1e9

    try:
        if len(keys) > 1:
            return {
                key: interp1d(times, loaded_json[key],
                              bounds_error=True)(start)
                for key in keys
            }

        else:
            interp = interp1d(times, loaded_json[keys[0]], bounds_error=True)
            return interp(start)
    except ValueError as e:
        raise ValueError(f"Correction is not defined for run {run_id}") from e
Example #20
    def __init__(self,
                 st: strax.Context,
                 run_id: ty.Union[str, tuple, list],
                 target: ty.Union[str, tuple],
                 config: ty.Union[dict, immutabledict, None] = None,
                 ):
        self.st = st.new_context()
        self.run_id = strax.to_str_tuple(run_id)
        self.target = strax.to_str_tuple(target)
        if config is None:
            config = {}
        self.config = config
        self.st.set_config(config)

        self.base_dir = self.extract_base_dir(st)
        for subdir in self.base_dir_requires:
            os.makedirs(os.path.join(self.base_dir, subdir), exist_ok=True)
Example #21
def define_run(self: strax.Context,
               name: str,
               data: ty.Union[np.ndarray, pd.DataFrame, dict],
               from_run: ty.Union[str, None] = None):
    if isinstance(data, (pd.DataFrame, np.ndarray)):
        # Array of events / regions of interest
        start, end = data['time'], strax.endtime(data)
        if from_run is not None:
            return self.define_run(
                name,
                {from_run: np.transpose([start, end])})
        elif 'run_id' not in data:
            raise ValueError(
                "Must provide from_run or data with a run_id column "
                "to define a superrun")
        else:
            df = pd.DataFrame(dict(start=start, end=end,
                                   run_id=data['run_id']))
            return self.define_run(
                name,
                {run_id: rs[['start', 'end']].values.transpose()
                 for run_id, rs in df.groupby('run_id')})

    if isinstance(data, (list, tuple)):
        # list of runids
        data = strax.to_str_tuple(data)
        return self.define_run(
            name,
            {run_id: 'all' for run_id in data})

    if not isinstance(data, dict):
        raise ValueError(f"Can't define run from {type(data)}")

    # Determine start, end and livetime of the new run from its subruns
    run_md = dict(start=float('inf'), end=0, livetime=0)
    for _subrunid in data:
        doc = self.run_metadata(_subrunid, ['start', 'end'])
        run_md['start'] = min(run_md['start'], doc['start'])
        run_md['end'] = max(run_md['end'], doc['end'])
        run_md['livetime'] += doc['end'] - doc['start']

    # Superrun names must start with an underscore
    if not name.startswith('_'):
        name = '_' + name

    # Dict mapping run_id: array of time ranges or all
    for sf in self.storage:
        if not sf.readonly and sf.can_define_runs:
            sf.define_run(name, sub_run_spec=data, **run_md)
            break
    else:
        raise RuntimeError("No storage frontend registered that allows"
                           " run definition")
Example #22
    def _scan_runs(self, store_fields):
        cursor = self.collection.find(filter={},
                                      projection=strax.to_str_tuple(
                                          list(store_fields) +
                                          ['reader.ini.name']))
        for doc in tqdm(cursor,
                        desc='Fetching run info from MongoDB',
                        total=cursor.count()):
            del doc['_id']
            if self.reader_ini_name_is_mode:
                doc['mode'] = \
                    doc.get('reader', {}).get('ini', {}).get('name', '')
            yield doc
Example #23
    def __init__(self):
        if not hasattr(self, 'depends_on'):
            raise ValueError('depends_on not provided for '
                             f'{self.__class__.__name__}')

        self.depends_on = strax.to_str_tuple(self.depends_on)

        # Store compute parameter names, see if we take chunk_i too
        compute_pars = list(inspect.signature(self.compute).parameters.keys())
        if 'chunk_i' in compute_pars:
            self.compute_takes_chunk_i = True
            del compute_pars[compute_pars.index('chunk_i')]

        self.compute_pars = compute_pars
Example #24
    def _scan_runs(self, store_fields):
        cursor = self.collection.find(filter={},
                                      projection=strax.to_str_tuple(
                                          list(store_fields) +
                                          ['reader.ini.name']))
        for doc in tqdm(cursor,
                        desc='Fetching run info from MongoDB',
                        total=cursor.count()):
            # Remove the Mongo document ID and add the run mode
            del doc['_id']
            doc.setdefault(
                'mode',
                doc.get('reader', {}).get('ini', {}).get('name', ''))
            yield doc
Example #25
    def _scan_runs(self, store_fields):
        if self.minimum_run_number:
            query = {'number': {'$gt': self.minimum_run_number}}
        else:
            query = {}
        cursor = self.collection.find(filter=query,
                                      projection=strax.to_str_tuple(
                                          list(store_fields)))
        for doc in tqdm(cursor,
                        desc='Fetching run info from MongoDB',
                        total=cursor.count()):
            del doc['_id']
            if self.reader_ini_name_is_mode:
                doc['mode'] = \
                    doc.get('reader', {}).get('ini', {}).get('name', '')
            yield doc
Example #26
    def __init__(self, store_files_at=None, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # We are going to set a place where to store the files. It's
        # either specified by the user or we use these defaults:
        if store_files_at is None:
            store_files_at = (
                './resource_cache',
                '/tmp/straxen_resource_cache/',
                '/dali/lgrandi/strax/resource_cache',
            )
        elif not isinstance(store_files_at, (tuple, str, list)):
            raise ValueError(f'{store_files_at} should be a tuple of paths!')
        elif isinstance(store_files_at, str):
            store_files_at = to_str_tuple(store_files_at)

        self.storage_options = store_files_at
Example #27
    def _scan_runs(self, store_fields):
        query = self.number_query()
        projection = strax.to_str_tuple(list(store_fields))
        # If both a field and one of its subfields are requested, only
        # keep the most "specific" (subfield) projection
        projection = [
            f1 for f1 in projection
            if not any([f2.startswith(f1 + ".") for f2 in projection])
        ]
        cursor = self.collection.find(filter=query, projection=projection)
        for doc in tqdm(cursor,
                        desc='Fetching run info from MongoDB',
                        total=cursor.count()):
            del doc['_id']
            if self.reader_ini_name_is_mode:
                doc['mode'] = \
                    doc.get('reader', {}).get('ini', {}).get('name', '')
            yield doc
Example #28
    def scan_runs(self, check_available=std_dtypes, extra_fields=tuple()):
        """Update and return self.runs with runs currently available
        in the runs db.
        :param check_available: Check whether these data types are available.
        Availability of xxx is stored as a boolean in the xxx_available
        column.
        :param extra_fields: Additional fields from the run doc to include
        as columns in the dataframe.
        """
        base_fields = [
            'name', 'number', 'reader.ini.name', 'tags.name', 'start',
            'end', 'trigger.events_built'
        ]

        if self.rundb is None:
            raise RuntimeError("Cannot scan runs db if no "
                               "rundb frontend is registered in the context.")

        docs = []
        cursor = self.rundb.find(
            filter={},
            projection=(base_fields + list(strax.to_str_tuple(extra_fields))))
        for doc in tqdm(cursor, desc='Loading run info', total=cursor.count()):
            # TODO: Perhaps we should turn this query into an aggregation
            # to return also availability of key data types
            # (records, peaks, events?)

            # If there is no name, make one from the number
            doc.setdefault('name', str(doc['number']))

            # Process and flatten the doc
            # Convert tags to single string
            doc['tags'] = ','.join([t['name'] for t in doc.get('tags', [])])
            doc = straxen.flatten_dict(doc, separator='__')
            del doc['_id']  # Remove the Mongo document ID
            docs.append(doc)

        self.runs = pd.DataFrame(docs)

        for d in tqdm(check_available, desc='Checking data availability'):
            self.runs[d + '_available'] = np.in1d(self.runs.name.values,
                                                  self.list_available(d))

        return self.runs
Example #29
    def test_later_compare(self):
        st = self.script.st
        st2 = st.new_context()
        # Use the first run defined on the script
        run_id = strax.to_str_tuple(self.script.run_id)[0]
        for t in strax.to_str_tuple(self.script.target):
            print(run_id, t)
            st2.make(run_id, t)
        peaks_1 = st.get_array(run_id, 'match_acceptance_extended')
        peaks_2 = st2.get_array(run_id, 'match_acceptance_extended')
        if 'run_id' not in peaks_1.dtype.names:
            peaks_1 = pema.append_fields(peaks_1, 'run_id',
                                         [run_id] * len(peaks_1))
            peaks_2 = pema.append_fields(peaks_2, 'run_id',
                                         [run_id] * len(peaks_2))
        pema.compare_truth_and_outcome(st,
                                       peaks_1,
                                       max_peaks=2,
                                       show=False,
                                       fig_dir=self.tempdir,
                                       raw=True)
        plt.clf()
        pema.compare_outcomes(
            st,
            peaks_1,
            st2,
            peaks_2,
            max_peaks=2,
            show=False,
            different_by=None,
            fig_dir=self.tempdir,
            raw=False,
        )
        plt.clf()
        if len(peaks_1):
            pema.summary_plots.plot_peak_matching_histogram(
                peaks_1,
                'n_photon',
                bin_edges=[0, int(peaks_1['n_photon'].max())])
            plt.clf()
            pema.summary_plots.acceptance_plot(
                peaks_1,
                'n_photon',
                bin_edges=[0, int(peaks_1['n_photon'].max())])
            plt.clf()
Example #30
def print_versions(
    modules=('strax', 'straxen', 'cutax'),
    print_output=not _is_jupyter,
    include_python=True,
    return_string=False,
    include_git=True,
):
    """
    Print versions of modules installed.

    :param modules: Modules to print, should be str, tuple or list. E.g.
        print_versions(modules=('numpy', 'dddm',))
    :param print_output: print the versions table (by default only
        outside of Jupyter)
    :param include_python: include the python version and executable path
    :param return_string: optional. Instead of printing the message,
        return a string
    :param include_git: Include the current branch and latest
        commit hash
    :return: optional, the message that would have been printed;
        otherwise a pandas DataFrame with the version info
    """
    versions = defaultdict(list)
    if include_python:
        versions['module'] = ['python']
        versions['version'] = [python_version()]
        versions['path'] = [sys.executable]
        versions['git'] = [None]
    for m in strax.to_str_tuple(modules):
        result = _version_info_for_module(m, include_git=include_git)
        if result is None:
            continue
        version, path, git_info = result
        versions['module'].append(m)
        versions['version'].append(version)
        versions['path'].append(path)
        versions['git'].append(git_info)
    df = pd.DataFrame(versions)
    info = f'Host {socket.getfqdn()}\n{df.to_string(index=False)}'
    if print_output:
        print(info)
    if return_string:
        return info
    return df
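
A usage sketch (the module selection is illustrative):

# Returns a pandas DataFrame; also prints the table outside Jupyter.
df = print_versions(modules=('strax',), include_git=False)
print(df['version'])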