Example #1
def _load_file(f, compressor, dtype):
    try:
        data = f.read()
        if not len(data):
            return np.zeros(0, dtype=dtype)

        data = COMPRESSORS[compressor]['decompress'](data)
        return np.frombuffer(data, dtype=dtype)

    except Exception:
        raise strax.DataCorrupted(f"Fatal Error while reading file {f}: " +
                                  strax.utils.formatted_exception())
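A minimal round-trip sketch of how this loader might be used. The 'bz2' registry entry and file name are illustrative only; strax ships its own COMPRESSORS mapping, and this sketch assumes _load_file and the stand-in COMPRESSORS dict live in the same module.

import bz2

import numpy as np

# Illustrative stand-in for the real COMPRESSORS registry
COMPRESSORS = {'bz2': {'compress': bz2.compress,
                       'decompress': bz2.decompress}}

original = np.arange(5, dtype=np.int32)
with open('records.bin', 'wb') as f:
    # Write a compressed copy of the array's raw bytes
    f.write(COMPRESSORS['bz2']['compress'](original.tobytes()))

with open('records.bin', 'rb') as f:
    loaded = _load_file(f, compressor='bz2', dtype=np.int32)

assert np.array_equal(original, loaded)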
Example #2
    def get_metadata(self, dirname: str, **kwargs):
        prefix = dirname_to_prefix(dirname)
        metadata_json = f'{prefix}-metadata.json'
        fn = rucio_path(self.root_dir, metadata_json, dirname)
        folder = osp.join('/', *fn.split('/')[:-1])
        if not osp.exists(folder):
            raise strax.DataNotAvailable(f"No folder for metadata at {fn}")
        if not osp.exists(fn):
            raise strax.DataCorrupted(f"Folder exists but no matadata at {fn}")

        with open(fn, mode='r') as f:
            return json.loads(f.read())
Example #3
    def get_metadata(self, dirname):
        prefix = dirname_to_prefix(dirname)
        metadata_json = f'{prefix}-metadata.json'
        md_path = osp.join(dirname, metadata_json)

        if not osp.exists(md_path):
            # Fall back to old-format metadata;
            # if that is missing too, the data directory
            # has no metadata at all.
            old_md_path = osp.join(dirname, 'metadata.json')
            if not osp.exists(old_md_path):
                raise strax.DataCorrupted(f"Data in {dirname} has no metadata")
            md_path = old_md_path

        with open(md_path, mode='r') as f:
            return json.loads(f.read())
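For orientation, a hedged sketch of the directory layout this fallback handles. The directory name, the hash, and the assumption that dirname_to_prefix strips the leading run number are illustrative, not taken from this code.

# Hypothetical data directory (names are made up):
#
#   012345-peaks-abcdef12/
#       peaks-abcdef12-metadata.json   <- per-prefix metadata, tried first
#       metadata.json                  <- old-format fallback
#
# backend stands for an instance of the class defining get_metadata above
md = backend.get_metadata('./012345-peaks-abcdef12')
print(sorted(md.keys()))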
Example #4
    def get_metadata(self, did: str, **kwargs):
        scope, name = did.split(':')
        number, dtype, hsh = parse_did(did)
        metadata_json = f'{dtype}-{hsh}-metadata.json'
        metadata_did = f'{scope}:{metadata_json}'

        metadata_path = rucio_path(self.rucio_dir, metadata_did)
        folder = os.path.join('/', *metadata_path.split('/')[:-1])
        if not os.path.exists(folder):
            raise strax.DataNotAvailable(
                f"No folder for metadata at {metadata_path}")
        if not os.path.exists(metadata_path):
            raise strax.DataCorrupted(
                f"Folder exists but no metadata at {metadata_path}")

        with open(metadata_path, mode='r') as f:
            return json.loads(f.read())
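A hedged walk-through of the name handling above. The DID shown and the parse_did return values are assumptions for illustration, not guarantees of the actual naming convention.

# Hypothetical DID of the form '<scope>:<dtype>-<hash>'
did = 'xnt_012345:peaks-abcdef12'

scope, name = did.split(':')   # -> 'xnt_012345', 'peaks-abcdef12'
# parse_did is assumed to yield (run number, data type, lineage hash), e.g.
#   number, dtype, hsh == 12345, 'peaks', 'abcdef12'
# so the metadata DID would become:
#   'xnt_012345:peaks-abcdef12-metadata.json'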
Example #5
    def get_metadata(self, backend_key: typing.Union[DataKey, str], **kwargs) -> dict:
        """
        Get the metadata using the backend_key and the Backend specific
        _get_metadata method. When an unforeseen error occurs, raises an
        strax.DataCorrupted error. Any kwargs are passed on to _get_metadata

        :param backend_key: The key the backend should look for (can be string
            or strax.DataKey)
        :return: metadata for the data associated to the requested backend-key
        :raises strax.DataCorrupted: This backend is not able to read the
            metadata but it should exist
        :raises strax.DataNotAvailable: When there is no data associated with
            this backend-key
        """
        try:
            return self._get_metadata(backend_key, **kwargs)
        except (strax.DataCorrupted, strax.DataNotAvailable, NotImplementedError):
            raise
        except Exception as e:
            raise strax.DataCorrupted(f'Cannot open metadata for {str(backend_key)}') from e
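A minimal sketch of the pattern this wrapper supports: a subclass implements _get_metadata, raises the well-typed strax exceptions itself, and lets the inherited get_metadata translate anything unexpected into strax.DataCorrupted. The class below is a toy illustration, not an actual strax backend.

import json
import os.path as osp

import strax


class JSONFolderBackend(strax.StorageBackend):
    """Toy backend: metadata lives in <backend_key>/metadata.json."""

    def _get_metadata(self, backend_key, **kwargs):
        fn = osp.join(str(backend_key), 'metadata.json')
        if not osp.exists(fn):
            # Known condition: re-raised unchanged by get_metadata
            raise strax.DataNotAvailable(f"No metadata at {fn}")
        with open(fn, mode='r') as f:
            # A json.JSONDecodeError here would be wrapped by get_metadata
            # into strax.DataCorrupted, with the original as __cause__
            return json.loads(f.read())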
Example #6
    def get_iter(self,
                 run_id: str,
                 targets,
                 save=tuple(),
                 max_workers=None,
                 time_range=None,
                 seconds_range=None,
                 time_within=None,
                 time_selection='fully_contained',
                 selection_str=None,
                 keep_columns=None,
                 _chunk_number=None,
                 **kwargs) -> ty.Iterator[strax.Chunk]:
        """Compute target for run_id and iterate over results.

        Do NOT interrupt the iterator (i.e. break out of the loop): it will
        keep running work in background threads...
        {get_docs}
        """
        # If any new options given, replace the current context
        # with a temporary one
        if len(kwargs):
            # noinspection PyMethodFirstArgAssignment
            self = self.new_context(**kwargs)

        # Convert alternate time arguments to absolute range
        time_range = self.to_absolute_time_range(run_id=run_id,
                                                 targets=targets,
                                                 time_range=time_range,
                                                 seconds_range=seconds_range,
                                                 time_within=time_within)

        # If multiple targets of the same kind, create a MergeOnlyPlugin
        # to merge the results automatically
        if isinstance(targets, (list, tuple)) and len(targets) > 1:
            plugins = self._get_plugins(targets=targets, run_id=run_id)
            if len(set(plugins[d].data_kind_for(d) for d in targets)) == 1:
                temp_name = (
                    '_temp_' +
                    ''.join(random.choices(string.ascii_lowercase, k=10)))
                p = type(temp_name, (strax.MergeOnlyPlugin, ),
                         dict(depends_on=tuple(targets)))
                self.register(p)
                targets = (temp_name, )
            else:
                raise RuntimeError("Cannot automerge different data kinds!")

        components = self.get_components(run_id,
                                         targets=targets,
                                         save=save,
                                         time_range=time_range,
                                         chunk_number=_chunk_number)

        # Clean up the temp plugins
        for k in list(self._plugin_class_registry.keys()):
            if k.startswith('_temp'):
                del self._plugin_class_registry[k]

        seen_a_chunk = False
        generator = strax.ThreadedMailboxProcessor(
            components,
            max_workers=max_workers,
            allow_shm=self.context_config['allow_shm'],
            allow_multiprocess=self.context_config['allow_multiprocess'],
            allow_rechunk=self.context_config['allow_rechunk'],
            allow_lazy=self.context_config['allow_lazy'],
            max_messages=self.context_config['max_messages'],
            timeout=self.context_config['timeout']).iter()

        try:
            for result in strax.continuity_check(generator):
                seen_a_chunk = True
                if not isinstance(result, strax.Chunk):
                    raise ValueError(f"Got type {type(result)} rather than "
                                     f"a strax Chunk from the processor!")
                result.data = self.apply_selection(
                    result.data,
                    selection_str=selection_str,
                    keep_columns=keep_columns,
                    time_range=time_range,
                    time_selection=time_selection)
                yield result

        except GeneratorExit:
            generator.throw(
                OutsideException(
                    "Terminating due to an exception originating from outside "
                    "strax's get_iter (which we cannot retrieve)."))

        except Exception as e:
            generator.throw(e)
            raise

        if not seen_a_chunk:
            if time_range is None:
                raise strax.DataCorrupted("No data returned!")
            raise ValueError(f"Invalid time range: {time_range}, "
                             "returned no chunks!")