Esempio n. 1
0
    def _read_chunk(self, backend_key, chunk_info, dtype, compressor):
        """Download one chunk from S3 into a temp buffer and decode it."""
        # Legacy metadata only carries 'filename'; build the full S3 key.
        if 'filename' in chunk_info:
            chunk_info['key_name'] = f"{backend_key}/{chunk_info['filename']}"

        with tempfile.SpooledTemporaryFile() as buffer:
            self.s3.download_fileobj(
                Bucket=BUCKET_NAME,
                Key=chunk_info['key_name'],
                Fileobj=buffer)
            # Rewind: download_fileobj leaves the position at end-of-file.
            buffer.seek(0)
            return strax.load_file(buffer, dtype=dtype, compressor=compressor)
Esempio n. 2
0
 def _load_chunk(self, path, kind='central'):
     """Load all reader_* files in *path*, time-sort, and split on a break.

     'central' chunks are returned whole; 'pre'/'post' chunks are cut at
     a quiet gap via strax.from_break, after which *path* may be erased.
     """
     parts = []
     for fn in glob.glob(f'{path}/reader_*'):
         parts.append(
             strax.load_file(fn, compressor='blosc',
                             dtype=strax.record_dtype()))
     records = strax.sort_by_time(np.concatenate(parts))
     if kind == 'central':
         return records
     result = strax.from_break(
         records,
         safe_break=int(1e3),  # TODO config?
         left=kind == 'post',
         tolerant=True)
     if self.config['erase']:
         shutil.rmtree(path)
     return result
Esempio n. 3
0
 def _load_chunk(self, path, kind='central'):
     """Read every file under *path*, sort by time, optionally break-split.

     Non-central chunks ('pre'/'post') are cut at a quiet gap of at least
     config['safe_break_in_pulses'] ns; *path* is removed when
     config['erase'] is set.
     """
     chunk_files = sorted(glob.glob(f'{path}/*'))
     records = strax.sort_by_time(np.concatenate([
         strax.load_file(fn, compressor='blosc', dtype=strax.record_dtype())
         for fn in chunk_files
     ]))
     if kind != 'central':
         result = strax.from_break(
             records,
             safe_break=self.config['safe_break_in_pulses'],
             left=kind == 'post',
             tolerant=True)
     else:
         result = records
     if self.config['erase']:
         shutil.rmtree(path)
     return result
Esempio n. 4
0
 def load_chunk(self, folder, kind='central'):
     """Load one chunk folder and shift its times to the run start.

     For 'pre'/'post' chunks with config['do_breaks'] enabled, the records
     are cut at a quiet gap via strax.from_break before shifting.
     """
     parts = [
         strax.load_file(os.path.join(folder, f),
                         compressor='blosc',
                         dtype=strax.record_dtype())
         for f in os.listdir(folder)
     ]
     records = strax.sort_by_time(np.concatenate(parts))
     if kind != 'central' and self.config['do_breaks']:
         result = strax.from_break(records,
                                   safe_break=self.config['safe_break'],
                                   left=kind == 'post',
                                   tolerant=True)
     else:
         result = records
     # Times on disk are relative; make them absolute.
     result['time'] += self.config['run_start']
     return result
Esempio n. 5
0
    def _read_chunk(self, dset_did, chunk_info, dtype, compressor):
        """Load one chunk of *dset_did*, downloading it via admix if needed.

        :param dset_did: Rucio DID ('scope:name') of the dataset.
        :param chunk_info: chunk metadata dict; 'filename' names the file.
        :param dtype: numpy dtype to decode the chunk into.
        :param compressor: compressor the chunk was written with.
        :raises strax.DataNotAvailable: for heavy datatypes unless
            download_heavy was set on the frontend.
        :raises ValueError: if the download does not produce exactly the
            expected single file at the expected path.
        :raises FileNotFoundError: if the chunk file is still missing
            after the download attempt.
        """
        base_dir = os.path.join(self.staging_dir, did_to_dirname(dset_did))
        chunk_file = chunk_info['filename']
        chunk_path = os.path.abspath(os.path.join(base_dir, chunk_file))
        if not os.path.exists(chunk_path):
            # Only the datatype is needed here; run number and hash unused.
            _, datatype, _ = parse_rucio_did(dset_did)
            if datatype in self.heavy_types and not self.download_heavy:
                error_msg = (
                    "For space reasons we don't want to have everyone "
                    "downloading raw data. If you know what you're "
                    "doing, pass download_heavy=True to the Rucio "
                    "frontend. If not, check your context and/or ask "
                    "someone if this raw data is needed locally.")
                warn(error_msg)
                raise strax.DataNotAvailable
            scope, _ = dset_did.split(':')
            chunk_did = f"{scope}:{chunk_file}"
            # Cache the chosen RSE per dataset to avoid repeated lookups.
            if dset_did in self.dset_cache:
                rse = self.dset_cache[dset_did]
            else:
                rses = admix.rucio.get_rses(dset_did)
                rse = admix.downloader.determine_rse(rses)
                self.dset_cache[dset_did] = rse

            downloaded = admix.download(chunk_did,
                                        rse=rse,
                                        location=self.staging_dir)
            if len(downloaded) != 1:
                raise ValueError(f"{chunk_did} should be a single file. "
                                 f"We found {len(downloaded)}.")
            # Explicit check rather than a bare assert: asserts are stripped
            # under `python -O`, and a path mismatch means a broken download.
            if chunk_path != downloaded[0]:
                raise ValueError(
                    f"Expected {chunk_did} to be downloaded to {chunk_path}, "
                    f"but admix put it at {downloaded[0]}.")

        # check again
        if not os.path.exists(chunk_path):
            raise FileNotFoundError(f"No chunk file found at {chunk_path}")

        return strax.load_file(chunk_path, dtype=dtype, compressor=compressor)
Esempio n. 6
0
 def _read_chunk(self, dirname, chunk_info, dtype, compressor):
     """Decode the chunk file named in *chunk_info* from *dirname*."""
     chunk_path = osp.join(dirname, chunk_info['filename'])
     return strax.load_file(chunk_path, dtype=dtype, compressor=compressor)
Esempio n. 7
0
 def _read_chunk(self, zipn_and_dirn, chunk_info, dtype, compressor):
     """Read one chunk from a member of a zip archive and decode it."""
     zip_name, dir_name = zipn_and_dirn
     member = dir_name + '/' + chunk_info['filename']
     with zipfile.ZipFile(zip_name) as archive, archive.open(member) as f:
         return strax.load_file(f, dtype=dtype, compressor=compressor)
Esempio n. 8
0
 def _load_chunk(self, fp):
     """Load the blosc-compressed chunk at *fp*, sorted by time."""
     raw = strax.load_file(fp,
                           compressor='blosc',
                           dtype=strax.record_dtype())
     return strax.sort_by_time(raw)
Esempio n. 9
0
    def _load_chunk(self, path, start, end, kind='central'):
        """Load the DAQ files in *path* into one time-sorted record array.

        :param path: directory holding this chunk's per-reader files.
        :param start: expected chunk start time (presumably ns since epoch,
            matching records['time'] — confirm against the DAQ writer).
        :param end: expected chunk end time (same unit as *start*).
        :param kind: 'central' returns everything; 'pre'/'post' split the
            records at a quiet break near the chunk edge.
        :return: (records, break_time); break_time is None for 'central'.
        :raises ValueError: if record start times fall outside [start, end).
        """
        records = [
            strax.load_file(fn,
                            compressor=self.config["daq_compressor"],
                            dtype=self.dtype_for('raw_records'))
            for fn in sorted(glob.glob(f'{path}/*'))
        ]
        records = np.concatenate(records)
        records = strax.sort_by_time(records)

        first_start, last_start, last_end = None, None, None
        if len(records):
            first_start, last_start = records[0]['time'], records[-1]['time']
            # Records are sorted by (start)time and are of variable length.
            # Their end-times can differ. In the most pessimistic case we have
            # to look back one record length for each channel.
            tot_channels = np.sum(
                [np.diff(x) + 1 for x in self.config['channel_map'].values()])
            look_n_samples = self.config["record_length"] * tot_channels
            last_end = strax.endtime(records[-look_n_samples:]).max()
            if first_start < start or last_start >= end:
                raise ValueError(
                    f"Bad data from DAQ: chunk {path} should contain data "
                    f"that starts in [{start}, {end}), but we see start times "
                    f"ranging from {first_start} to {last_start}.")

        if kind == 'central':
            result = records
            break_time = None
        else:
            # Find a time at which we can safely partition the data.
            min_gap = self.config['safe_break_in_pulses']
            if not len(records) or last_end + min_gap < end:
                # There is enough room at the end of the data
                break_time = end - min_gap
                result = records if kind == 'post' else records[:0]
            else:
                # Let's hope there is some quiet time in the middle
                try:
                    result, break_time = strax.from_break(
                        records,
                        safe_break=min_gap,
                        # Records from the last chunk can extend as far as:
                        not_before=(
                            start +
                            self.config['record_length'] * self.dt_max),
                        left=kind == 'post',
                        tolerant=False)
                except strax.NoBreakFound:
                    # We still have to break somewhere, but this can involve
                    # throwing away data.
                    # Let's do it at the end of the chunk
                    # TODO: find a better time, e.g. a longish-but-not-quite
                    # satisfactory gap
                    break_time = end - min_gap

                    # Mark the region where data /might/ be removed with
                    # artificial deadtime.
                    dead_time_start = (
                        break_time -
                        self.config['record_length'] * self.dt_max)
                    warnings.warn(
                        f"Data in {path} is so dense that no {min_gap} "
                        f"ns break exists: data loss inevitable. "
                        f"Inserting artificial deadtime between "
                        f"{dead_time_start} and {end}.",
                        ArtificialDeadtimeInserted)

                    if kind == 'pre':
                        # Give the artificial deadtime past the break
                        result = self._artificial_dead_time(start=break_time,
                                                            end=end,
                                                            dt=self.dt_max)
                    else:
                        # Remove data that would stick out
                        result = records[strax.endtime(records) <= break_time]
                        # Add the artificial deadtime until the break
                        result = strax.sort_by_time(
                            np.concatenate([
                                result,
                                self._artificial_dead_time(
                                    start=dead_time_start,
                                    end=break_time,
                                    dt=self.dt_max)
                            ]))

        # NOTE(review): erase happens even when a break raised no exception;
        # the source data is gone once this returns.
        if self.config['erase']:
            shutil.rmtree(path)
        return result, break_time
Esempio n. 10
0
 def _read_chunk(self, did, chunk_info, dtype, compressor):
     """Map the dataset DID to the chunk file's DID and load that file."""
     scope, _ = did.split(':')
     chunk_did = f"{scope}:{chunk_info['filename']}"
     return strax.load_file(rucio_path(self.rucio_dir, chunk_did),
                            dtype=dtype, compressor=compressor)
Esempio n. 11
0
 def _read_chunk(self, dirname, chunk_info, dtype, compressor):
     """Locate the chunk in the rucio directory tree and decode it."""
     chunk_fn = rucio_path(self.root_dir, chunk_info['filename'], dirname)
     return strax.load_file(chunk_fn, dtype=dtype, compressor=compressor)