def _read_chunk(self, backend_key, chunk_info, dtype, compressor):
    """Download one chunk from S3 and decode it into a record array."""
    # Legacy chunk metadata only carries 'filename'; derive the full
    # S3 object key from it (temporary backward-compatibility shim).
    if 'filename' in chunk_info:
        chunk_info['key_name'] = f"{backend_key}/{chunk_info['filename']}"
    with tempfile.SpooledTemporaryFile() as buffer:
        self.s3.download_fileobj(
            Bucket=BUCKET_NAME,
            Key=chunk_info['key_name'],
            Fileobj=buffer)
        # Rewind so load_file reads from the start of the downloaded data.
        buffer.seek(0)  # Needed?
        return strax.load_file(buffer, dtype=dtype, compressor=compressor)
def _load_chunk(self, path, kind='central'):
    """Read all reader_* files under *path* into one time-sorted array.

    For kind 'central' the merged records are returned directly.
    For 'pre'/'post' the records are cut at a safe break near the chunk
    edge, after which the source directory is removed if the 'erase'
    option is set.
    """
    pieces = []
    for fn in glob.glob(f'{path}/reader_*'):
        pieces.append(
            strax.load_file(fn,
                            compressor='blosc',
                            dtype=strax.record_dtype()))
    records = strax.sort_by_time(np.concatenate(pieces))
    if kind == 'central':
        return records
    # Cut at a quiet moment near the chunk boundary.
    result = strax.from_break(
        records,
        safe_break=int(1e3),  # TODO config?
        left=(kind == 'post'),
        tolerant=True)
    if self.config['erase']:
        shutil.rmtree(path)
    return result
def _load_chunk(self, path, kind='central'):
    """Load every file under *path*, merge, and sort records by time.

    kind 'central' returns the full array; 'pre'/'post' cut the records
    at a safe break (width taken from the 'safe_break_in_pulses' config).
    The source directory is deleted afterwards when 'erase' is enabled.
    """
    loaded = [
        strax.load_file(fn,
                        compressor='blosc',
                        dtype=strax.record_dtype())
        for fn in sorted(glob.glob(f'{path}/*'))]
    records = strax.sort_by_time(np.concatenate(loaded))
    if kind != 'central':
        result = strax.from_break(
            records,
            safe_break=self.config['safe_break_in_pulses'],
            left=(kind == 'post'),
            tolerant=True)
    else:
        result = records
    if self.config['erase']:
        shutil.rmtree(path)
    return result
def load_chunk(self, folder, kind='central'):
    """Load all files in *folder* as records, sorted by time.

    When kind is 'pre'/'post' and the 'do_breaks' option is on, records
    are cut at a safe break near the chunk edge. Times are shifted from
    DAQ-local to absolute time using the 'run_start' config value.
    """
    parts = []
    for entry in os.listdir(folder):
        parts.append(
            strax.load_file(os.path.join(folder, entry),
                            compressor='blosc',
                            dtype=strax.record_dtype()))
    records = strax.sort_by_time(np.concatenate(parts))
    if kind == 'central':
        result = records
    elif self.config['do_breaks']:
        result = strax.from_break(records,
                                  safe_break=self.config['safe_break'],
                                  left=(kind == 'post'),
                                  tolerant=True)
    else:
        result = records
    # Shift from DAQ-local time to absolute time.
    result['time'] += self.config['run_start']
    return result
def _read_chunk(self, dset_did, chunk_info, dtype, compressor):
    """Return one chunk of a Rucio dataset, downloading it if needed.

    :param dset_did: dataset DID, 'scope:name'
    :param chunk_info: chunk metadata dict; 'filename' is the chunk file
    :param dtype: numpy dtype to load the chunk as
    :param compressor: strax compressor name used to decode the file
    :raises strax.DataNotAvailable: if the data is a heavy (raw) type
        and download_heavy was not enabled
    :raises ValueError: if the chunk DID resolves to more than one file
    :raises FileNotFoundError: if the file is still missing after download
    """
    base_dir = os.path.join(self.staging_dir, did_to_dirname(dset_did))
    chunk_file = chunk_info['filename']
    chunk_path = os.path.abspath(os.path.join(base_dir, chunk_file))
    if not os.path.exists(chunk_path):
        number, datatype, hsh = parse_rucio_did(dset_did)
        if datatype in self.heavy_types and not self.download_heavy:
            error_msg = (
                "For space reasons we don't want to have everyone "
                "downloading raw data. If you know what you're "
                "doing, pass download_heavy=True to the Rucio "
                "frontend. If not, check your context and/or ask "
                "someone if this raw data is needed locally.")
            warn(error_msg)
            raise strax.DataNotAvailable
        # Only the scope is needed; the chunk brings its own file name.
        scope = dset_did.split(':')[0]
        chunk_did = f"{scope}:{chunk_file}"
        # Cache the RSE per dataset so we only resolve it once.
        if dset_did in self.dset_cache:
            rse = self.dset_cache[dset_did]
        else:
            rses = admix.rucio.get_rses(dset_did)
            rse = admix.downloader.determine_rse(rses)
            self.dset_cache[dset_did] = rse
        downloaded = admix.download(chunk_did, rse=rse,
                                    location=self.staging_dir)
        if len(downloaded) != 1:
            raise ValueError(f"{chunk_did} should be a single file. "
                             f"We found {len(downloaded)}.")
        assert chunk_path == downloaded[0], (
            f"admix downloaded to {downloaded[0]}, "
            f"expected {chunk_path}")
    # check again
    if not os.path.exists(chunk_path):
        raise FileNotFoundError(f"No chunk file found at {chunk_path}")
    return strax.load_file(chunk_path, dtype=dtype, compressor=compressor)
def _read_chunk(self, dirname, chunk_info, dtype, compressor):
    """Load a single chunk file from a plain directory backend."""
    chunk_path = osp.join(dirname, chunk_info['filename'])
    return strax.load_file(chunk_path, dtype=dtype, compressor=compressor)
def _read_chunk(self, zipn_and_dirn, chunk_info, dtype, compressor):
    """Read one chunk stored inside a zip archive.

    :param zipn_and_dirn: tuple (zip file path, directory name inside it)
    """
    zip_name, dir_name = zipn_and_dirn
    member = dir_name + '/' + chunk_info['filename']
    with zipfile.ZipFile(zip_name) as archive:
        with archive.open(member) as f:
            return strax.load_file(f, dtype=dtype, compressor=compressor)
def _load_chunk(self, fp):
    """Load one blosc-compressed record file, sorted by time."""
    raw = strax.load_file(fp, 'blosc', strax.record_dtype())
    return strax.sort_by_time(raw)
def _load_chunk(self, path, start, end, kind='central'):
    """Load DAQ records from *path* and cut them for one chunk.

    Returns ``(records, break_time)``. For kind 'central' all records
    are returned and break_time is None. For 'pre'/'post', a quiet
    moment ("safe break") near the chunk edge is found at which the
    data can be partitioned; if none exists, data is dropped and
    replaced by artificial deadtime records.

    :param path: directory containing the per-reader record files
    :param start: chunk start time (ns) — records must start at/after this
    :param end: chunk end time (ns, exclusive) — records must start before
    :param kind: 'central', 'pre' (keep left of break) or 'post'
        (keep right of break)
    :raises ValueError: if record start times fall outside [start, end)
    """
    records = [
        strax.load_file(fn,
                        compressor=self.config["daq_compressor"],
                        dtype=self.dtype_for('raw_records'))
        for fn in sorted(glob.glob(f'{path}/*'))]
    records = np.concatenate(records)
    records = strax.sort_by_time(records)

    first_start, last_start, last_end = None, None, None
    if len(records):
        first_start, last_start = records[0]['time'], records[-1]['time']
        # Records are sorted by (start)time and are of variable length.
        # Their end-times can differ. In the most pessimistic case we have
        # to look back one record length for each channel.
        tot_channels = np.sum(
            [np.diff(x) + 1 for x in self.config['channel_map'].values()])
        look_n_samples = self.config["record_length"] * tot_channels
        last_end = strax.endtime(records[-look_n_samples:]).max()
        if first_start < start or last_start >= end:
            raise ValueError(
                f"Bad data from DAQ: chunk {path} should contain data "
                f"that starts in [{start}, {end}), but we see start times "
                f"ranging from {first_start} to {last_start}.")

    if kind == 'central':
        # Central chunks are returned whole; no break is needed.
        result = records
        break_time = None
    else:
        # Find a time at which we can safely partition the data.
        min_gap = self.config['safe_break_in_pulses']
        if not len(records) or last_end + min_gap < end:
            # There is enough room at the end of the data
            break_time = end - min_gap
            # 'post' keeps everything; 'pre' keeps an empty slice of the
            # same dtype (records[:0]).
            result = records if kind == 'post' else records[:0]
        else:
            # Let's hope there is some quiet time in the middle
            try:
                result, break_time = strax.from_break(
                    records,
                    safe_break=min_gap,
                    # Records from the last chunk can extend as far as:
                    not_before=(
                        start
                        + self.config['record_length'] * self.dt_max),
                    left=kind == 'post',
                    tolerant=False)
            except strax.NoBreakFound:
                # We still have to break somewhere, but this can involve
                # throwing away data.
                # Let's do it at the end of the chunk
                # TODO: find a better time, e.g. a longish-but-not-quite
                # satisfactory gap
                break_time = end - min_gap

                # Mark the region where data /might/ be removed with
                # artificial deadtime.
                dead_time_start = (
                    break_time
                    - self.config['record_length'] * self.dt_max)
                warnings.warn(
                    f"Data in {path} is so dense that no {min_gap} "
                    f"ns break exists: data loss inevitable. "
                    f"Inserting artificial deadtime between "
                    f"{dead_time_start} and {end}.",
                    ArtificialDeadtimeInserted)

                if kind == 'pre':
                    # Give the artificial deadtime past the break
                    result = self._artificial_dead_time(
                        start=break_time, end=end, dt=self.dt_max)
                else:
                    # Remove data that would stick out
                    result = records[
                        strax.endtime(records) <= break_time]
                    # Add the artificial deadtime until the break
                    result = strax.sort_by_time(np.concatenate(
                        [result,
                         self._artificial_dead_time(
                             start=dead_time_start,
                             end=break_time,
                             dt=self.dt_max)]))

    # NOTE(review): the source directory is removed even for 'central'
    # chunks when 'erase' is set — confirm that is intended.
    if self.config['erase']:
        shutil.rmtree(path)
    return result, break_time
def _read_chunk(self, did, chunk_info, dtype, compressor):
    """Load one chunk of a Rucio dataset from the local rucio store.

    :param did: dataset DID, 'scope:name'; only the scope is reused —
        the chunk's own filename replaces the name part
    :param chunk_info: chunk metadata dict; 'filename' is the chunk file
    :param dtype: numpy dtype to load the chunk as
    :param compressor: strax compressor name used to decode the file
    """
    # split(':', 1) keeps this robust if the name part contains ':'.
    # (The previous two-target unpack also bound an unused `name`.)
    scope = did.split(':', 1)[0]
    chunk_did = f"{scope}:{chunk_info['filename']}"
    fn = rucio_path(self.rucio_dir, chunk_did)
    return strax.load_file(fn, dtype=dtype, compressor=compressor)
def _read_chunk(self, dirname, chunk_info, dtype, compressor):
    """Load one chunk file resolved through the rucio path layout."""
    chunk_file = rucio_path(
        self.root_dir, chunk_info['filename'], dirname)
    return strax.load_file(
        chunk_file, dtype=dtype, compressor=compressor)