def flatten_run_metadata(md):
    """Return run metadata *md* flattened into dot-separated keys.

    The run-defaults section, ``sub_run_spec`` and ``tags`` entries are
    kept as-is (not flattened), since downstream code expects them to
    stay nested.
    """
    preserved = [strax.RUN_DEFAULTS_KEY, 'sub_run_spec', 'tags']
    return strax.flatten_dict(md, separator='.', keep=preserved)
def run_metadata(self, run_id, projection=None):
    """Return metadata for *run_id*, loaded from its JSON file on disk.

    :param run_id: Run identifier; resolved to a path via
        ``self._run_meta_path``.
    :param projection: Optional iterable of dot-separated keys to
        restrict the result to. If falsy, the full (unflattened)
        metadata dict is returned.
    :raises strax.RunMetadataNotAvailable: if no metadata file exists
        at the expected path.
    """
    path = self._run_meta_path(run_id)
    # Guard clause: fail fast if the metadata file is missing.
    if not osp.exists(path):
        raise strax.RunMetadataNotAvailable(
            f"No file at {path}, cannot find run metadata for {run_id}")
    with open(path, mode='r') as f:
        # json.load streams from the file handle instead of reading the
        # whole file into memory first; object_hook restores the types
        # (e.g. datetimes) that json_util serialized.
        md = json.load(f, object_hook=json_util.object_hook)
    if not projection:
        return md
    # Flatten so projection keys can address nested fields with dots.
    md = strax.flatten_dict(md, separator='.')
    return {k: v for k, v in md.items() if k in projection}
def scan_runs(self, check_available=tuple(), store_fields=tuple()):
    """Update and return self.runs with runs currently available
    in all storage frontends.

    :param check_available: Check whether these data types are available
        Availability of xxx is stored as a boolean in the xxx_available
        column.
    :param store_fields: Additional fields from run doc to include
        as rows in the dataframe.

    The context options scan_availability and store_run_fields list
    data types and run fields, respectively, that will always be scanned.
    """
    # Merge caller-requested fields with the always-required ones and
    # the context-configured extras; set() removes duplicates.
    store_fields = tuple(
        set(
            list(strax.to_str_tuple(store_fields))
            + ['name', 'number', 'tags', 'mode']
            + list(self.context_config['store_run_fields'])))
    check_available = tuple(
        set(
            list(strax.to_str_tuple(check_available))
            + list(self.context_config['check_available'])))

    docs = None
    for sf in self.storage:
        _temp_docs = []
        for doc in sf._scan_runs(store_fields=store_fields):
            # If there is no number, make one from the name
            if 'number' not in doc:
                if 'name' not in doc:
                    raise ValueError(f"Invalid run doc {doc}, contains "
                                     f"neither name nor number.")
                doc['number'] = int(doc['name'])

            # If there is no name, make one from the number
            doc.setdefault('name', str(doc['number']))

            doc.setdefault('mode', '')

            # Flatten the tags field, if it exists
            # (assumes each tag is a dict with a 'name' key — as
            # produced by the run database; TODO confirm for other
            # frontends). Missing tags become an empty string.
            doc['tags'] = ','.join(
                [t['name'] for t in doc.get('tags', [])])

            # Flatten the rest of the doc (mainly in case the mode field
            # is something deeply nested)
            doc = strax.flatten_dict(doc, separator='.')

            _temp_docs.append(doc)

        if len(_temp_docs):
            new_docs = pd.DataFrame(_temp_docs)
        else:
            # No runs from this frontend: keep the expected columns so
            # later concatenation stays consistent.
            new_docs = pd.DataFrame([], columns=store_fields)

        if docs is None:
            docs = new_docs
        else:
            # Keep only new runs (not found by earlier frontends);
            # earlier frontends in self.storage take precedence.
            docs = pd.concat(
                [docs,
                 new_docs[~np.in1d(new_docs['name'], docs['name'])]],
                sort=False)

    self.runs = docs

    # Add one boolean xxx_available column per requested data type.
    for d in tqdm(check_available, desc='Checking data availability'):
        self.runs[d + '_available'] = np.in1d(
            self.runs.name.values,
            self.list_available(d))

    return self.runs