def __init__(self, garbage_length=256, packet_size_guess=1024):
    self.garbage_table = lh5.Table(garbage_length)
    shape_guess = (garbage_length, packet_size_guess)
    self.garbage_table.add_field(
        'packets',
        lh5.VectorOfVectors(shape_guess=shape_guess, dtype='uint8'))
    self.garbage_table.add_field(
        'packet_id', lh5.Array(shape=garbage_length, dtype='uint32'))
    # TODO: add garbage codes enum attribute: user supplies in constructor
    # before calling super()
    self.garbage_table.add_field(
        'garbage_code', lh5.Array(shape=garbage_length, dtype='uint32'))
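# A sketch of how the TODO above might look from a subclass: the decoder
# defines its garbage codes and attaches them before calling super().
# The class and enum names here are hypothetical, and this assumes the
# __init__ above belongs to a DataDecoder-style base class:
from enum import IntEnum

class MyGarbageCodes(IntEnum):
    UNKNOWN_PACKET = 0
    BAD_CHECKSUM = 1
    TRUNCATED = 2

class MyDecoder(DataDecoder):  # hypothetical subclass
    def __init__(self, *args, **kwargs):
        # supply garbage codes before the base class builds garbage_table
        self.garbage_codes = MyGarbageCodes
        super().__init__(*args, **kwargs)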
def dsp_to_hit():
    """
    Save calibrated energies into the hit file. This is a good example of
    adding a column, and of reading & writing an LH5 file.
    """
    f_dsp = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    sto = lh5.Store()
    groups = sto.ls(f_dsp)
    tb_name = 'ORSIS3302DecoderForEnergy/raw'

    data, n_rows = sto.read_object(tb_name, f_dsp)
    df_dsp = data.get_dataframe()

    # add a new column for each energy estimator of interest
    for etype in ['energy', 'trapE']:
        ecal_name = etype + '_cal'
        pfit = linear_cal(etype)
        df_dsp[ecal_name] = df_dsp[etype] * pfit[0] + pfit[1]
        e_cal_lh5 = lh5.Array(df_dsp[ecal_name].values, attrs={'units': 'keV'})
        data.add_field(f'{etype}_cal', e_cal_lh5)

    # write to hit file. delete if exists, LH5 overwrite is broken rn
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto.write_object(data, tb_name, f_hit)
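# For reference, a stand-in for the linear_cal helper used above. This is
# hypothetical: the real routine presumably fits calibration peaks; the
# sketch only shows the expected return shape, (slope, intercept) in keV
# per raw unit, with made-up values:
def linear_cal(etype):
    cal_consts = {'energy': (4.0e-7, 0.0), 'trapE': (0.4, 0.1)}  # made-up
    return cal_consts[etype]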
def initialize_lh5_table(self, lh5_table, channel=None):
    """
    Initialize an lh5.Table based on decoded_values.
    channel is the channel according to ch_group.
    """
    if not hasattr(self, 'decoded_values'):
        name = type(self).__name__
        print(name, 'Error: no decoded_values available for setting up buffer')
        return
    dec_vals = self.get_decoded_values(channel)
    size = lh5_table.size
    for field, fld_attrs in dec_vals.items():
        attrs = fld_attrs.copy()
        if 'dtype' not in attrs:
            name = type(self).__name__
            print(name, 'Error: must specify dtype for', field)
            continue

        dtype = attrs.pop('dtype')
        if 'datatype' not in attrs:
            # no datatype: just a "normal" array
            # allow to override "kind" for the dtype for lh5
            if 'kind' in attrs:
                attrs['datatype'] = 'array<1>{' + attrs.pop('kind') + '}'
            lh5_table.add_field(
                field, lh5.Array(shape=size, dtype=dtype, attrs=attrs))
            continue

        datatype = attrs.pop('datatype')

        # handle waveforms from digitizers in a uniform way
        if datatype == 'waveform':
            wf_table = lh5.Table(size)

            # build t0 array. No attributes for now
            # TODO: add more control over t0: another field to fill it?
            # Optional units attribute?
            t0_attrs = {'units': 'ns'}
            wf_table.add_field(
                't0',
                lh5.Array(nda=np.zeros(size, dtype='float'), attrs=t0_attrs))

            # build sampling period array with units attribute
            wf_per = attrs.pop('sample_period')
            dt_nda = np.full(size, wf_per, dtype='float')
            wf_per_units = attrs.pop('sample_period_units')
            dt_attrs = {'units': wf_per_units}
            wf_table.add_field('dt', lh5.Array(nda=dt_nda, attrs=dt_attrs))

            # build waveform array. All non-popped attributes get sent
            # TODO: add vector of vectors and compression capabilities
            wf_len = attrs.pop('length')
            dims = [1, 1]
            aoesa = lh5.ArrayOfEqualSizedArrays(shape=(size, wf_len),
                                                dtype=dtype,
                                                dims=dims,
                                                attrs=attrs)
            wf_table.add_field('values', aoesa)

            lh5_table.add_field(field, wf_table)
            continue

        # if we get here, it must be a known LH5 datatype
        datatype, shape, elements = lh5.parse_datatype(datatype)

        if datatype == 'array_of_equalsized_arrays':
            length = attrs.pop('length')
            dims = [1, 1]
            aoesa = lh5.ArrayOfEqualSizedArrays(shape=(size, length),
                                                dtype=dtype,
                                                dims=dims,
                                                attrs=attrs)
            lh5_table.add_field(field, aoesa)
            continue

        if elements.startswith('array'):  # vector-of-vectors
            length_guess = size
            if 'length_guess' in attrs:
                length_guess = attrs.pop('length_guess')
            vov = lh5.VectorOfVectors(shape_guess=(size, length_guess),
                                      dtype=dtype,
                                      attrs=attrs)
            lh5_table.add_field(field, vov)
            continue

        else:
            name = type(self).__name__
            print(name, 'Error: do not know how to make a', datatype,
                  'for', field)
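# For reference, a decoded_values dict in the format this routine consumes.
# The field names and values are illustrative, not from a real decoder:
example_decoded_values = {
    'packet_id': {'dtype': 'uint32'},                  # plain lh5.Array
    'energy':    {'dtype': 'uint32', 'units': 'adc'},  # extra attrs pass through
    'waveform': {                                      # taken by the 'waveform' branch
        'dtype': 'uint16',
        'datatype': 'waveform',
        'length': 2048,           # samples per waveform -> ArrayOfEqualSizedArrays
        'sample_period': 10,      # fills the 'dt' field
        'sample_period_units': 'ns',
    },
}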
def dsp_to_hit(df_row, dg=None, verbose=False, overwrite=False, lowE=False):
    """
    Create hit files from dsp files. This routine is specific to CAGE but
    could be extended & modified in the future to work for multi-channel
    data (PGT, L200, etc.)
    """
    apply_ecal = True
    apply_tscorr = False  # not needed, should be fixed by the jan 30 2021 re-d2r

    f_dsp = f"{dg.config['dsp_input_dir']}/{df_row['dsp_path']}/{df_row['dsp_file']}"
    f_hit = f"{dg.config['hit_output_dir']}/{df_row['hit_path']}/{df_row['hit_file']}"

    # change output directory if in spec_id 2 mode
    # (i.e. low-energy calibration to get 60 keV in the right place)
    if lowE:
        f_hit = f"{dg.config['hit_output_dir']}/{df_row['hit_path']}/lowE/{df_row['hit_file']}"
        print(f'Writing to low-energy hit file: {f_hit}')

    if verbose:
        print('input:', f_dsp)
        print('output:', f_hit)

    if not overwrite and os.path.exists(f_hit):
        print('file exists, overwrite not set, skipping f_hit:\n   ', f_hit)
        return

    # get run and cycle for ecalDB lookup. also apply run selection
    run, cycle = df_row[['run', 'cycle']].astype(int)
    if df_row.skip:
        print(f'Cycle {cycle} has been marked junk, will not process.')
        return

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    data, n_rows = hit_store.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    if apply_ecal:

        # loading the tinydb this way preserves the in-file text formatting
        cal_db = db.TinyDB(storage=MemoryStorage)
        with open(dg.config['ecaldb']) as f:
            raw_db = json.load(f)
            cal_db.storage.write(raw_db)

        # loop over energy estimators of interest
        for etype in dg.config['rawe']:

            # load ecalDB table
            tb = cal_db.table(f'peakfit_{etype}').all()
            df_cal = pd.DataFrame(tb)
            for col in ['run', 'cyclo', 'cychi']:
                df_cal[col] = df_cal[col].astype(int)

            # load cal constants for this cycle
            que = f'run=={run} and cyclo <= {cycle} <= cychi'
            df_run = df_cal.query(que)
            if len(df_run) != 1:
                print('Warning, non-unique query:', que)
                print(df_run)
                exit()

            # figure out the order of the polynomial from column names
            pols = {}
            for col in [c for c in df_run.columns if 'cal' in c]:
                val = parse('cal{p}', col)
                val = val.named  # convert to dict
                iord = int(val['p'])
                pols[iord] = df_run.iloc[0][f'cal{iord}']

            # get the coefficients in descending order for np.poly1d: p2, p1, p0 ...
            coeffs = []
            for iord, val in sorted(pols.items()):
                coeffs.append([iord, val])
            coeffs = np.array(coeffs)
            coeffs = coeffs[coeffs[:, 0].argsort()[::-1]]  # 2, 1, 0 ...
            coeffs = coeffs[:, 1]

            # apply the calibration to the dataframe
            pfunc = np.poly1d(coeffs)
            df_hit[f'{etype}_cal'] = pfunc(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    if apply_tscorr:
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock
        ts = df_hit['timestamp'].values / clock
        tdiff = np.diff(ts)
        tdiff = np.insert(tdiff, 0, 0)
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))
        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i - 1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        df_hit['ts_sec'] = np.concatenate(ts_new)
    else:
        # NOTE: may need to subtract off the 1st value here if we find
        # that the timestamp doesn't reset at cycle boundaries.
        df_hit['ts_sec'] = df_hit['timestamp'].values / clock

    # 3. compute global timestamp
    t_start = df_row['startTime']
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))

    for col in df_hit.columns:
        tb_lh5.add_field(col, lh5.Array(df_hit[col].values,
                                        attrs={'units': ''}))
        if verbose:
            print(col)

    print(f'Writing table: {tb_name} in file:\n   {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)

    if verbose:
        print('Creating diagnostic plots ...')

        # energy
        xlo, xhi, xpb = 0, 3000, 10
        hist, bins, _ = pgh.get_hist(df_hit['trapEftp_cal'],
                                     range=(xlo, xhi), dx=xpb)
        plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
        plt.xlabel('Energy (keV)', ha='right', x=1)
        plt.ylabel('Counts', ha='right', y=1)
        plt.savefig('./plots/d2h_etest.png')
        print('saved figure: ./plots/d2h_etest.png')
        plt.cla()

        # timestamp
        xv = np.arange(len(df_hit))
        plt.plot(xv, df_hit['ts_sec'], '.b')
        plt.savefig('./plots/d2h_ttest.png')
        print('saved figure: ./plots/d2h_ttest.png')
        plt.cla()

        # exit, don't create + overwrite a million plots
        print('verbose mode of d2h is meant to look at 1 cycle file, exiting...')
        exit()
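# Example driver for the routine above (a sketch, not part of the module:
# assumes dg.fileDB is a pandas DataFrame whose rows carry the columns used
# above -- dsp_path, dsp_file, hit_path, hit_file, run, cycle, skip, startTime):
def d2h_all(dg, overwrite=False):
    for _, row in dg.fileDB.iterrows():
        dsp_to_hit(row, dg=dg, verbose=False, overwrite=overwrite)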
def test_array():
    a = lh5.Array(shape=(1,), dtype=float)
    assert a.datatype_name() == 'array'
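# A companion check (a sketch: it relies only on the nda/attrs interface
# used elsewhere in this codebase, e.g. lh5.Array(nda=..., attrs={'units': ...})):
import numpy as np

def test_array_attrs():
    nda = np.zeros(4, dtype='float32')
    a = lh5.Array(nda=nda, attrs={'units': 'keV'})
    assert a.nda.shape == (4,)
    assert a.attrs['units'] == 'keV'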
def build_processing_chain(lh5_in, dsp_config, db_dict=None,
                           outputs=None, verbosity=1, block_width=16):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.

    Returns (proc_chain, lh5_out):
    - proc_chain: ProcessingChain object that is bound to lh5_in and lh5_out;
      all you need to do is handle file i/o for lh5_in/out and run execute
    - lh5_out: output LH5 table

    Required arguments:
    - lh5_in: input LH5 table
    - config: dict or name of json file containing a recipe for constructing
      the ProcessingChain object produced by this function. config is
      formatted as a json dict with different processors. Config should have
      a dictionary called processors, containing dictionaries of the
      following format:
        Key: parameter name: name of parameter produced by the processor.
             Can optionally provide multiple, separated by spaces.
        Values:
          processor (req): name of gufunc
          module (req): name of module in which to find processor
          prereqs (req): name of parameters from other processors and from
            input that are required to exist to run this
          args (req): list of arguments for processor, with variables passed
            by name or value. Names should either be inputs from lh5_in, or
            parameter names for other processors. Names of the format db.name
            will look up the parameter in the metadata.
          kwargs (opt): kwargs used when adding processors to proc_chain
          init_args (opt): args used when initializing a processor that has
            static data (for factory functions)
          default (opt): default value for db parameters if not found
          unit (opt): unit to be used for attr in lh5 file.
      There may also be a list called 'outputs', containing a list of
      parameters to put into lh5_out.

    Optional keyword arguments:
    - outputs: list of parameters to put in the output lh5 table. If None,
      use the parameters in the 'outputs' list from config
    - db_dict: a nested dict pointing to values for db args. e.g. if a
      processor uses arg db.trap.risetime, it will look up
      db_dict['trap']['risetime'] and use the found value. If no value is
      found, use the default defined in the config file.
    - verbosity: verbosity level:
        0: Print nothing (except errors...)
        1: Print basic warnings (default)
        2: Print basic debug info
        3: Print friggin' everything!
    - block_width: number of entries to process at once.
    """
    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs': [], 'processors': {}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']

    # for processors with multiple outputs, add separate entries to the
    # processor list
    for key in list(processors):
        keys = [k for k in re.split(",| ", key) if k != '']
        if len(keys) > 1:
            for k in keys:
                processors[k] = key

    # Recursive function to crawl through the parameters/processors and get
    # a sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=[]):
        if par in resolved:
            return
        elif par in unresolved:
            raise Exception(f'Circular references detected at {par}')

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, that means it is part of a processor that returns
        # multiple outputs (see above); in that case, node is a str pointing
        # to the actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return

        edges = node['prereqs']
        unresolved.append(par)
        for edge in edges:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    proc_par_list = []   # calculated from processors
    input_par_list = []  # input from file and used for processors
    copy_par_list = []   # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity > 0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))

    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity=verbosity)

    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par +
                  ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            # check if this is a waveform table (needs t0, dt and values)
            if all(k in buf_in for k in ('t0', 'dt', 'values')):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda,
                                            'float32')
                clk = buf_in['dt'].nda[0] * unit_parser.parse_unit(
                    lh5_in['waveform']['dt'].attrs['units'])
                if proc_chain._clk is not None and proc_chain._clk != clk:
                    print("Somehow you managed to set multiple clock "
                          "frequencies... Using " + str(proc_chain._clk))
                else:
                    proc_chain._clk = clk

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])
        args = recipe['args']
        for i, arg in enumerate(args):
            if isinstance(arg, str) and arg[0:3] == 'db.':
                lookup_path = arg[3:].split('.')
                try:
                    node = db_dict
                    for key in lookup_path:
                        node = node[key]
                    args[i] = node
                    if verbosity > 0:
                        print("Database lookup: found", node, "for", arg)
                except (KeyError, TypeError):
                    try:
                        args[i] = recipe['defaults'][arg]
                        if verbosity > 0:
                            print("Database lookup: using default value of",
                                  args[i], "for", arg)
                    except (KeyError, TypeError):
                        raise Exception(
                            f'Did not find {arg} in database, and could not '
                            'find default value.')

        kwargs = recipe.get('kwargs', {})  # might also need db lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function
        try:
            init_args = recipe['init_args']
            for i, arg in enumerate(init_args):
                if isinstance(arg, str) and arg[0:3] == 'db.':
                    lookup_path = arg[3:].split('.')
                    try:
                        node = db_dict
                        for key in lookup_path:
                            node = node[key]
                        init_args[i] = node
                        if verbosity > 0:
                            print("Database lookup: found", node, "for", arg)
                    except (KeyError, TypeError):
                        try:
                            init_args[i] = recipe['defaults'][arg]
                            if verbosity > 0:
                                print("Database lookup: using default value of",
                                      init_args[i], "for", arg)
                        except (KeyError, TypeError):
                            raise Exception(
                                f'Did not find {arg} in database, and could '
                                'not find default value.')
                    arg = init_args[i]

                # see if string can be parsed by proc_chain
                if isinstance(arg, str):
                    try:
                        init_args[i] = proc_chain.get_variable(arg)
                    except Exception:
                        pass
            if verbosity > 1:
                print("Building function", func.__name__, "from init_args",
                      init_args)
            func = func(*init_args)
        except KeyError:
            pass
        proc_chain.add_processor(func, *args, **kwargs)

    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)

    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table):
            # check if this is a waveform table (needs t0, dt and values)
            if all(k in buf_in for k in ('t0', 'dt', 'values')):
                lh5_out.add_field(copy_par, buf_in['values'])
                clk = buf_in['dt'].nda[0] * unit_parser.parse_unit(
                    lh5_in['waveform']['dt'].attrs['units'])
                if proc_chain._clk is not None and proc_chain._clk != clk:
                    print("Somehow you managed to set multiple clock "
                          "frequencies... Using " + str(proc_chain._clk))
                else:
                    proc_chain._clk = clk
        else:
            print("I don't know what to do with " + copy_par +
                  ". Building output without it!")

    # finally, add the output buffers to lh5_out and the proc chain
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = [k for k in re.split(",| ", recipe) if k != ''].index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit'][i]
        else:
            unit = recipe['unit']

        try:
            scale = convert(1, unit_parser.parse_unit(unit), clk)
        except InvalidConversion:
            scale = None

        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units": unit}))
    return (proc_chain, lh5_out)
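# For reference, a minimal dsp_config in the format described in the
# docstring above. The processor names and modules are illustrative
# (modeled on typical pygama recipes), not a tested configuration:
example_dsp_config = {
    "outputs": ["wf_blsub"],
    "processors": {
        "bl_mean, bl_std": {
            "function": "mean_stdev",
            "module": "pygama.dsp.processors",
            "args": ["waveform[:1000]", "bl_mean", "bl_std"],
            "prereqs": ["waveform"],
            "unit": ["ADC", "ADC"]
        },
        "wf_blsub": {
            "function": "subtract",
            "module": "numpy",
            "args": ["waveform", "bl_mean", "wf_blsub"],
            "prereqs": ["waveform", "bl_mean"],
            "unit": "ADC"
        }
    }
}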
def dsp_to_hit_cage(f_dsp, f_hit, dg, n_max=None, verbose=False, t_start=None):
    """
    Non-general placeholder for creating a pygama 'hit' file. Uses pandas.
    For every file, apply:
    - energy calibration (peakfit results)
    - timestamp correction
    For a more general dsp_to_hit, maybe each function could be given in
    terms of an 'apply' on a dsp dataframe ...

    TODO: create entry config['rawe'] with list of energy pars to calibrate,
    as in energy_cal.py
    """
    rawe = ['trapEmax']

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    data, n_rows = hit_store.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open(dg.config['ecaldb']) as f:
        raw_db = json.load(f)
        cal_db.storage.write(raw_db)
    runs = dg.fileDB.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        exit()
    run = runs[0]
    for etype in rawe:
        tb = cal_db.table(f'peakfit_{etype}').all()
        df_cal = pd.DataFrame(tb)
        df_cal['run'] = df_cal['run'].astype(int)
        df_run = df_cal.loc[df_cal.run == run]
        cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']]
        # NOTE: np.poly1d expects the highest-order coefficient first; make
        # sure the cal0/cal1/cal2 convention matches (cf. the explicit
        # descending sort in dsp_to_hit above)
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        df_hit[f'{etype}_cal'] = pol(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    UINT_MAX = 4294967295  # (0xffffffff)
    t_max = UINT_MAX / clock
    ts = df_hit['timestamp'].values / clock
    tdiff = np.diff(ts)
    tdiff = np.insert(tdiff, 0, 0)
    iwrap = np.where(tdiff < 0)
    iloop = np.append(iwrap[0], len(ts))
    ts_new, t_roll = [], 0
    for i, idx in enumerate(iloop):
        ilo = 0 if i == 0 else iwrap[0][i - 1]
        ihi = idx
        ts_block = ts[ilo:ihi]
        t_last = ts[ilo - 1]
        t_diff = t_max - t_last
        ts_new.append(ts_block + t_roll)
        t_roll += t_last + t_diff
    df_hit['ts_sec'] = np.concatenate(ts_new)

    # 3. compute global timestamp
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))
    for col in df_hit.columns:
        tb_lh5.add_field(col, lh5.Array(df_hit[col].values,
                                        attrs={'units': ''}))
        print(col)
    print(f'Writing table: {tb_name} in file:\n   {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)
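# Quick illustration of the np.poly1d coefficient ordering flagged above:
# poly1d([a2, a1, a0]) evaluates a2*x**2 + a1*x + a0, highest order first.
import numpy as np

p = np.poly1d([2.0, 1.0, 0.5])
assert p(0.0) == 0.5
assert p(1.0) == 3.5  # 2 + 1 + 0.5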
def build_processing_chain(lh5_in, dsp_config, db_dict=None,
                           outputs=None, verbosity=1, block_width=16):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.

    Parameters
    ----------
    lh5_in : lgdo.Table
        HDF5 table from which raw data is read. At least one row of entries
        should be read in prior to calling this!
    dsp_config : dict or str
        A dict or json filename containing the recipes for computing DSP
        parameters from raw parameters. The format is as follows:
        {
            "outputs" : [ "parnames", ... ] -> list of output parameters
                 to compute by default; see outputs parameter.
            "processors" : {
                 "name1, ..." : { -> names of parameters computed
                      "function" : str -> name of function to call. Function
                           should implement the gufunc interface, a factory
                           function returning a gufunc, or an arbitrary
                           function that can be mapped onto a gufunc
                      "module" : str -> name of module containing function
                      "args" : [ str or numeric, ... ] -> list of names of
                           computed and input parameters or constant values
                           used as inputs to function. Note that outputs
                           should be fed by reference as args! Arguments read
                           from the database are prepended with db.
                      "kwargs" : dict -> keyword arguments for
                           ProcessingChain.add_processor.
                      "init_args" : [ str or numeric, ... ] -> list of names
                           of computed and input parameters or constant
                           values used to initialize a gufunc via a factory
                           function
                      "unit" : str or [ strs, ... ] -> units for parameters
                      "defaults" : dict -> default value to be used for
                           arguments read from the database
                      "prereqs" : DEPRECATED [ strs, ... ] -> list of
                           parameters that must be computed before these can
                 }
            }
        }
    outputs : [str, ...] (optional)
        List of parameters to put in the output lh5 table. If None, use the
        parameters in the 'outputs' list from config.
    db_dict : dict (optional)
        A nested dict pointing to values for db args. e.g. if a processor
        uses arg db.trap.risetime, it will look up
        db_dict['trap']['risetime'] and use the found value. If no value is
        found, use the default defined in the config file.
    verbosity : int (optional)
        0: Print nothing (except errors...)
        1: Print basic warnings (default)
        2: Print basic debug info
        3: Print friggin' everything!
    block_width : int (optional)
        Number of entries to process at once. To optimize performance, a
        multiple of 16 is preferred, but if performance is not an issue any
        value can be used.

    Returns
    -------
    (proc_chain, field_mask, lh5_out) : tuple
        proc_chain : ProcessingChain object that is executed
        field_mask : list of input fields that are used
        lh5_out : output lh5 table containing processed values
    """
    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity=verbosity)

    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs': [], 'processors': {}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']

    # prepare the processor list
    multi_out_procs = {}
    for key, node in processors.items():
        # if we have multiple outputs, add each to the processors list
        keys = [k for k in re.split(",| ", key) if k != '']
        if len(keys) > 1:
            for k in keys:
                multi_out_procs[k] = key

        # parse the arguments list for prereqs, if not included explicitly
        if 'prereqs' not in node:
            prereqs = []
            for arg in node['args']:
                if not isinstance(arg, str):
                    continue
                for prereq in proc_chain.get_variable(arg, True):
                    if prereq not in prereqs and prereq not in keys and prereq != 'db':
                        prereqs.append(prereq)
            node['prereqs'] = prereqs

        if verbosity >= 2:
            print("Prereqs for", key, "are", node['prereqs'])

    processors.update(multi_out_procs)

    # Recursive function to crawl through the parameters/processors and get
    # a sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=[]):
        if par in resolved:
            return
        elif par in unresolved:
            raise ProcessingChainError(f'Circular references detected at {par}')

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, that means it is part of a processor that returns
        # multiple outputs (see above); in that case, node is a str pointing
        # to the actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return

        edges = node['prereqs']
        unresolved.append(par)
        for edge in edges:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    proc_par_list = []   # calculated from processors
    input_par_list = []  # input from file and used for processors
    copy_par_list = []   # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity > 0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))

    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par +
                  ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            # check if this is a waveform table (needs t0, dt and values)
            if all(k in buf_in for k in ('t0', 'dt', 'values')):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda,
                                            'float32')
                clk = buf_in['dt'].nda[0] * unit_parser.parse_unit(
                    lh5_in['waveform']['dt'].attrs['units'])
                if proc_chain._clk is not None and proc_chain._clk != clk:
                    print("Somehow you managed to set multiple clock "
                          "frequencies... Using " + str(proc_chain._clk))
                else:
                    proc_chain._clk = clk

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])
        args = recipe['args']
        for i, arg in enumerate(args):
            if isinstance(arg, str) and arg[0:3] == 'db.':
                lookup_path = arg[3:].split('.')
                try:
                    node = db_dict
                    for key in lookup_path:
                        node = node[key]
                    args[i] = node
                    if verbosity > 0:
                        print("Database lookup: found", node, "for", arg)
                except (KeyError, TypeError):
                    try:
                        args[i] = recipe['defaults'][arg]
                        if verbosity > 0:
                            print("Database lookup: using default value of",
                                  args[i], "for", arg)
                    except (KeyError, TypeError):
                        raise ProcessingChainError(
                            f'Did not find {arg} in database, and could not '
                            'find default value.')

        kwargs = recipe.get('kwargs', {})  # might also need db lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function
        try:
            init_args = recipe['init_args']
            for i, arg in enumerate(init_args):
                if isinstance(arg, str) and arg[0:3] == 'db.':
                    lookup_path = arg[3:].split('.')
                    try:
                        node = db_dict
                        for key in lookup_path:
                            node = node[key]
                        init_args[i] = node
                        if verbosity > 0:
                            print("Database lookup: found", node, "for", arg)
                    except (KeyError, TypeError):
                        try:
                            init_args[i] = recipe['defaults'][arg]
                            if verbosity > 0:
                                print("Database lookup: using default value of",
                                      init_args[i], "for", arg)
                        except (KeyError, TypeError):
                            raise ProcessingChainError(
                                f'Did not find {arg} in database, and could '
                                'not find default value.')
                    arg = init_args[i]

                # see if string can be parsed by proc_chain
                if isinstance(arg, str):
                    init_args[i] = proc_chain.get_variable(arg)
            if verbosity > 1:
                print("Building function", func.__name__, "from init_args",
                      init_args)
            func = func(*init_args)
        except KeyError:
            pass
        proc_chain.add_processor(func, *args, **kwargs)

    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)

    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table):
            # check if this is a waveform table (needs t0, dt and values)
            if all(k in buf_in for k in ('t0', 'dt', 'values')):
                lh5_out.add_field(copy_par, buf_in['values'])
                clk = buf_in['dt'].nda[0] * unit_parser.parse_unit(
                    lh5_in['waveform']['dt'].attrs['units'])
                if proc_chain._clk is not None and proc_chain._clk != clk:
                    print("Somehow you managed to set multiple clock "
                          "frequencies... Using " + str(proc_chain._clk))
                else:
                    proc_chain._clk = clk
        else:
            print("I don't know what to do with " + copy_par +
                  ". Building output without it!")

    # finally, add the output buffers to lh5_out and the proc chain
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = [k for k in re.split(",| ", recipe) if k != ''].index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit'][i]
        else:
            unit = recipe['unit']

        try:
            scale = convert(1, unit_parser.parse_unit(unit), clk)
        except InvalidConversion:
            scale = None

        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units": unit}))

    field_mask = input_par_list + copy_par_list
    return (proc_chain, field_mask, lh5_out)