def main():
    """
    Prototype driver for dsp_to_hit processors.

    Clone of pygama/apps/raw_to_dsp.py.  Intended for quick prototyping of
    dsp_to_hit processors.  Heavy lifting with many input/output files should
    be moved to a more specialized processing app, with raw_to_dsp and
    dsp_to_hit both moved to functions in pygama.io.

    Command-line arguments
    ----------------------
    file : input LH5 file
    -o/--output : output file name (defaults to ./d2h_test.lh5)
    -g/--group : group pattern handed to lh5.Store.ls
    """
    parser = argparse.ArgumentParser(
        description=
        """Process a 'pygama DSP LH5' file and produce a 'pygama HIT LH5' file."""
    )
    parser.add_argument('file', help="Input (dsp) LH5 file.")
    # The help text used to advertise "./t2_[input file name]", which is not
    # what the code below does; it now states the real default.
    parser.add_argument(
        '-o',
        '--output',
        help="Name of output file. By default, output to ./d2h_test.lh5.")
    parser.add_argument(
        '-g',
        '--group',
        default='',
        help=
        "Name of group in LH5 file. By default process all base groups. Supports wildcards."
    )
    args = parser.parse_args()

    lh5_in = lh5.Store()
    groups = lh5_in.ls(args.file, args.group)

    out = args.output if args.output is not None else './d2h_test.lh5'
    print('output file:', out)

    # NOTE(review): only the first matching group is processed ([:1]), which
    # contradicts the --group help text; presumably a prototyping shortcut.
    for group in groups[:1]:
        print(group)
        print("Processing: " + args.file + '/' + group)

        data = lh5_in.read_object(group + '/raw', args.file)

        wf_in = data['waveform']['values'].nda
        # clock period = first 'dt' entry scaled by its declared unit
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])
        ene_in = data['energy'].nda

        n_block = 8
        verbose = 1

        proc = ProcessingChain(block_width=n_block,
                               clock_unit=dt,
                               verbosity=verbose)
        proc.add_input_buffer("wf", wf_in, dtype='float32')
        proc.add_input_buffer("ene_in", ene_in, dtype='uint16')
        proc.add_processor(energy_cal, "ene_in")
if out is None: out = 't2_' + args.file[args.file.rfind('/') + 1:].replace('t1_', '') for group in groups: print("Processing: " + args.file + '/' + group) #data = lh5_in.read_object(args.group, args.file, 0, args.chunk) data = lh5_in.read_object(group, args.file) wf_in = data['waveform']['values'].nda chan_in = data['channel'].nda dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit( data['waveform']['dt'].attrs['units']) # Set up processing chain proc = ProcessingChain(block_width=args.block, clock_unit=dt, verbosity=args.verbose) proc.add_input_buffer("wf", wf_in, dtype='float32') proc.add_input_buffer("chan", chan_in) # Get tail constants from a hard-coded dict. This is a temporary stop-gap # until we have analysis parameters databasing. This is also a test of the # param_lookup processor. This would become a necessary processor # for mixed-channel processing! chan2PZ = { 24: 242.4, 25: 138.9, 26: 345.6, 27: 142.6, 28: 130.3,
def raw_to_dsp(ds, overwrite=False, nevt=None, test=False, verbose=2, block=8, group=''):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)

    For every top-level group of the raw file, the '<group>/raw' waveforms are
    pushed through a ProcessingChain and the results are written to
    '<group>/data' in the dsp file.

    Parameters
    ----------
    ds : object exposing ``runs`` (iterated once per run)
    overwrite : bool
        If False, a run whose dsp file name is already set is skipped.
    nevt : unused
    test : bool
        Dry run: print the raw file name and do not process.
    verbose : verbosity handed to ProcessingChain
    block : block width handed to ProcessingChain
    group : unused (every top-level group of the raw file is processed)
    """
    for run in ds.runs:

        # NOTE(review): the file names are hard-coded and the ds.paths lookup
        # is disabled, so every run processes the same file and nothing
        # happens unless overwrite=True.  Left as found; confirm intent.
        raw_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier1/pgt_longtrace_run0117-20200110-105115-calib_raw.lh5"
        dsp_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier2/pgt_longtrace_run0117-20200110-105115-calib_dsp.lh5"
        #raw_file = ds.paths[run]["raw_path"]
        #dsp_file = ds.paths[run]["dsp_path"]
        print("raw_file: ",raw_file)
        print("dsp_file: ",dsp_file)

        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw_', 'dsp_')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        print("Definition of new LH5 version")
        lh5_in = lh5.Store()

        # The context manager guarantees the HDF5 handle is released even if
        # a processor raises (the handle was previously never closed).
        with h5py.File(raw_file, 'r') as f:
            print("File info: ",f.keys())

            # Loop variable deliberately not called 'group' so the function
            # parameter of that name is no longer shadowed.
            for chan in f.keys():
                print("Processing: " + raw_file + '/' + chan)
                data = f[chan]['raw']

                wf_in = data['waveform']['values'][()]
                dt = data['waveform']['dt'][0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])

                # Set up processing chain
                proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=verbose)
                proc.add_input_buffer("wf", wf_in, dtype='float32')

                # Basic Filters
                proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
                proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
                proc.add_processor(pole_zero, "wf_blsub", 145*us, "wf_pz")
                proc.add_processor(trap_norm, "wf_pz", 10*us, 5*us, "wf_trap")
                proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 2*us, 4*us, "wf_atrap")

                # Timepoint calculation
                proc.add_processor(np.argmax, "wf_blsub", 1, "t_max", signature='(n),()->()', types=['fi->i'])
                proc.add_processor(time_point_frac, "wf_blsub", 0.95, "t_max", "tp_95")
                proc.add_processor(time_point_frac, "wf_blsub", 0.8, "t_max", "tp_80")
                proc.add_processor(time_point_frac, "wf_blsub", 0.5, "t_max", "tp_50")
                proc.add_processor(time_point_frac, "wf_blsub", 0.2, "t_max", "tp_20")
                proc.add_processor(time_point_frac, "wf_blsub", 0.05, "t_max", "tp_05")
                proc.add_processor(time_point_thresh, "wf_atrap[0:2000]", 0, "tp_0")

                # Energy calculation
                proc.add_processor(np.amax, "wf_trap", 1, "trapEmax", signature='(n),()->()', types=['fi->f'])
                proc.add_processor(fixed_time_pickoff, "wf_trap", "tp_0+(5*us+9*us)", "trapEftp")
                proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")

                # Current calculation
                proc.add_processor(avg_current, "wf_pz", 10, "curr(len(wf_pz)-10, f)")
                proc.add_processor(np.amax, "curr", 1, "curr_amp", signature='(n),()->()', types=['fi->f'])
                proc.add_processor(np.divide, "curr_amp", "trapEftp", "aoe")

                # DCR calculation: use slope using 1000 samples apart and averaging 200
                # samples, with the start 1.5 us offset from t0
                proc.add_processor(trap_pickoff, "wf_pz", 200, 1000, "tp_0+1.5*us", "dcr_unnorm")
                proc.add_processor(np.divide, "dcr_unnorm", "trapEftp", "dcr")

                # Tail slope. Basically the same as DCR, except with no PZ correction
                proc.add_processor(linear_fit, "wf_blsub[3000:]", "wf_b", "wf_m")
                proc.add_processor(np.divide, "-wf_b", "wf_m", "tail_rc")

                # add zac filter energy calculation
                sigma = 10*us
                flat = 1*us
                decay = 160*us
                proc.add_processor(zac_filter, "wf", sigma, flat, decay, "wf_zac(101, f)")
                proc.add_processor(np.amax, "wf_zac", 1, "zacE", signature='(n),()->()', types=['fi->f'])

                # Set up the LH5 output
                lh5_out = lh5.Table(size=proc._buffer_len)

                # (output field, chain variable, unit attribute); buffers taken as-is
                plain_fields = [
                    ("zacE", "zacE", "ADC"),
                    ("trapEmax", "trapEmax", "ADC"),
                    ("trapEftp", "trapEftp", "ADC"),
                    ("ct_corr", "ct_corr", "ADC*ns"),
                    ("bl", "bl", "ADC"),
                    ("bl_sig", "bl_sig", "ADC"),
                    ("A", "curr_amp", "ADC"),
                    ("AoE", "aoe", "ADC"),
                    ("dcr", "dcr", "ADC"),
                ]
                for field, var, unit_name in plain_fields:
                    lh5_out.add_field(field, lh5.Array(proc.get_output_buffer(var), attrs={"units":unit_name}))

                # (output field, chain variable); buffers requested in microseconds.
                # "tp_max" used to be filled from the "tp_95" buffer (copy-paste
                # slip); it now exports the waveform-maximum time "t_max".
                time_fields = [
                    ("tp_max", "t_max"),
                    ("tp_95", "tp_95"),
                    ("tp_80", "tp_80"),
                    ("tp_50", "tp_50"),
                    ("tp_20", "tp_20"),
                    ("tp_05", "tp_05"),
                    ("tp_0", "tp_0"),
                    ("tail_rc", "tail_rc"),
                ]
                for field, var in time_fields:
                    lh5_out.add_field(field, lh5.Array(proc.get_output_buffer(var, unit=us), attrs={"units":"us"}))

                print("Processing:\n",proc)
                proc.execute()

                groupname = chan+"/data"
                print("Writing to: " + dsp_file + "/" + groupname)
                lh5_in.write_object(lh5_out, groupname, dsp_file)
def process_ds(f_grid, f_opt, f_tier1, d_out, efilter):
    """
    process the windowed raw file 'f_tier1' and create the DSP file 'f_opt'

    For every top-level group (detector) of the raw file, one processing
    chain is built that evaluates the energy filter named by `efilter` once
    per row of the parameter grid stored in `f_grid`; the resulting
    '<efilter>_<row index>' columns are written to '<detector>/data' in f_opt.

    Parameters
    ----------
    f_grid : HDF file read with pandas.read_hdf; one row per parameter set
    f_opt : output LH5 file; removed first if it already exists
    f_tier1 : input raw file, opened with h5py
    d_out : directory holding '<base filter>_results.h5' (only read when
        'corr' is in efilter)
    efilter : filter name; the substrings 'trapE', 'zacE', 'cuspE' and 'corr'
        select which processors are added
    """
    print("Grid file:",f_grid)
    df_grid = pd.read_hdf(f_grid)

    if os.path.exists(f_opt):
        os.remove(f_opt)

    if 'corr' in efilter:
        bfilter = efilter.split('corr')[0]
        try:
            df_res = pd.read_hdf(f'{d_out}/{bfilter}_results.h5',key='results')
            print("Extraction of best parameters for", bfilter)
        except Exception:
            # Best effort: without optimized parameters there is nothing to
            # correct.  (A bare except here also swallowed Ctrl-C.)
            print(bfilter,"not optimized")
            return

    lh5_in = lh5.Store()
    t_start = time.time()

    # open raw file; the context manager closes the handle even on error
    with h5py.File(f_tier1, 'r') as f:
        for idx, ged in enumerate(f.keys()):
            if idx == 4:
                # NOTE(review): the estimate scales by len(df_grid) rather
                # than by the number of detectors; left as found.
                diff = time.time() - t_start
                tot = diff/5 * len(df_grid) / 60
                tot -= diff / 60
                print(f"Estimated remaining time: {tot:.2f} mins")

            print("Detector:",ged)
            data = f[ged]['raw']

            wf_in = data['waveform']['values'][()]
            dt = data['waveform']['dt'][0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])
            bl_in = data['baseline'][()] #flashcam baseline values

            # Set up DSP processing chain -- very minimal
            block = 8 #waveforms to process simultaneously
            proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=False)
            proc.add_input_buffer("wf", wf_in, dtype='float32')
            proc.add_input_buffer("bl", bl_in, dtype='float32')

            wsize = wf_in.shape[1]
            dt0 = data['waveform']['dt'][0]*0.001

            proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")

            for i, row in df_grid.iterrows():
                if 'corr' in efilter:
                    ct_const = row

                if 'trapE' in efilter:
                    if 'corr' in efilter:
                        rise, flat, rc = float(df_res['rise'][idx]), float(df_res['flat'][idx]), float(df_res['rc'][idx])
                    else:
                        rise, flat, rc = row
                    proc.add_processor(pole_zero, "wf_blsub", rc*us, "wf_pz")
                    proc.add_processor(trap_norm, "wf_pz", rise*us, flat*us, f"wf_trap_{i}")
                    proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 4*us, 4*us, "wf_atrap")
                    proc.add_processor(time_point_thresh, "wf_pz", 0, "tp_0")
                    proc.add_processor(np.amax, f"wf_trap_{i}", 1, f"trapE_{i}", signature='(n),()->()', types=['fi->f'])
                    proc.add_processor(fixed_time_pickoff, f"wf_trap_{i}", f"tp_0+({rise*us}+{flat*us})", f"trapEftp_{i}")

                if 'zacE' in efilter:
                    if 'corr' in efilter:
                        sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                    else:
                        sigma, flat, decay = row
                    proc.add_processor(zac_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),"wf", f"wf_zac_{i}(101, f)")
                    proc.add_processor(np.amax, f"wf_zac_{i}", 1, f"zacE_{i}", signature='(n),()->()', types=['fi->f'])

                if 'cuspE' in efilter:
                    if 'corr' in efilter:
                        sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                    else:
                        sigma, flat, decay = row
                    proc.add_processor(cusp_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),"wf_blsub", f"wf_cusp_{i}(101, f)")
                    proc.add_processor(np.amax, f"wf_cusp_{i}", 1, f"cuspE_{i}", signature='(n),()->()', types=['fi->f'])

                if 'corr' in efilter:
                    proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")
                    proc.add_processor(np.multiply, ct_const, "ct_corr", f"ct_corr_cal_{i}")
                    proc.add_processor(np.add, f"ct_corr_cal_{i}", f"{bfilter}_{i}", f"{efilter}_{i}")

            # Set up the LH5 output: one column per grid row
            lh5_out = lh5.Table(size=proc._buffer_len)
            for i in df_grid.index:
                lh5_out.add_field(f"{efilter}_{i}", lh5.Array(proc.get_output_buffer(f"{efilter}_{i}"), attrs={"units":"ADC"}))

            print("Processing:\n",proc)
            proc.execute()

            groupname = ged+"/data"
            print("Writing to: " + f_opt + "/" + groupname)
            lh5_in.write_object(lh5_out, groupname, f_opt)
            print("")

    # list the datasets of the output file
    # NOTE(review): both results are unused and 'g024' is hard-coded; the
    # calls are kept in case they are relied on as a sanity check.
    data_opt = lh5_in.ls(f_opt)
    data_opt_0 = lh5_in.ls(f_opt,'g024/data/*')
    diff = time.time() - t_start
    print(f"Time to process: {diff:.2f} s")
def raw_to_dsp(ds, overwrite=False, nevt=None, test=False, verbose=2, block=8, group='daqdata'):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)

    Parameters
    ----------
    ds : object exposing ``runs`` and ``paths[run]["raw_path"/"dsp_path"]``
    overwrite : bool
        If False, runs that already have a dsp path are skipped.
    nevt : unused
    test : bool
        Dry run: print the raw file name and do not process.
    verbose : verbosity handed to ProcessingChain
    block : block width handed to ProcessingChain
    group : unused; the "/ORSIS3302DecoderForEnergy" object is always read
    """
    for run in ds.runs:

        raw_file = ds.paths[run]["raw_path"]
        dsp_file = ds.paths[run]["dsp_path"]

        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw', 'dsp')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        # new LH5 version
        lh5_in = lh5.Store()
        data = lh5_in.read_object("/ORSIS3302DecoderForEnergy", raw_file)

        wf_in = data['waveform']['values'].nda
        # clock period = first 'dt' entry scaled by its declared unit
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])

        # Parameters for DCR calculation
        dcr_trap_int = 200
        dcr_trap_flat = 1000
        dcr_trap_startSample = 1200

        # Set up processing chain
        proc = ProcessingChain(block_width=block,
                               clock_unit=dt,
                               verbosity=verbose)
        proc.add_input_buffer("wf", wf_in, dtype='float32')

        proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
        proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
        proc.add_processor(pole_zero, "wf_blsub", 70 * us, "wf_pz")
        proc.add_processor(asymTrapFilter, "wf_pz", 10 * us, 5 * us, 10 * us,
                           "wf_atrap")
        proc.add_processor(np.amax, "wf_atrap", 1, "atrapE",
                           signature='(n),()->()', types=['fi->f'])
        proc.add_processor(trap_norm, "wf_pz", 10 * us, 5 * us, "wf_trap")
        proc.add_processor(np.amax, "wf_trap", 1, "trapE",
                           signature='(n),()->()', types=['fi->f'])
        proc.add_processor(avg_current, "wf_pz", 10, "curr")
        proc.add_processor(np.amax, "curr", 1, "A_10",
                           signature='(n),()->()', types=['fi->f'])
        proc.add_processor(np.divide, "A_10", "trapE", "AoE")
        proc.add_processor(trap_pickoff, "wf_pz", dcr_trap_int, dcr_trap_flat,
                           dcr_trap_startSample, "dcr")

        # Set up the LH5 output: (output field, chain variable)
        lh5_out = lh5.Table(size=proc._buffer_len)
        out_fields = [
            ("trapE", "trapE"),
            ("bl", "bl"),
            ("bl_sig", "bl_sig"),
            ("A", "A_10"),
            ("AoE", "AoE"),
            ("dcr", "dcr"),
        ]
        for field, var in out_fields:
            lh5_out.add_field(
                field,
                lh5.Array(proc.get_output_buffer(var), attrs={"units": "ADC"}))

        print("Processing:\n", proc)
        proc.execute()

        print("Writing to: ", dsp_file)
        # The store created above is `lh5_in`; the previous `f_lh5` name was
        # never defined, so the write raised NameError after all processing.
        lh5_in.write_object(lh5_out, "data", dsp_file)
for chan_name in chans: group = chan_name + '/raw' print("Processing: " + args.file + '/' + group) tot_n_rows = lh5_st.read_n_rows(group, args.file) if args.nentries is not None: tot_n_rows = min(tot_n_rows, args.nentries) lh5_in, n_rows_read = lh5_st.read_object(group, args.file, 0, args.chunk) wf_in = lh5_in['waveform']['values'].nda dt = lh5_in['waveform']['dt'].nda[0] * unit_parser.parse_unit( lh5_in['waveform']['dt'].attrs['units']) # Set up processing chain proc = ProcessingChain(block_width=args.block, clock_unit=dt, verbosity=args.verbose) proc.add_input_buffer("wf", wf_in, dtype='float32') # measure baseline, then window and baseline subtract proc.add_processor(mean_stdev, "wf[{}]".format(args.BL_samples), "bl", "bl_sig") proc.add_processor(np.subtract, "wf[{}]".format(args.Tail_samples), "bl", "wf_blsub") # RC constant. Linear fit of log of falling tail. proc.add_processor(np.log, "wf_blsub", "tail_log") proc.add_processor(linear_fit, "tail_log", "tail_b", "tail_m") proc.add_processor(np.divide, -1, "tail_m", "tail_rc") # Get tail_rc output buffer
def build_processing_chain(lh5_in, dsp_config, db_dict=None,
                           outputs=None, verbosity=1, block_width=16):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.

    Returns (proc_chain, lh5_out):
    - proc_chain: ProcessingChain object that is bound to lh5_in and lh5_out;
      all you need to do is handle file i/o for lh5_in/out and run execute
    - lh5_out: output LH5 table

    Required arguments:
    - lh5_in: input LH5 table
    - dsp_config: dict or name of json file containing a recipe for
      constructing the ProcessingChain object produced by this function
      (None is treated as an empty recipe). A dict is deep-copied before use.
      The config should have a dictionary called 'processors', containing
      dictionaries of the following format:
        Key: name of the parameter produced by the processor. Multiple
             names can be given, separated by spaces or commas.
        Values:
          function (req): name of the gufunc (or factory) inside 'module'
          module (req): name of module in which to find the function
          prereqs (req): names of parameters from other processors and from
            input that are required to exist to run this
          args (req): list of arguments for the processor, with variables
            passed by name or value. Names of the format db.name are looked
            up in db_dict.
          kwargs (opt): kwargs used when adding processors to proc_chain
          init_args (opt): args used when initializing a processor that has
            static data (for factory functions)
          defaults (opt): dict of fallback values for db parameters, keyed
            by the full 'db.…' argument string
          unit (req): unit used for the attr in the lh5 file (a list, one
            entry per output, for multi-output processors)
      There should also be a list called 'outputs', containing the
      parameters to put into lh5_out.

    Optional keyword arguments:
    - outputs: list of parameters to put in the output lh5 table. If None,
      use the parameters in the 'outputs' list from config
    - db_dict: a nested dict pointing to values for db args. e.g. if a
      processor uses arg db.trap.risetime, it will look up
      db_dict['trap']['risetime'] and use the found value. If no value is
      found, use the default defined in the config file.
    - verbosity: verbosity level:
            0: Print nothing (except errors...)
            1: Print basic warnings (default)
            2: Print basic debug info
            3: Print friggin' everything!
    - block_width: number of entries to process at once.

    Raises Exception on circular prerequisites, or when a db.* argument is
    found neither in db_dict nor in the recipe's defaults.
    """
    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs': [], 'processors': {}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']

    # for processors with multiple outputs, add an alias entry per output
    # whose value is the (string) key of the real recipe
    for key in list(processors):
        keys = [k for k in re.split(",| ", key) if k != '']
        if len(keys) > 1:
            for k in keys:
                processors[k] = key

    # Recursive function to crawl through the parameters/processors and get
    # a sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=None):
        # a fresh list per top-level call (no shared mutable default)
        if unresolved is None:
            unresolved = []

        if par in resolved:
            return
        elif par in unresolved:
            # report the chain currently being resolved; the old message
            # formatted a local that was not bound yet and crashed instead
            raise Exception('Circular references detected: %s' %
                            ' -> '.join(unresolved + [par]))

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, that means it is part of a processor that returns
        # multiple outputs (see above); in that case, node is a str pointing
        # to the actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return

        edges = node['prereqs']
        unresolved.append(par)
        for edge in edges:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    def lookup_db_arg(arg, recipe):
        # Resolve a 'db.a.b' argument from db_dict, else from recipe defaults.
        try:
            node = db_dict
            for key in arg[3:].split('.'):
                node = node[key]
        except (KeyError, TypeError):
            try:
                node = recipe['defaults'][arg]
            except (KeyError, TypeError):
                raise Exception('Did not find ' + arg + ' in database, '
                                'and could not find default value.')
            if verbosity > 0:
                print("Database lookup: using default value of", node, "for", arg)
        else:
            if verbosity > 0:
                print("Database lookup: found", node, "for", arg)
        return node

    def is_waveform(table):
        # A waveform table carries all three of these fields.  (The former
        # "'t0' and 'dt' and 'values' in x" only tested 'values'.)
        return all(field in table for field in ('t0', 'dt', 'values'))

    clk = None  # clock period, known once a waveform table has been seen

    def register_clock(wf_table):
        # Derive the clock period from a waveform table and hand it to the
        # chain; units are read from this table, not from a hard-coded
        # lh5_in['waveform'].
        nonlocal clk
        clk = wf_table['dt'].nda[0] * unit_parser.parse_unit(
            wf_table['dt'].attrs['units'])
        if proc_chain._clk is not None and proc_chain._clk != clk:
            print("Somehow you managed to set multiple clock frequencies...Using "
                  + str(proc_chain._clk))
        else:
            proc_chain._clk = clk

    proc_par_list = []   # calculated from processors
    input_par_list = []  # input from file and used for processors
    copy_par_list = []   # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity > 0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))

    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity=verbosity)

    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par +
                  ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            if is_waveform(buf_in):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda,
                                            'float32')
                register_clock(buf_in)

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])

        args = recipe['args']
        for i, arg in enumerate(args):
            if isinstance(arg, str) and arg[0:3] == 'db.':
                args[i] = lookup_db_arg(arg, recipe)

        kwargs = recipe.get('kwargs', {})  # might also need db lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function.  Only a *missing* 'init_args'
        # is skipped; a failing lookup or factory call now surfaces instead
        # of being silently swallowed.
        init_args = recipe.get('init_args')
        if init_args is not None:
            for i, arg in enumerate(init_args):
                if isinstance(arg, str) and arg[0:3] == 'db.':
                    init_args[i] = lookup_db_arg(arg, recipe)
                arg = init_args[i]
                # see if string can be parsed by proc_chain; strings it
                # cannot parse are passed through unchanged (best effort)
                if isinstance(arg, str):
                    try:
                        init_args[i] = proc_chain.get_variable(arg)
                    except Exception:
                        pass
            if verbosity > 1:
                print("Building function", func.__name__, "from init_args",
                      init_args)
            func = func(*init_args)

        proc_chain.add_processor(func, *args, **kwargs)

    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)

    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table):
            if is_waveform(buf_in):
                lh5_out.add_field(copy_par, buf_in['values'])
                register_clock(buf_in)
        else:
            # name the parameter actually being copied (this used to print
            # the stale loop variable of the input loop)
            print("I don't know what to do with " + copy_par +
                  ". Building output without it!")

    # finally, add the output buffers to lh5_out and the proc chain
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = [k for k in re.split(",| ", recipe) if k != ''].index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit'][i]
        else:
            unit = recipe['unit']

        # Without a waveform there is no clock to convert against, so the
        # buffer is exported unscaled (this used to be a NameError).
        scale = None
        if clk is not None:
            try:
                scale = convert(1, unit_parser.parse_unit(unit), clk)
            except InvalidConversion:
                scale = None

        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units": unit}))

    return (proc_chain, lh5_out)
def build_processing_chain(lh5_in, dsp_config, db_dict = None,
                           outputs = None, verbosity=1, block_width=16):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.

    Parameters
    ----------
    lh5_in : lgdo.Table
        HDF5 table from which raw data is read. At least one row of entries
        should be read in prior to calling this!

    dsp_config: dict or str
        A dict or json filename containing the recipes for computing DSP
        parameter from raw parameters (None is treated as an empty recipe).
        A dict is deep-copied before use. The format is as follows:
        {
            "outputs" : [ "parnames", ... ] -> list of output parameters
                 to compute by default; see outputs parameter.
            "processors" : {
                 "name1, ..." : { -> names of parameters computed
                      "function" : str -> name of function to call. Function
                           should implement the gufunc interface, a factory
                           function returning a gufunc, or an arbitrary
                           function that can be mapped onto a gufunc
                      "module" : str -> name of module containing function
                      "args" : [ str or numeric, ... ] -> list of names of
                           computed and input parameters or constant values
                           used as inputs to function. Note that outputs
                           should be fed by reference as args! Arguments read
                           from the database are prepended with db.
                      "kwargs" : dict -> keyword arguments for
                           ProcessingChain.add_processor.
                      "init_args" : [ str or numeric, ... ] -> list of names
                           of computed and input parameters or constant
                           values used to initialize a gufunc via a factory
                           function
                      "unit" : str or [ strs, ... ] -> units for parameters
                      "defaults" : dict -> default value to be used for
                           arguments read from the database
                      "prereqs" : DEPRECATED [ strs, ...] -> list of
                           parameters that must be computed before these can
                           be computed; derived from "args" when absent
                 }
            }
        }

    outputs: [str, ...] (optional)
        List of parameters to put in the output lh5 table. If None,
        use the parameters in the 'outputs' list from config

    db_dict: dict (optional)
        A nested dict pointing to values for db args. e.g. if a processor
        uses arg db.trap.risetime, it will look up
        db_dict['trap']['risetime'] and use the found value. If no value is
        found, use the default defined in the config file.

    verbosity : int (optional)
        0: Print nothing (except errors...)
        1: Print basic warnings (default)
        2: Print basic debug info
        3: Print friggin' everything!

    block_width : int (optional)
        number of entries to process at once. To optimize performance,
        a multiple of 16 is preferred, but if performance is not an issue
        any value can be used.

    Returns
    -------
    (proc_chain, field_mask, lh5_out) : tuple
        proc_chain : ProcessingChain object that is executed
        field_mask : List of input fields that are used
        lh5_out : output lh5 table containing processed values

    Raises
    ------
    ProcessingChainError
        On circular prerequisites, or when a db.* argument is found neither
        in db_dict nor in the recipe's defaults.
    """
    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity = verbosity)

    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs':[], 'processors':{}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']

    # prepare the processor list
    multi_out_procs = {}
    for key, node in processors.items():
        # if we have multiple outputs, add each to the processors list
        keys = [k for k in re.split(",| ", key) if k!='']
        if len(keys)>1:
            for k in keys:
                multi_out_procs[k] = key

        # parse the arguments list for prereqs, if not included explicitly
        if 'prereqs' not in node:
            prereqs = []
            for arg in node['args']:
                if not isinstance(arg, str):
                    continue
                for prereq in proc_chain.get_variable(arg, True):
                    if prereq not in prereqs and prereq not in keys and prereq != 'db':
                        prereqs.append(prereq)
            node['prereqs'] = prereqs

        if verbosity>=2:
            print("Prereqs for", key, "are", node['prereqs'])

    # aliases are added after the loop so the dict is not resized mid-iteration
    processors.update(multi_out_procs)

    # Recursive function to crawl through the parameters/processors and get
    # a sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=None):
        # a fresh list per top-level call (no shared mutable default)
        if unresolved is None:
            unresolved = []

        if par in resolved:
            return
        elif par in unresolved:
            # report the chain currently being resolved; the old message
            # formatted a local that was not bound yet and crashed instead
            raise ProcessingChainError('Circular references detected: %s' %
                                       ' -> '.join(unresolved + [par]))

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, that means it is part of a processor that returns
        # multiple outputs (see above); in that case, node is a str pointing
        # to the actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return

        edges = node['prereqs']
        unresolved.append(par)
        for edge in edges:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    def lookup_db_arg(arg, recipe):
        # Resolve a 'db.a.b' argument from db_dict, else from recipe defaults.
        try:
            node = db_dict
            for key in arg[3:].split('.'):
                node = node[key]
        except (KeyError, TypeError):
            try:
                node = recipe['defaults'][arg]
            except (KeyError, TypeError):
                raise ProcessingChainError(
                    'Did not find ' + arg + ' in database, '
                    'and could not find default value.')
            if verbosity>0:
                print("Database lookup: using default value of", node, "for", arg)
        else:
            if verbosity>0:
                print("Database lookup: found", node, "for", arg)
        return node

    def is_waveform(table):
        # A waveform table carries all three of these fields.  (The former
        # "'t0' and 'dt' and 'values' in x" only tested 'values'.)
        return all(field in table for field in ('t0', 'dt', 'values'))

    clk = None  # clock period, known once a waveform table has been seen

    def register_clock(wf_table):
        # Derive the clock period from a waveform table and hand it to the
        # chain; units are read from this table, not from a hard-coded
        # lh5_in['waveform'].
        nonlocal clk
        clk = wf_table['dt'].nda[0] * unit_parser.parse_unit(
            wf_table['dt'].attrs['units'])
        if proc_chain._clk is not None and proc_chain._clk != clk:
            print("Somehow you managed to set multiple clock frequencies...Using "
                  + str(proc_chain._clk))
        else:
            proc_chain._clk = clk

    proc_par_list = []   # calculated from processors
    input_par_list = []  # input from file and used for processors
    copy_par_list = []   # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity>0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))

    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par +
                  ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            if is_waveform(buf_in):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda,
                                            'float32')
                register_clock(buf_in)

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])

        args = recipe['args']
        for i, arg in enumerate(args):
            if isinstance(arg, str) and arg[0:3]=='db.':
                args[i] = lookup_db_arg(arg, recipe)

        kwargs = recipe.get('kwargs', {}) # might also need db lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function.  Only a *missing* 'init_args'
        # is skipped; a KeyError raised inside the factory itself is no
        # longer mistaken for "no init_args".
        init_args = recipe.get('init_args')
        if init_args is not None:
            for i, arg in enumerate(init_args):
                if isinstance(arg, str) and arg[0:3]=='db.':
                    init_args[i] = lookup_db_arg(arg, recipe)
                arg = init_args[i]
                # see if string can be parsed by proc_chain
                if isinstance(arg, str):
                    init_args[i] = proc_chain.get_variable(arg)
            if verbosity>1:
                print("Building function", func.__name__, "from init_args",
                      init_args)
            func = func(*init_args)

        proc_chain.add_processor(func, *args, **kwargs)

    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)

    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table):
            if is_waveform(buf_in):
                lh5_out.add_field(copy_par, buf_in['values'])
                register_clock(buf_in)
        else:
            # name the parameter actually being copied (this used to print
            # the stale loop variable of the input loop)
            print("I don't know what to do with " + copy_par +
                  ". Building output without it!")

    # finally, add the output buffers to lh5_out and the proc chain
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = [k for k in re.split(",| ", recipe) if k!=''].index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit'][i]
        else:
            unit = recipe['unit']

        # Without a waveform there is no clock to convert against, so the
        # buffer is exported unscaled (this used to be a NameError).
        scale = None
        if clk is not None:
            try:
                scale = convert(1, unit_parser.parse_unit(unit), clk)
            except InvalidConversion:
                scale = None

        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units":unit}))

    field_mask = input_par_list + copy_par_list
    return (proc_chain, field_mask, lh5_out)