Esempio n. 1
0
def main():
    """
    Prototype command-line driver for dsp_to_hit processors.

    Clone of pygama/apps/raw_to_dsp.py.  Intended for quick prototyping of
    dsp_to_hit processors.  Heavy lifting with many input/output files should
    be moved to a more specialized processing app, with raw_to_dsp and
    dsp_to_hit both moved to functions in pygama.io.

    Command-line arguments:
      file          input LH5 file
      -o, --output  output file name (default: ./d2h_test.lh5)
      -g, --group   group pattern handed to ``lh5.Store.ls``

    Only the first group returned by ``ls`` is processed.  For that group the
    ``<group>/raw`` object is read, the clock unit is built from the waveform
    ``dt`` column and its ``units`` attribute, and ``energy_cal`` is queued on
    the ``energy`` column.
    """
    # single source of truth for the fallback output name, so the help text
    # cannot drift away from the value that is actually used
    default_out = './d2h_test.lh5'

    parser = argparse.ArgumentParser(
        description=
        """Process a 'pygama DSP LH5' file and produce a 'pygama HIT LH5' file."""
    )
    parser.add_argument('file', help="Input (dsp) LH5 file.")
    parser.add_argument(
        '-o',
        '--output',
        help="Name of output file. By default, output to " + default_out + ".")

    parser.add_argument(
        '-g',
        '--group',
        default='',
        help=
        "Name of group in LH5 file. By default process all base groups. Supports wildcards."
    )
    args = parser.parse_args()

    lh5_in = lh5.Store()
    groups = lh5_in.ls(args.file, args.group)

    out = args.output if args.output is not None else default_out
    print('output file:', out)

    # prototype restriction: only the first matching group is processed
    for group in groups[:1]:
        print(group)

        print("Processing: " + args.file + '/' + group)

        data = lh5_in.read_object(group + '/raw', args.file)

        # waveform values are read but only needed by the disabled "wf" input
        wf_in = data['waveform']['values'].nda
        # clock unit = first dt entry times the unit parsed from its attribute
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])

        ene_in = data['energy'].nda

        n_block = 8
        verbose = 1

        proc = ProcessingChain(block_width=n_block,
                               clock_unit=dt,
                               verbosity=verbose)

        # waveform input currently disabled:
        # proc.add_input_buffer("wf", wf_in, dtype='float32')

        proc.add_input_buffer("ene_in", ene_in, dtype='uint16')

        proc.add_processor(energy_cal, "ene_in")
Esempio n. 2
0
# No explicit output name: use "t2_" + the input file's basename, with any
# "t1_" occurrence stripped from that basename.
if out is None:
    out = 't2_' + args.file.rsplit('/', 1)[-1].replace('t1_', '')

for group in groups:
    print("Processing: " + args.file + '/' + group)
    #data = lh5_in.read_object(args.group, args.file, 0, args.chunk)
    data = lh5_in.read_object(group, args.file)

    wf_in = data['waveform']['values'].nda
    chan_in = data['channel'].nda
    dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
        data['waveform']['dt'].attrs['units'])

    # Set up processing chain
    proc = ProcessingChain(block_width=args.block,
                           clock_unit=dt,
                           verbosity=args.verbose)

    proc.add_input_buffer("wf", wf_in, dtype='float32')
    proc.add_input_buffer("chan", chan_in)

    # Get tail constants from a hard-coded dict. This is a temporary stop-gap
    # until we have analysis parameters databasing. This is also a test of the
    # param_lookup processor. This would become a necessary processor
    # for mixed-channel processing!
    chan2PZ = {
        24: 242.4,
        25: 138.9,
        26: 345.6,
        27: 142.6,
        28: 130.3,
Esempio n. 3
0
def raw_to_dsp(ds, overwrite=False, nevt=None, test=False, verbose=2, block=8, group=''):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)

    For every top-level group of the raw file, the waveforms stored under
    ``<group>/raw`` are pushed through a ProcessingChain and the selected
    outputs are written to ``<group>/data`` in the DSP file.

    Parameters
    ----------
    ds : object with a ``runs`` iterable; one pass is made per entry.
        NOTE(review): the raw/DSP file names are hard-coded below instead of
        being taken from ``ds.paths[run]``, so every run processes the same
        file -- confirm whether this override is still wanted.
    overwrite : bool
        The DSP path is always set, so a run is skipped unless this is True.
    nevt : unused in this function.
    test : bool
        Dry run: only print the raw file that would be processed.
    verbose : forwarded to ProcessingChain as ``verbosity``.
    block : forwarded to ProcessingChain as ``block_width``.
    group : unused in this function (all top-level groups are processed).
    """
    for run in ds.runs:
        raw_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier1/pgt_longtrace_run0117-20200110-105115-calib_raw.lh5"
        dsp_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier2/pgt_longtrace_run0117-20200110-105115-calib_dsp.lh5"
        #raw_file = ds.paths[run]["raw_path"]
        #dsp_file = ds.paths[run]["dsp_path"]
        print("raw_file: ",raw_file)
        print("dsp_file: ",dsp_file)
        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw_', 'dsp_')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        print("Definition of new LH5 version")

        # Store used only for writing the output tables
        lh5_in = lh5.Store()

        # The context manager guarantees the raw file is closed even when a
        # group fails to process.
        with h5py.File(raw_file,'r') as f:
            print("File info: ",f.keys())
            # loop variable deliberately not called "group", so the function
            # parameter of that name is no longer shadowed
            for grp in f.keys():
                print("Processing: " + raw_file + '/' + grp)
                data =  f[grp]['raw']

                # read everything needed into memory while the file is open
                wf_in = data['waveform']['values'][()]
                dt = data['waveform']['dt'][0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])

                # Set up processing chain
                proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=verbose)
                proc.add_input_buffer("wf", wf_in, dtype='float32')

                # Basic Filters
                proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
                proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
                proc.add_processor(pole_zero, "wf_blsub", 145*us, "wf_pz")
                proc.add_processor(trap_norm, "wf_pz", 10*us, 5*us, "wf_trap")
                proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 2*us, 4*us, "wf_atrap")

                # Timepoint calculation
                proc.add_processor(np.argmax, "wf_blsub", 1, "t_max", signature='(n),()->()', types=['fi->i'])
                proc.add_processor(time_point_frac, "wf_blsub", 0.95, "t_max", "tp_95")
                proc.add_processor(time_point_frac, "wf_blsub", 0.8, "t_max", "tp_80")
                proc.add_processor(time_point_frac, "wf_blsub", 0.5, "t_max", "tp_50")
                proc.add_processor(time_point_frac, "wf_blsub", 0.2, "t_max", "tp_20")
                proc.add_processor(time_point_frac, "wf_blsub", 0.05, "t_max", "tp_05")
                proc.add_processor(time_point_thresh, "wf_atrap[0:2000]", 0, "tp_0")

                # Energy calculation
                proc.add_processor(np.amax, "wf_trap", 1, "trapEmax", signature='(n),()->()', types=['fi->f'])
                proc.add_processor(fixed_time_pickoff, "wf_trap", "tp_0+(5*us+9*us)", "trapEftp")
                proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")

                # Current calculation
                proc.add_processor(avg_current, "wf_pz", 10, "curr(len(wf_pz)-10, f)")
                proc.add_processor(np.amax, "curr", 1, "curr_amp", signature='(n),()->()', types=['fi->f'])
                proc.add_processor(np.divide, "curr_amp", "trapEftp", "aoe")

                # DCR calculation: use slope using 1000 samples apart and averaging 200
                # samples, with the start 1.5 us offset from t0
                proc.add_processor(trap_pickoff, "wf_pz", 200, 1000, "tp_0+1.5*us", "dcr_unnorm")
                proc.add_processor(np.divide, "dcr_unnorm", "trapEftp", "dcr")

                # Tail slope. Basically the same as DCR, except with no PZ correction
                proc.add_processor(linear_fit, "wf_blsub[3000:]", "wf_b", "wf_m")
                proc.add_processor(np.divide, "-wf_b", "wf_m", "tail_rc")

                # add zac filter energy calculation
                sigma = 10*us
                flat = 1*us
                decay = 160*us
                proc.add_processor(zac_filter, "wf", sigma, flat, decay, "wf_zac(101, f)")
                proc.add_processor(np.amax, "wf_zac", 1, "zacE", signature='(n),()->()', types=['fi->f'])

                # Set up the LH5 output
                lh5_out = lh5.Table(size=proc._buffer_len)
                lh5_out.add_field("zacE", lh5.Array(proc.get_output_buffer("zacE"), attrs={"units":"ADC"}))
                lh5_out.add_field("trapEmax", lh5.Array(proc.get_output_buffer("trapEmax"), attrs={"units":"ADC"}))
                lh5_out.add_field("trapEftp", lh5.Array(proc.get_output_buffer("trapEftp"), attrs={"units":"ADC"}))
                lh5_out.add_field("ct_corr", lh5.Array(proc.get_output_buffer("ct_corr"), attrs={"units":"ADC*ns"}))
                lh5_out.add_field("bl", lh5.Array(proc.get_output_buffer("bl"), attrs={"units":"ADC"}))
                lh5_out.add_field("bl_sig", lh5.Array(proc.get_output_buffer("bl_sig"), attrs={"units":"ADC"}))
                lh5_out.add_field("A", lh5.Array(proc.get_output_buffer("curr_amp"), attrs={"units":"ADC"}))
                lh5_out.add_field("AoE", lh5.Array(proc.get_output_buffer("aoe"), attrs={"units":"ADC"}))
                lh5_out.add_field("dcr", lh5.Array(proc.get_output_buffer("dcr"), attrs={"units":"ADC"}))

                # NOTE(review): "tp_max" is filled from the "tp_95" buffer, so it
                # duplicates the next field; this looks like a copy-paste slip
                # ("t_max" was probably intended) -- confirm before changing.
                lh5_out.add_field("tp_max", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_95", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_80", lh5.Array(proc.get_output_buffer("tp_80", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_50", lh5.Array(proc.get_output_buffer("tp_50", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_20", lh5.Array(proc.get_output_buffer("tp_20", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_05", lh5.Array(proc.get_output_buffer("tp_05", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_0", lh5.Array(proc.get_output_buffer("tp_0", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tail_rc", lh5.Array(proc.get_output_buffer("tail_rc", unit=us), attrs={"units":"us"}))

                print("Processing:\n",proc)
                proc.execute()

                groupname = grp+"/data"
                print("Writing to: " + dsp_file + "/" + groupname)
                lh5_in.write_object(lh5_out, groupname, dsp_file)
Esempio n. 4
0
def process_ds(f_grid, f_opt, f_tier1, d_out, efilter):
    """
    Process the windowed raw file 'f_tier1' and create the DSP file 'f_opt'.

    For every top-level group (detector) of 'f_tier1', one energy estimate is
    computed per row of the parameter grid and written to ``<detector>/data``
    in 'f_opt' as fields named ``<efilter>_<row index>``.

    Parameters
    ----------
    f_grid : path of the HDF file holding the parameter grid (read with
        ``pd.read_hdf``); each row is one parameter set.
    f_opt : path of the output file; an existing file is removed first.
    f_tier1 : path of the raw input file, opened read-only with h5py.
    d_out : directory searched for ``<base filter>_results.h5`` when
        'efilter' contains 'corr'.
    efilter : filter name; the substrings 'trapE', 'zacE', 'cuspE' and 'corr'
        select which processors are added.

    Returns
    -------
    None.  Returns early (after printing a message) when 'efilter' contains
    'corr' but the results of the base filter cannot be read.
    """
    print("Grid file:",f_grid)
    df_grid = pd.read_hdf(f_grid)

    if os.path.exists(f_opt):
        os.remove(f_opt)

    use_corr = 'corr' in efilter
    if use_corr:
        bfilter = efilter.split('corr')[0]
        try:
            df_res = pd.read_hdf(f'{d_out}/{bfilter}_results.h5',key='results')
        except Exception:
            # best effort: any failure to load the base-filter results means
            # the correction cannot be applied (no longer traps Ctrl-C)
            print(bfilter,"not optimized")
            return
        print("Extraction of best parameters for", bfilter)

    # Store used for writing the output and listing it afterwards
    lh5_in = lh5.Store()

    t_start = time.time()
    # The context manager guarantees the raw file is closed on every exit path.
    with h5py.File(f_tier1,'r') as f:
        for idx, ged in enumerate(f.keys()):
            if idx == 4:
                # NOTE(review): the estimate scales by len(df_grid) and divides
                # by 5 although 4 detectors are complete here -- confirm intent.
                diff = time.time() - t_start
                tot = diff/5 * len(df_grid) / 60
                tot -= diff / 60
                print(f"Estimated remaining time: {tot:.2f} mins")

            print("Detector:",ged)
            data =  f[ged]['raw']

            # read everything needed into memory while the file is open
            wf_in = data['waveform']['values'][()]
            dt = data['waveform']['dt'][0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])
            bl_in = data['baseline'][()] #flashcam baseline values

            # Set up DSP processing chain -- very minimal
            block = 8 #waveforms to process simultaneously
            proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=False)
            proc.add_input_buffer("wf", wf_in, dtype='float32')
            proc.add_input_buffer("bl", bl_in, dtype='float32')

            wsize = wf_in.shape[1]
            # raw dt value scaled by 1e-3; used to express the filter
            # parameters in samples for the zac/cusp factories
            dt0 = data['waveform']['dt'][0]*0.001

            proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
            for i, row in df_grid.iterrows():
                if use_corr:
                    # with 'corr' the grid row is the correction constant and
                    # the filter parameters come from the stored results
                    ct_const = row
                if 'trapE' in efilter:
                    if use_corr:
                        rise, flat, rc = float(df_res['rise'][idx]), float(df_res['flat'][idx]), float(df_res['rc'][idx])
                    else:
                        rise, flat, rc = row
                    proc.add_processor(pole_zero, "wf_blsub", rc*us, "wf_pz")
                    proc.add_processor(trap_norm, "wf_pz", rise*us, flat*us, f"wf_trap_{i}")
                    proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 4*us, 4*us, "wf_atrap")
                    proc.add_processor(time_point_thresh, "wf_pz", 0, "tp_0")
                    proc.add_processor(np.amax, f"wf_trap_{i}", 1, f"trapE_{i}", signature='(n),()->()', types=['fi->f'])
                    proc.add_processor(fixed_time_pickoff, f"wf_trap_{i}", f"tp_0+({rise*us}+{flat*us})", f"trapEftp_{i}")
                if 'zacE' in efilter:
                    if use_corr:
                        sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                    else:
                        sigma, flat, decay = row
                    proc.add_processor(zac_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),"wf", f"wf_zac_{i}(101, f)")
                    proc.add_processor(np.amax, f"wf_zac_{i}", 1, f"zacE_{i}", signature='(n),()->()', types=['fi->f'])
                if 'cuspE' in efilter:
                    if use_corr:
                        sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                    else:
                        sigma, flat, decay = row
                    proc.add_processor(cusp_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),"wf_blsub", f"wf_cusp_{i}(101, f)")
                    proc.add_processor(np.amax, f"wf_cusp_{i}", 1, f"cuspE_{i}", signature='(n),()->()', types=['fi->f'])
                if use_corr:
                    proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")
                    #proc.add_processor(trap_pickoff, "wf_pz", rise*us, flat*us, "tp_0", "ct_corr")
                    proc.add_processor(np.multiply, ct_const, "ct_corr", f"ct_corr_cal_{i}")
                    proc.add_processor(np.add, f"ct_corr_cal_{i}", f"{bfilter}_{i}", f"{efilter}_{i}")

            # Set up the LH5 output: one field per grid row
            lh5_out = lh5.Table(size=proc._buffer_len)
            for i, row in df_grid.iterrows():
                lh5_out.add_field(f"{efilter}_{i}", lh5.Array(proc.get_output_buffer(f"{efilter}_{i}"), attrs={"units":"ADC"}))

            print("Processing:\n",proc)
            proc.execute()

            groupname = ged+"/data"
            print("Writing to: " + f_opt + "/" + groupname)
            lh5_in.write_object(lh5_out, groupname, f_opt)
            print("")

    # list the datasets of the output file
    # NOTE(review): both listings are unused and the second hard-codes the
    # 'g024' detector; kept as-is in case ls() has side effects.
    data_opt = lh5_in.ls(f_opt)
    data_opt_0 = lh5_in.ls(f_opt,'g024/data/*')
    diff = time.time() - t_start
    print(f"Time to process: {diff:.2f} s")
Esempio n. 5
0
def raw_to_dsp(ds,
               overwrite=False,
               nevt=None,
               test=False,
               verbose=2,
               block=8,
               group='daqdata'):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)

    For each run, the "/ORSIS3302DecoderForEnergy" object of the raw file is
    read, its waveforms are pushed through a ProcessingChain, and the selected
    outputs are written to the "data" group of the DSP file.

    Parameters
    ----------
    ds : object providing ``runs`` and ``paths[run]["raw_path"]`` /
        ``paths[run]["dsp_path"]``.
    overwrite : bool
        A run whose DSP path is already set is skipped unless this is True.
    nevt : unused in this function.
    test : bool
        Dry run: only print the raw file that would be processed.
    verbose : forwarded to ProcessingChain as ``verbosity``.
    block : forwarded to ProcessingChain as ``block_width``.
    group : unused in this function.
    """
    for run in ds.runs:
        raw_file = ds.paths[run]["raw_path"]
        dsp_file = ds.paths[run]["dsp_path"]

        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw', 'dsp')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        # new LH5 version

        lh5_in = lh5.Store()
        data = lh5_in.read_object("/ORSIS3302DecoderForEnergy", raw_file)

        wf_in = data['waveform']['values'].nda
        # clock unit = first dt entry times the unit parsed from its attribute
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])

        # Parameters for DCR calculation
        dcr_trap_int = 200
        dcr_trap_flat = 1000
        dcr_trap_startSample = 1200

        # Set up processing chain
        proc = ProcessingChain(block_width=block,
                               clock_unit=dt,
                               verbosity=verbose)
        proc.add_input_buffer("wf", wf_in, dtype='float32')

        proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
        proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
        proc.add_processor(pole_zero, "wf_blsub", 70 * us, "wf_pz")

        proc.add_processor(asymTrapFilter, "wf_pz", 10 * us, 5 * us, 10 * us,
                           "wf_atrap")
        proc.add_processor(np.amax,
                           "wf_atrap",
                           1,
                           "atrapE",
                           signature='(n),()->()',
                           types=['fi->f'])

        # proc.add_processor(np.divide, "atrapmax", 10*us, "atrapE")

        proc.add_processor(trap_norm, "wf_pz", 10 * us, 5 * us, "wf_trap")
        proc.add_processor(np.amax,
                           "wf_trap",
                           1,
                           "trapE",
                           signature='(n),()->()',
                           types=['fi->f'])
        proc.add_processor(avg_current, "wf_pz", 10, "curr")
        proc.add_processor(np.amax,
                           "curr",
                           1,
                           "A_10",
                           signature='(n),()->()',
                           types=['fi->f'])
        proc.add_processor(np.divide, "A_10", "trapE", "AoE")
        proc.add_processor(trap_pickoff, "wf_pz", dcr_trap_int, dcr_trap_flat,
                           dcr_trap_startSample, "dcr")

        # Set up the LH5 output
        lh5_out = lh5.Table(size=proc._buffer_len)
        lh5_out.add_field(
            "trapE",
            lh5.Array(proc.get_output_buffer("trapE"), attrs={"units": "ADC"}))

        lh5_out.add_field(
            "bl",
            lh5.Array(proc.get_output_buffer("bl"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "bl_sig",
            lh5.Array(proc.get_output_buffer("bl_sig"), attrs={"units":
                                                               "ADC"}))
        lh5_out.add_field(
            "A",
            lh5.Array(proc.get_output_buffer("A_10"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "AoE",
            lh5.Array(proc.get_output_buffer("AoE"), attrs={"units": "ADC"}))
        lh5_out.add_field(
            "dcr",
            lh5.Array(proc.get_output_buffer("dcr"), attrs={"units": "ADC"}))

        print("Processing:\n", proc)
        proc.execute()

        print("Writing to: ", dsp_file)
        # write through the Store created above; the previous name (f_lh5)
        # was never defined in this function and raised NameError
        lh5_in.write_object(lh5_out, "data", dsp_file)
Esempio n. 6
0
# Per-channel tail (RC) constant extraction: for each channel, read its raw
# table and queue the processors that fit the log of the falling tail.
for chan_name in chans:
    group = chan_name + '/raw'
    print("Processing: " + args.file + '/' + group)

    # total rows available for this group, optionally capped by --nentries
    tot_n_rows = lh5_st.read_n_rows(group, args.file)
    if args.nentries is not None:
        tot_n_rows = min(tot_n_rows, args.nentries)
    # read_object returns the table and the number of rows read; the trailing
    # arguments are presumably (start row, number of rows) -- TODO confirm
    lh5_in, n_rows_read = lh5_st.read_object(group, args.file, 0, args.chunk)
    wf_in = lh5_in['waveform']['values'].nda
    # clock unit = first dt entry times the unit parsed from its attribute
    dt = lh5_in['waveform']['dt'].nda[0] * unit_parser.parse_unit(
        lh5_in['waveform']['dt'].attrs['units'])

    # Set up processing chain
    proc = ProcessingChain(block_width=args.block,
                           clock_unit=dt,
                           verbosity=args.verbose)
    proc.add_input_buffer("wf", wf_in, dtype='float32')

    # measure baseline, then window and baseline subtract
    # (args.BL_samples / args.Tail_samples are spliced in as waveform slices)
    proc.add_processor(mean_stdev, "wf[{}]".format(args.BL_samples), "bl",
                       "bl_sig")
    proc.add_processor(np.subtract, "wf[{}]".format(args.Tail_samples), "bl",
                       "wf_blsub")

    # RC constant. Linear fit of log of falling tail.
    # tail_rc = -1 / slope of the fitted line
    proc.add_processor(np.log, "wf_blsub", "tail_log")
    proc.add_processor(linear_fit, "tail_log", "tail_b", "tail_m")
    proc.add_processor(np.divide, -1, "tail_m", "tail_rc")

    # Get tail_rc output buffer
def build_processing_chain(lh5_in,
                           dsp_config,
                           db_dict=None,
                           outputs=None,
                           verbosity=1,
                           block_width=16):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.

    Returns (proc_chain, lh5_out):
    - proc_chain: ProcessingChain object that is bound to lh5_in and lh5_out;
      all you need to do is handle file i/o for lh5_in/out and run execute
    - lh5_out: output LH5 table

    Required arguments:
    - lh5_in: input LH5 table
    - dsp_config: dict or name of json file containing a recipe for
      constructing the ProcessingChain object produced by this function
      (None means an empty recipe). A dict is deep-copied, so the caller's
      object is never modified. The config should have a dictionary called
      'processors', containing dictionaries of the following format:
        Key: parameter name: name of parameter produced by the processor.
             can optionally provide multiple, separated by spaces or commas
        Values:
          function (req): name of gufunc (or factory function)
          module (req): name of module in which to find function
          prereqs (req): name of parameters from other processors and from
            input that are required to exist to run this
          args (req): list of arguments for processor, with variables passed
            by name or value. Names should either be inputs from lh5_in, or
            parameter names for other processors. Names of the format db.name
            will look up the parameter in the metadata.
          kwargs (opt): kwargs used when adding processors to proc_chain
          init_args (opt): args used when initializing a processor that has
            static data (for factory functions)
          defaults (opt): dict mapping a 'db.name' argument to the value used
            when it is not found in db_dict
          unit (req for output parameters): unit to be used for attr in lh5
            file; a list with one entry per name for multi-output processors
      There must also be a list called 'outputs' (used when the outputs
      argument is None), containing a list of parameters to put into lh5_out.

    Optional keyword arguments:
    - outputs: list of parameters to put in the output lh5 table. If None,
      use the parameters in the 'outputs' list from config. Outputs that are
      not produced by a processor are copied from lh5_in.
    - db_dict: a nested dict pointing to values for db args.
      e.g. if a processor uses arg db.trap.risetime, it will look up
          db_dict['trap']['risetime']
      and use the found value. If no value is found, use the default defined
      in the config file.
    - verbosity: verbosity level:
            0: Print nothing (except errors...)
            1: Print basic warnings (default)
            2: Print basic debug info
            3: Print friggin' everything!
    - block_width: number of entries to process at once.

    Raises:
    - Exception: on circular prerequisites, or when a db argument is found
      neither in db_dict nor in the processor's defaults.
    """

    def split_names(key):
        """Split a 'name1, name2 ...' processor key into its parameter names."""
        return [k for k in re.split(",| ", key) if k != '']

    def is_waveform(table):
        """True if the table has all of the fields of a waveform table."""
        return all(field in table for field in ('t0', 'dt', 'values'))

    def lookup_db_arg(arg, recipe):
        """Resolve a 'db.a.b' argument from db_dict, else from recipe['defaults']."""
        try:
            node = db_dict
            for key in arg[3:].split('.'):
                node = node[key]
        except (LookupError, TypeError):
            # TypeError: db_dict is None or an intermediate node is not a dict
            try:
                node = recipe['defaults'][arg]
            except (LookupError, TypeError):
                raise Exception(
                    'Did not find', arg,
                    'in database, and could not find default value.')
            if (verbosity > 0):
                print("Database lookup: using default value of", node, "for",
                      arg)
        else:
            if (verbosity > 0):
                print("Database lookup: found", node, "for", arg)
        return node

    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs': [], 'processors': {}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']

    # for processors with multiple outputs, add separate entries to the
    # processor list; each alias maps to the combined key (a str)
    for key in list(processors):
        keys = split_names(key)
        if len(keys) > 1:
            for k in keys:
                processors[k] = key

    # Recursive function to crawl through the parameters/processors and get
    # a sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=None):
        # fresh list per top-level call; recursive calls share the caller's
        if unresolved is None:
            unresolved = []
        if par in resolved:
            return
        if par in unresolved:
            # par is still being resolved further up the stack; the last
            # entry of unresolved is the parameter that led back to it
            raise Exception('Circular references detected: %s -> %s' %
                            (unresolved[-1], par))

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, that means it is part of a processor that returns
        # multiple outputs (see above); in that case, node is a str pointing
        # to the actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return

        unresolved.append(par)
        for edge in node['prereqs']:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    proc_par_list = []  # calculated from processors
    input_par_list = []  # input from file and used for processors
    copy_par_list = []  # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity > 0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))

    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity=verbosity)

    def update_clock(wf_table):
        """Set the chain's clock from a waveform table, keeping the first one on conflict."""
        # units are read from the table at hand rather than from a
        # hard-coded lh5_in['waveform'] entry
        clk = wf_table['dt'].nda[0] * unit_parser.parse_unit(
            wf_table['dt'].attrs['units'])
        if proc_chain._clk is not None and proc_chain._clk != clk:
            print(
                "Somehow you managed to set multiple clock frequencies...Using "
                + str(proc_chain._clk))
        else:
            proc_chain._clk = clk

    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par +
                  ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            # check if this is waveform
            if is_waveform(buf_in):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda,
                                            'float32')
                update_clock(buf_in)

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])
        args = recipe['args']
        for i, arg in enumerate(args):
            if isinstance(arg, str) and arg[0:3] == 'db.':
                args[i] = lookup_db_arg(arg, recipe)

        kwargs = recipe.get('kwargs', {})  # might also need db lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function. Only the absence of init_args is
        # tolerated; lookup and construction errors are no longer swallowed.
        init_args = recipe.get('init_args')
        if init_args is not None:
            for i, arg in enumerate(init_args):
                if isinstance(arg, str) and arg[0:3] == 'db.':
                    arg = init_args[i] = lookup_db_arg(arg, recipe)

                # see if string can be parsed by proc_chain; strings it does
                # not understand are deliberately passed through unchanged
                if isinstance(arg, str):
                    try:
                        init_args[i] = proc_chain.get_variable(arg)
                    except Exception:
                        pass

            if (verbosity > 1):
                print("Building function", func.__name__, "from init_args",
                      init_args)
            func = func(*init_args)
        proc_chain.add_processor(func, *args, **kwargs)

    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)

    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table):
            # check if this is waveform
            if is_waveform(buf_in):
                lh5_out.add_field(copy_par, buf_in['values'])
                update_clock(buf_in)
        else:
            print("I don't know what to do with " + copy_par +
                  ". Building output without it!")

    # finally, add the output buffers to lh5_out and the proc chain
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = split_names(recipe).index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit'][i]
        else:
            unit = recipe['unit']

        # without a known clock there is nothing to convert against
        scale = None
        if proc_chain._clk is not None:
            try:
                scale = convert(1, unit_parser.parse_unit(unit),
                                proc_chain._clk)
            except InvalidConversion:
                scale = None

        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units": unit}))
    return (proc_chain, lh5_out)
Esempio n. 8
0
def _is_waveform_table(tbl):
    """Return True if tbl contains every one of the fields 't0', 'dt', 'values'."""
    # NB: "'t0' and 'dt' and 'values' in tbl" would only test 'values', since
    # non-empty string literals are always truthy.
    return all(field in tbl for field in ('t0', 'dt', 'values'))


def _register_clock(proc_chain, wf_table):
    """Compute the clock period from wf_table['dt'] and store it in proc_chain.

    If the chain already holds a different clock, the existing one is kept and
    a warning is printed.
    """
    dt = wf_table['dt']
    clk = dt.nda[0] * unit_parser.parse_unit(dt.attrs['units'])
    if proc_chain._clk is not None and proc_chain._clk != clk:
        print("Somehow you managed to set multiple clock frequencies...Using " + str(proc_chain._clk))
    else:
        proc_chain._clk = clk


def _resolve_db_arg(arg, db_dict, defaults, verbosity):
    """Return the value for a 'db.<path>' argument.

    The dotted path after 'db.' is looked up in the nested db_dict; if that
    fails, defaults[arg] is used. Raises ProcessingChainError if neither is
    available.
    """
    node = db_dict
    try:
        for key in arg[3:].split('.'):
            node = node[key]
    except (KeyError, TypeError):
        # TypeError covers db_dict (or an intermediate node) not being a dict
        pass
    else:
        if verbosity > 0:
            print("Database lookup: found", node, "for", arg)
        return node

    try:
        value = defaults[arg]
    except (KeyError, TypeError):
        # TypeError covers the recipe having no 'defaults' entry at all
        raise ProcessingChainError(
            'Did not find %s in database, and could not find default value.' % arg)
    if verbosity > 0:
        print("Database lookup: using default value of", value, "for", arg)
    return value


def build_processing_chain(lh5_in, dsp_config, db_dict = None,
                           outputs = None, verbosity=1, block_width=16):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.
    
    Parameters
    ----------
    lh5_in : lgdo.Table
        HDF5 table from which raw data is read. At least one row of entries
        should be read in prior to calling this!
    dsp_config: dict or str
        A dict or json filename containing the recipes for computing DSP
        parameter from raw parameters. If None, an empty config is used.
        A dict is deep-copied and never modified. The format is as follows:
        {
            "outputs" : [ "parnames", ... ] -> list of output parameters
                 to compute by default; see outputs parameter.
            "processors" : {
                 "name1, ..." : { -> names of parameters computed
                      "function" : str -> name of function to call. Function
                           should implement the gufunc interface, a factory
                           function returning a gufunc, or an arbitrary
                           function that can be mapped onto a gufunc
                      "module" : str -> name of module containing function
                      "args" : [ str or numeric, ... ] -> list of names of
                           computed and input parameters or constant values
                           used as inputs to function. Note that outputs
                           should be fed by reference as args! Arguments read
                           from the database are prepended with db.
                      "kwargs" : dict -> keyword arguments for
                           ProcessingChain.add_processor.
                      "init_args" : [ str or numeric, ... ] -> list of names
                           of computed and input parameters or constant values
                           used to initialize a gufunc via a factory function
                      "unit" : str or [ strs, ... ] -> units for parameters
                      "defaults" : dict -> default value to be used for
                           arguments read from the database
                      "prereqs" : DEPRECATED [ strs, ...] -> list of parameters
                           that must be computed before these can
                 }
            }
        }
    outputs: [str, ...] (optional)
        List of parameters to put in the output lh5 table. If None,
        use the parameters in the 'outputs' list from config
    db_dict: dict (optional)
        A nested dict pointing to values for db args. e.g. if a processor
        uses arg db.trap.risetime, it will look up
          db_dict['trap']['risetime']
        and use the found value. If no value is found, use the default
        defined in the config file.
    verbosity : int (optional)
        0: Print nothing (except errors...)
        1: Print basic warnings (default)
        2: Print basic debug info
        3: Print friggin' everything!    
    block_width : int (optional)
        number of entries to process at once. To optimize performance,
        a multiple of 16 is preferred, but if performance is not an issue
        any value can be used.
    
    Returns
    -------
    (proc_chain, field_mask, lh5_out) : tuple
        proc_chain : ProcessingChain object that is executed
        field_mask : List of input fields that are used
        lh5_out : output lh5 table containing processed values

    Raises
    ------
    ProcessingChainError
        If the processors have circular dependencies, or if a 'db.' argument
        is found neither in db_dict nor in the processor's 'defaults'.
    """
    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity = verbosity)
    
    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs':[], 'processors':{}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']
    
    # prepare the processor list
    multi_out_procs = {}
    for key, node in processors.items():
        # if we have multiple outputs, map each individual output name onto
        # the full key of the processor that produces it
        keys = [k for k in re.split(",| ", key) if k != '']
        if len(keys) > 1:
            for k in keys:
                multi_out_procs[k] = key

        # parse the arguments list for prereqs, if not included explicitly
        if 'prereqs' not in node:
            prereqs = []
            for arg in node['args']:
                if not isinstance(arg, str):
                    continue
                for prereq in proc_chain.get_variable(arg, True):
                    if prereq not in prereqs and prereq not in keys and prereq != 'db':
                        prereqs.append(prereq)
            node['prereqs'] = prereqs

        if verbosity >= 2:
            print("Prereqs for", key, "are", node['prereqs'])

    # added after the loop so that the dict is not resized while iterating
    processors.update(multi_out_procs)
    
    # Recursive function to crawl through the parameters/processors and get
    # a sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=None):
        if unresolved is None:
            unresolved = []
        if par in resolved:
            return
        if par in unresolved:
            # unresolved[-1] is the parameter currently being expanded, i.e.
            # the one whose prerequisite list led back to par
            raise ProcessingChainError(
                'Circular references detected: %s -> %s' % (unresolved[-1], par))

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, that means it is part of a processor that returns
        # multiple outputs (see above); in that case, node is a str pointing
        # to the actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return
        
        unresolved.append(par)
        for edge in node['prereqs']:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    proc_par_list = [] # calculated from processors
    input_par_list = [] # input from file and used for processors
    copy_par_list = [] # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity > 0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))
    
    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par + ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            if _is_waveform_table(buf_in):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda, 'float32')
                _register_clock(proc_chain, buf_in)
            else:
                print("I don't know what to do with " + input_par + ". Building output without it!")

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])
        defaults = recipe.get('defaults')

        def is_db_arg(arg):
            return isinstance(arg, str) and arg[0:3] == 'db.'

        args = [_resolve_db_arg(arg, db_dict, defaults, verbosity)
                if is_db_arg(arg) else arg
                for arg in recipe['args']]
            
        kwargs = recipe.get('kwargs', {}) # might also need db lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function. Only the absence of 'init_args'
        # is tolerated; errors raised while building the function propagate.
        if 'init_args' in recipe:
            init_args = []
            for arg in recipe['init_args']:
                if is_db_arg(arg):
                    arg = _resolve_db_arg(arg, db_dict, defaults, verbosity)
                # see if string can be parsed by proc_chain
                if isinstance(arg, str):
                    arg = proc_chain.get_variable(arg)
                init_args.append(arg)

            if verbosity > 1:
                print("Building function", func.__name__, "from init_args", init_args)
            func = func(*init_args)

        proc_chain.add_processor(func, *args, **kwargs)

    
    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)
    
    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table) and _is_waveform_table(buf_in):
            lh5_out.add_field(copy_par, buf_in['values'])
            _register_clock(proc_chain, buf_in)
        else:
            print("I don't know what to do with " + copy_par + ". Building output without it!")
    
    # finally, add the output buffers to lh5_out and the proc chain
    # Use the clock the chain actually holds; it is None if no waveform table
    # was seen, in which case no unit scaling can be computed.
    clk = proc_chain._clk
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = [k for k in re.split(",| ", recipe) if k != ''].index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit'][i]
        else:
            unit = recipe['unit']
        
        scale = None
        if clk is not None:
            try:
                scale = convert(1, unit_parser.parse_unit(unit), clk)
            except InvalidConversion:
                # unit is not convertible to clock ticks; leave unscaled
                scale = None
        
        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units":unit}) )

    field_mask = input_par_list + copy_par_list
    return (proc_chain, field_mask, lh5_out)