Beispiel #1
0
 def __init__(self, garbage_length=256, packet_size_guess=1024):
     """Create ``self.garbage_table`` and give it its three fields.

     ``garbage_length`` is passed to ``lh5.Table`` and is the shape of the
     two uint32 arrays; together with ``packet_size_guess`` it forms the
     shape guess of the uint8 vector-of-vectors stored under 'packets'.
     """
     table = self.garbage_table = lh5.Table(garbage_length)

     # variable-length uint8 rows, pre-sized from the two guesses
     table.add_field(
         'packets',
         lh5.VectorOfVectors(shape_guess=(garbage_length, packet_size_guess),
                             dtype='uint8'))

     # TODO: add garbage codes enum attribute: user supplies in constructor
     # before calling super()
     for column in ('packet_id', 'garbage_code'):
         table.add_field(column,
                         lh5.Array(shape=garbage_length, dtype='uint32'))
Beispiel #2
0
def dsp_to_hit():
    """
    Add calibrated energy columns to a dsp table and save it as a hit file.

    Reads the table 'ORSIS3302DecoderForEnergy/raw' from the hard-coded dsp
    file, adds one '<estimator>_cal' field (units attribute 'keV') per energy
    estimator, computed as ``value * pfit[0] + pfit[1]`` with ``pfit`` taken
    from ``linear_cal``, and writes the table to the hard-coded hit file.
    This is a good example of adding a column, reading & writing an LH5 file.
    """
    table_path = 'ORSIS3302DecoderForEnergy/raw'
    f_dsp = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    store = lh5.Store()
    store.ls(f_dsp)  # listing of the input file; the result is not used below
    table = store.read_object(table_path, f_dsp)
    frame = table.get_dataframe()

    # one calibrated column per energy estimator of interest
    for estimator in ('energy', 'trapE'):
        cal_column = f'{estimator}_cal'
        pars = linear_cal(estimator)
        frame[cal_column] = frame[estimator] * pars[0] + pars[1]
        table.add_field(
            cal_column,
            lh5.Array(frame[cal_column].values, attrs={'units': 'keV'}))

    # LH5 overwrite is broken right now, so remove any existing hit file
    # before writing
    if os.path.exists(f_hit):
        os.remove(f_hit)
    store.write_object(table, table_path, f_hit)
Beispiel #3
0
    def initialize_lh5_table(self, lh5_table, channel=None):
        """Initialize an lh5 Table based on decoded_values.

        For every entry returned by ``self.get_decoded_values(channel)`` a
        field of length ``lh5_table.size`` is added to ``lh5_table``:

        - no 'datatype' key: a plain ``lh5.Array`` (an optional 'kind' key is
          turned into an 'array<1>{kind}' datatype attribute);
        - 'datatype' == 'waveform': a sub-table with 't0', 'dt' and 'values';
        - otherwise the datatype string is parsed with ``lh5.parse_datatype``
          and an array of equal-sized arrays or a vector of vectors is built.

        Entries without a 'dtype', or with a datatype that is not handled,
        are reported with a printed error and skipped.  channel is the
        channel according to ch_group.
        """
        cls_name = type(self).__name__
        if not hasattr(self, 'decoded_values'):
            print(cls_name,
                  'Error: no decoded_values available for setting up buffer')
            return

        decoded = self.get_decoded_values(channel)
        n_rows = lh5_table.size
        for field, spec in decoded.items():
            # work on a copy: keys are popped as they are consumed, and
            # whatever is left over is attached to the new object as attrs
            attrs = spec.copy()
            if 'dtype' not in attrs:
                print(cls_name, 'Error: must specify dtype for', field)
                continue
            dtype = attrs.pop('dtype')

            # no datatype: just a "normal" array
            if 'datatype' not in attrs:
                # allow to override "kind" for the dtype for lh5
                if 'kind' in attrs:
                    attrs['datatype'] = 'array<1>{' + attrs.pop('kind') + '}'
                plain = lh5.Array(shape=n_rows, dtype=dtype, attrs=attrs)
                lh5_table.add_field(field, plain)
                continue

            requested = attrs.pop('datatype')

            # handle waveforms from digitizers in a uniform way
            if requested == 'waveform':
                waveform = lh5.Table(n_rows)

                # t0 column: all zeros, units fixed to ns
                # TODO: add more control over t0: another field to fill it?
                # Optional units attribute?
                t0 = lh5.Array(nda=np.zeros(n_rows, dtype='float'),
                               attrs={'units': 'ns'})
                waveform.add_field('t0', t0)

                # dt column: the sampling period repeated for every row
                period = attrs.pop('sample_period')
                periods = np.full(n_rows, period, dtype='float')
                period_units = attrs.pop('sample_period_units')
                waveform.add_field(
                    'dt', lh5.Array(nda=periods,
                                    attrs={'units': period_units}))

                # sample values: all attributes not popped so far are passed on
                # TODO: add vector of vectors and compression capabilities
                n_samples = attrs.pop('length')
                samples = lh5.ArrayOfEqualSizedArrays(
                    shape=(n_rows, n_samples),
                    dtype=dtype,
                    dims=[1, 1],
                    attrs=attrs)
                waveform.add_field('values', samples)

                lh5_table.add_field(field, waveform)
                continue

            # If we get here, must be a LH5 datatype
            parsed_type, _shape, elements = lh5.parse_datatype(requested)

            if parsed_type == 'array_of_equalsized_arrays':
                row_length = attrs.pop('length')
                lh5_table.add_field(
                    field,
                    lh5.ArrayOfEqualSizedArrays(shape=(n_rows, row_length),
                                                dtype=dtype,
                                                dims=[1, 1],
                                                attrs=attrs))
            elif elements.startswith('array'):  # vector-of-vectors
                # without an explicit guess, fall back to the table size
                row_guess = attrs.pop('length_guess', n_rows)
                lh5_table.add_field(
                    field,
                    lh5.VectorOfVectors(shape_guess=(n_rows, row_guess),
                                        dtype=dtype,
                                        attrs=attrs))
            else:
                print(cls_name, 'Error: do not know how to make a',
                      parsed_type, 'for', field)
Beispiel #4
0
def dsp_to_hit(df_row, dg=None, verbose=False, overwrite=False, lowE=False):
    """
    Create hit files from dsp files.  This routine is specific to CAGE but could
    be extended & modified in the future to work for multi-channel data (PGT,
    L200, etc.)

    Parameters
    ----------
    df_row : file database row (presumably a pandas Series -- confirm)
        Read for 'dsp_path', 'dsp_file', 'hit_path', 'hit_file', 'run',
        'cycle', 'skip' and 'startTime'.
    dg : object exposing a ``config`` mapping
        Read for 'dsp_input_dir', 'hit_output_dir', 'input_table', 'ecaldb'
        and 'rawe'.  Required in practice despite the ``None`` default,
        because ``dg.config`` is accessed unconditionally.
    verbose : bool
        Print the file names and columns, save two diagnostic plots under
        ./plots/ and then terminate the interpreter (meant for looking at a
        single cycle file).
    overwrite : bool
        If False and the hit file already exists, print a notice and return.
    lowE : bool
        Write the hit file into a 'lowE' sub-directory of the hit path.

    Returns
    -------
    None.  The function returns early when the output exists (and overwrite
    is not set) or when the row is flagged ``skip``.

    Raises
    ------
    SystemExit
        If the calibration query does not select exactly one row, and at the
        end of a verbose run.
    ValueError
        If the selected calibration row has no 'cal<N>' columns.
    """
    # exit() is installed by the site module for interactive use; sys.exit is
    # the supported way to terminate from program code
    import sys

    apply_ecal = True
    apply_tscorr = False  # not needed, should be fixed by the jan 30 2021 re-d2r

    f_dsp = f"{dg.config['dsp_input_dir']}/{df_row['dsp_path']}/{df_row['dsp_file']}"
    f_hit = f"{dg.config['hit_output_dir']}/{df_row['hit_path']}/{df_row['hit_file']}"
    # change output directory if in spec_id 2 mode (ie low-energy calibration to get 60 keV in right place)
    if lowE:
        f_hit = f"{dg.config['hit_output_dir']}/{df_row['hit_path']}/lowE/{df_row['hit_file']}"
        print(f'Writing to low-energy hit file: {f_hit}')
    if verbose:
        print('input:', f_dsp)
        print('output:', f_hit)

    if not overwrite and os.path.exists(f_hit):
        # report the file that is actually being skipped (the output file)
        print('file exists, overwrite not set, skipping f_hit:\n   ', f_hit)
        return

    # get run and cycle for ecalDB lookup.  also apply run selection
    run, cycle = df_row[['run', 'cycle']].astype(int)
    if df_row.skip:
        print(f'Cycle {cycle} has been marked junk, will not process.')
        return

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    data, _ = hit_store.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    if apply_ecal:

        # loading the tinydb this way preserves the in-file text formatting
        cal_db = db.TinyDB(storage=MemoryStorage)
        with open(dg.config['ecaldb']) as f:
            raw_db = json.load(f)
            cal_db.storage.write(raw_db)

        # loop over energy estimators of interest
        for etype in dg.config['rawe']:

            # load ecalDB table
            tb = cal_db.table(f'peakfit_{etype}').all()
            df_cal = pd.DataFrame(tb)
            for col in ['run', 'cyclo', 'cychi']:
                df_cal[col] = df_cal[col].astype(int)

            # load cal constants for this cycle
            que = f'run=={run} and cyclo <= {cycle} <= cychi'
            df_run = df_cal.query(que)
            if len(df_run) != 1:
                print('Warning, non-unique query:', que)
                print(df_run)
                sys.exit()

            # figure out the order of the polynomial from column names.
            # Only columns of the exact form 'cal<integer>' are coefficients;
            # any other column that merely contains 'cal' is ignored instead
            # of crashing the parser.
            pols = {}
            for col in df_run.columns:
                if 'cal' not in col:
                    continue
                match = parse('cal{p}', col)
                if match is None:
                    continue
                order_txt = match.named['p']
                if not order_txt.isdigit():
                    continue
                iord = int(order_txt)
                pols[iord] = df_run.iloc[0][f'cal{iord}']

            if not pols:
                raise ValueError(
                    f'no cal<N> columns found in peakfit_{etype} for: {que}')

            # get the coefficients in descending order for np.poly1d: p2, p1, p0...
            coeffs = np.array([pols[order] for order in sorted(pols, reverse=True)],
                              dtype=float)

            # apply the calibration to the dataframe
            pfunc = np.poly1d(coeffs)
            df_hit[f'{etype}_cal'] = pfunc(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    if apply_tscorr:
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock
        ts = df_hit['timestamp'].values / clock
        tdiff = np.diff(ts)
        tdiff = np.insert(tdiff, 0, 0)
        # a negative step marks the first sample after a counter wrap
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))
        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i - 1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            # NOTE: for the first block ilo - 1 is -1, i.e. the last sample;
            # t_last + t_diff equals t_max up to rounding either way
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        df_hit['ts_sec'] = np.concatenate(ts_new)
    else:
        # NOTE: may need to subtract off the 1st value here if we find
        # that the timestamp doesn't reset at cycle boundaries.
        df_hit['ts_sec'] = df_hit['timestamp'].values / clock

    # 3. compute global timestamp
    t_start = df_row['startTime']
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file (remove any previous output first)
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))

    for col in df_hit.columns:
        tb_lh5.add_field(col, lh5.Array(df_hit[col].values,
                                        attrs={'units': ''}))
        if verbose:
            print(col)

    print(f'Writing table: {tb_name} in file:\n   {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)

    if verbose:
        print('Creating diagnostic plots ...')

        # energy
        xlo, xhi, xpb = 0, 3000, 10
        hist, bins, _ = pgh.get_hist(df_hit['trapEftp_cal'],
                                     range=(xlo, xhi),
                                     dx=xpb)
        plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
        plt.xlabel('Energy (keV)', ha='right', x=1)
        plt.ylabel('Counts', ha='right', y=1)
        plt.savefig('./plots/d2h_etest.png')
        print('saved figure: ./plots/d2h_etest.png')
        plt.cla()

        # timestamp
        xv = np.arange(len(df_hit))
        plt.plot(xv, df_hit['ts_sec'], '.b')
        plt.savefig('./plots/d2h_ttest.png')
        print('saved figure: ./plots/d2h_ttest.png')
        plt.cla()

        # exit, don't create + overwrite a million plots
        print(
            'verbose mode of d2h is meant to look at 1 cycle file, exiting...')
        sys.exit()
Beispiel #5
0
def test_array():
    """Check that an lh5.Array allocated from a shape reports 'array'."""
    arr = lh5.Array(dtype=float, shape=(1))
    # NOTE(review): 'dataype_name' is the spelling the call uses; confirm
    # against lh5.Array before renaming it
    reported = arr.dataype_name()
    assert reported == 'array'
def build_processing_chain(lh5_in,
                           dsp_config,
                           db_dict=None,
                           outputs=None,
                           verbosity=1,
                           block_width=16):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.

    Returns (proc_chain, lh5_out):
    - proc_chain: ProcessingChain object that is bound to lh5_in and lh5_out;
      all you need to do is handle file i/o for lh5_in/out and run execute
    - lh5_out: output LH5 table

    Required arguments:
    - lh5_in: input LH5 table
    - dsp_config: dict or name of json file containing a recipe for
      constructing the ProcessingChain object produced by this function
      (None is treated as an empty recipe). A dict is deep-copied, so the
      caller's object is not modified. The config should have a dictionary
      called 'processors', containing dictionaries of the following format:
        Key: parameter name: name of parameter produced by the processor.
             can optionally provide multiple, separated by spaces or commas
        Values:
          function (req): name of the gufunc (or factory) inside module
          module (req): name of module in which to find the function
          prereqs (req): name of parameters from other processors and from
            input that are required to exist to run this
          args (req): list of arguments for processor, with variables passed
            by name or value. Names should either be inputs from lh5_in, or
            parameter names for other processors. Names of the format db.name
            will look up the parameter in db_dict.
          kwargs (opt): kwargs used when adding processors to proc_chain
          init_args (opt): args used when initializing a processor that has
            static data (for factory functions)
          defaults (opt): dict mapping a 'db.name' argument to the value to
            use when it is not found in db_dict
          unit: unit to be used for attr in lh5 file; read for every
            parameter listed in the outputs. For a processor with several
            outputs this may be a list with one unit per output, or a single
            string shared by all of them.
      There may also be a list called 'outputs', containing a list of parameters
      to put into lh5_out.

    Optional keyword arguments:
    - outputs: list of parameters to put in the output lh5 table. If None,
      use the parameters in the 'outputs' list from config
    - db_dict: a nested dict pointing to values for db args.
      e.g. if a processor uses arg db.trap.risetime, it will look up
          db_dict['trap']['risetime']
      and use the found value. If no value is found, use the default defined
      in the config file.
    - verbosity: verbosity level:
            0: Print nothing (except errors...)
            1: Print basic warnings (default)
            2: Print basic debug info
            3: Print friggin' everything!
    - block_width: number of entries to process at once.

    Raises Exception if the prereqs contain a circular reference, or if a
    db.* argument is found neither in db_dict nor in the recipe defaults.
    Errors raised while calling a factory function with init_args propagate
    to the caller.
    """

    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs': [], 'processors': {}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']

    def split_names(key):
        # split a "name1, name2" processor key into the individual names
        return [k for k in re.split(",| ", key) if k != '']

    def is_waveform(table):
        # a waveform table is one that carries all three of these fields
        return all(name in table for name in ('t0', 'dt', 'values'))

    def resolve_db_arg(arg, recipe):
        # Look up a 'db.a.b' argument in db_dict, falling back to the
        # recipe's defaults. db_dict may be None or lack the path, so any
        # ordinary lookup failure means "not in the database".
        node = db_dict
        try:
            for key in arg[3:].split('.'):
                node = node[key]
        except Exception:
            try:
                value = recipe['defaults'][arg]
            except Exception as err:
                raise Exception(
                    'Did not find %s in database, and could not find '
                    'default value.' % arg) from err
            if verbosity > 0:
                print("Database lookup: using default value of", value, "for",
                      arg)
            return value
        if verbosity > 0:
            print("Database lookup: found", node, "for", arg)
        return node

    # for processors with multiple outputs, add separate entries to the processor list
    for key in list(processors):
        keys = split_names(key)
        if len(keys) > 1:
            for k in keys:
                processors[k] = key

    # Recursive function to crawl through the parameters/processors and get
    # a sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=None):
        if unresolved is None:
            unresolved = []
        if par in resolved:
            return
        if par in unresolved:
            # unresolved holds the chain currently being expanded; its last
            # entry is the parameter that asked for par
            raise Exception('Circular references detected: %s -> %s' %
                            (unresolved[-1], par))

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, that means it is part of a processor that
        # returns multiple outputs (see above); in that case, node is a str
        # pointing to the actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return

        unresolved.append(par)
        for edge in node['prereqs']:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    proc_par_list = []  # calculated from processors
    input_par_list = []  # input from file and used for processors
    copy_par_list = []  # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity > 0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))

    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity=verbosity)

    def register_clock(wf_table):
        # derive the clock from this table's own dt value and dt units; the
        # first clock that was set wins
        clk = wf_table['dt'].nda[0] * unit_parser.parse_unit(
            wf_table['dt'].attrs['units'])
        if proc_chain._clk is not None and proc_chain._clk != clk:
            print(
                "Somehow you managed to set multiple clock frequencies...Using "
                + str(proc_chain._clk))
        else:
            proc_chain._clk = clk

    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par +
                  ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            # check if this is waveform
            if is_waveform(buf_in):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda,
                                            'float32')
                register_clock(buf_in)

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])
        args = recipe['args']
        for i, arg in enumerate(args):
            if isinstance(arg, str) and arg[0:3] == 'db.':
                args[i] = resolve_db_arg(arg, recipe)

        kwargs = recipe.get('kwargs', {})  # might also need db lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function
        init_args = recipe.get('init_args')
        if init_args is not None:
            for i, arg in enumerate(init_args):
                if isinstance(arg, str) and arg[0:3] == 'db.':
                    arg = init_args[i] = resolve_db_arg(arg, recipe)

                # see if string can be parsed by proc_chain; if not, the
                # string is passed to the factory unchanged
                if isinstance(arg, str):
                    try:
                        init_args[i] = proc_chain.get_variable(arg)
                    except Exception:
                        pass

            if verbosity > 1:
                print("Building function",
                      getattr(func, '__name__', repr(func)),
                      "from init_args", init_args)
            func = func(*init_args)
        proc_chain.add_processor(func, *args, **kwargs)

    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)

    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table):
            # check if this is waveform
            if is_waveform(buf_in):
                lh5_out.add_field(copy_par, buf_in['values'])
                register_clock(buf_in)
        else:
            print("I don't know what to do with " + copy_par +
                  ". Building output without it!")

    # finally, add the output buffers to lh5_out and the proc chain.
    # Unit scaling is relative to the chain's clock; without a clock no
    # conversion is possible and the buffers are left unscaled.
    clk = getattr(proc_chain, '_clk', None)
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = split_names(recipe).index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit']
            # a single string applies to every output; a list is per output
            if not isinstance(unit, str):
                unit = unit[i]
        else:
            unit = recipe['unit']

        scale = None
        if clk is not None:
            try:
                scale = convert(1, unit_parser.parse_unit(unit), clk)
            except InvalidConversion:
                scale = None

        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units": unit}))
    return (proc_chain, lh5_out)
Beispiel #7
0
def dsp_to_hit_cage(f_dsp, f_hit, dg, n_max=None, verbose=False, t_start=None):
    """
    Non-general placeholder for creating a pygama 'hit' file, using pandas.

    Reads the table ``dg.config['input_table']`` from ``f_dsp`` and applies:
    - energy calibration (peakfit results read from ``dg.config['ecaldb']``)
    - timestamp rollover correction, stored as 'ts_sec'
    - an optional global timestamp 'ts_glo' when ``t_start`` is given
    The resulting columns are written to ``f_hit`` (an existing file is
    removed first).  ``n_max`` and ``verbose`` are accepted but not used.

    For a more general dsp_to_hit, maybe each function could be given in
    terms of an 'apply' on a dsp dataframe ...

    TODO: create entry config['rawe'] with list of energy pars to calibrate,
    as in energy_cal.py
    """
    energy_pars = ['trapEmax']

    # create initial 'hit' DataFrame from dsp data
    reader = lh5.Store()
    dsp_table = reader.read_object(dg.config['input_table'], f_dsp)
    df_hit = dsp_table.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open(dg.config['ecaldb']) as f:
        raw_db = json.load(f)
        cal_db.storage.write(raw_db)
    run_list = dg.fileDB.run.unique()
    if len(run_list) > 1:
        print("sorry, I can't do combined runs yet")
        exit()
    run = run_list[0]
    for epar in energy_pars:
        records = cal_db.table(f'peakfit_{epar}').all()
        cal_frame = pd.DataFrame(records)
        cal_frame['run'] = cal_frame['run'].astype(int)
        run_rows = cal_frame.loc[cal_frame.run == run]
        # cal0, cal1, cal2 are handed to np.poly1d in exactly this order
        calibrate = np.poly1d(run_rows.iloc[0][['cal0', 'cal1', 'cal2']])
        df_hit[f'{epar}_cal'] = calibrate(df_hit[epar])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    UINT_MAX = 4294967295  # (0xffffffff)
    t_max = UINT_MAX / clock
    ts = df_hit['timestamp'].values / clock
    steps = np.insert(np.diff(ts), 0, 0)
    # a negative step marks the first sample after a counter wrap
    wraps = np.where(steps < 0)[0]
    # each block runs from one wrap (or the start) up to the next wrap
    # (or the end)
    block_starts = [0, *wraps]
    block_stops = [*wraps, len(ts)]
    corrected, t_roll = [], 0
    for lo, hi in zip(block_starts, block_stops):
        block = ts[lo:hi]
        # for the first block lo - 1 is -1, i.e. the last sample
        t_prev = ts[lo - 1]
        corrected.append(block + t_roll)
        t_roll += t_prev + (t_max - t_prev)
    df_hit['ts_sec'] = np.concatenate(corrected)

    # 3. compute global timestamp
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file
    if os.path.exists(f_hit):
        os.remove(f_hit)
    writer = lh5.Store()
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    hit_table = lh5.Table(size=len(df_hit))

    for column in df_hit.columns:
        values = lh5.Array(df_hit[column].values, attrs={'units': ''})
        hit_table.add_field(column, values)
        print(column)

    print(f'Writing table: {tb_name} in file:\n   {f_hit}')
    writer.write_object(hit_table, tb_name, f_hit)
def build_processing_chain(lh5_in, dsp_config, db_dict = None,
                           outputs = None, verbosity=1, block_width=16):
    """
    Produces a ProcessingChain object and an lh5 table for output parameters
    from an input lh5 table and a json recipe.
    
    Parameters
    ----------
    lh5_in : lgdo.Table
        HDF5 table from which raw data is read. At least one row of entries
        should be read in prior to calling this!
    dsp_config: dict or str
        A dict or json filename containing the recipes for computing DSP
        parameter from raw parameters. A dict is deep-copied before use, so
        the caller's object is never modified. The format is as follows:
        {
            "outputs" : [ "parnames", ... ] -> list of output parameters
                 to compute by default; see outputs parameter.
            "processors" : {
                 "name1, ..." : { -> names of parameters computed
                      "function" : str -> name of function to call. Function
                           should implement the gufunc interface, a factory
                           function returning a gufunc, or an arbitrary
                           function that can be mapped onto a gufunc
                      "module" : str -> name of module containing function
                      "args" : [ str or numeric, ... ] -> list of names of
                           computed and input parameters or constant values
                           used as inputs to function. Note that outputs
                           should be fed by reference as args! Arguments read
                           from the database are prepended with db.
                      "kwargs" : dict -> keyword arguments for
                           ProcessingChain.add_processor.
                      "init_args" : [ str or numeric, ... ] -> list of names
                           of computed and input parameters or constant values
                           used to initialize a gufunc via a factory function
                      "unit" : str or [ strs, ... ] -> units for parameters.
                           For a processor with several outputs, a single
                           str is applied to every output.
                      "defaults" : dict -> default value to be used for
                           arguments read from the database
                      "prereqs" : DEPRECATED [ strs, ...] -> list of parameters
                           that must be computed before these can
                 }
    outputs: [str, ...] (optional)
        List of parameters to put in the output lh5 table. If None,
        use the parameters in the 'outputs' list from config
    db_dict: dict (optional)
        A nested dict pointing to values for db args. e.g. if a processor
        uses arg db.trap.risetime, it will look up
          db_dict['trap']['risetime']
        and use the found value. If no value is found, use the default
        defined in the config file.
    verbosity : int (optional)
        0: Print nothing (except errors...)
        1: Print basic warnings (default)
        2: Print basic debug info
        3: Print friggin' everything!    
    block_width : int (optional)
        number of entries to process at once. To optimize performance,
        a multiple of 16 is preferred, but if performance is not an issue
        any value can be used.
    
    Returns
    -------
    (proc_chain, field_mask, lh5_out) : tuple
        proc_chain : ProcessingChain object that is executed
        field_mask : List of input fields that are used
        lh5_out : output lh5 table containing processed values

    Raises
    ------
    ProcessingChainError
        If the processors contain a circular dependency, or if a db.
        argument is found neither in db_dict nor in the processor's
        'defaults'.
    """
    proc_chain = ProcessingChain(block_width, lh5_in.size, verbosity = verbosity)
    
    if isinstance(dsp_config, str):
        with open(dsp_config) as f:
            dsp_config = json.load(f)
    elif dsp_config is None:
        dsp_config = {'outputs':[], 'processors':{}}
    else:
        # We don't want to modify the input!
        dsp_config = deepcopy(dsp_config)

    if outputs is None:
        outputs = dsp_config['outputs']

    processors = dsp_config['processors']

    def split_key(key):
        # A processor key may name several outputs, separated by commas
        # and/or spaces
        return [k for k in re.split(",| ", key) if k != '']

    def is_waveform(buf):
        # A table is treated as a waveform only if all three fields exist
        return all(field in buf for field in ('t0', 'dt', 'values'))

    def register_clock(buf):
        # Derive the clock period from this waveform's own dt field and
        # store it in the proc chain; the first clock that was set wins
        clk = buf['dt'].nda[0] * unit_parser.parse_unit(buf['dt'].attrs['units'])
        if proc_chain._clk is not None and proc_chain._clk != clk:
            print("Somehow you managed to set multiple clock frequencies...Using " + str(proc_chain._clk))
        else:
            proc_chain._clk = clk

    def is_db_arg(arg):
        return isinstance(arg, str) and arg[0:3] == 'db.'

    def lookup_db_arg(arg, defaults):
        # Walk db_dict along the dotted path after 'db.'; fall back on the
        # processor's defaults if any step of the path is missing
        try:
            value = db_dict
            for key in arg[3:].split('.'):
                value = value[key]
        except (KeyError, TypeError):
            try:
                value = defaults[arg]
            except (KeyError, TypeError):
                raise ProcessingChainError(
                    'Did not find ' + arg
                    + ' in database, and could not find default value.')
            if verbosity>0:
                print("Database lookup: using default value of", value, "for", arg)
        else:
            if verbosity>0:
                print("Database lookup: found", value, "for", arg)
        return value
    
    # prepare the processor list
    multi_out_procs = {}
    for key, node in processors.items():
        # if we have multiple outputs, add each to the processors list
        keys = split_key(key)
        if len(keys)>1:
            for k in keys:
                multi_out_procs[k] = key

        # parse the arguments list for prereqs, if not included explicitly
        if 'prereqs' not in node:
            prereqs = []
            for arg in node['args']:
                if not isinstance(arg, str): continue
                for prereq in proc_chain.get_variable(arg, True):
                    if prereq not in prereqs and prereq not in keys and prereq != 'db':
                        prereqs.append(prereq)
            node['prereqs'] = prereqs

        if verbosity>=2:
            print("Prereqs for", key, "are", node['prereqs'])

    # each individual output name maps to the (str) key of its processor
    processors.update(multi_out_procs)
    
    # Recursive function to crawl through the parameters/processors and get
    # a sequence of unique parameters such that parameters always appear after
    # their dependencies. For parameters that are not produced by the ProcChain
    # (i.e. input/db parameters), add them to the list of leafs
    # https://www.electricmonk.nl/docs/dependency_resolving_algorithm/dependency_resolving_algorithm.html
    def resolve_dependencies(par, resolved, leafs, unresolved=None):
        # a fresh list per top-level call, so that a failed resolution cannot
        # leak state into later calls
        if unresolved is None:
            unresolved = []

        if par in resolved:
            return
        if par in unresolved:
            raise ProcessingChainError(
                'Circular references detected: %s'
                % ' -> '.join(unresolved + [par]))

        # if we don't find a node, this is a leaf
        node = processors.get(par)
        if node is None:
            if par not in leafs:
                leafs.append(par)
            return

        # if it's a string, that means it is part of a processor that returns
        # multiple outputs (see above); in that case, node is a str pointing
        # to the actual node we want
        if isinstance(node, str):
            resolve_dependencies(node, resolved, leafs, unresolved)
            return
        
        unresolved.append(par)
        for edge in node['prereqs']:
            resolve_dependencies(edge, resolved, leafs, unresolved)
        resolved.append(par)
        unresolved.remove(par)

    proc_par_list = [] # calculated from processors
    input_par_list = [] # input from file and used for processors
    copy_par_list = [] # copied from input to output
    out_par_list = []
    for out_par in outputs:
        if out_par not in processors:
            copy_par_list.append(out_par)
        else:
            resolve_dependencies(out_par, proc_par_list, input_par_list)
            out_par_list.append(out_par)

    if verbosity>0:
        print('Processing parameters:', str(proc_par_list))
        print('Required input parameters:', str(input_par_list))
        print('Copied output parameters:', str(copy_par_list))
        print('Processed output parameters:', str(out_par_list))
    
    # Now add all of the input buffers from lh5_in (and also the clk time)
    for input_par in input_par_list:
        buf_in = lh5_in.get(input_par)
        if buf_in is None:
            print("I don't know what to do with " + input_par + ". Building output without it!")
        elif isinstance(buf_in, lh5.Array):
            proc_chain.add_input_buffer(input_par, buf_in.nda)
        elif isinstance(buf_in, lh5.Table):
            # check if this is waveform
            if is_waveform(buf_in):
                proc_chain.add_input_buffer(input_par, buf_in['values'].nda, 'float32')
                register_clock(buf_in)

    # now add the processors
    for proc_par in proc_par_list:
        recipe = processors[proc_par]
        module = importlib.import_module(recipe['module'])
        func = getattr(module, recipe['function'])
        defaults = recipe.get('defaults')

        # replace db. arguments in place with their looked-up values
        args = recipe['args']
        for i, arg in enumerate(args):
            if is_db_arg(arg):
                args[i] = lookup_db_arg(arg, defaults)
            
        kwargs = recipe.get('kwargs', {}) # might also need db lookup here

        # if init_args are defined, parse any strings and then call func
        # as a factory/constructor function. This is tested explicitly
        # (rather than by catching KeyError) so that a KeyError raised while
        # building the function is not silently swallowed.
        if 'init_args' in recipe:
            init_args = recipe['init_args']
            for i, arg in enumerate(init_args):
                if is_db_arg(arg):
                    arg = init_args[i] = lookup_db_arg(arg, defaults)

                # see if string can be parsed by proc_chain
                if isinstance(arg, str):
                    init_args[i] = proc_chain.get_variable(arg)
                    
            if verbosity>1:
                print("Building function", func.__name__, "from init_args", init_args)
            func = func(*init_args)

        proc_chain.add_processor(func, *args, **kwargs)

    
    # build the output buffers
    lh5_out = lh5.Table(size=proc_chain._buffer_len)
    
    # add inputs that are directly copied
    for copy_par in copy_par_list:
        buf_in = lh5_in.get(copy_par)
        if isinstance(buf_in, lh5.Array):
            lh5_out.add_field(copy_par, buf_in)
        elif isinstance(buf_in, lh5.Table):
            # check if this is waveform
            if is_waveform(buf_in):
                lh5_out.add_field(copy_par, buf_in['values'])
                register_clock(buf_in)
        else:
            print("I don't know what to do with " + copy_par + ". Building output without it!")
    
    # finally, add the output buffers to lh5_out and the proc chain
    for out_par in out_par_list:
        recipe = processors[out_par]
        # special case for proc with multiple outputs
        if isinstance(recipe, str):
            i = split_key(recipe).index(out_par)
            recipe = processors[recipe]
            unit = recipe['unit']
            # a single str is shared by all outputs; a list has one per output
            if not isinstance(unit, str):
                unit = unit[i]
        else:
            unit = recipe['unit']
        
        # Scale using the clock stored in the proc chain. Without any
        # waveform input no clock is known, so no conversion is attempted.
        # NOTE(review): confirm that leaving such outputs unscaled is the
        # desired behavior for time-like units.
        scale = None
        if proc_chain._clk is not None:
            try:
                scale = convert(1, unit_parser.parse_unit(unit), proc_chain._clk)
            except InvalidConversion:
                scale = None
        
        buf_out = proc_chain.get_output_buffer(out_par, unit=scale)
        lh5_out.add_field(out_par, lh5.Array(buf_out, attrs={"units":unit}) )

    field_mask = input_par_list + copy_par_list
    return (proc_chain, field_mask, lh5_out)