def convert_btag_csv_file(csvFilePath):
    btag_f = open(csvFilePath)
    nameandcols = btag_f.readline().split(';')
    btag_f.close()
    name = nameandcols[0].strip()
    columns = nameandcols[1].strip()
    columns = [column.strip() for column in columns.split(',')]

    corrections = np.genfromtxt(csvFilePath,
                                dtype=None,
                                names=tuple(columns),
                                converters={1: lambda s: s.strip(),
                                            2: lambda s: s.strip(),
                                            10: lambda s: s.strip(' "')},
                                delimiter=',',
                                skip_header=1,
                                unpack=True,
                                encoding='ascii'
                                )

    all_names = corrections[[columns[i] for i in range(4)]]
    labels = np.unique(corrections[[columns[i] for i in range(4)]])
    wrapped_up = {}
    for label in labels:
        etaMins = np.unique(corrections[np.where(all_names == label)][columns[4]])
        etaMaxs = np.unique(corrections[np.where(all_names == label)][columns[5]])
        etaBins = np.union1d(etaMins, etaMaxs)
        ptMins = np.unique(corrections[np.where(all_names == label)][columns[6]])
        ptMaxs = np.unique(corrections[np.where(all_names == label)][columns[7]])
        ptBins = np.union1d(ptMins, ptMaxs)
        discrMins = np.unique(corrections[np.where(all_names == label)][columns[8]])
        discrMaxs = np.unique(corrections[np.where(all_names == label)][columns[9]])
        discrBins = np.union1d(discrMins, discrMaxs)
        vals = np.zeros(shape=(len(discrBins) - 1, len(ptBins) - 1, len(etaBins) - 1),
                        dtype=corrections.dtype[10])
        for i, eta_bin in enumerate(etaBins[:-1]):
            for j, pt_bin in enumerate(ptBins[:-1]):
                for k, discr_bin in enumerate(discrBins[:-1]):
                    this_bin = np.where((all_names == label) &
                                        (corrections[columns[4]] == eta_bin) &
                                        (corrections[columns[6]] == pt_bin) &
                                        (corrections[columns[8]] == discr_bin))
                    vals[k, j, i] = corrections[this_bin][columns[10]][0]
        label_decode = []
        for i in range(len(label)):
            label_decode.append(label[i])
            if isinstance(label_decode[i], bytes):
                label_decode[i] = label_decode[i].decode()
            else:
                label_decode[i] = str(label_decode[i])
        str_label = '_'.join([name] + label_decode)
        feval_dim = btag_feval_dims[label[0]]
        wrapped_up[(str_label, 'dense_evaluated_lookup')] = (vals, (etaBins, ptBins, discrBins), tuple(feval_dim))
    return wrapped_up
Example #2
0
def _parse_jme_formatted_file(jmeFilePath,
                              interpolatedFunc=False,
                              parmsFromColumns=False):
    jme_f = open(jmeFilePath, 'r')
    layoutstr = jme_f.readline().strip().strip('{}')
    jme_f.close()

    name = jmeFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout[0].isdigit():
        raise Exception('First column of JME File Header must be a digit!')

    #setup the file format
    nBinnedVars = int(layout[0])
    nBinColumns = 2 * nBinnedVars
    nEvalVars = int(layout[nBinnedVars + 1])
    formula = layout[nBinnedVars + nEvalVars + 2]
    nParms = 0
    while (formula.count('[%i]' % nParms)):
        formula = formula.replace('[%i]' % nParms, 'p%i' % nParms)
        nParms += 1
    #get rid of TMath
    tmath = {'TMath::Max': 'max', 'TMath::Log': 'log', 'TMath::Power': 'pow'}
    for key, rpl in tmath.items():
        formula = formula.replace(key, rpl)
    #protect function names with vars in them
    funcs_to_cap = ['max', 'exp', 'pow']
    for f in funcs_to_cap:
        formula = formula.replace(f, f.upper())

    templatevars = ['x', 'y', 'z', 'w', 't', 's']
    varnames = [layout[i + nBinnedVars + 2] for i in range(nEvalVars)]
    for find, replace in zip(templatevars, varnames):
        formula = formula.replace(find, replace)
    #restore max
    for f in funcs_to_cap:
        formula = formula.replace(f.upper(), f)
    nFuncColumns = 2 * nEvalVars + nParms
    nTotColumns = nFuncColumns + 1

    #parse the columns
    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    columns.append('NVars')
    dtypes.append('<i8')
    offset += nBinnedVars + 1
    if not interpolatedFunc:
        for i in range(nEvalVars):
            columns.extend(
                ['%s%s' % (layout[i + offset], mm) for mm in minMax])
            dtypes.extend(['<f8', '<f8'])
    for i in range(nParms):
        columns.append('p%i' % i)
        dtypes.append('<f8')

    pars = np.genfromtxt(jmeFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         unpack=True,
                         encoding='ascii')
    if parmsFromColumns:
        nParms = pars[columns[nBinnedVars + 1]][0]
        for i in range(nParms):
            columns.append('p%i' % i)
            dtypes.append('<f8')
        pars = np.genfromtxt(jmeFilePath,
                             dtype=tuple(dtypes),
                             names=tuple(columns),
                             skip_header=1,
                             unpack=True,
                             encoding='ascii')

    outs = [
        name, layout, pars, nBinnedVars, nBinColumns, nEvalVars, formula,
        nParms, columns, dtypes
    ]
    return tuple(outs)
Example #3
0
def convert_effective_area_file(eaFilePath):
    ea_f = open(eaFilePath, 'r')
    layoutstr = ea_f.readline().strip().strip('{}')
    ea_f.close()

    name = eaFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout[0].isdigit():
        raise Exception(
            'First column of Effective Area File Header must be a digit!')

    #setup the file format
    nBinnedVars = int(layout[0])
    nBinColumns = 2 * nBinnedVars
    nEvalVars = int(layout[nBinnedVars + 1])

    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.append('%s' % (layout[i + offset]))
        dtypes.append('<f8')

    pars = np.genfromtxt(eaFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         unpack=True,
                         encoding='ascii')

    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        binMins = None
        binMaxs = None
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            counts = np.zeros(0, dtype=np.int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                binMins = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col]])
                binMaxs = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                counts, allBins)
        bin_order.append(layout[i + offset_name])
        offset_col += 1

    # again this is only for one dimension of binning, fight me
    # we can figure out a 2D EA when we get there
    offset_name += 1
    wrapped_up = {}
    lookup_type = 'dense_lookup'
    dims = bins[layout[1]]
    for i in range(nEvalVars):
        ea_name = '_'.join([name, columns[offset_name + i]])
        values = pars[columns[offset_name + i]]
        wrapped_up[(ea_name, lookup_type)] = (values, dims)

    return wrapped_up