Example #1
def convert_junc_txt_component(juncFilePath, uncFile):
    name, layout, pars, nBinnedVars, \
        nBinColumns, nEvalVars, formula, \
        nParms, columns, dtypes = _parse_jme_formatted_file(juncFilePath,
                                                            interpolatedFunc=True,
                                                            parmsFromColumns=True,
                                                            jme_f=uncFile)

    temp = _build_standard_jme_lookup(name, layout, pars, nBinnedVars, nBinColumns,
                                      nEvalVars, formula, nParms, columns, dtypes,
                                      interpolatedFunc=True)
    wrapped_up = {}
    for key, val in temp.items():
        newkey = (key[0], 'jec_uncertainty_lookup')
        vallist = list(val)
        vals, names = vallist[-1]
        knots = vals[0:len(vals):3]
        downs = vals[1:len(vals):3]
        ups = vals[2:len(vals):3]
        downs = np.array([down.flatten() for down in downs])
        ups = np.array([up.flatten() for up in ups])
        for knotv in knots:
            knot = np.unique(knotv.flatten())
            if knot.size != 1:
                raise Exception('Multiple bin low edges found')
        knots = np.array([np.unique(k.flatten())[0] for k in knots])
        vallist[2] = ({'knots': knots, 'ups': ups.T, 'downs': downs.T}, vallist[2][-1])
        vallist = vallist[:-1]
        wrapped_up[newkey] = tuple(vallist)
    return wrapped_up
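
A minimal usage sketch of the converter above, assuming it is importable and that uncFile is a file-like handle to the uncertainty text (an assumption here); the file name is hypothetical:

junc_path = 'Summer16_Uncertainty_AK4PFchs.junc.txt'  # hypothetical file name
with open(junc_path) as unc_file:
    lookups = convert_junc_txt_component(junc_path, unc_file)
for (key_name, lookup_type), payload in lookups.items():
    print(key_name, lookup_type)  # every key is tagged 'jec_uncertainty_lookup'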
Example #2
    def _evaluate(self, *args):
        """ uncertainties = f(args) """
        bin_vals = {
            argname: args[self._dim_args[argname]]
            for argname in self._dim_order
        }
        eval_vals = {
            argname: args[self._eval_args[argname]]
            for argname in self._eval_vars
        }

        #lookup the bins that we care about
        dim1_name = self._dim_order[0]
        dim1_indices = np.clip(
            np.searchsorted(
                self._bins[dim1_name], bin_vals[dim1_name], side='right') - 1,
            0, self._bins[dim1_name].size - 2)

        #get clamp values and clip the inputs
        outs = np.ones(shape=(args[0].size, 2), dtype=np.float64)
        for i in np.unique(dim1_indices):
            mask = np.where(dim1_indices == i)
            vals = np.clip(eval_vals[self._eval_vars[0]][mask],
                           self._eval_knots[0], self._eval_knots[-1])
            outs[:, 0][mask] += self._eval_ups[i](vals)
            outs[:, 1][mask] -= self._eval_downs[i](vals)

        return outs
Example #3
def masked_bin_eval(dim1_indices, dimN_bins, dimN_vals):
    dimN_indices = np.empty_like(dim1_indices)
    for i in np.unique(dim1_indices):
        idx = np.where(dim1_indices == i)
        dimN_indices[idx] = np.clip(
            np.searchsorted(dimN_bins[i], dimN_vals[idx], side='right') - 1, 0,
            len(dimN_bins[i]) - 2)
    return dimN_indices
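
A small self-contained sketch (toy bin edges and values) of how masked_bin_eval resolves second-dimension bin indices once the first-dimension indices are known; it assumes numpy is imported as np and the function above is in scope:

dimN_bins = [np.array([0., 30., 60., 120.]), np.array([0., 50., 100.])]  # per-first-bin edges (toy)
dim1_indices = np.array([0, 0, 1, 1])  # first-dimension bin already found for each entry
dimN_vals = np.array([10., 75., 20., 99.])
print(masked_bin_eval(dim1_indices, dimN_bins, dimN_vals))  # -> [0 2 0 1]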
Example #4
def convert_btag_csv_file(csvFilePath):
    # the first line holds the correction name and the comma-separated column labels
    with open(csvFilePath) as btag_f:
        nameandcols = btag_f.readline().split(';')
    name = nameandcols[0].strip()
    columns = nameandcols[1].strip()
    columns = [column.strip() for column in columns.split(',')]

    corrections = np.genfromtxt(csvFilePath,
                                dtype=None,
                                names=tuple(columns),
                                converters={1: lambda s: s.strip(),
                                            2: lambda s: s.strip(),
                                            10: lambda s: s.strip(' "')},
                                delimiter=',',
                                skip_header=1,
                                unpack=True,
                                encoding='ascii'
                                )

    all_names = corrections[[columns[i] for i in range(4)]]
    labels = np.unique(all_names)
    wrapped_up = {}
    for label in labels:
        etaMins = np.unique(corrections[np.where(all_names == label)][columns[4]])
        etaMaxs = np.unique(corrections[np.where(all_names == label)][columns[5]])
        etaBins = np.union1d(etaMins, etaMaxs)
        ptMins = np.unique(corrections[np.where(all_names == label)][columns[6]])
        ptMaxs = np.unique(corrections[np.where(all_names == label)][columns[7]])
        ptBins = np.union1d(ptMins, ptMaxs)
        discrMins = np.unique(corrections[np.where(all_names == label)][columns[8]])
        discrMaxs = np.unique(corrections[np.where(all_names == label)][columns[9]])
        discrBins = np.union1d(discrMins, discrMaxs)
        vals = np.zeros(shape=(len(discrBins) - 1, len(ptBins) - 1, len(etaBins) - 1),
                        dtype=corrections.dtype[10])
        for i, eta_bin in enumerate(etaBins[:-1]):
            for j, pt_bin in enumerate(ptBins[:-1]):
                for k, discr_bin in enumerate(discrBins[:-1]):
                    this_bin = np.where((all_names == label) &
                                        (corrections[columns[4]] == eta_bin) &
                                        (corrections[columns[6]] == pt_bin) &
                                        (corrections[columns[8]] == discr_bin))
                    vals[k, j, i] = corrections[this_bin][columns[10]][0]
        label_decode = []
        for i in range(len(label)):
            label_decode.append(label[i])
            if isinstance(label_decode[i], bytes):
                label_decode[i] = label_decode[i].decode()
            else:
                label_decode[i] = str(label_decode[i])
        str_label = '_'.join([name] + label_decode)
        feval_dim = btag_feval_dims[label[0]]
        wrapped_up[(str_label, 'dense_evaluated_lookup')] = (vals, (etaBins, ptBins, discrBins), tuple(feval_dim))
    return wrapped_up
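
A minimal usage sketch; the CSV file name is hypothetical and the converter above is assumed to be importable:

lookups = convert_btag_csv_file('DeepCSV_2016LegacySF_V1.csv')  # hypothetical b-tag SF file
for (label, lookup_type), (values, bin_edges, feval_dim) in lookups.items():
    print(label, lookup_type, values.shape)  # one dense (discr, pt, eta) grid of formula strings per label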
Example #5
def extract_json_histo_structure(parselevel, axis_names, axes):
    if 'value' in parselevel.keys():
        return
    name = list(parselevel)[0].split(':')[0]
    bins_pairs = [
        key.split(':')[-1].strip('[]').split(',') for key in parselevel.keys()
    ]
    bins = []
    for pair in bins_pairs:
        bins.extend([float(val) for val in pair])
    bins.sort()
    bins = np.unique(np.array(bins))
    axis_names.append(name.encode())
    axes[axis_names[-1]] = bins
    extract_json_histo_structure(parselevel[list(parselevel)[0]], axis_names,
                                 axes)
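
A self-contained sketch with a toy nested payload (axis names and edges invented for illustration), showing how the recursion collects one set of bin edges per nesting level; numpy is assumed to be imported as np:

payload = {
    'eta:[0.0,1.5]': {'pt:[20.0,50.0]': {'value': 1.05}, 'pt:[50.0,100.0]': {'value': 1.10}},
    'eta:[1.5,2.5]': {'pt:[20.0,50.0]': {'value': 1.20}, 'pt:[50.0,100.0]': {'value': 1.15}},
}
axis_names, axes = [], {}
extract_json_histo_structure(payload, axis_names, axes)
print(axis_names)                 # [b'eta', b'pt']
print(axes[b'eta'], axes[b'pt'])  # [0.  1.5 2.5] [ 20.  50. 100.]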
Example #6
def _build_standard_jme_lookup(name,
                               layout,
                               pars,
                               nBinnedVars,
                               nBinColumns,
                               nEvalVars,
                               formula,
                               nParms,
                               columns,
                               dtypes,
                               interpolatedFunc=False):
    #the first bin is always usual for JECs
    #the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        binMins = None
        binMaxs = None
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            counts = np.zeros(0, dtype=int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                binMins = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col]])
                binMaxs = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                counts, allBins)
        bin_order.append(layout[i + offset_name])
        offset_col += 1

    #skip nvars to the variable columns
    #the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    binshapes = tuple([bins[thebin].size - 1 for thebin in bin_order])
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    offset_col = 2 * nBinnedVars + 1
    offset_name = nBinnedVars + 2
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=int)
    if len(bin_order) > 1:
        jagged_counts = np.maximum(
            bins[bin_order[1]].counts - 1,
            0)  #need counts-1 since we only care about Nbins
    for i in range(nEvalVars):
        var_order.append(layout[i + offset_name])
        if not interpolatedFunc:
            clamp_mins[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                jagged_counts, np.atleast_1d(pars[columns[i + offset_col]]))
            clamp_maxs[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                jagged_counts, np.atleast_1d(pars[columns[i + offset_col + 1]]))
            offset_col += 1

    #now get the parameters, which we will look up with the clamped values
    parms = []
    parm_order = []
    offset_col = 2 * nBinnedVars + 1 + (not interpolatedFunc) * 2 * nEvalVars
    for i in range(nParms):
        parms.append(
            awkward.JaggedArray.fromcounts(jagged_counts,
                                           pars[columns[i + offset_col]]))
        parm_order.append('p%i' % (i))

    wrapped_up = {}
    wrapped_up[(name, 'jme_standard_function')] = (formula,
                                                   (bins, bin_order),
                                                   (clamp_mins, clamp_maxs, var_order),
                                                   (parms, parm_order))
    return wrapped_up
Example #7
def convert_effective_area_file(eaFilePath):
    # the header line (wrapped in braces) lists the binned and evaluated variables
    with open(eaFilePath, 'r') as ea_f:
        layoutstr = ea_f.readline().strip().strip('{}')

    name = eaFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout[0].isdigit():
        raise Exception(
            'First column of Effective Area File Header must be a digit!')

    #setup the file format
    nBinnedVars = int(layout[0])
    nBinColumns = 2 * nBinnedVars
    nEvalVars = int(layout[nBinnedVars + 1])

    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.append('%s' % (layout[i + offset]))
        dtypes.append('<f8')

    pars = np.genfromtxt(eaFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         unpack=True,
                         encoding='ascii')

    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        binMins = None
        binMaxs = None
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            counts = np.zeros(0, dtype=int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                binMins = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col]])
                binMaxs = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                counts, allBins)
        bin_order.append(layout[i + offset_name])
        offset_col += 1

    # again this is only for one dimension of binning, fight me
    # we can figure out a 2D EA when we get there
    offset_name += 1
    wrapped_up = {}
    lookup_type = 'dense_lookup'
    dims = bins[layout[1]]
    for i in range(nEvalVars):
        ea_name = '_'.join([name, columns[offset_name + i]])
        values = pars[columns[offset_name + i]]
        wrapped_up[(ea_name, lookup_type)] = (values, dims)

    return wrapped_up
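
A minimal usage sketch; the effective-area file name is hypothetical:

lookups = convert_effective_area_file('effAreas_cone03_pfNeuHadrons.txt')  # hypothetical file
for (ea_name, lookup_type), (values, eta_edges) in lookups.items():
    print(ea_name, lookup_type, values, eta_edges)  # one dense_lookup payload per evaluated column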
Example #8
    def __init__(self, runs=None, lumis=None):
        self.array = np.zeros(shape=(0, 2))
        if runs is not None:
            self.array = np.unique(np.c_[runs, lumis], axis=0)
Example #9
    def __call__(self, runs, lumis):
        mask = np.zeros(dtype='bool', shape=runs.shape)
        for run in np.unique(runs):
            if run in self._masks:
                mask |= (np.searchsorted(self._masks[run], lumis) % 2 == 1) & (runs == run)
        return mask
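
The method above relies on a parity trick: each run maps to a flat, sorted array of boundaries delimiting its good luminosity ranges, so a lumi strictly inside a good range lands at an odd insertion index. A self-contained sketch with toy boundaries (numpy imported as np):

edges = np.array([1, 11, 50, 61])  # toy boundaries marking two good lumi ranges
lumis = np.array([5, 25, 55, 200])
print(np.searchsorted(edges, lumis) % 2 == 1)  # [ True False  True False]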