Example #1
import json
import numpy as np

def convert_histo_json_file(filename):
    # extract_json_histo_structure / extract_json_histo_values are helper
    # functions defined elsewhere in the same module
    with open(filename) as file:
        info = json.load(file)
    names_and_orders = {}
    names_and_axes = {}
    names_and_binvalues = {}
    names_and_valnames = {}

    #first pass, convert info['dir']['hist_title'] to dir/hist_title
    #and un-nest everything from the json structure, make binnings, etc.
    for dir in info.keys():
        for htitle in info[dir].keys():
            axis_order = []  #keep the axis order
            axes = {}
            bins_and_values = {}
            val_names = set()
            extract_json_histo_structure(info[dir][htitle], axis_order, axes)
            extract_json_histo_values(info[dir][htitle], [], bins_and_values,
                                      val_names)
            histname = '%s/%s' % (dir, htitle)
            names_and_axes[histname] = axes
            names_and_orders[histname] = axis_order
            names_and_binvalues[histname] = bins_and_values
            names_and_valnames[histname] = val_names

    wrapped_up = {}
    for name, axes in names_and_axes.items():
        theshape = tuple(
            [axes[axis].size - 1 for axis in names_and_orders[name]])
        valsdict = {}
        for vname in names_and_valnames[name]:  # 'name', not the stale 'histname' from the first loop
            valsdict[vname] = np.zeros(shape=theshape).flatten()
        flatidx = np.arange(np.prod(theshape))
        binidx = np.unravel_index(flatidx, theshape)
        for vname in valsdict:
            for iflat in flatidx:
                binlows = []
                for idim, axis in enumerate(names_and_orders[name]):
                    binlows.append(axes[axis][binidx[idim][iflat]])
                thevals = names_and_binvalues[name][tuple(binlows)]
                valsdict[vname][iflat] = thevals[vname]
            valsdict[vname] = valsdict[vname].reshape(theshape)
        bins_in_order = []
        for axis in names_and_orders[name]:
            bins_in_order.append(axes[axis])
        for vname in valsdict:
            wrapped_up[(name + '_' + vname,
                        'dense_lookup')] = (valsdict[vname].T,
                                            tuple(bins_in_order))
    return wrapped_up
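The flatten/unravel bookkeeping in the second pass is easy to lose track of. Below is a minimal, self-contained sketch of the same pattern with toy axes (all names and numbers here are illustrative, not from the original module):

import numpy as np

# two axes of bin edges; bin count per axis is edges - 1
axes = {'x': np.array([0., 1., 2.]), 'y': np.array([0., 10., 20., 30.])}
order = ['x', 'y']
shape = tuple(axes[a].size - 1 for a in order)  # (2, 3)
# values keyed by the tuple of bin lower edges, as in the converter above
values_by_binlow = {(x, y): x + y
                    for x in axes['x'][:-1] for y in axes['y'][:-1]}

dense = np.zeros(shape).flatten()
flatidx = np.arange(dense.size)
binidx = np.unravel_index(flatidx, shape)
for i in flatidx:
    binlows = tuple(axes[a][binidx[d][i]] for d, a in enumerate(order))
    dense[i] = values_by_binlow[binlows]
dense = dense.reshape(shape)  # one value per (x, y) bin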
Example #2
def get_lumi(self, runlumis):
    """
    Return the integrated luminosity.

    runlumis: 2d numpy array of [[run, lumi], [run, lumi], ...] or a LumiList object
    """
    if isinstance(runlumis, LumiList):
        runlumis = runlumis.array
    tot_lumi = np.zeros((1, ), dtype=np.float64)
    LumiData.get_lumi_kernel(runlumis[:, 0], runlumis[:, 1], self.index, tot_lumi)
    return tot_lumi[0]
Example #3
def test_lumidata():
    lumidata = LumiData("tests/samples/lumi_small.csv")

    runslumis = np.zeros((10, 2), dtype=np.uint32)
    runslumis[:, 0] = lumidata._lumidata[0:10, 0]
    runslumis[:, 1] = lumidata._lumidata[0:10, 1]
    lumi = lumidata.get_lumi(runslumis)
    diff = abs(lumi - 1.539941814)
    print("lumi:", lumi, "diff:", diff)
    assert diff < 0.1
Example #4
def convert_btag_csv_file(csvFilePath):
    with open(csvFilePath) as btag_f:
        nameandcols = btag_f.readline().split(';')
    name = nameandcols[0].strip()
    columns = nameandcols[1].strip()
    columns = [column.strip() for column in columns.split(',')]

    corrections = np.genfromtxt(csvFilePath,
                                dtype=None,
                                names=tuple(columns),
                                converters={1: lambda s: s.strip(),
                                            2: lambda s: s.strip(),
                                            10: lambda s: s.strip(' "')},
                                delimiter=',',
                                skip_header=1,
                                unpack=True,
                                encoding='ascii'
                                )

    all_names = corrections[[columns[i] for i in range(4)]]
    labels = np.unique(all_names)
    wrapped_up = {}
    for label in labels:
        label_rows = corrections[np.where(all_names == label)]
        etaMins = np.unique(label_rows[columns[4]])
        etaMaxs = np.unique(label_rows[columns[5]])
        etaBins = np.union1d(etaMins, etaMaxs)
        ptMins = np.unique(label_rows[columns[6]])
        ptMaxs = np.unique(label_rows[columns[7]])
        ptBins = np.union1d(ptMins, ptMaxs)
        discrMins = np.unique(label_rows[columns[8]])
        discrMaxs = np.unique(label_rows[columns[9]])
        discrBins = np.union1d(discrMins, discrMaxs)
        vals = np.zeros(shape=(len(discrBins) - 1, len(ptBins) - 1, len(etaBins) - 1),
                        dtype=corrections.dtype[10])
        for i, eta_bin in enumerate(etaBins[:-1]):
            for j, pt_bin in enumerate(ptBins[:-1]):
                for k, discr_bin in enumerate(discrBins[:-1]):
                    this_bin = np.where((all_names == label) &
                                        (corrections[columns[4]] == eta_bin) &
                                        (corrections[columns[6]] == pt_bin) &
                                        (corrections[columns[8]] == discr_bin))
                    vals[k, j, i] = corrections[this_bin][columns[10]][0]
        label_decode = []
        for item in label:
            label_decode.append(item.decode() if isinstance(item, bytes) else str(item))
        str_label = '_'.join([name] + label_decode)
        feval_dim = btag_feval_dims[label[0]]
        wrapped_up[(str_label, 'dense_evaluated_lookup')] = (vals, (etaBins, ptBins, discrBins), tuple(feval_dim))
    return wrapped_up
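The recurring idiom in this converter is collapsing per-row (min, max) columns into a single sorted edge array with np.union1d. A tiny sketch with toy numbers:

import numpy as np

# toy eta ranges taken from four CSV rows
etaMins = np.array([-2.4, 0.0, -2.4, 0.0])
etaMaxs = np.array([0.0, 2.4, 0.0, 2.4])
etaBins = np.union1d(etaMins, etaMaxs)  # array([-2.4, 0., 2.4])
nbins = etaBins.size - 1                # two eta bins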
Example #5
def parseGeneratorHistory(gp_pdgId_in, gp_parent_in):
    # activePdgIds is a module-level {pdgId: bit} map; numpy and awkward
    # (0.x) are assumed to be imported at module scope
    inChain = activePdgIds
    #index manipulation
    offsets = gp_pdgId_in.offsets
    parents = gp_pdgId_in.parents
    pstarts = offsets[parents].astype('i4')
    gp_pdgId = gp_pdgId_in.content
    gp_ancestor = gp_parent_in.content + pstarts
    gp_ancestor_valid = (gp_parent_in.content >= 0)

    #create parentage bitmaps
    gp_pdgId_mapped = np.zeros(shape=gp_pdgId.shape, dtype='u4')
    for pdgId, bit in inChain.items():
        if abs(pdgId) == 24:
            gp_pdgId_mapped[gp_pdgId==pdgId] = bit
        else:
            gp_pdgId_mapped[np.abs(gp_pdgId)==pdgId] = bit
    
    gp_proc = np.zeros(shape=gp_pdgId.shape, dtype='u4')

    pdg_tmp = np.empty_like(gp_pdgId_mapped)
    parent_tmp = np.empty_like(gp_ancestor)
    niter = 0
    while np.any(gp_ancestor_valid) and niter < 50:
        np.take(gp_pdgId_mapped, gp_ancestor, out=pdg_tmp, mode='clip')
        np.take(gp_parent_in.content, gp_ancestor, out=parent_tmp, mode='clip')
        np.bitwise_or(gp_proc, pdg_tmp, where=gp_ancestor_valid, out=gp_proc)
        np.bitwise_and(gp_ancestor_valid, parent_tmp>=0, where=gp_ancestor_valid, out=gp_ancestor_valid)
        np.add(parent_tmp, pstarts, out=gp_ancestor)
        niter += 1

    # print('Parsed ancestor tree in %d iterations' % niter)

    if niter == 50 and np.any(gp_ancestor_valid):
        raise Exception('reached 50 iterations, gen particles not trustable')

    return awkward.JaggedArray.fromoffsets(offsets, gp_proc)
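The while loop above climbs the generator tree one parent step per iteration, OR-ing ancestor bit flags into each particle's mask. A self-contained sketch of the same idea on a flat toy event (arrays here are illustrative):

import numpy as np

parent = np.array([-1, 0, 0, 1], dtype=np.int64)  # -1 marks 'no parent'
flags = np.array([1, 2, 4, 8], dtype=np.uint32)   # one bit per particle

proc = np.zeros_like(flags)
ancestor = parent.copy()
valid = ancestor >= 0
while np.any(valid):
    # clip invalid (-1) indices to 0; the where= mask keeps them from contributing
    np.bitwise_or(proc, flags[np.clip(ancestor, 0, None)], where=valid, out=proc)
    ancestor = parent[np.clip(ancestor, 0, None)]
    valid &= ancestor >= 0
# proc[3] == 3: particle 3 descends from particles 1 and 0 (bits 2 | 1)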
Example #6
    def fill(self, **values):
        # values: one array (or scalar) per axis name, plus an optional "weight"
        if not all(d.name in values for d in self._axes):
            raise ValueError("Not all axes specified for this histogram!")

        if "weight" in values and self._sumw2 is None:
            self._init_sumw2()

        sparse_key = tuple(d.index(values[d.name]) for d in self.sparse_axes())
        if sparse_key not in self._sumw:
            self._sumw[sparse_key] = np.zeros(shape=self._dense_shape,
                                              dtype=self._dtype)
            if self._sumw2 is not None:
                self._sumw2[sparse_key] = np.zeros(shape=self._dense_shape,
                                                   dtype=self._dtype)

        if self.dense_dim() > 0:
            dense_indices = tuple(
                d.index(values[d.name]) for d in self._axes
                if isinstance(d, DenseAxis))
            if "weight" in values:
                np.add.at(self._sumw[sparse_key], dense_indices,
                          values["weight"])
                np.add.at(self._sumw2[sparse_key], dense_indices,
                          values["weight"]**2)
            else:
                np.add.at(self._sumw[sparse_key], dense_indices, 1.)
                if self._sumw2 is not None:
                    np.add.at(self._sumw2[sparse_key], dense_indices, 1.)
        else:
            if "weight" in values:
                self._sumw[sparse_key] += np.sum(values["weight"])
                self._sumw2[sparse_key] += np.sum(values["weight"]**2)
            else:
                self._sumw[sparse_key] += 1.
                if self._sumw2 is not None:
                    self._sumw2[sparse_key] += 1.
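For context, this fill() belongs to coffea's histogram class; a hedged usage sketch, assuming the coffea 0.x hist API (the dataset and axis names here are made up):

import numpy as np
from coffea import hist

h = hist.Hist("Counts",
              hist.Cat("dataset", "Dataset name"),
              hist.Bin("pt", "pt [GeV]", 20, 0, 100))
h.fill(dataset="ttbar", pt=np.random.exponential(25., size=1000))
# weighted fill: triggers lazy allocation of the sum-of-squares arrays
h.fill(dataset="ttbar", pt=np.array([50.]), weight=np.array([0.5]))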
Example #7
def __init__(self, values, dims, feval_dim=None):
    super(dense_evaluated_lookup, self).__init__()
    self._dimension = 0
    whattype = type(dims)
    if isinstance(dims, np.ndarray):
        self._dimension = 1
    else:
        self._dimension = len(dims)
    if self._dimension == 0:
        raise Exception(
            'Could not define dimension for {}'.format(whattype))
    self._axes = deepcopy(dims)
    self._feval_dim = None
    # check the container type before touching values.dtype
    if not isinstance(values, np.ndarray):
        raise TypeError('values is not a numpy array, but %r' % type(values))
    vals_are_strings = ('string' in values.dtype.name
                        or 'str' in values.dtype.name
                        or 'unicode' in values.dtype.name
                        or 'bytes' in values.dtype.name)
    if not vals_are_strings:
        raise Exception('Non-string values passed to dense_evaluated_lookup!')
    if feval_dim is None:
        raise Exception(
            'Evaluation dimensions not specified in dense_evaluated_lookup')
    # compile each formula string into a callable, kept in an object array
    funcs = np.zeros(shape=values.shape, dtype='O')
    for i in range(values.size):
        idx = np.unravel_index(i, values.shape)
        funcs[idx] = numbaize(values[idx], ['x'])
    self._values = deepcopy(funcs)
    # TODO: support for multidimensional functions and functions with
    # variables other than 'x'
    if len(feval_dim) > 1:
        raise Exception(
            'lookup_tools.evaluator only accepts 1D functions right now!')
    self._feval_dim = feval_dim[0]
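numbaize (coffea's formula compiler) turns each string into a callable stored in an object array. As a rough stand-in, the same grid can be sketched with plain lambdas; eval here is illustrative only, not what the library does:

import numpy as np

values = np.array([['x*2', 'x+1'], ['x**2', '3*x']])
funcs = np.zeros(shape=values.shape, dtype='O')  # object array of callables
for i in range(values.size):
    idx = np.unravel_index(i, values.shape)
    funcs[idx] = eval('lambda x: ' + values[idx])
assert funcs[0, 1](2.0) == 3.0  # 'x+1' evaluated at x=2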
Example #8
def _build_standard_jme_lookup(name,
                               layout,
                               pars,
                               nBinnedVars,
                               nBinColumns,
                               nEvalVars,
                               formula,
                               nParms,
                               columns,
                               dtypes,
                               interpolatedFunc=False):
    #the first bin is always usual for JECs
    #the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        binMins = None
        binMaxs = None
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            counts = np.zeros(0, dtype=np.int64)
            allBins = np.zeros(0, dtype=np.float64)
            for binMin in bins[bin_order[0]][:-1]:
                binMins = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col]])
                binMaxs = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                counts, allBins)
        bin_order.append(layout[i + offset_name])
        offset_col += 1

    #skip nvars to the variable columns
    #the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    binshapes = tuple([bins[thebin].size - 1 for thebin in bin_order])
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    offset_col = 2 * nBinnedVars + 1
    offset_name = nBinnedVars + 2
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=np.int64)
    if len(bin_order) > 1:
        jagged_counts = np.maximum(
            bins[bin_order[1]].counts - 1,
            0)  #need counts-1 since we only care about Nbins
    for i in range(nEvalVars):
        var_order.append(layout[i + offset_name])
        if not interpolatedFunc:
            clamp_mins[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                jagged_counts,
                np.atleast_1d(pars[columns[i + offset_col]]))
            clamp_maxs[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                jagged_counts,
                np.atleast_1d(pars[columns[i + offset_col + 1]]))
            offset_col += 1

    #now get the parameters, which we will look up with the clamped values
    parms = []
    parm_order = []
    offset_col = 2 * nBinnedVars + 1 + (0 if interpolatedFunc else 2 * nEvalVars)
    for i in range(nParms):
        parms.append(
            awkward.JaggedArray.fromcounts(jagged_counts,
                                           pars[columns[i + offset_col]]))
        parm_order.append('p%i' % (i))

    wrapped_up = {}
    wrapped_up[(name, 'jme_standard_function')] = (formula,
                                                   (bins, bin_order),
                                                   (clamp_mins, clamp_maxs, var_order),
                                                   (parms, parm_order))
    return wrapped_up
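The jagged binning above relies on awkward 0.x, where JaggedArray.fromcounts pairs a counts array with a flat content array. A minimal sketch, assuming that legacy API:

import numpy as np
import awkward  # awkward 0.x

counts = np.array([2, 3])                  # inner pt edges per outer eta bin
edges = np.array([0., 50., 0., 30., 60.])  # flattened inner edges
jagged = awkward.JaggedArray.fromcounts(counts, edges)
# jagged[0] -> [0., 50.]; jagged[1] -> [0., 30., 60.]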
Example #9
def convert_effective_area_file(eaFilePath):
    with open(eaFilePath, 'r') as ea_f:
        layoutstr = ea_f.readline().strip().strip('{}')

    name = eaFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout[0].isdigit():
        raise Exception(
            'First column of Effective Area File Header must be a digit!')

    #setup the file format
    nBinnedVars = int(layout[0])
    nBinColumns = 2 * nBinnedVars
    nEvalVars = int(layout[nBinnedVars + 1])

    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.append('%s' % (layout[i + offset]))
        dtypes.append('<f8')

    pars = np.genfromtxt(eaFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         unpack=True,
                         encoding='ascii')

    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        binMins = None
        binMaxs = None
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[layout[i + offset_name]] = np.union1d(binMins, binMaxs)
        else:
            counts = np.zeros(0, dtype=np.int64)
            allBins = np.zeros(0, dtype=np.float64)
            for binMin in bins[bin_order[0]][:-1]:
                binMins = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col]])
                binMaxs = np.unique(pars[np.where(
                    pars[columns[0]] == binMin)][columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                counts, allBins)
        bin_order.append(layout[i + offset_name])
        offset_col += 1

    # this currently handles only one dimension of binning;
    # a 2D effective area can be figured out when we get there
    offset_name += 1
    wrapped_up = {}
    lookup_type = 'dense_lookup'
    dims = bins[layout[1]]
    for i in range(nEvalVars):
        ea_name = '_'.join([name, columns[offset_name + i]])
        values = pars[columns[offset_name + i]]
        wrapped_up[(ea_name, lookup_type)] = (values, dims)

    return wrapped_up
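A self-contained sketch of the structured np.genfromtxt read used above, with a toy two-row effective-area table (column names are illustrative):

import io
import numpy as np

text = '1 eta 1 ea\n-1.0 0.0 0.12\n0.0 1.0 0.10\n'
pars = np.genfromtxt(io.StringIO(text),
                     dtype=('<f8', '<f8', '<f8'),
                     names=('etaMin', 'etaMax', 'ea'),
                     skip_header=1)
etaBins = np.union1d(pars['etaMin'], pars['etaMax'])  # array([-1., 0., 1.])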
Example #10
def __call__(self, runs, lumis):
    mask_out = np.zeros(dtype='bool', shape=runs.shape)
    LumiMask.apply_run_lumi_mask(self._masks, runs, lumis, mask_out)
    return mask_out
Example #11
def clear(self):
    self.array = np.zeros(shape=(0, 2))
Example #12
def __init__(self, runs=None, lumis=None):
    self.array = np.zeros(shape=(0, 2))
    if runs is not None:
        self.array = np.unique(np.c_[runs, lumis], axis=0)
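The np.c_ / np.unique(axis=0) combination above both sorts and de-duplicates the (run, lumi) pairs in one shot:

import numpy as np

runs = np.array([1, 1, 2, 1])
lumis = np.array([10, 10, 5, 11])
pairs = np.unique(np.c_[runs, lumis], axis=0)
# -> [[1 10], [1 11], [2  5]]; the duplicate (1, 10) row is dropped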
Example #13
def dense_op(array):
    # nested helper: out, binmap, and view_ax are captured from the
    # enclosing rebinning routine
    anew = np.zeros(out._dense_shape, dtype=out._dtype)
    for iold, inew in enumerate(binmap):
        anew[view_ax(inew)] += array[view_ax(iold)]
    return anew
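This helper implements rebinning by accumulation: every old bin index is routed to a new bin through binmap. The closure-free core of the idea, on a 1D toy array:

import numpy as np

old = np.array([1., 2., 3., 4.])
binmap = [0, 0, 1, 1]  # merge adjacent pairs of old bins
new = np.zeros(2)
for iold, inew in enumerate(binmap):
    new[inew] += old[iold]
# new -> [3., 7.]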
Example #14
def __call__(self, runs, lumis):
    mask = np.zeros(dtype='bool', shape=runs.shape)
    for run in np.unique(runs):
        if run in self._masks:
            # self._masks[run] is a sorted, flattened array of lumi-range edges;
            # an odd insertion index means the lumi sits inside a good range
            mask |= (np.searchsorted(self._masks[run], lumis) % 2 == 1) & (runs == run)
    return mask
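The parity trick above treats each run's mask as a sorted, flattened array of range edges; a lumi is inside a good range exactly when its insertion point is odd. A sketch using side='right' so that range starts are inclusive (the class above may use a slightly different edge convention):

import numpy as np

edges = np.array([5, 10, 20, 25])  # good lumis: [5, 10) and [20, 25)
lumis = np.array([4, 5, 9, 10, 22])
inside = np.searchsorted(edges, lumis, side='right') % 2 == 1
# -> [False, True, True, False, True]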