def convert_histo_json_file(filename):
    """Convert a JSON file of nested histograms into dense lookup tables.

    The JSON document is read as ``info[directory][hist_title]``.  Every
    histogram is un-nested with ``extract_json_histo_structure`` and
    ``extract_json_histo_values`` and then packed into one array per value
    name stored in that histogram.

    Parameters
    ----------
    filename : str
        Path of the JSON file to read.

    Returns
    -------
    dict
        Maps ``('<directory>/<hist_title>_<value name>', 'dense_lookup')`` to
        ``(values, bin_edges)``; ``values`` is the transposed array of bin
        contents and ``bin_edges`` is the tuple of axis binnings in axis
        order.
    """
    # context manager: the handle is released even if the JSON is malformed
    with open(filename) as json_file:
        info = json.load(json_file)

    names_and_orders = {}
    names_and_axes = {}
    names_and_binvalues = {}
    names_and_valnames = {}

    # first pass, convert info['dir']['hist_title'] to dir/hist_title
    # and un-nest everything from the json structure, make binnings, etc.
    for dirname, histograms in info.items():
        for htitle, histogram in histograms.items():
            axis_order = []  # keep the axis order
            axes = {}
            bins_and_values = {}
            val_names = set()
            extract_json_histo_structure(histogram, axis_order, axes)
            extract_json_histo_values(histogram, [], bins_and_values,
                                      val_names)
            histname = '%s/%s' % (dirname, htitle)
            names_and_axes[histname] = axes
            names_and_orders[histname] = axis_order
            names_and_binvalues[histname] = bins_and_values
            names_and_valnames[histname] = val_names

    # second pass, turn each histogram into dense arrays
    wrapped_up = {}
    for name, axes in names_and_axes.items():
        axis_order = names_and_orders[name]
        bin_values = names_and_binvalues[name]
        theshape = tuple(axes[axis].size - 1 for axis in axis_order)
        nbins = np.zeros(shape=theshape).size
        flatidx = np.arange(nbins)
        # shape is passed positionally: the old `dims=` keyword no longer
        # exists in current NumPy releases
        binidx = np.unravel_index(flatidx, theshape)

        # key of every bin (the edge found at the bin's own index on each
        # axis), computed once and shared by all value names
        bin_keys = [
            tuple(axes[axis][binidx[idim][iflat]]
                  for idim, axis in enumerate(axis_order))
            for iflat in flatidx
        ]
        bins_in_order = tuple(axes[axis] for axis in axis_order)

        # use the value names of *this* histogram, not those of whichever
        # histogram happened to be read last in the first pass
        for vname in names_and_valnames[name]:
            flat_values = np.zeros(nbins)
            for iflat, key in enumerate(bin_keys):
                flat_values[iflat] = bin_values[key][vname]
            wrapped_up[(name + '_' + vname, 'dense_lookup')] = (
                flat_values.reshape(theshape).T, bins_in_order)
    return wrapped_up
def get_lumi(self, runlumis):
    """
        Return the integrated luminosity of a set of (run, lumi) pairs.
        runlumis: 2d numpy array of [[run,lumi], [run,lumi], ...] or LumiList object
    """
    # a LumiList carries its pairs in `.array`; anything else is used as is
    pairs = runlumis.array if isinstance(runlumis, LumiList) else runlumis
    run_column = pairs[:, 0]
    lumi_column = pairs[:, 1]
    # one-element buffer that the kernel fills in place
    total = np.zeros((1, ), dtype=np.float64)
    LumiData.get_lumi_kernel(run_column, lumi_column, self.index, total)
    return total[0]
def test_lumidata():
    """Check the summed luminosity of the first ten rows of the sample CSV."""
    lumidata = LumiData("tests/samples/lumi_small.csv")

    # copy the run and lumi columns of the first ten rows into a uint32 table
    runslumis = np.zeros((10, 2), dtype=np.uint32)
    for column in (0, 1):
        runslumis[:, column] = lumidata._lumidata[0:10, column]

    lumi = lumidata.get_lumi(runslumis)
    diff = abs(lumi - 1.539941814)
    print("lumi:", lumi, "diff:", diff)
    assert (diff < 0.1)
def convert_btag_csv_file(csvFilePath):
    """Convert a b-tagging scale-factor CSV file into evaluated lookups.

    The first line of the file is read as ``<name>;<col0>, <col1>, ...``.
    The first four columns together form the label of a correction,
    columns 4-9 are used as the (min, max) pairs of the eta, pt and
    discriminator binning, and column 10 holds the value stored for each bin
    (after stripping spaces and double quotes).

    Parameters
    ----------
    csvFilePath : str
        Path of the CSV file.

    Returns
    -------
    dict
        Maps ``('<name>_<label fields joined by _>', 'dense_evaluated_lookup')``
        to ``(vals, (etaBins, ptBins, discrBins), feval_dim)`` where ``vals``
        is indexed as ``[discr, pt, eta]`` and ``feval_dim`` comes from
        ``btag_feval_dims`` keyed by the first label field.
    """
    # context manager: the handle is released even if reading the header fails
    with open(csvFilePath) as btag_f:
        nameandcols = btag_f.readline().split(';')
    name = nameandcols[0].strip()
    columns = [column.strip()
               for column in nameandcols[1].strip().split(',')]

    # NOTE: no `unpack=True` here.  The result is a structured array, and
    # NumPy >= 1.20 honours `unpack` for structured output by returning a
    # list of per-column arrays, which cannot be indexed by field name.
    # Older NumPy ignored the flag, so dropping it keeps their behaviour.
    corrections = np.genfromtxt(csvFilePath,
                                dtype=None,
                                names=tuple(columns),
                                converters={
                                    1: lambda s: s.strip(),
                                    2: lambda s: s.strip(),
                                    10: lambda s: s.strip(' "')
                                },
                                delimiter=',',
                                skip_header=1,
                                encoding='ascii')

    all_names = corrections[[columns[i] for i in range(4)]]
    labels = np.unique(all_names)
    wrapped_up = {}
    for label in labels:
        # rows belonging to this label; computed once instead of per lookup
        label_mask = (all_names == label)
        label_rows = corrections[np.where(label_mask)]

        etaMins = np.unique(label_rows[columns[4]])
        etaMaxs = np.unique(label_rows[columns[5]])
        etaBins = np.union1d(etaMins, etaMaxs)
        ptMins = np.unique(label_rows[columns[6]])
        ptMaxs = np.unique(label_rows[columns[7]])
        ptBins = np.union1d(ptMins, ptMaxs)
        discrMins = np.unique(label_rows[columns[8]])
        discrMaxs = np.unique(label_rows[columns[9]])
        discrBins = np.union1d(discrMins, discrMaxs)

        vals = np.zeros(shape=(len(discrBins) - 1, len(ptBins) - 1,
                               len(etaBins) - 1),
                        dtype=corrections.dtype[10])
        for i, eta_bin in enumerate(etaBins[:-1]):
            eta_mask = label_mask & (corrections[columns[4]] == eta_bin)
            for j, pt_bin in enumerate(ptBins[:-1]):
                pt_mask = eta_mask & (corrections[columns[6]] == pt_bin)
                for k, discr_bin in enumerate(discrBins[:-1]):
                    this_bin = np.where(
                        pt_mask & (corrections[columns[8]] == discr_bin))
                    # bins are matched on their lower edges; take the first
                    # matching row
                    vals[k, j, i] = corrections[this_bin][columns[10]][0]

        # label fields may be bytes or str/numbers depending on how the
        # column was parsed; normalise all of them to str
        label_decode = []
        for i in range(len(label)):
            field = label[i]
            if isinstance(field, bytes):
                label_decode.append(field.decode())
            else:
                label_decode.append(str(field))
        str_label = '_'.join([name] + label_decode)
        feval_dim = btag_feval_dims[label[0]]
        wrapped_up[(str_label, 'dense_evaluated_lookup')] = (
            vals, (etaBins, ptBins, discrBins), tuple(feval_dim))
    return wrapped_up
def parseGeneratorHistory(gp_pdgId_in, gp_parent_in):
    """Build, per generator particle, a bitmap of the ancestors it descends from.

    Every pdgId listed in ``activePdgIds`` is mapped to its bit (|pdgId| == 24
    is matched sign-sensitively, everything else on the absolute value).  The
    parent links are then followed upwards, OR-ing the bit of each ancestor
    into the particle's result, for at most 50 steps.

    gp_pdgId_in:  jagged array of pdgIds (uses .offsets, .parents, .content)
    gp_parent_in: jagged array of per-event parent indices, negative meaning
                  "no parent" (uses .content)

    Raises Exception if some particle still has a valid ancestor after 50
    iterations.  Returns a JaggedArray with the offsets of gp_pdgId_in.
    """
    max_iterations = 50
    bit_of_pdgid = activePdgIds

    # index manipulation: turn per-event parent indices into global ones
    offsets = gp_pdgId_in.offsets
    event_of_particle = gp_pdgId_in.parents
    event_start = offsets[event_of_particle].astype('i4')
    pdg_ids = gp_pdgId_in.content
    local_parent = gp_parent_in.content
    ancestor_index = local_parent + event_start
    ancestor_valid = (local_parent >= 0)

    # create parentage bitmaps
    pdg_bits = np.zeros(shape=pdg_ids.shape, dtype='u4')
    for pdgId, bit in bit_of_pdgid.items():
        if abs(pdgId) == 24:
            matches = (pdg_ids == pdgId)
        else:
            matches = (np.abs(pdg_ids) == pdgId)
        pdg_bits[matches] = bit

    ancestry = np.zeros(shape=pdg_ids.shape, dtype='u4')
    ancestor_bits = np.empty_like(pdg_bits)
    ancestor_parent = np.empty_like(ancestor_index)

    steps = 0
    while steps < max_iterations and np.any(ancestor_valid):
        # look up the current ancestor's bit and its own parent
        np.take(pdg_bits, ancestor_index, out=ancestor_bits, mode='clip')
        np.take(local_parent, ancestor_index, out=ancestor_parent,
                mode='clip')
        # accumulate only where the ancestor link is still valid
        np.bitwise_or(ancestry, ancestor_bits, where=ancestor_valid,
                      out=ancestry)
        np.bitwise_and(ancestor_valid, ancestor_parent >= 0,
                       where=ancestor_valid, out=ancestor_valid)
        # step one generation up
        np.add(ancestor_parent, event_start, out=ancestor_index)
        steps += 1

    if steps == max_iterations and np.any(ancestor_valid):
        raise Exception('reached 50 iterations, gen particles not trustable')
    return awkward.JaggedArray.fromoffsets(offsets, ancestry)
def fill(self, **values):
    """Accumulate entries into the histogram.

    One keyword argument per axis (keyed by the axis name) is required; an
    optional ``weight`` keyword supplies the weights.  Raises ValueError when
    an axis is missing from ``values``.
    """
    if any(axis.name not in values for axis in self._axes):
        raise ValueError("Not all axes specified for this histogram!")

    weighted = "weight" in values
    if weighted and self._sumw2 is None:
        self._init_sumw2()

    # locate (and create on first use) the storage of this sparse bin
    sparse_key = tuple(axis.index(values[axis.name])
                       for axis in self.sparse_axes())
    if sparse_key not in self._sumw:
        self._sumw[sparse_key] = np.zeros(shape=self._dense_shape,
                                          dtype=self._dtype)
        if self._sumw2 is not None:
            self._sumw2[sparse_key] = np.zeros(shape=self._dense_shape,
                                               dtype=self._dtype)

    if self.dense_dim() > 0:
        dense_indices = tuple(axis.index(values[axis.name])
                              for axis in self._axes
                              if isinstance(axis, DenseAxis))
        if weighted:
            np.add.at(self._sumw[sparse_key], dense_indices,
                      values["weight"])
            np.add.at(self._sumw2[sparse_key], dense_indices,
                      values["weight"]**2)
            return
        np.add.at(self._sumw[sparse_key], dense_indices, 1.)
        if self._sumw2 is not None:
            np.add.at(self._sumw2[sparse_key], dense_indices, 1.)
        return

    # no dense axes: the sparse bin holds a single running sum
    if weighted:
        self._sumw[sparse_key] += np.sum(values["weight"])
        self._sumw2[sparse_key] += np.sum(values["weight"]**2)
        return
    self._sumw[sparse_key] += 1.
    if self._sumw2 is not None:
        self._sumw2[sparse_key] += 1.
def __init__(self, values, dims, feval_dim=None):
    """Build a lookup whose bin contents are compiled formula strings.

    Parameters
    ----------
    values : numpy.ndarray
        Array of formula strings, one per bin.  Each entry is compiled with
        ``numbaize(formula, ['x'])``.
    dims : numpy.ndarray or sequence
        Bin edges.  A single ndarray is treated as one dimension; otherwise
        the number of dimensions is ``len(dims)``.
    feval_dim : sequence
        Evaluation dimension(s); exactly one is supported and its first
        element is stored.

    Raises
    ------
    TypeError
        If ``values`` is not a numpy array.
    Exception
        If the dimension cannot be determined, the values are not strings,
        ``feval_dim`` is missing, or more than one evaluation dimension is
        requested.
    """
    super(dense_evaluated_lookup, self).__init__()
    self._dimension = 0
    whattype = type(dims)
    # isinstance so that ndarray subclasses also count as a single axis
    if isinstance(dims, np.ndarray):
        self._dimension = 1
    else:
        self._dimension = len(dims)
    if self._dimension == 0:
        raise Exception(
            'Could not define dimension for {}'.format(whattype))
    self._axes = deepcopy(dims)
    self._feval_dim = None

    # the type check has to come before any use of `values.dtype`, otherwise
    # a non-array argument dies with AttributeError instead of this message
    if not isinstance(values, np.ndarray):
        raise TypeError('values is not a numpy array, but %r' % type(values))
    dtype_name = values.dtype.name
    vals_are_strings = any(tag in dtype_name
                           for tag in ('string', 'str', 'unicode', 'bytes'))
    if not vals_are_strings:
        raise Exception(
            'Non-string values passed to dense_evaluated_lookup!')
    if feval_dim is None:
        raise Exception(
            'Evaluation dimensions not specified in dense_evaluated_lookup')

    # compile every formula; np.ndindex walks the bins in C order, the same
    # order as unravelling a running flat index (whose `dims=` keyword is no
    # longer accepted by NumPy)
    funcs = np.zeros(shape=values.shape, dtype='O')
    for idx in np.ndindex(*values.shape):
        funcs[idx] = numbaize(values[idx], ['x'])
    self._values = deepcopy(funcs)

    # TODO: support for multidimensional functions and functions with variables other than 'x'
    if len(feval_dim) > 1:
        raise Exception(
            'lookup_tools.evaluator only accepts 1D functions right now!')
    self._feval_dim = feval_dim[0]
def _build_standard_jme_lookup(name,
                               layout,
                               pars,
                               nBinnedVars,
                               nBinColumns,
                               nEvalVars,
                               formula,
                               nParms,
                               columns,
                               dtypes,
                               interpolatedFunc=False):
    """Assemble the pieces of a 'jme_standard_function' lookup.

    Parameters
    ----------
    name : str
        Name under which the lookup is registered.
    layout : sequence of str
        Header tokens; ``layout[1:]`` holds the binned-variable names and
        ``layout[nBinnedVars + 2:]`` the evaluated-variable names.
    pars : mapping of column name -> array (structured array when there is
        more than one binned variable, since rows are then selected)
        The parsed table.
    nBinnedVars, nEvalVars, nParms : int
        Number of binned variables, evaluated variables and parameters.
    nBinColumns, dtypes :
        Unused here; kept for interface compatibility.
    formula : str
        Formula stored as-is in the result.
    columns : sequence of str
        Column names of ``pars`` in file order.
    interpolatedFunc : bool
        When true the table has no clamp columns, so none are read and the
        parameter columns start right after the binning columns.

    Returns
    -------
    dict
        ``{(name, 'jme_standard_function'): (formula, (bins, bin_order),
        (clamp_mins, clamp_maxs, var_order), (parms, parm_order))}``
    """
    # the first bin is always usual for JECs
    # the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        bin_name = layout[i + offset_name]
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[bin_name] = np.union1d(binMins, binMaxs)
        else:
            # builtin int: the `np.int` alias no longer exists in NumPy
            counts = np.zeros(0, dtype=int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                # rows of this first-variable bin, selected once
                rows = pars[np.where(pars[columns[0]] == binMin)]
                binMins = np.unique(rows[columns[i + offset_col]])
                binMaxs = np.unique(rows[columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[bin_name] = awkward.JaggedArray.fromcounts(counts, allBins)
        bin_order.append(bin_name)
        offset_col += 1

    # skip nvars to the variable columns
    # the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    offset_col = 2 * nBinnedVars + 1
    offset_name = nBinnedVars + 2
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=int)
    if len(bin_order) > 1:
        # need counts-1 since we only care about Nbins
        jagged_counts = np.maximum(bins[bin_order[1]].counts - 1, 0)
    for i in range(nEvalVars):
        var_name = layout[i + offset_name]
        var_order.append(var_name)
        if not interpolatedFunc:
            clamp_mins[var_name] = awkward.JaggedArray.fromcounts(
                jagged_counts, np.atleast_1d(pars[columns[i + offset_col]]))
            clamp_maxs[var_name] = awkward.JaggedArray.fromcounts(
                jagged_counts,
                np.atleast_1d(pars[columns[i + offset_col + 1]]))
            offset_col += 1

    # now get the parameters, which we will look up with the clamped values
    parms = []
    parm_order = []
    offset_col = 2 * nBinnedVars + 1
    # same truthiness test as the clamp loop above, so the clamp columns are
    # skipped exactly when they were read
    if not interpolatedFunc:
        offset_col += 2 * nEvalVars
    for i in range(nParms):
        parms.append(
            awkward.JaggedArray.fromcounts(jagged_counts,
                                           pars[columns[i + offset_col]]))
        parm_order.append('p%i' % (i))

    wrapped_up = {}
    wrapped_up[(name, 'jme_standard_function')] = (formula,
                                                   (bins, bin_order),
                                                   (clamp_mins, clamp_maxs,
                                                    var_order),
                                                   (parms, parm_order))
    return wrapped_up
def convert_effective_area_file(eaFilePath):
    """Convert an effective-area text file into dense lookups.

    The first line is a brace-wrapped header of the form
    ``{<nBinnedVars> <binned var names...> <nEvalVars> <eval names...>}``;
    the remaining lines are whitespace-separated float columns: a (min, max)
    pair per binned variable followed by one column per evaluated quantity.

    Parameters
    ----------
    eaFilePath : str
        Path of the file.  The lookup names are prefixed with the file name
        up to its first ``.``.

    Returns
    -------
    dict
        Maps ``('<file stem>_<column name>', 'dense_lookup')`` to
        ``(values, dims)`` where ``dims`` is the binning of the first binned
        variable.

    Raises
    ------
    Exception
        If the first header token is not a digit string.
    """
    # context manager: the handle is released even if reading the header fails
    with open(eaFilePath, 'r') as ea_f:
        layoutstr = ea_f.readline().strip().strip('{}')

    name = eaFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout[0].isdigit():
        raise Exception(
            'First column of Effective Area File Header must be a digit!')

    # setup the file format
    nBinnedVars = int(layout[0])
    nEvalVars = int(layout[nBinnedVars + 1])

    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.append('%s' % (layout[i + offset]))
        dtypes.append('<f8')

    # NOTE: no `unpack=True` here.  The result is a structured array, and
    # NumPy >= 1.20 honours `unpack` for structured output by returning a
    # list of per-column arrays, which cannot be indexed by field name.
    pars = np.genfromtxt(eaFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         encoding='ascii')
    # a file with a single data row parses to a 0-d array; make it 1-d so
    # the values line up with the bin edges
    pars = np.atleast_1d(pars)

    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        bin_name = layout[i + offset_name]
        if i == 0:
            binMins = np.unique(pars[columns[0]])
            binMaxs = np.unique(pars[columns[1]])
            bins[bin_name] = np.union1d(binMins, binMaxs)
        else:
            # builtin int: the `np.int` alias no longer exists in NumPy
            counts = np.zeros(0, dtype=int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                # rows of this first-variable bin, selected once
                rows = pars[np.where(pars[columns[0]] == binMin)]
                binMins = np.unique(rows[columns[i + offset_col]])
                binMaxs = np.unique(rows[columns[i + offset_col + 1]])
                theBins = np.union1d(binMins, binMaxs)
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[bin_name] = awkward.JaggedArray.fromcounts(counts, allBins)
        bin_order.append(bin_name)
        offset_col += 1

    # again this is only for one dimension of binning, fight me
    # we can figure out a 2D EA when we get there
    offset_name += 1
    wrapped_up = {}
    lookup_type = 'dense_lookup'
    dims = bins[layout[1]]
    for i in range(nEvalVars):
        column = columns[offset_name + i]
        ea_name = '_'.join([name, column])
        values = pars[column]
        wrapped_up[(ea_name, lookup_type)] = (values, dims)
    return wrapped_up
def __call__(self, runs, lumis):
    """Return a boolean array, shaped like ``runs``, filled by the mask kernel.

    The output buffer starts all-False and is handed, together with the
    stored masks and the run/lumi arrays, to
    ``LumiMask.apply_run_lumi_mask``, which writes into it in place.
    """
    selected = np.zeros(runs.shape, dtype='bool')
    LumiMask.apply_run_lumi_mask(self._masks, runs, lumis, selected)
    return selected
def clear(self):
    """Reset the stored pairs to an empty table with two columns."""
    empty_pairs = np.zeros((0, 2))
    self.array = empty_pairs
def __init__(self, runs=None, lumis=None):
    """Store the unique (run, lumi) pairs, or an empty table if no runs given.

    runs, lumis: equally long sequences; they are paired column-wise and
    duplicate rows are dropped.
    """
    # start from an empty two-column table
    self.array = np.zeros((0, 2))
    if runs is None:
        return
    paired = np.c_[runs, lumis]
    self.array = np.unique(paired, axis=0)
def dense_op(array):
    """Sum the bins of ``array`` into a new array following ``binmap``.

    Old bin ``i`` is added into new bin ``binmap[i]``; ``view_ax`` turns a
    bin number into the index used on both arrays.  The result has the
    dense shape and dtype of ``out``.
    """
    merged = np.zeros(out._dense_shape, dtype=out._dtype)
    for old_bin, new_bin in enumerate(binmap):
        destination = view_ax(new_bin)
        merged[destination] += array[view_ax(old_bin)]
    return merged
def __call__(self, runs, lumis):
    """Return a boolean array, shaped like ``runs``, flagging selected entries.

    For every distinct run that has an entry in ``self._masks``, a lumi is
    selected when its ``np.searchsorted`` position in that run's array is
    odd.  Runs without an entry stay False.
    """
    selected = np.zeros(runs.shape, dtype='bool')
    for run_number in np.unique(runs):
        if run_number not in self._masks:
            continue
        odd_position = np.searchsorted(self._masks[run_number], lumis) % 2 == 1
        in_this_run = (runs == run_number)
        selected |= odd_position & in_this_run
    return selected