def convert_btag_csv_file(csvFilePath):
    """Convert a b-tagging CSV file into dense evaluated lookup tables.

    The first line of the file is split on ``';'``: the part before it is
    used as the correction name, the part after it is a comma-separated list
    of column names.  The remaining lines are the comma-separated table.

    Columns 0-3 together form the label of one lookup.  Columns 4/5, 6/7 and
    8/9 hold the lower/upper edges of the three binned variables (called eta,
    pt and discr in this function) and column 10 holds the value stored in
    each bin (surrounding spaces and double quotes are stripped).

    Args:
        csvFilePath: path of the CSV file to read.

    Returns:
        dict mapping ``(label_string, 'dense_evaluated_lookup')`` to
        ``(vals, (etaBins, ptBins, discrBins), feval_dim)`` where
        ``label_string`` is the name and the four label fields joined by
        ``'_'``, ``vals`` is indexed ``[discr, pt, eta]`` and ``feval_dim`` is
        ``tuple(btag_feval_dims[label[0]])``.

    Raises:
        IndexError: if the header has no ``';'`` separated column list, fewer
            than 11 columns, or a (eta, pt, discr) bin has no matching row.
        KeyError: if the first label field is not a key of
            ``btag_feval_dims``.
    """
    # Only the header line is needed here; the table is parsed by numpy below.
    with open(csvFilePath) as btag_f:
        nameandcols = btag_f.readline().split(';')
    name = nameandcols[0].strip()
    columns = [column.strip() for column in nameandcols[1].strip().split(',')]

    # NOTE: ``unpack=True`` must not be used here.  With a structured result
    # numpy >= 1.20 would return a plain list of column arrays, and the field
    # lookups below need the structured array itself.
    corrections = np.genfromtxt(
        csvFilePath,
        dtype=None,
        names=tuple(columns),
        converters={1: lambda s: s.strip(),
                    2: lambda s: s.strip(),
                    10: lambda s: s.strip(' "')},
        delimiter=',',
        skip_header=1,
        encoding='ascii',
    )

    label_columns = [columns[i] for i in range(4)]
    all_names = corrections[label_columns]
    labels = np.unique(all_names)

    # Lower-edge columns used to locate the row of each bin.
    eta_low = corrections[columns[4]]
    pt_low = corrections[columns[6]]
    discr_low = corrections[columns[8]]

    def bin_edges(rows, low_col, high_col):
        # union1d already returns the sorted unique values of both columns.
        return np.union1d(rows[columns[low_col]], rows[columns[high_col]])

    wrapped_up = {}
    for label in labels:
        is_label = all_names == label
        # Select the rows of this label once instead of once per column.
        rows = corrections[np.where(is_label)]

        etaBins = bin_edges(rows, 4, 5)
        ptBins = bin_edges(rows, 6, 7)
        discrBins = bin_edges(rows, 8, 9)

        vals = np.zeros(
            shape=(len(discrBins) - 1, len(ptBins) - 1, len(etaBins) - 1),
            dtype=corrections.dtype[10],
        )
        for i, eta_bin in enumerate(etaBins[:-1]):
            for j, pt_bin in enumerate(ptBins[:-1]):
                for k, discr_bin in enumerate(discrBins[:-1]):
                    this_bin = np.where(is_label
                                        & (eta_low == eta_bin)
                                        & (pt_low == pt_bin)
                                        & (discr_low == discr_bin))
                    # First row whose lower edges match this bin.
                    vals[k, j, i] = corrections[this_bin][columns[10]][0]

        # Label fields may be bytes or str depending on how numpy parsed them.
        label_decode = []
        for i in range(len(label)):
            field = label[i]
            if isinstance(field, bytes):
                label_decode.append(field.decode())
            else:
                label_decode.append(str(field))

        str_label = '_'.join([name] + label_decode)
        feval_dim = btag_feval_dims[label[0]]
        wrapped_up[(str_label, 'dense_evaluated_lookup')] = (
            vals, (etaBins, ptBins, discrBins), tuple(feval_dim))
    return wrapped_up
def _parse_jme_formatted_file(jmeFilePath, interpolatedFunc=False, parmsFromColumns=False): jme_f = open(jmeFilePath, 'r') layoutstr = jme_f.readline().strip().strip('{}') jme_f.close() name = jmeFilePath.split('/')[-1].split('.')[0] layout = layoutstr.split() if not layout[0].isdigit(): raise Exception('First column of JME File Header must be a digit!') #setup the file format nBinnedVars = int(layout[0]) nBinColumns = 2 * nBinnedVars nEvalVars = int(layout[nBinnedVars + 1]) formula = layout[nBinnedVars + nEvalVars + 2] nParms = 0 while (formula.count('[%i]' % nParms)): formula = formula.replace('[%i]' % nParms, 'p%i' % nParms) nParms += 1 #get rid of TMath tmath = {'TMath::Max': 'max', 'TMath::Log': 'log', 'TMath::Power': 'pow'} for key, rpl in tmath.items(): formula = formula.replace(key, rpl) #protect function names with vars in them funcs_to_cap = ['max', 'exp', 'pow'] for f in funcs_to_cap: formula = formula.replace(f, f.upper()) templatevars = ['x', 'y', 'z', 'w', 't', 's'] varnames = [layout[i + nBinnedVars + 2] for i in range(nEvalVars)] for find, replace in zip(templatevars, varnames): formula = formula.replace(find, replace) #restore max for f in funcs_to_cap: formula = formula.replace(f.upper(), f) nFuncColumns = 2 * nEvalVars + nParms nTotColumns = nFuncColumns + 1 #parse the columns minMax = ['Min', 'Max'] columns = [] dtypes = [] offset = 1 for i in range(nBinnedVars): columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax]) dtypes.extend(['<f8', '<f8']) columns.append('NVars') dtypes.append('<i8') offset += nBinnedVars + 1 if not interpolatedFunc: for i in range(nEvalVars): columns.extend( ['%s%s' % (layout[i + offset], mm) for mm in minMax]) dtypes.extend(['<f8', '<f8']) for i in range(nParms): columns.append('p%i' % i) dtypes.append('<f8') pars = np.genfromtxt(jmeFilePath, dtype=tuple(dtypes), names=tuple(columns), skip_header=1, unpack=True, encoding='ascii') if parmsFromColumns: nParms = pars[columns[nBinnedVars + 1]][0] for i in 
range(nParms): columns.append('p%i' % i) dtypes.append('<f8') pars = np.genfromtxt(jmeFilePath, dtype=tuple(dtypes), names=tuple(columns), skip_header=1, unpack=True, encoding='ascii') outs = [ name, layout, pars, nBinnedVars, nBinColumns, nEvalVars, formula, nParms, columns, dtypes ] return tuple(outs)
def convert_effective_area_file(eaFilePath):
    """Convert an effective-area text file into dense lookup tables.

    The first line (with surrounding ``{}`` removed) is a whitespace-separated
    layout: the number of binned variables, their names, the number of value
    columns, and the value column names.  Every further line is a table row
    holding min/max for each binned variable followed by the values.

    Only a single binned variable is really supported: the lookup dimensions
    are always the bin edges of the first binned variable.

    Args:
        eaFilePath: path of the file to read.

    Returns:
        dict mapping ``(name + '_' + value_column, 'dense_lookup')`` to
        ``(values, dims)`` where ``name`` is the file name up to its first
        ``'.'``, ``values`` is the value column and ``dims`` the sorted unique
        bin edges of the first binned variable.

    Raises:
        Exception: if the header is empty or does not start with a digit.
    """
    # Only the header line is needed here; the table is parsed by numpy below.
    with open(eaFilePath, 'r') as ea_f:
        layoutstr = ea_f.readline().strip().strip('{}')

    name = eaFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    if not layout or not layout[0].isdigit():
        raise Exception(
            'First column of Effective Area File Header must be a digit!')

    # setup the file format
    nBinnedVars = int(layout[0])
    nEvalVars = int(layout[nBinnedVars + 1])

    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.append('%s' % (layout[i + offset]))
        dtypes.append('<f8')

    # NOTE: ``unpack=True`` must not be used here.  With a structured dtype
    # numpy >= 1.20 would return a plain list of column arrays, and the
    # lookups by column name below need the structured array itself.
    pars = np.genfromtxt(eaFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         encoding='ascii')

    bins = {}
    offset_col = 0
    offset_name = 1
    bin_order = []
    for i in range(nBinnedVars):
        if i == 0:
            # union1d already returns the sorted unique edges.
            bins[layout[i + offset_name]] = np.union1d(pars[columns[0]],
                                                       pars[columns[1]])
        else:
            # builtin int: the np.int alias was removed in numpy 1.24
            counts = np.zeros(0, dtype=int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                # rows belonging to this bin of the first variable
                rows = pars[np.where(pars[columns[0]] == binMin)]
                theBins = np.union1d(rows[columns[i + offset_col]],
                                     rows[columns[i + offset_col + 1]])
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[layout[i + offset_name]] = awkward.JaggedArray.fromcounts(
                counts, allBins)
        bin_order.append(layout[i + offset_name])
        # again this is only for one dimension of binning;
        # a 2D EA can be figured out when it is needed
        offset_col += 1
        offset_name += 1

    wrapped_up = {}
    lookup_type = 'dense_lookup'
    dims = bins[layout[1]]
    for i in range(nEvalVars):
        ea_name = '_'.join([name, columns[offset_name + i]])
        values = pars[columns[offset_name + i]]
        wrapped_up[(ea_name, lookup_type)] = (values, dims)
    return wrapped_up