def make_fractions_single(output_dir, channel, variable, components_dict, category='inclusive', systematic='nominal'):
    '''Write the histograms of one systematic variation to the fractions ROOT file.

    The file ``<output_dir>/htt_<channel>_for_FF_fractions.root`` is opened in
    UPDATE mode. Histograms go into a directory named after the channel, the
    variable and the category; the directory is reused when it already exists.

    Arguments:
        output_dir: directory holding the ROOT file.
        channel: channel key, also used to look up ``channels_names``.
        variable: variable name, part of the directory name.
        components_dict: maps a component name to an object exposing a
            ``histogram`` attribute (a ROOT histogram).
        category: category name, part of the directory name.
        systematic: systematic label; ``'nominal'`` keeps the bare component
            name, any other label is appended to it.

    A histogram whose name is already present in the directory is left
    untouched. Histograms whose name starts with ``fakes_`` are scaled by -1
    before being written.
    '''
    rootfilename = '_'.join(['htt', channel, 'for_FF_fractions'])
    rootfile = TFile('{}/{}.root'.format(output_dir, rootfilename), 'UPDATE')

    rootdirname = '_'.join([channels_names[channel], variable, category])
    rootdir = rootfile.GetDirectory(rootdirname)
    if not rootdir:
        rootdir = TDirectoryFile(rootdirname, rootdirname)
    rootdir.cd()

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for key, component in components_dict.items():
        if systematic == 'nominal':
            histname = key
        else:
            histname = '_'.join([key, systematic])
            histname = histname.replace('up', 'Up').replace('down', 'Down')

        # Do not write a second copy of a histogram that is already stored.
        if rootdir.Get(histname):
            continue

        hist = component.histogram.Clone(histname)
        if histname.startswith('fakes_'):
            hist.Scale(-1)
        hist.SetTitle(key)
        hist.Write()

    rootfile.Close()
def FiltFunc(args):
    """
    Filter the sparses of each configured input file and save them to a ROOT file.

    The configuration is read from args.cfgFileName and the cut set from the
    first entry of args.cutSetFileNames. For every input file the reco sparses
    (and the gen sparses when the configuration has 'isMC' set) are filtered,
    optionally plotted (args.plot), swapped into the list loaded from the
    original file and written out together with the cut object and the
    normalisation counter.

    The output file name is the input file name with args.suffix inserted
    before '.root'; without a suffix the input file name itself is used, and
    the file is opened in 'recreate' mode.
    """

    def _readYaml(path):
        # load one YAML file with the full loader
        with open(path, 'r') as stream:
            return yaml.load(stream, yaml.FullLoader)

    def _swapIntoList(objList, filtSparses):
        # replace every object of the list that has the name of a filtered sparse
        for filtSparse in filtSparses.values():
            objList.Remove(objList.FindObject(filtSparse.GetName()))
            objList.Add(filtSparse)

    inputCfg = _readYaml(args.cfgFileName)
    cutSetCfg = _readYaml(args.cutSetFileNames[0])
    cutVars = cutSetCfg['cutvars']
    axesToKeep = cutSetCfg['axestokeep']

    fileNames = inputCfg['filename']
    if not isinstance(fileNames, list):
        fileNames = [fileNames]

    for iFile, inName in enumerate(fileNames):
        recoSparses, genSparses = LoadSparseFromTask(inName, inputCfg)

        # filter sparses
        filtReco = FilterSparses(recoSparses, cutVars, axesToKeep)
        if inputCfg['isMC']:
            filtGen = FilterSparses(genSparses, cutVars, axesToKeep)

        # plot filtered sparses (each variable vs pt)
        if args.plot:
            PlotFiltVarsVsPt(filtReco, 'cReco{0}'.format(iFile))
            if inputCfg['isMC']:
                PlotFiltVarsVsPt(filtGen, 'cGen{0}'.format(iFile))

        # get other objects from original file
        objList = LoadListFromTask(inName, inputCfg)
        _, normCounter = LoadNormObjFromTask(inName, inputCfg)
        cutObj, cutObjName = LoadCutObjFromTask(inName, inputCfg)

        _swapIntoList(objList, filtReco)
        if inputCfg['isMC']:
            _swapIntoList(objList, filtGen)

        # save new file with filtered sparses
        if args.suffix:
            outName = inName.replace('.root', '{0}.root'.format(args.suffix))
        else:
            outName = inName
        print('Saving filtered ThnSparses in file', outName)
        outFile = TFile(outName, 'recreate')
        dirName = inputCfg['dirname']
        outDir = TDirectoryFile(dirName, dirName)
        outDir.Write(dirName)
        outDir.cd()
        objList.Write(inputCfg['listname'], 1)
        cutObj.Write(cutObjName)
        normCounter.Write()
        outFile.Close()
def FiltFuncV2(args):
    """
    Filter several sparses, each with its own cut set, sum them and save the result.

    One cut-set file is expected per entry of the 'dirname' and 'listname'
    configuration lists; when the three counts differ a message is printed and
    nothing is written. The filtered sparses are all added to the first one,
    which then replaces the first original sparse in the list loaded from the
    first directory/list of the input file. The list is written to
    'outfilename' under 'outdirname' with the name 'outlistname'.
    """

    def _readYaml(path):
        # load one YAML file with the full loader
        with open(path, 'r') as stream:
            return yaml.load(stream, yaml.FullLoader)

    inputCfg = _readYaml(args.cfgFileName)
    cutSetsCfg = [_readYaml(cutFile) for cutFile in args.cutSetFileNames]

    nDirs = len(inputCfg['dirname'])
    if len(cutSetsCfg) != nDirs or nDirs != len(inputCfg['listname']):
        print('Wrong number of cut sets, dir names or list names')
        return

    sparseList = LoadSparseFromTaskV2(inputCfg)
    sparseListFilt = [
        FilterSparsesV2(origSparse, cuts['cutvars'], cuts['axestokeep'])
        for origSparse, cuts in zip(sparseList, cutSetsCfg)
    ]

    # accumulate every filtered sparse into the first one
    for otherSparse in sparseListFilt[1:]:
        sparseListFilt[0].Add(otherSparse)

    # get other objects from original file
    objList = LoadListFromTaskV2(inputCfg['filename'],
                                 inputCfg['dirname'][0],
                                 inputCfg['listname'][0])
    objList.Remove(objList.FindObject(sparseList[0].GetName()))
    objList.Add(sparseListFilt[0])

    # save new file with filtered sparses
    outName = inputCfg['outfilename']
    print('Saving filtered ThnSparses in file', outName)
    outFile = TFile(outName, 'recreate')
    outDirName = inputCfg['outdirname']
    outDir = TDirectoryFile(outDirName, outDirName)
    outDir.Write(outDirName)
    outDir.cd()
    objList.Write(inputCfg['outlistname'], 1)
    outFile.Close()
def make_datacards(output_dir, channel, variable, components_dict, category='inclusive', systematics=None):
    '''Write the histograms needed for datacards to a ROOT file.

    The file ``<output_dir>/htt_<channel>.inputs_datacards_<variable>.root`` is
    opened in UPDATE mode and the histograms are written to a directory named
    after the channel and the category. The directory is reused when the file
    already contains it.

    Arguments:
        output_dir: directory holding the ROOT file.
        channel: channel key, also used to look up ``channels_names``.
        variable: variable name, part of the file name.
        components_dict: maps a systematic label to a dict that maps a
            component name to an object exposing a ``histogram`` attribute.
        category: category name, part of the directory name.
        systematics: iterable of systematic labels to write; defaults to
            ``['nominal']``.

    Histogram names are the component name for ``'nominal'``, the entry of
    ``syst_rename_dir`` when one exists for ``<component>_<systematic>``, and
    otherwise ``<component>_<systematic>`` with ``up``/``down`` capitalised.
    For systematics listed in ``syst_split_list`` one extra copy is written per
    entry of ``types_dir[component]``, with that entry inserted in front of
    ``Up``/``Down`` in the histogram name.
    '''
    # None sentinel instead of a mutable default list shared between calls.
    if systematics is None:
        systematics = ['nominal']

    rootfilename = '_'.join(['htt', channel+'.inputs', 'datacards', variable])
    rootfile = TFile('{}/{}.root'.format(output_dir, rootfilename), 'UPDATE')

    rootdirname = '_'.join([channels_names[channel], category])
    # The file is opened in UPDATE mode, so the directory may already be there
    # from an earlier call; reuse it instead of creating a clashing one.
    rootdir = rootfile.GetDirectory(rootdirname)
    if not rootdir:
        rootdir = TDirectoryFile(rootdirname, rootdirname)
    rootdir.cd()

    for systematic in systematics:
        # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        for key, component in components_dict[systematic].items():
            histname = '_'.join([key, systematic])
            if systematic == 'nominal':
                histname = key
            elif histname in syst_rename_dir:
                histname = syst_rename_dir[histname]
            else:
                histname = histname.replace('up', 'Up').replace('down', 'Down')

            hist = component.histogram.Clone(histname)
            hist.SetMinimum(0.000001)
            hist.SetTitle(key)
            hist.Write()

            if systematic not in syst_split_list:
                continue

            # 'Down' is tested first, as the name is only rewritten for one
            # of the two directions.
            if 'Down' in histname:
                direction = 'Down'
            elif 'Up' in histname:
                direction = 'Up'
            else:
                # No direction tag to split on: there is no meaningful name
                # for the copies, so none are written.
                continue

            for sys_type in types_dir[key]:
                new_histname = histname.replace(direction, sys_type + direction)
                split_hist = component.histogram.Clone(new_histname)
                split_hist.SetTitle(key)
                split_hist.Write()

    rootfile.Close()
    print('Datacards for category {} and variable {} made.'.format(category, variable))
hisnorm.Scale(1. / hisnorm.Integral()) hisnorm.GetYaxis().SetRangeUser(1.e-5, 5.) hisnorm.GetYaxis().SetTitle('Normalised entries') hisselnorm = hissel.Clone(hissel.GetName() + '_Norm') if hisselnorm.Integral() > 0: hisselnorm.Scale(1. / hisselnorm.Integral()) hisselnorm.GetYaxis().SetTitle('Normalised entries') cVarsNorm[f'Pt{ptmin:.0f}_{ptmax:.0f}'].cd(iVar + 1).SetLogy() hisnorm.DrawCopy('hist') hisnorm.DrawCopy('Esame') hisselnorm.DrawCopy('histsame') hisselnorm.DrawCopy('Esame') leg.Draw('same') dir1D.cd() cVars[f'Pt{ptmin:.0f}_{ptmax:.0f}'].Write() cVarsNorm[f'Pt{ptmin:.0f}_{ptmax:.0f}'].Write() hVars[f'Pt{ptmin:.0f}_{ptmax:.0f}'][topovars[axis]].Write() hVarsSel[f'Pt{ptmin:.0f}_{ptmax:.0f}'][topovars[axis]].Write() hisnorm.Write() hisselnorm.Write() hVarsVsPt[f'Pt{ptmin:.0f}_{ptmax:.0f}'][topovars[axis]].SetTitle( f'{ptmin} < #it{{p}}_{{T}} < {ptmax} GeV/#it{{c}};#it{{p}}_{{T}} (GeV/#it{{c}});{vartitles[axis]}' ) hVarsSelVsPt[f'Pt{ptmin:.0f}_{ptmax:.0f}'][topovars[axis]].SetTitle( f'{ptmin} < #it{{p}}_{{T}} < {ptmax} GeV/#it{{c}};#it{{p}}_{{T}} (GeV/#it{{c}});{vartitles[axis]}' ) cVarsVsPt[f'Pt{ptmin:.0f}_{ptmax:.0f}'].cd(iVar + 1)
padtopmargin=0.075, titleoffset=1., palette=kRainBow, titlesize=0.06, labelsize=0.055, maxdigits=4) cSignifVsRest, hSignifVsRest, cEstimVsCut, hEstimVsCut = [], [], [], [] counter = 0 for iPt, (ptMin, ptMax) in enumerate(zip(ptMins, ptMaxs)): # reshuffle bkg and take only a fraction of it, seed fixed for reproducibility dfBkgPt = dfBkg_tot.query( f'{ptMin} < pt_cand and pt_cand < {ptMax}').sample( frac=fractionstokeep[iPt], random_state=42).reset_index(drop=True) outDirFitSB.cd() outDirFitSBPt.append( TDirectoryFile(f'pT{ptMin}-{ptMax}', f'pT{ptMin}-{ptMax}')) outDirFitSBPt[iPt].Write() outDirPlots.cd() outDirPlotsPt.append( TDirectoryFile(f'pT{ptMin}-{ptMax}', f'pT{ptMin}-{ptMax}')) outDirPlotsPt[iPt].Write() dfPromptPt = dfPrompt.query(f'{ptMin} < pt_cand < {ptMax}') dfFDPt = dfFD.query(f'{ptMin} < pt_cand < {ptMax}') # Raa ptCent = (ptMax + ptMin) / 2. if isinstance(RaaPrompt_config, str): if ptMinRaaPrompt < ptCent < ptMaxRaaPrompt: RaaPrompt = RaaPromptSpline['yCent'](ptCent)
def main(args):
    """Evaluate the configured DNN models on an input file and write a friend tree.

    The comma-separated JSON files in ``args.DNNs`` are loaded with
    ``DNN_model_from_json`` and registered as ``DNN1``, ``DNN2``, ... in the
    order given. For every ``<channel>_<category>`` directory of the input file
    that is selected (``args.channels``, ``args.categories``, optionally
    narrowed to ``args.pipeline``), a tree named ``args.tree`` with one float
    branch per model is filled with ``model.predict(event, channel)`` for the
    entries between ``args.first_entry`` and ``args.last_entry`` (inclusive).

    The output goes to
    ``<output_dir>/<nickname>/<nickname>[_<pipeline>]_<first>_<last>.root``.
    With ``args.dry`` set, no output file, tree or branch is created.
    """
    print(args)
    channels = args.channels.split(',')
    categories = args.categories.split(',')
    nickname = os.path.basename(args.input).replace(".root", "")

    DNN_jsons = [f for f in args.DNNs.split(',') if f != ""]
    models = {}
    for model_number, DNN_json in enumerate(DNN_jsons, 1):
        models["DNN{}".format(model_number)] = DNN_model_from_json(DNN_json)

    # load root file and create friend tree
    root_file_input = args.input
    output_path = os.path.join(args.output_dir, nickname)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # Empty/None parts (e.g. no pipeline given) are dropped from the file name.
    name_parts = [
        nickname,
        args.pipeline,
        str(args.first_entry),
        str(args.last_entry),
    ]
    root_file_output = os.path.join(
        output_path,
        "_".join(filter(None, name_parts)) + ".root",
    )

    root_file_in = TFile.Open(root_file_input, 'read')
    # The input file is only read, so its key names are collected once.
    input_dir_names = [k.GetName() for k in root_file_in.GetListOfKeys()]

    if 'all' in channels:
        channels = set(name.split('_')[0] for name in input_dir_names)
    if 'all' in categories:
        categories = set(
            name.split('_')[-1] for name in input_dir_names
            if name.split('_')[0] in channels)

    if not args.dry:
        root_file_out = TFile.Open(root_file_output, 'recreate')
        print("Opened new file")

    first_pass = True
    for channel in channels:
        for cat in categories:
            rootdirname = '{}_{}'.format(channel, cat)
            print(rootdirname)
            if rootdirname not in input_dir_names:
                continue
            if args.pipeline is not None and rootdirname != args.pipeline:
                continue
            print('process pipeline: %s_%s' % (channel, cat))

            # The output file is closed after every pipeline, so every
            # pipeline after the first one reopens it in update mode.
            if not first_pass and not args.dry:
                root_file_out = TFile.Open(root_file_output, 'update')
            first_pass = False

            if not args.dry:
                rootdir = TDirectoryFile(rootdirname, rootdirname)
                rootdir.cd()
                tree = TTree(args.tree, args.tree)

            # One single-float buffer per model, bound to its branch below.
            leafValues = {}
            for model in models:
                leafValues[model] = array.array("f", [0])

            tree_from_root_file_in = root_file_in.Get(rootdirname).Get(
                args.tree)

            if not args.dry:
                print("Filling new branch in tree...")
                for model in models:
                    print(model)
                    tree.Branch(model, leafValues[model],
                                "{}/F".format(model))

                first_entry = args.first_entry
                last_entry = tree_from_root_file_in.GetEntries()
                if first_entry <= args.last_entry < last_entry:
                    last_entry = args.last_entry

                for k, evt in enumerate(tree_from_root_file_in):
                    if k > last_entry:
                        break
                    if k >= first_entry:
                        for model in models:
                            leafValues[model][0] = models[model].predict(
                                evt, channel)
                        tree.Fill()
                print("Filled.")

            if not args.dry:
                tree.Write(args.tree, kOverwrite)
                root_file_out.Close()
    print("Done")
hRawYieldsMeanSecPeak.Write() hRawYieldsSigmaSecPeak.Write() hRawYieldsSignificanceSecPeak.Write() hRawYieldsSigmaRatioSecondFirstPeak.Write() hRawYieldsSoverBSecPeak.Write() hRawYieldsSignalSecPeak.Write() hRawYieldsBkgSecPeak.Write() hRawYieldsTrue.Write() hRawYieldsSecPeakTrue.Write() hRelDiffRawYieldsFitTrue.Write() hRelDiffRawYieldsSecPeakFitTrue.Write() hEv.Write() if not args.isMC: dirSB = TDirectoryFile('SandBDiffNsigma', 'SandBDiffNsigma') dirSB.Write() dirSB.cd() for iS, _ in enumerate(nSigma4SandB): hRawYieldsSignalDiffSigma[iS].Write() hRawYieldsBkgDiffSigma[iS].Write() hRawYieldsSoverBDiffSigma[iS].Write() hRawYieldsSignifDiffSigma[iS].Write() dirSB.Close() outFile.Close() outFileNamePDF = args.outFileName.replace('.root', '.pdf') outFileNameResPDF = outFileNamePDF.replace('.pdf', '_Residuals.pdf') for iCanv, (cM, cR) in enumerate(zip(cMass, cResiduals)): if iCanv == 0 and nCanvases > 1: cM.SaveAs(f'{outFileNamePDF}[') cM.SaveAs(outFileNamePDF) if iCanv == nCanvases - 1 and nCanvases > 1:
def main(args):
    """Evaluate the configured XGB models on an input file and write a friend tree.

    The comma-separated JSON files in ``args.XGBs`` are loaded with
    ``XGB_model_from_json`` and registered under their ``name``. For every
    selected ``<channel>_<category>`` directory of the input file a tree named
    ``args.tree`` is written with one float branch per model.

    Unless ``args.recreate`` is set, an existing output file is first moved to
    ``<output>_to_update``; branches found in its tree are copied over and the
    models of the same name are not evaluated again. The ``_to_update`` file is
    removed at the end.

    Entries ``args.first_entry`` to ``args.last_entry`` (inclusive) are
    written; when ``args.last_entry`` does not lie strictly between the first
    entry and the number of entries, everything from ``args.first_entry`` to
    the last available entry is written.

    With ``args.dry`` set, the predictions are computed but no file is written.
    """
    print(args)
    channels = args.channels.split(',')
    categories = args.categories.split(',')
    nickname = os.path.basename(args.input).replace(".root", "")

    XGB_jsons = [f for f in args.XGBs.split(',') if f != ""]
    models = {}
    inputs = []
    for XGB_json in XGB_jsons:
        XGB_object = XGB_model_from_json(XGB_json)
        models[XGB_object.name] = XGB_object
        inputs += XGB_object.inputs

    # load root file and create friend tree
    root_file_input = args.input
    output_path = os.path.join(args.output_dir, nickname)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # Empty/None parts (e.g. no pipeline given) are dropped from the file name.
    name_parts = [
        nickname,
        args.pipeline,
        str(args.first_entry),
        str(args.last_entry),
    ]
    root_file_output = os.path.join(
        output_path,
        "_".join(filter(None, name_parts)) + ".root",
    )
    # Name under which a previous output is parked while it is merged.
    root_file_to_update = "{}_to_update".format(root_file_output)

    root_file_in = uproot.open(root_file_input)
    # The input file is only read, so its keys are collected once.
    input_keys = root_file_in.keys()

    if 'all' in channels:
        channels = set([k.split('_')[0] for k in input_keys])
    if 'all' in categories:
        categories = set([
            k.split('_')[-1].split(';')[0] for k in input_keys
            if any([c in k for c in channels])
        ])

    root_file_old = None
    if not args.dry:
        if not args.recreate:
            # Plain os calls instead of a shell one-liner: no dependence on
            # bash-only '[[ ]]' and no breakage on paths with spaces.
            if os.path.exists(root_file_output):
                os.rename(root_file_output, root_file_to_update)
            if os.path.exists(root_file_to_update):
                root_file_old = TFile(root_file_to_update, 'read')
        root_file_out = TFile(root_file_output, 'recreate')
        print("Opened new file")

    first_pass = True
    for channel in channels:
        for cat in categories:
            rootdirname = '{}_{}'.format(channel, cat)
            if (rootdirname not in input_keys
                    and "{};1".format(rootdirname) not in input_keys):
                continue
            if args.pipeline is not None and rootdirname != args.pipeline:
                continue
            print('process pipeline: %s_%s' % (channel, cat))

            # The output file is closed after every pipeline, so every
            # pipeline after the first one reopens it in update mode.
            if not first_pass and not args.dry:
                root_file_out = TFile(root_file_output, 'update')
            first_pass = False

            # Defined up front so that the dry-run path never reads
            # unbound names further down.
            tree_old = False
            old_models = []

            if not args.dry:
                rootdir_old = False
                if root_file_old:
                    rootdir_old = root_file_old.GetDirectory(rootdirname)

                rootdir = TDirectoryFile(rootdirname, rootdirname)
                rootdir.cd()

                if rootdir_old and not args.recreate:
                    tree_old = rootdir_old.Get(args.tree)
                tree = TTree(args.tree, args.tree)

                if tree_old:
                    old_models = [
                        branch.GetName()
                        for branch in tree_old.GetListOfBranches()
                    ]
                if old_models:
                    root_file_out_old = uproot.open(root_file_to_update)

                # Models already stored in the old tree are copied, not
                # evaluated again.
                models = {i: models[i] for i in models if i not in old_models}
                all_models = old_models + [k for k in models]

                # One single-float buffer per branch.
                leafValues = {}
                for model in all_models:
                    leafValues[model] = array.array("f", [0])

            if args.pandas:
                df = root_file_in[rootdirname][args.tree].pandas.df()
                if old_models:
                    df_old = root_file_out_old[rootdirname][
                        args.tree].pandas.df()
            else:
                _df = root_file_in[rootdirname][args.tree].arrays()
                df = pandas.DataFrame()
                # These two columns are computed below, not read from the tree.
                keys_to_export = set(
                    inputs + ["pt_1", "pt_2", "phi_1", "phi_2"])
                keys_to_export -= set(["N_neutrinos_reco", "mt_tt"])
                for k in keys_to_export:
                    df[k] = _df[str.encode(k)]
                if old_models:
                    _df_old = root_file_out_old[rootdirname][
                        args.tree].arrays()
                    df_old = pandas.DataFrame()
                    for k in old_models:
                        df_old[k] = _df_old[str.encode(k)]

            df["mt_tt"] = (2 * df["pt_1"] * df["pt_2"] *
                           (1 - np.cos(df["phi_1"] - df["phi_2"])))**.5
            df["N_neutrinos_reco"] = N_neutrinos_in_channel[channel] * np.ones(
                len(df), dtype='int')

            # remove values set at -10 by default to match training settings
            for variable in ["jpt_r", "jeta_r", "jphi_r", "Njet_r"]:
                if variable in inputs:
                    df[variable].values[df[variable].values < 0] = 0

            for model in models:
                print("Predicting with {}...".format(model))
                df[model] = models[model].predict(df)

            if not args.dry:
                print("Filling new branch in tree...")
                for model in all_models:
                    tree.Branch(model, leafValues[model], "prediction/F")

                n_entries = len(df)
                first_entry = args.first_entry
                # Inclusive upper bound: the last valid index, not the length.
                last_entry = n_entries - 1
                if first_entry < args.last_entry < n_entries:
                    last_entry = args.last_entry

                for k in range(first_entry, last_entry + 1):
                    for model in all_models:
                        if model in old_models:
                            leafValues[model][0] = df_old[model].values[k]
                        else:
                            leafValues[model][0] = df[model].values[k]
                    tree.Fill()
                print("Filled.")

                rootdir.Remove(rootdir.Get(args.tree))
                tree.Write(args.tree, kOverwrite)
                root_file_out.Close()

    # Drop the parked copy of the previous output, if there is one.
    if os.path.exists(root_file_to_update):
        os.remove(root_file_to_update)
hSgnMin[-1].SetDirectory(0) hSgnMax[-1].SetDirectory(0) hSignif[-1].SetDirectory(0) hSignifMin[-1].SetDirectory(0) hSignifMax[-1].SetDirectory(0) hSoverB[-1].SetDirectory(0) hSoverBMin[-1].SetDirectory(0) hSoverBMax[-1].SetDirectory(0) hEff[-1].SetDirectory(0) hAcc[-1].SetDirectory(0) hEffAcc[-1].SetDirectory(0) outFile.cd() outDir = TDirectoryFile(meson, meson) outDir.Write() outDir.cd() hSgn[-1].Write() hSgnMin[-1].Write() hSgnMax[-1].Write() for iPt, _ in enumerate(hMassSignal): hMassSignal[iPt].Write() hMassSignalMin[iPt].Write() hMassSignalMax[iPt].Write() hBkg[-1].Write() for iPt, _ in enumerate(hMassSignal): hMassBkg[iPt].Write() hMassBkgMin[iPt].Write() hMassBkgMax[iPt].Write() hSignif[-1].Write() hSignifMin[-1].Write() hSignifMax[-1].Write()