def make_fractions_single(output_dir, channel, variable, components_dict,
                          category='inclusive', systematic='nominal'):
    '''This function aims at producing a ROOT file containing all histograms
    needed for datacards. To do so, provide a dict of components (class
    Component) that hold the ROOT histograms to be used. Also give the output
    directory and the variable name, so that you can make datacards for
    different variables.'''
    rootfilename = '_'.join(['htt', channel, 'for_FF_fractions'])
    rootfile = TFile('{}/{}.root'.format(output_dir, rootfilename), 'UPDATE')
    rootdirname = '_'.join([channels_names[channel], variable, category])
    rootdir = rootfile.GetDirectory(rootdirname)
    if not rootdir:
        rootdir = TDirectoryFile(rootdirname, rootdirname)
    rootdir.cd()
    for key, component in components_dict.iteritems():
        # Nominal histograms keep the plain component key; systematic
        # variations get the capitalised Up/Down suffix.
        histname = '_'.join([key, systematic])
        if systematic == 'nominal':
            histname = key
        else:
            histname = histname.replace('up', 'Up')
            histname = histname.replace('down', 'Down')
        if rootdir.Get(histname):
            continue
        hist = component.histogram.Clone(histname)
        # Components whose name starts with 'fakes_' are written with a flipped sign.
        if histname[:len("fakes_")] == "fakes_":
            hist.Scale(-1)
        hist.SetTitle(key)
        hist.Write()
    rootfile.Close()
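
# Illustrative usage sketch for make_fractions_single (never called here).
# The _DummyComponent stand-in, the process keys and the assumption that 'tt'
# is a key of channels_names are all hypothetical; the real Component class
# and channels_names mapping are defined elsewhere in this module.
def _example_make_fractions_single(output_dir='output'):
    from ROOT import TH1F

    class _DummyComponent(object):
        def __init__(self, histogram):
            self.histogram = histogram

    components = {
        'fakes_QCD': _DummyComponent(TH1F('fakes_QCD', 'fakes_QCD', 10, 0., 200.)),  # sign flipped on write
        'W': _DummyComponent(TH1F('W', 'W', 10, 0., 200.)),
    }
    # Writes (or updates) htt_tt_for_FF_fractions.root in output_dir, with one
    # histogram per component inside the <channels_names['tt']>_mt_tot_inclusive directory.
    make_fractions_single(output_dir, 'tt', 'mt_tot', components,
                          category='inclusive', systematic='nominal')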
def make_datacards_singlecat(output_dir, channel, variable, components_dict,
                             category='inclusive', systematics=['nominal']):
    '''This function aims at producing a ROOT file containing all histograms
    needed for datacards. To do so, provide a dict of components (class
    Component) that hold the ROOT histograms to be used. Also give the output
    directory and the variable name, so that you can make datacards for
    different variables.'''
    rootfilename = '_'.join(['htt', channel + '.inputs', 'datacards', variable])
    rootfile = TFile('{}/{}.root'.format(output_dir, rootfilename), 'UPDATE')
    rootdirname = '_'.join([channels_names[channel], category])
    rootdir = rootfile.GetDirectory(rootdirname)
    if not rootdir:
        rootdir = TDirectoryFile(rootdirname, rootdirname)
    rootdir.cd()
    for systematic in systematics:
        for key, component in components_dict[systematic].iteritems():
            histname = '_'.join([key, systematic])
            if systematic == 'nominal':
                histname = key
            elif histname in syst_rename_dir:
                histname = syst_rename_dir[histname]
            else:
                histname = histname.replace('up', 'Up')
                histname = histname.replace('down', 'Down')
            if rootdir.Get(histname):
                continue
            hist = component.histogram.Clone(histname)
            if 'jetFakes' in histname and 'ff_' in systematic:
                # Normalise fake-factor shape variations to the nominal jetFakes yield.
                hist.Scale(
                    components_dict['nominal']['jetFakes'].histogram.Integral(
                        0, hist.GetNbinsX() + 1) /
                    hist.Integral(0, hist.GetNbinsX() + 1))
            # Clip negative bins and empty the under/overflow bins.
            hist.SetMinimum(0.0)
            hist.SetBinContent(0, 0)
            hist.SetBinContent(hist.GetNbinsX() + 1, 0)
            hist.SetTitle(key)
            #if not ((systematic!='nominal') and ('jetFakes' in histname)):
            hist.Write()
            if systematic in syst_split_list:
                # Duplicate the variation under each per-type name listed in types_dir.
                hist_list = []
                for sys_type in types_dir[key]:
                    if 'Down' in histname:
                        new_histname = histname.replace('Down', sys_type + 'Down')
                    elif 'Up' in histname:
                        new_histname = histname.replace('Up', sys_type + 'Up')
                    hist_list.append(component.histogram.Clone(new_histname))
                    hist_list[-1].SetMinimum(0.0)
                    hist_list[-1].SetBinContent(0, 0)
                    hist_list[-1].SetBinContent(hist.GetNbinsX() + 1, 0)
                    hist_list[-1].SetTitle(key)
                    hist_list[-1].Write()
    rootfile.Close()
    print('Datacards for category {} and variable {} made.'.format(
        category, variable))
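
# Standalone illustration (not used by the functions above) of the datacard
# histogram-naming rule applied in make_datacards_singlecat; the systematic
# label in the example below is hypothetical.
def _example_datacard_histname(key, systematic, rename_dir):
    if systematic == 'nominal':
        return key
    histname = '_'.join([key, systematic])
    if histname in rename_dir:
        return rename_dir[histname]
    return histname.replace('up', 'Up').replace('down', 'Down')

# _example_datacard_histname('jetFakes', 'ff_qcd_syst_up', {}) -> 'jetFakes_ff_qcd_syst_Up'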
def main(args):
    print(args)
    channels = args.channels.split(',')
    categories = args.categories.split(',')
    nickname = os.path.basename(args.input).replace(".root", "")

    # Load the XGB models and collect the union of their input variables.
    XGB_jsons = args.XGBs.split(',')
    XGB_jsons = [f for f in XGB_jsons if f != ""]
    models = {}
    inputs = []
    for XGB_json in XGB_jsons:
        XGB_object = XGB_model_from_json(XGB_json)
        models[XGB_object.name] = XGB_object
        inputs += XGB_object.inputs

    # load root file and create friend tree
    root_file_input = args.input
    output_path = os.path.join(args.output_dir, nickname)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    root_file_output = os.path.join(
        output_path,
        "_".join(
            filter(
                None,
                [
                    nickname,
                    args.pipeline,
                    str(args.first_entry),
                    str(args.last_entry),
                ],
            )) + ".root",
    )
    root_file_in = uproot.open(root_file_input)
    if 'all' in channels:
        channels = set([k.split('_')[0] for k in root_file_in.keys()])
    if 'all' in categories:
        categories = set([
            k.split('_')[-1].split(';')[0] for k in root_file_in.keys()
            if any([c in k for c in channels])
        ])

    if not args.dry:
        # Keep any previous output aside so already computed predictions can be
        # copied over instead of being recomputed.
        root_file_old = None
        if not args.recreate:
            os.system(
                "if [[ -e {root_file_output} ]] ; then mv {root_file_output} {root_file_output}_to_update ; fi"
                .format(root_file_output=root_file_output))
            root_file_old = TFile("{}_to_update".format(root_file_output), 'read')
        root_file_out = TFile(root_file_output, 'recreate')
        print("Opened new file")

    first_pass = True
    for channel in channels:
        for cat in categories:
            rootdirname = '{}_{}'.format(channel, cat)
            if (rootdirname not in root_file_in.keys()
                    and "{};1".format(rootdirname) not in root_file_in.keys()):
                continue
            if rootdirname != args.pipeline and args.pipeline is not None:
                continue
            print('process pipeline: %s_%s' % (channel, cat))

            if not first_pass and not args.dry:
                root_file_out = TFile(root_file_output, 'update')
            first_pass = False

            tree_old = False
            if not args.dry:
                rootdir_old = False
                if root_file_old:
                    rootdir_old = root_file_old.GetDirectory(rootdirname)
                if not rootdir_old:
                    already_rootdir = False
                else:
                    already_rootdir = True
                rootdir = TDirectoryFile(rootdirname, rootdirname)
                rootdir.cd()
                if already_rootdir:
                    if not args.recreate:
                        tree_old = rootdir_old.Get(args.tree)
                tree = TTree(args.tree, args.tree)

            # Models whose branches already exist in the old output are copied
            # over; only the remaining ones are evaluated below.
            old_models = []
            if tree_old:
                old_models = [
                    model.GetName()
                    for model in [tree_old.GetListOfBranches()][0]
                ]
            if len(old_models) > 0:
                root_file_out_old = uproot.open(
                    "{}_to_update".format(root_file_output))
            models = {i: models[i] for i in models if i not in old_models}
            all_models = old_models + [k for k in models]
            leafValues = {}
            for model in all_models:
                leafValues[model] = array.array("f", [0])

            # Build the input dataframe for the predictions.
            if args.pandas:
                df = root_file_in[rootdirname][args.tree].pandas.df()
                if tree_old:
                    df_old = root_file_out_old[rootdirname][args.tree].pandas.df()
            else:
                _df = root_file_in[rootdirname][args.tree].arrays()
                df = pandas.DataFrame()
                keys_to_export = set(inputs + ["pt_1", "pt_2", "phi_1", "phi_2"])
                for key in ["N_neutrinos_reco", "mt_tt"]:
                    if key in keys_to_export:
                        keys_to_export.remove(key)
                for k in keys_to_export:
                    df[k] = _df[str.encode(k)]
                if tree_old:
                    _df_old = root_file_out_old[rootdirname][args.tree].arrays()
                    df_old = pandas.DataFrame()
                    keys_to_export = old_models
                    for k in keys_to_export:
                        df_old[k] = _df_old[str.encode(k)]

            # Derived inputs that are recomputed rather than read from the tree.
            df["mt_tt"] = (2 * df["pt_1"] * df["pt_2"] *
                           (1 - np.cos(df["phi_1"] - df["phi_2"])))**.5
            df["N_neutrinos_reco"] = N_neutrinos_in_channel[channel] * np.ones(
                len(df[inputs[0]]), dtype='int')

            # remove values set at -10 by default to match training settings
            for variable in ["jpt_r", "jeta_r", "jphi_r", "Njet_r"]:
                if variable in inputs:
                    df[variable].values[df[variable].values < 0] = 0

            for model in models:
                print("Predicting with {}...".format(model))
                df[model] = models[model].predict(df)

            if not args.dry:
                print("Filling new branch in tree...")
                for model in all_models:
                    newBranch = tree.Branch(model, leafValues[model],
                                            "prediction/F")
                first_entry = args.first_entry
                last_entry = len(df[model].values)
                if args.last_entry > first_entry and args.last_entry < len(
                        df[model].values):
                    last_entry = args.last_entry
                for k in range(first_entry, last_entry + 1):
                    for model in all_models:
                        if model in old_models:
                            leafValues[model][0] = df_old[model].values[k]
                        else:
                            leafValues[model][0] = df[model].values[k]
                    tree.Fill()
                print("Filled.")
                rootdir.Remove(rootdir.Get(args.tree))
                tree.Write(args.tree, kOverwrite)
                root_file_out.Close()

    os.system("rm -rf {}_to_update".format(root_file_output))
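
# Sketch of the command-line interface implied by the attributes main() reads
# (channels, categories, input, XGBs, output_dir, pipeline, tree, first_entry,
# last_entry, dry, recreate, pandas). The real parser lives elsewhere in this
# script; the flag spellings, defaults and help strings below are guesses.
def _example_build_arg_parser():
    import argparse
    parser = argparse.ArgumentParser(
        description='Annotate ROOT trees with XGB model predictions.')
    parser.add_argument('--input', required=True, help='input ROOT file')
    parser.add_argument('--output-dir', dest='output_dir', default='.')
    parser.add_argument('--channels', default='all',
                        help='comma-separated list of channels, or "all"')
    parser.add_argument('--categories', default='all',
                        help='comma-separated list of categories, or "all"')
    parser.add_argument('--XGBs', default='',
                        help='comma-separated list of XGB model JSON files')
    parser.add_argument('--pipeline', default=None,
                        help='process only this <channel>_<category> directory')
    parser.add_argument('--tree', default='ntuple',
                        help='name of the tree to read and write')
    parser.add_argument('--first-entry', dest='first_entry', type=int, default=0)
    parser.add_argument('--last-entry', dest='last_entry', type=int, default=-1)
    parser.add_argument('--dry', action='store_true',
                        help='run the predictions but do not write any output')
    parser.add_argument('--recreate', action='store_true',
                        help='ignore any previously produced output file')
    parser.add_argument('--pandas', action='store_true',
                        help='read the input tree through the uproot pandas interface')
    return parser

# Usage sketch: main(_example_build_arg_parser().parse_args())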