import os  # needed for os.path.splitext below

# Default inputs: (ROOT file path, description) pairs. These are replaced
# wholesale when a JSON configuration is passed on the command line.
file_names = [
    #
    # CvsL
    #
    ('CvsL_76_17Jan/ROCs/5565121efc_ROCs.root', '7.6 Optimization'),
    ('test/ROCs.root', 'separate testing'),
    #
    # CvsB
    #
    # ('CvsB_76_18Jan/ROCs/82d2a6148a_ROCs.root', '7.5 Optimization'),
    # ('CvsB_76_18Jan/ROCs/ff481cb37f_ROCs.root', '7.6 Optimization'),
]
output = 'test/test.png'
if args.input:
    # Only the last input given on the command line is read.
    with open(args.input[-1]) as conf_file:
        jconf = prettyjson.loads(conf_file.read())
    # JSON has no tuples, so the pairs come back as lists.
    file_names = [tuple(i) for i in jconf['file_names']]
    output = jconf['output']
    graph_path = jconf['graph_path']

# Dump the configuration in json for bookkeeping.
# NOTE(review): graph_path is only assigned in this section when args.input
# is set; presumably a default is defined earlier in the file -- confirm.
jconf = {
    'file_names': file_names,
    'output': output,
    'graph_path': graph_path,
}
# Strip only the final extension: splitting on the first '.' would truncate
# paths such as './plots/roc.png' or '../out/roc.png' to an empty/garbled name.
jout = os.path.splitext(output)[0]
with open('%s.json' % jout, 'w') as out:
    out.write(prettyjson.dumps(jconf))
def processNtuple(infile_name, outfile_name, variables, sample,
                  flav_weight=False, pteta_weight=False, cat_weight=False,
                  tag=''):
    """Write a flat training tree from one input ntuple, optionally weighted.

    The flavour and category are parsed from ``infile_name``, which must end
    in ``<prefix>_<category>_<FLAVOR>.root``. The input tree is looked up in
    the input file under the category name, and one entry is written to the
    output tree ``tree`` for every input entry.

    Args:
        infile_name: path of the input ROOT file.
        outfile_name: path of the output ROOT file (recreated).
        variables: dict mapping output branch name to an info dict. Each info
            dict provides ``'type'`` (branch type code whose lower-case form
            is ``i``, ``l`` or ``f``) and ``'default'``, plus either ``'var'``
            (attribute read from the entry, with an optional ``'idx'`` to pick
            one element) or ``'fcn'`` and ``'args'`` (name of a module-level
            function called as ``fcn(entry, *args)``).
        sample: sample name used to locate ``data/<sample>_weights.root``.
        flav_weight: add a ``flavour_weight`` branch, read from the JSON stored
            in the ``flavour_weights`` object of the weights file.
        pteta_weight: add a ``kinematic_weight`` branch taken from the
            ``<category>/kin`` histogram at (jetPt, |jetEta|).
        cat_weight: add a ``slcategory_weight`` branch taken from the
            ``<category>/bias`` histogram at (jetPt, |jetEta|).
        tag: label used only in the final log message.

    Raises:
        ValueError: if ``infile_name`` does not match the expected pattern.
        IndexError: if no category stored in the weights file is a substring
            of the category parsed from the file name.
    """
    log.debug("processing %s --> %s" % (infile_name, outfile_name))
    type_dict = {'i': int, 'l': long, 'f': float}
    fname_regex = re.compile(
        r'[a-zA-Z_0-9\/]*\/?[a-zA-Z_0-9]+_(?P<category>[a-zA-Z]+)_(?P<flavor>[A-Z]+)\.root'
    )
    match = fname_regex.match(infile_name)
    if not match:
        raise ValueError("Could not match the regex to the file %s" % infile_name)
    flavor = match.group('flavor')
    full_category = match.group('category')

    weight_tfile = None
    try:
        flav_dir = None
        tfile_category = ''
        if pteta_weight or flav_weight or cat_weight:
            weight_tfile = io.root_open('data/%s_weights.root' % sample)
            flav_dir = weight_tfile.Get(flavor)
            categories = [i.name for i in flav_dir.keys()]
            # match existing categories to this one, which might be a subset
            # of the general category stored in the root file
            matching = [i for i in categories if i in full_category]
            if not matching:
                raise IndexError(
                    "No category stored for flavour %s in the weights file "
                    "matches %s" % (flavor, full_category))
            tfile_category = matching[0]

        weights = None
        if pteta_weight:
            weights = flav_dir.Get('%s/kin' % tfile_category)
        flavor_weight = 1.
        if flav_weight:
            flavor_weight = prettyjson.loads(
                weight_tfile.flavour_weights.String().Data()
            )[flavor]
        # put bias weights
        category_weights = None
        if cat_weight:
            category_weights = flav_dir.Get('%s/bias' % tfile_category)

        with io.root_open(outfile_name, 'recreate') as outfile:
            outtree = Tree('tree', title='c-tagging training tree')
            branches_def = dict(
                (name, info['type']) for name, info in variables.iteritems())
            if pteta_weight:
                branches_def['kinematic_weight'] = 'F'
                branches_def['total_weight'] = 'F'
            if flav_weight:
                branches_def['flavour_weight'] = 'F'
                branches_def['total_weight'] = 'F'
            if cat_weight:
                branches_def['slcategory_weight'] = 'F'
                branches_def['total_weight'] = 'F'
            outtree.create_branches(branches_def)

            with io.root_open(infile_name) as infile:
                intree = infile.Get(full_category)
                for e_idx, entry in enumerate(intree):
                    if e_idx % 1000 == 0:
                        log.debug("processing entry: %i" % e_idx)
                    for name, info in variables.iteritems():
                        value = info['default']
                        try:
                            if 'var' in info and hasattr(entry, info['var']):
                                var = getattr(entry, info['var'])
                                vtype = type_dict[info['type'].lower()]
                                if 'idx' in info:
                                    # keep the default when the requested
                                    # element does not exist
                                    if var.size() > info['idx']:
                                        value = vtype(var[info['idx']])
                                else:
                                    value = vtype(var)
                            elif 'fcn' in info:
                                vtype = type_dict[info['type'].lower()]
                                fcn = globals()[info['fcn']]
                                value = vtype(fcn(entry, *info['args']))
                        except Exception:
                            # Narrowed from a bare except so that Ctrl-C and
                            # SystemExit still propagate; say which variable
                            # failed before dropping into the debugger.
                            log.error(
                                "something went wrong processing variable %s"
                                % name)
                            set_trace()
                        # if value is nan, then set to default (maybe better
                        # if you skip the whole jet)
                        value = info['default'] if math.isnan(value) else value
                        setattr(outtree, name, value)

                    total_weight = 1.
                    if pteta_weight:
                        bin_idx = weights.FindFixBin(
                            entry.jetPt, abs(entry.jetEta))
                        kin_weight = weights[bin_idx].value
                        outtree.kinematic_weight = kin_weight
                        total_weight *= kin_weight
                    if flav_weight:
                        outtree.flavour_weight = flavor_weight
                        total_weight *= flavor_weight
                    if cat_weight:
                        bin_idx = category_weights.FindFixBin(
                            entry.jetPt, abs(entry.jetEta))
                        bias_weight = category_weights[bin_idx].value
                        outtree.slcategory_weight = bias_weight
                        total_weight *= bias_weight
                    if 'total_weight' in branches_def:
                        outtree.total_weight = total_weight
                    outtree.Fill()
            # NOTE(review): there is no explicit outtree.write() before the
            # output file is closed -- confirm the tree is fully persisted.
    finally:
        # The weight histograms belong to this file, so it can only be closed
        # once the event loop is over; previously it was never closed.
        if weight_tfile is not None:
            weight_tfile.Close()
    log.info("processing done [%s]" % tag)
def processNtuple(infile_name, outfile_name, sample, flav_weight=False,
                  pteta_weight=False, cat_weight=False, tag=''):
    """Copy an ntuple and append per-entry weight branches to the copy.

    ``infile_name`` is copied to ``outfile_name``; the tree ``ttree`` in the
    copy is then extended with the float branches ``weight_etaPt``,
    ``weight_category``, ``weight_flavour`` and ``weight``. ``weight`` is the
    product of the enabled weights (1 when none is enabled); the branch of a
    disabled weight is filled with 0. The flavour and category are parsed
    from ``infile_name``, which must end in
    ``<prefix>_<category>_<FLAVOR>.root``.

    Args:
        infile_name: path of the input ROOT file (left untouched).
        outfile_name: path of the copy that receives the new branches.
        sample: sample name used to locate
            ``../data_trees/<sample>_weights.root``.
        flav_weight: enable the flavour weight, read from the JSON stored in
            the ``flavour_weights`` object of the weights file.
        pteta_weight: enable the weight from the ``<category>/kin`` histogram
            at (Jet_pt, |Jet_eta|).
        cat_weight: enable the weight from the ``<category>/bias`` histogram
            at (Jet_pt, |Jet_eta|).
        tag: not used by this implementation.

    Raises:
        ValueError: if ``infile_name`` does not match the expected pattern.
        IndexError: if no category stored in the weights file is a substring
            of the category parsed from the file name.
        IOError: if the copied file cannot be opened for update.
    """
    log.debug("processing %s --> %s" % (infile_name, outfile_name))
    job_start = time.time()
    proc_name = multiprocessing.current_process().name
    fname_regex = re.compile(
        r'[a-zA-Z_0-9\/]*\/?[a-zA-Z_0-9]+_(?P<category>[a-zA-Z]+)_(?P<flavor>[A-Z]+)\.root'
    )
    match = fname_regex.match(infile_name)
    if not match:
        raise ValueError("Could not match the regex to the file %s" % infile_name)
    flavor = match.group('flavor')
    full_category = match.group('category')

    weight_tfile = None
    inFile = None
    try:
        flav_dir = None
        tfile_category = ''
        if pteta_weight or flav_weight or cat_weight:
            weight_tfile = io.root_open('../data_trees/%s_weights.root' % sample)
            flav_dir = weight_tfile.Get(flavor)
            categories = [i.name for i in flav_dir.keys()]
            # match existing categories to this one, which might be a subset
            # of the general category stored in the root file
            matching = [i for i in categories if i in full_category]
            if not matching:
                raise IndexError(
                    "No category stored for flavour %s in the weights file "
                    "matches %s" % (flavor, full_category))
            tfile_category = matching[0]

        weights = None
        if pteta_weight:
            weights = flav_dir.Get('%s/kin' % tfile_category)
        flavor_weight = 1.
        if flav_weight:
            flavor_weight = prettyjson.loads(
                weight_tfile.flavour_weights.String().Data()
            )[flavor]
        # put bias weights
        category_weights = None
        if cat_weight:
            category_weights = flav_dir.Get('%s/bias' % tfile_category)

        print("Starting to process %s" % infile_name)
        # make copy of input ntuple to be safe and work with that
        print("copying %s to %s" % (infile_name, outfile_name))
        shutil.copy2("%s" % (infile_name), "%s" % (outfile_name))

        # retrieve the ntuple of interest; this uses the copied file
        inFile = TFile.Open("%s" % (outfile_name), "update")
        if not inFile:
            inFile = None
            raise IOError("Could not open %s for update" % outfile_name)
        inTreeName = "ttree"
        myTree = inFile.Get(inTreeName)

        # create new branches; each one-element array is the buffer ROOT
        # reads from when the corresponding branch is filled
        weight_etaPt = array("f", [0.])
        weight_category = array("f", [0.])
        weight_flavour = array("f", [0.])
        weight = array("f", [0.])
        b_weight_etaPt = myTree.Branch(
            "weight_etaPt", weight_etaPt, 'weight_etaPt/F')
        b_weight_category = myTree.Branch(
            "weight_category", weight_category, 'weight_category/F')
        b_weight_flavour = myTree.Branch(
            "weight_flavour", weight_flavour, 'weight_flavour/F')
        b_weight = myTree.Branch("weight", weight, 'weight/F')

        # connect branches needed for weight calculation
        Jet_pt = array("f", [0.])
        Jet_eta = array("f", [0.])
        myTree.SetBranchAddress('Jet_pt', Jet_pt)
        myTree.SetBranchAddress('Jet_eta', Jet_eta)

        ### actual loop ###
        reportEveryNevents = 50000
        entries = myTree.GetEntriesFast()
        print("%s: Starting event loop" % (proc_name))
        startTime = time.time()
        for ientry in xrange(entries):
            myTree.GetEntry(ientry)

            # timing: progress and rate over the last reporting window
            if ientry % reportEveryNevents == 0 and ientry != 0:
                print("%s: Progress: %3.1f%%"
                      % (proc_name, float(ientry) / (entries) * 100))
                deltaTime = time.time() - startTime
                if deltaTime > 0:
                    rate = float(reportEveryNevents) / deltaTime
                    print("%s: current rate: %5.2f Hz" % (proc_name, rate))
                startTime = time.time()

            # obtain the different weights
            weight[0] = 1.
            if pteta_weight:
                bin_idx = weights.FindFixBin(Jet_pt[0], abs(Jet_eta[0]))
                kin_weight = weights[bin_idx].value
                weight_etaPt[0] = kin_weight
                weight[0] *= kin_weight
            if flav_weight:
                weight_flavour[0] = flavor_weight
                weight[0] *= flavor_weight
            if cat_weight:
                bin_idx = category_weights.FindFixBin(
                    Jet_pt[0], abs(Jet_eta[0]))
                bias_weight = category_weights[bin_idx].value
                weight_category[0] = bias_weight
                weight[0] *= bias_weight

            # and fill only the new branches (the existing ones are kept)
            b_weight_etaPt.Fill()
            b_weight_category.Fill()
            b_weight_flavour.Fill()
            b_weight.Fill()

        inFile.Write()
    finally:
        # Close both files even when the loop fails. The weight histograms
        # belong to weight_tfile, so it must stay open until here.
        if inFile is not None:
            inFile.Close()
        if weight_tfile is not None:
            weight_tfile.Close()

    # Wall-clock time spent in this call; time.clock() reported the CPU time
    # of the whole process instead.
    print("%s: Total time: %5.2f s" % (proc_name, time.time() - job_start))
from argparse import ArgumentParser
import rootpy.io as io
from rootpy.plotting import Hist2D
import prettyjson
import glob
import re
import rootpy
from pdb import set_trace
import copy
import binning
import ROOT

log = rootpy.log["/compute_weights"]
log.setLevel(rootpy.log.INFO)

# Load the yield tables, closing each file as soon as it has been read.
with open('data/qcd_yields.json') as yields_file:
    qcd_yields = prettyjson.loads(yields_file.read())
with open('data/ttjets_yields.json') as yields_file:
    ttj_yields = prettyjson.loads(yields_file.read())

# Independent copies with the same nesting as the QCD yields.
# NOTE(review): presumably overwritten entry by entry further down -- confirm.
biased_qcd = copy.deepcopy(qcd_yields)
cat_weight = copy.deepcopy(qcd_yields)
bin_weight = copy.deepcopy(qcd_yields)

# qcd_yields is indexed as [flavour][category][bin]; the category and bin
# keys are read from the first flavour / first category only.
flavours = qcd_yields.keys()
categories = qcd_yields[flavours[0]].keys()
bins = qcd_yields[flavours[0]][categories[0]].keys()

# Sum over every leaf of the nested table. The generator clauses nest
# left-to-right, so the outermost dictionary has to come first.
total_yield = sum(
    k
    for i in qcd_yields.itervalues()
    for j in i.itervalues()
    for k in j.itervalues()
)
flav_weights = {}
from argparse import ArgumentParser
import rootpy.io as io
from rootpy.plotting import Hist2D
import prettyjson
import glob
import re
import rootpy
from pdb import set_trace
import copy
import binning
import ROOT

log = rootpy.log["/compute_weights"]
log.setLevel(rootpy.log.INFO)

# Load the yield tables, closing each file as soon as it has been read.
with open('../data_trees/qcd_yields.json') as yields_file:
    qcd_yields = prettyjson.loads(yields_file.read())
with open('../data_trees/ttjets_yields.json') as yields_file:
    ttj_yields = prettyjson.loads(yields_file.read())

# Independent copies with the same nesting as the QCD yields.
# NOTE(review): presumably overwritten entry by entry further down -- confirm.
biased_qcd = copy.deepcopy(qcd_yields)
cat_weight = copy.deepcopy(qcd_yields)
bin_weight = copy.deepcopy(qcd_yields)

# qcd_yields is indexed as [flavour][category][bin]; the category and bin
# keys are read from the first flavour / first category only.
flavours = qcd_yields.keys()
categories = qcd_yields[flavours[0]].keys()
bins = qcd_yields[flavours[0]][categories[0]].keys()

# Sum over every leaf of the nested table. The generator clauses nest
# left-to-right, so the outermost dictionary has to come first.
total_yield = sum(
    k
    for i in qcd_yields.itervalues()
    for j in i.itervalues()
    for k in j.itervalues()
)
flav_weights = {}
def processNtuple(infile_name, outfile_name, variables, sample,
                  flav_weight=False, pteta_weight=False, cat_weight=False,
                  tag=''):
    """Write a flat training tree from one input ntuple, optionally weighted.

    The flavour and category are parsed from ``infile_name``, which must end
    in ``<prefix>_<category>_<FLAVOR>.root``. The input tree is looked up in
    the input file under the category name, and one entry is written to the
    output tree ``tree`` for every input entry.

    Args:
        infile_name: path of the input ROOT file.
        outfile_name: path of the output ROOT file (recreated).
        variables: dict mapping output branch name to an info dict. Each info
            dict provides ``'type'`` (branch type code whose lower-case form
            is ``i``, ``l`` or ``f``) and ``'default'``, plus either ``'var'``
            (attribute read from the entry, with an optional ``'idx'`` to pick
            one element) or ``'fcn'`` and ``'args'`` (name of a module-level
            function called as ``fcn(entry, *args)``).
        sample: sample name used to locate ``data/<sample>_weights.root``.
        flav_weight: add a ``flavour_weight`` branch, read from the JSON stored
            in the ``flavour_weights`` object of the weights file.
        pteta_weight: add a ``kinematic_weight`` branch taken from the
            ``<category>/kin`` histogram at (jetPt, |jetEta|).
        cat_weight: add a ``slcategory_weight`` branch taken from the
            ``<category>/bias`` histogram at (jetPt, |jetEta|).
        tag: label used only in the final log message.

    Raises:
        ValueError: if ``infile_name`` does not match the expected pattern.
        IndexError: if no category stored in the weights file is a substring
            of the category parsed from the file name.
    """
    log.debug("processing %s --> %s" % (infile_name, outfile_name))
    type_dict = {'i': int, 'l': long, 'f': float}
    fname_regex = re.compile(
        r'[a-zA-Z_0-9\/]*\/?[a-zA-Z_0-9]+_(?P<category>[a-zA-Z]+)_(?P<flavor>[A-Z]+)\.root'
    )
    match = fname_regex.match(infile_name)
    if not match:
        raise ValueError("Could not match the regex to the file %s" % infile_name)
    flavor = match.group('flavor')
    full_category = match.group('category')

    weight_tfile = None
    try:
        flav_dir = None
        tfile_category = ''
        if pteta_weight or flav_weight or cat_weight:
            weight_tfile = io.root_open('data/%s_weights.root' % sample)
            flav_dir = weight_tfile.Get(flavor)
            categories = [i.name for i in flav_dir.keys()]
            # match existing categories to this one, which might be a subset
            # of the general category stored in the root file
            matching = [i for i in categories if i in full_category]
            if not matching:
                raise IndexError(
                    "No category stored for flavour %s in the weights file "
                    "matches %s" % (flavor, full_category))
            tfile_category = matching[0]

        weights = None
        if pteta_weight:
            weights = flav_dir.Get('%s/kin' % tfile_category)
        flavor_weight = 1.
        if flav_weight:
            flavor_weight = prettyjson.loads(
                weight_tfile.flavour_weights.String().Data()
            )[flavor]
        # put bias weights
        category_weights = None
        if cat_weight:
            category_weights = flav_dir.Get('%s/bias' % tfile_category)

        with io.root_open(outfile_name, 'recreate') as outfile:
            outtree = Tree('tree', title='c-tagging training tree')
            branches_def = dict(
                (name, info['type']) for name, info in variables.iteritems())
            if pteta_weight:
                branches_def['kinematic_weight'] = 'F'
                branches_def['total_weight'] = 'F'
            if flav_weight:
                branches_def['flavour_weight'] = 'F'
                branches_def['total_weight'] = 'F'
            if cat_weight:
                branches_def['slcategory_weight'] = 'F'
                branches_def['total_weight'] = 'F'
            outtree.create_branches(branches_def)

            with io.root_open(infile_name) as infile:
                intree = infile.Get(full_category)
                for e_idx, entry in enumerate(intree):
                    if e_idx % 1000 == 0:
                        log.debug("processing entry: %i" % e_idx)
                    for name, info in variables.iteritems():
                        value = info['default']
                        try:
                            if 'var' in info and hasattr(entry, info['var']):
                                var = getattr(entry, info['var'])
                                vtype = type_dict[info['type'].lower()]
                                if 'idx' in info:
                                    # keep the default when the requested
                                    # element does not exist
                                    if var.size() > info['idx']:
                                        value = vtype(var[info['idx']])
                                else:
                                    value = vtype(var)
                            elif 'fcn' in info:
                                vtype = type_dict[info['type'].lower()]
                                fcn = globals()[info['fcn']]
                                value = vtype(fcn(entry, *info['args']))
                        except Exception:
                            # Narrowed from a bare except so that Ctrl-C and
                            # SystemExit still propagate; say which variable
                            # failed before dropping into the debugger.
                            log.error(
                                "something went wrong processing variable %s"
                                % name)
                            set_trace()
                        # if value is nan, then set to default (maybe better
                        # if you skip the whole jet)
                        value = info['default'] if math.isnan(value) else value
                        setattr(outtree, name, value)

                    total_weight = 1.
                    if pteta_weight:
                        bin_idx = weights.FindFixBin(
                            entry.jetPt, abs(entry.jetEta))
                        kin_weight = weights[bin_idx].value
                        outtree.kinematic_weight = kin_weight
                        total_weight *= kin_weight
                    if flav_weight:
                        outtree.flavour_weight = flavor_weight
                        total_weight *= flavor_weight
                    if cat_weight:
                        bin_idx = category_weights.FindFixBin(
                            entry.jetPt, abs(entry.jetEta))
                        bias_weight = category_weights[bin_idx].value
                        outtree.slcategory_weight = bias_weight
                        total_weight *= bias_weight
                    if 'total_weight' in branches_def:
                        outtree.total_weight = total_weight
                    outtree.Fill()
            # NOTE(review): there is no explicit outtree.write() before the
            # output file is closed -- confirm the tree is fully persisted.
    finally:
        # The weight histograms belong to this file, so it can only be closed
        # once the event loop is over; previously it was never closed.
        if weight_tfile is not None:
            weight_tfile.Close()
    log.info("processing done [%s]" % tag)
import os  # needed for os.path.splitext below

# Default inputs: (ROOT file path, description) pairs. These are replaced
# wholesale when a JSON configuration is passed on the command line.
file_names = [
    #
    # CvsL
    #
    ('CvsL_76_17Jan/ROCs/5565121efc_ROCs.root', '7.6 Optimization'),
    ('test/ROCs.root', 'separate testing'),
    #
    # CvsB
    #
    # ('CvsB_76_18Jan/ROCs/82d2a6148a_ROCs.root', '7.5 Optimization'),
    # ('CvsB_76_18Jan/ROCs/ff481cb37f_ROCs.root', '7.6 Optimization'),
]
output = 'test/test.png'
if args.input:
    # Only the last input given on the command line is read.
    with open(args.input[-1]) as conf_file:
        jconf = prettyjson.loads(conf_file.read())
    # JSON has no tuples, so the pairs come back as lists.
    file_names = [tuple(i) for i in jconf['file_names']]
    output = jconf['output']
    graph_path = jconf['graph_path']

# Dump the configuration in json for bookkeeping.
# NOTE(review): graph_path is only assigned in this section when args.input
# is set; presumably a default is defined earlier in the file -- confirm.
jconf = {
    'file_names': file_names,
    'output': output,
    'graph_path': graph_path,
}
# Strip only the final extension: splitting on the first '.' would truncate
# paths such as './plots/roc.png' or '../out/roc.png' to an empty/garbled name.
jout = os.path.splitext(output)[0]
with open('%s.json' % jout, 'w') as out:
    out.write(prettyjson.dumps(jconf))