def execute(self):
    """Set up the object-level and event-level cuts, then run the file loop."""
    settings, cuts = self.p_settings, self.p_cuts

    # Both cut handlers are built from the same settings and processed cuts.
    self.object_level_cuts = ObjectCuts(settings, cuts)
    self.event_level_cuts = EventCuts(settings, cuts)

    self.FileLoop()
class Whiteboard(object):
    """
    Base class for doing simple studies and inheriting
    object building and event loops.

    Usage, as visible in this class: construct with a config file, call
    initialize() to read the settings, then execute() to loop over the
    input files, build the objects for each event and save the results.
    """

    def __init__(self, cfg_filename):
        """
        Init
        @param cfg_filename    Filename of the config file ("PyUser.ini")
        """
        self.p_settings = info.getConfigSettings(cfg_filename)
        self.p_cfg_name = 'configuration'
        self.ops = config.logicOperators()
        self.key2attr = info.key2attr()
        self.obj2ttree = info.obj2ttree()
        self.truthObj = info.truthObject()
        self.nonGeVvars = info.nonGeVvariables()  # variables that aren't in GeV (for scaling plots)
        self.GeV = 1000.  # values of all other variables are divided by this before saving

    def _setting(self, key):
        """Return the raw value stored under `key` in the configuration section."""
        return self.p_settings.get(self.p_cfg_name, key)

    def initialize(self):
        """
        Load the necessary settings from the config file.
        @return    The settings object that was loaded in __init__
        """
        get = self._setting

        self.p_objects = get('objects')
        self.p_scalefactor = config.str2bool(get('eventweights'))
        self.p_lepton = get('lepton')
        self.p_extra_save = get('extra_saveAs')
        self.p_plot_type = get('plot_framework')  # python/root
        self.p_plot1d = get('1dplot')  # for saving data
        self.p_plot2d = get('2dplot')  # for saving data
        self.p_cuts = config.processCuts(get('cutsfile'))
        self.p_variables = get('variables').split(',')
        self.p_eff_x_vars = get('eff_x').split(',')
        self.p_treename = get('treename')
        self.eff_conditions = get('eff_y').split(',')
        self.p_nevents = int(get('NEvents'))

        # One input file per line. The handle is closed as soon as the list is
        # read; blank lines are dropped so they are never opened as ROOT files.
        with open(get('files'), 'r') as file_list:
            self.p_files = [line.strip() for line in file_list if line.strip()]

        self.p_read_data = config.str2bool(get('read_data'))

        p_custom_vars = get('custom_variables')
        self.p_custom_vars = '' if p_custom_vars == 'None' else p_custom_vars

        ## -- Easy variables to initialize -- ##
        self.p_btag_wkpt = info.btagging_WP(get('btag_wkpt'))

        # Output base names (no extension): data/<input basename>_<lepton>_<extra>
        self.outputfilenames = [
            'data/' + fname.split('/')[-1].split('.')[0] + '_' + self.p_lepton + '_' + self.p_extra_save
            for fname in self.p_files
        ]

        return self.p_settings

    def execute(self):
        """Execute the code!"""
        self.object_level_cuts = ObjectCuts(self.p_settings, self.p_cuts)
        self.event_level_cuts = EventCuts(self.p_settings, self.p_cuts)

        self.FileLoop()

    def FileLoop(self):
        """Loop over files: open the tree, set up storage, run events, save."""
        for ff, filename in enumerate(self.p_files):
            self.f = ROOT.TFile.Open(filename.strip())
            self.ttree = self.f.Get(self.p_treename)

            ## -- Initialize data for each file -- ##
            self.init_data(ff)

            ## -- Loop over events -- ##
            self.EventLoop()

            ## -- Output data -- ##
            self.SaveData(ff)

    def EventLoop(self):
        """Loop over events (at most NEvents; all of them if NEvents is negative)."""
        total_entries = self.ttree.GetEntries()
        if self.p_nevents < 0 or self.p_nevents > total_entries:
            n_entries = total_entries
        else:
            n_entries = self.p_nevents

        # Single-argument print calls give the same text under Python 2 and 3.
        print("")
        print("")
        print(" RUNNING OVER  %s :  %s  entries" % (self.f.GetName(), n_entries))
        print("")

        entry = 0
        while entry < n_entries:
            self._process_entry(entry)
            entry += 1

    def _process_entry(self, entry):
        """Load one tree entry, apply the selection and save the requested variables."""
        self.ttree.GetEntry(entry)

        if not entry % 1000:
            print(" -> Entry  %s" % entry)

        ## -- Get some event info
        lep = 'el' if self.ttree.ejets else 'mu'
        if self.p_lepton != 'muel' and lep != self.p_lepton:
            return

        self.BuildObjects()

        ## -- Apply cuts (if necessary); ApplyCuts returns True for a failed event
        if self.p_cuts and self.ApplyCuts():
            return

        scale_factor = self.getSF()

        if self.p_plot1d == 'efficiency':
            self.getEff(lep, scale_factor)
            return

        ## -- Loop over variables that we want
        custom_vars = addData.custom_variables(self.ttree, self.p_custom_vars, self.event_objects)
        for variable in self.p_variables:
            values = self._variable_values(variable, custom_vars)
            if values is None:
                continue

            ## -- Keeping everything as a list for uniform interfaces.
            ## Any scalar is wrapped (not only floats), so integer quantities
            ## work and a scalar value of 0 is recorded instead of dropped.
            if not isinstance(values, (list, tuple)):
                values = [values]

            ## -- If there are no values, no need to continue!
            if not values:
                continue

            # One weight per value. A fresh list is built for each variable so
            # the scalar event weight is never overwritten inside this loop.
            weights = [scale_factor] * len(values)

            ## Scale by GeV for variables with that unit
            if not any(x in variable for x in self.nonGeVvars):
                values = [value / self.GeV for value in values]

            ## saving the data to plot later ##
            self.save_event(variable, values, weights, lep)

    def _variable_values(self, variable, custom_vars):
        """Return the value(s) of `variable` from the custom variables or event objects."""
        try:
            return custom_vars[variable]
        except KeyError:
            pass

        if '.' not in variable:
            return self.event_objects[variable]

        # "<object>.<attribute>": read the attribute from one object or from a list of them
        name, attr = variable.split('.')
        quantity = self.key2attr[attr.lower()]
        target = self.event_objects[name]
        if isinstance(target, list):
            return [self.getValue(py_object, quantity) for py_object in target]
        return [self.getValue(target, quantity)]

    def SaveData(self, ff):
        """
        Save data to json file or root file
        @param ff    Index of the current input file (selects the output name)
        """
        if self.use_hists:  # root file
            self.outfiles[ff].Write()
            self.outfiles[ff].Close()
        else:  # json
            with open(self.outputfilenames[ff] + '.json', 'w') as json_file:
                json.dump(self.data, json_file)

    def save_json(self, json_var, json_values, json_weight, json_leptonflavor):
        """
        Append the values and weights of one event to the in-memory data.
        @param json_var             Variable name (key of self.data)
        @param json_values          List of values for this event
        @param json_weight          List of weights for this event
        @param json_leptonflavor    'el' or 'mu'
        """
        self.data['mcChannelNumber'] = self.ttree.mcChannelNumber
        entry = self.data[json_var][json_leptonflavor]
        entry['value'].append(json_values)
        entry['weight'].append(json_weight)

    def save_hist(self, r_var, r_values, r_weight, r_leptonflavor):
        """
        Fill the histogram booked for a variable.
        @param r_var             Variable name; a ',' marks a 2D histogram
        @param r_values          List of values ([x] for 1D, [x,y] for 2D)
        @param r_weight          Event weight (scalar, or list whose first entry is used)
        @param r_leptonflavor    'el' or 'mu'
        """
        self.data['mcChannelNumber'] = self.ttree.mcChannelNumber  # setting this repeatedly..

        # EventLoop passes one weight per value; Fill needs a single number.
        if isinstance(r_weight, (list, tuple)):
            weight = r_weight[0]
        else:
            weight = r_weight

        hist = self.data[r_var][r_leptonflavor]
        if ',' in r_var:
            hist.Fill(r_values[0], r_values[1], weight)
        else:
            hist.Fill(r_values[0], weight)

    def ApplyCuts(self):
        """
        Apply event- and object-level cuts
        @return    True if the event failed a cut, False if it passed
        """
        obj_cuts = self.object_level_cuts.applyCuts(self.event_objects)
        if not obj_cuts['result']:
            return True  # event failed
        self.event_objects = obj_cuts['objects']  # update the dictionary of objects

        evt_cuts = self.event_level_cuts.applyCuts(self.event_objects)
        if not evt_cuts['result']:
            return True  # event failed
        self.event_objects = evt_cuts['objects']  # update the dictionary of objects

        return False  # event passed

    def getSF(self):
        """
        Retrieve event weight
        @return    1. unless event weights are enabled and mcChannelNumber > 0
        """
        sf = 1.
        if self.ttree.mcChannelNumber > 0 and self.p_scalefactor:
            sf *= vlq.compute_weight(self.ttree, self.p_btag_wkpt)
        return sf

    def BuildObjects(self):
        """
        Build 4-vectors of objects in event:
        lepton,met,nu,jets,tjets,bjets,fatjets,rcjets,resjets
        The result is stored in self.event_objects.
        """
        objects = self.p_objects.split(',')

        # make 'resjets' be the last thing built (need the jets first)
        if objects[-1] != 'resjets' and 'resjets' in objects:
            objects.remove('resjets')
            objects.append('resjets')

        ## possible objects linked to their functions
        import_keys = {'lepton': lepBase,
                       'met': metBase,
                       'nu': nuBase,
                       'jets': jetBase,
                       'bjets': bjetBase,
                       'tjets': tjetBase,
                       'fatjets': fjBase,
                       'rcjets': rcBase,
                       'resjets': resBase}

        self.event_objects = {'vlq_evtype': self.ttree.vlq_evtype}

        ## special treatment for truth information because of how it is saved in truthSelection
        if any(ob.startswith('truth') for ob in objects) or self.p_plot1d == 'efficiency':
            self.event_objects.update(truthBase(self.ttree))
            # just set it up, done with truth information. Filtering (rather
            # than list.remove('truth')) also covers efficiency mode, where
            # 'truth' may not have been listed at all.
            objects = [ob for ob in objects if not ob.startswith('truth')]

        by_pt = lambda x: x.Pt()

        for event_object in objects:
            if event_object in ('bjets+jets', 'jets+bjets'):
                jets = jetBase(self.ttree) + bjetBase(self.ttree)  # merge the objects into one list
                jets.sort(key=by_pt, reverse=True)  # [0] has the highest pT
                self.event_objects['jets'] = jets
            elif event_object == 'resjets':
                obj = import_keys[event_object](self.event_objects['jets'])
                self.event_objects[event_object] = obj
                obj.sort(key=by_pt, reverse=True)  # [0] has the highest pT
            else:
                obj = import_keys[event_object](self.ttree)
                self.event_objects[event_object] = obj
                try:
                    obj.sort(key=by_pt, reverse=True)  # [0] has the highest pT
                except AttributeError:
                    continue  # not sortable this way; nothing to sort

        # Count b-tags whenever a 'jets' collection was actually built. This
        # avoids a KeyError when only 'bjets' was requested and also covers
        # the 'jets+bjets' spelling.
        if 'jets' in self.event_objects:
            self.event_objects['nbtags'] = sum(
                1 for q in self.event_objects['jets'] if q.mv2c20 > self.p_btag_wkpt)

        # NOTE(review): the source formatting was lost; both resets are kept
        # under this condition -- confirm that 'resjets' belongs here.
        if 'bjets' not in objects:
            self.event_objects['bjets'] = []
            self.event_objects['resjets'] = []

        if 'ht' in self.p_variables:
            self.event_objects['ht'] = vlq.calcHT(self.event_objects)

    def init_data(self, f_ind):
        """
        Set up the storage (histograms or json-ready dictionaries) for one file.
        @param f_ind    Index of the current input file in self.p_files
        """
        import sys  # local import so the exit path below works on its own

        self.data = {}
        self.use_hists = False

        if self.p_plot_type == 'python':
            # python plotting reads json, except efficiencies which use histograms
            self.use_hists = (self.p_plot1d == 'efficiency')
        elif self.p_plot_type == 'root':
            self.use_hists = True
        else:
            print(" UNKNOWN PLOT TYPE. EXITING ")
            sys.exit(1)

        self.use_json = not self.use_hists

        if self.use_hists:
            self.save_event = self.save_hist
            self._init_hists(f_ind)
        else:
            self.save_event = self.save_json
            # self.outputfilenames stays a list; SaveData appends '.json'
            for var in self.p_variables:
                self.data[var] = {'el': {'value': [], 'weight': []},
                                  'mu': {'value': [], 'weight': []}}  # can support 2d (use [a,b])

    def _init_hists(self, f_ind):
        """Open the output ROOT file for input `f_ind` and book its histograms."""
        import pyDataMC.datamc_dicts as datamc_dicts

        # Open only this input's output file: "recreate" on every output name
        # would overwrite files already written for earlier inputs.
        self.outfiles = [None] * len(self.outputfilenames)
        self.outfiles[f_ind] = ROOT.TFile(self.outputfilenames[f_ind] + '.root', "recreate")

        self.plot_keys = datamc_dicts.text_dicts()  # kept for varname2ttreename
        plot_vars = self.plot_keys['variables']
        n_bins = 1000  # number of uniform bins for the efficiency histograms

        efficiency = (self.p_plot1d == 'efficiency')
        if efficiency:
            # '==1' -> '1'; used to label the efficiency histograms
            tags = [re.search(r'\d+', cond).group() for cond in self.eff_conditions]
        else:
            tags = [None] * len(self.p_variables)

        for eff_c, var in zip(tags, self.p_variables):
            # check for 2d histogram
            if ',' in var:
                edges = [array('d', plot_vars[self.varname2ttreename(sub_var)]['bins'])
                         for sub_var in var.split(',')]
                base = var.replace(',', '_')  # keep the comma out of the ROOT object name
                hists = {}
                for lep in ('mu', 'el'):
                    # variable-edge constructor: number of bins = number of edges - 1
                    hists[lep] = ROOT.TH2D(base + '_' + lep + '_name', base + '_' + lep + '_title',
                                           len(edges[0]) - 1, edges[0],
                                           len(edges[1]) - 1, edges[1])
                self.data[var] = hists
                continue

            # else it's a 1d histogram
            try:
                bins = np.asarray(plot_vars[self.p_eff_x_vars[0]]['bins'], dtype='d')
            except KeyError:
                v_name = var.split('_')
                ttree_name = self.obj2ttree[v_name[0]] + '_' + self.key2attr[v_name[1]]
                bins = np.asarray(plot_vars[ttree_name]['bins'], dtype='d')

            if efficiency:
                hists = {}
                for lep in ('el', 'mu'):
                    hists[lep] = {}
                    for kind, short in (('total', 't'), ('good', 'g'), ('mis', 'm')):
                        suffix = '_' + short + '_' + eff_c
                        hists[lep][kind] = ROOT.TH1D(var + '_' + lep + '_name' + suffix,
                                                     var + '_' + lep + '_title' + suffix,
                                                     n_bins, bins[0], bins[-1])
                self.data[var + eff_c] = hists
            else:
                hists = {}
                for lep in ('el', 'mu'):
                    hists[lep] = ROOT.TH1D(var + '_' + lep + '_name', var + '_' + lep + '_title',
                                           len(bins) - 1, bins)
                self.data[var] = hists

    def getEff(self, emu, event_weight):
        """
        Get efficiency and save to histogram
        @param emu             Lepton flavor of the event ('el' or 'mu')
        @param event_weight    Weight used when filling the histograms
        """
        custom_vars = addData.custom_variables(self.ttree, self.p_custom_vars, self.event_objects)
        eff_conditions = self._setting('eff_y').split(',')  # things like ==1

        logic_operator = []
        good_condition = []
        for eff_c in eff_conditions:
            value = re.search(r'\d+', eff_c).group()       # '==1' -> '1'
            logic_operator.append(eff_c.split(value)[0])   # '==1' -> '=='
            if eff_c.endswith('.'):
                good_condition.append(float(value))
            else:
                good_condition.append(int(value))

        # self.p_eff_x_vars # loop variables that we are getting the efficiency as a function of!
        # self.p_variables  # loop variables that we are getting the efficiency of!
        for eff_x_var in self.p_eff_x_vars:
            for vv, variable in enumerate(self.p_variables):
                eff_c = re.search(r'\d+', self.eff_conditions[vv]).group()  # '==1' -> '1'

                if eff_x_var in custom_vars.keys():
                    eff_x_val = custom_vars[eff_x_var]
                else:
                    # set it somewhere else; user needs to do this! (too modular...)
                    # something like the object pT or another object's pT
                    eff_x_val = None

                reco_name, attr = variable.split('_')
                quantity = self.key2attr[attr.lower()]
                truth_name = self.truthObj[reco_name]['name']
                dr_match = self.truthObj[reco_name]['dr']  # deltaR match value

                reco_object = self.event_objects[reco_name]
                truth_object = self.event_objects[truth_name]

                # -- DeltaR match and fill hists
                #    a single object is handled as a one-element list
                if isinstance(reco_object, list):
                    recos = reco_object
                else:
                    recos = [reco_object]

                for reco in recos:
                    if not reco.DeltaR(truth_object) < dr_match:
                        continue
                    result = self.getValue(reco, quantity)
                    hists = self.data[variable + eff_c][emu]
                    # fill total (matched)
                    hists['total'].Fill(eff_x_val, event_weight)
                    if self.ops[logic_operator[vv]](result, good_condition[vv]):
                        # fill good (matched & tagged)
                        hists['good'].Fill(eff_x_val, event_weight)
                    else:
                        # fill mis (matched & mis-tagged)
                        hists['mis'].Fill(eff_x_val, event_weight)

    def getValue(self, g_value, g_quant):
        """
        Read a quantity from an object.
        @param g_value    Object to read from
        @param g_quant    Name of the attribute or method
        @return           The result of calling the attribute if it is
                          callable, otherwise the attribute itself
        """
        g_attr = getattr(g_value, g_quant)
        # An explicit callable() check replaces a bare except, so errors
        # raised inside the called method are no longer hidden.
        if callable(g_attr):
            return g_attr()
        return g_attr

    def varname2ttreename(self, variable_name):
        """
        The ttree branches and objects in PyMiniAna aren't the same name.
        I should probably fix this at some point...
        This function converts the object name to the ttree name.
        @param variable_name    Name such as "<object>.<attribute>"
        @return                 The ttree name; names whose object part is
                                not in self.obj2ttree are returned unchanged
        """
        v_name = variable_name.split('.')
        if len(v_name) < 2 or v_name[0] not in self.obj2ttree:
            return variable_name

        first_name = self.obj2ttree[v_name[0]]   # should be correct
        second_name = self.key2attr[v_name[1]]   # may need to be lower-case
        ttree_name = first_name + '_' + second_name

        # self.plot_keys is set when histograms are booked; without it the
        # lower-case check cannot be made and the name is returned as built.
        plot_keys = getattr(self, 'plot_keys', None)
        if plot_keys is not None:
            try:
                plot_keys['variables'][ttree_name]
            except KeyError:
                ttree_name = first_name + '_' + second_name.lower()

        return ttree_name