def wrapper(): logger.info("Processing impacts") #name = "ewkDM_ttZ_ll_DC2A_0p200000_DC2V_0p200000" #name = "ewkDM_ttZ_ll_DC2A_0p250000_DC2V_m0p150000" name = args.cardfile cardFile = name+".txt" cardFilePath = cardfileLocation + cardFile combineDirname = os.path.join(releaseLocation, "ttZ_YR2018") logger.info("Creating %s"%combineDirname) if not os.path.isdir(combineDirname): os.makedirs(combineDirname) shutil.copyfile(cardFilePath,combineDirname+'/'+cardFile) prepWorkspace = "text2workspace.py %s -m 125"%cardFile robustFit = "combineTool.py -M Impacts -d %s.root -m 125 --doInitialFit "%name impactFits = "combineTool.py -M Impacts -d %s.root -m 125 --doFits --parallel %s "%(name,str(args.cores)) extractImpact = "combineTool.py -M Impacts -d %s.root -m 125 -o impacts.json"%name plotImpacts = "plotImpacts.py -i impacts.json -o impacts" combineCommand = "cd %s;eval `scramv1 runtime -sh`;%s;%s;%s;%s;%s"%(combineDirname,prepWorkspace,robustFit,impactFits,extractImpact,plotImpacts) logger.info("Will run the following command, might take a few hours:\n%s"%combineCommand) os.system(combineCommand) plotDir = plot_directory + "/impactsYR2018/" if not os.path.isdir(plotDir): os.makedirs(plotDir) shutil.copyfile(combineDirname+'/impacts.pdf', "%s/%s.pdf"%(plotDir,"ttZ_YR2018")) logger.info("Copied result to %s"%plotDir) if args.removeDir: logger.info("Removing directory in release location") shutil.rmtree(combineDirname)
def getCommands(line):
    """Expand one line of a submit file into a list of shell commands.

    A token ``SPLIT<n>`` anywhere in the line splits the job into ``n``
    sub-jobs by appending ``--nJobs <n> --job <i>`` for i in 0..n-1.
    Everything after ``#`` is treated as a comment and stripped; a line
    that is empty after stripping yields no commands.

    :param line: one raw line from the submit file
    :return: list of command strings (possibly empty)
    """
    commands = []
    split = None
    try:
        m = re.search(r"SPLIT[0-9][0-9]*", line)
        # m is None when no SPLIT token is present -> AttributeError below
        split = int(m.group(0).replace('SPLIT', ''))
    except AttributeError:
        # no SPLIT token on this line; keep split = None
        pass
    line = line.split('#')[0]
    if line:
        if split:
            logger.info("Splitting in %i jobs", split)
            for i in range(split):
                commands.append(line + " --nJobs %i --job %i" % (split, i))
        else:
            commands.append(line)
    return commands
def filler(event):
    """Fill one output-tree event from the current FWLite reader position.

    Stores run/lumi/event numbers, optional EFT reweight data (LHE weights and
    their polynomial parametrization), and the generator-level tops, Z, photon,
    leptons, MET and jets.

    Python 2 code: map/filter return lists, xrange is used.
    Relies on module-level globals: reader, args, weightInfo, hyperPoly,
    interpret_weight, GenSearch, cosThetaStar, deltaR2, fill_vector and the
    various *_varnames lists.
    """
    event.run, event.lumi, event.evt = reader.evt
    if reader.position % 100 == 0:
        logger.info("At event %i/%i", reader.position, reader.nEvents)
    if args.addReweights:
        event.nrw = weightInfo.nid
        lhe_weights = reader.products['lhe'].weights()
        weights = []
        param_points = []
        for weight in lhe_weights:
            # Store nominal weight (First position!)
            if weight.id == 'rwgt_1':
                event.rw_nominal = weight.wgt
            # skip LHE weights that are not part of the reweight card
            if not weight.id in weightInfo.id:
                continue
            pos = weightInfo.data[weight.id]
            event.rw_w[pos] = weight.wgt
            weights.append(weight.wgt)
            interpreted_weight = interpret_weight(weight.id)
            for var in weightInfo.variables:
                getattr(event, "rw_" + var)[pos] = interpreted_weight[var]
            # weight data for interpolation
            if not hyperPoly.initialized:
                param_points.append(
                    tuple(interpreted_weight[var]
                          for var in weightInfo.variables))
        # Initialize the polynomial basis once, from the first event
        if not hyperPoly.initialized:
            hyperPoly.initialize(param_points)
        coeff = hyperPoly.get_parametrization(weights)
        # = HyperPoly(weight_data, args.interpolationOrder)
        event.np = hyperPoly.ndof
        event.chi2_ndof = hyperPoly.chi2_ndof(coeff, weights)
        #logger.debug( "chi2_ndof %f coeff %r", event.chi2_ndof, coeff )
        logger.debug("chi2_ndof %f", event.chi2_ndof)
        for n in xrange(hyperPoly.ndof):
            event.p_C[n] = coeff[n]
    # All gen particles
    gp = reader.products['gp']
    # for searching
    search = GenSearch(gp)
    # find heavy objects before they decay
    tops = map(lambda t: {var: getattr(t, var)() for var in top_varnames},
               filter(lambda p: abs(p.pdgId()) == 6 and search.isLast(p), gp))
    tops.sort(key=lambda p: -p['pt'])
    fill_vector(event, "top", top_varnames, tops)
    # hardest Z boson (last copy), if any
    gen_Zs = filter(lambda p: abs(p.pdgId()) == 23 and search.isLast(p), gp)
    gen_Zs.sort(key=lambda p: -p.pt())
    if len(gen_Zs) > 0:
        gen_Z = gen_Zs[0]
        for var in Z_read_varnames:
            setattr(event, "Z_" + var, getattr(gen_Z, var)())
    else:
        gen_Z = None
    if gen_Z is not None:
        d1, d2 = gen_Z.daughter(0), gen_Z.daughter(1)
        # positive pdgId is the negatively charged lepton (particle)
        if d1.pdgId() > 0:
            lm, lp = d1, d2
        else:
            lm, lp = d2, d1
        event.Z_daughterPdg = lm.pdgId()
        event.Z_cosThetaStar = cosThetaStar(gen_Z.mass(), gen_Z.pt(),
                                            gen_Z.eta(), gen_Z.phi(),
                                            lm.pt(), lm.eta(), lm.phi())
    # hardest photon (last copy), if any
    gen_Gammas = filter(lambda p: abs(p.pdgId()) == 22 and search.isLast(p), gp)
    gen_Gammas.sort(key=lambda p: -p.pt())
    if len(gen_Gammas) > 0:
        gen_Gamma = gen_Gammas[0]
        for var in gamma_read_varnames:
            setattr(event, "gamma_" + var, getattr(gen_Gamma, var)())
    else:
        gen_Gamma = None
    # find all leptons
    leptons = [(search.ascend(l), l) for l in filter(
        lambda p: abs(p.pdgId()) in [11, 13] and search.isLast(p) and p.pt() >= 0, gp)]
    leps = []
    for first, last in leptons:
        # mother pdgId taken from the first copy in the decay chain
        mother_pdgId = first.mother(0).pdgId() if first.numberOfMothers() > 0 else -1
        leps.append({var: getattr(last, var)() for var in lep_varnames})
        leps[-1]['motherPdgId'] = mother_pdgId
    leps.sort(key=lambda p: -p['pt'])
    fill_vector(event, "GenLep", lep_all_varnames, leps)
    # MET
    event.GenMet_pt = reader.products['genMET'][0].pt()
    event.GenMet_phi = reader.products['genMET'][0].phi()
    # jets
    jets = map(lambda t: {var: getattr(t, var)() for var in jet_read_varnames},
               filter(lambda j: j.pt() > 30, reader.products['genJets']))
    # jet/lepton disambiguation: drop jets within dR < 0.3 of a lepton with pt > 10
    jets = filter(
        lambda j: (min([999] + [deltaR2(j, l) for l in leps if l['pt'] > 10]) > 0.3**2),
        jets)
    # find b's from tops:
    b_partons = [
        b for b in filter(
            lambda p: abs(p.pdgId()) == 5 and p.numberOfMothers() == 1 and abs(
                p.mother(0).pdgId()) == 6, gp)
    ]
    # flag jets within dR < 0.2 of a b parton from a top decay
    for jet in jets:
        jet['matchBParton'] = (min([999] + [
            deltaR2(jet, {
                'eta': b.eta(),
                'phi': b.phi()
            }) for b in b_partons
        ]) < 0.2**2)
    jets.sort(key=lambda p: -p['pt'])
    fill_vector(event, "GenJet", jet_write_varnames, jets)
type=int, default=2, help="Interpolation order for EFT weights.") args = argParser.parse_args() # # Logger # import TTXPheno.Tools.logger as logger import RootTools.core.logger as logger_rt logger = logger.get_logger(args.logLevel, logFile=None) logger_rt = logger_rt.get_logger(args.logLevel, logFile=None) # Load sample either from if len(args.inputFiles) > 0: logger.info("Input files found. Ignoring 'sample' argument. Files: %r", args.inputFiles) sample = FWLiteSample(args.targetSampleName, args.inputFiles) else: sample_file = "$CMSSW_BASE/python/TTXPheno/samples/fwlite_benchmarks.py" samples = imp.load_source("samples", os.path.expandvars(sample_file)) sample = getattr(samples, args.sample) maxEvents = -1 if args.small: args.targetDir += "_small" maxEvents = 100 # Number of files sample.files = sample.files[:1] # Load reweight pickle file if supposed to keep weights. extra_variables = [] if args.addReweights:
if args.reweightPtXToSM: if 'ttZ' in args.processFile: varX = "%sZ_pt"%args.level elif 'ttW' in args.processFile: varX = "%sW_pt"%args.level elif 'ttgamma' in args.processFile: varX = "%sPhoton_pt"%args.level rwIndex = -2 if args.backgrounds else -1 for i, param in enumerate( params[::-1] ): if i==0 and args.backgrounds: continue # no bg scaling param[0]['ptX_histo'] = ttXSample.get1DHistoFromDraw(varX, [50,0,500], selectionString = cutInterpreter.cutString(args.selection), weightString = w.get_weight_string(**param[0]['WC'])) ptX_integral = param[0]['ptX_histo'].Integral() if ptX_integral > 0: param[0]['ptX_histo'].Scale(1./ptX_integral) param[0]['ptX_reweight_histo'] = params[rwIndex][0]['ptX_histo'].Clone() param[0]['ptX_reweight_histo'].Divide(param[0]['ptX_histo']) logger.info( 'Made reweighting histogram for ptX and param-point %r with integral %f', param[0], param[0]['ptX_reweight_histo'].Integral()) def get_reweight( param, sample_, isSignal=True ): if isSignal: histo = param['ptX_reweight_histo'] bsm_rw = w.get_weight_func( **param['WC'] ) def reweight(event, sample): i_bin = histo.FindBin(getattr( event, varX ) ) return histo.GetBinContent(i_bin)*bsm_rw( event, sample ) * event.ref_lumiweight1fb * float(args.luminosity) * float(sample.event_factor) # return histo.GetBinContent(i_bin)*bsm_rw( event, sample ) * sample_.xsec * 1000 / sample_.nEvents / event.p_C[0] * float(args.luminosity) * float(sample.event_factor) return reweight else: def reweightRef(event, sample):
texY="Number of Events", attribute=lambda event, sample: event.nrecoPhoton, binning=[3, 0, 3], )) return plotList plots = getPlots() # Loop over channels allPlots = {} #allModes = [ 'all', 'mumumu', 'mumue', 'muee', 'eee' ] allModes = ['all'] for index, mode in enumerate(allModes): logger.info("Computing plots for mode %s", mode) # Define 2l selections leptonSelection = cutInterpreter.cutString(mode) for sample in stack.samples: sample.setSelectionString([leptonSelection]) plotting.fill(plots, read_variables=read_variables, sequence=sequence) logger.info("Plotting mode %s", mode) allPlots[mode] = copy.deepcopy( plots) # deep copy for creating SF/all plots afterwards! drawPlots(allPlots[mode], mode)
def calculation( c_var ):
    """Build (or reuse) the datacard for Higgs-coupling value c_var and run the NLL fit.

    Returns (c_var, nll); nll is clamped to 999 when the fit fails or the
    value is outside the trusted range.

    Relies on module-level globals: args, regions, cardfileLocation,
    getHiggsWeight, nloXSec, the signal* samples, the observation and
    *_uncertainty dicts, cardFileWriter, ProfiledLoglikelihoodFit, ROOT.
    """
    sigmaC = getHiggsWeight( c_var )
    nameList = [args.sample] + args.binning + [
        args.selection, 'small' if args.small else 'full', c_var
    ]
    cardname = '%s_nll_card'%'_'.join( map( str, nameList ) )
    cardFilePath = os.path.join( cardfileLocation, cardname + '.txt' )
    c = cardFileWriter.cardFileWriter()
    if not args.fitOnly:
        # print 'run cardfile'
        # uncertainties
        c.reset()
        c.addUncertainty('Luminosity', 'lnN')
        c.addUncertainty('JER', 'lnN')
        c.addUncertainty('btagging', 'lnN')
        c.addUncertainty('mistagging', 'lnN')
        c.addUncertainty('LeptonID', 'lnN')
        signal_rate = {}
        for i_region, region in enumerate(regions):
            # i_r cycles 0..3; used to scale the ad-hoc uncertainties per region
            i_r = i_region % 4
            # signal yield: PP/GH/HG/HH components weighted by the coupling mix
            signal_rate[region] = signalPP.getYieldFromDraw( selectionString=region.cutString(), weightString="%f"%(nloXSec*(1-c_var)**2/sigmaC) )['val']
            signal_rate[region] += signalGH.getYieldFromDraw( selectionString=region.cutString(), weightString="%f"%(nloXSec*(1-c_var)*c_var/sigmaC) )['val']
            signal_rate[region] += signalHG.getYieldFromDraw( selectionString=region.cutString(), weightString="%f"%(nloXSec*(1-c_var)*c_var/sigmaC) )['val']
            signal_rate[region] += signalHH.getYieldFromDraw( selectionString=region.cutString(), weightString="%f"%(nloXSec*c_var**2/sigmaC) )['val']
            # ad-hoc per-region uncertainty sizes, shrinking with i_r
            signal_btagging_uncertainty[region] = 1 + .015/(i_r+1.)
            signal_mistagging_uncertainty[region] = 1 + .01/(i_r+1.)
            signal_leptonId_uncertainty[region] = 1 + .01/(i_r+1.)
            signal_jes_uncertainty[region] = 1 + .05/(i_r+1.)
            bin_name = "Region_%i" % i_region
            nice_name = region.__str__()
            c.addBin(bin_name, ['_'.join(s.name.split('_')[1:3]) for s in bg], nice_name)
            c.specifyObservation( bin_name, observation[region] )
            c.specifyExpectation( bin_name, 'signal', signal_rate[region] )
            c.specifyFlatUncertainty( 'Luminosity', 1.026 )
            c.specifyUncertainty( 'JER', bin_name, 'signal', signal_jes_uncertainty[region] )
            c.specifyUncertainty( 'btagging', bin_name, 'signal', signal_btagging_uncertainty[region] )
            c.specifyUncertainty( 'mistagging', bin_name, 'signal', signal_mistagging_uncertainty[region] )
            c.specifyUncertainty( 'LeptonID', bin_name, 'signal', signal_leptonId_uncertainty[region] )
            # backgrounds: process name is the 2nd+3rd '_'-token of the sample name
            for background in bg:
                c.specifyExpectation( bin_name, '_'.join( background.name.split('_')[1:3] ), background_rate[region][background.name] )
                c.specifyUncertainty( 'JER', bin_name, '_'.join( background.name.split('_')[1:3] ), background_jes_uncertainty[region][background.name])
                c.specifyUncertainty( 'btagging', bin_name, '_'.join( background.name.split('_')[1:3] ), background_btagging_uncertainty[region][background.name])
                c.specifyUncertainty( 'mistagging', bin_name, '_'.join( background.name.split('_')[1:3] ), background_mistagging_uncertainty[region][background.name])
                c.specifyUncertainty( 'LeptonID', bin_name, '_'.join( background.name.split('_')[1:3] ), background_leptonId_uncertainty[region][background.name])
        c.writeToFile( cardFilePath )
    else:
        # fit-only mode reuses a previously written card
        logger.info( "Running only NLL Fit with given CardFile %s"%cardFilePath)
        if not os.path.isfile( cardFilePath ):
            raise ValueError('CardFiles not found! Run script without --fitOnly!')
    # narrow r range around 1 for the best-fit scan, wide otherwise
    if args.bestFit:
        r = (0.99, 1.01)
    else:
        r = (0., 2.)
    profiledLoglikelihoodFit = ProfiledLoglikelihoodFit( cardFilePath )
    profiledLoglikelihoodFit.make_workspace(rmin=r[0], rmax=r[1])
    nll = profiledLoglikelihoodFit.likelihoodTest()
    profiledLoglikelihoodFit.cleanup(removeFiles=args.removeCardFiles)
    del profiledLoglikelihoodFit
    logger.info( "NLL: %f", nll)
    ROOT.gDirectory.Clear()
    # in very large WC regions, the fit fails, not relevant for the interesting regions
    if nll is None or abs(nll) > 10000 or abs(nll) < 1:
        nll = 999
    del c
    return c_var, nll
def calculation(variables):
    """Build the datacard for one 2D EFT point and return (var1, var2, nll).

    variables is a (var1, var2) pair in the space of args.variables; a
    (cuB, cuW) point is first translated to (ctZ, ctW). The card is built
    unless --fitOnly, then the NLL is evaluated either via the official
    combine tool (--useCombine) or via ProfiledLoglikelihoodFit.

    Relies on module-level globals: args, regions, bg, ttXSample,
    ttX_coeffList* dicts, observation, nonPromptObservation, the signal/
    background *_uncertainty dicts, cardFileWriter, cuBWtoctWZ,
    getUncertaintyValue, combineReleaseLocation, cardfileLocation,
    ProfiledLoglikelihoodFit, ROOT.
    """
    #def calculation( var1, var2 ):
    if args.variables[0] == 'cuB' and args.variables[1] == 'cuW':
        var1, var2 = variables  #cuB cuW
        ctZ, ctW = cuBWtoctWZ(var1, var2)
        kwargs = {'ctZ': ctZ, 'ctW': ctW}
    else:
        var1, var2 = variables
        kwargs = {args.variables[0]: var1, args.variables[1]: var2}
    # card name encodes the full configuration so cached cards are unique
    nameList = args.sample.split(
        '_')[1:3] + args.variables + args.binning + [
            args.level, args.version, args.order, args.luminosity,
            "14TeV" if args.scale14TeV else "13TeV", args.selection,
            'small' if args.small else 'full',
            'statOnly' if args.statOnly else
            'fullUnc' if not args.noExpUnc else 'noExpUnc', var1, var2
        ]
    cardname = '%s_nll_card' % '_'.join(map(str, nameList))
    cardFilePath = os.path.join(cardfileLocation, cardname + '.txt')
    c = cardFileWriter.cardFileWriter()
    if args.useCombine:
        c.releaseLocation = combineReleaseLocation
    if not args.fitOnly:
        # print 'run cardfile'
        # uncertainties
        c.reset()
        if not args.statOnly:
            if not args.noExpUnc:
                c.addUncertainty('lumi', 'lnN')
                c.addUncertainty('JES', 'lnN')
                c.addUncertainty('btagging', 'lnN')
                c.addUncertainty('mistagging', 'lnN')
                c.addUncertainty('muonId', 'lnN')
                c.addUncertainty('electronId', 'lnN')
            for unc in args.addUncertainties:
                c.addUncertainty(unc, 'lnN')
        signal_rate = {}
        for i_region, region in enumerate(regions):
            signal_rate[region] = ttXSample.weightInfo.get_weight_yield(
                ttX_coeffList[region], **kwargs)
            if not args.statOnly and not args.noExpUnc:
                # signal uncertainties: ratio of reweighted to nominal yield
                # btagging
                signal_rate_reweighted = ttXSample.weightInfo.get_weight_yield(
                    ttX_coeffList_reweighted_btagging[region], **kwargs)
                signal_btagging_uncertainty[region] = 1 + (
                    (signal_rate_reweighted - signal_rate[region]) /
                    signal_rate[region]) if signal_rate[region] > 0 else 1.
                # mistagging
                signal_rate_reweighted = ttXSample.weightInfo.get_weight_yield(
                    ttX_coeffList_reweighted_mistagging[region], **kwargs)
                signal_mistagging_uncertainty[region] = 1 + (
                    (signal_rate_reweighted - signal_rate[region]) /
                    signal_rate[region]) if signal_rate[region] > 0 else 1.
                # muonId
                signal_rate_reweighted = ttXSample.weightInfo.get_weight_yield(
                    ttX_coeffList_reweighted_muonId[region], **kwargs)
                signal_muonId_uncertainty[region] = 1 + (
                    (signal_rate_reweighted - signal_rate[region]) /
                    signal_rate[region]) if signal_rate[region] > 0 else 1.
                # electronId
                signal_rate_reweighted = ttXSample.weightInfo.get_weight_yield(
                    ttX_coeffList_reweighted_electronId[region], **kwargs)
                signal_electronId_uncertainty[region] = 1 + (
                    (signal_rate_reweighted - signal_rate[region]) /
                    signal_rate[region]) if signal_rate[region] > 0 else 1.
                # JES: symmetrized from up/down variations
                signal_rate_reweighted_JES_up = ttXSample.weightInfo.get_weight_yield(
                    ttX_coeffList_reweighted_jes_up[region], **kwargs)
                signal_rate_reweighted_JES_down = ttXSample.weightInfo.get_weight_yield(
                    ttX_coeffList_reweighted_jes_down[region], **kwargs)
                signal_jes_uncertainty[region] = 1 + (
                    (signal_rate_reweighted_JES_up -
                     signal_rate_reweighted_JES_down) /
                    (2 * signal_rate[region])
                ) if signal_rate[region] > 0 else 1.
            bin_name = "Region_%i" % i_region
            nice_name = region.__str__()
            # note: parses as (bg_names + ['nonPrompt']) if addNonPrompt else bg_names
            c.addBin(bin_name,
                     ['_'.join(s.name.split('_')[1:3])
                      for s in bg] + ['nonPrompt'] if args.addNonPrompt else
                     ['_'.join(s.name.split('_')[1:3]) for s in bg],
                     nice_name)
            c.specifyObservation(bin_name, observation[region])
            c.specifyExpectation(bin_name, 'signal', signal_rate[region])
            if not args.statOnly:
                if not args.noExpUnc:
                    c.specifyFlatUncertainty('lumi', 1.01)
                    c.specifyUncertainty('JES', bin_name, 'signal',
                                         signal_jes_uncertainty[region])
                    c.specifyUncertainty(
                        'btagging', bin_name, 'signal',
                        signal_btagging_uncertainty[region])
                    c.specifyUncertainty(
                        'mistagging', bin_name, 'signal',
                        signal_mistagging_uncertainty[region])
                    c.specifyUncertainty('muonId', bin_name, 'signal',
                                         signal_muonId_uncertainty[region])
                    c.specifyUncertainty(
                        'electronId', bin_name, 'signal',
                        signal_electronId_uncertainty[region])
                # additional uncertainties read from an external card, scaled
                for unc in args.addUncertainties:
                    c.specifyUncertainty(
                        unc, bin_name, 'signal',
                        1 + (getUncertaintyValue(
                            args.additionalCardFile,
                            args.addBinNumberShift + i_region, 'signal',
                            unc) - 1) * args.uncertaintyScale)
            if args.addNonPrompt:
                # for nonpromt only nonpromt uncertainty is important
                c.specifyExpectation(bin_name, 'nonPrompt',
                                     nonPromptObservation[region])
                if not args.statOnly:
                    c.specifyUncertainty(
                        'nonprompt', bin_name, 'nonPrompt',
                        1 + (getUncertaintyValue(
                            args.additionalCardFile,
                            args.addBinNumberShift + i_region,
                            'nonPromptDD', 'nonprompt') - 1) *
                        args.uncertaintyScale)
            #c.specifyExpectation( bin_name, 'ttX_SM', ttX_SM_rate[region] )
            #c.specifyUncertainty( 'JES', bin_name, 'ttX_SM', ttX_SM_jes_uncertainty[region])
            #c.specifyUncertainty( 'btagging',bin_name, 'ttX_SM', ttX_SM_btagging_uncertainty[region])
            for background in bg:
                c.specifyExpectation(
                    bin_name, '_'.join(background.name.split('_')[1:3]),
                    background_rate[region][background.name])
                if not args.statOnly:
                    if not args.noExpUnc:
                        c.specifyUncertainty(
                            'JES', bin_name,
                            '_'.join(background.name.split('_')[1:3]),
                            background_jes_uncertainty[region][
                                background.name])
                        c.specifyUncertainty(
                            'btagging', bin_name,
                            '_'.join(background.name.split('_')[1:3]),
                            background_btagging_uncertainty[region][
                                background.name])
                        c.specifyUncertainty(
                            'mistagging', bin_name,
                            '_'.join(background.name.split('_')[1:3]),
                            background_mistagging_uncertainty[region][
                                background.name])
                        c.specifyUncertainty(
                            'muonId', bin_name,
                            '_'.join(background.name.split('_')[1:3]),
                            background_muonId_uncertainty[region][
                                background.name])
                        c.specifyUncertainty(
                            'electronId', bin_name,
                            '_'.join(background.name.split('_')[1:3]),
                            background_electronId_uncertainty[region][
                                background.name])
                    for unc in args.addUncertainties:
                        # map the background sample onto the process name
                        # used in the external uncertainty card
                        if 'tZq' in background.name.split(
                                '_') or 'ttgamma' in background.name.split(
                                    '_') or 'tWZ' in background.name.split(
                                        '_'):
                            proc = 'TTX'
                        elif 'WZ' in background.name.split('_'):
                            proc = 'WZ'
                        else:
                            raise ValueError('Background not found: %s' %
                                             background.name)
                        c.specifyUncertainty(
                            unc, bin_name,
                            '_'.join(background.name.split('_')[1:3]),
                            1 + (getUncertaintyValue(
                                args.additionalCardFile,
                                args.addBinNumberShift + i_region, proc,
                                unc) - 1) * args.uncertaintyScale)
        c.writeToFile(cardFilePath)
    else:
        logger.info("Running only NLL Fit with given CardFile %s" %
                    cardFilePath)
        if not os.path.isfile(cardFilePath):
            raise ValueError(
                'CardFiles not found! Run script without --fitOnly!')
    if args.useCombine:
        # use the official cms combine tool
        # c.calcNuisances( cardFilePath, bestFit=args.bestFit )
        nll = c.calcNLL(cardFilePath, bestFit=args.bestFit)
        # nll = nll['nll0'] #pre-fit
        nll = nll['nll_abs'] #post-fit
        if args.removeCardFiles:
            for file in os.listdir(cardfileLocation):
                if file.startswith(cardname):
                    os.remove(os.path.join(cardfileLocation, file))
    else:
        # narrow r range around 1 for the best-fit scan, wide otherwise
        if args.bestFit:
            r = (0.99, 1.01)
        else:
            r = (0., 2.)
        profiledLoglikelihoodFit = ProfiledLoglikelihoodFit(cardFilePath)
        profiledLoglikelihoodFit.make_workspace(rmin=r[0], rmax=r[1])
        nll = profiledLoglikelihoodFit.likelihoodTest()
        profiledLoglikelihoodFit.cleanup(removeFiles=args.removeCardFiles)
        del profiledLoglikelihoodFit
    logger.info("NLL: %f", nll)
    ROOT.gDirectory.Clear()
    # in very large WC regions, the fit fails, not relevant for the interesting regions
    if nll is None or abs(nll) > 10000 or abs(nll) < 1:
        nll = 999
    del c
    return var1, var2, nll
def make_batch_job(batch_job_file, batch_job_title, batch_output_dir, command):
    """Write a batch submission script wrapping `command` to batch_job_file.

    Picks an SBATCH template on 'heplx' or a plain bash template on 'lxplus'
    (module-level global `host`), optionally exporting a VOMS proxy first
    (options.dpm). NOTE(review): the templates' internal line breaks were
    reconstructed from the shell-command structure — confirm against VCS.
    """
    # If X509_USER_PROXY is set, use existing proxy.
    if options.dpm:
        if host == 'lxplus':
            from StopsDilepton.Tools.user import cern_proxy_certificate
            proxy_location = cern_proxy_certificate
        else:
            proxy_location = None
        from RootTools.core.helpers import renew_proxy
        proxy = renew_proxy(proxy_location)
        logger.info("Using proxy certificate %s", proxy)
        proxy_cmd = "export X509_USER_PROXY=%s" % proxy
    else:
        proxy_cmd = ""
    import subprocess
    if host == 'heplx':
        # SLURM template for the heplx cluster
        template =\
"""\
#!/bin/sh
#SBATCH -J {batch_job_title}
#SBATCH -D {pwd}
#SBATCH -o {batch_output_dir}batch-test.%j.out
{proxy_cmd}
voms-proxy-info -all
eval \`"scram runtime -sh"\`
echo CMSSW_BASE: {cmssw_base}
echo Executing user command
echo "{command}"
{command}
voms-proxy-info -all
""".format(\
            command = command,
            cmssw_base = os.getenv("CMSSW_BASE"),
            batch_output_dir = batch_output_dir,
            batch_job_title = batch_job_title,
            pwd = os.getenv("PWD"),
            proxy_cmd = proxy_cmd
        )
    elif host == 'lxplus':
        # plain bash template for lxplus; re-enters the CMSSW environment
        template =\
"""\
#!/bin/bash
export CMSSW_PROJECT_SRC={cmssw_base}/src
cd $CMSSW_PROJECT_SRC
eval `scramv1 ru -sh`
alias python={python_release}
which python
python --version
{proxy_cmd}
voms-proxy-info -all
echo CMSSW_BASE: $CMSSW_BASE
cd {pwd}
echo Executing user command while in $PWD
echo "{command}"
{command}
voms-proxy-info -all
""".format(\
            command = command,
            cmssw_base = os.getenv("CMSSW_BASE"),
            #batch_output_dir = batch_output_dir,
            #batch_job_title = batch_job_title,
            pwd = os.getenv("PWD"),
            proxy_cmd = proxy_cmd,
            python_release = subprocess.check_output(['which', 'python']).rstrip(),
        )
    # NOTE(review): 'file()' is the Python-2 builtin removed in Python 3;
    # prefer 'with open(batch_job_file, "w") as ...' when porting.
    batch_job = file(batch_job_file, "w")
    batch_job.write(template)
    batch_job.close()
    return
params = {'ctZ': 0., 'ctZI': 0.} # Can specify any EFT parameter here observation = {} signal_jec_uncertainty = {} signal_fakerate_uncertainty = {} ttZ_SM_rate = {} ttZ_SM_jec_uncertainty = {} ttZ_SM_fakerate_uncertainty = {} background_rate = {} background_jec_uncertainty = {} background_fakerate_uncertainty = {} ttZ_coeffList = {} for i_region, region in enumerate(regions): logger.info("At region %s", region) # compute signal yield for this region (this is the final code) ttZ_coeffList[region] = ttZ_sample.weightInfo.getCoeffListFromDraw( ttZ_sample, region.cutString(), weightString='150*ref_lumiweight1fb') # TTZ SM ttZ_SM_rate[region] = ttZ_sample.weightInfo.get_weight_yield( ttZ_coeffList[region]) ttZ_SM_jec_uncertainty[region] = 1.05 ttZ_SM_fakerate_uncertainty[region] = 1.0 # signal has no FR uncertainty # signal uncertainties signal_jec_uncertainty[region] = 1.05 signal_fakerate_uncertainty[region] = 1.0 # signal has no FR uncertainty background_rate[region] = {}
def calculation( variables ): if args.variables[0] == 'cuB' and args.variables[1] == 'cuW': var1, var2 = variables #cuB cuW ctZ, ctW = cuBWtoctWZ( var1, var2 ) kwargs = { 'ctZ':ctZ, 'ctW':ctW } else: var1, var2 = variables kwargs = { args.variables[0]:var1, args.variables[1]:var2 } # uncertainties c.reset() c.addUncertainty('lumi', 'lnN') c.addUncertainty('JEC', 'lnN') c.addUncertainty('fake', 'lnN') signal_rate = {} for i_region, region in enumerate(ttZRegions): signal_rate[region] = ttZSample.weightInfo.get_weight_yield( ttZ_coeffList[region], **kwargs) - ttZ_SM_rate[region] bin_name = "Region_%i" % i_region nice_name = region.__str__() c.addBin(bin_name,['ttX_SM'] + ['_'.join(s.name.split('_')[1:3]) for s in ttZBg], nice_name) c.specifyObservation( bin_name, observation[region] ) c.specifyFlatUncertainty( 'lumi', 1.05 ) c.specifyExpectation( bin_name, 'signal', signal_rate[region] ) c.specifyUncertainty( 'JEC', bin_name, 'signal', signal_jec_uncertainty[region]) c.specifyUncertainty( 'fake',bin_name, 'signal', signal_fakerate_uncertainty[region]) c.specifyExpectation( bin_name, 'ttX_SM', ttZ_SM_rate[region] ) c.specifyUncertainty( 'JEC', bin_name, 'ttX_SM', ttX_SM_jec_uncertainty[region]) c.specifyUncertainty( 'fake',bin_name, 'ttX_SM', ttX_SM_fakerate_uncertainty[region]) for background in ttZBg: c.specifyExpectation( bin_name, '_'.join( background.name.split('_')[1:3] ), background_rate[region][background.name] ) c.specifyUncertainty( 'JEC', bin_name, '_'.join( background.name.split('_')[1:3] ), background_jec_uncertainty[region][background.name]) c.specifyUncertainty( 'fake',bin_name, '_'.join( background.name.split('_')[1:3] ), background_fakerate_uncertainty[region][background.name]) for i_region, region in enumerate(ttgammaRegions): signal_rate[region] = ttgamma1lSample.weightInfo.get_weight_yield( ttgamma1l_coeffList[region], **kwargs) - ttgamma1l_SM_rate[region] signal_rate[region] += ttgamma2lSample.weightInfo.get_weight_yield( 
ttgamma2l_coeffList[region], **kwargs) - ttgamma2l_SM_rate[region] bin_name = "Region_%i" % (i_region + len(ttZRegions)) nice_name = region.__str__() c.addBin(bin_name,['ttX_SM'] + ['_'.join(s.name.split('_')[1:3]) for s in ttgammaBg], nice_name) c.specifyObservation( bin_name, observation[region] ) # c.specifyFlatUncertainty( 'lumi', 1.05 ) # c.specifyFlatUncertainty( 'lumi', 1.026 ) c.specifyFlatUncertainty( 'lumi', 1.05 ) c.specifyExpectation( bin_name, 'signal', signal_rate[region] ) c.specifyUncertainty( 'JEC', bin_name, 'signal', signal_jec_uncertainty[region]) c.specifyUncertainty( 'fake',bin_name, 'signal', signal_fakerate_uncertainty[region]) c.specifyExpectation( bin_name, 'ttX_SM', ttgamma1l_SM_rate[region] + ttgamma2l_SM_rate[region] ) c.specifyUncertainty( 'JEC', bin_name, 'ttX_SM', ttX_SM_jec_uncertainty[region]) c.specifyUncertainty( 'fake',bin_name, 'ttX_SM', ttX_SM_fakerate_uncertainty[region]) for background in ttgammaBg: c.specifyExpectation( bin_name, '_'.join( background.name.split('_')[1:3] ), background_rate[region][background.name] ) c.specifyUncertainty( 'JEC', bin_name, '_'.join( background.name.split('_')[1:3] ), background_jec_uncertainty[region][background.name]) c.specifyUncertainty( 'fake',bin_name, '_'.join( background.name.split('_')[1:3] ), background_fakerate_uncertainty[region][background.name]) nameList = ['combined'] + args.variables + args.binning + [ args.level, args.version, args.order, args.luminosity, 'small' if args.small else 'full', var1, var2 ] cardname = '%s_limit_card'%'_'.join( map( str, nameList ) ) c.writeToFile( './tmp/%s.txt'%cardname ) # try to adjust rmax with some margin exp_tot_sigmas = 0 max_rmax = float('inf') for region in ttZRegions: tot_background = sum( [ background_rate[region][background.name] for background in ttZBg ] ) exp_tot_sigmas += abs(signal_rate[region]) / sqrt( tot_background ) if tot_background > 0 else 1. 
#float('inf') print 'region', region print 'exp_sigma', exp_tot_sigmas # avoid total neg. yield if signal_rate[region] < 0: max_r = -tot_background / signal_rate[region] if max_r < max_rmax: max_rmax = max_r print 'max_rmax', max_rmax print for region in ttgammaRegions: tot_background = sum( [ background_rate[region][background.name] for background in ttgammaBg ] ) exp_tot_sigmas += abs(signal_rate[region]) / sqrt( tot_background ) if tot_background > 0 else 100. #float('inf') print 'region', region print 'exp_sigma', exp_tot_sigmas # avoid total neg. yield if signal_rate[region] < 0: max_r = -tot_background / signal_rate[region] if max_r < max_rmax: max_rmax = max_r print 'max_rmax', max_rmax print if exp_tot_sigmas is float('inf'): rmax_est = 0.5 #float('inf') elif exp_tot_sigmas == 0: rmax_est = 200 #float('inf') else: rmax_est = 400. / exp_tot_sigmas print print 'rmax_est', rmax_est if max_rmax < rmax_est: rmax_est = 0.9*max_rmax # safety margin such that at least +10% total yield survives in the smallest SR print 'rmax_est', rmax_est print profiledLoglikelihoodFit = ProfiledLoglikelihoodFit( './tmp/%s.txt'%cardname ) profiledLoglikelihoodFit.make_workspace(rmin=0, rmax=rmax_est) #expected_limit = profiledLoglikelihoodFit.calculate_limit( calculator = "frequentist" ) expected_limit = profiledLoglikelihoodFit.calculate_limit( calculator = "asymptotic", plotLimit = False ) logger.info( "Expected Limit: %f", expected_limit[0] ) profiledLoglikelihoodFit.cleanup( removeFiles = True ) del profiledLoglikelihoodFit ROOT.gDirectory.Clear() return var1, var2, [ expected_limit[i] for i in range(-2,3) ]
def plot_1D_yield(var,
                  coeffList,
                  range=[-10, 10, .5],
                  differential=False,
                  normalization=False):
    ''' Create 1D plot of (differential) yield as a function of WC (val) for a given list of weights (coeffList)

    var           : name of the Wilson coefficient to scan (also the x-axis title)
    coeffList     : polynomial coefficients passed to the weight helper `w`
    range         : [start, stop, step] for the scan grid
                    NOTE(review): mutable default and shadows the builtin
                    `range`; harmless here since it is never mutated, and
                    renaming would break keyword callers.
    differential  : plot w.get_diff_weight_yield instead of the total yield
    normalization : divide by the SM (var=0) yield and draw a baseline at 1

    Writes <plot_directory_>/<var>.png. Uses module-level globals:
    w, textsize, plot_directory_, logger, ROOT, np.
    '''
    var_val = np.arange(range[0], range[1], range[2])
    dict_list = [{var: val} for val in var_val]
    if differential:
        y = np.array([
            w.get_diff_weight_yield(var, coeffList, **item)
            for item in dict_list
        ])
    else:
        y = np.array(
            [w.get_weight_yield(coeffList, **item) for item in dict_list])
    if normalization:
        # normalize to the SM point (all WCs at 0)
        y /= w.get_weight_yield(coeffList, **{var: 0})
    graph = ROOT.TGraph(len(var_val), var_val, y)
    graph.SetLineWidth(2)
    graph.SetLineColor(ROOT.kRed)
    graph.GetXaxis().SetRangeUser(range[0], range[1])
    y_label = ''
    if differential: y_label += 'Diff. '
    y_label += 'Total Yield'
    if normalization and not differential: y_label += ' [SM Yield]'
    graph.GetYaxis().SetTitle(y_label)
    graph.GetXaxis().SetTitle(var)
    graph.SetTitle('')
    graph.GetXaxis().SetTitleSize(textsize)
    graph.GetYaxis().SetTitleSize(textsize)
    graph.GetXaxis().SetLabelSize(textsize)
    graph.GetYaxis().SetLabelSize(textsize)
    # Base Line (at 1 for normalized plots, at 0 for differential ones)
    if normalization:
        line = ROOT.TLine(range[0], 1, range[1], 1)
    if differential:
        line = ROOT.TLine(range[0], 0, range[1], 0)
    if normalization or differential:
        line.SetLineColor(ROOT.kBlack)
        line.SetLineWidth(2)
        line.SetLineColor(ROOT.kBlue)
    # Legend
    legend = ROOT.TLegend(0.63, 0.78, .9, .9)
    legend.SetTextSize(textsize)
    legend.SetTextFont(1)
    if normalization: legend.AddEntry(line, "SM Yield", "l")
    legend.AddEntry(graph, "Total Yield", "l")
    # Plotting
    c1 = ROOT.TCanvas()
    graph.Draw()
    if normalization or differential: line.Draw('SAME')
    legend.Draw()
    logger.info('Plot created:' + os.path.join(plot_directory_, '%s.png') % (var))
    c1.Print(os.path.join(plot_directory_, '%s.png') % (var))
# Logger import TTXPheno.Tools.logger as logger logger = logger.get_logger(options.logLevel, logFile = None ) # Walk the directory structure and group files in 'jobs' of [f1_0.root, f1_1.root, ...] tootalling to approx. sizeGB jobs = [] for dirName, subdirList, fileList in os.walk(options.dir): rootFiles = [] for f in fileList: if f.endswith('.root'): full_filename = os.path.join(dirName, f) if not '_reHadd_' in f: to_skip = False for skip in options.skip: if skip in f: logger.info( "Found skip string %s in %s. Skipping.", skip, f ) to_skip = True break if to_skip: continue isOK = checkRootFile( full_filename, checkForObjects = [options.treeName]) \ if options.treeName is not None else checkRootFile( full_filename ) if isOK: rootFiles.append( f ) else: logger.warning( "File %s does not look OK. Checked for tree: %r", full_filename, options.treeName ) else: logger.info( "Found '_reHadd_' in file %s in %s. Skipping.", full_filename, dirName ) job = [] jobsize = 0 for fname in rootFiles: filename, file_extension = os.path.splitext(fname)
params = {'ctZ': 0., 'ctZI': 0.} # Can specify any EFT parameter here observation = {} signal_jec_uncertainty = {} signal_fakerate_uncertainty = {} ttZ_SM_rate = {} #ttZ_SM_jec_uncertainty = {} #ttZ_SM_fakerate_uncertainty = {} background_rate = {} background_jec_uncertainty = {} background_fakerate_uncertainty = {} ttZ_coeffList = {} for i_region, region in enumerate(regions): logger.info("At region %s", region) # compute signal yield for this region (this is the final code) ttZ_coeffList[region] = ttZ_sample.weightInfo.getCoeffListFromDraw( ttZ_sample, region.cutString(), weightString='150*ref_lumiweight1fb') ## TTZ SM ttZ_SM_rate[region] = ttZ_sample.weightInfo.get_weight_yield( ttZ_coeffList[region]) #ttZ_SM_jec_uncertainty [region] = 1.05 #ttZ_SM_fakerate_uncertainty [region] = 1.0 # signal has no FR uncertainty # signal uncertainties signal_jec_uncertainty[region] = 1.05 signal_fakerate_uncertainty[region] = 1.0 # signal has no FR uncertainty background_rate[region] = {}
# Per-region bookkeeping for the combined ttZ/ttgamma card
# (the background_* dicts are presumably filled further down — TODO confirm).
ttZ_SM_rate = {}
ttZ_coeffList = {}
ttgamma1l_SM_rate = {}
ttgamma1l_coeffList = {}
ttgamma2l_SM_rate = {}
ttgamma2l_coeffList = {}
background_rate = {}
background_jec_uncertainty = {}
background_fakerate_uncertainty = {}
for i_region, region in enumerate(ttZRegions):
    logger.info("At region %s", region)
    # ttX SM
    ttZ_coeffList[region] = ttZSample.weightInfo.getCoeffListFromDraw(
        ttZSample, selectionString=region.cutString())
    ttZ_SM_rate[region] = ttZSample.weightInfo.get_weight_yield(
        ttZ_coeffList[region])
    # signal uncertainties (flat ad-hoc values)
    signal_jec_uncertainty[region] = 1.05
    # signal_jec_uncertainty [region] = 1.09
    signal_fakerate_uncertainty[
        region] = 1.0  # signal has no FR uncertainty
    background_rate[region] = {}
    background_fakerate_uncertainty[region] = {}
# Make stack stack = Stack(*[[sample] for param in params]) # reweighting of pTZ if args.reweightPtZToSM: for param in params[::-1]: param['ptZ_histo'] = sample.get1DHistoFromDraw( "Z_pt", [20, 0, 500], selectionString=cutInterpreter.cutString(args.selection), weightString=w.get_weight_string(**param['WC'])) if param['ptZ_histo'].Integral() > 0: param['ptZ_histo'].Scale(1. / param['ptZ_histo'].Integral()) param['ptZ_reweight_histo'] = params[-1]['ptZ_histo'].Clone() param['ptZ_reweight_histo'].Divide(param['ptZ_histo']) logger.info( 'Made reweighting histogram for ptZ and param-point %r with integral %f', param, param['ptZ_reweight_histo'].Integral()) def get_reweight(param): histo = param['ptZ_reweight_histo'] var = 'Z_pt' bsm_rw = w.get_weight_func(**param['WC']) def reweight(event, sample): i_bin = histo.FindBin(getattr(event, var)) return histo.GetBinContent(i_bin) * bsm_rw( event, sample ) * event.ref_lumiweight1fb * args.luminosity * event_factor return reweight
def calculation(variables):
    """Build a combined ttZ+ttgamma datacard for one point in the 2D EFT
    scan and return (var1, var2, nll) from a profiled-likelihood fit.

    `variables` is a (var1, var2) pair in the scan plane named by
    args.variables. Relies on module globals filled earlier: the card
    writer `c`, `observation`, the per-region rate/uncertainty dicts,
    the region lists and samples, and rmin/rmax.
    """
    #def calculation( var1, var2 ):
    if args.variables[0] == 'cuB' and args.variables[1] == 'cuW':
        var1, var2 = variables  #cuB cuW
        # Translate the (cuB, cuW) basis into the (ctZ, ctW) basis used
        # by the weight parametrization.
        ctZ, ctW = cuBWtoctWZ(var1, var2)
        kwargs = {'ctZ': ctZ, 'ctW': ctW}
    else:
        var1, var2 = variables
        kwargs = {args.variables[0]: var1, args.variables[1]: var2}

    # uncertainties
    c.reset()
    c.addUncertainty('lumi', 'lnN')
    c.addUncertainty('JEC', 'lnN')
    c.addUncertainty('fake', 'lnN')

    signal_rate = {}

    # --- ttZ regions: bins Region_0 .. Region_{len(ttZRegions)-1} ---
    for i_region, region in enumerate(ttZRegions):
        # Signal yield at this EFT point from the precomputed coefficients.
        signal_rate[region] = ttZSample.weightInfo.get_weight_yield(
            ttZ_coeffList[region], **kwargs)

        bin_name = "Region_%i" % i_region
        nice_name = region.__str__()
        # Process labels are the 2nd/3rd underscore-separated tokens of
        # each background sample name.
        c.addBin(bin_name,
                 ['_'.join(s.name.split('_')[1:3]) for s in ttZBg],
                 nice_name)
        c.specifyObservation(bin_name, observation[region])
        # c.specifyFlatUncertainty( 'lumi', 1.05 )
        # c.specifyFlatUncertainty( 'lumi', 1.026 )
        # NOTE(review): 1.01 here vs 1.05 in the ttgamma loop below —
        # 'lumi' is specified with both values; confirm which flat lumi
        # uncertainty is intended to win.
        c.specifyFlatUncertainty('lumi', 1.01)
        c.specifyExpectation(bin_name, 'signal', signal_rate[region])
        c.specifyUncertainty('JEC', bin_name, 'signal',
                             signal_jec_uncertainty[region])
        c.specifyUncertainty('fake', bin_name, 'signal',
                             signal_fakerate_uncertainty[region])
        #c.specifyExpectation( bin_name, 'ttX_SM', ttX_SM_rate[region] )
        #c.specifyUncertainty( 'JEC', bin_name, 'ttX_SM', ttX_SM_jec_uncertainty[region])
        #c.specifyUncertainty( 'fake',bin_name, 'ttX_SM', ttX_SM_fakerate_uncertainty[region])
        for background in ttZBg:
            c.specifyExpectation(bin_name,
                                 '_'.join(background.name.split('_')[1:3]),
                                 background_rate[region][background.name])
            c.specifyUncertainty(
                'JEC', bin_name, '_'.join(background.name.split('_')[1:3]),
                background_jec_uncertainty[region][background.name])
            c.specifyUncertainty(
                'fake', bin_name, '_'.join(background.name.split('_')[1:3]),
                background_fakerate_uncertainty[region][background.name])

    # --- ttgamma regions: bin indices continue after the ttZ bins ---
    for i_region, region in enumerate(ttgammaRegions):
        # Signal yield is the sum of the 1-lepton and 2-lepton ttgamma
        # contributions at this EFT point.
        signal_rate[region] = ttgamma1lSample.weightInfo.get_weight_yield(
            ttgamma1l_coeffList[region], **kwargs)
        signal_rate[region] += ttgamma2lSample.weightInfo.get_weight_yield(
            ttgamma2l_coeffList[region], **kwargs)

        bin_name = "Region_%i" % (i_region + len(ttZRegions))
        nice_name = region.__str__()
        c.addBin(bin_name,
                 ['_'.join(s.name.split('_')[1:3]) for s in ttgammaBg],
                 nice_name)
        c.specifyObservation(bin_name, observation[region])
        # c.specifyFlatUncertainty( 'lumi', 1.05 )
        # c.specifyFlatUncertainty( 'lumi', 1.026 )
        c.specifyFlatUncertainty('lumi', 1.05)
        c.specifyExpectation(bin_name, 'signal', signal_rate[region])
        c.specifyUncertainty('JEC', bin_name, 'signal',
                             signal_jec_uncertainty[region])
        c.specifyUncertainty('fake', bin_name, 'signal',
                             signal_fakerate_uncertainty[region])
        #c.specifyExpectation( bin_name, 'ttX_SM', ttX_SM_rate[region] )
        #c.specifyUncertainty( 'JEC', bin_name, 'ttX_SM', ttX_SM_jec_uncertainty[region])
        #c.specifyUncertainty( 'fake',bin_name, 'ttX_SM', ttX_SM_fakerate_uncertainty[region])
        for background in ttgammaBg:
            c.specifyExpectation(bin_name,
                                 '_'.join(background.name.split('_')[1:3]),
                                 background_rate[region][background.name])
            c.specifyUncertainty(
                'JEC', bin_name, '_'.join(background.name.split('_')[1:3]),
                background_jec_uncertainty[region][background.name])
            c.specifyUncertainty(
                'fake', bin_name, '_'.join(background.name.split('_')[1:3]),
                background_fakerate_uncertainty[region][background.name])

    # Unique card name encoding the full scan configuration and this point.
    nameList = ['combined'] + args.variables + args.binning + [
        args.level, args.version, args.order, args.luminosity,
        'small' if args.small else 'full', var1, var2
    ]
    cardname = '%s_nll_card' % '_'.join(map(str, nameList))
    c.writeToFile('./tmp/%s.txt' % cardname)

    # Fit the card and extract the NLL for this scan point.
    profiledLoglikelihoodFit = ProfiledLoglikelihoodFit('./tmp/%s.txt' %
                                                        cardname)
    profiledLoglikelihoodFit.make_workspace(rmin=rmin, rmax=rmax)
    #expected_limit = profiledLoglikelihoodFit.calculate_limit( calculator = "frequentist" )
    nll = profiledLoglikelihoodFit.likelihoodTest()
    # NOTE(review): if likelihoodTest() can return None, this %f log call
    # will produce a logging formatting error before the None-guard below
    # — confirm, or log after the guard.
    logger.info("NLL: %f", nll)
    # Release fit resources and clear ROOT's current directory so repeated
    # calls don't accumulate workspace objects.
    profiledLoglikelihoodFit.cleanup(removeFiles=True)
    del profiledLoglikelihoodFit
    ROOT.gDirectory.Clear()

    # Guard against failed or runaway fits with a sentinel value.
    if nll is None or abs(nll) > 10000:
        nll = 999

    return var1, var2, nll
if line: if split: logger.info("Splitting in %i jobs", split) for i in range(split): commands.append(line + " --nJobs %i --job %i" % (split, i)) else: commands.append(line) return commands if __name__ == '__main__': if not len(args) == 1: raise Exception( "Only one argument accepted! Instead this was given: %s" % args) if os.path.isfile(args[0]): logger.info("Reading commands from file: %s", args[0]) commands = [] with open(args[0]) as f: for line in f.xreadlines(): commands.extend(getCommands(line.rstrip("\n"))) elif type(args[0]) == type(""): commands = getCommands(args[0]) if commands: logger.info("Working on host %s", host) if host == 'heplx': if not os.path.isdir(batch_output_dir): os.mkdir(batch_output_dir) logger.info( "Batch system output file to be written to directory: %s",