def xsec( self, modified_couplings=None, overwrite=False, skip=False ):
    key = self.getKey( modified_couplings )
    # Do we have the x-sec?
    if self.xsecDB.contains(key) and not overwrite:
        logger.debug( "Found x-sec %s for key %r. Do nothing.", self.xsecDB.get(key), key )
        return self.xsecDB.get(key)
    elif skip:
        return u_float(0)
    else:
        logger.info( "Trying to get x-sec" )
        self.__initialize( modified_couplings )
        logger.info( "Calculating x-sec" )
        # rerun MG to obtain the correct x-sec (with more events)
        with open( os.path.join( self.processTmpDir, 'Cards/run_card.dat' ), 'a' ) as f:
            f.write( ".false. = gridpack\n" )
        logger.info( "Calculate x-sec: Calling bin/generate_events" )
        m = None
        for i in range(10):
            try:
                output = subprocess.check_output( [ os.path.join( self.processTmpDir, 'bin/generate_events' ), '-f' ] )
                m = re.search( r"Cross-section :\s*(.*) pb", output )
                logger.info( "x-sec: %s pb", m.group(1) )
                break
            except (ValueError, AttributeError):
                logger.info( "Encountered problem during the MG run. Restarting." )
        xsec_ = u_float.fromString( m.group(1) )
        self.xsecDB.add( key, xsec_, overwrite=True )
        logger.info( "Done!" )
        return xsec_
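# Hedged usage sketch (illustrative only; assumes an instance `maker` of this gridpack-producer
# class with an initialized xsecDB, and 'ctZ' as an example coupling of the chosen model):
# xsec = maker.xsec( modified_couplings={'ctZ': 2.0} )
# logger.info( "Got x-sec: %s pb", xsec )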
def drawPlots(plots):
    logger.info("Plotting mode: %s" % args.mode)
    for plot in plots:
        # check if the plot is filled
        if not max(l[0].GetMaximum() for l in plot.histos):
            logger.info("Empty plot!")
            continue  # Empty plot
        # plot in log scale and linear scale
        for log in [True, False]:
            plot_directory_ = os.path.join(plot_directory, 'EFTvalidation', str(args.parameter), args.plot_directory, args.selection, args.sample, args.mode, "log" if log else "lin")
            plotting.draw(
                plot,
                plot_directory=plot_directory_,
                ratio={'yRange': (0.5, 1.5), 'histos': [(0, 1), (2, 3), (4, 5)], 'texY': 'weight./sim.'},
                logX=False, logY=log, sorting=True,
                yRange=(10, "auto"),
                scaling=scaling if args.normalize else {},
                legend=[(0.2, 0.74, 0.9, 0.9), 2],
                drawObjects=drawObjects(lumi_scale),
                copyIndexPHP=True,
            )
def drawPlots(plots):
    logger.info("Plotting mode: %s" % args.mode)
    for plot in plots:
        # check if the plot is filled
        if not max(l[0].GetMaximum() for l in plot.histos):
            logger.info("Empty plot!")
            continue  # Empty plot
        # plot in log scale and linear scale
        for log in [True, False]:
            plot_directory_ = os.path.join(plot_directory, 'simplePlots', str(args.year), args.plot_directory, args.selection, args.mode, "log" if log else "lin")
            plotting.draw(
                plot,
                plot_directory=plot_directory_,
                ratio={'yRange': (0.5, 1.5)} if not args.noData else None,
                logX=False, logY=log, sorting=True,
                yRange=(0.001, "auto"),
                legend=[(0.2, 0.9 - 0.025 * sum(map(len, plot.histos)), 0.9, 0.9), 3],
                drawObjects=drawObjects(not args.noData, lumi_scale),
                copyIndexPHP=True,
            )
def makeTemplate( self, selection, weight='(1)' ):
    logger.info( "Make PU profile for sample %s and selection %s and weight %s", self.source_sample.name, selection, weight )
    h_source = self.source_sample.get1DHistoFromDraw( self.draw_string, self.binning, selectionString=selection, weightString=weight )
    logger.info( "PU histogram contains %s weighted events", h_source.Integral() )
    h_source.Scale( 1./h_source.Integral() )
    return h_source
def initCache(self, cacheDir="systematics"):
    logger.info("Initializing cache for %s in directory %s" % (self.name, cacheDir))
    if cacheDir:
        self.cacheDir = os.path.join(cache_directory, cacheDir)
        try:
            os.makedirs(cacheDir)
        except OSError:
            pass
        cacheDirName = os.path.join(cacheDir, self.name)
        self.cache = MergingDirDB(cacheDirName)
        if not self.cache:
            raise Exception("Cache not initiated!")
        if self.name.count("DD"):
            helperCacheDirName = os.path.join(cacheDir, self.name + "_helper")
            self.helperCache = MergingDirDB(helperCacheDirName)
            if not self.helperCache:
                raise Exception("Helper cache not initiated!")
            histoHelperCacheDirName = os.path.join(cacheDir, self.name + "_histo")
            self.histoHelperCache = MergingDirDB(histoHelperCacheDirName)
            if not self.histoHelperCache:
                raise Exception("Histo helper cache not initiated!")
            tfCacheDirName = os.path.join(cacheDir, self.name + "_tf")
            self.tfCache = MergingDirDB(tfCacheDirName)
            if not self.tfCache:
                raise Exception("Transfer-factor cache not initiated!")
        elif self.name.count("had"):
            helperCacheDirName = os.path.join(cacheDir, "had_helper")
            self.helperCache = MergingDirDB(helperCacheDirName)
            if not self.helperCache:
                raise Exception("Helper cache not initiated!")
        else:
            self.helperCache = None
            self.tfCache = None
    else:
        self.cache = None
        self.helperCache = None
        self.tfCache = None
def replaceAliases(cutString):
    cut = cutString
    for key, val in aliases.iteritems():
        cut = cut.replace(key, val)
    logger.info("Replacing variable names: old cut: %s, new cut: %s" % (cutString, cut))
    return cut
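# Hedged usage sketch (the aliases mapping below is hypothetical; the real dict lives elsewhere):
# aliases = {"nJet": "nJetGood"}
# replaceAliases("nJet>=3&&met_pt>40")  # -> "nJetGood>=3&&met_pt>40"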
def drawPlots(plots, mode, dataMCScale):
    logger.info("Plotting mode: %s" % mode)
    for log in [False, True]:
        plot_directory_ = os.path.join(plot_directory, 'isrChecks', str(args.year), args.plot_directory)
        for plot in plots:
            postFix = " (%s)" % mode.replace("mu", "#mu").replace("all", "e+#mu")
            extensions_ = ["pdf", "png", "root"]
            logger.info("Plotting...")
            if isinstance(plot, Plot):
                plotting.draw(
                    plot,
                    plot_directory=plot_directory_,
                    extensions=extensions_,
                    logX=False, logY=log, sorting=False,
                    yRange=(0.03, "auto") if log else (0.001, "auto"),
                    legend=[(0.15, 0.9 - 0.03 * sum(map(len, plot.histos)), 0.9, 0.9), 2],
                    drawObjects=drawObjects(lumi_scale),
                    copyIndexPHP=True,
                )
def drawPlots(plots):
    logger.info("Plotting mode: %s" % args.mode)
    for plot in plots:
        # check if the plot is filled
        if not max(l[0].GetMaximum() for l in plot.histos):
            logger.info("Empty plot!")
            continue  # Empty plot
        # plot in log scale and linear scale
        for log in [True, False]:
            plot_directory_ = os.path.join(plot_directory, 'genPlotsEFT', str(args.year), args.plot_directory, 'met_reweighting', args.selection, args.mode, "log" if log else "lin")
            plotting.draw(
                plot,
                plot_directory=plot_directory_,
                ratio={'yRange': (0.5, 1.5), 'histos': [(i, len(params) - 1) for i in range(0, len(params))], 'texY': 'EFT/SM'},
                logX=False, logY=log, sorting=True,
                yRange=(0.1, "auto"),
                scaling=scaling if args.normalize else {},
                legend=[(0.2, 0.88 - 0.025 * sum(map(len, plot.histos)), 0.9, 0.88), 4],
                drawObjects=drawObjects(lumi_scale),
                copyIndexPHP=True,
            )
def cleanup(self):
    if os.path.isdir(self.uniquePath):
        logger.info("Cleaning up, deleting %s" % self.uniquePath)
        try:
            shutil.rmtree(self.uniquePath)
        except OSError:
            logger.info("Couldn't completely remove %s, please clean up afterwards" % self.uniquePath)
def preselection(self, dataMC, channel="all", processCut=None):
    """Get preselection cutstring."""
    cut = self.selection(dataMC, channel=channel, processCut=processCut, **self.parameters)
    logger.debug("Using cut-string: %s", cut)
    if processCut:
        logger.info("Adding process specific cut: %s" % processCut)
    return cut
def wrapper():
    logger.info("Processing impacts")
    name = args.cardfile
    cardFile = name + ".txt"
    cardFilePath = cardfileLocation + cardFile
    combineDirname = os.path.join(releaseLocation, "impacts", str(args.year))
    logger.info("Creating %s" % combineDirname)
    if not os.path.isdir(combineDirname):
        os.makedirs(combineDirname)
    shutil.copyfile(cardFilePath, combineDirname + '/' + cardFile)
    # https://twiki.cern.ch/twiki/bin/view/Sandbox/SilvioNotes#How_to_get_impact_plot_rho_pulls
    prepWorkspace = "text2workspace.py %s " % cardFile
    robustFit     = "combineTool.py -M Impacts -d %s.root -m 125 --robustFit 1 --doInitialFit " % name
    impactFits    = "combineTool.py -M Impacts -d %s.root -m 125 --robustFit 1 --doFits --parallel %s " % (name, str(args.cores))
    extractImpact = "combineTool.py -M Impacts -d %s.root -m 125 -o impacts.json" % name
    plotImpacts   = "plotImpacts.py -i impacts.json -o impacts"
    combineCommand = "cd %s;eval `scramv1 runtime -sh`;%s;%s;%s;%s;%s" % (combineDirname, prepWorkspace, robustFit, impactFits, extractImpact, plotImpacts)
    # prepWorkspace = "text2workspace.py %s -m 125"%cardFile
    # if args.bgOnly:
    #     robustFit  = "combineTool.py -M Impacts -d %s.root -m 125 --doInitialFit --robustFit 1 --rMin -0.98 --rMax 1.02"%name
    #     impactFits = "combineTool.py -M Impacts -d %s.root -m 125 --robustFit 1 --doFits --parallel %s --rMin -0.98 --rMax 1.02"%(name,str(args.cores))
    # else:
    #     robustFit  = "combineTool.py -M Impacts -d %s.root -m 125 --doInitialFit "%name
    #     impactFits = "combineTool.py -M Impacts -d %s.root -m 125 --doFits --parallel %s "%(name,str(args.cores))
    # extractImpact  = "combineTool.py -M Impacts -d %s.root -m 125 -o impacts.json"%name
    # plotImpacts    = "plotImpacts.py -i impacts.json -o impacts"
    # combineCommand = "cd %s;eval `scramv1 runtime -sh`;%s;%s;%s;%s;%s"%(combineDirname,prepWorkspace,robustFit,impactFits,extractImpact,plotImpacts)
    logger.info("Will run the following command, might take a few hours:\n%s" % combineCommand)
    os.system(combineCommand)
    plotDir = plot_directory + "/impacts%s/" % args.year
    if not os.path.isdir(plotDir):
        os.makedirs(plotDir)
    shutil.copyfile(combineDirname + '/impacts.pdf', "%s/%s.pdf" % (plotDir, "impacts"))
    logger.info("Copied result to %s" % plotDir)
    if args.removeDir:
        logger.info("Removing directory in release location")
        shutil.rmtree(combineDirname)
def wrapper(arg):
    r, channel, setup, addon = arg
    logger.info(
        "Running estimate for region %s, channel %s in setup %s for estimator %s"
        % (r, channel, args.controlRegion if args.controlRegion else "None",
           args.selectEstimator if args.selectEstimator else "None"))
    res = estimate.cachedFakeFactor(r, channel, setup, overwrite=args.overwrite, checkOnly=args.checkOnly)
    # res = estimate.cachedEstimate(r, channel, setup, overwrite=args.overwrite, checkOnly=args.checkOnly)
    return (estimate.uniqueKey(r, channel, setup), res)
def __init__(self, name, cacheDir=None):
    logger.info("Initializing Systematic Estimator for %s" % name)
    self.name = name
    self.initCache(cacheDir)
    self.processCut = None
    if   "_gen"    in name: self.processCut = "cat0"    # photoncat0
    elif "_misID"  in name: self.processCut = "cat2"    # photoncat2
    elif "_had"    in name: self.processCut = "cat134"  # photoncat134
    elif "_prompt" in name: self.processCut = "cat02"   # photoncat02
    elif "_np"     in name: self.processCut = "cat134"  # photoncat134
    elif "_hp"     in name: self.processCut = "cat1"    # photoncat1
    elif "_fake"   in name: self.processCut = "cat3"    # photoncat3
    elif "_PU"     in name: self.processCut = "cat4"    # photoncat4
def setup():
    # preparing gen-sample reweighting
    logger.info("Preparing reweighting setup")
    sel = {}
    for i, year in enumerate(args.years):
        sel[year] = {}
        logger.info("At year %i", year)
        for region, cut in regionCuts[year].iteritems():
            logger.info("At region %s", region)
            regionCut = replaceAliases(simpleStringToCutString(cut))
            sel[year][region] = "&&".join([cutInterpreter.cutString(args.genSelection), regionCut])
            # save some time if the selection equals the first year's
            if i > 0 and sel[year][region] == sel[args.years[0]][region]:
                coeffList[year][region] = coeffList[args.years[0]][region]
                signal_genRateSM[year][region] = signal_genRateSM[args.years[0]][region]
            else:
                coeffList[year][region] = w.getCoeffListFromDraw(genSignalSample, selectionString=sel[year][region])
                signal_genRateSM[year][region] = float(w.get_weight_yield(coeffList[year][region]))
            logger.info("Calculated SM gen-sample signal rate for region %s and year %i: %f"
                        % (region, year, signal_genRateSM[year][region]))
def wrapper(arg):
    r, channel, setup, addon = arg
    logger.info(
        "Running estimate for region %s, channel %s in setup %s for estimator %s"
        % (r, channel, args.controlRegion if args.controlRegion else "None",
           args.selectEstimator if args.selectEstimator else "None"))
    res = estimate.cachedEstimate(r, channel, setup, signalAddon=addon, save=True,
                                  overwrite=args.overwrite,
                                  checkOnly=(args.checkOnly or args.createExecFile))
    return (estimate.uniqueKey(r, channel, setup), res)
def wrapper(arg):
    # INFO: fakeFactor = fakesData / fakesMC * kappaData * kappaMC
    key, subkey, r, channel, setup = arg
    logger.info("Running estimate for region %s, channel %s in setup %s"
                % (r, channel, args.controlRegion if args.controlRegion else "None"))
    # fakeFactor = estimate.cachedFakeFactor(r, channel, setup, checkOnly=True).val
    kappaData = estimate._kappaData(r, channel, setup)
    kappaMC   = estimate._kappaMC(r, channel, setup)
    fakesData = estimate._fakesData(r, channel, setup)
    fakesMC   = estimate._fakesMC(r, channel, setup)
    ddfakes   = fakesData * kappaMC * kappaData
    sf        = ddfakes / fakesMC if fakesMC.val > 0 else u_float(0)
    return (key, subkey, channel, fakesData.tuple(), kappaData.tuple(), kappaMC.tuple(), ddfakes.tuple(), fakesMC.tuple(), sf.tuple())
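# Hedged numeric sketch of the fake-factor arithmetic above (illustrative values only):
# with fakesData = 120, kappaData = 0.9 and kappaMC = 1.1, the data-driven estimate is
# ddfakes = 120 * 1.1 * 0.9 = 118.8, and sf = ddfakes / fakesMC rescales the simulated
# fakes to that estimate.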
def __writeProcessCard( self ):
    out = open( self.tmpProcessCard, 'w' )
    with open( self.templateProcessCard, 'r' ) as f:  # FIXME (somewhat dirty)
        for line in f:
            if "import model" in line:
                out.write( "import model %s-no_b_mass\n\n" % self.config.model_name )
            elif "NP=1" in line and self.config.model_name == "TopEffTh":
                out.write( line.replace("NP=1", "NP=2") )
            elif "NP=1" in line and self.config.model_name == "dim6top_LO":
                out.write( line.replace("NP=1", "DIM6=1") )
            else:
                out.write( line )
    out.write( "output %s -nojpeg" % self.processTmpDir )
    out.close()
    logger.info( "Written process card to %s", self.tmpProcessCard )
def getCommands( line ):
    commands = []
    split = None
    m = re.search(r"SPLIT[0-9][0-9]*", line)
    if m:
        split = int(m.group(0).replace('SPLIT', ''))
    line = line.split('#')[0]
    if line:
        if split:
            logger.info( "Splitting in %i jobs", split )
            for i in range(split):
                commands.append(line + " --nJobs %i --job %i" % (split, i))
        else:
            commands.append(line)
    return commands
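# Hedged usage sketch: a '#SPLIT<N>' hint in a command-file line expands into N sub-job commands.
# getCommands("python run.py --sample TTG #SPLIT3")
# -> ['python run.py --sample TTG  --nJobs 3 --job 0',
#     'python run.py --sample TTG  --nJobs 3 --job 1',
#     'python run.py --sample TTG  --nJobs 3 --job 2']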
def getNllData(pointDict):
    global notCached
    # copy to avoid mutating the shared default point between calls
    varDict = dict(allWCDict)
    for key, val in pointDict.iteritems():
        varDict[key] = val
    res = {"process": "ttZ", "years": "_".join(map(str, args.years))}
    res.update(varDict)
    nCacheFiles = nllCache.contains(res)
    if nCacheFiles:
        cachedDict = nllCache.getDicts(res)[0]
        nll = cachedDict["value"]
    else:
        logger.info("Data for %s not in cache"
                    % (", ".join(["%s = %s" % (key, val) for key, val in pointDict.iteritems()])))
        notCached += 1
        if args.skipMissingPoints:
            nll = 999
        else:
            sys.exit(1)
    return float(nll)
def cachedTemplate(self, selection, weight='(1)', save=True, overwrite=False):
    key = {"selection": selection, "weight": weight, "source": self.source_sample.name}
    if (self.cache and self.cache.contains(key)) and not overwrite:
        result = self.cache.get(key)
        logger.info("Loaded MC PU profile from %s" % (self.cache.database_file))
        logger.debug("Key used: %s result: %r" % (key, result))
    elif self.cache:
        logger.info("Obtain PU profile for %s" % (key,))
        result = self.makeTemplate(selection=selection, weight=weight)
        if result:
            result = self.cache.addData(key, result, overwrite=save)
            logger.info("Adding PU profile to cache for %s : %r" % (key, result))
        else:
            logger.warning("Couldn't create PU profile to cache for %s : %r" % (key, result))
    else:
        result = self.makeTemplate(selection=selection, weight=weight)
    return result
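# Hedged usage sketch (mirrors the call in the caching script further below):
# puProfiles = puProfile(source_sample=sample, cacheDir=cache_directory + "/puProfiles/")
# profile = puProfiles.cachedTemplate("( 1 )", weight='genWeight', overwrite=False)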
def drawPlots(plots):
    logger.info("Plotting mode: %s" % args.mode)
    for plot in plots:
        # check if the plot is filled
        if not max(l[0].GetMaximum() for l in plot.histos):
            logger.info("Empty plot!")
            continue  # Empty plot
        # stack the non-TTG backgrounds onto each EFT-reweighted histogram
        for i_hist, hist in enumerate(plot.histos[0]):
            if plot.stack[0][i_hist].name == "TTG":
                continue
            for i in range(len(params)):
                plot.histos[i + 1][0].Add(hist)
        # plot in log scale and linear scale
        for log in [True, False]:
            plot_directory_ = os.path.join(plot_directory, 'analysisPlots', str(args.year), args.plot_directory, args.selection, args.mode, "log" if log else "lin")
            plotting.draw(
                plot,
                plot_directory=plot_directory_,
                ratio={'yRange': (0.1, 1.9), 'histos': [(i + 1, 0) for i in range(0, len(params) + 1)], 'texY': 'EFT/SM'} if not args.noData else None,
                logX=False, logY=log, sorting=True,
                yRange=(0.001, "auto"),
                scaling=scaling if args.normalize else {},
                legend=[(0.2, 0.9 - 0.025 * sum(map(len, plot.histos)), 0.9, 0.9), 4],
                drawObjects=drawObjects(not args.noData, lumi_scale),
                histModifications=[lambda h: h.GetYaxis().SetTitleOffset(2)],
                copyIndexPHP=True,
            )
def getNllData(var1, var2):
    card = cardname.replace("var1", str(var1)).replace("var2", str(var2))
    res = {'cardname': card, "year": "combined",
           "WC1_name": args.variables[0], "WC1_val": var1,
           "WC2_name": args.variables[1], "WC2_val": var2}
    nCacheFiles = nllCache.contains(res)
    if nCacheFiles:
        cachedDict = nllCache.getDicts(res)[0]
        nll = cachedDict["nll_prefit"]
    else:
        logger.info("Data for %s=%s and %s=%s not in cache!"
                    % (args.variables[0], str(var1), args.variables[1], str(var2)))
        if args.skipMissingPoints:
            nll = 999
        else:
            sys.exit(1)
    return float(nll)
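# Hedged usage sketch: evaluating the cached likelihood on a small 2D grid of the two
# Wilson coefficients (the grid values below are illustrative):
# points = [(x, y) for x in [-2., 0., 2.] for y in [-2., 0., 2.]]
# nll = {(x, y): getNllData(x, y) for (x, y) in points}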
def wrapper(arg):
    r, channel, setup, addon = arg
    logger.info(
        "Running estimate for region %s, channel %s in setup %s for estimator %s"
        % (r, channel, args.controlRegion if args.controlRegion else "None",
           args.selectEstimator if args.selectEstimator else "None"))
    res = estimateFrom.cachedEstimate(r, channel, setup, signalAddon=addon, save=True, overwrite=False, checkOnly=True)
    if res.val >= 0:
        toRes = estimateTo.writeToCache(r, channel, setup, res, signalAddon=addon, overwrite=args.overwrite)
    else:
        logger.info("Did not copy: %s %s %s", args.selectEstimator, estimateFrom.uniqueKey(r, channel, setup), args.controlRegion)
    return (estimateTo.uniqueKey(r, channel, setup), res)
def wrapper(arg):
    r, channel, setup3p, addon, setup3, setup4p = arg
    logger.info(
        "Running estimate for region %s, channel %s in setup %s for estimator %s"
        % (r, channel, args.controlRegion if args.controlRegion else "None",
           args.selectEstimator if args.selectEstimator else "None"))
    res3  = estimate.cachedEstimate(r, channel, setup3,  signalAddon=addon, save=True, overwrite=False, checkOnly=True)
    res4p = estimate.cachedEstimate(r, channel, setup4p, signalAddon=addon, save=True, overwrite=False, checkOnly=True)
    if res3.val >= 0 and res4p.val >= 0:
        res3p = res3 + res4p
        toRes = estimate.writeToCache(r, channel, setup3p, res3p, signalAddon=addon, overwrite=args.overwrite)
    else:
        res3p = -1
        logger.info("Did not copy: %s %s %s", args.selectEstimator, estimate.uniqueKey(r, channel, setup3p), args.controlRegion)
        # print "Got: 3: %s, 4p: %s, 3p: %s"%(res3, res4p, res3p)
    return (estimate.uniqueKey(r, channel, setup3p), res3p)
def checkFile(file):
    if args.log:
        logger.info("Checking filepath: %s" % file)
    corrupt = False
    if args.check:
        corrupt = not checkRootFile(file, checkForObjects=["Events"])
    if args.deepcheck and not corrupt:
        corrupt = not deepCheckRootFile(file)
    if args.checkWeight and not corrupt:
        corrupt = not deepCheckWeight(file)
    if corrupt:
        if file.startswith("root://hephyse.oeaw.ac.at/"):
            file = file.split("root://hephyse.oeaw.ac.at/")[1]
        logger.info("File corrupt: %s" % file)
        if args.remove:
            logger.info("Removing file: %s" % file)
            os.system("/usr/bin/rfrm -f %s" % file)
def initialize(self, modified_couplings=None):
    ''' Update the restriction card '''
    logger.info("#################### Model Setup ######################")
    self.__pre_initialize()
    # couplings
    modified_couplings = modified_couplings if modified_couplings is not None else {}
    # Check whether couplings are in the model
    for coup in modified_couplings.keys():
        if coup not in self.all_model_couplings:
            logger.error("Coupling %s not found in model %s. All available couplings: %s",
                         coup, self.model_name, ",".join(self.all_model_couplings))
            raise RuntimeError
    logger.debug('Creating restriction file based on template %s', self.restrictCardTemplate)
    # make block strings to be inserted into template file
    block_strings = {}
    for block in self.model.keys():
        # copy defaults
        couplings = copy.deepcopy(self.model[block])
        # make modifications & build string for the template file
        block_strings[block + '_template_string'] = ""
        for i_coupling, coupling in enumerate(couplings):  # coupling is a pair (name, value)
            if coupling[0] in modified_couplings:
                coupling[1] = modified_couplings[coupling[0]]
            block_strings[block + '_template_string'] += "%6i %8.6f # %s\n" % (i_coupling + 1, coupling[1], coupling[0])
    # read template file
    with open(self.restrictCardTemplate, 'r') as f:
        template_string = f.read()
    with open(self.restrictCard, 'w') as out:
        out.write(template_string.format(**block_strings))
    logger.info('Written restriction file %s', self.restrictCard)
    logger.info("################# Done: Model Setup ###################")
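# Illustration of the per-coupling line written into the restriction card above
# ('ctZ' is an example coupling name):
# "%6i %8.6f # %s" % (1, 2.0, "ctZ")  ->  "     1 2.000000 # ctZ"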
def cacheYields((i_region, i_sample)):
    region = regions[i_region]
    sample = allSamples[i_sample]
    logger.info("At region %s for sample %s" % (region, sample.name))
    dbFilename = "yields_%s.sql" % sample.name
    yieldDB = Cache(os.path.join(cache_dir, dbFilename), "yields", ["selection", "year", "small", "region", sample.name])
    if not yieldDB:
        raise Exception("Yield cache not initiated!")
    res = {"selection": args.selection, "year": args.year, "small": args.small, "region": str(region), sample.name: sample.name}
    if yieldDB.contains(res) and not args.overwrite:
        if not args.checkOnly:
            logger.info("Yield for sample %s at region %s already in database: %s"
                        % (sample.name, region, yieldDB.getDicts(res)[0]['value']))
        return
    if args.checkOnly:
        logger.info("Yield for sample %s at region %s not processed" % (sample.name, region))
        return
    rate = sample.getYieldFromDraw(selectionString=region.cutString())['val']
    logger.info("Adding yield for sample %s at region %s: %s" % (sample.name, str(region), str(rate)))
    yieldDB.add(res, str(rate), overwrite=True)
try:
    redirector = sys.modules["__main__"].redirector
except (KeyError, AttributeError):
    from TTGammaEFT.Tools.user import redirector as redirector

# Logging
if __name__ == "__main__":
    import Analysis.Tools.logger as logger
    logger = logger.get_logger("INFO", logFile=None)
    import RootTools.core.logger as logger_rt
    logger_rt = logger_rt.get_logger("INFO", logFile=None)
else:
    import logging
    logger = logging.getLogger(__name__)

logger.info("Loading MC samples from directory %s", os.path.join(data_directory_, postprocessing_directory_))

# Directories
dirs = {}
dirs["DY_LO"]            = ["DYJetsToLL_M50_LO_ext1_comb"]
dirs["DY_NLO"]           = ["DYJetsToLL_M50_ext2"]
dirs["TT_pow"]           = ["TTLep_pow_CP5"]
dirs["ZG_lowMLL"]        = ["ZGToLLG_lowMLL"]
dirs["ZG_lowMLL_lowGPt"] = ["ZGToLLG_lowMLL_lowGPt"]
dirs["WJets"]            = ["W1JetsToLNu", "W2JetsToLNu", "W3JetsToLNu", "W4JetsToLNu"]

directories = {
    key: [os.path.join(data_directory_, postprocessing_directory_, dir) for dir in dirs[key]]
    for key in dirs
}
def createTestAndTrainingSample(self, read_variables=[], sequence=[], weightString="1", overwrite=False):
    ''' Creates a single background and a single signal sample for training purposes '''
    self.read_variables = read_variables
    self.sequence = sequence
    self.weightString = weightString

    # return if the samples are done already
    if not overwrite and os.path.isfile(self.dataFile):
        self.trainingAndTestSample = Sample.fromFiles("TrainingAndTestSample", files=[self.dataFile])
        return self.trainingAndTestSample

    # Get yields and counts for all samples, because we want to mix the events according to their yield
    for s in self.samples:
        s._yield = s.getYieldFromDraw(weightString=self.weightString)["val"]
        s.count = int(s.getYieldFromDraw(weightString="(1)")["val"])
        logger.info("Found %i events for sample %s", s.count, s.name)

    # calculate training sample sizes and mix weighted backgrounds according to lumi yields
    # finds nBkg1,...,nBkgN such that nBkg1+...+nBkgN is maximal while respecting
    # nBkg1+nBkg2+...+nBkgN<=nSigTraining, nBkg1:nBkg2:...:nBkgN=yBkg1:yBkg2:...:yBkgN
    # and nBkg1<=self.fractionTraining*nBkg1Max, ..., self.fractionTraining*nBkgNMax<=nBkgNMax

    # Check we're OK overall
    maxSignalCount = int(self.fractionTraining * self.signal.count)
    assert maxSignalCount > 0, "Too few signal events. Training events: %i" % maxSignalCount
    maxBkgYield = float(max([b._yield for b in self.backgrounds]))
    assert maxBkgYield > 0, "Maximum background yield non-positive: %f" % maxBkgYield

    # maximum number of training events that are available in each sample
    for background in self.backgrounds:
        background.maxTrainingEvents = int(self.fractionTraining * background.count)
        assert background.maxTrainingEvents > 0, "Not enough training events in bkg sample: %s" % background.name
        # compute the average weight in the background sample
        background.average_weight = float(background._yield) / int(self.fractionTraining * background.count)

    background_with_max_average_weight = max(self.backgrounds, key=attrgetter('average_weight'))

    # The maximum number of training events per sample consistent with the requirements
    maxAchievableBkg = [
        int(self.fractionTraining * background.count * (background._yield / background_with_max_average_weight._yield))
        for background in self.backgrounds
    ]

    # Case 1: We have more signal than the combined background -> the background samples limit.
    if sum(maxAchievableBkg) < maxSignalCount:
        logger.info("Backgrounds limit training statistic.")
        self.signal.max_nEvents_training = sum(maxAchievableBkg)
        for background in self.backgrounds:
            background.max_nEvents_training = int(
                self.fractionTraining * background.count
                * (background._yield / background_with_max_average_weight._yield))
    # Case 2: We have more background than the signal -> the signal sample limits.
else: logger.info("Signal limits training statistic.") self.signal.max_nEvents_training = maxSignalCount for background in self.backgrounds: background.max_nEvents_training = int( self.fractionTraining * background.count * (background._yield / background_with_max_average_weight._yield) * (maxSignalCount / float(sum(maxAchievableBkg)))) for i_sample, sample in enumerate(self.samples): logger.info( "Sample %20s using %8i events out of %8i which are %i%%.", sample.name, sample.max_nEvents_training, sample.count, round(100 * sample.max_nEvents_training / float(sample.count))) # determine randomized training event sequence for sample in self.samples: sample.training_test_list = [1] * sample.max_nEvents_training + [ 0 ] * (sample.count - sample.max_nEvents_training) random.shuffle(sample.training_test_list) # Now write a single ntuple with one tree that contains # the correct number of background events and also contains isSignal and isTraining # make random list of bkg and signal positions of the correct length for random loop: sig_bkg_list = [] for i_sample, sample in enumerate(self.samples): sig_bkg_list.extend([i_sample] * sample.count) sample.reader = sample.treeReader( \ variables = map( TreeVariable.fromString, read_variables), ) sample.reader.start() random.shuffle(sig_bkg_list) def filler(event): # get a random reader event.isTraining = isTraining event.isSignal = isSignal # write mva variables for name, func in self.mva_variables.iteritems(): # setattr( event, name, func(reader.event) ) setattr(event, name, func(reader.event, sample=None)) # Create a maker. Maker class will be compiled. maker = TreeMaker( sequence=[filler], variables=map(TreeVariable.fromString, ["isTraining/I", "isSignal/I"] + ["%s/F" % var for var in self.mva_variable_names]), treeName="Events") maker.start() # # Do the thing # reader.start() # counter = 0 while len(sig_bkg_list): # determine random sample i_sample = sig_bkg_list.pop(0) # get its reader reader = self.samples[i_sample].reader reader.run() for func in self.sequence: func(reader.event) # determine whether training or test isTraining = self.samples[i_sample].training_test_list.pop(0) isSignal = (i_sample == 0) maker.run() counter += 1 if counter % 10000 == 0: logger.info("Written %i events.", counter) nEventsTotal = maker.tree.GetEntries() tmp_directory = ROOT.gDirectory outputfile = ROOT.TFile.Open(self.dataFile, 'recreate') maker.tree.Write() outputfile.Close() tmp_directory.cd() logger.info("Written %s", self.dataFile) # # # Destroy the TTree maker.clear() logger.info("Written %i events to %s", nEventsTotal, self.dataFile) self.trainingAndTestSample = Sample.fromFiles("TrainingAndTestSample", files=[self.dataFile]) return self.trainingAndTestSample
# Get all NanoAOD tuples for caching
from Samples.nanoAOD.Fall17_private_legacy_v1 import *
#from Samples.nanoAOD.Fall17_nanoAODv6 import *
#from Samples.nanoAOD.Fall17_private import *

from Analysis.Tools.user import plot_directory

# Logger
import Analysis.Tools.logger as logger
import RootTools.core.logger as logger_rt
logger = logger.get_logger(args.logLevel, logFile=None)
logger_rt = logger_rt.get_logger(args.logLevel, logFile=None)

if args.overwrite:
    os.remove(cache_directory + "/puProfiles/puProfiles_v2.sql")

for sample in [TTLep_pow_ext]:
    logger.info("Working on samples %s", sample.name)
    puProfiles = puProfile(source_sample=sample, cacheDir=cache_directory + "/puProfiles/")
    # reweighting selection
    selection = "( 1 )"
    profile = puProfiles.cachedTemplate(selection, weight='genWeight', overwrite=False)
    # plot the MC PU profile
    if args.makePlots:
        profilePlot = Plot.fromHisto(sample.name, texX="nTrueInt", histos=[[profile]])
        plotting.draw(profilePlot,