    def xsec( self, modified_couplings=None, overwrite=False, skip=False ):

        key = self.getKey( modified_couplings )
        # Do we have the x-sec?
        if self.xsecDB.contains(key) and not overwrite:
            logger.debug( "Found x-sec %s for key %r. Do nothing.", self.xsecDB.get(key), key )
            return self.xsecDB.get(key)
        elif skip:
            return u_float(0)
        else:
            logger.info( "Trying to get x-sec" )
            self.__initialize( modified_couplings ) 
            logger.info( "Calculating x-sec" )
            # rerun MG to obtain the correct x-sec (with more events)
            with open( os.path.join( self.processTmpDir, 'Cards/run_card.dat' ), 'a' ) as f:
                f.write( ".false. =  gridpack\n" )
            logger.info( "Calculate x-sec: Calling bin/generate_events" )
            m = None
            for i in range(10):
                try:
                    output = subprocess.check_output( [ os.path.join( self.processTmpDir, 'bin/generate_events' ), '-f' ] )
                    m = re.search( r"Cross-section :\s*(.*) \pb", output )
                    logger.info( "x-sec: {} pb".format(m.group(1)) )  # raises AttributeError if no match
                    break
                except (subprocess.CalledProcessError, AttributeError):
                    logger.info("Encountered problem during the MG run. Restarting.")

            if m is None:
                raise RuntimeError( "Could not extract the x-sec from MG output after 10 attempts." )

            xsec_ = u_float.fromString( m.group(1) )
            
            self.xsecDB.add( key, xsec_, overwrite=True )

            logger.info( "Done!" )

            return xsec_
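The retry idiom above (call an external generator, parse its output, try again on failure) can be isolated into a small helper. A minimal sketch, with a hypothetical command and pattern standing in for bin/generate_events and the MG cross-section line:

import re
import subprocess

def run_and_parse(cmd, pattern, retries=10):
    # Retry a flaky external command; return the first regex group of its output.
    for _ in range(retries):
        try:
            output = subprocess.check_output(cmd, universal_newlines=True)
            m = re.search(pattern, output)
            if m:
                return m.group(1)
        except subprocess.CalledProcessError:
            pass  # non-zero exit code: retry
    raise RuntimeError("no match after %i attempts" % retries)

# hypothetical usage:
# xsec_str = run_and_parse(["./bin/generate_events", "-f"], r"Cross-section :\s*(\S+)\s*pb")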
Example #2
def drawPlots(plots):
    logger.info("Plotting mode: %s" % args.mode)
    for plot in plots:

        # check if the plot is filled
        if not max(l[0].GetMaximum() for l in plot.histos):
            logger.info("Empty plot!")
            continue  # Empty plot

        # plot in log scale and linear scale
        for log in [True, False]:
            plot_directory_ = os.path.join(plot_directory, 'EFTvalidation',
                                           str(args.parameter),
                                           args.plot_directory, args.selection,
                                           args.sample, args.mode,
                                           "log" if log else "lin")
            plotting.draw(
                plot,
                plot_directory=plot_directory_,
                ratio={
                    'yRange': (0.5, 1.5),
                    'histos': [(0, 1), (2, 3), (4, 5)],
                    'texY': 'weight./sim.'
                },
                logX=False,
                logY=log,
                sorting=True,
                yRange=(10, "auto"),
                scaling=scaling if args.normalize else {},
                legend=[(0.2, 0.74, 0.9, 0.9), 2],
                drawObjects=drawObjects(lumi_scale),
                copyIndexPHP=True,
            )
Example #3
def drawPlots(plots):
    logger.info("Plotting mode: %s" % args.mode)
    for plot in plots:

        # check if the plot is filled
        if not max(l[0].GetMaximum() for l in plot.histos):
            logger.info("Empty plot!")
            continue  # Empty plot

        # plot in log scale and linear scale
        for log in [True, False]:
            plot_directory_ = os.path.join(plot_directory, 'simplePlots',
                                           str(args.year), args.plot_directory,
                                           args.selection, args.mode,
                                           "log" if log else "lin")
            plotting.draw(
                plot,
                plot_directory=plot_directory_,
                ratio={'yRange': (0.5, 1.5)} if not args.noData else None,
                logX=False,
                logY=log,
                sorting=True,
                yRange=(0.001, "auto"),
                legend=[(0.2, 0.9 - 0.025 * sum(map(len, plot.histos)), 0.9,
                         0.9), 3],
                drawObjects=drawObjects(not args.noData, lumi_scale),
                copyIndexPHP=True,
            )
Example #4
    def makeTemplate( self, selection, weight='(1)' ):
        logger.info( "Make PU profile for sample %s and selection %s and weight %s", self.source_sample.name, selection, weight )

        h_source = self.source_sample.get1DHistoFromDraw(self.draw_string, self.binning, selectionString = selection, weightString = weight )
        logger.info( "PU histogram contains %s weighted events", h_source.Integral() )
        h_source.Scale( 1./h_source.Integral() )
        return h_source
    def initCache(self, cacheDir="systematics"):
        logger.info("Initializing cache for %s in directory %s"%(self.name, cacheDir))
        if cacheDir:
            self.cacheDir = os.path.join(cache_directory, cacheDir)
            try:    os.makedirs(self.cacheDir)
            except OSError: pass

            cacheDirName       = os.path.join(self.cacheDir, self.name)

            self.cache = MergingDirDB(cacheDirName)
            if not self.cache:
                raise Exception("Cache not initiated!")

            if "DD" in self.name:
                helperCacheDirName = os.path.join(self.cacheDir, self.name+"_helper")
                self.helperCache = MergingDirDB(helperCacheDirName)
                if not self.helperCache: raise Exception("Helper cache not initiated!")
                histoHelperCacheDirName = os.path.join(self.cacheDir, self.name+"_histo")
                self.histoHelperCache = MergingDirDB(histoHelperCacheDirName)
                if not self.histoHelperCache: raise Exception("Histo helper cache not initiated!")
                tfCacheDirName = os.path.join(self.cacheDir, self.name+"_tf")
                self.tfCache = MergingDirDB(tfCacheDirName)
                if not self.tfCache: raise Exception("Transfer-factor cache not initiated!")
            elif "had" in self.name:
                helperCacheDirName = os.path.join(self.cacheDir, "had_helper")
                self.helperCache = MergingDirDB(helperCacheDirName)
                if not self.helperCache: raise Exception("Helper cache not initiated!")
                self.tfCache = None
            else:
                self.helperCache=None
                self.tfCache=None

        else:
            self.cache=None
            self.helperCache=None
            self.tfCache=None
def replaceAliases(cutString):
    cut = cutString
    for key, val in aliases.iteritems():
        cut = cut.replace(key, val)
    logger.info("Replacing variable names: old cut: %s, new cut: %s" %
                (cutString, cut))
    return cut
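A quick usage illustration, assuming a hypothetical aliases table (the real mapping is defined elsewhere in the module):

aliases = {"MET": "MET_pt", "NJET": "nJetGood"}  # hypothetical values

print(replaceAliases("MET>100&&NJET>=2"))
# -> MET_pt>100&&nJetGood>=2

Because this is plain str.replace over an unordered dict, alias names that are substrings of one another (or of branch names) can clobber each other; longer names should be replaced first.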
Example #7
def drawPlots(plots, mode, dataMCScale):

    logger.info("Plotting mode: %s" % mode)

    for log in [False, True]:
        plot_directory_ = os.path.join(plot_directory, 'isrChecks',
                                       str(args.year), args.plot_directory)

        for plot in plots:
            postFix = " (%s)" % mode.replace("mu", "#mu").replace(
                "all", "e+#mu")
            extensions_ = ["pdf", "png", "root"]

            logger.info("Plotting...")

            if isinstance(plot, Plot):
                plotting.draw(
                    plot,
                    plot_directory=plot_directory_,
                    extensions=extensions_,
                    logX=False,
                    logY=log,
                    sorting=False,
                    yRange=(0.03, "auto") if log else (0.001, "auto"),
                    legend=[(0.15, 0.9 - 0.03 * sum(map(len, plot.histos)),
                             0.9, 0.9), 2],
                    drawObjects=drawObjects(lumi_scale),
                    copyIndexPHP=True,
                )
def drawPlots(plots):
    logger.info("Plotting mode: %s" % args.mode)
    for plot in plots:

        # check if the plot is filled
        if not max(l[0].GetMaximum() for l in plot.histos):
            logger.info("Empty plot!")
            continue  # Empty plot

        # plot in log scale and linear scale
        for log in [True, False]:
            plot_directory_ = os.path.join(plot_directory, 'genPlotsEFT',
                                           str(args.year), args.plot_directory,
                                           'met_reweighting', args.selection,
                                           args.mode, "log" if log else "lin")
            plotting.draw(
                plot,
                plot_directory=plot_directory_,
                ratio={
                    'yRange': (0.5, 1.5),
                    'histos':
                    [(i, len(params) - 1) for i in range(0, len(params))],
                    'texY': 'EFT/SM'
                },
                logX=False,
                logY=log,
                sorting=True,
                yRange=(0.1, "auto"),
                scaling=scaling if args.normalize else {},
                legend=[(0.2, 0.88 - 0.025 * sum(map(len, plot.histos)), 0.9,
                         0.88), 4],
                drawObjects=drawObjects(lumi_scale),
                copyIndexPHP=True,
            )
Example #9
    def cleanup(self):

        if os.path.isdir(self.uniquePath):
            logger.info("Cleaning up, deleting %s" % self.uniquePath)
            try:
                shutil.rmtree(self.uniquePath)
            except OSError:
                logger.info(
                    "Couldn't completely remove %s, please clean up afterwards"
                    % self.uniquePath)
Example #10
 def preselection(self, dataMC, channel="all", processCut=None):
     """Get preselection cut-string."""
     cut = self.selection(dataMC,
                          channel=channel,
                          processCut=processCut,
                          **self.parameters)
     logger.debug("Using cut-string: %s", cut)
     if processCut:
         logger.info("Adding process specific cut: %s" % processCut)
     return cut
def wrapper():

    logger.info("Processing impacts")

    name = args.cardfile
    cardFile = name + ".txt"
    cardFilePath = cardfileLocation + cardFile

    combineDirname = os.path.join(releaseLocation, "impacts", str(args.year))

    logger.info("Creating %s" % combineDirname)

    if not os.path.isdir(combineDirname): os.makedirs(combineDirname)
    shutil.copyfile(cardFilePath, combineDirname + '/' + cardFile)

    #https://twiki.cern.ch/twiki/bin/view/Sandbox/SilvioNotes#How_to_get_impact_plot_rho_pulls
    prepWorkspace = "text2workspace.py %s " % cardFile
    robustFit = "combineTool.py -M Impacts -d %s.root -m 125 --robustFit 1 --doInitialFit " % name
    impactFits = "combineTool.py -M Impacts -d %s.root -m 125 --robustFit 1 --doFits --parallel %s " % (
        name, str(args.cores))
    extractImpact = "combineTool.py -M Impacts -d %s.root -m 125 -o impacts.json" % name
    plotImpacts = "plotImpacts.py -i impacts.json -o impacts"
    combineCommand = "cd %s;eval `scramv1 runtime -sh`;%s;%s;%s;%s;%s" % (
        combineDirname, prepWorkspace, robustFit, impactFits, extractImpact,
        plotImpacts)

    #    prepWorkspace   = "text2workspace.py %s -m 125"%cardFile
    #    if args.bgOnly:
    #        robustFit   = "combineTool.py -M Impacts -d %s.root -m 125 --doInitialFit --robustFit 1 --rMin -0.98 --rMax 1.02"%name
    #        impactFits  = "combineTool.py -M Impacts -d %s.root -m 125 --robustFit 1 --doFits --parallel %s --rMin -0.98 --rMax 1.02"%(name,str(args.cores))
    #    else:
    #        robustFit   = "combineTool.py -M Impacts -d %s.root -m 125 --doInitialFit "%name
    #        impactFits  = "combineTool.py -M Impacts -d %s.root -m 125 --doFits --parallel %s "%(name,str(args.cores))
    #    extractImpact   = "combineTool.py -M Impacts -d %s.root -m 125 -o impacts.json"%name
    #    plotImpacts     = "plotImpacts.py -i impacts.json -o impacts"
    #    combineCommand  = "cd %s;eval `scramv1 runtime -sh`;%s;%s;%s;%s;%s"%(combineDirname,prepWorkspace,robustFit,impactFits,extractImpact,plotImpacts)

    logger.info("Will run the following command, might take a few hours:\n%s" %
                combineCommand)

    os.system(combineCommand)

    plotDir = plot_directory + "/impacts%s/" % args.year
    if not os.path.isdir(plotDir): os.makedirs(plotDir)
    shutil.copyfile(combineDirname + '/impacts.pdf',
                    "%s/%s.pdf" % (plotDir, "impacts"))

    logger.info("Copied result to %s" % plotDir)

    if args.removeDir:
        logger.info("Removing directory in release location")
        shutil.rmtree(combineDirname)
Example #12
def wrapper(arg):
    r, channel, setup, addon = arg
    logger.info(
        "Running estimate for region %s, channel %s in setup %s for estimator %s"
        % (r, channel, args.controlRegion if args.controlRegion else "None",
           args.selectEstimator if args.selectEstimator else "None"))
    res = estimate.cachedFakeFactor(r,
                                    channel,
                                    setup,
                                    overwrite=args.overwrite,
                                    checkOnly=args.checkOnly)
    #        res = estimate.cachedEstimate(r, channel, setup, overwrite=args.overwrite, checkOnly=args.checkOnly)
    return (estimate.uniqueKey(r, channel, setup), res)
    def __init__(self, name, cacheDir=None):
        logger.info("Initializing Systematic Estimator for %s"%name)
        self.name = name
        self.initCache(cacheDir)
        self.processCut = None

        if   "_gen"    in name: self.processCut = "cat0"   #"photoncat0"
        elif "_misID"  in name: self.processCut = "cat2"   #"photoncat2"
        elif "_had"    in name: self.processCut = "cat134" #"photoncat134"
        elif "_prompt" in name: self.processCut = "cat02"  #"photoncat02"
        elif "_np"     in name: self.processCut = "cat134" #"photoncat134"
        elif "_hp"     in name: self.processCut = "cat1"   #"photoncat1"
        elif "_fake"   in name: self.processCut = "cat3"   #"photoncat3"
        elif "_PU"     in name: self.processCut = "cat4"   #"photoncat4"
def setup():
    # preparing gen-sample reweighting
    logger.info("Preparing reweighting setup")

    sel = {}
    for i, year in enumerate(args.years):

        sel[year] = {}
        logger.info("At year %i", year)

        for region, cut in regionCuts[year].iteritems():

            logger.info("At region %s", region)

            regionCut = replaceAliases(simpleStringToCutString(cut))
            sel[year][region] = "&&".join(
                [cutInterpreter.cutString(args.genSelection), regionCut])
            # save some time
            if i > 0 and sel[year][region] == sel[args.years[0]][region]:
                coeffList[year][region] = coeffList[args.years[0]][region]
                signal_genRateSM[year][region] = signal_genRateSM[
                    args.years[0]][region]
            else:
                coeffList[year][region] = w.getCoeffListFromDraw(
                    genSignalSample, selectionString=sel[year][region])
                signal_genRateSM[year][region] = float(
                    w.get_weight_yield(coeffList[year][region]))
            logger.info(
                "Calculated SM gen-sample signal rate for region %s and year %i: %f"
                % (region, year, signal_genRateSM[year][region]))
Example #15
def wrapper(arg):
    r, channel, setup, addon = arg
    logger.info(
        "Running estimate for region %s, channel %s in setup %s for estimator %s"
        % (r, channel, args.controlRegion if args.controlRegion else "None",
           args.selectEstimator if args.selectEstimator else "None"))
    res = estimate.cachedEstimate(r,
                                  channel,
                                  setup,
                                  signalAddon=addon,
                                  save=True,
                                  overwrite=args.overwrite,
                                  checkOnly=(args.checkOnly
                                             or args.createExecFile))
    return (estimate.uniqueKey(r, channel, setup), res)
Example #16
def wrapper(arg):
    # INFO: fakeFactor = fakesData / fakesMC * kappaData * kappaMC
    key, subkey, r, channel, setup = arg
    logger.info(
        "Running estimate for region %s, channel %s in setup %s" %
        (r, channel, args.controlRegion if args.controlRegion else "None"))
    #        fakeFactor = estimate.cachedFakeFactor(r, channel, setup, checkOnly=True).val
    kappaData = estimate._kappaData(r, channel, setup)
    kappaMC = estimate._kappaMC(r, channel, setup)
    fakesData = estimate._fakesData(r, channel, setup)
    fakesMC = estimate._fakesMC(r, channel, setup)
    ddfakes = fakesData * kappaMC * kappaData
    sf = ddfakes / fakesMC if fakesMC.val > 0 else u_float(0)
    return (key, subkey, channel, fakesData.tuple(), kappaData.tuple(),
            kappaMC.tuple(), ddfakes.tuple(), fakesMC.tuple(), sf.tuple())
Example #17
    def __writeProcessCard( self ):

        with open( self.tmpProcessCard, 'w' ) as out, open( self.templateProcessCard, 'r' ) as f:  #FIXME (somewhat dirty)
            for line in f:
                if "import model" in line:
                    out.write( "import model %s-no_b_mass\n\n"%self.config.model_name )
                elif "NP=1" in line and self.config.model_name == "TopEffTh":
                    out.write( line.replace("NP=1","NP=2") )
                elif "NP=1" in line and self.config.model_name == "dim6top_LO":
                    out.write( line.replace("NP=1","DIM6=1") )
                else:
                    out.write(line)
            out.write( "output %s -nojpeg" % self.processTmpDir )
        logger.info( "Written process card to %s", self.tmpProcessCard )
Example #18
def getCommands( line ):
    commands = []
    split = None
    m = re.search(r"SPLIT[0-9][0-9]*", line)
    if m:
        split = int(m.group(0).replace('SPLIT',''))
    line = line.split('#')[0]
    if line:
        if split:
            logger.info( "Splitting in %i jobs", split )
            for i in range(split):
                commands.append(line+" --nJobs %i --job %i"%( split, i ))
        else:
            commands.append(line)
    return commands
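For example, a SPLIT tag anywhere in the line (including the comment part, which is searched before the comment is stripped) fans the command out into one invocation per job. A sketch, assuming the function above:

print(getCommands("python run.py --sample TTG #SPLIT3"))
# -> ['python run.py --sample TTG  --nJobs 3 --job 0',
#     'python run.py --sample TTG  --nJobs 3 --job 1',
#     'python run.py --sample TTG  --nJobs 3 --job 2']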
Example #19
def getNllData(pointDict):
    global notCached

    varDict = dict(allWCDict)  # take a copy so the module-level defaults are not mutated
    for key, val in pointDict.iteritems():
        varDict[key] = val

    res = {"process": "ttZ", "years": "_".join(map(str, args.years))}
    res.update(varDict)
    nCacheFiles = nllCache.contains(res)

    if nCacheFiles:
        cachedDict = nllCache.getDicts(res)[0]
        nll = cachedDict["value"]
    else:
        logger.info("Data for %s not in cache" % (", ".join(
            ["%s = %s" % (key, val) for key, val in pointDict.iteritems()])))
        notCached += 1
        if args.skipMissingPoints: nll = 999
        else: sys.exit(1)
    return float(nll)
Example #20
 def cachedTemplate(self,
                    selection,
                    weight='(1)',
                    save=True,
                    overwrite=False):
     key = {
         "selection": selection,
         "weight": weight,
         "source": self.source_sample.name
     }
     if (self.cache and self.cache.contains(key)) and not overwrite:
         result = self.cache.get(key)
         logger.info("Loaded MC PU profile from %s" %
                     (self.cache.database_file))
         logger.debug("Key used: %s result: %r" % (key, result))
     elif self.cache:
         logger.info("Obtain PU profile for %s" % (key, ))
         result = self.makeTemplate(selection=selection, weight=weight)
         if result:
             result = self.cache.addData(key, result, overwrite=save)
             logger.info("Adding PU profile to cache for %s : %r" %
                         (key, result))
         else:
             logger.warning(
                 "Couldn't create PU profile to cache for %s : %r" %
                 (key, result))
     else:
         result = self.makeTemplate(selection=selection, weight=weight)
     return result
def drawPlots(plots):
    logger.info("Plotting mode: %s" % args.mode)
    for plot in plots:

        # check if the plot is filled
        if not max(l[0].GetMaximum() for l in plot.histos):
            logger.info("Empty plot!")
            continue  # Empty plot

        for i_hist, hist in enumerate(plot.histos[0]):
            if plot.stack[0][i_hist].name == "TTG": continue
            for i in range(len(params)):
                plot.histos[i + 1][0].Add(hist)

        # plot in log scale and linear scale
        for log in [True, False]:
            plot_directory_ = os.path.join(plot_directory, 'analysisPlots',
                                           str(args.year), args.plot_directory,
                                           args.selection, args.mode,
                                           "log" if log else "lin")
            plotting.draw(
                plot,
                plot_directory=plot_directory_,
                ratio={
                    'yRange': (0.1, 1.9),
                    'histos': [(i + 1, 0) for i in range(0,
                                                         len(params) + 1)],
                    'texY': 'EFT/SM'
                } if not args.noData else None,
                logX=False,
                logY=log,
                sorting=True,
                yRange=(0.001, "auto"),
                scaling=scaling if args.normalize else {},
                legend=[(0.2, 0.9 - 0.025 * sum(map(len, plot.histos)), 0.9,
                         0.9), 4],
                drawObjects=drawObjects(not args.noData, lumi_scale),
                histModifications=[lambda h: h.GetYaxis().SetTitleOffset(2)],
                copyIndexPHP=True,
            )
Example #22
def getNllData(var1, var2):
    card = cardname.replace("var1", str(var1)).replace("var2", str(var2))
    res = {
        'cardname': card,
        "year": "combined",
        "WC1_name": args.variables[0],
        "WC1_val": var1,
        "WC2_name": args.variables[1],
        "WC2_val": var2
    }
    nCacheFiles = nllCache.contains(res)

    if nCacheFiles:
        cachedDict = nllCache.getDicts(res)[0]
        nll = cachedDict["nll_prefit"]
    else:
        logger.info(
            "Data for %s=%s and %s=%s not in cache!" %
            (args.variables[0], str(var1), args.variables[1], str(var2)))
        if args.skipMissingPoints: nll = 999
        else: sys.exit(1)
    return float(nll)
def wrapper(arg):
    r, channel, setup, addon = arg
    logger.info(
        "Running estimate for region %s, channel %s in setup %s for estimator %s"
        % (r, channel, args.controlRegion if args.controlRegion else "None",
           args.selectEstimator if args.selectEstimator else "None"))
    res = estimateFrom.cachedEstimate(r,
                                      channel,
                                      setup,
                                      signalAddon=addon,
                                      save=True,
                                      overwrite=False,
                                      checkOnly=True)
    if res.val >= 0:
        toRes = estimateTo.writeToCache(r,
                                        channel,
                                        setup,
                                        res,
                                        signalAddon=addon,
                                        overwrite=args.overwrite)
    else:
        logger.warning("Did not copy: %s %s %s", args.selectEstimator,
                       estimateFrom.uniqueKey(r, channel, setup), args.controlRegion)
    return (estimateTo.uniqueKey(r, channel, setup), res)
Example #24
def wrapper(arg):
    r, channel, setup3p, addon, setup3, setup4p = arg
    logger.info(
        "Running estimate for region %s, channel %s in setup %s for estimator %s"
        % (r, channel, args.controlRegion if args.controlRegion else "None",
           args.selectEstimator if args.selectEstimator else "None"))
    res3 = estimate.cachedEstimate(r,
                                   channel,
                                   setup3,
                                   signalAddon=addon,
                                   save=True,
                                   overwrite=False,
                                   checkOnly=True)
    res4p = estimate.cachedEstimate(r,
                                    channel,
                                    setup4p,
                                    signalAddon=addon,
                                    save=True,
                                    overwrite=False,
                                    checkOnly=True)
    if res3.val >= 0 and res4p.val >= 0:
        res3p = res3 + res4p
        toRes = estimate.writeToCache(r,
                                      channel,
                                      setup3p,
                                      res3p,
                                      signalAddon=addon,
                                      overwrite=args.overwrite)
    else:
        res3p = -1
        logger.warning("Did not copy: %s %s %s", args.selectEstimator,
                       estimate.uniqueKey(r, channel, setup3p), args.controlRegion)

    #        print "Got: 3: %s, 4p: %s, 3p: %s"%(res3, res4p, res3p)
    return (estimate.uniqueKey(r, channel, setup3p), res3p)
 def checkFile(file):
     if args.log: logger.info("Checking filepath: %s" % file)
     corrupt = False
     if args.check:
         corrupt = not checkRootFile(file, checkForObjects=["Events"])
     if args.deepcheck and not corrupt:
         corrupt = not deepCheckRootFile(file)
     if args.checkWeight and not corrupt:
         corrupt = not deepCheckWeight(file)
     if corrupt:
         if file.startswith("root://hephyse.oeaw.ac.at/"):
             file = file.split("root://hephyse.oeaw.ac.at/")[1]
         logger.info("File corrupt: %s" % file)
         if args.remove:
             logger.info("Removing file: %s" % file)
             os.system("/usr/bin/rfrm -f %s" % file)
Example #26
    def initialize(self, modified_couplings=None):
        ''' Update the restriction card
        '''
        logger.info("#################### Model Setup ######################")

        self.__pre_initialize()

        # couplings
        modified_couplings = modified_couplings if modified_couplings is not None else {}

        # Check whether couplings are in the model
        for coup in modified_couplings.keys():
            if coup not in self.all_model_couplings:
                logger.error(
                    "Coupling %s not found in model %s. All available couplings: %s",
                    coup, self.model_name, ",".join(self.all_model_couplings))
                raise RuntimeError

        logger.debug('Creating restriction file based on template %s',
                     self.restrictCardTemplate)
        # make block strings to be inserted into template file
        block_strings = {}
        for block in self.model.keys():

            # copy defaults
            couplings = copy.deepcopy(self.model[block])

            # make modifications & build string for the template file
            block_strings[block + '_template_string'] = ""
            for i_coupling, coupling in enumerate(
                    couplings):  # coupling is a pair (name, value)
                if coupling[0] in modified_couplings:
                    coupling[1] = modified_couplings[coupling[0]]
                block_strings[block +
                              '_template_string'] += "%6i %8.6f # %s\n" % (
                                  i_coupling + 1, coupling[1], coupling[0])

        # read template file
        with open(self.restrictCardTemplate, 'r') as f:
            template_string = f.read()

        with open(self.restrictCard, 'w') as out:
            out.write(template_string.format(**block_strings))

        logger.info('Written restriction file %s', self.restrictCard)
        logger.info("################# Done: Model Setup ###################")
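To make the templating step concrete: the restriction-card template is expected to carry one named placeholder per model block, which str.format fills with the rendered coupling lines. A minimal sketch with a hypothetical block name and coupling:

template_string = "Block dim6\n{dim6_template_string}"  # hypothetical template content

block_strings = {"dim6_template_string": "%6i %8.6f # %s\n" % (1, 0.5, "ctZ")}
print(template_string.format(**block_strings))
# Block dim6
#      1 0.500000 # ctZ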
Example #27
def cacheYields((i_region, i_sample)):

    region = regions[i_region]
    sample = allSamples[i_sample]

    logger.info("At region %s for sample %s" % (region, sample.name))

    dbFilename = "yields_%s.sql" % sample.name
    yieldDB = Cache(os.path.join(cache_dir, dbFilename), "yields",
                    ["selection", "year", "small", "region", sample.name])
    if not yieldDB: raise Exception("Could not initiate yield cache %s" % dbFilename)

    res = {
        "selection": args.selection,
        "year": args.year,
        "small": args.small,
        "region": str(region),
        sample.name: sample.name
    }

    if yieldDB.contains(res) and not args.overwrite:
        if not args.checkOnly:
            logger.info(
                "Yield for sample %s at region %s already in database: %s" %
                (sample.name, region, yieldDB.getDicts(res)[0]['value']))
        return

    if args.checkOnly:
        logger.info("Yield for sample %s at region %s not processed" %
                    (sample.name, region))
        return

    rate = sample.getYieldFromDraw(selectionString=region.cutString())['val']
    logger.info("Adding yield for sample %s at region %s: %s" %
                (sample.name, str(region), str(rate)))
    yieldDB.add(res, str(rate), overwrite=True)
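The res dict above doubles as a composite key: every field that identifies a yield goes into the dict, and the same dict drives contains, getDicts, and add. A sketch of the idea, with a plain dict standing in for the sql-backed Cache class (hypothetical stand-in, toy values):

toy_cache = {}  # keys are the frozen field sets

def toy_add(fields, value):
    toy_cache[frozenset(fields.items())] = value

def toy_lookup(fields):
    return toy_cache.get(frozenset(fields.items()))

key = {"selection": "mySelection", "year": 2016, "region": "SR1"}
toy_add(key, "12.3")
print(toy_lookup(key))  # -> 12.3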
try:
    redirector = sys.modules["__main__"].redirector
except (KeyError, AttributeError):
    from TTGammaEFT.Tools.user import redirector as redirector

# Logging
if __name__ == "__main__":
    import Analysis.Tools.logger as logger
    logger = logger.get_logger("INFO", logFile=None)
    import RootTools.core.logger as logger_rt
    logger_rt = logger_rt.get_logger("INFO", logFile=None)
else:
    import logging
    logger = logging.getLogger(__name__)

logger.info("Loading MC samples from directory %s",
            os.path.join(data_directory_, postprocessing_directory_))

# Directories
dirs = {}
dirs["DY_LO"] = ["DYJetsToLL_M50_LO_ext1_comb"]
dirs["DY_NLO"] = ["DYJetsToLL_M50_ext2"]
dirs["TT_pow"] = ["TTLep_pow_CP5"]
dirs["ZG_lowMLL"] = ["ZGToLLG_lowMLL"]
dirs["ZG_lowMLL_lowGPt"] = ["ZGToLLG_lowMLL_lowGPt"]
dirs["WJets"] = ["W1JetsToLNu", "W2JetsToLNu", "W3JetsToLNu", "W4JetsToLNu"]

directories = {
    key: [
        os.path.join(data_directory_, postprocessing_directory_, dir)
        for dir in dirs[key]
    ]
    for key in dirs
}
Example #29
    def createTestAndTrainingSample(self,
                                    read_variables=[],
                                    sequence=[],
                                    weightString="1",
                                    overwrite=False):
        ''' Creates a single background and a single signal sample for training purposes
        '''
        self.read_variables = read_variables
        self.sequence = sequence
        self.weightString = weightString

        # return if the samples are done already
        if not overwrite and os.path.isfile(self.dataFile):
            self.trainingAndTestSample = Sample.fromFiles(
                "TrainingAndTestSample", files=[self.dataFile])
            return self.trainingAndTestSample

        # Get yields and counts for all samples, because we want to mix the events according to their yield
        for s in self.samples:
            s._yield = s.getYieldFromDraw(
                weightString=self.weightString)["val"]
            s.count = int(s.getYieldFromDraw(weightString="(1)")["val"])
            logger.info("Found %i events for sample %s", s.count, s.name)

        # calculate training sample sizes and mix weighted backgrounds according to lumi yields
        #   finds nBkg1,...,nBkgN such that nBkg1+...+nBkgN is maximal while respecting
        #   nBkg1+nBkg2+...+nBkgN<=nSigTraining, nBkg1:nBkg2:...:nBkgN=yBkg1:yBkg2:...:yBkgN
        #   and nBkg1<=self.fractionTraining*nBkg1Max, ..., nBkgN<=self.fractionTraining*nBkgNMax
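        # Worked toy numbers (an assumption for illustration, not from the source):
        # with fractionTraining=0.5 and two backgrounds with counts (1000, 1000)
        # and yields (10, 40), the average weights are 10/500 and 40/500, so the
        # second background has the largest average weight. Keeping the yield
        # ratio 10:40 while exhausting background 2 gives nBkg2 = 500 and
        # nBkg1 = 500*(10/40) = 125 achievable training events.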

        # Check we're OK overall
        maxSignalCount = int(self.fractionTraining * self.signal.count)
        assert maxSignalCount > 0, "Too few signal events. Training events: %i" % maxSignalCount
        maxBkgYield = float(max([b._yield for b in self.backgrounds]))
        assert maxBkgYield > 0, "Maximum background yield non-positive: %f" % maxBkgYield

        # maximum number of training events that are available in each sample
        for background in self.backgrounds:
            background.maxTrainingEvents = int(self.fractionTraining *
                                               background.count)
            assert background.maxTrainingEvents > 0, "Not enough training events in bkg sample: %s" % background.name
            # compute the average weight in the background sample
            background.average_weight = float(background._yield) / int(
                self.fractionTraining * background.count)

        background_with_max_average_weight = max(
            self.backgrounds, key=attrgetter('average_weight'))

        # The maximum number of training events per sample consistent with the requirements
        maxAchievableBkg = [
            int(self.fractionTraining * background.count *
                (background._yield /
                 background_with_max_average_weight._yield))
            for background in self.backgrounds
        ]

        # Case1: We have more signal than the combined background
        #        The background samples limit.
        if sum(maxAchievableBkg) < maxSignalCount:
            logger.info("Backgrounds limit training statistic.")
            self.signal.max_nEvents_training = sum(maxAchievableBkg)
            for background in self.backgrounds:
                background.max_nEvents_training = int(
                    self.fractionTraining * background.count *
                    (background._yield /
                     background_with_max_average_weight._yield))
        # Case2: We have more background than the signal
        #        The signal sample limits.
        else:
            logger.info("Signal limits training statistic.")
            self.signal.max_nEvents_training = maxSignalCount
            for background in self.backgrounds:
                background.max_nEvents_training = int(
                    self.fractionTraining * background.count *
                    (background._yield /
                     background_with_max_average_weight._yield) *
                    (maxSignalCount / float(sum(maxAchievableBkg))))

        for i_sample, sample in enumerate(self.samples):
            logger.info(
                "Sample %20s using %8i events out of %8i which are %i%%.",
                sample.name, sample.max_nEvents_training, sample.count,
                round(100 * sample.max_nEvents_training / float(sample.count)))

        # determine randomized training event sequence
        for sample in self.samples:
            sample.training_test_list = [1] * sample.max_nEvents_training + [
                0
            ] * (sample.count - sample.max_nEvents_training)
            random.shuffle(sample.training_test_list)
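        # e.g. (toy numbers) count=5 and max_nEvents_training=2 give a shuffled
        # mask such as [0, 1, 0, 0, 1]; popping it in event order below assigns
        # exactly 2 events to training and 3 to test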

        # Now write a single ntuple with one tree that contains
        # the correct number of background events and also contains isSignal and isTraining

        # make random list of bkg and signal positions of the correct length for random loop:
        sig_bkg_list = []
        for i_sample, sample in enumerate(self.samples):
            sig_bkg_list.extend([i_sample] * sample.count)
            sample.reader = sample.treeReader( \
                variables = map( TreeVariable.fromString, read_variables),
                )
            sample.reader.start()

        random.shuffle(sig_bkg_list)

        def filler(event):
            # isTraining, isSignal and reader are set in the event loop below
            event.isTraining = isTraining
            event.isSignal = isSignal
            # write mva variables
            for name, func in self.mva_variables.iteritems():
                #                setattr( event, name, func(reader.event) )
                setattr(event, name, func(reader.event, sample=None))

        # Create a maker. Maker class will be compiled.
        maker = TreeMaker(
            sequence=[filler],
            variables=map(TreeVariable.fromString,
                          ["isTraining/I", "isSignal/I"] +
                          ["%s/F" % var for var in self.mva_variable_names]),
            treeName="Events")

        maker.start()
        #        # Do the thing
        #        reader.start()
        #
        counter = 0
        while len(sig_bkg_list):
            # determine random sample
            i_sample = sig_bkg_list.pop(0)
            # get its reader
            reader = self.samples[i_sample].reader
            reader.run()
            for func in self.sequence:
                func(reader.event)
            # determine whether training or test
            isTraining = self.samples[i_sample].training_test_list.pop(0)
            isSignal = (i_sample == 0)

            maker.run()

            counter += 1
            if counter % 10000 == 0:
                logger.info("Written %i events.", counter)

        nEventsTotal = maker.tree.GetEntries()

        tmp_directory = ROOT.gDirectory
        outputfile = ROOT.TFile.Open(self.dataFile, 'recreate')
        maker.tree.Write()
        outputfile.Close()
        tmp_directory.cd()
        logger.info("Written %s", self.dataFile)
        #
        #      # Destroy the TTree
        maker.clear()
        logger.info("Written %i events to %s", nEventsTotal, self.dataFile)

        self.trainingAndTestSample = Sample.fromFiles("TrainingAndTestSample",
                                                      files=[self.dataFile])
        return self.trainingAndTestSample
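The random interleaving used above can be seen in isolation: a list holding each sample index once per event, shuffled, then consumed in order. A toy sketch:

import random

counts = [3, 2]  # toy event counts for two samples
order = []
for i_sample, n in enumerate(counts):
    order.extend([i_sample] * n)
random.shuffle(order)
print(order)  # e.g. [0, 1, 0, 0, 1]: the order in which events are drawn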
Example #30
    # Get all NanoAOD tuples for caching
    from Samples.nanoAOD.Fall17_private_legacy_v1 import *
    #from Samples.nanoAOD.Fall17_nanoAODv6 import *
    #from Samples.nanoAOD.Fall17_private           import *
    from Analysis.Tools.user import plot_directory

    # Logger
    import Analysis.Tools.logger as logger
    import RootTools.core.logger as logger_rt
    logger = logger.get_logger(args.logLevel, logFile=None)
    logger_rt = logger_rt.get_logger(args.logLevel, logFile=None)

    if args.overwrite:
        puDBFile = cache_directory + "/puProfiles/puProfiles_v2.sql"
        if os.path.exists(puDBFile): os.remove(puDBFile)
    for sample in [TTLep_pow_ext]:
        logger.info("Working on sample %s", sample.name)
        puProfiles = puProfile(source_sample=sample,
                               cacheDir=cache_directory + "/puProfiles/")

        # reweighting selection
        selection = "( 1 )"
        profile = puProfiles.cachedTemplate(selection,
                                            weight='genWeight',
                                            overwrite=False)

        # plot the MC PU profile
        if args.makePlots:
            profilePlot = Plot.fromHisto(sample.name,
                                         texX="nTrueInt",
                                         histos=[[profile]])
            plotting.draw(profilePlot,