def getobsyieldsprecise(channelfolder, signalname):
    """.. function:: getobsyieldsprecise(channelfolder,signalname) -> Nobs,Nerr

    Evaluate the data yield minus every non-signal sample yield at the
    'MET' cut, using the 'cluster_*' folders found inside `channelfolder`.

    :param channelfolder: path containing the 'cluster_*' folders
    :type channelfolder: str
    :param signalname: (prefix of the) name of the signal sample
    :type signalname: str

    :return: the subtracted yield and the quadratic sum of the errors
             of all the samples involved (data included)
    :rtype: tuple(float,float)

    :raise RuntimeError: if no 'cluster_<signalname>*' folder is found
    """
    import os
    import glob
    from math import sqrt

    # Extract the actual name of the signal (the folder can carry a suffix)
    signalfolders = glob.glob(channelfolder+"/cluster_"+signalname+"*")
    if not signalfolders:
        message = "\033[1;31mgetobsyields ERROR\033[1;m Some unexpected error:"\
                " no 'cluster_%s*' folder found inside '%s'" % (signalname, channelfolder)
        raise RuntimeError(message)
    actsignalname = os.path.basename(signalfolders[0]).replace("cluster_", "")

    # Imported only once the folder layout is known to be usable
    from functionspool_mod import processedsample

    # Name of the observed data
    dataname = "Data"
    dataps = processedsample(channelfolder+"/cluster_"+dataname+"/Results/"+dataname+".root")
    Nobs, Nerr = dataps.getvalue("MET")

    # Every sample in the folder but the signal and the data itself
    allnames = [os.path.basename(path).replace("cluster_", "")
                for path in glob.glob(channelfolder+"/cluster_"+"*")]
    samplesname = [name for name in allnames
                   if name != actsignalname and name != dataname]

    Nerr2 = Nerr**2.0
    # Subtracting each sample yield, errors are added in quadrature
    for sample in samplesname:
        rootfilename = channelfolder+"/cluster_"+sample+"/Results/"+sample+".root"
        evts = processedsample(rootfilename)
        val, err = evts.getvalue("MET")
        Nobs -= val
        Nerr2 += err**2.
        del evts
    del dataps

    return Nobs, sqrt(Nerr2)
def getsystwofiles(file1,file2,verbose):
    """.. function:: getsystwofiles(file1,file2,verbose) -> difference

    Build the difference between the two 'processedsample' instances
    created from `file1` and `file2` (in that order).

    :param file1: name of the first (minuend) root file
    :param file2: name of the second (subtrahend) root file
    :param verbose: whether to print what is being done; it is also
                    forwarded as 'showall' to the processedsample instances

    :return: the result of processedsample(file1)-processedsample(file2)

    The program exits (sys.exit) when both file names are the same.
    """
    import sys

    # Guard clause: nothing to compare against
    if file1 == file2:
        sys.exit("\033[1;31mgetsystematics ERROR\033[1;m Cannot evaluate differences"
                " between the same files arguments '%s'. See usage." % (file1))

    if verbose:
        infoline = "\033[1;34mgetsystematics INFO\033[1;m Extracting systematics using files %s and %s" % (file1,file2)
        print(infoline)
        sys.stdout.flush()

    first = processedsample(file1,showall=verbose)
    second = processedsample(file2,showall=verbose)
    return first-second
def getdifferences(samplesnames, baselinefolder, comparedfolder, sysdict=None): """.. getdifferences(samplesnames,baselinefolder,comparedfolder) -> (dict(s """ from functionspool_mod import processedsample import os # Get some previous info # --- who is the analysis folder analysisdir = filter(lambda x: type(x) == list, [baselinefolder] + [comparedfolder])[0] # --- extract channels folders channelsfolders = map(lambda x: x.split("/")[-1], analysisdir) # --- build the channels path for the str. directory if type(baselinefolder) == list: baselinetop, channeldir = os.path.split(analysisdir[0]) comparedtop = comparedfolder else: baselinetop = baselinefolder comparedtop, channeldir = os.path.split(analysisdir[0]) # --- get signal name- signal = ''.join(ch for ch in channelsfolders[0] if ch != "m" and ch != "e") # Do it per channel sysdict = {} for channeldir in channelsfolders: print ".", for dataname in samplesnames: filebase = baselinetop+"/"+channeldir+"/"+"cluster_"+dataname+\ "/Results/"+dataname+".root" filecomp = comparedtop+"/"+channeldir+"/"+"cluster_"+dataname+\ "/Results/"+dataname+".root" diffsample = processedsample(filebase,showall=True)-\ processedsample(filecomp,showall=True) channel = channeldir.replace(signal, "") lastcut = diffsample.getcutlist()[-1] sysrel = diffsample.getsysrelative(lastcut) try: sysdict["SYS" + dataname][channel] = sysrel except KeyError: sysdict["SYS" + dataname] = {channel: sysrel} print "" return sysdict
def getsystwofiles(file1, file2, verbose):
    """.. function:: getsystwofiles(file1,file2,verbose) -> difference

    Subtract the 'processedsample' built from `file2` from the one built
    from `file1`. Both instances are created with showall=`verbose`.

    :param file1: root file of the first sample
    :param file2: root file of the second sample
    :param verbose: print an informative line before doing the work

    :return: processedsample(file1) - processedsample(file2)

    Calls sys.exit with an error message if `file1` equals `file2`.
    """
    import sys

    if file1 != file2:
        if verbose:
            print("\033[1;34mgetsystematics INFO\033[1;m Extracting systematics using files %s and %s" % (
                file1, file2))
            sys.stdout.flush()
        return processedsample(file1, showall=verbose) - processedsample(
            file2, showall=verbose)

    # Same file twice: there is no difference to evaluate
    message = "\033[1;31mgetsystematics ERROR\033[1;m Cannot evaluate differences"
    message += " between the same files arguments '%s'. See usage." % (file1)
    sys.exit(message)
def plotpv(sampleslist, luminosity):
    """.. function:: plotpv(sampleslist,luminosity)

    Draw, normalized and superimposed on the same canvas, the
    'fHNPrimaryVertices' histogram of every sample (but 'Fakes') and
    store the canvas as 'pvafterreweighting.pdf'.

    :param sampleslist: paths of the 'cluster_<samplename>' folders
    :type sampleslist: list(str)
    :param luminosity: forwarded as 'lumi' to each processedsample
    """
    import os
    import ROOT
    from LatinoStyle_mod import LatinosStyle
    from functionspool_mod import processedsample

    lstyle = LatinosStyle()
    lstyle.cd()
    ROOT.gStyle.SetOptStat(0)
    ROOT.gROOT.SetBatch()

    c = ROOT.TCanvas()
    frame = c.DrawFrame(0, 0, 30, 0.12)
    frame.SetXTitle("N_{PV}")
    frame.SetYTitle("Normalized Events")
    leg = ROOT.TLegend(0.6, 0.7, 0.8, 0.9)
    leg.SetBorderSize(0)
    leg.SetTextSize(0.03)
    leg.SetFillColor(10)

    for s in sampleslist:
        samplename = s.split("cluster_")[-1]
        if samplename == "Fakes":
            continue
        rootfilename = os.path.join(s, "Results", samplename + ".root")
        print(rootfilename)
        ps = processedsample(rootfilename, lumi=luminosity)
        h = ps.gethistogram("fHNPrimaryVertices")
        h.SetNormFactor(1)
        # An empty histogram cannot enlarge the frame (and would
        # otherwise divide by zero)
        integral = h.Integral()
        if integral > 0:
            ymax = h.GetMaximum() / integral
            if ymax > frame.GetMaximum():
                frame.GetYaxis().SetRangeUser(frame.GetBinLowEdge(1),
                                              ymax * 1.20)
        try:
            color = COLORSDICT[samplename]
        except KeyError:
            # Default color, taken from the ROOT module imported above
            color = ROOT.kGreen - 5
        h.SetLineColor(color)
        h.SetMarkerColor(color)
        option = "PF"
        if samplename == "Data":
            h.SetMarkerStyle(20)
            h.Draw("PESAME")
            option = "PL"
        else:
            h.Draw("SAME")
        leg.AddEntry(h, samplename, option)
    leg.Draw()
    c.SaveAs("pvafterreweighting.pdf")
    ROOT.gROOT.SetBatch(1)
def plotpv(sampleslist, luminosity):
    """.. function:: plotpv(sampleslist,luminosity)

    Plot the normalized number-of-primary-vertices distribution
    ('fHNPrimaryVertices' histogram) of each sample in the same canvas,
    which is saved as 'pvafterreweighting.pdf'. The 'Fakes' sample is
    skipped.

    :param sampleslist: 'cluster_<samplename>' folder paths, the root file
                        is looked for in <folder>/Results/<samplename>.root
    :type sampleslist: list(str)
    :param luminosity: value passed as 'lumi' to every processedsample
    """
    import os
    import ROOT
    from LatinoStyle_mod import LatinosStyle
    from functionspool_mod import processedsample

    lstyle = LatinosStyle()
    lstyle.cd()
    ROOT.gStyle.SetOptStat(0)
    ROOT.gROOT.SetBatch()

    c = ROOT.TCanvas()
    frame = c.DrawFrame(0, 0, 30, 0.12)
    frame.SetXTitle("N_{PV}")
    frame.SetYTitle("Normalized Events")

    leg = ROOT.TLegend(0.6, 0.7, 0.8, 0.9)
    leg.SetBorderSize(0)
    leg.SetTextSize(0.03)
    leg.SetFillColor(10)

    for s in sampleslist:
        samplename = s.split("cluster_")[-1]
        if samplename == "Fakes":
            continue

        rootfilename = os.path.join(s, "Results", samplename + ".root")
        print(rootfilename)
        ps = processedsample(rootfilename, lumi=luminosity)
        h = ps.gethistogram("fHNPrimaryVertices")
        h.SetNormFactor(1)

        # Enlarge the frame if needed; histograms without entries are
        # not considered to avoid the division by a null integral
        norm = h.Integral()
        if norm > 0 and h.GetMaximum() / norm > frame.GetMaximum():
            frame.GetYaxis().SetRangeUser(frame.GetBinLowEdge(1),
                                          h.GetMaximum() / norm * 1.20)

        try:
            color = COLORSDICT[samplename]
        except KeyError:
            # kGreen is resolved through the ROOT module, which is the
            # only ROOT name this function brings into scope
            color = ROOT.kGreen - 5
        h.SetLineColor(color)
        h.SetMarkerColor(color)

        if samplename == "Data":
            h.SetMarkerStyle(20)
            h.Draw("PESAME")
            option = "PL"
        else:
            h.Draw("SAME")
            option = "PF"
        leg.AddEntry(h, samplename, option)

    leg.Draw()
    c.SaveAs("pvafterreweighting.pdf")
    ROOT.gROOT.SetBatch(1)
shutil.move(fakesubstractedfile, clustername(opt.dataname) + "/Results/" + opt.dataname + ".root")

# Text of the caveat file left inside the generated folder: which files
# were used and the yields (per cut) of every term involved
warningfile = "CAVEAT: Directory tree created automatically from 'builtddsample' script\n"
warningfile += " using the files:\n"
for f in list(addsamples.values()) + list(subsamples.values()):
    warningfile += " %s\n" % f
warningfile += "Events yields are weighted but not normalized to any luminosity (when MC)\n"
warningfile += "=" * 60 + "\n"

# Including some useful info: yields of the terms added and subtracted
addnameslist = list(addyieldsdict.keys())
subnameslist = list(subyieldsdict.keys())
# -- header row: one column per term (the last separator is dropped)
warningfile += "%20s ||" % ("")
for name in addnameslist + subnameslist:
    warningfile += " %10s ||" % name
warningfile = warningfile[:-2] + "\n"

# Just to get the list of cuts
_p = processedsample(list(addsamples.values())[0])
cutordered = _p.getcutlist()
del _p

# -- one row per cut; subtracted terms are shown with negative sign
for cutname in cutordered:
    warningfile += "%20s " % cutname
    for name in addnameslist + subnameslist:
        try:
            warningfile += " %10.2f " % addyieldsdict[name][cutname]
        except KeyError:
            warningfile += " %10.2f " % (-1.0 * subyieldsdict[name][cutname])
    warningfile = warningfile[:-2] + "\n"
warningfile += "=" * 60 + "\n"

# The context manager closes the file even if the write fails
with open(clustername(opt.dataname) + "/WARNING_FOLDER_GENERATED_FROM_SCRIPT.txt", "w") as fw:
    fw.write(warningfile)
def getobsyields(channelfolder,signalname,charge):
    """.. function:: getobsyields(channelfolder,signalname,charge) -> Nobs,errs,NsysUp,NsysDown,Acc,AccSysUp,AccSysDown

    Evaluate the number of estimated signal events (data minus all the
    backgrounds, at the 'MET' cut) inside a standard folder structure.
    The systematics of the backgrounds are propagated, and the acceptance
    and its systematic variations are also evaluated.

    :param channelfolder: path where to do the calculation. It should contain
                          the 'cluster_*' folders inside
    :type channelfolder: str
    :param signalname: name of the signal
    :type signalname: str
    :param charge: key used to get the number of generated events from the
                   module-level NGENPUWEIGHTED (the W charge production,
                   valid values are PLUS|MINUS)
    :type charge: str

    :return: a 7-tuple with the number of signal events, its statistical
             error, a couple of dictionaries with the number of signal
             events variated (up and down) with the different systematic
             sources considered, the acceptance, and a couple of dictionaries
             with the acceptance variated (up and down). The dictionaries of
             the acceptance are empty when the channel is not one of
             eee, eem, mme, mmm
    :rtype: tuple(float,float,dict(str:float),dict(str:float),float,dict(str:float),dict(str:float))

    :raise RuntimeError: if no 'cluster_<signalname>*' folder is found
    """
    import os
    import glob
    from math import sqrt
    from functionspool_mod import processedsample
    import sys

    # systematics_mod is looked for in the first component of the channel
    # folder path; the search path is restored whatever the import does
    sys.path.insert(0,os.path.abspath(channelfolder.split("/")[0]))
    try:
        import systematics_mod
    finally:
        sys.path = sys.path[1:]

    # Extract the name of the signal
    try:
        actsignalname = os.path.basename(glob.glob(channelfolder+"/cluster_"+signalname+"*")[0]).replace("cluster_","")
    except IndexError:
        message = "\033[1;31mgetobsyields ERROR\033[1;m Some unexpected error"
        raise RuntimeError(message)
    # Get the channel: systematics are only available for the final ones
    channel = channelfolder.split("/")[-1][-3:]
    evalsys = channel in [ 'eee', 'eem', 'mme', 'mmm' ]

    # Name of the observed data
    dataname = "Data"
    dataps = processedsample(channelfolder+"/cluster_"+dataname+"/Results/"+dataname+".root")
    Nobs,Nerr = dataps.getvalue("MET")
    Ndata = Nobs

    # ACCEPTANCE CALCULATION
    signalps = processedsample(channelfolder+"/cluster_"+actsignalname+"/Results/"+actsignalname+".root")
    Npass,NpassErr = signalps.getrealvalue("MET")
    Ngen = NGENPUWEIGHTED[charge]
    acc = Npass/Ngen

    # ACCEPTANCE SYSTEMATICS
    accsys = getattr(systematics_mod,"SYS"+actsignalname)
    accSysUp = {}
    accSysDown = {}
    if evalsys:
        for systype,chdict in accsys.items():
            accSysUp[systype] = Npass*(1.0+chdict[channel])/Ngen
            accSysDown[systype] = Npass*(1.0-chdict[channel])/Ngen

    # Systematic propagation for Background samples
    allnames = [ os.path.basename(path).replace("cluster_","")
            for path in glob.glob(channelfolder+"/cluster_"+"*") ]
    samplesname = [ name for name in allnames
            if name != actsignalname and name != dataname ]
    syssamples = {}
    for sample in samplesname:
        try:
            syssamples[sample] = getattr(systematics_mod,"SYS"+sample)
        except AttributeError:
            # Assuming ZZ: Just a patch because ZZ is split
            syssamples[sample] = systematics_mod.SYSZZ
    # plus Fakes, modifying dict to the standard notation
    syssamples["Fakes"] = { "DDMMC": systematics_mod.DDMMC, "Fakes": systematics_mod.SYSFakes }

    # Building the full list of systematics types
    allsysset = set()
    for sysofsample in syssamples.values():
        allsysset.update(sysofsample.keys())
    allsys = list(allsysset)

    Nerr2 = Nerr**2.0
    TotalBkgSysUp = dict([(systype,0.0) for systype in allsys])
    TotalBkgSysDown = dict([(systype,0.0) for systype in allsys])
    # Subtracting every background sample
    for sample in samplesname:
        rootfilename = channelfolder+"/cluster_"+sample+"/Results/"+sample+".root"
        evts = processedsample(rootfilename)
        val,err = evts.getvalue("MET")
        Nobs -= val
        Nerr2 += err**2.
        if not evalsys:
            del evts
            continue
        for systype in allsys:
            # A sample not affected by a systematic source gets no variation
            try:
                relsys = syssamples[sample][systype][channel]
            except KeyError:
                relsys = 0.0
            # Total Background with all the background samples variated
            # to the same direction (UP/DOWN) for the same systematic
            TotalBkgSysUp[systype] += val*(1+relsys)
            TotalBkgSysDown[systype] += val*(1-relsys)
            # Adding "Fakes" to stat error---> FIXME???
            if sample == "Fakes" and systype == "Fakes":
                Nerr2 += (val*relsys)**2.0
        del evts
    del dataps

    # Number of observed events with the bkg variated with systematics
    NsysUp = dict([(systype,Ndata-nTotbkg) for systype,nTotbkg in TotalBkgSysUp.items()])
    NsysDown = dict([(systype,Ndata-nTotbkg) for systype,nTotbkg in TotalBkgSysDown.items()])

    return Nobs,sqrt(Nerr2),NsysUp,NsysDown,acc,accSysUp,accSysDown
lambda x: os.path.basename(folder1) == os.path.basename(x), thechannelfolders2)[0] dN_S2 = 0.0 N_S = 0.0 for sample in samplesname: file1 = os.path.join( folder1, "cluster_" + sample + "/Results/" + sample + ".root") file2 = os.path.join( folder2, "cluster_" + sample + "/Results/" + sample + ".root") diffsample = getsystwofiles(file1, file2, opt.verbose) cut = diffsample.getcutlist()[-1] dN_S2 += diffsample.getvalue(cut)[0]**2. # Extracting the signal sp = processedsample(file1) value = sp.getvalue(cut)[0] del sp if sample == dataname: N_S += value else: N_S -= value dN_S = sqrt(dN_S2) print "\033[1;34mgetsystematics INFO\033[1;m ---- Total WZ yield relative difference at %s cut: %.3f%s" % ( cut, dN_S / N_S * 100.0, "%") # Introduced a regular sample name else: # first check: the sample exists knownsamples = map( lambda x: os.path.basename(x).replace("cluster_", ""), glob.glob(thechannelfolders1[0] + "/cluster_" + "*"))
def __init__(self, data, signal, **keywords):
    """..class:: table(data,signal[,format="tex|html", isreduced=True|False,
                       wildcardfiles="dir",join="metasample"])

    The table is composed by several 'processedsample' instances (see
    functionspool_mod module), and is going to be built as

        bkg1 bkg2 ... TotBkg data Data-TotBkg signal

    :param data: name of the data column. The values of this column
                 are not going to be added up with the other columns
                 (to create the TotBkg column)
    :type data: str
    :param signal: name of the column which will be placed the last one.
                   Also the values of this column are not going to be added
                   up with the other columns (creating the TotBkg column)
    :type signal: str
    :param format: latex|tex|html The output format of the table
    :type format: str
    :param isreduced: if True, the pre-built metasamples DY, Z+Jets and
                      Other are joined
    :type isreduced: bool
    :param wildcardfiles: string with wildcards which can be used to find
                          what files have to be used as column generators
                          ('*.root' per default)
    :type wildcardfiles: str
    :param join: Complex parameter to introduce the possibility of merging
                 two or more samples into one unique metasample (called in
                 this way because the metasample does not have associated
                 any filename and is composed by two or more original
                 samples). The argument could be a str defining a pre-built
                 metasample: DY Z+Jets Other (see getsamplecomponents); or a
                 list of strings defining more than one pre-built
                 metasample; or a dictionary whose keys are the metasample
                 names and the values are lists containing the samples to
                 merge.
    :type join: str | [ str, str, ...] | { str: [ str, str, ... ], ... }

    :raise RuntimeError: if the VHSYS environment variable is not set, if a
                         keyword is not valid, or if the signal or the data
                         are not found
    """
    import glob
    import ROOT
    import os

    global TITLEDICT

    # First checkings (exceptions must be classes, not plain strings)
    if not os.getenv("VHSYS"):
        raise RuntimeError("\033[31mtable ERROR\033[m Initialize your"+\
                " environment (VHSYS env variable needed)")

    formatprov = None
    validkeywords = ["format", "isreduced", "join", "wildcardfiles"]
    wildcardfiles = "*.root"  # Per default
    join = []
    self.usermetasample = {}
    for key, value in keywords.items():
        # The keyword has to be checked against the valid ones, not
        # against the very same dictionary it is coming from
        if key not in validkeywords:
            message = "\033[31mtable ERROR\033[m Incorrect instantiation of 'table'"
            message += " class. Valid keywords are: " + str(validkeywords)
            raise RuntimeError(message)

        if key == 'format':
            formatprov = value
        elif key == 'isreduced':
            if value:
                join = ["DY", "Z+Jets", "Other"]
        elif key == "wildcardfiles":
            wildcardfiles = value
        elif key == "join":
            if type(value) == list and len(value) == 0:
                # we don't want to smash the join list
                pass
            elif type(value) == list:
                join = value
            elif type(value) == dict:
                self.usermetasample = value
                join = list(self.usermetasample.keys())
            else:
                join.append(value)

    # available filenames
    self.filenames = glob.glob(wildcardfiles)

    # Just to be sure that only use one WH signal (adapted 2012 MC samples)
    if signal.find("WH") == 0 or signal.find("wztt") == 0:
        # Common between 2011 and 2012 MC signal samples names
        # FIXME: Add all the signals in the last columns
        potentialSfiles = [f for f in self.filenames if f.find("ToWW") != -1]
        nonsignalfiles = [f for f in potentialSfiles
                          if f.split("/")[-1].split(".root")[0] != signal]
        # Removing the signal files which are not the requested one
        for f in nonsignalfiles:
            self.filenames.remove(f)
        # Added the important cuts to be used in the reduced table case
        self.importantcutsdict = { 'Pre-selection': 'Exactly3Leptons', \
                'DeltaR': 'DeltaR', 'ZVeto':'ZVeto', 'MET': 'MET'}
        self.importantcutslist = [ 'Pre-selection', 'DeltaR', 'ZVeto', 'MET' ]
    else:
        # Added the important cuts to be used in the reduced table case
        # FIXME WARNING HARDCODED!! --> This is a temporal patch...
        self.importantcutsdict = {
            'Pre-selection': 'Exactly3Leptons',
            'Z': 'HasZCandidate',
            'W': 'MET'
        }
        self.importantcutslist = ['Pre-selection', 'Z', 'W']

    # building the columns
    self.columns = {}
    for f in self.filenames:
        naturalname = os.path.basename(f).split(".")[0]
        # Using the per default name when there is no fancy one
        # (so updating the TITLEDICT global)
        if naturalname not in TITLEDICT:
            TITLEDICT[naturalname] = naturalname
        col = processedsample(f)
        self.columns[col.title] = col

    # samples names
    self.samples = list(self.columns.keys())
    # Check the signal and data are there
    if not signal in self.samples + join:
        raise RuntimeError("\033[31mtable ERROR\033[m The signal introduced '"\
                +signal+"' has not been found."+\
                " Check you have not introduced the '-n' option without quotes:\n"+\
                " printtable "+signal+" -n \"whatever*..\"")
    if not data in self.samples + join:
        raise RuntimeError("\033[31mtable ERROR\033[m The data introduced '"\
                +data+"' has not been found."+\
                " Check you have not introduced the '-n' option without quotes:\n"+\
                " printtable "+signal+" -n \"whatever*..\"")
    self.signal = signal
    self.data = data

    # Ordered samples names (column names): backgrounds ordered first
    knownnames = self.samples + join
    self.columntitles = [title for title in ORDERCOLUMNS if title in knownnames]
    for i in self.samples:
        # Already took into account
        if i in self.columntitles:
            continue
        # Signal and data will be put after
        if i == self.signal or i == self.data:
            continue
        self.columntitles.append(i)
    # -- Adding the other columns
    self.columntitles.append("TotBkg")
    self.columntitles.append(self.data)
    self.columntitles.append("Data-TotBkg")
    self.columntitles.append(self.signal)
    # -- Avoiding repetition (mainly for DDD-DDM case): the first
    #    occurrence of each repeated title is dropped
    repeated = set([title for title in self.columntitles
                    if self.columntitles.count(title) > 1])
    for title in repeated:
        self.columntitles.remove(title)

    # The datamember samples is superseded by columntitles
    self.samples = self.columntitles

    # 2) Merge some samples just in one
    for metasample in join:
        samplestodelete = []
        self.columns[metasample] = processedsample("", nobuilt=True)
        for sample in self.getsamplecomponents(metasample):
            try:
                self.columns[metasample] += self.columns[sample]
                samplestodelete.append(sample)
            except KeyError:
                # Protecting the case where the pre-defined
                # samples aren't there (for instance WJets_Madgraph
                # in the 'Other' metasample)
                pass
        # See if we have any of the samples to merge, if not then
        # it has no sense going on
        if len(samplestodelete) == 0:
            # Do not incorporate the metasample to the table
            self.columns.pop(metasample)
            continue
        # Put the title
        self.columns[metasample].title = metasample
        # Incorporates the metasample to the global TITLEDICT
        TITLEDICT[metasample] = metasample
        # Erase the samples merged, keeping the position of the last one
        for s2remove in samplestodelete:
            self.columns.pop(s2remove)
            index = self.columntitles.index(s2remove)
            self.columntitles.remove(s2remove)
        # And add to the columntitles data member if not in there
        if not metasample in self.columntitles:
            self.columntitles.insert(index, metasample)

    # format specific
    self.format = format()
    if formatprov:
        self.setformat(formatprov)
def getMSvalerr(self, cut, sample):
    """.. function::getMSvalerr( cut, sample ) -> valueanderror

    Extract the value and the error, given the cut and the sample name,
    and build the string to be shown in the table. The number of digits
    kept depends on the size of the error:

     - if |error| < 1e-30, only the value is returned, through
       getrounded(val,1000)
     - if 1.5 <= error < 2.0, value and error are built with getrounded(.,1)
     - if 1.0 <= error < 1.5, value and error are built with getrounded(.,2)
     - if error >= 2.0, the error is built with getrounded(.,0) and the
       value is rounded to an integer
     - if 0 < error < 1.0, the decimal digits of the error are scanned until
       the non-zero digits collected form a number >= 2; if that needs more
       than 3 decimal positions the result is written in the form
       (value+-error)*10^-exponent, using the symbols of the current format

    The 'TotBkg' column is (re)built here, for the given cut, as the sum
    of all the columns but data, signal, 'TotBkg' and 'Data-TotBkg' (errors
    added in quadrature). The 'Data-TotBkg' value is the data minus the
    'TotBkg' column.

    NOTE(review): the second argument of getrounded is presumably the
    number of decimals to keep -- confirm against functionspool_mod.

    :param cut: cut name
    :type cut: str
    :param sample: sample name
    :type sample: str

    :return: the value and its error joined in one string by the
             'plusminus' symbol of the format (or just the value when the
             error is null)
    :rtype : str
    """
    from math import sqrt
    from functionspool_mod import getrounded

    # Dealing with the TotBkg sample which has to be built
    if sample == "TotBkg":
        val = 0.0
        err2 = 0.0
        # Summing up every background column
        for s in filter(
                lambda x: x != self.data and x != self.signal and
                x != "TotBkg" and x != "Data-TotBkg", self.samples):
            (v, e) = self.columns[s].getvalue(cut)
            val += v
            err2 += e**2.0
        err = sqrt(err2)
        try:
            self.columns["TotBkg"].rowvaldict[cut] = (val, err)
        except KeyError:
            # Creating the column
            self.columns["TotBkg"] = processedsample("", nobuilt=True)
            # Initializing dict and all the other needed data members
            self.columns["TotBkg"].rowvaldict = {cut: (val, err)}
            self.columns["TotBkg"].cutordered = self.columns[
                self.data].cutordered
    # extracting the values
    try:
        val, err = self.columns[sample].getvalue(cut)
    except KeyError:
        # It's subtraction data - TotBkg columns
        # NOTE(review): any other sample without a column leaves val/err
        # unassigned here -- confirm that cannot happen
        if sample == "Data-TotBkg":
            # Note that as it has been extracted by order, Data and total
            # background (presumably already evaluated for this cut -- confirm)
            (valdata, errdata) = self.columns[self.data].getvalue(cut)
            (valbkg, errbkg) = self.columns["TotBkg"].getvalue(cut)
            val = valdata - valbkg
            err = sqrt(errdata**2.0 + errbkg**2.0)
    # Begin the formatting
    # Found the last significant value: we get the first value
    # > 0
    #errstr = str(err)
    #valstr = str(val)
    # Number of decimal positions of the error scanned so far
    nafterpoint = 0
    # Case of a null error: just the value
    if abs(err) < 1e-30:
        return getrounded(val, 1000)
    # Case 1.5 <= err < 2.0
    elif err >= 1.5 and err < 2.0:
        errstr = getrounded(err, 1)
        valstr = getrounded(val, 1)
        nafterpoint = 1
    # Case 1.0 <= err < 1.5
    elif err >= 1.0 and err < 1.5:
        errstr = getrounded(err, 2)
        valstr = getrounded(val, 2)
        nafterpoint = 2
    # Case err >= 2.0: integer value
    elif err >= 2.0:
        errstr = getrounded(err, 0)
        valstr = "%i" % round(val)
        nafterpoint = 0
    # Case 0 < err < 1.0
    # NOTE(review): a negative (or NaN) error matches no branch, leaving
    # valstr/errstr unassigned -- confirm errors are always >= 0
    elif err < 1.0 and err > 0.0:
        getdecimal = False
        # Decimal digits of the error, as written by str()
        # NOTE(review): str() of a small float can use the scientific
        # notation (e.g. '1e-05'), whose characters would not all be
        # digits for the int(n) below -- verify
        errstrPRE = str(err).split(".")[-1]
        # Non-zero digits collected so far
        numbuilt = ''
        for n, index in zip(errstrPRE, xrange(len(errstrPRE))):
            nafterpoint += 1
            if int(n) > 0:
                numbuilt += n
            try:
                dum = int(numbuilt)
            except ValueError:
                # Nothing collected yet (leading zeros)
                continue
            # NOTE(review): if the digits never build a number >= 2 (e.g.
            # an error of exactly 0.1) the loop ends with valstr/errstr
            # unassigned -- verify
            if int(numbuilt) >= 2:
                # done
                # --- Check the first is not 1: if so, one more
                #     significant digit is kept
                nsignumberformat = "%.0e"
                if numbuilt[0] == "1":
                    nsignumberformat = "%.1e"
                if nafterpoint > 3:
                    # Too many decimals: using the scientific notation,
                    # splitting the mantissa and the exponent of the error
                    errstr = nsignumberformat % err
                    exponent = int(errstr.split("e-")[-1])
                    errstr = errstr.split("e-")[0]
                    # - keeping the integer part
                    # FIXME: assume at least 10^-3...
                    valstr = '%i' % val
                    decpart = val - int(valstr)
                    if valstr == '0':
                        valstr = ''
                    if exponent != nafterpoint:
                        missing = nafterpoint - exponent
                        decpartstrformat = "%." + str(exponent +
                                                      missing) + "f"
                    else:
                        decpartstrformat = "%." + str(exponent) + "f"
                    # To rounding properly
                    decpartstrPRE = decpartstrformat % decpart
                    # get only the decimal part
                    decpartstrONLY = decpartstrPRE.split(".")[-1]
                    # Moving down the decimal point
                    decpartstr = ''
                    for i in xrange(exponent):
                        decpartstr += decpartstrONLY[i]
                    # Dropping the leading zeros
                    decpartstr = str(int(decpartstr))
                    if numbuilt[0] == "1":
                        valstr += decpartstr[:-1] + "." + decpartstr[-1]
                    else:
                        valstr += decpartstr
                else:
                    # Regular notation: value and error written with the
                    # same number of decimals
                    howmany0 = nafterpoint - len(numbuilt)
                    errstrformat = "%." + str(nafterpoint) + "f"
                    errstr = errstrformat % err
                    #errstr = "0."+"0"*howmany0+numbuilt
                    valstrformat = "%." + str(howmany0 +
                                              len(numbuilt)) + "f"
                    valstr = valstrformat % val
                break

    # Only reachable with nafterpoint > 3 from the 0 < err < 1.0 case,
    # where the exponent was evaluated
    if nafterpoint > 3:
        totalvalstr = "("+valstr+self.format.plusminus+errstr+")"+self.format.cdot+"10"+\
                self.format.exponentstart+"-"+str(exponent)+self.format.exponentend
    else:
        totalvalstr = valstr + self.format.plusminus + errstr

    return totalvalstr
def getdifferences(samplesnames,baselinefolder,comparedfolder,sysdict = None, **keywords): """.. getdifferences(samplesnames,baselinefolder,comparedfolder) -> (dict(s """ from functionspool_mod import processedsample import os metasamples = {} if keywords.has_key('metasamples'): metasamples = keywords['metasamples'] # Get some previous info # --- who is the analysis folder analysisdir = filter(lambda x: type(x) == list, [baselinefolder]+[comparedfolder])[0] # --- extract channels folders channelsfolders = map(lambda x: x.split("/")[-1],analysisdir) # --- build the channels path for the str. directory if type(baselinefolder) == list: baselinetop,channeldir = os.path.split(analysisdir[0]) comparedtop = comparedfolder else: baselinetop = baselinefolder comparedtop,channeldir = os.path.split(analysisdir[0]) # --- get signal name- signal = ''.join(ch for ch in channelsfolders[0] if ch != "m" and ch != "e") # Do it per channel sysdict = {} for channeldir in channelsfolders: print ".", for dataname in filter(lambda x: x not in metasamples.keys(), samplesnames): filebase = baselinetop+"/"+channeldir+"/"+"cluster_"+dataname+\ "/Results/"+dataname+".root" filecomp = comparedtop+"/"+channeldir+"/"+"cluster_"+dataname+\ "/Results/"+dataname+".root" diffsample = processedsample(filebase,showall=True)-\ processedsample(filecomp,showall=True) channel = channeldir.replace(signal,"") lastcut = diffsample.getcutlist()[-1] sysrel = diffsample.getsysrelative(lastcut) try: sysdict["SYS"+dataname][channel] = abs(sysrel) except KeyError: sysdict["SYS"+dataname] = { channel: abs(sysrel) } # And the metasamples for metaname,realsamples in metasamples.iteritems(): metaprocessbase = processedsample('',nobuilt=True) metaprocesscomp = processedsample('',nobuilt=True) for realname in realsamples: filebase = baselinetop+"/"+channeldir+"/"+"cluster_"+realname+\ "/Results/"+realname+".root" metaprocessbase += processedsample(filebase,showall=True) filecomp = 
comparedtop+"/"+channeldir+"/"+"cluster_"+realname+\ "/Results/"+realname+".root" metaprocesscomp += processedsample(filecomp,showall=True) diffsample = metaprocessbase-metaprocesscomp channel = channeldir.replace(signal,"") lastcut = diffsample.getcutlist()[-1] sysrel = diffsample.getsysrelative(lastcut) try: sysdict["SYS"+metaname][channel] = abs(sysrel) except KeyError: sysdict["SYS"+metaname] = { channel: abs(sysrel) } print "" return sysdict
os.chdir(folder) # Find the samples and the folder clustername = 'cluster_' + opt.dataname rootmainterm = clustername + "/Results/" + opt.dataname + ".root" ntsamplename = opt.dataname + "_" + lowterm clustername_Nt = 'cluster_' + ntsamplename rootlowterm = clustername_Nt + "/Results/" + ntsamplename + ".root" # Find the MAIN order sample fakesample = os.path.join(folder, rootmainterm) if not os.path.isfile(fakesample): message = "\033[31mnt3subtract ERROR\033[m Malformed folder structure:"\ " Not found the FAKES file "\ "'%s'' inside the folder '%s'" % (rootmainterm,folder) sys.exit(message) # Including the raw main term info to be print in the warning file psnt2 = processedsample(fakesample) cutordered = psnt2.getcutlist() # Using the same kind of yields than the other term yieldsmain = dict( map(lambda cut: (cut, psnt2.getrealvalue(cut)), cutordered)) #yieldsmain = psnt2.rowvaldict # and store nametarfile = opt.dataname.lower().replace("_", "") + "pool.tar.gz" # Find the LOW order samples lowfolders = glob.glob(os.path.join(folder, clustername_Nt + "*")) if len(lowfolders) == 0: # Checking we didn't use this script before if os.path.isfile(nametarfile): # Recovering the original samples shutil.rmtree(clustername) tar = tarfile.open(nametarfile)
print "\033[1;34mgetsystematics INFO\033[1;m Systematics for WZ signal yields (N_S=N_D-N_BKG) term. You can get the values below directly as %" for folder1 in sorted(thechannelfolders1): channel = os.path.basename(folder1) print "\033[1;34mgetsystematics INFO\033[1;m Channel %s" % channel folder2 = filter(lambda x: os.path.basename(folder1) == os.path.basename(x), thechannelfolders2)[0] dN_S2 = 0.0 N_S = 0.0 for sample in samplesname: file1 = os.path.join(folder1,"cluster_"+sample+"/Results/"+sample+".root") file2 = os.path.join(folder2,"cluster_"+sample+"/Results/"+sample+".root") diffsample = getsystwofiles(file1,file2,opt.verbose) cut = diffsample.getcutlist()[-1] dN_S2 += diffsample.getvalue(cut)[0]**2. # Extracting the signal sp = processedsample(file1) value = sp.getvalue(cut)[0] del sp if sample == dataname: N_S += value else: N_S -= value dN_S = sqrt(dN_S2) print "\033[1;34mgetsystematics INFO\033[1;m ---- Total WZ yield relative difference at %s cut: %.3f%s" % (cut,dN_S/N_S*100.0,"%") # Introduced a regular sample name else: # first check: the sample exists knownsamples = map(lambda x: os.path.basename(x).replace("cluster_",""),glob.glob(thechannelfolders1[0]+"/cluster_"+"*")) if not opt.samplename in knownsamples: message = "\033[1;31mgetsystematics ERROR\033[1;m Sample not found '%s' in the channel folders" sys.exit(message)
def __init__(self,data,signal,**keywords):
    """..class:: table(data,signal[,format="tex|html",\
            isreduced=True|False, wildcardfiles="dir",join="metasample",\
            subtract=metasample,force=listofsamples,signalmc=name,\
            datadriven="PPF|PPP"])

    The table is composed by several 'processedsample' instances (see
    functionspool_mod module), and is going to be built as

        bkg1 bkg2 ... TotBkg data [Data-TotBkg] signal [signalmc]

    :param data: name of the data column. The values of this column
                 are not going to be added up with the other columns
                 (to create the TotBkg column)
    :type data: str
    :param signal: name of the signal column. Also the values of this
                   column are not going to be added up with the other
                   columns (creating the TotBkg column)
    :type signal: str
    :param format: latex|tex|html The output format of the table
    :type format: str
    :param isreduced: if True, the pre-built metasamples DY, Z+Jets and
                      Other are joined
    :type isreduced: bool
    :param wildcardfiles: string with wildcards which can be used to find
                          what files have to be used as column generators
                          ('*.root' per default)
    :type wildcardfiles: str
    :param join: Complex parameter to introduce the possibility of merging
                 two or more samples into one unique metasample (called in
                 this way because the metasample does not have associated
                 any filename and is composed by two or more original
                 samples). The argument could be a str defining a pre-built
                 metasample: DY Z+Jets Other (see getsamplecomponents); or a
                 list of strings defining more than one pre-built
                 metasample; or a dictionary whose keys are the metasample
                 names and the values are lists containing the samples to
                 merge.
    :type join: str | [ str, str, ...] | { str: [ str, str, ... ], ... }
    :param subtract: analogous parameter to join but to subtract: the
                     samples of each list are subtracted from the column
                     named as the key
    :type subtract: dict(str,list(str))
    :param force: list of samples to be kept in the table although they
                  are inside the subtract list
    :type force: list(str)
    :param signalmc: name of an extra column placed the last one, it is
                     not included in the backgrounds
    :type signalmc: str
    :param datadriven: 'PPF' to include the 'Data-TotBkg' column (default
                       behaviour), 'PPP' to not include it
    :type datadriven: str

    :raise RuntimeError: if a keyword is not valid, if a metasample is in
                         both join and subtract, or if the signal or the
                         data are not found
    """
    import glob
    import ROOT
    import os

    global TITLEDICT

    formatprov = None
    validkeywords = [ "format", "isreduced", "join", "wildcardfiles", \
            "signalmc", "subtract", "force", "datadriven"]
    wildcardfiles = "*.root" # Per default
    join = []
    subtract = {}
    subtractmeta = []
    keepforce = []
    self.usermetasample = {}
    self.signalmc = None
    self.nsignalcolumn = True
    for key,value in keywords.items():
        # The keyword has to be checked against the valid ones, not
        # against the very same dictionary it is coming from
        if key not in validkeywords:
            message = "\033[31mtable ERROR\033[m Incorrect instantiation of 'table'"
            message += " class. Valid keywords are: "+str(validkeywords)
            raise RuntimeError(message)

        if key == 'format':
            formatprov = value
        elif key == 'isreduced':
            if value:
                join = [ "DY", "Z+Jets", "Other" ]
        elif key == "wildcardfiles":
            wildcardfiles = value
        elif key == "join":
            if type(value) == list and len(value) == 0:
                # we don't want to smash the join list
                pass
            elif type(value) == list:
                join = value
            elif type(value) == dict:
                # Copied: the subtract metasamples are added below and
                # the dictionary of the caller must not be modified
                self.usermetasample = dict(value)
                join = list(self.usermetasample.keys())
            else:
                join.append(value)
        elif key == "subtract":
            subtract = value
        elif key == "force":
            keepforce = value
        elif key == 'signalmc':
            self.signalmc = value
        elif key == 'datadriven':
            if value == 'PPF':
                self.nsignalcolumn = True
            elif value == 'PPP':
                self.nsignalcolumn = False
    # Update the usermetasample with the subtract ones
    for metaname,listsamples in subtract.items():
        if metaname in self.usermetasample:
            message = '\033[1;31mtable ERROR\033[1;m Conflicts between merge and subtract'
            message += ' sample'
            raise RuntimeError(message)
        self.usermetasample[metaname] = listsamples
        subtractmeta.append(metaname)

    # available filenames
    self.filenames = glob.glob(wildcardfiles)

    # Just to be sure that only use one WH signal (adapted 2012 MC samples)
    if signal.find("WH") == 0 or signal.find("wztt") == 0:
        # Common between 2011 and 2012 MC signal samples names
        # FIXME: Add all the signals in the last columns
        potentialSfiles = [ f for f in self.filenames if f.find("ToWW") != -1 ]
        nonsignalfiles = [ f for f in potentialSfiles
                if f.split("/")[-1].split(".root")[0] != signal ]
        # Removing the signal files which are not the requested one
        for f in nonsignalfiles:
            self.filenames.remove(f)
        # Added the important cuts to be used in the reduced table case
        self.importantcutsdict = { 'Pre-selection': 'Exactly3Leptons', \
                'DeltaR': 'DeltaR', 'ZVeto':'ZVeto', 'MET': 'MET'}
        self.importantcutslist = [ 'Pre-selection', 'DeltaR','ZVeto', 'MET']
    else:
        # Added the important cuts to be used in the reduced table case
        self.importantcutsdict = { 'Pre-selection' : 'Exactly3Leptons',
                'Z' : 'HasZCandidate', 'W': 'MET' }
        self.importantcutslist = [ 'Pre-selection', 'Z', 'W' ]

    # building the columns
    self.columns = {}
    for f in self.filenames:
        naturalname = os.path.basename(f).split(".")[0]
        # Using the per default name when there is no fancy one
        # (so updating the TITLEDICT global)
        if naturalname not in TITLEDICT:
            TITLEDICT[naturalname] = naturalname
        col = processedsample(f)
        self.columns[col.title] = col

    # samples names
    self.samples = list(self.columns.keys())
    # Check the signal and data are there
    if not signal in self.samples+join:
        raise RuntimeError("\033[31mtable ERROR\033[m The signal introduced '"\
                +signal+"' has not been found."+\
                " Check you have not introduced the '-n' option without quotes:\n"+\
                " printtable "+signal+" -n \"whatever*..\"")
    if not data in self.samples+join:
        raise RuntimeError("\033[31mtable ERROR\033[m The data introduced '"\
                +data+"' has not been found."+\
                " Check you have not introduced the '-n' option without quotes:\n"+\
                " printtable "+signal+" -n \"whatever*..\"")
    self.signal = signal
    self.data = data

    # Ordered samples names (column names): backgrounds ordered first
    knownnames = self.samples+join
    self.columntitles = [ title for title in ORDERCOLUMNS if title in knownnames ]
    for i in self.samples:
        # Already took into account
        if i in self.columntitles:
            continue
        # Signal and data will be put after
        if i == self.signal or i == self.data or i == self.signalmc:
            continue
        self.columntitles.append(i)
    # -- Adding the other columns
    self.columntitles.append( "TotBkg" )
    self.columntitles.append( self.data )
    if self.nsignalcolumn:
        self.columntitles.append( "Data-TotBkg" )
    self.columntitles.append( self.signal )
    if self.signalmc:
        self.columntitles.append( self.signalmc )
    # -- Avoiding repetition (mainly for DDD-DDM case): the first
    #    occurrence of each repeated title is dropped
    repeated = set([ title for title in self.columntitles
            if self.columntitles.count(title) > 1 ])
    for title in repeated:
        self.columntitles.remove(title)

    # The datamember samples is superseded by columntitles
    self.samples = self.columntitles

    # 2) Merge some samples just in one
    for metasample in join:
        samplestodelete = []
        self.columns[metasample] = processedsample("",nobuilt=True)
        for sample in self.getsamplecomponents(metasample):
            try:
                self.columns[metasample] += self.columns[sample]
                samplestodelete.append( sample )
            except KeyError:
                # Protecting the case where the pre-defined
                # samples aren't there (for instance WJets_Madgraph
                # in the 'Other' metasample)
                pass
        # See if we have any of the samples to merge, if not then
        # it has no sense going on
        if len(samplestodelete) == 0:
            # Do not incorporate the metasample to the table
            self.columns.pop(metasample)
            continue
        # Put the title
        self.columns[metasample].title = metasample
        # Incorporates the metasample to the global TITLEDICT
        TITLEDICT[metasample] = metasample
        # Erase the samples merged, keeping the position of the last one
        for s2remove in samplestodelete:
            self.columns.pop(s2remove)
            index = self.columntitles.index(s2remove)
            self.columntitles.remove(s2remove)
        # And add to the columntitles data member if not in there
        if not metasample in self.columntitles:
            self.columntitles.insert(index,metasample)

    # 2_1) Subtract some samples
    for metasample in subtractmeta:
        samplestodelete = []
        for sample in self.getsamplecomponents(metasample):
            try:
                self.columns[metasample] -= self.columns[sample]
                if sample not in keepforce:
                    samplestodelete.append( sample )
            except KeyError:
                # Protecting the case where the samples (or the
                # column to be subtracted from) aren't there
                pass
        # Erase the samples subtracted, keeping the position of the last
        # one. The position is reset per metasample: it could be never
        # assigned (nothing to erase) or be the one of a previous loop
        position = None
        for s2remove in samplestodelete:
            self.columns.pop(s2remove)
            position = self.columntitles.index(s2remove)
            self.columntitles.remove(s2remove)
        # And add to the columntitles data member if not in there (only
        # if there is actually a column to show)
        if metasample in self.columns and not metasample in self.columntitles:
            if position is None:
                self.columntitles.append(metasample)
            else:
                self.columntitles.insert(position,metasample)

    # format specific
    self.format = format()
    if formatprov:
        self.setformat(formatprov)
def getMSvalerr(self, cut,sample):
    """.. function::getMSvalerr( cut, sample ) -> valueanderror

    Build the printable 'value<plusminus>error' string of a table cell,
    given the cut (row) and the sample (column) names.

    Two columns are not read from the stored samples but evaluated here:

     - "TotBkg": sum of the yields of every name in self.samples except
       the data, the signal, the signal MC, "TotBkg" and "Data-TotBkg";
       the errors are added in quadrature. The result is stored in
       self.columns["TotBkg"] (the column is created the first time it
       is requested, borrowing the cut order from the data column).
     - "Data-TotBkg": data yield minus the "TotBkg" yield, errors added
       in quadrature. It needs the "TotBkg" column to be already present
       in self.columns, i.e. "TotBkg" has to be requested before.

    The conversion of the (value,error) pair to strings is delegated to
    functionspool_mod.getvalpluserr.
    NOTE(review): the previous documentation stated that the number of
    decimals depends on the error size (integer if error >= 1.5, two
    decimals between 1.0 and 1.5); confirm against getvalpluserr.

    :param cut: cut name
    :type cut: str
    :param sample: sample name
    :type sample: str

    :return: the value and its error joined by self.format.plusminus
    :rtype: str

    :raise KeyError: if the sample is not an available column and it is
                     not the "Data-TotBkg" one
    """
    from math import sqrt
    from functionspool_mod import getvalpluserr

    # Dealing with the TotBkg sample which has to be built
    if sample == "TotBkg":
        # Columns which are not background contributions
        notbkg = (self.data, self.signalmc, self.signal, "TotBkg", "Data-TotBkg")
        val = 0.0
        err2 = 0.0
        for s in self.samples:
            if s in notbkg:
                continue
            (v,e) = self.columns[s].getvalue(cut)
            val += v
            err2 += e**2.0
        err = sqrt(err2)
        try:
            self.columns["TotBkg"].rowvaldict[cut] = (val,err)
        except KeyError:
            # First time here: creating the column
            self.columns["TotBkg"] = processedsample("",nobuilt=True)
            # Initializing dict and all the other needed data members
            self.columns["TotBkg"].rowvaldict = { cut: (val,err) }
            self.columns["TotBkg"].cutordered = self.columns[self.data].cutordered

    # Extracting the values
    try:
        val,err = self.columns[sample].getvalue(cut)
    except KeyError:
        if sample != "Data-TotBkg":
            # Unknown column: propagate the lookup error instead of
            # failing afterwards with unbound 'val' and 'err'
            raise
        # It's the subtraction data - TotBkg column. Note that the
        # columns are extracted by order, so Data and the total
        # background are already available
        (valdata,errdata) = self.columns[self.data].getvalue(cut)
        (valbkg,errbkg) = self.columns["TotBkg"].getvalue(cut)
        val = valdata-valbkg
        err = sqrt(errdata**2.0+errbkg**2.0)

    valstr,errstr = getvalpluserr(val,err)
    return valstr+self.format.plusminus+errstr