Example no. 1
def getobsyieldsprecise(channelfolder,signalname):
	"""
	"""
	import os
	import glob
	from math import sqrt
	from functionspool_mod import processedsample

	# Extract the name of the signal
	try:
		actsignalname = os.path.basename(glob.glob(channelfolder+"/cluster_"+signalname+"*")[0]).replace("cluster_","")
	except IndexError:
		message = "\033[1;31mgetobsyields ERROR\033[1;m Some unexpeted error"
		raise RuntimeError(message)
	# Name of the observed data
	dataname = "Data"
	dataps = processedsample(channelfolder+"/cluster_"+dataname+"/Results/"+dataname+".root")
	Nobs,Nerr = dataps.getvalue("MET")
	
	samplesname = filter(lambda x: x != actsignalname and x != dataname, \
			map(lambda x: os.path.basename(x).replace("cluster_",""),glob.glob(channelfolder+"/cluster_"+"*")) )

	Nerr2 = Nerr**2.0
	# Creating the processedsample instances for the background samples
	for sample in samplesname:
		rootfilename = channelfolder+"/cluster_"+sample+"/Results/"+sample+".root"
		evts = processedsample(rootfilename)
		val,err = evts.getvalue("MET")
		Nobs -= val
		Nerr2 += err**2.
		del evts
	del dataps
	
	return Nobs,sqrt(Nerr2)
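
A minimal call sketch for the function above; the channel folder path and the signal name are hypothetical placeholders.

# Usage sketch (hypothetical channel folder and signal name)
Nobs, err = getobsyieldsprecise("WPositive/WZmmm", "WZ")
print "N_obs = %.2f +- %.2f" % (Nobs, err)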
Example no. 2
def getsystwofiles(file1,file2,verbose):
	"""
	"""
	import sys

	if file1 == file2:
		message = "\033[1;31mgetsystematics ERROR\033[1;m Cannot evaluate differences"\
				" between the same files arguments '%s'. See usage." % (file1)
		sys.exit(message)
	if verbose:
		print "\033[1;34mgetsystematics INFO\033[1;m Extracting systematics using files %s and %s" % (file1,file2)
		sys.stdout.flush()
	
	evt = processedsample(file1,showall=verbose)-processedsample(file2,showall=verbose)

	return evt
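
A short usage sketch, assuming two result files produced with the nominal and a varied configuration (the paths are placeholders); the returned 'processedsample' difference can then be queried at a given cut.

# Usage sketch (hypothetical file paths)
diff = getsystwofiles("nominal/cluster_WZTo3LNu/Results/WZTo3LNu.root",
		"varied/cluster_WZTo3LNu/Results/WZTo3LNu.root", True)
lastcut = diff.getcutlist()[-1]
print diff.getvalue(lastcut)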
Example no. 3
def getdifferences(samplesnames, baselinefolder, comparedfolder, sysdict=None):
    """.. getdifferences(samplesnames,baselinefolder,comparedfolder) -> (dict(s
	"""
    from functionspool_mod import processedsample
    import os

    # Get some previous info
    # --- which argument is the analysis folder (a list of channel folders)
    analysisdir = filter(lambda x: type(x) == list,
                         [baselinefolder] + [comparedfolder])[0]
    # --- extract channels folders
    channelsfolders = map(lambda x: x.split("/")[-1], analysisdir)
    # --- build the channels path for the str. directory
    if type(baselinefolder) == list:
        baselinetop, channeldir = os.path.split(analysisdir[0])
        comparedtop = comparedfolder
    else:
        baselinetop = baselinefolder
        comparedtop, channeldir = os.path.split(analysisdir[0])

    # --- get signal name
    signal = ''.join(ch for ch in channelsfolders[0]
                     if ch != "m" and ch != "e")

    # Do it per channel
    sysdict = {}
    for channeldir in channelsfolders:
        print ".",
        for dataname in samplesnames:
            filebase = baselinetop+"/"+channeldir+"/"+"cluster_"+dataname+\
              "/Results/"+dataname+".root"

            filecomp = comparedtop+"/"+channeldir+"/"+"cluster_"+dataname+\
              "/Results/"+dataname+".root"
            diffsample = processedsample(filebase,showall=True)-\
              processedsample(filecomp,showall=True)
            channel = channeldir.replace(signal, "")
            lastcut = diffsample.getcutlist()[-1]
            sysrel = diffsample.getsysrelative(lastcut)
            try:
                sysdict["SYS" + dataname][channel] = sysrel
            except KeyError:
                sysdict["SYS" + dataname] = {channel: sysrel}
    print ""

    return sysdict
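
A call sketch under the assumption that the baseline is passed as the list of channel folders and the compared one as a plain top-level directory (folder and sample names are placeholders).

# Usage sketch (hypothetical folders and sample names)
channels = ["nominal/WZeee", "nominal/WZeem", "nominal/WZmme", "nominal/WZmmm"]
sysdict = getdifferences(["WZTo3LNu", "ZZ"], channels, "leptonscaleup")
print sysdict["SYSWZTo3LNu"]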
Example no. 4
def getsystwofiles(file1, file2, verbose):
    """
	"""
    import sys

    if file1 == file2:
        message = "\033[1;31mgetsystematics ERROR\033[1;m Cannot evaluate differences"\
          " between the same files arguments '%s'. See usage." % (file1)
        sys.exit(message)
    if verbose:
        print "\033[1;34mgetsystematics INFO\033[1;m Extracting systematics using files %s and %s" % (
            file1, file2)
        sys.stdout.flush()

    evt = processedsample(file1, showall=verbose) - processedsample(
        file2, showall=verbose)

    return evt
Example no. 5
def plotpv(sampleslist, luminosity):
    """
	"""
    import ROOT
    from LatinoStyle_mod import LatinosStyle
    from functionspool_mod import processedsample

    lstyle = LatinosStyle()
    lstyle.cd()
    ROOT.gStyle.SetOptStat(0)

    ROOT.gROOT.SetBatch()
    c = ROOT.TCanvas()
    frame = c.DrawFrame(0, 0, 30, 0.12)
    frame.SetXTitle("N_{PV}")
    frame.SetYTitle("Normalized Events")
    leg = ROOT.TLegend(0.6, 0.7, 0.8, 0.9)
    leg.SetBorderSize(0)
    leg.SetTextSize(0.03)
    leg.SetFillColor(10)

    i = 0
    for s in sampleslist:
        samplename = s.split("cluster_")[-1]
        if samplename == "Fakes":
            continue
        rootfilename = os.path.join(os.path.join(s, "Results"),
                                    samplename + ".root")
        print rootfilename
        ps = processedsample(rootfilename, lumi=luminosity)
        h = ps.gethistogram("fHNPrimaryVertices")
        h.SetNormFactor(1)
        ymax = h.GetMaximum() / h.Integral()
        if ymax > frame.GetMaximum():
            frame.GetYaxis().SetRangeUser(frame.GetBinLowEdge(1), ymax * 1.20)
        try:
            # COLORSDICT is expected to be a module-level {samplename: color} dict
            color = COLORSDICT[samplename]
        except KeyError:
            color = ROOT.kGreen - 5
        h.SetLineColor(color)
        h.SetMarkerColor(color)
        #h.SetFillColor(color)
        #h.SetFillStyle(3002+i)
        option = "PF"
        if samplename == "Data":
            h.SetMarkerStyle(20)
            h.Draw("PESAME")
            option = "PL"
        else:
            h.Draw("SAME")
        leg.AddEntry(h, samplename, option)
        i += 1
    leg.Draw()
    c.SaveAs("pvafterreweighting.pdf")
    ROOT.gROOT.SetBatch(1)
Example no. 6
def plotpv(sampleslist, luminosity):
    """
	"""
    import ROOT
    from LatinoStyle_mod import LatinosStyle
    from functionspool_mod import processedsample

    lstyle = LatinosStyle()
    lstyle.cd()
    ROOT.gStyle.SetOptStat(0)

    ROOT.gROOT.SetBatch()
    c = ROOT.TCanvas()
    frame = c.DrawFrame(0, 0, 30, 0.12)
    frame.SetXTitle("N_{PV}")
    frame.SetYTitle("Normalized Events")
    leg = ROOT.TLegend(0.6, 0.7, 0.8, 0.9)
    leg.SetBorderSize(0)
    leg.SetTextSize(0.03)
    leg.SetFillColor(10)

    i = 0
    for s in sampleslist:
        samplename = s.split("cluster_")[-1]
        if samplename == "Fakes":
            continue
        rootfilename = os.path.join(os.path.join(s, "Results"), samplename + ".root")
        print rootfilename
        ps = processedsample(rootfilename, lumi=luminosity)
        h = ps.gethistogram("fHNPrimaryVertices")
        h.SetNormFactor(1)
        ymax = h.GetMaximum() / h.Integral()
        if ymax > frame.GetMaximum():
            frame.GetYaxis().SetRangeUser(frame.GetBinLowEdge(1), ymax * 1.20)
        try:
            # COLORSDICT is expected to be a module-level {samplename: color} dict
            color = COLORSDICT[samplename]
        except KeyError:
            color = ROOT.kGreen - 5
        h.SetLineColor(color)
        h.SetMarkerColor(color)
        # h.SetFillColor(color)
        # h.SetFillStyle(3002+i)
        option = "PF"
        if samplename == "Data":
            h.SetMarkerStyle(20)
            h.Draw("PESAME")
            option = "PL"
        else:
            h.Draw("SAME")
        leg.AddEntry(h, samplename, option)
        i += 1
    leg.Draw()
    c.SaveAs("pvafterreweighting.pdf")
    ROOT.gROOT.SetBatch(1)
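
A minimal call sketch for plotpv; the glob pattern and the luminosity value are placeholders, and the function assumes it runs from the directory containing the 'cluster_*' folders.

# Usage sketch (placeholder glob pattern and luminosity)
import glob
plotpv(sorted(glob.glob("cluster_*")), 4922.0)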
Example no. 7
 shutil.move(fakesubstractedfile, clustername(opt.dataname) + "/Results/" + opt.dataname + ".root")
 warningfile = "CAVEAT: Directory tree created automatically from 'builtddsample' script\n"
 warningfile += "        using the files:\n"
 for f in addsamples.values() + subsamples.values():
     warningfile += "          %s\n" % f
 warningfile += "Events yields are weighted but not normalized to any luminosity (when MC)\n"
 warningfile += "=" * 60 + "\n"
 ## Including some useful info: yields of the low-order term subtracted from the main one
 addnameslist = addyieldsdict.keys()
 subnameslist = subyieldsdict.keys()
 warningfile += "%20s ||" % ("")
 for name in addnameslist + subnameslist:
     warningfile += " %10s ||" % name
 warningfile = warningfile[:-2] + "\n"
 # Just to get the list of cuts
 _p = processedsample(addsamples.values()[0])
 cutordered = _p.getcutlist()
 del _p
 for cutname in cutordered:
     warningfile += "%20s  " % cutname
     # for pppname in ["Nt2"]+pppnameslist:
     for name in addnameslist + subnameslist:
         try:
             warningfile += " %10.2f  " % addyieldsdict[name][cutname]
         except KeyError:
             warningfile += " %10.2f  " % (-1.0 * subyieldsdict[name][cutname])
     warningfile = warningfile[:-2] + "\n"
 warningfile += "=" * 60 + "\n"
 fw = open(clustername(opt.dataname) + "/WARNING_FOLDER_GENERATED_FROM_SCRIPT.txt", "w")
 fw.writelines(warningfile)
 fw.close()
Example no. 8
def getobsyields(channelfolder,signalname,charge):
	""".. function:: getobsyields(channelfolder,signalname,charge) -> Nobs,errs,NsysUp,NsysDown,Acc,AccSysUp,AccSysDown

	Evaluate the number of estimated signal events inside a standard folder
	structure. It is calculated also the systematic propagation of the backgrounds
	Also is going to estimate the acceptance systematics in relative

	:channelfolder param: path where to do calculation. It should contain the 'cluster_*'
	                    folders inside
	:channelfolder type: str
	:signalname param: name of the signal
	:signalname type: str
	:charge param: the W charge production, valid values are PLUS|MINUS
	:charge type: str

	:return: a 6-tuple with the number of signal events, its statistical error, and
	        a couple of dictionaries with the number of signal events variated with
		the different systematic sources considered. Also it returns the acceptance
		and the systematic variations
	:rtype: tuple(float,float,dict(str:float),dict(str:float),float,dict(str:float),dict(str:float))
	"""
	import os
	import glob
	from math import sqrt
	from functionspool_mod import processedsample

	import sys
	sys.path.insert(0,os.path.abspath(channelfolder.split("/")[0]))
	import systematics_mod
	sys.path = sys.path[1:]
	
	# Extract the name of the signal
	try:
		actsignalname = os.path.basename(glob.glob(channelfolder+"/cluster_"+signalname+"*")[0]).replace("cluster_","")
	except IndexError:
		message = "\033[1;31mgetobsyields ERROR\033[1;m Some unexpected error"
		raise RuntimeError(message)
	# Get the channel
	channel = channelfolder.split("/")[-1][-3:]
	evalsys = True
	if channel not in [ 'eee', 'eem', 'mme', 'mmm' ]:
		evalsys = False

	# Name of the observed data
	dataname = "Data"
	dataps = processedsample(channelfolder+"/cluster_"+dataname+"/Results/"+dataname+".root")
	Nobs,Nerr = dataps.getvalue("MET")
	Ndata = Nobs

	# ACCEPTANCE CALCULATION 
	signalps = processedsample(channelfolder+"/cluster_"+actsignalname+"/Results/"+actsignalname+".root")
	Npass,NpassErr = signalps.getrealvalue("MET")
	#Ngen = NGEN[charge]
	Ngen = NGENPUWEIGHTED[charge]
	acc = Npass/Ngen
	# ACCEPTANCE SYSTEMATICS 
	accsys = eval("systematics_mod.SYS"+actsignalname)
	accSysUp  ={}
	accSysDown={}
	if evalsys:
		for systype,chdict in accsys.iteritems():
			Npassup = Npass*(1.0+chdict[channel])
			accSysUp[systype] = Npassup/Ngen
			Npassdown = Npass*(1.0-chdict[channel])
			accSysDown[systype] = Npassdown/Ngen
	
	# Systematic propagation for Background samples 
	samplesname = filter(lambda x: x != actsignalname and x != dataname, \
			map(lambda x: os.path.basename(x).replace("cluster_",""),glob.glob(channelfolder+"/cluster_"+"*")) )
	
	syssamples = {}
	for sample in samplesname:
		try:
			syssamples[sample] = eval("systematics_mod.SYS"+sample)
		except AttributeError:
			# Assuming ZZ: Just a patch because ZZ is split
			syssamples[sample] = eval("systematics_mod.SYSZZ")
	# plus Fakes, modifying dict to the standard notation
	syssamples["Fakes"] = { "DDMMC": systematics_mod.DDMMC, "Fakes": systematics_mod.SYSFakes }
	
	# Building the full list of systematics types
	allsysset = set()
	dummy = map(lambda x: [allsysset.add(k) for k in x.keys()], syssamples.values())
	allsys = list(allsysset)
	
	Nerr2 = Nerr**2.0
	TotalBkgSysUp = dict([(systype,0.0) for systype in allsys])
	TotalBkgSysDown = dict([(systype,0.0) for systype in allsys])
	# Creating the processedsample instances for the background samples
	for sample in samplesname:
		rootfilename = channelfolder+"/cluster_"+sample+"/Results/"+sample+".root"
		evts = processedsample(rootfilename)
		val,err = evts.getvalue("MET")
		Nobs -= val
		Nerr2 += err**2.
		if not evalsys:
			del evts
			continue
		#for systype in syssamples[sample].keys():
		for systype in allsys:
			try:
				sysrel = syssamples[sample][systype][channel]
			except KeyError:
				sysrel = 0.0
			# Total Background with all the background samples varied
			# in the same direction (UP/DOWN) for the same systematic
			TotalBkgSysUp[systype] += val*(1+sysrel)
			TotalBkgSysDown[systype] += val*(1-sysrel)
			# Adding "Fakes" to stat error---> FIXME???
			if sample == "Fakes" and systype == "Fakes":
				Nerr2 += (val*sysrel)**2.0
		del evts
	del dataps
	# Number of observed events with the backgrounds varied by the systematics
	NsysUp   = dict( map(lambda (systype,nTotbkg): (systype,Ndata-nTotbkg),TotalBkgSysUp.iteritems()) )
	NsysDown = dict( map(lambda (systype,nTotbkg): (systype,Ndata-nTotbkg),TotalBkgSysDown.iteritems()) )

	return Nobs,sqrt(Nerr2),NsysUp,NsysDown,acc,accSysUp,accSysDown
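
A usage sketch for getobsyields; the channel folder is a placeholder, while the charge values PLUS|MINUS come from the docstring above.

# Usage sketch (hypothetical channel folder)
Nobs, err, NsysUp, NsysDown, acc, accUp, accDown = \
		getobsyields("WPositive/WZeee", "WZ", "PLUS")
print "N_obs = %.2f +- %.2f  acceptance = %.5f" % (Nobs, err, acc)
for systype in NsysUp.keys():
	print "  %s: up %.2f / down %.2f" % (systype, NsysUp[systype], NsysDown[systype])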
Example no. 9
             lambda x: os.path.basename(folder1) == os.path.basename(x),
             thechannelfolders2)[0]
         dN_S2 = 0.0
         N_S = 0.0
         for sample in samplesname:
             file1 = os.path.join(
                 folder1,
                 "cluster_" + sample + "/Results/" + sample + ".root")
             file2 = os.path.join(
                 folder2,
                 "cluster_" + sample + "/Results/" + sample + ".root")
             diffsample = getsystwofiles(file1, file2, opt.verbose)
             cut = diffsample.getcutlist()[-1]
             dN_S2 += diffsample.getvalue(cut)[0]**2.
             # Extracting the signal
             sp = processedsample(file1)
             value = sp.getvalue(cut)[0]
             del sp
             if sample == dataname:
                 N_S += value
             else:
                 N_S -= value
         dN_S = sqrt(dN_S2)
         print "\033[1;34mgetsystematics INFO\033[1;m ---- Total WZ yield relative difference at %s cut: %.3f%s" % (
             cut, dN_S / N_S * 100.0, "%")
 # Introduced a regular sample name
 else:
     # first check: the sample exists
     knownsamples = map(
         lambda x: os.path.basename(x).replace("cluster_", ""),
         glob.glob(thechannelfolders1[0] + "/cluster_" + "*"))
Example no. 10
    def __init__(self, data, signal, **keywords):
        """..class:: table(data,signal[,format="tex|html", isreduced=True|False, wildcardfiles="dir",join="metasample"]) 
		
		The table is composed by several 'processedsample' instances (see functionspool_mod module), and is 
		going to be built as
		               bkg1  bkg2 ... TotBkg data signal
		

		:param data: name of the column which it will be placed before the last one. Also
		             the values of this column are not going to be added up with the other
			     columns (to create the TotBkg column)
		:type data: str
		:param signal: name of the column which it will placed the last one. Also the 
		             values of this column are not going to be added up with the other
			     columns (creating the TotBkg column)
		:type param: str
		:param format: latex|tex|html  The output format of the table
		:type format: str
		:param wildcardfiles: string with wildcards which can be used to find what files
		             have to be used as column generators
		:param join: Complex parameter to introduce the possibility of merge two or more
		             samples into one unique metasample (called in this way because the
			     metasample does not have associated any filename and is compossed by
			     two or more original samples). The argument could be a str defining
			     a pre-built metasample: DY Z+Jets Other (see getsamplecomponent);
			     or could be a list of string which are defining more than one 
			     pre-built metasample; or could be a dictionary whose keys are the
			     metasample names and the values are list containing the samples to
			     merge.
		:type join: str| [ str, str, ...] | { str: [ str, str, ... ], ... }
		"""
        import glob
        import ROOT
        import os
        global TITLEDICT

        # First checks:
        if not os.getenv("VHSYS"):
            raise RuntimeError("\033[31mtable ERROR\033[m Initialize your"+\
              " environment (VHSYS env variable needed)")

        formatprov = None

        validkeywords = ["format", "isreduced", "join", "wildcardfiles"]
        wildcardfiles = "*.root"  # Per default
        join = []
        self.usermetasample = {}
        for key, value in keywords.iteritems():
            if not key in validkeywords:
                message = "\033[31mtable ERROR\033[m Incorrect instantiation of 'table'"
                message += " class. Valid keywords are: " + str(validkeywords)
                raise RuntimeError(message)

            if key == 'format':
                formatprov = value
            elif key == 'isreduced':
                if value:
                    join = ["DY", "Z+Jets", "Other"]
            elif key == "wildcardfiles":
                wildcardfiles = value
            elif key == "join":
                if type(value) == list and len(value) == 0:
                    # we don't want to smash the join list
                    pass
                elif type(value) == list:
                    join = value
                elif type(value) == dict:
                    self.usermetasample = value
                    join = self.usermetasample.keys()
                else:
                    join.append(value)

        # available filenames
        self.filenames = glob.glob(wildcardfiles)

        # Just to be sure that only one WH signal is used (adapted to the 2012 MC samples)
        if signal.find("WH") == 0 or signal.find("wztt") == 0:
            #Extract the other WH signals
            #potentialSfiles = filter(lambda x : x.find("WH") != -1,self.filenames)
            # Common between 2011 and 2012 MC signal samples names
            # FIXME: Add all the signals in the last columns
            potentialSfiles = filter(lambda x: x.find("ToWW") != -1,
                                     self.filenames)
            nonsignalfiles = filter(
                lambda x: x.split("/")[-1].split(".root")[0] != signal,
                potentialSfiles)
            # Removing
            for f in nonsignalfiles:
                self.filenames.remove(f)
            # Added the important cuts to be used in the reduced table case
            self.importantcutsdict = { 'Pre-selection': 'Exactly3Leptons', \
              'DeltaR': 'DeltaR', 'ZVeto':'ZVeto', 'MET': 'MET'}
            self.importantcutslist = [
                'Pre-selection', 'DeltaR', 'ZVeto', 'MET'
            ]
        else:
            # Added the important cuts to be used in the reduced table case
            # FIXME WARNING HARDCODED!! --> THis is a temporal patch...
            self.importantcutsdict = {
                'Pre-selection': 'Exactly3Leptons',
                'Z': 'HasZCandidate',
                'W': 'MET'
            }
            self.importantcutslist = ['Pre-selection', 'Z', 'W']

        # building the columns
        self.columns = {}
        for f in self.filenames:
            naturalname = os.path.basename(f).split(".")[0]
            # Put the fancy title if there is one, otherwise fall back to the
            # default name (updating the TITLEDICT global)
            try:
                coltitle = TITLEDICT[naturalname]
            except KeyError:
                TITLEDICT[naturalname] = naturalname
            col = processedsample(f)
            self.columns[col.title] = col

        # samples names
        self.samples = self.columns.keys()
        # Check the signal and data are there
        if not signal in self.samples + join:
            raise RuntimeError("\033[31mtable ERROR\033[m The signal introduced '"\
              +signal+"' has not been found."+\
              " Check you have not introduced the '-n' option without quotes:\n"+\
              " printtable "+signal+" -n \"whatever*..\"")
        if not data in self.samples + join:
            raise RuntimeError("\033[31mtable ERROR\033[m The data introduced '"\
              +data+"' has not been found."+\
              " Check you have not introduced the '-n' option without quotes:\n"+\
              " printtable "+signal+" -n \"whatever*..\"")

        self.signal = signal
        self.data = data

        # Ordered samples names (column names)
        self.columntitles = []
        # Backgrounds ordered first
        self.columntitles = filter(lambda x: x in self.samples + join,
                                   ORDERCOLUMNS)
        for i in self.samples:
            # Already took into account
            if i in self.columntitles:
                continue
            # Signal and data will be put after
            if i == self.signal or i == self.data:
                continue
            self.columntitles.append(i)
        # -- Adding the other columns
        self.columntitles.append("TotBkg")
        self.columntitles.append(self.data)
        self.columntitles.append("Data-TotBkg")
        self.columntitles.append(self.signal)
        # -- Avoiding repetition (mainly for  DDD-DDM case)
        dummy = map(
            lambda x: self.columntitles.remove(x),
            set(
                filter(lambda x: self.columntitles.count(x) > 1,
                       self.columntitles)))

        # The data member 'samples' is superseded by 'columntitles'
        self.samples = self.columntitles

        # 2) Merge some samples just in one
        for metasample in join:
            samplestodelete = []
            self.columns[metasample] = processedsample("", nobuilt=True)
            for sample in self.getsamplecomponents(metasample):
                try:
                    self.columns[metasample] += self.columns[sample]
                    samplestodelete.append(sample)
                except KeyError:
                    # Protecting the case where the pre-defined
                    # samples aren't there (for instance WJets_Madgraph
                    # in the 'Other' metasample)
                    pass
            # See if we have any of the samples to merge; if not, there is
            # no point in going on
            if len(samplestodelete) == 0:
                # Do not incorporate the metasample to the table
                self.columns.pop(metasample)
                # And do nothing else
                continue
            # Put the title
            self.columns[metasample].title = metasample
            # Incorporate the metasample into the global TITLEDICT
            TITLEDICT[metasample] = metasample
            # Erase the samples merged
            for s2remove in samplestodelete:
                self.columns.pop(s2remove)
                index = self.columntitles.index(s2remove)
                self.columntitles.remove(s2remove)

            # And add to the columntitles data member if not in there
            if not metasample in self.columntitles:
                self.columntitles.insert(index, metasample)

        # format specific
        self.format = format()
        if formatprov:
            self.setformat(formatprov)
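
A sketch of how the constructor above could be instantiated, illustrating the accepted forms of the 'join' keyword; all sample and signal names are placeholders.

# Instantiation sketch (placeholder sample names)
t1 = table("Data", "WHToWW2L120", format="tex", wildcardfiles="*.root")
# 'join' given as a single pre-built metasample name
t2 = table("Data", "WHToWW2L120", join="Other")
# 'join' given as a user-defined dictionary of metasamples
t3 = table("Data", "WHToWW2L120", join={"Z+Jets": ["DYee", "DYmumu", "DYtautau"]})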
Example no. 11
    def getMSvalerr(self, cut, sample):
        """.. function::getMSvalerr( cut, sample ) -> valueanderror

		Extract the value and the error, given the cut and the sample name. 
		The function will find the format to return the value using only the most 
		significant decimal numbers depending on how is its error. The 
		error will be modified following the below rules:
		 - if the error is >= 1.5, then the error is an integer, same as
		 the value
		 - if the error is between 1.0 and 1.5, the error is taken with
		 two decimals 
		 - if the error is < 1.0, then return 

		:param cut: cut name
		:type  cut: str
		:param sample: sample name
		:type  sample: str

		:return: the value and its error 
		:rtype : (str,str)
		"""
        from math import sqrt
        from functionspool_mod import getrounded

        # Dealing with the TotBkg sample which has to be built
        if sample == "TotBkg":
            val = 0.0
            err2 = 0.0
            for s in filter(
                    lambda x: x != self.data and x != self.signal and x !=
                    "TotBkg" and x != "Data-TotBkg", self.samples):
                (v, e) = self.columns[s].getvalue(cut)
                val += v
                err2 += e**2.0
            err = sqrt(err2)
            try:
                self.columns["TotBkg"].rowvaldict[cut] = (val, err)
            except KeyError:
                # Creating the column
                self.columns["TotBkg"] = processedsample("", nobuilt=True)
                # Initializing the dict and all the other needed data members
                self.columns["TotBkg"].rowvaldict = {cut: (val, err)}
                self.columns["TotBkg"].cutordered = self.columns[
                    self.data].cutordered

        # extracting the values
        try:
            val, err = self.columns[sample].getvalue(cut)
        except KeyError:
            # It's the Data - TotBkg subtraction column
            if sample == "Data-TotBkg":
                # Note that, since columns are extracted in order, Data and
                # TotBkg are already available at this point
                (valdata, errdata) = self.columns[self.data].getvalue(cut)
                (valbkg, errbkg) = self.columns["TotBkg"].getvalue(cut)
                val = valdata - valbkg
                err = sqrt(errdata**2.0 + errbkg**2.0)

        # Begin the formatting
        # Find the last significant digit: take the first non-zero
        # digit of the error
        #errstr      = str(err)
        #valstr      = str(val)
        nafterpoint = 0
        # Case of (essentially) zero error
        if abs(err) < 1e-30:
            return getrounded(val, 1000)
        # Case 1.5 <= err < 2.0
        elif err >= 1.5 and err < 2.0:
            errstr = getrounded(err, 1)
            valstr = getrounded(val, 1)
            nafterpoint = 1
        # Case 1.0 <= err < 1.5
        elif err >= 1.0 and err < 1.5:
            errstr = getrounded(err, 2)
            valstr = getrounded(val, 2)
            nafterpoint = 2
        elif err >= 2.0:
            errstr = getrounded(err, 0)
            valstr = "%i" % round(val)
            nafterpoint = 0
        elif err < 1.0 and err > 0.0:
            getdecimal = False
            errstrPRE = str(err).split(".")[-1]
            numbuilt = ''
            for n, index in zip(errstrPRE, xrange(len(errstrPRE))):
                nafterpoint += 1
                if int(n) > 0:
                    numbuilt += n
                try:
                    dum = int(numbuilt)
                except ValueError:
                    continue
                if int(numbuilt) >= 2:
                    # done
                    # --- Check the first is not 1
                    nsignumberformat = "%.0e"
                    if numbuilt[0] == "1":
                        nsignumberformat = "%.1e"
                    if nafterpoint > 3:
                        errstr = nsignumberformat % err
                        exponent = int(errstr.split("e-")[-1])
                        errstr = errstr.split("e-")[0]
                        # - keeping the integer part
                        #   FIXME: assume at least 10^-3...
                        valstr = '%i' % val
                        decpart = val - int(valstr)
                        if valstr == '0':
                            valstr = ''
                        if exponent != nafterpoint:
                            missing = nafterpoint - exponent
                            decpartstrformat = "%." + str(exponent +
                                                          missing) + "f"
                        else:
                            decpartstrformat = "%." + str(exponent) + "f"
                        # To round properly
                        decpartstrPRE = decpartstrformat % decpart
                        # get only the decimal part
                        decpartstrONLY = decpartstrPRE.split(".")[-1]
                        # Moving down the decimal point
                        decpartstr = ''
                        for i in xrange(exponent):
                            decpartstr += decpartstrONLY[i]
                        decpartstr = str(int(decpartstr))

                        if numbuilt[0] == "1":
                            valstr += decpartstr[:-1] + "." + decpartstr[-1]
                        else:
                            valstr += decpartstr
                    else:
                        howmany0 = nafterpoint - len(numbuilt)
                        errstrformat = "%." + str(nafterpoint) + "f"
                        errstr = errstrformat % err
                        #errstr       = "0."+"0"*howmany0+numbuilt
                        valstrformat = "%." + str(howmany0 +
                                                  len(numbuilt)) + "f"
                        valstr = valstrformat % val

                    break

        if nafterpoint > 3:
            totalvalstr = "("+valstr+self.format.plusminus+errstr+")"+self.format.cdot+"10"+\
              self.format.exponentstart+"-"+str(exponent)+self.format.exponentend
        else:
            totalvalstr = valstr + self.format.plusminus + errstr
        return totalvalstr
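
For reference, a self-contained sketch of the rounding convention described in the docstring (an illustration only, not the getrounded-based code above; the helper name is made up).

from math import floor, log10

def roundbyerror(val, err):
    # keep decimals according to the size of the error, as in getMSvalerr
    if err >= 2.0:
        return "%i +- %i" % (round(val), round(err))
    elif err >= 1.5:
        return "%.1f +- %.1f" % (val, err)
    elif err >= 1.0:
        return "%.2f +- %.2f" % (val, err)
    elif err > 0.0:
        # keep decimals down to the second significant digit of the error
        ndec = int(-floor(log10(err))) + 1
        fmt = "%%.%df +- %%.%df" % (ndec, ndec)
        return fmt % (val, err)
    return "%g" % val

# e.g. roundbyerror(12.3456, 0.034) returns '12.346 +- 0.034'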
Example no. 12
def getdifferences(samplesnames,baselinefolder,comparedfolder,sysdict = None, **keywords):
	""".. getdifferences(samplesnames,baselinefolder,comparedfolder) -> (dict(s
	"""
	from functionspool_mod import processedsample
	import os

	metasamples = {}
	if keywords.has_key('metasamples'):
		metasamples = keywords['metasamples']
	
	# Get some previous info
	# --- which argument is the analysis folder (a list of channel folders)
	analysisdir = filter(lambda x: type(x) == list, [baselinefolder]+[comparedfolder])[0]
	# --- extract channels folders
	channelsfolders = map(lambda x: x.split("/")[-1],analysisdir)
	# --- build the channels path for the str. directory
	if type(baselinefolder) == list:
		baselinetop,channeldir = os.path.split(analysisdir[0])
		comparedtop = comparedfolder
	else:
		baselinetop = baselinefolder
		comparedtop,channeldir = os.path.split(analysisdir[0])
	
	# --- get signal name
	signal = ''.join(ch for ch in channelsfolders[0] if ch != "m" and ch != "e")


	# Do it per channel
	sysdict = {}
	for channeldir in channelsfolders:
		print ".",
		for dataname in filter(lambda x: x not in metasamples.keys(), samplesnames):
			filebase = baselinetop+"/"+channeldir+"/"+"cluster_"+dataname+\
					"/Results/"+dataname+".root"

			filecomp = comparedtop+"/"+channeldir+"/"+"cluster_"+dataname+\
					"/Results/"+dataname+".root"
			diffsample = processedsample(filebase,showall=True)-\
					processedsample(filecomp,showall=True)
			channel = channeldir.replace(signal,"")
			lastcut = diffsample.getcutlist()[-1]
			sysrel = diffsample.getsysrelative(lastcut)
			try:
				sysdict["SYS"+dataname][channel] = abs(sysrel)
			except KeyError:
				sysdict["SYS"+dataname] = { channel: abs(sysrel) }
		# And the metasamples
		for metaname,realsamples in metasamples.iteritems():
			metaprocessbase = processedsample('',nobuilt=True)
			metaprocesscomp = processedsample('',nobuilt=True)
			for realname in realsamples:
				filebase = baselinetop+"/"+channeldir+"/"+"cluster_"+realname+\
						"/Results/"+realname+".root"
				metaprocessbase += processedsample(filebase,showall=True)
				filecomp = comparedtop+"/"+channeldir+"/"+"cluster_"+realname+\
					"/Results/"+realname+".root"
				metaprocesscomp += processedsample(filecomp,showall=True)
			diffsample = metaprocessbase-metaprocesscomp
			channel = channeldir.replace(signal,"")
			lastcut = diffsample.getcutlist()[-1]
			sysrel = diffsample.getsysrelative(lastcut)
			try:
				sysdict["SYS"+metaname][channel] = abs(sysrel)
			except KeyError:
				sysdict["SYS"+metaname] = { channel: abs(sysrel) }
	print ""

	return sysdict
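
A call sketch showing the 'metasamples' keyword added in this version: the listed real samples are merged before taking the difference (all folder and sample names are placeholders).

# Usage sketch (hypothetical folders and sample names)
channels = ["nominal/WZeee", "nominal/WZeem", "nominal/WZmme", "nominal/WZmmm"]
sysdict = getdifferences(["WZTo3LNu"], channels, "leptonscaleup",
		metasamples={"Other": ["WW", "TTbar_Madgraph"]})
print sysdict["SYSOther"]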
Example no. 13
 os.chdir(folder)
 # Find the samples and the folder
 clustername = 'cluster_' + opt.dataname
 rootmainterm = clustername + "/Results/" + opt.dataname + ".root"
 ntsamplename = opt.dataname + "_" + lowterm
 clustername_Nt = 'cluster_' + ntsamplename
 rootlowterm = clustername_Nt + "/Results/" + ntsamplename + ".root"
 # Find the MAIN order sample
 fakesample = os.path.join(folder, rootmainterm)
 if not os.path.isfile(fakesample):
     message = "\033[31mnt3subtract ERROR\033[m Malformed folder structure:"\
       " Not found the FAKES file "\
       "'%s'' inside the folder '%s'" % (rootmainterm,folder)
     sys.exit(message)
 # Including the raw main term info to be printed in the warning file
 psnt2 = processedsample(fakesample)
 cutordered = psnt2.getcutlist()
 # Using the same kind of yields as the other term
 yieldsmain = dict(
     map(lambda cut: (cut, psnt2.getrealvalue(cut)), cutordered))
 #yieldsmain = psnt2.rowvaldict
 # and store
 nametarfile = opt.dataname.lower().replace("_", "") + "pool.tar.gz"
 # Find the LOW order samples
 lowfolders = glob.glob(os.path.join(folder, clustername_Nt + "*"))
 if len(lowfolders) == 0:
     # Checking we didn't use this script before
     if os.path.isfile(nametarfile):
         # Recovering the original samples
         shutil.rmtree(clustername)
         tar = tarfile.open(nametarfile)
Example no. 14
	
		print "\033[1;34mgetsystematics INFO\033[1;m Systematics for WZ signal yields (N_S=N_D-N_BKG) term. You can get the values below directly as %"
		for folder1 in sorted(thechannelfolders1):
			channel = os.path.basename(folder1)
			print "\033[1;34mgetsystematics INFO\033[1;m Channel %s" % channel
			folder2 = filter(lambda x: os.path.basename(folder1) == os.path.basename(x), thechannelfolders2)[0]
			dN_S2 = 0.0
			N_S = 0.0
			for sample in samplesname:
				file1 = os.path.join(folder1,"cluster_"+sample+"/Results/"+sample+".root")
				file2 = os.path.join(folder2,"cluster_"+sample+"/Results/"+sample+".root") 
				diffsample = getsystwofiles(file1,file2,opt.verbose)
				cut = diffsample.getcutlist()[-1]
				dN_S2 += diffsample.getvalue(cut)[0]**2.
				# Extracting the signal
				sp = processedsample(file1)
				value = sp.getvalue(cut)[0]
				del sp
				if sample == dataname:
					N_S += value
				else:
					N_S -= value
			dN_S = sqrt(dN_S2)
			print "\033[1;34mgetsystematics INFO\033[1;m ---- Total WZ yield relative difference at %s cut: %.3f%s" % (cut,dN_S/N_S*100.0,"%")
	# Introduced a regular sample name
	else:
		# first check: the sample exists
		knownsamples = map(lambda x: os.path.basename(x).replace("cluster_",""),glob.glob(thechannelfolders1[0]+"/cluster_"+"*")) 
		if not opt.samplename in knownsamples:
			message = "\033[1;31mgetsystematics ERROR\033[1;m Sample not found '%s' in the channel folders"
			sys.exit(message)
Example no. 15
	def __init__(self,data,signal,**keywords):
		"""..class:: table(data,signal[,format="tex|html",\
				isreduced=True|False, wildcardfiles="dir",join="metasample",\
				subtract=metasample,force=listofsamples]) 
		
		The table is composed by several 'processedsample' instances (see functionspool_mod module), and is 
		going to be built as
		               bkg1  bkg2 ... TotBkg data signal
		

		:param data: name of the column which it will be placed before the last one. Also
		             the values of this column are not going to be added up with the other
			     columns (to create the TotBkg column)
		:type data: str
		:param signal: name of the column which it will placed the last one. Also the 
		             values of this column are not going to be added up with the other
			     columns (creating the TotBkg column)
		:type param: str
		:param format: latex|tex|html  The output format of the table
		:type format: str
		:param wildcardfiles: string with wildcards which can be used to find what files
		             have to be used as column generators
		:param join: Complex parameter to introduce the possibility of merge two or more
		             samples into one unique metasample (called in this way because the
			     metasample does not have associated any filename and is compossed by
			     two or more original samples). The argument could be a str defining
			     a pre-built metasample: DY Z+Jets Other (see getsamplecomponent);
			     or could be a list of string which are defining more than one 
			     pre-built metasample; or could be a dictionary whose keys are the
			     metasample names and the values are list containing the samples to
			     merge.
		:type join: str| [ str, str, ...] | { str: [ str, str, ... ], ... }
		:param subtract: analogous parameter than join but to subtract
		:type subtract:  dict(str,list(str))
		:param force: list of samples to be kept in the table although are inside the
		              subtract list
		:type force: list(str)
		"""
		import glob
		import ROOT
		import os
		global TITLEDICT


		formatprov = None

		validkeywords = [ "format", "isreduced", "join", "wildcardfiles", \
				"signalmc", "subtract", "force", "datadriven"]
		wildcardfiles = "*.root" # Per default
		join = []
		subtract = {}
		subtractmeta = []
		keepforce = []
		self.usermetasample = {}
		self.signalmc = None
		self.nsignalcolumn = True
		for key,value in keywords.iteritems():
			if not key in validkeywords:
				message  = "\033[31mtable ERROR\033[m Incorrect instantiation of 'table'"
				message += " class. Valid keywords are: "+str(validkeywords)
				raise RuntimeError(message)

			if key == 'format':
				formatprov = value
			elif key == 'isreduced':
				if value:
					join = [ "DY", "Z+Jets", "Other" ]
			elif key == "wildcardfiles":
				wildcardfiles = value
			elif key == "join":
				if type(value) == list and len(value) == 0:
					# we don't want to smash the join list
					pass
				elif type(value) == list:
					join = value
				elif type(value) == dict:
					self.usermetasample = value
					join = self.usermetasample.keys()
				else:
					join.append(value)
			elif key == "subtract":
				subtract = value
			elif key == "force":
				keepforce=value
			elif key == 'signalmc': 
				self.signalmc = value
			elif key == 'datadriven':
				if value == 'PPF':
					self.nsignalcolumn = True
				elif value == 'PPP':
					self.nsignalcolumn = False
		# Update the usermetasample
		for metaname,listsamples in subtract.iteritems():
			if self.usermetasample.has_key(metaname):
				message = '\033[1;31mtable ERROR\033[1;m Conflicts between merge and subtract'
				message += ' sample'
				raise RuntimeError(message)
			self.usermetasample[metaname] = listsamples
			subtractmeta.append(metaname)

		# available filenames
		self.filenames = glob.glob(wildcardfiles)

		# Just to be sure that only one WH signal is used (adapted to the 2012 MC samples)
		if signal.find("WH") == 0 or signal.find("wztt") == 0:
			#Extract the other WH signals
			# Common between 2011 and 2012 MC signal samples names
			# FIXME: Add all the signals in the last columns 
			potentialSfiles = filter(lambda x : x.find("ToWW") != -1,self.filenames)
			nonsignalfiles  = filter( lambda x: x.split("/")[-1].split(".root")[0] != signal,potentialSfiles)
			# Removing
			for f in nonsignalfiles:
				self.filenames.remove(f)
			# Added the important cuts to be used in the reduced table case
			self.importantcutsdict = { 'Pre-selection': 'Exactly3Leptons', \
					'DeltaR': 'DeltaR', 'ZVeto':'ZVeto', 'MET': 'MET'}
			self.importantcutslist = [ 'Pre-selection', 'DeltaR','ZVeto', 'MET']
		else:
			# Added the important cuts to be used in the reduced table case
			self.importantcutsdict = { 'Pre-selection' : 'Exactly3Leptons',
					'Z' : 'HasZCandidate',  'W': 'MET' }
			self.importantcutslist = [ 'Pre-selection', 'Z', 'W' ]

		# building the columns
		self.columns = {}
		for f in self.filenames:
			naturalname = os.path.basename(f).split(".")[0]
			# Put the fancy title if there is one, otherwise fall back to the
			# default name (updating the TITLEDICT global)
			try:
				coltitle = TITLEDICT[naturalname]
			except KeyError:
				TITLEDICT[naturalname] = naturalname
			col = processedsample(f)
			self.columns[col.title] = col
		
		# samples names
		self.samples = self.columns.keys()
		# Check the signal and data are there
		if not signal in self.samples+join:
			raise RuntimeError("\033[31mtable ERROR\033[m The signal introduced '"\
					+signal+"' has not been found."+\
					" Check you have not introduced the '-n' option without quotes:\n"+\
					" printtable "+signal+" -n \"whatever*..\"")
		if not data in self.samples+join:
			raise RuntimeError("\033[31mtable ERROR\033[m The data introduced '"\
					+data+"' has not been found."+\
					" Check you have not introduced the '-n' option without quotes:\n"+\
					" printtable "+signal+" -n \"whatever*..\"")

		self.signal = signal
		self.data   = data

		# Ordered samples names (column names)
		self.columntitles = []
		# Backgrounds ordered first
		self.columntitles = filter(lambda x: x in self.samples+join, ORDERCOLUMNS)
		for i in self.samples:
			# Already took into account
			if i in self.columntitles:
				continue
			# Signal and data will be put after
			if i == self.signal or i == self.data or i == self.signalmc:
				continue
			self.columntitles.append(i)
		# -- Adding the other columns
		self.columntitles.append( "TotBkg" )
		self.columntitles.append( self.data )
		if self.nsignalcolumn:
			self.columntitles.append( "Data-TotBkg" )
		self.columntitles.append( self.signal )
		if self.signalmc:
			self.columntitles.append( self.signalmc )
		# -- Avoiding repetition (mainly for  DDD-DDM case)
		dummy = map(lambda x: self.columntitles.remove(x),
				set(filter(lambda x: self.columntitles.count(x) > 1, self.columntitles)))

		# The data member 'samples' is superseded by 'columntitles'
		self.samples = self.columntitles

		# 2) Merge some samples just in one
		for metasample in join:
			samplestodelete = []
			self.columns[metasample] = processedsample("",nobuilt=True)
			for sample in self.getsamplecomponents(metasample):
				try:
					self.columns[metasample] += self.columns[sample]
					samplestodelete.append( sample )
				except KeyError:
					# Protecting the case where the pre-defined
					# samples aren't there (for instance WJets_Madgraph
					# in the 'Other' metasample)
					pass
			# See if we have any of the samples to merge; if not, there is
			# no point in going on
			if len(samplestodelete) == 0:
				# Do not incorporate the metasample to the table
				self.columns.pop(metasample)
				# And do nothing else
				continue
			# Put the title
			self.columns[metasample].title = metasample
			# Incorporate the metasample into the global TITLEDICT
			TITLEDICT[metasample] = metasample
			# Erase the samples merged 
			for s2remove in samplestodelete:
				self.columns.pop(s2remove)
				index=self.columntitles.index(s2remove)
				self.columntitles.remove(s2remove)

			# And add to the columntitles data member if not in there
			if not metasample in self.columntitles:
				self.columntitles.insert(index,metasample)
		# 2.1) Subtract some samples
		for metasample in subtractmeta:
			samplestodelete = []
			for sample in self.getsamplecomponents(metasample):
				try:
					self.columns[metasample] -= self.columns[sample]
					if sample not in keepforce:
						samplestodelete.append( sample )
				except KeyError:
					# Protecting the case where the pre-defined
					# samples aren't there (for instance WJets_Madgraph
					# in the 'Other' metasample)
					pass
			# Erase the samples merged 
			for s2remove in samplestodelete:
				self.columns.pop(s2remove)
				index=self.columntitles.index(s2remove)
				self.columntitles.remove(s2remove)

			# And add to the columntitles data member if not in there
			if not metasample in self.columntitles:
				self.columntitles.insert(index,metasample)
		# format specific
		self.format = format()
		if formatprov:
			self.setformat(formatprov)
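
An instantiation sketch for the keywords added in this extended version (subtract, force, datadriven, signalmc); every sample name below is a placeholder.

# Instantiation sketch (placeholder sample names)
t = table("Data", "WZTo3LNu", format="tex",
		subtract={"Fakes": ["ZZ", "WZTo3LNu"]},  # subtract these samples from the 'Fakes' column
		force=["ZZ"],                            # but keep 'ZZ' as a column of the table
		datadriven="PPF",                        # PPF keeps the 'Data-TotBkg' column, PPP drops it
		signalmc="WZTo3LNu_Madgraph")            # extra MC signal column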
Example no. 16
	def getMSvalerr(self, cut,sample):
		""".. function::getMSvalerr( cut, sample ) -> valueanderror

		Extract the value and the error, given the cut and the sample name. 
		The function will find the format to return the value using only the most 
		significant decimal numbers depending on how is its error. The 
		error will be modified following the below rules:
		 - if the error is >= 1.5, then the error is an integer, same as
		 the value
		 - if the error is between 1.0 and 1.5, the error is taken with
		 two decimals 
		 - if the error is < 1.0, then return 

		:param cut: cut name
		:type  cut: str
		:param sample: sample name
		:type  sample: str

		:return: the value and its error 
		:rtype : (str,str)
		"""
		from math import sqrt
		from functionspool_mod import getvalpluserr

		# Dealing with the TotBkg sample which has to be built 
		if sample == "TotBkg":
			val = 0.0
			err2 = 0.0
			for s in filter(lambda x: x != self.data and \
					x != self.signalmc and \
					x != self.signal and x != "TotBkg" and \
					x != "Data-TotBkg",self.samples):
				(v,e) = self.columns[s].getvalue(cut)
				val += v
				err2 += e**2.0
			err = sqrt(err2)
			try:
				self.columns["TotBkg"].rowvaldict[cut] = (val,err)
			except KeyError:
				# Creating the column
				self.columns["TotBkg"] = processedsample("",nobuilt=True)
				# Initializing the dict and all the other needed data members
				self.columns["TotBkg"].rowvaldict= { cut: (val,err) }
				self.columns["TotBkg"].cutordered = self.columns[self.data].cutordered

		# extracting the values
		try:
			val,err=self.columns[sample].getvalue(cut)
		except KeyError:
			# It's the Data - TotBkg subtraction column
			if sample == "Data-TotBkg":
				# Note that, since columns are extracted in order, Data and
				# TotBkg are already available at this point
				(valdata,errdata) = self.columns[self.data].getvalue(cut)
				(valbkg,errbkg)   = self.columns["TotBkg"].getvalue(cut)
				val = valdata-valbkg
				err = sqrt(errdata**2.0+errbkg**2.0)

		valstr,errstr = getvalpluserr(val,err)
		totalvalstr = valstr+self.format.plusminus+errstr
	
		return totalvalstr