def buildSignalProcessCollection(datacard_paths):
    """Merge the process collections of several datacards into one.

    A ``Datacard`` is constructed for every path first. Then, card by card,
    the shape file is read with ``buildHistogramDictionary`` and the result
    of ``processCollectionFromCard(card, histograms, True)`` is accumulated
    with ``+=``.

    NOTE(review): the ``True`` flag presumably selects the signal processes;
    confirm against ``processCollectionFromCard``.

    Args:
        datacard_paths: Iterable of datacard paths.

    Returns:
        The accumulated collection, or ``None`` when no path is given.
    """
    cards = [Datacard(card_path) for card_path in datacard_paths]

    merged = None
    for card in cards:
        histograms = buildHistogramDictionary(card.shapeFilePath())
        current = processCollectionFromCard(card, histograms, True)
        if merged is not None:
            merged += current
            continue
        # The first collection seeds the total; later ones are added in place.
        merged = current
    return merged
def MakeCard(ws, ch, tf="", signal=[], useModel=True):
    """Assemble and write the datacard for channel ``ch``.

    Args:
        ws: Workspace handed to ``Datacard``, ``getHists``, ``getPDFs`` and
            ``getVars``; ``ws.pdf(name)`` is called on it directly.
        ch: Channel name; used in the log line, the pdf name and the
            observation shape name ``data_obs_<ch>``.
        tf: Prefix of the transfer-factor pdf, looked up as ``<tf>_<ch>``.
        signal: List of signal process names, processed before the fixed
            process names. The list is only read (concatenated and tested
            for membership), never mutated, so the shared default is safe.
        useModel: Forwarded to ``AddProc`` and ``AddTF``.
    """
    # Call form: valid as the Python 2 print statement and as the Python 3
    # function, with identical output for a single argument.
    print("Writing datacard_%s" % ch)

    proclist = signal + [
        'ZJets', 'WJets', 'DYJets', 'GJets', 'DiBoson', 'TTJets', 'QCD'
    ]

    ch_card = Datacard(ch, ws)
    ch_card.hists = getHists(ws, ch)
    ch_card.pdfs = getPDFs(ws, ch)

    # The transfer-factor model is optional: it stays None unless a pdf with
    # the expected name is present for this channel.
    tf_name = "%s_%s" % (tf, ch)
    tfmodel = None
    if tf_name in ch_card.pdfs:
        tfmodel = ws.pdf(tf_name)
    ch_card.vars = getVars(ws, tfmodel)

    ch_card.setObservation(shape='data_obs_%s' % ch)
    for proc in proclist:
        AddProc(ch_card, proc, proc in signal, useModel=useModel)
    AddTF(ch_card, tf, useModel)
    ch_card.write()
def buildProcessCollectionAndData(datacard_paths):
    """Sum the process collections and the observed data of several datacards.

    A ``Datacard`` is constructed for every path first. For each card the
    shape file is read with ``buildHistogramDictionary``; the result of
    ``processCollectionFromCard(card, histograms, False)`` is accumulated
    with ``+=`` and the ``'data_obs'`` histogram is accumulated with ``Add``.

    The first card's ``'data_obs'`` object itself becomes the running total
    and is modified in place by the later ``Add`` calls.

    Args:
        datacard_paths: Iterable of datacard paths.

    Returns:
        Tuple ``(total_data, total_process_collection)``; both entries are
        ``None`` when no path is given.
    """
    cards = [Datacard(card_path) for card_path in datacard_paths]

    summed_processes = None
    summed_data = None
    for card in cards:
        histograms = buildHistogramDictionary(card.shapeFilePath())

        # merge process collections
        current = processCollectionFromCard(card, histograms, False)
        if summed_processes is not None:
            summed_processes += current
        else:
            summed_processes = current

        # merge data
        if summed_data is not None:
            summed_data.Add(histograms['data_obs'])
        else:
            summed_data = histograms['data_obs']

    return summed_data, summed_processes
def maskBins(datacard_path, bins_to_remove):
    """Remove the requested bins from a datacard and its shape file.

    The card is backed up with ``backupDatacard`` before anything is
    changed. Every histogram of the shape file is passed through
    ``removeBins``; the yields computed by ``nominalYieldDict`` from the
    reduced histograms are written to the card, and the shape file is
    rewritten from the reduced histograms.

    Args:
        datacard_path: Path handed to ``Datacard``.
        bins_to_remove: Bin specification forwarded unchanged to
            ``removeBins``.
    """
    card = Datacard(datacard_path)

    # Keep a copy of the untouched card before modifying it.
    backupDatacard(card)

    # All histograms of the shape file, and the same set with bins removed.
    original_hists = buildHistogramDictionary(card.shapeFilePath())
    masked_hists = {}
    for name in original_hists:
        masked_hists[name] = removeBins(original_hists[name], bins_to_remove)

    # Rates and observation follow the reduced histograms; the shape file is
    # rewritten last.
    card.modifyRatesAndObservation(nominalYieldDict(masked_hists))
    card.rewriteShapeFile(masked_hists)
def __init__(self, config, config_name="DC"):
    """Select the regions to convert and create one ``Datacard`` per region.

    Regions come from the comma-separated option ``List_for_rebinner`` of
    section ``LimitGeneral`` when it exists. Otherwise they come from
    option ``List`` of the same section, dropping every region whose
    ``type`` option in section ``dc:<region>`` equals ``cr``
    (case-insensitive). Empty entries are ignored in both cases.

    Args:
        config: Configuration object providing ``has_option`` and ``get``.
        config_name: Stored as ``self.config_name``.
    """
    self.config = config
    self.config_name = config_name

    has_rebinner_list = config.has_option('LimitGeneral', 'List_for_rebinner')
    if has_rebinner_list:
        raw_list = config.get('LimitGeneral', 'List_for_rebinner')
    else:
        raw_list = config.get('LimitGeneral', 'List')

    selected = []
    for entry in raw_list.split(','):
        name = entry.strip()
        if not name:
            continue
        # The 'cr' filter only applies to the plain 'List' option.
        if not has_rebinner_list:
            if config.get('dc:%s' % name, 'type').lower() == 'cr':
                continue
        selected.append(name)
    self.regions = selected

    print("Convert regions:")
    print(", ".join(self.regions))

    makers = {}
    frames = {}
    for region in self.regions:
        makers[region] = Datacard(config=self.config, region=region)
        frames[region] = {}
    self.dcMakers = makers
    self.dfs = frames
elif plot[0] == 'M3': base_card_path = base_card_path.replace('_datacard', '_mass3_datacard') base_card_path2 = base_card_path2.replace('_datacard', '_mass3_datacard') base_card_path3 = base_card_path3.replace('_datacard', '_mass3_datacard') base_card_path4 = base_card_path4.replace('_datacard', '_mass3_datacard') elif plot[0] == 'SR': base_card_path = base_card_path.replace('_datacard', '_datacard') base_card_path2 = base_card_path2.replace('_datacard', '_datacard') base_card_path3 = base_card_path3.replace('_datacard', '_datacard') base_card_path4 = base_card_path4.replace('_datacard', '_datacard') datacard_paths = ['{}_{}.txt'.format(base_card_path, y) for y in years] datacards = [Datacard(path) for path in datacard_paths] #additional signal sample to add to the plot datacard_paths2 = ['{}_{}.txt'.format(base_card_path2, y) for y in years] datacards2 = [Datacard(path) for path in datacard_paths2] datacard_paths3 = ['{}_{}.txt'.format(base_card_path3, y) for y in years] datacards3 = [Datacard(path) for path in datacard_paths3] datacard_paths4 = ['{}_{}.txt'.format(base_card_path4, y) for y in years] datacards4 = [Datacard(path) for path in datacard_paths4] ##################### central one! ########################## processCollections = [] total_data = None for i, card in enumerate(datacards): #build the backgrounds histogram_dict = buildHistogramDictionary(card.shapeFilePath()) process_list = []
def parseCard(file, options):
    """Parse a text datacard into a ``Datacard`` object.

    The input is read in two phases from one shared line iterator:

    1. the header (``imax``/``jmax``/``kmax``, ``shapes``, ``observation``,
       ``bin``, ``process``) up to and including the ``rate`` line;
    2. one nuisance definition per remaining line.

    Args:
        file: A file object, stream or list of lines (not a string).
        options: Object providing the attributes ``bin``, ``noJMax``,
            ``allowNoSignal``, ``nuisancesToExclude`` and ``verbose``.

    Returns:
        The populated ``Datacard``.

    Raises:
        RuntimeError: If ``file`` is a string, or the card is malformed.
        ValueError: If a symmetric effect is negative for a pdf other than
            ``trG``, ``dFD`` or ``dFD2``.

    Any exception raised while reading a line is re-raised with the 1-based
    line number (and ``file.name`` when available) prepended to its message.
    """
    if isinstance(file, str):
        raise RuntimeError(
            "You should pass as argument to parseCards a file object, stream or a list of lines, not a string"
        )
    ret = Datacard()
    ret.discretes = []
    nbins = -1
    nprocesses = -1
    # Parsed from 'kmax' and decremented for vetoed nuisances; it is not
    # read again inside this function.
    nuisances = -1
    binline = []
    processline = []
    sigline = []
    lineNumber = None
    try:
        # One shared iterator: the nuisance loop below resumes right after
        # the 'rate' line for every kind of iterable, including a plain list
        # of lines (re-iterating a list would start again at the header).
        lines = iter(file)
        for lineNumber, l in enumerate(lines):
            f = l.split()
            if len(f) < 1:
                continue
            if f[0] == "imax":
                nbins = int(f[1]) if f[1] != "*" else -1
            if f[0] == "jmax":
                nprocesses = int(f[1]) + 1 if f[1] != "*" else -1
            if f[0] == "kmax":
                nuisances = int(f[1]) if f[1] != "*" else -1
            if f[0] == "shapes":
                if not options.bin:
                    raise RuntimeError(
                        "Can use shapes only with binary output mode")
                if len(f) < 4:
                    raise RuntimeError("Malformed shapes line")
                if f[2] not in ret.shapeMap:
                    ret.shapeMap[f[2]] = {}
                if f[1] in ret.shapeMap[f[2]]:
                    raise RuntimeError(
                        "Duplicate definition for process '%s', channel '%s'"
                        % (f[1], f[2]))
                ret.shapeMap[f[2]][f[1]] = f[3:]
            if f[0] == "Observation" or f[0] == "observation":
                ret.obs = [float(x) for x in f[1:]]
                if nbins == -1:
                    nbins = len(ret.obs)
                if len(ret.obs) != nbins:
                    raise RuntimeError(
                        "Found %d observations but %d bins have been declared"
                        % (len(ret.obs), nbins))
                if binline != []:
                    if len(binline) != len(ret.obs):
                        # Report the 'bin' line being checked; ret.bins is
                        # only assigned from it just below.
                        raise RuntimeError(
                            "Found %d bins (%s) but %d bins have been declared"
                            % (len(binline), binline, nbins))
                    ret.bins = binline
                    ret.obs = dict(
                        (b, ret.obs[i]) for i, b in enumerate(ret.bins))
                    binline = []
            if f[0] == "bin":
                binline = []
                for b in f[1:]:
                    if re.match("[0-9]+", b):
                        stderr.write(
                            "Warning: Bin %(b)s starts with a digit. Will call it 'bin%(b)s' but this may break shapes.\n"
                            % {"b": b})
                        b = "bin" + b
                        # TODO Here should be some patching of the shapes names in order to not get errors later.
                    binline.append(b)
            if f[0] == "process":
                if processline == []:  # first line contains names
                    processline = f[1:]
                    if len(binline) != len(processline):
                        raise RuntimeError(
                            "'bin' line has a different length than 'process' line."
                        )
                    continue
                sigline = f[1:]  # second line contains ids
                # The two 'process' lines may come in either order: swap them
                # when the first one holds the numeric ids.
                if re.match("-?[0-9]+", processline[0]) and not re.match(
                        "-?[0-9]+", sigline[0]):
                    (processline, sigline) = (sigline, processline)
                if len(sigline) != len(processline):
                    raise RuntimeError(
                        "'bin' line has a different length than 'process' line."
                    )
                hadBins = (len(ret.bins) > 0)
                for i, b in enumerate(binline):
                    p = processline[i]
                    s = (int(sigline[i]) <= 0)  # <=0 for signals, >0 for backgrounds
                    ret.keyline.append((b, p, s))
                    if hadBins:
                        if b not in ret.bins:
                            raise RuntimeError(
                                "Bin %s not among the declared bins %s" %
                                (b, ret.bins))
                    else:
                        if b not in ret.bins:
                            ret.bins.append(b)
                    if p not in ret.processes:
                        ret.processes.append(p)
                if nprocesses == -1:
                    nprocesses = len(ret.processes)
                if nbins == -1:
                    nbins = len(ret.bins)
                if not options.noJMax:
                    if nprocesses != len(ret.processes):
                        raise RuntimeError(
                            "Found %d processes (%s), declared jmax = %d" %
                            (len(ret.processes), ret.processes, nprocesses))
                if nbins != len(ret.bins):
                    raise RuntimeError(
                        "Found %d bins (%s), declared imax = %d" %
                        (len(ret.bins), ret.bins, nbins))
                ret.exp = dict((b, {}) for b in ret.bins)
                ret.isSignal = dict((p, None) for p in ret.processes)
                if ret.obs != [] and isinstance(ret.obs, list):
                    # still as list, must change into map with bin names
                    ret.obs = dict(
                        (b, ret.obs[i]) for i, b in enumerate(ret.bins))
                for (b, p, s) in ret.keyline:
                    if ret.isSignal[p] is None:
                        ret.isSignal[p] = s
                    elif ret.isSignal[p] != s:
                        raise RuntimeError(
                            "Process %s is declared as signal in some bin and as background in some other bin"
                            % p)
                ret.signals = [
                    proc for proc, is_signal in ret.isSignal.items()
                    if is_signal
                ]
                if len(ret.signals) == 0 and not options.allowNoSignal:
                    raise RuntimeError(
                        "You must have at least one signal process (id <= 0)")
            if f[0] == "rate":
                if processline == []:
                    raise RuntimeError(
                        "Missing line with process names before rate line")
                if sigline == []:
                    raise RuntimeError(
                        "Missing line with process id before rate line")
                if len(f[1:]) != len(ret.keyline):
                    raise RuntimeError(
                        "Malformed rate line: length %d, while bins and process lines have length %d"
                        % (len(f[1:]), len(ret.keyline)))
                for (b, p, s), r in zip(ret.keyline, f[1:]):
                    ret.exp[b][p] = float(r)
                break  # rate is the last line before nuisances

        # parse nuisances; numbering continues after the header so reported
        # line numbers stay relative to the start of the input
        first_nuisance_line = 0 if lineNumber is None else lineNumber + 1
        for lineNumber, l in enumerate(lines, first_nuisance_line):
            if l.startswith("--"):
                continue
            l = re.sub("\\s*#.*", "", l)
            # a standalone run of dashes means "no effect": rewrite it as 0
            l = re.sub("(?<=\\s)-+(\\s|$)", " 0\\1", l)
            f = l.split()
            if len(f) <= 1:
                continue
            nofloat = False
            lsyst = f[0]
            pdf = f[1]
            args = []
            numbers = f[2:]
            if lsyst.endswith("[nofloat]"):
                lsyst = lsyst.replace("[nofloat]", "")
                nofloat = True
            if options.nuisancesToExclude and isVetoed(
                    lsyst, options.nuisancesToExclude):
                if options.verbose > 0:
                    stderr.write(
                        "Excluding nuisance %s selected by a veto pattern among %s\n"
                        % (lsyst, options.nuisancesToExclude))
                if nuisances != -1:
                    nuisances -= 1
                continue
            if re.match("[0-9]+", lsyst):
                lsyst = "theta" + lsyst
            if pdf in ("lnN", "lnU", "gmM", "trG") or pdf.startswith("shape"):
                pass  # nothing special to do
            elif pdf == "gmN":
                args = [int(f[2])]
                numbers = f[3:]
            elif pdf == "unif":
                args = [float(f[2]), float(f[3])]
                numbers = f[4:]
            elif pdf == "dFD" or pdf == "dFD2":
                args = [float(f[2])]
                numbers = f[3:]
            elif pdf == "param":
                # for parametric uncertainties, there's no line to account per bin/process effects
                # just assume everything else is an argument and move on
                args = f[2:]
                if len(args) <= 1:
                    raise RuntimeError(
                        "Uncertainties of type 'param' must have at least two arguments (mean and sigma)"
                    )
                ret.systs.append([lsyst, nofloat, pdf, args, []])
                continue
            elif pdf == "flatParam":
                ret.flatParamNuisances[lsyst] = True
                # for flat parametric uncertainties, code already does the right thing as long as they are non-constant RooRealVars linked to the model
                continue
            elif pdf == "discrete":
                ret.discretes.append(lsyst)
                continue
            else:
                raise RuntimeError("Unsupported pdf %s" % pdf)
            if len(numbers) < len(ret.keyline):
                raise RuntimeError(
                    "Malformed systematics line %s of length %d: while bins and process lines have length %d"
                    % (lsyst, len(numbers), len(ret.keyline)))
            errline = dict((b, {}) for b in ret.bins)
            for (b, p, s), r in zip(ret.keyline, numbers):
                if "/" in r:  # "number/number"
                    if (pdf not in ["lnN", "lnU"]) and ("?" not in pdf):
                        raise RuntimeError(
                            "Asymmetric errors are allowed only for Log-normals"
                        )
                    errline[b][p] = [float(x) for x in r.split("/")]
                    for v in errline[b][p]:
                        # Non-positive asymmetric values are only reported,
                        # not rejected.
                        if v <= 0.00:
                            print(
                                'Found "%s" in the nuisances affecting %s for %s. This would lead to NANs later on, so please fix it.'
                                % (r, p, b))
                else:
                    errline[b][p] = float(r)
                    # values of 0.0 are treated as 1.0; scrap negative values.
                    if pdf not in ["trG", "dFD", "dFD2"] and errline[b][p] < 0:
                        raise ValueError(
                            'Found "%s" in the nuisances affecting %s in %s. This would lead to NANs later on, so please fix it.'
                            % (r, p, b))
                # set the rate to epsilon for backgrounds with zero observed sideband events.
                if pdf == "gmN" and ret.exp[b][p] == 0 and float(r) != 0:
                    ret.exp[b][p] = 1e-6
            ret.systs.append([lsyst, nofloat, pdf, args, errline])
    except Exception as ex:
        if lineNumber is not None:
            msg = "Error reading line %d" % (lineNumber + 1)
            if hasattr(file, 'name'):
                msg += " of file %s" % (file.name, )
            # args may be empty or hold a non-string; formatting it keeps the
            # original error from being masked by a failure in this handler.
            if ex.args:
                msg += ": %s" % (ex.args[0], )
            ex.args = (msg, ) + ex.args[1:]
        raise
    return ret
def parseCard(file, options):
    """Parse a text datacard into a ``Datacard`` object.

    The input is read in two phases from one shared line iterator:

    1. the header (``imax``/``jmax``/``kmax``, ``shapes``, ``observation``,
       ``bin``, ``process``) up to and including the ``rate`` line;
    2. one nuisance definition per remaining line.

    Args:
        file: A file object, stream or list of lines (not a string).
        options: Object providing the attributes ``bin``, ``noJMax``,
            ``allowNoSignal``, ``nuisancesToExclude`` and ``verbose``.

    Returns:
        The populated ``Datacard``.

    Raises:
        RuntimeError: If ``file`` is a string, or the card is malformed.
        ValueError: If a symmetric effect is negative for a pdf other than
            ``trG``, ``dFD`` or ``dFD2``.

    Any exception raised while reading a line is re-raised with the 1-based
    line number (and ``file.name`` when available) prepended to its message.
    """
    if isinstance(file, str):
        raise RuntimeError(
            "You should pass as argument to parseCards a file object, stream or a list of lines, not a string"
        )
    ret = Datacard()
    ret.discretes = []
    nbins = -1
    nprocesses = -1
    # Parsed from 'kmax' and decremented for vetoed nuisances; it is not
    # read again inside this function.
    nuisances = -1
    binline = []
    processline = []
    sigline = []
    lineNumber = None
    try:
        # One shared iterator: the nuisance loop below resumes right after
        # the 'rate' line for every kind of iterable, including a plain list
        # of lines (re-iterating a list would start again at the header).
        lines = iter(file)
        for lineNumber, l in enumerate(lines):
            f = l.split()
            if len(f) < 1:
                continue
            if f[0] == "imax":
                nbins = int(f[1]) if f[1] != "*" else -1
            if f[0] == "jmax":
                nprocesses = int(f[1]) + 1 if f[1] != "*" else -1
            if f[0] == "kmax":
                nuisances = int(f[1]) if f[1] != "*" else -1
            if f[0] == "shapes":
                if not options.bin:
                    raise RuntimeError(
                        "Can use shapes only with binary output mode")
                if len(f) < 4:
                    raise RuntimeError("Malformed shapes line")
                if f[2] not in ret.shapeMap:
                    ret.shapeMap[f[2]] = {}
                if f[1] in ret.shapeMap[f[2]]:
                    raise RuntimeError(
                        "Duplicate definition for process '%s', channel '%s'"
                        % (f[1], f[2]))
                ret.shapeMap[f[2]][f[1]] = f[3:]
            if f[0] == "Observation" or f[0] == "observation":
                ret.obs = [float(x) for x in f[1:]]
                if nbins == -1:
                    nbins = len(ret.obs)
                if len(ret.obs) != nbins:
                    raise RuntimeError(
                        "Found %d observations but %d bins have been declared"
                        % (len(ret.obs), nbins))
                if binline != []:
                    if len(binline) != len(ret.obs):
                        # Report the 'bin' line being checked; ret.bins is
                        # only assigned from it just below.
                        raise RuntimeError(
                            "Found %d bins (%s) but %d bins have been declared"
                            % (len(binline), binline, nbins))
                    ret.bins = binline
                    ret.obs = dict(
                        (b, ret.obs[i]) for i, b in enumerate(ret.bins))
                    binline = []
            if f[0] == "bin":
                binline = []
                for b in f[1:]:
                    if re.match("[0-9]+", b):
                        stderr.write(
                            "Warning: Bin %(b)s starts with a digit. Will call it 'bin%(b)s' but this may break shapes.\n"
                            % {"b": b})
                        b = "bin" + b
                        # TODO Here should be some patching of the shapes names in order to not get errors later.
                    binline.append(b)
            if f[0] == "process":
                if processline == []:  # first line contains names
                    processline = f[1:]
                    if len(binline) != len(processline):
                        raise RuntimeError(
                            "'bin' line has a different length than 'process' line."
                        )
                    continue
                sigline = f[1:]  # second line contains ids
                # The two 'process' lines may come in either order: swap them
                # when the first one holds the numeric ids.
                if re.match("-?[0-9]+", processline[0]) and not re.match(
                        "-?[0-9]+", sigline[0]):
                    (processline, sigline) = (sigline, processline)
                if len(sigline) != len(processline):
                    raise RuntimeError(
                        "'bin' line has a different length than 'process' line."
                    )
                hadBins = (len(ret.bins) > 0)
                for i, b in enumerate(binline):
                    p = processline[i]
                    s = (int(sigline[i]) <= 0)  # <=0 for signals, >0 for backgrounds
                    ret.keyline.append((b, p, s))
                    if hadBins:
                        if b not in ret.bins:
                            raise RuntimeError(
                                "Bin %s not among the declared bins %s" %
                                (b, ret.bins))
                    else:
                        if b not in ret.bins:
                            ret.bins.append(b)
                    if p not in ret.processes:
                        ret.processes.append(p)
                if nprocesses == -1:
                    nprocesses = len(ret.processes)
                if nbins == -1:
                    nbins = len(ret.bins)
                if not options.noJMax:
                    if nprocesses != len(ret.processes):
                        raise RuntimeError(
                            "Found %d processes (%s), declared jmax = %d" %
                            (len(ret.processes), ret.processes, nprocesses))
                if nbins != len(ret.bins):
                    raise RuntimeError(
                        "Found %d bins (%s), declared imax = %d" %
                        (len(ret.bins), ret.bins, nbins))
                ret.exp = dict((b, {}) for b in ret.bins)
                ret.isSignal = dict((p, None) for p in ret.processes)
                if ret.obs != [] and isinstance(ret.obs, list):
                    # still as list, must change into map with bin names
                    ret.obs = dict(
                        (b, ret.obs[i]) for i, b in enumerate(ret.bins))
                for (b, p, s) in ret.keyline:
                    if ret.isSignal[p] is None:
                        ret.isSignal[p] = s
                    elif ret.isSignal[p] != s:
                        raise RuntimeError(
                            "Process %s is declared as signal in some bin and as background in some other bin"
                            % p)
                ret.signals = [
                    proc for proc, is_signal in ret.isSignal.items()
                    if is_signal
                ]
                if len(ret.signals) == 0 and not options.allowNoSignal:
                    raise RuntimeError(
                        "You must have at least one signal process (id <= 0)")
            if f[0] == "rate":
                if processline == []:
                    raise RuntimeError(
                        "Missing line with process names before rate line")
                if sigline == []:
                    raise RuntimeError(
                        "Missing line with process id before rate line")
                if len(f[1:]) != len(ret.keyline):
                    raise RuntimeError(
                        "Malformed rate line: length %d, while bins and process lines have length %d"
                        % (len(f[1:]), len(ret.keyline)))
                for (b, p, s), r in zip(ret.keyline, f[1:]):
                    ret.exp[b][p] = float(r)
                break  # rate is the last line before nuisances

        # parse nuisances; numbering continues after the header so reported
        # line numbers stay relative to the start of the input
        first_nuisance_line = 0 if lineNumber is None else lineNumber + 1
        for lineNumber, l in enumerate(lines, first_nuisance_line):
            if l.startswith("--"):
                continue
            l = re.sub("\\s*#.*", "", l)
            # a standalone run of dashes means "no effect": rewrite it as 0
            l = re.sub("(?<=\\s)-+(\\s|$)", " 0\\1", l)
            f = l.split()
            if len(f) <= 1:
                continue
            nofloat = False
            lsyst = f[0]
            pdf = f[1]
            args = []
            numbers = f[2:]
            if lsyst.endswith("[nofloat]"):
                lsyst = lsyst.replace("[nofloat]", "")
                nofloat = True
            if options.nuisancesToExclude and isVetoed(
                    lsyst, options.nuisancesToExclude):
                if options.verbose > 0:
                    stderr.write(
                        "Excluding nuisance %s selected by a veto pattern among %s\n"
                        % (lsyst, options.nuisancesToExclude))
                if nuisances != -1:
                    nuisances -= 1
                continue
            if re.match("[0-9]+", lsyst):
                lsyst = "theta" + lsyst
            if pdf in ("lnN", "lnU", "gmM", "trG") or pdf.startswith("shape"):
                pass  # nothing special to do
            elif pdf == "gmN":
                args = [int(f[2])]
                numbers = f[3:]
            elif pdf == "unif":
                args = [float(f[2]), float(f[3])]
                numbers = f[4:]
            elif pdf == "dFD" or pdf == "dFD2":
                args = [float(f[2])]
                numbers = f[3:]
            elif pdf == "param":
                # for parametric uncertainties, there's no line to account per bin/process effects
                # just assume everything else is an argument and move on
                args = f[2:]
                if len(args) <= 1:
                    raise RuntimeError(
                        "Uncertainties of type 'param' must have at least two arguments (mean and sigma)"
                    )
                ret.systs.append([lsyst, nofloat, pdf, args, []])
                continue
            elif pdf == "flatParam":
                ret.flatParamNuisances[lsyst] = True
                # for flat parametric uncertainties, code already does the right thing as long as they are non-constant RooRealVars linked to the model
                continue
            elif pdf == "discrete":
                ret.discretes.append(lsyst)
                continue
            else:
                raise RuntimeError("Unsupported pdf %s" % pdf)
            if len(numbers) < len(ret.keyline):
                raise RuntimeError(
                    "Malformed systematics line %s of length %d: while bins and process lines have length %d"
                    % (lsyst, len(numbers), len(ret.keyline)))
            errline = dict((b, {}) for b in ret.bins)
            for (b, p, s), r in zip(ret.keyline, numbers):
                if "/" in r:  # "number/number"
                    if (pdf not in ["lnN", "lnU"]) and ("?" not in pdf):
                        raise RuntimeError(
                            "Asymmetric errors are allowed only for Log-normals"
                        )
                    errline[b][p] = [float(x) for x in r.split("/")]
                    for v in errline[b][p]:
                        # Non-positive asymmetric values are only reported,
                        # not rejected.
                        if v <= 0.00:
                            print(
                                'Found "%s" in the nuisances affecting %s for %s. This would lead to NANs later on, so please fix it.'
                                % (r, p, b))
                else:
                    errline[b][p] = float(r)
                    # values of 0.0 are treated as 1.0; scrap negative values.
                    if pdf not in ["trG", "dFD", "dFD2"] and errline[b][p] < 0:
                        raise ValueError(
                            'Found "%s" in the nuisances affecting %s in %s. This would lead to NANs later on, so please fix it.'
                            % (r, p, b))
                # set the rate to epsilon for backgrounds with zero observed sideband events.
                if pdf == "gmN" and ret.exp[b][p] == 0 and float(r) != 0:
                    ret.exp[b][p] = 1e-6
            ret.systs.append([lsyst, nofloat, pdf, args, errline])
    except Exception as ex:
        if lineNumber is not None:
            msg = "Error reading line %d" % (lineNumber + 1)
            if hasattr(file, 'name'):
                msg += " of file %s" % (file.name, )
            # args may be empty or hold a non-string; formatting it keeps the
            # original error from being masked by a failure in this handler.
            if ex.args:
                msg += ": %s" % (ex.args[0], )
            ex.args = (msg, ) + ex.args[1:]
        raise
    return ret