def blind(self, bmin=None, bmax=None, **kwargs):
    """Return a selection string that blinds the window (bmin,bmax).

    The window is widened outwards so that both cuts coincide with bin
    edges of this variable's binning: either the variable bin edges in
    self.bins, or the uniform (nbins,xmin,xmax) binning.

    bmin, bmax: window edges; a missing edge is taken from self.blindcuts,
                and the two are swapped if given in the wrong order.
    Returns a string of the form "(<name><<xlow> || <xhigh><<name>)".
    """
    verbosity = LOG.getverbosity(self, kwargs)
    if bmin is None:
        bmin = self.blindcuts[0]
    if bmax is None:
        # NOTE(review): assumes blindcuts is a (min,max) pair, mirroring the bmin default
        bmax = self.blindcuts[1]
    if bmax < bmin:
        bmax, bmin = bmin, bmax
    LOG.insist(bmax > bmin, 'Variable.blind: "%s" has window a = %s <= %s = b !' % (self._name, bmin, bmax))
    xlow, xhigh = bmin, bmax
    nbins, xmin, xmax = self.nbins, self.min, self.max
    if self.hasvariablebins():
        # last bin edge at or below bmin
        for xval in self.bins:
            if xval > bmin:
                break
            xlow = xval
        # first bin edge at or above bmax
        for xval in reversed(self.bins):
            if xval < bmax:
                break
            xhigh = xval
    else:
        binwidth = float(xmax - xmin) / nbins
        if xmin < bmin < xmax:
            ibin, _ = divmod(bmin - xmin, binwidth)
            xlow = xmin + ibin * binwidth  # bin edges are offset by xmin
        if xmin < bmax < xmax:
            ibin, rem = divmod(bmax - xmin, binwidth)
            if rem > 0:
                ibin += 1  # round up to the next bin edge
            xhigh = xmin + ibin * binwidth
    blindcut = "(%s<%s || %s<%s)" % (self.name, xlow, xhigh, self.name)
    LOG.verb('Variable.blind: blindcut = "%s" for a (%s,%s) window and (%s,%s,%s) binning' % (blindcut, bmin, bmax, nbins, xmin, xmax), verbosity, 2)
    return blindcut
def match(terms, labels, **kwargs):
    """Match given search terms (strings) to some given list of labels.

    terms:  search term or list of search terms (glob patterns by default)
    labels: label or list of labels to match the terms against
    Keyword options:
      regex: treat the terms as regular expressions instead of glob patterns
      incl:  if True (default), return True as soon as one term matches a
             label; if False ("exclusive"), every term must match some label
      start: anchor each pattern at the beginning of the label
    Returns True or False; False if there are no search terms.
    """
    verbosity = LOG.getverbosity(kwargs)
    terms = ensurelist(terms, nonzero=True)  # search terms
    labels = ensurelist(labels, nonzero=True)  # labels to match to
    regex = kwargs.get('regex', False)  # use regexpr patterns (instead of glob)
    incl = kwargs.get('incl', True)  # match at least one term; if incl=False ("exclusive"), match every term
    start = kwargs.get('start', False)  # match only beginning of string
    LOG.verb("match: compare labels=%s -> searchterms=%s (incl=%s,regex=%s)" % (labels, terms, incl, regex), verbosity, 3)
    if not terms:
        return False

    # Build the patterns in a fresh list, so that a list passed in by the
    # caller is never modified in place.
    patterns = []
    for searchterm in terms:
        if searchterm:
            if not regex:  # convert glob to regexp
                searchterm = re.escape(searchterm).replace(r'\?', '.').replace(r'\*', '.*?').replace(r'\^', '^')
            if start and not searchterm.startswith('^'):
                searchterm = '^' + searchterm
        patterns.append(searchterm)

    if incl:  # inclusive: match only one search term
        for pattern in patterns:
            for label in labels:
                if re.search(pattern, label):
                    LOG.verb(" matched %r -> %r; return True" % (label, pattern), verbosity, 3)
                    return True  # one search term is matched
                LOG.verb(" not matched %r -> %r" % (label, pattern), verbosity, 3)
        return False  # no search term was matched

    # exclusive: match all search terms
    for pattern in patterns:
        for label in labels:
            if re.search(pattern, label):
                LOG.verb(" matched %r -> %r" % (label, pattern), verbosity, 3)
                break
            LOG.verb(" not matched %r -> %r" % (label, pattern), verbosity, 3)
        else:
            return False  # this search term was not matched by any label
    return True  # all search terms were matched
def addoverflow(self, **kwargs):
    """Fold the overflow into the last bin by capping the variable expression.

    Sets self.name to "min(<_name>,<threshold>)", where the threshold lies
    inside the last bin of the current binning, and returns the new name.
    """
    verbosity = LOG.getverbosity(self, kwargs)
    if not self.hasvariablebins():
        # uniform binning: step back from the upper edge by 90% of a bin width
        lastwidth = (self.max - self.min) / float(self.nbins)
        cutoff = self.max - 0.90 * lastwidth
    else:
        # variable binning: step 90% into the last bin from its lower edge
        lastwidth = self.bins[-1] - self.bins[-2]
        cutoff = self.bins[-2] + 0.90 * lastwidth
    self.name = "min(%s,%s)" % (self._name, cutoff)
    LOG.verb("Variable.addoverflow: '%s' -> '%s' for binning '%s'" % (self._name, self.name, self.getbins()), verbosity, 2)
    return self.name
def filtervars(vars, filters, **kwargs):
    """Return the variables whose name or filename matches any of the filters.

    Each filter is passed to match() as a search term. Without filters, a
    shallow copy of the full list is returned.
    """
    verbosity = LOG.getverbosity(kwargs)
    if not filters:
        return vars[:]
    selected = []
    for candidate in vars:
        labels = [candidate.name, candidate.filename]
        keep = any(match(pattern, labels) for pattern in filters)
        if not keep:
            LOG.verb("filtervars: No match to %r, ignoring..." % (candidate), verbosity, 2)
            continue
        selected.append(candidate)
        LOG.verb("filtervars: Matched %r, including..." % (candidate), verbosity, 2)
    return selected
#from TauFW.Plotter.plot.Selection import Selection
def setbins(self, *args):
    """Set the binning from either three numbers (N,min,max) or a list of bin edges.

    Three numeric arguments take precedence and clear self.bins; otherwise
    the first list-like argument is used as bin edges. Anything else is
    reported through LOG.throw as an IOError.
    """
    LOG.verb('Variable.setbins: setting binning to %s' % (args,), level=2)
    scalars = [arg for arg in args if isnumber(arg)]
    edgelists = [arg for arg in args if islist(arg)]
    if len(scalars) == 3:
        # uniform binning
        self.nbins, self.min, self.max = scalars
        self.bins = None
        return
    if not edgelists:
        LOG.throw(IOError, 'Variable: bad arguments "%s" for binning!' % (args,))
        return
    # variable binning: derive N, min and max from the edges
    edges = list(edgelists[0])
    self.nbins = len(edges) - 1
    self.min = edges[0]
    self.max = edges[-1]
    self.bins = edges
def plotfor(self, variable, **kwargs):
    """Tell whether this object should be plotted for the given variable.

    variable: a string, or an object whose 'name' attribute is used.
    Returns False if any regex in self.veto matches, True if any regex in
    self.only matches, and otherwise True only when self.only is empty.
    """
    verbosity = LOG.getverbosity(kwargs)
    target = variable if isinstance(variable, str) else variable.name

    def anymatch(patterns):
        # Report and stop at the first pattern that matches the target.
        for pattern in patterns:
            if re.search(pattern, target):
                LOG.verb("Variable.plotFor: Regex match of variable %r to %r" % (target, pattern), verbosity, level=2)
                return True
        return False

    if anymatch(self.veto):
        return False
    if anymatch(self.only):
        return True
    return len(self.only) == 0
def createinputs(fname, sampleset, observables, bins, **kwargs):
    """Create histogram inputs in ROOT file for datacards.

    fname:       filename pattern of ROOT file
    sampleset:   SampleSet object
    observables: list of Variables objects
    bins:        list of Selection objects

    Keyword options used here:
      outdir:   output directory, joined in front of fname
      tag:      file tag; combined with each observable's own tag
      htag:     hist tag for systematic, appended to histogram names
      filter:   only create histograms for these processes
      veto:     veto these processes
      parallel: MultiDraw histograms in parallel
      recreate: recreate the ROOT file instead of updating it
      method:   passed on to sampleset.gethists; defaults to 'QCD_OSSS'
                when there is no filter or 'QCD' is in the filter
      verb:     verbosity level
    The options 'replaceweight', 'weight' and 'shiftQCD' are accepted but
    have no effect in this function.
    """
    outdir = kwargs.get('outdir', "")
    tag = kwargs.get('tag', "")  # file tag
    htag = kwargs.get('htag', "")  # hist tag for systematic
    filters = kwargs.get('filter', None)  # only create histograms for these processes
    vetoes = kwargs.get('veto', None)  # veto these processes
    parallel = kwargs.get('parallel', True)  # MultiDraw histograms in parallel
    recreate = kwargs.get('recreate', False)  # recreate ROOT file
    verbosity = kwargs.get('verb', 0)
    option = 'RECREATE' if recreate else 'UPDATE'
    method = 'QCD_OSSS' if filters is None or 'QCD' in filters else None
    method = kwargs.get('method', method)

    # FILE LOGISTICS: prepare file and directories
    files = {}
    ensuredir(outdir)
    fname = os.path.join(outdir, fname)
    try:
        for obs in observables:
            obsname = obs.filename
            ftag = tag + obs.tag  # include the observable's own tag in the file name
            fname_ = repkey(fname, OBS=obsname, TAG=ftag)
            rootfile = TFile.Open(fname_, option)
            files[obs] = rootfile  # register right away so it is closed on failure
            if recreate:
                print(">>> created file %s" % (fname_))
            for selection in bins:
                if not obs.plotfor(selection):
                    continue
                obs.changecontext(selection)
                ensureTDirectory(rootfile, selection.filename, cd=True, verb=verbosity)
                if recreate:
                    string = joincuts(selection.selection, obs.cut)
                    # write exact selection string to ROOT file for the record / debugging
                    TNamed("selection", string).Write()
                    LOG.verb("%s selection %r: %r" % (obsname, selection.name, string), verbosity, 1)

        # GET HISTS
        for selection in bins:
            bin = selection.filename  # bin name
            print(">>>\n>>> " + color(" %s " % (bin), 'magenta', bold=True, ul=True))
            if htag:
                print(">>> systematic uncertainty: %s" % (color(htag.lstrip('_'), 'grey')))
            if recreate or verbosity >= 1:
                print(">>> %r" % (selection.selection))
            hists = sampleset.gethists(observables, selection, method=method, split=True,
                                       parallel=parallel, filter=filters, veto=vetoes)

            # SAVE HIST
            ljust = 4 + max(11, len(htag))  # extra space
            TAB = LOG.table("%10.1f %10d %-18s %s")
            TAB.printheader('events', 'entries', 'variable', 'process'.ljust(ljust))
            for obs, hist in hists.iterhists():
                # histname = $VAR_$NAME (see Sample.gethist)
                name = lreplace(hist.GetName(), obs.filename).strip('_')
                if not name.endswith(htag):
                    name += htag  # HIST = $PROCESS_$SYSTEMATIC
                name = repkey(name, BIN=bin)
                drawopt = 'E1' if 'data' in name else 'EHIST'
                lcolor = kBlack if any(s in name for s in ['data', 'ST', 'VV']) else hist.GetFillColor()
                hist.SetOption(drawopt)
                hist.SetLineColor(lcolor)
                hist.SetFillStyle(0)  # no fill in ROOT file
                hist.SetName(name)
                hist.GetXaxis().SetTitle(obs.title)
                # clip negative bin contents to zero
                for i, yval in enumerate(hist):
                    if yval < 0:
                        print(">>> replace bin %d (%.3f<0) of %r" % (i, yval, hist.GetName()))
                        hist.SetBinContent(i, 0)
                files[obs].cd(bin)  # $FILE:$BIN/$PROCESS_$SYSTEMATC
                hist.Write(name, TH1.kOverwrite)
                TAB.printrow(hist.GetSumOfWeights(), hist.GetEntries(), obs.printbins(), name)
                deletehist(hist)  # clean memory
    finally:
        # CLOSE: also when something above failed, so no ROOT file is left open
        for rootfile in files.values():
            rootfile.Close()