def datamcplotter(self,variable):
    """Make the data/prediction figure for one variable and save it to disk.

    Parameters
    ----------
    variable : str
        Name of the variable to plot.  If it contains '[' it is passed
        through ``brackets`` to obtain the bare name and the entry to
        select (stored in ``self.var`` / ``self.var_entry``).

    Returns
    -------
    None.  The method returns early (without saving a figure) when the
    variable is listed in ``self.notforpre`` for a 'pre' JSON file name, or
    when a leading entry is requested for data that is not stored per-event
    as a vector.

    Notes
    -----
    Sets several attributes used by the other plotting methods
    (``self.var``, ``self.plotBins``, ``self.x_label``, ``self.x_min``,
    ``self.x_max``, ``self.y1min``, ``self.y1max``, ``self.ax1`` and, when
    data are drawn, ``self.ax2``, ``self.y2min``, ``self.y2max``).
    """
    self.var = variable
    self._leading = False
    if '[' in self.var:
        self._leading = True
        self.var,self.var_entry = brackets(self.var)

    ## -- for pre-selection, skip variables that aren't defined
    if self.var in self.notforpre and 'pre' in self.p_jsonfilename:
        return

    ## -- Set the output file
    ana_status_label = self.p_ana_status.replace(' ','_')
    outfile = "{0}{1}_{2}_{3}{4}_{5}".format(self.pathSave,self.var,self.p_jsonfilename,
                                             ana_status_label,self.p_extra_saveAs,self.timeStamp)

    ## -- Load our data! (context manager so the file handle is always closed)
    pathData = info.getJsonPath()+"{0}/{1}_{2}.json".format(self.p_lepton,self.var,self.p_jsonfilename)
    with open(pathData) as json_file:
        json_data = json.load(json_file)

    ## -- Initializing various bin related arrays
    self.plotBins   = self.plot_keys['variables'][self.var]['bins']   # Binning for this histogram
    plotBins_array  = np.asarray(self.plotBins)
    bins_mp         = 0.5*(plotBins_array[:-1]+plotBins_array[1:])    # midpoint in bins
    bin_widths      = plotBins_array[1:]-plotBins_array[:-1]          # bin widths
    half_bin_widths = bin_widths/2.                                   # half of bin widths (xerr)

    ## -- Plot only desired signals
    ##    Recommended format = 'TTS_M800','TTS_M1400',etc. (csv format)
    ##    Only the integer conversion is guarded: errors from anything else
    ##    (e.g., a missing 'signal' key) are no longer silently swallowed.
    try:
        signal_selector = int(self.p_plot_signal)
    except (TypeError,ValueError):
        # self.p_plot_signal = [comma separated values (works for >= 0 entries)]
        signal = self.p_plot_signal.split(',')
    else:
        if signal_selector < 0:
            # self.p_plot_signal = -1 (or some other negative number to mean 'ALL')
            signal = self.physics_samples['signal']
        else:
            # self.p_plot_signal = 1400 (or something to plot a single mass point)
            # str() so that a numeric option still works as a substring filter
            mass_tag = str(self.p_plot_signal)
            signal   = [i for i in self.physics_samples['signal'] if mass_tag in i]

    ## ## ## -- Setting up figure -- ## ## ##
    self.x_label = self.plot_keys['variables'][self.var]['label']
    self.x_min   = self.plotBins[0]     # horizontal axis scale
    self.x_max   = self.plotBins[-1]
    self.y1min   = 0.                   # set the minimum of the y-axis
                                        # (for logplots, see 'plot_prediction()')

    # just grabbing what's in the json file: keys that contain '_weight'
    py_samples = [i.split('_weight')[0] for i in json_data if '_weight' in i]

    ## plotting all samples in one command, need lists of everything to do that
    self.entry_values = []
    self.entry_scales = []
    self.entry_labels = []
    self.entry_colors = []

    ## plotting uncertainty bands: every interior bin edge appears twice so
    ## that the band is drawn as a step function
    fill_between_bins = [self.plotBins[0]]+list(plotBins_array[1:-1].repeat(2))+[self.plotBins[-1]]

    if self.p_blind:
        ## No data ##
        fig, self.ax1 = plt.subplots(figsize=(10,8))
        if self.p_logplot:
            self.ax1.set_yscale('log')

        self.plot_prediction(py_samples,json_data)
        self.plot_signal(py_samples,json_data,signal)

        ## Set the axis properties of the main x-axis
        self.config_xaxis(self.ax1)

        self.get_uncertainties()
        self.y1max  = max(self.totpred)/0.82
        data_height = [0. for i in self.totpred]
    else:
        ## Data ##
        fig = plt.figure(figsize=(10,8))
        gs  = gridspec.GridSpec(2,1,height_ratios=[3,1],hspace=0.0)
        self.ax1 = fig.add_subplot(gs[0])
        self.ax2 = fig.add_subplot(gs[1],sharex=self.ax1)
        plt.setp(self.ax1.get_xticklabels(),visible=False)
        if self.p_logplot:
            self.ax1.set_yscale('log')

        data_label  = 'Data'
        data_color  = 'black'
        data_values = json_data['data']

        ## For vectors (e.g., jet_pt; there is >=1 jet per event)
        ## (guard against an empty data list before inspecting the first entry)
        if data_values and isinstance(data_values[0],list):
            if self._leading:
                data_values = [i[self.var_entry] for i in data_values]
            else:
                data_values = list(itertools.chain.from_iterable(data_values))
        elif self._leading:
            ## catch possible issues here (not putting this with MC, because
            ## it should be caught here)
            try:
                data_values = [i[self.var_entry] for i in data_values]
            except TypeError:
                print("")
                print(" Cannot access a pT-sorted value for object ")
                print(" that is not stored in vector.")
                print(" If you're trying to access an object that ")
                print(" should be stored as a vector and isn't, ")
                print(" please contact the author.")
                print(" If you're trying to access an object that ")
                print(" isn't stored as a vector and shouldn't be, ")
                print(" please fix your error. ")
                print("")
                print(" Continuing to next variable. ")
                print("")
                plt.close()   # don't leave the unused figure open
                return

        ## Plot the data points as 'error bars' (circles with error bars).
        ## The histogram is built with numpy first so that underflow/overflow
        ## (if requested) can be added to the first/last bins before drawing.
        d_hist,_ = np.histogram(data_values,self.plotBins)
        if self.p_underflow:
            d_hist[0]  += self.getUnderflow(data_values,[])
        if self.p_overflow:
            d_hist[-1] += self.getOverflow(data_values,[])

        # empty bins become NaN so that no marker is drawn for them
        data_hist   = np.asarray([i if i else float('NaN') for i in d_hist])
        data_err    = np.sqrt(data_hist)
        data_height = data_hist+data_err  # for scaling the y-axis on the plot
        self.ax1.errorbar(bins_mp,data_hist,yerr=data_err,capsize=0,fmt='o',
                          c=data_color,label=data_label,zorder=100)

        ## Now plot the prediction and signal samples
        self.plot_prediction(py_samples,json_data)
        self.plot_signal(py_samples,json_data,signal)

        self.get_uncertainties()

        ## ## ## Residual plotting (the Data/MC ratio) ## ## ##
        self.y2min = 0.5
        self.y2max = 1.5
        # Using 'np.nanmax' here because the 'data_height' array may contain
        # float('NaN') values. In that case, the built-in method 'max' doesn't
        # interpret float('NaN') as 0 (or as any number).
        self.y1max = max([max(self.totpred), np.nanmax(data_height)])/0.82

        ## Residual Values (data/prediction subplot)
        self.resid_unc['total']['up'] = list(((self.totpred+self.unc['total']['up'])/self.totpred).repeat(2))
        self.resid_unc['total']['dn'] = list(((self.totpred-self.unc['total']['dn'])/self.totpred).repeat(2))

        # the divisions already produce new arrays, so no extra copy is needed
        residual     = data_hist / self.totpred
        residual_err = data_err / self.totpred
        self.ax2.errorbar(bins_mp,residual,yerr=residual_err,xerr=half_bin_widths,
                          capsize=0,fmt='o',c='black',zorder=100)

        ## Simulation Uncertainties
        self.ax2.fill_between(fill_between_bins,
                              self.resid_unc['total']['dn'],
                              self.resid_unc['total']['up'],
                              **self.p_hatch_args)

        ## labels, legends, and text ##
        self.ax2.axhline(y=1,ls='--',c='k',zorder=1)
        self.config_xaxis(self.ax2)

        ## Set the axis properties of the ratio y-axis
        if any('qcd' in b.lower() for b in self.background):
            y_ratio_label = "Data/Pred."
        else:
            y_ratio_label = "Data/MC"
        self.ax2.set_ylim(ymin=self.y2min,ymax=self.y2max)
        self.ax2.set_yticks(np.asarray([0.6,1.0,1.4]))
        self.ax2.set_yticklabels(self.ax2.get_yticks(),fontProperties,fontsize=self.label_size)
        self.ax2.set_ylabel(y_ratio_label,fontsize=self.label_size,ha='center',va='bottom')

    ## Set the axis properties of the main y-axis
    ## NOTE(review): kept at function level because 'ax1_unc_height' is read
    ## below for both the blind and the data figure -- confirm against the
    ## original indentation.
    ax1_unc_bottom = [0.]  # for checking later (in case there is no uncertainty drawn)
    ax1_unc_height = [0.]  # for checking later
    if self.draw_ax1_unc:
        ax1_unc_bottom = list( np.asarray( self.totpred-self.unc['total']['dn'] ).repeat(2) )
        ax1_unc_height = list( np.asarray( self.totpred+self.unc['total']['up'] ).repeat(2) )
        self.ax1.fill_between(fill_between_bins,
                              ax1_unc_bottom,
                              ax1_unc_height,
                              zorder=200,
                              **self.p_hatch_args)
    self.y1max = max(self.y1max, max(ax1_unc_height)/0.82)

    ## the rest is common to both kinds of plots, so put it at the end

    ## Bin-width text for the y-axis label: at most 2 decimals, with trailing
    ## zeros removed (10.0 -> '10', 0.5 -> '0.5', 0.05 -> '0.05').  Widths that
    ## would round to zero are printed in general format instead of as '0'.
    min_width  = float(min(bin_widths))
    bin_width_ = "{0:.2f}".format(min_width)
    if float(bin_width_)==0. and min_width!=0.:
        bin_width_ = "{0:g}".format(min_width)
    else:
        bin_width_ = bin_width_.rstrip('0').rstrip('.')

    unit_tag = ''
    if self.plot_keys['variables'][self.var]['gev']:
        unit_tag = ' GeV'
    y_main_label = "Events/"+bin_width_+unit_tag

    self.ax1.yaxis.get_label().set_position((0,1))
    self.ax1.set_ylabel(y_main_label,fontsize=self.label_size,ha='right',va='bottom')

    legend_ncol = 2
    ax1_legend  = self.ax1.legend(numpoints=1, fontsize=self.leg_txtsize, ncol=legend_ncol, columnspacing=0.)
    ax1_legend.draw_frame(False)

    ## The following is a hack to change the figure height based on the legend
    ## (so the plot doesn't interfere with the legend).  This is difficult
    ## because the legend is drawn at the very end -- when the plot is made:
    ## Using the assumption that size 22 font is approximately 0.08 (use 0.10) units tall
    ## to get the height of the legend, and then determining what gives the larger axis
    handles,labels = self.ax1.get_legend_handles_labels()
    # approximate number of rows in legend; explicit floor division and the
    # local column count avoid relying on the private 'Legend._ncol'
    nrows          = len(handles)//legend_ncol + len(handles)%legend_ncol
    legend_height  = float(self.leg_txtsize)/self.atlas_size * 0.10 * nrows
    legend_ax1ymax = max( max(ax1_unc_height), max(self.totpred), np.nanmax(data_height) )/(1-legend_height)

    # compare this new value (height based on legend) with
    # the height based on the 'ATLAS' label
    text_heights = [legend_ax1ymax,self.y1max]
    self.config_yaxis(self.ax1,self.y1min,max(text_heights))  # configure the y-axis

    self.plot_text()

    print(" Saving file as: {0}.{1}\n".format(outfile,self.p_format))
    plt.savefig(outfile+'.'+self.p_format,bbox_inches='tight',format=self.p_format,dpi=300)
    plt.close()

    return
def execute(self):
    """
    Plot the variables!

    This is currently designed so that you can plot multiple variables
    on the same plot, but they need the same binning.  You can also plot
    different samples against each other (each entry in self.data
    represents a single 'sample' -- ttbar, w+jets, TTS, etc.)

    Input files listed in ``self.inputfilenames`` are loaded into
    ``self.data`` (with ``json`` when ``self.file_type=='json'``, otherwise
    opened with ``ROOT.TFile.Open``).  ``self.p_2dvariables`` and
    ``self.p_1dvariables`` are split into one figure per '/'-separated
    entry; each figure is written through ``self.finalize``.

    Returns None.
    """
    if self.file_type=='json':
        # read each file inside a context manager so the handle is closed
        loaded = []
        for input_data in self.inputfilenames:
            with open(input_data,'r') as json_file:
                loaded.append(json.load(json_file))
        self.data = loaded
    else:
        self.data = [ROOT.TFile.Open(input_data) for input_data in self.inputfilenames]

    ## -- 2D plot
    if self.p_2dvariables:
        # split figures by '/' (skipped if the option was already split,
        # e.g. when execute() is called a second time)
        if hasattr(self.p_2dvariables,'split'):
            self.p_2dvariables = self.p_2dvariables.split('/')

        for variable in self.p_2dvariables:
            # -- Making separate figure for these variables
            self.fig,self.ax = plt.subplots(figsize=(10,8))

            xvar,yvar = variable.split(',')

            self._leading = {'x':False,'y':False}
            if '[' in xvar:
                self._leading['x']   = True
                xvar,self.xvar_entry = brackets(xvar)
            if '[' in yvar:
                self._leading['y']   = True
                yvar,self.yvar_entry = brackets(yvar)

            ttree_xvar   = self.PMA_varname2ttreename(xvar)
            self.x_bins  = self.plot_keys['variables'][ttree_xvar]['bins']
            self.x_label = self.plot_keys['variables'][ttree_xvar]['label']
            self.x_min   = self.x_bins[0]
            self.x_max   = self.x_bins[-1]

            ttree_yvar   = self.PMA_varname2ttreename(yvar)
            self.y_bins  = self.plot_keys['variables'][ttree_yvar]['bins']
            self.y_label = self.plot_keys['variables'][ttree_yvar]['label']
            self.y_min   = self.y_bins[0]
            self.y_max   = self.y_bins[-1]

            # Loop over the self.data vector to plot each sample
            self.PMA_plot_2d(variable)

            # Done making plot, setup the axes
            self.config_xaxis(self.ax)
            self.config_yaxis()

            # Save this figure
            outputfilename = '{0}_{1}_{2}_{3}.{4}'.format(xvar,yvar,self.p_lepton,self.p_extra_save,self.format)
            self.finalize(outputfilename)

    ## -- 1D plot
    if self.p_1dvariables:
        # split variables in different plot by '/' (skipped if already split)
        if hasattr(self.p_1dvariables,'split'):
            self.p_1dvariables = self.p_1dvariables.split('/')

        for variable in self.p_1dvariables:
            # -- Making separate figure for these variables
            self.fig,self.ax = plt.subplots(figsize=(10,8))

            plot_vars = variable.split(',')   # split variables in same plot by ','

            ## -- Initialize 'default' color scheme to be as aesthetic as possible
            ##    Prefer 'red' and 'blue' if there are just 2 things to plot
            if not self.colors:
                if len(plot_vars)==2:
                    self.colors = ['red','blue']
                else:
                    self.colors = self.PMA_plot_colors()

            ## -- Make a default y-axis label
            if not self.y_label:
                self.y_label = 'Events' if self.p_scalefactor else 'Arbitrary Units'

            ## -- Add each variable in the plot (e.g., comparing pTs or something)
            for v,var in enumerate(plot_vars):
                self._leading = False
                if '[' in var:
                    self._leading      = True
                    var,self.var_entry = brackets(var)

                ttree_name   = self.PMA_varname2ttreename(var)  # convert object name to ttree name
                self.x_bins  = self.plot_keys['variables'][ttree_name]['bins']
                self.x_label = self.plot_keys['variables'][ttree_name]['label']
                self.x_min   = self.x_bins[0]
                self.x_max   = self.x_bins[-1]

                ## -- Make the plot!
                self.PMA_plot_1d(v,var)

            self.y_max = self.maxpred/0.82
            if self.p_logplot:
                minimum      = np.min(self.totpred[0][self.totpred[0]>0])*0.1
                logplot_ymin = 0.01   # cut-off bottom of y-axis
                self.y_min   = max( [minimum,logplot_ymin] )
                self.y_max   = 30.*self.y_max
            else:
                self.y_min = 0.   # this assumes some sort of histogram feature
                                  # where all values are > 0 -- may not be true!

            self.config_xaxis(self.ax)  # have to pass axis because the inherited function needs it
            self.config_yaxis()
            self.PMA_draw_legend()

            # Save this figure (named after the last variable drawn on it)
            outputfilename = '{0}_{1}_{2}'.format(var,self.p_lepton,self.p_extra_save)
            self.finalize(outputfilename.replace('.','_')+'.'+self.format)

    return
def processObject(self,py_object):
    """
    Classify the object named in a cut and report which operations it needs.

    The string is inspected for:
      - a 'truth' prefix        (treated as a single object, name unchanged)
      - a quantity              (e.g., jets.pt or jets.eta)
      - a leading request       (e.g., jets[0] or jets.pt[1])
      - membership in self.vector_objects (e.g., jets)
    Anything else is a single object (e.g., lepton or neutrino 'nu').

    Returns a dictionary with the keys 'name', 'isSingle', 'isVector',
    'hasQuantity', 'quantity', 'isLeading' and 'leading_index'.
    """
    obj_name  = py_object
    single    = False
    vector    = False
    has_quant = False
    leading   = False
    attribute = None
    index     = None

    if not py_object.startswith('truth'):
        if '.' in py_object:
            # Quantity (can have leading, vectors, or single)
            has_quant = True
            obj_name,attribute = py_object.split('.')

            if attribute.endswith(']'):
                # leading with quantity: pt[0] -> 'pt','0]' -> 'pt','0'
                attribute,index = attribute.split('[')
                index  = index.rstrip(']')
                single = leading = True
            else:
                # vector with quantity, otherwise a single object
                # (just lepton, nu, met, etc.)
                vector = obj_name in self.vector_objects
                single = not vector
        elif py_object.endswith(']'):
            # Leading! (no quantity and no vectors)
            obj_name = py_object.split('[')[0]
            single = leading = True
            # NOTE(review): the quantity branch above stores the index as a
            # string; confirm config.brackets returns a compatible value.
            index = config.brackets(py_object)
        else:
            # Vector with no quantity (and no leading), otherwise single
            vector = py_object in self.vector_objects
            single = not vector
    else:
        # first attempt at truth-level selections
        # future support for particle-level??
        single = True

    return {'name':          obj_name,
            'isSingle':      single,
            'isVector':      vector,
            'hasQuantity':   has_quant,
            'quantity':      attribute,
            'isLeading':     leading,
            'leading_index': index}