Example #1
0
    def datamcplotter(self,variable):
        """
        Make the data/prediction figure for a single variable and save it.

        The inputs are read from the JSON file built from the lepton
        channel, the variable name, and 'self.p_jsonfilename'.  The
        prediction and signal samples are drawn on the main axis.  Unless
        the plot is blinded ('self.p_blind'), the data points and a
        data/prediction ratio panel are drawn as well.

        variable : name of the variable to plot.  If the name contains '[',
                   it is passed to 'brackets()' to separate the variable
                   name from the requested entry (stored in 'self.var_entry').

        Returns None.  Nothing is saved when
          - the variable is listed in 'self.notforpre' and the json file
            name contains 'pre', or
          - a single entry is requested from data values that cannot be
            indexed (a message is printed and the figure is closed).
        """
        self.var = variable

        self._leading = False
        if '[' in self.var:
            self._leading           = True
            self.var,self.var_entry = brackets(self.var)

        ## -- for pre-selection, skip variables that aren't defined
        if self.var in self.notforpre and 'pre' in self.p_jsonfilename:
            return

        ## -- Set the output file
        ana_status_label = self.p_ana_status.replace(' ','_')
        outfile    = "{0}{1}_{2}_{3}{4}_{5}".format(self.pathSave,self.var,self.p_jsonfilename,ana_status_label,self.p_extra_saveAs,self.timeStamp)
        ## -- Load our data!
        pathData  = info.getJsonPath()+"{0}/{1}_{2}.json".format(self.p_lepton,self.var,self.p_jsonfilename)
        with open(pathData) as json_file:   # close the file handle as soon as it is parsed
            json_data = json.load(json_file)

        ## -- Initializing various bin related arrays
        self.plotBins   = self.plot_keys['variables'][self.var]['bins']  # Binning for this histogram
        plotBins_array  = np.asarray(self.plotBins)
        bins_mp         = 0.5*(plotBins_array[:-1]+plotBins_array[1:]) # midpoint in bins
        bin_widths      = plotBins_array[1:]-plotBins_array[:-1]       # bin widths
        half_bin_widths = bin_widths/2.                                # half of bin widths (xerr)


        ## -- Plot only desired signals
        ##    Recommended format = 'TTS_M800','TTS_M1400',etc. (csv format)
        ##    Only the integer conversion is guarded, so that unrelated errors
        ##    (e.g., a missing 'signal' entry) are not silently hidden.
        try:
            signal_mass = int(self.p_plot_signal)
        except (ValueError,TypeError):
            signal_mass = None

        if signal_mass is None:
            # self.p_plot_signal = [comma separated values (works for >= 0 entries)]
            signal = self.p_plot_signal.split(',')
        elif signal_mass < 0:
            # self.p_plot_signal = -1  (or some other negative number to mean 'ALL')
            signal = self.physics_samples['signal']
        else:
            # self.p_plot_signal = 1400 (or something to plot a single mass point)
            # str() so that an integer option is matched against the sample names too
            signal = [i for i in self.physics_samples['signal'] if str(self.p_plot_signal) in i]

        ##                         ##
        ## -- Setting up figure -- ##
        ##                         ##
        self.x_label = self.plot_keys['variables'][self.var]['label']
        self.x_min   = self.plotBins[0]   # horizontal axis scale
        self.x_max   = self.plotBins[-1]
        self.y1min   = 0.   # set the minimum of the y-axis (for logplots, see 'plot_prediction()')

        # just grabbing what's in the json file: keys containing '_weight' name the samples
        py_samples = [i.split('_weight')[0] for i in json_data.keys() if '_weight' in i]

        ## plotting all samples in one command, need lists of everything to do that
        self.entry_values = []
        self.entry_scales = []
        self.entry_labels = []
        self.entry_colors = []

        ## plotting uncertainty bands
        ## every interior bin edge appears twice so the band is drawn as steps
        fill_between_bins = [self.plotBins[0]]+list(plotBins_array[1:-1].repeat(2))+[self.plotBins[-1]]

        if self.p_blind:
            ## No data ##
            fig, self.ax1 = plt.subplots(figsize=(10,8))

            if self.p_logplot:
                self.ax1.set_yscale('log')
            self.plot_prediction(py_samples,json_data)
            self.plot_signal(py_samples,json_data,signal)

            ## Set the axis properties of the main x-axis
            self.config_xaxis(self.ax1)

            # NOTE(review): 'self.totpred' and 'self.unc' are read below but not
            # assigned in this method -- presumably filled by the calls above; confirm.
            self.get_uncertainties()
            self.y1max  = max(self.totpred)/0.82
            data_height = [0. for i in self.totpred]

        else:
            ##  Data  ##
            fig = plt.figure(figsize=(10,8))
            gs  = gridspec.GridSpec(2,1,height_ratios=[3,1],hspace=0.0)

            self.ax1 = fig.add_subplot(gs[0])
            self.ax2 = fig.add_subplot(gs[1],sharex=self.ax1)

            plt.setp(self.ax1.get_xticklabels(),visible=False)
            if self.p_logplot:
                self.ax1.set_yscale('log')

            data_label  = 'Data'
            data_color  = 'black'
            data_values = json_data['data']
            ## For vectors (e.g., jet_pt; there is >=1 jet per event)
            if isinstance(data_values[0],list):
                if self._leading:
                    data_values = [i[self.var_entry] for i in data_values]
                else:
                    data_values = list(itertools.chain(*data_values))
            elif self._leading:
                ## catch possible issues here (not putting this with MC, because
                ## it should be caught here)
                try:
                    data_values = [i[self.var_entry] for i in data_values]
                except TypeError:
                    # print("...") with a single argument gives the same output
                    # whether 'print' is a statement or a function
                    print("")
                    print(" Cannot access a pT-sorted value for object ")
                    print(" that is not stored in vector.")
                    print(" If you're trying to access an object that ")
                    print(" should be stored as a vector and isn't, ")
                    print(" please contact the author.")
                    print(" If you're trying to access an object that ")
                    print(" isn't stored as a vector and shouldn't be, ")
                    print(" please fix your error. ")
                    print("")
                    print(" Continuing to next variable. ")
                    print("")

                    plt.close(fig)  # don't leave the unused figure open
                    return


            ## Plot the data points as 'error bars' (circles with error bars)
            ## To get underflow/overflow in python, we need to define the histogram
            ## first with numpy, then add the underflow/overflow values to the
            ## first/last bins (only if requested) before drawing.
            d_hist,_ = np.histogram(data_values,self.plotBins)

            if self.p_underflow:
                underflow   = self.getUnderflow(data_values,[])
                d_hist[0]  += underflow
            if self.p_overflow:
                overflow    = self.getOverflow(data_values,[])
                d_hist[-1] += overflow

            # empty bins become NaN so that no marker is drawn for them
            data_hist   = np.asarray([i if i else float('NaN') for i in d_hist])
            data_err    = np.sqrt(data_hist)
            data_height = data_hist+data_err  # for scaling the y-axis on the plot

            self.ax1.errorbar(bins_mp,data_hist,yerr=data_err,capsize=0,fmt='o',
                              c=data_color,label=data_label,zorder=100)


            ## Now plot the prediction and signal samples
            self.plot_prediction(py_samples,json_data)
            self.plot_signal(py_samples,json_data,signal)

            self.get_uncertainties()

            ##                                       ##
            ## Residual plotting (the Data/MC ratio) ##
            ##                                       ##
            self.y2min = 0.5
            self.y2max = 1.5
            self.y1max = max([max(self.totpred), np.nanmax(data_height)])/0.82
            # Using 'np.nanmax' here because the 'data_height' array may contain
            # float('NaN') values.  In that case, the built-in method 'max' doesn't
            # interpret float('NaN') as 0 (or as any number).

            ## Residual Values (data/prediction subplot)
            self.resid_unc['total']['up'] = list(((self.totpred+self.unc['total']['up'])/self.totpred).repeat(2))
            self.resid_unc['total']['dn'] = list(((self.totpred-self.unc['total']['dn'])/self.totpred).repeat(2))

            residual     = deepcopy( data_hist / self.totpred )
            residual_err = deepcopy( data_err  / self.totpred )

            self.ax2.errorbar(bins_mp,residual,yerr=residual_err,xerr=half_bin_widths,
                              capsize=0,fmt='o',c='black',zorder=100)

            ## Simulation Uncertainties
            self.ax2.fill_between(fill_between_bins,
                                  self.resid_unc['total']['dn'],
                                  self.resid_unc['total']['up'],
                                  **self.p_hatch_args)

            ## labels, legends, and text ##
            self.ax2.axhline(y=1,ls='--',c='k',zorder=1)
            self.config_xaxis(self.ax2)

            ## Set the axis properties of the ratio y-axis
            if any('qcd' in b.lower() for b in self.background):
                y_ratio_label = "Data/Pred."
            else:
                y_ratio_label = "Data/MC"
            self.ax2.set_ylim(ymin=self.y2min,ymax=self.y2max)
            self.ax2.set_yticks(np.asarray([0.6,1.0,1.4]))
            self.ax2.set_yticklabels(self.ax2.get_yticks(),fontProperties,fontsize=self.label_size)
            self.ax2.set_ylabel(y_ratio_label,fontsize=self.label_size,ha='center',va='bottom')


        ## Set the axis properties of the main y-axis
        ax1_unc_bottom = [0.]  # for checking later (in case there is no uncertainty drawn)
        ax1_unc_height = [0.]  # for checking later
        if self.draw_ax1_unc:
            ax1_unc_bottom = list( np.asarray( self.totpred-self.unc['total']['dn'] ).repeat(2) )
            ax1_unc_height = list( np.asarray( self.totpred+self.unc['total']['up'] ).repeat(2) )
            self.ax1.fill_between(fill_between_bins,
                                  ax1_unc_bottom,
                                  ax1_unc_height,
                                  zorder=200,
                                  **self.p_hatch_args)

            # NOTE: for 'eps' output the uncertainty band may need to be covered
            #       with a black outline of the histogram (not implemented here).

            self.y1max = max(self.y1max, max(ax1_unc_height)/0.82)

        ## the rest is common to both kinds of plots, so put it at the end
        ## If the bin width is 10, we don't want 10.0. If it's a float,
        ## we only want to keep the first 2 decimal points
        min_bin_width = min(bin_widths)
        width_parts   = str(min_bin_width).split('.')
        if len(width_parts)==1:
            # no decimal point in the string representation
            bin_width_ = width_parts[0]
        elif float(min_bin_width).is_integer():
            # whole number stored as a float, e.g., 10.0 -> '10'
            # (checking the value, not the first decimal digit, so that a
            #  width like 0.05 is not truncated to '0')
            bin_width_ = "{0:d}".format(int(min_bin_width))
        elif len(width_parts[1])>1:
            bin_width_ = "{0:.2f}".format(min_bin_width)
        else:
            bin_width_ = "{0:.1f}".format(min_bin_width)

        unit_tag = ''
        if self.plot_keys['variables'][self.var]['gev']:
            unit_tag = ' GeV'

        y_main_label = "Events/"+bin_width_+unit_tag


        self.ax1.yaxis.get_label().set_position((0,1))
        self.ax1.set_ylabel(y_main_label,fontsize=self.label_size,ha='right',va='bottom')

        legend_ncol = 2   # shared by the legend and the height estimate below
        ax1_legend  = self.ax1.legend(numpoints=1, fontsize=self.leg_txtsize, ncol=legend_ncol, columnspacing=0.)
        ax1_legend.draw_frame(False)

        ## The following is a hack to change the figure height based on the legend
        ## (so the plot doesn't interfere with the legend).  This is difficult
        ## because the legend is drawn at the very end -- when the plot is made:
        ## Using the assumption that size 22 font is approximately 0.08 (use 0.10) units tall
        ## to get the height of the legend, and then determining what gives the larger axis
        handles,labels = self.ax1.get_legend_handles_labels()
        nrows          = len(handles)//legend_ncol + len(handles)%legend_ncol # approximately number of rows in legend
        legend_height  = float(self.leg_txtsize)/self.atlas_size * 0.10 * nrows
        legend_ax1ymax = max( max(ax1_unc_height), max(self.totpred), np.nanmax(data_height) )/(1-legend_height)
        text_heights   = [legend_ax1ymax,self.y1max]  # compare this new value
                                                      # (height based on legend) with
                                                      # height based on 'ATLAS' label

        self.config_yaxis(self.ax1,self.y1min,max(text_heights))  # configure the y-axis

        self.plot_text()

        print("      Saving file as: {0}.{1}\n".format(outfile,self.p_format))
        plt.savefig(outfile+'.'+self.p_format,bbox_inches='tight',format=self.p_format,dpi=300)
        plt.close()

        return
Example #2
0
    def execute(self):
        """
        Plot the variables!
        This is currently designed so that you can plot multiple variables
        on the same plot, but they need the same binning.  You can also plot
        different samples against each other (each entry in self.data represents
        a single 'sample' -- ttbar, w+jets, TTS, etc.)

        The input files ('self.inputfilenames') are loaded into 'self.data'
        first: parsed with json if 'self.file_type' is 'json', otherwise
        opened with ROOT.  One figure is then made and saved for every
        '/'-separated entry of 'self.p_2dvariables' ('x,y' pairs) and of
        'self.p_1dvariables' (one or more ','-separated variables).

        Note that 'self.p_2dvariables' and 'self.p_1dvariables' are replaced
        by their split lists.

        Returns None.
        """
        if self.file_type=='json':
            loaded_json = []
            for input_data in self.inputfilenames:
                # context manager so that every file handle is closed after parsing
                with open(input_data,'r') as json_file:
                    loaded_json.append(json.load(json_file))
            self.data = loaded_json
        else:
            self.data = [ROOT.TFile.Open(input_data) for input_data in self.inputfilenames]

        ## -- 2D plot
        if self.p_2dvariables:
            self.p_2dvariables = self.p_2dvariables.split('/') # split figures by '/'

            for variable in self.p_2dvariables:
                # -- Making separate figure for these variables
                self.fig,self.ax  = plt.subplots(figsize=(10,8))
                xvar,yvar    = variable.split(',')

                self._leading = {'x':False,'y':False}
                if '[' in xvar:
                    self._leading['x']   = True
                    xvar,self.xvar_entry = brackets(xvar)
                if '[' in yvar:
                    self._leading['y']   = True
                    yvar,self.yvar_entry = brackets(yvar)
                ttree_xvar   = self.PMA_varname2ttreename(xvar)
                self.x_bins  = self.plot_keys['variables'][ttree_xvar]['bins']
                self.x_label = self.plot_keys['variables'][ttree_xvar]['label']
                self.x_min   = self.x_bins[0]
                self.x_max   = self.x_bins[-1]

                ttree_yvar   = self.PMA_varname2ttreename(yvar)
                self.y_bins  = self.plot_keys['variables'][ttree_yvar]['bins']
                self.y_label = self.plot_keys['variables'][ttree_yvar]['label']
                self.y_min   = self.y_bins[0]
                self.y_max   = self.y_bins[-1]

                # Loop over the self.data vector to plot each sample
                # (the original 'x,y' string, brackets included, is passed on)
                self.PMA_plot_2d(variable)

                # Done making plot, setup the axes
                self.config_xaxis(self.ax)
                self.config_yaxis()

                # Save this figure
                outputfilename = '{0}_{1}_{2}_{3}.{4}'.format(xvar,yvar,self.p_lepton,self.p_extra_save,self.format)
                self.finalize(outputfilename)


        ## -- 1D plot
        if self.p_1dvariables:
            self.p_1dvariables = self.p_1dvariables.split('/') # split variables in different plot by '/'

            for variable in self.p_1dvariables:
                # -- Making separate figure for these variables
                self.fig,self.ax  = plt.subplots(figsize=(10,8))
                plot_vars = variable.split(',')  # split variables in same plot by ','

                ## -- Initialize 'default' color scheme to be as aesthetic as possible
                ##    Prefer 'red' and 'blue' if there are just 2 things to plot
                if not self.colors:
                    if len(plot_vars)==2:
                        self.colors = ['red','blue']
                    else:
                        self.colors = self.PMA_plot_colors()
                ## -- Make a default y-axis label
                if not self.y_label:
                    self.y_label = 'Events' if self.p_scalefactor else 'Arbitrary Units'

                ## -- Add each variable in the plot (e.g., comparing pTs or something)
                for v,var in enumerate(plot_vars):
                    self._leading = False
                    if '[' in var:
                        self._leading      = True
                        var,self.var_entry = brackets(var)

                    ttree_name   = self.PMA_varname2ttreename(var) # convert object name to ttree name
                    self.x_bins  = self.plot_keys['variables'][ttree_name]['bins']
                    self.x_label = self.plot_keys['variables'][ttree_name]['label']
                    self.x_min   = self.x_bins[0]
                    self.x_max   = self.x_bins[-1]

                    ## -- Make the plot!
                    self.PMA_plot_1d(v,var)

                    # NOTE(review): 'self.maxpred' and 'self.totpred' are not assigned
                    # in this method -- presumably set by PMA_plot_1d(); confirm.
                    self.y_max = self.maxpred/0.82
                    if self.p_logplot:
                        logplot_ymin = 0.01 # cut-off bottom of y-axis
                        positive     = self.totpred[0][self.totpred[0]>0]
                        if np.size(positive):
                            minimum  = np.min(positive)*0.1
                        else:
                            # nothing above zero: fall back to the cut-off
                            # instead of calling np.min on an empty selection
                            minimum  = logplot_ymin
                        self.y_min   = max( [minimum,logplot_ymin] )
                        self.y_max   = 30.*self.y_max
                    else:
                        self.y_min   = 0.   # this assumes some sort of histogram feature
                                            # where all values are > 0 -- may not be true!

                self.config_xaxis(self.ax) # have to pass axis because the inherited function needs it
                self.config_yaxis()
                self.PMA_draw_legend()

                # Save this figure (named after the last variable drawn in it)
                outputfilename = '{0}_{1}_{2}'.format(var,self.p_lepton,self.p_extra_save)
                self.finalize(outputfilename.replace('.','_')+'.'+self.format)

        return
Example #3
0
    def processObject(self,py_object):
        """
        Classify the object named in a cut and describe how to access it.

        The name is checked, in this order, for:
        - a 'truth' prefix   -> single object, the name is kept as given
        - a '.'              -> a quantity is requested (e.g., jets.pt);
                                a trailing ']' on the quantity additionally
                                requests one entry (e.g., jets.pt[0])
        - a trailing ']'     -> one entry of the object (e.g., jets[0])
        - membership in self.vector_objects -> vector (e.g., jets)
        Anything else is a single object (e.g., lepton or neutrino 'nu').

        Returns a dictionary with the keys 'name', 'isSingle', 'isVector',
        'hasQuantity', 'quantity', 'isLeading', and 'leading_index'.
        """
        # start from the 'nothing known yet' description and fill in what applies
        evtObject = {'name':         py_object,
                     'isSingle':     False,
                     'isVector':     False,
                     'hasQuantity':  False,
                     'quantity':     None,
                     'isLeading':    False,
                     'leading_index':None}

        # first attempt at truth-level selections
        # future support for particle-level??
        if py_object.startswith('truth'):
            evtObject['isSingle'] = True
            return evtObject

        # Quantity (can have leading, vectors, or single)
        if '.' in py_object:
            obj_name,obj_quantity = py_object.split('.')
            evtObject['name']        = obj_name
            evtObject['hasQuantity'] = True

            if obj_quantity.endswith(']'):
                # leading with quantity: pt[0] -> 'pt','0]' -> 'pt','0'
                obj_quantity,entry = obj_quantity.split('[')
                evtObject['isSingle']      = True
                evtObject['isLeading']     = True
                evtObject['leading_index'] = entry.rstrip(']')
            elif obj_name in self.vector_objects:
                # vectors with quantity
                evtObject['isVector'] = True
            else:
                # no other choices (just lepton, nu, met, etc.)
                evtObject['isSingle'] = True

            evtObject['quantity'] = obj_quantity
            return evtObject

        # Leading! (no quantity and no vectors)
        if py_object.endswith(']'):
            evtObject['name']          = py_object.split('[')[0]
            evtObject['isSingle']      = True
            evtObject['isLeading']     = True
            evtObject['leading_index'] = config.brackets(py_object)
            return evtObject

        # Plain name: either a vector (no quantity, no leading) or a single object
        in_vectors = py_object in self.vector_objects
        evtObject['isVector'] = in_vectors
        evtObject['isSingle'] = not in_vectors

        return evtObject