Exemple #1
0
    def merge_json_files(self):
        """Merge json files that are created individually."""
        print "\n   -- Merging JSON files -- \n"

        names_dict = info.physicsSamples()
        names      = names_dict['signal']+names_dict['backgrounds']+['data']
        savePath   = "{0}/{1}".format(info.getJsonPath(),self.p_lepton)
        jsonfiles  = open("share/jsonfiles2plot.txt","r").readlines()
        # jsonfiles is the saved list of all json files made

        for var in self.p_varlist_nolead:

            print " Preparing JSON file for variable ",var
            pathData    = info.getJsonPath()+"{0}/{1}_{2}.json".format(self.p_lepton,var,self.p_outfile)
            merged_data = {}

            for name in jsonfiles:
                logging.info(" Merging {0}".format(name))
                json_filename = name%(var)
                json_filename = json_filename.rstrip('\n')
                if not os.path.isfile(json_filename): 
                    logging.info(" File	{0} does not exist. ".format(json_filename))
                    continue
                temp_data  = json.load(open(json_filename))
                merged_data.update(temp_data)

            with open(pathData,'w') as outputfile:
                json.dump(merged_data, outputfile)

        return
Exemple #2
0
    def __init__(self,cfg_parser):
        """
        Initialize the parameters from the config file.
        
        @param cfg_parser   Object that parsed the configuration file
        """
        logging.getLogger('share/systematics.log')
        loggingLEVEL = logging.getLogger().getEffectiveLevel() # DEBUG, INFO, ERROR, etc.
        logging.info("")
        logging.critical(" -- In tree2hist.py")
        logging.info("  ------------  ")
        logging.info("  Initializing the config file.")

        self.GeV  = 1000.

        ## -- Configuration -- ##
        self.p_varList    = cfg_parser.get('systematics','vars')       # ex. share/varNames.txt
        self.p_rootfiles  = cfg_parser.get('systematics','inputfile')  # ex. share/systematics_ntuples.txt
        self.p_selection  = cfg_parser.get('systematics','selection')  # ex. pre2pre
        self.p_outputname = cfg_parser.get('systematics','outputname') # ex. pre
        self.p_makejsons  = config.str2bool(cfg_parser.get('systematics','make_jsons'))  # ex. True
        self.p_makehists  = config.str2bool(cfg_parser.get('systematics','make_hists'))  # ex. True
        self.p_lepton     = cfg_parser.get('systematics','lepton')     # ex. muel
        self.p_nEvents    = cfg_parser.get('systematics','nevents')    # ex. -1
        ## ------------------- ##

        self.p_varList  = open(self.p_varList,'r').readlines() # update the variable
        self.p_varList  = [v.rstrip('\n') for v in self.p_varList]

        if not self.p_makejsons and not self.p_makehists:
            print
            print " You didn't specify outputs (either json, hist, or both)"
            print " for the systematics output."
            print
            sys.exit(1)

        self.json_path = info.getJsonPath()+self.p_lepton
        self.hist_path = self.json_path.split('json')[0]+'hists/'+self.p_lepton

        ## Run the program
        self.main()

        return
Exemple #3
0
def ROOT2json(cfg_parser):
    """
    For converting data in the ROOT files to .json files for plotting.
    Increases ease-of-use for making Data/MC plots (don't have to
    re-process the ROOT file just to make a single plot).

    Writes one json file per (sample,variable) pair and records every
    filename template in 'share/jsonfiles2plot.txt' for later merging.

    @param cfg_parser     Object which parsed the configuration file
    """
    # NOTE(review): getLogger() only retrieves a logger by name; it does not
    # attach a file handler for 'share/datamc.log' -- confirm one is
    # configured elsewhere if file logging is expected.
    logging.getLogger('share/datamc.log')
    logging.info("")
    logging.critical(" -- In root2json.py")
    logging.info("  ------------  ")
    logging.info("  Initializing the config file.")

    ## -- Configuration -- ##
    p_varList    = cfg_parser.get('datamc','vars')         # ex. 'share/varNames.txt'
    p_inputfiles = cfg_parser.get('datamc','rootfiles')    # ex. 'share/datamc_ntuples.txt'
    p_lepton     = cfg_parser.get('datamc','lepton')       # ex. muel (both muon & electron)
    p_outfile    = cfg_parser.get('datamc','jsonfilename') # ex. 'elLHMedium_pre_A1_1tagin'
    p_nEvents    = int(cfg_parser.get('datamc','nEvents')) # ex. -1 (validated here; not used below)
    ## ------------------- ##

    treename   = info.treename()
    savePath   = "{0}/{1}".format(info.getJsonPath(),p_lepton)
    if not os.path.isdir(savePath):
        os.makedirs(savePath)
    logging.info("  Set the output path: {0}".format(savePath))

    ## -- Load the list of input ROOT files
    inputfiles = info.read_txt_file(p_inputfiles)
    if not inputfiles:
        print
        print " ERROR: File {0} is empty (no files!) ".format(p_inputfiles)
        print
        from sys import exit
        exit(1)

    ## -- Strip any '[index]' suffix and de-duplicate the variable names
    i_varList  = info.read_txt_file(p_varList)
    varList    = []
    for p_var in i_varList:
        p_var = p_var.split('[')[0]
        if p_var not in varList:
            varList.append(p_var)

    ## -- Loop over input files
    logging.info(" inputfiles = {0}".format(inputfiles))
    logged_files = {}  # keeping track if a sample has been used before
                       # the list of input files may have multiple root files
                       # for the same sample (ttbar, wjets, etc.)

    ## -- Make a simple text file that stores all of the json files we just made
    newfile = open("share/jsonfiles2plot.txt","w")
    for p in inputfiles:
        jsonData = config.AutoVivification()
        p_file   = ROOT.TFile.Open(p)
        p_tree   = p_file.Get(treename)

        p_tree.GetEntry(0) # just to get the mcChannelNumber
        name     = config.getSampleName(root_tree=p_tree,dsid=p_tree.mcChannelNumber)['name']
        # need different names from each file (otherwise different ttbar files
        # will overwrite each other!)

        ## -- load the new DataMC object (re-use the existing one if this
        ##    sample name has already been seen)
        if name not in logged_files.keys():
            entry = DataMC_Type(name)
            logged_files[name] = entry

            for var in varList:
                entry.varVals[var]      = []
                entry.scaleFactors[var] = []
                entry.lepCharges[var]   = []
                entry.lepNames[var]     = []
        else:
            entry = logged_files[name]

        print "\n   ++ Producing json file from {0}\n".format(p)
        logging.info("   ++ Running {0}".format(name))

        ## -- Attach the data (values,weights) to each DataMC object
        entry = addData.addData(entry, p_tree, varList, cfg_parser)  # Get data from ROOT

        logging.info(" Exporting data to json format.")

        ## -- Log the DataMC object in the dictionary
        ##    not sure that this is being used effectively...
        logged_files[name] = entry

        ## Save each json file now that we have looped over the file
        logging.info("  Saving json information.")

        # '%s' placeholder is filled with the variable name below (and again
        # when the files are merged downstream).
        outfile_name = '{0}/%s_{1}_{2}.json'.format(savePath,p_outfile,name)
        newfile.write("%s\n" % outfile_name)
        for var in varList:

            # put information in dictionaries to be saved to json
            jsonData[var][entry.name]             = entry.varVals[var]
            jsonData[var][entry.name+'_weight']   = entry.scaleFactors[var]
            jsonData[var][entry.name+'_lepNames'] = entry.lepNames[var]
            jsonData[var]['LUMI']                 = info.LUMI()

            print "      Saving output to {0}".format(outfile_name%(var))
            # fix: log the substituted filename (was logging the raw template,
            # inconsistent with the print above)
            logging.info("   -- Saving output to {0}".format(outfile_name%(var)))
            with open(outfile_name%(var),'w') as outputfile:
                json.dump(jsonData[var], outputfile)

        p_file.Close()  # release the ROOT file handle before the next sample

    newfile.close()  # flush the file list for downstream merging
    logging.info("  End root2json.py")

    return
Exemple #4
0
    def datamcplotter(self,variable):
        """
        Run the script that makes the figure and adjust parameters.

        Builds the Data/MC comparison figure for a single variable:
        loads the pre-made json data, histograms the data points (unless
        blinded), draws the stacked prediction, signal overlays and
        uncertainty bands, adds a Data/MC ratio subplot when unblinded,
        and saves the figure to disk.

        @param variable   Variable name; may carry a '[index]' suffix to
                          select a pT-sorted entry from a vector branch.
        """
        self.var = variable

        # A '[' in the name means a pT-sorted entry of a vector branch is
        # requested (e.g. 'jet_pt[0]'); brackets() splits name and index.
        self._leading = False
        if '[' in self.var:
            self._leading           = True
            self.var,self.var_entry = brackets(self.var)

        ## -- for pre-selection, skip variables that aren't defined
        if self.var in self.notforpre and 'pre' in self.p_jsonfilename:
            return

        ## -- Set the output file
        ana_status_label = self.p_ana_status.replace(' ','_')
        outfile    = "{0}{1}_{2}_{3}{4}_{5}".format(self.pathSave,self.var,self.p_jsonfilename,ana_status_label,self.p_extra_saveAs,self.timeStamp)
        ## -- Load our data!
        pathData  = info.getJsonPath()+"{0}/{1}_{2}.json".format(self.p_lepton,self.var,self.p_jsonfilename)
        json_data = json.load(open(pathData))

        ## -- Initializing various bin related arrays
        self.plotBins   = self.plot_keys['variables'][self.var]['bins']  # Binning for this histogram
        plotBins_array  = np.asarray(self.plotBins)
        bins_mp         = 0.5*(plotBins_array[:-1]+plotBins_array[1:]) # midpoint in bins
        bin_widths      = plotBins_array[1:]-plotBins_array[:-1]       # bin widths
        half_bin_widths = bin_widths/2.                                # half of bin widths (xerr)


        ## -- Plot only desired signals
        ##    Recommended format = 'TTS_M800','TTS_M1400',etc. (csv format)
        try:
            if int(self.p_plot_signal) < 0:
                # self.p_plot_signal = -1  (or some other negative number to mean 'ALL')
                signal = self.physics_samples['signal']
            else:
                # self.p_plot_signal = 1400 (or something to plot a single mass point)
                signal = [i for i in self.physics_samples['signal'] if self.p_plot_signal in i]
        except:
            # self.p_plot_signal = [comma separated values (works for >= 0 entries)]
            signal = self.p_plot_signal.split(',')

        ##                         ##
        ## -- Setting up figure -- ##
        ##                         ##
        self.x_label = self.plot_keys['variables'][self.var]['label']
        self.x_min   = self.plotBins[0]   # horizontal axis scale
        self.x_max   = self.plotBins[-1]
        self.y1min   = 0.   # set the minimum of the y-axis (for logplots, see 'plot_prediction()')

        py_samples = [i for i in json_data.keys() if '_weight' in i] # just grabbing what's in the json file
        py_samples = [i.split('_weight')[0] for i in py_samples]

        ## plotting all samples in one command, need lists of everything to do that
        self.entry_values = []
        self.entry_scales = []
        self.entry_labels = []
        self.entry_colors = []

        ## plotting uncertainty bands
        # Bin edges with interior edges duplicated produce the step-shaped x
        # values that pair with the .repeat(2) y-arrays in fill_between below.
        fill_between_bins = np.asarray(self.plotBins)  ## for plotting hatch uncertainty
        fill_between_bins = [self.plotBins[0]]+list(fill_between_bins[1:-1].repeat(2))+[self.plotBins[-1]]

        if self.p_blind:
            ## No data ##
            fig, self.ax1 = plt.subplots(figsize=(10,8))

            if self.p_logplot:
                self.ax1.set_yscale('log')
            self.plot_prediction(py_samples,json_data)
            self.plot_signal(py_samples,json_data,signal)

            ## Set the axis properties of the main x-axis
            self.config_xaxis(self.ax1)

            self.get_uncertainties()
            # the /0.82 factor leaves headroom above the stack for labels
            self.y1max  = max(self.totpred)/0.82
            data_height = [0. for i in self.totpred]

        else:
            ##  Data  ##
            # Two stacked panes: main plot on top, Data/MC ratio below.
            fig = plt.figure(figsize=(10,8))
            gs  = gridspec.GridSpec(2,1,height_ratios=[3,1],hspace=0.0)

            self.ax1 = fig.add_subplot(gs[0])
            self.ax2 = fig.add_subplot(gs[1],sharex=self.ax1)

            plt.setp(self.ax1.get_xticklabels(),visible=False)
            if self.p_logplot:
                self.ax1.set_yscale('log')

            data_label  = 'Data'
            data_color  = 'black'
            data_values = json_data['data']
            ## For vectors (e.g., jet_pt; there is >=1 jet per event)
            if type(data_values[0])==list:
                if self._leading:
                    data_values = [i[self.var_entry] for i in data_values]
                else:
                    data_values = list(itertools.chain(*data_values))
            else:
                ## catch possible issues here (not putting this with MC, because
                ## it should be caught here)
                if self._leading:
                    try:
                        data_values = [i[self.var_entry] for i in data_values]
                    except TypeError:
                        # Scalar branch requested with a '[index]' suffix --
                        # warn the user and skip this variable.
                        print
                        print " Cannot access a pT-sorted value for object "
                        print " that is not stored in vector."
                        print " If you're trying to access an object that "
                        print " should be stored as a vector and isn't, "
                        print " please contact the author."
                        print " If you're trying to access an object that "
                        print " isn't stored as a vector and shouldn't be, "
                        print " please fix your error. "
                        print
                        print " Continuing to next variable. "
                        print

                        return


            ## Plot the data points as 'error bars' (circles with error bars)
            ## Check if we want underflow and/or overflow
            if not self.p_underflow and not self.p_overflow:
                d_hist,bins = np.histogram(data_values,self.plotBins)
                # empty bins become NaN so matplotlib draws no marker there
                data_hist   = np.asarray([i if i else float('NaN') for i in d_hist])
                data_err    = np.sqrt(data_hist)
                data_height = data_hist+data_err  # for scaling the y-axis on the plot

                self.ax1.errorbar(bins_mp,data_hist,yerr=data_err,capsize=0,fmt='o',
                                  c=data_color,label=data_label,zorder=100)
            else:
                ## To get underflow/overflow in python, we need to define the histograms
                ## first with numpy, then plot them in matplotlib (after adding the 
                ## underflow/overflow values to first/last bins.
                ## For data, we can use the histogram we already made (because this is
                ## not plotted with ax.hist(), but as error bars instead.
                d_hist,bins = np.histogram(data_values,self.plotBins)

                if self.p_underflow:
                    underflow   = self.getUnderflow(data_values,[])
                    d_hist[0]  += underflow
                if self.p_overflow:
                    overflow    = self.getOverflow(data_values,[])
                    d_hist[-1] += overflow

                # empty bins become NaN so matplotlib draws no marker there
                data_hist   = np.asarray([i if i else float('NaN') for i in d_hist])
                data_err    = np.sqrt(data_hist)
                data_height = data_hist+data_err  # for scaling the y-axis on the plot

                ## Now make the plot.  Use the numpy histogram output as the weights
                ## so that the histogram function still works
                ## Use the binning as a proxy for 'data' so that we get 1 entry per bin
                ## and the new values for weights scale the hist appropriately
                self.ax1.errorbar(bins_mp,data_hist,yerr=data_err,capsize=0,fmt='o',
                                  c=data_color,label=data_label,zorder=100)


            ## Now plot the prediction and signal samples
            self.plot_prediction(py_samples,json_data)
            self.plot_signal(py_samples,json_data,signal)

            self.get_uncertainties()

            ##                                       ##
            ## Residual plotting (the Data/MC ratio) ##
            ##                                       ##
            self.y2min = 0.5
            self.y2max = 1.5
            self.y1max = max([max(self.totpred), np.nanmax(data_height)])/0.82
            # Using 'np.nanmax' here because the 'data_height' array may contain
            # float('NaN') values.  In that case, the built-in method 'max' doesn't
            # interpret float('NaN') as 0 (or as any number).

            ## Residual Values (data/prediction subplot)
            # .repeat(2) makes step-shaped arrays matching fill_between_bins
            self.resid_unc['total']['up'] = list(((self.totpred+self.unc['total']['up'])/self.totpred).repeat(2))
            self.resid_unc['total']['dn'] = list(((self.totpred-self.unc['total']['dn'])/self.totpred).repeat(2))

            residual     = deepcopy( data_hist / self.totpred )
            residual_err = deepcopy( data_err  / self.totpred )

            self.ax2.errorbar(bins_mp,residual,yerr=residual_err,xerr=half_bin_widths,\
                         capsize=0,fmt='o',c='black',zorder=100)

            ## Simulation Uncertainties
            self.ax2.fill_between(fill_between_bins,\
                                  self.resid_unc['total']['dn'],\
                                  self.resid_unc['total']['up'],\
                                  **self.p_hatch_args)

            ## labels, legends, and text ##
            self.ax2.axhline(y=1,ls='--',c='k',zorder=1)
            self.config_xaxis(self.ax2)

            ## Set the axis properties of the ratio y-axis
            if any('qcd' in b.lower() for b in self.background):
                y_ratio_label = "Data/Pred."
            else:
                y_ratio_label = "Data/MC"
            self.ax2.set_ylim(ymin=self.y2min,ymax=self.y2max)
            self.ax2.set_yticks(np.asarray([0.6,1.0,1.4]))
            self.ax2.set_yticklabels(self.ax2.get_yticks(),fontProperties,fontsize=self.label_size)
            self.ax2.set_ylabel(y_ratio_label,fontsize=self.label_size,ha='center',va='bottom')


        ## Set the axis properties of the main y-axis
        ax1_unc_bottom = [0.]  # for checking later (in case there is no uncertainty drawn)
        ax1_unc_height = [0.]  # for checking later
        if self.draw_ax1_unc:
            ax1_unc_bottom = list( np.asarray( self.totpred-self.unc['total']['dn'] ).repeat(2) )
            ax1_unc_height = list( np.asarray( self.totpred+self.unc['total']['up'] ).repeat(2) )
            self.ax1.fill_between(fill_between_bins,\
                                  ax1_unc_bottom,\
                                  ax1_unc_height,\
                                  zorder=200,\
                                  **self.p_hatch_args)

#               need to cover up the uncertainty band (if it's the green band) with black line for histogram
#                if p_format=='eps':
#                    dummy_hist, d_bins, d_patches = ax1.hist(entry.var_vals, bins=plot_bins, weights = entry.scale_factors,
#                                                    histtype = 'step', bottom = bottom_edge,
#                                                    color = 'k', zorder = 999)


            self.y1max = max(self.y1max, max(ax1_unc_height)/0.82)

        ## the rest is common to both kinds of plots, so put it at the end
        ## If the bin width is 10, we don't want 10.0. If it's a float,
        ## we only want to keep the first 2 decimal points
        bin_width_   = str(min(bin_widths)).split('.')
        if len(bin_width_)>1:
            if bin_width_[1][0]=='0':
                bin_width_ = bin_width_[0]
            else:
                if len(bin_width_[1])>1:
                    bin_width_ = "{0:.2f}".format(min(bin_widths))
                else:
                    bin_width_ = "{0:.1f}".format(min(bin_widths))
        else:
            bin_width_ = bin_width_[0]

        unit_tag = ''
        if self.plot_keys['variables'][self.var]['gev']:
            unit_tag = ' GeV'

        y_main_label = "Events/"+bin_width_+unit_tag


        self.ax1.yaxis.get_label().set_position((0,1))
        self.ax1.set_ylabel(y_main_label,fontsize=self.label_size,ha='right',va='bottom')

        ax1_legend = self.ax1.legend(numpoints=1, fontsize=self.leg_txtsize, ncol=2, columnspacing=0.)
        ax1_legend.draw_frame(False)

        ## The following is a hack to change the figure height based on the legend
        ## (so the plot doesn't interfere with the legend).  This is difficult
        ## because the legend is drawn at the very end -- when the plot is made:
        ## Using the assumption that size 22 font is approximately 0.08 (use 0.10) units tall
        ## to get the height of the legend, and then determing what gives the larger axis
        # NOTE(review): relies on the private matplotlib attribute
        # 'ax1_legend._ncol' -- may break with newer matplotlib versions.
        handles,labels = self.ax1.get_legend_handles_labels()
        nrows          = len(handles)/ax1_legend._ncol + len(handles)%ax1_legend._ncol # approximately number of rows in legend
        legend_height  = float(self.leg_txtsize)/self.atlas_size * 0.10 * nrows
        legend_ax1ymax = max( max(ax1_unc_height), max(self.totpred), np.nanmax(data_height) )/(1-legend_height)
        text_heights   = [legend_ax1ymax,self.y1max]  # compare this new value 
                                                      # (height based on legend) with 
                                                      # height based on 'ATLAS' label 

        self.config_yaxis(self.ax1,self.y1min,max(text_heights))  # configure the y-axis

        self.plot_text()

        print "      Saving file as: {0}.{1}\n".format(outfile,self.p_format)
        plt.savefig(outfile+'.'+self.p_format,bbox_inches='tight',format=self.p_format,dpi=300)
        plt.close()

        return