def plot_fit_results( histograms, category, channel ):
    '''
        Draws one data/background/signal comparison plot per fit variable and
        per bin of the current (global) variable.

        histograms: nested mapping, accessed as
                    histograms[fit_variable][variable_bin][sample] with the
                    samples 'data', 'signal' and 'background'
        category:   name of the sub-folder the plots are stored in
        channel:    prefix of the plot names, also handed to get_cms_labels
                    to obtain the plot title

        Every plot is produced on each call; there is no check for files that
        already exist.
    '''
    global variable, b_tag_bin, output_folder
    from tools.plotting import Histogram_properties, make_data_mc_comparison_plot

    variable_bins = variable_bins_ROOT[variable]
    # the output folder depends neither on the bin nor on the fit variable
    path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_results/'
    make_folder_if_not_exists( path )

    for variable_bin in variable_bins:
        for fit_variable in histograms:
            # NOTE: the former 'check if template plots exist already' loop
            # was removed. Its 'continue' only applied to the loop over the
            # output formats, so it never prevented a plot from being made.
            templates = histograms[fit_variable][variable_bin]

            histogram_properties = Histogram_properties()
            histogram_properties.name = channel + '_' + fit_variable + '_bin_' + variable_bin
            histogram_properties.x_axis_title = fit_variables_latex[fit_variable]
            histogram_properties.y_axis_title = 'Events/(%s)' % get_unit_string( fit_variable )
            # get_cms_labels returns a pair, only the first entry is the title
            label, _ = get_cms_labels( channel )
            histogram_properties.title = label
            histogram_properties.x_limits = measurement_config.fit_boundaries[fit_variable]

            # plot with matplotlib
            make_data_mc_comparison_plot( [templates['data'], templates['background'], templates['signal']],
                                         ['data', 'background', 'signal'],
                                         ['black', 'green', 'red'], histogram_properties,
                                         save_folder = path, save_as = output_formats )
def plot_fit_results(histograms, category, channel):
    '''
        Draws one data/background/signal comparison plot for every bin of the
        current (global) variable.

        histograms: nested mapping, accessed as
                    histograms[variable_bin][sample] with the samples
                    'data', 'signal' and 'background'
        category:   name of the sub-folder the plots are stored in
        channel:    prefix of the plot names and of the x-axis title, also
                    handed to get_cms_labels to obtain the plot title

        Every plot is produced on each call; there is no check for files that
        already exist.
    '''
    global variable, b_tag_bin, output_folder
    from tools.plotting import Histogram_properties, make_data_mc_comparison_plot

    variable_bins = variable_bins_ROOT[variable]
    # the output folder is the same for all bins
    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable + '/' + category + '/fit_results/'
    make_folder_if_not_exists(path)

    for variable_bin in variable_bins:
        # NOTE: the former 'check if template plots exist already' loop was
        # removed. Its 'continue' only applied to the loop over the output
        # formats, so it never prevented a plot from being made.
        templates = histograms[variable_bin]

        histogram_properties = Histogram_properties()
        histogram_properties.name = channel + '_bin_' + variable_bin
        histogram_properties.x_axis_title = channel + ' $\left|\eta\\right|$'
        histogram_properties.y_axis_title = 'events/0.2'
        histogram_properties.title = get_cms_labels(channel)

        # plot with matplotlib
        make_data_mc_comparison_plot([templates['data'], templates['background'], templates['signal']],
                                     ['data', 'background', 'signal'],
                                     ['black', 'green', 'red'], histogram_properties,
                                     save_folder = path, save_as = output_formats)
def plot_central_and_systematics( channel, systematics, exclude = [], suffix = 'altogether' ):
    '''
        Overlays the central measurement and the systematic variations of the
        normalised cross section in a single matplotlib figure and stores the
        figure once per entry of output_formats.

        channel:     channel whose results are read; selects the k-value and
                     the labels drawn on the plot
        systematics: sequence of systematic names; the position of a name in
                     the sequence determines its marker colour
        exclude:     systematics that are not drawn
        suffix:      part of the output file name
    '''
    global variable, k_values, b_tag_bin, met_type

    plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' )
    axes = plt.axes()
    axes.minorticks_on()

    central = read_xsection_measurement_results( 'central', channel )[0]['unfolded_with_systematics']
    central.markersize = 2  # in points
    central.marker = 'o'

    x_title = '$%s$ [GeV]' % variables_latex[variable]
    y_title = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + variables_latex[variable] + '} \left[\mathrm{GeV}^{-1}\\right]$'
    plt.xlabel( x_title, CMS.x_axis_title )
    plt.ylabel( y_title, CMS.y_axis_title )
    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )

    rplt.errorbar( central, axes = axes, label = 'data', xerr = True )

    for systematic in sorted( systematics ):
        if systematic == 'central' or systematic in exclude:
            continue

        variation = read_xsection_measurement_results( systematic, channel )[0]['unfolded']
        variation.markersize = 2
        variation.marker = 'o'
        # colour follows the position in the unsorted input; 10 is replaced by 42
        colour = systematics.index( systematic ) + 2
        if colour == 10:
            colour = 42
        variation.SetMarkerColor( colour )

        # pick the legend entry matching the type of systematic
        if 'PDF' in systematic:
            legend_entry = systematic.replace( 'Weights_', ' ' )
        elif met_type in systematic:
            legend_entry = met_systematics_latex[systematic.replace( met_type, '' )]
        else:
            legend_entry = measurements_latex[systematic]
        rplt.errorbar( variation, axes = axes, label = legend_entry, xerr = None )

    plt.legend( numpoints = 1, loc = 'center right', prop = {'size':25}, ncol = 2 )
    label, channel_label = get_cms_labels( channel )
    plt.title( label, CMS.title )
    # CMS text
    # note: fontweight/weight has no effect because the text is rendered with LaTeX
    plt.text( 0.95, 0.95, r"\textbf{CMS}", transform = axes.transAxes, fontsize = 42,
              verticalalignment = 'top', horizontalalignment = 'right' )
    # channel text
    axes.text( 0.95, 0.90, r"\emph{%s}" % channel_label, transform = axes.transAxes, fontsize = 40,
               verticalalignment = 'top', horizontalalignment = 'right' )
    plt.tight_layout()

    path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable
    make_folder_if_not_exists( path )
    for output_format in output_formats:
        kv_tag = '_kv' + str( k_values[channel] )
        filename = path + '/normalised_xsection_' + channel + '_' + suffix + kv_tag + '.' + output_format
        # the combined channel carries no k-value in its file name
        if channel == 'combined':
            filename = filename.replace( kv_tag, '' )
        plt.savefig( filename )

    plt.close()
    gc.collect()
def make_template_plots(histograms, category, channel):
    '''
        Draws the signal, V+Jets and QCD fit templates for every bin of the
        current (global) variable and stores each figure once per entry of
        output_formats.

        histograms: nested mapping, accessed as
                    histograms[variable_bin][sample] with the samples
                    'signal', 'V+Jets' and 'QCD'
        category:   name of the sub-folder the plots are stored in; also used
                    in the warning for empty templates
        channel:    part of the plot file name, handed to get_cms_labels for
                    the plot title

        Every plot is produced on each call; there is no check for files that
        already exist.
    '''
    global variable, output_folder

    variable_bins = variable_bins_ROOT[variable]
    # the output folder is the same for all bins
    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable + '/' + category + '/fit_templates/'
    make_folder_if_not_exists(path)
    warning = "WARNING: in %s bin %s, %s category, %s channel, %s template is empty: not plotting."

    for variable_bin in variable_bins:
        # NOTE: the former 'check if template plots exist already' loop was
        # removed. Its 'continue' only applied to the loop over the output
        # formats, so it never prevented a plot from being made.
        plotname = path + channel + '_templates_bin_' + variable_bin

        # plot with matplotlib
        templates = histograms[variable_bin]
        h_signal = templates['signal']
        h_VJets = templates['V+Jets']
        h_QCD = templates['QCD']

        h_signal.linecolor = 'red'
        h_VJets.linecolor = 'green'
        h_QCD.linecolor = 'gray'
        h_VJets.linestyle = 'dashed'
        h_QCD.linestyle = 'dotted'# currently not working
        #bug report: http://trac.sagemath.org/sage_trac/ticket/13834

        h_signal.linewidth = 5
        h_VJets.linewidth = 5
        h_QCD.linewidth = 5

        plt.figure(figsize=(16, 16), dpi=200, facecolor='white')
        axes = plt.axes()
        axes.minorticks_on()

        plt.xlabel(r'lepton $|\eta|$', CMS.x_axis_title)
        plt.ylabel('normalised to unit area/0.2', CMS.y_axis_title)
        plt.tick_params(**CMS.axis_label_major)
        plt.tick_params(**CMS.axis_label_minor)

        rplt.hist(h_signal, axes=axes, label='signal')
        # background templates are only drawn if they have a non-zero integral
        for label, template in (('V+Jets', h_VJets), ('QCD', h_QCD)):
            if template.Integral() != 0:
                rplt.hist(template, axes=axes, label=label)
            else:
                print(warning % (variable, variable_bin, category, channel, label))
        axes.set_ylim([0, 0.2])

        plt.legend(numpoints=1, loc='upper right', prop=CMS.legend_properties)
        plt.title(get_cms_labels(channel), CMS.title)
        plt.tight_layout()

        for output_format in output_formats:
            plt.savefig(plotname + '.' + output_format)

        plt.close()
        gc.collect()
Example #5
0
    def submit(self):
        '''
            Submits all registered jobs to the local HTCondor scheduler using
            a job template (DailyPythonScripts/condor/job_template) description
            file and the 'condor_submit' command

            For every prepared job a pickle file and a job description file
            are written to 'jobs/<dd-mm-YYYY>/'. Afterwards
            './condor/prepare_dps.sh' is run once and every description file
            is handed to 'condor_submit'; each command is waited for.
        '''
        today = time.strftime("%d-%m-%Y")
        job_folder = 'jobs/{0}/'.format(today)
        make_folder_if_not_exists(job_folder)
        make_folder_if_not_exists(job_folder + 'logs')
        # construct jobs
        self._construct_jobs()
        # convert each job into a pickle file
        # construct a class ad for each job
        with open('condor/job_template', 'r') as template:
            job_template = template.read()
        condor_jobs = []

        # prepare DPS for submission
        self._dps_tar_directory_on_hdfs = '/TopQuarkGroup/condor_dps/{you}/{now}/'.format(
            you = getpass.getuser(),
            now = time.strftime('%d_%m_%Y_%H_%M')
        )

        for i, job in enumerate(self.prepared_jobs):
            job_file = job_folder + 'job_{0}.pkl'.format(i)
            job_desc_file = job_folder + 'job_{0}.dsc'.format(i)

            input_files = []
            if hasattr(job, 'additional_input_files'):
                input_files.extend(job.additional_input_files)

            # placeholders of the template, substituted in this order
            substitutions = [
                ('%pkl_file%', job_file),
                ('%dir_of_dps_on_hdfs%', self._dps_tar_directory_on_hdfs),
                ('%total_memory%', str(self.request_memory)),
                ('%n_jobs_to_run%', str(self.n_jobs_to_run)),
                ('%n_jobs_to_split%', str(self.n_jobs_to_split)),
                ('%input_files%', ','.join(input_files)),
                ('%today%', today),
            ]
            job_description = job_template
            for placeholder, value in substitutions:
                job_description = job_description.replace(placeholder, value)

            # pickle needs a binary file object (text mode fails on Python 3)
            with open(job_file, 'wb') as jf:
                pickle.dump(job, jf)
            with open(job_desc_file, 'w+') as jdf:
                jdf.write(job_description)

            condor_jobs.append(job_desc_file)

        prepare_process = subprocess.Popen(['./condor/prepare_dps.sh', self._dps_tar_directory_on_hdfs])
        prepare_process.communicate()

        # submit jobs
        for j in condor_jobs:
            p = subprocess.Popen(['condor_submit', j])
            p.communicate()  # wait until command completed
def unfold_results(results, category, channel, h_truth, h_measured, h_response,
                   method):
    '''
        Unfolds the given results and exports the unfolding objects to ROOT
        files.

        results:    input handed to value_error_tuplelist_to_hist together
                    with the bin edges of the current (global) variable
        category:   name of the measurement category; for anything other than
                    'central' unfoldCfg.Hreco is set to 0 before unfolding
        channel:    part of the output path of the exported ROOT files
        h_truth, h_measured, h_response:
                    histograms used to construct the Unfolding object
        method:     unfolding method; 'TSVDUnfold' objects are accessed
                    directly, for every other method via Impl()

        Returns the unfolded histogram converted by
        hist_to_value_error_tuplelist.
    '''
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # turning off the unfolding errors for systematic samples
    # NOTE(review): unfoldCfg.Hreco is module-wide state and is not restored
    # here - a later 'central' call would presumably still see 0; confirm.
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    # export the D and SV distributions
    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(
        unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)
    if method == 'TSVDUnfold':
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_' + category + '.root',
            'recreate')
        directory = SVDdist.mkdir('SVDdist')
        # make the new directory the current one before calling Write()
        directory.cd()
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
        #    unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    else:
        # for all other methods the file name carries the Hreco setting and
        # the input histograms are stored alongside the D and SV distributions
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_Hreco' +
            str(unfoldCfg.Hreco) + '_' + category + '.root', 'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
        #    unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()

    # export the whole unfolding object if it doesn't exist
    if method == 'TSVDUnfold':
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str(
            unfoldCfg.Hreco) + '_' + category + '.root'
    # an existing file is never overwritten
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        directory = unfoldingObjectFile.mkdir('unfoldingObject')
        directory.cd()
        if method == 'TSVDUnfold':
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def make_plots_matplotlib(histograms, category, output_folder, histname):
    '''
        Plots the unfolded and the measured distribution together with all
        other histograms of the given collection and stores the figure once
        per entry of output_formats.

        histograms:    mapping of names to histograms; must contain the keys
                       'unfolded' and 'measured'. Every key that contains
                       neither of these words is drawn as a dashed line and
                       needs an entry in measurements_latex_matplotlib
        category:      name of the sub-folder the plots are stored in
        output_folder: base folder for the plots
        histname:      base name of the output files; the channel is taken
                       from it ('electron', 'muon', otherwise 'combined')

        The figure is closed once it has been saved.
    '''
    global variable, variables_latex_matplotlib, measurements_latex_matplotlib, k_value

    if 'electron' in histname:
        channel = 'electron'
    elif 'muon' in histname:
        channel = 'muon'
    else:
        channel = 'combined'

    # plot with matplotlib
    hist_data = histograms['unfolded']
    hist_measured = histograms['measured']

    hist_data.markersize = 2
    hist_measured.markersize = 2
    hist_data.marker = 'o'
    hist_measured.marker = 'o'
    hist_measured.color = 'red'

    plt.figure(figsize=(14, 10), dpi=200, facecolor='white')
    axes = plt.axes()
    axes.minorticks_on()

    plt.xlabel('$%s$ [GeV]' % variables_latex_matplotlib[variable], CMS.x_axis_title)
    plt.ylabel(r'$\frac{1}{\sigma} \times \frac{d\sigma}{d' + variables_latex_matplotlib[variable] + '} \left[\mathrm{GeV}^{-1}\\right]$', CMS.y_axis_title)
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)

    rplt.errorbar(hist_data, axes=axes, label='unfolded', xerr=False)
    rplt.errorbar(hist_measured, axes=axes, label='measured', xerr=False)

    for key, hist in histograms.items():
        # the two measurement histograms have been drawn above
        if 'unfolded' in key or 'measured' in key:
            continue
        hist.linestyle = 'dashed'
        hist.linewidth = 2
        # setting colours; keys matching none of the names keep their colour
        if 'POWHEG' in key or 'matchingdown' in key:
            hist.SetLineColor(kBlue)
        elif 'MADGRAPH' in key or 'matchingup' in key:
            hist.SetLineColor(kRed + 1)
        elif 'MCATNLO' in key or 'scaleup' in key:
            hist.SetLineColor(kMagenta + 3)
        elif 'scaledown' in key:
            hist.SetLineColor(kGreen)
        rplt.hist(hist, axes=axes, label=measurements_latex_matplotlib[key])

    plt.legend(numpoints=1, loc='upper right', prop=CMS.legend_properties)
    plt.title(get_cms_labels_matplotlib(channel), CMS.title)
    plt.tight_layout()

    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable + '/' + category
    make_folder_if_not_exists(path)
    for output_format in output_formats:
        plt.savefig(path + '/' + histname + '_kv' + str(k_value) + '.' + output_format)

    # release the figure, otherwise every call leaves one more figure open
    plt.close()
def main():
    '''
        Main function for this script

        Parses the command line options, creates the output folder and calls
        create_unfolding_pull_data. The number of input toy MC (-n) is used
        for both the toy MC and the toy data.
    '''
    set_root_defaults(msg_ignore_level=3001)

    parser = OptionParser()
    parser.add_option("-o", "--output",
                      dest="output_folder", default='data/pull_data/',
                      help="output folder for pull data files")
    parser.add_option("-n", "--n_input_mc", type=int,
                      dest="n_input_mc", default=100,
                      help="number of toy MC used for the tests")
    parser.add_option("-k", "--k_value", type=int,
                      dest="k_value", default=3,
                      help="k-value for SVD unfolding")
    parser.add_option("--tau", type='float',
                      dest="tau_value", default=-1.,
                      help="tau-value for SVD unfolding")
    parser.add_option("-m", "--method", type='string',
                      dest="method", default='RooUnfoldSvd',
                      help="unfolding method")
    parser.add_option("-f", "--file", type='string',
                      dest="file", default='data/toy_mc/unfolding_toy_mc.root',
                      help="file with toy MC")
    parser.add_option("-v", "--variable", dest="variable", default='MET',
                      help="set the variable to analyse (MET, HT, ST, MT, WPT)")
    # the help text has to quote the actual default (13)
    parser.add_option("-s", "--centre-of-mass-energy", dest="CoM", default=13,
                      help='''set the centre of mass energy for analysis.
                      Default = 13 [TeV]''', type=int)
    parser.add_option("-c", "--channel", type='string',
                      dest="channel", default='combined',
                      help="channel to be analysed: electron|muon|combined")

    parser.add_option("--offset_toy_mc", type=int,
                      dest="offset_toy_mc", default=0,
                      help="offset of the toy MC used to response matrix")
    parser.add_option("--offset_toy_data", type=int,
                      dest="offset_toy_data", default=0,
                      help="offset of the toy MC used as data for unfolding")
    (options, _) = parser.parse_args()

    centre_of_mass = options.CoM
    make_folder_if_not_exists(options.output_folder)

    # set the number of toy MC for error calculation
    k_value = options.k_value
    tau_value = options.tau_value
    use_n_toy = options.n_input_mc
    offset_toy_mc = options.offset_toy_mc
    offset_toy_data = options.offset_toy_data
    method = options.method
    variable = options.variable

    create_unfolding_pull_data(options.file, method, options.channel,
                               centre_of_mass, variable, use_n_toy, use_n_toy,
                               options.output_folder, offset_toy_mc,
                               offset_toy_data, k_value, tau_value)
def create_unfolding_pull_data(input_file_name,
                               method,
                               channel,
                               centre_of_mass,
                               variable,
                               n_toy_mc,
                               n_toy_data,
                               output_folder,
                               offset_toy_mc,
                               offset_toy_data,
                               k_value,
                               tau_value=-1,
                               run_matrix=None):
    '''
        Sets up all variables for check_multiple_data_multiple_unfolding

        Opens the input file, derives the output folder from the arguments
        (creating it if necessary), prints a short summary and delegates to
        check_multiple_data_multiple_unfolding. A tau_value >= 0 takes
        precedence over k_value when the output folder and the message are
        built; the value is rounded to one decimal for that purpose.
    '''
    timer = Timer()
    input_file = File(input_file_name, 'read')

    # the regularisation parameter shown in folder name and message
    use_tau = tau_value >= 0
    inputs = dict(
        path=output_folder,
        centre_of_mass=centre_of_mass,
        variable=variable,
        n_toy_mc=n_toy_mc,
        n_toy_data=n_toy_data,
        vtype='tau' if use_tau else 'k',
        value=round(tau_value, 1) if use_tau else k_value,
    )

    folder_template = (
        '{path}/{centre_of_mass}TeV/{variable}/'
        '{n_toy_mc}_input_toy_mc/{n_toy_data}_input_toy_data/'
        '{vtype}_value_{value}/'
    )
    msg_template = (
        'Producing unfolding pull data for {variable},'
        ' {vtype}-value {value}'
    )

    output_folder = folder_template.format(**inputs)
    make_folder_if_not_exists(output_folder)
    print(msg_template.format(**inputs))
    print('Output folder: {0}'.format(output_folder))

    check_multiple_data_multiple_unfolding(
        input_file, method, channel, variable,
        n_toy_mc, n_toy_data, output_folder,
        offset_toy_mc, offset_toy_data,
        k_value, tau_value, run_matrix,
    )
    print('Runtime', timer.elapsed_time())
def compare_vjets_templates( variable = 'MET', met_type = 'patType1CorrectedPFMet',
                             title = 'Untitled', channel = 'electron' ):
    ''' Compares the V+jets templates in different bins
     of the current variable

     For every fit variable the V+Jets template of each bin is normalised to
     unit area and compared to the (equally normalised) sum over all bins.
     Histograms with a zero integral are left unscaled.

     variable: variable whose bins are compared
     met_type: inserted into the histogram path template
     title:    title of the plots
     channel:  selects the channel text shown on the plots
    '''
    global fit_variable_properties, b_tag_bin, save_as
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template( variable )

    # NOTE(review): the electron fit variables are used whatever 'channel'
    # is - confirm this is intended
    for fit_variable in electron_fit_variables:
        all_hists = {}
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % ( measurement_config.centre_of_mass_energy, variable, fit_variable )
        make_folder_if_not_exists( save_path + '/vjets/' )

        for bin_range in variable_bins:
            params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable}
            fit_variable_distribution = histogram_template % params
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files( [fit_variable_distribution], histogram_files )
            prepare_histograms( histograms, rebin = fit_variable_properties[fit_variable]['rebin'], scale_factor = measurement_config.luminosity_scale )
            all_hists[bin_range] = histograms['V+Jets'][fit_variable_distribution]

        # create the inclusive distribution before the bins are normalised
        inclusive_hist = deepcopy( all_hists[variable_bins[0]] )
        for bin_range in variable_bins[1:]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms; an empty one cannot be scaled to unit area
        for bin_range in variable_bins:
            integral = all_hists[bin_range].Integral()
            if integral != 0:
                all_hists[bin_range].Scale( 1 / integral )
        inclusive_integral = inclusive_hist.Integral()
        # without this guard an empty inclusive template raises ZeroDivisionError
        if inclusive_integral != 0:
            inclusive_hist.Scale( 1 / inclusive_integral )

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_variable_properties[fit_variable]['x-title']
        histogram_properties.y_axis_title = fit_variable_properties[fit_variable]['y-title']
        histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace( 'Events', 'a.u.' )
        histogram_properties.x_limits = [fit_variable_properties[fit_variable]['min'], fit_variable_properties[fit_variable]['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin + '_VJets_template_comparison'
        histogram_properties.y_max_scale = 1.5

        measurements = {bin_range + ' GeV': histogram for bin_range, histogram in all_hists.items()}
        measurements = OrderedDict( sorted( measurements.items() ) )
        # the bin edges are stored without the lepton prefix
        fit_var = fit_variable.replace( 'electron_', '' )
        fit_var = fit_var.replace( 'muon_', '' )
        graphs = spread_x( measurements.values(), fit_variable_bin_edges[fit_var] )
        # replace each histogram by its graph, keeping the sorted order
        for key, graph in zip( sorted( measurements.keys() ), graphs ):
            measurements[key] = graph
        compare_measurements( models = {'inclusive' : inclusive_hist},
                             measurements = measurements,
                             show_measurement_errors = True,
                             histogram_properties = histogram_properties,
                             save_folder = save_path + '/vjets/',
                             save_as = save_as )
Example #11
0
 def save(self):
     '''
         Saves the current matplotlib figure once per configured format as
         '<path><name>.<format>'; the folder is created if it is missing.
     '''
     make_folder_if_not_exists(self._path)
     for extension in self.__properties.formats:
         plt.savefig('{path}{name}.{format}'.format(
             path = self._path,
             name = self.__properties.name,
             format = extension,
         ))
Example #12
0
 def save(self):
     '''
         Makes sure the output folder exists and writes the current
         matplotlib figure to '<path><name>.<format>' for every format.
     '''
     make_folder_if_not_exists(self._path)
     template = '{path}{name}.{format}'
     for output_format in self.__properties.formats:
         target = template.format(path=self._path,
                                  name=self.__properties.name,
                                  format=output_format)
         plt.savefig(target)
Example #13
0
def print_xsections(xsections, channel, toFile=True):
    '''
        Formats the cross sections of all categories as rows of a LaTeX
        table, one column per bin of the current (global) variable.

        xsections: mapping of category name to a sequence of (value, error)
                   pairs, one pair per bin
        channel:   channel name, shown in the header and used in the file name
        toFile:    if True the table is written to
                   <savePath>/<variable>/normalised_xsection_result_<channel>_<met_type>_kv<k_value>.tex,
                   otherwise it is printed

        The 'central' row comes first, all other rows follow in sorted order.
    '''
    global savePath, variable, k_value, met_type, b_tag_bin
    printout = '\n'
    printout += '=' * 60
    # append - a plain assignment here dropped the separator line above
    printout += '\n'
    printout += 'Results for %s variable, %s channel, k-value %s, met type %s, %s b-tag region\n' % (
        variable, channel, k_value, met_type, b_tag_bin)
    printout += '=' * 60
    printout += '\n'
    rows = {}
    header = 'Measurement'
    scale = 100

    bins = variable_bins_ROOT[variable]
    assert (len(bins) == len(xsections['central']))

    for bin_i, variable_bin in enumerate(bins):
        header += r'& $\sigma_{meas}$ %s bin %s~\GeV' % (variable, variable_bin)
        for source in categories:
            value, error = xsections[source][bin_i]
            relativeError = getRelativeError(value, error)
            text = r' $(%.2f \pm %.2f) \cdot 10^{-2}$ ' % (
                value * scale,
                error * scale) + '(%.2f' % (relativeError * 100) + r'\%)'
            # every row starts with the translated name of its source
            if source not in rows:
                rows[source] = [translateOptions[source]]
            rows[source].append(text)

    header += '\\\\ \n'
    printout += header
    printout += '\\hline\n'
    printout += '&'.join(rows['central'])
    printout += '\\\\ \n'

    for source in sorted(rows.keys()):
        if source == 'central':
            continue
        printout += '&'.join(rows[source])
        printout += '\\\\ \n'
    printout += '\\hline \n\n'

    make_folder_if_not_exists(savePath + '/' + variable)
    if toFile:
        file_name = (savePath + '/' + variable + '/normalised_xsection_result_' +
                     channel + '_' + met_type + '_kv' + str(k_value) + '.tex')
        with open(file_name, 'w') as output_file:
            output_file.write(printout)
    else:
        print(printout)
Example #14
0
def check_save_folder(save_folder):
    '''
        Returns the save folder with a trailing '/' and makes sure that the
        folder exists
    '''
    # append the separator only if it is missing
    folder = save_folder if save_folder.endswith('/') else save_folder + '/'
    make_folder_if_not_exists(folder)
    return folder
def unfold_results( results, category, channel, k_value, h_truth, h_measured, h_response, h_fakes, method ):
    '''
        Unfolds the given results and, if options.write_unfolding_objects is
        set, exports the unfolding objects to ROOT files.

        results:    input handed to value_error_tuplelist_to_hist together
                    with the bin edges of the current (global) variable
        category:   name of the measurement category; unfoldCfg.Hreco is set
                    to options.Hreco for 'central' and to 0 otherwise
        channel:    part of the output path of the exported ROOT files
        k_value:    passed to Unfolding and used in the output path
        h_truth, h_measured, h_response, h_fakes:
                    histograms used to construct the Unfolding object
        method:     unfolding method; 'TSVDUnfold' objects are accessed
                    directly, for every other method via Impl()

        Returns the unfolded histogram converted by
        hist_to_value_error_tuplelist.
    '''
    global variable, path_to_JSON, options
    h_data = value_error_tuplelist_to_hist( results, bin_edges[variable] )
    unfolding = Unfolding( h_truth, h_measured, h_response, h_fakes, method = method, k_value = k_value )
    
    # turning off the unfolding errors for systematic samples
    if not category == 'central':
        unfoldCfg.Hreco = 0
    else:
        # restore the user setting, an earlier systematic call may have zeroed it
        unfoldCfg.Hreco = options.Hreco
        
    h_unfolded_data = unfolding.unfold( h_data )
    
    if options.write_unfolding_objects:
        # export the D and SV distributions
        SVD_path = path_to_JSON + '/unfolding_objects/' + channel + '/kv_' + str( k_value ) + '/'
        make_folder_if_not_exists( SVD_path )
        if method == 'TSVDUnfold':
            SVDdist = File( SVD_path + method + '_SVDdistributions_' + category + '.root', 'recreate' )
            directory = SVDdist.mkdir( 'SVDdist' )
            # make the new directory the current one before calling Write()
            directory.cd()
            unfolding.unfoldObject.GetD().Write()
            unfolding.unfoldObject.GetSV().Write()
            #    unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
            SVDdist.Close()
        else:
            # for all other methods the file name carries the Hreco setting and
            # the input histograms are stored alongside the D and SV distributions
            SVDdist = File( SVD_path + method + '_SVDdistributions_Hreco' + str( unfoldCfg.Hreco ) + '_' + category + '.root', 'recreate' )
            directory = SVDdist.mkdir( 'SVDdist' )
            directory.cd()
            unfolding.unfoldObject.Impl().GetD().Write()
            unfolding.unfoldObject.Impl().GetSV().Write()
            h_truth.Write()
            h_measured.Write()
            h_response.Write()
            #    unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
            SVDdist.Close()
    
        # export the whole unfolding object if it doesn't exist
        if method == 'TSVDUnfold':
            unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
        else:
            unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str( unfoldCfg.Hreco ) + '_' + category + '.root'
        # an existing file is never overwritten
        if not os.path.isfile( unfolding_object_file_name ):
            unfoldingObjectFile = File( unfolding_object_file_name, 'recreate' )
            directory = unfoldingObjectFile.mkdir( 'unfoldingObject' )
            directory.cd()
            if method == 'TSVDUnfold':
                unfolding.unfoldObject.Write()
            else:
                unfolding.unfoldObject.Impl().Write()
            unfoldingObjectFile.Close()
    
    del unfolding
    return hist_to_value_error_tuplelist( h_unfolded_data )
def plot_central_and_systematics(channel, systematics, exclude=[], suffix='altogether'):
    '''
        Draws the central unfolded measurement and its systematic variations
        on one ROOT canvas and saves the canvas once per entry of
        output_formats.

        channel:     channel whose results are read and whose labels are drawn
        systematics: sequence of systematic names; the position of a name in
                     the sequence determines its marker colour
        exclude:     systematics that are not drawn
        suffix:      part of the output file name
    '''
    global variable, variables_latex, k_value, b_tag_bin, maximum, ttbar_generator_systematics

    canvas = Canvas(width=700, height=500)
    canvas.SetLeftMargin(0.15)
    canvas.SetBottomMargin(0.15)
    canvas.SetTopMargin(0.05)
    canvas.SetRightMargin(0.05)
    legend = plotting.create_legend(x0=0.6, y1=0.5)

    central = read_xsection_measurement_results('central', channel)[0]['unfolded']

    # axis titles and ranges are taken from the first histogram drawn
    x_axis = central.GetXaxis()
    x_axis.SetTitle(variables_latex[variable] + ' [GeV]')
    y_axis = central.GetYaxis()
    y_axis.SetTitle('#frac{1}{#sigma} #frac{d#sigma}{d' + variables_latex[variable] + '} [GeV^{-1}]')
    central.GetXaxis().SetTitleSize(0.05)
    central.GetYaxis().SetTitleSize(0.05)
    central.SetMinimum(0)
    central.SetMaximum(maximum[variable])
    central.SetMarkerSize(1)
    central.SetMarkerStyle(20)

    gStyle.SetEndErrorSize(20)
    central.Draw('P')
    legend.AddEntry(central, 'measured (unfolded)', 'P')

    for systematic in systematics:
        if systematic == 'central' or systematic in exclude:
            continue

        variation = read_xsection_measurement_results(systematic, channel)[0]['unfolded']
        variation.SetMarkerSize(0.5)
        variation.SetMarkerStyle(20)
        # colour follows the position in the input; 10 is replaced by 42
        colour = systematics.index(systematic) + 1
        if colour == 10:
            colour = 42
        variation.SetMarkerColor(colour)
        variation.Draw('same P')
        legend.AddEntry(variation, systematic, 'P')

    legend.Draw()

    cms_label, channel_label = get_cms_labels(channel)
    cms_label.Draw()
    channel_label.Draw()

    canvas.Modified()
    canvas.Update()

    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable
    make_folder_if_not_exists(path)

    for output_format in output_formats:
        file_name = path + '/normalised_xsection_' + channel + '_' + suffix + '_kv' + str(k_value) + '.' + output_format
        canvas.SaveAs(file_name)
Example #17
0
def check_save_folder(save_folder):
    '''
        Normalises the save folder path and makes sure the folder exists.

        A trailing '/' is appended when it is missing, the resulting path is
        handed to make_folder_if_not_exists and then returned to the caller.
    '''
    if save_folder.endswith('/'):
        normalised_folder = save_folder
    else:
        normalised_folder = save_folder + '/'

    make_folder_if_not_exists(normalised_folder)
    return normalised_folder
def main():
    '''
        Main function for this script.

        Parses the command line options, builds the XSectionConfig for the
        requested centre of mass energy, makes sure the output folder exists
        and then calls create_unfolding_pull_data with the parsed settings.
    '''
    set_root_defaults(msg_ignore_level=3001)

    parser = OptionParser()
    parser.add_option("-o", "--output",
                      dest="output_folder", default='data/pull_data/',
                      help="output folder for pull data files")
    parser.add_option("-n", "--n_input_mc", type='int',
                      dest="n_input_mc", default=100,
                      help="number of toy MC used for the tests")
    parser.add_option("--tau", type='float',
                      dest="tau_value", default=-1.,
                      help="tau-value for SVD unfolding")
    parser.add_option("-m", "--method", type='string',
                      dest="method", default='TUnfold',
                      help="unfolding method")
    parser.add_option("-f", "--file", type='string',
                      dest="file", default='data/toy_mc/unfolding_toy_mc.root',
                      help="file with toy MC")
    parser.add_option("-v", "--variable", dest="variable", default='MET',
                      help="set the variable to analyse (defined in config/variable_binning.py)")
    # the help text used to advertise a default of 8 TeV although the actual
    # default is 13
    parser.add_option("--com", "--centre-of-mass-energy", dest="CoM", default=13,
                      help="set the centre of mass energy for analysis. "
                           "Default = 13 [TeV]", type='int')
    parser.add_option("-c", "--channel", type='string',
                      dest="channel", default='combined',
                      help="channel to be analysed: electron|muon|combined")
    # the help text used to be a copy of the channel description
    parser.add_option("-s", type='string',
                      dest="sample", default='madgraph',
                      help="sample to be analysed (default: madgraph)")

    (options, _) = parser.parse_args()

    centre_of_mass = options.CoM
    measurement_config = XSectionConfig(centre_of_mass)
    make_folder_if_not_exists(options.output_folder)

    use_n_toy = options.n_input_mc
    method = options.method
    variable = options.variable
    sample = options.sample
    tau_value = options.tau_value

    create_unfolding_pull_data(options.file, method, options.channel,
                               centre_of_mass, variable,
                               sample,
                               measurement_config.unfolding_central,
                               use_n_toy,
                               options.output_folder,
                               tau_value)
Example #19
0
def tau_from_scan( unfoldingObject, regularisation_settings ):
    '''
        Runs a tau scan on unfoldingObject, saves a plot of the scan result
        and returns the tau value reported by the unfolding object.

        unfoldingObject: object providing ScanTau(...) and GetTau()
        regularisation_settings: object providing the attributes 'variable'
            (selects the scan range and the plot name) and 'output_folder'
            (where '<variable>.png' is written)

        Returns unfoldingObject.GetTau() as it is after the scan.
    '''
    variable = regularisation_settings.variable

    # Parameters of scan
    # Number of points to scan, and min/max tau
    nScan = 1000
    # (minTau, maxTau) per variable, every other variable uses the default
    tau_ranges = {
        'abs_lepton_eta': ( 1.E-8, 1.E-3 ),
        'lepton_pt': ( 1.E-6, 1.E-2 ),
        'NJets': ( 1.E-6, 1.E-2 ),
    }
    minTau, maxTau = tau_ranges.get( variable, ( 1.E-6, 1.E-0 ) )

    # Scan is performed here, scanResult is filled by ScanTau
    scanResult = TSpline3()
    iBest = unfoldingObject.ScanTau(nScan, minTau, maxTau, scanResult, TUnfoldDensity.kEScanTauRhoSquareAvg)

    # Plot the scan result
    # Correlation as function of log tau
    canvas = TCanvas()
    scanResult.SetMarkerColor(600)
    scanResult.SetMarkerSize(1)
    scanResult.SetMarkerStyle(5)
    scanResult.Draw('LP')

    # Add point corresponding to optimum tau
    t = Double(0)
    x = Double(0)
    scanResult.GetKnot(iBest, t, x)
    bestTau = Graph(1)
    # Points are indexed from 0: writing to index 1 of a one-point graph
    # appends a second point and leaves point 0 at its initial position,
    # which would then be drawn as an extra marker.
    bestTau.SetPoint(0, t, x)
    bestTau.markercolor = 'red'
    bestTau.SetMarkerSize(1.25)
    bestTau.Draw('*')

    # Write to file
    output_dir = regularisation_settings.output_folder
    make_folder_if_not_exists(output_dir)
    canvas.SaveAs(output_dir + '/{0}.png'.format(variable) )

    return unfoldingObject.GetTau()
def plot_central_and_systematics(channel, systematics, exclude=None, suffix='altogether'):
    '''
        Plots the central result together with the unfolded result of every
        systematic variation and saves the figure once per entry in
        output_formats.

        channel: passed on to read_xsection_measurement_results and
            get_cms_labels, also part of the output file name
        systematics: list of systematic names; the position of a name in this
            list determines the marker colour number of that systematic
        exclude: optional collection of systematic names that are left out
            (default: nothing is excluded)
        suffix: inserted into the output file name
    '''
    global variable, k_value, b_tag_bin, met_type

    # None instead of a list literal as default: a default list would be one
    # object shared between all calls
    excluded = [] if exclude is None else exclude

    plt.figure(figsize=(16, 16), dpi=200, facecolor='white')
    axes = plt.axes()
    axes.minorticks_on()

    hist_data_central = read_xsection_measurement_results('central', channel)[0]['unfolded_with_systematics']
    hist_data_central.markersize = 2  # in points
    hist_data_central.marker = 'o'

    plt.xlabel('$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title)
    plt.ylabel(r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + variables_latex[variable] + r'} \left[\mathrm{GeV}^{-1}\right]$', CMS.y_axis_title)
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)

    rplt.errorbar(hist_data_central, axes=axes, label='data', xerr=True)

    for systematic in sorted(systematics):
        if systematic in excluded or systematic == 'central':
            continue

        hist_data_systematic = read_xsection_measurement_results(systematic, channel)[0]['unfolded']
        hist_data_systematic.markersize = 2
        hist_data_systematic.marker = 'o'
        # colour follows the position in the unsorted input list;
        # colour number 10 is replaced by 42
        colour_number = systematics.index(systematic) + 2
        if colour_number == 10:
            colour_number = 42
        hist_data_systematic.SetMarkerColor(colour_number)

        # pick the legend label depending on the kind of systematic
        if 'PDF' in systematic:
            label = systematic.replace('Weights_', ' ')
        elif met_type in systematic:
            label = met_systematics_latex[systematic.replace(met_type, '')]
        else:
            label = measurements_latex[systematic]
        rplt.errorbar(hist_data_systematic, axes=axes, label=label, xerr=False)

    plt.legend(numpoints=1, loc='upper right', prop={'size':25}, ncol=2)
    plt.title(get_cms_labels(channel), CMS.title)
    plt.tight_layout()

    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable
    make_folder_if_not_exists(path)
    for output_format in output_formats:
        plt.savefig(path + '/normalised_xsection_' + channel + '_' + suffix + '_kv' + str(k_value) + '.' + output_format)

    # release the figure before the next plot is made
    plt.close()
    gc.collect()
def make_template_plots(histograms, category, channel):
    '''
        Draws the signal, V+Jets and QCD templates of every bin of the
        global 'variable' on a canvas and saves it once per entry in
        output_formats.

        histograms: mapping variable bin -> {'signal', 'V+Jets', 'QCD'}
            histograms; the histograms are restyled in place
        category: used as a sub-folder of the output path
        channel: used in the plot file name and passed to get_cms_labels

        A bin is skipped when its plot already exists in every output format.
    '''
    global variable, output_folder

    # the folder is the same for all bins
    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable + '/' + category + '/fit_templates/'
    for variable_bin in variable_bins_ROOT[variable]:
        make_folder_if_not_exists(path)
        plotname = path + channel + '_templates_bin_' + variable_bin
        # check if template plots exist already;
        # a 'continue' inside a loop over the formats would only advance
        # that inner loop, so the decision is made in one expression here
        if output_formats and all(os.path.isfile(plotname + '.' + output_format)
                                  for output_format in output_formats):
            continue

        canvas = Canvas(width=700, height=500)
        canvas.SetLeftMargin(0.15)
        canvas.SetBottomMargin(0.15)
        canvas.SetTopMargin(0.05)
        canvas.SetRightMargin(0.05)
        legend = plotting.create_legend(x0=0.7, y1=0.8)
        h_signal = histograms[variable_bin]['signal']
        h_VJets = histograms[variable_bin]['V+Jets']
        h_QCD = histograms[variable_bin]['QCD']

        # h_signal is drawn first, so its axes define the frame
        h_signal.GetXaxis().SetTitle('Lepton #eta')
        h_signal.GetYaxis().SetTitle('Normalised Events')
        h_signal.GetXaxis().SetTitleSize(0.05)
        h_signal.GetYaxis().SetTitleSize(0.05)
        h_signal.SetMinimum(0)
        h_signal.SetMaximum(0.2)
        h_signal.SetLineWidth(2)
        h_VJets.SetLineWidth(2)
        h_QCD.SetLineWidth(2)
        h_signal.SetLineColor(kRed + 1)
        h_VJets.SetLineColor(kBlue)
        h_QCD.SetLineColor(kYellow)
        h_signal.Draw('hist')
        h_VJets.Draw('hist same')
        h_QCD.Draw('hist same')
        legend.AddEntry(h_signal, 'signal', 'l')
        legend.AddEntry(h_VJets, 'V+Jets', 'l')
        legend.AddEntry(h_QCD, 'QCD', 'l')
        legend.Draw()

        cms_label, channel_label = get_cms_labels(channel)
        cms_label.Draw()
        channel_label.Draw()

        canvas.Modified()
        canvas.Update()
        for output_format in output_formats:
            canvas.SaveAs(plotname + '.' + output_format)
def make_template_plots_matplotlib(histograms, category, channel):
    '''
        Plots the signal, V+Jets and QCD templates of every bin of the
        global 'variable' with matplotlib and saves the figure once per entry
        in output_formats.

        histograms: mapping variable bin -> {'signal', 'V+Jets', 'QCD'}
            histograms; line colour and width are set in place
        category: used as a sub-folder of the output path
        channel: used in the plot file name and passed to
            get_cms_labels_matplotlib

        A bin is skipped when its plot already exists in every output format.
    '''
    global variable, output_folder
    from matplotlib import rc
    rc('text', usetex=True)

    # the folder is the same for all bins
    path = output_folder + str(measurement_config.centre_of_mass) + 'TeV/' + variable + '/' + category + '/fit_templates/'
    for variable_bin in variable_bins_ROOT[variable]:
        make_folder_if_not_exists(path)
        plotname = path + channel + '_templates_bin_' + variable_bin

        # check if template plots exist already;
        # a 'continue' inside a loop over the formats would only advance
        # that inner loop, so the decision is made in one expression here
        if output_formats and all(os.path.isfile(plotname + '.' + output_format)
                                  for output_format in output_formats):
            continue

        # plot with matplotlib
        h_signal = histograms[variable_bin]['signal']
        h_VJets = histograms[variable_bin]['V+Jets']
        h_QCD = histograms[variable_bin]['QCD']

        h_signal.linecolor = 'red'
        h_VJets.linecolor = 'green'
        h_QCD.linecolor = 'yellow'

        h_signal.linewidth = 5
        h_VJets.linewidth = 5
        h_QCD.linewidth = 5

        plt.figure(figsize=(14, 10), dpi=200, facecolor='white')
        axes = plt.axes()
        axes.minorticks_on()

        plt.xlabel(r'lepton $|\eta|$', CMS.x_axis_title)
        plt.ylabel('normalised to unit area/0.2', CMS.y_axis_title)
        plt.tick_params(**CMS.axis_label_major)
        plt.tick_params(**CMS.axis_label_minor)

        rplt.hist(h_signal, axes=axes, label='signal')
        rplt.hist(h_VJets, axes=axes, label='V+Jets')
        rplt.hist(h_QCD, axes=axes, label='QCD')
        axes.set_ylim([0,0.2])

        plt.legend(numpoints=1, loc='upper right', prop=CMS.legend_properties)
        plt.title(get_cms_labels_matplotlib(channel), CMS.title)
        plt.tight_layout()

        for output_format in output_formats:
            plt.savefig(plotname + '.' + output_format)

        # one figure is opened per bin; close it so that figures do not
        # pile up in memory over the loop
        plt.close()
Example #23
0
    def submit(self):
        '''
            Submits all registered jobs to the local HTCondor scheduler using
            a job template (DailyPythonScripts/condor/job_template) description
            file and the 'condor_submit' command.

            For every entry of self.prepared_jobs a pickle file and a job
            description file are written to 'jobs/<dd-mm-YYYY>/'. Afterwards
            './condor/prepare_dps.sh' is run to completion and each job
            description is passed to 'condor_submit', one after the other.
        '''
        today = time.strftime("%d-%m-%Y")
        job_folder = 'jobs/{0}/'.format(today)
        make_folder_if_not_exists(job_folder)
        make_folder_if_not_exists(job_folder + 'logs')
        # construct jobs
        self._construct_jobs()
        # convert each job into a pickle file
        # construct a class ad for each job
        with open('condor/job_template', 'r') as template:
            job_template = template.read()
        condor_jobs = []
        for i, job in enumerate(self.prepared_jobs):
            job_file = job_folder + 'job_{0}.pkl'.format(i)
            job_desc_file = job_folder + 'job_{0}.dsc'.format(i)

            # 'dps.tar' is always shipped, jobs may request additional files
            input_files = ['dps.tar']
            if hasattr(job, 'additional_input_files'):
                input_files.extend(job.additional_input_files)
            input_files_str = ','.join(input_files)

            # fill the placeholders of the template
            job_description = job_template.replace('%pkl_file%', job_file)
            job_description = job_description.replace('%total_memory%',
                                                      str(self.request_memory))
            job_description = job_description.replace('%n_jobs_to_run%',
                                                      str(self.n_jobs_to_run))
            job_description = job_description.replace(
                '%n_jobs_to_split%', str(self.n_jobs_to_split))
            job_description = job_description.replace('%input_files%',
                                                      input_files_str)
            job_description = job_description.replace('%today%', today)

            # pickle data is binary: text mode only works by accident for
            # the ASCII protocol and fails on Python 3
            with open(job_file, 'wb') as jf:
                pickle.dump(job, jf)
            with open(job_desc_file, 'w') as jdf:
                jdf.write(job_description)

            condor_jobs.append(job_desc_file)
        # prepare DPS for submission and wait for the script to finish, so
        # that no job is submitted while the preparation is still running
        # (presumably the script creates the 'dps.tar' input - TODO confirm)
        preparation = subprocess.Popen(['./condor/prepare_dps.sh'])
        preparation.communicate()
        # submit jobs
        for j in condor_jobs:
            p = subprocess.Popen(['condor_submit', j])
            p.communicate()  # wait until command completed
def create_toy_mc(input_file, sample, output_folder, n_toy, centre_of_mass, ttbar_xsection):
    '''
        Writes the original truth/measured/response histograms and n_toy toy
        versions of them into a new ROOT file.

        The output file name comes from get_output_file_name. For the
        'combined' channel and every variable in bin_edges_vis a directory
        '<channel>/<variable>' is created that holds the sub-directory
        'Original' and the sub-directories 'toy_1' ... 'toy_<n_toy>', each
        with the histograms 'truth', 'measured' and 'response'.
    '''
    from tools.file_utilities import make_folder_if_not_exists
    from tools.toy_mc import generate_toy_MC_from_distribution, generate_toy_MC_from_2Ddistribution
    from tools.Unfolding import get_unfold_histogram_tuple

    make_folder_if_not_exists(output_folder)
    source_file = File(input_file)
    target_file_name = get_output_file_name(output_folder, sample, n_toy, centre_of_mass)
    variables_to_process = bin_edges_vis.copy()

    with root_open(target_file_name, 'recreate') as target_file:
        for channel in ['combined']:
            for variable in variables_to_process:
                variable_dir = target_file.mkdir(channel + '/' + variable, recurse=True)
                enter = variable_dir.cd
                make_subdir = variable_dir.mkdir

                unfold_histograms = get_unfold_histogram_tuple(
                    source_file,
                    variable,
                    channel,
                    centre_of_mass = centre_of_mass,
                    ttbar_xsection = ttbar_xsection,
                    visiblePS = True,
                    load_fakes=False)
                h_truth, h_measured, h_response, _ = unfold_histograms

                enter()

                # the unmodified input histograms
                make_subdir('Original')
                enter('Original')
                for name, original in (('truth', h_truth),
                                       ('measured', h_measured),
                                       ('response', h_response)):
                    original.Write(name)

                for toy_number in range(1, n_toy + 1):
                    toy_dir_name = 'toy_{0}'.format(toy_number)
                    make_subdir(toy_dir_name)
                    enter(toy_dir_name)
                    # all three toys are generated first (truth, measured,
                    # response), then named, then written
                    toys = (
                        ('truth', generate_toy_MC_from_distribution(h_truth)),
                        ('measured', generate_toy_MC_from_distribution(h_measured)),
                        ('response', generate_toy_MC_from_2Ddistribution(h_response)),
                    )
                    for name, toy in toys:
                        toy.SetName(name)
                    for _, toy in toys:
                        toy.Write()
def print_xsections(xsections, channel, toFile = True, print_before_unfolding = False):
    '''
        Formats the cross section results as rows of a LaTeX table and either
        writes them to a .tex file or prints them.

        xsections: mapping with the keys 'unfolded_with_systematics' and
            'measured_with_systematics'; each entry is indexable by bin
            number and yields (value, error_up, error_down)
        channel: used in the header line and in the output file name
        toFile: write to '<output_folder>/<centre_of_mass>TeV/<variable>/'
            if True, print otherwise
        print_before_unfolding: use the measured instead of the unfolded
            results

        Values and absolute errors are multiplied by 100 and printed with a
        factor 10^{-2}; relative errors come from getRelativeError and are
        given in percent. Symmetric errors are printed as '\\pm', asymmetric
        ones as '^{+up}_{-down}'.
    '''
    global output_folder, variable, k_value, met_type, b_tag_bin
    separator = '=' * 60 + '\n'
    printout = separator
    printout += 'Results for %s variable, %s channel, k-value %s, met type %s, %s b-tag region\n' % (variable, channel, k_value, met_type, b_tag_bin)
    if print_before_unfolding:
        printout += 'BEFORE UNFOLDING\n'
    printout += separator
    printout += r'$%s$ bin & $\sigma_{meas}$' % variables_latex[variable]
    printout += '\\\\ \n\\hline\n'
    scale = 100

    bins = variable_bins_ROOT[variable]
    assert(len(bins) == len(xsections['unfolded_with_systematics']))

    if print_before_unfolding:
        results_key = 'measured_with_systematics'
    else:
        results_key = 'unfolded_with_systematics'

    for bin_i, variable_bin in enumerate(bins):
        value, error_up, error_down = xsections[results_key][bin_i]
        relativeError_up = getRelativeError(value, error_up)
        relativeError_down = getRelativeError(value, error_down)

        row = '%s & ' % variable_bins_latex[variable_bin]
        if error_up == error_down:
            row += r' $(%.2f \pm %.2f ) \cdot 10^{-2} ' % (value * scale, error_up * scale)
            row += '(%.2f' % (relativeError_up * 100) + r'\%)$'
        else:
            # the subscript brace has to close before the parenthesis,
            # '_{-x)}' is not what is meant in LaTeX
            row += r' $(%.2f^{+%.2f}_{-%.2f}) \cdot 10^{-2} ' % (value * scale, error_up * scale, error_down * scale)
            row += '(^{+%.2f}_{-%.2f}' % (relativeError_up * 100, relativeError_down * 100) + r'\%)$'
        printout += row + '\\\\ \n'

    printout += '\\hline \n\n'

    if toFile:
        path = output_folder + '/'  + str(measurement_config.centre_of_mass) + 'TeV/'  + variable
        make_folder_if_not_exists(path)
        if print_before_unfolding:
            stage = 'measured'
        else:
            stage = 'unfolded'
        file_name = path + '/normalised_xsection_result_' + channel + '_' + met_type + '_kv' + str(k_value) + '_' + stage + '.tex'
        # the with-block closes the file even if writing fails
        with open(file_name, 'w') as output_file:
            output_file.write(printout)
    else:
        # with a single argument this form prints the same text whether or
        # not print is a function
        print(printout)
def create_unfolding_pull_data(input_file_name, method, channel,
                               centre_of_mass, variable,
                               sample,
                               responseFile,
                               n_toy_data,
                               output_folder,
                               tau_value,
                               run_matrix=None):
    '''
        Sets up all variables for check_multiple_data_multiple_unfolding,
        runs it and returns the output file name it reports.

        The pull data is written to
        '<output_folder>/<centre_of_mass>TeV/<variable>/<sample>/', which is
        created if necessary. The response histogram is read with
        get_response_histogram from responseFile. run_matrix is accepted
        but not used here.
    '''
    set_root_defaults(msg_ignore_level=3001)
    timer = Timer()
    input_file = File(input_file_name, 'read')

    inputs = dict(
        path=output_folder,
        centre_of_mass=centre_of_mass,
        variable=variable,
        sample=sample,
        value=round(tau_value,4),
    )

    h_response = get_response_histogram(responseFile, variable, channel)

    pull_data_folder = '{path}/{centre_of_mass}TeV/{variable}/{sample}/'.format(**inputs)
    make_folder_if_not_exists(pull_data_folder)

    progress_message = 'Producing unfolding pull data for {variable}, tau-value {value}'
    print(progress_message.format(**inputs))
    print('Output folder: {0}'.format(pull_data_folder))
    print ('Response here :',h_response)

    output_file_name = check_multiple_data_multiple_unfolding(
        input_file, method, channel, variable,
        h_response,
        n_toy_data,
        pull_data_folder,
        tau_value,
    )
    print('Runtime', timer.elapsed_time())

    return output_file_name
def main():
    '''
        Makes the fit variable plots of the electron channel for every
        variable in 'variables' and every bin of that variable, followed by
        the QCD control region and V+Jets b-tag region comparisons.

        The global b_tag_bin_ctl is set per fit variable: '1orMoreBtag' for
        fit variables containing '_bl', '0orMoreBtag' otherwise.
    '''
    global measurement_config, histogram_files
    global electron_fit_variables, muon_fit_variables, fit_variable_properties
    global b_tag_bin, category, histogram_files, variables
    global b_tag_bin_ctl

    title_template = 'CMS Preliminary, $\mathcal{L} = %.1f$ fb$^{-1}$  at $\sqrt{s}$ = %d TeV \n %s'
    luminosity = measurement_config.new_luminosity / 1000.
    e_title = title_template % ( luminosity, measurement_config.centre_of_mass_energy, 'e+jets, $\geq$ 4 jets' )
    met_type = 'patType1CorrectedPFMet'

    for variable in variables:
        variable_bins = variable_bins_ROOT[variable]
        histogram_template = get_histogram_template( variable )

        for fit_variable in electron_fit_variables:
            b_tag_bin_ctl = '1orMoreBtag' if '_bl' in fit_variable else '0orMoreBtag'

            save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (measurement_config.centre_of_mass_energy, variable, fit_variable)
            # the folder itself plus one sub-folder per comparison type
            for sub_folder in ('', 'qcd/', 'vjets/'):
                make_folder_if_not_exists(save_path + sub_folder)

            for bin_range in variable_bins:
                params = dict(met_type=met_type, bin_range=bin_range,
                              fit_variable=fit_variable, b_tag_bin=b_tag_bin,
                              variable=variable)
                fit_variable_distribution = histogram_template % params
                # the QCD control distribution differs in selection and b-tag bin
                qcd_fit_variable_distribution = fit_variable_distribution.replace(
                    'Ref selection', 'QCDConversions' ).replace( b_tag_bin, b_tag_bin_ctl )
                distributions = [fit_variable_distribution, qcd_fit_variable_distribution]
                histograms = get_histograms_from_files( distributions, histogram_files )
                plot_fit_variable( histograms, fit_variable, variable, bin_range,
                                   fit_variable_distribution, qcd_fit_variable_distribution,
                                   e_title, save_path )

        compare_qcd_control_regions(variable, met_type, e_title)
        compare_vjets_btag_regions(variable, met_type, e_title)
def create_unfolding_pull_data(input_file_name, method, channel,
                               centre_of_mass, variable, n_toy_mc, n_toy_data,
                               output_folder, offset_toy_mc, offset_toy_data,
                               k_value, tau_value=-1, run_matrix=None):
    '''
        Sets up all variables for check_multiple_data_multiple_unfolding
        and runs it.

        The output is organised as
        '<output_folder>/<centre_of_mass>TeV/<variable>/<n_toy_mc>_input_toy_mc/
        <n_toy_data>_input_toy_data/<vtype>_value_<value>/'. For
        tau_value >= 0 vtype is 'tau' and value is tau_value rounded to one
        decimal, otherwise vtype is 'k' and value is k_value.
    '''
    timer = Timer()
    input_file = File(input_file_name, 'read')

    # tau takes precedence over k when a non-negative tau is given
    if tau_value >= 0:
        vtype, value = 'tau', round(tau_value, 1)
    else:
        vtype, value = 'k', k_value

    inputs = dict(
        path=output_folder,
        centre_of_mass=centre_of_mass,
        variable=variable,
        n_toy_mc=n_toy_mc,
        n_toy_data=n_toy_data,
        vtype=vtype,
        value=value,
    )

    folder_parts = (
        '{path}/{centre_of_mass}TeV/{variable}/',
        '{n_toy_mc}_input_toy_mc/{n_toy_data}_input_toy_data/',
        '{vtype}_value_{value}/',
    )
    pull_data_folder = ''.join(folder_parts).format(**inputs)
    make_folder_if_not_exists(pull_data_folder)

    progress_message = 'Producing unfolding pull data for {variable}, {vtype}-value {value}'
    print(progress_message.format(**inputs))
    print('Output folder: {0}'.format(pull_data_folder))

    check_multiple_data_multiple_unfolding(
        input_file, method, channel, variable, n_toy_mc, n_toy_data,
        pull_data_folder, offset_toy_mc, offset_toy_data, k_value, tau_value,
        run_matrix,
    )
    print('Runtime', timer.elapsed_time())
def compare_vjets_btag_regions( variable = 'MET', met_type = 'patType1CorrectedPFMet',
                                title = 'Untitled', channel = 'electron' ):
    '''
        Compares the V+Jets template in different b-tag bins.

        For every electron fit variable and every bin of 'variable' the
        V+Jets histogram of the b_tag_bin region and of the control region
        (global b_tag_bin_ctl, set here per fit variable) are normalised to
        unit integral and handed to compare_measurements. Plots are saved in
        the 'vjets' sub-folder of the fit variable's plot folder.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    b_tag_bin_ctl = '0orMoreBtag'
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template( variable )

    for fit_variable in electron_fit_variables:
        b_tag_bin_ctl = '1orMoreBtag' if '_bl' in fit_variable else '0orMoreBtag'

        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % ( measurement_config.centre_of_mass_energy, variable, fit_variable )
        vjets_folder = save_path + '/vjets/'
        make_folder_if_not_exists( vjets_folder )

        # settings shared by all bins of this fit variable
        properties = fit_variable_properties[fit_variable]
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = properties['x-title']
        histogram_properties.y_axis_title = properties['y-title']
        histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace( 'Events', 'a.u.' )
        histogram_properties.x_limits = [properties['min'], properties['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.y_max_scale = 1.5

        for bin_range in variable_bins:
            params = dict( met_type = met_type, bin_range = bin_range,
                           fit_variable = fit_variable, b_tag_bin = b_tag_bin,
                           variable = variable )
            fit_variable_distribution = histogram_template % params
            fit_variable_distribution_ctl = fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl )
            # format: histograms['V+Jets'][distribution]
            histograms = get_histograms_from_files(
                [fit_variable_distribution, fit_variable_distribution_ctl],
                {'V+Jets' : histogram_files['V+Jets']} )
            prepare_histograms( histograms, rebin = properties['rebin'],
                                scale_factor = measurement_config.luminosity_scale )
            histogram_properties.name = '_'.join( [variable, bin_range, fit_variable, b_tag_bin_ctl] ) + '_VJets_template_comparison'

            # normalise both templates to unit integral
            vjets = histograms['V+Jets']
            for distribution in ( fit_variable_distribution, fit_variable_distribution_ctl ):
                vjets[distribution].Scale( 1 / vjets[distribution].Integral() )

            compare_measurements( models = {'no b-tag' : histograms['V+Jets'][fit_variable_distribution_ctl]},
                             measurements = {'$>=$ 2 b-tags': histograms['V+Jets'][fit_variable_distribution]},
                             show_measurement_errors = True,
                             histogram_properties = histogram_properties,
                             save_folder = vjets_folder,
                             save_as = save_as )
def plot_fit_results( histograms, category, channel ):
    '''
        Makes a data/MC comparison plot of the fit result for every bin of
        the global 'variable' and every fit variable.

        histograms: nested mapping
            histograms[fit_variable][variable_bin][sample] with the samples
            'data', 'signal' and 'background'
        category: used as a sub-folder of the output path
        channel: used in the plot name and passed to get_cms_labels

        The binning is taken from variable_bins_visiblePS_ROOT or
        variable_bins_ROOT depending on the global phase_space.

        Raises ValueError if phase_space is neither 'VisiblePS' nor 'FullPS'.
    '''
    global variable, b_tag_bin, output_folder, phase_space
    from tools.plotting import Histogram_properties, make_data_mc_comparison_plot
    fit_variables = histograms.keys()

    if phase_space == 'VisiblePS':
        variableBins = variable_bins_visiblePS_ROOT
    elif phase_space == 'FullPS':
        variableBins = variable_bins_ROOT
    else:
        # fail with a clear message instead of subscripting None below
        raise ValueError( "Unknown phase space '%s', expected 'VisiblePS' or 'FullPS'" % phase_space )

    # the folder is the same for all bins
    path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_results/'
    for variable_bin in variableBins[variable]:
        make_folder_if_not_exists( path )
        for fit_variable in fit_variables:
            plotname = channel + '_' + fit_variable + '_bin_' + variable_bin
            # NOTE(review): a loop that tested os.path.isfile( plotname + ... )
            # and then issued 'continue' used to sit here. It never skipped
            # anything ('continue' only advanced that inner loop) and it
            # tested the bare plot name without the output folder, so it was
            # dropped. If existing plots should be skipped, the check needs
            # the file name that make_data_mc_comparison_plot actually writes.

            # plot with matplotlib
            bin_histograms = histograms[fit_variable][variable_bin]
            h_data = bin_histograms['data']
            h_signal = bin_histograms['signal']
            h_background = bin_histograms['background']

            histogram_properties = Histogram_properties()
            histogram_properties.name = plotname
            histogram_properties.x_axis_title = fit_variables_latex[fit_variable]
            histogram_properties.y_axis_title = 'Events/(%s)' % get_unit_string(fit_variable)
            label, _ = get_cms_labels( channel )
            histogram_properties.title = label
            histogram_properties.x_limits = measurement_config.fit_boundaries[fit_variable]

            make_data_mc_comparison_plot( [h_data, h_background, h_signal],
                                         ['data', 'background', 'signal'],
                                         ['black', 'green', 'red'], histogram_properties,
                                         save_folder = path, save_as = output_formats )
def main():
    '''
        Makes the fit variable plots of the electron channel for every
        variable in 'variables': one plot per bin of the variable plus one
        'inclusive' plot made from the sum of the histograms of all bins.
        Afterwards the QCD control region, V+Jets b-tag region and V+Jets
        template comparisons are made for the variable.

        The global b_tag_bin_ctl is set per fit variable: '1orMoreBtag' for
        fit variables containing '_bl', '0orMoreBtag' otherwise.
    '''
    global measurement_config, histogram_files
    global electron_fit_variables, muon_fit_variables, fit_variable_properties
    global b_tag_bin, category, histogram_files, variables
    global b_tag_bin_ctl

    title_template = '$%.1f$ fb$^{-1}$(%d TeV)'
    e_title = title_template % ( measurement_config.new_luminosity / 1000., measurement_config.centre_of_mass_energy )
    met_type = 'patType1CorrectedPFMet'
    for variable in variables:
        variable_bins = variable_bins_ROOT[variable]
        histogram_template = get_histogram_template( variable )

        for fit_variable in electron_fit_variables:
            if '_bl' in fit_variable:
                b_tag_bin_ctl = '1orMoreBtag'
            else:
                b_tag_bin_ctl = '0orMoreBtag'
            save_path = 'plots/%dTeV/fit_variables/%s/%s/' % ( measurement_config.centre_of_mass_energy, variable, fit_variable )
            make_folder_if_not_exists( save_path )
            make_folder_if_not_exists( save_path + 'qcd/' )
            make_folder_if_not_exists( save_path + 'vjets/' )
            inclusive_histograms = {}
            inclusive_fit_distribution = ''
            inclusive_qcd_distribution = ''
            for bin_range in variable_bins:
                params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable}
                fit_variable_distribution = histogram_template % params
                qcd_fit_variable_distribution = fit_variable_distribution.replace( 'Ref selection', 'QCDConversions' )
                qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl )
                histograms = get_histograms_from_files( [fit_variable_distribution, qcd_fit_variable_distribution], histogram_files )
                plot_fit_variable( histograms, fit_variable, variable, bin_range, fit_variable_distribution, qcd_fit_variable_distribution, e_title, save_path )

                # sum histograms for inclusive plots
                # the inclusive names do not depend on the sample, so they
                # are built once per bin rather than once per sample
                inclusive_fit_distribution = fit_variable_distribution.replace( bin_range, "inclusive" )
                inclusive_qcd_distribution = qcd_fit_variable_distribution.replace( bin_range, "inclusive" )
                # items() and 'in' work on Python 2 and 3, unlike
                # iteritems() and has_key()
                for sample, hist in histograms.items():
                    if sample not in inclusive_histograms:
                        # first bin of this sample: start from clones so the
                        # per-bin histograms stay untouched
                        inclusive_histograms[sample] = {}
                        inclusive_histograms[sample][inclusive_fit_distribution] = hist[fit_variable_distribution].clone()
                        inclusive_histograms[sample][inclusive_qcd_distribution] = hist[qcd_fit_variable_distribution].clone()
                    else:
                        inclusive_histograms[sample][inclusive_fit_distribution] += hist[fit_variable_distribution]
                        inclusive_histograms[sample][inclusive_qcd_distribution] += hist[qcd_fit_variable_distribution]

            plot_fit_variable( inclusive_histograms, fit_variable, variable,
                               'inclusive', inclusive_fit_distribution,
                               inclusive_qcd_distribution, e_title, save_path )

        compare_qcd_control_regions( variable, met_type, e_title )
        compare_vjets_btag_regions( variable, met_type, e_title )
        compare_vjets_templates( variable, met_type, e_title )
def main():
    """Plot every electron fit variable in every bin of every variable.

    For each entry of ``variables`` and each entry of
    ``electron_fit_variables`` the histogram for the reference selection
    and its 'QCDConversions' counterpart are read from ``histogram_files``
    for every bin and passed to ``plot_fit_variable``.  The per-bin
    histograms are also summed per sample and plotted once more under the
    bin name 'inclusive'.  Afterwards the QCD control region and V+jets
    comparison plots are produced for the variable.

    Reads its configuration from module-level names
    (``measurement_config``, ``variables``, ``b_tag_bin``, ...) and
    rebinds the module-level ``b_tag_bin_ctl`` as a side effect.
    """
    global measurement_config, histogram_files
    global electron_fit_variables, muon_fit_variables, fit_variable_properties
    global b_tag_bin, category, variables
    global b_tag_bin_ctl

    title_template = '$%.1f$ fb$^{-1}$(%d TeV)'
    e_title = title_template % (measurement_config.new_luminosity / 1000.,
                                measurement_config.centre_of_mass_energy)
    met_type = 'patType1CorrectedPFMet'
    for variable in variables:
        variable_bins = variable_bins_ROOT[variable]
        histogram_template = get_histogram_template(variable)

        for fit_variable in electron_fit_variables:
            # fit variables containing '_bl' use a different control b-tag bin
            if '_bl' in fit_variable:
                b_tag_bin_ctl = '1orMoreBtag'
            else:
                b_tag_bin_ctl = '0orMoreBtag'
            save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
                measurement_config.centre_of_mass_energy, variable,
                fit_variable)
            make_folder_if_not_exists(save_path)
            make_folder_if_not_exists(save_path + 'qcd/')
            make_folder_if_not_exists(save_path + 'vjets/')
            inclusive_histograms = {}
            inclusive_fit_distribution = ''
            inclusive_qcd_distribution = ''
            for bin_range in variable_bins:
                params = {
                    'met_type': met_type,
                    'bin_range': bin_range,
                    'fit_variable': fit_variable,
                    'b_tag_bin': b_tag_bin,
                    'variable': variable
                }
                fit_variable_distribution = histogram_template % params
                qcd_fit_variable_distribution = fit_variable_distribution.replace(
                    'Ref selection', 'QCDConversions').replace(
                        b_tag_bin, b_tag_bin_ctl)
                histograms = get_histograms_from_files(
                    [fit_variable_distribution, qcd_fit_variable_distribution],
                    histogram_files)
                plot_fit_variable(histograms, fit_variable, variable,
                                  bin_range, fit_variable_distribution,
                                  qcd_fit_variable_distribution, e_title,
                                  save_path)

                # The inclusive names do not depend on the sample, so they
                # are derived once per bin rather than once per sample.
                inclusive_fit_distribution = fit_variable_distribution.replace(
                    bin_range, "inclusive")
                inclusive_qcd_distribution = qcd_fit_variable_distribution.replace(
                    bin_range, "inclusive")

                # sum histograms for inclusive plots
                for sample, hist in histograms.items():
                    if sample not in inclusive_histograms:
                        # first bin for this sample: start from clones so the
                        # per-bin histograms are not modified by the sums
                        inclusive_histograms[sample] = {
                            inclusive_fit_distribution:
                                hist[fit_variable_distribution].clone(),
                            inclusive_qcd_distribution:
                                hist[qcd_fit_variable_distribution].clone(),
                        }
                    else:
                        summed = inclusive_histograms[sample]
                        summed[inclusive_fit_distribution] += hist[
                            fit_variable_distribution]
                        summed[inclusive_qcd_distribution] += hist[
                            qcd_fit_variable_distribution]

            plot_fit_variable(inclusive_histograms, fit_variable, variable,
                              'inclusive', inclusive_fit_distribution,
                              inclusive_qcd_distribution, e_title, save_path)

        compare_qcd_control_regions(variable, met_type, e_title)
        compare_vjets_btag_regions(variable, met_type, e_title)
        compare_vjets_templates(variable, met_type, e_title)
def plot_central_and_systematics(channel):
    """Draw the central unfolded measurement together with all systematics.

    The 'unfolded' histogram of the 'central' category is drawn first; the
    'unfolded' histogram of every other entry in ``categories`` is overlaid
    with smaller markers whose colour follows the entry's position in
    ``categories``.  The canvas is written as PNG and PDF into
    ``save_path/<variable>``.

    channel -- channel name handed to read_xsection_measurement_results.
               'electron' and 'muon' get their own label text, anything
               else is labelled 'combined'; the channel label is not drawn
               when channel is 'combination'.
    """
    global variable, translate_options, k_value, b_tag_bin, maximum, categories
    # global ROOT configuration and plotting style
    ROOT.TH1.SetDefaultSumw2(False)
    ROOT.gROOT.SetBatch(True)
    ROOT.gROOT.ProcessLine('gErrorIgnoreLevel = 1001;')
    plotting.setStyle()
    gStyle.SetTitleYOffset(1.4)
    ROOT.gROOT.ForceStyle()

    canvas = Canvas(width=700, height=500)
    for set_margin, size in ((canvas.SetLeftMargin, 0.15),
                             (canvas.SetBottomMargin, 0.15),
                             (canvas.SetTopMargin, 0.05),
                             (canvas.SetRightMargin, 0.05)):
        set_margin(size)
    legend = plotting.create_legend(x0=0.6, y1=0.5)

    # central measurement
    central_results = read_xsection_measurement_results('central', channel)
    hist_data_central = central_results[0]['unfolded']

    variable_label = translate_options[variable]
    x_title = variable_label + ' [GeV]'
    y_title = ('#frac{1}{#sigma} #frac{d#sigma}{d' + variable_label +
               '} [GeV^{-1}]')
    hist_data_central.GetXaxis().SetTitle(x_title)
    hist_data_central.GetYaxis().SetTitle(y_title)
    hist_data_central.GetXaxis().SetTitleSize(0.05)
    hist_data_central.GetYaxis().SetTitleSize(0.05)
    hist_data_central.SetMinimum(0)
    hist_data_central.SetMaximum(maximum[variable])
    hist_data_central.SetMarkerSize(1)
    hist_data_central.SetMarkerStyle(20)
    gStyle.SetEndErrorSize(20)
    hist_data_central.Draw('P')
    legend.AddEntry(hist_data_central, 'measured (unfolded)', 'P')

    # systematic variations on top of the central result
    for systematic in categories:
        if systematic == 'central':
            continue
        systematic_results = read_xsection_measurement_results(
            systematic, channel)
        hist_data_systematic = systematic_results[0]['unfolded']
        hist_data_systematic.SetMarkerSize(0.5)
        hist_data_systematic.SetMarkerStyle(20)
        colour_number = categories.index(systematic) + 1
        # colour 10 is swapped for colour 42
        if colour_number == 10:
            colour_number = 42
        hist_data_systematic.SetMarkerColor(colour_number)
        hist_data_systematic.Draw('same P')
        legend.AddEntry(hist_data_systematic, systematic, 'P')

    legend.Draw()

    # text boxes above the plot
    mytext = TPaveText(0.5, 0.97, 1, 1.01, "NDC")
    channelLabel = TPaveText(0.18, 0.97, 0.5, 1.01, "NDC")
    label_templates = {
        'electron': "e, %s, %s, k_v = %s",
        'muon': "#mu, %s, %s, k_v = %s",
    }
    label_template = label_templates.get(channel,
                                         "combined, %s, %s, k_v = %s")
    channelLabel.AddText(
        label_template %
        ("#geq 4 jets", b_tag_bins_latex[b_tag_bin], k_value))
    mytext.AddText("CMS Preliminary, L = %.1f fb^{-1} at #sqrt{s} = 8 TeV" %
                   5.8)

    for pave in (mytext, channelLabel):
        pave.SetFillStyle(0)
        pave.SetBorderSize(0)
        pave.SetTextFont(42)
        pave.SetTextAlign(13)

    mytext.Draw()
    if channel != 'combination':
        channelLabel.Draw()

    canvas.Modified()
    canvas.Update()

    # write the canvas in both output formats
    path = save_path + '/' + variable
    make_folder_if_not_exists(path)
    output_stem = (path + '/normalised_xsection_' + channel +
                   '_altogether_kv' + str(k_value))
    for extension in ('.png', '.pdf'):
        canvas.SaveAs(output_stem + extension)
def compare_qcd_control_regions( variable = 'MET', met_type = 'patType1CorrectedPFMet', title = 'Untitled'):
    ''' Compares the templates from the control regions in different bins
     of the current variable.

     For every electron fit variable the 'QCDConversions' histogram of the
     first three bins of `variable` is read, the 'TTJet', 'V+Jets' and
     'SingleTop' contributions are subtracted from data via
     clean_control_region, and the resulting per-bin templates are compared
     (normalised to unit area) with their sum, labelled 'inclusive'.

     variable -- key into variable_bins_ROOT
     met_type -- MET type inserted into the histogram path template
     title    -- plot title; the control b-tag bin label is appended

     Rebinds the module-level b_tag_bin_ctl as a side effect.
     '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template( variable )
    # only the first max_bins bins enter the comparison
    max_bins = 3
    bins_to_compare = variable_bins[0:max_bins]

    for fit_variable in electron_fit_variables:
        all_hists = {}
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/fit_variables/%dTeV/%s/%s/' % (measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/qcd/')

        for bin_range in bins_to_compare:
            params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable}
            fit_variable_distribution = histogram_template % params
            qcd_fit_variable_distribution = fit_variable_distribution.replace( 'Ref selection', 'QCDConversions' )
            qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl )
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files( [qcd_fit_variable_distribution], histogram_files )
            prepare_histograms( histograms, rebin = fit_variable_properties[fit_variable]['rebin'], scale_factor = measurement_config.luminosity_scale )

            histograms_for_cleaning = {}
            for sample in ( 'data', 'V+Jets', 'SingleTop', 'TTJet' ):
                histograms_for_cleaning[sample] = histograms[sample][qcd_fit_variable_distribution]
            # data-driven QCD template: data minus the other samples
            all_hists[bin_range] = clean_control_region( histograms_for_cleaning, subtract = ['TTJet', 'V+Jets', 'SingleTop'] )

        # create the inclusive distribution (before the bins are normalised)
        inclusive_hist = deepcopy(all_hists[variable_bins[0]])
        for bin_range in variable_bins[1:max_bins]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms to unit area; empty ones are left untouched
        for bin_range in bins_to_compare:
            bin_integral = all_hists[bin_range].Integral()
            if bin_integral != 0:
                all_hists[bin_range].Scale(1.0 / bin_integral)
        # same guard for the sum, so empty templates do not raise
        # ZeroDivisionError
        inclusive_integral = inclusive_hist.Integral()
        if inclusive_integral != 0:
            inclusive_hist.Scale(1.0 / inclusive_integral)

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_variable_properties[fit_variable]['x-title']
        histogram_properties.y_axis_title = fit_variable_properties[fit_variable]['y-title']
        histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace('Events', 'a.u.')
        histogram_properties.x_limits = [fit_variable_properties[fit_variable]['min'], fit_variable_properties[fit_variable]['max']]
        histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin_ctl + '_QCD_template_comparison'
        measurements = {bin_range + ' GeV': histogram for bin_range, histogram in all_hists.items()}
        measurements = OrderedDict(sorted(measurements.items()))
        compare_measurements(models = {'inclusive' : inclusive_hist},
                             measurements = measurements,
                             show_measurement_errors = True,
                             histogram_properties = histogram_properties,
                             save_folder = save_path + '/qcd/',
                             save_as = save_as)
Example #35
0
	sys.exit()

# Set up the config according to the centre of mass energy
config = XSectionConfig(options.centreOfMassEnergy)

# Get the luminosity for the centre of mass energy
luminosity = config.luminosities[options.centreOfMassEnergy]

# Get current working directory
current_working_directory = os.getcwd()

# Get folder to move files to
path_to_AN_folder = config.path_to_files

# Move log files separately first, since there is no "logs" category in categories_and_prefixes.
# The shell glob matches every '*<centre of mass energy>TeV*.log' file in the BAT output folder.
make_folder_if_not_exists(path_to_AN_folder + '/logs/')
command = 'mv ' + options.pathToBATOutputFiles + '/*' + str(options.centreOfMassEnergy) + 'TeV*.log ' + path_to_AN_folder + '/logs/'
if options.doNothing:
	# dry run: only show what would be executed
	print "command = ", command
	print "path to folder = ", path_to_AN_folder + '/logs/'
elif not options.doNothing:
	make_folder_if_not_exists( path_to_AN_folder + "/logs" )

	# shell=True so that the shell expands the '*' wildcards in the command
	p = subprocess.Popen(command, shell=True)
	p.wait()

# Now move all other BAT output files.

# One destination folder per category; files are selected by luminosity ('<luminosity>pb')
# and by the category's prefix from categories_and_prefixes.
# NOTE(review): the command is only built here; nothing in this excerpt executes it.
for category in config.categories_and_prefixes.keys():
	make_folder_if_not_exists(path_to_AN_folder + "/" + category)
	command = 'mv ' + options.pathToBATOutputFiles + '/*' + str(luminosity) + 'pb*' + config.categories_and_prefixes[category] + '.root ' + path_to_AN_folder + "/" + category
def make_template_plots( histograms, category, channel ):
    '''Plot the fit templates of every fit variable in every variable bin.

    histograms -- nested mapping histograms[fit_variable][variable_bin][sample]
                  with the samples 'TTJet', 'SingleTop', 'V+Jets' and 'QCD'
    category   -- name used as a sub-folder of the output path
    channel    -- channel name, used in the file name and for the CMS labels

    The binning is taken from variable_bins_visiblePS_ROOT or
    variable_bins_ROOT depending on the module-level phase_space.  One
    figure per (variable bin, fit variable) is written in every format of
    output_formats; a plot is skipped when all of its output files already
    exist.  Line properties of the input histograms are modified in place.
    '''
    global variable, output_folder, phase_space
    fit_variables = histograms.keys()

    variableBins = None
    if phase_space == 'VisiblePS':
        variableBins = variable_bins_visiblePS_ROOT
    elif phase_space == 'FullPS':
        variableBins = variable_bins_ROOT

    # the output folder is the same for every bin
    path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_templates/'

    for variable_bin in variableBins[variable]:
        make_folder_if_not_exists( path )
        for fit_variable in fit_variables:
            plotname = path + channel + '_' + fit_variable + '_template_bin_' + variable_bin

            # Skip plots whose output files all exist already.  The check has
            # to 'continue' the fit-variable loop, not a loop over the formats.
            if all( os.path.isfile( plotname + '.' + output_format ) for output_format in output_formats ):
                continue

            # plot with matplotlib
            templates = histograms[fit_variable][variable_bin]
            h_ttjet = templates['TTJet']
            h_single_top = templates['SingleTop']
            h_VJets = templates['V+Jets']
            h_QCD = templates['QCD']

            h_ttjet.linecolor = 'red'
            h_single_top.linecolor = 'magenta'
            h_VJets.linecolor = 'green'
            h_QCD.linecolor = 'gray'
            h_VJets.linestyle = 'dashed'
            h_QCD.linestyle = 'dotted'  # currently not working
            # bug report: http://trac.sagemath.org/sage_trac/ticket/13834

            for template in ( h_ttjet, h_single_top, h_VJets, h_QCD ):
                template.linewidth = 5

            plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' )
            axes = plt.axes()
            # no minor ticks for NJets
            use_minor_ticks = variable not in ['NJets']
            if use_minor_ticks:
                axes.minorticks_on()

            plt.xlabel( fit_variables_latex[fit_variable], CMS.x_axis_title )
            plt.ylabel( 'normalised to unit area/(%s)' % get_unit_string(fit_variable), CMS.y_axis_title )
            plt.tick_params( **CMS.axis_label_major )
            if use_minor_ticks:
                plt.tick_params( **CMS.axis_label_minor )

            rplt.hist( h_ttjet, axes = axes, label = 'signal' )
            rplt.hist( h_single_top, axes = axes, label = 'Single Top' )

            # empty V+Jets / QCD templates are reported instead of drawn
            if ( h_VJets.Integral() != 0 ):
                rplt.hist( h_VJets, axes = axes, label = 'V+Jets' )
            else:
                print("WARNING: in %s bin %s, %s category, %s channel, V+Jets template is empty: not plotting." % ( variable, variable_bin, category, channel ))
            if ( h_QCD.Integral() != 0 ):
                rplt.hist( h_QCD, axes = axes, label = 'QCD' )
            else:
                print("WARNING: in %s bin %s, %s category, %s channel, QCD template is empty: not plotting." % ( variable, variable_bin, category, channel ))
            y_max = get_best_max_y([h_ttjet, h_single_top, h_VJets, h_QCD])
            axes.set_ylim( [0, y_max * 1.1] )
            axes.set_xlim( measurement_config.fit_boundaries[fit_variable] )

            plt.legend( numpoints = 1, loc = 'upper right', prop = CMS.legend_properties )
            label, channel_label = get_cms_labels( channel )
            plt.title( label, CMS.title )
            # CMS text
            # note: fontweight/weight does not change anything as we use Latex text!!!
            plt.text(0.95, 0.95, r"\textbf{CMS}", transform=axes.transAxes, fontsize=42,
                     verticalalignment='top',horizontalalignment='right')
            # channel text
            axes.text(0.95, 0.95, r"\emph{%s}" %channel_label, transform=axes.transAxes, fontsize=40,
                      verticalalignment='top',horizontalalignment='right')

            plt.tight_layout()

            for output_format in output_formats:
                plt.savefig( plotname + '.' + output_format )

            # release the figure before the next plot is made
            plt.close()
            gc.collect()
Example #37
0
    parser.add_option( "-l", "--log-plots", dest = "log_plots", action = "store_true",
                      help = "plots the y axis in log scale" )

    ( options, args ) = parser.parse_args()
    measurement_config = XSectionConfig( options.CoM)
    
    centre_of_mass = measurement_config.centre_of_mass_energy
    luminosity = measurement_config.luminosity * measurement_config.luminosity_scale
    ttbar_xsection = measurement_config.ttbar_xsection
    load_fakes = options.load_fakes
    method = options.unfolding_method
    path_to_JSON = options.data_path
    plot_location = options.output_folder + '/' + str(centre_of_mass) + 'TeV/' + options.test + '/'
    met_type = measurement_config.translate_options[options.metType]
    log_plots = options.log_plots
    make_folder_if_not_exists( plot_location )

    test = options.test

    input_filename_central = measurement_config.unfolding_madgraph
    input_filename_bias = measurement_config.unfolding_mcatnlo

    variables = ['MET', 'WPT', 'MT', 'ST', 'HT']

    input_file = File( input_filename_central, 'read' )
    input_file_bias = File( input_filename_bias, 'read' )

    print 'Performing', test, 'unfolding checks at', centre_of_mass, 'TeV'
    
    for channel in ['electron', 'muon']:
        for variable in variables:
Example #38
0
import json
from config import XSectionConfig
from config.variable_binning import bin_edges, bin_edges_vis
from tools.file_utilities import make_folder_if_not_exists

com = 13
fitVars = "M3_angle_bl"

config = XSectionConfig( com )

make_folder_if_not_exists('config/unfolding/FullPS/')
make_folder_if_not_exists('config/unfolding/VisiblePS/')

for channel in config.analysis_types.keys():
	for variable in bin_edges.keys():

		histogramTemplate = "unfolding_%s_analyser_%s_channel" % ( variable, channel )
		outputJson = {
		    "output_folder": "plots/%sTeV/unfolding_tests/FullPS" % com, 
		    "output_format": ["png", "pdf"], 
		    "centre-of-mass energy" : com,
		    "channel": "%s" % channel,
		    "variable": "%s" % variable,
		    "phaseSpace" : "FullPS",
			"truth" : { 
				"file" : "%s" % config.unfolding_central,
				# "histogram": "%s/truth" % ( histogramTemplate ),
				},
			"gen_vs_reco" : { 
				"file" : "%s" % config.unfolding_central,
				# "histogram": "%s/response_without_fakes" % ( histogramTemplate ),
Example #39
0
def generate_toy(n_toy, n_input_mc, config, output_folder, start_at=0, split=1):
    """Fill toy MC sets from the unfolding tree and write them to a ROOT file.

    n_toy         -- number of toys; passed to ToySet and get_mc_weight
    n_input_mc    -- the event loop stops once this many events were read
    config        -- provides luminosity_scale, centre_of_mass_energy and
                     ttbar_category_templates_trees['central'] (input file)
    output_folder -- folder for the output file (created if missing)
    start_at      -- events are skipped while the running event count is
                     below this value
    split         -- not used by the current implementation

    For each of the 'electron' and 'muon' channels one ToySet per variable
    ('MET', 'HT', 'ST', 'WPT') is filled with every processed event and
    written to the output file.
    """
    from progressbar import Percentage, Bar, ProgressBar, ETA
    set_root_defaults()
    genWeight = '( EventWeight * {0})'.format(config.luminosity_scale)
    file_name = config.ttbar_category_templates_trees['central']
    make_folder_if_not_exists(output_folder)
    outfile = get_output_file_name(
        output_folder, n_toy, start_at, n_input_mc, config.centre_of_mass_energy)

    widgets = ['Progress: ', Percentage(), ' ', Bar(),
           ' ', ETA()]

    with root_open(file_name, 'read') as f_in, root_open(outfile, 'recreate') as f_out:
        tree = f_in.Get("TTbar_plus_X_analysis/Unfolding/Unfolding")
        n_events = tree.GetEntries()
        print("Number of entries in tree : ", n_events)
        for channel in ['electron', 'muon']:
            print('Channel :', channel)
            # compare by value: identity of string objects is not guaranteed
            if channel == 'muon':
                gen_selection = '( isSemiLeptonicMuon == 1 )'
                gen_selection_vis = '( isSemiLeptonicMuon == 1 && passesGenEventSelection )'
            else:
                gen_selection = '( isSemiLeptonicElectron == 1 )'
                gen_selection_vis = '( isSemiLeptonicElectron == 1 && passesGenEventSelection )'

            selection = '( {0} ) * ( {1} )'.format(genWeight, gen_selection)
            selection_vis = '( {0} ) * ( {1} )'.format(genWeight,
                                                       gen_selection_vis)
            weighted_entries = get_weighted_entries(tree, selection)
            weighted_entries_vis = get_weighted_entries(tree, selection_vis)
            pbar = ProgressBar(widgets=widgets, maxval=n_input_mc).start()

            toy_mc_sets = []
            for variable in ['MET', 'HT', 'ST', 'WPT']:  # variable_bins:
                toy_mc = ToySet(f_out, variable, channel, n_toy)
                toy_mc_sets.append(toy_mc)
            count = 0
            for event in tree:
                # Weights are requested for every event read, including the
                # ones skipped below; the call order is kept on purpose.
                # NOTE(review): presumably n_toy weights per event - confirm
                # against get_mc_weight.
                mc_weights = get_mc_weight(weighted_entries, n_toy)
                mc_weights_vis = get_mc_weight(weighted_entries_vis, n_toy)

                if count >= n_input_mc:
                    break
                count += 1
                if count < start_at:
                    continue
                # events enter unweighted; the toy weights carry the variation
                weight = 1

                for toy_mc in toy_mc_sets:
                    toy_mc.fill(event, weight, mc_weights, mc_weights_vis)
                if count % 1000 == 1:
                    pbar.update(count)
                    print('Processed {0} events'.format(count))
            pbar.finish()
            for toy_mc in toy_mc_sets:
                toy_mc.write()
    print('Toy MC was saved to file:', outfile)
Example #40
0
def print_xsections_with_uncertainties(xsections, channel, toFile=True):
    """Format the measured cross sections and their uncertainties as LaTeX.

    xsections -- mapping with a 'central' entry holding one result per bin
                 of variable_bins_ROOT[variable]; each result is indexed
                 as (measurement, fit error).  The whole mapping is passed
                 on to calculateTotalUncertainty.
    channel   -- channel name, used in the header and in the file name
    toFile    -- write the text to
                 '<savePath>/<variable>/normalised_xsection_main_result_
                 <channel>_<met_type>_kv<k_value>.tex' when true, print it
                 otherwise

    The text consists of a header, one row per bin with the central value
    scaled by 100, and one row per uncertainty source with the relative
    uncertainty (in per cent) in every bin.  The output folder is created
    in both modes.
    """
    global savePath, variable, k_value, met_type, b_tag_bin
    # header block, framed by two rules of '=' characters
    printout = '\n'
    printout += '=' * 60
    printout += '\n'
    printout += 'Results for %s variable, %s channel, k-value %s, met type %s, %s b-tag region\n' % (
        variable, channel, k_value, met_type, b_tag_bin)
    printout += '=' * 60
    printout += '\n'
    printout += '%s bin & $\\sigma_{meas}$ \\\\ \n' % variable
    printout += '\\hline\n'
    # one table row (as a growing string) per uncertainty source
    uncertainties = {}

    bins = variable_bins_ROOT[variable]
    assert (len(bins) == len(xsections['central']))

    for bin_i, variable_bin in enumerate(bins):
        centralresult = xsections['central'][bin_i]
        uncertainty = calculateTotalUncertainty(xsections, bin_i)
        uncertainty_total_plus, uncertainty_total_minus = symmetriseErrors(
            uncertainty['Total+'][0], uncertainty['Total-'][0])
        scale = 100
        central_measurement = centralresult[0]
        fit_error = centralresult[1]

        if doSymmetricErrors:
            relativeError = getRelativeError(
                central_measurement, fit_error + uncertainty_total_plus)
            formatting = (variable_bins_latex[variable_bin],
                          central_measurement * scale, fit_error * scale,
                          uncertainty_total_plus * scale)
            text = '%s & $\\left(%.2f \\pm %.2f \\text{ (fit)} \\pm %.2f \\text{ (syst.)}\\right)' % formatting + '(%.2f' % (
                relativeError *
                100) + '\\%) \\times 10^{-2}\\, \\GeV^{-1}$\\\\ \n'
        else:
            formatting = (variable_bins_latex[variable_bin],
                          central_measurement * scale, fit_error * scale,
                          uncertainty_total_plus * scale,
                          uncertainty_total_minus * scale)
            text = '%s & $%.2f \\pm %.2f (fit)^{+%.2f}_{-%.2f} (sys) \\cdot 10^{-2}$\\\\ \n' % formatting
        printout += text

        # append this bin's relative uncertainty to the row of each source
        for source, unc_result in uncertainty.items():
            if source not in uncertainties:
                if source in metsystematics_sources:
                    uncertainties[
                        source] = metsystematics_sources_latex[source] + ' & '
                else:
                    uncertainties[source] = source + ' & '
            relativeError = getRelativeError(centralresult[0], unc_result[0])
            uncertainties[source] += '%.2f' % (relativeError * 100) + '\\% &'

    printout += '\\\\ \n'
    for source in sorted(uncertainties.keys()):
        # drop the trailing column separator before closing the row
        value = uncertainties[source].rstrip('&')
        value += '\\\\ \n'
        printout += value

    make_folder_if_not_exists(savePath + '/' + variable)
    if toFile:
        output_file_name = (
            savePath + '/' + variable + '/normalised_xsection_main_result_' +
            channel + '_' + met_type + '_kv' + str(k_value) + '.tex')
        # the context manager closes the file even if the write fails
        with open(output_file_name, 'w') as output_file:
            output_file.write(printout)
    else:
        print(printout)
def plot(regularisation_settings, results, use_current_k_values=False):
    """Plot the global correlation as a function of tau and mark the optima.

    regularisation_settings -- object providing variable, channel,
        centre_of_mass_energy, output_folder and output_format (an iterable
        of file extensions)
    results -- pair (k_results, tau_results) with
        tau_results = (optimal_tau, minimal_rho, tau_values, rho_values) and
        k_results = (optimal_k, optimal_k_rho, k_values, k_tau_values,
        k_rho_values); k_values must support .index()
    use_current_k_values -- also annotate the k value returned by
        get_k_tau_set for the current configuration

    The figure is saved as
    '<output_folder>/reg_param_from_global_correlation_<channel>_channel_
    <variable>.<format>' for every output format and closed afterwards.
    """
    variable = regularisation_settings.variable
    channel = regularisation_settings.channel
    com = regularisation_settings.centre_of_mass_energy
    output_folder = regularisation_settings.output_folder
    output_format = regularisation_settings.output_format
    measurement_config = XSectionConfig(com)

    name = 'reg_param_from_global_correlation_%s_channel_%s' % (channel,
                                                                variable)
    hp = Histogram_properties()
    hp.name = name
    hp.x_axis_title = r'log($\tau$)'
    hp.y_axis_title = r'$\bar{\rho}(\tau)$'
    hp.title = r'global correlation for $%s$, %s channel, $\sqrt{s} = %d$ TeV'
    hp.title = hp.title % (variables_latex[variable], channel, com)

    k_results, tau_results = results
    optimal_tau, minimal_rho, tau_values, rho_values = tau_results
    optimal_k, optimal_k_rho, k_values, k_tau_values, k_rho_values = k_results

    figure = plt.figure(figsize=(16, 16), dpi=200, facecolor='white')
    plt.plot(tau_values, rho_values)
    plt.plot(k_tau_values, k_rho_values, 'ro')

    plt.title(hp.title, CMS.title)
    plt.xlabel(hp.x_axis_title, CMS.x_axis_title)
    plt.ylabel(hp.y_axis_title, CMS.y_axis_title)
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)

    # gca() returns the axes the curves were drawn on; plt.axes() would add
    # a new, empty axes on current matplotlib versions
    ax = plt.gca()

    current_k, closest_tau, _, _ = get_k_tau_set(measurement_config, channel,
                                                 variable, results)

    # first best k
    tau_index = k_values.index(optimal_k)
    closest_tau_best_k = k_tau_values[tau_index]
    ax.annotate(
        r"$\tau = %.2g$" % optimal_tau,
        xy=(optimal_tau, minimal_rho),
        xycoords='data',
        xytext=(optimal_tau * 0.9, minimal_rho * 1.15),
        textcoords='data',
        bbox=dict(boxstyle="round4", fc="w"),
        arrowprops=dict(
            arrowstyle="fancy,head_length=0.4,head_width=0.4,tail_width=0.4",
            connectionstyle="arc3"),
        size=40,
    )

    ax.annotate(
        r"$\tau(k_b = %d) = %.2g$" % (optimal_k, closest_tau_best_k),
        xy=(closest_tau_best_k, optimal_k_rho),
        xycoords='data',
        xytext=(closest_tau_best_k * 10, optimal_k_rho),
        textcoords='data',
        bbox=dict(boxstyle="round4", fc="w"),
        arrowprops=dict(arrowstyle="<-", connectionstyle="arc3", lw=3),
        size=40,
    )
    # then current k
    if use_current_k_values:
        # looked up only when needed, so a current k that is missing from
        # k_values cannot break the plot when no annotation is requested
        current_k_rho = k_rho_values[k_values.index(current_k)]
        ax.annotate(
            r"$\tau(k_c = %d) = %.2g$" % (current_k, closest_tau),
            xy=(closest_tau, current_k_rho),
            xycoords='data',
            xytext=(closest_tau, current_k_rho * 0.9),
            textcoords='data',
            bbox=dict(boxstyle="round4", fc="w"),
            arrowprops=dict(arrowstyle="<-", connectionstyle="arc3", lw=3),
            size=40,
        )

    plt.xscale('log')
    make_folder_if_not_exists(output_folder)
    for f in output_format:
        plt.savefig(output_folder + '/' + hp.name + '.' + f)
    # release the figure; repeated calls would otherwise accumulate figures
    plt.close(figure)
def make_template_plots(histograms, category, channel):
    """Draw the signal, V+Jets and QCD templates for every variable bin.

    histograms -- mapping histograms[variable_bin][sample] with the samples
                  'signal', 'V+Jets' and 'QCD'
    category   -- name used as a sub-folder of the output path
    channel    -- 'electron' and 'muon' get their own label text, anything
                  else is labelled 'combined'

    One canvas per bin of variable_bins_ROOT[variable] is saved as PNG and
    PDF in '<save_path>/<variable>/<category>/fit_templates/'.  Bins whose
    PNG file already exists are skipped.  Line properties and axis settings
    of the input histograms are modified in place.
    """
    global variable, translate_options, b_tag_bin, save_path
    ROOT.TH1.SetDefaultSumw2(False)
    ROOT.gROOT.SetBatch(True)
    ROOT.gROOT.ProcessLine('gErrorIgnoreLevel = 1001;')
    plotting.setStyle()
    gStyle.SetTitleYOffset(1.4)
    ROOT.gROOT.ForceStyle()

    channel_label_templates = {
        'electron': "e, %s, %s",
        'muon': "#mu, %s, %s",
    }
    channel_label_template = channel_label_templates.get(channel,
                                                         "combined, %s, %s")
    # the output folder is the same for every bin
    path = save_path + '/' + variable + '/' + category + '/fit_templates/'

    for variable_bin in variable_bins_ROOT[variable]:
        make_folder_if_not_exists(path)
        # The file name without extension: the extension is appended per
        # format, so a 'png' elsewhere in the path is never rewritten.
        plot_stem = path + channel + '_templates_bin_' + variable_bin
        plotname = plot_stem + '.png'
        # check if template plots exist already
        if os.path.isfile(plotname):
            continue
        canvas = Canvas(width=700, height=500)
        canvas.SetLeftMargin(0.15)
        canvas.SetBottomMargin(0.15)
        canvas.SetTopMargin(0.05)
        canvas.SetRightMargin(0.05)
        legend = plotting.create_legend(x0=0.7, y1=0.8)
        h_signal = histograms[variable_bin]['signal']
        h_VJets = histograms[variable_bin]['V+Jets']
        h_QCD = histograms[variable_bin]['QCD']

        # the first histogram drawn defines the axes of the plot
        h_signal.GetXaxis().SetTitle('Lepton #eta')
        h_signal.GetYaxis().SetTitle('Normalised Events')
        h_signal.GetXaxis().SetTitleSize(0.05)
        h_signal.GetYaxis().SetTitleSize(0.05)
        h_signal.SetMinimum(0)
        h_signal.SetMaximum(0.2)
        h_signal.SetLineWidth(2)
        h_VJets.SetLineWidth(2)
        h_QCD.SetLineWidth(2)
        h_signal.SetLineColor(kRed + 1)
        h_VJets.SetLineColor(kBlue)
        h_QCD.SetLineColor(kYellow)
        h_signal.Draw('hist')
        h_VJets.Draw('hist same')
        h_QCD.Draw('hist same')
        legend.AddEntry(h_signal, 'signal', 'l')
        legend.AddEntry(h_VJets, 'V+Jets', 'l')
        legend.AddEntry(h_QCD, 'QCD', 'l')
        legend.Draw()

        # text boxes above the plot
        mytext = TPaveText(0.5, 0.97, 1, 1.01, "NDC")
        channelLabel = TPaveText(0.18, 0.97, 0.5, 1.01, "NDC")
        channelLabel.AddText(channel_label_template %
                             ("#geq 4 jets", b_tag_bins_latex[b_tag_bin]))
        mytext.AddText(
            "CMS Preliminary, L = %.1f fb^{-1} at #sqrt{s} = 8 TeV" % (5.8))

        for pave in (mytext, channelLabel):
            pave.SetFillStyle(0)
            pave.SetBorderSize(0)
            pave.SetTextFont(42)
            pave.SetTextAlign(13)
        mytext.Draw()
        channelLabel.Draw()

        canvas.Modified()
        canvas.Update()
        canvas.SaveAs(plotname)
        canvas.SaveAs(plot_stem + '.pdf')
def plot_fit_results(histograms, category, channel):
    """Plot data against the stacked background + signal fit result, per bin.

    For every bin in ``variable_bins_ROOT[variable]`` one canvas is drawn with
    the data points on top of a stack of the background and signal histograms.
    Each canvas is saved as PNG and as PDF under
    ``<save_path>/<variable>/<category>/fit_results/``.  A bin whose PNG file
    already exists is skipped.

    Parameters:
        histograms: mapping ``variable_bin -> {'data', 'signal', 'background'}``
            of ROOT-style histograms; they are restyled in place.
        category: name of the sub-folder the plots are stored under.
        channel: 'electron' or 'muon'; any other value is labelled 'combined'.

    Reads the module-level ``variable``, ``b_tag_bin`` and ``save_path``.
    """
    global variable, translate_options, b_tag_bin, save_path
    ROOT.gROOT.SetBatch(True)
    ROOT.gROOT.ProcessLine('gErrorIgnoreLevel = 1001;')
    plotting.setStyle()
    gStyle.SetTitleYOffset(1.4)
    ROOT.gROOT.ForceStyle()

    path = save_path + '/' + variable + '/' + category + '/fit_results/'
    # Lepton symbol shown in the channel label; unknown channels fall back to
    # 'combined', exactly like the former if/elif/else chain.
    lepton_label = {'electron': 'e', 'muon': '#mu'}.get(channel, 'combined')

    for variable_bin in variable_bins_ROOT[variable]:
        make_folder_if_not_exists(path)
        # Build both output names from one base.  Deriving the PDF name with
        # str.replace('png', 'pdf') would also rewrite any 'png' that happens
        # to appear in the folder part of the path.
        plot_base = path + channel + '_bin_' + variable_bin
        plotname = plot_base + '.png'
        # check if template plots exist already
        if os.path.isfile(plotname):
            continue

        canvas = Canvas(width=700, height=500)
        canvas.SetLeftMargin(0.15)
        canvas.SetBottomMargin(0.15)
        canvas.SetTopMargin(0.05)
        canvas.SetRightMargin(0.05)
        legend = plotting.create_legend(x0=0.7, y1=0.8)

        bin_histograms = histograms[variable_bin]
        h_data = bin_histograms['data']
        h_signal = bin_histograms['signal']
        h_background = bin_histograms['background']

        # The data histogram is drawn first so that it defines the axes.
        h_data.GetXaxis().SetTitle('Lepton #eta')
        h_data.GetYaxis().SetTitle('Number of Events')
        h_data.GetXaxis().SetTitleSize(0.05)
        h_data.GetYaxis().SetTitleSize(0.05)
        h_data.SetMinimum(0)
        h_data.SetMarkerSize(1)
        h_data.SetMarkerStyle(20)
        gStyle.SetEndErrorSize(20)
        h_data.Draw('P')

        h_signal.SetFillColor(kRed + 1)
        h_background.SetFillColor(kGreen - 3)
        for h_mc in (h_signal, h_background):
            h_mc.SetLineWidth(2)
            h_mc.SetFillStyle(1001)

        # Background goes in first, so the signal sits on top of it.
        mcStack = THStack("MC", "MC")
        mcStack.Add(h_background)
        mcStack.Add(h_signal)

        mcStack.Draw('hist same')
        # Redraw the data so the points are not hidden behind the filled stack.
        h_data.Draw('error P same')
        legend.AddEntry(h_data, 'data', 'P')
        legend.AddEntry(h_signal, 'signal', 'F')
        legend.AddEntry(h_background, 'background', 'F')
        legend.Draw()

        mytext = TPaveText(0.5, 0.97, 1, 1.01, "NDC")
        channelLabel = TPaveText(0.18, 0.97, 0.5, 1.01, "NDC")
        channelLabel.AddText("%s, %s, %s" % (lepton_label, "#geq 4 jets",
                                             b_tag_bins_latex[b_tag_bin]))
        # NOTE(review): luminosity (5.8) and energy (8 TeV) are hard-coded here.
        mytext.AddText(
            "CMS Preliminary, L = %.1f fb^{-1} at #sqrt{s} = 8 TeV" % (5.8))

        for text_box in (mytext, channelLabel):
            text_box.SetFillStyle(0)
            text_box.SetBorderSize(0)
            text_box.SetTextFont(42)
            text_box.SetTextAlign(13)
        mytext.Draw()
        channelLabel.Draw()

        canvas.Modified()
        canvas.Update()
        canvas.SaveAs(plotname)
        canvas.SaveAs(plot_base + '.pdf')
from tools.file_utilities import make_folder_if_not_exists, read_data_from_JSON

# Runs at import time, before the remaining imports: asks the helper for a
# folder named 'hepdata' (relative to the current working directory).
make_folder_if_not_exists('hepdata')
from dps.config.variable_binning import bin_edges_vis
from dps.config.latex_labels import variables_latex
from dps.config.xsection import XSectionConfig
from dps.utils.pandas_utilities import file_to_df, matrix_from_df
import os.path
import numpy as np

# Analysis configuration, constructed with the argument 13
# (presumably the centre-of-mass energy in TeV -- confirm in XSectionConfig).
measurement_config = XSectionConfig(13)

# Maps a regularisation label to a hard-coded absolute input directory.
# The two paths differ only in the 'data_X_allFixes' vs
# 'data_X_allFixes_allTau0' component.
# NOTE(review): these are user-specific /scratch paths; they will not exist
# on another machine.
regularisations = {
    'regularised':
    '/scratch/db0268/DPS/DPSTestingGround/DailyPythonScripts/data_X_allFixes/normalisation/background_subtraction/',
    'unregularised':
    '/scratch/db0268/DPS/DPSTestingGround/DailyPythonScripts/data_X_allFixes_allTau0/normalisation/background_subtraction/'
}

# Labels of the two normalisation variants handled by this script.
normalisations = ['normalised', 'absolute']

variableHeaders = {
    'MET':
    'name: "{variable}", units: GEV'.format(variable=variables_latex['MET']),
    'HT':
    'name: "{variable}", units: GEV'.format(variable=variables_latex['HT']),
    'ST':
    'name: "{variable}", units: GEV'.format(variable=variables_latex['ST']),
    'WPT':
    'name: "{variable}", units: GEV'.format(variable=variables_latex['WPT']),
    'NJets':
# Script body: pick one job out of allJobs by its index and merge that job's
# input ROOT files into a single output file in the current working directory.

# Reject job numbers past the end of the list.
# NOTE(review): sys.exit() without an argument exits with status 0, so a
# calling batch system sees success. Negative job numbers are not rejected
# and would index allJobs from the end.
if options.jobNumber > (len(allJobs) - 1):
    print 'Job number', options.jobNumber, 'too large'
    print 'Total number of possible jobs :', len(allJobs)
    print 'Largest possible job number : ', len(allJobs) - 1
    sys.exit()

# Each job is indexed as (category, sample, input_samples).
jobNumber = options.jobNumber
job = allJobs[jobNumber]
category = job[0]
sample = job[1]
input_samples = job[2]

# print 'Test with :',sample, category, input_samples

# Make folder
make_folder_if_not_exists(path_to_AN_folder + "/" + category)

current_working_directory = os.getcwd()  #find current working directory
# The category template is a %-format string with one slot for the sample name.
output_file_hdfs = config.general_category_templates[category] % sample
# Redirect the output from the hard-coded HDFS prefix to the working directory.
# If the template does not contain that prefix, the path is left unchanged.
output_file = output_file_hdfs.replace(
    "/hdfs/TopQuarkGroup/results/histogramfiles", current_working_directory)
input_files = [
    config.general_category_templates[category] % input_sample
    for input_sample in input_samples
]

# Merge only when the output is not already present.
if not os.path.exists(output_file):
    merge_ROOT_files(input_files,
                     output_file,
                     compression=7,
                     waitToFinish=True)
def plot_central_and_systematics( channel, systematics, exclude = None, suffix = 'altogether' ):
    """Overlay the central measurement with the individual systematic variations.

    The central result (``'unfolded_with_systematics'``) is drawn with x error
    bars; each systematic in ``systematics`` (except ``'central'`` and anything
    listed in ``exclude``) is drawn on the same axes from its ``'unfolded'``
    result.  One file per entry of the module-level ``output_formats`` is
    written to ``<output_folder><energy>TeV/<variable>/`` as
    ``normalised_xsection_<channel>_<suffix>.<format>``.

    Parameters:
        channel: channel name passed on to the result reader and the labels.
        systematics: list of systematic names; must support ``.index()``
            because the position in this list selects the marker colour.
        exclude: optional collection of systematic names to leave out.
        suffix: last part of the output file name.

    Reads the module-level ``variable`` and ``met_type``.
    """
    global variable, b_tag_bin, met_type
    # None instead of a shared mutable default list.
    if exclude is None:
        exclude = []

    plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' )
    axes = plt.axes()
    if variable not in ['NJets']:
        axes.minorticks_on()

    hist_data_central = read_xsection_measurement_results( 'central', channel )[0]['unfolded_with_systematics']
    hist_data_central.markersize = 2  # points. Imagine, tangible units!
    hist_data_central.marker = 'o'

    # Raw strings throughout: '\l' and '\m' are not valid escape sequences,
    # and a raw string makes the '\\right' workaround for '\r' unnecessary.
    y_label_start = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + variables_latex[variable] + '}'
    if variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']:
        # these variables are plotted without a unit
        plt.xlabel( '$%s$' % variables_latex[variable], CMS.x_axis_title )
        plt.ylabel( y_label_start + '$', CMS.y_axis_title )
    else:
        plt.xlabel( '$%s$ [GeV]' % variables_latex[variable], CMS.x_axis_title )
        plt.ylabel( y_label_start + r' \left[\mathrm{GeV}^{-1}\right]$', CMS.y_axis_title )
    plt.tick_params( **CMS.axis_label_major )
    if variable not in ['NJets']:
        plt.tick_params( **CMS.axis_label_minor )

    rplt.errorbar( hist_data_central, axes = axes, label = 'data', xerr = True )

    for systematic in sorted( systematics ):
        if systematic in exclude or systematic == 'central':
            continue

        hist_data_systematic = read_xsection_measurement_results( systematic, channel )[0]['unfolded']
        hist_data_systematic.markersize = 2
        hist_data_systematic.marker = 'o'
        # Colour follows the position in the caller's (unsorted) list.
        colour_number = systematics.index( systematic ) + 2
        # NOTE(review): colour index 10 is swapped for 42; the reason is not
        # visible here (presumably poor visibility of colour 10).
        if colour_number == 10:
            colour_number = 42
        hist_data_systematic.SetMarkerColor( colour_number )

        if 'PDF' in systematic:
            legend_label = systematic.replace( 'Weights_', ' ' )
        elif met_type in systematic:
            legend_label = measurements_latex[systematic.replace( met_type, '' )]
        else:
            legend_label = measurements_latex[systematic]
        rplt.errorbar( hist_data_systematic, axes = axes, label = legend_label, xerr = None )

    plt.legend( numpoints = 1, loc = 'center right', prop = {'size':25}, ncol = 2 )
    label, channel_label = get_cms_labels( channel )
    plt.title( label, CMS.title )
    # CMS text
    # note: fontweight/weight does not change anything as we use Latex text!!!
    plt.text(0.95, 0.95, r"\textbf{CMS}", transform=axes.transAxes, fontsize=42,
        verticalalignment='top',horizontalalignment='right')
    # channel text
    axes.text(0.95, 0.90, r"\emph{%s}" %channel_label, transform=axes.transAxes, fontsize=40,
        verticalalignment='top',horizontalalignment='right')
    plt.tight_layout()

    path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable
    make_folder_if_not_exists( path )
    for output_format in output_formats:
        filename = path + '/normalised_xsection_' + channel + '_' + suffix + '.' + output_format
        plt.savefig( filename )

    # Close the figure and collect garbage after each plot.
    plt.close()
    gc.collect()
def main():
    """Command-line entry point: draw unfolding pull and difference plots.

    Options select the variable, centre-of-mass energy, output folder, channel
    and the k/tau regularisation values; the positional arguments are the
    input files.  Without input files the function prints a hint and exits
    with status -1.  Pull plots are made for every bin and for all bins
    together, followed by a summary of the per-bin fit results and the
    difference plots.
    """
    parser = OptionParser(__doc__)
    parser.add_option("-v", "--variable",
                      dest="variable",
                      default='MET',
                      help="set the variable to analyse (MET, HT, ST, MT, WPT)")
    parser.add_option("-s", "--centre-of-mass-energy",
                      dest="CoM",
                      default=8,
                      type=int,
                      help="set the centre of mass energy for analysis. Default = 8 [TeV]")
    parser.add_option("-o", "--output",
                      dest="output_folder",
                      default='plots/unfolding_pulls',
                      help="output folder for unfolding pull plots")
    parser.add_option("-c", "--channel",
                      dest="channel",
                      type='string',
                      default='combined',
                      help="channel to be analysed: electron|muon|combined")
    parser.add_option("-k", "--k_value",
                      dest="k_value",
                      type='int',
                      default=-1,
                      help="k-value used in SVD unfolding, only for categorisation purpose at this stage")
    parser.add_option("--tau",
                      dest="tau_value",
                      type='float',
                      default=-1.,
                      help="tau-value for SVD unfolding")

    options, input_files = parser.parse_args()
    if not input_files:
        print('No input files specified.')
        print('Run script with "-h" for usage')
        sys.exit(-1)

    centre_of_mass = options.CoM
    variable = options.variable
    channel = options.channel
    config = XSectionConfig(centre_of_mass)
    k_value = get_k_value(options.k_value, config, channel, variable)
    tau_value = get_tau_value(options.tau_value, config, channel, variable)

    output_folder = '{option_output}/{centre_of_mass}TeV/{variable}/{channel}/'.format(
        option_output=options.output_folder,
        centre_of_mass=centre_of_mass,
        variable=variable,
        channel=channel)
    make_folder_if_not_exists(output_folder)
    output_formats = ['pdf']

    bins = array('d', bin_edges[variable])
    nbins = len(bins) - 1

    print('Producing unfolding pull plots for {0} variable, channel: {1}'.format(
        variable, channel))
    print('Using {0}'.format(get_value_title(k_value, tau_value)))
    print('Output folder: {0}'.format(output_folder))

    # Positional arguments shared by every plotting call below.
    shared = (centre_of_mass, channel, variable, k_value, tau_value,
              output_folder, output_formats)

    pulls = get_data(input_files, subset='pull')

    # one pull plot (and fit result) per bin
    per_bin_fits = [
        plot_pull(pulls, *shared, bin_index=index, n_bins=nbins)
        for index in range(nbins)
    ]
    plot_fit_results(per_bin_fits, *(shared + (bins,)))

    # plot all bins
    plot_pull(pulls, *shared)
    del pulls  # deleting to make space in memory

    differences = get_data(input_files, subset='difference')
    plot_difference(differences, *shared)
def make_error_plot( errorHists, bins ):
    """Plot the relative size of each systematic source and the statistical error.

    Up/down pairs in ``errorHists`` are merged into one symmetric entry by
    taking, bin by bin, the larger absolute value of the two.  The sources
    'TTJets_hadronisation', 'QCD_shape' and 'TTJets_NLOgenerator' have no
    pair and are used as absolute values.  All other unpaired sources are
    ignored, except ``errorHists['statistical']`` which is drawn as a dashed
    black line.  The figure is saved as
    ``<output_folder>/<variable>//<variable>_systematics_<energy>TeV_<channel>.pdf``.

    Parameters:
        errorHists: mapping ``source name -> sequence of per-bin values``;
            must contain 'statistical' and, for every down-type source, the
            matching up-type source (otherwise ``KeyError``).
        bins: bin edges; first and last edge set the x-axis range.

    Reads the module-level ``output_folder``, ``variable``, ``channel`` and
    ``measurement_config``.
    """
    global output_folder, variable
    # (down marker, up marker) pairs, checked in this order of precedence.
    down_up_markers = ( ( 'down', 'up' ), ( 'Down', 'Up' ), ( '-', '+' ), ( 'lower', 'upper' ) )
    variables_without_met = ( 'HT', 'NJets', 'lepton_pt', 'abs_lepton_eta' )

    # For each up/down source, reduce to one set of numbers
    symmetricErrorHists = {}
    for source, hist in errorHists.items():
        # MET systematics (other than JES/JER) are skipped for these variables.
        if variable in variables_without_met and source in measurement_config.met_systematics and 'JES' not in source and 'JER' not in source:
            continue

        markers = next( ( pair for pair in down_up_markers if pair[0] in source ), None )
        if markers is not None:
            down_marker, up_marker = markers
            # Find up version
            upHist = errorHists[source.replace( down_marker, up_marker )]
            newSource = source.replace( down_marker, '' )
            # endswith() is also safe when the stripped name is empty
            if newSource.endswith( '_' ):
                newSource = newSource[:-1]

            symmetricErrorHists[newSource] = [
                max( abs( errordown ), abs( errorup ) )
                for errordown, errorup in zip( hist, upHist )
            ]
        elif 'TTJets_hadronisation' in source or 'QCD_shape' in source or 'TTJets_NLOgenerator' in source:
            symmetricErrorHists[source] = [ abs( i ) for i in hist ]

    x_limits = [bins[0], bins[-1]]
    y_limits = [0,0.6]
    plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' )

    ax0 = plt.axes()
    ax0.minorticks_on()
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12
    ax0.set_xlim( x_limits )
    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )

    statisticalErrorHists = values_and_errors_to_hist( errorHists['statistical'], [], bins )
    # Replace the value lists by histograms; keys are unchanged, so assigning
    # while walking a snapshot of the keys is safe.
    for source in list( symmetricErrorHists ):
        symmetricErrorHists[source] = values_and_errors_to_hist( symmetricErrorHists[source], [], bins )

    colours = ['silver', 'r', 'tan', 'chartreuse', 'cadetblue', 'dodgerblue', 'pink', 'hotpink', 'coral', 'forestgreen', 'cyan', 'teal', 'crimson', 'darkmagenta', 'olive', 'slateblue', 'deepskyblue', 'orange', 'r' ]
    # Cycle through the colours instead of zip()-ing with them: zip would
    # silently leave out every source beyond the number of colours.
    for index, source in enumerate( symmetricErrorHists ):
        hist = symmetricErrorHists[source]
        hist.linewidth = 4
        hist.color = colours[index % len( colours )]
        rplt.hist( hist, stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = source )

    statisticalErrorHists.linewidth = 4
    statisticalErrorHists.color = 'black'
    statisticalErrorHists.linestyle = 'dashed'
    rplt.hist( statisticalErrorHists, stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'stat.' )

    ax0.set_ylim( y_limits )
    leg = plt.legend(loc=1,prop={'size':40},ncol=2)
    leg.draw_frame(False)
    x_title = variables_NonLatex[variable]
    if variable in ['HT', 'MET', 'WPT', 'ST', 'lepton_pt']:
        x_title += ' [GeV]'
    plt.xlabel( x_title, CMS.x_axis_title )
    plt.ylabel( 'Relative Uncertainty', CMS.y_axis_title)
    plt.tight_layout()

    path = output_folder + '/'  + variable + '/'
    make_folder_if_not_exists(path)
    file_template = path + '/%s_systematics_%dTeV_%s.pdf' % (variable, measurement_config.centre_of_mass_energy, channel)
    plt.savefig(file_template)
Example #49
0
def main():
    """Command-line entry point: scan k-values and plot the pull widths.

    For every k-value returned by ``getkValueRange(files)`` (k = 1 is
    skipped) the pull plots are produced per bin and for all bins together.
    The fitted sigma of the all-bins pull, together with the largest and
    smallest per-bin sigma, is collected per k-value and drawn as a graph;
    a second graph shows the relative spread ``(max - min) / sigma``.
    The figure is written to ``<output folder>/TESTGRAPH.<format>``.

    The positional arguments are the input files; without them the function
    prints a hint and exits with status -1.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "-v",
        "--variable",
        dest="variable",
        default='MET',
        help="set the variable to analyse (MET, HT, ST, MT, WPT)")
    parser.add_option(
        "-s",
        "--centre-of-mass-energy",
        dest="CoM",
        default=8,
        help="set the centre of mass energy for analysis. Default = 8 [TeV]",
        type=int)
    parser.add_option("-o",
                      "--output",
                      dest="output_folder",
                      default='plots/unfolding_pulls',
                      help="output folder for unfolding pull plots")
    parser.add_option("-c",
                      "--channel",
                      type='string',
                      dest="channel",
                      default='combined',
                      help="channel to be analysed: electron|muon|combined")

    (options, args) = parser.parse_args()
    if len(args) == 0:
        print('No input files specified.')
        print('Run script with "-h" for usage')
        sys.exit(-1)
    files = args

    centre_of_mass = options.CoM
    variable = options.variable
    channel = options.channel
    output_folder_base = options.output_folder + '/' + \
        str(centre_of_mass) + 'TeV/' + variable + '/' + channel + '/'
    make_folder_if_not_exists(output_folder_base)
    output_formats = ['pdf']

    bins = array('d', bin_edges[variable])
    nbins = len(bins) - 1

    kValues = sorted(getkValueRange(files))

    # one row per k-value: [k, sigma of all bins, max bin sigma, min bin sigma]
    sigmaForEachK = []
    tau = -1
    for k in kValues:
        # Compare by value: 'k is 1' tests object identity, which only works
        # by accident of CPython's small-integer cache and never matches
        # other integer types.
        if k == 1:
            continue

        output_folder = output_folder_base + '/kv' + str(k) + '/'
        make_folder_if_not_exists(output_folder)
        print(
            'Producing unfolding pull plots for {0} variable, k-value of {1}, channel: {2}.'
            .format(variable, k, channel))
        print('Output folder: {0}'.format(output_folder))
        pulls = get_data(files, subset='pull')

        maxSigma = 0
        minSigma = 100
        for bin_i in range(0, nbins):
            fitResults = plot_pull(pulls,
                                   centre_of_mass,
                                   channel,
                                   variable,
                                   k,
                                   tau,
                                   output_folder,
                                   output_formats,
                                   bin_index=bin_i,
                                   n_bins=nbins)
            if fitResults.sigma > maxSigma:
                maxSigma = fitResults.sigma
            if fitResults.sigma < minSigma:
                minSigma = fitResults.sigma

        # plot all bins
        allBinsFitResults = plot_pull(pulls, centre_of_mass, channel, variable,
                                      k, tau, output_folder, output_formats)

        allBinsSigma = allBinsFitResults.sigma
        sigmaForEachK.append([k, allBinsSigma, maxSigma, minSigma])
        print('All bins sigma :', allBinsFitResults.sigma)
        print('Max/min sigma :', maxSigma, minSigma)
        print('Spread :', maxSigma - minSigma)
        del pulls  # deleting to make space in memory
    print()

    # Transpose the rows into columns.  zip() returns an iterator on
    # Python 3, so it has to be materialised before it can be indexed.
    columns = list(zip(*sigmaForEachK))
    kValues = list(columns[0])
    sigmas = list(columns[1])
    sigmaups = list(columns[2])
    sigmadowns = list(columns[3])
    # fixed half-width of 0.5 as the x error on every k-value
    kValuesup = [0.5] * len(sigmas)
    kValuesdown = [0.5] * len(sigmas)
    spread = []

    for i, sigma in enumerate(sigmas):
        spread.append((sigmaups[i] - sigmadowns[i]) / sigma)
        # turn the absolute max/min sigma into distances from the central value
        sigmaups[i] = sigmaups[i] - sigma
        sigmadowns[i] = sigma - sigmadowns[i]
    print(spread)
    # index (into the scanned k-values) of the smallest relative spread
    kValueChoice = spread.index(min(spread))
    print(kValueChoice)

    graph = asrootpy(
        TGraphAsymmErrors(len(sigmas), array('d', kValues), array('d', sigmas),
                          array('d', kValuesdown), array('d', kValuesup),
                          array('d', sigmadowns), array('d', sigmaups)))
    # The y errors passed here are those of sigma; they are not drawn
    # because the spread graph is plotted with yerr=False below.
    graphSpread = asrootpy(
        TGraphAsymmErrors(len(sigmas), array('d', kValues), array('d', spread),
                          array('d', kValuesdown), array('d', kValuesup),
                          array('d', sigmadowns), array('d', sigmaups)))

    # plot with matplotlib
    plt.figure(figsize=(20, 16), dpi=200, facecolor='white')

    ax0 = plt.axes()
    ax0.minorticks_on()
    ax0.grid(True, 'major', linewidth=2)
    ax0.grid(True, 'minor')
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)

    ax0.xaxis.set_major_formatter(FormatStrFormatter('%d'))
    ax0.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
    ax0.xaxis.labelpad = 11

    rplt.errorbar(graph,
                  xerr=True,
                  emptybins=True,
                  axes=ax0,
                  marker='o',
                  ms=15,
                  mew=3,
                  lw=2)
    rplt.errorbar(graphSpread,
                  xerr=None,
                  yerr=False,
                  axes=ax0,
                  linestyle='-',
                  marker='s',
                  ms=10,
                  mew=1,
                  lw=2)

    for output_format in output_formats:
        print(output_folder_base)
        plt.savefig(output_folder_base + '/TESTGRAPH' + '.' + output_format)

    # debugging output of everything that went into the graphs
    print(kValues)
    print(kValuesup)
    print(kValuesdown)
    print(sigmas)
    print(sigmaups)
    print(sigmadowns)
    print(sigmaForEachK)
def print_typical_systematics_table(central_values, errors, channel, toFile = True, print_before_unfolding = False):
    """Summarise the typical systematic uncertainties as LaTeX tables.

    For every bin the relative error of each typical systematic is computed
    with ``getRelativeError``.  Within each group of
    ``measurement_config.typical_systematics_summary`` the larger member of
    every (first, second) pair is taken and the pairs are added in
    quadrature.  Two tables are produced from the per-bin group errors, one
    with the median and one with the maximum over all bins, both in percent.

    With ``toFile`` each table goes to
    ``<output_folder>//<summary>_systematics_<energy>TeV_<channel>.tex``
    (inside ``before_unfolding/`` when ``print_before_unfolding`` is set).
    If that file already exists, a new column for the current variable is
    appended to its rows instead of overwriting it.  Without ``toFile`` the
    tables are printed.

    Parameters:
        central_values: mapping with the key 'measured' or 'unfolded' holding
            one entry per bin; element ``[0]`` of an entry is the central value.
        errors: mapping ``systematic name -> per-bin absolute errors``; must
            contain 'central' and every typical systematic.
        channel: channel name used in the caption, label and file name.
        toFile: write to file (True) or print (False).
        print_before_unfolding: use the 'measured' instead of the 'unfolded'
            central values.

    Raises:
        AssertionError: if the inputs do not match the number of bins or a
            typical systematic is missing from ``errors``.
    """
    global output_folder, variable, met_type, b_tag_bin, all_measurements, phase_space, measurement_config
    bins = None
    if phase_space == 'VisiblePS':
        bins = variable_bins_visiblePS_ROOT[variable]
    elif phase_space == 'FullPS':
        bins = variable_bins_ROOT[variable]
    measurement = 'measured' if print_before_unfolding else 'unfolded'

    assert(len(bins) == len(errors['central']))
    assert(len(bins) == len(central_values[measurement]))
    typical_systematics = measurement_config.typical_systematics
    for s in typical_systematics:
        assert(s in errors)

    summary_groups = measurement_config.typical_systematics_summary
    group_errors = dict((group, []) for group in summary_groups)

    for bin_i, _ in enumerate(bins):
        central_value = central_values[measurement][bin_i][0]
        # calculate all relative errors
        uncertainties = {}
        for systematic in typical_systematics:
            abs_error = errors[systematic][bin_i]
            uncertainties[systematic] = getRelativeError(central_value, abs_error)
        # add errors in a group in quadrature
        for group, u_list in summary_groups.items():
            group_error_squared = 0
            for subgroup in u_list:
                # use the biggest of up and down
                subgroup_error = max(uncertainties[subgroup[0]], uncertainties[subgroup[1]])
                group_error_squared += pow(subgroup_error, 2)
            group_errors[group].append(math.sqrt(group_error_squared))

    # calculate the median and the maximum over all bins
    # x 100 to be in %
    summarised_typical_systematics = {}
    summarised_max_systematics = {}
    for group, u_list in group_errors.items():
        summarised_typical_systematics[group] = median(u_list) * 100
        summarised_max_systematics[group] = max(u_list) * 100

    summaries = (
        ('median', summarised_typical_systematics),
        ('max', summarised_max_systematics),
    )
    # A separate name for the per-summary values keeps the 'errors'
    # parameter from being shadowed.
    for summary, summary_values in summaries:
        printout = '%% ' + '=' * 60
        printout += '\n'
        printout += '%% Typical systematics table for {0} channel, met type {1}, {2} b-tag region\n'.format(channel, met_type, b_tag_bin)
        if print_before_unfolding:
            printout += '%% BEFORE UNFOLDING\n'
        printout += '%% ' + '=' * 60
        printout += '\n'
        printout += '\\begin{table}[htbp]\n'
        printout += '\\centering\n'
        # NOTE(review): the caption says "median values" for the 'max' table too.
        printout += '\\caption{Typical systematic uncertainties (median values) for the normalised \\ttbar cross section measurement \n'
        printout += 'at a centre-of-mass energy of {0} TeV '.format(measurement_config.centre_of_mass_energy)
        if channel == 'combined' or channel == 'combinedBeforeUnfolding':
            printout += '(combination of electron and muon channels).}\n'
        else:
            # The literal closing brace of \caption{...} must be doubled in a
            # str.format template; a single '}' raises ValueError.
            printout += '({0} channel).}}\n'.format(channel)
        printout += '\\label{{tab:typical_systematics_{0}TeV_{1}}}\n'.format(measurement_config.centre_of_mass_energy, channel)
        printout += '\\resizebox{\\columnwidth}{!} {\n'
        printout += '\\begin{tabular}{l' + 'r'*len(bins) + '}\n'
        printout += '\\hline\n'

        header = 'Uncertainty source '
        header += '& {0}'.format(variables_latex[variable])
        header += ' '
        printout += header
        printout += '\n\\hline\n'
        for group, ts in summary_values.items():
            printout += group + ' (\\%) & {:.2f} \\\\ \n'.format(ts)
        printout += '\\hline \n'
        printout += '\\hline \n'
        printout += '\\end{tabular}\n'
        printout += '}\n'
        printout += '\\end{table}\n'

        if not toFile:
            print(printout)
            continue

        path = output_folder + '/'
        make_folder_if_not_exists(path)
        folder = path
        if print_before_unfolding:
            folder = path + '/before_unfolding/'
            make_folder_if_not_exists(folder)
        file_template = folder + '/{0}_systematics_{1}TeV_{2}.tex'.format(summary, measurement_config.centre_of_mass_energy, channel)

        if os.path.isfile(file_template):
            # The table exists already (written for another variable):
            # append one more column to the header and to each group row.
            with open(file_template, 'r+') as output_file:
                lines = output_file.readlines()
                for line_number, line in enumerate(lines):
                    if line.startswith("Uncertainty source"):
                        lines[line_number] = line.strip() + "& " + variables_latex[variable] + "\n"
                    elif variable == "HT" and line.startswith("$E_{T}^{miss}$ uncertainties"):
                        lines[line_number] = line.strip() + "& - \n"
                    else:
                        for group, ts in summary_values.items():
                            if line.startswith(group):
                                # move the row terminator behind the new cell
                                new_line = line.replace('\\\\', '').strip()
                                lines[line_number] = new_line + '& {:.2f} \\\\ \n'.format(ts)
                output_file.seek(0)
                output_file.writelines(lines)
                # lines only grow, so this is a no-op safeguard
                output_file.truncate()
        else:
            with open(file_template, 'w') as output_file:
                output_file.write(printout)
# Script entry point (the visible part only covers the setup: option parsing,
# output folders, opening the data tree and naming the collections used).
if __name__ == '__main__':
    set_root_defaults( msg_ignore_level = 3001 )
    parser = OptionParser()
    parser.add_option("-p", "--path", dest="path", default='/hdfs/TopQuarkGroup/trigger_BLT_ntuples/',
                  help="set path to input BLT ntuples")
    parser.add_option("-o", "--output_folder", dest="output_plots_folder", default='plots/2011/hadron_leg/',
                  help="set path to save tables")

    (options, args) = parser.parse_args()
    input_path = options.path
    output_folder = options.output_plots_folder
    output_pickle_folder = './pickle_files/'
    # Channel and centre-of-mass value are fixed here, not taken from options.
    channel = 'electron'
    centre_of_mass = 7

    make_folder_if_not_exists(output_folder)
    make_folder_if_not_exists(output_pickle_folder)
    output_formats = ['pdf']

    # Input file and tree name are hard-coded relative to the input path.
    data_histFile = input_path + '/2011/SingleElectron_2011_RunAB_had_leg.root'
    data_input_file = File(data_histFile)
    data_tree = data_input_file.Get('rootTupleTreeEPlusJets/ePlusJetsTree')

    # Names of the collections used further down
    # (presumably branch prefixes in the tree -- confirm against the ntuple).
    reco_leptons_collection = 'selectedPatElectronsLoosePFlow'
    reco_jet_collection = 'cleanedJetsPFlowEPlusJets'
    trigger_object_lepton = 'TriggerObjectElectronIsoLeg'
    trigger_jet_collection = 'TriggerObjectHadronPFIsoLeg'

    print 'Number of events in data tree: ', data_tree.GetEntries()

    # event counter, starts at zero
    n_lepton_leg_events = 0
def main():
    '''
        Main function for this script.

        Parses the command-line options, makes sure the output folder
        exists and hands everything over to create_unfolding_pull_data.
        Positional arguments are ignored.
    '''
    set_root_defaults(msg_ignore_level=3001)

    parser = OptionParser()
    parser.add_option("-o",
                      "--output",
                      dest="output_folder",
                      default='data/pull_data/',
                      help="output folder for pull data files")
    parser.add_option("-n",
                      "--n_input_mc",
                      type=int,
                      dest="n_input_mc",
                      default=100,
                      help="number of toy MC used for the tests")
    parser.add_option("-k",
                      "--k_value",
                      type=int,
                      dest="k_value",
                      default=3,
                      help="k-value for SVD unfolding")
    parser.add_option("--tau",
                      type='float',
                      dest="tau_value",
                      default=-1.,
                      help="tau-value for SVD unfolding")
    parser.add_option("-m",
                      "--method",
                      type='string',
                      dest="method",
                      default='RooUnfoldSvd',
                      help="unfolding method")
    parser.add_option("-f",
                      "--file",
                      type='string',
                      dest="file",
                      default='data/toy_mc/unfolding_toy_mc.root',
                      help="file with toy MC")
    parser.add_option(
        "-v",
        "--variable",
        dest="variable",
        default='MET',
        help="set the variable to analyse (MET, HT, ST, MT, WPT)")
    # The help text used to claim "Default = 8" while the default is 13;
    # optparse's %default placeholder keeps the two in sync.
    parser.add_option("-s",
                      "--centre-of-mass-energy",
                      dest="CoM",
                      default=13,
                      help="set the centre of mass energy for analysis. "
                           "Default = %default [TeV]",
                      type=int)
    parser.add_option("-c",
                      "--channel",
                      type='string',
                      dest="channel",
                      default='combined',
                      help="channel to be analysed: electron|muon|combined")

    parser.add_option("--offset_toy_mc",
                      type=int,
                      dest="offset_toy_mc",
                      default=0,
                      help="offset of the toy MC used to response matrix")
    parser.add_option("--offset_toy_data",
                      type=int,
                      dest="offset_toy_data",
                      default=0,
                      help="offset of the toy MC used as data for unfolding")
    (options, _) = parser.parse_args()

    centre_of_mass = options.CoM
    make_folder_if_not_exists(options.output_folder)

    # set the number of toy MC for error calculation
    k_value = options.k_value
    tau_value = options.tau_value
    use_n_toy = options.n_input_mc
    offset_toy_mc = options.offset_toy_mc
    offset_toy_data = options.offset_toy_data
    method = options.method
    variable = options.variable

    # NOTE(review): use_n_toy is passed twice, presumably once for the toy MC
    # and once for the toy data count -- confirm against the signature of
    # create_unfolding_pull_data.
    create_unfolding_pull_data(options.file, method, options.channel,
                               centre_of_mass, variable, use_n_toy, use_n_toy,
                               options.output_folder, offset_toy_mc,
                               offset_toy_data, k_value, tau_value)
def _fit_results_table_row(label, values, n_bins, total_separator = ' & '):
    '''Format one LaTeX row of the fit results table.

    The row consists of `label`, one "value +- error" cell for each of the
    first `n_bins` entries of `values` (read as values[bin_i][0] and
    values[bin_i][1]) and a final cell holding the plain sums of the values
    and of the errors. `total_separator` is the text placed before the
    total cell.
    '''
    row = label
    total = 0
    total_error = 0
    for bin_i in range(n_bins):
        value = values[bin_i][0]
        error = values[bin_i][1]
        row += ' & %.1f $\\pm$ %.1f' % (value, error)
        total += value
        total_error += error
    row += total_separator + '%.1f $\\pm$ %.1f \\\\' % (total, total_error)
    return row


def _fit_results_sum_over_samples(values, samples, n_bins):
    '''Return a list of (value, error) per bin, summed over `samples`.

    Values and errors are each added linearly, in the order given by
    `samples`.
    '''
    summed = []
    for bin_i in range(n_bins):
        value = values[samples[0]][bin_i][0]
        error = values[samples[0]][bin_i][1]
        for sample in samples[1:]:
            value = value + values[sample][bin_i][0]
            error = error + values[sample][bin_i][1]
        summed.append((value, error))
    return summed


def print_fit_results_table(initial_values, fit_results, channel, toFile = True):
    '''Write (or print) a LaTeX table comparing input and fitted yields.

    initial_values: mapping sample -> per-bin sequence whose items are
        indexed as [0] (value) and [1] (error). The keys 'TTJet',
        'SingleTop', 'V+Jets', 'QCD' and 'data' are read.
    fit_results: same layout; the keys 'TTJet', 'SingleTop', 'V+Jets' and
        'QCD' are read.
    channel: channel name, used in the caption, the label and the file name.
    toFile: if True the table is written to
        <output_folder>/<variable>/<variable>_fit_results_table_<E>TeV_<channel>.tex,
        otherwise it is printed.

    The binning is selected by the module-level `phase_space` ('VisiblePS'
    or 'FullPS'); any other value raises ValueError. The "Total" column and
    the "Sum MC" rows add both values and errors linearly.
    '''
    global output_folder, variable, met_type, phase_space
    if phase_space == 'VisiblePS':
        bins = variable_bins_visiblePS_ROOT[variable]
        bins_latex = variable_bins_visiblePS_latex[variable]
    elif phase_space == 'FullPS':
        bins = variable_bins_ROOT[variable]
        bins_latex = variable_bins_latex[variable]
    else:
        # previously this fell through with bins = None and failed later
        # with an unhelpful TypeError on len(None)
        raise ValueError("Unknown phase space '%s' (expected 'VisiblePS' or 'FullPS')" % phase_space)

    n_bins = len(bins)
    mc_samples = ['TTJet', 'SingleTop', 'V+Jets', 'QCD']
    energy = measurement_config.centre_of_mass_energy

    # LaTeX comment banner; the separator is deliberately not %-formatted
    separator = '%% ' + '=' * 60 + '\n'
    printout = separator
    printout += '%% Fit results for %s variable, %s channel, met type %s \n' % (variable, channel, met_type)
    printout += separator

    printout += '\\begin{table}[htbp]\n'
    printout += '\\centering\n'
    printout += '\\caption{Fit results for the %s variable\n' % variables_latex[variable]
    printout += 'at a centre-of-mass energy of %d TeV (%s channel).}\n' % (energy, channel)
    printout += '\\label{tab:%s_fit_results_%dTeV_%s}\n' % (variable, energy, channel)
    printout += '\\resizebox{\\columnwidth}{!} {\n'
    # one column for the process name, one per bin, one for the total
    printout += '\\begin{tabular}{l' + 'r' * n_bins + 'r}\n'
    printout += '\\hline\n'

    header = 'Process'
    for variable_bin in bins:
        header += ' & %s' % bins_latex[variable_bin]
    header += '& Total \\\\'

    sum_MC_in = _fit_results_sum_over_samples(initial_values, mc_samples, n_bins)
    sum_MC_fit = _fit_results_sum_over_samples(fit_results, mc_samples, n_bins)

    # rows that belong together; each group is followed by an \hline
    row_groups = [[header]]
    for sample in mc_samples:
        row_groups.append([
            _fit_results_table_row('%s in' % samples_latex[sample], initial_values[sample], n_bins),
            _fit_results_table_row('%s fit' % samples_latex[sample], fit_results[sample], n_bins),
        ])
    row_groups.append([
        # '& ' (no leading blank) keeps the output identical to earlier versions
        _fit_results_table_row('Sum MC in', sum_MC_in, n_bins, total_separator = '& '),
        _fit_results_table_row('Sum MC fit', sum_MC_fit, n_bins),
    ])
    row_groups.append([_fit_results_table_row('Data', initial_values['data'], n_bins)])

    for group in row_groups:
        printout += '\n'.join(group) + '\n\\hline\n'

    printout += '\\end{tabular}\n'
    printout += '}\n'
    printout += '\\end{table}\n'

    if toFile:
        path = output_folder + '/' + variable
        make_folder_if_not_exists(path)
        file_template = path + '/%s_fit_results_table_%dTeV_%s.tex' % (variable, energy, channel)
        # the context manager closes the file even if the write fails
        with open(file_template, 'w') as output_file:
            output_file.write(printout)
    else:
        print(printout)
def make_plots( histograms, category, output_folder, histname, show_ratio = True, show_before_unfolding = False ):
    '''Plot the unfolded data against the predictions and save the figure.

    histograms: mapping name -> histogram. 'unfolded' and 'measured' are
        required, 'unfolded_with_systematics' as well when category is
        'central'. Every other entry whose name contains neither 'unfolded'
        nor 'measured' is drawn as a prediction and labelled with
        measurements_latex[name].
    category: only 'central' is treated specially (the histogram with
        systematic uncertainties is drawn and used for the ratio band).
    output_folder: base folder; figures are saved to
        <output_folder>/<energy>TeV/<phase_space>/<variable>/.
    histname: file name without extension. The channel label is derived
        from it ('electron' or 'muon' substring, otherwise 'combined').
    show_ratio: add a prediction/data ratio panel below the main plot.
    show_before_unfolding: additionally draw the 'measured' histogram.

    One file is written per entry of the module-level `output_formats`.
    '''
    global variable, phase_space

    # the channel is encoded in the histogram name
    if 'electron' in histname:
        channel = 'electron'
    elif 'muon' in histname:
        channel = 'muon'
    else:
        channel = 'combined'

    is_central = category == 'central'
    use_minor_ticks = variable != 'NJets'
    # these variables carry no GeV unit in the axis titles
    unitless = variable in ['NJets', 'abs_lepton_eta', 'lepton_eta']
    if unitless:
        x_label = '$%s$' % variables_latex[variable]
    else:
        x_label = '$%s$ [GeV]' % variables_latex[variable]

    # plot with matplotlib
    hist_data = histograms['unfolded']
    if is_central:
        hist_data_with_systematics = histograms['unfolded_with_systematics']
    hist_measured = histograms['measured']

    hist_data.markersize = 2
    hist_data.marker = 'o'

    if is_central:
        hist_data_with_systematics.markersize = 2
        hist_data_with_systematics.marker = 'o'

    hist_measured.markersize = 2
    hist_measured.marker = 'o'
    hist_measured.color = 'red'

    plt.figure( figsize = CMS.figsize, dpi = CMS.dpi, facecolor = CMS.facecolor )
    if show_ratio:
        gs = gridspec.GridSpec( 2, 1, height_ratios = [5, 1] )
        axes = plt.subplot( gs[0] )
    else:
        # without a ratio panel the x-axis title belongs to the main plot
        axes = plt.axes()
        plt.xlabel( x_label, CMS.x_axis_title )
    if use_minor_ticks:
        axes.minorticks_on()
    if unitless:
        plt.ylabel( r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + variables_latex[variable] + '}$', CMS.y_axis_title )
    else:
        plt.ylabel( r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + variables_latex[variable] + r'} \left[\mathrm{GeV}^{-1}\right]$', CMS.y_axis_title )
    plt.tick_params( **CMS.axis_label_major )
    if use_minor_ticks:
        plt.tick_params( **CMS.axis_label_minor )

    n_histograms = len( histograms )

    hist_data.visible = True
    if is_central:
        hist_data_with_systematics.visible = True
        rplt.errorbar( hist_data_with_systematics, axes = axes, label = 'do_not_show', xerr = None, capsize = 0, elinewidth = 2, zorder = n_histograms + 1 )
    rplt.errorbar( hist_data, axes = axes, label = 'do_not_show', xerr = None, capsize = 15, capthick = 3, elinewidth = 2, zorder = n_histograms + 2 )
    rplt.errorbar( hist_data, axes = axes, label = 'data', xerr = None, yerr = False, zorder = n_histograms + 3 )  # this makes a nicer legend entry

    if show_before_unfolding:
        rplt.errorbar( hist_measured, axes = axes, label = 'data (before unfolding)', xerr = None, zorder = n_histograms )

    # dash pattern per prediction, reused for the ratio panel; predictions
    # without an entry are drawn with the default line style
    dashes = {}
    # the position in the sorted key list is the default drawing order
    for position, key in enumerate( sorted( histograms ) ):
        if 'unfolded' in key or 'measured' in key:
            continue
        hist = histograms[key]

        # powhegPythia8 is always drawn on top of all other predictions
        if key == 'powhegPythia8':
            zorder = n_histograms - 3
        else:
            zorder = min( position, n_histograms - 4 )

        hist.linewidth = 4
        # setting colours
        linestyle = None
        if 'amcatnlo_HERWIG' in key or 'massdown' in key:
            hist.SetLineColor( kBlue )
            dashes[key] = [25,5,5,5,5,5,5,5]
        elif 'madgraphMLM' in key or 'scaledown' in key:
            hist.SetLineColor( 417 )
            dashes[key] = [5,5]
        elif 'MADGRAPH_ptreweight' in key:
            hist.SetLineColor( kBlack )
        elif 'powhegPythia8' in key:
            linestyle = 'solid'
            dashes[key] = None
            hist.SetLineColor( 633 )
        elif 'massup' in key or 'amcatnlo' in key:
            hist.SetLineColor( 807 )
            dashes[key] = [20,5]
        elif 'MCATNLO' in key or 'scaleup' in key:
            hist.SetLineColor( 619 )
            dashes[key] = [5,5,10,5]

        if linestyle is not None:
            hist.linestyle = linestyle
        line, h = rplt.hist( hist, axes = axes, label = measurements_latex[key], zorder = zorder )

        # .get(): not every branch above defines a dash pattern
        dash_pattern = dashes.get( key )
        if dash_pattern is not None:
            line.set_dashes( dash_pattern )
            h.set_dashes( dash_pattern )

    handles, labels = axes.get_legend_handles_labels()
    # making data first in the list
    data_label_index = labels.index( 'data' )
    data_handle = handles[data_label_index]
    labels.remove( 'data' )
    handles.remove( data_handle )
    labels.insert( 0, 'data' )
    handles.insert( 0, data_handle )

    # keep only the known entries, in a fixed order; this also drops the
    # helper artists labelled 'do_not_show'
    new_handles, new_labels = [], []
    zipped = dict( zip( labels, handles ) )
    labelOrder = ['data',
        measurements_latex['powhegPythia8'],
        measurements_latex['amcatnlo'],
        measurements_latex['amcatnlo_HERWIG'],
        measurements_latex['madgraphMLM'],
        measurements_latex['scaleup'],
        measurements_latex['scaledown'],
        measurements_latex['massup'],
        measurements_latex['massdown']
    ]
    for label in labelOrder:
        if label in labels:
            new_handles.append( zipped[label] )
            new_labels.append( label )

    # legend anchor in figure coordinates, with per-variable overrides
    legend_locations = {
        'MT' : (0.05, 0.82),
        'WPT' : (1.0, 0.84),
        'abs_lepton_eta' : (1.0, 0.94),
    }
    legend_location = legend_locations.get( variable, (0.97, 0.82) )
    plt.legend( new_handles, new_labels, numpoints = 1, prop = CMS.legend_properties, frameon = False, bbox_to_anchor=legend_location,
                bbox_transform=plt.gcf().transFigure )
    label, channel_label = get_cms_labels( channel )
    # title
    plt.title( label,loc='right', **CMS.title )
    # CMS text
    # note: fontweight/weight does not change anything as we use Latex text!!!
    if variable in ['WPT', 'abs_lepton_eta']:
        text_x = 0.03
    else:
        text_x = 0.05
    plt.text(text_x, 0.98, r"\textbf{CMS}", transform=axes.transAxes, fontsize=42,
        verticalalignment='top',horizontalalignment='left')
    # preliminary
    plt.text(text_x, 0.92, r"\emph{Preliminary}",
                 transform=axes.transAxes, fontsize=42,
                 verticalalignment='top',horizontalalignment='left')
    # channel text
    plt.text(text_x, 0.86, r"\emph{%s}" %channel_label, transform=axes.transAxes, fontsize=40,
        verticalalignment='top',horizontalalignment='left')

    # never show negative values and leave headroom for legend and labels
    ylim = axes.get_ylim()
    if ylim[0] < 0:
        axes.set_ylim( ymin = 0.)
    if variable in ['WPT', 'abs_lepton_eta']:
        axes.set_ylim(ymax = ylim[1]*1.3)
    else:
        axes.set_ylim(ymax = ylim[1]*1.2)

    if show_ratio:
        plt.setp( axes.get_xticklabels(), visible = False )
        ax1 = plt.subplot( gs[1] )
        if use_minor_ticks:
            ax1.minorticks_on()
        # setting the x_limits identical to the main plot
        x_limits = axes.get_xlim()
        ax1.set_xlim(x_limits)
        ax1.yaxis.set_major_locator( MultipleLocator( 0.5 ) )
        if use_minor_ticks:
            ax1.yaxis.set_minor_locator( MultipleLocator( 0.1 ) )

        plt.xlabel( x_label, CMS.x_axis_title )

        plt.tick_params( **CMS.axis_label_major )
        if use_minor_ticks:
            plt.tick_params( **CMS.axis_label_minor )
        plt.ylabel( '$\\frac{\\textrm{pred.}}{\\textrm{data}}$', CMS.y_axis_title )
        ax1.yaxis.set_label_coords(-0.115, 0.8)
        #draw a horizontal line at y=1 for data
        plt.axhline(y = 1, color = 'black', linewidth = 2)

        for key, hist in sorted( histograms.items() ):
            if 'unfolded' in key or 'measured' in key:
                continue
            ratio = hist.Clone()
            ratio.Divide( hist_data ) #divide by data
            _, h = rplt.hist( ratio, axes = ax1, label = 'do_not_show' )
            dash_pattern = dashes.get( key )
            if dash_pattern is not None:
                h.set_dashes( dash_pattern )

        stat_lower = hist_data.Clone()
        stat_upper = hist_data.Clone()
        syst_lower = hist_data.Clone()
        syst_upper = hist_data.Clone()

        # plot error bands on data in the ratio plot
        stat_errors = graph_to_value_errors_tuplelist(hist_data)
        if is_central:
            syst_errors = graph_to_value_errors_tuplelist(hist_data_with_systematics)
        # histogram bins are numbered from 1, the error lists from 0
        for bin_i in range( 1, hist_data.GetNbinsX() + 1 ):
            stat_value, stat_error, _ = stat_errors[bin_i-1]
            # NOTE(review): a bin with zero content makes this division
            # fail - confirm the inputs never contain empty bins
            stat_rel_error = stat_error/stat_value
            stat_lower.SetBinContent( bin_i, 1 - stat_rel_error )
            stat_upper.SetBinContent( bin_i, 1 + stat_rel_error )
            if is_central:
                syst_value, syst_error_down, syst_error_up  = syst_errors[bin_i-1]
                syst_rel_error_down = syst_error_down/syst_value
                syst_rel_error_up = syst_error_up/syst_value
                syst_lower.SetBinContent( bin_i, 1 - syst_rel_error_down )
                syst_upper.SetBinContent( bin_i, 1 + syst_rel_error_up )
        if is_central:
            rplt.fill_between( syst_lower, syst_upper, ax1,
                               color = 'yellow' )

        rplt.fill_between( stat_upper, stat_lower, ax1, color = '0.75',
                            )

        # legend for ratio plot
        p_stat = mpatches.Patch(facecolor='0.75', label='Stat.', edgecolor='black' )
        p_stat_and_syst = mpatches.Patch(facecolor='yellow', label=r'Stat. $\oplus$ Syst.', edgecolor='black' )
        l1 = ax1.legend(handles = [p_stat, p_stat_and_syst], loc = 'upper left',
                     frameon = False, prop = {'size':26}, ncol = 2)
        ax1.add_artist(l1)

        # ratio panel settings per variable:
        # ( y min, y max, major tick step, minor tick step ); None keeps
        # the locator configured above
        ratio_axis_settings = {
            'MET' : ( 0.8, 1.2, 0.5, None ),
            'MT' : ( 0.8, 1.2, 0.2, 0.1 ),
            'HT' : ( 0.8, 1.37, 0.2, 0.1 ),
            'ST' : ( 0.7, 1.5, 0.5, 0.1 ),
            'WPT' : ( 0.8, 1.2, 0.5, 0.1 ),
            'NJets' : ( 0.7, 1.5, None, None ),
            'abs_lepton_eta' : ( 0.8, 1.2, 0.2, 0.1 ),
            'lepton_pt' : ( 0.8, 1.3, 0.2, 0.1 ),
        }
        if variable in ratio_axis_settings:
            y_min, y_max, major_step, minor_step = ratio_axis_settings[variable]
            ax1.set_ylim( ymin = y_min, ymax = y_max )
            if major_step is not None:
                ax1.yaxis.set_major_locator( MultipleLocator( major_step ) )
            if minor_step is not None:
                ax1.yaxis.set_minor_locator( MultipleLocator( minor_step ) )

    if CMS.tight_layout:
        plt.tight_layout()

    path = '{output_folder}/{centre_of_mass_energy}TeV/{phaseSpace}/{variable}/'
    path = path.format(
            output_folder = output_folder,
            centre_of_mass_energy = measurement_config.centre_of_mass_energy,
            phaseSpace = phase_space,
            variable = variable
            )
    make_folder_if_not_exists( path )
    for output_format in output_formats:
        filename = path + '/' + histname + '.' + output_format
        plt.savefig( filename )

    # drop the local references and close the figure before collecting
    del hist_data, hist_measured
    plt.close()
    gc.collect()
def print_error_table(central_values, errors, channel, toFile = True, print_before_unfolding = False):
    '''Write (or print) a LaTeX table of relative uncertainties per bin.

    central_values: mapping of result name -> per-bin sequence. Depending on
        print_before_unfolding the keys 'measured' or 'unfolded' are read
        (items unpack to (value, error)), together with
        '<key>_with_systematics_only' and '<key>_with_systematics' (items
        unpack to three values: the central value and two errors, of which
        the larger is used).
    errors: mapping source -> per-bin absolute error; must contain
        'central' and every source in the module-level `all_measurements`
        that is not skipped.
    channel: channel name used in caption, label and file name.
    toFile: if True the table is written below <output_folder>/<variable>/
        (in a 'before_unfolding' sub-folder when print_before_unfolding is
        set), otherwise it is printed.
    print_before_unfolding: use the measured instead of the unfolded values.

    The binning is selected by the module-level `phase_space` ('VisiblePS'
    or 'FullPS'); any other value raises ValueError. Unless
    print_before_unfolding is set, make_error_plot is called with the
    collected relative errors.
    '''
    global output_folder, variable, met_type, b_tag_bin, all_measurements, phase_space
    variable_latex = variables_latex[variable]
    if phase_space == 'VisiblePS':
        bins = variable_bins_visiblePS_ROOT[variable]
        bins_latex = variable_bins_visiblePS_latex[variable]
        binEdges = bin_edges_vis[variable]
    elif phase_space == 'FullPS':
        bins = variable_bins_ROOT[variable]
        bins_latex = variable_bins_latex[variable]
        binEdges = bin_edges_full[variable]
    else:
        # previously this fell through with bins = None and failed later
        # with an unhelpful TypeError on len(None)
        raise ValueError("Unknown phase space '%s' (expected 'VisiblePS' or 'FullPS')" % phase_space)

    # prefix of the central_values keys used throughout
    result_key = 'measured' if print_before_unfolding else 'unfolded'
    energy = measurement_config.centre_of_mass_energy

    # LaTeX comment banner; lines without arguments are not %-formatted
    printout = '%% ' + '=' * 60
    printout += '\n'
    printout += '%% Systematics table for %s variable, %s channel, met type %s, %s b-tag region\n' % (variable, channel, met_type, b_tag_bin)
    if print_before_unfolding:
        printout += '%% BEFORE UNFOLDING\n'
    printout += '%% ' + '=' * 60
    printout += '\n'

    printout += '\\begin{table}[htbp]\n'
    printout += '\\centering\n'
    printout += '\\caption{Systematic uncertainties for the normalised \\ttbar cross section measurement with respect to %s variable\n' % variable_latex
    printout += 'at a centre-of-mass energy of %d TeV ' % energy
    if channel == 'combined' or channel == "combinedBeforeUnfolding":
        printout += '(combination of electron and muon channels).}\n'
    else:
        printout += '(%s channel).}\n' % channel
    printout += '\\label{tab:%s_systematics_%dTeV_%s}\n' % (variable, energy, channel)
    if variable == 'MT':
        printout += '\\resizebox*{!}{\\textheight} {\n'
    else:
        printout += '\\resizebox{\\columnwidth}{!} {\n'
    printout += '\\begin{tabular}{l' + 'r'*len(bins) + '}\n'
    printout += '\\hline\n'

    assert len(bins) == len(errors['central'])
    assert len(bins) == len(central_values[result_key])

    # relative errors per source, handed to make_error_plot
    errorHists = {}
    errorHists['statistical'] = []
    for source in all_measurements:
        errorHists[source] = []

    # for these variables the MET systematics are left out, except the
    # JES and JER ones
    skip_met_systematics = variable in ('HT', 'NJets', 'lepton_pt', 'abs_lepton_eta')

    header = 'Uncertainty source '
    # source -> [row title, relative error in bin 0, bin 1, ...]
    rows = {}
    for bin_i, variable_bin in enumerate(bins):
        header += '& %s' % (bins_latex[variable_bin])
        central_value = central_values[result_key][bin_i][0]

        for source in all_measurements:
            if skip_met_systematics and source in measurement_config.met_systematics and 'JES' not in source and 'JER' not in source:
                continue

            abs_error = errors[source][bin_i]
            relative_error = getRelativeError(central_value, abs_error)

            errorHists[source].append(relative_error)

            text = '%.2f' % (relative_error*100)
            if source in rows:
                rows[source].append(text)
            elif met_type in source:
                rows[source] = [measurements_latex[source.replace(met_type, '')] + ' (\\%)', text]
            elif source in met_systematics_latex:
                rows[source] = [met_systematics_latex[source] + ' (\\%)', text]
            else:
                rows[source] = [measurements_latex[source] + ' (\\%)', text]
    header += ' \\\\'
    printout += header
    printout += '\n\\hline\n'

    for source in sorted(rows):
        if source == 'central':
            continue
        printout += ' & '.join(rows[source])
        printout += ' \\\\ \n'

    #append the total statistical error to the table
    printout += '\\hline \n'
    total_line = 'Total Stat. (\\%)'
    for bin_i in range(len(bins)):
        value, error = central_values[result_key][bin_i]
        relativeError = getRelativeError(value, error)
        errorHists['statistical'].append(relativeError)
        total_line += ' & %.2f ' % (relativeError * 100)
    printout += total_line + '\\\\ \n'

    if not print_before_unfolding:
        make_error_plot( errorHists, binEdges )

    #append the total systematic error to the table
    total_line = 'Total Sys. (\\%)'
    for bin_i in range(len(bins)):
        value, error_up, error_down = central_values[result_key + '_with_systematics_only'][bin_i]
        # quote the larger of the two asymmetric errors
        error = max(error_up, error_down)
        relativeError = getRelativeError(value, error)
        total_line += ' & %.2f ' % (relativeError * 100)
    printout += total_line + '\\\\ \n'

    #append the total error to the table
    printout += '\\hline \n'
    total_line = 'Total (\\%)'
    for bin_i in range(len(bins)):
        value, error_up, error_down = central_values[result_key + '_with_systematics'][bin_i]
        error = max(error_up, error_down)
        relativeError = getRelativeError(value, error)
        total_line += ' & %.2f ' % (relativeError * 100)
    printout += total_line + '\\\\ \n'
    printout += '\\hline \n'
    printout += '\\end{tabular}\n'
    printout += '}\n'
    printout += '\\end{table}\n'

    if toFile:
        path = output_folder + '/'  + variable + '/'
        make_folder_if_not_exists(path)
        file_name = '/%s_systematics_%dTeV_%s.tex' % (variable, energy, channel)

        if print_before_unfolding:
            # build the sub-folder path directly instead of patching the
            # finished file name with str.replace
            path = path + '/before_unfolding/'
            make_folder_if_not_exists(path)
        file_template = path + file_name
        # the context manager closes the file even if the write fails
        with open(file_template, 'w') as output_file:
            output_file.write(printout)
    else:
        print(printout)
def compare_qcd_control_regions(variable='MET',
                                met_type='patType1CorrectedPFMet',
                                title='Untitled',
                                channel='electron'):
    '''Compare the data-driven QCD templates of the first bins of `variable`.

    For every fit variable in `electron_fit_variables` the control-region
    histograms of the first three bins of `variable` are read, cleaned with
    clean_control_region (subtracting TTJet, V+Jets and SingleTop) and
    compared, normalised to unit area, to their sum (the "inclusive"
    template) via compare_measurements.

    variable: name of the binned variable (key of variable_bins_ROOT).
    met_type: inserted into the histogram path template.
    title: plot title.
    channel: only used to look up the plot text in channel_latex.
        NOTE(review): the fit variables and the 'QCDConversions' control
        region are used regardless of `channel` - confirm this is intended
        for channels other than 'electron'.

    Side effect: the module-level `b_tag_bin_ctl` is overwritten for every
    fit variable.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)

    # only the first few bins of the variable are compared
    max_bins = 3
    bins_to_compare = variable_bins[0:max_bins]

    for fit_variable in electron_fit_variables:
        all_hists = {}
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/qcd/')

        for bin_range in bins_to_compare:
            params = {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable
            }
            fit_variable_distribution = histogram_template % params
            # point the signal-region path at the QCD control region
            qcd_fit_variable_distribution = fit_variable_distribution.replace(
                'Ref selection', 'QCDConversions')
            qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace(
                b_tag_bin, b_tag_bin_ctl)
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files(
                [qcd_fit_variable_distribution], histogram_files)
            prepare_histograms(
                histograms,
                rebin=fit_variable_properties[fit_variable]['rebin'],
                scale_factor=measurement_config.luminosity_scale)

            histograms_for_cleaning = {
                sample: histograms[sample][qcd_fit_variable_distribution]
                for sample in ('data', 'V+Jets', 'SingleTop', 'TTJet')
            }
            # QCD estimate: data minus the other simulated processes
            all_hists[bin_range] = clean_control_region(
                histograms_for_cleaning,
                subtract=['TTJet', 'V+Jets', 'SingleTop'])

        # create the inclusive distribution from the un-normalised bins
        inclusive_hist = deepcopy(all_hists[bins_to_compare[0]])
        for bin_range in bins_to_compare[1:]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms; empty ones are left untouched
        for bin_range in bins_to_compare:
            integral = all_hists[bin_range].Integral()
            if integral != 0:
                all_hists[bin_range].Scale(1 / integral)
        # same guard as for the individual bins, so an empty control
        # region no longer ends in a division by zero
        inclusive_integral = inclusive_hist.Integral()
        if inclusive_integral != 0:
            inclusive_hist.Scale(1 / inclusive_integral)

        # now compare inclusive to all bins
        properties = fit_variable_properties[fit_variable]
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = properties['x-title']
        # the templates are normalised, so the y axis is in arbitrary units
        histogram_properties.y_axis_title = properties['y-title'].replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [properties['min'], properties['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[
            channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin_ctl + '_QCD_template_comparison'
        histogram_properties.y_max_scale = 1.5

        measurements = {
            bin_range + ' GeV': histogram
            for bin_range, histogram in all_hists.items()
        }
        measurements = OrderedDict(sorted(measurements.items()))
        compare_measurements(models={'inclusive': inclusive_hist},
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=save_path + '/qcd/',
                             save_as=save_as)
# Example #57
# 0
def main():
    """Fill the ttbar unfolding histograms for one sample and write them to a ROOT file.

    Command-line options select the centre-of-mass energy, the (systematic)
    sample, optional top pt / eta reweighting and the generator weights to
    apply.  For every generator weight that is requested the input tree is
    read once: truth, measured, fake and response histograms are booked per
    variable and channel, filled in the event loop and written to the output
    file.  Afterwards every directory whose path contains "electron" is added
    to its "muon" counterpart and stored in a "combined" directory of the
    same file.

    Returns None.  If the centre-of-mass energy is not 13 an error message is
    printed and nothing is processed.
    """
    parser = OptionParser()
    parser.add_option("--topPtReweighting", dest="applyTopPtReweighting", type="int", default=0)
    parser.add_option("--topEtaReweighting", dest="applyTopEtaReweighting", type="int", default=0)
    parser.add_option("-c", "--centreOfMassEnergy", dest="centreOfMassEnergy", type="int", default=13)
    parser.add_option("--generatorWeight", type="int", dest="generatorWeight", default=-1)
    parser.add_option("--nGeneratorWeights", type="int", dest="nGeneratorWeights", default=1)
    parser.add_option("-s", "--sample", dest="sample", default="central")
    parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False)
    parser.add_option("-n", action="store_true", dest="donothing", default=False)
    parser.add_option("-e", action="store_true", dest="extraHists", default=False)
    parser.add_option("-f", action="store_true", dest="fineBinned", default=False)

    (options, _) = parser.parse_args()

    measurement_config = XSectionConfig(options.centreOfMassEnergy)

    # Input file name.  Only 13 TeV is known here; stop instead of carrying on
    # with a placeholder file name that cannot be opened.
    if int(options.centreOfMassEnergy) != 13:
        print("Error: Unrecognised centre of mass energy.")
        return
    file_name = getFileName("13TeV", options.sample, measurement_config)

    # nGeneratorWeights = How many PDF weights do you want to run in 1 job (specified in runJobsCrab.py)
    if options.nGeneratorWeights > 1:
        generatorWeightsToRun = [options.generatorWeight + i for i in range(0, options.nGeneratorWeights)]
    # generatorWeights = 1 for PDF Variations
    # generatorWeights either 4 or 8 for alpha_s, renormalisation, hadronisation
    elif options.generatorWeight >= 0:
        generatorWeightsToRun = [options.generatorWeight]
    else:
        generatorWeightsToRun = [-1]

    # Output file name
    outputFileName = "crap.root"
    outputFileDir = "unfolding/%sTeV/" % options.centreOfMassEnergy
    make_folder_if_not_exists(outputFileDir)

    energySuffix = "%sTeV" % (options.centreOfMassEnergy)

    # Variables whose reco branch carries one entry per MET systematic.
    metVariables = ["MET", "ST", "WPT"]
    # Index into the "_METUncertainties" branch for the jet energy systematics.
    jetSystematicIndices = {"jerup": 0, "jerdown": 1, "jesup": 2, "jesdown": 3}
    # Suffix of the input tree for the jet energy systematics.
    treeSuffixes = {"jesup": "_JESUp", "jesdown": "_JESDown", "jerup": "_JERUp", "jerdown": "_JERDown"}
    isMETSystematic = options.sample in measurement_config.met_systematics

    def ratio(numerator, denominator):
        # An empty selection leaves the counters at zero; report 0 rather than
        # raising ZeroDivisionError after the whole event loop has run.
        return numerator / denominator if denominator else 0.0

    for meWeight in generatorWeightsToRun:
        if options.applyTopEtaReweighting != 0:
            if options.applyTopEtaReweighting == 1:
                outputFileName = (
                    outputFileDir + "/unfolding_TTJets_%s_asymmetric_withTopEtaReweighting_up.root" % energySuffix
                )
            elif options.applyTopEtaReweighting == -1:
                outputFileName = (
                    outputFileDir + "/unfolding_TTJets_%s_asymmetric_withTopEtaReweighting_down.root" % energySuffix
                )
        elif options.applyTopPtReweighting != 0:
            if options.applyTopPtReweighting == 1:
                outputFileName = (
                    outputFileDir + "/unfolding_TTJets_%s_asymmetric_withTopPtReweighting_up.root" % energySuffix
                )
            elif options.applyTopPtReweighting == -1:
                outputFileName = (
                    outputFileDir + "/unfolding_TTJets_%s_asymmetric_withTopPtReweighting_down.root" % energySuffix
                )
        elif meWeight == 4:
            outputFileName = outputFileDir + "/unfolding_TTJets_%s_asymmetric_scaleUpWeight.root" % (energySuffix)
        elif meWeight == 8:
            outputFileName = outputFileDir + "/unfolding_TTJets_%s_asymmetric_scaleDownWeight.root" % (energySuffix)
        elif meWeight >= 9 and meWeight <= 108:
            outputFileName = outputFileDir + "/unfolding_TTJets_%s_asymmetric_generatorWeight_%i.root" % (
                energySuffix,
                meWeight,
            )
        elif options.sample != "central":
            outputFileName = outputFileDir + "/unfolding_TTJets_%s_%s_asymmetric.root" % (energySuffix, options.sample)
        elif options.fineBinned:
            outputFileName = outputFileDir + "/unfolding_TTJets_%s.root" % (energySuffix)
        else:
            outputFileName = outputFileDir + "/unfolding_TTJets_%s_asymmetric.root" % energySuffix

        with root_open(file_name, "read") as f, root_open(outputFileName, "recreate") as out:

            # Get the tree
            treeName = "TTbar_plus_X_analysis/Unfolding/Unfolding" + treeSuffixes.get(options.sample, "")
            tree = f.Get(treeName)
            nEntries = tree.GetEntries()

            # For variables where you want bins to be symmetric about 0, use abs(variable) (but also make plots for signed variable)
            allVariablesBins = bin_edges_vis.copy()
            for variable in bin_edges_vis:
                if "Rap" in variable:
                    allVariablesBins["abs_%s" % variable] = [0, bin_edges_vis[variable][-1]]

            recoVariableNames = {}
            genVariable_particle_names = {}
            genVariable_parton_names = {}
            # MET systematic index per variable (None when no indexing is needed).
            sysIndices = {}
            histograms = {}
            outputDirs = {}
            # Variables that were actually booked; the event loop and the
            # output step must only touch these.
            bookedVariables = []

            for variable in allVariablesBins:
                if options.debug and variable != "HT":
                    continue

                if isMETSystematic and variable not in metVariables:
                    continue

                bookedVariables.append(variable)
                outputDirs[variable] = {}
                histograms[variable] = {}

                #
                # Variable names
                #
                recoVariableName = branchNames[variable]
                sysIndex = None
                if variable in metVariables:
                    if options.sample in jetSystematicIndices:
                        recoVariableName += "_METUncertainties"
                        sysIndex = jetSystematicIndices[options.sample]
                    elif isMETSystematic:
                        recoVariableName += "_METUncertainties"
                        sysIndex = measurement_config.met_systematics[options.sample]
                sysIndices[variable] = sysIndex

                genVariable_particle_name = None
                genVariable_parton_name = None
                if variable in genBranchNames_particle:
                    genVariable_particle_name = genBranchNames_particle[variable]
                if variable in genBranchNames_parton:
                    genVariable_parton_name = genBranchNames_parton[variable]

                recoVariableNames[variable] = recoVariableName
                genVariable_particle_names[variable] = genVariable_particle_name
                genVariable_parton_names[variable] = genVariable_parton_name

                for channel in channels:
                    # Make dir in output file
                    outputDirName = variable + "_" + channel.outputDirName
                    outputDir = out.mkdir(outputDirName)
                    outputDirs[variable][channel.channelName] = outputDir

                    #
                    # Book histograms
                    #
                    # 1D histograms
                    histograms[variable][channel.channelName] = {}
                    h = histograms[variable][channel.channelName]
                    h["truth"] = Hist(allVariablesBins[variable], name="truth")
                    h["truthVis"] = Hist(allVariablesBins[variable], name="truthVis")
                    h["truth_parton"] = Hist(allVariablesBins[variable], name="truth_parton")
                    h["measured"] = Hist(reco_bin_edges_vis[variable], name="measured")
                    h["measuredVis"] = Hist(reco_bin_edges_vis[variable], name="measuredVis")
                    h["measured_without_fakes"] = Hist(reco_bin_edges_vis[variable], name="measured_without_fakes")
                    h["measuredVis_without_fakes"] = Hist(
                        reco_bin_edges_vis[variable], name="measuredVis_without_fakes"
                    )
                    h["fake"] = Hist(reco_bin_edges_vis[variable], name="fake")
                    h["fakeVis"] = Hist(reco_bin_edges_vis[variable], name="fakeVis")
                    # 2D histograms
                    h["response"] = Hist2D(reco_bin_edges_vis[variable], allVariablesBins[variable], name="response")
                    h["response_without_fakes"] = Hist2D(
                        reco_bin_edges_vis[variable], allVariablesBins[variable], name="response_without_fakes"
                    )
                    h["responseVis_without_fakes"] = Hist2D(
                        reco_bin_edges_vis[variable], allVariablesBins[variable], name="responseVis_without_fakes"
                    )
                    h["response_parton"] = Hist2D(
                        reco_bin_edges_vis[variable], allVariablesBins[variable], name="response_parton"
                    )
                    h["response_without_fakes_parton"] = Hist2D(
                        reco_bin_edges_vis[variable], allVariablesBins[variable], name="response_without_fakes_parton"
                    )

                    if options.fineBinned:
                        # Replace the histograms above with uniformly binned ones.
                        minVar = trunc(allVariablesBins[variable][0])
                        maxVar = trunc(
                            max(
                                tree.GetMaximum(genVariable_particle_names[variable]),
                                tree.GetMaximum(recoVariableNames[variable]),
                            )
                            * 1.2
                        )
                        nBins = int(maxVar - minVar)
                        # Compare by value: identity ("is") on strings only
                        # works by accident of interning.
                        if variable in ("lepton_eta", "bjets_eta"):
                            maxVar = 2.5
                            minVar = -2.5
                            nBins = 1000
                        elif "abs" in variable and "eta" in variable:
                            maxVar = 3.0
                            minVar = 0.0
                            nBins = 1000
                        elif "Rap" in variable:
                            maxVar = 3.0
                            minVar = -3.0
                            nBins = 1000
                        elif "NJets" in variable:
                            maxVar = 20.5
                            minVar = 3.5
                            nBins = 17

                        h["truth"] = Hist(nBins, minVar, maxVar, name="truth")
                        h["truthVis"] = Hist(nBins, minVar, maxVar, name="truthVis")
                        h["truth_parton"] = Hist(nBins, minVar, maxVar, name="truth_parton")
                        h["measured"] = Hist(nBins, minVar, maxVar, name="measured")
                        h["measuredVis"] = Hist(nBins, minVar, maxVar, name="measuredVis")
                        h["measured_without_fakes"] = Hist(nBins, minVar, maxVar, name="measured_without_fakes")
                        h["measuredVis_without_fakes"] = Hist(nBins, minVar, maxVar, name="measuredVis_without_fakes")
                        h["fake"] = Hist(nBins, minVar, maxVar, name="fake")
                        h["fakeVis"] = Hist(nBins, minVar, maxVar, name="fakeVis")
                        h["response"] = Hist2D(nBins, minVar, maxVar, nBins, minVar, maxVar, name="response")
                        h["response_without_fakes"] = Hist2D(
                            nBins, minVar, maxVar, nBins, minVar, maxVar, name="response_without_fakes"
                        )
                        h["responseVis_without_fakes"] = Hist2D(
                            nBins, minVar, maxVar, nBins, minVar, maxVar, name="responseVis_without_fakes"
                        )

                        h["response_parton"] = Hist2D(
                            nBins, minVar, maxVar, nBins, minVar, maxVar, name="response_parton"
                        )
                        h["response_without_fakes_parton"] = Hist2D(
                            nBins, minVar, maxVar, nBins, minVar, maxVar, name="response_without_fakes_parton"
                        )

                    # Some interesting histograms
                    h["puOffline"] = Hist(20, 0, 2, name="puWeights_offline")
                    h["eventWeightHist"] = Hist(100, -2, 2, name="eventWeightHist")
                    h["genWeightHist"] = Hist(100, -2, 2, name="genWeightHist")
                    h["offlineWeightHist"] = Hist(100, -2, 2, name="offlineWeightHist")

                    h["phaseSpaceInfoHist"] = Hist(10, 0, 1, name="phaseSpaceInfoHist")

            # Counters for studying phase space
            nVis = {c.channelName: 0 for c in channels}
            nVisNotOffline = {c.channelName: 0 for c in channels}
            nOffline = {c.channelName: 0 for c in channels}
            nOfflineNotVis = {c.channelName: 0 for c in channels}
            nFull = {c.channelName: 0 for c in channels}
            nOfflineSL = {c.channelName: 0 for c in channels}

            n = 0
            # Event Loop
            for event in tree:
                branch = event.__getattr__
                n += 1
                if not n % 100000:
                    print("Processing event %.0f Progress : %.2g %%" % (n, float(n) / nEntries * 100))

                # # #
                # # # Weights and selection
                # # #

                # Pileup weight
                # Don't apply if calculating systematic
                pileupWeight = event.PUWeight
                if options.sample == "pileupSystematic":
                    pileupWeight = 1

                # Generator level weight
                genWeight = event.EventWeight * measurement_config.luminosity_scale

                # Lepton weight
                leptonWeight = event.LeptonEfficiencyCorrection
                if options.sample == "leptonup":
                    leptonWeight = event.LeptonEfficiencyCorrectionUp
                elif options.sample == "leptondown":
                    # This must be an assignment; a comparison here would
                    # leave the nominal weight in place.
                    leptonWeight = event.LeptonEfficiencyCorrectionDown

                # B Jet Weight
                bjetWeight = event.BJetWeight
                if options.sample == "bjetup":
                    bjetWeight = event.BJetUpWeight
                elif options.sample == "bjetdown":
                    bjetWeight = event.BJetDownWeight
                elif options.sample == "lightjetup":
                    bjetWeight = event.LightJetUpWeight
                elif options.sample == "lightjetdown":
                    bjetWeight = event.LightJetDownWeight

                # Offline level weights
                offlineWeight = event.EventWeight * measurement_config.luminosity_scale
                offlineWeight *= pileupWeight
                offlineWeight *= bjetWeight
                offlineWeight *= leptonWeight

                # Generator weight
                # Scale up/down, pdf
                if meWeight >= 0:
                    meWeightValue = branch("genWeight_%i" % meWeight)
                    genWeight *= meWeightValue
                    offlineWeight *= meWeightValue

                if options.applyTopPtReweighting != 0:
                    ptWeight = calculateTopPtWeight(
                        branch("lepTopPt_parton"), branch("hadTopPt_parton"), options.applyTopPtReweighting
                    )
                    offlineWeight *= ptWeight
                    genWeight *= ptWeight

                if options.applyTopEtaReweighting != 0:
                    etaWeight = calculateTopEtaWeight(
                        branch("lepTopRap_parton"), branch("hadTopRap_parton"), options.applyTopEtaReweighting
                    )
                    offlineWeight *= etaWeight
                    genWeight *= etaWeight

                for channel in channels:
                    channelName = channel.channelName

                    # Generator level selection
                    genSelection = ""
                    genSelectionVis = ""
                    # Offline level selection
                    offlineSelection = 0
                    if channelName == "muPlusJets":
                        genSelection = event.isSemiLeptonicMuon == 1
                        genSelectionVis = event.isSemiLeptonicMuon == 1 and event.passesGenEventSelection == 1
                        offlineSelection = event.passSelection == 1
                    elif channelName == "ePlusJets":
                        genSelection = event.isSemiLeptonicElectron == 1
                        genSelectionVis = event.isSemiLeptonicElectron == 1 and event.passesGenEventSelection == 1
                        offlineSelection = event.passSelection == 2

                    # Fake selection
                    fakeSelection = offlineSelection and not genSelection
                    fakeSelectionVis = offlineSelection and not genSelectionVis

                    # Phase space info
                    if genSelection:
                        nFull[channelName] += genWeight
                        if offlineSelection:
                            nOfflineSL[channelName] += genWeight
                    if genSelectionVis:
                        nVis[channelName] += genWeight
                        if not offlineSelection:
                            nVisNotOffline[channelName] += genWeight
                    if offlineSelection:
                        nOffline[channelName] += offlineWeight
                        if not genSelectionVis:
                            nOfflineNotVis[channelName] += offlineWeight

                    for variable in bookedVariables:
                        # # #
                        # # # Variable to plot
                        # # #
                        recoVariable = branch(recoVariableNames[variable])
                        # The index is looked up per variable; it is only set
                        # for MET, ST and WPT when a systematic sample is run.
                        sysIndex = sysIndices[variable]
                        if sysIndex is not None and (offlineSelection or fakeSelection or fakeSelectionVis):
                            recoVariable = recoVariable[sysIndex]

                        if "abs" in variable:
                            recoVariable = abs(recoVariable)

                        # With TUnfold, reco variable never goes in the overflow (or underflow)
                        if not options.fineBinned:
                            recoVariable = min(recoVariable, allVariablesBins[variable][-1] - 0.000001)
                        genVariable_particle = branch(genVariable_particle_names[variable])
                        if "abs" in variable:
                            genVariable_particle = abs(genVariable_particle)
                        # #
                        # # Fill histograms
                        # #
                        histogramsToFill = histograms[variable][channelName]
                        if not options.donothing:

                            if genSelection:
                                histogramsToFill["truth"].Fill(genVariable_particle, genWeight)
                            if genSelectionVis:
                                histogramsToFill["truthVis"].Fill(genVariable_particle, genWeight)
                            if offlineSelection:
                                histogramsToFill["measured"].Fill(recoVariable, offlineWeight)
                                histogramsToFill["measuredVis"].Fill(recoVariable, offlineWeight)
                                if genSelectionVis:
                                    histogramsToFill["measuredVis_without_fakes"].Fill(recoVariable, offlineWeight)
                                if genSelection:
                                    histogramsToFill["measured_without_fakes"].Fill(recoVariable, offlineWeight)
                                histogramsToFill["response"].Fill(recoVariable, genVariable_particle, offlineWeight)
                            if offlineSelection and genSelection:
                                histogramsToFill["response_without_fakes"].Fill(
                                    recoVariable, genVariable_particle, offlineWeight
                                )
                            elif genSelection:
                                # Generated but not reconstructed: fill below
                                # the first reco bin edge.
                                histogramsToFill["response_without_fakes"].Fill(
                                    allVariablesBins[variable][0] - 1, genVariable_particle, genWeight
                                )
                            if offlineSelection and genSelectionVis:
                                histogramsToFill["responseVis_without_fakes"].Fill(
                                    recoVariable, genVariable_particle, offlineWeight
                                )
                            elif genSelectionVis:
                                histogramsToFill["responseVis_without_fakes"].Fill(
                                    allVariablesBins[variable][0] - 1, genVariable_particle, genWeight
                                )
                            if fakeSelection:
                                histogramsToFill["fake"].Fill(recoVariable, offlineWeight)
                            if fakeSelectionVis:
                                histogramsToFill["fakeVis"].Fill(recoVariable, offlineWeight)

                            if options.extraHists:
                                if genSelection:
                                    histogramsToFill["eventWeightHist"].Fill(event.EventWeight)
                                    histogramsToFill["genWeightHist"].Fill(genWeight)
                                    histogramsToFill["offlineWeightHist"].Fill(offlineWeight)

            #
            # Output histograms to file
            #
            for variable in bookedVariables:
                for channel in channels:
                    channelName = channel.channelName

                    # Fill phase space info
                    h = histograms[variable][channelName]["phaseSpaceInfoHist"]
                    h.SetBinContent(1, ratio(nVisNotOffline[channelName], nVis[channelName]))
                    h.SetBinContent(2, ratio(nOfflineNotVis[channelName], nOffline[channelName]))
                    h.SetBinContent(3, ratio(nVis[channelName], nFull[channelName]))
                    # Selection efficiency for SL ttbar
                    h.SetBinContent(4, ratio(nOfflineSL[channelName], nFull[channelName]))
                    # Fraction of offline that are SL
                    h.SetBinContent(5, ratio(nOfflineSL[channelName], nOffline[channelName]))

                    outputDirs[variable][channelName].cd()
                    for histogramName in histograms[variable][channelName]:
                        histograms[variable][channelName][histogramName].Write()

        with root_open(outputFileName, "update") as out:
            # Done all channels, now combine the two channels, and output to the same file
            for path, dirs, objects in out.walk():
                if "electron" in path:
                    outputDir = out.mkdir(path.replace("electron", "combined"))
                    outputDir.cd()
                    for h in objects:
                        h_e = out.Get(path + "/" + h)
                        h_mu = out.Get(path.replace("electron", "muon") + "/" + h)
                        h_comb = (h_e + h_mu).Clone(h)
                        h_comb.Write()
def compare_vjets_btag_regions(variable='MET',
                               met_type='patType1CorrectedPFMet',
                               title='Untitled',
                               channel='electron'):
    ''' Compares the V+Jets template in different b-tag bins.

    For every fit variable in electron_fit_variables and every bin of
    `variable`, the V+Jets histogram of the current b_tag_bin and that of a
    control b-tag bin are each scaled by the inverse of their integral and
    passed to compare_measurements; the plots are saved below
    plots/<energy>TeV/fit_variables/<variable>/<fit_variable>/vjets/.

    The module-level b_tag_bin_ctl is overwritten: '1orMoreBtag' for fit
    variables containing '_bl', '0orMoreBtag' otherwise.
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    b_tag_bin_ctl = '0orMoreBtag'
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)

    for fit_variable in electron_fit_variables:
        # fit variables built with a b-jet use a tighter control region
        b_tag_bin_ctl = '1orMoreBtag' if '_bl' in fit_variable else '0orMoreBtag'

        energy = measurement_config.centre_of_mass_energy
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (energy, variable,
                                                          fit_variable)
        output_folder = save_path + '/vjets/'
        make_folder_if_not_exists(output_folder)

        # plot settings shared by all bins of this fit variable
        histogram_properties = Histogram_properties()
        y_title = fit_variable_properties[fit_variable]['y-title']
        histogram_properties.x_axis_title = fit_variable_properties[
            fit_variable]['x-title']
        # shapes are normalised, so the y-axis is in arbitrary units
        histogram_properties.y_axis_title = y_title.replace('Events', 'a.u.')
        histogram_properties.x_limits = [
            fit_variable_properties[fit_variable]['min'],
            fit_variable_properties[fit_variable]['max']
        ]
        histogram_properties.title = title
        histogram_properties.additional_text = ', '.join(
            [channel_latex[channel], b_tag_bins_latex[b_tag_bin_ctl]])
        histogram_properties.y_max_scale = 1.5

        for bin_range in variable_bins:
            signal_region = histogram_template % {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable
            }
            control_region = signal_region.replace(b_tag_bin, b_tag_bin_ctl)
            # format: histograms['V+Jets'][distribution]
            histograms = get_histograms_from_files(
                [signal_region, control_region],
                {'V+Jets': histogram_files['V+Jets']})
            prepare_histograms(
                histograms,
                rebin=fit_variable_properties[fit_variable]['rebin'],
                scale_factor=measurement_config.luminosity_scale)
            histogram_properties.name = '_'.join([
                variable, bin_range, fit_variable, b_tag_bin_ctl,
                'VJets_template_comparison'
            ])

            # scale both templates by the inverse of their integral
            signal_hist = histograms['V+Jets'][signal_region]
            signal_hist.Scale(1 / signal_hist.Integral())
            control_hist = histograms['V+Jets'][control_region]
            control_hist.Scale(1 / control_hist.Integral())

            compare_measurements(
                models={'no b-tag': control_hist},
                measurements={'$>=$ 2 b-tags': signal_hist},
                show_measurement_errors=True,
                histogram_properties=histogram_properties,
                save_folder=output_folder,
                save_as=save_as)
def compare_vjets_templates(variable='MET',
                            met_type='patType1CorrectedPFMet',
                            title='Untitled',
                            channel='electron'):
    ''' Compares the V+jets templates in different bins
     of the current variable.

     For every fit variable in electron_fit_variables the V+Jets histogram of
     each bin of `variable` is read, the bins are summed into an inclusive
     histogram, and all histograms with a non-zero integral are scaled by the
     inverse of that integral.  The per-bin shapes are then passed through
     spread_x and compared to the inclusive shape with compare_measurements.
     Plots are saved below
     plots/<energy>TeV/fit_variables/<variable>/<fit_variable>/vjets/.
    '''
    global fit_variable_properties, b_tag_bin, save_as
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)

    for fit_variable in electron_fit_variables:
        all_hists = {}
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/vjets/')

        for bin_range in variable_bins:
            params = {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable
            }
            fit_variable_distribution = histogram_template % params
            # format: histograms['V+Jets'][fit_variable_distribution]
            histograms = get_histograms_from_files([fit_variable_distribution],
                                                   histogram_files)
            prepare_histograms(
                histograms,
                rebin=fit_variable_properties[fit_variable]['rebin'],
                scale_factor=measurement_config.luminosity_scale)
            all_hists[bin_range] = histograms['V+Jets'][
                fit_variable_distribution]

        # create the inclusive distribution before the bins are normalised
        inclusive_hist = deepcopy(all_hists[variable_bins[0]])
        for bin_range in variable_bins[1:]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms; an empty histogram is left untouched
        # because dividing by its zero integral would raise
        for bin_range in variable_bins:
            integral = all_hists[bin_range].Integral()
            if integral != 0:
                all_hists[bin_range].Scale(1 / integral)
        # the inclusive histogram gets the same guard as the individual bins
        inclusive_integral = inclusive_hist.Integral()
        if inclusive_integral != 0:
            inclusive_hist.Scale(1 / inclusive_integral)

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_variable_properties[
            fit_variable]['x-title']
        histogram_properties.y_axis_title = fit_variable_properties[
            fit_variable]['y-title']
        histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [
            fit_variable_properties[fit_variable]['min'],
            fit_variable_properties[fit_variable]['max']
        ]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[
            channel] + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin + '_VJets_template_comparison'
        histogram_properties.y_max_scale = 1.5
        measurements = {
            bin_range + ' GeV': histogram
            for bin_range, histogram in all_hists.items()
        }
        measurements = OrderedDict(sorted(measurements.items()))
        # fit_variable_bin_edges is keyed without the lepton prefix
        fit_var = fit_variable.replace('electron_', '')
        fit_var = fit_var.replace('muon_', '')
        graphs = spread_x(measurements.values(),
                          fit_variable_bin_edges[fit_var])
        # measurements is already sorted by key, so the graphs line up with
        # the sorted keys
        for key, graph in zip(sorted(measurements.keys()), graphs):
            measurements[key] = graph
        compare_measurements(models={'inclusive': inclusive_hist},
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=save_path + '/vjets/',
                             save_as=save_as)
def make_plots_ROOT(histograms, category, save_path, histname, channel):
    """Draw the unfolded result and the model predictions on a ROOT canvas.

    The 'unfolded' histogram is drawn as points with two sets of error bars:
    the fit error alone and the fit error combined in quadrature with the
    symmetrised total uncertainty. Every other histogram whose key contains
    neither 'unfolded' nor 'measured' is overlaid as a dashed line and added
    to the legend. The canvas is saved as PNG and PDF under
    ``save_path/<variable>/<category>/``.

    Parameters:
        histograms: mapping of name -> histogram. Must contain the keys
            'unfolded' and 'measured'; every remaining key must also be a
            key of ``translate_options`` (used for the legend text).
        category: name of the sub-folder the plots are written to.
        save_path: base output directory.
        histname: stem of the output file name. If it contains 'electron'
            or 'muon' the channel label starts with 'e' or '#mu', otherwise
            with 'combined'.
        channel: forwarded to ``read_unfolded_xsections``.

    Reads the module-level ``variable``, ``translate_options``, ``k_value``,
    ``b_tag_bin`` and ``maximum``.

    Raises:
        AssertionError: if the number of bins for ``variable`` differs from
            the number of central cross-section results.
        KeyError: if a required histogram or lookup entry is missing.
    """
    global variable, translate_options, k_value, b_tag_bin, maximum
    ROOT.TH1.SetDefaultSumw2(False)
    ROOT.gROOT.SetBatch(True)
    # Threshold just above ROOT's Info level, so Info messages are suppressed.
    ROOT.gROOT.ProcessLine('gErrorIgnoreLevel = 1001;')
    plotting.setStyle()
    gStyle.SetTitleYOffset(2.)
    ROOT.gROOT.ForceStyle()
    canvas = Canvas(width=700, height=500)
    canvas.SetLeftMargin(0.18)
    canvas.SetBottomMargin(0.15)
    canvas.SetTopMargin(0.05)
    canvas.SetRightMargin(0.05)
    legend = plotting.create_legend(x0=0.6, y1=0.5)

    variable_label = translate_options[variable]
    hist_data = histograms['unfolded']
    hist_data.GetXaxis().SetTitle(variable_label + ' [GeV]')
    hist_data.GetYaxis().SetTitle('#frac{1}{#sigma} #frac{d#sigma}{d' +
                                  variable_label + '} [GeV^{-1}]')
    hist_data.GetXaxis().SetTitleSize(0.05)
    hist_data.GetYaxis().SetTitleSize(0.05)
    hist_data.SetMinimum(0)
    hist_data.SetMaximum(maximum[variable])
    hist_data.SetMarkerSize(1)
    hist_data.SetMarkerStyle(8)
    # Two graphs built from the same histogram: one will carry the combined
    # error, the other the fit error only.
    plotAsym = TGraphAsymmErrors(hist_data)
    plotStatErr = TGraphAsymmErrors(hist_data)

    xsections = read_unfolded_xsections(channel)
    bins = variable_bins_ROOT[variable]
    assert len(bins) == len(xsections['central'])

    # NOTE(review): the original carried a '# / width' remark here, so this
    # presumably once divided by the bin width; the factor is currently 1.
    scale = 1
    for bin_i, central_result in enumerate(xsections['central']):
        # Second element of each central result is used as the fit error.
        fit_error = central_result[1]
        uncertainty = calculateTotalUncertainty(xsections, bin_i)
        total_plus, total_minus = symmetriseErrors(uncertainty['Total+'][0],
                                                   uncertainty['Total-'][0])
        error_up = sqrt(fit_error**2 + total_plus**2) * scale
        error_down = sqrt(fit_error**2 + total_minus**2) * scale
        plotStatErr.SetPointEYhigh(bin_i, fit_error * scale)
        plotStatErr.SetPointEYlow(bin_i, fit_error * scale)
        plotAsym.SetPointEYhigh(bin_i, error_up)
        plotAsym.SetPointEYlow(bin_i, error_down)

    gStyle.SetEndErrorSize(20)
    plotAsym.SetLineWidth(2)
    plotStatErr.SetLineWidth(2)
    hist_data.Draw('P')
    plotStatErr.Draw('same P')
    plotAsym.Draw('same P Z')
    legend.AddEntry(hist_data, 'unfolded', 'P')

    # The measured histogram is styled but deliberately not drawn and not
    # added to the legend (drawing was disabled in the original code).
    hist_measured = histograms['measured']
    hist_measured.SetMarkerSize(1)
    hist_measured.SetMarkerStyle(20)
    hist_measured.SetMarkerColor(2)

    # Line colour per model/systematic sample; the first matching row wins,
    # and a key matching no row keeps its current line colour.
    line_colours = (
        (('POWHEG', 'matchingdown'), kBlue),
        (('MADGRAPH', 'matchingup'), kRed + 1),
        (('MCATNLO', 'scaleup'), kGreen - 3),
        (('scaledown',), kMagenta + 3),
    )
    for key in sorted(histograms):
        if 'unfolded' in key or 'measured' in key:
            continue
        hist = histograms[key]
        hist.SetLineStyle(7)
        hist.SetLineWidth(2)
        for tags, colour in line_colours:
            if any(tag in key for tag in tags):
                hist.SetLineColor(colour)
                break
        hist.Draw('hist same')
        legend.AddEntry(hist, translate_options[key], 'l')

    legend.Draw()

    mytext = TPaveText(0.5, 0.97, 1, 1.01, "NDC")
    channelLabel = TPaveText(0.18, 0.97, 0.5, 1.01, "NDC")
    if 'electron' in histname:
        channel_prefix = 'e'
    elif 'muon' in histname:
        channel_prefix = '#mu'
    else:
        channel_prefix = 'combined'
    channelLabel.AddText(
        "%s, %s, %s, k = %s" %
        (channel_prefix, "#geq 4 jets", b_tag_bins_latex[b_tag_bin], k_value))
    # Luminosity (5.8) and centre-of-mass energy (8 TeV) are hard-coded here.
    mytext.AddText("CMS Preliminary, L = %.1f fb^{-1} at #sqrt{s} = 8 TeV" %
                   5.8)

    # Both text boxes share the same borderless, transparent styling.
    for text_box in (mytext, channelLabel):
        text_box.SetFillStyle(0)
        text_box.SetBorderSize(0)
        text_box.SetTextFont(42)
        text_box.SetTextAlign(13)
    mytext.Draw()
    channelLabel.Draw()

    canvas.Modified()
    canvas.Update()

    path = save_path + '/' + variable + '/' + category
    make_folder_if_not_exists(path)
    output_base = path + '/' + histname + '_kv' + str(k_value)
    for output_format in ('png', 'pdf'):
        canvas.SaveAs(output_base + '.' + output_format)