def get_k_from_d_i( h_truth, h_measured, h_response, h_fakes = None, h_data = None ):
    """
    Estimate the SVD regularisation parameter k from the d_i distribution.

    The unfolding is run once with k set to the number of bins of
    ``h_measured``. The d_i histogram of the resulting unfolding object is
    then scanned and the (0-based) index of the first entry that is not
    >= 1 is returned as k. If every d_i is >= 1, the starting value
    (number of bins) is returned.

    The unfolding method is read from the module-level ``method`` variable.

    Parameters
    ----------
    h_truth, h_measured, h_response :
        Histograms handed to ``Unfolding``.
    h_fakes : optional
        Fakes histogram handed to ``Unfolding``.
    h_data : optional
        Histogram that is unfolded.

    Returns
    -------
    tuple
        ``(best_k, d_i histogram clone)``

    Raises
    ------
    ValueError
        If ``method`` is not an SVD-based method; only those expose d_i.
    """
    if method not in ( 'RooUnfoldSvd', 'TSVDUnfold' ):
        # Fail before running the unfolding instead of crashing afterwards
        # with an AttributeError on None.
        raise ValueError( "d_i is only available for 'RooUnfoldSvd' and "
                          "'TSVDUnfold', not for %r" % ( method, ) )

    k_start = h_measured.nbins()
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           k_value = k_start,
                           error_treatment = 0,
                           verbose = 1 )
    unfolding.unfold( h_data )

    if method == 'RooUnfoldSvd':
        # RooUnfoldSvd wraps the SVD implementation, d_i lives on Impl()
        hist_d_i = asrootpy( unfolding.unfoldObject.Impl().GetD() )
    else:
        hist_d_i = asrootpy( unfolding.unfoldObject.GetD() )

    best_k = k_start
    for i, d_i in enumerate( hist_d_i.y() ):
        # i starts at 0, so the index of the first d_i below 1 is k
        if not d_i >= 1:
            best_k = i
            break

    return best_k, hist_d_i.clone()
def get_dNdeta_in_classifier_bin_interval(sums_classifier_dir, event_counter, classifier_bin_interval):
    """
    Get dN/deta for a given interval of classifier bin indices.

    The 2D histogram ``eta_classifier_<directory name>`` is restricted on its
    y axis to the requested bin interval and projected onto x. The projection
    is scaled by one over the event count in that interval and by bin width.

    Parameters
    ----------
    sums_classifier_dir : TList
        Sums directory of a classifier
    event_counter : Hist1D
        Event counter histogram with the classifier value on the xaxis
    classifier_bin_interval : list
        classifier value bin edges given as bin indices

    Returns
    -------
    Hist1D

    Raises
    ------
    ValueError
        If the 2D histogram is not found in ``sums_classifier_dir``.
    ZeroDivisionError
        If the event counter holds no events in the interval.
    """
    dir_name = sums_classifier_dir.GetName()
    hist_name = "eta_classifier_{}".format(dir_name)
    h2d = asrootpy(sums_classifier_dir.FindObject(hist_name))
    if not h2d:
        raise ValueError("Could not find histogram {}".format(hist_name))

    first_bin = classifier_bin_interval[0]
    last_bin = classifier_bin_interval[1]
    h2d.yaxis.set_range(first_bin, last_bin)
    h = asrootpy(h2d.projection_x(gen_random_name()))
    h.title = "{} - {} %".format(100 * first_bin, 100 * last_bin)

    # normalise to the number of events in the interval and to the bin width
    try:
        n_events = float(event_counter.Integral(first_bin, last_bin))
        h.Scale(1.0 / n_events, "width")
    except ZeroDivisionError:
        # no events at all in the requested classifier interval
        raise ZeroDivisionError("Your statistics are terrible! Consider increasing the classifier value interval to avoid this")
    return h
 def unfold(self, data):
     """
     Unfold ``data`` with the configured method and return the result.

     The unfolding object is created on first use (together with the
     unfolding response, if that does not exist yet) and is reused on later
     calls. The result is stored in ``self.unfolded_data``.
     """
     def _double_precision_copy(hist):
         # TSVDUnfold is fed freshly built type 'D' histograms
         copy = Hist(list(hist.xedges()), type = 'D')
         copy.Add(hist)
         return copy

     self.data = data
     if not self.unfoldObject:
         if not self.unfoldResponse:
             self.unfoldResponse = self._makeUnfoldResponse()
         method = self.method
         if method == 'RooUnfoldBayes':
             self.unfoldObject = RooUnfoldBayes(self.unfoldResponse, data, unfoldCfg.Bayes_n_repeat)
         elif method == 'RooUnfoldBinByBin':
             self.unfoldObject = RooUnfoldBinByBin(self.unfoldResponse, data)
         elif method == 'RooUnfoldInvert':
             self.unfoldObject = RooUnfoldInvert(self.unfoldResponse, data)
         elif method == 'RooUnfoldTUnfold':
             self.unfoldObject = RooUnfoldTUnfold(self.unfoldResponse, data)
         elif method == 'RooUnfoldSvd':
             self.unfoldObject = RooUnfoldSvd(self.unfoldResponse, data,
                                              unfoldCfg.SVD_k_value, unfoldCfg.SVD_n_toy)
         elif method == 'TSVDUnfold':
             data_d = _double_precision_copy(data)
             measured_d = _double_precision_copy(self.measured)
             truth_d = _double_precision_copy(self.truth)
             response_d = Hist2D(list(self.response.xedges()),
                                 list(self.response.yedges()), type = 'D')
             response_d.Add(self.response)
             self.unfoldObject = TSVDUnfold(data_d, measured_d, truth_d, response_d)

     if self.method != 'TSVDUnfold':
         # RooUnfold objects: set verbosity, then reconstruct
         self.unfoldObject.SetVerbose(self.verbose)
         unfolded = self.unfoldObject.Hreco(unfoldCfg.Hreco)
     else:
         unfolded = self.unfoldObject.Unfold(unfoldCfg.SVD_k_value)
     self.unfolded_data = asrootpy(unfolded)
     return self.unfolded_data
Beispiel #4
0
def getHistogramsPostfit(tf, samples, hname, channel, x_min, x_max):
    """
    Collect the post-fit histograms of all samples for one channel.

    Histograms are read from ``NONE/shapes_fit_s/<channel>/<sample>``, their
    x-axis limits are set to ``[x_min, x_max]`` and they are stored by sample
    key. Samples missing from the file get an all-zero copy of the first
    histogram that was found, so every requested sample has an entry.

    Parameters
    ----------
    tf : rootpy file
        Open file to read from.
    samples : iterable of (sample, sample_name)
        Only the first element of each pair is used as key and path part.
    hname : str
        Only used in the progress printout.
    channel : str
        Channel directory name.
    x_min, x_max : float
        New x-axis limits.

    Returns
    -------
    OrderedDict
        sample -> histogram; empty if no sample was found in the file.
    """
    hs = OrderedDict()

    for sample, sample_name in samples:
        print("NONE/shapes_fit_s/" + channel + "_" + hname + "/" + sample)

        # Path layout used here is the one the original author labelled "DL".
        # The "SL" alternatives were:
        #   input:      "NONE/shapes_fit_s/" + channel + "_" + hname + "/" + sample
        #   final disc: "NONE/shapes_fit_s/" + channel + "/" + sample
        try:
            h = tf.get("NONE/shapes_fit_s/" + channel + "/" + sample).Clone()
        except rootpy.io.file.DoesNotExist:
            continue

        h.GetXaxis().SetLimits(x_min, x_max)
        hs[sample] = rootpy.asrootpy(h)

    # single formatted argument prints identically on Python 2 and 3
    print("Post fit hs: %s" % (hs,))

    if not hs:
        # nothing found, so there is no template for the missing samples
        return hs

    # The first inserted histogram serves as template for empty placeholders
    template = next(iter(hs.values()))
    for sample, sample_name in samples:
        if sample not in hs:
            hs[sample] = rootpy.asrootpy(0.0 * template.Clone())

    return hs
def get_pT_distribution(results_est_dir, pids, classifier_bin_interval, normalized=False):
    """
    Build the p_T distribution of the requested particle species.

    The ``mult_pt`` histograms of all ``pids`` are summed, restricted on the
    x axis to the classifier interval and projected onto y. The projection is
    scaled by one over the number of events in the interval.

    Parameters
    ----------
    results_est_dir : TDirectory
        Directory of a given estimator
    pids : list
        List of strings denoting requested pids
    classifier_bin_interval : tuple
        Lower and upper limit of classifier value for which the p_T
        distribution should be made. Must be given as bin indices!
    normalized : Boolean
        Should the distribution be normalized to yield P(p_T)?

    Returns
    -------
    Hist1D :
        Histogram P(p_T)
    """
    summed_mult_pt = sum([getattr(results_est_dir.mult_pt, pid) for pid in pids])
    summed_mult_pt.xaxis.SetRange(*classifier_bin_interval)

    pt_projection = asrootpy(summed_mult_pt.ProjectionY())
    pt_projection.name = gen_random_name()

    # per-event normalisation within the selected classifier interval
    counter = asrootpy(results_est_dir.event_counter)
    pt_projection.Scale(1.0 / counter.Integral(*classifier_bin_interval))

    if not normalized:
        return pt_projection
    pt_projection.Scale(1.0 / pt_projection.Integral())
    return pt_projection
Beispiel #6
0
def parallelTreeWorker(item):
    """
    Fill one histogram from one tree; meant to be mapped over work items.

    Parameters
    ----------
    item : tuple
        ``(filename, tree_name, value, cut, hist, weight, f)`` where
        ``value`` is the draw expression, ``cut`` the selection, ``weight``
        an optional weight expression (``None`` for unweighted) and ``f`` an
        identifier that is handed back unchanged.

    Returns
    -------
    tuple
        ``(hist, f)`` with the filled histogram.

    Raises
    ------
    RuntimeError
        If drawing fails; details are printed and logged first.
    """
    import os
    import tempfile

    filename, _tree, value, cut, _hist, weight, f = item
    rfile = File(filename)
    try:
        tree = asrootpy(rfile.Get(_tree))
        _hist = asrootpy(_hist)
        try:
            if weight is None:
                tree.Draw(value, selection=cut, hist=_hist)
            else:
                # The cut is applied by copying the tree into a scratch file,
                # then the weight is used as selection on the copy.
                # mkstemp gives a unique name; the old 4-letter random name
                # could collide between parallel workers.
                fd, tmp_path = tempfile.mkstemp(suffix=".root", dir="/tmp")
                os.close(fd)
                try:
                    tmpFile = File(tmp_path, "recreate")
                    try:
                        sel_tree = asrootpy(tree.CopyTree(cut))
                        sel_tree.Draw(value, selection=weight, hist=_hist)
                    finally:
                        tmpFile.Close()
                finally:
                    # remove the scratch file even if drawing failed
                    os.remove(tmp_path)
        except Exception as e:
            print(tree, value, cut, _hist, weight, f)
            log_plotlib.info("error:%s" % (e))
            log_plotlib.info("file :%s" % (f))
            log_plotlib.info("Perhaps try this one:")
            for i in tree.glob("*"):
                log_plotlib.info(i)
            raise RuntimeError("Will stop here!")
        del tree
    finally:
        # close the input file on success and on failure
        rfile.Close()
    return (_hist, f)
Beispiel #7
0
def getHistogramsPrefit(tf, samples, hname, channel):
    """
    Collect the pre-fit histograms of all samples for one channel.

    Histograms are read from ``<channel>_<hname>/<sample>`` and stored by
    sample key. Samples missing from the file get an all-zero copy of the
    first histogram that was found, so every requested sample has an entry.

    Parameters
    ----------
    tf : rootpy file
        Open file to read from.
    samples : iterable of (sample, sample_name)
        Only the first element of each pair is used as key and path part.
    hname : str
        Histogram (variable) name.
    channel : str
        Channel name.

    Returns
    -------
    OrderedDict
        sample -> histogram; empty if no sample was found in the file.
    """
    hs = OrderedDict()

    for sample, sample_name in samples:
        # Path layout used here is the one the original author labelled "DL".
        # The "SL" alternatives were:
        #   input:      sample + "_inputVar_" + channel + "_" + hname
        #   final disc: sample + "_" + hname + "_" + channel
        try:
            h = tf.get(channel + "_" + hname + "/" + sample).Clone()
        except rootpy.io.file.DoesNotExist:
            continue
        hs[sample] = rootpy.asrootpy(h)

    print(hs)

    if not hs:
        # nothing found, so there is no template for the missing samples
        return hs

    # The first inserted histogram serves as template for empty placeholders
    template = next(iter(hs.values()))
    for sample, sample_name in samples:
        if sample not in hs:
            hs[sample] = rootpy.asrootpy(0.0 * template.Clone())

    return hs
Beispiel #8
0
def rejection_linear(eff):
    """
    Build the linear rejection, (total - passed) / total, from an efficiency.

    Returns an ``Efficiency`` named ``Rejlin_<eff.name>`` that carries the
    title of ``eff``.
    """
    total = asrootpy(eff.GetTotalHistogram()).Clone()
    passed = asrootpy(eff.GetPassedHistogram()).Clone()
    failed = total - passed
    failed.Sumw2()
    return Efficiency(failed, total,
                      name='Rejlin_{0}'.format(eff.name),
                      title=eff.title)
Beispiel #9
0
def get_dNdeta_in_classifier_bin_interval(sums_classifier_dir, event_counter,
                                          classifier_bin_interval):
    """
    Get dN/deta for a given interval of classifier bin indices.

    The 2D histogram ``eta_classifier_<directory name>`` is restricted on its
    y axis to the requested bin interval and projected onto x. The projection
    is scaled by one over the event count in that interval and by bin width.

    Parameters
    ----------
    sums_classifier_dir : TList
        Sums directory of a classifier
    event_counter : Hist1D
        Event counter histogram with the classifier value on the xaxis
    classifier_bin_interval : list
        classifier value bin edges given as bin indices

    Returns
    -------
    Hist1D

    Raises
    ------
    ValueError
        If the 2D histogram is not found in ``sums_classifier_dir``.
    ZeroDivisionError
        If the event counter holds no events in the interval.
    """
    dir_name = sums_classifier_dir.GetName()
    hist_name = "eta_classifier_{0}".format(dir_name)
    h2d = asrootpy(sums_classifier_dir.FindObject(hist_name))
    if not h2d:
        raise ValueError("Could not find histogram {0}".format(hist_name))

    lower_bin = classifier_bin_interval[0]
    upper_bin = classifier_bin_interval[1]
    h2d.yaxis.set_range(lower_bin, upper_bin)
    h = asrootpy(h2d.projection_x(gen_random_name()))
    h.title = "{0} - {1} %".format(100 * lower_bin, 100 * upper_bin)

    # normalise to the number of events in the interval and to the bin width
    try:
        n_events = float(event_counter.Integral(lower_bin, upper_bin))
        h.Scale(1.0 / n_events, "width")
    except ZeroDivisionError:
        # no events at all in the requested classifier interval
        raise ZeroDivisionError(
            "Your statistics are terrible! Consider increasing the classifier value interval to avoid this"
        )
    return h
Beispiel #10
0
def get_pT_distribution(results_est_dir,
                        pids,
                        classifier_bin_interval,
                        normalized=False):
    """
    Build the p_T distribution of the requested particle species.

    The ``mult_pt`` histograms of all ``pids`` are summed, restricted on the
    x axis to the classifier interval and projected onto y. The projection is
    scaled by one over the number of events in the interval.

    Parameters
    ----------
    results_est_dir : TDirectory
        Directory of a given estimator
    pids : list
        List of strings denoting requested pids
    classifier_bin_interval : tuple
        Lower and upper limit of classifier value for which the p_T
        distribution should be made. Must be given as bin indices!
    normalized : Boolean
        Should the distribution be normalized to yield P(p_T)?

    Returns
    -------
    Hist1D :
        Histogram P(p_T)
    """
    per_pid_hists = [
        getattr(results_est_dir.mult_pt, pid) for pid in pids
    ]
    summed_mult_pt = sum(per_pid_hists)
    summed_mult_pt.xaxis.SetRange(*classifier_bin_interval)

    pt_hist = asrootpy(summed_mult_pt.ProjectionY())
    pt_hist.name = gen_random_name()

    # per-event normalisation within the selected classifier interval
    n_events = asrootpy(results_est_dir.event_counter).Integral(
        *classifier_bin_interval)
    pt_hist.Scale(1.0 / n_events)

    if normalized:
        # unit integral, i.e. a probability distribution
        pt_hist.Scale(1.0 / pt_hist.Integral())
    return pt_hist
Beispiel #11
0
    def closureTest( self ):
        """
        Unfold the measured distribution itself and return the result.

        The closure-test unfolding object is created on first use (together
        with the unfolding response, if needed) with ``self.measured`` as
        input, and reused afterwards. The result is stored in
        ``self.unfolded_closure``.
        """
        def _as_double( hist ):
            # TSVDUnfold is fed freshly built type 'D' histograms
            copy = Hist( list( hist.xedges() ), type = 'D' )
            copy.Add( hist )
            return copy

        if not self.closure_test:
            if not self.unfoldResponse:
                self.unfoldResponse = self._makeUnfoldResponse()
            method = self.method
            if method == 'RooUnfoldBayes':
                self.closure_test = RooUnfoldBayes( self.unfoldResponse, self.measured, self.Bayes_n_repeat )
            elif method == 'RooUnfoldBinByBin':
                self.closure_test = RooUnfoldBinByBin( self.unfoldResponse, self.measured )
            elif method == 'RooUnfoldInvert':
                self.closure_test = RooUnfoldInvert( self.unfoldResponse, self.measured )
            elif method == 'RooUnfoldSvd':
                self.closure_test = RooUnfoldSvd( self.unfoldResponse, self.measured, self.k_value, self.n_toy )
            elif method == 'RooUnfoldTUnfold':
                self.closure_test = RooUnfoldTUnfold( self.unfoldResponse, self.measured )
                # a negative tau leaves the regularisation strength unfixed
                if self.tau >= 0:
                    self.closure_test.FixTau( self.tau )
            elif method == 'TSVDUnfold':
                measured_d = _as_double( self.measured )
                truth_d = _as_double( self.truth )

                if self.fakes:
                    # fakes are subtracted by hand for TSVDUnfold
                    measured_d = measured_d - _as_double( self.fakes )

                response_d = Hist2D( list( self.response.xedges() ),
                                     list( self.response.yedges() ), type = 'D' )
                response_d.Add( self.response )
                # measured serves both as data and as reconstructed MC
                self.closure_test = TSVDUnfold( measured_d, measured_d, truth_d, response_d )

        if self.method == 'TSVDUnfold':
            closure = self.closure_test.Unfold( self.k_value )
        else:
            closure = self.closure_test.Hreco( self.Hreco )
        self.unfolded_closure = asrootpy( closure )
        return self.unfolded_closure
def get_unfold_histogram_tuple(inputfile, variable, channel, met_type):
    """
    Load truth, measured and response histograms for one variable/channel.

    The folder name depends on whether ``variable`` contains 'HT' and on
    ``measurement_config.centre_of_mass``. All three histograms are clones
    scaled by ``ttbar_xsection * luminosity / n_processed_events``.

    Returns
    -------
    tuple
        ``(h_truth, h_measured, h_response)``
    """
    if 'HT' in variable:
        # HT-like variables do not depend on the MET type
        folder = inputfile.Get('unfolding_%s_analyser_%s_channel' % (variable, channel))
    elif measurement_config.centre_of_mass == 7:
        folder = inputfile.Get('unfoldingAnalyser%sChannel' % channel.title())
    else:
        folder = inputfile.Get('unfolding_%s_analyser_%s_channel_%s' % (variable, channel, met_type))

    h_truth = asrootpy(folder.truth_AsymBins).Clone()
    h_measured = asrootpy(folder.measured_AsymBins).Clone()

    # the response histogram is named differently in the 7 TeV files
    if measurement_config.centre_of_mass == 7:
        h_response = folder.response_withoutFakes_AsymBins.Clone()
    else:
        h_response = folder.response_without_fakes_AsymBins.Clone()

    # number of processed events
    nEvents = inputfile.EventFilter.EventCounter.GetBinContent(1)
    # original author's note: ttbar x-section = 225.2pb, lumi = 5814pb-1
    lumiweight = ttbar_xsection * luminosity / nEvents
    for hist in (h_truth, h_measured, h_response):
        hist.Scale(lumiweight)
    return h_truth, h_measured, h_response
def get_unfold_histogram_tuple(inputfile, variable, channel, met_type):
    """
    Load truth, measured and response histograms for one variable/channel.

    The pre-binned ``*_AsymBins`` histograms are cloned from the analyser
    folder and scaled by ``ttbar_xsection * luminosity / n_processed_events``.
    ``met_type`` is only part of the folder name for variables that do not
    contain 'HT'.

    Returns
    -------
    tuple
        ``(h_truth, h_measured, h_response)``
    """
    if 'HT' not in variable:
        folder = inputfile.Get('unfolding_%s_analyser_%s_channel_%s' %
                               (variable, channel, met_type))
    else:
        folder = inputfile.Get('unfolding_%s_analyser_%s_channel' %
                               (variable, channel))

    # The *_AsymBins histograms already carry the final binning, so no
    # manual rebinning and no lookup in the global bin_edges table is needed.
    h_truth = asrootpy(folder.truth_AsymBins).Clone()
    h_measured = asrootpy(folder.measured_AsymBins).Clone()
    h_response = folder.response_without_fakes_AsymBins.Clone()

    # number of processed events
    nEvents = inputfile.EventFilter.EventCounter.GetBinContent(1)
    # original author's note: ttbar x-section = 225.2pb, lumi = 5814pb-1
    lumiweight = ttbar_xsection * luminosity / nEvents
    h_truth.Scale(lumiweight)
    h_measured.Scale(lumiweight)
    h_response.Scale(lumiweight)
    return h_truth, h_measured, h_response
def get_input_plots(data_file, mc_file, trigger_under_study):
    """
    Load and rebin the 'visited' / 'fired' trigger histograms.

    Parameters
    ----------
    data_file, mc_file :
        Open ROOT files with the data and the simulation histograms.
    trigger_under_study : str
        Histogram path template with one ``%s`` placeholder that is filled
        with 'visited' (total) or 'fired' (passed).

    Returns
    -------
    tuple
        ``(data_passed, data_total, mc_passed, mc_total)`` rebinned to the
        edges given by ``get_binning(trigger_under_study)``.
    """
    plot_data_total = data_file.Get(trigger_under_study % 'visited')
    plot_data_passed = data_file.Get(trigger_under_study % 'fired')

    mc_trigger = trigger_under_study
    if 'CaloIdVT_CaloIsoT_TrkIdT_TrkIsoT' in trigger_under_study:
        # no isolated trigger available (bug!) in analysed MC, use non-iso instead.
        mc_trigger = trigger_under_study.replace('CaloIdVT_CaloIsoT_TrkIdT_TrkIsoT', 'CaloIdVT_TrkIdT')

    # Read from the file passed in, not from a global that may not exist
    plot_ttbar_total = mc_file.Get(mc_trigger % 'visited')
    plot_ttbar_passed = mc_file.Get(mc_trigger % 'fired')

    plot_data_passed.Sumw2()
    plot_data_total.Sumw2()
    plot_ttbar_passed.Sumw2()
    plot_ttbar_total.Sumw2()

    bin_edge_array = get_binning(trigger_under_study)
    n_bins = len(bin_edge_array) - 1

    # NOTE(review): all four rebinned histograms are named 'truth'; kept
    # as-is because the names are part of the returned objects.
    plot_data_passed = asrootpy(plot_data_passed.Rebin(n_bins, 'truth', bin_edge_array))
    plot_data_total = asrootpy(plot_data_total.Rebin(n_bins, 'truth', bin_edge_array))
    plot_ttbar_passed = asrootpy(plot_ttbar_passed.Rebin(n_bins, 'truth', bin_edge_array))
    plot_ttbar_total = asrootpy(plot_ttbar_total.Rebin(n_bins, 'truth', bin_edge_array))

    return plot_data_passed, plot_data_total, plot_ttbar_passed, plot_ttbar_total
def get_input_plots(data_file, mc_file, trigger_under_study):
    """
    Load and rebin the 'visited' / 'fired' trigger histograms.

    Parameters
    ----------
    data_file, mc_file :
        Open ROOT files with the data and the simulation histograms.
    trigger_under_study : str
        Histogram path template with one ``%s`` placeholder that is filled
        with 'visited' (total) or 'fired' (passed).

    Returns
    -------
    tuple
        ``(data_passed, data_total, mc_passed, mc_total)`` rebinned to the
        edges given by ``get_binning(trigger_under_study)``.
    """
    plot_data_total = data_file.Get(trigger_under_study % 'visited')
    plot_data_passed = data_file.Get(trigger_under_study % 'fired')

    mc_trigger = trigger_under_study
    if 'CaloIdVT_CaloIsoT_TrkIdT_TrkIsoT' in trigger_under_study:
        # no isolated trigger available (bug!) in analysed MC, use non-iso instead.
        mc_trigger = trigger_under_study.replace(
            'CaloIdVT_CaloIsoT_TrkIdT_TrkIsoT', 'CaloIdVT_TrkIdT')

    # Read from the file passed in, not from a global that may not exist
    plot_ttbar_total = mc_file.Get(mc_trigger % 'visited')
    plot_ttbar_passed = mc_file.Get(mc_trigger % 'fired')

    plot_data_passed.Sumw2()
    plot_data_total.Sumw2()
    plot_ttbar_passed.Sumw2()
    plot_ttbar_total.Sumw2()

    bin_edge_array = get_binning(trigger_under_study)
    n_bins = len(bin_edge_array) - 1

    # NOTE(review): all four rebinned histograms are named 'truth'; kept
    # as-is because the names are part of the returned objects.
    plot_data_passed = asrootpy(
        plot_data_passed.Rebin(n_bins, 'truth', bin_edge_array))
    plot_data_total = asrootpy(
        plot_data_total.Rebin(n_bins, 'truth', bin_edge_array))
    plot_ttbar_passed = asrootpy(
        plot_ttbar_passed.Rebin(n_bins, 'truth', bin_edge_array))
    plot_ttbar_total = asrootpy(
        plot_ttbar_total.Rebin(n_bins, 'truth', bin_edge_array))

    return plot_data_passed, plot_data_total, plot_ttbar_passed, plot_ttbar_total
def get_next_end( histograms, bin_start, bin_end, p_min, s_min, n_min ): 
    """
    Find the end of the next merged bin that satisfies all thresholds.

    For each histogram the start bin is kept fixed and the end bin is moved
    up until ``p >= p_min``, ``s >= s_min`` and ``n_gen_and_reco >= n_min``,
    or until the last bin is reached. The end found for one histogram is the
    starting end bin for the next one, so the result can only grow.

    Parameters
    ----------
    histograms : iterable of 2D histograms
        The x projection is treated as reco, the y projection as gen.
    bin_start, bin_end : int
        0-based indices into the projected bin contents.
    p_min, s_min : float
        Lower limits on p = n_gen_and_reco / n_reco and
        s = n_gen_and_reco / n_gen (presumably purity and stability).
    n_min : float
        Lower limit on n_gen_and_reco.

    Returns
    -------
    tuple
        ``(bin_end, p, s, n_gen_and_reco)``; the last three belong to the
        last evaluated range of the last histogram and are 0 if no range was
        evaluated at all.
    """
    current_bin_start = bin_start
    current_bin_end = bin_end 
    # Defaults keep the return statement well defined when ``histograms`` is
    # empty or the scan range is empty.
    p, s, n_gen_and_reco = 0, 0, 0

    for gen_vs_reco_histogram in histograms:
        reco = asrootpy( gen_vs_reco_histogram.ProjectionX() )
        gen = asrootpy( gen_vs_reco_histogram.ProjectionY() )
        reco_i = list( reco.y() )
        gen_i = list( gen.y() )
        # keep the start bin the same but roll the end bin
        for bin_i in range ( current_bin_end, len( reco_i ) + 1 ):
            n_reco = sum( reco_i[current_bin_start:bin_i] )
            n_gen = sum( gen_i[current_bin_start:bin_i] )
            n_gen_and_reco = 0
            if bin_i < current_bin_start + 1:
                n_gen_and_reco = gen_vs_reco_histogram.Integral( current_bin_start + 1, bin_i + 1, current_bin_start + 1, bin_i + 1 )
            else:
                # this is necessary to synchronise the integral with the rebin method
                # only if the bin before is taken is it equivalent to rebinning
                # the histogram and taking the diagonal elements (which is what we want)
                n_gen_and_reco = gen_vs_reco_histogram.Integral( current_bin_start + 1, bin_i , current_bin_start + 1, bin_i )

            p, s = 0, 0
            if n_reco > 0:            
                p = round( n_gen_and_reco / n_reco, 3 )
            if n_gen > 0:
                s = round( n_gen_and_reco / n_gen, 3 )
            # find the bin range that matches
            if p >= p_min and s >= s_min and n_gen_and_reco >= n_min:
                current_bin_end = bin_i
                break
            # if it gets to the end, this is the best we can do
            current_bin_end = bin_i
    return current_bin_end, p, s, n_gen_and_reco
Beispiel #17
0
def rejection_linear(eff):
    """
    Turn an efficiency into the matching linear rejection.

    The not-passed histogram (total - passed) is divided by the total via an
    ``Efficiency`` named ``Rejlin_<eff.name>`` with the title of ``eff``.
    """
    all_events = asrootpy(eff.GetTotalHistogram()).Clone()
    accepted = asrootpy(eff.GetPassedHistogram()).Clone()
    rejected = all_events - accepted
    rejected.Sumw2()
    rejection_name = 'Rejlin_{0}'.format(eff.name)
    return Efficiency(rejected, all_events, name=rejection_name, title=eff.title)
Beispiel #18
0
def remap_x_values(hist, corr_hist):
    """
    Map the x values of hist to the y values of corr_hist.

    Bins of ``hist`` and of the x-profile of ``corr_hist`` are paired in
    order, so both need the same binning on their x axes.

    Parameters
    ----------
    hist : Hist1D
    corr_hist : Hist2D
                Correlations between the quantity on hist's x-axis (also
                corr_hist's x-axis) and the new quantity to plot against
                (on corr_hist's y-axis).

    Returns
    -------
    Graph
            Graph of the remapped hist. Each point carries half of the
            respective bin error as symmetric x and y error.
    """
    counts = asrootpy(hist)
    correlations = asrootpy(corr_hist)
    profile = asrootpy(correlations.ProfileX(gen_random_name()))

    remapped = Graph()
    point = 0
    for profile_bin, count_bin in zip(profile.bins(), counts.bins()):
        # x: profiled value of the new quantity, y: content of hist
        remapped.SetPoint(point, profile_bin.value, count_bin.value)
        half_xerr = profile_bin.error / 2.0
        half_yerr = count_bin.error / 2.0
        remapped.SetPointError(point, half_xerr, half_xerr, half_yerr, half_yerr)
        point += 1
    return remapped
def get_k_from_d_i(h_truth, h_measured, h_response, h_fakes=None, h_data=None):
    """
    Estimate the SVD regularisation parameter k from the d_i distribution.

    The unfolding is run once with k set to the number of bins of
    ``h_measured``. The d_i histogram of the resulting unfolding object is
    then scanned and the (0-based) index of the first entry that is not
    >= 1 is returned as k. If every d_i is >= 1, the starting value
    (number of bins) is returned.

    The unfolding method is read from the module-level ``method`` variable.

    Parameters
    ----------
    h_truth, h_measured, h_response :
        Histograms handed to ``Unfolding``.
    h_fakes : optional
        Fakes histogram handed to ``Unfolding``.
    h_data : optional
        Histogram that is unfolded.

    Returns
    -------
    tuple
        ``(best_k, d_i histogram clone)``

    Raises
    ------
    ValueError
        If ``method`` is not an SVD-based method; only those expose d_i.
    """
    if method not in ('RooUnfoldSvd', 'TSVDUnfold'):
        # Fail before running the unfolding instead of crashing afterwards
        # with an AttributeError on None.
        raise ValueError("d_i is only available for 'RooUnfoldSvd' and "
                         "'TSVDUnfold', not for %r" % (method,))

    k_start = h_measured.nbins()
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=k_start,
                          error_treatment=0,
                          verbose=1)
    unfolding.unfold(h_data)

    if method == 'RooUnfoldSvd':
        # RooUnfoldSvd wraps the SVD implementation, d_i lives on Impl()
        hist_d_i = asrootpy(unfolding.unfoldObject.Impl().GetD())
    else:
        hist_d_i = asrootpy(unfolding.unfoldObject.GetD())

    best_k = k_start
    for i, d_i in enumerate(hist_d_i.y()):
        # i starts at 0, so the index of the first d_i below 1 is k
        if not d_i >= 1:
            best_k = i
            break

    return best_k, hist_d_i.clone()
Beispiel #20
0
def remap_x_values(hist, corr_hist):
    """
    Map the x values of hist to the y values of corr_hist.

    Bins of ``hist`` and of the x-profile of ``corr_hist`` are paired in
    order, so both need the same binning on their x axes.

    Parameters
    ----------
    hist : Hist1D
    corr_hist : Hist2D
                Correlations between the quantity on hist's x-axis (also
                corr_hist's x-axis) and the new quantity to plot against
                (on corr_hist's y-axis).

    Returns
    -------
    Graph
            Graph of the remapped hist. Each point carries half of the
            respective bin error as symmetric x and y error.
    """
    source_hist = asrootpy(hist)
    profile_x = asrootpy(asrootpy(corr_hist).ProfileX(gen_random_name()))

    graph = Graph()
    paired_bins = zip(profile_x.bins(), source_hist.bins())
    for index, pair in enumerate(paired_bins):
        mapped_bin, value_bin = pair
        # x: profiled value of the new quantity, y: content of hist
        graph.SetPoint(index, mapped_bin.value, value_bin.value)
        x_half = mapped_bin.error / 2.0
        y_half = value_bin.error / 2.0
        graph.SetPointError(index, x_half, x_half, y_half, y_half)
    return graph
def checkOnMC(unfolding, method):
    """
    Run the unfolding on eight MC sub-samples and plot the pull distributions.

    For sub = 1..8 the measured electron-channel histogram is read from
    ``../data/unfolding_merged_sub<sub>.root``, rebinned with the global
    ``bins`` / ``nbins``, scaled and unfolded; the pulls of every run are
    collected. A combined pull histogram with a Gaussian fit and one pull
    histogram per bin are written as PNG files.

    Parameters
    ----------
    unfolding :
        Unfolding object; it is reset after every sub-sample.
    method :
        Passed through to ``doUnfoldingSequence``.
    """
    # bins / nbins are only read here
    global bins, nbins
    RooUnfold.SVD_n_toy = 1000
    pulls = []
    for sub in range(1,9):
        # NOTE(review): the input files are never closed explicitly
        inputFile2 = File('../data/unfolding_merged_sub%d.root' % sub, 'read')
        h_data = asrootpy(inputFile2.unfoldingAnalyserElectronChannel.measured.Rebin(nbins, 'measured', bins))
        nEvents = inputFile2.EventFilter.EventCounter.GetBinContent(1)
        # presumably cross section [pb] * luminosity [pb^-1] / processed events -- TODO confirm
        lumiweight = 164.5 * 5050 / nEvents
#        print sub, nEvents
        h_data.Scale(lumiweight)
        doUnfoldingSequence(unfolding, h_data, method, '_sub%d' %sub)
        pull = unfolding.pull_inputErrorOnly()
#        unfolding.printTable()
        pulls.append(pull)
        unfolding.Reset()
    allpulls = []

    # flatten the per-sub-sample pulls into one list
    for pull in pulls:
        allpulls.extend(pull)
    h_allpulls = Hist(100,-30,30)
    filling = h_allpulls.Fill
    for entry in allpulls:
        filling(entry)
    fit = h_allpulls.Fit('gaus', 'WWS')
    # histogram of the fitted function, used for the matplotlib plot below
    h_fit = asrootpy(h_allpulls.GetFunction("gaus").GetHistogram())
    # ROOT canvas version of the combined pull plot
    canvas = Canvas(width=1600, height=1000)
    canvas.SetLeftMargin(0.15)
    canvas.SetBottomMargin(0.15)
    canvas.SetTopMargin(0.10)
    canvas.SetRightMargin(0.05)
    h_allpulls.Draw()
    fit.Draw('same')
    canvas.SaveAs('plots/Pull_allBins_withFit.png')
    
    
    
    # matplotlib version of the combined pull plot
    plt.figure(figsize=(16, 10), dpi=100)
    rplt.errorbar(h_allpulls, label=r'Pull distribution for all bins',  emptybins=False)
    rplt.hist(h_fit, label=r'fit')
    plt.xlabel('(unfolded-true)/error', CMS.x_axis_title)
    plt.ylabel('entries', CMS.y_axis_title)
    plt.title('Pull distribution for all bins', CMS.title)
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)
    plt.legend(numpoints=1)
    plt.savefig('plots/Pull_allBins.png')
    
    #individual bins
    for bin_i in range(nbins):
        h_pull = Hist(100,-30,30)
        for pull in pulls:
            h_pull.Fill(pull[bin_i])
        # NOTE(review): figures are never closed, one stays open per bin
        plt.figure(figsize=(16, 10), dpi=100)
        rplt.errorbar(h_pull, label=r'Pull distribution for bin %d' % (bin_i + 1), emptybins=False)
        plt.xlabel('(unfolded-true)/error', CMS.x_axis_title)
        plt.ylabel('entries', CMS.y_axis_title)
        plt.title('Pull distribution for  bin %d' % (bin_i + 1), CMS.title)
        # NOTE(review): written to the working directory, unlike the plots above ('plots/')
        plt.savefig('Pull_bin_%d.png' % (bin_i + 1))
Beispiel #22
0
 def _f(*args, **kwargs):
     """
     Call the wrapped ``fn`` once and pass its result through ``asrootpy``.

     Tuples and lists are converted element-wise and keep their container
     type; any other return value is converted as a whole.
     """
     retval = fn(*args, **kwargs)
     if isinstance(retval, tuple):
         return tuple(asrootpy(i) for i in retval)
     if isinstance(retval, list):
         return [asrootpy(i) for i in retval]
     # Reuse the value already computed: calling fn a second time would
     # repeat its work and any side effects.
     return asrootpy(retval)
def get_subtraction(fn, title=None):
    """
    Return ``totalYield3 - totalYield0`` read from the ROOT file ``fn``.

    A truthy ``title`` is set as title of the difference histogram.
    """
    root_file = r_open(fn, 'read')
    yields = [asrootpy(root_file.Get('totalYield' + str(index)))
              for index in (0, 3)]
    difference = yields[1] - yields[0]
    if not title:
        return difference
    difference.title = title
    return difference
Beispiel #24
0
def get_unfold_histogram_tuple( 
                inputfile,
                variable,
                channel,
                met_type = 'patType1CorrectedPFMet',
                centre_of_mass = 8,
                ttbar_xsection = 245.8,
                luminosity = 19712,
                load_fakes = False,
                scale_to_lumi = False,
                ):
    """
    Load the histograms needed for unfolding one variable in one channel.

    For ``channel == 'combined'`` the call is forwarded to
    ``get_combined_unfold_histogram_tuple``. Otherwise truth, measured and
    response (and optionally fakes) are cloned from the analyser folder,
    optionally scaled by ``ttbar_xsection * luminosity / n_processed_events``
    and passed through ``fix_overflow``.

    Returns
    -------
    tuple
        ``(h_truth, h_measured, h_response, h_fakes)``; ``h_fakes`` is None
        unless ``load_fakes`` is set.
    """
    if channel == 'combined':
        return get_combined_unfold_histogram_tuple( inputfile = inputfile,
                                                   variable = variable,
                                                   met_type = met_type,
                                                   centre_of_mass = centre_of_mass,
                                                   ttbar_xsection = ttbar_xsection,
                                                   luminosity = luminosity,
                                                   load_fakes = load_fakes,
                                                   scale_to_lumi = scale_to_lumi,
                                                   )

    # HT-like variables do not depend on the MET type
    if 'HT' in variable:
        folder_name = 'unfolding_%s_analyser_%s_channel' % ( variable, channel )
    else:
        folder_name = 'unfolding_%s_analyser_%s_channel_%s' % ( variable, channel, met_type )
    folder = inputfile.Get( folder_name )

    h_truth = asrootpy( folder.truth.Clone() )
    h_measured = asrootpy( folder.measured.Clone() )
    # response matrix is always without fakes
    # fake subtraction from measured is performed automatically in RooUnfoldSvd
    # (h_measured - h_response->ProjectionX()) or manually for TSVDUnfold
    h_response = asrootpy( folder.response_without_fakes.Clone() )
    h_fakes = asrootpy( folder.fake.Clone() ) if load_fakes else None

    if scale_to_lumi:
        # number of processed events
        nEvents = inputfile.EventFilter.EventCounter.GetBinContent( 1 )
        lumiweight = ttbar_xsection * luminosity / nEvents
        if load_fakes:
            h_fakes.Scale( lumiweight )
        for hist in ( h_truth, h_measured, h_response ):
            hist.Scale( lumiweight )

    h_truth = fix_overflow( h_truth )
    h_measured = fix_overflow( h_measured )
    h_response = fix_overflow( h_response )
    if load_fakes:
        h_fakes = fix_overflow( h_fakes )

    return h_truth, h_measured, h_response, h_fakes
Beispiel #25
0
def get_bestfit_nll_workspace(workspace, return_nll=False):
    """
    Fit the workspace and return the minimum NLL of the saved fit result.

    With ``return_nll`` set, the fit also hands back the NLL function and a
    tuple ``(min_nll, nll_func)`` is returned instead of the bare value.
    """
    fit_output = asrootpy(workspace).fit(return_nll=return_nll)
    if not return_nll:
        return fit_output.save().minNll()
    minimizer, nll_func = fit_output
    return minimizer.save().minNll(), nll_func
Beispiel #26
0
    def get_nominal_hists_array(self, obsData, mc, simPdf):
        """
        Collect data, model and nominal component histograms per category.

        Parameters
        ----------
        obsData :
            Dataset that is reduced to each category.
        mc :
            Model configuration providing observables and the POI.
        simPdf :
            Simultaneous pdf whose index category is iterated.

        Returns
        -------
        dict
            category name -> list of ``(label, histogram)`` tuples. Labels
            are "data", "background", "background+signal" and, for every
            component whose name contains "nominal", the first 14 characters
            of that name.
        """
        hists_array = {}
        # --> get the list of categories index and iterate over
        catIter = simPdf.indexCat().typeIterator()
        while True:
            cat = catIter.Next()
            # a falsy value from Next() ends the iteration
            if not cat:
                break
            log.info("Scanning category {0}".format(cat.GetName()))
            hists_comp = []

            # --> Get the total model pdf, the observables and the POI
            pdftmp = simPdf.getPdf(cat.GetName())
            obstmp = pdftmp.getObservables(mc.GetObservables())
            obs = obstmp.first()
            poi = mc.GetParametersOfInterest().first()

            # --> Create the data histogram
            # selection string has the form "<indexCat>==<indexCat>::<category>"
            datatmp = obsData.reduce(
                "{0}=={1}::{2}".format(simPdf.indexCat().GetName(), simPdf.indexCat().GetName(), cat.GetName())
            )
            datatmp.__class__ = ROOT.RooAbsData  # --> Ugly fix !!!
            log.info("Retrieve the data histogram")
            hists_comp.append(("data", asrootpy(datatmp.createHistogram("", obs))))

            # --> Create the model histogram with the POI set to 0
            log.info("Retrieve the total background")
            poi.setVal(0.0)
            hists_comp.append(("background", asrootpy(pdftmp.createHistogram("cat_%s" % cat.GetName(), obs))))

            # --> Create the model histogram with the POI set to 1
            log.info("Retrieve the total model (signal+background)")
            poi.setVal(1.0)
            hists_comp.append(
                ("background+signal", asrootpy(pdftmp.createHistogram("model_cat_%s" % cat.GetName(), obs)))
            )
            # POI goes back to 0 before the individual components are histogrammed
            poi.setVal(0.0)

            comps = pdftmp.getComponents()
            compsIter = comps.createIterator()
            while True:
                comp = compsIter.Next()
                if not comp:
                    break
                # ---> loop only over the nominal histograms
                if "nominal" not in comp.GetName():
                    continue

                log.info("Retrieve component {0}".format(comp.GetName()))
                hists_comp.append(
                    (
                        # label is the component name truncated to 14 characters
                        comp.GetName()[:14],
                        asrootpy(comp.createHistogram("%s_%s" % (cat.GetName(), comp.GetName()), obs)),
                    )
                )
            hists_array[cat.GetName()] = hists_comp
        return hists_array
Beispiel #27
0
def get_bestfit_nll_workspace(workspace, return_nll=False):
    """Fit ``workspace`` and return the minimum NLL of the saved fit result.

    The workspace is wrapped with ``asrootpy`` and its ``fit`` method is
    called with ``return_nll`` forwarded unchanged.

    Returns
    -------
    ``fitres.minNll()`` on its own, or the pair
    ``(fitres.minNll(), nll_func)`` when ``return_nll`` is true, where
    ``nll_func`` is the second value handed back by ``fit``.
    """
    fit_output = asrootpy(workspace).fit(return_nll=return_nll)
    if not return_nll:
        return fit_output.save().minNll()

    # with return_nll set, fit() hands back (minimizer, nll function)
    minimizer, nll_func = fit_output
    return minimizer.save().minNll(), nll_func
Beispiel #28
0
 def unfold( self, data ):
     """Unfold ``data`` with the configured method and return the result.

     Raises ``ValueError`` when every bin value of ``data`` equals zero.
     The unfolded histogram is stored on ``self.unfolded_data`` and
     returned.
     """
     if all( value == 0 for value, _ in hist_to_value_error_tuplelist( data ) ):
         raise ValueError('Data histograms contains only zeros')

     self.setup_unfolding( data )
     svd_methods = ( 'TSVDUnfold', 'TopSVDUnfold' )
     if self.method in svd_methods:
         result = self.unfoldObject.Unfold( self.k_value )
     else:
         # forward the configured verbosity before asking for the result
         self.unfoldObject.SetVerbose( self.verbose )
         result = self.unfoldObject.Hreco( self.Hreco )
     self.unfolded_data = asrootpy( result )
     return self.unfolded_data
Beispiel #29
0
def rejection(eff):
    """Build a rejection graph (total / passed) from an efficiency object.

    Parameters
    ----------
    eff : efficiency object providing ``GetTotalHistogram``,
        ``GetPassedHistogram``, ``name`` and ``title``.

    Returns
    -------
    Graph
        Graph of ``total / passed``; when the passed histogram has a zero
        integral the (cloned) total histogram is used undivided. The graph
        is named ``rej_<name>`` where ``<name>`` is ``eff.name`` with its
        first ``_``-separated token removed, and inherits ``eff.title``.
    """
    htot = asrootpy(eff.GetTotalHistogram()).Clone()
    hpass = asrootpy(eff.GetPassedHistogram())
    # Integral has to be *called*: comparing the bound method itself to 0
    # is always true, which made the fallback branch unreachable.
    if hpass.Integral() != 0:
        rej = htot / hpass
    else:
        rej = htot
    rej = Graph(rej)
    name = '_'.join(eff.name.split('_')[1:])
    rej.name = 'rej_{0}'.format(name)
    rej.title = eff.title
    return rej
Beispiel #30
0
    def _project_or_clone(tobject, projection_options=None):
        """Return an independent histogram-like copy of ``tobject``.

        Profile objects (instances of ``_ProfileBase``) are turned into an
        x-projection whose name is a fresh random hex string;
        ``projection_options`` is forwarded to ``ProjectionX`` when given.
        Anything else is simply cloned.
        """
        if not isinstance(tobject, _ProfileBase):
            return tobject.Clone()

        # ``UUID.hex`` exists on both Python 2 and 3, whereas the
        # ``get_hex()`` accessor is Python 2 only.
        unique_name = uuid.uuid4().hex
        if projection_options is None:
            return asrootpy(tobject.ProjectionX(unique_name))
        return asrootpy(tobject.ProjectionX(unique_name, projection_options))
Beispiel #31
0
def rejection(eff):
    """Build a rejection graph (total / passed) from an efficiency object.

    Parameters
    ----------
    eff : efficiency object providing ``GetTotalHistogram``,
        ``GetPassedHistogram``, ``name`` and ``title``.

    Returns
    -------
    Graph
        Graph of ``total / passed``; when the passed histogram has a zero
        integral the (cloned) total histogram is used undivided. The graph
        is named ``rej_<name>`` where ``<name>`` is ``eff.name`` with its
        first ``_``-separated token removed, and inherits ``eff.title``.
    """
    htot = asrootpy(eff.GetTotalHistogram()).Clone()
    hpass = asrootpy(eff.GetPassedHistogram())
    # Integral has to be *called*: comparing the bound method itself to 0
    # is always true, which made the fallback branch unreachable.
    if hpass.Integral() != 0:
        rej = htot / hpass
    else:
        rej = htot
    rej = Graph(rej)
    name = '_'.join(eff.name.split('_')[1:])
    rej.name = 'rej_{0}'.format(name)
    rej.title = eff.title
    return rej
 def make_efficiency(labels):
     """Create a rootpy-wrapped ``TEfficiency`` for the given labels.

     ``labels`` is used to format the ``name`` and ``title`` templates taken
     from the enclosing scope; ``n_bins``, ``low`` and ``high`` come from
     there as well.
     """
     this_name = "efficiency" + name.format(**labels)
     this_title = title.format(**labels)

     # 'low' is either the x-range minimum or a numpy array of bin edges;
     # only the former needs the matching 'high' for the constructor.
     ctor_args = [this_name, this_title, n_bins, low]
     if not isinstance(low, np.ndarray):
         ctor_args.append(high)

     eff = asrootpy(ROOT.TEfficiency(*ctor_args))
     eff.drawstyle = EfficiencyPlot.drawstyle
     return eff
Beispiel #33
0
def gethists_nphe(ptype='p', sector=1):
    """Return the list of y-projections of one 2D histogram in CC_Hists.root.

    The 2D histogram ``hnphe_V_pmt_<ptype>_s<sector>`` is read from
    ``CC_Hists.root`` and projected onto y once for each x-bin index from
    1 to 36. Every projection is detached from the file so that it stays
    usable after the file is closed.
    """
    projections = []
    with root_open('CC_Hists.root') as fin:
        source_name = 'hnphe_V_pmt_%s_s%d' % (ptype, sector)
        h2 = asrootpy(fin.Get(source_name))
        for pmt in range(1, 37):
            # NOTE(review): the x-bin range is pmt..pmt+1 (two bins, both
            # ends inclusive in ROOT) -- confirm this overlap is intended.
            proj_name = source_name + '_pmt%d' % pmt
            proj = asrootpy(h2.ProjectionY(proj_name, pmt, pmt + 1))
            proj.SetDirectory(0)
            projections.append(proj)
    return projections
 def unfold(self, data):
     """Unfold ``data`` with the configured method and return the result.

     Raises ``ValueError`` when ``data`` is ``None`` or when every bin value
     of ``data`` equals zero. The unfolded histogram is stored on
     ``self.unfolded_data`` and returned.
     """
     if data is None:
         raise ValueError("Data histogram is None")
     if all(value == 0 for value, _ in hist_to_value_error_tuplelist(data)):
         raise ValueError("Data histograms contains only zeros")

     self.setup_unfolding(data)
     if self.method == "TSVDUnfold":
         result = self.unfoldObject.Unfold(self.k_value)
     else:
         # forward the configured verbosity before asking for the result
         self.unfoldObject.SetVerbose(self.verbose)
         result = self.unfoldObject.Hreco(self.error_treatment)
     self.unfolded_data = asrootpy(result)
     return self.unfolded_data
Beispiel #35
0
 def test_regularisation ( self, data, k_max ):
     """Scan the regularisation parameter from 1 to ``k_max`` in steps of 1.

     Only the 'RooUnfoldSvd' method is supported; any other method raises
     ``ValueError``. Returns the tuple
     ``(RMSerror, MeanResiduals, RMSresiduals, Chi2)`` of cloned,
     rootpy-wrapped objects obtained from ``RooUnfoldParms``.
     """
     self.setup_unfolding( data )
     if self.method != 'RooUnfoldSvd':
         raise ValueError( 'Unfolding method "%s" is not supported for regularisation parameter tests. Please use RooUnfoldSvd.' % ( self.method ) )

     scan = RooUnfoldParms( self.unfoldObject, self.Hreco, self.truth )
     scan.SetMinParm( 1 )
     scan.SetMaxParm( k_max )
     scan.SetStepSizeParm( 1 )

     getters = ( scan.GetRMSError,
                 scan.GetMeanResiduals,
                 scan.GetRMSResiduals,
                 scan.GetChi2 )
     # evaluated in order; each result is cloned before being wrapped
     return tuple( asrootpy( getter().Clone() ) for getter in getters )
 def test_regularisation ( self, data, k_max ):
     """Scan the regularisation parameter from 1 to ``k_max`` in steps of 1.

     Only the 'RooUnfoldSvd' method is supported; any other method raises
     ``ValueError``. Returns the tuple
     ``(RMSerror, MeanResiduals, RMSresiduals, Chi2)`` of cloned,
     rootpy-wrapped objects obtained from ``RooUnfoldParms``.
     """
     self.setup_unfolding( data )
     if self.method != 'RooUnfoldSvd':
         raise ValueError( 'Unfolding method "%s" is not supported for regularisation parameter tests. Please use RooUnfoldSvd.' % ( self.method ) )

     scan = RooUnfoldParms( self.unfoldObject, self.error_treatment, self.truth )
     scan.SetMinParm( 1 )
     scan.SetMaxParm( k_max )
     scan.SetStepSizeParm( 1 )

     getters = ( scan.GetRMSError,
                 scan.GetMeanResiduals,
                 scan.GetRMSResiduals,
                 scan.GetChi2 )
     # evaluated in order; each result is cloned before being wrapped
     return tuple( asrootpy( getter().Clone() ) for getter in getters )
 def unfold( self, data ):
     """Unfold ``data`` with the configured method and return the result.

     Raises ``ValueError`` when ``data`` is ``None`` or when every bin value
     of ``data`` equals zero. The unfolded histogram is stored on
     ``self.unfolded_data`` and returned.
     """
     if data is None:
         raise ValueError('Data histogram is None')
     if all( value == 0 for value, _ in hist_to_value_error_tuplelist( data ) ):
         raise ValueError('Data histograms contains only zeros')

     self.setup_unfolding( data )
     if self.method == 'TSVDUnfold':
         result = self.unfoldObject.Unfold( self.k_value )
     else:
         # forward the configured verbosity before asking for the result
         self.unfoldObject.SetVerbose( self.verbose )
         result = self.unfoldObject.Hreco( self.error_treatment )
     self.unfolded_data = asrootpy( result )
     return self.unfolded_data
def read_and_scale_histograms(channel):
    global bins, nbins, options
    input_file = File(options.input_file, 'read')
    h_truth, h_measured, h_fakes, h_response_AsymBins = None, None, None, None
    
    if channel == 'electron':
        h_truth = asrootpy(input_file.unfoldingAnalyserElectronChannel.truth.Rebin(nbins, 'truth', bins))
        h_measured = asrootpy(input_file.unfoldingAnalyserElectronChannel.measured.Rebin(nbins, 'measured', bins))
        h_fakes = asrootpy(input_file.unfoldingAnalyserElectronChannel.fake.Rebin(nbins, 'truth', bins))
        h_response = input_file.unfoldingAnalyserElectronChannel.response_withoutFakes  # response_AsymBins
        h_response_AsymBins = input_file.unfoldingAnalyserElectronChannel.response_withoutFakes_AsymBins  # for rescaling
    elif channel == 'muon':
        h_truth = asrootpy(input_file.unfoldingAnalyserMuonChannel.truth.Rebin(nbins, 'truth', bins))
        h_measured = asrootpy(input_file.unfoldingAnalyserMuonChannel.measured.Rebin(nbins, 'measured', bins))
        h_fakes = asrootpy(input_file.unfoldingAnalyserMuonChannel.fake.Rebin(nbins, 'truth', bins))
        h_response = input_file.unfoldingAnalyserMuonChannel.response_withoutFakes  # response_AsymBins
        h_response_AsymBins = input_file.unfoldingAnalyserMuonChannel.response_withoutFakes_AsymBins  # for rescaling
    else:
        print 'Unknown channel', channel
        print 'Expecting electron or muon'
        return
    nEvents = input_file.EventFilter.EventCounter.GetBinContent(1)
    lumiweight = 164.5 * 5050 / nEvents
    h_truth.Scale(lumiweight)
    h_measured.Scale(lumiweight)
    h_fakes.Scale(lumiweight)
    h_response.Scale(lumiweight)
    h_response_AsymBins.Scale(lumiweight)
    h_reco_truth = asrootpy(h_response.ProjectionX().Rebin(nbins, 'reco_truth', bins))
    h_truth_selected = asrootpy(h_response.ProjectionY().Rebin(nbins, 'truth_selected', bins))
    
    return h_truth, h_measured, h_fakes, h_response_AsymBins, h_reco_truth, h_truth_selected
Beispiel #39
0
    def _getHist(self):
        """Load the histograms of ``self.fName`` into ``self.dHist``.

        The cluster histogram is stored under 'nClusters' (``None`` when
        loading it fails for any reason). For every region in ``REGIONS``
        six variants derived from the original histogram are stored:
        '_Original', '_NormTo1', '_Cut', '_CutNormTo1', '_CDF' and
        '_CutCDF'.
        """
        with root_open(self.fName) as rootFile:
            try:
                clusters = asrootpy(rootFile.Get('demo/Clusters/nClusters'))
                clusters.SetDirectory(0)
                clusters.SetName('nClusters')
                self.dHist['nClusters'] = clusters
            except:
                self.dHist['nClusters'] = None

            for region in REGIONS:
                regionPath = 'demo/' + self.dirName + '/' + region
                original = asrootpy(rootFile.Get(regionPath))
                original.SetDirectory(0)
                original.SetName(region + '_Original')

                # normalised to unit integral
                normed = original.Clone()
                normed.SetDirectory(0)
                normed.Scale(1 / normed.integral())
                normed.SetName(region + '_NormTo1')

                # after the cut
                cut = original.Clone()
                cut.SetDirectory(0)
                cut = self._doCut(cut)
                cut.SetName(region + '_Cut')

                # after the cut, then normalised to unit integral
                cutNormed = original.Clone()
                cutNormed.SetDirectory(0)
                cutNormed = self._doCut(cutNormed)
                cutNormed.Scale(1 / cutNormed.integral())
                cutNormed.SetName(region + '_CutNormTo1')

                # CDF of the original
                cdf = original.Clone()
                cdf.SetDirectory(0)
                cdf = self._doCDF(cdf)
                cdf.SetName(region + '_CDF')

                # CDF of the cut histogram
                cutCdf = original.Clone()
                cutCdf.SetDirectory(0)
                cutCdf = self._doCDF(self._doCut(cutCdf))
                cutCdf.SetName(region + '_CutCDF')

                # publish only once every variant has been built
                variants = (
                    ('_Original', original),
                    ('_NormTo1', normed),
                    ('_Cut', cut),
                    ('_CutNormTo1', cutNormed),
                    ('_CDF', cdf),
                    ('_CutCDF', cutCdf),
                )
                for suffix, hist in variants:
                    self.dHist[region + suffix] = hist
    def _dynamic_bin(self, eff):
        """
        Rebuild the total/passed histograms of ``eff`` with merged bins.

        Consecutive bins are accumulated until both the accumulated total
        and the content of the following bin exceed the threshold (16); the
        last bin always closes the current group and additionally absorbs
        the overflow bin. The rebuilt histograms replace those of ``eff``
        in place.
        """
        min_ = 16
        src_total = eff.GetTotalHistogram()
        src_passed = eff.GetPassedHistogram()
        n_src_bins = src_total.GetNbinsX()

        edges = [src_total.GetBinLowEdge(1)]
        group_totals = []
        group_passed = []
        running_total = 0
        running_passed = 0

        for idx in range(1, n_src_bins + 1):
            is_last = idx == n_src_bins
            following_total = src_total.GetBinContent(idx + 1)

            running_total += src_total.GetBinContent(idx)
            running_passed += src_passed.GetBinContent(idx)
            if is_last:
                # fold the overflow bin into the final group
                running_total += src_total.GetBinContent(idx + 1)
                running_passed += src_passed.GetBinContent(idx + 1)

            if is_last or (following_total > min_ and running_total > min_):
                edges.append(src_total.GetBinLowEdge(idx + 1))
                group_totals.append(running_total)
                group_passed.append(running_passed)
                running_total = 0
                running_passed = 0

        edge_array = np.asarray(edges)
        n_groups = len(edges) - 1

        hist_total = asrootpy(ROOT.TH1I("total", "total", n_groups, edge_array))
        hist_passed = asrootpy(ROOT.TH1I("passed", "passed", n_groups, edge_array))

        # ROOT bin indices start at 1
        for out_bin, (n_total, n_passed) in enumerate(
                zip(group_totals, group_passed), 1):
            hist_total.SetBinContent(out_bin, n_total)
            hist_passed.SetBinContent(out_bin, n_passed)

        hist_total.Sumw2(False)
        hist_passed.Sumw2(False)

        eff.SetTotalHistogram(hist_total, "f")
        eff.SetPassedHistogram(hist_passed, "f")
Beispiel #41
0
 def hist_proton_pid(self, sector, paddle, from_tree=False, cor=0):
     """Return a clone of the proton-PID 2D histogram for one sector/paddle.

     With ``from_tree`` false the histogram is looked up in
     ``self.fin.scpid`` using ``self.hpid_p_templ``. Otherwise it is drawn
     from the ``h10`` tree, with ``cor`` subtracted from the time
     difference expression.
     """
     if not from_tree:
         stored = asrootpy(self.fin.scpid[self.hpid_p_templ%(sector, paddle)])
         return stored.clone()

     # Equivalent interactive ROOT session:
     #root [3] c->SetAlias("bifp","sqrt(p*p/(p*p+0.93827203*0.93827203))");
     #root [4] c->SetAlias("dtifp","sc_t[sc-1]-sc_r[sc-1]/(29.9792458*bifp)-tr_time");
     #root [5] c->Draw("dtifp:p>>h2(550, 0, 5.5, 200, -10, 10)", "q>0 && sc>0 && ec>0 && sc_sect[sc-1]==6 && sc_pd[sc-1]==25", "colz")
     beta_expr = 'sqrt(p*p/(p*p+%f*%f))'%(MASS_P, MASS_P)
     dt_expr = 'sc_t[sc-1]-sc_r[sc-1]/(%f*%s)-tr_time-%d'%(SOL, beta_expr, cor)
     selection = 'q>0 && sc>0 && dc>0 && sc_sect[sc-1]==%d && sc_pd[sc-1]==%d'%(sector, paddle)
     tree = asrootpy(self.fin.h10clone.h10)
     drawn = tree.draw('p:%s>>htmp(500,0,5,400,-10,10)'%dt_expr, selection, 'goff')
     return asrootpy(drawn).clone()
Beispiel #42
0
def __clean(hists):
    """Replace efficiencies by graphs and pick the histogram that owns the axes.

    Every ``Efficiency`` in ``hists`` is converted with
    ``CreateGraph("e0")``, decorated like the original and given a marker
    size of 0.5; other objects pass through untouched.

    Returns ``(axis_hist, cleaned_hists)`` where ``axis_hist`` is the first
    cleaned object, or its underlying histogram when that object is a graph.
    """
    def _as_drawable(obj):
        # non-efficiency objects are used as they are
        if not isinstance(obj, Efficiency):
            return obj
        graph = asrootpy(obj.CreateGraph("e0"))
        graph.decorate(obj)
        graph.SetMarkerSize(0.5)
        return graph

    cleaned_hists = [_as_drawable(hist) for hist in hists]

    axis_hist = cleaned_hists[0]
    if isinstance(axis_hist, _GraphBase):
        axis_hist = asrootpy(axis_hist.GetHistogram())

    return axis_hist, cleaned_hists
Beispiel #43
0
    def __set_unfolding_histograms__( self ):
        # at the moment only one file is supported for the unfolding input
        files = set( 
            [self.truth['file'],
            self.gen_vs_reco['file'],
            self.measured['file']]
        )
        if len( files ) > 1:
            print "Currently not supported to have different files for truth, gen_vs_reco and measured"
            sys.exit()
            
        input_file = files.pop()
        visiblePS = self.phaseSpace

        t, m, r, f = get_unfold_histogram_tuple( 
            File(input_file),
            self.variable,
            self.channel,
            centre_of_mass = self.centre_of_mass_energy,
            ttbar_xsection=self.measurement_config.ttbar_xsection,
            luminosity=self.measurement_config.luminosity,
            load_fakes = True,
            visiblePS = visiblePS
        )

        self.h_truth = asrootpy ( t )
        self.h_response = asrootpy ( r )
        self.h_measured = asrootpy ( m )
        self.h_fakes = asrootpy ( f )
        self.h_refolded = None

        data_file = self.data['file']
        if data_file.endswith('.root'):
            self.h_data = get_histogram_from_file(self.data['histogram'], self.data['file'])
        elif data_file.endswith('.json') or data_file.endswith('.txt'):
            data_key = self.data['histogram']
            # assume configured bin edges
            edges = []
            edges = reco_bin_edges_vis[self.variable]

            json_input = read_tuple_from_file(data_file)

            if data_key == "": # JSON file == histogram
                self.h_data = value_error_tuplelist_to_hist(json_input, edges)
            else:
                self.h_data = value_error_tuplelist_to_hist(json_input[data_key], edges)
        else:
            print 'Unkown file extension', data_file.split('.')[-1]
def make_efficiency_plot(pass_data, total_data, pass_mc, total_mc, trigger_under_study):
    """Compute data/MC efficiencies and their scale factor, fit them and plot.

    The efficiencies are ``TGraphAsymmErrors`` built by dividing passed by
    total histograms; the scale factor is
    ``pass_data * total_mc / total_data / pass_mc``. All three are fitted
    with the function returned by ``get_parameters`` for
    ``trigger_under_study`` and handed to ``plot_efficiencies``.
    """
    global output_folder, output_formats

    divide_options = "cl=0.683 b(1,1) mode"
    efficiency_data = asrootpy(TGraphAsymmErrors())
    efficiency_mc = asrootpy(TGraphAsymmErrors())
    efficiency_data.Divide(pass_data, total_data, divide_options)
    efficiency_mc.Divide(pass_mc, total_mc, divide_options)

    # data efficiency over MC efficiency, built from the raw histograms
    scale_factor = pass_data.Clone("pass_mc")
    scale_factor.Multiply(total_mc)
    scale_factor.Divide(total_data)
    scale_factor.Divide(pass_mc)
    scale_factor.linewidth = 6
    scale_factor.SetMarkerSize(3)
    scale_factor.linecolor = "green"
    scale_factor.SetMarkerColor("green")

    x_limits, x_title, y_title, fit_function, fit_range = get_parameters(trigger_under_study)

    fit_low, fit_high = fit_range[0], fit_range[1]
    fit_data, fit_mc, fit_SF = [
        TF1(fit_name, fit_function, fit_low, fit_high)
        for fit_name in ("fit_data", "fit_mc", "fit_SF")
    ]
    for fit in (fit_data, fit_mc, fit_SF):
        set_parameter_limits(trigger_under_study, fit)

    fit_targets = (
        (efficiency_data, fit_data),
        (efficiency_mc, fit_mc),
        (scale_factor, fit_SF),
    )
    for target, fit in fit_targets:
        target.Fit(fit, "FECQ")

    set_plot_styles(efficiency_data, efficiency_mc)

    # the trigger name doubles as file name, so the path separator must go
    save_as_name = trigger_under_study.replace("Jet30/", "Jet30_")
    plot_efficiencies(
        efficiency_data,
        efficiency_mc,
        scale_factor,
        fit_data,
        fit_mc,
        fit_SF,
        fit_function,
        x_limits,
        x_title,
        y_title,
        save_as_name,
    )
Beispiel #45
0
def hdf2root(infile, outfile, verbose=False):
    """Convert every leaf of an HDF5 file into a tree of a ROOT file.

    Parameters
    ----------
    infile : path of the HDF5 file to read.
    outfile : path of the ROOT file, opened in 'recreate' mode.
    verbose : accepted for interface compatibility; currently unused.

    Raises
    ------
    ImportError
        When rootpy or root_numpy cannot be imported.

    Each leaf becomes one tree named after the leaf's path with '/' replaced
    by '_'. Arrays without named fields are viewed as a single-field record
    array named after the leaf. Both files are closed even when the
    conversion fails part-way.
    """
    try:
        from rootpy.io import root_open
        from rootpy import asrootpy
        from root_numpy import array2tree
    except ImportError:
        raise ImportError(
            "Please load ROOT into PYTHONPATH and install rootpy+root_numpy:\n"
            "   `pip install rootpy root_numpy`"
        )

    from tables import open_file

    h5 = open_file(infile, 'r')
    try:
        rf = root_open(outfile, 'recreate')
        try:
            # 'walk_nodes' does not allow to check whether a node is a group
            # or a leaf, hence the nested loop instead of a simple walk
            for group in h5.walk_groups():
                for leafname, leaf in group._v_leaves.items():
                    arr = leaf[:]
                    if arr.dtype.names is None:
                        dt = np.dtype((arr.dtype, [(leafname, arr.dtype)]))
                        arr = arr.view(dt)
                    treename = leaf._v_pathname.replace('/', '_')
                    tree = asrootpy(array2tree(arr, name=treename))
                    tree.write()
        finally:
            rf.close()
    finally:
        h5.close()
Beispiel #46
0
    def get_covariance_matrix(self):
        '''
        Return the covariance and correlation matrices of the unfolded data.

        The covariance is taken from ``GetEmatrixInput`` of the unfolding
        object, see
        https://root.cern.ch/doc/master/classTUnfoldDensity.html#a7f9335973b3c520e2a4311d2dd6f5579

        Returns ``(cov_matrix, corr_matrix)`` as numpy matrices, or ``None``
        (after printing a message) when no data has been unfolded yet.
        '''
        import uncertainties as u
        from numpy import matrix

        if self.unfolded_data is None:
            print("Data has not been unfolded. Cannot return unfolding covariance matrix")
            return

        # covariance as computed by the unfolding object, as a numpy matrix
        covariance = asrootpy(self.unfoldObject.GetEmatrixInput('Covariance'))
        cov_matrix = matrix(list(covariance.z()))

        # unfolded number of events per bin, without the uncertainties
        nominal_values = [
            entry[0] for entry in hist_to_value_error_tuplelist(self.unfolded_data)
        ]

        # attach the correlated uncertainties and derive the correlation matrix
        values_correlated = u.correlated_values(nominal_values, cov_matrix.tolist())
        corr_matrix = matrix(u.correlation_matrix(values_correlated))

        return cov_matrix, corr_matrix
Beispiel #47
0
def calculate_purities( gen_vs_reco_histogram ):
    r'''
    Return the list of *purity* values, one per reconstructed bin, of a 2D
    histogram of generated versus reconstructed events.

    *purity* is the diagonal content of a bin (reconstructed & generated in
    that bin) divided by the number of reconstructed events in it, rounded
    to 3 decimals:
    p_i = \frac{N^{\text{rec\&gen}}}{N^{\text{rec}}}

    Bins without reconstructed events get a purity of 0.
    '''
    # assume reco = x axis and gen = y axis
    reco_contents = list( asrootpy( gen_vs_reco_histogram.ProjectionX() ).y() )

    purities = []
    # ROOT bin indices start at 1
    for bin_index, n_reco in enumerate( reco_contents, 1 ):
        n_gen_and_reco = gen_vs_reco_histogram.GetBinContent( bin_index, bin_index )
        purity = 0
        if n_reco > 0:
            purity = round( n_gen_and_reco / n_reco, 3 )
        purities.append( purity )

    return purities
Beispiel #48
0
def calculate_stabilities( gen_vs_reco_histogram ):
    r'''
    Return the list of *stability* values, one per generated bin, of a 2D
    histogram of generated versus reconstructed events.

    *stability* is the diagonal content of a bin (reconstructed & generated
    in that bin) divided by the number of generated events in it, rounded
    to 3 decimals:
    s_i = \frac{N^{\text{rec\&gen}}}{N^{\text{gen}}}

    Bins without generated events get a stability of 0.
    '''
    # assume reco = x axis and gen = y axis
    gen_contents = list( asrootpy( gen_vs_reco_histogram.ProjectionY() ).y() )

    stabilities = []
    # ROOT bin indices start at 1
    for bin_index, n_gen in enumerate( gen_contents, 1 ):
        n_gen_and_reco = gen_vs_reco_histogram.GetBinContent( bin_index, bin_index )
        stability = 0
        if n_gen > 0:
            stability = round( n_gen_and_reco / n_gen, 3 )
        stabilities.append( stability )

    return stabilities
Beispiel #49
0
    def _process(self, config):
        '''Evaluate every subtask expression and write the result to the output file.

        ``config['filename']`` selects the output file; each entry of
        ``config['subtasks']`` provides an ``expression`` to evaluate and an
        ``output_path`` whose directory part is created on demand and whose
        base name becomes the name of the written object.
        '''
        _tfile = self._get_file(config['filename'])

        with preserve_current_directory():
            for _subtask_config in config['subtasks']:
                _expression = _subtask_config['expression']
                _dirname, _basename = os.path.split(_subtask_config['output_path'])

                # clone while in the global ROOT directory
                ROOT.gROOT.cd()
                _source_object = self._input_controller.get_expr(_expression)
                _plot_object = asrootpy(_source_object.Clone(_basename))

                try:
                    _dir = _tfile.GetDirectory(_dirname)
                except DoesNotExist:
                    _dir = _tfile.mkdir(_dirname, recurse=True)
                _dir.cd()

                try:
                    _plot_object.SetDirectory(_dir)
                except AttributeError:
                    # skip objects without 'SetDirectory' method (e.g. TGraph)
                    pass
                _plot_object.Write()
Beispiel #50
0
def hist_to_table(hist):
    """Convert 1D ROOT histogram into astropy Table.

    Parameters
    ----------
    hist : ROOT.TH1
        ROOT histogram.

    Returns
    -------
    table : `~astropy.table.table.Table`
        Table with the columns ``x``, ``x_err``, ``x_err_lo``, ``x_err_hi``,
        ``y``, ``y_err``, ``y_err_lo`` and ``y_err_hi``, in that order.
    """
    from rootpy import asrootpy

    rootpy_hist = asrootpy(hist)

    # (table column, rootpy accessor) pairs, in output column order
    accessors = (
        ('x', 'x'),
        ('x_err', 'xerravg'),
        ('x_err_lo', 'xerrl'),
        ('x_err_hi', 'xerrh'),
        ('y', 'y'),
        ('y_err', 'yerravg'),
        ('y_err_lo', 'yerrl'),
        ('y_err_hi', 'yerrh'),
    )

    data = OrderedDict()
    for column, accessor in accessors:
        data[column] = list(getattr(rootpy_hist, accessor)())

    return Table(data)
def prepare_histograms(histograms, rebin = 1, scale_factor=1.):
    """Rebin and scale, in place, every histogram of a two-level mapping.

    Parameters
    ----------
    histograms : mapping whose values are themselves mappings of histograms.
    rebin : value passed to each histogram's ``Rebin``.
    scale_factor : value passed to each histogram's ``Scale``.

    Returns nothing; the histograms are modified in place.
    """
    # only the values are needed; ``values()`` also works on Python 3,
    # where ``iteritems()`` no longer exists
    for histogram_dict in histograms.values():
        for histogram in histogram_dict.values():
            histogram = asrootpy(histogram)
            histogram.Rebin(rebin)
            histogram.Scale(scale_factor)
Beispiel #52
0
    def get_hist_array(self,
                       field_hist_template,
                       category=None,
                       cuts=None,
                       multi_proc=False):
        """
        Fill one histogram per entry of ``field_hist_template``.

        The selection is ``self.cuts(category)``, and-ed with ``cuts`` when
        given and multiplied by the weight field(s) when set. With
        ``multi_proc`` the fills run through a worker pool; this is always
        disabled for ``JZ`` instances. Returns a dict mapping each template
        key to its filled histogram.
        """
        sel = self.cuts(category)
        if cuts is not None:
            sel &= cuts
        if self.weight_field is not None:
            if isinstance(self.weight_field, (list, tuple)):
                for weight in self.weight_field:
                    sel *= weight
            else:
                sel *= self.weight_field

        from .jet import JZ
        if isinstance(self, JZ):
            multi_proc = False

        if not multi_proc:
            return {
                key: self.draw_helper(hist, key, sel)
                for key, hist in field_hist_template.items()
            }

        keys = list(field_hist_template.keys())
        workers = [
            FuncWorker(self.draw_helper, field_hist_template[key], key, sel)
            for key in keys
        ]
        run_pool(workers, n_jobs=-1)
        return {key: asrootpy(worker.output) for key, worker in zip(keys, workers)}
 def get_bias(self):
     '''
     Fetch the bias vector from the unfolding object, store it on
     ``self.bias`` as a rootpy object and return it.
     '''
     bias_vector = self.unfoldObject.GetBias('BiasVector')
     self.bias = asrootpy(bias_vector)
     return self.bias
def get_input_efficiency(input_file, efficiency_instance):
    """Return the passed and total histograms of an efficiency stored in a file.

    Parameters
    ----------
    input_file : open file providing ``Get``.
    efficiency_instance : name of the efficiency object inside the file.

    Returns
    -------
    tuple
        ``(hist_passed, hist_total)`` as rootpy objects.
    """
    efficiency = input_file.Get(efficiency_instance)
    passed = efficiency.GetPassedHistogram()
    total = efficiency.GetTotalHistogram()

    # The binning is still looked up, but it is unused for now: rebinning
    # both histograms to these edges (Rebin(n_bins, 'truth', bin_edge_array))
    # is currently disabled.
    bin_edge_array = get_binning(efficiency_instance)
    n_bins = len(bin_edge_array) - 1

    return asrootpy(passed), asrootpy(total)
Beispiel #55
0
 def fill_hist(self, hist_template, expr, selection):
     """
     Draw ``expr`` from the tree of the single input file into a histogram.

     The binning (number of bins, first edge, last edge) is taken from
     ``hist_template``. If the drawn histogram cannot be retrieved for any
     reason, a warning is logged and an empty histogram with the same
     binning is returned instead.
     """
     assert len(self.files) == 1
     rfile = root_open(self.files[0], 'READ')
     tree = rfile[self.tree_name]

     # use TTree Draw for now (limited to Nbins, Xmin, Xmax)
     n_bins = hist_template.nbins()
     edges = list(hist_template.xedges())
     binning = (n_bins, edges[0], edges[-1])

     hist = hist_template.Clone()
     hist.Sumw2()
     root_string = '{0}>>{1}{2}'.format(expr, hist.name, binning)
     log.debug("Plotting {0} using selection: {1}".format(
         root_string, selection))
     log.debug('{0}: Draw {1} with \n selection: {2} ...'.format(
         self.name, root_string, selection))
     tree.Draw(root_string, selection)

     try:
         drawn = asrootpy(ROOT.gPad.GetPrimitive(hist.name))
         return Hist(drawn, title=self.label, **self.hist_decor)
     except:
         log.warning(
             '{0}: unable to retrieve histogram for {1} with selection {2}'.
             format(self.name, expr, selection))
         # fall back to an empty histogram with the requested binning
         return Hist(*binning, title=self.label, **self.hist_decor)
Beispiel #56
0
    def get_hist_array(self,
                       field_hist_template,
                       category=None,
                       cuts=None,
                       multi_proc=False):
        """
        Fill one histogram per entry of ``field_hist_template``.

        The selection starts empty, or from ``self.cuts(category)`` when a
        category is given; it is and-ed with ``cuts`` when given and
        multiplied by the weight field when set. With ``multi_proc`` the
        fills run through a worker pool. Returns a dict mapping each
        template key to its filled histogram.
        """
        sel = Cut('')
        if category is not None:
            sel = self.cuts(category)
        if cuts is not None:
            sel &= cuts
        if self.weight_field is not None:
            sel *= self.weight_field

        if not multi_proc:
            return {
                key: self.fill_hist(hist, key, sel)
                for key, hist in field_hist_template.items()
            }

        keys = list(field_hist_template.keys())
        workers = [
            FuncWorker(self.draw_helper, field_hist_template[key], key, sel)
            for key in keys
        ]
        run_pool(workers, n_jobs=-1)
        return {key: asrootpy(worker.output) for key, worker in zip(keys, workers)}
Beispiel #57
0
def prepare_histograms(histograms, rebin=1, scale_factor=1.):
    """Rebin and scale, in place, every histogram of a two-level mapping.

    Parameters
    ----------
    histograms : mapping whose values are themselves mappings of histograms.
    rebin : value passed to each histogram's ``Rebin``.
    scale_factor : value passed to each histogram's ``Scale``.

    Returns nothing; the histograms are modified in place.
    """
    # only the values are needed; ``values()`` also works on Python 3,
    # where ``iteritems()`` no longer exists
    for histogram_dict in histograms.values():
        for histogram in histogram_dict.values():
            histogram = asrootpy(histogram)
            histogram.Rebin(rebin)
            histogram.Scale(scale_factor)