def setUp(self):
        """Build toy histograms and run the four Minuit fits used by the tests.

        Two background/signal/data histogram triplets are created from the
        module-level samples ``x1``..``x4`` and ``x*_obs``; the data
        histograms are scaled by ``data_scale``.  The fitters stored on
        ``self`` are:

        * ``minuit_fitter`` -- a single distribution,
        * ``simultaneous_fit`` -- both distributions, no constraints,
        * ``simultaneous_fit_with_constraints`` -- 'bkg1' constrained to 0.5,
        * ``simultaneous_fit_with_bad_constraints`` -- 'bkg1' constrained
          to 0.001.
        """
        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title='Background')
        h_signal_1 = h_bkg1_1.Clone(title='Signal')
        h_data_1 = h_bkg1_1.Clone(title='Data')
        h_bkg1_2 = h_bkg1_1.Clone(title='Background')
        h_signal_2 = h_bkg1_1.Clone(title='Signal')
        h_data_2 = h_bkg1_1.Clone(title='Data')

        # Fill the histograms with our distributions.  Explicit loops are
        # used instead of map(): map() is lazy on Python 3, so calling it
        # only for its side effects would leave every histogram empty.
        fill_plan = (
            (h_bkg1_1, x1),
            (h_signal_1, x2),
            (h_data_1, x1_obs),
            (h_data_1, x2_obs),
            (h_bkg1_2, x3),
            (h_signal_2, x4),
            (h_data_2, x3_obs),
            (h_data_2, x4_obs),
        )
        for hist, values in fill_plan:
            for value in values:
                hist.Fill(value)

        h_data_1.Scale(data_scale)
        h_data_2.Scale(data_scale)

        histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1}

        histograms_2 = {'signal': h_signal_2, 'bkg1': h_bkg1_2}

        fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
        fit_data_2 = FitData(h_data_2, histograms_2, fit_boundaries=(40, 200))

        single_fit_collection = FitDataCollection()
        single_fit_collection.add(fit_data_1)

        collection_1 = FitDataCollection()
        collection_1.add(fit_data_1, 'var1')
        collection_1.add(fit_data_2, 'var2')

        collection_2 = FitDataCollection()
        collection_2.add(fit_data_1, 'var1')
        collection_2.add(fit_data_2, 'var2')
        collection_2.set_normalisation_constraints({'bkg1': 0.5})

        collection_3 = FitDataCollection()
        collection_3.add(fit_data_1, 'var1')
        collection_3.add(fit_data_2, 'var2')
        collection_3.set_normalisation_constraints({'bkg1': 0.001})

        self.minuit_fitter = Minuit(single_fit_collection)
        self.minuit_fitter.fit()

        self.simultaneous_fit = Minuit(collection_1)
        self.simultaneous_fit.fit()

        self.simultaneous_fit_with_constraints = Minuit(collection_2)
        self.simultaneous_fit_with_constraints.fit()

        self.simultaneous_fit_with_bad_constraints = Minuit(collection_3)
        self.simultaneous_fit_with_bad_constraints.fit()
    def setUp( self ):
        """Build toy histograms, FitData objects and FitDataCollections.

        Everything is stored on ``self``.  The samples ``x1``..``x4``,
        ``x*_obs`` and the values ``data_scale``, ``x_min`` and ``x_max``
        are read from module level.  Note that ``histograms_3`` uses the
        key 'var1' where the other two dictionaries use 'signal'.
        """
        # create histograms
        h_bkg1_1 = Hist( 100, 40, 200, title = 'Background' )
        h_signal_1 = h_bkg1_1.Clone( title = 'Signal' )
        h_data_1 = h_bkg1_1.Clone( title = 'Data' )
        h_bkg1_2 = h_bkg1_1.Clone( title = 'Background' )
        h_signal_2 = h_bkg1_1.Clone( title = 'Signal' )
        h_data_2 = h_bkg1_1.Clone( title = 'Data' )

        # Fill the histograms with our distributions.  Explicit loops are
        # used instead of map(): map() is lazy on Python 3, so calling it
        # only for its side effects would leave every histogram empty.
        fill_plan = (
            ( h_bkg1_1, x1 ),
            ( h_signal_1, x2 ),
            ( h_data_1, x1_obs ),
            ( h_data_1, x2_obs ),
            ( h_bkg1_2, x3 ),
            ( h_signal_2, x4 ),
            ( h_data_2, x3_obs ),
            ( h_data_2, x4_obs ),
        )
        for hist, values in fill_plan:
            for value in values:
                hist.Fill( value )

        h_data_1.Scale(data_scale)
        h_data_2.Scale(data_scale)

        self.histograms_1 = {'signal': h_signal_1,
                             'bkg1': h_bkg1_1}

        self.histograms_2 = {'signal': h_signal_2,
                             'bkg1': h_bkg1_2}

        # same histograms as histograms_1, but the signal is keyed 'var1'
        self.histograms_3 = {'var1': h_signal_1,
                             'bkg1': h_bkg1_1}

        self.fit_data_1 = FitData( h_data_1, self.histograms_1, fit_boundaries = ( x_min, x_max ))
        self.fit_data_2 = FitData( h_data_2, self.histograms_2, fit_boundaries = ( x_min, x_max ))
        self.fit_data_3 = FitData( h_data_1, self.histograms_3, fit_boundaries = ( x_min, x_max ))

        # two named distributions with a constraint on 'bkg1'
        self.collection_1 = FitDataCollection()
        self.collection_1.add( self.fit_data_1, 'signal region' )
        self.collection_1.add( self.fit_data_2, 'control region' )
        self.collection_1.set_normalisation_constraints({'bkg1': 0.5})

        # the same two distributions added without explicit names
        self.collection_2 = FitDataCollection()
        self.collection_2.add( self.fit_data_1 )
        self.collection_2.add( self.fit_data_2 )
        self.collection_2.set_normalisation_constraints({'bkg1': 0.5})

        self.single_collection = FitDataCollection()
        self.single_collection.add( self.fit_data_1 )
        self.single_collection.set_normalisation_constraints({'bkg1': 0.5})

        # fit_data_1 and fit_data_3 do not share the same sample keys
        self.non_simultaneous_fit_collection = FitDataCollection()
        self.non_simultaneous_fit_collection.add( self.fit_data_1 )
        self.non_simultaneous_fit_collection.add( self.fit_data_3 )

        self.h_data = h_data_1
        self.h_bkg1 = h_bkg1_1
        self.h_signal = h_signal_1
Beispiel #3
0
def main ():
    """Run a toy simultaneous RooFit fit over two identical models.

    Gaussian background and signal samples are generated with numpy,
    histogrammed, turned into two RooFit models via ``get_roofit_model``
    and combined in a ``RooSimultaneous`` pdf that is fitted to the summed
    data histograms.  Nothing is returned.
    """
    N_bkg1 = 9000
    N_signal = 1000
    N_bkg1_obs = 10000
    N_signal_obs = 2000
    mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5
    x1 = mu1 + sigma1 * np.random.randn( N_bkg1 )
    x2 = mu2 + sigma2 * np.random.randn( N_signal )
    x1_obs = mu1 + sigma1 * np.random.randn( N_bkg1_obs )
    x2_obs = mu2 + sigma2 * np.random.randn( N_signal_obs )

    h1 = Hist( 100, 40, 200, title = 'Background' )
    h2 = h1.Clone( title = 'Signal' )
    h3 = h1.Clone( title = 'Data' )
    h3.markersize = 1.2

    # Fill the histograms with our distributions.  Explicit loops are used
    # instead of map(): map() is lazy on Python 3, so calling it only for
    # its side effects would leave every histogram empty.
    for hist, values in ( ( h1, x1 ), ( h2, x2 ), ( h3, x1_obs ), ( h3, x2_obs ) ):
        for value in values:
            hist.Fill( value )

    histograms_1 = {'signal': h2,
                  'bkg1': h1,
                  'data': h3}

    # deliberately the same three histogram objects as histograms_1
    histograms_2 = {'signal': h2,
                  'bkg1': h1,
                  'data': h3}

    # roofit_histograms contains RooDataHist
    # model = RooAddPdf
    model1, roofit_histograms_1, fit_variable_1 = get_roofit_model( histograms_1, fit_boundaries = ( 40, 200 ), name = 'm1' )
    model2, roofit_histograms_2, fit_variable_2 = get_roofit_model( histograms_2, fit_boundaries = ( 40, 200 ), name = 'm2' )
    sample = RooCategory( 'sample', 'sample' )
    sample.defineType( 'm1', 1 )
    sample.defineType( 'm2', 2 )
    # copy first so that adding the second data set does not modify model 1's data
    combined_data = deepcopy( roofit_histograms_1['data'] )
    combined_data.add( roofit_histograms_2['data'] )
    # RooDataHist(const char* name, const char* title, const RooArgList& vars, RooCategory& indexCat, map<std::string,TH1*> histMap, Double_t initWgt = 1.0)
    sim_pdf = RooSimultaneous( "simPdf", "simultaneous pdf", sample )
    sim_pdf.addPdf( model1, 'm1' )
    sim_pdf.addPdf( model2, 'm2' )
    # only referenced by the disabled RooDataHist construction below
    variables = RooArgList()
    variables.add(fit_variable_1)
    variables.add(fit_variable_2)
#     combined_data = RooDataHist('combined_data', 'combined_data',
#                                 variables, RooFit.Index(sample),
#                                 RooFit.Import('m1', roofit_histograms_1['data']),
#                                 RooFit.Import('m2', roofit_histograms_2['data']))
    fitResult = sim_pdf.fitTo( combined_data,
#                    RooFit.Minimizer( "Minuit2", "Migrad" ),
#                    RooFit.NumCPU( 1 ),
#                    RooFit.Extended(),
                    RooFit.Save(),
                   )
Beispiel #4
0
def test_plottable_clone():
    """Check which style attributes a clone keeps and which it overrides."""
    original = Hist(10, 0, 1, linecolor='blue', drawstyle='same')

    # overriding fillstyle must leave the other attributes as on the original
    restyled = original.Clone(fillstyle='solid')
    for attribute, expected in (('fillstyle', 'solid'),
                                ('linecolor', 'blue'),
                                ('drawstyle', 'same')):
        assert_equals(getattr(restyled, attribute), expected)

    # 'color' is expected to set the line, fill and marker colours together
    recoloured = original.Clone(color='red')
    for attribute in ('linecolor', 'fillcolor', 'markercolor'):
        assert_equals(getattr(recoloured, attribute), 'red')
Beispiel #5
0
def weighted_mass_workspace(analysis,
                            categories,
                            masses,
                            systematics=False,
                            cuts=None):
    """Build MMC-mass channels weighted by ln(1 + S/B) of the classifier score.

    For each category the classifier scores at mass 125 are histogrammed
    for signal and background, the per-bin quantity ln(1 + sig/bkg) is
    computed, and that histogram is passed as ``weight_hist`` to
    ``analysis.get_channel_array`` for every requested mass.

    Returns a tuple ``(channels, [])`` where ``channels[mass][category.name]``
    is the channel for that mass and category.
    """
    mass_template = Hist(20, 50, 250, type='D')
    channels = {}
    for category in analysis.iter_categories(categories):
        clf = analysis.get_clf(category, load=True, mass=125)
        score_binning = clf.binning(analysis.year, overflow=1E5)
        scores = analysis.get_scores(
            clf,
            category,
            analysis.target_region,
            masses=[125],
            mode='combined',
            systematics=False,
            unblind=True)
        background_scores = scores.bkg_scores
        signal_scores = scores.all_sig_scores[125]
        # read but not used further in this function
        min_score = scores.min_score
        max_score = scores.max_score

        background_hist = Hist(score_binning, type='D')
        signal_hist = background_hist.Clone()
        hist_scores(background_hist, background_scores)
        background_copy = background_hist.Clone()
        hist_scores(signal_hist, signal_scores)
        signal_copy = signal_hist.Clone()

        # per-bin 1 + S/B, then replaced in place by its natural logarithm
        sob_hist = (1 + signal_copy / background_copy)
        natural_log = math.log
        for score_bin in sob_hist.bins(overflow=True):
            score_bin.value = natural_log(score_bin.value)
        log.info(str(list(sob_hist.y())))

        for mass in masses:
            channel_arrays = analysis.get_channel_array(
                {MMC_MASS: mass_template},
                category=category,
                region=analysis.target_region,
                include_signal=True,
                weight_hist=sob_hist,
                clf=clf,
                cuts=cuts,
                mass=mass,
                mode='workspace',
                systematics=systematics)
            channel = channel_arrays[MMC_MASS]
            channels.setdefault(mass, {})[category.name] = channel
    return channels, []
Beispiel #6
0
def draw_ROC(bkg_scores, sig_scores):
    """Draw one ROC curve per category and save the figure as ROC.png.

    NOTE(review): the ``bkg_scores`` / ``sig_scores`` arguments are
    immediately rebound by the loop below, which iterates over the free
    name ``category_scores``; the values passed in are therefore never
    used.  Confirm whether ``category_scores`` is meant to be a parameter.
    """
    def _tail_fraction(hist):
        # fraction of entries at or above each bin (reverse cumulative sum)
        counts = np.array(hist)
        tail = counts[::-1].cumsum()[::-1]
        tail /= counts.sum()
        return tail

    template = Hist(100, -1, 1)
    plt.figure()
    for category, (bkg_scores, sig_scores) in category_scores.items():
        background_hist = template.Clone()
        signal_hist = template.Clone()
        hist_scores(background_hist, bkg_scores)
        hist_scores(signal_hist, sig_scores)
        background_efficiency = _tail_fraction(background_hist)
        signal_efficiency = _tail_fraction(signal_hist)
        plt.plot(signal_efficiency,
                 1. - background_efficiency,
                 linestyle='-',
                 linewidth=2.,
                 label=category)

    # axis cosmetics
    plt.legend(loc='lower left')
    plt.ylabel('Background Rejection')
    plt.xlabel('Signal Efficiency')
    plt.ylim(0, 1)
    plt.xlim(0, 1)
    plt.grid()

    output_path = os.path.join(PLOTS_DIR, 'ROC.png')
    plt.savefig(output_path, bbox_inches='tight')
Beispiel #7
0
def test_slice_assign():
    """Check assignment to a full histogram slice from a list and from bins."""
    hist = Hist(10, 0, 1)
    # set bin i to the value i
    hist[:] = list(xrange(len(hist)))
    for hist_bin, expected in zip(hist, xrange(len(hist))):
        assert hist_bin.value == expected
    snapshot = hist.Clone()
    # reverse bins
    hist[:] = snapshot[::-1]
    for hist_bin, reversed_bin in zip(hist, snapshot[::-1]):
        assert hist_bin.value == reversed_bin.value
Beispiel #8
0
def get_histograms_from_trees(
                              trees = None,
                              branch = 'var',
                              weightBranch = 'EventWeight',
                              selection = '1',
                              files = None,
                              verbose = False,
                              nBins = 40,
                              xMin = 0,
                              xMax = 100,
                              ignoreUnderflow = True,
                              ):
    """Draw one histogram per (sample, tree) pair.

    Parameters:
        trees: list of tree names to read from every sample (default: none).
        branch: expression passed to ``Draw`` as the variable to histogram.
        weightBranch, selection: combined into the weight/selection string
            ``'( weightBranch ) * ( selection )'``.
        files: dict mapping sample name to either a single input file or a
            list of input files (default: empty).
        verbose: accepted for compatibility; not used by this function.
        nBins, xMin, xMax: binning of every histogram.
        ignoreUnderflow: if true, zero the underflow bin content and error.

    Returns:
        ``{sample: {tree: histogram}}``, or ``None`` as soon as
        ``is_valid_histogram`` rejects one of the histograms.
    """
    # None stands in for the empty containers to avoid mutable defaults
    trees = [] if trees is None else trees
    files = {} if files is None else files

    histograms = {}

    # Setup selection and weight string for ttree draw (loop invariant)
    weightAndSelection = '( %s ) * ( %s )' % ( weightBranch, selection )

    for sample, input_file in files.items():

        histograms[sample] = {}

        for tree in trees:

            # data has no systematic variations: strip the trailing
            # '_<suffix>' from 'Up'/'Down' tree names for data samples
            tempTree = tree
            if 'data' in sample and ( 'Up' in tempTree or 'Down' in tempTree ) :
                tempTree = tempTree.replace('_'+tempTree.split('_')[-1],'')

            # The original called chain.Add() on None for list inputs;
            # build the chain from the full file list instead.
            if isinstance( input_file, list ):
                input_files = input_file
            else:
                input_files = [input_file]
            chain = TreeChain(tempTree, input_files)

            root_histogram = Hist( nBins, xMin, xMax, type='D')
            chain.Draw(branch, weightAndSelection, hist = root_histogram)
            if not is_valid_histogram( root_histogram, tree, input_file):
                return

            # When a tree is filled with a dummy variable, it will end up in the underflow, so ignore it
            if ignoreUnderflow:
                root_histogram.SetBinContent(0, 0)
                root_histogram.SetBinError(0,0)

            gcd()
            # stored under the requested tree name, not the stripped one
            histograms[sample][tree] = root_histogram.Clone()

    return histograms
Beispiel #9
0
def test_hist():
    """Smoke-test ``rplt.hist`` with a histogram, a HistStack and a list."""
    from rootpy.plotting import root2matplotlib as rplt
    gaussian = Hist(100, -5, 5)
    gaussian.FillRandom('gaus')
    rplt.hist(gaussian)

    # stack
    duplicate = gaussian.Clone()
    stacked = HistStack([gaussian, duplicate])
    rplt.hist(stacked)
    rplt.hist([gaussian, duplicate])
Beispiel #10
0
def test_bar():
    """Smoke-test ``rplt.bar`` with a histogram, a HistStack and lists."""
    from rootpy.plotting import root2matplotlib as rplt
    gaussian = Hist(100, -5, 5)
    gaussian.FillRandom('gaus')
    rplt.bar(gaussian)

    # stack
    duplicate = gaussian.Clone()
    stacked = HistStack([gaussian, duplicate])
    rplt.bar(stacked)

    # the same pair as a plain list, once per option combination
    option_sets = (
        {'stacked': True},
        {'stacked': False},
        {'stacked': False, 'reverse': True},
    )
    for options in option_sets:
        rplt.bar([gaussian, duplicate], **options)
    def setUp(self):
        """Build one toy fit collection and the RooFitFit fitter under test.

        Sets ``self.single_fit_collection`` (a FitDataCollection holding a
        single FitData) and ``self.roofitFitter``.  The samples ``x1``,
        ``x2``, ``x1_obs`` and ``x2_obs`` are read from module level.
        """
        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title='Background')
        h_signal_1 = h_bkg1_1.Clone(title='Signal')
        h_data_1 = h_bkg1_1.Clone(title='Data')

        # Fill the histograms with our distributions.  Explicit loops are
        # used instead of map(): map() is lazy on Python 3, so calling it
        # only for its side effects would leave every histogram empty.
        fill_plan = (
            (h_bkg1_1, x1),
            (h_signal_1, x2),
            (h_data_1, x1_obs),
            (h_data_1, x2_obs),
        )
        for hist, values in fill_plan:
            for value in values:
                hist.Fill(value)

        histograms_1 = {
            'signal': h_signal_1,
            'bkg1': h_bkg1_1,
            #                       'data': h_data_1
        }
        fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
        self.single_fit_collection = FitDataCollection()
        self.single_fit_collection.add(fit_data_1)

        #         self.roofitFitter = RooFitFit(histograms_1, dataLabel='data', fit_boundries=(40, 200))
        self.roofitFitter = RooFitFit(self.single_fit_collection)
Beispiel #12
0
def weighted_mass_cba_workspace(analysis,
                                categories,
                                masses,
                                systematics=False,
                                cuts=None):
    """Build MMC-mass channels scaled by a per-channel ln(1 + S/B) factor.

    For every category and mass the channel is fetched from
    ``analysis.get_channel_array``; the total signal and background
    integrals give a single factor ln(1 + s/b) by which the data
    histogram, every sample histogram and every systematic high/low
    histogram are multiplied.

    Returns a tuple ``(channels, [])`` where ``channels[mass][category.name]``
    is the rescaled channel.
    """
    mass_template = Hist(20, 50, 250, type='D')
    channels = {}

    def _rescaled(source, weight):
        # multiplied copy whose name carries a '_scaled' suffix
        product = source * weight
        product.name = source.name + '_scaled'
        return product

    for category in analysis.iter_categories(categories):
        for mass in masses:
            channel_arrays = analysis.get_channel_array(
                {MMC_MASS: mass_template},
                category=category,
                region=analysis.target_region,
                include_signal=True,
                cuts=cuts,
                mass=mass,
                mode='workspace',
                systematics=systematics)
            channel = channel_arrays[MMC_MASS]

            # weight by ln(1 + s / b)
            total_s = mass_template.Clone()
            total_s.Reset()
            total_b = total_s.Clone()
            for sample in channel.samples:
                if not is_signal(sample):
                    total_b += sample.hist
                else:
                    total_s += sample.hist
            signal_over_background = total_s.integral() / total_b.integral()
            sob = math.log(1 + signal_over_background)

            channel.data.hist = _rescaled(channel.data.hist, sob)
            for sample in channel.samples:
                sample.hist = _rescaled(sample.hist, sob)
                for variation in sample.histo_sys:
                    variation.high = _rescaled(variation.high, sob)
                    variation.low = _rescaled(variation.low, sob)

            channels.setdefault(mass, {})[category.name] = channel
    return channels, []
Beispiel #13
0
def test_draw():
    """Compare ``draw_into`` and ``draw_array`` for every background sample.

    Both are run for the 'mmc1_mass' field in Category_VBF / 'OS_TRK'; the
    nominal integrals, the sets of systematic terms and the integral of
    each systematic histogram must agree.
    """
    for sample in analysis.backgrounds:
        print(sample.name)

        drawn = Hist(30, 0, 250)
        from_array = drawn.Clone()

        sample.draw_into(drawn, 'mmc1_mass', Category_VBF, 'OS_TRK')
        sample.draw_array({'mmc1_mass': from_array}, Category_VBF, 'OS_TRK')

        assert_almost_equal(drawn.Integral(), from_array.Integral(), places=3)

        drawn_terms = sorted(drawn.systematics.keys())
        array_terms = sorted(from_array.systematics.keys())
        assert_equal(drawn_terms, array_terms)

        for term, drawn_variation in drawn.systematics.items():
            print(term)
            array_variation = from_array.systematics[term]
            assert_almost_equal(drawn_variation.Integral(),
                                array_variation.Integral())
Beispiel #14
0
	
      
#global summary plots are best gathered from the samples normalizations
#to take into account at least a bit of nuisance correlations
norms = mlfit_file.norm_fit_s
# collect every distinct bin edge (upper and lower) over all binning entries
edges = set()
binning = binning_file[args.varname]
for i in binning.itervalues():
   edges.add(i['up_edge'])
   edges.add(i['low_edge'])
edges = sorted(list(edges))
# variable-width template histogram built from the sorted edges
template_hist = Hist(edges)
# sample name = last '/'-separated component of each normalisation's name
samples = set(i.name.split('/')[-1] for i in norms)
# one empty clone of the template per sample; the title is the sample
# name followed by a single space
fit_histos = {}
for name in samples:
   fit_histos[name] = template_hist.Clone()
   fit_histos[name].title = '%s ' % name
# empty clone of the template titled 'data_obs'
data = template_hist.Clone()
data.title = 'data_obs'

for categories in groups.itervalues():
   #check that all categories have identical bin index
   assert(len(set(binning[i]['idx'] for i in categories)) <= 1)
   bin_idx = binning[categories[0]]['idx']+1
   for sample in samples:
      val = sum(
         norms['%s/%s' % (i, sample)].value 
         for i in categories
         if '%s/%s' % (i, sample) in norms
         )
      err = quad(*[
Beispiel #15
0
# Plotting example set-up: style, seeds, toy signal samples and the three
# histograms (background, signal, data) with their visual attributes.

# set the style
style = get_style('ATLAS')
style.SetEndErrorSize(3)
set_style(style)

# set the random seed
ROOT.gRandom.SetSeed(42)
np.random.seed(42)

# signal distribution
signal = 126 + 10 * np.random.randn(100)
signal_obs = 126 + 10 * np.random.randn(100)

# create histograms
h1 = Hist(30, 40, 200, title='Background', markersize=0, legendstyle='F')
h2 = h1.Clone(title='Signal')
h3 = h1.Clone(title='Data', drawstyle='E1 X0', legendstyle='LEP')
h3.markersize = 1.2

# Fill the histograms with our distributions.  Explicit loops are used
# instead of map(): map() is lazy on Python 3, so calling it only for its
# side effects would leave the signal entries out of h2 and h3.
h1.FillRandom('landau', 1000)
for value in signal:
    h2.Fill(value)
h3.FillRandom('landau', 1000)
for value in signal_obs:
    h3.Fill(value)

# set visual attributes
h1.fillstyle = 'solid'
h1.fillcolor = 'green'
h1.linecolor = 'green'
h1.linewidth = 0
def unfolding_toy_diagnostics(indir, variable):

    plotter = BasePlotter(defaults={
        'clone': False,
        'name_canvas': True,
        'show_title': True,
        'save': {
            'png': True,
            'pdf': False
        }
    }, )
    styles = {
        'dots': {
            'linestyle': 0,
            'markerstyle': 21,
            'markercolor': 1
        },
        'compare': {
            'linesstyle': [1, 0],
            'markerstyle': [0, 21],
            'markercolor': [2, 1],
            'linecolor': [2, 1],
            'drawstyle': ['hist', 'pe'],
            'legendstyle': ['l', 'p']
        }
    }

    xaxislabel = set_pretty_label(variable)

    true_distribution = None

    curdir = os.getcwd()
    os.chdir(indir)
    toydirs = get_immediate_subdirectories(".")

    methods = []
    pulls_lists = {}
    pull_means_lists = {}
    pull_mean_errors_lists = {}
    pull_sums_lists = {}
    pull_sigmas_lists = {}
    pull_sigma_errors_lists = {}
    deltas_lists = {}
    delta_means_lists = {}
    delta_mean_errors_lists = {}
    delta_sigmas_lists = {}
    delta_sigma_errors_lists = {}
    ratio_sums_lists = {}
    nneg_bins_lists = {}
    unfoldeds_lists = {}
    unfolded_sigmas_lists = {}
    taus_lists = {}

    histos_created = False
    lists_created = False
    idir = 0
    true_distro = None
    #loop over toys
    for directory in toydirs:
        if not directory.startswith('toy_'): continue
        os.chdir(directory)
        log.debug('Inspecting toy %s' % directory)
        idir = idir + 1
        i = 0
        if not os.path.isfile("result_unfolding.root"):
            raise ValueError('root file not found in %s' % os.getcwd())
        with io.root_open("result_unfolding.root") as inputfile:
            log.debug('Iteration %s over the file' % i)
            i = i + 1
            if not methods:
                keys = [i.name for i in inputfile.keys()]
                for key in keys:
                    if hasattr(getattr(inputfile, key), "hdata_unfolded"):
                        methods.append(key)

            unfolded_hists = [
                inputfile.get('%s/hdata_unfolded' % i) for i in methods
            ]
            unfolded_wps_hists = [
                inputfile.get('%s/hdata_unfolded_ps_corrected' % i)
                for i in methods
            ]
            for unf, unfps, method in zip(unfolded_hists, unfolded_wps_hists,
                                          methods):
                unf.name = method
                unfps.name = method
            if true_distro is None:
                true_distribution = inputfile.true_distribution
                ROOT.TH1.AddDirectory(False)
                true_distro = true_distribution.Clone()
            taus = prettyjson.loads(inputfile.best_taus.GetTitle())
            if len(taus_lists) == 0:
                taus_lists = dict((i, []) for i in taus)
            for i, t in taus.iteritems():
                taus_lists[i].append(t)

            for histo in unfolded_hists:
                #create pull/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    for ibin in range(1, nbins + 1):
                        outname = "pull_" + name + "_bin" + str(ibin)
                        pulls_lists[outname] = []
                        outname = "delta_" + name + "_bin" + str(ibin)
                        deltas_lists[outname] = []
                        outname = "unfolded_" + name + "_bin" + str(ibin)
                        unfoldeds_lists[outname] = []
                        unfolded_sigmas_lists[outname] = []
                    outname = "pull_" + name
                    pull_means_lists[outname] = {}
                    pull_mean_errors_lists[outname] = {}
                    pull_sigmas_lists[outname] = {}
                    pull_sigma_errors_lists[outname] = {}

                    outname = "delta_" + name
                    delta_means_lists[outname] = {}
                    delta_mean_errors_lists[outname] = {}
                    delta_sigmas_lists[outname] = {}
                    delta_sigma_errors_lists[outname] = {}

                for ibin in range(1, nbins + 1):
                    outname = "pull_" + name + "_bin" + str(ibin)
                    unfolded_bin_content = histo.GetBinContent(ibin)
                    unfolded_bin_error = histo.GetBinError(ibin)
                    true_bin_content = true_distro.GetBinContent(ibin)
                    true_bin_error = true_distro.GetBinError(ibin)
                    total_bin_error = math.sqrt(unfolded_bin_error**2)  #???
                    if (total_bin_error != 0):
                        pull = (unfolded_bin_content -
                                true_bin_content) / total_bin_error
                    else:
                        pull = 9999
                    log.debug(
                        'unfolded bin content %s +/- %s, true bin content %s, pull %s'
                        % (unfolded_bin_content, unfolded_bin_error,
                           true_bin_content, pull))
                    pulls_lists[outname].append(pull)
                    outname = "delta_" + name + "_bin" + str(ibin)
                    delta = unfolded_bin_content - true_bin_content
                    log.debug(
                        'unfolded bin content %s +/- %s, true bin content %s, delta %s'
                        % (unfolded_bin_content, unfolded_bin_error,
                           true_bin_content, delta))
                    deltas_lists[outname].append(delta)
                    outname = "unfolded_" + name + "_bin" + str(ibin)
                    unfoldeds_lists[outname].append(unfolded_bin_content)
                    unfolded_sigmas_lists[outname].append(unfolded_bin_error)

            nneg_bins_hists = [
                i for i in inputfile.keys()
                if i.GetName().startswith("nneg_bins")
            ]
            nneg_bins_hists = [asrootpy(i.ReadObj()) for i in nneg_bins_hists]
            for histo in nneg_bins_hists:
                #create pull/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    outname = name
                    nneg_bins_lists[outname] = []
                outname = name
                nneg_bins_lists[outname].append(histo.GetBinContent(1))

            pull_sums_hists = [
                i for i in inputfile.keys()
                if i.GetName().startswith("sum_of_pulls")
            ]
            pull_sums_hists = [asrootpy(i.ReadObj()) for i in pull_sums_hists]
            for histo in pull_sums_hists:
                #create pull/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    outname = name
                    pull_sums_lists[outname] = []
                outname = name
                pull_sums_lists[outname].append(histo.GetBinContent(1))

            ratio_sums_hists = [
                i for i in inputfile.keys()
                if i.GetName().startswith("sum_of_ratios")
            ]
            ratio_sums_hists = [
                asrootpy(i.ReadObj()) for i in ratio_sums_hists
            ]
            for histo in ratio_sums_hists:
                #create ratio/delta containers during first iteration
                name = histo.name
                nbins = histo.nbins()
                log.debug("name = %s, n bins = %s" % (name, nbins))
                if not lists_created:
                    outname = name
                    ratio_sums_lists[outname] = []
                outname = name
                ratio_sums_lists[outname].append(histo.GetBinContent(1))

            #after the first iteration on the file all the lists are created
            lists_created = True

        os.chdir("..")

    #create histograms
    #histo containers
    taus = {}
    for name, vals in taus_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if val_min == val_max:
            if tau_nbins % 2:  #if odd
                val_min, val_max = val_min - 0.01, val_min + 0.01
            else:
                brange = 0.02
                bwidth = brange / tau_nbins
                val_min, val_max = val_min - 0.01 + bwidth / 2., val_min + 0.01 + bwidth / 2.
        title = '#tau choice - %s ;#tau;N_{toys}' % (name)
        histo = Hist(tau_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        taus[name] = histo

    pulls = {}
    for name, vals in pulls_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        abs_max = max(abs(val_min), abs(val_max))
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Pulls - %s - %s ;Pull;N_{toys}' % (binno, method)
        histo = Hist(pull_nbins, -abs_max, abs_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        pulls[name] = histo

    deltas = {}
    for name, vals in deltas_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Deltas - %s - %s ;Delta;N_{toys}' % (binno, method)
        histo = Hist(delta_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        deltas[name] = histo

    unfoldeds = {}
    for name, vals in unfoldeds_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Unfoldeds - %s - %s ;Unfolded;N_{toys}' % (binno, method)
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        unfoldeds[name] = histo

    nneg_bins = {}
    for name, vals, in nneg_bins_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0 if val_min > 0 else val_min - 1
        val_max = max(vals)
        val_max = 0 if val_max < 0 else val_max + 1
        if 'L_curve' in name:
            method = 'L_curve'
        else:
            set_trace()
            _, method, _ = tuple(name.split('_'))
        title = 'N of negative bins - %s ;N. neg bins;N_{toys}' % method
        histo = Hist(int(val_max - val_min + 1),
                     val_min,
                     val_max,
                     name=name,
                     title=title)
        for val in vals:
            histo.Fill(val)
        nneg_bins[name] = histo

    pull_sums = {}
    for name, vals in pull_sums_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
        else:
            set_trace()
            _, _, _, _, _, method = tuple(name.split('_'))
        title = 'Pull sums - %s ;#Sigma(pull)/N_{bins};N_{toys}' % method
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        pull_sums[name] = histo

    ratio_sums = {}
    for name, vals in ratio_sums_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            set_trace()
            _, _, _, _, _, method = tuple(name.split('_'))
        title = 'Ratio sums - %s;#Sigma(ratio)/N_{bins};N_{toys}' % method
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        ratio_sums[name] = histo

    unfolded_sigmas = {}
    for name, vals in unfolded_sigmas_lists.iteritems():
        ROOT.TH1.AddDirectory(False)  #repeat, you never know
        val_min = min(vals)
        val_min = 0.8 * val_min if val_min > 0 else 1.2 * val_min
        val_max = max(vals)
        val_max = 0.8 * val_max if val_max < 0 else 1.2 * val_max
        if 'L_curve' in name:
            method = 'L_curve'
            binno = name.split('_')[-1]
        else:
            _, method, binno = tuple(name.split('_'))
        title = 'Unfolded uncertainties - %s - %s ;Uncertainty;N_{toys}' % (
            binno, method)
        histo = Hist(unfolded_nbins, val_min, val_max, name=name, title=title)
        for val in vals:
            histo.Fill(val)
        unfolded_sigmas[name] = histo

    for name, histo in pulls.iteritems():
        log.debug("name is %s and object type is %s" % (name, type(histo)))
        histo.Fit("gaus", 'Q')
        if not histo.GetFunction("gaus"):
            log.warning("Function not found for histogram %s" % name)
            continue
        mean = histo.GetFunction("gaus").GetParameter(1)
        meanError = histo.GetFunction("gaus").GetParError(1)
        sigma = histo.GetFunction("gaus").GetParameter(2)
        sigmaError = histo.GetFunction("gaus").GetParError(2)

        general_name, idx = tuple(name.split('_bin'))
        idx = int(idx)

        pull_means_lists[general_name][idx] = mean
        pull_mean_errors_lists[general_name][idx] = meanError
        pull_sigmas_lists[general_name][idx] = sigma
        pull_sigma_errors_lists[general_name][idx] = sigmaError

    for name, histo in deltas.iteritems():
        log.debug("name is %s and object type is %s" % (name, type(histo)))
        histo.Fit("gaus", 'Q')
        if not histo.GetFunction("gaus"):
            log.warning("Function not found for histogram %s" % name)
            continue
        mean = histo.GetFunction("gaus").GetParameter(1)
        meanError = histo.GetFunction("gaus").GetParError(1)
        sigma = histo.GetFunction("gaus").GetParameter(2)
        sigmaError = histo.GetFunction("gaus").GetParError(2)

        general_name, idx = tuple(name.split('_bin'))
        idx = int(idx)

        delta_means_lists[general_name][idx] = mean
        delta_mean_errors_lists[general_name][idx] = meanError
        delta_sigmas_lists[general_name][idx] = sigma
        delta_sigma_errors_lists[general_name][idx] = sigmaError

    outfile = rootpy.io.File("unfolding_diagnostics.root", "RECREATE")
    outfile.cd()

    pull_means = {}
    pull_sigmas = {}
    pull_means_summary = {}
    pull_sigmas_summary = {}
    delta_means = {}
    delta_sigmas = {}
    delta_means_summary = {}
    delta_sigmas_summary = {}

    for outname, pmeans in pull_means_lists.iteritems():
        outname_mean = outname + "_mean"
        outtitle = "Pull means - " + outname + ";Pull mean; N_{toys}"
        pull_mean_min = min(pmeans.values())
        pull_mean_max = max(pmeans.values())
        pull_mean_newmin = pull_mean_min - (pull_mean_max -
                                            pull_mean_min) * 0.5
        pull_mean_newmax = pull_mean_max + (pull_mean_max -
                                            pull_mean_min) * 0.5
        pull_means[outname] = plotting.Hist(pull_mean_nbins,
                                            pull_mean_newmin,
                                            pull_mean_newmax,
                                            name=outname_mean,
                                            title=outtitle)

        outname_mean_summary = outname + "_mean_summary"
        outtitle_mean_summary = "Pull mean summary - " + outname
        histocloned = true_distro.Clone(outname_mean_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Pull mean'
        histocloned.title = outtitle_mean_summary
        pull_means_summary[outname] = histocloned

        for idx, pmean in pmeans.iteritems():
            pull_means[outname].Fill(pmean)
            histocloned[idx].value = pmean
            histocloned[idx].error = pull_mean_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(pmeans.values()),
                                       max(pmeans.values()))

    for outname, psigmas in pull_sigmas_lists.iteritems():
        outname_sigma = outname + "_sigma"
        outtitle_sigma = "Pull #sigma's - " + outname + ";Pull #sigma; N_{toys}"
        pull_sigma_min = min(psigmas.values())
        pull_sigma_max = max(psigmas.values())
        pull_sigma_newmin = pull_sigma_min - (pull_sigma_max -
                                              pull_sigma_min) * 0.5
        pull_sigma_newmax = pull_sigma_max + (pull_sigma_max -
                                              pull_sigma_min) * 0.5
        pull_sigmas[outname] = plotting.Hist(pull_sigma_nbins,
                                             pull_sigma_newmin,
                                             pull_sigma_newmax,
                                             name=outname_sigma,
                                             title=outtitle_sigma)

        outname_sigma_summary = outname + "_sigma_summary"
        outtitle_sigma_summary = "Pull #sigma summary - " + outname
        histocloned = true_distro.Clone(outname_sigma_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Pull #sigma'
        histocloned.title = outtitle_sigma_summary
        pull_sigmas_summary[outname] = histocloned

        for idx, psigma in psigmas.iteritems():
            pull_sigmas[outname].Fill(psigma)
            histocloned[idx].value = psigma
            histocloned[idx].error = pull_sigma_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(psigmas.values()),
                                       max(psigmas.values()))

    for outname, dmeans in delta_means_lists.iteritems():
        outname_mean = outname + "_mean"
        outtitle = "Delta means - " + outname + ";Delta mean; N_{toys}"
        delta_mean_min = min(dmeans.values())
        delta_mean_max = max(dmeans.values())
        delta_mean_newmin = delta_mean_min - (delta_mean_max -
                                              delta_mean_min) * 0.5
        delta_mean_newmax = delta_mean_max + (delta_mean_max -
                                              delta_mean_min) * 0.5
        delta_means[outname] = plotting.Hist(delta_mean_nbins,
                                             delta_mean_newmin,
                                             delta_mean_newmax,
                                             name=outname_mean,
                                             title=outtitle)

        outname_mean_summary = outname + "_mean_summary"
        outtitle_mean_summary = "Delta mean summary - " + outname
        histocloned = true_distro.Clone(outname_mean_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Delta mean'
        histocloned.title = outtitle_mean_summary
        delta_means_summary[outname] = histocloned

        for idx, dmean in dmeans.iteritems():
            delta_means[outname].Fill(dmean)
            histocloned[idx].value = dmean
            histocloned[idx].error = delta_mean_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(dmeans.values()),
                                       max(dmeans.values()))

    for outname, dsigmas in delta_sigmas_lists.iteritems():
        outname_sigma = outname + "_sigma"
        outtitle_sigma = "Delta #sigma's - " + outname + ";Delta #sigma; N_{toys}"
        delta_sigma_min = min(dsigmas.values())
        delta_sigma_max = max(dsigmas.values())
        delta_sigma_newmin = delta_sigma_min - (delta_sigma_max -
                                                delta_sigma_min) * 0.5
        delta_sigma_newmax = delta_sigma_max + (delta_sigma_max -
                                                delta_sigma_min) * 0.5
        delta_sigmas[outname] = plotting.Hist(delta_sigma_nbins,
                                              delta_sigma_newmin,
                                              delta_sigma_newmax,
                                              name=outname_sigma,
                                              title=outtitle_sigma)

        outname_sigma_summary = outname + "_sigma_summary"
        outtitle_sigma_summary = "Delta #sigma summary - " + outname
        histocloned = true_distro.Clone(outname_sigma_summary)
        histocloned.Reset()
        histocloned.xaxis.title = xaxislabel
        histocloned.yaxis.title = 'Delta #sigma'
        histocloned.title = outtitle_sigma_summary
        delta_sigmas_summary[outname] = histocloned

        for idx, dsigma in dsigmas.iteritems():
            delta_sigmas[outname].Fill(dsigma)
            histocloned[idx].value = dsigma
            histocloned[idx].error = delta_sigma_errors_lists[outname][idx]
        histocloned.yaxis.SetRangeUser(min(dsigmas.values()),
                                       max(dsigmas.values()))

    unfolded_summary = {}
    unfolded_average = {}
    unfolded_envelope = {}
    for name, histo in unfoldeds.iteritems():
        log.debug("name is %s and object type is %s" % (name, type(histo)))
        histo.Fit("gaus", 'Q')
        if not histo.GetFunction("gaus"):
            log.warning("Function not found for histogram %s" % name)
            continue
        mean = histo.GetFunction("gaus").GetParameter(1)
        meanError = histo.GetFunction("gaus").GetParError(1)
        sigma = histo.GetFunction("gaus").GetParameter(2)
        sigmaError = histo.GetFunction("gaus").GetParError(2)

        general_name, idx = tuple(name.split('_bin'))
        idx = int(idx)

        if general_name not in unfolded_summary:
            histo = true_distro.Clone("%s_unfolded_summary" % general_name)
            outtitle_unfolded_summary = "Unfolded summary - " + general_name
            histo.Reset()
            histo.xaxis.title = xaxislabel
            histo.yaxis.title = 'N_{events}'
            histo.title = outtitle_unfolded_summary
            unfolded_summary[general_name] = histo

            unfolded_envelope[general_name] = histo.Clone(
                "%s_unfolded_envelope" % general_name)
            unfolded_average[general_name] = histo.Clone(
                "%s_unfolded_average" % general_name)

        unfolded_summary[general_name][idx].value = mean
        unfolded_summary[general_name][idx].error = meanError

        unfolded_envelope[general_name][idx].value = mean
        unfolded_envelope[general_name][idx].error = sigma

        unfolded_average[general_name][idx].value = mean
        unfolded_average[general_name][idx].error = \
           unfolded_sigmas['%s_bin%i' % (general_name, idx)].GetMean()

    plotter.set_subdir('taus')
    for name, histo in taus.iteritems():
        #canvas = plotter.create_and_write_canvas_single(0, 21, 1, False, False, histo, write=False)
        plotter.canvas.cd()
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)

        info = plotter.make_text_box(
            'mode #tau = %.5f' % histo[histo.GetMaximumBin()].x.center,
            position=(plotter.pad.GetLeftMargin(), plotter.pad.GetTopMargin(),
                      0.3, 0.025))
        info.Draw()

        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('pulls')
    for name, histo in pulls.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in pull_means.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()
    for name, histo in pull_sigmas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()

    plotter.set_subdir('pull_summaries')
    for name, histo in pull_means_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        line = ROOT.TLine(histo.GetBinLowEdge(1), 0,
                          histo.GetBinLowEdge(histo.GetNbinsX() + 1), 0)
        line.Draw("same")
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in pull_sigmas_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        line = ROOT.TLine(histo.GetBinLowEdge(1), 1,
                          histo.GetBinLowEdge(histo.GetNbinsX() + 1), 1)
        line.Draw("same")
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('deltas')
    for name, histo in deltas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in delta_means.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()
    for name, histo in delta_sigmas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.Write()
        plotter.save()

    plotter.set_subdir('delta_summaries')
    for name, histo in delta_means_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in delta_sigmas_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        #histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('unfolding_unc')
    for name, histo in unfolded_sigmas.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded')
    for name, histo in unfoldeds.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded_summaries')
    for name, histo in unfolded_summary.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    for name, histo in unfolded_summary.iteritems():
        leg = LegendDefinition("Unfolding comparison",
                               'NE',
                               labels=['Truth', 'Unfolded'])
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    **styles['compare'])
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    method='ratio',
                                    **styles['compare'])
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded_average')
    for name, histo in unfolded_average.iteritems():
        leg = LegendDefinition("Unfolding comparison",
                               'NE',
                               labels=['Truth', 'Unfolded'])
        #set_trace()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    **styles['compare'])
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    method='ratio',
                                    **styles['compare'])
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()

    plotter.set_subdir('unfolded_envelope')
    for name, histo in unfolded_envelope.iteritems():
        leg = LegendDefinition("Unfolding comparison",
                               'NE',
                               labels=['Truth', 'Unfolded'])
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    **styles['compare'])
        plotter.canvas.name = 'Pull_' + name
        plotter.save()
        plotter.canvas.Write()
        plotter.overlay_and_compare([true_distro],
                                    histo,
                                    legend_def=leg,
                                    method='ratio',
                                    **styles['compare'])
        plotter.canvas.name = 'Ratio_' + name
        plotter.save()
        plotter.canvas.Write()

    plotter.set_subdir('figures_of_merit')
    for name, histo in nneg_bins.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in pull_sums.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()
    for name, histo in ratio_sums.iteritems():
        histo = plotter.plot(histo, **styles['dots'])
        histo.SetStats(True)
        plotter.save()
        histo.Write()
        plotter.canvas.Write()

    outfile.close()
    os.chdir(curdir)
Beispiel #17
0
            lbias[xb, yb].error = bias.std_dev

        if not args.asimov:
            tag = '%.1f' % cval if args.conly else '%.1f_%.1f' % (cval, lval)
            hdir.WriteTObject(csf, 'csf_%s' % tag)
            hdir.WriteTObject(csf_prefit, 'csf_prefit_%s' % tag)
            hdir.WriteTObject(csf2d, 'csf2D_%s' % tag)
            if not args.conly:
                hdir.WriteTObject(lsf2d, 'lsf2D_%s' % tag)
                hdir.WriteTObject(lsf, 'lsf_%s' % tag)
                hdir.WriteTObject(lsf_prefit, 'lsf_prefit_%s' % tag)

    for i in failing:
        new_val = i.value / mtot
        i.value = new_val
    out.WriteTObject(cbias.Clone(), 'cbias')
    out.WriteTObject(failing.Clone(), 'failing')
    out.WriteTObject(ccover.Clone(), 'ccover')
    if args.systematic:
        out.WriteTObject(ccover_p.Clone(), 'ccover_p')
    if not args.conly:
        out.WriteTObject(lbias.Clone(), 'lbias')
        out.WriteTObject(lcover.Clone(), 'lcover')
        if args.systematic:
            out.WriteTObject(lcoverp.Clone(), 'lcoverp')
else:
    infile = root_open('%s/toys_summary.root' % indir)
    cbias = infile.cbias
    failing = infile.failing
    ccover = infile.ccover
    if not args.conly:
Beispiel #18
0
# Apply the 'ATLAS' plotting style before any histogram is created.
set_style('ATLAS')

# Toy classifier scores: three normal samples of args.events values each with
# the same width (0.2) and means of -0.30 (nominal), -0.25 (up), -0.35 (down).
nominal_scores = np.random.normal(-.3, .2, size=args.events)
up_scores = np.random.normal(-.25, .2, size=args.events)
dn_scores = np.random.normal(-.35, .2, size=args.events)


def transform(x):
    """Squash ``x`` to values between -1 and 1 with a rescaled logistic curve.

    Evaluates ``2 / (1 + exp(-s * x)) - 1`` where ``s`` is
    ``args.transform_scale``. ``x`` is passed to ``np.exp``, so it may be a
    scalar or a numpy array.
    """
    denominator = 1.0 + np.exp(-args.transform_scale * x)
    return 2.0 / denominator - 1.0


# Histogram of the transformed nominal scores: args.bins bins spanning [-1, 1].
nominal = Hist(args.bins, -1, 1, title='Nominal')
nominal.fill_array(transform(nominal_scores))

# The 'Up' variation is a clone of the nominal histogram with its own drawing
# style; it is Reset() to drop the cloned contents before being filled.
up = nominal.Clone(title='Up',
                   linecolor='red',
                   linestyle='dashed',
                   linewidth=2)
up.Reset()
up.fill_array(transform(up_scores))

# Same procedure for the 'Down' variation.
dn = nominal.Clone(title='Down',
                   linecolor='blue',
                   linestyle='dashed',
                   linewidth=2)
dn.Reset()
dn.fill_array(transform(dn_scores))

# Plot the nominal, up, and down scores

canvas = Canvas()
# NOTE(review): the y-axis maximum is derived from the 'Down' histogram only,
# and max(dn) relies on what iterating a rootpy Hist yields -- confirm that
# this gives the intended value.
nominal.SetMaximum(max(dn) * 1.1)
Beispiel #19
0
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import rootpy.plotting.root2matplotlib as rplt
from rootpy.plotting import Hist

# 100 sample points over one period, [0, 2*pi].
x = np.linspace(0, 2 * np.pi, 100)

plt.figure(figsize=(8, 8), dpi=100)
#plt.fill(x,np.sin(x),color='blue',alpha=0.5)
# Two hatched polygons with no face colour: sin(x) with a blue edge and '///'
# hatching, and sin(2x) with zero line width, backslash hatching and zorder 950.
plt.fill(x, np.sin(x), color='None', edgecolor='blue', hatch='///')
plt.fill(x, np.sin(2 * x), color='None', linewidth=0, hatch=r'\\', zorder=950)

# Two 5-bin histograms over the same x range; b is a clone of a that receives
# one entry at x=3 with weight 0.5, so the two differ in a single bin.
a = Hist(5, x[0], x[-1])
b = a.Clone()
b.Fill(3, .5)

# Hatch the region between the two histograms with a fully transparent face
# (RGBA alpha 0); zorder 900 is lower than the 950 used for sin(2x) above.
rplt.fill_between(
    a,
    b,
    edgecolor='black',
    linewidth=0,
    facecolor=(0, 0, 0, 0),
    hatch=r'\\\\\\\\',
    zorder=900,
)
# Write the same figure to EPS and PDF in the current directory.
plt.savefig('./test.eps')
plt.savefig('./test.pdf')
Beispiel #20
0
def rebin_hist(hist, new_binning, axis='x'):
    """
    Redo the binning of the hist and returns: rebinned_hist, hist_template

    The content of every regular bin of ``hist`` (under/overflow bins are not
    visited) is added to the bin of the new histogram that contains the old
    bin's centre.

    Parameters:
        hist: 1D, 2D or 3D histogram; the dimensionality is read from
            ``hist.DIM``.
        new_binning: list of bin edges for the axis being rebinned.
        axis: axis to rebin, one of 'x', 'y' or 'z'.

    Returns:
        (new_hist, new_hist_template): the rebinned histogram and the empty
        template (new binning, type 'D') it was cloned from. If ``hist`` has a
        ``systematics`` attribute, ``new_hist.systematics`` holds the rebinned
        contents of every systematic term.

    Raises:
        ValueError: if ``hist.DIM`` is not 1, 2 or 3.

    WARNING: doesn't assert that the edges of the new binning matches the old one.
    """
    assert axis in ['x', 'y', 'z']

    old_x_axis = hist.GetXaxis()
    old_y_axis = hist.GetYaxis()
    old_z_axis = hist.GetZaxis()

    # Edges are read from the axis objects for all three axes: the
    # histogram-level GetBinLowEdge/GetBinCenter shortcuts are only meaningful
    # for 1D histograms.
    x_binning = [
        old_x_axis.GetBinLowEdge(i)
        for i in range(1, hist.GetNbinsX() + 2)
    ]
    y_binning = [
        old_y_axis.GetBinLowEdge(i)
        for i in range(1, hist.GetNbinsY() + 2)
    ]
    z_binning = [
        old_z_axis.GetBinLowEdge(i)
        for i in range(1, hist.GetNbinsZ() + 2)
    ]

    # Compare by value: identity comparison of strings depends on interning.
    if axis == 'x':
        x_binning = new_binning
    elif axis == 'y':
        y_binning = new_binning
    elif axis == 'z':
        z_binning = new_binning
    else:
        # Only reachable when assertions are disabled (python -O).
        print("ERROR! Can only rebin x, y or z axis")

    if hist.DIM == 1:
        new_hist_template = Hist(x_binning, type='D')
    elif hist.DIM == 2:
        new_hist_template = Hist2D(x_binning, y_binning, type='D')
    elif hist.DIM == 3:
        new_hist_template = Hist3D(x_binning, y_binning, z_binning, type='D')
    else:
        raise ValueError(
            "rebin_hist only supports 1D, 2D and 3D histograms, got DIM=%r"
            % (hist.DIM,))

    # Save the stats of the histogram
    stat_array = array.array('d', [0.] * 10)
    hist.GetStats(stat_array)
    entries = hist.GetEntries()

    new_hist = new_hist_template.Clone()

    has_systematics = hasattr(hist, 'systematics')
    if has_systematics:
        new_hist.systematics = {}
        for sys_term in hist.systematics:
            new_hist.systematics[sys_term] = new_hist_template.Clone()

    new_x_axis = new_hist.GetXaxis()
    new_y_axis = new_hist.GetYaxis()
    new_z_axis = new_hist.GetZaxis()

    # Use the per-axis FindBin to find out where the bins should be merged
    # into. TH1.FindBin would return a *global* bin number for 2D/3D
    # histograms, which is not a valid x index.
    for x in range(1, hist.GetNbinsX() + 1):
        new_x = new_x_axis.FindBin(old_x_axis.GetBinCenter(x))
        for y in range(1, hist.GetNbinsY() + 1):
            new_y = new_y_axis.FindBin(old_y_axis.GetBinCenter(y))
            for z in range(1, hist.GetNbinsZ() + 1):
                new_z = new_z_axis.FindBin(old_z_axis.GetBinCenter(z))
                v = hist.GetBinContent(x, y, z)
                new_v = new_hist.GetBinContent(new_x, new_y, new_z)
                new_hist.SetBinContent(new_x, new_y, new_z, v + new_v)
                comb_w2N = add_stat_w2(hist, (x, y, z), new_hist,
                                       (new_x, new_y, new_z))
                set_stat_w2(new_hist, comb_w2N, new_x, new_y, new_z)

                if not has_systematics:
                    continue
                # Rebin the systematics histograms, too
                for sys_term in hist.systematics:
                    v = hist.systematics[sys_term].GetBinContent(x, y, z)
                    new_v = new_hist.systematics[sys_term].GetBinContent(
                        new_x, new_y, new_z)
                    new_hist.systematics[sys_term].SetBinContent(
                        new_x, new_y, new_z, v + new_v)
                    # WARNING: stats completely ignored in systematics histogram for now.

    # Restores the stats of the NOMINAL histogram
    new_hist.SetEntries(entries)
    new_hist.PutStats(stat_array)
    return new_hist, new_hist_template
Beispiel #21
0
from rootpy.plotting import Hist
from rootpy.interactive import wait
from mva.stats.smooth import smooth, smooth_alt
import ROOT
ROOT.gROOT.SetBatch(False)
import numpy as np

# Nominal histogram: 50 bins over [0, 100] filled with 1000 uniform samples.
nom = Hist(50, 0, 100, linewidth=2)
nom.fill_array(np.random.uniform(0, 100, 1000))

# The systematic variation is cloned *before* the nominal is smoothed, so it
# starts from the unsmoothed contents.
# NOTE(review): the names `sys` and `bin` below shadow the stdlib module name
# and the builtin of the same names.
sys = nom.Clone(linewidth=2, linestyle='dashed')
nom.Smooth(10)

# Add an offset that grows with the position in the bin iteration (i / 10).
for i, bin in enumerate(sys.bins()):
    bin.value += i / 10.

# Smooth the variation with the two project helpers; the extra keyword
# arguments are drawing attributes for the returned histograms (presumably
# forwarded to rootpy -- confirm in mva.stats.smooth).
smooth_sys = smooth(nom,
                    sys,
                    10,
                    linecolor='red',
                    linewidth=2,
                    linestyle='dashed')
smooth_alt_sys = smooth_alt(nom,
                            sys,
                            linecolor='blue',
                            linewidth=2,
                            linestyle='dashed')

# Leave 20% headroom above the tallest of the four histograms.
nom.SetMaximum(
    max(nom.GetMaximum(), sys.GetMaximum(), smooth_sys.GetMaximum(),
        smooth_alt_sys.GetMaximum()) * 1.2)
Beispiel #22
0
# Means and widths of the two Gaussian shapes used to build the toy inputs.
mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5

# Variable 1: x1 is drawn around mu1 with width sigma1 and x2 around mu2 with
# width sigma2. The *_obs arrays use the same shapes with the observed sample
# sizes.
x1 = mu1 + sigma1 * np.random.randn( N_bkg1 )
x2 = mu2 + sigma2 * np.random.randn( N_signal )
x1_obs = mu1 + sigma1 * np.random.randn( N_bkg1_obs )
x2_obs = mu2 + sigma2 * np.random.randn( N_signal_obs )

# Variable 2: same widths, but the two means are swapped.
x3 = mu2 + sigma1 * np.random.randn( N_bkg1 )
x4 = mu1 + sigma2 * np.random.randn( N_signal )
x3_obs = mu2 + sigma1 * np.random.randn( N_bkg1_obs )
x4_obs = mu1 + sigma2 * np.random.randn( N_signal_obs )

data_scale = 1.2
N_data = N_data * data_scale

# All histograms share the binning of h_bkg1_1 (100 bins over [40, 200]).
h_bkg1_1 = Hist( 100, 40, 200, title = 'Background' )
h_signal_1 = h_bkg1_1.Clone( title = 'Signal' )
h_data_1 = h_bkg1_1.Clone( title = 'Data' )
h_bkg1_2 = h_bkg1_1.Clone( title = 'Background' )
h_signal_2 = h_bkg1_1.Clone( title = 'Signal' )
h_data_2 = h_bkg1_1.Clone( title = 'Data' )

# fill the histograms with our distributions
# Explicit loops are used instead of map(): map() is lazy on Python 3, so
# map(h.Fill, xs) would silently leave the histograms empty there. Each data
# histogram receives both of its observed samples.
for _hist, _samples in (
        ( h_bkg1_1, x1 ),
        ( h_signal_1, x2 ),
        ( h_data_1, x1_obs ),
        ( h_data_1, x2_obs ),
        ( h_bkg1_2, x3 ),
        ( h_signal_2, x4 ),
        ( h_data_2, x3_obs ),
        ( h_data_2, x4_obs ) ):
    for _value in _samples:
        _hist.Fill( _value )
Beispiel #23
0
    with root_open(fname) as asifile:
        toy = asifile.toys.toy_asimov
        for bin in toy:
            if args.allbins:
                key = bin.leaves['CMS_channel'].index, bin.leaves[
                    'CMS_th1x'].value
            else:
                key = bin.leaves['CMS_channel'].index

            tot += bin.weight
            if key in asimov:
                asimov[key] += bin.weight
            else:
                asimov[key] = bin.weight

    asitot = total.Clone()
    asitot.Reset()
    asitot.Fill(tot, height)
    asitot.markerstyle = 20
    asitot.markercolor = 'red'

    f1 = plotter.parse_formula('height*TMath::Poisson(x, mean)',
                               'height[1], mean[%f]' % (tot), [0, 100000])
    f1.linecolor = 'red'
    f1.linewidth = 2
    peak = f1(tot)
    f1 = plotter.parse_formula('height*TMath::Poisson(x, mean)',
                               'height[%f], mean[%f]' % (height / peak, tot),
                               [0, 100000])

    canlog = all(i > 0 for i in asimov.itervalues())
Beispiel #24
0
            print "effic: %.6f" % efficiency(ztautau, level_selection, prong, category)
            print "high: %.6f low: %.6f" % efficiency_uncertainty(
                    ztautau, level_selection, prong, category)

            print "=" * 20
"""

import ROOT
ROOT.gROOT.SetBatch(True)
from higgstautau.tauid import uncertainty
from rootpy.plotting import Hist, Canvas
from matplotlib import pyplot as plt
from rootpy.plotting import root2matplotlib as rplt

# Three histograms with identical binning (50 bins over [0, 1]) for the
# nominal score and its high/low variations.
nominal = Hist(50, 0, 1, title='nominal')
high = nominal.Clone(title='high')
low = nominal.Clone(title='low')

high.linecolor = 'red'
low.linecolor = 'blue'

# Fill each histogram per event, weighted by the event weight.
# NOTE(review): `pt` and `tauid_sample` are not defined in the visible code;
# `pt` is presumably meant to be a per-event tau pT -- confirm.
for weight, event in tauid_sample.iter():
    high_score, low_score = uncertainty(event.tau1_BDTJetScore, pt,
                                        event.tau1_numTrack,
                                        event.number_of_good_vertices)
    nominal.Fill(event.tau1_BDTJetScore, weight)
    low.Fill(low_score, weight)
    high.Fill(high_score, weight)

# Draw the nominal distribution with matplotlib as a filled step histogram.
fig = plt.figure()
rplt.hist(nominal, histtype='stepfilled')
Beispiel #25
0
# h_data.FillRandom(  t1Scale * h_t1Shape + t2Scale * h_t2Shape + t3Scale * h_t3Shape, nData )
# Weighted sum of the enabled template shapes. The accumulator starts as the
# integer 0 and becomes a histogram once the first enabled term is added; it
# stays 0 if every useT* flag is false.
fillingHistogram = 0
if useT1: fillingHistogram += t1Scale * h_t1Shape
if useT2: fillingHistogram += t2Scale * h_t2Shape
if useT3: fillingHistogram += t3Scale * h_t3Shape
if useT4: fillingHistogram += t4Scale * h_t4Shape

# Pseudo-data: either read from file or built from the templates.
if useDataFromFile:
    h_data = getDataFromFile()
else:
    #     h_data.FillRandom(  ( h_t1 * t1Scale + h_t2 * t2Scale + h_t3 * t3Scale ), nData )
    # print 'Integral :',h_data.Integral()
    # h_data = h_t1 * t1Scale + h_t2 * t2Scale + h_t3 * t3Scale
    #     h_data.FillRandom( h_t3, nData )
    # Unweighted sum of clones of the enabled templates.
    # NOTE(review): dataFillingHistogram is not read anywhere in the visible
    # code; h_data is set to h_t1 scaled by 1.3 below instead -- confirm this
    # is intentional.
    dataFillingHistogram = 0
    if useT1: dataFillingHistogram += h_t1.Clone()
    if useT2: dataFillingHistogram += h_t2.Clone()
    if useT3: dataFillingHistogram += h_t3.Clone()
    if useT4: dataFillingHistogram += h_t4.Clone()
    #     h_data = dataFillingHistogram
    h_data = h_t1 * 1.3
    #     h_data.Scale(absolute_eta_initialValues['data'][whichBinFromFile][0] / h_data.Integral() )
    #     h_data.FillRandom( dataFillingHistogram, int(absolute_eta_initialValues['data'][whichBinFromFile][0]) )
    #     h_data.FillRandom( dataFillingHistogram, int(absolute_eta_initialValues['data'][whichBinFromFile][0]) )
    pass

# for bin in range (0,nBins+1):
# #     h_data.SetBinContent( bin, t1Scale * h_t1.GetBinContent( bin ) + t2Scale*h_t2.GetBinContent( bin ) + t3Scale*h_t3.GetBinContent( bin ) )
#     h_data.SetBinError(bin, sqrt(h_data.GetBinContent(bin)))
#     h_t1.SetBinError( bin, sqrt(h_t1.GetBinContent(bin)))
#     h_t2.SetBinError( bin, sqrt(h_t2.GetBinContent(bin)))
Beispiel #26
0
def _fill_score_hists(sample_scores, hist_template):
    """
    Build one decorated histogram per sample from its classifier scores.

    `sample_scores` is an iterable of ``(sample, scores_dict)`` pairs where
    ``scores_dict`` maps a systematic term to a ``(scores, weight)`` pair and
    contains a ``'NOMINAL'`` entry. The nominal entry fills the returned
    histogram; every other entry fills a clone of `hist_template` stored in
    the histogram's ``systematics`` dict under the term name.
    """
    hists = []
    for sample, scores_dict in sample_scores:
        hist = hist_template.Clone(title=sample.label)
        scores, weight = scores_dict['NOMINAL']
        fill_hist(hist, scores, weight)
        hist.decorate(**sample.hist_decor)
        hist.systematics = {}
        for sys_term, (sys_scores, sys_weight) in scores_dict.items():
            if sys_term == 'NOMINAL':
                continue
            sys_hist = hist_template.Clone()
            fill_hist(sys_hist, sys_scores, sys_weight)
            hist.systematics[sys_term] = sys_hist
        hists.append(hist)
    return hists


def plot_clf(background_scores,
             category,
             signal_scores=None,
             signal_scale=1.,
             data_scores=None,
             name=None,
             draw_histograms=True,
             draw_data=False,
             save_histograms=False,
             hist_template=None,
             bins=10,
             min_score=0,
             max_score=1,
             signal_colors=cm.spring,
             systematics=None,
             unblind=False,
             **kwargs):
    """
    Histogram classifier scores for backgrounds, signals and data and
    optionally draw them.

    Parameters
    ----------
    background_scores : iterable of (sample, scores_dict)
        ``scores_dict`` maps systematic terms (including ``'NOMINAL'``) to
        ``(scores, weight)`` pairs.
    category : object
        Forwarded unchanged to ``draw``.
    signal_scores : iterable of (sample, scores_dict), optional
        Same layout as `background_scores`. When None no signal histograms
        are built.
    signal_scale : float
        Forwarded to ``draw``.
    data_scores : (data_sample, scores), optional
        Only histogrammed when `draw_data` is true and `unblind` is not False.
    name : str, optional
        Suffix appended to the output name ``'event_bdt_score'``.
    draw_histograms : bool
        When true, ``draw`` is called twice (linear and log y axis).
    draw_data : bool
        See `data_scores`.
    save_histograms : bool
        Accepted for interface compatibility; not used by this function.
    hist_template : histogram, optional
        Template cloned for every histogram. When None one is built from
        `bins` (an iterable of edges, or a bin count together with
        `min_score` and `max_score`).
    signal_colors, systematics, **kwargs
        Forwarded to ``draw``.
    unblind : bool or float
        False keeps the data blinded. A float keeps only data with a score
        below the cut returned by ``efficiency_cut`` for that signal
        efficiency (applied only when signal histograms exist).

    Returns
    -------
    (bkg_hists, sig_hists, data_hist)
        `sig_hists` and `data_hist` are None when not built.
    """
    if hist_template is None:
        if hasattr(bins, '__iter__'):
            # variable width bins: the edges alone define the range
            hist_template = Hist(bins)
        else:
            hist_template = Hist(bins, min_score, max_score)

    bkg_hists = _fill_score_hists(background_scores, hist_template)

    sig_hists = None
    if signal_scores is not None:
        sig_hists = _fill_score_hists(signal_scores, hist_template)

    data_hist = None
    if data_scores is not None and draw_data and unblind is not False:
        data, data_scores = data_scores
        if isinstance(unblind, float) and sig_hists is not None:
            # Unblind only up to `unblind` signal efficiency. The requested
            # fraction is used here (previously a fixed 0.3 was applied
            # regardless of the value passed in).
            cut = efficiency_cut(sum(sig_hists), unblind)
            data_scores = data_scores[data_scores < cut]
        data_hist = hist_template.Clone(title=data.label)
        data_hist.decorate(**data.hist_decor)
        fill_hist(data_hist, data_scores)
        if unblind >= 1 or unblind is True:
            # fully unblinded: report the data / model yields
            data_total = sum(data_hist)
            model_total = sum(sum(bkg_hists))
            log.info("Data events: %d" % data_total)
            log.info("Model events: %f" % model_total)
            for hist in bkg_hists:
                log.info("{0} {1}".format(hist.GetTitle(), sum(hist)))
            log.info("Data / Model: %f" % (data_total / model_total))

    if draw_histograms:
        output_name = 'event_bdt_score'
        if name is not None:
            output_name += '_' + name
        # one plot with a linear and one with a logarithmic y axis
        for logy in (False, True):
            draw(data=data_hist,
                 model=bkg_hists,
                 signal=sig_hists,
                 signal_scale=signal_scale,
                 category=category,
                 name="BDT Score",
                 output_name=output_name,
                 show_ratio=data_hist is not None,
                 model_colors=None,
                 signal_colors=signal_colors,
                 systematics=systematics,
                 logy=logy,
                 **kwargs)
    return bkg_hists, sig_hists, data_hist
Beispiel #27
0
    def clf_channels(self, clf,
                     category, region,
                     cuts=None,
                     bins=10,
                     limits=None,
                     mass=None,
                     mode=None,
                     systematics=True,
                     unblind=False,
                     hybrid_data=False,
                     no_signal_fixes=False,
                     uniform=False,
                     mva=False):
        """
        Build the HistFactory channel for a single mass hypothesis.

        Scores are obtained from ``self.get_scores`` for `mass`, histogrammed
        with the requested binning (`bins` as a bin count, optionally with
        explicit `limits`, or as an iterable of bin edges) and turned into
        background, signal and data samples. Data bins are blinded
        according to `unblind` (False: all bins, int: the highest bins,
        float: bins above the score cut for that signal efficiency) and,
        when `hybrid_data` is set, the blinded bins are replaced by the sum
        of all background and signal histograms.

        Returns the tuple ``(scores_obj, channel)``.
        """
        log.info("constructing channels")

        # determine min and max scores
        scores_obj = self.get_scores(
            clf, category, region, cuts=cuts,
            masses=[mass], mode=mode,
            systematics=systematics,
            unblind=unblind)

        data_scores = scores_obj.data_scores
        bkg_scores = scores_obj.bkg_scores
        all_sig_scores = scores_obj.all_sig_scores
        min_score = scores_obj.min_score
        max_score = scores_obj.max_score

        if not isinstance(bins, int):
            # explicit bin edges
            if bins[0] > min_score:
                log.warning("min score is less than first edge "
                            "(will be underflow)")
            if bins[-1] <= max_score:
                log.warning("max score is greater than or equal to last edge "
                            "(will be overflow)")
            binning = Hist(bins, type='D')
        elif limits is None:
            binning = Hist(bins, min_score, max_score, type='D')
        else:
            lower_edge, upper_edge = limits
            binning = Hist(bins, lower_edge, upper_edge, type='D')

        def template_for(sample_def):
            # fresh copy of the binning carrying the sample's title and style
            return binning.Clone(
                title=sample_def.label,
                **sample_def.hist_decor)

        bkg_samples = []
        for bkg, bkg_values in bkg_scores:
            bkg_samples.append(bkg.get_histfactory_sample(
                template_for(bkg), clf,
                category, region,
                cuts=cuts, scores=bkg_values,
                systematics=systematics,
                uniform=uniform,
                mva=mva))

        data_sample = None
        if data_scores is not None:
            data_template = template_for(self.data)
            data_sample = self.data.get_histfactory_sample(
                data_template, clf,
                category, region,
                cuts=cuts, scores=data_scores,
                uniform=uniform)
            if unblind is False:
                # blind full histogram
                data_sample.hist[:] = (0, 0)
            elif (unblind is not True) and isinstance(unblind, int):
                # blind highest N bins
                data_sample.hist[-(unblind + 1):] = (0, 0)
            elif isinstance(unblind, float):
                # blind above a signal efficiency
                sig_score_hists = [
                    histogram_scores(data_template, sig_values)
                    for _, sig_values in all_sig_scores[mass]]
                max_unblind_score = efficiency_cut(
                    sum(sig_score_hists), unblind)
                blind_bin = data_template.FindBin(max_unblind_score)
                data_sample.hist[blind_bin:] = (0, 0)

        # create signal HistFactory samples
        sig_samples = []
        for sig, sig_values in all_sig_scores[mass]:
            sig_samples.append(sig.get_histfactory_sample(
                template_for(sig), clf,
                category, region,
                cuts=cuts, scores=sig_values,
                no_signal_fixes=no_signal_fixes,
                systematics=systematics,
                uniform=uniform,
                mva=mva))

        # replace data in blind bins with signal + background
        if hybrid_data and unblind is not True:
            all_samples = bkg_samples + sig_samples
            sum_sig_bkg = sum([each.hist for each in all_samples])
            if unblind is False:
                # replace full hist
                data_sample.hist[:] = sum_sig_bkg[:]
            elif isinstance(unblind, int):
                # replace highest N bins
                first_replaced = -(unblind + 1)
                data_sample.hist[first_replaced:] = sum_sig_bkg[first_replaced:]
            elif isinstance(unblind, float):
                data_sample.hist[blind_bin:] = sum_sig_bkg[blind_bin:]

        # create channel for this mass point
        channel_name = 'hh_{0}_{1}_{2}'.format(
            self.year % 1000, category.name, mass)
        channel = histfactory.make_channel(
            channel_name,
            bkg_samples + sig_samples,
            data=data_sample)

        return scores_obj, channel
Beispiel #28
0
def optimized_channels(clf,
                       category,
                       region,
                       backgrounds,
                       data=None,
                       cuts=None,
                       mass_points=None,
                       mu=1.,
                       systematics=True,
                       lumi_rel_error=0.,
                       algo='EvenBinningByLimit'):
    """
    Return optimally binned HistFactory Channels for each mass hypothesis

    Determine the number of bins that yields the best limit at the 125 GeV mass
    hypothesis. Then construct and return the channels for all requested mass
    hypotheses.

    algos: EvenBinningByLimit, UnevenBinningBySignificance

    Parameters:
        clf, category, region, backgrounds, cuts, mu, systematics:
            forwarded to ``get_scores``; ``clf``, ``category``, ``region``
            and ``cuts`` are also forwarded to every
            ``get_histfactory_sample`` call.
        data: optional data sample; forwarded to ``get_scores`` and used to
            build the data HistFactory sample.
        mass_points: optional container of masses; when given, only masses
            from ``Higgs.MASSES`` that are in it get a channel.
        lumi_rel_error: forwarded to ``get_limit`` (EvenBinningByLimit only).
        algo: name of the binning optimisation. 'EvenBinningByLimit' scans
            uniform binnings and keeps the one with the smallest expected
            limit, saving a limit-vs-nbins plot as a side effect.
            'UnevenBinningBySignificance' delegates to ``optimize_binning``.
            Any other value prints an error and calls ``exit(1)``.

    Returns:
        dict mapping each processed mass to its HistFactory channel.
    """
    log.info("constructing optimized channels")

    scores_obj = get_scores(clf,
                            category,
                            region,
                            backgrounds,
                            data=data,
                            cuts=cuts,
                            mass_points=mass_points,
                            mu=mu,
                            systematics=systematics)

    data_scores = scores_obj.data_scores
    bkg_scores = scores_obj.bkg_scores
    all_sig_scores = scores_obj.all_sig_scores
    min_score = scores_obj.min_score
    max_score = scores_obj.max_score

    # The binning is optimised on the 125 mass hypothesis only; this raises
    # if all_sig_scores has no entry for 125.
    sig_scores = all_sig_scores[125]

    best_hist_template = None
    if algo == 'EvenBinningByLimit':
        limit_hists = []
        best_limit = float('inf')
        best_nbins = 0
        # candidate bin counts: 2 .. 49
        nbins_range = xrange(2, 50)

        for nbins in nbins_range:

            # uniform binning across the observed score range
            hist_template = Hist(nbins, min_score, max_score, type='D')

            # create HistFactory samples
            samples = []
            for s, scores in bkg_scores + sig_scores:
                sample = s.get_histfactory_sample(hist_template,
                                                  clf,
                                                  category,
                                                  region,
                                                  cuts=cuts,
                                                  scores=scores)
                samples.append(sample)

            data_sample = None
            if data is not None:
                data_sample = data.get_histfactory_sample(hist_template,
                                                          clf,
                                                          category,
                                                          region,
                                                          cuts=cuts,
                                                          scores=data_scores)

            # create channel for this mass point
            channel = histfactory.make_channel("%s_%d" % (category.name, 125),
                                               samples,
                                               data=data_sample)

            # get limit
            limit_hist = get_limit(channel, lumi_rel_error=lumi_rel_error)
            # NOTE(review): this formats `category` itself while the channel
            # name and the plot file name use `category.name` -- confirm
            # which one is intended.
            limit_hist.SetName("%s_%d_%d" % (category, 125, nbins))

            # is this better than the best limit so far?
            hist_dict = hist_to_dict(limit_hist)
            limit_hists.append(hist_dict)
            if hist_dict['Expected'] < best_limit:
                best_limit = hist_dict['Expected']
                best_nbins = nbins
                best_hist_template = hist_template
        # NOTE(review): if no expected limit compares below +inf,
        # best_hist_template stays None and is passed on as the template
        # further down.

        # plot limit vs nbins
        fig = plt.figure()
        ax = fig.add_subplot(111)
        central_values = np.array([h['Expected'] for h in limit_hists])
        high_values_1sig = np.array([h['+1sigma'] for h in limit_hists])
        low_values_1sig = np.array([h['-1sigma'] for h in limit_hists])
        high_values_2sig = np.array([h['+2sigma'] for h in limit_hists])
        low_values_2sig = np.array([h['-2sigma'] for h in limit_hists])
        plt.plot(nbins_range, central_values, 'k-')
        # the 2 sigma band is drawn first so the 1 sigma band lies on top
        plt.fill_between(nbins_range,
                         low_values_2sig,
                         high_values_2sig,
                         linewidth=0,
                         facecolor='yellow')
        plt.fill_between(nbins_range,
                         low_values_1sig,
                         high_values_1sig,
                         linewidth=0,
                         facecolor='green')
        plt.xlim(nbins_range[0], nbins_range[-1])
        plt.xlabel("Number of Bins")
        plt.ylabel("Limit")
        plt.grid(True)
        plt.text(.5,
                 .8,
                 "Best limit of %.2f at %d bins" % (best_limit, best_nbins),
                 horizontalalignment='center',
                 verticalalignment='center',
                 transform=ax.transAxes,
                 fontsize=20)
        # side effect: writes the scan plot to the current working directory
        plt.savefig('category_%s_limit_vs_nbins.png' % category.name)

    elif algo == 'UnevenBinningBySignificance':
        # Fine 200-bin template over the fixed range [-1, 1] rather than the
        # observed score range (the alternative is kept commented out below).
        #hist_template = Hist(200, min_score, max_score)
        hist_template = Hist(200, -1.0, 1.0, type='D')

        # accumulate every signal sample into a single histogram, with one
        # accumulated histogram per systematic term
        sig_hist = hist_template.Clone(title='Signal')
        sig_hist.systematics = {}
        for sig, scores_dict in sig_scores:
            scores, weight = scores_dict['NOMINAL']
            sig_hist.fill_array(scores, weight)
            for sys_term in scores_dict.keys():
                if sys_term == 'NOMINAL':
                    continue
                if not sys_term in sig_hist.systematics:
                    sys_hist = hist_template.Clone()
                    sig_hist.systematics[sys_term] = sys_hist
                else:
                    sys_hist = sig_hist.systematics[sys_term]
                scores, weight = scores_dict[sys_term]
                sys_hist.fill_array(scores, weight)

        # same accumulation for the backgrounds
        bkg_hist = hist_template.Clone(title='Background')
        bkg_hist.systematics = {}
        for bkg, scores_dict in bkg_scores:
            scores, weight = scores_dict['NOMINAL']
            bkg_hist.fill_array(scores, weight)
            for sys_term in scores_dict.keys():
                if sys_term == 'NOMINAL':
                    continue
                if not sys_term in bkg_hist.systematics:
                    sys_hist = hist_template.Clone()
                    bkg_hist.systematics[sys_term] = sys_hist
                else:
                    sys_hist = bkg_hist.systematics[sys_term]
                scores, weight = scores_dict[sys_term]
                sys_hist.fill_array(scores, weight)

        print "SIG entries:", sig_hist.GetEntries()
        print "BKG entries:", bkg_hist.GetEntries()
        sig_hist, bkg_hist, best_hist_template = optimize_binning(
            sig_hist,
            bkg_hist,
            #starting_point='fine'
            starting_point='merged')
        # fall back to the fine template when the optimiser returns None
        if best_hist_template is None:
            best_hist_template = hist_template
        #raw_input("Hit enter to continue...")
    else:
        # unknown algorithm: report and terminate the interpreter
        print "ERROR: binning optimisation algo %s not in list!" % algo
        exit(1)

    hist_template = best_hist_template
    channels = dict()

    # create HistFactory samples
    # (backgrounds and data do not depend on the mass, so they are built once)
    bkg_samples = []
    for s, scores in bkg_scores:
        sample = s.get_histfactory_sample(hist_template,
                                          clf,
                                          category,
                                          region,
                                          cuts=cuts,
                                          scores=scores)
        bkg_samples.append(sample)

    data_sample = None
    # NOTE(review): the scan above guards on `data is not None` while this
    # guards on `data_scores is not None`; if scores can exist without a
    # data sample this would fail on `data.get_histfactory_sample` --
    # confirm against get_scores.
    if data_scores is not None:
        data_sample = data.get_histfactory_sample(hist_template,
                                                  clf,
                                                  category,
                                                  region,
                                                  cuts=cuts,
                                                  scores=data_scores)

    # now use the optimal binning and construct channels for all requested mass
    # hypotheses
    for mass in Higgs.MASSES:
        if mass_points is not None and mass not in mass_points:
            continue
        log.info('=' * 20)
        log.info("%d GeV mass hypothesis" % mass)

        # create HistFactory samples
        sig_samples = []
        for s, scores in all_sig_scores[mass]:
            sample = s.get_histfactory_sample(hist_template,
                                              clf,
                                              category,
                                              region,
                                              cuts=cuts,
                                              scores=scores)
            sig_samples.append(sample)

        # create channel for this mass point
        channel = histfactory.make_channel("%s_%d" % (category.name, mass),
                                           bkg_samples + sig_samples,
                                           data=data_sample)

        channels[mass] = channel
    return channels
Beispiel #29
0
from config import CMS
# Setting this to True (default in rootpy)
# changes how the histograms look in ROOT...
ROOT.TH1.SetDefaultSumw2(False)
# run ROOT in batch mode
ROOT.gROOT.SetBatch(True)

# create normal distributions
mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5
x1 = mu1 + sigma1 * np.random.randn(10000)
x2 = mu2 + sigma2 * np.random.randn(500)
x1_obs = mu1 + sigma1 * np.random.randn(10000)
x2_obs = mu2 + sigma2 * np.random.randn(1000)

# create histograms
h1 = Hist(100, 40, 200, title='Background')
h2 = h1.Clone(title='Signal')
h3 = h1.Clone(title='Data')
h3.markersize = 1.2

# fill the histograms with our distributions
# Explicit loops are used instead of map(hist.Fill, values): map() is lazy
# under Python 3, so Fill would never be called and the histograms would
# stay empty. The fill order matches the original map() calls; the data
# histogram receives both observed samples.
for _hist, _values in ((h1, x1), (h2, x2), (h3, x1_obs), (h3, x2_obs)):
    for _value in _values:
        _hist.Fill(_value)

# set visual attributes
h1.fillstyle = 'solid'
h1.fillcolor = 'green'
h1.linecolor = 'green'
h1.linewidth = 0
Beispiel #30
0
# Simulated events, muon+jets selection: draw the 'NVertices' branch into
# pu_mu_mc_reco using 'EventWeight * LeptonEfficiencyCorrection' as the
# second Draw argument (presumably the per-event weight expression --
# confirm against the TreeChain.Draw signature).
mu_mc_chain = TreeChain(
    "TTbar_plus_X_analysis/MuPlusJets/Ref selection/Pileup/Pileup", mc_files)
mu_mc_chain.Draw('NVertices',
                 'EventWeight * LeptonEfficiencyCorrection',
                 hist=pu_mu_mc_reco)

# Data chains for the electron+jets and muon+jets selections; 'NVertices'
# is drawn without a second argument here.
el_chain = TreeChain(
    "TTbar_plus_X_analysis/EPlusJets/Ref selection/Pileup/Pileup",
    data_files_el)
el_chain.Draw('NVertices', hist=pu_el_reco)
mu_chain = TreeChain(
    "TTbar_plus_X_analysis/MuPlusJets/Ref selection/Pileup/Pileup",
    data_files_mu)
mu_chain.Draw('NVertices', hist=pu_mu_reco)

# Combine both channels for simulation (pu_el_mc_reco is filled outside the
# visible part of this snippet).
pu_mc_reco = pu_el_mc_reco.Clone('pu_mc_reco')
pu_mc_reco += pu_mu_mc_reco

# Combine both channels for data.
pu_data_reco = pu_el_reco.Clone('pu_data_reco')
pu_data_reco += pu_mu_reco

hists = [pu_mc_reco, pu_data_reco]
norm_hists = {}

# Store a copy of each combined histogram scaled by the inverse of its
# integral, keyed by '<original name>_norm'.
# NOTE(review): this divides by h.integral(); an empty histogram would
# presumably make that zero -- confirm inputs are never empty.
for hist in hists:
    name = hist.GetName() + '_norm'
    h = hist.Clone(name)
    h.Scale(1 / h.integral())
    norm_hists[name] = h
# rename so it is compatible to hardcoded name
# (this clones the unnormalised combined data histogram)
pileup = pu_data_reco.Clone('pileup')