Exemple #1
0
def test_unsupported_branch_in_branches():
    """Explicitly requesting the TLorentzVector branch must raise TypeError.

    The tree holds one TLorentzVector branch and one double branch.  The
    conversion is first called without a branch list (outside of any
    ``assert_raises``), then with ``branches=['vector']``.
    """
    lorentz = TLorentzVector()
    scalar = np.array([0], dtype=float)
    tree = TTree('test', 'test')
    tree.Branch('vector', lorentz)
    tree.Branch('double', scalar, 'double/D')
    # no branch list: this call is not expected to raise
    rnp.tree2array(tree)
    # naming the vector branch explicitly is expected to raise
    assert_raises(TypeError, rnp.tree2array, tree, branches=['vector'])
Exemple #2
0
def test_tree2array():
    """Run tree2array on a TChain and on a tree fetched from an open file."""
    # the file accessed through a chain ...
    single_chain = TChain('tree')
    single_chain.Add(load('single1.root'))
    chain_arr = rnp.tree2array(single_chain)
    check_single(chain_arr)

    # ... and through the tree object stored in the file
    root_file = get_file('single1.root')
    tree_arr = rnp.tree2array(root_file.Get('tree'))
    check_single(tree_arr)

    # get_file is expected to raise ValueError for a missing file
    assert_raises(ValueError, get_file, 'file_does_not_exist.root')
def read_data(input_file_path, tree_name, event=1):
    """Return one event of a tree as an entry of the converted array.

    Parameters
    ----------
    input_file_path : path handed to ``ROOT.TFile`` (opened with 'read').
    tree_name : key of the tree to fetch from that file.
    event : 1-based number of the event to return (default: the first).

    Returns
    -------
    Entry ``event - 1`` of the array produced by ``tree2array``.
    """
    input_file = ROOT.TFile(input_file_path, 'read')
    try:
        data = tree2array(input_file.Get(tree_name))
    finally:
        # close the file even when the conversion raises
        input_file.Close()
    # `event` counts from 1, the array is indexed from 0
    return data[event - 1]
Exemple #4
0
def test_single_branch():
    """A branch name passed as a string must give an array of dtype '<i4'.

    Checked for both tree2array (tree object) and root2array (file path).
    """
    expected = np.dtype('<i4')
    f = get_file('single1.root')
    from_tree = rnp.tree2array(f.Get('tree'), branches='n_int')
    from_file = rnp.root2array(load('single1.root'), branches='n_int')
    for arr in (from_tree, from_file):
        assert_equal(arr.dtype, expected)
Exemple #5
0
    def to_array(self, *args, **kwargs):
        """
        Return this tree as a NumPy structured array.

        Every positional and keyword argument is forwarded unchanged to
        ``root_numpy.tree2array``, with this tree as its first argument.
        """
        # root_numpy is imported at call time, not at module import
        import root_numpy

        return root_numpy.tree2array(self, *args, **kwargs)
def do_cut(args, did, files, supercuts, weights):
  """
  Apply every cut derived from ``supercuts`` to one sample and dump the counts.

  The tree is obtained with ``get_ttree``; when ``args.numpy`` is set it is
  converted to a NumPy array restricted to the event-weight branch plus the
  branches named in the selection strings.  For each cut the raw, weighted and
  scaled event counts are stored under the cut's hash, and the resulting
  dictionary is written to ``<args.output_directory>/<did>.json``.

  Returns ``(result, elapsed)``: ``result`` is True on success and False when
  an exception was caught (it is logged, not re-raised); ``elapsed`` is the
  difference of two ``clock()`` readings.
  """
  start = clock()
  try:
    # load up the tree for the files
    tree = get_ttree(args.tree_name, files, args.eventWeightBranch)
    # if using numpy optimization, load the tree as a numpy array to apply_cuts on
    if args.numpy:
      # A selection string may mention several branches: replace every run of
      # non-word characters (underscores are kept) by a space, split on spaces,
      # drop the empty pieces and collect the unique tokens over all supercuts.
      non_word = re.compile(r'\W+')
      branchesSpecified = set()
      for supercut in supercuts:
        # the format placeholders are filled with '-' so they disappear with
        # the other non-word characters
        selection = supercut['selections'].format(*['-']*10)
        branchesSpecified.update(filter(None, non_word.sub(' ', selection).split(' ')))
      # get actual list of branches in the file (the weight branch is added back below)
      availableBranches = set(i.GetName() for i in tree.GetListOfBranches() if not i.GetName() == args.eventWeightBranch)
      # remove anything that doesn't exist
      branchesToUse = [branch for branch in branchesSpecified if branch in availableBranches]
      branchesSkipped = branchesSpecified.difference(branchesToUse)
      if branchesSkipped:
        logger.info("The following branches have been skipped...")
        for branch in branchesSkipped:
          logger.info("\t{0:s}".format(branch))
      tree = rnp.tree2array(tree, branches=[args.eventWeightBranch]+branchesToUse)

    # get the scale factor
    sample_scaleFactor = get_scaleFactor(weights, did)

    # iterate over the cuts available
    cuts = {}
    for cut in get_cut(copy.deepcopy(supercuts)):
      cut_hash = get_cut_hash(cut)
      rawEvents, weightedEvents = apply_cuts(tree, cut, args.eventWeightBranch, args.numpy)
      scaledEvents = weightedEvents*sample_scaleFactor
      cuts[cut_hash] = {'raw': rawEvents, 'weighted': weightedEvents, 'scaled': scaledEvents}
    logger.info("Applied {0:d} cuts".format(len(cuts)))
    with open('{0:s}/{1:s}.json'.format(args.output_directory, did), 'w+') as f:
      f.write(json.dumps(cuts, sort_keys=True, indent=4))
    result = True
  except Exception:
    # Deliberate best effort: a failing sample is logged and skipped.
    # KeyboardInterrupt / SystemExit are no longer swallowed here.
    logger.exception("Caught an error - skipping {0:s}".format(did))
    result = False
  end = clock()
  return (result, end-start)
Exemple #7
0
def i3root2hdf5(infile, force=False):
    """Copy the trees of a ROOT file into datasets of ``<infile>.h5``.

    Every key of the input file except 'AntMCTree' and 'MasterTree' is
    converted with ``tree2array`` and stored as a gzip-compressed dataset
    named after the key.

    Parameters
    ----------
    infile : path of the ROOT file; the HDF5 path is ``infile + '.h5'``.
    force : when true the HDF5 file is opened with mode 'w', otherwise
        with mode 'a'.
    """
    h5file = infile + '.h5'
    bad_keys = ('AntMCTree', 'MasterTree')
    mode = 'w' if force else 'a'
    rf = root_open(infile, 'r')
    try:
        keys = [k.name for k in rf.keys()]
        h5 = h5py.File(h5file, mode)
        try:
            for key in keys:
                if key in bad_keys:
                    continue
                arr = tree2array(rf[key])
                try:
                    h5.create_dataset(
                        key,
                        data=arr,
                        compression='gzip',
                        compression_opts=9,
                        shuffle=True,
                        fletcher32=True,
                    )
                except TypeError:
                    # best effort, as before: a tree whose array raises
                    # TypeError here is skipped
                    continue
                h5.flush()
        finally:
            # close the HDF5 file even if a conversion or write fails
            h5.close()
    finally:
        # NOTE(review): assumes root_open returns a ROOT.TFile-like object
        # (rootpy File) exposing Close() - confirm
        rf.Close()
Exemple #8
0
def tree_to_ndarray(trees, branches=None, dtype=np.float32, include_weight=False, weight_dtype="f4"):
    """
    Build a numpy.ndarray from one tree, or from a list/tuple of trees.

    Each tree goes through ``tree2array`` (with ``branches``),
    ``recarray_to_ndarray`` (with ``dtype``) and ``_add_weight_column``
    (with ``include_weight`` and ``weight_dtype``).  For a list or tuple the
    per-tree results are joined with ``np.concatenate``.
    """
    def _convert(single_tree):
        records = tree2array(single_tree, branches)
        flat = recarray_to_ndarray(records, dtype=dtype)
        return _add_weight_column(flat, single_tree, include_weight, weight_dtype)

    if not isinstance(trees, (list, tuple)):
        return _convert(trees)
    return np.concatenate([_convert(tree) for tree in trees])
def main():
    """Histogram 'mJ' for every key of each ISRgamma input file.

    For each ``ISRgamma_*.root`` file found under the input directory an
    output file named ``hist_ISRgamma...`` is recreated under the output
    directory.  Every key of the input file is read as a tree, converted
    with ``tree2array`` and filled into a 32-bin TH1F over [100, 260] using
    the 'mJ' values weighted by 'weight'.
    """
    inputdir  = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-06-28/'
    outputdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-07-10/'

    inputpaths = glob.glob(inputdir + '/ISRgamma_*.root')

    for inputpath in inputpaths:
        outputpath = inputpath.replace(inputdir, outputdir).replace('ISRgamma', 'hist_ISRgamma')
        # single-argument print(...) is valid on both Python 2 and 3
        print("Processing '%s'" % inputpath)

        infile  = ROOT.TFile(inputpath,  'READ')
        outfile = ROOT.TFile(outputpath, 'RECREATE')
        try:
            categories = [key.GetName() for key in infile.GetListOfKeys()]

            for category in categories:
                print("-- '%s'" % category)
                tree = infile.Get(category)
                array = tree2array(tree)
                #hist = ROOT.TH1F(category, "", 30, 100, 250)
                hist = ROOT.TH1F(category, "", 32, 100, 260)
                fill_hist(hist, array['mJ'], weights=array['weight'])

                # TF shape/norm ...

                outfile.cd()
                hist.Write()

            outfile.Write()
        finally:
            # release both files even when a category fails
            outfile.Close()
            infile.Close()
Exemple #10
0
def test_chain():
    """Read one file through a TChain and two files through root2array."""
    # single file attached to a chain
    tchain = ROOT.TChain('tree')
    tchain.Add(load('single1.root'))
    chain_arr = rnp.tree2array(tchain)
    check_single(chain_arr)

    # two files given at once; check_single is called with 200
    both_files = load(['single1.root', 'single2.root'])
    merged = rnp.root2array(both_files)
    check_single(merged, 200)
Exemple #11
0
def test_selection():
    """Exercise the ``selection`` argument of tree2array on a two-file chain."""
    chain = ROOT.TChain('tree')
    for fname in ('single1.root', 'single2.root'):
        chain.Add(load(fname))

    # without a selection some entries have d_double <= 100 ...
    unfiltered = rnp.tree2array(chain)
    assert_equal((unfiltered['d_double'] <= 100).any(), True)
    # ... with the selection none of them does
    filtered = rnp.tree2array(chain, selection="d_double > 100")
    assert_equal((filtered['d_double'] <= 100).any(), False)

    # selection with differing variables in branches and expression
    rnp.tree2array(
        chain,
        branches=['d_double'],
        selection="f_float < 100 && n_int%2 == 1")

    # selection with TMath
    rnp.tree2array(chain, selection="TMath::Erf(d_double) < 0.5")
Exemple #12
0
def getTreeToArray(tree):
    """
        Hand a tree to root_numpy.tree2array and return the result.
        Inputs: TTree object
        Return: np array
    """
    # root_numpy is imported at call time
    import root_numpy
    converted = root_numpy.tree2array(tree)
    return converted
Exemple #13
0
 def records(self, **kwargs):
     """
     Return the tree ``self.tree_name`` as a NumPy record array.

     The file is obtained with ``get_file(self.ntuple_path, self.student)``;
     all keyword arguments are forwarded to ``root_numpy.tree2array`` and
     the result is viewed as ``np.recarray``.
     """
     from root_numpy import tree2array
     rfile = get_file(self.ntuple_path, self.student)
     tree = rfile[self.tree_name]
     log.info('Converting tree to record array, sorry if this is long ...')
     return tree2array(tree, **kwargs).view(np.recarray)
Exemple #14
0
def test_weights():
    """Tree weights must appear as an extra column when include_weight is set."""
    n_entries = 100

    # one tree with an explicit weight and a custom column name
    root_file = ROOT.TFile(load('test.root'))
    tree = root_file.Get('tree')
    tree.SetWeight(5.)
    rec = rnp.tree2array(tree, include_weight=True, weight_name='treeweight')
    assert_array_equal(rec['treeweight'], np.ones(n_entries) * 5)

    # two files: the 'weight' column is expected to be 2 for the first
    # hundred entries and 3 for the second hundred
    paths = load(['single1.root', 'single2.root'])
    merged = rnp.root2array(paths, include_weight=True)
    expected = np.concatenate(
        (np.ones(n_entries) * 2., np.ones(n_entries) * 3.))
    assert_array_equal(merged['weight'], expected)
Exemple #15
0
def test_array2tree():
    """Round-trip a structured array through array2tree and tree2array.

    Also checks that extending an existing tree doubles its entries and that
    passing something that is not a tree as ``tree`` raises TypeError.
    """
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),],
        dtype=[
            ('x', np.int32),
            ('y', np.float32),
            ('z', np.float64),
            # np.bool_ instead of the ``np.bool`` alias, which was removed
            # in NumPy 1.24
            ('w', np.bool_)])

    with temp() as tmp:
        tree = rnp.array2tree(a)
        a_conv = rnp.tree2array(tree)
        assert_array_equal(a, a_conv)
        # extend the tree
        tree2 = rnp.array2tree(a, tree=tree)
        assert_equal(tree2.GetEntries(), len(a) * 2)
        a_conv2 = rnp.tree2array(tree2)
        assert_array_equal(np.hstack([a, a]), a_conv2)

    assert_raises(TypeError, rnp.array2tree, a, tree=object)
Exemple #16
0
 def to_array(self, branches=None,
              include_weight=False,
              weight_name='weight',
              weight_dtype='f4'):
     """
     Return this tree as a NumPy structured array.

     ``branches`` is passed positionally and the three weight options by
     keyword to ``root_numpy.tree2array``.
     """
     import root_numpy
     weight_options = {
         'include_weight': include_weight,
         'weight_name': weight_name,
         'weight_dtype': weight_dtype,
     }
     return root_numpy.tree2array(self, branches, **weight_options)
Exemple #17
0
def test_array2tree():
    """Round-trip a structured array through array2tree and tree2array.

    A scratch ROOT file is opened for the duration of the conversions and is
    closed and deleted afterwards, even when an assertion fails.
    """
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),],
        dtype=[
            ('x', np.int32),
            ('y', np.float32),
            ('z', np.float64),
            # np.bool_ instead of the ``np.bool`` alias, which was removed
            # in NumPy 1.24
            ('w', np.bool_)])
    tmp = ROOT.TFile.Open('test_array2tree_temp_file.root', 'recreate')
    try:
        tree = rnp.array2tree(a)
        a_conv = rnp.tree2array(tree)
        assert_array_equal(a, a_conv)
        # extend the tree
        tree2 = rnp.array2tree(a, tree=tree)
        assert_equal(tree2.GetEntries(), len(a) * 2)
        a_conv2 = rnp.tree2array(tree2)
        assert_array_equal(np.hstack([a, a]), a_conv2)
    finally:
        # do not leave the scratch file behind when a check fails
        tmp.Close()
        os.remove(tmp.GetName())
    assert_raises(TypeError, rnp.array2tree, a, tree=object)
def read_ntuple(path="./", cfg="", selection = 'loose'):
    """Chain the trees listed in a config file and return them as an array.

    The config file ``path + "/" + cfg`` is read as space-separated columns
    'id', 'tree' and 'file' ('#' starts a comment).  Each row adds
    ``<file>/<tree>`` to a TChain named 'merged'.  The chain is converted
    with ``tree2array`` using the selection string stored under
    ``ecal_selections[selection]``.
    """
    # read the config files the same as for the Zfitter
    config = pd.read_csv(path + "/" + cfg, sep=" ", names=['id', 'tree', 'file'], comment="#")
    # single-argument print(...) is valid on both Python 2 and 3
    print(config)
    chain = r.TChain('merged')
    for _, row in config.iterrows():
        print("%s \t:  %s" % (row.id, row.file))
        chain.Add(row.file + '/' + row.tree)

    # transform this chain to an array meant to be used later by matplotlib
    data = tree2array(chain, selection=ecal_selections[selection])
    return data
Exemple #19
0
def test_single():
    """Read 'single1.root' three ways and validate each with check_single."""
    path = load('single1.root')
    # root2array without a tree name
    check_single(rnp.root2array(path))

    # specify tree name
    check_single(rnp.root2array(path, treename='tree'))

    # tree2array on the tree object fetched from the file
    root_file = get_file('single1.root')
    check_single(rnp.tree2array(root_file.Get('tree')))
def KStest( PmissBins , ana_sim , ana_data , var , cut=ROOT.TCut() , debug=2 , Nbins=20):
    """Two-sample KS test of data vs. simulation for ``var`` in bins of Pmiss3Mag.

    Parameters
    ----------
    PmissBins : sequence of (min, max) pairs bounding Pmiss3Mag.
    ana_sim, ana_data : objects whose ``GetTree()`` gives the tree to read.
    var : branch handed to ``tree2array`` and used as the axis label.
    cut : not used in this function; kept for interface compatibility.
    debug : when > 1, each bin is also plotted and a summary line printed
        through ``print_important``.
    Nbins : number of points of the ``np.linspace(-1, 2, Nbins)`` bin edges.

    Returns
    -------
    (KS_distances, Pval_KS) : two lists with one entry per bin.

    The figure is always saved to a hard-coded PDF path.
    """
    # [http://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.ks_2samp.html]
    KS_distances , Pval_KS = [] , []
    figure = plt.figure(figsize=[60,20])
    # Integer row count: `/` would give a float on Python 3, and a single
    # bin would give zero rows.
    n_rows = max(1, len(PmissBins) // 2)
    for i, pmiss_bin in enumerate(PmissBins):
        pMiss_min , pMiss_max = pmiss_bin[0] , pmiss_bin[1]
        selection = '%f < Pmiss3Mag && Pmiss3Mag < %f'%(pMiss_min , pMiss_max)
        reduced_data = tree2array(ana_data.GetTree(), branches=var, selection=selection)
        reduced_sim = tree2array(ana_sim.GetTree(), branches=var, selection=selection)
        D , Pvalue = ks_2samp( reduced_sim , reduced_data )

        if debug > 1:
            ax = figure.add_subplot(n_rows, 3, i+1)
            for array,col in zip([reduced_sim , reduced_data],['black','red']):
                g=sns.distplot( array, bins=np.linspace(-1, 2 , Nbins), ax=ax, color=col , axlabel=var )
            g.axes.set_title(r'%.2f < p$_{miss}$ < %.2f GeV/c'%(pMiss_min , pMiss_max), fontsize=34,color="b")
            print_important( "KS test of data vs. simulation for %s in p(miss) bin %d is D = %f, Pvalue = %f"%(var , i , D , Pvalue) )

        KS_distances.append(D)
        Pval_KS.append(Pvalue)

    figure.savefig("/Users/erezcohen/Desktop/cmHistos_%s.pdf"%var)

    return KS_distances , Pval_KS
def numpyfy(run):
  """Convert every TTree of the files of one run into compressed .npz files.

  For each prefix in ``files_to_convert`` the file
  ``<root_dir>/run<run>/<prefix><run>.root`` is opened.  Each key whose class
  name is "TTree" is converted in chunks of ``max_output_length`` entries
  and saved under ``<numpy_dir>/run<run>/``.  The chunk index is appended to
  the file name only when there is more than one chunk.  Empty trees are
  skipped.
  """
  outdir = "%s/run%d" % (numpy_dir, run)

  for f in files_to_convert:
    fname = "%s/run%d/%s%d.root" % (root_dir, run, f, run)
    # single-argument print(...) is valid on both Python 2 and 3
    print(" Processing %s ..." % fname)

    rfile = ROOT.TFile(fname)
    try:
      for key in rfile.GetListOfKeys():

        if key.GetClassName() != "TTree":
          # printed once per key that is not a TTree
          print(" No trees found... skipping")
          continue

        name = key.GetName()
        print("  Converting Tree %s " % name)
        tree = rfile.Get(name)

        #Ok, apparently root_numpy is not smart enough to convert composite objects,
        # but we can give it a list of branches instead that we can automatically generate.
        # Not sure why it doesn't do it itself...
        branch_list = build_branches(tree)

        size = tree.GetEntries()
        if size == 0:
          continue

        nchunks = int(math.ceil(size / float(max_output_length)))

        if not os.path.exists(outdir):
          os.makedirs(outdir)

        for chunk in range(nchunks):
          arrays = {
            tree.GetName(): tree2array(tree, branch_list, start = chunk * max_output_length, stop = (chunk+1) * max_output_length)
          }
          if nchunks > 1:
            outfile = "%s/%s%d_%d.npz" % (outdir, tree.GetName(), run, chunk)
          else:
            outfile = "%s/%s%d.npz" % (outdir, tree.GetName(), run)

          print("Creating %s " % outfile)

          numpy.savez_compressed(outfile, **arrays)
    finally:
      # the input file was previously never closed
      rfile.Close()
Exemple #22
0
def test_duplicate_branch_name():
    """Two branches with the same name must yield one column and one warning."""
    from array import array
    buf = array('d', [0.])
    tree = ROOT.TTree('tree', 'tree')
    # register the same branch name twice on the same buffer
    for _ in range(2):
        tree.Branch('double', buf, 'double/D')
    tree.Fill()

    # check that a warning was emitted
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        a = rnp.tree2array(tree)
        assert_equal(len(caught), 1)
        last = caught[-1]
        assert_true(issubclass(last.category, RuntimeWarning))
        assert_true("ignoring duplicate branch named" in str(last.message))
    assert_equal(a.dtype, [('double', '<f8')])
Exemple #23
0
def evaluate(config, tree, names, transform=None):
    """Evaluate the classifiers in ``names`` on ``tree`` and attach the scores.

    For every name the setup is loaded with ``load(config, name.split("_")[1])``
    and the setup's variables (optionally passed through ``transform``) are
    read from ``tree.raw()``.

    * Names starting with "sklearn" first load a pickled ``(bdt, label)``
      pair from ``<mvadir>/<name>.pkl`` and store column 1 of
      ``predict_proba`` under ``name``.
    * Every name then books a TMVA reader from ``<mvadir>/<name>.xml`` and
      stores its scores under ``name`` with "sklearn" replaced by "tmvalike".

    If ``mapping.root`` can be opened, a 'tmvalike_likelihood' column is
    added from the bin contents of its ``hTargetBinning`` histogram at each
    (tmvalike_tt, tmvalike_ttZ) pair.  The columns are finally packed into a
    structured array and handed to ``tree.mva`` via ``array2tree``.
    """
    output = []
    dtype = []
    for name in names:
        setup = load(config, name.split("_")[1])
        raw_tree = tree.raw()
        variables = list(transform(setup["variables"])) if transform else setup["variables"]
        data = rec2array(tree2array(raw_tree, variables))
        if name.startswith("sklearn"):
            fn = os.path.join(config["mvadir"], name + ".pkl")
            # NOTE(review): pickle.load can run arbitrary code; only point
            # mvadir at trusted files.
            with open(fn, 'rb') as fd:
                bdt, label = pickle.load(fd)
            scores = []
            if len(data) > 0:
                scores = bdt.predict_proba(data)[:, 1]
            output += [scores]
            dtype += [(name, 'float64')]

        # no `else`: sklearn names also get the TMVA evaluation below
        fn = os.path.join(config["mvadir"], name + ".xml")
        reader = r.TMVA.Reader("Silent")
        for var in setup['variables']:
            reader.AddVariable(var, array('f', [0.]))
        reader.BookMVA("BDT", fn)
        scores = evaluate_reader(reader, "BDT", data)
        output += [scores]
        dtype += [(name.replace("sklearn", "tmvalike"), 'float64')]

    f = r.TFile(os.path.join(config.get("mvadir", config.get("indir", config["outdir"])), "mapping.root"), "READ")
    if f.IsOpen():
        likelihood = f.Get("hTargetBinning")

        def lh(values):
            return likelihood.GetBinContent(likelihood.FindBin(*values))
        # column name -> position in `output`
        indices = dict((v, n) for n, (v, _) in enumerate(dtype))
        tt = output[indices['tmvalike_tt']]
        ttZ = output[indices['tmvalike_ttZ']]
        if len(tt) == 0:
            output += [[]]
        else:
            output += [np.apply_along_axis(lh, 1, np.array([tt, ttZ]).T)]
        dtype += [('tmvalike_likelihood', 'float64')]
        f.Close()

    # zip() is an iterator on Python 3; np.array needs a real list of rows
    data = np.array(list(zip(*output)), dtype)
    tree.mva(array2tree(data))
Exemple #24
0
def _tree_to_array(schema, to_npy = False):
    """Load the tree described by ``schema['sample']`` into a NumPy array.

    ``schema['sample']`` must provide 'file' and 'tree'.  The optional
    'selection' and 'step' entries are forwarded to ``tree2array`` and
    default to None.

    When ``to_npy`` is True the array is also saved next to the input file as
    ``<sha1>.npy``.  The hash is taken over the file's base name, the tree
    name, the sorted '&&'-separated terms of the selection (whitespace and
    parentheses removed) and the step.

    Returns the converted array.
    """
    sample = schema['sample']
    # single-argument print(...) is valid on both Python 2 and 3
    print('Loading File...')
    f = root_open(sample['file'])
    T = f[sample['tree']]
    # dict.get replaces has_key, which no longer exists on Python 3
    this_sel = sample.get('selection')
    this_step = sample.get('step')
    print('Pulling Tree...')
    arr = rn.tree2array(T, selection = this_sel, step = this_step)
    if to_npy is True:
        print('Writing to *.npy file...')
        # a missing selection contributes an empty string instead of
        # crashing on None.split()
        varlist = "".join((this_sel or '').split()).replace('(', '').replace(')', '').split('&&')
        varlist.sort()
        hash_name = os.path.basename(sample['file']) + sample['tree'] + ''.join(varlist) + str(this_step)
        if not isinstance(hash_name, bytes):
            # hashlib only accepts bytes on Python 3
            hash_name = hash_name.encode('utf-8')
        m = hashlib.sha1()
        m.update(hash_name)
        np.save(os.path.dirname(sample['file']) + '/' + m.hexdigest() + '.npy', arr)
    print('Done.')
    return arr
import ROOT
import pandas as pd
import numpy as np
from root_numpy import tree2array
# Open the file and read the 'BmnGemStripHit' branch of the 'cbmsim' tree
# into an array.
f = ROOT.TFile('evetest_CC4GeVmb_110_n50k.root')
tree = f.Get('cbmsim')
tracks_params = tree2array(tree, branches='BmnGemStripHit')
# single-argument print(...) is valid on both Python 2 and 3
print("Accessed the trees")

# get input variable names from branches
vars = img.getBoostCandBranchNames(treeHH4W)
treeVars = vars
# The prints below use the single-argument print(...) form, valid on both
# Python 2 and 3; the double space reproduces the separator that the old
# comma-separated print statement emitted.
print("Variables for jet image creation:  %s" % (vars,))

# create selection criteria
#sel = ""
sel = "jetAK8_pt > 500 && jetAK8_mass > 50"
#sel = "tau32 < 9999. && et > 500. && et < 2500. && bDisc1 > -0.05 && SDmass < 400"

# make arrays from the trees
#start, stop, step = 0, 200000, 1
arrayHH4W = tree2array(treeHH4W, treeVars, sel)#, None, start, stop, step )
arrayHH4W = tools.appendTreeArray(arrayHH4W)

print("Number of Jets that will be imaged:  %s" % (len(arrayHH4W),))

imgArrayHH4W = img.makeBoostCandFourVector(arrayHH4W)

print("Made candidate 4 vector arrays from the datasets")

#==================================================================================
# Store BEST Variables ////////////////////////////////////////////////////////////
#==================================================================================

# get BEST variable names from branches
bestVars = tools.getBestBranchNames(treeHH4W)
print("Boosted Event Shape Variables:  %s" % (bestVars,))
Exemple #27
0
# single-argument print(...) is valid on both Python 2 and 3; the text is
# what the comma-separated print statement produced
print('getting trees... Making training arrays... Making testing arrays...')

seltest = "Tprime2_DeepAK8_Mass >= 0"
#======process=====
#open file
#get tree
#Convert the ntuple branches to numpy arrays

# Collect the per-file arrays and concatenate once after the loop instead of
# re-concatenating the growing array on every iteration.
_trainChunksTTToSemiLepT = []
_testChunksTTToSemiLepT = []
for i in range(1, 7):
    fileTTToSemiLepT = TFile.Open(
        eosdir +
        "TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_" +
        str(i) + "_hadd.root", "READ")
    treeTTToSemiLepT = fileTTToSemiLepT.Get("ljmet")
    # training selection (`sel`) and testing selection (`seltest`) on the same tree
    _trainChunksTTToSemiLepT.append(tree2array(treeTTToSemiLepT, treeVars, sel))
    _testChunksTTToSemiLepT.append(tree2array(treeTTToSemiLepT, treeVars, seltest))
arrayTTToSemiLepT = np.concatenate(_trainChunksTTToSemiLepT)
testTTToSemiLepT = np.concatenate(_testChunksTTToSemiLepT)

# NOTE(review): only the file-opening statement of this loop is visible here
for i in range(1, 3):
    fileTTToSemiLepTb = TFile.Open(
        eosdir +
        "TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_" +
        str(i) + "_hadd.root", "READ")
Exemple #28
0
                ]
                print branchnames, len(branchnames)

        jetbranch = ['jet_pt', 'jet_eta', 'jet_mass', 'jet_phi', 'jet_btag']
        mu_branch = ['mu_pt', 'mu_eta', 'mu_mt', 'mu_phi', 'mu_q']
        el_branch = ['el_pt', 'el_eta', 'el_mt', 'el_phi', 'el_q']
        flat_branch = [
            'm_l1j1', 'H_T', 'm_l1j2', 'm_l1l2', 'Nleps', 'H_Tratio', 'Nbtags',
            'Nlooseb', 'Ntightb', 'H_Tb', 'Njets', 'MET', 'm_j1j2'
        ]

        truthbranch = ['class']

        data_dict = {}

        Y = rootnp.tree2array(file_check, branches=truthbranch)
        Z_Y = rootnp.rec2array(Y)

        flat = rootnp.tree2array(file_check, branches=flat_branch)
        Z_flat = rootnp.rec2array(flat)
        #Z_Y = np.zeros(Y.shape[0])
        #for a in range(0,Y.shape):
        #        Z_Y[a] = Z_Y[a].tolist()

        X_mu = rootnp.tree2array(file_check, branches=mu_branch)
        X_mu = rootnp.rec2array(X_mu)

        X_el = rootnp.tree2array(file_check, branches=el_branch)
        X_el = rootnp.rec2array(X_el)

        X_jets = rootnp.tree2array(file_check, branches=jetbranch)
Exemple #29
0
def anaUltraLatency(infilename,
                    debug=False,
                    latSigMaskRange=None,
                    latSigRange=None,
                    outputDir=None,
                    outfilename="latencyAna.root",
                    performFit=False):
    """
    Analyzes data taken by ultraLatency.py

    infilename      - Name of input TFile containing the latTree TTree
    debug           - If True prints additional debugging statements
    latSigMaskRange - Comma separated pair of values defining the region 
                      to be masked when trying to fit the noise, e.g. 
                      lat #notepsilon [40,44] is noise (lat < 40 || lat > 44)")
    latSigRange     - Comma separated pair of values defining expected 
                      signal range, e.g. lat #epsilon [41,43] is signal")
    outfilename  - Name of output TFile containing analysis results
    performFit      - Fit the latency distributions    
    """

    # Determine output filepath
    if outputDir is None:
        from gempython.gemplotting.utils.anautilities import getElogPath
        outputDir = getElogPath()
        pass

    # Redirect sys.stdout and sys.stderr if necessary
    from gempython.gemplotting.utils.multiprocUtils import redirectStdOutAndErr
    redirectStdOutAndErr("anaUltraLatency", outputDir)

    # Create the output File and TTree
    import ROOT as r
    outF = r.TFile(outputDir + "/" + outfilename, "RECREATE")
    if not outF.IsOpen():
        outF.Close()
        raise IOError(
            "Unable to open output file {1} check to make sure you have write permissions under {0}"
            .format(outputDir, outfilename))
    if outF.IsZombie():
        outF.Close()
        raise IOError(
            "Output file {1} is a Zombie, check to make sure you have write permissions under {0}"
            .format(outputDir, outfilename))
    myT = r.TTree('latFitTree', 'Tree Holding FitData')

    # Attempt to open input TFile
    inFile = r.TFile(infilename, "read")
    if not inFile.IsOpen():
        outF.Close()
        inFile.Close()
        raise IOError(
            "Unable to open input file {0} check to make sure you have read permissions"
            .format(infilename))
    if inFile.IsZombie():
        outF.Close()
        inFile.Close()
        raise IOError(
            "Input file {0} is a Zombie, check to make sure you have write permissions and file has expected size"
            .format(infilename))

    from gempython.tools.hw_constants import vfatsPerGemVariant
    # Get ChipID's
    import numpy as np
    import root_numpy as rp

    ##### FIXME
    from gempython.gemplotting.mapping.chamberInfo import gemTypeMapping
    if 'gemType' not in inFile.latTree.GetListOfBranches():
        gemType = "ge11"
    else:
        gemType = gemTypeMapping[rp.tree2array(tree=inFile.latTree,
                                               branches=['gemType'])[0][0]]
    print gemType
    ##### END
    from gempython.tools.hw_constants import vfatsPerGemVariant
    nVFATS = vfatsPerGemVariant[gemType]
    from gempython.gemplotting.mapping.chamberInfo import CHANNELS_PER_VFAT as maxChans

    listOfBranches = inFile.latTree.GetListOfBranches()
    if 'vfatID' in listOfBranches:
        array_chipID = np.unique(
            rp.tree2array(inFile.latTree, branches=['vfatID', 'vfatN']))
        dict_chipID = {}
        for entry in array_chipID:
            dict_chipID[entry['vfatN']] = entry['vfatID']
    else:
        dict_chipID = {vfat: 0 for vfat in range(nVFATS)}

    if debug:
        print("VFAT Position to ChipID Mapping")
        for vfat, vfatID in dict_chipID.iteritems():
            print(vfat, vfatID)

    # Set default histogram behavior
    r.TH1.SetDefaultSumw2(False)
    r.gROOT.SetBatch(True)
    r.gStyle.SetOptStat(1111111)

    #Initializing Histograms
    print('Initializing Histograms')
    from gempython.utils.gemlogger import printYellow
    from gempython.utils.nesteddict import nesteddict as ndict
    dict_hVFATHitsVsLat = ndict()
    for vfat in range(0, nVFATS):
        try:
            chipID = dict_chipID[vfat]
        except KeyError as err:
            chipID = 0
            if debug:
                printYellow(
                    "No CHIP_ID for VFAT{0}, If you don't expect data from this VFAT there's no problem"
                    .format(vfat))

        dict_hVFATHitsVsLat[vfat] = r.TH1F(
            "vfat{0}HitsVsLat".format(vfat),
            "VFAT {0}: chipID {1}".format(vfat, chipID), 1024, -0.5, 1023.5)
        pass

    #Filling Histograms
    print('Filling Histograms')
    latMin = 1000
    latMax = -1
    nTrig = -1
    for event in inFile.latTree:
        dict_hVFATHitsVsLat[int(event.vfatN)].Fill(event.latency, event.Nhits)
        if event.latency < latMin and event.Nhits > 0:
            latMin = event.latency
            pass
        elif event.latency > latMax:
            latMax = event.latency
            pass

        if nTrig < 0:
            nTrig = event.Nev
            pass
        pass

    from math import sqrt
    for vfat in range(0, nVFATS):
        for binX in range(1, dict_hVFATHitsVsLat[vfat].GetNbinsX() + 1):
            dict_hVFATHitsVsLat[vfat].SetBinError(
                binX, sqrt(dict_hVFATHitsVsLat[vfat].GetBinContent(binX)))

    hHitsVsLat_AllVFATs = dict_hVFATHitsVsLat[0].Clone("hHitsVsLat_AllVFATs")
    hHitsVsLat_AllVFATs.SetTitle("Sum over all VFATs")
    for vfat in range(1, nVFATS):
        hHitsVsLat_AllVFATs.Add(dict_hVFATHitsVsLat[vfat])

    # Set Latency Fitting Bounds - Signal
    latFitMin_Sig = latMin
    latFitMax_Sig = latMax
    if latSigRange is not None:
        listLatValues = map(lambda val: float(val), latSigRange.split(","))
        if len(listLatValues) != 2:
            raise IndexError(
                "You must specify exactly two values for determining latency signal range; values given: {0} do not meet this criterion"
                .format(listLatValues))
        else:
            latFitMin_Sig = min(listLatValues)
            latFitMax_Sig = max(listLatValues)

    # Set Latency Fitting Bounds - Noise
    latFitMin_Noise = latFitMin_Sig - 1
    latFitMax_Noise = latFitMax_Sig + 1
    if latSigMaskRange is not None:
        listLatValues = map(lambda val: float(val), latSigMaskRange.split(","))
        if len(listLatValues) != 2:
            raise IndexError(
                "You must specify exactly two values for determining latency signal range; values given: {0} do not meet this criterion"
                .format(listLatValues))
        else:
            latFitMin_Noise = min(listLatValues)
            latFitMax_Noise = max(listLatValues)

    # Make output TFile and TTree
    from array import array
    dirVFATPlots = outF.mkdir("VFAT_Plots")
    if 'detName' in listOfBranches:
        detName = r.vector('string')()
        detName.push_back(
            rp.tree2array(inFile.latTree, branches=['detName'])[0][0][0])
        myT.Branch('detName', detName)
    vfatN = array('i', [0])
    myT.Branch('vfatN', vfatN, 'vfatN/I')
    vfatID = array('L', [0])
    myT.Branch('vfatID', vfatID, 'vfatID/i')  #Hex Chip ID of VFAT
    hitCountMaxLat = array('f', [0])
    myT.Branch('hitCountMaxLat', hitCountMaxLat, 'hitCountMaxLat/F')
    hitCountMaxLatErr = array('f', [0])
    myT.Branch('hitCountMaxLatErr', hitCountMaxLatErr, 'hitCountMaxLatErr/F')
    maxLatBin = array('f', [0])
    myT.Branch('maxLatBin', maxLatBin, 'maxLatBin/F')
    hitCountBkg = array('f', [0])
    hitCountBkgErr = array('f', [0])
    hitCountSig = array('f', [0])
    hitCountSigErr = array('f', [0])
    SigOverBkg = array('f', [0])
    SigOverBkgErr = array('f', [0])
    if performFit:
        myT.Branch('hitCountBkg', hitCountBkg, 'hitCountBkg/F')
        myT.Branch('hitCountBkgErr', hitCountBkgErr, 'hitCountBkgErr/F')
        myT.Branch('hitCountSig', hitCountSig, 'hitCountSig/F')
        myT.Branch('hitCountSigErr', hitCountSigErr, 'hitCountSigErr/F')
        myT.Branch('SigOverBkg', SigOverBkg, 'SigOverBkg/F')
        myT.Branch('SigOverBkgErr', SigOverBkgErr, 'SigOverBkgErr/F')

    # Make output plots
    from math import sqrt
    dict_grNHitsVFAT = ndict()
    dict_fitNHitsVFAT_Sig = ndict()
    dict_fitNHitsVFAT_Noise = ndict()
    grNMaxLatBinByVFAT = r.TGraphErrors(len(dict_hVFATHitsVsLat))
    grMaxLatBinByVFAT = r.TGraphErrors(len(dict_hVFATHitsVsLat))
    grVFATSigOverBkg = r.TGraphErrors(len(dict_hVFATHitsVsLat))
    grVFATNSignalNoBkg = r.TGraphErrors(len(dict_hVFATHitsVsLat))
    r.gStyle.SetOptStat(0)
    if debug and performFit:
        print("VFAT\tSignalHits\tSignal/Noise")

    for vfat in dict_hVFATHitsVsLat:
        #if we don't have any data for this VFAT, we just need to initialize the TGraphAsymmErrors since it is drawn later
        if vfat not in dict_chipID:
            dict_grNHitsVFAT[vfat] = r.TGraphAsymmErrors()
            continue

        # Store VFAT info
        vfatN[0] = vfat
        vfatID[0] = dict_chipID[vfat]

        # Store Max Info
        hitCountMaxLat[0] = dict_hVFATHitsVsLat[vfat].GetBinContent(
            dict_hVFATHitsVsLat[vfat].GetMaximumBin())
        hitCountMaxLatErr[0] = sqrt(hitCountMaxLat[0])
        grNMaxLatBinByVFAT.SetPoint(vfat, vfat, hitCountMaxLat[0])
        grNMaxLatBinByVFAT.SetPointError(vfat, 0, hitCountMaxLatErr[0])

        maxLatBin[0] = dict_hVFATHitsVsLat[vfat].GetBinCenter(
            dict_hVFATHitsVsLat[vfat].GetMaximumBin())
        grMaxLatBinByVFAT.SetPoint(vfat, vfat, maxLatBin[0])
        grMaxLatBinByVFAT.SetPointError(vfat, 0, 0.5)  #could be improved upon

        # Initialize
        dict_fitNHitsVFAT_Sig[vfat] = r.TF1(
            "func_N_vs_Lat_VFAT{0}_Sig".format(vfat), "[0]", latFitMin_Sig,
            latFitMax_Sig)
        dict_fitNHitsVFAT_Noise[vfat] = r.TF1(
            "func_N_vs_Lat_VFAT{0}_Noise".format(vfat), "[0]", latMin, latMax)
        dict_grNHitsVFAT[vfat] = r.TGraphAsymmErrors(dict_hVFATHitsVsLat[vfat])
        dict_grNHitsVFAT[vfat].SetName("g_N_vs_Lat_VFAT{0}".format(vfat))

        # Fitting
        if performFit:
            # Fit Signal
            dict_fitNHitsVFAT_Sig[vfat].SetParameter(0, hitCountMaxLat[0])
            dict_fitNHitsVFAT_Sig[vfat].SetLineColor(r.kGreen + 1)
            dict_grNHitsVFAT[vfat].Fit(dict_fitNHitsVFAT_Sig[vfat], "QR")

            # Remove Signal Region
            latVal = r.Double()
            hitVal = r.Double()
            gTempDist = dict_grNHitsVFAT[vfat].Clone(
                "g_N_vs_Lat_VFAT{0}_NoSig".format(vfat))
            for idx in range(dict_grNHitsVFAT[vfat].GetN() - 1, 0, -1):
                gTempDist.GetPoint(idx, latVal, hitVal)
                if latFitMin_Noise < latVal and latVal < latFitMax_Noise:
                    gTempDist.RemovePoint(idx)

            # Fit Noise
            dict_fitNHitsVFAT_Noise[vfat].SetParameter(0, 0.)
            dict_fitNHitsVFAT_Noise[vfat].SetLineColor(r.kRed + 1)
            gTempDist.Fit(dict_fitNHitsVFAT_Noise[vfat], "QR")

            # Calc Signal & Signal/Noise
            hitCountBkg[0] = dict_fitNHitsVFAT_Noise[vfat].GetParameter(0)
            hitCountBkgErr[0] = dict_fitNHitsVFAT_Noise[vfat].GetParError(0)
            hitCountSig[0] = dict_fitNHitsVFAT_Sig[vfat].GetParameter(
                0) - hitCountBkg[0]
            hitCountSigErr[0] = sqrt(
                (dict_fitNHitsVFAT_Sig[vfat].GetParError(0))**2 +
                hitCountBkgErr[0]**2)

            SigOverBkg[0] = hitCountSig[0] / hitCountBkg[0]
            SigOverBkgErr[0] = sqrt((hitCountSigErr[0] / hitCountBkg[0])**2 +
                                    (hitCountBkgErr[0]**2 *
                                     (hitCountSig[0] / hitCountBkg[0]**2)**2))

            # Add to Plot
            grVFATSigOverBkg.SetPoint(vfat, vfat, SigOverBkg[0])
            grVFATSigOverBkg.SetPointError(vfat, 0, SigOverBkgErr[0])

            grVFATNSignalNoBkg.SetPoint(vfat, vfat, hitCountSig[0])
            grVFATNSignalNoBkg.SetPointError(vfat, 0, hitCountSigErr[0])

            # Print if requested
            if debug:
                print("{0}\t{1}\t{2}".format(vfat, hitCountSig[0],
                                             SigOverBkg[0]))
            pass

        # Format
        r.gStyle.SetOptStat(0)
        dict_grNHitsVFAT[vfat].SetMarkerStyle(21)
        dict_grNHitsVFAT[vfat].SetMarkerSize(0.7)
        dict_grNHitsVFAT[vfat].SetLineWidth(2)
        dict_grNHitsVFAT[vfat].GetXaxis().SetRangeUser(latMin, latMax)
        dict_grNHitsVFAT[vfat].GetXaxis().SetTitle("Lat")
        dict_grNHitsVFAT[vfat].GetYaxis().SetRangeUser(0, nTrig)
        dict_grNHitsVFAT[vfat].GetYaxis().SetTitle("N")

        # Write
        dirVFAT = dirVFATPlots.mkdir("VFAT{0}".format(vfat))
        dirVFAT.cd()
        dict_grNHitsVFAT[vfat].Write()
        dict_hVFATHitsVsLat[vfat].Write()
        if performFit:
            dict_fitNHitsVFAT_Sig[vfat].Write()
            dict_fitNHitsVFAT_Noise[vfat].Write()
        myT.Fill()
        pass

    # Store - Summary
    from gempython.gemplotting.utils.anautilities import getSummaryCanvas, addPlotToCanvas
    if performFit:
        canv_Summary = getSummaryCanvas(dict_grNHitsVFAT,
                                        name='canv_Summary',
                                        drawOpt='APE1',
                                        gemType=gemType)
        canv_Summary = addPlotToCanvas(canv_Summary, dict_fitNHitsVFAT_Noise,
                                       gemType)
        canv_Summary.SaveAs(outputDir + '/Summary.png')
    else:
        canv_Summary = getSummaryCanvas(dict_grNHitsVFAT,
                                        name='canv_Summary',
                                        drawOpt='APE1',
                                        gemType=gemType)
        canv_Summary.SaveAs(outputDir + '/Summary.png')

    # Store - Sig Over Bkg
    if performFit:
        canv_SigOverBkg = r.TCanvas("canv_SigOverBkg", "canv_SigOverBkg", 600,
                                    600)
        canv_SigOverBkg.cd()
        canv_SigOverBkg.cd().SetLogy()
        canv_SigOverBkg.cd().SetGridy()
        grVFATSigOverBkg.SetTitle("")
        grVFATSigOverBkg.SetMarkerStyle(21)
        grVFATSigOverBkg.SetMarkerSize(0.7)
        grVFATSigOverBkg.SetLineWidth(2)
        grVFATSigOverBkg.GetXaxis().SetTitle("VFAT Pos")
        grVFATSigOverBkg.GetYaxis().SetTitle("Sig / Bkg)")
        grVFATSigOverBkg.GetYaxis().SetTitleOffset(1.25)
        grVFATSigOverBkg.GetYaxis().SetRangeUser(1e-1, 1e2)
        grVFATSigOverBkg.GetXaxis().SetRangeUser(-0.5, nVFATS + 0.5)
        grVFATSigOverBkg.Draw("APE1")
        canv_SigOverBkg.SaveAs(outputDir + '/SignalOverBkg.png')

    # Store - Signal
    if performFit:
        canv_Signal = r.TCanvas("canv_Signal", "canv_Signal", 600, 600)
        canv_Signal.cd()
        grVFATNSignalNoBkg.SetTitle("")
        grVFATNSignalNoBkg.SetMarkerStyle(21)
        grVFATNSignalNoBkg.SetMarkerSize(0.7)
        grVFATNSignalNoBkg.SetLineWidth(2)
        grVFATNSignalNoBkg.GetXaxis().SetTitle("VFAT Pos")
        grVFATNSignalNoBkg.GetYaxis().SetTitle("Signal Hits")
        grVFATNSignalNoBkg.GetYaxis().SetTitleOffset(1.5)
        grVFATNSignalNoBkg.GetYaxis().SetRangeUser(0, nTrig)
        grVFATNSignalNoBkg.GetXaxis().SetRangeUser(-0.5, nVFATS + 0.5)
        grVFATNSignalNoBkg.Draw("APE1")
        canv_Signal.SaveAs(outputDir + '/SignalNoBkg.png')

    # Store - Sum over all VFATs
    canv_LatSum = r.TCanvas("canv_LatSumOverAllVFATs",
                            "canv_LatSumOverAllVFATs", 600, 600)
    canv_LatSum.cd()
    hHitsVsLat_AllVFATs.SetXTitle("Latency")
    hHitsVsLat_AllVFATs.SetYTitle("N")
    hHitsVsLat_AllVFATs.GetXaxis().SetRangeUser(latMin, latMax)
    hHitsVsLat_AllVFATs.Draw("hist")
    canv_LatSum.SaveAs(outputDir + '/LatSumOverAllVFATs.png')

    # Store - Max Hits By Lat Per VFAT
    canv_MaxHitsPerLatByVFAT = r.TCanvas("canv_MaxHitsPerLatByVFAT",
                                         "canv_MaxHitsPerLatByVFAT", 1200, 600)
    canv_MaxHitsPerLatByVFAT.Divide(2, 1)
    canv_MaxHitsPerLatByVFAT.cd(1)
    grNMaxLatBinByVFAT.SetTitle("")
    grNMaxLatBinByVFAT.SetMarkerStyle(21)
    grNMaxLatBinByVFAT.SetMarkerSize(0.7)
    grNMaxLatBinByVFAT.SetLineWidth(2)
    grNMaxLatBinByVFAT.GetXaxis().SetRangeUser(-0.5, nVFATS + 0.5)
    grNMaxLatBinByVFAT.GetXaxis().SetTitle("VFAT Pos")
    grNMaxLatBinByVFAT.GetYaxis().SetRangeUser(0, nTrig)
    grNMaxLatBinByVFAT.GetYaxis().SetTitle("Hit Count of Max Lat Bin")
    grNMaxLatBinByVFAT.GetYaxis().SetTitleOffset(1.7)
    grNMaxLatBinByVFAT.Draw("APE1")
    canv_MaxHitsPerLatByVFAT.cd(2)
    grMaxLatBinByVFAT.SetTitle("")
    grMaxLatBinByVFAT.SetMarkerStyle(21)
    grMaxLatBinByVFAT.SetMarkerSize(0.7)
    grMaxLatBinByVFAT.SetLineWidth(2)
    grMaxLatBinByVFAT.GetXaxis().SetTitle("VFAT Pos")
    grMaxLatBinByVFAT.GetYaxis().SetTitle("Max Lat Bin")
    grMaxLatBinByVFAT.GetYaxis().SetTitleOffset(1.2)
    grMaxLatBinByVFAT.GetXaxis().SetRangeUser(-0.5, nVFATS + 0.5)
    grMaxLatBinByVFAT.Draw("APE1")
    canv_MaxHitsPerLatByVFAT.SaveAs(outputDir + '/MaxHitsPerLatByVFAT.png')

    # Store - TObjects
    outF.cd()
    hHitsVsLat_AllVFATs.Write()
    grNMaxLatBinByVFAT.SetName("grNMaxLatBinByVFAT")
    grNMaxLatBinByVFAT.Write()
    grMaxLatBinByVFAT.SetName("grMaxLatBinByVFAT")
    grMaxLatBinByVFAT.Write()
    if performFit:
        grVFATSigOverBkg.SetName("grVFATSigOverBkg")
        grVFATSigOverBkg.Write()
        grVFATNSignalNoBkg.SetName("grVFATNSignalNoBkg")
        grVFATNSignalNoBkg.Write()
    myT.Write()
    outF.Close()
Exemple #30
0
def load_data(inputPathNTuples, treeDirName, variables):
    print "In data_manager::load_data()::\n inputPathNTuples: ", inputPathNTuples, "\n treeDirName: ", treeDirName
    print " variables: ", variables

    my_cols_list = variables + ['proces', 'key', 'target', "totalWeight"]
    data = pandas.DataFrame(
        columns=my_cols_list
    )  ## right now an empty dataframe with columns = my_cols_list
    print "data: ", data

    target = None
    for process in keys:
        print 'process %s ' % (process)

        if 'WZ' in process:
            sampleName = "WZ"
            target = 0
        if 'signal' in process:
            sampleName = "signal_ggf_spin0_400_hh_wwww"
            target = 1

        inputNTuples = glob.glob("%s/%s*_forBDTtraining.root" %
                                 (inputPathNTuples, process))
        inputTree = "%s/%s/evtTree" % (treeDirName, sampleName)
        print "inputTree", inputTree, ",  len(inputNTuples):", len(
            inputNTuples), "  inputNTuples: ", inputNTuples

        for intuple in range(0, len(inputNTuples)):
            try:
                tfile = ROOT.TFile(inputNTuples[intuple])
            except:
                print "%s   FAIL load root file" % inputNTuples[intuple]
                continue
            try:
                tree = tfile.Get(inputTree)
            except:
                print(inputTree, "FAIL read inputTree", tfile)
                continue
            if tree is not None:
                print "sampleName: ", sampleName, ",  process: ", process, ", inputNTuples[intuple]: ", inputNTuples[
                    intuple], ", nEvents: ", tree.GetEntries()
                try:
                    chunk_arr = tree2array(tree)
                except:
                    print(inputTree, "FAIL tree2array ", tfile)
                    tfile.Close()
                    continue
                else:
                    chunk_df = pandas.DataFrame(chunk_arr, columns=variables)
                    tfile.Close()
                    chunk_df['proces'] = sampleName
                    chunk_df['key'] = process
                    chunk_df['target'] = target
                    chunk_df["totalWeight"] = chunk_df["evtWeight"]
                    #print "chunk_df: ",chunk_df
                    data = data.append(chunk_df, ignore_index=True)
            else:
                print("file " + list[ii] + "was empty")
        nS = len(data.ix[(data.target.values == 1)
                         & (data.key.values == process)])
        nB = len(data.ix[(data.target.values == 0)
                         & (data.key.values == process)])
        print "%s  signal size %g,  bk size %g,   evtWeight %g,  totalWeight %g" % (
            process, nS, nB, data.ix[(data.key.values
                                      == process)]["evtWeight"].sum(),
            data.ix[(data.key.values == process)]["totalWeight"].sum())
        nNW = len(data.ix[(data["totalWeight"].values < 0)
                          & (data.key.values == process)])
        print process, " no. of events with -ve weights", nNW

    #print 'data to list = ', (data.columns.values.tolist())
    n = len(data)
    nS = len(data.ix[data.target.values == 1])
    nB = len(data.ix[data.target.values == 0])
    print treeDirName, " size of sig, bkg: ", nS, nB
    return data
Exemple #31
0
def test_tree2array_wrong_type():
    """Call ``tree2array`` with a plain empty list in place of a tree."""
    not_a_tree = []
    rnp.tree2array(not_a_tree)
Exemple #32
0
def load_single(tree, start_, stop_, branches_):
    """Read a slice of ``tree`` and return the first field of every record.

    ``start_``, ``stop_`` and ``branches_`` are forwarded to ``tree2array``
    as ``start``, ``stop`` and ``branches``; element 0 of each returned
    record is collected into a new NumPy array.
    """
    records = tree2array(tree,
                         start=start_,
                         stop=stop_,
                         branches=branches_)
    first_fields = [record[0] for record in records]
    return np.array(first_fields)
Exemple #33
0
from sklearn import metrics

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# ROOT files to evaluate; one ROC curve (saved arrays + PDF plot) per file.
files = ["BDT.root"]
#files = ["BDT_GlobalOnly.root", "BDT_CandsOnly.root", "BDT_AllFeatures.root", "BDT_GlobalNoIP.root", "BDT_GlobalNoKin.root", "BDT_Global_NoIPRoundedKinematics.root", "BDT_AllFeatures_RoundedKinematics.root"]

# Branches read from each "TestTree": the class label and the BDT response.
branches = ['classID', 'BDT']

# NOTE(review): the loop variable `file` shadows the Python 2 builtin.
for file in files:
    f = ROOT.TFile(file)
    tree = f.Get("TestTree")

    # Structured array with the two fields named in `branches`.
    data = root_numpy.tree2array(tree, branches=branches)

    # roc_curve's documented return order is (fpr, tpr, thresholds); the
    # names are deliberately swapped here, see the trailing comment.
    tpr, fpr, thresh = metrics.roc_curve(
        data['classID'], data['BDT']
    )  # labels are interpreted backwards (that's why tpr and fpr are reversed)
    # Saved as ROCs/<file without .root>.npz; assumes the ROCs/ directory
    # already exists.
    numpy.savez('ROCs/' + file.replace(".root", ""), tpr=tpr, fpr=fpr)

    # NOTE(review): `tpr` is drawn on the x axis, which is labelled
    # 'False Positive Rate' below -- confirm the axis labels match the
    # swapped variables.
    plt.figure()
    plt.plot(tpr, fpr, color='aqua', label='BDT')
    plt.xscale('log')
    plt.xlim([0.001, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')
    plt.savefig('plot' + file.replace(".root", "") + '.pdf')
# Fetch the jet tree of the HH->4B sample (fileHH4B is opened earlier in the
# script, outside this excerpt).
treeHH4B = fileHH4B.Get("run/jetTree")

print "Accessed the trees"

# get input variable names from branches
# NOTE(review): `vars` shadows the builtin of the same name.
vars = img.getBoostCandBranchNames(treeJJ)
treeVars = vars
print "Variables for jet image creation: ", vars

# create selection criteria
#sel = ""
sel = "jetAK8_pt > 500 && jetAK8_mass > 50"
#sel = "tau32 < 9999. && et > 500. && et < 2500. && bDisc1 > -0.05 && SDmass < 400"

# make arrays from the trees
# Each sample goes through the same three steps: tree2array with the branch
# list and selection string, tools.appendTreeArray, then
# img.makeBoostCandFourVector.
arrayJJ = tree2array(treeJJ, treeVars, sel)
arrayJJ = tools.appendTreeArray(arrayJJ)
imgArrayJJ = img.makeBoostCandFourVector(arrayJJ)

arrayHH4W = tree2array(treeHH4W, treeVars, sel)
arrayHH4W = tools.appendTreeArray(arrayHH4W)
imgArrayHH4W = img.makeBoostCandFourVector(arrayHH4W)

arrayHH4B = tree2array(treeHH4B, treeVars, sel)
arrayHH4B = tools.appendTreeArray(arrayHH4B)
imgArrayHH4B = img.makeBoostCandFourVector(arrayHH4B)

print "Made candidate 4 vector arrays from the datasets"

#==================================================================================
# Make Jet Images /////////////////////////////////////////////////////////////////
from ROOT import *
import root_numpy

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

print 1
import numpy
import sys

# Open the training ntuple and fetch its "tree" object.
# NOTE(review): `file` shadows the Python 2 builtin.
file = TFile('/gpfs/ddn/cms/user/lgiannini/DeepNtupleRegression/Train_wSV.root'
             )  #/uscms_data/d3/lgiannin/tree_Reg.root')
tree = file.Get("tree")
# Read five branches into a structured array; the empty selection string is
# passed through unchanged.
tree2 = root_numpy.tree2array(
    tree, ["nPVs", "jetpt", "jeteta", "genjetpt", "genjetpt_wNu"],
    selection="")

# Debug printout: overall shape, then the first record field by field.
print tree2.shape
print tree2[0].shape
for i in range(len(tree2[0])):
    print i, tree2[0][i]

print tree2

# Convert the structured (record) array with rec2array; `tree2` is rebound
# to the converted array from here on.
tree2 = root_numpy.rec2array(tree2)

print tree2.shape

t2 = root_numpy.tree2array(
    tree,
Exemple #36
0
def tvars(rootfile, first, last):
    stringa = [
        "seed_pt",
        "seed_eta",
        "seed_phi",
        "seed_mass",
        "seed_dz",
        "seed_dxy",
        "seed_3D_ip",
        "seed_3D_sip",
        "seed_2D_ip",
        "seed_2D_sip",
        "seed_3D_signedIp",
        "seed_3D_signedSip",
        "seed_2D_signedIp",
        "seed_2D_signedSip",
        "seed_chi2reduced",
        "seed_nPixelHits",
        "seed_nHits",
        "seed_jetAxisDistance",
        "seed_jetAxisDlength",

        #more truth vars
        "seed_MC_pt",
        "seed_MC_eta",
        "seed_MC_phi",
        "seed_MC_mass",
        "seed_MC_dz",
        "seed_MC_dxy",
        "seed_MC_MomPdgId",
        "seed_MC_MomFlavour",
        "seed_MC_BChain",
        "seed_MC_DChain",  #no B in this case
        "seed_MC_vx",
        "seed_MC_vy",
        "seed_MC_vz",
        "seed_MC_pvd"
    ]

    stringa2 = [
        "nearTracks_pt",
        "nearTracks_eta",
        "nearTracks_phi",
        "nearTracks_dz",
        "nearTracks_dxy",
        "nearTracks_mass",
        "nearTracks_3D_ip",
        "nearTracks_3D_sip",
        "nearTracks_2D_ip",
        "nearTracks_2D_sip",
        "nearTracks_PCAdist",
        "nearTracks_PCAdsig",
        "nearTracks_PCAonSeed_x",
        "nearTracks_PCAonSeed_y",
        "nearTracks_PCAonSeed_z",
        "nearTracks_PCAonSeed_xerr",
        "nearTracks_PCAonSeed_yerr",
        "nearTracks_PCAonSeed_zerr",
        "nearTracks_PCAonTrack_x",
        "nearTracks_PCAonTrack_y",
        "nearTracks_PCAonTrack_z",
        "nearTracks_PCAonTrack_xerr",
        "nearTracks_PCAonTrack_yerr",
        "nearTracks_PCAonTrack_zerr",
        "nearTracks_dotprodTrack",
        "nearTracks_dotprodSeed",
        "nearTracks_dotprodTrackSeed2D",
        "nearTracks_dotprodTrackSeed3D",
        "nearTracks_dotprodTrackSeedVectors2D",
        "nearTracks_dotprodTrackSeedVectors3D",
        "nearTracks_PCAonSeed_pvd",
        "nearTracks_PCAonTrack_pvd",
        "nearTracks_PCAjetAxis_dist",
        "nearTracks_PCAjetMomenta_dotprod",
        "nearTracks_PCAjetDirs_DEta",
        "nearTracks_PCAjetDirs_DPhi",

        #more MC vars
        "nearTracks_MC_pt",
        "nearTracks_MC_eta",
        "nearTracks_MC_phi",
        "nearTracks_MC_dz",
        "nearTracks_MC_dxy",
        "nearTracks_MC_MomPdgId",
        "nearTracks_MC_MomFlavour",
        "nearTracks_MC_BChain",
        "nearTracks_MC_DChain",  #no B in this case
        "nearTracks_MC_Track_vx",
        "nearTracks_MC_Track_vy",
        "nearTracks_MC_Track_vz",
        "nearTracks_MC_fromSeedVtx",
        "nearTracks_MC_fromSeedChain",
        "nearTracks_MC_pvd",
        #"nearTracks_MC_fromSeedVtx*(nearTracks_MC_pvd>0)"
    ]

    f = TFile(rootfile)
    #    tree=f.Get("analyzer1/tree")
    tree = root_numpy.tree2array(f.Get('analyzer1/tree'),
                                 branches=stringa2,
                                 selection="(jet_pt>30)&&(abs(jet_eta)<2.4)",
                                 start=first,
                                 stop=last)
    print "loaded"
    tree2 = root_numpy.rec2array(tree)
    print "s"
    print tree2.shape
    print round(time.time() - starttime, 2), "reshape"
    tree3 = tree2.reshape((200, 51, len(tree)))
    tree3 = tree3.reshape((10, 51 * 20, len(tree)))
    print tree3.shape
    tree3 = tree3.swapaxes(0, 2)
    t2 = root_numpy.tree2array(f.Get('analyzer1/tree'),
                               branches=stringa,
                               selection="(jet_pt>30)&&(abs(jet_eta)<2.4)",
                               start=first,
                               stop=last)
    t2 = root_numpy.rec2array(t2)
    print t2.shape
    t2 = t2.reshape((10, len(stringa), len(tree)))
    t2 = t2.swapaxes(0, 2)
    tree5 = numpy.concatenate((t2, tree3), axis=1)
    print tree5.shape
    numpy.save(
        "tvars_" + str(first) + "_" + str(last) + "_" +
        rootfile.split(".")[0] + ".npy", tree5)
    print time.time() - starttime
    f.Close()
    os.system("mv " + "tvars_" + str(first) + "_" + str(last) + "_" +
              rootfile.split(".")[0] + ".npy" +
              " /gpfs/ddn/users/lgiannini/NN/DataRECO")
def convert_tree_to_np(sources, destination, npy_files=None):
    """ Converts the root files in sources to numpy arrays

    Directories are processed recursively into a sub-directory of
    ``destination`` with the same name (directories whose path contains
    'failed' are not descended into).  Every source containing ".root" is
    read through a ``MyAnalysis/MyTree`` chain and saved with ``np.save``.
    A source that fails to convert is reported and appended to the list
    pickled in ``failed.pkl`` in the current working directory.

    Params
        sources : list
            root file paths/names or path to directory of root files
        destination : str
            path to directory where the npy files will be saved
        npy_files : list or None
            list of already converted files (for recursive functionality);
            a fresh list is created when omitted
    Returns
        list
            paths to the converted files 
    """
    # A literal [] default would be shared between independent calls.
    if npy_files is None:
        npy_files = []

    for i, source in enumerate(sources):
        if os.path.isdir(source) and ('failed' not in source):
            # source is a directory -> recurse on all files in directory
            new_sources = [source + '/' + e for e in os.listdir(source)]
            new_destination = destination + '/' + source.split('/')[-1]

            print('new_sources ', len(new_sources), new_sources[-9:])
            print('new_destination ', new_destination)
            logging.info('new_sources ' + str(len(new_sources)) + ' ' +
                         str(new_sources[-9:]))
            logging.info('new_destination ' + new_destination)
            # Tolerate re-runs where the output directory already exists.
            if not os.path.isdir(new_destination):
                os.mkdir(new_destination)
            convert_tree_to_np(new_sources, new_destination, npy_files)
        elif ".root" in source:
            try:
                logging.info(str(i) + ' ' + source)
                print(str(i) + ' ', end="")
                sys.stdout.flush()

                tChain = rt.TChain('MyAnalysis/MyTree')
                tChain.Add(source)

                array = root_numpy.tree2array(tChain)

                # Output name: source file name minus its last five
                # characters (the ".root" suffix).
                pkl_file_name = destination + '/' + source.split('/')[-1][:-5]
                np.save(pkl_file_name, array)
                npy_files.append(pkl_file_name + '.npy')
            except Exception as e:
                print("")
                print(e)
                print(source, " ** FAILED ** ")
                logging.error(source + " ** FAILED ** ")
                logging.error(e)

                # Append this source to the persistent failure list.  The
                # original tested `failed.pkl` as an expression (an unbound
                # name) and skipped the bookkeeping once the file existed.
                failed = []
                if os.path.exists('failed.pkl'):
                    with open('failed.pkl', 'rb') as f:
                        failed = pickle.load(f)
                failed.append(source)
                with open('failed.pkl', 'wb') as f:
                    pickle.dump(failed, f)

    return npy_files
Exemple #38
0
import pandas as pd
from ROOT import *
from root_numpy import root2array, tree2array
from root_numpy import testdata
from IPython.display import display
import numpy as np

# Directory holding sig.root and bkg.root, relative to the working directory.
path = '../data/'

# --read signal dataset
# Tree 'ntuple' -> structured NumPy array -> DataFrame (one column per field).
sig_file = TFile.Open(path + 'sig.root')
sig_tree = sig_file.Get('ntuple')
sig_arr = tree2array(sig_tree)
sig_df = pd.DataFrame(sig_arr)

# --read background dataset
# Same chain of calls as for the signal file.
bkg_file = TFile.Open(path + 'bkg.root')
bkg_tree = bkg_file.Get('ntuple')
bkg_arr = tree2array(bkg_tree)
bkg_df = pd.DataFrame(bkg_arr)

# Row counts of the two frames.
print('sig: ', sig_df.shape[0])
print('bkg: ', bkg_df.shape[0])


# --Normalize
def MinMaxScaler(data):
    """Rescale ``data`` along axis 0 by its minimum and its max-min range.

    The axis-0 minimum is subtracted and the result is divided by the
    axis-0 range, which is cast to float first.
    """
    lowest = np.min(data, 0)
    span = (np.max(data, 0) - lowest).astype('float')
    return (data - lowest) / span
Exemple #39
0
# Input directory, output directory and file name of the sample to read.
path    = '/beegfs/desy/user/hezhiyua/backed/dustData/crab_folder_v2/'
pathOut = '/beegfs/desy/user/hezhiyua/backed/dustData/crab_folder_v2/test/'
Fname   = 'VBFH_HToSSTobbbb_MH-125_MS-40_ctauS-500_TuneCUETP8M1_13TeV-powheg-pythia8_PRIVATE-MC.root'


# Not used in the lines shown here.
entries = 200



fin   = TFile(path + Fname)
tin   = fin.Get('ntuple/tree')


# presumably a timer started for later profiling (tm is defined elsewhere)
# -- TODO confirm
tm1 = tm()
# Upper entry bound passed as `stop` to every tree2array call below.
s_cut = 1000#None#100
# One tree2array call per PFCandidates branch, each over the same entries.
arr_energy   = rnp.tree2array(tin, ['PFCandidates.energy'], stop=s_cut)
arr_phi      = rnp.tree2array(tin, ['PFCandidates.phi'], stop=s_cut)
arr_eta      = rnp.tree2array(tin, ['PFCandidates.eta'], stop=s_cut)
arr_jetindex = rnp.tree2array(tin, ['PFCandidates.jetIndex'], stop=s_cut)

#e_npar = np.array(arr_energy)
# Wrap the structured arrays in DataFrames.
e_df     = pd.DataFrame(arr_energy) 
phi_df   = pd.DataFrame(arr_phi)
eta_df   = pd.DataFrame(arr_eta)

#print e_npar
#print arr_energy[3]
# Debug: element 3 of the energy cell stored in row 3.
print e_df.loc[3,'PFCandidates.energy'][3]
df        = pd.DataFrame()
df_o      = pd.DataFrame()
df['e']   = e_df
Exemple #40
0
# Remote (xrootd) directory holding the input ROOT files.
eosdir = "root://cmseos.fnal.gov//store/user/jmanagan/MVAtraining_2017_Jan2021/"

## Choosing valid events with appropriate characteristics and cutting the rest
# seltrain and seltest differ in the sign requirement on Tprime2_DeepAK8_Mass
# (and seltrain additionally requires NJetsAK8_JetSubCalc > 2), so no event
# passes both.
seltrain = "isValidTTDecayMode_DeepAK8 == 0 && Tprime2_DeepAK8_Mass < 0 && NJetsAK8_JetSubCalc > 2"
seltest = "isValidTTDecayMode_DeepAK8 == 0 && Tprime2_DeepAK8_Mass >= 0"

# NOTE(review): `vars` is presumably a branch-name list defined earlier in
# the script; if it is not, this binds the builtin vars() -- confirm.
treeVars = vars

## Getting values from trees for each parent particle and either keeping them in an array or adding them together

# For each sample: open the file read-only, fetch the "ljmet" tree, then build
# one array per selection from the same tree.
fileTTToSemiLepT = TFile.Open(
    eosdir +
    "TTJets_SingleLeptFromT_TuneCP5_13TeV-madgraphMLM-pythia8_hadd.root",
    "READ")
treeTTToSemiLepT = fileTTToSemiLepT.Get("ljmet")
trainTTToSemiLepT = tree2array(treeTTToSemiLepT, treeVars, seltrain)
testTTToSemiLepT = tree2array(treeTTToSemiLepT, treeVars, seltest)

## Selection with Single Lept from TBar
fileTTToSemiLepTb = TFile.Open(
    eosdir +
    "TTJets_SingleLeptFromTbar_TuneCP5_13TeV-madgraphMLM-pythia8_hadd.root",
    "READ")
treeTTToSemiLepTb = fileTTToSemiLepTb.Get("ljmet")
trainTTToSemiLepTb = tree2array(treeTTToSemiLepTb, treeVars, seltrain)
testTTToSemiLepTb = tree2array(treeTTToSemiLepTb, treeVars, seltest)

## Selection with signals
fileTprime = TFile.Open(
    eosdir + "TprimeTprime_M-1000_TuneCP5_13TeV-madgraph-pythia8_hadd.root",
    "READ")
Exemple #41
0
@author: rupeshdotel
"""

import numpy as np
import LT.box as B
import ROOT as R
from root_numpy import tree2array
import matplotlib.pyplot as plt

#%%

# Open the input file and convert the whole 'qfactortree' tree into a
# structured NumPy array; the fields are then accessed by name.
rfile = R.TFile(
    "/Users/rupeshdotel/analysis/work/pi0pippimeta/data/qfactor_data/qfactortree/qfactortree_for_may_10_gluexI.root"
)
intree = rfile.Get('qfactortree')
d = tree2array(intree)

#%%

# Pull individual fields out of the structured array.
mm2m = d['mm2m']
mpi013 = d['mpi013']
mpi014 = d['mpi014']
mpi023 = d['mpi023']
mpi024 = d['mpi024']

metap = d['metap']
metappi0 = d['metappi0']
# These two local names differ from their field names ('cos_t', 'phi_gj').
cost_etap = d['cos_t']
phi_etap = d['phi_gj']
mpippimpi0 = d['mpippimpi0']
mpi0p = d['mpi0p']
Exemple #42
0
## Open ROOT files
print 'Opening files...'
eosdir = "root://cmseos.fnal.gov//store/user/cholz/Step2MVAtraining_0432020/"

#sel = "Bprime2_DeepAK8_Mass < 0"
# Training selection; seltest below requires the opposite sign of
# Bprime2_DeepAK8_Mass.
sel = "isValidBBDecayMode_DeepAK8 == 0 && Bprime2_DeepAK8_Mass < 0 && NJetsAK8_JetSubCalc > 2"
# NOTE(review): `vars` is presumably a branch-name list defined earlier in
# the script; if it is not, this binds the builtin vars() -- confirm.
treeVars = vars
print 'getting trees...','Making training arrays...','Making testing arrays...'

seltest = "isValidBBDecayMode_DeepAK8 == 0 && Bprime2_DeepAK8_Mass >= 0"

# Files numbered 1..6: the first file initialises the training and testing
# arrays, every later file is concatenated onto them.  The opened files are
# not closed in this excerpt.
for i in range(1,7):
   fileTTToSemiLepT  = TFile.Open(eosdir + "TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_"+ str(i)+"_hadd.root", "READ")
   treeTTToSemiLepT  = fileTTToSemiLepT.Get("ljmet")
   if i == 1:
      arrayTTToSemiLepT = tree2array(treeTTToSemiLepT, treeVars, sel)
      testTTToSemiLepT  = tree2array(treeTTToSemiLepT, treeVars, seltest)
   else:
      arrayTTToSemiLepT = np.concatenate([arrayTTToSemiLepT,tree2array(treeTTToSemiLepT, treeVars, sel)])
      testTTToSemiLepT  = np.concatenate([testTTToSemiLepT,tree2array(treeTTToSemiLepT, treeVars, seltest)])

# Same accumulation for the Tbar sample, files numbered 1..2.
for i in range(1,3):
   fileTTToSemiLepTb = TFile.Open(eosdir + "TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_"+ str(i)+"_hadd.root", "READ")
   treeTTToSemiLepTb = fileTTToSemiLepTb.Get("ljmet")
   if i == 1:
      arrayTTToSemiLepTb = tree2array(treeTTToSemiLepTb, treeVars, sel)
      testTTToSemiLepTb  = tree2array(treeTTToSemiLepTb, treeVars, seltest)
   else:
      arrayTTToSemiLepTb = np.concatenate([arrayTTToSemiLepTb,tree2array(treeTTToSemiLepTb, treeVars, sel)])
      testTTToSemiLepTb  = np.concatenate([testTTToSemiLepTb,tree2array(treeTTToSemiLepTb, treeVars, seltest)])
Exemple #43
0
# Directory with the output trees of the model selected on the command line.
path_tree = '/home/ucl/cp3/fbury/storage/NNAndELLipseOutputTrees/model_'+str(args.model)+'/'

################################################################################
# Input Trees #
################################################################################
for name in glob.glob(path_tree+'*.root'):
    filename = name.replace(path_tree,'')
    # All integers found in the file name; the first two are compared with
    # the requested mH and mA.  Raw string: '\d' is not a valid escape in a
    # normal string literal.
    num = [int(s) for s in re.findall(r'\d+',filename )]
    if num[0]!=mH_select or num[1]!=mA_select:
        continue
    # NOTE(review): this unconditional break leaves the loop at the first
    # matching file, so everything below it in the loop body never runs.
    # Confirm whether it is a debugging leftover before removing it.
    break
    f = ROOT.TFile.Open(name)
    t = f.Get("tree")

    # One array per sample, selected through the 'id' branch.
    sig = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==0')
    DYToLL_0J = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==1')
    DYToLL_1J = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==2')
    DYToLL_2J = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==3')
    TT_Other = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==4')
    TTTo2L2Nu = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==5')

    # For each NN threshold, sum the weights of the entries above it.
    for cn in cut_NN:
        print ('NN cut : ',cn)
        N_sig = np.sum(sig[sig[:]['NN_out']>cn]['weight'])
        N_DYToLL_0J = np.sum(DYToLL_0J[DYToLL_0J[:]['NN_out']>cn]['weight'])
        N_DYToLL_1J = np.sum(DYToLL_1J[DYToLL_1J[:]['NN_out']>cn]['weight'])
        N_DYToLL_2J = np.sum(DYToLL_2J[DYToLL_2J[:]['NN_out']>cn]['weight'])
        N_TT_Other = np.sum(TT_Other[TT_Other[:]['NN_out']>cn]['weight'])
        N_TTTo2L2Nu = np.sum(TTTo2L2Nu[TTTo2L2Nu[:]['NN_out']>cn]['weight'])
Exemple #44
0
        procP3 = glob.glob(inputPath + "/" + folderName + "_fastsim_p3/" +
                           folderName +
                           "_fastsim_p3_forBDTtraining_OS_central_*.root")
        list = procP1 + procP2 + procP3
    else:
        procP1 = glob.glob(inputPath + "/" + folderName + "_fastsim/" +
                           folderName +
                           "_fastsim_forBDTtraining_OS_central_*.root")
        list = procP1
    print("Date: ", time.asctime(time.localtime(time.time())))
    for ii in range(0, len(list)):  #
        #print (list[ii],inputTree)
        tfile = ROOT.TFile(list[ii])
        tree = tfile.Get(inputTree)
        if tree is not None:
            chunk_arr = tree2array(tree)  #,  start=start, stop = stop)
            chunk_df = pandas.DataFrame(chunk_arr)  #
            chunk_df['key'] = folderName
            chunk_df['target'] = target
            #chunk_df['file']=list[ii].split("_")[10]
            if channel == "2lss_1tau":
                data[
                    "totalWeight"] = data.evtWeight * data.tau_frWeight * data.lep1_frWeight * data.lep2_frWeight
            if channel == "1l_2tau": data["totalWeight"] = data.evtWeight
            data = data.append(chunk_df, ignore_index=True)
        else:
            print("file " + list[ii] + "was empty")
        tfile.Close()
print(data.columns.values.tolist())
n = len(data)
nS = len(data.ix[data.target.values == 0])
Exemple #45
0
		data_chf1 	= root2array(directory, what_tree, branch_names_chfonly1)
		data_chfp1 	= root2array(directory, what_tree, branch_names_chfp1)
		data_tot1	= root2array(directory, what_tree, branch_names_tot1)
		data_noreg1 = root2array(directory, what_tree, branch_names_noreg1)
		
		data_xgb2	= root2array(directory, what_tree, branch_names_xgb2)
		data_chf2 	= root2array(directory, what_tree, branch_names_chfonly2)
		data_chfp2 	= root2array(directory, what_tree, branch_names_chfp2)
		data_tot2	= root2array(directory, what_tree, branch_names_tot2)
		data_noreg2 = root2array(directory, what_tree, branch_names_noreg2)
		
		data_cw		= root2array(directory_cw, what_tree_cw, branch_names_cw) 
		
		'''

        data_xgb1 = tree2array(what_tree, branch_names_xgb1)
        data_chf1 = tree2array(what_tree, branch_names_chfonly1)
        data_chfp1 = tree2array(what_tree, branch_names_chfp1)
        data_tot1 = tree2array(what_tree, branch_names_tot1)
        data_cw = tree2array(what_tree_cw, branch_names_cw1)
        data_noreg1 = tree2array(what_tree, branch_names_noreg1)

        data_xgb2 = tree2array(what_tree, branch_names_xgb2)
        data_chf2 = tree2array(what_tree, branch_names_chfonly2)
        data_chfp2 = tree2array(what_tree, branch_names_chfp2)
        data_tot2 = tree2array(what_tree, branch_names_tot2)
        data_cw = tree2array(what_tree_cw, branch_names_cw2)
        data_noreg2 = tree2array(what_tree, branch_names_noreg2)

        #================LOAD WEIGHT FILES========================
def _draw_sig_bkg_overlay(tag, canvas_name, sig_values, bkg_values, binning,
                          title, xlabel, ylabel, njet):
    """Draw unit-normalised signal and background histograms on one canvas.

    Writes ``Overlay<tag>_<njet>jetregion.eps`` in the working directory.

    Parameters:
        tag: short identifier used in the histogram names and output file name.
        canvas_name: ROOT name given to the canvas (must be unique per plot).
        sig_values, bkg_values: arrays passed to ``fill_hist`` for each sample.
        binning: ``(nbins, xlow, xhigh)`` tuple for both histograms.
        title: histogram title.
        xlabel, ylabel: axis labels forwarded to ``fill_hist``.
        njet: jet-region label embedded in the output file name.
    """
    nbins, xlow, xhigh = binning
    canvas = ROOT.TCanvas(canvas_name, 'c', 800, 600)
    legend = ROOT.TLegend(0.65, 0.75, 0.88, 0.88)
    legend.SetFillColor(0)
    legend.SetLineColor(0)

    hists = []
    for prefix, values, colour in (("signal", sig_values, ROOT.kRed),
                                   ("bkg", bkg_values, ROOT.kBlack)):
        hist = ROOT.TH1F("%s %s" % (prefix, tag), title, nbins, xlow, xhigh)
        fill_hist(hist, values, xlabel, ylabel)
        # Normalise to unit area; an empty histogram is left untouched
        # instead of dividing by zero.
        integral = hist.Integral()
        if integral:
            hist.Scale(1. / integral)
        hist.SetLineColor(colour)
        hists.append(hist)
    sig_hist, bkg_hist = hists

    # Leave 10% headroom above the taller of the two distributions.
    sig_hist.SetMaximum(
        max(sig_hist.GetMaximum(), bkg_hist.GetMaximum()) * 1.1)
    sig_hist.Draw("hist")
    legend.AddEntry(sig_hist, 'signal', "L")
    bkg_hist.Draw("hist same")
    legend.AddEntry(bkg_hist, 'ttbar+jets (bkg)', "L")
    legend.Draw()
    canvas.Print("Overlay%s_%sjetregion.eps" % (tag, njet))


def main():
    """Produce signal-vs-background overlay plots of the bucket variables.

    For each entry of ``process`` the ``nominal_Loose`` tree is loaded, either
    chained over every jet category (``njet == "all"``) or from the single
    file of the requested category.  One normalised overlay plot is then
    written per variable listed in ``plots``.
    """
    ROOT.gROOT.SetBatch(1)
    ROOT.gStyle.SetOptStat(0)

    # Jet region to plot: "all" chains every category below, otherwise one of
    # "ge10", "9", "8", "7", "6", "5".
    njet = "all"
    # Sample tag (directory suffix) -> role of the sample.
    process = {
        "sig1lnotTwB1wt1000_50K": "sig",
        "bkg1lnotTwB1wt1000_50K": "bkg"
    }
    path_template = (
        "/afs/cern.ch/work/s/sosen/ChongbinTop/common-framework/run-offline/"
        "test_%s/ljets%sj/ttbar_powpyt8.root")

    tlist = {}
    # Keep every directly opened TFile referenced: a tree obtained with Get()
    # is only usable while its file is open.
    open_files = []
    for p, label in process.items():
        if njet == "all":
            t = ROOT.TChain("nominal_Loose")
            for jetc in ["ge10", "9", "8", "7", "6", "5"]:
                # The chain opens the files itself; no separate TFile needed.
                t.Add(path_template % (p, jetc))
        else:
            f = ROOT.TFile(path_template % (p, njet), 'READ')
            open_files.append(f)
            f.ls()
            t = f.Get("nominal_Loose")
        t.ls()
        tlist[label + "tree"] = t

    # (tag, branch, (nbins, xlow, xhigh), title, x label, y label)
    plots = [
        ("hNaddjets", "Naddjets", (21, -0.5, 20.5), "",
         "additional jets per event", "a.u."),
        ("htwcount", "twcount", (4, -0.5, 3.5), "",
         "tw buckets per event", "a.u."),
        ("hmW0", "mW0", (150, 0.0001, 300),
         "Mass of the (possible) W candidate in B1", "Mass (GeV)", ""),
        ("hmBucketPrim0", "mBucketPrim0", (150, 0, 300), "Mass of B1",
         "Mass (GeV)", ""),
        ("hmBucketPrim1", "mBucketPrim1", (150, 0, 300), "Mass of B2",
         "Mass (GeV)", ""),
    ]
    for index, (tag, branch, binning, title, xlabel, ylabel) in enumerate(plots):
        sig_values = rnp.tree2array(tlist["sigtree"], branches=branch)
        bkg_values = rnp.tree2array(tlist["bkgtree"], branches=branch)
        # Each canvas gets its own ROOT name; reusing one name makes ROOT
        # delete the previously created canvas.
        _draw_sig_bkg_overlay(tag, "c%d" % index, sig_values, bkg_values,
                              binning, title, xlabel, ylabel, njet)
Exemple #47
0
def do_cut(did, files, supercuts, weights, tree_name, output_directory,
           eventWeightBranch, doNumpy, pids):
    """Apply every cut generated from ``supercuts`` to one sample.

    The per-cut raw, weighted and scaled event counts are written to
    ``<output_directory>/<did>.json``.  Any error is logged and reported
    through the return value rather than raised.

    Parameters:
        did: sample identifier (string); used for the scale-factor lookup,
            the progress-bar label and the output file name.
        files: input files handed to ``get_ttree``.
        supercuts: iterable of dicts; each must have a ``'selections'`` entry.
        weights: passed to ``get_scaleFactor`` together with ``did``.
        tree_name: name of the tree to load.
        output_directory: directory receiving the JSON file.
        eventWeightBranch: event-weight expression/branch.
        doNumpy: when true, the tree is converted to a NumPy array restricted
            to the branches the selections actually use.
        pids: ``None``, or an array shared between workers in which each
            process claims the first zero slot -- assumed to be a NumPy-style
            array supporting ``pids == 0``; confirm against the caller.

    Returns:
        ``(result, elapsed)`` where ``result`` is True on success and
        ``elapsed`` is the difference of two ``clock()`` readings.
    """
    position = -1
    if pids is not None:
        pid = os.getpid()
        # register this process in the first free (zero) slot
        if pid not in pids:
            pids[np.argmax(pids == 0)] = pid
        # index of this process in the list of processes; it picks the
        # progress-bar row below
        position = np.where(pids == pid)[0][0]

    start = clock()
    result = False
    try:
        # load up the tree for the files
        tree = get_ttree(tree_name, files, eventWeightBranch)
        # if using numpy optimization, load the tree as a numpy array to apply_cuts on
        if doNumpy:
            # a selection string may reference several branches; collect the
            # unique branch names used across all supercuts
            branchesSpecified = list(
                set(
                    itertools.chain.from_iterable(
                        selection_to_branches(supercut['selections'], tree)
                        for supercut in supercuts)))
            eventWeightBranchesSpecified = list(
                set(selection_to_branches(eventWeightBranch, tree)))

            # get actual list of branches in the file
            availableBranches = tree_get_branches(
                tree, eventWeightBranchesSpecified)

            # remove anything that doesn't exist
            branchesToUse = [
                branch for branch in branchesSpecified
                if branch in availableBranches
            ]
            branchesSkipped = list(set(branchesSpecified) - set(branchesToUse))
            if branchesSkipped:
                logger.info("The following branches have been skipped...")
                for branch in branchesSkipped:
                    logger.info("\t{0:s}".format(branch))
            tree = rnp.tree2array(tree,
                                  branches=eventWeightBranchesSpecified +
                                  branchesToUse)

        # get the scale factor
        sample_scaleFactor = get_scaleFactor(weights, did)

        # build the containing canvas for all histograms drawn in `apply_selection`
        canvas = ROOT.TCanvas('test{0:s}'.format(did), 'test{0:s}'.format(did),
                              200, 10, 100, 100)

        # iterate over the cuts available
        cuts = {}
        for cut in tqdm(get_cut(copy.deepcopy(supercuts)),
                        desc='Working on DID {0:s}'.format(did),
                        total=get_n_cuts(supercuts),
                        disable=(position == -1),
                        position=position + 1,
                        leave=True,
                        mininterval=5,
                        maxinterval=10,
                        unit='cuts',
                        dynamic_ncols=True):
            cut_hash = get_cut_hash(cut)
            rawEvents, weightedEvents = apply_cuts(tree,
                                                   cut,
                                                   eventWeightBranch,
                                                   doNumpy,
                                                   canvas=canvas)
            scaledEvents = weightedEvents * sample_scaleFactor
            cuts[cut_hash] = {
                'raw': rawEvents,
                'weighted': weightedEvents,
                'scaled': scaledEvents
            }
        logger.info("Applied {0:d} cuts".format(len(cuts)))
        with open('{0:s}/{1:s}.json'.format(output_directory, did), 'w+') as f:
            f.write(json.dumps(cuts, sort_keys=True, indent=4))
        result = True
        del canvas
    except Exception:
        # Only ordinary errors are swallowed; KeyboardInterrupt and SystemExit
        # propagate so the run can still be interrupted.
        logger.exception("Caught an error - skipping {0:s}".format(did))
    end = clock()
    return (result, end - start)
Exemple #48
0
def main():
    """Copy the mass branches of the input ntuples into ``fullttbar.root``.

    For every input file the ``massData`` tree under ``analysis/allEvents/``
    is read, and each entry is written to a new ``fullttbar`` tree together
    with two normalised weights:

    * ``weight``  = input weight * cross section / first bin of ``countHisto``
    * ``weight2`` = input weight * cross section / hard-coded event count
    """
    treeFolder = "analysis/allEvents/"
    treeName = "massData"
    weightBranch = "weight"
    # Mass branches; they carry the same name in the input and output tree.
    massBranches = [
        "WRMass", "resolvedNNMass", "superResolvedNNMass", "correctNMass",
        "incorrectNMass", "leadNMass", "subNMass"
    ]

    # Inputs: file, cross section and event count used for weight2.
    fileNames = ["TTTo2L2Nu.root"]
    crossSections = [88.29]
    counts2 = [79140880]

    # make new root file with new tree
    outFile = ROOT.TFile("fullttbar.root", 'recreate')
    tree = ROOT.TTree("fullttbar", "fullttbar")

    # One-element float arrays act as the fill variables: the branch keeps a
    # pointer to the buffer, so writing element 0 sets the value that the
    # next Fill() stores.
    buffers = {}
    for name in massBranches + ["weight", "weight2"]:
        buffers[name] = np.zeros(1, dtype=float)
        tree.Branch(name, buffers[name], name + "/D")

    for fileName, xSec, count2 in zip(fileNames, crossSections, counts2):
        rootfile = ROOT.TFile.Open(fileName, "read")
        if not rootfile or rootfile.IsZombie():
            raise IOError("Could not open input file %s" % fileName)
        try:
            massTree = rootfile.Get(treeFolder + treeName)

            countHisto = rootfile.Get(treeFolder + "countHisto")
            counts = countHisto.GetBinContent(1)
            print(counts)

            # Read every needed branch in a single pass over the tree.
            events = tree2array(massTree,
                                branches=massBranches + [weightBranch])
        finally:
            # The arrays are in memory now; release the input file.
            rootfile.Close()

        rawWeights = events[weightBranch]
        weightArray2 = rawWeights * xSec / count2
        weightArray = rawWeights * xSec / counts

        print(events.shape)
        print(events.shape[0])
        # Pair each output buffer with its input column once, outside the loop.
        columns = [(buffers[name], events[name]) for name in massBranches]
        columns.append((buffers["weight"], weightArray))
        columns.append((buffers["weight2"], weightArray2))
        for i in range(events.shape[0]):
            for buf, column in columns:
                buf[0] = column[i]
            tree.Fill()

    # write the tree into the output file and close the file
    outFile.Write()
    outFile.Close()
Exemple #49
0
def test_tree2rec():
    """Convert a one-file TChain with tree2array and validate it with check_single."""
    single_chain = TChain('tree')
    single_chain.Add(load('single1.root'))
    converted = rnp.tree2array(single_chain)
    check_single(converted)
for name in glob.glob(INPUT_FOLDER + '*.root'):
    filename = name.replace(INPUT_FOLDER, '')
    print('Opening file : ', filename)

    # Signal samples are recognised by their file-name prefix;
    # everything else is treated as background.
    Sig = filename.startswith('HToZATo2L2B')
    if Sig:
        print('\t-> Signal')
    else:
        print('\t-> Background')

    f = ROOT.TFile.Open(name)
    t = f.Get("tree")

    selection = 'met_pt<80 && ll_M>70 && ll_M<110'
    # Read all needed branches in one pass over the tree (same selection for
    # every branch), then split the structured array into plain columns.
    events = tree2array(
        t,
        branches=['jj_M', 'lljj_M', 'weight_TT', 'weight_DY', 'total_weight'],
        selection=selection)
    jj_M = np.ascontiguousarray(events['jj_M'])
    lljj_M = np.ascontiguousarray(events['lljj_M'])
    MEM_TT = np.ascontiguousarray(events['weight_TT'])
    MEM_DY = np.ascontiguousarray(events['weight_DY'])
    total_weight = np.ascontiguousarray(events['total_weight'])
    N = jj_M.shape[0]
    if Sig:  #Signal
        # Extract mA, mH generated from file title: the third and fourth
        # integers found in the file name.  Raw string so that \d is a regex
        # escape, not an (invalid) string escape.
        num = [int(s) for s in re.findall(r'\d+', filename)]
        print('\tmH = ', num[2], ', mA = ', num[3])
        mH = np.ones(N) * num[2]
        mA = np.ones(N) * num[3]
Exemple #51
0
def test_empty_tree():
    """tree2array must accept a tree that has a branch but no entries."""
    import array as array_module

    empty_tree = TTree('tree', 'tree')
    # single-double buffer backing the branch; the tree is never filled
    buffer = array_module.array('d', [0.])
    empty_tree.Branch('double', buffer, 'double/D')
    rnp.tree2array(empty_tree)
Exemple #52
0
from __future__ import print_function
from root_numpy import root2array, tree2array
import ROOT

#Get Data
rfile = ROOT.TFile("TreeFile.root")
intree = rfile.Get('Tree')
intree.Print()
# and convert the TTree into an array
array = tree2array(intree, branches=["Gen", "Reco", "data"])
# Field names are reassigned positionally, so they must follow the order of
# the `branches` list above: Gen -> 'gen', Reco -> 'reco', data -> 'data'.
array.dtype.names = ('gen', 'reco', 'data')

#Plot Data
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import numpy as np
import pandas as pd

# Wrap the structured array in a DataFrame so columns can be picked by name
df = pd.DataFrame(data=array)

# Histogram settings (xmin/xmax are defined but not passed to plt.hist below)
NBins, xmin, xmax = 10, 0, 1000

# 'reco' column: histogram it, label the axes and save the figure
reco_hist = plt.hist(df['reco'], bins=NBins)
content_reco, bin, patches = reco_hist
plt.xlabel("reco")
plt.ylabel('# events')
plt.savefig("plots/reco.pdf")

# 'gen' column: histogram it and print the bin contents
gen_hist = plt.hist(df['gen'], bins=NBins)
content_gen, bin, patches = gen_hist
print(content_gen)
Exemple #53
0
def efficiency(year):
    """Plot signal acceptance x efficiency versus Z' mass per b-tag category.

    For every channel and generated mass point the b-tag-weighted yield of
    events passing the channel selection is divided by the generated event
    count (first bin of the ``Events`` histogram), summed over the sample
    files belonging to ``year``.  When the module-level ``SEPARATE`` flag is
    set, a second set of graphs is built from the same selection with the
    ``VETO`` expression removed.

    The graphs are drawn on one canvas, saved as PDF and PNG under
    ``plots/Efficiency/``, and the efficiencies (in percent) are printed as
    ``&``-separated table rows.

    Parameters:
        year: string; ``"run2"`` uses every file, any other value keeps only
            the files whose name contains it.  Also used in the output names.
    """
    import numpy as np
    from root_numpy import tree2array, fill_hist
    from aliases import AK8veto, electronVeto, muonVeto

    def weighted_yield(tree, selection):
        """Return the summed b-tag weight of the entries passing `selection`."""
        weights = tree2array(tree,
                             branches='BTagAK4Weight_deepJet',
                             selection=selection)
        # A single-bin histogram filled at 0 with the weights does the sum.
        hist = TH1F('pass', 'pass', 1, 0, 1)
        fill_hist(hist, np.zeros(len(weights)), weights=weights)
        total = hist.GetBinContent(1)
        hist.Reset()
        return total

    genPoints = [
        1800, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 7000, 8000
    ]
    eff = {}
    vetoes = {"AK8": AK8veto, "electron": electronVeto, "muon": muonVeto}
    VETO = "AK8"  ##could change the veto to investigate here
    if SEPARATE:
        eff_add = {}

    #channels = ['none', 'qq', 'bq', 'bb', 'mumu']
    channels = ['qq', 'bq', 'bb', 'mumu']

    for channel in channels:
        ngenSign = {}
        nevtSign = {}
        eff[channel] = TGraphErrors()
        if SEPARATE:
            nevtSign_add = {}
            eff_add[channel] = TGraphErrors()

        # The selection depends only on the channel and the b-tagging mode.
        if BTAGGING == 'semimedium':
            selection = aliasSM[channel]
        else:
            selection = alias[channel].format(WP=working_points[BTAGGING])
        # Same selection with the veto expression stripped out.
        selection_noveto = selection.replace(vetoes[VETO], "")

        for m in genPoints:
            signName = "ZpBB_M" + str(m)
            ngenSign[m] = 0.
            nevtSign[m] = 0.
            if SEPARATE:
                nevtSign_add[m] = 0.
            for ss in sample[signName]['files']:
                if not (year == "run2" or year in ss):
                    continue
                sfile = TFile(NTUPLEDIR + ss + ".root", "READ")
                ngenSign[m] += sfile.Get("Events").GetBinContent(1)
                tree = sfile.Get("tree")
                nevtSign[m] += weighted_yield(tree, selection)
                if SEPARATE:
                    # The no-veto yield goes into its own counter in both
                    # b-tagging modes.
                    nevtSign_add[m] += weighted_yield(tree, selection_noveto)
                sfile.Close()

                if ngenSign[m]:
                    ratio = nevtSign[m] / ngenSign[m]
                else:
                    ratio = float('nan')
                print(" ".join(
                    str(item)
                    for item in (channel, ss, ":", nevtSign[m], "/",
                                 ngenSign[m], "=", ratio)))
            # Skip mass points with no selected or no generated events
            # (the latter would divide by zero below).
            if nevtSign[m] == 0 or ngenSign[m] <= 0:
                continue
            n = eff[channel].GetN()
            eff[channel].SetPoint(n, m, nevtSign[m] / ngenSign[m])
            eff[channel].SetPointError(n, 0,
                                       math.sqrt(nevtSign[m]) / ngenSign[m])
            if SEPARATE:
                eff_add[channel].SetPoint(n, m, nevtSign_add[m] / ngenSign[m])
                eff_add[channel].SetPointError(
                    n, 0,
                    math.sqrt(nevtSign_add[m]) / ngenSign[m])

        eff[channel].SetMarkerColor(color[channel])
        eff[channel].SetMarkerStyle(20)
        eff[channel].SetLineColor(color[channel])
        eff[channel].SetLineWidth(2)

        if SEPARATE:
            eff_add[channel].SetMarkerColor(color[channel] +
                                            color_shift[channel])
            eff_add[channel].SetMarkerStyle(21)
            eff_add[channel].SetLineColor(color[channel] +
                                          color_shift[channel])
            eff_add[channel].SetLineWidth(2)
            eff_add[channel].SetLineStyle(7)

        if channel in ('qq', 'none'):
            eff[channel].SetLineStyle(3)

    n = max([eff[x].GetN() for x in channels])
    maxEff = 0.

    # Total efficiency
    eff["sum"] = TGraphErrors(n)
    eff["sum"].SetMarkerStyle(24)
    eff["sum"].SetMarkerColor(1)
    eff["sum"].SetLineWidth(2)

    if SEPARATE:
        eff_add["sum"] = TGraphErrors(n)
        eff_add["sum"].SetMarkerStyle(25)
        eff_add["sum"].SetMarkerColor(1)
        eff_add["sum"].SetLineWidth(2)
        eff_add["sum"].SetLineStyle(7)

    for i in range(n):
        tot, mass = 0., 0.
        if SEPARATE:
            tot_add = 0.
        for channel in channels:
            if channel in ('qq', 'none'):
                continue  #not sure if I should include 2mu category in sum
            if eff[channel].GetN() > i:
                tot += eff[channel].GetY()[i]
                if SEPARATE:
                    tot_add += eff_add[channel].GetY()[i]
                mass = eff[channel].GetX()[i]
                if tot > maxEff:
                    maxEff = tot
        eff["sum"].SetPoint(i, mass, tot)
        if SEPARATE:
            eff_add["sum"].SetPoint(i, mass, tot_add)

    n_filled = len([x for x in channels if eff[x].GetN() > 0])
    if SEPARATE:
        leg = TLegend(0.15, 0.50, 0.95, 0.8)
    else:
        leg = TLegend(0.15, 0.60, 0.95, 0.8)
    leg.SetBorderSize(0)
    leg.SetFillStyle(0)  #1001
    leg.SetFillColor(0)
    # Floor division keeps the column count an integer on Python 2 and 3.
    leg.SetNColumns(len(channels) // 4)
    for channel in channels:
        if eff[channel].GetN() > 0:
            leg.AddEntry(eff[channel], getChannel(channel), "pl")
            if SEPARATE:
                leg.AddEntry(eff_add[channel],
                             getChannel(channel) + " no " + VETO + "-veto",
                             "pl")
    # Shrink the legend box to the number of entries actually drawn.
    if SEPARATE:
        leg.SetY1(leg.GetY2() - n_filled * 0.045)
    else:
        leg.SetY1(leg.GetY2() - n_filled / 2. * 0.045)
    if SEPARATE:
        legS = TLegend(0.5, 0.8 - 0.045, 0.9, 0.85)
    else:
        legS = TLegend(0.5, 0.85 - 0.045, 0.9, 0.85)
    legS.SetBorderSize(0)
    legS.SetFillStyle(0)  #1001
    legS.SetFillColor(0)
    legS.AddEntry(eff['sum'],
                  "Total b tag efficiency (1 b tag + 2 b tag + 2 #mu)", "pl")
    if SEPARATE:
        legS.AddEntry(eff_add['sum'],
                      "Total b tag efficiency, no " + VETO + "-veto", "pl")
    c1 = TCanvas("c1", "Signal Efficiency", 1200, 800)
    c1.cd(1)
    eff['sum'].Draw("APL")
    if SEPARATE:
        eff_add['sum'].Draw("SAME, PL")
    for channel in channels:
        eff[channel].Draw("SAME, PL")
        if SEPARATE:
            eff_add[channel].Draw("SAME, PL")
    leg.Draw()
    legS.Draw()
    setHistStyle(eff["sum"], 1.1)
    eff["sum"].SetTitle(";m_{Z'} (GeV);Acceptance #times efficiency")
    eff["sum"].SetMinimum(0.)
    eff["sum"].SetMaximum(max(1., maxEff * 1.5))  #0.65
    if SEPARATE:
        eff_add["sum"].SetTitle(";m_{Z'} (GeV);Acceptance #times efficiency")
        eff_add["sum"].SetMinimum(0.)
        eff_add["sum"].SetMaximum(1.)

    eff["sum"].GetXaxis().SetTitleSize(0.045)
    eff["sum"].GetYaxis().SetTitleSize(0.045)
    eff["sum"].GetYaxis().SetTitleOffset(1.1)
    eff["sum"].GetXaxis().SetTitleOffset(1.05)
    eff["sum"].GetXaxis().SetRangeUser(1500, 8000)
    c1.SetTopMargin(0.05)
    #drawCMS(-1, "Simulation Preliminary", year=year) #Preliminary
    #drawCMS(-1, "Work in Progress", year=year, suppressCMS=True)
    drawCMS(-1, "", year=year, suppressCMS=True)
    drawAnalysis("")

    if SEPARATE:
        c1.Print("plots/Efficiency/" + year + "_" + BTAGGING + "_no" + VETO +
                 "veto.pdf")
        c1.Print("plots/Efficiency/" + year + "_" + BTAGGING + "_no" + VETO +
                 "veto.png")
    else:
        c1.Print("plots/Efficiency/" + year + "_" + BTAGGING + ".pdf")
        c1.Print("plots/Efficiency/" + year + "_" + BTAGGING + ".png")

    # Print the efficiency table.  Each row is assembled as one string and
    # printed with a single-argument print(), which behaves the same on
    # Python 2 and Python 3.
    header = ["category"]
    for m in range(0, eff["sum"].GetN()):
        header.append(" & %d" % int(eff["sum"].GetX()[m]))
    header += ["\\\\", "\n\\hline"]
    print(" ".join(header))
    for channel in channels + ["sum"]:
        if channel == 'sum':
            print("\\hline")
        row = [
            getChannel(channel).replace("high ", "H").replace(
                "low ", "L").replace("purity", "P").replace("b-tag", "")
        ]
        for m in range(0, eff[channel].GetN()):
            row.append("& %.1f" % (100. * eff[channel].GetY()[m]))
        row.append("\\\\")
        print(" ".join(row))
Exemple #54
0
from matplotlib.colors import LogNorm
from random import randint
seed = 10
np.random.seed(seed)  # fixed seed for reproducibility

## Input
maxtracks_read = 100  # max number of tracks to read
maxtracks_train = 25  # max number of tracks to use in the training

filename = 'ntuHevjin.root'
file = TFile(filename, 'R')
tree = file.Get('PDsecondTree')

evtcuts = '((evtNumber % 10) < 8)'  # leave out 20% of events for testing

# Branches to read; the column order of vInput follows this list
track_branches = [
    'trkPt', 'trkEta', 'trkPhi', 'trkDxy', 'trkDz', 'trkIsInJet',
    'trkIsHighPurity', 'trkCharge'
]
vInput = root_numpy.rec2array(
    root_numpy.tree2array(tree, branches=track_branches, selection=evtcuts))

nspec = 2  # number of features not used in the training
nfeat = len(vInput[0]) - nspec

# Leading columns, taken by position from the front ...
vPt, vEta, vPhi, vDxy, vDz = (vInput[:, col] for col in range(5))
# ... and the last three, taken by position from the back
vtrkIsInJet, vtrkIsHighPurity, vQ = (vInput[:, col] for col in (-3, -2, -1))

##Shape formatting and zero padding
Exemple #55
0
def test_branch_DNE():
    """Request a branch name that is not in the tree from tree2array."""
    single_chain = TChain('tree')
    single_chain.Add(load('single1.root'))
    missing = ['my_net_worth']
    rnp.tree2array(single_chain, branches=missing)
Exemple #56
0
def Tree2Pandas(input_file,
                variables,
                weight=None,
                cut=None,
                xsec=None,
                event_weight_sum=None,
                luminosity=None,
                paramFun=None,
                tree_name='tree',
                start=None,
                stop=None,
                additional_columns=None):
    """
    Convert a ROOT TTree to a pandas DataFrame.

    Parameters
    ----------
    input_file : str
        Path of the ROOT file to read.
    variables : list of str
        Branch names to read. Entries starting with "$" are skipped.
    weight : str, optional
        Name of the weight branch. It is read together with ``variables``
        (once, even if it is already listed there) and used to build the
        ``event_weight`` column.
    cut : str, optional
        Selection string forwarded to ``tree2array``.
    xsec, event_weight_sum, luminosity : float, optional
        When ``weight``, ``xsec`` and ``event_weight_sum`` are all given, the
        relative weight is ``xsec * luminosity / event_weight_sum``
        (``luminosity`` defaults to 1). Otherwise the relative weight is
        ``1 / number_of_rows`` (left at 1 for an empty frame).
    paramFun : callable, optional
        Called with the basename of ``input_file``; its result (0 when it
        returns None) fills the ``param`` column.
    tree_name : str
        Key of the tree inside the file.
    start, stop : int, optional
        Entry range forwarded to ``tree2array``.
    additional_columns : dict, optional
        Mapping ``column name -> constant value`` added to every row.

    Returns
    -------
    pandas.DataFrame or None
        None when the file does not exist, cannot be opened, or does not
        contain ``tree_name``.

    Raises
    ------
    RuntimeError
        If ``variables`` contains repeated names.
    ValueError
        Re-raised from ``tree2array`` after logging the offending file.
    """
    if additional_columns is None:
        additional_columns = {}

    # Work on a fresh list so the caller's list is never modified; "$" entries
    # are not tree branches and are dropped here.
    variables = [var for var in variables if not var.startswith("$")]

    # Check for repetitions in variables -> makes root_numpy crash #
    repeated_var = [
        item for item, count in collections.Counter(variables).items()
        if count > 1
    ]
    if repeated_var:
        logging.critical('There are repeated variables')
        for var in repeated_var:
            logging.critical('... %s', var)
        raise RuntimeError("Repeated arguments for importing data")

    # Get root tree, check if exists first #
    if not os.path.exists(input_file):
        logging.warning("File %s does not exist", input_file)
        print("File %s does not exist" % input_file)
        return None
    file_handle = TFile.Open(input_file)
    if not file_handle:
        # A null handle means the file could not be opened.
        logging.warning("Could not open file %s", input_file)
        return None

    try:
        if not file_handle.GetListOfKeys().Contains(tree_name):
            logging.debug("Could not find tree %s in %s", tree_name,
                          input_file)
            return None
        tree = file_handle.Get(tree_name)
        N = tree.GetEntries()
        logging.debug('\tNumber of events : %d', N)

        # Read the tree and convert it to a numpy structured array.
        # The weight is only appended when it is not already requested,
        # otherwise the branch would be duplicated.
        if weight is not None and weight not in variables:
            variables.append(weight)
        try:
            data = tree2array(tree,
                              branches=variables,
                              selection=cut,
                              start=start,
                              stop=stop)
        except ValueError:
            logging.error("Issue with file {}".format(input_file))
            raise

        # Convert to pandas dataframe #
        df = pd.DataFrame(data)
    finally:
        # Always release the file, including on early return or error.
        file_handle.Close()

    n_rows = df.shape[0]

    # Reweighting #
    relative_weight = 1
    if weight is not None and xsec is not None and event_weight_sum is not None:
        if luminosity is None:
            luminosity = 1
        relative_weight = xsec * luminosity / event_weight_sum
        logging.debug('\t\tReweighting requested')
        logging.debug('\t\t\tCross section : %0.5f', xsec)
        logging.debug('\t\t\tEvent weight sum : %0.2f', event_weight_sum)
        logging.debug('\t\t\tLuminosity : %0.2f', luminosity)
        logging.debug('\t\tRelative weight %0.3e', relative_weight)
        df['cross_section'] = np.ones(n_rows) * xsec
        df['luminosity'] = np.ones(n_rows) * luminosity
        df['event_weight_sum'] = np.ones(n_rows) * event_weight_sum
    else:
        df['cross_section'] = np.ones(n_rows)
        df['luminosity'] = np.ones(n_rows)
        df['event_weight_sum'] = np.ones(n_rows)
        if n_rows != 0:
            relative_weight /= n_rows

    if weight is not None:
        df['event_weight'] = df[weight] * relative_weight
    else:
        df['event_weight'] = np.ones(n_rows)

    if paramFun is not None:
        assert callable(paramFun)
        param = paramFun(os.path.basename(input_file))
        if param is None:
            param = 0
        df['param'] = np.ones(n_rows) * param

    # Register additional columns #
    for key, val in additional_columns.items():
        df[key] = pd.Series([val] * n_rows)

    # Slice printout #
    if start is not None or stop is not None:
        ni = start if start is not None else 0
        nf = stop if stop is not None else N
        logging.debug(
            "Reading from {} to {} in input tree (over {} entries)".format(
                ni, nf, N))

    return df
Exemple #57
0
def test_PyROOT():
    """Smoke test: a tree taken from a plain PyROOT ``TFile`` converts."""
    # The file stays bound to a local name for the whole conversion, exactly
    # as before, so the tree's owning file is still referenced while read.
    root_file = TFile(load('single1.root'))
    rnp.tree2array(root_file.Get('tree'))
# Accumulators for this fragment. The sibling lists used below (tree__signal,
# event__signal, weight__signal, tree__background, event__background), as well
# as `file`, `Options` and `variableList`, are defined outside this fragment.
weight__background = []
array__signal = []
array__background = []

name__signal = Options['SignalTree']  # e.g. wt_DR_nominal, wt_DS
name__background = Options['BackgroundTree']  # e.g. tt_nominal, tt_radHi
# TODO: add lines to make sure many input files can be used
print name__signal
#for filename in input:

# For every signal tree present in the file: read the training variables and
# the event weight, then pair them event by event.
# NOTE(review): `!= None` (rather than `is not None`) is presumably deliberate
# for PyROOT null-pointer results -- confirm before changing it.
for name in name__signal:
    if file.Get(name) != None:
        print 'name', name
        tree__signal.append(file.Get(name))
        event__signal.append(
            tree2array(tree__signal[-1], branches=variableList, selection='1'))
        weight__signal.append(
            tree2array(tree__signal[-1],
                       branches=[Options['EventWeight']],
                       selection='1'))
        #     weight__signal.append(tree2array(tree__signal[-1], branches="EventWeight", selection='1'))
        # Each entry is [event_record, weight_record] for one event.
        array__signal.append([
            list(elem) for elem in zip(event__signal[-1], weight__signal[-1])
        ])
# Same procedure for the background trees.
# NOTE(review): only the variable read is visible here; the weight read and
# pairing may follow outside this fragment.
for name in name__background:
    if file.Get(name) != None:
        tree__background.append(file.Get(name))
        event__background.append(
            tree2array(tree__background[-1],
                       branches=variableList,
                       selection='1'))
def reweight( sample, puType = 0  ):
    """
    Compute per-event pile-up weights for an MC sample and write them to
    ``sample.puTree``, one tree per data epoch.

    sample : object providing ``path`` (list; the first entry is the MC file),
             ``tnpTree`` (tree path inside the file, or None) and ``puTree``
             (output file name).
    puType : selects the reweighting variable and the data reference:
             0 -> 'truePU'    with puMC[puMCscenario] and puDataEpoch
             1 -> 'event_nPV' with an MC histogram and nVtxDataEpoch
             2 -> 'rho'       with an MC histogram and rhoDataEpoch

    For each epoch a tree 'weights_<epoch>' is written with the columns
    'PUweight' (data/MC ratio, 1 when the MC value is not positive) and
    'totWeight' (the same value, carrying the sign of the event 'weight').

    Exits the process with status 1 when ``sample.path`` is None.

    NOTE(review): the order of the ROOT calls below appears to matter
    (histograms are filled by name via Draw, the output trees are created
    right after the output file is opened) -- do not reorder without checking.
    """
    if sample.path is None:
        print '[puReweighter]: Need to know the MC tree (option --mcTree or sample.path)'
        sys.exit(1)


    # Create a tree with only weights that will be used as friend tree for
    # reweighting different lumi periods.
    print 'Opening mc file: ', sample.path[0]
    fmc = rt.TFile(sample.path[0],'read')
    tmc = None
    if sample.tnpTree is None:
        # No tree given: take '<dir>/fitter_tree' from the file's keys,
        # skipping "sampleInfo". The assignment is repeated for every key,
        # so the last matching one wins.
        dirs = fmc.GetListOfKeys()
        for d in dirs:
            if (d.GetName() == "sampleInfo"): continue
            tmc = fmc.Get("%s/fitter_tree" % d.GetName())
    else:
        tmc = fmc.Get(sample.tnpTree)


    # Can reweight vs nVtx, but it is better to reweight vs truePU.
    # For puType 1 and 2 the MC reference distribution is built here as a
    # unit-normalised 75-bin histogram and stored bin by bin in a list.
    puMCnVtx = []
    puMCrho = []
    if   puType == 1 :
        hmc   = rt.TH1F('hMC_nPV'  ,'MC nPV'  , 75,-0.5,74.5)
        tmc.Draw('event_nPV>>hMC_nPV','','goff')
        hmc.Scale(1/hmc.Integral())
        for ib in range(1,hmc.GetNbinsX()+1):
            puMCnVtx.append( hmc.GetBinContent(ib) )
        print 'len nvtxMC = ',len(puMCnVtx)

    elif puType == 2 :
        hmc   = rt.TH1F('hMC_rho'  ,'MC #rho'  , 75,-0.5,74.5)
        tmc.Draw('rho>>hMC_rho','','goff')
        hmc.Scale(1/hmc.Integral())
        for ib in range(1,hmc.GetNbinsX()+1):
            puMCrho.append( hmc.GetBinContent(ib) )
        print 'len rhoMC = ',len(puMCrho)


    puDataDist = {}   # epoch -> normalised 'pileup' histogram from data
    puDataArray= {}   # epoch -> list of data histogram contents
    weights = {}      # epoch -> list of (puw, totw) tuples, one per MC event
    epochKeys = puDataEpoch.keys()
    if puType == 1  : epochKeys = nVtxDataEpoch.keys()
    if puType == 2  : epochKeys = rhoDataEpoch.keys()

    # Load the data distribution of every epoch and sample it at integer
    # values 0 .. len(puMC[puMCscenario]) - 1.
    for pu in epochKeys:
        fpu = None
        if   puType == 1 : fpu = rt.TFile(nVtxDataEpoch[pu],'read')
        elif puType == 2 : fpu = rt.TFile(rhoDataEpoch[pu],'read')
        else             : fpu = rt.TFile(puDataEpoch[pu],'read')
        puDataDist[pu] = fpu.Get('pileup').Clone('puHist_%s' % pu)
        puDataDist[pu].Scale(1./puDataDist[pu].Integral())
        # Detach the clone from its file before the file is closed below.
        puDataDist[pu].SetDirectory(0)
        puDataArray[pu] = []
        for ipu in range(len(puMC[puMCscenario])):
            # The small offset keeps the lookup off the exact bin edge.
            ibin_pu  = puDataDist[pu].GetXaxis().FindBin(ipu+0.00001)
            puDataArray[pu].append(puDataDist[pu].GetBinContent(ibin_pu))
        print 'puData[%s] length = %d' % (pu,len(puDataArray[pu]))
        fpu.Close()
        weights[pu] = []

    mcEvts = tree2array( tmc, branches = ['weight','truePU','event_nPV','rho'] )


    # NOTE(review): this first assignment is redundant, the if/elif/else
    # right below covers every case.
    pumc = puMC[puMCscenario]
    if   puType == 1:  pumc = puMCnVtx
    elif puType == 2:  pumc = puMCrho
    else            :  pumc = puMC[puMCscenario]

    puMax = len(pumc)
    print '-> nEvtsTot ', len(mcEvts)
    for ievt in xrange(len(mcEvts)):
        if ievt%1000000 == 0 :            print 'iEvt:',ievt
        evt = mcEvts[ievt]
        for pu in epochKeys:
            # -1 marks "no value found"; it leads to a weight of 1 below.
            pum = -1
            pud = -1
            # NOTE(review): the puType == 2 test starts a new if/elif chain
            # instead of continuing the first `if`; the three cases are
            # mutually exclusive, so the result is the same.
            # NOTE(review): the upper bound checked is len(pumc), while
            # puDataArray[pu] has len(puMC[puMCscenario]) entries -- confirm
            # that both lengths agree for puType 1 and 2.
            if puType == 1 and evt['event_nPV'] < puMax:
                pud = puDataArray[pu][evt['event_nPV']]
                pum = pumc[evt['event_nPV']]
            if puType == 2 and int(evt['rho']) < puMax:
                pud = puDataArray[pu][int(evt['rho'])]
                pum = pumc[int(evt['rho'])]
            elif puType == 0:
                # NOTE(review): unlike the two branches above, 'truePU' is
                # used as an index without a range check.
                pud = puDataArray[pu][evt['truePU']]
                pum = pumc[evt['truePU']]
            puw = 1
            if pum > 0:
                puw  = pud/pum

            # totWeight carries the sign of the generator-level event weight.
            if evt['weight'] > 0 : totw = +puw
            else                 : totw = -puw
            weights[pu].append( ( puw,totw) )

    newFile    = rt.TFile( sample.puTree, 'recreate')

    # One output tree per epoch, filled from the (PUweight, totWeight) pairs.
    for pu in epochKeys:
        treeWeight = rt.TTree('weights_%s'%pu,'tree with weights')
        wpuarray = np.array(weights[pu],dtype=[('PUweight',float),('totWeight',float)])
        array2tree( wpuarray, tree = treeWeight )
        treeWeight.Write()

    newFile.Close()
    fmc.Close()
from rootpy.io import root_open
from root_numpy import tree2array
import numpy as np
import math as m
import pprint
from shapely.geometry import Polygon
import waferGeometry

# Read the cell and trigger-cell geometry trees into structured arrays.
# The file is used as a context manager so that it is closed even when one
# of the reads fails; the arrays stay usable after the file is closed.
with root_open("test_triggergeom.root") as f:
    cells_tree = f.Get("hgcaltriggergeomtester/TreeCells")
    TC_tree = f.Get("hgcaltriggergeomtester/TreeTriggerCells")

    # Per-cell information, including the wafer each cell belongs to.
    cells_wafer_info = tree2array(cells_tree,
                                  branches=[
                                      'id', 'wafertype', 'wafer', 'layer',
                                      'subdet', 'zside', 'x', 'y', 'cell',
                                      'waferrow', 'wafercolumn'
                                  ])
    # Per-trigger-cell information, including the ids of the member cells.
    cells_tc_info = tree2array(TC_tree,
                               branches=[
                                   'triggercell', 'c_id', 'c_cell', 'wafer',
                                   'layer', 'subdet', 'zside'
                               ])


#########################################################
## FUNCTIONS ##
def ExtractMappingCoordinates(koordx, koordy, d, x0, y0):
    distx = koordx - x0