Exemple #1
0
def test_multicompression_4(tmp_path):
    """Round-trip two ZLIB(5)-compressed branches through uproot and ROOT.

    A tree with an integer branch and a floating-point branch is written
    with file-level ZLIB(5) compression.  The test then checks, per branch,
    the values read back by uproot, the reported compression setting, and
    the compressed/uncompressed byte counts, and finally reads the same
    values back through ROOT.
    """
    path = os.path.join(tmp_path, "newfile.root")

    expected = {
        "branch1": np.arange(100),
        "branch2": 1.1 * np.arange(100),
    }
    # Uncompressed size asserted for each branch; compressed must be smaller.
    raw_size = 874

    with uproot.recreate(path, compression=uproot.ZLIB(5)) as fout:
        dtypes = {name: values.dtype for name, values in expected.items()}
        fout.mktree("tree", dtypes)
        fout["tree"].extend(dict(expected))

    with uproot.open(path) as fin:
        for name, values in expected.items():
            assert fin["tree/" + name].array(library="np").tolist() == values.tolist()
        for name in expected:
            assert fin["tree/" + name].compression == uproot.ZLIB(5)
        for name in expected:
            assert fin["tree/" + name].compressed_bytes < raw_size
        for name in expected:
            assert fin["tree/" + name].uncompressed_bytes == raw_size

    rootfile = ROOT.TFile(path)
    tree = rootfile.Get("tree")
    for name, values in expected.items():
        assert [getattr(entry, name) for entry in tree] == values.tolist()
    rootfile.Close()
Exemple #2
0
def test_compresschange(tmp_path):
    """Check that reassigning ``f.compression`` before writing takes effect.

    The file is created with ZLIB level 2, its compression is switched to
    ZLIB level 3 before the first object is written, and ROOT must then
    report the kZLIB algorithm at level 3.
    """
    filename = join(str(tmp_path), "example.root")

    with uproot.recreate(filename, compression=uproot.ZLIB(2)) as f:
        f.compression = uproot.ZLIB(3)
        f["hello"] = "a"*2000

    f = ROOT.TFile.Open(filename)
    try:
        assert f.GetCompressionAlgorithm() == uproot.const.kZLIB
        assert f.GetCompressionLevel() == 3
    finally:
        # Release the ROOT file handle even when an assertion fails.
        f.Close()
Exemple #3
0
def test_compressed_TObjString(tmp_path):
    """Store a 2000-character string with ZLIB(1) and read it back via ROOT."""
    payload = "a"*2000
    path = join(str(tmp_path), "example.root")

    with uproot.recreate(path, compression=uproot.ZLIB(1)) as out:
        out["hello"] = payload

    rootfile = ROOT.TFile.Open(path)
    recovered = str(rootfile.Get("hello"))
    assert recovered == payload
    rootfile.Close()
Exemple #4
0
def make_trees(args):
    """Merge the tree accumulators of each dataset into one ROOT file per dataset.

    For every dataset returned by ``files_by_dataset(args.files)`` the input
    files are first scanned for accumulator keys starting with ``"tree"``.
    Every region found below those keys becomes one tree in
    ``<args.outdir>/tree_<dataset>.root`` and every variable becomes a
    float64 branch.  The entries of all input files are then appended to the
    trees, file by file.

    Args:
        args: Object providing ``files`` (passed to ``files_by_dataset``)
            and ``outdir`` (directory receiving the output files).

    Raises:
        ValueError: If the non-empty branches of one region in one input
            file do not all have the same length.
    """
    filelists = files_by_dataset(args.files)
    # The output for each dataset will be written into a separate file
    for dataset, files in filelists.items():
        # Maps each variable to the accumulator key ("tree...") that holds
        # it.  The insertion order of this dict is also the branch order.
        tree_by_variable = {}
        regions = set()

        # Scout out what regions and variables there are
        for fname in files:
            acc = load(fname)

            treenames = [x for x in map(str, acc.keys()) if x.startswith("tree")]

            for tn in treenames:
                for region in acc[tn].keys():
                    regions.add(region)
                    for variable in acc[tn][region].keys():
                        tree_by_variable[variable] = tn

        # Combine
        outpath = pjoin(args.outdir, f"tree_{dataset}.root")
        with uproot.recreate(outpath, compression=uproot.ZLIB(4)) as f:
            for region in regions:
                for fname in files:
                    acc = load(fname)

                    # Collect the values of every variable, dropping empty ones
                    d = {}
                    for variable, tn in tree_by_variable.items():
                        values = acc[tn][region][variable].value
                        if len(values):
                            d[variable] = values

                    if not d:
                        continue

                    # Keys reported by the file carry a ";<cycle>" suffix;
                    # strip it before checking whether the tree exists yet.
                    existing = [re.sub(";.*", "", x.decode("utf-8")) for x in f.keys()]
                    if region not in existing:
                        f[region] = uproot.newtree({x: np.float64 for x in d})

                    # All branches written in one call must be equally long.
                    # Raise explicitly so the check survives ``python -O``.
                    lengths = {len(v) for v in d.values()}
                    if len(lengths) != 1:
                        raise ValueError(
                            f"Branches of region {region!r} in {fname!r} have "
                            f"differing lengths: {sorted(lengths)}"
                        )
                    # write
                    f[region].extend(d)
Exemple #5
0
def test_zlib(tmp_path):
    """Write a string with ZLIB(1) and verify what ROOT reports for the file.

    ROOT must see the kZLIB algorithm at compression level 1 and return the
    original 2000-character string.
    """
    payload = "a"*2000
    path = join(str(tmp_path), "example.root")

    with uproot.recreate(path, compression=uproot.ZLIB(1)) as out:
        out["hello"] = payload

    rootfile = ROOT.TFile.Open(path)
    assert rootfile.GetCompressionAlgorithm() == uproot.const.kZLIB
    assert rootfile.GetCompressionLevel() == 1
    assert str(rootfile.Get("hello")) == payload
    rootfile.Close()
Exemple #6
0
 def save(self, filename=None, working_dir="fitroom", force=True):
     """Write the merged histograms to ``<working_dir>/<filename>``.

     When ``filename`` is not given it defaults to
     ``histograms-<self.name>.root``.  If ``self.ptype`` contains
     ``"signal"``, the name part of that default is replaced by
     ``"signal"`` and ``self.name`` is set to ``"signal"`` as well.  The
     resulting path is always stored in ``self.outfile``.

     Each entry of ``self.merged`` is written as a TH1 built with
     ``uproot_methods.classes.TH1.from_numpy``.  In the key, ``"_sys"`` is
     removed and ``"data"`` is renamed to ``"data_obs"``.

     Args:
         filename: Output file name; derived from ``self.name`` when falsy.
         working_dir: Directory prefix joined to the file name with ``"/"``.
         force: Write the file even when the path check below fails.
     """
     if not filename:
         filename = "histograms-" + self.name + ".root"
         if "signal" in self.ptype:
             filename = filename.replace(self.name, "signal")
             self.name = "signal"
     self.outfile = working_dir + "/" + filename
     # NOTE(review): with force=False this only writes when the output path
     # is an existing *directory*; the intent may have been a check on the
     # output file or on working_dir -- confirm before changing.
     if os.path.isdir(self.outfile) or force:
         # The context manager closes the file even if a histogram fails
         # to convert or write.
         with uproot.recreate(self.outfile, compression=uproot.ZLIB(4)) as fout:
             for name, hist in self.merged.items():
                 name = name.replace("_sys", "").replace("data", "data_obs")
                 fout[name] = uproot_methods.classes.TH1.from_numpy(hist)
Exemple #7
0
def write_tuple(rootfile, array, branches, tree="tree"):
    """
    Write the columns of a 2D numpy array as branches of a ROOT tree (uproot).
      rootfile : name of the ROOT file to (re)create
      array : numpy array to store, indexed as array[:, i] for the i-th
              branch, i.e. of shape (N, V) with N events and V branches
      branches : list of V strings giving the branch names, in column order
      tree : name of the tree
    Every branch is declared as "float64".
    """
    with uproot.recreate(rootfile, compression=uproot.ZLIB(4)) as fout:
        # Declare the tree first, then fill it column by column.
        fout[tree] = uproot.newtree(dict.fromkeys(branches, "float64"))
        columns = {}
        for index, branch in enumerate(branches):
            columns[branch] = array[:, index]
        fout[tree].extend(columns)
Exemple #8
0
def test_flattree_ZLIB(tmp_path):
    """Write a flat tree twice over with ZLIB(5) and read it back.

    The tree is created by assigning a dict of arrays and then extended
    with the same arrays, so both uproot and ROOT must return every branch
    repeated twice.
    """
    path = os.path.join(tmp_path, "newfile.root")

    columns = {
        "branch1": np.arange(100),
        "branch2": 1.1 * np.arange(100),
    }

    with uproot.recreate(path, compression=uproot.ZLIB(5)) as fout:
        fout["tree"] = dict(columns)
        fout["tree"].extend(dict(columns))

    with uproot.open(path) as fin:
        for name, values in columns.items():
            assert fin["tree/" + name].array(library="np").tolist() == values.tolist() * 2

    rootfile = ROOT.TFile(path)
    tree = rootfile.Get("tree")
    for name, values in columns.items():
        assert [getattr(entry, name) for entry in tree] == values.tolist() * 2
    rootfile.Close()
Exemple #9
0
def test_compressed_th2(tmp_path):
    """Copy a ROOT-written TH2F through uproot with ZLIB(1) and check its binning.

    ROOT writes a TH2F with 5 variable-width x bins and 6 variable-width
    y bins.  uproot reads it and rewrites it into a ZLIB(1)-compressed file,
    and ROOT must still report the same numbers of bins.
    """
    filename = join(str(tmp_path), "example.root")
    testfile = join(str(tmp_path), "test.root")

    import numpy as np
    binsx = np.array([1.0, 3.0, 4.0, 10.0, 11.0, 12.0], dtype="float64")
    binsy = np.array([1.0, 3.0, 4.0, 10.0, 11.0, 12.0, 20.0], dtype="float64")
    f = ROOT.TFile.Open(testfile, "RECREATE")
    h = ROOT.TH2F("hvar", "title", 5, binsx, 6, binsy)
    h.Write()
    f.Close()

    t = uproot.open(testfile)
    hist = t["hvar"]
    with uproot.recreate(filename, compression=uproot.ZLIB(1)) as f:
        f["test"] = hist

    f = ROOT.TFile.Open(filename)
    try:
        h = f.Get("test")
        assert h.GetNbinsX() == 5
        assert h.GetNbinsY() == 6
    finally:
        # Release the ROOT file handle even when an assertion fails.
        f.Close()
Exemple #10
0
def test_histogram_ZLIB(tmp_path):
    """Write a 2**21-bin histogram with ZLIB(1) and read it back.

    The histogram is given as a ``(contents, edges)`` tuple with random
    integer contents.  Both uproot and ROOT must report the full number of
    bins and the same content in the last bin.
    """
    path = os.path.join(tmp_path, "newfile.root")

    nbins = 2 ** 21
    counts = np.random.randint(0, 10, nbins)
    bin_edges = np.linspace(0, 1, nbins + 1)
    final_count = counts[-1]

    with uproot.recreate(path, compression=uproot.ZLIB(1)) as fout:
        fout["out"] = (counts, bin_edges)

    with uproot.open(path) as fin:
        content, edges = fin["out"].to_numpy()
        assert len(content) == nbins
        assert len(edges) == nbins + 1
        assert content[-1] == final_count

    rootfile = ROOT.TFile(path)
    roothist = rootfile.Get("out")
    assert roothist.GetNbinsX() == nbins
    assert roothist.GetBinContent(nbins) == final_count
    rootfile.Close()
Exemple #11
0
def test_jaggedtree_ZLIB(tmp_path):
    """Write jagged (variable-length) branches twice over with ZLIB(5).

    Skipped when awkward is not installed.  The tree is created from two
    awkward arrays and extended with the same arrays, so both uproot and
    ROOT must return every branch repeated twice.
    """
    ak = pytest.importorskip("awkward")

    path = os.path.join(tmp_path, "newfile.root")

    jagged = {
        "branch1": ak.Array([[1, 2, 3], [], [4, 5]] * 10),
        "branch2": ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]] * 10),
    }

    with uproot.recreate(path, compression=uproot.ZLIB(5)) as fout:
        fout["tree"] = dict(jagged)
        fout["tree"].extend(dict(jagged))

    with uproot.open(path) as fin:
        for name, values in jagged.items():
            assert fin["tree/" + name].array().tolist() == values.tolist() * 2

    rootfile = ROOT.TFile(path)
    tree = rootfile.Get("tree")
    for name, values in jagged.items():
        assert [list(getattr(entry, name)) for entry in tree] == values.tolist() * 2
    rootfile.Close()
Exemple #12
0
def generate_graphs_and_histograms(
        output_file='tests/samples/non_tree_objects.root'):
    """Create a ROOT file holding one 1D and one 2D histogram.

    Each histogram is filled from 10000 draws of ``np.random.normal(0, 1)``
    per axis and stored under the keys ``'1Dhist'`` and ``'2Dhist'``.
    """
    n_samples = 10000

    def draw():
        return np.random.normal(0, 1, n_samples)

    with uproot.recreate(output_file, compression=uproot.ZLIB(4)) as fout:
        fout['1Dhist'] = np.histogram(draw())
        xs = draw()
        ys = draw()
        fout['2Dhist'] = np.histogram2d(xs, ys)
Exemple #13
0
    parser.add_argument('-out',
                        '--output-path',
                        dest='output_path',
                        type=str,
                        help='path to output GATE file')

    args = parser.parse_args()

    goja = np.loadtxt(args.input_path)
    if goja.shape[1] < 39:
        print(
            "GOJA input should contain 39 columns with data. Use newer GOJA version to generate an input."
        )
        exit(1)

    gate = uproot.recreate(args.output_path, compression=uproot.ZLIB(4))

    gate["Coincidences"] = {
        "globalPosX1": goja[:, 0] * 10,
        "globalPosY1": goja[:, 1] * 10,
        "globalPosZ1": goja[:, 2] * 10,
        "time1": goja[:, 3] * 1e-12,
        "globalPosX2": goja[:, 4] * 10,
        "globalPosY2": goja[:, 5] * 10,
        "globalPosZ2": goja[:, 6] * 10,
        "time2": goja[:, 7] * 1e-12,

        # Needed for classification:
        "eventID1": goja[:, 19].astype(c_int32),
        "eventID2": goja[:, 20].astype(c_int32),
        "comptonPhantom1": goja[:, 21].astype(c_int32),
Exemple #14
0
import uproot

# Three branch declarations: the first two are given their own compression
# argument, the third is given none.
b1 = uproot.newbranch("i4", compression=uproot.ZLIB(5))
b2 = uproot.newbranch("i8", compression=uproot.LZMA(4))
b3 = uproot.newbranch("f4")

# The tree and the file are each created with yet another compression setting.
branchdict = dict(branch1=b1, branch2=b2, branch3=b3)
tree = uproot.newtree(branchdict, compression=uproot.LZ4(4))

with uproot.recreate("example.root", compression=uproot.LZMA(5)) as f:
    f["t"] = tree
    # Fill branch<n> with 1000 copies of the value n.
    f["t"].extend({"branch%d" % n: [n] * 1000 for n in (1, 2, 3)})
Exemple #15
0
def get_bkg_templates(tmp_rname):
    '''
    Write linearized mtt vs costheta background templates to a ROOT file.

    Loads the 'mtt_vs_tlep_ctstar_abs' histogram from the background coffea
    file(s), rebins both dense axes according to `linearize_binning` and then,
    for every lepton flavour, jet multiplicity and systematic, builds the
    templates through Plotter.QCD_Est and writes them to `tmp_rname`.  The
    templates are also collected per jet multiplicity and saved as coffea
    files in `outdir`.

    Parameters
    ----------
    tmp_rname : path of the ROOT file the templates are written to

    Raises
    ------
    ValueError : if 'mtt_vs_tlep_ctstar_abs' is not among the loaded histograms

    Notes
    -----
    Relies on names defined outside this function: bkg_fnames,
    bkg_ttJets_fname, proj_dir, jobid, outdir, args (year, smooth),
    linearize_binning, njets_to_run, sys_to_use, templates_to_smooth and the
    helpers substitute_ttJets, scale_mtop3gev and smoothing.
    '''
    ## variables that only need to be defined/evaluated once
    # merge several input files, or load the single one directly
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    ## get data lumi and scale MC by lumi
    # NOTE(review): data_lumi_year is not referenced again in this function
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

    # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    # NOTE(review): new_xbins stays unbound if xrebinning is neither an
    # ndarray nor a float/int, which would fail at the rebin call below
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    # NOTE(review): same as above for new_ybins
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    # NOTE(review): nbins is not referenced again in this function; len()
    # also requires both rebinnings to be sequences here
    nbins = (len(xrebinning)-1)*(len(yrebinning)-1)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

    # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

        ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)

        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))] # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...


    ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    # an existing file is overwritten with ZLIB(4) compression; otherwise a
    # new file is created without an explicit compression argument
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    # one accumulator per requested jet multiplicity, keyed by lepton flavour
    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'

        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')

        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        # work on a copy so the rebinned histogram is not rescaled again for the next lepton
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        # group datasets into processes, keep only this lepton flavour and integrate out 'leptype'
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            # the four ttJets reconstruction categories are merged into a single 'TT' process
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')


        for jmult in njets_to_run:
            # nominal ('nosys') sideband regions and the btagPass/Tight signal region (all systematics), each linearized
            iso_sb    = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb   = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                    continue

                #set_trace()
                sysname, onlyTT = sys_to_use[sys]
                # 'LEP' in the output name is replaced by the first letter of lepdir ('m' or 'e')
                if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])

                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    # systematics flagged onlyTT are written for TT only; data_obs only for the nominal
                    if (proc != 'TT') and onlyTT: continue
                    if (proc == 'data_obs') and not (sys == 'nosys'): continue
                    # the QCD template name carries the lepton channel as a suffix
                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                    template_histo = qcd_est_histo[proc].integrate('process')
                    # for ue/hdamp/mtop systematics with a dedicated ttJets file, pass the template through substitute_ttJets
                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    # NOTE(review): the nominal template is looked up under the bare proc key, so
                    # 'nosys' presumably has to be processed before these systematics -- confirm
                    # the ordering of sys_to_use
                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)
                        #set_trace()

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)#, debug=True if proc=='VV' else False)
                        #set_trace()

                    ## save template histos to coffea dict
                    # nominal templates are keyed by proc, variations by '<proc>_<sys>'
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    # the coffea output name contains 'smoothed' when args.smooth is set
    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)


    upfout.close()
    print('%s written' % tmp_rname)