def main(args):
    """Build up/down embedded-contamination variations of ``embed.root``.

    Steps, as implemented below:

    1. Read every ``*.root`` file in ``args.input`` accepted by
       ``valid_background`` and keep the rows with ``is_signal > 0`` and
       ``contamination > 0``.
    2. Make two deep copies of those rows, scaling ``evtwt`` by ``0.1``
       (up) and ``-0.1`` (down).
    3. Append the ``is_signal > 0`` rows of ``embed.root`` to both copies.
    4. Write them to ``../SYST_embed_contam_up/embed.root`` and
       ``../SYST_embed_contam_down/embed.root`` relative to ``args.input``.
       When ``'/hdfs'`` appears in ``args.input`` the files are first
       written to the working directory and then moved with ``mv``.

    Args:
        args: object exposing ``input``, the directory with the ROOT files.
    """
    files = [
        ifile for ifile in glob('{}/*.root'.format(args.input))
        if valid_background(ifile)
    ]

    backgrounds = pandas.DataFrame()
    for ifile in files:
        open_file = uproot.open(ifile)
        tree_name = parse_tree_name(open_file.keys())
        events = open_file[tree_name].arrays(['*'], outputtype=pandas.DataFrame)
        signal_events = events[(events['is_signal'] > 0) & (events['contamination'] > 0)]
        backgrounds = pandas.concat([backgrounds, signal_events], sort=False)

    shift_up = backgrounds.copy(deep=True)
    shift_dn = backgrounds.copy(deep=True)
    shift_up['evtwt'] *= 0.1
    shift_dn['evtwt'] *= -0.1

    open_file = uproot.open('{}/embed.root'.format(args.input))
    tree_name = parse_tree_name(open_file.keys())
    oldtree = open_file[tree_name].arrays(['*'])
    # The output trees reuse the branch types of the nominal embedded tree.
    treedict = {ikey: oldtree[ikey].dtype for ikey in oldtree.keys()}

    events = pandas.DataFrame(oldtree)
    signal_events = events[(events['is_signal'] > 0)]
    shift_up = pandas.concat([shift_up, signal_events], sort=False)
    shift_dn = pandas.concat([shift_dn, signal_events], sort=False)

    up_dir = '{}/../SYST_embed_contam_up'.format(args.input)
    down_dir = '{}/../SYST_embed_contam_down'.format(args.input)

    # Argument lists instead of shell strings: a path containing spaces or
    # shell metacharacters is passed through untouched.
    # NOTE(review): assumes ``call`` is ``subprocess.call`` -- confirm.
    call(['mkdir', '-p', up_dir])
    call(['mkdir', '-p', down_dir])

    on_hdfs = '/hdfs' in args.input
    output_name_up = 'embed_up.root' if on_hdfs else '{}/embed.root'.format(up_dir)
    output_name_dn = 'embed_down.root' if on_hdfs else '{}/embed.root'.format(down_dir)

    with uproot.recreate(output_name_up) as f:
        f[tree_name] = uproot.newtree(treedict)
        f[tree_name].extend(shift_up.to_dict('list'))

    with uproot.recreate(output_name_dn) as f:
        f[tree_name] = uproot.newtree(treedict)
        f[tree_name].extend(shift_dn.to_dict('list'))

    if on_hdfs:
        call(['mv', '-v', 'embed_up.root', '{}/embed.root'.format(up_dir)])
        call(['mv', '-v', 'embed_down.root', '{}/embed.root'.format(down_dir)])
def test_read_root_multiple_trees(self, table):
    """Check that reading a multi-tree ROOT file needs an explicit ``treename``.

    Writes a temporary file holding tree ``a`` (one ``int32`` branch with
    five entries) and tree ``b`` (no branches), verifies that
    ``self.TABLE.read`` without a tree name raises ``ValueError`` with a
    message starting 'Multiple trees found', then reads tree ``a``.
    """
    import uproot

    # append hasn't been implemented in uproot 3 yet
    with utils.TemporaryFilename(suffix='.root') as path:
        with uproot.create(path) as rootfile:
            rootfile["a"] = uproot.newtree({"branch": "int32"})
            rootfile["a"].extend({"branch": asarray([1, 2, 3, 4, 5])})
            rootfile["b"] = uproot.newtree()

        with pytest.raises(ValueError) as excinfo:
            self.TABLE.read(path)
        message = str(excinfo.value)
        assert message.startswith('Multiple trees found')

        self.TABLE.read(path, treename="a")
def save_data(folder, data):
    """Write ``data`` into the tree ``data`` of ``<folder>/data.root``.

    Args:
        folder: directory in which ``data.root`` is (re)created.
        data: mapping of branch name to array. Each value's ``dtype``
            defines its branch type, and the mapping itself is handed to
            ``extend``.
    """
    from os.path import join

    target = join(folder, 'data.root')
    with uproot.recreate(target) as rootfile:
        schema = {}
        for branch, values in data.items():
            schema[branch] = values.dtype
        rootfile['data'] = uproot.newtree(schema)
        rootfile['data'].extend(data)
def writeOut(self, outfile, treeName, workSet, prediction):
    """Write input variables and per-group predictions to ``outfile[treeName]``.

    Args:
        outfile: writable uproot file object (supports item assignment).
        treeName: name of the tree to create.
        workSet: indexable by variable name; supplies one column for each
            entry of ``self.allVars``.
        prediction: indexable by position; ``prediction[i]`` becomes the
            column for ``self.groupNames[i]``.

    Every branch is declared as ``"float32"``.
    """
    columns = {}
    for name in self.allVars:
        columns[name] = workSet[name]
    for position, name in enumerate(self.groupNames):
        columns[name] = prediction[position]

    branch_names = self.allVars + self.groupNames
    schema = {name: "float32" for name in branch_names}
    outfile[treeName] = uproot.newtree(schema)
    outfile[treeName].extend(columns)
def table_to_root(table, filename, treename="tree", overwrite=False, append=False, **kwargs):
    """Write a Table to a ROOT file.

    Args:
        table: object exposing ``dtype.descr`` (used as the branch
            definition) and ``columns`` (used as the branch contents).
        filename: path of the ROOT file to write.
        treename: name of the tree to create in the file.
        overwrite: if true the file is opened with ``uproot.recreate``,
            otherwise with ``uproot.create``.
        append: must not be ``True``; appending is not supported.
        **kwargs: ``compression`` is forwarded to the file-creation call;
            everything else is forwarded to ``uproot.newtree``.

    Raises:
        NotImplementedError: if ``append`` is ``True``.
    """
    uproot = _import_uproot_that_can_write_root_files()

    # Split off the keywords that belong to the file constructor.
    createkw = {}
    for key in ("compression",):
        if key in kwargs:
            createkw[key] = kwargs.pop(key)

    if overwrite:
        create_func = uproot.recreate
    else:
        create_func = uproot.create

    if append is True:
        raise NotImplementedError(
            "uproot currently doesn't support appending to existing files",
        )

    tree = uproot.newtree(dict(table.dtype.descr), **kwargs)
    with create_func(filename, **createkw) as outf:
        outf[treename] = tree
        outf[treename].extend(dict(table.columns))
def Export2TTree(sOutputName, fits):
    """Write the columns of ``fits`` to the tree ``Recon`` in a new ROOT file.

    Args:
        sOutputName: path of the ROOT file to (re)create.
        fits: 2-D indexable (``fits[:, k]``); columns 0-5 are written as the
            float branches X, Y, Z, T, Theta, Phi and column 6 as the int
            branch MCID.
    """
    float_names = ("X", "Y", "Z", "T", "Theta", "Phi")
    all_names = float_names + ("MCID",)

    with uproot.recreate(sOutputName) as f:
        branches = {}
        for name in float_names:
            branches[name] = uproot.newbranch(float, title=name)
        branches["MCID"] = uproot.newbranch(int, title="MCID")
        f["Recon"] = uproot.newtree(branches)

        # Column k of ``fits`` feeds the k-th branch in ``all_names``.
        columns = {}
        for column, name in enumerate(all_names):
            columns[name] = fits[:, column]
        f["Recon"].extend(columns)
def create_ntuple(fname, treename, varname, var_array, weightname, weight_array):
    """Write a two-branch ``float64`` tree (variable and weight) to ``fname``.

    Args:
        fname: path of the ROOT file to (re)create.
        treename: name of the tree.
        varname, var_array: branch name and values of the variable.
        weightname, weight_array: branch name and values of the weight.
    """
    with uproot.recreate(fname) as rootfile:
        schema = {varname: "float64", weightname: "float64"}
        rootfile[treename] = uproot.newtree(schema)

        columns = {varname: var_array, weightname: weight_array}
        rootfile[treename].extend(columns)
def ndarray_to_DxAOD(filename, array, branches=branches27D, compression=uproot.ZLIB):
    """Write the columns of ``array`` to the tree ``CollectionTree``.

    Args:
        filename: path of the ROOT file to (re)create.
        array: 2-D indexable (``array[:, i]``); column ``i`` is written to
            the branch named by ``branches[i][0]``.
        branches: sequence of pairs accepted by ``dict``; used as the
            branch definition passed to ``uproot.newtree``.
        compression: accepted for interface compatibility; the original
            implementation never used it and neither does this one.
            NOTE(review): the default is the ``uproot.ZLIB`` class rather
            than an instance -- confirm the intended value before wiring
            it through to ``uproot.recreate``.
    """
    # ``with`` guarantees the file is closed even if writing fails; the
    # previous version left the handle open.
    with uproot.recreate(filename) as f:
        branchdict = dict(branches)
        print(branchdict)
        f["CollectionTree"] = uproot.newtree(branchdict)
        f["CollectionTree"].extend(
            {branch[0]: array[:, i] for i, branch in enumerate(branches)})
def create_file(file_name, distributions, weights, labels):
    """Write one ``jet_pt``/``weight`` tree per label to ``file_name``.

    Args:
        file_name: path of the ROOT file to (re)create.
        distributions: indexable; ``distributions[i]`` holds the ``jet_pt``
            values for ``labels[i]``.
        weights: indexable; ``weights[i]`` holds the matching weights.
        labels: tree names, one per predicted process.
    """
    with uproot.recreate(file_name) as rootfile:
        # write the predicted processes
        for index, label in enumerate(labels):
            schema = {"jet_pt": "float64", "weight": "float64"}
            rootfile[label] = uproot.newtree(schema)

            columns = {
                "jet_pt": distributions[index],
                "weight": weights[index],
            }
            rootfile[label].extend(columns)
def writetorootfile(rootfilename, datadict):
    """Write ``datadict`` to the tree ``mytree`` of a new ROOT file.

    Args:
        rootfilename: path of the ROOT file to (re)create.
        datadict: mapping of branch name to array; each array's ``dtype``
            is used as the branch type.
    """
    branchdict = {key: data.dtype for key, data in datadict.items()}
    tree = uproot.newtree(branches=branchdict)

    with uproot.recreate(rootfilename) as rootfile:
        rootfile['mytree'] = tree
        rootfile['mytree'].extend(datadict)
def make_trees(args):
    """Merge per-file tree accumulators into one ROOT file per dataset.

    For every dataset returned by ``files_by_dataset(args.files)`` the
    input files are scanned once to discover regions and variables, then
    read again to append each region's non-empty columns to
    ``<args.outdir>/tree_<dataset>.root``. All branches are ``np.float64``.

    Args:
        args: object exposing ``files`` and ``outdir``.
    """
    filelists = files_by_dataset(args.files)

    # The output for each dataset will be written into a separate file
    for dataset, files in filelists.items():
        tree_by_variable = {}
        variables = []
        regions = []

        # Scout out what branches there are
        for fname in files:
            acc = load(fname)
            treenames = [key for key in map(str, acc.keys()) if key.startswith("tree")]
            for tn in treenames:
                for region in acc[tn].keys():
                    region_vars = acc[tn][region].keys()
                    regions.append(region)
                    variables.extend(region_vars)
                    for v in region_vars:
                        tree_by_variable[v] = tn

        # Combine
        outpath = pjoin(args.outdir, f"tree_{dataset}.root")
        with uproot.recreate(outpath, compression=uproot.ZLIB(4)) as f:
            for region in set(regions):
                for fname in files:
                    acc = load(fname)
                    d = {x: acc[tree_by_variable[x]][region][x].value for x in variables}

                    # Remove empty entries
                    d = {k: v for k, v in d.items() if len(v)}
                    if not len(d):
                        continue

                    # Create the tree the first time this region has data.
                    existing = [re.sub(";.*", "", x.decode("utf-8")) for x in f.keys()]
                    if region not in existing:
                        f[region] = uproot.newtree({x: np.float64 for x in d.keys()})

                    # All columns must have the same number of entries.
                    lengths = {len(v) for v in d.values()}
                    assert(len(lengths) == 1)

                    # write
                    f[region].extend(d)
def makeABCD(nPoints=10000):
    """Create ``ABCD.root`` with random branches A-D and open it with ROOT.

    The tree ``ABCD`` is filled in five chunks of ``nPoints`` entries, each
    branch drawn from ``np.random.normal(0, 1, nPoints)``. The file is then
    reopened with PyROOT and the aliases ``bigA`` (``A>0.5``) and
    ``smallA`` (``A<0.5``) are set on the tree.

    Args:
        nPoints: number of entries per chunk.

    Returns:
        tuple: ``(tree, f)``, the ROOT tree and the ``ROOT.TFile`` it
        lives in.
    """
    branch_names = ("A", "B", "C", "D")

    # The file name must match the one opened below; it was previously
    # misspelled "ABDC.root", so the reader never saw the file written here.
    with uproot.recreate("ABCD.root") as f:
        f["ABCD"] = uproot.newtree({name: "float32" for name in branch_names})
        for _ in range(5):
            f["ABCD"].extend(
                {name: np.random.normal(0, 1, nPoints) for name in branch_names})

    f = ROOT.TFile("ABCD.root")
    # NOTE(review): wrapping ``f.Get`` in ``ROOT.TTree(...)`` is kept from the
    # original; confirm it is needed rather than using ``f.Get("ABCD")``.
    tree = ROOT.TTree(f.Get("ABCD"))
    tree.SetAlias("bigA", "A>0.5")
    tree.SetAlias("smallA", "A<0.5")
    return tree, f
def _write_root(file, table, treename='Events', compression=-1, step=1048576):
    """Write ``table`` to a tree in a new ROOT file, ``step`` rows at a time.

    Args:
        file: path of the ROOT file to (re)create.
        table: mapping of branch name to array; each array's ``dtype`` is
            the branch type. The length of the first column is taken as
            the number of rows.
        treename: name of the tree to create.
        compression: uproot compression object; the sentinel ``-1`` selects
            ``uproot.write.compress.LZ4(4)``.
        step: number of rows written per ``extend`` call.
    """
    import uproot

    if compression == -1:
        compression = uproot.write.compress.LZ4(4)

    with uproot.recreate(file, compression=compression) as fout:
        fout[treename] = uproot.newtree({k: v.dtype for k, v in table.items()})

        # An empty table has no column to measure; treat it as zero rows.
        n_rows = len(next(iter(table.values()), ()))

        # Cover every row. The previous ``start < n_rows - 1`` condition
        # dropped the final row whenever ``n_rows % step == 1``.
        for start in range(0, n_rows, step):
            fout[treename].extend(
                {k: v[start:start + step] for k, v in table.items()})
def open_root_file(self):
    """Create the output file and declare the ``EVENT_NTUPLE`` tree.

    The handle is stored on ``self.file_handle`` and left open. The tree
    has ``pulse_height`` and ``chan`` branches sized by ``hit_count`` and a
    ``timestamp`` branch, all typed ``numpy.dtype(">i8")``; the tree is
    created with ``compression=None``.
    """
    print("going to open a root file!")
    self.file_handle = uproot.recreate(self.filename)

    int64_be = numpy.dtype(">i8")
    layout = {
        "pulse_height": uproot.newbranch(int64_be, size="hit_count"),
        "chan": uproot.newbranch(int64_be, size="hit_count"),
        "timestamp": uproot.newbranch(int64_be),
    }
    self.file_handle["EVENT_NTUPLE"] = uproot.newtree(layout, compression=None)
def create_file_pseudodata(file_name, pseudodata):
    """Write pseudodata and a lepton-charge column to the tree ``pseudodata``.

    Args:
        file_name: path of the ROOT file to (re)create.
        pseudodata: sized sequence of ``jet_pt`` values; its length is
            passed to ``create_lepton_charge`` to build ``lep_charge``.
    """
    n_events = len(pseudodata)

    with uproot.recreate(file_name) as rootfile:
        # write pseudodata
        lep_charge = create_lepton_charge(n_events)

        schema = {"jet_pt": "float64", "lep_charge": "int"}
        rootfile["pseudodata"] = uproot.newtree(schema)

        columns = {"jet_pt": pseudodata, "lep_charge": lep_charge}
        rootfile["pseudodata"].extend(columns)
def write_tuple(rootfile, array, branches, tree="tree"):
    """Store a 2-D numpy array in a ROOT file using uproot.

    Args:
        rootfile: name of the ROOT file to (re)create.
        array: array of shape (N, V), where N is the number of events in
            the ntuple and V is the number of branches.
        branches: list of V strings giving the branch names.
        tree: name of the tree.

    All branches are written in double precision, and the file is created
    with ``uproot.ZLIB(4)`` compression.
    """
    with uproot.recreate(rootfile, compression=uproot.ZLIB(4)) as out:
        schema = {}
        for name in branches:
            schema[name] = "float64"
        out[tree] = uproot.newtree(schema)

        # Column i of the array belongs to the i-th branch name.
        columns = {}
        for column, name in enumerate(branches):
            columns[name] = array[:, column]
        out[tree].extend(columns)
def create_file(file_name, distributions, weights, labels):
    """Write one ``jet_pt``/``weight``/``lep_charge`` tree per label.

    Args:
        file_name: path of the ROOT file to (re)create.
        distributions: indexable; ``distributions[i]`` holds the ``jet_pt``
            values for ``labels[i]``.
        weights: indexable; ``weights[i]`` holds the matching weights. The
            length of ``weights[0]`` is the event count passed to
            ``create_lepton_charge`` for every tree.
        labels: tree names, one per predicted process.
    """
    n_events = len(weights[0])

    with uproot.recreate(file_name) as rootfile:
        # write the predicted processes
        for index, label in enumerate(labels):
            lep_charge = create_lepton_charge(n_events)

            schema = {
                "jet_pt": "float64",
                "weight": "float64",
                "lep_charge": "int",
            }
            rootfile[label] = uproot.newtree(schema)

            columns = {
                "jet_pt": distributions[index],
                "weight": weights[index],
                "lep_charge": lep_charge,
            }
            rootfile[label].extend(columns)
def merge_root(rootfiles, outputfile, incrementRunId=False):
    """Merge the trees of several ROOT files into one output file.

    Every key of every input file that exposes ``keys`` is treated as a
    tree. For each non-empty branch the arrays of all files are
    concatenated with ``np.append``; branches whose first element is of
    type ``bytes`` are replaced by zeros. Branches starting with
    ``eventID`` (or ``runID`` when ``incrementRunId`` is true) are offset
    by ``max + 1`` of the values already merged, so the IDs keep increasing
    across files.

    Args:
        rootfiles: iterable of input ROOT file paths.
        outputfile: path of the ROOT file to (re)create.
        incrementRunId: offset ``runID*`` branches instead of ``eventID*``.
    """
    import uproot

    # ``with`` closes the output file on every exit path; it was previously
    # left open. It is still opened before any input is read.
    with uproot.recreate(outputfile) as out:
        # Previous ID values to be able to increment runID or eventID
        previousId = {}

        # create the dict reading all input root files
        trees = {}
        pbar = tqdm.tqdm(total=len(rootfiles))
        for file in rootfiles:
            root = uproot.open(file)
            root_keys = unicity(root.keys())
            for tree in root_keys:
                if hasattr(root[tree], 'keys'):
                    if tree not in trees:
                        trees[tree] = {}
                        trees[tree]["rootDictType"] = {}
                        trees[tree]["rootDictValue"] = {}
                        previousId[tree] = {}
                    for branch in root[tree].keys():
                        array = root[tree].array(branch)
                        if len(array) > 0:
                            # Byte-string branches are replaced by zeros.
                            if type(array[0]) is type(b'c'):
                                array = np.array([0 for xi in array])
                            if branch not in trees[tree]["rootDictType"]:
                                trees[tree]["rootDictType"][branch] = type(array[0])
                                trees[tree]["rootDictValue"][branch] = np.array([])
                            if (not incrementRunId and branch.decode('utf-8').startswith('eventID')) or (incrementRunId and branch.decode('utf-8').startswith('runID')):
                                if branch not in previousId[tree]:
                                    previousId[tree][branch] = 0
                                array += previousId[tree][branch]
                                previousId[tree][branch] = max(array) + 1
                            trees[tree]["rootDictValue"][branch] = np.append(trees[tree]["rootDictValue"][branch], array)
            pbar.update(1)
        pbar.close()

        # Set the dict in the output root file
        for tree in trees:
            if not trees[tree]["rootDictValue"] == {} or not trees[tree]["rootDictType"] == {}:
                out[tree] = uproot.newtree(trees[tree]["rootDictType"])
                out[tree].extend(trees[tree]["rootDictValue"])
def saveevents(eventslist, filename, treename="Nominal"):
    """Sum a list of event collections and write them to ``<filename>.root``.

    The collections are combined with ``+``. The result must expose
    ``weight`` and a ``data`` mapping with bytes keys; each key is decoded
    as UTF-8 and written as an ``np.float32`` branch next to ``weight``.
    Every branch is written as a single basket, without compression.

    Args:
        eventslist: iterable of event collections supporting ``+``.
        filename: output path without the ``.root`` suffix.
        treename: name of the tree to create.
    """
    alldata = None
    for each in eventslist:
        alldata = each if alldata is None else alldata + each

    branchdict = {"weight": np.float32}
    extend_branchdict = {"weight": alldata.weight}
    for rawkey in alldata.data.keys():
        branchname = rawkey.decode("utf-8")
        branchdict[branchname] = np.float32
        extend_branchdict[branchname] = alldata.data[rawkey]

    tree = uproot.newtree(branchdict, compression=None)
    with uproot.recreate(filename + ".root", compression=None) as f:
        f[treename] = tree
        for branchname, content in extend_branchdict.items():
            f[treename][branchname].newbasket(content.tolist())
def main(args):
    """Build ``QCD.root`` from same-sign data with an OS/SS weight.

    Data events from ``data_obs.root`` are split by ``categorize`` into
    OS/SS passing/failing groups. Each sample in the module-level
    ``backgrounds`` is added to those groups with ``evtwt`` negated, which
    subtracts it from data. The OS/SS ratio is the ratio of summed
    ``evtwt`` in the two failing groups; it is stored as ``fake_weight`` on
    a copy of the same-sign passing events, which is then written out.

    Args:
        args: object exposing ``input``, the directory with the ROOT files.
    """
    def _append(current, extra):
        # Stack ``extra`` under ``current`` with a fresh index.
        return pandas.concat([current, extra], ignore_index=True, sort=False)

    open_file = uproot.open('{}/data_obs.root'.format(args.input))
    tree_name = parse_tree_name(open_file.keys())
    oldtree = open_file[tree_name].arrays(['*'])

    # Output schema: the data branches plus the new weight branch.
    treedict = {}
    for ikey in oldtree.keys():
        treedict[ikey] = oldtree[ikey].dtype
    treedict['fake_weight'] = numpy.float64

    # categorize data
    events = pandas.DataFrame(oldtree)
    os_passing, ss_passing, os_failing, ss_failing = categorize(events)

    # categorize backgrounds
    for bkg in backgrounds:
        bkg_path = '{}/{}.root'.format(args.input, bkg)
        bkg_events = uproot.open(bkg_path)[tree_name].arrays(
            ['*'], outputtype=pandas.DataFrame)
        bkg_events['evtwt'] *= -1  # subtract backgrounds from data
        bkg_os_passing, bkg_ss_passing, bkg_os_failing, bkg_ss_failing = categorize(
            bkg_events)

        os_passing = _append(os_passing, bkg_os_passing)
        ss_passing = _append(ss_passing, bkg_ss_passing)
        os_failing = _append(os_failing, bkg_os_failing)
        ss_failing = _append(ss_failing, bkg_ss_failing)

    # calculate ratio
    os_ss_ratio = os_failing['evtwt'].sum() / ss_failing['evtwt'].sum()
    # Single-argument parenthesised form prints the same text under both
    # Python 2 and Python 3.
    print('OSSS ratio: {}'.format(os_ss_ratio))

    # use passing same sign events for shape
    output_events = ss_passing.copy(deep=True)
    # add weight branch to output
    output_events['fake_weight'] = os_ss_ratio

    with uproot.recreate('{}/QCD.root'.format(args.input)) as f:
        f[tree_name] = uproot.newtree(treedict)
        f[tree_name].extend(output_events.to_dict('list'))
def WriteTree(df, cols, treeName, fileName):
    '''
    Helper method to write selected data-frame columns as a tree with uproot

    Arguments
    ----------
    - df: pandas data frame to be written as a tree in a root file
    - cols: names of the columns to write
    - treeName: name of the output tree
    - fileName: name of the output file

    File and tree are both created with ``uproot.LZ4(4)`` compression.
    '''
    # define all branches as float for the moment
    outBranches = dict.fromkeys(cols, np.float32)

    with uproot.recreate(fileName, compression=uproot.LZ4(4)) as outFile:
        tree = uproot.newtree(outBranches, compression=uproot.LZ4(4))
        outFile[treeName] = tree
        selected = df[cols]
        outFile[treeName].extend(dict(selected))
def create_fakes(input_name, tree_name, channel_prefix, treedict, output_dir, fake_file, fractions, sample, doSysts=False):
    """Write fake-weighted anti-isolated events of ``sample`` to a ROOT file.

    Events with ``is_antiTauIso > 0`` are selected and given a
    ``fake_weight`` column computed row by row with ``get_weight`` over the
    module-level ``filling_variables``. With ``doSysts`` one extra column
    named ``<syst[0]>_<syst[1]>`` is added per entry of the module-level
    ``systs``. For every sample other than ``'data_obs'`` the weights are
    multiplied by -1.

    Args:
        input_name: directory containing ``<sample>.root``.
        tree_name: name of the tree to read and to write.
        channel_prefix: forwarded to ``FFApplicationTool`` and ``get_weight``.
        treedict: branch definition for the output tree.
        output_dir: directory receiving ``jetFakes_<sample>.root``.
        fake_file: forwarded to ``FFApplicationTool``.
        fractions: forwarded to ``get_weight``.
        sample: sample name (file stem).
        doSysts: also compute the systematic weight columns.

    Returns:
        None
    """
    ff_weighter = FFApplicationTool(fake_file, channel_prefix)

    source = uproot.open('{}/{}.root'.format(input_name, sample))
    events = source[tree_name].arrays(['*'], outputtype=pandas.DataFrame)
    anti_events = events[(events['is_antiTauIso'] > 0)].copy()

    not_data = sample != 'data_obs'

    nominal = anti_events[filling_variables].apply(
        lambda row: get_weight(row, ff_weighter, fractions, channel_prefix),
        axis=1)
    anti_events['fake_weight'] = nominal.values
    if not_data:
        anti_events['fake_weight'] *= -1

    if doSysts:
        for syst in systs:
            # Single-argument parenthesised form prints the same text under
            # both Python 2 and Python 3.
            print('Processing: {} {}'.format(sample, syst))
            column = syst[0] + "_" + syst[1]
            shifted = anti_events[filling_variables].apply(
                lambda row: get_weight(
                    row, ff_weighter, fractions, channel_prefix, syst=syst),
                axis=1)
            anti_events[column] = shifted.values
            if not_data:
                anti_events[column] *= -1

    with uproot.recreate('{}/jetFakes_{}.root'.format(output_dir, sample)) as f:
        f[tree_name] = uproot.newtree(treedict)
        f[tree_name].extend(anti_events.to_dict('list'))

    print('Finished writing {}'.format(sample))
    return None
def classify(ifile, tree_prefix, scaler, scaler_info, model_name, output_dir):
    """Add a network discriminant branch ``NN_disc`` to a copy of a tree.

    The model ``Output/models/<model_name>.hdf5`` is loaded, the branches
    listed in ``scaler_info`` are read, cleaned (NaN and +/-inf replaced by
    -100), scaled with ``scaler`` and passed to ``model.predict``. All
    original branches plus ``NN_disc`` are written to
    ``Output/trees/<output_dir>/<file stem>.root``.

    Args:
        ifile: path of the input ROOT file.
        tree_prefix: name of the tree to read and to write.
        scaler: object exposing ``transform``.
        scaler_info: branch names fed to the network.
        model_name: stem of the model file.
        output_dir: sub-directory of ``Output/trees`` for the result.

    Returns:
        None
    """
    fname = ifile.replace('.root', '').split('/')[-1]

    # load the model
    model = load_model('Output/models/{}.hdf5'.format(model_name))

    # read input and get things ready for output TTree
    source_tree = uproot.open(ifile)[tree_prefix]
    oldtree = source_tree.arrays(['*'])
    treedict = {}
    for ikey in oldtree.keys():
        treedict[ikey] = oldtree[ikey].dtype
    treedict['NN_disc'] = numpy.float64

    # drop all variables not going into the network
    to_classify = source_tree.arrays(scaler_info, outputtype=pd.DataFrame)

    # clean inputs
    to_classify.fillna(-100, inplace=True)
    to_classify.replace([numpy.inf, -numpy.inf], -100, inplace=True)

    # scale correctly
    scaled_values = scaler.transform(to_classify.values)
    scaled = pd.DataFrame(scaled_values,
                          columns=to_classify.columns.values,
                          dtype='float64')

    # There's room here to try and optimize by only classifying VBF events and storing a
    # default value for others. Just need to figure out how to keep everything in order
    # so that it can be slotted back into the correct place in the TTree.

    # do the classification
    guesses = model.predict(scaled.values, verbose=False)

    outpath = 'Output/trees/{}/{}.root'.format(output_dir, fname)
    with uproot.recreate(outpath) as f:
        f[tree_prefix] = uproot.newtree(treedict)
        # Flatten the predictions to one value per event.
        oldtree['NN_disc'] = guesses.reshape(len(guesses))
        f[tree_prefix].extend(oldtree)

    return None
def predict():
    """Apply the trained per-fold boosters to each file and write the result.

    For every entry of the module-level ``flist_pred`` the file is loaded
    with ``make_dmatrix``, each fold's booster fills the rows it is
    responsible for in the ``args.bdt_varname`` column, and the data frame
    is written to the same relative path under ``args.outputdir`` as the
    tree ``Events``. Missing input files are reported and skipped.
    """
    output_dir = args.outputdir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for f in flist_pred:
        fpath = os.path.join(args.inputdir, f)
        if not os.path.exists(fpath):
            print('Ignore non-existing file: %s' % fpath)
            # Actually skip the file; without this the code went on to load
            # the path it had just reported as missing.
            continue

        df, dmats = make_dmatrix(fpath, predict=True, k_folds=k_folds)
        # -99 marks rows that no fold has filled yet.
        df[args.bdt_varname] = -99 * np.ones(df.shape[0])
        for idx, (pos, dmat) in enumerate(dmats):
            bst = xgb.Booster({'predictor': 'cpu_predictor'})
            bst.load_model('%s.%d' % (os.path.join(args.model_dir, model_name), idx))
            df.loc[pos, args.bdt_varname] = bst.predict(dmat)
        # Every row must have been predicted by some fold.
        assert not np.any(df[args.bdt_varname] == -99)

        outputpath = os.path.join(output_dir, f)
        if not os.path.exists(os.path.dirname(outputpath)):
            os.makedirs(os.path.dirname(outputpath))
        print('Write prediction file to %s' % outputpath)

        with uproot.recreate(outputpath, compression=uproot.write.compress.LZ4(4)) as fout:
            fout['Events'] = uproot.newtree(
                {k: df[k].dtype for k in df.keys()})
            step = 2**20
            # Cover every row. The previous ``start < len(df) - 1`` condition
            # dropped the final row whenever ``len(df) % step == 1``.
            for start in range(0, len(df), step):
                fout['Events'].extend(
                    {k: df[k][start:start + step].values for k in df.keys()})
def write_tree(branches, filename, treename):
    """
    Write a TTree to a new ROOT file from a collection of arrays.

    One-dimensional numpy arrays become flat branches. Objects exposing a
    numpy ``content`` attribute are written as variable-length branches
    with an extra counter branch named ``<name>_n`` filled from
    ``content.count()``.

    Parameters
    ----------
    branches : dict
        Dictionary of `branchname: branch_data` pairs.
    filename : str
        Pathname of new ROOT file.
    treename : str
        Name of new TTree.

    Raises
    ------
    NotImplementedError
        If a branch holds 64-bit integers in a variable-length array, or is
        of any other unsupported type (for example a list or a
        multi-dimensional numpy array).
    """
    branch_definition_dictionary = dict()
    branch_content_dictionary = dict()
    for name, content in branches.items():
        if isinstance(content, np.ndarray) and content.ndim == 1:
            branch_definition_dictionary[name] = uproot.newbranch(
                content.dtype)
        # ``getattr`` with a default lets objects without a ``content``
        # attribute fall through to the "not supported" error below instead
        # of failing with AttributeError.
        elif isinstance(getattr(content, 'content', None), np.ndarray):
            flat = content.content
            if flat.dtype == np.dtype('int64'):
                raise NotImplementedError(
                    'Jagged arrays of 64-bit integers are not yet'
                    ' supported due to a known bug in the tree-writing'
                    ' code'
                    ' (https://github.com/scikit-hep/uproot/issues/506)'
                    '.')
            size_name = name + '_n'
            branch_definition_dictionary[name] = uproot.newbranch(
                flat.dtype, size=size_name)
            branch_content_dictionary[size_name] = content.count()
        else:
            raise NotImplementedError('Branch type (' + str(type(content))
                                      + ') not supported')
        branch_content_dictionary[name] = content

    with uproot.recreate(filename) as file:
        file[treename] = uproot.newtree(branch_definition_dictionary)
        file[treename].extend(branch_content_dictionary)
} out = uproot.recreate(args.f_out) #, compression = uproot.LZMA(8)) first = True for rfile in args.f_in: tf = uproot.open(rfile) tt = tf['tree'] arrays = tt.arrays(branches + mc_feats if args.mc else branches) raw = {i.decode(): j for i, j in arrays.items()} # create tree for the first file only if first: first = False out['tree'] = uproot.newtree( {c: unsigned_patch.get(a.dtype, a.dtype) for c, a in raw.items()}) # select only matched candidates if args.mc: if args.resonant: # require all three candidates to come from the same B same_mother = (raw['e1_genGMaId'] == raw['e2_genGMaId']) & \ (raw['e1_genGMaId'] == raw['k_genMumId']) & (np.abs(raw['e1_genGMaId']) == 521) else: # require all three candidates to come from the same B and have low q**2 same_mother = (raw['e1_genMumId'] == raw['e1_genMumId']) & \ (raw['e1_genMumId'] == raw['k_genMumId']) & (np.abs(raw['e1_genMumId']) == 521) & \ (raw['B_mll'] < 2.45) # Check all MC matches and B mass within 3 sigmas
def save_dataframe(self, filename, df, df_true=False, histograms=[]):
    """Write track, event and histogram information to a ROOT file.

    The file ``self.output_dir + filename`` is (re)created and receives:

    * ``tree_Particle_gen`` filled from ``self.track_df`` (only when
      ``df_true`` is set),
    * ``tree_Particle`` filled from ``df``,
    * the event tree named ``self.event_tree_name`` filled from
      ``self.event_df_orig``,
    * ``hNevents``, whose second bin holds the number of unique ``ev_id``
      values in ``df``,
    * every ``(title, histogram)`` pair in ``histograms``.

    Args:
        filename: file name appended to ``self.output_dir``.
        df: detector-level track data frame.
        df_true: also write the truth-level particle tree.
        histograms: iterable of ``(title, histogram)`` pairs.
    """
    # Create output directory if it does not already exist
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir)

    particle_columns = ("run_number", "ev_id", "ParticlePt", "ParticleEta", "ParticlePhi")
    particle_types = (int, int, float, float, float)
    event_columns = ("is_ev_rej", "run_number", "ev_id", "z_vtx_reco")
    event_types = (int, int, int, float)

    # Open output directory and (re)create rootfile
    with uproot.recreate(self.output_dir + filename) as f:

        if df_true:
            # Create tree with truth particle info
            title = 'tree_Particle_gen'
            branchdict = dict(zip(particle_columns, particle_types))
            print("Length of truth track tree: %i" % len(self.track_df))
            f[title] = uproot.newtree(branchdict, title=title)
            f[title].extend({name: self.track_df[name] for name in particle_columns})

        # Create tree with detector-level particle info
        title = 'tree_Particle'
        branchdict = dict(zip(particle_columns, particle_types))
        print("Length of detector-level track tree: %i" % len(df))
        f[title] = uproot.newtree(branchdict, title=title)
        f[title].extend({name: df[name] for name in particle_columns})

        # Create tree with event char
        title = self.event_tree_name
        branchdict = dict(zip(event_columns, event_types))
        f[title] = uproot.newtree(branchdict, title=title)
        f[title].extend({name: self.event_df_orig[name] for name in event_columns})

        # Write hNevents histogram: number of accepted events at detector level
        n_accepted = df["ev_id"].nunique()
        f["hNevents"] = (np.array([0, n_accepted]), np.array([-0.5, 0.5, 1.5]))

        # Write histograms to file too, if any are passed
        for title, h in histograms:
            f[title] = h
print(merged3['t_p'].to_numpy()) a=merged3['t_p'].to_numpy() print(type(a)) print(a.ndim) print(a.shape) ''' print(merged3['t_ieta'].dtype) with uproot.recreate(foutput+".root") as f: f["tree"] = uproot.newtree({"t_Event": np.int32, "t_p_PU": np.float64, "t_eHcal_PU":np.float64, "t_delta_PU":np.float64, "t_p_NoPU": np.float64, "t_eHcal_noPU":np.float64, "t_delta_NoPU":np.float64, "t_ieta":np.int32}) f["tree"].extend({"t_Event": merged3['t_Event'], "t_p_PU": merged3['t_p_x'].to_numpy(), "t_eHcal_PU": merged3['t_eHcal_x'].to_numpy(), "t_delta_PU": merged3['t_delta_x'].to_numpy(), "t_p_NoPU": merged3['t_p_y'].to_numpy(), "t_eHcal_noPU": merged3['t_eHcal_y'].to_numpy(), "t_delta_NoPU": merged3['t_delta_y'].to_numpy(), "t_ieta": merged3['t_ieta'].to_numpy()})
def create_file_pseudodata(file_name, pseudodata):
    """Write ``pseudodata`` as the ``jet_pt`` branch of tree ``pseudodata``.

    Args:
        file_name: path of the ROOT file to (re)create.
        pseudodata: values for the ``float64`` branch ``jet_pt``.
    """
    with uproot.recreate(file_name) as rootfile:
        # write pseudodata
        schema = {"jet_pt": "float64"}
        rootfile["pseudodata"] = uproot.newtree(schema)

        columns = {"jet_pt": pseudodata}
        rootfile["pseudodata"].extend(columns)
df['e2_genMumId' ] = sel_bcands.e2.motherPdgId df['e1_genGMaId' ] = sel_bcands.e1.granmaPdgId df['e2_genGMaId' ] = sel_bcands.e2.granmaPdgId # df['trgmu_eta'] = sel_bcands.trg_mu.p4.eta # df['trgmu_pt'] = sel_bcands.trg_mu.p4.pt final_df = pd.concat((final_df, df)) # final_df.to_hdf(args.f_out, 'df', mode = 'w') print('DONE! Processed events: ', nprocessed) print('Saved events:', final_df.shape[0]) # import pdb; pdb.set_trace() import numpy as np # convert all unsigned integer to signed, as the streaming is not implemented yet unsigned_patch = {np.dtype(f'uint{i}') : np.dtype(f'int{i}') for i in [8, 16, 32, 64]} out = uproot.recreate(f_out)#, compression = uproot.LZMA(8)) out['tree'] = uproot.newtree({ c : unsigned_patch.get(final_df[c].dtype, final_df[c].dtype) for c in final_df.columns }) out['tree'].extend({c : final_df[c].values for c in final_df.columns}) # # out['tree'] = final_final_df # # uproot.newtree({'a' : np.int32, 'b' : np.float32}) # # out["tree"].extend({'a' : np.array([1,2,3,4]), 'b' : np.array([1.1, 2.2, 3.3, 4.4])}) out.close()