def test_object_selection():
    a = rnp.root2array(load('vary*.root'), branches='n_int',
                       object_selection={'n_int % 2 == 0': 'n_int'})
    for suba in a:
        assert_true((suba % 2 == 0).all())

    # branch does not exist
    assert_raises(ValueError, rnp.root2array, load('vary*.root'),
                  branches='n_int',
                  object_selection={'n_int % 2 == 0': 'DNE'})

    # duplicate branch in selection list
    assert_raises(ValueError, rnp.root2array, load('vary*.root'),
                  branches='n_int',
                  object_selection={'n_int % 2 == 0': ['n_int', 'n_int']})

    # test object selection on variable-length expression
    a = rnp.root2array(load('object*.root'), branches='lines.GetX1()',
                       object_selection={'lines.GetX1() > 3': 'lines.GetX1()'})
    for suba in a:
        assert_true((suba > 3).all())

    # attempting to apply object selection on fixed-length array
    # currently not implemented since this changes the output type from
    # fixed-length to variable-length
    assert_raises(TypeError, rnp.root2array, load("fixed*.root"),
                  branches='n_int',
                  object_selection={'n_int % 2 == 0': 'n_int'})

    # test with vectors
    a = rnp.root2array(load('vector.root'), branches='v_i',
                       object_selection={'v_i % 2 == 0': 'v_i'})
    for suba in a:
        assert_true((suba % 2 == 0).all())
def test_array2root():
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False)],
        dtype=[
            ('x', np.int32),
            ('y', np.float32),
            ('z', np.float64),
            ('w', np.bool)])
    with temp() as tmp:
        rnp.array2root(a, tmp.GetName(), mode='recreate')
        a_conv = rnp.root2array(tmp.GetName())
        assert_array_equal(a, a_conv)

        # extend the tree
        rnp.array2root(a, tmp.GetName(), mode='update')
        a_conv2 = rnp.root2array(tmp.GetName())
        assert_array_equal(np.hstack([a, a]), a_conv2)

        # write into subdirectory
        tname = 'root/sub/tree'
        rnp.array2root(a, tmp.GetName(), treename=tname, mode='update')
        a_conv3 = rnp.root2array(tmp.GetName(), treename=tname)
        assert_array_equal(a, a_conv3)

        # try creating tree with conflicting name
        assert_raises(IOError, rnp.array2root, a, tmp.GetName(),
                      treename='root/sub', mode='update')

        # try creating subdirectory with conflicting name
        assert_raises(IOError, rnp.array2root, a, tmp.GetName(),
                      treename='root/sub/tree/error', mode='update')
def test_array2tree_fixed_length_arrays():
    f = load(['fixed1.root', 'fixed2.root'])
    a = rnp.root2array(f)
    with temp() as tmp:
        rnp.array2root(a, tmp.GetName(), mode='recreate')
        a_conv = rnp.root2array(tmp.GetName())
        assert_array_equal(a, a_conv)
def load_data(data_path, branch_names, dataset_names, dataset_ranges=[]):
    """Import data from several ROOT files to a recarray."""
    l_raw_vars = []
    l_weight = []
    l_origin = []
    for i, d_name in enumerate(dataset_names):
        f_name = "{}{}.root".format(data_path, d_name)
        if "BTagCSV" in d_name:
            d_weight = 1.
        else:
            d_weight = mc_samples[d_name]["xs"] / mc_samples[d_name]["gen_events"]
        if len(dataset_ranges) == len(dataset_names):
            l_raw_vars.append(root2array(f_name, "tree", branch_names,
                                         stop=dataset_ranges[i]))
        else:
            l_raw_vars.append(root2array(f_name, "tree", branch_names))
        n_ev = l_raw_vars[-1].shape[0]
        l_weight.append(np.full((n_ev), d_weight, 'f8'))
        l_origin.append(np.full((n_ev), d_name, 'a20'))
    raw_vars = stack_arrays(l_raw_vars, asrecarray=True, usemask=False)
    weight = stack_arrays(l_weight, asrecarray=True, usemask=False)
    origin = stack_arrays(l_origin, asrecarray=True, usemask=False)
    raw_vars = append_fields(raw_vars, ["origin", "weight"], [origin, weight],
                             asrecarray=True, usemask=False)
    return raw_vars
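# A minimal usage sketch for load_data; the path, branch list and dataset
# names below are hypothetical. "BTagCSV" datasets are treated as data
# (unit weight); any other name must have an entry in the global mc_samples
# dict with "xs" and "gen_events" keys.
data = load_data('/path/to/ntuples/', ['HT', 'MET'],
                 ['BTagCSV', 'QCD_HT500to700'],
                 dataset_ranges=[100000, 50000])
print(data.dtype.names)  # original branches plus 'origin' and 'weight'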
def test_expression():
    rec = rnp.root2array(load('single*.root'))
    rec2 = rnp.root2array(load('single*.root'), branches=['f_float*2'])
    assert_array_equal(rec['f_float'] * 2, rec2['f_float*2'])

    a = rnp.root2array(load('single*.root'), branches='Entry$')
    assert_equal(a.dtype, np.int32)
    assert_array_equal(a, np.arange(a.shape[0]))
def compute_N_B_events_MC(track_file, vertex_file, name=""):
    Bevents_tracks = pandas.DataFrame(root_numpy.root2array(
        track_file, branches=['run', 'event', 'IPs']))
    Bevents_tracks = Bevents_tracks.ix[numpy.isfinite(Bevents_tracks.IPs), :]
    B_events_vertices = pandas.DataFrame(root_numpy.root2array(
        vertex_file, branches=['run', 'event', 'vcharge']))
    B_events_vertices = B_events_vertices[B_events_vertices.vcharge > 0]
    B_events = pandas.concat([Bevents_tracks, B_events_vertices])
    B_events['event_id'] = B_events.run.apply(str) + '_' + B_events.event.apply(str)
    B_events['N_sig_sw'] = 1
    N_B_events = get_events_number(B_events)
    return N_B_events
def test_selection_and_expression():
    ref = len(rnp.root2array(
        load('test.root'), branches=['x', 'y'], selection='z>0'))
    assert_equal(ref, len(rnp.root2array(
        load('test.root'), branches=['x', 'y', 'z'], selection='z>0')))
    assert_equal(ref, len(rnp.root2array(
        load('test.root'), branches=['x', 'x*y'], selection='z>0')))
    assert_equal(ref, len(rnp.root2array(
        load('test.root'), branches=['x', 'x*z'], selection='z>0')))
def test_slice():
    a = rnp.root2array(load('single1.root'), stop=10).view(np.recarray)
    assert_equal(len(a), 10)
    assert_equal(a.n_int[-1], 10)

    a = rnp.root2array(load('single1.root'), stop=11, start=1).view(np.recarray)
    assert_equal(len(a), 10)
    assert_equal(a.n_int[-1], 11)

    a = rnp.root2array(load('single1.root'), stop=105, start=95).view(np.recarray)
    assert_equal(len(a), 5)
    assert_equal(a.n_int[-1], 100)
def test_single():
    f = load('single1.root')
    a = rnp.root2array(f)
    check_single(a)

    # specify tree name
    a = rnp.root2array(f, treename='tree')
    check_single(a)

    # tree2array
    f = get_file('single1.root')
    tree = f.Get('tree')
    check_single(rnp.tree2array(tree))
def run(name, source, quick=False):
    print time.asctime(time.localtime()), "Filling BDT Branches"

    branch_names = joblib.load("pickle/variables.pkl")

    if quick == True:
        signal = joblib.load('pickle/all_signalq.pkl')
        clf = joblib.load("pickle/" + name + "quick.pkl")
    else:
        signal = joblib.load('pickle/all_signal.pkl')
        clf = joblib.load("pickle/" + name + ".pkl")

    # predict and write probability of each MC event being signal
    bdt_MC_predicted = clf.predict_proba(signal)
    bdt_MC_predicted.dtype = [('GradBoost_prob', np.float64)]
    array2root((np.hsplit(bdt_MC_predicted, 2)[1]),
               "/net/storage03/data/users/dlafferty/NTuples/SignalMC/2012/combined/Bs2phiphi_MC_2012_combined_corrected_TupleA_BDT.root",
               "DecayTree")

    # predict and write probability of every data event being signal
    all_data = root2array("/net/storage03/data/users/dlafferty/NTuples/data/2012/combined/Bs2phiphi_data_2012_corrected_TupleA_BDT.root",
                          "DecayTree", branch_names)
    all_data = rec2array(all_data)

    bdt_data_predicted = clf.predict_proba(all_data)
    bdt_data_predicted.dtype = [('GradBoost_prob', np.float64)]
    array2root((np.hsplit(bdt_data_predicted, 2)[1]),
               "/net/storage03/data/users/dlafferty/NTuples/data/2012/combined/Bs2phiphi_data_2012_corrected_TupleA_BDT.root",
               "DecayTree")

    print time.asctime(time.localtime()), "Branches Filled!"
def main():
    # Use the Bayesian Methods for Hackers design
    plt.style.use('bmh')
    matplotlib.rcParams.update({'font.size': 8})

    # Parse command line arguments
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("-l", "--file_list", help="ROOT file")
    args = parser.parse_args()

    # If a list of files has not been specified, warn the user and exit
    # the application.
    if not args.file_list:
        print 'A list of ROOT files needs to be specified'
        sys.exit(2)

    # Open the file containing the list of files to process
    root_file_list = None
    try:
        root_file_list = open(args.file_list, 'r')
    except IOError:
        print 'Unable to open file %s' % args.file_list
        sys.exit(2)

    root_files = []
    for line in root_file_list:
        root_files.append(line.strip())

    rec = rnp.root2array(root_files, 'results')
    make_plots(rec)
def readFiles():
    print 'Reading files...'
    weightsS = root2rec(files_signal, treename='tree',
                        branches=['full_weight'],
                        selection=selection)['full_weight']
    weightsB = root2rec(files_bg, treename='tree',
                        branches=['full_weight'],
                        selection=selection)['full_weight']
    sum_weightsS = np.sum(weightsS)
    sum_weightsB = np.sum(weightsB)
    weightsB = weightsB * sum_weightsS / sum_weightsB
    nS = len(weightsS)
    nB = len(weightsB)
    fullWeight = np.concatenate((weightsS, weightsB))
    # fullWeight = fullWeight['weight']
    # fullWeight = np.ones(len(fullWeight))
    # del weightsS, weightsB
    arrSB = root2array(files_signal + files_bg, treename='tree',
                       branches=trainVars(), selection=selection)
    # Need a matrix-like array instead of a 1-D array of lists for sklearn
    arrSB = (np.asarray([arrSB[var] for var in trainVars()])).transpose()
    targets = np.concatenate((np.ones(nS), np.zeros(nB)))
    print 'Done reading files.'
    return arrSB, fullWeight, targets
def _GenData(self, args):
    '''Loads the data for a general data value.'''
    if len(args) > 1:
        warn('WARNING in ' + self.__name__ + ':\n\t' + self.__name__ +
             ' takes a single argument: cut. Ignoring additional' +
             ' arguments.', UserWarning)
    # If it's not a valid cut
    if not self._Check_Cut(args[0]):
        # Get Last Cut
        cut = CAPy_globals.GetLastCut()
    # If it is a valid cut
    else:
        cut = args[0]
        # Store Cut
        CAPy_globals.SetLastCut(cut)
    files, dirName, treeName = CAPy_globals._FileInfo(self.__name__, 1)
    # Now call data
    m = root2array(files, dirName + '/' + treeName, [self.__name__])
    # If cut, apply
    if cut:
        print "#TODO: Implement Cut"
    return m
def list_flat_branches(filename, treename, use_dtype=True):
    """
    Lists branches in the file. A vector branch, say D_p, is expanded into
    D_p[0], D_p[1], D_p[2], D_p[3]. The first event is used to count the
    number of components.

    :param filename: filename
    :param treename: name of tree
    :param use_dtype: if True, detect vector branches from the dtype of the
        first event; otherwise probe each branch value for a length
    :return: list of strings
    """
    import root_numpy
    import numpy

    result = []
    data = root_numpy.root2array(filename, treename=treename, stop=1)
    for branch, value in data.dtype.fields.items():
        if use_dtype:
            if value[0].name != 'object':
                result.append(branch)
            else:
                matrix = numpy.array(list(data[branch]))
                for index in range(matrix.shape[1]):
                    result.append("{}[{}]".format(branch, index))
        else:
            try:
                for index in range(len(data[branch][0])):
                    result.append("{column}[{index}]".format(column=branch,
                                                             index=index))
            except TypeError:
                result.append(branch)
    return result
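# Usage sketch for list_flat_branches (file and tree names are hypothetical):
# scalar branches are returned as-is, while a vector branch such as D_p is
# expanded into indexed names ready for use as flat feature columns.
columns = list_flat_branches('ntuple.root', 'tree', use_dtype=True)
print(columns)  # e.g. ['D_mass', 'D_p[0]', 'D_p[1]', 'D_p[2]', 'D_p[3]']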
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("-f", "--file_list", help="List of ROOT files to process.")
    parser.add_argument("-l", "--lumi", help="Luminosity")
    args = parser.parse_args()

    if not args.file_list:
        print 'A list of ROOT files to process needs to be specified.'
        sys.exit(2)

    # Open the file containing the list of files to process
    root_file_list = None
    try:
        root_file_list = open(args.file_list, 'r')
    except IOError:
        print "Unable to open file %s" % args.file_list
        sys.exit(2)

    root_files = []
    for line in root_file_list:
        root_files.append(line.strip())

    rec = rnp.root2array(root_files, 'results')
    apply_tri_selection(rec, args.lumi)
def datagen(sel, brs, infname, n_chunks=10):
    f = ROOT.TFile.Open(infname)
    entries = f.Get("multiclass_6j").GetEntries()
    f.Close()

    # Initialize
    step = entries / n_chunks
    i_start = 0

    # Generate data forever
    while True:
        d = root_numpy.root2array(infname, treename="multiclass_6j",
                                  branches=brs, selection=sel,
                                  start=i_start, stop=i_start + step)
        i_start += step
        # roll over
        if i_start + step >= entries:
            i_start = 0
        df = pandas.DataFrame(d)
        # Shuffle
        df = df.iloc[np.random.permutation(len(df))]
        yield df
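# datagen is an infinite generator of shuffled DataFrame chunks, so it can
# drive e.g. a Keras fit_generator loop. A minimal sketch with a hypothetical
# selection, branch list and file name (the tree name is fixed inside datagen):
gen = datagen("nBCSVM >= 2", ["jet_pt", "jet_eta"], "multiclass_ntuple.root",
              n_chunks=10)
df_chunk = next(gen)  # first chunk; further next() calls roll through the tree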
def test_efficiency(functions, function_inputs, variables, inputfile, tree, selection):
    # Retrieve data from tree
    ninputs = len(function_inputs)
    branches = copy.deepcopy(function_inputs)
    branches.extend(variables)
    data = root2array(inputfile, treename=tree, branches=branches,
                      selection=selection)
    data = data.view((np.float64, len(data.dtype.names)))
    inputs = data[:, range(ninputs)].astype(np.float32)
    # Compute efficiencies along each variable and for each function
    graphs = []
    try:
        for ifu, function in enumerate(functions):
            for i, variable in enumerate(variables):
                xs = data[:, [ninputs + i]].astype(np.float32).ravel()
                graphs.append(efficiency_graph(pass_function=function,
                                               function_inputs=inputs,
                                               xs=xs))
                graphs[-1].SetName('efficiency_{}_{}'.format(ifu, variable))
    except TypeError:
        for i, variable in enumerate(variables):
            xs = data[:, [ninputs + i]].astype(np.float32).ravel()
            graphs.append(efficiency_graph(pass_function=functions,
                                           function_inputs=inputs,
                                           xs=xs))
            graphs[-1].SetName('efficiency_' + variable)
    return graphs
def read_batch(path, treename, leaves, batch_size, each=1, test_leaf=0):
    event_batches = None
    need_another_batch = True
    batch_offset = 0
    while need_another_batch:
        branches = get_index(leaves, np.arange(batch_size)[::each] + batch_offset)
        data_root = root_numpy.root2array(path, treename=treename,
                                          branches=branches)
        need_another_batch, events = split_by_events(
            data_root, leaves, batch_size / each, test_leaf=test_leaf)
        batch_offset += batch_size
        if event_batches is None:
            event_batches = [[event] for event in events]
        else:
            assert len(event_batches) == len(events)
            event_batches = [batches + [batch]
                             for batches, batch in zip(event_batches, events)]
    return [np.vstack(event) for event in event_batches]
def read_geometry(filename, treename, subdet, layer, wafer=-1):
    # Read cells from one layer
    selection = "zside==1 && layer=={0} && subdet=={1}".format(layer, subdet)
    if wafer != -1:
        selection += ' && wafer=={}'.format(wafer)
    branches = ['id', 'wafer', 'wafertype', 'cell', 'x', 'y']
    cells = root2array(filename, treename=treename, branches=branches,
                       selection=selection)
    # Create cell shapes
    output_cells = []
    for cell in cells:
        vertices = cell_vertices(cell['x'], cell['y'],
                                 cell['wafertype'], cell['cell'])
        barycenter = Point((cell['x'], cell['y']))
        output_cells.append(Cell(
            # id=int(cell['id']),
            id=int(compute_id(cell['wafer'], cell['cell'])),
            layer=layer,
            subdet=subdet,
            zside=1,
            module=int(cell['wafer']),
            center=barycenter,
            vertices=vertices
        ))
    return output_cells
def read_bh_geometry(filename, treename):
    # Read cells from one side
    selection = "zside==1 && subdet==2 && layer==1"
    branches = ['id', 'ieta', 'iphi', 'x', 'y']
    for corner in xrange(1, 5):
        branches.append('x{}'.format(corner))
        branches.append('y{}'.format(corner))
    cells = root2array(filename, treename=treename, branches=branches,
                       selection=selection)
    # Create cell shapes
    output_cells = []
    for cell in cells:
        vertices = Polygon([(cell['x1'], cell['y1']),
                            (cell['x2'], cell['y2']),
                            (cell['x3'], cell['y3']),
                            (cell['x4'], cell['y4'])])
        barycenter = Point((cell['x'], cell['y']))
        output_cells.append(Cell(
            id=int(cell['id']),
            layer=1,
            subdet=5,
            zside=1,
            module=1,
            ieta=int(cell['ieta']),
            iphi=int(cell['iphi']),
            center=barycenter,
            vertices=vertices
        ))
    return output_cells
def fit(filename, treename, inputsname, targetname, workingpoint=0.9, test=False):
    # Reading inputs and targets
    ninputs = len(inputsname)
    branches = copy.deepcopy(inputsname)
    branches.append(targetname)
    data = root2array(filename, treename=treename, branches=branches)
    data = data.view((np.float64, len(data.dtype.names)))
    # Extract and format inputs and targets from numpy array
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    # if test requested, use 60% of events for training and 40% for testing
    inputs_train = inputs
    targets_train = targets
    if test:
        inputs_train, inputs_test, targets_train, targets_test = \
            cross_validation.train_test_split(inputs, targets,
                                              test_size=0.4, random_state=0)
    # Define and fit quantile regression (quantile = workingpoint)
    # Default training parameters are used
    regressor = GradientBoostingRegressor(loss='quantile', alpha=workingpoint)
    regressor.fit(inputs_train, targets_train)
    if test:
        # Compare regression prediction with the true value and count the
        # fraction of time it falls below.
        # This should give the working point value
        predict_test = regressor.predict(inputs_test)
        compare = np.less(targets_test, predict_test)
        print 'Testing regression with inputs', inputsname, 'and working point', workingpoint
        print '  Test efficiency =', float(list(compare).count(True)) / float(len(compare))
        # TODO: add 1D efficiency graphs vs input variables
    return regressor
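# Usage sketch for fit; the file and branch names are hypothetical (chosen to
# match the isolation inputs used elsewhere in this code). With test=True the
# printed test efficiency should come out close to the requested working point.
regressor = fit('isolation_ntuple.root', 'ntuple_tree',
                inputsname=['abs(ieta)', 'ntt'], targetname='iso',
                workingpoint=0.9, test=True)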
def test_single_branch():
    f = get_file('single1.root')
    tree = f.Get('tree')
    arr1_1d = rnp.tree2array(tree, branches='n_int')
    arr2_1d = rnp.root2array(load('single1.root'), branches='n_int')
    assert_equal(arr1_1d.dtype, np.dtype('<i4'))
    assert_equal(arr2_1d.dtype, np.dtype('<i4'))
def _DetData(self, args):
    '''Loads the data for a detector specific value.'''
    # If there are more than 2 arguments
    if len(args) > 2:
        warn('WARNING in ' + self.__name__ + ':\n\t' + self.__name__ +
             ' takes two arguments: detnum and cut. Ignoring additional' +
             ' arguments.', UserWarning)
        detnum = args[0]
        cut = args[1]
    # If there are just 2 arguments
    elif len(args) == 2:
        detnum = args[0]
        cut = args[1]
    # If there is one argument
    elif len(args) == 1:
        if self._Check_Detnum(args[0]):
            detnum = args[0]
            CAPy_globals.SetLastDetnum(detnum)
            cut = CAPy_globals.GetLastCut()
        elif self._Check_Cut(args[0]):  # only args[0] exists in this branch
            cut = args[0]
            CAPy_globals.SetLastCut(cut)
            detnum = CAPy_globals.GetLastDetnum()
        else:
            warn('WARNING in ' + self.__name__ + ':\n\tArgument is' +
                 ' neither a detnum nor a cut. Ignoring argument.',
                 UserWarning)
            detnum = CAPy_globals.GetLastDetnum()
            cut = CAPy_globals.GetLastCut()
    # If there are no arguments:
    else:
        print 'No Arguments'
        detnum = CAPy_globals.GetLastDetnum()
        cut = CAPy_globals.GetLastCut()
    print detnum, cut
    # Now call data
    if detnum:
        files, dirName, treeName = CAPy_globals._FileInfo(self.__name__, 1)
        # Now call data
        m = root2array(files, dirName + '/' + treeName, [self.__name__])
        # If cut, apply
        if cut:
            print "#TODO: Implement Cut"
        return m
    else:
        warn('WARNING in ' + self.__name__ + ':\n\t' + 'No detector' +
             ' given and none stored in globals. Returning nothing',
             UserWarning)
        return None
def test_array2tree_charstar():
    a = np.array([b'', b'a', b'ab', b'abc', b'xyz', b''],
                 dtype=[('string', 'S3')])
    with temp() as tmp:
        rnp.array2root(a, tmp.GetName(), mode='recreate')
        a_conv = rnp.root2array(tmp.GetName())
        assert_array_equal(a, a_conv)
def test_chain():
    chain = ROOT.TChain('tree')
    chain.Add(load('single1.root'))
    check_single(rnp.tree2array(chain))

    f = load(['single1.root', 'single2.root'])
    a = rnp.root2array(f)
    check_single(a, 200)
def test_struct():
    assert_array_equal(
        rnp.root2array(load('struct.root')),
        np.array([(10, 15.5, 20, 781.2)],
                 dtype=[
                     ('branch1_intleaf', '<i4'),
                     ('branch1_floatleaf', '<f4'),
                     ('branch2_intleaf', '<i4'),
                     ('branch2_floatleaf', '<f4')]))
def test_object_expression():
    rec = rnp.root2array(load(['object1.root', 'object2.root']),
                         branches=['vect.Pt()'])
    assert_array_equal(
        rec['vect.Pt()'],
        np.concatenate([
            np.arange(10, dtype='d') + 1,
            np.arange(10, dtype='d') + 2]))
def read_inputs(config, setup):
    from ttH.TauRoast.processing import Process

    fn = os.path.join(config.get("indir", config["outdir"]), "ntuple.root")

    signal = None
    signal_weights = None
    for proc, weight in sum([cfg.items() for cfg in setup['signals']], []):
        for p in sum([Process.expand(proc)], []):
            logging.debug('reading {}'.format(p))
            d = rec2array(root2array(fn, str(p), setup['variables']))
            if isinstance(weight, float) or isinstance(weight, int):
                w = np.array([weight] * len(d))
            else:
                w = rec2array(root2array(fn, str(p), [weight])).ravel()
            w *= p.cross_section / p.events
            if signal is not None:
                signal = np.concatenate((signal, d))
                signal_weights = np.concatenate((signal_weights, w))
            else:
                signal = d
                signal_weights = w

    background = None
    background_weights = None
    for proc, weight in sum([cfg.items() for cfg in setup['backgrounds']], []):
        for p in sum([Process.expand(proc)], []):
            logging.debug('reading {}'.format(p))
            d = rec2array(root2array(fn, str(p), setup['variables']))
            if isinstance(weight, float) or isinstance(weight, int):
                w = np.array([weight] * len(d))
            else:
                w = rec2array(root2array(fn, str(p), [weight])).ravel()
            w *= p.cross_section / p.events
            if background is not None:
                background = np.concatenate((background, d))
                background_weights = np.concatenate((background_weights, w))
            else:
                background = d
                background_weights = w

    factor = np.sum(signal_weights) / np.sum(background_weights)
    logging.info("renormalizing background events by factor {}".format(factor))
    background_weights *= factor

    return signal, signal_weights, background, background_weights
def main():
    from root_numpy import root2array
    import sys
    import argparse
    import matplotlib.pyplot as plt
    import numpy as np

    parser = argparse.ArgumentParser(
        description='Plots output from blochSiegert.cpp')
    parser.add_argument("-f", "--file", type=str, help="Filename",
                        required=True)
    parser.add_argument("-rf", "--ramseyFringe", type=int,
                        help="rf___ branch to draw")
    args = parser.parse_args()

    filename = args.file
    print("Loading...", filename)
    try:
        phi = np.array(root2array(filename, branches="phi")[0])
        gridMin = np.array(root2array(filename, branches="gridMin")[0])
        polyMin = np.array(root2array(filename, branches="polyMin")[0])
        params = np.array(root2array(filename, branches="params")[0])
    except:
        sys.exit()

    if args.ramseyFringe is not None:
        try:
            branchname = "rf" + str(args.ramseyFringe)
            fringe = np.array(root2array(filename, branches=branchname)[0])
            wRange = np.array(root2array(filename, branches="wRange")[0])
            fig2 = plt.figure(branchname)
            ax2 = fig2.add_subplot(111)
            ax2.set(title=branchname)
            ax2.set(xlabel='w [rad/s]')
            ax2.set(ylabel='P(z)')
            ax2.plot(wRange, fringe)
            ax2.grid(True)
        except:
            print("Could not read branch ", branchname)

    if params[3] == 1:
        print("Circular RF Ramsey fringe")
    else:
        print("Linear RF Ramsey fringe")
    print("{W0_VAL, PRECESS_TIME, PULSE_TIME}")
    print(params[0], " ", params[1], " ", params[2])

    fig1 = plt.figure("blochSiegertShift")
    ax1 = fig1.add_subplot(111)
    ax1.plot(phi, params[0] - polyMin, label="Polynomial fit")
    ax1.plot(phi, params[0] - gridMin, label="Gridsearch")
    ax1.grid(True)
    ax1.set(title='Bloch Siegert shift for optimized Ramsey Fringes')
    ax1.set(xlabel='Initial phase angle [rad]')
    ax1.set(ylabel='Shift [rad/s]')
    ax1.legend()
    plt.show()
    return
def test_single_pattern_not_exist():
    f = load(['single1.root', 'does_not_exist.root'])
    a = rnp.root2array(f)
def test_preserve_branch_order():
    a = rnp.root2array(load('test.root'))
    assert_equal(a.dtype.names, ('i', 'x', 'y', 'z'))

    a = rnp.root2array(load('test.root'), branches=['y', 'x', 'z'])
    assert_equal(a.dtype.names, ('y', 'x', 'z'))
ar = root2rec('../test/test.root', 'tree')
print ar.i
print ar.f

# ipython autocomplete columnname patch is available with this numpy patch
# https://github.com/piti118/numpy/commit/a996292238ab98dcf53f2d48476d637eab9f1a72
ar.i[0]  # ar[0].i won't work
ar[0][0]

ar.f[ar.i > 5]

# root2array is available if you don't like recarray
a = root2array('../test/test.root', 'tree')
# this tree has two columns: i as integer and f as float
a  # you will see that a is a structured array

# access whole column
print a['i']
print a['f']

# access 0th record
print a[0]
# and the first record
print a[1]
Ztob.SetPxPyPzE(Z.Px(), Z.Py(), Z.Pz(), Z.E())
Zboost = ROOT.TVector3()
Zboost = Ztob.BoostVector()
v = Zboost.Unit()
eletron1.Boost(-Zboost)

Htob = ROOT.TLorentzVector()
Htob.SetPxPyPzE(H.Px(), H.Py(), H.Pz(), H.E())
Hboost = ROOT.TVector3()
Hboost = Htob.BoostVector()
ang = Hboost.Unit()
bjato1.Boost(-Hboost)

tree.Cos_Hb1 = np.cos(bjato1.Angle(ang))
tree.Cos_lZ = np.cos(eletron1.Angle(v))
tree.Fill()

# Show resulting histograms
#hist_PT_l1.Draw()
#raw_input("Press Enter to continue...")

tree.write()
f.close()

# create the csv output
to_convert = root2array(root_name, 'test')
df_conv = pd.DataFrame(to_convert)
df_conv.to_csv(csv_name + '.csv', index=False, header=df_conv.keys(),
               mode='w', sep=' ')
def optimize_background_rejection_vs_ieta(effs, isolations,
                                          signalfile, signaltree,
                                          backgroundfile, backgroundtree,
                                          inputnames=['abs(ieta)', 'ntt'],
                                          targetname='iso'):
    # ieta_binning = np.arange(0.5, 28.5, 1)
    ieta_binning = [0.5, 3.5, 6.5, 9.5, 13.5, 18.5, 22.5, 27.5]

    # Compute signal efficiencies
    ninputs = len(inputnames)
    branches = copy.deepcopy(inputnames)
    branches.append(targetname)
    data = root2array(signalfile, treename=signaltree, branches=branches,
                      selection='et>10')
    data = data.view((np.float64, len(data.dtype.names)))
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    xs = data[:, [0]].astype(np.float32).ravel()
    # signal_efficiencies is a 2D array
    # The first dimension corresponds to different ieta values
    # The second dimension corresponds to different working points
    signal_efficiencies = [
        graph2array(
            efficiency.efficiency_graph(
                pass_function=(lambda x: np.less(x[1], iso.predict(x[0]))),
                function_inputs=(inputs, targets),
                xs=xs,
                bins=ieta_binning))[:, [1]].ravel()
        for iso in isolations
    ]
    signal_efficiencies = np.column_stack(signal_efficiencies)

    # Compute background efficiencies
    ninputs = len(inputnames)
    branches = copy.deepcopy(inputnames)
    branches.append(targetname)
    data = root2array(backgroundfile, treename=backgroundtree,
                      branches=branches, selection='et>10')
    data = data.view((np.float64, len(data.dtype.names)))
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    xs = data[:, [0]].astype(np.float32).ravel()
    # background_efficiencies is a 2D array
    # The first dimension corresponds to different ieta values
    # The second dimension corresponds to different working points
    background_efficiencies = [
        graph2array(
            efficiency.efficiency_graph(
                pass_function=(lambda x: np.less(x[1], iso.predict(x[0]))),
                function_inputs=(inputs, targets),
                xs=xs,
                bins=ieta_binning))[:, [1]].ravel()
        for iso in isolations
    ]
    background_efficiencies = np.column_stack(background_efficiencies)

    signal_efficiencies_diff_graphs = []
    background_efficiencies_diff_graphs = []
    optimal_points_graphs = []
    optimal_points = []
    # compute best working point for each ieta
    for i, (signal_effs, background_effs) in enumerate(
            zip(signal_efficiencies, background_efficiencies)):
        (signal_efficiencies_diff_graph, background_efficiencies_diff_graph,
         optimal_points_graph, optimal_point) = find_best_working_point(
            effs, signal_effs, background_effs)
        signal_efficiencies_diff_graph.SetName(
            'efficiencies_signal_ieta_{}'.format(i))
        background_efficiencies_diff_graph.SetName(
            'efficiencies_background_ieta_{}'.format(i))
        optimal_points_graph.SetName(
            'signal_background_optimal_points_ieta_{}'.format(i))
        signal_efficiencies_diff_graphs.append(signal_efficiencies_diff_graph)
        background_efficiencies_diff_graphs.append(
            background_efficiencies_diff_graph)
        optimal_points_graphs.append(optimal_points_graph)
        optimal_points.append(optimal_point)

    return (signal_efficiencies_diff_graphs,
            background_efficiencies_diff_graphs,
            optimal_points_graphs, optimal_points)
import numpy as np
import pandas
from root_numpy import root2array

epochs = 2000
number_hidden_nodes = [20, 15, 1]
number_layers = len(number_hidden_nodes)
activations = ['tanh', 'tanh', 'tanh']
batch_size = 128
filenameBkg = 'hadded/uhh2.AnalysisModuleRunner.MC.TTbar.root'
filenameSig = 'hadded/uhh2.AnalysisModuleRunner.MC.TstarTstar_M-Combined.root'
split_train_test = 0.7
z_mean = 1.0
z_sigma = 0.1
reduced_training = True
reduced_dimension = 10

##########################################################################

arrBkg = pandas.DataFrame(root2array(filenameBkg, treename='AnalysisTree',
                                     branches=branches_to_analyze))
arrSig = pandas.DataFrame(root2array(filenameSig, treename='AnalysisTree',
                                     branches=branches_to_analyze))

# save the numpy format
if save_numpy_format:
    outfileBkg = 'outBkg.npy'
    np.save(outfileBkg, arrBkg)  # save as numpy
    outfileSig = 'outSig.npy'
    np.save(outfileSig, arrSig)  # save as numpy

# define train and test arrays for training
msk = np.random.rand(len(arrBkg)) < split_train_test
train_sample = arrBkg[msk]
test_sample = arrBkg[~msk]
import numpy as np  # 1.11.0
from root_numpy import root2array, array2root, list_branches  # 4.6.0

# Concatenate different root files

# list of the paths to the different root files to concatenate
path_sig = [
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_2.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_4.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_5.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_6.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_7.root"
]
# path where to save the concatenated file
path_concat = "/users/LHCb/corentin/radiative_dataset/data/concat_signal.root"

data = []
for path in path_sig:
    data += [root2array(filenames=path)]
print("import complete")

signal = np.concatenate(data)
print('concatenation complete')

array2root(signal, path_concat, mode='recreate')
print('export complete')
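# Optional sanity check on the merged file, using list_branches (already
# imported above) and root2array; this only assumes the output file was
# written by the array2root call above.
print(list_branches(path_concat))    # branch names preserved from the inputs
print(len(root2array(path_concat)))  # total number of concatenated entries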
input1 = "../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS1m.root"
input2 = "../../../storage/cc14398/Cs137-10s-18Mar/PS1m.root"
#input2="SavedData/Shielding/Isotropic/1Mar-PSonly-Co60-33MBq-100ms-3m-Bp.root"
#input3="SavedData/Shielding/Isotropic/6Mar-Co60-37MBq-100ms-2m-Phantom.root"
#input4="SavedData/Shielding/Isotropic/6Mar-Co60-37MBq-100ms-8m-Phantom.root"
#input2="SavedData/Shielding/ParticleGun/5Mar-Co60-PartGun-Bp-1m.root"
#input3="SavedData/Shielding/ParticleGun/5Mar-Co60-PartGun-Ph-1m.root"
#input4="SavedData/Shielding/Isotropic/1Mar-PSonly-Co60-33MBq-100ms-8m-Bp.root"
#input1="../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS1m.root"
#input2="../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS3m.root"
#input3="../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS7m.root"
#input4="../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS8m.root"

KE1 = root2array(input1, treename="PhaseSpace", branches="Ekine")
KE2 = root2array(input2, treename="PhaseSpace", branches="Ekine")
#KE3=root2array(input3,treename="PhaseSpace",branches="Ekine")
#KE4=root2array(input4,treename="PhaseSpace",branches="Ekine")
#dX=root2array(inputFile,treename="PhaseSpace", branches="dX")

plt.hist(KE1, bins=20, histtype='step', label='Co 60')
plt.hist(KE2, bins=10, histtype='step', label='Cs 137')
#plt.hist2d(E_kinetic,dX)
plt.xlabel('Kinetic Energy (MeV)')
plt.ylabel('Counts')
#fig, ax1 = plt.subplots()
#ax1.hist([KE1,KE2,KE3], label=['1m','3m','7m'],bins=15,density=True)
os.mkdir(output)

if isinstance(inputs, (list,)):
    treeName = list_trees(inputs[0])
else:
    treeName = list_trees(inputs)
    inputs = [inputs, ]

if len(treeName) > 1:
    print("more than one tree in file ... specify, which tree to use")
    exit()

print(">>> Load Events in gen acceptance")
dfGen = [tree_to_df(root2array(i, treeName[0], selection=selection,
                               branches=branches), 1)
         for i in inputs]
dfGen = pd.concat(dfGen)

dfGen = dfGen.rename(columns={
    'Muon_ID[muon_genRecoObj]_0': 'muon_ID',
    'Muon_ID[antiMuon_genRecoObj]_0': 'antiMuon_ID',
    'Muon_triggerBits[muon_genRecoObj]_0': 'muon_triggerBits',
    'Muon_triggerBits[antiMuon_genRecoObj]_0': 'antiMuon_triggerBits',
    'Muon_tkRelIso[muon_genRecoObj]_0': 'muon_tkIso',
    'Muon_tkRelIso[antiMuon_genRecoObj]_0': 'antiMuon_tkIso',
    'Muon_pfRelIso04_all[muon_genRecoObj]_0': 'muon_pfIso',
    'Muon_pfRelIso04_all[antiMuon_genRecoObj]_0': 'antiMuon_pfIso',
    'Muon_eta[muon_genRecoObj]_0': 'muon_eta',
    'Muon_eta[antiMuon_genRecoObj]_0': 'antiMuon_eta',
    'Muon_pt[muon_genRecoObj]_0': 'muon_pt',
    'Muon_pt[antiMuon_genRecoObj]_0': 'antiMuon_pt',
pars = []
xsec_list = []
tracks_per_evt = []
for path in file_path:
    name = os.path.basename(path[:-14])
    print name
    out = re.search('_M[0-9]+_', name)
    out = out.group(0)
    stopmass = int(out[2:-1])
    # if stopmass > 300: continue

    t = root2array(path)
    # branches:
    # ['Nev', 'tof_reco', 'PID', 'tof_gen',
    #  'P_reco', 'vtx_SumPT2', 'vtx_NDOF', 'vtx_SumPT',
    #  'Zout', 'Tout', 'pt', 'ctgtheta', 'phi', 'd0', 'dz', 'L',
    #  'sigma_pt', 'sigma_d0', 'sigma_dz', 'sigma_Tin',
    #  'M_reco', 'beta_reco']

    c_pt = rt.TCanvas('c_' + name, 'c_' + name, 1600, 600)
    c_pt.Divide(3, 1)

    bsm_sel = np.logical_and(
        np.abs(t['PID']) >= 1000612,
        np.abs(t['PID']) <= 1093334)
    kin_sel = np.logical_and(
def main(args):
    ### Prepare data for processing
    # Ensure output directory exists
    out_path = args.data_dir + 'results/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    ## Load files
    l_fit_vars = [
        'logDIRA', 'log_bplus_IPCHI2_OWNPV', 'bplus_LOKI_DTF_CHI2NDOF',
        'log_bplus_FDCHI2_OWNPV', 'bplus_ETA', 'log_1_IPCHI2_OWNPV',
        'log_2_IPCHI2_OWNPV', 'log_3_IPCHI2_OWNPV', 'log_4_IPCHI2_OWNPV',
        'log_5_IPCHI2_OWNPV', 'mu_PT_max', 'mu_PT_min'
    ]
    l_mass_vars = ['scaledmass', 'mjpipi']
    l_load_branches = l_fit_vars + l_mass_vars

    # Load files into arrays
    print('*** Loading Data ***')
    a_mc_x = root_numpy.root2array(args.data_dir + 'mc_x_proba.root',
                                   treename=args.tree_name,
                                   branches=l_load_branches)
    a_mc_p = root_numpy.root2array(args.data_dir + 'mc_p_proba.root',
                                   treename=args.tree_name,
                                   branches=l_load_branches)
    a_side = root_numpy.root2array(args.data_dir + 'side_proba.root',
                                   treename=args.tree_name,
                                   branches=l_load_branches)
    a_data = root_numpy.root2array(args.data_dir + 'data_proba.root',
                                   treename=args.tree_name,
                                   branches=l_load_branches)

    print('*** Processing Data ***')
    # Convert to DataFrames
    df_mc_x = pd.DataFrame(a_mc_x)
    df_mc_p = pd.DataFrame(a_mc_p)
    df_side = pd.DataFrame(a_side)
    df_data = pd.DataFrame(a_data)

    # Add categorisation
    df_mc_x['cat'] = 'mc_x'
    df_mc_p['cat'] = 'mc_p'
    df_side['cat'] = 'side'

    # Add target
    df_mc_x['class'] = 1
    df_mc_p['class'] = 1
    df_side['class'] = 0

    # Combine into training set
    df_train = pd.concat([df_mc_x, df_mc_p, df_side])

    # Print summary stats
    print('  *** Data loaded ***')
    print('  *** Training events: %d ***' % (df_train.shape[0]))
    print('  *** Data events: %d ***' % (df_data.shape[0]))

    # Dictionaries for storing information on each run
    d_run_info = {}
    d_roc_plot = {}

    ### Estimate signal yield - all data
    d_sig_est_alldata = fit_doubleCB(
        pd.concat([df_mc_x, df_mc_p])['scaledmass'].as_matrix(),
        df_data['scaledmass'].as_matrix(),
        out_path, s_info='alldata_signal_est')

    ### Estimate signal yield - X region
    s0 = None
    if args.find_s0:
        df_data_p = df_data[(df_data['mjpipi'] > 3676)
                            & (df_data['mjpipi'] < 3696)]
        df_data_x = df_data[(df_data['mjpipi'] > 3862)
                            & (df_data['mjpipi'] < 3882)]
        d_sig_est_p = fit_doubleCB(
            df_mc_p['scaledmass'].as_matrix(),
            df_data_p['scaledmass'].as_matrix(),
            out_path,
            s_info='psi(2S)_s0_est',
        )
        d_sig_est_x = fit_doubleCB(
            df_mc_x['scaledmass'].as_matrix(),
            df_data_x['scaledmass'].as_matrix(),
            out_path,
            s_info='x(3872)_s0_est',
        )
        print("*** Expected psi(2S) signal yield: %d ***"
              % (d_sig_est_p['data_sig_yield']))
        print("*** Expected X(3872) signal yield: %d ***"
              % (d_sig_est_x['data_sig_yield']))
        print("*** Expected X(3823) signal yield: %d ***"
              % (float(d_sig_est_x['data_sig_yield']) / 20.))
        s0 = float(d_sig_est_x['data_sig_yield']) / 20.
        d_run_info['sig_est_x_reg'] = d_sig_est_x

    # Loop over the configured runs
    for run in D_CONFIGS:
        print('*** Performing run %s ***' % (run))
        d_run_info[run] = {}
        d_roc_plot[run] = {}
        out_path_plots = out_path + 'plots/'
        if not os.path.exists(out_path_plots):
            os.makedirs(out_path_plots)

        if args.opt_cut is None:
            ### Find optimal cut
            print(' *** Determining optimal cut ***')
            # Optimise the probability cut
            sig_effs_mcp = []  # record signal efficiencies - on MC psi(2S) only
            sig_effs_mcx = []  # record signal efficiencies - on MC X(3823) only
            sig_effs_all = []  # record signal efficiencies
            bgr_rejs = []      # record background rejections
            cut_scores = []    # record cut optimisation metric

            # Determine cut metric for a range of cuts
            cuts = np.linspace(.0, 1., 200, endpoint=False)
            for prob_threshold in cuts:
                # Determine how many predictions are correct
                signal_efficiency_mcx = float(
                    df_train[(df_train['prob_' + run] > prob_threshold)
                             & (df_train['cat'] == 'mc_x')].shape[0]) / float(
                    df_train[df_train['cat'] == 'mc_x'].shape[0])
                signal_efficiency_mcp = float(
                    df_train[(df_train['prob_' + run] > prob_threshold)
                             & (df_train['cat'] == 'mc_p')].shape[0]) / float(
                    df_train[df_train['cat'] == 'mc_p'].shape[0])
                signal_efficiency_all = float(
                    df_train[(df_train['prob_' + run] > prob_threshold)
                             & (df_train['class'] == 1)].shape[0]) / float(
                    df_train[df_train['class'] == 1].shape[0])
                background_rejection = float(
                    df_train[(df_train['prob_' + run] > prob_threshold)
                             & (df_train['class'] == 0)].shape[0]) / float(
                    df_train[df_train['class'] == 0].shape[0])

                # Store scores
                sig_effs_all.append(signal_efficiency_all)
                sig_effs_mcp.append(signal_efficiency_mcp)
                sig_effs_mcx.append(signal_efficiency_mcx)
                bgr_rejs.append(background_rejection)

                # Optimize cut
                eff = signal_efficiency_all
                a = 5.  # expected significance
                # Background events, scaled to 40MeV window about B peak,
                # considering only those in X(3823) region
                B = df_train[((df_train['prob_' + run] > prob_threshold))
                             & ((df_train['scaledmass'] > 5400.)
                                & (df_train['scaledmass'] < 5450.))
                             & ((df_train['mjpipi'] > 3773)
                                & (df_train['mjpipi'] < 3873))].shape[0] * .8
                if s0 is not None:
                    cut_scores.append((s0 * eff) / sqrt((s0 * eff) + B))
                else:
                    cut_scores.append(eff / ((a / 2) + sqrt(B)))

            # Find optimal cut
            if args.bck_cut:
                # Hard cut at 99% background rejection
                cut_index = np.argmax(np.array(bgr_rejs) > .99)
                print("Background used: {:.3f}".format(bgr_rejs[cut_index]))
                prob_threshold = cuts[cut_index]
            else:
                # Base on cut optimisation metric
                cut_index = np.argmax(cut_scores)
                prob_threshold = cuts[cut_index]

            ### Store some parameters of interest
            d_run_info[run]['optimal_cut'] = np.asscalar(prob_threshold)
            d_run_info[run]['all_signal_efficiency'] = sig_effs_all[cut_index]
            d_run_info[run]['mcj_signal_efficiency'] = sig_effs_mcp[cut_index]
            d_run_info[run]['mcx_signal_efficiency'] = sig_effs_mcx[cut_index]
            d_roc_plot[run]['sig_effs'] = sig_effs_all
            d_roc_plot[run]['bgr_rejs'] = bgr_rejs

            ### Print some summary stats
            print(' *** Optimal cut: %1.2f ***' % (prob_threshold))
            print(' *** Signal efficiency: %1.2f ***'
                  % (d_run_info[run]['all_signal_efficiency']))
            print(' *** MCJ Signal efficiency: %1.2f ***'
                  % (d_run_info[run]['mcj_signal_efficiency']))
            print(' *** MCX Signal efficiency: %1.2f ***'
                  % (d_run_info[run]['mcx_signal_efficiency']))
        else:
            prob_threshold = float(args.opt_cut)

        ### Apply model to data
        df_data['class'] = df_data['prob_' + run] > prob_threshold

        ### Plot cut optimisation
        print(' *** Plotting cut optimisation ***')
        fig = plt.figure()
        plt.plot(cuts, cut_scores)
        plt.ylabel("Cut Score")
        plt.xlabel("Probability Threshold")
        plt.xlim(0., 1.)
        plt.title("Cut Score " + run)
        plt.tight_layout(pad=2.0)
        fig.savefig(out_path_plots + 'cut_score.pdf')
        plt.close()

        ### Plot mass histogram for optimal cut
        print(' *** Plotting Mass histograms ***')
        # Initialise canvas
        c_name = 'B_Mass_Distribution ' + run
        c = ROOT.TCanvas(c_name, c_name, 600, 400)
        c.cd()
        # Select required quantity
        a_raw = df_data['scaledmass'].as_matrix()
        a_cut = df_data[df_data['class'] == 1]['scaledmass'].as_matrix()
        # Create and format histograms
        h_raw = ROOT.TH1F(
            c_name + '_No_Cut',
            c_name + '_No_Cut;B Mass [MeV/#it{c}^{2}];candidates/18[MeV/#it{c}^{2}]',
            100, 5220., 5400.)
        h_cut = ROOT.TH1F(
            c_name + '_XGB_Cut',
            c_name + '_XGB_Cut;B Mass [MeV/#it{c}^{2}];candidates/18[MeV/#it{c}^{2}]',
            100, 5220., 5400.)
        # Fill histograms
        map(h_raw.Fill, a_raw)
        map(h_cut.Fill, a_cut)
        # Normalise
        ## Make it pretty
        h_raw.SetTitle('B Mass Distribution ' + run)
        # Format for each case of x-axis
        h_raw.GetYaxis().SetTitleOffset(1.6)
        y_max = 1.1 * max(h_raw.GetBinContent(h_raw.GetMaximumBin()),
                          h_cut.GetBinContent(h_cut.GetMaximumBin()))
        y_min = 0.9 * min(h_raw.GetBinContent(h_raw.GetMinimumBin()),
                          h_cut.GetBinContent(h_cut.GetMinimumBin()))
        h_raw.GetYaxis().SetRangeUser(y_min, y_max)
        # Format plotting style
        h_raw.SetLineColor(ROOT.kRed)
        h_raw.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
        h_cut.SetLineColor(ROOT.kBlue)
        h_cut.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
        # Remove stats boxes
        h_raw.SetStats(False)
        h_cut.SetStats(False)
        # Print
        h_raw.Draw('HIST')
        h_cut.Draw('HISTsame')
        # Create legend
        leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
        leg.AddEntry(h_raw, 'Uncut Data', 'L')
        leg.AddEntry(h_cut, 'XGBoost cut: {:.3f}'.format(prob_threshold), 'L')
        leg.SetLineColor(0)
        leg.SetLineStyle(0)
        leg.SetFillStyle(0)
        leg.SetBorderSize(0)
        leg.Draw('same')
        # Save
        c.SaveAs(out_path_plots + 'Mass_histogram_B_XGBcut.pdf')

        ### BDT answer
        print(' *** Plotting classification probabilities ***')
        # Initialise canvas
        c_name = 'BDT_Predicted_Probability ' + run
        c = ROOT.TCanvas(c_name, c_name, 600, 400)
        c.cd()
        # Select required quantity
        a_train_sig_prob = df_train['prob_' + run][df_train['class'] == 1].as_matrix()
        a_train_bkg_prob = df_train['prob_' + run][df_train['class'] == 0].as_matrix()
        a_data_prob = df_data['prob_' + run].as_matrix()
        # Create and format histograms
        h_train_sig_prob = ROOT.TH1F(c_name + '_Sig_Prob',
                                     c_name + '_Sig_Prob;Probability;Candidates',
                                     100, 0., 1.)
        h_train_bkg_prob = ROOT.TH1F(c_name + '_Bkg_Prob',
                                     c_name + '_Bkg_Prob;Probability;Candidates',
                                     100, 0., 1.)
        h_data_prob = ROOT.TH1F(c_name + '_Data_Prob',
                                c_name + '_Data_Prob;Probability;Candidates',
                                100, 0., 1.)
        # Fill histograms
        map(h_train_sig_prob.Fill, a_train_sig_prob)
        map(h_train_bkg_prob.Fill, a_train_bkg_prob)
        map(h_data_prob.Fill, a_data_prob)
        # Normalise
        h_train_sig_prob.Scale(1. / h_train_sig_prob.Integral())
        h_train_bkg_prob.Scale(1. / h_train_bkg_prob.Integral())
        h_data_prob.Scale(1. / h_data_prob.Integral())
        ## Make it pretty
        h_train_sig_prob.SetTitle('Event Probability Distribution ' + run)
        # Format for each case of x-axis
        h_train_sig_prob.GetYaxis().SetTitleOffset(1.6)
        y_max = 1.1 * max(
            max(h_train_sig_prob.GetBinContent(h_train_sig_prob.GetMaximumBin()),
                h_train_bkg_prob.GetBinContent(h_train_bkg_prob.GetMaximumBin())),
            h_data_prob.GetBinContent(h_data_prob.GetMaximumBin()))
        y_min = 0.9 * min(
            min(h_train_sig_prob.GetBinContent(h_train_sig_prob.GetMinimumBin()),
                h_train_bkg_prob.GetBinContent(h_train_bkg_prob.GetMinimumBin())),
            h_data_prob.GetBinContent(h_data_prob.GetMinimumBin()))
        h_train_sig_prob.GetYaxis().SetRangeUser(y_min, y_max)
        # Format plotting style
        h_train_sig_prob.SetLineColor(ROOT.kRed)
        h_train_sig_prob.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
        h_train_bkg_prob.SetLineColor(ROOT.kBlue)
        h_train_bkg_prob.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
        h_data_prob.SetLineColor(ROOT.kGreen)
        h_data_prob.SetFillColorAlpha(ROOT.kGreen - 10, 0.7)
        # Remove stats boxes
        h_train_sig_prob.SetStats(False)
        h_train_bkg_prob.SetStats(False)
        h_data_prob.SetStats(False)
        # Print
        h_train_sig_prob.Draw('HIST')
        h_train_bkg_prob.Draw('HISTsame')
        h_data_prob.Draw('HISTsame')
        # Create legend
        leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
        leg.AddEntry(h_train_sig_prob, 'Training Signal Events', 'L')
        leg.AddEntry(h_train_bkg_prob, 'Training Background Events', 'L')
        leg.AddEntry(h_data_prob, 'Data Events', 'L')
        leg.SetLineColor(0)
        leg.SetLineStyle(0)
        leg.SetFillStyle(0)
        leg.SetBorderSize(0)
        leg.Draw('same')
        # Save
        c.SaveAs(out_path_plots + 'BDT_answer.pdf')

        ### Plot variable distributions
        print(' *** Plotting training variable distributions ***')
        out_path_var = out_path_plots + 'dist_vars/'
        if not os.path.exists(out_path_var):
            os.makedirs(out_path_var)
        for var in D_CONFIGS[run]['fit_vars'] + ['bplus_PT'] + ['prob_' + run]:
            # Initialise canvas
            c_name = var + '_Distribution_' + run
            c = ROOT.TCanvas(c_name, c_name, 600, 400)
            c.cd()
            # Select required quantity
            a_plt_sig = df_train[var][df_train['prob_' + run] >= prob_threshold].as_matrix()
            a_plt_bkg = df_train[var][df_train['prob_' + run] < prob_threshold].as_matrix()
            # Scale DIRA and IPCHI2
            i_str = ''
            if var == 'bplus_DIRA_OWNPV':
                a_plt_sig = np.arccos(a_plt_sig)
                a_plt_bkg = np.arccos(a_plt_bkg)
                i_str = 'arccos '
            if 'CHI2' in var:
                a_plt_sig = np.log(a_plt_sig)
                a_plt_bkg = np.log(a_plt_bkg)
                i_str = 'log '
            # Create and format histograms
            x_max = max(max(a_plt_sig), max(a_plt_bkg))
            x_min = min(min(a_plt_sig), min(a_plt_bkg))
            h_plt_sig = ROOT.TH1F(c_name + '_Sig',
                                  c_name + '_Sig;' + i_str + var + ';candidates',
                                  100, x_min, x_max)
            h_plt_bkg = ROOT.TH1F(c_name + '_Bkg',
                                  c_name + '_Bkg;' + i_str + var + ';candidates',
                                  100, x_min, x_max)
            # Fill histograms
            map(h_plt_sig.Fill, a_plt_sig)
            map(h_plt_bkg.Fill, a_plt_bkg)
            ## Make it pretty
            h_plt_sig.SetTitle(var + ' Distribution ' + run)
            # Format for each case of x-axis
            h_plt_sig.GetYaxis().SetTitleOffset(1.6)
            y_max = 1.1 * max(h_plt_sig.GetBinContent(h_plt_sig.GetMaximumBin()),
                              h_plt_bkg.GetBinContent(h_plt_bkg.GetMaximumBin()))
            h_plt_sig.GetYaxis().SetRangeUser(0, y_max)
            h_plt_sig.GetXaxis().SetRangeUser(x_min, x_max)
            # Format plotting style
            h_plt_sig.SetLineColor(ROOT.kRed)
            h_plt_sig.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
            h_plt_bkg.SetLineColor(ROOT.kBlue)
            h_plt_bkg.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
            # Remove stats boxes
            h_plt_sig.SetStats(False)
            h_plt_bkg.SetStats(False)
            # Print
            h_plt_sig.Draw('HIST')
            h_plt_bkg.Draw('HISTsame')
            # Create legend
            leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
            leg.AddEntry(h_plt_sig, 'Training Events Identified as Signal', 'L')
            leg.AddEntry(h_plt_bkg, 'Training Events Identified as Background', 'L')
            leg.SetLineColor(0)
            leg.SetLineStyle(0)
            leg.SetFillStyle(0)
            leg.SetBorderSize(0)
            leg.Draw('same')
            # Save
            c.SaveAs(out_path_var + var + '.pdf')

        ### Plot comparison to MC data
        out_path_mcp_data = out_path_plots + 'mcp_v_data/'
        if not os.path.exists(out_path_mcp_data):
            os.makedirs(out_path_mcp_data)
        print(' *** Plotting comparison to psi(2S) MC ***')
        df_data_comp = df_data[((df_data['mjpipi'] < 3696)
                                & (df_data['mjpipi'] > 3676))
                               & ((df_data['scaledmass'] < 5299)
                                  & (df_data['scaledmass'] > 5259))]
        df_side_comp = df_side[(df_side['mjpipi'] < 3696)
                               & (df_side['mjpipi'] > 3676)]
        for var in D_CONFIGS[run]['fit_vars'] + ['bplus_PT'] + ['prob_' + run]:
            # Initialise canvas
            c_name = var + '_MC_#psi(2S)_Comparison_' + run
            c = ROOT.TCanvas(c_name, c_name, 600, 400)
            c.cd()
            # Select required quantity
            a_mc_p = df_mc_p[var].as_matrix()
            a_data_comp = df_data_comp[var].as_matrix()
            a_side_comp = df_side_comp[var].as_matrix()
            # Scale DIRA and IPCHI2
            i_str = ''
            if var == 'bplus_DIRA_OWNPV':
                a_mc_p = np.arccos(a_mc_p)
                a_data_comp = np.arccos(a_data_comp)
                a_side_comp = np.arccos(a_side_comp)
                i_str = 'arccos '
            #if ('CHI2' in var):
            #    a_mc_p = np.log(a_mc_p)
            #    a_data_comp = np.log(a_data_comp)
            #    a_side_comp = np.log(a_side_comp)
            #    i_str = 'log '
            # Create and format histograms
            x_max = max(max(a_mc_p), max(a_data_comp))
            x_min = min(min(a_mc_p), min(a_data_comp))
            h_mc_p = ROOT.TH1F(c_name + '_mc_#psi',
                               c_name + '_mc_#psi;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            h_comp = ROOT.TH1F(c_name + '_data',
                               c_name + '_data;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            h_side = ROOT.TH1F(c_name + '_side',
                               c_name + '_side;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            # Fill histograms
            map(h_mc_p.Fill, a_mc_p)
            map(h_comp.Fill, a_data_comp)
            map(h_side.Fill, a_side_comp)
            # Background reduce
            h_comp.Add(h_side, -1)
            # Normalise
            h_mc_p.Scale(1. / h_mc_p.Integral())
            h_comp.Scale(1. / h_comp.Integral())
            h_side.Scale(1. / h_side.Integral())
            ## Make it pretty
            h_mc_p.SetTitle(var + ' Data vs MC J(2S) Distribution ' + run)
            # Format for each case of x-axis
            h_mc_p.GetYaxis().SetTitleOffset(1.6)
            y_max = 1.1 * max((h_mc_p.GetBinContent(h_mc_p.GetMaximumBin()),
                               h_comp.GetBinContent(h_comp.GetMaximumBin()),
                               h_side.GetBinContent(h_side.GetMaximumBin())))
            y_min = 0.9 * min((h_mc_p.GetBinContent(h_mc_p.GetMinimumBin()),
                               h_comp.GetBinContent(h_comp.GetMinimumBin()),
                               h_side.GetBinContent(h_side.GetMinimumBin())))
            h_mc_p.GetYaxis().SetRangeUser(y_min, y_max)
            # Format plotting style
            h_mc_p.SetLineColor(ROOT.kRed)
            h_mc_p.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
            h_comp.SetLineColor(ROOT.kBlue)
            h_comp.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
            h_side.SetLineColor(ROOT.kGreen)
            h_side.SetFillColorAlpha(ROOT.kGreen - 10, 0.7)
            # Remove stats boxes
            h_mc_p.SetStats(False)
            h_comp.SetStats(False)
            h_side.SetStats(False)
            # Print
            h_mc_p.Draw('HIST')
            h_comp.Draw('HISTsame')
            h_side.Draw('HISTsame')
            # Create legend
            leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
            leg.AddEntry(h_mc_p, '#psi(2S) Monte-Carlo', 'L')
            leg.AddEntry(h_comp, 'Background Reduced Data in #psi(2S) Region', 'L')
            leg.AddEntry(h_side, 'Background Data in #psi(2S) Region', 'L')
            leg.SetLineColor(0)
            leg.SetLineStyle(0)
            leg.SetFillStyle(0)
            leg.SetBorderSize(0)
            leg.Draw('same')
            # Save
            c.SaveAs(out_path_mcp_data + var + '.pdf')

        ### Plot comparison to MC data
        out_path_mcx_data = out_path_plots + 'mcx_v_data/'
        if not os.path.exists(out_path_mcx_data):
            os.makedirs(out_path_mcx_data)
        print(' *** Plotting comparison to X(3872) MC ***')
        df_data_comp = df_data[((df_data['mjpipi'] < 3882)
                                & (df_data['mjpipi'] > 3862))
                               & ((df_data['scaledmass'] < 5299)
                                  & (df_data['scaledmass'] > 5259))]
        df_side_comp = df_side[(df_side['mjpipi'] < 3882)
                               & (df_side['mjpipi'] > 3862)]
        for var in D_CONFIGS[run]['fit_vars'] + ['bplus_PT'] + ['prob_' + run]:
            # Initialise canvas
            c_name = var + '_MC_X(3872)_Comparison_' + run
            c = ROOT.TCanvas(c_name, c_name, 600, 400)
            c.cd()
            # Select required quantity
            a_mc_p = df_mc_x[var].as_matrix()
            a_data_comp = df_data_comp[var].as_matrix()
            a_side_comp = df_side_comp[var].as_matrix()
            # Scale DIRA and IPCHI2
            i_str = ''
            if var == 'bplus_DIRA_OWNPV':
                a_mc_p = np.arccos(a_mc_p)
                a_data_comp = np.arccos(a_data_comp)
                a_side_comp = np.arccos(a_side_comp)
                i_str = 'arccos '
            if 'CHI2' in var:
                a_mc_p = np.log(a_mc_p)
                a_data_comp = np.log(a_data_comp)
                a_side_comp = np.log(a_side_comp)
                i_str = 'log '
            # Create and format histograms
            x_max = max(max(a_mc_p), max(a_data_comp))
            x_min = min(min(a_mc_p), min(a_data_comp))
            h_mc_p = ROOT.TH1F(c_name + '_mc_#psi',
                               c_name + '_mc_#psi;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            h_comp = ROOT.TH1F(c_name + '_data',
                               c_name + '_data;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            h_side = ROOT.TH1F(c_name + '_side',
                               c_name + '_side;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            # Fill histograms
            map(h_mc_p.Fill, a_mc_p)
            map(h_comp.Fill, a_data_comp)
            map(h_side.Fill, a_side_comp)
            # Background reduce
            h_comp.Add(h_side, -1)
            # Normalise
            h_mc_p.Scale(1. / h_mc_p.Integral())
            h_comp.Scale(1. / h_comp.Integral())
            h_side.Scale(1. / h_side.Integral())
            ## Make it pretty
            h_mc_p.SetTitle(var + ' Data vs MC X(3872) Distribution ' + run)
            # Format for each case of x-axis
            h_mc_p.GetYaxis().SetTitleOffset(1.6)
            y_max = 1.1 * max((h_mc_p.GetBinContent(h_mc_p.GetMaximumBin()),
                               h_comp.GetBinContent(h_comp.GetMaximumBin()),
                               h_side.GetBinContent(h_side.GetMaximumBin())))
            y_min = 0.9 * min((h_mc_p.GetBinContent(h_mc_p.GetMinimumBin()),
                               h_comp.GetBinContent(h_comp.GetMinimumBin()),
                               h_side.GetBinContent(h_side.GetMinimumBin())))
            h_mc_p.GetYaxis().SetRangeUser(y_min, y_max)
            # Format plotting style
            h_mc_p.SetLineColor(ROOT.kRed)
            h_mc_p.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
            h_comp.SetLineColor(ROOT.kBlue)
            h_comp.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
            h_side.SetLineColor(ROOT.kGreen)
            h_side.SetFillColorAlpha(ROOT.kGreen - 10, 0.7)
            # Remove stats boxes
            h_mc_p.SetStats(False)
            h_comp.SetStats(False)
            h_side.SetStats(False)
            # Print
            h_mc_p.Draw('HIST')
            h_comp.Draw('HISTsame')
            h_side.Draw('HISTsame')
            # Create legend
            leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
            leg.AddEntry(h_mc_p, 'X(3872) Monte-Carlo', 'L')
            leg.AddEntry(h_comp, 'Background Reduced Data in X(3872) Region', 'L')
            leg.AddEntry(h_side, 'Background Data in X(3872) Region', 'L')
            leg.SetLineColor(0)
            leg.SetLineStyle(0)
            leg.SetFillStyle(0)
            leg.SetBorderSize(0)
            leg.Draw('same')
            # Save
            c.SaveAs(out_path_mcx_data + var + '.pdf')

        ### Plot comparison of MC data
        out_path_mc_mc = out_path_plots + 'mcp_v_mcx/'
        if not os.path.exists(out_path_mc_mc):
            os.makedirs(out_path_mc_mc)
        print(' *** Plotting comparison of psi(2S) MC and X(3872) MC ***')
        for var in D_CONFIGS[run]['fit_vars'] + ['bplus_PT'] + ['prob_' + run]:
            # Initialise canvas
            c_name = var + '_MC_Comparison_' + run
            c = ROOT.TCanvas(c_name, c_name, 600, 400)
            c.cd()
            # Select required quantity
            a_mc_p = df_mc_p[var].as_matrix()
            a_mc_x = df_mc_x[var].as_matrix()
            # Scale DIRA and IPCHI2
            i_str = ''
            if var == 'bplus_DIRA_OWNPV':
                a_mc_p = np.arccos(a_mc_p)
                a_mc_x = np.arccos(a_mc_x)
                i_str = 'arccos '
            if 'CHI2' in var:
                a_mc_p = np.log(a_mc_p)
                a_mc_x = np.log(a_mc_x)
                i_str = 'log '
            # Create and format histograms
            x_max = max(max(a_mc_p), max(a_mc_x))
            x_min = min(min(a_mc_p), min(a_mc_x))
            h_mc_p = ROOT.TH1F(c_name + '_mc_#psi',
                               c_name + '_mc_#psi;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            h_mc_x = ROOT.TH1F(c_name + '_mc_X',
                               c_name + '_mc_X;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            # Fill histograms
            map(h_mc_p.Fill, a_mc_p)
            map(h_mc_x.Fill, a_mc_x)
            # Normalise
            h_mc_p.Scale(1. / h_mc_p.Integral())
            h_mc_x.Scale(1. / h_mc_x.Integral())
            ## Make it pretty
            h_mc_p.SetTitle(var + ' MC X(3872) vs MC #psi(2S) Distribution ' + run)
            # Format for each case of x-axis
            h_mc_p.GetYaxis().SetTitleOffset(1.6)
            y_max = 1.1 * max(h_mc_p.GetBinContent(h_mc_p.GetMaximumBin()),
                              h_mc_x.GetBinContent(h_mc_x.GetMaximumBin()))
            y_min = 0.9 * min(h_mc_p.GetBinContent(h_mc_p.GetMinimumBin()),
                              h_mc_x.GetBinContent(h_mc_x.GetMinimumBin()))
            h_mc_p.GetYaxis().SetRangeUser(y_min, y_max)
            # Format plotting style
            h_mc_p.SetLineColor(ROOT.kRed)
            h_mc_p.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
            h_mc_x.SetLineColor(ROOT.kBlue)
            h_mc_x.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
            # Remove stats boxes
            h_mc_p.SetStats(False)
            h_mc_x.SetStats(False)
            # Print
            h_mc_p.Draw('HIST')
            h_mc_x.Draw('HISTsame')
            # Create legend
            leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
            leg.AddEntry(h_mc_p, '#psi(2S) Monte-Carlo', 'L')
            leg.AddEntry(h_mc_x, 'X(3872) Monte-Carlo', 'L')
            leg.SetLineColor(0)
            leg.SetLineStyle(0)
            leg.SetFillStyle(0)
            leg.SetBorderSize(0)
            leg.Draw('same')
            # Save
            c.SaveAs(out_path_mc_mc + var + '.pdf')

        ## Perform fit to XGB cut data
        # Filter dataframes
        a_cut_mc = df_train[df_train['class'] == 1]['scaledmass'].as_matrix()
        a_cut_data = df_data[df_data['class'] == 1]['scaledmass'].as_matrix()
        # Fit
        d_cut_fit = fit_doubleCB(a_cut_mc, a_cut_data, out_path_plots,
                                 s_info='cut_data_plot')
        # Store params
        d_run_info[run]['cut_fit_params'] = d_cut_fit
        # Fit in X region only
        a_mc_x = df_train[df_train['cat'] == 'mc_x']['scaledmass'].as_matrix()
        a_data_x = df_data[(df_data['class'] == 1)
                           & ((df_data['mjpipi'] > 3862)
                              & (df_data['mjpipi'] < 3882))]['scaledmass'].as_matrix()
        d_sig_est = fit_doubleCB(a_mc_x, a_data_x, out_path_plots,
                                 s_info='x_signal_yield_est')
        d_run_info[run]['x_reg_fit_params'] = d_sig_est
        print('*** Estimated fitted signal efficiency: {:.3f} ***'.format(
            float(d_cut_fit['data_sig_yield'])
            / d_sig_est_alldata['data_sig_yield']))

    print('*** Plotting ROC curve ***')
    ### Plot ROC curve
    fig = plt.figure()
    for run in list(D_CONFIGS.keys()):
        plt.plot(d_roc_plot[run]['bgr_rejs'], d_roc_plot[run]['sig_effs'],
                 label=run)
    plt.legend(loc=3)
    plt.ylabel("Signal Efficiency")
    plt.xlabel("Background Rejection")
    plt.xlim(0., 1.)
    plt.ylim(0., 1.)
    plt.title("ROC Curve")
    plt.tight_layout(pad=2.0)
    fig.savefig(out_path + 'ROC_curve.pdf')
    plt.close()

    print('*** Dumping run information ***')
    with open(out_path + args.out_dict, 'w') as outfile:
        yaml.dump(d_run_info, outfile, default_flow_style=False)
    with open(out_path + 'roc_plot.yml', 'w') as outfile:
        yaml.dump(d_roc_plot, outfile, default_flow_style=False)
import pandas
import numpy
import root_numpy
from hep_ml import reweight
from sklearn.cross_validation import train_test_split

from utils.plot import draw_distributions
from utils.stats import print_statistics

###############
# Import data #
###############

columns = ['hSPD', 'pt_b', 'pt_phi', 'vchi2_b', 'mu_pt_sum']

original = root_numpy.root2array('MC_distribution.root', branches=columns)
original = pandas.DataFrame(original)
target = root_numpy.root2array('RD_distribution.root', branches=columns)
target = pandas.DataFrame(target)

original_weights = numpy.ones(len(original))

##################################
# Prepare train and test samples #
##################################

# Divide original samples into training and test parts
original_train, original_test = train_test_split(original)
# Divide target samples into training and test parts
target_train, target_test = train_test_split(target)
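# A sketch of the next step in the standard hep_ml reweighting workflow
# (hyperparameters here are illustrative, not tuned): fit a gradient-boosted
# reweighter on the training halves, then derive weights for the MC test half.
reweighter = reweight.GBReweighter(n_estimators=50, learning_rate=0.1,
                                   max_depth=3, min_samples_leaf=1000)
reweighter.fit(original_train, target_train)
gb_weights_test = reweighter.predict_weights(original_test)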
#filename = testdata.get_filepath('Nue_LowE.root')
#arr = root2array(filename, 'MCNeutrinoAna/pot_tree')

from root_numpy import root2array, tree2array
from root_numpy import testdata

filename = testdata.get_filepath('test.root')

# Convert a TTree in a ROOT file into a NumPy structured array
arr = root2array(filename, 'tree')
# The TTree name is always optional if there is only one TTree in the file

# Or first get the TTree from the ROOT file
import ROOT
rfile = ROOT.TFile(filename)
intree = rfile.Get('tree')

# and convert the TTree into an array
array = tree2array(intree)

print array
raw_input()
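# root2array also accepts branches and selection arguments, e.g. to read a
# subset of columns with a cut applied; the branch names below assume the
# same test tree used in the examples above.
subset = root2array(filename, 'tree', branches=['i', 'x'], selection='z > 0')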
parser.add_argument('-a', '--alpha', type=str, default='0.01')
parser.add_argument('-g', '--gamma', type=str, default='0.01')
parser.add_argument('-s', '--step', type=int, default='20')
parser.add_argument('-o', '--offset', type=int, default='0')
args = parser.parse_args()

# specified parameters
apar = args.alpha
gpar = args.gamma
scaledown = args.step
offset = args.offset

# retrieve training data and official reco hadronic energy for comparison
X = root2array('../training_data.root', branches='calehad',
               selection='mustopz<1275&&isnumucc==1',
               step=scaledown, start=offset)
recoemu_official = root2array('../training_data.root', branches='recoemu',
                              selection='mustopz<1275&&isnumucc==1',
                              step=scaledown, start=offset)
trueenu = root2array('../training_data.root', branches='trueenu',
                     selection='mustopz<1275&&isnumucc==1',
                     step=scaledown, start=offset)
y = trueenu - recoemu_official
yoff = root2array('../training_data.root', branches='recoehad',
import os, sys import ROOT import numpy as np import root_numpy as rn import pandas as pd from array import array FILE = str(sys.argv[1]) print "GOT %s" % FILE NAME = str(os.path.basename(FILE).split(".")[0]) print "NAME %s" % NAME df = pd.DataFrame(rn.root2array( FILE, treename="analysistree/pottree"))[['run', 'subrun', 'pot']] df['pot_fname'] = NAME FOUT = "pot_%s.root" % NAME tf = ROOT.TFile.Open(FOUT, "RECREATE") print "OPEN %s" % FOUT tf.cd() run = array('i', [0]) subrun = array('i', [0]) pot = array('d', [0]) fname = ROOT.std.string() tree = ROOT.TTree("pot_tree", "") tree.Branch("run", run, "run/I") tree.Branch("subrun", subrun, "subrun/I") tree.Branch("pot", pot, "pot/D") tree.Branch("pot_fname", fname)
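# The snippet above ends after declaring the branches. A hedged sketch of the
# fill-and-write loop that presumably follows; the column-to-branch mapping is
# inferred from the branch declarations, not taken from the original source.
for _, row in df.iterrows():
    run[0] = int(row['run'])
    subrun[0] = int(row['subrun'])
    pot[0] = row['pot']
    fname.assign(row['pot_fname'])  # update the std::string in place
    tree.Fill()

tree.Write()
tf.Close()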
def write_h5_v4(folder, output_folder, file_name, xs, LUMI, counter, cols,
                tree_name="", counter_hist="", sel_cut="", obj_sel_cut="",
                verbose=True):
    print(" Opening ", folder)
    print("\n")
    if verbose:
        print("\n")
        print(" Initialized df for sample: ", file_name)
        #print(cols)

    # loop over files, called file_name
    oldFile = TFile(folder + file_name, "READ")
    if (oldFile.GetListOfKeys().Contains(counter_hist) == False):
        return
    #counter = oldFile.Get(counter_hist).GetBinContent(1)
    #nevents_gen = counter.GetBinContent(1)
    nevents_gen = counter
    print(" n events gen.: ", nevents_gen)
    if (nevents_gen == 0):
        print(" empty root file! ")
        return
    oldTree = oldFile.Get(tree_name)
    nevents_tot = oldTree.GetEntries()
    #tree_weight = oldTree.GetWeight()
    tree_weight = LUMI * xs / nevents_gen
    print(" Tree weight: ", tree_weight)

    if verbose:
        print(" Reading n. events in tree: ", nevents_tot)

    if nevents_tot <= 0:
        print(" Empty tree!!! ")
        return

    # First loop: check how many events pass the selections
    count = rnp.root2array(folder + file_name,
                           selection=sel_cut,
                           object_selection=obj_sel_cut,
                           treename=tree_name,
                           branches=["EventNumber"],
                           start=0,
                           stop=nevents_tot)
    nevents = count.shape[0]
    if verbose:
        print(" Cut applied: ", sel_cut)
        print(" Events passing cuts: ", nevents)
        print("\n")

    # Avoid a loop over variables: read all branches together
    # (arrays are already zero-padded)
    startTime = time.time()
    b = rnp.root2array(folder + file_name,
                       selection=sel_cut,
                       object_selection=obj_sel_cut,
                       treename=tree_name,
                       branches=cols,
                       start=0,
                       stop=nevents_tot)
    df = pd.DataFrame(b)
    # Remove dots from column names
    column_names = []
    for a in cols:
        if isinstance(a, tuple):
            column_names.append(
                a[0].replace('.', '_').replace('s[', '_').replace(']', ''))
        else:
            column_names.append(a.replace('.', '_'))
    df.columns = column_names
    print(df)

    # Add is_signal flag
    df["is_signal"] = np.ones(nevents) if (("n3n2" in folder)
                                           or ("H2ToSSTobbbb" in folder)
                                           or ("TChiHH" in folder)) else np.zeros(nevents)
    df["c_nEvents"] = np.ones(nevents) * nevents_gen
    df["EventWeight"] = df["EventWeight"] * tree_weight
    df["SampleWeight"] = np.ones(nevents) * tree_weight
    print("\n")
    print(" * * * * * * * * * * * * * * * * * * * * * * *")
    print(" Time needed root2array: %.2f seconds" % (time.time() - startTime))
    print(" * * * * * * * * * * * * * * * * * * * * * * *")
    print("\n")
    #df.rename(columns={"nJets" : "nCHSJets"}, inplace=True)
    if verbose:
        print(df)

    # Shuffle (sample returns a copy, so the result must be reassigned)
    df = df.sample(frac=1).reset_index(drop=True)
    print(" ------------------- ")
    print(" Events : ", df.shape[0])

    # Write h5
    if ".root" in file_name:
        file_name = file_name[:-5]
    df.to_hdf(output_folder + '/' + file_name + '.h5', 'df',
              format='table' if (len(cols) <= 2000) else 'fixed')
    print(" " + output_folder + "/" + file_name + ".h5 stored")
    print(" ------------------- ")
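# For reference, a hypothetical call to write_h5_v4; every argument value
# below is a placeholder for illustration, not from the original pipeline.
write_h5_v4(folder="samples/",                     # placeholder path
            output_folder="h5_out",                # placeholder path
            file_name="TChiHH_sample.root",        # placeholder sample
            xs=0.5,                                # cross-section (placeholder)
            LUMI=137000.,                          # luminosity (placeholder)
            counter=100000,                        # generated events (placeholder)
            cols=["EventNumber", "EventWeight"],   # must include EventWeight
            tree_name="tree",
            counter_hist="c_nEvents",
            sel_cut="",
            obj_sel_cut="")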
    base = os.path.basename(fname)
    match = fname_regex.match(base)
    if not match:
        raise ValueError("Could not match the regex to the file %s" % fname)
    flavor = match.group('flavor')
    full_category = match.group('category')
    category = [i for i in sv_categories if i in full_category][0]
    if flavor != args.signal and flavor != args.bkg:
        log.info('flavor %s is not considered signal or background '
                 'in this training and is omitted' % flavor)
        continue

    nfiles_per_sample = None
    tree = rootnp.root2array(fname, treename='tree', branches=variables,
                             selection=None, start=0, stop=nfiles_per_sample,
                             step=args.pickEvery, include_weight=False,
                             weight_name='weight')
    tree = rootnp.rec2array(tree)
    X = np.concatenate((X, tree), 0)
    if flavor == args.signal:
        y = np.concatenate((y, np.ones(tree.shape[0])))
    else:
        y = np.concatenate((y, np.zeros(tree.shape[0])))
    # Getting the weights out
    ## if args.sample.lower() == 'qcd':
    ##     weights_tree = rootnp.root2array(fname, 'tree', 'total_weight', None, 0,
    ##                                      nfiles_per_sample, args.pickEvery,
    ##                                      False, 'total_weight')
    ##     weights = np.concatenate((weights, weights_tree), 0)
    ## else:
    ##     weights = np.concatenate((weights, np.ones(tree.shape[0])))
return plt.show() # In[49]: ## Visualize weights: Heat Map of neural network weights dnn_weight_map( pipe_classifiers["DNN"].named_steps['kerasclassifier']) # In[50]: # Load dataset rec_np_data = root2array("combined/run2016Data.root", "event_mvaVariables_step7_cate4", features) np_data = rec2array(rec_np_data) # convert to numpy ndarray into pandas dataframe df_raw_data = pd.DataFrame(data=np_data, columns=features) df_raw_data.describe() df_raw_data.info() X_data = df_raw_data.values # In[51]: # Plot a mva distribution
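# The cell above ends at the "Plot a mva distribution" comment. A minimal
# sketch of such a plot using the pipeline and data already defined; the
# predict_proba usage and the binning are assumptions about the setup.
import matplotlib.pyplot as plt

# Histogram the DNN response on the 2016 data sample
mva_scores = pipe_classifiers["DNN"].predict_proba(X_data)[:, 1]

plt.figure()
plt.hist(mva_scores, bins=50, range=(0., 1.), histtype='step', label='2016 data')
plt.xlabel('MVA response')
plt.ylabel('Events')
plt.legend()
plt.show()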
def test_single_chain(): f = load(['single1.root', 'single2.root']) a = rnp.root2array(f) check_single(a, 200)
start = 1000000
stop = 2000000

storage_output = "/mnt/storage/lborgna/BkgMatched/Final/"
out_file = "BkgAll6_HighPt_Test" + '.root'
fnew = ROOT.TFile(storage_output + out_file, "recreate")
Tree = ROOT.TTree("FlatSubstructureJetTree", "Reconst ntuple")

test = ROOT.TFile.Open(bkg_storage + bkg_file)
old_tree = test.Get("FlatSubstructureJetTree")

Wpt = rtnp.root2array(sig_storage + sig_file, treename=treename,
                      selection=selection, branches=fjet_pt)
QCD_pt = rtnp.root2array(bkg_storage + bkg_file, treename=treename,
                         selection=selectionQCD, branches=fjet_pt)
print(Wpt)
print(QCD_pt)

Nbins = 100
n, bins, patches = plt.hist(QCD_pt, Nbins, normed=False,
                            facecolor='green', alpha=0.5)
nn, bbins, ppatches = plt.hist(Wpt, bins, normed=False,
                               facecolor='red', alpha=0.5)

# Bin-by-bin signal/background ratio; the small constant guards against
# division by zero in empty bins
ratio = nn / (n + 1e-11)
A = np.max(ratio)
ratio = (1 / A) * ratio  # normalise so the maximum weight is 1

cluster_E_entry = ROOT.vector('float')()
cluster_eta_entry = ROOT.vector('float')()
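# The bin-by-bin ratio above is the usual ingredient of a pT reweighting.
# A hedged sketch of how a per-event weight could be looked up when copying
# entries into the new tree; the digitize-based lookup is an assumption about
# how ratio is used downstream, not taken from the original script.
idx = np.clip(np.digitize(Wpt, bins) - 1, 0, len(ratio) - 1)
event_weights = ratio[idx]  # one reweighting factor per signal jet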
# calibrated energy and other features that indicate the "hardness" # of the interaction. from __future__ import print_function from ROOT import * from root_numpy import root2array from sklearn.externals import joblib from sklearn.svm import SVR import matplotlib.pyplot as plt import os # retrieve data, scaled down by factor of 20 scaledown = 20 Xhad = root2array('../grid_output_stride5_offset0.root', branches=['calehad', 'cvnchargedpion'], selection='mustopz<1275', step=scaledown) Xmu = root2array('../grid_output_stride5_offset0.root', branches='recotrklenact', selection='mustopz<1275', step=scaledown).reshape(-1, 1) ynu = root2array('../grid_output_stride5_offset0.root', branches='trueenu', selection='mustopz<1275', step=scaledown) svr_mu = joblib.load('../muon/models/muon_energy_estimator_active.pkl') recoemu = svr_mu.predict(Xmu) yhad = ynu - recoemu hfit = TH2F('hfit', '', 100, 0, 2, 100, 0, 5) for i in range(len(Xhad)):
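# The snippet above stops at the loop header. A hedged completion sketch:
# filling the 2D histogram with reconstructed hadronic energy (x) against the
# true hadronic energy (y) is an assumption based on the axis ranges and the
# variables defined above.
for i in range(len(Xhad)):
    hfit.Fill(Xhad['calehad'][i], yhad[i])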
input8 = "../../../hdfs/user/cc14398/Cs137-10s-18Mar/PS8m.root"
input9 = "../../../hdfs/user/cc14398/Cs137-10s-18Mar/PS9m.root"
input10 = "../../../hdfs/user/cc14398/Cs137-10s-18Mar/PS1m10.root"

input1 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm1.root"
input2 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm2.root"
input3 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm3.root"
input4 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm4.root"
input5 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm5.root"
input6 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm6.root"
input7 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm7.root"
input8 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm8.root"
input9 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm9.root"
input10 = "../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm10.root"

"""
KE1 = root2array(input1, treename="PhaseSpace", branches="Ekine")
KE2 = root2array(input2, treename="PhaseSpace", branches="Ekine")
KE3 = root2array(input3, treename="PhaseSpace", branches="Ekine")
KE4 = root2array(input4, treename="PhaseSpace", branches="Ekine")
KE5 = root2array(input5, treename="PhaseSpace", branches="Ekine")
KE6 = root2array(input6, treename="PhaseSpace", branches="Ekine")
"""
KE7 = root2array(input7, treename="PhaseSpace", branches="Ekine")
KE8 = root2array(input8, treename="PhaseSpace", branches="Ekine")
KE9 = root2array(input9, treename="PhaseSpace", branches="Ekine")
KE10 = root2array(input10, treename="PhaseSpace", branches="Ekine")
"""
print(KE1.size)
print(KE2.size)
print(KE3.size)
def test_ntuple(): f = load('ntuple.root') a = rnp.root2array(f) assert_equal(len(a), 10) assert_equal(len(a.dtype.names), 3)
def test_single_filename_not_exist():
    f = load('does_not_exist.root')
    assert_raises(IOError, rnp.root2array, f)
# -*- coding: utf-8 -*- import root_numpy import ROOT import numpy as np import pandas as pd from shutil import copyfile ROOT.gROOT.SetBatch(True) # Load data B mass branches print('*** Loading Data ***') data_loc = '/home/s1305440/PPE_disk/project_stuff/data/data_Qcut.root' a_data = root_numpy.root2array( data_loc, treename='DecayTree', branches=['scaledmass', 'mppp', 'mjprp', 'mjpk']) # Load RapidSim B mass branches Bu2Jpsipipipi_loc = '/home/s1305440/PPE_disk/project_stuff/RapidSim/validation/Bu2Jpsipipipi_tree.root' a_Bu2Jpsipipipi = root_numpy.root2array(Bu2Jpsipipipi_loc, treename='DecayTree', branches=['Bp_0_M_pip_12Kp', 'Bp_0_M']) Bu2JpsipipiK_loc = '/home/s1305440/PPE_disk/project_stuff/RapidSim/validation/Bu2JpsipipiK_tree.root' a_Bu2JpsipipiK = root_numpy.root2array(Bu2JpsipipiK_loc, treename='DecayTree', branches=['Bp_0_M_Kp_02pip', 'Bp_0_M']) B02psi2skpi_loc = '/home/s1305440/PPE_disk/project_stuff/RapidSim/validation/B02psi2skpi_tree.root' a_B02psi2skpi = root_numpy.root2array(B02psi2skpi_loc, treename='DecayTree', branches=['m_pim_1_drop', 'B0_0_M']) Bs2psi2Sphi_loc = '/home/s1305440/PPE_disk/project_stuff/RapidSim/validation/Bs2psi2Sphi_tree.root' a_Bs2psi2Sphi = root_numpy.root2array(Bs2psi2Sphi_loc, treename='DecayTree',
    'ntuple_ecal_hits_1.8e8EOT_9.root', 'ntuple_ecal_hits_1.8e8EOT_10.root',
    'ntuple_ecal_hits_1.8e8EOT_11.root', 'ntuple_ecal_hits_1.8e8EOT_12.root',
    'ntuple_ecal_hits_1.8e8EOT_13.root', 'ntuple_ecal_hits_1.8e8EOT_14.root',
    'ntuple_hcal_hits_1.8e8EOT_0.root', 'ntuple_hcal_hits_1.8e8EOT_1.root',
    'ntuple_hcal_hits_1.8e8EOT_2.root', 'ntuple_hcal_hits_1.8e8EOT_3.root',
    'ntuple_hcal_hits_1.8e8EOT_4.root',
    'hcalHits_signal_mA1MeV.root', 'hcalHits_signal_mA5MeV.root',
    'hcalHits_signal_mA10MeV.root', 'hcalHits_signal_mA50MeV.root',
    'hcalHits_signal_mA100MeV.root', 'hcalHits_signal_mA500MeV.root',
    'hcalHits_signal_mA1000MeV.root'
]

target_name_tab = [
    'background_0.npy', 'background_1.npy', 'background_2.npy',
    'background_3.npy', 'background_4.npy', 'background_5.npy',
    'background_6.npy', 'background_7.npy', 'background_8.npy',
    'background_9.npy', 'background_10.npy', 'background_11.npy',
    'background_12.npy', 'background_13.npy', 'background_14.npy',
    'hcal_background_0.npy', 'hcal_background_1.npy', 'hcal_background_2.npy',
    'hcal_background_3.npy', 'hcal_background_4.npy',
    'hcal_signal_m_1.npy', 'hcal_signal_m_5.npy', 'hcal_signal_m_10.npy',
    'hcal_signal_m_50.npy', 'hcal_signal_m_100.npy', 'hcal_signal_m_500.npy',
    'hcal_signal_m_1000.npy'
]

file_placement = 'data/'
# Convert each ROOT ntuple to a NumPy array and save it under its target name
for fname, target_name in zip(fname_tab, target_name_tab):
    array = root2array(fname)
    np.save(file_placement + target_name, array)

# A method which does not require the root_numpy library? (root_numpy does not
# exist for Windows and is not available on the SLAC server)
def test_double_tree_name_not_specified():
    f = load('trees.root')
    # the file contains two trees, so the tree name must be given explicitly
    assert_raises(ValueError, rnp.root2array, f)
# Event data frame edfs = {} mdfs = {} sample_name = str(sys.argv[1]) sample_file = str(sys.argv[2]) for name, file_ in [(sample_name, sample_file)]: INPUT_FILE = file_ # # Vertex wise Trees # vertex_df = pd.DataFrame(rn.root2array(INPUT_FILE, treename='VertexTree')) angle_df = pd.DataFrame(rn.root2array(INPUT_FILE, treename='AngleAnalysis')) shape_df = pd.DataFrame(rn.root2array(INPUT_FILE, treename='ShapeAnalysis')) gap_df = pd.DataFrame(rn.root2array(INPUT_FILE, treename="GapAnalysis")) match_df = pd.DataFrame(rn.root2array(INPUT_FILE, treename="MatchAnalysis")) dqds_df = pd.DataFrame(rn.root2array(INPUT_FILE, treename="dQdSAnalysis")) # # Combine DataFrames # comb_df = pd.concat([ vertex_df.set_index(rserv), angle_df.set_index(rserv),
def test_no_filename():
    assert_raises(ValueError, rnp.root2array, [])