def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Convert one ROOT ntuple into the training arrays stored on ``self``.

    The file is expected to hold a TTree called "tree" with the branches
    "image2d" and "sigfrac2d" (the example 2D images written by
    make_example_data). Both branches are read through ``read2DArray`` with
    32, 32 passed as the image dimensions.

    ``TupleMeanStd`` and ``weighter`` belong to the interface but are not
    used by this implementation.

    On return the following attributes are set:
      * ``self.x``        -- one-element list holding the image array
      * ``self.y``        -- one-element list holding the truth array, built
                             by joining signal fraction, background fraction
                             (1 - signal fraction) and the image along the
                             last axis
      * ``self.w``        -- empty list (no weights)
      * ``self.nsamples`` -- number of entries in the image array
    """
    import numpy as np
    import ROOT

    # Give eos a minute to recover before the file is opened.
    fileTimeOut(filename,120)
    root_file = ROOT.TFile(filename)
    event_tree = root_file.Get("tree")
    n_entries = event_tree.GetEntries()
    self.nsamples = n_entries

    from DeepJetCore.preprocessing import read2DArray

    print(filename)

    images = read2DArray(filename,"tree","image2d",n_entries,32,32)
    signal_fraction = read2DArray(filename,"tree","sigfrac2d",n_entries,32,32)
    background_fraction = 1 - signal_fraction

    # Truth carries the input image as well, appended after both fractions.
    truth_parts = [signal_fraction, background_fraction, images]
    truth = np.concatenate(truth_parts, axis=-1)

    # Final bookkeeping: the sample count follows the array actually read.
    self.nsamples = len(images)
    self.x = [images]   # list of feature numpy arrays
    self.y = [truth]    # list of target numpy arrays (truth)
    self.w = []         # list of weight arrays, not used
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Turn one input source file into lists of training arrays.

    This is the only mandatory conversion hook (unless writeFromSourceFile
    is defined). It returns three lists:

      * a list of input feature arrays,
      * a list of truth arrays,
      * a list of weight arrays, which is optional; when given, each weight
        array needs as many entries as its truth array. It is left empty
        here.

    Everything is converted to numpy arrays.

    Different ways of reading the file are deliberately mixed in this
    example: the image branch "image2d" is read with ``read2DArray`` while
    the truth branches "isA", "isB" and "isC" are read with uproot.

    ``weighterobjects`` and ``istraining`` are not used by this
    implementation.
    """
    print('reading ' + filename)

    import ROOT

    # Give eos a minute to recover before the file is opened.
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    entry_count = root_file.Get("tree").GetEntries()

    # The example works with the 2D images written by make_example_data.
    from DeepJetCore.preprocessing import read2DArray
    images = read2DArray(filename, "tree", "image2d", entry_count, 32, 32)
    print('feature_array', images.shape)

    import uproot
    uproot_tree = uproot.open(filename)["tree"]

    # One column per class flag, joined side by side along axis 1.
    label_columns = [
        np.expand_dims(uproot_tree.array(branch), axis=1)
        for branch in ("isA", "isB", "isC")
    ]
    labels = np.concatenate(label_columns, axis=1)
    # Important: float32 and C-ordered memory layout.
    labels = labels.astype(dtype='float32', order='C')

    self.nsamples = len(images)

    # Feature arrays, truth arrays, weight arrays (none).
    return [images], [labels], []
def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Convert one ROOT ntuple into the training arrays stored on ``self``.

    The file is expected to hold a TTree called "tree". The "image2d"
    branch is read through ``read2DArray`` with 24, 24 passed as the image
    dimensions; the remaining branches ("scale", "xcenter", "ycenter",
    "type", "xcoords", "ycoords") are taken from the structure returned by
    ``self.readTreeFromRootToTuple``.

    ``TupleMeanStd`` and ``weighter`` belong to the interface but are not
    used by this implementation.

    On return the following attributes are set:
      * ``self.nsamples`` -- number of entries reported by the tree
      * ``self.x``        -- [image, coordinates, additional features]
      * ``self.y``        -- one-element list with an all-ones truth array
      * ``self.w``        -- empty list (no weights)
    """
    # numpy is bound locally as ``np`` only, so every call below goes
    # through ``np`` rather than relying on a module-level ``numpy`` name.
    import numpy as np
    import ROOT
    from DeepJetCore.preprocessing import read2DArray

    fileTimeOut(filename,120) #give eos a minute to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("tree")
    self.nsamples = tree.GetEntries()

    # The example works with the 2D images written by make_example_data.
    print(filename)
    feature_image = read2DArray(filename,"tree","image2d",self.nsamples,24,24)

    npy_array = self.readTreeFromRootToTuple(filename)

    # Per-event scalars, each turned into a column and joined along axis 1.
    scale = np.expand_dims(npy_array['scale'],axis=1)
    xcenter = np.expand_dims(npy_array['xcenter'],axis=1)
    ycenter = np.expand_dims(npy_array['ycenter'],axis=1)
    ptype = np.expand_dims(npy_array['type'],axis=1)
    print('ycenter',ycenter.shape)
    add_features = np.concatenate([scale,xcenter,ycenter,ptype],axis=1)

    # Per-event coordinate lists, reshaped to (events, 24, 24, 1). The shape
    # is passed positionally because the ``newshape`` keyword of np.reshape
    # is deprecated in recent NumPy releases. Each array is sized from its
    # own length.
    xcoords = np.array(list(npy_array['xcoords']),dtype='float32')
    xcoords = np.reshape(xcoords, (xcoords.shape[0],24,24,1))
    ycoords = np.array(list(npy_array['ycoords']),dtype='float32')
    ycoords = np.reshape(ycoords, (ycoords.shape[0],24,24,1))
    print('xcoords',xcoords.shape)

    all_coords = np.concatenate([xcoords,ycoords],axis=-1)

    # Every entry is labelled 1: this is real data.
    alltruth = np.ones(self.nsamples)

    self.x = [feature_image,all_coords,add_features]
    self.y = [alltruth]
    self.w = []