def __init__(self, name, xmin, ymin, xmax, ymax, N, normalize=False):
    """Load the network, prepare the output directory and build the grid.

    Args:
        name: Model name. ``<name>_HToZA.zip`` is restored from the model
            directory, and ``name`` is also used as output sub-directory.
        xmin, xmax: Bounds stored for the first grid axis.
        ymin, ymax: Bounds stored for the second grid axis.
        N: Number of points requested per axis. ``self.N`` is re-read
            after ``_make_grid()`` because, per the original note, it
            changes when unphysical phase-space points are dropped.
        normalize: When true, ``_importGraphs()`` is called before the
            grid is built (Xsec*BR*Acc normalisation).
    """
    self.xmin = xmin
    self.ymin = ymin
    self.xmax = xmax
    self.ymax = ymax
    self.N = N
    self.name = name
    self.normalize = normalize  # Xsec*BR*Acc
    self.nevents = 0  # count the number of events

    custom_objects = {'PreprocessLayer': PreprocessLayer}
    self.model = Restore(
        os.path.join('/home/users/f/b/fbury/MoMEMtaNeuralNet/model/',
                     name + '_HToZA.zip'),
        custom_objects=custom_objects).model

    # Make directory output #
    self.path_output = os.path.join(
        '/home/ucl/cp3/fbury/MoMEMtaNeuralNet/Plotting/PDF', self.name)
    # exist_ok avoids the check-then-create race when several jobs start
    # at the same time and all find the directory missing.
    os.makedirs(self.path_output, exist_ok=True)

    # Get the normalization #
    if self.normalize:
        self._importGraphs()

    # Make the grid #
    self._make_grid()

    # Prepare output #
    # N has changed because of unphysical phase space points
    self.Z = np.zeros(self.N)
def HyperRestore(self,inputs,verbose=0,generator=False):
    """
    Restore the zipped model ``<main_path>/model/<self.name>.zip`` and
    return its predictions for ``inputs``.

    Args:
        inputs: Table indexed by lists of column names (the code calls
            ``.astype(...).values`` on the selection, so presumably a
            pandas DataFrame -- confirm against the caller).
        verbose: Forwarded to ``model.predict``.
        generator: Accepted for interface compatibility; not used here.

    Returns:
        Whatever ``model.predict`` returns for the selected columns.

    Reference : /home/ucl/cp3/fbury/.local/lib/python3.6/site-packages/talos/commands/restore.py
    """
    # Format first, then centre: the original centred only self.name and
    # pasted that padded name into the middle of the message.
    logging.info(('Using model %s.zip ' % self.name).center(80, '-'))

    # Restore model #
    # NOTE(review): any exception triggers a retry every 3 s with no upper
    # bound; kept as is since it looks like a deliberate wait for the file.
    loaded = False
    while not loaded:
        try:
            a = Restore(os.path.join(parameters.main_path, 'model', self.name + '.zip'),
                        custom_objects=self.custom_objects)
            loaded = True
        except Exception as e:
            logging.warning('Could not load model due to "%s", will try again in 3s'%e)
            time.sleep(3)

    # One array per input column; column names are the configured inputs
    # with '$' removed. Needed by both model flavours, so computed once.
    inputsLL = inputs[[param.replace('$', '') for param in parameters.inputs]].astype(np.float32).values
    model_inputs = np.hsplit(inputsLL, inputsLL.shape[1])

    has_LBN = any(l.__class__.__name__ == 'LBNLayer' for l in a.model.layers)
    if has_LBN:
        # Models with an LBN layer get a second input built from the LBN columns.
        inputsLBN = inputs[parameters.LBN_inputs].astype(np.float32).values.reshape(
            -1, 4, len(parameters.LBN_inputs) // 4)
        model_inputs = (model_inputs, inputsLBN)

    outputs = a.model.predict(model_inputs,
                              batch_size=parameters.output_batch_size,
                              verbose=verbose)
    return outputs
def neural_predictor(test_x, experiment_name=None):
    """Predict with a restored Talos model.

    Example call:
        neural_predictor(test_x, '../talos_training/03_25_2020_13_21_00.zip')

    Args:
        test_x: Input passed unchanged to ``model.predict``.
        experiment_name: Path of the zip to restore. When ``None`` the
            most recently changed ``../talos_training/*.zip`` is used.

    Returns:
        Tuple ``(inverse_transform_df(predictions), model)``.

    Raises:
        ValueError: No archive was given and none is found on disk.
    """
    if experiment_name is None:
        # Fetches most recent model deployment
        list_of_files = glob.glob('../talos_training/*.zip')
        print(list_of_files)
        if not list_of_files:
            # Same exception type max() raised on the empty list, but with
            # a message that says what is actually missing.
            raise ValueError("No talos model found in '../talos_training/'")
        experiment_name = max(list_of_files, key=os.path.getctime)

    print('Restoring talos-model ' + experiment_name + '..')
    t = Restore(experiment_name)
    results = t.model.predict(test_x)
    return inverse_transform_df(results), t.model
def predict(test_x, experiment_name=None):
    """Predict with a restored Talos model and undo the target scaling.

    Example call:
        predict(test_x, 'talos_models/03_25_2020_13_21_00.zip')

    Args:
        test_x: Input passed unchanged to ``model.predict``.
        experiment_name: Archive to restore, with or without the ``.zip``
            suffix. When ``None`` the most recently changed
            ``talos_models/*.zip`` is used.

    Returns:
        ``np.array`` with ``inverse_transform(scaler, row)`` applied to
        every prediction row.

    Raises:
        ValueError: No archive was given and none is found on disk.
    """
    if experiment_name is None:
        # Only consider archives: the previous '*' pattern also matched
        # other entries of the directory.
        list_of_files = glob.glob('talos_models/*.zip')
        if not list_of_files:
            raise ValueError("No talos model found in 'talos_models/'")
        experiment_name = max(list_of_files, key=os.path.getctime)

    # '.zip' used to be appended unconditionally, which turned both the
    # documented example path and a globbed archive into '*.zip.zip'.
    if not experiment_name.endswith('.zip'):
        experiment_name += '.zip'

    restore_model = Restore(experiment_name)
    results = restore_model.model.predict(test_x)
    return np.array([inverse_transform(scaler, result) for result in results])
def HyperRestore(self, inputs, verbose=0, generator=False, generator_filepath=None):
    """
    Restore ``<main_path>/model/<name>_<sample>.zip`` and return the
    model output, either for ``inputs`` directly or through a
    ``DataGenerator`` reading ``generator_filepath``.

    The process exits with status 1 when ``generator`` is set without a
    ``generator_filepath``.

    Reference : /home/ucl/cp3/fbury/.local/lib/python3.6/site-packages/talos/commands/restore.py
    """
    banner = ' Starting restoration of sample %s with model %s_%s.zip ' % (
        self.sample, self.name, self.sample)
    logging.info(banner.center(80, '-'))

    # Restore model: keep trying every 3 s until it succeeds #
    while True:
        try:
            a = Restore(os.path.join(parameters.main_path, 'model',
                                     self.name + '_' + self.sample + '.zip'),
                        custom_objects=self.custom_objects)
            break
        except Exception as e:
            logging.warning(
                'Could not load model due to "%s", will try again in 3s' % e)
            time.sleep(3)

    # Output of the model: direct prediction #
    if not generator:
        return a.model.predict(inputs,
                               batch_size=parameters.output_batch_size,
                               verbose=verbose)

    # Output of the model: generator based prediction #
    if generator_filepath is None:
        logging.error("Generator output must be provided with a filepath")
        sys.exit(1)
    output_generator = DataGenerator(path=generator_filepath,
                                     inputs=parameters.inputs,
                                     outputs=parameters.outputs,
                                     batch_size=parameters.output_batch_size,
                                     state_set='output')
    return a.model.predict_generator(output_generator,
                                     workers=parameters.workers,
                                     use_multiprocessing=True,
                                     verbose=1)
def load_model(self,path_model):
    """Restore the model archive at ``path_model`` and record its name.

    ``self.model_name`` is the file name of the archive with every
    ".zip" occurrence removed.
    """
    custom_layers = {'PreprocessLayer': PreprocessLayer}
    restored = Restore(path_model, custom_objects=custom_layers)
    self.model = restored.model
    archive_name = os.path.basename(path_model)
    self.model_name = archive_name.replace(".zip","")
class MassPlane:
    """Network output drawn over a triangular (x, y) grid with ROOT.

    The grid keeps the points with ``y >= x``. The axis titles used
    below label x as M_{jj} and y as M_{lljj}. The model is fed rows
    ``[x, y, mA, mH]`` and its output columns 0, 1 and 2 are plotted as
    the DY, ttbar and H->ZA probabilities respectively.
    """

    # (enabling attribute, output column, tag used in the graph name, TLatex label)
    _PROCESSES = (
        ('plot_DY', 0, 'DY', 'DY'),
        ('plot_TT', 1, 'TT', 't#bar{t}'),
        ('plot_ZA', 2, 'ZA', 'H#rightarrowZA'),
    )

    def __init__(self,x_bins,x_min,x_max,y_bins,y_min,y_max,plot_DY=False,plot_TT=False,plot_ZA=False,profile=False):
        """Store the binning and plot switches, then build the grid."""
        self.x_bins = x_bins
        self.x_min = x_min
        self.x_max = x_max
        self.y_bins = y_bins
        self.y_min = y_min
        self.y_max = y_max
        self.model = None  # set by load_model()
        self.plot_DY = plot_DY
        self.plot_TT = plot_TT
        self.plot_ZA = plot_ZA
        self.profile = profile
        self.graph_list = []

        # Produce grid #
        self.produce_grid()

    def produce_grid(self):
        """Build the grid points and keep only those with y >= x."""
        self.X,self.Y = np.meshgrid(np.linspace(self.x_min,self.x_max,self.x_bins),
                                    np.linspace(self.y_min,self.y_max,self.y_bins))
        bool_upper = np.greater_equal(self.Y,self.X)
        # Boolean indexing flattens: X and Y are 1D after the selection
        self.X = self.X[bool_upper]
        self.Y = self.Y[bool_upper]
        # x, y are column vectors of the same points
        self.x = self.X.reshape(-1,1)
        self.y = self.Y.reshape(-1,1)

    def load_model(self,path_model):
        """Restore the model archive and record its name without ".zip"."""
        self.model = Restore(path_model, custom_objects={'PreprocessLayer': PreprocessLayer}).model
        self.model_name = os.path.basename(path_model).replace(".zip","")

    @staticmethod
    def _style_graph(graph, title):
        """Apply the title, axis labels, z range and contour count shared by all graphs."""
        graph.GetHistogram().SetTitle(title)
        graph.GetHistogram().GetXaxis().SetTitle("M_{jj} [GeV]")
        graph.GetHistogram().GetYaxis().SetTitle("M_{lljj} [GeV]")
        graph.GetHistogram().GetZaxis().SetTitle("DNN output")
        graph.GetHistogram().GetZaxis().SetRangeUser(0.,1.)
        graph.GetHistogram().SetContour(100)
        for axis in (graph.GetXaxis(), graph.GetYaxis(), graph.GetZaxis()):
            axis.SetTitleOffset(1.2)
            axis.SetTitleSize(0.045)

    def plotMassPoint(self,mH,mA):
        """Evaluate the model on the grid for one (mH, mA) hypothesis.

        One TGraph2D per enabled process is appended to
        ``self.graph_list`` in DY, TT, ZA order.
        """
        print ("Producing plot for MH = %.2f GeV, MA = %.2f"%(mH,mA))
        N = self.x.shape[0]
        params = np.c_[np.ones(N)*mA,np.ones(N)*mH]
        inputs = np.c_[self.x,self.y,params]
        output = self.model.predict(inputs)

        for flag, column, tag, label in self._PROCESSES:
            if not getattr(self, flag):
                continue  # graphs are only built for the requested processes
            graph = ROOT.TGraph2D(N)
            graph.SetNpx(500)
            graph.SetNpy(500)
            graph.SetName(("MassPlane_%s_mH_%s_mA_%s"%(tag,mH,mA)).replace('.','p'))
            for i in range(N):
                # [i,0] hands ROOT a scalar instead of a one-element array
                graph.SetPoint(i,self.x[i,0],self.y[i,0],output[i,column])
            self._style_graph(
                graph,
                "P(%s) for mass point M_{H} = %.2f GeV, M_{A} = %.2f GeV"%(label,mH,mA))
            self.graph_list.append(graph)

    @staticmethod
    def getProfiles(g):
        """Return the X and Y projections of the graph histogram.

        Each projection bin is scaled by (bins along the other axis) /
        (non-zero bins along the other axis) to compensate for the
        triangular binning.
        """
        h = g.GetHistogram()
        xproj = h.ProjectionX()
        yproj = h.ProjectionY()
        array = hist2array(h)
        n_x = h.GetNbinsX()
        n_y = h.GetNbinsY()
        # Assumes hist2array returns an (n_x, n_y) array, as the indexing
        # of the original code implies.
        filled_per_x = np.count_nonzero(array,axis=1)
        filled_per_y = np.count_nonzero(array,axis=0)
        # Rows/columns without any filled bin keep a unit factor instead
        # of dividing by zero (which produced inf/nan bin contents).
        x_scale = np.where(filled_per_x > 0, n_y/np.maximum(filled_per_x,1), 1.)
        y_scale = np.where(filled_per_y > 0, n_x/np.maximum(filled_per_y,1), 1.)
        # ROOT bins run from 1 to N inclusive; the previous range(1, N)
        # left the last bin uncorrected.
        for x in range(1,n_x+1):
            xproj.SetBinContent(x,xproj.GetBinContent(x)*x_scale[x-1])
        for y in range(1,n_y+1):
            yproj.SetBinContent(y,yproj.GetBinContent(y)*y_scale[y-1])
        xproj.GetYaxis().SetTitle("DNN output")
        yproj.GetYaxis().SetTitle("DNN output")
        return xproj, yproj

    def plotOnCanvas(self):
        """Draw every stored graph into one multi-page PDF and one ROOT file.

        Requires load_model() to have been called (uses self.model_name).
        """
        setTDRStyle()
        pdf_path = "MassPlane/"+self.model_name+".pdf"
        root_path = pdf_path.replace('.pdf','.root')
        # ROOT does not create missing directories for output files
        os.makedirs("MassPlane", exist_ok=True)
        outFile = ROOT.TFile(root_path,"RECREATE")
        C = ROOT.TCanvas("C","C",800,600)
        #C.SetLogz()
        C.Print(pdf_path+"[")  # "[" opens the multi-page PDF
        for g in self.graph_list:
            print ("Plotting %s"%g.GetName())
            g.Draw("colz")
            # Overlay the 0.90 / 0.95 / 0.99 contours from a clone
            g_copy = g.Clone()
            contours = np.array([0.90,0.95,0.99])
            g_copy.GetHistogram().SetContour(contours.shape[0],contours)
            g_copy.Draw("cont2 same")
            g.Write()
            C.Print(pdf_path,"Title:"+g.GetName())
            if self.profile:
                xproj,yproj = self.getProfiles(g)
                xproj.Draw("hist")
                xproj.Write()
                C.Print(pdf_path,"Title:"+g.GetName()+" X profile")
                yproj.Draw("hist")
                C.Print(pdf_path,"Title:"+g.GetName()+" Y profile")
                yproj.Write()
        C.Print(pdf_path+"]")  # "]" closes the PDF
        outFile.Close()
        print ("Root file saved as %s"%root_path)

    def makePavement(self,contours):
        """For each cut value, overlay that single contour of all graphs.

        One PDF and one ROOT file are written per value in ``contours``.
        """
        os.makedirs("MassPlane", exist_ok=True)
        for contour in contours:
            print ("Producing pavement for cut %.2f"%contour)
            pdf_path = "MassPlane/"+self.model_name+("_pave%0.2f"%contour).replace('.','p')+".pdf"
            root_path = pdf_path.replace('.pdf','.root')
            outFile = ROOT.TFile(root_path,"RECREATE")
            C = ROOT.TCanvas("C","C",800,600)
            opt = 'cont2'
            # Need them saved in a list otherwise they are deleted and canvas is blank
            new_graph_list = [g.Clone() for g in self.graph_list]
            for g in new_graph_list:
                print ('Adding to contour plot',g.GetName())
                g.SetTitle("Pavement for cut %0.2f"%contour)
                g.GetHistogram().SetContour(1,np.array([contour]))
                g.Draw(opt)
                if 'same' not in opt:
                    opt += " same"  # every graph after the first is overlaid
                g.Write()
            C.Print(pdf_path)
            outFile.Close()
def HyperScan(self,data,list_inputs,list_outputs,task,model_idx=None,generator=False,resume=False):
    """
    Run the Talos hyperparameter scan, evaluate the trained models and
    save the results to ``<self.name_model>.csv``.

    Args:
        data: Table holding all the event information (inputs, outputs
            and unused variables); the original note describes it as a
            pandas dataframe. Columns ``learning_weights`` and, for
            cross validation, ``mask`` are read from it.
        list_inputs: Names of the input columns.
        list_outputs: Names of the output columns.
        task: Name of a pickled parameter dict under
            ``<main_path>/split/<self.name>/``; when it is the empty
            string the full ``parameters.p`` dict is used instead.
        model_idx: Cross-validation index. When given, training and
            evaluation sets are selected through slice masks instead of
            ``train_test_split``.
        generator: When true, dummy arrays are handed to the scan and
            the evaluation is done through ``DataGenerator``.
        resume: When true, the parameters of ``parameters.resume_model``
            replace ``self.p`` and the epoch range is extended.

    Reference : /home/ucl/cp3/fbury/.local/lib/python3.6/site-packages/talos/scan/Scan.py
    """
    logging.info(' Starting scan '.center(80,'-'))

    # Printing #
    logging.info('Number of features : %d'%len(list_inputs))
    for name in list_inputs:
        logging.info('..... %s'%name)
    logging.info('Number of outputs : %d'%len(list_outputs))
    for name in list_outputs:
        logging.info('..... %s'%name)

    # Records #
    if not generator:
        self.x = data[list_inputs].values
        # The learning weight is carried as the last column of y
        self.y = data[list_outputs+['learning_weights']].values
        # Data splitting #
        if model_idx is None:
            size = parameters.training_ratio/(parameters.training_ratio+parameters.evaluation_ratio)
            self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(self.x,self.y,train_size=size)
        else:
            # Cross validation : take the training and evaluation set based on the mask
            # model_idx == index of mask on which model will be applied (aka, not trained nor evaluated)
            _, eval_idx, train_idx = GenerateSliceIndices(model_idx)
            eval_mask = GenerateSliceMask(eval_idx,data['mask'])
            train_mask = GenerateSliceMask(train_idx,data['mask'])
            self.x_val = self.x[eval_mask]
            self.y_val = self.y[eval_mask]
            self.x_train = self.x[train_mask]
            self.y_train = self.y[train_mask]
        logging.info("Training set : %d"%self.x_train.shape[0])
        logging.info("Evaluation set : %d"%self.x_val.shape[0])
    else:
        # Needs to use dummy inputs to launch talos scan but in Model the generator will be used
        dummyX = np.ones((1,len(list_inputs)))
        dummyY = np.ones((1,len(list_outputs)+1)) # emulates output + weights
        self.x_train = dummyX
        self.y_train = dummyY
        self.x_val = dummyX
        self.y_val = dummyY

    # Talos hyperscan parameters #
    self.task = task
    if self.task != '': # if task is specified load it otherwise get it from parameters.py
        with open(os.path.join(parameters.main_path,'split',self.name,self.task), 'rb') as f:
            self.p = pickle.load(f)
    else: # We need the full dict
        self.p = parameters.p

    # If resume, puts it as argument to be passed to function #
    # Also, needs to change the dictionary parameters for the one in the imported model #
    if resume:
        logging.info("Will resume training of model %s"%parameters.resume_model)
        # Get model and extract epoch range #
        a = Restore(parameters.resume_model,custom_objects=self.custom_objects)
        initial_epoch = a.params['epochs'][0]
        supp_epochs = self.p['epochs'][0] # Will update the param dict, so must keep that in memory
        batch_size_save = self.p['batch_size'] # Might want to change batch_size in retraining
        # Update params dict with the one from the trained model #
        self.p = a.params
        self.p['resume'] = [parameters.resume_model]
        self.p['initial_epoch'] = [initial_epoch] # Save initial epoch to be passed to Model
        self.p['epochs'][0] = initial_epoch+supp_epochs # Initial = last epoch of already trained model (is a list)
        self.p['batch_size'] = batch_size_save
        logging.warning("Since you asked to resume training of model %s, the parameters dictionary has been set to the one used to train the model"%parameters.resume_model)
        logging.info("Will train the model from epoch %d to %d"%(self.p['initial_epoch'][0],self.p['epochs'][0]))

    # Check if the experiment number already exists, then change it -> avoids rewriting #
    # This is only valid in worker mode, not driver #
    no = 1
    if self.task == '': # If done on frontend
        name = self.name
        # NOTE(review): the check looks in parameters.path_model for
        # "<self.name>_<no>.csv" (without the crossval suffix) while the
        # csv below is written as "<name_model>.csv" relative to the
        # working directory -- confirm both refer to the same file.
        while os.path.exists(os.path.join(parameters.path_model,self.name+'_'+str(no)+'.csv')):
            no +=1
        if model_idx is not None:
            name += '_crossval%d'%model_idx
        self.name_model = name+'_'+str(no)
    else: # If job on cluster
        name = self.name
        if model_idx is not None:
            name += '_crossval%d'%model_idx
        self.name_model = name+'_'+self.task.replace('.pkl','')

    # Define scan object #
    self.h = Scan(x=self.x_train,                       # Training inputs
                  y=self.y_train,                       # Training targets
                  params=self.p,                        # Parameters dict
                  dataset_name=self.name,               # Name of experiment
                  experiment_no=str(no),                # Number of experiment
                  model=getattr(Model,parameters.model),# Get the model in Model.py specified by parameters.py
                  val_split=0.1,                        # How much data is to be used for val_loss
                  reduction_metric='val_loss',          # How to select best model
                  #grid_downsample=0.1,                 # When used in serial mode
                  #random_method='lhs',                 ---
                  #reduction_method='spear',            ---
                  #reduction_window=1000,               ---
                  #reduction_interval=100,              ---
                  #last_epoch_value=True,               ---
                  print_params=True,                    # To print param at each job
                  repetition=parameters.repetition,     # Whether a set of parameters is to be trained several times
                  path_model = parameters.path_model,   # Where to save the model
                  custom_objects=self.custom_objects,   # Custom object : custom layer
                  )

    if not generator:
        # Use the saved information in DF #
        self.h_with_eval = Autom8(scan_object = self.h,     # the scan object
                                  x_val = self.x_val,       # Evaluation inputs
                                  y_val = self.y_val[:,:-1],# Evaluation targets (last column is weight)
                                  n = -1,                   # How many models to evaluate (n=-1 means all)
                                  folds = 5,                # Cross-validation splits for nominal and errors
                                  metric = 'val_loss',      # On what metric to sort
                                  asc = True,               # Ascending because loss function
                                  shuffle = True,           # Shuffle before evaluation
                                  average = 'micro')        # Not useful here
        self.h_with_eval.data.to_csv(self.name_model+'.csv') # save to csv including error
        self.autom8 = True
    else:
        # Needs to use the generator evaluation #
        error_arr = np.zeros(self.h.data.shape[0])
        for i in range(self.h.data.shape[0]):
            logging.info("Evaluating model %d"%i)
            # Load model: rebuilt from the JSON and weights kept by the scan #
            model_eval = model_from_json(self.h.saved_models[i],custom_objects=self.custom_objects)
            model_eval.set_weights(self.h.saved_weights[i])
            model_eval.compile(optimizer=Adam(),loss={'OUT':parameters.p['loss_function']},metrics=['accuracy'])
            # Evaluate model #
            evaluation_generator = DataGenerator(path = parameters.path_gen_evaluation,
                                                 inputs = parameters.inputs,
                                                 outputs = parameters.outputs,
                                                 batch_size = parameters.p['batch_size'][0],
                                                 state_set = 'evaluation')
            eval_metric = model_eval.evaluate_generator(generator = evaluation_generator,
                                                        workers = parameters.workers,
                                                        use_multiprocessing = True)
            # Save errors: the first returned metric is stored #
            error_arr[i] = eval_metric[0]
            logging.info('Error is %f'%error_arr[i])
        # Save evaluation error to csv #
        self.h.data['eval_mean'] = error_arr
        self.h.data.to_csv(self.name_model+'.csv') # save to csv including error
        self.autom8 = True

    # returns the experiment configuration details
    logging.info('='*80)
    logging.debug('Details')
    logging.debug(self.h.details)
class LikelihoodMap():
    """Accumulate the network output over events on a (mA, mH) grid and
    save the result as a ROOT TGraph2D.

    Only grid points with mH > mA, mH > 125 and mH - mA > 90 are kept.
    """

    def __init__(self, name, xmin, ymin, xmax, ymax, N, normalize=False):
        """Load the network, prepare the output directory and build the grid.

        Args:
            name: Model name. ``<name>_HToZA.zip`` is restored and
                ``name`` is also the output sub-directory.
            xmin, xmax: Range of the mA axis.
            ymin, ymax: Range of the mH axis.
            N: Points per axis; ``self.N`` becomes the number of kept
                grid points once ``_make_grid()`` has run.
            normalize: Add the log of the Xsec*BR*Acc normalisation.
        """
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.N = N
        self.name = name
        self.normalize = normalize  # Xsec*BR*Acc
        self.nevents = 0  # count the number of events

        custom_objects = {'PreprocessLayer': PreprocessLayer}
        self.model = Restore(
            os.path.join('/home/users/f/b/fbury/MoMEMtaNeuralNet/model/',
                         name + '_HToZA.zip'),
            custom_objects=custom_objects).model

        # Make directory output #
        self.path_output = os.path.join(
            '/home/ucl/cp3/fbury/MoMEMtaNeuralNet/Plotting/PDF', self.name)
        # exist_ok avoids the check-then-create race between parallel jobs
        os.makedirs(self.path_output, exist_ok=True)

        # Get the normalization (may switch self.normalize off) #
        if self.normalize:
            self._importGraphs()

        # Make the grid #
        self._make_grid()

        # Prepare output #
        # N has changed because of unphysical phase space points
        self.Z = np.zeros(self.N)

    ################################################
    # L(x_i|alpha) = \prod_i 1/sigma_vis W(x_i|alpha)
    # -ln (L(x_i|alpha)) = \sum_i [ -ln(W(x_i|alpha)) + ln(sigma_vis) ]
    #                    = \sum_i [ output_DNN ] + n*ln(sigma_vis)

    def _make_grid(self):
        """Build the physical (mA, mH) points and the log-normalisation."""
        x = np.linspace(self.xmin, self.xmax, self.N)
        y = np.linspace(self.ymin, self.ymax, self.N)
        X, Y = np.meshgrid(x, y)
        mA = X.flatten()
        mH = Y.flatten()
        mh = 125
        mZ = 90

        # Unphysical phase-space points #
        condition1 = np.greater(mH, mA)                   # Ensure that mH > mA
        condition2 = np.greater(mH, mh)                   # Ensure that mH > mh
        condition3 = np.greater(np.subtract(mH, mA), mZ)  # Ensure that mH-mA > mZ
        upper = np.logical_and(np.logical_and(condition1, condition2),
                               condition3)
        self.mA = mA[upper]
        self.mH = mH[upper]
        self.N = self.mA.shape[0]

        # Normalisation with xsec visible #
        self.norm = np.ones(self.N)
        if self.normalize:
            logging.info('Normalization enabled')
            for i in range(0, self.N):
                self.norm[i] *= self.acc.Interpolate(self.mA[i], self.mH[i])
                self.norm[i] *= self.BR_HtoZA.Interpolate(
                    self.mA[i], self.mH[i])
                self.norm[i] *= 3.3658 * 2 / 100  # Z-> e+e- + Z-> mu+mu-
                self.norm[i] *= 1e-12  # Xsec in pb

        # Get log(normalization) #
        # if normalize == False => norm = 1 => log(norm) = 0 (thus not used)
        # non physical phase space points (xsec=0) -> will produce -inf
        self.norm = np.log10(self.norm)

    def AddEvent(self, event):
        """Add the model output of one event, evaluated at every grid point."""
        # Get the -log(weight) #
        inputs = np.c_[np.tile(event, (self.N, 1)), self.mH, self.mA]
        outputs = self.model.predict(inputs, batch_size=512)
        self.Z += outputs.reshape(-1, )
        self.nevents += 1

    def MakeGraph(self, title, suffix):
        """Turn the accumulated sum into a TGraph2D and save it.

        Args:
            title: Sample name. Names containing 'DY' or 'TT' get a fixed
                title; otherwise the third and fourth integers found in
                the name are used as mH and mA.
            suffix: Suffix of the output ROOT file name.

        Note that ``self.Z`` is modified in place.
        """
        self.legend_title = title.replace('.root', '').replace('-', '_')
        self.suffix = suffix
        if title.find('DY') != -1:
            title = 'Drell-Yan events'
        elif title.find('TT') != -1:
            title = r't\bar{t} events'
        else:
            numbers = re.findall(r'\d+', title)
            mH_value = numbers[2]
            mA_value = numbers[3]
            title = 'Signal events with M_{H} = %s GeV and M_{A} = %s GeV' % (
                mH_value, mA_value)

        # Normalize #
        if self.normalize:
            self.Z += self.nevents * self.norm
            title += ' [Normalized]'
            self.legend_title += '_norm'

        # Divide by total entries #
        self.Z /= self.nevents
        self.Z *= 2  # Because -2 log L

        # check for invalids (nan or inf), might be coming from log10 #
        invalid_entries = np.logical_or(np.isinf(self.Z), np.isnan(self.Z))
        max_Z = np.amax(self.Z[np.invert(invalid_entries)])
        min_Z = np.amin(self.Z[np.invert(invalid_entries)])
        self.Z[invalid_entries] = 0  # removes non physical points

        # Generate graph #
        graph = TGraph2D(self.N)
        print('Generating TGraph2D')
        manager = enlighten.get_manager()
        pbar = manager.counter(total=self.N, desc='Progress', unit='Point')
        for i in range(self.N):
            graph.SetPoint(i, self.mA[i], self.mH[i], self.Z[i])
            pbar.update()
        manager.stop()
        graph.SetTitle(
            'Log-Likelihood : %s;M_{A} [GeV]; M_{H} [GeV]; -2log L' % (title))
        graph.SetMaximum(max_Z)
        graph.SetMinimum(min_Z)
        graph.SetNpx(1000)
        graph.SetNpy(1000)

        # Save graph #
        self.graph = graph
        self._saveGraph()

    def _saveGraph(self):
        """Write ``self.graph`` under ``self.legend_title`` into the
        likelihood ROOT file, updating the file when it already exists."""
        full_name = self.path_output + '/likelihood_' + self.suffix + '.root'
        # NOTE(review): the two log messages look swapped with respect to
        # their branch (existing file -> "New Graph saved"); they are left
        # unchanged here -- confirm the intended wording.
        if os.path.exists(full_name):
            root_file = TFile(full_name, "update")
            write_args = (self.legend_title, TObject.kOverwrite)
            message = "New Graph saved in %s"
        else:
            root_file = TFile(full_name, "recreate")
            write_args = (self.legend_title, )
            message = "Graph replaced in %s"
        try:
            self.graph.Write(*write_args)
        finally:
            # Close explicitly instead of relying on garbage collection
            root_file.Close()
        logging.info(message % full_name)

    def _importGraphs(self):
        """Load the cross-section, branching-ratio and acceptance graphs.

        ``self.normalize`` is switched off when loading fails or when
        any loaded object is not a TGraph2D.
        """
        # import the TGraphs 2D #
        path_graphs = '/home/users/f/b/fbury/MoMEMtaNeuralNet/Plotting/'
        file_xsec = TFile.Open(os.path.join(path_graphs, 'XsecMap_full.root'))
        file_acc = TFile.Open(
            os.path.join(path_graphs, 'AcceptanceMap_full.root'))

        # Define every attribute first: when loading failed part-way the
        # type checks below used to raise AttributeError.
        self.xsec = None
        self.BR_HtoZA = None
        self.BR_Atobb = None
        self.BR_Ztoll = None
        self.acc = None
        try:
            # Deepcopy necessary to avoid seg fault
            self.xsec = copy.deepcopy(file_xsec.Get('Xsec'))
            self.BR_HtoZA = copy.deepcopy(file_xsec.Get('BR_HtoZA'))
            self.BR_Atobb = copy.deepcopy(file_xsec.Get('BR_Atobb'))
            self.BR_Ztoll = copy.deepcopy(file_xsec.Get('BR_Ztoll'))
            self.acc = copy.deepcopy(file_acc.Get('Acceptance'))
        except Exception:
            self.normalize = False
            logging.warning('Could not load the Objects -> normalization off')
            return

        loaded = (
            ('Xsec', self.xsec),
            ('BR_HtoZA', self.BR_HtoZA),
            ('BR_Atobb', self.BR_Atobb),
            ('BR_Ztoll', self.BR_Ztoll),
            ('Acceptance', self.acc),
        )
        for label, obj in loaded:
            if not isinstance(obj, ROOT.TGraph2D):
                self.normalize = False
                logging.warning(
                    '%s is %s and not TGraph2D -> normalization off' %
                    (label, type(obj)))
def HyperScan(self, data, list_inputs, list_outputs, task, generator=False, generator_weights=False, resume=False):
    """
    Run the Talos hyperparameter scan, evaluate the trained models and
    save the results to ``<self.name_model>.csv``.

    Args:
        data: Table holding all the event information (inputs, outputs
            and unused variables); the original note describes it as a
            pandas dataframe. Column ``learning_weights`` is read too.
        list_inputs: Names of the input columns.
        list_outputs: Names of the output columns.
        task: Name of a pickled parameter dict under
            ``<main_path>/split/<self.name>/``; when it is the empty
            string the full ``parameters.p`` dict is used instead.
        generator: When true, dummy arrays are handed to the scan and
            the evaluation is done through ``DataGenerator``.
        generator_weights: When true, ``generator_weights = [True]`` is
            added to the parameter dict passed to the model.
        resume: When true, the parameters of ``parameters.resume_model``
            replace ``self.p`` and the epoch range is extended.

    Reference : /home/ucl/cp3/fbury/.local/lib/python3.6/site-packages/talos/scan/Scan.py
    """
    logging.info(' Starting scan '.center(80, '-'))

    # Printing #
    logging.info('Number of features : %d' % len(list_inputs))
    for name in list_inputs:
        logging.info('..... %s' % name)
    logging.info('Number of outputs : %d' % len(list_outputs))
    for name in list_outputs:
        logging.info('..... %s' % name)

    # Records #
    if not generator:
        self.x = data[list_inputs].values
        # The learning weight is carried as the last column of y
        self.y = data[list_outputs + ['learning_weights']].values
        # Data splitting #
        size = parameters.training_ratio / (parameters.training_ratio + parameters.validation_ratio)
        self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(
            self.x, self.y, train_size=size)
        logging.info("Training set : %d" % self.x_train.shape[0])
        logging.info("Evaluation set : %d" % self.x_val.shape[0])
    else:
        # Dummy arrays only: the scan still requires x and y arguments
        dummyX = np.ones((1, len(list_inputs)))
        dummyY = np.ones(
            (1, len(list_outputs) + 1))  # emulates output + weights
        self.x_train = dummyX
        self.y_train = dummyY
        self.x_val = dummyX
        self.y_val = dummyY

    # Talos hyperscan parameters #
    self.task = task
    if self.task != '':  # if task is specified load it otherwise get it from parameters.py
        with open(
                os.path.join(parameters.main_path, 'split', self.name,
                             self.task), 'rb') as f:
            self.p = pickle.load(f)
    else:  # We need the full dict
        self.p = parameters.p

    # If resume, puts it as argument to be passed to function #
    # Also, needs to change the dictionary parameters for the one in the imported model #
    if resume:
        logging.info("Will resume training of model %s" %
                     parameters.resume_model)
        # Get model and extract epoch range #
        a = Restore(parameters.resume_model,
                    custom_objects=self.custom_objects)
        initial_epoch = a.params['epochs'][0]
        supp_epochs = self.p['epochs'][
            0]  # Will update the param dict, so must keep that in memory
        batch_size_save = self.p[
            'batch_size']  # Might want to change batch_size in retraining
        # Update params dict with the one from the trained model #
        self.p = a.params
        self.p['resume'] = [parameters.resume_model]
        self.p['initial_epoch'] = [
            initial_epoch
        ]  # Save initial epoch to be passed to Model
        self.p['epochs'][
            0] = initial_epoch + supp_epochs  # Initial = last epoch of already trained model (is a list)
        self.p['batch_size'] = batch_size_save
        logging.warning(
            "Since you asked to resume training of model %s, the parameters dictionary has been set to the one used to train the model"
            % parameters.resume_model)
        logging.info("Will train the model from epoch %d to %d" %
                     (self.p['initial_epoch'][0], self.p['epochs'][0]))

    # Specify that weights should be used by generator #
    if generator_weights:
        self.p['generator_weights'] = [
            True
        ]  # Add to dictionary to be passed to Model

    # Check if the experiment number already exists, then change it -> avoids rewriting #
    no = 1
    if self.task == '':  # If done on frontend
        # NOTE(review): self.name is extended in place with the sample,
        # so a second call on the same object appends it again.
        self.name = self.name + '_' + self.sample
        # NOTE(review): self.path_model is set here, but the existence
        # check below uses parameters.path_model -- confirm intent.
        self.path_model = os.path.join(parameters.main_path, 'model',
                                       self.name)
        while os.path.exists(
                os.path.join(parameters.path_model,
                             self.name + '_' + str(no) + '.csv')):
            no += 1
        self.name_model = self.name + '_' + str(no)
    else:  # If job on cluster
        self.name_model = self.name + '_' + self.sample + self.task.replace(
            '.pkl', '')

    # Define scan object #
    #parallel_gpu_jobs(0.5)
    self.h = Scan(
        x=self.x_train,
        y=self.y_train,
        params=self.p,
        dataset_name=self.name,
        experiment_no=str(no),
        model=getattr(Model, parameters.model),
        val_split=0.1,
        reduction_metric='val_loss',
        #grid_downsample=0.1,
        #random_method='lhs',
        #reduction_method='spear',
        #reduction_window=1000,
        #reduction_interval=100,
        #last_epoch_value=True,
        print_params=True,
        repetition=parameters.repetition,
        path_model=parameters.path_model,
        custom_objects=self.custom_objects,
    )

    if not generator:
        # Evaluate the scanned models on the held-out arrays
        self.h_with_eval = Autom8(
            scan_object=self.h,
            x_val=self.x_val,
            y_val=self.y_val[:, :-1],  # last column is weight
            n=-1,
            folds=10,
            metric='val_loss',
            asc=True,
            shuffle=True,
            average=None)
        self.h_with_eval.data.to_csv(self.name_model +
                                     '.csv')  # save to csv including error
        self.autom8 = True
    else:
        # Evaluate every scanned model through the evaluation generator
        error_arr = np.zeros(self.h.data.shape[0])
        for i in range(self.h.data.shape[0]):
            logging.info("Evaluating model %d" % i)
            # Rebuild the model from the JSON and weights kept by the scan
            model_eval = model_from_json(
                self.h.saved_models[i], custom_objects=self.custom_objects)
            model_eval.set_weights(self.h.saved_weights[i])
            #model_eval.compile(optimizer=Adam(),loss={'OUT':parameters.p['loss_function']},metrics=['accuracy'])
            model_eval.compile(optimizer=Adam(),
                               loss={'OUT': mean_squared_error},
                               metrics=['accuracy'])
            evaluation_generator = DataGenerator(
                path=parameters.path_gen_evaluation,
                inputs=parameters.inputs,
                outputs=parameters.outputs,
                batch_size=parameters.p['batch_size'][0],
                state_set='evaluation')
            eval_metric = model_eval.evaluate_generator(
                generator=evaluation_generator,
                workers=parameters.workers,
                use_multiprocessing=True)
            # The first returned metric is stored as the error
            error_arr[i] = eval_metric[0]
            logging.info('Error is %f' % error_arr[i])
        self.h.data['eval_mean'] = error_arr
        self.h.data.to_csv(self.name_model +
                           '.csv')  # save to csv including error
        self.autom8 = True

    # returns the experiment configuration details
    logging.info('=' * 80)
    logging.debug('Details')
    logging.debug(self.h.details)
def lstm(self):
    """Evaluate the restored ``lstm_deploy_3`` model on ``self.textlines``.

    The text lines go through ``self.transform`` before prediction; the
    result of ``self.evaluate(..., model='lstm')`` is returned.
    """
    restored = Restore(f'{LSTM}/lstm_deploy_3.zip')
    sequences = self.transform(self.textlines)
    predictions = restored.model.predict(sequences)
    return self.evaluate(self.textlines, predictions, model='lstm')
# Evaluate the latest optimized airfoil network on the test data and store
# the predictions, rescaled with the Cd/Cl ranges of results.csv.
orig_df = pd.read_csv('results.csv')
test_df = pd.read_csv('test_data.csv')
test_df.columns = ['case number', 'U', 'angle', 'Cd', 'Cl']
test_df.sort_values(by=['angle'], inplace=True)
print(test_df)

x_val = test_df[['U', 'angle']]
y_val = test_df[['Cd', 'Cl']]

# Archives are numbered from 0 without gaps; stop at the first missing one
# and step back to the last existing index.
i = 0
while os.path.exists('./optimized_networks/optimized_airfoil_nn_%s.zip' % i):
    i += 1
i -= 1
if i < 0:
    # Without this guard the script tried to restore "..._nn_-1.zip"
    raise FileNotFoundError(
        "No optimized network found in './optimized_networks/'")

net = Restore('./optimized_networks/optimized_airfoil_nn_%s.zip' % i)
pred = net.model.predict(x_val)
print("Cd predictions: ")
print(pred[:, 0])
print("Cl predictions: ")
print(pred[:, 1])

# Column 0 is Cd and column 1 is Cl, matching y_val and the printout above.
# The two columns used to be swapped here, so each prediction was rescaled
# with the range of the other coefficient.
test_df['pred_Cd'] = rescale(pred[:, 0],
                             orig_df['Cd'].min(),
                             orig_df['Cd'].max(),
                             reverse=True)
test_df['pred_Cl'] = rescale(pred[:, 1],
                             orig_df['Cl'].min(),
                             orig_df['Cl'].max(),
                             reverse=True)
def NeuralNetGeneratorModel(x_train, y_train, x_val, y_val, params):
    """
    Keras model for the Neural Network, used to scan the hyperparameter space by Talos
    Uses the generator rather than the input data (which are dummies)

    Only ``x_train.shape[1]`` is read from the array arguments (it sets
    the input width); training and validation data come from
    ``DataGenerator`` objects built from the paths in ``parameters``.

    ``params`` is read for: batch_size, first_neuron, activation, l2,
    output_activation, lr, loss_function, epochs and, optionally,
    generator_weights, resume and initial_epoch.

    Returns the Keras history and the trained model.
    """
    # Design network #
    with open(
            os.path.join(parameters.main_path,
                         'scaler_' + parameters.suffix + '.pkl'),
            'rb') as handle:  # Import scaler that was created before
        scaler = pickle.load(handle)

    IN = Input(shape=(x_train.shape[1], ), name='IN')
    # Preprocessing layer configured with the mean and scale of the scaler
    L0 = PreprocessLayer(batch_size=params['batch_size'],
                         mean=scaler.mean_,
                         std=scaler.scale_,
                         name='Preprocess')(IN)
    L1 = Dense(params['first_neuron'],
               activation=params['activation'],
               kernel_regularizer=l2(params['l2']))(L0)
    HIDDEN = hidden_layers(params, 1, batch_normalization=True).API(L1)
    OUT = Dense(1, activation=params['output_activation'],
                name='OUT')(HIDDEN)

    #preprocess = Model(inputs=[IN],outputs=[L0])
    #utils.print_summary(model=preprocess)

    # Tensorboard logs #
    # Use the first "Run_<n>" directory that does not exist yet
    path_board = os.path.join(parameters.main_path, "TensorBoard")
    suffix = 0
    while (os.path.exists(os.path.join(path_board, "Run_" + str(suffix)))):
        suffix += 1
    path_board = os.path.join(path_board, "Run_" + str(suffix))
    os.makedirs(path_board)
    logging.info("TensorBoard log dir is at %s" % path_board)

    # Callbacks #
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.,
                                   patience=50,
                                   verbose=1,
                                   mode='min')
    reduceLR = ReduceLROnPlateau(monitor='val_loss',
                                 factor=0.5,
                                 patience=20,
                                 verbose=1,
                                 mode='min',
                                 cooldown=10,
                                 min_lr=1e-5)
    loss_history = LossHistory()
    board = TensorBoard(log_dir=path_board,
                        histogram_freq=1,
                        batch_size=params['batch_size'],
                        write_graph=True,
                        write_grads=True,
                        write_images=True)
    #embeddings_freq=0,
    #embeddings_layer_names=None,
    #embeddings_metadata=None,
    #embeddings_data=None,
    #update_freq='epoch')
    Callback_list = [loss_history, early_stopping, reduceLR, board]
    #Callback_list = [loss_history,reduceLR,board]

    # Check if generator weights have been asked #
    # Empty string when the 'generator_weights' entry is absent or false
    weights_generator = parameters.weights_generator if 'generator_weights' in params and params[
        'generator_weights'] else ''

    # Compile #
    if 'resume' not in params:
        # Define model #
        model = Model(inputs=[IN], outputs=[OUT])
        utils.print_summary(model=model)  # used to print model
        # Compile it #
        model.compile(optimizer=Adam(lr=params['lr']),
                      loss={'OUT': params['loss_function']},
                      metrics=['accuracy'])
        initial_epoch = 0
    else:  # a model has to be imported and resumes training
        custom_objects = {'PreprocessLayer': PreprocessLayer}
        # NOTE(review): this is logged before the Restore call below
        logging.info("Loaded model %s" % params['resume'])
        a = Restore(params['resume'],
                    custom_objects=custom_objects,
                    method='h5')
        model = a.model
        model.compile(optimizer=Adam(lr=params['lr']),
                      loss={'OUT': params['loss_function']},
                      metrics=['accuracy'])
        utils.print_summary(model=model)  # used to print model
        #initial_epoch = a.params['epochs'][0]
        initial_epoch = params['initial_epoch']

    # Generator #
    training_generator = DataGenerator(path=parameters.path_gen_training,
                                       inputs=parameters.inputs,
                                       outputs=parameters.outputs,
                                       batch_size=params['batch_size'],
                                       state_set='training',
                                       weights_generator=weights_generator)
    validation_generator = DataGenerator(path=parameters.path_gen_validation,
                                         inputs=parameters.inputs,
                                         outputs=parameters.outputs,
                                         batch_size=params['batch_size'],
                                         state_set='validation')
    #weights_generator = weights_generator) # Might be unnecessary

    # Fit #
    logging.info("Will use %d workers" % parameters.workers)
    logging.warning("Keras location " + keras.__file__)
    logging.warning("Tensorflow location " + tf.__file__)
    logging.warning("GPU ")
    logging.warning(K.tensorflow_backend._get_available_gpus())
    history = model.fit_generator(
        generator=training_generator,
        validation_data=validation_generator,
        epochs=params['epochs'],
        verbose=1,
        max_queue_size=parameters.workers * 2,
        callbacks=Callback_list,
        initial_epoch=initial_epoch,
        workers=parameters.workers,
        shuffle=True,
        #steps_per_epoch = 20,
        use_multiprocessing=True)

    #test_generator = DataGenerator(path = parameters.path_gen_output,
    #                               inputs = parameters.inputs,
    #                               outputs = parameters.outputs,
    #                               batch_size = params['batch_size'],
    #out_preprocess = preprocess.predict_generator(test_generator,
    #                                              workers=10,
    #                                              steps=10,
    #                                              use_multiprocessing=False,
    #                                              verbose=1)
    #print ("Mean preprocessing")
    #print (np.mean(out_preprocess))
    #print (np.std(out_preprocess))
    #
    #out_all = model.predict_generator(test_generator,
    #                                  workers=10,
    #                                  steps=10,
    #                                  use_multiprocessing=False,
    #                                  verbose=1)
    #
    #print (out_all)
    #print ("Mean output")
    #print (np.mean(out_all))
    #print (np.std(out_all))

    # Plot history #
    PlotHistory(loss_history)

    return history, model