Example #1
    def __init__(self, name, xmin, ymin, xmax, ymax, N, normalize=False):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.N = N
        self.name = name
        self.normalize = normalize  # Xsec*BR*Acc
        self.nevents = 0  # count the number of events
        custom_objects = {'PreprocessLayer': PreprocessLayer}
        self.model = Restore(os.path.join(
            '/home/users/f/b/fbury/MoMEMtaNeuralNet/model/',
            name + '_HToZA.zip'),
                             custom_objects=custom_objects).model
        #self.model = HyperModel(name,'HToZA')
        # Make directory output #
        self.path_output = os.path.join(
            '/home/ucl/cp3/fbury/MoMEMtaNeuralNet/Plotting/PDF', self.name)
        if not os.path.exists(self.path_output):
            os.makedirs(self.path_output)
        # Get the normalization #
        if self.normalize:
            self._importGraphs()
        # Make the grid #
        self._make_grid()
        # Prepare output #
        self.Z = np.zeros(
            self.N)  # N may have changed because of unphysical phase-space points
Example #2
    def HyperRestore(self,inputs,verbose=0,generator=False):
        """
        Retrieves a zip containing the best model, parameters, x and y data, ... and restores it
        Produces an output from the input numpy array
        Reference :
            /home/ucl/cp3/fbury/.local/lib/python3.6/site-packages/talos/commands/restore.py
        """
        logging.info(('Using model %s.zip '%self.name).center(80,'-'))
        # Restore model #
        loaded = False
        while not loaded:
            try:
                a = Restore(os.path.join(parameters.main_path,'model',self.name+'.zip'),custom_objects=self.custom_objects)
                loaded = True
            except Exception as e:
                logging.warning('Could not load model due to "%s", will try again in 3s'%e)
                time.sleep(3)
        has_LBN = any(l.__class__.__name__ == 'LBNLayer' for l in a.model.layers)
        # Low-level inputs are common to both branches #
        inputsLL = inputs[[param.replace('$','') for param in parameters.inputs]].astype(np.float32).values
        if has_LBN:
            inputsLBN = inputs[parameters.LBN_inputs].astype(np.float32).values.reshape(-1,4,len(parameters.LBN_inputs)//4)
            outputs = a.model.predict((np.hsplit(inputsLL,inputsLL.shape[1]),inputsLBN),batch_size=parameters.output_batch_size,verbose=verbose)
        else:
            outputs = a.model.predict(np.hsplit(inputsLL,inputsLL.shape[1]),batch_size=parameters.output_batch_size,verbose=verbose)

        return outputs
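
A minimal usage sketch for HyperRestore (hypothetical names: a HyperModel-like wrapper `hyper` and a pandas DataFrame `df` whose columns cover parameters.inputs, plus parameters.LBN_inputs when an LBN layer is present):

# Sketch only: assumes the surrounding HyperModel class and a suitable input file
import pandas as pd

df = pd.read_hdf('events.h5')       # hypothetical file with the required columns
hyper = HyperModel('my_model')      # hypothetical constructor arguments
outputs = hyper.HyperRestore(df, verbose=1)
print(outputs.shape)                # (n_events, n_outputs)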
Example #3
def neural_predictor(test_x, experiment_name=None):
    #Example call: neural_predictor(test_x, '../talos_training/03_25_2020_13_21_00.zip')

    #Fetches most recent model deployment:
    if experiment_name is None:
        list_of_files = glob.glob('../talos_training/*.zip')
        print(list_of_files)
        pathname = max(list_of_files, key=os.path.getctime)
        print('Restoring talos-model ' + pathname + '..')
        t = Restore(pathname)

    else:
        print('Restoring talos-model ' + experiment_name + '..')
        t = Restore(experiment_name)

    results = t.model.predict(test_x)

    return inverse_transform_df(results), t.model
Example #4
def predict(test_x, experiment_name=None):
    #Example call: predict(test_x, 'talos_models/03_25_2020_13_21_00.zip')
    if experiment_name is None:
        # Fetch the most recent Talos archive
        list_of_files = glob.glob('talos_models/*.zip')
        experiment_name = max(list_of_files, key=os.path.getctime)
    if not experiment_name.endswith('.zip'):
        experiment_name += '.zip'

    restore_model = Restore(experiment_name)
    results = restore_model.model.predict(test_x)

    return np.array([inverse_transform(scaler, result) for result in results])
Example #5
    def HyperRestore(self,
                     inputs,
                     verbose=0,
                     generator=False,
                     generator_filepath=None):
        """
        Retrieves a zip containing the best model, parameters, x and y data, ... and restores it
        Produces an output from the input numpy array
        Reference :
            /home/ucl/cp3/fbury/.local/lib/python3.6/site-packages/talos/commands/restore.py
        """
        logging.info(
            (' Starting restoration of sample %s with model %s_%s.zip ' %
             (self.sample, self.name, self.sample)).center(80, '-'))
        # Load the preprocessing layer #
        # Restore model #
        loaded = False
        while not loaded:
            try:
                a = Restore(os.path.join(
                    parameters.main_path, 'model',
                    self.name + '_' + self.sample + '.zip'),
                            custom_objects=self.custom_objects)
                loaded = True
            except Exception as e:
                logging.warning(
                    'Could not load model due to "%s", will try again in 3s' %
                    e)
                time.sleep(3)

        # Output of the model #
        if not generator:
            outputs = a.model.predict(inputs,
                                      batch_size=parameters.output_batch_size,
                                      verbose=verbose)
        else:
            if generator_filepath is None:
                logging.error(
                    "Generator output must be provided with a filepath")
                sys.exit(1)
            output_generator = DataGenerator(
                path=generator_filepath,
                inputs=parameters.inputs,
                outputs=parameters.outputs,
                batch_size=parameters.output_batch_size,
                state_set='output')
            outputs = a.model.predict_generator(output_generator,
                                                workers=parameters.workers,
                                                use_multiprocessing=True,
                                                verbose=1)
        return outputs
Example #6
    def load_model(self,path_model):
        self.model = Restore(path_model, custom_objects={'PreprocessLayer': PreprocessLayer}).model
        self.model_name = os.path.basename(path_model).replace(".zip","")
Example #7
class MassPlane:
    def __init__(self,x_bins,x_min,x_max,y_bins,y_min,y_max,plot_DY=False,plot_TT=False,plot_ZA=False,profile=False):
        self.x_bins     = x_bins
        self.x_min      = x_min
        self.x_max      = x_max
        self.y_bins     = y_bins
        self.y_min      = y_min
        self.y_max      = y_max
        self.model      = None
        self.plot_DY    = plot_DY
        self.plot_TT    = plot_TT
        self.plot_ZA    = plot_ZA
        self.profile    = profile
        self.graph_list = []

        # Produce grid #
        self.produce_grid()


    def produce_grid(self):
        self.X,self.Y = np.meshgrid(np.linspace(self.x_min,self.x_max,self.x_bins),np.linspace(self.y_min,self.y_max,self.y_bins))
        bool_upper = np.greater_equal(self.Y,self.X)
        self.X = self.X[bool_upper]
        self.Y = self.Y[bool_upper]
        self.x = self.X.reshape(-1,1)
        self.y = self.Y.reshape(-1,1)
        # After the mask, X and Y are flat arrays of grid points; x and y are column vectors

    def load_model(self,path_model):
        self.model = Restore(path_model, custom_objects={'PreprocessLayer': PreprocessLayer}).model
        self.model_name = os.path.basename(path_model).replace(".zip","")


    def plotMassPoint(self,mH,mA):
        print ("Producing plot for MH = %.2f GeV, MA = %.2f"%(mH,mA))
        N = self.x.shape[0]
        params = np.c_[np.ones(N)*mA,np.ones(N)*mH]
        inputs = np.c_[self.x,self.y,params]
        output = self.model.predict(inputs)

        g_DY = ROOT.TGraph2D(N)
        g_DY.SetNpx(500)
        g_DY.SetNpy(500)
        g_TT = ROOT.TGraph2D(N)
        g_TT.SetNpx(500)
        g_TT.SetNpy(500)
        g_ZA = ROOT.TGraph2D(N)
        g_ZA.SetNpx(500)
        g_ZA.SetNpy(500)

        g_DY.SetName(("MassPlane_DY_mH_%s_mA_%s"%(mH,mA)).replace('.','p'))
        g_TT.SetName(("MassPlane_TT_mH_%s_mA_%s"%(mH,mA)).replace('.','p'))
        g_ZA.SetName(("MassPlane_ZA_mH_%s_mA_%s"%(mH,mA)).replace('.','p'))

        for i in range(N):
            if self.plot_DY:
                g_DY.SetPoint(i,self.x[i],self.y[i],output[i,0])
            if self.plot_TT:
                g_TT.SetPoint(i,self.x[i],self.y[i],output[i,1])
            if self.plot_ZA:
                g_ZA.SetPoint(i,self.x[i],self.y[i],output[i,2])

        if self.plot_DY:
            self.graph_list.append(g_DY)
            g_DY.GetHistogram().SetTitle("P(DY) for mass point M_{H} = %.2f GeV, M_{A} = %.2f GeV"%(mH,mA))
            g_DY.GetHistogram().GetXaxis().SetTitle("M_{jj} [GeV]")
            g_DY.GetHistogram().GetYaxis().SetTitle("M_{lljj} [GeV]")
            g_DY.GetHistogram().GetZaxis().SetTitle("DNN output")
            g_DY.GetHistogram().GetZaxis().SetRangeUser(0.,1.)
            g_DY.GetHistogram().SetContour(100)
            g_DY.GetXaxis().SetTitleOffset(1.2)
            g_DY.GetYaxis().SetTitleOffset(1.2)
            g_DY.GetZaxis().SetTitleOffset(1.2)
            g_DY.GetXaxis().SetTitleSize(0.045)
            g_DY.GetYaxis().SetTitleSize(0.045)
            g_DY.GetZaxis().SetTitleSize(0.045)

        if self.plot_TT:
            g_TT.GetHistogram().SetTitle("P(t#bar{t}) for mass point M_{H} = %.2f GeV, M_{A} = %.2f GeV"%(mH,mA))
            g_TT.GetHistogram().GetXaxis().SetTitle("M_{jj} [GeV]")
            g_TT.GetHistogram().GetYaxis().SetTitle("M_{lljj} [GeV]")
            g_TT.GetHistogram().GetZaxis().SetTitle("DNN output")
            g_TT.GetHistogram().GetZaxis().SetRangeUser(0.,1.)
            g_TT.GetHistogram().SetContour(100)
            g_TT.GetXaxis().SetTitleOffset(1.2)
            g_TT.GetYaxis().SetTitleOffset(1.2)
            g_TT.GetZaxis().SetTitleOffset(1.2)
            g_TT.GetXaxis().SetTitleSize(0.045)
            g_TT.GetYaxis().SetTitleSize(0.045)
            g_TT.GetZaxis().SetTitleSize(0.045)
            self.graph_list.append(g_TT)

        if self.plot_ZA:
            g_ZA.GetHistogram().SetTitle("P(H#rightarrowZA) for mass point M_{H} = %.2f GeV, M_{A} = %.2f GeV"%(mH,mA))
            g_ZA.GetHistogram().GetXaxis().SetTitle("M_{jj} [GeV]")
            g_ZA.GetHistogram().GetYaxis().SetTitle("M_{lljj} [GeV]")
            g_ZA.GetHistogram().GetZaxis().SetTitle("DNN output")
            g_ZA.GetHistogram().GetZaxis().SetRangeUser(0.,1.)
            g_ZA.GetHistogram().SetContour(100)
            g_ZA.GetXaxis().SetTitleOffset(1.2)
            g_ZA.GetYaxis().SetTitleOffset(1.2)
            g_ZA.GetZaxis().SetTitleOffset(1.2)
            g_ZA.GetXaxis().SetTitleSize(0.045)
            g_ZA.GetYaxis().SetTitleSize(0.045)
            g_ZA.GetZaxis().SetTitleSize(0.045)
            self.graph_list.append(g_ZA)

    @staticmethod
    def getProfiles(g):
        h = g.GetHistogram()
        xproj = h.ProjectionX()
        yproj = h.ProjectionY()
        array = hist2array(h)
        # Need to compensate for the triangular binning
        nonzeroXbins = h.GetNbinsY()/np.count_nonzero(array,axis=1)
        nonzeroYbins = h.GetNbinsX()/np.count_nonzero(array,axis=0)
        for x in range(1,h.GetNbinsX()+1):
            xproj.SetBinContent(x,xproj.GetBinContent(x)*nonzeroXbins[x-1])
        for y in range(1,h.GetNbinsY()+1):
            yproj.SetBinContent(y,yproj.GetBinContent(y)*nonzeroYbins[y-1])
        xproj.GetYaxis().SetTitle("DNN output")
        yproj.GetYaxis().SetTitle("DNN output")

        return xproj, yproj


    def plotOnCanvas(self):
        setTDRStyle()
        pdf_path = "MassPlane/"+self.model_name+".pdf"
        root_path = pdf_path.replace('.pdf','.root')
        outFile = ROOT.TFile(root_path,"RECREATE")
        C = ROOT.TCanvas("C","C",800,600)
        #C.SetLogz()
        C.Print(pdf_path+"[")
        for g in self.graph_list:
            print ("Plotting %s"%g.GetName())
            g.Draw("colz")
            g_copy = g.Clone()
            contours = np.array([0.90,0.95,0.99])
            g_copy.GetHistogram().SetContour(contours.shape[0],contours)
            g_copy.Draw("cont2 same")
            g.Write()
            C.Print(pdf_path,"Title:"+g.GetName())
            if self.profile:
                xproj,yproj = self.getProfiles(g)
                xproj.Draw("hist")
                xproj.Write()
                C.Print(pdf_path,"Title:"+g.GetName()+" X profile")
                yproj.Draw("hist")
                C.Print(pdf_path,"Title:"+g.GetName()+" Y profile")
                yproj.Write()

        C.Print(pdf_path+"]")
        outFile.Close()
        print ("Root file saved as %s"%root_path)

    def makePavement(self,contours):
        for contour in contours:
            print ("Producing pavement for cut %.2f"%contour)
            pdf_path = "MassPlane/"+self.model_name+("_pave%0.2f"%contour).replace('.','p')+".pdf"
            root_path = pdf_path.replace('.pdf','.root')
            outFile = ROOT.TFile(root_path,"RECREATE")
            C = ROOT.TCanvas("C","C",800,600)
            opt = 'cont2'
            new_graph_list = [g.Clone() for g in self.graph_list]
            # Need them saved in a list otherwise they are deleted and canvas is blank
            for g in new_graph_list:
                print ('Adding to contour plot',g.GetName())
                g.SetTitle("Pavement for cut %0.2f"%contour)
                g.GetHistogram().SetContour(1,np.array([contour]))
                g.Draw(opt)
                if 'same' not in opt: opt += " same"
                g.Write()
            
            C.Print(pdf_path)
            outFile.Close()
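
A minimal driver for MassPlane, sketched under the assumption that a trained Talos archive exists at the hypothetical path model/HToZA.zip and that a MassPlane/ output directory is present:

# Sketch only: path and mass points are illustrative
plane = MassPlane(x_bins=100, x_min=0., x_max=1000.,
                  y_bins=100, y_min=0., y_max=1000.,
                  plot_DY=True, plot_TT=True, plot_ZA=True, profile=False)
plane.load_model('model/HToZA.zip')   # hypothetical path to a trained Talos zip
for mH, mA in [(300., 100.), (500., 200.), (800., 400.)]:
    plane.plotMassPoint(mH, mA)
plane.plotOnCanvas()                  # writes MassPlane/<model_name>.pdf and .root
plane.makePavement([0.90, 0.95, 0.99])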
Example #8
    def HyperScan(self,data,list_inputs,list_outputs,task,model_idx=None,generator=False,resume=False):
        """
        Performs the scan for hyperparameters
        If task is specified, will load a pickle dict split from the whole set of parameters
        Data is a pandas dataframe containing all the event information (inputs, outputs and unused variables)
        The columns to be selected are given in list_inputs, list_outputs as lists of strings
        Reference : /home/ucl/cp3/fbury/.local/lib/python3.6/site-packages/talos/scan/Scan.py
        """
        logging.info(' Starting scan '.center(80,'-'))

        # Printing #
        logging.info('Number of features : %d'%len(list_inputs))
        for name in list_inputs:
            logging.info('..... %s'%name)
        logging.info('Number of outputs : %d'%len(list_outputs))
        for name in list_outputs:
            logging.info('..... %s'%name)
            
        # Records #
        if not generator:
            self.x = data[list_inputs].values
            self.y = data[list_outputs+['learning_weights']].values
            # Data splitting #
            if model_idx is None:
                size = parameters.training_ratio/(parameters.training_ratio+parameters.evaluation_ratio)
                self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(self.x,self.y,train_size=size)
            else: # Cross validation : take the training and evaluation set based on the mask
                # model_idx == index of mask on which model will be applied (aka, not trained nor evaluated)
                _, eval_idx, train_idx = GenerateSliceIndices(model_idx)
                eval_mask = GenerateSliceMask(eval_idx,data['mask'])
                train_mask = GenerateSliceMask(train_idx,data['mask'])
                self.x_val   = self.x[eval_mask]
                self.y_val   = self.y[eval_mask]
                self.x_train = self.x[train_mask]
                self.y_train = self.y[train_mask]
            logging.info("Training set   : %d"%self.x_train.shape[0])
            logging.info("Evaluation set : %d"%self.x_val.shape[0])
        else:
            # Needs to use dummy inputs to launch talos scan but in Model the generator will be used
            dummyX = np.ones((1,len(list_inputs)))
            dummyY = np.ones((1,len(list_outputs)+1)) # emulates output + weights
            self.x_train = dummyX
            self.y_train = dummyY
            self.x_val = dummyX
            self.y_val = dummyY

        # Talos hyperscan parameters #
        self.task = task
        if self.task != '': # if task is specified load it otherwise get it from parameters.py
            with open(os.path.join(parameters.main_path,'split',self.name,self.task), 'rb') as f:
                self.p = pickle.load(f)
        else: # We need the full dict
            self.p = parameters.p

        # If resume, pass it as an argument to the function #
        # Also, needs to replace the parameters dictionary with the one from the imported model #
        if resume:
            logging.info("Will resume training of model %s"%parameters.resume_model)
            # Get model and extract epoch range #
            a = Restore(parameters.resume_model,custom_objects=self.custom_objects)
            initial_epoch = a.params['epochs'][0]
            supp_epochs = self.p['epochs'][0] # Will update the param dict, so must keep that in memory
            batch_size_save = self.p['batch_size'] # Might want to change batch_size in retraining
            # Update params dict with the one from the trained model #
            self.p = a.params
            self.p['resume'] = [parameters.resume_model]
            self.p['initial_epoch'] = [initial_epoch]  # Save initial epoch to be passed to Model
            self.p['epochs'][0] = initial_epoch+supp_epochs # Initial = last epoch of already trained model (is a list)
            self.p['batch_size'] = batch_size_save
            logging.warning("Since you asked to resume training of model %s, the parameters dictionary has been set to the one used to train the model"%parameters.resume_model)
            logging.info("Will train the model from epoch %d to %d"%(self.p['initial_epoch'][0],self.p['epochs'][0]))

        # Check if `no` already exists, and change it if so -> avoids overwriting #
        # This is only valid in worker mode, not driver #
        no = 1
        if self.task == '': # If done on frontend
            name = self.name
            while os.path.exists(os.path.join(parameters.path_model,self.name+'_'+str(no)+'.csv')):
                no +=1
            if model_idx is not None:
                name += '_crossval%d'%model_idx
            self.name_model = name+'_'+str(no)
        else:               # If job on cluster
            name = self.name
            if model_idx is not None:
                name += '_crossval%d'%model_idx
            self.name_model = name+'_'+self.task.replace('.pkl','')

        # Define scan object #
        self.h = Scan(x=self.x_train,                       # Training inputs 
                      y=self.y_train,                       # Training targets
                      params=self.p,                        # Parameters dict
                      dataset_name=self.name,               # Name of experiment
                      experiment_no=str(no),                # Number of experiment
                      model=getattr(Model,parameters.model),# Get the model in Model.py specified by parameters.py
                      val_split=0.1,                        # How much data is to be used for val_loss
                      reduction_metric='val_loss',          # How to select best model
                      #grid_downsample=0.1,                 # When used in serial mode
                      #random_method='lhs',                     ---
                      #reduction_method='spear',                ---
                      #reduction_window=1000,                   ---
                      #reduction_interval=100,                  ---
                      #last_epoch_value=True,                   ---
                      print_params=True,                    # To print param at each job
                      repetition=parameters.repetition,     # Whether a set of parameters is to be trained several times
                      path_model = parameters.path_model,   # Where to save the model
                      custom_objects=self.custom_objects,   # Custom object : custom layer
                )
        if not generator:
            # Use the save information in DF #
            self.h_with_eval = Autom8(scan_object = self.h,     # the scan object
                                      x_val = self.x_val,       # Evaluation inputs
                                      y_val = self.y_val[:,:-1],# Evaluation targets (last column is weight)
                                      n = -1,                   # How many models to evaluate (n=-1 means all)
                                      folds = 5,                # Cross-validation splits for nominal and errors
                                      metric = 'val_loss',      # On which metric to sort
                                      asc = True,               # Ascending because loss function
                                      shuffle = True,           # Shuffle before evaluation
                                      average = 'micro')        # Not useful here
            self.h_with_eval.data.to_csv(self.name_model+'.csv') # save to csv including error
            self.autom8 = True
        else:
            # Needs to use the generator evaluation #
            error_arr = np.zeros(self.h.data.shape[0])
            for i in range(self.h.data.shape[0]):
                logging.info("Evaluating model %d"%i)
                # Load model #
                model_eval = model_from_json(self.h.saved_models[i],custom_objects=self.custom_objects)   
                model_eval.set_weights(self.h.saved_weights[i])
                model_eval.compile(optimizer=Adam(),loss={'OUT':parameters.p['loss_function']},metrics=['accuracy'])
                # Evaluate model #
                evaluation_generator = DataGenerator(path = parameters.path_gen_evaluation,
                                                     inputs = parameters.inputs,
                                                     outputs = parameters.outputs,
                                                     batch_size = parameters.p['batch_size'][0],
                                                     state_set = 'evaluation')

                eval_metric = model_eval.evaluate_generator(generator             = evaluation_generator,
                                                            workers               = parameters.workers,
                                                            use_multiprocessing   = True)
                # Save errors #
                error_arr[i] = eval_metric[0]
                logging.info('Error is %f'%error_arr[i])

            # Save evaluation error to csv #
            self.h.data['eval_mean'] = error_arr
            self.h.data.to_csv(self.name_model+'.csv') # save to csv including error
            self.autom8 = True
            
        # returns the experiment configuration details
        logging.info('='*80)
        logging.debug('Details')
        logging.debug(self.h.details)
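
The non-generator branch above derives the split fraction from the training and evaluation ratios before calling scikit-learn; a self-contained sketch of that logic, with made-up stand-ins for the parameters module:

# Sketch only: ratios and array shapes are illustrative
import numpy as np
from sklearn.model_selection import train_test_split

training_ratio, evaluation_ratio = 0.7, 0.3   # assumed values of parameters.*
x = np.random.rand(1000, 10)                  # dummy inputs
y = np.random.rand(1000, 3)                   # dummy outputs (+ weight column)
size = training_ratio / (training_ratio + evaluation_ratio)
x_train, x_val, y_train, y_val = train_test_split(x, y, train_size=size)
print(x_train.shape[0], x_val.shape[0])       # ~700 and ~300 events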
Example #9
class LikelihoodMap():
    def __init__(self, name, xmin, ymin, xmax, ymax, N, normalize=False):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.N = N
        self.name = name
        self.normalize = normalize  # Xsec*BR*Acc
        self.nevents = 0  # count the number of events
        custom_objects = {'PreprocessLayer': PreprocessLayer}
        self.model = Restore(os.path.join(
            '/home/users/f/b/fbury/MoMEMtaNeuralNet/model/',
            name + '_HToZA.zip'),
                             custom_objects=custom_objects).model
        #self.model = HyperModel(name,'HToZA')
        # Make directory output #
        self.path_output = os.path.join(
            '/home/ucl/cp3/fbury/MoMEMtaNeuralNet/Plotting/PDF', self.name)
        if not os.path.exists(self.path_output):
            os.makedirs(self.path_output)
        # Get the normalization #
        if self.normalize:
            self._importGraphs()
        # Make the grid #
        self._make_grid()
        # Prepare output #
        self.Z = np.zeros(
            self.N)  # N may have changed because of unphysical phase-space points

        ################################################
        # L(x_i|alpha)       = \prod_i 1/sigma_vis W(x_i|alpha)
        # -ln (L(x_i|alpha)) = \sum_i [ -ln(W(x_i|alpha)) + ln(sigma_vis) ]
        #                    = \sum_i [ output_DNN ] + n*ln(sigma_vis)

    def _make_grid(self):
        x = np.linspace(self.xmin, self.xmax, self.N)
        y = np.linspace(self.ymin, self.ymax, self.N)
        X, Y = np.meshgrid(x, y)
        mA = X.flatten()
        mH = Y.flatten()
        mh = 125
        mZ = 90
        # Unphysical phase-space points #
        condition1 = np.greater(mH, mA)
        condition2 = np.greater(mH, mh)
        condition3 = np.greater(np.subtract(mH, mA), mZ)
        #upper = np.logical_and(condition1,condition2)
        upper = np.logical_and(np.logical_and(condition1, condition2),
                               condition3)
        # Ensure that mH > mh
        # Ensure that mH > mA
        # Ensure that mH-mA > mZ
        self.mA = mA[upper]
        self.mH = mH[upper]
        self.N = self.mA.shape[0]

        # Normalisation with xsec visible #
        self.norm = np.ones(self.N)
        if self.normalize:
            logging.info('Normalization enabled')
            for i in range(0, self.N):
                self.norm[i] *= self.acc.Interpolate(self.mA[i], self.mH[i])
                #self.norm[i] *= self.xsec.Interpolate(self.mA[i],self.mH[i])
                self.norm[i] *= self.BR_HtoZA.Interpolate(
                    self.mA[i], self.mH[i])
                #self.norm[i] *= self.BR_Atobb.Interpolate(self.mA[i],self.mH[i])
                self.norm[i] *= 3.3658 * 2 / 100  # Z-> e+e- + Z-> mu+mu-
                self.norm[i] *= 1e-12  # Xsec in pb
        # Get log(normalization) #
        self.norm = np.log10(
            self.norm
        )  # if normalize == False => norm =1 => log(norm)=0 (thus not used)
        # non-physical phase-space points (xsec=0) will produce -inf

    def AddEvent(self, event):
        # Get the -log(weight) #
        inputs = np.c_[np.tile(event, (self.N, 1)), self.mH, self.mA]
        #outputs = self.model.HyperRestore(inputs)
        outputs = self.model.predict(inputs, batch_size=512)
        self.Z += outputs.reshape(-1, )
        self.nevents += 1

    def MakeGraph(self, title, suffix):
        self.legend_title = title.replace('.root', '').replace('-', '_')
        self.suffix = suffix
        if title.find('DY') != -1:
            title = 'Drell-Yan events'
        elif title.find('TT') != -1:
            title = r't\bar{t} events'
        else:
            mH_value = re.findall(r'\d+', title)[2]
            mA_value = re.findall(r'\d+', title)[3]
            title = 'Signal events with M_{H} = %s GeV and M_{A} = %s GeV' % (
                mH_value, mA_value)

        # Normalize #
        if self.normalize:
            self.Z += self.nevents * self.norm
            title += ' [Normalized]'
            self.legend_title += '_norm'

        # Divide by total entries #
        self.Z /= self.nevents  # Divide by N
        self.Z *= 2  # Because -2 log L

        # check for invalid entries (nan or inf), possibly coming from log10 #
        invalid_entries = np.logical_or(np.isinf(self.Z), np.isnan(self.Z))
        max_Z = np.amax(self.Z[np.invert(invalid_entries)])
        min_Z = np.amin(self.Z[np.invert(invalid_entries)])
        self.Z[invalid_entries] = 0  # removes non physical points

        # Generate graph #
        graph = TGraph2D(self.N)
        print('Generating TGraph2D')
        manager = enlighten.get_manager()
        pbar = manager.counter(total=self.N, desc='Progress', unit='Point')
        for i in range(self.N):
            graph.SetPoint(i, self.mA[i], self.mH[i], self.Z[i])
            pbar.update()
        manager.stop()

        #graph = copy.deepcopy(TGraph2D(self.N,self.mA,self.mH,self.Z))
        graph.SetTitle(
            'Log-Likelihood : %s;M_{A} [GeV]; M_{H} [GeV]; -2log L' % (title))
        graph.SetMaximum(max_Z)
        graph.SetMinimum(min_Z)
        graph.SetNpx(1000)
        graph.SetNpy(1000)

        # Save graph #
        self.graph = graph
        self._saveGraph()

    def _saveGraph(self):
        full_name = self.path_output + '/likelihood_' + self.suffix + '.root'
        if os.path.exists(full_name):
            root_file = TFile(full_name, "update")
            self.graph.Write(self.legend_title, TObject.kOverwrite)
            logging.info("Graph replaced in %s" % full_name)
        else:
            root_file = TFile(full_name, "recreate")
            self.graph.Write(self.legend_title)
            logging.info("New graph saved in %s" % full_name)
        root_file.Close()

    def _importGraphs(self):
        # import the TGraphs 2D #
        path_graphs = '/home/users/f/b/fbury/MoMEMtaNeuralNet/Plotting/'
        file_xsec = TFile.Open(os.path.join(path_graphs, 'XsecMap_full.root'))
        file_acc = TFile.Open(
            os.path.join(path_graphs, 'AcceptanceMap_full.root'))

        try:  # Deepcopy necessary to avoid a seg fault
            self.xsec = copy.deepcopy(file_xsec.Get('Xsec'))
            self.BR_HtoZA = copy.deepcopy(file_xsec.Get('BR_HtoZA'))
            self.BR_Atobb = copy.deepcopy(file_xsec.Get('BR_Atobb'))
            self.BR_Ztoll = copy.deepcopy(file_xsec.Get('BR_Ztoll'))
            self.acc = copy.deepcopy(file_acc.Get('Acceptance'))
        except Exception:
            self.normalize = False
            logging.warning('Could not load the objects -> normalization off')
            return  # the isinstance checks below would fail on missing attributes

        if not isinstance(self.xsec, ROOT.TGraph2D):
            self.normalize = False
            logging.warning(
                'Xsec is %s and not TGraph2D -> normalization off' %
                type(self.xsec))
        if not isinstance(self.BR_HtoZA, ROOT.TGraph2D):
            self.normalize = False
            logging.warning(
                'BR_HtoZA is %s and not TGraph2D -> normalization off' %
                type(self.BR_HtoZA))
        if not isinstance(self.BR_Atobb, ROOT.TGraph2D):
            self.normalize = False
            logging.warning(
                'BR_Atobb is %s and not TGraph2D -> normalization off' %
                type(self.BR_Atobb))
        if not isinstance(self.BR_Ztoll, ROOT.TGraph2D):
            self.normalize = False
            logging.warning(
                'BR_Ztoll is %s and not TGraph2D -> normalization off' %
                type(self.BR_Ztoll))
        if not isinstance(self.acc, ROOT.TGraph2D):
            self.normalize = False
            logging.warning(
                'Acceptance is %s and not TGraph2D -> normalization off' %
                type(self.acc))
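
A minimal driver for LikelihoodMap, sketched with a hypothetical model name and random stand-in events (each event is the 1D feature vector the DNN expects, before the mH/mA columns that AddEvent appends); note the hard-coded cluster paths in __init__ mean this only runs as-is on the original machine:

# Sketch only: model name, grid ranges and event features are illustrative
lmap = LikelihoodMap(name='my_dnn', xmin=0., ymin=0., xmax=1000., ymax=1000., N=100)
for event in np.random.rand(50, 8):   # 50 dummy events with 8 features each
    lmap.AddEvent(event)
lmap.MakeGraph(title='output_DY.root', suffix='DY')   # 'DY' selects the Drell-Yan title branch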
Example #10
    def HyperScan(self,
                  data,
                  list_inputs,
                  list_outputs,
                  task,
                  generator=False,
                  generator_weights=False,
                  resume=False):
        """
        Performs the scan for hyperparameters
        If task is specified, will load a pickle dict split from the whole set of parameters
        Data is a pandas dataframe containing all the event information (inputs, outputs and unused variables)
        The columns to be selected are given in list_inputs, list_outputs as lists of strings
        Reference : /home/ucl/cp3/fbury/.local/lib/python3.6/site-packages/talos/scan/Scan.py
        """
        logging.info(' Starting scan '.center(80, '-'))

        # Printing #
        logging.info('Number of features : %d' % len(list_inputs))
        for name in list_inputs:
            logging.info('..... %s' % name)
        logging.info('Number of outputs : %d' % len(list_outputs))
        for name in list_outputs:
            logging.info('..... %s' % name)

        # Records #
        if not generator:
            self.x = data[list_inputs].values
            self.y = data[list_outputs + ['learning_weights']].values
            # Data splitting #
            size = parameters.training_ratio / (parameters.training_ratio +
                                                parameters.validation_ratio)
            self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(
                self.x, self.y, train_size=size)
            logging.info("Training set   : %d" % self.x_train.shape[0])
            logging.info("Evaluation set : %d" % self.x_val.shape[0])
        else:
            dummyX = np.ones((1, len(list_inputs)))
            dummyY = np.ones(
                (1, len(list_outputs) + 1))  # emulates output + weights
            self.x_train = dummyX
            self.y_train = dummyY
            self.x_val = dummyX
            self.y_val = dummyY

        # Talos hyperscan parameters #
        self.task = task
        if self.task != '':  # if task is specified load it otherwise get it from parameters.py
            with open(
                    os.path.join(parameters.main_path, 'split', self.name,
                                 self.task), 'rb') as f:
                self.p = pickle.load(f)
        else:  # We need the full dict
            self.p = parameters.p

        # If resume, pass it as an argument to the function #
        # Also, needs to replace the parameters dictionary with the one from the imported model #
        if resume:
            logging.info("Will resume training of model %s" %
                         parameters.resume_model)
            # Get model and extract epoch range #
            a = Restore(parameters.resume_model,
                        custom_objects=self.custom_objects)
            initial_epoch = a.params['epochs'][0]
            supp_epochs = self.p['epochs'][
                0]  # Will update the param dict, so must keep that in memory
            batch_size_save = self.p[
                'batch_size']  # Might want to change batch_size in retraining
            # Update params dict with the one from the trained model #
            self.p = a.params
            self.p['resume'] = [parameters.resume_model]
            self.p['initial_epoch'] = [
                initial_epoch
            ]  # Save initial epoch to be passed to Model
            self.p['epochs'][
                0] = initial_epoch + supp_epochs  # Initial = last epoch of already trained model (is a list)
            self.p['batch_size'] = batch_size_save
            logging.warning(
                "Since you asked to resume training of model %s, the parameters dictionary has been set to the one used to train the model"
                % parameters.resume_model)
            logging.info("Will train the model from epoch %d to %d" %
                         (self.p['initial_epoch'][0], self.p['epochs'][0]))

        # Specify that weights should be used by generator #
        if generator_weights:
            self.p['generator_weights'] = [
                True
            ]  # Add to dictionary to be passed to Model

        # Check if `no` already exists, and change it if so -> avoids overwriting #
        no = 1
        if self.task == '':  # If done on frontend
            self.name = self.name + '_' + self.sample
            self.path_model = os.path.join(parameters.main_path, 'model',
                                           self.name)
            while os.path.exists(
                    os.path.join(parameters.path_model,
                                 self.name + '_' + str(no) + '.csv')):
                no += 1
            self.name_model = self.name + '_' + str(no)
        else:  # If job on cluster
            self.name_model = self.name + '_' + self.sample + self.task.replace(
                '.pkl', '')

        # Define scan object #
        #parallel_gpu_jobs(0.5)
        self.h = Scan(
            x=self.x_train,
            y=self.y_train,
            params=self.p,
            dataset_name=self.name,
            experiment_no=str(no),
            model=getattr(Model, parameters.model),
            val_split=0.1,
            reduction_metric='val_loss',
            #grid_downsample=0.1,
            #random_method='lhs',
            #reduction_method='spear',
            #reduction_window=1000,
            #reduction_interval=100,
            #last_epoch_value=True,
            print_params=True,
            repetition=parameters.repetition,
            path_model=parameters.path_model,
            custom_objects=self.custom_objects,
        )
        if not generator:
            self.h_with_eval = Autom8(
                scan_object=self.h,
                x_val=self.x_val,
                y_val=self.y_val[:, :-1],  # last column is weight
                n=-1,
                folds=10,
                metric='val_loss',
                asc=True,
                shuffle=True,
                average=None)
            self.h_with_eval.data.to_csv(self.name_model +
                                         '.csv')  # save to csv including error
            self.autom8 = True
        else:
            error_arr = np.zeros(self.h.data.shape[0])
            for i in range(self.h.data.shape[0]):
                logging.info("Evaluating model %d" % i)
                model_eval = model_from_json(
                    self.h.saved_models[i], custom_objects=self.custom_objects)
                model_eval.set_weights(self.h.saved_weights[i])
                #model_eval.compile(optimizer=Adam(),loss={'OUT':parameters.p['loss_function']},metrics=['accuracy'])
                model_eval.compile(optimizer=Adam(),
                                   loss={'OUT': mean_squared_error},
                                   metrics=['accuracy'])
                evaluation_generator = DataGenerator(
                    path=parameters.path_gen_evaluation,
                    inputs=parameters.inputs,
                    outputs=parameters.outputs,
                    batch_size=parameters.p['batch_size'][0],
                    state_set='evaluation')

                eval_metric = model_eval.evaluate_generator(
                    generator=evaluation_generator,
                    workers=parameters.workers,
                    use_multiprocessing=True)
                error_arr[i] = eval_metric[0]
                logging.info('Error is %f' % error_arr[i])
            self.h.data['eval_mean'] = error_arr
            self.h.data.to_csv(self.name_model +
                               '.csv')  # save to csv including error
            self.autom8 = True

        # returns the experiment configuration details
        logging.info('=' * 80)
        logging.debug('Details')
        logging.debug(self.h.details)
Example #11
    def lstm(self):
        lstm_cv = Restore(f'{LSTM}/lstm_deploy_3.zip')
        text_sequence = self.transform(self.textlines)
        predict_words = lstm_cv.model.predict(text_sequence)
        evals = self.evaluate(self.textlines, predict_words, model='lstm')
        return evals
Example #12
orig_df = pd.read_csv('results.csv')
test_df = pd.read_csv('test_data.csv')
test_df.columns = ['case number', 'U', 'angle', 'Cd', 'Cl']

test_df.sort_values(by=['angle'], inplace=True)

print(test_df)

x_val = test_df[['U', 'angle']]
y_val = test_df[['Cd', 'Cl']]

i = 0
while os.path.exists('./optimized_networks/optimized_airfoil_nn_%s.zip' % i):
    i += 1
i -= 1
net = Restore('./optimized_networks/optimized_airfoil_nn_%s.zip' % i)

pred = net.model.predict(x_val)
print("Cd predictions: ")
print(pred[:, 0])
print("Cl predictions: ")
print(pred[:, 1])
# Column order assumed to match y_val = test_df[['Cd', 'Cl']] above
test_df['pred_Cd'] = rescale(pred[:, 0],
                             orig_df['Cd'].min(),
                             orig_df['Cd'].max(),
                             reverse=True)
test_df['pred_Cl'] = rescale(pred[:, 1],
                             orig_df['Cl'].min(),
                             orig_df['Cl'].max(),
                             reverse=True)
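
The rescale helper is not shown in this example; a minimal min-max implementation consistent with the call signature above (hypothetical, with reverse=True assumed to map network output back to physical units) could be:

def rescale(values, vmin, vmax, reverse=False):
    """Min-max scale to [0, 1], or map a [0, 1] array back to [vmin, vmax]."""
    if reverse:
        return values * (vmax - vmin) + vmin    # network output -> physical units
    return (values - vmin) / (vmax - vmin)      # physical units -> [0, 1]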
Example #13
def NeuralNetGeneratorModel(x_train, y_train, x_val, y_val, params):
    """
    Keras model for the Neural Network, used to scan the hyperparameter space by Talos
    Uses the generator rather than the input data (which are dummies)
    """

    # Design network #
    with open(
            os.path.join(parameters.main_path,
                         'scaler_' + parameters.suffix + '.pkl'),
            'rb') as handle:  # Import scaler that was created before
        scaler = pickle.load(handle)
    IN = Input(shape=(x_train.shape[1], ), name='IN')
    L0 = PreprocessLayer(batch_size=params['batch_size'],
                         mean=scaler.mean_,
                         std=scaler.scale_,
                         name='Preprocess')(IN)
    L1 = Dense(params['first_neuron'],
               activation=params['activation'],
               kernel_regularizer=l2(params['l2']))(L0)
    HIDDEN = hidden_layers(params, 1, batch_normalization=True).API(L1)
    OUT = Dense(1, activation=params['output_activation'], name='OUT')(HIDDEN)

    #preprocess = Model(inputs=[IN],outputs=[L0])
    #utils.print_summary(model=preprocess)

    # Tensorboard logs #
    path_board = os.path.join(parameters.main_path, "TensorBoard")
    suffix = 0
    while (os.path.exists(os.path.join(path_board, "Run_" + str(suffix)))):
        suffix += 1
    path_board = os.path.join(path_board, "Run_" + str(suffix))
    os.makedirs(path_board)
    logging.info("TensorBoard log dir is at %s" % path_board)

    # Callbacks #
    early_stopping = EarlyStopping(monitor='val_loss',
                                   min_delta=0.,
                                   patience=50,
                                   verbose=1,
                                   mode='min')
    reduceLR = ReduceLROnPlateau(monitor='val_loss',
                                 factor=0.5,
                                 patience=20,
                                 verbose=1,
                                 mode='min',
                                 cooldown=10,
                                 min_lr=1e-5)
    loss_history = LossHistory()
    board = TensorBoard(log_dir=path_board,
                        histogram_freq=1,
                        batch_size=params['batch_size'],
                        write_graph=True,
                        write_grads=True,
                        write_images=True)
                        #embeddings_freq=0,
                        #embeddings_layer_names=None,
                        #embeddings_metadata=None,
                        #embeddings_data=None,
                        #update_freq='epoch')
    Callback_list = [loss_history, early_stopping, reduceLR, board]
    #Callback_list = [loss_history,reduceLR,board]

    # Check if generator weights have been requested #
    if 'generator_weights' in params and params['generator_weights']:
        weights_generator = parameters.weights_generator
    else:
        weights_generator = ''

    # Compile #
    if 'resume' not in params:
        # Define model #
        model = Model(inputs=[IN], outputs=[OUT])
        utils.print_summary(model=model)  #used to print model
        # Compile it #
        model.compile(optimizer=Adam(lr=params['lr']),
                      loss={'OUT': params['loss_function']},
                      metrics=['accuracy'])
        initial_epoch = 0
    else:  # a model has to be imported and resumes training
        custom_objects = {'PreprocessLayer': PreprocessLayer}
        logging.info("Loaded model %s" % params['resume'])
        a = Restore(params['resume'],
                    custom_objects=custom_objects,
                    method='h5')
        model = a.model
        model.compile(optimizer=Adam(lr=params['lr']),
                      loss={'OUT': params['loss_function']},
                      metrics=['accuracy'])
        utils.print_summary(model=model)  #used to print model
        #initial_epoch = a.params['epochs'][0]
        initial_epoch = params['initial_epoch']

    # Generator #
    training_generator = DataGenerator(path=parameters.path_gen_training,
                                       inputs=parameters.inputs,
                                       outputs=parameters.outputs,
                                       batch_size=params['batch_size'],
                                       state_set='training',
                                       weights_generator=weights_generator)
    validation_generator = DataGenerator(path=parameters.path_gen_validation,
                                         inputs=parameters.inputs,
                                         outputs=parameters.outputs,
                                         batch_size=params['batch_size'],
                                         state_set='validation')
    #weights_generator = weights_generator) # Might be unnecessary

    # Fit #
    logging.info("Will use %d workers" % parameters.workers)
    logging.warning("Keras location " + keras.__file__)
    logging.warning("Tensorflow location " + tf.__file__)
    logging.warning("GPU ")
    logging.warning(K.tensorflow_backend._get_available_gpus())
    history = model.fit_generator(
        generator=training_generator,
        validation_data=validation_generator,
        epochs=params['epochs'],
        verbose=1,
        max_queue_size=parameters.workers * 2,
        callbacks=Callback_list,
        initial_epoch=initial_epoch,
        workers=parameters.workers,
        shuffle=True,
        #steps_per_epoch       = 20,
        use_multiprocessing=True)


    # Plot history #
    PlotHistory(loss_history)

    return history, model
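
LossHistory and PlotHistory come from the surrounding project and are not shown here; a minimal Keras callback with the same role (recording per-epoch losses so they can be plotted afterwards) might look like:

# Sketch only: a stand-in for the project's LossHistory/PlotHistory helpers
import keras
import matplotlib.pyplot as plt

class LossHistory(keras.callbacks.Callback):
    """Record training and validation loss at the end of every epoch."""
    def on_train_begin(self, logs=None):
        self.losses, self.val_losses = [], []
    def on_epoch_end(self, epoch, logs=None):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))

def PlotHistory(history):
    """Plot the recorded loss curves."""
    plt.plot(history.losses, label='train loss')
    plt.plot(history.val_losses, label='val loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.savefig('loss_history.png')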