Exemple #1
0
 def run_param(self, param):
     """Cross-validate one LightGBM parameter setting over the K folds.

     For every fold, trains on the complement and validates on the fold,
     then appends a per-fold result dict (metrics from LightGBMRegressor.run
     plus pcode/fold/time bookkeeping and the raw params) to
     self.result_list.  Returns self for chaining.
     """
     n_rows = self.df_train.shape[0]
     n_folds = self.K
     for fold_i in range(n_folds):
         if self.verbose:
             print(fold_i, n_folds)
         valid_idx = self.fold[fold_i]
         # training rows = all rows not held out in this fold
         train_idx = list_diff(range(n_rows), valid_idx)
         fold_valid = self.df_train.iloc[valid_idx, :]
         fold_train = self.df_train.iloc[train_idx, :]
         started = now()
         result = LightGBMRegressor.run([fold_train, fold_valid], param)
         elapsed = (now() - started).total_seconds()
         result.update({
             'pcode' : dict_to_code(param),
             'fold_i' : fold_i,
             'fold_k' : n_folds,
             'time' : elapsed,
         })
         # flatten the raw params into the result row as well
         result.update(param)
         self.result_list.append(result)
         if self.verbose:
             print(result)
     if self.verbose:
         print(self.df)
     return self
Exemple #2
0
    def tune(self, pid, T=1, tbound=60, pgen=(lambda:{
                    'num_leaves' : int(draw(15,127,3,1023)),
                    'learning_rate' : draw(1e-2,1e-1,1e-10,1,log=True),
                    'n_estimators' : int(draw(100,200,4,400)),
                    'min_child_samples' : int(draw(15,63,7,1023)),
                }) ):
        """Random-search LightGBM tuning: T rounds of sampled params, K-fold each.

        Args:
            pid:    identifier passed through to self.save for persisting results.
            T:      number of parameter samples to evaluate.
            tbound: per-fold wall-clock budget (seconds); a fold exceeding it
                    aborts the remaining folds of that parameter set.
            pgen:   zero-arg callable producing a fresh param dict each round
                    (default draws from hand-picked LightGBM ranges via `draw`).

        Results are accumulated in `rlist` and saved via self.save every round.
        """
        save = self.save
        load = self.load
        check = self.check
        K_fold = self.K_fold
        # Accept any callable hook.  The original compared against
        # type(lambda: None), which wrongly rejected bound methods,
        # functools.partial objects and other callables.
        assert callable(save)
        assert callable(check)
        assert callable(load)

        # prepare df_train, df_valid
        df_train = []
        df_valid = []
        for i_fold in range(K_fold):
            print("loading %d/%d fold"%(i_fold,K_fold))
            assert check(i_fold)
            fold = load(i_fold)
            df_train.append(fold['df_train'])
            df_valid.append(fold['df_valid'])

        # tune
        rlist = []
        for ti in range(T):
            try:
                param = pgen()
                print(param)
                for i_fold in range(K_fold):
                    st = now()
                    result = LightGBMRegressor.run([df_train[i_fold], df_valid[i_fold]], param)
                    tcost = (now()-st).total_seconds()
                    result.update({
                        'ti' : ti,
                        'pcode' : dict_to_code(param),
                        'i_fold' : i_fold,
                        'K_fold' : K_fold,
                        'time' : tcost,
                    })
                    result.update(param)
                    rlist.append(result)
                    print('i_fold = %d\nresult = %s'%(i_fold,str(result)))
                    if tcost > tbound:
                        # skip the remaining folds of a too-expensive param set
                        print("cut param run time too long")
                        break

                # save rlist every round; a save failure must not kill tuning
                try:
                    save(pid, rlist)
                except Exception as e:
                    print("[WARNING] save failed !!!!")
                    print(e)
            except Exception as e:
                # best-effort: one bad parameter draw must not abort the search
                print("[WARNING] ti=%d failed."%ti)
                print(e)
Exemple #3
0
 def target(**param):
     """Objective for a maximizing optimizer: returns negated validation error.

     NOTE(review): relies on enclosing-scope names not visible here:
     `cache`, `self`, `total_time`, `st`, plus helpers `dict_to_code`/`now`.
     """
     # round the integer-valued hyperparameters the optimizer proposes as floats
     param['units'] = int(round(param['units']))
     param['layers'] = int(round(param['layers']))
     pcode = dict_to_code(param)
     # presumably `cache` is a defaultdict; truthiness also skips a cached
     # E_val of exactly 0 — TODO confirm that is intended
     if cache[pcode]:
         print("cache!")
         return -cache[pcode]
     result = self.tune(param)
     
     # progress line on stderr, overwritten in place via '\r'
     print("remain = %10.2f"%(float(total_time - (now()-st).total_seconds())), file=sys.stderr, end='\r')
     sys.stderr.flush()
     self.rlist.append(result)
     self.save()
     # memoize the validation error so repeated proposals are free
     cache[pcode] = result['E_val']
     # negate: the surrounding optimizer maximizes, but E_val is a loss
     return -result['E_val']
Exemple #4
0
 def tune(self, param):
     """Train K fold-models with a fixed epoch budget and summarize them.

     Trains one model per fold, finds the epoch with the lowest mean
     validation error across folds (restricted to epochs every model
     reached), and returns a flat result dict with timing, E_in/E_val
     mean and std at that epoch, and each fold's actual epoch count.
     """
     param.update({'epochs':self.epochs_fixed})
     n_folds = self.K_fold
     result = dict(param)

     def _train_one(fold_idx):
         # build a model, bind it to its fold's data, and fit it
         model = self.fmodel(param)
         model.setup(self.fold[fold_idx])
         model.fit(self.time_limit)
         return model

     # train
     started = now()
     models = [_train_one(k) for k in range(n_folds)]
     elapsed = (now() - started).total_seconds()

     # conclude K models information: only epochs every model completed
     real_epochs = [m.epochs for m in models]
     common_epochs = min(real_epochs)
     evals = [sum(m.eval[e] for m in models) / n_folds
              for e in range(common_epochs)]
     best_eval = min(evals)
     # first epoch achieving the minimum mean validation error
     best_epochs = evals.index(best_eval)

     bein  = [m.ein[best_epochs] for m in models]
     beval = [m.eval[best_epochs] for m in models]
     result.update({
         'time' : elapsed,
         'pcode' : dict_to_code(param),
         'best_epochs' : best_epochs,
         'E_in' : np.mean(bein),
         'E_in_std' : np.std(bein),
         'E_val' : np.mean(beval),
         'E_val_std' : np.std(beval),
     })
     for k, epochs_k in enumerate(real_epochs):
         result['epo%d'%k] = epochs_k
     return result
Exemple #5
0
    def tune_init(self, param):
        """Canonicalize `param`, serve cached results, and build per-fold models.

        Always returns a 2-tuple ``(result, param_used)`` so callers can
        unconditionally unpack it:
          * cache hit          -> (memoized result dict, param_used)
          * model init failure -> (failure marker dict,  param_used)
          * success            -> (None, param_used); self.model_list is filled

        Fix: the original returned a bare dict on the first two paths while
        the caller tuple-unpacks the return value — a cache hit therefore
        raised ValueError and the failure path unpacked dict *keys*.
        """
        # check param & cache
        param_used = self.check_param(param)
        pcode = dict_to_code(param_used)
        if self.rcache[pcode] >= 0:
            # already evaluated: hand back the memoized result
            return self.rlist[self.rcache[pcode]], param_used

        # init models, one per data fold
        self.model_list = []
        K = len(self.data_list)
        for i in range(K):
            model = self.new_model(param_used)
            try:
                model.init(self.data_list[i])
            # was a bare `except:`; Exception keeps the broad best-effort
            # intent but lets KeyboardInterrupt/SystemExit propagate
            except Exception:  # MLE?
                result = {
                    'pcode': -1,
                    'pid': self.pid,
                }
                self.rlist.append(result)
                return result, param_used
            self.model_list.append(model)
        return None, param_used
Exemple #6
0
    def tune(self, param):
        """Evaluate one parameter set: train K fold-models epoch by epoch,
        early-stop, and return a flat result dict (also cached in rlist).

        Returns either the failure/cached result from tune_init, or a dict
        with timing, best epoch, E_in/E_val mean+std at that epoch, sample
        count and pid.
        """
        # init & handle exceptions such as MLE / identity param
        result_fail, param_used = self.tune_init(param)
        if result_fail:
            return result_fail
        pcode = dict_to_code(param_used)

        # train
        K = len(self.data_list)
        iepoch = 0
        self.start = now()
        while True:
            # sequentially train because python multiprocessing overhead
            try:
                for i in range(K):
                    self.model_list[i].epoch()
            except Exception as e:
                # a failing epoch ends training; the completed epochs still count
                print(e)
                break
            if self.verbose:
                # Fix: the original iterated range(5) here, which raised
                # IndexError for K < 5 and truncated the report for K > 5.
                print("iepoch %d done. eval=%s" %
                      (iepoch,
                       str([
                           round(self.model_list[i].last_eval, 6)
                           for i in range(K)
                       ])))
            if self.early_stop(iepoch):
                break
            # divergence guard: abort once training error explodes
            if iepoch > 3 and self.model_list[0].last_ein > 1000:
                break
            iepoch += 1
        nepoch = iepoch + 1

        # conclude K models information: mean validation error per epoch
        evals = [
            sum([m.eval[i] for m in self.model_list]) / K
            for i in range(nepoch)
        ]
        best_eval = min(evals)
        best_iepoch = 0
        for i in range(nepoch):
            if evals[i] == best_eval:
                best_iepoch = i
                break

        bein = [m.ein[best_iepoch] for m in self.model_list]
        beval = [m.eval[best_iepoch] for m in self.model_list]
        result = {}
        result.update(param_used)  # instead of "param"
        result.update({
            'time': self.time_elapsed,
            'pcode': pcode,
            'best_iepoch': best_iepoch,
            'nepoch': nepoch,
            'samples': self.samples,
            'E_in': np.mean(bein),
            'E_in_std': np.std(bein),
            'E_val': np.mean(beval),
            'E_val_std': np.std(beval),
            'pid': self.pid,
        })
        # memoize: rcache maps pcode -> index of this result in rlist
        self.rcache[pcode] = len(self.rlist)
        self.rlist.append(result)
        return result