Example 1
def optimize_model_pytorch(device, args, train_GWAS, train_y, test_GWAS, test_y, out_folder="", startupJobs=40, maxevals=200, noOut=False):
    global numTrials_pytorch
    numTrials_pytorch = 0

    trials = Trials()
    trial_wrapper = partial(trial_pytorch, device=device, args=args, train_GWAS=train_GWAS, train_y=train_y, test_GWAS=test_GWAS, test_y=test_y)

    best_pars = fmin(trial_wrapper, parameter_space_pytorch(), algo=partial(tpe.suggest, n_startup_jobs=startupJobs), max_evals=maxevals, trials=trials)

    # Print the selected 'best' hyperparameters.
    if not noOut:
        print('\nBest hyperparameter settings: ', space_eval(parameter_space_pytorch(), best_pars), '\n')

    regression = True

    # loop over the per-hyperparameter index dict of the 1st trial (one key per hyperparameter)
    for p in trials.trials[0]['misc']['idxs']:
        plot_optimization_pytorch(trials, p, regression, out_folder=out_folder)

    best_pars = space_eval(parameter_space_pytorch(), best_pars)  # map the raw indices back to actual values in the parameter space
    
    # override the epochs with the early-stopping epoch
    lowestLossIndex = np.argmin(trials.losses())
    best_pars['earlyStopEpochs'] = trials.trial_attachments(trials.trials[lowestLossIndex])['highestAcc_epoch']
    best_pars['earlyStopEpochs'] += 1  # epochs are 0-based otherwise
    best_pars['epochs'] = best_pars['earlyStopEpochs']
    if best_pars['epochs'] <= 0:
        best_pars['epochs'] = 1  # we don't want a network without any training, as that would break deep dreaming
    return best_pars
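The 'highestAcc_epoch' attachment read above only exists because the objective stores it. A minimal sketch of the write side, assuming a hypothetical trial_pytorch that tracks its best validation epoch (names and values here are illustrative, not the original code):

from hyperopt import STATUS_OK

def trial_pytorch(params, **data):
    # ... train the model, tracking the epoch with the highest validation accuracy ...
    best_epoch = 7   # placeholder for the epoch found during training
    val_loss = 0.42  # placeholder for the final validation loss
    return {
        'loss': val_loss,
        'status': STATUS_OK,
        # stored per trial; read back via trials.trial_attachments(trial)['highestAcc_epoch'].
        # The in-memory Trials round-trips plain values; MongoTrials expects strings/bytes.
        'attachments': {'highestAcc_epoch': best_epoch},
    }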
Example 2
 def run(self):
     trials = Trials()
     best = fmin(
         self._obj, self.model_param_space._build_space(),
         tpe.suggest, self.max_evals, trials)
     best_params = space_eval(self.model_param_space._build_space(), best)
     best_params = self.model_param_space._convert_into_param(best_params)
     trial_loss = np.asarray(trials.losses(), dtype=float)
     best_ind = np.argmin(trial_loss)
     mrr = -trial_loss[best_ind]
     raw_mrr = trials.trial_attachments(trials.trials[best_ind])["raw_mrr"]
     raw_hits_at1 = trials.trial_attachments(trials.trials[best_ind])["raw_hits_at1"]
     raw_hits_at3 = trials.trial_attachments(trials.trials[best_ind])["raw_hits_at3"]
     raw_hits_at10 = trials.trial_attachments(trials.trials[best_ind])["raw_hits_at10"]
     hits_at1 = trials.trial_attachments(trials.trials[best_ind])["hits_at1"]
     hits_at3 = trials.trial_attachments(trials.trials[best_ind])["hits_at3"]
     hits_at10 = trials.trial_attachments(trials.trials[best_ind])["hits_at10"]
     self.logger.info("-" * 50)
     self.logger.info("Best CV Results:")
     self.logger.info("Raw MRR: %.6f" % raw_mrr)
     self.logger.info("Filtered MRR: %.6f" % mrr)
     self.logger.info("Raw: Hits@1 %.3f Hits@3 %.3f Hits@10 %.3f" % (
         raw_hits_at1, raw_hits_at3, raw_hits_at10))
     self.logger.info("Filtered: Hits@1 %.3f Hits@3 %.3f Hits@10 %.3f" % (
         hits_at1, hits_at3, hits_at10))
     self.logger.info("Best Param:")
     self.task._print_param_dict(best_params)
     self.logger.info("-" * 50)
Example 3
    def run(self):
        line_index = 1
        self.param_space = ModelParamSpace()
        for task_mode in learner_space:
            print('start %s model task' % task_mode)
            for learner in learner_space[task_mode]:
                print('optimizing %s' % learner)
                self.learner_name = learner
                start = time.time()
                trials = Trials()
                logname = "%s_%s_%s.log" % (
                    task_mode, learner,
                    datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
                self.logger = logging_utils._get_logger(
                    config.LOG_DIR, logname)
                best = fmin(lambda param: self._obj(param, task_mode),
                            self.param_space._build_space(learner),
                            tpe.suggest, self.max_evals, trials)

                end = time.time()
                time_cost = time_utils.time_diff(start, end)
                self.logger.info("Hyperopt_Time")
                self.logger.info("     %s" % time_cost)
                self.logger.info("-" * 50)
                print("   Finished %d hyper train with %d-fold cv, took %s" %
                      (self.max_evals, self.n_iter, time_cost))

                best_params = space_eval(
                    self.param_space._build_space(learner), best)
                best_params = self.param_space._convert_int_param(best_params)
                trial_loss = np.asarray(trials.losses(), dtype=float)
                best_ind = np.argmin(trial_loss)
                auc_cv_mean = -trial_loss[best_ind]
                test_auc = trials.trial_attachments(
                    trials.trials[best_ind])["test_auc"]
                refit_time = trials.trial_attachments(
                    trials.trials[best_ind])["refit_time"]

                with open(config.MODEL_COMPARE, 'a+') as f:
                    if line_index:
                        line_index = 0
                        f.writelines(
                            "task_mode   learner   auc_cv_mean   test_auc   refit_time   best_params \n"
                        )
                    f.writelines("%s   %s   %.4f   %.4f   %s   %s \n" %
                                 (task_mode, learner, auc_cv_mean, test_auc,
                                  refit_time, best_params))
Example 4
 def run(self):
     start = time.time()
     trials = Trials()
     best = fmin(self._obj, self.model_param_space._build_space(),
                 tpe.suggest, hp_iter, trials)
     best_params = space_eval(self.model_param_space._build_space(), best)
     best_params = self.model_param_space._convert_int_param(best_params)
     trial_rmses = np.asarray(trials.losses(), dtype=float)
     best_ind = np.argmin(trial_rmses)
     best_rmse_mean = trial_rmses[best_ind]
     best_rmse_std = trials.trial_attachments(
         trials.trials[best_ind])["std"]
     self.logger.info("-" * 50)
     self.logger.info("Best RMSE")
     self.logger.info("      Mean: %.6f" % best_rmse_mean)
     self.logger.info("      std: %.6f" % best_rmse_std)
     self.logger.info("Best param")
     self.task._print_param_dict(best_params)
     end = time.time()
     _sec = end - start
     _min = int(_sec / 60.)
     self.logger.info("Time")
     if _min > 0:
         self.logger.info("      %d mins" % _min)
     else:
         self.logger.info("      %d secs" % _sec)
     self.logger.info("-" * 50)
Example 5
 def run(self):
     start = time.time()
     trials = Trials()
     best = fmin(self._obj, self.model_param_space._build_space(), tpe.suggest, self.max_evals, trials)
     best_params = space_eval(self.model_param_space._build_space(), best)
     best_params = self.model_param_space._convert_int_param(best_params)
     trial_rmses = np.asarray(trials.losses(), dtype=float)
     best_ind = np.argmin(trial_rmses)
     best_rmse_mean = trial_rmses[best_ind]
     best_rmse_std = trials.trial_attachments(trials.trials[best_ind])["std"]
     self.logger.info("-"*50)
     self.logger.info("Best RMSE")
     self.logger.info("      Mean: %.6f"%best_rmse_mean)
     self.logger.info("      std: %.6f"%best_rmse_std)
     self.logger.info("Best param")
     self.task._print_param_dict(best_params)
     end = time.time()
     _sec = end - start
     _min = int(_sec/60.)
     self.logger.info("Time")
     if _min > 0:
         self.logger.info("      %d mins"%_min)
     else:
         self.logger.info("      %d secs"%_sec)
     self.logger.info("-"*50)
Example 6
 def run(self):
     trials = Trials()
     best = fmin(self._obj, self.model_param_space._build_space(),
                 tpe.suggest, self.max_evals, trials)
     best_params = space_eval(self.model_param_space._build_space(), best)
     best_params = self.model_param_space._convert_into_param(best_params)
     trial_loss = np.asarray(trials.losses(), dtype=float)
     best_ind = np.argmin(trial_loss)
     best_ap = -trial_loss[best_ind]  # assuming, as in the sibling examples, that the objective reports the negated AP as loss
     best_loss = trials.trial_attachments(trials.trials[best_ind])["loss"]
     best_acc = trials.trial_attachments(trials.trials[best_ind])["acc"]
     self.logger.info("-" * 50)
     self.logger.info("Best Average Precision: %.3f" % best_ap)
     self.logger.info("with Loss %.3f, Accuracy %.3f" %
                      (best_loss, best_acc))
     self.logger.info("Best Param:")
     self.task._print_param_dict(best_params)
     self.logger.info("-" * 50)
Example 7
def make_opt_predict_by_models(specified_models):
    """
    使用指定的模型预测结果
    所有尝试的参数均记录在文件中
    :param specified_models:
    :return:best_kappa_mean, best_kappa_std
    """
    log_path = "%s/Log" % config.output_path
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    models_best_params = []
    # check whether each passed-in model is among the configured models
    for feat_name in specified_models:
        if feat_name not in model_library_config.feat_names:
            continue
        # param space; the model also needs it internally (to write the log header)
        feat_folder, param_space = model_library_config.model_config[feat_name]
        model = create_model(param_space, config.solution_info, feat_folder,
                             feat_name)
        model.log_header()

        print("************************************************************")
        print("Search for the best params")
        # global trial_counter
        trials = Trials()
        objective = lambda p: model.hyperopt_obj(p, feat_folder, feat_name)
        best_params = fmin(objective,
                           param_space,
                           algo=tpe.suggest,
                           trials=trials,
                           max_evals=param_space["max_evals"])
        # convert the numeric attributes contained in best_params to int
        for f in model_library_config.int_feat:
            if f in best_params:
                best_params[f] = int(best_params[f])
        print("************************************************************")
        print("Best params")
        for k, v in best_params.items():
            print("        %s: %s" % (k, v))
        # collect the losses from all trials
        trial_kappas = -np.asarray(trials.losses(), dtype=float)
        best_kappa_mean = max(trial_kappas)
        # np.where returns a tuple of coordinate arrays, one per dimension
        ind = np.where(trial_kappas == best_kappa_mean)[0][0]
        # look up the std attached to the best trial
        best_kappa_std = trials.trial_attachments(trials.trials[ind])['std']
        print("Kappa stats")
        print("Mean: %.6f\n        Std: %.6f" %
              (best_kappa_mean, best_kappa_std))

        models_best_params.append((feat_name, best_kappa_mean, best_kappa_std))

    return models_best_params
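Most of the snippets in this catalog read a 'std' attachment off the best trial. For that key to exist, the objective has to attach it when reporting its loss; a minimal sketch of such an objective (function and values are illustrative, not taken from any of the repos above):

import numpy as np
from hyperopt import STATUS_OK

def hyperopt_obj(params):
    # stand-in for cross-validation: one score per fold
    fold_scores = np.array([0.61, 0.63, 0.60])
    return {
        'loss': -fold_scores.mean(),  # negated so that fmin maximizes the metric
        'status': STATUS_OK,
        # retrievable later as trials.trial_attachments(trial)['std']
        'attachments': {'std': fold_scores.std()},
    }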
Example 8
 def run(self):
     trials = Trials()
     best = fmin(self._obj, self.model_param_space._build_space(), tpe.suggest, max_evals=1, trials=trials)
     best_params = space_eval(self.model_param_space._build_space(), best)
     best_params = self.model_param_space._convert_into_param(best_params)
     trial_loss = np.asarray(trials.losses(), dtype=float)
     best_ind = np.argmin(trial_loss)
     best_eacc = -trial_loss[best_ind]
     best_pacc = trials.trial_attachments(trials.trials[best_ind])["pacc"]
     # best_eacc = trials.trial_attachments(trials.trials[best_ind])["eacc"]
     self.logger.info("-" * 50)
     self.logger.info("Best Exact Accuracy %.3f with Partial Accuracy %.3f" % (best_eacc, best_pacc))
     self.logger.info("Best Param:")
     self.task._print_param_dict(best_params)
     self.logger.info("-" * 50)
Example 9
def TunningParamter(param, data, features, feature, source_name, real_value, int_boolean):
    data = data[~pd.isnull(data[feature])]
    print(data.shape)
    ISOTIMEFORMAT = '%Y-%m-%d %X'
    start = time.strftime(ISOTIMEFORMAT, time.localtime())
    trials = Trials()
    objective = lambda p: trainModel(p, data, features, feature, source_name, real_value, int_boolean)

    best_parameters = fmin(objective, param, algo=tpe.suggest, max_evals=param['max_evals'], trials=trials)
    # now we need to get the best params
    trials_loss = np.asanyarray(trials.losses(), dtype=float)
    best_loss = min(trials_loss)
    ind = np.where(trials_loss == best_loss)[0][0]
    best_loss_std = trials.trial_attachments(trials.trials[ind])['std']
    end = time.strftime(ISOTIMEFORMAT, time.localtime())
    dumpMessage(best_parameters, best_loss, best_loss_std, param['task'], source_name, start, end)
Example 10
 def run(self):
     trials = Trials()
     best = fmin(self._obj, self.model_param_space._build_space(),
                 tpe.suggest, self.max_evals, trials)
     best_params = space_eval(self.model_param_space._build_space(), best)
     best_params = self.model_param_space._convert_into_param(best_params)
     trial_loss = np.asarray(trials.losses(), dtype=float)
     best_ind = np.argmin(trial_loss)
     acc = -trial_loss[best_ind]
     loss = trials.trial_attachments(trials.trials[best_ind])["loss"]
     self.logger.info("-" * 50)
     self.logger.info("Best CV Results:")
     self.logger.info("Loss: %.3f" % loss)
     self.logger.info("Accuracy: %.3f" % acc)
     self.logger.info("Best Param:")
     self.task._print_param_dict(best_params)
     self.logger.info("-" * 50)
Example 11
 def run(self):
     start = time.time()
     trials = Trials()
      best = fmin(self._obj, self.model_param_space.build_space(), tpe.suggest, self.max_evals, trials)
     best_params = space_eval(self.model_param_space.build_space(), best)
     best_params = self.model_param_space._convert_int_param(best_params)
     trial_score = np.asarray(trials.losses(), dtype=float)
     best_ind = np.argmin(trial_score)
     best_score_mean = trial_score[best_ind]
     best_score_std = trials.trial_attachments(trials.trials[best_ind])["std"]
     self.logger.info("-" * 50)
     self.logger.info("Best Score")
     self.logger.info("      Mean: %.6f" % -best_score_mean)
     self.logger.info("      std: %.6f" % best_score_std)
     self.logger.info("Best param")
     self.task._print_param_dict(best_params)
     end = time.time()
     _sec = end - start
     _min = int(_sec / 60.)
     self.logger.info("Time")
     self.logger.info('-'* 50)
Example 12
 def minimize(self, restarts=2, epochs=600, tune_space=None):
     from hyperopt import fmin, tpe, Trials
     if tune_space is None:
         initial_values = self.tf_session.run(self.variables)
         tune_space = self._make_tune_space(initial_values)
     # TODO: This report structure has the downside of not writing
     # anything to disk until it's 100% complete.
     reports = []
     # Make minimize deterministic
     R = np.random.RandomState(self.seed)
      for _ in range(restarts):
         trials = Trials()
         best = fmin(fn=self._evaluate,
                     space=tune_space,
                     algo=tpe.suggest,
                     max_evals=epochs,
                     trials=trials,
                     rstate=R)
         self._assign_values(best)
         reports.extend(trials.trial_attachments(t)['report'] for t in trials.trials)
     return self.evaluator.make_agg_report(reports)
Example 13
def TunningParamter(param, data, price_data):
    ISOTIMEFORMAT = '%Y-%m-%d %X'
    start_time = time.strftime(ISOTIMEFORMAT, time.localtime())
    trials = Trials()
    if param['task'] == 'mean_raw_scroll':
        objective = lambda p: rawPredict(p, data)
    elif param['task'] == 'mean_price_scroll':
        objective = lambda p: pricePredict(p, price_data, data)

    best_params = fmin(objective,
                       param,
                       algo=tpe.suggest,
                       trials=trials,
                       max_evals=param["max_evals"])
    print(best_params)
    trial_acc = np.asanyarray(trials.losses(), dtype=float)
    best_acc_mean = min(trial_acc)
    ind = np.where(trial_acc == best_acc_mean)[0][0]
    best_loss_std = trials.trial_attachments(trials.trials[ind])['std']
    end_time = time.strftime(ISOTIMEFORMAT, time.localtime())
    dumpMessage(best_params, best_acc_mean, best_loss_std, param['task'],
                start_time, end_time)
    print("Best stats")
Example 14
def TunningParamter(param, num, features):
    ISOTIMEFORMAT = '%Y-%m-%d %X'
    start = time.strftime(ISOTIMEFORMAT, time.localtime())
    trials = Trials()
    #add feature
    #label to predict

    for i in range(1, 2 * num + 1):
        feature = "Ret_all_%d" % (i)
        features.append(feature)
        feature = "Ret_all_std_%d" % (i)
        features.append(feature)

    # labels to predict
    print(features)
    predict_label = []

    for i in range(1, num + 1):
        predict_label.append("Pred_%d" % i)

    objective = lambda p: trainModel(p, train, features, predict_label)
    best_parameters = fmin(objective,
                           param,
                           algo=tpe.suggest,
                           max_evals=param['max_evals'],
                           trials=trials)
    #now we need to get best_param
    print(best_parameters)

    trials_loss = np.asanyarray(trials.losses(), dtype=float)
    best_loss = min(trials_loss)
    ind = np.where(trials_loss == best_loss)[0][0]
    best_loss_std = trials.trial_attachments(trials.trials[ind])['std']
    end = time.strftime(ISOTIMEFORMAT, time.localtime())
    dumpMessage(best_parameters, best_loss, best_loss_std, param['task'],
                start, end)
Example 15
hp_assignment = {
    k: v[0]
    for k, v in trials_lgb.trials[0]['misc']['vals'].items()
}
hp_assignment = {k: v[0]
                 for k, v in trials_lgb.vals.items()}  # equivalent to the dict above, just more concise
space_eval(space_lgb, hp_assignment)

trials_lgb.trials[0]['result']  # {'loss': -0.8737864077669903, 'status': 'ok'}
# returns a list of the parameter dicts of the k best models
trials_lgb.topk_trials(k=2)
# with return_score=True it returns a tuple of two lists
trials_lgb.topk_trials(2, return_score=True, ordered=True)
type(trials_lgb.topk_trials(2, return_score=True,
                            ordered=True)[0][0])  # this type is just a dict
# Trials().trial_attachments resolves the corresponding model path from a trial's parameter dict
trials_lgb.trial_attachments(
    trials_lgb.topk_trials(2, return_score=True,
                           ordered=True)[0][0])["model"].decode()
# %%

# return the top-k models
select_models = lambda trials, k: [
    (trials.trial_attachments(t)["model"].decode(), c)
    for t, c in zip(*trials.topk_trials(k, return_score=True, ordered=True))
]
for sub_model_path, sub_model_score in select_models(trials_lgb, 3):
    print(-sub_model_score, sub_model_path)

best_auc = -trials_lgb.topk_trials(1, return_score=True, ordered=True)[1][0]
best_space = trials_lgb.topk_trials(1, return_score=True,
                                    ordered=True)[0][0]['misc']['vals']
best_hyperparam = space_eval(
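Note that topk_trials is not part of stock hyperopt's Trials; it is an extension in the codebase this snippet comes from. A rough equivalent over a plain Trials object might look like this (a sketch under that assumption; hyperopt minimizes, so "best" means lowest loss):

import numpy as np

def topk_trials(trials, k, return_score=False, ordered=True):
    # rank trials by loss, ascending; failed trials (loss None) become NaN and sort last
    losses = np.asarray(trials.losses(), dtype=float)
    order = np.argsort(losses)[:k]
    if not ordered:
        order = np.sort(order)  # restore submission order
    top = [trials.trials[i] for i in order]
    if return_score:
        return top, [losses[i] for i in order]
    return top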
Example 16
        param_space = param_spaces[model_name]
        trials = Trials()
        objective = lambda p: hyperopt_wrapper(p, feat_key, model_name, train, loss)
        best_params = fmin(objective, param_space, algo=tpe.suggest,
                           trials=trials, max_evals=max_evals)
        for f in int_feat:
            if f in best_params:
                best_params[f] = int(best_params[f])
        print("************************************************************")
        print("Best params")
        for k, v in best_params.items():
            print("        %s: %s" % (k, v))
        trial_losses = -np.asarray(trials.losses(), dtype=float)
        best_loss_mean = max(trial_losses)
        ind = np.where(trial_losses == best_loss_mean)[0][0]
        best_loss_std = trials.trial_attachments(trials.trials[ind])['std']
        print("Loss stats")
        print("        Mean: %.6f\n        Std: %.6f" % (best_loss_mean, best_loss_std))

    else:
        print('-------- generating submission -------')

        test = pd.read_csv(test_file, index_col=False)
        test_ids = test['ID']
        test.drop('ID', axis=1, inplace=True)

        best_params = loads(dumps(cv_scores.find({'model_name': model_name, 'feat_key': feat_key}).sort([('loss_cv_mean', -1)]).limit(1)))[0]

        print("Best params")
        for k, v in best_params.items():
            print("        %s: %s" % (k, v))
Example 17
        log_handler = open(log_file, 'w')
        writer = csv.writer(log_handler)
        headers = ['trial_counter', 'kappa_mean', 'kappa_std']
        for k, v in sorted(param_space.items()):
            headers.append(k)
        writer.writerow(headers)
        log_handler.flush()
        
        print("************************************************************")
        print("Search for the best params")
        #global trial_counter
        trial_counter = 0
        trials = Trials()
        objective = lambda p: hyperopt_wrapper(p,feat_name)
        best_params = fmin(objective, param_space, algo=tpe.suggest,
                           trials=trials, max_evals=param_space["max_evals"])
        for f in int_feat:
            if f in best_params:
                best_params[f] = int(best_params[f])
        print("************************************************************")
        print("Best params")
        for k, v in best_params.items():
            print("        %s: %s" % (k, v))
        trial_kappas = -np.asarray(trials.losses(), dtype=float)
        best_kappa_mean = max(trial_kappas)
        ind = np.where(trial_kappas == best_kappa_mean)[0][0]
        best_kappa_std = trials.trial_attachments(trials.trials[ind])['std']
        print("Kappa stats")
        print("        Mean: %.6f\n        Std: %.6f" % (best_kappa_mean, best_kappa_std))
    
Example 18
        model_type = sys.argv[1]
        max_evals = int(sys.argv[2])

    else:
        model_type = 'lgb'
        max_evals = 2

    logger.debug(
        f'Try to search paras base on model:{model_type}, max_evals:{max_evals}'
    )

    from functools import partial
    optimize_fun_ex = partial(optimize_fun, model_type=model_type)

    trials = Trials()
    space = get_search_space(model_type)
    best = fmin(optimize_fun_ex,
                space,
                algo=tpe.suggest,
                max_evals=max_evals,
                trials=trials)

    #logger.debug(f"Best: {best}")

    att_message = [
        trials.trial_attachments(trial)['message'] for trial in trials.trials
    ]
    for score, para, misc in zip(
            trials.losses(), att_message,
        [item.get('misc').get('vals') for item in trials.trials]):
        logger.debug(f'score:{"%9.6f"%score}, para:{para}, misc:{misc}')
Example 19
import pickle
import time

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials


def objective(x):
    return {
        'loss': x**2,
        'status': STATUS_OK,
        # -- store other results like this
        'eval_time': time.time(),
        'other_stuff': {
            'type': None,
            'value': [0, 1, 2]
        },
        # -- attachments are handled differently
        'attachments': {
            'time_module': pickle.dumps(time.time)
        }
    }


trials = Trials()
best = fmin(objective,
            space=hp.uniform('x', -10, 10),
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

print(best)

msg = trials.trial_attachments(trials.trials[5])['time_module']
print(msg)

time_module = pickle.loads(msg)
print(time_module)
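Attachments exist to keep bulky or binary payloads out of the main trial documents, which is why the value above goes through pickle. Besides the per-trial view used here, the Trials object also carries a global attachments dict for data not tied to any single trial; a small sketch (the 'preprocessing' key is made up for illustration):

trials.attachments['preprocessing'] = pickle.dumps({'scaler': 'standard'})
restored = pickle.loads(trials.attachments['preprocessing'])
print(restored)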
Example 20
        for k, v in sorted(param_space.items()):
            headers.append(k)
        writer.writerow(headers)
        log_handler.flush()

        print("************************************************************")
        print("Search for the best params")
        #global trial_counter
        trial_counter = 0
        trials = Trials()
        objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name)
        best_params = fmin(objective,
                           param_space,
                           algo=tpe.suggest,
                           trials=trials,
                           max_evals=param_space["max_evals"])
        for f in int_feat:
            if f in best_params:
                best_params[f] = int(best_params[f])
        print("************************************************************")
        print("Best params")
        for k, v in best_params.items():
            print("        %s: %s" % (k, v))
        trial_f1scores = -np.asarray(trials.losses(), dtype=float)
        best_f1score_mean = max(trial_f1scores)
        ind = np.where(trial_f1scores == best_f1score_mean)[0][0]
        best_f1score_std = trials.trial_attachments(trials.trials[ind])['std']
        print("F1 Score stats")
        print("        Mean: %.6f\n        Std: %.6f" %
              (best_f1score_mean, best_f1score_std))
Example 21
def wikiLearn():
    """
    不是特别懂
    """
    # 1. A simple function
    from hyperopt import fmin, tpe, hp
    best = fmin(fn=lambda x: x ** 2,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print(best)
    # 2. Using a function + OK status
    from hyperopt import fmin, tpe, hp, STATUS_OK
    def objective(x):
        return {'loss': x ** 2, 'status': STATUS_OK }
    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print(best)
    # 3. Returning a dict result
    import pickle
    import time
    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
    def objective(x):
        return {
            'loss': x ** 2,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {'type': None, 'value': [0, 1, 2]},
            # -- attachments are handled differently
            'attachments': {'time_module': pickle.dumps(time.time)}
        }
    trials = Trials()
    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100,
                trials=trials)
    print(best)
    print(trials.trials)
    print(trials.results)
    print(trials.losses())
    print(trials.statuses())
    # still not clear what attachments are for
    msg = trials.trial_attachments(trials.trials[5])['time_module']
    time_module = pickle.loads(msg)
    from hyperopt import hp
    space = hp.choice('a',
                      [
                          ('case 1', 1 + hp.lognormal('c1', 0, 1)),
                          ('case 2', hp.uniform('c2', -10, 10))
                      ])
    import hyperopt.pyll.stochastic
    print(hyperopt.pyll.stochastic.sample(space))
    # hp.choice(label, options)
    # hp.randint(label, upper)                  # [0, upper)
    # hp.uniform(label, low, high)
    # hp.quniform(label, low, high, q)          # round(uniform(low, high) / q) * q
    # hp.loguniform(label, low, high)
    # hp.qloguniform(label, low, high, q)       # round(exp(uniform(low, high)) / q) * q
    # hp.normal(label, mu, sigma)
    # hp.qnormal(label, mu, sigma, q)           # round(normal(mu, sigma) / q) * q
    # hp.lognormal(label, mu, sigma)
    # hp.qlognormal(label, mu, sigma, q)        # round(exp(normal(mu, sigma)) / q) * q
    # 4. Usage with sklearn
    from hyperopt import hp
    space = hp.choice('classifier_type', [
        {
            'type': 'naive_bayes',
        },
        {
            'type': 'svm',
            'C': hp.lognormal('svm_C', 0, 1),
            'kernel': hp.choice('svm_kernel', [
                {'ktype': 'linear'},
                {'ktype': 'RBF', 'width': hp.lognormal('svm_rbf_width', 0, 1)},
            ]),
        },
        {
            'type': 'dtree',
            'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
            'max_depth': hp.choice('dtree_max_depth',
                                   [None, hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
            'min_samples_split': hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
        },
    ])
    # 5. Still haven't figured out scope.define
    import hyperopt.pyll
    from hyperopt.pyll import scope
    @scope.define
    def foo(a, b=0):
        print('running foo', a, b)
        return a + b / 2
    # -- this will print 0, foo is called as usual.
    print(foo(0))
    # In describing search spaces you can use `foo` as you
    # would in normal Python. These two calls will not actually call foo,
    # they just record that foo should be called to evaluate the graph.
    space1 = scope.foo(hp.uniform('a', 0, 10))
    space2 = scope.foo(hp.uniform('a', 0, 10), hp.normal('b', 0, 1))
    # -- this will print a pyll.Apply node
    print(space1)
    # -- this will draw a sample by running foo()
    print(hyperopt.pyll.stochastic.sample(space1))
Example 22
        writer = csv.writer(log_handler)
        headers = ['trial_counter', 'log_loss_mean', 'log_loss_std', 'spend_time']
        for k, v in sorted(param_space.items()):
            headers.append(k)
        print(headers)
        writer.writerow(headers)
        log_handler.flush()

        print("************************************************************")
        print("Search for the best params")
        # global trial_counter
        trial_counter = 0
        trials = Trials()
        # the lambda is not executed at this point; it just defines a function
        objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name)
        # objective is handed to fmin, which calls it, passing in the three arguments
        best_params = fmin(objective, param_space, algo=tpe.suggest,
                           trials=trials, max_evals=param_space["max_evals"])
        for f in int_feat:
            if f in best_params:
                best_params[f] = int(best_params[f])
        print("************************************************************")
        print("Best params")
        for k, v in best_params.items():
            print("        %s: %s" % (k, v))
        trial_log_losses = -np.asarray(trials.losses(), dtype=float)
        best_log_loss_mean = max(trial_log_losses)
        ind = np.where(trial_log_losses == best_log_loss_mean)[0][0]
        best_log_loss_std = trials.trial_attachments(trials.trials[ind])['std']
        print("log_loss stats")
        print("        Mean: %.6f\n        Std: %.6f" % (best_log_loss_mean, best_log_loss_std))
Example 23
 train = pd.read_csv("../data/train.process.csv")
 for feat_name, feat_fold in zip(feat_names, feat_folders):
     # first we need to read the data for our model
     # this reduces the time spent reading data
     print('read data for training')
     print('generate model in condition in %s' % feat_name)
     print("Search for the best models")
     print("feat_name %s" % feat_name)
     # to reduce the time spent reading data
     # the train.shape[0] = 39774
     ISOTIMEFORMAT = '%Y-%m-%d %X'
     start_time = time.strftime(ISOTIMEFORMAT, time.localtime())
     param_space = para_spaces[feat_name]
     trials = Trials()
     objective = lambda p: trainModel(p, feat_fold, feat_name)
     best_params = fmin(objective, param_space, algo=tpe.suggest,
                        trials=trials, max_evals=param_space["max_evals"])
     print(type(best_params))
     print(best_params)
     for f in int_feat:
         if f in best_params:
             best_params[f] = int(best_params[f])
     trial_acc = -np.asanyarray(trials.losses(), dtype=float)
     best_acc_mean = max(trial_acc)
     ind = np.where(trial_acc == best_acc_mean)[0][0]
     best_acc_std = trials.trial_attachments(trials.trials[ind])['std']
     end_time = time.strftime(ISOTIMEFORMAT, time.localtime())
     dumpModelMessage(best_params, best_acc_mean, best_acc_std, feat_fold, feat_name, start_time, end_time)
     print("Best stats")
     print('Mean: %.6f \nStd: %.6f \n' % (best_acc_mean, best_acc_std))
     
Example 24
        print("************************************************************")
        print("Search for the best params")
        # global trial_counter
        trial_counter = 0
        trials = Trials()
        objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name)
        best_params = fmin(objective,
                           param_space,
                           algo=tpe.suggest,
                           trials=trials,
                           max_evals=param_space["max_evals"])
        # convert the numeric attributes contained in best_params to int
        for f in int_feat:
            if f in best_params:
                best_params[f] = int(best_params[f])
        print("************************************************************")
        print("Best params")
        for k, v in best_params.items():
            print "        %s: %s" % (k, v)
        # 获取尝试的losses
        trial_kappas = -np.asarray(trials.losses(), dtype=float)
        best_kappa_mean = max(trial_kappas)
        # where返回两个维度的坐标
        ind = np.where(trial_kappas == best_kappa_mean)[0][0]
        # 找到最优参数的std
        best_kappa_std = trials.trial_attachments(trials.trials[ind])['std']
        print("Kappa stats")
        print("        Mean: %.6f\n        Std: %.6f" %
              (best_kappa_mean, best_kappa_std))
Example 25
        trials = Trials()
        if config.preload:
            start = time.perf_counter()
            cv_data, tt_data = get_feat_data(feat_folder)
            print("loading data time used:", (time.perf_counter() - start))
            objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name,
                                                   cv_data, tt_data)
        else:
            objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name,
                                                   None, None)

        best_params = fmin(objective,
                           param_space,
                           algo=tpe.suggest,
                           trials=trials,
                           max_evals=param_space["max_evals"])
        for f in int_feat:
            if f in best_params:
                best_params[f] = int(best_params[f])
        print("************************************************************")
        print("Best params")
        for k, v in best_params.items():
            print("        %s: %s" % (k, v))
        trial_ginis = -np.asarray(trials.losses(), dtype=float)
        best_gini_mean = max(trial_ginis)
        ind = np.where(trial_ginis == best_gini_mean)[0][0]
        best_gini_std = trials.trial_attachments(trials.trials[ind])['std']
        print("Kappa stats")
        print("        Mean: %.6f\n        Std: %.6f" %
              (best_gini_mean, best_gini_std))
Example 26
            data = [X_all, y_class_tr_all, y_reg_tr_all]
            # =========================== Search the best params ===========================
            print("------------------------------------------------------------------------")
            print("-------- Search the best params for %s --------" % ftmodnm)
            starttime = time.perf_counter()
            log_handler = log(ftmodnm)
            trial_counter = 0
            ftmodinfo = [model, data]
            trials = Trials()
            objective = lambda p: hyperopt_wrapper(p, ftmodinfo)
            best_params = fmin(objective, param, algo=tpe.suggest, trials=trials, max_evals=param["max_evals"])

            for f in modp.int_feat():
                if f in best_params:
                    best_params[f] = int(best_params[f])
            elapsed = round((time.perf_counter() - starttime) / 60.0, 2)
            print("************************************************************")
            print("Best params for %s in %.2f min" %(ftmodnm, elapsed))
            for k, v in best_params.items():
                print("        %s: %s" % (k, v))
            trial_RMSEs = np.asarray(trials.losses(), dtype=float)
            best_RMSE_mean = min(trial_RMSEs)
            ind = np.where(trial_RMSEs == best_RMSE_mean)[0][0]
            best_RMSE_std = trials.trial_attachments(trials.trials[ind])['std']
            print("RMSE stats")
            print("        Mean: %.6f\n        Std: %.6f" % (best_RMSE_mean, best_RMSE_std))
            print("        Trial: %s" % str(ind + 1))
            print("************************************************************")
            print()

Example 27
    def evaluation(self, space, out_file=None, model_dir=None):
        '''
        evaluation function for bayesian search
        '''
        import random
        from hyperopt import tpe
        from hyperopt import Trials
        from hyperopt import fmin
        random.seed(4)
        if model_dir is None:
            model_dir = 'best_model.txt'

        trials = Trials()

        global ITERATION
        ITERATION = 0

        best = fmin(fn=self.bayesian_objective,
                    space=space,
                    algo=tpe.suggest,
                    trials=trials,
                    max_evals=self.MAX_EVALS,
                    rstate=np.random.RandomState(100))

        DEV_metric = []
        OOT_metric = []

        for i in range(self.MAX_EVALS):
            DEV_metric.append(
                trials.trial_attachments(trials.trials[i])['DEV_metric'])
            OOT_metric.append(
                trials.trial_attachments(trials.trials[i])['OOT_metric'])

        metrics_records = pd.DataFrame({
            'Itr': list(range(0, self.MAX_EVALS)),
            'DEV_metric': DEV_metric,
            'OOT_metric': OOT_metric
        })

        keys = trials.trials[0]['result']['hyperparameters'].keys()
        hyperparameters_records = pd.DataFrame(index=range(self.MAX_EVALS),
                                               columns=keys)

        for i in range(self.MAX_EVALS):
            hyperparameters_records.iloc[i, :] = list(
                trials.trials[i]['result']['hyperparameters'].values())

        all_records = pd.concat([metrics_records, hyperparameters_records],
                                axis=1)

        if out_file:
            all_records.to_csv(out_file, index=False)

        best_ind = metrics_records.loc[:, 'OOT_metric'].idxmin()

        pickle.dump(
            trials.trial_attachments(trials.trials[best_ind])['model'],
            open(model_dir, 'wb'))

        return metrics_records.iloc[best_ind, :], \
                trials.trials[best_ind]['result']['hyperparameters'], \
                trials.results[best_ind]
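Examples 15 and 27 both read a 'model' attachment off a trial: Example 15 stores the model's path, Example 27 the pickled model itself. A sketch of the write side inside an objective (the placeholder model and loss are illustrative only):

import pickle
from hyperopt import STATUS_OK

def bayesian_objective(params):
    # train a model here; a dict stands in for the fitted estimator
    model = {'params': params}
    return {
        'loss': 0.5,  # placeholder validation loss
        'status': STATUS_OK,
        'attachments': {
            # retrieved later via trials.trial_attachments(trial)['model']
            'model': pickle.dumps(model),
        },
    }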