def optimize_model_pytorch(device, args, train_GWAS, train_y, test_GWAS, test_y, out_folder="", startupJobs=40, maxevals=200, noOut=False):
    """Search the PyTorch hyperparameter space with TPE and return the best setting.

    Each trial is evaluated by ``trial_pytorch`` bound to the given device,
    args and train/test data. One optimisation plot per searched
    hyperparameter is produced through ``plot_optimization_pytorch``.

    Args:
        device, args, train_GWAS, train_y, test_GWAS, test_y: forwarded
            unchanged to ``trial_pytorch`` as keyword arguments.
        out_folder: forwarded to ``plot_optimization_pytorch``.
        startupJobs: ``n_startup_jobs`` passed to ``tpe.suggest``.
        maxevals: ``max_evals`` passed to ``fmin``.
        noOut: when true, the best-settings summary is not printed.

    Returns:
        The best hyperparameters resolved through ``space_eval``, with
        ``earlyStopEpochs`` and ``epochs`` overridden from the
        ``highestAcc_epoch`` attachment of the lowest-loss trial.
    """
    global numTrials_pytorch
    numTrials_pytorch = 0  # reset the module-level trial counter before searching

    trials = Trials()
    trial_wrapper = partial(
        trial_pytorch,
        device=device,
        args=args,
        train_GWAS=train_GWAS,
        train_y=train_y,
        test_GWAS=test_GWAS,
        test_y=test_y,
    )
    best_pars = fmin(
        trial_wrapper,
        parameter_space_pytorch(),
        algo=partial(tpe.suggest, n_startup_jobs=startupJobs),
        max_evals=maxevals,
        trials=trials,
    )

    # fmin returns raw indices/values; space_eval maps them back onto the
    # actual parameters of the search space. Resolve once and reuse.
    best_pars = space_eval(parameter_space_pytorch(), best_pars)

    if not noOut:
        print('\nBest hyperparameter settings: ', best_pars, '\n')

    # Loop over the keys of the first trial's 'idxs' dict, which names every
    # searched hyperparameter.
    # NOTE(review): plotting is assumed to run regardless of noOut - confirm
    # against the original (un-collapsed) layout of this function.
    regression = True
    for p in trials.trials[0]['misc']['idxs']:
        plot_optimization_pytorch(trials, p, regression, out_folder=out_folder)

    # Override the epochs with the early-stopping epoch of the best trial.
    lowestLossIndex = np.argmin(trials.losses())
    best_attachments = trials.trial_attachments(trials.trials[lowestLossIndex])
    # +1 because the recorded epoch is 0-based.
    best_pars['earlyStopEpochs'] = best_attachments['highestAcc_epoch'] + 1
    # Never return a network without any training, as that would cause a
    # problem for deep dreaming.
    best_pars['epochs'] = max(best_pars['earlyStopEpochs'], 1)
    return best_pars
def run(self):
    """Run the hyperopt search and log the metrics of the lowest-loss trial.

    The filtered MRR is reported as the negated minimum loss; the raw MRR
    and the raw/filtered Hits@k values are read from that trial's
    attachments.
    """
    history = Trials()
    raw_best = fmin(
        self._obj,
        self.model_param_space._build_space(),
        tpe.suggest,
        self.max_evals,
        history,
    )
    resolved = space_eval(self.model_param_space._build_space(), raw_best)
    chosen_params = self.model_param_space._convert_into_param(resolved)

    losses = np.asarray(history.losses(), dtype=float)
    winner = np.argmin(losses)
    filtered_mrr = -losses[winner]

    # All extra metrics live in the winning trial's attachments.
    extras = history.trial_attachments(history.trials[winner])
    metric_keys = (
        "raw_mrr",
        "raw_hits_at1",
        "raw_hits_at3",
        "raw_hits_at10",
        "hits_at1",
        "hits_at3",
        "hits_at10",
    )
    (unfiltered_mrr,
     raw_h1, raw_h3, raw_h10,
     filt_h1, filt_h3, filt_h10) = [extras[key] for key in metric_keys]

    log = self.logger.info
    separator = "-" * 50
    log(separator)
    log("Best CV Results:")
    log("Raw MRR: %.6f" % unfiltered_mrr)
    log("Filtered MRR: %.6f" % filtered_mrr)
    log("Raw: Hits@1 %.3f Hits@3 %.3f Hits@10 %.3f" % (raw_h1, raw_h3, raw_h10))
    log("Filtered: Hits@1 %.3f Hits@3 %.3f Hits@10 %.3f" % (filt_h1, filt_h3, filt_h10))
    log("Best Param:")
    self.task._print_param_dict(chosen_params)
    log(separator)
def run(self):
    """Tune every learner of every task mode and append results to a report.

    For each ``task_mode`` in ``learner_space`` and each learner listed
    under it, a hyperopt TPE search of ``self.max_evals`` evaluations is run
    with a dedicated log file. The best cross-validated AUC (negated minimum
    loss), the ``test_auc`` and ``refit_time`` attachments of that trial and
    the best parameters are appended to ``config.MODEL_COMPARE``. The header
    row is written once per call, before the first result row.
    """
    header_pending = True
    self.param_space = ModelParamSpace()
    for task_mode in learner_space:
        print('start %s model task' % task_mode)
        for learner in learner_space[task_mode]:
            print('optimizing %s' % learner)
            # NOTE(review): attribute name kept as-is ("leaner"); other code
            # may read it under this spelling.
            self.leaner_name = learner
            start = time.time()
            trials = Trials()
            logname = "%s_%s_%s.log" % (
                task_mode, learner,
                datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
            self.logger = logging_utils._get_logger(config.LOG_DIR, logname)
            best = fmin(lambda param: self._obj(param, task_mode),
                        self.param_space._build_space(learner),
                        tpe.suggest, self.max_evals, trials)
            end = time.time()
            time_cost = time_utils.time_diff(start, end)
            self.logger.info("Hyperopt_Time")
            self.logger.info(" %s" % time_cost)
            self.logger.info("-" * 50)
            print(" Finished %d hyper train with %d-fold cv, took %s" %
                  (self.max_evals, self.n_iter, time_cost))

            best_params = space_eval(
                self.param_space._build_space(learner), best)
            best_params = self.param_space._convert_int_param(best_params)

            trial_loss = np.asarray(trials.losses(), dtype=float)
            best_ind = np.argmin(trial_loss)
            auc_cv_mean = -trial_loss[best_ind]
            best_attachments = trials.trial_attachments(trials.trials[best_ind])
            test_auc = best_attachments["test_auc"]
            refit_time = best_attachments["refit_time"]

            # The context manager closes the file; no explicit close needed.
            with open(config.MODEL_COMPARE, 'a+') as f:
                if header_pending:
                    header_pending = False
                    f.write(
                        "task_mode learner auc_cv_mean test_auc refit_time best_params \n"
                    )
                f.write("%s %s %.4f %.4f %s %s \n" %
                        (task_mode, learner, auc_cv_mean, test_auc,
                         refit_time, best_params))
def run(self):
    """Run ``hp_iter`` TPE evaluations and log the best RMSE, params and time.

    The best trial is the one with the lowest loss; its ``std`` attachment
    is logged next to the mean. Elapsed time is logged in whole minutes when
    at least one minute has passed, otherwise in seconds.
    """
    began = time.time()
    history = Trials()
    raw_best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, hp_iter, history)
    chosen = self.model_param_space._convert_int_param(
        space_eval(self.model_param_space._build_space(), raw_best))

    rmses = np.asarray(history.losses(), dtype=float)
    winner = np.argmin(rmses)
    rmse_mean = rmses[winner]
    rmse_std = history.trial_attachments(history.trials[winner])["std"]

    log = self.logger.info
    log("-" * 50)
    log("Best RMSE")
    log(" Mean: %.6f" % rmse_mean)
    log(" std: %.6f" % rmse_std)
    log("Best param")
    self.task._print_param_dict(chosen)

    elapsed_sec = time.time() - began
    elapsed_min = int(elapsed_sec / 60.)
    log("Time")
    log(" %d mins" % elapsed_min if elapsed_min > 0 else " %d secs" % elapsed_sec)
    log("-" * 50)
def run(self):
    """Run the TPE search for ``self.max_evals`` trials and log a summary.

    Logs the lowest loss (reported as the RMSE mean), the ``std`` attachment
    of that trial, the resolved best parameters and the elapsed time (whole
    minutes if at least one, otherwise seconds).
    """
    t0 = time.time()
    search_log = Trials()
    optimum = fmin(
        self._obj,
        self.model_param_space._build_space(),
        tpe.suggest,
        self.max_evals,
        search_log,
    )
    params = space_eval(self.model_param_space._build_space(), optimum)
    params = self.model_param_space._convert_int_param(params)

    all_rmse = np.asarray(search_log.losses(), dtype=float)
    top = np.argmin(all_rmse)
    top_trial = search_log.trials[top]
    mean_rmse = all_rmse[top]
    std_rmse = search_log.trial_attachments(top_trial)["std"]

    rule = "-"*50
    self.logger.info(rule)
    self.logger.info("Best RMSE")
    self.logger.info(" Mean: %.6f" % mean_rmse)
    self.logger.info(" std: %.6f" % std_rmse)
    self.logger.info("Best param")
    self.task._print_param_dict(params)

    seconds = time.time() - t0
    minutes = int(seconds/60.)
    self.logger.info("Time")
    if minutes <= 0:
        self.logger.info(" %d secs" % seconds)
    else:
        self.logger.info(" %d mins" % minutes)
    self.logger.info(rule)
def run(self):
    """Run the TPE search and log average precision, loss and accuracy.

    The lowest-loss trial is selected; its loss value is logged as the
    average precision and its ``loss`` / ``acc`` attachments are logged
    alongside the resolved best parameters.
    """
    history = Trials()
    raw_best = fmin(self._obj, self.model_param_space._build_space(),
                    tpe.suggest, self.max_evals, history)
    resolved = space_eval(self.model_param_space._build_space(), raw_best)
    chosen = self.model_param_space._convert_into_param(resolved)

    losses = np.asarray(history.losses(), dtype=float)
    winner = np.argmin(losses)
    # NOTE(review): the minimum loss is reported un-negated as average
    # precision - confirm the sign convention used by self._obj.
    top_ap = losses[winner]
    extras = history.trial_attachments(history.trials[winner])
    top_loss, top_acc = extras["loss"], extras["acc"]

    divider = "-" * 50
    self.logger.info(divider)
    self.logger.info("Best Average Precision: %.3f" % top_ap)
    self.logger.info("with Loss %.3f, Accuracy %.3f" % (top_loss, top_acc))
    self.logger.info("Best Param:")
    self.task._print_param_dict(chosen)
    self.logger.info(divider)
def make_opt_predict_by_models(specified_models):
    """Tune each requested model with hyperopt and collect its best kappa.

    Model names that are not present in ``model_library_config.feat_names``
    are silently skipped. All tried parameters are recorded by the model
    itself (``model.log_header`` / ``model.hyperopt_obj``).

    :param specified_models: iterable of model (feature) names to tune
    :return: list of ``(feat_name, best_kappa_mean, best_kappa_std)`` tuples,
        one per tuned model
    """
    log_path = "%s/Log" % config.output_path
    if not os.path.exists(log_path):
        os.makedirs(log_path)

    models_best_params = []
    for feat_name in specified_models:
        # Only handle models that have been configured.
        if feat_name not in model_library_config.feat_names:
            continue

        # The parameter space is also needed by the model itself (it writes
        # the log header from it).
        feat_folder, param_space = model_library_config.model_config[feat_name]
        model = create_model(param_space, config.solution_info, feat_folder, feat_name)
        model.log_header()

        print("************************************************************")
        print("Search for the best params")
        trials = Trials()
        objective = lambda p: model.hyperopt_obj(p, feat_folder, feat_name)
        best_params = fmin(objective, param_space, algo=tpe.suggest,
                           trials=trials, max_evals=param_space["max_evals"])

        # Cast the integer-valued parameters back to int. `in` replaces
        # dict.has_key, which does not exist on Python 3.
        for f in model_library_config.int_feat:
            if f in best_params:
                best_params[f] = int(best_params[f])

        print("************************************************************")
        print("Best params")
        for k, v in best_params.items():
            print(" %s: %s" % (k, v))

        # Losses are negated kappas, so negate them back.
        trial_kappas = -np.asarray(trials.losses(), dtype=float)
        best_kappa_mean = max(trial_kappas)
        # Index of the first trial that reached the best kappa.
        ind = np.where(trial_kappas == best_kappa_mean)[0][0]
        # Standard deviation recorded for that trial.
        best_kappa_std = trials.trial_attachments(trials.trials[ind])['std']
        print("Kappa stats")
        print("Mean: %.6f\n Std: %.6f" % (best_kappa_mean, best_kappa_std))
        models_best_params.append((feat_name, best_kappa_mean, best_kappa_std))

    return models_best_params
def run(self):
    """Run the TPE search and log exact / partial accuracy of the best trial.

    The exact accuracy is the negated minimum loss; the partial accuracy is
    read from the ``pacc`` attachment of the same trial.
    """
    history = Trials()
    # NOTE(review): max_evals is hard-coded to 1 here - confirm this is
    # intentional and not a debugging leftover.
    raw_best = fmin(
        self._obj,
        self.model_param_space._build_space(),
        tpe.suggest,
        max_evals=1,
        trials=history,
    )
    chosen = self.model_param_space._convert_into_param(
        space_eval(self.model_param_space._build_space(), raw_best))

    losses = np.asarray(history.losses(), dtype=float)
    winner = np.argmin(losses)
    exact_acc = -losses[winner]
    partial_acc = history.trial_attachments(history.trials[winner])["pacc"]

    divider = "-" * 50
    self.logger.info(divider)
    self.logger.info("Best Exact Accuracy %.3f with Parital Accuracy %.3f" % (exact_acc, partial_acc))
    self.logger.info("Best Param:")
    self.task._print_param_dict(chosen)
    self.logger.info(divider)
def TunningParamter(param,data,features,feature,source_name,real_value,int_boolean):
    """Tune ``trainModel`` with hyperopt TPE and dump the best result.

    Rows are filtered with a null-mask on column ``feature`` before the
    search. The search runs ``param['max_evals']`` evaluations over the
    space ``param``; the lowest loss, the ``std`` attachment of that trial
    and the start/end timestamps are passed to ``dumpMessage``.
    """
    # NOTE(review): the null-mask is built from the module-level `all_data`,
    # not from the `data` argument - confirm both always share the same index.
    data = data[~pd.isnull(all_data[feature])]
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(data.shape)

    ISOTIMEFORMAT = '%Y-%m-%d %X'
    start = time.strftime(ISOTIMEFORMAT, time.localtime())
    trials = Trials()
    objective = lambda p: trainModel(p, data, features, feature, source_name, real_value, int_boolean)
    best_parameters = fmin(objective, param, algo=tpe.suggest,
                           max_evals=param['max_evals'], trials=trials)

    # Locate the first trial that reached the minimum loss.
    trials_loss = np.asanyarray(trials.losses(), dtype=float)
    best_loss = min(trials_loss)
    ind = np.where(trials_loss == best_loss)[0][0]
    best_loss_std = trials.trial_attachments(trials.trials[ind])['std']

    end = time.strftime(ISOTIMEFORMAT, time.localtime())
    dumpMessage(best_parameters, best_loss, best_loss_std, param['task'], source_name, start, end)
def run(self):
    """Run the TPE search and log loss and accuracy of the best trial.

    Accuracy is the negated minimum loss; ``loss`` is read from the
    attachments of the same trial.
    """
    search_log = Trials()
    optimum = fmin(self._obj, self.model_param_space._build_space(),
                   tpe.suggest, self.max_evals, search_log)
    params = space_eval(self.model_param_space._build_space(), optimum)
    params = self.model_param_space._convert_into_param(params)

    objective_values = np.asarray(search_log.losses(), dtype=float)
    top = np.argmin(objective_values)
    accuracy = -objective_values[top]
    cv_loss = search_log.trial_attachments(search_log.trials[top])["loss"]

    rule = "-" * 50
    emit = self.logger.info
    emit(rule)
    emit("Best CV Results:")
    emit("Loss: %.3f" % cv_loss)
    emit("Accuracy: %.3f" % accuracy)
    emit("Best Param:")
    self.task._print_param_dict(params)
    emit(rule)
def run(self):
    """Run the TPE search and log the best score, parameters and run time.

    The best trial is the one with the lowest loss; the score mean is logged
    as the negated loss and the ``std`` attachment of that trial is logged
    next to it. Elapsed time is logged in whole minutes when at least one
    minute has passed, otherwise in seconds.
    """
    # Local import: the search must go through hyperopt's fmin. The builtin
    # min() that was called here never evaluated the objective at all.
    from hyperopt import fmin

    start = time.time()
    trials = Trials()
    best = fmin(self._obj, self.model_param_space.build_space(),
                tpe.suggest, self.max_evals, trials)
    best_params = space_eval(self.model_param_space.build_space(), best)
    best_params = self.model_param_space._convert_int_param(best_params)

    trial_score = np.asarray(trials.losses(), dtype=float)
    best_ind = np.argmin(trial_score)
    best_score_mean = trial_score[best_ind]
    best_score_std = trials.trial_attachments(trials.trials[best_ind])["std"]

    self.logger.info("-" * 50)
    self.logger.info("Best Score")
    self.logger.info(" Mean: %.6f" % -best_score_mean)
    self.logger.info(" std: %.6f" % best_score_std)
    self.logger.info("Best param")
    self.task._print_param_dict(best_params)

    end = time.time()
    _sec = end - start
    _min = int(_sec / 60.)
    self.logger.info("Time")
    # The elapsed time was computed but never reported; log it.
    if _min > 0:
        self.logger.info(" %d mins" % _min)
    else:
        self.logger.info(" %d secs" % _sec)
    self.logger.info('-' * 50)
def minimize(self, restarts=2, epochs=600, tune_space=None):
    """Run ``restarts`` successive TPE searches and aggregate their reports.

    When ``tune_space`` is not given it is built from the current values of
    ``self.variables``. After every search the best assignment is applied
    through ``self._assign_values`` and the ``report`` attachment of every
    trial is collected. Returns ``self.evaluator.make_agg_report`` applied
    to all collected reports.
    """
    from hyperopt import fmin, tpe, Trials

    if tune_space is None:
        current_values = self.tf_session.run(self.variables)
        tune_space = self._make_tune_space(current_values)

    # TODO: This report structure has the downside of not writing
    # anything to disk until it's 100% complete.
    collected = []

    # A single seeded generator shared by all restarts makes minimize
    # deterministic.
    rng = np.random.RandomState(self.seed)

    for _ in range(restarts):
        history = Trials()
        winner = fmin(
            fn=self._evaluate,
            space=tune_space,
            algo=tpe.suggest,
            max_evals=epochs,
            trials=history,
            rstate=rng,
        )
        self._assign_values(winner)
        collected += [
            history.trial_attachments(trial)['report']
            for trial in history.trials
        ]

    return self.evaluator.make_agg_report(collected)
def TunningParamter(param, data, price_data):
    """Tune the predictor selected by ``param['task']`` and dump the best result.

    Supported tasks are ``'mean_raw_scroll'`` (objective ``rawPredict``) and
    ``'mean_price_scroll'`` (objective ``pricePredict``). The search runs
    ``param['max_evals']`` TPE evaluations over the space ``param``; the
    lowest loss, the ``std`` attachment of that trial and the start/end
    timestamps are passed to ``dumpMessage``.

    Raises:
        ValueError: if ``param['task']`` is not one of the supported tasks.
    """
    # Pick the objective first so an unsupported task fails immediately with
    # a clear error instead of an unbound `objective` later on.
    task = param['task']
    if task == 'mean_raw_scroll':
        objective = lambda p: rawPredict(p, data)
    elif task == 'mean_price_scroll':
        objective = lambda p: pricePredict(p, price_data, data)
    else:
        raise ValueError("unsupported task: %r" % (task,))

    ISOTIMEFORMAT = '%Y-%m-%d %X'
    start_time = time.strftime(ISOTIMEFORMAT, time.localtime())
    trials = Trials()
    best_params = fmin(objective, param, algo=tpe.suggest, trials=trials,
                       max_evals=param["max_evals"])
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(best_params)

    # Locate the first trial that reached the minimum loss.
    trial_acc = np.asanyarray(trials.losses(), dtype=float)
    best_acc_mean = min(trial_acc)
    ind = np.where(trial_acc == best_acc_mean)[0][0]
    best_loss_std = trials.trial_attachments(trials.trials[ind])['std']

    end_time = time.strftime(ISOTIMEFORMAT, time.localtime())
    dumpMessage(best_params, best_acc_mean, best_loss_std, task, start_time, end_time)
    print("Best stats")
def TunningParamter(param, num, features):
    """Tune ``trainModel`` on the return features and dump the best result.

    The feature list is ``features`` extended with ``Ret_all_<i>`` and
    ``Ret_all_std_<i>`` for ``i`` in ``1..2*num``; the labels to predict are
    ``Pred_<i>`` for ``i`` in ``1..num``. The search runs
    ``param['max_evals']`` TPE evaluations over the space ``param``; the
    lowest loss, the ``std`` attachment of that trial and the start/end
    timestamps are passed to ``dumpMessage``.
    """
    ISOTIMEFORMAT = '%Y-%m-%d %X'
    start = time.strftime(ISOTIMEFORMAT, time.localtime())
    trials = Trials()

    # Work on a copy: appending to the caller's list would accumulate
    # duplicate columns every time this function is called again.
    all_features = list(features)
    for i in range(1, 2 * num + 1):
        all_features.append("Ret_all_%d" % (i))
        all_features.append("Ret_all_std_%d" % (i))
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(all_features)

    # Labels to predict.
    predict_lable = ["Pred_%d" % (i) for i in range(1, num + 1)]

    # NOTE(review): `train` is read from module scope, not passed in.
    objective = lambda p: trainModel(p, train, all_features, predict_lable)
    best_parameters = fmin(objective, param, algo=tpe.suggest,
                           max_evals=param['max_evals'], trials=trials)
    print(best_parameters)

    # Locate the first trial that reached the minimum loss.
    trials_loss = np.asanyarray(trials.losses(), dtype=float)
    best_loss = min(trials_loss)
    ind = np.where(trials_loss == best_loss)[0][0]
    best_loss_std = trials.trial_attachments(trials.trials[ind])['std']

    end = time.strftime(ISOTIMEFORMAT, time.localtime())
    dumpMessage(best_parameters, best_loss, best_loss_std, param['task'],
                start, end)
for k, v in trials_lgb.trials[0]['misc']['vals'].items() } hp_assignment = {k: v[0] for k, v in trials_lgb.vals.items()} # 这句和上面那句等价,这句更简洁 space_eval(space_lgb, hp_assignment) trials_lgb.trials[0]['result'] # {'loss': -0.8737864077669903, 'status': 'ok'} # 返回k个最好的模型的参数dict组成的一个list trials_lgb.topk_trials(k=2) # return_score=True就返回2个list组成的tuple trials_lgb.topk_trials(2, return_score=True, ordered=True) type(trials_lgb.topk_trials(2, return_score=True, ordered=True)[0][0]) # 这个类型就是个dict # Trials().trial_attachments的作用是,根据trial的参数字典解析出相应的model路径 trials_lgb.trial_attachments( trials_lgb.topk_trials(2, return_score=True, ordered=True)[0][0])["model"].decode() # %% #返回topk的模型 select_models = lambda trials, k: [ (trials.trial_attachments(t)["model"].decode(), c) for t, c in zip(*trials.topk_trials(k, return_score=True, ordered=True)) ] for sub_model_path, sub_model_score in select_models(trials_lgb, 3): print(-sub_model_score, sub_model_path) best_auc = -trials_lgb.topk_trials(1, return_score=True, ordered=True)[1][0] best_space = trials_lgb.topk_trials(1, return_score=True, ordered=True)[0][0]['misc']['vals'] best_hyperparam = space_eval(
param_space = param_spaces[model_name] trials = Trials() objective = lambda p: hyperopt_wrapper(p, feat_key, model_name, train, loss) best_params = fmin(objective, param_space, algo=tpe.suggest, trials=trials, max_evals=max_evals) for f in int_feat: if best_params.has_key(f): best_params[f] = int(best_params[f]) print("************************************************************") print("Best params") for k,v in best_params.items(): print " %s: %s" % (k,v) trial_losses = -np.asarray(trials.losses(), dtype=float) best_loss_mean = max(trial_losses) ind = np.where(trial_losses == best_loss_mean)[0][0] best_loss_std = trials.trial_attachments(trials.trials[ind])['std'] print("Loss stats") print(" Mean: %.6f\n Std: %.6f" % (best_loss_mean, best_loss_std)) else: print '-------- generating submission -------' test = pd.read_csv(test_file, index_col = False) test_ids = test['ID'] test.drop('ID', axis=1, inplace=True) best_params = loads(dumps(cv_scores.find({'model_name':model_name, 'feat_key':feat_key}).sort([('loss_cv_mean', -1)]).limit(1)))[0] print("Best params") for k,v in best_params.items(): print " %s: %s" % (k,v)
log_handler = open(log_file, 'wb' ) writer = csv.writer( log_handler ) headers = ['trial_counter', 'kappa_mean', 'kappa_std' ] for k,v in sorted(param_space.items()): headers.append(k) writer.writerow( headers ) log_handler.flush() print("************************************************************") print("Search for the best params") #global trial_counter trial_counter = 0 trials = Trials() objective = lambda p: hyperopt_wrapper(p,feat_name) best_params = fmin(objective, param_space, algo=tpe.suggest, trials=trials, max_evals=param_space["max_evals"]) for f in int_feat: if best_params.has_key(f): best_params[f] = int(best_params[f]) print("************************************************************") print("Best params") for k,v in best_params.items(): print " %s: %s" % (k,v) trial_kappas = -np.asarray(trials.losses(), dtype=float) best_kappa_mean = max(trial_kappas) ind = np.where(trial_kappas == best_kappa_mean)[0][0] best_kappa_std = trials.trial_attachments(trials.trials[ind])['std'] print("Kappa stats") print(" Mean: %.6f\n Std: %.6f" % (best_kappa_mean, best_kappa_std))
model_type = sys.argv[1] max_evals = int(sys.argv[2]) else: model_type = 'lgb' max_evals = 2 logger.debug( f'Try to search paras base on model:{model_type}, max_evals:{max_evals}' ) from functools import partial optimize_fun_ex = partial(optimize_fun, model_type=model_type) trials = Trials() space = get_search_space(model_type) best = fmin(optimize_fun_ex, space, algo=tpe.suggest, max_evals=max_evals, trials=trials) #logger.debug(f"Best: {best}") att_message = [ trials.trial_attachments(trial)['message'] for trial in trials.trials ] for score, para, misc in zip( trials.losses(), att_message, [item.get('misc').get('vals') for item in trials.trials]): logger.debug(f'score:{"%9.6f"%score}, para:{para}, misc:{misc}')
return { 'loss': x**2, 'status': STATUS_OK, # -- store other results like this 'eval_time': time.time(), 'other_stuff': { 'type': None, 'value': [0, 1, 2] }, # -- attachments are handled differently 'attachments': { 'time_module': pickle.dumps(time.time) } } trials = Trials() best = fmin(objective, space=hp.uniform('x', -10, 10), algo=tpe.suggest, max_evals=100, trials=trials) print(best) msg = trials.trial_attachments(trials.trials[5])['time_module'] print(msg) time_module = pickle.loads(msg) print(time_module)
for k, v in sorted(param_space.items()): headers.append(k) writer.writerow(headers) log_handler.flush() print("************************************************************") print("Search for the best params") #global trial_counter trial_counter = 0 trials = Trials() objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name) best_params = fmin(objective, param_space, algo=tpe.suggest, trials=trials, max_evals=param_space["max_evals"]) for f in int_feat: if best_params.has_key(f): best_params[f] = int(best_params[f]) print("************************************************************") print("Best params") for k, v in best_params.items(): print " %s: %s" % (k, v) trial_f1scores = -np.asarray(trials.losses(), dtype=float) best_f1score_mean = max(trial_f1scores) ind = np.where(trial_f1scores == best_f1score_mean)[0][0] best_f1score_std = trials.trial_attachments(trials.trials[ind])['std'] print("F1 Score stats") print(" Mean: %.6f\n Std: %.6f" % (best_f1score_mean, best_f1score_std))
def wikiLearn():
    """Walk through the hyperopt wiki examples (learning notes).

    Covers: minimising a plain function, returning a status dict, using
    ``Trials`` with attachments, sampling from a search space, a nested
    sklearn-style space, and ``scope.define``. The author notes that not
    everything here is fully understood.
    """
    import pickle
    import time

    import hyperopt.pyll
    import hyperopt.pyll.stochastic
    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
    from hyperopt.pyll import scope

    # 1. A simple function.
    best = fmin(fn=lambda x: x ** 2,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(best)

    # 2. A function returning a dict with an "ok" status.
    def objective(x):
        return {'loss': x ** 2, 'status': STATUS_OK}

    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print(best)

    # 3. Returning a richer dict and recording it with Trials.
    def objective(x):
        return {
            'loss': x ** 2,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {'type': None, 'value': [0, 1, 2]},
            # -- attachments are handled differently
            'attachments': {'time_module': pickle.dumps(time.time)}
        }

    trials = Trials()
    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100,
                trials=trials)
    print(best)
    print(trials.trials)
    print(trials.results)
    print(trials.losses())
    print(trials.statuses())

    # Not yet clear what attachments are for; here the pickled payload of
    # trial 5 is read back and unpickled.
    msg = trials.trial_attachments(trials.trials[5])['time_module']
    time_module = pickle.loads(msg)

    space = hp.choice('a', [
        ('case 1', 1 + hp.lognormal('c1', 0, 1)),
        ('case 2', hp.uniform('c2', -10, 10))
    ])
    print(hyperopt.pyll.stochastic.sample(space))

    # hp.choice(label, options)
    # hp.randint(label, upper)               # [0, upper)
    # hp.uniform(label, low, high)
    # hp.quniform(label, low, high, q)       # round(uniform(low, high) / q) * q
    # hp.loguniform(label, low, high)
    # hp.qloguniform(label, low, high, q)    # round(exp(uniform(low, high)) / q) * q
    # hp.normal(label, mu, sigma)
    # hp.qnormal(label, mu, sigma, q)        # round(normal(mu, sigma) / q) * q
    # hp.lognormal(label, mu, sigma)
    # hp.qlognormal(label, mu, sigma, q)     # round(exp(normal(mu, sigma)) / q) * q

    # 4. A search space for sklearn classifiers.
    space = hp.choice('classifier_type', [
        {
            'type': 'naive_bayes',
        },
        {
            'type': 'svm',
            'C': hp.lognormal('svm_C', 0, 1),
            'kernel': hp.choice('svm_kernel', [
                {'ktype': 'linear'},
                {'ktype': 'RBF', 'width': hp.lognormal('svm_rbf_width', 0, 1)},
            ]),
        },
        {
            'type': 'dtree',
            'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
            'max_depth': hp.choice('dtree_max_depth',
                                   [None, hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
            'min_samples_split': hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
        },
    ])

    # 5. scope.define (still not fully understood).
    @scope.define
    def foo(a, b=0):
        # Formats exactly like the Python 2 statement `print 'running foo', a, b`.
        print('running foo %s %s' % (a, b))
        return a + b / 2

    # -- this will print 0, foo is called as usual.
    print(foo(0))

    # In describing search spaces you can use `foo` as you
    # would in normal Python. These two calls will not actually call foo,
    # they just record that foo should be called to evaluate the graph.
    space1 = scope.foo(hp.uniform('a', 0, 10))
    space2 = scope.foo(hp.uniform('a', 0, 10), hp.normal('b', 0, 1))

    # -- this will print an pyll.Apply node
    print(space1)

    # -- this will draw a sample by running foo()
    print(hyperopt.pyll.stochastic.sample(space1))
writer = csv.writer(log_handler) headers = ['trial_counter', 'log_loss_mean', 'log_loss_std', 'spend_time'] for k, v in sorted(param_space.items()): headers.append(k) print(headers) writer.writerow(headers) log_handler.flush() print("************************************************************") print("Search for the best params") # global trial_counter trial_counter = 0 trials = Trials() # lambda在这一步并不会运行,只是定义一个函数而已 objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name) # objective放到fmin中,会被调用,且传进三个参数 best_params = fmin(objective, param_space, algo=tpe.suggest, trials=trials, max_evals=param_space["max_evals"]) for f in int_feat: if f in best_params: best_params[f] = int(best_params[f]) print("************************************************************") print("Best params") for k, v in best_params.items(): print(" %s: %s" % (k, v)) trial_log_losss = -np.asarray(trials.losses(), dtype=float) best_log_loss_mean = max(trial_log_losss) ind = np.where(trial_log_losss == best_log_loss_mean)[0][0] best_log_loss_std = trials.trial_attachments(trials.trials[ind])['std'] print("log_loss stats") print(" Mean: %.6f\n Std: %.6f" % (best_log_loss_mean, best_log_loss_std))
train = pd.read_csv("../data/train.process.csv") for feat_name,feat_fold in zip(feat_names,feat_folders): #at first we need to read to for our model #this is for reduce time to read data print 'read data for trainning' print 'generate model in condition in %s'%(feat_name) print "Search for the best models" print "fea_name %s"%(feat_name) #for reduce the time for read data #the train.shape[0]=39774 ISOTIMEFORMAT='%Y-%m-%d %X' start_time = time.strftime( ISOTIMEFORMAT, time.localtime() ) param_space = para_spaces[feat_name] trials = Trials() objective = lambda p : trainModel(p, feat_fold, feat_name) best_params = fmin(objective,param_space,algo=tpe.suggest, trials=trials, max_evals=param_space["max_evals"]) print type(best_params) print best_params for f in int_feat: if best_params.has_key(f): best_params[f] = int(best_params[f]) trial_acc = -np.asanyarray(trials.losses(), dtype=float ) best_acc_mean = max(trial_acc) ind = np.where(trial_acc==best_acc_mean)[0][0] best_acc_std = trials.trial_attachments(trials.trials[ind])['std'] end_time = time.strftime( ISOTIMEFORMAT, time.localtime() ) dumpModelMessage(best_params, best_acc_mean, best_acc_std, feat_fold,feat_name,start_time,end_time) print ("Best stats") print ('Mean:%.6f \nStd:%.6f \n'%(best_acc_mean,best_acc_std))
print("************************************************************") print("Search for the best params") # global trial_counter trial_counter = 0 trials = Trials() objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name) best_params = fmin(objective, param_space, algo=tpe.suggest, trials=trials, max_evals=param_space["max_evals"]) # 把best_params包含的数字属性转成int for f in int_feat: if best_params.has_key(f): best_params[f] = int(best_params[f]) print("************************************************************") print("Best params") for k, v in best_params.items(): print " %s: %s" % (k, v) # 获取尝试的losses trial_kappas = -np.asarray(trials.losses(), dtype=float) best_kappa_mean = max(trial_kappas) # where返回两个维度的坐标 ind = np.where(trial_kappas == best_kappa_mean)[0][0] # 找到最优参数的std best_kappa_std = trials.trial_attachments(trials.trials[ind])['std'] print("Kappa stats") print(" Mean: %.6f\n Std: %.6f" % (best_kappa_mean, best_kappa_std))
trials = Trials() if config.preload: start = time.clock() cv_data, tt_data = get_feat_data(feat_folder) print("loding data time used:", (time.clock() - start)) objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name, cv_data, tt_data) else: objective = lambda p: hyperopt_wrapper(p, feat_folder, feat_name, None, None) best_params = fmin(objective, param_space, algo=tpe.suggest, trials=trials, max_evals=param_space["max_evals"]) for f in int_feat: if f in best_params: best_params[f] = int(best_params[f]) print("************************************************************") print("Best params") for k, v in best_params.items(): print(" %s: %s" % (k, v)) trial_ginis = -np.asarray(trials.losses(), dtype=float) best_gini_mean = max(trial_ginis) ind = np.where(trial_ginis == best_gini_mean)[0][0] best_gini_std = trials.trial_attachments(trials.trials[ind])['std'] print("Kappa stats") print(" Mean: %.6f\n Std: %.6f" % (best_gini_mean, best_gini_std))
data = [X_all, y_class_tr_all, y_reg_tr_all] # =========================== Search the best params =========================== print("------------------------------------------------------------------------") print("-------- Search the best params for %s --------" % ftmodnm) starttime = time.clock() log_handler = log(ftmodnm) trial_counter = 0 ftmodinfo = [model, data] trials = Trials() objective = lambda p: hyperopt_wrapper(p, ftmodinfo) best_params = fmin(objective, param, algo=tpe.suggest, trials=trials, max_evals=param["max_evals"]) for f in modp.int_feat(): if f in best_params: best_params[f] = int(best_params[f]) elapsed = round((time.clock() - starttime) / 60.0, 2) print("************************************************************") print("Best params for %s in %.2f min" %(ftmodnm, elapsed)) for k, v in best_params.items(): print(" %s: %s" % (k, v)) trial_RMSEs = np.asarray(trials.losses(), dtype=float) best_RMSE_mean = min(trial_RMSEs) ind = np.where(trial_RMSEs == best_RMSE_mean)[0][0] best_RMSE_std = trials.trial_attachments(trials.trials[ind])['std'] print("RMSE stats") print(" Mean: %.6f\n Std: %.6f" % (best_RMSE_mean, best_RMSE_std)) print(" Trial: %s" % str(ind + 1)) print("************************************************************") print()
def evaluation(self, space, out_file=None, model_dir=None):
    """Run the Bayesian (TPE) search and persist the best model.

    ``self.bayesian_objective`` is evaluated ``self.MAX_EVALS`` times over
    ``space``. For every trial the ``DEV_metric`` / ``OOT_metric``
    attachments and the ``hyperparameters`` entry of the trial result are
    tabulated. The trial with the lowest ``OOT_metric`` is taken as best and
    its ``model`` attachment is pickled to ``model_dir``.

    Args:
        space: hyperopt search space passed to ``fmin``.
        out_file: optional CSV path; when truthy, the per-trial table of
            metrics and hyperparameters is written there.
        model_dir: file path the best model is pickled to; defaults to
            ``'best_model.txt'``.

    Returns:
        tuple: (metrics row of the best trial, its hyperparameters, its
        full trial result).
    """
    import random
    from hyperopt import tpe
    from hyperopt import Trials
    from hyperopt import fmin

    global ITERATION

    random.seed(4)
    if model_dir is None:
        model_dir = 'best_model.txt'

    trials = Trials()
    ITERATION = 0  # reset the module-level iteration counter

    # The return value (best raw assignment) is not needed: the best trial
    # is chosen below by OOT metric, not by loss.
    fmin(fn=self.bayesian_objective,
         space=space,
         algo=tpe.suggest,
         trials=trials,
         max_evals=self.MAX_EVALS,
         rstate=np.random.RandomState(100))

    n_trials = self.MAX_EVALS
    attachments = [
        trials.trial_attachments(trials.trials[i]) for i in range(n_trials)
    ]
    metrics_records = pd.DataFrame({
        'Itr': list(range(0, n_trials)),
        'DEV_metric': [att['DEV_metric'] for att in attachments],
        'OOT_metric': [att['OOT_metric'] for att in attachments],
    })

    keys = trials.trials[0]['result']['hyperparameters'].keys()
    hyperparameters_records = pd.DataFrame(index=range(n_trials),
                                           columns=keys)
    for i in range(n_trials):
        hyperparameters_records.iloc[i, :] = list(
            trials.trials[i]['result']['hyperparameters'].values())

    all_records = pd.concat([metrics_records, hyperparameters_records],
                            axis=1)
    if out_file:
        all_records.to_csv(out_file, index=False)

    best_ind = metrics_records.loc[:, 'OOT_metric'].idxmin()
    # Use a context manager so the file handle is closed (and flushed) even
    # if pickling fails.
    with open(model_dir, 'wb') as model_file:
        pickle.dump(attachments[best_ind]['model'], model_file)

    return metrics_records.iloc[best_ind, :], \
        trials.trials[best_ind]['result']['hyperparameters'], \
        trials.results[best_ind]