def train_light_gbm_regressor(X, y, cv, n_params, test_size=.2, n_jobs=-1):
    LGBM_params = {
        'num_leaves': sp_randint(6, 50),
        'min_child_samples': sp_randint(100, 500),
        'min_child_weight': [1e-5, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4],
        'subsample': sp_uniform(loc=0.2, scale=0.8),
        'colsample_bytree': sp_uniform(loc=0.4, scale=0.6),
        'reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
        'reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100]
    }
    Xt, Xv, yt, yv = train_test_split(X, y, test_size=test_size)
    param_list = list(ParameterSampler(LGBM_params, n_iter=n_params))
    param_scores = []
    int_skf = KFold(n_splits=cv)
    for p in range(n_params):
        best_scs = []
        for train_i, test_i in int_skf.split(Xt, yt):
            Xt_train, yt_train = Xt[train_i], yt[train_i]
            Xt_test, yt_test = Xt[test_i], yt[test_i]
            model = LGBMRegressor(n_jobs=n_jobs, silent=True, n_estimators=5000,
                                  **param_list[p])
            model.fit(Xt_train, yt_train, eval_set=(Xt_test, yt_test),
                      verbose=False, early_stopping_rounds=300)
            best_sc = model.best_score_['valid_0']['l2']
            best_scs.append(best_sc)
        param_scores.append(np.mean(best_scs))
    bp_ind = np.argmin(param_scores)
    model = LGBMRegressor(n_jobs=n_jobs, silent=True, n_estimators=5000,
                          **param_list[bp_ind])
    model.fit(Xt, yt, eval_set=(Xv, yv), verbose=False, early_stopping_rounds=500)
    return model
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"], "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)
def bm25_parameter_space(n_trials):
    rng = np.random.RandomState(42)
    return ParameterSampler(
        dict(tf_method=["binary", "raw", "freq", "log_norm", "double_norm"],
             idf_method=["smooth", "probabilistic"],
             drop_stopwords=[True, False],
             drop_suffix=[True, False],
             drop_punct=[True, False],
             lowercase=[True, False],
             k1=uniform(1.2, 2.0),
             b=uniform(0.5, 0.8),
             delta=uniform(0, 2)),
        n_iter=n_trials,
        random_state=rng)
def uniform_sample_params(template: Dict, n_samples: int):
    # Sample according to the template for n_samples
    param_grid = dict()
    for k, v in template.items():
        if type(v) == str:
            param_grid[k] = eval("dist." + v)
        else:
            param_grid[k] = v
    return ParameterSampler(param_grid, n_iter=n_samples)
def _sample_config(self) -> dict:
    params = list(
        ParameterSampler(self._params_space, n_iter=1,
                         random_state=self.random_state))[0]
    for key in params:
        if isinstance(params[key], np.float64):
            # Fix error in FastAI, can't handle np.float64
            params[key] = float(params[key])
    params.update(self._params_static)
    return params
def ts_grid_search(df, holidays, param_grid=None, cv_param=None,
                   RandomizedSearch=True, random_state=None):
    '''Grid search for time-series models; time series require a dedicated
    cross-validation scheme.
    df: input data frame
    holidays: holiday frame, should be tuned/prepared beforehand
    '''
    df = df.copy()
    if param_grid is None:
        param_grid = {
            'growth': ['linear'],
            'seasonality_prior_scale': np.round(np.logspace(0, 2.2, 10)),
            'holidays_prior_scale': np.round(np.logspace(0, 2.2, 10)),
            'changepoint_prior_scale': [0.05],  # [0.005,0.01,0.02,0.03,0.05,0.008,0.10,0.13,0.16,0.2]
            'interval_width': [0.80]  # [0.2,0.4,0.6,0.8]
        }
    if RandomizedSearch:
        param_list = list(
            ParameterSampler(param_grid, n_iter=10, random_state=random_state))
    else:
        param_list = list(ParameterGrid(param_grid))
    if cv_param is None:
        cv_param = {'horizon': 30, 'period': 120, 'initial': 1095}
    scores = []
    for i, param in enumerate(param_list):
        print('{}/{}:'.format(i, len(param_list)), param)
        param.update({'holidays': holidays})
        scores_tmp = ts_evaluation(df, param, exp=True, **cv_param)
        param.pop('holidays')
        tmp = param.copy()
        tmp.update({'mape': scores_tmp['mape'], 'rmse': scores_tmp['rmse']})
        scores.append(tmp)
        print('mape : {:.5f}%'.format(100 * scores_tmp['mape']))
    scores = pd.DataFrame(scores)
    best_param_ = scores.loc[scores['mape'].argmin(), :].to_dict()
    best_scores_ = best_param_['mape']
    best_param_.pop('mape')
    best_param_.pop('rmse')
    return best_param_, best_scores_, scores
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):
    """
    Example:
        python v12.py tuning --devices 0,1,2,3 --n-gpu 2
    """
    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        {
            'batch_size': [32],
            # 'epochs': [1],
            # 'scaleup_epochs': [0],
            'loss': ['arcface'],
            'verifythresh': [30],
            'freqthresh': [3],
            'margin': [0.3, 0.2],
            's': [30],
            # 'augmentation': ['soft'],
        },
        # for reproduce
        # {
        #     'batch_size': [32],
        #     'loss': ['arcface'],
        #     'verifythresh': [30, 40, 50],
        #     'freqthresh': [2, 3, 5],
        #     'margin': [0.3],
        #     's': [30],
        # },
        # {
        #     'batch_size': [32],
        #     'loss': ['arcface'],
        #     'verifythresh': [30],
        #     'freqthresh': [3],
        #     'margin': [0.2],
        #     's': [30],
        # },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(ParameterSampler(space, n_iter, random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block: (block_id + 1) * n_per_block]
    utils.launch_tuning(mode=mode, n_iter=n_iter, n_gpu=n_gpu, devices=devices,
                        params=params, root=ROOT, save_interval=save_interval,
                        candidate_list=candidate_chunk)
def _get_hyperparameter_configurations(self, n):
    """
    Args:
        n: number of configurations to sample
    Returns:
        n randomly sampled hyperparameter configurations
    """
    np.random.seed(self.seed)
    return [{'model_name': str(uuid.uuid4().hex), 'score': np.nan, 'config': config}
            for config in list(ParameterSampler(self.param_grid, n_iter=n))]
def load_hyperparam_grid(n_iter=50):
    from sklearn.model_selection import ParameterSampler
    hyperparam_grid = {
        "lr": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0],
        "weight_decay": [0.00001, 0.0001, 0.001, 0.01, 0.1],
        "n_hid": [25, 50, 75, 100],
        "p_dropout": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    }
    return list(
        ParameterSampler(hyperparam_grid, n_iter=n_iter, random_state=42))
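A minimal sketch (not part of the original source) of how the sampled configurations above might be consumed; `train_and_evaluate` is a hypothetical helper assumed to return a validation score.

def run_random_search():
    # Iterate over the sampled configurations and keep the best-scoring one.
    best_score, best_params = float("-inf"), None
    for params in load_hyperparam_grid(n_iter=50):
        score = train_and_evaluate(**params)  # hypothetical training routine
        if score > best_score:
            best_score, best_params = score, params
    return best_params, best_score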
def __init__(self, algo, calib_type, hp_grid, cv, scoring, n_iter=None):
    self.cv = cv
    self.algo = algo
    self.scoring = scoring
    self.hp_grid = hp_grid
    self.best_hp = {}
    if calib_type == 'GridSearch':
        self.hp_iterator = ParameterGrid(self.hp_grid)
    elif calib_type == 'RandomSearch':
        self.hp_iterator = ParameterSampler(self.hp_grid, n_iter)
    elif calib_type == 'GeneticAlgorithm':
        self.hp_iterator = GeneticAlgorithm(self.hp_grid, n_iter)
def select_best_nn_classifier(build_func, params_grid, n_iter, random_state,
                              X_train, y_train, X_val, y_val, early_stopping,
                              epochs=100, shuffle=False, verbose=1):
    '''Select the best neural network model, using KerasClassifier and
    ParameterSampler to generate models with different parameters.'''
    models = []
    scoring_list = []
    # list of dicts of randomly generated parameters
    params_list = list(ParameterSampler(params_grid, n_iter, random_state))
    for params in params_list:
        # set parameters for model
        model = KerasClassifier(build_fn=build_func,
                                validation_data=(X_val, y_val),
                                input_shape=X_train.shape[1:],
                                epochs=epochs,
                                shuffle=shuffle,
                                callbacks=[early_stopping],
                                verbose=0,
                                **params)
        model_name = f'{model.__class__.__name__}{params}'
        model.fit(X_train, y_train)
        score_train = accuracy_score(model.predict(X_train), y_train)
        score_valid = accuracy_score(model.predict(X_val), y_val)
        scoring_list.append(score_valid)
        models.append((model_name, model))
        if verbose == 1:
            print(model_name)
            print(f'Accuracy score on training set: {score_train.round(4)} | '
                  f'Accuracy score on validation set: {score_valid.round(4)}')
            print('-' * 100)
    # select best model using scoring list
    best_model_index = np.argmax(scoring_list)
    best_model = models[best_model_index]
    return best_model[0], best_model[1]
def search(command, param_dists, num_samples, sbatch_args, source_bashrc=True,
           conda_env=None, hparams_save_path="./", max_concurrent_jobs=0,
           wait=False, random_seed=42):
    sampler = ParameterSampler(param_dists, n_iter=num_samples,
                               random_state=random_seed)
    hparams = list(sampler)
    hparam_args = []
    for p in hparams:
        args_list = []
        for k, v in p.items():
            if isinstance(v, bool):
                if v:
                    args_list.append(f"--{k}")
            else:
                args_list.append(f"--{k} {v}")
        hparam_args.append(" ".join(args_list))
    hparams_array = "('" + "' '".join(hparam_args) + "')"

    script = ["#!/bin/bash\n"] + [f"#SBATCH {arg}\n" for arg in sbatch_args]
    script.append(f"#SBATCH --array=0-{len(hparams)-1}%{max_concurrent_jobs}\n")
    if wait:
        script.append("#SBATCH -W")
    if conda_env or source_bashrc:
        script.append("source ~/.bashrc\n")
    if conda_env:
        script.append(f"conda activate {conda_env}\n")
    script.append(f"HPARAMS={hparams_array}\n")
    script.append(f"{command} ${{HPARAMS[$SLURM_ARRAY_TASK_ID]}}\n")

    with tempfile.NamedTemporaryFile("w") as f:
        f.writelines(script)
        f.seek(0)
        out = subprocess.run(
            ["sbatch", os.path.join(tempfile.gettempdir(), f.name)],
            capture_output=True)
        job_id = out.stdout.decode().replace("Submitted batch job ", "").rstrip("\n")
        df = pd.DataFrame(hparams)
        df["version"] = [f"{job_id}_{str(i)}" for i in range(len(df))]
        df.to_csv(os.path.join(hparams_save_path, f"hparams_{job_id}.csv"),
                  index=False)
def tfidf_parameter_space(n_trials):
    rng = np.random.RandomState(42)
    return ParameterSampler(dict(
        tf_method=["binary", "raw", "freq", "log_norm", "double_norm"],
        idf_method=["smooth", "probabilistic"],
        drop_stopwords=[True, False],
        drop_suffix=[True, False],
        drop_punct=[True, False],
        lowercase=[True, False]),
        n_iter=n_trials, random_state=rng)
def optimize_circuits(k, direction, cerebellum="seg"):
    print("Assessing k={:02d} circuits".format(k))

    act_bin = load_coordinates(cerebellum=cerebellum)
    lexicon = load_lexicon(["cogneuro"])
    dtm_bin = load_doc_term_matrix(version=190325, binarize=True)
    lexicon = sorted(list(set(lexicon).intersection(dtm_bin.columns)))
    dtm_bin = dtm_bin[lexicon]

    lists, circuits = load_domains(k, cerebellum=cerebellum)
    function_features = pd.DataFrame(index=dtm_bin.index, columns=range(1, k+1))
    structure_features = pd.DataFrame(index=act_bin.index, columns=range(1, k+1))
    for i in range(1, k+1):
        functions = lists.loc[lists["CLUSTER"] == i, "TOKEN"]
        function_features[i] = dtm_bin[functions].sum(axis=1)
        structures = circuits.loc[circuits["CLUSTER"] == i, "STRUCTURE"]
        structure_features[i] = act_bin[structures].sum(axis=1)
    function_features = pd.DataFrame(doc_mean_thres(function_features),
                                     index=dtm_bin.index, columns=range(1, k+1))
    structure_features = pd.DataFrame(binarize(structure_features),
                                      index=act_bin.index, columns=range(1, k+1))

    # Load the data splits
    splits = {}
    for split in ["train", "validation"]:
        splits[split] = [int(pmid.strip()) for pmid in
                         open("../../../data/splits/{}.txt".format(split), "r").readlines()]

    # Specify the hyperparameters for the randomized grid search
    param_grid = {"penalty": ["l1", "l2"],
                  "C": [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                  "fit_intercept": [True, False]}
    param_list = list(ParameterSampler(param_grid, n_iter=28, random_state=42))
    max_iter = 500

    if direction == "forward":
        file = "fits/forward_k{:02d}_{}.p".format(k, direction)
        if not os.path.isfile(file):
            print("-" * 80 + "\nOptimizing forward model\n" + "-" * 80)
            train_set = [function_features.loc[splits["train"]],
                         structure_features.loc[splits["train"]]]
            val_set = [function_features.loc[splits["validation"]],
                       structure_features.loc[splits["validation"]]]
            op_fit = optimize_hyperparameters(param_list, train_set, val_set,
                                              max_iter=max_iter)
            pickle.dump(op_fit, open(file, "wb"), protocol=2)

    elif direction == "reverse":
        file = "fits/reverse_k{:02d}_{}.p".format(k, direction)
        if not os.path.isfile(file):
            print("-" * 80 + "\nOptimizing reverse model\n" + "-" * 80)
            train_set = [structure_features.loc[splits["train"]],
                         function_features.loc[splits["train"]]]
            val_set = [structure_features.loc[splits["validation"]],
                       function_features.loc[splits["validation"]]]
            op_fit = optimize_hyperparameters(param_list, train_set, val_set,
                                              max_iter=max_iter)
            pickle.dump(op_fit, open(file, "wb"), protocol=2)
def sample_population(estimator_cls, params, X, y, popsize=25, scoring='accuracy'):
    population = set()
    p = iter(ParameterSampler(param_distributions=params, n_iter=100))
    while len(population) < popsize:
        try:
            inner = {attr: BaseParam(param) for attr, param in (next(p)).items()}
            individual = Estimator(estimator_cls=estimator_cls, inner=inner,
                                   X=X, y=y, scoring=scoring)
            individual.evaluate()
            population.add(individual)
        except:
            print(f"Failed to draft initial {inner}")
    return population
def draw_random_p0(cfg, N_max_fits):
    param_grid = {
        # 'lambda_E': uniform(cfg.lambda_E/10, cfg.lambda_E*5),
        # 'lambda_I': uniform(cfg.lambda_I/10, cfg.lambda_I*5),
        "beta": uniform(cfg.beta / 10, cfg.beta * 5),
        "tau": uniform(-10, 10),
    }
    i = 0
    while i < N_max_fits:
        random_p0 = list(ParameterSampler(param_grid, n_iter=1))[0]
        yield i, random_p0
        i += 1
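An illustrative usage sketch (not from the original source): the generator above yields one random starting point per iteration, so it can drive repeated fits; `fit_model` is a hypothetical fitting routine.

for i, random_p0 in draw_random_p0(cfg, N_max_fits=10):
    # Each iteration draws a fresh random initial guess for the optimizer.
    fit_result = fit_model(cfg, random_p0)  # hypothetical helper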
def sample_hyperparameters(
        model_name: str,
        data_origin: str,
        round_to: int = 6) -> List[Dict[str, Union[int, float]]]:
    """
    Sample the hyperparameters for different runs of the same model. The
    distributions parameters are sampled from are defined in
    uncertainty_estimation.models.info.PARAM_SEARCH and the number of
    evaluations per model type in uncertainty_estimation.models.info.NUM_EVALS.

    Parameters
    ----------
    model_name: str
        Name of the model.
    data_origin: str
        Specify the data set which should be used to specify the hyperparameters
        to be sampled / default values.
    round_to: int
        Decimal that floats should be rounded to.

    Returns
    -------
    sampled_params: List[Dict[str, Union[int, float]]]
        List of dictionaries containing hyperparameters and their sampled values.
    """
    sampled_params = list(
        ParameterSampler(
            param_distributions={
                hyperparam: PARAM_SEARCH[hyperparam]
                for hyperparam, val in
                MODEL_PARAMS[model_name][data_origin].items()  # MIMIC is just a default here
                if hyperparam in PARAM_SEARCH
            },
            n_iter=NUM_EVALS[model_name],
        ))

    sampled_params = [
        dict(
            {
                # Round float values
                hyperparam: round(val, round_to) if isinstance(val, float) else val
                for hyperparam, val in params.items()
            },
            **{
                # Add hyperparameters that stay fixed
                hyperparam: val
                for hyperparam, val in
                MODEL_PARAMS[model_name][data_origin].items()  # MIMIC is just a default here
                if hyperparam not in PARAM_SEARCH
            },
        ) for params in sampled_params
    ]

    return sampled_params
def random_search():
    param_grid = {
        'noise_factor_cafe': uniform(3, 1),
        'noise_factor_car': uniform(15, 2),
        'noise_factor_white': uniform(0.05, 0.02),
        'noise_file': [0, 1, 2],
        'speed_factor': uniform(0.8, 0.4),
    }
    param_list = list(ParameterSampler(param_grid, n_iter=10))
    return [
        dict((k, round(v, 4) if not isinstance(v, int) else v)
             for (k, v) in d.items())
        for d in param_list
    ]
def maximize(self, score_optimum=None, realize=True):
    """
    Find the next best hyper-parameter setting to optimize.

    Parameters
    ----------
    score_optimum: float
        An optional score to use inside the EI formula instead of the
        optimizer's current_best_score
    realize: bool
        Whether or not to give a more realistic estimate of the EI (default=True)

    Returns
    -------
    best_setting: dict
        The setting with the highest expected improvement
    best_score: float
        The highest EI (per second)
    """
    start = time.time()

    # Select a sample of parameters
    sampled_params = ParameterSampler(self.param_distributions, self.draw_samples)

    # Set score optimum
    if score_optimum is None:
        score_optimum = self.current_best_score

    # Determine the best parameters
    best_setting, best_score = self._maximize_on_sample(sampled_params,
                                                        score_optimum)

    if self.local_search:
        best_setting, best_score = self._local_search(best_setting, best_score,
                                                      score_optimum,
                                                      max_steps=self.ls_max_steps)

    if realize:
        best_setting, best_score = self._realize(best_setting, best_score,
                                                 score_optimum)

    # Store running time
    running_time = (time.time() - start) / self.simulate_speedup
    self.maximize_times.append(running_time)

    return best_setting, best_score
def __init__(
    self,
    model_fn: Callable[..., tf.keras.models.Model],
    param_distributions: Dict[str, Callable],
    n_iter: int = 10,
    verbose: int = 0,
    **kwargs: Any,
) -> None:
    """RandomSearch for a given parameter distribution.

    The random search randomly iterates over the param_distributions dictionary,
    which defines the hyperparameter value range for a key that is a parameter
    name of the model_fn. For example, if the model_fn has the parameter
    "num_units" a dictionary could look like this:

    ``` python
    def model_fn(num_units: int):
        pass

    param_distributions = {"num_units": [10, 20, 30]}
    ```

    Note: Inside the model_fn it is expected that the model is compiled.

    The random search is evaluated by:
    - The validation loss value, if no metrics are passed to model.compile()
    - The validation score of the last defined metric in model.compile()

    ``` python
    model.compile(loss="mse", metrics=["mse", "mae"])
    ```

    This would sort the random search combinations based on the validation
    mae score.

    Args:
        model_fn (Callable[..., tf.keras.models.Model]): Function that builds
            and compiles a tf.keras.models.Model object.
        param_distributions (Dict[str, Callable]): Dict of str, callable pairs,
            where the str is the parameter name of the model_fn.
        n_iter (int): Number of random models. Defaults to 10.
        verbose (int): Whether to show information in the terminal. Defaults to 0.
        kwargs (Any): Keyword arguments for the model_fn function.
    """
    super().__init__(model_fn=model_fn, verbose=verbose, **kwargs)
    self.param_distributions = param_distributions
    self.n_iter = n_iter
    self.random_sampler = ParameterSampler(self.param_distributions,
                                           n_iter=self.n_iter)
def LCB(self, n_sample, gpr=None, Xd=None):
    gpr = self._get_gpr(gpr)
    if Xd is None:
        Xd = self.Xd
    preds = gpr.predict(Xd, return_std=True)
    preds = pd.DataFrame({"prediction": preds[0], "std_dev": preds[1]})
    # n.b. lambda is a keyword so change vector of values to alpha
    alpha = ParameterSampler({"alpha": expon()}, n_iter=n_sample)
    lcb = pd.DataFrame({"lcb_{}".format(i):
                        preds.prediction - (li["alpha"] * preds.std_dev)
                        for i, li in enumerate(alpha)})
    # TODO: include X in lcb, to look up parameters from selected values
    return lcb
def _get_padadamps(n_params):
    powers = [5, 5.5, 6, 6.5, 7]
    param_space = {
        "initial_batch_size": [2**i for i in powers],
        "max_batch_size": [100, 200, 500, 1000, 2000, 5000],
        "batch_growth_rate": loguniform(1e-3, 1e-1),
        "dwell": [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000],
        "weight_decay": [1e-3, 1e-4, 1e-5, 1e-6, 0, 0, 0],
    }
    model = PadaDamp(seed=42)
    # scikit-learn's ParameterSampler takes ``random_state``, not ``seed``
    params = ParameterSampler(param_space, n_iter=n_params, random_state=42)
    models = [clone(model).set_params(**p) for p in params]
    return models
def fit(self, frame):
    """Fit the grid search.

    Parameters
    ----------
    frame : H2OFrame, shape=(n_samples, n_features)
        The training frame on which to fit.
    """
    sampled_params = ParameterSampler(self.param_grid,
                                      self.n_iter,
                                      random_state=self.random_state)
    return self._fit(frame, sampled_params)
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"], "C": uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)

    # test that repeated calls yield identical parameters
    param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=3, random_state=0)
    assert_equal([x for x in sampler], [x for x in sampler])

    if sp_version >= (0, 16):
        param_distributions = {"C": uniform(0, 1)}
        sampler = ParameterSampler(param_distributions=param_distributions,
                                   n_iter=10, random_state=0)
        assert_equal([x for x in sampler], [x for x in sampler])
def tuning(mode, n_iter, n_gpu, devices, save_interval, n_blocks, block_id):
    if n_gpu == -1:
        n_gpu = len(devices.split(','))

    space = [
        # {
        #     'loss': ['arcface'],
        #     # 'epochs': [5],
        #     # 'augmentation': ['soft'],
        #     'verifythresh': [20, 30, 40],
        #     'freqthresh': [1],
        #     # 'freqthresh': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        # },
        # {
        #     'loss': ['arcface'],
        #     # 'epochs': [5],
        #     # 'augmentation': ['soft'],
        #     'verifythresh': [20],
        #     'freqthresh': [2],
        #     # 'freqthresh': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        # },
        {
            'loss': ['arcface'],
            'verifythresh': [30],
            'freqthresh': [3],
        },
    ]

    if mode == 'grid':
        candidate_list = list(ParameterGrid(space))
    elif mode == 'random':
        candidate_list = list(
            ParameterSampler(space, n_iter, random_state=params['seed']))
    else:
        raise ValueError

    n_per_block = math.ceil(len(candidate_list) / n_blocks)
    candidate_chunk = candidate_list[block_id * n_per_block:(block_id + 1) * n_per_block]
    utils.launch_tuning(mode=mode, n_iter=n_iter, n_gpu=n_gpu, devices=devices,
                        params=params, root=ROOT, save_interval=save_interval,
                        candidate_list=candidate_chunk)
def __iter__(self):
    '''
    The __iter__ method that returns an iterator.
    Since it is called at each new call of the iterable in a 'for' statement,
    it initializes all dynamic elements.
    '''
    self.n_iter = 0
    self.pop_scores = []
    try:
        # First population is random; we turn it into a list to copy it
        self.population = list(
            ParameterSampler(self.hp_grid, self.init_pop_size))
    except ValueError:
        self.population = list(ParameterGrid(self.hp_grid))
    self.current_pop = self.population.copy()
    self.generation = 0
    return self
def sample_hyperparameters(random_state, num):
    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM,
    }
    sampler = ParameterSampler(space, n_iter=num, random_state=random_state)
    for params in sampler:
        yield params
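A brief consumption sketch (illustrative, not from the original source); it assumes the module-level candidate lists (N_ITER, BATCH_SIZE, L2, LEARNING_RATES, LOSSES, EMBEDDING_DIM) are defined elsewhere.

# Draw five hyperparameter dictionaries reproducibly and print each one.
for hyperparams in sample_hyperparameters(random_state=np.random.RandomState(0), num=5):
    print(hyperparams)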
def fit_svr(X_train: np.array, y_train: np.array, X_valid: np.array,
            y_valid: np.array, params_dict: dict = params_dict, n_iter: int = 25):
    ps = ParameterSampler(n_iter=n_iter, param_distributions=params_dict)
    scores = np.zeros(n_iter)
    models = list(repeat(None, n_iter))
    for idx, params in enumerate(ps):
        svr = SVR(**params)
        svr.fit(X_train, y_train)
        scores[idx] = r2_score(y_valid, svr.predict(X_valid))
        models[idx] = svr
    return models[np.argmax(scores)]
def Train_Light_GBM(X, y, int_cv=3, regression=True, n_params=10, test_size=.2,
                    n_jobs=1, e_stop_rounds=300, **kwargs):
    '''
    Wrapper function to train a Light GBM regression or classifier model

    X - Training input
    y - Training labels
    int_cv - Number of internal cross validation folds
    regression - True for regression, False for binary classification
    n_params - Number of different random hyperparam combinations to explore
    test_size - Size (in %) of the outer test fold (to use for final validation fit)
    n_jobs - Number of proc. to use
    e_stop_rounds - Number of early stop rounds used in checking parameters
                    (double used in final fit)
    '''
    if regression:
        Base_Model = LGBMRegressor
    else:
        Base_Model = LGBMClassifier

    # Train val split, for final fit
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size)

    param_scores = []
    param_list = list(ParameterSampler(DEFAULT_PARAM_GRID, n_iter=n_params))
    int_skf = KFold(n_splits=int_cv)

    for p in range(n_params):
        best_scores = []
        for train_ind, test_ind in int_skf.split(X_train, y_train):
            int_X_train, int_y_train = X_train[train_ind], y_train[train_ind]
            int_X_test, int_y_test = X_train[test_ind], y_train[test_ind]
            model = Base_Model(n_jobs=n_jobs, silent=True, n_estimators=5000,
                               **param_list[p])
            model.fit(int_X_train, int_y_train,
                      eval_set=(int_X_test, int_y_test),
                      verbose=False, early_stopping_rounds=e_stop_rounds)
            best_score = list(model.best_score_['valid_0'].values())[0]
            best_scores.append(best_score)
        param_scores.append(np.mean(best_scores))

    bp_ind = np.argmin(param_scores)  # Index of best parameters
    model = Base_Model(n_jobs=n_jobs, silent=True, n_estimators=5000,
                       **param_list[bp_ind])
    model.fit(X_train, y_train, eval_set=(X_val, y_val), verbose=False,
              early_stopping_rounds=int(e_stop_rounds * 2))
    return model
def knn_classifier(parameter_sampler_random_int):
    parameters_sample_dict = {
        "n_neighbors": sp_randint(1, 25),
        "metric": ["euclidean", "manhattan", "chebyshev", "minkowski"],
        "weights": ["uniform", "distance"],
    }
    p_clf = list(
        ParameterSampler(
            parameters_sample_dict,
            n_iter=1,
            random_state=parameter_sampler_random_int
        )
    )[0]
    clf = KNeighborsClassifier(**p_clf)
    return clf, build_classifier_parameter_dict(clf, p_clf)
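A short usage sketch (not part of the original source), assuming training arrays X and y and the surrounding module's build_classifier_parameter_dict helper are available:

# Sample one random KNN configuration with a fixed seed and fit it.
clf, clf_params = knn_classifier(parameter_sampler_random_int=0)
clf.fit(X, y)
print(clf_params)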