def test_integer():
    a = Integer(1, 10)
    for i in range(50):
        yield (check_limits, a.rvs(random_state=i), 1, 11)

    random_values = a.rvs(random_state=0, n_samples=10)
    assert_array_equal(random_values.shape, (10,))
    assert_array_equal(a.transform(random_values), random_values)
    assert_array_equal(a.inverse_transform(random_values), random_values)
def test_select_method__auto_mixed_dims(self):
    cs = Config([Categorical(['one', 'two']), Integer(0, 1)], 'auto', 10)
    self.assertEqual(cs.selected_method, 'bayesian')
                          valid_split=cfg.TRAIN.VALID_SPLIT)

# Set the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device used: ", device)

# Hyperparameter search
if cfg.HYPERSEARCH:
    # Define the parameters and the search space
    dim_learning_rate = Real(low=1e-6, high=1e-2, prior='log-uniform',
                             name='learning_rate')
    dim_num_dense_layers = Integer(low=0, high=2, name='num_dense_layers')
    dim_dropout = Real(low=0, high=0.9, name='dropout')
    dim_wd = Real(low=1e-6, high=1e-2, prior='log-uniform', name='Weight_Decay')

    dimensions = [dim_learning_rate, dim_num_dense_layers, dim_dropout, dim_wd]
    default_parameters = [0.001, 2, 0.2, 0.0005]

    # Path to save the best model found during the hyperparameter search
    path_best_model = "best_overall_model.pth"
import pickle as pkl

from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.ensemble import RandomForestRegressor

with open("results/db_reg_close", "rb") as f:
    Xur_c_train, Xur_c_valid, Xr_c_train, Xr_c_valid, Y_c_train, Y_c_valid = pkl.load(f)

ur_model = BayesSearchCV(
    RandomForestRegressor(oob_score=True),
    {'max_depth': Integer(10, 30)},
    n_iter=32,
    verbose=1,
    cv=5)
ur_model.fit(Xur_c_train, Y_c_train)

with open("results/UR_close_rf_1", "wb") as f:
    pkl.dump(ur_model, f)

r_model = BayesSearchCV(
    RandomForestRegressor(oob_score=True),
    {'max_depth': Integer(10, 30)},
    n_iter=32,
    verbose=1,
    cv=5)
r_model.fit(Xr_c_train, Y_c_train)

with open("results/R_close_rf_1", "wb") as f:
    pkl.dump(r_model, f)
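After fitting, the selected configuration can be read back from the search object. A minimal follow-up sketch using standard BayesSearchCV attributes (best_params_, best_score_, score); the validation arrays come from the pickle loaded above:

# Inspect the fitted search (standard BayesSearchCV attributes)
print(ur_model.best_params_)                    # best max_depth found
print(ur_model.best_score_)                     # mean cross-validated score
print(ur_model.score(Xur_c_valid, Y_c_valid))   # score on the held-out split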
def sell_indicator_space() -> List[Dimension]:
    return [
        Integer(30, 90, name='sell-adx'),
        Real(0, 1.0, name='sell-fisher'),
    ]
                    beta=0.24087176329409027,
                    normalize_similarity=True)

# recommender_3 = UserKNNCFRecommender(urm_train)
# recommender_3.fit(shrink=2, topK=600, normalize=True)

W_sparse_CF = recommender_4.W_sparse
recommender_class = CFW_D_Similarity_Linalg

parameterSearch = SearchBayesianSkopt(recommender_class,
                                      evaluator_validation=evaluator_valid,
                                      evaluator_test=evaluator_test)

hyperparameters_range_dictionary = {}
hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
hyperparameters_range_dictionary["add_zeros_quota"] = Real(low=0, high=1, prior='uniform')
hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[urm_train, icm_asset, W_sparse_CF],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={})

output_folder_path = "result_experiments/"

import os
    Y = params['Y']
    waveform_out = gen_waveform_from_coords([X, Y])
    peak_to_rms = sound_metrics.peak_to_rms(waveform_out)
    print("""The peak to RMS at X=%.6f Y=%.6f is %.6f""" % (X, Y, peak_to_rms))
    return -peak_to_rms


fund_freq_range = (50, 3000)
n_harmonics_range = [0, 4]
detuning_range_hz = [0, 100]
harmonics_power_range = [0, 1]

bigger_space = [
    Real(50, 3000, name='fund_freq'),
    Integer(0, 4, name='n_harmonics'),
    Integer(-99, 100, name='detuning_hz'),
    Real(0, 1, name='harmonics_power'),
]


@use_named_args(bigger_space)
def objective_rms_to_peak_high_dimension(**params):
    fund_freq = params['fund_freq']
    n_harmonics = params['n_harmonics']
    detuning_hz = params['detuning_hz']
    harmonics_power = params['harmonics_power']
    params_1d = get_freqs_powers(
        np.array([[
            fund_freq, n_harmonics, detuning_hz, harmonics_power
def runParameterSearch_Collaborative(recommender_class, URM_train, ICM_train=None,
                                     URM_train_last_test=None,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     n_random_starts=50,
                                     resume_from_saved=False,
                                     save_model="best",
                                     allow_weighting=True,
                                     similarity_type_list=None):

    # If the output directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:
        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            """
            TopPop, GlobalEffects and Random have no parameters,
            therefore only one evaluation is needed
            """

            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model)

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender, ItemKNNCBFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                            maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
                pool.close()
                pool.join()
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class in [LinearHybrid001, LinearHybrid002]:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=1, prior='uniform')
            hyperparameters_range_dictionary["l1_ratio"] = Real(low=0, high=1, prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is PipeHybrid001:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is MergedHybrid000:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=1, prior='uniform')
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["adam"])  # Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([400])  # Changed! default = 500
            # hyperparameters_range_dictionary["use_bias"] = Categorical([True, False])
            # hyperparameters_range_dictionary["batch_size"] = Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["num_factors"] = Integer(160, 210)  # Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            # hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["negative_interactions_quota"] = Real(low=0.0, high=0.5, prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical([True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical([1])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["negative_interactions_quota"] = Real(low=0.0, high=0.5, prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([1500])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["batch_size"] = Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["positive_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["negative_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-4, high=1e-1, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={**earlystopping_keywargs,
                                  "positive_threshold_BPR": None})

        ##########################################################################################################

        if recommender_class is IALSRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["confidence_scaling"] = Categorical(["linear", "log"])
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3, high=50.0, prior='log-uniform')
            hyperparameters_range_dictionary["epsilon"] = Real(low=1e-3, high=10.0, prior='log-uniform')
            hyperparameters_range_dictionary["reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class in [PureSVDRecommender, PureSVDItemRecommender]:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 500)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is NMFRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 350)
            hyperparameters_range_dictionary["solver"] = Categorical(["multiplicative_update"])  # , "coordinate_descent"])
            hyperparameters_range_dictionary["init_type"] = Categorical(["random", "nndsvda"])
            hyperparameters_range_dictionary["beta_loss"] = Categorical(["frobenius", "kullback-leibler"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["epochs"] = Categorical([1500])
            hyperparameters_range_dictionary["symmetric"] = Categorical([True, False])
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["lambda_i"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["lambda_j"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-4, high=1e-1, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={**earlystopping_keywargs,
                                  "positive_threshold_BPR": None,
                                  'train_with_sparse_weights': None})

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["l1_ratio"] = Real(low=1e-5, high=1.0, prior='log-uniform')
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3, high=1.0, prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if recommender_class is EASE_R_Recommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Categorical([None])  # Integer(5, 3000)
            hyperparameters_range_dictionary["normalize_matrix"] = Categorical([False])
            hyperparameters_range_dictionary["l2_norm"] = Real(low=1e0, high=1e7, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        # Final step: the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
        error_file.close()
print('Save predictors and categorical: ')
np.save(PICKLEPATH + 'predictors' + VERSION_NUM + '.npy', predictors)
np.save(PICKLEPATH + 'categorical' + VERSION_NUM + '.npy', categorical)

# Delete test df now to save memory
del test_df
gc.collect()

'''
#######################
#      MODELLING      #
#######################
'''

start_time = time.time()

space = [
    Integer(3, 10, name='max_depth'),
    Integer(6, 30, name='num_leaves'),
    Integer(20, 200, name='min_child_samples'),
    Real(250, 400, name='scale_pos_weight'),
    Real(0.2, 0.7, name='subsample'),
    Real(0.2, 0.7, name='colsample_bytree'),
    Integer(50, 255, name='max_bin'),
]

print("Preparing train and val datasets")
xgtrain = lgb.Dataset(train_df[predictors].values,
                      label=train_df[target].values,
                      feature_name=predictors,
                      categorical_feature=categorical,
                      free_raw_data=False)
del train_df
def get_optimize_params():
    space = [
        Real(0.8, 1, name='learning_rate'),
        Integer(2, 5, name='depth'),
        Real(0, 0.5, name='l2_leaf_reg'),
        # Integer(16, 48, name='max_leaves'),
        Real(0, 2, name='random_strength'),
    ]

    def get_MAE(arg_list):
        keys = ['learning_rate', 'depth', 'l2_leaf_reg', 'random_strength']
        val_params = {keys[i]: arg_list[i] for i in range(len(keys))}
        # learning_rate, depth, l2_leaf_reg, num_leaves, random_strength = arg_list
        """
        Melissa.send_message(f'starting val CATBOOST\n I am stuck in the middle of the road... obviously\n'
                             f'{val_params}')
        """
        X, Y = data.dataset('local', 'train', onehot=False)

        weather_cols = ['WEATHER_-4', 'WEATHER_-3', 'WEATHER_-2', 'WEATHER_-1']
        X[weather_cols] = X[weather_cols].fillna('Unknown')
        weather_cols = [col for col in X.columns if col.startswith('WEATHER_')]

        categorical_cols = ['EMERGENCY_LANE', 'ROAD_TYPE', 'EVENT_DETAIL',
                            'EVENT_TYPE'] + weather_cols
        categorical_cols.extend(['WEEK_DAY', 'IS_WEEKEND'])

        weather_clusters_cols = ['WEATHER_-4_CL', 'WEATHER_-3_CL',
                                 'WEATHER_-2_CL', 'WEATHER_-1_CL']
        X[weather_clusters_cols] = X[weather_clusters_cols].fillna('Unknown')

        # Build params from the default and validation ones
        params = {
            'X': X,
            'mode': 'local',
            'n_estimators': 10000,
            'loss_function': 'MAE',
            'eval_metric': 'MAE',
            'early_stopping_rounds': 100,
            'cat_features': categorical_cols,
        }
        params.update(val_params)

        catboost = CatBoost(params)
        model = MultiOutputRegressor(catboost, n_jobs=-1)
        model.fit(X, Y)

        X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2,
                                                            shuffle=False)
        MAE = inout.evaluate(model, X_test, y_test)

        iterations = []
        for i in range(4):
            iterations.append(model.estimators_[i].model.best_iteration_)

        global _best_MAE
        if MAE < _best_MAE:
            _best_MAE = MAE
            Melissa.send_message(
                f'CATBOOST\n ITERATIONS: {iterations} MAE: {MAE}\nparams:{val_params}\n')

        return MAE

    return space, get_MAE
def create_params():
    param_grid_xgb = {
        'classifier__learning_rate': Real(0.1, 1.0, prior='log-uniform'),
        'classifier__max_depth': Integer(1, 20),
        'classifier__gamma': Real(0.01, 1.0, prior='log-uniform'),
        'classifier__subsample': Real(0.1, 1.0, prior='uniform'),
    }
    param_grid_lgbm = {
        'classifier__num_leaves': Integer(10, 150),
        'classifier__learning_rate': Real(0.01, 1),
        'classifier__max_depth': Integer(1, 30),
        'classifier__feature_fraction': Real(0.1, 1),
        'classifier__subsample': Real(0.1, 1),
    }
    param_grid_hgb = {
        'classifier__learning_rate': Real(0.001, 0.1, prior='log-uniform'),
        'classifier__max_leaf_nodes': Integer(2, 60),
        'classifier__max_depth': Integer(2, 50),
        'classifier__min_samples_leaf': Integer(2, 20),
        'classifier__l2_regularization': Real(1, 100),
    }
    param_grid_extra = {
        'classifier__n_estimators': Integer(100, 500),
        'classifier__min_samples_leaf': Integer(1, 10),
        'classifier__max_depth': Integer(1, 30),
        'classifier__max_features': Integer(2, 10),
        'classifier__class_weight': ['balanced'],
        'classifier__criterion': ['gini', 'entropy'],
    }
    param_grid_log = {
        'classifier__penalty': ['l1', 'l2'],
        'classifier__C': Real(0.01, 100),
        'classifier__class_weight': ['balanced'],
        'classifier__solver': ['saga'],
    }
    param_grid_rf = {
        'classifier__n_estimators': Integer(100, 500),
        'classifier__min_samples_leaf': Integer(1, 10),
        'classifier__max_depth': Integer(1, 30),
        'classifier__max_features': Integer(2, 10),
        'classifier__class_weight': ['balanced'],
        'classifier__criterion': ['gini', 'entropy'],
    }
    param_grid_svm = {
        'classifier__C': Real(1000, 100000),
        'classifier__gamma': Real(0.000000001, 0.0001, prior='log-uniform'),
        'classifier__class_weight': ['balanced'],
    }
    param_grid_nn = {
        'classifier__activation': ['tanh', 'relu'],
        'classifier__solver': ['sgd', 'adam'],
        'classifier__alpha': Real(0.01, 10),
        'classifier__learning_rate': ['constant', 'adaptive'],
    }
    param_grid_knn = {'classifier__n_neighbors': Integer(1, 150)}
    param_grid_tree = {
        'classifier__min_samples_split': Integer(2, 20),
        'classifier__max_depth': Integer(1, 20),
        'classifier__class_weight': ['balanced'],
    }
    param_grid_ada = {
        'classifier__n_estimators': Integer(20, 200),
        'classifier__learning_rate': Real(0.001, 0.1, prior='log-uniform'),
    }
    param_grid_cat = {
        'classifier__learning_rate': Real(0.001, 0.1, prior='log-uniform'),
        'classifier__depth': Integer(1, 6),
        'classifier__l2_leaf_reg': Real(1, 100),
        'classifier__silent': [True],
    }

    PARAMS = {
        'XGBOOST': param_grid_xgb,
        'LGBM': param_grid_lgbm,
        'HGB': param_grid_hgb,
        'EXTRA': param_grid_extra,
        'LOG': param_grid_log,
        'RF': param_grid_rf,
        'SVM': param_grid_svm,
        'NN': param_grid_nn,
        'KNN': param_grid_knn,
        'TREE': param_grid_tree,
        'ADA': param_grid_ada,
        'CAT': param_grid_cat,
    }
    return PARAMS
def get_default_params_and_name(self, estimator):
    # print("get default estimator name and parameters")
    if isinstance(estimator, AdaBoostClassifier):
        base_estimator = [
            RandomForestClassifier(verbose=2, n_estimators=20),
            ExtraTreesClassifier(verbose=2, n_estimators=20),
            # GradientBoostingClassifier(verbose=2, n_estimators=20),
            # XGBClassifier(n_estimators=20),
            # LGBMClassifier(n_estimators=20)
        ]
        params = {
            "n_estimators": (30, 200),
            "learning_rate": (1e-6, 1.0, 'log-uniform'),
            "base_estimator": Categorical(base_estimator),
        }
        estimator_name = "adaboost"
    elif isinstance(estimator, GradientBoostingClassifier):
        params = {
            "n_estimators": (300, 1200),
            "learning_rate": (1e-6, 1.0, 'log-uniform'),
            "max_depth": (3, 30),
            "min_samples_leaf": (1, 128),
            "min_samples_split": (2, 256),
            "subsample": (0.6, 1.0, 'uniform'),
            "max_features": (0.6, 1.0, 'uniform'),
            "min_weight_fraction_leaf": (0.0, 0.5, 'uniform'),
            "min_impurity_decrease": (1e-6, 1e-1, 'log-uniform'),
        }
        estimator_name = "gbm"
    elif isinstance(estimator, XGBClassifier):
        params = {
            "n_estimators": (300, 1200),
            "max_depth": (3, 30),
            "min_child_weight": (1e-3, 1e+3, 'log-uniform'),
            "learning_rate": (1e-6, 1.0, 'log-uniform'),
            "colsample_bytree": (0.6, 1.0, 'uniform'),
            "subsample": (0.6, 1.0, 'uniform'),
            "gamma": (1e-6, 1.0, 'log-uniform'),
            'reg_alpha': (1e-3, 1e3, 'log-uniform'),
            'reg_lambda': (1e-3, 1e3, 'log-uniform'),
            "scale_pos_weight": (0.01, 1.0, 'uniform'),
        }
        estimator_name = "xgb"
    elif isinstance(estimator, LGBMClassifier):
        params = {
            "n_estimators": (300, 1200),
            "max_depth": (3, 30),
            "max_bin": (64, 256),
            "num_leaves": (30, 256),
            "min_child_weight": (1e-3, 1e3, 'log-uniform'),
            "min_child_samples": (8, 256),
            "min_split_gain": (1e-6, 1.0, 'log-uniform'),
            "learning_rate": (1e-6, 1.0, 'log-uniform'),
            "colsample_bytree": (0.6, 1.0, 'uniform'),
            "subsample": (0.6, 1.0, 'uniform'),
            'reg_alpha': (1e-3, 1e3, 'log-uniform'),
            'reg_lambda': (1e-3, 1e3, 'log-uniform'),
            "scale_pos_weight": (0.01, 1.0, 'uniform'),
        }
        estimator_name = "lgb"
    elif isinstance(estimator, CatBoostClassifier):
        params = {
            # 'iterations': hyperopt.hp.quniform("iterations", 300, 1200, 10),
            'depth': hyperopt.hp.quniform("depth", 3, 12, 1),
            # 'border_count': hyperopt.hp.quniform("border_count", 16, 224, 4),
            'learning_rate': hyperopt.hp.loguniform('learning_rate', 1e-6, 1e-1),
            'l2_leaf_reg': hyperopt.hp.qloguniform('l2_leaf_reg', 0, 3, 1),
            'bagging_temperature': hyperopt.hp.uniform('bagging_temperature', 0.6, 1.0),
            'rsm': hyperopt.hp.uniform('rsm', 0.8, 1.0),
        }
        estimator_name = "catboost"
    elif isinstance(estimator, RandomForestClassifier):
        params = {
            "n_estimators": (100, 1000),
            "criterion": Categorical(["gini", "entropy"]),
            "max_features": (0.8, 1.0, 'uniform'),
            "max_depth": (3, 30),
            "min_samples_split": (2, 256),
            "min_samples_leaf": (1, 128),
            "min_weight_fraction_leaf": (0.0, 0.5, 'uniform'),
            "max_leaf_nodes": (30, 256),
            "min_impurity_decrease": (1e-6, 1e-1, 'log-uniform'),
        }
        estimator_name = "rf"
    elif isinstance(estimator, ExtraTreesClassifier):
        params = {
            "n_estimators": (100, 1000),
            "criterion": Categorical(["gini", "entropy"]),
            "max_features": (0.8, 1.0, 'uniform'),
            "max_depth": (3, 30),
            "min_samples_split": (2, 256),
            "min_samples_leaf": (1, 128),
            "min_weight_fraction_leaf": (0.0, 0.5, 'uniform'),
            "max_leaf_nodes": (30, 256),
            "min_impurity_decrease": (1e-6, 1e-1, 'log-uniform'),
        }
        estimator_name = "et"
    elif isinstance(estimator, SVC):
        params = {
            "C": Real(1e-6, 1e+6, prior='log-uniform'),
            "gamma": Real(1e-6, 1e+1, prior='log-uniform'),
            "degree": Integer(1, 3),
            "kernel": Categorical(['linear', 'poly', 'rbf']),
        }
        estimator_name = "svc"
    elif isinstance(estimator, LogisticRegression):
        params = {
            "C": Real(1e-6, 1e+6, prior='log-uniform'),
            # "penalty": Categorical(['l1', 'l2']),
            "solver": Categorical(["newton-cg", "lbfgs", "liblinear", "saga"]),
        }
        estimator_name = "lr"
    elif isinstance(estimator, MLPClassifier):
        hls = []
        for i in [16, 32, 64]:
            hls.append((i * 2, i * 3))
            hls.append((i * 2, i * 3, i * 2))
            hls.append((i, i * 2, i * 4, i * 3))
        params = {
            "hidden_layer_sizes": Categorical(hls),
            "activation": Categorical(["logistic", "tanh", "relu"]),
            "solver": Categorical(["lbfgs", "sgd", "adam"]),
            "learning_rate": Categorical(["invscaling", "adaptive"]),
            "alpha": Categorical([0.00001, 0.0001, 0.001, 0.01]),
        }
        estimator_name = "mlp"
    else:
        print("wrong base estimator used")
        return

    return (estimator_name, params)
def test_integer_distance_out_of_range():
    ints = Integer(1, 10)
    assert_raises_regex(RuntimeError, "compute distance for values within",
                        ints.distance, 11, 10)
def test_integer_distance():
    ints = Integer(1, 10)
    for i in range(1, 10 + 1):
        assert_equal(ints.distance(4, i), abs(4 - i))
def test_space_consistency():
    # Reals (uniform)
    s1 = Space([Real(0.0, 1.0)])
    s2 = Space([Real(0.0, 1.0)])
    s3 = Space([Real(0, 1)])
    s4 = Space([(0.0, 1.0)])
    s5 = Space([(0.0, 1.0, "uniform")])
    s6 = Space([(0, 1.0)])
    s7 = Space([(np.float64(0.0), 1.0)])
    s8 = Space([(0, np.float64(1.0))])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    a4 = s4.rvs(n_samples=10, random_state=0)
    a5 = s5.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_equal(s1, s3)
    assert_equal(s1, s4)
    assert_equal(s1, s5)
    assert_equal(s1, s6)
    assert_equal(s1, s7)
    assert_equal(s1, s8)
    assert_array_equal(a1, a2)
    assert_array_equal(a1, a3)
    assert_array_equal(a1, a4)
    assert_array_equal(a1, a5)

    # Reals (log-uniform)
    s1 = Space([Real(10**-3.0, 10**3.0, prior="log-uniform")])
    s2 = Space([Real(10**-3.0, 10**3.0, prior="log-uniform")])
    s3 = Space([Real(10**-3, 10**3, prior="log-uniform")])
    s4 = Space([(10**-3.0, 10**3.0, "log-uniform")])
    s5 = Space([(np.float64(10**-3.0), 10**3.0, "log-uniform")])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    a4 = s4.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_equal(s1, s3)
    assert_equal(s1, s4)
    assert_equal(s1, s5)
    assert_array_equal(a1, a2)
    assert_array_equal(a1, a3)
    assert_array_equal(a1, a4)

    # Integers
    s1 = Space([Integer(1, 5)])
    s2 = Space([Integer(1.0, 5.0)])
    s3 = Space([(1, 5)])
    s4 = Space([(np.int64(1.0), 5)])
    s5 = Space([(1, np.int64(5.0))])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_equal(s1, s3)
    assert_equal(s1, s4)
    assert_equal(s1, s5)
    assert_array_equal(a1, a2)
    assert_array_equal(a1, a3)

    # Categoricals
    s1 = Space([Categorical(["a", "b", "c"])])
    s2 = Space([Categorical(["a", "b", "c"])])
    s3 = Space([["a", "b", "c"]])
    a1 = s1.rvs(n_samples=10, random_state=0)
    a2 = s2.rvs(n_samples=10, random_state=0)
    a3 = s3.rvs(n_samples=10, random_state=0)
    assert_equal(s1, s2)
    assert_array_equal(a1, a2)
    assert_equal(s1, s3)
    assert_array_equal(a1, a3)

    s1 = Space([(True, False)])
    s2 = Space([Categorical([True, False])])
    assert s1 == s2
# example of bayesian optimization with scikit-optimize
from numpy import mean
from sklearn.datasets import make_blobs
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from skopt.space import Integer
from skopt.utils import use_named_args
from skopt import gp_minimize

# generate 2d classification dataset
X, y = make_blobs(n_samples=500, centers=3, n_features=2)
# define the model
model = KNeighborsClassifier()
# define the space of hyperparameters to search
search_space = [Integer(1, 5, name='n_neighbors'), Integer(1, 2, name='p')]


# define the function used to evaluate a given configuration
@use_named_args(search_space)
def evaluate_model(**params):
    # configure the model with the given hyperparameters
    model.set_params(**params)
    # calculate 5-fold cross validation
    result = cross_val_score(model, X, y, cv=5, n_jobs=-1, scoring='accuracy')
    # calculate the mean of the scores
    estimate = mean(result)
    # gp_minimize minimizes, so return one minus the mean accuracy
    return 1.0 - estimate


# perform optimization
result = gp_minimize(evaluate_model, search_space)
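The object returned by gp_minimize is a scipy-style OptimizeResult; a short sketch of how the snippet above would typically report the outcome, using its standard fun (best objective value) and x (best parameter list) attributes:

# summarize the outcome of the search
print('Best accuracy: %.3f' % (1.0 - result.fun))
print('Best parameters: n_neighbors=%d, p=%d' % (result.x[0], result.x[1]))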
init_type = "random" #init_type = "sobol" #init_type = "latin" ##### # Experiment parameters ##### boston = load_boston() X, y = boston.data, boston.target n_features = X.shape[1] space = [ Real(10**-5, 10**0, name='learning_rate_init'), Real(10**-5, 10**0, name='alpha'), Integer(1, 100, name='hidden_layer_sizes') ] space_gpyopt = [ { "name": "learning_rate_init", "type": "continuous", "domain": (10**-5, 10**0) }, { "name": "alpha", "type": "continuous", "domain": (10**-5, 10**0) }, { "name": "hidden_layer_sizes",
def run_KNNRecommender_on_similarity_type(similarity_type, parameterSearch,
                                          parameter_search_space,
                                          recommender_input_args,
                                          n_cases,
                                          n_random_starts,
                                          resume_from_saved,
                                          save_model,
                                          output_folder_path,
                                          output_file_name_root,
                                          metric_to_optimize,
                                          allow_weighting=False,
                                          recommender_input_args_last_test=None):

    original_parameter_search_space = parameter_search_space

    hyperparameters_range_dictionary = {}
    hyperparameters_range_dictionary["topK"] = Categorical(range(5, 900, 5))  # Integer(5, 1000)
    hyperparameters_range_dictionary["shrink"] = Integer(0, 1000)
    hyperparameters_range_dictionary["similarity"] = Categorical([similarity_type])
    hyperparameters_range_dictionary["normalize"] = Categorical([True, False])

    is_set_similarity = similarity_type in ["tversky", "dice", "jaccard", "tanimoto"]

    if similarity_type == "asymmetric":
        hyperparameters_range_dictionary["asymmetric_alpha"] = Real(low=0, high=2, prior='uniform')
        hyperparameters_range_dictionary["normalize"] = Categorical([True])

    elif similarity_type == "tversky":
        hyperparameters_range_dictionary["tversky_alpha"] = Real(low=0, high=2, prior='uniform')
        hyperparameters_range_dictionary["tversky_beta"] = Real(low=0, high=2, prior='uniform')
        hyperparameters_range_dictionary["normalize"] = Categorical([True])

    elif similarity_type == "euclidean":
        hyperparameters_range_dictionary["normalize"] = Categorical([True, False])
        hyperparameters_range_dictionary["normalize_avg_row"] = Categorical([True, False])
        hyperparameters_range_dictionary["similarity_from_distance_mode"] = Categorical(["lin", "log", "exp"])

    if not is_set_similarity:
        if allow_weighting:
            hyperparameters_range_dictionary["feature_weighting"] = Categorical(["none", "BM25", "TF-IDF"])

    local_parameter_search_space = {**hyperparameters_range_dictionary,
                                    **original_parameter_search_space}

    parameterSearch.search(
        recommender_input_args,
        parameter_search_space=local_parameter_search_space,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        resume_from_saved=resume_from_saved,
        save_model=save_model,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root + "_" + similarity_type,
        metric_to_optimize=metric_to_optimize,
        recommender_input_args_last_test=recommender_input_args_last_test)
_local_variables['docker_client'] = docker_client
_local_variables['report_name'] = report_name

# Parameter names for appending the best configuration.
PARAMETERS = ('gop', 'bitrate', 'ip-period', 'init-qp', 'qpmin', 'qpmax',
              'disable-frame-skip', 'diff-qp-ip', 'diff-qp-ib', 'num-ref-frame',
              'rc-mode', 'profile', 'cabac', 'dct8x8', 'deblock-filter',
              'prefix-nal', 'idr-interval')

# All parameters available in qsv-h264 and their ranges
# https://github.com/intel/libyami-utils/blob/c64cad218e676cc02b426cb67b660d8eb2567d3b/tests/encodehelp.h
# https://github.com/intel/libyami/blob/apache/interface/VideoEncoderDefs.h
# https://github.com/intel/libyami-utils/blob/master/doc/yamitranscode.1
SPACE = [
    Integer(1, 250, name='gop'),
    Integer(100, 5000, name='bitrate'),
    Integer(0, 50, name='ip_period'),  # @TODO check range
    Integer(0, 51, name='init_qp'),
    Integer(0, 50, name='qpmin'),
    Integer(0, 51, name='qpmax'),
    Categorical((0, 1), name='disable_frame_skip'),
    Integer(0, 51, name='diff_qp_ip'),  # @TODO check range
    Integer(0, 51, name='diff_qp_ib'),  # @TODO check range
    Integer(0, 16, name='num_ref_frame'),
    Integer(0, 4, name='rc_mode'),
    Integer(0, 2, name='profile'),
    Categorical((0, 1), name='cabac'),
    Categorical((0, 1), name='dct8x8'),
    Categorical((0, 1), name='deblock_filter'),
    Categorical((0, 1), name='prefix_nal'),
                    epochs=200,
                    batch_size=batch_size,
                    verbose=0,
                    class_weight=classWeight,
                    callbacks=[early_stopping, model_checkpoint],
                    validation_split=0.25)

    Y_predict = model.predict(X_test)
    fpr, tpr, thresholds = roc_curve(Y_test, Y_predict)
    roc_auc = auc(fpr, tpr)
    return roc_auc
    # best_acc = max(history.history['val_acc'])
    # return best_acc


space = [
    Integer(2, 5, name='hidden_layers'),
    Integer(32, 1024, name='initial_nodes'),
    Real(10**-6, 10**-3, "log-uniform", name='l2_lambda'),
    Real(0.0, 0.5, name='dropout'),
    Integer(256, 4096, name='batch_size'),
    Real(10**-5, 10**-1, "log-uniform", name='learning_rate'),
]


@use_named_args(space)
def objective(**X):
    print("New configuration: {}".format(X))

    model = build_custom_model(num_hiddens=X['hidden_layers'],
                               initial_node=X['initial_nodes'],
                               dropout=X['dropout'],
def get_parameters_space(hidden_layers_comb):
    params = [conf_to_params(conf)
              for conf in Config.get("bayesianOpt")["hyperparameters"]]
    # index of the largest hidden-layer combination (renamed to avoid shadowing built-in max)
    max_idx = len(hidden_layers_comb[-1]) - 1
    params.append(Integer(0, max_idx, name="hidden_layer_choice"))
    return params
import numpy as np
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize

boston = load_boston()
X, y = boston.data, boston.target
n_features = X.shape[1]

reg = GradientBoostingRegressor(n_estimators=50, random_state=0)

space = [
    Integer(1, 5, name='max_depth'),
    Real(10**-5, 10**0, "log-uniform", name='learning_rate'),
    Integer(1, n_features, name='max_features'),
    Integer(2, 100, name='min_samples_split'),
    Integer(1, 100, name='min_samples_leaf'),
]


@use_named_args(space)
def objective(**params):
    reg.set_params(**params)
    return -np.mean(cross_val_score(reg, X, y, cv=5, n_jobs=-1,
                                    scoring="neg_mean_absolute_error"))
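The snippet stops before the optimization itself is launched. A minimal sketch of the usual next step; n_calls=50 and random_state=0 are illustrative assumptions, not values taken from the source:

# run the Bayesian optimization over the space defined above
res_gp = gp_minimize(objective, space, n_calls=50, random_state=0)  # assumed settings
print("Best MAE: %.4f" % res_gp.fun)
print("Best hyperparameters:", res_gp.x)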
    return s.data.cpu().numpy()


def fn_skopt(params):
    x, y, z = params
    px = torch.tensor(x, device='cpu', requires_grad=True)
    py = torch.tensor(y, device='cpu', requires_grad=True)
    s = torch.tensor(0.5, device='cpu', requires_grad=True)
    for i in trange(10, leave=False):
        s = s + 0.5 * px + py
        sleep(0.1)
    return float(s.data.cpu().numpy())


if args.fn == 'fn_skopt':
    res_gp = gp_minimize(func=fn_skopt,
                         dimensions=[Real(-10, 10, 'uniform', name='x'),
                                     Real(-10, 10, 'uniform', name='y'),
                                     Integer(-10, 10, name='z')],
                         n_calls=15,
                         random_state=0)
    print("Best score=%.4f" % res_gp.fun)
    print('best param', res_gp.x)
    best = res_gp.fun
else:
    bo = bayesopt(fn_bayes, {'x': [-10, 10], 'y': [-10, 10], 'z': [-10, 10]})
    bo.maximize(init_points=5, n_iter=10, kappa=2)
    best = bo.res['max']
    print(bo.res['all'])
    print(best)
def runParameterSearch_Collaborative(recommender_class, URM_train,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     allow_weighting=True,
                                     similarity_type_list=None):

    # If the output directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    try:
        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            """
            TopPop, GlobalEffects and Random have no parameters,
            therefore only one evaluation is needed
            """

            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            parameterSearch.search(recommender_parameters,
                                   fit_parameters_values={},
                                   output_folder_path=output_folder_path,
                                   output_file_name_root=output_file_name_root)

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_parameters=recommender_parameters,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting)

            if parallelizeKNN:
                pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                            maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
                pool.close()
                pool.join()
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 800)
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 800)
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 250)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 800)
            hyperparameters_range_dictionary["l1_ratio"] = Real(low=1e-5, high=1.0, prior='log-uniform')
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3, high=1.0, prior='uniform')

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        # Final step: the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_parameters,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

    except Exception as e:
        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
        error_file.close()
N_SPLITS = 3
groups = create_fold_groups(X, n_splits=N_SPLITS)
kfold = CustomSplitter(n_splits=N_SPLITS)

# Create estimator and metrics
estimator = RandomForestClassifier(n_jobs=-1,
                                   random_state=42,
                                   n_estimators=100,
                                   class_weight='balanced')
scorer = make_scorer(sharpe)

# Define parameter search space
search_spaces = {
    'max_features': Real(.05, .5),
    'max_samples': Real(.1, .99),
    'min_samples_leaf': Integer(1, 100),
}

opt = CustomBayesSearch(estimator=estimator,
                        search_spaces=search_spaces,
                        n_iter=100,
                        scoring=scorer,
                        cv=kfold)
opt.fit(X, y, groups)

submitter = PredictionSubmitter(napi=napi,
                                model=opt.best_estimator_,
                                napi_user='******')
submitter.submit()
from __future__ import print_function

from skopt import gp_minimize
from skopt.utils import use_named_args
from skopt.space import Integer
import numpy as np
import scipy.io as sio
import BER_calc
from datetime import datetime
from sklearn.utils import shuffle
import tensorflow.compat.v1 as tf
from scipy import special

tf.disable_v2_behavior()

top = 100
dim_delay = Integer(low=1, high=50, name='lr')
dim_num_dense_layers11 = Integer(low=1, high=top, name='node_layer11')
dim_num_dense_layers12 = Integer(low=1, high=top, name='node_layer12')
dim_num_dense_layers13 = Integer(low=1, high=top, name='node_layer13')
dim_num_dense_layers21 = Integer(low=1, high=top, name='node_layer21')
dim_num_dense_layers22 = Integer(low=1, high=top, name='node_layer22')
dim_num_dense_layers23 = Integer(low=1, high=top, name='node_layer23')
# dim_batch_size = Integer(low=200, high=200, name='batch_size')

dimensions = [
    dim_delay,
    dim_num_dense_layers11,
    dim_num_dense_layers12,
    dim_num_dense_layers13,
    dim_num_dense_layers21,
    dim_num_dense_layers22,
from skopt.utils import use_named_args
from hyperopt import tpe, hp, fmin
import time
from datetime import datetime as dt

SEED = 1           # random seed
N_FOLD = 5         # number of folds in StratifiedKFold
MAX_CALLS = 20     # number of iterations for hyperparameter tuning
CHUNKS = 100000
CLASS_WEIGHTS_GAL = {6: 1, 16: 1, 53: 1, 65: 1, 92: 1}
CLASS_WEIGHTS_EXTRA = {15: 2, 42: 1, 52: 1, 62: 1, 64: 2, 67: 1, 88: 1, 90: 1, 95: 1}
TUNING = 'skopt'   # 'skopt' or 'hyperopt'

# Tuning LightGBM parameters
space_skopt = [Integer(4, 7, name='max_depth'),
               Real(low=1e-3, high=1e-1, prior="log-uniform", name='learning_rate'),
               Integer(low=100, high=800, name='n_estimators')]

space_hyperopt = {'max_depth': hp.choice('max_depth', range(4, 8, 1)),
                  'learning_rate': hp.loguniform('learning_rate', np.log(1e-3), np.log(1e-1)),
                  'n_estimators': hp.choice('n_estimators', range(100, 801, 1))}


def multi_weighted_logloss(y_true, y_preds):
    """
    Multi logloss for PLAsTiCC challenge
    """
    if len(np.unique(y_true)) == 5:
        class_weights = CLASS_WEIGHTS_GAL
    elif len(np.unique(y_true)) == 9:
        class_weights = CLASS_WEIGHTS_EXTRA
# Here we define a function that we evaluate.
def objective(params):
    clf = DecisionTreeClassifier(**{dim.name: val
                                    for dim, val in zip(SPACE, params)
                                    if dim.name != 'dummy'})
    return -np.mean(cross_val_score(clf, *load_breast_cancer(return_X_y=True)))

#############################################################################
# Bayesian optimization
# =====================

SPACE = [
    Integer(1, 20, name='max_depth'),
    Integer(2, 100, name='min_samples_split'),
    Integer(5, 30, name='min_samples_leaf'),
    Integer(1, 30, name='max_features'),
    Categorical(list('abc'), name='dummy'),
    Categorical(['gini', 'entropy'], name='criterion'),
    Categorical(list('def'), name='dummy'),
]

result = gp_minimize(objective, SPACE, n_calls=20)

#############################################################################
# Partial dependence plot
# =======================
#
# Here we see an example of using partial dependence. Even when setting
hpo_params = {
    'n_calls': 100,
    'n_random_starts': 10,
    'base_estimator': 'ET',
    'acq_func': 'EI',
    'xi': 0.02,
    'kappa': 1.96,
    'n_points': 10000,
}

rf_space = [
    Categorical([10, 100, 500], name='n_estimators'),
    Categorical(['auto', 'log2'], name='max_features'),
    Categorical([2, 5, 10, 20, None], name='max_depth'),
    Real(0.0001, 1, name='min_samples_split'),
    Integer(1, 5, name='min_samples_leaf'),
    Categorical([None, 50, 100, 150, 200], name='max_leaf_nodes')
    # Integer(1, 37, name='max_features')
]

ada_space = [
    # Integer(200, 500, name='n_estimators'),
    Real(0.01, 1, prior="log-uniform", name='learning_rate')
]

gbc_space = [
    Real(0.01, 1, prior="log-uniform", name='learning_rate'),
    Integer(200, 500, name='n_estimators'),
    Integer(1, 10, name='max_depth'),
    Real(0.1, 1, name='min_samples_split'),
    Real(0.1, 0.5, name='min_samples_leaf'),
    Integer(1, 10, name='max_features')
with open("/home/norberteke/PycharmProjects/Thesis/data/GH_recent_full_activity_corpus.txt", 'w') as f:
    for text in texts:
        f.write(str(text) + "\n")

corpus = [dictionary.doc2bow(text) for text in texts]

model = LdaTransformer(id2word=dictionary,
                       alpha='auto',
                       iterations=100,
                       random_state=2019)

# The list of hyperparameters we want to optimize. For each one we define the
# bounds and the corresponding scikit-learn parameter name.
space = [Integer(20, 500, name='num_topics'),
         Real(0.001, 200, name='eta')]


# This decorator allows the objective function to receive the parameters as
# keyword arguments. This is particularly convenient when you want to set
# scikit-learn estimator parameters.
@use_named_args(space)
def objective(**params):
    model.set_params(**params)
    lda = model.fit(corpus)
    coherence = evaluateModel(lda.gensim_model)
    try:
        cm = CoherenceModel(model=lda.gensim_model,
                            corpus=corpus,
                            dictionary=dictionary,
                            coherence='u_mass')
    for i in range(1, 10 + 1):
        assert_equal(reals.distance(4.1234, i), abs(4.1234 - i))


@pytest.mark.parametrize("dimension, bounds",
                         [(Real, (2, 1)), (Integer, (2, 1)),
                          (Real, (2, 2)), (Integer, (2, 2))])
def test_dimension_bounds(dimension, bounds):
    with pytest.raises(ValueError) as exc:
        dim = dimension(*bounds)
        assert "has to be less than the upper bound " in exc.value.args[0]


@pytest.mark.parametrize("dimension, name",
                         [(Real(1, 2, name="learning rate"), "learning rate"),
                          (Integer(1, 100, name="no of trees"), "no of trees"),
                          (Categorical(["red, blue"], name="colors"), "colors")])
def test_dimension_name(dimension, name):
    assert dimension.name == name


@pytest.mark.parametrize("dimension",
                         [Real(1, 2), Integer(1, 100), Categorical(["red, blue"])])
def test_dimension_name_none(dimension):
    assert dimension.name is None


def test_dimension_name_invalid():  # renamed: a second test_dimension_name would shadow the parametrized test above
    notnames = [1, 1., True]