def get_param_space():
    """Return the hyper-parameter search space, keyed by solver name.

    The result maps 'sgd' and 'adam' to a dict of parameter name ->
    candidate list or frozen scipy ``randint`` distribution.  Both solvers
    share the same ranges; only the 'solver' entry differs.

    'primary_metric' is drawn with ``np.random.choice`` while the space is
    being built, so each solver ends up with a single, already chosen
    metric.

    An earlier variant of this function also described 'svc' and 'rfc'
    classifiers; that code is no longer active.
    """
    param_space = {}
    for solver in ('sgd', 'adam'):
        param_space[solver] = {
            'solver': [solver],
            # One draw per solver, made here rather than by the sampler.
            'primary_metric': [np.random.choice(['ce', 'accuracy'])],
            # scipy's randint excludes its upper bound.
            'lr': randint(-6, -1),
            'L2': randint(-8, -1),
            'num_epochs': randint(2, 5),
            'batch_size': randint(4, 8),
        }
    return param_space
def __init__(self, low, high):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    * `low` [int]:
        Lower bound (inclusive).

    * `high` [int]:
        Upper bound (inclusive).
    """
    self._low, self._high = low, high
    # scipy's randint excludes its upper bound, hence the +1 to keep
    # `high` inside the sampled range.
    self._rvs = randint(self._low, self._high + 1)
    self.transformer = _Identity()
def get_param_space():
    """Return the classifiers with their search spaces and parameter types.

    Returns
    -------
    tuple
        ``(clfs, param_space, param_types)``; all three dicts are keyed by
        the classifier names "svc" and "rfc".  ``param_space`` holds scipy
        distributions or candidate lists, ``param_types`` the matching type
        tag ("real", "int" or "categorical") for every parameter.
    """
    svc_space = {
        "C": expon(scale=100),
        "gamma": expon(scale=0.1),
        "probability": [True],
        "kernel": ["linear"],
    }
    svc_types = {
        "C": "real",
        "gamma": "real",
        "probability": "int",
        "kernel": "categorical",
    }
    rfc_space = {"n_estimators": randint(50, 600), "max_features": [1, 2]}
    rfc_types = {"n_estimators": "int", "max_features": "int"}

    param_space = {"svc": svc_space, "rfc": rfc_space}
    param_types = {"svc": svc_types, "rfc": rfc_types}

    # clfs maps string-names to a cloneable clf instance.
    clfs = {"svc": SVC(), "rfc": RFC()}
    return clfs, param_space, param_types
def get_param_space():
    """Return the classifiers with their search spaces and parameter types.

    Returns
    -------
    tuple
        ``(clfs, param_space, param_types)``; all three dicts are keyed by
        the classifier names 'svc' and 'rfc'.  ``param_space`` holds scipy
        distributions or candidate lists, ``param_types`` the matching type
        tag ('real', 'int' or 'categorical') for every parameter.
    """
    svc_space = {
        'C': expon(scale=100),
        'gamma': expon(scale=0.1),
        'probability': [True],
        'kernel': ['linear'],
    }
    svc_types = {
        'C': 'real',
        'gamma': 'real',
        'probability': 'int',
        'kernel': 'categorical',
    }
    rfc_space = {'n_estimators': randint(50, 600), 'max_features': [1, 2]}
    rfc_types = {'n_estimators': 'int', 'max_features': 'int'}

    param_space = {'svc': svc_space, 'rfc': rfc_space}
    param_types = {'svc': svc_types, 'rfc': rfc_types}

    # clfs maps string-names to a cloneable clf instance.
    clfs = {'svc': SVC(), 'rfc': RFC()}
    return clfs, param_space, param_types
def __init__(self, low, high, transform=None, name=None):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    * `low` [int]:
        Lower bound (inclusive).

    * `high` [int]:
        Upper bound (inclusive).

    * `transform` ["identity", "normalize", optional]:
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    * `name` [str or None]:
        Name associated with dimension, e.g., "number of trees".

    Raises a ValueError when `high` is not greater than `low` or when
    `transform` is not one of the supported values.
    """
    if high <= low:
        raise ValueError(
            "the lower bound {} has to be less than the upper bound {}".format(
                low, high))

    self.low, self.high = low, high
    self.name = name

    chosen = "identity" if transform is None else transform
    self.transform_ = chosen
    if chosen not in ("normalize", "identity"):
        raise ValueError(
            "transform should be 'normalize' or 'identity' got {}".format(
                chosen))

    if chosen != "normalize":
        # scipy's randint excludes its upper bound, hence the +1.
        self._rvs = randint(self.low, self.high + 1)
        self.transformer = Identity()
    else:
        self._rvs = uniform(0, 1)
        self.transformer = Normalize(low, high, is_int=True)
def set_transformer(self, transform="identity"):
    """Define _rvs and transformer spaces.

    Parameters
    ----------
    transform : str
        Can be 'normalize' or 'identity'

    Raises
    ------
    ValueError
        If `transform` is neither 'normalize' nor 'identity'.
    """
    # Validate first so that a rejected value never overwrites the
    # transform that is currently in effect.
    if transform not in ["normalize", "identity"]:
        raise ValueError("transform should be 'normalize' or 'identity'"
                         " got {}".format(transform))
    self.transform_ = transform

    if self.transform_ == "normalize":
        self._rvs = _uniform_inclusive(0.0, 1.0)
        if self.prior == "uniform":
            self.transformer = Pipeline(
                [Identity(), Normalize(self.low, self.high, is_int=True)])
        else:
            # Normalise in log space: bounds are log_base(low), log_base(high).
            self.transformer = Pipeline([
                LogN(self.base),
                Normalize(
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base,
                ),
            ])
    else:
        if self.prior == "uniform":
            # scipy's randint excludes its upper bound, hence the +1.
            self._rvs = randint(self.low, self.high + 1)
            self.transformer = Identity()
        else:
            # Second argument is the width of the interval in log space.
            self._rvs = _uniform_inclusive(
                np.log10(self.low) / self.log_base,
                np.log10(self.high) / self.log_base
                - np.log10(self.low) / self.log_base,
            )
            self.transformer = LogN(self.base)
def __init__(self, low, high, transform=None):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    * `low` [int]:
        Lower bound (inclusive).

    * `high` [int]:
        Upper bound (inclusive).

    * `transform` ["identity", "normalize", optional]:
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    Raises a ValueError when `transform` is not one of the supported
    values.
    """
    self.low, self.high = low, high

    chosen = "identity" if transform is None else transform
    self.transform_ = chosen
    if chosen not in ("normalize", "identity"):
        raise ValueError(
            "transform should be 'normalize' or 'identity' got %s" % chosen)

    if chosen != "normalize":
        # scipy's randint excludes its upper bound, hence the +1.
        self._rvs = randint(self.low, self.high + 1)
        self.transformer = Identity()
    else:
        self._rvs = uniform(0, 1)
        self.transformer = Normalize(low, high, is_int=True)
def __init__(self, low, high, transform=None):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    * `low` [int]:
        Lower bound (inclusive).

    * `high` [int]:
        Upper bound (inclusive).

    * `transform` ["identity", "normalize", optional]:
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    Raises a ValueError when `transform` is not one of the supported
    values.
    """
    self.low, self.high = low, high

    chosen = "identity" if transform is None else transform
    self.transform_ = chosen
    if chosen not in ("normalize", "identity"):
        raise ValueError(
            "transform should be 'normalize' or 'identity' got {}".format(
                chosen))

    if chosen != "normalize":
        # scipy's randint excludes its upper bound, hence the +1.
        self._rvs = randint(self.low, self.high + 1)
        self.transformer = Identity()
    else:
        self._rvs = uniform(0, 1)
        self.transformer = Normalize(low, high, is_int=True)
def __init__(self, low, high, prior="uniform", base=10, transform=None,
             name=None, dtype=np.int64):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    low : int
        Lower bound (inclusive).
    high : int
        Upper bound (inclusive).
    prior : str, default="uniform"
        "uniform" samples integers directly; any other value takes the
        log-space code path, which samples between the base-`base`
        logarithms of the bounds.
    base : int, default=10
        Logarithmic base used by the log-space code path.
    transform : "identity" or "normalize", optional
        "identity" (used when None is given) or "normalize".
    name : str or None
        Stored on the instance as `name`.
    dtype : str or type, default=np.int64
        Stored on the instance as `dtype`; must name or be an integer type.

    Raises
    ------
    ValueError
        If `high` is not greater than `low`, if `dtype` is a str or type
        outside the accepted integer types, or if `transform` is not
        supported.
    """
    if high <= low:
        raise ValueError(
            "the lower bound {} has to be less than the upper bound {}".format(
                low, high))

    self.low, self.high = low, high
    self.prior = prior
    self.base = base
    self.log_base = np.log10(base)
    self.name = name
    self.dtype = dtype

    # Values that are neither str nor type are accepted without a check.
    if isinstance(dtype, str):
        if dtype not in ['int', 'int8', 'int16', 'int32', 'int64',
                         'uint8', 'uint16', 'uint32', 'uint64']:
            raise ValueError("dtype must be 'int', 'int8', 'int16',"
                             "'int32', 'int64', 'uint8',"
                             "'uint16', 'uint32', or"
                             "'uint64', but got {}".format(dtype))
    elif isinstance(dtype, type) and dtype not in [
            int, np.int8, np.int16, np.int32, np.int64,
            np.uint8, np.uint16, np.uint32, np.uint64]:
        raise ValueError("dtype must be 'int', 'np.int8', 'np.int16',"
                         "'np.int32', 'np.int64', 'np.uint8',"
                         "'np.uint16', 'np.uint32', or"
                         "'np.uint64', but got {}".format(dtype))

    chosen = "identity" if transform is None else transform
    self.transform_ = chosen
    if chosen not in ("normalize", "identity"):
        raise ValueError(
            "transform should be 'normalize' or 'identity' got {}".format(
                chosen))

    log_prior = self.prior != "uniform"
    if chosen == "normalize":
        self._rvs = _uniform_inclusive(0.0, 1.0)
        if log_prior:
            # Normalise in log space between log_base(low) and log_base(high).
            self.transformer = Pipeline([
                LogN(self.base),
                Normalize(np.log10(low) / self.log_base,
                          np.log10(high) / self.log_base),
            ])
        else:
            self.transformer = Pipeline(
                [Identity(), Normalize(low, high, is_int=True)])
    elif log_prior:
        log_low = np.log10(self.low) / self.log_base
        log_high = np.log10(self.high) / self.log_base
        # Second argument is the width of the interval in log space.
        self._rvs = _uniform_inclusive(log_low, log_high - log_low)
        self.transformer = LogN(self.base)
    else:
        # scipy's randint excludes its upper bound, hence the +1.
        self._rvs = randint(self.low, self.high + 1)
        self.transformer = Identity()
# exit()

# Scorer built from gini_sklearn; higher is better and it is fed predicted
# probabilities rather than class labels.
gini_scorer = make_scorer(gini_sklearn, greater_is_better=True, needs_proba=True)

rsc = RandomizedSearchCV(
    estimator=model,
    param_distributions={
        # Other dimensions that have been tried, kept for reference:
        #'n_estimators': randint(25, 250),
        #'subsample': uniform(0.5, 0.5),
        #'subsample_freq': randint(2, 25),
        #'colsample_bytree': uniform(0.5, 0.5),
        #'learning_rate': uniform(0.0, 0.1),
        #'min_child_samples': randint(5, 500),
        'num_leaves': randint(5, 200),
    },
    scoring=gini_scorer,
    cv=StratifiedShuffleSplit(n_splits=5, test_size=0.2),
    verbose=2,
    n_iter=5,
    # 'mean_train_score' is read from cv_results_ below; scikit-learn only
    # records training scores when this flag is set.
    return_train_score=True,
)
grid_result = rsc.fit(X_partial, Y_partial)

# Report the best candidate, then train/test means for every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for test_mean, train_mean, param in zip(
        grid_result.cv_results_['mean_test_score'],
        grid_result.cv_results_['mean_train_score'],
        grid_result.cv_results_['params']):
    print("Train: %f // Test : %f with: %r" % (train_mean, test_mean, param))
} booster_params = { 'eta': 0.1, # default=0.3 'gamma': 0., # default=0.; larger => more conservative 'max_depth': 6, # default=6 'min_child_weight': 1, # default=1; larger => more conservative 'subsample': 1., # default=1.; proportion of points to sample each round 'lambda': 1, # default=1, L2 regularization 'alpha': 0, # default=0, L1 regularization } # Parameter space to search over param_dist = { 'eta': [0.1], 'gamma': expon(), 'max_depth': randint(3, 10), 'min_child_weight': randint(1, 10), 'subsample': uniform(0.5, 0.5), 'lambda': expon(), 'alpha': expon() } sampler = ParameterSampler(param_dist, n_iter=32, random_state=1) # Perform the search best_score = np.Inf best_params = {**general_params, **booster_params} # Repeatedly sample parameters from the above distributions print('Testing hyperparameters...') for point in tqdm(sampler): current_params = best_params.copy()
def get_param_space():
    """Define the parameter space; used by driver.py.

    Returns a dict with the single key 'sleep', mapped to a frozen scipy
    ``randint(1, 5)`` distribution.
    """
    sleep_distribution = randint(1, 5)
    return {'sleep': sleep_distribution}
#'max_features': ('auto', 'sqrt'), #'max_features': range(3, 9), #'max_depth': range(3, 7), }, #scoring='neg_log_loss', scoring='roc_auc', #scoring='f1', #scoring=gini_scorer, cv=cv, verbose=2 ) rsc = RandomizedSearchCV( estimator=rf, param_distributions={ 'n_estimators': randint(250, 2500), #'class_weight': [{0: 1, 1: x} for x in range(15, 51, 5)], #'criterion': ('gini', 'entropy'), #'min_samples_leaf': randint(15, 50), #'min_samples_split': randint(15, 50), #'max_features': ('auto', 'sqrt') + range(5,50), #'max_features': range(3, 9), #'max_depth': randint(2, 6), }, #scoring=gini_scorer, scoring='roc_auc', cv=cv, verbose=2, n_iter=7 )
from SALib.analyze import sobol

# Command-line interface: number of samples and the output file.
parser = ArgumentParser()
parser.description = "Draw samples using the Saltelli methods"
parser.add_argument(
    "-s",
    "--n_samples",
    dest="n_samples",
    type=int,
    help="""number of samples to draw. default=10.""",
    default=10,
)
# nargs="?" makes the positional optional so that `default` actually
# applies; with nargs=1 argparse requires the argument and ignores it.
parser.add_argument(
    "OUTFILE",
    nargs="?",
    help="Ouput file (CSV)",
    default="saltelli_samples.csv",
)
options = parser.parse_args()
n_samples = options.n_samples
outfile = options.OUTFILE

# One frozen scipy distribution per uncertain variable, in a fixed order.
distributions = OrderedDict()
distributions["m_min"] = uniform(loc=-1.5, scale=0.5)
distributions["m_max"] = uniform(loc=4.0, scale=1.0)
distributions["h_min"] = randint(50, 150)
distributions["h_ela"] = randint(1500, 1800)
distributions["h_max"] = randint(2500, 3000)

# Names of all the variables, as a real list (not a dict view) so it can
# be indexed and handed to SALib as `names`.
keys = list(distributions)
print(distributions)

# Generate the Sobol sequence samples with uniform distributions
problem = {"num_vars": len(keys), "names": keys, "bounds": [[0, 1]] * len(keys)}

# Generate samples
unif_sample = saltelli.sample(problem, n_samples, calc_second_order=False)

# To hold the transformed variables
#!/usr/bin/env python import numpy as np import pandas as pd from pyDOE import lhs from scipy.stats.distributions import truncnorm, gamma, uniform, randint import pylab as plt # The number of allowable model runs n_samples = 500 # scipy.stats.distributions objects for each distribution, per Table 1 in the paper. Note that for truncated normal, the bounds are relative to the mean in units of scale, so if we want a positive distribution for a normal with mean 8 and sigma 4, then the lower bound is -8/4=-2 distributions = { "GCM": randint(0, 4), "FICE": truncnorm(-4 / 4.0, 4.0 / 4, loc=8, scale=4), "FSNOW": truncnorm(-4.1 / 3, 4.1 / 3, loc=4.1, scale=1.5), "PRS": uniform(loc=5, scale=2), "RFR": truncnorm(-0.4 / 0.3, 0.4 / 0.3, loc=0.5, scale=0.2), "OCM": randint(-1, 2), "OCS": randint(-1, 2), "TCT": randint(-1, 2), "VCM": truncnorm(-0.35 / 0.2, 0.35 / 0.2, loc=1, scale=0.2), "PPQ": truncnorm(-0.35 / 0.2, 0.35 / 0.2, loc=0.6, scale=0.2), "SIAE": gamma(1.5, scale=0.8, loc=1), } # Names of all the variables keys = ["GCM", "FICE", "FSNOW", "PRS", "RFR", "OCM", "OCS", "TCT", "VCM", "PPQ", "SIAE"] # Generate the latin hypercube samples with uniform distributions unif_sample = lhs(len(keys), n_samples)
def __init__(self, low, high, prior="uniform", base=10, transform=None,
             name=None):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    * `low` [int]:
        Lower bound (inclusive).

    * `high` [int]:
        Upper bound (inclusive).

    * `prior` ["uniform" or "log-uniform", default="uniform"]:
        Distribution to use when sampling random integers for
        this dimension.

        - If `"uniform"`, integers are sampled uniformly between the lower
          and upper bounds.
        - If `"log-uniform"`, integers are sampled uniformly between
          `log(lower, base)` and `log(upper, base)` where log
          has base `base`.

    * `base` [int]:
        The logarithmic base to use for a log-uniform prior.

        - Default 10, otherwise commonly 2.

    * `transform` ["identity", "normalize", optional]:
        The following transformations are supported.

        - "identity", (default) the transformed space is the same as the
          original space.
        - "normalize", the transformed space is scaled to be between
          0 and 1.

    * `name` [str or None]:
        Name associated with dimension, e.g., "number of trees".

    Raises a ValueError when `high` is not greater than `low` or when
    `transform` is not one of the supported values.
    """
    if high <= low:
        raise ValueError(
            "the lower bound {} has to be less than the upper bound {}".format(
                low, high))

    self.low, self.high = low, high
    self.prior = prior
    self.base = base
    self.log_base = np.log10(base)
    self.name = name

    chosen = "identity" if transform is None else transform
    self.transform_ = chosen
    if chosen not in ("normalize", "identity"):
        raise ValueError(
            "transform should be 'normalize' or 'identity' got {}".format(
                chosen))

    # Any prior other than "uniform" takes the log-space code path.
    log_prior = self.prior != "uniform"
    if chosen == "normalize":
        self._rvs = _uniform_inclusive(0.0, 1.0)
        if log_prior:
            # Normalise in log space between log_base(low) and log_base(high).
            self.transformer = Pipeline([
                LogN(self.base),
                Normalize(np.log10(low) / self.log_base,
                          np.log10(high) / self.log_base),
            ])
        else:
            self.transformer = Pipeline([Identity(), Normalize(low, high)])
    elif log_prior:
        log_low = np.log10(self.low) / self.log_base
        log_high = np.log10(self.high) / self.log_base
        # Second argument is the width of the interval in log space.
        self._rvs = _uniform_inclusive(log_low, log_high - log_low)
        self.transformer = LogN(self.base)
    else:
        # scipy's randint excludes its upper bound, hence the +1.
        self._rvs = randint(self.low, self.high + 1)
        self.transformer = Identity()
"sia_e": 1.25, "ssa_n": 3.0, "gamma_T": 0.0001338671875, "thickness_calving_threshold": "thickness_calving_threshold_300_500_1998-01-01_2000-01-01.nc", "fractures": "true", "fracture_gamma": 0.4697265625, "fracture_gamma_h": 0.0, "fracture_softening": 0.9969726562500001, "fracture_initiation_threshold": 127548.828125, "healing_threshold": 4.3249023437500005e-10, }, }, "ocean": { "uq": { "frontal_melt_file": randint(0, 10), }, "default_values": { "climate": "given", "hydrology": "routing", "frontal_melt": "discharge_routing", "climate_file": "DMI-HIRHAM5_ERA_1980_2020_EPSG3413_4500M_DM.nc", "runoff_file": "DMI-HIRHAM5_ERA_1980_2020_EPSG3413_4500M_DM.nc", "salinity": "", "pseudo_plastic_q": 0.6, "sia_e": 1.25, "ssa_n": 3.0, "gamma_T": 1.5e-4, "thickness_calving_threshold": 300, "fractures": "true", "fracture_gamma": 0.4697265625,
y.head() # Podział na zbiór treningowy i testowy (niestosowany do nauki modelu, a do jego testowania) stanowiący 15 % obserwacji X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=12345) print(f"Rozmiar zbioru treningowego: {X_train.shape}") print(f"Rozmiar zbioru testowego: {X_test.shape}") # Siatka parametrów dla estymatora LightGBM użyta podczas poszukiwania hyperparametrów random_search_grid = { "learning_rate": uniform(0.0001, 0.1), "n_estimators": randint(150, 350), "min_data_in_leaf": randint(10, 50), "max_bin": randint(40, 200), "bagging_fraction": uniform(0.2, 0.5), "bagging_freq": randint(4, 10), "boosting_type": ["dart", "gbdt"] } # Przygotowanie obiektów estymatora oraz metody do szukania hyperparametrów lgbm_model = LGBMRegressor(n_jobs=-1, n_iter=200) random_search = RandomizedSearchCV(estimator=lgbm_model, param_distributions=random_search_grid, n_iter=50, cv=4, verbose=50, scoring="neg_root_mean_squared_error")
# We need random parameter sets.
from sklearn.model_selection import ParameterSampler
from scipy.stats.distributions import uniform, randint
import numpy as np

# Re-seed NumPy's global generator without a fixed seed.
np.random.seed()

# Search space handed to the parameter sampler: every value is either a
# frozen scipy distribution or a list of candidates.  uniform() with no
# arguments covers [0, 1]; uniform(loc, scale) covers [loc, loc + scale];
# randint excludes its upper bound.
defaultarg = {
    'imp_thresh': uniform(),
    'imp_lin_start': uniform(),
    'maxsizediff': randint(low=5, high=20),
    'acc_min_sim': uniform(loc=.2, scale=.6),
    # this many creations PER INSTANCE
    'n_samples': randint(low=2, high=8),
    'n_steps': randint(low=10, high=100),
    'quick_skip': [True, False],
    'core_choice': [True, False],
    'burnin': randint(low=0, high=15),
    # Alternative considered for the next two: randint(low=1, high=4)
    'mincipcount': [1, 2],
    'mininterfacecount': [1, 2],
}


def swapifsmaler(parm, a, b):
    """Swap ``parm[a]`` and ``parm[b]`` in place when ``parm[a]`` is smaller.

    After the call ``parm[a]`` is not smaller than ``parm[b]``.  The same
    `parm` object is returned.
    """
    if not parm[a] < parm[b]:
        return parm
    parm[a], parm[b] = parm[b], parm[a]
    return parm
def __init__(self, low, high, distribution='uniform', base=10,
             transform=None, name=None, dtype=int):
    """Search space dimension that can take on integer values.

    Parameters
    ----------
    low : int
        Lower bound (inclusive).
    high : int
        Upper bound (inclusive).
    distribution : {'uniform', 'log-uniform'}, default='uniform'
        How values are sampled between the bounds.
    base : int, default=10
        Logarithmic base used with the 'log-uniform' distribution.
    transform : {'identity', 'normalize'}, optional
        'identity' is used when None is given.
    name : str or None
        Stored on the instance as `name`.
    dtype : str or type, default=int
        Stored on the instance as `dtype`; must name or be an integer type.

    Raises
    ------
    ValueError
        If `high` is not greater than `low`, or if `distribution` or
        `transform` is not one of the supported values.
    TypeError
        If `dtype` is a str or type outside the accepted integer types.
    """
    if high <= low:
        raise ValueError('the lower bound should be less than the upper '
                         'bound, got low {} high {}'.format(low, high))
    self.low = low
    self.high = high

    if distribution not in ['uniform', 'log-uniform']:
        raise ValueError("distribution should be 'uniform' or "
                         "'log-uniform, got {}".format(distribution))
    self.distribution = distribution
    self.base = base
    self.log_base = np.log10(base)

    if transform is None:
        transform = 'identity'
    self.transform_ = transform

    if transform == 'normalize':
        self._rv = _uniform_inclusive(0.0, 1.0)
        if self.distribution == 'uniform':
            self.transformer = Pipeline(
                [Identity(), Normalize(low, high, is_int=True)])
        else:
            # NOTE(review): is_int=True is applied to log-space bounds
            # here -- confirm this is what Normalize expects.
            self.transformer = Pipeline([
                LogN(self.base),
                Normalize(np.log10(low) / self.log_base,
                          np.log10(high) / self.log_base,
                          is_int=True)
            ])
    elif transform == 'identity':
        if self.distribution == 'uniform':
            # scipy's randint excludes its upper bound, hence the +1.
            self._rv = randint(self.low, self.high + 1)
            self.transformer = Identity()
        else:
            # Second argument is the width of the interval in log space.
            self._rv = _uniform_inclusive(
                np.log10(low) / self.log_base,
                (np.log10(high) - np.log10(low)) / self.log_base)
            self.transformer = LogN(self.base)
    else:
        raise ValueError("transform should be 'identity' or 'normalize', "
                         "got {}".format(transform))

    # 'int64' is accepted by name just like np.int64 is accepted by type.
    if (isinstance(dtype, str) and dtype not in [
            'int', 'int8', 'int16', 'int32', 'int64',
            'uint8', 'uint16', 'uint32', 'uint64'
    ]):
        raise TypeError('dtype should be int, got {}'.format(dtype))
    # np.int is deliberately absent: it was only an alias of the builtin
    # int and no longer exists in current NumPy, where merely evaluating
    # it raises AttributeError.
    elif (isinstance(dtype, type) and dtype not in [
            int, np.int8, np.int16, np.int32, np.int64,
            np.uint8, np.uint16, np.uint32, np.uint64
    ]):
        raise TypeError('dtype should be int, got {}'.format(dtype))
    self.dtype = dtype
    self.name = name