예제 #1
0
def get_param_space():
    """Build the hyper-parameter search space for every supported solver.

    Returns
    -------
    dict
        Maps a solver name ('sgd', 'adam') to a dict of parameter name ->
        candidates, where candidates are either a list of values or a
        frozen ``randint`` distribution.
    """
    # Both solvers share the same parameter layout; only the solver name
    # (and the independently drawn metric) differ.
    # NOTE(review): the 'lr' and 'L2' ranges are negative integers,
    # presumably exponents -- confirm with the code consuming this space.
    param_space = {}
    for solver in ('sgd', 'adam'):
        # The metric is drawn once per solver while the space is built, so
        # 'primary_metric' always holds exactly one candidate.
        metric = np.random.choice(['ce', 'accuracy'])
        param_space[solver] = {
            'solver': [solver],
            'primary_metric': [metric],
            'lr': randint(-6, -1),
            'L2': randint(-8, -1),
            'num_epochs': randint(2, 5),
            'batch_size': randint(4, 8),
        }
    return param_space
예제 #2
0
    def __init__(self, low, high):
        """Create a search space dimension that takes integer values.

        Parameters
        ----------
        * `low` [int]:
            Smallest value of the dimension (inclusive).

        * `high` [int]:
            Largest value of the dimension (inclusive).
        """
        self._low, self._high = low, high
        # scipy's randint excludes its upper bound, hence the +1 to keep
        # `high` itself reachable.
        upper_exclusive = self._high + 1
        self._rvs = randint(self._low, upper_exclusive)
        self.transformer = _Identity()
예제 #3
0
파일: driver.py 프로젝트: yitang/optomatic
def get_param_space():
    """Return the classifiers together with their search spaces.

    Returns
    -------
    tuple
        ``(clfs, param_space, param_types)``. All three are dicts keyed by
        the classifier name ("svc", "rfc"): ``clfs`` holds an estimator
        instance, ``param_space`` the candidates per parameter (a list or
        a frozen scipy distribution) and ``param_types`` a type label
        ("real", "int" or "categorical") per parameter.
    """
    param_space = {
        "svc": {
            "C": expon(scale=100),
            "gamma": expon(scale=0.1),
            "probability": [True],
            "kernel": ["linear"],
        },
        "rfc": {
            "n_estimators": randint(50, 600),
            "max_features": [1, 2],
        },
    }
    param_types = {
        "svc": {
            "C": "real",
            "gamma": "real",
            "probability": "int",
            "kernel": "categorical",
        },
        "rfc": {
            "n_estimators": "int",
            "max_features": "int",
        },
    }

    # clfs maps string-names to a cloneable clf instance.
    clfs = {"svc": SVC(), "rfc": RFC()}
    return clfs, param_space, param_types
예제 #4
0
def get_param_space():
    """Assemble the estimators, their search spaces and parameter types.

    Returns
    -------
    tuple
        ``(clfs, param_space, param_types)``; each element is a dict keyed
        by the classifier name ('svc' or 'rfc').
    """
    # Candidates per parameter: a frozen scipy distribution or a plain list.
    svc_space = {
        'C': expon(scale=100),
        'gamma': expon(scale=0.1),
        'probability': [True],
        'kernel': ['linear'],
    }
    rfc_space = {
        'n_estimators': randint(50, 600),
        'max_features': [1, 2],
    }

    # Type label attached to every parameter listed above.
    svc_types = {
        'C': 'real',
        'gamma': 'real',
        'probability': 'int',
        'kernel': 'categorical',
    }
    rfc_types = {'n_estimators': 'int', 'max_features': 'int'}

    param_space = {'svc': svc_space, 'rfc': rfc_space}
    param_types = {'svc': svc_types, 'rfc': rfc_types}

    # clfs maps string-names to a cloneable clf instance.
    clfs = {'svc': SVC(), 'rfc': RFC()}
    return clfs, param_space, param_types
예제 #5
0
    def __init__(self, low, high, transform=None, name=None):
        """Create a search space dimension that takes integer values.

        Parameters
        ----------
        * `low` [int]:
            Lower bound (inclusive).

        * `high` [int]:
            Upper bound (inclusive); must be strictly greater than `low`.

        * `transform` ["identity", "normalize", optional]:
            How the dimension is represented after transformation.

            - "identity" (used when `transform` is None): samples are drawn
              with `randint` and passed through `Identity`.
            - "normalize": samples are drawn from `uniform(0, 1)` and
              handled by `Normalize(low, high, is_int=True)`.

        * `name` [str or None]:
            Name associated with dimension, e.g., "number of trees".

        Raises
        ------
        ValueError
            If `high <= low` or `transform` is not a supported value.
        """
        if high <= low:
            message = ("the lower bound {} has to be less than the"
                       " upper bound {}")
            raise ValueError(message.format(low, high))

        self.low, self.high, self.name = low, high, name

        # A missing transform means the space is left untouched.
        self.transform_ = "identity" if transform is None else transform

        if self.transform_ == "identity":
            # randint excludes its upper bound, so +1 keeps `high` inclusive.
            self._rvs = randint(self.low, self.high + 1)
            self.transformer = Identity()
        elif self.transform_ == "normalize":
            self._rvs = uniform(0, 1)
            self.transformer = Normalize(low, high, is_int=True)
        else:
            raise ValueError("transform should be 'normalize' or 'identity'"
                             " got {}".format(self.transform_))
예제 #6
0
    def set_transformer(self, transform="identity"):
        """Define _rvs and transformer spaces.

        Parameters
        ----------
        transform : str
           Can be 'normalize' or 'identity' (the default).

        Raises
        ------
        ValueError
            If `transform` is neither 'normalize' nor 'identity'.

        """
        # Validate first so that a rejected value never ends up stored in
        # `transform_`.
        if transform not in ["normalize", "identity"]:
            raise ValueError("transform should be 'normalize' or 'identity'"
                             " got {}".format(transform))

        self.transform_ = transform

        if self.transform_ == "normalize":
            self._rvs = _uniform_inclusive(0.0, 1.0)
            if self.prior == "uniform":
                self.transformer = Pipeline(
                    [Identity(),
                     Normalize(self.low, self.high, is_int=True)])
            else:
                # Any non-uniform prior is handled in log space: the
                # bounds are converted to logarithms in base `self.base`.
                self.transformer = Pipeline([
                    LogN(self.base),
                    Normalize(
                        np.log10(self.low) / self.log_base,
                        np.log10(self.high) / self.log_base,
                    ),
                ])
        else:
            if self.prior == "uniform":
                # randint excludes its upper bound; +1 keeps `high`
                # inclusive.
                self._rvs = randint(self.low, self.high + 1)
                self.transformer = Identity()
            else:
                # NOTE(review): the arguments look like (start, width) in
                # log space -- confirm against _uniform_inclusive.
                self._rvs = _uniform_inclusive(
                    np.log10(self.low) / self.log_base,
                    np.log10(self.high) / self.log_base -
                    np.log10(self.low) / self.log_base,
                )
                self.transformer = LogN(self.base)
예제 #7
0
    def __init__(self, low, high, transform=None):
        """Create a search space dimension that takes integer values.

        Parameters
        ----------
        * `low` [int]:
            Lower bound (inclusive).

        * `high` [int]:
            Upper bound (inclusive).

        * `transform` ["identity", "normalize", optional]:
            Representation of the dimension after transformation.

            - "identity" (used when `transform` is None): the transformed
              space is the same as the original space.
            - "normalize": the transformed space is scaled to be between
              0 and 1.

        Raises
        ------
        ValueError
            If `transform` is not one of the supported values.
        """
        if transform is None:
            transform = "identity"

        self.low, self.high, self.transform_ = low, high, transform

        if transform not in ("normalize", "identity"):
            raise ValueError(
                "transform should be 'normalize' or 'identity' got %s" %
                self.transform_)

        if transform == "identity":
            # randint excludes its upper bound, so +1 keeps `high` inclusive.
            self._rvs = randint(self.low, self.high + 1)
            self.transformer = Identity()
            return

        # Only "normalize" is left at this point.
        self._rvs = uniform(0, 1)
        self.transformer = Normalize(low, high, is_int=True)
예제 #8
0
    def __init__(self, low, high, transform=None):
        """Create a search space dimension that takes integer values.

        Parameters
        ----------
        * `low` [int]:
            Lower bound (inclusive).

        * `high` [int]:
            Upper bound (inclusive).

        * `transform` ["identity", "normalize", optional]:
            Representation of the dimension after transformation.

            - "identity" (used when `transform` is None): the transformed
              space is the same as the original space.
            - "normalize": the transformed space is scaled to be between
              0 and 1.

        Raises
        ------
        ValueError
            If `transform` is not one of the supported values.
        """
        self.low = low
        self.high = high
        self.transform_ = "identity" if transform is None else transform

        if self.transform_ not in ("normalize", "identity"):
            raise ValueError("transform should be 'normalize' or 'identity'"
                             " got {}".format(self.transform_))

        normalized = self.transform_ == "normalize"
        if normalized:
            self._rvs = uniform(0, 1)
        else:
            # randint excludes its upper bound, so +1 keeps `high` inclusive.
            self._rvs = randint(self.low, self.high + 1)
        self.transformer = (Normalize(low, high, is_int=True)
                            if normalized else Identity())
예제 #9
0
    def __init__(self,
                 low,
                 high,
                 prior="uniform",
                 base=10,
                 transform=None,
                 name=None,
                 dtype=np.int64):
        """Create a search space dimension that takes integer values.

        Parameters
        ----------
        * `low` [int]:
            Lower bound (inclusive).

        * `high` [int]:
            Upper bound (inclusive); must be strictly greater than `low`.

        * `prior` [str, default="uniform"]:
            "uniform" samples integers directly; any other value selects
            the logarithmic handling in base `base`.

        * `base` [int, default=10]:
            Logarithmic base used when `prior` is not "uniform".

        * `transform` ["identity", "normalize", optional]:
            "identity" is used when None is given.

        * `name` [str or None]:
            Name associated with the dimension.

        * `dtype` [str or type, default=np.int64]:
            Integer dtype, either as a string name or as a Python/NumPy
            integer type.

        Raises
        ------
        ValueError
            If `high <= low`, if `dtype` is a string or type outside the
            accepted integer dtypes, or if `transform` is unsupported.
        """
        if high <= low:
            raise ValueError("the lower bound {} has to be less than the"
                             " upper bound {}".format(low, high))

        self.low, self.high = low, high
        self.prior, self.base = prior, base
        self.log_base = np.log10(base)
        self.name = name
        self.dtype = dtype

        # Values that are neither str nor type are not checked here.
        if isinstance(self.dtype, str):
            accepted_names = ('int', 'int8', 'int16', 'int32', 'int64',
                              'uint8', 'uint16', 'uint32', 'uint64')
            if self.dtype not in accepted_names:
                raise ValueError("dtype must be 'int', 'int8', 'int16',"
                                 "'int32', 'int64', 'uint8',"
                                 "'uint16', 'uint32', or"
                                 "'uint64', but got {}".format(self.dtype))
        elif isinstance(self.dtype, type):
            accepted_types = (int, np.int8, np.int16, np.int32, np.int64,
                              np.uint8, np.uint16, np.uint32, np.uint64)
            if self.dtype not in accepted_types:
                raise ValueError("dtype must be 'int', 'np.int8', 'np.int16',"
                                 "'np.int32', 'np.int64', 'np.uint8',"
                                 "'np.uint16', 'np.uint32', or"
                                 "'np.uint64', but got {}".format(self.dtype))

        self.transform_ = "identity" if transform is None else transform
        if self.transform_ not in ("normalize", "identity"):
            raise ValueError("transform should be 'normalize' or 'identity'"
                             " got {}".format(self.transform_))

        uniform_prior = self.prior == "uniform"
        if self.transform_ == "normalize":
            self._rvs = _uniform_inclusive(0.0, 1.0)
            if uniform_prior:
                stages = [Identity(), Normalize(low, high, is_int=True)]
            else:
                # Bounds are expressed as logarithms in base `self.base`.
                stages = [
                    LogN(self.base),
                    Normalize(np.log10(low) / self.log_base,
                              np.log10(high) / self.log_base),
                ]
            self.transformer = Pipeline(stages)
        elif uniform_prior:
            # randint excludes its upper bound, so +1 keeps `high` inclusive.
            self._rvs = randint(self.low, self.high + 1)
            self.transformer = Identity()
        else:
            log_low = np.log10(self.low) / self.log_base
            log_high = np.log10(self.high) / self.log_base
            # NOTE(review): the arguments look like (start, width) in log
            # space -- confirm against _uniform_inclusive.
            self._rvs = _uniform_inclusive(log_low, log_high - log_low)
            self.transformer = LogN(self.base)
예제 #10
0
# exit()

# Scorer built from gini_sklearn: larger values are better and the metric
# is fed predicted probabilities instead of class labels.
# NOTE(review): recent scikit-learn releases replace `needs_proba` with
# `response_method` -- confirm against the installed version.
gini_scorer = make_scorer(gini_sklearn,
                          greater_is_better=True,
                          needs_proba=True)

# Randomised search over the model's hyper-parameters; only `num_leaves`
# is active, the other candidates are kept commented out for reference.
rsc = RandomizedSearchCV(
    estimator=model,
    param_distributions={
        #'n_estimators': randint(25, 250),
        #'subsample': uniform(0.5, 0.5),
        #'subsample_freq': randint(2, 25),
        #'colsample_bytree': uniform(0.5, 0.5),
        #'learning_rate': uniform(0.0, 0.1),
        #'min_child_samples': randint(5, 500),
        'num_leaves': randint(5, 200),
    },
    scoring=gini_scorer,
    cv=StratifiedShuffleSplit(n_splits=5, test_size=0.2),
    verbose=2,
    n_iter=5,
    # The report below reads 'mean_train_score', which cv_results_ only
    # contains when train scores are explicitly requested.
    return_train_score=True)

grid_result = rsc.fit(X_partial, Y_partial)

print("Best: %f using %s" %
      (grid_result.best_score_, grid_result.best_params_))
# One report line per sampled parameter set.
for test_mean, train_mean, param in zip(
        grid_result.cv_results_['mean_test_score'],
        grid_result.cv_results_['mean_train_score'],
        grid_result.cv_results_['params']):
    print("Train: %f // Test : %f with: %r" % (train_mean, test_mean, param))
예제 #11
0
}
# Booster settings used as the starting point of the search; the trailing
# comments record the library defaults.
booster_params = {
    'eta': 0.1,  # default=0.3
    'gamma': 0.,  # default=0.; larger => more conservative
    'max_depth': 6,  # default=6
    'min_child_weight': 1,  # default=1; larger => more conservative
    'subsample': 1.,  # default=1.; proportion of points to sample each round
    'lambda': 1,  # default=1, L2 regularization
    'alpha': 0,  # default=0, L1 regularization
}

# Parameter space to search over: a list is a set of explicit candidates,
# a scipy frozen distribution is sampled from.
param_dist = {
    'eta': [0.1],
    'gamma': expon(),
    'max_depth': randint(3, 10),
    'min_child_weight': randint(1, 10),
    'subsample': uniform(0.5, 0.5),
    'lambda': expon(),
    'alpha': expon()
}
# Fixed random_state keeps the 32 sampled points reproducible.
sampler = ParameterSampler(param_dist, n_iter=32, random_state=1)

# Perform the search
# `np.inf` is the spelling that exists in every NumPy release; the
# capitalised `np.Inf` alias was removed in NumPy 2.0.
best_score = np.inf
best_params = {**general_params, **booster_params}

# Repeatedly sample parameters from the above distributions
print('Testing hyperparameters...')
for point in tqdm(sampler):
    current_params = best_params.copy()
예제 #12
0
파일: user.py 프로젝트: wai-chuen/optomatic
def get_param_space():
    """Define the parameter space; used by driver.py.

    Returns
    -------
    dict
        A single entry, 'sleep', bound to the frozen distribution
        ``randint(1, 5)``.
    """
    sleep_distribution = randint(1, 5)
    return {'sleep': sleep_distribution}
예제 #13
0
        #'max_features': ('auto', 'sqrt'),
        #'max_features': range(3, 9),
        #'max_depth': range(3, 7),
    },
    #scoring='neg_log_loss',
    scoring='roc_auc',
    #scoring='f1',
    #scoring=gini_scorer,
    cv=cv,
    verbose=2
)

# Randomised search over the forest `rf`: 7 sampled settings, scored with
# ROC AUC on the splits produced by `cv`.
rsc = RandomizedSearchCV(
    estimator=rf,
    param_distributions={
        # Only the number of trees is searched; the candidates below are
        # kept commented out for reference.
        'n_estimators': randint(250, 2500),
        #'class_weight': [{0: 1, 1: x} for x in range(15, 51, 5)],
        #'criterion': ('gini', 'entropy'),
        #'min_samples_leaf': randint(15, 50),
        #'min_samples_split': randint(15, 50),
        #'max_features': ('auto', 'sqrt') + range(5,50),
        #'max_features': range(3, 9),
        #'max_depth': randint(2, 6),
    },
    n_iter=7,
    cv=cv,
    #scoring=gini_scorer,
    scoring='roc_auc',
    verbose=2,
)
예제 #14
0
from SALib.analyze import sobol

# Command-line interface: number of samples and the CSV file to write.
parser = ArgumentParser()
parser.description = "Draw samples using the Saltelli methods"
parser.add_argument(
    "-s", "--n_samples", dest="n_samples", type=int, help="""number of samples to draw. default=10.""", default=10
)
# nargs="?" makes the positional optional so that the declared default is
# reachable; with nargs=1 argparse always required the argument and the
# default was dead. An explicitly given file name behaves as before.
parser.add_argument("OUTFILE", nargs="?", help="Output file (CSV)", default="saltelli_samples.csv")
options = parser.parse_args()
n_samples = options.n_samples
outfile = options.OUTFILE

# Distribution of every uncertain parameter, in a fixed order.
distributions = OrderedDict()
distributions["m_min"] = uniform(loc=-1.5, scale=0.5)
distributions["m_max"] = uniform(loc=4.0, scale=1.0)
distributions["h_min"] = randint(50, 150)
distributions["h_ela"] = randint(1500, 1800)
distributions["h_max"] = randint(2500, 3000)

# Names of all the variables (a real list, not a dict view, so that it can
# be indexed and serialised by downstream code)
keys = list(distributions)
print(distributions)
# Generate the Sobol sequence samples with uniform distributions

# Every variable is sampled on the unit interval; each row is its own list
# so that changing one bound cannot affect the others.
problem = {"num_vars": len(keys), "names": keys, "bounds": [[0, 1] for _ in keys]}

# Generate samples
unif_sample = saltelli.sample(problem, n_samples, calc_second_order=False)


# To hold the transformed variables
예제 #15
0
#!/usr/bin/env python

import numpy as np
import pandas as pd
from pyDOE import lhs
from scipy.stats.distributions import truncnorm, gamma, uniform, randint
import pylab as plt

# Budget: how many model runs are allowed
n_samples = 500

# One scipy.stats frozen distribution per uncertain parameter, per Table 1
# in the paper. For truncnorm the first two arguments are the clipping
# bounds expressed relative to `loc` in units of `scale`.
# NOTE(review): several truncnorm entries divide the bound by a value that
# differs from their `scale` (e.g. FSNOW divides by 3 with scale=1.5) --
# confirm the intended bounds against Table 1.
distributions = dict(
    GCM=randint(0, 4),
    FICE=truncnorm(-4 / 4.0, 4.0 / 4, loc=8, scale=4),
    FSNOW=truncnorm(-4.1 / 3, 4.1 / 3, loc=4.1, scale=1.5),
    PRS=uniform(loc=5, scale=2),
    RFR=truncnorm(-0.4 / 0.3, 0.4 / 0.3, loc=0.5, scale=0.2),
    OCM=randint(-1, 2),
    OCS=randint(-1, 2),
    TCT=randint(-1, 2),
    VCM=truncnorm(-0.35 / 0.2, 0.35 / 0.2, loc=1, scale=0.2),
    PPQ=truncnorm(-0.35 / 0.2, 0.35 / 0.2, loc=0.6, scale=0.2),
    SIAE=gamma(1.5, scale=0.8, loc=1),
)

# Variable names, listed explicitly to pin the column order of the sample
keys = [
    "GCM",
    "FICE",
    "FSNOW",
    "PRS",
    "RFR",
    "OCM",
    "OCS",
    "TCT",
    "VCM",
    "PPQ",
    "SIAE",
]

# Latin hypercube sample: one column per variable, n_samples requested
unif_sample = lhs(len(keys), n_samples)
예제 #16
0
    def __init__(self,
                 low,
                 high,
                 prior="uniform",
                 base=10,
                 transform=None,
                 name=None):
        """Create a search space dimension that takes integer values.

        Parameters
        ----------
        * `low` [int]:
            Lower bound (inclusive).

        * `high` [int]:
            Upper bound (inclusive); must be strictly greater than `low`.

        * `prior` ["uniform" or "log-uniform", default="uniform"]:
            Distribution to use when sampling random integers for this
            dimension.
            - If `"uniform"`, integers are sampled uniformly between the
              lower and upper bounds.
            - If `"log-uniform"`, integers are sampled uniformly between
              `log(lower, base)` and `log(upper, base)` where log
              has base `base`.

        * `base` [int]:
            The logarithmic base to use for a log-uniform prior.
            - Default 10, otherwise commonly 2.

        * `transform` ["identity", "normalize", optional]:
            The following transformations are supported.

            - "identity", (default) the transformed space is the same as the
              original space.
            - "normalize", the transformed space is scaled to be between
              0 and 1.

        * `name` [str or None]:
            Name associated with dimension, e.g., "number of trees".

        Raises
        ------
        ValueError
            If `high <= low` or `transform` is not a supported value.
        """
        if high <= low:
            raise ValueError("the lower bound {} has to be less than the"
                             " upper bound {}".format(low, high))

        self.low, self.high = low, high
        self.prior, self.base = prior, base
        self.log_base = np.log10(base)
        self.name = name

        self.transform_ = "identity" if transform is None else transform
        if self.transform_ not in ("normalize", "identity"):
            raise ValueError("transform should be 'normalize' or 'identity'"
                             " got {}".format(self.transform_))

        uniform_prior = self.prior == "uniform"
        if self.transform_ == "identity":
            if uniform_prior:
                # randint excludes its upper bound, so +1 keeps `high`
                # inclusive.
                self._rvs = randint(self.low, self.high + 1)
                self.transformer = Identity()
            else:
                log_low = np.log10(self.low) / self.log_base
                log_high = np.log10(self.high) / self.log_base
                # NOTE(review): the arguments look like (start, width) in
                # log space -- confirm against _uniform_inclusive.
                self._rvs = _uniform_inclusive(log_low, log_high - log_low)
                self.transformer = LogN(self.base)
        else:
            self._rvs = _uniform_inclusive(0.0, 1.0)
            if uniform_prior:
                stages = [Identity(), Normalize(low, high)]
            else:
                # Bounds are expressed as logarithms in base `self.base`.
                stages = [
                    LogN(self.base),
                    Normalize(np.log10(low) / self.log_base,
                              np.log10(high) / self.log_base),
                ]
            self.transformer = Pipeline(stages)
예제 #17
0
         "sia_e": 1.25,
         "ssa_n": 3.0,
         "gamma_T": 0.0001338671875,
         "thickness_calving_threshold":
         "thickness_calving_threshold_300_500_1998-01-01_2000-01-01.nc",
         "fractures": "true",
         "fracture_gamma": 0.4697265625,
         "fracture_gamma_h": 0.0,
         "fracture_softening": 0.9969726562500001,
         "fracture_initiation_threshold": 127548.828125,
         "healing_threshold": 4.3249023437500005e-10,
     },
 },
 "ocean": {
     "uq": {
         "frontal_melt_file": randint(0, 10),
     },
     "default_values": {
         "climate": "given",
         "hydrology": "routing",
         "frontal_melt": "discharge_routing",
         "climate_file": "DMI-HIRHAM5_ERA_1980_2020_EPSG3413_4500M_DM.nc",
         "runoff_file": "DMI-HIRHAM5_ERA_1980_2020_EPSG3413_4500M_DM.nc",
         "salinity": "",
         "pseudo_plastic_q": 0.6,
         "sia_e": 1.25,
         "ssa_n": 3.0,
         "gamma_T": 1.5e-4,
         "thickness_calving_threshold": 300,
         "fractures": "true",
         "fracture_gamma": 0.4697265625,
예제 #18
0
y.head()

# Split into a training set and a test set; the test set (15 % of the
# observations) is not used for fitting, only for evaluating the model
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.15,
                                                    random_state=12345)

print(f"Rozmiar zbioru treningowego: {X_train.shape}")
print(f"Rozmiar zbioru testowego: {X_test.shape}")

# Parameter grid for the LightGBM estimator used during the hyper-parameter
# search
random_search_grid = {
    "learning_rate": uniform(0.0001, 0.1),
    "n_estimators": randint(150, 350),
    "min_data_in_leaf": randint(10, 50),
    "max_bin": randint(40, 200),
    "bagging_fraction": uniform(0.2, 0.5),
    "bagging_freq": randint(4, 10),
    "boosting_type": ["dart", "gbdt"]
}

# Prepare the estimator object and the hyper-parameter search method.
# The number of boosting rounds is deliberately not fixed here: `n_iter`
# is a LightGBM alias of `n_estimators`, so pinning it on the estimator
# conflicts with the `n_estimators` values sampled from the grid above.
lgbm_model = LGBMRegressor(n_jobs=-1)
random_search = RandomizedSearchCV(estimator=lgbm_model,
                                   param_distributions=random_search_grid,
                                   n_iter=50,
                                   cv=4,
                                   verbose=50,
                                   scoring="neg_root_mean_squared_error")
예제 #19
0
# soso we need random parametersets
from sklearn.model_selection import ParameterSampler
from scipy.stats.distributions import uniform, randint
import numpy as np
# Called without an argument: reseeds NumPy's global random generator.
np.random.seed()

# Search space handed to the parameter sampler: a scipy frozen distribution
# is sampled from, a plain list enumerates the candidates.
defaultarg = {
    'imp_thresh': uniform(),
    'imp_lin_start': uniform(),
    'maxsizediff': randint(low=5, high=20),
    'acc_min_sim': uniform(loc=.2, scale=.6),
    # this many creations PER INSTANCE
    'n_samples': randint(low=2, high=8),
    'n_steps': randint(low=10, high=100),
    'quick_skip': [True, False],
    'core_choice': [True, False],
    'burnin': randint(low=0, high=15),
    # The two counts below were previously drawn from randint(low=1,high=4).
    'mincipcount': [1, 2],
    'mininterfacecount': [1, 2],
}


def swapifsmaler(parm, a, b):
    """Swap ``parm[a]`` and ``parm[b]`` when ``parm[a]`` is the smaller one.

    The container is modified in place and also returned, so that after the
    call ``parm[a] < parm[b]`` no longer holds.
    """
    first, second = parm[a], parm[b]
    if not first < second:
        return parm
    parm[a] = second
    parm[b] = first
    return parm
예제 #20
0
    def __init__(self,
                 low,
                 high,
                 distribution='uniform',
                 base=10,
                 transform=None,
                 name=None,
                 dtype=int):
        """Create a search space dimension that takes integer values.

        Parameters
        ----------
        low : int
            Lower bound (inclusive).
        high : int
            Upper bound (inclusive); must be strictly greater than `low`.
        distribution : {'uniform', 'log-uniform'}, default='uniform'
            'uniform' samples integers directly with `randint`;
            'log-uniform' works on logarithms in base `base`.
        base : int, default=10
            Logarithmic base used for the 'log-uniform' distribution.
        transform : {'identity', 'normalize'}, optional
            'identity' is used when None is given.
        name : str or None
            Name associated with the dimension.
        dtype : str or type, default=int
            Integer dtype, given either as a string name or as a
            Python/NumPy integer type.

        Raises
        ------
        ValueError
            If `high <= low`, or if `distribution` or `transform` is not a
            supported value.
        TypeError
            If `dtype` is a string or type outside the accepted integer
            dtypes.
        """
        if high <= low:
            raise ValueError('the lower bound should be less than the upper '
                             'bound, got low {} high {}'.format(low, high))
        self.low = low
        self.high = high

        if distribution not in ['uniform', 'log-uniform']:
            raise ValueError("distribution should be 'uniform' or "
                             "'log-uniform', got {}".format(distribution))
        self.distribution = distribution
        self.base = base
        self.log_base = np.log10(base)

        if transform is None:
            transform = 'identity'
        self.transform_ = transform
        if transform == 'normalize':
            self._rv = _uniform_inclusive(0.0, 1.0)
            if self.distribution == 'uniform':
                self.transformer = Pipeline(
                    [Identity(), Normalize(low, high, is_int=True)])
            else:
                # Bounds are expressed as logarithms in base `self.base`.
                self.transformer = Pipeline([
                    LogN(self.base),
                    Normalize(np.log10(low) / self.log_base,
                              np.log10(high) / self.log_base,
                              is_int=True)
                ])
        elif transform == 'identity':
            if self.distribution == 'uniform':
                # randint excludes its upper bound, so +1 keeps `high`
                # inclusive.
                self._rv = randint(self.low, self.high + 1)
                self.transformer = Identity()
            else:
                # NOTE(review): the arguments look like (start, width) in
                # log space -- confirm against _uniform_inclusive.
                self._rv = _uniform_inclusive(
                    np.log10(low) / self.log_base,
                    (np.log10(high) - np.log10(low)) / self.log_base)
                self.transformer = LogN(self.base)
        else:
            raise ValueError("transform should be 'identity' or 'normalize', "
                             "got {}".format(transform))

        # 'int64' is accepted as a string because np.int64 is accepted as a
        # type below.
        if (isinstance(dtype, str) and dtype not in [
                'int', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16',
                'uint32', 'uint64'
        ]):
            raise TypeError('dtype should be int, got {}'.format(dtype))
        # `np.int` is intentionally not listed: it was only an alias of the
        # builtin `int` (already accepted) and no longer exists in NumPy
        # >= 1.24, where merely naming it raises AttributeError -- which
        # broke even the default dtype=int.
        elif (isinstance(dtype, type) and dtype not in [
                int, np.int8, np.int16, np.int32, np.int64, np.uint8,
                np.uint16, np.uint32, np.uint64
        ]):
            raise TypeError('dtype should be int, got {}'.format(dtype))
        self.dtype = dtype
        self.name = name