Example #1
def build_params_space(model_type):
    assert_supported(model_type)

    if model_type == 'svr':
        space = {
            'estimator__kernel': ['poly', 'rbf'],
            'estimator__C': loguniform(1e-3, 1e3),
            'estimator__degree': range(1, 8),
        }

    if model_type == 'lr':
        space = {
            'estimator__alpha': loguniform(1e-3, 1e1),
            'estimator__l1_ratio': uniform(0, 1),
        }

    if model_type == 'rf':
        space = {
            'estimator__n_estimators': range(50, 200),
            'estimator__max_depth': range(3, 10),
            'estimator__criterion': ['mse', 'mae'],
            'estimator__max_features': ['auto', 'sqrt', 'log2']
        }

    if model_type == 'gbm':
        space = {
            'estimator__n_estimators': range(25, 200),
            'estimator__max_depth': range(3, 8),
            'estimator__learning_rate': loguniform(1e-3, 1e-1)
        }

    return space
Example #2
    def otimizar(self):
        # Defining the parameters to be used
        parametros = {
            'C': loguniform(2**-5, 2**15),
            'gamma': loguniform(2**-15, 2**3),
            'epsilon': uniform(0.0, 1)
        }

        cv_ = ShuffleSplit(n_splits=1, test_size=0.1, train_size=0.9)

        # Running the parameter optimization
        self.iniciar_tempo()
        randomSCV = RandomizedSearchCV(SVR(kernel='rbf'), parametros,
                                       scoring="neg_mean_absolute_error", cv=cv_,
                                       n_iter=self.num_combinacoes, n_jobs=-1)
        randomSCV.fit(self.X_treinamento, self.Y_treinamento)
        self.finalizar_tempo()

        # Identify optimal hyperparameter values
        C = randomSCV.best_params_['C']
        gamma = randomSCV.best_params_['gamma']
        epsilon = randomSCV.best_params_['epsilon']

        # Training the final SVM with the parameters found
        self.svm = SVM(gamma, C, epsilon)
        self.svm.treinar(self.X_treinamento, self.Y_treinamento)
        self.svm.testar(self.X_teste, self.Y_teste)
Example #3
def _get_params_random(model_type, is_cl, with_preprocessing):
    if model_type == "linear":
        ml_params = dict(penalty=["l1", "l2"], C=stats.loguniform(
            1e-5, 10)) if is_cl else dict(alpha=stats.loguniform(1e-5, 10))
    else:
        ml_params = dict(max_depth=list(range(5, 16)))
    return _convert_ml_params(ml_params) if with_preprocessing else ml_params
Example #4
    def fit_model_Randomize(self, X_train, y_train):
        # Create Pipeline
        pipeline = Pipeline([
            ('tfidf', TfidfVectorizer(lowercase=False)),
            ('model', MultinomialNB()),
        ])

        parameters = {
            'tfidf__ngram_range': [
                (1, 1),
                (2, 2),
                (1, 2),
            ],
            'tfidf__min_df': stats.loguniform(0.01, 0.2),
            'tfidf__max_df': stats.loguniform(0.01, 0.3),
            'tfidf__norm': ['l1', 'l2'],
            'model__alpha': stats.uniform(0.5, 1)
        }

        # Perform randomized search over the pipeline
        grid_search = RandomizedSearchCV(pipeline,
                                         parameters,
                                         n_jobs=-1,
                                         verbose=1,
                                         scoring="accuracy",
                                         cv=5,
                                         n_iter=200,
                                         refit=True)
        grid_search.fit(X_train, y_train)
        return grid_search.best_estimator_
Example #5
def get_param_dist(model_name):
    """Get the parameter distribution for each machine learning model for random search."""
    if model_name == 'logistic_regression':
        param_dist = dict(C=loguniform(1e-6, 1e+6))

    elif model_name == 'elastic_net':
        param_dist = dict(l1_ratio=uniform(0, 1), C=loguniform(1e-6, 1e+6))

    elif model_name == 'svm':
        kernel = ['linear', 'poly', 'rbf']
        param_dist = dict(C=loguniform(1e-3, 1e+3), kernel=kernel)

    elif model_name == 'random_forest':
        max_features = ["auto", "log2"]
        param_dist = dict(n_estimators=range(100, 1001),
                          max_features=max_features)

    elif model_name == 'gradient_boost':
        param_dist = dict(learning_rate=uniform(0, 1),
                          subsample=uniform(0.1, 0.9),
                          max_depth=range(0, 11),
                          min_child_weight=range(0, 26))

    else:
        raise ValueError(
            f'The entered model "{model_name}" was not found. Please check that you have chosen a valid model.'
        )

    return param_dist
Example #6
def get_random_size(random_state=None, larger=False):
    rng = check_random_state(random_state)
    rv = loguniform(3, 500)
    x1, x2 = rv.rvs(size=2, random_state=rng).astype(int)
    h1 = loguniform(3, 70).rvs(random_state=rng).astype(int)
    h2 = h1 * int(rng.uniform(1, 3))
    if larger:
        h1, h2, x1, x2 = np.sort([h1, h2, x1, x2])
    return (x1, x2), (h1, h2)
Example #7
def svm_parameter_space():
    param_grid = [{
        'kernel': ['linear'],
        'C': loguniform(0.001, 10),
    }, {
        'kernel': ['rbf'],
        'C': loguniform(0.1, 1000),
        'gamma': loguniform(0.0001, 1.0),
    }]
    return param_grid
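Such a list of dicts can be passed straight to RandomizedSearchCV in recent scikit-learn versions (as Example #8 below also does): one dict is first picked uniformly at random, then each of its entries is sampled. A minimal usage sketch; the SVC estimator and the toy dataset here are illustrative assumptions, not part of the original snippet:

from scipy.stats import loguniform
from sklearn.datasets import make_classification
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

# toy data, only to make the sketch runnable
X, y = make_classification(n_samples=200, random_state=0)

search = RandomizedSearchCV(SVC(), svm_parameter_space(),
                            n_iter=20, cv=3, random_state=0)
search.fit(X, y)
print(search.best_params_)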
Example #8
def sweep():
    dataset = pd.read_csv("data/train.csv")
    Y = dataset["Survived"]
    X = dataset.drop(["Survived"], axis=1)

    pipe = Pipeline([('feature_removal', ppcs.get_feature_removal()),
                     ('col_t', ppcs.get_col_transf()),
                     ('model', ensemble.AdaBoostClassifier())])

    param_dist = [{
        'model': [LogisticRegression()],
        'model__C': expon(scale=1),
        'col_t__num__poly': [PolynomialFeatures(degree=2)]
    }, {
        'model': [LinearSVC()],
        'model__C': loguniform(0.000001, 1000000),
        'model__max_iter': [5000]
    }, {
        'model': [SVC()],
        'model__C': loguniform(0.000001, 1e6),
        'model__kernel': ['rbf', 'poly'],
        'model__gamma': ['scale', 'auto']
    }, {
        'model': [KNeighborsClassifier()],
        'model__n_neighbors': range(1, 10),
        'model__weights': ['uniform', 'distance']
    }, {
        'model': [ensemble.RandomForestClassifier()],
        'model__n_estimators': [10, 30, 100, 300, 1000, 3000],
        'model__criterion': ['gini', 'entropy'],
        'model__min_samples_split': range(2, 30),
    }, {
        'model': [ensemble.AdaBoostClassifier()]
    }, {
        'model': [ensemble.GradientBoostingClassifier()],
        'model__loss': ['deviance', 'exponential'],
        'model__n_estimators': [10, 30, 100, 300, 1000, 3000],
        'model__min_samples_split': range(2, 30),
    }]

    search = RandomizedSearchCV(pipe,
                                param_dist,
                                n_iter=100,
                                cv=3,
                                n_jobs=2,
                                verbose=1,
                                random_state=42,
                                return_train_score=True,
                                scoring='accuracy')

    search.fit(X, Y)

    dump(search.cv_results_, "models/results2.joblib")
Example #9
def test_gp_samples_to_params():
    space = {
        'a': range(10),
        'b': uniform(-10, 20),
        'c': ['cat1', 1, 'cat2'],
        'e': [1, 2, 3],
        'f': ['const'],
        'g': loguniform(0.001, 100),
        'h': [10]
    }
    X = np.array([
        # 4, -8, 'cat2', 1, 'const', 1 , 10
        [0.4444, 0.1, 0, 0, 1, 0, 1, 0.6, 0],
        # 0, -10.0, 'cat1', 3, 'const', 0.001 , 10
        [0.0, 0.0, 1, 0, 0, 1, 1, 0.0, 0],
        # 9, 10.0, 1, 2, 'const', 100 , 10
        [1.0, 1.0, 0, 1, 0, 0.5, 1, 1.0, 0],
    ])

    expected = [
        dict(a=4, b=-8.0, c='cat2', e=1, f='const', g=1, h=10),
        dict(a=0, b=-10.0, c='cat1', e=3, f='const', g=.001, h=10),
        dict(a=9, b=10.0, c=1, e=2, f='const', g=100, h=10),
    ]

    ds = domain_space(space, domain_size=1000)

    params = ds.convert_to_params(X)

    for act, exp in zip(params, expected):
        for k, v in act.items():
            if k == 'g':
                assert np.isclose(v, exp[k])
            else:
                assert v == exp[k]
Example #10
def test_keras(c, s, a, b):
    # Mirror the mnist dataset
    X, y = make_classification(n_classes=10, n_features=784, n_informative=100)
    X = X.astype("float32")
    assert y.dtype == np.dtype("int64")

    model = KerasClassifier(build_fn=_keras_build_fn, lr=0.01, verbose=False)
    params = {"lr": loguniform(1e-3, 1e-1)}

    search = IncrementalSearchCV(model,
                                 params,
                                 max_iter=3,
                                 n_initial_parameters=5,
                                 decay_rate=None)
    yield search.fit(X, y)
    #  search.fit(X, y)

    assert search.best_score_ >= 0

    # Make sure the model trains, and scores aren't constant
    scores = {
        ident: [h["score"] for h in hist]
        for ident, hist in search.model_history_.items()
    }
    assert all(len(hist) == 3 for hist in scores.values())
    nuniq_scores = [pd.Series(v).nunique() for v in scores.values()]
    assert max(nuniq_scores) > 1
Example #11
async def test_pytorch(c, s, a, b):

    n_features = 10
    defaults = {
        "callbacks": False,
        "warm_start": False,
        "train_split": None,
        "max_epochs": 1,
    }
    model = NeuralNetRegressor(
        module=ShallowNet,
        module__n_features=n_features,
        criterion=nn.MSELoss,
        optimizer=optim.SGD,
        optimizer__lr=0.1,
        batch_size=64,
        **defaults,
    )

    model2 = clone(model)
    assert model.callbacks is False
    assert model.warm_start is False
    assert model.train_split is None
    assert model.max_epochs == 1

    params = {"optimizer__lr": loguniform(1e-3, 1e0)}
    X, y = make_regression(n_samples=100, n_features=n_features)
    X = X.astype("float32")
    y = y.astype("float32").reshape(-1, 1)
    search = IncrementalSearchCV(model2, params, max_iter=5, decay_rate=None)
    await search.fit(X, y)
    assert search.best_score_ >= 0
Example #12
def _get_geodamps(n_params):
    model = GeoDamp(seed=42)

    # Don't decay the learning rate:
    # damping delay = 250,000 examples (5 epochs)
    # Tune close to that.
    powers = [5, 6, 7]
    param_space = {
        "initial_batch_size": [2**i for i in powers],
        "max_batch_size": [100, 200, 500, 1000, 2000, 5000],
        "dampingfactor": loguniform(1, 10),
        "dampingdelay": loguniform(50e3, 500e3),
        "weight_decay": [1e-3, 1e-4, 1e-5, 1e-6, 0, 0, 0],
    }
    params = ParameterSampler(param_space, n_iter=n_params, seed=42)
    models = [clone(model).set_params(**p) for p in params]
    return models
Example #13
def _get_data(a, b, ndim, rng, mode):  # 'mode' is referenced below, so it must be passed in
    if ndim == 2:
        s1 = loguniform(min(3, a // 2), a * 2).rvs(random_state=rng).astype(int)
        s2 = loguniform(min(3, b // 2), b * 2).rvs(random_state=rng).astype(int)
        x = rng.randn(a, s1)
        h = rng.randn(b, s2)
        if mode == "valid":
            s = np.sort([a, b, s1, s2])
            h = rng.randn(*s[:2])
            x = rng.randn(*s[2:])
            assert all(h.shape[i] <= x.shape[j] for i in [0, 1] for j in [0, 1])
    elif ndim == 1:
        x = rng.randn(a)
        h = rng.randn(b)
    else:
        raise ValueError("ndim")
    return x, h
Example #14
    def convert_to_sklearn(self):
        from scipy.stats import loguniform, uniform

        if self.log:
            sampler = loguniform(self.lower, self.upper)
        else:
            sampler = uniform(self.lower, self.upper - self.lower)
        return sampler
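For context, a minimal sketch of how such frozen scipy distributions behave (assuming scipy >= 1.4, where scipy.stats.loguniform is available); the bounds below are illustrative only:

from scipy.stats import loguniform, uniform

# loguniform(a, b): samples whose logarithms are uniform on [log(a), log(b)]
lr_dist = loguniform(1e-4, 1e-1)
samples = lr_dist.rvs(size=5, random_state=0)
assert ((samples >= 1e-4) & (samples <= 1e-1)).all()

# uniform(loc, scale): samples uniform on [loc, loc + scale],
# which is why the snippet above passes (lower, upper - lower)
ratio_dist = uniform(0.2, 0.6)   # covers [0.2, 0.8]
print(samples, ratio_dist.rvs(size=3, random_state=0))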
Example #15
def load_params_svm(p, label):
    c = p[label]['C']
    gamma = p[label]['gamma']
    kernel = p[label]['kernel']
    class_weight = p[label]['class_weight']

    for i, cw in enumerate(class_weight):
        if cw == 'None':
            class_weight[i] = None

    params = {
        'C': loguniform(c[0], c[1]),
        'gamma': loguniform(gamma[0], gamma[1]),
        'kernel': kernel,
        'class_weight': class_weight
    }
    return params
Example #16
def hyper_xgboost_rs():
    cs = {
        'eta': loguniform(1e-5, 1),
        'subsample': uniform(0.1, 0.9),
        'max_depth': list(range(1, 99)),
        'gamma': uniform(0.001, 1.999),
        'min_child_weight': uniform(1, 69)
    }
    return cs
Example #17
def hyper_catboost_rs():
    cs = {
        'max_depth': list(range(1, 15)),
        'learning_rate': loguniform(0.001, 1),
        'l2_leaf_reg': uniform(1, 29),
        'bagging_temperature': uniform(0.1, 9.9),
        'random_strength': uniform(0.1, 9.9)
    }
    return cs
Example #18
def _time_2d(seed, mode):
    rng = np.random.RandomState(2**31 - seed)
    n = loguniform(1e1, 500).rvs(size=1, random_state=rng).astype(int).item()
    k = loguniform(3, 75).rvs(size=1, random_state=rng).astype(int).item()
    r1, r2 = uniform(1, 2).rvs(size=2, random_state=rng)
    n2, k2 = int(r1 * n), int(r2 * k)
    if mode == "valid":
        k, k2, n, n2 = np.sort([n, n2, k, k2])
    x = rng.randn(n, n2)
    h = rng.randn(k, k2)
    assert x.ndim == 2 and h.ndim == 2
    datum = {"x_shape0": n, "h_shape0": k, "x_shape1": n2, "h_shape1": k2,
             "seed": seed, "mode": mode, "ndim": 2}
    datum["choose_conv_method"] = choose_conv_method(x, h, mode)
    for method in ["fft", "direct", "auto"]:
        start = time()
        y = convolve(x, h, mode=mode, method=method)
        datum[method + "_time"] = time() - start
    return datum 
Example #19
def fit_gbdt(X, y, n_iter):
    """Fit a gradient boosted decision trees model"""
    model = LGBMClassifier(n_estimators=2000, random_state=42)
    model = make_pipeline(columns_transform(), model)

    param_space = {
        "lgbmclassifier__min_data_in_leaf": loguniform_int(5, 500),
        "lgbmclassifier__num_leaves": loguniform_int(31, 500),
        "lgbmclassifier__reg_alpha": st.loguniform(1e-10, 1.0),
        "lgbmclassifier__reg_lambda": st.loguniform(1e-10, 1.0),
        "lgbmclassifier__learning_rate": st.loguniform(1e-4, 1e-1),
    }
    model = dcv.RandomizedSearchCV(model,
                                   param_space,
                                   scoring="neg_log_loss",
                                   n_iter=n_iter,
                                   random_state=42,
                                   cv=5)

    model.fit(X, y)
    return model
Example #20
def _time_1d(seed, mode):
    rng = np.random.RandomState(2**31 - seed)
    n, k = loguniform(3, 5e4).rvs(size=2, random_state=rng).astype(int)
    x = rng.randn(n)
    h = rng.randn(k)
    assert x.ndim == 1 and h.ndim == 1
    datum = {"x_shape": n, "h_shape": k, "seed": seed, "mode": mode, "ndim": 1}
    datum["choose_conv_method"] = choose_conv_method(x, h, mode)
    for method in ["fft", "direct", "auto"]:
        start = time()
        y = convolve(x, h, mode=mode, method=method)
        datum[method + "_time"] = time() - start
    return datum 
Example #21
def fit_mlp(X, y, n_iter):
    """Fit a simple multi-layer perceptron model"""
    model = MLPClassifier(random_state=42, early_stopping=True)
    model = make_pipeline(columns_transform(), model)

    layers_options = [
        [n_units] * n_layers
        for n_units, n_layers in it.product([32, 64, 128, 256, 512], [1, 2])
    ]
    param_space = {
        "mlpclassifier__hidden_layer_sizes": layers_options,
        "mlpclassifier__alpha": st.loguniform(1e-5, 1e-2),
        "mlpclassifier__learning_rate_init": st.loguniform(1e-4, 1e-1),
    }
    model = dcv.RandomizedSearchCV(model,
                                   param_space,
                                   scoring="neg_log_loss",
                                   n_iter=n_iter,
                                   random_state=42,
                                   cv=5)

    model.fit(X, y)
    return model
Example #22
def _get_padadamps(n_params):
    powers = [5, 5.5, 6, 6.5, 7]
    param_space = {
        "initial_batch_size": [2**i for i in powers],
        "max_batch_size": [100, 200, 500, 1000, 2000, 5000],
        "batch_growth_rate": loguniform(1e-3, 1e-1),
        "dwell": [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000],
        "weight_decay": [1e-3, 1e-4, 1e-5, 1e-6, 0, 0, 0],
    }
    model = PadaDamp(seed=42)

    params = ParameterSampler(param_space, n_iter=n_params, seed=42)
    models = [clone(model).set_params(**p) for p in params]
    return models
Example #23
    def cv(self, x_train, y_train, x_val=None, y_val=None, hpo="random"):
        from sklearn.svm import SVR

        params = self.params
        params["verbose"] = 0

        best_param = None
        best_param_score = None
        if hpo == "grid":
            search_params = [{"kernel": ["rbf"],
                              "gamma": [0.001, 0.01, 0.1, 1],
                              "C": [0.1, 1, 10, 100]},
                             {"kernel": ["sigmoid"],
                              "gamma": [0.001, 0.01, 0.1, 1],
                              "C": [0.1, 1, 10, 100]}]

            search = GridSearchCV(SVR(**params), search_params, n_jobs=-1, cv=3,
                                  scoring="neg_mean_squared_error", verbose=1)
            search.fit(x_train, y_train)

            best_param = search.best_params_
            best_param_score = search.best_score_
        elif hpo == "random":
            search_params = {"kernel": ["rbf", "sigmoid", "linear"],
                             "gamma": loguniform(0.001, 1),
                             "C": loguniform(0.1, 100)}
            search = RandomizedSearchCV(SVR(**params), search_params, n_jobs=-1, cv=3,
                                        random_state=1234,
                                        scoring="neg_mean_squared_error", verbose=1)
            search.fit(x_train, y_train)
            best_param = search.best_params_
            best_param_score = search.best_score_

        if best_param is not None:
            print("Best Param: {}, with scores: {}".format(best_param, best_param_score))
            self.params.update(best_param)
            self.build_model(**self.params)
Example #24
    def test_continous_induced_measure_ppf(self):
        degree = 2
        alpha_stat, beta_stat = 3, 3
        ab = jacobi_recurrence(
            degree+1, alpha=beta_stat-1, beta=alpha_stat-1, probability=True)

        tol = 1e-15
        var = stats.beta(alpha_stat, beta_stat, -5, 10)
        can_lb, can_ub = -1, 1
        lb, ub = var.support()
        print(lb, ub)
        cx = np.linspace(can_lb, can_ub, 51)

        def can_pdf(xx):
            loc, scale = lb+(ub-lb)/2, (ub-lb)/2
            return var.pdf(xx*scale+loc)*scale

        cdf_vals = continuous_induced_measure_cdf(
            can_pdf, ab, degree, can_lb, can_ub, tol, cx)
        assert np.all(cdf_vals <= 1.0)
        ppf_vals = continuous_induced_measure_ppf(
            var, ab, degree, cdf_vals, 1e-10, 1e-8)
        assert np.allclose(cx, ppf_vals)

        try:
            var = stats.loguniform(1.e-5, 1.e-3)
        except AttributeError:  # scipy < 1.4 has no stats.loguniform
            var = stats.reciprocal(1.e-5, 1.e-3)
        ab = get_recursion_coefficients_from_variable(var, degree+5, {})
        can_lb, can_ub = -1, 1
        cx = np.linspace(can_lb, can_ub, 51)
        lb, ub = var.support()

        def can_pdf(xx):
            loc, scale = lb+(ub-lb)/2, (ub-lb)/2
            return var.pdf(xx*scale+loc)*scale
        cdf_vals = continuous_induced_measure_cdf(
            can_pdf, ab, degree, can_lb, can_ub, tol, cx)
        # differences caused by root finding optimization tolerance
        assert np.all(cdf_vals <= 1.0)
        ppf_vals = continuous_induced_measure_ppf(
            var, ab, degree, cdf_vals, 1e-10, 1e-8)
        # import matplotlib.pyplot as plt
        # plt.plot(cx, cdf_vals)
        # plt.plot(ppf_vals, cdf_vals, 'r*', ms=2)
        # plt.show()
        assert np.allclose(cx, ppf_vals)
Example #25
    def cv(self, x_train, y_train, x_val=None, y_val=None, hpo="random"):
        from xgboost import XGBRegressor

        params = self.params
        params["verbosity"] = 0
        if x_val is not None:
            fit_params = {"early_stopping_rounds": 20,
                          "eval_set": [(x_val, y_val)],
                          "verbose": False}
        else:
            fit_params = {"verbose": False}

        best_param = None
        best_param_score = None
        if hpo == "grid":
            search_params = {
                "n_estimators": [100, 500, 1000],
                "max_depth": [5, 10, 15, 20],
                "learning_rate": [0.1, 0.05, 0.01]
            }
            search = GridSearchCV(XGBRegressor(**params), search_params, n_jobs=1, cv=3,
                                  scoring="neg_mean_squared_error", verbose=True)
            search.fit(x_train, y_train, **fit_params)
            best_param = search.best_params_
            best_param_score = search.best_score_
        elif hpo == "random":
            search_params = {
                "n_estimators": [100, 500, 1000],
                "max_depth": [5, 10, 15, 20],
                "learning_rate": loguniform(loc=0.01, scale=0.1)
            }
            search = RandomizedSearchCV(XGBRegressor(**params), search_params, n_jobs=1, cv=3,
                                        random_state=1234,
                                        scoring="neg_mean_squared_error", verbose=True)
            search.fit(x_train, y_train, **fit_params)
            best_param = search.best_params_
            best_param_score = search.best_score_

        if best_param is not None:
            print("Best Param: {}, with scores: {}".format(best_param, best_param_score))
            self.params.update(best_param)
            self.params["n_jobs"] = 0
            self.build_model(**self.params)
Example #26
def fit_linear(X, y, n_iter):
    """Fit a logistic regression model"""
    model = LogisticRegression(max_iter=500,
                               penalty="elasticnet",
                               solver="saga")
    model = make_pipeline(columns_transform(), model)

    param_space = {
        "logisticregression__l1_ratio": st.uniform(0, 1),
        "logisticregression__C": st.loguniform(1e-4, 1e4),
    }
    model = dcv.RandomizedSearchCV(model,
                                   param_space,
                                   scoring="neg_log_loss",
                                   n_iter=n_iter,
                                   random_state=42,
                                   cv=5)

    model.fit(X, y)
    return model
Example #27
def create_logistic_regression_model(random_state,
                                     tune=True,
                                     class_balanced=True):
    """Create a logistic regression model using best hyperparameters or tuning

    Parameters
    ----------
    tune : bool, optional
        tune the hyperparameter or using the best values, by default True

    Returns
    -------
    Logistic Regression Model
        The model we want to create
    """

    class_weight = "balanced"

    if not class_balanced:
        class_weight = None

    model = {
        "clf":
        LogisticRegression(class_weight=class_weight,
                           random_state=random_state,
                           max_iter=1000)
    }

    if tune:
        model[PARAM_DIST] = {"logisticregression__C": loguniform(1e-3, 1e3)}
    else:
        if class_balanced:
            model[PARAM_DIST] = {
                "logisticregression__C": [0.008713608033492446]
            }
        else:
            model[PARAM_DIST] = {
                "logisticregression__C": [0.008713608033492446]
            }
    return model
Example #28
def test_gp_space():
    space = {
        'f': range(10),
        'h': uniform(-10, 20),
        'e': ['cat1', 1, 'cat2'],
        'c': [1, 2, 3],
        'a': ['const'],
        'g': loguniform(0.001, 100),
        'b': [10],
        'd': uniform(0, 1),
        'i': [True, False]
    }

    ds = domain_space(space, domain_size=10000)
    X = ds.sample_gp_space()

    assert (X <= 1.0).all()
    assert (X >= 0.0).all()
    assert (X[:, 0] == 1.).all()  # a
    assert (X[:, 1] == 0.).all()  # b
    assert np.isin(X[:, 2], [0.0, 0.5, 1.0]).all()  # c
    assert np.isin(X[:, 4:7], np.eye(3)).all()  # e
    assert X.shape == (ds.domain_size, 12)

    params = ds.convert_to_params(X)

    for param in params:
        assert param['a'] == 'const'
        assert param['b'] == 10
        assert param['c'] in space['c']
        assert 0.0 <= param['d'] <= 1.0
        assert param['e'] in space['e']
        assert param['f'] in space['f']
        assert 0.001 <= param['g'] <= 100
        assert -10 <= param['h'] <= 10
        assert param['i'] in space['i']

    X2 = ds.convert_to_gp(params)
    assert np.isclose(X2, X).all()
Example #29
def parse_config(config_file):
    with open(config_file, "r") as f:
        config = load(f, Loader=Loader)
    sbatch_args = []
    for k, v in config["sbatch"].items():
        if len(k) == 1:
            sbatch_args.append(f"-{k} {v}")
        else:
            sbatch_args.append(f"--{k}={v}")
    param_dists = {}
    for k, v in config["hyperparams"].items():
        if isinstance(v, dict):
            lo, hi = v["range"]
            if v["dist"] == "uniform":
                param_dists[k] = uniform(lo, hi - lo)  # scipy uniform(loc, scale) spans [lo, hi]
            elif v["dist"] == "loguniform":
                param_dists[k] = loguniform(lo, hi)
        else: # list or constant
            if not isinstance(v, (list, tuple)):
                param_dists[k] = [v]
            else:
                param_dists[k] = v
    return sbatch_args, param_dists
Example #30
def hyper_parameter_search(classifier_type, trainX, RTrain):
    hyper_params = dict()
    space = dict()
    space['solver'] = ['lbfgs']
    space['penalty'] = ['none', 'l2']
    space['C'] = loguniform(1e-5, 100)
    model = LogisticRegression(multi_class='multinomial')
    #perform topic-conditional hyper-parameter search
    for topic in trainX:
        if topic != 'R175':
            if classifier_type == 'logistic':
                search = RandomizedSearchCV(model,
                                            space,
                                            n_iter=100,
                                            scoring='accuracy',
                                            n_jobs=-1,
                                            random_state=1)
                # execute search
                result = search.fit(trainX[topic], RTrain[topic])
                # summarize result
                print('Best Score: %s' % result.best_score_)
                print('Best Hyperparameters: %s' % result.best_params_)
                hyper_params[topic] = result.best_params_
    return hyper_params