Esempio n. 1
0
    def test_search(self):
        """Fit ``SigOptSearchCV`` on iris and check the calls made on the SigOpt connection.

        ``sigopt.Connection()`` is inspected through ``mock_calls``, so it is
        presumably patched with a mock by the surrounding test setup (not
        visible in this block).  The test checks that:

        * constructing the search performs no API calls;
        * fitting creates exactly one experiment whose definition matches
          ``GradientBoostingClassifier_EXPERIMENT_DEF``;
        * one suggestion and one observation are created per iteration and fold;
        * ``best_params_`` equals ``zero_corner`` of the experiment definition.
        """
        conn = sigopt.Connection()

        n_iter = 5
        folds = 3
        cv = SigOptSearchCV(
            estimator=GradientBoostingClassifier(),
            param_domains=GradientBoostingClassifier_PARAM_DOMAIN,
            client_token='client_token',
            n_iter=n_iter,
            cv=folds)

        # Nothing may have been sent before fit().  These checks use the same
        # call-form paths (``suggestions()``, ``observations()``,
        # ``best_assignments()``) as the post-fit assertions below; inspecting
        # the bare attributes would look at mocks that are never called and
        # would therefore always pass.
        assert len(conn.experiments().create.mock_calls) == 0
        assert len(conn.experiments().fetch.mock_calls) == 0
        assert len(conn.experiments().best_assignments().fetch.mock_calls) == 0
        assert len(conn.experiments().suggestions().create.mock_calls) == 0
        assert len(conn.experiments().observations().create.mock_calls) == 0

        data = sklearn.datasets.load_iris()
        cv.fit(data['data'], data['target'])

        # Exactly one experiment is created; call_args[1] holds the keyword
        # arguments of that create() call.
        assert len(conn.experiments().create.mock_calls) == 1
        create_definition = conn.experiments().create.call_args[1]
        assert create_definition[
            'name'] == GradientBoostingClassifier_EXPERIMENT_DEF['name']

        # Same parameters, compared without regard to order.
        assert len(create_definition['parameters']) == len(
            GradientBoostingClassifier_EXPERIMENT_DEF['parameters'])
        for p in GradientBoostingClassifier_EXPERIMENT_DEF['parameters']:
            assert p in create_definition['parameters']

        assert len(conn.experiments().best_assignments().fetch.mock_calls) == 1
        assert len(conn.experiments().suggestions().create.mock_calls
                   ) == n_iter * folds
        assert len(conn.experiments().observations().create.mock_calls
                   ) == n_iter * folds

        assert cv.best_params_ == zero_corner(
            GradientBoostingClassifier_EXPERIMENT_DEF)
Esempio n. 2
0
 def test_non_string_categorical(self):
     """Smoke test: fitting an ``SVC`` search over ``SVC_PARAM_DOMAIN`` on iris must not raise.

     ``SVC_PARAM_DOMAIN`` is defined elsewhere; judging by the test name it
     presumably contains categorical values that are not strings.
     """
     iris = sklearn.datasets.load_iris()
     features, labels = iris['data'], iris['target']
     search = SigOptSearchCV(
         SVC(),
         SVC_PARAM_DOMAIN,
         client_token='client_token',
         n_iter=5,
     )
     search.fit(features, labels)
Esempio n. 3
0
  def test_search(self):
    """Fit ``SigOptSearchCV`` on iris and check the calls made on the SigOpt connection.

    ``sigopt.Connection()`` is inspected through ``mock_calls``, so it is
    presumably patched with a mock by the surrounding test setup (not visible
    in this block).  The test checks that:

    * constructing the search performs no API calls;
    * fitting creates exactly one experiment whose definition matches
      ``GradientBoostingClassifier_EXPERIMENT_DEF``;
    * one suggestion and one observation are created per iteration;
    * ``best_params_`` equals ``zero_corner`` of the experiment definition.
    """
    conn = sigopt.Connection()

    n_iter = 5
    folds = 3
    cv = SigOptSearchCV(
      estimator=GradientBoostingClassifier(),
      param_domains=GradientBoostingClassifier_PARAM_DOMAIN,
      client_token='client_token',
      n_iter=n_iter,
      cv=folds
    )

    # Nothing may have been sent before fit().  These checks use the same
    # call-form paths (``suggestions()``, ``observations()``,
    # ``best_assignments()``) as the post-fit assertions below; inspecting the
    # bare attributes would look at mocks that are never called and would
    # therefore always pass.
    assert len(conn.experiments().create.mock_calls) == 0
    assert len(conn.experiments().fetch.mock_calls) == 0
    assert len(conn.experiments().best_assignments().fetch.mock_calls) == 0
    assert len(conn.experiments().suggestions().create.mock_calls) == 0
    assert len(conn.experiments().observations().create.mock_calls) == 0

    data = sklearn.datasets.load_iris()
    cv.fit(data['data'], data['target'])

    # Exactly one experiment is created; call_args[1] holds the keyword
    # arguments of that create() call.
    assert len(conn.experiments().create.mock_calls) == 1
    create_definition = conn.experiments().create.call_args[1]
    assert create_definition['name'] == GradientBoostingClassifier_EXPERIMENT_DEF['name']

    # Same parameters, compared without regard to order.
    assert len(create_definition['parameters']) == len(GradientBoostingClassifier_EXPERIMENT_DEF['parameters'])
    for p in GradientBoostingClassifier_EXPERIMENT_DEF['parameters']:
      assert p in create_definition['parameters']

    assert len(conn.experiments().best_assignments().fetch.mock_calls) == 1
    assert len(conn.experiments().suggestions().create.mock_calls) == n_iter
    assert len(conn.experiments().observations().create.mock_calls) == n_iter

    assert cv.best_params_ == zero_corner(GradientBoostingClassifier_EXPERIMENT_DEF)
Esempio n. 4
0
 def test_bad_param_range_not_iterable(self):
   """A bare scalar (``15``) given as a parameter domain must be rejected.

   The exception may come from either the constructor or
   ``_transform_param_domains``; both run inside the ``pytest.raises`` block.
   """
   scalar_domain = {'max_iter': 15}
   with pytest.raises(Exception):
     search = SigOptSearchCV(SVC(), scalar_domain, client_token='client_token', n_iter=5)
     search._transform_param_domains(search.param_domains)
Esempio n. 5
0
 def test_warn_param_range_list(self):
   """A parameter range written as a list (``[5, 10]``) must emit a ``UserWarning``.

   The warning may come from either the constructor or
   ``_transform_param_domains``; both run inside the ``pytest.warns`` block.
   """
   list_domain = {'max_iter': [5, 10]}
   with pytest.warns(UserWarning):
     search = SigOptSearchCV(SVC(), list_domain, client_token='client_token', n_iter=5)
     search._transform_param_domains(search.param_domains)
Esempio n. 6
0
 def test_bad_param_range_not_iterable(self):
   """Expect an exception when a parameter domain is a plain integer instead of a range."""
   not_a_range = {'max_iter': 15}
   with pytest.raises(Exception):
     # Construction and domain transformation are both inside the block, so
     # a failure in either one satisfies the expectation.
     searcher = SigOptSearchCV(
       SVC(),
       not_a_range,
       client_token='client_token',
       n_iter=5,
     )
     searcher._transform_param_domains(searcher.param_domains)
Esempio n. 7
0
 def test_warn_param_range_list(self):
   """Expect a ``UserWarning`` when a parameter range is supplied as a list."""
   list_range = {'max_iter': [5, 10]}
   with pytest.warns(UserWarning):
     # Construction and domain transformation are both inside the block, so
     # a warning from either one satisfies the expectation.
     searcher = SigOptSearchCV(
       SVC(),
       list_range,
       client_token='client_token',
       n_iter=5,
     )
     searcher._transform_param_domains(searcher.param_domains)
Esempio n. 8
0
 def test_bad_param_range2(self):
     """Expect an exception for a domain dict that contains a three-element tuple range.

     The dict also carries a dict-valued ``hidden_layer_sizes`` domain.  The
     exception may come from the constructor or from
     ``_transform_param_domains``; both run inside the ``pytest.raises`` block.
     """
     malformed_domains = {
         'bad_param_range': (1, 2, 3),
         'hidden_layer_sizes': {
             '5': (5, ),
             '5,4,3': (5, 4, 3),
         },
     }
     with pytest.raises(Exception):
         search = SigOptSearchCV(
             SVC(),
             malformed_domains,
             client_token='client_token',
             n_iter=5,
         )
         search._transform_param_domains(search.param_domains)
Esempio n. 9
0
 def test_bad_param_range2(self):
   """A domain dict holding a three-element tuple range must be rejected with an exception."""
   layer_choices = {'5': (5,), '5,4,3': (5, 4, 3)}
   domains = {
     'bad_param_range': (1, 2, 3),
     'hidden_layer_sizes': layer_choices,
   }
   with pytest.raises(Exception):
     # Either the constructor or the explicit transformation may raise.
     searcher = SigOptSearchCV(SVC(), domains, client_token='client_token', n_iter=5)
     searcher._transform_param_domains(searcher.param_domains)
# Define domains for the Random Forest parameters
# (each entry is a two-element list, presumably [lower, upper] bounds)
random_forest_parameters = dict(
    max_features=[1, 128],
    n_estimators=[1, 100],
    min_samples_leaf=[1, 10],
)

# define sklearn estimator
random_forest = RandomForestClassifier()

# define SigOptCV search strategy
clf = SigOptSearchCV(
    random_forest,
    random_forest_parameters,
    cv=5,
    client_token=client_token,
    n_iter=60,
)

# Timestamps taken immediately before and after the search
time1 = time()
clf.fit(X_train, y_train)
time2 = time()

# Prediction: use the search object fitted just above.  Predicting with a
# differently named classifier would score a model this script never
# trained (or fail with a NameError if that name does not exist).
y_pred_train = clf.predict(X_train)

# Compute the score
score = compute_pred_score(y_train, y_pred_train)
print('Score sur le train : %s' % score)
Esempio n. 11
0
def main():
    """Fit the classifier selected on the command line and pickle it.

    Options come from ``parse_args()``.  The feature matrix and the labels
    are unpickled from the given paths; when the chosen estimator has a
    parameter domain it is wrapped in a ``SigOptSearchCV`` search, otherwise
    it is fitted directly.  If the fitted object exposes ``best_estimator_``
    that estimator is the one written to the output file.

    Raises:
        KeyError: if the estimator name is not one of the known classifiers.
        Exception: if ``X`` is sparse and the chosen estimator is flagged as
            not supporting sparse matrices.
    """
    # convert arg structure to regular dict
    options = vars(parse_args())
    X_path = options['X_file']
    y_path = options['y_file']
    client_token = options['client_token']
    estimator_name = options['estimator']
    output_path = options['output_file']
    opt_timeout = options['opt_timeout']

    # NOTE: pickle.load can execute arbitrary code embedded in the file, so
    # these inputs must come from a trusted source.
    with open(X_path, 'rb') as features_file:
        X = pickle.load(features_file)
    with open(y_path, 'rb') as labels_file:
        y = pickle.load(labels_file)

    def log_bounds(low, high):
        # Bounds for a "__log__"-prefixed parameter, given as natural logs
        # (presumably SigOptSearchCV searches these in log space -- confirm).
        return [math.log(low), math.log(high)]

    # define param domains for all estimators
    rf_domain = {
        'max_features': ['sqrt', 'log2'],
        'max_depth': [3, 20],
        'criterion': ['gini', 'entropy'],
        'n_estimators': [10, 100],
    }
    svm_domain = {
        'degree': [2, 4],
        '__log__C': log_bounds(0.00001, 1.0),
        'gamma': [0.0, 1.0],
    }
    knn_domain = {
        'n_neighbors': [2, 10],
        'algorithm': ['ball_tree', 'kd_tree'],
        'leaf_size': [10, 50],
        'p': [1, 3],
    }
    sgd_domain = {
        '__log__alpha': log_bounds(0.00001, 10.0),
        'l1_ratio': [0.0, 1.0],
        'loss': ['log', 'modified_huber'],
    }
    xgb_domain = {
        '__log__learning_rate': log_bounds(0.0001, 0.5),
        'n_estimators': [10, 100],
        'max_depth': [3, 10],
        'min_child_weight': [6, 12],
        'gamma': [0, 0.5],
        'subsample': [0.6, 1.0],
        'colsample_bytree': [0.6, 1.0],
    }
    lda_domain = {"__log__tol": log_bounds(0.00001, 0.5)}
    qda_domain = {"__log__tol": log_bounds(0.00001, 0.5)}

    # mapping from classifier name to estimator object and domain
    # each value is (estimator, hyperparams, sparse_support)
    registry = {
        "GaussianNBClassifier": (GaussianNB(), None, False),
        "SVMClassifier": (SVC(probability=True), svm_domain, True),
        "RandomForestClassifier": (
            RandomForestClassifier(n_jobs=2), rf_domain, True),
        "SGDClassifier": (
            SGDClassifier(penalty='elasticnet'), sgd_domain, True),
        "XGBClassifier": (XGBClassifier(nthread=2), xgb_domain, True),
        "KNNClassifier": (KNeighborsClassifier(n_jobs=2), knn_domain, False),
        "LDAClassifier": (LinearDiscriminantAnalysis(), lda_domain, False),
        "QDAClassifier": (QuadraticDiscriminantAnalysis(), qda_domain, False),
    }
    estimator, domain, supports_sparse = registry[estimator_name]

    # check that estimator can handle sparse matrices
    if scipy.sparse.issparse(X) and not supports_sparse:
        raise Exception(
            '{} does not support sparse matrices.'.format(estimator_name))

    if domain is None:
        # nothing to tune: fit the plain estimator
        model = estimator
    else:
        # search budget: ten iterations per tuned parameter, at least twenty
        search_iterations = max(10 * len(domain), 20)
        model = SigOptSearchCV(
            estimator,
            domain,
            cv=3,
            opt_timeout=opt_timeout,
            client_token=client_token,
            n_jobs=3,
            n_iter=search_iterations,
        )

    model.fit(X, y)
    # a search object exposes the winning estimator; a plain estimator is
    # stored as it is
    fitted = getattr(model, 'best_estimator_', model)

    # store classifier in specified output file
    with open(output_path, 'wb') as sink:
        pickle.dump(fitted, sink, pickle.HIGHEST_PROTOCOL)
Esempio n. 12
0
 def test_no_token(self):
     """Constructing ``SigOptSearchCV`` without a ``client_token`` must raise ``ValueError``."""
     # The estimator is passed as the class itself, not an instance.
     init_kwargs = {
         'estimator': GradientBoostingClassifier,
         'param_domains': GradientBoostingClassifier_PARAM_DOMAIN,
     }
     with pytest.raises(ValueError):
         SigOptSearchCV(**init_kwargs)
Esempio n. 13
0
 def test_create(self):
     """Constructing ``SigOptSearchCV`` with a ``client_token`` must succeed without raising."""
     # The estimator is passed as the class itself, not an instance.
     init_kwargs = {
         'estimator': GradientBoostingClassifier,
         'param_domains': GradientBoostingClassifier_PARAM_DOMAIN,
         'client_token': 'client_token',
     }
     SigOptSearchCV(**init_kwargs)
Esempio n. 14
0
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Define domains for the Random Forest parameters.
# max_features cannot usefully exceed the number of feature columns, so its
# upper bound is taken from X.shape[1].  (len(iris) would count the entries
# of the dict-like dataset container, which is unrelated to the number of
# features and differs between scikit-learn versions.)
random_forest_parameters = dict(
  max_features=(1, X.shape[1]),
  n_estimators=(1, 100),
  min_samples_leaf=(1, 10),
)

# define sklearn estimator
random_forest = RandomForestClassifier()

# define SigOptCV search strategy
clf = SigOptSearchCV(
  random_forest,
  random_forest_parameters,
  cv=5,
  client_token=client_token,
  n_iter=60
)

# perform CV search for best parameters and fits estimator
# on all data using best found configuration
clf.fit(X, y)

# clf.predict() now uses best found estimator
# clf.best_score_ contains CV score for best found estimator
# clf.best_params_ contains best found param configuration
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Define domains for the Random Forest parameters.
# max_features cannot usefully exceed the number of feature columns, so its
# upper bound is taken from X.shape[1].  (len(iris) would count the entries
# of the dict-like dataset container, which is unrelated to the number of
# features and differs between scikit-learn versions.)
random_forest_parameters = dict(
  max_features=[1, X.shape[1]],
  n_estimators=[1, 100],
  min_samples_leaf=[1, 10],
)

# define sklearn estimator
random_forest = RandomForestClassifier()

# define SigOptCV search strategy
clf = SigOptSearchCV(
  random_forest,
  random_forest_parameters,
  cv=5,
  client_token=client_token,
  n_iter=60
)

# perform CV search for best parameters and fits estimator
# on all data using best found configuration
clf.fit(X, y)

# clf.predict() now uses best found estimator
# clf.best_score_ contains CV score for best found estimator
# clf.best_params_ contains best found param configuration
Esempio n. 16
0
 def test_non_string_categorical(self):
   """Smoke test: a 3-fold ``SVC(gamma='auto')`` search over ``SVC_PARAM_DOMAIN`` on iris must not raise.

   ``SVC_PARAM_DOMAIN`` is defined elsewhere; judging by the test name it
   presumably contains categorical values that are not strings.
   """
   iris = sklearn.datasets.load_iris()
   features, labels = iris['data'], iris['target']
   search = SigOptSearchCV(
     SVC(gamma='auto'),
     SVC_PARAM_DOMAIN,
     client_token='client_token',
     n_iter=5,
     cv=3,
   )
   search.fit(features, labels)