def main():
    """Run a distributed grid search for an XGBoost classifier on iris.

    Loads the iris data, fits a ``DistGridSearchCV`` over a small
    hyper-parameter grid using the Spark context of the ``spark`` object
    defined outside this function, runs predictions with the fitted search
    object, and prints the best score followed by the best value of each
    tuned parameter.
    """
    cv = 5
    clf_scoring = "accuracy"

    data = load_iris()
    X = data["data"]
    y = data["target"]

    # Hyper-parameters to tune; the keys double as the attribute names read
    # back from the best estimator when reporting results below.
    grid = dict(learning_rate=[.05, .01],
                max_depth=[4, 6, 8],
                colsample_bytree=[.6, .8, 1.0],
                n_estimators=[100, 200, 300])

    model = DistGridSearchCV(XGBClassifier(),
                             grid,
                             spark.sparkContext,
                             cv=cv,
                             scoring=clf_scoring)

    model.fit(X, y)
    # predictions on the driver; the results are not used further, the calls
    # only demonstrate that the fitted search object can predict
    preds = model.predict(X)
    probs = model.predict_proba(X)

    # results
    print("-- Grid Search --")
    print("Best Score: {0}".format(model.best_score_))
    best = model.best_estimator_
    # Report the tuned parameters in a fixed order.
    for param in ("colsample_bytree", "learning_rate",
                  "max_depth", "n_estimators"):
        print("Best {0}: {1}".format(param, getattr(best, param)))
# load sample data (binary target)
data = load_breast_cancer()
X = data["data"]
y = data["target"]

### distributed grid search
# Tune the logistic regression parameter C over the candidate values in `Cs`.
# NOTE(review): `solver`, `Cs`, `sc`, `cv` and `scoring` are not defined in
# this part of the file; they must already be bound before this point.
# `sc` is presumably the SparkContext -- confirm against the setup code.
model = DistGridSearchCV(LogisticRegression(solver=solver),
                         dict(C=Cs),
                         sc,
                         cv=cv,
                         scoring=scoring)
# distributed fitting with spark
model.fit(X, y)
# predictions on the driver
preds = model.predict(X)
probs = model.predict_proba(X)

# results
print("-- Grid Search --")
print("Best Score: {0}".format(model.best_score_))
print("Best C: {0}".format(model.best_estimator_.C))
# Tabulate the mean test score for each candidate C, ordered by C.
result_data = pd.DataFrame(model.cv_results_)[["param_C", "mean_test_score"]]
print(result_data.sort_values("param_C"))
# Round-trip the fitted search object through pickle and print the restored
# copy (presumably a check that the fitted object is picklable).
print(pickle.loads(pickle.dumps(model)))

### distributed randomized search
param_dist = dict(C=[])
model = DistRandomizedSearchCV(
    LogisticRegression(solver=solver),
    dict(C=Cs),
    sc,