def test_example(self):
    """Check that both grid searches agree on the classic iris/SVC example.

    The example from the sklearn documentation is run through
    ``grid_search.GridSearchCV`` and through the ``GridSearchCV`` variant
    that additionally takes ``self.sc``; the two results are then compared.
    """
    iris = datasets.load_iris()
    parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
    svr = svm.SVC()
    clf = grid_search.GridSearchCV(svr, parameters)
    clf.fit(iris.data, iris.target)

    clf2 = GridSearchCV(self.sc, svr, parameters)
    clf2.fit(iris.data, iris.target)

    # Both searches were handed the same ``svr`` object, so this only
    # verifies that neither search changed the configured base estimator.
    self.assertEqual(clf.estimator.get_params(),
                     clf2.estimator.get_params())

    # The meaningful comparison: the refitted winners of both searches must
    # carry the same hyper-parameters (relies on the default refit=True).
    self.assertEqual(clf.best_estimator_.get_params(),
                     clf2.best_estimator_.get_params())
Esempio n. 2
0
    def test_example(self):
        """Check that both grid searches agree on the classic iris/SVC example.

        The example from the sklearn documentation is run through
        ``grid_search.GridSearchCV`` and through the ``GridSearchCV`` variant
        that additionally takes ``self.sc``; the two results are then
        compared.
        """
        iris = datasets.load_iris()
        parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
        svr = svm.SVC(gamma='auto')
        clf = grid_search.GridSearchCV(svr, parameters)
        clf.fit(iris.data, iris.target)

        clf2 = GridSearchCV(self.sc, svr, parameters)
        clf2.fit(iris.data, iris.target)

        # Both searches were handed the same ``svr`` object, so this only
        # verifies that neither search changed the configured base estimator.
        self.assertEqual(clf.estimator.get_params(),
                         clf2.estimator.get_params())

        # The meaningful comparison: the refitted winners of both searches
        # must carry the same hyper-parameters (relies on default refit=True).
        self.assertEqual(clf.best_estimator_.get_params(),
                         clf2.best_estimator_.get_params())
Esempio n. 3
0
# Scale the features: the scaler is fitted on the training split only and the
# same transform is then applied to both splits.
# NOTE(review): `scaler` is created outside this view - presumably a
# StandardScaler; confirm.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


from pyspark.sql import SparkSession
# Spark context handed to the grid search below
spark = SparkSession.builder.appName("Regression_worker_2").getOrCreate()
sc = spark.sparkContext

# Initialize the model: grid-search an MLP regressor over two hidden-layer
# layouts with a fixed iteration budget.
MLP_model = GridSearchCV(
    sc,
    MLPRegressor(alpha=0.005, random_state=42),
    {'hidden_layer_sizes': [[512, 4], [256, 4]], 'max_iter': [5000]})

# Only the MLP search is fitted in this script.
MLP_model.fit(X_train, y_train)
    
# Write the train/test scores of every fitted model, followed by the elapsed
# wall-clock time, to a text report.
models = [MLP_model]

with open('./model_scores_worker_2.txt', 'w') as f:
    for m in models:
        # Predict once per split and reuse the result for both metrics
        # instead of running the model twice on the same data.
        train_pred = m.predict(X_train)
        f.write('Training Set Mean Squared Error: {:.2f}\n'.format(mean_squared_error(y_train, train_pred)))
        f.write('Training Set R^2: {:.2f}\n'.format(r2_score(y_train, train_pred)))

        test_pred = m.predict(X_test)
        f.write('Testing Set Mean Squared Error: {:.2f}\n'.format(mean_squared_error(y_test, test_pred)))
        f.write('testing Set R^2: {:.2f}\n\n'.format(r2_score(y_test, test_pred)))

    # `start` is set outside this view; it is used here as a time.time() stamp.
    f.write('\nRunning Time: {:.2f}'.format(time.time() - start))
# Initialize the models. Every estimator is wrapped in a GridSearchCV with an
# empty parameter grid, so each one is fitted with exactly the settings given.
linear_model = GridSearchCV(sc, LinearRegression(), param_grid={})
MLP_model = GridSearchCV(
    sc,
    MLPRegressor(
        hidden_layer_sizes=[512, 4],
        max_iter=5000,
        alpha=0.005,
        random_state=42,
    ),
    param_grid={},
)
RandomForest_model = GridSearchCV(
    sc,
    RandomForestRegressor(n_estimators=100, random_state=0),
    param_grid={},
)
GradientBoosting_model = GridSearchCV(
    sc,
    GradientBoostingRegressor(
        n_estimators=100,
        max_depth=10,
        criterion='mse',
    ),
    param_grid={},
)

# Fit the models one after another on the training split, in this order.
models = [linear_model, MLP_model, RandomForest_model, GradientBoosting_model]
for m in models:
    m.fit(X_train, y_train)

# print scores
with open('./model_scores_compare.txt', 'w') as f:
    for m in models:
        #f.write(str(m) + '\n')
        f.write('Training Set Mean Squared Error: {:.2f}\n'.format(
            mean_squared_error(y_train, m.predict(X_train))))
        f.write('Training Set R^2: {:.2f}\n'.format(
            r2_score(y_train, m.predict(X_train))))

        f.write('Testing Set Mean Squared Error: {:.2f}\n'.format(
Esempio n. 5
0
# Grid-search a random forest classifier on the digits dataset.
digits = datasets.load_digits()
X, y = digits.data, digits.target

sc = createLocalSparkSession().sparkContext
param_grid = {
    "max_depth": [3, None],
    "max_features": [1, 3, 10],
    "min_samples_split": [0.1, 0.2, 0.3],
    "min_samples_leaf": [1, 3, 10],
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
    "n_estimators": [10, 20, 40, 80]
}

gs = GridSearchCV(sc, RandomForestClassifier(), param_grid=param_grid)
gs.fit(X, y)

# Get the best parameters.
# Candidates are ranked by their mean cross-validated *test* score: ranking by
# the training score would simply reward the combination that overfits most.
best_params_ = None
best_score_ = 0
params = gs.cv_results_['params']
mean_test_score = gs.cv_results_['mean_test_score']
for i, score in enumerate(mean_test_score):
    # The first candidate always seeds the running best; later ones replace
    # it only when strictly better, so ties keep the earliest candidate.
    if best_params_ is None or score > best_score_:
        best_score_ = score
        best_params_ = params[i]
print(best_params_)
print(best_score_)