    def test_example(self):
        # The classic example from the sklearn documentation
        iris = datasets.load_iris()
        parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
        svr = svm.SVC()
        clf = grid_search.GridSearchCV(svr, parameters)
        clf.fit(iris.data, iris.target)

        clf2 = GridSearchCV(self.sc, svr, parameters)
        clf2.fit(iris.data, iris.target)

        b1 = clf.estimator
        b2 = clf2.estimator
        self.assertEqual(b1.get_params(), b2.get_params())
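These tests exercise spark-sklearn's GridSearchCV, which mirrors the scikit-learn API but takes a SparkContext as its first argument so the parameter grid can be evaluated as distributed Spark tasks. A minimal standalone sketch of the same pattern, assuming the package is importable as spark_sklearn:

from pyspark import SparkContext
from sklearn import datasets, svm
from spark_sklearn import GridSearchCV

sc = SparkContext.getOrCreate()
iris = datasets.load_iris()
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}

# Same call signature as sklearn's GridSearchCV, with the SparkContext
# prepended; each parameter combination is fitted in a separate Spark task.
clf = GridSearchCV(sc, svm.SVC(gamma='auto'), parameters)
clf.fit(iris.data, iris.target)
print(clf.best_params_)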
Example 2
    def test_example(self):
        # The classic example from the sklearn documentation
        iris = datasets.load_iris()
        parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
        svr = svm.SVC(gamma='auto')
        clf = grid_search.GridSearchCV(svr, parameters)
        clf.fit(iris.data, iris.target)

        clf2 = GridSearchCV(self.sc, svr, parameters)
        clf2.fit(iris.data, iris.target)

        b1 = clf.estimator
        b2 = clf2.estimator
        self.assertEqual(b1.get_params(), b2.get_params())
Example 3
    def test_cv_lasso_with_mllib_featurization(self):
        data = [('hi there', 0.0),
                ('what is up', 1.0),
                ('huh', 1.0),
                ('now is the time', 5.0),
                ('for what', 0.0),
                ('the spark was there', 5.0),
                ('and so', 3.0),
                ('were many socks', 0.0),
                ('really', 1.0),
                ('too cool', 2.0)]
        data = self.sql.createDataFrame(data, ["review", "rating"])

        # Feature extraction using MLlib
        tokenizer = Tokenizer(inputCol="review", outputCol="words")
        hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20000)
        pipeline = Pipeline(stages=[tokenizer, hashingTF])
        data = pipeline.fit(data).transform(data)

        df = self.converter.toPandas(data.select(data.features.alias("review"), "rating"))

        pipeline = SKL_Pipeline([
            ('lasso', SKL_Lasso())
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }

        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        skl_gs = grid_search.fit(df.review.values, df.rating.values)
        assert len(skl_gs.cv_results_['params']) == len(parameters['lasso__alpha'])
Example 4
    def test_cv_pipeline(self):
        pipeline = SKL_Pipeline([
            ('vect', SKL_HashingVectorizer(n_features=20)),
            ('tfidf', SKL_TfidfTransformer(use_idf=False)),
            ('lasso', SKL_Lasso(max_iter=1))
        ])
        parameters = {
            'lasso__alpha': (0.001, 0.005, 0.01)
        }
        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        data = [('hi there', 0.0),
                ('what is up', 1.0),
                ('huh', 1.0),
                ('now is the time', 5.0),
                ('for what', 0.0),
                ('the spark was there', 5.0),
                ('and so', 3.0),
                ('were many socks', 0.0),
                ('really', 1.0),
                ('too cool', 2.0)]
        df = self.sql.createDataFrame(data, ["review", "rating"]).toPandas()
        skl_gs = grid_search.fit(df.review.values, df.rating.values)
        assert len(skl_gs.grid_scores_) == len(parameters['lasso__alpha'])
        # TODO
        for gs in skl_gs.grid_scores_:
            pass  # assert(gs.)
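This example still reads grid_scores_, which later scikit-learn releases removed in favor of cv_results_ (compare Examples 3 and 5). A sketch of the equivalent check against cv_results_, assuming it is populated as in stock scikit-learn:

results = skl_gs.cv_results_
assert len(results['params']) == len(parameters['lasso__alpha'])
# Each candidate's parameters should come from the declared grid.
for candidate, mean_score in zip(results['params'], results['mean_test_score']):
    assert candidate['lasso__alpha'] in parameters['lasso__alpha']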
Example 5
    def test_cv_linreg(self):
        pipeline = SKL_Pipeline([('lasso', SKL_Lasso(max_iter=1))])
        parameters = {'lasso__alpha': (0.001, 0.005, 0.01)}
        grid_search = GridSearchCV(self.sc, pipeline, parameters)
        X = scipy.sparse.vstack(
            map(lambda x: self.list2csr([x, x + 1.0]), range(0, 100)))
        y = np.array(list(range(0, 100))).reshape((100, 1))
        skl_gs = grid_search.fit(X, y)
        assert len(skl_gs.cv_results_['params']) == len(
            parameters['lasso__alpha'])
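self.list2csr is a test helper that is not shown in this snippet; a plausible implementation (an assumption, not the project's actual code) builds a one-row CSR matrix from a plain Python list, so that vstack can stack the rows into a sparse design matrix:

import numpy as np
import scipy.sparse

def list2csr(values):
    # One row, len(values) columns, in compressed sparse row format.
    return scipy.sparse.csr_matrix(np.asarray(values, dtype=float).reshape(1, -1))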
Example 6
# standardize the data
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


from pyspark.sql import SparkSession
# spark context
spark = SparkSession.builder.appName("Regression_worker_2").getOrCreate()
sc = spark.sparkContext

# initialize the model
MLP_model = GridSearchCV(
    sc,
    MLPRegressor(alpha=0.005, random_state=42),
    {'hidden_layer_sizes': [[512, 4], [256, 4]], 'max_iter': [5000]})

#linear_model.fit(X_train, y_train)
MLP_model.fit(X_train, y_train)
#RandomForest_model.fit(X_train, y_train)
#GradientBoosting_model.fit(X_train, y_train)
    
# print scores
models = [MLP_model]

with open('./model_scores_worker_2.txt', 'w') as f:
    for m in models:
        f.write('Training Set Mean Squared Error: {:.2f}\n'.format(mean_squared_error(y_train, m.predict(X_train))))
        f.write('Training Set R^2: {:.2f}\n'.format(r2_score(y_train, m.predict(X_train))))

        f.write('Testing Set Mean Squared Error: {:.2f}\n'.format(mean_squared_error(y_test, m.predict(X_test))))
Example 7
X_train, X_test, y_train, y_test = train_test_split(X, y)

# standardize the data
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

from pyspark.sql import SparkSession
# spark context
spark = SparkSession.builder.appName("Regression_compare_models").getOrCreate()
sc = spark.sparkContext

# initialize the models
linear_model = GridSearchCV(sc, LinearRegression(), {})
MLP_model = GridSearchCV(
    sc,
    MLPRegressor(hidden_layer_sizes=[512, 4],
                 max_iter=5000,
                 alpha=0.005,
                 random_state=42), {})
RandomForest_model = GridSearchCV(
    sc, RandomForestRegressor(n_estimators=100, random_state=0), {})
GradientBoosting_model = GridSearchCV(
    sc,
    GradientBoostingRegressor(n_estimators=100, max_depth=10, criterion='mse'),
    {})

linear_model.fit(X_train, y_train)
MLP_model.fit(X_train, y_train)
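With an empty parameter grid, as in the four searches above, the grid search reduces to a single cross-validated fit of the base estimator, and the fitted GridSearchCV object forwards predict and score to its refit best_estimator_. A minimal usage sketch, assuming r2_score from sklearn.metrics is in scope as in the previous example:

# predict() delegates to the refit best_estimator_ under the hood
y_pred = linear_model.predict(X_test)
print('Linear model test R^2: {:.2f}'.format(r2_score(y_test, y_pred)))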
Example 8
digits = datasets.load_digits()
X, y = digits.data, digits.target

sc = createLocalSparkSession().sparkContext
param_grid = {
    "max_depth": [3, None],
    "max_features": [1, 3, 10],
    "min_samples_split": [0.1, 0.2, 0.3],
    "min_samples_leaf": [1, 3, 10],
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"],
    "n_estimators": [10, 20, 40, 80]
}

gs = GridSearchCV(sc, RandomForestClassifier(), param_grid=param_grid)
gs.fit(X, y)

# retrieve the best parameters manually from the CV results
best_params_ = None
best_score_ = float('-inf')
params = gs.cv_results_['params']
mean_train_score = gs.cv_results_['mean_train_score']
for candidate, score in zip(params, mean_train_score):
    # keep the first candidate, then any strictly better score
    if best_params_ is None or score > best_score_:
        best_score_ = score
        best_params_ = candidate
print(best_params_)
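The manual scan above reproduces what the fitted search already exposes, and note that it ranks candidates by mean_train_score, whereas model selection is normally done on the held-out mean_test_score. Assuming the fitted search populates the standard scikit-learn attributes, the shorter form is:

# best_params_/best_score_ are selected by mean cross-validated test score
print(gs.best_params_)
print(gs.best_score_)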