예제 #1
0
    def tune_hyperparams_scikit(self, model, X_train, y_train):
        """Run a grid search over a small, fixed hyper-parameter grid.

        The search uses 2-fold cross-validation and is scored with the
        scorer returned by ``Measurement.get_gini_scorer()``. After fitting,
        the best estimator is handed to ``save_model`` (written under
        ``MODEL_DIR``) and the full ``cv_results_`` table is dumped to
        ``GridSearchRestest.csv`` in the current working directory.

        Args:
            model: Estimator passed to ``GridSearchCV``; it must accept the
                parameter names listed in the grid below.
            X_train: Training features forwarded to ``GridSearchCV.fit``.
            y_train: Training targets forwarded to ``GridSearchCV.fit``.

        Returns:
            None. Results are only persisted to disk and progress is
            printed to stdout.
        """
        timestamp_format = "%Y-%m-%d %H:%M:%S"
        print("Started CV task at " +
              datetime.datetime.now().strftime(timestamp_format))

        # Scorer used to rank every parameter combination.
        gini_scorer = Measurement().get_gini_scorer()

        # Only n_estimators and min_child_weight vary (2 x 2 combinations);
        # every other parameter is pinned to a single value.
        param_grid = [{
            "learning_rate": [0.01],
            "n_estimators": [100, 200],
            "seed": [100],
            "max_depth": [2],
            "min_child_weight": [1, 5],
            "subsample": [1]
        }]

        search_options = dict(
            cv=2,
            n_jobs=-1,
            scoring=gini_scorer,
            verbose=3,
            error_score=0,
        )

        print("Running GridSearch")
        grid_search = GridSearchCV(model, param_grid, **search_options)
        grid_search.fit(X_train, y_train)

        # Persist the winning estimator first, then the per-candidate results.
        save_model(grid_search.best_estimator_, MODEL_DIR,
                   prefix="CV5-bestModel-")
        pd.DataFrame(grid_search.cv_results_).to_csv("GridSearchRestest.csv",
                                                     index=False)

        print("Finished CV task at " +
              datetime.datetime.now().strftime(timestamp_format))