コード例 #1
0
ファイル: hyperopt_uci.py プロジェクト: yrahul3910/dl4se
def main():
    """Run a hyperopt-sklearn classifier search on every UCI CSV file.

    For each ``*.csv`` matched under ``../../../Dodge/data/UCI/`` the last
    column is used as the target, a ``HyperoptEstimator`` is fitted on
    ``data.x_train`` / ``data.y_train``, and the metrics, the best model and
    the elapsed time are printed into ``./hyperopt-log/<csv file name>.txt``.

    Any exception raised while processing a dataset propagates to the
    caller; the log file is closed and ``sys.stdout`` is restored first.
    """
    # Function-scope imports: the file-level import block is not visible here.
    import contextlib
    import os

    for dataset in glob.glob('../../../Dodge/data/UCI/*.csv'):
        df = pd.read_csv(dataset)
        target = df.columns[-1]
        # basename() also copes with the back-slash paths glob yields on Windows.
        log_path = f'./hyperopt-log/{os.path.basename(dataset)}.txt'

        # redirect_stdout restores sys.stdout on exit and the ``with`` closes
        # the log, so neither leaks into the next dataset or the caller.
        with open(log_path, 'w') as log_file, \
                contextlib.redirect_stdout(log_file):
            print(f'Running {dataset}')
            print('=' * 20)
            data = DataLoader.from_file(dataset,
                                        target=target,
                                        col_start=0,
                                        col_stop=-1)

            a = time.time()
            estim = HyperoptEstimator(classifier=any_classifier('clf'),
                                      preprocessing=any_preprocessing('pre'),
                                      algo=tpe.suggest,
                                      max_evals=30,
                                      loss_fn=loss,
                                      trial_timeout=30)

            estim.fit(data.x_train, data.y_train)
            preds = estim.predict(data.x_test)
            metr = ClassificationMetrics(data.y_test, preds)
            metr.add_metrics(['d2h', 'pd', 'pf'])
            print('perf:', metr.get_metrics()[0])
            print(metr.get_metrics())
            print(estim.best_model())
            b = time.time()

            print('Completed in', b - a, 'seconds.')
コード例 #2
0
def applyHPSKLEARN(X_train, y_train, X_test, y_test, SavePath,
                  max_evals=100, trial_timeout=100, useSavedModels = True):
    """Fit (or reload) a hyperopt-sklearn regressor and predict on the test set.

    When ``useSavedModels`` is true and ``SavePath + ".pckl"`` exists, the
    pickled estimator is loaded from that file. Otherwise a new
    ``HyperoptEstimator`` is searched on the training data and pickled to
    that path.

    :param X_train: training features, passed to ``fit``.
    :param y_train: training targets, passed to ``fit``.
    :param X_test: test features, passed to ``score`` and ``predict``.
    :param y_test: test targets, passed to ``score``.
    :param SavePath: path prefix of the pickle file (``".pckl"`` is appended).
    :param max_evals: forwarded to ``HyperoptEstimator``.
    :param trial_timeout: forwarded to ``HyperoptEstimator``.
    :param useSavedModels: reuse an existing pickle instead of searching again.
    :return: the predictions for ``X_test``.
    """
    model_file = SavePath + ".pckl"

    if not useSavedModels or not os.path.isfile(model_file):
        HPSKLEARNModel = HyperoptEstimator(regressor=any_regressor('reg'),
                                preprocessing=any_preprocessing('pre'),
                                loss_fn=mean_squared_error,
                                max_evals=max_evals,
                                trial_timeout=trial_timeout,
                                algo=tpe.suggest)
        # perform the search
        HPSKLEARNModel.fit(X_train, y_train)
        # ``with`` guarantees the pickle is flushed and the handle closed.
        with open(model_file, 'wb') as fh:
            pickle.dump(HPSKLEARNModel, fh)
    else:
        # NOTE: unpickling executes arbitrary code; only point SavePath at
        # files this function wrote itself.
        with open(model_file, 'rb') as fh:
            HPSKLEARNModel = pickle.load(fh)

    # summarize performance
    score = HPSKLEARNModel.score(X_test, y_test)
    y_hat = HPSKLEARNModel.predict(X_test)
    print("HPSKLEARN - Score: ")
    # NOTE(review): the label says MAE, but the value is whatever
    # HyperoptEstimator.score() returns -- confirm which metric that is.
    print("MAE: %.4f" % score)
    # summarize the best model
    print(HPSKLEARNModel.best_model())

    return y_hat
コード例 #3
0
ファイル: hyperopt_smell.py プロジェクト: yrahul3910/dl4se
def main():
    """Run a hyperopt-sklearn classifier search on each code-smell dataset.

    For every CSV name in the hard-coded list, the data is loaded from
    ``../../../Dodge/data/smell/`` with ``SMELLS`` as the target, a
    ``HyperoptEstimator`` is fitted, and metrics, best model and elapsed
    time are printed into ``./hyperopt-log/<dataset>.txt``.

    A failure on one dataset does not stop the run: the traceback is written
    to that dataset's log and the loop moves on. ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.
    """
    # Function-scope imports: the file-level import block is not visible here.
    import contextlib
    import traceback

    for dataset in [
            'DataClass.csv', 'FeatureEnvy.csv', 'GodClass.csv',
            'LongMethod.csv'
    ]:
        # The log is closed and sys.stdout restored when the ``with`` exits.
        with open(f'./hyperopt-log/{dataset}.txt', 'w') as log_file, \
                contextlib.redirect_stdout(log_file):
            try:
                print(f'Running {dataset}')
                print('=' * 20)
                data = DataLoader.from_file(
                    f'../../../Dodge/data/smell/{dataset}',
                    target='SMELLS',
                    col_start=0,
                    col_stop=-1)

                a = time.time()
                estim = HyperoptEstimator(
                    classifier=any_classifier('clf'),
                    preprocessing=any_preprocessing('pre'),
                    algo=tpe.suggest,
                    max_evals=30,
                    loss_fn=loss,
                    trial_timeout=30)

                estim.fit(data.x_train, data.y_train)
                preds = estim.predict(data.x_test)
                metr = ClassificationMetrics(data.y_test, preds)
                metr.add_metrics(['d2h', 'pd', 'pf'])
                print('perf:', metr.get_metrics()[0])
                print(metr.get_metrics())
                print(estim.best_model())
                b = time.time()

                print('Completed in', b - a, 'seconds.')
            except Exception:
                # Best effort per dataset, but leave a trace of what failed.
                traceback.print_exc(file=log_file)
コード例 #4
0
ファイル: auto_ml_core.py プロジェクト: sn0wfree/auto_ml
    def _create_estimator_random_classifier(
            classifier=any_classifier('my_clf'),
            preprocessing=any_preprocessing('my_pre'),
            max_evals=100,
            trial_timeout=120,
            seed=None,
            algo=tpe.suggest):
        """
        Build an unfitted ``HyperoptEstimator`` configured for classification.

        :param classifier: forwarded as ``classifier``; defaults to
            ``any_classifier('my_clf')``.
        :param preprocessing: forwarded as ``preprocessing``; defaults to
            ``any_preprocessing('my_pre')``.
        :param max_evals: forwarded as ``max_evals``.
        :param trial_timeout: forwarded as ``trial_timeout``.
        :param seed: forwarded as ``seed``.
        :param algo: forwarded as ``algo``; defaults to ``tpe.suggest``.
        :return: the newly constructed ``HyperoptEstimator``.
        """
        # Options this factory always pins, independent of its arguments.
        pinned = dict(
            ex_preprocs=None,
            regressor=None,
            space=None,
            loss_fn=None,
            continuous_loss_fn=False,
            verbose=False,
            fit_increment=1,
            fit_increment_dump_filename=None,
            use_partial_fit=False,
            refit=True,
        )
        return HyperoptEstimator(
            classifier=classifier,
            preprocessing=preprocessing,
            algo=algo,
            max_evals=max_evals,
            trial_timeout=trial_timeout,
            seed=seed,
            **pinned)
コード例 #5
0
ファイル: test_sample.py プロジェクト: sn0wfree/auto_ml
def test():
    """Fit a HyperoptEstimator on iris and print its hold-out score and model.

    The rows are shuffled with a permutation seeded by ``np.random.seed(13)``;
    the last ``int(0.2 * n)`` shuffled rows form the test set and the rest
    the training set.
    """
    iris = load_iris()
    features, labels = iris.data, iris.target

    holdout = int(0.2 * len(labels))
    np.random.seed(13)
    order = np.random.permutation(len(features))
    train_idx, test_idx = order[:-holdout], order[-holdout:]

    X_train, y_train = features[train_idx], labels[train_idx]
    X_test, y_test = features[test_idx], labels[test_idx]

    # Search space and evaluation budget for the estimator.
    search = dict(
        classifier=any_classifier('my_clf'),
        preprocessing=any_preprocessing('my_pre'),
        algo=tpe.suggest,
        max_evals=100,
        trial_timeout=120,
    )
    estim = HyperoptEstimator(**search)

    # Run the hyperparameter search on the training rows.
    estim.fit(X_train, y_train)

    # Report the hold-out score, then the selected model.
    print(estim.score(X_test, y_test))
    print(estim.best_model())
コード例 #6
0
def train_hypsklearn(X_train, X_test, y_train, y_test, mtype,
                     common_name_model, problemtype, classes,
                     default_featurenames, transform_model, settings,
                     model_session):
    """Search a model with hyperopt-sklearn, print its score and pickle it.

    :param X_train: training features, passed to ``fit``.
    :param X_test: test features, passed to ``score``.
    :param y_train: training targets, passed to ``fit``.
    :param y_test: test targets, passed to ``score``.
    :param mtype: ``'classification'``/``'c'`` or ``'regression'``/``'r'``
        (surrounding whitespace is ignored).
    :param common_name_model: file-name stem; the model is written to
        ``common_name_model + '.pickle'`` in the current directory.
    :param problemtype: not used by this function.
    :param classes: not used by this function.
    :param default_featurenames: not used by this function.
    :param transform_model: not used by this function.
    :param settings: not used by this function.
    :param model_session: not used by this function.
    :return: ``(modelname, modeldir, files)`` -- the pickle file name, the
        current working directory, and a one-element list with the file name.
    :raises ValueError: if ``mtype`` is not a recognised model type.
    """
    modelname = common_name_model + '.pickle'
    files = list()

    # The original compared against ' classification' (leading space), so the
    # spelled-out name never matched; normalise whitespace instead.
    kind = mtype.strip() if isinstance(mtype, str) else mtype

    if kind in ['classification', 'c']:
        estim = HyperoptEstimator(classifier=any_classifier('my_clf'),
                                  preprocessing=any_preprocessing('my_pre'),
                                  algo=tpe.suggest,
                                  max_evals=100,
                                  trial_timeout=120)
    elif kind in ['regression', 'r']:
        # A regression search space belongs in ``regressor=``, not
        # ``classifier=``.
        estim = HyperoptEstimator(regressor=any_regressor('my_clf'),
                                  preprocessing=any_preprocessing('my_pre'),
                                  algo=tpe.suggest,
                                  max_evals=100,
                                  trial_timeout=120)
    else:
        # Fail early and clearly instead of hitting an unbound ``estim`` later.
        raise ValueError(
            "mtype must be one of 'classification', 'c', 'regression', 'r'; "
            "got %r" % (mtype,))

    # Search the hyperparameter space based on the data
    estim.fit(X_train, y_train)

    # Show the results
    print(estim.score(X_test, y_test))
    print(estim.best_model())

    print('saving classifier to disk')
    with open(modelname, 'wb') as f:
        pickle.dump(estim, f)

    files.append(modelname)
    modeldir = os.getcwd()

    return modelname, modeldir, files
コード例 #7
0
def bayesian_opt_pipeline():
    """Time a TPE-driven HyperoptEstimator search over an SVC on generated data.

    Prints the wall-clock duration of ``fit`` followed by the best model.
    """
    X, y = generate_dataset()

    search_config = {
        "classifier": svc("hyperopt_svc"),
        "preprocessing": any_preprocessing("hyperopt_preprocess"),
        "algo": tpe.suggest,
        "max_evals": 100,
        "trial_timeout": 120,
    }
    estimator = HyperoptEstimator(**search_config)

    began = time.time()
    estimator.fit(X, y)
    elapsed = time.time() - began
    print(f"Time taken for fitting {elapsed} seconds")

    print("best model:")
    print(estimator.best_model())
コード例 #8
0
ファイル: 4.py プロジェクト: Mrklata/MachineLearning
 def hyper_bot(self):
     """
     Search a classifier with hyperopt-sklearn on the training split and
     print its score on the test split.

     Reads ``self.x_train``, ``self.y_train``, ``self.x_test`` and
     ``self.y_test``.
     :return: None
     """
     model = HyperoptEstimator(
         classifier=any_classifier("cla"),
         preprocessing=any_preprocessing("pre"),
         algo=tpe.suggest,
         max_evals=20,
         trial_timeout=30,
     )
     model.fit(self.x_train, self.y_train)
     # Score against the test labels; the original passed self.x_train as
     # the label argument.
     # NOTE(review): assumes the instance exposes y_test next to y_train --
     # confirm against the class definition.
     accuracy = model.score(self.x_test, self.y_test)
     print(f"Accuray: {accuracy}")
コード例 #9
0
ファイル: hyperopt_defect.py プロジェクト: yrahul3910/dl4se
def main():
    """Run a hyperopt-sklearn classifier search for each defect dataset pair.

    ``file_dic`` maps a dataset name to the list of CSV files handed to
    ``DataLoader.from_files``. For every entry the output is written to
    ``./hyperopt-log/<dataset name>.txt``.

    Loading errors propagate to the caller. A failure during the search or
    evaluation is written to the dataset's log as a traceback and the loop
    continues with the next dataset. In both cases the log is closed and
    ``sys.stdout`` is restored.
    """
    # Function-scope imports: the file-level import block is not visible here.
    import contextlib
    import traceback

    file_dic = {"ivy":     ["ivy-1.4.csv", "ivy-2.0.csv"],
                "lucene":  ["lucene-2.0.csv", "lucene-2.2.csv"],
                "lucene2": ["lucene-2.2.csv", "lucene-2.4.csv"],
                "poi":     ["poi-1.5.csv", "poi-2.5.csv"],
                "poi2": ["poi-2.5.csv", "poi-3.0.csv"],
                "synapse": ["synapse-1.0.csv", "synapse-1.1.csv"],
                "synapse2": ["synapse-1.1.csv", "synapse-1.2.csv"],
                "camel": ["camel-1.2.csv", "camel-1.4.csv"],
                "camel2": ["camel-1.4.csv", "camel-1.6.csv"],
                "xerces": ["xerces-1.2.csv", "xerces-1.3.csv"],
                "jedit": ["jedit-3.2.csv", "jedit-4.0.csv"],
                "jedit2": ["jedit-4.0.csv", "jedit-4.1.csv"],
                "log4j": ["log4j-1.0.csv", "log4j-1.1.csv"],
                "xalan": ["xalan-2.4.csv", "xalan-2.5.csv"]
                }

    for dataset in file_dic:
        # The original referenced an undefined name ``dat`` here; the loop
        # variable is ``dataset``.
        with open(f'./hyperopt-log/{dataset}.txt', 'w') as log_file, \
                contextlib.redirect_stdout(log_file):
            print(f'Running {dataset}')
            print('=' * 20)
            data = DataLoader.from_files(
                base_path='./issue_close_time/', files=file_dic[dataset])

            try:
                a = time.time()
                estim = HyperoptEstimator(
                    classifier=any_classifier('clf'),
                    preprocessing=any_preprocessing('pre'),
                    algo=tpe.suggest,
                    max_evals=30,
                    loss_fn=loss,
                    trial_timeout=30)

                estim.fit(data.x_train, data.y_train)
                preds = estim.predict(data.x_test)
                metr = ClassificationMetrics(data.y_test, preds)
                metr.add_metrics(['d2h', 'pd', 'pf'])
                print(metr.get_metrics())
                print(estim.best_model())
                b = time.time()

                print('Completed in', b - a, 'seconds.')
            except Exception:
                # Best effort per dataset, but leave a trace of what failed.
                traceback.print_exc(file=log_file)
コード例 #10
0
def main():
    """Run a hyperopt-sklearn search for every dataset / time-window pair.

    For each combination of ``datasets`` and ``directories`` the CSV is
    loaded with ``timeOpen`` as the target and the output is written to
    ``./hyperopt-log/<dataset>-<window>.txt``.

    Loading errors propagate to the caller. A failure during the search or
    evaluation is written to that run's log as a traceback and the loop
    continues. In both cases the log is closed and ``sys.stdout`` is
    restored. ``KeyboardInterrupt`` and ``SystemExit`` are no longer
    swallowed.
    """
    # Function-scope imports: the file-level import block is not visible here.
    import contextlib
    import traceback

    directories = [
        "1 day", "7 days", "14 days", "30 days", "90 days", "180 days",
        "365 days"
    ]
    datasets = [
        "camel", "cloudstack", "cocoon", "hadoop", "deeplearning", "hive",
        "node", "ofbiz", "qpid"
    ]

    for dat in datasets:
        for time_ in directories:
            log_path = f'./hyperopt-log/{dat}-{time_}.txt'
            with open(log_path, 'w') as log_file, \
                    contextlib.redirect_stdout(log_file):
                print(f'Running {dat}-{time_}')
                print('=' * 30)
                data = DataLoader.from_file(
                    "/Users/ryedida/PycharmProjects/raise-package/issue_close_time/"
                    + time_ + "/" + dat + ".csv",
                    target="timeOpen",
                    col_start=0)

                try:
                    a = time.time()
                    estim = HyperoptEstimator(
                        classifier=any_classifier('clf'),
                        preprocessing=any_preprocessing('pre'),
                        algo=tpe.suggest,
                        max_evals=30,
                        loss_fn=partial(loss, dat, time_),
                        trial_timeout=30)

                    estim.fit(data.x_train, data.y_train)
                    preds = estim.predict(data.x_test)
                    metr = ClassificationMetrics(data.y_test, preds)
                    metr.add_metrics(['d2h', 'pd', 'pf'])
                    print(metr.get_metrics())
                    print(estim.best_model())
                    b = time.time()

                    print('Completed in', b - a, 'seconds.')
                except Exception:
                    # One handler replaces the redundant ValueError/bare pair
                    # and records what went wrong instead of hiding it.
                    traceback.print_exc(file=log_file)
コード例 #11
0
# Build a small synthetic binary-classification dataset
# (100 samples, 10 features: 5 informative, 5 redundant).
X, y = make_classification(n_samples=100,
                           n_features=10,
                           n_informative=5,
                           n_redundant=5,
                           random_state=1)

# Hold out 33% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=1)

# Define the search: any classifier, any preprocessing, TPE,
# 50 evaluations with a 30-second limit per trial.
model = HyperoptEstimator(classifier=any_classifier("cla"),
                          preprocessing=any_preprocessing("pre"),
                          algo=tpe.suggest,
                          max_evals=50,
                          trial_timeout=30)

# Perform the search.
model.fit(X_train, y_train)

# Summarize performance on the held-out split.
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Summarize the best model. best_model is a method and must be called;
# printing the attribute only shows the bound-method repr.
print(model.best_model())

# the problem is that hyperopt sklearn is not advancing
コード例 #12
0
def build_model(dataset, pipeline, experiment, param_grid=None, cv=5, scoring='accuracy', n_jobs='auto', test_size=0.3, use_target=None, expanding_window=False):
    """Run a hyperopt-sklearn search per symbol of a dataset and save results.

    For every entry of the dataset index the features and targets are read
    from CSV, cleaned and imputed, split chronologically (``shuffle=False``),
    and a ``HyperoptEstimator`` is fitted on the training part. The best
    learner is plotted, evaluated on both splits, pickled, and a JSON report
    is written. An experiment index is rewritten after every symbol.

    Output goes under ``./results/<dataset>_<pipeline>_<experiment>/``.

    :param dataset: dataset name, optionally ``"<name>.<index_name>"``.
    :param pipeline: module name under ``pipelines`` providing ``TARGET`` and
        ``PARAMETER_GRID``.
    :param experiment: experiment label used in the output paths.
    :param param_grid: ``None`` (use ``PARAMETER_GRID`` of the pipeline) or a
        path to a JSON file. NOTE: resolved but not passed to the search in
        this function.
    :param cv: replaced by a ``TimeSeriesSplit`` when ``expanding_window`` is
        set. NOTE: not passed to the search in this function.
    :param scoring: metric name(s); a comma-separated string is split into a
        list. NOTE: not passed to the search in this function.
    :param n_jobs: ``'auto'`` resolves to ``os.cpu_count()``. NOTE: not passed
        to the search in this function.
    :param test_size: forwarded to ``train_test_split``.
    :param use_target: target column overriding the pipeline's ``TARGET``.
    :param expanding_window: number of splits for ``TimeSeriesSplit``, or a
        falsy value to keep ``cv`` unchanged.
    :return: dict mapping each symbol to its model, params and report paths.
    """
    models_dir = './results/{}_{}_{}/models/'.format(dataset, pipeline, experiment)
    reports_dir = './results/{}_{}_{}/reports/'.format(dataset, pipeline, experiment)
    experiment_index_file = './results/{}_{}_{}/index.json'.format(dataset, pipeline, experiment)
    log_file = './results/{}_{}_{}/model_build.log'.format(dataset, pipeline, experiment)
    if ',' in scoring:
        scoring = scoring.split(',')
    # if scoring is precision, make scorer manually to suppress zero_division warnings in case of heavy bias
    if scoring == 'precision':
        scoring = make_scorer(precision_score, zero_division=1)
    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(reports_dir, exist_ok=True)
    # Setup logging
    logger.setup(
        filename=log_file,
        filemode='w',
        root_level=logging.DEBUG,
        log_level=logging.DEBUG,
        logger='build_model'
    )
    index_name = 'index'
    if '.' in dataset:
        splits = dataset.split(".")
        dataset = splits[0]
        index_name = splits[1]
    # Load the dataset index
    dataset_index = load_dataset(dataset, return_index=True, index_name=index_name)
    # Dynamically import the pipeline we want to use for building the model
    p = importlib.import_module('pipelines.' + pipeline)
    experiment_index = {}

    if n_jobs == 'auto':
        n_jobs = os.cpu_count()
    # Load parameter grid argument
    if param_grid is None:
        param_grid = p.PARAMETER_GRID
    elif isinstance(param_grid, str):
        # The original test `type(param_grid) is 'str'` was always False, so a
        # JSON path was never actually loaded.
        with open(param_grid, 'r') as f:
            param_grid = json.load(f)

    logger.info('Start experiment: {} using {} on {}'.format(experiment, pipeline, dataset))
    for _sym, data in dataset_index.items():
        logger.info('Start processing: {}'.format(_sym))
        features = pd.read_csv(data['csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True)
        targets = pd.read_csv(data['target_csv'], sep=',', encoding='utf-8', index_col='Date', parse_dates=True)
        current_target = p.TARGET if not use_target else use_target

        # Drop columns whose values are all NaN, as well as rows with ANY nan value, then
        # replace infinity values with nan so that they can later be imputed to a finite value
        features = features.dropna(axis='columns', how='all').dropna().replace([np.inf, -np.inf], np.nan)
        target = targets.loc[features.index][current_target]

        features = features.replace([np.inf, -np.inf], np.nan)
        imputer = SimpleImputer()
        imputer.fit(features.values)
        feat_imp_values = imputer.transform(features.values)
        features = pd.DataFrame(feat_imp_values, index=features.index, columns=features.columns)
        X_train, X_test, y_train, y_test = train_test_split(features.values, target.values, shuffle=False, test_size=test_size)
        # Summarize distribution
        logger.info("Start Hyperopt search")
        if expanding_window:
            cv = TimeSeriesSplit(n_splits=expanding_window)
        est = HyperoptEstimator(classifier=any_classifier('my_clf'),
                          preprocessing=any_preprocessing('my_pre'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)
        est.fit(X_train, y_train)
        logger.info("End Hyperopt search")

        # Take the fitted learner selected by the search
        clf = est.best_model()['learner']
        # No tuned-parameter dict is extracted here; an empty one is saved.
        best_params = {}

        # 3x3 grid: the top row is left empty (the learning-curve plot is
        # disabled), the middle row shows the train split, the bottom row
        # the test split.
        _, axes = plt.subplots(3, 3, figsize=(20, 12), dpi=200, constrained_layout=True)

        axes[1][0].set_title("{} - ROC (Train)".format(_sym))
        plot_roc_curve(clf, X_train, y_train, ax=axes[1][0])
        axes[1][1].set_title("{} - Precision/Recall (Train)".format(_sym))
        plot_precision_recall_curve(clf, X_train, y_train, ax=axes[1][1])
        axes[1][2].set_title("{} - Confusion matrix (Train)".format(_sym))
        plot_confusion_matrix(clf, X_train, y_train, cmap='Blues', ax=axes[1][2])

        axes[2][0].set_title("{} - ROC (Test)".format(_sym))
        plot_roc_curve(clf, X_test, y_test, ax=axes[2][0])
        axes[2][1].set_title("{} - Precision/Recall (Test)".format(_sym))
        # Use the test split here; the original plotted the training data
        # under the "(Test)" title.
        plot_precision_recall_curve(clf, X_test, y_test, ax=axes[2][1])
        axes[2][2].set_title("{} - Confusion matrix (Test)".format(_sym))
        plot_confusion_matrix(clf, X_test, y_test, cmap='Oranges', ax=axes[2][2])

        curve_path = '{}{}_learning_curve.png'.format(reports_dir, _sym)
        plt.savefig(curve_path)
        plt.close()

        # Test the learner's performance on training and test sets
        predictions1 = clf.predict(X_train)
        train_report = classification_report(y_train, predictions1, output_dict=True)
        logger.info("Classification report on train set:\n{}".format(classification_report(y_train, predictions1)))
        predictions2 = clf.predict(X_test)
        test_report = classification_report(y_test, predictions2, output_dict=True)
        logger.info("Classification report on test set\n{}".format(classification_report(y_test, predictions2)))

        report = {
            'training_set': {
                'features':X_train.shape[1],
                'records':X_train.shape[0],
                'class_distribution': get_class_distribution(y_train),
                'classification_report': train_report,
                'accuracy': accuracy_score(y_train, predictions1),
                'mse': mean_squared_error(y_train, predictions1),
                'precision': precision_score(y_train, predictions1),
                'recall': recall_score(y_train, predictions1),
                'f1': f1_score(y_train, predictions1),
                'y_true':[y for y in y_train],
                'y_pred':[y for y in predictions1]
            },
            'test_set': {
                'features':X_test.shape[1],
                'records':X_test.shape[0],
                'class_distribution':get_class_distribution(y_test),
                'classification_report': test_report,
                'accuracy': accuracy_score(y_test, predictions2),
                'precision': precision_score(y_test, predictions2),
                'mse': mean_squared_error(y_test, predictions2),
                'recall': recall_score(y_test, predictions2),
                'f1': f1_score(y_test, predictions2),
                'y_true': [y for y in y_test],
                'y_pred': [y for y in predictions2]
            }
        }
        # If the classifier has a feature_importances attribute, save it in the report
        feature_importances = None
        if hasattr(clf, 'feature_importances_'):
            feature_importances = clf.feature_importances_
        elif hasattr(clf, 'named_steps') and hasattr(clf.named_steps, 'c') and hasattr(clf.named_steps.c, 'feature_importances_'):
            feature_importances = clf.named_steps.c.feature_importances_
        if feature_importances is not None:
            importances = {features.columns[i]: v for i, v in enumerate(feature_importances)}
            labeled = {str(k): float(v) for k, v in sorted(importances.items(), key=lambda item: -item[1])}
            report['feature_importances'] = labeled
        if hasattr(clf, 'ranking_'):
            report['feature_rank'] = {features.columns[i]: s for i, s in enumerate(clf.ranking_)}
        if hasattr(clf, 'support_'):
            report['feature_support'] = [features.columns[i] for i, s in enumerate(clf.support_) if s]
        train_dist = ['\t\tClass {}:\t{}\t({}%%)'.format(k, d['count'], d['pct']) for k, d in get_class_distribution(y_train).items()]
        test_dist = ['\t\tClass {}:\t{}\t({}%%)'.format(k, d['count'], d['pct']) for k, d in get_class_distribution(y_test).items()]

        logger.info('Model evaluation: \n'
              '== Training set ==\n'
              '\t # Features: {} | # Records: {}\n '
              '\tClass distribution:\n{}\n'
              '\tAccuracy: {}\n'
              '\tPrecision: {}\n'
              '\tMSE: {}\n' \
              '\tRecall: {}\n' \
              '\tF1: {}\n' \
              '== Test set ==\n'
              '\t # Features: {} | # Records: {}\n '
              '\tClass distribution:\n{}\n'
              '\tAccuracy: {}\n'
              '\tPrecision: {}\n'
              '\tMSE: {}\n' \
              '\tRecall: {}\n' \
              '\tF1: {}\n' \
              .format(X_train.shape[1], X_train.shape[0], '\n'.join(train_dist),
                      report['training_set']['accuracy'], report['training_set']['precision'], report['training_set']['mse'],
                      report['training_set']['recall'], report['training_set']['f1'],
                      X_test.shape[1], X_test.shape[0], '\n'.join(test_dist),
                      report['test_set']['accuracy'], report['test_set']['precision'], report['test_set']['mse'],
                      report['test_set']['recall'], report['test_set']['f1']
                      )
        )

        # Save a pickle dump of the model
        model_path = '{}{}.p'.format(models_dir, _sym)
        with open(model_path, 'wb') as f:
            pickle.dump(clf, f)
        # Save the model's parameters
        params_path = '{}{}_parameters.json'.format(models_dir, _sym)
        with open(params_path, 'w') as f:
            json.dump(best_params, f, indent=4)
        # Save the report for this model
        report_path = '{}{}.json'.format(reports_dir, _sym)
        with open(report_path, 'w') as f:
            json.dump(report, f, indent=4)
        # Update the experiment's index with the new results, and save it
        experiment_index[_sym] = {
            'model':model_path,
            'params':params_path,
            'report':report_path
        }
        with open(experiment_index_file, 'w') as f:
            json.dump(experiment_index, f, indent=4)
        logger.info("--- {} end ---".format(_sym))
    return experiment_index
コード例 #13
0
# Hold-out labels: the entries of y selected by the last `test_size` indices.
y_test = y[indices[-test_size:]]

import pandas as pd
# One-hot encoding of the training labels; only referenced by the
# commented-out fit call further down.
y_train_ohe = pd.get_dummies(y_train)

# Baseline: a logistic regression fitted on the training split. It is not
# referenced again in the visible part of this script.
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train, y_train)

import lightgbm as lgb

# LightGBM classifier with default parameters, fitted once and then handed
# to HyperoptEstimator as its `classifier`.
model_lgb = lgb.LGBMClassifier()
model_lgb.fit(X_train, y_train)
# NOTE(review): a concrete, already-fitted estimator is passed where a
# search space (e.g. any_classifier(...)) is normally given -- confirm that
# HyperoptEstimator accepts this.
# NOTE(review): the string given to any_preprocessing looks like a label
# rather than a selector for a specific scaler -- confirm.
estim = HyperoptEstimator(classifier=model_lgb,
                          preprocessing=any_preprocessing('standard_scaler'),
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)

# Search the hyperparameter space based on the data
# NOTE(review): random_state is given as a plain int -- confirm the type
# HyperoptEstimator.fit expects for this argument.

estim.fit(X_train, y_train, random_state=50)
# estim.fit(X_train, y_train_ohe)

# Show the score on the hold-out split

print(estim.score(X_test, y_test))
# 1.0

# Show the model selected by the search
print(estim.best_model())
コード例 #14
0
                                                        test_size=0.3,
                                                        random_state=42)
    print('Prepared data: X_train: %s  y_train: %s' %
          (X_train.shape, y_train.shape))
    print('Prepared data: X_test: %s  y_test: %s' %
          (X_test.shape, y_test.shape))
    # replace training dataset
    X = X_train
    y = y_train
    """ ESTIMATOR WITH BAYESIAN TUNING """

    from hpsklearn import HyperoptEstimator, any_regressor, any_preprocessing
    from hyperopt import tpe
    # Instantiate a HyperoptEstimator with the search space and number of evaluations
    clf = HyperoptEstimator(regressor=any_regressor('my_clf'),
                            preprocessing=any_preprocessing('my_pre'),
                            algo=tpe.suggest,
                            max_evals=250,
                            trial_timeout=300)

    clf.fit(X, y)
    print(clf.best_model())
    y_hat = clf.predict(X_test)
    dscores = metrics_regression(y_test, y_hat, X.shape[1])
    tf = t.since('test')
    print(
        '\nBayesian tuning -test:  bias = %.3f  mae = %.3f  r2 = %.3f (time: %s)'
        %
        (dscores['bias'], dscores['mae'], dscores['r2'], format_duration(tf)))
    # training
    y_hat = clf.predict(X)
コード例 #15
0
ファイル: ensembleTrain4.py プロジェクト: MPCSM/MPCSM
# Keep only the label column, and name the two input columns.
waferlabel = waferlabel['result']
wafer = pd.DataFrame(wafer, columns=['cnnResult', 'svmResult'])

# Feature matrix / label vector, with 25% held out for testing.
X = wafer.values
y = waferlabel.values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.25, random_state=1)

# Search over any preprocessing + any classifier with TPE.
search_options = dict(
    preprocessing=any_preprocessing('pp'),
    classifier=any_classifier('clf'),
    algo=tpe.suggest,
    trial_timeout=200.0,  # seconds
    max_evals=10,
    seed=1,
)
estim = HyperoptEstimator(**search_options)
estim.fit(X_train, y_train)

# Report the hold-out score together with the selected model.
holdout_score = estim.score(X_test, y_test)
selected_model = estim.best_model()
print(holdout_score, selected_model)

# Persist the fitted estimator, reload it, and predict with the reloaded copy.
joblib.dump(estim, 'ensemble4.m')
ensemble = joblib.load('ensemble4.m')
ensemble.predict(X_test)