Python EasyEnsembleClassifier.predict 예제들, imblearn.ensemble.EasyEnsembleClassifier.predict Python 예제들

예제 #1

0

파일 보기

파일: test_easy_ensemble.py 프로젝트: wenqin2017/imbalanced-learn

def test_easy_ensemble_classifier(n_estimators, base_estimator):
    """Smoke-test fitting and every prediction method for the given settings.

    Builds an imbalanced three-class subset of iris, fits the ensemble and
    checks both the number of fitted members and the size of each member's
    classifier step.
    """
    class_counts = {0: 20, 1: 25, 2: 50}
    X, y = make_imbalance(iris.data, iris.target,
                          sampling_strategy=class_counts,
                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    eec = EasyEnsembleClassifier(n_estimators=n_estimators,
                                 base_estimator=base_estimator,
                                 n_jobs=-1,
                                 random_state=RND_SEED)
    fitted = eec.fit(X_train, y_train)
    fitted.score(X_test, y_test)

    # One fitted member per requested estimator ...
    assert len(eec.estimators_) == n_estimators
    # ... and each member's classifier step has the base estimator's size.
    for member in eec.estimators_:
        classifier_step = member.named_steps['classifier']
        assert len(classifier_step) == base_estimator.n_estimators

    # Exercise each prediction entry point in turn.
    for method_name in ('predict', 'predict_proba', 'predict_log_proba',
                        'decision_function'):
        getattr(eec, method_name)(X_test)

예제 #2

0

파일 보기

def test_warm_start_equal_n_estimators():
    """Refitting a warm-started ensemble of unchanged size only warns."""
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    ensemble = EasyEnsembleClassifier(n_estimators=5,
                                      warm_start=True,
                                      random_state=83)
    ensemble.fit(X_train, y_train)
    predictions_before = ensemble.predict(X_test)

    # Shift the training data in place; the second fit must not pick it up.
    X_train += 1.0

    expected_warning = "Warm-start fitting without increasing n_estimators"
    with pytest.warns(UserWarning, match=expected_warning):
        ensemble.fit(X_train, y_train)

    predictions_after = ensemble.predict(X_test)
    assert_array_equal(predictions_before, predictions_after)

예제 #3

0

파일 보기

파일: test_easy_ensemble.py 프로젝트: chkoar/imbalanced-learn

def test_warm_start_equal_n_estimators():
    """A warm-start refit with the same n_estimators must leave the model as is."""
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    settings = dict(n_estimators=5, warm_start=True, random_state=83)
    model = EasyEnsembleClassifier(**settings)
    model.fit(X_train, y_train)
    baseline = model.predict(X_test)

    # Perturb the training data in place: predictions must stay the same
    # because the refit below is expected to do nothing but warn.
    X_train += 1.

    message = "Warm-start fitting without increasing n_estimators"
    with pytest.warns(UserWarning, match=message):
        model.fit(X_train, y_train)

    assert_array_equal(baseline, model.predict(X_test))

예제 #4

0

파일 보기

def test_warm_start_equivalence():
    """Growing 5 -> 10 estimators via warm start matches a cold fit of 10."""
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)
    seed = 3141

    # Warm-started model: fit 5 members, then extend to 10.
    incremental = EasyEnsembleClassifier(n_estimators=5,
                                         warm_start=True,
                                         random_state=seed)
    incremental.fit(X_train, y_train)
    incremental.set_params(n_estimators=10)
    incremental.fit(X_train, y_train)
    pred_incremental = incremental.predict(X_test)

    # Reference model: all 10 members fitted in one go.
    from_scratch = EasyEnsembleClassifier(n_estimators=10,
                                          warm_start=False,
                                          random_state=seed)
    from_scratch.fit(X_train, y_train)
    pred_from_scratch = from_scratch.predict(X_test)

    assert_allclose(pred_incremental, pred_from_scratch)

예제 #5

0

파일 보기

파일: test_easy_ensemble.py 프로젝트: chkoar/imbalanced-learn

def test_warm_start_equivalence():
    """Check that 5+5 warm-started estimators equal 10 fitted at once."""
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    # Two-stage fit: the second call adds five members to the first five.
    staged = EasyEnsembleClassifier(n_estimators=5, warm_start=True,
                                    random_state=3141)
    staged.fit(X_train, y_train)
    staged.set_params(n_estimators=10)
    staged.fit(X_train, y_train)
    y1 = staged.predict(X_test)

    # Single-stage fit with the same seed and final size.
    single = EasyEnsembleClassifier(n_estimators=10, warm_start=False,
                                    random_state=3141)
    single.fit(X_train, y_train)
    y2 = single.predict(X_test)

    assert_allclose(y1, y2)

예제 #6

0

파일 보기

파일: test_easy_ensemble.py 프로젝트: chkoar/imbalanced-learn

def test_easy_ensemble_classifier(n_estimators, base_estimator):
    """Fit the ensemble with the given settings and call every predictor.

    The data is an imbalanced three-class resample of iris. After fitting,
    the number of members and the length of each member's classifier step
    are checked against the requested configuration.
    """
    X, y = make_imbalance(
        iris.data,
        iris.target,
        sampling_strategy={0: 20, 1: 25, 2: 50},
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    eec = EasyEnsembleClassifier(
        n_estimators=n_estimators,
        base_estimator=base_estimator,
        n_jobs=-1,
        random_state=RND_SEED,
    )
    eec.fit(X_train, y_train).score(X_test, y_test)

    assert len(eec.estimators_) == n_estimators
    for member in eec.estimators_:
        inner = member.named_steps['classifier']
        assert len(inner) == base_estimator.n_estimators

    # Call each prediction function once; only clean execution is checked.
    predictors = (eec.predict, eec.predict_proba, eec.predict_log_proba,
                  eec.decision_function)
    for predictor in predictors:
        predictor(X_test)

예제 #7

0

파일 보기

파일: test_easy_ensemble.py 프로젝트: chkoar/imbalanced-learn

def test_easy_ensemble_classifier_single_estimator():
    """A one-member ensemble predicts like under-sampling + AdaBoost."""
    X, y = make_imbalance(iris.data, iris.target,
                          sampling_strategy={0: 20, 1: 25, 2: 50},
                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    ensemble = EasyEnsembleClassifier(n_estimators=1, random_state=0)
    ensemble = ensemble.fit(X_train, y_train)

    # Hand-built counterpart: random under-sampling followed by AdaBoost.
    reference = make_pipeline(RandomUnderSampler(random_state=0),
                              AdaBoostClassifier(random_state=0))
    reference = reference.fit(X_train, y_train)

    assert_array_equal(ensemble.predict(X_test), reference.predict(X_test))

예제 #8

0

파일 보기

파일: test_easy_ensemble.py 프로젝트: timyb/imbalanced-learn

def test_easy_ensemble_classifier_single_estimator():
    """Compare a single-estimator ensemble with an explicit pipeline."""
    class_counts = {0: 20, 1: 25, 2: 50}
    X, y = make_imbalance(iris.data, iris.target,
                          sampling_strategy=class_counts,
                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    clf1 = EasyEnsembleClassifier(n_estimators=1, random_state=0)
    clf1 = clf1.fit(X_train, y_train)

    # Same seeds throughout, so both models are expected to agree exactly.
    sampler = RandomUnderSampler(random_state=0)
    booster = AdaBoostClassifier(random_state=0)
    clf2 = make_pipeline(sampler, booster).fit(X_train, y_train)

    expected = clf2.predict(X_test)
    assert_array_equal(clf1.predict(X_test), expected)

예제 #9

0

파일 보기

파일: easy_ensemble.py 프로젝트: georgSquared/AdvancedML-Auth

def run(X_train, X_test, y_train, y_test):
    """Fit an easy ensemble and return ``(y_test, y_pred, y_proba)``.

    Before fitting, prints a banner and the class counts of ``y_train``.
    ``y_test`` is passed through unchanged.
    """
    banner = "######################"
    for line in (banner, "Easy Ensemble", banner, "\n"):
        print(line)

    class_counts = Counter(y_train)
    print('Original dataset shape %s' % class_counts)

    # resample all classes but the majority class
    model = EasyEnsembleClassifier(sampling_strategy='not majority',
                                   replacement=True,
                                   random_state=42,
                                   n_jobs=-1)
    model.fit(X_train, y_train)

    return y_test, model.predict(X_test), model.predict_proba(X_test)

예제 #10

0

파일 보기

파일: BurdenDerek.py 프로젝트: BurdenDerek/Academic_Success_Predictions

    def easy_ensemble_classifier(df, drop, target):
        """Train an easy ensemble on ``df`` and return balanced accuracy in %.

        Columns listed in ``drop`` are excluded from the features; ``target``
        selects the outcome from ``df``.
        """
        # Separate the table into features and outcomes.
        feature_names = [col for col in df.columns if col not in drop]
        features, outcome = df[feature_names], df[target]

        # Hold out a test split with a fixed seed.
        X_train, X_test, y_train, y_test = train_test_split(
            features, outcome, random_state=1)

        model = EasyEnsembleClassifier(n_estimators=100, random_state=0)
        model.fit(X_train, y_train)

        # Balanced accuracy on the held-out split, scaled to a percentage.
        score = balanced_accuracy_score(y_test, model.predict(X_test))
        return score * 100

예제 #11

0

파일 보기

파일: IrregularDatasets.py 프로젝트: mblaszczyk97/Irregular-Datasets

def adaboost(X_train, y_train, X_test, y_test):
    """Fit an easy ensemble of AdaBoost learners, then report and plot a score.

    The score is ``100 * cm[1, 1] / (cm[1, 0] + cm[1, 1])`` where ``cm`` is
    the confusion matrix of the test predictions. It is printed, drawn as a
    bar chart and returned.
    """
    booster = AdaBoostClassifier(n_estimators=10)
    eec = EasyEnsembleClassifier(n_estimators=10,
                                 base_estimator=booster,
                                 n_jobs=-1)
    eec.fit(X_train, y_train.values.ravel())
    test_pred = eec.predict(X_test)

    cm = confusion_matrix(y_test, test_pred)
    row1_wrong, row1_right = cm[1, 0], cm[1, 1]
    without = 100 * row1_right / (row1_wrong + row1_right)
    print("Adaboost (boosting): {}%".format(without))
    print(cm[0, 0], row1_right)

    # Two-bar chart: the score and an empty placeholder bar.
    labels = ('Boosting', '-')
    positions = np.arange(len(labels))
    heights = [without, 0]
    plt.bar(positions, heights, align='center', alpha=0.5)
    plt.xticks(positions, labels)
    plt.ylabel('Procent dokładności')
    plt.title('Dokładność Adaboost z losowym undersamplingiem')
    plt.show()

    return without

예제 #12

0

파일 보기

    return train_auc_roc_curve


# Easy ensemble of 250 LightGBM base learners with fixed seeds.
easy_lgbm = EasyEnsembleClassifier(
    base_estimator=LGBMClassifier(random_state=42),
    n_estimators=250,
    n_jobs=1,
    random_state=42,
    replacement=True,
    sampling_strategy='auto',
    verbose=0,
    warm_start=True)
easy_lgbm.fit(X_train_svm, y_train_svm)
evaluate(easy_lgbm, X_test_svm, y_test_svm)

# Predict once per split and reuse the result: the model is already fitted,
# so repeating predict() for every metric only repeats the same inference.
easy_lgbm_train_pred = easy_lgbm.predict(X_train_svm)
easy_lgbm_test_pred = easy_lgbm.predict(X_test_svm)

print(classification_report(y_train_svm, easy_lgbm_train_pred))
print(confusion_matrix(y_train_svm, easy_lgbm_train_pred))
print('Recall Score = ',
      recall_score(y_train_svm, easy_lgbm_train_pred))
print('Precision Score = ',
      precision_score(y_train_svm, easy_lgbm_train_pred))

# F1 on the training split, then on the test split.
print(f1_score(y_train_svm, easy_lgbm_train_pred))
print(f1_score(y_test_svm, easy_lgbm_test_pred))

# Permutation importance of the fitted ensemble on the test split (F1 scoring).
eli5_permutation = PermutationImportance(estimator=easy_lgbm,
                                         scoring='f1',
                                         random_state=42,
                                         n_iter=5)
eli5_permutation.fit(X_test_svm, y_test_svm)
# Bare expression: its value is only shown when run as a notebook cell.
eli5_permutation.feature_importances_.T.reshape(-1, 1)

예제 #13

0

파일 보기

파일: oversampling.py 프로젝트: TudorAndrei/eda_project

                                                    test_size=0.2,
                                                    shuffle=True,
                                                    stratify=y)

# Candidate base learners, keyed by the name printed before each report.
models = {
    'xgb': xgb.XGBClassifier(use_label_encoder=False, verbosity=0, n_jobs=-1),
    'sklearn-gbc': GradientBoostingClassifier()
}

for key, estimator in models.items():
    print(key)
    eec = EasyEnsembleClassifier(random_state=42,
                                 sampling_strategy=0.5,
                                 base_estimator=estimator)

    eec.fit(X_train, y_train)

    y_hat = eec.predict(X_test)
    y_hat_train = eec.predict(X_train)

    # classification_report_imbalanced expects (y_true, y_pred). Passing the
    # predictions first swaps the roles of truth and prediction in the
    # per-class statistics.
    print("Training classification")
    print(classification_report_imbalanced(y_train, y_hat_train))
    print("Testing classification")
    print(classification_report_imbalanced(y_test, y_hat))

# Build a Series of feature importances, sorted from highest to lowest.
# NOTE(review): `model` and `index` are not defined in the visible code —
# confirm they refer to a fitted estimator and its feature names.
features = pd.Series(model.feature_importances_,
                     index=index).sort_values(ascending=False)

print(features)

예제 #14

0

파일 보기

                      ax=ax[1],
                      title='Balanced random forest')

###############################################################################
# Boosting classifier
###############################################################################
# In the same manner, easy ensemble classifier is a bag of balanced AdaBoost
# classifier. However, it will be slower to train than random forest and will
# achieve worse performance.

# Easy ensemble: ten members, each built on a ten-round AdaBoost learner.
base_estimator = AdaBoostClassifier(n_estimators=10)
eec = EasyEnsembleClassifier(
    n_estimators=10, base_estimator=base_estimator, n_jobs=-1)
eec.fit(X_train, y_train)
y_pred_eec = eec.predict(X_test)

print('Easy ensemble classifier performance:')
print('Balanced accuracy: {:.2f} - Geometric mean {:.2f}'
      .format(balanced_accuracy_score(y_test, y_pred_eec),
              geometric_mean_score(y_test, y_pred_eec)))

# New two-panel figure; the easy ensemble goes in the left panel.
cm_eec = confusion_matrix(y_test, y_pred_eec)
fig, ax = plt.subplots(ncols=2)
plot_confusion_matrix(cm_eec, classes=np.unique(satimage.target), ax=ax[0],
                      title='Easy ensemble classifier')

# RUSBoost reuses the same AdaBoost base estimator.
rusboost = RUSBoostClassifier(n_estimators=10,
                              base_estimator=base_estimator)
rusboost.fit(X_train, y_train)
y_pred_rusboost = rusboost.predict(X_test)
print('RUSBoost classifier performance:')

예제 #15

0

파일 보기

파일: model_imbalanced.py 프로젝트: wallinjoshua/CY6720_Final_Project

    # Disabled alternative: clean the training set with EditedNearestNeighbours.
    #smote_enn = EditedNearestNeighbours()
    #feature_train, class_train = smote_enn.fit_resample(feature_train, class_train)

    # Downsample the positive training examples
    # Append the labels as an extra last column so rows can be filtered by class.
    combined_training_data = np.append(feature_train, class_train.reshape((len(class_train),-1)), axis=1)
    # NOTE(review): column 28 is presumably the appended label column (i.e. 28
    # features) — confirm; the slices below use -1 for the same column.
    positive_samples = np.array([x for x in combined_training_data if x[28] == 1])
    negative_samples = np.array([x for x in combined_training_data if x[28] == 0])
    # Draw ceil((1 - downsampling_factor) * n_positive) positive rows.
    new_samples = resample(positive_samples, n_samples=int(math.ceil((1-downsampling_factor) * len(positive_samples))))
    combined_training_data = np.append(negative_samples, new_samples, axis=0)
    # Split back into features (all but the last column) and labels (last column).
    feature_train = combined_training_data[:, :-1]
    class_train = combined_training_data[:,-1]

    clf = EasyEnsembleClassifier()
    # clf = AdaBoostClassifier(n_estimators=1000)
    clf.fit(feature_train, class_train)
    preds_clf = clf.predict(feature_test)
    # Unpack the flattened confusion matrix as tn, fp, fn, tp.
    tn_clf, fp_clf, fn_clf, tp_clf = confusion_matrix(class_test, preds_clf).ravel()
    # NOTE(review): both values below are computed from the true-negative
    # count, i.e. class 0 is treated as the class of interest:
    # tn/(tn+fp) and tn/(tn+fn). Confirm this is intended.
    recall = tn_clf/(tn_clf+fp_clf)
    precision = tn_clf/(tn_clf+fn_clf)
    print("\tAdaboost Accuracy:")
    print("\t\tOverall:", accuracy_score(class_test, preds_clf))
    print("\t\tNegative Class:", tn_clf/(tn_clf+fp_clf))
    print("\t\tRecall:", recall)
    print("\t\tPrecision:", precision)
    print("\t\tF-Measure:", (2 * recall * precision)/(recall + precision))
    print("\t\tG-Mean:", math.sqrt((tp_clf/(tp_clf+fn_clf)) * (tn_clf/(tn_clf+fp_clf))))
    

    # Record new bests only when BOTH overall accuracy and negative-class
    # accuracy beat the previous bests.
    if(accuracy_score(class_test, preds_clf) > best_overall_accuracy and tn_clf/(tn_clf+fp_clf) > best_negative_accuracy):
        best_overall_accuracy = accuracy_score(class_test, preds_clf)
        best_negative_accuracy = tn_clf/(tn_clf+fp_clf)

예제 #16

0

파일 보기

# Fit the existing classifier on the X_train_st / y_train_st data.
classifier.fit(X_train_st, y_train_st)

# In[95]:

# Report confusion matrix, accuracy and per-class metrics on the test split.
y_pred = classifier.predict(X_test)
for report in (confusion_matrix, accuracy_score, classification_report):
    print(report(y_test, y_pred))

# ##Ensemble Techniques

# In[96]:

from imblearn.ensemble import EasyEnsembleClassifier

# In[97]:

# Default-configured easy ensemble, fitted on X_train / y_train.
easy = EasyEnsembleClassifier()
easy.fit(X_train, y_train)

# In[98]:

y_pred = easy.predict(X_test)

print('Confustion Matrix : \n\n', confusion_matrix(y_test, y_pred))
print('\n Accuracy Score : ', accuracy_score(y_test, y_pred))
print(
    '\n Classification Report : \n \n',
    classification_report(y_test, y_pred),
)

# In[ ]:

예제 #17

0

파일 보기

파일: balance_sample_models.py 프로젝트: DeepSleepUCDenver/sleep_models

# Train/test/validation splits of the data with known labels.
x_tr, y_tr, x_te, y_te, x_va, y_va = load_known_data()

# Each model appends one entry to every result list, in the same order.
model_name.append("Balanced Random Forest")
label_prop.append("No Propagation")
rfb = BalancedRandomForestClassifier(max_depth=2)
rfb.fit(x_tr, y_tr)
train_accuracy.append(rfb.score(x_tr, y_tr))
test_accuracy.append(rfb.score(x_te, y_te))
validation_accuracy.append(rfb.score(x_va, y_va))

model_name.append("Easy Ensemble")
label_prop.append("No Propagation")
clf = EasyEnsembleClassifier(random_state=0)
clf.fit(x_tr, y_tr)
# The original also called clf.predict(x_tr) here and discarded the result;
# score() below already predicts internally, so that call was dead work.
train_accuracy.append(clf.score(x_tr, y_tr))
test_accuracy.append(clf.score(x_te, y_te))
validation_accuracy.append(clf.score(x_va, y_va))

#
#
# Propagation labels
#
#

# Reload the splits, this time from load_all_data(); the entries appended
# below are tagged "Label Propagation".
x_tr, y_tr, x_te, y_te, x_va, y_va = load_all_data()

model_name.append("Balanced Random Forest")
label_prop.append("Label Propagation")
# Same shallow forest configuration as used for the known-label data.
rfb = BalancedRandomForestClassifier(max_depth=2)

예제 #18

0

파일 보기

파일: plot_comparison_ensemble_classifier.py 프로젝트: chkoar/imbalanced-learn

# Right-hand panel: confusion matrix of the balanced random forest.
plot_confusion_matrix(cm_brf,
                      classes=np.unique(satimage.target),
                      ax=ax[1],
                      title='Balanced random forest')

###############################################################################
# Boosting classifier
###############################################################################
# Likewise, the easy ensemble classifier is a bag of balanced AdaBoost
# classifiers. However, it will be slower to train than random forest and will
# achieve worse performance.

base_estimator = AdaBoostClassifier(n_estimators=10)
eec = EasyEnsembleClassifier(n_estimators=10,
                             base_estimator=base_estimator,
                             n_jobs=-1)
eec.fit(X_train, y_train)
y_pred_eec = eec.predict(X_test)
print('Easy ensemble classifier performance:')
print('Balanced accuracy: {:.2f} - Geometric mean {:.2f}'.format(
    balanced_accuracy_score(y_test, y_pred_eec),
    geometric_mean_score(y_test, y_pred_eec)))

# Fresh two-panel figure; the easy ensemble goes in the left panel.
cm_eec = confusion_matrix(y_test, y_pred_eec)
fig, ax = plt.subplots(ncols=2)
plot_confusion_matrix(cm_eec,
                      classes=np.unique(satimage.target),
                      ax=ax[0],
                      title='Easy ensemble classifier')

# RUSBoost built on the same AdaBoost base estimator.
rusboost = RUSBoostClassifier(n_estimators=10, base_estimator=base_estimator)
rusboost.fit(X_train, y_train)
y_pred_rusboost = rusboost.predict(X_test)
print('RUSBoost classifier performance:')
print('Balanced accuracy: {:.2f} - Geometric mean {:.2f}'

예제 #19

0

파일 보기

파일: ensemble.py 프로젝트: dlagez/classification

                           random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

eec = EasyEnsembleClassifier(random_state=0)

# Load the 2015 train/test features and labels; the first CSV column is the
# row index.
train_data = pd.read_csv(
    '/data/file/classification_data/2012-2019/data_sum/2015/train/train_data.csv',
    index_col=0)
train_label = pd.read_csv(
    '/data/file/classification_data/2012-2019/data_sum/2015/train/train_label.csv',
    index_col=0)

test_data = pd.read_csv(
    '/data/file/classification_data/2012-2019/data_sum/2015/train/test_data.csv',
    index_col=0)
test_label = pd.read_csv(
    '/data/file/classification_data/2012-2019/data_sum/2015/train/test_label.csv',
    index_col=0)

# Convert the pandas DataFrames to NumPy arrays. The original called
# test_label.reshape(-1) and discarded the result, so the labels stayed
# two-dimensional; keep the flattened arrays instead.
# NOTE(review): presumably each label file holds a single column — confirm.
# Per the original author's note, test_data.values.shape is (520, 448).
train_label_flat = train_label.values.reshape(-1)
test_label = test_label.values.reshape(-1)

eec.fit(train_data.values, train_label_flat)

test_pred = eec.predict(test_data.values)
# Bare expression: its value is only shown when run as a notebook cell.
balanced_accuracy_score(test_label, test_pred)

예제 #20

0

파일 보기

class Model_Finder:
    """
               This is to find the best model

               """
    def __init__(self):
        """Open the tuning log, load the training data and create both models.

        The last column of the training frame is used as the label and the
        remaining columns as features; the data is then split 80/20 with
        stratification on the label.
        """
        self.file_object = open("../logs/modeltune/log.txt", 'a+')
        self.saved_best_model_path = '../saved_model/best_model.sav'
        self.logger = App_Logger()
        self.transformed_data = dataTransform()
        self.df = self.transformed_data.trainingData()

        # Features are every column but the last; the last one is the label.
        self.data = self.df.iloc[:, :-1]
        self.label = self.df.iloc[:, -1]

        split = train_test_split(self.data,
                                 self.label,
                                 test_size=0.2,
                                 random_state=0,
                                 stratify=self.label)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        # Untuned estimators used as the starting point for each search.
        self.BRF = BalancedRandomForestClassifier(n_jobs=-1)
        self.EEC = EasyEnsembleClassifier(n_jobs=-1)

    def f2_make(self, y_true, y_pred):
        """Return the F-beta score of the predictions with ``beta=2``."""
        f2_score = fbeta_score(y_true, y_pred, beta=2)
        return f2_score

    def get_best_params_for_balanced_random_forest(self, X_train, y_train):
        """Tune a balanced random forest and return the refitted model.

        Runs a randomized search (80 candidates, 5-fold CV, F2 scoring) over
        a one-step pipeline wrapping ``self.BRF``, stores each winning
        hyper-parameter on ``self`` and fits a fresh
        ``BalancedRandomForestClassifier`` with them on ``X_train``/``y_train``.

        Returns:
            The fitted classifier (also stored as ``self.brf``).

        Raises:
            Exception: if the search or the final fit fails. The failure is
                logged and the original error is attached as ``__cause__``.
        """
        self.logger.log(
            self.file_object,
            'Entered the get_best_params_for_balanced_random_forest method of the Model_Finder class'
        )

        print('in RF')
        f2 = make_scorer(self.f2_make)
        try:
            # Search space; every key targets the 'brf' pipeline step.
            self.param_grid = {
                # Number of trees in the forest
                'brf__n_estimators': [80, 100, 130, 160],
                'brf__criterion': ['gini', 'entropy'],
                # Number of features to consider at every split
                'brf__max_features': ['log2', 'sqrt'],
                # Maximum number of levels in a tree
                'brf__max_depth': [5, 8, 10, 15, None],
                # Minimum number of samples required to split a node
                'brf__min_samples_split': [2, 5, 8],
                # Minimum number of samples required at each leaf node
                'brf__min_samples_leaf': [2, 4],
                # Method of selecting samples for training each tree
                'brf__bootstrap': [True, False],
                'brf__replacement': [True, False],
                'brf__class_weight': ['balanced', None]
            }
            self.estimators = [('brf', self.BRF)]
            self.pipeline_imlearn = Pipeline(self.estimators)
            self.brf_random = RandomizedSearchCV(
                estimator=self.pipeline_imlearn,
                param_distributions=self.param_grid,
                n_iter=80,
                cv=5,
                verbose=0,
                random_state=42,
                scoring=f2,
                n_jobs=-1)
            self.brf_random.fit(X_train, y_train)

            # Keep each winning value as an attribute, as before.
            best = self.brf_random.best_params_
            self.n_estimators = best['brf__n_estimators']
            self.criterion = best['brf__criterion']
            self.max_features = best['brf__max_features']
            self.max_depth = best['brf__max_depth']
            self.min_samples_split = best['brf__min_samples_split']
            self.min_samples_leaf = best['brf__min_samples_leaf']
            self.bootstrap = best['brf__bootstrap']
            self.replacement = best['brf__replacement']
            self.class_weight = best['brf__class_weight']

            # Refit a standalone classifier with the winning configuration.
            self.brf = BalancedRandomForestClassifier(
                n_estimators=self.n_estimators,
                criterion=self.criterion,
                max_features=self.max_features,
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
                min_samples_leaf=self.min_samples_leaf,
                bootstrap=self.bootstrap,
                replacement=self.replacement,
                class_weight=self.class_weight)
            self.brf.fit(X_train, y_train)
            # The exit message previously named a non-existent method
            # (get_best_params_for_random_forest); it now names this one.
            self.logger.log(
                self.file_object, 'Balanced Random Forest best params: ' +
                str(self.brf_random.best_params_) + '\t' +
                str(self.brf_random.best_score_) +
                '. Exited the get_best_params_for_balanced_random_forest method of the Model_Finder class'
            )
            print('RF done and exited')
            return self.brf
        except Exception as e:
            self.logger.log(
                self.file_object,
                'Exception occured in get_best_params_for_balanced_random_forest method of the Model_Finder class. Exception message:  '
                + str(e))
            self.logger.log(
                self.file_object,
                'Balance Random Forest Parameter tuning  failed. Exited the get_best_params_for_balanced_random_forest method of the Model_Finder class'
            )
            # Same exception type as before, but with a message and the
            # original error chained so the traceback shows the real cause.
            raise Exception(
                'Balanced random forest parameter tuning failed') from e

    def get_best_params_for_balanced_adaBoost(self, X_train, y_train):
        """Tune an easy ensemble classifier and return the refitted model.

        Runs a randomized search (32 candidates, 5-fold CV, F2 scoring) over
        a one-step pipeline wrapping ``self.EEC``, stores each winning
        hyper-parameter on ``self`` and fits a fresh
        ``EasyEnsembleClassifier`` with them on ``X_train``/``y_train``.

        Returns:
            The fitted classifier (also stored as ``self.eec``).

        Raises:
            Exception: if the search or the final fit fails. The failure is
                logged and the original error is attached as ``__cause__``.
        """
        self.logger.log(
            self.file_object,
            'Entered the get_best_params_for_balanced_adaBoost method of the Model_Finder class'
        )

        print('enter ada boost')
        f2 = make_scorer(self.f2_make)
        try:
            # Search space; every key targets the 'eec' pipeline step.
            self.param_grid = {
                'eec__n_estimators': [10, 15, 20, 25],
                'eec__warm_start': [True, False],
                'eec__sampling_strategy': ['auto', 'majority'],
                'eec__replacement': [True, False]
            }

            self.estimators = [('eec', self.EEC)]
            self.pipeline_imlearn = Pipeline(self.estimators)
            self.eec_random = RandomizedSearchCV(
                estimator=self.pipeline_imlearn,
                param_distributions=self.param_grid,
                n_iter=32,
                cv=5,
                verbose=0,
                random_state=42,
                scoring=f2,
                n_jobs=-1)
            self.eec_random.fit(X_train, y_train)

            # Keep each winning value as an attribute, as before.
            best = self.eec_random.best_params_
            self.n_estimators = best['eec__n_estimators']
            self.warm_start = best['eec__warm_start']
            self.sampling_strategy = best['eec__sampling_strategy']
            self.replacement = best['eec__replacement']

            # Refit a standalone classifier with the winning configuration.
            self.eec = EasyEnsembleClassifier(
                n_estimators=self.n_estimators,
                warm_start=self.warm_start,
                sampling_strategy=self.sampling_strategy,
                replacement=self.replacement)
            self.eec.fit(X_train, y_train)
            # The exit message previously named a non-existent method
            # (get_best_params_for_AdaBoost); it now names this one.
            self.logger.log(
                self.file_object, 'Balanced Ada Boost params: ' +
                str(self.eec_random.best_params_) + '\t' +
                str(self.eec_random.best_score_) +
                '. Exited the get_best_params_for_balanced_adaBoost method of the Model_Finder class'
            )
            print('aba boost done and exited')
            return self.eec
        except Exception as e:
            self.logger.log(
                self.file_object,
                'Exception occured in get_best_params_for_balanced_adaBoost method of the Model_Finder class. Exception message:  '
                + str(e))
            self.logger.log(
                self.file_object,
                'Balance Ada Boost tuning  failed. Exited the get_best_params_for_balanced_AdaBoost method of the Model_Finder class'
            )
            # Same exception type as before, but with a message and the
            # original error chained so the traceback shows the real cause.
            raise Exception(
                'Balanced AdaBoost parameter tuning failed') from e

    def get_best_model(self, X_train, X_test, y_train, y_test):

        self.logger.log(
            self.file_object,
            'Entered the get_best_model method of the Model_Finder class')

        print('in get best model')
        try:

            self.brf = self.get_best_params_for_balanced_random_forest(
                X_train, y_train)
            self.y_pred_brf = self.brf.predict(X_test)
            self.brf_f2 = self.f2_make(y_test, self.y_pred_brf)

            self.eec = self.get_best_params_for_balanced_adaBoost(
                X_train, y_train)
            self.y_pred_eec = self.eec.predict(X_test)
            self.eec_f2 = self.f2_make(y_test, self.y_pred_eec)

            #comparing the two models
            if (self.brf_f2 > self.eec_f2):
                print('best model exited')
                joblib.dump(self.brf, self.saved_best_model_path)
                return 'BalancedRandomForestClassifier', self.brf
            else:
                print('best model exited')
                joblib.dump(self.eec, self.saved_best_model_path)
                return 'EasyEnsembleClassifier', self.eec

        except Exception as e:
            self.logger.log(
                self.file_object,
                'Exception occured in get_best_model method of the Model_Finder class. Exception message:  '
                + str(e))
            self.logger.log(
                self.file_object,
                'Model Selection Failed. Exited the get_best_model method of the Model_Finder class'
            )
            raise Exception()