コード例 #1
0
    # 'etc__n_estimators': [20, 200],
    # 'voting': 'hard',
    # If we already have the optimal config or decide not to
    # fine tune each classifier individually, then
    # probably try to use 'soft' voting method. However, to use soft
    # voting, the weights for each classifier need to setup
    # correctly. Then find the right weight combination will be the
    # next target.
    'voting': ('soft', ),
    # Candidate weight vectors, one weight per base estimator of the
    # voting classifier.  The grid covers every triple over {1, 2} except
    # [2, 2, 2] (equivalent to [1, 1, 1] up to scaling) plus every
    # permutation of (1, 2, 3).
    'weights': [[1, 1, 1], [1, 1, 2], [1, 2, 1], [2, 1, 1], [1, 2,
                                                             2], [2, 1, 2],
                [2, 2, 1], [1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1],
                [3, 1, 2], [3, 2, 1]]
}

# Cost-based scorer from the project's scoring module
# (show=True presumably enables verbose scoring output — confirm in cbs).
scorer = cbs.scorer(show=True)

if __name__ == '__main__':
    # Single worker (n_jobs=1) to keep memory use bounded; refit=False
    # because only the search results are needed, not a refitted model.
    gscv = GridSearchCV(estimator=vclf,
                        param_grid=parameters,
                        scoring=scorer,
                        n_jobs=1,
                        refit=False,
                        cv=3,
                        verbose=10,
                        return_train_score=False)
    gscv.fit(X, y)
    print(gscv.best_params_, gscv.best_score_)
    print(gscv.cv_results_)
コード例 #2
0
ファイル: et_gs.py プロジェクト: shubhampachori12110095/kdd99
    # NOTE(review): this is the tail of a `with open(...)` block whose
    # opening line is above this chunk; `f` is the open pickle handle.
    selected_feat_names = pickle.load(f)
print("data loaded")

# Label vector: the attack-type column; feature matrix: the columns
# named in the previously pickled feature selection.
y = df["attack_type"].values
X = df[selected_feat_names].values

# Extremely-randomized trees; n_jobs=-1 fits trees on all CPU cores.
etc = ExtraTreesClassifier(n_jobs=-1)

# Grid for the ExtraTrees search: only the forest size is swept here.
parameters_etc = {
    'n_estimators': (5, 10, 20, 30, 40, 50),
    # Kept for reference but not searched in this run:
    # 'criterion': ("gini", "entropy"),
    # 'max_features': ("sqrt", "log2"),
    # 'min_samples_split': (2, 3, 4),
}

# Pass the flag by keyword for consistency with the sibling scripts,
# which call cbs.scorer(show=True).
scorer = cbs.scorer(show=True)

if __name__ == '__main__':
    # n_jobs=1 to bound memory use; refit=False because only the search
    # results are needed, not a refitted final estimator.
    gscv = GridSearchCV(estimator=etc,
                        param_grid=parameters_etc,
                        scoring=scorer,
                        verbose=10,
                        refit=False,
                        cv=3,
                        n_jobs=1,
                        return_train_score=False)
    gscv.fit(X, y)
    print(gscv.cv_results_)
    print(gscv.best_params_)
    print(gscv.best_score_)
    # Search outcome: 5 estimators and entropy were best; the other
    # swept parameters showed no significant effect.
コード例 #3
0
# Load the 10% KDD training split (no header row; column names supplied).
df = pd.read_csv(r'data/train10pc', header=None, names=__ATTR_NAMES)
# Preprocessing pipeline: merge sparse categorical values, one-hot
# encode, then map the labels onto the five major attack classes.
df = processing.merge_sparse_feature(df)
df = processing.one_hot(df)
df = processing.map2major5(df)
with open(r'data/selected_feat_names.pkl', 'rb') as f:
    selected_feat_names = pickle.load(f)
print("training data loaded")

# Label vector and selected-feature matrix.
y = df["attack_type"].values
X = df[selected_feat_names].values

ada = AdaBoostClassifier()
# Sweep the number of boosting rounds and the learning rate.
parameters = {
    'n_estimators': (50, 75, 100, 125, 150),
    'learning_rate': (1, 1.5, 2),
}
scorer = cost_based_scoring.scorer(show=True)
gscv = GridSearchCV(estimator=ada,
                    param_grid=parameters,
                    scoring=scorer,
                    n_jobs=1,
                    refit=False,
                    cv=3,
                    verbose=2,
                    return_train_score=False)
gscv.fit(X, y)
print("optimization params:", gscv.best_params_)
print("grid search finished")
# Best found: n_estimators=75, learning_rate=1.5