    # 'etc__n_estimators': [20, 200],
    # 'voting': 'hard',
    # If we already have the optimal config, or decide not to fine-tune each
    # classifier individually, then it is probably worth trying the 'soft'
    # voting method. However, to use soft voting, the weights for each
    # classifier need to be set up correctly, so finding the right weight
    # combination is the next target.
    'voting': ('soft',),
    'weights': [[1, 1, 1], [1, 1, 2], [1, 2, 1], [2, 1, 1],
                [1, 2, 2], [2, 1, 2], [2, 2, 1],
                [1, 2, 3], [1, 3, 2], [2, 1, 3], [2, 3, 1],
                [3, 1, 2], [3, 2, 1]],
}

scorer = cbs.scorer(show=True)

if __name__ == '__main__':
    # n_jobs=1 due to limited memory
    # refit=False for the sake of clarity
    gscv = GridSearchCV(vclf, parameters, scoring=scorer, cv=3, verbose=10,
                        refit=False, n_jobs=1, return_train_score=False)
    gscv.fit(X, y)
    print(gscv.best_params_, gscv.best_score_)
    print(gscv.cv_results_)
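
# Hypothetical follow-up, not part of the original script: with refit=False
# the search only reports scores, so the weight combinations have to be
# compared by hand. A minimal sketch that ranks them with pandas (assumes
# pandas is importable in this environment):
if __name__ == '__main__':
    import pandas as pd  # local import to keep the sketch self-contained

    results = pd.DataFrame(gscv.cv_results_)
    summary = results[["param_voting", "param_weights",
                       "mean_test_score", "std_test_score",
                       "rank_test_score"]]
    print(summary.sort_values("rank_test_score").to_string(index=False))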
    selected_feat_names = pickle.load(f)
print("data loaded")

y = df["attack_type"].values
X = df[selected_feat_names].values

etc = ExtraTreesClassifier(n_jobs=-1)
parameters_etc = {
    # 'criterion': ("gini", "entropy"),
    'n_estimators': (5, 10, 20, 30, 40, 50),
    # 'max_features': ("sqrt", "log2"),
    # 'min_samples_split': (2, 3, 4),
}
scorer = cbs.scorer(True)

if __name__ == '__main__':
    gscv = GridSearchCV(estimator=etc, param_grid=parameters_etc,
                        scoring=scorer, verbose=10, refit=False, cv=3,
                        n_jobs=1, return_train_score=False)
    gscv.fit(X, y)
    print(gscv.cv_results_)
    print(gscv.best_params_)
    print(gscv.best_score_)
    # Best found: 5 estimators, entropy criterion; the other parameters
    # showed no significant effect.
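
# Hypothetical follow-up, not part of the original script: because
# refit=False, the search above does not leave a trained model behind.
# A minimal sketch of refitting ExtraTrees with the configuration noted
# in the comment above (5 estimators, entropy criterion):
if __name__ == '__main__':
    final_etc = ExtraTreesClassifier(n_estimators=5, criterion="entropy",
                                     n_jobs=-1)
    final_etc.fit(X, y)
    print("ExtraTrees refitted on the full training set")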
df = pd.read_csv(r'data/train10pc', header=None, names=__ATTR_NAMES)
# sparse feature merge
df = processing.merge_sparse_feature(df)
# one hot encoding
df = processing.one_hot(df)
# y labels mapping
df = processing.map2major5(df)
with open(r'data/selected_feat_names.pkl', 'rb') as f:
    selected_feat_names = pickle.load(f)
print("training data loaded")

y = df["attack_type"].values
X = df[selected_feat_names].values

ada = AdaBoostClassifier()
parameters = {
    'n_estimators': (50, 75, 100, 125, 150),
    'learning_rate': (1, 1.5, 2),
}
scorer = cost_based_scoring.scorer(show=True)

gscv = GridSearchCV(ada, parameters, scoring=scorer, cv=3, verbose=2,
                    refit=False, n_jobs=1, return_train_score=False)
gscv.fit(X, y)
print("optimization params:", gscv.best_params_)
print("grid search finished")
# n_estimators=75, learning_rate=1.5
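
# Hypothetical illustration, not the project's actual cost_based_scoring
# module: a scorer of this kind can be built with sklearn's make_scorer
# from a confusion matrix weighted by per-class misclassification costs.
# The 5x5 unit cost matrix and the assumption of integer labels 0-4 are
# placeholders, not the real values used above.
import numpy as np
from sklearn.metrics import confusion_matrix, make_scorer

_EXAMPLE_COST_MATRIX = np.ones((5, 5)) - np.eye(5)  # unit cost off-diagonal


def _example_cost(y_true, y_pred):
    """Average misclassification cost; lower is better."""
    cm = confusion_matrix(y_true, y_pred, labels=list(range(5)))
    return float((cm * _EXAMPLE_COST_MATRIX).sum()) / max(cm.sum(), 1)


# greater_is_better=False makes GridSearchCV maximise the negated cost.
example_cost_scorer = make_scorer(_example_cost, greater_is_better=False)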