# Training set: labels come from the "attack_type" column, features from the
# columns listed in selected_feat_names (both defined earlier in the file).
y = df["attack_type"].values
X = df[selected_feat_names].values

# TODO: replace these hyper-parameters with the best ones found by grid search.
rfc = RandomForestClassifier(n_jobs=-1, n_estimators=35, criterion="entropy")
ada = AdaBoostClassifier(n_estimators=75, learning_rate=1.5)
etc = ExtraTreesClassifier(n_jobs=-1, criterion="entropy", n_estimators=5)

# Soft-voting ensemble over the three classifiers. The weights are given in
# the same order as the estimators: ada=2, rfc=1, etc=3.
eclf = VotingClassifier(
    estimators=[("ada", ada), ("rfc", rfc), ("etc", etc)],
    voting="soft",
    weights=[2, 1, 3],
    n_jobs=1,
)
eclf.fit(X, y)
print("training finished")

# Read the evaluation file (no header row; column names are supplied by
# __ATTR_NAMES) and run it through the project's preprocessing helpers.
df = pd.read_csv(r"data/corrected", header=None, names=__ATTR_NAMES)
df = processing.merge_sparse_feature(df)
df = processing.one_hot(df)  # one-hot encoding
# Label mapping; presumably collapses attack labels into five major classes
# (inferred from the helper's name) -- confirm against processing.map2major5.
df = processing.map2major5(df)

# Reload the list of selected feature columns from disk.
with open(r"data/selected_feat_names.pkl", "rb") as f:
    selected_feat_names = pickle.load(f)
print("test data loaded")

# Rebuild X / y from the evaluation frame and score the ensemble on it.
X = df[selected_feat_names].values
y = df["attack_type"].values
y_rf = eclf.predict(X)  # predictions of the voting ensemble
print("voting results:")
cost_based_scoring.score(y, y_rf, True)
from scoring import cost_based_scoring as cbs


def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    # NOTE: unpickling can execute arbitrary code. Only load artifacts that
    # were produced by this project, never files from an untrusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)


def _evaluate(label, model_path, features, truth):
    """Load a pickled model, predict on *features*, and print its score.

    The steps run in the same order as the original inline code: load the
    model, predict, print the "<label> results:" header, then call
    ``cbs.score``.

    Args:
        label: Short model name used in the printed header.
        model_path: Path of the pickled model file.
        features: Feature matrix passed to ``model.predict``.
        truth: Ground-truth labels passed to ``cbs.score``.

    Returns:
        A ``(model, predictions)`` tuple so the caller can keep both.
    """
    model = _load_pickle(model_path)
    predictions = model.predict(features)
    print(f"{label} results:")
    # The third positional argument is passed as True exactly as before; its
    # meaning is defined by cost_based_scoring.score.
    cbs.score(truth, predictions, True)
    return model, predictions


# Load the pickled test frame and the list of selected feature columns.
df = _load_pickle('../data/test_df.pkl')
selected_feat_names = _load_pickle(r'../data/selected_feat_names.pkl')
X = df[selected_feat_names].values
y = df['attack_type'].values  # ground truth
print("data loaded")

# Score each stored model on the same test set: random forest, AdaBoost,
# then extra trees.
rf, y_rf = _evaluate("rf", '../data/rf.pkl', X, y)
ada, y_ada = _evaluate("ada", '../data/ada.pkl', X, y)
et, y_et = _evaluate("et", '../data/et.pkl', X, y)