# Split the training frame into the label vector and the feature matrix.
y = df["attack_type"].values
X = df[selected_feat_names].values

# TODO: replace these hyper-parameters with the best ones found by grid search.
rfc = RandomForestClassifier(
    n_estimators=35,
    criterion="entropy",
    n_jobs=-1,
)
ada = AdaBoostClassifier(learning_rate=1.5, n_estimators=75)
etc = ExtraTreesClassifier(
    n_estimators=5,
    criterion="entropy",
    n_jobs=-1,
)

# Soft-voting ensemble; weights are given in the same order as the
# estimators list (ada -> 2, rfc -> 1, etc -> 3).
eclf = VotingClassifier(
    estimators=[("ada", ada), ("rfc", rfc), ("etc", etc)],
    voting="soft",
    weights=[2, 1, 3],
    n_jobs=1,
)
eclf.fit(X, y)
print("training finished")

# Read the test split; header=None with explicit names means the column
# names come from __ATTR_NAMES rather than from the file.
df = pd.read_csv(r'data/corrected', names=__ATTR_NAMES, header=None)

# Preprocessing, applied innermost-first: merge sparse features, one-hot
# encode, then map the y labels.
df = processing.map2major5(
    processing.one_hot(
        processing.merge_sparse_feature(df)
    )
)

# Restore the pickled selected feature names.
with open(r'data/selected_feat_names.pkl', 'rb') as names_file:
    selected_feat_names = pickle.load(names_file)
print("test data loaded")

# Build the test feature matrix and ground-truth labels, then score the
# fitted voting ensemble on them.
X, y = df[selected_feat_names].values, df['attack_type'].values
y_rf = eclf.predict(X)  # predictions of the voting ensemble

print("voting results:")
cost_based_scoring.score(y, y_rf, True)




from scoring import cost_based_scoring as cbs

# Restore the pickled test frame.
with open('../data/test_df.pkl', 'rb') as frame_file:
    df = pickle.load(frame_file)

# Restore the pickled selected feature names.
with open(r'../data/selected_feat_names.pkl', 'rb') as names_file:
    selected_feat_names = pickle.load(names_file)

# Feature matrix and ground-truth labels shared by the evaluations below.
X = df[selected_feat_names].values
y = df['attack_type'].values
print("data loaded")

def _unpickle(path):
    """Return the object stored in the pickle file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Each section below restores one pickled model, predicts on X, and reports
# its score against the ground truth y.

# rf
rf = _unpickle('../data/rf.pkl')
y_rf = rf.predict(X)
print("rf results:")
cbs.score(y, y_rf, True)

# ada boost
ada = _unpickle('../data/ada.pkl')
y_ada = ada.predict(X)
print("ada results:")
cbs.score(y, y_ada, True)

# et
et = _unpickle('../data/et.pkl')
y_et = et.predict(X)
print("et results:")
cbs.score(y, y_et, True)