def do_xgboost_blance_sample(X, Y):
    """Fit an XGBoost classifier using per-sample weights and report validation metrics.

    The input is split 80/20 into a training part and a validation part with
    ``random_state=1`` so the split is reproducible. The classifier is fitted
    on the training part with weights produced by
    ``compute_sample_weight("balanced", ...)``, then its predictions on the
    validation part are handed to ``do_metrics``.

    Args:
        X: features for all samples (anything ``train_test_split`` accepts).
        Y: labels aligned with ``X``.

    Returns:
        The fitted classifier returned by ``xgb.XGBClassifier().fit``.
    """
    logging.info('训练结果输出')

    # Fixed seed: the same rows end up in the validation part on every run.
    features_fit, features_val, labels_fit, labels_val = train_test_split(
        X, Y, test_size=0.2, random_state=1)

    classifier = xgb.XGBClassifier()
    # Weights are derived from the training labels only.
    weights = compute_sample_weight("balanced", labels_fit)
    fitted = classifier.fit(features_fit, labels_fit, sample_weight=weights)

    # Evaluate on the held-out part; do_metrics is defined elsewhere.
    predictions = fitted.predict(features_val)
    do_metrics(labels_val, predictions)

    return fitted
def dnn_test(X_test, Y_test, model_path, log_path, name):
    """Evaluate a saved DNN model on test data and plot its confusion matrix.

    Args:
        X_test: test features passed to ``model.predict``.
        Y_test: class labels for the test samples; they are one-hot encoded
            with ``to_categorical`` into 5 classes, so they are presumably
            integer ids in ``[0, 5)`` -- confirm against the caller.
        model_path: path prefix of the saved model. The file loaded is
            ``model_path + name + 'model.h5'``; no separator is inserted.
        log_path: passed to ``save_log_file`` before evaluation starts.
        name: model name placed between ``model_path`` and ``'model.h5'``.

    Returns:
        None. Results are reported through ``do_metrics`` and
        ``confusion_matrixs.plot_confusion_matrix``.
    """
    attack_types = ['normal', 'attacker', 'victim', 'suspicious', 'unknown']
    class_ids = list(range(len(attack_types)))

    save_log_file(log_path)
    model = load_model(model_path + name + 'model.h5')

    Y_test = to_categorical(Y_test, num_classes=len(attack_types))
    Y_pred = model.predict(X_test)

    # NOTE(review): do_metrics is defined elsewhere; it keeps receiving the
    # one-hot labels and the raw model output exactly as before. Confirm that
    # is the input format it expects.
    do_metrics(Y_test, Y_pred)

    # A confusion matrix is built from one class id per sample, not from
    # one-hot rows or per-class scores, so reduce both arrays first.
    # Assumes the model emits one score per class along axis 1 -- TODO confirm.
    true_classes = np.argmax(Y_test, axis=1)
    predicted_classes = np.argmax(Y_pred, axis=1)

    # Fixing ``labels`` keeps the matrix at one row/column per attack type
    # even when some class never occurs in this test set.
    matrix = metrics.confusion_matrix(
        true_classes, predicted_classes, labels=class_ids)

    confusion_matrixs.plot_confusion_matrix(
        matrix,
        classes=attack_types,
        normalize=True,
        title='dnn Normalized confusion matrix')
if __name__=="__main__": log_path = '/home/liyulian/code/CIDDS/repositories' save_log_file(log_path) ### CIDDS-001 path = '/home/liyulian/code/CIDDS/sources/utils/data_features_001.csv' ### 无样本平衡 logging.info('使用CIDDS-001数据, 用xgboost完成实验分类,' '无样本平衡, 设置了划分样本的random_state=1, 并且保存模型') X_train, X_test, Y_train, Y_test = get_features_FPPB(path) Y_pred, model = machine_learnings.do_xgboost(X_train, X_test, Y_train, Y_test) do_metrics(Y_test, Y_pred) import pickle # pickle模块 # 保存Model(注:save文件夹要预先建立,否则会报错) with open('repositories/model_001.pickle', 'wb') as f: pickle.dump(model, f) # ### 无样本平衡 # logging.info('使用CIDDS-001数据, 用xgboost完成实验分类,' # '无样本平衡') # # X_train, X_test, Y_train, Y_test = get_features(path) # Y_pred = machine_learnings.do_xgboost(X_train, X_test, Y_train, Y_test) # do_metrics(Y_test, Y_pred) """