# Wire the DNN input and output layers into a trainable model.
d = Model(inputs=dnn_inp, outputs=d_out)
# NOTE(review): `lr` is the legacy Keras spelling (newer versions use
# `learning_rate`) — confirm the pinned Keras/TF version before changing.
d.compile(optimizer=Adam(lr=0.00001),
          loss="categorical_crossentropy",
          metrics=["accuracy"])
print(d.summary())

# Train the wide-and-deep model on the prepared DNN features.
X_tr = X_train_dnn
Y_tr = y_train_dnn
# Test set
X_te = X_test_dnn
Y_te = y_test_dnn
d.fit(X_tr, Y_tr, epochs=40000, batch_size=32)

# Fix: the original evaluated the test set twice (and printed the result
# twice); a single pass is sufficient and halves the evaluation cost.
results = d.evaluate(X_te, Y_te)
print("\n", results)

# Collapse probability vectors / one-hot rows to class indices.
predicts = d.predict(X_te)
y_pre = [np.argmax(i) for i in predicts]
y_true = [np.argmax(i) for i in Y_te]  # fix: local var was misspelled `y_ture`

alphabet = [
    "AIM", "email", "facebookchat", "gmailchat", "hangoutsaudio",
    "hangoutschat", "icqchat", "netflix", "skypechat", "skypefile",
    "spotify", "vimeo", "youtube", "youtubeHTML5"
]
figures.plot_confusion_matrix(y_true, y_pre, alphabet, "./finetune_")
# print("recall_score_micro:",recall_score(y_true,predicts,average='micro')) print("recall_score_macro:",recall_score(y_true,predicts,average='macro')) # alphabet=["AIM","email","facebookchat","gmailchat","hangoutsaudio","hangoutschat","icqchat","netflix","skypechat","skypefile","spotify","vimeo","youtube","youtubeHTML5"] alphabet=softwares=["Baidu Map", "Baidu Post Bar", "Netease cloud music", "iQIYI", "Jingdong", "Jinritoutiao", "Meituan", "QQ", "QQ music", "QQ reader", "Taobao", "Weibo", "CTRIP", "Zhihu", "Tik Tok", "Ele.me", "gtja", "QQ mail", "Tencent", "Alipay"] figures.plot_confusion_matrix(y_true, predicts,alphabet, "./%dx%d_"% (pkt_counts, pkt_size) + choose) # 打印weight weights=model.predict(input_fn=test_input_fn,predict_keys=["layer_rnn_weight"]) weights=[w['layer_rnn_weight'] for w in weights] for weight in weights[:10]: print(weight) print("\n")
from sklearn.ensemble import RandomForestClassifier

# Load the train/test splits from the project's input pipeline.
train_data, train_labels, test_data, test_labels = input_w.inputs()

# Train a 100-tree forest, then round-trip it through disk before predicting.
forest = RandomForestClassifier(n_estimators=100, max_depth=12, random_state=0, verbose=1)
forest.fit(train_data, train_labels)
dump(forest, './randomForest.model')
forest = load('./randomForest.model')

y_pred = forest.predict(test_data)
print(y_pred)
print("predicts:", len(y_pred))

# Report overall accuracy plus macro-averaged precision/F1/recall.
print("accuracy_score:", accuracy_score(test_labels, y_pred))
print("precision_score:", precision_score(test_labels, y_pred, average='macro'))
# print("f1_score_micro:",f1_score(y_true,predicts,average='micro'))
print("f1_score_macro:", f1_score(test_labels, y_pred, average='macro'))
# print("recall_score_micro:",recall_score(y_true,predicts,average='micro'))
print("recall_score_macro:", recall_score(test_labels, y_pred, average='macro'))

# alphabet=["AIM","email","facebookchat","gmailchat","hangoutsaudio","hangoutschat","icqchat","netflix","skypechat","skypefile","spotify","vimeo","youtube","youtubeHTML5"]
alphabet = softwares = [
    "Baidu Map", "Baidu Post Bar", "Netease cloud music", "iQIYI",
    "Jingdong", "Jinritoutiao", "Meituan", "QQ", "QQ music", "QQ reader",
    "Taobao", "Weibo", "CTRIP", "Zhihu", "Tik Tok", "Ele.me", "gtja",
    "QQ mail", "Tencent", "Alipay",
]
figures.plot_confusion_matrix(test_labels, y_pred, alphabet, "./rf")
# Column-name lists for the two feature views fed to the two sub-networks.
# NOTE(review): "_y" columns feed the CNN and "_x" columns feed the RNN —
# looks intentional but worth confirming against the feature-extraction code.
cnn_x_name=["Seq_%d_y"%i for i in range(128)]
rnn_x_name=["Seq_%d_x"%i for i in range(128)]
filtered_val_data=pd.read_csv(config.HTTPS_CONFIG["all_val_path"])
y_test=filtered_val_data["label"]
# y_test=to_categorical(y_test)
# CNN input is reshaped to (samples, 128, 1); RNN input stays 2-D.
X_test_cnn = filtered_val_data[cnn_x_name]
X_test_cnn=np.asarray(X_test_cnn).reshape((-1,128,1))
X_test_rnn = filtered_val_data[rnn_x_name]
X_test_rnn=np.asarray(X_test_rnn)
X_te = [X_test_cnn, X_test_rnn]
Y_te = y_test
# Run every saved per-class (one-vs-rest) model and keep its positive score.
# NOTE(review): os.listdir order is OS-dependent; the class index assigned to
# each model below must match the label order in `alphabet` further down —
# both rely on listdir returning the same ordering. Consider sorting both.
outcomes=[]
models=os.listdir(config.HTTPS_CONFIG["models"])
for model in models:
    clf=load_model(config.HTTPS_CONFIG["models"]+model)
    outcome=clf.predict(X_te)
    # Keep only column 0 of each model's output — presumably the
    # positive-class probability; verify the sub-model output layout.
    outcomes.append(np.asarray(outcome)[:,0])
# Transpose to (samples, models) and vote: highest-scoring model wins.
tmp=np.asarray(outcomes).T
predicts=[np.argmax(i) for i in tmp]
# Derive display labels from the file names in the chosen data directory
# (strip the 4-character extension).
choose=config.HTTPS_CONFIG[config.HTTPS_CONFIG["choose"]]
names=os.listdir(choose)
alphabet=[names[i][:-4] for i in range(len(names))]
# Plot the confusion matrix.
figures.plot_confusion_matrix(Y_te, predicts,alphabet, "./")
def fit_and_save_result(x_train, y_train, x_test, y_ture, clf, path):
    """Fit *clf* on the training split, score it on the test split, save results.

    Writes, under the path prefix *path*:
      - each_class_metrics.csv : one-vs-rest AUC/F1/recall/precision/accuracy per class
      - predict_proba.csv      : raw class-probability matrix
      - predict_data.csv       : true vs. predicted labels
      - metrics                : overall macro metrics plus fit/predict wall times
      - best_params            : clf.best_params_ (assumes clf is a fitted
                                 search object such as GridSearchCV — TODO confirm)

    Parameters
    ----------
    x_train, y_train : training features / labels
    x_test, y_ture   : test features / true labels (misspelled name kept
                       for caller compatibility)
    clf              : estimator exposing fit / predict / predict_proba / best_params_
    path             : output path prefix for all artifacts
    """
    t1 = time()
    clf.fit(x_train, y_train)
    t2 = time()
    # One-hot encode the true labels for the multi-class ROC-AUC computation.
    # Fix: pass `classes` by keyword — it is keyword-only in modern scikit-learn.
    test_one_hot = label_binarize(y_ture, classes=np.arange(config.HTTPS_CONFIG["num_class"]))
    y_score = clf.predict_proba(x_test)
    t3 = time()
    y_pre = clf.predict(x_test)
    t4 = time()
    # Overall (macro-averaged) metrics.
    accuracy = metrics.accuracy_score(y_true=y_ture, y_pred=y_pre)
    f1 = metrics.f1_score(y_true=y_ture, y_pred=y_pre, average='macro')
    precision = metrics.precision_score(y_true=y_ture, y_pred=y_pre, average='macro')
    recall = metrics.recall_score(y_true=y_ture, y_pred=y_pre, average='macro')
    auc = metrics.roc_auc_score(y_true=test_one_hot, y_score=y_score, average='macro')

    # Per-class metrics via one-vs-rest binarisation of labels and predictions
    # (replaces the per-iteration `convert` closure and redundant int cast).
    auc_all, f1_all, recall_all, precision_all, acc_all = [], [], [], [], []
    for label in range(config.HTTPS_CONFIG["num_class"]):
        ture = [1 if x == label else 0 for x in y_ture]
        pre = [1 if x == label else 0 for x in y_pre]
        score = y_score[:, label]
        auc_all.append(metrics.roc_auc_score(ture, score))
        f1_all.append(metrics.f1_score(y_true=ture, y_pred=pre, average='binary'))
        recall_all.append(metrics.recall_score(ture, pre))
        precision_all.append(metrics.precision_score(ture, pre))
        acc_all.append(metrics.accuracy_score(ture, pre))

    each_class_pd = pd.DataFrame({
        'auc': auc_all,
        'f1': f1_all,
        'recall': recall_all,
        # Fix: column was named 'precision_all', inconsistent with its siblings.
        'precision': precision_all,
        'acc': acc_all
    })
    each_class_pd.to_csv(path + 'each_class_metrics.csv', index=False)
    pd.DataFrame(y_score).to_csv(path + 'predict_proba.csv', index=False)
    df_result = pd.DataFrame({'y_ture': y_ture, 'y_pred': y_pre})
    df_result.to_csv(path + 'predict_data.csv', index=False)
    with open(path + 'metrics', 'w') as w:
        w.write('train_time: %s' % str(t2 - t1) + '\n')
        w.write('predict_time: %s' % str(t4 - t3) + '\n')
        w.write('accuracy: %s' % str(accuracy) + '\n')
        w.write('f1: %s' % str(f1) + '\n')
        w.write('precision: %s' % str(precision) + '\n')
        w.write('recall: %s' % str(recall) + '\n')
        w.write('auc: %s' % str(auc) + '\n')
    # Save the best hyper-parameters (requires a search estimator).
    with open(path + 'best_params', 'w') as w:
        w.write('\n'.join(['%s %s' % (key, str(value)) for key, value in clf.best_params_.items()]))
    # Plot and save the confusion matrix.
    # NOTE(review): `alphabet` is not defined inside this function — it must be
    # a module-level label list; confirm it exists wherever this is called.
    figures.plot_confusion_matrix(y_ture, y_pre, alphabet, path)
# NOTE(review): this chunk starts mid-literal — the opening `params = {` lies
# outside this view. The keys below are LightGBM multi-class training params.
"learning_rate": 0.1,
"lambda_l1": 0.1,
"lambda_l2": 0.2,
"max_depth": 5,
"objective": "multiclass",
"num_class": config.HTTPS_CONFIG["num_class"],
"silent": True,
"verbosity": -1
}
# Wrap the arrays into LightGBM Datasets (names shadow the original arrays).
train_data = lgb.Dataset(train_data, label=train_labels)
validation_data = lgb.Dataset(eval_data, label=eval_labels)
# Train with early stopping on the validation set and a custom F1 feval.
# NOTE(review): `early_stopping_rounds` / `verbose_eval` were removed from
# lgb.train in LightGBM 4.x in favour of callbacks — confirm pinned version.
clf = lgb.train(params,
                train_data,
                num_boost_round=100000,
                valid_sets=[validation_data],
                early_stopping_rounds=500,
                feval=f1_score_vali,
                verbose_eval=1)
# Derive class display names from the files in the chosen data directory
# (strip the 4-character extension). Listdir order is OS-dependent.
names = os.listdir(choose)
labels = [names[i][:-4] for i in range(len(names))]
# Collapse per-class probabilities to predicted class indices.
y_pred = clf.predict(eval_data)
y_pred = [np.argmax(i) for i in y_pred]
eval_labels = [i for i in eval_labels]
figures.plot_confusion_matrix(
    eval_labels, y_pred, labels,
    config.HTTPS_CONFIG["outcome"] + 'lgb_confusion_matrix.png')
# Hold out the first 500 rows as a validation split; train on the remainder.
dtrain = xgb.DMatrix(train_data[500:], label=train_labels[500:])
dval = xgb.DMatrix(train_data[:500], train_labels[:500])
dtest = xgb.DMatrix(test_data)

# Train with early stopping against the validation split.
booster = xgb.train(plst, dtrain, num_rounds, [(dval, 'val')], early_stopping_rounds=100)
booster.save_model("./xgb.model")

# Round-trip the model through disk, then predict on the held-out test set.
loaded = xgb.Booster({'nthread': 4})
loaded.load_model('./xgb.model')
preds = loaded.predict(dtest)
print(preds.tolist())

# Overall accuracy plus macro-averaged precision/F1/recall.
print("accuracy_score:", accuracy_score(test_labels, preds))
print("precision_score:", precision_score(test_labels, preds, average='macro'))
# print("f1_score_micro:",f1_score(y_true,predicts,average='micro'))
print("f1_score_macro:", f1_score(test_labels, preds, average='macro'))
# print("recall_score_micro:",recall_score(y_true,predicts,average='micro'))
print("recall_score_macro:", recall_score(test_labels, preds, average='macro'))

# alphabet=["AIM","email","facebookchat","gmailchat","hangoutsaudio","hangoutschat","icqchat","netflix","skypechat","skypefile","spotify","vimeo","youtube","youtubeHTML5"]
alphabet = softwares = [
    "Baidu Map", "Baidu Post Bar", "Netease cloud music", "iQIYI",
    "Jingdong", "Jinritoutiao", "Meituan", "QQ", "QQ music", "QQ reader",
    "Taobao", "Weibo", "CTRIP", "Zhihu", "Tik Tok", "Ele.me", "gtja",
    "QQ mail", "Tencent", "Alipay",
]
figures.plot_confusion_matrix(test_labels, preds, alphabet, "./xgb_finetune_")