# Inputs and outputs of the model network
d = Model(inputs=dnn_inp, outputs=d_out)
d.compile(optimizer=Adam(lr=0.00001),
          loss="categorical_crossentropy",
          metrics=["accuracy"])
# Train the wide-and-deep model on the input data below
print(d.summary())
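# For orientation only: `dnn_inp` and `d_out` are built elsewhere in the project.
# The sketch below shows one way a wide-and-deep head can be wired with the Keras
# functional API; the names, layer sizes and 14-class output are assumptions, not
# the original architecture.
from keras.layers import Input, Dense, concatenate
from keras.models import Model as KerasModel

wide_inp_sketch = Input(shape=(128,), name="wide_input")   # hand-crafted wide features
deep_inp_sketch = Input(shape=(128,), name="deep_input")   # sequence features for the deep part
deep = Dense(256, activation="relu")(deep_inp_sketch)
deep = Dense(128, activation="relu")(deep)
merged = concatenate([wide_inp_sketch, deep])
out_sketch = Dense(14, activation="softmax")(merged)
wide_deep_sketch = KerasModel(inputs=[wide_inp_sketch, deep_inp_sketch], outputs=out_sketch)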

# Training set
X_tr = X_train_dnn
Y_tr = y_train_dnn
# Test set
X_te = X_test_dnn
Y_te = y_test_dnn
d.fit(X_tr, Y_tr, epochs=40000, batch_size=32)

results = d.evaluate(X_te, Y_te)
print("\n", results)

predicts = d.predict(X_te)
y_pre = [np.argmax(i) for i in predicts]   # predicted class indices
y_true = [np.argmax(i) for i in Y_te]      # ground-truth class indices (labels are one-hot)

alphabet = [
    "AIM", "email", "facebookchat", "gmailchat", "hangoutsaudio",
    "hangoutschat", "icqchat", "netflix", "skypechat", "skypefile", "spotify",
    "vimeo", "youtube", "youtubeHTML5"
]
figures.plot_confusion_matrix(y_true, y_pre, alphabet, "./finetune_")
# print("recall_score_micro:", recall_score(y_true, y_pre, average='micro'))
print("recall_score_macro:", recall_score(y_true, y_pre, average='macro'))
alphabet = softwares = [
    "Baidu Map", "Baidu Post Bar", "Netease cloud music", "iQIYI", "Jingdong",
    "Jinritoutiao", "Meituan", "QQ", "QQ music", "QQ reader", "Taobao",
    "Weibo", "CTRIP", "Zhihu", "Tik Tok", "Ele.me", "gtja", "QQ mail",
    "Tencent", "Alipay"
]
figures.plot_confusion_matrix(y_true, y_pre, alphabet, "./%dx%d_" % (pkt_counts, pkt_size) + choose)
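
# `figures.plot_confusion_matrix` is a project helper whose source is not included
# here. A minimal stand-in (assumed behaviour: take true labels, predicted labels,
# class names and a path prefix, and save a row-normalized matrix as an image):
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix


def plot_confusion_matrix_sketch(y_true, y_pred, class_names, path_prefix):
    cm = confusion_matrix(y_true, y_pred)
    cm_norm = cm.astype(float) / np.maximum(cm.sum(axis=1, keepdims=True), 1)

    fig, ax = plt.subplots(figsize=(10, 8))
    im = ax.imshow(cm_norm, interpolation="nearest", cmap="Blues")
    fig.colorbar(im, ax=ax)
    ticks = np.arange(len(class_names))
    ax.set_xticks(ticks)
    ax.set_xticklabels(class_names, rotation=90)
    ax.set_yticks(ticks)
    ax.set_yticklabels(class_names)
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    fig.tight_layout()
    fig.savefig(path_prefix + "confusion_matrix.png")
    plt.close(fig)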

# Print the weights returned by the estimator
weights = model.predict(input_fn=test_input_fn, predict_keys=["layer_rnn_weight"])
weights = [w["layer_rnn_weight"] for w in weights]
for weight in weights[:10]:
    print(weight)
    print("\n")
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from joblib import dump, load

train_data, train_labels, test_data, test_labels = input_w.inputs()
clf = RandomForestClassifier(n_estimators=100,
                             max_depth=12,
                             random_state=0,
                             verbose=1)
clf.fit(train_data, train_labels)

dump(clf, './randomForest.model')
clf = load('./randomForest.model')

predicts = clf.predict(test_data)
print(predicts)
print("predicts:", len(predicts))
print("accuracy_score:", accuracy_score(test_labels, predicts))
print("precision_score:",
      precision_score(test_labels, predicts, average='macro'))
# print("f1_score_micro:",f1_score(y_true,predicts,average='micro'))
print("f1_score_macro:", f1_score(test_labels, predicts, average='macro'))
# print("recall_score_micro:",recall_score(y_true,predicts,average='micro'))
print("recall_score_macro:",
      recall_score(test_labels, predicts, average='macro'))
alphabet = softwares = [
    "Baidu Map", "Baidu Post Bar", "Netease cloud music", "iQIYI", "Jingdong",
    "Jinritoutiao", "Meituan", "QQ", "QQ music", "QQ reader", "Taobao",
    "Weibo", "CTRIP", "Zhihu", "Tik Tok", "Ele.me", "gtja", "QQ mail",
    "Tencent", "Alipay"
]
figures.plot_confusion_matrix(test_labels, predicts, alphabet, "./rf")
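# Optional check (not in the original snippet): inspect which inputs the fitted
# random forest relies on most via its feature_importances_ attribute.
importances = clf.feature_importances_
top10 = sorted(enumerate(importances), key=lambda kv: kv[1], reverse=True)[:10]
for idx, score in top10:
    print("feature %d importance: %.4f" % (idx, score))
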
cnn_x_name = ["Seq_%d_y" % i for i in range(128)]
rnn_x_name = ["Seq_%d_x" % i for i in range(128)]
filtered_val_data = pd.read_csv(config.HTTPS_CONFIG["all_val_path"])

y_test = filtered_val_data["label"]
# y_test = to_categorical(y_test)
X_test_cnn = filtered_val_data[cnn_x_name]
X_test_cnn = np.asarray(X_test_cnn).reshape((-1, 128, 1))
X_test_rnn = filtered_val_data[rnn_x_name]
X_test_rnn = np.asarray(X_test_rnn)

X_te = [X_test_cnn, X_test_rnn]
Y_te = y_test

outcomes = []

models = os.listdir(config.HTTPS_CONFIG["models"])
for model in models:
    clf = load_model(config.HTTPS_CONFIG["models"] + model)
    outcome = clf.predict(X_te)
    outcomes.append(np.asarray(outcome)[:, 0])
# Transpose: rows become samples, columns become the per-model (per-class) scores
tmp = np.asarray(outcomes).T
predicts = [np.argmax(i) for i in tmp]
choose = config.HTTPS_CONFIG[config.HTTPS_CONFIG["choose"]]
names = os.listdir(choose)
alphabet = [names[i][:-4] for i in range(len(names))]
# Plot the confusion matrix
figures.plot_confusion_matrix(Y_te, predicts, alphabet, "./")
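# Optional check (not in the original snippet): overall accuracy of the
# one-model-per-class ensemble, where the argmax over per-model scores picks the class.
from sklearn.metrics import accuracy_score
print("ensemble accuracy:", accuracy_score(Y_te, predicts))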

from time import time

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.preprocessing import label_binarize


def fit_and_save_result(x_train, y_train, x_test, y_true, clf, path):
    t1 = time()
    clf.fit(x_train, y_train)
    t2 = time()
    test_one_hot = label_binarize(y_true, classes=np.arange(config.HTTPS_CONFIG["num_class"]))

    y_score = clf.predict_proba(x_test)
    t3 = time()
    y_pre = clf.predict(x_test)
    t4 = time()
    # conf_arr = confusion_matrix(y_true=y_true, y_pred=y_pre)
    accuracy = metrics.accuracy_score(y_true=y_true, y_pred=y_pre)
    f1 = metrics.f1_score(y_true=y_true, y_pred=y_pre, average='macro')
    precision = metrics.precision_score(y_true=y_true, y_pred=y_pre, average='macro')
    recall = metrics.recall_score(y_true=y_true, y_pred=y_pre, average='macro')

    auc = metrics.roc_auc_score(y_true=test_one_hot, y_score=y_score, average='macro')

    auc_all, f1_all, recall_all, precision_all, acc_all = [], [], [], [], []
    for label in range(config.HTTPS_CONFIG["num_class"]):
        def convert(x):
            if x == label:
                return 1
            else:
                return 0

        label = int(label)
        true_bin = [convert(x) for x in y_true]
        pred_bin = [convert(x) for x in y_pre]
        score = y_score[:, label]

        auc_each = metrics.roc_auc_score(true_bin, score)
        f1_each = metrics.f1_score(y_true=true_bin, y_pred=pred_bin, average='binary')
        recall_each = metrics.recall_score(true_bin, pred_bin)
        precision_each = metrics.precision_score(true_bin, pred_bin)
        accuracy_each = metrics.accuracy_score(true_bin, pred_bin)

        auc_all.append(auc_each)
        f1_all.append(f1_each)
        recall_all.append(recall_each)
        precision_all.append(precision_each)
        acc_all.append(accuracy_each)

    each_class_pd = pd.DataFrame({
        'auc': auc_all,
        'f1': f1_all,
        'recall': recall_all,
        'precision': precision_all,
        'acc': acc_all
    })

    each_class_pd.to_csv(path + 'each_class_metrics.csv', index=False)


    df_score = pd.DataFrame(y_score)
    df_score.to_csv(path + 'predict_proba.csv', index=False)

    df_result = pd.DataFrame({'y_true': y_true, 'y_pred': y_pre})
    df_result.to_csv(path + 'predict_data.csv', index=False)



    with open(path + 'metrics', 'w') as w:
        w.write('train_time: %s' % str(t2 - t1) + '\n')
        w.write('predict_time: %s' % str(t4 - t3) + '\n')
        w.write('accuracy: %s' % str(accuracy) + '\n')
        w.write('f1: %s' % str(f1) + '\n')
        w.write('precision: %s' % str(precision) + '\n')
        w.write('recall: %s' % str(recall) + '\n')
        w.write('auc: %s' % str(auc) + '\n')

    # Save the best hyper-parameters
    with open(path + 'best_params', 'w') as w:
        w.write('\n'.join(['%s %s' % (key, str(value)) for key, value in clf.best_params_.items()]))

    # Plot and save the confusion matrix
    figures.plot_confusion_matrix(y_true, y_pre, alphabet, path)
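
# Example call (not in the original code): fit_and_save_result reads clf.best_params_,
# so it expects a hyper-parameter search object such as GridSearchCV. The estimator
# and grid below are placeholders, not the original configuration.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

search = GridSearchCV(RandomForestClassifier(random_state=0),
                      param_grid={"n_estimators": [100, 300], "max_depth": [8, 12]},
                      scoring="f1_macro",
                      cv=3)
fit_and_save_result(train_data, train_labels, test_data, test_labels, search, "./rf_search_")
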
# Example 6
import lightgbm as lgb

params = {
    "learning_rate": 0.1,
    "lambda_l1": 0.1,
    "lambda_l2": 0.2,
    "max_depth": 5,
    "objective": "multiclass",
    "num_class": config.HTTPS_CONFIG["num_class"],
    "silent": True,
    "verbosity": -1
}
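
# `f1_score_vali` is a custom LightGBM eval function that is not defined in this
# extract. A common sketch for multiclass models (assuming the pre-4.0 LightGBM
# behaviour, where feval receives the raw scores as a flat, class-major array):
import numpy as np
from sklearn.metrics import f1_score


def f1_score_vali(preds, eval_data):
    """Return (name, value, is_higher_better), as LightGBM expects from feval."""
    labels = eval_data.get_label()
    num_class = config.HTTPS_CONFIG["num_class"]
    pred_labels = np.argmax(preds.reshape(num_class, -1), axis=0)
    return "f1_score", f1_score(labels, pred_labels, average="macro"), True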

train_data = lgb.Dataset(train_data, label=train_labels)
validation_data = lgb.Dataset(eval_data, label=eval_labels)

clf = lgb.train(params,
                train_data,
                num_boost_round=100000,
                valid_sets=[validation_data],
                early_stopping_rounds=500,
                feval=f1_score_vali,
                verbose_eval=1)

names = os.listdir(choose)
labels = [names[i][:-4] for i in range(len(names))]

y_pred = clf.predict(eval_data)
y_pred = [np.argmax(i) for i in y_pred]
eval_labels = [i for i in eval_labels]

figures.plot_confusion_matrix(
    eval_labels, y_pred, labels,
    config.HTTPS_CONFIG["outcome"] + 'lgb_confusion_matrix.png')
# Example 7
import xgboost as xgb

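# `plst` and `num_rounds` are not defined in this extract; the values below are
# placeholders for a plausible multiclass setup, not the original configuration.
params = {
    "objective": "multi:softmax",
    "num_class": config.HTTPS_CONFIG["num_class"],
    "eta": 0.1,
    "max_depth": 6,
    "eval_metric": "merror",
}
plst = list(params.items())
num_rounds = 5000
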
xgb_train = xgb.DMatrix(train_data[500:], label=train_labels[500:])
xgb_val = xgb.DMatrix(train_data[:500], train_labels[:500])
xgb_test = xgb.DMatrix(test_data)

watchlist = [(xgb_val, 'val')]
model = xgb.train(plst,
                  xgb_train,
                  num_rounds,
                  watchlist,
                  early_stopping_rounds=100)
model.save_model("./xgb.model")

bst = xgb.Booster({'nthread': 4})
bst.load_model('./xgb.model')
preds = bst.predict(xgb_test)
print(preds.tolist())
print("accuracy_score:", accuracy_score(test_labels, preds))
print("precision_score:", precision_score(test_labels, preds, average='macro'))
# print("f1_score_micro:",f1_score(y_true,predicts,average='micro'))
print("f1_score_macro:", f1_score(test_labels, preds, average='macro'))
# print("recall_score_micro:",recall_score(y_true,predicts,average='micro'))
print("recall_score_macro:", recall_score(test_labels, preds, average='macro'))
alphabet = softwares = [
    "Baidu Map", "Baidu Post Bar", "Netease cloud music", "iQIYI", "Jingdong",
    "Jinritoutiao", "Meituan", "QQ", "QQ music", "QQ reader", "Taobao",
    "Weibo", "CTRIP", "Zhihu", "Tik Tok", "Ele.me", "gtja", "QQ mail",
    "Tencent", "Alipay"
]
figures.plot_confusion_matrix(test_labels, preds, alphabet, "./xgb_finetune_")