def Test_dbn():
    """Pre-train a DBN on MNIST, fine-tune an NN from it and print the results.

    Prints the value returned by ``dbn.train()`` followed by the fraction of
    test samples whose predicted label matches the one-hot ground truth.
    """
    dataset = input_data.read_data_sets("MNIST_data/", one_hot=True)
    trX, trY = dataset.train.images, dataset.train.labels
    teX, teY = dataset.test.images, dataset.test.labels

    opts = DLOption(10, 1., 100, 0.0, 0., 0.)

    # Unsupervised pre-training.
    dbn = DBN([400, 100], opts, trX)
    pretrain_errors = dbn.train()
    print(pretrain_errors)

    # NOTE(review): this first network is replaced right away by the next
    # assignment; it is kept only so that construction side effects (if any)
    # stay exactly as before.
    nn = NN([100], opts, trX, trY)
    nn = NN([400, 100], opts, trX, trY)
    nn.load_from_dbn(dbn)
    nn.train()

    # Labels are one-hot encoded, so argmax recovers the class index.
    expected = np.argmax(teY, axis=1)
    print(np.mean(expected == nn.predict(teX)))
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2016 Peng Liu <*****@*****.**>
#
# Distributed under terms of the GNU GPL3 license.

"""
Test some function.
"""

import input_data
from opts import DLOption
from dbn_tf import DBN
from nn_tf import NN
import numpy as np

# MNIST with one-hot labels, split into train and test arrays.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY = mnist.train.images, mnist.train.labels
teX, teY = mnist.test.images, mnist.test.labels

opts = DLOption(10, 1., 100, 0.0, 0., 0.)

# Unsupervised pre-training of the two-layer DBN.
dbn = DBN([400, 100], opts, trX)
dbn.train()

# NOTE(review): the first network is overwritten immediately; it is kept so
# that any construction side effects remain unchanged.
nn = NN([100], opts, trX, trY)
nn = NN([400, 100], opts, trX, trY)
nn.load_from_dbn(dbn)
nn.train()

# Accuracy: share of test samples whose predicted class equals the argmax of
# the one-hot label.
expected = np.argmax(teY, axis=1)
print(np.mean(expected == nn.predict(teX)))
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2016 Peng Liu <*****@*****.**>
#
# Distributed under terms of the GNU GPL3 license.

"""
Test some function.
"""

import input_data
from opts import DLOption
from dbn_tf import DBN
from nn_tf import NN
import numpy as np

# MNIST with one-hot labels, split into train and test arrays.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = (
    mnist.train.images,
    mnist.train.labels,
    mnist.test.images,
    mnist.test.labels,
)

opts = DLOption(10, 1., 100, 0.0, 0., 0.)

# Unsupervised pre-training of the two-layer DBN.
dbn = DBN([400, 100], opts, trX)
dbn.train()

# NOTE(review): the first network is overwritten immediately; it is kept so
# that any construction side effects remain unchanged.
nn = NN([100], opts, trX, trY)
nn = NN([400, 100], opts, trX, trY)
nn.load_from_dbn(dbn)
nn.train()

# Use the function-call form of print: the former ``print <expr>`` statement
# is a SyntaxError on Python 3, while a single-argument ``print(...)`` gives
# the same output on Python 2 and 3.
print(np.mean(np.argmax(teY, axis=1) == nn.predict(teX)))
def train(do_k_fold, out_dir, log_f):
    """Run DBN + CapsNet training, either cross-validated or on the fixed split.

    The function reads module-level state that is defined outside this block:
    ``k_fold``, ``k_fold_X``, ``k_fold_Y``, ``trX``/``trY``, ``vaX``/``vaY``,
    ``teX``/``teY``, ``isTraining``, ``train_datadir`` and ``setFileNames``.

    Args:
        do_k_fold: when truthy, train and evaluate once per fold and print the
            per-fold and averaged metrics; otherwise use the fixed split.
        out_dir: not used by this function; kept for interface compatibility.
        log_f: log handle forwarded to ``utils.print_out`` and
            ``capsNet.train``.

    Returns:
        None. Results are printed / logged, and in prediction mode
        (``isTraining`` falsy) written to a CSV file.
    """
    if do_k_fold:
        utils.print_out("# do k_fold k=%d" % k_fold, log_f)

        # Output labels are kept exactly as they were printed before.
        fold_labels = (", TP:", ", TN:", ", FP:", ", FN:",
                       ", SE:", ", SP:", ", MCC:", ", ACC: ")
        mean_labels = ("TP :", "TN :", "FP :", "FN :",
                       "SE :", "SP :", "MCC: ", "ACC: ")
        totals = [0] * len(mean_labels)

        for i in range(k_fold):
            # Fold i is held out; every other fold is used for training.
            trX_ = np.concatenate(
                [k_fold_X[j] for j in range(k_fold) if j != i])
            trY_ = np.concatenate(
                [k_fold_Y[j] for j in range(k_fold) if j != i])

            utils.print_out("#k_fold %d" % i, log_f)
            utils.print_out("#do DBN ...", log_f)
            dbn = DBN()
            dbn.train(trX_)
            utils.print_out("#end DBN", log_f)

            utils.print_out("#do caps ...", log_f)
            capsNet = CapsNet(is_training=True, dbn=dbn)
            i_k_fold_val, i_k_fold_tra = capsNet.train(
                trX_, trY_, k_fold_X[i], k_fold_Y[i], None, log_f)

            # eva yields (TP, TN, FP, FN, SE, SP, MCC, ACC) for the held-out fold.
            TP, TN, FP, FN, SE, SP, MCC, ACC = eva(
                capsNet, k_fold_X[i], k_fold_Y[i])
            fold_metrics = (TP, TN, FP, FN, SE, SP, MCC, ACC)
            for label, value in zip(fold_labels, fold_metrics):
                print(i, label, value)
            totals = [acc + value for acc, value in zip(totals, fold_metrics)]

        # Average over the number of folds actually run. The divisor used to
        # be a hard-coded 5, which is wrong whenever k_fold != 5. max() keeps
        # the k_fold == 0 case printing zeros instead of dividing by zero.
        n_folds = max(k_fold, 1)
        for label, total in zip(mean_labels, totals):
            print(label, total / n_folds)
    else:
        utils.print_out("#do DBN ...", log_f)
        dbn = DBN()
        dbn.train(trX)
        utils.print_out("#end DBN", log_f)

        utils.print_out("#do caps ...", log_f)
        utils.print_out("#test instead val set for test ...", log_f)
        capsNet = CapsNet(is_training=isTraining, dbn=dbn)

        if isTraining:
            i_k_fold_val, i_k_fold_tra = capsNet.train(
                trX, trY, teX, teY, "./board", log_f)
            utils.print_out("#end caps", log_f)

            tr_metrics = tuple(eva(capsNet, trX, trY))
            val_metrics = tuple(eva(capsNet, vaX, vaY))
            te_metrics = tuple(eva(capsNet, teX, teY))

            # NOTE(review): the last field is labelled "P:" but the value
            # logged is the accuracy (8th value returned by eva).
            utils.print_out(
                'train : TP:%.3f; TN:%.3f; FP:%.3f; FN:%.3f; SE:%.3f SP:%.3f MCC:%.3f P:%.3f'
                % tr_metrics, log_f)
            utils.print_out(
                'val : TP:%.3f; TN:%.3f; FP:%.3f; FN:%.3f; SE:%.3f SP:%.3f MCC:%.3f P:%.3f'
                % val_metrics, log_f)
            utils.print_out(
                'test : TP:%.3f; TN:%.3f; FP:%.3f; FN:%.3f; SE:%.3f SP:%.3f MCC:%.3f P:%.3f'
                % te_metrics, log_f)
        else:
            import csv
            import pandas as pd

            # Collect the first column of the input CSV. The context manager
            # guarantees the handle is closed even if a row is malformed.
            with open("./" + train_datadir + "/" + setFileNames[1], "r") as csvFile:
                # csv.reader returns an iterator over the rows.
                data = [item[0] for item in csv.reader(csvFile)]
            data = data[1:]  # drop the header row

            utils.print_out("#end caps", log_f)
            pre_Y = pre(capsNet, vaX).tolist()[0]

            dataFrame = pd.DataFrame({"0_name": data, "1_class": pre_Y})
            dataFrame.to_csv('./data_set/test_dir/180831-result.csv',
                             index=False, sep=",")
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2016 Peng Liu <*****@*****.**>
#
# Distributed under terms of the GNU GPL3 license.

"""
Test some function.
"""

import input_data
from opts import DLOption
from dbn_tf import DBN
from nn_tf import NN
import numpy as np

# MNIST with one-hot labels, split into train and test arrays.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY = mnist.train.images, mnist.train.labels
teX, teY = mnist.test.images, mnist.test.labels

opts = DLOption(10, 1., 100, 0.0, 0., 0.)

# The DBN is only constructed here; the dbn.train() call is disabled in this
# variant of the script.
dbn = DBN([400, 100], opts, trX)

# NOTE(review): the first network is overwritten immediately; it is kept so
# that any construction side effects remain unchanged.
nn = NN([100], opts, trX, trY)
nn = NN([400, 100], opts, trX, trY)

# nn.load_from_dbn(dbn) is disabled as well, so the network is trained
# without loading any DBN parameters.
nn.train()

# Accuracy: share of test samples whose predicted class equals the argmax of
# the one-hot label.
expected = np.argmax(teY, axis=1)
print(np.mean(expected == nn.predict(teX)))
# Fragment of a larger script; `key`, `n_sample`, `dm`, `gm`, `train_index`,
# `n_batches`, `batch_size`, `hidb`, `X_train`, `X_test`, `y_train` and `dbn`
# are defined outside this view (presumably inside a loop over `key` - confirm).
#
# Steps performed for the current `key`:
#   1. Row i of X_train[key] is the concatenation of dm[key][train_index[i, 0]]
#      and gm[key][train_index[i, 1]]; its label is train_index[i, 2].
#   2. X_train[key] is normalised as a whole, X_test[key] in slices of
#      `batch_size` rows.
#   3. A DBN with three hidden layers of width `hidb` is fitted on X_train[key];
#      X_train[key] is then transformed as a whole and X_test[key] slice by slice.
#   4. The results are written with np.save.
# NOTE(review): the output names 'X_train_go' / 'X_test_go' / 'y_train' do not
# depend on `key`, so every key would write to the same files - confirm intent.
y_train[key] = np.zeros(n_sample) for i in range(n_sample): X_train[key][i] = np.concatenate( (dm[key][int(train_index[i, 0])], gm[key][int(train_index[i, 1])])) y_train[key][i] = int(train_index[i, 2]) X_train[key] = normalization(X_train[key]) for b in range(n_batches): X_test[key][b * batch_size:(b + 1) * batch_size] = normalization( X_test[key][b * batch_size:(b + 1) * batch_size]) dbn[key] = DBN(hidden_layers_structure=[hidb, hidb, hidb], weight_cost=0.001, batch_size=4, n_epoches=30, learning_rate_rbm=[0.0005, 1e-2, 1e-2], rbm_gauss_visible=True) dbn[key].fit(X_train[key]) X_train[key] = dbn[key].transform(X_train[key]) for b in range(n_batches): X_test[key][b * batch_size:(b + 1) * batch_size] = dbn[key].transform( X_test[key][b * batch_size:(b + 1) * batch_size]) #save the Data np.save('X_train_go', X_train[key]) np.save('y_train', y_train[key]) np.save('X_test_go', X_test[key])
def svmBagging(SamplingMethode):
    """Train a bagged SVM ensemble, stack a DBN-initialised NN on it and report.

    Steps:
      1. Read ``../data/data.csv``, cast it to int, min-max scale it and split
         it 80/20 into train and test (column 0 holds the label).
      2. Optionally rebalance the training part.
      3. Train 40 RBF SVMs through ``Bagging``.
      4. Use the SVM predictions on a resampled set ``EI`` as input features
         to pre-train a DBN and fine-tune an NN.
      5. On the test split, show a confusion-matrix heatmap and print
         accuracy, specificity and sensitivity for the NN and for ``Voting``.

    Args:
        SamplingMethode: ``'upSampling'`` to up-sample or ``'lowSampling'`` to
            down-sample the training data. The historical misspelling
            ``'lowSamoling'`` is still accepted. Any other value leaves the
            training data untouched.

    Returns:
        None.
    """
    # Read the data.
    train = pd.read_csv("../data/data.csv", header=0)
    # Cast every value to int in one step. The former per-cell loop assumed
    # exactly 1000 rows and used chained assignment (train[col][i] = ...),
    # which pandas does not guarantee to write back to the frame.
    train = train.astype(int)

    # Min-max normalisation.
    min_max_scaler = preprocessing.MinMaxScaler()
    train = min_max_scaler.fit_transform(train)

    # Split into the train and test sets.
    train, test = train_test_split(train, test_size=0.2)

    # Balance the classes through up- or down-sampling so that the number of
    # good clients equals the number of bad clients. Up-sampling raises the
    # number of bad clients to the number of good ones.
    if SamplingMethode == 'upSampling':
        X_train, y_train = upSampling(train)
        y_train = y_train.reshape(len(y_train), 1)
        train = np.append(y_train, X_train, axis=1)
        print("Apres UpSampling, la quantité des données équal: ", len(train))
    # Down-sampling reduces the number of good clients to the number of bad
    # ones. 'lowSamoling' was the only spelling recognised before, so it stays
    # accepted next to the correct 'lowSampling'.
    elif SamplingMethode in ('lowSampling', 'lowSamoling'):
        train = pd.DataFrame(train)
        train = np.array(lowSampling(train))
        print("Apres LowSampling, la quantité des données équal: ", len(train))

    # Carve out the EI data set (label in column 0, features after it).
    EI = np.array(RepetitionRandomSampling(train, len(train), 0.5))
    EI_train = EI[:, 1:]
    EI_test = EI[:, 0]

    clf_svm = [svm.SVC(kernel='rbf', gamma='scale', C=1.75) for _ in range(40)]
    bag = Bagging(40, clf_svm, 0.5)
    svms = bag.MutModel_clf(np.array(train), np.array(test))

    # One row per SVM; after the transpose each row is one sample and each
    # column is the prediction of one SVM.
    result = np.array([each.predict(EI_train) for each in svms])
    trX = np.transpose(result).astype(np.float32)
    trY = to_categorical(EI_test).astype(np.float32)  # one-hot class matrix

    # DLOption stores the model hyper-parameters, in this order:
    # epochs, learning_rate, batchsize, momentum, penaltyL2, dropoutProb.
    opts = DLOption(300, 0.01, 64, 0.01, 0., 0.2)
    # DBN(sizes, opts, X): sizes lists the hidden layer widths of the network.
    dbn = DBN([400, 100], opts, trX)
    # DBN pre-training.
    dbn.train()
    # NN(sizes, opts, X, Y): the hidden layers mirror the DBN so that its
    # trained parameters can be loaded; the subsequent training then
    # fine-tunes the network for classification.
    nn = NN([400, 100], opts, trX, trY)
    nn.load_from_dbn(dbn)
    nn.train()

    testX = test[:, 1:]
    testY = test[:, 0]
    test_result = np.array([each.predict(testX) for each in svms])
    teX = np.transpose(test_result).astype(np.float32)
    teY = testY.astype(np.float32)

    def _report(title, predicted):
        """Plot the confusion matrix and print accuracy / specificity / sensitivity."""
        cm = confusion_matrix(teY, predicted)
        sns.heatmap(cm, annot=True, fmt='')
        plt.title(title)
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.show()
        acc = len(teY[teY == predicted]) / len(teY)
        sp = cm[0, 0] / (cm[0, 0] + cm[0, 1])
        se = cm[1, 1] / (cm[1, 0] + cm[1, 1])
        print("accuracy for %s: " % title, round(acc, 3))
        print("specifity for %s: " % title, round(sp, 3))
        print("Sensitivity for %s: " % title, round(se, 3))

    # Predict once and reuse the result for both the matrix and the accuracy.
    _report('DBN', nn.predict(teX))
    _report('Voting', Voting(test_result))
def fsvmBagging(SamplingMethode):
    """Train a bagged fuzzy-SVM ensemble, stack a DBN-initialised NN on it and report.

    Steps:
      1. Read ``../data/data.csv``, cast it to int, min-max scale the feature
         columns and split 80/20 (``random_state=0``).
      2. Optionally rebalance the training part.
      3. Relabel class 0 as -1 for the SVMs and train 20 ``HYP_SVM`` models
         through ``Bagging``.
      4. Use the SVM predictions on a resampled set ``EI`` as input features
         to pre-train a DBN and fine-tune an NN (labels mapped back to 0/1).
      5. On the test split, show a confusion-matrix heatmap and print
         accuracy, specificity and sensitivity for the NN and for ``Voting``.

    Args:
        SamplingMethode: ``'upSampling'`` or ``'lowSampling'``; any other value
            leaves the training data untouched.

    Returns:
        None.
    """
    # Read the data.
    train = pd.read_csv("../data/data.csv", header=0)
    # Cast every value to int in one step. The former per-cell loop assumed
    # exactly 1000 rows and used chained assignment (train[col][i] = ...),
    # which pandas does not guarantee to write back to the frame.
    train = train.astype(int)

    features = train.columns[1:21]
    X = train[features]
    y = train['Creditability']

    min_max_scaler = preprocessing.MinMaxScaler()
    X = min_max_scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)

    # Assemble [label | features] so the sampling helpers get one matrix.
    y_train = np.array(y_train)
    y_train = y_train.reshape(len(y_train), 1)
    train = np.append(y_train, np.array(X_train), axis=1)

    if SamplingMethode == 'upSampling':
        X_train, y_train = upSampling(train)
    elif SamplingMethode == 'lowSampling':
        train = pd.DataFrame(train)
        train = np.array(lowSampling(train))
        X_train = train[:, 1:]
        y_train = train[:, 0]

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    # The SVMs are trained on -1/+1 labels: map class 0 to -1.
    y_train = np.where(y_train == 0, -1, y_train)
    y_test = np.array(y_test)
    y_test = np.where(y_test == 0, -1, y_test)

    y_train = y_train.reshape(len(y_train), 1)
    train = np.append(y_train, np.array(X_train), axis=1)
    y_test = y_test.reshape(len(y_test), 1)
    test = np.append(y_test, np.array(X_test), axis=1)

    # Carve out the EI data set (label in column 0, features after it);
    # RepetitionRandomSampling is called with len(train) and a rate of 0.5.
    EI = np.array(RepetitionRandomSampling(train, len(train), 0.5))
    EI_train = EI[:, 1:]
    EI_test = EI[:, 0]

    clf_svm = [HYP_SVM(kernel='polynomial', C=1.75, P=0.1) for _ in range(20)]
    # 20 estimators of kind 'fsvm', sampling rate 0.5.
    bag = Bagging(20, clf_svm, 0.5, 'fsvm')
    svms = bag.MutModel_clf(np.array(train), np.array(test))

    # One row per SVM; after the transpose each row is one sample and each
    # column is the prediction of one SVM.
    result = np.array([each.predict(EI_train) for each in svms])
    trX = np.transpose(result).astype(np.float32)

    # Map -1 back to 0 before one-hot encoding the labels.
    EI_test = np.where(EI_test == -1, 0, EI_test)
    trY = to_categorical(EI_test).astype(np.float32)

    # DLOption stores the model hyper-parameters, in this order:
    # epochs, learning_rate, batchsize, momentum, penaltyL2, dropoutProb.
    opts = DLOption(10, 1, 64, 0.01, 0., 0.2)
    # DBN(sizes, opts, X): sizes lists the hidden layer widths of the network.
    dbn = DBN([100, 50, 10], opts, trX)
    # DBN pre-training.
    dbn.train()
    # NN(sizes, opts, X, Y): the hidden layers mirror the DBN so that its
    # trained parameters can be loaded; the subsequent training then
    # fine-tunes the network for classification.
    nn = NN([100, 50, 10], opts, trX, trY)
    nn.load_from_dbn(dbn)
    nn.train()

    testX = test[:, 1:]
    testY = test[:, 0]
    test_result = np.array([each.predict(testX) for each in svms])
    teX = np.transpose(test_result).astype(np.float32)

    # Map -1 back to 0 so the labels match the NN output classes.
    testY = np.where(testY == -1, 0, testY)
    teY = testY.astype(np.float32)

    def _report(title, predicted):
        """Plot the confusion matrix and print accuracy / specificity / sensitivity."""
        cm = confusion_matrix(teY, predicted)
        sns.heatmap(cm, annot=True, fmt='')
        plt.title(title)
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.show()
        acc = len(teY[teY == predicted]) / len(teY)
        sp = cm[0, 0] / (cm[0, 0] + cm[0, 1])
        se = cm[1, 1] / (cm[1, 0] + cm[1, 1])
        print("accuracy for %s: " % title, round(acc, 3))
        print("specifity for %s: " % title, round(sp, 3))
        print("Sensitivity for %s: " % title, round(se, 3))

    # Predict once and reuse the result for both the matrix and the accuracy.
    _report('DBN', nn.predict(teX))
    _report('Voting', Voting(test_result))
# Fragment of a larger script; `linear_model`, `DBN` and `np` are imported
# outside this view, and the script appears to continue after this fragment.
#
# Steps performed here:
#   1. Create a logistic-regression classifier (C=600.0) and set the hidden
#      layer width `hidt` to 512.
#   2. Load the stored arrays 'X_train_ppi.npy', 'X_train_go.npy' and
#      'y_train.npy'.
#   3. Concatenate the two feature sets along axis 1, fit a DBN with three
#      hidden layers of width `hidt` on the result and transform it.
#   4. Fit the logistic regression on the transformed features and `y_train`.
#   5. Load 'X_test_joint.npy' for the prediction step that follows.
# NOTE(review): the triple-quoted strings below are bare expression statements
# used as separators / remarks; they have no effect at runtime.
logreg = linear_model.LogisticRegression(C=600.0) hidt = 512 X_train, X_test = {}, {} """--------------------""" #save the Data X_train['ppi'] = np.load('X_train_ppi.npy') X_train['go'] = np.load('X_train_go.npy') y_train = np.load('y_train.npy') top_dbn = DBN(hidden_layers_structure=[hidt, hidt, hidt], weight_cost=0.001, batch_size=4, n_epoches=30, learning_rate_rbm=[1e-2, 1e-2, 1e-2]) X_train_joint = np.concatenate((X_train['ppi'], X_train['go']), axis=1) top_dbn.fit(X_train_joint) X_train_joint = top_dbn.transform(X_train_joint) logreg.fit(X_train_joint, y_train) #predict X_test_joint = np.load('X_test_joint.npy') """-------------------- This batch_size is also used due to the limited amount of RAM, the program can only analyze 10000 samples at a time. """