# Split the training rows: the last column (as indexed by the first row's
# width) is the label, every column before it is a feature.
x = []
y = []
for sample in dataset:
    label_index = len(dataset[0]) - 1
    y.append(float(sample[label_index]))
    x.append([float(sample[col]) for col in range(label_index)])

# Load the test set from CSV and split it the same way.
df1 = pd.read_csv('ex2data1test.csv', delimiter=',')
dataset1 = np.array(df1).tolist()
xt = []
yt = []
for sample in dataset1:
    label_index = len(dataset1[0]) - 1
    yt.append(float(sample[label_index]))
    xt.append([float(sample[col]) for col in range(label_index)])

from RF import RF

# Train the RF model (B=5, bagging enabled) and predict the test rows.
rf = RF(B=5, Bagging=True)
rf.train(x, y)
yPredict = rf.predict(xt)

# Print the fraction of test rows whose prediction equals the true label.
count = sum(
    1 for position, predicted in enumerate(yPredict) if predicted == yt[position]
)
print(count / float(len(yt)))
def _accuracy_percent(predictions, labels):
    """Return the percentage of predictions equal to their label, rounded to 2 places.

    The denominator is ``len(labels)``; an empty label list yields 0.0
    instead of raising ZeroDivisionError.
    """
    if not labels:
        return 0.0
    hits = sum(1 for guess, truth in zip(predictions, labels) if guess == truth)
    return round(hits / float(len(labels)) * 100, 2)


def _report_trial(name, trial, predictions, labels, elapsed):
    """Print the accuracy and elapsed seconds of one classifier for one trial."""
    print(name + ", trial #" + str(trial) + ": ")
    print('\taccuracy: ' + str(_accuracy_percent(predictions, labels)) + '%')
    print('\ttraining time: ' + str(round(elapsed, 1)) + ' seconds')


def _bootstrap_sample(xRF, xKNN, y, fraction=0.6):
    """Draw one bootstrap sample and return (x_RF, y_RF, x_KNN, y_KNN).

    ``int(len(xRF) * fraction)`` row indices are drawn with ``randint``.
    The RF sample keeps only the first occurrence of each feature row; the
    KNN sample keeps every draw, duplicates included.
    """
    x_RF, y_RF, x_KNN, y_KNN = [], [], [], []
    for _ in range(int(len(xRF) * fraction)):
        r = randint(0, len(xRF) - 1)
        # NOTE(review): the last column of each RF row is dropped here --
        # presumably a label column; confirm against RFData.x.
        tRF = [xRF[r][j] for j in range(len(xRF[0]) - 1)]
        # for RF, data duplicates are not allowed
        if tRF not in x_RF:
            x_RF.append(tRF)
            y_RF.append(y[r])
        x_KNN.append(xKNN[r])
        y_KNN.append(y[r])
    return x_RF, y_RF, x_KNN, y_KNN


def bagging(N):
    """Run N bootstrap trials of RF and KNN and print the combined accuracy.

    Each trial draws a bootstrap sample, fits one RF and one KNN classifier
    on it, prints each classifier's test accuracy and elapsed time, and adds
    both prediction lists to a per-test-sample vote total.  After all trials
    a test sample is labelled 1 when its vote total is at least N (half of
    the 2 * N predictions cast, assuming predictions are 0/1), otherwise 0.

    Reads the module-level settings ``TreeNum``, ``isBagging`` and
    ``kInKnn`` and the data exposed by ``RFData`` and ``knnData``.

    Args:
        N: number of bootstrap trials.

    Returns:
        None.  All results are printed.
    """
    # training data
    xRF = RFData.x
    y = RFData.y
    # testing data
    xtRF = RFData.xt
    yt = RFData.yt

    # Call knnData.main() once and index the result, rather than calling it
    # separately for the training and the test matrix.
    knn_data = knnData.main()
    xKNN = knn_data[0]  # training X
    xtKNN = knn_data[2]  # test X

    countYPredict = [0] * len(yt)

    for k in range(N):  # number of bootstrapping
        x_RF, y_RF, x_KNN, y_KNN = _bootstrap_sample(xRF, xKNN, y)

        # RF -- the timed span covers construction, training and prediction.
        start = time.time()
        rf = RF(B=TreeNum, Bagging=isBagging)
        rf.train(x_RF, y_RF)
        pred = rf.predict(xtRF)
        end = time.time()
        _report_trial("RF", k + 1, pred, yt, end - start)
        for i, vote in enumerate(pred):
            countYPredict[i] = countYPredict[i] + vote

        # KNN -- the timed span covers the whole knn.main call.
        start = time.time()
        pred = knn.main(x_KNN, y_KNN, xtKNN, kInKnn)
        end = time.time()
        _report_trial("KNN", k + 1, pred, yt, end - start)
        for i, vote in enumerate(pred):
            countYPredict[i] = countYPredict[i] + vote

    # Combine: at least N of the accumulated votes means class 1.
    finalPredict = [1 if total >= N else 0 for total in countYPredict]

    print()
    print('After combining the classifiers by bagging: ')
    print('\taccuracy: ' + str(_accuracy_percent(finalPredict, yt)) + '%')
# NOTE(review): `x` and `dataset` are not created in this fragment --
# presumably they are defined earlier in the file; confirm.
# Each training row contributes its last column (indexed by the first row's
# width) as the label and the preceding columns as the feature vector.
y = []
for record in dataset:
    last = len(dataset[0]) - 1
    y.append(float(record[last]))
    x.append([float(record[k]) for k in range(last)])

# Read the test CSV and separate it into features (xt) and labels (yt).
df1 = pd.read_csv('ex2data1test.csv', delimiter=',')
dataset1 = np.array(df1).tolist()
xt = []
yt = []
for record in dataset1:
    last = len(dataset1[0]) - 1
    yt.append(float(record[last]))
    xt.append([float(record[k]) for k in range(last)])

from RF import RF

# Fit the RF model (B=5, bagging on) and predict labels for the test rows.
rf = RF(B=5, Bagging=True)
rf.train(x, y)
yPredict = rf.predict(xt)

# Accuracy: share of predictions that equal the corresponding true label.
count = 0
for index, guess in enumerate(yPredict):
    count += 1 if guess == yt[index] else 0
print(count / float(len(yt)))
def _accuracy_percent(predictions, labels):
    """Return the percentage of predictions equal to their label, rounded to 2 places.

    The denominator is ``len(labels)``; an empty label list yields 0.0
    instead of raising ZeroDivisionError.
    """
    if not labels:
        return 0.0
    hits = sum(1 for guess, truth in zip(predictions, labels) if guess == truth)
    return round(hits / float(len(labels)) * 100, 2)


def _report_trial(name, trial, predictions, labels, elapsed):
    """Print the accuracy and elapsed seconds of one classifier for one trial."""
    print(name + ", trial #" + str(trial) + ": ")
    print("\taccuracy: " + str(_accuracy_percent(predictions, labels)) + "%")
    print("\ttraining time: " + str(round(elapsed, 1)) + " seconds")


def _bootstrap_sample(xRF, xKNN, y, fraction=0.6):
    """Draw one bootstrap sample and return (x_RF, y_RF, x_KNN, y_KNN).

    ``int(len(xRF) * fraction)`` row indices are drawn with ``randint``.
    The RF sample keeps only the first occurrence of each feature row; the
    KNN sample keeps every draw, duplicates included.
    """
    x_RF, y_RF, x_KNN, y_KNN = [], [], [], []
    for _ in range(int(len(xRF) * fraction)):
        r = randint(0, len(xRF) - 1)
        # NOTE(review): the last column of each RF row is dropped here --
        # presumably a label column; confirm against RFData.x.
        tRF = [xRF[r][j] for j in range(len(xRF[0]) - 1)]
        # for RF, data duplicates are not allowed
        if tRF not in x_RF:
            x_RF.append(tRF)
            y_RF.append(y[r])
        x_KNN.append(xKNN[r])
        y_KNN.append(y[r])
    return x_RF, y_RF, x_KNN, y_KNN


def bagging(N):
    """Run N bootstrap trials of RF and KNN and print the combined accuracy.

    Each trial draws a bootstrap sample, fits one RF and one KNN classifier
    on it, prints each classifier's test accuracy and elapsed time, and adds
    both prediction lists to a per-test-sample vote total.  After all trials
    a test sample is labelled 1 when its vote total is at least N (half of
    the 2 * N predictions cast, assuming predictions are 0/1), otherwise 0.

    Reads the module-level settings ``TreeNum``, ``isBagging`` and
    ``kInKnn`` and the data exposed by ``RFData`` and ``knnData``.

    Args:
        N: number of bootstrap trials.

    Returns:
        None.  All results are printed.
    """
    # training data
    xRF = RFData.x
    y = RFData.y
    # testing data
    xtRF = RFData.xt
    yt = RFData.yt

    # Call knnData.main() once and index the result, rather than calling it
    # separately for the training and the test matrix.
    knn_data = knnData.main()
    xKNN = knn_data[0]  # training X
    xtKNN = knn_data[2]  # test X

    countYPredict = [0] * len(yt)

    for k in range(N):  # number of bootstrapping
        x_RF, y_RF, x_KNN, y_KNN = _bootstrap_sample(xRF, xKNN, y)

        # RF -- the timed span covers construction, training and prediction.
        start = time.time()
        rf = RF(B=TreeNum, Bagging=isBagging)
        rf.train(x_RF, y_RF)
        pred = rf.predict(xtRF)
        end = time.time()
        _report_trial("RF", k + 1, pred, yt, end - start)
        for i, vote in enumerate(pred):
            countYPredict[i] = countYPredict[i] + vote

        # KNN -- the timed span covers the whole knn.main call.
        start = time.time()
        pred = knn.main(x_KNN, y_KNN, xtKNN, kInKnn)
        end = time.time()
        _report_trial("KNN", k + 1, pred, yt, end - start)
        for i, vote in enumerate(pred):
            countYPredict[i] = countYPredict[i] + vote

    # Combine: at least N of the accumulated votes means class 1.
    finalPredict = [1 if total >= N else 0 for total in countYPredict]

    print()
    print("After combining the classifiers by bagging: ")
    print("\taccuracy: " + str(_accuracy_percent(finalPredict, yt)) + "%")