Ejemplo n.º 1
0
# Split every row of `dataset` into a float feature vector (all columns except
# the label column) and a float label.  The label column index is derived from
# the first row and reused for every row.
x = []
y = []
label_col = len(dataset[0]) - 1 if len(dataset) > 0 else 0
for row in dataset:
    y.append(float(row[label_col]))
    x.append([float(row[col]) for col in range(label_col)])

# Load the test CSV and split it the same way.
df1 = pd.read_csv('ex2data1test.csv', delimiter=',')
dataset1 = np.array(df1).tolist()

xt = []
yt = []
test_label_col = len(dataset1[0]) - 1 if len(dataset1) > 0 else 0
for row in dataset1:
    yt.append(float(row[test_label_col]))
    xt.append([float(row[col]) for col in range(test_label_col)])

# Fit the forest on the training split and predict the test split.
from RF import RF
rf = RF(B=5, Bagging=True)
rf.train(x, y)
yPredict = rf.predict(xt)

# Print the fraction of predictions that equal the corresponding true label.
count = sum(1 for idx, guess in enumerate(yPredict) if guess == yt[idx])
print(count / float(len(yt)))
Ejemplo n.º 2
0
def _bagging_accuracy(pred, truth):
    """Return the percentage of entries of ``pred`` that equal ``truth``.

    The value is rounded to two decimals.  An empty ``truth`` gives 0.0
    rather than raising ZeroDivisionError.
    """
    if len(truth) == 0:
        return 0.0
    hits = sum(1 for i, guess in enumerate(pred) if guess == truth[i])
    return round(hits / float(len(truth)) * 100, 2)


def _report_trial(label, trial, pred, truth, elapsed):
    """Print the accuracy and elapsed seconds of one classifier run."""
    print(label + ", trial #" + str(trial) + ": ")
    print('\taccuracy: ' + str(_bagging_accuracy(pred, truth)) + '%')
    print('\ttraining time: ' + str(round(elapsed, 1)) + ' seconds')


def _add_votes(votes, pred):
    """Add each prediction in ``pred`` to the running total in ``votes``."""
    for i, vote in enumerate(pred):
        votes[i] = votes[i] + vote


def bagging(N):
    """Run N bootstrap rounds of RF and KNN and print a combined accuracy.

    Each round draws ``int(len(RFData.x) * 0.6)`` random row indices, trains
    one RF and one KNN classifier on the sampled rows, prints each
    classifier's accuracy and timing on the test data, and adds its
    predictions to a per-test-row vote total.  After all rounds a test row
    is labelled 1 when its vote total is at least N, otherwise 0, and the
    accuracy of that combined labelling is printed.

    Args:
        N: number of bootstrap rounds.

    Returns:
        None.  All results are printed.
    """
    # training data
    xRF = RFData.x
    y = RFData.y
    # testing data
    xtRF = RFData.xt
    yt = RFData.yt

    # Fetch the KNN data once instead of once per element.
    knn_sets = knnData.main()
    xKNN = knn_sets[0]  # training X
    xtKNN = knn_sets[2]  # test X

    votes = [0] * len(yt)

    sample_size = int(len(xRF) * 0.6)
    # The RF feature vectors leave out the last column of each row; the
    # width is read from the first row only when sampling will happen.
    kept_cols = len(xRF[0]) - 1 if sample_size else 0

    for k in range(N):  # number of bootstrapping
        x_RF, y_RF = [], []
        x_KNN, y_KNN = [], []
        # bootstrapping
        for _ in range(sample_size):
            r = randint(0, len(xRF) - 1)
            tRF = [xRF[r][j] for j in range(kept_cols)]
            # for RF, data duplicates are not allowed
            if tRF not in x_RF:
                x_RF.append(tRF)
                y_RF.append(y[r])
            x_KNN.append(xKNN[r])
            y_KNN.append(y[r])

        # RF (the timed span covers both training and prediction)
        start = time.time()
        rf = RF(B=TreeNum, Bagging=isBagging)
        rf.train(x_RF, y_RF)
        pred = rf.predict(xtRF)
        end = time.time()
        _report_trial("RF", k + 1, pred, yt, end - start)
        _add_votes(votes, pred)

        # KNN
        start = time.time()
        pred = knn.main(x_KNN, y_KNN, xtKNN, kInKnn)
        end = time.time()
        _report_trial("KNN", k + 1, pred, yt, end - start)
        _add_votes(votes, pred)

    # Every round adds two predictions per test row (RF and KNN), so a total
    # of N is half of the 2 * N predictions -- assuming predictions are 0/1
    # labels (TODO confirm against RF.predict and knn.main).
    finalPredict = [1 if total >= N else 0 for total in votes]

    print()
    print('After combining the classifiers by bagging: ')
    print('\taccuracy: ' + str(_bagging_accuracy(finalPredict, yt)) + '%')
Ejemplo n.º 3
0
# Split every row of `dataset` into a float feature vector (all columns except
# the label column) and a float label.  The label column index is derived from
# the first row and reused for every row.
# NOTE(review): `x` is appended to but not created in this fragment; it is
# assumed to be a list defined just above -- confirm.
y = []
label_col = len(dataset[0]) - 1 if len(dataset) > 0 else 0
for row in dataset:
    y.append(float(row[label_col]))
    x.append([float(row[col]) for col in range(label_col)])


# Load the test CSV and split it the same way.
df1 = pd.read_csv('ex2data1test.csv', delimiter=',')
dataset1 = np.array(df1).tolist()

xt = []
yt = []
test_label_col = len(dataset1[0]) - 1 if len(dataset1) > 0 else 0
for row in dataset1:
    yt.append(float(row[test_label_col]))
    xt.append([float(row[col]) for col in range(test_label_col)])

# Fit the forest on the training split and predict the test split.
from RF import RF
rf = RF(B=5, Bagging=True)
rf.train(x, y)
yPredict = rf.predict(xt)

# Print the fraction of predictions that equal the corresponding true label.
count = sum(1 for idx, guess in enumerate(yPredict) if guess == yt[idx])
print(count / float(len(yt)))
Ejemplo n.º 4
0
def _bagging_accuracy(pred, truth):
    """Return the percentage of entries of ``pred`` that equal ``truth``.

    The value is rounded to two decimals.  An empty ``truth`` gives 0.0
    rather than raising ZeroDivisionError.
    """
    if len(truth) == 0:
        return 0.0
    hits = sum(1 for i, guess in enumerate(pred) if guess == truth[i])
    return round(hits / float(len(truth)) * 100, 2)


def _report_trial(label, trial, pred, truth, elapsed):
    """Print the accuracy and elapsed seconds of one classifier run."""
    print(label + ", trial #" + str(trial) + ": ")
    print("\taccuracy: " + str(_bagging_accuracy(pred, truth)) + "%")
    print("\ttraining time: " + str(round(elapsed, 1)) + " seconds")


def _add_votes(votes, pred):
    """Add each prediction in ``pred`` to the running total in ``votes``."""
    for i, vote in enumerate(pred):
        votes[i] = votes[i] + vote


def bagging(N):
    """Run N bootstrap rounds of RF and KNN and print a combined accuracy.

    Each round draws ``int(len(RFData.x) * 0.6)`` random row indices, trains
    one RF and one KNN classifier on the sampled rows, prints each
    classifier's accuracy and timing on the test data, and adds its
    predictions to a per-test-row vote total.  After all rounds a test row
    is labelled 1 when its vote total is at least N, otherwise 0, and the
    accuracy of that combined labelling is printed.

    Args:
        N: number of bootstrap rounds.

    Returns:
        None.  All results are printed.
    """
    # training data
    xRF = RFData.x
    y = RFData.y
    # testing data
    xtRF = RFData.xt
    yt = RFData.yt

    # Fetch the KNN data once instead of once per element.
    knn_sets = knnData.main()
    xKNN = knn_sets[0]  # training X
    xtKNN = knn_sets[2]  # test X

    votes = [0] * len(yt)

    sample_size = int(len(xRF) * 0.6)
    # The RF feature vectors leave out the last column of each row; the
    # width is read from the first row only when sampling will happen.
    kept_cols = len(xRF[0]) - 1 if sample_size else 0

    for k in range(N):  # number of bootstrapping
        x_RF, y_RF = [], []
        x_KNN, y_KNN = [], []
        # bootstrapping
        for _ in range(sample_size):
            r = randint(0, len(xRF) - 1)
            tRF = [xRF[r][j] for j in range(kept_cols)]
            # for RF, data duplicates are not allowed
            if tRF not in x_RF:
                x_RF.append(tRF)
                y_RF.append(y[r])
            x_KNN.append(xKNN[r])
            y_KNN.append(y[r])

        # RF (the timed span covers both training and prediction)
        start = time.time()
        rf = RF(B=TreeNum, Bagging=isBagging)
        rf.train(x_RF, y_RF)
        pred = rf.predict(xtRF)
        end = time.time()
        _report_trial("RF", k + 1, pred, yt, end - start)
        _add_votes(votes, pred)

        # KNN
        start = time.time()
        pred = knn.main(x_KNN, y_KNN, xtKNN, kInKnn)
        end = time.time()
        _report_trial("KNN", k + 1, pred, yt, end - start)
        _add_votes(votes, pred)

    # Every round adds two predictions per test row (RF and KNN), so a total
    # of N is half of the 2 * N predictions -- assuming predictions are 0/1
    # labels (TODO confirm against RF.predict and knn.main).
    finalPredict = [1 if total >= N else 0 for total in votes]

    print()
    print("After combining the classifiers by bagging: ")
    print("\taccuracy: " + str(_bagging_accuracy(finalPredict, yt)) + "%")