Exemple #1
0
        
        # Evaluate the forest only when the model object reports isTrained;
        # otherwise just warn on stdout.
        if myForest.isTrained:
            # getDataFromFile returns three values; judging by the names they are
            # features, labels and row identifiers — TODO confirm against the class.
            Xtest,yTest,XtestID = myForest.getDataFromFile(train_test_file)
            finalPredictions = myForest.predict(Xtest)
            # Persist the predictions next to their identifiers in 'output.txt'.
            myForest.writeToFile(XtestID,finalPredictions,'output.txt')
            # Accuracy = matching predictions / number of labels.
            # NOTE(review): the element-wise == assumes array-like operands
            # (e.g. NumPy arrays); an empty yTest would divide by zero — confirm.
            print("Accuracy is: " ,sum(finalPredictions==yTest)/len(yTest))
        else:
            print("Untrained model being tested")
  
# Example arguments, in the order (trainOrTest, train_test_file, model_file, model):
#   train train-data.txt adaboost_model.txt adaboost
#   test test-data.txt adaboost_model.txt adaboost
if model == 'adaboost' :
    
    if trainOrTest == 'train':
        # Train an AdaBoost model on the data in train_test_file and pickle the
        # fitted object to model_file.
        # NOTE(review): 300 is passed positionally; presumably it is nTrees
        # (the keyword used elsewhere) — confirm against AdaBoost.__init__.
        myBoost = AdaBoost(300,verbose = False)
        TrainX,TrainY,TrainXID = myBoost.getDataFromFile(train_test_file)
        myBoost.train(TrainX,TrainY)
        # A context manager guarantees the file is flushed and closed once the
        # dump finishes (or fails); the original left the handle open, relying
        # on garbage collection to write out the buffered data.
        with open(model_file,'wb') as modelOut:
            pk.dump(myBoost,modelOut)
        
    if trainOrTest == 'test':
        # Load the previously pickled model from model_file.
        # NOTE(review): unpickling executes code embedded in the file; only
        # load model files that this program produced itself.
        try:
            # The context manager closes the file even when unpickling fails.
            with open(model_file,'rb') as modelIn:
                myBoost = pk.load(modelIn)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt/SystemExit still propagate.
            # NOTE(review): the message says "output file", but the file that
            # failed to load is the model file — consider rewording.
            print("output file has not been generated")
            # Stop here: without a loaded model, the code that follows would
            # fail with a NameError on the unbound myBoost.
            raise SystemExit(1)
        
        if myBoost.isTrained:
            Xtest,yTest,XtestID = myBoost.getDataFromFile(train_test_file)
            finalPredictions = myBoost.predict(Xtest)
            myBoost.writeToFile(XtestID,finalPredictions,'output.txt')
            print("Accuracy is: " ,sum(finalPredictions==yTest)/len(yTest))
        else:
Exemple #2
0
        myForest.trainForest(TrainX, TrainY)
        Xtest, yTest, XtestID = myForest.getDataFromFile('test-data.txt')
        finalPredictions = myForest.predict(Xtest)
        baggpropAccu.append(sum(finalPredictions == yTest) / len(yTest))

    # Draw accuracy (baggpropAccu, filled by the loop above) against bagprop
    # and label the figure.
    # NOTE(review): bagprop is defined outside this view; presumably it is the
    # list of bagging proportions that were swept — confirm.
    plt.plot(bagprop, baggpropAccu)
    plt.xlabel("Variation Of Bagging Proportion")
    plt.ylabel("Accuracy")
    plt.title("Accuracy vs Bagging Proportion")

    #########################Adaboost################################################
    # Sweep the AdaBoost nTrees setting and record test accuracy for each value.
    numTreeAccu = []
    # nTrees values 5, 10, ..., 195 (the stop value 200 is exclusive).
    numTrees = list(range(5, 200, 5))
    for numtree in numTrees:
        # A fresh model per setting so no state carries over between runs.
        myBoost = AdaBoost(nTrees=numtree)
        # NOTE(review): the train and test files are re-read on every iteration;
        # whether getDataFromFile also sets state on the instance is not
        # visible here, so the loads are left inside the loop.
        TrainX, TrainY, TrainXID = myBoost.getDataFromFile('train-data.txt')

        myBoost.train(TrainX, TrainY)
        Xtest, yTest, XtestID = myBoost.getDataFromFile('test-data.txt')
        finalPredictions = myBoost.predict(Xtest)
        # Accuracy = matching predictions / number of test labels; the
        # element-wise == assumes array-like operands — TODO confirm.
        numTreeAccu.append(sum(finalPredictions == yTest) / len(yTest))

    # NOTE(review): no plt.figure()/plt.show() is visible between this call and
    # the earlier bagging plot, so this curve may be drawn on the same axes —
    # confirm that is intended.
    plt.plot(numTrees, numTreeAccu)

    ############################################################################

    numTreeAccu = []
    samples = []
    for i in range(5000, 36000, 5000):
        myBoost = AdaBoost(nTrees=200)
        TrainX, TrainY, TrainXID = myBoost.getDataFromFile('train-data.txt')