def generateSolutions(self, queriesFile, solutionsFile):
    """Classify each query record with the decision tree and write the labels.

    Args:
        queriesFile: Path handed to ``getInputData`` to load the query records.
        solutionsFile: Path of the output file. It is opened in ``'w'`` mode,
            so any existing content is replaced.

    Every record produces one output line of the form ``tst<N>,<label>``,
    where ``N`` counts records starting at 1 and ``label`` is either
    ``<=50K`` or ``>50K``.
    """
    querySet = getInputData(queriesFile)

    # The context manager closes the file even if classification raises,
    # which the previous manual open()/close() pair did not guarantee.
    with open(solutionsFile, 'w') as outfile:
        for recordNumber, record in enumerate(querySet, start=1):
            prediction = mdclassify(record, self.tree)

            # Any prediction that does not contain '<=50K' is reported
            # as '>50K'.
            label = '<=50K' if '<=50K' in prediction else '>50K'

            print("tst" + str(recordNumber) + "," + label, file=outfile)
    def generateSolutions(self, queriesFile, solutionsFile):
        """Write one decision-tree salary prediction per query record.

        Args:
            queriesFile: Path passed to ``getInputData`` to load the queries.
            solutionsFile: Path of the file to write. It is opened in ``'w'``
                mode, so previous content is overwritten.

        The output holds one line per record, formatted ``tst<N>,<label>``
        with ``N`` starting at 1 and ``label`` being ``<=50K`` or ``>50K``.
        """
        querySet = getInputData(queriesFile)

        # Using ``with`` guarantees the handle is released when an exception
        # escapes the loop; the manual close() call was skipped in that case.
        with open(solutionsFile, 'w') as outfile:
            for position, query in enumerate(querySet, start=1):
                prediction = mdclassify(query, self.tree)

                # Collapse the classifier result to one of two labels:
                # whatever does not contain '<=50K' counts as '>50K'.
                if '<=50K' in prediction:
                    label = '<=50K'
                else:
                    label = '>50K'

                print("tst" + str(position) + "," + label, file=outfile)
예제 #3
0
 def generateSolutions(self, queriesFile, solutionsFile):
     """Write one KNN prediction per query record to the solutions file.

     Args:
         queriesFile: Path passed to ``getInputData`` to load the queries.
         solutionsFile: Path of the file to write. It is opened in ``'w'``
             mode, so previous content is overwritten.

     Each record yields one line ``tst<N>,<prediction>`` with ``N`` starting
     at 1. The prediction comes from ``self.knnestimate(record, k=5)`` and is
     concatenated as-is, so it must be a string.
     """
     querySet = getInputData(queriesFile)

     # ``with`` closes the file even when knnestimate raises; the previous
     # manual close() was never reached in that situation.
     with open(solutionsFile, 'w') as outfile:
         for recordNumber, record in enumerate(querySet, start=1):
             prediction = self.knnestimate(record, k=5)
             print("tst" + str(recordNumber) + "," + prediction, file=outfile)
예제 #4
0
# Salary Prediction System
# to classify employees' salaries.
#
# Implemented Decision Tree and KNN models.
# Solutions are generated using Decision Tree.
'''

from classes.knn import knn
from functions.crossvalidate import crossValidate
from functions.function import getInputData

if __name__ == '__main__':
    print("Start processing...")

    # --- load training data ---
    # The training records are read from the data source file.
    sourceData = getInputData('./../data/trainingset.txt')

    # --- cross validate training data ---
    # Intent stated by the original author: split the source data into
    # training and test sets randomly several times, compute the mean
    # accuracy, and pick the model with the highest accuracy.
    print("cross validating training data...")

    # KNN model.
    # NOTE(review): 10 and 0.05 are presumably the number of trials and the
    # test-set fraction -- confirm against crossValidate's signature.
    correct = crossValidate(knn, sourceData, 10, 0.05)
    print("knn correct: ", correct, sep="")

    # Decision tree model: its cross-validation is currently disabled
    # (crossValidate2 and buildtree are not imported in this script).
    #correct = crossValidate2(buildtree, sourceData, 10, 0.05)
    #print("knn correct: " + str(correct))

    # --- export predictions ---
예제 #5
0
# Implemented Decision Tree and KNN models.
# Solutions are generated using Decision Tree.
'''


from classes.knn import knn
from functions.crossvalidate import crossValidate
from functions.function import getInputData


if __name__ == '__main__':
    print("Start processing...")

    # Step 1: load the training data from the data source file.
    sourceData = getInputData('./../data/trainingset.txt')

    # Step 2: cross validate the training data.
    # Intent stated by the original author: repeatedly split the source data
    # at random into training and test sets, average the accuracy, and keep
    # the model that scores highest.
    print("cross validating training data...")

    # KNN model.
    # NOTE(review): the meaning of 10 and 0.05 is not visible here
    # (presumably trial count and test-set fraction) -- verify in
    # crossValidate.
    correct = crossValidate(knn, sourceData, 10, 0.05)
    print("knn correct: ", correct, sep="")

    # Decision tree model: evaluation is disabled for now; neither
    # crossValidate2 nor buildtree is imported by this script.
    #correct = crossValidate2(buildtree, sourceData, 10, 0.05)
    #print("knn correct: " + str(correct))