def generateSolutions(self, queriesFile, solutionsFile):
    """Classify every query record and write the predictions to a file.

    Each record loaded from ``queriesFile`` is classified by calling
    ``mdclassify`` with the record and ``self.tree``.  One line per record
    is written to ``solutionsFile`` in the form ``tst<N>,<label>``, where
    ``N`` is the 1-based position of the record and ``label`` is either
    ``<=50K`` or ``>50K``.

    Args:
        queriesFile: Source of the query records; passed unchanged to
            ``getInputData``.
        solutionsFile: Path of the output file.  It is opened in ``'w'``
            mode, so any existing content is overwritten.
    """
    # Load the queries before opening the output so that a failed read does
    # not truncate an existing solutions file.
    querySet = getInputData(queriesFile)

    # The context manager closes the file even if classification raises.
    with open(solutionsFile, 'w') as outfile:
        for number, record in enumerate(querySet, start=1):
            prediction = mdclassify(record, self.tree)
            # Collapse the classifier result to a single label: '<=50K'
            # whenever it appears in the result, otherwise '>50K'.
            label = '<=50K' if '<=50K' in prediction else '>50K'
            print("tst" + str(number) + "," + label, file=outfile)
def generateSolutions(self, queriesFile, solutionsFile):
    """Classify every query record and write the predictions to a file.

    Each record loaded from ``queriesFile`` is classified by calling
    ``mdclassify`` with the record and ``self.tree``.  One line per record
    is written to ``solutionsFile`` in the form ``tst<N>,<label>``, where
    ``N`` is the 1-based position of the record and ``label`` is either
    ``<=50K`` or ``>50K``.

    Args:
        queriesFile: Source of the query records; passed unchanged to
            ``getInputData``.
        solutionsFile: Path of the output file.  It is opened in ``'w'``
            mode, so any existing content is overwritten.
    """
    # Load the queries before opening the output so that a failed read does
    # not truncate an existing solutions file.
    querySet = getInputData(queriesFile)

    # The context manager closes the file even if classification raises.
    with open(solutionsFile, 'w') as outfile:
        for number, record in enumerate(querySet, start=1):
            prediction = mdclassify(record, self.tree)
            # Collapse the classifier result to a single label: '<=50K'
            # whenever it appears in the result, otherwise '>50K'.
            label = '<=50K' if '<=50K' in prediction else '>50K'
            print("tst" + str(number) + "," + label, file=outfile)
def generateSolutions(self, queriesFile, solutionsFile):
    """Estimate a label for every query record and write it to a file.

    Each record loaded from ``queriesFile`` is passed to
    ``self.knnestimate`` with ``k=5``.  One line per record is written to
    ``solutionsFile`` in the form ``tst<N>,<prediction>``, where ``N`` is
    the 1-based position of the record.

    Args:
        queriesFile: Source of the query records; passed unchanged to
            ``getInputData``.
        solutionsFile: Path of the output file.  It is opened in ``'w'``
            mode, so any existing content is overwritten.
    """
    # Load the queries before opening the output so that a failed read does
    # not truncate an existing solutions file.
    querySet = getInputData(queriesFile)

    # The context manager closes the file even if an estimate raises.
    with open(solutionsFile, 'w') as outfile:
        for number, record in enumerate(querySet, start=1):
            prediction = self.knnestimate(record, k=5)
            # Plain concatenation is kept on purpose: the estimate must
            # already be a string, otherwise this raises TypeError.
            print("tst" + str(number) + "," + prediction, file=outfile)
# Salary Prediction System # to classify employees' salaries. # # Implemented Decision Tree and KNN models. # Solutions are generated using Decision Tree. ''' from classes.knn import knn from functions.crossvalidate import crossValidate from functions.function import getInputData if __name__ == '__main__': print("Start processing...") ''' load training data ''' # get training data from data source file sourceData = getInputData('./../data/trainingset.txt') ''' cross validate training data ''' # randomly split source data into a training set and a test set, multiple times # calculate the mean accuracy # select the model that generates the highest accuracy print("cross validating training data...") # knn correct = crossValidate(knn, sourceData, 10, 0.05) print("knn correct: " + str(correct)) # decision tree #correct = crossValidate2(buildtree, sourceData, 10, 0.05) #print("knn correct: " + str(correct)) ''' export predictions '''
# Implemented Decision Tree and KNN models. # Solutions are generated using Decision Tree. ''' from classes.knn import knn from functions.crossvalidate import crossValidate from functions.function import getInputData if __name__ == '__main__': print ("Start processing...") ''' load training data ''' # get training data from data source file sourceData = getInputData('./../data/trainingset.txt') ''' cross validate training data ''' # randomly split source data into a training set and a test set, multiple times # calculate the mean accuracy # select the model that generates the highest accuracy print ("cross validating training data...") # knn correct = crossValidate(knn, sourceData, 10, 0.05) print("knn correct: " + str(correct)) # decision tree #correct = crossValidate2(buildtree, sourceData, 10, 0.05) #print("knn correct: " + str(correct))