def main():
    """Train on the training CSV, score the test CSV and save the results.

    Steps, in order:
      1. Load ``train-2.csv`` and build the class mapping with
         ``dm.mapClasses``.
      2. Clean the training frame with ``dm.cleanTrainData``, which also
         returns the normalization values reused for the test set.
      3. Shuffle the rows, use column 0 (passed through ``binarizeLabels``)
         as the target and the remaining columns as features, and fit a
         model with ``trainModel``.
      4. Load ``test-2.csv``, clean it with ``dm.cleanTestData``, run
         ``testProbaModel`` on every column but the first, and put the
         first column in front of the model output as ``Id``.
      5. Pass the resulting frame to ``dm.saveResults``.

    NOTE(review): another ``main`` is defined in this file; the definition
    that appears later replaces the earlier one. Confirm which is intended.
    """
    trainData = pd.read_csv('CrimeClassification/Dataset/train-2.csv')
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() writes its summary itself and returns None, so it is
    # called directly rather than wrapped in print (which emitted "None").
    trainData.info()
    print(classesMap)

    cleanedTrainData, normalizationValues = dm.cleanTrainData(
        trainData, classesMap)
    cleanedTrainData.info()

    # np.random.shuffle reorders its argument in place. Shuffling the result
    # of ``values.astype(...)`` only touched a throw-away copy, leaving the
    # rows in file order; shuffle an owned copy that is actually used below.
    data = cleanedTrainData.values.copy()
    np.random.shuffle(data)
    Ytrain = binarizeLabels(data[:, 0])
    Xtrain = data[:, 1:]

    model = trainModel(Xtrain, Ytrain)
    # NOTE(review): the result on the training rows was never used (it was
    # overwritten below). The call is kept in case testProbaModel has side
    # effects such as logging -- confirm and remove if it does not.
    testProbaModel(model, Xtrain)

    testData = pd.read_csv('CrimeClassification/Dataset/test-2.csv')
    cleanedTestData = dm.cleanTestData(testData, normalizationValues)
    cleanedTestData.info()

    testValues = cleanedTestData.values
    output = testProbaModel(model, testValues[:, 1:])
    result = np.c_[testValues[:, 0].astype(int), output]

    # Column order: 'Id' first, then the class names ordered by the value
    # they map to in classesMap.
    outputVec = ['Id'] + sorted(classesMap, key=classesMap.__getitem__)
    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # np.c_ produces a single-dtype array, so restore an integer Id column.
    dataFrameResults['Id'] = dataFrameResults['Id'].astype(int)
    dm.saveResults(dataFrameResults)
def main():
    """Train on a split of the training CSV and report evaluation results.

    Loads ``train-2.csv``, builds the class mapping with ``dm.mapClasses``,
    cleans the frame with ``dm.cleanTrainData`` and splits its values with
    ``splitData``. A model fitted by ``trainModel`` on the training part is
    applied to the held-out part with ``testModel``; the predictions are
    then used to build and plot a confusion matrix (``da.confusionMatrix``,
    ``da.plotConfusionMatrix``) and to print ``da.f1Score``.

    NOTE(review): another ``main`` is defined in this file; the definition
    that appears later replaces the earlier one. Confirm which is intended.
    """
    trainData = pd.read_csv('CrimeClassification/Dataset/train-2.csv')
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() writes its summary itself and returns None, so it is
    # called directly rather than wrapped in print (which emitted "None").
    trainData.info()
    print(classesMap)

    # The normalization values are only needed when cleaning a test set,
    # which this evaluation flow does not do.
    cleanedTrainData, _ = dm.cleanTrainData(trainData, classesMap)
    cleanedTrainData.info()

    Xtrain, Ytrain, Xtest, Ytest = splitData(cleanedTrainData.values)
    model = trainModel(Xtrain, Ytrain)
    Ypred = testModel(model, Xtest)

    confMatrix = da.confusionMatrix(Ypred, Ytest)
    titleCM = da.orderClassesMapKeys(classesMap)
    da.plotConfusionMatrix(confMatrix, titleCM)
    print(da.f1Score(Ypred, Ytest))