def main():
    """Load the training CSV, clean it, and pass it to the visualisation helpers.

    The raw frame is read from disk and summarised with ``info()``. The class
    mapping returned by ``dm.mapClasses`` is then used both to clean the data
    and as the second argument of the per-category helpers.
    """
    rawFrame = pd.read_csv("CrimeClassification/Dataset/train-2.csv")
    rawFrame.info()

    labelMap = dm.mapClasses(rawFrame)
    tidyFrame = dm.cleanData(rawFrame, labelMap)
    # The summary returned by describe() is not captured or printed here.
    tidyFrame.describe()

    # One global view first, then the helpers that also need the class mapping.
    heatMapXY(tidyFrame, "Global heatMap of crimes")
    heatMapPerCategory(tidyFrame, labelMap)
    histogramOfCategories(tidyFrame, labelMap)
    textHistogram(tidyFrame, labelMap)
def main():
    """Train a model on the crime data and save predictions for the test set.

    Steps, in order:

    1. Read the training CSV, build the class mapping and clean the frame.
    2. Read and clean the test CSV.
    3. Train a model on the cleaned training values and run it on the
       cleaned test values.
    4. Assemble a result frame whose first column is ``Id`` (taken from the
       first column of the cleaned test values) followed by one column per
       class, and hand it to ``dm.saveResults``.
    """
    trainData = pd.read_csv("CrimeClassification/Dataset/train-2.csv")
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() writes its report itself and returns None, so it is
    # called directly; wrapping it in print would only add a stray "None".
    trainData.info()
    print(classesMap)
    cleanedTrainData = dm.cleanData(trainData, classesMap)

    testData = pd.read_csv("CrimeClassification/Dataset/test-2.csv")
    cleanedTestData = dm.cleanTestData(testData)
    cleanedTrainData.info()

    model = trainModel(cleanedTrainData.values)
    # Previously ``output`` was read below without ever being assigned, which
    # raised NameError. The prediction step mirrors the sibling main() that
    # uses testProbaModel.
    # NOTE(review): presumably returns one probability column per class;
    # confirm against testProbaModel.
    output = testProbaModel(model, cleanedTestData.values)

    # Put the integer ids in front of the model output, column-wise.
    result = np.c_[cleanedTestData.values[:, 0].astype(int), output]

    # Class names ordered by the value they map to in classesMap, with the
    # "Id" header placed first to match the column layout of ``result``.
    outputVec = sorted(classesMap, key=classesMap.__getitem__)
    outputVec.insert(0, "Id")

    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # Stacking ids next to the model output yields a single-dtype array, so
    # the Id column is cast back to int after the frame is built.
    dataFrameResults["Id"] = dataFrameResults["Id"].astype(int)
    dm.saveResults(dataFrameResults)
def main():
    """Train on the ``train01-tsc`` data and build a result frame for ``test-tsc``.

    The function sets the pandas display precision, loads and cleans both
    CSV files, trains a model on the cleaned training values, runs
    ``testProbaModel`` on the cleaned test values, and assembles a frame
    whose first column is ``Id`` followed by one column per class.
    """
    pd.set_option("display.precision", 3)

    trainData = pd.read_csv('CrimeClassification/Dataset/train01-tsc.csv')
    classesMap = dm.mapClasses(trainData)
    print(classesMap)
    cleanedTrainData = dm.cleanData(trainData, classesMap)

    testData = pd.read_csv('CrimeClassification/Dataset/test-tsc.csv')
    cleanedTestData = dm.cleanTestData(testData)

    # DataFrame.info() prints its own report and returns None; calling it
    # directly avoids the spurious "None" line that print(df.info()) emitted.
    cleanedTrainData.info()
    model = trainModel(cleanedTrainData.values)

    cleanedTestData.info()
    output = testProbaModel(model, cleanedTestData.values)

    # Integer ids from the first test column, followed by the model output.
    result = np.c_[cleanedTestData.values[:, 0].astype(int), output]

    # Class names ordered by their mapped value, preceded by the "Id" header.
    outputVec = sorted(classesMap, key=classesMap.__getitem__)
    outputVec.insert(0, 'Id')

    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # The stacked array has a single dtype, so Id is cast back to int here.
    dataFrameResults['Id'] = dataFrameResults['Id'].astype(int)
    # NOTE(review): the frame is built but not written or returned in the
    # visible code; confirm whether a save step (e.g. dm.saveResults) is
    # intended here.