def main():
    """Train on train-2.csv, predict class probabilities for test-2.csv, save them.

    Steps, in order:
      1. Load the training CSV and build the class map with ``dm.mapClasses``.
      2. Clean the training data with ``dm.cleanTrainData``, which also returns
         the normalization values later handed to ``dm.cleanTestData``.
      3. Shuffle the training rows, then split column 0 (passed to
         ``binarizeLabels`` as the targets) from the remaining columns.
      4. Fit a model with ``trainModel`` and run ``testProbaModel`` on the
         cleaned test rows (column 0 of the cleaned test data is used as Id).
      5. Assemble an ``Id`` + one-column-per-class frame and hand it to
         ``dm.saveResults``.
    """
    trainData = pd.read_csv('CrimeClassification/Dataset/train-2.csv')
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() writes its report itself and returns None, so this
    # also prints "None"; kept to preserve the original console output.
    print(trainData.info())
    print(classesMap)

    cleanedTrainData, normalizationValues = dm.cleanTrainData(trainData, classesMap)
    print(cleanedTrainData.info())

    # astype() returns a new array. The original shuffled that temporary and
    # discarded it, so the rows were never actually shuffled. Bind the copy
    # first, then shuffle it in place (rows are permuted along axis 0).
    # NOTE(review): labels in column 0 are now float64 as well - confirm that
    # binarizeLabels accepts that.
    data = cleanedTrainData.values.astype(np.float64)
    np.random.shuffle(data)

    Ytrain = binarizeLabels(data[0:, 0])
    Xtrain = data[0:, 1:]
    model = trainModel(Xtrain, Ytrain)

    # The original bound this result to `output` and overwrote it before use.
    # The call is kept in case testProbaModel reports diagnostics - TODO
    # confirm and drop if it has no side effects.
    testProbaModel(model, Xtrain)

    testData = pd.read_csv('CrimeClassification/Dataset/test-2.csv')
    cleanedTestData = dm.cleanTestData(testData, normalizationValues)
    print(cleanedTestData.info())

    # Column 0 of the cleaned test data is the Id; the rest are features.
    output = testProbaModel(model, cleanedTestData.values[:, 1:])
    result = np.c_[cleanedTestData.values[:, 0].astype(int), output]

    # Class names ordered by their mapped value, preceded by the Id column.
    outputVec = sorted(classesMap, key=classesMap.__getitem__)
    outputVec.insert(0, 'Id')

    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # np.c_ promotes the Id column to the dtype of `output`; restore integers.
    dataFrameResults['Id'] = dataFrameResults['Id'].astype(int)
    dm.saveResults(dataFrameResults)
def main():
    """Train on train-2.csv, predict class probabilities for test-2.csv, save them.

    Loads and cleans both CSV files through ``dm``, fits a model with
    ``trainModel`` on the cleaned training matrix, obtains per-class outputs
    for the cleaned test matrix from ``testProbaModel``, and passes an
    ``Id`` + one-column-per-class frame to ``dm.saveResults``.
    """
    trainData = pd.read_csv("CrimeClassification/Dataset/train-2.csv")
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() writes its report itself and returns None, so this
    # also prints "None"; kept to preserve the original console output.
    print(trainData.info())
    print(classesMap)

    cleanedTrainData = dm.cleanData(trainData, classesMap)
    testData = pd.read_csv("CrimeClassification/Dataset/test-2.csv")
    cleanedTestData = dm.cleanTestData(testData)
    print(cleanedTrainData.info())

    model = trainModel(cleanedTrainData.values)

    # The original used `output` below without ever assigning it (NameError).
    # NOTE(review): the whole cleaned test matrix is passed, Id column
    # included - confirm that testProbaModel expects that.
    output = testProbaModel(model, cleanedTestData.values)

    # Column 0 of the cleaned test data is used as the Id column.
    result = np.c_[cleanedTestData.values[:, 0].astype(int), output]

    # Class names ordered by their mapped value, preceded by the Id column.
    outputVec = sorted(classesMap, key=classesMap.__getitem__)
    outputVec.insert(0, "Id")

    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # np.c_ promotes the Id column to the dtype of `output`; restore integers.
    dataFrameResults["Id"] = dataFrameResults["Id"].astype(int)
    dm.saveResults(dataFrameResults)
def main():
    """Train on train01-tsc.csv and build the prediction frame for test-tsc.csv.

    Sets the pandas display precision to 3, cleans both datasets through
    ``dm``, fits a model with ``trainModel``, gets per-class outputs for the
    cleaned test matrix from ``testProbaModel``, and assembles them into a
    frame whose first column is ``Id``.

    NOTE(review): the resulting frame is neither saved nor returned here -
    confirm whether a ``dm.saveResults`` call is missing.
    """
    pd.set_option("display.precision", 3)

    # --- training data -----------------------------------------------------
    raw_train = pd.read_csv('CrimeClassification/Dataset/train01-tsc.csv')
    classesMap = dm.mapClasses(raw_train)
    print(classesMap)
    train_frame = dm.cleanData(raw_train, classesMap)

    # --- test data ---------------------------------------------------------
    raw_test = pd.read_csv('CrimeClassification/Dataset/test-tsc.csv')
    test_frame = dm.cleanTestData(raw_test)

    # --- fit and predict ---------------------------------------------------
    print(train_frame.info())
    model = trainModel(train_frame.values)
    print(test_frame.info())
    probabilities = testProbaModel(model, test_frame.values)

    # Column 0 of the cleaned test data is used as the Id column.
    ids = test_frame.values[:, 0].astype(int)
    stacked = np.c_[ids, probabilities]

    # Header: 'Id' followed by class names ordered by their mapped value.
    header = ['Id'] + sorted(classesMap, key=lambda name: classesMap[name])

    dataFrameResults = pd.DataFrame(stacked, columns=header)
    # Stacking promotes the ids to the outputs' dtype; cast them back to int.
    dataFrameResults['Id'] = dataFrameResults['Id'].astype(int)