예제 #1
0
def main():
    """Train on train-2.csv, predict on test-2.csv and save the results.

    Steps, in order:
      1. Load the training CSV, build ``classesMap`` via ``dm.mapClasses``
         and clean the frame with ``dm.cleanTrainData`` (which also returns
         the normalization values reused for the test set).
      2. Shuffle the training rows, use column 0 as the label column
         (passed through ``binarizeLabels``) and the remaining columns as
         features, then fit a model with ``trainModel``.
      3. Load and clean the test CSV, run ``testProbaModel`` on every column
         except column 0, and prepend column 0 (cast to int) as ``Id``.
      4. Hand the resulting DataFrame to ``dm.saveResults``.

    The output columns are ``'Id'`` followed by the keys of ``classesMap``
    ordered by their mapped value.
    """
    trainData = pd.read_csv('CrimeClassification/Dataset/train-2.csv')
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() prints its summary itself and returns None, so these
    # print calls additionally emit a "None" line (kept from the original).
    print(trainData.info())
    print(classesMap)
    cleanedTrainData, normalizationValues = dm.cleanTrainData(trainData, classesMap)
    print(cleanedTrainData.info())

    # The previous code called np.random.shuffle on the temporary copy
    # returned by astype(), so the training rows were never actually
    # shuffled. permutation() returns a row-shuffled copy (first axis only)
    # and keeps the dtype the rest of the pipeline already received.
    data = np.random.permutation(cleanedTrainData.values)
    Ytrain = binarizeLabels(data[:, 0])
    Xtrain = data[:, 1:]
    model = trainModel(Xtrain, Ytrain)
    # NOTE(review): this result is overwritten below without being read;
    # the call is kept in case testProbaModel reports something as a side
    # effect -- confirm and drop if it does not.
    output = testProbaModel(model, Xtrain)

    testData = pd.read_csv('CrimeClassification/Dataset/test-2.csv')
    cleanedTestData = dm.cleanTestData(testData, normalizationValues)
    print(cleanedTestData.info())
    # Column 0 of the cleaned test data is used as the Id, not as a feature.
    output = testProbaModel(model, cleanedTestData.values[:, 1:])

    result = np.c_[cleanedTestData.values[:, 0].astype(int), output]
    # Header: 'Id' followed by classesMap keys sorted by their mapped value.
    outputVec = sorted(classesMap, key=classesMap.__getitem__)
    outputVec.insert(0, 'Id')
    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # np.c_ yields a single-dtype array, so restore an integer Id column.
    dataFrameResults['Id'] = dataFrameResults['Id'].astype(int)
    dm.saveResults(dataFrameResults)
예제 #2
0
def main():
    """Train on train-2.csv, predict on test-2.csv and save the results.

    Loads and cleans both CSV files through ``dm``, fits a model with
    ``trainModel`` on the cleaned training values, runs ``testProbaModel``
    on the cleaned test values and passes a DataFrame with an ``Id`` column
    followed by one column per ``classesMap`` key (ordered by mapped value)
    to ``dm.saveResults``.
    """
    trainData = pd.read_csv("CrimeClassification/Dataset/train-2.csv")
    classesMap = dm.mapClasses(trainData)
    # DataFrame.info() prints its summary itself and returns None, so these
    # print calls additionally emit a "None" line (kept from the original).
    print(trainData.info())
    print(classesMap)
    cleanedTrainData = dm.cleanData(trainData, classesMap)
    testData = pd.read_csv("CrimeClassification/Dataset/test-2.csv")
    cleanedTestData = dm.cleanTestData(testData)
    print(cleanedTrainData.info())
    model = trainModel(cleanedTrainData.values)
    # The original never assigned `output`, so the line building `result`
    # raised NameError and the trained model was never used.
    # NOTE(review): the full value array (including column 0, used as Id
    # below) is passed on the assumption that testProbaModel handles the Id
    # column itself, matching the single-argument trainModel call -- confirm.
    output = testProbaModel(model, cleanedTestData.values)
    result = np.c_[cleanedTestData.values[:, 0].astype(int), output]
    # Header: "Id" followed by classesMap keys sorted by their mapped value.
    outputVec = sorted(classesMap, key=classesMap.__getitem__)
    outputVec.insert(0, "Id")
    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # np.c_ yields a single-dtype array, so restore an integer Id column.
    dataFrameResults["Id"] = dataFrameResults["Id"].astype(int)
    dm.saveResults(dataFrameResults)
예제 #3
0
def main():
    """Run the train -> predict -> save pipeline on the "tsc" CSV files.

    Sets the pandas display precision to 3, cleans the training and test
    frames through ``dm``, fits a model with ``trainModel``, obtains
    predictions from ``testProbaModel`` and passes a DataFrame made of an
    ``Id`` column plus one column per ``classesMap`` key (ordered by mapped
    value) to ``dm.saveResults``.
    """
    pd.set_option("display.precision", 3)

    # Load and clean both data sets before any training happens.
    rawTrain = pd.read_csv('CrimeClassification/Dataset/train001-tsc.csv')
    classesMap = dm.mapClasses(rawTrain)
    print(classesMap)
    trainFrame = dm.cleanData(rawTrain, classesMap)

    rawTest = pd.read_csv('CrimeClassification/Dataset/test-tsc.csv')
    testFrame = dm.cleanTestData(rawTest)

    # info() prints its own summary and returns None, which print() then
    # echoes as well.
    print(trainFrame.info())
    model = trainModel(trainFrame.values)

    print(testFrame.info())
    predictions = testProbaModel(model, testFrame.values)

    # Column 0 of the cleaned test data supplies the Id of each row.
    ids = testFrame.values[:, 0].astype(int)
    result = np.c_[ids, predictions]

    # Header: 'Id' first, then classesMap keys ordered by their mapped value.
    header = ['Id'] + sorted(classesMap, key=lambda key: classesMap[key])

    submission = pd.DataFrame(result, columns=header)
    # np.c_ yields a single-dtype array, so restore an integer Id column.
    submission['Id'] = submission['Id'].astype(int)
    dm.saveResults(submission)