def main():
    """Train on the whole training set and save predictions for the test set.

    Steps, as performed below:
      1. Read the training CSV and build the class mapping with
         ``dm.mapClasses``.
      2. Clean the training data with ``dm.cleanTrainData``, which also
         returns the normalization values reused for the test set.
      3. Shuffle the rows, use column 0 as labels (passed through
         ``binarizeLabels``) and the remaining columns as features.
      4. Train a model and run ``testProbaModel`` on the cleaned test set.
      5. Assemble a DataFrame whose first column is ``Id`` followed by one
         column per class, and hand it to ``dm.saveResults``.

    NOTE(review): a second ``main`` is defined later in this file and
    rebinds the name, so this definition is unreachable through ``main``
    unless that one is renamed or removed.
    """
    trainData = pd.read_csv('CrimeClassification/Dataset/train-2.csv')
    classesMap = dm.mapClasses(trainData)
    # Call form of print: valid on both Python 2 and Python 3 (the bare
    # ``print x`` statement is a SyntaxError on Python 3).
    print(trainData.info())
    print(classesMap)
    cleanedTrainData, normalizationValues = dm.cleanTrainData(trainData, classesMap)
    print(cleanedTrainData.info())

    # The previous code shuffled ``data.astype(np.float64)``: astype returns
    # a new array, so the shuffle hit a temporary and ``data`` kept its
    # original row order. permutation() returns a row-shuffled copy (first
    # axis only) and preserves the dtype seen by the code below.
    data = np.random.permutation(cleanedTrainData.values)
    Ytrain = binarizeLabels(data[:, 0])
    Xtrain = data[:, 1:]
    model = trainModel(Xtrain, Ytrain)
    # NOTE(review): the result of this call on the training features was
    # never read (it was overwritten by the test-set prediction below). The
    # call is kept in case testProbaModel has side effects -- confirm and
    # delete if it does not.
    testProbaModel(model, Xtrain)

    testData = pd.read_csv('CrimeClassification/Dataset/test-2.csv')
    cleanedTestData = dm.cleanTestData(testData, normalizationValues)
    print(cleanedTestData.info())
    testValues = cleanedTestData.values
    # Column 0 of the cleaned test data is used as the Id; the rest are
    # the features fed to the model.
    output = testProbaModel(model, testValues[:, 1:])

    result = np.c_[testValues[:, 0].astype(int), output]
    # Class names ordered by the value classesMap assigns to each of them.
    outputVec = sorted(classesMap, key=classesMap.__getitem__)
    outputVec.insert(0, 'Id')
    dataFrameResults = pd.DataFrame(result, columns=outputVec)
    # Re-cast Id explicitly so the column handed to saveResults is integer,
    # whatever dtype the combined ``result`` array ended up with.
    dataFrameResults['Id'] = dataFrameResults['Id'].astype(int)
    dm.saveResults(dataFrameResults)
def main():
    """Train on a split of the training data and report evaluation metrics.

    Reads the training CSV, builds the class mapping with ``dm.mapClasses``,
    cleans the data with ``dm.cleanTrainData``, and splits the resulting
    array with ``splitData``. A model is trained on the first pair returned
    by the split and evaluated on the second: a confusion matrix is computed
    and plotted via the ``da`` helpers, and the value returned by
    ``da.f1Score`` is printed.

    NOTE(review): an earlier ``main`` is defined in this file; because this
    definition comes later it is the one bound to the name ``main``.
    """
    trainData = pd.read_csv('CrimeClassification/Dataset/train-2.csv')
    classesMap = dm.mapClasses(trainData)
    # Call form of print: valid on both Python 2 and Python 3 (the bare
    # ``print x`` statement is a SyntaxError on Python 3).
    print(trainData.info())
    print(classesMap)
    # normalizationValues is returned by the cleaning step but not needed
    # here, since no separate test file is processed.
    cleanedTrainData, normalizationValues = dm.cleanTrainData(trainData, classesMap)
    print(cleanedTrainData.info())

    Xtrain, Ytrain, Xtest, Ytest = splitData(cleanedTrainData.values)
    model = trainModel(Xtrain, Ytrain)
    Ypred = testModel(model, Xtest)

    confMatrix = da.confusionMatrix(Ypred, Ytest)
    # Labels for the plot, derived from classesMap by the da helper.
    titleCM = da.orderClassesMapKeys(classesMap)
    da.plotConfusionMatrix(confMatrix, titleCM)
    print(da.f1Score(Ypred, Ytest))