def _saveCsv(path, table, header, fmt):
    """Write `table` to `path` as comma-separated text with a header row.

    The header names are joined with ',' and UTF-8 encoded before being
    passed to np.savetxt. comments='' replaces np.savetxt's default comment
    prefix so the header row is written without a leading marker.
    """
    np.savetxt(path, table, header=','.join(header).encode('utf-8'),
               delimiter=',', fmt=fmt, comments='')


# Delete the columns with constant values (std), then snapshot both tables.
categoryHeader, npCategoryData = util.deleteColumnWithConstantValue(
    (categoryHeader, npCategoryData))
numericalHeader, npNumericalData = util.deleteColumnWithConstantValue(
    (numericalHeader, npNumericalData))
_saveCsv('./Data/reducedCategoryData.csv', npCategoryData, categoryHeader, '%d')
_saveCsv('./Data/reducedNumericalData.csv', npNumericalData, numericalHeader, '%.4f')

# z-score format
zscoreNumericalData = preprocessing.scale(npNumericalData)
_saveCsv('./Data/zscoreNumericalData.csv', zscoreNumericalData, numericalHeader, '%.4f')

# bitformat for categoryData
preCategoryHeader, preCategoryData = bpModelDataPrcocessor.getPreProcessedFlattenCategoryData(
    (categoryHeader, npCategoryData),
    mongoDataProcessor.getCategoryInfo())
_saveCsv('./Data/preCategoryData.csv', preCategoryData, preCategoryHeader, '%d')

# PCA+KMeans for numericalData; the same (header, z-scored data) pair feeds
# the PCA step, the KMeans step and the final flattening step.
zscoreNumericalPair = (numericalHeader, zscoreNumericalData)
PCAResultMap, transResultMap = bpModelDataPrcocessor.getAssociatedMapFromPCA(
    zscoreNumericalPair)
kmLists = bpModelDataPrcocessor.getKMeansListByCalculation(
    zscoreNumericalPair,
    (PCAResultMap, transResultMap),
    path='./Figures/')
bpModelDataPrcocessor.saveKMeansListToFile('./KMeansModel/', numericalHeader)
preNumericalHeader, preNumericalData = bpModelDataPrcocessor.getPreProcessedFlattenNumericalData(
    zscoreNumericalPair,
    dropTags=[])
_saveCsv('./Data/preNumericalData.csv', preNumericalData, preNumericalHeader, '%d')

# ANN: the trainer receives the flattened category, numerical and target tables.
bPModelTrainer = BPModelTrainer(
    flattenCategoryHeader=preCategoryHeader,
    flattenCategoryData=preCategoryData,
    flattenNumericalHeader=preNumericalHeader,
    flattenNumericalData=preNumericalData,
    flattenTargetHeader=targetHeader,
    flattenTargetData=targetData,
)
# transport targetData to discrete value
CategoryTagHeader = dataBase.CategoryTagHeader CategoryTagHeader.remove({"loanType":loanType}) for index in range(len(preCategoryHeader)): CategoryTagHeader.insert_one( { "tagName": preCategoryHeader[index], "updateTime": int(time.time()), "version": version, "loanType": loanType, "index" : index }) # Step12: get pre-processed numerical data sortedMap, trans = bpModelDataPrcocessor.getAssociatedMapFromPCA((npNumericalHeader, npNumericalData)) kmLists = bpModelDataPrcocessor.getKMeansListByCalculation((npNumericalHeader, npNumericalData), (sortedMap, trans), path='./Figures/') bpModelDataPrcocessor.saveKMeansListToFile('./KMeansModel/', npNumericalHeader) preNumericalHeader, preNumericalData = bpModelDataPrcocessor.getPreProcessedFlattenNumericalData((npNumericalHeader, npNumericalData), dropTags=['livePlace', 'occupation', 'brandModel']) with open('Data/PreProcessedFlattenNumericalData.csv', 'w') as f: print >> f, ','.join(preNumericalHeader).encode('utf-8') np.savetxt(f, preNumericalData, delimiter=',', fmt='%d') # please note the fmt arg NumericalTagHeader = dataBase.NumericalTagHeader NumericalTagHeader.remove({"loanType":loanType}) for index in range(len(preNumericalHeader)): NumericalTagHeader.insert_one( { "tagName": preNumericalHeader[index], "updateTime": int(time.time()),
delimiter=',', fmt='%.4f', comments='')

# Bit format for categoryData: flatten the category table using the category
# info supplied by mongoDataProcessor, then save it with integer formatting.
preCategoryHeader, preCategoryData = bpModelDataPrcocessor.getPreProcessedFlattenCategoryData(
    (categoryHeader, npCategoryData), mongoDataProcessor.getCategoryInfo())
np.savetxt('./Data/preCategoryData.csv', preCategoryData, header=','.join(preCategoryHeader).encode('utf-8'),
           delimiter=',', fmt='%d', comments='')

# PCA+KMeans for numericalData: every step below takes the z-scored table.
PCAResultMap, transResultMap = bpModelDataPrcocessor.getAssociatedMapFromPCA(
    (numericalHeader, zscoreNumericalData))
# path='./Figures/' is passed through to the KMeans step.
# NOTE(review): presumably a directory for generated plots - confirm.
kmLists = bpModelDataPrcocessor.getKMeansListByCalculation(
    (numericalHeader, zscoreNumericalData), (PCAResultMap, transResultMap), path='./Figures/')
bpModelDataPrcocessor.saveKMeansListToFile('./KMeansModel/', numericalHeader)
# dropTags=[] means no tags are requested to be dropped here.
preNumericalHeader, preNumericalData = bpModelDataPrcocessor.getPreProcessedFlattenNumericalData(
    (numericalHeader, zscoreNumericalData), dropTags=[])
np.savetxt('./Data/preNumericalData.csv', preNumericalData, header=','.join(preNumericalHeader).encode('utf-8'),
           delimiter=',', fmt='%d', comments='')

# ANN
bPModelTrainer = BPModelTrainer(flattenCategoryHeader=preCategoryHeader,