Exemple #1
0
        {
            "tagName": tagName,
            "max": int(max),
            "min": int(min),
            "mean" : int(mean),
            "std": float(std),
            "loanType": loanType,
            "version": version
        }
    )


# Step11: get pre-processed category data
# Flatten the category columns using the category metadata returned by
# mongoDataProcessor, then dump the header line and the integer matrix to CSV.
categoryInfo = mongoDataProcessor.getCategoryInfo()
# NOTE(review): `input` shadows the builtin; the name is kept because code
# outside this section may still read it.
input = (npCategoryHeader, npCategoryData)
preCategoryHeader, preCategoryData = bpModelDataPrcocessor.getPreProcessedFlattenCategoryData(input, categoryInfo)

headerLine = ','.join(preCategoryHeader)
if not isinstance(headerLine, str):
    # Python 2 `unicode` header: encode explicitly before writing to the
    # byte-oriented file object, as the previous print statement did.
    headerLine = headerLine.encode('utf-8')

with open('Data/PreProcessedFlattenCategpryData.csv', 'w') as f:
    # f.write replaces the Python 2-only `print >> f, ...` statement, which on
    # Python 3 is evaluated as a tuple expression and raises TypeError.
    f.write(headerLine + '\n')
    np.savetxt(f, preCategoryData, delimiter=',', fmt='%d')  # please note the fmt arg


# Drop the stored category tag headers of this loan type; the loop that follows
# inserts the freshly computed ones.
CategoryTagHeader = dataBase.CategoryTagHeader
# delete_many replaces Collection.remove, which PyMongo 3 deprecated and
# PyMongo 4 removed; both delete every document matching the filter.
# NOTE(review): assumes CategoryTagHeader is a PyMongo collection (insert_one is
# called on it right below) - confirm.
CategoryTagHeader.delete_many({"loanType": loanType})
for index in range(len(preCategoryHeader)):
    CategoryTagHeader.insert_one(
    {
        "tagName": preCategoryHeader[index],
        "updateTime": int(time.time()),
        "version": version,
        "loanType": loanType,
        "index" : index
Exemple #2
0
        categoryTagIndex.append(header.index(tagName))
    else:
        categoryTagIndex.append(-1)

# Project every input row onto the category tags. Each entry of
# `categoryTagIndex` is either a column position in `row` or a negative marker
# for a tag without a source column; the latter is filled with `missingValue`.
for row in data:
    categoryDataTmp.append(
        [row[index] if index >= 0 else missingValue for index in categoryTagIndex]
    )

# Builtin `int` replaces the `np.int` alias, which NumPy deprecated in 1.20 and
# removed in 1.24; the resulting dtype is unchanged.
categoryData = np.array(categoryDataTmp, dtype=int)
categoryInfo = mongoDataProcessor.getCategoryInfo()
# Only element [1] of the result is kept; the header written below comes from
# `flattenCategoryHeader`, which is defined outside this section.
flattenCategoryData = bpModelDataPrcocessor.getPreProcessedFlattenCategoryData(
    (categoryHeader, categoryData), categoryInfo)[1]
with open('Data/ResultFlattenCategoryData.csv', 'w') as f:
    f.write(','.join(flattenCategoryHeader) + '\n')
    np.savetxt(f, flattenCategoryData, delimiter=',', fmt='%d')

# Step4: get flatten numerical data
# Index the (mean, std) pair of every document returned by NumStats.find({})
# under its tag name; a later document with the same tag name overwrites an
# earlier one.
numStats = dataBase.NumStats
mscollection = numStats.find({})
meanStdMap = {
    statsDoc['tagName']: (statsDoc['mean'], statsDoc['std'])
    for statsDoc in mscollection
}
numericalTagIndex = []
for tagName in numericalHeader:
Exemple #3
0
def _saveCsv(path, matrix, header, fmt):
    """Write `matrix` to `path` as comma-separated text under one header row.

    `header` is a sequence of column names; it is comma-joined and UTF-8
    encoded before being passed to np.savetxt. `comments=''` is passed so the
    header line is written without a comment prefix.
    """
    # NOTE(review): passing an encoded header is a Python 2 idiom - confirm the
    # behaviour before running this under Python 3.
    np.savetxt(
        path,
        matrix,
        header=','.join(header).encode('utf-8'),
        delimiter=',',
        fmt=fmt,
        comments='',
    )


# Replace missing values (-9) with the mean; applies to numerical data only.
npNumericalData = util.replaceMissingValueWithMean(npNumericalData)
_saveCsv('./Data/replaceMissingNumericalData.csv', npNumericalData, numericalHeader, '%.4f')

# Drop the columns whose values are constant (std).
categoryHeader, npCategoryData = util.deleteColumnWithConstantValue(
    (categoryHeader, npCategoryData)
)
numericalHeader, npNumericalData = util.deleteColumnWithConstantValue(
    (numericalHeader, npNumericalData)
)
_saveCsv('./Data/reducedCategoryData.csv', npCategoryData, categoryHeader, '%d')
_saveCsv('./Data/reducedNumericalData.csv', npNumericalData, numericalHeader, '%.4f')

# z-score format
zscoreNumericalData = preprocessing.scale(npNumericalData)
_saveCsv('./Data/zscoreNumericalData.csv', zscoreNumericalData, numericalHeader, '%.4f')

# bitformat for categoryData
preCategoryHeader, preCategoryData = bpModelDataPrcocessor.getPreProcessedFlattenCategoryData(
    (categoryHeader, npCategoryData),
    mongoDataProcessor.getCategoryInfo(),
)
_saveCsv('./Data/preCategoryData.csv', preCategoryData, preCategoryHeader, '%d')

# PCA+KMeans for numericalData
PCAResultMap, transResultMap = bpModelDataPrcocessor.getAssociatedMapFromPCA(
    (numericalHeader, zscoreNumericalData)
)
kmLists = bpModelDataPrcocessor.getKMeansListByCalculation(
    (numericalHeader, zscoreNumericalData),
    (PCAResultMap, transResultMap),
    path='./Figures/',
)
bpModelDataPrcocessor.saveKMeansListToFile('./KMeansModel/', numericalHeader)
preNumericalHeader, preNumericalData = bpModelDataPrcocessor.getPreProcessedFlattenNumericalData(
    (numericalHeader, zscoreNumericalData),
    dropTags=[],
)
_saveCsv('./Data/preNumericalData.csv', preNumericalData, preNumericalHeader, '%d')

# ANN
bPModelTrainer = BPModelTrainer(
    flattenCategoryHeader=preCategoryHeader,
    flattenCategoryData=preCategoryData,
    flattenNumericalHeader=preNumericalHeader,
    flattenNumericalData=preNumericalData,
Exemple #4
0
    else:
        categoryTagIndex.append(-1)

# Build one output row per input row, one cell per category tag. A
# non-negative entry of `categoryTagIndex` is a column position in `row`; a
# negative entry marks a tag with no source column and yields `missingValue`.
for row in data:
    categoryDataTmp.append(
        [row[index] if index >= 0 else missingValue for index in categoryTagIndex]
    )

# Builtin `int` replaces the `np.int` alias, which NumPy deprecated in 1.20 and
# removed in 1.24; the resulting dtype is unchanged.
categoryData = np.array(categoryDataTmp, dtype=int)
categoryInfo = mongoDataProcessor.getCategoryInfo()
# Only element [1] of the result is kept; the header written below comes from
# `flattenCategoryHeader`, which is defined outside this section.
flattenCategoryData = bpModelDataPrcocessor.getPreProcessedFlattenCategoryData(
    (categoryHeader, categoryData), categoryInfo
)[1]
with open("Data/ResultFlattenCategoryData.csv", "w") as f:
    f.write(",".join(flattenCategoryHeader) + "\n")
    np.savetxt(f, flattenCategoryData, delimiter=",", fmt="%d")

# Step4: get flatten numerical data
# Map each tag name found in NumStats to its (mean, std) tuple; when a tag
# name occurs more than once, the last document read wins.
numStats = dataBase.NumStats
numericalTagIndex = []
meanStdMap = {}
mscollection = numStats.find({})
for statsDoc in mscollection:
    meanStdMap[statsDoc["tagName"]] = (statsDoc["mean"], statsDoc["std"])
Exemple #5
0
           header=','.join(numericalHeader).encode('utf-8'),
           delimiter=',',
           fmt='%.4f',
           comments='')

# z-score format
# Scale the numerical matrix and save it with four decimals per value.
zscoreNumericalData = preprocessing.scale(npNumericalData)
_zscoreHeaderLine = ','.join(numericalHeader).encode('utf-8')
np.savetxt(
    './Data/zscoreNumericalData.csv',
    zscoreNumericalData,
    header=_zscoreHeaderLine,
    delimiter=',',
    fmt='%.4f',
    comments='',
)

# bitformat for categoryData
# Flatten the category columns with the category metadata and save them as
# integers.
_categoryInput = (categoryHeader, npCategoryData)
preCategoryHeader, preCategoryData = (
    bpModelDataPrcocessor.getPreProcessedFlattenCategoryData(
        _categoryInput, mongoDataProcessor.getCategoryInfo()
    )
)
_categoryHeaderLine = ','.join(preCategoryHeader).encode('utf-8')
np.savetxt(
    './Data/preCategoryData.csv',
    preCategoryData,
    header=_categoryHeaderLine,
    delimiter=',',
    fmt='%d',
    comments='',
)

# PCA+KMeans for numericalData
# The same (header, z-scored data) pair feeds both the PCA step and the KMeans
# step; the KMeans call is also given the path './Figures/'.
_zscoreInput = (numericalHeader, zscoreNumericalData)
PCAResultMap, transResultMap = bpModelDataPrcocessor.getAssociatedMapFromPCA(_zscoreInput)
kmLists = bpModelDataPrcocessor.getKMeansListByCalculation(
    _zscoreInput,
    (PCAResultMap, transResultMap),
    path='./Figures/',
)
bpModelDataPrcocessor.saveKMeansListToFile('./KMeansModel/', numericalHeader)
preNumericalHeader, preNumericalData = bpModelDataPrcocessor.getPreProcessedFlattenNumericalData(