Example #1
0
            offset += 100
        elif key == 'extraGuarantee' and checkMap[key] >= 0:
            offset += 50
    return offset


# Open the Mongo connection and select the RANK database.
dbSource = MongoClient(dbAddressIP, 27017)
dataBase = dbSource.RANK
bpModelDataPrcocessor = BPModelDataProcessor()

# Step1: Get data from database
# tmp_xxx includes the information
# 1001: personal loan
# 1019: car loan
# 1018: house loan
mongoDataProcessor = MongoDataProcessor(['1018'], address=dbAddressIP)
header, data = mongoDataProcessor.getFlattenTagData(
    computeNeeded=True,
    fromTraining=False)  # get only those records which need computation
data = util.getFixedData(data)
projectIds = mongoDataProcessor.getProjectId()
appIds = mongoDataProcessor.getAppId()
itemTypes = mongoDataProcessor.getItemType()
modelComputed = mongoDataProcessor.getModelComputed()
riskItemComputed = mongoDataProcessor.getRiskItemComputed()

# Stop early when there is nothing to compute.  The emptiness of appIds must
# be tested through its length: comparing the collection itself to 0 is never
# true, so an empty appIds used to slip through this guard.
# NOTE(review): assumes getAppId() returns a sized collection, like
# getProjectId() -- confirm.
if len(data) == 0 or len(projectIds) == 0 or len(appIds) == 0:
    util.printToFile(
        str(time.time()) + ': no increamental data attached!', logPath)
    sys.exit()

# Step2: Reassemble headers
Example #2
0
sys.path.append("../../Commons/Utils")

from BPModelDataProcessor import BPModelDataProcessor
from MongoDataProcessor import MongoDataProcessor
from BPModelTrainer import BPModelTrainer
import Utility as util
import numpy as np
import time
from pymongo import MongoClient
from ZScaleNormalizer import ZScaleNormalizer
loanType = 'CarLoan'
version = 1.0
mongoAddress = ['192.168.1.125', '192.168.1.126', '192.168.1.127']
dbSource = MongoClient(mongoAddress, 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor('1019', address=mongoAddress)
bpModelDataPrcocessor = BPModelDataProcessor()
logPath = './log.txt'

# Step1: get all tags list and pass it, comma-joined, to the log helper
# (the 'w' argument is presumably a file mode -- confirm in Utility).
allTags = mongoDataProcessor.getMetaTagList()
util.printToFile(','.join(allTags), logPath, 'w')


# Step2: get Flatten Header/Data and dump them as CSV, header row first.
flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(
    additionalTags=['creditScore'])
with open('Data/FlattenData.csv', 'w') as f:
    f.write(','.join(flattenHeader) + '\n')
    for dataRow in flattenData:
        # Use f.write for every row, matching the header line above.  The
        # former `print >> f` chevron form is Python-2-only syntax; f.write
        # emits the same "row + newline" bytes and works on Python 2 and 3.
        f.write(','.join(dataRow) + '\n')

# Step3: get Target Header/Data
Example #3
0
# coding=utf-8

import sys
import numpy as np
sys.path.append("../../Commons/DataProcessor")
sys.path.append("../../Commons/ModelDataPreProcessor")
sys.path.append("../../Commons/Utils")
sys.path.append("../../Commons/Normalizer")

from BPModelDataProcessor import BPModelDataProcessor
from MongoDataProcessor import MongoDataProcessor
import Utility as util
from ZScaleNormalizer import ZScaleNormalizer

# Build the Mongo-backed data processor for code '1019' and the BP-model
# data pre-processor used by the rest of this script.
# '1019' is presumably the car-loan code -- confirm against the loan-type table.
mongoDataProessorTest = MongoDataProcessor('1019', address='192.168.1.120')
bpModelDataPrcocessor = BPModelDataProcessor()

# get the raw data from MongoDB
# Each getFlatten* call is unpacked as a (header, data) pair.
flattenHeader, flattenData = mongoDataProessorTest.getFlattenTagData(
    additionalTags=['creditScore'])
categoryHeader, categoryData = mongoDataProessorTest.getFlattenCategoryData()
numericalHeader, numericalData = mongoDataProessorTest.getFlattenNumericalData(
)
# NOTE(review): other scripts unpack getTargetTagData() as
# `targetHeader, targetData = ...`; here the whole return value is bound to
# targetData and the header is hard-coded below -- confirm which is intended.
targetData = mongoDataProessorTest.getTargetTagData(targetTags=['creditScore'])
targetHead = ['creditScore']
categoryInfo = mongoDataProessorTest.getCategoryInfo()
tagDataByTagGroups = mongoDataProessorTest.getTagDataByTagGroups()

# replace strange data with default value
fixedCategoryData = util.getFixedData(categoryData)
fixedNumericalData = util.getFixedData(numericalData)
Example #4
0
loanType = "CarLoan"
missingValue = "-9"

dbAddressIP = ["192.168.1.125", "192.168.1.126", "192.168.1.127"]  # local
# dbAddressIP = '10.251.255.163' # aliyun

# Open the Mongo connection and select the RANK database.
dbSource = MongoClient(dbAddressIP, 27017)
dataBase = dbSource.RANK
bpModelDataPrcocessor = BPModelDataProcessor()

# Step1: Get data from database
# tmp_xxx includes the information
# 1001: personal loan
# 1019: car loan
# 1018: house loan
mongoDataProcessor = MongoDataProcessor(["1001"], address=dbAddressIP)
header, data = mongoDataProcessor.getFlattenTagData(
    computeNeeded=True, fromTraining=False
)  # get only those records which need computation
data = util.getFixedData(data)
projectIds = mongoDataProcessor.getProjectId()
appIds = mongoDataProcessor.getAppId()
itemTypes = mongoDataProcessor.getItemType()
modelComputed = mongoDataProcessor.getModelComputed()
riskItemComputed = mongoDataProcessor.getRiskItemComputed()

# Stop early when there is nothing to compute.  The emptiness of appIds must
# be tested through its length: comparing the collection itself to 0 is never
# true, so an empty appIds used to slip through this guard.
# NOTE(review): assumes getAppId() returns a sized collection, like
# getProjectId() -- confirm.
if len(data) == 0 or len(projectIds) == 0 or len(appIds) == 0:
    util.printToFile(str(time.time()) + ": no increamental data attached!", logPath)
    sys.exit()

# Step2: Reassemble headers
flattenCategoryHeaderTmp = {}
Example #5
0
import numpy as np
import pandas as pd
import Utility as util

from pymongo import MongoClient
from MongoDataProcessor import MongoDataProcessor
from sklearn import preprocessing
from BPModelDataProcessor import BPModelDataProcessor
from BPModelTrainer import BPModelTrainer

# Script configuration: loan type label, model version and Mongo hosts.
loanType = 'HousingLoan'
version = 1.0
mongoAddress = ['192.168.1.125','192.168.1.126','192.168.1.127']
# Connect to Mongo, select the RANK database and build the processors.
dbSource = MongoClient(mongoAddress, 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor('1018', address=mongoAddress)
bpModelDataPrcocessor = BPModelDataProcessor()

allTags = mongoDataProcessor.getMetaTagList()

# get source data from mongo
# flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(additionalTags=['creditScore'])

# Temporary test path (may be removed later): load the source data from a
# local CSV file instead of Mongo.
flattenData = pd.read_csv('./Data/housing_source.csv')
# Number of rows / columns of the loaded frame.
(row,column)=np.shape(flattenData)
# columnScore = pd.Series(np.random.randint(100,200,row))
# flattenData['creditScore']=columnScore
targetHeader = ['creditScore']
# Placeholder targets: one random integer per CSV row, drawn from [120, 200).
targetData = np.random.randint(120,200,row)
Example #6
0
# coding=utf-8
import sys
sys.path.append("../../Commons/DataProcessor")

from MongoDataProcessor import MongoDataProcessor
mongoDataProcessorTest = MongoDataProcessor('1019', address='192.168.1.120')
'''
#Test for getMetaTagList
allTags = mongoDataProcessorTest.getMetaTagList()
print allTags
'''

#Test for getCategoryInfo
categoryInfo = mongoDataProcessorTest.getCategoryInfo()
for multiSelect in categoryInfo.keys():
    allBinaryTags = categoryInfo[multiSelect]
    print str(multiSelect) + ':'
    indentChar = '\t'
    for tag in allBinaryTags.keys():
        valueGroup = allBinaryTags[tag]
        print indentChar + tag + ':'
        for value in valueGroup.keys():
            name = valueGroup[value]
            print indentChar + '\t', value, ' ' + name
'''
#Test for getFlattenTagData
flattenHeader, flattenData = mongoDataProcessorTest.getFlattenTagData(additionalTags=['creditScore'])
with open('Data/FlattenData.csv', 'w') as f:
    f.write(','.join(flattenHeader)+'\n')
    for dataRow in flattenData:
        print >> f,  ','.join(dataRow)#.encode('utf-8')
Example #7
0
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import cross_validation


def VarScore(estimator, x, y):
    """Return the sum of squared differences between predictions and targets.

    estimator -- object exposing predict(x)
    x         -- inputs handed to estimator.predict
    y         -- target values subtracted element-wise from the predictions
    """
    residuals = estimator.predict(x) - y
    return np.sum(np.square(residuals))


# Script configuration and data-source setup (single Mongo host, RANK db).
loanType = 'CarLoan'
version = 1.0
dbSource = MongoClient('192.168.1.120', 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor('1019', address='192.168.1.120')
bpModelDataPrcocessor = BPModelDataProcessor()
# get flatten data (requesting the extra 'creditScore' tag)
flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(
    additionalTags=['creditScore'])
# Dump the flattened data as CSV: every cell is formatted with '%s', and
# comments='' makes the header line get written without a comment prefix.
# NOTE(review): .encode('utf-8') yields a byte string; fine on Python 2,
# verify before running this under Python 3.
np.savetxt('./Data/FlattenData.csv',
           flattenData,
           header=','.join(flattenHeader).encode('utf-8'),
           delimiter=',',
           fmt='%s',
           comments='')
# get target data
targetHeader, targetData = mongoDataProcessor.getTargetTagData(
    targetTags=['creditScore'])
# wrap targetData in a numpy array (no explicit dtype is requested here)
targetData = np.array(targetData)
Example #8
0
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import cross_validation


def VarScore(estimator, x, y):
    """Sum of squared prediction errors of ``estimator`` on ``(x, y)``.

    Calls ``estimator.predict(x)``, subtracts ``y`` element-wise, squares the
    differences and returns their total.
    """
    predicted = estimator.predict(x)
    squared_errors = np.square(predicted - y)
    return np.sum(squared_errors)


# Script configuration and data-source setup (single Mongo host, RANK db).
loanType = "CarLoan"
version = 1.0
dbSource = MongoClient("192.168.1.120", 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor("1019", address="192.168.1.120")
bpModelDataPrcocessor = BPModelDataProcessor()
# get flatten data (requesting the extra "creditScore" tag)
flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(additionalTags=["creditScore"])
# Dump the flattened data as CSV: every cell is formatted with "%s", and
# comments="" makes the header line get written without a comment prefix.
# NOTE(review): .encode("utf-8") yields a byte string; fine on Python 2,
# verify before running this under Python 3.
np.savetxt(
    "./Data/FlattenData.csv",
    flattenData,
    header=",".join(flattenHeader).encode("utf-8"),
    delimiter=",",
    fmt="%s",
    comments="",
)
# get target data
targetHeader, targetData = mongoDataProcessor.getTargetTagData(targetTags=["creditScore"])
# wrap targetData in a numpy array (no explicit dtype is requested here)
targetData = np.array(targetData)
Example #9
0
# coding=utf-8
import sys
sys.path.append("../../Commons/DataProcessor")

from MongoDataProcessor import MongoDataProcessor
mongoDataProcessorTest = MongoDataProcessor('1019', address='192.168.1.120')

'''
#Test for getMetaTagList
allTags = mongoDataProcessorTest.getMetaTagList()
print allTags
'''


#Test for getCategoryInfo
categoryInfo = mongoDataProcessorTest.getCategoryInfo()
for multiSelect in categoryInfo.keys():
    allBinaryTags = categoryInfo[multiSelect]
    print str(multiSelect) + ':'
    indentChar = '\t'
    for tag in allBinaryTags.keys():
        valueGroup = allBinaryTags[tag]
        print indentChar+tag+':'
        for value in valueGroup.keys():
            name = valueGroup[value]
            print indentChar+'\t',value,' '+name


'''
#Test for getFlattenTagData
flattenHeader, flattenData = mongoDataProcessorTest.getFlattenTagData(additionalTags=['creditScore'])
Example #10
0
import numpy as np
import pandas as pd
import Utility as util

from pymongo import MongoClient
from MongoDataProcessor import MongoDataProcessor
from sklearn import preprocessing
from BPModelDataProcessor import BPModelDataProcessor
from BPModelTrainer import BPModelTrainer

# Script configuration: loan type label, model version and Mongo hosts.
loanType = 'HousingLoan'
version = 1.0
mongoAddress = ['192.168.1.125', '192.168.1.126', '192.168.1.127']
# Connect to Mongo, select the RANK database and build the processors.
dbSource = MongoClient(mongoAddress, 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor('1018', address=mongoAddress)
bpModelDataPrcocessor = BPModelDataProcessor()

allTags = mongoDataProcessor.getMetaTagList()

# get source data from mongo
# flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(additionalTags=['creditScore'])

# Temporary test path (may be removed later): load the source data from a
# local CSV file instead of Mongo.
flattenData = pd.read_csv('./Data/housing_source.csv')
# Number of rows / columns of the loaded frame.
(row, column) = np.shape(flattenData)
# columnScore = pd.Series(np.random.randint(100,200,row))
# flattenData['creditScore']=columnScore
targetHeader = ['creditScore']
# Placeholder targets: one random integer per CSV row, drawn from [120, 200).
targetData = np.random.randint(120, 200, row)
# coding=utf-8

import sys
import numpy as np
sys.path.append("../../Commons/DataProcessor")
sys.path.append("../../Commons/ModelDataPreProcessor")
sys.path.append("../../Commons/Utils")
sys.path.append("../../Commons/Normalizer")

from BPModelDataProcessor import BPModelDataProcessor
from MongoDataProcessor import MongoDataProcessor
import Utility as util
from ZScaleNormalizer import ZScaleNormalizer

# Build the Mongo-backed data processor for code '1019' and the BP-model
# data pre-processor used by the rest of this script.
# '1019' is presumably the car-loan code -- confirm against the loan-type table.
mongoDataProessorTest = MongoDataProcessor('1019', address='192.168.1.120')
bpModelDataPrcocessor = BPModelDataProcessor()

# get the raw data from MongoDB
# Each getFlatten* call is unpacked as a (header, data) pair.
flattenHeader, flattenData = mongoDataProessorTest.getFlattenTagData(additionalTags=['creditScore'])
categoryHeader, categoryData = mongoDataProessorTest.getFlattenCategoryData()
numericalHeader, numericalData = mongoDataProessorTest.getFlattenNumericalData()
# NOTE(review): other scripts unpack getTargetTagData() as
# `targetHeader, targetData = ...`; here the whole return value is bound to
# targetData and the header is hard-coded below -- confirm which is intended.
targetData = mongoDataProessorTest.getTargetTagData(targetTags=['creditScore'])
targetHead = ['creditScore']
categoryInfo = mongoDataProessorTest.getCategoryInfo()
tagDataByTagGroups = mongoDataProessorTest.getTagDataByTagGroups()

# replace strange data with default value
fixedCategoryData = util.getFixedData(categoryData)
fixedNumericalData = util.getFixedData(numericalData)

# convert data to numpy format