Ejemplo n.º 1
0
from BPModelDataProcessor import BPModelDataProcessor
from MongoDataProcessor import MongoDataProcessor
from BPModelTrainer import BPModelTrainer
import Utility as util
import numpy as np
import time
from pymongo import MongoClient
from ZScaleNormalizer import ZScaleNormalizer
# Script-level settings for this run.
loanType = 'CarLoan'
version = 1.0
# Hosts passed to MongoClient together with port 27017.
mongoAddress = ['192.168.1.125','192.168.1.126','192.168.1.127']
dbSource = MongoClient(mongoAddress, 27017)
dataBase = dbSource.RANK  # handle to the "RANK" database
mongoDataProcessor = MongoDataProcessor('1019', address=mongoAddress)
bpModelDataPrcocessor = BPModelDataProcessor()
logPath = './log.txt'
# Step1: get all tags list and write it, comma-separated, to the log file
allTags = mongoDataProcessor.getMetaTagList()
util.printToFile(','.join(allTags), logPath, 'w')


# Step2: get Flatten Header/Data and dump them as CSV (header line first)
flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(additionalTags=['creditScore'])
with open('Data/FlattenData.csv', 'w') as f:
    f.write(','.join(flattenHeader)+'\n')
    for dataRow in flattenData:
        # f.write instead of the Python-2-only "print >> f" chevron: same
        # output (row + newline), consistent with the header write above,
        # and it also runs on Python 3.
        f.write(','.join(dataRow) + '\n')  # encode('utf-8')

# Step3: get Target Header/Data
targetHeader, targetData = mongoDataProcessor.getTargetTagData(targetTags=['creditScore'])
Ejemplo n.º 2
0
            offset += 100
        elif key == "extraGuarantee" and checkMap[key] >= 0:
            offset += 50
    return offset


# Script-level settings.
logPath = "../../logs/log.txt"
loanType = "CarLoan"
missingValue = "-9"

# Hosts passed to MongoClient together with port 27017.
dbAddressIP = ["192.168.1.125", "192.168.1.126", "192.168.1.127"]  # local
# dbAddressIP = '10.251.255.163' # aliyun

dbSource = MongoClient(dbAddressIP, 27017)
dataBase = dbSource.RANK  # handle to the "RANK" database
bpModelDataPrcocessor = BPModelDataProcessor()

# Step1: Get data from database
# tmp_xxx includes the information
# 1001: personal loan
# 1019: car loan
# 1018: house loan
# NOTE(review): loanType above is "CarLoan", but the code requested here is
# "1001" (personal loan according to the table above) - confirm which is meant.
mongoDataProcessor = MongoDataProcessor(["1001"], address=dbAddressIP)
header, data = mongoDataProcessor.getFlattenTagData(
    computeNeeded=True, fromTraining=False
)  # get only those records which need computation
# Pass the raw rows through util.getFixedData before further use.
data = util.getFixedData(data)
# Additional per-record fields fetched from the same processor.
projectIds = mongoDataProcessor.getProjectId()
appIds = mongoDataProcessor.getAppId()
itemTypes = mongoDataProcessor.getItemType()
modelComputed = mongoDataProcessor.getModelComputed()
Ejemplo n.º 3
0
            if checkMap['financeAmount'] > 0:
                if checkMap['financeAmount'] / (
                        checkMap[key] / 30 + 1) < checkMap['familyIncome'] / 2:
                    offset += 100
                else:
                    offset -= 100
        elif key == 'guaranteeWay' and checkMap[key] >= 0:
            offset += 100
        elif key == 'extraGuarantee' and checkMap[key] >= 0:
            offset += 50
    return offset


# dbAddressIP is defined outside the visible fragment; it is passed to
# MongoClient together with port 27017.
dbSource = MongoClient(dbAddressIP, 27017)
dataBase = dbSource.RANK  # handle to the "RANK" database
bpModelDataPrcocessor = BPModelDataProcessor()

# Step1: Get data from database
# tmp_xxx includes the information
# 1001: personal loan
# 1019: car loan
# 1018: house loan
mongoDataProcessor = MongoDataProcessor(['1018'], address=dbAddressIP)
header, data = mongoDataProcessor.getFlattenTagData(
    computeNeeded=True,
    fromTraining=False)  # get only those records which need computation
# Pass the raw rows through util.getFixedData before further use.
data = util.getFixedData(data)
# Additional per-record fields fetched from the same processor.
projectIds = mongoDataProcessor.getProjectId()
appIds = mongoDataProcessor.getAppId()
itemTypes = mongoDataProcessor.getItemType()
modelComputed = mongoDataProcessor.getModelComputed()
Ejemplo n.º 4
0
# coding=utf-8

import sys
import numpy as np
sys.path.append("../../Commons/DataProcessor")
sys.path.append("../../Commons/ModelDataPreProcessor")
sys.path.append("../../Commons/Utils")
sys.path.append("../../Commons/Normalizer")

from BPModelDataProcessor import BPModelDataProcessor
from MongoDataProcessor import MongoDataProcessor
import Utility as util
from ZScaleNormalizer import ZScaleNormalizer

# Data-access object for code '1019' against a single MongoDB host.
mongoDataProessorTest = MongoDataProcessor('1019', address='192.168.1.120')
bpModelDataPrcocessor = BPModelDataProcessor()

# get the raw data from MongoDB
flattenHeader, flattenData = mongoDataProessorTest.getFlattenTagData(
    additionalTags=['creditScore'])
categoryHeader, categoryData = mongoDataProessorTest.getFlattenCategoryData()
numericalHeader, numericalData = mongoDataProessorTest.getFlattenNumericalData(
)
# NOTE(review): the return value is bound to targetData without unpacking;
# other scripts unpack getTargetTagData into (header, data) - confirm that a
# single value is what is expected here.
targetData = mongoDataProessorTest.getTargetTagData(targetTags=['creditScore'])
targetHead = ['creditScore']
categoryInfo = mongoDataProessorTest.getCategoryInfo()
tagDataByTagGroups = mongoDataProessorTest.getTagDataByTagGroups()

# replace strange data with default value
fixedCategoryData = util.getFixedData(categoryData)
fixedNumericalData = util.getFixedData(numericalData)
Ejemplo n.º 5
0
import pandas as pd
import Utility as util

from pymongo import MongoClient
from MongoDataProcessor import MongoDataProcessor
from sklearn import preprocessing
from BPModelDataProcessor import BPModelDataProcessor
from BPModelTrainer import BPModelTrainer

# Script-level settings for this run.
loanType = 'HousingLoan'
version = 1.0
# Hosts passed to MongoClient together with port 27017.
mongoAddress = ['192.168.1.125','192.168.1.126','192.168.1.127']
dbSource = MongoClient(mongoAddress, 27017)
dataBase = dbSource.RANK  # handle to the "RANK" database
mongoDataProcessor = MongoDataProcessor('1018', address=mongoAddress)
bpModelDataPrcocessor = BPModelDataProcessor()

allTags = mongoDataProcessor.getMetaTagList()

# get source data from mongo
# flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(additionalTags=['creditScore'])

# Test-only path: load the source data from a local CSV instead of MongoDB;
# may be removed in the future.
flattenData = pd.read_csv('./Data/housing_source.csv')
# NOTE(review): np is not imported in the visible lines - confirm numpy is
# imported as np elsewhere in this file.
(row,column)=np.shape(flattenData)
# columnScore = pd.Series(np.random.randint(100,200,row))
# flattenData['creditScore']=columnScore
targetHeader = ['creditScore']
# Placeholder targets: one random integer in [120, 200) per data row.
targetData = np.random.randint(120,200,row)

categoryFrame = pd.DataFrame()
Ejemplo n.º 6
0
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import cross_validation


def VarScore(estimator, x, y):
    """Return the sum of squared errors of *estimator* on (x, y).

    Args:
        estimator: object exposing ``predict(x)``.
        x: samples handed to ``estimator.predict`` unchanged.
        y: true target values, array-like, same length as the predictions.

    Returns:
        The sum over all samples of ``(prediction - y) ** 2``; 0 means a
        perfect fit and larger values mean a worse fit.
    """
    # Coerce both sides to arrays so plain Python lists (which do not
    # support element-wise subtraction) are accepted as well as ndarrays.
    predicted = np.asarray(estimator.predict(x))
    expected = np.asarray(y)
    return np.sum((predicted - expected)**2)


# Script-level settings for this run.
loanType = 'CarLoan'
version = 1.0
dbSource = MongoClient('192.168.1.120', 27017)
dataBase = dbSource.RANK  # handle to the "RANK" database
mongoDataProcessor = MongoDataProcessor('1019', address='192.168.1.120')
bpModelDataPrcocessor = BPModelDataProcessor()
# get flatten data and dump it to CSV, with the joined header as first line
flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(
    additionalTags=['creditScore'])
np.savetxt('./Data/FlattenData.csv',
           flattenData,
           header=','.join(flattenHeader).encode('utf-8'),
           delimiter=',',
           fmt='%s',
           comments='')  # comments='' keeps savetxt from prefixing the header with '# '
# get target data
targetHeader, targetData = mongoDataProcessor.getTargetTagData(
    targetTags=['creditScore'])
# convert targetData to a flat int32 numpy array
targetData = np.array(targetData)
targetData = np.array(targetData.flatten(), dtype=np.int32)
Ejemplo n.º 7
0
import pandas as pd
import Utility as util

from pymongo import MongoClient
from MongoDataProcessor import MongoDataProcessor
from sklearn import preprocessing
from BPModelDataProcessor import BPModelDataProcessor
from BPModelTrainer import BPModelTrainer

# Script-level settings for this run.
loanType = 'HousingLoan'
version = 1.0
# Hosts passed to MongoClient together with port 27017.
mongoAddress = ['192.168.1.125', '192.168.1.126', '192.168.1.127']
dbSource = MongoClient(mongoAddress, 27017)
dataBase = dbSource.RANK  # handle to the "RANK" database
mongoDataProcessor = MongoDataProcessor('1018', address=mongoAddress)
bpModelDataPrcocessor = BPModelDataProcessor()

allTags = mongoDataProcessor.getMetaTagList()

# get source data from mongo
# flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(additionalTags=['creditScore'])

# Test-only path: load the source data from a local CSV instead of MongoDB;
# may be removed in the future.
flattenData = pd.read_csv('./Data/housing_source.csv')
# NOTE(review): np is not imported in the visible lines - confirm numpy is
# imported as np elsewhere in this file.
(row, column) = np.shape(flattenData)
# columnScore = pd.Series(np.random.randint(100,200,row))
# flattenData['creditScore']=columnScore
targetHeader = ['creditScore']
# Placeholder targets: one random integer in [120, 200) per data row.
targetData = np.random.randint(120, 200, row)

categoryFrame = pd.DataFrame()
Ejemplo n.º 8
0
# coding=utf-8

import sys
import numpy as np
sys.path.append("../../Commons/DataProcessor")
sys.path.append("../../Commons/ModelDataPreProcessor")
sys.path.append("../../Commons/Utils")
sys.path.append("../../Commons/Normalizer")

from BPModelDataProcessor import BPModelDataProcessor
from MongoDataProcessor import MongoDataProcessor
import Utility as util
from ZScaleNormalizer import ZScaleNormalizer

# Data-access object for code '1019' against a single MongoDB host.
mongoDataProessorTest = MongoDataProcessor('1019', address='192.168.1.120')
bpModelDataPrcocessor = BPModelDataProcessor()

# get the raw data from MongoDB
flattenHeader, flattenData = mongoDataProessorTest.getFlattenTagData(additionalTags=['creditScore'])
categoryHeader, categoryData = mongoDataProessorTest.getFlattenCategoryData()
numericalHeader, numericalData = mongoDataProessorTest.getFlattenNumericalData()
# NOTE(review): the return value is bound to targetData without unpacking;
# other scripts unpack getTargetTagData into (header, data) - confirm that a
# single value is what is expected here.
targetData = mongoDataProessorTest.getTargetTagData(targetTags=['creditScore'])
targetHead = ['creditScore']
categoryInfo = mongoDataProessorTest.getCategoryInfo()
tagDataByTagGroups = mongoDataProessorTest.getTagDataByTagGroups()

# replace strange data with default value
fixedCategoryData = util.getFixedData(categoryData)
fixedNumericalData = util.getFixedData(numericalData)

# convert data to numpy format