# NOTE(review): this line is a whitespace-collapsed fragment. It opens with the
# tail of a function whose header is outside this view (the visible part adds
# to `offset` depending on `checkMap` entries and returns it). The rest is
# top-level script code: it builds a MongoDataProcessor for item type '1018',
# fetches the flattened tag data, and logs and exits when nothing came back.
# NOTE(review): the emptiness guard tests `appIds == 0` while its siblings use
# `len(...) == 0`; presumably `len(appIds) == 0` was intended -- confirm what
# getAppId() returns before changing it.
offset += 100 elif key == 'extraGuarantee' and checkMap[key] >= 0: offset += 50 return offset dbSource = MongoClient(dbAddressIP, 27017) dataBase = dbSource.RANK bpModelDataPrcocessor = BPModelDataProcessor() # Step1: Get data from database # tmp_xxx includes the information # 1001: personal loan # 1019: car loan # 1018: house loan mongoDataProcessor = MongoDataProcessor(['1018'], address=dbAddressIP) header, data = mongoDataProcessor.getFlattenTagData( computeNeeded=True, fromTraining=False) # get only those records which need computation data = util.getFixedData(data) projectIds = mongoDataProcessor.getProjectId() appIds = mongoDataProcessor.getAppId() itemTypes = mongoDataProcessor.getItemType() modelComputed = mongoDataProcessor.getModelComputed() riskItemComputed = mongoDataProcessor.getRiskItemComputed() if len(data) == 0 or len(projectIds) == 0 or appIds == 0: util.printToFile( str(time.time()) + ': no increamental data attached!', logPath) sys.exit() # Step2: Reassemble headers
sys.path.append("../../Commons/Utils")
from BPModelDataProcessor import BPModelDataProcessor
from MongoDataProcessor import MongoDataProcessor
from BPModelTrainer import BPModelTrainer
import Utility as util
import numpy as np
import time
from pymongo import MongoClient
from ZScaleNormalizer import ZScaleNormalizer

# Run configuration.
loanType = 'CarLoan'
version = 1.0
mongoAddress = ['192.168.1.125', '192.168.1.126', '192.168.1.127']
logPath = './log.txt'

# Database handle (RANK) and the project helpers used below.
dbSource = MongoClient(mongoAddress, 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor('1019', address=mongoAddress)
bpModelDataPrcocessor = BPModelDataProcessor()

# Step1: get all tags list and write it, comma-joined, to the log file
allTags = mongoDataProcessor.getMetaTagList()
util.printToFile(','.join(allTags), logPath, 'w')

# Step2: get Flatten Header/Data and dump them to a CSV file
flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(
    additionalTags=['creditScore'])
with open('Data/FlattenData.csv', 'w') as f:
    # One header row, then one comma-joined row per record.
    f.write(','.join(flattenHeader) + '\n')
    for dataRow in flattenData:
        f.write(','.join(dataRow) + '\n')  # encode('utf-8')

# Step3: get Target Header/Data
# coding=utf-8
import sys
import numpy as np
sys.path.append("../../Commons/DataProcessor")
sys.path.append("../../Commons/ModelDataPreProcessor")
sys.path.append("../../Commons/Utils")
sys.path.append("../../Commons/Normalizer")
from BPModelDataProcessor import BPModelDataProcessor
from MongoDataProcessor import MongoDataProcessor
import Utility as util
from ZScaleNormalizer import ZScaleNormalizer

# Objects exercised by this test script.
mongoDataProessorTest = MongoDataProcessor('1019', address='192.168.1.120')
bpModelDataPrcocessor = BPModelDataProcessor()

# get the raw data from MongoDB
flattenHeader, flattenData = mongoDataProessorTest.getFlattenTagData(
    additionalTags=['creditScore'],
)
categoryHeader, categoryData = mongoDataProessorTest.getFlattenCategoryData()
numericalHeader, numericalData = (
    mongoDataProessorTest.getFlattenNumericalData()
)
# NOTE(review): the result is bound to a single name and the header is
# hard-coded below -- confirm whether getTargetTagData() returns only the
# data or a (header, data) pair.
targetData = mongoDataProessorTest.getTargetTagData(
    targetTags=['creditScore'],
)
targetHead = ['creditScore']
categoryInfo = mongoDataProessorTest.getCategoryInfo()
tagDataByTagGroups = mongoDataProessorTest.getTagDataByTagGroups()

# replace strange data with default value
fixedCategoryData = util.getFixedData(categoryData)
fixedNumericalData = util.getFixedData(numericalData)
# Run configuration.
# NOTE(review): loanType says "CarLoan" while the processor below is built for
# item type "1001" (personal loan according to the legend) -- confirm.
loanType = "CarLoan"
missingValue = "-9"
dbAddressIP = ["192.168.1.125", "192.168.1.126", "192.168.1.127"]  # local
# dbAddressIP = '10.251.255.163'  # aliyun
dbSource = MongoClient(dbAddressIP, 27017)
dataBase = dbSource.RANK
bpModelDataPrcocessor = BPModelDataProcessor()

# Step1: Get data from database
# tmp_xxx includes the information
# 1001: personal loan
# 1019: car loan
# 1018: house loan
mongoDataProcessor = MongoDataProcessor(["1001"], address=dbAddressIP)
# get only those records which need computation
header, data = mongoDataProcessor.getFlattenTagData(
    computeNeeded=True, fromTraining=False
)
data = util.getFixedData(data)
projectIds = mongoDataProcessor.getProjectId()
appIds = mongoDataProcessor.getAppId()
itemTypes = mongoDataProcessor.getItemType()
modelComputed = mongoDataProcessor.getModelComputed()
riskItemComputed = mongoDataProcessor.getRiskItemComputed()

# Nothing to compute: log a timestamped message and stop the script.
# All three guards test the length; the previous `appIds == 0` compared the
# collection itself with 0 (assuming getAppId() returns a list, like
# getProjectId()), so an empty appIds was never detected.
if len(data) == 0 or len(projectIds) == 0 or len(appIds) == 0:
    util.printToFile(str(time.time()) + ": no increamental data attached!", logPath)
    sys.exit()

# Step2: Reassemble headers
flattenCategoryHeaderTmp = {}
import numpy as np
import pandas as pd
import Utility as util
from pymongo import MongoClient
from MongoDataProcessor import MongoDataProcessor
from sklearn import preprocessing
from BPModelDataProcessor import BPModelDataProcessor
from BPModelTrainer import BPModelTrainer

# Run configuration.
loanType = 'HousingLoan'
version = 1.0
mongoAddress = ['192.168.1.125', '192.168.1.126', '192.168.1.127']

# Database handle (RANK) and the project helpers.
dbSource = MongoClient(mongoAddress, 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor('1018', address=mongoAddress)
bpModelDataPrcocessor = BPModelDataProcessor()
allTags = mongoDataProcessor.getMetaTagList()

# get source data from mongo
# flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(additionalTags=['creditScore'])

# the test way, may be not used in the future:
# the source table is read from a local CSV instead of MongoDB.
flattenData = pd.read_csv('./Data/housing_source.csv')
row, column = flattenData.shape
# columnScore = pd.Series(np.random.randint(100,200,row))
# flattenData['creditScore']=columnScore

# Placeholder targets: one random integer in [120, 200) per CSV row.
targetHeader = ['creditScore']
targetData = np.random.randint(120, 200, size=row)
# coding=utf-8 import sys sys.path.append("../../Commons/DataProcessor") from MongoDataProcessor import MongoDataProcessor mongoDataProcessorTest = MongoDataProcessor('1019', address='192.168.1.120') ''' #Test for getMetaTagList allTags = mongoDataProcessorTest.getMetaTagList() print allTags ''' #Test for getCategoryInfo categoryInfo = mongoDataProcessorTest.getCategoryInfo() for multiSelect in categoryInfo.keys(): allBinaryTags = categoryInfo[multiSelect] print str(multiSelect) + ':' indentChar = '\t' for tag in allBinaryTags.keys(): valueGroup = allBinaryTags[tag] print indentChar + tag + ':' for value in valueGroup.keys(): name = valueGroup[value] print indentChar + '\t', value, ' ' + name ''' #Test for getFlattenTagData flattenHeader, flattenData = mongoDataProcessorTest.getFlattenTagData(additionalTags=['creditScore']) with open('Data/FlattenData.csv', 'w') as f: f.write(','.join(flattenHeader)+'\n') for dataRow in flattenData: print >> f, ','.join(dataRow)#.encode('utf-8')
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import cross_validation


def VarScore(estimator, x, y):
    """Return the sum of squared differences between estimator.predict(x) and y."""
    residual = estimator.predict(x) - y
    return np.sum(residual ** 2)


# Run configuration.
loanType = 'CarLoan'
version = 1.0

# Database handle (RANK) and the project helpers.
dbSource = MongoClient('192.168.1.120', 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor('1019', address='192.168.1.120')
bpModelDataPrcocessor = BPModelDataProcessor()

# get flatten data and dump it, header first, to a CSV file
flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(
    additionalTags=['creditScore'])
np.savetxt(
    './Data/FlattenData.csv',
    flattenData,
    fmt='%s',
    delimiter=',',
    header=','.join(flattenHeader).encode('utf-8'),
    comments='',  # no prefix in front of the header row
)

# get target data
targetHeader, targetData = mongoDataProcessor.getTargetTagData(
    targetTags=['creditScore'])

# convert targetData to a numpy array
targetData = np.array(targetData)
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import cross_validation


def VarScore(estimator, x, y):
    """Score an estimator by its summed squared error.

    Calls ``estimator.predict(x)`` and returns ``np.sum`` of the squared
    differences between that prediction and ``y``.
    """
    predicted = estimator.predict(x)
    squaredError = (predicted - y) ** 2
    return np.sum(squaredError)


# Run configuration.
loanType = "CarLoan"
version = 1.0

# Database handle (RANK) and the project helpers.
dbSource = MongoClient("192.168.1.120", 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor("1019", address="192.168.1.120")
bpModelDataPrcocessor = BPModelDataProcessor()

# get flatten data
flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(
    additionalTags=["creditScore"]
)

# Dump the flattened data to CSV; comments="" keeps the header row unprefixed.
np.savetxt(
    "./Data/FlattenData.csv",
    flattenData,
    header=",".join(flattenHeader).encode("utf-8"),
    delimiter=",",
    fmt="%s",
    comments="",
)

# get target data
targetHeader, targetData = mongoDataProcessor.getTargetTagData(
    targetTags=["creditScore"]
)

# convert targetData to a numpy array
targetData = np.array(targetData)
# coding=utf-8 import sys sys.path.append("../../Commons/DataProcessor") from MongoDataProcessor import MongoDataProcessor mongoDataProcessorTest = MongoDataProcessor('1019', address='192.168.1.120') ''' #Test for getMetaTagList allTags = mongoDataProcessorTest.getMetaTagList() print allTags ''' #Test for getCategoryInfo categoryInfo = mongoDataProcessorTest.getCategoryInfo() for multiSelect in categoryInfo.keys(): allBinaryTags = categoryInfo[multiSelect] print str(multiSelect) + ':' indentChar = '\t' for tag in allBinaryTags.keys(): valueGroup = allBinaryTags[tag] print indentChar+tag+':' for value in valueGroup.keys(): name = valueGroup[value] print indentChar+'\t',value,' '+name ''' #Test for getFlattenTagData flattenHeader, flattenData = mongoDataProcessorTest.getFlattenTagData(additionalTags=['creditScore'])
import numpy as np
import pandas as pd
import Utility as util
from pymongo import MongoClient
from MongoDataProcessor import MongoDataProcessor
from sklearn import preprocessing
from BPModelDataProcessor import BPModelDataProcessor
from BPModelTrainer import BPModelTrainer

# ---- configuration ---------------------------------------------------------
loanType = 'HousingLoan'
version = 1.0
mongoAddress = [
    '192.168.1.125',
    '192.168.1.126',
    '192.168.1.127',
]

# ---- data sources ----------------------------------------------------------
dbSource = MongoClient(mongoAddress, 27017)
dataBase = dbSource.RANK
mongoDataProcessor = MongoDataProcessor('1018', address=mongoAddress)
bpModelDataPrcocessor = BPModelDataProcessor()
allTags = mongoDataProcessor.getMetaTagList()

# get source data from mongo
# flattenHeader, flattenData = mongoDataProcessor.getFlattenTagData(additionalTags=['creditScore'])

# the test way, may be not used in the future
flattenData = pd.read_csv('./Data/housing_source.csv')
(row, column) = np.shape(flattenData)
# columnScore = pd.Series(np.random.randint(100,200,row))
# flattenData['creditScore']=columnScore

# Random stand-in targets: `row` integers drawn from [120, 200).
targetHeader = ['creditScore']
targetData = np.random.randint(low=120, high=200, size=row)
# coding=utf-8
import sys
import numpy as np
sys.path.append("../../Commons/DataProcessor")
sys.path.append("../../Commons/ModelDataPreProcessor")
sys.path.append("../../Commons/Utils")
sys.path.append("../../Commons/Normalizer")
from BPModelDataProcessor import BPModelDataProcessor
from MongoDataProcessor import MongoDataProcessor
import Utility as util
from ZScaleNormalizer import ZScaleNormalizer

mongoDataProessorTest = MongoDataProcessor('1019', address='192.168.1.120')
bpModelDataPrcocessor = BPModelDataProcessor()

# get the raw data from MongoDB: the full flattened table (with creditScore
# added), then the category and numerical views of it
flattenHeader, flattenData = mongoDataProessorTest.getFlattenTagData(
    additionalTags=['creditScore'])
categoryHeader, categoryData = mongoDataProessorTest.getFlattenCategoryData()
numericalHeader, numericalData = mongoDataProessorTest.getFlattenNumericalData()

# NOTE(review): bound to a single name with the header hard-coded next to
# it -- confirm whether getTargetTagData() returns only the data or a
# (header, data) pair.
targetData = mongoDataProessorTest.getTargetTagData(targetTags=['creditScore'])
targetHead = ['creditScore']

categoryInfo = mongoDataProessorTest.getCategoryInfo()
tagDataByTagGroups = mongoDataProessorTest.getTagDataByTagGroups()

# replace strange data with default value
fixedCategoryData = util.getFixedData(categoryData)
fixedNumericalData = util.getFixedData(numericalData)

# convert data to numpy format