Ejemplo n.º 1
0
    # Blend the per-classifier "010_blender<name>_train.csv" outputs, then
    # start applying the learned weights to the matching "_test.csv" outputs.
    # The enclosing function header and the tail of the test loop are outside
    # this excerpt, so every local name is kept exactly as the original had it.
    tmpI, tmpJ = 0, 0
    dr = DataReader()
    # Only ansY from this read is used below (as Blender's third argument);
    # baseDf is replaced by an empty frame a few lines further down.
    baseDf, ansY = dr.cvtPathListToDfList(_basePath + "010_blenderXgboost_train.csv", "train")

    tmpOutPath = _basePath + "010_train_last_blender.csv"
    tmpFeatureBlendedAns = pd.DataFrame()
    baseDf = pd.DataFrame()
    tmpDfList = []
    # One frame per classifier name, appended in clfNameList order.
    for tmpClfName in clfNameList:
        dr = DataReader()
        tmpPath = _basePath + "010_" + "blender" + tmpClfName + "_train.csv"
        newX, tmpY = dr.cvtPathListToDfList(tmpPath, "train")
        tmpDfList.append(newX)

    b1 = Blender(clfNameList, tmpDfList, ansY)
    b1.autoFlow(2000, tmpOutPath)

    # test
    # presumably _bestParamList holds one weight per entry of clfNameList,
    # in the same order -- verify against Blender.
    finalWeight = b1._bestParamList
    tmpOutPath = _basePath + "010_test_last_blender.csv"
    baseDf = pd.DataFrame()
    tmpI = 0
    finalTestDf = pd.DataFrame()
    for tmpClfName in clfNameList:
        dr = DataReader()
        tmpPath = _basePath + "010_" + "blender" + tmpClfName + "_test.csv"
        # NOTE(review): the *_test.csv files are read with the "train" mode --
        # confirm this is intended.
        newX, tmpY = dr.cvtPathListToDfList(tmpPath, "train")
        # tmpI selects the weight for the current classifier.
        # NOTE(review): no increment of tmpI is visible in this excerpt --
        # confirm the truncated remainder of the loop advances it.
        tmpWeight = finalWeight[tmpI]
        # DataFrame.multiply returns a new frame instead of modifying newX in
        # place; the original discarded that result, so the weight was never
        # applied. Rebind newX so the code that follows sees weighted values.
        newX = newX.multiply(tmpWeight)
        if tmpI == 0:
Ejemplo n.º 2
0
    tmpPath = _basePath + "008_submission_1_train_Random_Forest.csv"
    newX, newY =  dr.cvtPathListToDfList(tmpPath, "train")
    tmpDfList.append(newX)
    
    tmpPath = _basePath + "008_submission_1_train_Xgboost.csv"
    newX, newY =  dr.cvtPathListToDfList(tmpPath, "train")
    tmpDfList.append(newX)
    
    clfNameList = []
    clfNameList.append("Extra_Trees")
    clfNameList.append("K_NN")
    clfNameList.append("RandomForest")
    clfNameList.append("Xgboost")
    
    
    b1 = Blender(clfNameList, tmpDfList, newY)
    b1.autoFlow(1000, outputPath)
    
    if doTestFlag == True:
        dr.readInCSV(testPath , "test")
        newX = dr._testDataFrame
        #newX = pd.DataFrame(newX[newX.columns[0]])
        #print newX
 
    
    # 3. get all best model from newX
#     fab = ModelFactory()
#     fab._gridSearchFlag = True
#     fab._n_iter_search = 100
#     fab._expInfo = expInfo
#     fab.getXgboostClf(newX, newY)
Ejemplo n.º 3
0
    tmpPath = _basePath + "008_submission_1_train_Random_Forest.csv"
    newX, newY = dr.cvtPathListToDfList(tmpPath, "train")
    tmpDfList.append(newX)

    tmpPath = _basePath + "008_submission_1_train_Xgboost.csv"
    newX, newY = dr.cvtPathListToDfList(tmpPath, "train")
    tmpDfList.append(newX)

    clfNameList = []
    clfNameList.append("Extra_Trees")
    clfNameList.append("K_NN")
    clfNameList.append("RandomForest")
    clfNameList.append("Xgboost")

    b1 = Blender(clfNameList, tmpDfList, newY)
    b1.autoFlow(1000, outputPath)

    if doTestFlag == True:
        dr.readInCSV(testPath, "test")
        newX = dr._testDataFrame
        #newX = pd.DataFrame(newX[newX.columns[0]])
        #print newX

    # 3. get all best model from newX
#     fab = ModelFactory()
#     fab._gridSearchFlag = True
#     fab._n_iter_search = 100
#     fab._expInfo = expInfo
#     fab.getXgboostClf(newX, newY)
Ejemplo n.º 4
0
    # Blend the per-classifier "010_blender<name>_train.csv" outputs, then
    # start applying the learned weights to the matching "_test.csv" outputs.
    # The enclosing function header and the tail of the test loop are outside
    # this excerpt, so every local name is kept exactly as the original had it.
    tmpI, tmpJ = 0, 0
    dr = DataReader()
    # Only ansY from this read is used below (as Blender's third argument);
    # baseDf is replaced by an empty frame a few lines further down.
    baseDf, ansY = dr.cvtPathListToDfList(
        _basePath + "010_blenderXgboost_train.csv", "train")

    tmpOutPath = _basePath + "010_train_last_blender.csv"
    tmpFeatureBlendedAns = pd.DataFrame()
    baseDf = pd.DataFrame()
    tmpDfList = []
    # One frame per classifier name, appended in clfNameList order.
    for tmpClfName in clfNameList:
        dr = DataReader()
        tmpPath = _basePath + "010_" + "blender" + tmpClfName + "_train.csv"
        newX, tmpY = dr.cvtPathListToDfList(tmpPath, "train")
        tmpDfList.append(newX)

    b1 = Blender(clfNameList, tmpDfList, ansY)
    b1.autoFlow(2000, tmpOutPath)

    # test
    # presumably _bestParamList holds one weight per entry of clfNameList,
    # in the same order -- verify against Blender.
    finalWeight = b1._bestParamList
    tmpOutPath = _basePath + "010_test_last_blender.csv"
    baseDf = pd.DataFrame()
    tmpI = 0
    finalTestDf = pd.DataFrame()
    for tmpClfName in clfNameList:
        dr = DataReader()
        tmpPath = _basePath + "010_" + "blender" + tmpClfName + "_test.csv"
        # NOTE(review): the *_test.csv files are read with the "train" mode --
        # confirm this is intended.
        newX, tmpY = dr.cvtPathListToDfList(tmpPath, "train")
        # tmpI selects the weight for the current classifier.
        # NOTE(review): no increment of tmpI is visible in this excerpt --
        # confirm the truncated remainder of the loop advances it.
        tmpWeight = finalWeight[tmpI]
        # DataFrame.multiply returns a new frame instead of modifying newX in
        # place; the original discarded that result, so the weight was never
        # applied. Rebind newX so the code that follows sees weighted values.
        newX = newX.multiply(tmpWeight)
        if tmpI == 0: