예제 #1
0
        
#      4. test all data, output 3 ans as features
#     D:\Kaggle\Telstra\004_one_hot_resource_type\(Xgboost)_(2016-02-06_11_14_31).model
#     D:\Kaggle\Telstra\004_one_hot_resource_type\(Random_Forest)_(2016-02-06_11_24_09).model
#     D:\Kaggle\Telstra\004_one_hot_resource_type\(Extra_Trees)_(2016-02-06_11_30_52).model
#     D:\Kaggle\Telstra\004_one_hot_resource_type\(K_NN)_(2016-02-06_11_40_10).model

    # Score one CSV with every model found under models/stacked and write each
    # model's predict_proba output to its own "<expNo>_blender<model>_train.csv".
    modelFolder = _basePath + "models" + Config.osSep + "stacked" + Config.osSep

    clfNameList = [
        "Extra_Trees",
        "K_NN",
        "Random_Forest",
        "Xgboost",
        "Logistic_Regression",
    ]

    # NOTE(review): the variable is called testCsv but the file is the
    # "010_train_tobe.csv" training file, loaded in "train" mode -- confirm
    # that scoring the training data is what is intended here.
    testCsv = _basePath + "010_train_tobe.csv"
    dr = DataReader()
    newX, testY = dr.cvtPathListToDfList(testCsv, "train")

    for curModel in clfNameList:
        modelPath = modelFolder + str(getMatchNameModelPath(modelFolder, curModel))
        tmpOutPath = _basePath + expNo + "_blender" + curModel + "_train.csv"
        tmpClf = loadModel(modelPath)
        # Predict once and reuse the result for both the log line and the CSV;
        # the original ran predict_proba twice per model.
        probs = tmpClf.predict_proba(newX)
        log(probs)
        # Only the probabilities are written; the input features in newX are
        # deliberately not concatenated into the output.
        outDf = pd.DataFrame(probs)
        outDf.to_csv(tmpOutPath, sep=',', encoding='utf-8')
        #musicAlarm()

    
예제 #2
0
    #D:\Kaggle\Telstra\004_one_hot_resource_type\(K_NN)_(2016-02-06_11_40_10).model
    #Logistic_Regression
    # For every (feature, model) pair: load the feature's CSV, load the model
    # stored under models/<feature>/, and write the model's predict_proba
    # output to "010_<model>_stack_<feature>_test.csv".
    modelList = [
        "Xgboost", "Random_Forest", "Extra_Trees", "K_NN",
        "Logistic_Regression"
    ]
    featureList = [
        "event_type", "log_feature", "resource_type", "severity_type"
    ]

    for tmpFeature in featureList:
        subFolder = tmpFeature

        # The input frame and model folder depend only on the feature, so they
        # are prepared once per feature instead of once per model.
        # NOTE(review): a "*_test_tobe.csv" file is read in "train" mode and
        # taken from _trainDataFrame -- confirm this is intended.
        tmpCsvPath = _basePath + expNo + "_" + tmpFeature + "_test_tobe.csv"
        dr = DataReader()
        dr.readInCSV(tmpCsvPath, "train")
        newX = dr._trainDataFrame
        modelFolder = _basePath + "models" + Config.osSep + subFolder + Config.osSep

        for tmpModel in modelList:
            curModel = tmpModel

            modelPath = modelFolder + str(
                getMatchNameModelPath(modelFolder, curModel))
            tmpOutPath = _basePath + "010_" + curModel + "_stack_" + subFolder + "_test.csv"
            tmpClf = loadModel(modelPath)
            # Predict once; the original called predict_proba three times and
            # built a concat of newX + probabilities that was immediately
            # overwritten. Only the probabilities are written, as before.
            probs = tmpClf.predict_proba(newX)
            log(probs)
            outDf = pd.DataFrame(probs)
            outDf.to_csv(tmpOutPath, sep=',', encoding='utf-8')
    #musicAlarm()
#     log("004 Done")
예제 #3
0
    # Collect the rows for which the binary Xgboost model gives the recorded
    # answer a probability below 0.35, and log how many there are.
    newX, newY = dr._trainDataFrame, dr._ansDataFrame

    dr2 = DataReader()
    dr2.readInCSV(testPath, "test")
    #newX = dr2._testDataFrame

    # NOTE(review): this frame is replaced by the dr4 frame a few lines below,
    # so the ids read from testSortIdPath are never used in the visible code.
    dr3 = DataReader()
    dr3.readInCSV(testSortIdPath, "test")
    sortIdDf = dr3._testDataFrame

    dr4 = DataReader()
    dr4.readInCSV(trainSortIdPath, "test")
    sortIdDf = dr4._testDataFrame

    modelFolder = _basePath + "models" + Config.osSep + "binary" + Config.osSep
    curModel = "Xgboost"
    modelPath = modelFolder + str(getMatchNameModelPath(modelFolder, curModel))
    tmpOutPath = _basePath + expNo + "_" + curModel + "_test_ans.csv"
    tmpClf = loadModel(modelPath)
    # Predict once and reuse the result; the original ran predict_proba twice.
    ans = tmpClf.predict_proba(newX)
    log(ans)

    ansList = []
    # NOTE(review): iterating a pandas DataFrame yields column labels, not
    # rows. This loop only visits one answer per sample if _ansDataFrame is a
    # Series (or another 1-D sequence) -- confirm against DataReader.
    # Also assumes each answer is an integer usable as a predict_proba column
    # index, and that sortIdDf rows line up with newX rows -- TODO confirm.
    for i, tmpAns in enumerate(dr._ansDataFrame):
        prob = ans[i][tmpAns]
        if prob < 0.35:
            # (id from the first sort-id column, probability, answer)
            flagged = (sortIdDf[sortIdDf.columns[0]][i], prob, tmpAns)
            log(flagged)
            ansList.append(flagged)

    log(len(ansList))