tmpI, tmpJ = 0, 0 dr = DataReader() baseDf, ansY = dr.cvtPathListToDfList(_basePath + "010_blenderXgboost_train.csv", "train") tmpOutPath = _basePath + "010_train_last_blender.csv" tmpFeatureBlendedAns = pd.DataFrame() baseDf = pd.DataFrame() tmpDfList = [] for tmpClfName in clfNameList: dr = DataReader() tmpPath = _basePath + "010_" + "blender" + tmpClfName + "_train.csv" newX, tmpY = dr.cvtPathListToDfList(tmpPath, "train") tmpDfList.append(newX) b1 = Blender(clfNameList, tmpDfList, ansY) b1.autoFlow(2000, tmpOutPath) # test finalWeight = b1._bestParamList tmpOutPath = _basePath + "010_test_last_blender.csv" baseDf = pd.DataFrame() tmpI = 0 finalTestDf = pd.DataFrame() for tmpClfName in clfNameList: dr = DataReader() tmpPath = _basePath + "010_" + "blender" + tmpClfName + "_test.csv" newX, tmpY = dr.cvtPathListToDfList(tmpPath, "train") tmpWeight = finalWeight[tmpI] newX.multiply(tmpWeight) if tmpI == 0:
# Read the Random Forest and Xgboost stage-008 training submissions and add
# each resulting frame to tmpDfList. After the loop tmpPath, newX and newY
# hold the values from the last file read.
for tmpFileName in (
        "008_submission_1_train_Random_Forest.csv",
        "008_submission_1_train_Xgboost.csv"):
    tmpPath = _basePath + tmpFileName
    newX, newY = dr.cvtPathListToDfList(tmpPath, "train")
    tmpDfList.append(newX)

# Classifier names passed to Blender together with tmpDfList.
clfNameList = ["Extra_Trees", "K_NN", "RandomForest", "Xgboost"]

b1 = Blender(clfNameList, tmpDfList, newY)
b1.autoFlow(1000, outputPath)

# Optionally replace newX with the test frame loaded by the reader.
if doTestFlag == True:
    dr.readInCSV(testPath, "test")
    newX = dr._testDataFrame
    # newX = pd.DataFrame(newX[newX.columns[0]])
    # print newX

# 3. get all best model from newX (disabled)
# fab = ModelFactory()
# fab._gridSearchFlag = True
# fab._n_iter_search = 100
# fab._expInfo = expInfo
# fab.getXgboostClf(newX, newY)
# Read the Random Forest and Xgboost stage-008 training submissions and add
# each resulting frame to tmpDfList. After the loop tmpPath, newX and newY
# hold the values from the last file read.
for tmpFileName in (
        "008_submission_1_train_Random_Forest.csv",
        "008_submission_1_train_Xgboost.csv"):
    tmpPath = _basePath + tmpFileName
    newX, newY = dr.cvtPathListToDfList(tmpPath, "train")
    tmpDfList.append(newX)

# Classifier names passed to Blender together with tmpDfList.
clfNameList = ["Extra_Trees", "K_NN", "RandomForest", "Xgboost"]

b1 = Blender(clfNameList, tmpDfList, newY)
b1.autoFlow(1000, outputPath)

# Optionally replace newX with the test frame loaded by the reader.
if doTestFlag == True:
    dr.readInCSV(testPath, "test")
    newX = dr._testDataFrame
    # newX = pd.DataFrame(newX[newX.columns[0]])
    # print newX

# 3. get all best model from newX (disabled)
# fab = ModelFactory()
# fab._gridSearchFlag = True
# fab._n_iter_search = 100
# fab._expInfo = expInfo
# fab.getXgboostClf(newX, newY)
tmpI, tmpJ = 0, 0 dr = DataReader() baseDf, ansY = dr.cvtPathListToDfList( _basePath + "010_blenderXgboost_train.csv", "train") tmpOutPath = _basePath + "010_train_last_blender.csv" tmpFeatureBlendedAns = pd.DataFrame() baseDf = pd.DataFrame() tmpDfList = [] for tmpClfName in clfNameList: dr = DataReader() tmpPath = _basePath + "010_" + "blender" + tmpClfName + "_train.csv" newX, tmpY = dr.cvtPathListToDfList(tmpPath, "train") tmpDfList.append(newX) b1 = Blender(clfNameList, tmpDfList, ansY) b1.autoFlow(2000, tmpOutPath) # test finalWeight = b1._bestParamList tmpOutPath = _basePath + "010_test_last_blender.csv" baseDf = pd.DataFrame() tmpI = 0 finalTestDf = pd.DataFrame() for tmpClfName in clfNameList: dr = DataReader() tmpPath = _basePath + "010_" + "blender" + tmpClfName + "_test.csv" newX, tmpY = dr.cvtPathListToDfList(tmpPath, "train") tmpWeight = finalWeight[tmpI] newX.multiply(tmpWeight) if tmpI == 0: