# NOTE(review): fragment — the enclosing loop header (which binds `i` and
# `tmpCurFeatureList`) and the definitions of log, _basePath, DataReader,
# ModelFactory, expInfo and newY all lie outside this chunk.
log(tmpCurFeatureList)        
        
        
        # Assemble the design matrix: one CSV per selected feature,
        # concatenated column-wise into a single DataFrame.
        newX = pd.DataFrame()
        
        for tmpFeature in tmpCurFeatureList:
            path = _basePath + tmpFeature + "_train.csv"
            dr = DataReader()
            # NOTE(review): reads a "_train.csv" path but passes mode "test" —
            # confirm against DataReader.cvtPathListToDfList that this is intended.
            tmpX = dr.cvtPathListToDfList(path, "test")
            newX = pd.concat([newX, tmpX], axis=1)
        #log("feature len: " , len(newX))
            
        # Get all best model from newX
        fab = ModelFactory()
        fab._setXgboostTheradToOne = True
        fab._gridSearchFlag = True
        fab._onlyTreeBasedModels = True
        # Per-iteration output folder, keyed by the loop index.
        fab._subFolderName = "one_hot_each_" + str(i)
        # 30 random parameter draws for the grid/random search.
        fab._n_iter_search = 30
        fab._expInfo = expInfo
#         fab.getAllModels(newX, newY)
        fab.getRandomForestClf(newX, newY)
#         fab.getXgboostClf(newX, newY)
        log ( i , "/32 done..." )
    
    
    
   
    
    
# Esempio n. 2 ("0" vote count) — listing-site scrape artifact separating two
# unrelated code fragments; kept as a comment so it no longer reads as code.
    
#     t = pd.get_dummies(newY)
#     finalList = []
#     for tmp in range(0,len(t)):
#         tmpList =[]
#         for i in range(0, len(t.ix[tmp])):
#             tmpList.append( int( t.ix[tmp][i]))
#         finalList.append(tmpList)
#     print finalList
    #exit()
     
 
    #print len(newX)        
    # Fit a grid-searched random forest on the previously assembled newX/newY.
    # NOTE(review): fragment — the enclosing function and the definitions of
    # ModelFactory, expInfo, newX and newY lie outside this chunk.
    fab = ModelFactory()
    fab._setXgboostTheradToOne = False
    fab._gridSearchFlag = True
    fab._subFolderName = "ismail3"  
    # Only a single random-search iteration here (contrast: 30 in the loop above).
    fab._n_iter_search = 1
    fab._expInfo = expInfo
  
    #clf = fab.getXgboostClf(newX, newY)
    clf = fab.getRandomForestClf(newX, newY)
    #print fab.getLogloss(clf,newX,newY)


    
    # NOTE(review): truncated — only the clipping prologue of this log-loss
    # helper is visible; the remainder of the body lies past this chunk.
    def llfun(act, pred):
        # Clamp predictions into [epsilon, 1 - epsilon] so a subsequent
        # log() cannot produce -inf (standard log-loss numerical guard).
        epsilon = 1e-15
        pred = sp.maximum(epsilon, pred)
        pred = sp.minimum(1-epsilon, pred)