コード例 #1
0
def stacknet_train_test(X, y, text=False):
    """Fit a two-level StackNet on a 60/40 split and append its report to disk.

    Args:
        X: Feature matrix passed to ``train_test_split``.
        y: Labels aligned with ``X``.
        text: Truthy selects the report name ``"stackNet_text"``; otherwise
            ``"stackNet"`` is used.

    The held-out 40% is scored with ``predict_proba``.  Hard labels and the
    first probability column are handed to ``get_report`` and the text it
    returns is appended to ``final_results/final_report_stackNet.txt``.
    """
    first_level = [
        RandomForestClassifier(n_estimators=500, max_depth=3, random_state=0),
        ExtraTreesClassifier(n_estimators=100, max_depth=5, random_state=0),
        SGDClassifier(loss="log", penalty="l2", max_iter=5),
        KNeighborsClassifier(n_neighbors=5),
        LogisticRegression(random_state=0),
        MLPClassifier(
            solver='lbfgs',
            alpha=1e-5,
            hidden_layer_sizes=(5, 2),
            random_state=0,
            learning_rate='invscaling',
        ),
        AdaBoostClassifier(n_estimators=500, learning_rate=1e-3, random_state=0),
    ]
    second_level = [
        RandomForestClassifier(n_estimators=1000, max_depth=5, random_state=0),
    ]
    models = [first_level, second_level]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=0, shuffle=True
    )

    model = StackNetClassifier(
        models,
        metric="f1",
        folds=4,
        restacking=True,
        use_retraining=True,
        use_proba=True,
        random_state=0,
        verbose=1,
    )
    model.fit(X_train, y_train)
    probabilities = model.predict_proba(X_test)

    # Label 0 only when column 0 is strictly larger; ties fall to label 1.
    y_pred = np.array([int(not row[0] > row[1]) for row in probabilities])
    # NOTE(review): the score is taken from probability column 0, not column 1.
    # Confirm this is what get_report expects.
    y_score = [row[0] for row in probabilities]

    name = "stackNet_text" if text else "stackNet"

    # get_report is imported from models_final.py
    report_card = get_report(y_test, y_pred, y_score, name)

    with open("final_results/final_report_stackNet.txt", "a") as f:
        for chunk in (
            f"Classification report for {name}: \n",
            report_card,
            "\n",
            "-----------------------------------------------------------------",
            "\n",
        ):
            f.write(chunk)
コード例 #2
0
def test_pystacknet():
    """Run StackNetClassifier and StackNetRegressor through a series of option variants.

    Every scenario builds a StackNet, fits it, predicts on the held-out data
    and prints one score line.  Nothing is asserted; a scenario fails only by
    raising.

    The function reads module-level fixtures that are defined outside this
    block: ``x_train``, ``y_train``, ``x_test``, ``y_test``, ``y2d``,
    ``w_train``, ``w_test`` and the metric callables ``gini``, ``rmse``,
    ``mae`` and ``R``.

    NOTE(review): ``y2d[:100]`` / ``y2d[100:]`` are used as the train / test
    parts of a second target, which presumably assumes ``x_train`` has 100
    rows -- confirm against the fixture code.
    """
    Xn = np.array(x_train)
    yn = np.array(y_train)
    print(Xn.shape, yn.shape)

    # Settings shared by every tree ensemble below.
    tree_kw = dict(max_depth=5, max_features=0.5, random_state=1)

    def rf_clf(n_estimators=100):
        return RandomForestClassifier(n_estimators=n_estimators, criterion="entropy", **tree_kw)

    def et_clf():
        return ExtraTreesClassifier(n_estimators=100, criterion="entropy", **tree_kw)

    def gb_clf():
        return GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, **tree_kw)

    def rf_reg(n_estimators=100):
        return RandomForestRegressor(n_estimators=n_estimators, **tree_kw)

    def et_reg():
        return ExtraTreesRegressor(n_estimators=100, **tree_kw)

    def gb_reg():
        return GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, **tree_kw)

    def stack_clf(levels, metric="auc", folds=4, restacking=False, use_retraining=True):
        """Build a StackNetClassifier with the options common to all scenarios."""
        return StackNetClassifier(levels, metric=metric, folds=folds, restacking=restacking,
                                  use_retraining=use_retraining, use_proba=True,
                                  random_state=12345, n_jobs=1, verbose=1)

    def stack_reg(levels, metric="rmse", folds=4, restacking=False, use_retraining=True):
        """Build a StackNetRegressor with the options common to all scenarios."""
        return StackNetRegressor(levels, metric=metric, folds=folds, restacking=restacking,
                                 use_retraining=use_retraining, random_state=12345,
                                 n_jobs=1, verbose=1)

    def fit_proba(model, train_X, train_y, test_X, **fit_kw):
        """Fit ``model`` and return the full ``predict_proba`` output for ``test_X``."""
        model.fit(train_X, train_y, **fit_kw)
        return model.predict_proba(test_X)

    def fit_predict(model, train_X, train_y, **fit_kw):
        """Fit ``model`` and return ``predict`` on the dense ``x_test`` fixture."""
        model.fit(train_X, train_y, **fit_kw)
        return model.predict(x_test)

    #####################################################################################
    ###############################  CLASSIFICATION #####################################
    #####################################################################################

    models = [
        [rf_clf(), et_clf(), gb_clf(), LogisticRegression(random_state=1)],
        [rf_clf(200)],
    ]

    # metric computed on hard labels
    preds = fit_proba(stack_clf(models, metric="accuracy"), x_train, y_train, x_test)[:, 1]
    print("accuracy test 1 , auc %f " % (roc_auc_score(y_test, preds)))

    # metric computed on probabilities
    preds = fit_proba(stack_clf(models), x_train, y_train, x_test)[:, 1]
    print("auc test 2 , auc %f " % (roc_auc_score(y_test, preds)))

    # custom metric callable (the reported value is gini, so label it as such)
    preds = fit_proba(stack_clf(models, metric=gini), x_train, y_train, x_test)[:, 1]
    print("custom metric gini test 3 , gini %f " % (gini(y_test, preds)))

    # numpy input
    preds = fit_proba(stack_clf(models), Xn, yn, np.array(x_test))[:, 1]
    print("numpy auc test 4 , auc %f " % (roc_auc_score(y_test, preds)))

    # csr_matrix input
    preds = fit_proba(stack_clf(models), csr_matrix(Xn), yn, csr_matrix(x_test))[:, 1]
    print("csr auc test 5 , auc %f " % (roc_auc_score(y_test, preds)))

    # restacking
    preds = fit_proba(stack_clf(models, restacking=True),
                      csr_matrix(Xn), yn, csr_matrix(x_test))[:, 1]
    print("restacking auc test 6 , auc %f " % (roc_auc_score(y_test, preds)))

    # without retraining
    preds = fit_proba(stack_clf(models, restacking=True, use_retraining=False),
                      csr_matrix(Xn), yn, csr_matrix(x_test))[:, 1]
    print("no retraining auc test 7 , auc %f " % (roc_auc_score(y_test, preds)))

    # custom k-fold object
    # NOTE(review): this is the positional-y / n_folds constructor signature;
    # sklearn.model_selection.StratifiedKFold takes n_splits and no y.  Confirm
    # which StratifiedKFold this file imports.
    k = StratifiedKFold(yn, n_folds=4, shuffle=True, random_state=1251)
    preds = fit_proba(stack_clf(models, folds=k, restacking=True, use_retraining=False),
                      csr_matrix(Xn), yn, csr_matrix(x_test))[:, 1]
    print("custom kfold auc test 8 , auc %f " % (roc_auc_score(y_test, preds)))

    # regressor in base level
    models_reg = [
        [rf_clf(), et_reg(), gb_clf(), LogisticRegression(random_state=1)],
        [rf_clf(200)],
    ]
    preds = fit_proba(stack_clf(models_reg), x_train, y_train, x_test)[:, 1]
    print("with regressor test 9 , auc %f " % (roc_auc_score(y_test, preds)))

    # transformer in base level
    models_pca = [
        [rf_clf(), et_reg(), gb_clf(), LogisticRegression(random_state=1),
         PCA(n_components=4, random_state=1)],
        [rf_clf(200)],
    ]
    preds = fit_proba(stack_clf(models_pca), x_train, y_train, x_test)[:, 1]
    print("with PCA test 10 , auc %f " % (roc_auc_score(y_test, preds)))

    # multiclass metric: all probability columns are scored with log_loss
    preds = fit_proba(stack_clf(models, metric="logloss"), x_train, y2d[:100], x_test)
    print("logloss test 11 , logloss %f " % (log_loss(y2d[100:], preds)))

    # 3 levels
    models3 = [
        [rf_clf(), et_clf(), gb_clf(), LogisticRegression(random_state=1)],
        [gb_clf(), LogisticRegression(random_state=1)],
        [rf_clf(200)],
    ]
    preds = fit_proba(stack_clf(models3, metric="logloss"), x_train, y2d[:100], x_test)
    print("3 levels test 12 , logloss %f " % (log_loss(y2d[100:], preds)))

    # with sample_weight
    preds = fit_proba(stack_clf(models), x_train, y_train, x_test,
                      sample_weight=w_train)[:, 1]
    print("auc weighted test 13 , auc %f " % (roc_auc_score(y_test, preds, sample_weight=w_test)))

    #####################################################################################
    ###############################  REGRESSION #########################################
    #####################################################################################

    reg_models = [
        [rf_reg(), et_reg(), gb_reg(), Ridge(random_state=1)],
        [rf_reg(200)],
    ]

    # rmse metric
    preds = fit_predict(stack_reg(reg_models), x_train, y_train)
    print("rmse test 1 , %f " % (rmse(y_test, preds)))

    # mae metric
    preds = fit_predict(stack_reg(reg_models, metric="mae"), x_train, y_train)
    print("mae test 2 , %f " % (mae(y_test, preds)))

    # custom metric callable
    preds = fit_predict(stack_reg(reg_models, metric=R), x_train, y_train)
    print("custom metric R test 3  %f " % (R(y_test, preds)))

    # numpy input (prediction still uses the x_test fixture as-is)
    preds = fit_predict(stack_reg(reg_models), Xn, yn)
    print("numpy rmse test 4  %f " % (rmse(y_test, preds)))

    # csr_matrix input (prediction still uses the x_test fixture as-is)
    preds = fit_predict(stack_reg(reg_models), csr_matrix(Xn), yn)
    print("csr test 5 , rmse %f " % (rmse(y_test, preds)))

    # restacking
    preds = fit_predict(stack_reg(reg_models, restacking=True), csr_matrix(Xn), yn)
    print("restacking rmse test 6 , rmse %f " % (rmse(y_test, preds)))

    # without retraining
    preds = fit_predict(stack_reg(reg_models, restacking=True, use_retraining=False),
                        csr_matrix(Xn), yn)
    print("no retraining rmse test 7, rmse %f " % (rmse(y_test, preds)))

    # custom k-fold object (same constructor caveat as in the classification part)
    k = StratifiedKFold(yn, n_folds=4, shuffle=True, random_state=1251)
    preds = fit_predict(stack_reg(reg_models, folds=k, restacking=True, use_retraining=False),
                        csr_matrix(Xn), yn)
    print("custom kfold rmse test 8, %f " % (rmse(y_test, preds)))

    # classifier in base level (the old label said "with regressor")
    models_class = [
        [rf_reg(), ExtraTreesClassifier(n_estimators=100, **tree_kw), gb_reg(),
         Ridge(random_state=1)],
        [rf_reg(200)],
    ]
    preds = fit_predict(stack_reg(models_class), x_train, y_train)
    print("with classifier test 9, rmse %f " % (rmse(y_test, preds)))

    # transformer in base level
    models_pca = [
        [rf_reg(), et_reg(), gb_reg(), Ridge(random_state=1),
         PCA(n_components=4, random_state=1)],
        [rf_reg(200)],
    ]
    preds = fit_predict(stack_reg(models_pca), x_train, y_train)
    print("with PCA test 10 , rmse %f " % (rmse(y_test, preds)))

    # 2d target (no gradient boosting in this stack)
    models2 = [
        [rf_reg(), et_reg(), Ridge(random_state=1)],
        [rf_reg(200)],
    ]
    preds = fit_predict(stack_reg(models2), x_train, np.column_stack((y_train, y2d[:100])))
    print("rmse test 11 , rmse %f " % (rmse(np.column_stack((y_test, y2d[100:])), preds)))

    # 3 levels
    models3 = [
        [rf_reg(), et_reg(), Ridge(random_state=1)],
        [et_reg(), Ridge(random_state=1)],
        [rf_reg(200)],
    ]
    preds = fit_predict(stack_reg(models3), x_train, y2d[:100])
    print("3 levels test 12 , rmse %f " % (rmse(y2d[100:], preds)))

    # with sample_weight
    preds = fit_predict(stack_reg(reg_models), x_train, y_train, sample_weight=w_train)
    print("rmse weighted test 13 , %f " % (rmse(y_test, preds, sample_weight=w_test)))
コード例 #3
0
                      epsilon=0.1)
    ]
]

# 4-fold cross-validation splits used for stacking.
# NOTE(review): the previous comment read "leave 4 subject out", but KFold(4)
# produces four folds -- confirm which was intended.
kf = KFold(4)
# Materialise the (train, test) index pairs.  ``kf.split`` returns a one-shot
# generator, which would yield nothing if the splits are iterated a second
# time (for example once per stacking level).
generator = list(kf.split(X_train, y_train))

# build StackNet
model = StackNetClassifier(models,
                           metric="auc",
                           folds=generator,
                           restacking=False,
                           use_retraining=True,
                           use_proba=True,
                           random_state=42,
                           n_jobs=-1,
                           verbose=1)
# fit on the training data and score the test data (second probability column)
model.fit(X_train, y_train)
y_probs = model.predict_proba(X_test)[:, 1]

# write the scores into the benchmark template and save it as the submission
csv = pd.read_csv('./data/benchmark.csv')
csv['Prediction'] = y_probs
csv.to_csv('submission_StackNet.csv', index=False)

print(
    '--------------------Submission file has been generated.--------------------------'
)
def test_pystacknet():
    """Fit a two-level StackNet on one-hot encoded data and save its predictions.

    Loads ``train.csv`` and ``test.csv`` through ``load_data``, one-hot encodes
    both with a single encoder, fits a StackNetClassifier and passes the second
    ``predict_proba`` column to ``save_results`` under ``pystacknet_pred.csv``.
    ``load_data`` and ``save_results`` are defined outside this block.
    """
    path = ""

    y, X = load_data(path, 'train.csv')
    # The first value returned for the test file is not used here.
    _, X_test = load_data(path, 'test.csv', use_labels=False)

    # === one-hot encoding === #
    # Fit the encoder on train and test together so that category IDs seen
    # in either set are encoded.
    encoder = preprocessing.OneHotEncoder()
    encoder.fit(np.vstack((X, X_test)))
    X = encoder.transform(X)  # returns a sparse matrix (see scipy.sparse)
    X_test = encoder.transform(X_test)

    #####################################################################################
    ###############################  CLASSIFICATION #####################################
    #####################################################################################

    models = [[
        LogisticRegression(C=1, random_state=1),
        LogisticRegression(C=3, random_state=1),
        Ridge(alpha=0.1, random_state=1),
        # The L1 penalty needs a solver that supports it.  "liblinear" was the
        # default in older scikit-learn; the current default ("lbfgs") rejects
        # penalty="l1", so name the solver explicitly.
        LogisticRegression(penalty="l1", solver="liblinear", C=1, random_state=1),
        XGBClassifier(max_depth=5,
                      learning_rate=0.1,
                      n_estimators=300,
                      objective="binary:logistic",
                      n_jobs=1,
                      booster="gbtree",
                      random_state=1,
                      colsample_bytree=0.4),
        XGBClassifier(max_depth=5,
                      learning_rate=0.3,
                      reg_lambda=0.1,
                      n_estimators=300,
                      objective="binary:logistic",
                      n_jobs=1,
                      booster="gblinear",
                      random_state=1,
                      colsample_bytree=0.4),
        XGBClassifier(max_depth=5,
                      learning_rate=0.1,
                      n_estimators=300,
                      objective="rank:pairwise",
                      n_jobs=1,
                      booster="gbtree",
                      random_state=1,
                      colsample_bytree=0.4),
        LGBMClassifier(boosting_type='gbdt',
                       num_leaves=40,
                       max_depth=-1,
                       learning_rate=0.01,
                       n_estimators=1000,
                       subsample_for_bin=1000,
                       objective="xentropy",
                       min_split_gain=0.0,
                       min_child_weight=0.01,
                       min_child_samples=10,
                       subsample=0.9,
                       subsample_freq=1,
                       colsample_bytree=0.5,
                       reg_alpha=0.0,
                       reg_lambda=0.0,
                       random_state=1,
                       n_jobs=1)
    ],
              [
                  RandomForestClassifier(n_estimators=300,
                                         criterion="entropy",
                                         max_depth=6,
                                         max_features=0.5,
                                         random_state=1)
              ]]

    ##################  proba metric ###############################

    model = StackNetClassifier(models,
                               metric="auc",
                               folds=4,
                               restacking=False,
                               use_retraining=True,
                               use_proba=True,
                               random_state=12345,
                               n_jobs=1,
                               verbose=1)

    model.fit(X, y)
    preds = model.predict_proba(X_test)[:, 1]

    save_results(preds, path + "pystacknet_pred.csv")
コード例 #5
0
def main():
    """Fit a StackNet of grid-searched tree ensembles on the iris data.

    The data is split with a seeded permutation (last 20% held out).  A table
    of candidate estimators and parameter grids is built, each stacked model is
    wrapped in a 5-fold ``GridSearchCV``, and a two-level StackNetClassifier is
    fitted on the training split.  Nothing is returned and the held-out split
    is not evaluated here.
    """
    # Load the data and split it into training and test sets.
    iris = load_iris()
    X, y = iris.data, iris.target

    test_size = int(0.2 * len(y))
    np.random.seed(13)
    shuffled = np.random.permutation(len(X))
    train_idx, test_idx = shuffled[:-test_size], shuffled[-test_size:]
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]  # held out; not used further below

    # Other data sets would need more involved cleaning at this point.

    # Candidate algorithms: [estimator, parameter grid, (optional) extra entry].
    # Only index 0 and 1 are read below.  The third entries are built by
    # random_forest / extra_trees / gradient_boosting, which are defined outside
    # this block (presumably hyperopt-sklearn search spaces -- confirm).
    MLA = {
        'rfc': [
            RandomForestClassifier(),
            {
                'n_estimators': [50, 100, 200],
                'criterion': ['entropy'],
                'max_depth': [4, 5, 6],
                'max_features': [.5],
                'random_state': [1],
            },
            random_forest('my_rfc'),
        ],

        'etc': [
            ExtraTreesClassifier(),
            {
                'n_estimators': [50, 100, 200],
                'criterion': ['entropy'],
                'max_depth': [4, 5, 6],
                'max_features': [.5],
                'random_state': [1],
            },
            extra_trees('my_etc'),
        ],

        'gbc': [
            GradientBoostingClassifier(),
            {
                'learning_rate': [.1, .25, .5],
                'n_estimators': [50, 100, 200],
                'max_depth': [4, 5, 6],
                'max_features': [.5],
                'random_state': [1],
            },
            gradient_boosting('my_rgc'),
        ],

        'lr': [
            LogisticRegression(),
            {
                'random_state': [1],
            },
        ],

        'svc': [
            svm.SVC(),
            {
                # SVC - http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC
                # http://blog.hackerearth.com/simple-tutorial-svm-parameter-tuning-python-r
                'C': [1, 2, 3, 4, 5],
                'gamma': [.1, .25, .5, .75, 1.0],
                'decision_function_shape': ['ovo', 'ovr'],
                'probability': [True],
                'random_state': [0],
            },
        ],

        'xgb': [
            XGBClassifier(),
            {
                # XGBClassifier - http://xgboost.readthedocs.io/en/latest/parameter.html
                'learning_rate': [.01, .03, .05, .1, .25],
                'max_depth': [1, 2, 4, 6, 8, 10],
                'n_estimators': [10, 50, 100, 300],
                'seed': [0],
            },
        ],
    }

    def grid_searched(key):
        """Wrap the estimator registered under ``key`` in a 5-fold grid search.

        A HyperoptEstimator-based search was tried here earlier but raised
        errors, so GridSearchCV is used even though it is slow.
        """
        base_estimator, grid = MLA[key][0], MLA[key][1]
        return model_selection.GridSearchCV(estimator=base_estimator, param_grid=grid, cv=5)

    # Two-level stack.  The 'gbc' and 'lr' entries are deliberately left out of
    # the first level; the same 'rfc' estimator object backs both levels.
    first_level = [grid_searched('rfc'), grid_searched('etc')]
    second_level = [grid_searched('rfc')]
    models = [first_level, second_level]

    # Stack the models with StackNet (no explicit metric is passed).
    stacknet = StackNetClassifier(
        models,
        folds=4,
        restacking=False,
        use_retraining=True,
        use_proba=True,
        random_state=12345,
        n_jobs=1,
        verbose=1,
    )
    stacknet.fit(X_train, y_train)
コード例 #6
0
    ######## Third level ########
    [
        RandomForestClassifier(n_estimators=200,
                               criterion="entropy",
                               max_depth=5,
                               max_features=0.5,
                               random_state=1),
        # LogisticRegression(random_state=1),
    ]
]
from pystacknet.pystacknet import StackNetClassifier

# Build the StackNet with the custom metric callable and all available cores.
model = StackNetClassifier(
    models,
    metric=metric_self,
    folds=5,
    restacking=False,
    use_retraining=True,
    use_proba=True,
    random_state=12345,
    n_jobs=-1,
    verbose=1,
)

# Fit on the local training split; keep the second probability column.
model.fit(X_loc_train, y_loc_train)
preds = model.predict_proba(X_loc_test)[:, 1]

# Pair each UID of `test` with its score and write a timestamped CSV.
predict_result = test.loc[:, ['UID']]
predict_result['Tag'] = preds
now = datetime.datetime.now().strftime('%m-%d-%H-%M')
predict_result[['UID', 'Tag']].to_csv("lgb_stacknet%s.csv" % (now,), index=False)
print(predict_result.head())
コード例 #7
0
                  scoring='roc_auc',
                  print_progress=True,
                  cv=5)
          ]]

# Build the StackNet; folds are used without retraining on the full data.
model = StackNetClassifier(
    models,
    metric="auc",
    folds=5,
    restacking=False,
    use_retraining=False,
    use_proba=True,
    random_state=555,
    n_jobs=1,
    verbose=2,
)

# Every column except the target is a feature.
_train_features = train.drop(TARGET_COL, axis=1)
model.fit(_train_features, train[TARGET_COL])

# Bare expression kept from the original (shows the shape when run interactively).
test.shape

# Score the test rows using the training column order, passed as a plain array.
y_pred = model.predict_proba(test[list(_train_features.columns)].values)

# Fill the submission template with the second probability column.
# NOTE(review): the filled frame is not written to disk in this block.
sample_submission = pd.read_csv('sb_test.csv')[['encounter_id', 'hospital_death']]
sample_submission[TARGET_COL] = y_pred[:, 1]

# NOTE(review): `pd` and `test` are already used by earlier statements in this
# snippet; this import and (re)load presumably come from a separate notebook
# cell -- confirm the intended execution order.
import pandas as pd
test = pd.read_csv('unlabeled.csv')
コード例 #8
0
                   subsample_freq=5,
                   colsample_bytree=0.05,
                   reg_alpha=0.1,
                   reg_lambda=0.35,
                   random_state=1,
                   n_jobs=-1)
],
          [
              RandomForestClassifier(n_estimators=300,
                                     criterion="entropy",
                                     max_depth=6,
                                     max_features=0.5,
                                     random_state=1)
          ]]

# Build the StackNet with 5 folds and 8 parallel jobs.
model = StackNetClassifier(
    models,
    metric="auc",
    folds=5,
    restacking=False,
    use_retraining=True,
    use_proba=True,
    random_state=0,
    n_jobs=8,
    verbose=1,
)

# Training frame: column 1 is the target, columns 2 onwards are the features.
_train_X = train_df.iloc[:, 2:].values
_train_y = train_df.iloc[:, 1].values
model.fit(_train_X, _train_y)

# Test frame: columns 1 onwards are the features.
preds = model.predict_proba(test_df.iloc[:, 1:].values)

# Keep the first test column only (the first two minus 'var_0'), attach the
# second probability column and write the submission file.
sub = test_df.iloc[:, :2].drop(columns=['var_0'])
sub['target'] = preds[:, 1]
sub.to_csv('submission.csv', index=False)