def stacknet_train_test(X, y, text=False):
    models = [
        # First level
        [RandomForestClassifier(n_estimators=500, max_depth=3, random_state=0),
         ExtraTreesClassifier(n_estimators=100, max_depth=5, random_state=0),
         SGDClassifier(loss="log", penalty="l2", max_iter=5),
         KNeighborsClassifier(n_neighbors=5),
         LogisticRegression(random_state=0),
         MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2),
                       random_state=0, learning_rate='invscaling'),
         AdaBoostClassifier(n_estimators=500, learning_rate=1e-3, random_state=0)],
        # Second level
        [RandomForestClassifier(n_estimators=1000, max_depth=5, random_state=0)]
    ]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                        random_state=0, shuffle=True)
    model = StackNetClassifier(models, metric="f1", folds=4, restacking=True,
                               use_retraining=True, use_proba=True,
                               random_state=0, verbose=1)
    model.fit(X_train, y_train)
    y_init = model.predict_proba(X_test)
    # predict_proba returns one column per class: take the larger column as the
    # predicted label and keep the class-0 probability as the score
    y_pred = np.array([0 if i[0] > i[1] else 1 for i in y_init])
    y_score = [i[0] for i in y_init]
    name = "stackNet_text" if text else "stackNet"
    # this function is imported from the models_final.py file
    report_card = get_report(y_test, y_pred, y_score, name)
    with open("final_results/final_report_stackNet.txt", "a") as f:
        f.write(f"Classification report for {name}: \n")
        f.write(report_card)
        f.write("\n")
        f.write("-----------------------------------------------------------------")
        f.write("\n")
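# get_report is defined in models_final.py, which is not shown here. A minimal
# stand-in, assuming it returns a text report built from the predictions and
# scores (name and format are illustrative assumptions, not the author's code):
from sklearn.metrics import classification_report, roc_auc_score

def get_report(y_true, y_pred, y_score, name):
    report = classification_report(y_true, y_pred)
    # y_score above holds the class-0 probability, so flip it for a
    # positive-class AUC
    auc = roc_auc_score(y_true, 1 - np.asarray(y_score))
    return "%s (AUC %.4f)\n%s" % (name, auc, report)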
def test_pystacknet():
    Xn = np.array(x_train)
    yn = np.array(y_train)
    print(Xn.shape, yn.shape)

    #####################################################################################
    ###############################  CLASSIFICATION  ###################################
    #####################################################################################

    models = [
        [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1)],
        [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    ]

    ##################  no proba metric  ###############################
    model = StackNetClassifier(models, metric="accuracy", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("accuracy test 1 , auc %f " % roc_auc_score(y_test, preds))

    ##################  proba metric  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("auc test 2 , auc %f " % roc_auc_score(y_test, preds))

    ##################  custom metric  ###############################
    model = StackNetClassifier(models, metric=gini, folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("custom metric gini test 3 , gini %f " % gini(y_test, preds))

    ##################  numpy input  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(Xn, yn)
    preds = model.predict_proba(np.array(x_test))[:, 1]
    print("numpy auc test 4 , auc %f " % roc_auc_score(y_test, preds))

    ##################  csr_matrix input  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict_proba(csr_matrix(x_test))[:, 1]
    print("csr auc test 5 , auc %f " % roc_auc_score(y_test, preds))

    ##################  restacking  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=True,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict_proba(csr_matrix(x_test))[:, 1]
    print("restacking auc test 6 , auc %f " % roc_auc_score(y_test, preds))

    ##################  without retraining  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=True,
                               use_retraining=False, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict_proba(csr_matrix(x_test))[:, 1]
    print("no retraining auc test 7 , auc %f " % roc_auc_score(y_test, preds))

    ##################  custom k-fold object  ###############################
    # the old StratifiedKFold(y, n_folds=...) API is gone; build the splits
    # with the modern interface and pass them to folds
    k = list(StratifiedKFold(n_splits=4, shuffle=True, random_state=1251).split(Xn, yn))
    model = StackNetClassifier(models, metric="auc", folds=k, restacking=True,
                               use_retraining=False, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict_proba(csr_matrix(x_test))[:, 1]
    print("custom kfold auc test 8 , auc %f " % roc_auc_score(y_test, preds))

    ##################  regressor in base level  ###############################
    models_reg = [
        [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1)],
        [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetClassifier(models_reg, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("with regressor test 9 , auc %f " % roc_auc_score(y_test, preds))

    ##################  transformer in base level  ###############################
    models_pca = [
        [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1),
         PCA(n_components=4, random_state=1)],
        [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetClassifier(models_pca, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("with PCA test 10 , auc %f " % roc_auc_score(y_test, preds))

    ##################  multiclass metric  ###############################
    model = StackNetClassifier(models, metric="logloss", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y2d[:100])
    preds = model.predict_proba(x_test)
    print("logloss test 11 , logloss %f " % log_loss(y2d[100:], preds))

    ##################  3 levels  ###############################
    models3 = [
        [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1)],
        [GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1)],
        [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetClassifier(models3, metric="logloss", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y2d[:100])
    preds = model.predict_proba(x_test)
    print("3 levels test 12 , logloss %f " % log_loss(y2d[100:], preds))

    ##################  with sample_weight  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train, sample_weight=w_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("auc weighted test 13 , auc %f " % roc_auc_score(y_test, preds, sample_weight=w_test))

    #####################################################################################
    ###############################  REGRESSION  #######################################
    #####################################################################################

    models = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]

    ##################  rmse metric  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("rmse test 1 , %f " % rmse(y_test, preds))

    ##################  mae metric  ###############################
    model = StackNetRegressor(models, metric="mae", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("mae test 2 , %f " % mae(y_test, preds))

    ##################  custom metric  ###############################
    model = StackNetRegressor(models, metric=R, folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("custom metric R test 3 %f " % R(y_test, preds))

    ##################  numpy input  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(Xn, yn)
    preds = model.predict(x_test)
    print("numpy rmse test 4 %f " % rmse(y_test, preds))

    ##################  csr_matrix input  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict(x_test)
    print("csr test 5 , rmse %f " % rmse(y_test, preds))

    ##################  restacking  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=True,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict(x_test)
    print("restacking rmse test 6 , rmse %f " % rmse(y_test, preds))

    ##################  without retraining  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=True,
                              use_retraining=False, random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict(x_test)
    print("no retraining rmse test 7, rmse %f " % rmse(y_test, preds))

    ##################  custom k-fold object  ###############################
    # stratification does not apply to a continuous target, so use a plain
    # KFold here (the original called the removed StratifiedKFold(y, n_folds=...) API)
    k = list(KFold(n_splits=4, shuffle=True, random_state=1251).split(Xn))
    model = StackNetRegressor(models, metric="rmse", folds=k, restacking=True,
                              use_retraining=False, random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict(x_test)
    print("custom kfold rmse test 8, %f " % rmse(y_test, preds))

    ##################  classifier in base level  ###############################
    models_class = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesClassifier(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetRegressor(models_class, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("with classifier test 9, rmse %f " % rmse(y_test, preds))

    ##################  transformer in base level  ###############################
    models_pca = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1),
         PCA(n_components=4, random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetRegressor(models_pca, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("with PCA test 10 , rmse %f " % rmse(y_test, preds))

    ##################  2d target  ###############################
    models2 = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         # GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetRegressor(models2, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, np.column_stack((y_train, y2d[:100])))
    preds = model.predict(x_test)
    print("rmse test 11 , rmse %f " % rmse(np.column_stack((y_test, y2d[100:])), preds))

    ##################  3 levels  ###############################
    models3 = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         # GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetRegressor(models3, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y2d[:100])
    preds = model.predict(x_test)
    print("3 levels test 12 , rmse %f " % rmse(y2d[100:], preds))

    ##################  with sample_weight  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train, sample_weight=w_train)
    preds = model.predict(x_test)
    print("rmse weighted test 13 , %f " % rmse(y_test, preds, sample_weight=w_test))
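# gini, rmse, mae and R are custom metrics defined elsewhere in this test
# module. Minimal sketches consistent with how they are called above (R is
# assumed here to be the coefficient of determination; all of this is
# illustrative, not the module's actual definitions):
from sklearn.metrics import (roc_auc_score, mean_squared_error,
                             mean_absolute_error, r2_score)

def gini(y_true, y_pred):
    # normalized Gini coefficient, a linear rescaling of AUC
    return 2 * roc_auc_score(y_true, y_pred) - 1

def rmse(y_true, y_pred, sample_weight=None):
    return np.sqrt(mean_squared_error(y_true, y_pred, sample_weight=sample_weight))

def mae(y_true, y_pred, sample_weight=None):
    return mean_absolute_error(y_true, y_pred, sample_weight=sample_weight)

def R(y_true, y_pred):
    return r2_score(y_true, y_pred)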
     # ... (the preceding first-level models are truncated in the source; these
     # look like the tail of an MLPClassifier's adam parameters)
                   beta_1=0.1, beta_2=0.1, epsilon=0.1)
    ]
]

# leave 4 subjects out
kf = KFold(4)
generator = kf.split(X_train, y_train)

# build StackNet, driving its internal CV with the KFold generator
model = StackNetClassifier(models, metric="auc", folds=generator, restacking=False,
                           use_retraining=True, use_proba=True,
                           random_state=42, n_jobs=-1, verbose=1)

# evaluate model
model.fit(X_train, y_train)
y_probs = model.predict_proba(X_test)[:, 1]

# save score
csv = pd.read_csv('./data/benchmark.csv')
csv['Prediction'] = y_probs
csv.to_csv('submission_StackNet.csv', index=False)
print('--------------------Submission file has been generated.--------------------------')
# assemble the per-level model lists selected in the UI
niveaux = dict()
for name, models in selections.items():
    les_models = list()
    for i in range(len(models)):
        les_models.append(models_1.get(models[i]))
    niveaux[name] = les_models

pystacknet_model = list()
for models in niveaux.values():
    pystacknet_model.append(models)

model = StackNetClassifier(pystacknet_model,
                           metric=param_stacknet["metric"],
                           folds=param_stacknet['folds'],
                           restacking=param_stacknet['restacking'],
                           use_retraining=param_stacknet['use_retraining'],
                           use_proba=param_stacknet['use_proba'],
                           random_state=param_stacknet['random_state'],
                           n_jobs=param_stacknet['n_jobs'],
                           verbose=param_stacknet['verbose'])
if model:
    st.info("StackNet model generation is complete")

choix = st.checkbox("Show the Covid dataset")
if choix:
    X_train, X_test, y_train, y_test = get_Covid_19()
    if st.checkbox("Show the shapes"):
        st.text(X_train.shape)
if st.checkbox("Evaluate") and choix:
    model.fit(X_train, y_train)
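# The snippet above relies on a models_1 registry and a param_stacknet config
# defined elsewhere in the app. A minimal sketch of what they might look like
# (names and values are illustrative assumptions, not the app's actual config):
models_1 = {
    "rf": RandomForestClassifier(n_estimators=100, random_state=1),
    "lr": LogisticRegression(random_state=1),
}
param_stacknet = {
    "metric": "auc", "folds": 4, "restacking": False,
    "use_retraining": True, "use_proba": True,
    "random_state": 12345, "n_jobs": 1, "verbose": 1,
}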
def test_pystacknet():
    path = ""
    y, X = load_data(path, 'train.csv')
    y_test, X_test = load_data(path, 'test.csv', use_labels=False)

    # === one-hot encoding === #
    # we want to encode the category IDs encountered both in
    # the training and the test set, so we fit the encoder on both
    encoder = preprocessing.OneHotEncoder()
    encoder.fit(np.vstack((X, X_test)))
    X = encoder.transform(X)  # returns a sparse matrix (see scipy.sparse)
    X_test = encoder.transform(X_test)

    #####################################################################################
    ###############################  CLASSIFICATION  ###################################
    #####################################################################################

    models = [
        [LogisticRegression(C=1, random_state=1),
         LogisticRegression(C=3, random_state=1),
         Ridge(alpha=0.1, random_state=1),
         LogisticRegression(penalty="l1", C=1, random_state=1),
         XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=300,
                       objective="binary:logistic", n_jobs=1, booster="gbtree",
                       random_state=1, colsample_bytree=0.4),
         XGBClassifier(max_depth=5, learning_rate=0.3, reg_lambda=0.1, n_estimators=300,
                       objective="binary:logistic", n_jobs=1, booster="gblinear",
                       random_state=1, colsample_bytree=0.4),
         XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=300,
                       objective="rank:pairwise", n_jobs=1, booster="gbtree",
                       random_state=1, colsample_bytree=0.4),
         LGBMClassifier(boosting_type='gbdt', num_leaves=40, max_depth=-1,
                        learning_rate=0.01, n_estimators=1000, subsample_for_bin=1000,
                        objective="xentropy", min_split_gain=0.0, min_child_weight=0.01,
                        min_child_samples=10, subsample=0.9, subsample_freq=1,
                        colsample_bytree=0.5, reg_alpha=0.0, reg_lambda=0.0,
                        random_state=1, n_jobs=1)],
        [RandomForestClassifier(n_estimators=300, criterion="entropy", max_depth=6,
                                max_features=0.5, random_state=1)]
    ]

    ##################  proba metric  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(X, y)
    preds = model.predict_proba(X_test)[:, 1]
    save_results(preds, path + "pystacknet_pred.csv")
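# load_data and save_results are helpers defined elsewhere in this script.
# Minimal sketches consistent with how they are called above; the column
# ranges and the "id,ACTION" header are illustrative assumptions about the
# CSV layout (label in the first column when present):
def load_data(path, filename, use_labels=True):
    data = np.loadtxt(open(path + filename), delimiter=',',
                      usecols=range(1, 10), skiprows=1)
    if use_labels:
        labels = np.loadtxt(open(path + filename), delimiter=',',
                            usecols=[0], skiprows=1)
    else:
        labels = np.zeros(data.shape[0])
    return labels, data

def save_results(predictions, filename):
    with open(filename, 'w') as f:
        f.write("id,ACTION\n")
        for i, pred in enumerate(predictions):
            f.write("%d,%f\n" % (i + 1, pred))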
def main():
    # Download the data and split into training and test sets
    iris = load_iris()
    X = iris.data
    y = iris.target
    test_size = int(0.2 * len(y))
    np.random.seed(13)
    indices = np.random.permutation(len(X))
    X_train = X[indices[:-test_size]]
    y_train = y[indices[:-test_size]]
    X_test = X[indices[-test_size:]]
    y_test = y[indices[-test_size:]]
    # other datasets would need more complex data cleaning here

    # list all machine learning algorithms for hyper-parameter tuning
    MLA = {
        'rfc': [
            RandomForestClassifier(),
            {
                'n_estimators': [50, 100, 200],
                'criterion': ['entropy'],
                'max_depth': [4, 5, 6],
                # 'min_samples_split': [5, 10, .03, .05, .10],
                'max_features': [.5],
                'random_state': [1],
            },
            random_forest('my_rfc'),
        ],
        'etc': [
            ExtraTreesClassifier(),
            {
                'n_estimators': [50, 100, 200],
                'criterion': ['entropy'],
                'max_depth': [4, 5, 6],
                'max_features': [.5],
                'random_state': [1],
            },
            extra_trees('my_etc'),
        ],
        'gbc': [
            GradientBoostingClassifier(),
            {
                # 'loss': ['deviance', 'exponential'],
                'learning_rate': [.1, .25, .5],
                'n_estimators': [50, 100, 200],
                # 'criterion': ['friedman_mse', 'mse', 'mae'],
                'max_depth': [4, 5, 6],
                'max_features': [.5],
                # 'min_samples_split': [5, 10, .03, .05, .10],
                # 'min_samples_leaf': [5, 10, .03, .05, .10],
                'random_state': [1],
            },
            gradient_boosting('my_rgc'),
        ],
        'lr': [
            LogisticRegression(),
            {
                # 'fit_intercept': grid_bool,
                # 'penalty': ['l1', 'l2'],
                # 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
                'random_state': [1],
            },
        ],
        'svc': [
            svm.SVC(),
            {
                # SVC - http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC
                # http://blog.hackerearth.com/simple-tutorial-svm-parameter-tuning-python-r
                # 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                'C': [1, 2, 3, 4, 5],              # default=1.0
                'gamma': [.1, .25, .5, .75, 1.0],  # default: auto
                'decision_function_shape': ['ovo', 'ovr'],  # default: ovr
                'probability': [True],
                'random_state': [0],
            },
        ],
        'xgb': [
            XGBClassifier(),
            {
                # XGBClassifier - http://xgboost.readthedocs.io/en/latest/parameter.html
                'learning_rate': [.01, .03, .05, .1, .25],  # default: .3
                'max_depth': [1, 2, 4, 6, 8, 10],           # default: 2
                'n_estimators': [10, 50, 100, 300],
                'seed': [0],
            },
        ],
    }

    # list some algorithms for HyperoptEstimator, but error !!!
    # MLA2 = {
    #     'rfc': [random_forest('my_rfc')],
    #     'etc': [extra_trees('my_etc')],
    #     'gbc': [gradient_boosting('my_rgc')],
    # }

    def opt(clf):
        est = MLA[clf][0]
        # --------- wanted to use Hyperopt, but it raises errors !!!
        # estim = HyperoptEstimator(classifier=MLA2[clf][0],
        #                           preprocessing=[],
        #                           algo=tpe.suggest,
        #                           max_evals=3,
        #                           trial_timeout=120)
        # estim.fit(X_train, y_train)
        # est = estim
        # ---------
        # use GridSearchCV instead; it's slow
        est = model_selection.GridSearchCV(estimator=est, param_grid=MLA[clf][1],
                                           cv=5)  # scoring='roc_auc'
        return est

    # for StackNetClassifier
    # models = [
    #     ######## First level ########
    #     [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
    #      ExtraTreesClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
    #      GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
    #      LogisticRegression(random_state=1)],
    #     ######## Second level ########
    #     [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    # ]
    models = [
        ######## First level ########
        [
            opt('rfc'),
            opt('etc'),
            # opt('gbc'),
            # opt('lr'),
        ],
        ######## Second level ########
        [
            opt('rfc'),
        ],
    ]

    # use StackNet to stack the models
    StackNetmodel = StackNetClassifier(models, folds=4,  # metric="auc",
                                       restacking=False, use_retraining=True,
                                       use_proba=True, random_state=12345,
                                       n_jobs=1, verbose=1)
    StackNetmodel.fit(X_train, y_train)
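    # Possible follow-up (not in the original script): score the fitted stack
    # on the held-out iris split. predict_proba returns one column per class,
    # so the predicted label is the argmax across columns.
    probs = StackNetmodel.predict_proba(X_test)
    acc = (probs.argmax(axis=1) == y_test).mean()
    print("StackNet holdout accuracy: %f" % acc)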
    ######## Third level ########
    [
        RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5,
                               max_features=0.5, random_state=1),
        # LogisticRegression(random_state=1),
    ]
]

from pystacknet.pystacknet import StackNetClassifier

model = StackNetClassifier(models, metric=metric_self, folds=5, restacking=False,
                           use_retraining=True, use_proba=True,
                           random_state=12345, n_jobs=-1, verbose=1)
model.fit(X_loc_train, y_loc_train)
preds = model.predict_proba(X_loc_test)[:, 1]

predict_result = test.loc[:, ['UID']]
predict_result['Tag'] = preds
now = datetime.datetime.now().strftime('%m-%d-%H-%M')
predict_result[['UID', 'Tag']].to_csv("lgb_stacknet%s.csv" % now, index=False)
print(predict_result.head())
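# metric_self is a custom evaluation function defined elsewhere in this script.
# pystacknet calls custom metrics with the true targets and the predictions,
# as the gini example in the test suite shows, so a plausible stand-in,
# assuming the author scored AUC, would be:
from sklearn.metrics import roc_auc_score

def metric_self(y_true, y_pred):
    return roc_auc_score(y_true, y_pred)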
        # ... (the wrapped estimator's leading arguments are truncated in the source)
                      n_jobs=-1, max_iter=15000, random_state=1234, tol=0.00001),
        min_features=6, max_features=7, scoring='roc_auc',
        print_progress=True, cv=5)
]]

model = StackNetClassifier(models, metric="auc", folds=5, restacking=False,
                           use_retraining=False, use_proba=True,
                           random_state=555, n_jobs=1, verbose=2)
model.fit(train.drop(TARGET_COL, axis=1), train[TARGET_COL])

test.shape
y_pred = model.predict_proba(test[list(train.drop(TARGET_COL, axis=1).columns)].values)

sample_submission = pd.read_csv('sb_test.csv')[['encounter_id', 'hospital_death']]
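# The snippet ends before the submission is written. A likely continuation,
# assuming the positive-class probability fills hospital_death (the output
# filename here is hypothetical):
sample_submission['hospital_death'] = y_pred[:, 1]
sample_submission.to_csv('submission_stacknet.csv', index=False)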
    ######## Second level ########
    [clf_lgb],
]

# StackNetClassifier with GPU
# StackNetClassifier expects array-like input, so convert the dataframes to
# numpy arrays (.as_matrix() was removed in recent pandas; use .to_numpy())
X_test = X_test.to_numpy()
X_train = X_train.to_numpy()

# then refit the model, it is ok
model = StackNetClassifier(
    models,
    metric="auc",
    folds=2,
    restacking=False,
    use_retraining=False,
    use_proba=True,
    random_state=42,
    verbose=1,
)
model.fit(X_train, y_train)
y_pred = model.predict_proba(X_test)
pd.DataFrame(y_pred, columns=['predictions', 'isFraud']).to_csv('prediction StackNetClassifier.csv')

# Neural Networks
from sklearn.neural_network import MLPClassifier
    # ... (the preceding LGBMClassifier arguments are truncated in the source)
                    subsample_freq=5, colsample_bytree=0.05,
                    reg_alpha=0.1, reg_lambda=0.35, random_state=1, n_jobs=-1)
    ],
    [RandomForestClassifier(n_estimators=300, criterion="entropy", max_depth=6,
                            max_features=0.5, random_state=1)]
]

model = StackNetClassifier(models, metric="auc", folds=5, restacking=False,
                           use_retraining=True, use_proba=True,
                           random_state=0, n_jobs=8, verbose=1)
# column 0 is the id, column 1 the target; the rest are features
model.fit(train_df.iloc[:, 2:].values, train_df.iloc[:, 1].values)
preds = model.predict_proba(test_df.iloc[:, 1:].values)

sub = test_df.iloc[:, :2].drop(columns=['var_0'])
sub['target'] = preds[:, 1]
sub.to_csv('submission.csv', index=False)