def stacknet_train_test(X, y, text=False):
    models = [
        # First level
        [RandomForestClassifier(n_estimators=500, max_depth=3, random_state=0),
         ExtraTreesClassifier(n_estimators=100, max_depth=5, random_state=0),
         SGDClassifier(loss="log", penalty="l2", max_iter=5),
         KNeighborsClassifier(n_neighbors=5),
         LogisticRegression(random_state=0),
         MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2),
                       random_state=0, learning_rate='invscaling'),
         AdaBoostClassifier(n_estimators=500, learning_rate=1e-3, random_state=0)],
        # Second level
        [RandomForestClassifier(n_estimators=1000, max_depth=5, random_state=0)]
    ]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                        random_state=0, shuffle=True)
    model = StackNetClassifier(models, metric="f1", folds=4, restacking=True,
                               use_retraining=True, use_proba=True,
                               random_state=0, verbose=1)
    model.fit(X_train, y_train)
    y_init = model.predict_proba(X_test)
    # predict_proba returns one column per class: take the larger column as the
    # predicted label and keep the class-0 probability as the score
    y_pred = np.array([0 if i[0] > i[1] else 1 for i in y_init])
    y_score = [i[0] for i in y_init]
    name = "stackNet_text" if text else "stackNet"
    # this function is imported from the models_final.py file
    report_card = get_report(y_test, y_pred, y_score, name)
    with open("final_results/final_report_stackNet.txt", "a") as f:
        f.write(f"Classification report for {name}: \n")
        f.write(report_card)
        f.write("\n")
        f.write("-----------------------------------------------------------------")
        f.write("\n")
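# get_report is defined in models_final.py, which is not shown here. A minimal
# stand-in, assuming it returns a text report built from the predictions and
# scores (name and format are illustrative assumptions, not the author's code):
from sklearn.metrics import classification_report, roc_auc_score

def get_report(y_true, y_pred, y_score, name):
    report = classification_report(y_true, y_pred)
    # y_score above holds the class-0 probability, so flip it for a
    # positive-class AUC
    auc = roc_auc_score(y_true, 1 - np.asarray(y_score))
    return "%s (AUC %.4f)\n%s" % (name, auc, report)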
def test_pystacknet():
    Xn = np.array(x_train)
    yn = np.array(y_train)
    print(Xn.shape, yn.shape)

    #####################################################################################
    ###############################  CLASSIFICATION  ###################################
    #####################################################################################

    models = [
        [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1)],
        [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    ]

    ##################  no proba metric  ###############################
    model = StackNetClassifier(models, metric="accuracy", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("accuracy test 1 , auc %f " % roc_auc_score(y_test, preds))

    ##################  proba metric  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("auc test 2 , auc %f " % roc_auc_score(y_test, preds))

    ##################  custom metric  ###############################
    model = StackNetClassifier(models, metric=gini, folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("custom metric gini test 3 , gini %f " % gini(y_test, preds))

    ##################  numpy input  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(Xn, yn)
    preds = model.predict_proba(np.array(x_test))[:, 1]
    print("numpy auc test 4 , auc %f " % roc_auc_score(y_test, preds))

    ##################  csr_matrix input  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict_proba(csr_matrix(x_test))[:, 1]
    print("csr auc test 5 , auc %f " % roc_auc_score(y_test, preds))

    ##################  restacking  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=True,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict_proba(csr_matrix(x_test))[:, 1]
    print("restacking auc test 6 , auc %f " % roc_auc_score(y_test, preds))

    ##################  without retraining  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=True,
                               use_retraining=False, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict_proba(csr_matrix(x_test))[:, 1]
    print("no retraining auc test 7 , auc %f " % roc_auc_score(y_test, preds))

    ##################  custom k-fold object  ###############################
    # the old StratifiedKFold(y, n_folds=...) API is gone; build the splits
    # with the modern interface and pass them to folds
    k = list(StratifiedKFold(n_splits=4, shuffle=True, random_state=1251).split(Xn, yn))
    model = StackNetClassifier(models, metric="auc", folds=k, restacking=True,
                               use_retraining=False, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict_proba(csr_matrix(x_test))[:, 1]
    print("custom kfold auc test 8 , auc %f " % roc_auc_score(y_test, preds))

    ##################  regressor in base level  ###############################
    models_reg = [
        [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1)],
        [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetClassifier(models_reg, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("with regressor test 9 , auc %f " % roc_auc_score(y_test, preds))

    ##################  transformer in base level  ###############################
    models_pca = [
        [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1),
         PCA(n_components=4, random_state=1)],
        [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetClassifier(models_pca, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("with PCA test 10 , auc %f " % roc_auc_score(y_test, preds))

    ##################  multiclass metric  ###############################
    model = StackNetClassifier(models, metric="logloss", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y2d[:100])
    preds = model.predict_proba(x_test)
    print("logloss test 11 , logloss %f " % log_loss(y2d[100:], preds))

    ##################  3 levels  ###############################
    models3 = [
        [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1)],
        [GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         LogisticRegression(random_state=1)],
        [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetClassifier(models3, metric="logloss", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y2d[:100])
    preds = model.predict_proba(x_test)
    print("3 levels test 12 , logloss %f " % log_loss(y2d[100:], preds))

    ##################  with sample_weight  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train, sample_weight=w_train)
    preds = model.predict_proba(x_test)[:, 1]
    print("auc weighted test 13 , auc %f " % roc_auc_score(y_test, preds, sample_weight=w_test))

    #####################################################################################
    ###############################  REGRESSION  #######################################
    #####################################################################################

    models = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]

    ##################  rmse metric  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("rmse test 1 , %f " % rmse(y_test, preds))

    ##################  mae metric  ###############################
    model = StackNetRegressor(models, metric="mae", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("mae test 2 , %f " % mae(y_test, preds))

    ##################  custom metric  ###############################
    model = StackNetRegressor(models, metric=R, folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("custom metric R test 3 %f " % R(y_test, preds))

    ##################  numpy input  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(Xn, yn)
    preds = model.predict(x_test)
    print("numpy rmse test 4 %f " % rmse(y_test, preds))

    ##################  csr_matrix input  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict(x_test)
    print("csr test 5 , rmse %f " % rmse(y_test, preds))

    ##################  restacking  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=True,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict(x_test)
    print("restacking rmse test 6 , rmse %f " % rmse(y_test, preds))

    ##################  without retraining  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=True,
                              use_retraining=False, random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict(x_test)
    print("no retraining rmse test 7, rmse %f " % rmse(y_test, preds))

    ##################  custom k-fold object  ###############################
    # stratification does not apply to a continuous target, so use a plain
    # KFold here (the original called the removed StratifiedKFold(y, n_folds=...) API)
    k = list(KFold(n_splits=4, shuffle=True, random_state=1251).split(Xn))
    model = StackNetRegressor(models, metric="rmse", folds=k, restacking=True,
                              use_retraining=False, random_state=12345, n_jobs=1, verbose=1)
    model.fit(csr_matrix(Xn), yn)
    preds = model.predict(x_test)
    print("custom kfold rmse test 8, %f " % rmse(y_test, preds))

    ##################  classifier in base level  ###############################
    models_class = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesClassifier(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetRegressor(models_class, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("with classifier test 9, rmse %f " % rmse(y_test, preds))

    ##################  transformer in base level  ###############################
    models_pca = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1),
         PCA(n_components=4, random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetRegressor(models_pca, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train)
    preds = model.predict(x_test)
    print("with PCA test 10 , rmse %f " % rmse(y_test, preds))

    ##################  2d target  ###############################
    models2 = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         # GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetRegressor(models2, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, np.column_stack((y_train, y2d[:100])))
    preds = model.predict(x_test)
    print("rmse test 11 , rmse %f " % rmse(np.column_stack((y_test, y2d[100:])), preds))

    ##################  3 levels  ###############################
    models3 = [
        [RandomForestRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         # GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [ExtraTreesRegressor(n_estimators=100, max_depth=5, max_features=0.5, random_state=1),
         Ridge(random_state=1)],
        [RandomForestRegressor(n_estimators=200, max_depth=5, max_features=0.5, random_state=1)]
    ]
    model = StackNetRegressor(models3, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y2d[:100])
    preds = model.predict(x_test)
    print("3 levels test 12 , rmse %f " % rmse(y2d[100:], preds))

    ##################  with sample_weight  ###############################
    model = StackNetRegressor(models, metric="rmse", folds=4, restacking=False,
                              use_retraining=True, random_state=12345, n_jobs=1, verbose=1)
    model.fit(x_train, y_train, sample_weight=w_train)
    preds = model.predict(x_test)
    print("rmse weighted test 13 , %f " % rmse(y_test, preds, sample_weight=w_test))
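# gini, rmse, mae and R are custom metrics defined elsewhere in this test
# module. Minimal sketches consistent with how they are called above (R is
# assumed here to be the coefficient of determination; all of this is
# illustrative, not the module's actual definitions):
from sklearn.metrics import (roc_auc_score, mean_squared_error,
                             mean_absolute_error, r2_score)

def gini(y_true, y_pred):
    # normalized Gini coefficient, a linear rescaling of AUC
    return 2 * roc_auc_score(y_true, y_pred) - 1

def rmse(y_true, y_pred, sample_weight=None):
    return np.sqrt(mean_squared_error(y_true, y_pred, sample_weight=sample_weight))

def mae(y_true, y_pred, sample_weight=None):
    return mean_absolute_error(y_true, y_pred, sample_weight=sample_weight)

def R(y_true, y_pred):
    return r2_score(y_true, y_pred)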
     # ... (the preceding first-level models are truncated in the source; these
     # look like the tail of an MLPClassifier's adam parameters)
                   beta_1=0.1, beta_2=0.1, epsilon=0.1)
    ]
]

# leave 4 subjects out
kf = KFold(4)
generator = kf.split(X_train, y_train)

# build StackNet, driving its internal CV with the KFold generator
model = StackNetClassifier(models, metric="auc", folds=generator, restacking=False,
                           use_retraining=True, use_proba=True,
                           random_state=42, n_jobs=-1, verbose=1)

# evaluate model
model.fit(X_train, y_train)
y_probs = model.predict_proba(X_test)[:, 1]

# save score
csv = pd.read_csv('./data/benchmark.csv')
csv['Prediction'] = y_probs
csv.to_csv('submission_StackNet.csv', index=False)
print('--------------------Submission file has been generated.--------------------------')
# assemble the per-level model lists selected in the UI
niveaux = dict()
for name, models in selections.items():
    les_models = list()
    for i in range(len(models)):
        les_models.append(models_1.get(models[i]))
    niveaux[name] = les_models

pystacknet_model = list()
for models in niveaux.values():
    pystacknet_model.append(models)

model = StackNetClassifier(pystacknet_model,
                           metric=param_stacknet["metric"],
                           folds=param_stacknet['folds'],
                           restacking=param_stacknet['restacking'],
                           use_retraining=param_stacknet['use_retraining'],
                           use_proba=param_stacknet['use_proba'],
                           random_state=param_stacknet['random_state'],
                           n_jobs=param_stacknet['n_jobs'],
                           verbose=param_stacknet['verbose'])
if model:
    st.info("StackNet model generation is complete")

choix = st.checkbox("Show the Covid dataset")
if choix:
    X_train, X_test, y_train, y_test = get_Covid_19()
    if st.checkbox("Show the shapes"):
        st.text(X_train.shape)
if st.checkbox("Evaluate") and choix:
    model.fit(X_train, y_train)
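# The snippet above relies on a models_1 registry and a param_stacknet config
# defined elsewhere in the app. A minimal sketch of what they might look like
# (names and values are illustrative assumptions, not the app's actual config):
models_1 = {
    "rf": RandomForestClassifier(n_estimators=100, random_state=1),
    "lr": LogisticRegression(random_state=1),
}
param_stacknet = {
    "metric": "auc", "folds": 4, "restacking": False,
    "use_retraining": True, "use_proba": True,
    "random_state": 12345, "n_jobs": 1, "verbose": 1,
}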
def test_pystacknet():
    path = ""
    y, X = load_data(path, 'train.csv')
    y_test, X_test = load_data(path, 'test.csv', use_labels=False)

    # === one-hot encoding === #
    # we want to encode the category IDs encountered both in
    # the training and the test set, so we fit the encoder on both
    encoder = preprocessing.OneHotEncoder()
    encoder.fit(np.vstack((X, X_test)))
    X = encoder.transform(X)  # returns a sparse matrix (see scipy.sparse)
    X_test = encoder.transform(X_test)

    #####################################################################################
    ###############################  CLASSIFICATION  ###################################
    #####################################################################################

    models = [
        [LogisticRegression(C=1, random_state=1),
         LogisticRegression(C=3, random_state=1),
         Ridge(alpha=0.1, random_state=1),
         LogisticRegression(penalty="l1", C=1, random_state=1),
         XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=300,
                       objective="binary:logistic", n_jobs=1, booster="gbtree",
                       random_state=1, colsample_bytree=0.4),
         XGBClassifier(max_depth=5, learning_rate=0.3, reg_lambda=0.1, n_estimators=300,
                       objective="binary:logistic", n_jobs=1, booster="gblinear",
                       random_state=1, colsample_bytree=0.4),
         XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=300,
                       objective="rank:pairwise", n_jobs=1, booster="gbtree",
                       random_state=1, colsample_bytree=0.4),
         LGBMClassifier(boosting_type='gbdt', num_leaves=40, max_depth=-1,
                        learning_rate=0.01, n_estimators=1000, subsample_for_bin=1000,
                        objective="xentropy", min_split_gain=0.0, min_child_weight=0.01,
                        min_child_samples=10, subsample=0.9, subsample_freq=1,
                        colsample_bytree=0.5, reg_alpha=0.0, reg_lambda=0.0,
                        random_state=1, n_jobs=1)],
        [RandomForestClassifier(n_estimators=300, criterion="entropy", max_depth=6,
                                max_features=0.5, random_state=1)]
    ]

    ##################  proba metric  ###############################
    model = StackNetClassifier(models, metric="auc", folds=4, restacking=False,
                               use_retraining=True, use_proba=True,
                               random_state=12345, n_jobs=1, verbose=1)
    model.fit(X, y)
    preds = model.predict_proba(X_test)[:, 1]
    save_results(preds, path + "pystacknet_pred.csv")
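# load_data and save_results are helpers defined elsewhere in this script.
# Minimal sketches consistent with how they are called above; the column
# ranges and the "id,ACTION" header are illustrative assumptions about the
# CSV layout (label in the first column when present):
def load_data(path, filename, use_labels=True):
    data = np.loadtxt(open(path + filename), delimiter=',',
                      usecols=range(1, 10), skiprows=1)
    if use_labels:
        labels = np.loadtxt(open(path + filename), delimiter=',',
                            usecols=[0], skiprows=1)
    else:
        labels = np.zeros(data.shape[0])
    return labels, data

def save_results(predictions, filename):
    with open(filename, 'w') as f:
        f.write("id,ACTION\n")
        for i, pred in enumerate(predictions):
            f.write("%d,%f\n" % (i + 1, pred))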
def main():
    # Download the data and split into training and test sets
    iris = load_iris()
    X = iris.data
    y = iris.target
    test_size = int(0.2 * len(y))
    np.random.seed(13)
    indices = np.random.permutation(len(X))
    X_train = X[indices[:-test_size]]
    y_train = y[indices[:-test_size]]
    X_test = X[indices[-test_size:]]
    y_test = y[indices[-test_size:]]
    # other datasets would need more complex data cleaning here

    # list all machine learning algorithms for hyper-parameter tuning
    MLA = {
        'rfc': [
            RandomForestClassifier(),
            {
                'n_estimators': [50, 100, 200],
                'criterion': ['entropy'],
                'max_depth': [4, 5, 6],
                # 'min_samples_split': [5, 10, .03, .05, .10],
                'max_features': [.5],
                'random_state': [1],
            },
            random_forest('my_rfc'),
        ],
        'etc': [
            ExtraTreesClassifier(),
            {
                'n_estimators': [50, 100, 200],
                'criterion': ['entropy'],
                'max_depth': [4, 5, 6],
                'max_features': [.5],
                'random_state': [1],
            },
            extra_trees('my_etc'),
        ],
        'gbc': [
            GradientBoostingClassifier(),
            {
                # 'loss': ['deviance', 'exponential'],
                'learning_rate': [.1, .25, .5],
                'n_estimators': [50, 100, 200],
                # 'criterion': ['friedman_mse', 'mse', 'mae'],
                'max_depth': [4, 5, 6],
                'max_features': [.5],
                # 'min_samples_split': [5, 10, .03, .05, .10],
                # 'min_samples_leaf': [5, 10, .03, .05, .10],
                'random_state': [1],
            },
            gradient_boosting('my_rgc'),
        ],
        'lr': [
            LogisticRegression(),
            {
                # 'fit_intercept': grid_bool,
                # 'penalty': ['l1', 'l2'],
                # 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
                'random_state': [1],
            },
        ],
        'svc': [
            svm.SVC(),
            {
                # SVC - http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC
                # http://blog.hackerearth.com/simple-tutorial-svm-parameter-tuning-python-r
                # 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
                'C': [1, 2, 3, 4, 5],              # default=1.0
                'gamma': [.1, .25, .5, .75, 1.0],  # default: auto
                'decision_function_shape': ['ovo', 'ovr'],  # default: ovr
                'probability': [True],
                'random_state': [0],
            },
        ],
        'xgb': [
            XGBClassifier(),
            {
                # XGBClassifier - http://xgboost.readthedocs.io/en/latest/parameter.html
                'learning_rate': [.01, .03, .05, .1, .25],  # default: .3
                'max_depth': [1, 2, 4, 6, 8, 10],           # default: 2
                'n_estimators': [10, 50, 100, 300],
                'seed': [0],
            },
        ],
    }

    # list some algorithms for HyperoptEstimator, but error !!!
    # MLA2 = {
    #     'rfc': [random_forest('my_rfc')],
    #     'etc': [extra_trees('my_etc')],
    #     'gbc': [gradient_boosting('my_rgc')],
    # }

    def opt(clf):
        est = MLA[clf][0]
        # --------- wanted to use Hyperopt, but it raises errors !!!
        # estim = HyperoptEstimator(classifier=MLA2[clf][0],
        #                           preprocessing=[],
        #                           algo=tpe.suggest,
        #                           max_evals=3,
        #                           trial_timeout=120)
        # estim.fit(X_train, y_train)
        # est = estim
        # ---------
        # use GridSearchCV instead; it's slow
        est = model_selection.GridSearchCV(estimator=est, param_grid=MLA[clf][1],
                                           cv=5)  # scoring='roc_auc'
        return est

    # for StackNetClassifier
    # models = [
    #     ######## First level ########
    #     [RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
    #      ExtraTreesClassifier(n_estimators=100, criterion="entropy", max_depth=5, max_features=0.5, random_state=1),
    #      GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, max_features=0.5, random_state=1),
    #      LogisticRegression(random_state=1)],
    #     ######## Second level ########
    #     [RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5, max_features=0.5, random_state=1)]
    # ]
    models = [
        ######## First level ########
        [
            opt('rfc'),
            opt('etc'),
            # opt('gbc'),
            # opt('lr'),
        ],
        ######## Second level ########
        [
            opt('rfc'),
        ],
    ]

    # use StackNet to stack the models
    StackNetmodel = StackNetClassifier(models, folds=4,  # metric="auc",
                                       restacking=False, use_retraining=True,
                                       use_proba=True, random_state=12345,
                                       n_jobs=1, verbose=1)
    StackNetmodel.fit(X_train, y_train)
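    # Possible follow-up (not in the original script): score the fitted stack
    # on the held-out iris split. predict_proba returns one column per class,
    # so the predicted label is the argmax across columns.
    probs = StackNetmodel.predict_proba(X_test)
    acc = (probs.argmax(axis=1) == y_test).mean()
    print("StackNet holdout accuracy: %f" % acc)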
    ######## Third level ########
    [
        RandomForestClassifier(n_estimators=200, criterion="entropy", max_depth=5,
                               max_features=0.5, random_state=1),
        # LogisticRegression(random_state=1),
    ]
]

from pystacknet.pystacknet import StackNetClassifier

model = StackNetClassifier(models, metric=metric_self, folds=5, restacking=False,
                           use_retraining=True, use_proba=True,
                           random_state=12345, n_jobs=-1, verbose=1)
model.fit(X_loc_train, y_loc_train)
preds = model.predict_proba(X_loc_test)[:, 1]

predict_result = test.loc[:, ['UID']]
predict_result['Tag'] = preds
now = datetime.datetime.now().strftime('%m-%d-%H-%M')
predict_result[['UID', 'Tag']].to_csv("lgb_stacknet%s.csv" % now, index=False)
print(predict_result.head())
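# metric_self is a custom evaluation function defined elsewhere in this script.
# pystacknet calls custom metrics with the true targets and the predictions,
# as the gini example in the test suite shows, so a plausible stand-in,
# assuming the author scored AUC, would be:
from sklearn.metrics import roc_auc_score

def metric_self(y_true, y_pred):
    return roc_auc_score(y_true, y_pred)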
        # ... (the wrapped estimator's leading arguments are truncated in the source)
                      n_jobs=-1, max_iter=15000, random_state=1234, tol=0.00001),
        min_features=6, max_features=7, scoring='roc_auc',
        print_progress=True, cv=5)
]]

model = StackNetClassifier(models, metric="auc", folds=5, restacking=False,
                           use_retraining=False, use_proba=True,
                           random_state=555, n_jobs=1, verbose=2)
model.fit(train.drop(TARGET_COL, axis=1), train[TARGET_COL])

test.shape
y_pred = model.predict_proba(test[list(train.drop(TARGET_COL, axis=1).columns)].values)

sample_submission = pd.read_csv('sb_test.csv')[['encounter_id', 'hospital_death']]
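# The snippet ends before the submission is written. A likely continuation,
# assuming the positive-class probability fills hospital_death (the output
# filename here is hypothetical):
sample_submission['hospital_death'] = y_pred[:, 1]
sample_submission.to_csv('submission_stacknet.csv', index=False)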
    ######## Second level ########
    [clf_lgb],
]

# StackNetClassifier with GPU
# StackNetClassifier expects array-like input, so convert the dataframes to
# numpy arrays (.as_matrix() was removed in recent pandas; use .to_numpy())
X_test = X_test.to_numpy()
X_train = X_train.to_numpy()

# then refit the model, it is ok
model = StackNetClassifier(
    models,
    metric="auc",
    folds=2,
    restacking=False,
    use_retraining=False,
    use_proba=True,
    random_state=42,
    verbose=1,
)
model.fit(X_train, y_train)
y_pred = model.predict_proba(X_test)
pd.DataFrame(y_pred, columns=['predictions', 'isFraud']).to_csv('prediction StackNetClassifier.csv')

# Neural Networks
from sklearn.neural_network import MLPClassifier
    # ... (the preceding LGBMClassifier arguments are truncated in the source)
                    subsample_freq=5, colsample_bytree=0.05,
                    reg_alpha=0.1, reg_lambda=0.35, random_state=1, n_jobs=-1)
    ],
    [RandomForestClassifier(n_estimators=300, criterion="entropy", max_depth=6,
                            max_features=0.5, random_state=1)]
]

model = StackNetClassifier(models, metric="auc", folds=5, restacking=False,
                           use_retraining=True, use_proba=True,
                           random_state=0, n_jobs=8, verbose=1)
# column 0 is the id, column 1 the target; the rest are features
model.fit(train_df.iloc[:, 2:].values, train_df.iloc[:, 1].values)
preds = model.predict_proba(test_df.iloc[:, 1:].values)

sub = test_df.iloc[:, :2].drop(columns=['var_0'])
sub['target'] = preds[:, 1]
sub.to_csv('submission.csv', index=False)