X = pd.concat([X_cat_dummies, X_num, X_survey], axis=1) if TEST == True: n = X.shape[0] test_size = np.floor(n * 0.1) X = X.loc[0:test_size, :] ys = ys.loc[0:test_size, :] all_scores = {} for dependent_variable in y_variables: print('Running variables: ' + dependent_variable) y = ys[dependent_variable].values print('Running Linear Regression') # Linear Regression lr_cv_train_scores, lr_cv_test_scores = models.lr_model( X, y, dummies, num_demo + data_sets[key]) all_scores['lr_train_' + dependent_variable] = lr_cv_train_scores all_scores['lr_test_' + dependent_variable] = lr_cv_test_scores print('Running Random Forest') #Random Forest if TEST == True: param_grid = { 'max_depth': [15], 'min_samples_leaf': [20], 'min_samples_split': [20], 'n_estimators': [500] } else:
# Report how many features the selector kept and the rank it gave each one.
print("被筛选的特征数量", selector.n_features_)
print("特征排名", selector.ranking_)

# Names handed to the CatBoost wrapper as categorical features.
# NOTE(review): these are passed on unchanged even if the selector dropped
# some of them from `selects` — confirm cat_boost_model tolerates that.
cat_features = [
    "is_festival_user",
    "is_LAST_2YEAR_DD_ACTIVE",
    "cafe_tag_is_mop_available",
    "IS_SR_KIT_USER",
]

# Keep only the columns whose entry in the selector's support mask is truthy.
columns = x_train.columns
selects = [
    columns[position]
    for position, is_kept in enumerate(selector.support_)
    if is_kept
]

# Restrict the train, test and back-test frames to the selected columns.
x_train = x_train[selects]
x_test = x_test[selects]
x_btest = df_btest[selects]

# Every model wrapper takes the same five leading arguments, followed by the
# back-test label column (looked up again for each call).
split_args = (x_train, x_test, y_train, y_test, x_btest)
adaboost_model(*split_args, df_btest[labels])
lr_model(*split_args, df_btest[labels])
gbdt_mdoel(*split_args, df_btest[labels])
xgb_model(*split_args, df_btest[labels])
lgb_model(*split_args, df_btest[labels])
cat_boost_model(*split_args, df_btest[labels], cat_features=cat_features)

# from sklearn.feature_selection import RFECV
# x = df_train.copy()
# clf1 = RandomForestClassifier()
# x1 = X_train_tra[X_train_tra[labels] == 1] # x0 = x0.sample(n=None, frac=0.33, replace=False, weights=None, random_state=0, axis=0) # X_train_tra = pd.concat([x0, x1], axis=0) # 划分label x_train, y_train = seperate_label(X_train_tra, label=labels) x_test, y_test = seperate_label(X_test_tra, label=labels) print("x_train", x_train.shape) # #模型训练 # adaboost模型 adaboost_model(x_train, x_test, y_train, y_test, df_btest.drop(labels, axis=1), df_btest[labels]) # LR模型 lr_model(x_train, x_test, y_train, y_test, df_btest.drop(labels, axis=1), df_btest[labels]) # # rf模型 rf_mdoel(x_train, x_test, y_train, y_test, df_btest.drop(labels, axis=1), df_btest[labels]) # # # # # gbdt模型 gbdt_mdoel(x_train, x_test, y_train, y_test, df_btest.drop(labels, axis=1), df_btest[labels]) # xgb模型 xgb_model(x_train, x_test, y_train, y_test, df_btest.drop(labels, axis=1), df_btest[labels]) # lgb模型 lgb_model(x_train, x_test,