import pandas as pd
from scipy.stats import pearsonr
from baseModel import ortTrainer
from path import *  # provides processedDataPath; SEED / TEST_SIZE assumed to come from here too
from utils import load_data
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

other_params = {
    'n_estimators': 500,
    'max_depth': 3,
    'random_state': SEED,
}
tuned_params = {
    'n_estimators': [400, 600, 800, 1000, 1200],
    'max_depth': [3, 4, 5, 6, 7],
    'random_state': [1, 2, 3, 4, 5],
}

X_train, y_train, X_test, test_id, feature_names = load_data(processedDataPath)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, random_state=SEED, test_size=TEST_SIZE)

trainer = ortTrainer(modelClass=RandomForestClassifier, params=other_params,
                     tuned_params=tuned_params, isupdate=True, istune=True,
                     modelName='rf', cv=5)
trainer.fit(X_train, y_train)
output = trainer.getOutput(X_test, test_id, X_val, y_val)

# Sanity check: correlate this submission's probabilities with the current best one.
bestResult = pd.read_csv('currentBest.txt', header=None, sep='\t')
bestResult.columns = ['id', 'prob']
pccs = pearsonr(output['prob'], bestResult['prob'])
print("Score prediction: {}".format(pccs))
print(trainer.bestParams)

# Rank features by importance, descending.
sorted_importances, sorted_featurenames = zip(
    *sorted(zip(trainer.model.feature_importances_, feature_names), reverse=True))
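# baseModel is not shown in this repo snapshot, so the following is only a
# sketch of what ortTrainer's istune=True / cv=5 path presumably does:
# grid-search tuned_params with k-fold CV and keep the best estimator.
# The name tune_sketch and the roc_auc scorer are assumptions, not the
# project's actual implementation.
from sklearn.model_selection import GridSearchCV

def tune_sketch(modelClass, params, tuned_params, X, y, cv=5):
    search = GridSearchCV(modelClass(**params), tuned_params,
                          scoring='roc_auc', cv=cv, n_jobs=-1)
    search.fit(X, y)
    return search.best_estimator_, search.best_params_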
from baseModel import ortTrainer
from path import *
from utils import load_data
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

tuned_params = {
    "learning_rate": [0.02, 0.2, 0.5, 0.9],  # shrinkage
    "max_depth": [8, 12, 15, 20],
    "subsample": [0.6, 0.7, 0.8, 0.9],
    "colsample_bytree": [0.3, 0.5, 0.7, 0.9],
}

X_train, y_train, X_test, test_id, feature_names = load_data(processedDataPath)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, random_state=SEED, test_size=TEST_SIZE)

trainer = ortTrainer(modelClass=XGBClassifier, params=params,
                     tuned_params=tuned_params, isupdate=True, istune=True,
                     modelName='xgb-ordinal')
trainer.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    early_stopping_rounds=100,  # stop when validation AUC stalls for 100 rounds
    eval_metric="auc",
    verbose=True,
)
trainer.getOutput(X_test, test_id, X_val, y_val)
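# Hedged usage note: after early stopping, xgboost records the best boosting
# round on the fitted estimator, and predictions can be truncated to it
# explicitly. Attribute and keyword names below follow recent xgboost
# releases (older versions used best_ntree_limit / ntree_limit instead).
best_round = trainer.model.best_iteration
val_proba = trainer.model.predict_proba(X_val, iteration_range=(0, best_round + 1))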
modelName='adb', cv=5)

trainers = []
adbtrainer = TrainerWrapper(baseTrainer, adbtrainer_params)
trainers.append(adbtrainer)
# extratrainer = TrainerWrapper(baseTrainer, extratrainer_params)
rftrainer = TrainerWrapper(ortTrainer, rftrainer_params)
trainers.append(rftrainer)
lgbtrainer = XgBoostWrapper(ortTrainer, lgbtrainer_params)
trainers.append(lgbtrainer)

# Out-of-fold stacking: each base trainer contributes one meta-feature column
# for train and one for test.
features = [get_oof(trainer, X_train, y_train, X_test) for trainer in trainers]
# np.hstack needs a sequence, not a generator (newer NumPy raises on generators).
X_train_new = np.hstack([feature[0] for feature in features])
X_test_new = np.hstack([feature[1] for feature in features])

finaltrainer = ortTrainer(modelClass=LGBMClassifier, params=lgb_params,
                          tuned_params=lgb_tune_params, isupdate=True,
                          istune=True, modelName='final', cv=5)
finaltrainer.fit(X_train_new, y_train)
# Note: this validation split overlaps rows the final trainer was already fit on,
# so the reported validation score is optimistic.
_, X_val_new, _, y_val_new = train_test_split(X_train_new, y_train, random_state=SEED)
finaltrainer.getOutput(X_test_new, test_id, X_val_new, y_val_new)
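# get_oof is used above but not defined in this file. A minimal sketch of the
# standard out-of-fold scheme it presumably implements; the fold count, seed,
# and use of predict_proba are assumptions.
import numpy as np
from sklearn.model_selection import KFold

def get_oof_sketch(trainer, X_train, y_train, X_test, n_splits=5, seed=0):
    oof_train = np.zeros((X_train.shape[0], 1))
    oof_test_folds = np.zeros((n_splits, X_test.shape[0]))
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for i, (tr_idx, va_idx) in enumerate(kf.split(X_train)):
        # Fit on the in-fold rows, predict the held-out fold and the test set.
        trainer.fit(X_train[tr_idx], y_train[tr_idx])
        oof_train[va_idx, 0] = trainer.model.predict_proba(X_train[va_idx])[:, 1]
        oof_test_folds[i] = trainer.model.predict_proba(X_test)[:, 1]
    # Average the per-fold test predictions into a single meta-feature column.
    return oof_train, oof_test_folds.mean(axis=0).reshape(-1, 1)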
from baseModel import ortTrainer
from path import *
from utils import load_data
from sklearn.linear_model import LogisticRegression

other_params = {
    'n_jobs': 1,
    'random_state': 1,
    'C': 0.3,
}
tuned_params = {
    'C': [0.01, 0.05, 0.1, 0.3, 0.5, 0.7],
}

X_train, y_train, X_test, test_id, feature_names = load_data(processedDataPath)

trainer = ortTrainer(modelClass=LogisticRegression, params=other_params,
                     tuned_params=tuned_params, isupdate=True, istune=True,
                     modelName='lr')
trainer.fit(X_train, y_train)
trainer.getOutput()
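# Optional sanity check (not part of the original script): score each candidate
# C by 5-fold AUC to see how sensitive this baseline is to regularization
# strength (C is the inverse regularization strength in sklearn).
from sklearn.model_selection import cross_val_score

for C in tuned_params['C']:
    clf = LogisticRegression(C=C, n_jobs=1, random_state=1)
    auc = cross_val_score(clf, X_train, y_train, scoring='roc_auc', cv=5).mean()
    print('C={}: mean AUC={:.4f}'.format(C, auc))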
}
tuned_params = {
    "learning_rate": [0.02, 0.2, 0.5, 0.8],  # shrinkage
    "max_depth": [8, 12, 15, 20],
    "subsample": [0.5, 0.6, 0.7, 0.8],
    "reg_lambda": [0.01, 0.2, 0.5, 0.8],
    "colsample_bytree": [0.3, 0.5, 0.7, 0.8],
}

# Train the model
X_train, y_train, X_test, test_id, feature_names = load_data(reinbalancedDataPath)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, random_state=SEED, test_size=TEST_SIZE)

trainer = ortTrainer(modelClass=LGBMClassifier, params=params,
                     tuned_params=tuned_params, isupdate=True, istune=False,
                     modelName='lgb-ordinal')
trainer.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    early_stopping_rounds=100,
    eval_metric="auc",
    verbose=True,
)
trainer.getOutput(X_test, test_id, X_val, y_val)
print(trainer.bestParams)

# Map each feature name to its importance (float importances as dict keys
# would silently collapse duplicate values, e.g. several zero importances).
dict(zip(feature_names, trainer.model.feature_importances_))
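# Compatibility note (version assumption): newer LightGBM releases removed
# early_stopping_rounds / verbose from the sklearn fit() in favor of callbacks,
# in which case the call above becomes roughly:
#
#     import lightgbm as lgb
#     trainer.fit(X_train, y_train, eval_set=[(X_val, y_val)], eval_metric="auc",
#                 callbacks=[lgb.early_stopping(100), lgb.log_evaluation()])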