# --- Code example #1 --- file: rf.py, project: catwang01/kaggle ---
    'n_estimators': 500,
    'max_depth': 3,
    'random_state': SEED
}

# Random-forest grid-search space (explored when istune=True, cv=5).
tuned_params = dict(
    n_estimators=[400, 600, 800, 1000, 1200],
    max_depth=[3, 4, 5, 6, 7],
    random_state=[1, 2, 3, 4, 5],
)

# Load the preprocessed features and carve out a validation split.
X_train, y_train, X_test, test_id, feature_names = load_data(processedDataPath)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, random_state=SEED, test_size=TEST_SIZE)
# Random-forest trainer: `params` are the fixed hyper-parameters,
# `tuned_params` the 5-fold grid-search space (searched because istune=True).
trainer = ortTrainer(modelClass=RandomForestClassifier,
                     params=other_params,
                     tuned_params=tuned_params,
                     isupdate=True, istune=True,
                     modelName='rf', cv=5)

trainer.fit(X_train, y_train)
# getOutput presumably produces test-set predictions keyed by id and scores
# the validation split — TODO confirm in the ortTrainer base class.
output = trainer.getOutput(X_test, test_id, X_val, y_val)

# Compare the new predictions against the current best submission as a
# sanity check before submitting.
bestResult = pd.read_csv('currentBest.txt', header=None, sep='\t')
bestResult.columns = ['id', 'prob']

# pearsonr returns a (correlation, p-value) pair; the whole tuple is printed.
pccs = pearsonr(output['prob'], bestResult['prob'])
print("Score prediction: {}".format(pccs))
print(trainer.bestParams)

sorted_importances, sorted_featurenames = zip(
    *sorted(zip(trainer.model.feature_importances_, feature_names), reverse=True)
# --- Code example #2 ---
# XGBoost grid-search space.
tuned_params = dict(
    learning_rate=[0.02, 0.2, 0.5, 0.9],  # shrinkage (eta)
    max_depth=[8, 12, 15, 20],
    subsample=[0.6, 0.7, 0.8, 0.9],
    colsample_bytree=[0.3, 0.5, 0.7, 0.9],
)


# Load the preprocessed features.
X_train, y_train, X_test, test_id, feature_names = load_data(processedDataPath)

# Hold out a validation set, used below for early stopping.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, random_state=SEED, test_size=TEST_SIZE)

# XGBoost trainer with grid search enabled (istune=True).
trainer = ortTrainer(modelClass=XGBClassifier ,
                     params=params,
                     tuned_params=tuned_params,
                     isupdate=True,
                     istune=True,
                     modelName='xgb-ordinal')

# Extra kwargs are presumably forwarded to XGBClassifier.fit — stop after
# 100 rounds without AUC improvement on the validation set. TODO confirm
# the forwarding in ortTrainer.fit.
trainer.fit( X_train, y_train,
    eval_set=[(X_val, y_val)],
    early_stopping_rounds=100,
    eval_metric="auc",
    verbose=True
)

trainer.getOutput(X_test, test_id, X_val, y_val)



# --- Code example #3 --- file: stack.py, project: catwang01/kaggle ---
                       modelName='adb', cv=5)


# Stacking ensemble: each base trainer contributes out-of-fold (OOF)
# predictions that become meta-features for a final LightGBM model.
trainers = []
adbtrainer = TrainerWrapper(baseTrainer, adbtrainer_params)
trainers.append(adbtrainer)

#extratrainer= TrainerWrapper(baseTrainer, extratrainer_params)
rftrainer = TrainerWrapper(ortTrainer, rftrainer_params)
trainers.append(rftrainer)

# NOTE(review): LightGBM params wrapped in `XgBoostWrapper` — may be
# intentional (shared early-stopping fit kwargs) but worth confirming.
lgbtrainer = XgBoostWrapper(ortTrainer, lgbtrainer_params)
trainers.append(lgbtrainer)

# Each entry is a (train_meta_features, test_meta_features) pair.
features = [get_oof(trainer, X_train, y_train, X_test) for trainer in trainers]

# FIX: np.hstack requires a sequence of arrays; passing a bare generator
# raises TypeError on NumPy >= 1.20 ("arrays to stack must be passed as a
# 'sequence'"). Materialize the lists before stacking.
X_train_new = np.hstack([feature[0] for feature in features])
X_test_new = np.hstack([feature[1] for feature in features])

# Level-2 model trained on the stacked meta-features, with grid search.
finaltrainer = ortTrainer(modelClass=LGBMClassifier,
                        params=lgb_params, tuned_params=lgb_tune_params,
                        isupdate=True, istune=True,
                        modelName='final', cv=5)

finaltrainer.fit(X_train_new, y_train)

# Hold-out split of the meta-features so getOutput can report a score.
_, X_val_new, _, y_val_new = train_test_split(X_train_new, y_train, random_state=SEED)
finaltrainer.getOutput(X_test_new, test_id, X_val_new, y_val_new)

print("haha")
# --- Code example #4 ---
from baseModel import ortTrainer
from path import *
from utils import load_data
from sklearn.linear_model import LogisticRegression

# Fixed logistic-regression parameters (not part of the search grid).
other_params = dict(n_jobs=1, random_state=1, C=0.3)

# Grid-search space: only the inverse regularization strength C is tuned.
tuned_params = dict(C=[0.01, 0.05, 0.1, 0.3, 0.5, 0.7])

# Load the preprocessed features; unlike the sibling scripts, no
# validation split is made here.
X_train, y_train, X_test, test_id, feature_names = load_data(processedDataPath)
# Logistic-regression trainer; grid search over C (istune=True).
trainer = ortTrainer(modelClass=LogisticRegression,
                      params=other_params,
                      tuned_params=tuned_params,
                      isupdate=True,
                      istune=True,
                      modelName='lr')

trainer.fit(X_train, y_train)
# NOTE(review): getOutput is called with no arguments here, while every
# other script passes (X_test, test_id, X_val, y_val) — confirm the method
# has defaults, otherwise this raises TypeError at runtime.
trainer.getOutput()
# --- Code example #5 --- file: lgb.py, project: catwang01/kaggle ---
}

# LightGBM grid-search space.
tuned_params = dict(
    learning_rate=[0.02, 0.2, 0.5, 0.8],  # shrinkage (eta)
    max_depth=[8, 12, 15, 20],
    subsample=[0.5, 0.6, 0.7, 0.8],
    reg_lambda=[0.01, 0.2, 0.5, 0.8],
    colsample_bytree=[0.3, 0.5, 0.7, 0.8],
)

# Train the model on the rebalanced dataset.
X_train, y_train, X_test, test_id, feature_names = load_data(reinbalancedDataPath)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, random_state=SEED, test_size=TEST_SIZE)
# LightGBM trainer. NOTE(review): istune=False, so `tuned_params` above is
# presumably ignored on this run — confirm in ortTrainer.
trainer = ortTrainer(modelClass=LGBMClassifier,
                     params=params,
                     tuned_params=tuned_params,
                     isupdate=True,
                     istune=False,
                     modelName='lgb-ordinal')

# Extra kwargs presumably forwarded to LGBMClassifier.fit: early stopping
# on validation AUC after 100 stagnant rounds — TODO confirm forwarding.
trainer.fit( X_train, y_train,
    eval_set=[(X_val, y_val)],
    early_stopping_rounds=100,
    eval_metric="auc",
    verbose=True
)

trainer.getOutput(X_test, test_id, X_val, y_val)

print(trainer.bestParams)

# NOTE(review): this expression's result is discarded, and keying the dict
# by float importances collapses duplicate values — the intended mapping is
# probably feature name -> importance, assigned or printed.
dict(zip(trainer.model.feature_importances_, feature_names))