コード例 #1
0
# Hyper-parameters for the seventh base model: a scikit-learn gradient
# boosting classifier trained on the second XGBoost feature set.
# NOTE(review): loss='deviance' was renamed to 'log_loss' in scikit-learn 1.1
# and removed in 1.3 -- confirm the pinned scikit-learn version still
# accepts this spelling before upgrading.
params_est = {
    'n_estimators': 200,
    'loss': 'deviance',
    'learning_rate': 0.04,
    'subsample': 0.50,        # row subsampling -> stochastic gradient boosting
    'min_samples_leaf': 60,
    'max_features': 4,
    'random_state': 1         # fixed seed for reproducibility
}
bst7 = GradientBoostingClassifier(**params_est)
bst7.fit(X_train_xgb2, y_train)

# Score the submission set with each trained base model and collect each
# model's positive-class probability as an (n_samples, 1) column, ready to
# be stacked side by side for the level-2 model.
sub_data = submiss[cols].values
# Hoisted: t6/t7 both score the second XGBoost feature set.
xgb2_data = submiss[xgb2_cols].values
# FIX: np.NaN was removed in NumPy 2.0; np.nan is the portable spelling.
submiss_val = xgb.DMatrix(sub_data, missing=np.nan)
t1 = np.asarray([[i] for i in bst1.predict(submiss_val)])
t2 = np.asarray([[i] for i in bst2.predict_proba(sub_data)[:, 1]])
t3 = np.asarray([[i] for i in bst3.predict_proba(sub_data)[:, 1]])
t4 = np.asarray(bst4.predict_proba(submiss[cols_k2].values))
t5 = np.asarray([[i] for i in bst5.predict_proba(submiss_reg)[:, 1]])
t6 = np.asarray([[i] for i in bst6.predict(
    xgb.DMatrix(xgb2_data, missing=np.nan))])
t7 = np.asarray([[i] for i in bst7.predict_proba(xgb2_data)[:, 1]])

tst_data = np.hstack((
    t1,
    t2,
    t3,
    t4,
コード例 #2
0
import os

# Experiment parameters
seed = 1337
nfolds = 5
test_size = 0.2
# FIX: Python performs no tilde expansion on its own, so a raw "~/..."
# string only works if the library happens to pass it through a shell.
# NOTE(review): confirm GBMClassifier expects an absolute executable path.
path_to_exec = os.path.expanduser("~/Documents/apps/LightGBM/lightgbm")
np.random.seed(seed)  # for reproducibility

# Synthetic binary-classification problem: 1000 samples, 100 features,
# deterministically generated from the same seed.
X, Y = datasets.make_classification(n_samples=1000,
                                    n_features=100,
                                    random_state=seed)
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=seed)

# Configuration for the LightGBM CLI wrapper; 'exec_path' locates the
# lightgbm executable on disk.
gbm_options = {
    'exec_path': path_to_exec,
    'num_iterations': 1000,
    'learning_rate': 0.01,
    'min_data_in_leaf': 1,
    'num_leaves': 5,
    'metric': 'binary_error',
    'early_stopping_round': 20,
}
clf = GBMClassifier(**gbm_options)

# Train with the held-out split supplied as validation data so early
# stopping can monitor it.
clf.fit(x_train, y_train, test_data=[(x_test, y_test)])

# Per-class probabilities; the predicted label is the argmax over classes.
y_prob = clf.predict_proba(x_test)
y_pred = y_prob.argmax(-1)

print("Log loss: ", metrics.log_loss(y_test, y_prob))
print("Accuracy: ", metrics.accuracy_score(y_test, y_pred))
print("Best round: ", clf.best_round)
コード例 #3
0
ファイル: lgb_baseline.py プロジェクト: chepet/MLBootCampV
    min_sum_hessian_in_leaf=1e-4,
    num_iterations=5000,
    num_threads=4,
    early_stopping_round=EARLY_STOPPING,
    drop_rate=0.0001,
    max_depth=6,
    lambda_l1=0.,
    lambda_l2=0.,
    max_bin=63,
    feature_fraction=1.0,
    #bagging_fraction=0.5,
    #bagging_freq=3,
    verbose=True)
# Fit on the training split, monitoring the held-out split for early stopping.
cl.fit(X_train, y_train, test_data=[(X_test, y_test)])

#</editor-fold>

#<editor-fold desc="Submission generation">

if MAKE_SUBMISSION:
    print('Computing submission probabilities...')
    # Positive-class probability for each submission row.
    # NOTE(review): this scores x_test while the model above was validated
    # on X_test -- confirm x_test really is the competition submission set
    # and not a casing typo.
    y_submission = cl.predict_proba(x_test)[:, 1]
    print('Store submission data')
    submission_filename = os.path.join(submission_folder,
                                       'submission_lightgbm.dat')
    store_submission(y_submission, submission_filename)
    print(
        'Submission data have been stored in {}\n'.format(submission_filename))

#</editor-fold>