Ejemplo n.º 1
0
    def __train_qreg(self, X, y, quantile_alphas=(0.05, 0.5, 0.95)):
        """Fit one LightGBM quantile regressor per requested quantile level.

        Sets ``self.params["objective"]`` to ``"quantile"`` (this in-place
        update of the shared params is kept from the original behaviour) and
        trains an independent ``LGBMRegressor`` on the same data for every
        level in ``quantile_alphas``.

        Args:
            X: Training features, passed unchanged to ``LGBMRegressor.fit``.
            y: Training targets, passed unchanged to ``LGBMRegressor.fit``.
            quantile_alphas: Iterable of quantile levels to fit. Defaults to
                ``(0.05, 0.5, 0.95)``, the levels that were previously
                hard-coded.

        Returns:
            list: The fitted regressors, in the same order as
            ``quantile_alphas``.
        """
        self.params["objective"] = "quantile"

        lgb_quantile_alphas = []
        for quantile_alpha in tqdm(quantile_alphas, desc="Training quantiles"):
            # Merge instead of passing ``alpha`` alongside ``**self.params``:
            # an "alpha" entry already present in the shared params would
            # otherwise raise "got multiple values for keyword argument".
            model_params = dict(self.params, alpha=quantile_alpha)
            lgb = LGBMRegressor(**model_params)
            lgb.fit(X, y)
            lgb_quantile_alphas.append(lgb)

        return lgb_quantile_alphas
Ejemplo n.º 2
0
def run_cv(x_train, x_test, y_test, y_train, conf):
    """Train a LightGBM model, log timing and results, then predict on the test split.

    Note the parameter order: ``y_test`` comes before ``y_train``.

    Args:
        x_train: Training features; must expose ``.shape``.
        x_test: Test features; must expose ``.shape``.
        y_test: Test targets, forwarded to ``lgb_predict``.
        y_train: Training targets, forwarded to ``LightGBM.fit``.
        conf: Configuration object handed to ``LightGBM`` and ``lgb_predict``.

    Returns:
        None. Results are only logged; predictions are not written to disk
        because ``save_result_path=None`` is passed to ``lgb_predict``.
    """
    tic = time.time()
    log.logger.info('x_train.shape={}, x_test.shape={}'.format(
        x_train.shape, x_test.shape))

    lgb = LightGBM(conf)
    lgb_model, best_score, best_round = lgb.fit(x_train, y_train)
    log.logger.info('Time cost {}s'.format(time.time() - tic))
    log.logger.info('best_round={}, best_score={}'.format(
        best_round, best_score))

    # predict
    # The timestamped result path that used to be built here was never used
    # (saving is disabled below), so it has been dropped.
    lgb_predict(lgb_model, conf, x_test, y_test, save_result_path=None)
Ejemplo n.º 3
0
# Held-out metrics for the predictions currently stored in y_preds.
test_accuracy_gb = metrics.accuracy_score(y_test, y_preds)
test_recall_gb = metrics.recall_score(y_test, y_preds)

print('Test Accuracy:', test_accuracy_gb)
print('Test Sensitivity:', test_recall_gb)

# Light GBM Classifier

import lightgbm as lgb
from lightgbm import LGBMClassifier

# NOTE: this rebinding hides the `lgb` module alias imported just above. The
# name is kept because later code may expect `lgb` to be the classifier.
lgb = LGBMClassifier(max_depth=7,
                     num_leaves=36,
                     learning_rate=0.4)

# Fit and predict once; the earlier duplicate fit/predict pair repeated the
# same work on identical inputs.
model = lgb.fit(X_train, y_train)
y_preds = model.predict(X_test)

lgb_cv_score = cross_val_score(lgb, X, y, cv=5)

# scikit-learn metrics take (y_true, y_pred). Recall is not symmetric in its
# arguments, so the previously swapped order reported precision under the
# "Sensitivity" label.
test_accuracy_lgb = metrics.accuracy_score(y_test, y_preds)
test_recall_lgb = metrics.recall_score(y_test, y_preds)

print('CV Score:', lgb_cv_score.mean())
print('Test Accuracy:', test_accuracy_lgb)
print('Test Sensitivity:', test_recall_lgb)
Ejemplo n.º 4
0
                         n_estimators=800,
                         objective='binary',
                         subsample_freq=1,
                         bagging_fraction=0.6,
                         feature_fraction=0.6,
                         learning_rate=0.01,
                         min_child_weight=2,
                         random_state=20,
                         n_jobs=4)

def _fit_then_score(model):
    """Fit ``model`` on the training split and return column 1 of its
    ``predict_proba`` output for the test split."""
    model.fit(train_x, train_y)
    return model.predict_proba(test_x)[:, 1]


lr = LogisticRegression()

# Models are fitted in the same order as before: lr, lgb, gbdt, xgb.
lr_pred = _fit_then_score(lr)
lgb_pred = _fit_then_score(lgb)
gbdt_pred = _fit_then_score(gbdt)
xgb_pred = _fit_then_score(xgb)

# Fixed-weight blend of the lgb, xgb and gbdt scores (lr is not part of it).
y_pred = 0.7 * lgb_pred + 0.15 * xgb_pred + 0.15 * gbdt_pred

auc = roc_auc_score(test_y, y_pred)
print("xgboost+lightgbm+gbdt的加权auc是{}".format(auc))

# Feed the lr and xgb score vectors to MINE.
mine = MINE()
mine.compute_score(lr_pred, xgb_pred)
Ejemplo n.º 5
0
print(grid.best_params_)
print(grid.best_score_)

lgb_params['reg_alpha'] = grid.best_params['reg_alpha']
lgb_params['reg_lambda'] = grid.best_params['reg_lambda']
lgb_params['colsample_bytree'] = grid.best_params['colsample_bytree']
lgb_params['colsample_bytree'] = grid.best_params['colsample_bytree']
lgb_params['n_estimators'] = grid.best_params_['n_estimators']
lgb.set_params(**lgb_params)
'''

# Pull the label out of the training frame, then strip the 'target' column
# from both the training features and the test frame.
Y = train['target'].values
X = train.drop(columns=['target'])
test = test.drop(columns=['target'])

# NOTE(review): `verbose` as a fit() keyword is not accepted by every LightGBM
# release; confirm against the installed version.
lgb.fit(X, Y, verbose=False)
pred = lgb.predict(test)

n_predictions = len(pred)
print(n_predictions)

# One row per prediction, with IDs numbered from 0.
submission = pd.DataFrame({
    'ID': range(n_predictions),
    'item_cnt_month': pred,
})
submission.to_csv(SUBMISSION_FILE, index=False)

# Elapsed wall-clock time since start_time, converted from seconds to minutes.
elapsed_minutes = (time.time() - start_time) / 60
print('Process Complete {:.4f}'.format(elapsed_minutes))







Ejemplo n.º 6
0
# Training parameters handed to LightGBM.
parameters = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'n_estimators': 100,
    'max_depth': 6,
    'min_child_weight': 1.25,
    'is_unbalance': 'false',
    'scale_pos_weight': 175,
    'num_leaves': 70,
    'feature_fraction': 0.9,
}

# The lightgbm package has no module-level `fit`; the functional training
# entry point that accepts (params, train_set, valid_sets, feval) is `train`.
# NOTE(review): LightGBM 4.x expects early stopping to be configured through
# callbacks rather than this keyword; confirm against the installed version.
gbm = lightgbm.train(parameters,
                     train_data,
                     valid_sets=test_data,
                     feval=mcc_eval,
                     early_stopping_rounds=50)

print(gbm.feature_importance())
# save model
filename = './gbm.pkl'
joblib.dump(gbm, filename)

# `train` returns a Booster, which has no `predict_proba`. With the 'binary'
# objective, `predict` already returns the positive-class probability as a
# 1-D array, so no column slicing is needed.
preds = gbm.predict(X_test)
print(roc_auc_score(y_test, preds))
# calculate best threshold
thresholds = np.linspace(0.1, 0.99, 50)
mcc = np.array([matthews_corrcoef(y_test, preds > thr) for thr in thresholds])
plt.plot(thresholds, mcc)
Ejemplo n.º 7
0
# y_pred = classifier.predict(X_test)
# # Gini
# gini_norm = gini_normalized(y_test, y_pred)
# print 'gini coef for ElasticNet=', gini_norm
# =============================================================================


# ### LightGBM

# In[8]:


import lightgbm as lgb

# Keep the classifier under its own name so the `lgb` module alias is not
# overwritten by the estimator instance.
lgb_clf = lgb.LGBMClassifier()
print('Start training...')
lgb_clf.fit(X_train, y_train)
print('Start predicting...')
# Column 1 of predict_proba is used as the score passed to the gini helper.
y_pred = lgb_clf.predict_proba(X_test)[:, 1]

# Gini
gini_norm = gini_normalized(y_test, y_pred)
# A single pre-formatted string prints the same text under Python 2 and 3;
# the former `print 'label', value` statement is a SyntaxError on Python 3.
print('gini coef for Lightgbm= {}'.format(gini_norm))


# ## Stacking

# In[4]:


from sklearn.model_selection import StratifiedKFold
import lightgbm as lgb