Code example #1
from pylightgbm.models import GBMClassifier  # scikit-learn style wrapper around the LightGBM executable


class LightGBM(BaseAlgo):

    default_params = {'exec_path': 'lightgbm', 'num_threads': 4}

    def __init__(self, params):
        self.params = self.default_params.copy()
        self.params.update(params)  # user-supplied params override the defaults

    def fit(self,
            X_train,
            y_train,
            X_eval=None,
            y_eval=None,
            seed=42,
            feature_names=None,
            eval_func=None,
            **kwa):
        params = self.params.copy()
        params['bagging_seed'] = seed
        params['feature_fraction_seed'] = seed + 3

        self.model = GBMClassifier(**params)

        if X_eval is None:
            self.model.fit(X_train, y_train)
        else:
            self.model.fit(X_train, y_train, test_data=[(X_eval, y_eval)])

    def predict(self, X):
        return self.model.predict(X)

    def predict_proba(self, X):
        # use predict_proba so this returns class probabilities rather than labels
        return self.model.predict_proba(X)
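A minimal sketch of how this wrapper might be driven, assuming the LightGBM executable from default_params is available; the random arrays and parameter values below are illustrative assumptions, not part of the original class:

import numpy as np

# hypothetical training/evaluation arrays for illustration only
X_train = np.random.rand(200, 10)
y_train = np.random.randint(0, 2, size=200)
X_eval = np.random.rand(50, 10)
y_eval = np.random.randint(0, 2, size=50)

algo = LightGBM({'application': 'binary', 'num_iterations': 100, 'learning_rate': 0.1})
algo.fit(X_train, y_train, X_eval=X_eval, y_eval=y_eval, seed=42)
labels = algo.predict(X_eval)
proba = algo.predict_proba(X_eval)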
Code example #2
    def test_multiclass(self):

        clf = GBMClassifier(exec_path=path_to_exec,
                            min_data_in_leaf=1,
                            learning_rate=0.1,
                            num_leaves=5,
                            num_class=n_classes,
                            metric='multi_logloss',
                            application='multiclass',
                            num_iterations=100)
        # fit() is exercised both without and with an evaluation set
        clf.fit(Xmulti, Ymulti.argmax(-1))
        clf.fit(Xmulti,
                Ymulti.argmax(-1),
                test_data=[(Xmulti, Ymulti.argmax(-1))])
        score = metrics.accuracy_score(Ymulti.argmax(-1), clf.predict(Xmulti))
        assert score > 0.8
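This test relies on fixtures such as 'path_to_exec', 'n_classes', 'Xmulti' and a one-hot encoded 'Ymulti' defined elsewhere in the test module. A sketch of what those fixtures might look like; the shapes and values are assumptions, not taken from the original test file:

import numpy as np
from sklearn import datasets

n_classes = 3
Xmulti, y_int = datasets.make_classification(n_samples=300,
                                             n_features=20,
                                             n_informative=10,
                                             n_classes=n_classes,
                                             random_state=0)
# one-hot encode so that Ymulti.argmax(-1) recovers the integer labels
Ymulti = np.eye(n_classes)[y_int]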
Code example #3
# NOTE: the original snippet starts mid-call; the opening line below is assumed
# (the exec_path value follows the convention used in the other examples)
clf = GBMClassifier(exec_path=path_to_exec,
                    num_iterations=1000,
                    min_data_in_leaf=1,
                    num_leaves=10,
                    metric='binary_error',
                    learning_rate=0.1,
                    early_stopping_round=10,
                    verbose=False)

best_rounds = []
scores = []
for i, (train_idx, valid_idx) in enumerate(skf.split(X, Y)):
    x_train = X[train_idx, :]
    y_train = Y[train_idx]

    x_valid = X[valid_idx, :]
    y_valid = Y[valid_idx]

    clf.fit(x_train, y_train, test_data=[(x_valid, y_valid)])
    best_round = clf.best_round
    best_rounds.append(best_round)

    y_pred = clf.predict(x_valid)

    score = metrics.accuracy_score(y_valid, y_pred)
    scores.append(score)

    print("Fold: [{}/{}]: Accuracy: {:.3f}, best round: {}".format(
        i + 1, skf.n_splits, score, best_round))
print("Average: accuracy: {:.3f}, best round: {}".format(
    np.mean(scores), int(np.mean(best_rounds))))
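With the per-fold best rounds collected above, a common follow-up is to retrain on the full training set for roughly the averaged best round, since no validation fold is left for early stopping. A sketch of that step, assuming the same parameters are reused (this refit is not part of the original snippet):

final_clf = GBMClassifier(exec_path=path_to_exec,
                          num_iterations=int(np.mean(best_rounds)),
                          min_data_in_leaf=1,
                          num_leaves=10,
                          metric='binary_error',
                          learning_rate=0.1,
                          verbose=False)
final_clf.fit(X, Y)  # no test_data here, so training runs for the fixed number of rounds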
Code example #4
import numpy as np
from sklearn import datasets, metrics, model_selection
from pylightgbm.models import GBMClassifier

# Parameters
seed = 1337
nfolds = 5
test_size = 0.2
path_to_exec = "~/Documents/apps/LightGBM/lightgbm"
np.random.seed(seed)  # for reproducibility

X, Y = datasets.make_classification(n_samples=1000,
                                    n_features=100,
                                    random_state=seed)
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=seed)

# 'exec_path' is the path to lightgbm executable
clf = GBMClassifier(exec_path=path_to_exec,
                    num_iterations=1000,
                    learning_rate=0.01,
                    min_data_in_leaf=1,
                    num_leaves=5,
                    metric='binary_error',
                    early_stopping_round=20)

clf.fit(x_train, y_train, test_data=[(x_test, y_test)])

y_prob = clf.predict_proba(x_test)
y_pred = y_prob.argmax(-1)

print("Log loss: ", metrics.log_loss(y_test, y_prob))
print("Accuracy: ", metrics.accuracy_score(y_test, y_pred))
print("Best round: ", clf.best_round)
Code example #5
# xgboost baseline; 'params' and 'dtrain' here come from an earlier, unshown part of the script
bst1 = xgb.train(params, dtrain, params['n'])
# ------------------------------------------------------------------
params = {
    'exec_path': path_to_exec,
    'num_iterations': 108,
    'learning_rate': 0.079,
    'num_leaves': 13,
    'metric': 'binary_error',
    'min_sum_hessian_in_leaf': 1,
    'bagging_fraction': 0.642,
    'bagging_freq': 1,
    'verbose': 0
}

bst2 = GBMClassifier(boosting_type='gbdt', **params)
bst2.fit(X_train, y_train)
# ------------------------------------------------------------------
params_est = {
    'n_estimators': 300,
    'loss': 'exponential',
    'learning_rate': 0.08,
    'subsample': 0.6910000000000001,
    'min_samples_leaf': 340,
    'max_features': 53,
    'random_state': 1
}
bst3 = GradientBoostingClassifier(**params_est)
bst3.fit(X_train, y_train)
# ------------------------------------------------------------------
from keras.callbacks import Callback as keras_clb
random.seed(666)
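The fragment above trains three boosters side by side: an xgboost Booster (bst1), the pylightgbm wrapper (bst2) and scikit-learn's GradientBoostingClassifier (bst3). A sketch of how their held-out accuracy might be compared, assuming a binary objective for the xgboost model and X_test / y_test splits defined elsewhere in the script:

import xgboost as xgb
from sklearn import metrics

p1 = (bst1.predict(xgb.DMatrix(X_test)) > 0.5).astype(int)  # Booster.predict returns probabilities
p2 = bst2.predict(X_test)  # pylightgbm returns class labels
p3 = bst3.predict(X_test)  # scikit-learn returns class labels

for name, pred in [('xgboost', p1), ('pylightgbm', p2), ('sklearn GBM', p3)]:
    print(name, metrics.accuracy_score(y_test, pred))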
Code example #6
# -*- coding: utf-8 -*-
"""
@author: Ardalan MEHRANI <*****@*****.**>
@brief:
"""
import pickle
import numpy as np
from sklearn import datasets, metrics, model_selection
from pylightgbm.models import GBMClassifier

# Parameters
path_to_exec = "~/Documents/apps/LightGBM/lightgbm"

X, Y = datasets.make_classification(n_samples=1000,
                                    n_features=100,
                                    random_state=1337)

# 'exec_path' is the path to lightgbm executable
clf = GBMClassifier(exec_path=path_to_exec, verbose=False)

clf.fit(X, Y)

y_pred = clf.predict(X)

print("Accuracy: ", metrics.accuracy_score(Y, y_pred))

# The sklearn API models are picklable
print("Pickling sklearn API models")
pickle.dump(clf, open("clf_gbm.pkl", "wb"))
clf2 = pickle.load(open("clf_gbm.pkl", "rb"))
print(np.allclose(clf.predict(X), clf2.predict(X)))
Code example #7
File: lgb_baseline.py  Project: chepet/MLBootCampV
# NOTE: the snippet begins mid-call; the call head below is assumed, and the
# preceding arguments (e.g. exec_path) are not shown in the original file
cl = GBMClassifier(
    num_leaves=64,
    min_data_in_leaf=1,
    min_sum_hessian_in_leaf=1e-4,
    num_iterations=5000,
    num_threads=4,
    early_stopping_round=EARLY_STOPPING,
    drop_rate=0.0001,
    max_depth=6,
    lambda_l1=0.,
    lambda_l2=0.,
    max_bin=63,
    feature_fraction=1.0,
    #bagging_fraction=0.5,
    #bagging_freq=3,
    verbose=True)
cl.fit(X_train, y_train, test_data=[(X_test, y_test)])

#</editor-fold>

#<editor-fold desc="Submission generation">

if MAKE_SUBMISSION:
    print('Computing submission probabilities...')
    y_submission = cl.predict_proba(x_test)[:, 1]
    print('Store submission data')
    submission_filename = os.path.join(submission_folder,
                                       'submission_lightgbm.dat')
    store_submission(y_submission, submission_filename)
    print(
        'Submission data have been stored in {}\n'.format(submission_filename))