Example 1
import warnings

import numpy as np
from lightgbm import LGBMRegressor
from sklearn.base import BaseEstimator, RegressorMixin


class LGBMWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, categorical_feature=None, **params):
        self.model = LGBMRegressor(**params)
        self.categorical_feature = categorical_feature

    def fit(self, X, y):
        # Keep only the categorical columns that actually appear in X.
        cats = None if self.categorical_feature is None else list(
            X.columns.intersection(self.categorical_feature))
        with warnings.catch_warnings():
            # LightGBM warns that categorical_feature passed to fit()
            # overrides the Dataset setting; silence just that message.
            warnings.filterwarnings(
                "ignore", "categorical_feature in Dataset is overridden")
            self.model.fit(
                X, y,
                **({} if cats is None else {"categorical_feature": cats}))
        self.feature_importances_ = self.model.feature_importances_
        return self

    def predict(self, X):
        return self.model.predict(X)

    def get_params(self, deep=True):
        return {
            **self.model.get_params(deep),
            "categorical_feature": self.categorical_feature,
        }

    def set_params(self, **params):
        if "categorical_feature" in params:
            self.categorical_feature = params.pop("categorical_feature")
        self.model.set_params(**params)
        return self
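
Since the wrapper exposes get_params/set_params, it behaves like any other scikit-learn estimator. A minimal usage sketch, assuming a toy DataFrame with a hypothetical categorical column cat_col:

# Illustrative only: cat_col and the toy data are made up for this sketch.
import pandas as pd

toy = pd.DataFrame({
    "num_col": [0.1, 0.4, 0.3, 0.9] * 25,
    "cat_col": pd.Categorical(["a", "b", "a", "c"] * 25),
})
toy_y = toy["num_col"] * 2 + toy["cat_col"].cat.codes

wrapped = LGBMWrapper(categorical_feature=["cat_col"], n_estimators=50)
wrapped.fit(toy, toy_y)
print(wrapped.predict(toy.head()))
print(wrapped.get_params()["categorical_feature"])  # ['cat_col']
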
class LGBMUncertainty(BaseEstimator, RegressorMixin):
    def __init__(self, **kwargs):
        self.lgb = LGBMRegressor(**kwargs)

    def fit(self, X, y):
        self.lgb.fit(X, y)
        return self

    def predict(self, X, y=None):
        # Index of the leaf each sample lands in, for every tree:
        # shape (n_samples, n_trees).
        leaves = self.lgb.predict(X, pred_leaf=True)

        # Translate leaf indices into the per-tree leaf outputs
        # (each tree's raw contribution to the prediction).
        ind_pred = np.vstack([
            [self.lgb.booster_.get_leaf_output(tree, leaf)
             for tree, leaf in enumerate(row)]
            for row in leaves
        ])

        # The raw prediction is the sum of the per-tree outputs; the
        # spread across trees serves as an uncertainty proxy.
        pred_mean = ind_pred.sum(axis=1)
        pred_std = ind_pred.std(axis=1)

        return pred_mean, pred_std

    def get_params(self, deep=True):
        return self.lgb.get_params()

    def set_params(self, **params):
        self.lgb.set_params(**params)
        return self
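
A quick sketch of the mean/std output on synthetic data (the toy data below is purely illustrative):

rng = np.random.default_rng(0)
X_toy = rng.normal(size=(200, 3))
y_toy = 2 * X_toy[:, 0] + rng.normal(scale=0.1, size=200)

unc = LGBMUncertainty(n_estimators=100, random_state=0)
unc.fit(X_toy, y_toy)
mean, std = unc.predict(X_toy[:5])
print(mean.round(2), std.round(3))  # per-sample prediction and tree spread
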
X = fullDF.filter(regex=r'feature\.|(spf|dkim|dmarc)\.(align|pass)',
                  axis=1).astype('int')
yS = fullDF['target_score'].astype('float')

RAND_STATE = 27
lgbm_base_params = {
    'random_state': RAND_STATE,
    'learning_rate': 0.3,
    'num_leaves': 31,
    'n_estimators': 750,
    'reg_lambda': 1
}

# (Re-) train a model
rgbm = LGBMRegressor()
rgbm.set_params(**lgbm_base_params)
rgbm.fit(X, yS)
jgbm = rgbm.booster_.dump_model(num_iteration=-1)
num_features = len(jgbm['feature_names'])
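
dump_model returns the whole model as a plain dict; besides feature_names it carries a tree_info list with one entry per tree, which allows a quick sanity check of what was trained:

# Peek at the dumped model structure.
print(num_features, "features, e.g.:", jgbm['feature_names'][:3])
print(len(jgbm['tree_info']), "trees; the first one has",
      jgbm['tree_info'][0]['num_leaves'], "leaves")
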

# Warm up the LRU caches of the helper functions
for i in range(1000):
    _ = _lru_fac(i)

# Precompute the coalition quotient numerators for every coalition size
for inc in [True, False]:
    for cs in range(1, num_features // 2):
        _ = _coalition_quotient_numerator(cs, num_features, inc)

# Feature Power
# With pred_contrib=True, LightGBM returns one contribution per feature
# plus, in the last column, the expected value of the model output;
# grab that baseline from a single row.
first_tree_default_prediction = rgbm.predict(X.iloc[0:1, :],
                                             pred_contrib=True)[-1][-1]
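
As a sanity check, the per-feature contributions plus that baseline should reconstruct the plain prediction for the same row, since the contribution vector sums to the raw model output:

# Contributions (last entry is the expected value) sum to the prediction.
contribs = rgbm.predict(X.iloc[0:1, :], pred_contrib=True)[0]
assert np.isclose(contribs.sum(), rgbm.predict(X.iloc[0:1, :])[0])
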