import warnings

import numpy as np
from lightgbm import LGBMRegressor
from sklearn.base import BaseEstimator, RegressorMixin


class LGBMWrapper(BaseEstimator, RegressorMixin):
    """sklearn-compatible regressor that forwards categorical feature names to LightGBM."""

    def __init__(self, categorical_feature=None, **params):
        self.model = LGBMRegressor(**params)
        self.categorical_feature = categorical_feature

    def fit(self, X, y):
        # Pass along only the categorical columns that actually exist in X.
        cats = None if self.categorical_feature is None else list(
            X.columns.intersection(self.categorical_feature))
        with warnings.catch_warnings():
            # Silence LightGBM's warning that categorical_feature overrides
            # the Dataset setting (the message match is case-insensitive).
            warnings.filterwarnings(
                "ignore", "categorical_feature in Dataset is overridden")
            self.model.fit(
                X, y,
                **({} if cats is None else {"categorical_feature": cats}))
        self.feature_importances_ = self.model.feature_importances_
        return self

    def predict(self, X):
        return self.model.predict(X)

    def get_params(self, deep=True):
        return {
            **self.model.get_params(deep),
            "categorical_feature": self.categorical_feature,
        }

    def set_params(self, **params):
        ctf = params.pop("categorical_feature", None)
        if ctf is not None:
            self.categorical_feature = ctf
        self.model.set_params(**params)
        return self
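# Hedged usage sketch (not from the original): because LGBMWrapper exposes
# get_params/set_params, it should slot into sklearn's model selection tools.
# X_train, y_train, and the categorical column names are placeholder assumptions.
#
# from sklearn.model_selection import GridSearchCV
#
# wrapper = LGBMWrapper(categorical_feature=['country', 'tld'], n_estimators=200)
# search = GridSearchCV(wrapper, param_grid={'num_leaves': [15, 31, 63]}, cv=3)
# search.fit(X_train, y_train)
# print(search.best_params_)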
class LGBMUncertainty(BaseEstimator, RegressorMixin):
    """Estimates prediction uncertainty from the spread of per-tree leaf outputs."""

    def __init__(self, **kwargs):
        self.lgb = LGBMRegressor(**kwargs)

    def fit(self, X, y):
        self.lgb.fit(X, y)
        return self

    def predict(self, X, y=None):
        # pred_leaf=True returns, per sample, the index of the leaf reached
        # in each tree.
        pred = self.lgb.predict(X, pred_leaf=True)
        ind_pred = []
        for row in pred:
            # Look up the raw output of the reached leaf in every tree.
            ind_pred.append([
                self.lgb.booster_.get_leaf_output(i, j)
                for i, j in enumerate(row)
            ])
        ind_pred = np.vstack(ind_pred)
        # Summing the per-tree leaf outputs reconstructs the boosted
        # prediction; their standard deviation across trees serves as an
        # uncertainty proxy.
        pred_mean = ind_pred.sum(axis=1)
        pred_std = ind_pred.std(axis=1)
        return pred_mean, pred_std

    def get_params(self, deep=True):
        return self.lgb.get_params()

    def set_params(self, **params):
        self.lgb.set_params(**params)
        return self
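# Hedged usage sketch (assumed, not from the original): fit the estimator and
# unpack both the point prediction and the per-tree spread.
# X_train, y_train, and X_test are placeholder names.
#
# unc = LGBMUncertainty(n_estimators=100, random_state=27)
# unc.fit(X_train, y_train)
# mean, std = unc.predict(X_test)
# high_uncertainty = std > np.quantile(std, 0.95)  # flag the least stable predictions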
X = fullDF.filter(regex=r'feature\.|(spf|dkim|dmarc)\.(align|pass)', axis=1).astype('int')
yS = fullDF['target_score'].astype('float')

RAND_STATE = 27
lgbm_base_params = {
    'random_state': RAND_STATE,
    'learning_rate': 0.3,
    'num_leaves': 31,
    'n_estimators': 750,
    'reg_lambda': 1,
}

# (Re-)train a model
rgbm = LGBMRegressor()
rgbm.set_params(**lgbm_base_params)
rgbm.fit(X, yS)
jgbm = rgbm.booster_.dump_model(num_iteration=-1)
num_features = len(jgbm['feature_names'])

# Warm up the LRU caches
for i in range(1000):
    _ = _lru_fac(i)
for inc in [True, False]:
    for cs in range(1, int(num_features / 2)):
        _ = _coalition_quotient_numerator(cs, num_features, inc)

# Feature Power
# With pred_contrib=True, the last column of the output is the model's
# expected (base) value, here read from the first sample's row.
first_tree_default_prediction = rgbm.predict(X.iloc[0:1, :], pred_contrib=True)[-1][-1]
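# Hedged sanity check (illustrative addition, not part of the original
# workflow): per the LightGBM docs, each pred_contrib row holds one
# contribution per feature plus the expected value in the last column, and
# for a regression objective the row should sum to the model's prediction.
contribs = rgbm.predict(X.iloc[0:1, :], pred_contrib=True)
assert np.isclose(contribs.sum(), rgbm.predict(X.iloc[0:1, :])[0])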