import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

# NOTE: import path assumed for illustration; pull the loss factories and
# clip_sigmoid from wherever they are defined in this repository.
# from losses import (WeightedFocalLoss, WeightedFocalMetric,
#                     WeightedCrossEntropyLoss, WeightedCrossEntropyMetric,
#                     clip_sigmoid)


def test_focal_wce_comparison():
    """
    Assert that there is no difference between running Focal Loss with
    alpha=3 and gamma=0 and running WCE with alpha=3.
    """
    X, y = make_classification(n_samples=1000, n_features=10, random_state=41114)
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.25, random_state=41114)

    alpha = 3.0
    gamma = 0

    train = lgb.Dataset(X_train, y_train)
    valid = lgb.Dataset(X_valid, y_valid, reference=train)

    params_wfl = {
        'n_estimators': 300,
        'seed': 41114,
        'n_jobs': 8,
        'learning_rate': 0.1,
    }

    wfl_clf = lgb.train(params=params_wfl,
                        train_set=train,
                        valid_sets=[train, valid],
                        valid_names=['train', 'valid'],
                        fobj=WeightedFocalLoss(alpha=alpha, gamma=gamma),
                        feval=WeightedFocalMetric(alpha=alpha, gamma=gamma),
                        early_stopping_rounds=100)

    params_wce = {
        'n_estimators': 300,
        'seed': 41114,
        'n_jobs': 8,
        'learning_rate': 0.1,
    }

    wce_clf = lgb.train(params=params_wce,
                        train_set=train,
                        valid_sets=[train, valid],
                        valid_names=['train', 'valid'],
                        fobj=WeightedCrossEntropyLoss(alpha=alpha),
                        feval=WeightedCrossEntropyMetric(alpha=alpha),
                        early_stopping_rounds=100)

    # With a custom fobj the booster predicts raw margins, so squash them
    # before comparing.
    wfl_preds = clip_sigmoid(wfl_clf.predict(X_valid))
    wce_preds = clip_sigmoid(wce_clf.predict(X_valid))

    assert mean_absolute_error(wfl_preds, wce_preds) == 0.0
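# Why the comparison above holds: with gamma=0, the (1 - yhat)**gamma and
# yhat**gamma modulating factors are identically 1, so the focal loss, its
# gradient and its hessian all collapse to the weighted cross-entropy
# expressions, and both boosters grow identical trees.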
def focal_metric(yhat, dtrain, alpha=alpha, gamma=gamma, XGBoost=XGBoost):
    """
    Weighted Focal Loss metric.

    Args:
        yhat: Margin predictions
        dtrain: The XGBoost / LightGBM dataset
        alpha (float): Scale applied to the positive class
        gamma (float): Focusing parameter
        XGBoost (bool): Whether the XGBoost return signature is used

    Returns:
        Name of the eval metric and the eval score; for LightGBM, also a
        bool indicating whether a higher score is better (False, since this
        is a loss to be minimised).
    """
    y = dtrain.get_label()
    yhat = clip_sigmoid(yhat)

    elements = (- alpha * y * np.log(yhat) * np.power(1 - yhat, gamma)
                - (1 - y) * np.log(1 - yhat) * np.power(yhat, gamma))

    if XGBoost:
        return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements) / len(y))
    else:
        return f'Focal_alpha{alpha}_gamma{gamma}', (np.sum(elements) / len(y)), False
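# focal_metric's default arguments (alpha=alpha, gamma=gamma, XGBoost=XGBoost)
# only resolve inside an enclosing scope that binds those names, so the
# function is presumably nested in a factory. A minimal sketch of what such a
# factory could look like, consistent with the tests calling
# WeightedFocalMetric(alpha=..., gamma=...) and handing the result to LightGBM
# as `feval`; the body below is an assumption, not necessarily the actual
# implementation:
def WeightedFocalMetric(alpha=1.0, gamma=0.0, XGBoost=False):
    def _metric(yhat, dtrain):
        y = dtrain.get_label()
        p = clip_sigmoid(yhat)
        score = np.mean(- alpha * y * np.log(p) * np.power(1 - p, gamma)
                        - (1 - y) * np.log(1 - p) * np.power(p, gamma))
        name = f'Focal_alpha{alpha}_gamma{gamma}'
        # XGBoost expects (name, score); LightGBM also wants is_higher_better.
        return (name, score) if XGBoost else (name, score, False)
    return _metric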
def test_focal_lgb_implementation():
    """
    Assert that there is no difference between running Focal Loss with
    alpha=1 and gamma=0 and LightGBM's built-in cross-entropy loss.
    """
    X, y = make_classification(n_samples=1000, n_features=10, random_state=41114)
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.25, random_state=41114)

    alpha = 1.0
    gamma = 0

    train = lgb.Dataset(X_train, y_train)
    valid = lgb.Dataset(X_valid, y_valid, reference=train)

    params_wfl = {
        'n_estimators': 300,
        'seed': 41114,
        'n_jobs': 8,
        'learning_rate': 0.1,
    }

    wfl_clf = lgb.train(params=params_wfl,
                        train_set=train,
                        valid_sets=[train, valid],
                        valid_names=['train', 'valid'],
                        fobj=WeightedFocalLoss(alpha=alpha, gamma=gamma),
                        feval=WeightedFocalMetric(alpha=alpha, gamma=gamma),
                        early_stopping_rounds=100)

    params = {
        'n_estimators': 300,
        'objective': 'cross_entropy',
        'seed': 41114,
        'n_jobs': 8,
        'metric': 'cross_entropy',
        'learning_rate': 0.1,
        # Custom objectives boost from a score of 0, so disable LightGBM's
        # mean-based initial score for a like-for-like comparison.
        'boost_from_average': False
    }

    clf = lgb.train(params=params,
                    train_set=train,
                    valid_sets=[train, valid],
                    valid_names=['train', 'valid'],
                    early_stopping_rounds=100)

    # The custom objective predicts raw margins; the built-in objective
    # already returns probabilities.
    wfl_preds = clip_sigmoid(wfl_clf.predict(X_valid))
    preds = clf.predict(X_valid)

    assert mean_absolute_error(wfl_preds, preds) == 0.0
def _hessian(yhat, dtrain, alpha):
    """Compute the weighted cross-entropy hessian.

    Args:
        yhat (np.array): Margin predictions
        dtrain: The XGBoost / LightGBM dataset
        alpha (float): Scale applied to the positive class

    Returns:
        hess: Weighted cross-entropy hessian
    """
    y = dtrain.get_label()
    yhat = clip_sigmoid(yhat)

    # Second derivative w.r.t. the margin z of
    # -alpha*y*log(p) - (1-y)*log(1-p) with p = sigmoid(z).
    hess = (y * (alpha - 1) + 1) * yhat * (1 - yhat)

    return hess
def _gradient(yhat, dtrain, alpha):
    """Compute the weighted cross-entropy gradient.

    Args:
        yhat (np.array): Margin predictions
        dtrain: The XGBoost / LightGBM dataset
        alpha (float): Scale applied to the positive class

    Returns:
        grad: Weighted cross-entropy gradient
    """
    y = dtrain.get_label()
    yhat = clip_sigmoid(yhat)

    # First derivative w.r.t. the margin z of
    # -alpha*y*log(p) - (1-y)*log(1-p) with p = sigmoid(z).
    grad = (y * yhat * (alpha - 1)) + yhat - (alpha * y)

    return grad
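# The tests hand WeightedCrossEntropyLoss(alpha=...) to lgb.train as `fobj`,
# which LightGBM calls as f(yhat, dtrain) -> (grad, hess). A minimal sketch of
# such a factory built on the _gradient and _hessian above; the name matches
# the tests, but this body is an assumption, not necessarily the actual
# implementation:
def WeightedCrossEntropyLoss(alpha=1.0):
    def _objective(yhat, dtrain):
        return (_gradient(yhat, dtrain, alpha=alpha),
                _hessian(yhat, dtrain, alpha=alpha))
    return _objective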
def weighted_cross_entropy_metric(yhat, dtrain, alpha=alpha, XGBoost=XGBoost):
    """
    Weighted Cross-Entropy metric.

    Args:
        yhat: Margin predictions
        dtrain: The XGBoost / LightGBM dataset
        alpha (float): Scale applied to the positive class
        XGBoost (bool): Whether the XGBoost return signature is used

    Returns:
        Name of the eval metric and the eval score; for LightGBM, also a
        bool indicating whether a higher score is better (False, since this
        is a loss to be minimised).
    """
    y = dtrain.get_label()
    yhat = clip_sigmoid(yhat)

    elements = - alpha * y * np.log(yhat) - (1 - y) * np.log(1 - yhat)

    if XGBoost:
        return f'WCE_alpha{alpha}', (np.sum(elements) / len(y))
    else:
        return f'WCE_alpha{alpha}', (np.sum(elements) / len(y)), False
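# Illustrative sanity check (not part of the library): with alpha=1 the metric
# is just the mean binary log loss, so it should agree with sklearn's log_loss
# on sigmoid-transformed margins. _FakeDataset is a hypothetical stub exposing
# only the get_label() method the metric needs, and the metric is assumed to
# be reachable with explicit keyword arguments.
from sklearn.metrics import log_loss


class _FakeDataset:
    def __init__(self, label):
        self._label = np.asarray(label, dtype=float)

    def get_label(self):
        return self._label


margins = np.array([-2.0, -0.5, 0.3, 1.7])
labels = np.array([0.0, 1.0, 0.0, 1.0])
_, score, _ = weighted_cross_entropy_metric(margins, _FakeDataset(labels),
                                            alpha=1.0, XGBoost=False)
assert np.isclose(score, log_loss(labels, clip_sigmoid(margins)))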
def _gradient(yhat, dtrain, alpha, gamma):
    """Compute the weighted focal gradient.

    Args:
        yhat (np.array): Margin predictions
        dtrain: The XGBoost / LightGBM dataset
        alpha (float): Scale applied to the positive class
        gamma (float): Focusing parameter

    Returns:
        grad: Weighted Focal Loss gradient
    """
    y = dtrain.get_label()
    yhat = clip_sigmoid(yhat)

    # First derivative w.r.t. the margin z of
    # -alpha*y*(1-p)**gamma*log(p) - (1-y)*p**gamma*log(1-p), p = sigmoid(z).
    grad = (
        alpha * y * np.power(1 - yhat, gamma)
        * (gamma * yhat * np.log(yhat) + yhat - 1)
        + (1 - y) * np.power(yhat, gamma)
        * (yhat - gamma * np.log(1 - yhat) * (1 - yhat))
    )

    return grad
def _hessian(yhat, dtrain, alpha, gamma):
    """Compute the weighted focal hessian.

    Args:
        yhat (np.array): Margin predictions
        dtrain: The XGBoost / LightGBM dataset
        alpha (float): Scale applied to the positive class
        gamma (float): Focusing parameter

    Returns:
        hess: Weighted Focal Loss hessian
    """
    y = dtrain.get_label()
    yhat = clip_sigmoid(yhat)

    # Second derivative w.r.t. the margin z of the weighted focal loss; for
    # gamma=0 this reduces to the weighted cross-entropy hessian
    # (y*(alpha-1) + 1) * p * (1-p).
    hess = (
        alpha * y * yhat * np.power(1 - yhat, gamma)
        * (gamma * (1 - yhat) * np.log(yhat)
           + 2 * gamma * (1 - yhat)
           - np.power(gamma, 2) * yhat * np.log(yhat)
           + 1 - yhat)
        + (1 - y) * np.power(yhat, gamma)
        * (yhat * (1 - yhat) * (2 * gamma + gamma * np.log(1 - yhat) + 1)
           - np.power(gamma, 2) * np.power(1 - yhat, 2) * np.log(1 - yhat))
    )

    return hess
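# Illustrative numerical check (not part of the library): the analytic
# focal-loss _gradient and _hessian above should match central finite
# differences taken w.r.t. the margin. Reuses the hypothetical _FakeDataset
# stub from the sketch above.
def _focal_loss(z, y, alpha, gamma):
    p = clip_sigmoid(z)
    return (- alpha * y * np.log(p) * np.power(1 - p, gamma)
            - (1 - y) * np.log(1 - p) * np.power(p, gamma))


z = np.array([-1.5, -0.2, 0.4, 2.0])
y = np.array([0.0, 1.0, 1.0, 0.0])
alpha, gamma, eps = 2.0, 1.5, 1e-6

num_grad = (_focal_loss(z + eps, y, alpha, gamma)
            - _focal_loss(z - eps, y, alpha, gamma)) / (2 * eps)
assert np.allclose(_gradient(z, _FakeDataset(y), alpha, gamma), num_grad,
                   atol=1e-6)

num_hess = (_gradient(z + eps, _FakeDataset(y), alpha, gamma)
            - _gradient(z - eps, _FakeDataset(y), alpha, gamma)) / (2 * eps)
assert np.allclose(_hessian(z, _FakeDataset(y), alpha, gamma), num_hess,
                   atol=1e-6)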
def test_clip_sigmoid():
    assert np.allclose(a=clip_sigmoid(np.array([100, 0, -100])),
                       b=[1 - 1e-15, 0.5, 1e-15])
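# clip_sigmoid itself is not shown in this section. A minimal sketch of the
# behaviour the test pins down -- a sigmoid whose output is clipped away from
# 0 and 1 so that log(yhat) and log(1 - yhat) in the losses stay finite; the
# actual implementation may differ:
def clip_sigmoid(yhat):
    return np.clip(1.0 / (1.0 + np.exp(-yhat)), 1e-15, 1 - 1e-15)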