def rmsle(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
    '''Root mean squared log error metric.

    Computes ``sqrt(sum((log1p(label) - log1p(pred)) ** 2) / N)`` where
    ``N`` is the number of labels.

    :param predt: predicted values; left unmodified by this function
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: the metric name ``'PyRMSLE'`` and the metric value
    '''
    y = dtrain.get_label()
    # log1p is undefined at or below -1, so such predictions are lifted just
    # above it.  np.where builds a new array, keeping the caller's predictions
    # intact.
    clipped = np.where(predt <= -1, -1 + 1e-6, predt)
    elements = np.power(np.log1p(y) - np.log1p(clipped), 2)
    return 'PyRMSLE', float(np.sqrt(np.sum(elements) / len(y)))
def log_focal_loss_obj(preds: np.ndarray, dtrain: xgb.DMatrix):
    """Custom multi-class objective built on a focal log loss.

    For every row the raw predictions are passed through ``softmax`` and the
    per-class gradient and hessian are obtained from
    ``focal_logloss_derivative_gamma2``; both are then scaled by the row's
    weight.

    :param preds: array of shape (rows, classes) with the raw predictions
    :param dtrain: matrix providing the labels and, optionally, the weights
    :return: gradient and hessian, each reshaped to (rows * classes, 1)
    :raises AssertionError: if a label is not a valid class index
    """
    labels = dtrain.get_label()
    kRows, kClasses = preds.shape

    weights = dtrain.get_weight()
    if weights.size == 0:
        # Use 1 as weight if we don't have custom weight.
        weights = np.ones((kRows, 1), dtype=float)

    grad = np.zeros((kRows, kClasses), dtype=float)
    hess = np.zeros((kRows, kClasses), dtype=float)

    for r in range(kRows):
        target = int(labels[r])
        # Both bounds must hold; the label is used as a class index.
        assert 0 <= target < kClasses, (
            f"label {target} is outside the valid class range [0, {kClasses})"
        )
        p = softmax(preds[r, :])
        grad_r, hess_r = focal_logloss_derivative_gamma2(p, target)
        grad[r] = grad_r * weights[r]
        hess[r] = hess_r * weights[r]

    # Right now (XGBoost 1.0.0), reshaping is necessary
    grad = grad.reshape((kRows * kClasses, 1))
    hess = hess.reshape((kRows * kClasses, 1))
    return grad, hess
def rmsle(predt: np.ndarray, dtrain: DMatrix) -> Tuple[str, float]:
    """Root mean squared log error metric, offset by ``DEBUG_ERROR``.

    Computes ``sqrt(sum((log1p(label) - log1p(pred)) ** 2) / N)`` and adds the
    module-level ``DEBUG_ERROR`` value to the result.

    Args:
        predt: Predicted values; left unmodified by this function.
        dtrain: Matrix whose labels are read with ``get_label``.

    Returns:
        The metric name ``"my_rmsle"`` and the metric value.
    """
    y = dtrain.get_label()
    # log1p is undefined at or below -1, so such predictions are lifted just
    # above it.  np.where builds a new array, keeping the caller's predictions
    # intact.
    clipped = np.where(predt <= -1, -1 + 1e-6, predt)
    elements = np.power(np.log1p(y) - np.log1p(clipped), 2)
    return "my_rmsle", float(np.sqrt(np.sum(elements) / len(y))) + DEBUG_ERROR
def fair_metric(predt: np.ndarray, dtrain: xgb.DMatrix):
    '''FairXGB error metric.

    Binary log loss on ``sigmoid(predt)`` plus ``mu`` times a log-likelihood
    term evaluated against the protected-group indicator, averaged over all
    rows.

    The protected-group vector is chosen by matching the number of
    predictions against the module-level ``protected_train``,
    ``protected_full`` and ``protected_valid`` (in that order); when none
    matches, the scalar 0 is used.

    :param predt: raw predictions (passed through ``sigmoid`` here)
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: the metric name ``'Fair_Metric'`` and the averaged value
    '''
    # Pick the protected-group vector whose length matches the predictions.
    if len(predt) == len(protected_train):
        group = np.array(protected_train.copy())
    elif len(predt) == len(protected_full):
        group = np.array(protected_full.copy())
    elif len(predt) == len(protected_valid):
        group = np.array(protected_valid.copy())
    else:
        group = 0

    labels = dtrain.get_label()
    prob = sigmoid(predt)
    log_pos = np.log(prob)
    log_neg = np.log(1 - prob)

    # Ordinary binary cross-entropy ...
    answer = -labels * log_pos - (1 - labels) * log_neg
    # ... plus the mu-weighted term on the protected attribute (added in
    # place, so the accumulator keeps its dtype).
    answer += mu * (group * log_pos + (1 - group) * log_neg)

    return 'Fair_Metric', float(np.sum(answer) / len(answer))
def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Second-order term for the weighted squared-log-error objective.

    The weight stored in ``dtrain`` is read as ``price`` and enters the
    formula through ``log1p(price)``.

    :param predt: predicted values
    :param dtrain: matrix providing labels (``get_label``) and weights
        (``get_weight``)
    :return: element-wise hessian values
    '''
    y = dtrain.get_label()
    price = dtrain.get_weight()

    log_price = np.log1p(price)
    # Expression shared by the numerator and the squared denominator.
    base = log_price * (y - predt) + predt + 1

    # NOTE(review): operator precedence makes the first term (base / predt) + 1;
    # it is unclear whether base / (predt + 1) was intended -- confirm before
    # changing.
    numerator = (
        (base / predt + 1)
        + (log_price - 1) * np.log1p(predt)
        - np.log1p(y)
    )
    return numerator / base**2
def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''First-order term for the weighted squared-log-error objective.

    The log residual ``log1p(pred) - log1p(label)`` is divided by
    ``(pred + 1) + log1p(price) * (label - pred)``, where ``price`` is the
    weight stored in ``dtrain``.

    :param predt: predicted values
    :param dtrain: matrix providing labels (``get_label``) and weights
        (``get_weight``)
    :return: element-wise gradient values
    '''
    y = dtrain.get_label()
    price = dtrain.get_weight()

    log_residual = np.log1p(predt) - np.log1p(y)
    # Original author's note: using the log of the weight worked best; applying
    # a log to the whole denominator is an idea that has not been tried here.
    denominator = (predt + 1) + (np.log1p(price) * (y - predt))
    return log_residual / denominator
def rmsle(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
    r'''Root mean squared log error metric.

    :math:`\sqrt{\frac{1}{N}\sum_{i}[log(pred_i + 1) - log(label_i + 1)]^2}`

    :param predt: predicted values; left unmodified by this function
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: the metric name ``'PyRMSLE'`` and the metric value
    '''
    y = dtrain.get_label()
    # log1p is undefined at or below -1, so such predictions are lifted just
    # above it.  np.where builds a new array, keeping the caller's predictions
    # intact.
    clipped = np.where(predt <= -1, -1 + 1e-6, predt)
    elements = np.power(np.log1p(y) - np.log1p(clipped), 2)
    return 'PyRMSLE', float(np.sqrt(np.sum(elements) / len(y)))
def loss(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
    '''Mean squared error between ``exp(predt)`` and the labels.

    The predictions are exponentiated before being compared with the labels;
    the result is reported under the name ``'PoissonReg'``.

    :param predt: predictions, exponentiated here before comparison
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: the metric name ``'PoissonReg'`` and the mean squared residual
    '''
    residual = np.exp(predt) - dtrain.get_label()
    mean_squared = np.mean(residual**2)
    return 'PoissonReg', float(mean_squared)
def eval_error_metric(predt, dtrain: xgb.DMatrix):
    """Summed classification error at a 0.5 threshold.

    A row predicted above 0.5 contributes ``1 - label``; a row predicted at or
    below 0.5 contributes ``label``.  The contributions are summed, not
    averaged.

    :param predt: array of predictions
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: the metric name ``'CustomErr'`` and the summed error
    """
    truth = dtrain.get_label()
    mistakes = np.zeros(predt.shape)

    predicted_pos = predt > 0.5
    predicted_neg = predt <= 0.5

    # Error when predicting the positive class is 1 - label ...
    mistakes[predicted_pos] = 1 - truth[predicted_pos]
    # ... and the label itself when predicting the negative class.
    mistakes[predicted_neg] = truth[predicted_neg]

    return 'CustomErr', np.sum(mistakes)
def neg_rsqaure_score(predt: np.ndarray,
                      dmat: xgb.DMatrix) -> Tuple[str, float]:
    """
    Negated r-square score metric.

    :param predt: predicted values
    :param dmat: matrix whose labels are used as the ground truth
    :return: the metric name ``'NegrSquare'`` and the r-square score with its
        sign flipped
    """
    labels = dmat.get_label()
    score = r2_score(y_true=labels, y_pred=predt)
    return 'NegrSquare', -score
def pearson_corr(predt: np.ndarray, dtrain: xgb.DMatrix):
    '''Pearson correlation between the predictions and the labels.

    Computes ``sum(dy * dp) / sqrt(sum(dy ** 2) * sum(dp ** 2))`` where ``dy``
    and ``dp`` are the deviations of the labels and of the predictions from
    their respective means.

    :param predt: predicted values
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: the metric name ``'PearsonCorr'`` and the correlation as a float;
        0.0 is reported when either side has no variance, because the
        correlation is undefined there
    '''
    # Work in double precision; the sums of squares lose accuracy otherwise.
    y = np.asarray(dtrain.get_label(), dtype=float)
    pred = np.asarray(predt, dtype=float)

    y_dev = y - np.mean(y)
    pred_dev = pred - np.mean(pred)

    covariance = np.sum(y_dev * pred_dev)
    # The square root is taken of the product of the two sums of squares, and
    # the prediction spread is measured around the prediction mean.
    denominator = np.sqrt(np.sum(y_dev ** 2) * np.sum(pred_dev ** 2))
    if denominator == 0:
        return 'PearsonCorr', 0.0
    return 'PearsonCorr', float(covariance / denominator)
def eval_error_metric(predt, dtrain: xgb.DMatrix):
    """Evaluation metric for xgb.train.

    Summed classification error at a 0.5 threshold: a row predicted above 0.5
    contributes ``1 - label``, any row predicted at or below 0.5 contributes
    ``label``.  An empty prediction array yields the integer 0.

    :param predt: array of predictions
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: the metric name ``'CustomErr'`` and the summed error
    """
    truth = dtrain.get_label()

    # Nothing to score.
    if predt.size == 0:
        return "CustomErr", 0

    mistakes = np.zeros(predt.shape)
    predicted_pos = predt > 0.5
    predicted_neg = predt <= 0.5
    mistakes[predicted_pos] = 1 - truth[predicted_pos]
    mistakes[predicted_neg] = truth[predicted_neg]
    return 'CustomErr', np.sum(mistakes)
def custom_metrics(y_pred: list, data: xgb.DMatrix) -> list:
    """Evaluate several classification metrics in one call.

    The labels are taken from ``data`` and handed, together with ``y_pred``,
    to the ``calc_*`` helpers defined elsewhere in this module (presumably
    confusion-matrix counts and rates -- see those helpers for the exact
    definitions).

    :param y_pred: predictions to evaluate
    :param data: matrix whose labels are read with ``get_label``
    :return: list of ``(name, value)`` pairs, in the order precision, recall,
        fpr, auprc
    """
    y = data.get_label()

    auprc = calc_auprc(y, y_pred)
    # Same call order as before: tp, tn, fp, fn.
    tp, tn, fp, fn = (
        counter(y, y_pred)
        for counter in (calc_tp, calc_tn, calc_fp, calc_fn)
    )

    results = [
        ('precision', calc_precision(tp, tn, fp, fn)),
        ('recall', calc_recall(tp, tn, fp, fn)),
        ('fpr', calc_fpr(tp, tn, fp, fn)),
    ]
    results.append(('auprc', auprc))
    return results
def custom_loss(prediction: np.ndarray, dtrain: xgb.DMatrix, ERA_len, CESM_len):
    """
    Loss function that combines a log loss on the labelled ERAI samples with a
    constraint objective (squared mean error, SME) on the unlabelled CESM
    samples, returning gradient and hessian for training.

    The first ERA_len entries of the prediction are treated as ERAI samples and
    the remaining ones as CESM samples. The SME term compares the mean
    predicted probability of the CESM samples with the mean ERAI label and is
    scaled by ``self.lambda_``.

    NOTE(review): ``self`` is not a parameter of this function; it has to be
    supplied by the enclosing scope (presumably this function is defined
    inside a method -- confirm).

    @param prediction: Prediction values for stacked ERAI and CESM samples
    @type prediction: np.ndarray
    @param dtrain: Training set (features and labels)
    @type dtrain: xgb.DMatrix
    @param ERA_len: Number of ERAI samples in dtrain
    @type ERA_len: int
    @param CESM_len: Number of CESM samples in dtrain (after ERAI samples)
    @type CESM_len: int
    @return: Gradient and hessian of prediction for ERAI and CESM samples
    @rtype: np.ndarray, np.ndarray
    """
    # Labels exist only for the leading ERAI part
    era_labels = dtrain.get_label()[:ERA_len]

    # Turn raw predictions into probabilities, then split by data source
    prob = self.sigmoid(prediction)
    era_prob = prob[:ERA_len]
    cesm_prob = prob[ERA_len:]

    # Pieces shared by the SME gradient and hessian
    mean_gap = np.mean(cesm_prob) - np.mean(era_labels)
    cesm_scale = 2 * self.lambda_ / CESM_len * cesm_prob * (1 - cesm_prob)

    grad = np.zeros(len(prob))
    hess = np.zeros(len(prob))

    # Log loss gradient and hessian for the ERAI samples
    grad[:ERA_len] = era_prob - era_labels
    hess[:ERA_len] = era_prob * (1.0 - era_prob)

    # SME gradient and hessian for the CESM samples
    grad[ERA_len:] = cesm_scale * mean_gap
    hess[ERA_len:] = cesm_scale * (
        (1 - cesm_prob) * mean_gap
        - cesm_prob * mean_gap
        + cesm_prob * (1 - cesm_prob) / CESM_len
    )

    return grad, hess
def xgb_mape(preds: np.ndarray, dtrain: DMatrix) -> Tuple[str, float]:
    """
    Mean absolute percentage error metric for evaluation in xgboost.

    Rows whose label is zero are excluded, since the percentage error is
    undefined there.

    Args:
        preds: Array of predictions
        dtrain: DMatrix of data

    Returns:
        Tuple of error name (str) and error (float); the error is NaN when
        every label is zero
    """
    labels = dtrain.get_label()
    mask = labels != 0
    if not mask.any():
        return "mape", float("nan")

    # Select the usable rows before dividing (no division by zero), and divide
    # by the magnitude of the label so negative labels cannot produce negative
    # errors.
    kept_labels = labels[mask]
    errors = np.fabs(kept_labels - preds[mask]) / np.fabs(kept_labels)
    return "mape", float(errors.mean())
def merror(predt: np.ndarray, dtrain: xgb.DMatrix):
    """Multi-class classification error rate.

    The predicted class of a row is the index of its largest entry; the
    metric is the fraction of rows whose predicted class differs from the
    label.  ``kRows`` and ``kClasses`` are read from the module scope.

    :param predt: array of shape (kRows, kClasses)
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: the metric name ``'PyMError'`` and the error rate
    """
    y = dtrain.get_label()

    # Like custom objective, the predt is untransformed leaf weight
    assert predt.shape == (kRows, kClasses)

    # Row-wise arg-max, stored as floats.
    out = np.argmax(predt, axis=1).astype(float)
    assert y.shape == out.shape

    errors = np.where(y != out, 1.0, 0.0)
    return 'PyMError', np.sum(errors) / kRows
def objective(trial: optuna.trial, dtrain: xgb.DMatrix,
              dvalid: xgb.DMatrix) -> float:
    """Optuna objective: train an XGBoost binary classifier, score accuracy.

    Hyperparameters are sampled from ``trial``; tree-specific ones are only
    sampled for the "gbtree" and "dart" boosters and dropout-specific ones
    only for "dart".  The model is trained with ``xgb.train(param, dtrain)``
    (no number of boosting rounds is passed explicitly), and its predictions
    on ``dvalid`` are rounded to the nearest integer to obtain class labels.

    :param trial: Optuna trial used to sample the hyperparameters
    :param dtrain: training data
    :param dvalid: validation data; its labels are the ground truth
    :return: accuracy on the validation data
    """
    # Parameters shared by every booster.
    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        "booster": trial.suggest_categorical(
            "booster", ["gbtree", "gblinear", "dart"]),
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    }
    booster = param["booster"]

    # Tree-based boosters get the tree-shape parameters.
    if booster in ("gbtree", "dart"):
        param["max_depth"] = trial.suggest_int("max_depth", 1, 9)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical(
            "grow_policy", ["depthwise", "lossguide"])

    # DART additionally gets the dropout parameters.
    if booster == "dart":
        param["sample_type"] = trial.suggest_categorical(
            "sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical(
            "normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float(
            "rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float(
            "skip_drop", 1e-8, 1.0, log=True)

    # Fit on the training data, then score the rounded validation predictions.
    model = xgb.train(param, dtrain)
    predicted_labels = np.rint(model.predict(dvalid))
    return accuracy_score(dvalid.get_label(), predicted_labels)
def xgb_mape_exp(preds: np.ndarray, dtrain: DMatrix) -> Tuple[str, float]:
    """
    Mean absolute percentage error metric for evaluation in xgboost.

    NOTE: This will exponentiate the predictions first, in the case where our
    actual is logged. The labels read from ``dtrain`` are used as they are.

    Rows whose label is zero are excluded, since the percentage error is
    undefined there.

    Args:
        preds: Array of predictions
        dtrain: DMatrix of data

    Returns:
        Tuple of error name (str) and error (float); the error is NaN when
        every label is zero
    """
    labels = dtrain.get_label()
    mask = labels != 0
    if not mask.any():
        return "mape_exp", float("nan")

    # Select the usable rows before dividing (no division by zero), and divide
    # by the magnitude of the label so negative labels cannot produce negative
    # errors.
    kept_labels = labels[mask]
    errors = np.fabs(kept_labels - np.exp(preds[mask])) / np.fabs(kept_labels)
    return "mape_exp", float(errors.mean())
def xgb_log_cosh(preds: np.ndarray,
                 dtrain: DMatrix) -> Tuple[np.ndarray, np.ndarray]:
    """
    Log-Cosh objective for xgboost.

    With ``x = preds - labels`` the gradient is ``tanh(x)`` and the hessian is
    ``1 / cosh(x) ** 2``.

    Args:
        preds: Array of predictions
        dtrain: DMatrix of data

    Returns:
        Gradient and hessian arrays for log-cosh
    """
    x = preds - dtrain.get_label()
    grad = np.tanh(x)  # pylint: disable=assignment-from-no-return
    # cosh(x) ** 2 overflows to inf for large residuals; 1 / inf == 0 is the
    # correct limit of the hessian, so only the overflow warning is silenced.
    with np.errstate(over="ignore"):
        hess = 1 / np.cosh(x)**2
    return grad, hess
def xgb_pr_auc(preds: np.ndarray, lgb_train: DMatrix) -> Tuple[str, float]:
    """
    Precision-recall AUC (area under the curve) of our predictions in xgboost.

    Args:
        preds: Array of predictions
        lgb_train: DMatrix of data; its labels are the ground truth

    Returns:
        Tuple of metric name ("pr_auc") and the area under the
        precision-recall curve
    """
    truth = lgb_train.get_label()
    precision, recall, _thresholds = precision_recall_curve(truth, preds)
    # Recall is the x-axis of the curve, precision the y-axis.
    return "pr_auc", auc(recall, precision)
def fforma_loss(self, predt: np.ndarray, dtrain: xgb.DMatrix) -> (str, float):
    '''Evaluation metric: total contribution-weighted prediction.

    Every row of ``predt`` is multiplied element-wise with the row of
    ``self.contribution_to_owa`` selected by that sample's integer label; the
    products are summed per row and then over all rows.

    The predictions are used as given; a per-row softmax transform was left
    disabled by the original author.

    :param predt: predictions, one row per sample
    :param dtrain: matrix whose labels index ``self.contribution_to_owa``
    :return: the metric name ``'FFORMA-loss'`` and the summed value
    '''
    row_index = dtrain.get_label().astype(int)
    n_rows = len(row_index)

    contributions = self.contribution_to_owa[row_index, :]
    per_row = (predt * contributions).sum(axis=1).reshape((n_rows, 1))

    return 'FFORMA-loss', per_row.sum()
def xgb_mpse(preds: np.ndarray,
             dtrain: DMatrix) -> Tuple[np.ndarray, np.ndarray]:
    """
    Mean-Squared Percentage Error objective for xgboost.

    With label ``y`` and prediction ``p`` the gradient is
    ``2 / y * (p / y - 1)`` and the hessian is ``2 / y ** 2``.  Entries that
    are not finite (for example where the label is zero) are set to 0 in both
    outputs.

    Args:
        preds: Array of predictions
        dtrain: DMatrix of data

    Returns:
        Gradient and hessian arrays for mean squared percentage error
    """
    labels = dtrain.get_label()
    # Zero labels are expected here; their results are replaced below, so the
    # divide-by-zero / invalid-value warnings carry no information.
    with np.errstate(divide="ignore", invalid="ignore"):
        grad = 2.0 / labels * (preds / labels - 1)
        hess = 2.0 / (labels**2)
    # isfinite also catches the NaN produced by a zero label paired with a
    # zero prediction (inf * (0 / 0 - 1)), which an isinf test lets through.
    grad = np.where(np.isfinite(grad), grad, 0.)
    hess = np.where(np.isfinite(hess), hess, 0.)
    return grad, hess
def corr_xgb(
    time_id_fold,
    y_pred: np.array,
    dtrain: xgb.DMatrix,
) -> Tuple[str, float]:
    """
    Pearson correlation coefficient metric.

    Builds a frame with the columns ``time_id``, ``y_pred`` and ``y_true`` and
    passes it to ``calculate_corr``, which is defined elsewhere in this module.

    :param time_id_fold: values for the ``time_id`` column, one per prediction
    :param y_pred: predicted values
    :param dtrain: matrix whose labels become the ``y_true`` column
    :return: the metric name ``'pearson_corr'`` and the value returned by
        ``calculate_corr``
    """
    frame = pd.DataFrame(
        {
            'time_id': time_id_fold,
            'y_pred': y_pred,
            'y_true': dtrain.get_label(),
        }
    )
    return 'pearson_corr', calculate_corr(frame)
def xgb_fair(preds: np.ndarray, dtrain: DMatrix,
             c: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
    """
    Fair loss objective for xgboost.

    loss = c * abs(x) - c**2 * log(abs(x) + c), with x = preds - labels

    Args:
        preds: Array of predictions
        dtrain: DMatrix of data
        c: Positive scale constant of the fair loss. The default of 1 is the
            value this objective always used before it became configurable.

    Returns:
        Gradient and hessian arrays for fair loss

    Raises:
        ValueError: If ``c`` is not positive.
    """
    if c <= 0:
        raise ValueError(f"c must be positive, got {c!r}")

    x = preds - dtrain.get_label()
    den = np.abs(x) + c
    grad = c * x / den
    hess = c * c / den**2
    return grad, hess
def merror(predt: np.ndarray, dtrain: xgb.DMatrix):
    """Multi-class classification error rate.

    The predicted class of a row is the position of its largest entry; the
    metric is the share of rows whose predicted class differs from the label.
    ``kRows`` and ``kClasses`` are read from the module scope.

    :param predt: array of shape (kRows, kClasses)
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: the metric name ``'PyMError'`` and the error rate
    """
    y = dtrain.get_label()

    # Like custom objective, the predt is untransformed leaf weight when custom
    # objective is provided.
    # With the use of `custom_metric` parameter in train function, custom
    # metric receives raw input only when custom objective is also being used.
    # Otherwise custom metric will receive transformed prediction.
    assert predt.shape == (kRows, kClasses)

    # Most likely class per row, kept as floats.
    out = predt.argmax(axis=1).astype(float)
    assert y.shape == out.shape

    mismatches = (y != out).astype(float)
    return 'PyMError', np.sum(mismatches) / kRows
def calibrate(
    self,
    data: xgb.DMatrix,
    response: Optional[Union[np.ndarray, pd.Series]] = None,
    alpha: Union[int, float] = 0.95,
) -> None:
    """Calibrate conformal intervals so prediction intervals can vary by row.

    After type-checking ``data`` and filling in ``response`` from the labels
    stored in ``data`` when it was not supplied, the work is delegated to the
    parent class's ``calibrate``.

    According to the original notes for this method, the calibration records
    how often each leaf node is visited across the passed data
    (``_calibrate_leaf_node_counts``) and sets the default interval
    (``_calibrate_interval``) that is later scaled by the inverse of the
    nonconformity function when making predictions.

    Parameters
    ----------
    data : xgb.DMatrix
        Dataset to use to set baselines.

    response : np.ndarray, pd.Series or None, default = None
        The response values for the records in data. If passed as None, the
        response is extracted from the data argument with get_label.

    alpha : int or float, default = 0.95
        Confidence level for the interval.

    """
    check_type(data, [xgb.DMatrix], "data")

    if response is None:
        # The cast exists only to stop mypy complaining about get_label.
        response = cast(xgb.DMatrix, data).get_label()

    super().calibrate(data=data, response=response, alpha=alpha)
def softprob_obj(predt: np.ndarray, data: xgb.DMatrix):
    '''Loss function.  Computing the gradient and approximated hessian (diagonal).
    Reimplements the `multi:softprob` inside XGBoost.

    ``kRows`` and ``kClasses`` are read from the module scope.

    :param predt: raw predictions of shape (kRows, kClasses)
    :param data: matrix providing the labels and, optionally, the weights
    :return: gradient and hessian, each reshaped to (kRows * kClasses, 1)
    :raises AssertionError: if ``predt`` has an unexpected shape or a label is
        not a valid class index
    '''
    labels = data.get_label()
    weights = data.get_weight()
    if weights.size == 0:
        # Use 1 as weight if we don't have custom weight.  One-dimensional,
        # so each per-row weight is a scalar.
        weights = np.ones(kRows, dtype=float)

    # The prediction is of shape (rows, classes), each element in a row
    # represents a raw prediction (leaf weight, hasn't gone through softmax
    # yet).  In XGBoost 1.0.0, the prediction is transformed by a softmax
    # function, fixed in later versions.
    assert predt.shape == (kRows, kClasses)

    grad = np.zeros((kRows, kClasses), dtype=float)
    hess = np.zeros((kRows, kClasses), dtype=float)

    eps = 1e-6

    # compute the gradient and hessian, slow iterations in Python, only
    # suitable for demo.  Also the one in native XGBoost core is more robust to
    # numeric overflow as we don't do anything to mitigate the `exp` in
    # `softmax` here.
    for r in range(predt.shape[0]):
        target = labels[r]
        # Checked once per row; both bounds must hold for a valid class index.
        assert 0 <= target < kClasses
        p = softmax(predt[r, :])
        for c in range(predt.shape[1]):
            g = p[c] - 1.0 if c == target else p[c]
            g = g * weights[r]
            # Floor the hessian at eps so it never reaches zero.
            h = max((2.0 * p[c] * (1.0 - p[c]) * weights[r]).item(), eps)
            grad[r, c] = g
            hess[r, c] = h

    # Right now (XGBoost 1.0.0), reshaping is necessary
    grad = grad.reshape((kRows * kClasses, 1))
    hess = hess.reshape((kRows * kClasses, 1))
    return grad, hess
def error_softmax_obj(self, predt: np.ndarray,
                      dtrain: xgb.DMatrix) -> (np.ndarray, np.ndarray):
    '''Custom objective built on ``self.contribution_to_owa``.

    For every sample the row of ``self.contribution_to_owa`` selected by its
    integer label is combined with the prediction row to form the gradient and
    hessian.

    The predictions are used as given; a per-row softmax transform was left
    disabled by the original author.

    :param predt: predictions, one row per sample
    :param dtrain: matrix whose labels index ``self.contribution_to_owa``
    :return: gradient and hessian, each flattened to a single column
    '''
    row_index = dtrain.get_label().astype(int)
    n_rows = len(row_index)

    probs = predt
    contributions = self.contribution_to_owa[row_index, :]

    # Contribution-weighted sum of each prediction row, kept as a column so
    # it broadcasts against the per-class contributions.
    weighted_avg = (probs * contributions).sum(axis=1).reshape((n_rows, 1))

    grad = probs * (contributions - weighted_avg)
    hess = contributions * probs * (1.0 - probs) - grad * probs

    return grad.reshape(-1, 1), hess.reshape(-1, 1)
def xgb_huber_approx(preds: np.ndarray, dtrain: DMatrix,
                     h: float = 1.0) -> Tuple[np.ndarray, np.ndarray]:
    """
    Huber loss (approximation) objective for xgboost.

    With ``d = preds - labels`` and ``scale = 1 + (d / h) ** 2`` the gradient
    is ``d / sqrt(scale)`` and the hessian is ``1 / scale / sqrt(scale)``.

    Args:
        preds: Array of predictions
        dtrain: DMatrix of data
        h: Positive constant the residual is divided by. The default of 1 is
            the value this objective always used before it became
            configurable.

    Returns:
        Gradient and hessian arrays for huber loss

    Raises:
        ValueError: If ``h`` is not positive.
    """
    if h <= 0:
        raise ValueError(f"h must be positive, got {h!r}")

    d = preds - dtrain.get_label()
    scale = 1 + (d / h)**2
    scale_sqrt = np.sqrt(scale)
    grad = d / scale_sqrt
    hess = 1 / scale / scale_sqrt
    return grad, hess
def gradient(predt: np.ndarray, dtrain: xgb.DMatrix):
    '''Fair XGBoost gradient.

    The usual logistic gradient ``sigmoid(predt) - label`` plus ``mu`` times
    ``protected_feature - sigmoid(predt)``.

    The protected-group vector is chosen by matching the number of
    predictions against the module-level ``protected_train``,
    ``protected_full`` and ``protected_valid`` (in that order); when none
    matches, the scalar 0 is used.

    :param predt: raw predictions (passed through ``sigmoid`` here)
    :param dtrain: matrix whose labels are read with ``get_label``
    :return: element-wise gradient values
    '''
    # Pick the protected-group vector whose length matches the predictions.
    if len(predt) == len(protected_train):
        group = np.array(protected_train.copy())
    elif len(predt) == len(protected_full):
        group = np.array(protected_full.copy())
    elif len(predt) == len(protected_valid):
        group = np.array(protected_valid.copy())
    else:
        group = 0

    labels = dtrain.get_label()
    prob = sigmoid(predt)

    answer = prob - labels
    # Added in place, so the accumulator keeps its dtype.
    answer += mu * (group - prob)
    return answer