import numpy as np
import xgboost as xgb
from typing import Tuple


# `softmax` and `focal_logloss_derivative_gamma2` are assumed to be defined
# (or imported) elsewhere in this module.
def log_focal_loss_obj(preds: np.ndarray, dtrain: xgb.DMatrix):
    labels = dtrain.get_label()
    kRows, kClasses = preds.shape
    if dtrain.get_weight().size == 0:
        # Use 1 as weight if we don't have custom weight.
        weights = np.ones((kRows, 1), dtype=float)
    else:
        weights = dtrain.get_weight()

    grad = np.zeros((kRows, kClasses), dtype=float)
    hess = np.zeros((kRows, kClasses), dtype=float)
    for r in range(kRows):
        target = int(labels[r])
        assert 0 <= target < kClasses
        p = softmax(preds[r, :])
        grad_r, hess_r = focal_logloss_derivative_gamma2(p, target)
        grad[r] = grad_r * weights[r]
        hess[r] = hess_r * weights[r]

    # Right now (XGBoost 1.0.0), reshaping is necessary.
    grad = grad.reshape((kRows * kClasses, 1))
    hess = hess.reshape((kRows * kClasses, 1))
    return grad, hess
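# A minimal usage sketch, not part of the original code: a custom multi-class
# objective such as `log_focal_loss_obj` is handed to `xgb.train` through the
# `obj` argument.  It assumes an XGBoost version where the raw predictions
# arrive with shape (rows, classes), as the objective above expects; the data
# below is synthetic and only illustrative.
def _demo_focal_loss_training(n_classes: int = 3) -> xgb.Booster:
    rng = np.random.default_rng(0)
    X = rng.normal(size=(256, 10))
    y = rng.integers(0, n_classes, size=256)
    dtrain = xgb.DMatrix(X, label=y)
    return xgb.train(
        {'num_class': n_classes, 'disable_default_eval_metric': 1},
        dtrain,
        num_boost_round=10,
        obj=log_focal_loss_obj,
    )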
def rmsle(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
    '''Root mean squared log error metric.'''
    y = dtrain.get_label()
    # Clip predictions so that log1p stays defined.
    predt[predt < -1] = -1 + 1e-6
    elements = np.power(np.log1p(y) - np.log1p(predt), 2)
    return 'PyRMSLE', float(np.sqrt(np.sum(elements) / len(y)))
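# A minimal sketch, not in the original code, showing how `rmsle` plugs into
# `xgb.train` as a custom evaluation metric.  Recent XGBoost versions take it
# through `custom_metric` (older ones through `feval`); the data is synthetic.
def _demo_rmsle_eval() -> xgb.Booster:
    rng = np.random.default_rng(0)
    X = rng.uniform(size=(128, 5))
    y = rng.uniform(0.0, 10.0, size=128)
    dtrain = xgb.DMatrix(X, label=y)
    return xgb.train(
        {'objective': 'reg:squaredlogerror', 'disable_default_eval_metric': 1},
        dtrain,
        num_boost_round=10,
        evals=[(dtrain, 'train')],
        custom_metric=rmsle,
    )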
def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Compute the hessian for squared log error.'''
    y = dtrain.get_label()
    price = dtrain.get_weight()
    return (
        ((np.log1p(price) * (y - predt) + predt + 1) / predt + 1)
        + (np.log1p(price) - 1) * np.log1p(predt)
        - np.log1p(y)
    ) / (np.log1p(price) * (y - predt) + predt + 1) ** 2
def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Compute the gradient for squared log error.'''
    y = dtrain.get_label()
    price = dtrain.get_weight()
    # Using the log works best; maybe apply the log to the whole bottom part
    # of the equation as well.
    return (np.log1p(predt) - np.log1p(y)) / (
        (predt + 1) + (np.log1p(price) * (y - predt))
    )
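# A hypothetical wrapper, not in the original code: XGBoost expects a single
# objective callable returning (grad, hess), so `gradient` and `hessian` above
# would typically be combined like this and passed as
# `xgb.train(..., obj=weighted_squared_log)`.  The DMatrix must carry weights
# (the `price` column), since both functions read `dtrain.get_weight()`.
def weighted_squared_log(predt: np.ndarray, dtrain: xgb.DMatrix):
    # Clip predictions so that log1p stays defined, as in the rmsle metric.
    predt[predt < -1] = -1 + 1e-6
    return gradient(predt, dtrain), hessian(predt, dtrain)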
# `kRows` and `kClasses` are expected to be module-level constants holding the
# number of training rows and classes, as in the XGBoost custom-objective demo.
def softprob_obj(predt: np.ndarray, data: xgb.DMatrix):
    '''Loss function.  Computing the gradient and approximated hessian
    (diagonal).  Reimplements the `multi:softprob` objective inside XGBoost.
    '''
    labels = data.get_label()
    if data.get_weight().size == 0:
        # Use 1 as weight if we don't have custom weight.
        weights = np.ones((kRows, 1), dtype=float)
    else:
        weights = data.get_weight()

    # The prediction is of shape (rows, classes); each element in a row
    # represents a raw prediction (leaf weight, hasn't gone through softmax
    # yet).  In XGBoost 1.0.0, the prediction is transformed by a softmax
    # function, fixed in later versions.
    assert predt.shape == (kRows, kClasses)

    grad = np.zeros((kRows, kClasses), dtype=float)
    hess = np.zeros((kRows, kClasses), dtype=float)

    eps = 1e-6

    # Compute the gradient and hessian.  Iterating row by row in Python is
    # slow, so this is only suitable for a demo.  The implementation in native
    # XGBoost core is also more robust to numeric overflow, since nothing is
    # done here to mitigate the `exp` inside `softmax`.
    for r in range(predt.shape[0]):
        target = labels[r]
        p = softmax(predt[r, :])
        for c in range(predt.shape[1]):
            assert 0 <= target < kClasses
            g = p[c] - 1.0 if c == target else p[c]
            g = g * weights[r]
            h = max((2.0 * p[c] * (1.0 - p[c]) * weights[r]).item(), eps)
            grad[r, c] = g
            hess[r, c] = h

    # Right now (XGBoost 1.0.0), reshaping is necessary.
    grad = grad.reshape((kRows * kClasses, 1))
    hess = hess.reshape((kRows * kClasses, 1))
    return grad, hess
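# A minimal sketch, not part of the original code: `softprob_obj` reads the
# module-level names `kRows` and `kClasses`, so this demo sets them via
# `global` before training on synthetic data.
def _demo_softprob_training() -> xgb.Booster:
    global kRows, kClasses
    kRows, kClasses = 256, 4
    rng = np.random.default_rng(0)
    X = rng.normal(size=(kRows, 10))
    y = rng.integers(0, kClasses, size=kRows)
    m = xgb.DMatrix(X, label=y)
    return xgb.train(
        {'num_class': kClasses, 'disable_default_eval_metric': 1},
        m,
        num_boost_round=10,
        obj=softprob_obj,
    )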
def _evaluate(self, scores: np.ndarray, clases: xgb.DMatrix) -> Tuple[str, int]:
    '''Custom eval: gain ("ganancia") of the scores at the cutoff `score_corte`.'''
    labels = clases.get_label()
    weights = clases.get_weight()
    score_corte = self.score_corte
    return self._evaluar_funcion_ganancia(scores, labels, weights, score_corte)
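# A hypothetical sketch, not part of the original code: `_evaluate` is a method
# of a class (defined elsewhere) that stores the cutoff `score_corte` and
# implements `_evaluar_funcion_ganancia`.  Since a bound method is an ordinary
# callable, an instance can hand it to XGBoost as the evaluation function, e.g.
#
#   model = GananciaEvaluator(score_corte=0.025)          # hypothetical class
#   booster = xgb.train(params, dtrain,
#                       evals=[(dvalid, 'valid')],
#                       custom_metric=model._evaluate)    # `feval=` on older versions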