class ForestMemberRegressor(BaseForestMember, base.Regressor):
    """Forest member class for regression"""

    def __init__(
        self,
        index_original: int,
        model: BaseTreeRegressor,
        created_on: int,
        drift_detector: base.DriftDetector,
        warning_detector: base.DriftDetector,
        is_background_learner,
        metric: RegressionMetric,
    ):
        super().__init__(
            index_original=index_original,
            model=model,
            created_on=created_on,
            drift_detector=drift_detector,
            warning_detector=warning_detector,
            is_background_learner=is_background_learner,
            metric=metric,
        )
        self._var = Var()  # Used to track drift

    def _drift_detector_input(self, y_true: float, y_pred: float):
        drift_input = y_true - y_pred
        self._var.update(drift_input)

        if self._var.mean.n == 1:
            return 0.5  # The expected error is the normalized mean error

        sd = math.sqrt(self._var.get())

        # We assume the error follows a normal distribution -> (empirical rule)
        # 99.73% of the values lie between [mean - 3*sd, mean + 3*sd]. We
        # assume this range for the normalized data. Hence, we can apply the
        # min-max norm to cope with ADWIN's requirements
        return (drift_input + 3 * sd) / (6 * sd) if sd > 0 else 0.5

    def reset(self, n_samples_seen):
        super().reset(n_samples_seen)
        # Reset the stats for the drift detector
        self._var = Var()

    def predict_one(self, x):
        return self.model.predict_one(x)
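
# --- Illustrative sketch (not part of the library) ---------------------------
# The comment in `_drift_detector_input` above explains how the raw regression
# error is squashed into [0, 1] before being fed to the drift detector: under
# the empirical rule, roughly 99.73% of errors fall within [-3*sd, 3*sd], so a
# min-max normalization over that range is applied. The self-contained demo
# below reproduces that logic under the assumption that the error is roughly
# zero-centered; `_RunningVar` is a minimal Welford-style stand-in for the
# `Var` statistic used above, and the error values are made up.
def _demo_drift_input_normalization():
    import math

    class _RunningVar:
        """Minimal running mean/variance tracker (Welford's algorithm)."""

        def __init__(self):
            self.n = 0
            self.mean = 0.0
            self._m2 = 0.0

        def update(self, x):
            self.n += 1
            delta = x - self.mean
            self.mean += delta / self.n
            self._m2 += delta * (x - self.mean)

        def get(self):
            # Sample variance (ddof=1), matching the statistic used above.
            return self._m2 / (self.n - 1) if self.n > 1 else 0.0

    var = _RunningVar()

    def normalized_drift_input(error):
        var.update(error)
        if var.n == 1:
            return 0.5  # No spread information yet
        sd = math.sqrt(var.get())
        # Min-max normalization over the assumed range [-3*sd, 3*sd].
        return (error + 3 * sd) / (6 * sd) if sd > 0 else 0.5

    for err in [0.8, -0.3, 0.1, 2.5, -0.7]:
        print(round(normalized_drift_input(err), 3))
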
class GradHessStats:
    """Class used to monitor and update the gradient/hessian information in
    Stochastic Gradient Trees.

    Represents the aggregated gradient/hessian data in a node (global node
    statistics), category, or numerical feature's discretized bin.
    """

    def __init__(self):
        self.g_var = Var()
        self.h_var = Var()
        self.gh_cov = Cov()

    def __iadd__(self, other):
        self.g_var += other.g_var
        self.h_var += other.h_var
        self.gh_cov += other.gh_cov
        return self

    def __isub__(self, other):
        self.g_var -= other.g_var
        self.h_var -= other.h_var
        self.gh_cov -= other.gh_cov
        return self

    def __add__(self, other):
        new = copy.deepcopy(self)
        new += other
        return new

    def __sub__(self, other):
        new = copy.deepcopy(self)
        new -= other
        return new

    def update(self, gh: GradHess, w: float = 1.0):
        self.g_var.update(gh.gradient, w)
        self.h_var.update(gh.hessian, w)
        self.gh_cov.update(gh.gradient, gh.hessian, w)

    @property
    def mean(self) -> GradHess:
        return GradHess(self.g_var.mean.get(), self.h_var.mean.get())

    @property
    def variance(self) -> GradHess:
        return GradHess(self.g_var.get(), self.h_var.get())

    @property
    def covariance(self) -> float:
        return self.gh_cov.get()

    @property
    def total_weight(self) -> float:
        return self.g_var.mean.n

    # This method ignores correlations between delta_pred and the gradients/hessians!
    # Considering delta_pred is derived from the gradient and hessian sample, this
    # assumption is definitely violated. However, as empirically demonstrated in the
    # original SGT, this fact does not seem to significantly impact the obtained results.
    def delta_loss_mean_var(self, delta_pred: float) -> Var:
        m = self.mean
        n = self.total_weight
        mean = delta_pred * m.gradient + 0.5 * m.hessian * delta_pred * delta_pred

        variance = self.variance
        covariance = self.covariance

        grad_term_var = delta_pred * delta_pred * variance.gradient
        hess_term_var = 0.25 * variance.hessian * (delta_pred**4.0)
        sigma = max(0.0, grad_term_var + hess_term_var + (delta_pred**3) * covariance)
        return Var._from_state(n, mean, sigma)  # noqa
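
# --- Illustrative sketch (not part of the library) ---------------------------
# `delta_loss_mean_var` relies on the second-order Taylor approximation used by
# Stochastic Gradient Trees: for a candidate prediction update `delta_pred`,
# the change in loss is approximated by g * delta + 0.5 * h * delta^2, so its
# expectation and variance over the aggregated sample combine E[g], E[h],
# Var[g], Var[h] and Cov[g, h] (ignoring cross-terms with delta_pred, as the
# comment above notes). The standalone sketch below reproduces that arithmetic
# with made-up gradient/hessian values in place of the `Var`/`Cov` estimators.
def _demo_delta_loss_approximation():
    grads = [0.9, 1.1, 0.8, 1.2, 1.0]
    hessians = [2.1, 1.9, 2.0, 2.2, 1.8]
    delta_pred = -0.5  # e.g. the Newton step -E[g] / E[h] for these samples

    n = len(grads)
    g_mean = sum(grads) / n
    h_mean = sum(hessians) / n
    # Sample variances and covariance (ddof=1), matching the running estimators.
    g_var = sum((g - g_mean) ** 2 for g in grads) / (n - 1)
    h_var = sum((h - h_mean) ** 2 for h in hessians) / (n - 1)
    gh_cov = sum((g - g_mean) * (h - h_mean) for g, h in zip(grads, hessians)) / (n - 1)

    # Mirrors the expressions in `delta_loss_mean_var`.
    mean = delta_pred * g_mean + 0.5 * h_mean * delta_pred**2
    sigma = max(
        0.0,
        delta_pred**2 * g_var
        + 0.25 * h_var * delta_pred**4
        + delta_pred**3 * gh_cov,
    )
    print(f"E[delta loss] ~= {mean:.4f}, Var[delta loss] ~= {sigma:.4f}")
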