class Slot:
    """The element stored in the quantization hash.

    Each slot keeps the mean of the numerical feature values it covers, as well as the
    variance and mean of the target.
    """

    def __init__(self, x: float, y: typing.Union[float, VectorDict], weight: float = 1.0):
        self.x_stats = Mean()
        self.x_stats.update(x, weight)

        self.y_stats: typing.Union[Var, VectorDict]

        self._update_estimator: typing.Callable[[typing.Union[float, VectorDict], float], None]
        self.is_single_target = True

        self._init_estimator(y)
        self._update_estimator(y, weight)

    def _init_estimator(self, y):
        # Select the univariate or multi-target estimator depending on the target type
        if isinstance(y, dict):
            self.is_single_target = False
            self.y_stats = VectorDict(default_factory=functools.partial(Var))
            self._update_estimator = self._update_estimator_multivariate
        else:
            self.y_stats = Var()
            self._update_estimator = self._update_estimator_univariate

    def _update_estimator_univariate(self, target, sample_weight):
        self.y_stats.update(target, sample_weight)

    def _update_estimator_multivariate(self, target, sample_weight):
        for t in target:
            self.y_stats[t].update(target[t], sample_weight)

    def __iadd__(self, o):
        # Merge two slots by combining their feature and target statistics
        self.x_stats += o.x_stats
        self.y_stats += o.y_stats

        return self

    def update(self, x, y, sample_weight):
        self.x_stats.update(x, sample_weight)
        self._update_estimator(y, sample_weight)
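
# Illustrative usage sketch (hypothetical helper, not part of the original module). It shows
# how a slot accumulates feature/target statistics and how two slots are merged via `__iadd__`,
# assuming the univariate (single-target) code path and River's Mean/Var running estimators.
def _demo_slot() -> None:
    a = Slot(x=1.0, y=10.0, weight=1.0)
    a.update(x=1.2, y=11.0, sample_weight=1.0)

    b = Slot(x=0.9, y=9.5, weight=2.0)

    # Merging accumulates both the feature mean and the target variance estimator
    a += b

    print(a.x_stats.get())       # running mean of the feature values in the slot
    print(a.y_stats.mean.get())  # running mean of the target
    print(a.y_stats.get())       # running variance of the target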
class ForestMemberRegressor(BaseForestMember, base.Regressor):
    """Forest member class for regression."""

    def __init__(
        self,
        index_original: int,
        model: BaseTreeRegressor,
        created_on: int,
        drift_detector: base.DriftDetector,
        warning_detector: base.DriftDetector,
        is_background_learner,
        metric: RegressionMetric,
    ):
        super().__init__(
            index_original=index_original,
            model=model,
            created_on=created_on,
            drift_detector=drift_detector,
            warning_detector=warning_detector,
            is_background_learner=is_background_learner,
            metric=metric,
        )
        self._var = Var()  # Used to track drift

    def _drift_detector_input(self, y_true: float, y_pred: float):
        drift_input = y_true - y_pred
        self._var.update(drift_input)

        if self._var.mean.n == 1:
            return 0.5  # The expected error is the normalized mean error

        sd = math.sqrt(self._var.sigma)

        # We assume the error follows a normal distribution -> (empirical rule)
        # 99.73% of the values lie between [mean - 3*sd, mean + 3*sd]. We
        # assume this range for the normalized data. Hence, we can apply the
        # min-max norm to cope with ADWIN's requirements
        return (drift_input + 3 * sd) / (6 * sd) if sd > 0 else 0.5

    def reset(self, n_samples_seen):
        super().reset(n_samples_seen)
        # Reset the stats for the drift detector
        self._var = Var()

    def predict_one(self, x):
        return self.model.predict_one(x)
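
# Worked sketch (hypothetical helper, not part of the library) of the scaling performed in
# `_drift_detector_input`: assuming the residuals are roughly normal, nearly all of them fall
# within [-3*sd, +3*sd], so this min-max mapping places them in the [0, 1] range ADWIN expects.
# It reuses the `math` module and the `Var` running-variance estimator already used above.
def _normalized_residual_sketch(residuals: list) -> list:
    var = Var()
    scaled = []
    for r in residuals:
        var.update(r)
        if var.mean.n == 1:
            scaled.append(0.5)  # Not enough data yet: return the midpoint
            continue
        sd = math.sqrt(var.get())
        scaled.append((r + 3 * sd) / (6 * sd) if sd > 0 else 0.5)
    return scaled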
class NumericAttributeRegressionQuantizerObserver(AttributeObserver):
    """Quantizer observer (QO).

    Utilizes a dynamical hash-based quantization algorithm to keep track of the target
    statistics and evaluate split candidates. This class implements the algorithm
    described in [^1].

    This attribute observer keeps an internal estimator of the input feature's variance.
    By doing that, QO can calculate better values for its radius parameter to be passed
    to future learning nodes.

    Parameters
    ----------
    radius
        The quantization radius.

    References
    ----------
    [^1]: Mastelini, S.M. and de Leon Ferreira, A.C.P., 2021. Using dynamical
    quantization to perform split attempts in online tree regressors. Pattern
    Recognition Letters.
    """

    def __init__(self, radius: float = 0.01):
        super().__init__()
        self.radius = radius if radius > 0 else 0.01

        self._x_var = Var()
        self._quantizer = FeatureQuantizer(radius=self.radius)

    def update(self, x, y, sample_weight):
        if x is None:
            return
        else:
            self._x_var.update(x, sample_weight)
            self._quantizer.update(x, y, sample_weight)

    def probability_of_attribute_value_given_class(self, x, y):
        raise NotImplementedError

    def best_evaluated_split_suggestion(self, criterion, pre_split_dist, att_idx, binary_only=True):
        candidate = AttributeSplitSuggestion(None, [{}], -math.inf)

        prev_x = None  # The previously evaluated x value
        for (x, left_dist) in self._quantizer:
            # First hash element
            if prev_x is None:
                # In case the hash carries just one element, return the null split
                if len(self._quantizer) == 1:
                    return candidate
                prev_x = x
                continue

            right_dist = pre_split_dist - left_dist
            post_split_dists = [left_dist, right_dist]
            merit = criterion.merit_of_split(pre_split_dist, post_split_dists)

            if merit > candidate.merit:
                # Candidate split points are the midpoints between consecutive slot means
                split_point = (prev_x + x) / 2.0
                candidate = self._update_candidate(split_point, att_idx, post_split_dists, merit)

            prev_x = x

        return candidate

    @property
    def x_var(self):
        return self._x_var

    @staticmethod
    def _update_candidate(split_point, att_idx, post_split_dists, merit):
        num_att_binary_test = NumericAttributeBinaryTest(att_idx, split_point, True)
        candidate = AttributeSplitSuggestion(num_att_binary_test, post_split_dists, merit)

        return candidate
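
# Self-contained toy sketch (hypothetical helper, not part of the library) of the search done
# in `best_evaluated_split_suggestion`: candidate split points are the midpoints between
# consecutive sorted feature values, and each candidate is scored by the variance reduction of
# the target. The observer applies the same idea to the quantized slots instead of raw data.
import statistics


def _best_split_sketch(xs: list, ys: list):
    pairs = sorted(zip(xs, ys))
    n = len(pairs)
    total_var = statistics.pvariance([y for _, y in pairs])

    best_merit, best_split = -math.inf, None
    for i in range(1, n):
        left = [y for _, y in pairs[:i]]
        right = [y for _, y in pairs[i:]]

        # Weighted variance reduction, analogous to the criterion's merit_of_split
        merit = (
            total_var
            - (len(left) / n) * statistics.pvariance(left)
            - (len(right) / n) * statistics.pvariance(right)
        )
        if merit > best_merit:
            best_merit = merit
            best_split = (pairs[i - 1][0] + pairs[i][0]) / 2.0  # midpoint split point

    return best_split, best_merit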
class GradHessStats:
    """Class used to monitor and update the gradient/hessian information in Stochastic
    Gradient Trees.

    Represents the aggregated gradient/hessian data in a node (global node statistics),
    category, or numerical feature's discretized bin.
    """

    def __init__(self):
        self.g_var = Var()
        self.h_var = Var()
        self.gh_cov = Cov()

    def __iadd__(self, other):
        self.g_var += other.g_var
        self.h_var += other.h_var
        self.gh_cov += other.gh_cov

        return self

    def __isub__(self, other):
        self.g_var -= other.g_var
        self.h_var -= other.h_var
        self.gh_cov -= other.gh_cov

        return self

    def __add__(self, other):
        new = copy.deepcopy(self)
        new += other

        return new

    def __sub__(self, other):
        new = copy.deepcopy(self)
        new -= other

        return new

    def update(self, gh: GradHess, w: float = 1.0):
        self.g_var.update(gh.gradient, w)
        self.h_var.update(gh.hessian, w)
        self.gh_cov.update(gh.gradient, gh.hessian, w)

    @property
    def mean(self) -> GradHess:
        return GradHess(self.g_var.mean.get(), self.h_var.mean.get())

    @property
    def variance(self) -> GradHess:
        return GradHess(self.g_var.get(), self.h_var.get())

    @property
    def covariance(self) -> float:
        return self.gh_cov.get()

    @property
    def total_weight(self) -> float:
        return self.g_var.mean.n

    # This method ignores correlations between delta_pred and the gradients/hessians!
    # Considering delta_pred is derived from the gradient and hessian sample, this
    # assumption is definitely violated. However, as empirically demonstrated in the
    # original SGT, this fact does not seem to significantly impact on the obtained
    # results.
    def delta_loss_mean_var(self, delta_pred: float) -> Var:
        m = self.mean
        n = self.total_weight
        mean = delta_pred * m.gradient + 0.5 * m.hessian * delta_pred * delta_pred

        variance = self.variance
        covariance = self.covariance

        grad_term_var = delta_pred * delta_pred * variance.gradient
        hess_term_var = 0.25 * variance.hessian * (delta_pred**4.0)

        sigma = max(0.0, grad_term_var + hess_term_var + (delta_pred**3) * covariance)

        return Var._from_state(n, mean, sigma)  # noqa
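
# Illustrative sketch (hypothetical helper, not part of the library) of the second-order
# expansion behind `delta_loss_mean_var`. For a prediction change `delta`, the expected change
# in the loss is approximated by delta * E[g] + 0.5 * E[h] * delta**2, and its variance
# (neglecting the correlation between delta and the sampled gradients/hessians) by
# delta**2 * Var[g] + 0.25 * delta**4 * Var[h] + delta**3 * Cov[g, h].
# The toy gradient/hessian pairs below are arbitrary and only exercise the API.
def _demo_delta_loss() -> None:
    stats = GradHessStats()
    for g, h in [(0.2, 1.0), (-0.1, 0.9), (0.05, 1.1)]:
        stats.update(GradHess(g, h))

    # A Newton-like step: minimizes the quadratic approximation of the loss in this node
    delta = -stats.mean.gradient / stats.mean.hessian

    dl = stats.delta_loss_mean_var(delta)
    print(dl.mean.get())  # estimated mean change in the loss
    print(dl.get())       # estimated variance of that change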