def test_finite_differences(lm, dataset):
    """Checks the gradient of a linear model via finite differences.

    References
    ----------
    [^1]: [How to test gradient implementations](https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/)
    [^2]: [Stochastic Gradient Descent Tricks](https://cilvr.cs.nyu.edu/diglib/lsml/bottou-sgd-tricks-2012.pdf)

    """

    scaler = preprocessing.StandardScaler()
    eps = 1e-6

    for x, y in dataset:

        x = scaler.learn_one(x).transform_one(x)

        # Store the current gradient and weights
        gradient, _ = lm._eval_gradient_one(x, y, 1)
        weights = copy.deepcopy(lm._weights)

        # d is a dictionary of weight perturbations
        for d in iter_perturbations(weights.keys()):

            # Perturb the weights and obtain the loss with the new weights
            lm._weights = utils.VectorDict({i: weights[i] + eps * di for i, di in d.items()})
            forward = lm.loss(y_true=y, y_pred=lm._raw_dot_one(x))
            lm._weights = utils.VectorDict({i: weights[i] - eps * di for i, di in d.items()})
            backward = lm.loss(y_true=y, y_pred=lm._raw_dot_one(x))

            # We expect g and h to be equal
            g = utils.math.dot(d, gradient)
            h = (forward - backward) / (2 * eps)

            # Compare signs
            # TODO: reactivate this check
            # assert np.sign(g) == np.sign(h)

            # Check absolute difference
            # TODO: decrease the tolerance
            assert abs(g - h) < 1e-5

        # Reset the weights to their original values so as not to influence
        # the training loop, even though it doesn't really matter.
        lm._weights = weights
        lm.learn_one(x, y)
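# A minimal, self-contained sketch of the central-difference identity the test
# above relies on: for a loss L and perturbation direction d,
# grad(L)(w) . d ~= (L(w + eps*d) - L(w - eps*d)) / (2*eps).
# The quadratic loss here is illustrative, not the model's actual loss.
import numpy as np

def example_loss(w):
    # L(w) = 0.5 * ||w||^2, whose gradient is w itself
    return 0.5 * np.dot(w, w)

def example_gradient(w):
    return w

rng = np.random.default_rng(42)
w = rng.uniform(-5, 5, size=10)
d = rng.uniform(-1, 1, size=10)  # random perturbation direction
eps = 1e-6

g = np.dot(example_gradient(w), d)  # analytic directional derivative
h = (example_loss(w + eps * d) - example_loss(w - eps * d)) / (2 * eps)
assert abs(g - h) < 1e-5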
def test_optimizer_step_with_dict_same_as_step_with_vector_dict(optimizer):

    w_dict = {i: random.uniform(-5, 5) for i in range(10)}
    w_vector = utils.VectorDict(w_dict)
    g_dict = {i: random.uniform(-5, 5) for i in range(10)}
    g_vector = utils.VectorDict(g_dict)

    w_dict = optimizer._step_with_dict(w_dict, g_dict)
    try:
        w_vector = optimizer.clone()._step_with_vector(w_vector, g_vector)
    except NotImplementedError:
        pytest.skip("step_with_vector not implemented")

    for i, w in w_vector.to_dict().items():
        assert math.isclose(w, w_dict[i])
def __init__(
    self,
    optimizer,
    loss,
    l2,
    intercept_init,
    intercept_lr,
    clip_gradient,
    initializer,
):
    self.optimizer = optimizer
    self.loss = loss
    self.l2 = l2
    self.intercept_init = intercept_init
    self.intercept = intercept_init
    self.intercept_lr = (
        optim.schedulers.Constant(intercept_lr)
        if isinstance(intercept_lr, numbers.Number)
        else intercept_lr
    )
    self.clip_gradient = clip_gradient
    self.initializer = initializer
    self._weights = utils.VectorDict(None)

    # The predict_many functions are going to return pandas.Series. We can name the series with
    # the name given to the y series seen during the last learn_many call.
    self._y_name = None
def _eval_gradient_one(self, x: dict, y: float, w: float) -> tuple[dict, float]:
    loss_gradient = self.loss.gradient(y_true=y, y_pred=self._raw_dot_one(x))
    loss_gradient *= w

    # Clip the loss gradient to avoid numerical instability
    loss_gradient = float(
        utils.math.clamp(loss_gradient, -self.clip_gradient, self.clip_gradient)
    )

    return (
        loss_gradient * utils.VectorDict(x) + 2.0 * self.l2 * self._weights,
        loss_gradient,
    )
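# The gradient returned above is d/dw [ loss + l2 * ||w||^2 ]
#   = loss_gradient * x + 2 * l2 * w,
# with the loss derivative clipped before it is used. A hedged standalone
# sketch using the squared loss (y_pred - y)^2, whose derivative with respect
# to y_pred is 2 * (y_pred - y); the function name and defaults are
# illustrative, not part of the codebase.
import numpy as np

def eval_gradient_sketch(w, x, y, l2, clip=1e12):
    y_pred = np.dot(w, x)
    loss_gradient = 2 * (y_pred - y)  # derivative of the squared loss
    loss_gradient = max(min(loss_gradient, clip), -clip)  # clamp for stability
    return loss_gradient * x + 2.0 * l2 * w, loss_gradient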
@contextlib.contextmanager
def _learn_mode(self, mask=None):
    weights = self._weights
    try:
        # Enable the initializer and set a mask
        self._weights = utils.VectorDict(weights, self.initializer, mask)
        yield
    finally:
        self._weights = weights
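# The generator above follows the standard contextmanager pattern: swap an
# attribute, yield, and restore the original in a finally block so the swap is
# undone even if the body raises. A generic, runnable version of that pattern
# (swap_attribute is a hypothetical helper, not part of the codebase):
import contextlib

@contextlib.contextmanager
def swap_attribute(obj, name, value):
    original = getattr(obj, name)
    try:
        setattr(obj, name, value)
        yield
    finally:
        # Restore the original value even if the body raises
        setattr(obj, name, original)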
def __iter__(self):
    aux_stats = (
        stats.Var()
        if next(iter(self.hash.values())).is_single_target
        else utils.VectorDict(default_factory=functools.partial(stats.Var))
    )
    for i in sorted(self.hash.keys()):
        x = self.hash[i].x_stats.get()
        aux_stats += self.hash[i].y_stats
        yield x, aux_stats
def _init_estimator(self, y):
    if isinstance(y, dict):
        self.is_single_target = False
        self.y_stats = utils.VectorDict(default_factory=functools.partial(stats.Var))
        self._update_estimator = self._update_estimator_multivariate
    else:
        self.y_stats = stats.Var()
        self._update_estimator = self._update_estimator_univariate
def _step_with_vector(self, w, g):
    if self.m is None:
        if isinstance(w, np.ndarray):
            self.m = np.zeros_like(w)
            self.v = np.zeros_like(w)
        else:
            self.m = utils.VectorDict()
            self.v = utils.VectorDict()

    # Fold the bias corrections into the learning rate
    lr = self.learning_rate * (1 - self.beta_2 ** (self.n_iterations + 1)) ** 0.5
    lr /= 1 - self.beta_1 ** (self.n_iterations + 1)

    self.m = self.beta_1 * self.m + (1 - self.beta_1) * g
    self.v = self.beta_2 * self.v + (1 - self.beta_2) * g**2
    w -= lr * self.m / (self.v**0.5 + self.eps)

    return w
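# The step above is Adam with the bias corrections folded into the learning
# rate: the update is lr * sqrt(1 - beta_2^t) / (1 - beta_1^t) * m / (sqrt(v) + eps),
# which matches the usual m_hat / (sqrt(v_hat) + eps) formulation up to where
# eps enters the denominator. A minimal numpy sketch of one such step (function
# name and defaults are illustrative):
import numpy as np

def adam_step(w, g, m, v, t, lr=0.001, beta_1=0.9, beta_2=0.999, eps=1e-8):
    alpha = lr * (1 - beta_2 ** (t + 1)) ** 0.5 / (1 - beta_1 ** (t + 1))
    m = beta_1 * m + (1 - beta_1) * g  # first moment estimate
    v = beta_2 * v + (1 - beta_2) * g ** 2  # second moment estimate
    return w - alpha * m / (v ** 0.5 + eps), m, v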
def _step_with_vector(self, w, g):
    if self.g2 is None:
        if isinstance(w, np.ndarray):
            self.g2 = np.zeros_like(w)
        else:
            self.g2 = utils.VectorDict()

    # Maintain an exponentially decayed average of squared gradients
    self.g2 = self.rho * self.g2 + (1 - self.rho) * g**2
    w -= self.learning_rate / (self.g2 + self.eps) ** 0.5 * g

    return w
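# The step above is RMSProp: g2 tracks a decayed average of squared gradients,
# and each coordinate's step is scaled by 1 / sqrt(g2 + eps). A minimal numpy
# sketch (function name and defaults are illustrative):
import numpy as np

def rmsprop_step(w, g, g2, lr=0.1, rho=0.9, eps=1e-8):
    g2 = rho * g2 + (1 - rho) * g ** 2
    return w - lr / (g2 + eps) ** 0.5 * g, g2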
def _raw_dot_one(self, x: dict) -> float:
    return self._weights @ utils.VectorDict(x) + self.intercept
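# _raw_dot_one computes the raw prediction w . x + intercept. A plain-dict
# equivalent, assuming features absent from the weights contribute zero
# (raw_dot_sketch is a hypothetical helper for illustration):
def raw_dot_sketch(weights: dict, x: dict, intercept: float) -> float:
    return sum(weights.get(i, 0.0) * xi for i, xi in x.items()) + intercept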