Example #1
def test_finite_differences(lm, dataset):
    """Checks the gradient of a linear model via finite differences.

    References
    ----------
    [^1]: [How to test gradient implementations](https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/)
    [^2]: [Stochastic Gradient Descent Tricks](https://cilvr.cs.nyu.edu/diglib/lsml/bottou-sgd-tricks-2012.pdf)

    """

    scaler = preprocessing.StandardScaler()
    eps = 1e-6

    for x, y in dataset:

        x = scaler.learn_one(x).transform_one(x)

        # Store the current gradient and weights
        gradient, _ = lm._eval_gradient_one(x, y, 1)
        weights = copy.deepcopy(lm._weights)

        # d is a set of weight perturbations
        for d in iter_perturbations(weights.keys()):

            # Perturb the weights and compute the loss with the new weights
            lm._weights = utils.VectorDict(
                {i: weights[i] + eps * di
                 for i, di in d.items()})
            forward = lm.loss(y_true=y, y_pred=lm._raw_dot_one(x))
            lm._weights = utils.VectorDict(
                {i: weights[i] - eps * di
                 for i, di in d.items()})
            backward = lm.loss(y_true=y, y_pred=lm._raw_dot_one(x))

            # We expect g and h to be equal
            g = utils.math.dot(d, gradient)
            h = (forward - backward) / (2 * eps)

            # Compare signs
            # TODO: reactivate this check
            #assert np.sign(g) == np.sign(h)

            # Check absolute difference
            # TODO: decrease the tolerance
            assert abs(g - h) < 1e-5

        # Reset the weights to their original values in order not to influence
        # the training loop, even though it doesn't really matter.
        lm._weights = weights
        lm.learn_one(x, y)
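
The helper iter_perturbations used above is presumably defined elsewhere in the test module, so the snippet is not runnable on its own. Below is a self-contained sketch of the same central-difference check on a plain squared loss; every name and number is made up for illustration and is not part of the original test.

import math
import random

def loss(w, x, y):
    # squared error of a linear prediction
    return (y - sum(w[i] * x[i] for i in x)) ** 2

def gradient(w, x, y):
    # analytical gradient of the squared error above
    err = y - sum(w[i] * x[i] for i in x)
    return {i: -2 * err * x[i] for i in x}

eps = 1e-6
x = {0: 0.5, 1: -1.2}
y = 3.0
w = {i: random.uniform(-1, 1) for i in x}
g = gradient(w, x, y)

for i in w:
    # perturb one weight at a time and compare the central difference
    # with the directional derivative d · ∇L
    d = {j: float(j == i) for j in w}
    forward = loss({j: w[j] + eps * d[j] for j in w}, x, y)
    backward = loss({j: w[j] - eps * d[j] for j in w}, x, y)
    h = (forward - backward) / (2 * eps)
    assert math.isclose(sum(d[j] * g[j] for j in w), h, abs_tol=1e-5)
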
Example #2
def test_optimizer_step_with_dict_same_as_step_with_vector_dict(optimizer):

    w_dict = {i: random.uniform(-5, 5) for i in range(10)}
    w_vector = utils.VectorDict(w_dict)

    g_dict = {i: random.uniform(-5, 5) for i in range(10)}
    g_vector = utils.VectorDict(g_dict)

    w_dict = optimizer._step_with_dict(w_dict, g_dict)
    try:
        w_vector = optimizer.clone()._step_with_vector(w_vector, g_vector)
    except NotImplementedError:
        pytest.skip("step_with_vector not implemented")

    for i, w in w_vector.to_dict().items():
        assert math.isclose(w, w_dict[i])
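
The test above is parametrized by pytest over the available optimizers. As a self-contained illustration of the property it asserts, the sketch below applies one hand-written SGD step to a plain dict and to a utils.VectorDict built from the same numbers, then checks that both land on identical weights; the learning rate and values are made up, and the manual update merely stands in for _step_with_dict / _step_with_vector.

import math

from river import utils

lr = 0.01
w_dict = {0: 1.0, 1: -2.0}
g_dict = {0: 0.5, 1: 0.25}

# plain-dict update
w_dict = {i: w_dict[i] - lr * g_dict[i] for i in w_dict}

# the same update expressed with VectorDict arithmetic
w_vector = utils.VectorDict({0: 1.0, 1: -2.0})
g_vector = utils.VectorDict(g_dict)
w_vector = w_vector - lr * g_vector

for i, w in w_vector.to_dict().items():
    assert math.isclose(w, w_dict[i])
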
Example #3
    def __init__(
        self,
        optimizer,
        loss,
        l2,
        intercept_init,
        intercept_lr,
        clip_gradient,
        initializer,
    ):
        self.optimizer = optimizer
        self.loss = loss
        self.l2 = l2
        self.intercept_init = intercept_init
        self.intercept = intercept_init
        self.intercept_lr = (
            optim.schedulers.Constant(intercept_lr)
            if isinstance(intercept_lr, numbers.Number)
            else intercept_lr
        )
        self.clip_gradient = clip_gradient
        self.initializer = initializer
        self._weights = utils.VectorDict(None)

        # The predict_many methods return a pandas.Series. We name that series after
        # the y series seen during the last learn_many call.
        self._y_name = None
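
A small sketch, with illustrative values, of the intercept_lr handling above: a plain number is wrapped in a Constant scheduler, while a scheduler instance is kept as-is (InverseScaling here is just an example of the latter case, not something the original constructor mentions).

import numbers

from river import optim

def wrap(intercept_lr):
    # mirrors the conditional expression in __init__ above
    return (
        optim.schedulers.Constant(intercept_lr)
        if isinstance(intercept_lr, numbers.Number)
        else intercept_lr
    )

print(wrap(0.01).get(10))                                    # 0.01, regardless of the step
print(wrap(optim.schedulers.InverseScaling(0.01)).get(10))   # decays with the step count
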
Example #4
    def _eval_gradient_one(self, x: dict, y: float, w: float) -> tuple[dict, float]:

        # Gradient of the loss with respect to the raw prediction, weighted by the
        # sample weight and clipped to avoid exploding updates
        loss_gradient = self.loss.gradient(y_true=y, y_pred=self._raw_dot_one(x))
        loss_gradient *= w
        loss_gradient = float(utils.math.clamp(loss_gradient, -self.clip_gradient, self.clip_gradient))

        # Chain rule: gradient with respect to the weights, plus the L2 penalty term
        return loss_gradient * utils.VectorDict(x) + 2.0 * self.l2 * self._weights, loss_gradient
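
The return expression relies on VectorDict behaving like a sparse vector under scalar multiplication and addition. A tiny sketch with made-up numbers:

from river import utils

x = utils.VectorDict({"a": 1.0, "b": -2.0})
weights = utils.VectorDict({"a": 0.5, "b": 0.25})
loss_gradient, l2 = 3.0, 0.1

grad = loss_gradient * x + 2.0 * l2 * weights
print(grad.to_dict())  # {'a': 3.1, 'b': -5.95}
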
Example #5
    @contextlib.contextmanager
    def _learn_mode(self, mask=None):
        weights = self._weights
        try:
            # enable the initializer and set a mask
            self._weights = utils.VectorDict(weights, self.initializer, mask)
            yield
        finally:
            self._weights = weights
Example #6
    def __iter__(self):
        aux_stats = (
            stats.Var()
            if next(iter(self.hash.values())).is_single_target
            else utils.VectorDict(default_factory=functools.partial(stats.Var))
        )

        for i in sorted(self.hash.keys()):
            x = self.hash[i].x_stats.get()
            aux_stats += self.hash[i].y_stats
            yield x, aux_stats
Example #7
    def _init_estimator(self, y):
        if isinstance(y, dict):
            self.is_single_target = False
            self.y_stats = utils.VectorDict(
                default_factory=functools.partial(stats.Var))
            self._update_estimator = self._update_estimator_multivariate
        else:
            self.y_stats = stats.Var()
            self._update_estimator = self._update_estimator_univariate
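
In the multi-target branch, the default_factory means that indexing y_stats with a previously unseen target name creates a fresh stats.Var on the fly (an assumption about VectorDict's getitem behaviour, consistent with how y_stats is used). A small sketch with made-up targets:

import functools

from river import stats, utils

y_stats = utils.VectorDict(default_factory=functools.partial(stats.Var))
for y in ({"y1": 1.0, "y2": 5.0}, {"y1": 2.0, "y2": 7.0}):
    for target, value in y.items():
        y_stats[target].update(value)

print({target: var.get() for target, var in y_stats.to_dict().items()})  # one running variance per target
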
Example #8
    def _step_with_vector(self, w, g):

        # Lazily initialize the moment estimates with the same container type as w
        if self.m is None:
            if isinstance(w, np.ndarray):
                self.m = np.zeros_like(w)
                self.v = np.zeros_like(w)
            else:
                self.m = utils.VectorDict()
                self.v = utils.VectorDict()

        # Bias-corrected learning rate (the corrections for both moments are folded in)
        lr = self.learning_rate * (1 - self.beta_2 ** (self.n_iterations + 1)) ** 0.5
        lr /= 1 - self.beta_1 ** (self.n_iterations + 1)

        # Update the running estimates of the first and second moments of the gradient
        self.m = self.beta_1 * self.m + (1 - self.beta_1) * g
        self.v = self.beta_2 * self.v + (1 - self.beta_2) * g ** 2
        w -= lr * self.m / (self.v ** 0.5 + self.eps)

        return w
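
The learning-rate lines above fold Adam's bias corrections for both moment estimates into a single scalar. A self-contained sketch with made-up numbers, showing that this matches the usual m_hat / v_hat formulation up to where eps enters the denominator:

learning_rate, beta_1, beta_2, eps = 0.1, 0.9, 0.999, 1e-8
m = v = 0.0
w, g, t = 1.0, 0.5, 0  # t plays the role of n_iterations

# running moment estimates, as in the method above
m = beta_1 * m + (1 - beta_1) * g
v = beta_2 * v + (1 - beta_2) * g ** 2

# folded form: bias correction absorbed into the learning rate
lr = learning_rate * (1 - beta_2 ** (t + 1)) ** 0.5 / (1 - beta_1 ** (t + 1))
w_folded = w - lr * m / (v ** 0.5 + eps)

# textbook form: explicit bias-corrected moments
m_hat = m / (1 - beta_1 ** (t + 1))
v_hat = v / (1 - beta_2 ** (t + 1))
w_textbook = w - learning_rate * m_hat / (v_hat ** 0.5 + eps)

print(w_folded, w_textbook)  # agree up to where eps enters the denominator
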
Example #9
    def _step_with_vector(self, w, g):

        # Lazily initialize the running average of squared gradients
        if self.g2 is None:
            if isinstance(w, np.ndarray):
                self.g2 = np.zeros_like(w)
            else:
                self.g2 = utils.VectorDict()

        # Exponentially decayed average of squared gradients, then a rescaled step
        self.g2 = self.rho * self.g2 + (1 - self.rho) * g ** 2
        w -= self.learning_rate / (self.g2 + self.eps) ** 0.5 * g

        return w
Example #10
    def _raw_dot_one(self, x: dict) -> float:
        return self._weights @ utils.VectorDict(x) + self.intercept
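
A tiny sketch, with made-up weights and features, of the dot product this method computes:

from river import utils

weights = utils.VectorDict({"a": 0.5, "b": -1.0})
x = {"a": 2.0, "b": 3.0}
intercept = 0.1
print(weights @ utils.VectorDict(x) + intercept)  # 0.5*2.0 + (-1.0)*3.0 + 0.1 = -1.9
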