Example #1
import sys
from io import StringIO

import numpy as np

# _gradient_descent is a private scikit-learn helper; in older releases it
# lives in sklearn.manifold.t_sne instead of sklearn.manifold._t_sne.
from sklearn.manifold._t_sne import _gradient_descent


def test_gradient_descent_stops():
    # Test stopping conditions of gradient descent.
    class ObjectiveSmallGradient:
        def __init__(self):
            self.it = -1

        def __call__(self, _, compute_error=True):
            self.it += 1
            return (10 - self.it) / 10.0, np.array([1e-5])

    def flat_function(_, compute_error=True):
        return 0.0, np.ones(1)

    # Gradient norm
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _, error, it = _gradient_descent(
            ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100,
            n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
            min_gain=0.0, min_grad_norm=1e-5, verbose=2)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout
    assert error == 1.0
    assert it == 0
    assert("gradient norm" in out)

    # Maximum number of iterations without improvement
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _, error, it = _gradient_descent(
            flat_function, np.zeros(1), 0, n_iter=100,
            n_iter_without_progress=10, momentum=0.0, learning_rate=0.0,
            min_gain=0.0, min_grad_norm=0.0, verbose=2)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout
    assert error == 0.0
    assert it == 11
    assert("did not make any progress" in out)

    # Maximum number of iterations
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _, error, it = _gradient_descent(
            ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=11,
            n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
            min_gain=0.0, min_grad_norm=0.0, verbose=2)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout
    assert error == 0.0
    assert it == 10
    assert("Iteration 10" in out)

Example #2

    def jumped_gradient_descent(self, obj_func, params, **opt_args):
        # Offset by one so the first jump starts exactly at opt_args['it'].
        it = opt_args['it'] - 1

        # Convert the iteration budgets into numbers of jumps, where each
        # jump runs jump_size iterations of _gradient_descent.
        n_jump_without_progress = opt_args[
            "n_iter_without_progress"] // self.jump_size + 1
        remaining = opt_args['n_iter'] - it
        n_jumps = remaining // self.jump_size + 1
        ct_no_improvement = 0
        kl_divergence_best = None

        new_opt_args = opt_args.copy()
        for jump in range(n_jumps):
            # Run the next chunk of jump_size iterations, resuming where the
            # previous jump stopped.
            new_opt_args['it'] = it + 1
            new_opt_args['n_iter'] = it + self.jump_size
            params, kl_divergence, it = _gradient_descent(
                obj_func, params, **new_opt_args)
            # Snapshot the embedding and the momentum used for this jump.
            self.X_embedded_jumps.append(params.reshape(-1, self.n_components))
            self.momentum_jumps.append(opt_args['momentum'])
            if jump > 0:
                print(
                    "\rJump {}/{}: best_kl={:.6f}\t current_kl={:.6f}".format(
                        jump + 1, n_jumps, kl_divergence_best, kl_divergence),
                    end="",
                    flush=True)
            # Early stopping: give up once too many consecutive jumps pass
            # without improving the best KL divergence seen so far.
            if kl_divergence_best is None or kl_divergence < kl_divergence_best:
                kl_divergence_best = kl_divergence
                ct_no_improvement = 0
            else:
                ct_no_improvement += 1
                if ct_no_improvement >= n_jump_without_progress:
                    break
        print()
        return params, kl_divergence, it
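
For orientation, a hypothetical sketch of a host class the method above could live in; the class name JumpedTSNE is invented, and the attributes jump_size, n_components, X_embedded_jumps and momentum_jumps are assumptions inferred from the attribute accesses in the snippet.

class JumpedTSNE:
    # Hypothetical container for jumped_gradient_descent: the optimizer is
    # run in chunks ("jumps") of jump_size iterations so the embedding and
    # momentum can be recorded after every chunk.
    def __init__(self, jump_size=50, n_components=2):
        self.jump_size = jump_size
        self.n_components = n_components
        self.X_embedded_jumps = []   # embedding snapshot after each jump
        self.momentum_jumps = []     # momentum used for each jump

    # jumped_gradient_descent (as defined above) would be attached here and
    # driven with the same keyword arguments _gradient_descent accepts,
    # e.g. it, n_iter, n_iter_without_progress, momentum, learning_rate.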