Code example #1
File: algos.py Project: theophilec/oco-project
def train_gd(a: np.ndarray, b: np.ndarray, a_test: np.ndarray, b_test: np.ndarray,
             T: int, alpha: float):
    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # the weights of our SVM classifier
    x = np.zeros(d)

    logger = Logger(algo_tag=rf"GD - $\alpha={alpha}$")
    for t in tqdm(range(1, T + 1)):
        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 1 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x, alpha),
                train_err=error(a, b, x),
                test_err=error(a_test, b_test, x),
            )

        if alpha == 0:
            # without regularization the objective is only convex (the hinge loss is convex, not strongly convex)
            eta_t = 1 / np.sqrt(t)
        else:
            # thanks to the regularization, the objective is alpha-strongly convex
            # eta_t = 2 / (alpha * (t + 1))
            eta_t = 1 / (alpha * t)

        grad = hinge_loss_grad(a, b, x, alpha)
        x = x - eta_t * grad

    return x, logger
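A minimal usage sketch for train_gd (the data below is synthetic and illustrative only; it assumes binary labels in {-1, +1} and that hinge_loss, hinge_loss_grad, error and Logger are available from the same algos.py):

import numpy as np

# toy data with labels in {-1, +1}
rng = np.random.default_rng(0)
a_train = rng.normal(size=(500, 20))
b_train = np.sign(a_train[:, 0] + 0.1 * rng.normal(size=500))
a_te = rng.normal(size=(200, 20))
b_te = np.sign(a_te[:, 0])

# T full-gradient steps; alpha > 0 switches to the strongly convex step size
x_hat, log = train_gd(a_train, b_train, a_te, b_te, T=1000, alpha=0.1)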
Code example #2
File: algos.py Project: theophilec/oco-project
def train_epoch_hogwild(x, a, b, I_p, eta, alpha):
    # one Hogwild-style pass: this worker processes its own block of indices I_p
    # and updates the shared weight vector x in place, coordinate by coordinate
    for i in I_p:
        # single-sample (regularized) hinge-loss gradient
        a_i, b_i = a[i][np.newaxis, :], np.array([b[i]])
        grad = hinge_loss_grad(a_i, b_i, x, alpha)
        # sparse update: only touch coordinates whose gradient is non-negligible
        for index in np.where(abs(grad) > 0.01)[0]:
            x[index] -= eta * grad[index]
    return x
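For context, a hypothetical single-process driver for train_epoch_hogwild is sketched below (shuffled indices split into per-worker blocks, processed sequentially); an actual Hogwild setup would run these blocks in parallel workers over a shared x, which this sketch does not attempt to reproduce:

# hypothetical sequential driver: split the indices into blocks and run
# train_epoch_hogwild on each block, reusing the same weight vector x
def run_hogwild_sequential(x, a, b, eta, alpha, n_epochs=5, n_workers=4):
    for _ in range(n_epochs):
        perm = np.random.permutation(len(a))
        for I_p in np.array_split(perm, n_workers):
            x = train_epoch_hogwild(x, a, b, I_p, eta, alpha)
    return x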
Code example #3
File: algos.py Project: theophilec/oco-project
def train_ons(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    gamma: float,
    alpha: float,
    radius: float,
    seed=0,
):
    np.random.seed(seed)
    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # the weights of our SVM classifier
    # x_avg is the averaged weights (online to batch conversion); x is the online iterate
    x_avg = np.zeros(d)
    x = np.zeros(d)
    y = np.zeros(d)
    A = 1 / gamma**2 * np.eye(d)
    A_inv = gamma**2 * np.eye(d)

    logger = Logger(
        algo_tag=rf"ONS - $\alpha = {alpha} - \gamma = {gamma} - z={radius}$")
    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 0 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, alpha),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
            )

        grad = hinge_loss_grad(a_, b_, x, alpha)
        # rank-one update of A and of its inverse (Sherman-Morrison formula)
        gg = np.outer(grad, grad)
        assert gg.shape == (d, d)
        A += gg
        num = A_inv.dot(gg).dot(A_inv)
        denom = 1 + grad.dot(A_inv).dot(grad)
        A_inv -= num / denom

        y = x - 1 / gamma * A_inv.dot(grad)
        x, d_0, theta = l1_ball_proj_weighted(y, radius, np.diag(A))

        # averaging
        x_avg = (x_avg * (t - 1) + x) / t

    return x, logger
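The two updates in the loop above are the standard Online Newton Step recursion; writing g_t for the per-sample hinge-loss gradient, they read

A_t = A_{t-1} + g_t g_t^\top, \qquad
A_t^{-1} = A_{t-1}^{-1} - \frac{A_{t-1}^{-1} g_t g_t^\top A_{t-1}^{-1}}{1 + g_t^\top A_{t-1}^{-1} g_t}

(the second identity is the Sherman-Morrison formula, so no matrix inversion is ever performed), followed by the Newton-like step and the weighted projection

y_{t+1} = x_t - \frac{1}{\gamma} A_t^{-1} g_t, \qquad
x_{t+1} = \Pi^{A_t}_{\{\|x\|_1 \le z\}}(y_{t+1}).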
Code example #4
File: algos.py Project: theophilec/oco-project
def train_sgd_proj(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    alpha: float,
    radius: float,
    seed=0,
):
    np.random.seed(seed)
    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # x_avg is the averaged weights (online to batch conversion)
    # x is the current (online) iterate
    x_avg = np.zeros(d)
    x = np.zeros(d)

    logger = Logger(algo_tag=rf"SGDproj - $\alpha={alpha} - z={radius}$")

    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 1 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, alpha),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
            )

        if alpha == 0:
            # our problem is convex (as the hinge loss is a convex function)
            eta_t = 1 / np.sqrt(t)
        else:
            # eta_t = 2 / (alpha * t)
            eta_t = 1 / (alpha * t)

        grad = hinge_loss_grad(a_, b_, x, alpha)

        x = x - eta_t * grad
        x, d_0, theta = l1_ball_proj(x, radius)

        # averaging
        x_avg = (x_avg * (t - 1) + x) / t

    return x_avg, logger
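l1_ball_proj is defined elsewhere in the project; as an illustration only, a minimal sketch of a standard O(d log d) Euclidean projection onto the l1 ball of radius z (Duchi et al., 2008) with a matching return signature is given below. It is an assumption that the project's helper behaves like this and that its second and third return values are the support size and the soft-threshold level (both unused by the caller above):

def l1_ball_proj_sketch(v, z):
    # already inside the ball: nothing to do
    if np.abs(v).sum() <= z:
        return v, np.count_nonzero(v), 0.0
    u = np.sort(np.abs(v))[::-1]          # magnitudes, sorted in decreasing order
    css = np.cumsum(u)
    rho = np.nonzero(u - (css - z) / np.arange(1, len(v) + 1) > 0)[0][-1]
    theta = (css[rho] - z) / (rho + 1.0)  # soft-threshold level
    w = np.sign(v) * np.maximum(np.abs(v) - theta, 0)
    return w, np.count_nonzero(w), theta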
Code example #5
File: algos.py Project: theophilec/oco-project
def train_seg_pm(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    radius: float,
    seed=0,
):
    np.random.seed(seed)
    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # the weights of our SVM classifier
    # x_avg is the averaged weights (online to batch conversion); x is the online iterate
    x_avg = np.zeros(d)
    x = np.zeros(d)
    theta = np.zeros(2 * d)
    w = np.zeros(2 * d)

    logger = Logger(algo_tag=rf"Seg +- proj - $z={radius}$")
    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 1 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, 0),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
            )

        eta_t = 1 / np.sqrt(t)

        grad = hinge_loss_grad(a_, b_, x, 0)

        theta[:d] = theta[:d] - eta_t * grad
        theta[d:] = theta[d:] + eta_t * grad

        w = softmax(theta)

        x = radius * (w[:d] - w[d:])

        # averaging
        x_avg = (x_avg * (t - 1) + x) / t

    return x, logger
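The +/- trick above is the exponentiated-gradient update on a scaled simplex: the two halves of theta accumulate the step-size-weighted gradients with opposite signs, w = softmax(theta) lives on the 2d-dimensional simplex, and the iterate is recovered as x = z * (w[:d] - w[d:]), so ||x||_1 <= z holds automatically. The softmax helper is not shown in this file; it is assumed to be scipy.special.softmax or an equivalent numerically stable version such as:

def softmax_stable(theta):
    # shift by the max before exponentiating to avoid overflow
    e = np.exp(theta - theta.max())
    return e / e.sum()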
Code example #6
File: algos.py Project: theophilec/oco-project
def train_adagrad(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    radius: float,
    seed=0,
):
    np.random.seed(seed)
    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # the weights of our SVM classifier
    # x_avg is the averaged weights (online to batch conversion); x is the online iterate
    x_avg = np.zeros(d)
    x = np.zeros(d)
    y = np.zeros(d)
    DELTA = 1e-5
    S = np.ones(d) * DELTA

    logger = Logger(algo_tag=rf"Adagrad - $z={radius}$")
    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 1 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, 0),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
            )

        grad = hinge_loss_grad(a_, b_, x, 0)
        S += grad**2

        D = np.diag(np.sqrt(S))
        D_inv = np.diag(1 / np.sqrt(S))

        y = x - D_inv.dot(grad)
        x, d_0, theta = l1_ball_proj_weighted(y, radius, np.diag(D))

        # averaging
        x_avg = (x_avg * (t - 1) + x) / t

    return x, logger
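Because S is kept as a vector, D and D_inv above are purely diagonal, so the d x d matrices never need to be materialised; an equivalent formulation of the same AdaGrad step (same result, relying on np.diag(D) == np.sqrt(S)) would be:

def adagrad_step(x, grad, S, radius):
    # S is the running sum of squared gradients, already updated by the caller
    y = x - grad / np.sqrt(S)                   # == x - D_inv.dot(grad)
    x, _, _ = l1_ball_proj_weighted(y, radius, np.sqrt(S))
    return x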
Code example #7
File: algos.py Project: theophilec/oco-project
def train_sgd(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    alpha: float,
    return_avg=True,
    seed=0,
    use_logger=True,
):
    np.random.seed(seed)
    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # x_avg is the averaged weights (online to batch conversion)
    # x is the current (online) iterate
    x_avg = np.zeros(d)
    x = np.zeros(d)

    if use_logger:
        logger = Logger(algo_tag=rf"SGD - {'x_avg' if return_avg else 'x_T'}")

    t0 = perf_counter_ns()

    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        if alpha == 0:
            # our problem is convex (as the hinge loss is a convex function)
            eta_t = 1 / np.sqrt(t)
        else:
            eta_t = 1 / (alpha * t)

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if (t % int(10**k) == 1 or t < 10) and use_logger:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, alpha),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
                eta_t=eta_t,
                time_elapsed=(perf_counter_ns() - t0) / 1e9,
            )

        grad = hinge_loss_grad(a_, b_, x, alpha)

        x = x - eta_t * grad

        # averaging
        if return_avg:
            x_avg = (x_avg * (t - 1) + x) / t
        else:
            x_avg = x

    dt = (perf_counter_ns() - t0) / 1e9  # execution time in sec
    if use_logger:
        return x_avg, logger
    else:
        return dt, T, error(a_test, b_test, x)
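With use_logger=False the function skips logging and returns timing information instead of a Logger, which can be used for rough throughput comparisons; a hypothetical call (reusing the toy arrays from the first example) could look like:

# timing-only run: returns (elapsed seconds, number of iterations, test error)
dt, n_iters, test_err = train_sgd(a_train, b_train, a_te, b_te,
                                  T=10_000, alpha=0.1,
                                  return_avg=True, use_logger=False)
print(f"{n_iters} iterations in {dt:.2f}s, test error {test_err:.3f}")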