def train_gd(a: np.ndarray, b: np.ndarray, a_test: np.ndarray, b_test: np.ndarray, T: int, alpha: float):
    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # the weights of our SVM classifier
    x = np.zeros(d)

    logger = Logger(algo_tag=rf"GD - $\alpha={alpha}$")

    for t in tqdm(range(1, T + 1)):
        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 1 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x, alpha),
                train_err=error(a, b, x),
                test_err=error(a_test, b_test, x),
            )

        if alpha == 0:
            # our problem is simply convex (as the hinge loss is a convex function)
            eta_t = 1 / np.sqrt(t)
        else:
            # thanks to the regularization, our problem is alpha strongly convex
            # eta_t = 2 / (alpha * (t + 1))
            eta_t = 1 / (alpha * t)

        grad = hinge_loss_grad(a, b, x, alpha)
        x = x - eta_t * grad

    return x, logger

def train_epoch_hogwild(x, a, b, I_p, eta, alpha):
    for i in I_p:
        a_i, b_i = a[i][np.newaxis, :], np.array([b[i]])
        grad = hinge_loss_grad(a_i, b_i, x, alpha)
        # sparse update: only touch the coordinates with a non-negligible gradient
        for index in np.where(abs(grad) > 0.01)[0]:
            x[index] -= eta * grad[index]
    return x

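# The epoch routine above is meant to be driven by several workers that update the
# shared weight vector x in place, without locks (Hogwild!). The driver below is a
# minimal sketch, not part of the original code: the epoch count, worker count and
# step size are illustrative assumptions. With CPython threads the GIL limits the
# actual speed-up; the sketch only illustrates the lock-free access pattern.
def train_hogwild_sketch(a, b, n_epochs=10, n_workers=4, eta=0.01, alpha=0.0, seed=0):
    from concurrent.futures import ThreadPoolExecutor

    rng = np.random.default_rng(seed)
    # add the bias column, as in the other training routines
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    n, d = a.shape
    x = np.zeros(d)  # shared between workers and updated in place

    with ThreadPoolExecutor(max_workers=n_workers) as pool:
        for _ in range(n_epochs):
            # each worker processes its own random block of sample indices
            blocks = np.array_split(rng.permutation(n), n_workers)
            futures = [
                pool.submit(train_epoch_hogwild, x, a, b, I_p, eta, alpha)
                for I_p in blocks
            ]
            for f in futures:
                f.result()

    return x
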
def train_ons(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    gamma: float,
    alpha: float,
    radius: float,
    seed=0,
):
    np.random.seed(seed)

    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # the weights of our SVM classifier
    # x_avg is the averaged weights (online to batch conversion)
    x_avg = np.zeros(d)
    x = np.zeros(d)
    y = np.zeros(d)

    A = 1 / gamma**2 * np.eye(d)
    A_inv = gamma**2 * np.eye(d)

    logger = Logger(
        algo_tag=rf"ONS - $\alpha = {alpha} - \gamma = {gamma} - z={radius}$")

    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick a random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 0 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, alpha),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
            )

        grad = hinge_loss_grad(a_, b_, x, alpha)
        gg = np.outer(grad, grad)
        assert gg.shape == (d, d)

        A += gg
        # rank-one (Sherman-Morrison) update of the inverse of A
        num = A_inv.dot(gg).dot(A_inv)
        denum = 1 + grad.dot(A_inv).dot(grad)
        A_inv -= num / denum

        y = x - 1 / gamma * A_inv.dot(grad)
        x, d_0, theta = l1_ball_proj_weighted(y, radius, np.diag(A))

        # averaging
        x_avg = (x_avg * (t - 1) + x) / t

    return x, logger

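# Illustrative check (not in the original code) of the rank-one inverse update used in
# train_ons: if A_new = A + g g^T, then by the Sherman-Morrison formula
#   A_new^{-1} = A^{-1} - (A^{-1} g g^T A^{-1}) / (1 + g^T A^{-1} g),
# which is exactly the num / denum correction applied to A_inv above.
def _check_sherman_morrison(d=5, gamma=0.5, seed=0):
    rng = np.random.default_rng(seed)
    A = 1 / gamma**2 * np.eye(d)
    A_inv = gamma**2 * np.eye(d)
    g = rng.normal(size=d)
    gg = np.outer(g, g)
    A_inv_updated = A_inv - A_inv.dot(gg).dot(A_inv) / (1 + g.dot(A_inv).dot(g))
    assert np.allclose(A_inv_updated, np.linalg.inv(A + gg))
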
def train_sgd_proj(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    alpha: float,
    radius: float,
    seed=0,
):
    np.random.seed(seed)

    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # x_avg is the averaged weights (online to batch conversion)
    # x is the current weight vector (online version)
    x_avg = np.zeros(d)
    x = np.zeros(d)

    logger = Logger(algo_tag=rf"SGDproj - $\alpha={alpha} - z={radius}$")

    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick a random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 1 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, alpha),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
            )

        if alpha == 0:
            # our problem is convex (as the hinge loss is a convex function)
            eta_t = 1 / np.sqrt(t)
        else:
            # eta_t = 2 / (alpha * t)
            eta_t = 1 / (alpha * t)

        grad = hinge_loss_grad(a_, b_, x, alpha)
        x = x - eta_t * grad
        x, d_0, theta = l1_ball_proj(x, radius)

        # averaging
        x_avg = (x_avg * (t - 1) + x) / t

    return x_avg, logger

def train_seg_pm(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    radius: float,
    seed=0,
):
    np.random.seed(seed)

    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # the weights of our SVM classifier
    # x_avg is the averaged weights (online to batch conversion)
    x_avg = np.zeros(d)
    x = np.zeros(d)
    theta = np.zeros(2 * d)
    w = np.zeros(2 * d)

    logger = Logger(algo_tag=rf"Seg +- proj - $z={radius}$")

    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick a random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 1 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, 0),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
            )

        eta_t = 1 / np.sqrt(t)

        grad = hinge_loss_grad(a_, b_, x, 0)
        # exponentiated gradient with the +/- trick: one weight per sign of each coordinate
        theta[:d] = theta[:d] - eta_t * grad
        theta[d:] = theta[d:] + eta_t * grad
        w = softmax(theta)
        x = radius * (w[:d] - w[d:])

        # averaging
        x_avg = (x_avg * (t - 1) + x) / t

    return x, logger

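# Note on the +/- trick above (added comment): a point x of the l1 ball of radius z is
# represented as x = z * (w[:d] - w[d:]) with w on the probability simplex in dimension
# 2d, so the softmax / exponentiated-gradient update on theta keeps x feasible without
# an explicit projection step.
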
def train_adagrad(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    radius: float,
    seed=0,
):
    np.random.seed(seed)

    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # the weights of our SVM classifier
    # x_avg is the averaged weights (online to batch conversion)
    x_avg = np.zeros(d)
    x = np.zeros(d)
    y = np.zeros(d)

    DELTA = 1e-5
    S = np.ones(d) * DELTA

    logger = Logger(algo_tag=rf"Adagrad - $z={radius}$")

    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick a random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if t % int(10**k) == 1 or t < 10:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, 0),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
            )

        grad = hinge_loss_grad(a_, b_, x, 0)

        # diagonal preconditioner built from the accumulated squared gradients
        S += grad**2
        D = np.diag(np.sqrt(S))
        D_inv = np.diag(1 / np.sqrt(S))

        y = x - D_inv.dot(grad)
        x, d_0, theta = l1_ball_proj_weighted(y, radius, np.diag(D))

        # averaging
        x_avg = (x_avg * (t - 1) + x) / t

    return x, logger

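# Note (added): since D is diagonal, the AdaGrad step above is simply a per-coordinate
# learning rate, y_i = x_i - grad_i / sqrt(S_i). An equivalent, cheaper form that avoids
# building the d x d matrices would be:
#     y = x - grad / np.sqrt(S)
#     x, d_0, theta = l1_ball_proj_weighted(y, radius, np.sqrt(S))
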
def train_sgd(
    a: np.ndarray,
    b: np.ndarray,
    a_test: np.ndarray,
    b_test: np.ndarray,
    T: int,
    alpha: float,
    return_avg=True,
    seed=0,
    use_logger=True,
):
    np.random.seed(seed)

    # add a column of ones to the input data, to avoid having to define an explicit bias in our weights
    a = np.concatenate([a, np.ones((len(a), 1))], axis=1)
    a_test = np.concatenate([a_test, np.ones((len(a_test), 1))], axis=1)
    n, d = a.shape

    # x_avg is the averaged weights (online to batch conversion)
    # x is the current weight vector (online version)
    x_avg = np.zeros(d)
    x = np.zeros(d)

    if use_logger:
        logger = Logger(algo_tag=rf"SGD - {'x_avg' if return_avg else 'x_T'}")

    t0 = perf_counter_ns()

    I = np.random.randint(0, n, T)
    for t in tqdm(range(1, T + 1)):
        # pick a random sample
        i = I[t - 1]
        a_, b_ = a[i][np.newaxis, :], np.array([b[i]])

        if alpha == 0:
            # our problem is convex (as the hinge loss is a convex function)
            eta_t = 1 / np.sqrt(t)
        else:
            eta_t = 1 / (alpha * t)

        # log our results (before training, to match plots from the class)
        k = max(int(np.log10(t)), 0)
        if (t % int(10**k) == 1 or t < 10) and use_logger:
            logger.log(
                iteration=t,
                loss=hinge_loss(a, b, x_avg, alpha),
                train_err=error(a, b, x_avg),
                test_err=error(a_test, b_test, x_avg),
                eta_t=eta_t,
                time_elapsed=(perf_counter_ns() - t0) / 1e9,
            )

        grad = hinge_loss_grad(a_, b_, x, alpha)
        x = x - eta_t * grad

        # averaging
        if return_avg:
            x_avg = (x_avg * (t - 1) + x) / t
        else:
            x_avg = x

    dt = (perf_counter_ns() - t0) / 1e9  # execution time in sec

    if use_logger:
        return x_avg, logger
    else:
        return dt, T, error(a_test, b_test, x)

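# Usage sketch (illustrative only, not the original experiment script): the trainers share
# the same (a, b, a_test, b_test, T, ...) interface, so they can be compared on one split.
# The synthetic data and hyper-parameter values below are assumptions.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    w_true = rng.normal(size=20)
    a_train = rng.normal(size=(1000, 20))
    b_train = np.sign(a_train @ w_true)
    a_te = rng.normal(size=(200, 20))
    b_te = np.sign(a_te @ w_true)

    x_gd, log_gd = train_gd(a_train, b_train, a_te, b_te, T=100, alpha=0.1)
    x_sgd, log_sgd = train_sgd(a_train, b_train, a_te, b_te, T=10_000, alpha=0.1)
    x_ons, log_ons = train_ons(a_train, b_train, a_te, b_te, T=10_000,
                               gamma=0.1, alpha=0.1, radius=100.0)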