def stochastic_pegasos(X: np.array, y: np.array, pos_class: int, random_seed=None) -> np.ndarray:
    """Train a one-vs-rest binary Pegasos SVM for `pos_class`.

    Runs `max_iter` stochastic sub-gradient steps on the hinge loss with
    L2 regularization `lambd`, drawing all mini-batch indices (batch
    size `k`) up front, and returns the average of the weight vectors
    seen over the last `num_to_avg` iterations as a dense 1-D array of
    length d.

    NOTE(review): relies on the module-level `WeightVector` helper
    (exposes scale/sparse_add/sparse_dot/get_snorm and an `a * v`
    dense materialization) and on `tqdm` for progress display.
    """
    n, d = X.shape
    # {-1, +1} targets: +1 where y equals the requested positive class.
    labels = (y == pos_class) * 2 - 1
    # TODO: make parameters
    max_iter = 800
    num_to_avg = 400
    lambd = 0.1
    k = 1

    if random_seed is not None:
        np.random.seed(random_seed)
    # Pre-draw every mini-batch sample index for the whole run.
    sample_ids = np.random.choice(n, size=max_iter * k)

    tail_len = min(max_iter, num_to_avg)
    tail_avg = WeightVector(d)
    weights = WeightVector(d)
    snapshots = []

    for step in tqdm(range(max_iter)):
        batch = sample_ids[step * k:(step + 1) * k]
        eta = 1. / (lambd * (step + 2))

        # Collect margin violators in the batch; each contributes a
        # sub-gradient term of weight eta * label / k.
        upd_rows, upd_coefs = [], []
        for row_ix in batch:
            row = X.getrow(row_ix)
            if labels[row_ix] * weights.sparse_dot(row) < 1:
                upd_rows.append(row_ix)
                upd_coefs.append(eta * labels[row_ix] / k)

        # Shrink by the regularization factor, then add sub-gradients.
        weights.scale(1. - eta * lambd)
        for row_ix, coef in zip(upd_rows, upd_coefs):
            weights.sparse_add(X.getrow(row_ix), coef)

        # Projection step: back onto the ball of radius 1/sqrt(lambd).
        weights.scale(min(1., 1. / np.sqrt(lambd * weights.get_snorm())))

        # Tail averaging over the final `num_to_avg` iterations; a dense
        # snapshot is recorded after every step (currently unused).
        if step >= max_iter - num_to_avg:
            tail_avg.add(weights, 1. / tail_len)
            snapshots.append(tail_avg.a * tail_avg.v)
        else:
            snapshots.append(weights.a * weights.v)

    return tail_avg.a * tail_avg.v
def multi_pegasos(X: np.array, y: np.array, lasso_svm=True, lsh_ann=False,
                  random_seed=None) -> Tuple[WeightMatrix, Tuple]:
    """Train a multiclass Pegasos SVM with an ANN-accelerated argmax.

    Performs `max_iter` mini-batch sub-gradient steps on the multiclass
    hinge loss.  The most-violating rival class per example is found via
    `ANNArgmax` (LSH-based when `lsh_ann` is set).  With `lasso_svm`
    the touched rows of W are soft-thresholded (L1 flavour); otherwise
    the classic L2 scale-and-project steps are applied.

    Returns the learned WeightMatrix and a (ys_stats, rs_stats) pair of
    class counters.  NOTE(review): the stat-collection lines are
    disabled, so both counters are returned empty.

    NOTE(review): depends on module-level configuration — n_classes,
    num_threads, use_class_sampling, classes_cnt, y_train,
    use_dummy_loss, gamma, dataset_filename, X_heldout, y_heldout,
    predict_NN — confirm these are defined before calling.
    """
    n, d = X.shape
    # TODO: make parameters
    max_iter = 25
    eta0 = 0.1
    eta_decay_rate = 0.02
    # Batch size / regularization (currently identical in both modes).
    if lasso_svm:
        k = 100 * int(np.sqrt(n_classes))
        lambd = 1.
    else:
        k = 100 * int(np.sqrt(n_classes))
        lambd = 1.

    W = WeightMatrix((n_classes, d))
    if lsh_ann:
        argmax_oracle = ANNArgmax(n_classes, num_threads, LSH=True,
                                  n_features=d, hash_length=2048)
    else:
        argmax_oracle = ANNArgmax(n_classes, num_threads)

    if random_seed is not None:
        np.random.seed(random_seed)

    # Draw all mini-batch indices up front; optionally reweight so each
    # non-empty class is sampled uniformly.
    if use_class_sampling:
        class_uniform_p = 1. / (len(classes_cnt[classes_cnt != 0]) * classes_cnt[y_train])
        random_ids = np.random.choice(n, size=max_iter * k, p=class_uniform_p)
    else:
        random_ids = np.random.choice(n, size=max_iter * k)

    amax_multiplier = 1.  # accumulated lazy scale factor, mirrored into the ANN index
    learning_time = 0.
    rs_stats = collections.Counter()
    ys_stats = collections.Counter()

    # Start a fresh per-process CSV log.
    with open("log_%s_%d.txt" % (dataset_filename, os.getpid()), "w") as fout:
        fout.write(
            "i,learning_time,maf1,mif1,amax_multiplier,nnz_sum,sparsity\n")

    for it in tqdm(range(max_iter)):
        iter_start = time.time()
        batch_ids = random_ids[it * k:(it + 1) * k]
        xs = X[batch_ids]
        eta = eta0 / (1 + eta_decay_rate * it)
        ys = y[batch_ids]

        # Most-violating ("rival") class for each example in the batch.
        rs = argmax_oracle.query(xs, ys)

        grad_ixs, grad_weights = [], []
        for obj_ix, true_c, rival_c, x_row in zip(batch_ids, ys, rs, xs):
            if use_dummy_loss:
                loss = 1
            else:
                # TODO: use wrx from dists
                wrx = W.sparse_dot(rival_c, x_row)
                wyx = W.sparse_dot(true_c, x_row)
                loss = 1 + wrx - wyx
            if loss > 0:
                # Push the true class up and the rival class down.
                grad_ixs.append((true_c, obj_ix))
                grad_weights.append(+eta / k)
                grad_ixs.append((rival_c, obj_ix))
                grad_weights.append(-eta / k)

        # L2 mode: shrink the whole matrix by the regularization factor.
        if not lasso_svm:
            iter_scale = 1. - eta * lambd
            W.scale(iter_scale)
            amax_multiplier *= iter_scale

        # Apply sub-gradients and gather the updated rows for the ANN
        # index, rescaled to undo W's accumulated lazy scaling.
        amax_update = {}
        for (class_ix, obj_ix), grad_w in zip(grad_ixs, grad_weights):
            obj = X.getrow(obj_ix)
            upd = W.sparse_add(class_ix, obj, grad_w)
            upd.data /= amax_multiplier
            amax_update[class_ix] = upd

        # L1 mode: soft-threshold every row touched in this batch.
        if lasso_svm:
            touched = list(set(ys) | set(rs))
            sparsity = W.nnz / W.dim[0] / W.dim[1]
            th = gamma * n_classes / len(touched) * lambd * eta
            if th > 0:
                for class_ix in touched:
                    amax_update[class_ix] = W.soft_threshold(class_ix, th)

        # L2 mode: project W back onto the ball of radius 1/sqrt(lambd).
        if not lasso_svm:
            iter_norm = min(1., 1. / np.sqrt(lambd * W.snorm))
            W.scale(iter_norm)
            amax_multiplier *= iter_norm

        # Push all rows modified this iteration into the ANN index at once.
        if len(amax_update) > 0:
            class_ixs = np.array(list(amax_update.keys()))
            new_values = ss.vstack(list(amax_update.values()))
            argmax_oracle.update(class_ixs, new_values)

        learning_time += time.time() - iter_start

        # Periodic held-out evaluation and logging.
        # NOTE(review): with max_iter=25 the 100500 period never fires.
        if it % 100500 == 0 and it > 0:
            nnz_sum = sum(row.nnz for row in W.m)
            sparsity = nnz_sum / (len(W.m) * W.m[0].shape[1])
            Ws = ss.vstack(W.m) * W.a
            WsT = None
            y_pred_heldout = predict_NN(X_heldout, Ws, WsT, metric="cosine")
            maf1 = f1_score(y_heldout, y_pred_heldout, average="macro")
            mif1 = f1_score(y_heldout, y_pred_heldout, average="micro")
            stats = [it, learning_time, maf1, mif1,
                     amax_multiplier, nnz_sum, sparsity]
            with open("log_%s_%d.txt" % (dataset_filename, os.getpid()), "a") as fout:
                csv.writer(fout).writerow(stats)

    print("Learning time: %.1f" % learning_time)
    print("Non-zero elements: %d" % W.nnz)
    return W, (ys_stats, rs_stats)