Example #1: binary Pegasos SVM (stochastic, mini-batched, tail-averaged)
import numpy as np
import scipy.sparse as ss
from tqdm import tqdm

# WeightVector is a project-local helper; a minimal sketch is given after
# this example.


def stochastic_pegasos(X: ss.csr_matrix,
                       y: np.ndarray,
                       pos_class: int,
                       random_seed=None) -> np.ndarray:
    n, d = X.shape

    # Map labels to {-1, +1}: +1 for pos_class, -1 for everything else
    labels = ((y == pos_class) * 2 - 1)

    # TODO: make parameters
    max_iter = 800     # number of mini-batch iterations
    num_to_avg = 400   # tail iterations averaged into the returned vector
    lambd = 0.1        # L2 regularization strength
    k = 1              # mini-batch size

    if random_seed is not None:
        np.random.seed(random_seed)
    random_ids = np.random.choice(n, size=max_iter * k)

    avg_scale = min(max_iter, num_to_avg)
    avg_wv = WeightVector(d)
    wv = WeightVector(d)
    wvs = []  # per-iteration snapshots; kept for inspection, not returned

    for i in tqdm(range(max_iter)):
        x_ids = random_ids[i * k:(i + 1) * k]
        eta = 1. / (lambd * (i + 2))  # Pegasos step size ~ 1 / (lambda * t)
        grad_ixs, grad_weights = [], []
        for j in x_ids:
            x = X.getrow(j)
            pred = wv.sparse_dot(x)
            label = labels[j]
            if label * pred < 1:  # margin violated: hinge sub-gradient is nonzero
                grad_ixs.append(j)
                grad_weights.append(eta * label / k)
        # Scale wv
        wv.scale(1. - eta * lambd)
        # Add sub-gradients
        for grad_ix, grad_w in zip(grad_ixs, grad_weights):
            wv.sparse_add(X.getrow(grad_ix), grad_w)
        # Projection step
        wv.scale(min(1., 1. / np.sqrt(lambd * wv.get_snorm())))
        # Average weights over the last num_to_avg iterations
        if i >= max_iter - num_to_avg:
            avg_wv.add(wv, 1. / avg_scale)
            wvs.append(avg_wv.a * avg_wv.v)  # snapshot every iteration
        else:
            wvs.append(wv.a * wv.v)  # snapshot every iteration

    # Materialize the averaged weights: the actual vector is a * v
    return avg_wv.a * avg_wv.v
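
The WeightVector helper is project-local and not part of this listing. Below is a minimal sketch of what it plausibly looks like, inferred from how it is called above: the actual weights are kept as a scalar a times a dense vector v, so scale() is O(1) and a sparse sub-gradient update only touches the nonzeros of x (the lazy-scaling trick used in Pegasos implementations). The attribute and method names mirror the usage above; the incremental squared-norm bookkeeping is an assumption.

import numpy as np
import scipy.sparse as ss


class WeightVector:
    """Weight vector stored lazily as a * v, so scaling is O(1)."""

    def __init__(self, dim: int):
        self.a = 1.             # lazy scalar multiplier
        self.v = np.zeros(dim)  # unscaled dense weights
        self.snorm = 0.         # squared L2 norm of a * v, kept incrementally

    def scale(self, c: float):
        if c == 0.:
            self.a, self.snorm = 1., 0.
            self.v[:] = 0.
        else:
            self.a *= c
            self.snorm *= c * c

    def get_snorm(self) -> float:
        return self.snorm

    def sparse_dot(self, x: ss.csr_matrix) -> float:
        # <a * v, x> for a single sparse row x
        return self.a * x.dot(self.v)[0]

    def sparse_add(self, x: ss.csr_matrix, w: float):
        # (a * v) += w * x, touching only the nonzeros of x
        ixs = x.indices
        old = self.v[ixs]
        self.v[ixs] = old + (w / self.a) * x.data
        self.snorm += self.a ** 2 * (np.dot(self.v[ixs], self.v[ixs]) -
                                     np.dot(old, old))

    def add(self, other: "WeightVector", w: float):
        # (a * v) += w * (other.a * other.v); used for tail averaging
        self.v += (w * other.a / self.a) * other.v
        self.snorm = self.a ** 2 * np.dot(self.v, self.v)

With a helper like this, the example runs on any CSR matrix, e.g. w = stochastic_pegasos(X_train, y_train, pos_class=1, random_seed=0), where X_train and y_train are placeholder names.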
Example #2: multiclass Pegasos with approximate-nearest-neighbor argmax
import collections
import csv
import os
import time
from typing import Tuple

import numpy as np
import scipy.sparse as ss
from sklearn.metrics import f1_score
from tqdm import tqdm

# Assumed to come from the surrounding project: n_classes, num_threads,
# use_class_sampling, classes_cnt, y_train, use_dummy_loss, gamma,
# dataset_filename, X_heldout, y_heldout, predict_NN, and the WeightVector,
# WeightMatrix and ANNArgmax helpers.


def multi_pegasos(X: ss.csr_matrix,
                  y: np.ndarray,
                  lasso_svm=True,
                  lsh_ann=False,
                  random_seed=None) -> Tuple[WeightMatrix, Tuple]:
    n, d = X.shape

    # TODO: make parameters
    max_iter = 25          # number of mini-batch iterations
    eta0 = 0.1             # initial learning rate
    eta_decay_rate = 0.02  # eta = eta0 / (1 + decay * i)

    # TODO: make k and lambd parameters; the lasso and L2 variants currently
    # share the same placeholder values
    k = 100 * int(np.sqrt(n_classes))
    lambd = 1.

    W = WeightMatrix((n_classes, d))
    # Wyx = WeightVector(n)

    # amax1 = BruteforceArgmax(W)
    if lsh_ann:
        amax2 = ANNArgmax(n_classes,
                          num_threads,
                          LSH=True,
                          n_features=d,
                          hash_length=2048)
    else:
        amax2 = ANNArgmax(n_classes, num_threads)

    if random_seed is not None:
        np.random.seed(random_seed)
    if use_class_sampling:
        # Draw each non-empty class uniformly by weighting every object
        # inversely to its class frequency
        class_uniform_p = 1. / (len(classes_cnt[classes_cnt != 0]) *
                                classes_cnt[y_train])
        random_ids = np.random.choice(n, size=max_iter * k, p=class_uniform_p)
    else:
        random_ids = np.random.choice(n, size=max_iter * k)

    # Running product of W's lazy scalings; the ANN index stores rows
    # divided by this factor
    amax_multiplier = 1.

    learning_time = 0.

    rs_stats = collections.Counter()
    ys_stats = collections.Counter()

    with open("log_%s_%d.txt" % (dataset_filename, os.getpid()), "w") as fout:
        fout.write(
            "i,learning_time,maf1,mif1,amax_multiplier,nnz_sum,sparsity\n")

    # a, b = 0., 0.
    for i in tqdm(range(max_iter)):
        iter_start = time.time()
        x_ids = random_ids[i * k:(i + 1) * k]
        xs = X[x_ids]
        eta = eta0 / (1 + eta_decay_rate * i)

        ys = y[x_ids]
        # rs1 = amax1.query(xs, ys)  # brute-force argmax, kept for cross-checking
        rs2 = amax2.query(xs, ys)
        # TODO: understand why the ANN query sometimes fails to "see" all of
        # the vectors in the index (the brute-force and ANN argmax results
        # can disagree; the commented-out rs1 cross-check measured this).
        rs = rs2
        grad_ixs, grad_weights = [], []

        # Collect class stats (these feed the counters returned to the caller)
        rs_stats.update(rs)
        ys_stats.update(ys)

        for j_, y_, r_, x_ in zip(x_ids, ys, rs, xs):
            if use_dummy_loss:
                loss = 1
            else:
                # Multiclass hinge: max(0, 1 + <w_r, x> - <w_y, x>)
                # loss = max(0, 1 + (-dr) - Wyx.elem_get(j_))
                # TODO: use wrx from dists
                wrx = W.sparse_dot(r_, x_)
                wyx = W.sparse_dot(y_, x_)
                loss = 1 + wrx - wyx
            if loss > 0:
                # Sub-gradient step: pull the true-class row toward x and
                # push the offending (argmax) row away from it
                grad_ixs.append((y_, j_))
                grad_weights.append(+eta / k)
                grad_ixs.append((r_, j_))
                grad_weights.append(-eta / k)

        # Scale weight matrix and Wyx cache matrix
        if not lasso_svm:
            iter_scale = 1. - eta * lambd
            W.scale(iter_scale)
            amax_multiplier *= iter_scale
            # Wyx.scale(iter_scale)
        # Add sub-gradients and project rows onto a sphere of r=1
        amax_update = {}
        for (class_ix, obj_ix), grad_w in zip(grad_ixs, grad_weights):
            obj = X.getrow(obj_ix)
            upd = W.sparse_add(class_ix, obj, grad_w)
            # Incrementally update Wyx (<w_yk, xk>) cache matrix
            # for x_ix in classes_objects[class_ix]:
            #     Wyx.elem_add(x_ix, sparse_sparse_dot(X.getrow(x_ix), obj) * grad_w)
            # Keep the ANN index in the unscaled frame: stored rows are the
            # true rows divided by amax_multiplier
            upd.data /= amax_multiplier
            amax_update[class_ix] = upd
        # Soft thresholding (L1 prox) for lasso SVM; a sketch of the operator
        # is given after this example
        if lasso_svm:
            W_ixs = list(set(ys) | set(rs))
            th = gamma * n_classes / len(W_ixs) * lambd * eta
            if th > 0:
                for class_ix in W_ixs:
                    upd = W.soft_threshold(class_ix, th)
                    amax_update[class_ix] = upd

        # Normalize weight matrix and Wyx cache matrix
        if not lasso_svm:
            # Projection step
            iter_norm = min(1., 1. / np.sqrt(lambd * W.snorm))
            W.scale(iter_norm)
            amax_multiplier *= iter_norm
            # Wyx.scale(iter_norm)
            # for class_ix, new_val in amax_update.items():
            #     snorm = np.dot(new_val.data, new_val.data)
            #     new_norm = min(1., 1. / np.sqrt(lambd * snorm))
            #     amax_update[class_ix] *= new_norm
        if len(amax_update) > 0:
            class_ixs = np.array(list(amax_update.keys()))
            new_values = ss.vstack(list(amax_update.values()))
            # print(class_ixs)
            # amax1.update(class_ixs, new_values)
            amax2.update(class_ixs, new_values)

        iter_end = time.time()
        learning_time += iter_end - iter_start

        # NOTE: with max_iter = 25 this heldout evaluation never triggers;
        # lower the 100500 period to enable periodic logging
        if i % 100500 == 0 and i > 0:
            # Save intermediate W matrix
            # with open("W_%s.dump" % dataset_filename, "wb") as fout:
            #     pickle.dump(W, fout)
            # Create test index :(
            # TODO: encapsulation is broken -- fix
            # Calculate MaF1 and MiF1 heldout scores
            nnz_sum = sum(x.nnz for x in W.m)
            sparsity = nnz_sum / (len(W.m) * W.m[0].shape[1])
            Ws = ss.vstack(W.m) * W.a  # materialize the lazily scaled matrix
            WsT = None  # ss.csr_matrix(Ws.T)
            y_pred_heldout = predict_NN(X_heldout, Ws, WsT, metric="cosine")
            # y_pred_heldout_dot = predict_NN(X_heldout, Ws, WsT, metric="dot")
            maf1 = f1_score(y_heldout, y_pred_heldout, average="macro")
            mif1 = f1_score(y_heldout, y_pred_heldout, average="micro")
            # maf1_dot = f1_score(y_heldout, y_pred_heldout_dot, average="macro")
            # mif1_dot = f1_score(y_heldout, y_pred_heldout_dot, average="micro")
            stats = [
                i, learning_time, maf1, mif1, amax_multiplier, nnz_sum,
                sparsity
            ]
            with open("log_%s_%d.txt" % (dataset_filename, os.getpid()),
                      "a") as fout:
                writer = csv.writer(fout)
                writer.writerow(stats)

    print("Learning time: %.1f" % learning_time)
    print("Non-zero elements: %d" % W.nnz)
    return W, (ys_stats, rs_stats)
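
Two helpers referenced above are not defined in this listing. The lasso branch calls W.soft_threshold(class_ix, th); here is a minimal sketch of the operator it presumably applies to one class row (the standalone name soft_threshold_row is mine, and the lazy scale factor W.a is ignored for clarity):

import numpy as np
import scipy.sparse as ss


def soft_threshold_row(row: ss.csr_matrix, th: float) -> ss.csr_matrix:
    # Elementwise L1 prox: sign(w) * max(|w| - th, 0). Entries shrunk to
    # zero are dropped, which is what keeps the lasso class rows sparse.
    out = row.copy()
    out.data = np.sign(out.data) * np.maximum(np.abs(out.data) - th, 0.)
    out.eliminate_zeros()
    return out

Likewise, the heldout evaluation calls predict_NN, which by its arguments looks like nearest-class-row prediction. A sketch assuming class labels coincide with the row indices of Ws (the WT argument is accepted only for signature compatibility):

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def predict_NN(X, W, WT=None, metric="cosine"):
    # Score every sample against every class row and predict the argmax
    if metric == "cosine":
        scores = cosine_similarity(X, W)  # works with sparse inputs
    else:  # "dot"
        scores = (X @ W.T).toarray()
    return np.asarray(scores).argmax(axis=1)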