Beispiel #1
0
def main(args):
    """Run ``agd_rho`` once on an SVM task and print timing, loss and accuracy.

    The dataset ``<args.dname>.dat`` is loaded with a bias term, every label
    below 1 is set to -1, and the (eps, delta) budget is converted with
    ``dp_to_zcdp`` before the solver is called.  Loss (divided by the number
    of rows) and accuracy are evaluated on the same data used for training.

    Args:
        args: namespace providing ``dname``, ``eps``, ``delta``,
            ``obj_clip``, ``grad_clip`` and ``exp_dec``.
    """
    # load the dataset
    fpath = "../../../Experiment/Dataset/dat/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)
    N = X.shape[0]
    eps_total = args.eps
    delta = args.delta
    obj_clip = args.obj_clip
    grad_clip = args.grad_clip

    # for svm only
    y[y < 1] = -1

    rho = dp_to_zcdp(eps_total, delta)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("rho = {:.5f}".format(rho))

    # time.clock() was removed in Python 3.8; process_time() is the CPU-time
    # replacement.  Fall back to clock() on interpreters that predate it.
    try:
        cpu_timer = time.process_time
    except AttributeError:
        cpu_timer = time.clock

    start_time = cpu_timer()
    w = agd_rho(X,
                y,
                rho,
                eps_total,
                delta,
                svm_grad,
                svm_loss,
                svm_test,
                obj_clip,
                grad_clip,
                reg_coeff=0.01,
                exp_dec=args.exp_dec,
                verbose=True)
    elapsed = cpu_timer() - start_time
    # Two spaces after '=' reproduce the old `print "time = ", value` output.
    print("time =  {}".format(elapsed))
    loss = svm_loss(w, X, y) / N
    acc = svm_test(w, X, y)
    print("loss: {:.5f}\t  acc: {:5.2f}".format(loss, acc * 100))
Beispiel #2
0
def main(args):
    """Cross-validate ``dpsgd_ma`` on an SVM task over several iteration counts.

    For each train/test split produced by ``RepeatedKFold`` and each
    iteration count, the solver is run ``args.rep`` times.  The training
    objective (``svm_loss`` divided by the training-fold size) and the test
    accuracy in percent are collected, then summarised per iteration count.

    Two files are written, ``sgdma_svm_<dname>_acc.out`` and
    ``sgdma_svm_<dname>_obj.out``.  Each holds three rows: the epsilon
    reported by the solver (taken from the first run of the first split),
    the mean, and the standard deviation.

    Args:
        args: namespace providing ``dname``, ``rep``, ``batch_size``,
            ``reg_coeff`` and ``delta``.
    """
    dataset_path = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(dataset_path, minmax=(0, 1), bias_term=True)
    # Not used below; kept so the shape is still unpacked as before.
    N, dim = X.shape

    # every label below 1 becomes -1
    y[y < 1] = -1

    iteration_counts = [1, 100, 1000, 10000, 20000]
    step = 0.05
    noise_sigma = 4  # fixed sigma as in MA paper

    n_folds = 5  # 5-fold cross-validation
    n_cv_repeats = 2
    n_runs = args.rep

    result_shape = (len(iteration_counts), n_runs, n_folds * n_cv_repeats)
    acc = np.zeros(result_shape)
    obj = np.zeros(result_shape)
    eps_per_setting = []

    splitter = RepeatedKFold(n_splits=n_folds, n_repeats=n_cv_repeats)

    for fold, (train_idx, test_idx) in enumerate(splitter.split(X)):
        train_X, train_y = X[train_idx, :], y[train_idx]
        test_X, test_y = X[test_idx, :], y[test_idx]
        n_train = train_X.shape[0]

        # An explicit positive batch size wins over the sqrt-based default.
        if args.batch_size > 0:
            batch_size = args.batch_size
        else:
            batch_size = int(np.sqrt(n_train) + 10)

        for t_idx, n_iter in enumerate(iteration_counts):
            for run in range(n_runs):
                sol, eps = dpsgd_ma(train_X, train_y, svm_grad, noise_sigma,
                                    n_iter, step, batch_size,
                                    reg_coeff=args.reg_coeff,
                                    delta=args.delta)
                obj[t_idx, run, fold] = (
                    svm_loss(sol, train_X, train_y) / n_train)
                acc[t_idx, run, fold] = svm_test(sol, test_X, test_y) * 100.0

                # Record one epsilon per iteration count, from the very
                # first run only.
                if run == 0 and fold == 0:
                    eps_per_setting.append(eps)

    # Collapse the run and fold axes: one column per iteration count.
    over_runs_and_folds = (1, 2)
    avg_acc = np.vstack([np.array(eps_per_setting),
                         acc.mean(axis=over_runs_and_folds),
                         acc.std(axis=over_runs_and_folds)])
    avg_obj = np.vstack([np.array(eps_per_setting),
                         obj.mean(axis=over_runs_and_folds),
                         obj.std(axis=over_runs_and_folds)])

    filename = "sgdma_svm_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
Beispiel #3
0
def main(args):
    """Cross-validate ``dpsgd_adv`` on an SVM task over a grid of epsilons.

    For each train/test split produced by ``RepeatedKFold`` and each epsilon,
    the solver is run ``args.rep`` times.  The training objective
    (``svm_loss`` divided by the training-fold size) and the test accuracy in
    percent are collected.  Mean and standard deviation per epsilon are
    written to ``sgdadv_svm_<dname>_acc.out`` and
    ``sgdadv_svm_<dname>_obj.out`` (row 0: mean, row 1: std).

    Args:
        args: namespace providing ``dname``, ``rep``, ``batch_size``, ``T``
            and ``reg_coeff``.  A positive ``T`` replaces the default
            iteration heuristic with ``T * eps``.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), bias_term=True)
    y[y < 1] = -1

    nrep = args.rep
    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)
    learning_rate = 0.1

    K = 5  # 5-fold cross-validation
    cv_rep = 3
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]

        n_train = train_X.shape[0]
        batch_size = int(np.sqrt(n_train) + 10)

        if args.batch_size > 0:
            batch_size = args.batch_size

        for i, eps in enumerate(epsilon):
            # number of iterations
            T = max(int(round((n_train * eps) / 500.0)), 1)
            if args.T > 0:
                # int() truncates, so a small T * eps would give zero
                # iterations; clamp like the default heuristic above.
                T = max(int(args.T * eps), 1)

            for j in range(nrep):
                sol = dpsgd_adv(train_X,
                                train_y,
                                svm_grad,
                                eps,
                                T,
                                learning_rate,
                                batch_size,
                                reg_coeff=args.reg_coeff)
                obj[i, j, k] = svm_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = svm_test(sol, test_X, test_y) * 100.0

    # Collapse the repetition and fold axes: one column per epsilon.
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)), np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)), np.std(obj, axis=(1, 2))])

    filename = "sgdadv_svm_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
Beispiel #4
0
def main(args):
    """Cross-validate ``privgene`` on an SVM task over a grid of epsilons.

    For each train/test split produced by ``RepeatedKFold`` and each epsilon,
    the solver is run ``args.rep`` times.  The training objective
    (``svm_loss`` divided by the training-fold size) and the test accuracy in
    percent are collected.  Mean and standard deviation per epsilon are
    written to ``pgene_svm_<dname>_acc.out`` and ``pgene_svm_<dname>_obj.out``
    (row 0: mean, row 1: std).

    Args:
        args: namespace providing ``dname``, ``rep``, ``delta``, ``T`` and
            ``batch_size``.  A positive ``T`` replaces the default round
            heuristic with ``T * eps``.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), bias_term=True)
    y[y < 1] = -1

    nrep = args.rep
    delta = args.delta
    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-fold cross-validation
    cv_rep = 2
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]

        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            # number of iterations
            r = max(int(round((n_train * eps) / 800.0)), 1)
            if args.T > 0:
                # int() truncates, and r is a divisor below: without the
                # clamp a small T * eps makes r == 0 and the division fails.
                r = max(int(args.T * eps), 1)

            rho = dp_to_zcdp(eps, delta)
            # Per-round budget.  NOTE(review): presumably rho is split evenly
            # over the r rounds and mapped back to an epsilon -- confirm
            # against privgene.  The plain alternative would be eps / r.
            eps_iter = np.sqrt((2. * rho) / r)

            for j in range(nrep):
                sol = privgene(train_X,
                               train_y,
                               eps_iter,
                               r,
                               svm_score,
                               C=10,
                               batch_size=args.batch_size)
                obj[i, j, k] = svm_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = svm_test(sol, test_X, test_y) * 100.0

    # Collapse the repetition and fold axes: one column per epsilon.
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)), np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)), np.std(obj, axis=(1, 2))])

    filename = "pgene_svm_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
Beispiel #5
0
def main(args):
    """Cross-validate ``outpert_gd`` on an SVM task over a grid of epsilons.

    For each train/test split produced by ``RepeatedKFold`` and each epsilon,
    the solver is run ``args.rep`` times on the training fold.  The training
    objective (``svm_loss`` divided by the training-fold size) and the test
    accuracy in percent are collected.  Mean and standard deviation per
    epsilon are written to ``outpert_svm_<dname>_acc.out`` and
    ``outpert_svm_<dname>_obj.out`` (row 0: mean, row 1: std).

    Args:
        args: namespace providing ``dname``, ``rep``, ``delta``, ``L``,
            ``step_size``, ``T`` and ``reg_coeff``.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)

    y[y < 0.5] = -1

    nrep = args.rep
    delta = args.delta
    L = args.L
    step_size = args.step_size
    T = args.T

    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-fold cross-validation
    cv_rep = 2
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]

        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            for j in range(nrep):
                # Fit on the training fold only.  Passing the full X, y here
                # would leak the held-out rows into the model and make the
                # test accuracy below meaningless.
                sol = outpert_gd(train_X,
                                 train_y,
                                 svm_grad,
                                 eps,
                                 T,
                                 L,
                                 step_size,
                                 delta=delta,
                                 reg_coeff=args.reg_coeff)

                obj[i, j, k] = svm_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = svm_test(sol, test_X, test_y) * 100.0

    # Collapse the repetition and fold axes: one column per epsilon.
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)), np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)), np.std(obj, axis=(1, 2))])

    filename = "outpert_svm_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
Beispiel #6
0
    sigma = 4
    batch_size = 1000
    learning_rate = 0.05
    reg_coeff = 0.001

    print "SGD with moments accountant"
    for T in [1, 100, 1000, 10000, 20000]:
        w, eps = dpsgd_ma(X,
                          y,
                          svm_grad,
                          sigma,
                          T,
                          learning_rate,
                          batch_size,
                          reg_coeff=reg_coeff)
        loss = svm_loss(w, X, y) / N
        acc = svm_test(w, X, y)

        print "[T={:5d}] eps: {:.5f}\tloss: {:.5f}\tacc: {:5.2f}".format(
            T, eps, loss, acc * 100)

    print "\nSGD with advanced composition"
    for eps in [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]:
        # used the same heuristic as in PrivGene
        T = max(int(round((N * eps) / 500.0)), 1)
        w = dpsgd_adv(X, y, svm_grad, eps, T, 0.1, batch_size)
        loss = svm_loss(w, X, y) / N
        acc = svm_test(w, X, y)

        print "eps: {:4.2f}\tloss: {:.5f}\tacc: {:5.2f}".format(
            eps, loss, acc * 100)