Example #1
0
def main(args):
    """Run one PrivGene logistic-regression experiment and print its accuracy.

    Args:
        args: parsed command-line namespace. This function reads
            ``args.eps`` and ``args.delta`` (privacy parameters) and
            ``args.dsname`` (dataset name; the file
            ``../dataset/<dsname>.dat`` is loaded).

    The accuracy is measured on the same data the model was fitted on
    (no train/test split is made here).
    """
    # input parameters
    eps = args.eps
    delta = args.delta

    # load the data
    X, y = load_dat("../dataset/{0}.dat".format(args.dsname))

    N = X.shape[0]

    # number of iterations: scales with N * eps, never fewer than one
    r = max(int(round((N * eps) / 800.0)), 1)

    # privacy budget: rho is derived from (eps, delta) and then divided
    # across the r iterations
    # NOTE(review): presumably dp_to_zcdp converts an (eps, delta)-DP
    # guarantee to a zCDP parameter -- confirm against its definition.
    rho = dp_to_zcdp(eps, delta)
    eps_iter = sqrt((2. * rho) / r)

    # Single-argument print(...) calls emit the same text under both
    # Python 2 and Python 3.
    print(" - {0:22s}: {1}".format("r", r))
    print(" - {0:22s}: {1}".format("eps_iter", eps_iter))

    score_func = logistic_score
    C = 1
    sol = privgene(X, y, eps_iter, r, score_func, C=C)
    acc = logistic_test(sol, X, y)
    print("Accuracy= {0}".format(acc))
Example #2
0
def main(args):
    """Cross-validate ``agd`` on logistic regression over a grid of epsilons.

    Args:
        args: parsed command-line namespace. Reads ``dname``, ``rep``,
            ``delta``, ``obj_clip``, ``grad_clip``, ``reg_coeff``,
            ``batch_size``, ``exp_dec`` and ``gamma``.

    For every fold of a repeated 5-fold split, every epsilon and every
    repetition, a model is fitted on the training fold; the per-sample
    training loss and the test accuracy (in percent) are recorded.
    The mean and standard deviation per epsilon are written to
    ``agd_logres_<dname>_acc.out`` and ``agd_logres_<dname>_obj.out``.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)

    nrep = args.rep
    delta = args.delta
    obj_clip = args.obj_clip
    grad_clip = args.grad_clip

    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-folds cross-validation
    cv_rep = 2
    # axis 0: epsilon, axis 1: repetition, axis 2: CV fold
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]

        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            # A leftover interactive breakpoint used to sit here and
            # blocked unattended runs; it has been removed.
            rho = dp_to_zcdp(eps, delta)

            for j in range(nrep):
                sol = agd(train_X,
                          train_y,
                          rho,
                          eps,
                          delta,
                          logistic_grad,
                          logistic_loss,
                          logistic_test,
                          obj_clip,
                          grad_clip,
                          reg_coeff=args.reg_coeff,
                          batch_size=args.batch_size,
                          exp_dec=args.exp_dec,
                          gamma=args.gamma,
                          verbose=True)

                obj[i, j, k] = logistic_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = logistic_test(sol, test_X, test_y) * 100.0

    # row 0: mean over repetitions and folds, row 1: standard deviation
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)), np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)), np.std(obj, axis=(1, 2))])

    filename = "agd_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
Example #3
0
def main(args):
    """Cross-validate ``privgene`` on logistic regression over several epsilons.

    Args:
        args: parsed command-line namespace. Reads ``dname``, ``rep``,
            ``delta``, ``T`` and ``batch_size``. A positive ``args.T``
            overrides the default iteration count with
            ``int(args.T * eps)``.

    The mean and standard deviation (over repetitions and folds) of the
    test accuracy in percent and of the per-sample training loss are
    written to ``pgene_logres_<dname>_acc.out`` and
    ``pgene_logres_<dname>_obj.out``.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), bias_term=True)

    nrep = args.rep
    delta = args.delta
    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-folds cross-validation
    cv_rep = 2
    # axis 0: epsilon, axis 1: repetition, axis 2: CV fold
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]

        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            # number of iterations
            if args.T > 0:
                # int() truncates, so a small eps could yield 0 and make
                # the division below blow up; keep at least one iteration,
                # as the default branch already does.
                r = max(int(args.T * eps), 1)
            else:
                r = max(int(round((n_train * eps) / 800.0)), 1)

            rho = dp_to_zcdp(eps, delta)
            # per-iteration budget obtained by dividing rho across r rounds
            eps_iter = np.sqrt((2. * rho) / r)

            for j in range(nrep):
                sol = privgene(train_X,
                               train_y,
                               eps_iter,
                               r,
                               logistic_score,
                               C=1,
                               batch_size=args.batch_size)

                obj[i, j, k] = logistic_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = logistic_test(sol, test_X, test_y) * 100.0

    # row 0: mean over repetitions and folds, row 1: standard deviation
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)), np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)), np.std(obj, axis=(1, 2))])

    filename = "pgene_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
Example #4
0
def main(args):
    """Cross-validate ``dpsgd_adv`` on logistic regression over several epsilons.

    Args:
        args: parsed command-line namespace. Reads ``dname``, ``rep``,
            ``batch_size``, ``T`` and ``reg_coeff``. A positive
            ``args.batch_size`` overrides the default batch size and a
            positive ``args.T`` overrides the default iteration count
            with ``int(args.T * eps)``.

    The mean and standard deviation (over repetitions and folds) of the
    test accuracy in percent and of the per-sample training loss are
    written to ``sgdadv_logres_<dname>_acc.out`` and
    ``sgdadv_logres_<dname>_obj.out``.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), bias_term=True)

    nrep = args.rep
    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)
    learning_rate = 0.1

    K = 5  # 5-folds cross-validation
    cv_rep = 3
    # axis 0: epsilon, axis 1: repetition, axis 2: CV fold
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]

        n_train = train_X.shape[0]
        # default batch size grows with sqrt(n_train) unless overridden
        batch_size = int(np.sqrt(n_train) + 10)
        if args.batch_size > 0:
            batch_size = args.batch_size

        for i, eps in enumerate(epsilon):
            # number of iterations
            if args.T > 0:
                # int() truncates, so a small eps could yield 0 iterations;
                # keep at least one, as the default branch already does.
                T = max(int(args.T * eps), 1)
            else:
                T = max(int(round((n_train * eps) / 500.0)), 1)

            for j in range(nrep):
                sol = dpsgd_adv(train_X,
                                train_y,
                                logistic_grad,
                                eps,
                                T,
                                learning_rate,
                                batch_size,
                                reg_coeff=args.reg_coeff)
                obj[i, j, k] = logistic_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = logistic_test(sol, test_X, test_y) * 100.0
                print("acc[{},{},{}]={}".format(i, j, k, acc[i, j, k]))

    # row 0: mean over repetitions and folds, row 1: standard deviation
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)), np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)), np.std(obj, axis=(1, 2))])

    filename = "sgdadv_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
def main(args):
    """Cross-validate ``outpert_gd`` on logistic regression over several epsilons.

    Args:
        args: parsed command-line namespace. Reads ``dname``, ``rep``,
            ``delta``, ``L``, ``step_size``, ``T`` and ``reg_coeff``.

    For every fold of a repeated 5-fold split, every epsilon and every
    repetition, a model is fitted on the training fold only; the
    per-sample training loss and the test accuracy (in percent) are
    recorded. The mean and standard deviation per epsilon are written to
    ``outpert_logres_<dname>_acc.out`` and
    ``outpert_logres_<dname>_obj.out``.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)

    nrep = args.rep
    delta = args.delta
    L = args.L
    step_size = args.step_size
    T = args.T

    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-folds cross-validation
    cv_rep = 2
    # axis 0: epsilon, axis 1: repetition, axis 2: CV fold
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]

        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            for j in range(nrep):
                # Fit on the training fold only. Fitting on the full
                # (X, y) would put the held-out samples into training and
                # invalidate the test accuracy measured below.
                sol = outpert_gd(train_X,
                                 train_y,
                                 logistic_grad,
                                 eps,
                                 T,
                                 L,
                                 step_size,
                                 delta=delta,
                                 reg_coeff=args.reg_coeff)

                obj[i, j, k] = logistic_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = logistic_test(sol, test_X, test_y) * 100.0

    # row 0: mean over repetitions and folds, row 1: standard deviation
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)), np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)), np.std(obj, axis=(1, 2))])

    filename = "outpert_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
Example #6
0
def main(args):
    """Measure how the ``splits`` argument of ``agd`` affects loss and accuracy.

    Args:
        args: parsed command-line namespace. Reads ``dname``, ``delta``,
            ``obj_clip`` and ``grad_clip``.

    For each epsilon and each ``splits`` value the model is fitted 10
    times on the whole dataset and evaluated on that same dataset. The
    averages over the repetitions (rows: epsilon, columns: splits) are
    written to ``varying_splits_<dname>_acc.out`` and
    ``varying_splits_<dname>_obj.out``.
    """
    # load the dataset
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)
    N = X.shape[0]

    delta = args.delta
    obj_clip = args.obj_clip
    grad_clip = args.grad_clip

    # variables to change
    epsilon = [0.1, 0.5, 1.0]
    splits = [20, 40, 60, 80, 100, 120]
    n_eps = len(epsilon)
    n_rep = 10
    n_splits = len(splits)

    # axis 0: epsilon, axis 1: splits value, axis 2: repetition
    loss = np.zeros((n_eps, n_splits, n_rep))
    acc = np.zeros((n_eps, n_splits, n_rep))

    for k, eps in enumerate(epsilon):
        rho = dp_to_zcdp(eps, delta)
        # Single-argument print(...) emits the same text under both
        # Python 2 and Python 3.
        print("rho = {:.5f}".format(rho))

        for i, split in enumerate(splits):
            for j in range(n_rep):
                w = agd(X,
                        y,
                        rho,
                        eps,
                        delta,
                        logistic_grad,
                        logistic_loss,
                        logistic_test,
                        obj_clip,
                        grad_clip,
                        reg_coeff=0.0,
                        splits=split)
                loss[k, i, j] = logistic_loss(w, X, y) / N
                acc[k, i, j] = logistic_test(w, X, y)

    avg_loss = np.mean(loss, axis=2)
    avg_acc = np.mean(acc, axis=2)

    np.savetxt('varying_splits_{0}_acc.out'.format(args.dname),
               avg_acc,
               fmt='%.5f')
    np.savetxt('varying_splits_{0}_obj.out'.format(args.dname),
               avg_loss,
               fmt='%.5f')
Example #7
0
def main(args):
    """Measure how the ``gamma`` argument of ``agd`` affects loss and accuracy.

    Args:
        args: parsed command-line namespace. Reads ``dname``, ``delta``,
            ``obj_clip`` and ``grad_clip``.

    For each gamma and each epsilon the model is fitted 10 times on the
    whole dataset and evaluated on that same dataset. The averages over
    the repetitions (rows: gamma, columns: epsilon) are written to
    ``varying_gamma_<dname>_acc.out`` and ``varying_gamma_<dname>_obj.out``.
    """
    # load the dataset
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)
    N = X.shape[0]

    delta = args.delta
    obj_clip = args.obj_clip
    grad_clip = args.grad_clip

    # variables to change
    epsilon = [0.1, 0.2, 0.4, 0.8, 1.6]
    gammas = [0.1, 0.2, 0.3, 0.4, 0.5]
    n_eps = len(epsilon)
    n_rep = 10
    n_gammas = len(gammas)

    # axis 0: gamma, axis 1: epsilon, axis 2: repetition
    loss = np.zeros((n_gammas, n_eps, n_rep))
    acc = np.zeros_like(loss)

    for i, gamma in enumerate(gammas):
        for j, eps in enumerate(epsilon):
            rho = dp_to_zcdp(eps, delta)
            # Single-argument print(...) emits the same text under both
            # Python 2 and Python 3.
            print("rho = {:.5f}".format(rho))

            for k in range(n_rep):
                w = agd(X,
                        y,
                        rho,
                        eps,
                        delta,
                        logistic_grad,
                        logistic_loss,
                        logistic_test,
                        obj_clip,
                        grad_clip,
                        reg_coeff=0.0,
                        gamma=gamma)
                loss[i, j, k] = logistic_loss(w, X, y) / N
                acc[i, j, k] = logistic_test(w, X, y)

    avg_loss = np.mean(loss, axis=2)
    avg_acc = np.mean(acc, axis=2)

    np.savetxt('varying_gamma_{0}_acc.out'.format(args.dname),
               avg_acc,
               fmt='%.5f')
    np.savetxt('varying_gamma_{0}_obj.out'.format(args.dname),
               avg_loss,
               fmt='%.5f')
Example #8
0
def main(args):
    """Sweep the iteration count ``T`` of ``outpert_gd`` over several epsilons.

    Args:
        args: parsed command-line namespace. Reads ``dname``, ``delta``,
            ``L``, ``step_size``, ``rep`` and ``reg_coeff``.

    Each (T, epsilon) pair is fitted ``args.rep`` times on the whole
    dataset and evaluated on that same dataset. The averages over the
    repetitions (rows: T, columns: epsilon) are written to
    ``outpert_logres_<dname>_T_acc.out`` (accuracy in percent) and
    ``outpert_logres_<dname>_T_obj.out`` (per-sample loss).
    """
    data_path = "../../../Experiment/Dataset/dat/{0}.dat".format(args.dname)
    X, y = load_dat(data_path, minmax=(0, 1), normalize=False, bias_term=True)
    n_samples, n_features = X.shape
    print("({}, {})".format(n_samples, n_features))

    # experiment settings taken from the command line
    delta = args.delta
    lipschitz = args.L
    step = args.step_size
    n_runs = args.rep

    # grids to sweep
    iteration_grid = [20, 30, 40, 50, 60, 70]
    eps_grid = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]

    # axis 0: T, axis 1: epsilon, axis 2: repetition
    acc = np.zeros((len(iteration_grid), len(eps_grid), n_runs))
    loss = np.zeros_like(acc)

    for t_idx, n_iter in enumerate(iteration_grid):
        for e_idx, eps in enumerate(eps_grid):
            for run in range(n_runs):
                w_priv = outpert_gd(
                    X, y, logistic_grad, eps, n_iter, lipschitz, step,
                    delta=delta, reg_coeff=args.reg_coeff)

                cell = (t_idx, e_idx, run)
                loss[cell] = logistic_loss(w_priv, X, y) / n_samples
                acc[cell] = logistic_test(w_priv, X, y) * 100

    mean_loss = np.mean(loss, axis=2)
    mean_acc = np.mean(acc, axis=2)

    out_prefix = "outpert_logres_{0}_T".format(args.dname)
    np.savetxt("{0}_acc.out".format(out_prefix), mean_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(out_prefix), mean_loss, fmt='%.5f')
Example #9
0
def main(args):
    """Cross-validate ``dpsgd_ma`` on logistic regression for several iteration counts.

    Args:
        args: parsed command-line namespace. Reads ``dname``, ``rep``,
            ``batch_size`` and ``delta``. A positive ``args.batch_size``
            overrides the default batch size.

    ``dpsgd_ma`` returns both a solution and an epsilon value; the
    epsilon reported for each iteration count is the one returned on the
    first fold's first repetition. Output rows are: epsilon, mean, and
    standard deviation (over repetitions and folds), written to
    ``sgdma_logres_<dname>_acc.out`` and ``sgdma_logres_<dname>_obj.out``.
    """
    data_path = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(data_path, minmax=(0, 1), bias_term=True)
    n_samples, n_features = X.shape

    n_runs = args.rep
    iteration_grid = [1, 100, 1000, 10000, 20000]
    reported_eps = []
    step = 0.05
    noise_scale = 4  # fixed sigma as in MA paper

    n_folds = 5  # 5-folds cross-validation
    n_cv_repeats = 2
    # axis 0: iteration count, axis 1: repetition, axis 2: CV fold
    result_shape = (len(iteration_grid), n_runs, n_folds * n_cv_repeats)
    acc = np.zeros(result_shape)
    obj = np.zeros(result_shape)

    splitter = RepeatedKFold(n_splits=n_folds, n_repeats=n_cv_repeats)

    for fold, (train_idx, test_idx) in enumerate(splitter.split(X)):
        train_X, train_y = X[train_idx, :], y[train_idx]
        test_X, test_y = X[test_idx, :], y[test_idx]

        n_train = train_X.shape[0]

        # default batch size grows with sqrt(n_train) unless overridden
        if args.batch_size > 0:
            batch_size = args.batch_size
        else:
            batch_size = int(np.sqrt(n_train) + 10)

        for t_idx, n_iter in enumerate(iteration_grid):
            for run in range(n_runs):
                sol, eps = dpsgd_ma(train_X, train_y, logistic_grad,
                                    noise_scale, n_iter, step, batch_size,
                                    delta=args.delta)
                obj[t_idx, run, fold] = (
                    logistic_loss(sol, train_X, train_y) / n_train)
                acc[t_idx, run, fold] = (
                    logistic_test(sol, test_X, test_y) * 100.0)

                # record epsilon once per iteration count
                if run == 0 and fold == 0:
                    reported_eps.append(eps)

    eps_row = np.array(reported_eps)
    acc_summary = np.vstack([
        eps_row,
        np.mean(acc, axis=(1, 2)),
        np.std(acc, axis=(1, 2)),
    ])
    obj_summary = np.vstack([
        np.array(reported_eps),
        np.mean(obj, axis=(1, 2)),
        np.std(obj, axis=(1, 2)),
    ])

    out_prefix = "sgdma_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(out_prefix), acc_summary, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(out_prefix), obj_summary, fmt='%.5f')