def main(args):
    """Run ``privgene`` once on a whole dataset and print its accuracy.

    Loads ``../dataset/<dsname>.dat``, derives the number of iterations and
    the per-iteration budget from ``args.eps`` and ``args.delta``, runs
    ``privgene`` on all of the data and prints the value returned by
    ``logistic_test`` on that same data.

    Args:
        args: parsed options; reads ``eps``, ``delta`` and ``dsname``.
    """
    # input parameters
    eps = args.eps
    delta = args.delta

    # load the data
    X, y = load_dat("../dataset/{0}.dat".format(args.dsname))
    N = X.shape[0]

    # number of iterations: grows with N * eps, never below one
    r = max(int(round((N * eps) / 800.0)), 1)

    # privacy budget
    rho = dp_to_zcdp(eps, delta)
    eps_iter = sqrt((2. * rho) / r)

    # Each print receives one pre-formatted string, so the output is the
    # same whether print is a statement (Python 2) or a function (Python 3).
    print(" - {0:22s}: {1}".format("r", r))
    print(" - {0:22s}: {1}".format("eps_iter", eps_iter))

    sol = privgene(X, y, eps_iter, r, logistic_score, C=1)
    acc = logistic_test(sol, X, y)
    print("Accuracy= {0}".format(acc))
def main(args):
    """Cross-validate ``agd`` on logistic regression over a grid of epsilons.

    For every train/test split produced by repeated K-fold and every epsilon
    in the grid, ``agd`` is run ``args.rep`` times on the training fold. The
    training loss divided by the training-fold size and the test-fold result
    of ``logistic_test`` (scaled by 100) are recorded. The mean and standard
    deviation per epsilon are written to ``agd_logres_<dname>_acc.out`` and
    ``agd_logres_<dname>_obj.out``.

    Args:
        args: parsed options; reads ``dname``, ``rep``, ``delta``,
            ``obj_clip``, ``grad_clip``, ``reg_coeff``, ``batch_size``,
            ``exp_dec`` and ``gamma``.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)

    nrep = args.rep
    delta = args.delta
    obj_clip = args.obj_clip
    grad_clip = args.grad_clip

    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-folds cross-validation
    cv_rep = 2
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    # k indexes the K * cv_rep train/test splits
    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]
        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            # The sweep is meant to run unattended, so there is no
            # interactive breakpoint inside this loop.
            rho = dp_to_zcdp(eps, delta)

            for j in range(nrep):
                sol = agd(train_X, train_y, rho, eps, delta,
                          logistic_grad, logistic_loss, logistic_test,
                          obj_clip, grad_clip,
                          reg_coeff=args.reg_coeff,
                          batch_size=args.batch_size,
                          exp_dec=args.exp_dec, gamma=args.gamma,
                          verbose=True)
                obj[i, j, k] = logistic_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = logistic_test(sol, test_X, test_y) * 100.0

    # row 0: mean, row 1: standard deviation; one column per epsilon
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)),
                         np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)),
                         np.std(obj, axis=(1, 2))])

    filename = "agd_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
def main(args):
    """Cross-validate ``privgene`` on logistic regression over epsilons.

    For every train/test split produced by repeated K-fold and every epsilon
    in the grid, the iteration count ``r`` and the per-iteration budget
    ``eps_iter`` are derived, and ``privgene`` is run ``args.rep`` times on
    the training fold. The mean and standard deviation of the recorded
    training loss and test accuracy are written to
    ``pgene_logres_<dname>_acc.out`` and ``pgene_logres_<dname>_obj.out``.

    Args:
        args: parsed options; reads ``dname``, ``rep``, ``delta``, ``T`` and
            ``batch_size``. A positive ``T`` replaces the size-based
            iteration count with ``int(T * eps)``, clamped to at least 1.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), bias_term=True)

    nrep = args.rep
    delta = args.delta

    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-folds cross-validation
    cv_rep = 2
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    # k indexes the K * cv_rep train/test splits
    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]
        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            # number of iterations
            r = max(int(round((n_train * eps) / 800.0)), 1)
            if args.T > 0:
                # int() truncates towards zero, so a small T * eps would
                # give r == 0 and a division by zero in eps_iter below.
                r = max(int(args.T * eps), 1)

            rho = dp_to_zcdp(eps, delta)
            eps_iter = np.sqrt((2. * rho) / r)

            for j in range(nrep):
                sol = privgene(train_X, train_y, eps_iter, r,
                               logistic_score, C=1,
                               batch_size=args.batch_size)
                obj[i, j, k] = logistic_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = logistic_test(sol, test_X, test_y) * 100.0

    # row 0: mean, row 1: standard deviation; one column per epsilon
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)),
                         np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)),
                         np.std(obj, axis=(1, 2))])

    filename = "pgene_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
def main(args):
    """Cross-validate ``dpsgd_adv`` on logistic regression over epsilons.

    For every train/test split produced by repeated K-fold and every epsilon
    in the grid, ``dpsgd_adv`` is run ``args.rep`` times on the training
    fold. Each test accuracy is printed as it is produced. The mean and
    standard deviation of the recorded training loss and test accuracy are
    written to ``sgdadv_logres_<dname>_acc.out`` and
    ``sgdadv_logres_<dname>_obj.out``.

    Args:
        args: parsed options; reads ``dname``, ``rep``, ``batch_size``, ``T``
            and ``reg_coeff``. A positive ``batch_size`` overrides the
            size-based default; a positive ``T`` replaces the size-based
            iteration count with ``int(T * eps)``, clamped to at least 1.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), bias_term=True)

    nrep = args.rep
    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)
    learning_rate = 0.1

    K = 5  # 5-folds cross-validation
    cv_rep = 3
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    # k indexes the K * cv_rep train/test splits
    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]
        n_train = train_X.shape[0]

        # default batch size depends on the fold size unless overridden
        batch_size = int(np.sqrt(n_train) + 10)
        if args.batch_size > 0:
            batch_size = args.batch_size

        for i, eps in enumerate(epsilon):
            # number of iterations
            T = max(int(round((n_train * eps) / 500.0)), 1)
            if args.T > 0:
                # Keep the same "at least one iteration" floor as the
                # default schedule; int() alone truncates small values to 0.
                T = max(int(args.T * eps), 1)

            for j in range(nrep):
                sol = dpsgd_adv(train_X, train_y, logistic_grad,
                                eps, T, learning_rate, batch_size,
                                reg_coeff=args.reg_coeff)
                obj[i, j, k] = logistic_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = logistic_test(sol, test_X, test_y) * 100.0
                print("acc[{},{},{}]={}".format(i, j, k, acc[i, j, k]))

    # row 0: mean, row 1: standard deviation; one column per epsilon
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)),
                         np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)),
                         np.std(obj, axis=(1, 2))])

    filename = "sgdadv_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
def main(args):
    """Cross-validate ``outpert_gd`` on logistic regression over epsilons.

    For every train/test split produced by repeated K-fold and every epsilon
    in the grid, ``outpert_gd`` is run ``args.rep`` times on the training
    fold only, so the held-out fold never takes part in fitting. The mean
    and standard deviation of the recorded training loss and test accuracy
    are written to ``outpert_logres_<dname>_acc.out`` and
    ``outpert_logres_<dname>_obj.out``.

    Args:
        args: parsed options; reads ``dname``, ``rep``, ``delta``, ``L``,
            ``step_size``, ``T`` and ``reg_coeff``.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)

    nrep = args.rep
    delta = args.delta
    L = args.L
    step_size = args.step_size
    T = args.T

    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-folds cross-validation
    cv_rep = 2
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    # k indexes the K * cv_rep train/test splits
    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]
        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            for j in range(nrep):
                # Fit on the training fold; fitting on the full X, y would
                # leak the test fold into the model being evaluated.
                sol = outpert_gd(train_X, train_y, logistic_grad,
                                 eps, T, L, step_size,
                                 delta=delta, reg_coeff=args.reg_coeff)
                obj[i, j, k] = logistic_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = logistic_test(sol, test_X, test_y) * 100.0

    # row 0: mean, row 1: standard deviation; one column per epsilon
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)),
                         np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)),
                         np.std(obj, axis=(1, 2))])

    filename = "outpert_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
def main(args):
    """Measure how the ``splits`` argument of ``agd`` affects its results.

    For every epsilon and every value of ``splits`` in the grids below,
    ``agd`` is run ``n_rep`` times on the whole dataset. The loss divided by
    the number of samples and the value returned by ``logistic_test`` are
    both measured on that same data. Their means over the repetitions (one
    row per epsilon, one column per split value) are written to
    ``varying_splits_<dname>_acc.out`` and ``varying_splits_<dname>_obj.out``.

    Args:
        args: parsed options; reads ``dname``, ``delta``, ``obj_clip`` and
            ``grad_clip``.
    """
    # load the dataset
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)
    N = X.shape[0]

    delta = args.delta
    obj_clip = args.obj_clip
    grad_clip = args.grad_clip

    # variables to change
    epsilon = [0.1, 0.5, 1.0]
    splits = [20, 40, 60, 80, 100, 120]

    n_eps = len(epsilon)
    n_rep = 10
    n_splits = len(splits)

    loss = np.zeros((n_eps, n_splits, n_rep))
    acc = np.zeros((n_eps, n_splits, n_rep))

    for k, eps in enumerate(epsilon):
        rho = dp_to_zcdp(eps, delta)
        # One pre-formatted string keeps the output identical whether print
        # is a statement (Python 2) or a function (Python 3).
        print("rho = {:.5f}".format(rho))

        for i, split in enumerate(splits):
            for j in range(n_rep):
                w = agd(X, y, rho, eps, delta,
                        logistic_grad, logistic_loss, logistic_test,
                        obj_clip, grad_clip,
                        reg_coeff=0.0, splits=split)
                loss[k, i, j] = logistic_loss(w, X, y) / N
                acc[k, i, j] = logistic_test(w, X, y)

    # average over the repetitions
    avg_loss = np.mean(loss, axis=2)
    avg_acc = np.mean(acc, axis=2)

    np.savetxt('varying_splits_{0}_acc.out'.format(args.dname),
               avg_acc, fmt='%.5f')
    np.savetxt('varying_splits_{0}_obj.out'.format(args.dname),
               avg_loss, fmt='%.5f')
def main(args):
    """Measure how the ``gamma`` argument of ``agd`` affects its results.

    For every gamma and every epsilon in the grids below, ``agd`` is run
    ``n_rep`` times on the whole dataset. The loss divided by the number of
    samples and the value returned by ``logistic_test`` are both measured on
    that same data. Their means over the repetitions (one row per gamma, one
    column per epsilon) are written to ``varying_gamma_<dname>_acc.out`` and
    ``varying_gamma_<dname>_obj.out``.

    Args:
        args: parsed options; reads ``dname``, ``delta``, ``obj_clip`` and
            ``grad_clip``.
    """
    # load the dataset
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)
    N = X.shape[0]

    delta = args.delta
    obj_clip = args.obj_clip
    grad_clip = args.grad_clip

    # variables to change
    epsilon = [0.1, 0.2, 0.4, 0.8, 1.6]
    gammas = [0.1, 0.2, 0.3, 0.4, 0.5]

    n_eps = len(epsilon)
    n_rep = 10
    n_gammas = len(gammas)

    loss = np.zeros((n_gammas, n_eps, n_rep))
    acc = np.zeros_like(loss)

    for i, gamma in enumerate(gammas):
        for j, eps in enumerate(epsilon):
            rho = dp_to_zcdp(eps, delta)
            # One pre-formatted string keeps the output identical whether
            # print is a statement (Python 2) or a function (Python 3).
            print("rho = {:.5f}".format(rho))

            for k in range(n_rep):
                w = agd(X, y, rho, eps, delta,
                        logistic_grad, logistic_loss, logistic_test,
                        obj_clip, grad_clip,
                        reg_coeff=0.0, gamma=gamma)
                loss[i, j, k] = logistic_loss(w, X, y) / N
                acc[i, j, k] = logistic_test(w, X, y)

    # average over the repetitions
    avg_loss = np.mean(loss, axis=2)
    avg_acc = np.mean(acc, axis=2)

    np.savetxt('varying_gamma_{0}_acc.out'.format(args.dname),
               avg_acc, fmt='%.5f')
    np.savetxt('varying_gamma_{0}_obj.out'.format(args.dname),
               avg_loss, fmt='%.5f')
def main(args):
    """Sweep the iteration count and epsilon passed to ``outpert_gd``.

    Loads the dataset, prints its shape, then runs ``outpert_gd`` on the
    whole dataset ``args.rep`` times for every (T, epsilon) pair in the
    grids below. The loss divided by the number of samples and the value
    returned by ``logistic_test`` (scaled by 100) are measured on that same
    data. Their means over the repetitions (one row per T, one column per
    epsilon) are written to ``outpert_logres_<dname>_T_acc.out`` and
    ``outpert_logres_<dname>_T_obj.out``.

    Args:
        args: parsed options; reads ``dname``, ``delta``, ``L``,
            ``step_size``, ``rep`` and ``reg_coeff``.
    """
    data_path = "../../../Experiment/Dataset/dat/{0}.dat".format(args.dname)
    X, y = load_dat(data_path, minmax=(0, 1), normalize=False,
                    bias_term=True)
    n_samples, n_features = X.shape
    print("({}, {})".format(n_samples, n_features))

    delta = args.delta
    iteration_grid = [20, 30, 40, 50, 60, 70]
    eps_grid = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    L = args.L
    step_size = args.step_size

    # axes: (iteration count, epsilon, repetition)
    grid_shape = (len(iteration_grid), len(eps_grid), args.rep)
    acc = np.zeros(grid_shape)
    loss = np.zeros(grid_shape)

    # np.ndindex walks the grid with the last axis varying fastest, i.e.
    # iteration count outermost and repetition innermost.
    for cell in np.ndindex(*grid_shape):
        t_idx, e_idx, _ = cell
        w_priv = outpert_gd(X, y, logistic_grad,
                            eps_grid[e_idx], iteration_grid[t_idx],
                            L, step_size,
                            delta=delta, reg_coeff=args.reg_coeff)
        loss[cell] = logistic_loss(w_priv, X, y) / n_samples
        acc[cell] = logistic_test(w_priv, X, y) * 100

    # average over the repetitions
    mean_obj = np.mean(loss, axis=2)
    mean_acc = np.mean(acc, axis=2)

    out_prefix = "outpert_logres_{0}_T".format(args.dname)
    np.savetxt("{0}_acc.out".format(out_prefix), mean_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(out_prefix), mean_obj, fmt='%.5f')
def main(args):
    """Cross-validate ``dpsgd_ma`` on logistic regression over iteration counts.

    For every train/test split produced by repeated K-fold and every
    iteration count in the grid, ``dpsgd_ma`` is run ``args.rep`` times on
    the training fold with a fixed ``sigma``. The epsilon it returns is
    recorded once per iteration count, from the first repetition on the
    first split. The output files ``sgdma_logres_<dname>_acc.out`` and
    ``sgdma_logres_<dname>_obj.out`` hold three rows: the recorded epsilons,
    the mean, and the standard deviation, with one column per iteration
    count.

    Args:
        args: parsed options; reads ``dname``, ``rep``, ``batch_size`` and
            ``delta``. A positive ``batch_size`` overrides the size-based
            default.
    """
    data_path = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(data_path, minmax=(0, 1), bias_term=True)
    # Values are unused; the unpacking still requires X to be two-dimensional.
    n_samples, n_features = X.shape

    n_rep = args.rep
    iteration_grid = [1, 100, 1000, 10000, 20000]
    learning_rate = 0.05
    sigma = 4  # fixed sigma as in MA paper

    n_folds = 5  # 5-folds cross-validation
    n_cv_repeats = 2

    # axes: (iteration count, repetition, train/test split)
    result_shape = (len(iteration_grid), n_rep, n_folds * n_cv_repeats)
    acc = np.zeros(result_shape)
    obj = np.zeros(result_shape)
    eps_per_T = []

    splitter = RepeatedKFold(n_splits=n_folds, n_repeats=n_cv_repeats)

    for fold, (train, test) in enumerate(splitter.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]
        n_train = train_X.shape[0]

        if args.batch_size > 0:
            batch_size = args.batch_size
        else:
            batch_size = int(np.sqrt(n_train) + 10)

        for t_idx, n_iter in enumerate(iteration_grid):
            for rep in range(n_rep):
                sol, eps = dpsgd_ma(train_X, train_y, logistic_grad,
                                    sigma, n_iter, learning_rate,
                                    batch_size, delta=args.delta)
                obj[t_idx, rep, fold] = (
                    logistic_loss(sol, train_X, train_y) / n_train)
                acc[t_idx, rep, fold] = (
                    logistic_test(sol, test_X, test_y) * 100.0)

                # keep one epsilon per iteration count
                if rep == 0 and fold == 0:
                    eps_per_T.append(eps)

    eps_row = np.array(eps_per_T)
    acc_summary = np.vstack([
        eps_row,
        np.mean(acc, axis=(1, 2)),
        np.std(acc, axis=(1, 2)),
    ])
    obj_summary = np.vstack([
        np.array(eps_per_T),
        np.mean(obj, axis=(1, 2)),
        np.std(obj, axis=(1, 2)),
    ])

    out_prefix = "sgdma_logres_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(out_prefix), acc_summary, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(out_prefix), obj_summary, fmt='%.5f')