def adult_benchmark():
    """Load the Adult dataset and assemble its fixed set of 3-way workloads.

    Returns:
        A ``(data, workloads)`` pair. ``data`` is the result of
        ``Dataset.load`` on the Adult CSV/domain files, and ``workloads`` is
        a list of ``(projection, W)`` tuples where ``projection`` is a
        3-tuple of attribute names and ``W`` is the ``workload.Kronecker``
        product of the per-attribute workloads for that projection.
    """
    data = Dataset.load('../data/adult.csv', '../data/adult-domain.json')

    projections = [
        ('occupation', 'race', 'capital-loss'),
        ('occupation', 'sex', 'native-country'),
        ('marital-status', 'relationship', 'income>50K'),
        ('age', 'education-num', 'sex'),
        ('workclass', 'education-num', 'occupation'),
        ('marital-status', 'occupation', 'income>50K'),
        ('race', 'native-country', 'income>50K'),
        ('occupation', 'capital-gain', 'income>50K'),
        ('marital-status', 'hours-per-week', 'income>50K'),
        ('workclass', 'race', 'capital-gain'),
        ('marital-status', 'relationship', 'capital-gain'),
        ('workclass', 'education-num', 'capital-gain'),
        ('education-num', 'relationship', 'race'),
        ('fnlwgt', 'hours-per-week', 'income>50K'),
        ('workclass', 'sex', 'native-country'),
    ]

    # Every attribute starts with an Identity workload sized by its domain.
    per_attribute = {
        attr: workload.Identity(data.domain.size(attr))
        for attr in data.domain
    }

    # These five attributes are then overridden with Prefix workloads whose
    # sizes are hard-coded here rather than read from the domain.
    # NOTE(review): presumably these match the sizes in adult-domain.json --
    # confirm against the domain file.
    per_attribute.update({
        'age': workload.Prefix(85),
        'fnlwgt': workload.Prefix(100),
        'capital-gain': workload.Prefix(100),
        'capital-loss': workload.Prefix(100),
        'hours-per-week': workload.Prefix(99),
    })

    workloads = [
        (proj, workload.Kronecker([per_attribute[a] for a in proj]))
        for proj in projections
    ]

    return data, workloads
def randomKway(name, number, marginal, seed=0):
    """Load the dataset called ``name`` and build workloads for it.

    The CSV and domain files are read from ``Datasets/<name>.csv`` and
    ``Datasets/<name>-domain.json`` (relative to the working directory).

    Args:
        name: Base name of the dataset files under ``Datasets/``.
        number: Forwarded unchanged to ``randomKwayData``.
        marginal: Forwarded unchanged to ``randomKwayData``.
        seed: Forwarded unchanged to ``randomKwayData``; defaults to 0.

    Returns:
        A ``(data, workloads)`` pair: the loaded dataset and whatever
        ``randomKwayData(data, number, marginal, seed)`` returns.
    """
    csv_file = f"Datasets/{name}.csv"
    domain_file = f"Datasets/{name}-domain.json"
    dataset = Dataset.load(csv_file, domain_file)
    workloads = randomKwayData(dataset, number, marginal, seed)
    return dataset, workloads
pb_path+=str(i) pb_path+=".csv" print(pb_path) syn_data_privbayes = Dataset.load(pb_path, domain) dq_path=dualquerydata dq_path+=str(i) dq_path+=".csv" print(dq_path) syn_data_dualquery= Dataset.load(dq_path, domain) ''' gm_path = gmdata gm_path += str(i + 1) gm_path += " .csv" print(gm_path) syn_data_r = Dataset.load(gm_path, domain) # err_pb = [] # err_dq = [] err_r = [] print("ss") for p, W in workload: true = W.dot(data.project(p).datavector()) # print(data.project(p).datavector()) # pb = W.dot(syn_data_privbayes.project(p).datavector()) # print(syn_data_privbayes.project(p).datavector()) # dq_data=syn_data_dualquery.project(p).datavector() # dq_data*=total/dq_data.sum() # dq = W.dot(dq_data) # print(syn_data_dualquery.project(p).datavector()) r = W.dot(syn_data_r.project(p).datavector())
help='bounded or unbounded privacy definition') parser.add_argument('--frequency', type=int, help='logging frequency') parser.add_argument('--seed', type=int, help='random seed') parser.add_argument('--save', type=str, help='path to save results') parser.add_argument('--load', type=str, help='path to load results from (skips experiment)') parser.add_argument('--plot', type=str, help='path to save plot') parser.set_defaults(**default_params()) args = parser.parse_args() if args.load: results = pickle.load(open(args.load, 'rb')) else: data = Dataset.load('../data/adult.csv', '../data/adult-domain.json') projections = [['race', 'capital-loss', 'income>50K'], ['marital-status', 'capital-gain', 'income>50K'], ['race', 'native-country', 'income>50K'], ['workclass', 'sex', 'hours-per-week'], ['fnlwgt', 'marital-status', 'relationship'], ['workclass', 'education-num', 'occupation'], ['age', 'relationship', 'sex'], ['occupation', 'sex', 'hours-per-week'], ['occupation', 'relationship', 'income>50K']] measurements = [] for p in projections: Q = sparse.eye(data.domain.size(p)) measurements.append((p, Q))