Exemple #1
0
def adult_benchmark():
    """Load the Adult dataset and build a fixed list of 3-way workloads.

    Every attribute in the dataset's domain is first assigned a
    ``workload.Identity`` matrix sized by ``data.domain.size(attr)``.  Five
    attributes are then overridden with ``workload.Prefix`` matrices whose
    sizes are hard-coded below.
    NOTE(review): the hard-coded sizes presumably mirror the entries in
    ``adult-domain.json`` -- confirm against that file.

    Returns:
        tuple: ``(data, workloads)`` where ``data`` is the loaded dataset and
        ``workloads`` is a list of ``(projection, W)`` pairs; ``projection``
        is a tuple of three attribute names and ``W`` is the
        ``workload.Kronecker`` product of the per-attribute matrices, in the
        same attribute order.
    """
    data = Dataset.load('../data/adult.csv', '../data/adult-domain.json')

    # The attribute triples that make up the benchmark, in evaluation order.
    attribute_triples = [
        ('occupation', 'race', 'capital-loss'),
        ('occupation', 'sex', 'native-country'),
        ('marital-status', 'relationship', 'income>50K'),
        ('age', 'education-num', 'sex'),
        ('workclass', 'education-num', 'occupation'),
        ('marital-status', 'occupation', 'income>50K'),
        ('race', 'native-country', 'income>50K'),
        ('occupation', 'capital-gain', 'income>50K'),
        ('marital-status', 'hours-per-week', 'income>50K'),
        ('workclass', 'race', 'capital-gain'),
        ('marital-status', 'relationship', 'capital-gain'),
        ('workclass', 'education-num', 'capital-gain'),
        ('education-num', 'relationship', 'race'),
        ('fnlwgt', 'hours-per-week', 'income>50K'),
        ('workclass', 'sex', 'native-country'),
    ]

    # Attributes that use prefix queries instead of identity queries,
    # mapped to the fixed size passed to workload.Prefix.
    prefix_sizes = {
        'age': 85,
        'fnlwgt': 100,
        'capital-gain': 100,
        'capital-loss': 100,
        'hours-per-week': 99,
    }

    # Default: one identity matrix per attribute, sized by its domain.
    per_attribute = {
        name: workload.Identity(data.domain.size(name))
        for name in data.domain
    }
    # Overrides are applied after the defaults so they always win.
    per_attribute.update(
        (name, workload.Prefix(size)) for name, size in prefix_sizes.items()
    )

    workloads = [
        (triple, workload.Kronecker([per_attribute[name] for name in triple]))
        for triple in attribute_triples
    ]

    return data, workloads
Exemple #2
0
def randomKway(name, number, marginal, seed=0):
    """Load the dataset called ``name`` and hand it to ``randomKwayData``.

    The data is read from ``Datasets/<name>.csv`` and its domain from
    ``Datasets/<name>-domain.json``.

    Args:
        name: Base file name of the dataset inside ``Datasets/``.
        number: Forwarded unchanged to ``randomKwayData``.
        marginal: Forwarded unchanged to ``randomKwayData``.
        seed: Forwarded unchanged to ``randomKwayData`` (default 0).

    Returns:
        tuple: ``(data, result)`` where ``data`` is the loaded dataset and
        ``result`` is whatever ``randomKwayData(data, number, marginal, seed)``
        returns.
    """
    csv_template = "Datasets/{}.csv"
    domain_template = "Datasets/{}-domain.json"

    csv_path = csv_template.format(name)
    domain = domain_template.format(name)

    data = Dataset.load(csv_path, domain)
    selected = randomKwayData(data, number, marginal, seed)
    return data, selected
    pb_path+=str(i)
    pb_path+=".csv"
    print(pb_path)
    syn_data_privbayes = Dataset.load(pb_path, domain)

    dq_path=dualquerydata
    dq_path+=str(i)
    dq_path+=".csv"
    print(dq_path)
    syn_data_dualquery= Dataset.load(dq_path, domain)
    '''
    gm_path = gmdata
    gm_path += str(i + 1)
    gm_path += " .csv"
    print(gm_path)
    syn_data_r = Dataset.load(gm_path, domain)

    # err_pb = []
    # err_dq = []
    err_r = []
    print("ss")
    for p, W in workload:
        true = W.dot(data.project(p).datavector())
        #    print(data.project(p).datavector())
        #    pb = W.dot(syn_data_privbayes.project(p).datavector())
        #   print(syn_data_privbayes.project(p).datavector())
        #    dq_data=syn_data_dualquery.project(p).datavector()
        #    dq_data*=total/dq_data.sum()
        #   dq = W.dot(dq_data)
        #  print(syn_data_dualquery.project(p).datavector())
        r = W.dot(syn_data_r.project(p).datavector())
Exemple #4
0
                        help='bounded or unbounded privacy definition')
    parser.add_argument('--frequency', type=int, help='logging frequency')
    parser.add_argument('--seed', type=int, help='random seed')
    parser.add_argument('--save', type=str, help='path to save results')
    parser.add_argument('--load',
                        type=str,
                        help='path to load results from (skips experiment)')
    parser.add_argument('--plot', type=str, help='path to save plot')

    parser.set_defaults(**default_params())
    args = parser.parse_args()

    if args.load:
        results = pickle.load(open(args.load, 'rb'))
    else:
        data = Dataset.load('../data/adult.csv', '../data/adult-domain.json')
        projections = [['race', 'capital-loss', 'income>50K'],
                       ['marital-status', 'capital-gain', 'income>50K'],
                       ['race', 'native-country', 'income>50K'],
                       ['workclass', 'sex', 'hours-per-week'],
                       ['fnlwgt', 'marital-status', 'relationship'],
                       ['workclass', 'education-num', 'occupation'],
                       ['age', 'relationship', 'sex'],
                       ['occupation', 'sex', 'hours-per-week'],
                       ['occupation', 'relationship', 'income>50K']]

        measurements = []
        for p in projections:
            Q = sparse.eye(data.domain.size(p))
            measurements.append((p, Q))