def GenAssocTestdata(params1, params2, irr_rand, assoc_testdata_count,
                     csv_in, csv_out):
    """Encode (string, bool) value pairs from csv_in and write them to csv_out.

    The first row of csv_in is treated as a header; its two column names are
    reused in the output header.  Every remaining row is encoded
    assoc_testdata_count times, each time under a fresh client id ('c0',
    'c1', ...) and a randomly drawn cohort.

    TODO: Generalize this beyond one string column and one bool column.

    Args:
      params1: parameters for the first (string) column; num_cohorts and
        num_bloombits are read from it.
      params2: parameters for the second (bool) column; num_bloombits is read
        from it.
      irr_rand: passed through to both rappor.Encoder instances.
      assoc_testdata_count: number of times the data rows are replicated.
      csv_in: iterable of two-field rows, header row first.
      csv_out: object with a writerow() method (e.g. a csv.writer).

    Raises:
      ValueError: if csv_in has no header row, or if any row does not have
        exactly two fields.
    """
    reader = iter(csv_in)
    try:
        v1_name, v2_name = next(reader)
    except StopIteration:
        # Previously this surfaced later as an unbound-local error.
        raise ValueError('csv_in is empty; expected a header row')

    rows = [(true_value1, true_value2) for true_value1, true_value2 in reader]

    # Use the same column names as the input.
    csv_out.writerow(('client', 'cohort', v1_name, v2_name))

    report_index = 0
    for _ in range(assoc_testdata_count):
        for v1, v2 in rows:
            client_str = 'c%d' % report_index

            # randint(a, b) gives i such that a <= i <= b
            cohort = random.randint(0, params1.num_cohorts - 1)

            # Both encoders share the client id and the cohort.
            string_encoder = rappor.Encoder(params1, cohort, client_str,
                                            irr_rand)
            bool_encoder = rappor.Encoder(params2, cohort, client_str,
                                          irr_rand)

            irr1 = string_encoder.encode(v1)

            # TODO: Convert to bool and encode with basic RAPPOR
            irr2 = bool_encoder.encode_bits(int(v2))

            irr1_str = rappor.bit_string(irr1, params1.num_bloombits)
            irr2_str = rappor.bit_string(irr2, params2.num_bloombits)

            csv_out.writerow((client_str, cohort, irr1_str, irr2_str))
            report_index += 1
def testEncoder(self):
    """Check Encoder.encode() with mocked randomness.

    Expected bloom bits is computed as follows.

    f_bits = 0xfff0000f and mask_indices = 0x0ffff000 from
    testGetRapporMasksWithoutPRR()

    q_bits = 0xfffff0ff from mock_rand.randomness[] and how get_rand_bits works
    p_bits = 0x000ffff0 from -- do --

    bloom_bits_array is 0x0000 0048 (3rd bit and 6th bit, from
    testSetBloomArray, are set)

    Bit arithmetic ends up computing
    bloom_bits_prr = 0x0ff00048
    bloom_bits_irr = 0x0ffffff8

    NOTE(review): the value asserted below (0x000ffff) is not the
    bloom_bits_irr derived above -- confirm which of the two is current.
    """
    params = copy.copy(self.typical_instance)
    params.prob_f = 0.5
    params.prob_p = 0.5
    params.prob_q = 0.75

    rand_funcs = rappor.SimpleRandFuncs(params, MockRandom())
    # Pin the cohort choice to the lower bound so the test is deterministic.
    rand_funcs.cohort_rand_fn = lambda a, b: a
    e = rappor.Encoder(params, 0, rand_funcs=rand_funcs)

    cohort, bloom_bits_irr = e.encode("abc")

    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(0, cohort)
    self.assertEqual(0x000ffff, bloom_bits_irr)
def RapporClientSim(
        params, irr_rand, csv_in,
        out_path='/Users/Michael/PycharmProjects/untitled1/data/output.csv'):
    """Encode each true value in csv_in and write the results as a CSV file.

    For testing purposes the bloom filter bits and the PRR are written next
    to the IRR; real users should call Encoder.encode() instead of
    _internal_encode().

    Args:
      params: passed to rappor.Encoder; its num_bloombits is used when
        formatting the bit strings.
      irr_rand: passed through to rappor.Encoder.
      csv_in: pandas DataFrame whose rows unpack into four fields, in order:
        an index column (ignored), client, cohort, true value.
      out_path: where the output CSV is written.  Defaults to the location
        that used to be hard-coded here.
    """
    header = ['client', 'cohort', 'bloom', 'prr', 'irr']
    out_rows = {}

    # TODO: It would be more instructive/efficient to construct an encoder
    # instance up front per client, rather than one per row below.
    for i, (_index, client_str, cohort_str, true_value) in csv_in.iterrows():
        cohort = int(cohort_str)
        secret = client_str  # the client id doubles as the encoder secret
        e = rappor.Encoder(params, cohort, secret, irr_rand)

        bloom, prr, irr = e._internal_encode(true_value)

        bloom_str = rappor.bit_string(bloom, params.num_bloombits)
        prr_str = rappor.bit_string(prr, params.num_bloombits)
        irr_str = rappor.bit_string(irr, params.num_bloombits)

        # Keyed by the input row label, so output rows keep the input labels.
        out_rows[i] = [client_str, cohort_str, bloom_str, prr_str, irr_str]

    # Passing columns here (instead of assigning output.columns afterwards)
    # also works when there are no rows at all.
    output = pd.DataFrame.from_dict(out_rows, orient='index', columns=header)
    output.to_csv(out_path)
def testEncoder(self):
    """Check Encoder.encode() against a fixed value using MockRandom."""
    params = copy.copy(self.typical_instance)
    params.prob_f = 0.5
    params.prob_p = 0.5
    params.prob_q = 0.75

    # MockRandom is given these 3 probabilities to return in sequence.
    rand = MockRandom([0.0, 0.6, 0.0], params)
    e = rappor.Encoder(params, 0, 'secret', rand)

    irr = e.encode("abc")

    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(64493, irr)  # given MockRandom, this is what we get
def testEncoder(self):
    """Check Encoder.encode() against a fixed value using a mocked IRR source."""
    params = copy.copy(self.typical_instance)
    params.prob_f = 0.5
    params.prob_p = 0.5
    params.prob_q = 0.75

    # SimpleIrrRand will call random() on this object for each bit, which
    # will return these 3 values in sequence.
    rand = MockRandom([0.0, 0.6, 0.0])
    irr_rand = rappor.SimpleIrrRand(params, _rand=rand)
    e = rappor.Encoder(params, 0, 'secret', irr_rand)

    irr = e.encode("abc")

    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(64493, irr)  # given MockRandom, this is what we get
def RapporClientSim(params, irr_rand, csv_in, csv_out):
    """Read true values from csv_in and output encoded values on csv_out.

    For testing purposes the bloom filter bits and the PRR are written next
    to the IRR; real users should call Encoder.encode() instead of
    _internal_encode().

    Args:
      params: passed to rappor.Encoder; its num_bloombits is used when
        formatting the bit strings.
      irr_rand: passed through to rappor.Encoder.
      csv_in: iterable of (client, cohort, value) rows.  The first row must
        be the literal header ('client', 'cohort', 'value').
      csv_out: object with a writerow() method (e.g. a csv.writer).

    Raises:
      RuntimeError: if the first input row is not the expected header.
    """
    header = ('client', 'cohort', 'bloom', 'prr', 'irr')
    csv_out.writerow(header)

    # TODO: It would be more instructive/efficient to construct an encoder
    # instance up front per client, rather than one per row below.

    start_time = time.time()

    for i, (client_str, cohort_str, true_value) in enumerate(csv_in):
        if i == 0:
            if client_str != 'client':
                raise RuntimeError(
                    'Expected client header, got %s' % client_str)
            if cohort_str != 'cohort':
                raise RuntimeError(
                    'Expected cohort header, got %s' % cohort_str)
            if true_value != 'value':
                # Report the offending cell; this used to reference an
                # undefined name and raised NameError instead.
                raise RuntimeError(
                    'Expected value header, got %s' % true_value)
            continue  # skip header row

        # Periodic progress report.
        if i % 10000 == 0:
            elapsed = time.time() - start_time
            log('Processed %d inputs in %.2f seconds', i, elapsed)

        cohort = int(cohort_str)
        secret = client_str  # the client id doubles as the encoder secret
        e = rappor.Encoder(params, cohort, secret, irr_rand)

        bloom, prr, irr = e._internal_encode(true_value)

        bloom_str = rappor.bit_string(bloom, params.num_bloombits)
        prr_str = rappor.bit_string(prr, params.num_bloombits)
        irr_str = rappor.bit_string(irr, params.num_bloombits)

        out_row = (client_str, cohort_str, bloom_str, prr_str, irr_str)
        csv_out.writerow(out_row)
def testEncoder(self):
    """Print encoder output for a handful of sample inputs.

    This makes no assertions; it only exercises encode() and encode_bits()
    with a deterministic MockRandom and prints what comes back.  The calls
    are made in a fixed order because each one goes through the same
    encoder and random source.
    """
    params = copy.copy(self.typical_instance)
    params.prob_f = 0.5
    params.prob_p = 0.5
    params.prob_q = 0.75

    # MockRandom is given these 3 probabilities to return in sequence.
    rand = MockRandom([0.0, 0.6, 0.0], params)
    e = rappor.Encoder(params, 0, 'secret', rand)

    # Single-argument print(...) produces the same text on Python 2 and 3.
    text = '01010100101 this is char'
    print('{0} encode_str: {1}'.format(text, e.encode(text)))

    # Integers go through encode_bits(); also show their binary form.
    for number in (999999, 1000, 3):
        print('{0} int2bit {0:b} encode_bits: {1}'.format(
            number, e.encode_bits(number)))

    # Floats are first converted with dTob3(), then encoded.  Labels are
    # spelled out so the printed text does not depend on float formatting.
    samples = (
        ('1.123', 1.123),
        ('0.56712', 0.56712),
        ('0.56711', 0.56711),
        ('110.124', 110.124),
        ('5.124', 5.124),
        ('1.123', 1.123),
    )
    for label, value in samples:
        bits = dTob3(value)
        print('{0} to bits: {1} encode: {2:d}'.format(
            label, bits, e.encode(bits)))
def main(argv):
    """Read (client, cohort, value) CSV rows on stdin, write encodings on stdout.

    Command-line flags are copied into a rappor.Params instance, a source of
    IRR randomness is chosen from --random-mode style options, and every
    input row is encoded.  For testing purposes the bloom filter bits and
    the PRR are written next to the IRR; real users should call
    Encoder.encode() instead of _internal_encode().

    Args:
      argv: command-line arguments, handed to the options parser.

    Raises:
      RuntimeError: if the first input row is not the header
        ('client', 'cohort', 'value').
      AssertionError: if opts.random_mode is neither 'simple' nor 'fast'.
    """
    (opts, argv) = CreateOptionsParser().parse_args(argv)

    # Copy flags into params
    params = rappor.Params()
    params.num_bloombits = opts.num_bits
    params.num_hashes = opts.num_hashes
    params.num_cohorts = opts.num_cohorts
    params.prob_p = opts.prob_p
    params.prob_q = opts.prob_q
    params.prob_f = opts.prob_f

    if opts.random_mode == 'simple':
        irr_rand = rappor.SimpleIrrRand(params)
    elif opts.random_mode == 'fast':
        if fastrand:
            log('Using fastrand extension')
            # NOTE: This doesn't take 'rand'.  It's seeded in C with srand().
            irr_rand = fastrand.FastIrrRand(params)
        else:
            log('Warning: fastrand module not importable; see README for build '
                'instructions.  Falling back to simple randomness.')
            irr_rand = rappor.SimpleIrrRand(params)
    else:
        raise AssertionError
    # Other possible implementations:
    # - random.SystemRandom (probably uses /dev/urandom on Linux)
    # - HMAC-SHA256 with another secret?  This could match C++ byte for byte.
    #   - or srand(0) might do it.

    csv_in = csv.reader(sys.stdin)
    csv_out = csv.writer(sys.stdout)

    header = ('client', 'cohort', 'bloom', 'prr', 'irr')
    csv_out.writerow(header)

    # TODO: It would be more instructive/efficient to construct an encoder
    # instance up front per client, rather than one per row below.

    start_time = time.time()

    for i, (client_str, cohort_str, true_value) in enumerate(csv_in):
        if i == 0:
            if client_str != 'client':
                raise RuntimeError(
                    'Expected client header, got %s' % client_str)
            if cohort_str != 'cohort':
                raise RuntimeError(
                    'Expected cohort header, got %s' % cohort_str)
            if true_value != 'value':
                # Report the offending cell; this used to reference an
                # undefined name and raised NameError instead.
                raise RuntimeError(
                    'Expected value header, got %s' % true_value)
            continue  # skip header row

        # Periodic progress report.
        if i % 10000 == 0:
            elapsed = time.time() - start_time
            log('Processed %d inputs in %.2f seconds', i, elapsed)

        cohort = int(cohort_str)
        secret = client_str  # the client id doubles as the encoder secret
        e = rappor.Encoder(params, cohort, secret, irr_rand)

        bloom, prr, irr = e._internal_encode(true_value)

        bloom_str = rappor.bit_string(bloom, params.num_bloombits)
        prr_str = rappor.bit_string(prr, params.num_bloombits)
        irr_str = rappor.bit_string(irr, params.num_bloombits)

        out_row = (client_str, cohort_str, bloom_str, prr_str, irr_str)
        csv_out.writerow(out_row)
df['epsilon'] = df['epsilon'].astype(int) #random choose 100 line date result = df.groupby('epsilon').apply(typicalsamling, typicalNDict) print(result) for i in range(result.shape[0]): params = rappor.Params() params.num_bloombits = 32 params.prob_f = result['f'][i] params.prob_p = result['p'][i] params.prob_q = result['q'][i] rand = MockRandom([0.0, 0.6, 0.0], params) REncode = rappor.Encoder(params, 0, 'secret', rand) testlabel_oi_pb_arr = get_bin_arr(REncode, testlabel_o, 'testlabel', bits_len=32) trainlabel_oi_pb_arr = get_bin_arr(REncode, trainlabel_o, 'trainlabel', bits_len=32) from keras.callbacks import EarlyStopping, History history = History() early_stopping = EarlyStopping(monitor='val_acc', patience=1, verbose=2, mode='auto')
def main(argv):
    """Write a true-value histogram and RAPPOR-encoded reports for an input CSV.

    The input file (-i) is read twice: once to build the histogram written
    to <out-prefix>_hist.csv, and once to encode every (client, value) row
    into <out-prefix>_out.csv.

    Args:
      argv: command-line arguments, handed to the options parser.

    Raises:
      RuntimeError: if the input file or the output prefix flag is missing.
      AssertionError: if opts.random_mode is neither 'simple' nor 'fast'.
    """
    (opts, argv) = CreateOptionsParser().parse_args(argv)

    if not opts.infile:
        raise RuntimeError('-i is required')
    if not opts.out_prefix:
        raise RuntimeError('--out-prefix is required')

    # Copy flags into params
    params = rappor.Params()
    params.num_bloombits = opts.num_bits
    params.num_hashes = opts.num_hashes
    params.num_cohorts = opts.num_cohorts
    params.prob_p = opts.prob_p
    params.prob_q = opts.prob_q
    params.prob_f = opts.prob_f
    params.flag_oneprr = opts.oneprr

    outfile = opts.out_prefix + "_out.csv"
    histfile = opts.out_prefix + "_hist.csv"

    # First pass over the input: true histogram of the values.
    with open(opts.infile) as hist_in:
        word_hist = make_histogram(csv.reader(hist_in))

    with open(histfile, 'w') as hist_out:
        print_histogram(word_hist, hist_out)

    all_words = sorted(word_hist)  # unique words

    # Default Mersenne Twister randomness, seeded from system time.
    # (random.SystemRandom() would give cryptographic randomness from the OS.)
    rand = random.Random()
    rand.seed()

    mode = opts.random_mode
    if mode == 'fast' and fastrand:
        log('Using fastrand extension')
        # NOTE: This doesn't take 'rand'
        rand_funcs = fastrand.FastRandFuncs(params)
    elif mode in ('simple', 'fast'):
        if mode == 'fast':
            log('Warning: fastrand module not importable; see README for build '
                'instructions.  Falling back to simple randomness.')
        rand_funcs = rappor.SimpleRandFuncs(params, rand)
    else:
        raise AssertionError

    # Second pass over the input: the RAPPOR transformation itself.
    with open(opts.infile) as f_in, open(outfile, 'w') as f_out:
        reader = csv.reader(f_in)
        writer = csv.writer(f_out)
        writer.writerow(('client', 'cohort', 'rappor'))

        cur_client = None  # client the current encoder was built for
        start_time = time.time()

        for i, (client, true_value) in enumerate(reader):
            if not i % 10000:
                elapsed = time.time() - start_time
                log('Processed %d inputs in %.2f seconds', i, elapsed)

            # Consecutive rows of one client share an encoder; a new one is
            # built whenever the client column changes.
            if client != cur_client:
                cur_client = client
                e = rappor.Encoder(params, cur_client, rand_funcs=rand_funcs)

            cohort, irr = e.encode(true_value)
            writer.writerow(
                (client, cohort, bit_string(irr, params.num_bloombits)))