Beispiel #1
0
def main(argv):
  """Parse flags, choose an IRR randomness source and run a simulation.

  CSV rows are read from stdin and written to stdout.  When
  opts.assoc_testdata is truthy, GenAssocTestdata is called with two
  parameter sets (the second one using a single bloom bit); otherwise
  RapporClientSim is called with the single flag-derived parameter set.

  Args:
    argv: Command line arguments, handed to the option parser.

  Raises:
    AssertionError: If opts.random_mode is neither 'simple' nor 'fast'.
  """
  (opts, argv) = CreateOptionsParser().parse_args(argv)

  def flags_to_params(num_bloombits):
    # Fresh Params object populated from the parsed flags; only the number
    # of bloom bits differs between the callers below.
    p = rappor.Params()
    p.num_bloombits = num_bloombits
    p.num_hashes = opts.num_hashes
    p.num_cohorts = opts.num_cohorts
    p.prob_p = opts.prob_p
    p.prob_q = opts.prob_q
    p.prob_f = opts.prob_f
    return p

  params = flags_to_params(opts.num_bits)

  mode = opts.random_mode
  if mode not in ('simple', 'fast'):
    raise AssertionError

  if mode == 'fast' and fastrand:
    log('Using fastrand extension')
    # NOTE: This doesn't take 'rand'.  It's seeded in C with srand().
    irr_rand = fastrand.FastIrrRand(params)
  else:
    if mode == 'fast':
      # 'fast' was requested but the extension module is not available.
      log('Warning: fastrand module not importable; see README for build '
          'instructions.  Falling back to simple randomness.')
    irr_rand = rappor.SimpleIrrRand(params)
  # Other possible implementations:
  # - random.SystemRandom (probably uses /dev/urandom on Linux)
  # - HMAC-SHA256 with another secret?  This could match C++ byte for byte.
  #   - or srand(0) might do it.

  csv_in = csv.reader(sys.stdin)
  csv_out = csv.writer(sys.stdout)

  if not opts.assoc_testdata:
    RapporClientSim(params, irr_rand, csv_in, csv_out)
    return

  # Association test data needs two parameter sets: one mirroring the flags
  # and one for a boolean variable, which only needs a single bit.
  string_params = flags_to_params(opts.num_bits)
  bool_params = flags_to_params(1)  # 1 bit for boolean

  GenAssocTestdata(
      string_params, bool_params, irr_rand, opts.assoc_testdata, csv_in,
      csv_out)
Beispiel #2
0
 def setUp(self):
     """Create the Params fixture stored as self.typical_instance."""
     fixture = rappor.Params()
     fixture.num_cohorts = 64    # Number of cohorts
     fixture.num_hashes = 2      # Number of bloom filter hashes
     fixture.num_bloombits = 16  # Number of bloom filter bits
     fixture.prob_p = 0.40       # Probability p
     fixture.prob_q = 0.70       # Probability q
     fixture.prob_f = 0.30       # Probability f
     self.typical_instance = fixture
Beispiel #3
0
    def setUp(self):
        """Create the Params fixture stored as self.typical_instance."""
        fixture = rappor.Params()
        fixture.num_cohorts = 64    # Number of cohorts
        fixture.num_hashes = 2      # Number of bloom filter hashes
        fixture.num_bloombits = 16  # Number of bloom filter bits
        fixture.prob_p = 0.40       # Probability p
        fixture.prob_q = 0.70       # Probability q
        fixture.prob_f = 0.30       # Probability f

        # TODO: Move this to constructor, or add a different constructor
        fixture.flag_oneprr = False  # One PRR for each user/word pair

        self.typical_instance = fixture
def main():
    """Run RapporClientSim on a fixed input CSV with hard-coded parameters.

    The parameters are 16 bloom bits, 2 hashes, 64 cohorts, p=0.5, q=0.75
    and f=0.5.  The input is loaded into a pandas DataFrame from a
    hard-coded absolute path.
    """
    cfg = rappor.Params()
    cfg.num_bloombits = 16
    cfg.num_hashes = 2
    cfg.num_cohorts = 64
    cfg.prob_p = 0.5
    cfg.prob_q = 0.75
    cfg.prob_f = 0.5

    randomness = rappor.SecureIrrRand(cfg)

    # NOTE(review): machine-specific path; presumably only valid on the
    # original author's workstation.
    input_path = '/Users/Michael/PycharmProjects/untitled1/data/input.csv'
    frame = pd.read_csv(input_path)

    RapporClientSim(cfg, randomness, frame)
Beispiel #5
0
def set_rappor_params(num_bloombits, num_hash, col, f):
    """Build a rappor2.Encoder from the given bloom filter settings.

    prob_p and prob_q are fixed at 0.0 and 1.0, the cohort passed to the
    encoder is 0, and the randomness source is a rappor.MockRandom built
    from a fixed value list.

    Args:
        num_bloombits: Value stored in Params.num_bloombits.
        num_hash: Value stored in Params.num_hashes.
        col: Not used by this function.
        f: Value stored in Params.prob_f.

    Returns:
        The newly constructed rappor2.Encoder.
    """
    cfg = rappor.Params()
    cfg.num_hashes = num_hash  # Number of bloom filter hashes
    cfg.num_bloombits = num_bloombits
    cfg.prob_f = f
    cfg.prob_p = 0.0
    cfg.prob_q = 1.0

    mock_rand = rappor.MockRandom([0.0, 0.6, 0.0], cfg)

    # NOTE(review): the secret is str() of whatever the name `random` refers
    # to at module level; if that is the stdlib module, this is the module's
    # repr rather than a random value -- confirm this is intended.
    return rappor2.Encoder(cfg, 0, str(random), mock_rand)
Beispiel #6
0
def main():
    """Run SumBits over a fixed output CSV with hard-coded parameters.

    The parameters are 16 bloom bits, 2 hashes, 64 cohorts, p=0.5, q=0.75
    and f=0.5.  The 'bloom', 'prr' and 'irr' columns are read with dtype
    object.
    """
    cfg = rappor.Params()
    cfg.num_bloombits = 16
    cfg.num_hashes = 2
    cfg.num_cohorts = 64
    cfg.prob_p = 0.5
    cfg.prob_q = 0.75
    cfg.prob_f = 0.5

    # NOTE(review): machine-specific path; presumably only valid on the
    # original author's workstation.
    output_path = '/Users/Michael/PycharmProjects/untitled1/data/output.csv'
    column_types = dict.fromkeys(('bloom', 'prr', 'irr'), object)
    frame = pd.read_csv(output_path, dtype=column_types)

    SumBits(cfg, frame)
Beispiel #7
0
def main(argv):
  """Run the RAPPOR client simulation from stdin CSV to stdout CSV.

  The first input row must be the header ('client', 'cohort', 'value').
  Every following (client, cohort, value) row is encoded, and a
  (client, cohort, bloom, prr, irr) row is written to stdout, with the last
  three columns rendered by rappor.bit_string.

  Args:
    argv: Command line arguments, handed to the option parser.

  Raises:
    RuntimeError: If the first input row is not the expected header.
    AssertionError: If opts.random_mode is neither 'simple' nor 'fast'.
  """
  (opts, argv) = CreateOptionsParser().parse_args(argv)

  # Copy flags into params
  params = rappor.Params()
  params.num_bloombits = opts.num_bits
  params.num_hashes = opts.num_hashes
  params.num_cohorts = opts.num_cohorts
  params.prob_p = opts.prob_p
  params.prob_q = opts.prob_q
  params.prob_f = opts.prob_f

  if opts.random_mode == 'simple':
    irr_rand = rappor.SimpleIrrRand(params)
  elif opts.random_mode == 'fast':
    if fastrand:
      log('Using fastrand extension')
      # NOTE: This doesn't take 'rand'.  It's seeded in C with srand().
      irr_rand = fastrand.FastIrrRand(params)
    else:
      log('Warning: fastrand module not importable; see README for build '
          'instructions.  Falling back to simple randomness.')
      irr_rand = rappor.SimpleIrrRand(params)
  else:
    raise AssertionError
  # Other possible implementations:
  # - random.SystemRandom (probably uses /dev/urandom on Linux)
  # - HMAC-SHA256 with another secret?  This could match C++ byte for byte.
  #   - or srand(0) might do it.

  csv_in = csv.reader(sys.stdin)
  csv_out = csv.writer(sys.stdout)

  header = ('client', 'cohort', 'bloom', 'prr', 'irr')
  csv_out.writerow(header)

  # TODO: It would be more instructive/efficient to construct an encoder
  # instance up front per client, rather than one per row below.
  start_time = time.time()

  for i, (client_str, cohort_str, true_value) in enumerate(csv_in):
    if i == 0:
      # Validate the header row column by column, then skip it.
      if client_str != 'client':
        raise RuntimeError('Expected client header, got %s' % client_str)
      if cohort_str != 'cohort':
        raise RuntimeError('Expected cohort header, got %s' % cohort_str)
      if true_value != 'value':
        # The message must format 'true_value'; the previously used name
        # 'value' is undefined here and raised NameError instead.
        raise RuntimeError('Expected value header, got %s' % true_value)
      continue  # skip header row

    # Periodic progress report.
    if i % 10000 == 0:
      elapsed = time.time() - start_time
      log('Processed %d inputs in %.2f seconds', i, elapsed)

    cohort = int(cohort_str)
    secret = client_str  # the client identifier is used as the secret
    e = rappor.Encoder(params, cohort, secret, irr_rand)

    # Real users should call e.encode().  For testing purposes, we also want
    # the PRR.
    bloom, prr, irr = e._internal_encode(true_value)

    bloom_str = rappor.bit_string(bloom, params.num_bloombits)
    prr_str = rappor.bit_string(prr, params.num_bloombits)
    irr_str = rappor.bit_string(irr, params.num_bloombits)

    out_row = (client_str, cohort_str, bloom_str, prr_str, irr_str)
    csv_out.writerow(out_row)

def typicalsamling(group, typicalNDict):
    """Return a random sample of ``group`` sized by a per-group lookup.

    Args:
        group: Object exposing ``.name`` and ``.sample(n=...)``, such as the
            pieces handed to a pandas groupby ``apply`` callback.
        typicalNDict: Mapping from group name to the number of items to draw.

    Returns:
        Whatever ``group.sample`` returns for the looked-up sample size.

    Raises:
        KeyError: If ``group.name`` is not a key of ``typicalNDict``.
    """
    return group.sample(n=typicalNDict[group.name])


# Cast the 'epsilon' column to int before it is used as the grouping key.
df['epsilon'] = df['epsilon'].astype(int)

# Randomly sample rows from every epsilon group; the number of rows drawn
# per group is looked up in typicalNDict by typicalsamling.
result = df.groupby('epsilon').apply(typicalsamling, typicalNDict)
print(result)

# For every sampled row, build an encoder from that row's f/p/q values and
# pass it to get_bin_arr for both the test and the train labels.
# NOTE(review): result['f'][i] is a label-based lookup on the grouped result;
# presumably its index supports the integer labels 0..n-1 -- confirm.
# NOTE(review): both *_arr names are rebound on every iteration, so only the
# values from the last row remain after the loop unless get_bin_arr has side
# effects -- confirm this is intended.
for i in range(result.shape[0]):
    params = rappor.Params()
    params.num_bloombits = 32
    params.prob_f = result['f'][i]
    params.prob_p = result['p'][i]
    params.prob_q = result['q'][i]
    # Randomness source built from a fixed value list.
    rand = MockRandom([0.0, 0.6, 0.0], params)
    # Cohort 0 and a constant secret are used for every row.
    REncode = rappor.Encoder(params, 0, 'secret', rand)
    testlabel_oi_pb_arr = get_bin_arr(REncode,
                                      testlabel_o,
                                      'testlabel',
                                      bits_len=32)
    trainlabel_oi_pb_arr = get_bin_arr(REncode,
                                       trainlabel_o,
                                       'trainlabel',
                                       bits_len=32)
Beispiel #9
0
 def setUp(self):
     """Create self.params with 16 bloom bits and 2 cohorts."""
     cfg = rappor.Params()
     cfg.num_bloombits = 16
     cfg.num_cohorts = 2
     self.params = cfg
Beispiel #10
0
def main(argv):
    """Write the true histogram and the RAPPOR-encoded rows for an input CSV.

    Two files are produced next to the prefix given by opts.out_prefix:
    '<prefix>_hist.csv', written by print_histogram, and '<prefix>_out.csv'
    with one ('client', 'cohort', 'rappor') row per input row.

    Args:
        argv: Command line arguments, handed to the option parser.

    Raises:
        RuntimeError: If the input file or the output prefix flag is missing.
        AssertionError: If opts.random_mode is neither 'simple' nor 'fast'.
    """
    (opts, argv) = CreateOptionsParser().parse_args(argv)
    if not opts.infile:
        raise RuntimeError('-i is required')
    if not opts.out_prefix:
        raise RuntimeError('--out-prefix is required')

    # Copy flags into the parameter object.
    cfg = rappor.Params()
    cfg.num_bloombits = opts.num_bits
    cfg.num_hashes = opts.num_hashes
    cfg.num_cohorts = opts.num_cohorts
    cfg.prob_p = opts.prob_p
    cfg.prob_q = opts.prob_q
    cfg.prob_f = opts.prob_f
    cfg.flag_oneprr = opts.oneprr

    encoded_path = opts.out_prefix + "_out.csv"
    histogram_path = opts.out_prefix + "_hist.csv"

    # First pass over the input: build the histogram of true values.
    with open(opts.infile) as src:
        word_hist = make_histogram(csv.reader(src))

    # Print true histograms.
    with open(histogram_path, 'w') as dst:
        print_histogram(word_hist, dst)

    all_words = sorted(word_hist)  # unique words

    prng = random.Random()  # default Mersenne Twister randomness
    #prng = random.SystemRandom()  # cryptographic randomness from OS

    prng.seed()  # Default: seed with sys time

    mode = opts.random_mode
    if mode not in ('simple', 'fast'):
        raise AssertionError

    if mode == 'fast' and fastrand:
        log('Using fastrand extension')
        # NOTE: This doesn't take 'rand'
        rand_funcs = fastrand.FastRandFuncs(cfg)
    else:
        if mode == 'fast':
            # 'fast' was requested but the extension module is unavailable.
            log('Warning: fastrand module not importable; see README for build '
                'instructions.  Falling back to simple randomness.')
        rand_funcs = rappor.SimpleRandFuncs(cfg, prng)

    # Second pass over the input: do the RAPPOR transformation.
    with open(opts.infile) as f_in, open(encoded_path, 'w') as f_out:
        reader = csv.reader(f_in)
        writer = csv.writer(f_out)

        writer.writerow(('client', 'cohort', 'rappor'))

        active_client = None  # client the current encoder belongs to
        started = time.time()

        for row_num, (client, true_value) in enumerate(reader):
            # Periodic progress report.
            if not row_num % 10000:
                elapsed = time.time() - started
                log('Processed %d inputs in %.2f seconds', row_num, elapsed)

            # A new encoder is created whenever the client column changes
            # from the previous row.
            if client != active_client:
                active_client = client
                encoder = rappor.Encoder(
                    cfg, active_client, rand_funcs=rand_funcs)

            cohort, irr = encoder.encode(true_value)

            writer.writerow(
                (client, cohort, bit_string(irr, cfg.num_bloombits)))