コード例 #1
0
ファイル: rappor_sim.py プロジェクト: zzzzzzzhaoymm/rappor
def GenAssocTestdata(params1, params2, irr_rand, assoc_testdata_count, csv_in,
                     csv_out):
    """Encode (string, bool) pairs from csv_in and write reports to csv_out.

    The first row of csv_in is treated as the header; its two cells are reused
    as the names of the two encoded output columns.  Every remaining row is
    encoded assoc_testdata_count times, each time under a new client id
    ('c0', 'c1', ...) and a randomly drawn cohort.

    The first value of a row is encoded with Encoder.encode(); the second is
    converted with int() and encoded with Encoder.encode_bits().
    TODO: Generalize this.
    """
    true_pairs = []
    for row_num, (first, second) in enumerate(csv_in):
        if row_num > 0:
            true_pairs.append((first, second))
        else:
            # Header row: remember the column names, but do not encode it.
            v1_name, v2_name = first, second

    # The output reuses the input's column names.
    csv_out.writerow(('client', 'cohort', v1_name, v2_name))

    num_reports = 0
    for _ in xrange(assoc_testdata_count):
        for v1, v2 in true_pairs:
            client_str = 'c%d' % num_reports

            # randint(a, b) is inclusive on both ends, hence the "- 1".
            cohort = random.randint(0, params1.num_cohorts - 1)

            # Both encoders share the cohort and the client string.
            string_encoder = rappor.Encoder(params1, cohort, client_str,
                                            irr_rand)
            bool_encoder = rappor.Encoder(params2, cohort, client_str,
                                          irr_rand)

            irr1 = string_encoder.encode(v1)
            # TODO: Convert to bool and encode with basic RAPPOR
            irr2 = bool_encoder.encode_bits(int(v2))

            csv_out.writerow((
                client_str,
                cohort,
                rappor.bit_string(irr1, params1.num_bloombits),
                rappor.bit_string(irr2, params2.num_bloombits),
            ))

            num_reports += 1
コード例 #2
0
ファイル: rappor_test.py プロジェクト: Honin/rappor
    def testEncoder(self):
        """Check Encoder.encode() against a deterministic random source.

        Expected bloom bits are computed as follows.

        f_bits = 0xfff0000f and mask_indices = 0x0ffff000 from
        testGetRapporMasksWithoutPRR()

        q_bits = 0xfffff0ff from mock_rand.randomness[] and how get_rand_bits
        works
        p_bits = 0x000ffff0 from -- ditto --

        bloom_bits_array is 0x0000 0048 (3rd bit and 6th bit, from
        testSetBloomArray, are set)

        Bit arithmetic ends up computing
        bloom_bits_prr = 0x0ff00048
        bloom_bits_irr = 0x0ffffff8

        NOTE(review): the assertion at the end of this test expects 0x000ffff
        rather than the 0x0ffffff8 derived above -- confirm which is stale.
        """
        # Work on a copy so the shared fixture is not modified.
        params = copy.copy(self.typical_instance)
        params.prob_f = 0.5
        params.prob_p = 0.5
        params.prob_q = 0.75

        rand_funcs = rappor.SimpleRandFuncs(params, MockRandom())
        # Always return the first argument, making the cohort choice
        # deterministic.
        rand_funcs.cohort_rand_fn = lambda a, b: a
        e = rappor.Encoder(params, 0, rand_funcs=rand_funcs)

        cohort, bloom_bits_irr = e.encode("abc")

        # assertEqual instead of assertEquals: the plural alias was removed
        # from unittest in Python 3.12.
        self.assertEqual(0, cohort)
        self.assertEqual(0x000ffff, bloom_bits_irr)
コード例 #3
0
def RapporClientSim(params, irr_rand, csv_in,
                    out_path='/Users/Michael/PycharmProjects/untitled1/data/'
                             'output.csv'):
    """Encode the true values in csv_in and write the result as a CSV file.

    Args:
      params: rappor parameters; num_bloombits sets the width of the emitted
        bit strings.
      irr_rand: randomness object handed to every rappor.Encoder.
      csv_in: pandas DataFrame whose rows each unpack into four values:
        (index, client, cohort, true value).
      out_path: destination of the output CSV.  Defaults to the location that
        used to be hard-coded, so existing callers are unaffected.

    The output has the columns client, cohort, bloom, prr, irr.  An empty
    csv_in produces a file containing only the header row.
    """
    header = ['client', 'cohort', 'bloom', 'prr', 'irr']
    out_rows = {}
    num_bits = params.num_bloombits if out_rows is None else None

    # TODO: It would be more instructive/efficient to construct an encoder
    # instance up front per client, rather than one per row below.

    for i, (_index, client_str, cohort_str, true_value) in csv_in.iterrows():
        num_bits = params.num_bloombits
        # The client string doubles as the encoder's secret.
        e = rappor.Encoder(params, int(cohort_str), client_str, irr_rand)

        # Real users should call e.encode().  For testing purposes, we also
        # want the Bloom filter bits and the PRR.
        bloom, prr, irr = e._internal_encode(true_value)

        out_rows[i] = [
            client_str,
            cohort_str,
            rappor.bit_string(bloom, num_bits),
            rappor.bit_string(prr, num_bits),
            rappor.bit_string(irr, num_bits),
        ]

    if out_rows:
        output = pd.DataFrame.from_dict(out_rows, orient='index')
        output.columns = header
    else:
        # from_dict() on an empty dict yields a frame without columns, and
        # assigning five column names to it raises ValueError.  Build the
        # empty frame with its header directly instead.
        output = pd.DataFrame(columns=header)

    output.to_csv(out_path)
コード例 #4
0
    def testEncoder(self):
        """Encode a fixed string with a scripted random source; pin the IRR."""
        # Work on a copy so the shared fixture is not modified.
        params = copy.copy(self.typical_instance)
        params.prob_f = 0.5
        params.prob_p = 0.5
        params.prob_q = 0.75

        # return these 3 probabilities in sequence.
        rand = MockRandom([0.0, 0.6, 0.0], params)

        e = rappor.Encoder(params, 0, 'secret', rand)

        irr = e.encode("abc")

        # Given MockRandom, this is what we get.  assertEqual replaces the
        # assertEquals alias, which was removed from unittest in Python 3.12.
        self.assertEqual(64493, irr)
コード例 #5
0
ファイル: rappor_test.py プロジェクト: yw3xs/rappor
    def testEncoder(self):
        """Encode a fixed string with a scripted random source; pin the IRR."""
        # Work on a copy so the shared fixture is not modified.
        params = copy.copy(self.typical_instance)
        params.prob_f = 0.5
        params.prob_p = 0.5
        params.prob_q = 0.75

        # SimpleRandom will call self.random() below for each bit, which will
        # return these 3 values in sequence.
        rand = MockRandom([0.0, 0.6, 0.0])

        irr_rand = rappor.SimpleIrrRand(params, _rand=rand)
        e = rappor.Encoder(params, 0, 'secret', irr_rand)

        irr = e.encode("abc")

        # Given MockRandom, this is what we get.  assertEqual replaces the
        # assertEquals alias, which was removed from unittest in Python 3.12.
        self.assertEqual(64493, irr)
コード例 #6
0
ファイル: rappor_sim.py プロジェクト: Rahul-Sindhu/rappor
def RapporClientSim(params, irr_rand, csv_in, csv_out):
    """Read true values from csv_in and write encoded values to csv_out.

    Args:
      params: rappor parameters; num_bloombits sets the width of the emitted
        bit strings.
      irr_rand: randomness object handed to every rappor.Encoder.
      csv_in: iterable of 3-item rows.  The first row must be the header
        ('client', 'cohort', 'value'); every later row is
        (client, cohort, true value).
      csv_out: object with a writerow() method, e.g. a csv.writer.

    Raises:
      RuntimeError: if any cell of the header row is not the expected name.
    """
    csv_out.writerow(('client', 'cohort', 'bloom', 'prr', 'irr'))

    expected_header = ('client', 'cohort', 'value')

    # TODO: It would be more instructive/efficient to construct an encoder
    # instance up front per client, rather than one per row below.
    start_time = time.time()

    for i, (client_str, cohort_str, true_value) in enumerate(csv_in):
        if i == 0:
            # Validate the header, then skip it.  The third check used to
            # format an undefined name, which turned the intended RuntimeError
            # into a NameError.
            actual_header = (client_str, cohort_str, true_value)
            for expected, actual in zip(expected_header, actual_header):
                if actual != expected:
                    raise RuntimeError(
                        'Expected %s header, got %s' % (expected, actual))
            continue

        if i % 10000 == 0:
            elapsed = time.time() - start_time
            log('Processed %d inputs in %.2f seconds', i, elapsed)

        # The client string doubles as the encoder's secret.
        e = rappor.Encoder(params, int(cohort_str), client_str, irr_rand)

        # Real users should call e.encode().  For testing purposes, we also
        # want the Bloom filter bits and the PRR.
        bloom, prr, irr = e._internal_encode(true_value)

        bloom_str = rappor.bit_string(bloom, params.num_bloombits)
        prr_str = rappor.bit_string(prr, params.num_bloombits)
        irr_str = rappor.bit_string(irr, params.num_bloombits)

        csv_out.writerow((client_str, cohort_str, bloom_str, prr_str, irr_str))
コード例 #7
0
    def testEncoder(self):
        # Test encoder with deterministic random function.
        params = copy.copy(self.typical_instance)
        params.prob_f = 0.5
        params.prob_p = 0.5
        params.prob_q = 0.75

        # return these 3 probabilities in sequence.
        rand = MockRandom([0.0, 0.6, 0.0], params)

        e = rappor.Encoder(params, 0, 'secret', rand)
        bits = '01010100101 this is char'
        print bits, 'encode_str:', e.encode(bits)
        bits = 999999
        print bits, "int2bit {0:b}".format(
            bits), 'encode_bits:', e.encode_bits(bits)
        bits = 1000
        print bits, "int2bit {0:b}".format(
            bits), 'encode_bits:', e.encode_bits(bits)
        bits = 3
        print bits, "int2bit {0:b}".format(
            bits), 'encode_bits:', e.encode_bits(bits)

        bits = dTob3(1.123)
        print "1.123 to bits:", bits, 'encode:', "{0:d}".format(e.encode(bits))
        bits = dTob3(0.56712)
        print "0.56712 to bits:", bits, 'encode:', "{0:d}".format(
            e.encode(bits))
        bits = dTob3(0.56711)
        print "0.56711 to bits:", bits, 'encode:', "{0:d}".format(
            e.encode(bits))
        bits = dTob3(110.124)
        print "110.124 to bits:", bits, 'encode:', "{0:d}".format(
            e.encode(bits))
        bits = dTob3(5.124)
        print "5.124 to bits:", bits, 'encode:', "{0:d}".format(e.encode(bits))
        bits = dTob3(1.123)
        print "1.123 to bits:", bits, 'encode:', "{0:d}".format(e.encode(bits))
コード例 #8
0
ファイル: rappor_sim.py プロジェクト: ddcv/rappor
def main(argv):
  """Encode the CSV rows on stdin with RAPPOR and write CSV to stdout.

  Flags are parsed from argv and copied into a rappor.Params instance.  The
  input must start with the header row ('client', 'cohort', 'value'); each
  later row is (client, cohort, true value).  The output has the columns
  client, cohort, bloom, prr, irr.

  Raises:
    RuntimeError: if any cell of the input header is not the expected name.
    AssertionError: if opts.random_mode is neither 'simple' nor 'fast'.
  """
  (opts, argv) = CreateOptionsParser().parse_args(argv)

  # Copy flags into params
  params = rappor.Params()
  params.num_bloombits = opts.num_bits
  params.num_hashes = opts.num_hashes
  params.num_cohorts = opts.num_cohorts
  params.prob_p = opts.prob_p
  params.prob_q = opts.prob_q
  params.prob_f = opts.prob_f

  if opts.random_mode == 'simple':
    irr_rand = rappor.SimpleIrrRand(params)
  elif opts.random_mode == 'fast':
    if fastrand:
      log('Using fastrand extension')
      # NOTE: This doesn't take 'rand'.  It's seeded in C with srand().
      irr_rand = fastrand.FastIrrRand(params)
    else:
      log('Warning: fastrand module not importable; see README for build '
          'instructions.  Falling back to simple randomness.')
      irr_rand = rappor.SimpleIrrRand(params)
  else:
    raise AssertionError('Unexpected random_mode: %r' % opts.random_mode)
  # Other possible implementations:
  # - random.SystemRandom (probably uses /dev/urandom on Linux)
  # - HMAC-SHA256 with another secret?  This could match C++ byte for byte.
  #   - or srand(0) might do it.

  csv_in = csv.reader(sys.stdin)
  csv_out = csv.writer(sys.stdout)

  csv_out.writerow(('client', 'cohort', 'bloom', 'prr', 'irr'))

  expected_header = ('client', 'cohort', 'value')

  # TODO: It would be more instructive/efficient to construct an encoder
  # instance up front per client, rather than one per row below.
  start_time = time.time()

  for i, (client_str, cohort_str, true_value) in enumerate(csv_in):
    if i == 0:
      # Validate the header, then skip it.  The third check used to format an
      # undefined name, which turned the intended RuntimeError into a
      # NameError.
      actual_header = (client_str, cohort_str, true_value)
      for expected, actual in zip(expected_header, actual_header):
        if actual != expected:
          raise RuntimeError('Expected %s header, got %s' % (expected, actual))
      continue

    if i % 10000 == 0:
      elapsed = time.time() - start_time
      log('Processed %d inputs in %.2f seconds', i, elapsed)

    # The client string doubles as the encoder's secret.
    e = rappor.Encoder(params, int(cohort_str), client_str, irr_rand)

    # Real users should call e.encode().  For testing purposes, we also want
    # the Bloom filter bits and the PRR.
    bloom, prr, irr = e._internal_encode(true_value)

    bloom_str = rappor.bit_string(bloom, params.num_bloombits)
    prr_str = rappor.bit_string(prr, params.num_bloombits)
    irr_str = rappor.bit_string(irr, params.num_bloombits)

    csv_out.writerow((client_str, cohort_str, bloom_str, prr_str, irr_str))
コード例 #9
0

# Script fragment: for every sampled parameter row, build a RAPPOR encoder and
# encode the test/train labels with it.  df, typicalsamling, typicalNDict,
# MockRandom, get_bin_arr, testlabel_o and trainlabel_o are defined outside
# this excerpt.

# Cast epsilon to int before it is used as the grouping key.
df['epsilon'] = df['epsilon'].astype(int)

# Apply typicalsamling (with typicalNDict) to each epsilon group.
# NOTE(review): presumably this draws a random sample of rows per group --
# confirm against typicalsamling.
result = df.groupby('epsilon').apply(typicalsamling, typicalNDict)
print(result)

# NOTE(review): result['f'][i] looks rows up by index label, which assumes
# result is indexed 0..n-1; groupby().apply() can return a different index --
# confirm.
for i in range(result.shape[0]):
    # Fresh parameters per row: 32 Bloom bits, probabilities taken from the
    # row's f/p/q columns.
    params = rappor.Params()
    params.num_bloombits = 32
    params.prob_f = result['f'][i]
    params.prob_p = result['p'][i]
    params.prob_q = result['q'][i]
    # Scripted random source, so the encoding does not vary between runs of
    # the same parameters (assuming MockRandom replays the given values).
    rand = MockRandom([0.0, 0.6, 0.0], params)
    REncode = rappor.Encoder(params, 0, 'secret', rand)
    testlabel_oi_pb_arr = get_bin_arr(REncode,
                                      testlabel_o,
                                      'testlabel',
                                      bits_len=32)
    trainlabel_oi_pb_arr = get_bin_arr(REncode,
                                       trainlabel_o,
                                       'trainlabel',
                                       bits_len=32)

    # NOTE(review): this import is re-executed on every iteration; consider
    # moving it to the top of the file.
    from keras.callbacks import EarlyStopping, History
    # history and early_stopping are created here but not used in the lines
    # shown in this excerpt.
    history = History()
    early_stopping = EarlyStopping(monitor='val_acc',
                                   patience=1,
                                   verbose=2,
                                   mode='auto')
コード例 #10
0
ファイル: rappor_sim.py プロジェクト: Honin/rappor
def main(argv):
    """Write the true histogram and the RAPPOR-encoded rows of the input.

    Reads (client, value) rows from the CSV file given by opts.infile twice:
    once to build a histogram, written to <out_prefix>_hist.csv, and once to
    encode every value, written to <out_prefix>_out.csv with the columns
    client, cohort, rappor.

    Raises:
      RuntimeError: if the input file or the output prefix flag is missing.
      AssertionError: if opts.random_mode is neither 'simple' nor 'fast'.
    """
    (opts, argv) = CreateOptionsParser().parse_args(argv)
    if not opts.infile:
        raise RuntimeError('-i is required')
    if not opts.out_prefix:
        raise RuntimeError('--out-prefix is required')

    # Copy flags into params
    params = rappor.Params()
    params.num_bloombits = opts.num_bits
    params.num_hashes = opts.num_hashes
    params.num_cohorts = opts.num_cohorts
    params.prob_p = opts.prob_p
    params.prob_q = opts.prob_q
    params.prob_f = opts.prob_f
    params.flag_oneprr = opts.oneprr

    out_path = opts.out_prefix + "_out.csv"
    hist_path = opts.out_prefix + "_hist.csv"

    # First pass over the input: count the true values.
    with open(opts.infile) as hist_in:
        word_hist = make_histogram(csv.reader(hist_in))

    # Print true histograms.
    with open(hist_path, 'w') as hist_out:
        print_histogram(word_hist, hist_out)

    all_words = sorted(word_hist)  # unique words

    rand = random.Random()  # default Mersenne Twister randomness
    #rand = random.SystemRandom()  # cryptographic randomness from OS

    rand.seed()  # Default: seed with sys time

    mode = opts.random_mode
    if mode == 'fast' and fastrand:
        log('Using fastrand extension')
        # NOTE: This doesn't take 'rand'
        rand_funcs = fastrand.FastRandFuncs(params)
    elif mode in ('simple', 'fast'):
        if mode == 'fast':
            # 'fast' was requested but the extension is unavailable.
            log('Warning: fastrand module not importable; see README for build '
                'instructions.  Falling back to simple randomness.')
        rand_funcs = rappor.SimpleRandFuncs(params, rand)
    else:
        raise AssertionError

    # Second pass over the input: do the RAPPOR transformation.
    with open(opts.infile) as f_in, open(out_path, 'w') as f_out:
        reader = csv.reader(f_in)
        writer = csv.writer(f_out)

        writer.writerow(('client', 'cohort', 'rappor'))

        last_client = None  # client of the previous row
        began = time.time()

        for row_num, (client, true_value) in enumerate(reader):
            if row_num % 10000 == 0:
                log('Processed %d inputs in %.2f seconds', row_num,
                    time.time() - began)

            # New encoder instance whenever the client changes.
            if client != last_client:
                last_client = client
                encoder = rappor.Encoder(params, client,
                                         rand_funcs=rand_funcs)

            cohort, irr = encoder.encode(true_value)

            # One output row per input row: client, cohort, IRR bit string.
            writer.writerow(
                (client, cohort, bit_string(irr, params.num_bloombits)))