def __init__(self, data, number_docs_published, number_kwds_per_doc,
             number_kwds_per_query, curve=DEFAULT_CURVE, repetitions=10,
             repetitions_publish=5):
    """Prepare benchmark state and pre-generate the random keyword sets.

    `data` is the shared results dict; the remaining integers size the
    generated corpus and queries.
    """
    # Fixed seed keeps the generated keywords reproducible across runs.
    random.seed(0)
    self.data = data
    self.repetitions = repetitions
    self.repetitions_publish = repetitions_publish
    self.number_docs_published = number_docs_published
    self.number_kwds_per_doc = number_kwds_per_doc
    self.number_kwds_per_query = number_kwds_per_query

    def random_kwd():
        # One random keyword of KEYWORD_LENGTH lowercase letters.
        return ''.join(random.choice(string.ascii_lowercase)
                       for _ in range(KEYWORD_LENGTH))

    # Keywords of the published documents, then of the benchmark queries
    # (generation order matters for reproducibility with the fixed seed).
    self.kwds_published = [
        [random_kwd() for _ in range(number_kwds_per_doc)]
        for _ in range(number_docs_published)
    ]
    self.kwds_query = [
        [random_kwd() for _ in range(number_kwds_per_query)]
        for _ in range(repetitions)
    ]
    self.mspsi_client = MSPSIClient(curve)
    self.mspsi_server = MSPSIServer(curve)
def main(): """Entry point of the program.""" kwds_query = [[(''.join((random.choice(string.ascii_lowercase) for _ in range(16)))) for _ in range(10)] for _ in range(NUM_QUERIES)] docs_published = [[(''.join((random.choice(string.ascii_lowercase) for _ in range(16)))) for _ in range(100)] for _ in range(NUM_DOCS)] mspsi_client = MSPSIClient(CURVE) mspsi_server = MSPSIServer(CURVE) # Profile for publish() pr = cProfile.Profile() pr.enable() (secret_server, published) = mspsi_server.publish(docs_published) pr.disable() pr.print_stats() queries = [] # Profile for query() pr = cProfile.Profile() pr.enable() for i in range(NUM_QUERIES): queries.append(mspsi_client.query(kwds_query[i])) pr.disable() pr.print_stats() replies = [] # Profile for reply() pr = cProfile.Profile() pr.enable() for i in range(NUM_QUERIES): replies.append(mspsi_server.reply(secret_server, queries[i][1])) pr.disable() pr.print_stats() # Profile for compute_cardinalities() pr = cProfile.Profile() pr.enable() for i in range(NUM_QUERIES): mspsi_client.compute_cardinalities(queries[i][0], replies[i], published) pr.disable() pr.print_stats()
def publish(self):
    """Benchmark MSPSIServer.publish() and record timings and sizes.

    Results are stored in self.data['publish'][docs][kwds_per_doc][0].
    """
    # A fresh random corpus for each repetition of the benchmark.
    corpora = [
        [
            [''.join(random.choice(string.ascii_lowercase)
                     for _ in range(KEYWORD_LENGTH))
             for _ in range(self.number_kwds_per_doc)]
            for _ in range(self.number_docs_published)
        ]
        for _ in range(self.repetitions_publish)
    ]
    times = []
    lengths = []
    for corpus in corpora:
        start = time.process_time()
        # The secret is not relevant for this benchmark.
        _, published = self.mspsi_server.publish(corpus)
        stop = time.process_time()
        times.append(stop - start)
        lengths.append(MSPSIServer.published_len(published))
    self.data['publish'][self.number_docs_published][
        self.number_kwds_per_doc][0] = {
            'time': times,
            'length': lengths
        }
def publish(self):
    """Benchmark MSPSIServer.publish() and record timings and sizes.

    Results are stored under
    self.data['publish'][journalists][docs][kwds_per_doc][0].
    """
    # A fresh random corpus for each repetition of the benchmark.
    corpora = [
        [
            [''.join(random.choice(string.ascii_lowercase)
                     for _ in range(KEYWORD_LENGTH))
             for _ in range(self.number_kwds_per_doc)]
            for _ in range(self.number_docs_published)
        ]
        for _ in range(self.repetitions_publish)
    ]
    times = []
    lengths = []
    for corpus in corpora:
        start = time.process_time()
        _, published = self.mspsi_server.publish(corpus)
        stop = time.process_time()
        # The first field is the number of documents, the second field is
        # the list of points corresponding to the keywords in the documents.
        times.append(stop - start)
        lengths.append(MSPSIServer.published_len(published))
    # NOTE(review): self.number_journalists is not set in the visible
    # __init__ — presumably assigned by a subclass or caller; confirm.
    self.data['publish'][self.number_journalists][self.number_docs_published][
        self.number_kwds_per_doc][0] = {'time': times, 'length': lengths}
def __init__(self, tests):
    """Create the MS-PSI client/server pair shared by the test cases."""
    # NOTE(review): 415 is presumably an OpenSSL curve NID
    # (415 = prime256v1) — confirm against MSPSIClient's expectations.
    curve_id = 415
    self.mspsi_client = MSPSIClient(curve_id)
    self.mspsi_server = MSPSIServer(curve_id)
    super().__init__(tests)
class TestMSPSI(unittest.TestCase):
    """Unit tests for the MS-PSI client/server protocol."""

    def __init__(self, tests):
        # NOTE(review): 415 is presumably an OpenSSL curve NID — confirm.
        curve = 415
        self.mspsi_client = MSPSIClient(curve)
        self.mspsi_server = MSPSIServer(curve)
        super().__init__(tests)

    def test_functionality(self):
        """End-to-end check: cardinalities match the expected keyword overlaps."""
        kwds = [['foo', 'bar', ''], ['foo', 'baz'], ['asdf']]
        (secret_server, published) = self.mspsi_server.publish(kwds)

        # Case where respectively 2, 1 and no keywords matches.
        (secret_client, query) = self.mspsi_client.query(['foo', ''])
        reply = self.mspsi_server.reply(secret_server, query)
        cards = self.mspsi_client.compute_cardinalities(
            secret_client, reply, published)
        for i, j in zip(cards, [2, 1, 0]):
            self.assertEqual(i, j)

        # Case where respectively 1, 1 and no keywords matches.
        (secret_client, query) = self.mspsi_client.query(['bar', 'baz'])
        reply = self.mspsi_server.reply(secret_server, query)
        cards = self.mspsi_client.compute_cardinalities(
            secret_client, reply, published)
        for i, j in zip(cards, [1, 1, 0]):
            self.assertEqual(i, j)

        # Case where respectively 0, 0 and 1 keywords matches.
        (secret_client, query) = self.mspsi_client.query(['asdf', 'ghjk'])
        reply = self.mspsi_server.reply(secret_server, query)
        cards = self.mspsi_client.compute_cardinalities(
            secret_client, reply, published)
        for i, j in zip(cards, [0, 0, 1]):
            self.assertEqual(i, j)

    def test_false_positives(self):
        """Stress-test cardinality computation and report false pos/negatives."""
        # Random data generation with keywords known to be inside the corpus.
        # Seeded so the generated sets of documents are reproducible.
        random.seed(0)
        # sets of documents are generated.
        kwds_in_doc_and_in_query = set([
            ''.join([random.choice(string.ascii_lowercase) for _ in range(16)])
            for _ in range(20)
        ])
        kwds_in_doc_not_in_query = set([
            ''.join([random.choice(string.ascii_lowercase) for _ in range(16)])
            for _ in range(1000)
        ])
        kwds_not_in_doc_in_query = set([
            ''.join([random.choice(string.ascii_lowercase) for _ in range(16)])
            for _ in range(1000)
        ])
        # Ensure there are no intersection between these two sets.
        kwds_in_doc_not_in_query -= kwds_in_doc_and_in_query
        # Ensure there are no intersection between this set and the two others.
        kwds_not_in_doc_in_query -= kwds_in_doc_and_in_query
        kwds_not_in_doc_in_query -= kwds_in_doc_not_in_query
        kwds_in_doc_and_in_query = list(kwds_in_doc_and_in_query)
        kwds_in_doc_not_in_query = list(kwds_in_doc_not_in_query)
        kwds_not_in_doc_in_query = list(kwds_not_in_doc_in_query)

        # generate documents: every doc contains all 20 "and_in_query"
        # keywords plus 100 draws of keywords absent from the queries.
        docs = [
            kwds_in_doc_and_in_query +
            [random.choice(kwds_in_doc_not_in_query) for _ in range(100)]
            for _ in range(1000)
        ]

        # generates queries content: full match, no match, and 50% match.
        queries_full = [[
            random.choice(kwds_in_doc_and_in_query) for _ in range(10)
        ] for _ in range(1000)]
        queries_none = [[
            random.choice(kwds_not_in_doc_in_query) for _ in range(10)
        ] for _ in range(1000)]
        queries_50 = [
            ([random.choice(kwds_in_doc_and_in_query) for _ in range(5)] +
             [random.choice(kwds_not_in_doc_in_query) for _ in range(5)])
            for _ in range(1000)
        ]

        # Publication of the documents
        (secret_server, published) = self.mspsi_server.publish(docs)

        err_false_neg = 0
        err_false_pos = 0
        n_matches = 0
        for queries, expected, info_str in zip(
                (queries_full, queries_50, queries_none),
                ([10] * 10, [5] * 10, [0] * 10),
                ('\n===== Full Match =====', '\n===== 50% match ======',
                 '\n===== 0% match =======')):
            print(info_str)
            for query in queries:
                n_matches += 1
                # NOTE(review): `query` is rebound here from keyword list to
                # the blinded query — intentional shadowing, kept as-is.
                (secret_client, query) = self.mspsi_client.query(query)
                reply = self.mspsi_server.reply(secret_server, query)
                cards = self.mspsi_client.compute_cardinalities(
                    secret_client, reply, published)
                # Compare each document's cardinality against the expectation
                # (zip truncates to the 10 expected entries).
                for i, j in zip(cards, expected):
                    if i != j:
                        if i > j:
                            n_false = i - j
                            print(
                                '{} false positive found (expected: {}, found: {})'
                                .format(n_false, j, i))
                            err_false_pos += n_false
                        else:
                            n_false = j - i
                            print(
                                '{} false negatives found (expected: {}, found: {})'
                                .format(n_false, j, i))
                            err_false_neg += n_false
        print(
            'A total of {} false negative and {} false positive were for {} queries of 10 keywords.'
            .format(err_false_neg, err_false_pos, n_matches))
class BenchmarkMSPSI:
    """Benchmarks the MS-PSI primitives (publish, query, reply, cardinality
    computation) and records timings and transfer sizes in a shared dict.

    Results layout (written into `data`):
      data['publish'][n_docs][kwds_per_doc][0]
      data['query'|'reply'|'cardinality'][n_docs][kwds_per_doc][kwds_per_query]
    each holding {'time': [...], 'length': [...]}.
    """

    def __init__(self, data, number_docs_published, number_kwds_per_doc,
                 number_kwds_per_query, curve=DEFAULT_CURVE, repetitions=10,
                 repetitions_publish=5):
        """Pre-generate the random corpus/queries and build client+server.

        :param data: nested results dict (mutated in place by the benchmarks)
        :param number_docs_published: documents in the published corpus
        :param number_kwds_per_doc: keywords per document
        :param number_kwds_per_query: keywords per client query
        :param curve: elliptic-curve identifier for MSPSIClient/MSPSIServer
        :param repetitions: query/reply/cardinality repetitions
        :param repetitions_publish: publish() repetitions
        """
        # Fixed seed keeps the generated keywords reproducible across runs.
        random.seed(0)
        self.data = data
        self.repetitions = repetitions
        self.repetitions_publish = repetitions_publish
        self.number_docs_published = number_docs_published
        self.number_kwds_per_doc = number_kwds_per_doc
        self.number_kwds_per_query = number_kwds_per_query
        self.kwds_published = [
            self._random_keywords(number_kwds_per_doc)
            for _ in range(number_docs_published)
        ]
        self.kwds_query = [
            self._random_keywords(number_kwds_per_query)
            for _ in range(repetitions)
        ]
        self.mspsi_client = MSPSIClient(curve)
        self.mspsi_server = MSPSIServer(curve)

    @staticmethod
    def _random_keywords(count):
        """Return `count` random keywords of KEYWORD_LENGTH lowercase letters."""
        return [
            ''.join(random.choice(string.ascii_lowercase)
                    for _ in range(KEYWORD_LENGTH))
            for _ in range(count)
        ]

    def publish(self):
        """Benchmark MSPSIServer.publish() over freshly generated corpora."""
        # A fresh corpus per repetition (generated up front so timing only
        # covers publish() itself).
        docs_published = [
            [
                self._random_keywords(self.number_kwds_per_doc)
                for _ in range(self.number_docs_published)
            ]
            for _ in range(self.repetitions_publish)
        ]
        times = []
        lengths = []
        for docs in docs_published:
            t0 = time.process_time()
            # The secret is not relevant for this benchmark.
            _, published = self.mspsi_server.publish(docs)
            t1 = time.process_time()
            times.append(t1 - t0)
            lengths.append(MSPSIServer.published_len(published))
        self.data['publish'][self.number_docs_published][
            self.number_kwds_per_doc][0] = {
                'time': times,
                'length': lengths
            }

    def run(self):
        """Benchmark query(), reply() and compute_cardinalities()."""
        (secret_server,
         published) = self.mspsi_server.publish(self.kwds_published)

        # --- query() ---
        times = []
        lengths = []
        queries = []
        for kwds in self.kwds_query:
            t0 = time.process_time()
            query = self.mspsi_client.query(kwds)
            t1 = time.process_time()
            times.append(t1 - t0)
            # Transferred size: total length of the blinded query points.
            lengths.append(sum(len(point) for point in query[1]))
            queries.append(query)
        self.data['query'][self.number_docs_published][
            self.number_kwds_per_doc][self.number_kwds_per_query] = {
                'time': times,
                'length': lengths
            }

        # --- reply() ---
        times = []
        lengths = []
        replies = []
        for query in queries:
            t0 = time.process_time()
            reply = self.mspsi_server.reply(secret_server, query[1])
            t1 = time.process_time()
            times.append(t1 - t0)
            lengths.append(sum(len(point) for point in reply))
            replies.append(reply)
        self.data['reply'][self.number_docs_published][
            self.number_kwds_per_doc][self.number_kwds_per_query] = {
                'time': times,
                'length': lengths
            }

        # --- compute_cardinalities() ---
        times = []
        for query, reply in zip(queries, replies):
            t0 = time.process_time()
            self.mspsi_client.compute_cardinalities(query[0], reply,
                                                    published)
            t1 = time.process_time()
            times.append(t1 - t0)
        # Computing lengths is meaningless for cardinalities: this data is
        # not transferred.
        self.data['cardinality'][self.number_docs_published][
            self.number_kwds_per_doc][self.number_kwds_per_query] = {
                'time': times,
                'length': []
            }