Example #1
    def __init__(self,
                 data,
                 number_docs_published,
                 number_kwds_per_doc,
                 number_kwds_per_query,
                 curve=DEFAULT_CURVE,
                 repetitions=10,
                 repetitions_publish=5):
        random.seed(0)

        self.data = data

        self.repetitions = repetitions
        self.repetitions_publish = repetitions_publish
        self.number_docs_published = number_docs_published
        self.number_kwds_per_doc = number_kwds_per_doc
        self.number_kwds_per_query = number_kwds_per_query

        self.kwds_published = [[(''.join((random.choice(string.ascii_lowercase)
                                          for _ in range(KEYWORD_LENGTH))))
                                for _ in range(number_kwds_per_doc)]
                               for _ in range(number_docs_published)]
        self.kwds_query = [[(''.join((random.choice(string.ascii_lowercase)
                                      for _ in range(KEYWORD_LENGTH))))
                            for _ in range(number_kwds_per_query)]
                           for _ in range(repetitions)]

        self.mspsi_client = MSPSIClient(curve)
        self.mspsi_server = MSPSIServer(curve)
def main():
    """Entry point of the program."""

    kwds_query = [[(''.join((random.choice(string.ascii_lowercase)
                             for _ in range(16)))) for _ in range(10)]
                  for _ in range(NUM_QUERIES)]
    docs_published = [[(''.join((random.choice(string.ascii_lowercase)
                                 for _ in range(16)))) for _ in range(100)]
                      for _ in range(NUM_DOCS)]

    mspsi_client = MSPSIClient(CURVE)
    mspsi_server = MSPSIServer(CURVE)

    # Profile for publish()
    pr = cProfile.Profile()
    pr.enable()

    (secret_server, published) = mspsi_server.publish(docs_published)

    pr.disable()
    pr.print_stats()

    queries = []

    # Profile for query()
    pr = cProfile.Profile()
    pr.enable()

    for i in range(NUM_QUERIES):
        queries.append(mspsi_client.query(kwds_query[i]))

    pr.disable()
    pr.print_stats()

    replies = []

    # Profile for reply()
    pr = cProfile.Profile()
    pr.enable()

    for i in range(NUM_QUERIES):
        replies.append(mspsi_server.reply(secret_server, queries[i][1]))

    pr.disable()
    pr.print_stats()

    # Profile for compute_cardinalities()
    pr = cProfile.Profile()
    pr.enable()

    for i in range(NUM_QUERIES):
        mspsi_client.compute_cardinalities(queries[i][0], replies[i],
                                           published)

    pr.disable()
    pr.print_stats()
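
The driver above relies on module-level names (cProfile, random, string, NUM_QUERIES, NUM_DOCS, CURVE) that the excerpt does not show. A minimal sketch of what the module preamble and entry-point guard might look like, with placeholder values chosen only for illustration:

import cProfile
import random
import string

NUM_QUERIES = 10  # placeholder: number of profiled queries (assumed)
NUM_DOCS = 100    # placeholder: number of published documents (assumed)
CURVE = 415       # placeholder: curve identifier, matching the tests below

if __name__ == '__main__':
    main()
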
Example #3
    def publish(self):
        docs_published = [[[(''.join((random.choice(string.ascii_lowercase)
                                      for _ in range(KEYWORD_LENGTH))))
                            for _ in range(self.number_kwds_per_doc)]
                           for _ in range(self.number_docs_published)]
                          for _ in range(self.repetitions_publish)]

        times = []
        lengths = []
        for docs in docs_published:
            t0 = time.process_time()
            # The secret is not relevant for this benchmark.
            _, published = self.mspsi_server.publish(docs)
            t1 = time.process_time()

            length = MSPSIServer.published_len(published)

            times.append(t1 - t0)
            lengths.append(length)

        self.data['publish'][self.number_docs_published][
            self.number_kwds_per_doc][0] = {
                'time': times,
                'length': lengths
            }
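
The results are written into a nested mapping keyed by the benchmark parameters. How that mapping is built is not part of the example; a minimal sketch using recursively nested collections.defaultdict, so that the chained indexing above works without pre-declaring every level, could look like this:

from collections import defaultdict

def nested_dict():
    # Each missing key transparently creates another nested level.
    return defaultdict(nested_dict)

data = {
    'publish': nested_dict(),
    'query': nested_dict(),
    'reply': nested_dict(),
    'cardinality': nested_dict(),
}
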
Example #4
    def publish(self):
        docs_published = [[[(''.join((random.choice(string.ascii_lowercase) for _ in range(KEYWORD_LENGTH)))) for _ in range(self.number_kwds_per_doc)] for _ in range(self.number_docs_published)] for _ in range(self.repetitions_publish)]

        times = []
        lengths = []
        for docs in docs_published:
            t0 = time.process_time()
            _, published = self.mspsi_server.publish(docs)
            t1 = time.process_time()

            # The first field is the number of documents; the second field is
            # the list of points corresponding to the keywords in the documents.
            length = MSPSIServer.published_len(published)

            times.append(t1-t0)
            lengths.append(length)

        self.data['publish'][self.number_journalists][
            self.number_docs_published][self.number_kwds_per_doc][0] = {
                'time': times,
                'length': lengths
            }
class TestMSPSI(unittest.TestCase):
    def __init__(self, tests):
        curve = 415  # OpenSSL NID 415: X9.62 prime256v1 (NIST P-256)
        self.mspsi_client = MSPSIClient(curve)
        self.mspsi_server = MSPSIServer(curve)
        super().__init__(tests)

    def test_functionality(self):
        kwds = [['foo', 'bar', ''], ['foo', 'baz'], ['asdf']]
        (secret_server, published) = self.mspsi_server.publish(kwds)

        # Case where 2, 1 and 0 keywords match, respectively.
        (secret_client, query) = self.mspsi_client.query(['foo', ''])
        reply = self.mspsi_server.reply(secret_server, query)
        cards = self.mspsi_client.compute_cardinalities(
            secret_client, reply, published)

        for i, j in zip(cards, [2, 1, 0]):
            self.assertEqual(i, j)

        # Case where 1, 1 and 0 keywords match, respectively.
        (secret_client, query) = self.mspsi_client.query(['bar', 'baz'])
        reply = self.mspsi_server.reply(secret_server, query)
        cards = self.mspsi_client.compute_cardinalities(
            secret_client, reply, published)

        for i, j in zip(cards, [1, 1, 0]):
            self.assertEqual(i, j)

        # Case where 0, 0 and 1 keywords match, respectively.
        (secret_client, query) = self.mspsi_client.query(['asdf', 'ghjk'])
        reply = self.mspsi_server.reply(secret_server, query)
        cards = self.mspsi_client.compute_cardinalities(
            secret_client, reply, published)

        for i, j in zip(cards, [0, 0, 1]):
            self.assertEqual(i, j)

    def test_false_positives(self):
        # Random data generation with keywords known to be inside the corpus
        random.seed(0)

        # Sets of keywords are generated.
        kwds_in_doc_and_in_query = set([
            ''.join([random.choice(string.ascii_lowercase) for _ in range(16)])
            for _ in range(20)
        ])
        kwds_in_doc_not_in_query = set([
            ''.join([random.choice(string.ascii_lowercase) for _ in range(16)])
            for _ in range(1000)
        ])
        kwds_not_in_doc_in_query = set([
            ''.join([random.choice(string.ascii_lowercase) for _ in range(16)])
            for _ in range(1000)
        ])

        # Ensure there is no intersection between these two sets.
        kwds_in_doc_not_in_query -= kwds_in_doc_and_in_query

        # Ensure there is no intersection between this set and the two others.
        kwds_not_in_doc_in_query -= kwds_in_doc_and_in_query
        kwds_not_in_doc_in_query -= kwds_in_doc_not_in_query

        kwds_in_doc_and_in_query = list(kwds_in_doc_and_in_query)
        kwds_in_doc_not_in_query = list(kwds_in_doc_not_in_query)
        kwds_not_in_doc_in_query = list(kwds_not_in_doc_in_query)

        # Generate documents.
        docs = [
            kwds_in_doc_and_in_query +
            [random.choice(kwds_in_doc_not_in_query) for _ in range(100)]
            for _ in range(1000)
        ]

        # Generate query contents.
        queries_full = [[
            random.choice(kwds_in_doc_and_in_query) for _ in range(10)
        ] for _ in range(1000)]
        queries_none = [[
            random.choice(kwds_not_in_doc_in_query) for _ in range(10)
        ] for _ in range(1000)]
        queries_50 = [
            ([random.choice(kwds_in_doc_and_in_query) for _ in range(5)] +
             [random.choice(kwds_not_in_doc_in_query) for _ in range(5)])
            for _ in range(1000)
        ]

        # Publication of the documents
        (secret_server, published) = self.mspsi_server.publish(docs)

        err_false_neg = 0
        err_false_pos = 0
        n_matches = 0

        for queries, expected, info_str in zip(
            (queries_full, queries_50, queries_none),
            ([10] * 10, [5] * 10, [0] * 10),
            ('\n===== Full Match =====', '\n===== 50% match ======',
             '\n===== 0% match =======')):
            print(info_str)
            for query in queries:
                n_matches += 1

                (secret_client, query) = self.mspsi_client.query(query)
                reply = self.mspsi_server.reply(secret_server, query)
                cards = self.mspsi_client.compute_cardinalities(
                    secret_client, reply, published)

                for i, j in zip(cards, expected):
                    if i != j:
                        if i > j:
                            n_false = i - j
                            print(
                                '{} false positives found (expected: {}, found: {})'
                                .format(n_false, j, i))
                            err_false_pos += n_false
                        else:
                            n_false = j - i
                            print(
                                '{} false negatives found (expected: {}, found: {})'
                                .format(n_false, j, i))
                            err_false_neg += n_false

        print(
            'A total of {} false negatives and {} false positives were found for {} queries of 10 keywords.'
            .format(err_false_neg, err_false_pos, n_matches))
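
Because __init__ forwards its tests argument to unittest.TestCase, each instance is constructed with the name of a test method, exactly as unittest's loader does it. A minimal runner sketch, assuming MSPSIClient and MSPSIServer are importable from the surrounding project:

import unittest

# Minimal runner sketch: the loader instantiates the class once per test
# method name, which __init__ forwards to unittest.TestCase as `tests`.
if __name__ == '__main__':
    suite = unittest.TestSuite([
        TestMSPSI('test_functionality'),
        TestMSPSI('test_false_positives'),
    ])
    unittest.TextTestRunner(verbosity=2).run(suite)
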
Example #7
class BenchmarkMSPSI:
    def __init__(self,
                 data,
                 number_docs_published,
                 number_kwds_per_doc,
                 number_kwds_per_query,
                 curve=DEFAULT_CURVE,
                 repetitions=10,
                 repetitions_publish=5):
        random.seed(0)

        self.data = data

        self.repetitions = repetitions
        self.repetitions_publish = repetitions_publish
        self.number_docs_published = number_docs_published
        self.number_kwds_per_doc = number_kwds_per_doc
        self.number_kwds_per_query = number_kwds_per_query

        self.kwds_published = [[(''.join((random.choice(string.ascii_lowercase)
                                          for _ in range(KEYWORD_LENGTH))))
                                for _ in range(number_kwds_per_doc)]
                               for _ in range(number_docs_published)]
        self.kwds_query = [[(''.join((random.choice(string.ascii_lowercase)
                                      for _ in range(KEYWORD_LENGTH))))
                            for _ in range(number_kwds_per_query)]
                           for _ in range(repetitions)]

        self.mspsi_client = MSPSIClient(curve)
        self.mspsi_server = MSPSIServer(curve)

    def publish(self):
        docs_published = [[[(''.join((random.choice(string.ascii_lowercase)
                                      for _ in range(KEYWORD_LENGTH))))
                            for _ in range(self.number_kwds_per_doc)]
                           for _ in range(self.number_docs_published)]
                          for _ in range(self.repetitions_publish)]

        times = []
        lengths = []
        for docs in docs_published:
            t0 = time.process_time()
            # The secret is not relevant for this benchmark.
            _, published = self.mspsi_server.publish(docs)
            t1 = time.process_time()

            length = MSPSIServer.published_len(published)

            times.append(t1 - t0)
            lengths.append(length)

        self.data['publish'][self.number_docs_published][
            self.number_kwds_per_doc][0] = {
                'time': times,
                'length': lengths
            }

    def run(self):
        (secret_server,
         published) = self.mspsi_server.publish(self.kwds_published)

        times = []
        lengths = []
        queries = []
        for kwds in self.kwds_query:
            t0 = time.process_time()
            query = self.mspsi_client.query(kwds)
            t1 = time.process_time()

            times.append(t1 - t0)

            length = sum(len(x) for x in query[1])
            lengths.append(length)

            queries.append(query)

        self.data['query'][self.number_docs_published][
            self.number_kwds_per_doc][self.number_kwds_per_query] = {
                'time': times,
                'length': lengths
            }

        times = []
        lengths = []
        replies = []
        for query in queries:
            t0 = time.process_time()
            reply = self.mspsi_server.reply(secret_server, query[1])
            t1 = time.process_time()

            length = sum(len(x) for x in reply)

            times.append(t1 - t0)
            lengths.append(length)
            replies.append(reply)

        self.data['reply'][self.number_docs_published][
            self.number_kwds_per_doc][self.number_kwds_per_query] = {
                'time': times,
                'length': lengths
            }

        times = []
        for i, reply in enumerate(replies):
            t0 = time.process_time()
            self.mspsi_client.compute_cardinalities(queries[i][0], reply,
                                                    published)
            t1 = time.process_time()

            times.append(t1 - t0)
            # Computing lengths is meaningless for cardinalities; this data is
            # not transferred.

        self.data['cardinality'][self.number_docs_published][
            self.number_kwds_per_doc][self.number_kwds_per_query] = {
                'time': times,
                'length': []
            }
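
A hypothetical driver for BenchmarkMSPSI; the nested data mapping reuses the nested_dict sketch shown after Example #3, and the parameter values are illustrative assumptions only:

from collections import defaultdict

def nested_dict():  # same helper as in the sketch after Example #3
    return defaultdict(nested_dict)

data = {key: nested_dict()
        for key in ('publish', 'query', 'reply', 'cardinality')}

bench = BenchmarkMSPSI(data,
                       number_docs_published=100,
                       number_kwds_per_doc=50,
                       number_kwds_per_query=10)
bench.publish()
bench.run()

# Collected timings can then be read back, e.g.:
# data['query'][100][50][10]['time']
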