Esempio n. 1
0
 def setUp(self):
     """Prepare ``self.fields``, the field specifications used by the tests.

     All six specs share a single ``FieldHashingProperties`` instance built
     with ASCII encoding, the comparator returned by ``get_comparator`` for
     ``{'type': 'ngram', 'n': 2}`` and ``BitsPerTokenStrategy(20)``.
     """
     ngram_comparator = get_comparator({'type': 'ngram', 'n': 2})
     shared_hashing = FieldHashingProperties(
         encoding='ascii',
         comparator=ngram_comparator,
         strategy=BitsPerTokenStrategy(20),
     )

     # The two name fields differ only in identifier and case.
     given_name = StringSpec(
         identifier='given name',
         case='lower',
         min_length=1,
         max_length=None,
         hashing_properties=shared_hashing,
     )
     surname = StringSpec(
         identifier='surname',
         case='upper',
         min_length=1,
         max_length=None,
         hashing_properties=shared_hashing,
     )
     # The e-mail field is constrained by a regular expression instead of
     # case/length settings.
     email = StringSpec(
         identifier='email address',
         regex=r'.+@.+\..+',
         hashing_properties=shared_hashing,
     )
     age = IntegerSpec(
         identifier='age',
         minimum=18,
         maximum=99,
         hashing_properties=shared_hashing,
     )
     join_date = DateSpec(
         identifier='join date',
         format='%Y-%m-%d',
         hashing_properties=shared_hashing,
     )
     account_type = EnumSpec(
         identifier='account type',
         values=['free', 'paid'],
         hashing_properties=shared_hashing,
     )

     self.fields = [given_name, surname, email, age, join_date, account_type]
Esempio n. 2
0
    def test_compare_to_legacy(self):
        """Compare set-bit counts for keys derived with HKDF and with legacy.

        One row holding the same value in four fields is hashed twice: once
        with key lists generated using ``kdf='HKDF'`` and once using
        ``kdf='legacy'``. The number of set bits in each resulting filter is
        then compared.
        """
        # Identifier: 'ANY freetext'
        field_props = FieldHashingProperties(ngram=2,
                                             hash_type='doubleHash',
                                             k=10)

        info = base64.b64decode('c2NoZW1hX2V4YW1wbGU=')
        salt = base64.b64decode(
            'SCbL2zHNnmsckfzchsNkZY9XoHk96P'
            '/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==')
        text_fields = [
            StringSpec(identifier='ANY text {}'.format(i + 1),
                       hashing_properties=field_props)
            for i in range(4)
        ]
        schema = Schema(l=1024,
                        kdf_info=info,
                        kdf_key_size=64,
                        kdf_salt=salt,
                        fields=text_fields)

        row = ['Bobby', 'Bobby', 'Bobby', 'Bobby']
        master_secrets = [
            'No, I am your father'.encode(),
            "No... that's not true! That's impossible!".encode()
        ]
        n_fields = len(row)
        keys_hkdf = generate_key_lists(master_secrets, n_fields, kdf='HKDF')
        keys_legacy = generate_key_lists(master_secrets, n_fields, kdf='legacy')

        def set_bits(keys):
            # Hash the single row with the given keys and count the bits set
            # in the first element of the first streamed result.
            first_result = next(stream_bloom_filters([row], keys, schema))
            return first_result[0].count()

        hkdf_count = set_bits(keys_hkdf)
        legacy_count = set_bits(keys_legacy)

        # legacy will map the 4 Bobbys to the same bits, whereas hkdf will
        # map each Bobby to different bits.
        self.assertLessEqual(legacy_count, field_props.k * 6)  # 6 bi-grams
        self.assertLess(legacy_count, hkdf_count)
        self.assertLessEqual(hkdf_count, len(row) * legacy_count)
Esempio n. 3
0
 def test_different_weights(self):
     """Build a version-1 schema containing a single 'some info' field."""
     # Settings applied to the schema as a whole.
     global_props = GlobalHashingProperties(
         k=30,
         kdf_hash='SHA256',
         kdf_info=base64.b64decode('c2NoZW1hX2V4YW1wbGU='),
         kdf_key_size=64,
         kdf_salt=base64.b64decode(
             'SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA=='
         ),
         kdf_type='HKDF',
         l=1024,
         hash_type='blakeHash',
         xor_folds=0,
     )

     # Per-field settings: library default encoding, non-positional 2-grams
     # and a weight of 1.
     field_props = FieldHashingProperties(
         encoding=FieldHashingProperties._DEFAULT_ENCODING,
         ngram=2,
         positional=False,
         weight=1,
     )
     info_field = StringSpec(
         identifier='some info',
         hashing_properties=field_props,
         description=None,
         case=StringSpec._DEFAULT_CASE,
         min_length=0,
         max_length=None,
     )

     schema = Schema(version=1,
                     hashing_globals=global_props,
                     fields=[info_field])
Esempio n. 4
0
 def test_different_weights(self):
     """Build a schema containing a single 'some info' string field."""
     info = base64.b64decode('c2NoZW1hX2V4YW1wbGU=')
     salt = base64.b64decode(
         'SCbL2zHNnmsckfzchsNkZY9XoHk96P'
         '/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==')

     # Field-level hashing: library default encoding, the module's
     # ``bigram_tokenizer`` comparator and ``BitsPerTokenStrategy(20)``.
     field_props = FieldHashingProperties(
         encoding=FieldHashingProperties._DEFAULT_ENCODING,
         comparator=bigram_tokenizer,
         strategy=BitsPerTokenStrategy(20),
     )
     info_field = StringSpec(
         identifier='some info',
         hashing_properties=field_props,
         description=None,
         case=StringSpec._DEFAULT_CASE,
         min_length=0,
         max_length=None,
     )

     schema = Schema(l=1024,
                     xor_folds=0,
                     kdf_hash='SHA256',
                     kdf_info=info,
                     kdf_key_size=64,
                     kdf_salt=salt,
                     kdf_type='HKDF',
                     fields=[info_field])
Esempio n. 5
0
 def mkSchema(hashing_properties):
     """Return a ``Schema`` with one 'name' string field.

     :param hashing_properties: hashing properties assigned to the 'name'
         field.
     :return: a ``Schema`` created with ``l=1024``, ``xor_folds=1`` and
         HKDF/SHA256 key-derivation settings.
     """
     salt = base64.b64decode(
         'SCbL2zHNnmsckfzchsNkZY9XoHk96P'
         '/G5nUBrM7ybymlEFsMV6PAeDZCNp3r'
         'fNUPCtLDMOGQHG4pCQpfhiHCyA==')
     info = base64.b64decode('c2NoZW1hX2V4YW1wbGU=')

     # The only field: a string of 1 to 50 characters in the default case.
     name_field = StringSpec(
         identifier='name',
         hashing_properties=hashing_properties,
         description=None,
         case=StringSpec._DEFAULT_CASE,
         min_length=1,
         max_length=50,
     )

     return Schema(
         l=1024,
         xor_folds=1,
         kdf_type='HKDF',
         kdf_hash='SHA256',
         kdf_salt=salt,
         kdf_info=info,
         kdf_key_size=64,
         fields=[name_field],
     )
Esempio n. 6
0
 def test_compare_to_legacy(self):
     """Build a version-1 schema with four identically configured fields."""
     # Identifier: 'ANY freetext'
     global_props = GlobalHashingProperties(
         k=10,
         kdf_hash='SHA256',
         kdf_info=base64.b64decode('c2NoZW1hX2V4YW1wbGU='),
         kdf_key_size=64,
         kdf_salt=base64.b64decode(
             'SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA=='
         ),
         kdf_type='HKDF',
         l=1024,
         hash_type='doubleHash',
         hash_prevent_singularity=False,
         xor_folds=0,
     )

     # The four fields differ only in their identifier; each one gets its
     # own FieldHashingProperties instance.
     identifiers = ('ANY text 1', 'ANY text 2', 'ANY text 3', 'ANY text 4')
     text_fields = [
         StringSpec(
             identifier=identifier,
             hashing_properties=FieldHashingProperties(
                 encoding=FieldHashingProperties._DEFAULT_ENCODING,
                 ngram=2,
                 positional=False,
                 weight=1),
             description=None,
             case=StringSpec._DEFAULT_CASE,
             min_length=0,
             max_length=None)
         for identifier in identifiers
     ]

     schema = Schema(version=1,
                     hashing_globals=global_props,
                     fields=text_fields)