Beispiel #1
0
 def test_process_shingles1(self):
     """Check process_shingles on 'Thisisfortesting' from an all-ones vector.

     The starting vector holds HASH_LENGTH ones; every entry of the
     expected output is either 0 or 2.
     """
     expected_weights = [
         0, 0, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 0,
         2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2,
         0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0,
         2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0,
         2, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2,
         0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0
     ]
     hasher = Simhash()
     initial_weights = [1] * HASH_LENGTH
     actual = hasher.process_shingles('Thisisfortesting', initial_weights)
     assert actual == expected_weights
Beispiel #2
0
 def test_process_shingles3(self):
     """Check process_shingles on 'for(inti=0;i<n;i++)' from a 0/2 vector.

     The starting vector contains only 0s and 2s; the expected output
     contains only the values -1, 1 and 3.
     """
     expected_weights = [
         1, -1, 1, 1, -1, -1, 1, 3, 1, 1, 1, 1, -1, 1, -1, -1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 3, 1, 3, 3, 1, 1, 3, -1, 1, 1, 1,
         1, 3, 1, 1, 3, 1, 1, -1, 1, -1, 3, 1, 1, -1, 1, 1, 1, -1, -1, 1, 1,
         -1, 1, 1, 1, 1, -1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 3, -1, 1, 3, -1,
         -1, -1, -1, 1, 1, 1, 1, 1, -1, 3, 3, 3, 3, 1, 1, -1, 1, 1, -1, 3,
         1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 3, 1, -1, 1, -1, 1,
         1, -1, 1
     ]
     hasher = Simhash()
     initial_weights = [
         0, 0, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 0,
         2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2,
         0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0,
         2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0,
         2, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2,
         0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0
     ]
     actual = hasher.process_shingles('for(inti=0;i<n;i++)', initial_weights)
     assert actual == expected_weights
Beispiel #3
0
 def test_process_shingles2(self):
     """Check process_shingles on 'tryforanotherone' from a 0/2 vector.

     The starting vector contains only 0s and 2s; the expected output
     contains only the values -1, 1 and 3.
     """
     expected_weights = [
         1, 1, 3, 1, -1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, -1, -1, 1, 3, 3, 1,
         1, 3, 1, 1, 3, 3, 3, 1, 1, 3, 1, 1, 3, 3, -1, 1, 1, -1, -1, 1, -1,
         3, 1, 1, -1, 3, 1, 3, 1, 1, -1, 3, 3, 1, 1, 1, 1, 3, 1, -1, 1, -1,
         1, 1, -1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, -1, 3, 1, 1, 1, 3, -1,
         1, -1, 1, 1, 1, -1, 1, 3, 1, 1, 1, 1, 3, 1, 1, -1, 1, 1, -1, 1, -1,
         1, -1, 1, 3, 3, -1, 1, -1, 1, -1, -1, 3, -1, 1, 3, 1, 1, -1, 1, 1,
         3, 1, -1
     ]
     hasher = Simhash()
     initial_weights = [
         0, 0, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 0,
         2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 0, 0, 2, 2,
         0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0,
         2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0,
         2, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2,
         0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0
     ]
     actual = hasher.process_shingles('tryforanotherone', initial_weights)
     assert actual == expected_weights