Esempio n. 1
0
 def test_hamming(self):
     """Correctly calculate Hamming distances between numbers.

     Draws 100 random pairs of integers from [0, sys.maxsize] and asserts
     that hamming() on the integers equals hamming_from_iter() on their
     bitlist() expansions.
     """
     # sys.maxint was removed in Python 3; sys.maxsize is available on
     # both Python 2.6+ and Python 3.
     upper = sys.maxsize
     for _ in range(100):
         num1 = random.randint(0, upper)
         num2 = random.randint(0, upper)
         self.assertEqual(hamming(num1, num2),
                          hamming_from_iter(bitlist(num1), bitlist(num2)))
Esempio n. 2
0
 def test_hamming(self):
     """Correctly calculate Hamming distances between numbers.

     Draws 100 random pairs of integers from [0, sys.maxsize] and asserts
     that hamming() on the integers equals hamming_from_iter() on their
     bitlist() expansions.
     """
     # sys.maxint was removed in Python 3; sys.maxsize is available on
     # both Python 2.6+ and Python 3.
     upper = sys.maxsize
     for _ in range(100):
         num1 = random.randint(0, upper)
         num2 = random.randint(0, upper)
         self.assertEqual(hamming(num1, num2),
                          hamming_from_iter(bitlist(num1), bitlist(num2)))
Esempio n. 3
0
    def test_simhash_similarity(self):
        """Signatures should be consistent.

        Each case compares the signature of "abracadabra" with the signature
        of another text and pins the expected Hamming distance between them.
        """
        hasher = SimHashSignature(64)

        # (text compared against "abracadabra", expected Hamming distance)
        cases = (
            ("abracadabra", 0),
            ("arbcd", 13),
            ("", 32),
        )

        for other_text, expected in cases:
            reference = hasher.get_signature("abracadabra")
            candidate = hasher.get_signature(other_text)
            self.assertEqual(hamming(reference, candidate), expected)
Esempio n. 4
0
    def test_simhash_feature_weights(self):
        """Features should be weighted and should contribute to
        similarity/distance.

        Every case lists the positional arguments for two get_signature()
        calls and the Hamming distance expected between the results. The
        extra tuples such as ("cats", 0) are presumably (feature, weight)
        pairs -- confirm against SimHashSignature.get_signature.
        """
        hasher = SimHashSignature(64)

        # (args of first call, args of second call, expected distance)
        cases = [
            (("abracadabra",),
             ("abracdabra",), 3),
            (("abracadabra", ("cats", 0)),
             ("abracdabra", ("dogs", 0)), 3),
            (("abracadabra", ("cats", 0)),
             ("abracadabra", ("dogs", 0)), 0),
            (("abracadabra", ("ca", 4)),
             ("abracadabra", ("do", 4)), 9),
            (("abracadabra", ("ca", 5)),
             ("abracadabra", ("do", 5)), 9),
            (("abracadabra", ("cats", 200)),
             ("abracadabra", ("dogs", 200)), 21),
            (("abracadabra", ("cats", 10)),
             ("abracadabra", ("cats", 10)), 0),
        ]

        for first_args, second_args, expected in cases:
            first = hasher.get_signature(*first_args)
            second = hasher.get_signature(*second_args)
            self.assertEqual(hamming(first, second), expected)
Esempio n. 5
0
 def test_simhash_similarity_3(self):
     """Expect Hamming distance 37 between "abracadabra" and "" signatures."""
     hasher = SimHashSignature(64)
     text_sig = hasher.get_signature("abracadabra")
     empty_sig = hasher.get_signature("")
     self.assertEqual(37, hamming(text_sig, empty_sig))
Esempio n. 6
0
 def test_simhash_feature_weights_7(self):
     """Identical text and identical extra argument give distance 0."""
     hasher = SimHashSignature(64)
     first = hasher.get_signature("abracadabra", ("cats", 10))
     second = hasher.get_signature("abracadabra", ("cats", 10))
     self.assertEqual(0, hamming(first, second))
Esempio n. 7
0
 def test_simhash_feature_weights_1(self):
     """Expect distance 3 between "abracadabra" and "abracdabra" signatures."""
     hasher = SimHashSignature(64)
     original = hasher.get_signature("abracadabra")
     variant = hasher.get_signature("abracdabra")
     self.assertEqual(3, hamming(original, variant))
Esempio n. 8
0
 def test_minhash_sketch_similarity_3(self):
     """Expect Hamming distance 32 between "abracadabra" and "" sketches."""
     sketcher = MinHashSketchSignature(64)
     text_sig = sketcher.get_signature("abracadabra")
     empty_sig = sketcher.get_signature("")
     self.assertEqual(32, hamming(text_sig, empty_sig))