def test_generate_fingerprint(self):
    """Check the generated fingerprint after one update and after a second one.

    The same Simhash instance is fed two strings in sequence; after each
    ``update`` call the fingerprint is compared against a hard-coded bitarray.
    """
    fingerprint_after_first_update = bitarray(
        '11100010001110100111100010101000101110111111110000010011000110000110001110000000100000111011101110111100110001011011110001011100'
    )
    fingerprint_after_second_update = bitarray(
        '00000010000000000011110010100000101000001111100000000001010110000110101110111000100000110101000000010100100000000010110011010010'
    )

    hasher = Simhash()

    hasher.update('This should work')
    assert hasher.generate_fingerprint() == fingerprint_after_first_update

    # The second update goes to the same instance, not a fresh one.
    hasher.update('this will get added too!')
    assert hasher.generate_fingerprint() == fingerprint_after_second_update
def test_similarity_matching3(self):
    """Compare the fingerprints of two test files by Hamming distance.

    Each file's full text is fed to its own Simhash instance, and the
    distance between the two generated fingerprints is expected to be 13.
    """
    left_hasher = Simhash()
    right_hasher = Simhash()

    left_path = self.get_test_loc('fingerprint/similarity_matching5.py')
    right_path = self.get_test_loc('fingerprint/similarity_matching6.py')

    with open(left_path, 'r') as left_file:
        left_text = left_file.read()
    with open(right_path, 'r') as right_file:
        right_text = right_file.read()

    left_hasher.update(left_text)
    right_hasher.update(right_text)

    left_fingerprint = left_hasher.generate_fingerprint()
    right_fingerprint = right_hasher.generate_fingerprint()

    # hamming_distance is invoked on the first instance, taking both fingerprints as arguments.
    distance = left_hasher.hamming_distance(left_fingerprint, right_fingerprint)
    assert distance == 13