def test_small_input_b(self):
    """Check both Dice implementations agree when the second dataset is small.

    The second dataset is truncated to its first ten filters while the
    first dataset is used in full.
    """
    call_args = (
        (self.filters1, self.filters2[:10]),
        self.default_threshold,
        self.default_k,
    )
    reference = similarities.dice_coefficient_python(*call_args)
    accelerated = similarities.dice_coefficient_accelerated(*call_args)
    self.assert_similarity_matrices_equal(reference, accelerated)
def test_same_score_k_none(self):
    """Check both implementations return equal scores when k is None.

    Only the first element of each returned pair (the scores) is compared;
    the second element is ignored.
    """
    accelerated_scores, _ = similarities.dice_coefficient_accelerated(
        self.filters, self.default_threshold, None)
    reference_scores, _ = similarities.dice_coefficient_python(
        self.filters, self.default_threshold, None)
    assert accelerated_scores == reference_scores
def test_same_score(self):
    """Check both implementations return equal scores for the default k.

    Only the first element of each returned pair (the scores) is compared;
    the second element is ignored.
    """
    accelerated_scores, _ = similarities.dice_coefficient_accelerated(
        self.filters, self.default_threshold, self.default_k)
    reference_scores, _ = similarities.dice_coefficient_python(
        self.filters, self.default_threshold, self.default_k)
    assert accelerated_scores == reference_scores
def test_not_multiple_of_64(self, k, threshold, bytes_n):
    """Check both Dice implementations agree on short, odd-length filters.

    Each of the two datasets holds a single filter made of the 8-bit
    pattern ``01001011`` repeated ``bytes_n`` times, i.e. ``8 * bytes_n``
    bits. The parametrization is not visible here; presumably it picks
    ``bytes_n`` so that the length is not a multiple of 64 bits.

    Args:
        k: Maximum number of candidates, forwarded to both implementations.
        threshold: Similarity threshold, forwarded to both implementations.
        bytes_n: Number of repetitions of the 8-bit pattern per filter.
    """
    pattern = bitarray('01001011')
    datasets = [[pattern * bytes_n], [pattern * bytes_n]]

    # Both implementations must receive the *same* threshold; previously
    # the Python call used self.default_threshold, so the comparison could
    # run the two implementations under different settings.
    py_similarity = similarities.dice_coefficient_python(
        datasets, threshold, k)
    c_similarity = similarities.dice_coefficient_accelerated(
        datasets, threshold, k=k)
    self.assert_similarity_matrices_equal(py_similarity, c_similarity)
def test_memory_use(self):
    """Check both implementations cope with an enormous k on tiny inputs.

    Only the first ten filters of each dataset are used, and the results
    of the two implementations must match.
    """
    sample_size = 10
    first = self.filters1[:sample_size]
    second = self.filters2[:sample_size]

    # If memory is not handled correctly, then a k this large would
    # allocate several terabytes of RAM.
    huge_k = 2 ** 50

    reference = similarities.dice_coefficient_python(
        (first, second), self.default_threshold, huge_k)
    accelerated = similarities.dice_coefficient_accelerated(
        (first, second), self.default_threshold, huge_k)
    self.assert_similarity_matrices_equal(reference, accelerated)
def test_cffi_manual(self):
    """Check both implementations agree on freshly generated Bloom filters.

    A name list of 30 entries is generated, two subsets are drawn from it,
    each subset is streamed into Bloom filters, and the two Dice
    implementations are compared on the resulting filter tuples.
    """
    name_list = randomnames.NameList(30)
    subset_a, subset_b = name_list.generate_subsets(5, 1.0)
    keys = generate_key_lists(
        ('test1', 'test2'), len(name_list.schema_types))

    def filters_of(subset):
        # Keep only the first component of each streamed item.
        streamed = bloomfilter.stream_bloom_filters(
            subset, keys, name_list.SCHEMA)
        return tuple(item[0] for item in streamed)

    filters_a = filters_of(subset_a)
    filters_b = filters_of(subset_b)

    reference = similarities.dice_coefficient_python(
        (filters_a, filters_b), self.default_threshold, self.default_k)
    accelerated = similarities.dice_coefficient_accelerated(
        (filters_a, filters_b), self.default_threshold, self.default_k)
    self.assert_similarity_matrices_equal(reference, accelerated)
def test_python(self):
    """Run the Python Dice implementation and check its result proportion.

    The result for the default threshold and k is handed to
    ``self._check_proportion`` for validation.
    """
    result = similarities.dice_coefficient_python(
        self.filters,
        self.default_threshold,
        self.default_k,
    )
    self._check_proportion(result)