Esempio n. 1
0
 def test_small_input_b(self):
     """Compare both Dice implementations on a shortened second dataset.

     The second dataset is cut down to its first 10 entries; the result of
     the pure-Python implementation must equal that of the accelerated one.
     """
     implementations = (
         similarities.dice_coefficient_python,
         similarities.dice_coefficient_accelerated,
     )
     # Build a fresh argument tuple for every call so that neither
     # implementation sees objects already handled by the other.
     reference, accelerated = [
         dice((self.filters1, self.filters2[:10]),
              self.default_threshold,
              self.default_k)
         for dice in implementations
     ]
     self.assert_similarity_matrices_equal(reference, accelerated)
Esempio n. 2
0
 def test_same_score_k_none(self):
     """Scores of both implementations must be equal when k is None.

     Only the first element of each returned pair (the scores) is compared;
     the second element is ignored.
     """
     accelerated_scores, _ = similarities.dice_coefficient_accelerated(
         self.filters, self.default_threshold, None)
     reference_scores, _ = similarities.dice_coefficient_python(
         self.filters, self.default_threshold, None)
     assert accelerated_scores == reference_scores
Esempio n. 3
0
 def test_same_score(self):
     """Scores of both implementations must be equal for the default k.

     Each call returns a pair; only its first element (the scores) takes
     part in the comparison.
     """
     call_args = (self.filters, self.default_threshold, self.default_k)

     fast_scores, _ = similarities.dice_coefficient_accelerated(*call_args)
     slow_scores, _ = similarities.dice_coefficient_python(*call_args)

     assert fast_scores == slow_scores
Esempio n. 4
0
    def test_not_multiple_of_64(self, k, threshold, bytes_n):
        """Both implementations must agree on filters of ``8 * bytes_n`` bits.

        Each of the two datasets holds a single filter made of the 8-bit
        pattern ``01001011`` repeated ``bytes_n`` times.

        Args:
            k: Passed unchanged to both implementations.
            threshold: Similarity threshold passed to both implementations.
            bytes_n: Number of repetitions of the 8-bit pattern.

        NOTE(review): the arguments are presumably supplied by a pytest
        parametrization that is not visible in this chunk -- confirm.
        """
        pattern = bitarray('01001011')
        datasets = [[pattern * bytes_n], [pattern * bytes_n]]

        # Use the same threshold for both implementations. Previously the
        # reference call used self.default_threshold while the accelerated
        # call used ``threshold``, so the comparison was not like-for-like.
        py_similarity = similarities.dice_coefficient_python(
            datasets, threshold, k)
        c_similarity = similarities.dice_coefficient_accelerated(
            datasets, threshold, k=k)
        self.assert_similarity_matrices_equal(py_similarity, c_similarity)
Esempio n. 5
0
 def test_memory_use(self):
     """Both implementations must agree when k is extremely large.

     Only the first 10 entries of each dataset are used.
     """
     sample_size = 10
     # If memory is not handled correctly, then this would allocate
     # several terabytes of RAM.
     huge_k = 2 ** 50

     small_datasets = (self.filters1[:sample_size],
                       self.filters2[:sample_size])

     expected = similarities.dice_coefficient_python(
         small_datasets, self.default_threshold, huge_k)
     actual = similarities.dice_coefficient_accelerated(
         small_datasets, self.default_threshold, huge_k)
     self.assert_similarity_matrices_equal(expected, actual)
Esempio n. 6
0
    def test_cffi_manual(self):
        """Compare both implementations on freshly generated name subsets.

        A 30-entry name list is generated and two subsets are drawn from it
        via ``generate_subsets(5, 1.0)``. Each subset is streamed through
        ``bloomfilter.stream_bloom_filters`` and the first component of every
        streamed item is kept as that dataset's filter.
        """
        names = randomnames.NameList(30)
        subset_a, subset_b = names.generate_subsets(5, 1.0)
        keys = generate_key_lists(('test1', 'test2'), len(names.schema_types))

        def first_components(subset):
            # Keep only element 0 of every item yielded by the stream.
            stream = bloomfilter.stream_bloom_filters(
                subset, keys, names.SCHEMA)
            return tuple(item[0] for item in stream)

        filters_a = first_components(subset_a)
        filters_b = first_components(subset_b)
        datasets = (filters_a, filters_b)

        expected = similarities.dice_coefficient_python(
            datasets, self.default_threshold, self.default_k)
        actual = similarities.dice_coefficient_accelerated(
            datasets, self.default_threshold, self.default_k)
        self.assert_similarity_matrices_equal(expected, actual)
Esempio n. 7
0
 def test_cffi(self):
     """Run the accelerated implementation and pass its result on to
     ``self._check_proportion``."""
     call_args = (self.filters, self.default_threshold, self.default_k)
     result = similarities.dice_coefficient_accelerated(*call_args)
     self._check_proportion(result)
Esempio n. 8
0
 def test_not_multiple_of_64(self, k, threshold, bytes_n):
     """The accelerated implementation must raise NotImplementedError here.

     Each of the two datasets holds one filter: the 8-bit pattern
     ``01001011`` repeated ``bytes_n`` times.
     """
     # Two independent single-filter datasets with identical contents.
     datasets = [[bitarray('01001011') * bytes_n] for _ in range(2)]

     with pytest.raises(NotImplementedError):
         similarities.dice_coefficient_accelerated(datasets, threshold, k=k)
Esempio n. 9
0
 def test_not_multiple_of_8_raises(self):
     """The accelerated implementation must raise NotImplementedError when
     given 3-bit filters."""
     # Two independent datasets, each holding a single 3-bit filter.
     datasets = [[bitarray('010')] for _ in range(2)]

     with pytest.raises(NotImplementedError):
         similarities.dice_coefficient_accelerated(
             datasets, threshold=self.default_threshold)