Esempio n. 1
0
 def test_dedupe_coverage(self):
     """Check that predicate coverage includes the expected name predicates.

     Builds a blocker from the data model's predicates, indexes the
     training records, computes the covered pairs for ``self.training``
     and asserts that the resulting keys (after ``self.simple``) are a
     superset of the expected predicate descriptions.
     """
     model_predicates = self.data_model.predicates()
     record_blocker = dedupe.blocking.Blocker(model_predicates)

     # Key every training record by its position before indexing.
     records_by_position = dict(enumerate(self.training_records))
     record_blocker.indexAll(records_by_position)

     covered = training.coveredPairs(record_blocker.predicates,
                                     self.training)

     expected_predicates = {
         "SimplePredicate: (tokenFieldPredicate, name)",
         "SimplePredicate: (commonSixGram, name)",
         "TfidfTextCanopyPredicate: (0.4, name)",
         "SimplePredicate: (sortedAcronym, name)",
         "SimplePredicate: (sameThreeCharStartPredicate, name)",
         "TfidfTextCanopyPredicate: (0.2, name)",
         "SimplePredicate: (sameFiveCharStartPredicate, name)",
         "TfidfTextCanopyPredicate: (0.6, name)",
         "SimplePredicate: (wholeFieldPredicate, name)",
         "TfidfTextCanopyPredicate: (0.8, name)",
         "SimplePredicate: (commonFourGram, name)",
         "SimplePredicate: (firstTokenPredicate, name)",
         "SimplePredicate: (sameSevenCharStartPredicate, name)",
     }

     # NOTE(review): self.simple presumably turns the predicate keys into a
     # set of their string descriptions -- confirm against the helper.
     observed = self.simple(covered.keys())
     assert observed.issuperset(expected_predicates)
Esempio n. 2
0
    def test_covered_pairs(self):
        """Check that a constant predicate is reported as covering every pair.

        The predicate returns the same one-element tuple for any record,
        so the test expects it to map to the indices of both pairs.
        """
        def constant_predicate(x):
            # Ignores the record and always yields the same block key.
            return (1,)

        record_pairs = [('a', 'b')] * 2
        cover = training.coveredPairs((constant_predicate,), record_pairs)

        expected_indices = {0, 1}
        assert cover[constant_predicate] == expected_indices
    def test_covered_pairs(self):
        """Check that a constant predicate is reported as covering every pair.

        The predicate accepts an optional ``target`` argument and returns
        the same one-element tuple regardless of its inputs, so the test
        expects it to map to the indices of both pairs.
        """
        def constant_predicate(x, target=None):
            # Ignores both arguments and always yields the same block key.
            return (1, )

        record_pairs = [('a', 'b')] * 2
        cover = training.coveredPairs((constant_predicate, ), record_pairs)

        expected_indices = {0, 1}
        assert cover[constant_predicate] == expected_indices