def test_dedupe_coverage(self):
    """Check that predicate coverage includes the expected ``name`` predicates.

    A blocker is built from the data model's predicates and indexed over
    the training records (keyed by their enumeration position). The
    covered pairs are then computed for ``self.training`` and the keys,
    passed through ``self.simple``, must be a superset of the predicate
    labels listed below.
    """
    expected_labels = {
        "SimplePredicate: (tokenFieldPredicate, name)",
        "SimplePredicate: (commonSixGram, name)",
        "TfidfTextCanopyPredicate: (0.4, name)",
        "SimplePredicate: (sortedAcronym, name)",
        "SimplePredicate: (sameThreeCharStartPredicate, name)",
        "TfidfTextCanopyPredicate: (0.2, name)",
        "SimplePredicate: (sameFiveCharStartPredicate, name)",
        "TfidfTextCanopyPredicate: (0.6, name)",
        "SimplePredicate: (wholeFieldPredicate, name)",
        "TfidfTextCanopyPredicate: (0.8, name)",
        "SimplePredicate: (commonFourGram, name)",
        "SimplePredicate: (firstTokenPredicate, name)",
        "SimplePredicate: (sameSevenCharStartPredicate, name)",
    }

    blocker = dedupe.blocking.Blocker(self.data_model.predicates())
    indexed_records = dict(enumerate(self.training_records))
    blocker.indexAll(indexed_records)

    coverage = training.coveredPairs(blocker.predicates, self.training)
    observed_labels = self.simple(coverage.keys())

    assert observed_labels.issuperset(expected_labels)
def test_covered_pairs(self):
    """Check that a constant predicate covers every supplied pair.

    The stub predicate returns the same block key ``(1,)`` for any record,
    so both copies of the ``('a', 'b')`` pair are expected to be reported
    as covered, i.e. indices ``0`` and ``1``.
    """
    # NOTE(review): this file defines ``test_covered_pairs`` again further
    # down; if both live in the same class the later definition shadows
    # this one -- confirm whether this copy is still needed.

    def p1(x, target=None):
        # ``target`` is accepted (and ignored) to match the signature used
        # by the other ``test_covered_pairs`` definition in this file.
        return (1,)

    cover = training.coveredPairs((p1,), [('a', 'b')] * 2)
    assert cover[p1] == {0, 1}
def test_covered_pairs(self):
    """Check that a constant predicate covers every supplied pair.

    The stub predicate ignores its arguments and always yields the block
    key ``(1,)``. Two identical ``('a', 'b')`` pairs are passed in, and the
    cover recorded for the predicate must be the index set ``{0, 1}``.
    """

    def p1(x, target=None):
        # A nested ``def`` instead of a named lambda, so no lint
        # suppression comment is required.
        return (1,)

    pairs = [('a', 'b')] * 2
    cover = training.coveredPairs((p1,), pairs)
    assert cover[p1] == {0, 1}