def test_dedupe_coverage(self):
    """Check that the cover built from the data model's predicates
    contains at least the expected predicates for the ``name`` field.

    The blocker is indexed over ``self.training_records`` (keyed by their
    enumeration position) before the cover is computed from
    ``self.training``.
    """
    blocker = dedupe.blocking.Blocker(self.data_model.predicates())
    blocker.indexAll(dict(enumerate(self.training_records)))

    cover = training.Cover(blocker.predicates, self.training)

    # Only a lower bound is asserted: the cover may hold more predicates
    # than the ones listed here.
    expected_predicates = {
        "SimplePredicate: (tokenFieldPredicate, name)",
        "SimplePredicate: (commonSixGram, name)",
        "TfidfTextCanopyPredicate: (0.4, name)",
        "SimplePredicate: (sortedAcronym, name)",
        "SimplePredicate: (sameThreeCharStartPredicate, name)",
        "TfidfTextCanopyPredicate: (0.2, name)",
        "SimplePredicate: (sameFiveCharStartPredicate, name)",
        "TfidfTextCanopyPredicate: (0.6, name)",
        "SimplePredicate: (wholeFieldPredicate, name)",
        "TfidfTextCanopyPredicate: (0.8, name)",
        "SimplePredicate: (commonFourGram, name)",
        "SimplePredicate: (firstTokenPredicate, name)",
        "SimplePredicate: (sameSevenCharStartPredicate, name)",
    }

    assert self.simple(cover.keys()).issuperset(expected_predicates)
def test_compound(self):
    """Check the entries ``Cover.compound`` adds for lengths 2 and 3.

    Starting from a cover with keys 1-4, ``compound(n)`` is expected to
    leave the original entries in place and add the tuple-keyed entries
    listed for each case below.
    """
    start = training.Cover({1: {1, 2, 3}, 2: {1, 2}, 3: {2}, 4: {5}})

    # (compound length, entries expected to be added on top of ``start``).
    # No tuple involving key 4 is expected in either case.
    cases = (
        (2, {(1, 2): {1, 2}, (1, 3): {2}, (2, 3): {2}}),
        (3, {(1, 2): {1, 2},
             (1, 3): {2},
             (2, 3): {2},
             (1, 2, 3): {2}}),
    )

    for length, added_entries in cases:
        actual = start.copy()
        expected = start.copy()
        expected.update(added_entries)

        actual.compound(length)

        assert actual == expected
def test_covered_pairs(self):
    """Check that a Cover built from predicates and pairs records which
    pairs each predicate covers.

    Two identical pairs are supplied together with a predicate that
    returns the same key for every record; the cover entry for that
    predicate is expected to be ``{0, 1}`` (presumably the positions of
    the two pairs in the input list).
    """
    # A named function instead of a lambda bound to a name (PEP 8 / E731),
    # so no lint suppression comment is needed.
    def p1(x, target=None):
        # Constant predicate: every record yields the same single key.
        return (1,)

    cover = training.Cover((p1,), [('a', 'b')] * 2)

    assert cover[p1] == {0, 1}