예제 #1
0
 def test_dedupe_coverage(self):
     """Check that the training cover contains the expected name predicates.

     Builds a blocker from the data model's predicates, indexes the
     training records keyed by their enumeration position, computes a
     cover over ``self.training``, and asserts that ``self.simple``
     applied to the cover's keys is a superset of the predicate
     descriptions listed below.
     """
     predicates = self.data_model.predicates()
     blocker = dedupe.blocking.Blocker(predicates)
     # Records are indexed under their position in ``training_records``.
     blocker.indexAll(dict(enumerate(self.training_records)))
     coverage = training.Cover(blocker.predicates, self.training)

     expected = {
         "SimplePredicate: (tokenFieldPredicate, name)",
         "SimplePredicate: (commonSixGram, name)",
         "TfidfTextCanopyPredicate: (0.4, name)",
         "SimplePredicate: (sortedAcronym, name)",
         "SimplePredicate: (sameThreeCharStartPredicate, name)",
         "TfidfTextCanopyPredicate: (0.2, name)",
         "SimplePredicate: (sameFiveCharStartPredicate, name)",
         "TfidfTextCanopyPredicate: (0.6, name)",
         "SimplePredicate: (wholeFieldPredicate, name)",
         "TfidfTextCanopyPredicate: (0.8, name)",
         "SimplePredicate: (commonFourGram, name)",
         "SimplePredicate: (firstTokenPredicate, name)",
         "SimplePredicate: (sameSevenCharStartPredicate, name)",
     }
     assert self.simple(coverage.keys()).issuperset(expected)
예제 #2
0
    def test_compound(self):
        """Check ``Cover.compound`` for compound lengths 2 and 3.

        For each length, a fresh copy of the base cover is compounded in
        place and compared against a copy that was extended by hand with
        the tuple-keyed entries the test expects.
        """
        base = training.Cover({1: {1, 2, 3}, 2: {1, 2}, 3: {2}, 4: {5}})

        # Length 2: only pair keys are expected to be added.
        actual = base.copy()
        expected = actual.copy()
        pair_entries = {(1, 2): {1, 2}, (1, 3): {2}, (2, 3): {2}}
        expected.update(pair_entries)

        actual.compound(2)
        assert actual == expected

        # Length 3: the same pairs plus the single triple are expected.
        actual = base.copy()
        expected = base.copy()
        pair_and_triple_entries = {
            (1, 2): {1, 2},
            (1, 3): {2},
            (2, 3): {2},
            (1, 2, 3): {2},
        }
        expected.update(pair_and_triple_entries)

        actual.compound(3)

        assert actual == expected
예제 #3
0
    def test_covered_pairs(self):
        """Check that a constant predicate is recorded as covering each pair.

        The cover is built from one predicate and two identical record
        pairs; the entry for that predicate is expected to hold both pair
        indices, ``{0, 1}``.
        """
        def p1(x, target=None):
            # Ignores its arguments and always yields the same key.
            return (1, )

        cover = training.Cover((p1, ), [('a', 'b')] * 2)

        assert cover[p1] == {0, 1}