Example #1
0
    def _blockedPairs(self, blocks):
        """
        Lazily produce record pairs from two-part blocks.

        Every item of `blocks` is unpacked into a (base, target) pair of
        iterables, and each element of `base` is combined with each
        element of `target` through itertools.product.

        Arguments:

        blocks -- an iterable whose items each unpack into (base, target)

        Returns one iterator that chains the pairs of all blocks together.
        """

        # Take the first block and hand it to _checkBlock before any pair
        # is produced; carry on with the iterable core.peek gives back.
        first_block, blocks = core.peek(blocks)
        self._checkBlock(first_block)

        cross_products = (itertools.product(base, target)
                          for base, target in blocks)

        return itertools.chain.from_iterable(cross_products)
Example #2
0
    def _blockedPairs(self, blocks):
        """
        Lazily produce every unordered pair of items within each block.

        Each block is sorted first, so the two members of a pair always
        appear in sorted order, and pairs are drawn with
        itertools.combinations(..., 2).

        Arguments:

        blocks -- an iterable of blocks, each a sortable collection

        Returns one iterator that chains the pairs of all blocks together.
        """

        # Take the first block and hand it to _checkBlock before any pair
        # is produced; carry on with the iterable core.peek gives back.
        first_block, blocks = core.peek(blocks)
        self._checkBlock(first_block)

        pairs_per_block = (itertools.combinations(sorted(members), 2)
                           for members in blocks)

        return itertools.chain.from_iterable(pairs_per_block)
Example #3
0
File: api.py Project: cojito/dedupe
    def _blockedPairs(self, blocks):
        """
        Lazily produce the record pairs of every block, one block at a time.

        How the pairs of a single block are formed is delegated to
        self._blockPairs; this method only flattens those per-block
        results into one stream.

        Arguments:

        blocks -- an iterable of blocks accepted by self._blockPairs

        Returns a generator over the pairs of all blocks, in block order.
        """

        # Take the first block and hand it to _checkBlock before any pair
        # is produced; carry on with the iterable core.peek gives back.
        first_block, blocks = core.peek(blocks)
        self._checkBlock(first_block)

        return (pair
                for group in blocks
                for pair in self._blockPairs(group))
Example #4
0
    def _blockedPairs(self, blocks):
        """
        Lazily produce (key, record) pairs from each block, skipping pairs
        whose attached id sets overlap.

        self._blockPairs supplies the candidate pairs of a block. Each side
        of a candidate is a (key, record, ids) triple; a candidate is kept
        only when the two `ids` sets have no member in common, and the
        `ids` are dropped from what is yielded.

        NOTE(review): the ids are presumably the blocks in which the record
        was already compared, so that each pair is emitted once -- confirm
        against the code that builds the blocks.

        Arguments:

        blocks -- an iterable of blocks accepted by self._blockPairs

        Returns a generator of ((key_1, record_1), (key_2, record_2)) tuples.
        """

        # Take the first block and hand it to _checkBlock before any pair
        # is produced; carry on with the iterable core.peek gives back.
        first_block, blocks = core.peek(blocks)
        self._checkBlock(first_block)

        def candidate_pairs():
            # Bind both callables once, outside the loops.
            share_nothing = set.isdisjoint
            make_pairs = self._blockPairs
            for group in blocks:
                for left, right in make_pairs(group):
                    left_key, left_record, left_ids = left
                    right_key, right_record, right_ids = right
                    if not share_nothing(left_ids, right_ids):
                        continue
                    yield (left_key, left_record), (right_key, right_record)

        return candidate_pairs()