Beispiel #1
0
    def _blockData(self, data_d):
        """Group the records of ``data_d`` into blocks and yield each block.

        ``self.blocker`` is called with the ``(record_id, record)`` items of
        ``data_d`` and is expected to produce ``(block_key, record_id)``
        pairs.  Every record is filed under each of its block keys in a
        temporary on-disk mapping (``core.TempShelve``).

        Parameters
        ----------
        data_d : mapping
            Maps record ids to records.

        Yields
        ------
        list
            One list per block key that received more than one record.  Each
            element is ``(record_id, record, smaller_keys)`` where
            ``smaller_keys`` is the set of that record's other block keys
            that sort before the key of the yielded block.

        The temporary store is closed when the generator finishes, is closed
        early by its consumer, or fails with an exception.
        """
        blocks = core.TempShelve('blocks')

        try:
            if not self.loaded_indices:
                self.blocker.indexAll(data_d)

            # groupby only merges *adjacent* items, so this relies on the
            # blocker emitting all keys of one record consecutively
            # (assumption -- confirm against the blocker implementation).
            block_groups = itertools.groupby(self.blocker(viewitems(data_d)),
                                             lambda x: x[1])

            for record_id, block in block_groups:
                record = data_d[record_id]
                block_ids = sorted(block_key for block_key, _ in block)
                while block_ids:
                    # pop() takes the largest remaining key; what is left in
                    # block_ids are the keys that sort before it.
                    block_id = block_ids.pop()
                    # Fetch, extend and store back, so the store sees the
                    # update through an explicit assignment.
                    members = blocks.get(block_id, [])
                    members.append((record_id, record, set(block_ids)))
                    blocks[block_id] = members

            if not self.loaded_indices:
                self.blocker.resetIndices()

            for block in viewvalues(blocks):
                # A block with a single record offers nothing to compare.
                if len(block) > 1:
                    yield block
        finally:
            # Runs on exhaustion, on generator.close() and on errors, so the
            # temporary store is always released.
            blocks.close()
Beispiel #2
0
    def _blockData(self, data_1, data_2):
        """Block ``data_2`` by key, then yield the blocks built for ``data_1``.

        ``self.blocker`` is called with the items of ``data_2`` and
        ``target=True`` and is expected to produce
        ``(block_key, record_id)`` pairs.  For every block key a dict
        ``{record_id: record}`` of the matching ``data_2`` records is kept in
        a temporary on-disk mapping (``core.TempShelve``).  That mapping and
        ``data_1`` are then handed to ``self._blockGenerator``, whose items
        are yielded unchanged.

        Parameters
        ----------
        data_1 : mapping
            Passed through to ``self._blockGenerator``.
        data_2 : mapping
            Maps record ids to records; indexed (unless indices were already
            loaded) and blocked here.

        Yields
        ------
        Whatever ``self._blockGenerator(data_1, blocked_records)`` yields.

        The temporary store is closed when the generator finishes, is closed
        early by its consumer, or fails with an exception.
        """
        blocked_records = core.TempShelve('blocked_records')

        try:
            if not self.loaded_indices:
                self.blocker.indexAll(data_2)

            for block_key, record_id in self.blocker(data_2.items(),
                                                     target=True):
                # Fetch, update and store back, so the store sees the update
                # through an explicit assignment.
                block = blocked_records.get(block_key, {})
                block[record_id] = data_2[record_id]
                blocked_records[block_key] = block

            for each in self._blockGenerator(data_1, blocked_records):
                yield each
        finally:
            # Runs on exhaustion, on generator.close() and on errors, so the
            # temporary store is always released.
            blocked_records.close()