def test_merge_sorted():
    """Exercise ``merge_sorted`` on numeric and string inputs, with and without a key.

    The cases cover duplicate values across inputs, interleaved inputs,
    inputs of unequal length (including an empty one), key functions that
    describe descending order or produce tied keys, and the ``identity`` key.
    """
    def negated(x):
        return -x

    def thirds(x):
        return x // 3

    def negated_ord(ch):
        return -ord(ch)

    # Plain merges without a key function.
    merged = list(merge_sorted([1, 2, 3], [1, 2, 3]))
    assert merged == [1, 1, 2, 2, 3, 3]

    merged = list(merge_sorted([1, 3, 5], [2, 4, 6]))
    assert merged == [1, 2, 3, 4, 5, 6]

    merged = list(merge_sorted([1], [2, 4], [3], []))
    assert merged == [1, 2, 3, 4]

    # Descending inputs are merged through a negating key.
    merged = list(merge_sorted([5, 3, 1], [6, 4, 3], [], key=negated))
    assert merged == [6, 5, 4, 3, 3, 1]

    # Tied keys: the expected output lists items of earlier inputs first.
    merged = list(merge_sorted([2, 1, 3], [1, 2, 3], key=thirds))
    assert merged == [2, 1, 1, 2, 3, 3]

    merged = list(merge_sorted([2, 3], [1, 3], key=thirds))
    assert merged == [2, 1, 3, 3]

    # Strings are merged character by character.
    text = "".join(merge_sorted("abc", "abc", "abc"))
    assert text == "aaabbbccc"

    text = "".join(merge_sorted("abc", "abc", "abc", key=ord))
    assert text == "aaabbbccc"

    text = "".join(merge_sorted("cba", "cba", "cba", key=negated_ord))
    assert text == "cccbbbaaa"

    merged = list(merge_sorted([1], [2, 3, 4], key=identity))
    assert merged == [1, 2, 3, 4]
def test_merge_sorted():
    """Check ``merge_sorted`` results for lists and strings, keyed and unkeyed.

    Two small local helpers collect the merged stream either into a list or
    into a string so that each assertion states only inputs and expectation.
    """
    def as_list(*seqs, **options):
        return list(merge_sorted(*seqs, **options))

    def as_text(*seqs, **options):
        return ''.join(merge_sorted(*seqs, **options))

    # Unkeyed merges: duplicates, interleaving, uneven and empty inputs.
    assert as_list([1, 2, 3], [1, 2, 3]) == [1, 1, 2, 2, 3, 3]
    assert as_list([1, 3, 5], [2, 4, 6]) == [1, 2, 3, 4, 5, 6]
    assert as_list([1], [2, 4], [3], []) == [1, 2, 3, 4]

    # Keyed merges: descending order via negation, then tied keys via // 3.
    assert as_list([5, 3, 1], [6, 4, 3], [], key=lambda x: -x) == [6, 5, 4, 3, 3, 1]
    assert as_list([2, 1, 3], [1, 2, 3], key=lambda x: x // 3) == [2, 1, 1, 2, 3, 3]
    assert as_list([2, 3], [1, 3], key=lambda x: x // 3) == [2, 1, 3, 3]

    # String inputs.
    assert as_text('abc', 'abc', 'abc') == 'aaabbbccc'
    assert as_text('abc', 'abc', 'abc', key=ord) == 'aaabbbccc'
    assert as_text('cba', 'cba', 'cba', key=lambda x: -ord(x)) == 'cccbbbaaa'

    # Explicit identity key.
    assert as_list([1], [2, 3, 4], key=identity) == [1, 2, 3, 4]
def merge_sorted(self, blocks):
    """Collect the received blocks into a bucket and return its items as one stream.

    Every block in ``blocks`` is read and appended to a bucket created by
    ``self.dset.src.bucket``. While receiving, a memory release helper is
    active which may invoke ``spill`` to serialize the bucket.

    Returns ``iter(bucket)`` when the bucket has no batches; otherwise the
    module-level ``merge_sorted`` over the bucket itself plus, per batch, the
    concatenated contents of its blocks read in reverse order, using
    ``self.dset.src.key`` as the key.
    """
    src = self.dset.src
    bucket = src.bucket(
        (self.dset.id, self.idx),
        src.key,
        None,
        src.block_size_mb,
        src.memory_container,
        src.disk_container,
    )

    def spill(nbytes):
        # The requested byte count is not used: the bucket is serialized
        # and whatever serialize() reports is logged and returned.
        freed = bucket.serialize(True)
        logger.debug('spilled %.2f mb', freed / 1024 / 1024)
        return freed

    memory = self.dset.ctx.node.memory
    total_bytes = 0

    # NOTE(review): the extent of this ``with`` block was reconstructed from
    # whitespace-collapsed source; it is assumed to cover only the receive
    # loop -- confirm against the original layout.
    with memory.async_release_helper(self.id, spill, priority=1) as memcheck:
        for received in blocks:
            total_bytes += received.size
            items = received.read()
            bucket.extend(items)
            memcheck()
            logger.debug('received block of %.2f mb, %r items',
                         received.size / 1024 / 1024, len(items))

    logger.info('sorting %.1f mb', total_bytes / 1024 / 1024)

    batches = bucket.batches
    if not batches:
        return iter(bucket)

    # One stream for the bucket itself and one per batch.
    streams = [bucket] + [
        chain.from_iterable(blk.read() for blk in reversed(batch))
        for batch in batches
    ]
    return merge_sorted(*streams, key=self.dset.src.key)
def test_merge_sorted():
    """Run ``merge_sorted`` over a table of inputs and compare with the expectation.

    Each case is ``(collect, sequences, options, expected)``: the sequences
    and keyword options are passed to ``merge_sorted`` and the resulting
    stream is gathered with ``collect`` before being compared.
    """
    cases = [
        # Unkeyed merges: duplicates, interleaving, uneven and empty inputs.
        (list, ([1, 2, 3], [1, 2, 3]), {}, [1, 1, 2, 2, 3, 3]),
        (list, ([1, 3, 5], [2, 4, 6]), {}, [1, 2, 3, 4, 5, 6]),
        (list, ([1], [2, 4], [3], []), {}, [1, 2, 3, 4]),
        # Descending inputs with a negating key.
        (list, ([5, 3, 1], [6, 4, 3], []), {'key': lambda x: -x}, [6, 5, 4, 3, 3, 1]),
        # Tied keys.
        (list, ([2, 1, 3], [1, 2, 3]), {'key': lambda x: x // 3}, [2, 1, 1, 2, 3, 3]),
        (list, ([2, 3], [1, 3]), {'key': lambda x: x // 3}, [2, 1, 3, 3]),
        # String inputs.
        (''.join, ('abc', 'abc', 'abc'), {}, 'aaabbbccc'),
        (''.join, ('abc', 'abc', 'abc'), {'key': ord}, 'aaabbbccc'),
        (''.join, ('cba', 'cba', 'cba'), {'key': lambda x: -ord(x)}, 'cccbbbaaa'),
    ]

    for collect, sequences, options, expected in cases:
        assert collect(merge_sorted(*sequences, **options)) == expected
def tasks():
    """Yield the result of ``load()`` for each 'deadlines.tasks' entry point.

    The entry points come from ``iter_entry_points('deadlines.tasks')`` and
    are passed through ``merge_sorted`` before being loaded one at a time.
    """
    entry_points = merge_sorted(iter_entry_points('deadlines.tasks'))
    for entry_point in entry_points:
        yield entry_point.load()
def target_classes():
    """Yield each 'deadlines.targets' entry point.

    The entry points come from ``iter_entry_points('deadlines.targets')`` and
    are passed through ``merge_sorted``. They are yielded as-is; ``load()``
    is not called here.
    """
    entry_points = merge_sorted(iter_entry_points('deadlines.targets'))
    for entry_point in entry_points:
        yield entry_point
def test_merge_sorted():
    """Verify ``merge_sorted`` on lists, strings and tuples keyed by one field.

    Besides the basic keyed and unkeyed merges, the final case merges lists
    of pairs ordered by their second element only.
    """
    def negate(x):
        return -x

    def third(x):
        return x // 3

    def negate_ord(ch):
        return -ord(ch)

    def second(pair):
        return pair[1]

    # Unkeyed merges.
    expected = [1, 1, 2, 2, 3, 3]
    assert list(merge_sorted([1, 2, 3], [1, 2, 3])) == expected

    expected = [1, 2, 3, 4, 5, 6]
    assert list(merge_sorted([1, 3, 5], [2, 4, 6])) == expected

    expected = [1, 2, 3, 4]
    assert list(merge_sorted([1], [2, 4], [3], [])) == expected

    # Keyed merges: descending order, then tied keys.
    expected = [6, 5, 4, 3, 3, 1]
    assert list(merge_sorted([5, 3, 1], [6, 4, 3], [], key=negate)) == expected

    expected = [2, 1, 1, 2, 3, 3]
    assert list(merge_sorted([2, 1, 3], [1, 2, 3], key=third)) == expected

    expected = [2, 1, 3, 3]
    assert list(merge_sorted([2, 3], [1, 3], key=third)) == expected

    # String inputs.
    expected = 'aaabbbccc'
    assert ''.join(merge_sorted('abc', 'abc', 'abc')) == expected
    assert ''.join(merge_sorted('abc', 'abc', 'abc', key=ord)) == expected

    expected = 'cccbbbaaa'
    assert ''.join(merge_sorted('cba', 'cba', 'cba', key=negate_ord)) == expected

    # Explicit identity key.
    expected = [1, 2, 3, 4]
    assert list(merge_sorted([1], [2, 3, 4], key=identity)) == expected

    # Pairs are ordered by their second element only.
    data = [
        [(1, 2), (0, 4), (3, 6)],
        [(5, 3), (6, 5), (8, 8)],
        [(9, 1), (9, 8), (9, 9)],
    ]
    expected = [
        (9, 1), (1, 2), (5, 3),
        (0, 4), (6, 5), (3, 6),
        (8, 8), (9, 8), (9, 9),
    ]
    assert list(merge_sorted(*data, key=second)) == expected
def test_merge_sorted():
    """Check ``merge_sorted`` across keyed, unkeyed, string and degenerate inputs.

    Covers the basic merges, key functions (including ``identity``), pairs
    ordered by their second element, a call with no inputs, a single input,
    and merges of two and four inputs with and without a key.
    """
    def expect(expected, *seqs, **options):
        assert list(merge_sorted(*seqs, **options)) == expected

    def expect_text(expected, *seqs, **options):
        assert "".join(merge_sorted(*seqs, **options)) == expected

    # Unkeyed merges.
    expect([1, 1, 2, 2, 3, 3], [1, 2, 3], [1, 2, 3])
    expect([1, 2, 3, 4, 5, 6], [1, 3, 5], [2, 4, 6])
    expect([1, 2, 3, 4], [1], [2, 4], [3], [])

    # Keyed merges: descending order, then tied keys.
    expect([6, 5, 4, 3, 3, 1], [5, 3, 1], [6, 4, 3], [], key=lambda x: -x)
    expect([2, 1, 1, 2, 3, 3], [2, 1, 3], [1, 2, 3], key=lambda x: x // 3)
    expect([2, 1, 3, 3], [2, 3], [1, 3], key=lambda x: x // 3)

    # String inputs.
    expect_text("aaabbbccc", "abc", "abc", "abc")
    expect_text("aaabbbccc", "abc", "abc", "abc", key=ord)
    expect_text("cccbbbaaa", "cba", "cba", "cba", key=lambda x: -ord(x))

    # Explicit identity key.
    expect([1, 2, 3, 4], [1], [2, 3, 4], key=identity)

    # Pairs are ordered by their second element only.
    data = [
        [(1, 2), (0, 4), (3, 6)],
        [(5, 3), (6, 5), (8, 8)],
        [(9, 1), (9, 8), (9, 9)],
    ]
    expect(
        [(9, 1), (1, 2), (5, 3), (0, 4), (6, 5), (3, 6), (8, 8), (9, 8), (9, 9)],
        *data,
        key=lambda x: x[1]
    )

    # Degenerate calls: no inputs at all, and a single input.
    expect([])
    expect([1, 2, 3], [1, 2, 3])

    # Two and four inputs, with and without the identity key.
    expect([1, 2, 3, 4, 5], [1, 4, 5], [2, 3])
    expect([1, 2, 3, 4, 5], [1, 4, 5], [2, 3], key=identity)
    expect([1, 2, 3, 4, 5, 6, 7], [1, 5], [2], [4, 7], [3, 6], key=identity)