def _stream2tempfiles(
        stream, jobs=1, chunksize=CHUNKSIZE, writer=_PICKLE_IO, **kwargs):

    """
    Split a stream into chunks and sort each chunk into its own tempfile.

    The stream is cut into slices of at most ``chunksize`` objects with
    ``tools.slicer()``.  Each slice is bundled with the serializer and the
    ``sorted()`` arguments into a task, and the tasks are handed to
    ``_mp_sort_into_tempfile()`` through ``tools.runner()``.

    Caller is responsible for deleting files.  Tempfile paths are generated
    with `tempfile.mkstemp()`.

    Parameters
    ----------
    stream : iter
        Input stream to sort.
    jobs : int, optional
        Sort data with a pool of N workers.
    chunksize : int, optional
        Process this many objects from the input stream in each job.  Also
        the maximum amount of objects per tempfile.
    writer : None or tinysort.io.BaseSerializer, optional
        Instance of the serializer for writing the stream to disk.
    kwargs : **kwargs, optional
        Keyword arguments for `sorted()`.

    Returns
    -------
    list
        Temporary file paths.
    """

    chunks = tools.slicer(stream, chunksize)

    # Keep this a generator so that chunks are only pulled off of the input
    # stream as the runner asks for the next task.
    tasks = (
        {'data': chunk, 'writer': writer, 'sort_args': kwargs}
        for chunk in chunks)

    with tools.runner(_mp_sort_into_tempfile, tasks, jobs) as results:
        # Drain the results while the runner is still open.
        return list(results)
def test_slicer_odd():

    """
    Slicing 5 items into chunks of 2 produces two full tuples, then a
    single-item remainder, and then the iterator is exhausted.
    """

    chunks = tools.slicer(range(5), 2)

    # Full chunks come first and the leftover item arrives alone
    for expected in ((0, 1), (2, 3), (4, )):
        assert next(chunks) == expected

    # Nothing should be left
    with pytest.raises(StopIteration):
        next(chunks)
def test_slicer_even():

    """
    Slicing 100 items into chunks of 10 produces exactly 10 tuples, each
    holding 10 consecutive values.
    """

    chunks = tools.slicer(six.moves.xrange(100), 10)

    for position, chunk in enumerate(chunks):

        assert isinstance(chunk, tuple)
        assert len(chunk) == 10

        # Chunk N should hold the consecutive values starting at 10 * N
        first = 10 * position
        expected = tuple(range(first, first + len(chunk)))
        assert chunk == expected

    # The last index seen is 9 only if exactly 10 chunks were produced
    assert position == 9