Beispiel #1
0
def _stream2tempfiles(
        stream, jobs=1, chunksize=CHUNKSIZE, writer=_PICKLE_IO, **kwargs):

    """
    Split a stream into chunks and hand each chunk to
    `_mp_sort_into_tempfile()`, collecting one result per chunk.  The
    caller owns the resulting temporary files and must delete them.

    Parameters
    ----------
    stream : iter
        Input stream to sort.
    jobs : int, optional
        Number of workers passed to `tools.runner()`.
    chunksize : int, optional
        Number of objects pulled from the stream for each task, and so the
        upper bound on objects per tempfile.
    writer : None or tinysort.io.BaseSerializer, optional
        Serializer instance forwarded to each task for writing to disk.
    kwargs : **kwargs, optional
        Forwarded to each task as ``sort_args``; keyword arguments for
        `sorted()`.

    Returns
    -------
    list
        Temporary file paths, one per chunk.
    """

    # The slicer is created up front, but the chunks themselves are only
    # pulled lazily as the runner consumes the task generator.
    chunks = tools.slicer(stream, chunksize)
    jobs_iter = (
        dict(data=chunk, writer=writer, sort_args=kwargs)
        for chunk in chunks)

    with tools.runner(_mp_sort_into_tempfile, jobs_iter, jobs) as results:
        # Materialize while the runner is still open.
        paths = list(results)

    return paths
Beispiel #2
0
def test_slicer_odd():

    """
    Slicing 5 items into chunks of 2 yields two full tuples followed by a
    shorter trailing tuple, after which the iterator is exhausted.
    """

    chunks = tools.slicer(range(5), 2)

    for expected in ((0, 1), (2, 3), (4, )):
        assert next(chunks) == expected

    # Nothing left after the trailing partial chunk.
    with pytest.raises(StopIteration):
        next(chunks)
Beispiel #3
0
def test_slicer_even():

    """
    Slicing 100 items into chunks of 10 yields exactly 10 tuples, each
    holding 10 consecutive values.
    """

    chunks = tools.slicer(six.moves.xrange(100), 10)

    for idx, actual in enumerate(chunks):

        assert isinstance(actual, tuple)
        assert len(actual) == 10

        # Each chunk should hold the consecutive run starting at 10 * idx
        start = 10 * idx
        expected = tuple(range(start, start + len(actual)))
        assert actual == expected

    # Last index seen was 9, i.e. exactly 10 chunks were produced
    assert idx == 9