Exemple #1
0
def execute(partial_func: partial,
            num_operations: int,
            progress=None,
            msg: str = '',
            cores=None):
    """
    Run ``partial_func`` in parallel through ``pu.execute_impl``.

    The array must have been created using
    parallel.utility.create_shared_array(shape, dtype).

    If the input array IS NOT a shared array, the data will NOT BE CHANGED!
    The reason for that is that the processes don't work on the data, but on a
    copy. When they process it and return the result, THE RESULT IS NOT
    ASSIGNED BACK TO REPLACE THE ORIGINAL, it is discarded.

    Notes kept from the original author about the underlying implementation:

    - imap_unordered gives the images back in random order
    - map and map_async do not improve speed performance
    - imap seems to be the best choice
    - Using _ in the for _ enumerate is slightly faster, because the tuple
      from enumerate isn't unpacked, and thus some time is saved.

    From performance tests, the chunksize doesn't seem to make much of a
    difference, but having larger chunks usually led to slower performance:

    Shape: (50,512,512)
    1 chunk 3.06s
    2 chunks 3.05s
    3 chunks 3.07s
    4 chunks 3.06s
    5 chunks 3.16s
    6 chunks 3.06s
    7 chunks 3.058s
    8 chunks 3.25s
    9 chunks 3.45s

    :param partial_func: A function constructed using create_partial
    :param num_operations: The expected number of operations - should match
                           the number of images being processed.
                           Also used to set the number of progress steps
    :param progress: Progress instance to use for progress reporting (optional)
    :param msg: Message to be shown on the progress bar
    :param cores: number of cores that the processing will use; any falsy
                  value (None, 0) falls back to ``pu.get_cores()``
    :return: None. The module-level ``shared_list`` is reset to an empty list
             once the execution finishes, whether it succeeded or raised.
    """
    global shared_list

    if not cores:
        cores = pu.get_cores()

    chunksize = pu.calculate_chunksize(cores)

    try:
        pu.execute_impl(num_operations, partial_func, cores, chunksize,
                        progress, msg)
    finally:
        # Always drop the references held by the module-level list, even when
        # the execution raises; otherwise a failed run would leave them behind.
        shared_list = []
Exemple #2
0
def test_execute_impl_par(mock_pool):
    """
    execute_impl called with 10 cores should go through the pool's imap
    exactly once and report progress once per returned item.
    """
    num_ops = 15
    partial_stub = mock.Mock()
    progress_stub = mock.Mock()

    # The pool is used as a context manager, so the object that actually
    # receives the imap call is whatever __enter__ hands back.
    pool = mock.Mock()
    pool.imap.return_value = range(num_ops)
    mock_pool.return_value.__enter__.return_value = pool

    execute_impl(num_ops, partial_stub, 10, 1, progress_stub, "Test")

    pool.imap.assert_called_once()
    assert progress_stub.update.call_count == num_ops
def execute(data=None,
            partial_func=None,
            cores=None,
            chunksize=None,
            progress=None,
            msg: str = ''):
    """
    Run ``partial_func`` in parallel over ``data`` through ``pu.execute_impl``.

    The array MUST HAVE BEEN created using
    parallel.utility.create_shared_array(shape, dtype).

    If the input array IS NOT a shared array, the data will NOT BE CHANGED!
    The reason for that is that the processes don't work on the data, but on a
    copy. When they process it and return the result, THE RESULT IS NOT
    ASSIGNED BACK TO REPLACE THE ORIGINAL, it is discarded.

    Notes kept from the original author about the underlying implementation.

    Function choice for iterating over the data:
        - imap_unordered gives the images back in random order!
        - map and map_async cannot replace the data in place and end up
          doubling the memory. They do not improve speed performance either
        - imap seems to be the best choice

    Using _ in the for _ enumerate is slightly faster, because the tuple
    from enumerate isn't unpacked, and thus some time is saved.

    From performance tests, the chunksize doesn't seem to make much of a
    difference, but having larger chunks usually led to slower performance:

    Shape: (50,512,512)
    1 chunk 3.06s
    2 chunks 3.05s
    3 chunks 3.07s
    4 chunks 3.06s
    5 chunks 3.16s
    6 chunks 3.06s
    7 chunks 3.058s
    8 chunks 3.25s
    9 chunks 3.45s

    :param data: the data array that will be processed in parallel; its first
                 dimension (``shape[0]``) is used as the number of operations
    :param partial_func: a function constructed using partial to pass the
                         correct arguments
    :param cores: number of cores that the processing will use; any falsy
                  value falls back to ``pu.get_cores()``
    :param chunksize: chunk of work per process(worker); any falsy value
                      falls back to ``pu.calculate_chunksize(cores)``
    :param progress: Progress instance to use for progress reporting (optional)
    :param msg: message passed on to the progress reporting
    :return: reference to the input shared array
    """
    if not cores:
        cores = pu.get_cores()

    if not chunksize:
        chunksize = pu.calculate_chunksize(cores)

    global shared_data
    # Publish the array through the module-level global while the work runs.
    shared_data = data

    try:
        img_num = shared_data.shape[0]
        pu.execute_impl(img_num, partial_func, cores, chunksize, progress, msg)
        # Read the reference back through the global, as the original did,
        # in case it was rebound while the work was running.
        result = shared_data
    finally:
        # Remove the global reference even when the execution fails, so no
        # dangling handle to the data is left that might prevent it from
        # being GCed.
        del shared_data

    return result
Exemple #4
0
def test_execute_impl_one_core(mock_pool):
    """
    With a single operation on a single core, execute_impl should call the
    partial once with index 0 and report exactly one progress update.
    """
    func_stub, progress_stub = mock.Mock(), mock.Mock()

    execute_impl(1, func_stub, 1, 1, progress_stub, "Test")

    func_stub.assert_called_once_with(0)
    progress_stub.update.assert_called_once_with(1, "Test")