Example #1
    def compute_batch_parallel(fxn, keys):
        """Execute a function in parallel on the entire batch of keys, using a multi-threaded executor.

        This is a helper function which subclasses of LazyDict can use to implement `compute_batch`.
        Note that due to Python's GIL, speedups will only be obtained if `fxn` is IO-bound.

        Args:
            fxn (Callable): function to be called in parallel
            keys (list): a list of keys

        Returns:
            list: result is equivalent to [fxn(key) for key in keys]
        """
        no_result_failure = Failure.silent(
            'No result returned by SimpleExecutor.')
        results = [no_result_failure] * len(keys)
        with SimpleExecutor(fxn) as ex:
            for i, key in enumerate(keys):
                ex.submit(i, key)
            for i, val in ex.results():
                results[i] = val

        # every key should have been overwritten with a real result
        for result in results:
            assert result is not no_result_failure
        return results
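
The docstring above says subclasses of LazyDict can use this helper to implement `compute_batch`. Purely as a rough sketch, a subclass might wire its per-key `compute` through the helper like this; LazyDict's exact interface is not shown on this page, so the `compute`/`compute_batch` method names, the `SlowSquareDict` class, and the assumption that `compute_batch_parallel` is reachable as a plain function are guesses, not part of the library.

    import time

    class SlowSquareDict(LazyDict):  # hypothetical subclass; LazyDict assumed importable from the project
        def compute(self, key):
            # stand-in for IO-bound per-key work (e.g. a network request)
            time.sleep(0.1)
            return key * key

        def compute_batch(self, keys):
            # fan the per-key compute out over worker threads
            return compute_batch_parallel(self.compute, keys)
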
Example #2
    def test_context_manager(self):
        fxn = lambda x: 2 * x
        with SimpleExecutor(fxn, max_workers=2) as ex:
            for i, x in enumerate(range(10)):
                ex.submit(i, x)
            results = {k: v for k, v in ex.results()}

        correct = {k: 2 * k for k in range(10)}
        assert results == correct
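
The test above pins down the protocol the examples rely on: `submit(key, x)` tags each input, and `results()` yields `(key, value)` pairs as calls complete. SimpleExecutor's own source is not shown on this page; purely to illustrate that protocol, a minimal stand-in can be built on concurrent.futures. `MiniExecutor` is a hypothetical name, and it omits the Failure-wrapping of worker exceptions that the other examples appear to rely on.

    from concurrent.futures import ThreadPoolExecutor, as_completed

    class MiniExecutor(object):
        """Illustrative stand-in for SimpleExecutor: submit tagged inputs, collect (tag, result) pairs."""

        def __init__(self, fxn, max_workers=2):
            self._fxn = fxn
            self._pool = ThreadPoolExecutor(max_workers=max_workers)
            self._futures = {}  # Future -> tag

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            self._pool.shutdown(wait=True)

        def submit(self, tag, x):
            self._futures[self._pool.submit(self._fxn, x)] = tag

        def results(self):
            # yield (tag, value) pairs in completion order, not submission order
            for future in as_completed(self._futures):
                yield self._futures[future], future.result()

    with MiniExecutor(lambda x: 2 * x, max_workers=2) as ex:
        for i, x in enumerate(range(10)):
            ex.submit(i, x)
        assert dict(ex.results()) == {k: 2 * k for k in range(10)}
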
Example #3
def _get_all_hits(get_page):
    """Given a function that retrieves a single page of HITs, retrieve all HITs.

    WARNING:
        - this function can be quite slow.
        - results are returned in no particular order.

    Args:
        get_page (Callable[[int, int], list[HIT]]): a function which takes a page size and a page number
            and returns a page of HITs (an iterable of HITs that also exposes a TotalNumResults attribute).

            kwargs:
                page_size (int)
                page_number (int)

    Returns:
        generator[HIT]
    """
    page_size = 100  # HITs per page

    # compute the pages that need to be fetched
    search_results = get_page(page_size=page_size, page_number=1)
    total_hits = int(search_results.TotalNumResults)
    total_pages = total_hits // page_size + bool(total_hits % page_size)
    page_nums = list(range(1, total_pages + 1))

    # fetch all the pages in parallel
    fetch_page = lambda i: get_page(page_size=page_size, page_number=i)
    with SimpleExecutor(fetch_page) as executor:
        for i in page_nums:
            executor.submit(i, i)
        for i, page in verboserate(executor.results(),
                                   desc='Fetching pages of HITs',
                                   total=total_pages):
            if isinstance(page, Failure):
                print(page.traceback)
                continue
            for hit in page:
                yield hit
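
To see the page shape `_get_all_hits` expects from `get_page`, here is a hypothetical driver; `FakePage`, `ALL_HITS`, and the fake `get_page` are made-up stand-ins (a real caller would presumably wrap whatever API returns pages of HITs), and running the snippet still requires the project's SimpleExecutor, Failure, and verboserate helpers.

    class FakePage(list):
        """A list of HITs that also carries TotalNumResults, as _get_all_hits reads from the first page."""
        def __init__(self, hits, total):
            super(FakePage, self).__init__(hits)
            self.TotalNumResults = total

    ALL_HITS = ['hit-{}'.format(n) for n in range(250)]

    def get_page(page_size, page_number):
        # return one page of fake HITs plus the overall total
        start = (page_number - 1) * page_size
        return FakePage(ALL_HITS[start:start + page_size], total=len(ALL_HITS))

    hits = list(_get_all_hits(get_page))
    assert sorted(hits) == sorted(ALL_HITS)  # pages may arrive in any order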