コード例 #1
0
def process_new_files_parallel(func,
                               paths,
                               workers=None,
                               args=None,
                               kwargs=None):
    """Apply a function to every path, in parallel when possible.

    Each path is submitted to an executor as ``func(path, *args, **kwargs)``
    and the results are collected in the same order as ``paths``.
    This function only runs ``func`` and gathers its results; any renaming
    of the files based on those results is left to ``func`` or the caller.
    By default the number of workers is equal to the number of cores.

    Parameters
    ----------
    func : callable
        Function to apply. It receives the path as its first positional
        argument.
    paths : list of str
        Paths of files that would be processed by func.
    workers : int or None
        Max number of workers to spawn. None defaults to the CPU count
        (or 1 when the CPU count cannot be determined).
        Any value <= 0 (e.g. -1) selects ``DummyPoolExecutor`` instead of a
        process pool (good for debugging).
    args : list or None
        Extra positional arguments appended after the path.
    kwargs : dict or None
        Arbitrary kwargs to be passed to the function.

    Returns
    -------
    list of Any
        Returns with the results of applying the function to each file.

    Raises
    ------
    Exception
        Whatever ``result()`` raises for a future, e.g. an exception raised
        by ``func`` itself.

    TODO:
    - Add selector for pool type (process or thread). Threads would be the
      preferred option if `func` frees the GIL either by using Numba or
      Cython.
    """

    if args is None:
        args = []

    if kwargs is None:
        kwargs = {}

    if workers is None:
        # os.cpu_count() returns None when the count is undetermined; fall
        # back to one worker so the comparison below cannot raise TypeError.
        workers = os.cpu_count() or 1

    if workers > 0:
        pool = ProcessPoolExecutor(max_workers=workers)
        logger.info(f"Using process {workers} workers for file processing.")
    else:
        pool = DummyPoolExecutor()
        logger.info("Using a single worker.")

    with pool as executor:
        futures = [executor.submit(func, p, *args, **kwargs) for p in paths]

    # Block until every submitted task has finished before reading results.
    wait_futures(futures)
    return [f.result() for f in futures]
コード例 #2
0
def test_hashing_mpi_executor():
    """Submit a batch of waiting tasks to a HashingMpiExecutor and time them.

    Submits ``num_workers * num_tasks_per_worker`` calls of
    ``wait_then_echo`` (partially applied with ``wait_time``), waits for all
    of them and prints the elapsed wall-clock time next to the expected
    duration. The executor is always shut down, even if submitting or
    waiting raises.

    NOTE: the timing is only reported, not asserted (a check such as
    ``delta < expected_duration + 2`` is currently not enforced).
    """
    from webilastik.scheduling.hashing_mpi_executor import HashingMpiExecutor

    executor = HashingMpiExecutor()
    print("Created executor!")
    try:
        num_workers = executor.num_workers

        num_tasks_per_worker = 20
        wait_time = 1
        # Presumably each task takes about wait_time seconds, so an even
        # spread over the workers should finish in roughly this long.
        expected_duration = num_tasks_per_worker * wait_time

        f = functools.partial(wait_then_echo, wait_time)

        t0 = time.time()
        futures = [
            executor.submit(f, i)
            for i in range(num_workers * num_tasks_per_worker)
        ]
        _ = wait_futures(futures)
        delta = time.time() - t0

        print(
            f"All tasks took {delta}s. Expected completion in ~{expected_duration}s"
        )
    finally:
        # Release the executor even when a task submission or the wait fails.
        print("Shutting down executor...")
        executor.shutdown()
        print("DONE Shutting down executor...")
コード例 #3
0
 def wait(self):
     """Wait on the stored futures and return the outcome of the wait.

     Returns an empty list when ``self._futures`` is empty. Otherwise the
     futures are passed to ``wait_futures``, ``GLOBAL_COUNTER`` is cleared,
     and whatever ``wait_futures`` returned is handed back to the caller.
     """
     pending = self._futures

     # Nothing queued: skip the wait and leave GLOBAL_COUNTER untouched.
     if len(pending) == 0:
         return []

     outcome = wait_futures(pending)
     GLOBAL_COUNTER.clear()
     return outcome
コード例 #4
0
ファイル: test_policies.py プロジェクト: tzach/python-driver
    def test_predicate_changes(self):
        """
        Test to validate that the host filter reacts correctly when the
        predicate returns a different subset of the hosts
        HostFilterPolicy
        @since 3.8
        @jira_ticket PYTHON-961
        @expected_result the excluded hosts are ignored

        @test_category policy
        """
        external_event = True
        contact_point = DefaultEndPoint("127.0.0.1")

        single_host = {Host(contact_point, SimpleConvictionPolicy)}
        all_hosts = {
            Host(DefaultEndPoint("127.0.0.{}".format(i)),
                 SimpleConvictionPolicy)
            for i in (1, 2, 3)
        }

        # The predicate reads `external_event` at call time, so flipping the
        # flag further down changes which hosts it accepts.
        predicate = lambda host: host.endpoint == contact_point if external_event else True
        hfp = ExecutionProfile(load_balancing_policy=HostFilterPolicy(
            RoundRobinPolicy(), predicate=predicate))
        cluster = Cluster((contact_point, ),
                          execution_profiles={EXEC_PROFILE_DEFAULT: hfp},
                          protocol_version=PROTOCOL_VERSION,
                          topology_event_refresh_window=0,
                          status_event_refresh_window=0)
        # Always release the cluster's resources, even if an assertion or the
        # connection attempt fails.
        self.addCleanup(cluster.shutdown)
        session = cluster.connect(wait_for_all_pools=True)

        def collect_queried_hosts():
            # Run the same query ten times and gather every attempted host.
            hosts = set()
            for _ in range(10):
                response = session.execute("SELECT * from system.local")
                hosts.update(response.response_future.attempted_hosts)
            return hosts

        self.assertEqual(collect_queried_hosts(), single_host)

        # Switch the predicate to accept every host, then wait for the
        # session to finish updating its pools before querying again.
        external_event = False
        futures = session.update_created_pools()
        wait_futures(futures, timeout=cluster.connect_timeout)

        self.assertEqual(collect_queried_hosts(), all_hosts)
コード例 #5
0
    def test_predicate_changes(self):
        """
        Test to validate that the host filter reacts correctly when the
        predicate returns a different subset of the hosts
        HostFilterPolicy
        @since 3.8
        @jira_ticket PYTHON-961
        @expected_result the excluded hosts are ignored

        @test_category policy
        """
        external_event = True
        contact_point = DefaultEndPoint("127.0.0.1")

        single_host = {Host(contact_point, SimpleConvictionPolicy)}
        all_hosts = {Host(DefaultEndPoint("127.0.0.{}".format(i)), SimpleConvictionPolicy) for i in (1, 2, 3)}

        # The predicate reads `external_event` at call time, so flipping the
        # flag further down changes which hosts it accepts.
        predicate = lambda host: host.endpoint == contact_point if external_event else True
        cluster = Cluster((contact_point,), load_balancing_policy=HostFilterPolicy(RoundRobinPolicy(),
                                                                                 predicate=predicate),
                          protocol_version=PROTOCOL_VERSION, topology_event_refresh_window=0,
                          status_event_refresh_window=0)
        # Always release the cluster's resources, even if an assertion or the
        # connection attempt fails.
        self.addCleanup(cluster.shutdown)
        session = cluster.connect(wait_for_all_pools=True)

        def collect_queried_hosts():
            # Run the same query ten times and gather every attempted host.
            hosts = set()
            for _ in range(10):
                response = session.execute("SELECT * from system.local")
                hosts.update(response.response_future.attempted_hosts)
            return hosts

        self.assertEqual(collect_queried_hosts(), single_host)

        # Switch the predicate to accept every host, then wait for the
        # session to finish updating its pools before querying again.
        external_event = False
        futures = session.update_created_pools()
        wait_futures(futures, timeout=cluster.connect_timeout)

        self.assertEqual(collect_queried_hosts(), all_hosts)
コード例 #6
0
        print(fe.to_json_value())

    t = time.time()
    classifier = VigraPixelClassifier.train(
        feature_extractors=selected_feature_extractors,
        label_classes=[class1_annotations, class_2_annotations],
        random_seed=7919,
    )
    print(f"Trained classifier in {time.time() - t} seconds")
    if isinstance(classifier, Exception):
        raise classifier
    f = partial(compute_tile, classifier)

    t = time.time()
    futs: "List[Future[Any]]" = []

    requested_num_tiles = num_tiles or datasource.roi.get_num_tiles(
        tile_shape=datasource.tile_shape)
    # for tile in datasource.roi.get_datasource_tiles():
    #     _ = tile.retrieve() #prefetch
    for tile in datasource.roi.get_datasource_tiles():
        if len(futs) >= requested_num_tiles:
            break
        futs.append(executor.submit(f, tile))
        print(".", end="")
    _ = wait_futures(futs)
    print(f"ARGV: {sys.argv}")
    print(
        f"[{executor.__class__.__name__}] Predicted {len(futs)} tiles sized {datasource.tile_shape} in {time.time() - t}s"
    )