def process_new_files_parallel(func, paths, workers=None, args=None, kwargs=None):
    """Apply ``func`` to every path through an executor and collect the results.

    Each path is submitted as ``func(path, *args, **kwargs)``. By default the
    number of workers is equal to the number of cores.

    Parameters
    ----------
    func : callable
        Function to apply to each path.
    paths : list of str
        Paths of the files to be processed by ``func``.
    workers : int or None
        Max number of workers to spawn. ``None`` defaults to the CPU count
        (or 1 when the CPU count cannot be determined). Any non-positive
        value (e.g. ``-1``) uses ``DummyPoolExecutor`` instead of a process
        pool, which is meant for single-worker execution (good for
        debugging).
    args : list or None
        Extra positional arguments passed to ``func`` after the path.
    kwargs : dict or None
        Arbitrary keyword arguments passed to ``func``.

    Returns
    -------
    list of Any
        The result of applying ``func`` to each path, in the same order as
        ``paths``.

    TODO:
        - Add selector for pool type (process or thread). Threads would be
          the preferred option if ``func`` frees the GIL either by using
          Numba or Cython.
    """
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    if workers is None:
        # os.cpu_count() returns None when the count is undetermined; fall
        # back to one worker so the comparison below cannot raise TypeError.
        workers = os.cpu_count() or 1

    if workers > 0:
        pool = ProcessPoolExecutor(max_workers=workers)
        logger.info(f"Using process {workers} workers for file processing.")
    else:
        pool = DummyPoolExecutor()
        logger.info("Using a single worker.")

    with pool as executor:
        futures = [executor.submit(func, p, *args, **kwargs) for p in paths]
        wait_futures(futures)

    # Results are gathered in submission order, i.e. the order of ``paths``.
    return [f.result() for f in futures]
def test_hashing_mpi_executor():
    """Submit timed tasks to a ``HashingMpiExecutor`` and report the elapsed time.

    Submits ``num_workers * num_tasks_per_worker`` calls of
    ``wait_then_echo(wait_time, i)``, waits for all of them, and prints the
    measured duration next to the expected one. The executor is always shut
    down, even if submitting or waiting raises.
    """
    from webilastik.scheduling.hashing_mpi_executor import HashingMpiExecutor

    executor = HashingMpiExecutor()
    print("Created executor!")
    try:
        # NOTE: the original kept a disabled alternative here that swapped in
        # a ProcessPoolExecutor with a fixed worker count.
        num_workers = executor.num_workers

        num_tasks_per_worker = 20
        wait_time = 1
        expected_duration = num_tasks_per_worker * wait_time

        f = functools.partial(wait_then_echo, wait_time)

        t0 = time.time()
        futures = [
            executor.submit(f, i)
            for i in range(num_workers * num_tasks_per_worker)
        ]
        _ = wait_futures(futures)
        delta = time.time() - t0

        print(
            f"All tasks took {delta}s. Expected completion in ~{expected_duration}s"
        )
        # NOTE: no timing assertion is enforced; the original left
        # `delta < expected_duration + 2` disabled.
    finally:
        # Shut down in `finally` so the executor is released even when a
        # submit/wait call above fails.
        print("Shutting down executor...")
        executor.shutdown()
        print("DONE Shutting down executor...")
def wait(self):
    """Wait on the tracked futures and return what ``wait_futures`` returns.

    When no futures are tracked an empty list is returned and nothing else
    happens. Otherwise ``GLOBAL_COUNTER`` is cleared after the wait returns.
    """
    if len(self._futures) != 0:
        outcome = wait_futures(self._futures)
        GLOBAL_COUNTER.clear()
        return outcome

    return []
def test_predicate_changes(self):
    """
    Test to validate host filter reacts correctly when the predicate returns
    a different subset of the hosts
    HostFilterPolicy

    @since 3.8
    @jira_ticket PYTHON-961
    @expected_result the excluded hosts are ignored

    @test_category policy
    """
    external_event = True
    contact_point = DefaultEndPoint("127.0.0.1")

    single_host = {Host(contact_point, SimpleConvictionPolicy)}
    all_hosts = {
        Host(DefaultEndPoint("127.0.0.{}".format(i)), SimpleConvictionPolicy)
        for i in (1, 2, 3)
    }

    def predicate(host):
        # `external_event` is read at call time, so flipping the flag below
        # switches the filter from "contact point only" to "every host".
        return host.endpoint == contact_point if external_event else True

    def attempted_hosts(session, attempts=10):
        # Run the same query several times and collect every host the
        # responses report as attempted.
        hosts = set()
        for _ in range(attempts):
            response = session.execute("SELECT * from system.local")
            hosts.update(response.response_future.attempted_hosts)
        return hosts

    hfp = ExecutionProfile(
        load_balancing_policy=HostFilterPolicy(RoundRobinPolicy(), predicate=predicate)
    )
    cluster = Cluster(
        (contact_point,),
        execution_profiles={EXEC_PROFILE_DEFAULT: hfp},
        protocol_version=PROTOCOL_VERSION,
        topology_event_refresh_window=0,
        status_event_refresh_window=0,
    )
    try:
        session = cluster.connect(wait_for_all_pools=True)

        self.assertEqual(attempted_hosts(session), single_host)

        external_event = False
        # Ask the session to re-evaluate its pools under the changed predicate
        # and wait for that to finish before querying again.
        futures = session.update_created_pools()
        wait_futures(futures, timeout=cluster.connect_timeout)

        self.assertEqual(attempted_hosts(session), all_hosts)
    finally:
        # Release the cluster's connections even when an assertion fails.
        cluster.shutdown()
def test_predicate_changes(self):
    """
    Test to validate host filter reacts correctly when the predicate returns
    a different subset of the hosts
    HostFilterPolicy

    @since 3.8
    @jira_ticket PYTHON-961
    @expected_result the excluded hosts are ignored

    @test_category policy
    """
    external_event = True
    contact_point = DefaultEndPoint("127.0.0.1")

    single_host = {Host(contact_point, SimpleConvictionPolicy)}
    all_hosts = {
        Host(DefaultEndPoint("127.0.0.{}".format(i)), SimpleConvictionPolicy)
        for i in (1, 2, 3)
    }

    def predicate(host):
        # `external_event` is looked up on every call, so changing the flag
        # later in this test changes which hosts pass the filter.
        return host.endpoint == contact_point if external_event else True

    def run_queries(session, attempts=10):
        # Execute the probe query repeatedly and gather the hosts that the
        # responses report as attempted.
        seen = set()
        for _ in range(attempts):
            response = session.execute("SELECT * from system.local")
            seen.update(response.response_future.attempted_hosts)
        return seen

    cluster = Cluster(
        (contact_point,),
        load_balancing_policy=HostFilterPolicy(RoundRobinPolicy(), predicate=predicate),
        protocol_version=PROTOCOL_VERSION,
        topology_event_refresh_window=0,
        status_event_refresh_window=0,
    )
    try:
        session = cluster.connect(wait_for_all_pools=True)

        self.assertEqual(run_queries(session), single_host)

        external_event = False
        # Have the session refresh its pools under the changed predicate and
        # wait for completion before the second round of queries.
        futures = session.update_created_pools()
        wait_futures(futures, timeout=cluster.connect_timeout)

        self.assertEqual(run_queries(session), all_hosts)
    finally:
        # Always shut the cluster down so its connections are not leaked
        # when an assertion or query fails.
        cluster.shutdown()
print(fe.to_json_value()) t = time.time() classifier = VigraPixelClassifier.train( feature_extractors=selected_feature_extractors, label_classes=[class1_annotations, class_2_annotations], random_seed=7919, ) print(f"Trained classifier in {time.time() - t} seconds") if isinstance(classifier, Exception): raise classifier f = partial(compute_tile, classifier) t = time.time() futs: "List[Future[Any]]" = [] requested_num_tiles = num_tiles or datasource.roi.get_num_tiles( tile_shape=datasource.tile_shape) # for tile in datasource.roi.get_datasource_tiles(): # _ = tile.retrieve() #prefetch for tile in datasource.roi.get_datasource_tiles(): if len(futs) >= requested_num_tiles: break futs.append(executor.submit(f, tile)) print(".", end="") _ = wait_futures(futs) print(f"ARGV: {sys.argv}") print( f"[{executor.__class__.__name__}] Predicted {len(futs)} tiles sized {datasource.tile_shape} in {time.time() - t}s" )