def _search_async_locked(self, pool: futures.Executor, parser: MP3Parser,
                         depth: int) -> List[futures.Future]:
    """
    Schedule the scan of ``self._fullpath`` on ``pool``.

    Each sub-directory is wrapped in a ``Directory``, appended to
    ``self.subdirectories`` and has its ``search_async`` submitted to the
    pool. Each regular file whose name ends in ".mp3" (case-insensitive) and
    whose size does not exceed ``self.maxFileSize`` is submitted to
    ``self._parse_song``; ``self._after_parse_song`` is attached as the
    done-callback of that task.

    Must be called with the lock held.

    Returns the list of every future that was submitted.
    Raises IOError when ``self._fullpath`` is not a directory.
    """
    cache = self._load_cache()
    assert self._fullpath is not None
    if not self._fullpath.is_dir():
        raise IOError(f'Directory "{self._fullpath}" does not exist')
    self.log.debug('Search %s', self._fullpath.name)

    scheduled: List[futures.Future] = []
    for position, entry in enumerate(self._fullpath.iterdir()):
        entry_path = str(entry)
        info = os.stat(entry_path)
        mode = info.st_mode

        if stat.S_ISDIR(mode):
            child = Directory(self, 1000 * (self.ref_id + position), entry)
            self.subdirectories.append(child)
            scheduled.append(
                pool.submit(child.search_async, pool, parser, depth + 1))
            continue

        # Anything that is not a small-enough regular ".mp3" file is skipped.
        if not stat.S_ISREG(mode):
            continue
        if not entry_path.lower().endswith(".mp3"):
            continue
        if info.st_size > self.maxFileSize:
            continue

        self._todo += 1
        song_task = pool.submit(self._parse_song, parser, cache, entry,
                                position)
        song_task.add_done_callback(self._after_parse_song)
        scheduled.append(song_task)
    return scheduled
def submit_tasks(executor: Executor, window_size: int, fn, *iterables):
    """
    Yield futures for ``fn(*args)`` over ``zip(*iterables)``, in input order.

    Behaves like ``executor.map`` except that work is submitted through a
    sliding window: ``window_size`` tasks are queued up front, and a new task
    is submitted only after an earlier future has been handed to the caller.
    This keeps the number of live Future objects small, which matters when
    the input is long. It works best when all tasks take about the same time.
    """
    exhausted = object()  # marks the end of the argument stream
    pending_args = zip(*iterables)

    # Prime the window with the first batch of tasks.
    window = deque(maxlen=window_size)
    for initial_args in itertools.islice(pending_args, window_size):
        window.append(executor.submit(fn, *initial_args))

    while window:
        # As in Executor.map: do not bind the popped future to a local name,
        # so it can be released as soon as the caller drops it.
        yield window.popleft()
        upcoming = next(pending_args, exhausted)
        if upcoming is exhausted:
            break
        window.append(executor.submit(fn, *upcoming))

    # Nothing left to submit: hand out whatever is still queued.
    while window:
        yield window.popleft()
def screencap_h264(device: AdbDevice, executor: Executor):
    """
    Push the scrcpy server to the device, start it and report the elapsed time.

    Steps performed:
      1. push ``scrcpy-server.jar`` to ``/data/local/tmp`` on the device;
      2. set up ``adb reverse tcp:5556 -> localabstract:scrcpy``;
      3. launch the scrcpy server through ``device.shell`` on ``executor``
         (the shell output is printed when the command returns).

    Receiving the H.264 stream is not implemented yet; see the TODO below.

    :param device: device handle providing ``push``, ``adb_output`` and ``shell``
    :param executor: executor used to run the blocking shell command
    """
    start = time()
    device.push("../scrcpy-win64/scrcpy-server.jar",
                "/data/local/tmp/scrcpy-server.jar")
    print("Pushed server component")

    # The socket used to be created and never closed. The context manager
    # releases the descriptor even if one of the adb calls raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        port = device.adb_output("reverse", "tcp:5556",
                                 "localabstract:scrcpy")
        print("[ADB]", port)
        # TODO: receive the stream on ``sock``:
        #   sock.bind(("127.0.0.1", 5556)); sock.listen()
        executor.submit(lambda: print(
            "[SCRCPY]",
            device.shell("CLASSPATH=/data/local/tmp/scrcpy-server.jar "
                         "app_process / com.genymobile.scrcpy.Server 0 8000000 "
                         "false - false true")))
        print("Executed scrcpy")
        print("Waiting for connection")
        # TODO: conn, info = sock.accept(); read the header bytes with
        #   conn.recv(...), stop the server with device.shell("\x03") and
        #   close the connection.
    print("Recv h264 took %f seconds" % (time() - start))
def status(pool: Executor, cwd: PurePath) -> VCStatus:
    """
    Collect the version-control status for ``cwd``.

    The root lookup and the two stat queries (main repository and
    sub-modules) run concurrently on ``pool`` and their results are combined
    by ``_parse``.

    Returns an empty ``VCStatus()`` when no ``git`` executable is found or
    when one of the queries raises ``CalledProcessError``.
    """
    if not which("git"):
        return VCStatus()

    try:
        root_task = pool.submit(root, cwd=cwd)
        main_task = pool.submit(_stat_main, cwd=cwd)
        sub_task = pool.submit(_stat_sub_modules, cwd=cwd)
        pending = (root_task, main_task, sub_task)
        wait(cast(Sequence[Future], pending))

        # Read the root first so errors surface in the same order as before.
        repo_root = root_task.result()
        stats = chain(main_task.result(), sub_task.result())
        return _parse(repo_root, stats=stats)
    except CalledProcessError:
        return VCStatus()
def batchedPoolRunner(testgenfunc: Function, dispatchfunc: Function, pool: Executor, size: int, validator: Function) -> int:
    """
    Run the jobs produced by ``testgenfunc`` on ``pool`` in batches.

    ``testgenfunc`` is called once and must return a generator of two-tuples
    ``(function, params)`` where ``params`` is a dict that is passed to the
    function as keyword arguments. Jobs are taken from it in chunks of
    ``size`` (via ``chunk``) and submitted to the pool.

    Every non-None result is checked with ``validator``; a failed check is
    reported through ``validationFailure`` and a None result through
    ``serviceError``. When ``dispatchfunc`` is not None, every truthy result
    is submitted back to the pool as ``dispatchfunc(result)``, so its return
    value must be compatible with its own input.

    ``pool`` can be a ThreadPoolExecutor or a ProcessPoolExecutor, but per
    the original author process pools do not currently work.

    NOTE: do not write a generator that yields closures over its loop
    variable. Closures bind the variable, not its value at creation time, so
    all of them end up seeing the last value (late binding).

    :return: the number of completed jobs that were processed
    """
    td = testgenfunc()
    futures = set([pool.submit(f, **p) for f, p in chunk(size, td)])
    count = 0
    debug("batchedPoolRunner: Starting main loop.")
    while len(futures) > 0:
        # Jobs handled in this pass; removed from ``futures`` afterwards.
        done = set()
        # as_completed works on a snapshot, so futures added below are only
        # picked up by the next pass of the outer loop.
        for job in as_completed(futures):
            if count % 1000 == 0:
                # Force a collection every 1000 processed jobs.
                gc.collect()
            rslt = job.result() if job is not None else None
            if rslt is not None:
                if not validator(rslt):
                    validationFailure("Test case for %s failed validation."
                                      % rslt.Function)
            else:
                serviceError("FutureResult from thread pool is None.")
            done.add(job)
            count += 1
            if dispatchfunc is not None:
                # Feed truthy results back into the pool.
                if rslt:
                    futures.add(pool.submit(dispatchfunc, rslt))
            # Progress marker, one dot per processed job.
            # NOTE(review): nesting reconstructed from a collapsed line -
            # confirm the dot is written per job and not per pass.
            sys.stdout.write(".")
            sys.stdout.flush()
        futures = futures - done
        if len(futures) < 1000:
            # Top up with the next chunk from the generator; once it is
            # exhausted nothing is added and the loop drains.
            debug("Adding new jobs")
            futures.update(
                set([pool.submit(f, **p) for f, p in chunk(size, td)]))
    debug(count)
    return count
def preprocess_cycles(client: InfluxDBClient, executor: Executor, manager: SyncManager, dry_run=False):
    """
    Detect charge cycles for every "samples" series with each detector.

    One ``preprocess_cycle`` task is submitted per (detector, series) pair.
    Progress is reported through a manager queue consumed by
    ``async_progress``; once all tasks are done the results are sorted by
    their first column and logged as a table.
    """
    logger.info("Preprocessing charge cycles")
    queue = manager.Queue()
    series = client.list_series("samples")

    # TODO merge results of different detectors
    # (attribute, extra WHERE clause, detector instance)
    detections = [
        ('charger_acvoltage', 'charger_acvoltage>0 OR veh_speed > 0',
         ChargeCycleACVoltageDetection(time_epoch=client.time_epoch)),
        ('ischarging', 'ischarging>0 OR veh_speed > 0',
         ChargeCycleIsChargingDetection(time_epoch=client.time_epoch)),
        ('ac_hvpower', 'ac_hvpower>0 OR veh_speed > 0',
         ChargeCycleACHVPowerDetection(time_epoch=client.time_epoch)),
        ('hvbatt_soc', 'hvbatt_soc<200',
         ChargeCycleDerivDetection(time_epoch=client.time_epoch)),
    ]
    base_fields = ("time", "participant", "hvbatt_soc", "veh_speed")

    futures = []
    for attr, where, detector in detections:
        # Each detector gets its own field list; its attribute is added
        # only when it is not already one of the base fields.
        fields = list(base_fields)
        if attr not in fields:
            fields.append(attr)
        for nr, (sname, sselector) in enumerate(series):
            futures.append(
                executor.submit(preprocess_cycle, nr, client, queue, sname,
                                join_selectors([sselector, where]), fields,
                                detector, dry_run))

    logger.debug("Tasks started, waiting for results...")
    async_progress(futures, queue)
    data = [task.result() for task in futures]
    logger.debug("Tasks done")

    data.sort(key=lambda row: row[0:1])
    table = tabulate(data, headers=["attr", "#", "cycles", "cycles_disc"])
    logger.info(__("Detected charge cycles:\n{}", table))
def _run_in_pool(executor: Executor, func: Func[T, R], items: List[T], *args: Any, chunk_size: Optional[int] = None) -> Iterable[R]:
    """
    Run ``func(chunk, *args)`` on ``executor`` for every chunk of ``items``.

    Results are yielded in completion order while a tqdm bar advances by the
    size of each finished chunk. The executor is used as a context manager,
    so it is shut down when the run ends. Exceptions raised by tasks are
    collected; after all tasks have been consumed the first one is re-raised.

    ``chunk_size`` defaults to 1.
    """
    effective_size = 1 if chunk_size is None else chunk_size
    chunks: Iterable[List[T]] = _split_to_chunks(items, effective_size)
    exceptions: List[BaseException] = []

    with executor:
        # Maps each pending future to the number of items it covers.
        sizes: Dict[Future, int] = {}
        for chunk in chunks:
            submitted = executor.submit(func, chunk, *args)
            sizes[submitted] = len(chunk)

        with tqdm(total=len(items)) as pbar:
            for finished in as_completed(set(sizes.keys())):
                pbar.update(sizes.pop(finished))
                error = finished.exception()
                if error is None:
                    yield finished.result()
                else:
                    exceptions.append(error)

    if exceptions:
        raise exceptions[0]
def _bulk_cleanup(self, jobs: Sequence["Job"], ex: Executor) -> Iterable["Job"]:
    """
    Delete the log and output directories of ``jobs`` using ``ex``.

    Job statuses are refreshed first. Every job must belong to this driver
    class, and a job that is SUBMITTED or RUNNING aborts the cleanup with
    ``InvalidJobStatus``. Removal is best effort: a directory that cannot be
    removed is logged and skipped. Jobs are yielded as their cleanup task
    completes.
    """
    jobs = self.bulk_sync_status(jobs)

    # safety check
    for candidate in jobs:
        assert candidate.driver == self.__class__
        if candidate.status in (Job.Status.SUBMITTED, Job.Status.RUNNING):
            raise InvalidJobStatus(
                f"Job {candidate} might be running, please kill first")

    logger.debug("Cleaning up %d jobs", len(jobs))

    def remove_job_dirs(job: Job) -> Job:
        # Each directory is handled independently so that a failure on one
        # does not prevent the other from being removed.
        for key in ["log_dir", "output_dir"]:
            try:
                target = job.data[key]
                if os.path.exists(target):
                    logger.debug("Path %s exists, attempting to delete",
                                 target)
                    rmtree(target)
            except Exception:
                logger.error("Unable to remove directory %s", key)
        return job

    pending = [ex.submit(remove_job_dirs, job) for job in jobs]
    for finished in as_completed(pending):
        yield finished.result()
def __init__(
    self,
    executor: Executor,
    runner: CaseRunner,
    cases: Iterable[Case],
    context: Optional[Context] = None,
):
    """
    Start running ``cases`` in the background.

    ``_run_cases_in_order(runner, cases, context)`` is submitted to
    ``executor`` as a single task and the resulting future is stored in
    ``self._future``.
    """
    pending = executor.submit(_run_cases_in_order, runner, cases, context)
    self._future = pending
def exec_with_progress(executor: Executor, fn, *iterables, **tqdm_kwargs):
    """
    Equivalent to executor.map(fn, *iterables) but displays a tqdm-based
    progress bar.

    One task ``fn(*args)`` is submitted for every ``args`` in
    ``zip(*iterables)``, exactly as ``Executor.map`` does. Previously one
    task was submitted per *iterable*, with that iterable's elements
    unpacked as the arguments, which contradicted the documented contract.

    Does not support timeout or chunksize as executor.submit is used
    internally. Results are NOT returned in the same order as the iterables
    order; they are yielded as tasks complete.

    :param executor: executor the tasks are submitted to
    :param fn: callable taking one positional argument per iterable
    :param iterables: argument sources, consumed in lockstep
    :param tqdm_kwargs: extra keyword arguments forwarded to ``tqdm``
    """
    futures_list = [executor.submit(fn, *args) for args in zip(*iterables)]
    progress = tqdm(concurrent.futures.as_completed(futures_list),
                    total=len(futures_list), **tqdm_kwargs)
    for future in progress:
        yield future.result()
def bench_executor(ex: Executor, fn, *args, **kwargs) -> None:
    """
    Time two rounds of ``MAX_WORKERS`` calls to ``fn`` on ``ex`` and print them.

    The executor is used as a context manager, so it is shut down at the end.
    The printed columns are, in order: a label describing the executor, the
    duration of the second round, the duration of the first round, and the
    time spent leaving the ``with`` block.
    """
    started = time.time()
    with ex:
        first_round = [ex.submit(fn, *args, **kwargs)
                       for _ in range(MAX_WORKERS)]
        for pending in first_round:
            pending.result()
        first_done = time.time()

        second_round = [ex.submit(fn, *args, **kwargs)
                        for _ in range(MAX_WORKERS)]
        for pending in second_round:
            pending.result()
        second_done = time.time()
    closed = time.time()

    exargs = [f"max_workers={ex._max_workers}"]
    try:
        mp_context = ex._mp_context
    except AttributeError:
        # This executor has no _mp_context attribute; leave it out.
        pass
    else:
        exargs.append(f"mp_context={type(mp_context).__name__}")

    label = f"{type(ex).__name__}({', '.join(exargs)})"
    print(f"{label:65s}|{second_done - first_done:.6f}"
          f"|{first_done - started:.6f}|{closed - second_done:.6f}")
def preprocess_trips(client: InfluxDBClient, executor: Executor, manager: SyncManager, dry_run=False):
    """
    Detect trips for every "samples" series.

    One ``preprocess_trip`` task is submitted per series. Progress is
    reported through a manager queue consumed by ``async_progress``; once
    all tasks are done the results are sorted by their first column and
    logged as a table.
    """
    logger.info("Preprocessing trips")
    queue = manager.Queue()
    series = client.list_series("samples")

    futures = []
    for nr, (sname, sselector) in enumerate(series):
        futures.append(
            executor.submit(preprocess_trip, nr, client, queue, sname,
                            sselector, dry_run))

    logger.debug("Tasks started, waiting for results...")
    async_progress(futures, queue)
    data = [task.result() for task in futures]
    logger.debug("Tasks done")

    data.sort(key=lambda row: row[0])
    table = tabulate(data, headers=["#", "cycles", "cycles_disc"])
    logger.info(__("Detected trips:\n{}", table))
def _deferred_run_outer_loop(
    self,
    input_data: InputDataset,
    outer_split: Split,
    data_splitter: DataSplitter,
    executor: Union[Executor, None],
) -> Union[Future, OuterLoopResults]:
    """
    Run one outer loop, either inline or on ``executor``.

    :param input_data: forwarded unchanged to ``self._run_outer_loop``
    :param outer_split: forwarded unchanged to ``self._run_outer_loop``
    :param data_splitter: forwarded unchanged to ``self._run_outer_loop``
    :param executor: where to run the loop. ``None`` runs it synchronously
        in the calling thread. The annotation now reflects that ``None`` is
        accepted, as the body has always handled it.
    :return: the value of ``self._run_outer_loop`` when ``executor`` is
        ``None``, otherwise the Future returned by ``executor.submit``.
    """
    if executor is not None:
        return executor.submit(
            self._run_outer_loop, input_data, outer_split, data_splitter
        )
    return self._run_outer_loop(input_data, outer_split, data_splitter)
def cont(pool: Executor, paths: Iterable[PurePath]) -> None:
    """
    Move ``paths`` to the trash in the background using the ``trash`` command.

    The work is submitted to ``pool`` and this function returns immediately.
    On success a refresh event is enqueued. A missing ``trash`` executable
    or a non-zero exit status is reported through ``write`` on the nvim
    thread; any other exception is logged.

    :param pool: executor that runs the blocking subprocess call
    :param paths: paths passed to ``trash`` after a ``--`` separator
    """

    def c1() -> None:
        cmd = "trash"
        if which(cmd):
            command = (cmd, "--", *map(str, paths))
            # check_call never reads from the child, so PIPE for
            # stdout/stderr could block the child once the OS pipe buffer
            # fills (see the subprocess documentation). The output was
            # never used, so it is discarded instead.
            check_call(command, stdin=DEVNULL, stdout=DEVNULL,
                       stderr=DEVNULL, cwd=cwd)
        else:
            raise LookupError(LANG("sys_trash_err"))

    def c2() -> None:
        try:
            c1()
        except (CalledProcessError, LookupError) as e:
            threadsafe_call(nvim, write, nvim, e, error=True)
        except Exception as e:
            log.exception("%s", e)
        else:
            enqueue_event(_refresh, True)

    pool.submit(c2)
def compose(object_path: str, slices: List[storage.Blob],
            client: storage.Client, executor: Executor) -> storage.Blob:
    """Compose an object from an indefinite number of slices.

    Slices are grouped by ``generate_composition_chunks`` and each group is
    composed into an intermediate accumulator by a ``compose_and_cleanup``
    task submitted to ``executor``. This repeats level by level, each level
    consuming the accumulators of the previous one, until a level produces
    at most 32 accumulators. Those are then composed into the final blob in
    the calling thread. Using a tree of accumulators avoids the one second
    object update cooldown period in GCS.

    The previous implementation kept appending to a single accumulator list
    across levels, so it could never terminate once a level produced more
    than 32 accumulators, and it spun forever on an empty ``slices`` list.
    Each level now starts from a fresh list.

    Arguments:
        object_path {str} -- The path for the final composed blob.
        slices {List[storage.Blob]} -- A list of the slices which should
            compose the blob, in order.
        client {storage.Client} -- A GCS client to use.
        executor {Executor} -- A concurrent.futures.Executor to use for
            the intermediate compositions and cleanup.

    Returns:
        storage.Blob -- The composed blob.
    """
    LOG.info("Composing")
    identifier = generate_hex_sequence()

    level = slices
    while True:
        next_level = []
        for chunk in generate_composition_chunks(level):
            # make intermediate accumulator
            intermediate_accumulator = storage.Blob.from_string(
                object_path + next(identifier))
            LOG.info("Intermediate composition: %s", intermediate_accumulator)
            future_iacc = executor.submit(compose_and_cleanup,
                                          intermediate_accumulator, chunk,
                                          client, executor)
            # store reference for the next level
            next_level.append(future_iacc)
        # 32 is presumably the GCS limit on components per compose call;
        # once a level fits within it, the final compose can take over.
        if len(next_level) <= 32:
            break
        level = next_level

    # Now can do final compose
    final_blob = storage.Blob.from_string(object_path)
    final_chunk = [
        blob
        for sublist in generate_composition_chunks(next_level)
        for blob in sublist
    ]
    compose_and_cleanup(final_blob, final_chunk, client, executor)
    LOG.info("Composition complete")
    return final_blob
def new(pool: Executor, root: PurePath, index: Index) -> Node:
    """
    Build the node tree under ``root`` with a parallel breadth-first walk.

    Paths waiting to be visited live in ``bfs_q``. On each pass the queue is
    drained in chunks of ``WALK_PARALLELISM_FACTOR`` paths, one ``_new``
    task is submitted per chunk, and the pass waits for all of them. The
    walk ends when a pass leaves the queue empty; ``_join`` then assembles
    the result from ``acc``.
    """
    acc: SimpleQueue = SimpleQueue()
    bfs_q: SimpleQueue = SimpleQueue()

    def pending_paths() -> Iterator[PurePath]:
        # Lazily empties the queue, including anything that running tasks
        # add while this generator is still being consumed.
        while not bfs_q.empty():
            yield bfs_q.get()

    bfs_q.put(root)
    while True:
        if bfs_q.empty():
            break
        batches = chunk(pending_paths(), n=WALK_PARALLELISM_FACTOR)
        tasks = tuple(
            pool.submit(_new, roots=paths, index=index, acc=acc, bfs_q=bfs_q)
            for paths in batches
        )
        wait(tasks)

    return _join(acc)
def run_in_executor(executor: Executor, coro: Coroutine) -> Future:
    """
    Run an async coroutine in an executor when we aren't already inside an
    event loop.

    A fresh event loop is created for the coroutine, driven to completion in
    the executor's worker and closed afterwards. Two defects were fixed: the
    coroutine's return value was discarded (the future always resolved to
    ``None``) and the event loop was never closed.

    Args:
        executor: A :class:`ThreadExecutor` or :class:`ProcessExecutor`
            instance which will run the coroutine
        coro: The coroutine object to run to completion

    Returns:
        A `Future` which can be used to access the result of the coroutine.
        An exception raised by the coroutine is re-raised by
        ``Future.result()``.
    """
    loop = asyncio.new_event_loop()

    def run_sync_loop(loop, coro):
        asyncio.set_event_loop(loop)
        try:
            return loop.run_until_complete(coro)
        finally:
            # Do not leave a closed loop registered on a reusable worker.
            asyncio.set_event_loop(None)
            loop.close()

    try:
        return executor.submit(run_sync_loop, loop, coro)
    except BaseException:
        # Submission failed (e.g. executor already shut down): the worker
        # will never run, so release the loop here.
        loop.close()
        raise
def push_upload_jobs(input_stream: io.BufferedReader, object_path: str,
                     slice_size: int, client: storage.Client,
                     executor: Executor) -> List[Future]:
    """Read the input stream sequentially and upload it as object slices.

    The stream is read with a single cursor in pieces of ``slice_size``
    bytes (via ``read_exactly``). Each non-empty piece is handed to
    ``upload_bytes`` on ``executor`` under the name
    ``object_path + "_sliceN"``, where N counts up from 0. Reading stops at
    the first empty read or when the stream is closed. The running byte
    total is published in ``stats['read_bytes']``.

    Arguments:
        input_stream {io.BufferedReader} -- The input stream to read.
        object_path {str} -- The prefix used to name each slice.
        slice_size {int} -- The size of slice to target.
        client {storage.Client} -- The GCS client to use.
        executor {Executor} -- The executor to use for the concurrent
            slice uploads.

    Returns:
        List[Future] -- One Future per slice upload, in slice order. Each
        result is whatever ``upload_bytes`` returns (per the original
        documentation, a google.cloud.storage.Blob).
    """
    uploads = []
    read_bytes = 0
    while not input_stream.closed:
        slice_bytes = read_exactly(input_stream, slice_size)
        read_bytes += len(slice_bytes)
        stats['read_bytes'] = read_bytes

        if not slice_bytes:
            LOG.info("EOF: {} bytes".format(read_bytes))
            break

        # The slice index always equals the number of uploads queued so far.
        slice_number = len(uploads)
        LOG.debug("Read slice {}, {} bytes".format(slice_number, read_bytes))
        slice_name = object_path + "_slice{}".format(slice_number)
        uploads.append(
            executor.submit(upload_bytes, slice_bytes, slice_name, client))
    return uploads
def run_on_pool(executor: Executor, func: Callable, items: List[Any], *args: Any, chunk_size: Optional[int] = None):
    """
    Run ``func(chunk, *args)`` on ``executor`` for every chunk of ``items``.

    Returns immediately, without touching the executor, when ``items`` is
    empty. Otherwise the executor is used as a context manager (so it is
    shut down afterwards), all tasks are awaited, and the first exception
    collected in completion order is re-raised. Task results are discarded.

    ``chunk_size`` defaults to 1.
    """
    if len(items) == 0:
        return

    effective_size = 1 if chunk_size is None else chunk_size
    chunks: Iterable[List[Any]] = _split_to_chunks(list(items), effective_size)

    with executor:
        pending = [executor.submit(func, chunk, *args) for chunk in chunks]
        exceptions = [
            finished.exception()
            for finished in as_completed(pending)
            if finished.exception() is not None
        ]

    if exceptions:
        raise exceptions[0]
def read_file(
    f: io.RawIOBase,
    executor: futures.Executor,
    q: _result_queue,
    stop_reading: threading.Event,
):
    """
    Read ``f`` in blocks of up to ``_CHUNK_SIZE`` bytes and hash each block.

    For every block read, a ``compute_hash`` task is submitted to
    ``executor`` and its future is put on ``q``. Reading stops at end of
    file or once ``stop_reading`` is set. In all cases, including errors,
    ``None`` is put on ``q`` last to signal the end of the queue.
    """
    try:
        while not stop_reading.is_set():
            # Allocate a fresh buffer and try very hard to fill it: keep
            # reading until a read returns nothing (EOF or buffer full).
            view = memoryview(bytearray(_CHUNK_SIZE))
            filled: int = 0
            while True:
                # As of 2020-06-01: typeshed wrongly claims we can't
                # `readinto(memoryview)`, so we disable type checking.
                got = f.readinto(view[filled:]) or 0  # type: ignore
                filled += got
                if got <= 0:
                    break

            if not filled:
                # Nothing could be read at all: end of file.
                return
            q.put(executor.submit(compute_hash, view[:filled]))
    finally:
        q.put(None)  # signal end of queue to the printer
def parallelize(lambdagen: Generator or Iterator, pool: Executor, donecb: Function, errorcb: Function) -> None:
    """
    Run a list or generator of functions in a thread pool.

    Functions are taken from ``lambdagen`` in batches of 100 (via ``chunk``)
    and each batch is awaited before the next is submitted. Results are
    sent to the ``donecb`` callback; exceptions are reported with
    ``serviceError`` and sent to the ``errorcb`` callback. A task's result
    is only retrieved when ``donecb`` is given. The pool is used as a
    context manager and is therefore shut down on exit.

    :param lambdagen: Generator that produces functions that have no parameters
    :param pool: This will be a ThreadPoolExecutor
    :param donecb: This is a callback that is called with the result
    :param errorcb: This is a callback that is called with an Exception object
    :return: None is returned.
    """
    with pool:
        batch = chunk(100, lambdagen)
        # NOTE(review): this loop ends only when ``chunk`` returns None -
        # confirm that is how ``chunk`` signals exhaustion.
        while batch is not None:
            submitted = [pool.submit(task) for task in batch]
            for finished in as_completed(submitted):
                try:
                    if donecb is not None:
                        donecb(finished.result())
                except Exception as e:
                    serviceError(
                        "parallelize received an exception from thread because %s"
                        % e)
                    if errorcb is not None:
                        errorcb(e)
            batch = chunk(100, lambdagen)
    return
def _launch_next_task(self, executor: Executor) -> "Future[OUT] | None":
    """
    Submit the next step of this job to ``executor``.

    Returns ``None`` without submitting anything when the job is done or
    when there are no arguments left; in the latter case ``num_args`` is set
    to the number of steps dispatched so far. Otherwise the next argument is
    taken, a "pending" job becomes "running", and ``self.target(step_arg)``
    is submitted.

    When the step finishes, the completion counter is updated under
    ``job_lock``. Unless the job was already failed or cancelled,
    ``on_progress`` is called with the job uuid and the step index. The job
    becomes "failed" if the step raised, or "succeeded" if it was the last
    outstanding step. On such a change exactly one of ``on_failure`` (with
    the exception) or ``on_success`` (with the uuid and the step result) is
    called. Previously ``future.result()`` was also evaluated after a
    failure, which re-raised the step's exception inside the done-callback.

    :param executor: executor the step is submitted to
    :return: the future of the submitted step, or ``None``
    """
    if self._done():
        return None
    if not self.args.has_next():
        self.num_args = self.num_dispatched_steps  # FIXME
        return None

    step_arg = self.args.get_next()
    if self._status == "pending":
        self._status = "running"
    step_index = self.num_dispatched_steps
    self.num_dispatched_steps += 1

    def step_done_callback(future: Future[Any]):  # FIXME
        # Only the bookkeeping happens under the lock; user callbacks run
        # after it has been released.
        with self.job_lock:
            status_changed = False
            self.num_completed_steps += 1
            if self._status == "failed" or self._status == "cancelled":
                return
            elif future.exception():
                self._status = "failed"
                status_changed = True
            elif (not self.args.has_next()
                  and self.num_dispatched_steps == self.num_completed_steps):
                self._status = "succeeded"
                status_changed = True

        if self.on_progress:
            self.on_progress(self.uuid, step_index)
        if status_changed:
            exception = future.exception()
            if exception is not None:
                if self.on_failure:
                    self.on_failure(exception)
            elif self.on_success:
                self.on_success(self.uuid, future.result())

    future = executor.submit(self.target, step_arg)
    future.add_done_callback(step_done_callback)
    return future
def submit(self, executor: Executor, **kwargs) -> TaskFuture:
    """
    Submit ``self.execute(**kwargs)`` to ``executor``.

    The raw future is kept in ``self._future``. The returned ``TaskFuture``
    wraps it and receives ``rid`` and ``cid`` of this task plus all
    ``kwargs`` (a keyword with the same name overrides ``rid``/``cid``).
    """
    self.logger.info(f"Submitting Task[{self.handle}:{self.rid}]")
    self._future = executor.submit(self.execute, **kwargs)

    tparms = {"rid": self.rid, "cid": self.cid}
    tparms.update(kwargs)
    return TaskFuture(self._future, **tparms)
def __call__(self, executor: Executor) -> Future:
    """
    Submit this task to ``executor`` and return the resulting future.

    The callable that is submitted is ``self._task_wrapper(self.task)``; it
    receives ``executor`` as its only argument.
    """
    log.debug("Submitted task <%s> to executor <%s>", repr(self),
              repr(executor))
    wrapped = self._task_wrapper(self.task)
    return executor.submit(wrapped, executor)
def submit(executor_: Executor, args: Tuple) -> Future: if verbose: nonlocal n_submitted n_submitted += 1 log.log(loglevel, f"Job {n_submitted}, submitting args: {args!r}") return executor_.submit(fn, *args)
def _bulk_cleanup(self, jobs: Sequence["Job"], ex: Executor) -> Iterable["Job"]:
    """
    Run ``self.cleanup`` for every job on ``ex``.

    All jobs are submitted up front on first iteration; the return value of
    each ``cleanup`` call is yielded as its task completes, so the output
    order is not the input order.
    """
    pending = [ex.submit(self.cleanup, job) for job in jobs]
    yield from (finished.result() for finished in as_completed(pending))
def is_file(pool: Executor, path: PurePath) -> bool:
    """
    Report whether ``path`` is an existing regular file.

    The ``isfile`` check runs on ``pool``; the caller blocks until the
    answer is available.
    """
    return pool.submit(isfile, path).result()
def batch_apply(
        items: Iterator,
        *,
        fnct: callable = None,
        quota: Optional[int] = None,
        interval: int = None,
        preparekey: Optional[callable] = None,
        rejectkey: Optional[callable] = None,
        rejectdefault: Optional[Any] = None,
        through: dict = {},
        executor: Executor = ThreadPoolExecutor()) -> Iterator:
    """
    Applies a function to a large set of items using threading.

    Results are yielded in the order of ``items``. This is a generator, so
    nothing happens (including argument validation) until it is iterated.

    :items The collection of items to process.
    :fnct The function to apply to each item. This function should only
        take one parameter: ONE item.
    :preparekey Optional function applied to each item before anything
        else; its return value replaces the item.
    :rejectkey This function is a filter which allows you to define which
        items should not be processed, in which case they will be replaced
        by the 'rejectdefault' argument. Should return 'True' if the item
        is to be rejected, 'False' otherwise.
    :rejectdefault The value to be returned instead of the process result
        for an item that has been rejected. The special value 'BOUNCE'
        allows you to return the same item instead of a default value.
    :quota Only a set amount of items can be processed in a set interval
        of time. Once this quota is spent the results already available are
        yielded back. Once they are exhausted, the next batch of items is
        processed, but not until the time interval has been completed.
    :interval The time interval described above (in seconds).
    :through Extra keyword arguments bound to 'fnct' for every call. The
        shared default dict is only read, never modified.
    :executor You can edit the concurrent executor if you so wish (e.g. to
        increase max_workers). The default executor is created once and
        shared by all calls.

    Raises UserWarning when only one of 'quota' and 'interval' is given.
    """
    fnct = partial(fnct, **through)
    # Both halves of the old check tested the same condition, so passing
    # 'interval' without 'quota' slipped through unnoticed.
    if (quota is None) != (interval is None):
        raise UserWarning(
            "When using quotas, 'quota' and 'interval' need to be both provided."
        )

    milestone = time.time()
    futures = []
    for idx, item in enumerate(items):
        if preparekey is not None:
            item = preparekey(item)
        if rejectkey is not None and rejectkey(item):
            if rejectdefault is not BOUNCE:
                item = rejectdefault
            # Rejected items keep their position in the output through a
            # placeholder future.
            futures.append(RejectionFuture(item))
            continue
        futures.append(executor.submit(fnct, item))

        if quota is not None and not (idx + 1) % quota:
            # Quota spent: flush this batch, then wait out the remainder
            # of the interval before submitting more work.
            for future in futures:
                yield future.result()
            futures = []
            remaining_time = interval - (time.time() - milestone)
            remaining_time = max(remaining_time, 0)
            if remaining_time:
                time.sleep(remaining_time)
            milestone = time.time()

    for future in futures:
        yield future.result()
def add_jobs_to_queue(config: Config, e: Executor, running: Set[Future]) -> None:
    """
    Top up ``running`` until it holds ``config.threads`` futures.

    For every free slot an operation is chosen with ``pick_operation`` and
    ``record_operation(operation)`` is submitted to ``e``. Nothing is
    submitted when ``running`` is already at or above the limit.
    """
    while config.threads > len(running):
        operation = pick_operation(config)
        running.add(e.submit(record_operation, operation))
def __init__(self, pool: Executor) -> None:
    """
    Create the internal queue and start ``self._forever`` on ``pool``.

    The future returned by the pool is not kept.
    """
    queue: SimpleQueue = SimpleQueue()
    self._q = queue
    pool.submit(self._forever)