def await_all(self, timeout=None):
    """Await completion of all known futures.

    All futures that have completed by the time the timeout expires are
    returned. Unlike `first_result`, this method does not try to return a
    result or raise any exceptions. As always, cancelled futures are not
    returned.

    :param timeout: Amount of time to wait in seconds before giving up and
        returning what has completed so far
    :return: All futures that have completed and were not cancelled
    """
    (done, not_done) = futures_wait(self._futures, timeout=timeout,
                                    return_when=ALL_COMPLETED)
    return [future for future in done if not future.cancelled()]
def collect_info(self):
    # Gather per-client latency results and dump them to results.json.
    latencies = {c.name: c.results for c in self.clients}
    results = {"client_latencies": latencies}

    outfile = self.local_output_directory / "results.json"
    assert not outfile.exists(), \
        "Output file already exists at '{path}'".format(path=str(outfile))
    with outfile.open("w") as f:
        json.dump(results, f, indent=2)

    # Dump the experiment description and the benchmark arguments alongside
    # the results.
    dest_path = self.local_output_directory / "config.json"
    with dest_path.open("w") as f:
        json.dump(self.exp_description, f, indent=2)

    bench_config_out = self.local_output_directory / "bench_config.json"
    with bench_config_out.open("w") as f:
        json.dump(vars(self.args), f, cls=CustomArgEncoder, indent=2)

    # Copy result folders and goodput traces from the remote hosts in
    # parallel, then wait for all transfers to finish.
    with ThreadPoolExecutor(max_workers=100) as tpe:
        awaiting = []
        awaiting.append(
            tpe.submit(self.master.copy_result_folder,
                       local_dest=self.local_output_directory))
        awaiting.extend(
            tpe.submit(w.copy_result_folder,
                       local_dest=self.local_output_directory)
            for w in self.workers)

        traces_dir = self.local_output_directory / "goodput_traces"
        assert not traces_dir.exists(), \
            "Traces directory already exists at '{}'".format(str(traces_dir))
        traces_dir.mkdir()
        awaiting.extend(
            tpe.submit(w.copy_csvs, local_directory=traces_dir)
            for w in self.workers)

        futures_wait(fs=awaiting)
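# CustomArgEncoder is used above but not defined in this snippet. The sketch
# below is an assumption about what such an encoder could look like: a
# json.JSONEncoder subclass that lets `vars(self.args)` be serialized even
# when the argparse namespace holds non-JSON values such as pathlib.Path
# objects. The real implementation may differ.
import json
from pathlib import Path


class CustomArgEncoder(json.JSONEncoder):
    """Best-effort encoder for benchmark argument values."""

    def default(self, obj):
        # Store paths as plain strings; defer to json's normal error
        # handling for anything else unexpected.
        if isinstance(obj, Path):
            return str(obj)
        return super().default(obj)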
def first_result(self, timeout=None):
    """Await, and return, the first result from the set of known futures.

    If the first completed future raised an exception, that exception is
    raised here instead.

    :param timeout: Amount of time to wait in seconds before giving up
    :return: The value returned by the first future to finish, or None if
        no future completed successfully within the timeout
    """
    possible = self._futures
    remaining = timeout
    started = time.time()
    # Keep waiting until a non-cancelled future completes, the timeout
    # expires, or no futures remain. Only the futures that are still
    # pending are re-waited on each iteration.
    while possible and (remaining is None or remaining >= 0):
        (done, possible) = futures_wait(possible, timeout=remaining,
                                        return_when=FIRST_COMPLETED)
        if timeout is not None:
            remaining = timeout - (time.time() - started)
        for future in done:
            if not future.cancelled():
                return future.result(0)
    return None
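# Hypothetical usage sketch (not part of the original source) for await_all
# and first_result above. In the real code these are presumably methods on a
# future-tracking class; here they are exercised as plain functions by handing
# them a throwaway object exposing the `_futures` attribute they rely on. The
# surrounding module is assumed to already import `time` and
# `futures_wait`/`ALL_COMPLETED`/`FIRST_COMPLETED` from concurrent.futures, as
# the helpers require.
from concurrent.futures import ThreadPoolExecutor
from types import SimpleNamespace


def _future_helpers_demo():
    with ThreadPoolExecutor(max_workers=4) as tpe:
        holder = SimpleNamespace(
            _futures=[tpe.submit(pow, 2, n) for n in range(4)])
        # Value of whichever future finishes first, or the exception it
        # raised.
        first = first_result(holder, timeout=5)
        # Every future that completed within the timeout and was not
        # cancelled; results and exceptions are left untouched.
        finished = await_all(holder, timeout=5)
        return first, finished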
def wait(self, timeout=None):
    """Wait for all Futures to complete."""
    return futures_wait(self, timeout=timeout)
def process_replay_objects_content(
    all_objects: Dict[str, List[dict]],
    *,
    src: ObjStorage,
    dst: ObjStorage,
    exclude_fn: Optional[Callable[[dict], bool]] = None,
    check_dst: bool = True,
    concurrency: int = 16,
):
    """
    Takes a list of records from Kafka (see
    :py:func:`swh.journal.client.JournalClient.process`) and copies them
    from the `src` objstorage to the `dst` objstorage, if:

    * `obj['status']` is `'visible'`
    * `exclude_fn(obj)` is `False` (if `exclude_fn` is provided)
    * `obj['sha1'] not in dst` (if `check_dst` is True)

    Args:
        all_objects: Objects passed by the Kafka client. Most importantly,
            `all_objects['content'][*]['sha1']` is the sha1 hash of each
            content.
        src: An object storage (see :py:func:`swh.objstorage.get_objstorage`)
        dst: An object storage (see :py:func:`swh.objstorage.get_objstorage`)
        exclude_fn: Determines whether an object should be copied.
        check_dst: Determines whether we should check the destination
            objstorage before copying.

    Example:

    >>> from swh.objstorage.factory import get_objstorage
    >>> src = get_objstorage('memory')
    >>> dst = get_objstorage('memory')
    >>> id1 = src.add(b'foo bar')
    >>> id2 = src.add(b'baz qux')
    >>> kafka_partitions = {
    ...     'content': [
    ...         {
    ...             'sha1': id1,
    ...             'status': 'visible',
    ...         },
    ...         {
    ...             'sha1': id2,
    ...             'status': 'visible',
    ...         },
    ...     ]
    ... }
    >>> process_replay_objects_content(
    ...     kafka_partitions, src=src, dst=dst,
    ...     exclude_fn=lambda obj: obj['sha1'] == id1)
    >>> id1 in dst
    False
    >>> id2 in dst
    True
    """
    vol = []
    nb_skipped = 0
    nb_failures = 0
    t0 = time()

    def _copy_object(obj):
        nonlocal nb_skipped
        nonlocal nb_failures
        obj_id = obj[ID_HASH_ALGO]
        if obj["status"] != "visible":
            nb_skipped += 1
            logger.debug("skipped %s (status=%s)",
                         hash_to_hex(obj_id), obj["status"])
            statsd.increment(
                CONTENT_OPERATIONS_METRIC,
                tags={"decision": "skipped", "status": obj["status"]},
            )
        elif exclude_fn and exclude_fn(obj):
            nb_skipped += 1
            logger.debug("skipped %s (manually excluded)", hash_to_hex(obj_id))
            statsd.increment(CONTENT_OPERATIONS_METRIC,
                             tags={"decision": "excluded"})
        elif check_dst and obj_in_objstorage(obj_id, dst):
            nb_skipped += 1
            logger.debug("skipped %s (in dst)", hash_to_hex(obj_id))
            statsd.increment(CONTENT_OPERATIONS_METRIC,
                             tags={"decision": "in_dst"})
        else:
            try:
                copied = copy_object(obj_id, src, dst)
            except ObjNotFoundError:
                nb_skipped += 1
                statsd.increment(CONTENT_OPERATIONS_METRIC,
                                 tags={"decision": "not_in_src"})
            else:
                if copied is None:
                    nb_failures += 1
                    statsd.increment(CONTENT_OPERATIONS_METRIC,
                                     tags={"decision": "failed"})
                else:
                    vol.append(copied)
                    statsd.increment(CONTENT_OPERATIONS_METRIC,
                                     tags={"decision": "copied"})

    with ThreadPoolExecutor(max_workers=concurrency) as pool:
        futures = []
        for (object_type, objects) in all_objects.items():
            if object_type != "content":
                logger.warning(
                    "Received a series of %s, this should not happen",
                    object_type)
                continue
            for obj in objects:
                futures.append(pool.submit(_copy_object, obj=obj))

        # Wait until either all copies are done or one of them raised, then
        # re-raise the first exception found among the finished tasks.
        futures_wait(futures, return_when=FIRST_EXCEPTION)
        for f in futures:
            if f.running():
                continue
            exc = f.exception()
            if exc:
                pool.shutdown(wait=False)
                f.result()
                raise exc

    dt = time() - t0
    logger.info(
        "processed %s content objects in %.1fsec "
        "(%.1f obj/sec, %.1fMB/sec) - %d failed - %d skipped",
        len(vol),
        dt,
        len(vol) / dt,
        sum(vol) / 1024 / 1024 / dt,
        nb_failures,
        nb_skipped,
    )

    # Ping the service watchdog when the optional notify hook is available.
    if notify:
        notify("WATCHDOG=1")
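# A hedged sketch (not from the original source) of how this worker function
# is typically driven by the Kafka journal client mentioned in the docstring.
# The get_journal_client factory and its keyword arguments are assumptions
# based on swh.journal's client API; the broker address and consumer group
# below are placeholders.
from functools import partial

from swh.journal.client import get_journal_client
from swh.objstorage.factory import get_objstorage


def _content_replayer_sketch():
    src = get_objstorage("memory")
    dst = get_objstorage("memory")
    client = get_journal_client(
        "kafka",
        brokers=["localhost:9092"],        # placeholder broker
        group_id="content-replayer-demo",  # placeholder consumer group
        object_types=["content"],
    )
    # JournalClient.process calls the worker repeatedly with batches shaped
    # like the `all_objects` dict documented above.
    worker_fn = partial(process_replay_objects_content, src=src, dst=dst)
    client.process(worker_fn)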
def wait(self, timeout=None):
    return futures_wait(self, timeout=timeout)