Example #1
    def await_all(self, timeout=None):
        """Await all known futures completion.

        All futures that have been completed after the timeout expires are returned.. Unlike `first_result`, this
        method doesn't try to return the result or throw any exceptions.

        As ever, cancelled Futures are not returned.

        :param timeout: Amount of time to wait in seconds before giving up and returning what we got until then
        :return:        All futures that have completed and were not cancelled
        """
        (done, _not_done) = futures_wait(self._futures, timeout=timeout, return_when=ALL_COMPLETED)
        return [future for future in done if not future.cancelled()]
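A minimal usage sketch for context, assuming `self._futures` is a plain list of futures collected from a `ThreadPoolExecutor` (the `FutureSet` wrapper below is hypothetical, not from the source):

from concurrent.futures import ALL_COMPLETED, ThreadPoolExecutor
from concurrent.futures import wait as futures_wait

class FutureSet:
    """Hypothetical holder for submitted futures."""

    def __init__(self, futures):
        self._futures = list(futures)

    def await_all(self, timeout=None):
        (done, _not_done) = futures_wait(self._futures, timeout=timeout,
                                         return_when=ALL_COMPLETED)
        return [future for future in done if not future.cancelled()]

with ThreadPoolExecutor(max_workers=4) as pool:
    fs = FutureSet(pool.submit(pow, n, 2) for n in range(3))
    completed = fs.await_all(timeout=5)  # done, non-cancelled futures
    print(sorted(f.result() for f in completed))  # [0, 1, 4]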
Example #2
    def collect_info(self):
        latencies = {c.name: c.results for c in self.clients}
        results = {"client_latencies": latencies}

        outfile = self.local_output_directory / "results.json"
        assert not outfile.exists(), \
            "Output file already exists somehow at '{path}'".format(path=str(outfile))
        with outfile.open("w") as f:
            json.dump(results, f, indent=2)

        dest_path = self.local_output_directory / "config.json"
        with dest_path.open("w") as f:
            json.dump(self.exp_description, f, indent=2)

        bench_config_out = self.local_output_directory / "bench_config.json"
        with bench_config_out.open('w') as f:
            json.dump(vars(self.args), f, cls=CustomArgEncoder, indent=2)
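            # CustomArgEncoder is defined elsewhere; presumably a json.JSONEncoder
            # subclass for argparse values json cannot serialize natively
            # (an assumption, as it is not shown in this snippet).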

        with ThreadPoolExecutor(max_workers=100) as tpe:
            awaiting = []
            awaiting.append(
                tpe.submit(self.master.copy_result_folder,
                           local_dest=self.local_output_directory))
            awaiting.extend(
                tpe.submit(w.copy_result_folder,
                           local_dest=self.local_output_directory)
                for w in self.workers)
            traces_dir = self.local_output_directory / "goodput_traces"
            assert not traces_dir.exists(), \
                "Somehow traces dir already exists at '{}'".format(str(traces_dir))
            traces_dir.mkdir()
            awaiting.extend(
                tpe.submit(w.copy_csvs, local_directory=traces_dir)
                for w in self.workers)
            futures_wait(fs=awaiting)
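Reduced to a self-contained sketch, the fan-out pattern above (one copy job per worker, then a blocking wait on the whole batch) looks like this; `copy_one` is an illustrative stand-in for the `copy_result_folder`/`copy_csvs` calls:

import shutil
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import wait as futures_wait
from pathlib import Path

def copy_one(src: Path, dest_dir: Path) -> Path:
    # Stand-in for a per-worker copy job.
    return Path(shutil.copy(src, dest_dir))

def collect(sources, dest_dir: Path):
    dest_dir.mkdir(parents=True, exist_ok=True)
    with ThreadPoolExecutor(max_workers=100) as tpe:
        awaiting = [tpe.submit(copy_one, src, dest_dir) for src in sources]
        # Block until every copy has finished before the pool shuts down.
        futures_wait(fs=awaiting)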
Example #3
    def first_result(self, timeout=None):
        """Await, and return, the first result from the set of known futures.

        If an Exception was thrown by the completed Future, this will be thrown instead.

        :param timeout: Amount of time to wait in seconds before giving up and returning None
        :return:        The value returned by the first future to finish, or None if no futures completed successfully
        """
        possible = self._futures
        remaining = timeout
        started = time.time()

        # `timeout=None` means wait indefinitely; comparing None >= 0 would raise.
        while possible and (remaining is None or remaining >= 0):
            # Wait on the still-pending set rather than self._futures, so
            # already-done (e.g. cancelled) futures don't short-circuit the wait.
            (done, possible) = futures_wait(possible, timeout=remaining, return_when=FIRST_COMPLETED)
            if timeout is not None:
                remaining = timeout - (time.time() - started)
            for future in done:
                if not future.cancelled():
                    return future.result(0)

        return None
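To make the loop's behavior concrete, a small demonstration of `FIRST_COMPLETED`; the `slow` helper and timings are illustrative:

import time
from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor
from concurrent.futures import wait as futures_wait

def slow(value, delay):
    time.sleep(delay)
    return value

with ThreadPoolExecutor(max_workers=2) as pool:
    fast = pool.submit(slow, "fast", 0.1)
    slower = pool.submit(slow, "slower", 1.0)
    (done, possible) = futures_wait([fast, slower], return_when=FIRST_COMPLETED)
    # Cancelled futures would also land in `done`, which is why
    # first_result() filters them before calling result().
    first = next(f.result() for f in done if not f.cancelled())
    assert first == "fast"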
Example #4
    def wait(self, timeout=None):
        """
        Wait for all Futures to complete.

        :return: The (done, not_done) named tuple from concurrent.futures.wait
        """
        return futures_wait(self, timeout=timeout)
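`self` is passed straight to `futures_wait`, so the enclosing class is presumably itself an iterable of futures; a sketch under that assumption:

from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import wait as futures_wait

class FutureList(list):
    """Hypothetical list of futures with a blocking wait()."""

    def wait(self, timeout=None):
        return futures_wait(self, timeout=timeout)

with ThreadPoolExecutor(max_workers=2) as pool:
    fl = FutureList(pool.submit(pow, n, 2) for n in range(4))
    done, not_done = fl.wait(timeout=5)  # (done, not_done) named tuple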
Example #5
def process_replay_objects_content(
    all_objects: Dict[str, List[dict]],
    *,
    src: ObjStorage,
    dst: ObjStorage,
    exclude_fn: Optional[Callable[[dict], bool]] = None,
    check_dst: bool = True,
    concurrency: int = 16,
):
    """
    Takes a list of records from Kafka (see
    :py:func:`swh.journal.client.JournalClient.process`) and copies them
    from the `src` objstorage to the `dst` objstorage, if:

    * `obj['status']` is `'visible'`
    * `exclude_fn(obj)` is `False` (if `exclude_fn` is provided)
    * `obj['sha1'] not in dst` (if `check_dst` is True)

    Args:
        all_objects: Objects passed by the Kafka client. Most importantly,
            `all_objects['content'][*]['sha1']` is the sha1 hash of each
            content.
        src: An object storage (see :py:func:`swh.objstorage.get_objstorage`)
        dst: An object storage (see :py:func:`swh.objstorage.get_objstorage`)
        exclude_fn: Determines whether an object should be copied.
        check_dst: Determines whether we should check the destination
            objstorage before copying.

    Example:

    >>> from swh.objstorage.factory import get_objstorage
    >>> src = get_objstorage('memory')
    >>> dst = get_objstorage('memory')
    >>> id1 = src.add(b'foo bar')
    >>> id2 = src.add(b'baz qux')
    >>> kafka_partitions = {
    ...     'content': [
    ...         {
    ...             'sha1': id1,
    ...             'status': 'visible',
    ...         },
    ...         {
    ...             'sha1': id2,
    ...             'status': 'visible',
    ...         },
    ...     ]
    ... }
    >>> process_replay_objects_content(
    ...     kafka_partitions, src=src, dst=dst,
    ...     exclude_fn=lambda obj: obj['sha1'] == id1)
    >>> id1 in dst
    False
    >>> id2 in dst
    True
    """
    vol = []
    nb_skipped = 0
    nb_failures = 0
    t0 = time()

    def _copy_object(obj):
        nonlocal nb_skipped
        nonlocal nb_failures

        obj_id = obj[ID_HASH_ALGO]
        if obj["status"] != "visible":
            nb_skipped += 1
            logger.debug("skipped %s (status=%s)", hash_to_hex(obj_id),
                         obj["status"])
            statsd.increment(
                CONTENT_OPERATIONS_METRIC,
                tags={
                    "decision": "skipped",
                    "status": obj["status"]
                },
            )
        elif exclude_fn and exclude_fn(obj):
            nb_skipped += 1
            logger.debug("skipped %s (manually excluded)", hash_to_hex(obj_id))
            statsd.increment(CONTENT_OPERATIONS_METRIC,
                             tags={"decision": "excluded"})
        elif check_dst and obj_in_objstorage(obj_id, dst):
            nb_skipped += 1
            logger.debug("skipped %s (in dst)", hash_to_hex(obj_id))
            statsd.increment(CONTENT_OPERATIONS_METRIC,
                             tags={"decision": "in_dst"})
        else:
            try:
                copied = copy_object(obj_id, src, dst)
            except ObjNotFoundError:
                nb_skipped += 1
                statsd.increment(CONTENT_OPERATIONS_METRIC,
                                 tags={"decision": "not_in_src"})
            else:
                if copied is None:
                    nb_failures += 1
                    statsd.increment(CONTENT_OPERATIONS_METRIC,
                                     tags={"decision": "failed"})
                else:
                    vol.append(copied)
                    statsd.increment(CONTENT_OPERATIONS_METRIC,
                                     tags={"decision": "copied"})

    with ThreadPoolExecutor(max_workers=concurrency) as pool:
        futures = []
        for (object_type, objects) in all_objects.items():
            if object_type != "content":
                logger.warning(
                    "Received a series of %s, this should not happen",
                    object_type)
                continue
            for obj in objects:
                futures.append(pool.submit(_copy_object, obj=obj))

        # Still inside the executor context: fail fast on the first exception.
        futures_wait(futures, return_when=FIRST_EXCEPTION)
        for f in futures:
            if f.running():
                continue
            exc = f.exception()
            if exc:
                pool.shutdown(wait=False)
                f.result()
                raise exc

    dt = time() - t0
    logger.info(
        "processed %s content objects in %.1fsec "
        "(%.1f obj/sec, %.1fMB/sec) - %d failed - %d skipped",
        len(vol),
        dt,
        len(vol) / dt,
        sum(vol) / 1024 / 1024 / dt,
        nb_failures,
        nb_skipped,
    )

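    # `notify` is assumed to be systemd's optional sd_notify hook, imported at
    # module level and left as None when systemd support is unavailable.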
    if notify:
        notify("WATCHDOG=1")
Example #6
    def wait(self, timeout=None):
        """Wait for all Futures to complete; returns the (done, not_done) sets."""
        return futures_wait(self, timeout=timeout)