Beispiel #1
0
def test_buggy_factory_raises_on_block():
    """
    An exception thrown inside the value factory must surface through the pool.

    The "Buggy factory" error is expected from two consecutive calls to
    `block_until_target_successes()` and, after `cancel()`, from `join()` as well.
    """

    rules = [(OperatorRule(timeout_min=1, timeout_max=1), 100)]
    outcomes, worker = generate_workers(rules, seed=123)

    buggy_factory = BuggyFactory(list(outcomes))

    # Author's note: the stagger timeout is non-zero so that BuggyFactory only raises
    # its error after 1.5s, by which time enough successes have been collected
    # for `block_until_target_successes()`.
    worker_pool = WorkerPool(
        worker,
        buggy_factory,
        target_successes=10,
        timeout=10,
        threadpool_size=10,
        stagger_timeout=1.5,
    )

    worker_pool.start()
    time.sleep(2)  # give the stagger timeout time to elapse

    # The factory error is reported on the first call and again on a repeated call.
    for _ in range(2):
        with pytest.raises(Exception, match="Buggy factory"):
            worker_pool.block_until_target_successes()

    worker_pool.cancel()

    # Joining the cancelled pool re-raises the same error.
    with pytest.raises(Exception, match="Buggy factory"):
        worker_pool.join()
Beispiel #2
0
def test_wait_for_successes(join_worker_pool):
    """
    With enough succeeding workers, `block_until_target_successes()` must return
    in time with exactly the target number of successes, and must stay callable afterwards.
    """

    rules = [
        (OperatorRule(timeout_min=0.5, timeout_max=1.5), 10),
        (OperatorRule(fails=True, timeout_min=1, timeout_max=3), 20),
    ]
    outcomes, worker = generate_workers(rules, seed=123)

    worker_pool = WorkerPool(
        worker,
        AllAtOnceFactory(list(outcomes)),
        target_successes=10,
        timeout=10,
        threadpool_size=30,
    )
    join_worker_pool(worker_pool)

    started_at = time.monotonic()
    worker_pool.start()
    first_result = worker_pool.block_until_target_successes()
    elapsed = time.monotonic() - started_at

    # Every value recorded as a failure must belong to a worker configured to fail.
    for failed_value in worker_pool.get_failures():
        assert outcomes[failed_value].fails

    assert len(first_result) == 10

    # The pool has 30 threads for 30 workers, so nothing is queued and the
    # successful workers (at most 1.5s each) can all finish right away.
    assert elapsed < 2

    # Asking again must yield the same number of successes.
    second_result = worker_pool.block_until_target_successes()
    assert len(second_result) == 10
Beispiel #3
0
def test_buggy_factory_raises_on_block():
    """
    An exception thrown inside the value factory must surface through the pool.

    The "Buggy factory" error is expected from two consecutive calls to
    `block_until_target_successes()` and, after `cancel()`, from `join()` as well.
    """

    rules = [(OperatorRule(timeout_min=1, timeout_max=1), 100)]
    outcomes, worker = generate_workers(rules, seed=123)

    buggy_factory = BuggyFactory(list(outcomes))

    # Author's note: WorkerPool short-circuits as soon as it has enough successes,
    # and BuggyFactory only fails when a subsequent batch is requested.
    # The stagger timeout (0.75s) is therefore kept below the worker timeout (1s),
    # so that the next batch is requested - and the factory error raised -
    # before the pool could finish successfully.
    worker_pool = WorkerPool(
        worker,
        buggy_factory,
        target_successes=10,
        timeout=10,
        threadpool_size=10,
        stagger_timeout=0.75,
    )

    worker_pool.start()
    time.sleep(2)  # give the stagger timeout time to elapse

    # The factory error is reported on the first call and again on a repeated call.
    for _ in range(2):
        with pytest.raises(Exception, match="Buggy factory"):
            worker_pool.block_until_target_successes()

    worker_pool.cancel()

    # Joining the cancelled pool re-raises the same error.
    with pytest.raises(Exception, match="Buggy factory"):
        worker_pool.join()
Beispiel #4
0
def test_cancel_waiting_workers(join_worker_pool):
    """
    With a small thread pool and many workers, several workers end up queued
    behind each other on the same thread.
    Calling `cancel()` must cancel those queued workers as well.
    """

    rules = [(OperatorRule(timeout_min=1, timeout_max=1), 100)]
    outcomes, worker = generate_workers(rules, seed=123)

    worker_pool = WorkerPool(
        worker,
        AllAtOnceFactory(list(outcomes)),
        target_successes=10,
        timeout=10,
        threadpool_size=10,
    )
    join_worker_pool(worker_pool)

    started_at = time.monotonic()
    worker_pool.start()
    worker_pool.block_until_target_successes()
    worker_pool.cancel()
    worker_pool.join()
    elapsed = time.monotonic() - started_at

    # 100 one-second workers are enqueued at once on 10 threads; running all of them
    # would take about 10 seconds. The 10 successes arrive after ~1s and the pool is
    # cancelled, but each thread has already picked up its next worker,
    # so one more second passes before `join()` returns.
    assert elapsed < 2.5
Beispiel #5
0
def test_wait_for_successes_timed_out(join_worker_pool):
    """
    If the target number of successes cannot be reached before the pool timeout,
    `block_until_target_successes()` must raise `WorkerPool.TimedOut`.
    """

    rules = [
        (OperatorRule(timeout_min=0, timeout_max=0.5), 9),
        (OperatorRule(timeout_min=1.5, timeout_max=2.5), 1),
        (OperatorRule(fails=True, timeout_min=1.5, timeout_max=2.5), 20),
    ]
    outcomes, worker = generate_workers(rules, seed=123)

    timeout = 1
    worker_pool = WorkerPool(
        worker,
        AllAtOnceFactory(list(outcomes)),
        target_successes=10,
        timeout=timeout,
        threadpool_size=30,
    )
    join_worker_pool(worker_pool)

    started_at = time.monotonic()
    worker_pool.start()
    with pytest.raises(WorkerPool.TimedOut) as exc_info:
        worker_pool.block_until_target_successes()
    elapsed = time.monotonic() - started_at

    # The timeout is 1s, but long-running workers cannot be interrupted,
    # so allow some slack on top of it.
    assert elapsed < 3

    # No worker actually failed - they merely ran out of time -
    # so the message consists of the timeout notice only.
    assert str(exc_info.value) == f"Execution timed out after {timeout}s"
Beispiel #6
0
def test_wait_for_successes_out_of_values(join_worker_pool):
    """
    When the value factory runs dry before the target number of successes is reached,
    `block_until_target_successes()` must raise `WorkerPool.OutOfValues`.
    """

    # Only 9 workers can succeed while 10 successes are requested.
    rules = [
        (WorkerRule(timeout_min=0.5, timeout_max=1.5), 9),
        (WorkerRule(fails=True, timeout_min=0.5, timeout_max=1.5), 20),
    ]
    outcomes, worker = generate_workers(rules, seed=123)

    worker_pool = WorkerPool(
        worker,
        AllAtOnceFactory(list(outcomes)),
        target_successes=10,
        timeout=10,
        threadpool_size=15,
    )
    join_worker_pool(worker_pool)

    started_at = time.monotonic()
    worker_pool.start()
    with pytest.raises(WorkerPool.OutOfValues):
        worker_pool.block_until_target_successes()
    elapsed = time.monotonic() - started_at

    # 29 workers on 15 threads is roughly 2 workers per thread,
    # so the run should not exceed 1.5s (max timeout) * 2 by much.
    assert elapsed < 4
Beispiel #7
0
    def _make_arrangements(self,
                           network_middleware: RestMiddleware,
                           handpicked_ursulas: Optional[Iterable[Ursula]] = None,
                           timeout: int = 10,
                           ) -> Dict[Ursula, Arrangement]:
        """
        Propose arrangements to Ursulas drawn from the reservoir until `self.n` accept.

        Handpicked Ursulas (if any) are passed to the reservoir by checksum address.
        Proposals are sent concurrently through a `WorkerPool` bounded by `timeout`.

        Returns a mapping of Ursula to Arrangement holding at least `self.n` entries.
        If fewer were accepted (including when the pool ran out of values or timed out),
        the exception produced by `self._not_enough_ursulas_exception()` is raised.
        """

        picked = () if handpicked_ursulas is None else handpicked_ursulas
        picked_addresses = [ursula.checksum_address for ursula in picked]

        factory = PrefetchStrategy(self._make_reservoir(picked_addresses), self.n)

        def propose(address):
            return self._propose_arrangement(address, network_middleware)

        self.alice.block_until_number_of_known_nodes_is(self.n, learn_on_this_thread=True, eager=True)

        pool = WorkerPool(
            worker=propose,
            value_factory=factory,
            target_successes=self.n,
            timeout=timeout,
            stagger_timeout=1,
            threadpool_size=self.n,
        )
        pool.start()
        try:
            results = pool.block_until_target_successes()
        except (WorkerPool.OutOfValues, WorkerPool.TimedOut):
            # Other exceptions could be raised here instead; we fall through
            # to the shortage handling below with whatever has succeeded so far.
            results = pool.get_successes()
        finally:
            pool.cancel()
            pool.join()

        # Each success value is an (ursula, arrangement) pair.
        accepted_arrangements = dict(results.values())
        failures = pool.get_failures()

        accepted_addresses = ", ".join(ursula.checksum_address for ursula in accepted_arrangements)

        if len(accepted_arrangements) >= self.n:
            self.log.debug(f"Finished proposing arrangements; accepted: {accepted_addresses}")
            return accepted_arrangements

        # Each failure value is a (type, value, traceback) triple; only the value is reported.
        rejected_proposals = "\n".join(
            f"{address}: {exc_value}"
            for address, (_exc_type, exc_value, _exc_traceback) in failures.items())

        self.log.debug(
            "Could not find enough Ursulas to accept proposals.\n"
            f"Accepted: {accepted_addresses}\n"
            f"Rejected:\n{rejected_proposals}")

        raise self._not_enough_ursulas_exception()
Beispiel #8
0
class TreasureMapPublisher:
    """
    Runs `worker` over all of `nodes` through a `WorkerPool` whose target is
    a percentage of the node count, so that callers can block only until
    that share of the nodes has been handled successfully.
    """

    log = Logger('TreasureMapPublisher')

    def __init__(self, worker, nodes, percent_to_complete_before_release=5,
                 threadpool_size=120, timeout=20):
        """
        :param worker: callable executed by the pool for each node.
        :param nodes: sized collection of nodes, all handed to the pool at once.
        :param percent_to_complete_before_release: share of `nodes` (in percent, rounded up)
            used as the pool's target number of successes.
        :param threadpool_size: passed through to `WorkerPool`.
        :param timeout: passed through to `WorkerPool`.
        """
        node_count = len(nodes)
        self._total = node_count
        self._block_until_this_many_are_complete = math.ceil(
            node_count * percent_to_complete_before_release / 100)
        self._worker_pool = WorkerPool(worker=worker,
                                       value_factory=AllAtOnceFactory(nodes),
                                       target_successes=self._block_until_this_many_are_complete,
                                       timeout=timeout,
                                       stagger_timeout=0,
                                       threadpool_size=threadpool_size)

    @property
    def completed(self):
        """The successes the worker pool has collected so far."""
        # TODO: lock dict before copying?
        return self._worker_pool.get_successes()

    def start(self):
        """Start the worker pool; with a running reactor, also join the pool from a reactor thread."""
        self.log.info("TreasureMapPublisher starting")
        self._worker_pool.start()
        if not reactor.running:
            return
        reactor.callInThread(self.block_until_complete)

    def block_until_success_is_reasonably_likely(self):
        """
        Block until the pool reaches its target number of successes and return the successes.

        Note: `OutOfValues`/`TimedOut` may be raised here, which means not even
        `percent_to_complete_before_release` successes were reached.
        For now the exception is simply allowed to propagate.
        """
        self._worker_pool.block_until_target_successes()
        contacted = self.completed
        self.log.debug(
            f"The minimal amount of nodes ({len(contacted)}) was contacted "
            "while blocking for treasure map publication.")
        return contacted

    def block_until_complete(self):
        """Block until the worker pool has been joined."""
        self._worker_pool.join()
Beispiel #9
0
    def _sample(
        self,
        network_middleware: RestMiddleware,
        ursulas: Optional[Iterable['Ursula']] = None,
        timeout: int = 10,
    ) -> List['Ursula']:
        """
        Send concurrent requests to the /ping HTTP endpoint of nodes drawn from the reservoir.

        Handpicked `ursulas` (if any) are passed to the reservoir by checksum address.
        Returns the list of nodes that answered once `self.shares` of them have done so;
        raises `self.NotEnoughUrsulas` if fewer could be reached
        (including when the pool ran out of values or timed out).
        """

        handpicked = ursulas or []
        handpicked_addresses = [ChecksumAddress(node.checksum_address) for node in handpicked]

        self.publisher.block_until_number_of_known_nodes_is(
            self.shares, learn_on_this_thread=True, eager=True)
        factory = PrefetchStrategy(self._make_reservoir(handpicked_addresses), self.shares)

        def ping(address) -> 'Ursula':
            return self._ping_node(address, network_middleware)

        pool = WorkerPool(
            worker=ping,
            value_factory=factory,
            target_successes=self.shares,
            timeout=timeout,
            stagger_timeout=1,
            threadpool_size=self.shares,
        )
        pool.start()
        try:
            reached = pool.block_until_target_successes()
        except (WorkerPool.OutOfValues, WorkerPool.TimedOut):
            # Other exceptions could be raised here instead; we fall through
            # to the shortage handling below with whatever has succeeded so far.
            reached = pool.get_successes()
        finally:
            pool.cancel()
            pool.join()
        failures = pool.get_failures()

        accepted_addresses = ", ".join(node.checksum_address for node in reached.values())

        if len(reached) < self.shares:
            # Each failure value is a (type, value, traceback) triple; only the value is reported.
            rejections = "\n".join(
                f"{address}: {exc_value}"
                for address, (_exc_type, exc_value, _exc_traceback) in failures.items())
            message = (
                "Failed to contact enough sampled nodes.\n"
                f"Selected:\n{accepted_addresses}\n"
                f"Unavailable:\n{rejections}"
            )
            self.log.debug(message)
            raise self.NotEnoughUrsulas(message)

        self.log.debug(f"Selected nodes for policy: {accepted_addresses}")
        return list(reached.values())
Beispiel #10
0
    def get_ursulas(
        self,
        quantity: int,
        exclude_ursulas: Optional[Sequence[ChecksumAddress]] = None,
        include_ursulas: Optional[Sequence[ChecksumAddress]] = None
    ) -> List[UrsulaInfo]:
        """
        Sample `quantity` known, reachable Ursulas and return their connection info.

        Addresses are drawn from a reservoir built from `quantity`, `exclude_ursulas`
        and `include_ursulas`; every drawn node is pinged concurrently via a `WorkerPool`.

        :param quantity: number of Ursulas to return.
        :param exclude_ursulas: addresses passed to the reservoir as exclusions.
        :param include_ursulas: addresses passed to the reservoir as inclusions.
        :return: one `UrsulaInfo` per successfully pinged Ursula.

        Any exception raised by `block_until_target_successes()` propagates to the caller
        after the pool has been cancelled.
        """
        reservoir = self._make_reservoir(quantity, exclude_ursulas,
                                         include_ursulas)
        value_factory = PrefetchStrategy(reservoir, quantity)

        def get_ursula_info(ursula_address) -> Porter.UrsulaInfo:
            # Normalize once and reuse the result for the membership test, the lookup
            # and the returned info (previously the address was normalized twice).
            checksum_address = to_checksum_address(ursula_address)
            if checksum_address not in self.known_nodes:
                # The error intentionally reports the address exactly as it was supplied.
                raise ValueError(f"{ursula_address} is not known")

            ursula = self.known_nodes[checksum_address]
            try:
                # ensure node is up and reachable
                self.network_middleware.ping(ursula)
                return Porter.UrsulaInfo(
                    checksum_address=checksum_address,
                    uri=f"{ursula.rest_interface.formal_uri}",
                    encrypting_key=ursula.public_keys(DecryptingPower))
            except Exception as e:
                # Log for diagnostics, then re-raise so the pool records the failure.
                self.log.debug(
                    f"Ursula ({checksum_address}) is unreachable: {str(e)}")
                raise

        self.block_until_number_of_known_nodes_is(
            quantity,
            timeout=self.execution_timeout,
            learn_on_this_thread=True,
            eager=True)

        worker_pool = WorkerPool(worker=get_ursula_info,
                                 value_factory=value_factory,
                                 target_successes=quantity,
                                 timeout=self.execution_timeout,
                                 stagger_timeout=1)
        worker_pool.start()
        try:
            successes = worker_pool.block_until_target_successes()
        finally:
            worker_pool.cancel()
            # don't wait for it to stop by "joining" - too slow...

        return list(successes.values())
Beispiel #11
0
    def get_ursulas(
        self,
        quantity: int,
        duration_periods: Optional[int] = None,  # optional for federated mode
        exclude_ursulas: Optional[Sequence[ChecksumAddress]] = None,
        include_ursulas: Optional[Sequence[ChecksumAddress]] = None
    ) -> List[UrsulaInfo]:
        """
        Sample `quantity` known, valid Ursulas and return their connection info.

        Addresses are drawn from a staker reservoir built from `quantity`,
        `duration_periods`, `exclude_ursulas` and `include_ursulas`; every drawn node
        is verified concurrently via a `WorkerPool`.

        :param quantity: number of Ursulas to return.
        :param duration_periods: passed to the staker reservoir; optional for federated mode.
        :param exclude_ursulas: addresses passed to the reservoir as exclusions.
        :param include_ursulas: addresses passed to the reservoir as inclusions.
        :return: one `UrsulaInfo` per successfully verified Ursula.

        Any exception raised by `block_until_target_successes()` propagates to the caller
        after the pool has been cancelled.
        """
        reservoir = self._make_staker_reservoir(quantity, duration_periods,
                                                exclude_ursulas,
                                                include_ursulas)
        value_factory = PrefetchStrategy(reservoir, quantity)

        def get_ursula_info(ursula_address) -> Porter.UrsulaInfo:
            if ursula_address not in self.known_nodes:
                raise ValueError(f"{ursula_address} is not known")

            ursula = self.known_nodes[ursula_address]
            try:
                # verify node is valid
                self.network_middleware.client.verify_and_parse_node_or_host_and_port(
                    node_or_sprout=ursula, host=None, port=None)

                return Porter.UrsulaInfo(
                    checksum_address=ursula_address,
                    uri=f"{ursula.rest_interface.formal_uri}",
                    encrypting_key=ursula.public_keys(DecryptingPower))
            except Exception as e:
                # Log for diagnostics, then re-raise so the pool records the failure.
                self.log.debug(
                    f"Unable to obtain Ursula information ({ursula_address}): {str(e)}"
                )
                raise

        self.block_until_number_of_known_nodes_is(
            quantity,
            timeout=self.DEFAULT_EXECUTION_TIMEOUT,
            learn_on_this_thread=True,
            eager=True)

        worker_pool = WorkerPool(worker=get_ursula_info,
                                 value_factory=value_factory,
                                 target_successes=quantity,
                                 timeout=self.DEFAULT_EXECUTION_TIMEOUT,
                                 stagger_timeout=1,
                                 threadpool_size=quantity)
        worker_pool.start()
        try:
            successes = worker_pool.block_until_target_successes()
        finally:
            # Always cancel, on success and on failure alike, so that workers
            # which have not run yet are not left behind once we return or raise.
            # The pool is deliberately not joined, to avoid waiting for in-flight workers.
            worker_pool.cancel()

        return list(successes.values())
Beispiel #12
0
def test_wait_for_successes_out_of_values(join_worker_pool):
    """
    When the value factory runs dry before the target number of successes is reached,
    `block_until_target_successes()` must raise `WorkerPool.OutOfValues`
    carrying a summary of the recorded failures and their tracebacks.
    """

    # Only 9 workers can succeed while 10 successes are requested.
    rules = [
        (OperatorRule(timeout_min=0.5, timeout_max=1.5), 9),
        (OperatorRule(fails=True, timeout_min=0.5, timeout_max=1.5), 20),
    ]
    outcomes, worker = generate_workers(rules, seed=123)

    worker_pool = WorkerPool(
        worker,
        AllAtOnceFactory(list(outcomes)),
        target_successes=10,
        timeout=10,
        threadpool_size=15,
    )
    join_worker_pool(worker_pool)

    started_at = time.monotonic()
    worker_pool.start()
    with pytest.raises(WorkerPool.OutOfValues) as exc_info:
        worker_pool.block_until_target_successes()
    elapsed = time.monotonic() - started_at

    # 29 workers on 15 threads is roughly 2 workers per thread,
    # so the run should not exceed 1.5s (max timeout) * 2 by much.
    assert elapsed < 4

    message = str(exc_info.value)

    assert "Execution stopped before completion - not enough available values" in message

    # All 20 workers configured to fail must be accounted for.
    num_expected_failures = 20
    assert f"{num_expected_failures} failures recorded" in message

    # The source line that raises inside the worker, as it shows up in a traceback.
    # Deliberately NOT an f-string: the literal text `{value}` is what gets matched.
    raise_statement = 'raise Exception(f"Operator for {value} failed")'

    # Each failed value has its own traceback showing both the raising line
    # and the formatted error for that value.
    tracebacks = exc_info.value.get_tracebacks()
    assert len(tracebacks) == num_expected_failures
    for value, traceback_text in tracebacks.items():
        assert raise_statement in traceback_text
        assert f'Operator for {value} failed' in traceback_text

    # The raising line closes the displayed traceback, since that is
    # where the worker actually failed, so it must appear in the message too.
    assert raise_statement in message
Beispiel #13
0
def test_batched_value_generation(join_worker_pool):
    """
    Tests a value factory that gives out value batches in portions.

    Runs a pool fed by `BatchFactory` until 10 successes are collected, then checks
    that the recorded batch sizes never grow, that cancelling keeps the run short,
    and that the returned successes are all present in `get_successes()`.
    """

    # 80 succeeding and 80 failing workers, each taking between 0.5s and 1.5s.
    outcomes, worker = generate_workers([
        (OperatorRule(timeout_min=0.5, timeout_max=1.5), 80),
        (OperatorRule(fails=True, timeout_min=0.5, timeout_max=1.5), 80),
    ],
                                        seed=123)

    factory = BatchFactory(list(outcomes))
    pool = WorkerPool(worker,
                      factory,
                      target_successes=10,
                      timeout=10,
                      threadpool_size=10,
                      stagger_timeout=0.5)
    join_worker_pool(pool)

    # Time the whole start -> target reached -> cancel -> join sequence.
    t_start = time.monotonic()
    pool.start()
    successes = pool.block_until_target_successes()
    pool.cancel()
    pool.join()
    t_end = time.monotonic()

    assert len(successes) == 10

    # Check that batch sizes in the factory were getting progressively smaller
    # as the number of successes grew.
    # (The comparison is non-strict: consecutive batches of equal size are accepted.)
    assert all(factory.batch_sizes[i] >= factory.batch_sizes[i + 1]
               for i in range(len(factory.batch_sizes) - 1))

    # Since we canceled the pool, no more workers will be started and we will finish faster
    assert t_end - t_start < 4

    # Query the pool again after it has been joined.
    successes_copy = pool.get_successes()
    # NOTE(review): `failures_copy` is not asserted on in the visible part of this test.
    failures_copy = pool.get_failures()

    # Everything returned by `block_until_target_successes()` must still be reported as a success.
    assert all(value in successes_copy for value in successes)