Esempio n. 1
0
async def test_local_dask_gateway_server(
        loop: AbstractEventLoop, local_dask_gateway_server: DaskGatewayServer):
    """Create a cluster on the local dask-gateway, scale it up, run a job, scale it down.

    The number of workers is read from ``cluster.scheduler_info`` and polled
    until it matches the requested size. Each wait retries every 0.24s and
    gives up after 30s, re-raising the last error.
    """
    NUM_WORKERS = 10

    async with Gateway(
            local_dask_gateway_server.address,
            local_dask_gateway_server.proxy_address,
            asynchronous=True,
            auth=auth.BasicAuth("pytest_user",
                                local_dask_gateway_server.password),
    ) as gateway:
        print(f"--> {gateway=} created")
        cluster_options = await gateway.cluster_options()
        gateway_versions = await gateway.get_versions()
        clusters_list = await gateway.list_clusters()
        print(f"--> {gateway_versions=}, {cluster_options=}, {clusters_list=}")
        for option in cluster_options.items():
            print(f"--> {option=}")

        async with gateway.new_cluster() as cluster:
            assert cluster
            print(
                f"--> created new cluster {cluster=}, {cluster.scheduler_info=}"
            )

            async def _wait_for_worker_count(expected_count: int) -> None:
                """Poll the scheduler info until it lists ``expected_count`` workers."""
                async for attempt in AsyncRetrying(reraise=True,
                                                   wait=wait_fixed(0.24),
                                                   stop=stop_after_delay(30)):
                    with attempt:
                        # default to an empty container so that len() still
                        # works when the "workers" key is not there (yet)
                        current_count = len(
                            cluster.scheduler_info.get("workers", {}))
                        print(f"cluster {cluster=} has now {current_count}")
                        assert current_count == expected_count

            await cluster.scale(NUM_WORKERS)
            print(f"--> scaling cluster {cluster=} to {NUM_WORKERS} workers")
            await _wait_for_worker_count(NUM_WORKERS)

            async with cluster.get_client() as client:
                print(f"--> created new client {client=}, submitting a job")
                res = await client.submit(lambda x: x + 1, 1)  # type: ignore
                assert res == 2

            print(f"--> scaling cluster {cluster=} back to 0")
            await cluster.scale(0)
            await _wait_for_worker_count(0)
Esempio n. 2
0
async def test_publish_event(dask_client: distributed.Client):
    """Publish a ``TaskLogEvent`` on a dask topic and check the subscriber gets it back unchanged."""
    topic_publisher = distributed.Pub("some_topic")
    topic_subscriber = distributed.Sub("some_topic")

    # retry (on AssertionError only) until the publisher reports a subscriber
    subscribers_poll = AsyncRetrying(
        reraise=True,
        retry=retry_if_exception_type(AssertionError),
        wait=wait_fixed(0.01),
        stop=stop_after_delay(60),
    )
    async for attempt in subscribers_poll:
        with attempt:
            attempt_number = attempt.retry_state.attempt_number
            print(f"waiting for subscribers... attempt={attempt_number}")
            assert topic_publisher.subscribers
            print("we do have subscribers!")

    expected_event = TaskLogEvent(job_id="some_fake_job_id", log="the log")
    publish_event(dask_pub=topic_publisher, event=expected_event)

    # NOTE: this tests runs a sync dask client,
    # and the CI seems to have sometimes difficulties having this run in a reasonable time
    # NOTE(review): the original note mentions a "long time out" but the value
    # passed below is 1 - confirm which one is intended
    raw_message = topic_subscriber.get(timeout=1)
    assert raw_message is not None
    decoded_event = TaskLogEvent.parse_raw(raw_message)  # type: ignore
    assert decoded_event == expected_event
Esempio n. 3
0
async def _assert_and_wait_for_pipeline_state(
    client: TestClient,
    project_id: str,
    expected_state: RunningState,
    expected_api_response: ExpectedResponse,
):
    """Poll the project state endpoint until it reports ``expected_state``.

    The endpoint is queried every 5s for at most 120s. Only ``AssertionError``
    triggers a new attempt; the last error is re-raised when time runs out.
    """
    state_route = client.app.router["state_project"]
    url_project_state = state_route.url_for(project_id=project_id)
    assert url_project_state == URL(f"/{API_VTAG}/projects/{project_id}/state")

    state_poll = AsyncRetrying(
        reraise=True,
        stop=stop_after_delay(120),
        wait=wait_fixed(5),
        retry=retry_if_exception_type(AssertionError),
    )
    async for attempt in state_poll:
        with attempt:
            print(
                f"--> waiting for pipeline to complete with {expected_state=} attempt {attempt.retry_state.attempt_number}..."
            )
            resp = await client.get(f"{url_project_state}")
            data, _ = await assert_status(resp, expected_api_response.ok)

            # validate the payload shape before reading the state value
            assert "state" in data
            project_state = data["state"]
            assert "value" in project_state

            received_study_state = RunningState(project_state["value"])
            print(f"<-- received pipeline state: {received_study_state=}")
            assert received_study_state == expected_state

            retry_statistics = attempt.retry_state.retry_object.statistics
            print(
                f"--> pipeline completed with state {received_study_state=}! "
                f"That's great: {json_dumps(retry_statistics)}",
            )
Esempio n. 4
0
def test_product_frontend_app_served(
    simcore_stack_deployed_services: List[Service],
    traefik_service: URL,
    test_url: str,
    expected_in_content: str,
):
    """Request ``test_url`` until it answers OK, then check it serves the expected html."""
    # NOTE: it takes a bit of time until traefik sets up the correct proxy and
    # the webserver takes time to start
    # TODO: determine wait times with pre-calibration step
    retry_for_a_minute = tenacity.retry(
        wait=wait_fixed(5),
        stop=stop_after_delay(1 * MINUTE),
    )

    @retry_for_a_minute
    def request_test_url():
        response = requests.get(test_url)
        failure_message = (
            f"Failed request {response.url} with {response.status_code}: {response.reason}"
        )
        assert response.ok, failure_message
        return response

    resp = request_test_url()

    # TODO: serch osparc-simcore commit id e.g. 'osparc-simcore v817d82e'
    assert resp.ok
    content_type = resp.headers["Content-Type"]
    assert "text/html" in content_type
    assert expected_in_content in resp.text, "Expected boot not found in response"
Esempio n. 5
0
async def _is_registry_reachable(registry_settings: RegistrySettings) -> None:
    """Check that the registry answers ``200`` with an empty JSON object on its API url.

    Args:
        registry_settings: provides the API url, whether SSL is used and the
            optional credentials sent with the request.

    Raises:
        _RegistryNotReachableException: the registry answered something else
            than ``200`` with ``{}``. Errors raised by the HTTP client are
            re-raised as they are (``reraise=True``).
    """
    # NOTE(review): with stop_after_attempt(1) only one attempt is made, so
    # `wait` and `before_sleep` presumably never come into play - confirm
    # whether retries were intended here
    async for attempt in AsyncRetrying(
            wait=wait_fixed(1),
            stop=stop_after_attempt(1),
            before_sleep=before_sleep_log(logger, logging.INFO),
            reraise=True,
    ):
        with attempt:
            async with httpx.AsyncClient() as client:
                # credentials are only sent when authentication is enabled
                params = {}
                if registry_settings.REGISTRY_AUTH:
                    params["auth"] = (
                        registry_settings.REGISTRY_USER,
                        registry_settings.REGISTRY_PW.get_secret_value(),
                    )

                protocol = "https" if registry_settings.REGISTRY_SSL else "http"
                url = f"{protocol}://{registry_settings.api_url}/"
                # log through the module logger like the rest of this function,
                # instead of the root logger
                logger.info("Registry test url ='%s'", url)
                response = await client.get(url, timeout=1, **params)
                reachable = (response.status_code == status.HTTP_200_OK
                             and response.json() == {})
                if not reachable:
                    logger.error("Response: %s", response)
                    error_message = (
                        f"Could not reach registry {registry_settings.api_url} "
                        f"auth={registry_settings.REGISTRY_AUTH}")
                    raise _RegistryNotReachableException(error_message)
Esempio n. 6
0
 def make_retry_decorator(
         retries: int, delay: float
 ) -> typing.Callable[[typing.Callable], typing.Callable]:
     """Build a ``retry`` decorator with a fixed wait of ``delay`` between attempts.

     A call is attempted again when its result has ``status >= 500`` or when it
     raises ``aiohttp.ClientError``; attempts stop after ``retries + 1`` of them.
     """
     on_server_error = retry_if_result(lambda res: res.status >= 500)
     on_client_error = retry_if_exception_type(
         exception_types=aiohttp.ClientError)
     max_attempts = retries + 1
     return retry(
         wait=wait_fixed(delay),
         retry=on_server_error | on_client_error,
         stop=stop_after_attempt(max_attempts),
     )
Esempio n. 7
0
    def __init__(self, logger: Optional[logging.Logger] = None):
        """Store in ``self.kwargs`` the retry options: 2s fixed wait, stop after 3 minutes.

        Args:
            logger: used to log (WARNING) before each sleep; falls back to the
                module-level ``log`` when not given.
        """
        if not logger:
            logger = log

        self.kwargs = {
            "wait": wait_fixed(2),
            "stop": stop_after_delay(3 * _MINUTE),
            "before_sleep": before_sleep_log(logger, logging.WARNING),
            "reraise": True,
        }
Esempio n. 8
0
    def __init__(self, logger: Optional[logging.Logger] = None):
        """Store in ``self.kwargs`` the retry options built from ``WAIT_SECS`` and ``ATTEMPTS_COUNT``.

        Args:
            logger: used to log (WARNING) before each sleep; falls back to the
                module-level ``log`` when not given.
        """
        if not logger:
            logger = log

        self.kwargs = {
            "wait": wait_fixed(self.WAIT_SECS),
            "stop": stop_after_attempt(self.ATTEMPTS_COUNT),
            "before_sleep": before_sleep_log(logger, logging.WARNING),
            "reraise": True,
        }
Esempio n. 9
0
    def __init__(self, logger: Optional[logging.Logger] = None):
        """Store in ``self.kwargs`` the retry options applied on ``DatabaseError``.

        Attempts are separated by ``WAIT_SECS`` and limited to ``ATTEMPTS_COUNT``;
        ``raise_http_unavailable_error`` is registered as the retry-error callback.

        Args:
            logger: used to log (WARNING) after each attempt; falls back to the
                module-level ``log`` when not given.
        """
        if not logger:
            logger = log

        self.kwargs = {
            "retry": retry_if_exception_type(DatabaseError),
            "wait": wait_fixed(self.WAIT_SECS),
            "stop": stop_after_attempt(self.ATTEMPTS_COUNT),
            "after": after_log(logger, logging.WARNING),
            "retry_error_callback": raise_http_unavailable_error,
        }
Esempio n. 10
0
 async def test_sleeps(self):
     """Run a single always-failing attempt and check how long the loop took.

     The elapsed value, as measured with ``current_time_ms``, must stay
     below 1.1.
     """
     started_at = current_time_ms()
     single_attempt = tasyncio.AsyncRetrying(stop=stop_after_attempt(1),
                                             wait=wait_fixed(1))
     try:
         async for attempt in single_attempt:
             with attempt:
                 raise Exception()
     except RetryError:
         # expected: the only attempt failed and no further one is allowed
         pass
     elapsed = current_time_ms() - started_at
     self.assertLess(elapsed, 1.1)
async def assert_and_wait_for_pipeline_status(
    client: httpx.AsyncClient,
    url: AnyHttpUrl,
    user_id: UserID,
    project_uuid: UUID,
    wait_for_states: Optional[List[RunningState]] = None,
) -> ComputationTaskGet:
    """Poll ``url`` until the computation task reaches one of ``wait_for_states``.

    Args:
        client: HTTP client used to query the computation.
        url: endpoint returning the computation task.
        user_id: sent as the ``user_id`` query parameter.
        project_uuid: expected id of the returned task.
        wait_for_states: accepted states; when empty or ``None`` it defaults to
            SUCCESS, FAILED and ABORTED.

    Returns:
        The parsed task, once its state is one of the accepted ones.

    Raises:
        AssertionError: the last failed check, once 5 minutes have elapsed.
    """
    wait_for_states = wait_for_states or [
        RunningState.SUCCESS,
        RunningState.FAILED,
        RunningState.ABORTED,
    ]
    max_timeout_s = 5 * MINUTE

    async def _fetch_and_check_task() -> ComputationTaskGet:
        response = await client.get(url, params={"user_id": user_id})
        assert (
            response.status_code == status.HTTP_202_ACCEPTED
        ), f"response code is {response.status_code}, error: {response.text}"

        task_out = ComputationTaskGet.parse_obj(response.json())
        expected_task_url = f"{client.base_url}/v2/computations/{project_uuid}"
        assert task_out.id == project_uuid
        assert task_out.url == expected_task_url
        print(
            f"Pipeline '{project_uuid=}' current task out is '{task_out=}'",
        )
        assert wait_for_states
        assert (
            task_out.state in wait_for_states
        ), f"current task state is '{task_out.state}', not in any of {wait_for_states}"
        return task_out

    started_at = time.monotonic()
    status_poll = AsyncRetrying(
        stop=stop_after_delay(max_timeout_s),
        wait=wait_fixed(2),
        retry=retry_if_exception_type(AssertionError),
        reraise=True,
    )
    async for attempt in status_poll:
        # measured when the attempt starts, reported in both messages below
        elapsed = time.monotonic() - started_at
        with attempt:
            print(
                f"Waiting for pipeline '{project_uuid=}' state to be one of: {wait_for_states=}, attempt={attempt.retry_state.attempt_number}, time={elapsed}s"
            )
            task_out = await _fetch_and_check_task()
            print(
                f"Pipeline '{project_uuid=}' state succesfuly became '{task_out.state}'\n{json.dumps(attempt.retry_state.retry_object.statistics, indent=2)}, time={elapsed}s"
            )

            return task_out

    # this is only to satisfy pylance
    raise AssertionError("No computation task generated!")
Esempio n. 12
0
def wait_for_status(
    method: Callable[..., T],
    validate: Optional[Callable[[T], bool]] = None,
    fixed_wait_time: float = 5,
    timeout: float = 300,
    **method_kwargs: Any,
) -> T:
    """Tries to run *method* (and also validate its output) until no AssertionError is raised.

    Arguments are described below. More keyword arguments can be given for `method`.

    Args:
        method (Callable): An unreliable method (or a status query). The method is executed again
            while it raises an AssertionError or the `validate` function outputs `False`, and the
            timeout has not expired.

        validate (Optional[Callable], optional): A callable that validates the output of *method*.
            It receives the output of `method` as argument and returns `True` if it is ok and
            `False` if it is invalid. Defaults to None, meaning no validation is executed.

        fixed_wait_time (float, optional): Time (in seconds) to wait between attempts. Defaults to 5.

        timeout (float, optional): Time (in seconds) after which no more attempts are made.
            Defaults to 300 (5 minutes).

    Returns:
        The method's output

    Raises:
        Exception: any error other than AssertionError raised by `method` or `validate` is not
            retried and propagates unchanged. When the timeout expires the retry decorator
            raises its own error (no `reraise` is configured).
    """
    @retry(
        wait=wait_fixed(fixed_wait_time),
        stop=stop_after_delay(timeout),
        retry=retry_if_exception_type(AssertionError),
    )
    def _wait_for_status(
        method: Callable[..., T],
        validate: Optional[Callable[[T], bool]] = None,
        **method_kwargs: Any,
    ) -> T:
        """Runs the method and apply validation."""
        # tracks whether `result` exists, so that the error report below never
        # reads an unbound name and hides the original exception
        has_result = False
        try:
            result: T = method(**method_kwargs)
            has_result = True
            if validate is not None:
                assert validate(
                    result), f'Validation failed. Result is {result}'
        except AssertionError:
            # handled by the retry decorator
            raise
        except Exception as ex:
            if has_result:
                print(
                    f'An unexpected error was detected, method result was: {result}'
                )
            else:
                print(
                    f'An unexpected error was detected before the method returned: {ex!r}'
                )
            raise
        return result

    return _wait_for_status(method=method, validate=validate, **method_kwargs)
Esempio n. 13
0
def simcore_stack_deployed_services(
    docker_registry: UrlStr,
    core_stack_namespace: str,
    ops_stack_namespace: str,
    core_stack_compose_specs: ComposeSpec,
    docker_client: DockerClient,
) -> List[Service]:
    """Wait until every docker service is running and return the core stack services.

    All services listed by the docker client are checked every 5s for at most
    4 minutes. The ``docker stack ps`` tables of both stacks are printed
    whether or not the wait succeeded.
    """

    # NOTE: the goal here is NOT to test time-to-deploy but
    # rather guaranteing that the framework is fully deployed before starting
    # tests. Obviously in a critical state in which the frameworks has a problem
    # the fixture will fail
    stack_namespaces = (core_stack_namespace, ops_stack_namespace)
    try:
        for attempt in Retrying(
                wait=wait_fixed(5),
                stop=stop_after_delay(4 * _MINUTE),
                before_sleep=before_sleep_log(log, logging.INFO),
                reraise=True,
        ):
            with attempt:
                for deployed_service in docker_client.services.list():
                    assert_service_is_running(deployed_service)

    finally:
        for stack_namespace in stack_namespaces:
            subprocess.run(f"docker stack ps {stack_namespace}",
                           shell=True,
                           check=False)

        # logs table like
        #  ID                  NAME                  IMAGE                                      NODE                DESIRED STATE       CURRENT STATE                ERROR
        # xbrhmaygtb76        simcore_sidecar.1     itisfoundation/sidecar:latest              crespo-wkstn        Running             Running 53 seconds ago
        # zde7p8qdwk4j        simcore_rabbit.1      itisfoundation/rabbitmq:3.8.0-management   crespo-wkstn        Running             Running 59 seconds ago
        # f2gxmhwq7hhk        simcore_postgres.1    postgres:10.10                             crespo-wkstn        Running             Running about a minute ago
        # 1lh2hulxmc4q        simcore_director.1    itisfoundation/director:latest             crespo-wkstn        Running             Running 34 seconds ago
        # ...

    # TODO: find a more reliable way to list services in a stack
    namespace_label = f"com.docker.stack.namespace={core_stack_namespace}"
    core_stack_services: List[Service] = list(
        docker_client.services.list(filters={"label": namespace_label})
    )  # type: ignore

    assert (core_stack_services
            ), f"Expected some services in core stack '{core_stack_namespace}'"

    # as many deployed services as declared in the compose specs
    expected_count = len(core_stack_compose_specs["services"].keys())
    assert expected_count == len(core_stack_services)

    return core_stack_services
Esempio n. 14
0
def wemo_off():
    """Switch off the wemo device found at ``settings.wemo_address`` and return ``"ok"``.

    Discovery and switch-off are retried every 10s; no stop condition is
    configured on the retry.
    """
    retry_every_10s = tenacity.retry(
        wait=wait_fixed(10),
        before_sleep=before_sleep_log(_LOGGER, logging.INFO),
    )

    @retry_every_10s
    def discover_and_off():
        wemo_address = settings.wemo_address
        wemo_port = pywemo.ouimeaux_device.probe_wemo(wemo_address)
        setup_url = 'http://%s:%i/setup.xml' % (wemo_address, wemo_port)
        wemo_device = pywemo.discovery.device_from_description(setup_url, None)
        wemo_device.off()
        _LOGGER.info("Called off on %s", wemo_device)

    discover_and_off()
    return "ok"
Esempio n. 15
0
 async def _check_all_services_are_running():
     """Run ``assert_service_is_running`` on every docker service until all of them pass.

     The checks of one attempt run in the default executor and are awaited
     together. Attempts are repeated every 5s for at most 8 minutes, then the
     last error is re-raised.
     """
     async for attempt in AsyncRetrying(
             wait=wait_fixed(5),
             stop=stop_after_delay(8 * MINUTE),
             before_sleep=before_sleep_log(log, logging.INFO),
             reraise=True,
     ):
         with attempt:
             event_loop = asyncio.get_event_loop()
             pending_checks = [
                 event_loop.run_in_executor(None, assert_service_is_running,
                                            service)
                 for service in docker_client.services.list()
             ]
             await asyncio.gather(*pending_checks)
Esempio n. 16
0
async def test_interactive_services_removed_after_logout(
    client: TestClient,
    logged_user: Dict[str, Any],
    empty_user_project: Dict[str, Any],
    mocked_director_v2_api: Dict[str, mock.MagicMock],
    create_dynamic_service_mock,
    client_session_id_factory: Callable[[], str],
    socketio_client_factory: Callable,
    # when guest user logs out garbage is collected
    storage_subsystem_mock: MockedStorageSubsystem,
    director_v2_service_mock: aioresponses,
    expected_save_state: bool,
):
    """Log out a user with an opened project and check its dynamic service gets stopped."""
    # login - logged_user fixture
    # create empty study - empty_user_project fixture
    # create dynamic service - create_dynamic_service_mock fixture
    project_uuid = empty_user_project["uuid"]
    dynamic_service = await create_dynamic_service_mock(logged_user["id"],
                                                        project_uuid)
    # create websocket
    session_id = client_session_id_factory()
    sio = await socketio_client_factory(session_id)
    # open project in client 1
    await open_project(client, empty_user_project["uuid"], session_id)
    # logout
    logout_url = client.app.router["auth_logout"].url_for()
    logout_response = await client.post(
        f"{logout_url}", json={"client_session_id": session_id})
    assert logout_response.url_obj.path == logout_url.path
    await assert_status(logout_response, web.HTTPOk)

    # check result perfomed by background task
    await asyncio.sleep(SERVICE_DELETION_DELAY + 1)
    await garbage_collector_core.collect_garbage(client.app)

    # assert dynamic service is removed *this is done in a fire/forget way so give a bit of leeway
    stop_call_poll = AsyncRetrying(reraise=True,
                                   stop=stop_after_attempt(10),
                                   wait=wait_fixed(1))
    async for attempt in stop_call_poll:
        with attempt:
            logger.warning(
                "Waiting for stop to have been called service_uuid=%s, save_state=%s",
                dynamic_service["service_uuid"],
                expected_save_state,
            )
            stop_service_mock = mocked_director_v2_api[
                "director_v2_core.stop_service"]
            stop_service_mock.assert_awaited_with(
                app=client.server.app,
                service_uuid=dynamic_service["service_uuid"],
                save_state=expected_save_state,
            )
Esempio n. 17
0
async def setup_registry(app: web.Application) -> AsyncIterator[None]:
    """Wait until ``is_registry_responsive(app)`` stops returning ``False``, then yield once.

    The check is repeated every 2s, with a WARNING log before each sleep; no
    stop condition is configured on the retry.
    """
    logger.debug("pinging registry...")

    retry_while_unresponsive = retry(
        wait=wait_fixed(2),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        # equality (not identity) with False is kept as is, so that the same
        # results as before trigger a new attempt
        retry=retry_if_result(lambda result: result == False),
        reraise=True,
    )

    @retry_while_unresponsive
    async def wait_until_registry_responsive(app: web.Application) -> bool:
        is_responsive = await is_registry_responsive(app)
        return is_responsive

    await wait_until_registry_responsive(app)
    logger.info("Connected to docker registry")
    yield
Esempio n. 18
0
 async def create(
     cls,
     app: FastAPI,
     settings: DaskSchedulerSettings,
     endpoint: AnyUrl,
     authentication: ClusterAuthentication,
 ) -> "DaskClient":
     """Connect to the dask scheduler/gateway at ``endpoint`` and build the client instance.

     Up to 3 attempts are made, 0.3s apart, with a WARNING log before each
     sleep; the last error is re-raised when all of them fail.

     Returns:
         The instance created on the first attempt that succeeds.
     """
     target_description = f"dask-scheduler/gateway at {endpoint}"
     logger.info(
         "Initiating connection to %s with auth: %s",
         target_description,
         authentication,
     )
     connection_attempts = AsyncRetrying(
         reraise=True,
         before_sleep=before_sleep_log(logger, logging.WARNING),
         wait=wait_fixed(0.3),
         stop=stop_after_attempt(3),
     )
     async for attempt in connection_attempts:
         with attempt:
             logger.debug(
                 "Connecting to %s, attempt %s...",
                 endpoint,
                 attempt.retry_state.attempt_number,
             )
             dask_subsystem = await _create_internal_client_based_on_auth(
                 endpoint, authentication)
             check_scheduler_status(dask_subsystem.client)

             cancellation_pub = distributed.Pub(TaskCancelEvent.topic_name(),
                                                client=dask_subsystem.client)
             connected_client = cls(
                 app=app,
                 dask_subsystem=dask_subsystem,
                 settings=settings,
                 cancellation_dask_pub=cancellation_pub,
             )

             retry_statistics = attempt.retry_state.retry_object.statistics
             logger.info(
                 "Connection to %s succeeded [%s]",
                 target_description,
                 json.dumps(retry_statistics),
             )
             scheduler_info = dask_subsystem.client.scheduler_info()
             logger.info(
                 "Scheduler info:\n%s",
                 json.dumps(scheduler_info, indent=2),
             )
             return connected_client
     # this is to satisfy pylance
     raise ValueError("Could not create client")
Esempio n. 19
0
def upgrade_and_close():
    """Used in migration service program to discover, upgrade and close"""

    # keep trying every 5s until the discover command succeeds, each failed
    # attempt being logged as an error
    discovery_attempts = Retrying(wait=wait_fixed(5),
                                  after=after_log(log, logging.ERROR))
    for attempt in discovery_attempts:
        with attempt:
            discovered = discover.callback()
            if not discovered:
                raise Exception("Postgres db was not discover")

    # FIXME: if database is not stampped!?
    try:
        # print the db info before and after upgrading to the head revision
        info.callback()
        upgrade.callback(revision="head")
        info.callback()
    except Exception:  # pylint: disable=broad-except
        # a failed upgrade is only logged
        log.exception("Unable to upgrade")

    click.echo("I did my job here. Bye!")
Esempio n. 20
0
async def _assert_and_wait_for_comp_task_states_to_be_transmitted_in_projects(
    project_id: str,
    postgres_session: sa.orm.session.Session,
):
    """Wait until run hash and state of every comp task are mirrored in the project workbench.

    Both tables are read again every 5s for at most 120s. Only
    ``AssertionError`` triggers a new attempt and the last one is re-raised
    when time runs out.
    """
    transfer_poll = AsyncRetrying(
        reraise=True,
        stop=stop_after_delay(120),
        wait=wait_fixed(5),
        retry=retry_if_exception_type(AssertionError),
    )
    async for attempt in transfer_poll:
        with attempt:
            print(
                f"--> waiting for pipeline results to move to projects table, attempt {attempt.retry_state.attempt_number}..."
            )
            tasks_by_node: Dict[NodeIdStr, Any] = _get_computational_tasks_from_db(
                project_id, postgres_session)
            workbench_by_node: Dict[NodeIdStr, Any] = _get_project_workbench_from_db(
                project_id, postgres_session)

            for node_id, comp_task in tasks_by_node.items():
                assert (
                    node_id in workbench_by_node
                ), f"node {node_id=} is missing from workbench {json_dumps(workbench_by_node, indent=2)}"

                project_node = workbench_by_node[node_id]

                # if this one is in, the other should also be but let's check it carefully
                assert comp_task.run_hash
                assert "runHash" in project_node
                assert comp_task.run_hash == project_node["runHash"]

                assert comp_task.state
                assert "state" in project_node
                assert "currentStatus" in project_node["state"]
                # NOTE: beware that the comp_tasks has StateType and Workbench has RunningState (sic)
                expected_status = DB_TO_RUNNING_STATE[comp_task.state].value
                assert expected_status == project_node["state"]["currentStatus"]

            retry_statistics = attempt.retry_state.retry_object.statistics
            print(
                "--> tasks were properly transferred! "
                f"That's great: {json_dumps(retry_statistics)}",
            )
Esempio n. 21
0
async def assert_service_is_available(  # pylint: disable=redefined-outer-name
        exposed_port: PositiveInt, is_legacy: bool, service_uuid: str) -> None:
    """Request the service address until it answers with an OK status.

    Legacy services are requested under ``/x/<service_uuid>``, the others at
    the root of the exposed port. Up to 60 attempts are made, 1s apart, then
    the last error is re-raised.
    """
    base_address = f"http://{get_ip()}:{exposed_port}"
    if is_legacy:
        service_address = f"{base_address}/x/{service_uuid}"
    else:
        service_address = base_address
    print(f"checking service @ {service_address}")

    availability_poll = AsyncRetrying(wait=wait_fixed(1),
                                      stop=stop_after_attempt(60),
                                      reraise=True)
    async for attempt in availability_poll:
        with attempt:
            # a new client is created for every attempt
            async with httpx.AsyncClient() as http_client:
                response = await http_client.get(service_address)
                print(
                    f"{SEPARATOR}\nAttempt={attempt.retry_state.attempt_number}"
                )
                print(
                    f"Body:\n{response.text}\nHeaders={response.headers}\n{SEPARATOR}"
                )
                assert response.status_code == httpx.codes.OK, response.text
Esempio n. 22
0
async def _assert_wait_for_task_status(
    job_id: str,
    dask_client: DaskClient,
    expected_status: RunningState,
    timeout: Optional[int] = None,
):
    """Poll the status of ``job_id`` every second until it equals ``expected_status``.

    Args:
        job_id: identifier passed to ``dask_client.get_task_status``.
        dask_client: client queried for the task status.
        expected_status: status the task has to reach.
        timeout: maximum wait; a falsy value (``None`` or ``0``) selects
            ``_ALLOW_TIME_FOR_GATEWAY_TO_CREATE_WORKERS`` instead.
    """
    max_wait = timeout or _ALLOW_TIME_FOR_GATEWAY_TO_CREATE_WORKERS
    status_poll = AsyncRetrying(
        reraise=True,
        stop=stop_after_delay(max_wait),
        wait=wait_fixed(1),
    )
    async for attempt in status_poll:
        with attempt:
            print(
                f"waiting for task to be {expected_status=}, "
                f"Attempt={attempt.retry_state.attempt_number}"
            )
            current_task_status = await dask_client.get_task_status(job_id)
            assert isinstance(current_task_status, RunningState)
            print(f"{current_task_status=} vs {expected_status=}")
            assert current_task_status == expected_status
Esempio n. 23
0
    async def _create_client(address: str) -> aioredis.Redis:
        """Create a redis pool on ``address``, retrying for up to one minute.

        Attempts are separated by ``_WAIT_SECS``, with a WARNING log before
        each sleep; the last error is re-raised when time runs out.
        """
        client: Optional[aioredis.Redis] = None

        connection_attempts = AsyncRetrying(
            stop=stop_after_delay(1 * _MINUTE),
            wait=wait_fixed(_WAIT_SECS),
            before_sleep=before_sleep_log(log, logging.WARNING),
            reraise=True,
        )
        async for attempt in connection_attempts:
            with attempt:
                client = await aioredis.create_redis_pool(address,
                                                          encoding="utf-8")
                retry_statistics = attempt.retry_state.retry_object.statistics
                log.info(
                    "Connection to %s succeeded with %s [%s]",
                    f"redis at {address=}",
                    f"{client=}",
                    json.dumps(retry_statistics),
                )
        assert client  # nosec
        return client
Esempio n. 24
0
async def test_listen_comp_tasks_task(
    mock_project_subsystem: Dict,
    comp_task_listening_task: None,
    client,
    update_values: Dict[str, Any],
    expected_calls: List[str],
    task_class: NodeClass,
):
    """Insert then update a comp task and check which project hooks get awaited.

    Mocks named in ``expected_calls`` must be awaited within 10s (checked
    every second); all the other ones must not have been called.
    """
    db_engine: aiopg.sa.Engine = client.app[APP_DB_ENGINE_KEY]
    async with db_engine.acquire() as conn:
        # let's put some stuff in there now
        insert_query = (
            comp_tasks.insert()
            .values(outputs=json.dumps({}), node_class=task_class)
            .returning(literal_column("*"))
        )
        insert_result = await conn.execute(insert_query)
        inserted_row: RowProxy = await insert_result.fetchone()
        task = dict(inserted_row)

        # let's update some values
        update_query = (
            comp_tasks.update()
            .values(**update_values)
            .where(comp_tasks.c.task_id == task["task_id"])
        )
        await conn.execute(update_query)

        # tests whether listener gets hooked calls executed
        for call_name, mocked_call in mock_project_subsystem.items():
            if call_name not in expected_calls:
                mocked_call.assert_not_called()
                continue

            async for attempt in AsyncRetrying(
                wait=wait_fixed(1),
                stop=stop_after_delay(10),
                retry=retry_if_exception_type(AssertionError),
                before_sleep=before_sleep_log(logger, logging.INFO),
                reraise=True,
            ):
                with attempt:
                    mocked_call.assert_awaited()
Esempio n. 25
0
def docker_swarm(docker_client: docker.client.DockerClient,
                 keep_docker_up: Iterator[bool]) -> Iterator[None]:
    """inits docker swarm

    The initialization is tried every 2s for at most 15s. After the ``yield``
    the swarm is left, unless ``keep_docker_up`` is set.
    """
    swarm_init_attempts = Retrying(wait=wait_fixed(2),
                                   stop=stop_after_delay(15),
                                   reraise=True)
    for attempt in swarm_init_attempts:
        with attempt:
            already_in_swarm = _in_docker_swarm(docker_client)
            if not already_in_swarm:
                docker_client.swarm.init(advertise_addr=get_localhost_ip())
            # if still not in swarm, raise an error to try and initialize again
            _in_docker_swarm(docker_client, raise_error=True)

    assert _in_docker_swarm(docker_client) is True

    yield

    if not keep_docker_up:
        left_swarm = docker_client.swarm.leave(force=True)
        assert left_swarm

    assert _in_docker_swarm(docker_client) is keep_docker_up
Esempio n. 26
0
async def ensure_volume_cleanup(docker_client: aiodocker.Docker,
                                node_uuid: str) -> None:
    """Wait until the ``dy-sidecar_<node_uuid>`` volumes are no longer listed by docker.

    Each matching volume is looked up again every 5s, for at most 15 attempts
    (with ``reraise=False``).
    """
    async def _get_volume_names() -> Set[str]:
        listing = await docker_client.volumes.list()
        return {volume["Name"] for volume in listing["Volumes"]}

    sidecar_volume_prefix = f"dy-sidecar_{node_uuid}"
    for volume_name in await _get_volume_names():
        if not volume_name.startswith(sidecar_volume_prefix):
            continue

        # docker volume results to be in use and it takes a bit to remove
        # it once done with it
        removal_poll = AsyncRetrying(
            reraise=False,
            stop=stop_after_attempt(15),
            wait=wait_fixed(5),
        )
        async for attempt in removal_poll:
            with attempt:
                # if volume is still found raise an exception
                # by the time this finishes all volumes should have been removed
                remaining_volumes = await _get_volume_names()
                if volume_name in remaining_volumes:
                    raise _VolumeNotExpectedError(volume_name)
Esempio n. 27
0
 async def create_client(url) -> aioredis.Redis:
     """Create a redis pool on ``url``, retrying for up to one minute.

     Attempts are separated by ``_WAIT_SECS``, with a WARNING log before each
     sleep; the last error is re-raised when time runs out.

     Args:
         url: address handed to ``aioredis.create_redis_pool``.

     Returns:
         The connected redis client.

     Raises:
         ValueError: the pool creation returned a falsy client on the last attempt.
     """
     # create redis client
     client: Optional[aioredis.Redis] = None
     async for attempt in AsyncRetrying(
             stop=stop_after_delay(1 * _MINUTE),
             wait=wait_fixed(_WAIT_SECS),
             before_sleep=before_sleep_log(log, logging.WARNING),
             reraise=True,
     ):
         with attempt:
             client = await aioredis.create_redis_pool(url,
                                                       encoding="utf-8")
             if not client:
                 # f-string, so that the value received is actually reported
                 raise ValueError(
                     f"Expected aioredis client instance, got {client}")
             # NOTE(review): `endpoint` is not defined in this function (the
             # parameter is `url`); presumably it comes from the enclosing
             # scope - confirm
             log.info(
                 "Connection to %s succeeded [%s]",
                 f"redis at {endpoint=}",
                 json.dumps(attempt.retry_state.retry_object.statistics),
             )
     assert client  # no sec
     return client
Esempio n. 28
0
async def test_creating_new_project_from_template_and_disconnecting_does_not_create_project(
    client: TestClient,
    logged_user: Dict[str, Any],
    primary_group: Dict[str, str],
    standard_groups: List[Dict[str, str]],
    template_project: Dict[str, Any],
    expected: ExpectedResponse,
    catalog_subsystem_mock: Callable,
    slow_storage_subsystem_mock: MockedStorageSubsystem,
    project_db_cleaner: None,
):
    """Time out while creating a project from a template and check no project is left behind."""
    catalog_subsystem_mock([template_project])
    projects_path = f"{API_PREFIX}/projects"

    # create a project from another and disconnect while doing this by timing out
    # POST /v0/projects
    create_url = client.app.router["create_projects"].url_for()
    assert str(create_url) == projects_path
    create_url = create_url.with_query(from_template=template_project["uuid"])
    with pytest.raises(asyncio.TimeoutError):
        await client.post(f"{create_url}", json={}, timeout=5)

    # let's check that there are no new project created, after timing out
    list_url = client.app.router["list_projects"].url_for()
    assert str(list_url) == projects_path
    list_url = list_url.with_query(type="user")
    list_response = await client.get(f"{list_url}")
    listed_projects, *_ = await assert_status(
        list_response,
        expected.ok,
    )
    assert not listed_projects

    # NOTE: after coming back here timing-out, the code shall still run
    # in the server which is why we need to retry here
    deletion_poll = AsyncRetrying(
        reraise=True, stop=stop_after_delay(20), wait=wait_fixed(1)
    )
    async for attempt in deletion_poll:
        with attempt:
            slow_storage_subsystem_mock.delete_project.assert_called_once()
Esempio n. 29
0
def simcore_docker_stack_and_registry_ready(
    event_loop: asyncio.AbstractEventLoop,
    docker_registry: UrlStr,
    docker_stack: Dict,
    simcore_services_ready: None,
) -> Dict:
    """Return ``docker_stack`` once the web API answers without an HTTP error.

    The API is requested every second for at most half a minute, with an INFO
    log before each sleep; the last error is re-raised when time runs out.
    """
    # At this point `simcore_services_ready` waited until all services
    # are running. Let's make one more check on the web-api
    web_api_poll = Retrying(
        wait=wait_fixed(1),
        stop=stop_after_delay(0.5 * _MINUTE),
        reraise=True,
        before_sleep=before_sleep_log(log, logging.INFO),
    )
    for attempt in web_api_poll:
        with attempt:
            response = httpx.get("http://127.0.0.1:9081/v0/")
            response.raise_for_status()
            retry_statistics = attempt.retry_state.retry_object.statistics
            log.info(
                "Connection to osparc-simcore web API succeeded [%s]",
                json.dumps(retry_statistics),
            )

    return docker_stack
Esempio n. 30
0
    mock_sleep = mocker.patch.object(make_request.retry, "sleep")

    with pytest.raises(requests.exceptions.HTTPError) as e:
        make_request()

    assert e.value.response.status_code == codes.too_many_requests

    assert_sleep_calls(
        mocker,
        mock_sleep,
        [float(WAIT_EXP_BASE**i) for i in range(MAX_NUMBER_ATTEMPTS - 1)],
    )


@throttling_retry(wait=wait_fixed(WAIT_EXP_BASE))
def make_request_wait_fixed():
    """GET ``TEST_URL`` and return the response, raising on an HTTP error status.

    Wrapped by ``throttling_retry`` with a fixed wait of ``WAIT_EXP_BASE``.
    """
    http_response = requests.get(TEST_URL)
    http_response.raise_for_status()
    return http_response


def test_throttling_retry_wait_fixed(requests_mock, mocker):
    requests_mock.get(
        TEST_URL,
        [
            {
                "status_code": codes.too_many_requests
            },
            {
                "status_code": codes.too_many_requests