Example no. 1
async def progress_message_parser(app: web.Application, data: bytes) -> None:
    # update corresponding project, node, progress value
    rabbit_message = ProgressRabbitMessage.parse_raw(data)
    try:
        project = await projects_api.update_project_node_progress(
            app,
            rabbit_message.user_id,
            f"{rabbit_message.project_id}",
            f"{rabbit_message.node_id}",
            progress=rabbit_message.progress,
        )
        if project:
            messages: List[SocketMessageDict] = [{
                "event_type": SOCKET_IO_NODE_UPDATED_EVENT,
                "data": {
                    "project_id": project["uuid"],
                    "node_id": rabbit_message.node_id,
                    "data": project["workbench"][f"{rabbit_message.node_id}"],
                },
            }]
            await send_messages(app, f"{rabbit_message.user_id}", messages)
    except ProjectNotFoundError:
        log.warning(
            "project related to received rabbitMQ progress message not found: '%s'",
            json_dumps(rabbit_message, indent=2),
        )
    except NodeNotFoundError:
        log.warning(
            "node related to received rabbitMQ progress message not found: '%s'",
            json_dumps(rabbit_message, indent=2),
        )
def test_serialization_of_uuids(fake_data_dict: Dict[str, Any]):

    uuid_obj = uuid4()
    # NOTE the quotes around expected value
    assert json_dumps(uuid_obj) == f'"{uuid_obj}"'

    obj = {"ids": [uuid4() for _ in range(3)]}
    dump = json_dumps(obj)
    assert json.loads(dump) == export_uuids_to_str(obj)
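
For context, here is a minimal sketch of what a UUID-aware json_dumps and the export_uuids_to_str helper used in the test could look like. This is an assumption for illustration only, not the project's actual implementation (which likely delegates to pydantic's encoder), but it is consistent with the assertions above.

# Hypothetical sketch: a json.dumps wrapper that serializes UUIDs as strings,
# plus a helper that converts UUIDs in nested containers for comparison.
import json
from typing import Any
from uuid import UUID, uuid4


def json_dumps(obj: Any, **kwargs) -> str:
    # default= is only invoked for objects json cannot serialize natively (e.g. UUID)
    return json.dumps(obj, default=str, **kwargs)


def export_uuids_to_str(obj: Any) -> Any:
    # recursively replace UUID values with their string representation
    if isinstance(obj, UUID):
        return str(obj)
    if isinstance(obj, dict):
        return {k: export_uuids_to_str(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [export_uuids_to_str(v) for v in obj]
    return obj


if __name__ == "__main__":
    obj = {"ids": [uuid4() for _ in range(3)]}
    assert json.loads(json_dumps(obj)) == export_uuids_to_str(obj)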
Example no. 3
def check_if_cluster_is_able_to_run_pipeline(
    node_id: NodeID,
    scheduler_info: Dict[str, Any],
    task_resources: Dict[str, Any],
    node_image: Image,
    cluster_id: ClusterID,
):
    logger.debug("Dask scheduler infos: %s",
                 json_dumps(scheduler_info, indent=2))
    workers = scheduler_info.get("workers", {})

    def can_task_run_on_worker(task_resources: Dict[str, Any],
                               worker_resources: Dict[str, Any]) -> bool:
        def gen_check(task_resources: Dict[str, Any],
                      worker_resources: Dict[str, Any]) -> Iterable[bool]:
            for r in task_resources:
                yield worker_resources.get(r, 0) >= task_resources[r]

        return all(gen_check(task_resources, worker_resources))

    def cluster_missing_resources(
            task_resources: Dict[str, Any],
            cluster_resources: Dict[str, Any]) -> List[str]:
        return [r for r in task_resources if r not in cluster_resources]

    cluster_resources_counter = collections.Counter()
    can_a_worker_run_task = False
    for worker in workers:
        worker_resources = workers[worker].get("resources", {})
        cluster_resources_counter.update(worker_resources)
        if can_task_run_on_worker(task_resources, worker_resources):
            can_a_worker_run_task = True
    all_available_resources_in_cluster = dict(cluster_resources_counter)

    logger.debug(
        "Dask scheduler total available resources in cluster %s: %s, task needed resources %s",
        cluster_id,
        json_dumps(all_available_resources_in_cluster, indent=2),
        json_dumps(task_resources, indent=2),
    )

    if can_a_worker_run_task:
        return

    # check if we have missing resources
    if missing_resources := cluster_missing_resources(
            task_resources, all_available_resources_in_cluster):
        raise MissingComputationalResourcesError(
            node_id=node_id,
            msg=f"Service {node_image.name}:{node_image.tag} cannot be scheduled "
            f"on cluster {cluster_id}: task needs '{task_resources}', "
            f"cluster has '{all_available_resources_in_cluster}', missing: '{missing_resources}'",
        )
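
The per-worker check above reduces to comparing the task's declared resources against each worker's advertised resources. The following standalone sketch with toy data (invented for illustration, not taken from the codebase) shows the behavior of that check.

from typing import Any, Dict


def can_task_run_on_worker_sketch(task_resources: Dict[str, Any],
                                  worker_resources: Dict[str, Any]) -> bool:
    # same rule as above: every required resource must be available in
    # sufficient quantity on the worker
    return all(worker_resources.get(r, 0) >= task_resources[r]
               for r in task_resources)


workers = {
    "tcp://worker-1:8788": {"resources": {"CPU": 4, "RAM": 16e9}},
    "tcp://worker-2:8788": {"resources": {"CPU": 2, "RAM": 8e9, "GPU": 1}},
}
task_resources = {"CPU": 2, "GPU": 1}
# only worker-2 qualifies; a request for e.g. {"MPI": 1} would match no worker
# and would trigger MissingComputationalResourcesError in the function above
assert any(can_task_run_on_worker_sketch(task_resources, w["resources"])
           for w in workers.values())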
Example no. 4
    def _patch(app_config: Dict) -> Dict[str, str]:
        assert isinstance(app_config, dict)

        print("  - app_config=\n", json_dumps(app_config, indent=1))
        envs = convert_to_environ_vars(app_config)

        print("  - convert_to_environ_vars(app_cfg)=\n",
              json_dumps(envs, indent=1))

        for env_key, env_value in envs.items():
            monkeypatch.setenv(env_key, f"{env_value}")

        return envs
    async def wrapped(request: web.Request):
        try:
            resp = await handler(request)
            return resp

        except KeyError as err:
            # NOTE: handles required request.match_info[*] or request.query[*]
            logger.debug(err, exc_info=True)
            raise web.HTTPBadRequest(
                reason=f"Expected parameter {err}") from err

        except ValidationError as err:
            #  NOTE: pydantic.validate_arguments parses and validates -> ValidationError
            logger.debug(err, exc_info=True)
            raise web.HTTPUnprocessableEntity(
                text=json_dumps({"error": err.errors()}),
                content_type="application/json",
            ) from err

        except (InvalidParameterError, NoCommitError) as err:
            raise web.HTTPUnprocessableEntity(reason=str(err)) from err

        except NotFoundError as err:
            raise web.HTTPNotFound(reason=str(err)) from err

        except ProjectNotFoundError as err:
            logger.debug(err, exc_info=True)
            raise web.HTTPNotFound(
                reason=f"Project {err.project_uuid} not found or not accessible. Skipping snapshot"
            ) from err
Example no. 6
def check_if_cluster_is_able_to_run_pipeline(
    node_id: NodeID,
    scheduler_info: Dict[str, Any],
    task_resources: Dict[str, Any],
    node_image: Image,
    cluster_id: ClusterID,
):
    logger.debug("Dask scheduler infos: %s", json_dumps(scheduler_info, indent=2))
    workers = scheduler_info.get("workers", {})

    def can_task_run_on_worker(
        task_resources: Dict[str, Any], worker_resources: Dict[str, Any]
    ) -> bool:
        def gen_check(
            task_resources: Dict[str, Any], worker_resources: Dict[str, Any]
        ) -> Iterable[bool]:
            for name, required_value in task_resources.items():
                if required_value is None:
                    yield True
                elif worker_has := worker_resources.get(name):
                    yield worker_has >= required_value
                else:
                    yield False

        return all(gen_check(task_resources, worker_resources))
Example no. 7
    def get(cls, suffix, process=True):
        handlers = cls()
        coro = getattr(handlers, "get_" + suffix)
        loop = asyncio.get_event_loop()
        data = loop.run_until_complete(coro(None))

        return json.loads(json_dumps(data)) if process else data
Example no. 8
async def parse_output_data(
    db_engine: Engine, job_id: str, data: TaskOutputData
) -> None:
    (
        service_key,
        service_version,
        user_id,
        project_id,
        node_id,
    ) = parse_dask_job_id(job_id)
    logger.debug(
        "parsing output %s of dask task for %s:%s of user %s on project '%s' and node '%s'",
        json_dumps(data, indent=2),
        service_key,
        service_version,
        user_id,
        project_id,
        node_id,
    )

    ports = await _create_node_ports(
        db_engine=db_engine,
        user_id=user_id,
        project_id=project_id,
        node_id=node_id,
    )
    for port_key, port_value in data.items():
        value_to_transfer: Optional[links.ItemValue] = None
        if isinstance(port_value, FileUrl):
            value_to_transfer = port_value.url
        else:
            value_to_transfer = port_value

        await (await ports.outputs)[port_key].set_value(value_to_transfer)
Example no. 9
def done_dask_callback(
    dask_future: distributed.Future,
    task_to_future_map: Dict[str, distributed.Future],
    user_callback: UserCompleteCB,
    main_loop: asyncio.AbstractEventLoop,
):
    # NOTE: BEWARE we are called in a separate thread!!
    job_id = dask_future.key
    event_data: Optional[TaskStateEvent] = None
    logger.debug("task '%s' completed with status %s", job_id,
                 dask_future.status)
    try:
        if dask_future.status == "error":
            task_exception = dask_future.exception(
                timeout=_DASK_FUTURE_TIMEOUT_S)
            task_traceback = dask_future.traceback(
                timeout=_DASK_FUTURE_TIMEOUT_S)
            event_data = TaskStateEvent(
                job_id=job_id,
                state=RunningState.FAILED,
                msg=json_dumps(
                    traceback.format_exception(type(task_exception),
                                               value=task_exception,
                                               tb=task_traceback)),
            )
        elif dask_future.cancelled():
            event_data = TaskStateEvent(job_id=job_id,
                                        state=RunningState.ABORTED)
        else:
            task_result = cast(
                TaskOutputData,
                dask_future.result(timeout=_DASK_FUTURE_TIMEOUT_S))
            assert task_result  # nosec
            event_data = TaskStateEvent(
                job_id=job_id,
                state=RunningState.SUCCESS,
                msg=task_result.json(),
            )
    except distributed.TimeoutError:
        event_data = TaskStateEvent(
            job_id=job_id,
            state=RunningState.FAILED,
            msg=f"Timeout error getting results of '{job_id}'",
        )
        logger.error(
            "fetching result of '%s' timed-out, please check",
            job_id,
            exc_info=True,
        )
    finally:
        # remove the future from the dict to remove any handle to the future, so the worker can free the memory
        task_to_future_map.pop(job_id)
        logger.debug("dispatching callback to finish task '%s'", job_id)
        assert event_data  # nosec
        try:
            asyncio.run_coroutine_threadsafe(user_callback(event_data),
                                             main_loop)
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "Unexpected issue while transmitting state to main thread")
Example no. 10
def _get_environment_variables(
    compose_namespace: str, scheduler_data: SchedulerData, app_settings: AppSettings
) -> Dict[str, str]:
    registry_settings = app_settings.DIRECTOR_V2_DOCKER_REGISTRY
    rabbit_settings = app_settings.DIRECTOR_V2_RABBITMQ

    state_exclude = []
    if scheduler_data.paths_mapping.state_exclude is not None:
        state_exclude = scheduler_data.paths_mapping.state_exclude

    return {
        "SIMCORE_HOST_NAME": scheduler_data.service_name,
        "DYNAMIC_SIDECAR_COMPOSE_NAMESPACE": compose_namespace,
        "DY_SIDECAR_PATH_INPUTS": f"{scheduler_data.paths_mapping.inputs_path}",
        "DY_SIDECAR_PATH_OUTPUTS": f"{scheduler_data.paths_mapping.outputs_path}",
        "DY_SIDECAR_STATE_PATHS": json_dumps(
            [f"{x}" for x in scheduler_data.paths_mapping.state_paths]
        ),
        "DY_SIDECAR_STATE_EXCLUDE": json_dumps([f"{x}" for x in state_exclude]),
        "DY_SIDECAR_USER_ID": f"{scheduler_data.user_id}",
        "DY_SIDECAR_PROJECT_ID": f"{scheduler_data.project_id}",
        "DY_SIDECAR_NODE_ID": f"{scheduler_data.node_uuid}",
        "POSTGRES_HOST": f"{app_settings.POSTGRES.POSTGRES_HOST}",
        "POSTGRES_ENDPOINT": f"{app_settings.POSTGRES.POSTGRES_HOST}:{app_settings.POSTGRES.POSTGRES_PORT}",
        "POSTGRES_PASSWORD": f"{app_settings.POSTGRES.POSTGRES_PASSWORD.get_secret_value()}",
        "POSTGRES_PORT": f"{app_settings.POSTGRES.POSTGRES_PORT}",
        "POSTGRES_USER": f"{app_settings.POSTGRES.POSTGRES_USER}",
        "POSTGRES_DB": f"{app_settings.POSTGRES.POSTGRES_DB}",
        "STORAGE_ENDPOINT": app_settings.STORAGE_ENDPOINT,
        "REGISTRY_AUTH": f"{registry_settings.REGISTRY_AUTH}",
        "REGISTRY_PATH": f"{registry_settings.REGISTRY_PATH}",
        "REGISTRY_URL": f"{registry_settings.REGISTRY_URL}",
        "REGISTRY_USER": f"{registry_settings.REGISTRY_USER}",
        "REGISTRY_PW": f"{registry_settings.REGISTRY_PW.get_secret_value()}",
        "REGISTRY_SSL": f"{registry_settings.REGISTRY_SSL}",
        "RABBIT_HOST": f"{rabbit_settings.RABBIT_HOST}",
        "RABBIT_PORT": f"{rabbit_settings.RABBIT_PORT}",
        "RABBIT_USER": f"{rabbit_settings.RABBIT_USER}",
        "RABBIT_PASSWORD": f"{rabbit_settings.RABBIT_PASSWORD.get_secret_value()}",
        "RABBIT_CHANNELS": json_dumps(rabbit_settings.RABBIT_CHANNELS),
    }
    async def _upsert_snapshot(
        project_checksum: str,
        project: Union[RowProxy, SimpleNamespace],
        conn: SAConnection,
    ):

        # has changes wrt previous commit
        assert project_checksum  # nosec
        insert_stmt = pg_insert(projects_vc_snapshots).values(
            checksum=project_checksum,
            content={
                # FIXME: empty status produces a set() that sqlalchemy cannot serialize. Quick fix
                "workbench": json.loads(json_dumps(project.workbench)),
                "ui": json.loads(json_dumps(project.ui)),
            },
        )
        upsert_snapshot = insert_stmt.on_conflict_do_update(
            constraint=projects_vc_snapshots.primary_key,
            set_=dict(content=insert_stmt.excluded.content),
        )
        await conn.execute(upsert_snapshot)
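
The json.loads(json_dumps(...)) round-trip above normalizes non-JSON values (such as the empty set() mentioned in the FIXME) into plain JSON types before sqlalchemy/asyncpg serializes the column. The following illustration assumes a permissive encoder that maps sets to lists (as pydantic's does); plain json.dumps with default=list is used here as a stand-in.

# Illustrative round-trip normalization with toy data.
import json

workbench = {"state": {"modified": {"tag-ids": set()}}}  # set() is not JSON-serializable
normalized = json.loads(json.dumps(workbench, default=list))
assert normalized == {"state": {"modified": {"tag-ids": []}}}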
Example no. 12
async def send_group_messages(app: Application, room: str,
                              messages: Sequence[SocketMessageDict]) -> None:
    sio: AsyncServer = get_socket_server(app)
    send_tasks = [
        sio.emit(message["event_type"], json_dumps(message["data"]), room=room)
        for message in messages
    ]

    await logged_gather(*send_tasks,
                        reraise=False,
                        log=log,
                        max_concurrency=10)
async def test_app_client_session_json_serialize(
    server: TestServer, fake_data_dict: Dict[str, Any]
):
    session = get_client_session(server.app)

    resp = await session.post(server.make_url("/echo"), json=fake_data_dict)
    assert resp.status == 200

    got = await resp.json()

    expected = json.loads(json_dumps(fake_data_dict))
    assert got == expected
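
The echo assertion above only holds if the app's client session serializes request bodies with the same json_dumps. A plausible setup (an assumption about how get_client_session is configured, not its verbatim code) uses aiohttp's documented json_serialize argument.

# Hypothetical client-session factory wired to the custom serializer.
import aiohttp


def create_client_session() -> aiohttp.ClientSession:
    # json= payloads are then dumped with the UUID-aware json_dumps
    return aiohttp.ClientSession(json_serialize=json_dumps)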
Example no. 14
async def postgres_cleanup_ctx(app: web.Application) -> AsyncIterator[None]:

    settings = get_plugin_settings(app)
    aiopg_engine = await _ensure_pg_ready(settings)
    app[APP_DB_ENGINE_KEY] = aiopg_engine

    log.info("pg engine created %s", json_dumps(get_engine_state(app),
                                                indent=1))

    yield  # -------------------

    if aiopg_engine is not app.get(APP_DB_ENGINE_KEY):
        log.critical(
            "app does not hold the right db engine. Has somebody changed it?")

    await close_engine(aiopg_engine)

    log.debug(
        "pg engine created after shutdown %s (closed=%s): %s",
        aiopg_engine.dsn,
        aiopg_engine.closed,
        json_dumps(get_engine_state(app), indent=1),
    )
Example no. 15
async def assemble_statics_json(app: web.Application):
    # NOTE: in devel mode, the folder might be under construction
    # (qx-compile takes time), therefore we create statics.json
    # on_startup instead of upon setup

    # Adds general server settings
    app_settings = app[APP_SETTINGS_KEY]
    info: Dict = app_settings.to_client_statics()

    # Adds specifics to front-end app
    frontend_settings: FrontEndAppSettings = app_settings.WEBSERVER_FRONTEND
    info.update(frontend_settings.to_statics())

    # cache computed statics.json
    app[APP_FRONTEND_CACHED_STATICS_JSON_KEY] = json_dumps(info)
Example no. 16
def envelope_json_response(
        obj: Any,
        status_cls: Type[HTTPException] = web.HTTPOk) -> web.Response:
    # TODO: replace all envelope functionality from packages/service-library/src/servicelib/aiohttp/rest_responses.py
    # TODO: Remove middleware to envelope handler responses at packages/service-library/src/servicelib/aiohttp/rest_middlewares.py: envelope_middleware_factory and use this instead
    # TODO: review error_middleware_factory
    if issubclass(status_cls, HTTPError):
        enveloped = Envelope[Any](error=obj)
    else:
        enveloped = Envelope[Any](data=obj)

    return web.Response(
        text=json_dumps(enveloped.dict(**RESPONSE_MODEL_POLICY)),
        content_type="application/json",
        status=status_cls.status_code,
    )
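
A short usage sketch of envelope_json_response in handlers (handler names and payloads invented for illustration): payloads end up under "data" for success statuses and under "error" for HTTPError subclasses.

# Hypothetical handlers showing both envelope branches.
async def get_profile(request: web.Request) -> web.Response:
    profile = {"login": "user@example.com", "role": "USER"}
    return envelope_json_response(profile)  # body: {"data": {...}}, status 200


async def not_implemented(request: web.Request) -> web.Response:
    # HTTPNotImplemented subclasses HTTPError, so the payload lands under "error"
    return envelope_json_response(
        {"message": "not implemented"}, status_cls=web.HTTPNotImplemented
    )  # body: {"error": {...}}, status 501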
Example no. 17
async def update_project_node_outputs(
    app: web.Application,
    user_id: int,
    project_id: str,
    node_id: str,
    new_outputs: Optional[Dict],
    new_run_hash: Optional[str],
) -> Tuple[Dict, List[str]]:
    """
    Updates outputs of a given node in a project with 'data'
    """
    log.debug(
        "updating node %s outputs in project %s for user %s with %s: run_hash [%s]",
        node_id,
        project_id,
        user_id,
        json_dumps(new_outputs),
        new_run_hash,
    )
    new_outputs = new_outputs or {}

    partial_workbench_data = {
        node_id: {
            "outputs": new_outputs,
            "runHash": new_run_hash
        },
    }

    db = app[APP_PROJECT_DBAPI]
    updated_project, changed_entries = await db.patch_user_project_workbench(
        partial_workbench_data=partial_workbench_data,
        user_id=user_id,
        project_uuid=project_id,
    )
    log.debug(
        "patched project %s, following entries changed: %s",
        project_id,
        pformat(changed_entries),
    )
    updated_project = await add_project_states_for_user(
        user_id=user_id, project=updated_project, is_template=False, app=app)

    # changed entries come in the form of {node_uuid: {outputs: {changed_key1: value1, changed_key2: value2}}}
    # we do want only the key names
    changed_keys = list(changed_entries.get(node_id, {}).get("outputs", {}).keys())
    return updated_project, changed_keys
Example no. 18
async def check_invitation(invitation: Optional[str], db: AsyncpgStorage,
                           cfg: LoginOptions):
    confirmation = None
    if invitation:
        confirmation = await validate_confirmation_code(invitation, db, cfg)

    if confirmation:
        # FIXME: check if action=invitation??
        log.info(
            "Invitation code used. Deleting %s",
            json_dumps(get_confirmation_info(cfg, confirmation), indent=1),
        )
        await db.delete_confirmation(confirmation)
    else:
        raise web.HTTPForbidden(
            reason=("Invalid invitation code. "
                    "Your invitation was already used or might have expired. "
                    "Please contact our support team to get a new one."))
Example no. 19
async def setup_director(app: FastAPI) -> None:
    if settings := app.state.settings.CATALOG_DIRECTOR:
        # init client-api
        logger.debug("Setup director at %s...", settings.base_url)
        director_client = DirectorApi(base_url=settings.base_url, app=app)

        # check that the director is accessible
        async for attempt in AsyncRetrying(**director_startup_retry_policy):
            with attempt:
                if not await director_client.is_responsive():
                    raise ValueError("Director-v0 is not responsive")

                logger.info(
                    "Connection to director-v0 succeded [%s]",
                    json_dumps(attempt.retry_state.retry_object.statistics),
                )

        app.state.director_api = director_client
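
director_startup_retry_policy is not shown in this snippet. A plausible tenacity policy (an assumption, not the service's actual values) that fits the AsyncRetrying usage above could be:

# Hypothetical retry policy for the director-v0 responsiveness check.
import logging

from tenacity import before_sleep_log, stop_after_delay, wait_fixed

logger = logging.getLogger(__name__)  # stand-in for the module logger used above

director_startup_retry_policy = dict(
    wait=wait_fixed(2),           # retry every 2 seconds
    stop=stop_after_delay(60),    # give up after one minute
    before_sleep=before_sleep_log(logger, logging.WARNING),
    reraise=True,                 # surface the last error instead of RetryError
)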
Example no. 20
async def send_messages(app: Application, user_id: str,
                        messages: Sequence[SocketMessageDict]) -> None:
    sio: AsyncServer = get_socket_server(app)

    socket_ids: List[str] = []
    with managed_resource(user_id, None, app) as rt:
        socket_ids = await rt.find_socket_ids()

    send_tasks = deque()
    for sid in socket_ids:
        for message in messages:
            send_tasks.append(
                sio.emit(message["event_type"],
                         json_dumps(message["data"]),
                         room=sid))
    await logged_gather(*send_tasks,
                        reraise=False,
                        log=log,
                        max_concurrency=10)
Example no. 21
    async def patch_user_project_workbench(
            self, partial_workbench_data: Dict[str, Any], user_id: int,
            project_uuid: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """patches an EXISTING project from a user
        new_project_data only contains the entries to modify
        """
        log.info("Patching project %s for user %s", project_uuid, user_id)
        async with self.engine.acquire() as conn:
            async with conn.begin() as _transaction:
                current_project: Dict = await self._get_project(
                    conn,
                    user_id,
                    project_uuid,
                    exclude_foreign=["tags"],
                    include_templates=False,
                    for_update=True,
                )
                user_groups: List[RowProxy] = await self.__load_user_groups(
                    conn, user_id)
                _check_project_permissions(current_project, user_id,
                                           user_groups, "write")

                def _patch_workbench(
                    project: Dict[str, Any],
                    new_partial_workbench_data: Dict[str, Any],
                ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
                    """Patches the project workbench with the values in new_partial_workbench_data and returns the changed project and the changed entries."""
                    changed_entries = {}
                    for node_key, new_node_data in new_partial_workbench_data.items():
                        current_node_data = project.get("workbench", {}).get(node_key)

                        if current_node_data is None:
                            log.debug(
                                "node %s is missing from project, no patch",
                                node_key)
                            raise NodeNotFoundError(project_uuid, node_key)
                        # find changed keys
                        changed_entries.update({
                            node_key:
                            _find_changed_dict_keys(
                                current_node_data,
                                new_node_data,
                                look_for_removed_keys=False,
                            )
                        })
                        # patch
                        current_node_data.update(new_node_data)
                    return (project, changed_entries)

                new_project_data, changed_entries = _patch_workbench(
                    current_project, partial_workbench_data)

                # update timestamps
                new_project_data["lastChangeDate"] = now_str()

                log.debug(
                    "DB updating with new_project_data=%s",
                    json_dumps(dict(new_project_data)),
                )
                result = await conn.execute(
                    # pylint: disable=no-value-for-parameter
                    projects.update().values(
                        **_convert_to_db_names(new_project_data)
                    ).where(projects.c.id == current_project[projects.c.id.key]
                            ).returning(literal_column("*")))
                project: RowProxy = await result.fetchone()
                log.debug(
                    "DB updated returned row project=%s",
                    json_dumps(dict(project.items())),
                )
                user_email = await self._get_user_email(
                    conn, project.prj_owner)

                tags = await self._get_tags_by_project(
                    conn, project_id=project[projects.c.id])
                return (
                    _convert_to_schema_names(project, user_email, tags=tags),
                    changed_entries,
                )
Example no. 22
def get_dynamic_sidecar_spec(
    scheduler_data: SchedulerData,
    dynamic_sidecar_settings: DynamicSidecarSettings,
    dynamic_sidecar_network_id: str,
    swarm_network_id: str,
    settings: SimcoreServiceSettingsLabel,
    app_settings: AppSettings,
) -> Dict[str, Any]:
    """
    The dynamic-sidecar is responsible for managing the lifecycle
    of the dynamic service. The director-v2 directly coordinates with
    the dynamic-sidecar for this purpose.
    """
    compose_namespace = get_compose_namespace(scheduler_data.node_uuid)

    mounts = [
        # docker socket needed to use the docker api
        {
            "Source": "/var/run/docker.sock",
            "Target": "/var/run/docker.sock",
            "Type": "bind",
        }
    ]

    # Docker does not allow mounting of subfolders from volumes as the following:
    #   `volume_name/inputs:/target_folder/inputs`
    #   `volume_name/outputs:/target_folder/outputs`
    #   `volume_name/path/to/state/01:/target_folder/path_to_state_01`
    #
    # Two separate volumes are required to achieve the following on the spawned
    # dynamic-sidecar containers:
    #   `volume_name_path_to_inputs:/target_folder/path/to/inputs`
    #   `volume_name_path_to_outputs:/target_folder/path/to/outputs`
    #   `volume_name_path_to_state_01:/target_folder/path/to/state/01`
    for path_to_mount in [
        scheduler_data.paths_mapping.inputs_path,
        scheduler_data.paths_mapping.outputs_path,
    ]:
        mounts.append(
            DynamicSidecarVolumesPathsResolver.mount_entry(
                compose_namespace=compose_namespace,
                path=path_to_mount,
                node_uuid=scheduler_data.node_uuid,
            )
        )
    # state paths now get mounted via different driver and are synced to s3 automatically
    for path_to_mount in scheduler_data.paths_mapping.state_paths:
        # for now only enable this with dev features enabled
        if app_settings.DIRECTOR_V2_DEV_FEATURES_ENABLED:
            mounts.append(
                DynamicSidecarVolumesPathsResolver.mount_r_clone(
                    compose_namespace=compose_namespace,
                    path=path_to_mount,
                    project_id=scheduler_data.project_id,
                    node_uuid=scheduler_data.node_uuid,
                    r_clone_settings=dynamic_sidecar_settings.DYNAMIC_SIDECAR_R_CLONE_SETTINGS,
                )
            )
        else:
            mounts.append(
                DynamicSidecarVolumesPathsResolver.mount_entry(
                    compose_namespace=compose_namespace,
                    path=path_to_mount,
                    node_uuid=scheduler_data.node_uuid,
                )
            )

    endpoint_spec = {}

    if dynamic_sidecar_settings.DYNAMIC_SIDECAR_MOUNT_PATH_DEV is not None:
        dynamic_sidecar_path = dynamic_sidecar_settings.DYNAMIC_SIDECAR_MOUNT_PATH_DEV
        if dynamic_sidecar_path is None:
            log.warning(
                (
                    "Could not mount the sources for the dynamic-sidecar, please "
                    "provide env var named DEV_SIMCORE_DYNAMIC_SIDECAR_PATH"
                )
            )
        else:
            mounts.append(
                {
                    "Source": str(dynamic_sidecar_path),
                    "Target": "/devel/services/dynamic-sidecar",
                    "Type": "bind",
                }
            )
            packages_path = (
                dynamic_sidecar_settings.DYNAMIC_SIDECAR_MOUNT_PATH_DEV
                / ".."
                / ".."
                / "packages"
            )
            mounts.append(
                {
                    "Source": str(packages_path),
                    "Target": "/devel/packages",
                    "Type": "bind",
                }
            )
    # expose this service on a dynamically assigned port
    if dynamic_sidecar_settings.DYNAMIC_SIDECAR_EXPOSE_PORT:
        endpoint_spec["Ports"] = [
            {
                "Protocol": "tcp",
                "TargetPort": dynamic_sidecar_settings.DYNAMIC_SIDECAR_PORT,
            }
        ]

    create_service_params = {
        "endpoint_spec": endpoint_spec,
        "labels": {
            # TODO: let's use a pydantic model with descriptions
            "io.simcore.zone": scheduler_data.simcore_traefik_zone,
            "port": f"{dynamic_sidecar_settings.DYNAMIC_SIDECAR_PORT}",
            "study_id": f"{scheduler_data.project_id}",
            "traefik.docker.network": scheduler_data.dynamic_sidecar_network_name,  # also used for scheduling
            "traefik.enable": "true",
            f"traefik.http.routers.{scheduler_data.service_name}.entrypoints": "http",
            f"traefik.http.routers.{scheduler_data.service_name}.priority": "10",
            f"traefik.http.routers.{scheduler_data.service_name}.rule": "PathPrefix(`/`)",
            f"traefik.http.services.{scheduler_data.service_name}.loadbalancer.server.port": f"{dynamic_sidecar_settings.DYNAMIC_SIDECAR_PORT}",
            "type": ServiceType.MAIN.value,  # required to be listed as an interactive service and be properly cleaned up
            "user_id": f"{scheduler_data.user_id}",
            # the following are used for scheduling
            "uuid": f"{scheduler_data.node_uuid}",  # also needed for removal when project is closed
            "swarm_stack_name": dynamic_sidecar_settings.SWARM_STACK_NAME,
            "service_key": scheduler_data.key,
            "service_tag": scheduler_data.version,
            "paths_mapping": scheduler_data.paths_mapping.json(),
            "compose_spec": json_dumps(scheduler_data.compose_spec),
            "container_http_entry": scheduler_data.container_http_entry,
            "restart_policy": scheduler_data.restart_policy,
        },
        "name": scheduler_data.service_name,
        "networks": [swarm_network_id, dynamic_sidecar_network_id],
        "task_template": {
            "ContainerSpec": {
                "Env": _get_environment_variables(
                    compose_namespace, scheduler_data, app_settings
                ),
                "Hosts": [],
                "Image": dynamic_sidecar_settings.DYNAMIC_SIDECAR_IMAGE,
                "Init": True,
                "Labels": {},
                "Mounts": mounts,
            },
            "Placement": {"Constraints": []},
            "RestartPolicy": {
                "Condition": "on-failure",
                "Delay": 5000000,
                "MaxAttempts": 2,
            },
            # this will get overwritten
            "Resources": {
                "Limits": {"NanoCPUs": 2 * pow(10, 9), "MemoryBytes": 1 * pow(1024, 3)},
                "Reservations": {
                    "NanoCPUs": 1 * pow(10, 8),
                    "MemoryBytes": 500 * pow(1024, 2),
                },
            },
        },
    }

    inject_settings_to_create_service_params(
        labels_service_settings=settings,
        create_service_params=create_service_params,
    )

    return create_service_params
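
For reference, DynamicSidecarVolumesPathsResolver.mount_entry presumably produces a volume-backed mount in the same dictionary shape as the docker-socket bind mount above, following the volume-naming scheme described in the comments. The snippet below is illustrative only (an assumed shape, not the resolver's actual output).

# Illustrative volume-backed mount entry (assumed shape).
example_volume_mount = {
    "Source": "dy-sidecar_<node-uuid>_path_to_inputs",  # volume name derived from the mounted path
    "Target": "/path/to/inputs",                        # mount point inside the container
    "Type": "volume",
}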
Example no. 23
    async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
        logger.debug(
            "Getting docker compose spec for service %s", scheduler_data.service_name
        )

        dynamic_sidecar_client = get_dynamic_sidecar_client(app)
        dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint

        # Starts dynamic SIDECAR -------------------------------------
        # creates a docker compose spec given the service key and tag
        compose_spec = assemble_spec(
            app=app,
            service_key=scheduler_data.key,
            service_tag=scheduler_data.version,
            paths_mapping=scheduler_data.paths_mapping,
            compose_spec=scheduler_data.compose_spec,
            container_http_entry=scheduler_data.container_http_entry,
            dynamic_sidecar_network_name=scheduler_data.dynamic_sidecar_network_name,
        )

        await dynamic_sidecar_client.start_service_creation(
            dynamic_sidecar_endpoint, compose_spec
        )

        # Starts PROXY -----------------------------------------------
        # The entrypoint container name has now been computed
        # continue starting the proxy

        # check values have been set by previous step
        if (
            scheduler_data.dynamic_sidecar.dynamic_sidecar_id is None
            or scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id is None
            or scheduler_data.dynamic_sidecar.swarm_network_id is None
            or scheduler_data.dynamic_sidecar.swarm_network_name is None
        ):
            raise ValueError(
                (
                    "Expected a value for all the following values: "
                    f"{scheduler_data.dynamic_sidecar.dynamic_sidecar_id=} "
                    f"{scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id=} "
                    f"{scheduler_data.dynamic_sidecar.swarm_network_id=} "
                    f"{scheduler_data.dynamic_sidecar.swarm_network_name=}"
                )
            )

        dynamic_sidecar_settings: DynamicSidecarSettings = (
            app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
        )

        async for attempt in AsyncRetrying(
            stop=stop_after_delay(
                dynamic_sidecar_settings.DYNAMIC_SIDECAR_WAIT_FOR_CONTAINERS_TO_START
            ),
            wait=wait_fixed(1),
            retry_error_cls=EntrypointContainerNotFoundError,
            before_sleep=before_sleep_log(logger, logging.WARNING),
        ):
            with attempt:
                if scheduler_data.dynamic_sidecar.service_removal_state.was_removed:
                    # the service was removed while waiting for the operation to finish
                    logger.warning(
                        "Stopping `get_entrypoint_container_name` operation. "
                        "Will no try to start the service."
                    )
                    return

                entrypoint_container = await dynamic_sidecar_client.get_entrypoint_container_name(
                    dynamic_sidecar_endpoint=dynamic_sidecar_endpoint,
                    dynamic_sidecar_network_name=scheduler_data.dynamic_sidecar_network_name,
                )
                logger.info(
                    "Fetched container entrypoint name %s", entrypoint_container
                )

        dynamic_sidecar_node_id = await get_node_id_from_task_for_service(
            scheduler_data.dynamic_sidecar.dynamic_sidecar_id, dynamic_sidecar_settings
        )

        dynamic_sidecar_proxy_create_service_params = get_dynamic_proxy_spec(
            scheduler_data=scheduler_data,
            dynamic_sidecar_settings=dynamic_sidecar_settings,
            dynamic_sidecar_network_id=scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id,
            swarm_network_id=scheduler_data.dynamic_sidecar.swarm_network_id,
            swarm_network_name=scheduler_data.dynamic_sidecar.swarm_network_name,
            dynamic_sidecar_node_id=dynamic_sidecar_node_id,
            entrypoint_container_name=entrypoint_container,
            service_port=scheduler_data.service_port,
        )

        logger.debug(
            "dynamic-sidecar-proxy create_service_params %s",
            json_dumps(dynamic_sidecar_proxy_create_service_params),
        )

        # no need for the id any longer
        await create_service_and_get_id(dynamic_sidecar_proxy_create_service_params)
        scheduler_data.dynamic_sidecar.were_services_created = True

        scheduler_data.dynamic_sidecar.was_compose_spec_submitted = True
Example no. 24
    async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
        dynamic_sidecar_settings: DynamicSidecarSettings = (
            app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
        )
        # the dynamic-sidecar should merge all the settings, especially:
        # resources and placement derived from all the images in
        # the provided docker-compose spec
        # it also encodes the env vars to target the proper container
        director_v0_client: DirectorV0Client = _get_director_v0_client(app)

        # fetching project from DB and fetching user settings
        projects_repository = _fetch_repo_outside_of_request(app, ProjectsRepository)
        project: ProjectAtDB = await projects_repository.get_project(
            project_id=scheduler_data.project_id
        )

        node_uuid_str = str(scheduler_data.node_uuid)
        node: Optional[Node] = project.workbench.get(node_uuid_str)
        boot_options = (
            node.boot_options
            if node is not None and node.boot_options is not None
            else {}
        )
        logger.info("%s", f"{boot_options=}")

        settings: SimcoreServiceSettingsLabel = await merge_settings_before_use(
            director_v0_client=director_v0_client,
            service_key=scheduler_data.key,
            service_tag=scheduler_data.version,
            service_user_selection_boot_options=boot_options,
        )

        # this configuration should guarantee a network with 245 addresses
        network_config = {
            "Name": scheduler_data.dynamic_sidecar_network_name,
            "Driver": "overlay",
            "Labels": {
                "io.simcore.zone": f"{dynamic_sidecar_settings.TRAEFIK_SIMCORE_ZONE}",
                "com.simcore.description": f"interactive for node: {scheduler_data.node_uuid}",
                "uuid": f"{scheduler_data.node_uuid}",  # needed for removal when project is closed
            },
            "Attachable": True,
            "Internal": False,
        }
        dynamic_sidecar_network_id = await create_network(network_config)

        # attach the service to the swarm network dedicated to services
        swarm_network: Dict[str, Any] = await get_swarm_network(
            dynamic_sidecar_settings
        )
        swarm_network_id: str = swarm_network["Id"]
        swarm_network_name: str = swarm_network["Name"]

        # start dynamic-sidecar and run the proxy on the same node
        dynamic_sidecar_create_service_params = get_dynamic_sidecar_spec(
            scheduler_data=scheduler_data,
            dynamic_sidecar_settings=dynamic_sidecar_settings,
            dynamic_sidecar_network_id=dynamic_sidecar_network_id,
            swarm_network_id=swarm_network_id,
            settings=settings,
            app_settings=app.state.settings,
        )
        logger.debug(
            "dynamic-sidecar create_service_params %s",
            json_dumps(dynamic_sidecar_create_service_params),
        )

        dynamic_sidecar_id = await create_service_and_get_id(
            dynamic_sidecar_create_service_params
        )

        # update service_port and assign it to the status
        # needed by CreateUserServices action
        scheduler_data.service_port = extract_service_port_from_compose_start_spec(
            dynamic_sidecar_create_service_params
        )

        # finally mark services created
        scheduler_data.dynamic_sidecar.dynamic_sidecar_id = dynamic_sidecar_id
        scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id = (
            dynamic_sidecar_network_id
        )
        scheduler_data.dynamic_sidecar.swarm_network_id = swarm_network_id
        scheduler_data.dynamic_sidecar.swarm_network_name = swarm_network_name
        scheduler_data.dynamic_sidecar.was_dynamic_sidecar_started = True
Example no. 25
                ComputationalBackendNotConnectedError,
                ComputationalSchedulerChangedError,
        ):
            # cleanup and re-raise
            if dask_client := self._cluster_to_client_map.pop(
                    cluster.id, None):
                await dask_client.delete()
            raise
        except Exception as exc:
            # cleanup and re-raise
            if dask_client := self._cluster_to_client_map.pop(
                    cluster.id, None):
                await dask_client.delete()
            logger.error(
                "could not create/access dask computational cluster %s",
                json_dumps(cluster),
            )
            raise DaskClientAcquisisitonError(cluster=cluster,
                                              error=exc) from exc


def setup(app: FastAPI, settings: DaskSchedulerSettings) -> None:
    async def on_startup() -> None:
        app.state.dask_clients_pool = await DaskClientsPool.create(
            app=app, settings=settings)

    async def on_shutdown() -> None:
        if app.state.dask_clients_pool:
            await app.state.dask_clients_pool.delete()

    app.add_event_handler("startup", on_startup)
Example no. 26
    ) -> List[str]:
        return [r for r in task_resources if r not in cluster_resources]

    cluster_resources_counter = collections.Counter()
    can_a_worker_run_task = False
    for worker in workers:
        worker_resources = workers[worker].get("resources", {})
        cluster_resources_counter.update(worker_resources)
        if can_task_run_on_worker(task_resources, worker_resources):
            can_a_worker_run_task = True
    all_available_resources_in_cluster = dict(cluster_resources_counter)

    logger.debug(
        "Dask scheduler total available resources in cluster %s: %s, task needed resources %s",
        cluster_id,
        json_dumps(all_available_resources_in_cluster, indent=2),
        json_dumps(task_resources, indent=2),
    )

    if can_a_worker_run_task:
        return

    # check if we have missing resources
    if missing_resources := cluster_missing_resources(
        task_resources, all_available_resources_in_cluster
    ):
        raise MissingComputationalResourcesError(
            node_id=node_id,
            msg=f"Service {node_image.name}:{node_image.tag} cannot be scheduled "
            f"on cluster {cluster_id}: task needs '{task_resources}', "
            f"cluster has '{all_available_resources_in_cluster}', missing: '{missing_resources}'",
Example no. 27
    async def replace_user_project(
        self,
        new_project_data: Dict[str, Any],
        user_id: int,
        project_uuid: str,
        include_templates: Optional[bool] = False,
    ) -> Dict[str, Any]:
        """replaces a project from a user
        this method completely replaces a user project with new_project_data only keeping
        the old entries from the project workbench if they exist in the new project workbench.
        """
        log.info("Updating project %s for user %s", project_uuid, user_id)

        async with self.engine.acquire() as conn:
            async with conn.begin() as _transaction:
                current_project: Dict = await self._get_project(
                    conn,
                    user_id,
                    project_uuid,
                    exclude_foreign=["tags"],
                    include_templates=include_templates,
                    for_update=True,
                )
                user_groups: List[RowProxy] = await self.__load_user_groups(
                    conn, user_id)
                _check_project_permissions(current_project, user_id,
                                           user_groups, "write")
                # uuid can ONLY be set upon creation
                if current_project["uuid"] != new_project_data["uuid"]:
                    raise ProjectInvalidRightsError(user_id,
                                                    new_project_data["uuid"])
                # ensure the prj owner is always in the access rights
                owner_primary_gid = await self._get_user_primary_group_gid(
                    conn, current_project[projects.c.prj_owner.key])
                new_project_data.setdefault("accessRights", {}).update(
                    _create_project_access_rights(owner_primary_gid,
                                                  ProjectAccessRights.OWNER))

                # update the workbench
                def _update_workbench(old_project: Dict[str, Any],
                                      new_project: Dict[str, Any]) -> Dict[str, Any]:
                    # any entry not set in the new workbench is taken from the old one if available
                    old_workbench = old_project["workbench"]
                    new_workbench = new_project["workbench"]
                    for node_key, node in new_workbench.items():
                        old_node = old_workbench.get(node_key)
                        if not old_node:
                            continue
                        for prop in old_node:
                            # check if the key is missing in the new node
                            if prop not in node:
                                # use the old value
                                node[prop] = old_node[prop]
                    return new_project

                _update_workbench(current_project, new_project_data)

                # update timestamps
                new_project_data["lastChangeDate"] = now_str()

                # now update it

                log.debug("DB updating with new_project_data=%s",
                          json_dumps(new_project_data))
                result = await conn.execute(
                    # pylint: disable=no-value-for-parameter
                    projects.update().values(
                        **_convert_to_db_names(new_project_data)
                    ).where(projects.c.id == current_project[projects.c.id.key]
                            ).returning(literal_column("*")))
                project: RowProxy = await result.fetchone()
                log.debug(
                    "DB updated returned row project=%s",
                    json_dumps(dict(project.items())),
                )
                user_email = await self._get_user_email(
                    conn, project.prj_owner)

                tags = await self._get_tags_by_project(
                    conn, project_id=project[projects.c.id])
                return _convert_to_schema_names(project, user_email, tags=tags)
Example no. 28
        update={
            # TODO: HACK to overcome export from None -> string
            # SOLUTION 1: thumbnail should not be required (check with team!)
            # SOLUTION 2: make thumbnail nullable
            "thumbnail": faker.image_url(),
        }
    )
    assert new_project.workbench is not None
    assert new_project.workbench
    node = new_project.workbench["fc9208d9-1a0a-430c-9951-9feaf1de3368"]
    assert node.inputs
    node.inputs["linspace_stop"] = 4

    resp = await client.put(
        f"/v0/projects/{project_uuid}",
        data=json_dumps(new_project.dict(**REQUEST_MODEL_POLICY)),
    )
    assert resp.status == HTTPStatus.OK, await resp.text()

    # RUN them again ---------------------------------------------------------------------------
    resp = await client.post(
        f"/v0/computation/pipeline/{project_uuid}:start",
        json=RUN_PROJECT.request_payload,
    )
    data, _ = await assert_status(resp, web.HTTPCreated)
    assert project_uuid == data["pipeline_id"]
    ref_ids = data["ref_ids"]
    assert len(ref_ids) == 4

    # GET iterations -----------------------------------------------------------------
    # check iters 1, 2 and 3 share working copies
def test_serialization_of_nested_dicts(fake_data_dict: Dict[str, Any]):

    obj = {"data": fake_data_dict, "ids": [uuid4() for _ in range(3)]}

    dump = json_dumps(obj)
    assert json.loads(dump) == export_uuids_to_str(obj)