async def close_project(request: web.Request) -> web.Response:
    # TODO: replace by a decorator since it checks authentication again
    await check_permission(request, "project.close")

    user_id = request[RQT_USERID_KEY]
    project_uuid = request.match_info.get("project_id")
    client_session_id = await request.json()

    try:
        # ensure the project exists
        # TODO: temporarily hidden until get_handlers_from_namespace refactor to seek marked functions instead!
        from .projects_api import get_project_for_user

        with managed_resource(user_id, client_session_id, request.app) as rt:
            await get_project_for_user(
                request.app,
                project_uuid=project_uuid,
                user_id=user_id,
                include_templates=True,
            )
            await rt.remove("project_id")
            other_users = await rt.find_users_of_resource("project_id", project_uuid)
            if not other_users:
                # only remove the services if no one else is using them now
                fire_and_forget_task(
                    projects_api.remove_project_interactive_services(
                        user_id, project_uuid, request.app
                    )
                )

        raise web.HTTPNoContent(content_type="application/json")
    except ProjectNotFoundError:
        raise web.HTTPNotFound(reason=f"Project {project_uuid} not found")
async def upload_link(self, user_id: str, file_uuid: str):
    @retry(**postgres_service_retry_policy_kwargs)
    async def _execute_query() -> Tuple[int, str]:
        async with self.engine.acquire() as conn:
            fmd = FileMetaData()
            fmd.simcore_from_uuid(file_uuid, self.simcore_bucket_name)
            fmd.user_id = user_id
            query = sa.select([file_meta_data]).where(
                file_meta_data.c.file_uuid == file_uuid
            )
            # if the file already exists, we might want to update a time-stamp
            rows = await conn.execute(query)
            exists = await rows.scalar()
            if exists is None:
                ins = file_meta_data.insert().values(**vars(fmd))
                await conn.execute(ins)
            return fmd.file_size, fmd.last_modified

    file_size, last_modified = await _execute_query()

    bucket_name = self.simcore_bucket_name
    object_name = file_uuid

    # a parallel task is started which will update the metadata of the uploaded file
    # once the upload has finished
    fire_and_forget_task(
        self.metadata_file_updater(
            file_uuid=file_uuid,
            bucket_name=bucket_name,
            object_name=object_name,
            file_size=file_size,
            last_modified=last_modified,
        )
    )
    return self.s3_client.create_presigned_put_url(bucket_name, object_name)
async def on_user_logout(
    user_id: str, client_session_id: Optional[str], app: web.Application
) -> None:
    log.debug("user %s must be disconnected", user_id)
    # find the sockets related to the user
    sio: AsyncServer = get_socket_server(app)
    with managed_resource(user_id, client_session_id, app) as rt:
        # start by disconnecting this client if possible
        if client_session_id:
            if socket_id := await rt.get_socket_id():
                try:
                    await sio.disconnect(sid=socket_id)
                except KeyError as exc:
                    log.warning(
                        "Disconnection of socket id '%s' failed. socket id could not be found: [%s]",
                        socket_id,
                        exc,
                    )
            # trigger faster gc on disconnect
            await rt.user_pressed_disconnect()

        # now let's give a chance to all the clients to properly logout
        sockets = await rt.find_socket_ids()
        if sockets:
            # let's do it as a task so it does not block us here
            fire_and_forget_task(disconnect_other_sockets(sio, sockets))
async def delete_project(request: web.Request, project_uuid: str, user_id: int) -> None:
    await delete_project_from_db(request.app, project_uuid, user_id)

    async def remove_services_and_data():
        await remove_project_interactive_services(user_id, project_uuid, request.app)
        await delete_project_data(request, project_uuid, user_id)

    fire_and_forget_task(remove_services_and_data())
async def delete_project(app: web.Application, project_uuid: str, user_id: int) -> None:
    await delete_project_from_db(app, project_uuid, user_id)

    async def _remove_services_and_data():
        await remove_project_interactive_services(
            user_id, project_uuid, app, notify_users=False
        )
        await delete_project_data(app, project_uuid, user_id)

    fire_and_forget_task(_remove_services_and_data())
async def upload_link(self, user_id: str, file_uuid: str): """ Creates pre-signed upload link and updates metadata table when link is used and upload is successfuly completed SEE _metadata_file_updater """ async with self.engine.acquire() as conn: can: Optional[AccessRights] = await get_file_access_rights( conn, int(user_id), file_uuid) if not can.write: logger.debug("User %s was not allowed to upload file %s", user_id, file_uuid) raise web.HTTPForbidden( reason= f"User does not have enough access rights to upload file {file_uuid}" ) @retry(**postgres_service_retry_policy_kwargs) async def _init_metadata() -> Tuple[int, str]: async with self.engine.acquire() as conn: fmd = FileMetaData() fmd.simcore_from_uuid(file_uuid, self.simcore_bucket_name) fmd.user_id = user_id # NOTE: takes ownership of uploaded data # if file already exists, we might want to update a time-stamp # upsert file_meta_data insert_stmt = pg_insert(file_meta_data).values(**vars(fmd)) do_nothing_stmt = insert_stmt.on_conflict_do_nothing( index_elements=["file_uuid"]) await conn.execute(do_nothing_stmt) return fmd.file_size, fmd.last_modified file_size, last_modified = await _init_metadata() bucket_name = self.simcore_bucket_name object_name = file_uuid # a parallel task is tarted which will update the metadata of the updated file # once the update has finished. fire_and_forget_task( self._metadata_file_updater( file_uuid=file_uuid, bucket_name=bucket_name, object_name=object_name, file_size=file_size, last_modified=last_modified, )) return self.s3_client.create_presigned_put_url(bucket_name, object_name)
async def post_messages(
    app: Application, user_id: str, messages: Dict[str, Any]
) -> None:
    sio: AsyncServer = get_socket_server(app)

    with managed_resource(user_id, None, app) as registry:
        socket_ids: List[str] = await registry.find_socket_ids()
        for sid in socket_ids:
            # We only send the data to the right sockets
            # Notice that there might be several tabs open
            for event_name, data in messages.items():
                fire_and_forget_task(sio.emit(event_name, json.dumps(data), room=sid))
async def _start_monitoring_container(
    self, container: DockerContainer
) -> asyncio.Future:
    log_file = self.shared_folders.log_folder / "log.dat"
    if self.integration_version == version.parse("0.0.0"):
        # touch the output file, so it's ready for the container (v0)
        log_file.touch()

        log_processor_task = fire_and_forget_task(
            monitor_logs_task(log_file, self._post_messages)
        )
        return log_processor_task

    log_processor_task = fire_and_forget_task(
        monitor_logs_task(container, self._post_messages, log_file)
    )
    return log_processor_task
async def close_project(request: web.Request) -> web.Response:
    user_id = request[RQT_USERID_KEY]
    project_uuid = request.match_info.get("project_id")
    client_session_id = await request.json()

    try:
        # ensure the project exists
        project = await projects_api.get_project_for_user(
            request.app,
            project_uuid=project_uuid,
            user_id=user_id,
            include_templates=True,
            include_state=False,
        )

        # if we are the only user left we can safely remove the services
        async def _close_project_task() -> None:
            try:
                project_opened_by_others: bool = False
                with managed_resource(user_id, client_session_id, request.app) as rt:
                    project_users: List[int] = await rt.find_users_of_resource(
                        "project_id", project_uuid
                    )
                    project_opened_by_others = len(project_users) > 1

                if not project_opened_by_others:
                    # only remove the services if no one else is using them now
                    await projects_api.remove_project_interactive_services(
                        user_id, project_uuid, request.app
                    )
            finally:
                with managed_resource(user_id, client_session_id, request.app) as rt:
                    # now we can remove the lock
                    await rt.remove("project_id")

                # ensure we notify the user whatever happens; the GC should take care of dangling services in case of issues
                project["state"] = await projects_api.get_project_state_for_user(
                    user_id, project_uuid, request.app
                )
                await projects_api.notify_project_state_update(request.app, project)

        fire_and_forget_task(_close_project_task())

        raise web.HTTPNoContent(content_type="application/json")
    except ProjectNotFoundError as exc:
        raise web.HTTPNotFound(reason=f"Project {project_uuid} not found") from exc
async def user_logged_out(
    user_id: str, client_session_id: Optional[str], app: web.Application
) -> None:
    log.debug("user %s must be disconnected", user_id)
    # find the sockets related to the user
    sio = get_socket_server(app)
    with managed_resource(user_id, client_session_id, app) as rt:
        # start by disconnecting this client if possible
        if client_session_id:
            socket_id = await rt.get_socket_id()
            if socket_id:
                await sio.disconnect(sid=socket_id)
            # trigger faster gc on disconnect
            await rt.user_pressed_disconnect()

        # now let's give a chance to all the clients to properly logout
        sockets = await rt.find_socket_ids()
        if sockets:
            # let's do it as a task so it does not block us here
            fire_and_forget_task(disconnect_other_sockets(sio, sockets))
async def try_close_project_for_user(
    user_id: int,
    project_uuid: str,
    client_session_id: str,
    app: web.Application,
):
    with managed_resource(user_id, client_session_id, app) as rt:
        user_to_session_ids: List[UserSessionID] = await rt.find_users_of_resource(
            PROJECT_ID_KEY, project_uuid
        )
        # first check we have it opened now
        if UserSessionID(user_id, client_session_id) not in user_to_session_ids:
            # nothing to do, the project is already closed
            log.warning(
                "project [%s] is already closed for user [%s].",
                project_uuid,
                user_id,
            )
            return
        # remove the project from our list of opened ones
        log.debug(
            "removing project [%s] from user [%s] resources", project_uuid, user_id
        )
        await rt.remove(PROJECT_ID_KEY)

    # check it is not opened by someone else
    user_to_session_ids.remove(UserSessionID(user_id, client_session_id))
    log.debug("remaining user_to_session_ids: %s", user_to_session_ids)
    if not user_to_session_ids:
        # NOTE: depending on the garbage collector speed, it might already be removing it
        fire_and_forget_task(
            remove_project_interactive_services(user_id, project_uuid, app)
        )
    else:
        log.warning(
            "project [%s] is used by other users: [%s]. This should not be possible",
            project_uuid,
            {user_session.user_id for user_session in user_to_session_ids},
        )
async def post_group_messages(
    app: Application, room: str, messages: Dict[str, Any]
) -> None:
    sio: AsyncServer = get_socket_server(app)
    for event_name, data in messages.items():
        fire_and_forget_task(sio.emit(event_name, json.dumps(data), room=room))
async def _run_container(self):
    start_time = time.perf_counter()
    container = None
    docker_image = f"{config.DOCKER_REGISTRY}/{self.task.image['name']}:{self.task.image['tag']}"
    # NOTE: Env/Binds for the log folder are only necessary for integration "0"
    docker_container_config = {
        "Env": [
            f"{name.upper()}_FOLDER=/{name}/{self.task.job_id}"
            for name in ["input", "output", "log"]
        ],
        "Cmd": "run",
        "Image": docker_image,
        "Labels": {
            "user_id": str(self.user_id),
            "study_id": str(self.task.project_id),
            "node_id": str(self.task.node_id),
            "nano_cpus_limit": str(config.SERVICES_MAX_NANO_CPUS),
            "mem_limit": str(config.SERVICES_MAX_MEMORY_BYTES),
        },
        "HostConfig": {
            "Memory": config.SERVICES_MAX_MEMORY_BYTES,
            "NanoCPUs": config.SERVICES_MAX_NANO_CPUS,
            "Init": True,
            "AutoRemove": False,
            "Binds": [
                f"{config.SIDECAR_DOCKER_VOLUME_INPUT}:/input",
                f"{config.SIDECAR_DOCKER_VOLUME_OUTPUT}:/output",
                f"{config.SIDECAR_DOCKER_VOLUME_LOG}:/log",
            ],
        },
    }
    log.debug("Running image %s with config %s", docker_image, docker_container_config)

    # volume paths for the container (w/o prefix)
    result = "FAILURE"
    try:
        docker_client: aiodocker.Docker = aiodocker.Docker()
        await self._post_messages(
            LogType.LOG,
            f"[sidecar]Running {self.task.image['name']}:{self.task.image['tag']}...",
        )
        container = await docker_client.containers.create(
            config=docker_container_config
        )

        # start monitoring logs
        if self.integration_version == version.parse("0.0.0"):
            # touch the output file, so it's ready for the container (v0)
            log_file = self.shared_folders.log_folder / "log.dat"
            log_file.touch()
            log_processor_task = fire_and_forget_task(
                monitor_logs_task(log_file, self._post_messages)
            )
        else:
            log_processor_task = fire_and_forget_task(
                monitor_logs_task(container, self._post_messages)
            )

        # start the container
        await container.start()
        # indicate the container has started
        await self.rabbit_mq.post_instrumentation_message(
            {
                "metrics": "service_started",
                "user_id": self.user_id,
                "project_id": self.task.project_id,
                "service_uuid": self.task.node_id,
                "service_type": "COMPUTATIONAL",
                "service_key": self.task.image["name"],
                "service_tag": self.task.image["tag"],
            }
        )

        # wait until the container finishes: success, failure or timeout
        container_data = await container.show()
        while container_data["State"]["Running"]:
            # reload container data
            container_data = await container.show()
            if (
                (time.perf_counter() - start_time) > config.SERVICES_TIMEOUT_SECONDS
                and config.SERVICES_TIMEOUT_SECONDS > 0
            ):
                log.error(
                    "Running container timed-out after %ss and will be stopped now\nlogs: %s",
                    config.SERVICES_TIMEOUT_SECONDS,
                    container.log(stdout=True, stderr=True),
                )
                await container.stop()
                break

        # reload container data to check the exit code with the latest info
        container_data = await container.show()
        if container_data["State"]["ExitCode"] > 0:
            raise exceptions.SidecarException(
                f"{docker_image} completed with error code {container_data['State']['ExitCode']}: {container_data['State']['Error']}"
            )
        # ensure progress 1.0 is sent
        await self._post_messages(LogType.PROGRESS, "1.0")
        result = "SUCCESS"
        log.info("%s completed successfully!", docker_image)
    except aiodocker.exceptions.DockerContainerError:
        log.exception(
            "Error while running %s with parameters %s",
            docker_image,
            docker_container_config,
        )
    except aiodocker.exceptions.DockerError:
        log.exception(
            "Unknown error while trying to run %s with parameters %s",
            docker_image,
            docker_container_config,
        )
    finally:
        stop_time = time.perf_counter()
        log.info("Running %s took %s seconds", docker_image, stop_time - start_time)
        if container:
            # clean up the container
            await container.delete(force=True)
        # stop monitoring logs now
        log_processor_task.cancel()
        # instrumentation
        await self.rabbit_mq.post_instrumentation_message(
            {
                "metrics": "service_stopped",
                "user_id": self.user_id,
                "project_id": self.task.project_id,
                "service_uuid": self.task.node_id,
                "service_type": "COMPUTATIONAL",
                "service_key": self.task.image["name"],
                "service_tag": self.task.image["tag"],
                "result": result,
            }
        )
        await log_processor_task
async def post_group_messages(
    app: Application, room: str, messages: Sequence[SocketMessageDict]
) -> None:
    fire_and_forget_task(send_group_messages(app, room, messages))
async def post_messages(
    app: Application, user_id: str, messages: Sequence[SocketMessageDict]
) -> None:
    fire_and_forget_task(send_messages(app, user_id, messages))
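All of the snippets above hand a coroutine to fire_and_forget_task without awaiting it. The helper itself is not shown here; the sketch below is an assumption of what such a helper minimally needs to do (schedule the coroutine on the running asyncio loop and keep a strong reference so the task is not garbage-collected before it finishes), not the actual implementation used by these call sites.

# Minimal sketch of a fire-and-forget helper (assumption, for illustration only).
import asyncio
from typing import Coroutine, Set

_background_tasks: Set[asyncio.Task] = set()


def fire_and_forget_task(coroutine: Coroutine) -> asyncio.Task:
    # schedule the coroutine on the current event loop
    task = asyncio.ensure_future(coroutine)
    # keep a reference so the task survives until completion,
    # then drop it once the task is done
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    return task

Returning the task matches how _start_monitoring_container and _run_container above keep a handle on the scheduled work so they can later cancel and await it.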