def make_retry_decorator( retries: int, delay: float ) -> typing.Callable[[typing.Callable], typing.Callable]: return retry(wait=wait_fixed(delay), retry=(retry_if_result(lambda res: res.status >= 500) | retry_if_exception_type( exception_types=aiohttp.ClientError) ), stop=stop_after_attempt(retries + 1))
def __init__(self, logger: Optional[logging.Logger] = None): logger = logger or log self.kwargs = dict( retry=retry_if_exception_type(DatabaseError), wait=wait_fixed(self.WAIT_SECS), stop=stop_after_attempt(self.ATTEMPTS_COUNT), after=after_log(logger, logging.WARNING), retry_error_callback=raise_http_unavailable_error, )
async def _wait_for_call(mocked_fct): async for attempt in AsyncRetrying( stop=stop_after_delay(10), wait=wait_random(0, 1), retry=retry_if_exception_type(AssertionError), reraise=True, ): with attempt: print(f"waiting for call in mocked fct {mocked_fct}, " f"Attempt={attempt.retry_state.attempt_number}") mocked_fct.assert_called_once()
async def assert_and_wait_for_pipeline_status( client: httpx.AsyncClient, url: AnyHttpUrl, user_id: UserID, project_uuid: UUID, wait_for_states: Optional[List[RunningState]] = None, ) -> ComputationTaskGet: if not wait_for_states: wait_for_states = [ RunningState.SUCCESS, RunningState.FAILED, RunningState.ABORTED, ] MAX_TIMEOUT_S = 5 * MINUTE async def check_pipeline_state() -> ComputationTaskGet: response = await client.get(url, params={"user_id": user_id}) assert ( response.status_code == status.HTTP_202_ACCEPTED ), f"response code is {response.status_code}, error: {response.text}" task_out = ComputationTaskGet.parse_obj(response.json()) assert task_out.id == project_uuid assert task_out.url == f"{client.base_url}/v2/computations/{project_uuid}" print( f"Pipeline '{project_uuid=}' current task out is '{task_out=}'", ) assert wait_for_states assert ( task_out.state in wait_for_states ), f"current task state is '{task_out.state}', not in any of {wait_for_states}" return task_out start = time.monotonic() async for attempt in AsyncRetrying( stop=stop_after_delay(MAX_TIMEOUT_S), wait=wait_fixed(2), retry=retry_if_exception_type(AssertionError), reraise=True, ): elapsed_s = time.monotonic() - start with attempt: print( f"Waiting for pipeline '{project_uuid=}' state to be one of: {wait_for_states=}, attempt={attempt.retry_state.attempt_number}, time={elapsed_s}s" ) task_out = await check_pipeline_state() print( f"Pipeline '{project_uuid=}' state succesfuly became '{task_out.state}'\n{json.dumps(attempt.retry_state.retry_object.statistics, indent=2)}, time={elapsed_s}s" ) return task_out # this is only to satisfy pylance raise AssertionError("No computation task generated!")
def wait_for(func, step=1, text=None, timeout=None, throw_exc=False, **kwargs): """ Wrapper function to wait with timeout option. If timeout received, avocado 'wait_for' method will be used. Otherwise the below function will be called. :param func: Function to evaluate. :param step: Time to sleep between attempts in seconds :param text: Text to print while waiting, for debug purposes :param timeout: Timeout in seconds :param throw_exc: Raise exception if timeout expired, but func result is not True :param kwargs: Keyword arguments to func :return: Return value of func. """ if not timeout: return forever_wait_for(func, step, text, **kwargs) res = None def retry_logger(retry_state): # pylint: disable=protected-access LOGGER.debug('wait_for: Retrying {}: attempt {} ended with: {}'.format( text if text else retry_state.fn.__name__, retry_state.attempt_number, str(retry_state.outcome._exception) if retry_state.outcome._exception else retry_state.outcome._result)) try: retry = tenacity.Retrying( reraise=throw_exc, stop=tenacity.stop_after_delay(timeout), wait=tenacity.wait_fixed(step), before_sleep=retry_logger, retry=(retry_if_result(lambda value: not value) | retry_if_exception_type())) res = retry.call(func, **kwargs) except Exception as ex: # pylint: disable=broad-except err = 'Wait for: {}: timeout - {} seconds - expired'.format( text if text else func.__name__, timeout) LOGGER.error(err) if hasattr(ex, 'last_attempt') and ex.last_attempt.exception() is not None: # pylint: disable=no-member LOGGER.error("last error: %s", repr(ex.last_attempt.exception())) # pylint: disable=no-member else: LOGGER.error("last error: %s", repr(ex)) if throw_exc: if hasattr(ex, 'last_attempt') and not ex.last_attempt._result: # pylint: disable=protected-access,no-member raise RetryError(err) raise return res
def wait_for_status( method: Callable[..., T], validate: Optional[Callable[[T], bool]] = None, fixed_wait_time: float = 5, timeout: float = 300, **method_kwargs: Any, ) -> T: """Tries to run *method* (and run also a validation of its output) until no AssertionError is raised. Arguments are described below. More keyword arguments can be given for `method`. Args: method (Callable): An unreliable method (or a status query). The method will be executed while: \ (it raises an AssetionError or the `validation` function outputs `False`) and none of the ending \ conditions is satisfied (look at int arguments) validate (Optional[Callable], optional): A callable that validates the output of *method*. \ It must receives the output of `method` as argument and returns `True` if it is ok and `False` if it is \ invalid. Defaults to None, meaning no validation will be executed. fixed_wait_time (float, optional): Time (in seconds) to wait between attempts. Defaults to 5. timeout (float, optional): Time (in seconds) after which no more attempts are made. Defaults to 300 (5 minutes). Returns: [Any]: The method's output """ @retry( wait=wait_fixed(fixed_wait_time), stop=stop_after_delay(timeout), retry=retry_if_exception_type(AssertionError), ) def _wait_for_status( method: Callable[..., T], validate: Optional[Callable[[T], bool]] = None, **method_kwargs: Any, ) -> T: """Runs the method and apply validation.""" try: result: T = method(**method_kwargs) if validate is not None: assert validate( result), f'Validation failed. Result is {result}' except Exception as ex: if not isinstance(ex, AssertionError): print( f'An unexpected error was detected, method result was: {result}' ) raise return result return _wait_for_status(method=method, validate=validate, **method_kwargs)
def wait_for(func, step=1, text=None, timeout=None, throw_exc=True, **kwargs): """ Wrapper function to wait with timeout option. :param func: Function to evaluate. :param step: Time to sleep between attempts in seconds :param text: Text to print while waiting, for debug purposes :param timeout: Timeout in seconds :param throw_exc: Raise exception if timeout expired, but func result is not True :param kwargs: Keyword arguments to func :return: Return value of func. """ if not timeout: return forever_wait_for(func, step, text, **kwargs) res = None def retry_logger(retry_state): # pylint: disable=protected-access LOGGER.debug( 'wait_for: Retrying %s: attempt %s ended with: %s', text or retry_state.fn.__name__, retry_state.attempt_number, retry_state.outcome._exception or retry_state.outcome._result, ) try: retry = tenacity.Retrying( reraise=throw_exc, stop=tenacity.stop_after_delay(timeout), wait=tenacity.wait_fixed(step), before_sleep=retry_logger, retry=(retry_if_result(lambda value: not value) | retry_if_exception_type())) res = retry(func, **kwargs) except Exception as ex: # pylint: disable=broad-except err = f"Wait for: {text or func.__name__}: timeout - {timeout} seconds - expired" LOGGER.error(err) if hasattr(ex, 'last_attempt') and ex.last_attempt.exception() is not None: # pylint: disable=no-member LOGGER.error("last error: %r", ex.last_attempt.exception()) # pylint: disable=no-member else: LOGGER.error("last error: %r", ex) if throw_exc: if hasattr(ex, 'last_attempt') and not ex.last_attempt._result: # pylint: disable=protected-access,no-member raise RetryError(err) from ex raise return res
async def _assert_and_wait_for_comp_task_states_to_be_transmitted_in_projects( project_id: str, postgres_session: sa.orm.session.Session, ): async for attempt in AsyncRetrying( reraise=True, stop=stop_after_delay(120), wait=wait_fixed(5), retry=retry_if_exception_type(AssertionError), ): with attempt: print( f"--> waiting for pipeline results to move to projects table, attempt {attempt.retry_state.attempt_number}..." ) comp_tasks_in_db: Dict[NodeIdStr, Any] = _get_computational_tasks_from_db( project_id, postgres_session) workbench_in_db: Dict[NodeIdStr, Any] = _get_project_workbench_from_db( project_id, postgres_session) for node_id, node_values in comp_tasks_in_db.items(): assert ( node_id in workbench_in_db ), f"node {node_id=} is missing from workbench {json_dumps(workbench_in_db, indent=2)}" node_in_project_table = workbench_in_db[node_id] # if this one is in, the other should also be but let's check it carefully assert node_values.run_hash assert "runHash" in node_in_project_table assert node_values.run_hash == node_in_project_table["runHash"] assert node_values.state assert "state" in node_in_project_table assert "currentStatus" in node_in_project_table["state"] # NOTE: beware that the comp_tasks has StateType and Workbench has RunningState (sic) assert (DB_TO_RUNNING_STATE[node_values.state].value == node_in_project_table["state"]["currentStatus"]) print( "--> tasks were properly transferred! " f"That's great: {json_dumps(attempt.retry_state.retry_object.statistics)}", )
async def test_listen_comp_tasks_task( mock_project_subsystem: Dict, comp_task_listening_task: None, client, update_values: Dict[str, Any], expected_calls: List[str], task_class: NodeClass, ): db_engine: aiopg.sa.Engine = client.app[APP_DB_ENGINE_KEY] async with db_engine.acquire() as conn: # let's put some stuff in there now result = await conn.execute( comp_tasks.insert() .values(outputs=json.dumps({}), node_class=task_class) .returning(literal_column("*")) ) row: RowProxy = await result.fetchone() task = dict(row) # let's update some values await conn.execute( comp_tasks.update() .values(**update_values) .where(comp_tasks.c.task_id == task["task_id"]) ) # tests whether listener gets hooked calls executed for call_name, mocked_call in mock_project_subsystem.items(): if call_name in expected_calls: async for attempt in AsyncRetrying( wait=wait_fixed(1), stop=stop_after_delay(10), retry=retry_if_exception_type(AssertionError), before_sleep=before_sleep_log(logger, logging.INFO), reraise=True, ): with attempt: mocked_call.assert_awaited() else: mocked_call.assert_not_called()
def wait_for(func, step=1, text=None, timeout=None, throw_exc=False, **kwargs): """ Wrapper function to wait with timeout option. If timeout received, avocado 'wait_for' method will be used. Otherwise the below function will be called. :param func: Function to evaluate. :param step: Time to sleep between attempts in seconds :param text: Text to print while waiting, for debug purposes :param timeout: Timeout in seconds :param throw_exc: Raise exception if timeout expired, but func result is not True :param kwargs: Keyword arguments to func :return: Return value of func. """ if not timeout: return forever_wait_for(func, step, text, **kwargs) res = None try: r = tenacity.Retrying( reraise=throw_exc, stop=tenacity.stop_after_delay(timeout), wait=tenacity.wait_fixed(step), retry=(retry_if_result(lambda value: not value) | retry_if_exception_type()) ) res = r.call(func, **kwargs) except Exception as ex: err = 'Wait for: {}: timeout - {} seconds - expired'.format(text, timeout) LOGGER.error(err) if hasattr(ex, 'last_attempt'): LOGGER.error("last error: %s", repr(ex.last_attempt.exception())) else: LOGGER.error("last error: %s", repr(ex)) if throw_exc: raise return res
class ContainerWrapper: def __init__(self, container_url: str) -> None: self.client = ContainerClient.from_container_url(container_url) @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def upload_file(self, file_path: str, blob_name: str) -> None: with open(file_path, "rb") as handle: self.client.upload_blob(name=blob_name, data=handle, overwrite=True, max_concurrency=10) return None def upload_file_data(self, data: str, blob_name: str) -> None: self.client.upload_blob(name=blob_name, data=data, overwrite=True, max_concurrency=10) def upload_dir(self, dir_path: str, recursive: bool = True) -> None: for path in glob.glob(os.path.join(dir_path, "**"), recursive=recursive): if os.path.isfile(path): blob_name = os.path.relpath(path, start=dir_path) self.upload_file(path, blob_name) @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def delete_blob(self, blob_name: str) -> None: self.client.delete_blob(blob_name) return None @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def download_blob(self, blob_name: str) -> bytes: return cast(bytes, self.client.download_blob(blob_name).content_as_bytes()) @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def list_blobs(self, *, name_starts_with: Optional[str] = None) -> List[str]: result = [ x.name for x in self.client.list_blobs(name_starts_with=name_starts_with) ] return cast(List[str], result)
class DataStorageManager: # pylint: disable=too-many-public-methods """Data storage manager The dsm has access to the database for all meta data and to the actual backend. For now this is simcore's S3 [minio] and the datcore storage facilities. For all data that is in-house (simcore.s3, ...) we keep a synchronized database with meta information for the physical files. For physical changes on S3, that might be time-consuming, the db keeps a state (delete and upload mostly) The dsm provides the following additional functionalities: - listing of folders for a given users, optionally filtered using a regular expression and optionally sorted by one of the meta data keys - upload/download of files client -> S3 : presigned upload link S3 -> client : presigned download link datcore -> client: presigned download link S3 -> datcore: local copy and then upload via their api minio/S3 and postgres can talk nicely with each other via Notifications using rabbigMQ which we already have. See: https://blog.minio.io/part-5-5-publish-minio-events-via-postgresql-50f6cc7a7346 https://docs.minio.io/docs/minio-bucket-notification-guide.html """ # TODO: perhaps can be used a cache? add a lifetime? s3_client: MinioClientWrapper engine: Engine loop: object pool: ThreadPoolExecutor simcore_bucket_name: str has_project_db: bool session: AioSession = field(default_factory=aiobotocore.get_session) datcore_tokens: Dict[str, DatCoreApiToken] = field(default_factory=dict) app: Optional[web.Application] = None def _create_aiobotocore_client_context(self) -> ClientCreatorContext: assert hasattr(self.session, "create_client") # nosec # pylint: disable=no-member # SEE API in https://botocore.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html # SEE https://aiobotocore.readthedocs.io/en/latest/index.html return self.session.create_client( "s3", endpoint_url=self.s3_client.endpoint_url, aws_access_key_id=self.s3_client.access_key, aws_secret_access_key=self.s3_client.secret_key, ) def _get_datcore_tokens( self, user_id: str) -> Tuple[Optional[str], Optional[str]]: # pylint: disable=no-member token = self.datcore_tokens.get(user_id, DatCoreApiToken()) return token.to_tuple() async def locations(self, user_id: str): locs = [] simcore_s3 = {"name": SIMCORE_S3_STR, "id": SIMCORE_S3_ID} locs.append(simcore_s3) api_token, api_secret = self._get_datcore_tokens(user_id) if api_token and api_secret and self.app: if await datcore_adapter.check_user_can_connect( self.app, api_token, api_secret): datcore = {"name": DATCORE_STR, "id": DATCORE_ID} locs.append(datcore) return locs @classmethod def location_from_id(cls, location_id: str): return get_location_from_id(location_id) # LIST/GET --------------------------- # pylint: disable=too-many-arguments # pylint: disable=too-many-branches # pylint: disable=too-many-statements async def list_files(self, user_id: str, location: str, uuid_filter: str = "", regex: str = "") -> List[FileMetaDataEx]: """Returns a list of file paths - Works for simcore.s3 and datcore - Can filter on uuid: useful to filter on project_id/node_id - Can filter upon regular expression (for now only on key: value pairs of the FileMetaData) """ data = deque() if location == SIMCORE_S3_STR: accesible_projects_ids = [] async with self.engine.acquire() as conn, conn.begin(): accesible_projects_ids = await get_readable_project_ids( conn, int(user_id)) where_statement = ( file_meta_data.c.user_id == user_id ) | file_meta_data.c.project_id.in_(accesible_projects_ids) if uuid_filter: where_statement &= file_meta_data.c.file_uuid.ilike( f"%{uuid_filter}%") query = sa.select([file_meta_data]).where(where_statement) async for row in conn.execute(query): dex = to_meta_data_extended(row) if not is_file_entry_valid(dex.fmd): # NOTE: the file is not updated with the information from S3 backend. # 1. Either the file exists, but was never updated in the database # 2. Or the file does not exist or was never completed, and the file_meta_data entry is old and faulty # we need to update from S3 here since the database is not up-to-date dex = await self.try_update_database_from_storage( dex.fmd.file_uuid, dex.fmd.bucket_name, dex.fmd.object_name, ) if dex: data.append(dex) if self.has_project_db: uuid_name_dict = {} # now parse the project to search for node/project names try: async with self.engine.acquire() as conn, conn.begin(): query = sa.select([projects]).where( projects.c.uuid.in_(accesible_projects_ids)) async for row in conn.execute(query): proj_data = dict(row.items()) uuid_name_dict[ proj_data["uuid"]] = proj_data["name"] wb = proj_data["workbench"] for node in wb.keys(): uuid_name_dict[node] = wb[node]["label"] except DBAPIError as _err: logger.exception( "Error querying database for project names") if not uuid_name_dict: # there seems to be no project whatsoever for user_id return [] # only keep files from non-deleted project clean_data = deque() for dx in data: d = dx.fmd if d.project_id not in uuid_name_dict: continue # # FIXME: artifically fills ['project_name', 'node_name', 'file_id', 'raw_file_path', 'display_file_path'] # with information from the projects table! d.project_name = uuid_name_dict[d.project_id] if d.node_id in uuid_name_dict: d.node_name = uuid_name_dict[d.node_id] d.raw_file_path = str( Path(d.project_id) / Path(d.node_id) / Path(d.file_name)) d.display_file_path = d.raw_file_path d.file_id = d.file_uuid if d.node_name and d.project_name: d.display_file_path = str( Path(d.project_name) / Path(d.node_name) / Path(d.file_name)) # once the data was sync to postgres metadata table at this point clean_data.append(dx) data = clean_data elif location == DATCORE_STR: api_token, api_secret = self._get_datcore_tokens(user_id) assert self.app # nosec assert api_secret # nosec assert api_token # nosec return await datcore_adapter.list_all_datasets_files_metadatas( self.app, api_token, api_secret) if uuid_filter: # TODO: incorporate this in db query! _query = re.compile(uuid_filter, re.IGNORECASE) filtered_data = deque() for dx in data: d = dx.fmd if _query.search(d.file_uuid): filtered_data.append(dx) return list(filtered_data) if regex: _query = re.compile(regex, re.IGNORECASE) filtered_data = deque() for dx in data: d = dx.fmd _vars = vars(d) for v in _vars.keys(): if _query.search(v) or _query.search(str(_vars[v])): filtered_data.append(dx) break return list(filtered_data) return list(data) async def list_files_dataset( self, user_id: str, location: str, dataset_id: str ) -> Union[List[FileMetaData], List[FileMetaDataEx]]: # this is a cheap shot, needs fixing once storage/db is in sync data = [] if location == SIMCORE_S3_STR: data: List[FileMetaDataEx] = await self.list_files( user_id, location, uuid_filter=dataset_id + "/") elif location == DATCORE_STR: api_token, api_secret = self._get_datcore_tokens(user_id) # lists all the files inside the dataset assert self.app # nosec assert api_secret # nosec assert api_token # nosec return await datcore_adapter.list_all_files_metadatas_in_dataset( self.app, api_token, api_secret, dataset_id) return data async def list_datasets(self, user_id: str, location: str) -> List[DatasetMetaData]: """Returns a list of top level datasets Works for simcore.s3 and datcore """ data = [] if location == SIMCORE_S3_STR: if self.has_project_db: try: async with self.engine.acquire() as conn, conn.begin(): readable_projects_ids = await get_readable_project_ids( conn, int(user_id)) has_read_access = projects.c.uuid.in_( readable_projects_ids) # FIXME: this DOES NOT read from file-metadata table!!! query = sa.select([projects.c.uuid, projects.c.name ]).where(has_read_access) async for row in conn.execute(query): dmd = DatasetMetaData( dataset_id=row.uuid, display_name=row.name, ) data.append(dmd) except DBAPIError as _err: logger.exception( "Error querying database for project names") elif location == DATCORE_STR: api_token, api_secret = self._get_datcore_tokens(user_id) assert self.app # nosec assert api_secret # nosec assert api_token # nosec return await datcore_adapter.list_datasets(self.app, api_token, api_secret) return data async def list_file(self, user_id: str, location: str, file_uuid: str) -> Optional[FileMetaDataEx]: if location == SIMCORE_S3_STR: async with self.engine.acquire() as conn, conn.begin(): can: Optional[AccessRights] = await get_file_access_rights( conn, int(user_id), file_uuid) if can.read: query = sa.select([ file_meta_data ]).where(file_meta_data.c.file_uuid == file_uuid) result = await conn.execute(query) row = await result.first() if not row: return None file_metadata = to_meta_data_extended(row) if is_file_entry_valid(file_metadata.fmd): return file_metadata # we need to update from S3 here since the database is not up-to-date file_metadata = await self.try_update_database_from_storage( file_metadata.fmd.file_uuid, file_metadata.fmd.bucket_name, file_metadata.fmd.object_name, ) return file_metadata # FIXME: returns None in both cases: file does not exist or use has no access logger.debug("User %s cannot read file %s", user_id, file_uuid) return None elif location == DATCORE_STR: # FIXME: review return inconsistencies # api_token, api_secret = self._get_datcore_tokens(user_id) import warnings warnings.warn("NOT IMPLEMENTED!!!") return None # UPLOAD/DOWNLOAD LINKS --------------------------- async def upload_file_to_datcore(self, _user_id: str, _local_file_path: str, _destination_id: str): import warnings warnings.warn(f"NOT IMPLEMENTED!!! in {self.__class__}") # uploads a locally available file to dat core given the storage path, optionally attached some meta data # api_token, api_secret = self._get_datcore_tokens(user_id) # await dcw.upload_file_to_id(destination_id, local_file_path) async def try_update_database_from_storage( self, file_uuid: str, bucket_name: str, object_name: str, silence_exception: bool = False, ) -> Optional[FileMetaDataEx]: try: async with self._create_aiobotocore_client_context( ) as aioboto_client: result = await aioboto_client.head_object(Bucket=bucket_name, Key=object_name ) # type: ignore file_size = result["ContentLength"] # type: ignore last_modified = result["LastModified"] # type: ignore entity_tag = result["ETag"].strip('"') # type: ignore async with self.engine.acquire() as conn: result: ResultProxy = await conn.execute( file_meta_data.update().where( file_meta_data.c.file_uuid == file_uuid).values( file_size=file_size, last_modified=last_modified, entity_tag=entity_tag, ).returning(literal_column("*"))) if not result: return None row: Optional[RowProxy] = await result.first() if not row: return None return to_meta_data_extended(row) except botocore.exceptions.ClientError: if silence_exception: logger.debug("Error happened while trying to access %s", file_uuid) else: logger.warning("Error happened while trying to access %s", file_uuid, exc_info=True) # the file is not existing or some error happened return None @retry( stop=stop_after_delay(1 * _HOUR), wait=wait_exponential(multiplier=0.1, exp_base=1.2, max=30), retry=(retry_if_exception_type() | retry_if_result(lambda result: result is None)), before_sleep=before_sleep_log(logger, logging.INFO), ) async def auto_update_database_from_storage_task(self, file_uuid: str, bucket_name: str, object_name: str): return await self.try_update_database_from_storage( file_uuid, bucket_name, object_name, silence_exception=True) async def upload_link(self, user_id: str, file_uuid: str): """ Creates pre-signed upload link and updates metadata table when link is used and upload is successfuly completed SEE _metadata_file_updater """ async with self.engine.acquire() as conn: can: Optional[AccessRights] = await get_file_access_rights( conn, int(user_id), file_uuid) if not can.write: logger.debug("User %s was not allowed to upload file %s", user_id, file_uuid) raise web.HTTPForbidden( reason= f"User does not have enough access rights to upload file {file_uuid}" ) @retry(**postgres_service_retry_policy_kwargs) async def _init_metadata() -> Tuple[int, str]: async with self.engine.acquire() as conn: fmd = FileMetaData() fmd.simcore_from_uuid(file_uuid, self.simcore_bucket_name) fmd.user_id = user_id # NOTE: takes ownership of uploaded data # if file already exists, we might want to update a time-stamp # upsert file_meta_data insert_stmt = pg_insert(file_meta_data).values(**vars(fmd)) do_nothing_stmt = insert_stmt.on_conflict_do_nothing( index_elements=["file_uuid"]) await conn.execute(do_nothing_stmt) return fmd.file_size, fmd.last_modified await _init_metadata() bucket_name = self.simcore_bucket_name object_name = file_uuid # a parallel task is tarted which will update the metadata of the updated file # once the update has finished. fire_and_forget_task( self.auto_update_database_from_storage_task( file_uuid=file_uuid, bucket_name=bucket_name, object_name=object_name, )) return self.s3_client.create_presigned_put_url(bucket_name, object_name) async def download_link_s3(self, file_uuid: str, user_id: int) -> str: # access layer async with self.engine.acquire() as conn: can: Optional[AccessRights] = await get_file_access_rights( conn, int(user_id), file_uuid) if not can.read: # NOTE: this is tricky. A user with read access can download and data! # If write permission would be required, then shared projects as views cannot # recover data in nodes (e.g. jupyter cannot pull work data) # logger.debug("User %s was not allowed to download file %s", user_id, file_uuid) raise web.HTTPForbidden( reason= f"User does not have enough rights to download {file_uuid}" ) bucket_name = self.simcore_bucket_name async with self.engine.acquire() as conn: stmt = sa.select([file_meta_data.c.object_name ]).where(file_meta_data.c.file_uuid == file_uuid) object_name: Optional[str] = await conn.scalar(stmt) if object_name is None: raise web.HTTPNotFound( reason=f"File '{file_uuid}' does not exists in storage.") link = self.s3_client.create_presigned_get_url(bucket_name, object_name) return link async def download_link_datcore(self, user_id: str, file_id: str) -> URL: api_token, api_secret = self._get_datcore_tokens(user_id) assert self.app # nosec assert api_secret # nosec assert api_token # nosec return await datcore_adapter.get_file_download_presigned_link( self.app, api_token, api_secret, file_id) # COPY ----------------------------- async def copy_file_s3_s3(self, user_id: str, dest_uuid: str, source_uuid: str): # FIXME: operation MUST be atomic # source is s3, location is s3 to_bucket_name = self.simcore_bucket_name to_object_name = dest_uuid from_bucket = self.simcore_bucket_name from_object_name = source_uuid # FIXME: This is not async! self.s3_client.copy_object(to_bucket_name, to_object_name, from_bucket, from_object_name) # update db async with self.engine.acquire() as conn: fmd = FileMetaData() fmd.simcore_from_uuid(dest_uuid, self.simcore_bucket_name) fmd.user_id = user_id ins = file_meta_data.insert().values(**vars(fmd)) await conn.execute(ins) async def copy_file_s3_datcore(self, user_id: str, dest_uuid: str, source_uuid: str): session = get_client_session(self.app) # source is s3, get link and copy to datcore bucket_name = self.simcore_bucket_name object_name = source_uuid filename = source_uuid.split("/")[-1] s3_dowload_link = self.s3_client.create_presigned_get_url( bucket_name, object_name) with tempfile.TemporaryDirectory() as tmpdir: # FIXME: connect download and upload streams local_file_path = os.path.join(tmpdir, filename) # Downloads S3 -> local await download_to_file_or_raise(session, s3_dowload_link, local_file_path) # Uploads local -> DATCore await self.upload_file_to_datcore( _user_id=user_id, _local_file_path=local_file_path, _destination_id=dest_uuid, ) async def copy_file_datcore_s3( self, user_id: str, dest_uuid: str, source_uuid: str, filename_missing: bool = False, ): session = get_client_session(self.app) # 2 steps: Get download link for local copy, the upload link to s3 # TODO: This should be a redirect stream! dc_link, filename = await self.download_link_datcore( user_id=user_id, file_id=source_uuid) if filename_missing: dest_uuid = str(Path(dest_uuid) / filename) s3_upload_link = await self.upload_link(user_id, dest_uuid) with tempfile.TemporaryDirectory() as tmpdir: # FIXME: connect download and upload streams local_file_path = os.path.join(tmpdir, filename) # Downloads DATCore -> local await download_to_file_or_raise(session, dc_link, local_file_path) # Uploads local -> S3 s3_upload_link = URL(s3_upload_link) async with session.put( s3_upload_link, data=Path(local_file_path).open("rb"), raise_for_status=True, ) as resp: logger.debug( "Uploaded local -> SIMCore %s . Status %s", s3_upload_link, resp.status, ) return dest_uuid async def copy_file( self, user_id: str, dest_location: str, dest_uuid: str, source_location: str, source_uuid: str, ): if source_location == SIMCORE_S3_STR: if dest_location == DATCORE_STR: await self.copy_file_s3_datcore(user_id, dest_uuid, source_uuid) elif dest_location == SIMCORE_S3_STR: await self.copy_file_s3_s3(user_id, dest_uuid, source_uuid) elif source_location == DATCORE_STR: if dest_location == DATCORE_STR: raise NotImplementedError( "copy files from datcore 2 datcore not impl") if dest_location == SIMCORE_S3_STR: await self.copy_file_datcore_s3(user_id, dest_uuid, source_uuid) async def deep_copy_project_simcore_s3( self, user_id: str, source_project: Dict[str, Any], destination_project: Dict[str, Any], node_mapping: Dict[str, str], ): """Parses a given source project and copies all related files to the destination project Since all files are organized as project_id/node_id/filename or links to datcore this function creates a new folder structure project_id/node_id/filename and copies all files to the corresponding places. Additionally, all external files from datcore are being copied and the paths in the destination project are adapted accordingly Lastly, the meta data db is kept in sync """ source_folder = source_project["uuid"] dest_folder = destination_project["uuid"] # access layer async with self.engine.acquire() as conn, conn.begin(): source_access_rights = await get_project_access_rights( conn, int(user_id), project_id=source_folder) dest_access_rights = await get_project_access_rights( conn, int(user_id), project_id=dest_folder) if not source_access_rights.read: logger.debug( "User %s was not allowed to read from project %s", user_id, source_folder, ) raise web.HTTPForbidden( reason= f"User does not have enough access rights to read from project '{source_folder}'" ) if not dest_access_rights.write: logger.debug( "User %s was not allowed to write to project %s", user_id, dest_folder, ) raise web.HTTPForbidden( reason= f"User does not have enough access rights to write to project '{dest_folder}'" ) # build up naming map based on labels uuid_name_dict = {} uuid_name_dict[dest_folder] = destination_project["name"] for src_node_id, src_node in source_project["workbench"].items(): new_node_id = node_mapping.get(src_node_id) if new_node_id is not None: uuid_name_dict[new_node_id] = src_node["label"] async with self._create_aiobotocore_client_context() as aioboto_client: logger.debug( "Listing all items under %s:%s/", self.simcore_bucket_name, source_folder, ) # Step 1: List all objects for this project replace them with the destination object name # and do a copy at the same time collect some names # Note: the / at the end of the Prefix is VERY important, makes the listing several order of magnitudes faster response = await aioboto_client.list_objects_v2( Bucket=self.simcore_bucket_name, Prefix=f"{source_folder}/") contents: List = response.get("Contents", []) logger.debug( "Listed %s items under %s:%s/", len(contents), self.simcore_bucket_name, source_folder, ) for item in contents: source_object_name = item["Key"] source_object_parts = Path(source_object_name).parts if len(source_object_parts) != 3: # This may happen once we have shared/home folders # FIXME: this might cause problems logger.info( "Skipping copy of '%s'. Expected three parts path!", source_object_name, ) continue old_node_id = source_object_parts[1] new_node_id = node_mapping.get(old_node_id) if new_node_id is not None: old_filename = source_object_parts[2] dest_object_name = str( Path(dest_folder) / new_node_id / old_filename) copy_kwargs = dict( CopySource={ "Bucket": self.simcore_bucket_name, "Key": source_object_name, }, Bucket=self.simcore_bucket_name, Key=dest_object_name, ) logger.debug("Copying %s ...", copy_kwargs) # FIXME: if 5GB, it must use multipart upload Upload Part - Copy API # SEE https://botocore.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.copy_object await aioboto_client.copy_object(**copy_kwargs) # Step 2: List all references in outputs that point to datcore and copy over for node_id, node in destination_project["workbench"].items(): outputs: Dict = node.get("outputs", {}) for _, output in outputs.items(): source = output["path"] if output.get("store") == DATCORE_ID: destination_folder = str(Path(dest_folder) / node_id) logger.info("Copying %s to %s", source, destination_folder) destination = await self.copy_file_datcore_s3( user_id=user_id, dest_uuid=destination_folder, source_uuid=source, filename_missing=True, ) assert destination.startswith(destination_folder) # nosec output["store"] = SIMCORE_S3_ID output["path"] = destination elif output.get("store") == SIMCORE_S3_ID: destination = str( Path(dest_folder) / node_id / Path(source).name) output["store"] = SIMCORE_S3_ID output["path"] = destination fmds = [] async with self._create_aiobotocore_client_context() as aioboto_client: # step 3: list files first to create fmds # Note: the / at the end of the Prefix is VERY important, makes the listing several order of magnitudes faster response = await aioboto_client.list_objects_v2( Bucket=self.simcore_bucket_name, Prefix=f"{dest_folder}/") if "Contents" in response: for item in response["Contents"]: fmd = FileMetaData() fmd.simcore_from_uuid(item["Key"], self.simcore_bucket_name) fmd.project_name = uuid_name_dict.get( dest_folder, "Untitled") fmd.node_name = uuid_name_dict.get(fmd.node_id, "Untitled") fmd.raw_file_path = fmd.file_uuid fmd.display_file_path = str( Path(fmd.project_name) / fmd.node_name / fmd.file_name) fmd.user_id = user_id fmd.file_size = item["Size"] fmd.last_modified = str(item["LastModified"]) fmds.append(fmd) # step 4 sync db async with self.engine.acquire() as conn, conn.begin(): # TODO: upsert in one statment of ALL for fmd in fmds: query = sa.select([ file_meta_data ]).where(file_meta_data.c.file_uuid == fmd.file_uuid) # if file already exists, we might w rows = await conn.execute(query) exists = await rows.scalar() if exists: delete_me = file_meta_data.delete().where( file_meta_data.c.file_uuid == fmd.file_uuid) await conn.execute(delete_me) ins = file_meta_data.insert().values(**vars(fmd)) await conn.execute(ins) # DELETE ------------------------------------- async def delete_file(self, user_id: str, location: str, file_uuid: str): """Deletes a file given its fmd and location Additionally requires a user_id for 3rd party auth For internal storage, the db state should be updated upon completion via Notification mechanism For simcore.s3 we can use the file_name For datcore we need the full path """ if location == SIMCORE_S3_STR: # FIXME: operation MUST be atomic, transaction?? to_delete = [] async with self.engine.acquire() as conn, conn.begin(): can: Optional[AccessRights] = await get_file_access_rights( conn, int(user_id), file_uuid) if not can.delete: logger.debug( "User %s was not allowed to delete file %s", user_id, file_uuid, ) raise web.HTTPForbidden( reason= f"User '{user_id}' does not have enough access rights to delete file {file_uuid}" ) query = sa.select([ file_meta_data.c.bucket_name, file_meta_data.c.object_name ]).where(file_meta_data.c.file_uuid == file_uuid) async for row in conn.execute(query): if self.s3_client.remove_objects(row.bucket_name, [row.object_name]): to_delete.append(file_uuid) await conn.execute(file_meta_data.delete().where( file_meta_data.c.file_uuid.in_(to_delete))) elif location == DATCORE_STR: # FIXME: review return inconsistencies api_token, api_secret = self._get_datcore_tokens(user_id) assert self.app # nosec assert api_secret # nosec assert api_token # nosec await datcore_adapter.delete_file(self.app, api_token, api_secret, file_uuid) async def delete_project_simcore_s3( self, user_id: str, project_id: str, node_id: Optional[str] = None) -> Optional[web.Response]: """Deletes all files from a given node in a project in simcore.s3 and updated db accordingly. If node_id is not given, then all the project files db entries are deleted. """ # FIXME: operation MUST be atomic. Mark for deletion and remove from db when deletion fully confirmed async with self.engine.acquire() as conn, conn.begin(): # access layer can: Optional[AccessRights] = await get_project_access_rights( conn, int(user_id), project_id) if not can.delete: logger.debug( "User %s was not allowed to delete project %s", user_id, project_id, ) raise web.HTTPForbidden( reason=f"User does not have delete access for {project_id}" ) delete_me = file_meta_data.delete().where( file_meta_data.c.project_id == project_id, ) if node_id: delete_me = delete_me.where( file_meta_data.c.node_id == node_id) await conn.execute(delete_me) async with self._create_aiobotocore_client_context() as aioboto_client: # Note: the / at the end of the Prefix is VERY important, makes the listing several order of magnitudes faster response = await aioboto_client.list_objects_v2( Bucket=self.simcore_bucket_name, Prefix=f"{project_id}/{node_id}/" if node_id else f"{project_id}/", ) objects_to_delete = [] for f in response.get("Contents", []): objects_to_delete.append({"Key": f["Key"]}) if objects_to_delete: response = await aioboto_client.delete_objects( Bucket=self.simcore_bucket_name, Delete={"Objects": objects_to_delete}, ) return response # SEARCH ------------------------------------- async def search_files_starting_with(self, user_id: int, prefix: str) -> List[FileMetaDataEx]: # Avoids using list_files since it accounts for projects/nodes # Storage should know NOTHING about those concepts files_meta = deque() async with self.engine.acquire() as conn, conn.begin(): # access layer can_read_projects_ids = await get_readable_project_ids( conn, int(user_id)) has_read_access = ( file_meta_data.c.user_id == str(user_id) ) | file_meta_data.c.project_id.in_(can_read_projects_ids) stmt = sa.select([file_meta_data]).where( file_meta_data.c.file_uuid.startswith(prefix) & has_read_access) async for row in conn.execute(stmt): meta_extended = to_meta_data_extended(row) files_meta.append(meta_extended) return list(files_meta) async def create_soft_link(self, user_id: int, target_uuid: str, link_uuid: str) -> FileMetaDataEx: # validate link_uuid async with self.engine.acquire() as conn: # TODO: select exists(select 1 from file_metadat where file_uuid=12) found = await conn.scalar( sa.select([file_meta_data.c.file_uuid ]).where(file_meta_data.c.file_uuid == link_uuid)) if found: raise ValueError( f"Invalid link {link_uuid}. Link already exists") # validate target_uuid target = await self.list_file(str(user_id), SIMCORE_S3_STR, target_uuid) if not target: raise ValueError( f"Invalid target '{target_uuid}'. File does not exists for this user" ) # duplicate target and change the following columns: target.fmd.file_uuid = link_uuid target.fmd.file_id = link_uuid # NOTE: api-server relies on this id target.fmd.is_soft_link = True async with self.engine.acquire() as conn: stmt = (file_meta_data.insert().values( **attr.asdict(target.fmd)).returning(literal_column("*"))) result = await conn.execute(stmt) link = to_meta_data_extended(await result.first()) return link async def synchronise_meta_data_table(self, location: str, dry_run: bool) -> Dict[str, Any]: PRUNE_CHUNK_SIZE = 20 removed: List[str] = [] to_remove: List[str] = [] async def _prune_db_table(conn): if not dry_run: await conn.execute(file_meta_data.delete().where( file_meta_data.c.object_name.in_(to_remove))) logger.info( "%s %s orphan items", "Would have deleted" if dry_run else "Deleted", len(to_remove), ) removed.extend(to_remove) to_remove.clear() # ---------- assert ( # nosec location == SIMCORE_S3_STR ), "Only with s3, no other sync implemented" # nosec if location == SIMCORE_S3_STR: # NOTE: only valid for simcore, since datcore data is not in the database table # let's get all the files in the table logger.warning( "synchronisation of database/s3 storage started, this will take some time..." ) async with self.engine.acquire( ) as conn, self._create_aiobotocore_client_context( ) as aioboto_client: number_of_rows_in_db = (await conn.scalar( sa.select([sa.func.count()]).select_from(file_meta_data)) or 0) logger.warning( "Total number of entries to check %d", number_of_rows_in_db, ) assert isinstance(aioboto_client, AioBaseClient) # nosec async for row in conn.execute( sa.select([file_meta_data.c.object_name])): s3_key = row.object_name # type: ignore # now check if the file exists in S3 # SEE https://www.peterbe.com/plog/fastest-way-to-find-out-if-a-file-exists-in-s3 response = await aioboto_client.list_objects_v2( Bucket=self.simcore_bucket_name, Prefix=s3_key) if response.get("KeyCount", 0) == 0: # this file does not exist in S3 to_remove.append(s3_key) if len(to_remove) >= PRUNE_CHUNK_SIZE: await _prune_db_table(conn) if to_remove: await _prune_db_table(conn) assert len(to_remove) == 0 # nosec assert len(removed) <= number_of_rows_in_db # nosec logger.info( "%s %d entries ", "Would delete" if dry_run else "Deleting", len(removed), ) return {"removed": removed}
def retry_if_network_error() -> retry_if_exception: return retry_if_exception_type(exception_types=(ConnectionError, ))
async def test_run_pipeline_and_check_state( client: TestClient, sleeper_service: Dict[str, str], postgres_session: sa.orm.session.Session, logged_user: Dict[str, Any], user_project: Dict[str, Any], fake_workbench_adjacency_list: Dict[str, Any], user_role: UserRole, expected: ExpectedResponse, ): project_id = user_project["uuid"] fake_workbench_payload = user_project["workbench"] url_start = client.app.router["start_pipeline"].url_for( project_id=project_id) assert url_start == URL( f"/{API_VTAG}/computation/pipeline/{project_id}:start") # POST /v0/computation/pipeline/{project_id}:start resp = await client.post(f"{url_start}") data, error = await assert_status(resp, expected.created) if error: return assert "pipeline_id" in data assert data["pipeline_id"] == project_id _assert_db_contents( project_id, postgres_session, fake_workbench_payload, fake_workbench_adjacency_list, check_outputs=False, ) url_project_state = client.app.router["state_project"].url_for( project_id=project_id) assert url_project_state == URL(f"/{API_VTAG}/projects/{project_id}/state") running_state_order_lookup = { RunningState.UNKNOWN: 0, RunningState.NOT_STARTED: 1, RunningState.PUBLISHED: 2, RunningState.PENDING: 3, RunningState.STARTED: 4, RunningState.RETRY: 5, RunningState.SUCCESS: 6, RunningState.FAILED: 6, RunningState.ABORTED: 6, } assert all( # pylint: disable=use-a-generator [k in running_state_order_lookup for k in RunningState.__members__ ]), "there are missing members in the order lookup, please complete!" pipeline_state = RunningState.UNKNOWN start = time.monotonic() async for attempt in AsyncRetrying( reraise=True, stop=stop_after_delay(120), wait=wait_fixed(1), retry=retry_if_exception_type(ValueError), ): with attempt: print( f"--> waiting for pipeline to complete attempt {attempt.retry_state.attempt_number}..." ) resp = await client.get(f"{url_project_state}") data, error = await assert_status(resp, expected.ok) assert "state" in data assert "value" in data["state"] received_study_state = RunningState(data["state"]["value"]) print(f"--> project computation state {received_study_state=}") assert ( running_state_order_lookup[received_study_state] >= running_state_order_lookup[pipeline_state] ), (f"the received state {received_study_state} shall be greater " f"or equal to the previous state {pipeline_state}") assert received_study_state not in [ RunningState.ABORTED, RunningState.FAILED, ], "the pipeline did not end up successfully" pipeline_state = received_study_state if received_study_state != RunningState.SUCCESS: raise ValueError print( f"--> pipeline completed with state {received_study_state=}! That's great: {json_dumps(attempt.retry_state.retry_object.statistics)}", ) assert pipeline_state == RunningState.SUCCESS comp_tasks_in_db: Dict[NodeIdStr, Any] = _get_computational_tasks_from_db( project_id, postgres_session) assert all( # pylint: disable=use-a-generator [t.state == StateType.SUCCESS for t in comp_tasks_in_db.values() ]), ("the individual computational services are not finished! " f"Expected to be completed, got {comp_tasks_in_db=}") # we need to wait until the webserver has updated the projects DB await _assert_and_wait_for_comp_task_states_to_be_transmitted_in_projects( project_id, postgres_session) print( f"<-- pipeline completed successfully in {time.monotonic() - start} seconds" )
class ContainerWrapper: client: ContainerClient def __init__(self, container_url: str) -> None: self.client = ContainerClient.from_container_url(container_url) self.container_url = container_url @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def upload_file(self, file_path: str, blob_name: str) -> None: with open(file_path, "rb") as handle: self.client.upload_blob(name=blob_name, data=handle, overwrite=True, max_concurrency=10) return None def upload_file_data(self, data: str, blob_name: str) -> None: self.client.upload_blob(name=blob_name, data=data, overwrite=True, max_concurrency=10) def upload_dir(self, dir_path: str) -> None: # security note: the src for azcopy comes from the server which is # trusted in this context, while the destination is provided by the # user azcopy_sync(dir_path, self.container_url) def download_dir(self, dir_path: str) -> None: # security note: the src for azcopy comes from the server which is # trusted in this context, while the destination is provided by the # user azcopy_sync(self.container_url, dir_path) @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def delete_blob(self, blob_name: str) -> None: self.client.delete_blob(blob_name) return None @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def download_blob(self, blob_name: str) -> bytes: return cast(bytes, self.client.download_blob(blob_name).content_as_bytes()) @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def list_blobs(self, *, name_starts_with: Optional[str] = None) -> List[str]: result = [ x.name for x in self.client.list_blobs(name_starts_with=name_starts_with) ] return cast(List[str], result)
class ContainerWrapper: client: ContainerClient def __init__(self, container_url: str) -> None: self.client = ContainerClient.from_container_url(container_url) self.container_url = container_url @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def upload_file(self, file_path: str, blob_name: str) -> None: try: # Split the container URL to insert the blob_name url_parts = self.container_url.split("?", 1) # Default to azcopy if it is installed azcopy_copy(file_path, url_parts[0] + "/" + blob_name + "?" + url_parts[1]) except Exception as exc: # A subprocess exception would typically only contain the exit status. LOGGER.warning( "Upload using azcopy failed. Check the azcopy logs for more information." ) LOGGER.warning(exc) # Indicate the switch in the approach for clarity in debugging LOGGER.warning("Now attempting to upload using the Python SDK...") # This does not have a try/except since it should be caught by the retry system. # The retry system will always attempt azcopy first and this approach second with open(file_path, "rb") as handle: # Using the Azure SDK default max_concurrency self.client.upload_blob(name=blob_name, data=handle, overwrite=True) return None def upload_file_data(self, data: str, blob_name: str) -> None: with tempfile.TemporaryDirectory() as tmpdir: filename = os.path.join(tmpdir, blob_name) with open(filename, "w") as handle: handle.write(data) self.upload_file(filename, blob_name) def upload_dir(self, dir_path: str) -> None: # security note: the src for azcopy comes from the server which is # trusted in this context, while the destination is provided by the # user azcopy_sync(dir_path, self.container_url) def download_dir(self, dir_path: str) -> None: # security note: the src for azcopy comes from the server which is # trusted in this context, while the destination is provided by the # user azcopy_sync(self.container_url, dir_path) @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def delete_blob(self, blob_name: str) -> None: self.client.delete_blob(blob_name) return None @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def download_blob(self, blob_name: str) -> bytes: return cast(bytes, self.client.download_blob(blob_name).content_as_bytes()) @retry( stop=stop_after_attempt(10), wait=wait_random(min=1, max=3), retry=retry_if_exception_type(), before_sleep=before_sleep, reraise=True, ) def list_blobs(self, *, name_starts_with: Optional[str] = None) -> List[str]: result = [ x.name for x in self.client.list_blobs(name_starts_with=name_starts_with) ] return cast(List[str], result)
def t_db_retry(f): return retry(retry=(retry_if_exception_type(OperationalError) | retry_if_exception_type(Sqlite3OperationalError)), stop=stop_after_attempt(RETRY_DATABASE_OP_TIMES), wait=wait_fixed(RETRY_DATABASE_OP_SECONDS), before_sleep=before_sleep_log)(f)
from simcore_service_webserver.users_exceptions import UserNotFoundError from tenacity._asyncio import AsyncRetrying from tenacity.after import after_log from tenacity.retry import retry_if_exception_type from tenacity.stop import stop_after_attempt, stop_after_delay from tenacity.wait import wait_fixed logger = logging.getLogger(__name__) API_VERSION = "v0" GARBAGE_COLLECTOR_INTERVAL = 1 SERVICE_DELETION_DELAY = 1 CHECK_BACKGROUND_RETRY_POLICY = dict( stop=stop_after_attempt(2), wait=wait_fixed(SERVICE_DELETION_DELAY + GARBAGE_COLLECTOR_INTERVAL), retry=retry_if_exception_type(AssertionError), after=after_log(logger, logging.INFO), reraise=True, ) @pytest.fixture def mock_garbage_collector_task(mocker): """patch the setup of the garbage collector so we can call it manually""" mocker.patch( "simcore_service_webserver.resource_manager.module_setup.setup_garbage_collector", return_value="", ) @pytest.fixture
node_details = await _get_node_details( app, client, list_running_services_with_uuid[0]) return node_details except aiodocker.exceptions.DockerError as err: log.exception("Error while accessing container with uuid: %s", node_uuid) raise exceptions.GenericDockerError( "Error while accessing container", err) from err @retry( wait=wait_fixed(2), stop=stop_after_attempt(3), reraise=True, retry=retry_if_exception_type(ClientConnectionError), ) async def _save_service_state(service_host_name: str, session: aiohttp.ClientSession): response: ClientResponse async with session.post( url=f"http://{service_host_name}/state", timeout=ServicesCommonSettings(). director_dynamic_service_save_timeout, ) as response: try: response.raise_for_status() except ClientResponseError as err: if err.status in ( HTTPStatus.METHOD_NOT_ALLOWED,