Example #1
def throttling_retry(
    retry=retry_if_throttling,
    wait=wait_exponential(exp_base=WAIT_EXP_BASE),
    stop=stop_after_attempt(max_attempt_number=MAX_NUMBER_ATTEMPTS),
    **retry_args,
):
    """
    Decorator based on `tenacity` for retrying a function possibly raising
    requests.exceptions.HTTPError for status code 429 (too many requests).

    It provides a default configuration that should work properly in most
    cases but all `tenacity.retry` parameters can also be overridden in client
    code.

    When the maximum number of attempts is reached, the HTTPError exception
    is then reraised.

    Args:
        retry: function defining request retry condition (default to 429 status code)
            https://tenacity.readthedocs.io/en/latest/#whether-to-retry

        wait: function defining wait strategy before retrying (default to exponential
            backoff) https://tenacity.readthedocs.io/en/latest/#waiting-before-retrying

        stop: function defining when to stop retrying (default after 5 attempts)
            https://tenacity.readthedocs.io/en/latest/#stopping

    """
    return tenacity_retry(retry=retry, wait=wait, stop=stop, reraise=True, **retry_args)
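A minimal usage sketch (not part of the original module; it assumes the decorator above plus `requests` and `tenacity` are importable): any function that raises `requests.exceptions.HTTPError` with status 429 is retried with exponential backoff, and the final error is re-raised once the attempts are exhausted.

import requests
from tenacity import stop_after_attempt

@throttling_retry()  # defaults: retry on HTTP 429, exponential backoff, re-raise at the end
def fetch_page(url: str) -> dict:
    response = requests.get(url)
    response.raise_for_status()  # a 429 raises HTTPError, which triggers a retry
    return response.json()

# Any tenacity parameter can still be overridden per call site:
@throttling_retry(stop=stop_after_attempt(2))
def fetch_with_two_attempts(url: str) -> dict:
    response = requests.get(url)
    response.raise_for_status()
    return response.json()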
Example #2
def svn_retry():
    return retry(
        retry=retry_if_exception(is_retryable_svn_exception),
        wait=wait_exponential(exp_base=SVN_RETRY_WAIT_EXP_BASE),
        stop=stop_after_attempt(max_attempt_number=SVN_RETRY_MAX_ATTEMPTS),
        before_sleep=before_sleep_log(logger, logging.DEBUG),
        reraise=True,
    )
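Since `svn_retry()` builds and returns a ready-made tenacity decorator, it is applied with parentheses. A hedged sketch (hypothetical function name; assumes the helper above and its `is_retryable_svn_exception` predicate are importable):

@svn_retry()
def svn_export(url: str, dest_path: str) -> None:
    # Any exception matching is_retryable_svn_exception triggers another attempt
    # with exponential backoff; after SVN_RETRY_MAX_ATTEMPTS the last error is re-raised.
    ...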
Example #3
def retry_api_call(func,
                   exceptions=('ThrottlingException',
                               'TooManyRequestsException'),
                   attempt=5,
                   multiplier=1,
                   max_delay=1800,
                   exp_base=2,
                   logger=None,
                   *args,
                   **kwargs):
    retry = tenacity.Retrying(
        retry=retry_if_exception(lambda e: getattr(e, 'response', {}).get(
            'Error', {}).get('Code', None) in exceptions if e else False),
        stop=stop_after_attempt(attempt),
        wait=wait_exponential(multiplier=multiplier,
                              max=max_delay,
                              exp_base=exp_base),
        after=after_log(logger, logger.level) if logger else None,
        reraise=True)
    return retry(func, *args, **kwargs)
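The retry predicate above inspects the botocore-style `e.response['Error']['Code']` field. A self-contained sketch (hypothetical stub call; assumes `retry_api_call` and its tenacity imports are in scope) showing a throttled call being retried until it succeeds:

import logging

logger = logging.getLogger(__name__)

class FakeThrottlingError(Exception):
    """Stub mimicking a botocore ClientError that carries response['Error']['Code']."""
    def __init__(self) -> None:
        super().__init__("Rate exceeded")
        self.response = {"Error": {"Code": "ThrottlingException"}}

_state = {"calls": 0}

def flaky_api_call(query: str) -> dict:
    # Raises a throttling error on the first two calls, then succeeds.
    _state["calls"] += 1
    if _state["calls"] < 3:
        raise FakeThrottlingError()
    return {"QueryExecutionId": "abc123", "query": query}

result = retry_api_call(flaky_api_call, attempt=5, multiplier=0.1,
                        logger=logger, query="SELECT 1")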
Example #4
    def __init__(self,
                 hl='en-US',
                 tz=360,
                 geo='',
                 timeout=DEFAULT_TIMEOUT_CONFIG,
                 proxies=[],
                 retries=0,
                 backoff_factor=0):
        """
        Initialize default values for params
        """
        # google rate limit
        self.google_rl = 'You have reached your quota limit. Please try again later.'

        # set user defined options used globally
        self.tz = tz
        self.hl = hl
        self.geo = geo
        self.kw_list = list()
        self.timeout = timeout
        self.proxies = proxies.copy()  # add a proxy option
        self.blacklisted_proxies = []
        self._rate_limited_proxies = []
        self.proxy_index = 0
        self.cookies = None

        # initialize widget payloads
        self.token_payload = dict()
        self.interest_over_time_widget = dict()
        self.interest_by_region_widget = dict()
        self.related_topics_widget_list = list()
        self.related_queries_widget_list = list()

        self.backoff_factor = backoff_factor
        self.retries = retries
        self._retry_config = dict(
            wait=wait_exponential(multiplier=self.backoff_factor),
            stop=stop_after_attempt(self.retries),
            reraise=True)
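The `_retry_config` dict assembled above is the keyword set tenacity's `Retrying` controller expects; a standalone sketch of how such a configuration is typically consumed (hypothetical function, not the original class's request path). Note that the default `retries=0` produces `stop_after_attempt(0)`, which effectively disables retrying (the call runs once and any error is re-raised), so real callers would likely guard on `self.retries` first.

import requests
import tenacity
from tenacity import stop_after_attempt, wait_exponential

# Standalone equivalent of the instance attributes above (illustrative values:
# backoff_factor=1.0, retries=3).
retry_config = dict(
    wait=wait_exponential(multiplier=1.0),
    stop=stop_after_attempt(3),
    reraise=True,
)

def get_with_retries(url: str, **kwargs) -> requests.Response:
    # Build a Retrying controller from the stored kwargs and apply it to requests.get;
    # with reraise=True the last exception propagates once the attempts run out.
    return tenacity.Retrying(**retry_config)(requests.get, url, **kwargs)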
Example #5
    def wrapper(f: Any) -> Any:
        @wraps(f)
        @retry(stop=stop_after_attempt(5), wait=wait_exponential(2))
        async def wrapped(*args: Any, **kwargs: Any) -> Any:
            attempt_number = wrapped.retry.statistics["attempt_number"]
            try:
                result = await f(*args, **kwargs)
                logger.info("{} initialized", name)
                return result
            except Exception as e:
                max_attempt_number = wrapped.retry.stop.max_attempt_number
                msg = "{}: initialization failed ({}/{})".format(
                    name, attempt_number, max_attempt_number)
                if attempt_number < max_attempt_number:
                    msg += ", trying again after {} second.".format(
                        2**attempt_number)
                else:
                    msg += "."
                logger.exception(e)
                logger.warning(msg)
                raise e

        return wrapped
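The snippet above relies on attributes tenacity attaches to the decorated function: `wrapped.retry` is the retry controller, so `wrapped.retry.statistics["attempt_number"]` is the live attempt counter and `wrapped.retry.stop.max_attempt_number` echoes the configured limit. A self-contained synchronous sketch of the same idea (hypothetical names):

import logging
from tenacity import retry, stop_after_attempt, wait_fixed

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

@retry(stop=stop_after_attempt(3), wait=wait_fixed(0))
def flaky() -> str:
    attempt = flaky.retry.statistics["attempt_number"]
    limit = flaky.retry.stop.max_attempt_number
    log.info("attempt %d/%d", attempt, limit)
    if attempt < limit:
        raise RuntimeError("simulated failure")  # retried until the final attempt
    return "ok"

print(flaky())  # two simulated failures, then "ok" on attempt 3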
Example #6
        user_reviews[html.unescape(review["author"])] = {
            "date":
            dateparser.parse(review["datePublished"], locales=["de",
                                                               "en"]).date(),
            "title":
            html.unescape(review["name"]),
            "text":
            html.unescape(review.get("reviewBody", "")),
            "rating":
            review["reviewRating"]["ratingValue"],
        }

    return (user_rating, user_reviews)


@retry(wait=wait_exponential(), stop=stop_after_attempt(4))
def _get_apple_meta_data(**params) -> types.JSON:
    """Retrieve data from iTunes Search API.

    Returns:
        types.JSON: Raw JSON representation of search result.
    """

    result = requests.get(BASE_URL, params=params)
    result.raise_for_status()

    return result.json()


def _get_metadata_url(podcast: Podcast) -> Optional[str]:
    """Retrieve meta data URL from API based on podcast.
Example #7
class DataStorageManager:  # pylint: disable=too-many-public-methods
    """Data storage manager

    The dsm has access to the database for all meta data and to the actual backend. For now this
    is simcore's S3 [minio] and the datcore storage facilities.

    For all data that is in-house (simcore.s3, ...) we keep a synchronized database with meta information
    for the physical files.

    For physical changes on S3 that might be time-consuming, the db keeps a state (mostly for delete and upload)

    The dsm provides the following additional functionalities:

    - listing of folders for a given user, optionally filtered using a regular expression and optionally
      sorted by one of the meta data keys

    - upload/download of files

        client -> S3 : presigned upload link
        S3 -> client : presigned download link
        datcore -> client: presigned download link
        S3 -> datcore: local copy and then upload via their api

    minio/S3 and postgres can talk nicely with each other via notifications using RabbitMQ, which we already have.
    See:

        https://blog.minio.io/part-5-5-publish-minio-events-via-postgresql-50f6cc7a7346
        https://docs.minio.io/docs/minio-bucket-notification-guide.html
    """

    # TODO: perhaps a cache could be used? add a lifetime?

    s3_client: MinioClientWrapper
    engine: Engine
    loop: object
    pool: ThreadPoolExecutor
    simcore_bucket_name: str
    has_project_db: bool
    session: AioSession = field(default_factory=aiobotocore.get_session)
    datcore_tokens: Dict[str, DatCoreApiToken] = field(default_factory=dict)
    app: Optional[web.Application] = None

    def _create_aiobotocore_client_context(self) -> ClientCreatorContext:
        assert hasattr(self.session, "create_client")  # nosec
        # pylint: disable=no-member

        # SEE API in https://botocore.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html
        # SEE https://aiobotocore.readthedocs.io/en/latest/index.html
        return self.session.create_client(
            "s3",
            endpoint_url=self.s3_client.endpoint_url,
            aws_access_key_id=self.s3_client.access_key,
            aws_secret_access_key=self.s3_client.secret_key,
        )

    def _get_datcore_tokens(
            self, user_id: str) -> Tuple[Optional[str], Optional[str]]:
        # pylint: disable=no-member
        token = self.datcore_tokens.get(user_id, DatCoreApiToken())
        return token.to_tuple()

    async def locations(self, user_id: str):
        locs = []
        simcore_s3 = {"name": SIMCORE_S3_STR, "id": SIMCORE_S3_ID}
        locs.append(simcore_s3)

        api_token, api_secret = self._get_datcore_tokens(user_id)

        if api_token and api_secret and self.app:
            if await datcore_adapter.check_user_can_connect(
                    self.app, api_token, api_secret):
                datcore = {"name": DATCORE_STR, "id": DATCORE_ID}
                locs.append(datcore)

        return locs

    @classmethod
    def location_from_id(cls, location_id: str):
        return get_location_from_id(location_id)

    # LIST/GET ---------------------------

    # pylint: disable=too-many-arguments
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    async def list_files(self,
                         user_id: str,
                         location: str,
                         uuid_filter: str = "",
                         regex: str = "") -> List[FileMetaDataEx]:
        """Returns a list of file paths

        - Works for simcore.s3 and datcore
        - Can filter on uuid: useful to filter on project_id/node_id
        - Can filter upon regular expression (for now only on key: value pairs of the FileMetaData)
        """
        data = deque()
        if location == SIMCORE_S3_STR:
            accesible_projects_ids = []
            async with self.engine.acquire() as conn, conn.begin():
                accesible_projects_ids = await get_readable_project_ids(
                    conn, int(user_id))
                where_statement = (
                    file_meta_data.c.user_id == user_id
                ) | file_meta_data.c.project_id.in_(accesible_projects_ids)
                if uuid_filter:
                    where_statement &= file_meta_data.c.file_uuid.ilike(
                        f"%{uuid_filter}%")
                query = sa.select([file_meta_data]).where(where_statement)

                async for row in conn.execute(query):
                    dex = to_meta_data_extended(row)
                    if not is_file_entry_valid(dex.fmd):
                        # NOTE: the file is not updated with the information from S3 backend.
                        # 1. Either the file exists, but was never updated in the database
                        # 2. Or the file does not exist or was never completed, and the file_meta_data entry is old and faulty
                        # we need to update from S3 here since the database is not up-to-date
                        dex = await self.try_update_database_from_storage(
                            dex.fmd.file_uuid,
                            dex.fmd.bucket_name,
                            dex.fmd.object_name,
                        )
                    if dex:
                        data.append(dex)

            if self.has_project_db:
                uuid_name_dict = {}
                # now parse the project to search for node/project names
                try:
                    async with self.engine.acquire() as conn, conn.begin():
                        query = sa.select([projects]).where(
                            projects.c.uuid.in_(accesible_projects_ids))

                        async for row in conn.execute(query):
                            proj_data = dict(row.items())

                            uuid_name_dict[
                                proj_data["uuid"]] = proj_data["name"]
                            wb = proj_data["workbench"]
                            for node in wb.keys():
                                uuid_name_dict[node] = wb[node]["label"]
                except DBAPIError as _err:
                    logger.exception(
                        "Error querying database for project names")

                if not uuid_name_dict:
                    # there seems to be no project whatsoever for user_id
                    return []

                # only keep files from non-deleted project
                clean_data = deque()
                for dx in data:
                    d = dx.fmd
                    if d.project_id not in uuid_name_dict:
                        continue
                    #
                    # FIXME: artificially fills ['project_name', 'node_name', 'file_id', 'raw_file_path', 'display_file_path']
                    #        with information from the projects table!

                    d.project_name = uuid_name_dict[d.project_id]
                    if d.node_id in uuid_name_dict:
                        d.node_name = uuid_name_dict[d.node_id]

                    d.raw_file_path = str(
                        Path(d.project_id) / Path(d.node_id) /
                        Path(d.file_name))
                    d.display_file_path = d.raw_file_path
                    d.file_id = d.file_uuid
                    if d.node_name and d.project_name:
                        d.display_file_path = str(
                            Path(d.project_name) / Path(d.node_name) /
                            Path(d.file_name))
                        # at this point the data has been synced to the postgres metadata table
                        clean_data.append(dx)

                data = clean_data

        elif location == DATCORE_STR:
            api_token, api_secret = self._get_datcore_tokens(user_id)
            assert self.app  # nosec
            assert api_secret  # nosec
            assert api_token  # nosec
            return await datcore_adapter.list_all_datasets_files_metadatas(
                self.app, api_token, api_secret)

        if uuid_filter:
            # TODO: incorporate this in db query!
            _query = re.compile(uuid_filter, re.IGNORECASE)
            filtered_data = deque()
            for dx in data:
                d = dx.fmd
                if _query.search(d.file_uuid):
                    filtered_data.append(dx)

            return list(filtered_data)

        if regex:
            _query = re.compile(regex, re.IGNORECASE)
            filtered_data = deque()
            for dx in data:
                d = dx.fmd
                _vars = vars(d)
                for v in _vars.keys():
                    if _query.search(v) or _query.search(str(_vars[v])):
                        filtered_data.append(dx)
                        break
            return list(filtered_data)

        return list(data)

    async def list_files_dataset(
            self, user_id: str, location: str, dataset_id: str
    ) -> Union[List[FileMetaData], List[FileMetaDataEx]]:
        # this is a cheap shot, needs fixing once storage/db is in sync
        data = []
        if location == SIMCORE_S3_STR:
            data: List[FileMetaDataEx] = await self.list_files(
                user_id, location, uuid_filter=dataset_id + "/")

        elif location == DATCORE_STR:
            api_token, api_secret = self._get_datcore_tokens(user_id)
            # lists all the files inside the dataset
            assert self.app  # nosec
            assert api_secret  # nosec
            assert api_token  # nosec
            return await datcore_adapter.list_all_files_metadatas_in_dataset(
                self.app, api_token, api_secret, dataset_id)

        return data

    async def list_datasets(self, user_id: str,
                            location: str) -> List[DatasetMetaData]:
        """Returns a list of top level datasets

        Works for simcore.s3 and datcore

        """
        data = []

        if location == SIMCORE_S3_STR:
            if self.has_project_db:
                try:
                    async with self.engine.acquire() as conn, conn.begin():
                        readable_projects_ids = await get_readable_project_ids(
                            conn, int(user_id))
                        has_read_access = projects.c.uuid.in_(
                            readable_projects_ids)

                        # FIXME: this DOES NOT read from file-metadata table!!!
                        query = sa.select([projects.c.uuid, projects.c.name
                                           ]).where(has_read_access)
                        async for row in conn.execute(query):
                            dmd = DatasetMetaData(
                                dataset_id=row.uuid,
                                display_name=row.name,
                            )
                            data.append(dmd)
                except DBAPIError as _err:
                    logger.exception(
                        "Error querying database for project names")

        elif location == DATCORE_STR:
            api_token, api_secret = self._get_datcore_tokens(user_id)
            assert self.app  # nosec
            assert api_secret  # nosec
            assert api_token  # nosec
            return await datcore_adapter.list_datasets(self.app, api_token,
                                                       api_secret)

        return data

    async def list_file(self, user_id: str, location: str,
                        file_uuid: str) -> Optional[FileMetaDataEx]:

        if location == SIMCORE_S3_STR:

            async with self.engine.acquire() as conn, conn.begin():
                can: Optional[AccessRights] = await get_file_access_rights(
                    conn, int(user_id), file_uuid)
                if can.read:
                    query = sa.select([
                        file_meta_data
                    ]).where(file_meta_data.c.file_uuid == file_uuid)
                    result = await conn.execute(query)
                    row = await result.first()
                    if not row:
                        return None
                    file_metadata = to_meta_data_extended(row)
                    if is_file_entry_valid(file_metadata.fmd):
                        return file_metadata
                    # we need to update from S3 here since the database is not up-to-date
                    file_metadata = await self.try_update_database_from_storage(
                        file_metadata.fmd.file_uuid,
                        file_metadata.fmd.bucket_name,
                        file_metadata.fmd.object_name,
                    )
                    return file_metadata
                # FIXME: returns None in both cases: file does not exist or user has no access
                logger.debug("User %s cannot read file %s", user_id, file_uuid)
                return None

        elif location == DATCORE_STR:
            # FIXME: review return inconsistencies
            # api_token, api_secret = self._get_datcore_tokens(user_id)
            import warnings

            warnings.warn("NOT IMPLEMENTED!!!")
            return None

    # UPLOAD/DOWNLOAD LINKS ---------------------------

    async def upload_file_to_datcore(self, _user_id: str,
                                     _local_file_path: str,
                                     _destination_id: str):
        import warnings

        warnings.warn(f"NOT IMPLEMENTED!!! in {self.__class__}")
        # uploads a locally available file to dat core given the storage path, optionally attached some meta data
        # api_token, api_secret = self._get_datcore_tokens(user_id)
        # await dcw.upload_file_to_id(destination_id, local_file_path)

    async def try_update_database_from_storage(
        self,
        file_uuid: str,
        bucket_name: str,
        object_name: str,
        silence_exception: bool = False,
    ) -> Optional[FileMetaDataEx]:
        try:
            async with self._create_aiobotocore_client_context(
            ) as aioboto_client:
                result = await aioboto_client.head_object(Bucket=bucket_name,
                                                          Key=object_name
                                                          )  # type: ignore

                file_size = result["ContentLength"]  # type: ignore
                last_modified = result["LastModified"]  # type: ignore
                entity_tag = result["ETag"].strip('"')  # type: ignore

                async with self.engine.acquire() as conn:
                    result: ResultProxy = await conn.execute(
                        file_meta_data.update().where(
                            file_meta_data.c.file_uuid == file_uuid).values(
                                file_size=file_size,
                                last_modified=last_modified,
                                entity_tag=entity_tag,
                            ).returning(literal_column("*")))
                    if not result:
                        return None
                    row: Optional[RowProxy] = await result.first()
                    if not row:
                        return None

                    return to_meta_data_extended(row)
        except botocore.exceptions.ClientError:
            if silence_exception:
                logger.debug("Error happened while trying to access %s",
                             file_uuid)
            else:
                logger.warning("Error happened while trying to access %s",
                               file_uuid,
                               exc_info=True)
            # the file is not existing or some error happened
            return None

    @retry(
        stop=stop_after_delay(1 * _HOUR),
        wait=wait_exponential(multiplier=0.1, exp_base=1.2, max=30),
        retry=(retry_if_exception_type()
               | retry_if_result(lambda result: result is None)),
        before_sleep=before_sleep_log(logger, logging.INFO),
    )
    async def auto_update_database_from_storage_task(self, file_uuid: str,
                                                     bucket_name: str,
                                                     object_name: str):
        return await self.try_update_database_from_storage(
            file_uuid, bucket_name, object_name, silence_exception=True)

    async def upload_link(self, user_id: str, file_uuid: str):
        """
        Creates pre-signed upload link and updates metadata table when
        link is used and upload is successfully completed

        SEE _metadata_file_updater
        """

        async with self.engine.acquire() as conn:
            can: Optional[AccessRights] = await get_file_access_rights(
                conn, int(user_id), file_uuid)
            if not can.write:
                logger.debug("User %s was not allowed to upload file %s",
                             user_id, file_uuid)
                raise web.HTTPForbidden(
                    reason=
                    f"User does not have enough access rights to upload file {file_uuid}"
                )

        @retry(**postgres_service_retry_policy_kwargs)
        async def _init_metadata() -> Tuple[int, str]:
            async with self.engine.acquire() as conn:
                fmd = FileMetaData()
                fmd.simcore_from_uuid(file_uuid, self.simcore_bucket_name)
                fmd.user_id = user_id  # NOTE: takes ownership of uploaded data

                # if file already exists, we might want to update a time-stamp

                # upsert file_meta_data
                insert_stmt = pg_insert(file_meta_data).values(**vars(fmd))
                do_nothing_stmt = insert_stmt.on_conflict_do_nothing(
                    index_elements=["file_uuid"])
                await conn.execute(do_nothing_stmt)

                return fmd.file_size, fmd.last_modified

        await _init_metadata()

        bucket_name = self.simcore_bucket_name
        object_name = file_uuid

        # a parallel task is started which will update the metadata of the uploaded file
        # once the upload has finished.
        fire_and_forget_task(
            self.auto_update_database_from_storage_task(
                file_uuid=file_uuid,
                bucket_name=bucket_name,
                object_name=object_name,
            ))
        return self.s3_client.create_presigned_put_url(bucket_name,
                                                       object_name)

    async def download_link_s3(self, file_uuid: str, user_id: int) -> str:

        # access layer
        async with self.engine.acquire() as conn:
            can: Optional[AccessRights] = await get_file_access_rights(
                conn, int(user_id), file_uuid)
            if not can.read:
                # NOTE: this is tricky. A user with read access can download any data!
                # If write permission were required, then shared projects opened as views
                # could not recover data in nodes (e.g. jupyter could not pull work data)
                #
                logger.debug("User %s was not allowed to download file %s",
                             user_id, file_uuid)
                raise web.HTTPForbidden(
                    reason=
                    f"User does not have enough rights to download {file_uuid}"
                )

        bucket_name = self.simcore_bucket_name
        async with self.engine.acquire() as conn:
            stmt = sa.select([file_meta_data.c.object_name
                              ]).where(file_meta_data.c.file_uuid == file_uuid)
            object_name: Optional[str] = await conn.scalar(stmt)

            if object_name is None:
                raise web.HTTPNotFound(
                    reason=f"File '{file_uuid}' does not exists in storage.")

        link = self.s3_client.create_presigned_get_url(bucket_name,
                                                       object_name)
        return link

    async def download_link_datcore(self, user_id: str, file_id: str) -> URL:
        api_token, api_secret = self._get_datcore_tokens(user_id)
        assert self.app  # nosec
        assert api_secret  # nosec
        assert api_token  # nosec
        return await datcore_adapter.get_file_download_presigned_link(
            self.app, api_token, api_secret, file_id)

    # COPY -----------------------------

    async def copy_file_s3_s3(self, user_id: str, dest_uuid: str,
                              source_uuid: str):
        # FIXME: operation MUST be atomic

        # source is s3, location is s3
        to_bucket_name = self.simcore_bucket_name
        to_object_name = dest_uuid
        from_bucket = self.simcore_bucket_name
        from_object_name = source_uuid
        # FIXME: This is not async!
        self.s3_client.copy_object(to_bucket_name, to_object_name, from_bucket,
                                   from_object_name)

        # update db
        async with self.engine.acquire() as conn:
            fmd = FileMetaData()
            fmd.simcore_from_uuid(dest_uuid, self.simcore_bucket_name)
            fmd.user_id = user_id
            ins = file_meta_data.insert().values(**vars(fmd))
            await conn.execute(ins)

    async def copy_file_s3_datcore(self, user_id: str, dest_uuid: str,
                                   source_uuid: str):
        session = get_client_session(self.app)

        # source is s3, get link and copy to datcore
        bucket_name = self.simcore_bucket_name
        object_name = source_uuid
        filename = source_uuid.split("/")[-1]

        s3_dowload_link = self.s3_client.create_presigned_get_url(
            bucket_name, object_name)

        with tempfile.TemporaryDirectory() as tmpdir:
            # FIXME: connect download and upload streams
            local_file_path = os.path.join(tmpdir, filename)

            # Downloads S3 -> local
            await download_to_file_or_raise(session, s3_dowload_link,
                                            local_file_path)

            # Uploads local -> DATCore
            await self.upload_file_to_datcore(
                _user_id=user_id,
                _local_file_path=local_file_path,
                _destination_id=dest_uuid,
            )

    async def copy_file_datcore_s3(
        self,
        user_id: str,
        dest_uuid: str,
        source_uuid: str,
        filename_missing: bool = False,
    ):
        session = get_client_session(self.app)

        # 2 steps: Get download link for local copy, the upload link to s3
        # TODO: This should be a redirect stream!
        dc_link, filename = await self.download_link_datcore(
            user_id=user_id, file_id=source_uuid)
        if filename_missing:
            dest_uuid = str(Path(dest_uuid) / filename)

        s3_upload_link = await self.upload_link(user_id, dest_uuid)

        with tempfile.TemporaryDirectory() as tmpdir:
            # FIXME: connect download and upload streams

            local_file_path = os.path.join(tmpdir, filename)

            # Downloads DATCore -> local
            await download_to_file_or_raise(session, dc_link, local_file_path)

            # Uploads local -> S3
            s3_upload_link = URL(s3_upload_link)
            async with session.put(
                    s3_upload_link,
                    data=Path(local_file_path).open("rb"),
                    raise_for_status=True,
            ) as resp:
                logger.debug(
                    "Uploaded local -> SIMCore %s . Status %s",
                    s3_upload_link,
                    resp.status,
                )

        return dest_uuid

    async def copy_file(
        self,
        user_id: str,
        dest_location: str,
        dest_uuid: str,
        source_location: str,
        source_uuid: str,
    ):
        if source_location == SIMCORE_S3_STR:
            if dest_location == DATCORE_STR:
                await self.copy_file_s3_datcore(user_id, dest_uuid,
                                                source_uuid)
            elif dest_location == SIMCORE_S3_STR:
                await self.copy_file_s3_s3(user_id, dest_uuid, source_uuid)
        elif source_location == DATCORE_STR:
            if dest_location == DATCORE_STR:
                raise NotImplementedError(
                    "copy files from datcore 2 datcore not impl")
            if dest_location == SIMCORE_S3_STR:
                await self.copy_file_datcore_s3(user_id, dest_uuid,
                                                source_uuid)

    async def deep_copy_project_simcore_s3(
        self,
        user_id: str,
        source_project: Dict[str, Any],
        destination_project: Dict[str, Any],
        node_mapping: Dict[str, str],
    ):
        """Parses a given source project and copies all related files to the destination project

        Since all files are organized as

            project_id/node_id/filename or links to datcore

        this function creates a new folder structure

            project_id/node_id/filename

        and copies all files to the corresponding places.

        Additionally, all external files from datcore are being copied and the paths in the destination
        project are adapted accordingly

        Lastly, the meta data db is kept in sync
        """
        source_folder = source_project["uuid"]
        dest_folder = destination_project["uuid"]

        # access layer
        async with self.engine.acquire() as conn, conn.begin():
            source_access_rights = await get_project_access_rights(
                conn, int(user_id), project_id=source_folder)
            dest_access_rights = await get_project_access_rights(
                conn, int(user_id), project_id=dest_folder)
        if not source_access_rights.read:
            logger.debug(
                "User %s was not allowed to read from project %s",
                user_id,
                source_folder,
            )
            raise web.HTTPForbidden(
                reason=
                f"User does not have enough access rights to read from project '{source_folder}'"
            )

        if not dest_access_rights.write:
            logger.debug(
                "User %s was not allowed to write to project %s",
                user_id,
                dest_folder,
            )
            raise web.HTTPForbidden(
                reason=
                f"User does not have enough access rights to write to project '{dest_folder}'"
            )

        # build up naming map based on labels
        uuid_name_dict = {}
        uuid_name_dict[dest_folder] = destination_project["name"]
        for src_node_id, src_node in source_project["workbench"].items():
            new_node_id = node_mapping.get(src_node_id)
            if new_node_id is not None:
                uuid_name_dict[new_node_id] = src_node["label"]

        async with self._create_aiobotocore_client_context() as aioboto_client:

            logger.debug(
                "Listing all items under  %s:%s/",
                self.simcore_bucket_name,
                source_folder,
            )

            # Step 1: List all objects for this project replace them with the destination object name
            # and do a copy at the same time collect some names
            # Note: the / at the end of the Prefix is VERY important, makes the listing several orders of magnitude faster
            response = await aioboto_client.list_objects_v2(
                Bucket=self.simcore_bucket_name, Prefix=f"{source_folder}/")

            contents: List = response.get("Contents", [])
            logger.debug(
                "Listed  %s items under %s:%s/",
                len(contents),
                self.simcore_bucket_name,
                source_folder,
            )

            for item in contents:
                source_object_name = item["Key"]
                source_object_parts = Path(source_object_name).parts

                if len(source_object_parts) != 3:
                    # This may happen once we have shared/home folders
                    # FIXME: this might cause problems
                    logger.info(
                        "Skipping copy of '%s'. Expected three parts path!",
                        source_object_name,
                    )
                    continue

                old_node_id = source_object_parts[1]
                new_node_id = node_mapping.get(old_node_id)
                if new_node_id is not None:
                    old_filename = source_object_parts[2]
                    dest_object_name = str(
                        Path(dest_folder) / new_node_id / old_filename)

                    copy_kwargs = dict(
                        CopySource={
                            "Bucket": self.simcore_bucket_name,
                            "Key": source_object_name,
                        },
                        Bucket=self.simcore_bucket_name,
                        Key=dest_object_name,
                    )
                    logger.debug("Copying %s ...", copy_kwargs)

                    # FIXME: if 5GB, it must use multipart upload Upload Part - Copy API
                    # SEE https://botocore.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.copy_object
                    await aioboto_client.copy_object(**copy_kwargs)

        # Step 2: List all references in outputs that point to datcore and copy over
        for node_id, node in destination_project["workbench"].items():
            outputs: Dict = node.get("outputs", {})
            for _, output in outputs.items():
                source = output["path"]

                if output.get("store") == DATCORE_ID:
                    destination_folder = str(Path(dest_folder) / node_id)
                    logger.info("Copying %s to %s", source, destination_folder)

                    destination = await self.copy_file_datcore_s3(
                        user_id=user_id,
                        dest_uuid=destination_folder,
                        source_uuid=source,
                        filename_missing=True,
                    )
                    assert destination.startswith(destination_folder)  # nosec

                    output["store"] = SIMCORE_S3_ID
                    output["path"] = destination

                elif output.get("store") == SIMCORE_S3_ID:
                    destination = str(
                        Path(dest_folder) / node_id / Path(source).name)
                    output["store"] = SIMCORE_S3_ID
                    output["path"] = destination

        fmds = []
        async with self._create_aiobotocore_client_context() as aioboto_client:

            # step 3: list files first to create fmds
            # Note: the / at the end of the Prefix is VERY important, makes the listing several orders of magnitude faster
            response = await aioboto_client.list_objects_v2(
                Bucket=self.simcore_bucket_name, Prefix=f"{dest_folder}/")

            if "Contents" in response:
                for item in response["Contents"]:
                    fmd = FileMetaData()
                    fmd.simcore_from_uuid(item["Key"],
                                          self.simcore_bucket_name)
                    fmd.project_name = uuid_name_dict.get(
                        dest_folder, "Untitled")
                    fmd.node_name = uuid_name_dict.get(fmd.node_id, "Untitled")
                    fmd.raw_file_path = fmd.file_uuid
                    fmd.display_file_path = str(
                        Path(fmd.project_name) / fmd.node_name / fmd.file_name)
                    fmd.user_id = user_id
                    fmd.file_size = item["Size"]
                    fmd.last_modified = str(item["LastModified"])
                    fmds.append(fmd)

        # step 4 sync db
        async with self.engine.acquire() as conn, conn.begin():
            # TODO: upsert ALL in one statement
            for fmd in fmds:
                query = sa.select([
                    file_meta_data
                ]).where(file_meta_data.c.file_uuid == fmd.file_uuid)
                # if the file already exists, we might want to update a time-stamp
                rows = await conn.execute(query)
                exists = await rows.scalar()
                if exists:
                    delete_me = file_meta_data.delete().where(
                        file_meta_data.c.file_uuid == fmd.file_uuid)
                    await conn.execute(delete_me)
                ins = file_meta_data.insert().values(**vars(fmd))
                await conn.execute(ins)

    # DELETE -------------------------------------

    async def delete_file(self, user_id: str, location: str, file_uuid: str):
        """Deletes a file given its fmd and location

        Additionally requires a user_id for 3rd party auth

        For internal storage, the db state should be updated upon completion via
        Notification mechanism

        For simcore.s3 we can use the file_name
        For datcore we need the full path
        """
        if location == SIMCORE_S3_STR:
            # FIXME: operation MUST be atomic, transaction??

            to_delete = []
            async with self.engine.acquire() as conn, conn.begin():
                can: Optional[AccessRights] = await get_file_access_rights(
                    conn, int(user_id), file_uuid)
                if not can.delete:
                    logger.debug(
                        "User %s was not allowed to delete file %s",
                        user_id,
                        file_uuid,
                    )
                    raise web.HTTPForbidden(
                        reason=
                        f"User '{user_id}' does not have enough access rights to delete file {file_uuid}"
                    )

                query = sa.select([
                    file_meta_data.c.bucket_name, file_meta_data.c.object_name
                ]).where(file_meta_data.c.file_uuid == file_uuid)

                async for row in conn.execute(query):
                    if self.s3_client.remove_objects(row.bucket_name,
                                                     [row.object_name]):
                        to_delete.append(file_uuid)

                await conn.execute(file_meta_data.delete().where(
                    file_meta_data.c.file_uuid.in_(to_delete)))

        elif location == DATCORE_STR:
            # FIXME: review return inconsistencies
            api_token, api_secret = self._get_datcore_tokens(user_id)
            assert self.app  # nosec
            assert api_secret  # nosec
            assert api_token  # nosec
            await datcore_adapter.delete_file(self.app, api_token, api_secret,
                                              file_uuid)

    async def delete_project_simcore_s3(
            self,
            user_id: str,
            project_id: str,
            node_id: Optional[str] = None) -> Optional[web.Response]:
        """Deletes all files from a given node in a project in simcore.s3 and updated db accordingly.
        If node_id is not given, then all the project files db entries are deleted.
        """

        # FIXME: operation MUST be atomic. Mark for deletion and remove from db when deletion fully confirmed
        async with self.engine.acquire() as conn, conn.begin():
            # access layer
            can: Optional[AccessRights] = await get_project_access_rights(
                conn, int(user_id), project_id)
            if not can.delete:
                logger.debug(
                    "User %s was not allowed to delete project %s",
                    user_id,
                    project_id,
                )
                raise web.HTTPForbidden(
                    reason=f"User does not have delete access for {project_id}"
                )

            delete_me = file_meta_data.delete().where(
                file_meta_data.c.project_id == project_id, )
            if node_id:
                delete_me = delete_me.where(
                    file_meta_data.c.node_id == node_id)
            await conn.execute(delete_me)

        async with self._create_aiobotocore_client_context() as aioboto_client:
            # Note: the / at the end of the Prefix is VERY important, makes the listing several orders of magnitude faster
            response = await aioboto_client.list_objects_v2(
                Bucket=self.simcore_bucket_name,
                Prefix=f"{project_id}/{node_id}/"
                if node_id else f"{project_id}/",
            )

            objects_to_delete = []
            for f in response.get("Contents", []):
                objects_to_delete.append({"Key": f["Key"]})

            if objects_to_delete:
                response = await aioboto_client.delete_objects(
                    Bucket=self.simcore_bucket_name,
                    Delete={"Objects": objects_to_delete},
                )
                return response

    # SEARCH -------------------------------------

    async def search_files_starting_with(self, user_id: int,
                                         prefix: str) -> List[FileMetaDataEx]:
        # Avoids using list_files since it accounts for projects/nodes
        # Storage should know NOTHING about those concepts
        files_meta = deque()

        async with self.engine.acquire() as conn, conn.begin():
            # access layer
            can_read_projects_ids = await get_readable_project_ids(
                conn, int(user_id))
            has_read_access = (
                file_meta_data.c.user_id == str(user_id)
            ) | file_meta_data.c.project_id.in_(can_read_projects_ids)

            stmt = sa.select([file_meta_data]).where(
                file_meta_data.c.file_uuid.startswith(prefix)
                & has_read_access)

            async for row in conn.execute(stmt):
                meta_extended = to_meta_data_extended(row)
                files_meta.append(meta_extended)

        return list(files_meta)

    async def create_soft_link(self, user_id: int, target_uuid: str,
                               link_uuid: str) -> FileMetaDataEx:

        # validate link_uuid
        async with self.engine.acquire() as conn:
            # TODO: select exists(select 1 from file_metadat where file_uuid=12)
            found = await conn.scalar(
                sa.select([file_meta_data.c.file_uuid
                           ]).where(file_meta_data.c.file_uuid == link_uuid))
            if found:
                raise ValueError(
                    f"Invalid link {link_uuid}. Link already exists")

        # validate target_uuid
        target = await self.list_file(str(user_id), SIMCORE_S3_STR,
                                      target_uuid)
        if not target:
            raise ValueError(
                f"Invalid target '{target_uuid}'. File does not exists for this user"
            )

        # duplicate target and change the following columns:
        target.fmd.file_uuid = link_uuid
        target.fmd.file_id = link_uuid  # NOTE: api-server relies on this id
        target.fmd.is_soft_link = True

        async with self.engine.acquire() as conn:
            stmt = (file_meta_data.insert().values(
                **attr.asdict(target.fmd)).returning(literal_column("*")))

            result = await conn.execute(stmt)
            link = to_meta_data_extended(await result.first())
            return link

    async def synchronise_meta_data_table(self, location: str,
                                          dry_run: bool) -> Dict[str, Any]:

        PRUNE_CHUNK_SIZE = 20

        removed: List[str] = []
        to_remove: List[str] = []

        async def _prune_db_table(conn):
            if not dry_run:
                await conn.execute(file_meta_data.delete().where(
                    file_meta_data.c.object_name.in_(to_remove)))
            logger.info(
                "%s %s orphan items",
                "Would have deleted" if dry_run else "Deleted",
                len(to_remove),
            )
            removed.extend(to_remove)
            to_remove.clear()

        # ----------

        assert (  # nosec
            location == SIMCORE_S3_STR
        ), "Only with s3, no other sync implemented"  # nosec

        if location == SIMCORE_S3_STR:

            # NOTE: only valid for simcore, since datcore data is not in the database table
            # let's get all the files in the table
            logger.warning(
                "synchronisation of database/s3 storage started, this will take some time..."
            )

            async with self.engine.acquire(
            ) as conn, self._create_aiobotocore_client_context(
            ) as aioboto_client:

                number_of_rows_in_db = (await conn.scalar(
                    sa.select([sa.func.count()]).select_from(file_meta_data))
                                        or 0)
                logger.warning(
                    "Total number of entries to check %d",
                    number_of_rows_in_db,
                )

                assert isinstance(aioboto_client, AioBaseClient)  # nosec

                async for row in conn.execute(
                        sa.select([file_meta_data.c.object_name])):
                    s3_key = row.object_name  # type: ignore

                    # now check if the file exists in S3
                    # SEE https://www.peterbe.com/plog/fastest-way-to-find-out-if-a-file-exists-in-s3
                    response = await aioboto_client.list_objects_v2(
                        Bucket=self.simcore_bucket_name, Prefix=s3_key)
                    if response.get("KeyCount", 0) == 0:
                        # this file does not exist in S3
                        to_remove.append(s3_key)

                    if len(to_remove) >= PRUNE_CHUNK_SIZE:
                        await _prune_db_table(conn)

                if to_remove:
                    await _prune_db_table(conn)

                assert len(to_remove) == 0  # nosec
                assert len(removed) <= number_of_rows_in_db  # nosec

                logger.info(
                    "%s %d entries ",
                    "Would delete" if dry_run else "Deleting",
                    len(removed),
                )

        return {"removed": removed}
Example #8
    async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
        # invoke container cleanup at this point
        dynamic_sidecar_client: DynamicSidecarClient = get_dynamic_sidecar_client(app)

        try:
            await dynamic_sidecar_client.begin_service_destruction(
                dynamic_sidecar_endpoint=scheduler_data.dynamic_sidecar.endpoint
            )
        except Exception as e:  # pylint: disable=broad-except
            logger.warning(
                "Could not contact dynamic-sidecar to begin destruction of %s\n%s",
                scheduler_data.service_name,
                str(e),
            )

        app_settings: AppSettings = app.state.settings
        dynamic_sidecar_settings: DynamicSidecarSettings = (
            app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
        )

        if scheduler_data.dynamic_sidecar.service_removal_state.can_save:
            dynamic_sidecar_client = get_dynamic_sidecar_client(app)
            dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint

            logger.info(
                "Calling into dynamic-sidecar to save state and pushing data to nodeports"
            )
            try:
                tasks = [
                    dynamic_sidecar_client.service_push_output_ports(
                        dynamic_sidecar_endpoint,
                    )
                ]
                # When enabled no longer uploads state via nodeports
                # S3 is used to store state paths
                if not app_settings.DIRECTOR_V2_DEV_FEATURES_ENABLED:
                    tasks.append(
                        dynamic_sidecar_client.service_save_state(
                            dynamic_sidecar_endpoint,
                        )
                    )
                await logged_gather(*tasks)
                logger.info("Ports data pushed by dynamic-sidecar")
            except Exception as e:  # pylint: disable=broad-except
                logger.warning(
                    (
                        "Could not contact dynamic-sidecar to save service "
                        "state and upload outputs %s\n%s"
                    ),
                    scheduler_data.service_name,
                    str(e),
                )

        # remove the 2 services
        await remove_dynamic_sidecar_stack(
            node_uuid=scheduler_data.node_uuid,
            dynamic_sidecar_settings=dynamic_sidecar_settings,
        )
        # remove network
        await remove_dynamic_sidecar_network(
            scheduler_data.dynamic_sidecar_network_name
        )

        # remove created inputs and outputs volumes
        async for attempt in AsyncRetrying(
            wait=wait_exponential(min=1),
            stop=stop_after_delay(20),
            retry_error_cls=GenericDockerError,
        ):
            with attempt:
                logger.info(
                    "Trying to remove volumes for %s", scheduler_data.service_name
                )
                await remove_dynamic_sidecar_volumes(scheduler_data.node_uuid)

        logger.debug(
            "Removed dynamic-sidecar created services for '%s'",
            scheduler_data.service_name,
        )

        await app.state.dynamic_sidecar_scheduler.finish_service_removal(
            scheduler_data.node_uuid
        )

        scheduler_data.dynamic_sidecar.service_removal_state.mark_removed()
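The volume-removal loop above uses tenacity's `AsyncRetrying` iterator form: with no `retry=` argument any exception triggers another attempt, `wait_exponential(min=1)` floors the delay at one second, `stop_after_delay(20)` bounds the whole loop to roughly 20 seconds, and `retry_error_cls` swaps the `RetryError` normally raised on timeout for a domain-specific exception (`GenericDockerError` in the original). A self-contained sketch of the same pattern with stub names:

import asyncio
from tenacity import AsyncRetrying, stop_after_delay, wait_exponential

class VolumeRemovalError(RuntimeError):
    """Hypothetical stand-in for GenericDockerError."""

_state = {"attempts": 0}

async def remove_volumes(node_uuid: str) -> None:
    # Stub for remove_dynamic_sidecar_volumes: still "in use" for two attempts.
    _state["attempts"] += 1
    if _state["attempts"] < 3:
        raise RuntimeError(f"volume for {node_uuid} still in use")

async def main() -> None:
    async for attempt in AsyncRetrying(
        wait=wait_exponential(min=1),        # 1s, 2s, 4s, ... between attempts
        stop=stop_after_delay(20),           # give up after ~20 seconds overall
        retry_error_cls=VolumeRemovalError,  # raised (wrapping the last attempt) on timeout
    ):
        with attempt:
            await remove_volumes("some-node-uuid")

asyncio.run(main())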
Example #9
class SnowflakeAdapter(BaseSourceAdapter):
    """The Snowflake Data Warehouse source adapter.

    Args:
        preserve_case: By default the adapter folds case-insensitive strings to lowercase.
                       If preserve_case is True, SnowShu will __not__ alter cases (dangerous!).
    """

    name = 'snowflake'
    SUPPORTS_CROSS_DATABASE = True
    SUPPORTED_FUNCTIONS = set(['ANY_VALUE', 'RLIKE', 'UUID_STRING'])
    SUPPORTED_SAMPLE_METHODS = (BernoulliSampleMethod, )
    REQUIRED_CREDENTIALS = (
        USER,
        PASSWORD,
        ACCOUNT,
        DATABASE,
    )
    ALLOWED_CREDENTIALS = (
        SCHEMA,
        WAREHOUSE,
        ROLE,
    )
    # snowflake in-db is UPPER, but connector is actually lower :(
    DEFAULT_CASE = 'lower'

    DATA_TYPE_MAPPINGS = {
        "array": dtypes.JSON,
        "bigint": dtypes.BIGINT,
        "binary": dtypes.BINARY,
        "boolean": dtypes.BOOLEAN,
        "char": dtypes.CHAR,
        "character": dtypes.CHAR,
        "date": dtypes.DATE,
        "datetime": dtypes.DATETIME,
        "decimal": dtypes.DECIMAL,
        "double": dtypes.FLOAT,
        "double precision": dtypes.FLOAT,
        "float": dtypes.FLOAT,
        "float4": dtypes.FLOAT,
        "float8": dtypes.FLOAT,
        "int": dtypes.BIGINT,
        "integer": dtypes.BIGINT,
        "number": dtypes.BIGINT,
        "numeric": dtypes.NUMERIC,
        "object": dtypes.JSON,
        "real": dtypes.FLOAT,
        "smallint": dtypes.BIGINT,
        "string": dtypes.VARCHAR,
        "text": dtypes.VARCHAR,
        "time": dtypes.TIME,
        "timestamp": dtypes.TIMESTAMP_NTZ,
        "timestamp_ntz": dtypes.TIMESTAMP_NTZ,
        "timestamp_ltz": dtypes.TIMESTAMP_TZ,
        "timestamp_tz": dtypes.TIMESTAMP_TZ,
        "varbinary": dtypes.BINARY,
        "varchar": dtypes.VARCHAR,
        "variant": dtypes.JSON
    }

    MATERIALIZATION_MAPPINGS = {"BASE TABLE": mz.TABLE, "VIEW": mz.VIEW}

    @overrides
    def _get_all_databases(self) -> List[str]:
        """ Use the SHOW api to get all the available db structures."""
        logger.debug('Collecting databases from snowflake...')
        show_result = tuple(
            self._safe_query("SHOW TERSE DATABASES")['name'].tolist())
        databases = list(set(show_result))
        logger.debug(f'Done. Found {len(databases)} databases.')
        return databases

    @overrides
    def _get_all_schemas(
            self,
            database: str,
            exclude_defaults: Optional[bool] = False) -> List[str]:
        logger.debug(f'Collecting schemas from {database} in snowflake...')
        show_result = self._safe_query(
            f'SHOW TERSE SCHEMAS IN DATABASE {database}')['name'].tolist()
        schemas = set(show_result)
        logger.debug(
            f'Done. Found {len(schemas)} schemas in {database} database.')
        return list(schemas)

    @staticmethod
    def population_count_statement(relation: Relation) -> str:
        """creates the count * statement for a relation

        Args:
            relation: the :class:`Relation <snowshu.core.models.relation.Relation>` to create the statement for.
        Returns:
            a query that results in a single row, single column, integer value of the unsampled relation population size
        """
        return f"SELECT COUNT(*) FROM {relation.quoted_dot_notation}"

    @staticmethod
    def view_creation_statement(relation: Relation) -> str:
        return f"""
SELECT
SUBSTRING(GET_DDL('view','{relation.quoted_dot_notation}'),
POSITION(' AS ' IN UPPER(GET_DDL('view','{relation.quoted_dot_notation}')))+3)
"""

    @staticmethod
    def unsampled_statement(relation: Relation) -> str:
        return f"""
SELECT
    *
FROM
    {relation.quoted_dot_notation}
"""

    def directionally_wrap_statement(
            self, sql: str, relation: Relation,
            sample_type: Optional['BaseSampleMethod']) -> str:
        if sample_type is None:
            return sql

        return f"""
WITH
{relation.scoped_cte('SNOWSHU_FINAL_SAMPLE')} AS (
{sql}
)
,{relation.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')} AS (
SELECT
    *
FROM
{relation.scoped_cte('SNOWSHU_FINAL_SAMPLE')}
{self._sample_type_to_query_sql(sample_type)}
)
SELECT
    *
FROM
{relation.scoped_cte('SNOWSHU_DIRECTIONAL_SAMPLE')}
"""

    @staticmethod
    def analyze_wrap_statement(sql: str, relation: Relation) -> str:
        return f"""
WITH
    {relation.scoped_cte('SNOWSHU_COUNT_POPULATION')} AS (
SELECT
    COUNT(*) AS population_size
FROM
    {relation.quoted_dot_notation}
)
,{relation.scoped_cte('SNOWSHU_CORE_SAMPLE')} AS (
{sql}
)
,{relation.scoped_cte('SNOWSHU_CORE_SAMPLE_COUNT')} AS (
SELECT
    COUNT(*) AS sample_size
FROM
    {relation.scoped_cte('SNOWSHU_CORE_SAMPLE')}
)
SELECT
    s.sample_size AS sample_size
    ,p.population_size AS population_size
FROM
    {relation.scoped_cte('SNOWSHU_CORE_SAMPLE_COUNT')} s
INNER JOIN
    {relation.scoped_cte('SNOWSHU_COUNT_POPULATION')} p
ON
    1=1
LIMIT 1
"""

    def sample_statement_from_relation(
            self, relation: Relation, sample_type: Union['BaseSampleMethod',
                                                         None]) -> str:
        """builds the base sample statment for a given relation."""
        query = f"""
SELECT
    *
FROM
    {relation.quoted_dot_notation}
"""
        if sample_type is not None:
            query += f"{self._sample_type_to_query_sql(sample_type)}"
        return query

    @staticmethod
    def union_constraint_statement(subject: Relation, constraint: Relation,
                                   subject_key: str, constraint_key: str,
                                   max_number_of_outliers: int) -> str:
        """ Union statements to select outliers. This does not pull in NULL values. """
        return f"""
(SELECT
    *
FROM
{subject.quoted_dot_notation}
WHERE
    {subject_key}
NOT IN
(SELECT
    {constraint_key}
FROM
{constraint.quoted_dot_notation})
LIMIT {max_number_of_outliers})
"""

    @staticmethod
    def upstream_constraint_statement(relation: Relation, local_key: str,
                                      remote_key: str) -> str:
        """ builds upstream where constraints against downstream full population"""
        return f" {local_key} in (SELECT {remote_key} FROM {relation.quoted_dot_notation})"

    @staticmethod
    def predicate_constraint_statement(relation: Relation, analyze: bool,
                                       local_key: str, remote_key: str) -> str:
        """builds 'where' strings"""
        constraint_sql = str()
        if analyze:
            constraint_sql = f" SELECT {remote_key} AS {local_key} FROM ({relation.core_query})"
        else:

            def quoted(val: Any) -> str:
                return f"'{val}'" if relation.lookup_attribute(
                    remote_key).data_type.requires_quotes else str(val)

            try:
                constraint_set = [
                    quoted(val) for val in relation.data[remote_key].unique()
                ]
                constraint_sql = ','.join(constraint_set)
            except KeyError as err:
                logger.critical(
                    f'failed to build predicates for {relation.dot_notation}: '
                    f'remote key {remote_key} not in dataframe columns ({relation.data.columns})'
                )
                raise err

        return f"{local_key} IN ({constraint_sql}) "

    @staticmethod
    def polymorphic_constraint_statement(
            relation: Relation,  # noqa pylint: disable=too-many-arguments
            analyze: bool,
            local_key: str,
            remote_key: str,
            local_type: str,
            local_type_match_val: str = None) -> str:
        predicate = SnowflakeAdapter.predicate_constraint_statement(
            relation, analyze, local_key, remote_key)
        if local_type_match_val:
            type_match_val = local_type_match_val
        else:
            type_match_val = relation.name[:-1] if relation.name[-1].lower(
            ) == 's' else relation.name
        return f" ({predicate} AND LOWER({local_type}) = LOWER('{type_match_val}') ) "

    @staticmethod
    def _sample_type_to_query_sql(sample_type: 'BaseSampleMethod') -> str:
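        # Translate the SnowShu sample method into a Snowflake SAMPLE clause:
        # BERNOULLI accepts either a probability or a fixed row count,
        # SYSTEM accepts a probability only.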
        if sample_type.name == 'BERNOULLI':
            qualifier = sample_type.probability if sample_type.probability\
                else str(sample_type.rows) + ' ROWS'
            return f"SAMPLE BERNOULLI ({qualifier})"
        if sample_type.name == 'SYSTEM':
            return f"SAMPLE SYSTEM ({sample_type.probability})"

        message = f"{sample_type.name} is not supported for SnowflakeAdapter"
        logger.error(message)
        raise NotImplementedError(message)

    # TODO: change arg name in parent to fix the issue here
    @overrides
    def _build_conn_string(self, overrides: Optional[dict] = None) -> str:  # noqa pylint: disable=redefined-outer-name
        """overrides the base conn string."""
        conn_parts = [
            f"snowflake://{self.credentials.user}:{self.credentials.password}"
            f"@{self.credentials.account}/{self.credentials.database}/"
        ]
        conn_parts.append(self.credentials.schema if self.credentials.
                          schema is not None else '')
        get_args = list()
        for arg in (
                'warehouse',
                'role',
        ):
            if self.credentials.__dict__[arg] is not None:
                get_args.append(f"{arg}={self.credentials.__dict__[arg]}")

        get_string = "?" + "&".join(get_args)
        return (''.join(conn_parts)) + get_string

    @overrides
    def _get_relations_from_database(
            self,
            schema_obj: BaseSourceAdapter._DatabaseObject) -> List[Relation]:
        quoted_database = schema_obj.full_relation.quoted(
            schema_obj.full_relation.database)  # quoted db name
        relation_database = schema_obj.full_relation.database  # case corrected db name
        case_sensitive_schema = schema_obj.case_sensitive_name  # case-sensitive schema name
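        # One row per column: INFORMATION_SCHEMA.TABLES is joined to .COLUMNS so
        # relation metadata and all of its attributes come back in a single query.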
        relations_sql = f"""
                                 SELECT
                                    m.table_schema AS schema,
                                    m.table_name AS relation,
                                    m.table_type AS materialization,
                                    c.column_name AS attribute,
                                    c.ordinal_position AS ordinal,
                                    c.data_type AS data_type
                                 FROM
                                    {quoted_database}.INFORMATION_SCHEMA.TABLES m
                                 INNER JOIN
                                    {quoted_database}.INFORMATION_SCHEMA.COLUMNS c
                                 ON
                                    c.table_schema = m.table_schema
                                 AND
                                    c.table_name = m.table_name
                                 WHERE
                                    m.table_schema = '{case_sensitive_schema}'
                                    AND m.table_schema <> 'INFORMATION_SCHEMA'
                              """

        logger.debug(
            f'Collecting detailed relations from database {quoted_database}...'
        )
        relations_frame = self._safe_query(relations_sql)
        unique_relations = (relations_frame['schema'] + '.' +
                            relations_frame['relation']).unique().tolist()
        logger.debug(
            f'Done collecting relations. Found a total of {len(unique_relations)} '
            f'unique relations in database {quoted_database}')
        relations = list()
        for relation in unique_relations:
            logger.debug(
                f'Building relation { quoted_database + "." + relation }...')
            attributes = list()

            for attribute in relations_frame.loc[(
                    relations_frame['schema'] + '.' +
                    relations_frame['relation']) == relation].itertuples():
                logger.debug(
                    f'adding attribute {attribute.attribute} to relation..')
                attributes.append(
                    Attribute(self._correct_case(attribute.attribute),
                              self._get_data_type(attribute.data_type)))

            relation = Relation(
                relation_database,
                self._correct_case(attribute.schema),  # noqa pylint: disable=undefined-loop-variable
                self._correct_case(attribute.relation),  # noqa pylint: disable=undefined-loop-variable
                self.MATERIALIZATION_MAPPINGS[attribute.materialization],  # noqa pylint: disable=undefined-loop-variable
                attributes)
            logger.debug(f'Added relation {relation.dot_notation} to pool.')
            relations.append(relation)

        logger.debug(
            f'Acquired {len(relations)} total relations from database {quoted_database}.'
        )
        return relations

    @overrides
    def _count_query(self, query: str) -> int:
        count_sql = f"WITH __SNOWSHU__COUNTABLE__QUERY as ({query}) \
                    SELECT COUNT(*) AS count FROM __SNOWSHU__COUNTABLE__QUERY"

        count = int(self._safe_query(count_sql).iloc[0]['count'])
        return count

    @tenacity.retry(wait=wait_exponential(),
                    stop=stop_after_attempt(4),
                    before_sleep=Logger().log_retries,
                    reraise=True)
    @overrides
    def check_count_and_query(self, query: str, max_count: int,
                              unsampled: bool) -> pd.DataFrame:
        """checks the count, if count passes returns results as a dataframe."""
        try:
            logger.debug('Checking count for query...')
            start_time = time.time()
            count = self._count_query(query)
            if unsampled and count > max_count:
                warn_msg = (
                    f'Unsampled relation has {count} rows which is over '
                    f'the max allowed rows for this type of query ({max_count}). '
                    f'All records will be loaded into replica.')
                logger.warning(warn_msg)
            else:
                assert count <= max_count
            logger.debug(
                f'Query count safe at {count} rows in {time.time()-start_time} seconds.'
            )
        except AssertionError:
            message = (
                f'failed to execute query, result would have returned {count} rows '
                f'but the max allowed rows for this type of query is {max_count}.'
            )
            logger.error(message)
            logger.debug(f'failed sql: {query}')
            raise TooManyRecords(message)
        response = self._safe_query(query)
        return response

    @overrides
    def get_connection(
        self,
        database_override: Optional[str] = None,
        schema_override: Optional[str] = None
    ) -> sqlalchemy.engine.base.Engine:
        """Creates a connection engine without transactions.

        By default uses the instance credentials unless database or
        schema override are provided.
        """
        if not self._credentials:
            raise KeyError(
                'Adapter.get_connection called before setting Adapter.credentials'
            )

        logger.debug(f'Acquiring {self.CLASSNAME} connection...')
        overrides = dict(  # noqa pylint: disable=redefined-outer-name 
            (k, v) for (k, v) in dict(database=database_override,
                                      schema=schema_override).items()
            if v is not None)

        engine = sqlalchemy.create_engine(self._build_conn_string(overrides),
                                          poolclass=NullPool)
        logger.debug(f'engine acquired. Conn string: {repr(engine.url)}')
        return engine
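
The check_count_and_query method above relies on the same tenacity building blocks as the other examples: exponential backoff, a four-attempt cap, and reraise=True so the final exception reaches the caller instead of a RetryError. A minimal, self-contained sketch of that decorator behaviour (flaky_count and its failure mode are invented for illustration):

import tenacity
from tenacity import stop_after_attempt, wait_exponential

attempts = {"count": 0}

@tenacity.retry(wait=wait_exponential(), stop=stop_after_attempt(4), reraise=True)
def flaky_count() -> int:
    # Fails twice with a transient error, then succeeds on the third attempt.
    attempts["count"] += 1
    if attempts["count"] < 3:
        raise ConnectionError("transient warehouse hiccup")
    return 42

print(flaky_count())  # 42, after two retried failures
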
Example #10
    async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
        # invoke container cleanup at this point
        dynamic_sidecar_client: DynamicSidecarClient = get_dynamic_sidecar_client(
            app)

        try:
            await dynamic_sidecar_client.begin_service_destruction(
                dynamic_sidecar_endpoint=scheduler_data.dynamic_sidecar.
                endpoint)
        except Exception as e:  # pylint: disable=broad-except
            logger.warning(
                "Could not contact dynamic-sidecar to begin destruction of %s\n%s",
                scheduler_data.service_name,
                str(e),
            )

        app_settings: AppSettings = app.state.settings
        dynamic_sidecar_settings: DynamicSidecarSettings = (
            app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR)

        if scheduler_data.dynamic_sidecar.service_removal_state.can_save:
            dynamic_sidecar_client = get_dynamic_sidecar_client(app)
            dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint

            logger.info(
                "Calling into dynamic-sidecar to save state and pushing data to nodeports"
            )
            try:
                tasks = [
                    dynamic_sidecar_client.service_push_output_ports(
                        dynamic_sidecar_endpoint, )
                ]
                # When dev features are enabled, state is no longer uploaded
                # via nodeports; S3 is used to store the state paths instead
                if not app_settings.DIRECTOR_V2_DEV_FEATURES_ENABLED:
                    tasks.append(
                        dynamic_sidecar_client.service_save_state(
                            dynamic_sidecar_endpoint, ))
                await logged_gather(*tasks)
                logger.info("Ports data pushed by dynamic-sidecar")
            except Exception as e:  # pylint: disable=broad-except
                logger.warning(
                    ("Could not contact dynamic-sidecar to save service "
                     "state and upload outputs %s\n%s"),
                    scheduler_data.service_name,
                    str(e),
                )

        # remove the 2 services
        await remove_dynamic_sidecar_stack(
            node_uuid=scheduler_data.node_uuid,
            dynamic_sidecar_settings=dynamic_sidecar_settings,
        )
        # remove network
        await remove_dynamic_sidecar_network(
            scheduler_data.dynamic_sidecar_network_name)

        # remove created inputs and outputs volumes

        # compute which volumes we expected to be removed
        # in case the expected volumes differ from the removed ones
        # show an error
        compose_namespace = get_compose_namespace(scheduler_data.node_uuid)
        expected_volumes_to_remove: Set[str] = {
            DynamicSidecarVolumesPathsResolver.source(
                compose_namespace=compose_namespace, path=path)
            for path in [
                scheduler_data.paths_mapping.inputs_path,
                scheduler_data.paths_mapping.outputs_path,
            ] + scheduler_data.paths_mapping.state_paths
        }

        async for attempt in AsyncRetrying(
                wait=wait_exponential(min=1),
                stop=stop_after_delay(20),
                retry_error_cls=GenericDockerError,
        ):
            with attempt:
                logger.info("Trying to remove volumes for %s",
                            scheduler_data.service_name)

                removed_volumes = await remove_dynamic_sidecar_volumes(
                    scheduler_data.node_uuid)

                if expected_volumes_to_remove != removed_volumes:
                    logger.warning(
                        ("Attention expected to remove %s, instead only removed %s. "
                         "Please check with check that all expected to remove volumes "
                         "are now gone."),
                        expected_volumes_to_remove,
                        removed_volumes,
                    )

        logger.debug(
            "Removed dynamic-sidecar created services for '%s'",
            scheduler_data.service_name,
        )

        await app.state.dynamic_sidecar_scheduler.finish_service_removal(
            scheduler_data.node_uuid)

        scheduler_data.dynamic_sidecar.service_removal_state.mark_removed()
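
The volume-removal loop above uses tenacity's AsyncRetrying iterator rather than a decorator: each pass through the async for yields an attempt context, an exception raised inside "with attempt:" schedules another try with exponential backoff, and once the 20-second stop is reached the configured retry_error_cls is raised. A stripped-down sketch of the same pattern, with a hypothetical remove_volumes_once coroutine and a shorter deadline:

import asyncio
from tenacity import AsyncRetrying, stop_after_delay, wait_exponential

async def remove_volumes_once() -> None:
    # Stand-in for the real docker call; always fails in this sketch.
    raise RuntimeError("docker API not ready yet")

async def main() -> None:
    try:
        async for attempt in AsyncRetrying(
            wait=wait_exponential(min=1),
            stop=stop_after_delay(5),
            reraise=True,
        ):
            with attempt:
                await remove_volumes_once()
    except RuntimeError:
        print("gave up after ~5 seconds of retries")

asyncio.run(main())
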
Example #11
class ExperimentalSpotifyPodcastAPI:
    """Representation of the experimental Spotify podcast API."""

    def __init__(self):
        self._bearer: Optional[str] = None
        self._bearer_expires: Optional[dt.datetime] = None
        self._auth_lock = RLock()

    @retry(wait=wait_exponential(), stop=stop_after_attempt(7))
    def _authenticate(self):
        """
        Retrieves a Bearer token for the experimental Spotify API, valid 1 hour.

        Generally follows the steps outlined here:
        https://developer.spotify.com/documentation/general/guides/authorization/code-flow/
        (with a few exceptions)
        """

        with self._auth_lock:
            logger.info("Retrieving Bearer for experimental Spotify API...")

            logger.debug("Generating secrets")

            state = random_string(32)

            code_verifier = random_string(64)
            code_challenge = base64.b64encode(
                hashlib.sha256(code_verifier.encode("utf-8")).digest()
            ).decode("utf-8")

            # Fix up format of code_challenge for spotify
            code_challenge = re.sub(r"=+$", "", code_challenge)
            code_challenge = code_challenge.replace("/", "_")
            code_challenge = code_challenge.replace("+", "-")

            logger.trace("state = {}", state)
            logger.trace("code_verifier = {}", code_verifier)
            logger.trace("code_challenge = {}", code_challenge)

            logger.debug("Requesting User Authorization")
            response = requests.get(
                "https://accounts.spotify.com/oauth2/v2/auth",
                params={
                    "response_type": "code",
                    "client_id": CLIENT_ID,
                    "scope": "streaming ugc-image-upload user-read-email user-read-private",
                    "redirect_uri": "https://podcasters.spotify.com",
                    "code_challenge": code_challenge,
                    "code_challenge_method": "S256",
                    "state": state,
                    "response_mode": "web_message",  # TODO: Figure out if there is a way to get pure JSON
                    "prompt": "none",
                },
                cookies={
                    "sp_dc": SP_DC,
                    "sp_key": SP_KEY,
                },
            )
            response.raise_for_status()

            # We get some weird HTML here that contains some JS
            html = response.text

            match = re.search(r"const authorizationResponse = (.*?);", html, re.DOTALL)
            json_str = match.group(1)

            # The extracted string isn't strictly valid JSON due to some missing quotes,
            # but PyYAML loads it fine
            auth_response = yaml.safe_load(json_str)

            # Confirm that auth was successful
            assert auth_response["type"] == "authorization_response"
            assert auth_response["response"]["state"] == state

            auth_code = auth_response["response"]["code"]

            logger.trace("auth_code = {}", auth_code)

            logger.debug("Requesting Bearer Token")
            response = requests.post(
                "https://accounts.spotify.com/api/token",
                data={
                    "grant_type": "authorization_code",
                    "client_id": CLIENT_ID,
                    "code": auth_code,
                    "redirect_uri": "https://podcasters.spotify.com",
                    "code_verifier": code_verifier,
                },
            )
            response.raise_for_status()

            response_json = response.json()

            self._bearer = response_json["access_token"]
            expires_in = response_json["expires_in"]
            self._bearer_expires = dt.datetime.now() + dt.timedelta(seconds=expires_in)

            logger.trace("bearer = {}", self._bearer)

            logger.success("Bearer token retrieved!")

    def _ensure_auth(self):
        """Checks if Bearer token expires soon. If so, requests a new one."""

        with self._auth_lock:
            if self._bearer is None or self._bearer_expires < (
                dt.datetime.now() + dt.timedelta(minutes=5)
            ):
                self._authenticate()

    @staticmethod
    def _build_url(*path: str) -> str:
        return f"{BASE_URL}{'/'.join(path)}"

    @staticmethod
    def _date_params(start: dt.date, end: dt.date) -> Dict[str, str]:
        return {
            "start": start.isoformat(),
            "end": end.isoformat(),
        }

    def _request(self, url: str, *, params: Optional[Dict[str, str]] = None) -> dict:
        delay = DELAY_BASE
        for attempt in range(6):
            sleep(delay)
            self._ensure_auth()
            response = requests.get(
                url,
                params=params,
                headers={"Authorization": f"Bearer {self._bearer}"},
            )

            if response.status_code in (429, 502, 503, 504):
                delay *= 2
                logger.log(
                    ("INFO" if attempt < 3 else "WARNING"),
                    'Got {} for URL "{}", next delay: {}s',
                    response.status_code,
                    url,
                    delay,
                )
                continue

            elif response.status_code == 401:
                self._authenticate()
                continue

            if not response.ok:
                logger.error("Error in experimental API:")
                logger.info(response.status_code)
                logger.info(response.headers)
                logger.info(response.text)
                response.raise_for_status()

            return response.json()

        raise Exception("All retries failed!")

    def podcast_followers(self, podcast_id: str, start: dt.date, end: dt.date) -> dict:
        """Loads historic follower data for podcast.

        Args:
            podcast_id (str): ID of the podcast to request data for.
            start (dt.date): Earliest date to request data for.
            end (dt.date): Most recent date to request data for.

        Returns:
            dict: Response data from API.
        """
        url = self._build_url(
            "shows",
            podcast_id,
            "followers",
        )
        return self._request(url, params=self._date_params(start, end))

    def podcast_aggregate(
        self,
        podcast_id: str,
        start: dt.date,
        end: Optional[dt.date] = None,
    ) -> dict:
        """Loads podcast demographics data.

        Args:
            podcast_id (str): ID of the podcast to request data for.
            start (dt.date): Earliest date to request data for.
            end (Optional[dt.date], optional): Most recent date to request data for.
              Defaults to None. Will be set to ``start`` if None.

        Returns:
            dict: [description]
        """
        if end is None:
            end = start

        url = self._build_url(
            "shows",
            podcast_id,
            "aggregate",
        )
        return self._request(url, params=self._date_params(start, end))

    def episode_performance(self, episode_id: str) -> dict:
        """Loads episode performance data.

        Args:
            episode_id (str): ID of the episode to request data for.

        Returns:
            dict: Response data from API.
        """

        url = self._build_url("episodes", episode_id, "performance")
        return self._request(url)

    def episode_aggregate(
        self,
        episode_id: str,
        start: dt.date,
        end: Optional[dt.date] = None,
    ) -> dict:
        """Loads episode demographics data.

        Args:
            episode_id (str): ID of the episode to request data for.
            start (dt.date): Earliest date to request data for.
            end (Optional[dt.date], optional): Most recent date to request data for.
              Defaults to None. Will be set to ``start`` if None.

        Returns:
            dict: [description]
        """
        if end is None:
            end = start

        url = self._build_url(
            "episodes",
            episode_id,
            "aggregate",
        )
        return self._request(url, params=self._date_params(start, end))
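
Unlike the other examples, _request above rolls its own retry loop instead of using tenacity: it sleeps before every call, doubles the delay on 429/502/503/504, re-authenticates on 401, and gives up after six attempts. A condensed sketch of that loop (fetch and DELAY_BASE here are stand-ins for the real request call and module constant):

from time import sleep

DELAY_BASE = 0.5  # stand-in for the module-level constant


def request_with_backoff(fetch, max_attempts: int = 6):
    delay = DELAY_BASE
    for _attempt in range(max_attempts):
        sleep(delay)
        status, payload = fetch()
        if status in (429, 502, 503, 504):
            delay *= 2   # rate-limited or flaky gateway: back off and retry
            continue
        if status == 401:
            continue     # token rejected: re-authenticate upstream, then retry
        return payload
    raise Exception("All retries failed!")
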
Example #12
class GitLabProvider(Provider):
    def __init__(self, args, username, token, serverurl, project, mrnumber):
        super().__init__(args, username, token, serverurl)
        self.__project = int(project)
        self.__mrnum = int(mrnumber)

        self.__session = requests.Session()
        self.__session.verify = False

        self.__mr = self.__get_mergerequest(self.__get_project())

        # get diffs
        self.__mr_changes = self.__mr.changes(all=True)
        for change in self.__mr_changes['changes']:
            self._changes.AddChange(change['new_path'], change['diff'],
                                    change['new_file'])

        self.__isvalid = self.__mr.state not in ['merged', 'closed']
        self.__isvalid &= not self.__is_draft() or self.AllowDrafts

    @property
    def Valid(self):
        return self.__isvalid

    def __is_draft(self):
        return self.__mr.work_in_progress

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def __get_server_connection(self):
        try:
            return gitlab.Gitlab(self.ServerURL,
                                 private_token=self.Token,
                                 session=self.__session)
        except Exception as e:
            logging.error('GitLab connections failed')
            raise SCABotServerCommError(e)

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def __get_project(self):
        try:
            return self.__get_server_connection().projects.get(self.__project)
        except Exception:
            logging.error('Project not found')
            raise SCABotProjectNotFoundError(self.__project)

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def __get_mergerequest(self, proj):
        try:
            return proj.mergerequests.get(self.__mrnum)
        except Exception:
            logging.error('MR not found')
            raise SCABotRequestNotFoundError(self.__mrnum)

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def SetNote(self, value: Note):
        _obj = {
            'body': value.body,
            'position': {
                'base_sha': self.__mr_changes['diff_refs']['base_sha'],
                'start_sha': self.__mr_changes['diff_refs']['start_sha'],
                'head_sha': self.__mr_changes['diff_refs']['head_sha'],
                'position_type': 'text',
                'old_line': value.lines[1] if len(value.lines) > 1 else None,
                'new_line': value.lines[0],
                'new_path': value.path,
            },
            'author': {
                'username': self.Username,
            },
        }
        try:
            self.__mr.discussions.create(_obj)
        except Exception:
            try:
                del _obj['position']['old_line']
                self.__mr.discussions.create(_obj)
            except Exception:
                logging.error('Set note {note} failed'.format(note=value))

    def GetNote(self, input):
        return Note(
            input.get('author', {}).get('username', 'Unknown user'),
            input.get('body', ''),
            input.get('position', {}).get('new_path', ''),
            input.get('position', {}).get('new_line', -1),
            input.get('position', {}).get('old_line', None),
            input.get('resolved', False),
            input.get('resolvable', True),
            input,
        )

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def GetNotes(self):
        mr = self.__mr
        res = []
        for discussion in mr.discussions.list(all=True):
            for note in mr.discussions.get(discussion.id).attributes['notes']:
                res.append(self.GetNote(note))
        return res

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def ResolveNote(self, value: Note):
        note = self.__mr.discussions.get(value.reference.get('id'))
        note.resolve = True
        note.save()

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def GetCurrentStatus(self) -> dict:
        return self.__mr
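
Every network-facing method of the provider above is wrapped in the same tenacity policy: exponential backoff clamped between 10 and 120 seconds and at most five attempts. A hedged refactoring sketch that pulls the policy into one reusable decorator (API_RETRY and fetch_merge_request are names introduced here, not part of the original):

from tenacity import retry, stop_after_attempt, wait_exponential

# One shared policy instead of repeating the arguments on every method.
API_RETRY = retry(
    wait=wait_exponential(multiplier=1, min=10, max=120),
    stop=stop_after_attempt(5),
)


@API_RETRY
def fetch_merge_request(project, mr_number: int):
    """Illustrative only: same call shape as __get_mergerequest above."""
    return project.mergerequests.get(mr_number)
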
Example #14
File: _retry.py  Project: openstack/tooz
#
# Copyright © 2016 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import tenacity
from tenacity import stop
from tenacity import wait


_default_wait = wait.wait_exponential(max=1)


def retry(stop_max_delay=None, **kwargs):
    k = {"wait": _default_wait, "retry": lambda x: False}
    if stop_max_delay not in (True, False, None):
        k['stop'] = stop.stop_after_delay(stop_max_delay)
    return tenacity.retry(**k)


TryAgain = tenacity.TryAgain
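
A hedged usage sketch of the helper above (assuming the retry function and TryAgain alias defined in this example are in scope): with retry=lambda x: False an ordinary exception is never retried, but raising tenacity.TryAgain explicitly forces another attempt with the capped exponential wait, and stop_max_delay bounds the total retry time. wait_for_leader and its counter are invented for illustration:

import tenacity

state = {"calls": 0}


@retry(stop_max_delay=10)
def wait_for_leader() -> str:
    state["calls"] += 1
    if state["calls"] < 3:
        raise tenacity.TryAgain  # not done yet: explicitly request another attempt
    return "elected"


print(wait_for_leader())  # "elected", after two short waits
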
Example #15
class GitHubProvider(Provider):
    def __init__(self, args, username, token, serverurl, project, mrnumber):
        super().__init__(args, username, token, serverurl)
        self.__project = project
        self.__mrnum = mrnumber

        self.__repo = self.__get_connection()
        self.__pr = self.__get_pr()

        self._changes.AddChangeFromCollection(self.__pr.diff())

        self.__isvalid = self.__pr.state == 'open'
        self.__isvalid &= not self.__is_draft() or self.AllowDrafts

    @property
    def Valid(self):
        return self.__isvalid

    def __is_draft(self):
        return self.__pr.draft

    def __get_github_repo(self):
        return self.__project

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def __get_connection(self):
        login = github3.login(self.Username, self.Token)
        if not login:
            raise SCABotServerCommError('Login failed. Check your credentials')
        res = login.repository(self.Username, self.__get_github_repo())
        if not res:
            raise SCABotProjectNotFoundError(self.__get_github_repo())
        return res

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def __get_pr(self):
        res = self.__repo.pull_request(self.__mrnum)
        if not res:
            raise SCABotRequestNotFoundError(self.__mrnum)
        return res

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def SetNote(self, value: Note):
        try:
            _ref = value.reference
            if not _ref:
                _ref = self.__get_pr().head.sha
            self.__pr.create_review_comment(value.body, _ref, value.path,
                                            value.lines[0])
        except Exception as e:
            raise SCABotServerCommError(e)

    def GetNote(self, input):
        return Note(
            input.user.login,
            input.body_text,
            input.path,
            input.position,
            input.original_position,
            reference=input,
        )

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def GetNotes(self):
        return [self.GetNote(x) for x in self.__pr.review_comments()]

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def ResolveNote(self, value: Note):
        for c in self.__pr.review_comments():
            if c.id == value.reference.id:
                # Unfortunately github/v3-API doesn't allow
                # programmatic resolving of comments
                # so we are going to delete them instead
                c.delete()
                break

    @retry(wait=wait_exponential(multiplier=1, min=10, max=120),
           stop=stop_after_attempt(5))
    def GetCurrentStatus(self) -> dict:
        return self.__pr.__dict__