def test_session_with_profile_name(self, monkeypatch):
    """A session is created with the profile name and the client comes from it."""
    fake_client = MagicMock()
    session_factory = MagicMock(return_value=fake_client)
    fake_boto3 = MagicMock(session=MagicMock(Session=session_factory))
    monkeypatch.setattr("prefect.utilities.aws.boto3", fake_boto3)
    monkeypatch.setattr(
        "prefect.utilities.aws.boto3.session.Session.client", fake_client
    )

    get_boto_client(
        resource="not a real resource",
        use_session=True,
        profile_name="TestProfile",
    )

    # Keyword arguments the session factory was invoked with.
    expected_session_kwargs = dict(
        botocore_session=None,
        profile_name="TestProfile",
        region_name=None,
    )
    assert session_factory.call_args[1] == expected_session_kwargs

    # Each recorded method call is a (name, args, kwargs) triple.
    first_call = fake_client.method_calls[0]
    assert first_call[1] == ("not a real resource",)
    assert first_call[2] == dict(
        aws_access_key_id=None,
        aws_secret_access_key=None,
        aws_session_token=None,
    )
def test_prefers_passed_credentials_over_secrets(self, monkeypatch):
    """Credentials passed explicitly win over `AWS_CREDENTIALS` in context secrets."""
    client_factory = MagicMock()
    monkeypatch.setattr(
        "prefect.utilities.aws.boto3", MagicMock(client=client_factory)
    )

    passed_credentials = dict(
        ACCESS_KEY="pick",
        SECRET_ACCESS_KEY="these",
        SESSION_TOKEN="please",
    )
    secret_credentials = dict(
        ACCESS_KEY="dont",
        SECRET_ACCESS_KEY="pick",
        SESSION_TOKEN="these",
    )

    with set_temporary_config({"cloud.use_local_secrets": True}), prefect.context(
        secrets={"AWS_CREDENTIALS": secret_credentials}
    ):
        get_boto_client(
            resource="not a real resource", credentials=passed_credentials
        )

    # Only the explicitly passed values may reach boto3.
    assert client_factory.call_args[1] == dict(
        aws_access_key_id="pick",
        aws_secret_access_key="these",
        aws_session_token="please",
    )
def test_creds_default_to_environment(self, mock_boto3):
    """Without explicit credentials every credential kwarg sent to boto3 is `None`."""
    get_boto_client(resource="myresource")

    passed_kwargs = mock_boto3.client.call_args[1]
    expected_keys = (
        "aws_access_key_id",
        "aws_secret_access_key",
        "aws_session_token",
        "region_name",
    )
    # dict.fromkeys maps every key to None.
    assert passed_kwargs == dict.fromkeys(expected_keys)
def test_client_cache_not_used_extra_kwargs(self, mock_boto3):
    """If extra kwargs are passed to boto3, we skip the cache since we don't
    know how to interpret them"""

    def fresh_client(*args, **kwargs):
        # Hand back a distinct object on every call so identity can be compared.
        return MagicMock()

    mock_boto3.client.side_effect = fresh_client

    first = get_boto_client("myresource", extra_kwarg="stuff")
    second = get_boto_client("myresource", extra_kwarg="stuff")

    assert len(CACHE) == 0
    assert first is not second
def test_creds_default_to_environment(self, monkeypatch):
    """Without explicit credentials every credential kwarg sent to boto3 is `None`."""
    client_factory = MagicMock()
    monkeypatch.setattr(
        "prefect.utilities.aws.boto3", MagicMock(client=client_factory)
    )

    get_boto_client(resource="not a real resource")

    expected_keys = (
        "aws_access_key_id",
        "aws_secret_access_key",
        "aws_session_token",
    )
    # dict.fromkeys maps every key to None.
    assert client_factory.call_args[1] == dict.fromkeys(expected_keys)
def test_credentials_provided_in_kwargs(self, mock_boto3):
    """Credential keyword arguments are forwarded to the boto3 client unchanged."""
    supplied = dict(
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
    )

    get_boto_client(resource="myresource", **supplied)

    expected = dict(supplied, region_name=None)
    assert mock_boto3.client.call_args[1] == expected
def test_uses_context_secrets(self, mock_boto3):
    """`AWS_CREDENTIALS` from context secrets are used when nothing is passed."""
    secret_credentials = dict(
        ACCESS_KEY="42",
        SECRET_ACCESS_KEY="99",
        SESSION_TOKEN="1",
    )

    with set_temporary_config({"cloud.use_local_secrets": True}), prefect.context(
        secrets={"AWS_CREDENTIALS": secret_credentials}
    ):
        get_boto_client(resource="myresource")

    assert mock_boto3.client.call_args[1] == dict(
        aws_access_key_id="42",
        aws_secret_access_key="99",
        aws_session_token="1",
        region_name=None,
    )
def run(
    self,
    key: str,
    credentials: dict = None,
    bucket: str = None,
) -> str:
    """
    Task run method.

    Args:
        - key (str): the name of the Key within this bucket to retrieve
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.
        - bucket (str, optional): the name of the S3 Bucket to download from

    Returns:
        - str: the contents of this Key / Bucket, as a string

    Raises:
        - ValueError: if no bucket name is provided
    """
    if bucket is None:
        raise ValueError("A bucket name must be provided.")

    s3_client = get_boto_client("s3", credentials=credentials)

    # Download the object into an in-memory buffer.
    stream = io.BytesIO()
    s3_client.download_fileobj(Bucket=bucket, Key=key, Fileobj=stream)

    # getvalue() returns the full buffer regardless of the cursor position,
    # so no seek(0) is needed before decoding.
    return stream.getvalue().decode()
def run(self, credentials: dict = None):
    """
    Task run method. Activates AWS Step function.

    Args:
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.

    Returns:
        - dict: response from AWS StartExecution endpoint
    """
    client = get_boto_client(
        "stepfunctions", credentials=credentials, **self.boto_kwargs
    )

    # The execution parameters all come from attributes set on the task.
    return client.start_execution(
        stateMachineArn=self.state_machine_arn,
        name=self.execution_name,
        input=self.execution_input,
    )
def read_bytes_from_path(path: str) -> bytes:
    """Read bytes from a given path.

    Paths may be local files, or remote files (given a supported file scheme).

    Args:
        - path (str): The file path

    Returns:
        - bytes: The file contents

    Raises:
        - ValueError: if the scheme is not supported, or if a `gcs` blob does
            not exist at the given path
    """
    parsed = parse_path(path)
    scheme = parsed.scheme

    # No scheme, `file` and `agent` are all read from the local filesystem.
    if not scheme or scheme in ("file", "agent"):
        with open(parsed.path, "rb") as f:
            return f.read()

    if scheme == "gcs":
        from prefect.utilities.gcp import get_storage_client

        client = get_storage_client()
        bucket = client.bucket(parsed.netloc)
        # The parsed path carries a leading "/" that is not part of the blob name.
        blob = bucket.get_blob(parsed.path.lstrip("/"))
        if blob is None:
            raise ValueError(f"Job template doesn't exist at {path}")
        return blob.download_as_bytes()

    if scheme == "s3":
        from prefect.utilities.aws import get_boto_client

        client = get_boto_client(resource="s3")
        stream = io.BytesIO()
        # Strip the leading "/" exactly as the GCS branch does; otherwise the
        # object key sent to S3 would start with a slash and not match.
        client.download_fileobj(
            Bucket=parsed.netloc, Key=parsed.path.lstrip("/"), Fileobj=stream
        )
        return stream.getvalue()

    raise ValueError(f"Unsupported file scheme {path}")
def run(self, credentials: dict = None):
    """
    Task run method. Deletes Lambda function.

    Args:
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.

    Returns:
        - dict: response from AWS DeleteFunction endpoint
    """
    lambda_client = get_boto_client(
        "lambda", credentials=credentials, **self.boto_kwargs
    )

    request = {"FunctionName": self.function_name}
    # Only send a qualifier when one is set. A truthiness check also copes
    # with `None`, where `len(self.qualifier)` would raise a TypeError.
    if self.qualifier:
        request["Qualifier"] = self.qualifier

    return lambda_client.delete_function(**request)
def test_credentials_provided_in_kwargs(self, monkeypatch):
    """Credential keyword arguments are forwarded to the boto3 client unchanged."""
    client_factory = MagicMock()
    monkeypatch.setattr(
        "prefect.utilities.aws.boto3", MagicMock(client=client_factory)
    )

    supplied = dict(
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
    )
    get_boto_client(resource="not a real resource", **supplied)

    # The client must receive exactly what was supplied.
    assert client_factory.call_args[1] == supplied
def run(
    self,
    parameter_name: str = None,
    credentials: dict = None,
    boto_kwargs: dict = None,
) -> str:
    """
    Task run method.

    Args:
        - parameter_name (str): The name of the parameter to retrieve from SSM.
        - credentials (dict, optional): Your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.
        - boto_kwargs (dict, optional): Additional keyword arguments to forward to
            the boto client.

    Returns:
        - str: The parameter value, as a string.

    Raises:
        - ValueError: if no parameter name is provided
    """
    if parameter_name is None:
        raise ValueError("A parameter name must be provided.")

    # `**None` raises a TypeError, so treat a missing mapping as "no extras".
    extra_kwargs = boto_kwargs or {}
    ssm_client = get_boto_client("ssm", credentials=credentials, **extra_kwargs)

    response = ssm_client.get_parameter(Name=parameter_name)
    return str(response["Parameter"]["Value"])
def run(self, secret: str = None, credentials: dict = None) -> dict:
    """
    Task run method.

    Args:
        - secret (str): the name of the secret to retrieve
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.

    Returns:
        - dict: the contents of this secret, as a dictionary

    Raises:
        - ValueError: if no secret name is provided
    """
    if secret is None:
        raise ValueError("A secret name must be provided.")

    secrets_client = get_boto_client(
        "secretsmanager", credentials=credentials, **self.boto_kwargs
    )

    # The secret payload is a JSON document stored under "SecretString".
    response = secrets_client.get_secret_value(SecretId=secret)
    return json.loads(response["SecretString"])
def _boto3_client(self):  # type: ignore
    """Create an S3 client (no session) configured with `self.client_options`."""
    from prefect.utilities.aws import get_boto_client

    options = self.client_options
    if not options:
        # No options configured: forward nothing extra.
        options = {}

    return get_boto_client(
        resource="s3", credentials=None, use_session=False, **options
    )
def run(self, credentials: dict = None):
    """
    Task run method. Lists all Lambda functions.

    Args:
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.

    Returns:
        - dict : a list of Lambda functions from AWS ListFunctions endpoint
    """
    lambda_client = get_boto_client("lambda", credentials=credentials)

    request = {
        "MasterRegion": self.master_region,
        "FunctionVersion": self.function_version,
        "MaxItems": self.max_items,
    }
    # The marker is only forwarded when one is set.
    if self.marker:
        request["Marker"] = self.marker

    return lambda_client.list_functions(**request)
def run(
    self,
    function_name: str = None,
    payload: str = None,
    credentials: dict = None,
):
    """
    Task run method. Invokes Lambda function.

    Args:
        - function_name (str): the name of the Lambda function to invoke
        - payload (bytes or seekable file-like object): the JSON provided to
            Lambda function as input
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.

    Returns:
        - dict : response from AWS Invoke endpoint
    """
    client = get_boto_client("lambda", credentials=credentials)

    # Call-time arguments are combined with the options stored on the task.
    invoke_kwargs = dict(
        FunctionName=function_name,
        InvocationType=self.invocation_type,
        LogType=self.log_type,
        ClientContext=self.client_context,
        Payload=payload,
        Qualifier=self.qualifier,
    )
    return client.invoke(**invoke_kwargs)
def test_credentials_does_not_duplicate_kwargs(self, monkeypatch):
    """Values in `credentials` override the matching credential kwargs."""
    client_factory = MagicMock()
    monkeypatch.setattr(
        "prefect.utilities.aws.boto3", MagicMock(client=client_factory)
    )

    credentials = dict(ACCESS_KEY="true_key", SECRET_ACCESS_KEY="true_secret")
    get_boto_client(
        resource="not a real resource",
        credentials=credentials,
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
    )

    # Key and secret come from `credentials`; the token, absent there,
    # falls through from the keyword argument.
    assert client_factory.call_args[1] == dict(
        aws_access_key_id="true_key",
        aws_secret_access_key="true_secret",
        aws_session_token="session",
    )
def test_credentials_does_not_duplicate_kwargs(self, mock_boto3):
    """Values in `credentials` override the matching credential kwargs."""
    credentials = dict(ACCESS_KEY="true_key", SECRET_ACCESS_KEY="true_secret")

    get_boto_client(
        resource="myresource",
        credentials=credentials,
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
    )

    # Key and secret come from `credentials`; the token, absent there,
    # falls through from the keyword argument.
    assert mock_boto3.client.call_args[1] == dict(
        aws_access_key_id="true_key",
        aws_secret_access_key="true_secret",
        aws_session_token="session",
        region_name=None,
    )
def run(
    self,
    job_name: str = None,
    job_definition: str = None,
    job_queue: str = None,
    batch_kwargs: dict = None,
    credentials: dict = None,
):
    """
    Submit a job to the AWS Batch job service.

    Args:
        - job_name (str, optional): The AWS batch job name.
        - job_definition (str, optional): The AWS batch job definition.
        - job_queue (str, optional): Name of the AWS batch job queue.
        - batch_kwargs (dict, optional): Additional keyword arguments to pass to the boto3
            `submit_job` function. See the [submit_job](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/batch.html#Batch.Client.submit_job)  # noqa
            documentation for more details.
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.

    Returns:
        - the `jobId` value from the AWS Batch submit response

    Raises:
        - ValueError: if the job name, job definition or job queue is missing
        - FAIL: if the submission raises, or the response carries no job ID
    """
    if not job_name:
        raise ValueError("A job name must be provided.")
    if not job_definition:
        raise ValueError("A job definition must be provided.")
    if not job_queue:
        raise ValueError("A job queue must be provided.")

    extra_kwargs = batch_kwargs or {}

    batch_client = get_boto_client(
        "batch", credentials=credentials, **self.boto_kwargs
    )

    try:
        response = batch_client.submit_job(
            jobName=job_name,
            jobQueue=job_queue,
            jobDefinition=job_definition,
            **extra_kwargs,
        )
    except Exception as e:
        # Log the traceback, then surface the failure as a FAIL signal while
        # keeping the original exception as the cause.
        self.logger.error("Failed to submit job", exc_info=True)
        raise FAIL(f"Failed to submit job '{job_name}' to AWS Batch.") from e

    if not response.get("jobId"):
        raise FAIL(f"AWS Batch submit response contains no job ID: {response}")

    return response["jobId"]
def test_uses_context_secrets(self, monkeypatch):
    """`AWS_CREDENTIALS` from context secrets are used when nothing is passed."""
    client_factory = MagicMock()
    monkeypatch.setattr(
        "prefect.utilities.aws.boto3", MagicMock(client=client_factory)
    )

    secret_credentials = dict(
        ACCESS_KEY="42",
        SECRET_ACCESS_KEY="99",
        SESSION_TOKEN="1",
    )

    with set_temporary_config({"cloud.use_local_secrets": True}), prefect.context(
        secrets={"AWS_CREDENTIALS": secret_credentials}
    ):
        get_boto_client(resource="not a real resource")

    assert client_factory.call_args[1] == dict(
        aws_access_key_id="42",
        aws_secret_access_key="99",
        aws_session_token="1",
    )
def run(
    self,
    data: str,
    key: str = None,
    credentials: dict = None,
    bucket: str = None,
    compression: str = None,
):
    """
    Task run method.

    Args:
        - data (str): the data payload to upload; bytes-like payloads are
            uploaded as-is, strings are encoded first
        - key (str, optional): the Key to upload the data under; if not
            provided, a random `uuid` will be created
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.
        - bucket (str, optional): the name of the S3 Bucket to upload to
        - compression (str, optional): specifies a file format for compression,
            compressing data before upload. Currently supports `'gzip'`.

    Returns:
        - str: the name of the Key the data payload was uploaded to

    Raises:
        - ValueError: if no bucket name is provided, or the compression
            method is not recognized
    """
    if bucket is None:
        raise ValueError("A bucket name must be provided.")

    # Reject unsupported compression before building a client.
    if compression and compression != "gzip":
        raise ValueError(f"Unrecognized compression method '{compression}'.")

    # Normalise to bytes first: gzip.compress only accepts bytes-like data,
    # so compressing a `str` payload directly raises a TypeError.
    payload = data.encode() if isinstance(data, str) else data
    if compression:
        payload = gzip.compress(payload)

    s3_client = get_boto_client(
        "s3", credentials=credentials, use_session=True, **self.boto_kwargs
    )

    # Create a key if none was provided.
    if key is None:
        key = str(uuid.uuid4())

    s3_client.upload_fileobj(io.BytesIO(payload), Bucket=bucket, Key=key)
    return key
def test_session_used_if_profile_name_provided(self, mock_boto3):
    """Passing `profile_name` routes client creation through a boto3 `Session`."""
    get_boto_client(resource="myresource", profile_name="TestProfile")

    session_cls = mock_boto3.session.Session
    assert session_cls.call_args[1] == dict(
        botocore_session=None,
        profile_name="TestProfile",
        region_name=None,
    )

    # The client must be requested from the session instance.
    client_call = session_cls.return_value.client.call_args
    assert client_call[0] == ("myresource",)
    assert client_call[1] == dict(
        aws_access_key_id=None,
        aws_secret_access_key=None,
        aws_session_token=None,
        region_name=None,
    )
def initialize_client(self) -> None:
    """
    Initializes an S3 Client.
    """
    from prefect.utilities.aws import get_boto_client

    # use a new boto session when initializing in case we are in a new thread
    # see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/resources.html?#multithreading-multiprocessing
    self.client = get_boto_client(
        "s3",
        credentials=None,
        use_session=True,
        **self.boto3_kwargs,
    )
def test_client_cache(self, mock_boto3):
    """Clients are cached per parameter set and dropped once unreferenced."""

    class Client:
        pass

    def build_client(*args, **kwargs):
        return Client()

    mock_boto3.client.side_effect = build_client

    first = get_boto_client("myresource")
    assert len(CACHE) == 1

    # Cached if same parameters used
    repeat = get_boto_client("myresource")
    assert repeat is first

    # Different parameters lead to unique client
    other = get_boto_client("myresource", region_name="a new region")
    assert other is not first
    assert len(CACHE) == 2

    # Drop every reference held here; the cache is expected to empty itself.
    del first, repeat, other
    assert len(CACHE) == 0
def test_prefers_passed_credentials_over_secrets(self, mock_boto3):
    """Credentials passed explicitly win over `AWS_CREDENTIALS` in context secrets."""
    passed_credentials = dict(
        ACCESS_KEY="pick",
        SECRET_ACCESS_KEY="these",
        SESSION_TOKEN="please",
    )
    secret_credentials = dict(
        ACCESS_KEY="dont",
        SECRET_ACCESS_KEY="pick",
        SESSION_TOKEN="these",
    )

    with set_temporary_config({"cloud.use_local_secrets": True}), prefect.context(
        secrets={"AWS_CREDENTIALS": secret_credentials}
    ):
        get_boto_client(resource="myresource", credentials=passed_credentials)

    # Only the explicitly passed values may reach boto3.
    assert mock_boto3.client.call_args[1] == dict(
        aws_access_key_id="pick",
        aws_secret_access_key="these",
        aws_session_token="please",
        region_name=None,
    )
def run(
    self,
    key: str,
    credentials: dict = None,
    bucket: str = None,
    compression: str = None,
    as_bytes: bool = False,
):
    """
    Task run method.

    Args:
        - key (str): the name of the Key within this bucket to retrieve
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.
        - bucket (str, optional): the name of the S3 Bucket to download from
        - compression (str, optional): specifies a file format for decompression,
            decompressing data upon download. Currently supports `'gzip'`.
        - as_bytes (bool, optional): If true, result will be returned as
            `bytes` instead of `str`. Defaults to False.

    Returns:
        - str: the contents of this Key / Bucket, as a string or bytes

    Raises:
        - ValueError: if no bucket name is provided, or the compression
            method is not recognized
    """
    if bucket is None:
        raise ValueError("A bucket name must be provided.")

    # Reject unsupported compression up front rather than after the whole
    # object has already been downloaded.
    if compression and compression != "gzip":
        raise ValueError(f"Unrecognized compression method '{compression}'.")

    s3_client = get_boto_client(
        "s3", credentials=credentials, **self.boto_kwargs
    )

    # Download the object into an in-memory buffer.
    stream = io.BytesIO()
    s3_client.download_fileobj(Bucket=bucket, Key=key, Fileobj=stream)

    # getvalue() returns the full buffer regardless of the cursor position.
    output = stream.getvalue()
    if compression:
        output = gzip.decompress(output)

    return output if as_bytes else output.decode()
def delete_object_from_s3(
    key: str = None,
    bucket: str = None,
    credentials: dict = None,
):
    """
    Delete an object from S3.

    Args:
        - key (str): Name of the object within the S3 bucket (/foo/bar/baz.json)
        - bucket (str): Name of the S3 bucket to delete from.
        - credentials (dict): AWS credentials, if None boto will fall back the
            usual methods of resolution.
    """
    client = get_boto_client("s3", credentials=credentials)
    target = {"Bucket": bucket, "Key": key}
    client.delete_object(**target)
def run(
    self,
    prefix: str,
    delimiter: str = "",
    page_size: int = None,
    max_items: int = None,
    credentials: dict = None,
    bucket: str = None,
) -> list:
    """
    Task run method.

    Args:
        - prefix (str): the name of the prefix within this bucket to retrieve objects from
        - delimiter (str): indicates the key hierarchy
        - page_size (int): controls the number of items returned per page of each result
        - max_items (int): limits the maximum number of total items returned during pagination
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.
        - bucket (str, optional): the name of the S3 Bucket to list the files of

    Returns:
        - list[str]: A list of keys that match the given prefix.

    Raises:
        - ValueError: if no bucket name is provided
    """
    if bucket is None:
        raise ValueError("A bucket name must be provided.")

    s3_client = get_boto_client("s3", credentials=credentials)

    pagination = {"PageSize": page_size, "MaxItems": max_items}
    pages = s3_client.get_paginator("list_objects_v2").paginate(
        Bucket=bucket,
        Prefix=prefix,
        Delimiter=delimiter,
        PaginationConfig=pagination,
    )

    # A page without a "Contents" entry contributes no keys.
    return [obj["Key"] for page in pages for obj in page.get("Contents", [])]
def run(
    self,
    credentials: dict = None,
    state_machine_arn: str = None,
    execution_name: str = None,
    execution_input: str = None,
    boto_kwargs: dict = None,
) -> Dict:
    """
    Task run method. Activates AWS Step function.

    Args:
        - credentials (dict, optional): your AWS credentials passed from an upstream
            Secret task; this Secret must be a JSON string
            with two keys: `ACCESS_KEY` and `SECRET_ACCESS_KEY` which will be
            passed directly to `boto3`.  If not provided here or in context, `boto3`
            will fall back on standard AWS rules for authentication.
        - state_machine_arn (str): the Amazon Resource Name (ARN) of the state machine
            to execute
        - execution_name (str): the name of the execution, this name must be unique for
            your AWS account, region, and state machine for 90 days
        - execution_input (str, optional): string that contains the JSON input data for
            the execution, defaults to `"{}"`
        - boto_kwargs (dict, optional): additional keyword arguments to forward to the
            boto client.

    Returns:
        - dict: response from AWS StartExecution endpoint

    Raises:
        - ValueError: if no state machine ARN is provided
    """
    if not state_machine_arn:
        raise ValueError("A state machine ARN must be provided")

    # `**None` raises a TypeError, so treat a missing mapping as "no extras".
    extra_kwargs = boto_kwargs or {}
    step_client = get_boto_client(
        "stepfunctions", credentials=credentials, **extra_kwargs
    )

    # NOTE(review): the `"{}"` default documented for execution_input is not
    # applied in this method; presumably it is supplied elsewhere - confirm.
    return step_client.start_execution(
        stateMachineArn=state_machine_arn,
        name=execution_name,
        input=execution_input,
    )