def read(self, bucket: str, path: str) -> bytes:
    """Fetch an object from a bucket and return its whole content.

    Args:
        bucket: Bucket name.
        path: Path to locate the object in the bucket.

    Returns:
        The content of the object as bytes.

    Raises:
        ConnectionError: Raised when no credentials are provided.
        exceptions.ObjectNotFound: Raised when the object is not found.
        exceptions.BucketNotFound: Raised when the bucket is not found or
            the request parameters fail validation.
        Exception: Raised for any other failure, wrapping the original error.
    """
    try:
        response = self.client.get_object(Bucket=bucket, Key=path)
        return response["Body"].read()
    except ParamValidationError as ex:
        raise exceptions.BucketNotFound(ex)
    except NoCredentialsError:  # pragma: no cover
        raise ConnectionError("Cannot connect, no credentials provided")
    except Exception as ex:
        # Service errors are told apart by the name of their class.
        error_name = type(ex).__name__
        if error_name == "NoSuchKey":
            raise exceptions.ObjectNotFound(
                f"Object '{path}' not found in bucket '{bucket}'"
            )
        if error_name in ("NoSuchBucket", "InvalidBucketName"):
            raise exceptions.BucketNotFound(f"Bucket '{bucket}' not found: {ex}")
        raise Exception(ex)  # pragma: no cover
def _list_objects(
    self, bucket: str, prefix: str = "", max_objects: int = None
) -> List[dict]:
    """List the attributes of the objects stored in a bucket.

    Args:
        bucket: Bucket name.
        prefix: Objects prefix to restrict the list of results.
        max_objects: Max number of keys to output. When omitted (or not a
            positive number) every matching object is returned.

    Returns:
        List of objects attributes in the bucket (the "Contents" entries of
        every listing page), truncated to ``max_objects`` when a limit is
        given.

    Raises:
        exceptions.BucketNotFound: Raised when the bucket is not found or
            the request parameters fail validation.
        Exception: Raised for any other client error.
    """
    # No implicit cap: callers that omit max_objects have always received
    # every key, so only an explicit positive limit truncates the result.
    limit = max_objects if max_objects and max_objects > 0 else None
    output = []
    paginator = self.client.get_paginator("list_objects_v2")
    try:
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            output.extend(page.get("Contents", []))
            # Stop requesting further pages once enough keys were collected.
            if limit is not None and len(output) >= limit:
                break
    except ParamValidationError as ex:
        raise exceptions.BucketNotFound(ex) from ex
    except ClientError as ex:
        if type(ex).__name__ == "NoSuchBucket":
            raise exceptions.BucketNotFound(f"Bucket '{bucket}' not found.") from ex
        # Surface unexpected errors instead of returning a partial listing.
        raise Exception(ex) from ex
    return output if limit is None else output[:limit]
def copy(
    self,
    bucket_source: str,
    bucket_destination: str,
    path_source: str,
    path_destination: str = None,
    configuration: dict = None,
) -> None:
    """Copy an object from one bucket location to another.

    The source object is requested first to read its content type, which
    is then always forwarded to the copy request. The dictionary passed as
    ``configuration`` is never modified.

    Args:
        bucket_source: Bucket name source.
        bucket_destination: Bucket name destination.
        path_source: Initial path to locate the object in bucket.
        path_destination: Final path to locate the object in bucket.
            Defaults to ``path_source`` when omitted.
        configuration: Extra keyword arguments for S3.Client.copy_object.
            See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html?highlight=copy_object

    Raises:
        ConnectionError: Raised when a connection error to s3 occurred.
        exceptions.ObjectNotFound: Raised when the object is not found.
        exceptions.BucketNotFound: Raised when the source or destination
            bucket is not found.
        Exception: Raised for any other client error.
    """
    # Work on a private copy so the caller's dictionary stays untouched.
    configuration = dict(configuration) if configuration else {}
    # Resolve the default before comparing, so an in-place copy is also
    # detected when path_destination is omitted.
    key_destination = path_destination if path_destination else path_source
    if (bucket_destination, key_destination) == (bucket_source, path_source):
        # S3 is documented to reject copying an object onto itself unless
        # its metadata is replaced.
        configuration["MetadataDirective"] = "REPLACE"
    try:
        obj = self.client.get_object(Bucket=bucket_source, Key=path_source)
    except NoCredentialsError as ex:  # pragma: no cover
        raise ConnectionError("Cannot connect, no credentials provided") from ex
    except ClientError as ex:
        if type(ex).__name__ == "NoSuchKey":
            raise exceptions.ObjectNotFound(
                f"Object '{path_source}' not found in bucket '{bucket_source}'"
            ) from ex
        if type(ex).__name__ == "NoSuchBucket":
            raise exceptions.BucketNotFound(
                f"Bucket '{bucket_source}' not found."
            ) from ex
        raise Exception(ex) from ex  # pragma: no cover
    configuration["ContentType"] = obj["ContentType"]
    try:
        self.client.copy_object(
            Bucket=bucket_destination,
            CopySource={"Bucket": bucket_source, "Key": path_source},
            Key=key_destination,
            **configuration,
        )
    except ClientError as ex:
        if type(ex).__name__ == "NoSuchBucket":
            raise exceptions.BucketNotFound(
                f"Bucket '{bucket_destination}' not found."
            ) from ex
        raise Exception(ex) from ex  # pragma: no cover
def upload(
    self, bucket: str, path_source: str, path_destination: str = None
) -> None:
    """Send a file from the local filesystem to a bucket.

    Args:
        bucket: Bucket name.
        path_source: Path to locate the object on fs.
        path_destination: Path to store the object to. Defaults to
            ``path_source`` when omitted.

    Raises:
        FileNotFoundError: Raised when file path_source is not found.
        exceptions.BucketNotFound: Raised when the upload fails with an
            S3UploadFailedError.
        Exception: Raised for any other failure, wrapping the original error.
    """
    if not os.path.exists(path_source):
        raise FileNotFoundError(f"{path_source} not found")
    target_key = path_destination or path_source
    try:
        self.client.upload_file(
            Filename=path_source, Bucket=bucket, Key=target_key
        )
    except Exception as ex:
        # The transfer error class is recognised by its name.
        if type(ex).__name__ == "S3UploadFailedError":
            raise exceptions.BucketNotFound(f"Bucket '{bucket}' not found.")
        raise Exception(ex)  # pragma: no cover
def write(
    self, obj: bytes, bucket: str, path: str, configuration: dict = None
) -> None:
    """Store an in-memory object in a bucket.

    Args:
        obj: Object data to store in a bucket.
        bucket: Bucket name.
        path: Path to store the object to.
        configuration: Extra keyword arguments forwarded to put_object.
            See: https://boto3.amazonaws.com/v1/documentation/api/1.14.3/reference/services/s3.html?highlight=s3%20client#S3.Client.put_object
            For example, a *.json.gz file should be uploaded with the
            following configuration:
            {
                "ContentEncoding": "gzip",
                "ContentType": "application/json"
            }

    Raises:
        ConnectionError: Raised when no credentials are provided.
        TypeError: Raised when provided attributes have wrong types.
        exceptions.BucketNotFound: Raised when the bucket is not found.
        Exception: Raised for any other failure, wrapping the original error.
    """
    extra_args = configuration or {}
    try:
        self.client.put_object(Body=obj, Bucket=bucket, Key=path, **extra_args)
    except NoCredentialsError:  # pragma: no cover
        raise ConnectionError("Cannot connect, no credentials provided")
    except Exception as ex:
        # Errors are told apart by the name of their class.
        error_name = type(ex).__name__
        if error_name == "NoSuchBucket":
            raise exceptions.BucketNotFound(f"Bucket '{bucket}' not found.")
        if error_name == "ParamValidationError":
            raise TypeError("Provided function attributes have wrong type.")
        raise Exception(ex)  # pragma: no cover
def list_objects_size(
    self, bucket: str, prefix: str = "", max_objects: int = None
) -> List[Tuple[str, int]]:
    """List the objects of a bucket together with their size.

    Args:
        bucket: Bucket name.
        prefix: Objects prefix to restrict the list of results.
        max_objects: Max number of keys to output.

    Returns:
        List of tuples with objects path and size in bytes.

    Raises:
        exceptions.BucketNotFound: Raised when the bucket is not found.
    """
    # pylint: disable=protected-access
    bucket_obj = self.client.lookup_bucket(bucket)
    if not bucket_obj:
        raise exceptions.BucketNotFound(f"Bucket '{bucket}' not found.")
    blobs = bucket_obj.list_blobs(prefix=prefix, max_results=max_objects)
    # The size is taken from the raw blob properties and converted to int.
    return [(blob.name, int(blob._properties["size"])) for blob in blobs]
def list_objects(
    self, bucket: str, prefix: str = "", max_objects: int = None
) -> List[str]:
    """List the paths of the objects stored in a bucket.

    Args:
        bucket: Bucket name.
        prefix: Objects prefix to restrict the list of results.
        max_objects: Max number of keys to output.

    Returns:
        List of objects path in the bucket.

    Raises:
        exceptions.BucketNotFound: Raised when the bucket is not found.
    """
    bucket_obj = self.client.lookup_bucket(bucket)
    if not bucket_obj:
        raise exceptions.BucketNotFound(f"Bucket '{bucket}' not found.")
    blobs = bucket_obj.list_blobs(prefix=prefix, max_results=max_objects)
    return [blob.name for blob in blobs]
def download(
    self,
    bucket: str,
    path_source: str,
    path_destination: str,
    configuration: dict = None,
) -> None:
    """Save an object from a bucket to the local filesystem.

    Args:
        bucket: Bucket name.
        path_source: Path to locate the object in bucket.
        path_destination: Fs path to store the object to.
        configuration: Transfer config parameters. When given it is
            validated against Client.S3_TRANSFER_SCHEMA; otherwise the
            defaults declared in that schema are used.
            See: https://boto3.amazonaws.com/v1/documentation/api/1.14.2/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig

    Raises:
        exceptions.ConfigurationError: Raised when the configuration does
            not pass schema validation.
        exceptions.ObjectNotFound: Raised when the object is not found.
        exceptions.BucketNotFound: Raised when the bucket is not found.
        exceptions.DestinationPathError: Raised when the object cannot be
            saved to the provided location.
        exceptions.DestinationPathPermissionsError: Raised when the object
            cannot be saved to the provided location due to lack of
            permissions.
        Exception: Raised for any other client error.
    """
    if not configuration:
        # Nothing supplied: fall back to the defaults declared in the schema.
        schema_properties = Client.S3_TRANSFER_SCHEMA["properties"]
        configuration = {
            option: spec["default"] for option, spec in schema_properties.items()
        }
    else:
        try:
            validate(Client.S3_TRANSFER_SCHEMA, configuration)
        except JsonSchemaException as ex:
            raise exceptions.ConfigurationError(ex)

    try:
        transfer_config = boto3.s3.transfer.TransferConfig(**configuration)
        self.client.download_file(
            Filename=path_destination,
            Bucket=bucket,
            Key=path_source,
            Config=transfer_config,
        )
    except (NotADirectoryError, FileNotFoundError):
        raise exceptions.DestinationPathError(
            f"Cannot download file to {path_destination}"
        )
    except PermissionError:
        raise exceptions.DestinationPathPermissionsError(
            f"Cannot download file to {path_destination}"
        )
    except ClientError as ex:
        if type(ex).__name__ == "NoSuchBucket":
            raise exceptions.BucketNotFound(f"Bucket '{bucket}' not found.")
        # A missing object is reported through the "404" error code.
        if ex.response["Error"]["Code"] == "404":
            raise exceptions.ObjectNotFound(
                f"Object '{path_source}' not found in bucket '{bucket}'"
            )
        raise Exception(ex)  # pragma: no cover
def delete_object(self, bucket: str, path: str) -> None:
    """Delete an object from a bucket.

    Args:
        bucket: Bucket name.
        path: Path to locate the object in bucket.

    Raises:
        ConnectionError: Raised when no credentials are provided.
        exceptions.BucketNotFound: Raised when the bucket is not found.
        Exception: Raised for any other failure, wrapping the original error.
    """
    try:
        self.client.delete_object(Bucket=bucket, Key=path)
    except NoCredentialsError as ex:  # pragma: no cover
        # Same mapping as the read/write/copy methods of this client.
        raise ConnectionError("Cannot connect, no credentials provided") from ex
    except Exception as ex:
        if type(ex).__name__ == "NoSuchBucket":
            raise exceptions.BucketNotFound(f"Bucket '{bucket}' not found.") from ex
        raise Exception(ex) from ex  # pragma: no cover
def delete_objects(self, bucket: str, paths: List[str]) -> None:
    """Delete several objects from a bucket.

    The keys are sent in batches of at most 1000 per request; an empty
    ``paths`` sends no request at all.

    Args:
        bucket: Bucket name.
        paths: Paths to locate the objects in bucket.

    Raises:
        ConnectionError: Raised when no credentials are provided.
        cloud_connectors.cloud_storage.exceptions.BucketNotFound: Raised
            when the bucket is not found.
        Exception: Raised for any other failure, wrapping the original error.
    """
    # S3 documents a limit of 1000 keys per DeleteObjects request.
    batch_size = 1000
    keys = list(paths)
    try:
        for start in range(0, len(keys), batch_size):
            batch = keys[start : start + batch_size]
            self.client.delete_objects(
                Bucket=bucket,
                Delete={"Objects": [{"Key": key} for key in batch], "Quiet": True},
            )
    except NoCredentialsError as ex:  # pragma: no cover
        # Same mapping as the read/write/copy methods of this client.
        raise ConnectionError("Cannot connect, no credentials provided") from ex
    except Exception as ex:
        if type(ex).__name__ == "NoSuchBucket":
            raise exceptions.BucketNotFound(f"Bucket '{bucket}' not found.") from ex
        raise Exception(ex) from ex  # pragma: no cover