def test_no_bucket_name_conflict_pos(boto_client: S3Client) -> None:
    """Test that initialize_s3 will create a bucket if the bucket does not
    exist in the s3 instance.

    Parameters
    ----------
    boto_client : S3Client
        Pytest fixture yielding an s3 client.
    """
    # Check bucket name not in list of buckets
    bucket_name = "non-conflict-name"
    buckets = boto_client.list_buckets()["Buckets"]
    for bucket in buckets:
        assert bucket["Name"] != bucket_name

    # Init bucket and check it's now in the bucket list with versioning enabled
    initialize_s3(bucket_name)
    buckets = boto_client.list_buckets()["Buckets"]
    in_buckets = False
    for bucket in buckets:
        if bucket["Name"] == bucket_name:
            in_buckets = True
            break
    assert in_buckets
    assert boto_client.get_bucket_versioning(Bucket=bucket_name)["Status"] == "Enabled"
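# initialize_s3 is exercised by this test (and by test_bucket_name_conflict_pos
# and test_bucket_name_conflict_enable_version_pos below) but its definition is
# not included in this excerpt. A minimal sketch of the behavior the tests
# imply -- create the bucket only if it is missing, then ensure versioning is
# enabled -- might look like the following. The client construction here is an
# assumption, not the actual implementation.
import boto3


def initialize_s3(bucket_name: str) -> None:
    s3_client = boto3.client("s3")
    existing = {b["Name"] for b in s3_client.list_buckets()["Buckets"]}
    if bucket_name not in existing:
        # Bucket does not exist yet, so create it
        s3_client.create_bucket(Bucket=bucket_name)
    # Enable (or re-enable) versioning whether or not the bucket was just made
    s3_client.put_bucket_versioning(
        Bucket=bucket_name,
        VersioningConfiguration={"Status": "Enabled"},
    )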
def upload_analyzers(
    s3_client: S3Client,
    analyzers: Sequence[AnalyzerUpload],
    analyzers_bucket: str,
) -> None:
    """Basically reimplementing upload_local_analyzers.sh.

    Janky, since Jesse will have an analyzer-uploader service pretty soon.
    """
    for local_path, s3_key in analyzers:
        assert s3_key.startswith("analyzers/"), s3_key
        assert s3_key.endswith("/main.py"), s3_key
        logging.info(f"S3 uploading analyzer from {local_path}")
        with open(local_path, "rb") as f:
            s3_client.put_object(Body=f.read(), Bucket=analyzers_bucket, Key=s3_key)
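# AnalyzerUpload is not defined in this excerpt; from the unpacking above it is
# presumably a (local_path, s3_key) pair. A hypothetical alias and call,
# assuming a bucket name and upload list that are not part of the source:
from typing import Sequence, Tuple

AnalyzerUpload = Tuple[str, str]  # (local_path, s3_key) -- assumed shape

example_uploads: Sequence[AnalyzerUpload] = [
    ("analyzers/unique_hostnames/main.py", "analyzers/unique_hostnames/main.py"),
]
# upload_analyzers(s3_client, example_uploads, "analyzers-bucket")  # hypothetical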
def test_multiple_model_versions_pos(
    boto_client: S3Client, boto_bucket: Tuple[Bucket, str]
) -> None:
    """Test that onnx_file_to_s3 will correctly create multiple model objects
    of the same model with versioning metadata.

    Parameters
    ----------
    boto_client : S3Client
        Client to use for uploading new objects.
    boto_bucket : Tuple[Bucket, str]
        boto3 S3 bucket and the bucket name.
    """
    # Upload the same model twice
    boto_bucket, model_bucket = boto_bucket
    model_name = "simple_model"
    for version in range(2):
        onnx_file_to_s3(
            "tests/data/simple_onnx_model.onnx", model_bucket, model_name, version
        )
    models = list(boto_bucket.objects.all())
    assert len(models) == 1

    # Check version metadata added correctly
    versions = boto_bucket.object_versions.filter(Prefix=model_name)
    assert len(list(versions)) == 2
    for version, version_int in zip(versions, [1, 0]):
        tags = boto_client.get_object_tagging(
            Bucket=model_bucket, Key=model_name, VersionId=version.id
        )["TagSet"]
        for tag in tags:
            if tag["Key"] == "model-version":
                assert tag["Value"] == str(version_int)
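# onnx_file_to_s3 is exercised above but not defined in this excerpt. A sketch
# of the behavior the test implies -- upload the file under the model name so a
# new object version is created, then tag that specific version with
# "model-version" -- under the assumption that a plain boto3 client is used:
import boto3


def onnx_file_to_s3(
    file_path: str, model_bucket: str, model_name: str, model_version: int
) -> None:
    s3_client = boto3.client("s3")
    with open(file_path, "rb") as f:
        # On a versioned bucket, each put_object creates a new object version
        response = s3_client.put_object(
            Body=f.read(), Bucket=model_bucket, Key=model_name
        )
    # Tag only the version just created with its model-version number
    s3_client.put_object_tagging(
        Bucket=model_bucket,
        Key=model_name,
        VersionId=response["VersionId"],
        Tagging={"TagSet": [{"Key": "model-version", "Value": str(model_version)}]},
    )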
def delete_all_object_versions(bucket: Bucket, s3_client: S3Client) -> None:
    """Deletes every object and every object's versions inside of an s3 bucket.

    Parameters
    ----------
    bucket : Bucket
        S3 bucket to delete objects from.
    s3_client : S3Client
        S3 client to delete objects with.
    """
    for model_object in bucket.objects.all():
        versions = bucket.object_versions.filter(Prefix=model_object.key)
        for version in versions:
            s3_client.delete_object(
                Bucket=bucket.name,
                Key=model_object.key,
                VersionId=version.id,
            )
def _list_repo_file_etag(s3_client: S3Client, prefix: str) -> Optional[str]:
    repo_file = s3_client.list_objects_v2(Bucket=SCYLLA_REPO_BUCKET, Prefix=prefix)
    if repo_file["KeyCount"] != 1:
        LOGGER.debug("No such file `%s' in %s bucket", prefix, SCYLLA_REPO_BUCKET)
        return None
    return repo_file["Contents"][0]["ETag"]
def should_create_storage_bucket_public_access_block(self, s3_client: S3Client) -> None:
    """Test if Geostore Storage S3 Bucket access is blocked for public."""
    response = s3_client.get_public_access_block(Bucket=self.storage_bucket_name)
    public_access_block_configuration = response["PublicAccessBlockConfiguration"]
    assert public_access_block_configuration["BlockPublicAcls"] is True
    assert public_access_block_configuration["IgnorePublicAcls"] is True
    assert public_access_block_configuration["BlockPublicPolicy"] is True
    assert public_access_block_configuration["RestrictPublicBuckets"] is True
def wait_for_s3_key(bucket_name: str, key: str, s3_client: S3Client) -> None:
    process_timeout = datetime.now() + timedelta(minutes=3)
    while CONTENTS_KEY not in s3_client.list_objects(Bucket=bucket_name, Prefix=key):
        assert (  # pragma: no cover
            datetime.now() < process_timeout
        ), f"S3 file '{bucket_name}/{key}' was not created, process timed out."
        time.sleep(5)  # pragma: no cover
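# CONTENTS_KEY is not defined in this excerpt. Since list_objects only includes
# a "Contents" entry in its response when at least one object matches the
# prefix, it is presumably this constant (an assumption):
CONTENTS_KEY = "Contents"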
def delete_s3_versions(
    bucket_name: str,
    version_list: List[ObjectIdentifierTypeDef],
    s3_client: S3Client,
) -> None:
    for index in range(0, len(version_list), DELETE_OBJECTS_MAX_KEYS):
        response = s3_client.delete_objects(
            Bucket=bucket_name,
            Delete=DeleteTypeDef(
                Objects=version_list[index : index + DELETE_OBJECTS_MAX_KEYS]
            ),
        )
        print(response)
def _download_remote_blob(
    s3_client: S3Client, bucket: str, key: str
) -> Optional["StructuredConfig"]:
    try:
        obj = s3_client.get_object(Bucket=bucket, Key=key)
        return json.loads(obj["Body"].read())
    except Exception:
        logger.debug(
            "Could not successfully download and parse any pre-existing config"
        )
        return None
def test_bucket_name_conflict_pos(boto_client: S3Client) -> None:
    """Test that initialize_s3 will not create a bucket if the bucket already
    exists in the s3 instance.

    Parameters
    ----------
    boto_client : S3Client
        Pytest fixture yielding an s3 client.
    """
    # Create bucket
    bucket_name = "test-conflict"
    boto_client.create_bucket(Bucket=bucket_name)

    # Init bucket and check it's still in the bucket list
    initialize_s3(bucket_name)
    buckets = boto_client.list_buckets()["Buckets"]
    in_buckets = False
    for bucket in buckets:
        if bucket["Name"] == bucket_name:
            in_buckets = True
            break
    assert in_buckets
def test_no_conflict_pos(boto_client: S3Client) -> None:
    """Test that verify_model_version will not raise an error if a model
    already exists in S3, but does not have a conflicting version.

    Parameters
    ----------
    boto_client : S3Client
        Client to use for uploading new objects.
    """
    model_name = "test_model"
    with NamedTemporaryFile() as tempfile:
        tempfile.write(b"test data")
        tempfile.seek(0)  # Rewind so upload_fileobj reads the written bytes
        boto_client.upload_fileobj(
            tempfile,
            TEST_BUCKET_NAME,
            model_name,
        )
    version = 1
    boto_client.put_object_tagging(
        Bucket=TEST_BUCKET_NAME,
        Key=model_name,
        Tagging={"TagSet": [{"Key": "model-version", "Value": str(version)}]},
    )
    verify_model_version(TEST_BUCKET_NAME, model_name, 2, boto_client)
def get_s3_prefix_versions(
    bucket_name: str, prefix: str, s3_client: S3Client
) -> List[ObjectIdentifierTypeDef]:
    version_list: List[ObjectIdentifierTypeDef] = []
    object_versions_paginator = s3_client.get_paginator("list_object_versions")
    for object_versions_page in object_versions_paginator.paginate(
        Bucket=bucket_name, Prefix=prefix
    ):
        for version in object_versions_page.get("Versions", []):
            version_list.append(
                {"Key": version["Key"], "VersionId": version["VersionId"]}
            )
    assert version_list, version_list
    return version_list
def test_conflicting_versions_neg(boto_client: S3Client) -> None:
    """Test that verify_model_version will raise an error if a model and given
    version already exist in S3.

    Parameters
    ----------
    boto_client : S3Client
        Client to use for uploading new objects.
    """
    model_name = "test_model"
    with NamedTemporaryFile() as tempfile:
        tempfile.write(b"test data")
        tempfile.seek(0)  # Rewind so upload_fileobj reads the written bytes
        boto_client.upload_fileobj(
            tempfile,
            TEST_BUCKET_NAME,
            model_name,
        )
    version = 2
    boto_client.put_object_tagging(
        Bucket=TEST_BUCKET_NAME,
        Key=model_name,
        Tagging={"TagSet": [{"Key": "model-version", "Value": str(version)}]},
    )
    with pytest.raises(ValueError) as err:
        verify_model_version(TEST_BUCKET_NAME, model_name, version, boto_client)
    assert "already exists" in str(err.value)
def verify_model_version(
    model_bucket: str,
    model_name: str,
    model_version: Union[str, int],
    s3_client: S3Client,
) -> None:
    """Verify that the given model and version do not conflict with
    pre-existing S3 objects.

    Parameters
    ----------
    model_bucket : str
        Bucket that models are uploaded to.
    model_name : str
        Name of the model and bucket object.
    model_version : Union[str, int]
        New version for a model.
    s3_client : S3Client
        S3 client for connecting to s3.

    Raises
    ------
    ValueError
        If the given model and version already exist in S3.
    """
    bucket = boto3.resource(
        "s3",
        endpoint_url=os.getenv("S3_URL", "http://localhost:9000"),
        aws_access_key_id=os.getenv("S3_ACCESS_KEY", "minioadmin"),
        aws_secret_access_key=os.getenv("S3_SECRET_KEY", "minioadmin"),
        config=botocore.config.Config(signature_version="s3v4"),
        region_name=os.getenv("S3_REGION", "us-east-1"),
        verify=False,
    ).Bucket(model_bucket)
    for version in bucket.object_versions.filter(Prefix=model_name):
        tags = s3_client.get_object_tagging(
            Bucket=model_bucket, Key=model_name, VersionId=version.id
        )["TagSet"]
        for tag in tags:
            if tag["Key"] == "model-version" and tag["Value"] == str(model_version):
                logging.error("Model version given for serialization already exists")
                raise ValueError(
                    f"Given model version '{model_version}' already exists, please "
                    "try again with a new version"
                )
def get_s3_key_versions(
    bucket_name: str, key: str, s3_client: S3Client
) -> List[ObjectIdentifierTypeDef]:
    version_list: List[ObjectIdentifierTypeDef] = []
    object_versions_paginator = s3_client.get_paginator("list_object_versions")
    for object_versions_page in object_versions_paginator.paginate(
        Bucket=bucket_name, Prefix=key
    ):
        for marker in object_versions_page.get("DeleteMarkers", []):
            if marker["Key"] == key:
                version_list.append(
                    {"Key": marker["Key"], "VersionId": marker["VersionId"]}
                )
        for version in object_versions_page.get("Versions", []):
            if version["Key"] == key:
                version_list.append(
                    {"Key": version["Key"], "VersionId": version["VersionId"]}
                )
    assert version_list, version_list
    return version_list
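# get_s3_key_versions and get_s3_prefix_versions pair naturally with
# delete_s3_versions above: collect every version (and delete marker) for a
# key, then remove them in DELETE_OBJECTS_MAX_KEYS-sized batches. A usage
# sketch, assuming a bucket and key that are not part of the source:
import boto3

s3_client = boto3.client("s3")
versions = get_s3_key_versions("example-bucket", "example/key.txt", s3_client)
delete_s3_versions("example-bucket", versions, s3_client)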
def should_enable_storage_bucket_versioning(self, s3_client: S3Client) -> None:
    """Test if Geostore Storage S3 Bucket versioning is enabled."""
    response = s3_client.get_bucket_versioning(Bucket=self.storage_bucket_name)
    assert response["Status"] == "Enabled"
def should_create_storage_bucket_location_constraint(self, s3_client: S3Client) -> None:
    """Test if Geostore Storage S3 Bucket is created in correct region."""
    response = s3_client.get_bucket_location(Bucket=self.storage_bucket_name)
    assert response["LocationConstraint"] == "ap-southeast-2"
def test_bucket_name_conflict_enable_version_pos(boto_client: S3Client) -> None:
    """Test that initialize_s3 will not create a bucket if the bucket already
    exists in the s3 instance, and that it will enable versioning.

    Parameters
    ----------
    boto_client : S3Client
        Pytest fixture yielding an s3 client.
    """
    # Create bucket and disable versioning on the bucket just made
    bucket_name = "test-conflict"
    boto_client.create_bucket(Bucket=bucket_name)
    boto_client.put_bucket_versioning(
        Bucket=bucket_name,
        VersioningConfiguration={"Status": "Suspended"},
    )

    # Init bucket and check it's in the bucket list and with versioning
    initialize_s3(bucket_name)
    buckets = boto_client.list_buckets()["Buckets"]
    in_buckets = False
    for bucket in buckets:
        if bucket["Name"] == bucket_name:
            in_buckets = True
            break
    assert in_buckets
    assert boto_client.get_bucket_versioning(Bucket=bucket_name)["Status"] == "Enabled"


class SimpleMLP(torch.nn.Module):
    """Simple neural network for testing purposes.

    Parameters
    ----------
    layer_sizes : list
        A list of the layer sizes inclusive of the input and output layers.
    classification : bool
        If True, the output is left as raw logits (softmax is left to the
        caller). Otherwise, L2 normalization is applied to the output.
    """

    def __init__(
        self,
        layer_sizes=[256, 128, 8],
        classification=True,
    ):
        super().__init__()
        self.classification = classification
        modules = []
        for i, s in enumerate(layer_sizes[1:]):
            modules += [torch.nn.Linear(layer_sizes[i], s)]
            if i + 1 != len(layer_sizes) - 1:
                modules += [torch.nn.ReLU()]
        self.net = torch.nn.Sequential(*modules)

    def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
        """Function for executing the forward pass of a torch nn model.

        Parameters
        ----------
        input_tensor : torch.Tensor
            Input to the model.

        Returns
        -------
        torch.Tensor
            Result of the forward pass of the network.
        """
        input_tensor = input_tensor.reshape(
            input_tensor.shape[0], input_tensor.shape[-1]
        )
        input_tensor = self.net(input_tensor)
        if not self.classification:
            input_tensor = torch.nn.functional.normalize(input_tensor)
        return input_tensor


# serialize_model_to_file
def test_serialize_to_temp_file_pos() -> None:
    """Test that serialize_model_to_file will correctly serialize and
    deserialize from a temporary file.
    """
    # Serialize a simple model to a tempfile
    model = SimpleMLP()
    dummy_forward_input = torch.randn(1, 1, 256).to(
        "cuda" if torch.cuda.is_available() else "cpu"
    )
    with NamedTemporaryFile() as tempfile:
        serialize_model_to_file(
            model,
            tempfile.name,
            dummy_forward_input,
            ["output"],
            {"input": {0: "batch_size"}},
        )
        tempfile.seek(0)

        # Deserialize the model from the tempfile
        onnx_options = onnxruntime.SessionOptions()
        onnxruntime.InferenceSession(
            str(tempfile.name),
            sess_options=onnx_options,
        )


@pytest.fixture
def boto_bucket() -> Iterator[Tuple[Bucket, str]]:
    """Pytest fixture that yields an S3 bucket for testing and the name of
    that bucket.

    Yields
    ------
    Iterator[Tuple[Bucket, str]]
        The s3 bucket and the name of the bucket.
    """
    s3 = boto3.resource(
        "s3",
        endpoint_url=os.getenv("S3_URL", "http://localhost:9000"),
        aws_access_key_id=os.getenv("S3_ACCESS_KEY", "minioadmin"),
        aws_secret_access_key=os.getenv("S3_SECRET_KEY", "minioadmin"),
        config=botocore.config.Config(signature_version="s3v4"),
        region_name=os.getenv("S3_REGION", "us-east-1"),
        verify=False,
    )
    yield s3.Bucket(TEST_BUCKET_NAME), TEST_BUCKET_NAME
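# serialize_model_to_file is exercised above but not defined in this excerpt.
# Judging by the call site (model, file path, dummy forward input, output
# names, dynamic axes), it is presumably a thin wrapper around
# torch.onnx.export; a sketch under that assumption, with the "input" name
# inferred from the dynamic_axes dict the test passes:
from pathlib import Path
from typing import Dict, List, Union

import torch


def serialize_model_to_file(
    model: torch.nn.Module,
    file_path: Union[str, Path],
    dummy_forward_input: torch.Tensor,
    output_names: List[str],
    dynamic_axes: Dict[str, Dict[int, str]],
) -> None:
    # Trace the model with the dummy input and write ONNX to file_path
    torch.onnx.export(
        model,
        dummy_forward_input,
        str(file_path),
        input_names=["input"],
        output_names=output_names,
        dynamic_axes=dynamic_axes,
    )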
# pytorch_to_onnx_file
@pytest.fixture(autouse=True)
def clean_models() -> Iterator[None]:
    """Pytest fixture for clearing the test models after running a test function."""
    s3_client = create_s3_client()
    s3 = boto3.resource(
        "s3",
        aws_access_key_id=environ["AWS_ACCESS_KEY"],
        aws_secret_access_key=environ["AWS_SECRET_KEY"],
        endpoint_url=environ["S3ENDPOINT_URL"],
    )
    bucket = s3.Bucket(TEST_BUCKET_NAME)
    delete_all_object_versions(bucket, s3_client)
    yield
    delete_all_object_versions(bucket, s3_client)


def deserialize_model_from_file(file_path: Union[str, Path]) -> None:
    """Deserializes an onnx model from a file.

    Parameters
    ----------
    file_path : Union[str, Path]
        Path to the onnx model to deserialize.
    """
    onnx_options = onnxruntime.SessionOptions()
    onnxruntime.InferenceSession(
        str(file_path),
        sess_options=onnx_options,
    )


def test_to_file_pos() -> None:
    """Test that pytorch_to_onnx_file will correctly serialize a model to file."""
    # Serialize and check the model can be deserialized after
    model = SimpleMLP()
    dummy_forward_input = torch.randn(1, 1, 256).to(
        "cuda" if torch.cuda.is_available() else "cpu"
    )
    with NamedTemporaryFile() as tempfile:
        pytorch_to_onnx_file(
            model,
            tempfile.name,
            1,
            256,
            dynamic_shape=False,
            dummy_forward_input=dummy_forward_input,
        )
        deserialize_model_from_file(tempfile.name)


def test_no_existing_objects_pos(boto_client: S3Client) -> None:
    """Test that verify_model_version won't raise any error if there are no
    objects by the name of the given model.

    Parameters
    ----------
    boto_client : S3Client
        Client to use for uploading new objects.
    """
    verify_model_version(TEST_BUCKET_NAME, "test_model", 0, boto_client)
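# pytorch_to_onnx_file is exercised above but not defined in this excerpt.
# From the call site (model, file path, batch size, feature dimension,
# dynamic_shape flag, optional dummy forward input) it presumably builds a
# dummy input when one is not supplied and exports via torch.onnx.export; a
# sketch under those assumptions, with parameter names that are hypothetical:
from pathlib import Path
from typing import Dict, Optional, Union

import torch


def pytorch_to_onnx_file(
    model: torch.nn.Module,
    file_path: Union[str, Path],
    batch_size: int,
    input_dim: int,
    dynamic_shape: bool = True,
    dummy_forward_input: Optional[torch.Tensor] = None,
) -> None:
    if dummy_forward_input is None:
        # Build a dummy input matching the expected (batch, 1, features) shape
        dummy_forward_input = torch.randn(batch_size, 1, input_dim)
    # Only mark the batch dimension dynamic when requested
    dynamic_axes: Optional[Dict[str, Dict[int, str]]] = (
        {"input": {0: "batch_size"}} if dynamic_shape else None
    )
    torch.onnx.export(
        model,
        dummy_forward_input,
        str(file_path),
        input_names=["input"],
        output_names=["output"],
        dynamic_axes=dynamic_axes,
    )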
""" model_name = "test_model" with NamedTemporaryFile() as tempfile: tempfile.write(b"test data") boto_client.upload_fileobj( tempfile, TEST_BUCKET_NAME, model_name, ) version = 2 boto_client.put_object_tagging( Bucket=TEST_BUCKET_NAME, Key=model_name, Tagging={"TagSet": [{"Key": "model-version", "Value": str(version)}]}, ) with pytest.raises(ValueError) as err: verify_model_version(TEST_BUCKET_NAME, model_name, version, boto_client) assert "already exists" in str(err.value) def test_multiple_model_versions_pos( boto_client: S3Client, boto_bucket: Tuple[Bucket, str] ) -> None: """Test that onnx_file_to_s3 will correctly create multiple model objects of the same model with versioning metadata. Parameters ---------- boto_client : S3Client Client to use for uploading new objects. boto_bucket : Tuple[Bucket, str] boto3 S3 bucket and the bucket name. """ # Upload the same model twice boto_bucket, model_bucket = boto_bucket model_name = "simple_model" for version in range(2): onnx_file_to_s3( "tests/data/simple_onnx_model.onnx", model_bucket, model_name, version ) models = list(boto_bucket.objects.all()) assert len(models) == 1 # Check version metadata added correctly versions = boto_bucket.object_versions.filter(Prefix=model_name) assert len(list(versions)) == 2 for version, version_int in zip(versions, [1, 0]): tags = boto_client.get_object_tagging( Bucket=model_bucket, Key=model_name, VersionId=version.id )["TagSet"] for tag in tags: if tag["Key"] == "model-version": assert tag["Value"] == str(version_int)