Example #1
def test_no_bucket_name_conflict_pos(boto_client: S3Client) -> None:
    """Test that initialize_s3 will create a bucket if the bucket does not exist in the
    s3 instance.

    Parameters
    ----------
    boto_client : S3Client
        Pytest fixture yielding an S3 client.
    """
    # Check bucket name not in list of buckets
    bucket_name = "non-conflict-name"
    buckets = boto_client.list_buckets()["Buckets"]
    for bucket in buckets:
        assert bucket["Name"] != bucket_name

    # Init bucket and check it's now in the bucket list with versioning enabled
    initialize_s3(bucket_name)
    buckets = boto_client.list_buckets()["Buckets"]
    in_buckets = False
    for bucket in buckets:
        if bucket["Name"] == bucket_name:
            in_buckets = True
            break
    assert in_buckets
    assert boto_client.get_bucket_versioning(Bucket=bucket_name)["Status"] == "Enabled"
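The initialize_s3 helper exercised by these tests is not shown on this page. Below is a minimal sketch consistent with what the tests assert (create the bucket only if it is missing, always enable versioning), assuming the MinIO-style environment defaults used by the fixtures further down; treat it as a hypothetical reconstruction, not the original implementation:

import os

import boto3
import botocore.config


def initialize_s3(bucket_name: str) -> None:
    # Hypothetical reconstruction inferred from Examples #1, #10 and #18.
    s3_client = boto3.client(
        "s3",
        endpoint_url=os.getenv("S3_URL", "http://localhost:9000"),
        aws_access_key_id=os.getenv("S3_ACCESS_KEY", "minioadmin"),
        aws_secret_access_key=os.getenv("S3_SECRET_KEY", "minioadmin"),
        config=botocore.config.Config(signature_version="s3v4"),
        region_name=os.getenv("S3_REGION", "us-east-1"),
        verify=False,
    )
    # Create the bucket only if the name is not already taken (Examples #1, #10)
    if bucket_name not in [b["Name"] for b in s3_client.list_buckets()["Buckets"]]:
        s3_client.create_bucket(Bucket=bucket_name)
    # Enable versioning unconditionally, even on a pre-existing bucket (Example #18)
    s3_client.put_bucket_versioning(
        Bucket=bucket_name, VersioningConfiguration={"Status": "Enabled"}
    )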
Example #2
def upload_analyzers(s3_client: S3Client, analyzers: Sequence[AnalyzerUpload],
                     analyzers_bucket: str) -> None:
    """
    Basically reimplementing upload_local_analyzers.sh
    Janky, since Jesse will have an analyzer-uploader service pretty soon.
    """
    for (local_path, s3_key) in analyzers:
        assert s3_key.startswith("analyzers/"), s3_key
        assert s3_key.endswith("/main.py"), s3_key
        logging.info(f"S3 uploading analyzer from {local_path}")
        with open(local_path, "rb") as f:
            s3_client.put_object(Body=f.read(),
                                 Bucket=analyzers_bucket,
                                 Key=s3_key)
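A hypothetical invocation, with made-up paths and bucket name, showing the (local_path, s3_key) pairing the assertions above expect:

analyzers = [
    # hypothetical analyzer; keys must match "analyzers/<name>/main.py"
    ("./analyzers/example/main.py", "analyzers/example/main.py"),
]
upload_analyzers(s3_client, analyzers, analyzers_bucket="analyzers-bucket")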
Example #3
    def test_multiple_model_versions_pos(
        boto_client: S3Client, boto_bucket: Tuple[Bucket, str]
    ) -> None:
        """Test that onnx_file_to_s3 will correctly create multiple model objects of the
        same model with versioning metadata.

        Parameters
        ----------
        boto_client : S3Client
            Client to use for uploading new objects.
        boto_bucket : Tuple[Bucket, str]
            boto3 S3 bucket and the bucket name.
        """
        # Upload the same model twice
        boto_bucket, model_bucket = boto_bucket
        model_name = "simple_model"
        for version in range(2):
            onnx_file_to_s3(
                "tests/data/simple_onnx_model.onnx", model_bucket, model_name, version
            )
        models = list(boto_bucket.objects.all())
        assert len(models) == 1

        # Check version metadata added correctly
        versions = boto_bucket.object_versions.filter(Prefix=model_name)
        assert len(list(versions)) == 2
        for version, version_int in zip(versions, [1, 0]):
            tags = boto_client.get_object_tagging(
                Bucket=model_bucket, Key=model_name, VersionId=version.id
            )["TagSet"]
            for tag in tags:
                if tag["Key"] == "model-version":
                    assert tag["Value"] == str(version_int)
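onnx_file_to_s3 itself does not appear on this page. Here is a sketch that would satisfy the assertions above (a single object key, plus one object version carrying a model-version tag per upload); create_s3_client is the helper assumed by the clean_models fixture shown later, and the whole body is a hypothetical reconstruction:

def onnx_file_to_s3(file_path: str, model_bucket: str, model_name: str,
                    model_version: int) -> None:
    # Hypothetical reconstruction, not the original implementation.
    s3_client = create_s3_client()
    with open(file_path, "rb") as f:
        s3_client.put_object(Body=f.read(), Bucket=model_bucket, Key=model_name)
    # Tagging without a VersionId targets the just-written (latest) version,
    # which is how each version ends up with its own model-version tag.
    s3_client.put_object_tagging(
        Bucket=model_bucket,
        Key=model_name,
        Tagging={"TagSet": [{"Key": "model-version", "Value": str(model_version)}]},
    )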
Example #4
    def delete_all_object_versions(bucket: Bucket, s3_client: S3Client) -> None:
        """Deletes every object and every object's versions inside of an s3 bucket.

        Parameters
        ----------
        bucket : Bucket
            S3 bucket to delete objects from.
        s3_client : S3Client
            S3 client to delete objects with.
        """
        model_objects = bucket.objects.all()
        for model_object in model_objects:
            versions = bucket.object_versions.filter(Prefix=model_object.key)
            for version in versions:
                s3_client.delete_object(
                    Bucket=TEST_BUCKET_NAME, Key=model_object.key, VersionId=version.id,
                )
Example #5
def _list_repo_file_etag(s3_client: S3Client, prefix: str) -> Optional[str]:
    repo_file = s3_client.list_objects_v2(Bucket=SCYLLA_REPO_BUCKET,
                                          Prefix=prefix)
    if repo_file["KeyCount"] != 1:
        LOGGER.debug("No such file `%s' in %s bucket", prefix,
                     SCYLLA_REPO_BUCKET)
        return None
    return repo_file["Contents"][0]["ETag"]
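A hypothetical call (the prefix is illustrative only):

etag = _list_repo_file_etag(s3_client, "deb/dists/stable/Release")  # hypothetical prefix
if etag is not None:
    LOGGER.debug("Repo file ETag: %s", etag)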
Example #6
    def should_create_storage_bucket_public_access_block(self, s3_client: S3Client) -> None:
        """Test that public access to the Geostore Storage S3 Bucket is blocked."""
        response = s3_client.get_public_access_block(Bucket=self.storage_bucket_name)
        public_access_block_configuration = response["PublicAccessBlockConfiguration"]
        assert public_access_block_configuration["BlockPublicAcls"] is True
        assert public_access_block_configuration["IgnorePublicAcls"] is True
        assert public_access_block_configuration["BlockPublicPolicy"] is True
        assert public_access_block_configuration["RestrictPublicBuckets"] is True
Example #7
def wait_for_s3_key(bucket_name: str, key: str, s3_client: S3Client) -> None:
    """Poll S3 until the given key exists in the bucket, failing after three minutes."""
    process_timeout = datetime.now() + timedelta(minutes=3)
    while CONTENTS_KEY not in s3_client.list_objects(Bucket=bucket_name,
                                                     Prefix=key):
        assert (  # pragma: no cover
            datetime.now() < process_timeout
        ), f"S3 file '{bucket_name}/{key}' was not created, process timed out."
        time.sleep(5)  # pragma: no cover
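list_objects omits the "Contents" key from its response until at least one object matches the prefix, so the constant being polled for is presumably:

CONTENTS_KEY = "Contents"  # assumed definition, not shown in the original snippet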
Example #8
def delete_s3_versions(bucket_name: str,
                       version_list: List[ObjectIdentifierTypeDef],
                       s3_client: S3Client) -> None:
    for index in range(0, len(version_list), DELETE_OBJECTS_MAX_KEYS):
        response = s3_client.delete_objects(
            Bucket=bucket_name,
            Delete=DeleteTypeDef(
                Objects=version_list[index:index + DELETE_OBJECTS_MAX_KEYS]),
        )
        print(response)
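S3's DeleteObjects API accepts at most 1,000 keys per request, which is why the version list is processed in chunks; the constant is presumably defined as:

DELETE_OBJECTS_MAX_KEYS = 1000  # assumed; matches the per-request DeleteObjects limit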
Example #9
def _download_remote_blob(s3_client: S3Client, bucket: str,
                          key: str) -> Optional["StructuredConfig"]:
    try:
        obj = s3_client.get_object(Bucket=bucket, Key=key)
        return json.loads(obj["Body"].read())
    except Exception:
        logger.debug(
            "Could not successfully download and parse any pre-existing config"
        )
        return None
Example #10
def test_bucket_name_conflict_pos(boto_client: S3Client) -> None:
    """Test that initialize_s3 will create not create a bucket if the bucket does
    exist in the s3 instance.

    Parameters
    ----------
    boto_client : S3Client
        Pytest fixture yielding an S3 client.
    """
    # Create bucket
    bucket_name = "test-conflict"
    boto_client.create_bucket(Bucket=bucket_name)

    # Init bucket and check it's still in the bucket list
    initialize_s3(bucket_name)
    buckets = boto_client.list_buckets()["Buckets"]
    in_buckets = False
    for bucket in buckets:
        if bucket["Name"] == bucket_name:
            in_buckets = True
            break
    assert in_buckets
Example #11
    def test_no_conflict_pos(boto_client: S3Client) -> None:
        """Test that verify_model_version will not raise an error if a model already exists
        in S3, but does not have a conflicting version.

        Parameters
        ----------
        boto_client : S3Client
            Client to use for uploading new objects.
        """
        model_name = "test_model"
        with NamedTemporaryFile() as tempfile:
            tempfile.write(b"test data")
            boto_client.upload_fileobj(
                tempfile, TEST_BUCKET_NAME, model_name,
            )
            version = 1
            boto_client.put_object_tagging(
                Bucket=TEST_BUCKET_NAME,
                Key=model_name,
                Tagging={"TagSet": [{"Key": "model-version", "Value": str(version)}]},
            )
        verify_model_version(TEST_BUCKET_NAME, model_name, 2, boto_client)
Example #12
def get_s3_prefix_versions(
        bucket_name: str, prefix: str,
        s3_client: S3Client) -> List[ObjectIdentifierTypeDef]:
    version_list: List[ObjectIdentifierTypeDef] = []
    object_versions_paginator = s3_client.get_paginator("list_object_versions")
    for object_versions_page in object_versions_paginator.paginate(
            Bucket=bucket_name, Prefix=prefix):
        for version in object_versions_page.get("Versions", []):
            version_list.append({
                "Key": version["Key"],
                "VersionId": version["VersionId"]
            })
    assert version_list, version_list
    return version_list
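This helper composes with delete_s3_versions from Example #8; a hypothetical cleanup of every version under a prefix (bucket and prefix names are made up):

versions = get_s3_prefix_versions("my-bucket", "models/", s3_client)  # hypothetical names
delete_s3_versions("my-bucket", versions, s3_client)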
Example #13
    def test_conflicting_versions_neg(boto_client: S3Client) -> None:
        """Test that verify_model_version will raise an error if a model and given version
        already exists in S3.

        Parameters
        ----------
        boto_client : S3Client
            Client to use for uploading new objects.
        """
        model_name = "test_model"
        with NamedTemporaryFile() as tempfile:
            tempfile.write(b"test data")
            boto_client.upload_fileobj(
                tempfile, TEST_BUCKET_NAME, model_name,
            )
            version = 2
            boto_client.put_object_tagging(
                Bucket=TEST_BUCKET_NAME,
                Key=model_name,
                Tagging={"TagSet": [{"Key": "model-version", "Value": str(version)}]},
            )
        with pytest.raises(ValueError) as err:
            verify_model_version(TEST_BUCKET_NAME, model_name, version, boto_client)
        assert "already exists" in str(err.value)
Example #14
def verify_model_version(model_bucket: str, model_name: str,
                         model_version: str, s3_client: S3Client) -> None:
    """Verify that the given model and version do not conflict with pre-existing S3
    objects.

    Parameters
    ----------
    model_bucket : str
        Bucket that models are uploaded to.
    model_name : str
        Name of the model and bucket object.
    model_version : str
        New version for a model.
    s3_client : S3Client
        S3 client for connecting to s3.

    Raises
    ------
    ValueError
        If the given model and version already exist in S3.
    """
    bucket = boto3.resource(
        "s3",
        endpoint_url=os.getenv("S3_URL", "http://localhost:9000"),
        aws_access_key_id=os.getenv("S3_ACCESS_KEY", "minioadmin"),
        aws_secret_access_key=os.getenv("S3_SECRET_KEY", "minioadmin"),
        config=botocore.config.Config(signature_version="s3v4"),
        region_name=os.getenv("S3_REGION", "us-east-1"),
        verify=False,
    ).Bucket(model_bucket)
    for version in bucket.object_versions.filter(Prefix=model_name):
        tags = s3_client.get_object_tagging(Bucket=model_bucket,
                                            Key=model_name,
                                            VersionId=version.id)["TagSet"]
        for tag in tags:
            if tag["Key"] == "model-version" and tag["Value"] == str(
                    model_version):
                logging.error(
                    "Given an already existing model version for serialization")
                raise ValueError(
                    f"Given model version '{model_version}' already exists, please "
                    "try again with a new version")
Example #15
def get_s3_key_versions(bucket_name: str, key: str,
                        s3_client: S3Client) -> List[ObjectIdentifierTypeDef]:
    version_list: List[ObjectIdentifierTypeDef] = []
    object_versions_paginator = s3_client.get_paginator("list_object_versions")
    for object_versions_page in object_versions_paginator.paginate(
            Bucket=bucket_name, Prefix=key):
        for marker in object_versions_page.get("DeleteMarkers", []):
            if marker["Key"] == key:
                version_list.append({
                    "Key": marker["Key"],
                    "VersionId": marker["VersionId"]
                })
        for version in object_versions_page.get("Versions", []):
            if version["Key"] == key:
                version_list.append({
                    "Key": version["Key"],
                    "VersionId": version["VersionId"]
                })
    assert version_list, version_list
    return version_list
Example #16
    def should_enable_storage_bucket_versioning(self, s3_client: S3Client) -> None:
        """Test if Geostore Storage S3 Bucket versioning is enabled."""
        response = s3_client.get_bucket_versioning(Bucket=self.storage_bucket_name)
        assert response["Status"] == "Enabled"
Example #17
    def should_create_storage_bucket_location_constraint(self, s3_client: S3Client) -> None:
        """Test that the Geostore Storage S3 Bucket is created in the correct region."""
        response = s3_client.get_bucket_location(Bucket=self.storage_bucket_name)
        assert response["LocationConstraint"] == "ap-southeast-2"
Example #18
def test_bucket_name_conflict_enable_version_pos(boto_client: S3Client) -> None:
    """Test that initialize_s3 will create not create a bucket if the bucket does
    exist in the s3 instance and it will enable versioning.

    Parameters
    ----------
    boto_client : S3Client
        Pytest fixture yielding an S3 client.
    """
    # Create bucket and disable versioning on the bucket just made
    bucket_name = "test-conflict"
    boto_client.create_bucket(Bucket=bucket_name)
    boto_client.put_bucket_versioning(
        Bucket=bucket_name, VersioningConfiguration={"Status": "Suspended"},
    )

    # Init bucket and check it's in the bucket list and with versioning
    initialize_s3(bucket_name)
    buckets = boto_client.list_buckets()["Buckets"]
    in_buckets = False
    for bucket in buckets:
        if bucket["Name"] == bucket_name:
            in_buckets = True
            break
    assert in_buckets
    assert boto_client.get_bucket_versioning(Bucket=bucket_name)["Status"] == "Enabled"


    class SimpleMLP(torch.nn.Module):
        """Simple neural network for testing purposes.

        Parameters
        ----------
        layer_sizes : list
            A list of the layer sizes inclusive of the input
            and output layers
        classification : bool
            If True, the raw logits are returned (no softmax is applied
            inside the forward pass). Otherwise, the output is L2-normalized.
        """

        def __init__(
            self, layer_sizes=(256, 128, 8), classification=True,
        ):
            super().__init__()

            self.classification = classification
            modules = []
            for i, s in enumerate(layer_sizes[1:]):
                modules += [torch.nn.Linear(layer_sizes[i], s)]
                if i + 1 != len(layer_sizes) - 1:
                    modules += [torch.nn.ReLU()]
            self.net = torch.nn.Sequential(*modules)

        def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
            """Function for einputecuting the forward pass of a torch nn model.

            Parameters
            ----------
            input_tensor : torch.Tensor
                Input to the model.

            Returns
            -------
            torch.Tensor
                Result of the forward pass of the network.
            """
            input_tensor = input_tensor.reshape(
                input_tensor.shape[0], input_tensor.shape[-1]
            )
            input_tensor = self.net(input_tensor)
            if not self.classification:
                input_tensor = torch.nn.functional.normalize(input_tensor)
            return input_tensor
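
    # Illustrative smoke test (not from the original file): a (batch, 1, 256)
    # input is flattened to (batch, 256) and mapped 256 -> 128 -> 8, so the
    # default network produces raw (batch, 8) logits.
    def test_simple_mlp_shape_pos() -> None:
        model = SimpleMLP()
        assert model(torch.randn(4, 1, 256)).shape == (4, 8)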


    # serialize_model_to_file


    def test_serialize_to_temp_file_pos() -> None:
        """Test that serialize_model_to_file will correctly serialize and deserialize from
        temporary file.
        """
        # Serialize a simple model to a tempfile
        model = SimpleMLP()
        dummy_forward_input = torch.randn(1, 1, 256).to(
            "cuda" if torch.cuda.is_available() else "cpu"
        )
        with NamedTemporaryFile() as tempfile:
            serialize_model_to_file(
                model,
                tempfile.name,
                dummy_forward_input,
                ["output"],
                {"input": {0: "batch_size"}},
            )
            tempfile.seek(0)

            # Deserialize the model from the tempfile
            onnx_options = onnxruntime.SessionOptions()
            onnxruntime.InferenceSession(
                str(tempfile.name), sess_options=onnx_options,
            )


    @pytest.fixture
    def boto_bucket() -> Iterator[Tuple[Bucket, str]]:
        """Pytest fixture that yields an S3 bucket for testing and the name of that bucket.

        Yields
        ------
        Iterator[Tuple[Bucket, str]]
            The s3 bucket and the name of the bucket.
        """
        s3 = boto3.resource(
            "s3",
            endpoint_url=os.getenv("S3_URL", "http://localhost:9000"),
            aws_access_key_id=os.getenv("S3_ACCESS_KEY", "minioadmin"),
            aws_secret_access_key=os.getenv("S3_SECRET_KEY", "minioadmin"),
            config=botocore.config.Config(signature_version="s3v4"),
            region_name=os.getenv("S3_REGION", "us-east-1"),
            verify=False,
        )
        yield s3.Bucket(TEST_BUCKET_NAME), TEST_BUCKET_NAME
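
    # NOTE: the boto_client fixture used throughout these tests does not appear
    # on this page; the following is a plausible companion to boto_bucket above,
    # assuming the same environment defaults (hypothetical reconstruction).
    @pytest.fixture
    def boto_client() -> Iterator[S3Client]:
        """Hypothetical fixture yielding an S3 client for testing."""
        yield boto3.client(
            "s3",
            endpoint_url=os.getenv("S3_URL", "http://localhost:9000"),
            aws_access_key_id=os.getenv("S3_ACCESS_KEY", "minioadmin"),
            aws_secret_access_key=os.getenv("S3_SECRET_KEY", "minioadmin"),
            config=botocore.config.Config(signature_version="s3v4"),
            region_name=os.getenv("S3_REGION", "us-east-1"),
            verify=False,
        )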


    # pytorch_to_onnx_file




    @pytest.fixture(autouse=True)
    def clean_models() -> Iterator[None]:
        """Pytest fixture for clearing the test models after running a test function."""
        s3_client = create_s3_client()
        s3 = boto3.resource(
            "s3",
            aws_access_key_id=environ["AWS_ACCESS_KEY"],
            aws_secret_access_key=environ["AWS_SECRET_KEY"],
            endpoint_url=environ["S3ENDPOINT_URL"],
        )
        bucket = s3.Bucket(TEST_BUCKET_NAME)
        delete_all_object_versions(bucket, s3_client)
        yield
        delete_all_object_versions(bucket, s3_client)


    def deserialize_model_from_file(file_path: Union[str, Path]) -> None:
        """Deserializes an onnx model from a file.

        Parameters
        ----------
        file_path : Union[str, Path]
            Path to the onnx model to deserialize.
        """
        onnx_options = onnxruntime.SessionOptions()
        onnxruntime.InferenceSession(
            str(file_path), sess_options=onnx_options,
        )


    def test_to_file_pos() -> None:
        """Test that pytorch_to_bucket will correctly serialize a model to file."""
        # Serialize and check the model can be deserialized after
        model = SimpleMLP()
        dummy_forward_input = torch.randn(1, 1, 256).to(
            "cuda" if torch.cuda.is_available() else "cpu"
        )
        with NamedTemporaryFile() as tempfile:
            pytorch_to_onnx_file(
                model,
                tempfile.name,
                1,
                256,
                dynamic_shape=False,
                dummy_forward_input=dummy_forward_input,
            )
            deserialize_model_from_file(tempfile.name)



    def test_no_existing_objects_pos(boto_client: S3Client) -> None:
        """Test that verify_model_version won't raise any error if there are no objects by
        the name of the given model.

        Parameters
        ----------
        boto_client : S3Client
            Client to use for uploading new objects.
        """
        verify_model_version(TEST_BUCKET_NAME, "test_model", 0, boto_client)

