def test_python_version(image):
    """
    Verify that the Python version advertised in the image tag matches the interpreter inside the container.

    The image URI is split on "-" and the component starting with "py" (for example ``py36`` or ``py3``) is
    converted into the ``Python <major>[.<minor>]`` text that must appear in the ``python --version`` output.

    :param image: ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("py-version", image)

    expected_version = ""
    # Later "py*" components overwrite earlier ones, so the last match in the URI wins
    for component in image.split("-"):
        if not component.startswith("py"):
            continue
        if len(component) > 3:
            expected_version = f"Python {component[2]}.{component[3]}"
        else:
            expected_version = f"Python {component[2]}"

    start_container(container_name, image, ctx)
    result = run_cmd_on_container(container_name, ctx, "python --version")

    # Due to py2 deprecation, Python2 version gets streamed to stderr. Python installed via Conda also appears to
    # stream to stderr, hence the pytorch condition.
    version_on_stderr = "Python 2" in expected_version or "pytorch" in image
    reported_version = result.stderr if version_on_stderr else result.stdout

    assert expected_version in reported_version, f"Cannot find {expected_version} in {reported_version}"
def test_framework_version_cpu(cpu):
    """
    Check that the framework version in the image tag is the same as the one on a running container.

    Images whose URI contains "tensorflow-inference" are skipped. In a canary context the tag version only has
    to be contained in the reported version; otherwise it must match exactly.

    :param cpu: ECR image URI with "cpu" in the name
    """
    image = cpu
    if "tensorflow-inference" in image:
        # The reason is passed positionally: the ``msg`` keyword has been deprecated since pytest 7.0 in favour
        # of ``reason``, while the positional form is accepted by every pytest version.
        pytest.skip("TF inference does not have core tensorflow installed")

    tested_framework, tag_framework_version = get_framework_and_version_from_tag(image)

    # Module name is torch
    if tested_framework == "pytorch":
        tested_framework = "torch"

    ctx = Context()
    container_name = get_container_name("framework-version", image)
    start_container(container_name, image, ctx)
    output = run_cmd_on_container(
        container_name, ctx, f"import {tested_framework}; print({tested_framework}.__version__)", executable="python"
    )
    container_version = output.stdout.strip()

    if is_canary_context():
        assert tag_framework_version in container_version
    else:
        assert tag_framework_version == container_version
def test_sm_pysdk_2(training):
    """
    Confirm that the SageMaker Python SDK installed in the training image is newer than version 2.

    A failure caused by an older SDK means the image under test needs to be updated. If the image does not ship
    the SDK at all, it should either be added or the image explicitly skipped (with reasoning provided).

    :param training: training ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("sm_pysdk", training)
    start_container(container_name, training, ctx)

    # Ask the container's python for the installed sagemaker version
    version_probe = run_cmd_on_container(
        container_name, ctx, "import sagemaker; print(sagemaker.__version__)", executable="python"
    )
    sm_version = version_probe.stdout.strip()

    installed = Version(sm_version)
    assert installed > Version("2"), f"Sagemaker version should be > 2.0. Found version {sm_version}"
def test_sagemaker_studio_analytics_extension(training, package_name):
    """
    Verify that a SageMaker Studio analytics utility package is listed by pip and importable in the image.

    The test only runs for pytorch >=1.7,<1.9 and tensorflow >=2.4,<2.7 (excluding 2.5.*); every other
    framework or framework version is skipped.

    :param training: training ECR image URI
    :param package_name: pip name of the package to look for
    """
    framework, framework_version = test_utils.get_framework_and_version_from_tag(training)
    supported_versions = {
        "pytorch": SpecifierSet(">=1.7,<1.9"),
        "tensorflow": SpecifierSet(">=2.4,<2.7,!=2.5.*"),
    }
    specifier = supported_versions.get(framework)
    if specifier is None or Version(framework_version) not in specifier:
        pytest.skip(f"sagemaker_studio_analytics_extension is not installed in {framework} {framework_version} DLCs")

    ctx = Context()
    container_name = test_utils.get_container_name(f"sagemaker_studio_analytics_extension-{package_name}", training)
    test_utils.start_container(container_name, training, ctx)

    # Optionally add version validation in the following steps, rather than just printing it.
    test_utils.run_cmd_on_container(container_name, ctx, f"pip list | grep -i {package_name}")

    module_name = package_name.replace("-", "_")
    # For these two packages only the import is exercised; __version__ is not printed
    import_only_packages = ("sagemaker-studio-sparkmagic-lib", "sagemaker-studio-analytics-extension")
    if package_name in import_only_packages:
        import_test_cmd = f"import {module_name}"
    else:
        import_test_cmd = f"import {module_name}; print({module_name}.__version__)"

    test_utils.run_cmd_on_container(container_name, ctx, import_test_cmd, executable="python")
def test_pandas(image):
    """
    Sanity check that pandas can be installed and imported in the container.

    Newer python versions may have trouble with pandas when the bz2 module is missing from py3 containers, so
    this test makes sure the import works in all containers.

    :param image: ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("pandas", image)
    start_container(container_name, image, ctx)

    def run_python(snippet):
        # Execute a python snippet inside the container under test
        return run_cmd_on_container(container_name, ctx, snippet, executable="python")

    # Make sure we can install pandas, do not fail right away if there are pip check issues
    run_cmd_on_container(container_name, ctx, "pip install pandas", warn=True)

    import_stdout = run_python("import pandas").stdout
    assert not import_stdout.strip(), f"Expected no output when importing pandas, but got {import_stdout}"

    # Simple import test to ensure we do not get a bz2 module import failure
    run_python("import pandas; print(pandas.__version__)")
def test_dataclasses_check(image):
    """
    Ensure the ``dataclasses`` pip package is not installed in images running python 3.7 or above.

    The python version is taken from the ECR image URI and is expected in the format
    `py<major_version><minor_version>`. Images below py37 are skipped.

    :param image: ECR image URI
    """
    ctx = Context()
    pip_package = "dataclasses"
    container_name = get_container_name("dataclasses-check", image)

    python_version = int(get_python_version_from_image_uri(image).replace("py", ""))

    if python_version < 37:
        pytest.skip(
            f"Skipping test for DLC image {image} that has py36 version as {pip_package} is not included in the python framework"
        )

    start_container(container_name, image, ctx)
    # warn=True is passed so that the return code can be inspected below
    output = run_cmd_on_container(container_name, ctx, f"pip show {pip_package}", warn=True)

    if output.return_code == 0:
        pytest.fail(
            f"{pip_package} package exists in the DLC image {image} that has py{python_version} version which is greater than py36 version"
        )
    LOGGER.info(f"{pip_package} package does not exists in the DLC image {image}")
def test_framework_version_cpu(image):
    """
    Check that the framework version in the image tag is the same as the one on a running container.
    This function tests CPU, EIA, and Neuron images.

    GPU images and tensorflow-inference repositories are skipped. In a canary context the tag version only has
    to be contained in the reported version; otherwise it must match exactly.

    :param image: ECR image URI
    """
    if "gpu" in image:
        pytest.skip("GPU images will have their framework version tested in test_framework_and_cuda_version_gpu")
    image_repo_name, _ = get_repository_and_tag_from_image_uri(image)
    if re.fullmatch(r"(pr-|beta-|nightly-)?tensorflow-inference(-eia)?", image_repo_name):
        # The reason is passed positionally: the ``msg`` keyword has been deprecated since pytest 7.0
        pytest.skip("TF inference for CPU/GPU/EIA does not have core tensorflow installed")

    tested_framework, tag_framework_version = get_framework_and_version_from_tag(image)

    # Framework name may include huggingface. Remove it as a prefix: str.lstrip() treats its argument as a set
    # of characters and would also eat leading letters of other names (e.g. "autogluon" -> "togluon").
    huggingface_prefix = "huggingface_"
    if tested_framework.startswith(huggingface_prefix):
        tested_framework = tested_framework[len(huggingface_prefix):]

    # Module name is torch
    if tested_framework == "pytorch":
        tested_framework = "torch"

    ctx = Context()
    container_name = get_container_name("framework-version", image)
    start_container(container_name, image, ctx)
    output = run_cmd_on_container(
        container_name, ctx, f"import {tested_framework}; print({tested_framework}.__version__)", executable="python"
    )
    container_version = output.stdout.strip()

    if is_canary_context():
        assert tag_framework_version in container_version
    else:
        assert tag_framework_version == container_version
def test_utility_packages_using_import(training):
    """
    Verify that the utility packages can be imported in the Training DLC image.

    Images older than the per-framework minimum version are skipped. For the ``sagemaker`` package the
    imported version must additionally be greater than 2.

    :param training: training ECR image URI
    """
    # TODO: revert once habana is supported on SM
    if "hpu" in training:
        pytest.skip("Skipping test for Habana images as SM is not yet supported")

    ctx = Context()
    container_name = test_utils.get_container_name("utility_packages_using_import", training)
    test_utils.start_container(container_name, training, ctx)

    framework, framework_version = test_utils.get_framework_and_version_from_tag(training)
    minimum_framework_versions = {
        "mxnet": "1.8",
        "pytorch": "1.7",
        "huggingface_pytorch": "1.7",
        "tensorflow2": "2.4",
        "tensorflow1": "1.15",
        "huggingface_tensorflow": "2.4",
    }

    # Plain tensorflow is looked up under a major-version specific key
    if framework == "tensorflow":
        if framework_version.startswith("1."):
            framework = "tensorflow1"
        else:
            framework = "tensorflow2"

    if Version(framework_version) < Version(minimum_framework_versions[framework]):
        pytest.skip("Extra utility packages will be added going forward.")

    for package in UTILITY_PACKAGES_IMPORT:
        import_result = test_utils.run_cmd_on_container(
            container_name, ctx, f"import {package}; print({package}.__version__)", executable="python"
        )
        version = import_result.stdout.strip()
        if package != "sagemaker":
            continue
        assert Version(version) > Version("2"), f"Sagemaker version should be > 2.0. Found version {version}"
def test_torchvision_nms_inference(pytorch_inference):
    """
    Check that the internally built torchvision binary is used to resolve the missing nms issue.

    Skipped for PT 1.5.1 GPU images, for EIA images older than PT 1.5.1 and for Neuron images.

    :param pytorch_inference: framework fixture for pytorch inference
    """
    _, framework_version = get_framework_and_version_from_tag(pytorch_inference)
    image_version = Version(framework_version)
    pt_1_5_1 = Version("1.5.1")

    if image_version == pt_1_5_1 and get_processor_from_image_uri(pytorch_inference) == "gpu":
        pytest.skip("Skipping this test for PT 1.5.1 GPU Inference DLC images")
    if "eia" in pytorch_inference and image_version < pt_1_5_1:
        pytest.skip("This test does not apply to PT EIA images for PT versions less than 1.5.1")
    if "neuron" in pytorch_inference:
        pytest.skip("Skipping because this is not relevant to PT Neuron images")

    ctx = Context()
    container_name = get_container_name("torchvision-nms", pytorch_inference)
    start_container(container_name, pytorch_inference, ctx)

    # The command fails if the nms operator cannot be resolved
    nms_check = "import torch; import torchvision; print(torch.ops.torchvision.nms)"
    run_cmd_on_container(container_name, ctx, nms_check, executable="python")
def test_repo_anaconda_not_present(image):
    """
    Test to see if all packages installed in the image do not come from repo.anaconda.com

    The test is skipped when no ``conda`` entry is found in the container. The container is always stopped and
    removed at the end.

    :param image: ECR image URI
    :raises RuntimeError: if ``conda list --explicit`` reports packages from repo.anaconda.com
    """
    # Bind the names used by the cleanup before entering the try block, so that a failure here surfaces as
    # itself instead of being masked by a NameError raised from the finally clause.
    ctx = Context()
    container_name = test_utils.get_container_name("anaconda", image)
    try:
        test_utils.start_container(container_name, image, ctx)

        # First check to see if image has conda installed, if not, skip test since no packages installed from
        # conda present
        conda_present = test_utils.run_cmd_on_container(
            container_name, ctx, "find . -name conda -not -path \"**/.github/*\""
        ).stdout.strip()
        if not conda_present:
            pytest.skip(f"Image {image} does not have conda installed, skipping test.")

        # Commands are split in 2 because if warn=True, then even if first command fails silently, no error is
        # raised
        test_utils.run_cmd_on_container(container_name, ctx, "conda list --explicit > repo_list.txt")

        # grep exits non-zero when nothing matches, hence warn=True
        grep_result = test_utils.run_cmd_on_container(
            container_name, ctx, "grep repo.anaconda.com repo_list.txt", warn=True
        ).stdout.strip()
        if grep_result:
            raise RuntimeError(
                f"Image {image} contains packages installed from repo.anaconda.com. "
                f"Please ensure that these packages are obtained through conda-forge or other alternatives: {grep_result}"
            )
    finally:
        test_utils.stop_and_remove_container(container_name, ctx)
def test_framework_version_cpu(image):
    """
    Check that the framework version in the image tag is the same as the one on a running container.
    This function tests CPU, EIA, and Neuron images.

    GPU images and tensorflow-inference repositories are skipped. The comparison depends on the context:

    * canary context: the tag version must be contained in the reported version
    * autogluon: the reported version must start with the tag version
    * torch >= 1.10.0: the reported version must be exactly ``<tag version>+cpu``
    * neuron images: the tag version must be contained in the reported version
    * everything else: exact match

    The container is stopped and removed even when an assertion fails.

    :param image: ECR image URI
    """
    if "gpu" in image:
        pytest.skip("GPU images will have their framework version tested in test_framework_and_cuda_version_gpu")
    image_repo_name, _ = get_repository_and_tag_from_image_uri(image)
    if re.fullmatch(r"(pr-|beta-|nightly-)?tensorflow-inference(-eia|-graviton)?", image_repo_name):
        # The reason is passed positionally: the ``msg`` keyword has been deprecated since pytest 7.0
        pytest.skip("TF inference for CPU/GPU/EIA does not have core tensorflow installed")

    tested_framework, tag_framework_version = get_framework_and_version_from_tag(image)

    # Framework name may include huggingface
    if tested_framework.startswith("huggingface_"):
        tested_framework = tested_framework[len("huggingface_"):]

    # Module name is torch
    if tested_framework == "pytorch":
        tested_framework = "torch"
    elif tested_framework == "autogluon":
        tested_framework = "autogluon.core"

    ctx = Context()
    container_name = get_container_name("framework-version", image)
    start_container(container_name, image, ctx)
    try:
        output = run_cmd_on_container(
            container_name,
            ctx,
            f"import {tested_framework}; print({tested_framework}.__version__)",
            executable="python",
        )
        container_version = output.stdout.strip()

        if is_canary_context():
            assert tag_framework_version in container_version
        elif tested_framework == "autogluon.core":
            assert container_version.startswith(tag_framework_version)
        elif tested_framework == "torch" and Version(tag_framework_version) >= Version("1.10.0"):
            # Escape the tag version so that its dots only match literal dots
            torch_version_pattern = rf"{re.escape(tag_framework_version)}(\+cpu)"
            assert re.fullmatch(torch_version_pattern, container_version), (
                f"torch.__version__ = {container_version} does not match {torch_version_pattern}\n"
                f"Please specify framework version as X.Y.Z+cpu"
            )
        elif "neuron" in image:
            assert tag_framework_version in container_version
        else:
            assert tag_framework_version == container_version
    finally:
        # Clean up regardless of the outcome so that failed runs do not leave containers behind
        stop_and_remove_container(container_name, ctx)
def test_apache_tomcat(image):
    """
    Temporary canary test: search the container file system for entries named like tomcat.

    :param image: ECR image URI
    :raises RuntimeError: if the search prints any result
    """
    ctx = Context()
    container_name = test_utils.get_container_name("tomcat", image)
    test_utils.start_container(container_name, image, ctx)

    search_result = test_utils.run_cmd_on_container(container_name, ctx, "find / -name *tomcat*")
    tomcat_output = search_result.stdout.strip()
    if not tomcat_output:
        return
    raise RuntimeError(f"Found tomcat installation in {image}. See output: {tomcat_output}")
def test_torchvision_nms_training(pytorch_training):
    """
    Check that the internally built torchvision binary is used to resolve the missing nms issue.

    :param pytorch_training: framework fixture for pytorch training
    """
    ctx = Context()
    container_name = get_container_name("torchvision-nms", pytorch_training)
    start_container(container_name, pytorch_training, ctx)

    # The command fails if the nms operator cannot be resolved
    nms_check = "import torch; import torchvision; print(torch.ops.torchvision.nms)"
    run_cmd_on_container(container_name, ctx, nms_check, executable="python")
def test_boto3(mxnet_inference):
    """
    Ensure that boto3 can be imported in the mxnet inference image.

    :param mxnet_inference: ECR image URI
    """
    ctx = Context()
    container_name = test_utils.get_container_name("boto3", mxnet_inference)
    test_utils.start_container(container_name, mxnet_inference, ctx)

    import_statement = "import boto3"
    test_utils.run_cmd_on_container(container_name, ctx, import_statement, executable="python")
def test_framework_and_neuron_sdk_version(neuron):
    """
    Check that the framework version expected for the image's neuron sdk tag matches the running container.

    The expected version is obtained from the image tag via ``get_neuron_framework_and_version_from_tag`` and
    compared with ``__version__`` of the neuron flavour of the framework module inside the container.
    This function tests only Neuron images. The container is stopped and removed even when the check fails.

    :param neuron: ECR image URI of a Neuron image
    """
    image = neuron

    tested_framework, neuron_tag_framework_version = get_neuron_framework_and_version_from_tag(image)

    # neuron tag is there in pytorch images for now. Once all frameworks have it, then this will
    # be removed
    if neuron_tag_framework_version is None:
        # Compare by value: an identity check against a string literal is not reliable
        if tested_framework == "pytorch":
            pytest.fail(f"Neuron SDK tag is expected but missing for pytorch image {image}")
        else:
            # The reason is passed positionally: the ``msg`` keyword has been deprecated since pytest 7.0
            pytest.skip("Neuron SDK tag is not there as part of image")

    # Framework name may include huggingface
    if tested_framework.startswith("huggingface_"):
        tested_framework = tested_framework[len("huggingface_"):]

    # Map the framework name to the module that is imported in the container; mxnet keeps its own name
    neuron_modules = {"pytorch": "torch_neuron", "tensorflow": "tensorflow_neuron"}
    tested_framework = neuron_modules.get(tested_framework, tested_framework)

    ctx = Context()
    container_name = get_container_name("framework-version-neuron", image)
    start_container(container_name, image, ctx)
    try:
        output = run_cmd_on_container(
            container_name,
            ctx,
            f"import {tested_framework}; print({tested_framework}.__version__)",
            executable="python",
        )
        container_version = output.stdout.strip()

        if tested_framework == "mxnet":
            # TODO -For neuron the mx_neuron module does not support the __version__ yet and we
            # can get the version of only the base mxnet model. The base mxnet model just
            # has framework version and does not have the neuron semantic version yet. Till
            # the mx_neuron supports __version__ do the minimal check and not exact match
            _, tag_framework_version = get_framework_and_version_from_tag(image)
            assert tag_framework_version == container_version
        else:
            assert neuron_tag_framework_version == container_version
    finally:
        stop_and_remove_container(container_name, ctx)
def test_emacs(image):
    """
    Ensure that emacs is installed on every image.

    :param image: ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("emacs", image)
    start_container(container_name, image, ctx)

    # Make sure the following emacs sanity tests exit with code 0
    for sanity_command in ("which emacs", "emacs -version"):
        run_cmd_on_container(container_name, ctx, sanity_command)
def test_awscli(mxnet_inference):
    """
    Ensure that the ``aws`` command line tool is available on mxnet inference.

    :param mxnet_inference: ECR image URI
    """
    ctx = Context()
    container_name = test_utils.get_container_name("awscli", mxnet_inference)
    test_utils.start_container(container_name, mxnet_inference, ctx)

    # Locate the executable first, then make sure it actually runs
    for cli_command in ("which aws", "aws --version"):
        test_utils.run_cmd_on_container(container_name, ctx, cli_command)
def test_torchvision_nms_inference(pytorch_inference):
    """
    Check that the internally built torchvision binary is used to resolve the missing nms issue.

    Neuron images are skipped.

    :param pytorch_inference: framework fixture for pytorch inference
    """
    if "neuron" in pytorch_inference:
        pytest.skip("Skipping because this is not relevant to PT Neuron images")

    ctx = Context()
    container_name = get_container_name("torchvision-nms", pytorch_inference)
    start_container(container_name, pytorch_inference, ctx)

    # The command fails if the nms operator cannot be resolved
    nms_check = "import torch; import torchvision; print(torch.ops.torchvision.nms)"
    run_cmd_on_container(container_name, ctx, nms_check, executable="python")
def test_pt_s3_sanity(pytorch_training, pt17_and_above_only):
    """
    Check that the internally built PT S3 binary is properly installed.

    Images older than PyTorch 1.8 are skipped. The check imports ``awsio`` in the container, prints its
    version and calls ``file_exists`` on a test object in S3.

    :param pytorch_training: framework fixture for pytorch training
    :param pt17_and_above_only: fixture requested by the test; it is not used in the body
    """
    _, framework_version = get_framework_and_version_from_tag(pytorch_training)
    minimum_supported = Version("1.8")
    if not Version(framework_version) >= minimum_supported:
        pytest.skip("S3 plugin sanity is supported on PyTorch version >=1.8")

    ctx = Context()
    container_name = get_container_name("pt-s3", pytorch_training)
    start_container(container_name, pytorch_training, ctx)

    s3_path = "s3://pt-s3plugin-test-data-west2/test_0.JPEG"
    sanity_script = (
        "import awsio; print(awsio.__version__); "
        "from awsio.python.lib.io.s3.s3dataset import file_exists; "
        f'print(file_exists("{s3_path}"))'
    )
    run_cmd_on_container(container_name, ctx, sanity_script, executable="python")
def test_tf_serving_version_cpu(tensorflow_inference):
    """
    For non-huggingface non-GPU TF inference images, check that the tag version matches the version of TF serving
    in the container.

    Huggingface includes MMS and core TF, hence the versioning scheme is based off of the underlying tensorflow
    framework version, rather than the TF serving version.

    GPU inference images will be tested along side `test_framework_and_cuda_version_gpu` in order to be judicious
    about GPU resources. This test can run directly on the host, and thus does not require additional resources
    to be spun up.

    The container is stopped and removed even when the version check fails.

    :param tensorflow_inference: ECR image URI
    """
    # Set local variable to clarify contents of fixture
    image = tensorflow_inference

    if "gpu" in image:
        pytest.skip("GPU images will have their framework version tested in test_framework_and_cuda_version_gpu")
    if "neuron" in image:
        pytest.skip("Neuron images will have their framework version tested in test_framework_and_neuron_sdk_version")

    _, tag_framework_version = get_framework_and_version_from_tag(image)

    image_repo_name, _ = get_repository_and_tag_from_image_uri(image)
    is_tf_inference_repo = re.fullmatch(r"(pr-|beta-|nightly-)?tensorflow-inference", image_repo_name)
    if is_tf_inference_repo and Version(tag_framework_version) == Version("2.6.3"):
        pytest.skip("Skipping this test for TF 2.6.3 inference as the v2.6.3 version is already on production")

    ctx = Context()
    container_name = get_container_name("tf-serving-version", image)
    start_container(container_name, image, ctx)
    try:
        output = run_cmd_on_container(container_name, ctx, "tensorflow_model_server --version", executable="bash")
        # Escape the tag version so that its dots only match literal dots, and forbid a trailing digit so
        # that e.g. tag 2.6.3 is not satisfied by a 2.6.30 model server. Non-digit suffixes are still accepted.
        version_pattern = rf"TensorFlow ModelServer: {re.escape(tag_framework_version)}(?!\d)"
        assert re.match(
            version_pattern, output.stdout
        ), f"Cannot find model server version {tag_framework_version} in {output.stdout}"
    finally:
        stop_and_remove_container(container_name, ctx)
def test_torchvision_nms_training(pytorch_training):
    """
    Check that the internally built torchvision binary is used to resolve the missing nms issue.

    PT 1.5.1 GPU training images are skipped.

    :param pytorch_training: framework fixture for pytorch training
    """
    _, framework_version = get_framework_and_version_from_tag(pytorch_training)
    is_pt_1_5_1 = Version(framework_version) == Version("1.5.1")
    if is_pt_1_5_1 and get_processor_from_image_uri(pytorch_training) == "gpu":
        pytest.skip("Skipping this test for PT 1.5.1 GPU Training DLC images")

    ctx = Context()
    container_name = get_container_name("torchvision-nms", pytorch_training)
    start_container(container_name, pytorch_training, ctx)

    # The command fails if the nms operator cannot be resolved
    nms_check = "import torch; import torchvision; print(torch.ops.torchvision.nms)"
    run_cmd_on_container(container_name, ctx, nms_check, executable="python")
def test_ubuntu_version(image):
    """
    Check that the ubuntu version in the image tag is the same as the one on a running container.

    The version is whatever follows "ubuntu" in the last "-"-separated component of the URI that starts with
    "ubuntu"; it stays empty when there is no such component.

    :param image: ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("ubuntu-version", image)

    ubuntu_components = [component for component in image.split("-") if component.startswith("ubuntu")]
    ubuntu_version = ubuntu_components[-1].split("ubuntu")[-1] if ubuntu_components else ""

    start_container(container_name, image, ctx)
    os_release = run_cmd_on_container(container_name, ctx, "cat /etc/os-release").stdout

    assert "Ubuntu" in os_release
    assert ubuntu_version in os_release
def test_stray_files(image):
    """
    Test to ensure that unnecessary build artifacts are not present in any easily visible or tmp directories.

    The listings of /tmp, /var/tmp, the home directory and / are each handed to ``_assert_artifact_free``.
    In addition /tmp may only contain allowed files and /var/tmp must be empty.

    :param image: ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("test_tmp_dirs", image)
    start_container(container_name, image, ctx)

    # Running list of artifacts/artifact regular expressions we do not want in any of the directories
    stray_artifacts = [r"\.py"]

    # Running list of allowed files in the /tmp directory
    allowed_tmp_files = ["hsperfdata_root"]

    def list_directory(path):
        # List every entry of the directory, hidden ones included
        return run_cmd_on_container(container_name, ctx, f"ls -A {path}")

    # Ensure stray artifacts are not in the tmp directory
    tmp_listing = list_directory("/tmp")
    _assert_artifact_free(tmp_listing, stray_artifacts)

    # Ensure tmp dir is empty except for whitelisted files
    for tmp_file in tmp_listing.stdout.split():
        assert (
            tmp_file in allowed_tmp_files
        ), f"Found unexpected file in tmp dir: {tmp_file}. Allowed tmp files: {allowed_tmp_files}"

    # We always expect /var/tmp to be empty
    var_tmp_listing = list_directory("/var/tmp")
    _assert_artifact_free(var_tmp_listing, stray_artifacts)
    assert var_tmp_listing.stdout.strip() == ""

    # Additional check of home and root directories to ensure that stray artifacts are not present
    for directory in ("~", "/"):
        _assert_artifact_free(list_directory(directory), stray_artifacts)
def test_utility_packages_using_import(training):
    """
    Verify that utility packages are installed in the Training DLC image.

    Images older than the per-framework minimum version are skipped. For the ``sagemaker`` package the
    imported version must additionally be greater than 2.

    :param training: training ECR image URI
    """
    ctx = Context()
    container_name = test_utils.get_container_name("utility_packages_using_import", training)
    test_utils.start_container(container_name, training, ctx)

    framework, framework_version = test_utils.get_framework_and_version_from_tag(training)
    utility_package_minimum_framework_version = {
        "mxnet": "1.8",
        "pytorch": "1.7",
        "tensorflow2": "2.4",
        "tensorflow1": "1.15",
    }

    # Only tensorflow is split into major-version specific keys. Remapping unconditionally would turn every
    # other framework (mxnet, pytorch, ...) into "tensorflow2" and compare it against the wrong minimum.
    if framework == "tensorflow":
        framework = "tensorflow1" if framework_version.startswith("1.") else "tensorflow2"

    # Frameworks without a registered minimum are not version-gated; the import checks below still run
    minimum_version = utility_package_minimum_framework_version.get(framework)
    if minimum_version is not None and Version(framework_version) < Version(minimum_version):
        pytest.skip("Extra utility packages will be added going forward.")

    for package in UTILITY_PACKAGES_IMPORT:
        version = test_utils.run_cmd_on_container(
            container_name, ctx, f"import {package}; print({package}.__version__)", executable="python"
        ).stdout.strip()
        if package == "sagemaker":
            # Report the version that was actually read from the container
            assert Version(version) > Version("2"), f"Sagemaker version should be > 2.0. Found version {version}"
def test_framework_version_cpu(image):
    """
    Check that the framework version in the image tag is the same as the one on a running container.
    This function tests CPU, EIA images.

    GPU images, Neuron images and tensorflow-inference repositories are skipped here. The framework module is
    imported inside the container and its ``__version__`` is compared with the version from the image tag;
    how strict the comparison is depends on the context and the framework (see the inline comments).

    :param image: ECR image URI
    """
    if "gpu" in image:
        pytest.skip(
            "GPU images will have their framework version tested in test_framework_and_cuda_version_gpu"
        )
    if "neuron" in image:
        pytest.skip(
            "Neuron images will have their framework version tested in test_framework_and_neuron_sdk_version"
        )
    image_repo_name, _ = get_repository_and_tag_from_image_uri(image)
    if re.fullmatch(
            r"(pr-|beta-|nightly-)?tensorflow-inference(-eia|-graviton)?",
            image_repo_name):
        pytest.skip(
            "Non-gpu tensorflow-inference images will be tested in test_tf_serving_version_cpu."
        )

    tested_framework, tag_framework_version = get_framework_and_version_from_tag(
        image)
    # Framework name may include huggingface
    if tested_framework.startswith('huggingface_'):
        tested_framework = tested_framework[len("huggingface_"):]
    # Module name is torch
    if tested_framework == "pytorch":
        tested_framework = "torch"
    elif tested_framework == "autogluon":
        tested_framework = "autogluon.core"
    ctx = Context()
    container_name = get_container_name("framework-version", image)
    start_container(container_name, image, ctx)
    output = run_cmd_on_container(
        container_name,
        ctx,
        f"import {tested_framework}; print({tested_framework}.__version__)",
        executable="python")
    if is_canary_context():
        # Canary context: the tag version only has to be contained in the reported version
        assert tag_framework_version in output.stdout.strip()
    else:
        if tested_framework == "autogluon.core":
            # Tag 0.3.2 is checked against a reported 0.3.1; other tags are checked against themselves
            version_to_check = "0.3.1" if tag_framework_version == "0.3.2" else tag_framework_version
            assert output.stdout.strip().startswith(version_to_check)
        # Habana v1.2 binary does not follow the X.Y.Z+cpu naming convention
        elif "habana" not in image_repo_name:
            # NOTE(review): in this layout only torch >= 1.10.0 is verified for non-habana images; other
            # frameworks reaching this branch are not compared at all -- confirm that this is intended.
            if tested_framework == "torch" and Version(
                    tag_framework_version) >= Version("1.10.0"):
                # NOTE(review): the tag version is interpolated into the pattern unescaped, so its dots
                # match any character.
                torch_version_pattern = r"{torch_version}(\+cpu)".format(
                    torch_version=tag_framework_version)
                assert re.fullmatch(
                    torch_version_pattern,
                    output.stdout.strip()
                ), (f"torch.__version__ = {output.stdout.strip()} does not match {torch_version_pattern}\n"
                    f"Please specify framework version as X.Y.Z+cpu")
        # NOTE(review): this `else` is assumed to pair with the habana `elif` above (i.e. it handles habana
        # images) -- confirm the intended nesting against version control.
        else:
            # NOTE(review): neuron images are skipped at the top of the test, so this check looks unreachable
            if "neuron" in image:
                assert tag_framework_version in output.stdout.strip()
            if all(_string in image for _string in ["pytorch", "habana", "synapseai1.3.0"]):
                # Habana Pytorch version looks like 1.10.0a0+gitb488e78 for SynapseAI1.3 PT1.10.1 images
                pt_fw_version_pattern = r"(\d+(\.\d+){1,2}(-rc\d)?)((a0\+git\w{7}))"
                # NOTE(review): re.fullmatch returns None when the output does not fit the pattern, which
                # would surface below as an AttributeError rather than an assertion failure.
                pt_fw_version_match = re.fullmatch(pt_fw_version_pattern, output.stdout.strip())
                # This is desired for PT1.10.1 images
                assert pt_fw_version_match.group(1) == "1.10.0"
            else:
                assert tag_framework_version == output.stdout.strip()
    # NOTE(review): not reached when an assertion above fails, so the container is left running in that case
    stop_and_remove_container(container_name, ctx)
def test_oss_compliance(image):
    """
    Run oss compliance check on a container to check if license attribution files exist.
    And upload source of third party packages to S3 bucket.

    Steps:

    1. Run ``testOSSCompliance`` inside the container and copy the ``THIRD_PARTY_SOURCE_CODE_URLS`` file to
       the local repository path. The container is removed afterwards, whether or not the copy succeeded.
    2. For every ``<name> <version> <url>`` line of that file, clone the repository and pack it into a
       ``.tar.gz`` archive (up to three attempts).
    3. Upload the archive to the bucket and make it public, unless the object is already present.

    :param image: ECR image URI
    """
    THIRD_PARTY_SOURCE_CODE_BUCKET = "aws-dlinfra-licenses"
    THIRD_PARTY_SOURCE_CODE_BUCKET_PATH = "third_party_source_code"
    urls_file_name = "THIRD_PARTY_SOURCE_CODE_URLS"
    max_attempts = 3

    container_name = get_container_name("oss_compliance", image)
    context = Context()
    local_repo_path = get_repository_local_path()
    urls_file_path = os.path.join(local_repo_path, urls_file_name)
    start_container(container_name, image, context)

    # run compliance test to make sure license attribution files exists. testOSSCompliance is copied as part of
    # Dockerfile
    run_cmd_on_container(container_name, context, "/usr/local/bin/testOSSCompliance /root")

    try:
        context.run(f"docker cp {container_name}:/root/{urls_file_name} {urls_file_path}")
    finally:
        context.run(f"docker rm -f {container_name}", hide=True)

    s3_resource = boto3.resource("s3")

    with open(urls_file_path) as source_code_file:
        for line in source_code_file:
            # Blank lines cannot be unpacked into name/version/url
            if not line.strip():
                continue
            name, version, url = line.split(" ")
            file_name = f"{name}_v{version}_source_code"
            s3_object_path = f"{THIRD_PARTY_SOURCE_CODE_BUCKET_PATH}/{file_name}.tar.gz"
            local_file_path = os.path.join(local_repo_path, file_name)
            archive_path = f"{local_file_path}.tar.gz"

            # An archive is only built for a fresh clone. The flag survives a failed attempt, so a retry
            # repeats the archiving step even though the cloned directory already exists by then.
            needs_archive = False
            for attempt in range(1, max_attempts + 1):
                try:
                    if not os.path.isdir(local_file_path):
                        context.run(f"git clone {url.rstrip()} {local_file_path}")
                        needs_archive = True
                    if needs_archive:
                        context.run(f"tar -czvf {archive_path} {local_file_path}")
                        needs_archive = False
                    break
                except Exception as e:
                    if attempt == max_attempts:
                        LOGGER.error(f"Unable to clone git repo. Error: {e}")
                        raise
                    # Short pause before the next attempt
                    time.sleep(1)

            try:
                if os.path.exists(archive_path):
                    LOGGER.info(f"Uploading package to s3 bucket: {line}")
                    # load() raises a ClientError with code 404 when the object is not in the bucket yet
                    s3_resource.Object(THIRD_PARTY_SOURCE_CODE_BUCKET, s3_object_path).load()
            except botocore.exceptions.ClientError as e:
                if e.response["Error"]["Code"] != "404":
                    LOGGER.error(
                        f"Unable to check if source code is present on bucket {THIRD_PARTY_SOURCE_CODE_BUCKET}. Error: {e}"
                    )
                    raise
                try:
                    # using aws cli as using boto3 expects to upload folder by iterating through each file
                    # instead of entire folder.
                    context.run(f"aws s3 cp {archive_path} s3://{THIRD_PARTY_SOURCE_CODE_BUCKET}/{s3_object_path}")
                    uploaded_object = s3_resource.Bucket(THIRD_PARTY_SOURCE_CODE_BUCKET).Object(s3_object_path)
                    uploaded_object.Acl().put(ACL="public-read")
                except botocore.exceptions.ClientError as upload_error:
                    LOGGER.error(
                        f"Unable to upload source code to bucket {THIRD_PARTY_SOURCE_CODE_BUCKET}. Error: {upload_error}"
                    )
                    raise