def run_sagemaker_local_tests(images):
    """
    Package the SageMaker test sources and run the local tests image by image.

    :param images: <list> List of all images to be used in SageMaker tests.
                   Nothing is done when the list is empty.
    """
    if not images:
        return

    # The test directory is chosen from the framework of the first image only.
    framework, _version = get_framework_and_version_from_tag(images[0])
    tests_dir = os.path.join("test", "sagemaker_tests", framework)
    archive_name = "sagemaker_tests.tar.gz"
    tar_cmd = f"tar -cz --exclude='*.pytest_cache' --exclude='__pycache__' -f {archive_name} {tests_dir}"
    run(tar_cmd)

    # One EC2 client is shared by every image; the images are processed sequentially.
    retry_config = Config(retries={"max_attempts": 10})
    ec2_client = boto3.client("ec2", config=retry_config, region_name=DEFAULT_REGION)
    for image_uri in images:
        sm_utils.execute_local_tests(image_uri, ec2_client)
def run_sagemaker_local_tests(images):
    """
    Package the SageMaker test sources and run the local tests for all images in parallel.

    :param images: <list> List of all images to be used in SageMaker tests.
                   Nothing is done when the list is empty.
    """
    if not images:
        return

    # The test directory is chosen from the framework of the first image only.
    framework, _version = get_framework_and_version_from_tag(images[0])
    tests_dir = os.path.join("test", "sagemaker_tests", framework)
    archive_name = "sagemaker_tests.tar.gz"
    tar_cmd = f"tar -cz --exclude='*.pytest_cache' --exclude='__pycache__' -f {archive_name} {tests_dir}"
    run(tar_cmd)

    # One worker process per image.
    with Pool(len(images)) as worker_pool:
        worker_pool.map(sm_utils.execute_local_tests, images)
def run_sagemaker_local_tests(images, pytest_cache_params):
    """
    Package the SageMaker test sources and run the local tests for all images in parallel.

    :param images: <list> List of all images to be used in SageMaker tests.
                   Nothing is done when the list is empty.
    :param pytest_cache_params: <dict> dictionary with data required for pytest cache handler;
                                passed unchanged to every worker
    """
    if not images:
        return

    # The test directory is chosen from the framework of the first image only.
    framework, _version = get_framework_and_version_from_tag(images[0])
    if "huggingface" in framework:
        # Glob pattern: picks up every huggingface* test directory when tar'ed by the shell.
        tests_dir = os.path.join("test", "sagemaker_tests", "huggingface*")
    else:
        tests_dir = os.path.join("test", "sagemaker_tests", framework)

    archive_name = "sagemaker_tests.tar.gz"
    tar_cmd = f"tar -cz --exclude='*.pytest_cache' --exclude='__pycache__' -f {archive_name} {tests_dir}"
    run(tar_cmd)

    # One worker process per image; each receives (image, pytest_cache_params).
    worker_args = [[image_uri, pytest_cache_params] for image_uri in images]
    with Pool(len(images)) as worker_pool:
        worker_pool.starmap(sm_utils.execute_local_tests, worker_args)
def generate_sagemaker_pytest_cmd(image, sagemaker_test_type):
    """
    Parses the image ECR url and returns appropriate pytest command

    :param image: ECR url of image
    :param sagemaker_test_type: local or remote test type
    :return: <tuple> (pytest command to be run, path where it should be executed, image tag, job type)
    :raises AttributeError: if the image tag contains no "py<digits>" token
    """
    region = os.getenv("AWS_REGION", DEFAULT_REGION)
    account_id = os.getenv("ACCOUNT_ID", image.split(".")[0])
    print("image name {}".format(image))
    sm_remote_docker_base_name, tag = image.split("/")[1].split(":")
    sm_local_docker_repo_uri = image.split(":")[0]

    # Assign instance type
    instance_type = assign_sagemaker_remote_job_instance_type(image)

    # Get path to test directory
    # NOTE: We are relying on the fact that repos are defined as <context>-<framework>-<job_type>
    # in our infrastructure
    framework, framework_version = get_framework_and_version_from_tag(image)
    framework_major_version = framework_version.split(".")[0]
    job_type = get_job_type_from_image(image)
    path = os.path.join("test", "sagemaker_tests", framework, job_type)

    # Default names of the pytest CLI options; some tensorflow suites use different names (see below).
    aws_id_arg = "--aws-id"
    docker_base_arg = "--docker-base-name"
    instance_type_arg = "--instance-type"
    accelerator_type_arg = "--accelerator-type"
    framework_version_arg = "--framework-version"
    eia_arg = "ml.eia1.large"

    processor = (
        "neuron" if "neuron" in image
        else "gpu" if "gpu" in image
        else "eia" if "eia" in image
        else "cpu"
    )
    py_version = re.search(r"py\d+", tag).group()
    sm_local_py_version = (
        "37" if py_version == "py37"
        else "38" if py_version == "py38"
        else "2" if py_version == "py27"
        else "3"
    )

    if framework == "tensorflow" and job_type == "inference":
        # Tf Inference tests have an additional sub directory with test
        integration_path = os.path.join("test", "integration", sagemaker_test_type)
    else:
        integration_path = os.path.join("integration", sagemaker_test_type)

    # Conditions for modifying tensorflow SageMaker pytest commands
    if framework == "tensorflow" and sagemaker_test_type == SAGEMAKER_REMOTE_TEST_TYPE:
        if job_type == "inference":
            aws_id_arg = "--registry"
            docker_base_arg = "--repo"
            instance_type_arg = "--instance-types"
            framework_version_arg = "--versions"
            integration_path = (
                os.path.join(integration_path, "test_tfs.py")
                if processor != "eia"
                else os.path.join(integration_path, "test_ei.py")
            )

    if framework == "tensorflow" and job_type == "training":
        aws_id_arg = "--account-id"

    test_report = os.path.join(os.getcwd(), "test", f"{job_type}_{tag}.xml")
    local_test_report = os.path.join(UBUNTU_HOME_DIR, "test", f"{job_type}_{tag}_sm_local.xml")

    # Explanation of why we need the if-condition below:
    # We have separate Pipeline Actions that run EFA tests, which have the env variable "EFA_DEDICATED=True"
    # configured so that those Actions only run the EFA tests.
    # However, there is no CB job dedicated to EFA tests in the PR context. This means that when in the
    # PR context, setting "DISABLE_EFA_TESTS" to True should skip EFA tests, but setting it to False should
    # enable not just the EFA tests, but all other tests as well.
    if is_pr_context():
        efa_tests_disabled = os.getenv("DISABLE_EFA_TESTS", "False").lower() == "true"
        efa_flag = '-m "not efa"' if efa_tests_disabled else ""
    else:
        efa_dedicated = os.getenv("EFA_DEDICATED", "False").lower() == "true"
        efa_flag = "--efa" if efa_dedicated else '-m "not efa"'

    region_list = ",".join(SAGEMAKER_EXECUTION_REGIONS)
    sagemaker_regions_list = f"--sagemaker-regions {region_list}"

    remote_pytest_cmd = (
        f"pytest -rA {integration_path} --region {region} --processor {processor} {docker_base_arg} "
        f"{sm_remote_docker_base_name} --tag {tag} {framework_version_arg} {framework_version} "
        f"{aws_id_arg} {account_id} {instance_type_arg} {instance_type} {efa_flag} {sagemaker_regions_list} --junitxml {test_report}"
    )

    if processor == "eia":
        # The leading space is required: without it the option is glued onto the junitxml report path.
        remote_pytest_cmd += f" {accelerator_type_arg} {eia_arg}"

    local_pytest_cmd = (
        f"pytest -s -v {integration_path} {docker_base_arg} "
        f"{sm_local_docker_repo_uri} --tag {tag} --framework-version {framework_version} "
        f"--processor {processor} {aws_id_arg} {account_id} --junitxml {local_test_report}"
    )

    if framework == "tensorflow" and job_type != "inference":
        local_pytest_cmd = f"{local_pytest_cmd} --py-version {sm_local_py_version} --region {region}"
    if framework == "tensorflow" and job_type == "training":
        # Tensorflow training tests live in a per-major-version directory, e.g. tensorflow2_training.
        path = os.path.join(os.path.dirname(path), f"{framework}{framework_major_version}_training")
    if "huggingface" in framework and job_type == "inference":
        path = os.path.join("test", "sagemaker_tests", "huggingface", "inference")

    return (
        remote_pytest_cmd if sagemaker_test_type == SAGEMAKER_REMOTE_TEST_TYPE else local_pytest_cmd,
        path,
        tag,
        job_type,
    )
def execute_local_tests(image, pytest_cache_params):
    """
    Run the sagemaker local tests in ec2 instance for the image

    The EC2 instance is terminated and the ssh key pair destroyed even when launching the
    instance, running the tests or uploading the pytest cache fails.

    :param image: ECR url
    :param pytest_cache_params: <dict> keyword arguments forwarded to the pytest cache
                                download/upload calls
    :return: None
    :raises DLCSageMakerLocalTestFailure: if the image pull times out and the image is not present afterwards
    :raises ValueError: if the mxnet cpu training report does not contain failures="0"
    """
    account_id = os.getenv("ACCOUNT_ID", boto3.client("sts").get_caller_identity()["Account"])
    pytest_cache_util = PytestCache(boto3.client("s3"), account_id)
    ec2_client = boto3.client("ec2", config=Config(retries={"max_attempts": 10}), region_name=DEFAULT_REGION)
    pytest_command, path, tag, job_type = generate_sagemaker_pytest_cmd(image, SAGEMAKER_LOCAL_TEST_TYPE)
    pytest_command += " --last-failed --last-failed-no-failures all "
    print(pytest_command)
    framework, _ = get_framework_and_version_from_tag(image)
    random.seed(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}")
    ec2_key_name = f"{job_type}_{tag}_sagemaker_{random.randint(1, 1000)}"
    region = os.getenv("AWS_REGION", DEFAULT_REGION)
    ec2_ami_id = UBUNTU_18_BASE_DLAMI_US_EAST_1 if region == "us-east-1" else UBUNTU_18_BASE_DLAMI_US_WEST_2
    sm_tests_tar_name = "sagemaker_tests.tar.gz"
    ec2_test_report_path = os.path.join(UBUNTU_HOME_DIR, "test", f"{job_type}_{tag}_sm_local.xml")
    # Kept outside the f-string: inside an f-string "{{" collapses to "{" and the
    # Go template passed to docker would no longer be a template.
    docker_image_format = "{{.Repository}}:{{.Tag}}"
    instance_id = ""
    ec2_conn = None
    try:
        key_file = generate_ssh_keypair(ec2_client, ec2_key_name)
        print(f"Launching new Instance for image: {image}")
        instance_id, _ip_address = launch_sagemaker_local_ec2_instance(image, ec2_ami_id, ec2_key_name, region)
        ec2_conn = ec2_utils.get_ec2_fabric_connection(instance_id, key_file, region)
        ec2_conn.put(sm_tests_tar_name, f"{UBUNTU_HOME_DIR}")
        ec2_conn.run(f"$(aws ecr get-login --no-include-email --region {region})")
        try:
            ec2_conn.run(f"docker pull {image}", timeout=600)
        except invoke.exceptions.CommandTimedOut as e:
            # The pull may have completed even though the command timed out; only fail
            # when the image is really missing on the instance.
            output = ec2_conn.run(
                f"docker images {image} --format '{docker_image_format}'"
            ).stdout.strip("\n")
            if output != image:
                raise DLCSageMakerLocalTestFailure(
                    f"Image pull for {image} failed.\ndocker images output = {output}"
                ) from e
        ec2_conn.run(f"tar -xzf {sm_tests_tar_name}")
        kill_background_processes_and_run_apt_get_update(ec2_conn)
        with ec2_conn.cd(path):
            install_sm_local_dependencies(framework, job_type, image, ec2_conn, ec2_ami_id)
            pytest_cache_util.download_pytest_cache_from_s3_to_ec2(ec2_conn, path, **pytest_cache_params)
            # Workaround for mxnet cpu training images as test distributed
            # causes an issue with fabric ec2_connection
            if framework == "mxnet" and job_type == "training" and "cpu" in image:
                try:
                    ec2_conn.run(pytest_command, timeout=1000, warn=True)
                except exceptions.CommandTimedOut as exc:
                    print(f"Ec2 connection timed out for {image}, {exc}")
                finally:
                    print(f"Downloading Test reports for image: {image}")
                    ec2_conn.close()
                    # Use a fresh connection to fetch the report, then judge the run by the
                    # report content because warn=True suppressed the exit status.
                    ec2_conn_new = ec2_utils.get_ec2_fabric_connection(instance_id, key_file, region)
                    ec2_conn_new.get(
                        ec2_test_report_path, os.path.join("test", f"{job_type}_{tag}_sm_local.xml")
                    )
                    output = subprocess.check_output(
                        f"cat test/{job_type}_{tag}_sm_local.xml", shell=True, executable="/bin/bash"
                    )
                    pytest_cache_util.upload_pytest_cache_from_ec2_to_s3(
                        ec2_conn_new, path, **pytest_cache_params
                    )
                    if 'failures="0"' not in str(output):
                        raise ValueError(f"Sagemaker Local tests failed for {image}")
            else:
                ec2_conn.run(pytest_command)
                print(f"Downloading Test reports for image: {image}")
                ec2_conn.get(ec2_test_report_path, os.path.join("test", f"{job_type}_{tag}_sm_local.xml"))
    finally:
        # Each cleanup step is isolated so that a failure in one cannot skip the next:
        # cache upload -> instance termination -> key pair deletion.
        try:
            # ec2_conn is still None when key generation or the instance launch failed.
            if ec2_conn is not None:
                with ec2_conn.cd(path):
                    pytest_cache_util.upload_pytest_cache_from_ec2_to_s3(
                        ec2_conn, path, **pytest_cache_params
                    )
        finally:
            try:
                # instance_id is still "" when no instance was launched.
                if instance_id:
                    print(f"Terminating Instances for image: {image}")
                    ec2_utils.terminate_instance(instance_id, region)
            finally:
                print(f"Destroying ssh Key_pair for image: {image}")
                destroy_ssh_keypair(ec2_client, ec2_key_name)
    # return None here to prevent errors from multiprocessing.map(). Without this it returns some object by default
    # which is causing "cannot pickle '_thread.lock' object" error
    return None
def generate_sagemaker_pytest_cmd(image, sagemaker_test_type):
    """
    Build the SageMaker pytest invocation for an image from its ECR url.

    :param image: ECR url of image
    :param sagemaker_test_type: local or remote test type
    :return: <tuple> (pytest command for the requested test type, path where it should be executed,
             image tag, job type)
    """
    region = os.getenv("AWS_REGION", DEFAULT_REGION)
    account_id = os.getenv("ACCOUNT_ID", image.split(".")[0])
    print(f"image name {image}")

    repo_and_tag = image.split("/")[1]
    sm_remote_docker_base_name, tag = repo_and_tag.split(":")
    sm_local_docker_repo_uri = image.split(":")[0]

    # Assign instance type
    instance_type = assign_sagemaker_remote_job_instance_type(image)

    # NOTE: We are relying on the fact that repos are defined as <context>-<framework>-<job_type>
    # in our infrastructure
    framework, _ = get_framework_and_version_from_tag(image)
    job_type = get_job_type_from_image(image)
    path = os.path.join("test", "sagemaker_tests", framework, job_type)

    # The version used in the commands is read from the tag itself (three dot-separated numbers).
    framework_version = re.search(r"\d+(\.\d+){2}", tag).group()
    framework_major_version = framework_version.split(".")[0]

    if "gpu" in image:
        processor = "gpu"
    elif "eia" in image:
        processor = "eia"
    else:
        processor = "cpu"

    py_version = re.search(r"py\d+", tag).group()
    sm_local_py_version = {"py37": "37", "py27": "2"}.get(py_version, "3")

    is_tensorflow = framework == "tensorflow"

    if is_tensorflow and job_type == "inference":
        # Tf Inference tests have an additional sub directory with test
        integration_path = os.path.join("test", "integration", sagemaker_test_type)
    else:
        integration_path = os.path.join("integration", sagemaker_test_type)

    # Default pytest option names; tensorflow suites override some of them below.
    aws_id_arg = "--aws-id"
    docker_base_arg = "--docker-base-name"
    instance_type_arg = "--instance-type"

    # Conditions for modifying tensorflow SageMaker pytest commands
    if is_tensorflow and sagemaker_test_type == SAGEMAKER_REMOTE_TEST_TYPE and job_type == "inference":
        aws_id_arg = "--registry"
        docker_base_arg = "--repo"
        instance_type_arg = "--instance-types"
        test_file = "test_ei.py" if processor == "eia" else "test_tfs.py"
        integration_path = os.path.join(integration_path, test_file)

    if is_tensorflow and job_type == "training":
        aws_id_arg = "--account-id"

    report_name = f"{job_type}_{tag}"
    test_report = os.path.join(os.getcwd(), "test", f"{report_name}.xml")
    local_test_report = os.path.join(UBUNTU_HOME_DIR, "test", f"{report_name}_sm_local.xml")

    remote_pytest_cmd = (
        f"pytest {integration_path} --region {region} {docker_base_arg} "
        f"{sm_remote_docker_base_name} --tag {tag} {aws_id_arg} {account_id} "
        f"{instance_type_arg} {instance_type} --junitxml {test_report}"
    )
    if processor == "eia":
        remote_pytest_cmd = f"{remote_pytest_cmd} --accelerator-type ml.eia1.large"

    python_prefix = " python3 -m "
    local_pytest_cmd = (
        f"{python_prefix} pytest -v {integration_path} {docker_base_arg} "
        f"{sm_local_docker_repo_uri} --tag {tag} --framework-version {framework_version} "
        f"--processor {processor} {aws_id_arg} {account_id} --junitxml {local_test_report}"
    )
    if is_tensorflow and job_type != "inference":
        local_pytest_cmd = f"{local_pytest_cmd} --py-version {sm_local_py_version} --region {region}"

    if is_tensorflow and job_type == "training":
        # Tensorflow training tests live in a per-major-version directory.
        path = os.path.join(os.path.dirname(path), f"{framework}{framework_major_version}_training")

    if sagemaker_test_type == SAGEMAKER_REMOTE_TEST_TYPE:
        selected_cmd = remote_pytest_cmd
    else:
        selected_cmd = local_pytest_cmd
    return selected_cmd, path, tag, job_type
def execute_local_tests(image, ec2_client):
    """
    Run the sagemaker local tests in ec2 instance for the image

    The ssh key pair is always destroyed, and the instance is terminated whenever one was
    launched, even if an earlier step fails.

    :param image: ECR url
    :param ec2_client: boto3_obj
    :return: None
    :raises ValueError: if the mxnet cpu training report does not contain failures="0"
    """
    pytest_command, path, tag, job_type = generate_sagemaker_pytest_cmd(image, SAGEMAKER_LOCAL_TEST_TYPE)
    print(pytest_command)
    framework, _ = get_framework_and_version_from_tag(image)
    random.seed(f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')}")
    ec2_key_name = f"{job_type}_{tag}_sagemaker_{random.randint(1, 1000)}"
    region = os.getenv("AWS_REGION", DEFAULT_REGION)
    ec2_ami_id = UBUNTU_16_BASE_DLAMI_US_EAST_1 if region == "us-east-1" else UBUNTU_16_BASE_DLAMI_US_WEST_2
    sm_tests_tar_name = "sagemaker_tests.tar.gz"
    ec2_test_report_path = os.path.join(UBUNTU_HOME_DIR, "test", f"{job_type}_{tag}_sm_local.xml")
    local_report_path = os.path.join("test", f"{job_type}_{tag}_sm_local.xml")
    # Bound before the try block so the cleanup below never hits an unbound name when
    # key generation or the instance launch fails.
    instance_id = None
    try:
        key_file = generate_ssh_keypair(ec2_client, ec2_key_name)
        print(f"Launching new Instance for image: {image}")
        instance_id, _ip_address = launch_sagemaker_local_ec2_instance(image, ec2_ami_id, ec2_key_name, region)
        ec2_conn = ec2_utils.get_ec2_fabric_connection(instance_id, key_file, region)
        ec2_conn.put(sm_tests_tar_name, f"{UBUNTU_HOME_DIR}")
        ec2_conn.run(f"$(aws ecr get-login --no-include-email --region {region})")
        ec2_conn.run(f"docker pull {image}")
        ec2_conn.run(f"tar -xzf {sm_tests_tar_name}")
        with ec2_conn.cd(path):
            install_sm_local_dependencies(framework, job_type, image, ec2_conn)
            # Workaround for mxnet cpu training images as test distributed
            # causes an issue with fabric ec2_connection
            if framework == "mxnet" and job_type == "training" and "cpu" in image:
                try:
                    ec2_conn.run(pytest_command, timeout=1000, warn=True)
                except exceptions.CommandTimedOut as exc:
                    print(f"Ec2 connection timed out for {image}, {exc}")
                finally:
                    print(f"Downloading Test reports for image: {image}")
                    ec2_conn.close()
                    # Use a fresh connection to fetch the report, then judge the run by the
                    # report content because warn=True suppressed the exit status.
                    ec2_conn_new = ec2_utils.get_ec2_fabric_connection(instance_id, key_file, region)
                    ec2_conn_new.get(ec2_test_report_path, local_report_path)
                    output = subprocess.check_output(
                        f"cat test/{job_type}_{tag}_sm_local.xml", shell=True, executable="/bin/bash"
                    )
                    if 'failures="0"' not in str(output):
                        raise ValueError(f"Sagemaker Local tests failed for {image}")
            else:
                ec2_conn.run(pytest_command)
                print(f"Downloading Test reports for image: {image}")
                ec2_conn.get(ec2_test_report_path, local_report_path)
    finally:
        # Key pair deletion sits in its own finally so a failing termination cannot skip it.
        try:
            if instance_id is not None:
                print(f"Terminating Instances for image: {image}")
                ec2_utils.terminate_instance(instance_id, region)
        finally:
            print(f"Destroying ssh Key_pair for image: {image}")
            destroy_ssh_keypair(ec2_client, ec2_key_name)
def main():
    """
    Entry point of the test runner: dispatch on the TEST_TYPE environment variable.

    Reads its configuration from environment variables (TEST_TYPE, EFA_DEDICATED, EXECUTOR_MODE,
    DLC_IMAGE, CODEBUILD_RESOLVED_SOURCE_VERSION) and then either runs the dlc_tests pytest suite,
    the SageMaker remote/benchmark tests, or the SageMaker local tests. Branches that run pytest
    directly terminate the process through sys.exit() or raise RuntimeError on failure.

    :raises ValueError: for EKS tests when the images do not belong to exactly one framework
    :raises RuntimeError: when any dlc_tests pytest command exits with a non-zero status
    :raises NotImplementedError: when the test type is not handled here
    """
    # Define constants
    start_time = datetime.now()
    test_type = os.getenv("TEST_TYPE")

    efa_dedicated = os.getenv("EFA_DEDICATED", "False").lower() == "true"
    executor_mode = os.getenv("EXECUTOR_MODE", "False").lower() == "true"
    dlc_images = os.getenv("DLC_IMAGE") if executor_mode else get_dlc_images()
    # When executing locally, one can provide commit_id or may omit it.
    # Assigning a default value for local executions:
    commit_id = os.getenv("CODEBUILD_RESOLVED_SOURCE_VERSION", default="unrecognised_commit_id")
    LOGGER.info(f"Images tested: {dlc_images}")
    all_image_list = dlc_images.split(" ")
    standard_images_list = [image_uri for image_uri in all_image_list if "example" not in image_uri]
    # Do not create EKS cluster when only EIA images are present
    is_all_images_list_eia = all("eia" in image_uri for image_uri in all_image_list)
    eks_cluster_name = None
    benchmark_mode = "benchmark" in test_type or is_benchmark_dev_context()
    specific_test_type = re.sub("benchmark-", "", test_type) if "benchmark" in test_type else test_type
    build_context = get_build_context()

    # quick_checks tests don't have images in it. Using a placeholder here for jobs like that
    try:
        framework, version = get_framework_and_version_from_tag(all_image_list[0])
    except Exception:
        # Deliberately broad (any parsing failure falls back to the placeholder), but no longer
        # a bare except, so KeyboardInterrupt/SystemExit still propagate.
        framework, version = "general_test", "none"

    pytest_cache_params = {
        "commit_id": commit_id,
        "framework": framework,
        "version": version,
        "build_context": build_context,
        "test_type": test_type,
    }

    # In PR context, allow us to switch sagemaker tests to RC tests.
    # Do not allow them to be both enabled due to capacity issues.
    if specific_test_type == "sagemaker" and is_rc_test_context() and is_pr_context():
        specific_test_type = "release_candidate_integration"

    test_path = os.path.join("benchmark", specific_test_type) if benchmark_mode else specific_test_type

    # Skipping non HuggingFace/AG specific tests to execute only sagemaker tests
    is_hf_image_present = any("huggingface" in image_uri for image_uri in all_image_list)
    is_ag_image_present = any("autogluon" in image_uri for image_uri in all_image_list)
    if (is_hf_image_present or is_ag_image_present) and specific_test_type in ("ecs", "ec2", "eks", "bai"):
        # Creating an empty report file because the codebuild job fails without it
        LOGGER.info(f"NOTE: {specific_test_type} tests not supported on HF or AG. Skipping...")
        report = os.path.join(os.getcwd(), "test", f"{test_type}.xml")
        sm_utils.generate_empty_report(report, test_type, "huggingface")
        return

    if specific_test_type in (
        "sanity",
        "ecs",
        "ec2",
        "eks",
        "canary",
        "bai",
        "quick_checks",
        "release_candidate_integration",
    ):
        report = os.path.join(os.getcwd(), "test", f"{test_type}.xml")

        # PyTest must be run in this directory to avoid conflicting w/ sagemaker_tests conftests
        os.chdir(os.path.join("test", "dlc_tests"))

        # Pull images for necessary tests
        if specific_test_type == "sanity":
            pull_dlc_images(all_image_list)
        if specific_test_type == "bai":
            build_bai_docker_container()
        if specific_test_type == "eks" and not is_all_images_list_eia:
            frameworks_in_images = [
                framework for framework in ("mxnet", "pytorch", "tensorflow") if framework in dlc_images
            ]
            if len(frameworks_in_images) != 1:
                raise ValueError(
                    f"All images in dlc_images must be of a single framework for EKS tests.\n"
                    f"Instead seeing {frameworks_in_images} frameworks."
                )
            framework = frameworks_in_images[0]
            eks_cluster_name = f"{framework}-{build_context}"
            eks_utils.eks_setup()
            if eks_utils.is_eks_cluster_active(eks_cluster_name):
                eks_utils.eks_write_kubeconfig(eks_cluster_name)
            else:
                raise Exception(f"EKS cluster {eks_cluster_name} is not in active state")

        # Execute dlc_tests pytest command
        pytest_cmd = ["-s", "-rA", test_path, f"--junitxml={report}", "-n=auto"]

        is_habana_image = any("habana" in image_uri for image_uri in all_image_list)
        if specific_test_type == "ec2":
            if is_habana_image:
                context = Context()
                context.run("git clone https://github.com/HabanaAI/gaudi-test-suite.git")
                context.run("tar -c -f gaudi-test-suite.tar.gz gaudi-test-suite")
            else:
                pytest_cmd += ["--reruns=1", "--reruns-delay=10"]

        if is_pr_context():
            if specific_test_type == "eks":
                pytest_cmd.append("--timeout=2340")
            elif is_habana_image:
                pytest_cmd.append("--timeout=18000")
            else:
                pytest_cmd.append("--timeout=4860")

        pytest_cmds = [pytest_cmd]
        # Execute separate cmd for canaries
        if specific_test_type in ("canary", "quick_checks"):
            pytest_cmds = [
                [
                    "-s",
                    "-rA",
                    f"--junitxml={report}",
                    "-n=auto",
                    f"--{specific_test_type}",
                    "--ignore=container_tests/",
                ]
            ]

        # Re-run only previously failed tests when the downloaded cache has failures, else run all.
        pytest_cmds = [cmd + ["--last-failed", "--last-failed-no-failures", "all"] for cmd in pytest_cmds]
        pytest_cache_util.download_pytest_cache_from_s3_to_local(os.getcwd(), **pytest_cache_params)
        try:
            # Note:- Running multiple pytest_cmds in a sequence will result in the execution log having two
            #        separate pytest reports, both of which must be examined in case of a manual review of results.
            cmd_exit_statuses = [pytest.main(cmd) for cmd in pytest_cmds]
            if all(status == 0 for status in cmd_exit_statuses):
                sys.exit(0)
            else:
                raise RuntimeError(pytest_cmds)
        finally:
            # Runs on both the sys.exit(0) and the RuntimeError path.
            pytest_cache_util.upload_pytest_cache_from_local_to_s3(os.getcwd(), **pytest_cache_params)
            # Delete dangling EC2 KeyPairs
            if os.path.exists(KEYS_TO_DESTROY_FILE):
                delete_key_pairs(KEYS_TO_DESTROY_FILE)

    elif specific_test_type == "sagemaker":
        if "habana" in dlc_images:
            LOGGER.info(f"Skipping SM tests for Habana. Images: {dlc_images}")
            # Creating an empty report file because the codebuild job fails without it
            report = os.path.join(os.getcwd(), "test", f"{test_type}.xml")
            sm_utils.generate_empty_report(report, test_type, "habana")
            return
        if benchmark_mode:
            if "neuron" in dlc_images:
                LOGGER.info(f"Skipping benchmark sm tests for Neuron. Images: {dlc_images}")
                # Creating an empty report file because the codebuild job fails without it
                report = os.path.join(os.getcwd(), "test", f"{test_type}.xml")
                sm_utils.generate_empty_report(report, test_type, "neuron")
                return
            report = os.path.join(os.getcwd(), "test", f"{test_type}.xml")
            os.chdir(os.path.join("test", "dlc_tests"))

            setup_sm_benchmark_env(dlc_images, test_path)
            pytest_cmd = [
                "-s",
                "-rA",
                test_path,
                f"--junitxml={report}",
                "-n=auto",
                "-o",
                "norecursedirs=resources",
            ]
            if not is_pr_context():
                pytest_cmd += ["--efa"] if efa_dedicated else ["-m", "not efa"]
            sys.exit(pytest.main(pytest_cmd))

        else:
            sm_remote_images = [
                image
                for image in standard_images_list
                if not (("tensorflow-inference" in image and "py2" in image) or is_e3_image(image))
            ]
            run_sagemaker_remote_tests(sm_remote_images, pytest_cache_params)
            if standard_images_list and not sm_remote_images:
                report = os.path.join(os.getcwd(), "test", f"{test_type}.xml")
                sm_utils.generate_empty_report(report, test_type, "sm_remote_unsupported")
        metrics_utils.send_test_duration_metrics(start_time)

    elif specific_test_type == "sagemaker-local":
        if "neuron" in dlc_images:
            LOGGER.info(f"Skipping sagemaker tests because Neuron is not yet supported on SM. Images: {dlc_images}")
            # Creating an empty report file because the codebuild job fails without it
            report = os.path.join(os.getcwd(), "test", f"{test_type}.xml")
            sm_utils.generate_empty_report(report, test_type, "neuron")
            return
        if "habana" in dlc_images:
            LOGGER.info(f"Skipping sagemaker tests because Habana is not yet supported on SM. Images: {dlc_images}")
            # Creating an empty report file because the codebuild job fails without it
            report = os.path.join(os.getcwd(), "test", f"{test_type}.xml")
            sm_utils.generate_empty_report(report, test_type, "habana")
            return
        testing_image_list = [
            image
            for image in standard_images_list
            if not (
                ("tensorflow-inference" in image and "py2" in image)
                or ("eia" in image)
                or (is_e3_image(image))
            )
        ]
        run_sagemaker_local_tests(testing_image_list, pytest_cache_params)
        # for EIA Images
        if not testing_image_list:
            report = os.path.join(os.getcwd(), "test", f"{test_type}.xml")
            sm_utils.generate_empty_report(report, test_type, "eia")
    else:
        raise NotImplementedError(
            f"{test_type} test is not supported. Only support ec2, ecs, eks, sagemaker and sanity currently"
        )