def test_ecs_mxnet_inference_gluonnlp_gpu(mxnet_inference, ecs_container_instance, region, gpu_only, py3_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_gpus = ec2_utils.get_instance_num_gpus(worker_instance_id, region=region)

    model_name = "bert_sst"
    service_name = task_family = revision = None
    try:
        # Stand up an ECS inference service for the GluonNLP BERT sentiment model.
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            mxnet_inference,
            "mxnet",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_gpus=num_gpus,
            region=region,
        )
        inference_result = request_mxnet_inference_gluonnlp(public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        # Tear the service down even if setup or inference failed.
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
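# A minimal sketch of the request_mxnet_inference_gluonnlp helper called above, assuming the
# model server exposes the MMS-style /predictions/<model> endpoint for the "bert_sst" model.
# The endpoint path, payload shape, and success check here are illustrative assumptions, not
# the repository's actual implementation.
import requests


def request_mxnet_inference_gluonnlp_sketch(ip_address="127.0.0.1", port="80"):
    # bert_sst is a sentiment-analysis model, so the payload is a batch of sentences.
    url = f"http://{ip_address}:{port}/predictions/bert_sst"
    payload = {"input": ["Positive sentiment", "Negative sentiment"]}
    try:
        response = requests.post(url, json=payload, timeout=60)
    except requests.exceptions.ConnectionError:
        return False
    # Treat any 200 response as a successful inference round trip.
    return response.status_code == 200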
def run_ec2_mxnet_inference(image_uri, model_name, container_tag, ec2_connection, processor, region, target_port, target_management_port):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2-{container_tag}"
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"
    mms_inference_cmd = test_utils.get_inference_run_command(image_uri, model_name, processor)
    docker_run_cmd = (
        f"{docker_cmd} run -itd --name {container_name}"
        f" -p {target_port}:8080 -p {target_management_port}:8081"
        f" {image_uri} {mms_inference_cmd}"
    )
    try:
        # Log in to ECR so the instance can pull the image under test, then start the model server.
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})", hide=True
        )
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)

        # Initialize so an unrecognized model_name fails the assert below instead of raising NameError.
        inference_result = None
        if model_name == SQUEEZENET_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model="squeezenet"
            )
        elif model_name == BERT_MODEL:
            inference_result = test_utils.request_mxnet_inference_gluonnlp(
                port=target_port, connection=ec2_connection
            )
        elif model_name == RESNET_EIA_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model=model_name
            )
        assert inference_result, (
            f"Failed to perform mxnet {model_name} inference test for image: {image_uri} on ec2"
        )
    finally:
        ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)
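# An illustrative caller for run_ec2_mxnet_inference, assuming the usual pytest fixtures
# (mxnet_inference, ec2_connection, region) are provided elsewhere in this suite. The port
# values are arbitrary, and this wrapper is a hypothetical example, not a test that exists
# in the repository.
def test_ec2_mxnet_squeezenet_inference_example(mxnet_inference, ec2_connection, region):
    processor = "gpu" if "gpu" in mxnet_inference else "cpu"
    run_ec2_mxnet_inference(
        mxnet_inference,
        SQUEEZENET_MODEL,
        "squeezenet",
        ec2_connection,
        processor,
        region,
        target_port=80,
        target_management_port=8081,
    )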
def test_eks_mxnet_gluonnlp_inference(mxnet_inference, py3_only):
    if "eia" in mxnet_inference:
        pytest.skip("Skipping EKS Test for EIA")

    num_replicas = "1"
    # Random suffix keeps resource names unique across concurrent test runs.
    rand_int = random.randint(4001, 6000)
    processor = "gpu" if "gpu" in mxnet_inference else "cpu"
    model = "https://aws-dlc-sample-models.s3.amazonaws.com/bert_sst/bert_sst.mar"
    yaml_path = os.path.join(
        os.sep, "tmp", f"mxnet_single_node_gluonnlp_{processor}_inference_{rand_int}.yaml"
    )
    inference_service_name = selector_name = f"gluonnlp-service-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": mxnet_inference,
    }
    if processor == "gpu":
        search_replace_dict["<NUM_GPUS>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("mxnet", processor),
        yaml_path,
        search_replace_dict,
    )

    try:
        run(f"kubectl apply -f {yaml_path}")
        # Forward a random ephemeral port on the host to the model server's port 8080.
        port_to_forward = random.randint(49152, 65535)
        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080"
            )
            assert test_utils.request_mxnet_inference_gluonnlp(port=port_to_forward)
    except ValueError as excp:
        eks_utils.LOGGER.error("Service is not running: %s", excp)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
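# A minimal sketch of what eks_utils.write_eks_yaml_file_from_template is assumed to do:
# read the template, substitute each <PLACEHOLDER> token from search_replace_dict, and write
# the result to yaml_path. The real helper may differ; this only illustrates the
# token-replacement contract the test above relies on.
def write_eks_yaml_file_from_template_sketch(template_path, yaml_path, search_replace_dict):
    with open(template_path, "r") as template_file:
        contents = template_file.read()
    # Plain string replacement; placeholders and values are both expected to be strings.
    for placeholder, value in search_replace_dict.items():
        contents = contents.replace(placeholder, value)
    with open(yaml_path, "w") as yaml_file:
        yaml_file.write(contents)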