def run_ec2_mxnet_inference(image_uri, model_name, container_tag, ec2_connection, processor, region, target_port, target_management_port):
    """Run an MXNet inference container on a remote EC2 host and verify a prediction.

    Starts the model-server container from ``image_uri`` on the instance behind
    ``ec2_connection``, sends an inference request for ``model_name``, and
    asserts the request succeeded. The container is always removed in the
    ``finally`` block, even when the test fails.

    :param image_uri: ECR URI of the inference image under test
    :param model_name: one of SQUEEZENET_MODEL, BERT_MODEL, RESNET_EIA_MODEL
    :param container_tag: suffix used to build a unique container name
    :param ec2_connection: fabric-style connection to the EC2 instance
    :param processor: processor tag forwarded to the inference run command
    :param region: AWS region used for the ECR login
    :param target_port: host port mapped to container port 8080 (inference)
    :param target_management_port: host port mapped to container port 8081 (management)
    :raises ValueError: if ``model_name`` is not one of the recognized models
    """
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2-{container_tag}"
    # GPU images must be launched through nvidia-docker to expose the devices.
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"
    mms_inference_cmd = test_utils.get_inference_run_command(image_uri, model_name, processor)
    docker_run_cmd = (
        f"{docker_cmd} run -itd --name {container_name}"
        f" -p {target_port}:8080 -p {target_management_port}:8081"
        f" {image_uri} {mms_inference_cmd}"
    )
    try:
        # Authenticate the remote docker daemon against ECR before pulling.
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})", hide=True
        )
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        if model_name == SQUEEZENET_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model="squeezenet"
            )
        elif model_name == BERT_MODEL:
            inference_result = test_utils.request_mxnet_inference_gluonnlp(
                port=target_port, connection=ec2_connection
            )
        elif model_name == RESNET_EIA_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model=model_name
            )
        else:
            # Previously an unrecognized model fell through and the assert
            # below raised an opaque UnboundLocalError; fail loudly instead.
            raise ValueError(f"Unknown model_name for inference test: {model_name}")
        assert (
            inference_result
        ), f"Failed to perform mxnet {model_name} inference test for image: {image_uri} on ec2"
    finally:
        # warn=True so a cleanup failure does not mask the original exception.
        ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)
def test_ecs_mxnet_inference_gpu(mxnet_inference, ecs_container_instance, region, gpu_only):
    """Deploy the squeezenet model as an ECS service on a GPU container
    instance and verify that an inference request succeeds.

    The ECS service is always torn down in the ``finally`` block.
    """
    instance_id, cluster_arn = ecs_container_instance
    ip_address = ec2_utils.get_public_ip(instance_id, region=region)
    gpu_count = ec2_utils.get_instance_num_gpus(instance_id, region=region)
    model_name = "squeezenet"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            mxnet_inference,
            "mxnet",
            cluster_arn,
            model_name,
            instance_id,
            num_gpus=gpu_count,
            region=region,
        )
        assert request_mxnet_inference(
            ip_address
        ), f"Failed to perform inference at IP address: {ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(
            cluster_arn, service_name, task_family, revision
        )
def test_ecs_mxnet_inference_eia(mxnet_inference_eia, ecs_container_instance, ei_accelerator_type, region, eia_only):
    """Deploy the resnet-152-eia model as an ECS service on an instance with an
    Elastic Inference accelerator and verify that an inference request succeeds.

    The ECS service is always torn down in the ``finally`` block.
    """
    instance_id, cluster_arn = ecs_container_instance
    ip_address = ec2_utils.get_public_ip(instance_id, region=region)
    model_name = "resnet-152-eia"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            mxnet_inference_eia,
            "mxnet",
            cluster_arn,
            model_name,
            instance_id,
            ei_accelerator_type,
            region=region,
        )
        assert request_mxnet_inference(
            ip_address, model="resnet-152-eia"
        ), f"Failed to perform inference at IP address: {ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(
            cluster_arn, service_name, task_family, revision
        )
def test_eks_mxnet_neuron_inference(mxnet_inference, neuron_only):
    """Deploy the neuron resnet50 model to EKS and verify an inference request.

    Skipped for EIA and non-neuron images. Renders the single-node inference
    YAML template, reinstalls the neuron device plugin, applies the deployment,
    port-forwards to the service, and asserts that inference succeeds.
    Kubernetes resources are deleted in the ``finally`` block.
    """
    if "eia" in mxnet_inference or "neuron" not in mxnet_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")
    num_replicas = "1"
    rand_int = random.randint(4001, 6000)
    processor = "neuron"
    model = "mxnet-resnet50=https://aws-dlc-sample-models.s3.amazonaws.com/mxnet/Resnet50-neuron.mar"
    yaml_path = os.path.join(os.sep, "tmp", f"mxnet_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"resnet50-{processor}-{rand_int}"
    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": mxnet_inference
    }
    search_replace_dict["<NUM_INF1S>"] = "1"
    device_plugin_path = eks_utils.get_device_plugin_path("mxnet", processor)
    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("mxnet", processor), yaml_path, search_replace_dict
    )
    try:
        # TODO - once eksctl gets the latest neuron device plugin this can be removed
        run("kubectl delete -f {}".format(device_plugin_path))
        sleep(60)
        run("kubectl apply -f {}".format(device_plugin_path))
        sleep(10)
        run("kubectl apply -f {}".format(yaml_path))
        port_to_forward = random.randint(49152, 65535)
        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(selector_name, port_to_forward, "8080")
        assert test_utils.request_mxnet_inference(port=port_to_forward, model="mxnet-resnet50")
    except ValueError as excp:
        eks_utils.LOGGER.error("Service is not running: %s", excp)
        # Re-raise: silently swallowing the error let this test pass even when
        # the service never came up (other variants of this test don't catch it).
        raise
    finally:
        run("kubectl cluster-info dump")
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def test_eks_mxnet_squeezenet_inference(mxnet_inference):
    """Deploy a single-node squeezenet model server to EKS and verify inference.

    Skipped for EIA and neuron images. Renders the single-node inference YAML
    template (requesting one GPU for GPU images), applies the deployment,
    port-forwards to the service, and asserts that inference succeeds.
    Kubernetes resources are deleted in the ``finally`` block.
    """
    if "eia" in mxnet_inference or "neuron" in mxnet_inference:
        pytest.skip("Skipping EKS Test for EIA and neuron images")
    num_replicas = "1"
    rand_int = random.randint(4001, 6000)
    processor = "gpu" if "gpu" in mxnet_inference else "cpu"
    model = "squeezenet=https://s3.amazonaws.com/model-server/models/squeezenet_v1.1/squeezenet_v1.1.model"
    yaml_path = os.path.join(
        os.sep, "tmp", f"mxnet_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"squeezenet-service-{rand_int}"
    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": mxnet_inference
    }
    if processor == "gpu":
        search_replace_dict["<NUM_GPUS>"] = "1"
    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("mxnet", processor),
        yaml_path, search_replace_dict)
    try:
        run("kubectl apply -f {}".format(yaml_path))
        port_to_forward = random.randint(49152, 65535)
        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")
        assert test_utils.request_mxnet_inference(port=port_to_forward)
    except ValueError as excp:
        eks_utils.LOGGER.error("Service is not running: %s", excp)
        # Re-raise so the test fails when the service never comes up; the
        # previous log-and-swallow made this test pass vacuously.
        raise
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def test_eks_mxnet_neuron_inference(mxnet_inference, neuron_only):
    """Deploy the neuron resnet50 model server to EKS via an explicit server
    command and verify that an inference request succeeds.

    Skipped for EIA and non-neuron images. Kubernetes resources are deleted
    in the ``finally`` block.
    """
    if "eia" in mxnet_inference or "neuron" not in mxnet_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")
    replica_count = "1"
    suffix = random.randint(4001, 6000)
    processor = "neuron"
    server_cmd = (
        "/usr/local/bin/entrypoint.sh -m "
        "mxnet-resnet50=https://aws-dlc-sample-models.s3.amazonaws.com/mxnet/Resnet50-neuron.mar "
        "-t /home/model-server/config.properties"
    )
    yaml_path = os.path.join(os.sep, "tmp", f"mxnet_single_node_{processor}_inference_{suffix}.yaml")
    inference_service_name = selector_name = f"resnet50-{processor}-{suffix}"
    replacements = {
        "<NUM_REPLICAS>": replica_count,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": mxnet_inference,
        "<SERVER_CMD>": server_cmd,
        "<NUM_INF1S>": "1",
    }
    template_path = eks_utils.get_single_node_inference_template_path("mxnet", processor)
    eks_utils.write_eks_yaml_file_from_template(template_path, yaml_path, replacements)
    try:
        run(f"kubectl apply -f {yaml_path}")
        forwarded_port = random.randint(49152, 65535)
        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, forwarded_port, "8080"
            )
        assert test_utils.request_mxnet_inference(port=forwarded_port, model="mxnet-resnet50")
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def __test_eks_mxnet_squeezenet_inference(mxnet_inference):
    """Single-node squeezenet inference test for EKS.

    NOTE(review): the double-underscore prefix keeps pytest from collecting
    this — it appears intentionally disabled; confirm before re-enabling.
    Kubernetes resources are deleted in the ``finally`` block.
    """
    replica_count = "1"
    suffix = random.randint(4001, 6000)
    processor = "gpu" if "gpu" in mxnet_inference else "cpu"
    test_type = test_utils.get_eks_k8s_test_type_label(mxnet_inference)
    model = "squeezenet=https://s3.amazonaws.com/model-server/models/squeezenet_v1.1/squeezenet_v1.1.model"
    yaml_path = os.path.join(os.sep, "tmp", f"mxnet_single_node_{processor}_inference_{suffix}.yaml")
    inference_service_name = selector_name = f"squeezenet-service-{suffix}"
    replacements = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": replica_count,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": mxnet_inference,
        "<TEST_TYPE>": test_type,
    }
    if processor == "gpu":
        replacements["<NUM_GPUS>"] = "1"
    template_path = eks_utils.get_single_node_inference_template_path("mxnet", processor)
    eks_utils.write_eks_yaml_file_from_template(template_path, yaml_path, replacements)
    try:
        run(f"kubectl apply -f {yaml_path}")
        forwarded_port = random.randint(49152, 65535)
        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, forwarded_port, "8080"
            )
        assert test_utils.request_mxnet_inference(port=forwarded_port)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")