def test_ecs_pytorch_inference_gpu(pytorch_inference, ecs_container_instance, region, gpu_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_gpus = ec2_utils.get_instance_num_gpus(worker_instance_id, region=region)

    model_name = "pytorch-densenet"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            pytorch_inference,
            "pytorch",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_gpus=num_gpus,
            region=region,
        )
        inference_result = request_pytorch_inference_densenet(public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
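
# The try/finally pattern above pre-initializes service_name/task_family/revision to None so
# that teardown can run even when setup fails partway through. A minimal sketch of what a
# teardown like ecs_utils.tear_down_ecs_inference_service could look like -- an assumption
# for illustration, not the actual ecs_utils code -- using the standard boto3 ECS API:
def _tear_down_ecs_inference_service_sketch(ecs_cluster_arn, service_name, task_family, revision, region):
    import boto3

    ecs_client = boto3.client("ecs", region_name=region)
    if service_name:
        # ECS requires scaling a service to zero desired tasks before it can be deleted.
        ecs_client.update_service(cluster=ecs_cluster_arn, service=service_name, desiredCount=0)
        ecs_client.delete_service(cluster=ecs_cluster_arn, service=service_name)
    if task_family and revision:
        ecs_client.deregister_task_definition(taskDefinition=f"{task_family}:{revision}")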

def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    model_name = "pytorch-densenet"
    mms_inference_cmd = test_utils.get_mms_run_command(model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"

    docker_run_cmd = (
        f"{docker_cmd} run -itd --name {container_name}"
        f" -p 80:8080 -p 8081:8081"
        f" {image_uri} {mms_inference_cmd}"
    )
    try:
        ec2_connection.run(f"$(aws ecr get-login --no-include-email --region {region})", hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        inference_result = test_utils.request_pytorch_inference_densenet(connection=ec2_connection)
        assert inference_result, f"Failed to perform pytorch inference test for image: {image_uri} on ec2"
    finally:
        ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)
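
# get_mms_run_command is assumed to assemble the multi-model-server launch command that is
# baked into the docker run line above. A minimal sketch under that assumption -- the model
# archive URL below is hypothetical, and the real test_utils helper may resolve it per
# processor:
def _get_mms_run_command_sketch(model_name, processor="cpu"):
    # Hypothetical archive location; the real suite maps model names to their .mar URLs.
    model_url = f"https://dlc-samples.s3.amazonaws.com/pytorch/multi-model-server/{model_name}.mar"
    return (
        "multi-model-server --start"
        " --mms-config /home/model-server/config.properties"
        f" --models {model_name}={model_url}"
    )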

def test_ecs_pytorch_inference_eia(pytorch_inference_eia, ecs_container_instance, ei_accelerator_type, region, eia_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)

    model_name = "pytorch-densenet"
    image_framework, image_framework_version = get_framework_and_version_from_tag(pytorch_inference_eia)
    if image_framework_version == "1.3.1":
        model_name = "pytorch-densenet-v1-3-1"

    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            pytorch_inference_eia,
            "pytorch",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            ei_accelerator_type,
            region=region,
        )
        inference_result = request_pytorch_inference_densenet(public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
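
# The 1.3.1 model swap above depends on parsing the framework version out of the image tag.
# A minimal regex sketch of what get_framework_and_version_from_tag might do -- an
# assumption for illustration; e.g. ".../pytorch-inference-eia:1.3.1-cpu-py36" would parse
# to ("pytorch", "1.3.1"):
def _get_framework_and_version_from_tag_sketch(image_uri):
    import re

    framework_match = re.search(r"(tensorflow|mxnet|pytorch)", image_uri)
    version_match = re.search(r":(\d+(?:\.\d+){1,2})", image_uri)
    framework = framework_match.group(1) if framework_match else None
    version = version_match.group(1) if version_match else None
    return framework, version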

def test_ecs_pytorch_inference_neuron(pytorch_inference_neuron, ecs_container_instance, region):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_neurons = ec2_utils.get_instance_num_inferentias(worker_instance_id, region=region)

    model_name = "pytorch-resnet-neuron"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            pytorch_inference_neuron,
            "pytorch",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_neurons=num_neurons,
            region=region,
        )
        server_type = get_inference_server_type(pytorch_inference_neuron)
        inference_result = request_pytorch_inference_densenet(
            public_ip_address, server_type=server_type, model_name=model_name
        )
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
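
# num_neurons is assumed to come from a lookup of the worker's instance type against the
# published Inferentia chip counts for inf1 instance sizes. A minimal boto3 sketch -- an
# assumption about ec2_utils.get_instance_num_inferentias, not its actual code:
def _get_instance_num_inferentias_sketch(instance_id, region):
    import boto3

    # Inferentia chips per inf1 size, per AWS instance documentation.
    inferentia_counts = {"inf1.xlarge": 1, "inf1.2xlarge": 1, "inf1.6xlarge": 4, "inf1.24xlarge": 16}
    ec2_client = boto3.client("ec2", region_name=region)
    reservations = ec2_client.describe_instances(InstanceIds=[instance_id])["Reservations"]
    instance_type = reservations[0]["Instances"][0]["InstanceType"]
    return inferentia_counts.get(instance_type, 0)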

def test_eks_pytorch_densenet_inference(pytorch_inference):
    server_type = test_utils.get_inference_server_type(pytorch_inference)
    if "eia" in pytorch_inference:
        pytest.skip("Skipping EKS test for EIA")
    elif "neuron" in pytorch_inference:
        pytest.skip("A Neuron-specific test covers this case, so skipping this test for Neuron")
    elif server_type == "ts":
        model = "pytorch-densenet=https://torchserve.s3.amazonaws.com/mar_files/densenet161.mar"
        server_cmd = "torchserve"
    else:
        model = "pytorch-densenet=https://dlc-samples.s3.amazonaws.com/pytorch/multi-model-server/densenet/densenet.mar"
        server_cmd = "multi-model-server"

    num_replicas = "1"
    rand_int = random.randint(4001, 6000)
    processor = "gpu" if "gpu" in pytorch_inference else "cpu"
    yaml_path = os.path.join(os.sep, "tmp", f"pytorch_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"densenet-service-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": pytorch_inference,
        "<SERVER_TYPE>": server_type,
        "<SERVER_CMD>": server_cmd,
    }
    if processor == "gpu":
        search_replace_dict["<NUM_GPUS>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("pytorch", processor), yaml_path, search_replace_dict
    )

    try:
        run(f"kubectl apply -f {yaml_path}")
        port_to_forward = random.randint(49152, 65535)
        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(selector_name, port_to_forward, "8080")
        assert test_utils.request_pytorch_inference_densenet(port=port_to_forward, server_type=server_type)
    except ValueError as excp:
        eks_utils.LOGGER.error("Service is not running: %s", excp)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
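
# The EKS tests render their manifest by plain string substitution over a template file. A
# minimal sketch of write_eks_yaml_file_from_template under that assumption (the real
# eks_utils helper may also log or validate the rendered YAML):
def _write_eks_yaml_file_from_template_sketch(template_path, output_path, search_replace_dict):
    with open(template_path, "r") as template_file:
        contents = template_file.read()
    # Replace each <PLACEHOLDER> token with its concrete value.
    for placeholder, replacement in search_replace_dict.items():
        contents = contents.replace(placeholder, replacement)
    with open(output_path, "w") as output_file:
        output_file.write(contents)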

def test_eks_pytorch_neuron_inference(pytorch_inference, neuron_only):
    server_type = test_utils.get_inference_server_type(pytorch_inference)
    if "neuron" not in pytorch_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")

    model = "pytorch-resnet-neuron=https://aws-dlc-sample-models.s3.amazonaws.com/pytorch/Resnet50-neuron.mar"
    server_cmd = (
        "/usr/local/bin/entrypoint.sh"
        " -m pytorch-resnet-neuron=https://aws-dlc-sample-models.s3.amazonaws.com/pytorch/Resnet50-neuron.mar"
        " -t /home/model-server/config.properties"
    )
    num_replicas = "1"
    rand_int = random.randint(4001, 6000)
    processor = "neuron"
    yaml_path = os.path.join(os.sep, "tmp", f"pytorch_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"resnet-{processor}-{rand_int}"

    search_replace_dict = {
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": pytorch_inference,
        "<SERVER_TYPE>": server_type,
        "<SERVER_CMD>": server_cmd,
        "<NUM_INF1S>": "1",
    }

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("pytorch", processor), yaml_path, search_replace_dict
    )

    device_plugin_path = eks_utils.get_device_plugin_path("pytorch", processor)
    try:
        # TODO - once eksctl gets the latest neuron device plugin this can be removed
        run(f"kubectl delete -f {device_plugin_path}")
        sleep(60)
        run(f"kubectl apply -f {device_plugin_path}")
        sleep(10)

        run(f"kubectl apply -f {yaml_path}")
        port_to_forward = random.randint(49152, 65535)
        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(selector_name, port_to_forward, "8080")
        assert test_utils.request_pytorch_inference_densenet(port=port_to_forward)
    except ValueError as excp:
        run("kubectl cluster-info dump")
        eks_utils.LOGGER.error("Service is not running: %s", excp)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
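
# is_service_running is assumed to poll pod phase for the deployment's selector until the
# pods report Running, raising ValueError on timeout (which the tests above catch). A
# minimal sketch of that polling loop -- an assumption about eks_utils, not its actual code:
def _is_service_running_sketch(selector_name, attempts=30, interval=10):
    from time import sleep

    from invoke import run

    for _ in range(attempts):
        result = run(
            f"kubectl get pods --selector=app={selector_name}"
            " -o jsonpath='{.items[*].status.phase}'",
            warn=True,
        )
        if result.ok and "Running" in result.stdout:
            return True
        sleep(interval)
    raise ValueError(f"Pods for selector {selector_name} did not reach Running state")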

def test_eks_pytorch_neuron_inference(pytorch_inference, neuron_only):
    server_type = test_utils.get_inference_server_type(pytorch_inference)
    if "neuron" not in pytorch_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")

    model = "pytorch-resnet-neuron=https://aws-dlc-sample-models.s3.amazonaws.com/pytorch/Resnet50-neuron.mar"
    server_cmd = "torchserve"
    num_replicas = "1"
    rand_int = random.randint(4001, 6000)
    processor = "neuron"
    yaml_path = os.path.join(os.sep, "tmp", f"pytorch_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"resnet-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": pytorch_inference,
        "<SERVER_TYPE>": server_type,
        "<SERVER_CMD>": server_cmd,
        "<NUM_INF1S>": "1",
    }

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("pytorch", processor), yaml_path, search_replace_dict
    )

    try:
        run(f"kubectl apply -f {yaml_path}")
        port_to_forward = random.randint(49152, 65535)
        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(selector_name, port_to_forward, "8080")
        assert test_utils.request_pytorch_inference_densenet(
            port=port_to_forward, server_type=server_type, model_name="pytorch-resnet-neuron"
        )
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
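
# Port forwarding between the test host and the serving pod is assumed to wrap kubectl
# port-forward left running in the background. A minimal sketch of
# eks_forward_port_between_host_and_container under that assumption (the real eks_utils
# helper may track or clean up the child process):
def _eks_forward_port_between_host_and_container_sketch(selector_name, host_port, container_port):
    import subprocess

    # Detached child process; requests to localhost:{host_port} then reach the pod.
    return subprocess.Popen(
        ["kubectl", "port-forward", f"deployment/{selector_name}", f"{host_port}:{container_port}"]
    )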

def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"

    model_name = "pytorch-densenet"
    if processor == "eia":
        image_framework, image_framework_version = get_framework_and_version_from_tag(image_uri)
        if image_framework_version == "1.3.1":
            model_name = "pytorch-densenet-v1-3-1"
    if processor == "neuron":
        model_name = "pytorch-resnet-neuron"

    inference_cmd = test_utils.get_inference_run_command(image_uri, model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"

    if processor == "neuron":
        # Stop neuron-rtd in the host env so the DLC can start its own runtime daemon
        ec2_connection.run("sudo systemctl stop neuron-rtd")
        docker_run_cmd = (
            f"{docker_cmd} run -itd --name {container_name}"
            f" -p 80:8080 -p 8081:8081"
            f" --device=/dev/neuron0 --cap-add IPC_LOCK"
            f" --env NEURON_MONITOR_CW_REGION={region}"
            f" {image_uri} {inference_cmd}"
        )
    else:
        docker_run_cmd = (
            f"{docker_cmd} run -itd --name {container_name}"
            f" -p 80:8080 -p 8081:8081"
            f" {image_uri} {inference_cmd}"
        )
    try:
        ec2_connection.run(f"$(aws ecr get-login --no-include-email --region {region})", hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        server_type = get_inference_server_type(image_uri)
        inference_result = test_utils.request_pytorch_inference_densenet(
            connection=ec2_connection, model_name=model_name, server_type=server_type
        )
        assert inference_result, f"Failed to perform pytorch inference test for image: {image_uri} on ec2"
    finally:
        ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)
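
# request_pytorch_inference_densenet is the shared assertion helper every test above relies
# on. A minimal sketch of its assumed behavior -- POST a sample image to the model endpoint
# and treat a non-empty successful response as a pass. This is an assumption: the real
# test_utils helper also handles retries and server_type-specific endpoints, and flower.jpg
# is an assumed sample payload path.
def _request_pytorch_inference_densenet_sketch(
    ip_address="127.0.0.1", port="80", connection=None, model_name="pytorch-densenet", server_type="ts"
):
    from invoke import run as local_run

    # Run locally or over the remote Fabric connection, matching how the tests call it.
    conn_run = connection.run if connection else local_run
    result = conn_run(
        f"curl -s -X POST http://{ip_address}:{port}/predictions/{model_name} -T flower.jpg",
        warn=True,
    )
    return result.ok and bool(result.stdout.strip())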