def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    model_name = "pytorch-densenet"
    inference_cmd = test_utils.get_inference_run_command(image_uri, model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"
    docker_run_cmd = (
        f"{docker_cmd} run -itd --name {container_name}"
        f" -p 80:8080 -p 8081:8081"
        f" {image_uri} {inference_cmd}"
    )
    try:
        ec2_connection.run(f"$(aws ecr get-login --no-include-email --region {region})", hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        inference_result = test_utils.request_pytorch_inference_densenet(connection=ec2_connection)
        assert (
            inference_result
        ), f"Failed to perform pytorch inference test for image: {image_uri} on ec2"
    finally:
        ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)
def run_ec2_mxnet_inference(
    image_uri, model_name, container_tag, ec2_connection, processor, region, target_port, target_management_port
):
    """
    Run an MXNet inference container on an EC2 instance and verify that the named model serves requests.
    """
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2-{container_tag}"
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"
    mms_inference_cmd = test_utils.get_inference_run_command(image_uri, model_name, processor)
    docker_run_cmd = (
        f"{docker_cmd} run -itd --name {container_name}"
        f" -p {target_port}:8080 -p {target_management_port}:8081"
        f" {image_uri} {mms_inference_cmd}"
    )
    try:
        ec2_connection.run(f"$(aws ecr get-login --no-include-email --region {region})", hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        if model_name == SQUEEZENET_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model="squeezenet"
            )
        elif model_name == BERT_MODEL:
            inference_result = test_utils.request_mxnet_inference_gluonnlp(
                port=target_port, connection=ec2_connection
            )
        elif model_name == RESNET_EIA_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model=model_name
            )
        else:
            # Guard against silently falling through with inference_result undefined
            raise ValueError(f"Unsupported model name for mxnet inference test: {model_name}")
        assert (
            inference_result
        ), f"Failed to perform mxnet {model_name} inference test for image: {image_uri} on ec2"
    finally:
        ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)
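# Hedged usage sketch, not part of the original suite: how a pytest-style test might drive
# run_ec2_mxnet_inference. The "mxnet_inference" image fixture, the "ec2_connection" Fabric
# connection fixture, and the port choices are illustrative assumptions.
def test_ec2_mxnet_squeezenet_inference_example(mxnet_inference, ec2_connection, region):
    # Publish the container's MMS inference port (8080) on host port 80 and pass the
    # management port (8081) straight through.
    run_ec2_mxnet_inference(
        mxnet_inference, SQUEEZENET_MODEL, "squeezenet", ec2_connection, "cpu", region, 80, 8081
    )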
def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    """
    Run a PyTorch inference container on an EC2 instance and verify that the appropriate
    model (DenseNet, or ResNet for Neuron) can be served.
    """
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    model_name = "pytorch-densenet"
    if processor == "eia":
        image_framework, image_framework_version = get_framework_and_version_from_tag(image_uri)
        if image_framework_version == "1.3.1":
            model_name = "pytorch-densenet-v1-3-1"
    if processor == "neuron":
        model_name = "pytorch-resnet-neuron"
    inference_cmd = test_utils.get_inference_run_command(image_uri, model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"
    if processor == "neuron":
        # Stop neuron-rtd in the host environment so the DLC can start its own instance of it
        ec2_connection.run("sudo systemctl stop neuron-rtd")
        docker_run_cmd = (
            f"{docker_cmd} run -itd --name {container_name}"
            f" -p 80:8080 -p 8081:8081"
            f" --device=/dev/neuron0 --cap-add IPC_LOCK"
            f" --env NEURON_MONITOR_CW_REGION={region}"
            f" {image_uri} {inference_cmd}"
        )
    else:
        docker_run_cmd = (
            f"{docker_cmd} run -itd --name {container_name}"
            f" -p 80:8080 -p 8081:8081"
            f" {image_uri} {inference_cmd}"
        )
    try:
        ec2_connection.run(f"$(aws ecr get-login --no-include-email --region {region})", hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        server_type = get_inference_server_type(image_uri)
        inference_result = test_utils.request_pytorch_inference_densenet(
            connection=ec2_connection, model_name=model_name, server_type=server_type
        )
        assert (
            inference_result
        ), f"Failed to perform pytorch inference test for image: {image_uri} on ec2"
    finally:
        ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)
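# Hedged usage sketch, not part of the original suite: invoking ec2_pytorch_inference for a GPU
# image. The "pytorch_inference" image fixture and "ec2_connection" fixture names are
# illustrative assumptions.
def test_ec2_pytorch_inference_gpu_example(pytorch_inference, ec2_connection, region):
    ec2_pytorch_inference(pytorch_inference, "gpu", ec2_connection, region)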
def setup_ecs_inference_service(
    docker_image_uri,
    framework,
    cluster_arn,
    model_name,
    worker_instance_id,
    ei_accelerator_type=None,
    num_gpus=None,
    region=DEFAULT_REGION,
):
    """
    Set up an inference service on ECS. Cleans up the created resources if any step fails.

    :param docker_image_uri: URI of the inference image to deploy
    :param framework: framework of the image ("tensorflow", "mxnet" or "pytorch")
    :param cluster_arn: ARN of the ECS cluster to deploy the service into
    :param model_name: name of the model to serve
    :param worker_instance_id: EC2 instance ID of the ECS container instance
    :param ei_accelerator_type: Elastic Inference accelerator type, if applicable
    :param num_gpus: number of GPUs to reserve for the task, if applicable
    :param region: AWS region
    :return: <tuple> service_name, task_family, revision if all steps pass, otherwise raises an exception
    """
    datetime_suffix = datetime.datetime.now().strftime("%Y%m%d-%H-%M-%S")
    processor = "gpu" if "gpu" in docker_image_uri else "eia" if "eia" in docker_image_uri else "cpu"
    port_mappings = get_ecs_port_mappings(framework)
    log_group_name = f"/ecs/{framework}-inference-{processor}"
    num_cpus = ec2_utils.get_instance_num_cpus(worker_instance_id, region=region)
    # We assume that about 80% of RAM is free on the instance, since we are not directly querying it
    # to find out what the memory utilization is.
    memory = int(ec2_utils.get_instance_memory(worker_instance_id, region=region) * 0.8)
    cluster_name = get_ecs_cluster_name(cluster_arn, region=region)
    # Base task-definition arguments; framework- and processor-specific entries are added below.
    arguments_dict = {
        "family_name": cluster_name,
        "image": docker_image_uri,
        "log_group_name": log_group_name,
        "log_stream_prefix": datetime_suffix,
        "port_mappings": port_mappings,
        "num_cpu": num_cpus,
        "memory": memory,
        "region": region,
    }
    if processor == "gpu" and num_gpus:
        arguments_dict["num_gpu"] = num_gpus
    if framework == "tensorflow":
        arguments_dict["environment"] = get_ecs_tensorflow_environment_variables(processor, model_name)
        print(f"Added environment variables: {arguments_dict['environment']}")
    elif framework in ["mxnet", "pytorch"]:
        arguments_dict["container_command"] = [
            get_inference_run_command(docker_image_uri, model_name, processor)
        ]
    if processor == "eia":
        arguments_dict["health_check"] = {
            "retries": 2,
            "command": [
                "CMD-SHELL",
                "LD_LIBRARY_PATH=/opt/ei_health_check/lib /opt/ei_health_check/bin/health_check",
            ],
            "timeout": 5,
            "interval": 30,
            "startPeriod": 60,
        }
        arguments_dict["inference_accelerators"] = {
            "deviceName": "device_1",
            "deviceType": ei_accelerator_type,
        }
    try:
        task_family, revision = register_ecs_task_definition(**arguments_dict)
        print(f"Created Task definition - {task_family}:{revision}")
        service_name = create_ecs_service(
            cluster_name, f"service-{cluster_name}", f"{task_family}:{revision}", region=region
        )
        print(
            f"Created ECS service - {service_name} with cloudwatch log group - {log_group_name} "
            f"log stream prefix - {datetime_suffix}/{cluster_name}"
        )
        if check_running_task_for_ecs_service(cluster_name, service_name, region=region):
            print("Service status verified as running. Running inference ...")
        else:
            raise Exception(f"No task running in the service: {service_name}")
        return service_name, task_family, revision
    except Exception as e:
        raise ECSServiceCreationException(f"Setup Inference Service Exception - {e}")
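# Hedged usage sketch, not part of the original suite: standing up an ECS inference service with
# setup_ecs_inference_service and tearing it down afterwards. The model name, the request helper
# (request_tensorflow_inference), and the cleanup helper (tear_down_ecs_inference_service) are
# illustrative assumptions standing in for whatever the real test utilities provide.
def run_ecs_tensorflow_inference_example(docker_image_uri, cluster_arn, worker_instance_id, public_ip):
    service_name, task_family, revision = None, None, None
    try:
        service_name, task_family, revision = setup_ecs_inference_service(
            docker_image_uri,
            "tensorflow",
            cluster_arn,
            "saved_model_half_plus_two",
            worker_instance_id,
        )
        assert request_tensorflow_inference(model_name="saved_model_half_plus_two", ip_address=public_ip)
    finally:
        # Clean up the service and task definition regardless of the inference outcome.
        tear_down_ecs_inference_service(cluster_arn, service_name, task_family, revision)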