def test_ecs_tensorflow_inference_gpu(tensorflow_inference, ecs_container_instance, region, gpu_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_gpus = ec2_utils.get_instance_num_gpus(worker_instance_id)

    model_name = "saved_model_half_plus_two"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_gpus=num_gpus,
            region=region,
        )
        model_name = get_tensorflow_model_name("gpu", model_name)
        inference_result = request_tensorflow_inference(model_name, ip_address=public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
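
# NOTE: Hedged sketch for readers: request_tensorflow_inference is imported from the shared
# test utilities, and the helper below only approximates the REST call it presumably makes,
# assuming TF Serving's standard predict endpoint on port 8501. The helper name, port, and
# payload are illustrative assumptions, not part of this suite's API.
def _sketch_tensorflow_predict(model_name, ip_address):
    import requests

    url = f"http://{ip_address}:8501/v1/models/{model_name}:predict"
    # half_plus_two computes 0.5 * x + 2, so [1.0, 2.0, 5.0] should yield [2.5, 3.0, 4.5]
    response = requests.post(url, json={"instances": [1.0, 2.0, 5.0]}, timeout=30)
    return response.status_code == 200
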
def __ecs_tensorflow_inference_cpu_nlp(tensorflow_inference, ecs_container_instance, region):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)

    model_name = "albert"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            region=region,
        )
        model_name = get_tensorflow_model_name("cpu", model_name)
        inference_result = request_tensorflow_inference_nlp(model_name, ip_address=public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
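
# NOTE: request_tensorflow_inference_nlp presumably follows the same REST pattern as the
# sketch after the GPU test above, differing only in the payload: NLP models such as albert
# take text instances. The request body below is an assumption for illustration.
def _sketch_tensorflow_predict_nlp(model_name, ip_address):
    import requests

    url = f"http://{ip_address}:8501/v1/models/{model_name}:predict"
    response = requests.post(url, json={"instances": ["this is a test sentence"]}, timeout=30)
    return response.status_code == 200
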
def get_ecs_tensorflow_environment_variables(processor, model_name):
    """
    Get method for environment variables for tensorflow inference via S3 on ECS
    Requirement: Model should be hosted in the S3 location defined in TENSORFLOW_MODELS_BUCKET
    :param processor: <str> processor type, e.g. "cpu" or "gpu"
    :param model_name: <str> name of the model to serve
    :return: <list> environment variables as name/value JSON objects
    """
    model_name = get_tensorflow_model_name(processor, model_name)
    ecs_tensorflow_inference_environment = [
        {"name": "MODEL_NAME", "value": model_name},
        {"name": "MODEL_BASE_PATH", "value": TENSORFLOW_MODELS_BUCKET},
    ]
    return ecs_tensorflow_inference_environment
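
# For reference, the structure returned above (the processor-suffixed model name is an
# illustrative assumption about what get_tensorflow_model_name produces):
#
#   get_ecs_tensorflow_environment_variables("gpu", "saved_model_half_plus_two")
#   # -> [{"name": "MODEL_NAME", "value": "saved_model_half_plus_two_gpu"},
#   #     {"name": "MODEL_BASE_PATH", "value": TENSORFLOW_MODELS_BUCKET}]
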
def test_ecs_tensorflow_inference_neuron(tensorflow_inference_neuron, ecs_container_instance, region):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_neurons = ec2_utils.get_instance_num_inferentias(worker_instance_id)

    model_name = "simple"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference_neuron,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_neurons=num_neurons,
            region=region,
        )
        model_name = get_tensorflow_model_name("neuron", model_name)
        inference_result = request_tensorflow_inference(
            model_name,
            ip_address=public_ip_address,
            inference_string="'{\"instances\": [[1.0, 2.0, 5.0]]}'",
        )
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"
    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
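
# NOTE: The single quotes embedded in inference_string above suggest the payload is handed
# to a shell command (e.g. curl) rather than a Python HTTP client. A hedged sketch of what
# that might look like; the curl invocation and port are assumptions, not this suite's API.
def _sketch_shell_predict(model_name, ip_address, inference_string):
    import subprocess

    cmd = (
        f"curl -s -o /dev/null -w '%{{http_code}}' -d {inference_string} "
        f"http://{ip_address}:8501/v1/models/{model_name}:predict"
    )
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    return result.stdout.strip() == "200"
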
def setup_ecs_inference_service(
    docker_image_uri,
    framework,
    cluster_arn,
    model_name,
    worker_instance_id,
    ei_accelerator_type=None,
    num_gpus=None,
    num_neurons=None,
    region=DEFAULT_REGION,
):
    """
    Function to setup Inference service on ECS

    :param docker_image_uri:
    :param framework:
    :param cluster_arn:
    :param model_name:
    :param worker_instance_id:
    :param ei_accelerator_type:
    :param num_gpus:
    :param num_neurons:
    :param region:
    :return: <tuple> service_name, task_family, revision if all steps passed else Exception
    Cleans up the resources if any step fails
    """
    datetime_suffix = datetime.datetime.now().strftime("%Y%m%d-%H-%M-%S")
    processor = (
        "gpu"
        if "gpu" in docker_image_uri
        else "eia"
        if "eia" in docker_image_uri
        else "neuron"
        if "neuron" in docker_image_uri
        else "cpu"
    )
    port_mappings = get_ecs_port_mappings(framework)
    log_group_name = f"/ecs/{framework}-inference-{processor}"
    num_cpus = ec2_utils.get_instance_num_cpus(worker_instance_id, region=region)
    # We assume that about 80% of RAM is free on the instance, since we are not directly
    # querying it to find out what the memory utilization is.
    memory = int(ec2_utils.get_instance_memory(worker_instance_id, region=region) * 0.8)
    cluster_name = get_ecs_cluster_name(cluster_arn, region=region)
    # The values below are sane defaults for the task definition; framework- and
    # processor-specific entries are added afterwards.
    arguments_dict = {
        "family_name": cluster_name,
        "image": docker_image_uri,
        "log_group_name": log_group_name,
        "log_stream_prefix": datetime_suffix,
        "port_mappings": port_mappings,
        "num_cpu": num_cpus,
        "memory": memory,
        "region": region,
    }
    if processor == "gpu" and num_gpus:
        arguments_dict["num_gpu"] = num_gpus

    if framework == "tensorflow":
        model_name = get_tensorflow_model_name(processor, model_name)
        model_base_path = get_tensorflow_model_base_path(docker_image_uri)
        _, image_framework_version = get_framework_and_version_from_tag(docker_image_uri)
        if Version(image_framework_version) in SpecifierSet(">=2.7"):
            arguments_dict["container_command"] = [
                build_tensorflow_inference_command_tf27_and_above(model_name)
            ]
            arguments_dict["entrypoint"] = ["sh", "-c"]
        arguments_dict["environment"] = get_tensorflow_inference_environment_variables(
            model_name, model_base_path
        )
        print(f"Added environment variables: {arguments_dict['environment']}")
    elif framework in ["mxnet", "pytorch"]:
        arguments_dict["container_command"] = [
            get_inference_run_command(docker_image_uri, model_name, processor)
        ]

    if processor == "eia":
        arguments_dict["health_check"] = {
            "retries": 2,
            "command": [
                "CMD-SHELL",
                "LD_LIBRARY_PATH=/opt/ei_health_check/lib /opt/ei_health_check/bin/health_check",
            ],
            "timeout": 5,
            "interval": 30,
            "startPeriod": 60,
        }
        arguments_dict["inference_accelerators"] = {
            "deviceName": "device_1",
            "deviceType": ei_accelerator_type,
        }
    if processor == "neuron" and num_neurons:
        arguments_dict["num_neurons"] = num_neurons

    try:
        task_family, revision = register_ecs_task_definition(**arguments_dict)
        print(f"Created Task definition - {task_family}:{revision}")
        service_name = create_ecs_service(
            cluster_name, f"service-{cluster_name}", f"{task_family}:{revision}", region=region
        )
        print(
            f"Created ECS service - {service_name} with cloudwatch log group - {log_group_name} "
            f"log stream prefix - {datetime_suffix}/{cluster_name}"
        )
        if check_running_task_for_ecs_service(cluster_name, service_name, region=region):
            print("Service status verified as running. Running inference ...")
        else:
            raise Exception(f"No task running in the service: {service_name}")
        return service_name, task_family, revision
    except Exception as e:
        raise ECSServiceCreationException(f"Setup Inference Service Exception - {e}")
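
# The TF >= 2.7 gate in setup_ecs_inference_service relies on packaging's membership test:
# a Version is "in" a SpecifierSet when it satisfies the constraint. A minimal, standalone
# illustration of that behavior (not used by the code above):
def _illustrate_version_gate():
    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    assert Version("2.8.0") in SpecifierSet(">=2.7")
    assert Version("2.6.3") not in SpecifierSet(">=2.7")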