def _run_instance_role_disabled(image_uri, ec2_client, ec2_instance, ec2_connection):
    """
    Verify that the DLC telemetry call cannot tag the EC2 instance when access to the
    instance metadata service (and hence the instance role credentials) is disabled.
    """
    expected_tag_key = "aws-dlc-autogenerated-tag-do-not-delete"
    ec2_instance_id, _ = ec2_instance
    account_id = test_utils.get_account_id_from_image_uri(image_uri)
    image_region = test_utils.get_region_from_image_uri(image_uri)
    repo_name, image_tag = test_utils.get_repository_and_tag_from_image_uri(image_uri)
    framework, _ = test_utils.get_framework_and_version_from_tag(image_uri)
    job_type = test_utils.get_job_type_from_image(image_uri)
    processor = test_utils.get_processor_from_image_uri(image_uri)

    container_name = f"{repo_name}-telemetry_bad_instance_role-ec2"

    docker_cmd = "nvidia-docker" if processor == "gpu" else "docker"

    test_utils.login_to_ecr_registry(ec2_connection, account_id, image_region)
    ec2_connection.run(f"{docker_cmd} pull -q {image_uri}")

    # Remove any telemetry tag left over from a previous run so the final assertion is meaningful
    preexisting_ec2_instance_tags = ec2_utils.get_ec2_instance_tags(ec2_instance_id, ec2_client=ec2_client)
    if expected_tag_key in preexisting_ec2_instance_tags:
        ec2_client.delete_tags(Resources=[ec2_instance_id], Tags=[{"Key": expected_tag_key}])

    # Disable access to EC2 instance metadata
    ec2_connection.run("sudo route add -host 169.254.169.254 reject")

    if "tensorflow" in framework and job_type == "inference":
        model_name = "saved_model_half_plus_two"
        model_base_path = test_utils.get_tensorflow_model_base_path(image_uri)
        env_vars_list = test_utils.get_tensorflow_inference_environment_variables(model_name, model_base_path)
        env_vars = " ".join([f"-e {entry['name']}={entry['value']}" for entry in env_vars_list])
        inference_command = get_tensorflow_inference_command_tf27_above(image_uri, model_name)
        ec2_connection.run(
            f"{docker_cmd} run {env_vars} --name {container_name} -id {image_uri} {inference_command}"
        )
        time.sleep(5)
    else:
        framework_to_import = framework.replace("huggingface_", "")
        framework_to_import = "torch" if framework_to_import == "pytorch" else framework_to_import
        ec2_connection.run(f"{docker_cmd} run --name {container_name} -id {image_uri} bash")
        # Importing the framework triggers the telemetry call; it must still succeed
        # even though no instance credentials are available
        output = ec2_connection.run(
            f"{docker_cmd} exec -i {container_name} python -c 'import {framework_to_import}; import time; time.sleep(5)'",
            warn=True,
        )
        assert output.ok, f"'import {framework_to_import}' fails when credentials are not configured"

    ec2_instance_tags = ec2_utils.get_ec2_instance_tags(ec2_instance_id, ec2_client=ec2_client)
    assert expected_tag_key not in ec2_instance_tags, (
        f"{expected_tag_key} was applied as an instance tag. "
        "EC2 create_tags went through even though it should not have"
    )
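
# --- Hypothetical usage sketch (not part of the original suite) ---
# A minimal example of how _run_instance_role_disabled might be wired into a
# pytest-style EC2 test. The fixture names (pytorch_training, ec2_client,
# ec2_instance, ec2_connection) and the custom "model" marker are assumptions
# modeled on common DLC test conventions and may differ in the real conftest.
import pytest


@pytest.mark.model("N/A")
def test_telemetry_instance_role_disabled(pytorch_training, ec2_client, ec2_instance, ec2_connection):
    _run_instance_role_disabled(pytorch_training, ec2_client, ec2_instance, ec2_connection)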
def setup_ecs_inference_service(
    docker_image_uri,
    framework,
    cluster_arn,
    model_name,
    worker_instance_id,
    ei_accelerator_type=None,
    num_gpus=None,
    num_neurons=None,
    region=DEFAULT_REGION,
):
    """
    Set up an inference service on ECS

    :param docker_image_uri: ECR URI of the inference image to deploy
    :param framework: framework name, e.g. "tensorflow", "mxnet", or "pytorch"
    :param cluster_arn: ARN of the ECS cluster to deploy into
    :param model_name: name of the model to serve
    :param worker_instance_id: EC2 instance ID of the ECS container instance
    :param ei_accelerator_type: Elastic Inference accelerator type, used when the processor is "eia"
    :param num_gpus: number of GPUs to reserve for the task
    :param num_neurons: number of Neuron devices to reserve for the task
    :param region: AWS region
    :return: <tuple> service_name, task_family, revision if all steps pass, else raises ECSServiceCreationException
    Cleans up the resources if any step fails
    """
    datetime_suffix = datetime.datetime.now().strftime("%Y%m%d-%H-%M-%S")

    if "gpu" in docker_image_uri:
        processor = "gpu"
    elif "eia" in docker_image_uri:
        processor = "eia"
    elif "neuron" in docker_image_uri:
        processor = "neuron"
    else:
        processor = "cpu"

    port_mappings = get_ecs_port_mappings(framework)
    log_group_name = f"/ecs/{framework}-inference-{processor}"
    num_cpus = ec2_utils.get_instance_num_cpus(worker_instance_id, region=region)
    # We assume that about 80% of RAM is free on the instance, since we are not directly querying it to find out
    # what the memory utilization is.
    memory = int(ec2_utils.get_instance_memory(worker_instance_id, region=region) * 0.8)
    cluster_name = get_ecs_cluster_name(cluster_arn, region=region)
    # The values below are the baseline arguments common to every task definition
    arguments_dict = {
        "family_name": cluster_name,
        "image": docker_image_uri,
        "log_group_name": log_group_name,
        "log_stream_prefix": datetime_suffix,
        "port_mappings": port_mappings,
        "num_cpu": num_cpus,
        "memory": memory,
        "region": region,
    }
    if processor == "gpu" and num_gpus:
        arguments_dict["num_gpu"] = num_gpus
    if framework == "tensorflow":
        model_name = get_tensorflow_model_name(processor, model_name)
        model_base_path = get_tensorflow_model_base_path(docker_image_uri)
        _, image_framework_version = get_framework_and_version_from_tag(docker_image_uri)
        if Version(image_framework_version) in SpecifierSet(">=2.7"):
            arguments_dict["container_command"] = [
                build_tensorflow_inference_command_tf27_and_above(model_name)
            ]
            arguments_dict["entrypoint"] = ["sh", "-c"]
        arguments_dict["environment"] = get_tensorflow_inference_environment_variables(
            model_name, model_base_path
        )
        print(f"Added environment variables: {arguments_dict['environment']}")
    elif framework in ["mxnet", "pytorch"]:
        arguments_dict["container_command"] = [
            get_inference_run_command(docker_image_uri, model_name, processor)
        ]
    if processor == "eia":
        arguments_dict["health_check"] = {
            "retries": 2,
            "command": [
                "CMD-SHELL",
                "LD_LIBRARY_PATH=/opt/ei_health_check/lib /opt/ei_health_check/bin/health_check",
            ],
            "timeout": 5,
            "interval": 30,
            "startPeriod": 60,
        }
        arguments_dict["inference_accelerators"] = {
            "deviceName": "device_1",
            "deviceType": ei_accelerator_type,
        }
    if processor == "neuron" and num_neurons:
        arguments_dict["num_neurons"] = num_neurons
    try:
        task_family, revision = register_ecs_task_definition(**arguments_dict)
        print(f"Created Task definition - {task_family}:{revision}")
        service_name = create_ecs_service(
            cluster_name, f"service-{cluster_name}", f"{task_family}:{revision}", region=region
        )
        print(
            f"Created ECS service - {service_name} with cloudwatch log group - {log_group_name} "
            f"log stream prefix - {datetime_suffix}/{cluster_name}"
        )
        if check_running_task_for_ecs_service(cluster_name, service_name, region=region):
            print("Service status verified as running. Running inference ...")
        else:
            raise Exception(f"No task running in the service: {service_name}")
        return service_name, task_family, revision
    except Exception as e:
        raise ECSServiceCreationException(f"Setup Inference Service Exception - {e}")