Code Example #1
def test_ecs_tensorflow_inference_gpu(tensorflow_inference,
                                      ecs_container_instance, region,
                                      gpu_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id,
                                                region=region)
    num_gpus = ec2_utils.get_instance_num_gpus(worker_instance_id)

    model_name = "saved_model_half_plus_two"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_gpus=num_gpus,
            region=region)
        model_name = get_tensorflow_model_name("gpu", model_name)
        inference_result = request_tensorflow_inference(
            model_name, ip_address=public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"

    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn,
                                                  service_name, task_family,
                                                  revision)
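
The request_tensorflow_inference helper is not shown in this listing. As a minimal sketch, assuming the service exposes TensorFlow Serving's default REST port 8501 and :predict endpoint (the repo's actual helper may build a curl command instead), it could look like this:

import json

import requests  # assumed HTTP client; not necessarily what the repo uses


def request_tensorflow_inference_sketch(model_name, ip_address, port=8501):
    # Hypothetical stand-in for request_tensorflow_inference: POST a sample
    # payload to the TensorFlow Serving REST API and report success.
    url = f"http://{ip_address}:{port}/v1/models/{model_name}:predict"
    payload = {"instances": [1.0, 2.0, 5.0]}  # half_plus_two takes scalar inputs
    try:
        response = requests.post(url, data=json.dumps(payload), timeout=30)
        return response.status_code == 200
    except requests.RequestException:
        return False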
Code Example #2
def __ecs_tensorflow_inference_cpu_nlp(tensorflow_inference, ecs_container_instance, region):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)

    model_name = "albert"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference, "tensorflow", ecs_cluster_arn, model_name, worker_instance_id, region=region
        )
        model_name = get_tensorflow_model_name("cpu", model_name)
        inference_result = request_tensorflow_inference_nlp(model_name, ip_address=public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"

    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
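
Note the double-underscore prefix on __ecs_tensorflow_inference_cpu_nlp: pytest only collects functions matching test_* by default, so this is a shared helper rather than a collected test; the actual CPU NLP test functions presumably call it with the appropriate fixtures.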
Code Example #3
def get_ecs_tensorflow_environment_variables(processor, model_name):
    """
    Get method for environment variables for tensorflow inference via S3 on ECS
    Requirement: Model should be hosted in S3 location defined in TENSORFLOW_MODELS_PATH
    :param processor:
    :param model_name:
    :return: <list> JSON
    """
    model_name = get_tensorflow_model_name(processor, model_name)
    ecs_tensorflow_inference_environment = [
        {
            "name": "MODEL_NAME",
            "value": model_name
        },
        {
            "name": "MODEL_BASE_PATH",
            "value": TENSORFLOW_MODELS_BUCKET
        },
    ]

    return ecs_tensorflow_inference_environment
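
The returned list uses the {"name": ..., "value": ...} shape that ECS expects in a container definition's environment field. As a minimal sketch of where it plugs in, using boto3 directly (the repo wraps this in its own register_ecs_task_definition helper; the family and container names below are made up):

import boto3  # sketch only; assumes AWS credentials are already configured


def register_task_definition_sketch(image_uri, environment, region="us-west-2"):
    # Hypothetical illustration: the environment list produced above drops
    # straight into an ECS container definition.
    ecs = boto3.client("ecs", region_name=region)
    response = ecs.register_task_definition(
        family="tensorflow-inference-example",  # made-up family name
        containerDefinitions=[
            {
                "name": "tf-serving",  # made-up container name
                "image": image_uri,
                "memory": 4096,  # hard-coded here; the repo derives this from the instance
                "environment": environment,
            }
        ],
    )
    task_def = response["taskDefinition"]
    return task_def["family"], task_def["revision"]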
Code Example #4
def test_ecs_tensorflow_inference_neuron(tensorflow_inference_neuron, ecs_container_instance, region):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id, region=region)
    num_neurons = ec2_utils.get_instance_num_inferentias(worker_instance_id)

    model_name = "simple"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            tensorflow_inference_neuron,
            "tensorflow",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_neurons=num_neurons,
            region=region,
        )
        model_name = get_tensorflow_model_name("neuron", model_name)
        inference_result = request_tensorflow_inference(
            model_name,
            ip_address=public_ip_address,
            inference_string="'{\"instances\": [[1.0, 2.0, 5.0]]}'",
        )
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"

    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn, service_name, task_family, revision)
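
The extra single quotes inside inference_string suggest the payload is spliced into a shell command (for example a curl invocation), so the JSON actually sent is {"instances": [[1.0, 2.0, 5.0]]}: one instance with three features, which presumably matches the input signature of the Neuron-compiled "simple" model.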
Code Example #5
def setup_ecs_inference_service(
    docker_image_uri,
    framework,
    cluster_arn,
    model_name,
    worker_instance_id,
    ei_accelerator_type=None,
    num_gpus=None,
    num_neurons=None,
    region=DEFAULT_REGION,
):
    """
    Function to setup Inference service on ECS
    :param docker_image_uri:
    :param framework:
    :param cluster_arn:
    :param model_name:
    :param worker_instance_id:
    :param num_gpus:
    :param num_neurons:
    :param region:
    :return: <tuple> service_name, task_family, revision if all steps passed else Exception
        Cleans up the resources if any step fails
    """
    datetime_suffix = datetime.datetime.now().strftime("%Y%m%d-%H-%M-%S")
    processor = "gpu" if "gpu" in docker_image_uri else "eia" if "eia" in docker_image_uri else "neuron" if "neuron" in docker_image_uri else "cpu"
    port_mappings = get_ecs_port_mappings(framework)
    log_group_name = f"/ecs/{framework}-inference-{processor}"
    num_cpus = ec2_utils.get_instance_num_cpus(worker_instance_id,
                                               region=region)
    # We assume that about 80% of RAM is free on the instance, since we are not directly querying it to find out
    # what the memory utilization is.
    memory = int(
        ec2_utils.get_instance_memory(worker_instance_id, region=region) * 0.8)
    cluster_name = get_ecs_cluster_name(cluster_arn, region=region)
    # The values below are sensible baseline arguments for the task definition
    arguments_dict = {
        "family_name": cluster_name,
        "image": docker_image_uri,
        "log_group_name": log_group_name,
        "log_stream_prefix": datetime_suffix,
        "port_mappings": port_mappings,
        "num_cpu": num_cpus,
        "memory": memory,
        "region": region,
    }

    if processor == "gpu" and num_gpus:
        arguments_dict["num_gpu"] = num_gpus
    if framework == "tensorflow":
        model_name = get_tensorflow_model_name(processor, model_name)
        model_base_path = get_tensorflow_model_base_path(docker_image_uri)
        _, image_framework_version = get_framework_and_version_from_tag(
            docker_image_uri)
        if Version(image_framework_version) in SpecifierSet(">=2.7"):
            arguments_dict["container_command"] = [
                build_tensorflow_inference_command_tf27_and_above(model_name)
            ]
            arguments_dict["entrypoint"] = ["sh", "-c"]

        arguments_dict["environment"] = get_tensorflow_inference_environment_variables(
            model_name, model_base_path
        )
        print(f"Added environment variables: {arguments_dict['environment']}")
    elif framework in ["mxnet", "pytorch"]:
        arguments_dict["container_command"] = [
            get_inference_run_command(docker_image_uri, model_name, processor)
        ]
    if processor == "eia":
        arguments_dict["health_check"] = {
            "retries":
            2,
            "command": [
                "CMD-SHELL",
                "LD_LIBRARY_PATH=/opt/ei_health_check/lib /opt/ei_health_check/bin/health_check"
            ],
            "timeout":
            5,
            "interval":
            30,
            "startPeriod":
            60,
        }
        arguments_dict["inference_accelerators"] = {
            "deviceName": "device_1",
            "deviceType": ei_accelerator_type
        }

    if processor == "neuron" and num_neurons:
        arguments_dict["num_neurons"] = num_neurons

    try:
        task_family, revision = register_ecs_task_definition(**arguments_dict)
        print(f"Created Task definition - {task_family}:{revision}")

        service_name = create_ecs_service(cluster_name,
                                          f"service-{cluster_name}",
                                          f"{task_family}:{revision}",
                                          region=region)
        print(
            f"Created ECS service - {service_name} with cloudwatch log group - {log_group_name} "
            f"log stream prefix - {datetime_suffix}/{cluster_name}")
        if check_running_task_for_ecs_service(cluster_name,
                                              service_name,
                                              region=region):
            print("Service status verified as running. Running inference ...")
        else:
            raise Exception(f"No task running in the service: {service_name}")
        return service_name, task_family, revision
    except Exception as e:
        raise ECSServiceCreationException(
            f"Setup Inference Service Exception - {e}"
        ) from e