Example 1
def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    model_name = "pytorch-densenet"
    inference_cmd = test_utils.get_inference_run_command(
        image_uri, model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"

    docker_run_cmd = (f"{docker_cmd} run -itd --name {container_name}"
                      f" -p 80:8080 -p 8081:8081"
                      f" {image_uri} {inference_cmd}")
    try:
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})",
            hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        inference_result = test_utils.request_pytorch_inference_densenet(
            connection=ec2_connection)
        assert (
            inference_result
        ), f"Failed to perform pytorch inference test for image: {image_uri} on ec2"

    finally:
        ec2_connection.run(f"docker rm -f {container_name}",
                           warn=True,
                           hide=True)
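These helpers expect an already-open Fabric connection to the EC2 instance under test. A minimal sketch of how the function above might be driven from a pytest test; the fixture names (pytorch_inference for the image URI under test, ec2_connection, region) are assumptions for illustration, not taken from this listing:

def test_ec2_pytorch_inference_gpu(pytorch_inference, ec2_connection, region):
    # pytorch_inference, ec2_connection and region are assumed to be pytest fixtures
    ec2_pytorch_inference(pytorch_inference, "gpu", ec2_connection, region)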
Example 2
def run_ec2_mxnet_inference(image_uri, model_name, container_tag, ec2_connection, processor, region, target_port, target_management_port):
    """Run the given MXNet inference image on the remote EC2 instance and exercise it with a model-specific request."""
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2-{container_tag}"
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"
    mms_inference_cmd = test_utils.get_inference_run_command(image_uri, model_name, processor)
    docker_run_cmd = (
        f"{docker_cmd} run -itd --name {container_name}"
        f" -p {target_port}:8080 -p {target_management_port}:8081"
        f" {image_uri} {mms_inference_cmd}"
    )
    try:
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})", hide=True
        )
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        inference_result = None  # stays None (and fails the assert) if model_name matches no branch below
        if model_name == SQUEEZENET_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model="squeezenet"
            )
        elif model_name == BERT_MODEL:
            inference_result = test_utils.request_mxnet_inference_gluonnlp(
                port=target_port, connection=ec2_connection
            )
        elif model_name == RESNET_EIA_MODEL:
            inference_result = test_utils.request_mxnet_inference(
                port=target_port, connection=ec2_connection, model=model_name
            )
        assert (
            inference_result
        ), f"Failed to perform mxnet {model_name} inference test for image: {image_uri} on ec2"

    finally:
        ec2_connection.run(f"docker rm -f {container_name}", warn=True, hide=True)
Example 3
def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    """Run the given PyTorch inference image on the remote EC2 instance and verify that an inference request succeeds."""
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    model_name = "pytorch-densenet"
    if processor == "eia":
        image_framework, image_framework_version = get_framework_and_version_from_tag(
            image_uri)
        if image_framework_version == "1.3.1":
            model_name = "pytorch-densenet-v1-3-1"
    if processor == "neuron":
        model_name = "pytorch-resnet-neuron"

    inference_cmd = test_utils.get_inference_run_command(
        image_uri, model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"

    if processor == "neuron":
        ec2_connection.run("sudo systemctl stop neuron-rtd"
                           )  # Stop neuron-rtd in host env for DLC to start it
        docker_run_cmd = (f"{docker_cmd} run -itd --name {container_name}"
                          f" -p 80:8080 -p 8081:8081"
                          f" --device=/dev/neuron0 --cap-add IPC_LOCK"
                          f" --env NEURON_MONITOR_CW_REGION={region}"
                          f" {image_uri} {inference_cmd}")
    else:
        docker_run_cmd = (f"{docker_cmd} run -itd --name {container_name}"
                          f" -p 80:8080 -p 8081:8081"
                          f" {image_uri} {inference_cmd}")
    try:
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})",
            hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        server_type = get_inference_server_type(image_uri)
        inference_result = test_utils.request_pytorch_inference_densenet(
            connection=ec2_connection,
            model_name=model_name,
            server_type=server_type)
        assert (
            inference_result
        ), f"Failed to perform pytorch inference test for image: {image_uri} on ec2"

    finally:
        ec2_connection.run(f"docker rm -f {container_name}",
                           warn=True,
                           hide=True)
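The listing does not show what test_utils.request_pytorch_inference_densenet does on the wire. A plausible sketch, assuming a TorchServe/MMS-style POST /predictions/<model> endpoint behind the host port 80 mapping from docker_run_cmd, a sample image already staged on the instance, and simple polling; the helper name and these details are assumptions rather than the actual implementation:

import time

def request_densenet_inference(connection, model_name="pytorch-densenet", port=80, attempts=10):
    # Poll the model server through the SSH connection until it answers or we give up.
    for _ in range(attempts):
        result = connection.run(
            f"curl -s -X POST http://localhost:{port}/predictions/{model_name} -T flower.jpg",
            warn=True, hide=True,
        )
        if result.ok and result.stdout.strip():
            return True
        time.sleep(30)
    return False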
Example 4
def setup_ecs_inference_service(
    docker_image_uri,
    framework,
    cluster_arn,
    model_name,
    worker_instance_id,
    ei_accelerator_type=None,
    num_gpus=None,
    region=DEFAULT_REGION,
):
    """
    Function to setup Inference service on ECS
    :param docker_image_uri:
    :param framework:
    :param cluster_arn:
    :param model_name:
    :param worker_instance_id:
    :param num_gpus:
    :param region:
    :return: <tuple> service_name, task_family, revision if all steps passed else Exception
        Cleans up the resources if any step fails
    """
    datetime_suffix = datetime.datetime.now().strftime("%Y%m%d-%H-%M-%S")
    processor = "gpu" if "gpu" in docker_image_uri else "eia" if "eia" in docker_image_uri else "cpu"
    port_mappings = get_ecs_port_mappings(framework)
    log_group_name = f"/ecs/{framework}-inference-{processor}"
    num_cpus = ec2_utils.get_instance_num_cpus(worker_instance_id,
                                               region=region)
    # We assume that about 80% of RAM is free on the instance, since we are not directly querying it to find out
    # what the memory utilization is.
    memory = int(
        ec2_utils.get_instance_memory(worker_instance_id, region=region) * 0.8)
    cluster_name = get_ecs_cluster_name(cluster_arn, region=region)
    # The values below are just for sanity
    arguments_dict = {
        "family_name": cluster_name,
        "image": docker_image_uri,
        "log_group_name": log_group_name,
        "log_stream_prefix": datetime_suffix,
        "port_mappings": port_mappings,
        "num_cpu": num_cpus,
        "memory": memory,
        "region": region
    }

    if processor == "gpu" and num_gpus:
        arguments_dict["num_gpu"] = num_gpus
    if framework == "tensorflow":
        arguments_dict["environment"] = get_ecs_tensorflow_environment_variables(
            processor, model_name)
        print(f"Added environment variables: {arguments_dict['environment']}")
    elif framework in ["mxnet", "pytorch"]:
        arguments_dict["container_command"] = [
            get_inference_run_command(docker_image_uri, model_name, processor)
        ]
    if processor == "eia":
        arguments_dict["health_check"] = {
            "retries":
            2,
            "command": [
                "CMD-SHELL",
                "LD_LIBRARY_PATH=/opt/ei_health_check/lib /opt/ei_health_check/bin/health_check"
            ],
            "timeout":
            5,
            "interval":
            30,
            "startPeriod":
            60
        }
        arguments_dict["inference_accelerators"] = {
            "deviceName": "device_1",
            "deviceType": ei_accelerator_type
        }
    try:
        task_family, revision = register_ecs_task_definition(**arguments_dict)
        print(f"Created Task definition - {task_family}:{revision}")

        service_name = create_ecs_service(cluster_name,
                                          f"service-{cluster_name}",
                                          f"{task_family}:{revision}",
                                          region=region)
        print(
            f"Created ECS service - {service_name} with cloudwatch log group - {log_group_name} "
            f"log stream prefix - {datetime_suffix}/{cluster_name}")
        if check_running_task_for_ecs_service(cluster_name,
                                              service_name,
                                              region=region):
            print("Service status verified as running. Running inference ...")
        else:
            raise Exception(f"No task running in the service: {service_name}")
        return service_name, task_family, revision
    except Exception as e:
        raise ECSServiceCreationException(
            f"Setup Inference Service Exception - {e}")