# Shared imports for the examples below. NOTE: the module paths for
# test_utils/eks_utils and the invoke-based `run` are assumed from the
# AWS Deep Learning Containers test-suite layout; adjust to your checkout.
import os
import random
from time import sleep

import pytest
from invoke import run

import test.test_utils as test_utils
import test.test_utils.eks as eks_utils


# Example 1
def test_eks_pytorch_densenet_inference(pytorch_inference):
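    """Deploy a DenseNet model on a single-node EKS cluster and verify the
    inference endpoint responds."""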
    server_type = test_utils.get_inference_server_type(pytorch_inference)
    if "eia" in pytorch_inference:
        pytest.skip("Skipping EKS Test for EIA")
    elif "neuron" in pytorch_inference:
        pytest.skip(
            "Skipping test for Neuron images; they are covered by the Neuron-specific test")
    elif server_type == "ts":
        model = "pytorch-densenet=https://torchserve.s3.amazonaws.com/mar_files/densenet161.mar"
        server_cmd = "torchserve"
    else:
        model = "pytorch-densenet=https://dlc-samples.s3.amazonaws.com/pytorch/multi-model-server/densenet/densenet.mar"
        server_cmd = "multi-model-server"

    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "gpu" if "gpu" in pytorch_inference else "cpu"

    yaml_path = os.path.join(
        os.sep, "tmp",
        f"pytorch_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"densenet-service-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": pytorch_inference,
        "<SERVER_TYPE>": server_type,
        "<SERVER_CMD>": server_cmd
    }

    if processor == "gpu":
        search_replace_dict["<NUM_GPUS>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "pytorch", processor), yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")

        assert test_utils.request_pytorch_inference_densenet(
            port=port_to_forward, server_type=server_type)
    except ValueError as excp:
        eks_utils.LOGGER.error("Service is not running: %s", excp)
        raise
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")


# Example 2
def test_eks_pytorch_neuron_inference(pytorch_inference, neuron_only):
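    """Deploy a ResNet50 Neuron model on EKS, refreshing the Neuron device
    plugin first, and verify the inference endpoint responds."""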
    server_type = test_utils.get_inference_server_type(pytorch_inference)
    if "neuron" not in pytorch_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")

    model = "pytorch-resnet-neuron=https://aws-dlc-sample-models.s3.amazonaws.com/pytorch/Resnet50-neuron.mar"
    server_cmd = "/usr/local/bin/entrypoint.sh -m pytorch-resnet-neuron=https://aws-dlc-sample-models.s3.amazonaws.com/pytorch/Resnet50-neuron.mar -t /home/model-server/config.properties"
    num_replicas = "1"
    rand_int = random.randint(4001, 6000)
    processor = "neuron"

    yaml_path = os.path.join(
        os.sep, "tmp",
        f"pytorch_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"resnet-{processor}-{rand_int}"

    search_replace_dict = {
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": pytorch_inference,
        "<SERVER_TYPE>": server_type,
        "<SERVER_CMD>": server_cmd
    }

    search_replace_dict["<NUM_INF1S>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "pytorch", processor), yaml_path, search_replace_dict)
    device_plugin_path = eks_utils.get_device_plugin_path("pytorch", processor)

    try:
        # TODO - once eksctl gets the latest neuron device plugin this can be removed
        run("kubectl delete -f {}".format(device_plugin_path))
        sleep(60)
        run("kubectl apply -f {}".format(device_plugin_path))
        sleep(10)

        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")

        assert test_utils.request_pytorch_inference_densenet(
            port=port_to_forward,
            server_type=server_type,
            model_name="pytorch-resnet-neuron")
    except ValueError as excp:
        run("kubectl cluster-info dump")
        eks_utils.LOGGER.error("Service is not running: %s", excp)
        raise
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def test_eks_tensorflow_neuron_inference(tensorflow_inference, neuron_only):
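    """Deploy an MNIST Neuron model with TensorFlow Serving on EKS and verify
    the inference endpoint responds."""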
    if "eia" in tensorflow_inference or "neuron" not in tensorflow_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")
    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "neuron"

    model_name = "mnist_neuron"
    yaml_path = os.path.join(
        os.sep, "tmp",
        f"tensorflow_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"mnist-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODEL_NAME>": model_name,
        "<MODEL_BASE_PATH>": f"https://aws-dlc-sample-models.s3.amazonaws.com",
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": tensorflow_inference
    }

    search_replace_dict["<NUM_INF1S>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "tensorflow", processor), yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8500")

        assert test_utils.request_tensorflow_inference(model_name=model_name,
                                                       port=port_to_forward)
    except ValueError as excp:
        run("kubectl cluster-info dump")
        eks_utils.LOGGER.error("Service is not running: %s", excp)
        raise
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")


# Example 4
def test_eks_mxnet_neuron_inference(mxnet_inference, neuron_only):
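    """Deploy a ResNet50 Neuron model with MXNet on EKS, refreshing the Neuron
    device plugin first, and verify the inference endpoint responds."""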
    if "eia" in mxnet_inference or "neuron" not in mxnet_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")
    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "neuron"

    model = "mxnet-resnet50=https://aws-dlc-sample-models.s3.amazonaws.com/mxnet/Resnet50-neuron.mar"
    yaml_path = os.path.join(os.sep, "tmp", f"mxnet_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"resnet50-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": mxnet_inference
    }

    search_replace_dict["<NUM_INF1S>"] = "1"

    device_plugin_path = eks_utils.get_device_plugin_path("mxnet", processor)

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("mxnet", processor), yaml_path, search_replace_dict
    )

    try:
        # TODO - once eksctl gets the latest neuron device plugin this can be removed
        run("kubectl delete -f {}".format(device_plugin_path))
        sleep(60)
        run("kubectl apply -f {}".format(device_plugin_path))
        sleep(10)
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(selector_name, port_to_forward, "8080")

        assert test_utils.request_mxnet_inference(port=port_to_forward, model="mxnet-resnet50")
    except ValueError as excp:
        run("kubectl cluster-info dump")
        eks_utils.LOGGER.error("Service is not running: %s", excp)
        raise
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")


# Example 5
def test_eks_pytorch_neuron_inference(pytorch_inference, neuron_only):
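    """Deploy a ResNet50 Neuron model with TorchServe on EKS and verify the
    inference endpoint responds."""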
    server_type = test_utils.get_inference_server_type(pytorch_inference)
    if "neuron" not in pytorch_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")
    else:
        model = "pytorch-resnet-neuron=https://aws-dlc-sample-models.s3.amazonaws.com/pytorch/Resnet50-neuron.mar"
        server_cmd = "torchserve"

    num_replicas = "1"
    rand_int = random.randint(4001, 6000)
    processor = "neuron"

    yaml_path = os.path.join(
        os.sep, "tmp",
        f"pytorch_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"resnet-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": pytorch_inference,
        "<SERVER_TYPE>": server_type,
        "<SERVER_CMD>": server_cmd,
    }

    search_replace_dict["<NUM_INF1S>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "pytorch", processor), yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")

        assert test_utils.request_pytorch_inference_densenet(
            port=port_to_forward,
            server_type=server_type,
            model_name="pytorch-resnet-neuron")
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")


# Example 6
def __test_eks_tensorflow_half_plus_two_inference(tensorflow_inference):
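    """Deploy the half_plus_two model with TensorFlow Serving on EKS and
    verify inference. The leading double underscore keeps pytest from
    collecting this test."""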
    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "gpu" if "gpu" in tensorflow_inference else "cpu"

    model_name = f"saved_model_half_plus_two_{processor}"
    yaml_path = os.path.join(
        os.sep, "tmp",
        f"tensorflow_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"half-plus-two-service-{processor}-{rand_int}"
    model_base_path = get_eks_tensorflow_model_base_path(
        tensorflow_inference, model_name)
    command, args = get_tensorflow_command_args(tensorflow_inference,
                                                model_name, model_base_path)
    test_type = test_utils.get_eks_k8s_test_type_label(tensorflow_inference)
    search_replace_dict = {
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": tensorflow_inference,
        "<COMMAND>": command,
        "<ARGS>": args,
        "<TEST_TYPE>": test_type,
    }

    if processor == "gpu":
        search_replace_dict["<NUM_GPUS>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "tensorflow", processor), yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8500")

        assert test_utils.request_tensorflow_inference(model_name=model_name,
                                                       port=port_to_forward)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")


# Example 7
def test_eks_tensorflow_neuron_inference(tensorflow_inference_neuron):
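    """Deploy an MNIST Neuron model with TensorFlow Serving on EKS and verify
    inference with an all-zeros image payload."""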
    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "neuron"

    model_name = "mnist_neuron"
    yaml_path = os.path.join(
        os.sep, "tmp",
        f"tensorflow_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"mnist-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODEL_NAME>": model_name,
        "<MODEL_BASE_PATH>": f"s3://aws-dlc-sample-models",
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": tensorflow_inference_neuron,
    }

    search_replace_dict["<NUM_INF1S>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "tensorflow", processor), yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8501")

        # Shell-quoted JSON payload for a single all-zeros 784-pixel (28x28) image
        inference_string = "'{\"instances\": " + str([[0] * 784]) + "}'"
        assert test_utils.request_tensorflow_inference(
            model_name=model_name,
            port=port_to_forward,
            inference_string=inference_string)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def test_eks_tensorflow_half_plus_two_inference(tensorflow_inference):
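    """Deploy the half_plus_two model with TensorFlow Serving on a single-node
    EKS cluster and verify the inference endpoint responds."""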
    if "eia" in tensorflow_inference or "neuron" in tensorflow_inference:
        pytest.skip("Skipping EKS Test for EIA and neuron Images")
    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "gpu" if "gpu" in tensorflow_inference else "cpu"

    model_name = f"saved_model_half_plus_two_{processor}"
    yaml_path = os.path.join(
        os.sep, "tmp",
        f"tensorflow_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"half-plus-two-service-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODEL_NAME>": model_name,
        "<MODEL_BASE_PATH>": f"s3://tensoflow-trained-models/{model_name}",
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": tensorflow_inference
    }

    if processor == "gpu":
        search_replace_dict["<NUM_GPUS>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "tensorflow", processor), yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8500")

        assert test_utils.request_tensorflow_inference(model_name=model_name,
                                                       port=port_to_forward)
    except ValueError as excp:
        eks_utils.LOGGER.error("Service is not running: %s", excp)
        raise
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")


# Example 9
def test_eks_mxnet_gluonnlp_inference(mxnet_inference, py3_only):
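    """Deploy a GluonNLP BERT (SST) model with MXNet on EKS and verify the
    inference endpoint responds."""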
    if "eia" in mxnet_inference:
        pytest.skip("Skipping EKS Test for EIA")
    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "gpu" if "gpu" in mxnet_inference else "cpu"

    model = "https://aws-dlc-sample-models.s3.amazonaws.com/bert_sst/bert_sst.mar"
    yaml_path = os.path.join(
        os.sep, "tmp",
        f"mxnet_single_node_gluonnlp_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"gluonnlp-service-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": mxnet_inference
    }

    if processor == "gpu":
        search_replace_dict["<NUM_GPUS>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("mxnet", processor),
        yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")

        assert test_utils.request_mxnet_inference_gluonnlp(
            port=port_to_forward)
    except ValueError as excp:
        eks_utils.LOGGER.error("Service is not running: %s", excp)
        raise
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def test_eks_mxnet_squeezenet_inference(mxnet_inference):
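    """Deploy a SqueezeNet model with MXNet on EKS and verify the inference
    endpoint responds."""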
    if "eia" in mxnet_inference or "neuron" in mxnet_inference:
        pytest.skip("Skipping EKS Test for EIA and neuron images")
    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "gpu" if "gpu" in mxnet_inference else "cpu"

    model = "squeezenet=https://s3.amazonaws.com/model-server/models/squeezenet_v1.1/squeezenet_v1.1.model"
    yaml_path = os.path.join(
        os.sep, "tmp",
        f"mxnet_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"squeezenet-service-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": mxnet_inference,
    }

    if processor == "gpu":
        search_replace_dict["<NUM_GPUS>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("mxnet", processor),
        yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")

        assert test_utils.request_mxnet_inference(port=port_to_forward)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def test_eks_mxnet_neuron_inference(mxnet_inference, neuron_only):
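    """Deploy a ResNet50 Neuron model via the image entrypoint on EKS and
    verify the inference endpoint responds."""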
    if "eia" in mxnet_inference or "neuron" not in mxnet_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")
    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "neuron"

    server_cmd = "/usr/local/bin/entrypoint.sh -m mxnet-resnet50=https://aws-dlc-sample-models.s3.amazonaws.com/mxnet/Resnet50-neuron.mar -t /home/model-server/config.properties"
    yaml_path = os.path.join(
        os.sep, "tmp",
        f"mxnet_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"resnet50-{processor}-{rand_int}"

    search_replace_dict = {
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": mxnet_inference,
        "<SERVER_CMD>": server_cmd,
    }

    search_replace_dict["<NUM_INF1S>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path("mxnet", processor),
        yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")

        assert test_utils.request_mxnet_inference(port=port_to_forward,
                                                  model="mxnet-resnet50")
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")